Index: app/doc/utt.texinfo
===================================================================
--- app/doc/utt.texinfo	(revision 04ae41456e90c26bd9e177298f293a82662081dc)
+++ app/doc/utt.texinfo	(revision 261bf629fbaab5db9bf8a88242a386ba8b45b3bf)
@@ -9,13 +9,14 @@
 
 @copying
-This manual is for UAM Text Tools (version 0.90, November, 2007)
+This manual is for UAM Text Tools (version 0.90, October, 2008)
 
 Copyright @copyright{}  2005, 2007  Tomasz ObrÃªbski, MichaÂ³ Stolarski, Justyna Walkowska, PaweÂ³ Konieczka.
 
 Permission is granted to copy, distribute and/or modify this document
-under the terms of the GNU Free Documentation License, Version 1.2
-or any later version published by the Free Software Foundation;
-with no Invariant Sections, no Front-Cover Texts, and no Back-Cover
-Texts.  A copy of the license is included in the section entitled GNU Free Documentation License,,GNU Free Documentation License.
+under the terms of the GNU Free Documentation License, Version 1.2 or
+any later version published by the Free Software Foundation; with no
+Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts.  A
+copy of the license is included in the section entitled ``GNU Free
+Documentation License''.
 
 @c @quotation
@@ -358,10 +359,31 @@
 @end example
 
-because in the latter example the first segment (starting at position 0000, 2 characters long) ends at position @var{n}=0001 which is covered by the second segment and no segment starts at position @var{n+2}=0002.
+because in the latter example the first segment (starting at position
+0000, 2 characters long) ends at position @var{n}=0001 which is
+covered by the second segment and no segment starts at position
+@var{n+1}=0002.
+
+
+@section Flattened UTT file
+
+The UTT file format has two variants: regular and flattened. The regular
+format was described above.  In the flattened format some of the
+end-of-line characters are replaced with line-feed characters.
+
+The flattened format is basically used to represent whole sentences as
+single lines of the input file (all intrasentential end-of-line
+characters are replaced with line-feed characters).
+
+This technical trick makes it possible to perform certain text
+processing operations on entire sentences with the use of such tools as
+@command{grep} (see the @command{grp} component) or @command{sed} (see the @command{mar} component).
+
+The conversion between the two formats is performed by the tools:
+@command{fla} and @command{unfla}.
 
 @section Character encoding
 
 The UTT component programs accept only 1-byte character encoding, such
-as ISO, ANSI, DOS, UTF-8 (probably: not tested yet).
+as ISO, ANSI, DOS.
 
 
@@ -527,97 +549,4 @@
 
 @c ---------------------------------------------------------------------
-@c ---------------------------------------------------------------------
-
-@c @node Common command line options
-@c @chapter Common command line options
-
-@c @table @code
-
-@c @parhelp
-
-@c @item @b{@minus{}@minus{}help}, @b{@minus{}h}
-@c Print help.
-
-@c @item @b{@minus{}@minus{}version}, @b{@minus{}v}
-@c Print version information.
-
-@c @item @b{@minus{}@minus{}file=@var{filename}, @minus{}f @var{filename}}
-@c Input file name.
-@c If this option is absent or equal to '@minus{}', the program
-@c reads from the standard input.
-
-@c @item @b{@minus{}@minus{}output=@var{filename}, @minus{}o @var{filename}}
-@c Regular output file name. To regular output the program sends segments
-@c which it successfully processed and copies those which were not
-@c subject to processing. If this option is absent or equal to
-@c '@minus{}', standard output is used.
-
-@c @item @b{@minus{}@minus{}fail=@var{filename}, @minus{}e @var{filename}}
-@c Fail output file name. To fail output the program copies the segments
-@c it failed to process.  If this option is absent or equal to
-@c '@minus{}', standard output is used.
-
-@c @item @b{@minus{}@minus{}only-fail}
-@c Discard segments which would normally be sent to regular
-@c output. Print only segments the program failed to process.
-
-@c @item @b{@minus{}@minus{}no-fail}
-@c Discard segments the program failed to process.
-@c (This and the previous option are functionally equivalent to,
-@c respectively, @option{-o /dev/null} and @option{-e /dev/null}, but
-@c make the programs run faster.)
-
-@c @item @b{@minus{}@minus{}input-field=@var{fieldname}, @minus{}I @var{fieldname}}
-@c The field containing the input to the program. The default is usually
-@c the @var{form} field (unless otherwise stated in the program
-@c description). The fields @var{position}, @var{length}, @var{tag}, and
-@c @var{form} are referred to as @code{1}, @code{2}, @code{3}, @code{4},
-@c respectively.
-
-@c @item @b{@minus{}@minus{}output-field=@var{fieldname}, @minus{}O @var{fieldname}}
-@c The name of the field added by the program. The default is the name of
-@c the program.
-
-@c @c @item @b{@minus{}@minus{}copy, @minus{}c}
-@c @c Copy processed segments to regular output.
-
-@c @item @b{@minus{}@minus{}dictionary=@var{filename}, @minus{}d @var{filename}}
-@c Dictionary file name.
-@c (This option is used by programs which use dictionary data.)
-
-@c @item @b{@minus{}@minus{}process=@var{tag}, @minus{}p @var{tag}}
-@c Process segments with the specified value in the @var{tag} field.
-@c Multiple occurences of this option are allowed and are interpreted as
-@c disjunction. If this option is absent, all segments are processed.
-
-@c @item @b{@minus{}@minus{}select=@var{fieldname}, @minus{}s @var{fieldname}}
-@c Select for processing only segments in which the field named
-@c @var{fieldname} is present. Multiple occurences of this option are
-@c allowed and are interpreted as conjunction of conditions. If this
-@c option is absent, all segments are processed.
-
-@c @item @b{@minus{}@minus{}unselect=@var{fieldname}, @minus{}S @var{fieldname}}
-@c Select for processing only segments in which the field @var{fieldname}
-@c is absent.  Multiple occurences of this option are allowed and are
-@c interpreted as conjunction of conditions. If this option is absent,
-@c all segments are processed.
-
-@c @item @b{@minus{}@minus{}interactive @minus{}i}
-@c This option toggles interactive mode, which is by default off. In the
-@c interactive mode the program does not buffer the output.
-
-@c @item @b{@minus{}@minus{}config=@var{filename}}
-@c Read configuration from file @file{@var{filename}}.
-
-@c @item @b{@minus{}@minus{}one @minus{}1}
-@c This option makes the program print ambiguous annotation in one output
-@c segment. By default when
-@c ambiguous new annotation is being produced for a segment, the segment
-@c is multiplicated and each of the annotations is added to separate copy
-@c of the segment.
-
-@c @end table
-
-@c ---------------------------------------------------------------------
 @c CONFIGURATION FILES
 @c ---------------------------------------------------------------------
@@ -695,12 +624,14 @@
 
 Filters: programs which read and produce UTT-formatted data
-@c * sen - the sentencizer::
 * lem::         a morphological analyzer
 * gue::         a morphological guesser
-* cor::         a spelling corrector
+* cor::         a simple spelling corrector
+* kor::         a more elaborate spelling corrector
 * sen::         a sentensizer
-@c * gph - the graphizer::
 * ser::         a pattern search tool (marks matches)
+* mar::         a pattern search tool (introduces arbitrary markers into the text)
 * grp::         a pattern search tool (selects sentences containing a match)
+@c * gph::         a word-graph annotation tool
+@c * dgp::         a dependency parser
 
 Sinks: programs which read UTT data and produce output in another format
@@ -722,4 +653,7 @@
 @item @strong{Authors:}                 @tab Tomasz ObrÃªbski
 @item @strong{Component category:}      @tab source
+@item @strong{Input format:}            @tab raw text file
+@item @strong{Output format:}           @tab UTT regular
+@item @strong{Required annotation:}     @tab -
 @end multitable
 
@@ -835,4 +769,7 @@
 @item @strong{Authors:}                 @tab Tomasz ObrÃªbski, MichaÂ³ Stolarski
 @item @strong{Component category:}      @tab filter
+@item @strong{Input format:}            @tab UTT regular
+@item @strong{Output format:}           @tab UTT regular
+@item @strong{Required annotation:}     @tab tok
 @end multitable
 
@@ -1032,26 +969,32 @@
 located by default in:
 
-@file{$HOME/.utt/pl/lem.bin}
+@file{$HOME/.local/share/utt/pl_PL.ISO-8859-2/lem.bin}
+
+in a local installation, or in
+
+@file{/usr/local/share/utt/pl_PL.ISO-8859-2/lem.bin}
+
+in a system-wide installation.
 
 @node lem hints
 @subsection Hints
 
-@c @subsubheading Combining data from multiple dictionaries
-
-@c @itemize
-
-@c @item Apply <dict1>, then apply <dict2> to words which were not annotatated.
-
-@c @example
-@c lem -d <dict1> | lem -S lem -d <dict2>
-@c @end example
-
-@c @item Add annotations from two dictionaries <dict1> and <dict2>.
-
-@c @example
-@c lem -c -d <dict1> | lem -S lem -d <dict2>
-@c @end example
-
-@c @end itemize
+@subsubheading Combining data from multiple dictionaries
+
+@itemize
+
+@item Apply <dict1>, then apply <dict2> to words which were not annotated.
+
+@example
+lem -d <dict1> | lem -S lem -d <dict2>
+@end example
+
+@item Add annotations from two dictionaries <dict1> and <dict2>.
+
+@example
+lem -c -d <dict1> | lem -S lem -d <dict2>
+@end example
+
+@end itemize
 
 
@@ -1071,12 +1014,18 @@
 @end multitable
 
-@command{gue} guesess morphological descriptions of the form contained
-in the @var{form} field.
-
 @menu
+* gue description::    
 * gue command line options::    
 * gue example::                 
 * gue dictionaries::            
 @end menu
+
+
+@node gue description
+@subsection Description
+
+@command{gue} guesses morphological descriptions of the form contained
+in the @var{form} field.
+
 
 @node gue command line options
@@ -1182,5 +1131,18 @@
 @item @strong{Authors:}                 @tab Tomasz ObrÃªbski, MichaÂ³ Stolarski
 @item @strong{Component category:}      @tab filter
+@item @strong{Input format:}            @tab UTT regular
+@item @strong{Output format:}           @tab UTT regular
+@item @strong{Required annotation:}     @tab tok
 @end multitable
+
+@menu
+* cor description::
+* cor command line options::    
+* cor dictionaries::            
+@end menu
+
+
+@node cor description
+@subsection Description
 
 The spelling corrector applies Kemal Oflazer's dynamic programming
@@ -1189,14 +1151,4 @@
 word form it returns all word forms present in the dictionary whose
 edit distance is smaller than the threshold given as the parameter.
-
-By default @code{cor} replaces the contents of the @var{form} field
-with new corrected value, placing the old contents in the @code{cor}
-field.
-
-
-@menu
-* cor command line options::    
-* cor dictionaries::            
-@end menu
 
 
@@ -1225,4 +1177,8 @@
 Maximum edit distance (default='1').
 
+@c @item @b{@minus{}@minus{}replace, @minus{}r}
+@c Replace original form with corrected form, place original form in the
+@c cor field. This option has no effect in @option{--one-*} modes (default=off)
+
 
 @end table
@@ -1243,4 +1199,27 @@
 @end example
 
+@subsubheading Binary format
+
+The mandatory file name extension for a binary dictionary is @code{bin}. To
+compile a text dictionary into binary format, write:
+
+@example
+compiledic <dictionaryname>.dic
+@end example
+
+@c ---------------------------------------------------------------------
+@c KOR
+@c ---------------------------------------------------------------------
+
+@page
+@node kor
+@section kor - configurable spelling corrector
+
+[TODO]
+
+@c ---------------------------------------------------------------------
+@c SEN
+@c ---------------------------------------------------------------------
+
 @page
 @node sen
@@ -1251,14 +1230,22 @@
 @item @strong{Authors:}                 @tab Tomasz ObrÃªbski
 @item @strong{Component category:}      @tab filter
+@item @strong{Input format:}            @tab UTT regular
+@item @strong{Output format:}           @tab UTT regular
+@item @strong{Required annotation:}     @tab tok
 
 @end multitable
 
-@command{sen} detects sentence boundaries in UTT-formatted texts and marks them with special zero-length segments, in which the @var{type} field may contain the BOS (beginning of sentence) or EOS (end of sentence) annotation. 
 
 @menu
+* sen description::
 @c * sen input::
 @c * sen output::
 * sen example::                 
 @end menu
+
+@node sen description
+@subsection Description
+
+@command{sen} detects sentence boundaries in UTT-formatted texts and marks them with special zero-length segments, in which the @var{type} field may contain the BOS (beginning of sentence) or EOS (end of sentence) annotation. 
 
 @node sen example
@@ -1305,6 +1292,6 @@
 
 
+@c ---------------------------------------------------------------------
 @c SER
-@c ---------------------------------------------------------------------
 @c ---------------------------------------------------------------------
 
@@ -1316,9 +1303,11 @@
 @item @strong{Authors:}                 @tab Tomasz ObrÃªbski
 @item @strong{Component category:}      @tab filter
+@item @strong{Input format:}            @tab UTT regular
+@item @strong{Output format:}           @tab UTT regular
+@item @strong{Required annotation:}     @tab tok, @code{lem --one-field}
 @end multitable
 
-@command{ser} looks for patterns in UTT-formatted texts.
-
 @menu
+* ser description::
 * ser command line options::    
 * ser pattern::                 
@@ -1328,4 +1317,10 @@
 * ser requirements::            
 @end menu
+
+
+@node ser description
+@subsection Description
+
+@command{ser} looks for patterns in UTT-formatted texts.
 
 
@@ -1504,5 +1499,5 @@
 
 @example
-define(`verbseq', `(cat(V) (space cat(V)))')
+define(`verbseq', `(cat(<V>) (space cat(<V>)))')
 @end example
 
@@ -1515,5 +1510,5 @@
 @subsection Limitations
 
-more than 3 attributes in <>.
+Do not use more than 3 attributes in <>.
 
 @node ser requirements
@@ -1533,6 +1528,6 @@
 
 
+@c ---------------------------------------------------------------------
 @c GRP
-@c ---------------------------------------------------------------------
 @c ---------------------------------------------------------------------
 
@@ -1544,26 +1539,12 @@
 @item @strong{Authors:}                 @tab Tomasz ObrÃªbski
 @item @strong{Component category:}      @tab filter
+@item @strong{Input format:}            @tab UTT flattened
+@item @strong{Output format:}           @tab UTT flattened
+@item @strong{Required annotation:}     @tab tok, sen, @code{lem --one-field}
 @end multitable
 
 
-@code{gre} selects sentences containing an expression matching a
-pattern. The pattern format is exactly the same as that accepted by
-@code{ser}.
-
-@code{gre} is intended mainly for speeding up corpus search process.
-It is extremely fast (processing speed is usually higher then the speed
-of reading the corpus file from disk). 
-
-
-
-@c @menu
-@c * ser command line options::    
-@c * ser pattern::                 
-@c * ser how ser works::           
-@c * ser customization::           
-@c * ser limitations::             
-@c * ser requirements::            
-@c @end menu
 @menu
+* grp description::
 * grp command line options::    
 * grp pattern::                 
@@ -1571,4 +1552,16 @@
 @end menu
 
+
+@node grp description
+@subsection Description
+
+@command{grp} selects sentences containing an expression matching a
+pattern. The pattern format is exactly the same as that accepted by
+@command{ser}.
+
+@command{grp} is intended mainly for speeding up the corpus search process.
+It is extremely fast (processing speed is usually higher than the speed
+of reading the corpus file from disk).
+
 @node grp command line options
 @subsection Command line options
@@ -1578,8 +1571,4 @@
 @parhelp
 @parversion
-@c @parfile
-@c @paroutput
-@c @parinputfield
-@c @paroutputfield
 @parprocess
 @parinteractive
@@ -1627,8 +1616,24 @@
 
 
-@c ---------------------------------------------------------------------
-@c kot
-@c ---------------------------------------------------------------------
-@c ---------------------------------------------------------------------
+
+@c ---------------------------------------------------------------------
+@c MAR
+@c ---------------------------------------------------------------------
+
+@page
+@node mar
+@section mar
+
+@multitable {aaaaaaaaaaaaaaaaaaaaaaaaa} {aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa}
+@item @strong{Authors:}                 @tab Marcin Walas, Tomasz Obr@ogonek{e}bski
+@item @strong{Component category:}      @tab filter
+@end multitable
+
+[TODO]
+
+@c ---------------------------------------------------------------------
+@c KOT
+@c ---------------------------------------------------------------------
+
 
 @page
@@ -1636,13 +1641,24 @@
 @section kot - untokenizer
 
-Authors: Tomasz ObrÃªbski
-
-@command{kot} is the opposite of @command{tok}. It changes UTT-formatted text into plain text.
+@multitable {aaaaaaaaaaaaaaaaaaaaaaaaa} {aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa}
+@item @strong{Authors:}                 @tab Tomasz Obr@ogonek{e}bski
+@item @strong{Component category:}      @tab filter
+@item @strong{Input format:}            @tab UTT regular
+@item @strong{Output format:}           @tab text
+@item @strong{Required annotation:}     @tab tok
+@end multitable
+
 
 @menu
+* kot description::
 * kot command line options::    
 * kot usage examples::    
 @end menu
 
+@node kot description
+@subsection Description
+
+@command{kot} transforms a UTT-formatted file back into raw text format.
+
 @node kot command line options
 @subsection Command line options
@@ -1684,7 +1700,8 @@
 @end example
 
-@c CON............................................................
-@c ...............................................................
-@c ...............................................................
+@c ---------------------------------------------------------------
+@c CON
+@c ---------------------------------------------------------------
+
 
 @page
@@ -1692,17 +1709,26 @@
 @section con - concordance table generator
 
-@command{con} generates a concordance table based on a pattern given to @command{ser}.
-
 @multitable {aaaaaaaaaaaaaaaaaaaaaaaaa} {aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa}
 @item @strong{Authors:}                 @tab Justyna Walkowska
 @item @strong{Component category:}      @tab sink
+@item @strong{Input format:}            @tab UTT regular
+@item @strong{Output format:}           @tab text
+@item @strong{Required annotation:}     @tab ser or mar
 @end multitable
 @c
 
 @menu
+* con description::
 * con command line options::
 * con usage example::
 * con hints::    
 @end menu
+
+
+@node con description
+@subsection Description
+
+@command{con} generates a concordance table based on a pattern given to @command{ser}.
+
 
 @node con command line options
@@ -1758,7 +1784,7 @@
 @item @b{@minus{}@minus{}ignore @minus{}i}            
 	Ignore segment inconsistency in the input.
-@item @b{@minus{}@minus{}bon}            
+@item @b{@minus{}@minus{}bom}            
 	Beginning of selected segment (regex, default='[0-9]+ [0-9]+ BOM .*').
-@item @b{@minus{}@minus{}eob}            
+@item @b{@minus{}@minus{}eom}            
 	End of selected segment (regex, default='[0-9]+ [0-9]+ EOM .*').
 @item @b{@minus{}@minus{}bod}            
@@ -1774,5 +1800,5 @@
 @subsection Usage example
 @example
-cat file.txt | tok | lem -1 | ser -e 'lexeme(dom) | con'  
+cat file.txt | tok | lem -1 | ser -e 'lexeme(dom)' | con  
 @end example
 
@@ -1788,5 +1814,4 @@
 ... | grp -e EXPR | ser -e EXPR | con
 @end example
-