Index: src/common/common.cc
===================================================================
--- src/common/common.cc	(revision cfdf333a512778a753f04c7ce616075f6059e7bb)
+++ src/common/common.cc	(revision f4bf33ed04eb342d17a377035e58f078b78159c3)
@@ -168,5 +168,5 @@
     if(!(outputf=fopen(args->output_arg,"w")))
     {
-      fprintf(stderr,"Cannot open output file: %s.\n", args->output_arg);
+      fprintf(stderr,"Cannot open the output file: %s.\n", args->output_arg);
       exit(1);
     }
@@ -175,5 +175,5 @@
       if(!(failedf=fopen(args->fail_arg,"w")))
       {
-	fprintf(stderr,"Cannot open output file: %s.\n", args->fail_arg);
+	fprintf(stderr,"Cannot open the output file: %s.\n", args->fail_arg);
 	exit(1);
       }
Index: src/compdic/Makefile
===================================================================
--- src/compdic/Makefile	(revision e7de6cc88c605c4f810cbc852e843294b4b0e8ac)
+++ src/compdic/Makefile	(revision f4bf33ed04eb342d17a377035e58f078b78159c3)
@@ -16,4 +16,10 @@
 ifdef BIN_DIR
 	install -m 0755 compdic $(BIN_DIR)
+	install -m 0755 compdic-create-fst $(BIN_DIR)
+	install -m 0755 compdic-fst-add $(BIN_DIR)
+	install -m 0755 compdic-fst-minus $(BIN_DIR)
+	install -m 0755 compdic-fst-to-bin $(BIN_DIR)
+	install -m 0755 compdic-fst-update $(BIN_DIR)
+
 	install -m 0755 fsm2aut $(BIN_DIR)
 	install -m 0755 aut2fsa $(BIN_DIR)
@@ -25,4 +31,9 @@
 ifdef BIN_DIR
 	rm $(BIN_DIR)/compdic
+	rm $(BIN_DIR)/compdic-create-fst
+	rm $(BIN_DIR)/compdic-fst-add
+	rm $(BIN_DIR)/compdic-fst-minus
+	rm $(BIN_DIR)/compdic-fst-to-bin
+	rm $(BIN_DIR)/compdic-fst-update
 	rm $(BIN_DIR)/fsm2aut
 	rm $(BIN_DIR)/aut2fsa
Index: src/compdic/compdic-create-fst
===================================================================
--- src/compdic/compdic-create-fst	(revision f4bf33ed04eb342d17a377035e58f078b78159c3)
+++ src/compdic/compdic-create-fst	(revision f4bf33ed04eb342d17a377035e58f078b78159c3)
@@ -0,0 +1,181 @@
+#!/bin/bash
+# Compile a word list into a minimal acceptor in OpenFst format, building it in parts.
+no_of_parts=0
+
+# Leading options; the last two arguments are always <wordlist> and <automaton>.
+while [ $# -gt 2 ]
+do
+  case $1
+  in
+    -p)
+      no_of_parts=$2
+      shift 2
+    ;;
+
+    *)
+      echo "The arguments to use are"
+      echo "-p: number of parts"
+      shift 1
+    ;;
+  esac
+done
+
+if [ $# -lt 2 ]
+then
+    echo "Usage:"
+    echo "        compdic-create-fst [-p <parts>] <wordlist> <automaton>"
+    echo "where"
+    echo "    <wordlist> - file containing a list of words, one per line, iso-8859-2 encoded"
+    echo "    <automaton> - a file to which the compiled automaton in openfst format should be written"
+    exit 1
+fi
+
+# No -p given: use one part per 75000 input lines.
+if [ "$no_of_parts" -eq 0 ]
+then
+    no_of_parts=$(( `wc -l < "$1"` / 75000 + 1 ))
+fi
+
+echo number of parts: $no_of_parts
+
+tempdir=`mktemp -d /tmp/compdic.XXXXXX` || exit 1
+alphabet=$tempdir/alphabet.syms
+
+cat <<EOF > $alphabet
+<eps> 0
+a 1
+A 2
+ä 3
+± 4
+¡ 5
+b 6
+B 7
+c 8
+C 9
+æ 10
+Æ 11
+d 12
+D 13
+e 14
+E 15
+é 16
+ê 17
+Ê 18
+f 19
+F 20
+g 21
+G 22
+h 23
+H 24
+i 25
+I 26
+j 27
+J 28
+k 29
+K 30
+l 31
+L 32
+³ 33
+£ 34
+m 35
+M 36
+n 37
+N 38
+ñ 39
+Ñ 40
+o 41
+O 42
+ö 43
+ó 44
+Ó 45
+p 46
+P 47
+q 48
+Q 49
+r 50
+R 51
+s 52
+S 53
+¶ 54
+Š 55
+t 56
+T 57
+u 58
+U 59
+ü 60
+v 61
+V 62
+w 63
+W 64
+x 65
+X 66
+y 67
+Y 68
+z 69
+Z 70
+Œ 71
+¬ 72
+¿ 73
+¯ 74
+0 75
+1 76
+2 77
+3 78
+4 79
+5 80
+6 81
+7 82
+8 83
+9 84
+_ 85
+- 86
+? 87
+! 88
+~ 89
+; 90
+, 91
+/ 92
+* 93
++ 94
+EOF
+
+
+# Split the word list into $no_of_parts chunks of equal line count (the last may be shorter).
+no_of_lines=$(( (`wc -l < "$1"` / $no_of_parts) + 1 ))
+split -l $no_of_lines "$1" "$tempdir/part."
+
+automaton=$tempdir/output.fst
+temp1=$tempdir/step-det.fst
+temp2=$tempdir/step-min.fst
+temp3=$tempdir/step-union.fst
+
+# Start from an automaton compiled from empty input and union the parts in one by one.
+fstcompile --acceptor --isymbols="$alphabet" < /dev/null > "$automaton"
+
+n=0
+
+for f in "$tempdir"/part.*
+do
+    [ -e "$f" ] || continue    # empty word list: split wrote nothing, glob stayed literal
+
+    n=$(( $n + 1 ))
+    echo processing part $n
+
+    lst2fstext < "$f" |\
+    fstcompile --acceptor --isymbols="$alphabet" |\
+    fstrmepsilon |\
+    fstdeterminize > "$temp1"
+    fstminimize "$temp1" "$temp2"
+
+    fstunion "$automaton" "$temp2" | fstrmepsilon | fstdeterminize > "$temp3"
+    fstminimize "$temp3" "$automaton"
+done
+
+echo generating binary automaton file ...
+
+fsttopsort < "$automaton" > "$2"
+rm -r "$tempdir"
+
+#echo generating cats file ...
+
+#cat $1 | cut -d ',' -f 2 | sort -u $1.cats
Index: src/compdic/compdic-fst-add
===================================================================
--- src/compdic/compdic-fst-add	(revision f4bf33ed04eb342d17a377035e58f078b78159c3)
+++ src/compdic/compdic-fst-add	(revision f4bf33ed04eb342d17a377035e58f078b78159c3)
@@ -0,0 +1,18 @@
+#!/bin/bash
+# Add the paths of <automaton2> to <automaton1> (union); <automaton1> is rewritten in place.
+if [ $# -ne 2 ]
+then
+    echo "Usage:"
+    echo "        compdic-fst-add <automaton1> <automaton2>"
+    echo "where"
+    echo "    <automaton1> - automaton in openfst format"
+    echo "    <automaton2> - automaton in openfst format containing paths to be added to <automaton1>"
+    exit 1
+fi
+
+set -o pipefail
+tempdir=`mktemp -d /tmp/compdic.XXXXXX` || exit 1
+automaton="$tempdir/output.fst"
+# Replace <automaton1> only when the whole pipeline succeeded.
+fstunion "$1" "$2" | fstrmepsilon | fstdeterminize | fstminimize | fsttopsort > "$automaton" && mv "$automaton" "$1"
+status=$?; rm -r "$tempdir"; exit $status
Index: src/compdic/compdic-fst-minus
===================================================================
--- src/compdic/compdic-fst-minus	(revision f4bf33ed04eb342d17a377035e58f078b78159c3)
+++ src/compdic/compdic-fst-minus	(revision f4bf33ed04eb342d17a377035e58f078b78159c3)
@@ -0,0 +1,18 @@
+#!/bin/bash
+# Remove the paths of <automaton2> from <automaton1> (difference); <automaton1> is rewritten in place.
+if [ $# -ne 2 ]
+then
+    echo "Usage:"
+    echo "        compdic-fst-minus <automaton1> <automaton2>"
+    echo "where"
+    echo "    <automaton1> - automaton in openfst format"
+    echo "    <automaton2> - automaton in openfst format containing paths to be removed from <automaton1>"
+    exit 1
+fi
+
+set -o pipefail
+tempdir=`mktemp -d /tmp/compdic.XXXXXX` || exit 1
+automaton="$tempdir/output.fst"
+# Replace <automaton1> only when the whole pipeline succeeded.
+fstdifference "$1" "$2" | fsttopsort > "$automaton" && mv "$automaton" "$1"
+status=$?; rm -r "$tempdir"; exit $status
Index: src/compdic/compdic-fst-to-bin
===================================================================
--- src/compdic/compdic-fst-to-bin	(revision f4bf33ed04eb342d17a377035e58f078b78159c3)
+++ src/compdic/compdic-fst-to-bin	(revision f4bf33ed04eb342d17a377035e58f078b78159c3)
@@ -0,0 +1,120 @@
+#!/bin/bash
+# Convert an automaton in OpenFst format into the binary format written by aut2fsa.
+if [ $# -lt 2 ]
+then
+    echo "Usage:"
+    echo "        compdic-fst-to-bin <fstautomaton> <binautomaton>"
+    echo "where"
+    echo "    <fstautomaton> - file containing automaton in openfst format"
+    echo "    <binautomaton> - a file to which the compiled binary automaton (lem/cor/kor format) should be written"
+    exit 1
+fi
+
+tempdir=`mktemp -d /tmp/compdic.XXXXXX` || exit 1
+
+alphabet=$tempdir/alphabet.syms
+
+cat <<EOF > $alphabet
+<eps> 0
+a 1
+A 2
+ä 3
+± 4
+¡ 5
+b 6
+B 7
+c 8
+C 9
+æ 10
+Æ 11
+d 12
+D 13
+e 14
+E 15
+é 16
+ê 17
+Ê 18
+f 19
+F 20
+g 21
+G 22
+h 23
+H 24
+i 25
+I 26
+j 27
+J 28
+k 29
+K 30
+l 31
+L 32
+³ 33
+£ 34
+m 35
+M 36
+n 37
+N 38
+ñ 39
+Ñ 40
+o 41
+O 42
+ö 43
+ó 44
+Ó 45
+p 46
+P 47
+q 48
+Q 49
+r 50
+R 51
+s 52
+S 53
+¶ 54
+Š 55
+t 56
+T 57
+u 58
+U 59
+ü 60
+v 61
+V 62
+w 63
+W 64
+x 65
+X 66
+y 67
+Y 68
+z 69
+Z 70
+Œ 71
+¬ 72
+¿ 73
+¯ 74
+0 75
+1 76
+2 77
+3 78
+4 79
+5 80
+6 81
+7 82
+8 83
+9 84
+_ 85
+- 86
+? 87
+! 88
+~ 89
+; 90
+, 91
+/ 92
+* 93
++ 94
+EOF
+
+fstrmepsilon < "$1" | fstdeterminize | fstminimize | fstprint --acceptor --isymbols="$alphabet" | fsm2aut | aut2fsa > "$2"
+rm -r "$tempdir"
+
+#echo generating cats file ...
+
+#cat $1 | cut -d ',' -f 2 | sort -u $2.cats
Index: src/compdic/compdic-fst-update
===================================================================
--- src/compdic/compdic-fst-update	(revision f4bf33ed04eb342d17a377035e58f078b78159c3)
+++ src/compdic/compdic-fst-update	(revision f4bf33ed04eb342d17a377035e58f078b78159c3)
@@ -0,0 +1,43 @@
+#!/bin/bash
+if [ $# -lt 2 ]
+then
+    echo "Usage:"
+    echo "        compdic-fst-update <dictionary> <difference> <difference> ..."
+    echo "where"
+    echo "    <dictionary> - automaton in openfst format, updated in place (the old version is kept as <dictionary>~)"
+    echo "    <difference> - diff-style file: lines starting with '>' are words to add, lines starting with '<' are words to remove"
+    exit 1
+fi
+
+tempdir=`mktemp -d /tmp/compdic.XXXXXX` || exit 1
+
+dict=$1
+shift
+
+dicplus=$tempdir/plus.dic
+fstplus=$tempdir/plus.fst
+dicminus=$tempdir/minus.dic
+fstminus=$tempdir/minus.fst
+tmpfst=$tempdir/tmp.fst
+newfst=$tempdir/new.fst
+
+touch "$dicplus" "$dicminus"
+
+# Collect the added ('>') and removed ('<') words from every difference file.
+while [ $# -gt 0 ]
+do
+    echo processing $1 ...
+    grep -E '^>' "$1" | sed -r 's/^> *//' >> "$dicplus"
+    grep -E '^<' "$1" | sed -r 's/^< *//' >> "$dicminus"
+    shift
+done
+
+echo updating $dict ...
+compdic-create-fst "$dicplus" "$fstplus"
+compdic-create-fst "$dicminus" "$fstminus"
+fstdifference "$dict" "$fstminus" | fstdeterminize > "$tmpfst"
+# Write to a separate file: redirecting onto $tmpfst would truncate it before fstunion reads it.
+fstunion "$tmpfst" "$fstplus" | fstdeterminize | fstminimize > "$newfst"
+mv "${dict}" "${dict}~"
+mv "$newfst" "${dict}"
+rm -r "$tempdir"
