source: src/compdic/compdic-fst-update @ f4bf33e

Last change on this file since f4bf33e was f4bf33e, checked in by Tomasz Obrebski <to@…>, 12 years ago

dodane programy do uaktualniania slownika form (compdic-*)

  • Property mode set to 100755
File size: 1.0 KB
Line 
#!/usr/bin/env bash
#
# compdic-fst-update - apply diff-style word lists to a compiled dictionary.
#
# Usage:
#   compdic-fst-update <dictionary> <difference> [<difference> ...]
#
# Every <difference> file is scanned for lines starting with '>' (entries to
# add) and lines starting with '<' (entries to remove); the marker and any
# spaces following it are stripped.  Both collected lists are compiled with
# compdic-create-fst, the "remove" automaton is subtracted from <dictionary>
# (fstdifference), the "add" automaton is merged in (fstunion), and the result
# is determinized and minimized.  The previous dictionary is kept as
# <dictionary>~ and the new automaton is installed under the original name.
#
# Exit status: 0 on success, 2 on a usage error, 1 if any step fails (in which
# case <dictionary> has not been replaced by a partial result).

# -u: catch typos in variable names; pipefail: a failure of any stage of the
# fst pipelines below must not go unnoticed.
set -uo pipefail

# Print the usage message to stderr.
usage() {
  cat >&2 <<'EOF'
Usage:
        compdic-fst-update <dictionary> <difference> [<difference> ...]
where
    <dictionary> - compiled dictionary automaton (fst file) to be updated in place;
                   the previous version is kept as <dictionary>~
    <difference> - diff-style file: lines starting with '>' are entries to add,
                   lines starting with '<' are entries to remove
EOF
}

# Print an error message to stderr and abort.
die() {
  printf 'compdic-fst-update: %s\n' "$*" >&2
  exit 1
}

if [ "$#" -lt 2 ]; then
  usage
  exit 2
fi

dict=$1
shift

tempdir=$(mktemp -d /tmp/compdic.XXXXXX) || die "cannot create temporary directory"
# Remove the scratch directory on every exit path, including failures.
trap 'rm -rf -- "$tempdir"' EXIT

dicplus=$tempdir/plus.dic
fstplus=$tempdir/plus.fst
dicminus=$tempdir/minus.dic
fstminus=$tempdir/minus.fst
tmpfst=$tempdir/tmp.fst
newfst=$tempdir/new.fst

# Make sure both lists exist even when no difference file contributes to them.
: > "$dicplus" || die "cannot create $dicplus"
: > "$dicminus" || die "cannot create $dicminus"

for difference in "$@"; do
  echo "processing $difference ..."
  [ -r "$difference" ] || die "cannot read difference file: $difference"
  # sed -n ... p prints only the lines on which the substitution succeeded,
  # i.e. exactly the lines carrying the marker, with the marker removed.
  sed -n 's/^> *//p' < "$difference" >> "$dicplus" \
    || die "cannot extract added entries from $difference"
  sed -n 's/^< *//p' < "$difference" >> "$dicminus" \
    || die "cannot extract removed entries from $difference"
done

echo "updating $dict ..."
compdic-create-fst "$dicplus" "$fstplus" \
  || die "compiling the list of added entries failed"
compdic-create-fst "$dicminus" "$fstminus" \
  || die "compiling the list of removed entries failed"

fstdifference "$dict" "$fstminus" | fstdeterminize > "$tmpfst" \
  || die "removing entries from $dict failed"

# The result goes to a separate file: redirecting into $tmpfst here would let
# the shell truncate it while fstunion may still need to read it.
fstunion "$tmpfst" "$fstplus" | fstdeterminize | fstminimize > "$newfst" \
  || die "adding entries to $dict failed"

# Only touch the real dictionary once the new automaton is complete.
mv -- "$dict" "$dict~" || die "cannot back up $dict to $dict~"
mv -- "$newfst" "$dict" \
  || die "cannot install the new dictionary; the previous one is in $dict~"
Note: See TracBrowser for help on using the repository browser.