|
Last change
on this file since 555c7f8 was
f4bf33e,
checked in by Tomasz Obrebski <to@…>, 14 years ago
|
|
dodane programy do uaktualniania slownika form (compdic-*)
|
-
Property mode set to
100755
|
|
File size:
1.0 KB
|
| Rev | Line | |
|---|
#!/bin/bash
#
# compdic-fst-update - apply diff-style word-list changes to a compiled
# dictionary automaton.
#
# Usage:
#     compdic-fst-update <dictionary> <difference> <difference> ...
#
# Every <difference> file is scanned for lines beginning with '>' (words to
# add) and lines beginning with '<' (words to remove); the marker and any
# spaces following it are stripped, all other lines are ignored.  Both word
# lists are compiled with compdic-create-fst.  The "minus" automaton is then
# subtracted from <dictionary>, the "plus" automaton is merged in, and the
# determinized and minimized result replaces <dictionary>.  The previous
# version of the dictionary is kept as <dictionary>~.
#
# NOTE(review): an earlier usage text mentioned iso-8859-2 encoded word
# lists; presumably the words in the difference files must be in whatever
# encoding compdic-create-fst expects - confirm.

# Abort on the first failing command (including any stage of a pipeline) so
# that a failed compilation step can never overwrite the dictionary with an
# empty or partial automaton.
set -euo pipefail

if [ $# -lt 2 ]
then
    echo "Usage:"
    echo "    compdic-fst-update <dictionary> <difference> <difference> ..."
    echo "where"
    echo "    <dictionary> - compiled dictionary automaton, updated in place (the old version is kept as <dictionary>~)"
    echo "    <difference> - diff-style file: lines starting with '>' are words to add, lines starting with '<' are words to remove"
    # Exit status 0 is kept on purpose: callers may already rely on it.
    exit 0
fi

tempdir=$(mktemp -d /tmp/compdic.XXXXXX)
# Remove the scratch directory on every exit path, not only on success.
trap 'rm -rf -- "$tempdir"' EXIT

dict=$1
shift

dicplus=$tempdir/plus.dic
fstplus=$tempdir/plus.fst
dicminus=$tempdir/minus.dic
fstminus=$tempdir/minus.fst
tmpfst=$tempdir/tmp.fst
newfst=$tempdir/new.fst

# Start from empty word lists.
: > "$dicplus"
: > "$dicminus"

for difference in "$@"
do
    echo "processing $difference ..."
    # 'sed -n ...p' both selects the marked lines and strips the marker; it
    # exits with status 0 even when no line matches, unlike grep.
    sed -n 's/^> *//p' < "$difference" >> "$dicplus"
    sed -n 's/^< *//p' < "$difference" >> "$dicminus"
done

echo "updating $dict ..."
compdic-create-fst "$dicplus" "$fstplus"
compdic-create-fst "$dicminus" "$fstminus"
fstdifference "$dict" "$fstminus" | fstdeterminize > "$tmpfst"
# The result must go to a different file than the one fstunion reads: the
# output redirection truncates its target as soon as the pipeline starts,
# possibly before fstunion has read it.
fstunion "$tmpfst" "$fstplus" | fstdeterminize | fstminimize > "$newfst"

# Only now, with the new automaton fully built, touch the real dictionary.
mv -- "$dict" "${dict}~"
mv -- "$newfst" "$dict"
|---|
Note: See
TracBrowser
for help on using the repository browser.