source: _old/nawszelkiwypadek/tools/gue_dic/prep.pl @ 93afab8

Last change on this file since 93afab8 was a6e708f, checked in by tom <tom@…>, 13 years ago

ANULOWANIE POPRZEDNIEGO COMMITU

Revert "Replacing old implementation with working implementation"

This reverts commit 1e121f45e2d091fcd34a893291b8453e350d5884.

Please enter the commit message for your changes. Lines starting
with '#' will be ignored, and an empty message aborts the commit.

Committer: tom <tom@lim.(none)>

On branch master
Changes to be committed:

(use "git reset HEAD <file>..." to unstage)

modified: _old/app/Makefile
deleted: _old/app/conf/Makefile
deleted: _old/app/conf/compiledic.conf
deleted: _old/app/conf/cor.conf
deleted: _old/app/conf/dgc.conf
deleted: _old/app/conf/dgp.conf
deleted: _old/app/conf/gph.conf
deleted: _old/app/conf/grp.conf
deleted: _old/app/conf/gue.conf
deleted: _old/app/conf/kor.conf
deleted: _old/app/conf/lem.conf
deleted: _old/app/conf/mar.conf
deleted: _old/app/conf/ser.conf
deleted: _old/app/conf/utt.conf
modified: _old/app/src/common/Makefile
modified: _old/app/src/compiledic/Makefile
modified: _old/app/src/compiledic/aut2fsa.cc
modified: _old/app/src/cor/Makefile
modified: _old/app/src/dgp/Makefile
new file: _old/app/src/dgp/canonize
new file: _old/app/src/dgp/dgc
modified: _old/app/src/dgp/grammar.hh
modified: _old/app/src/dgp/mgraph.hh
modified: _old/app/src/dgp/sgraph.hh
modified: _old/app/src/dgp/thesymbols.hh
new file: _old/app/src/dgp/tre
modified: _old/app/src/gue/Makefile
modified: _old/app/src/gue/guess.cc
modified: _old/app/src/kor/Makefile
modified: _old/app/src/kor/corlist.cc
modified: _old/app/src/kor/corr.cc
new file: _old/app/src/kor/corr.hh
modified: _old/app/src/kor/main.cc
modified: _old/app/src/lem/Makefile
modified: _old/app/src/lem/lem.cc
modified: _old/app/src/lib/Makefile
modified: _old/app/src/lib/auttools.cc
modified: _old/app/src/lib/symtab.cc
modified: _old/app/src/lib/tft.h
modified: _old/app/src/lib/tfti.h
modified: _old/app/src/lib/ttrans.h
modified: _old/app/src/lib/word.cc
modified: _old/app/src/lib/word.h
modified: _old/app/src/tok.c/Makefile
modified: _old/app/src/tok.c/cmdline_tok.ggo
modified: _old/app/src/tok.c/common_tok.cc
modified: _old/app/src/tok/Makefile
modified: _old/nawszelkiwypadek/tools/aut2fsa
modified: _old/nawszelkiwypadek/tools/cor_dic/makeLabels.pl
modified: _old/nawszelkiwypadek/tools/cor_dic/prep.pl
modified: _old/nawszelkiwypadek/tools/fsm2aut
modified: _old/nawszelkiwypadek/tools/gue_dic/canon.pl
modified: _old/nawszelkiwypadek/tools/gue_dic/compile_user_dict.pl
modified: _old/nawszelkiwypadek/tools/gue_dic/count_prefs.pl
modified: _old/nawszelkiwypadek/tools/gue_dic/cut_prefs.pl
modified: _old/nawszelkiwypadek/tools/gue_dic/makeLabels.pl
modified: _old/nawszelkiwypadek/tools/gue_dic/prep.pl
modified: _old/nawszelkiwypadek/tools/gue_dic/prep_user_dict.pl
modified: _old/nawszelkiwypadek/tools/gue_dic/rmDup.pl
modified: _old/nawszelkiwypadek/tools/gue_dic/stat.pl
modified: _old/nawszelkiwypadek/tools/gue_dic/stat_pre.pl
modified: _old/nawszelkiwypadek/tools/lem_dic/makeLabels.pl
modified: _old/nawszelkiwypadek/tools/lem_dic/prep.pl
modified: auto/defaults
modified: auto/options
modified: auto/output/Makefile
modified: auto/output/config_h
modified: auto/summary
modified: configure

  • Property mode set to 100755
File size: 3.6 KB
RevLine 
#! /usr/bin/perl
#
# prep.pl - compile a text dictionary into gue.bin.
#
# Usage: prep.pl dictionary_file [kind]
#
#   dictionary_file  input that is fed to canon.pl
#   kind             "pre" selects stat_pre.pl; any other value (default
#                    "suf") selects stat.pl together with the prefix table
#
# The helper scripts (makeLabels.pl, canon.pl, count_prefs.pl, ...) and the
# lex*/fsm* tools are invoked by bare name, so they must be reachable through
# PATH; fsm2aut and aut2fsa are run from the parent directory.  All temporary
# files (temp1, temp2, prefsS, slownik*.fsm, LemTEMP/) are created in the
# current working directory.

use strict;
use warnings;
use locale;

# Make every progress message visible before the command it announces runs.
$| = 1;

# Line count at which the splitter switches to a new chunk file.
my $lines_per_file = 20000;

# Directory that holds the chunk files and the per-chunk automata.
my $tmp_dir = 'LemTEMP';

# Run a shell command, discarding whatever it writes to standard output.
# A non-zero exit status is reported on STDERR but does not stop the script,
# because the exit conventions of the external tools are not known here.
sub run {
    my ($cmd) = @_;
    my $ignored_output = qx{$cmd};
    warn "prep.pl: command failed (status $?): $cmd\n" if $? != 0;
    return;
}

# Wrap a string in single quotes so the shell passes it through verbatim.
sub shell_quote {
    my ($text) = @_;
    $text =~ s/'/'\\''/g;
    return "'$text'";
}

if (@ARGV < 1) {
    print "usage: prep.pl dictionary_file [pre|suf]\n";
    exit 1;
}

my $file = shift @ARGV;
my $kind = shift @ARGV;
$kind = 'suf' if !defined $kind || $kind eq '';

# Prepare the label files.

run('makeLabels.pl > labels.sym');
run('lexmakelab labels');

# Analyse the dictionary file.

print "Kanonizujê opisy.........................................";
run('canon.pl <' . shell_quote($file) . ' >temp2');
print "OK\n";

print "Analizuje prefiksy.......................................";
run('count_prefs.pl 2 4 < temp2 > prefs');
run('sort -k1,1 -k3,3nr prefs > prefsS');
run('cut_prefs.pl 0.5 0.01 100 prefsS > prefs');
run('rm prefsS');
print "OK\n";

print "Analizujê plik s³ownika";
if ($kind eq 'pre') {
    print "(pre).............................";
    run('stat_pre.pl temp2 > temp1');
}
else {
    print "(suf).............................";
    run('stat.pl prefs < temp2 > temp1');
}
print "OK\n";

# Shrink the file: sort it, then drop duplicates.

print "Sortujê plik.............................................";
# The shell receives "\~", i.e. a literal "~" as the field separator.
run('sort -t \~ -k1,1 -k2,2nr <temp1 > temp2');
print "OK\n";

print "Minimalizujê plik s³ownika...............................";
run('rmDup.pl < temp2 > temp1');
run('cp temp1 temp2');
run('rm temp1');
print "OK\n";

# Split the file into many parts, run lexcomplex on each part separately,
# then join the results with fsmunion.

print "Dzielê s³ownik na mniejsze czê¶ci........................";

open my $in, '<', './temp2'
    or die "prep.pl: cannot open temp2: $!\n";

if (!-d $tmp_dir) {
    mkdir($tmp_dir) or die "prep.pl: cannot create $tmp_dir: $!\n";
}

my $line_count = 0;
my $file_count = 0;    # index of the most recently opened chunk file

open my $out, '>', "$tmp_dir/slo_0"
    or die "prep.pl: cannot create $tmp_dir/slo_0: $!\n";

while (my $line = <$in>) {
    if (++$line_count >= $lines_per_file) {
        $file_count++;
        $line_count = 0;

        close $out
            or die "prep.pl: cannot close chunk file: $!\n";
        open $out, '>', "$tmp_dir/slo_$file_count"
            or die "prep.pl: cannot create $tmp_dir/slo_$file_count: $!\n";
    }

    print {$out} $line
        or die "prep.pl: cannot write chunk file: $!\n";
}

# Close explicitly so the last chunk is fully on disk before lexcomplex
# reads it, instead of relying on Perl flushing handles before a fork.
close $out or die "prep.pl: cannot close chunk file: $!\n";
close $in;

print "OK\n";

print "Tworzê automaty po¶rednie";

# Aim for 32 progress dots spread over $file_count + 1 chunk files.
my $files_per_dot   = $file_count / 32;
my $files_since_dot = $files_per_dot;
my $dots            = 0;

for my $i (0 .. $file_count) {
    if ($files_since_dot >= $files_per_dot) {
        $files_since_dot = 0;
        print ".";
        $dots++;
    }
    $files_since_dot++;

    run("lexcomplex -l labels.lab -S labels.scl < $tmp_dir/slo_$i > $tmp_dir/slownik_$i.fsm");
}

# Pad the progress bar so the "OK" column lines up with the other steps.
print "." x (32 - $dots) if $dots < 32;

print "OK\n";

run("rm $tmp_dir/slo_*");

print "Tworzê automat koñcowy";

# All per-chunk automata are united by a single fsmunion call, so there is
# no per-file progress to show: the 35 dots are printed up front.
print "." x 35;

run("fsmunion $tmp_dir/* > slownik1.fsm");

print "OK\n";

print "Usuwam epsilon-przejscia.................................";
run('fsmrmepsilon slownik1.fsm > slownik2.fsm');
run('rm slownik1.fsm');
print "OK\n";

print "Determinizujê automat....................................";
run('fsmdeterminize slownik2.fsm > slownik1.fsm');
run('rm slownik2.fsm');
print "OK\n";

print "Minimalizujê automat.....................................";
run('fsmminimize slownik1.fsm > slownik.fsm');
run('rm slownik1.fsm');
print "OK\n";

print "Konwertujê automat do formatu fsa........................";
run('fsmprint -i labels.lab slownik.fsm > slownik.txt');
run('../fsm2aut slownik.txt > slownik.aut');
run('../aut2fsa < slownik.aut > gue.bin');
print "OK\n";

print "Czyszczê pliki pomocnicze................................";
run("rm $tmp_dir/*");
run("rmdir $tmp_dir");
run('rm temp2');
run('rm slownik.fsm');
run('rm slownik.txt');
run('rm slownik.aut');
print "OK\n";
Note: See TracBrowser for help on using the repository browser.