source: _old/nawszelkiwypadek/tools/gue_dic/compile_user_dict.pl @ 93afab8

Last change on this file since 93afab8 was a6e708f, checked in by tom <tom@…>, 13 years ago

ANULOWANIE POPRZEDNIEGO COMMITU

Revert "Replacing old implementation with working implementation"

This reverts commit 1e121f45e2d091fcd34a893291b8453e350d5884.

Please enter the commit message for your changes. Lines starting
with '#' will be ignored, and an empty message aborts the commit.

Committer: tom <tom@lim.(none)>

On branch master
Changes to be committed:

(use "git reset HEAD <file>..." to unstage)

modified: _old/app/Makefile
deleted: _old/app/conf/Makefile
deleted: _old/app/conf/compiledic.conf
deleted: _old/app/conf/cor.conf
deleted: _old/app/conf/dgc.conf
deleted: _old/app/conf/dgp.conf
deleted: _old/app/conf/gph.conf
deleted: _old/app/conf/grp.conf
deleted: _old/app/conf/gue.conf
deleted: _old/app/conf/kor.conf
deleted: _old/app/conf/lem.conf
deleted: _old/app/conf/mar.conf
deleted: _old/app/conf/ser.conf
deleted: _old/app/conf/utt.conf
modified: _old/app/src/common/Makefile
modified: _old/app/src/compiledic/Makefile
modified: _old/app/src/compiledic/aut2fsa.cc
modified: _old/app/src/cor/Makefile
modified: _old/app/src/dgp/Makefile
new file: _old/app/src/dgp/canonize
new file: _old/app/src/dgp/dgc
modified: _old/app/src/dgp/grammar.hh
modified: _old/app/src/dgp/mgraph.hh
modified: _old/app/src/dgp/sgraph.hh
modified: _old/app/src/dgp/thesymbols.hh
new file: _old/app/src/dgp/tre
modified: _old/app/src/gue/Makefile
modified: _old/app/src/gue/guess.cc
modified: _old/app/src/kor/Makefile
modified: _old/app/src/kor/corlist.cc
modified: _old/app/src/kor/corr.cc
new file: _old/app/src/kor/corr.hh
modified: _old/app/src/kor/main.cc
modified: _old/app/src/lem/Makefile
modified: _old/app/src/lem/lem.cc
modified: _old/app/src/lib/Makefile
modified: _old/app/src/lib/auttools.cc
modified: _old/app/src/lib/symtab.cc
modified: _old/app/src/lib/tft.h
modified: _old/app/src/lib/tfti.h
modified: _old/app/src/lib/ttrans.h
modified: _old/app/src/lib/word.cc
modified: _old/app/src/lib/word.h
modified: _old/app/src/tok.c/Makefile
modified: _old/app/src/tok.c/cmdline_tok.ggo
modified: _old/app/src/tok.c/common_tok.cc
modified: _old/app/src/tok/Makefile
modified: _old/nawszelkiwypadek/tools/aut2fsa
modified: _old/nawszelkiwypadek/tools/cor_dic/makeLabels.pl
modified: _old/nawszelkiwypadek/tools/cor_dic/prep.pl
modified: _old/nawszelkiwypadek/tools/fsm2aut
modified: _old/nawszelkiwypadek/tools/gue_dic/canon.pl
modified: _old/nawszelkiwypadek/tools/gue_dic/compile_user_dict.pl
modified: _old/nawszelkiwypadek/tools/gue_dic/count_prefs.pl
modified: _old/nawszelkiwypadek/tools/gue_dic/cut_prefs.pl
modified: _old/nawszelkiwypadek/tools/gue_dic/makeLabels.pl
modified: _old/nawszelkiwypadek/tools/gue_dic/prep.pl
modified: _old/nawszelkiwypadek/tools/gue_dic/prep_user_dict.pl
modified: _old/nawszelkiwypadek/tools/gue_dic/rmDup.pl
modified: _old/nawszelkiwypadek/tools/gue_dic/stat.pl
modified: _old/nawszelkiwypadek/tools/gue_dic/stat_pre.pl
modified: _old/nawszelkiwypadek/tools/lem_dic/makeLabels.pl
modified: _old/nawszelkiwypadek/tools/lem_dic/prep.pl
modified: auto/defaults
modified: auto/options
modified: auto/output/Makefile
modified: auto/output/config_h
modified: auto/summary
modified: configure

  • Property mode set to 100755
File size: 3.3 KB
Line 
#! /usr/bin/env perl

use locale;
#use strict;

#
##################################################
# Maximum number of dictionary lines written to each temporary chunk file.
$linesPerFile = 20000;

# The dictionary file is the single mandatory argument.
if (@ARGV < 1) {
    # A missing argument is an error: report it on STDERR under this
    # script's own name and leave with a non-zero status.
    print STDERR "usage: compile_user_dict.pl dictionary_file\n";
    exit 1;
}

# Path of the dictionary file to compile.
$file = shift @ARGV;

# Label preparation: write the output of makeLabels.pl to labels.sym, then
# run lexmakelab on the "labels" base name (presumably this yields the
# labels.lab / labels.scl files read by later steps -- TODO confirm).
qx{makeLabels.pl > labels.sym};
qx{lexmakelab labels};

# Dictionary file analysis.

# Step 1: feed the dictionary through canon.pl, writing the result to temp1.
print "Kanonizujê opisy.........................................";

# The path comes straight from the command line, so quote it for the shell:
# wrap it in single quotes and escape any embedded single quote. This keeps
# names containing spaces or shell metacharacters as one literal word.
(my $quotedFile = $file) =~ s/'/'\\''/g;
`canon.pl < '$quotedFile' > temp1`;

print "OK\n";

# Sort temp1 into temp2. Fields are separated by "~"; lines are ordered by
# field 1, then by field 2 compared numerically in descending order.
print "Sortujê plik.............................................";

my $sortCommand = 'sort -t \~ -k1,1 -k2,2nr <temp1 > temp2';
qx{$sortCommand};

print "OK\n";

print "Minimalizujê plik s³ownika...............................";

# Duplicate removal is disabled; the two original commands are kept for
# reference:
#`rmDup.pl < temp2 > temp1`;
#`rmDup2.pl < temp1 > temp2`;

# With the filters disabled, temp1 is copied over temp2 and then removed.
# NOTE(review): this copy overwrites the sorted temp2 written by the
# preceding sort step with the unsorted temp1 -- confirm this is intended.
for my $shellCommand ('cp temp1 temp2', 'rm temp1') {
    qx{$shellCommand};
}

print "OK\n";

print "Czyszczê pliki...........................................";

# sed encloses every punctuation character of temp2 in square brackets
# (presumably so that later tools read it literally -- TODO confirm).
my $bracketPunct = 'sed -r "s/([[:punct:]])/[\1]/g" < temp2 > temp1';
qx{$bracketPunct};

# Put the result back under the name temp2 and drop the scratch copy.
qx{cp temp1 temp2};
qx{rm temp1};

print "OK\n";

# The file is split into many parts, lexcomplex is run on each part
# separately, and the results are then joined with fsmunion.

print "Dzielê s³ownik na mniejsze czê¶ci........................";

# Chunks are written to LemTEMP/slo_0 .. LemTEMP/slo_$fileCount.
# $fileCount (the index of the last chunk) is read by the following steps,
# so it stays a package variable.
$fileCount = 0;
my $linesInChunk = 0;

# A directory left behind by an interrupted run is reused.
-d "LemTEMP" or mkdir("LemTEMP") or die "cannot create LemTEMP: $!";

open(my $in, "<", "./temp2") or die "cannot open ./temp2: $!";
open(my $chunk, ">", "LemTEMP/slo_$fileCount")
    or die "cannot create LemTEMP/slo_$fileCount: $!";

while (my $line = <$in>) {

    # Rotate only when the current chunk already holds $linesPerFile lines.
    # Every chunk, including the first, is therefore filled completely, and
    # no empty trailing chunk is created.
    if ($linesInChunk >= $linesPerFile) {
        close($chunk) or die "cannot write LemTEMP/slo_$fileCount: $!";
        $fileCount++;
        $linesInChunk = 0;
#       print "Creating new temporary file: slo_".$fileCount."\n";
        open($chunk, ">", "LemTEMP/slo_$fileCount")
            or die "cannot create LemTEMP/slo_$fileCount: $!";
    }

    print {$chunk} $line;
    $linesInChunk++;
}

# Close the last chunk explicitly so that write errors are reported and the
# file is complete before any other program reads it.
close($chunk) or die "cannot write LemTEMP/slo_$fileCount: $!";
close($in);

print "OK\n";

# Run lexcomplex on every chunk LemTEMP/slo_N (N = 0 .. $fileCount),
# producing LemTEMP/slownik_N.fsm, while drawing a progress bar of dots.
print "Tworzê automaty po¶rednie";

# 32 dots for $fileCount files: a dot is printed whenever the number of
# chunks handled since the last dot reaches $fileCount/32.
my $chunksPerDot   = $fileCount / 32;
my $chunksSinceDot = $chunksPerDot;    # at the threshold: first chunk prints a dot
my $dotsPrinted    = 0;

foreach my $chunkIndex (0 .. $fileCount) {

    unless ($chunksSinceDot < $chunksPerDot) {
        $chunksSinceDot = 0;
        print ".";
        $dotsPrinted++;
    }
    $chunksSinceDot++;

    my $source = "LemTEMP/slo_$chunkIndex";
    my $target = "LemTEMP/slownik_$chunkIndex.fsm";
    qx{lexcomplex -l labels.lab -S labels.scl < $source > $target};
}

# Pad the bar up to 32 dots when fewer were printed.
print "." x (32 - $dotsPrinted) if $dotsPrinted < 32;

print "OK\n";

# The text chunks are no longer needed once compiled.
qx{rm LemTEMP/slo_*};

# Merge the intermediate automata LemTEMP/slownik_0.fsm ..
# LemTEMP/slownik_$fileCount.fsm into one automaton, slownik1.fsm.
print "Tworzê automat koñcowy";

# 35 dots: a dot is printed whenever the number of merges since the last
# dot reaches $fileCount/35.
$filesPerDot = $fileCount/35;
$files = $filesPerDot;    # at the threshold, so the first merge prints a dot
$dots = 0;

# Start from the first automaton and fold the remaining ones in one by one.
`cp LemTEMP/slownik_0.fsm slownik1.fsm`;

# The bound is $fileCount. The former "$filecount" was a never-assigned
# variable, so the loop body was never executed. The inputs are also named
# with the ".fsm" suffix under which the lexcomplex step writes them.
for my $i (1 .. $fileCount) {

    if ($files >= $filesPerDot) {
        $files = 0;
        print ".";
        $dots++;
    }
    $files++;

    `fsmunion LemTEMP/slownik_$i.fsm slownik1.fsm > slownik2.fsm`;

    `mv slownik2.fsm slownik1.fsm`;
}

# Pad the bar up to 35 dots when fewer were printed.
print "." x (35 - $dots) if $dots < 35;

# No extra "fsmunion LemTEMP/*" pass follows: it would only overwrite the
# slownik1.fsm that the loop above has just built.

print "OK\n";

# Post-process the merged automaton in three stages. Each stage prints its
# label, runs one fsm tool that reads the previous stage's file and writes a
# new one, deletes the file it consumed, and prints OK.
# Labels, in order: epsilon-transition removal, determinization,
# minimization. The last stage leaves its result in slownik.fsm.
my @stages = (
    [
        "Usuwam epsilon-przejscia.................................",
        "fsmrmepsilon slownik1.fsm > slownik2.fsm",
        "rm slownik1.fsm",
    ],
    [
        "Determinizujê automat....................................",
        "fsmdeterminize slownik2.fsm > slownik1.fsm",
        "rm slownik2.fsm",
    ],
    [
        "Minimalizujê automat.....................................",
        "fsmminimize slownik1.fsm > slownik.fsm",
        "rm slownik1.fsm",
    ],
);

for my $stage (@stages) {
    my ($label, $transform, $discard) = @$stage;

    print $label;
    qx{$transform};
    qx{$discard};
    print "OK\n";
}

# Convert slownik.fsm to the final gue.bin through two intermediate files:
# fsmprint writes slownik.txt, ../fsm2aut turns that into slownik.aut, and
# ../aut2fsa reads slownik.aut to produce gue.bin.
print "Konwertujê automat do formatu fsa........................";

my @conversionChain = (
    'fsmprint -i labels.lab slownik.fsm > slownik.txt',
    '../fsm2aut slownik.txt > slownik.aut',
    '../aut2fsa < slownik.aut > gue.bin',
);
foreach my $conversion (@conversionChain) {
    qx{$conversion};
}

print "OK\n";

# Remove the helper files: the LemTEMP directory with its contents, temp2,
# and the slownik.* intermediates.
print "Czyszczê pliki pomocnicze................................";

my @cleanupCommands = (
    'rm LemTEMP/*',
    'rmdir LemTEMP',
    'rm temp2',
    'rm slownik.fsm',
    'rm slownik.txt',
    'rm slownik.aut',
);
foreach my $cleanup (@cleanupCommands) {
    qx{$cleanup};
}

print "OK\n";
Note: See TracBrowser for help on using the repository browser.