source: src/compdic/compdic-fst-to-bin @ e0cd003

Last change on this file since e0cd003 was 555c7f8, checked in by Tomasz Obrebski <to@…>, 12 years ago

dodanie możliwości szybkiego uaktualniania słownika dla lema

  • Property mode set to 100755
File size: 1.1 KB
Line 
#!/bin/bash
#
# compdic-fst-to-bin - compile an automaton given in openfst format into a
# binary automaton file.
#
# Usage: compdic-fst-to-bin <fstautomaton> <binautomaton>

# Both positional arguments are required. On misuse the usage text is sent to
# stderr and the script exits non-zero, so that calling scripts and makefiles
# can detect the error instead of silently continuing.
if [ "$#" -lt 2 ]
then
    {
        echo "Usage:"
        echo "        compdic-fst-to-bin <fstautomaton> <binautomaton>"
        echo "where"
        echo "    <fstautomaton> - file containig automaton in openfst format"
        echo "    <binautomaton> - a file to which the compiled binary automaton (lem/cor/kor format) shoul be written"
    } >&2
    exit 1
fi
12
# Private scratch directory holding the symbol table; stop immediately if it
# cannot be created rather than continuing with an empty path.
tempdir=$(mktemp -d "${TMPDIR:-/tmp}/compdic.XXXXXX") || exit 1

# Remove the scratch directory on every exit path, including early failures.
trap 'rm -rf -- "$tempdir"' EXIT

# mktemp replaces the deprecated, Debian-specific `tempfile` utility and
# creates the file inside the scratch directory in the same way.
alphabet=$(mktemp "$tempdir/alphabet.XXXXXX") || exit 1

# Symbol table in "<symbol> <numeric id>" form, one entry per line, written to
# $alphabet. The delimiter is quoted so the table is always taken literally.
# NOTE(review): the non-ASCII symbols look like ISO-8859-2 Polish letters
# rendered in a different encoding - confirm the file encoding before editing.
cat <<'EOF' > "$alphabet"
<eps> 0
a 1
A 2
ä 3
± 4
¡ 5
b 6
B 7
c 8
C 9
æ 10
Æ 11
d 12
D 13
e 14
E 15
é 16
ê 17
Ê 18
f 19
F 20
g 21
G 22
h 23
H 24
i 25
I 26
j 27
J 28
k 29
K 30
l 31
L 32
³ 33
£ 34
m 35
M 36
n 37
N 38
ñ 39
Ñ 40
o 41
O 42
ö 43
ó 44
Ó 45
p 46
P 47
q 48
Q 49
r 50
R 51
s 52
S 53
¶ 54
Š 55
t 56
T 57
u 58
U 59
ü 60
v 61
V 62
w 63
W 64
x 65
X 66
y 67
Y 68
z 69
Z 70
Œ 71
¬ 72
¿ 73
¯ 74
0 75
1 76
2 77
3 78
4 79
5 80
6 81
7 82
8 83
9 84
_ 85
- 86
? 87
! 88
~ 89
; 90
, 91
/ 92
* 93
+ 94
EOF
114
# Conversion pipeline: read the automaton from $1, remove epsilon transitions,
# determinize and minimize it, print it as an acceptor using the symbol table
# in $alphabet, and pass the text through fsm2aut and aut2fsa into $2.
# NOTE(review): fsm2aut and aut2fsa are not defined in this file; presumably
# they convert the printed automaton into the binary format - confirm.
#
# pipefail makes a failure in any stage visible; without it only the status of
# the last command would be seen and a broken conversion would look successful.
set -o pipefail
fstrmepsilon < "$1" \
    | fstdeterminize \
    | fstminimize \
    | fstprint --acceptor --isymbols="$alphabet" \
    | fsm2aut \
    | aut2fsa > "$2"
status=$?

# Scratch files are no longer needed once the pipeline has finished.
rm -r -- "$tempdir"

# Disabled step kept for reference: generation of the cats file.
#echo generating cats file ...

#cat $1 | cut -d ',' -f 2 | sort -u $2.cats

# Report the result of the conversion, not that of the cleanup.
exit "$status"
Note: See TracBrowser for help on using the repository browser.