source: src/dgp.old/canonize @ 555c7f8

Last change on this file since 555c7f8 was e7de6cc, checked in by Tomasz Obrebski <to@…>, 13 years ago

new version of dgp
added dgc, tre and compdic components
compiledic renamed to compdic_utf8
./configure updated

  • Property mode set to 100755
File size: 622 bytes
Line 
1#!/usr/bin/perl
2
3#package:       UAM TExt Tools
4#component:     canonize
5#version:       1.0
6#author:        Tomasz Obrebski
7
8use lib "/usr/local/lib/utt";
9use lib "$ENV{'HOME'}/.local/lib/utt";
10
11use strict;
12use Getopt::Long;
13use attr;
14
15
# Set by --help / -h.
my $help;

# GetOptions returns false when it meets an option it does not know (it has
# already reported the problem on STDERR). Stop here rather than going on
# to run the filter with a command line the user did not intend.
GetOptions("help|h" => \$help)
    or exit 1;

if ($help)
{
    print <<'END';

Transforms syntactic categories to their canonical form.

Usage: canonize

Options:
   --help -h                    Help.

END
    exit 0;
}

#$|=1;

# Cache used by trans(): original category string => canonized form.
my %tra;

# Filter loop: read the files named on the command line (or STDIN), replace
# every match of $attr::pos_re, a literal '/', and $attr::avlist_re with the
# result of trans() on the matched text, and print each line.
while (<>)
{
    s/$attr::pos_re\/$attr::avlist_re/trans($&)/ge;
    print;
}
45
# trans CATEGORY
#
# Returns the canonized form of CATEGORY as produced by attr::canonize,
# memoised in the file-level %tra hash so that each distinct category string
# is passed to attr::canonize only once per run.
sub trans
{
    my ($category) = @_;

    # Test with exists (not defined) so that a cached result is reused even
    # if attr::canonize happened to return undef for this category.
    unless (exists $tra{$category})
    {
        $tra{$category} = attr::canonize($category);
    }

    return $tra{$category};
}
Note: See TracBrowser for help on using the repository browser.