source: src/dgp/canonize @ cfdf333

Last change on this file since cfdf333 was 5f4d9c3, checked in by Maciej Prill <mprill@…>, 13 years ago

Rewritten the build system, added lem UTF-8 version.

  • Property mode set to 100755
File size: 622 bytes
RevLine 
[5f4d9c3]1#!/usr/bin/perl
2
3#package:       UAM TExt Tools
4#component:     canonize
5#version:       1.0
6#author:        Tomasz Obrebski
7
8use lib "/usr/local/lib/utt";
9use lib "$ENV{'HOME'}/.local/lib/utt";
10
11use strict;
12use Getopt::Long;
13use attr;
14
15
# Set to true when --help / -h is given on the command line.
my $help;

# GetOptions returns false when it meets an unknown or malformed option (it
# has already printed a diagnostic to STDERR by then).  Stop in that case
# instead of silently ignoring the bad option and filtering input anyway.
GetOptions("help|h" => \$help)
    or die "Error in command line arguments\n";

# --help: print the usage text and leave without reading any input.
if($help)
{
    print <<'END';

Transforms syntactic categories to their canonical form.

Usage: canonize

Options:
   --help -h                    Help.

END
    exit 0;
}
35
# Unbuffered output is intentionally left switched off:
#$|=1;

# Memo table shared with trans(): matched category text => canonical form.
my %tra;

# Filter loop: read every line from the files named in @ARGV (STDIN when
# there are none), rewrite it in place in $_, and echo it to STDOUT.
while (defined($_ = <>))
{
    # Each substring matching <pos_re>/<avlist_re> is replaced by whatever
    # trans() returns for the matched text.  Both patterns come from the attr
    # module (not visible here) -- presumably a part-of-speech tag and an
    # attribute-value list; confirm against attr.pm.
    s{$attr::pos_re\/$attr::avlist_re}{trans($&)}ge;
    print $_;
}
45
# trans($cat) -- return the canonical form of one category string.
#
# The result of attr::canonize is memoized in the file-level %tra hash, so
# the conversion runs at most once per distinct input string.
sub trans
{
    my ($category) = @_;

    # Test with exists (not defined) so that any cached result is reused,
    # whatever its value.
    unless (exists $tra{$category})
    {
        $tra{$category} = attr::canonize($category);
    }

    return $tra{$category};
}
Note: See TracBrowser for help on using the repository browser.