#!/usr/bin/perl #package: UAM Text Tools #component: dgc (dg compiler) #version: 1.0 #author: Tomasz Obrebski # wymaga niejawnie programu canonize!!!! #use lib "ENV{HOME}/.utt/lib/perl"; use strict; use Getopt::Long; use Data::Dumper; use attr; use File::HomeDir; my $systemconfigfile='/usr/local/etc/utt/dgc.conf'; my $userconfigfile=home()."/.utt/dgc.conf"; Getopt::Long::Configure('no_ignore_case_always'); my $help=0; my $catfile=0; my $dicfile=0; my $gramfile=0; my $outputfile=0; #read configuration files########################### my $file; foreach $file ($systemconfigfile, $userconfigfile){ if(open(CONFIG, $file)){ while () { chomp; s/#.*//; s/^\s+//; s/\s+$//; next unless length; my ($name, $value) = split(/\s*=\s*/, $_, 2); if(($name eq "catfile")or($name eq "c")){ $catfile=$value; } elsif(($name eq "dicfile")or($name eq "d")){ $dicfile=$value; } elsif(($name eq "gramfile")or($name eq "g")){ $gramfile=$value; } elsif(($name eq "outputfile")or($name eq "o")){ $outputfile=$value; } elsif(($name eq "help")or($name eq "h")){ $help=1; } } close CONFIG; } } ######################################################### GetOptions("help|h" => \$help, "catfile|c=s" => \$catfile, "dicfile|d=s" => \$dicfile, "gramfile|g=s" => \$gramfile, "outputfile|o=s" => \$outputfile); if($help) { print <<'END' Usage: dgc [OPTIONS] Options: --catfile -c filename List of syntactic categories. --dicfile -d filename Dictionary. --gramfile -g filename List of grammar rules. --outputfile -o filename Output filename. --help -h Help. END ; exit 0; } die("At least one of --cats and --dic must be given.\n") if !$catfile && !$dicfile; my $ncat=0; my $nrole=0; my $nsgl=0; my $nleft=0; my $nright=0; my $nreq=0; my $nlink=0; my %cats; my %roles; my %agr; my %gov; if(!$outputfile) { *OUTPUT = *STDOUT; } elsif($outputfile eq "-") { *OUTPUT = *STDOUT; } else { open(OUTPUT, ">$outputfile") or die("Can't open output file: $outputfile!"); } loadcats($catfile) if $catfile; extractcats($dicfile) if $dicfile; my $cats_re = qr/(?:$attr::cat_re\s*(?:,\s*$attr::cat_re)*)/; # class parse_class: # /$attr::cat_re/g; if(!$gramfile) { *INPUT = *STDIN; } elsif($gramfile eq "-"){ *INPUT = *STDIN; } else { open(INPUT, $gramfile) or die("Unable to open: $gramfile!"); } while() { s/#.*//; s/^\s+//; s/\s+$//; if(/^AGR\s+(\S+)\s+(\S+)$/) { push @{$agr{$1}}, $2; } elsif(/^GOV\s+(\S+)\s+(\S+)$/) { push @{$gov{$1}}, attr::parse($2); } elsif(/^ROLE\s+\S+$/) { $roles{$_}=1; print OUTPUT "$_\n"; } elsif(/^SGL\s+\S+$/) { ++$nsgl; print OUTPUT "$_\n"; } elsif(/^REQ\s+(\S+)\s+(\S+)$/) { print OUTPUT "#$_\n"; my $cat = attr::parse $1; for my $atomcat (keys %cats) { if(attr::match @$cat, @{$cats{$atomcat}}) { print OUTPUT "REQ ".$atomcat." $2\n"; ++$nreq; } } } elsif(/^LEFT\s+\S+$/) { ++$nleft; print OUTPUT "$_\n"; } elsif(/^RIGHT\s+\S+$/) { ++$nright; print OUTPUT "$_\n"; } elsif(my ($hs,$ds,$r) = /^LINK\s+($cats_re)\s+($cats_re)\s+(\S+)$/) { print OUTPUT "#$_\n"; for my $h ($hs =~ /$attr::cat_re/g) { for my $d ($ds =~ /$attr::cat_re/g) { addlinks($h,$d,$r); } } } elsif(/^$/) { # pomijamy puste linie oraz komentarze } else { print STDERR "Illegal format: $_\n"; } } sub addlinks { my ($h,$d,$r) = @_; for my $a (@{$agr{$r}}) { print OUTPUT "#AGR $r $a\n"; } for my $c (@{$gov{$r}}) { print OUTPUT "#GOV $r ".attr::unparse(@$c)."\n"; } my $head = attr::parse $h; my $dep = attr::parse $d; for my $atomhead (keys %cats) { if(attr::match @$head, @{$cats{$atomhead}}) { DEP: for my $atomdep (keys %cats) { next DEP if ! attr::match @$dep, @{$cats{$atomdep}}; for my $a (@{$agr{$r}}) { next DEP if ! attr::agree(@{$cats{$atomhead}},@{$cats{$atomdep}},$a); } for my $c (@{$gov{$r}}) { next DEP if ! attr::match(@$c,@{$cats{$atomdep}}); } print OUTPUT "LINK "; print OUTPUT $atomhead." "; print OUTPUT $atomdep." $r\n"; ++$nlink; } } } } printf STDERR "%6d CAT statements\n", 0+keys(%cats); printf STDERR "%6d ROLE statements\n", 0+keys(%roles); printf STDERR "%6d SGL statements\n", $nsgl; printf STDERR "%6d REQ statements\n", $nreq; printf STDERR "%6d LEFT statements\n", $nleft; printf STDERR "%6d RIGHT statements\n", $nright; printf STDERR "%6d LINK statements\n", $nlink; sub extractcats { my $file = shift; open DICFILE, "canonize $file |"; while() { while(/,([^[:space:];]+)/g) { my $cat=$1; next if !$cat || exists $cats{$cat}; $ncat++; print OUTPUT "CAT $1\n"; $cats{$cat}=attr::parse($cat); } } close DICFILE; } sub loadcats { my $file = shift; open CATFILE, "canonize $file |"; while() { tr/ \t\n//d; next if !$_ || exists $cats{$_}; print OUTPUT "CAT $_\n"; ++$ncat; $cats{$_}=attr::parse($_); } close CATFILE; }