#!/usr/bin/perl #package: UAM Text Tools #component: dgc (dg compiler) #version: 1.0 #author: Tomasz Obrebski #use lib "ENV{HOME}/.utt/lib/perl"; #use strict; use Getopt::Long; use Data::Dumper; use attr; #use File::HomeDir; my $help=0; my $catfile=0; my $dicfile=0; my $gramfile=0; my $ncat=0; my $nrole=0; my $nsgl=0; my $nleft=0; my $nright=0; my $nreq=0; my $nlink=0; GetOptions("help|h" => \$help, "catfile|c=s" => \$catfile, "dicfile|d=s" => \$dicfile, "gramfile|g=s" => \$gramfile); if($help) { print <<'END' Usage: dgpcompile [OPTIONS] Options: --cats -c filename List of syntactic categories. --dic -d filename Dictionary. --help -h Help. END ; exit 0; } die("At least one of --cats and --dic must be given.\n") if !$catfile && !$dicfile; my %cats; my %roles; my %agr; my %gov; loadcats($catfile) if $catfile; extractcats($dicfile) if $dicfile; $cats_re = qr/(?:$attr::cat_re\s*(?:,\s*$attr::cat_re)*)/; # class parse_class: # /$attr::cat_re/g; while(<>) { if(/^\s*AGR\s+(\S+)\s+(\S+)\s*$/) { push @{$agr{$1}}, $2; } elsif(/^\s*GOV\s+(\S+)\s+(\S+)\s*$/) { push @{$gov{$1}}, attr::parse($2); } elsif(/^\s*ROLE\s+\S+\s*$/) { $roles{$_}=1; print; } elsif(/^\s*SGL\s+\S+\s*$/) { ++$nsgl; print; } elsif(/^\s*REQ\s+(\S+)\s+(\S+)\s*$/) { print "#$_"; my $cat = attr::parse $1; for my $atomcat (keys %cats) { if(attr::match @$cat, @{$cats{$atomcat}}) { print "REQ ".$atomcat." $2\n"; ++$nreq; } } } elsif(/^\s*LEFT\s+\S+\s*$/) { ++$nleft; print; } elsif(/^\s*RIGHT\s+\S+\s*$/) { ++$nright; print; } elsif(($hs,$ds,$r) = /^\s*LINK\s+($cats_re)\s+($cats_re)\s+(\S+)\s*$/) { print "#$_"; for $h ($hs =~ /$attr::cat_re/g) { for $d ($ds =~ /$attr::cat_re/g) { addlinks($h,$d,$r); } } } else { print; } } sub addlinks { ($h,$d,$r) = @_; for my $a (@{$agr{$r}}) { print "#AGR $r $a\n"; } for my $c (@{$gov{$r}}) { print "#GOV $r ".attr::unparse(@$c)."\n"; } my $head = attr::parse $h; my $dep = attr::parse $d; for my $atomhead (keys %cats) { if(attr::match @$head, @{$cats{$atomhead}}) { DEP: for my $atomdep (keys %cats) { next DEP if ! attr::match @$dep, @{$cats{$atomdep}}; for my $a (@{$agr{$r}}) { next DEP if ! attr::agree(@{$cats{$atomhead}},@{$cats{$atomdep}},$a); } for my $c (@{$gov{$r}}) { next DEP if ! attr::match(@$c,@{$cats{$atomdep}}); } print "LINK "; print $atomhead." "; print $atomdep." $r\n"; ++$nlink; } } } } printf STDERR "%6d CAT statements\n", 0+keys(%cats); printf STDERR "%6d ROLE statements\n", 0+keys(%roles); printf STDERR "%6d SGL statements\n", $nsgl; printf STDERR "%6d REQ statements\n", $nreq; printf STDERR "%6d LEFT statements\n", $nleft; printf STDERR "%6d RIGHT statements\n", $nright; printf STDERR "%6d LINK statements\n", $nlink; sub extractcats { my $file = shift; open DICFILE, "canonize $file |"; while() { while(/,([^[:space:];]+)/g) { $cat=$1; next if !$cat || exists $cats{$cat}; $ncat++; print "CAT $1\n"; $cats{$cat}=attr::parse($cat); } } close DICFILE; } sub loadcats { my $file = shift; open CATFILE, "canonize $file |"; while() { tr/ \t\n//d; next if !$_ || exists $cats{$_}; print "CAT $_\n"; ++$ncat; $cats{$_}=attr::parse($_); } close CATFILE; }