#!/usr/bin/perl

#package:	UAM Text Tools
#component:	dgc (dg compiler)
#version:	1.0
#author:	Tomasz Obrebski

#use lib "ENV{HOME}/.utt/lib/perl";
#use strict;
use Getopt::Long;
use Data::Dumper;

use attr;
#use File::HomeDir;

my $help=0;
my $catfile=0;
my $dicfile=0;
my $gramfile=0;

my $ncat=0;
my $nrole=0;
my $nsgl=0;
my $nleft=0;
my $nright=0;
my $nreq=0;
my $nlink=0;

GetOptions("help|h" => \$help,
	   "catfile|c=s" => \$catfile,
	   "dicfile|d=s" => \$dicfile,
	   "gramfile|g=s" => \$gramfile);

if($help)
{
    print <<'END'
Usage: dgpcompile [OPTIONS]

Options:
   --cats -c filename		List of syntactic categories.
   --dic  -d filename           Dictionary.
   --help -h			Help.
END
;
    exit 0;
}

die("At least one of --cats and --dic must be given.\n") if !$catfile && !$dicfile;

my %cats;
my %roles;
my %agr;
my %gov;

loadcats($catfile) if $catfile;
extractcats($dicfile) if $dicfile;


$cats_re = qr/(?:$attr::cat_re\s*(?:,\s*$attr::cat_re)*)/;

# class parse_class:
# /$attr::cat_re/g;

while(<>)
{
    if(/^\s*AGR\s+(\S+)\s+(\S+)\s*$/)
    {
	push @{$agr{$1}}, $2;
    }
    elsif(/^\s*GOV\s+(\S+)\s+(\S+)\s*$/)
    {
	push @{$gov{$1}}, attr::parse($2);
    }
    elsif(/^\s*ROLE\s+\S+\s*$/)
    {
	$roles{$_}=1;
	print;
    }
    elsif(/^\s*SGL\s+\S+\s*$/)
    {
	++$nsgl;
	print;
    }
    elsif(/^\s*REQ\s+(\S+)\s+(\S+)\s*$/)
    {
	print "#$_";
	my $cat = attr::parse $1;
	for my $atomcat (keys %cats)
	{
	    if(attr::match @$cat, @{$cats{$atomcat}})
	    {
		print "REQ ".$atomcat." $2\n";
		++$nreq;
	    }
	}
    }
    elsif(/^\s*LEFT\s+\S+\s*$/)
    {
	++$nleft;
	print;
    }
    elsif(/^\s*RIGHT\s+\S+\s*$/)
    {
	++$nright;
	print;
    }
    elsif(($hs,$ds,$r) = /^\s*LINK\s+($cats_re)\s+($cats_re)\s+(\S+)\s*$/)
    {
	print "#$_";
	for $h ($hs =~ /$attr::cat_re/g)
	{
	    for $d ($ds =~ /$attr::cat_re/g)
	    {
		addlinks($h,$d,$r);
	    }
	}
    }
    
    else
    {
	print;
    }
}


sub addlinks
{
    ($h,$d,$r) = @_;

    for my $a (@{$agr{$r}}) { print "#AGR $r $a\n"; }
    for my $c (@{$gov{$r}}) { print "#GOV $r ".attr::unparse(@$c)."\n"; }
    my $head = attr::parse $h;
    my $dep = attr::parse $d;
    
    for my $atomhead (keys %cats)
    {
	if(attr::match @$head, @{$cats{$atomhead}})
	{
	  DEP:
	    for my $atomdep (keys %cats)
	    {
		next DEP if ! attr::match @$dep, @{$cats{$atomdep}};
		
		for my $a (@{$agr{$r}})
		{
		    next DEP if ! attr::agree(@{$cats{$atomhead}},@{$cats{$atomdep}},$a);
		}
		
		for my $c (@{$gov{$r}})
		{
		    next DEP if ! attr::match(@$c,@{$cats{$atomdep}});
		}
		
		print "LINK ";
		print $atomhead." ";
		print $atomdep." $r\n";
		++$nlink;
		
	    }
	}
    }
}


printf STDERR "%6d CAT   statements\n", 0+keys(%cats);
printf STDERR "%6d ROLE  statements\n", 0+keys(%roles);
printf STDERR "%6d SGL   statements\n", $nsgl;
printf STDERR "%6d REQ   statements\n", $nreq;
printf STDERR "%6d LEFT  statements\n", $nleft;
printf STDERR "%6d RIGHT statements\n", $nright;
printf STDERR "%6d LINK  statements\n", $nlink;


sub extractcats
{
    my $file = shift;
    open DICFILE, "canonize $file |";
    while(<DICFILE>)
    {
	while(/,([^[:space:];]+)/g)
	{
	    $cat=$1;
	    next if !$cat || exists $cats{$cat};
	    $ncat++;
	    print "CAT $1\n";
	    $cats{$cat}=attr::parse($cat);
	}
    }
    close DICFILE;
}


sub loadcats
{
    my $file = shift;
    open CATFILE, "canonize $file |";
    while(<CATFILE>)
    {
	tr/ \t\n//d;
	next if !$_ || exists $cats{$_};
	print "CAT $_\n";
	++$ncat;
	$cats{$_}=attr::parse($_);
    }
    close CATFILE;
}
