#!/usr/bin/ruby1.9

#package:	UAM Text Tools
#component:	dgc (dg compiler)
#version:	2.0
#author:	Tomasz Obrebski

use lib "/usr/local/lib/utt";
use lib "$ENV{'HOME'}/.local/lib/utt";

use strict;
use Getopt::Long;
use Data::Dumper;
use attr;
use File::HomeDir;

my $systemconfigfile='/usr/local/etc/utt/dgc.conf';
my $userconfigfile=home()."/.utt/dgc.conf";

Getopt::Long::Configure('no_ignore_case_always');

my $help=0;
my $catfile=0;
my $dicfile=0;
my $gramfile=0;
my $outputfile=0;

#read configuration files###########################
my $file;
foreach $file ($systemconfigfile, $userconfigfile){
  if(open(CONFIG, $file)){
        while (<CONFIG>) {
                chomp;
                s/#.*//;
                s/^\s+//;
                s/\s+$//;
                next unless length;
                my ($name, $value) = split(/\s*=\s*/, $_, 2);
                if(($name eq "categories")or($name eq "c")){
                        $catfile=$value;
                }
                elsif(($name eq "dictionary")or($name eq "d")){
                        $dicfile=$value;
                }
                elsif(($name eq "grammar")or($name eq "g")){
                        $gramfile=$value;
                }
                elsif(($name eq "outputfile")or($name eq "o")){
                        $outputfile=$value;
                }
                elsif(($name eq "help")or($name eq "h")){
                        $help=1;
                }

        }
        close CONFIG;
  }
}
#########################################################

GetOptions("help|h" => \$help,
	   "categories|c=s" => \$catfile,
	   "dictionary|d=s" => \$dicfile,
	   "grammar|g=s" => \$gramfile,
	   "outputfile|o=s" => \$outputfile);

my $homedir = $ENV{'HOME'};
$catfile =~ s/~/$homedir/;
$dicfile =~ s/~/$homedir/;
$gramfile =~ s/~/$homedir/;
$outputfile =~ s/~/$homedir/;


if($help)
{
    print <<'END'
Usage: dgc [OPTIONS]

Options:
   --categories -c filename	List of syntactic categories.
   --dictionary -d filename     Dictionary.
   --grammar -g filename	List of grammar rules.
   --outputfile -o filename	Output file name.
   --help -h			Help.
END
;
    exit 0;
}

die("At least one of --cats and --dic must be given.\n") if !$catfile && !$dicfile;

my $ncat=0;
my $nrole=0;
my $nsgl=0;
my $nleft=0;
my $nright=0;
my $nreq=0;
my $nlink=0;
my $nflag=0;
my $nlong=0;
my $nclass=0;

my %cats;
my %classes; 
my %roles;
my %agr;
my %gov;

if(!$outputfile) {
	*OUTPUT = *STDOUT;
}
elsif($outputfile eq "-") {
    *OUTPUT = *STDOUT;
}
else {
	open(OUTPUT, ">$outputfile") or die("Can't open output file: $outputfile!");
}


loadcats($catfile) if $catfile;
extractcats($dicfile) if $dicfile;


my $cats_re = qr/(?:$attr::cat_re\s*(?:,\s*$attr::cat_re)*)/;

my $class_re = qr/(?:\@\w+)/;

# class parse_class:
# /$attr::cat_re/g;


if(!$gramfile) { 
	*INPUT = *STDIN;
}
elsif($gramfile eq "-"){
    *INPUT = *STDIN;
}
else {
	open(INPUT, $gramfile) or die("Unable to open: $gramfile!");
}

while(<INPUT>)
{
    s/#.*//;
    s/^\s+//;
    s/\s+$//;
    if(/^AGR\s+(\S+)\s+(\S+)$/)
    {
	push @{$agr{$1}}, $2;
    }
    elsif(/^GOV\s+(\S+)\s+(\S+)$/)
    {
	push @{$gov{$1}}, attr::parse($2);
    }
    elsif(/^ROLE\s+\S+$/)
    {
	$roles{$_}=1;
	print OUTPUT "$_\n";
    }
    elsif(/^SGL\s+\S+$/)
    {
	++$nsgl;
	print OUTPUT "$_\n";
    }
    elsif(/^REQ\s+(\S+)\s+(\S+)$/)
    {
	print OUTPUT "#$_\n";
	my $cat = attr::parse $1;
	for my $atomcat (keys %cats)
	{
	    if(attr::match @$cat, @{$cats{$atomcat}})
	    {
		print OUTPUT "REQ ".$atomcat." $2\n";
		++$nreq;
	    }
	}
    }
    elsif(/^LEFT\s+\S+$/)
    {
	++$nleft;
	print OUTPUT "$_\n";
    }
    elsif(/^RIGHT\s+\S+$/)
    {
	++$nright;
	print OUTPUT "$_\n";
    }
    elsif(my ($hs,$ds,$r) = /^LINK\s+($cats_re)\s+($cats_re)\s+(\S+)$/)
    {
	print OUTPUT "#$_\n";
	for my $h ($hs =~ /$attr::cat_re/g)
	{
	    for my $d ($ds =~ /$attr::cat_re/g)
	    {
		addlinks($h,$d,$r);
	    }
	}
    }
    elsif(/^FLAG\s+\S+$/)
    {
	++$nflag;
	print OUTPUT "$_\n"
    }
    elsif(/^LONG\s+\S+(\s+<\S+)*(\s+\S+)*$/)
    {
	++$nlong;
	print OUTPUT "$_\n"
    }
    elsif(my ($cl,$cs) = /^CLASS\s+(\S+)\s*\(.*)/)
    {
	print OUTPUT "#$_\n";
	for my $c ($cs =~ /\S+/g)
	{
	    my $cat = attr::parse $c;
	
	    for my $atomcat (sort(keys %cats))
	    {
		if(attr::match @$cat, @{$cats{$atomcat}})
		{
		    print OUTPUT "CLASS $cl $atomcat\n";
		    ++$nclass;
		}
	    }
	}
    }
    elsif(/^$/) {
	# pomijamy puste linie oraz komentarze
	}
	else
    {
	print STDERR "Illegal format: $_\n";
    }
}


sub addlinks
{
    my ($h,$d,$r) = @_;

    for my $a (@{$agr{$r}}) { print OUTPUT "#AGR $r $a\n"; }
    for my $c (@{$gov{$r}}) { print OUTPUT "#GOV $r ".attr::unparse(@$c)."\n"; }
    my $head = attr::parse $h;
    my $dep = attr::parse $d;
    
    for my $atomhead (keys %cats)
    {
	if(attr::match @$head, @{$cats{$atomhead}})
	{
	  DEP:
	    for my $atomdep (keys %cats)
	    {
		next DEP if ! attr::match @$dep, @{$cats{$atomdep}};
		
		for my $a (@{$agr{$r}})
		{
		    next DEP if ! attr::agree(@{$cats{$atomhead}},@{$cats{$atomdep}},$a);
		}
		
		for my $c (@{$gov{$r}})
		{
		    next DEP if ! attr::match(@$c,@{$cats{$atomdep}});
		}
		
		print OUTPUT "LINK ";
		print OUTPUT $atomhead." ";
		print OUTPUT $atomdep." $r\n";
		++$nlink;
		
	    }
	}
    }
}


printf STDERR "%6d CAT   statements\n", 0+keys(%cats);
printf STDERR "%6d ROLE  statements\n", 0+keys(%roles);
printf STDERR "%6d SGL   statements\n", $nsgl;
printf STDERR "%6d REQ   statements\n", $nreq;
printf STDERR "%6d LEFT  statements\n", $nleft;
printf STDERR "%6d RIGHT statements\n", $nright;
printf STDERR "%6d LINK  statements\n", $nlink;
printf STDERR "%6d CLASS statements\n", $nclass;
printf STDERR "%6d FLAG  statements\n", $nflag;


sub extractcats
{
    my $file = shift;
    open DICFILE, "$file";
    while(<DICFILE>)
    {
	while(/,([^[:space:];]+)/g)
	{
	    my $cat=$1;
	    next if !$cat || exists $cats{$cat};
	    $ncat++;
	    print OUTPUT "CAT $1\n";
	    $cats{$cat}=attr::parse($cat);
	}
    }
    close DICFILE;
}


sub loadcats
{
    my $file = shift;
    open CATFILE, "$file";
    while(<CATFILE>)
    {
	tr/ \t\n//d;
	next if !$_ || exists $cats{$_};
	print OUTPUT "CAT $_\n";
	++$ncat;
	$cats{$_}=attr::parse($_);
    }
    close CATFILE;
}

