#!/usr/bin/perl

#package:	UAM Text Tools
#component:	dgc (dg compiler)
#version:	1.0
#author:	Tomasz Obrebski

use lib "/usr/local/lib/utt";
use lib "$ENV{'HOME'}/.local/lib/utt";

use strict;
use Getopt::Long;
use Data::Dumper;
use attr;
use File::HomeDir;

my $systemconfigfile='/etc/utt/dgc.conf';
my $userconfigfile=home()."/.utt/dgc.conf";

Getopt::Long::Configure('no_ignore_case_always');

my $help=0;
my $catfile=0;
my $dicfile=0;
my $gramfile=0;
my $outputfile=0;

#read configuration files###########################
my $file;
foreach $file ($systemconfigfile, $userconfigfile){
  if(open(CONFIG, $file)){
        while (<CONFIG>) {
                chomp;
                s/#.*//;
                s/^\s+//;
                s/\s+$//;
                next unless length;
                my ($name, $value) = split(/\s*=\s*/, $_, 2);
                if(($name eq "categories")or($name eq "c")){
                        $catfile=$value;
                }
                elsif(($name eq "dictionary")or($name eq "d")){
                        $dicfile=$value;
                }
                elsif(($name eq "grammar")or($name eq "g")){
                        $gramfile=$value;
                }
                elsif(($name eq "outputfile")or($name eq "o")){
                        $outputfile=$value;
                }
                elsif(($name eq "help")or($name eq "h")){
                        $help=1;
                }

        }
        close CONFIG;
  }
}
#########################################################

GetOptions("help|h" => \$help,
	   "categories|c=s" => \$catfile,
	   "dictionary|d=s" => \$dicfile,
	   "grammar|g=s" => \$gramfile,
	   "outputfile|o=s" => \$outputfile);

my $homedir = $ENV{'HOME'};
$catfile =~ s/~/$homedir/;
$dicfile =~ s/~/$homedir/;
$gramfile =~ s/~/$homedir/;
$outputfile =~ s/~/$homedir/;


if($help)
{
    print <<'END'
Usage: dgc [OPTIONS]

Options:
   --categories -c filename	List of syntactic categories.
   --dictionary -d filename     Dictionary.
   --grammar -g filename	List of grammar rules.
   --outputfile -o filename	Output file name.
   --help -h			Help.
END
;
    exit 0;
}

die("At least one of --cats and --dic must be given.\n") if !$catfile && !$dicfile;

my $ncat=0;
my $nrole=0;
my $nsgl=0;
my $nleft=0;
my $nright=0;
my $ninitr=0;
my $nfinr=0;
my $ninitf=0;
my $nfinf=0;
my $ninitc=0;
my $nfinc=0;
my $nreq=0;
my $nlink=0;
my $nflag=0;
my $nset=0;
my $npass=0;
my $nlong=0;
my $nconstr=0;
my $nclass=0;

my %cats;
my %roles;
my %agr;
my %gov;

if(!$outputfile) {
	*OUTPUT = *STDOUT;
}
elsif($outputfile eq "-") {
    *OUTPUT = *STDOUT;
}
else {
	open(OUTPUT, ">$outputfile") or die("Can't open output file: $outputfile!");
}


loadcats($catfile) if $catfile;
extractcats($dicfile) if $dicfile;


my $cats_re = qr/(?:$attr::cat_re\s*(?:,\s*$attr::cat_re)*)/;
my $class_re = qr/(?:\@\w+)/;

my $avlist_re = $attr::avlist_re;

my $role_re     = qr/(?:[[:lower:][:digit:]_]+)/;
my $prop_re     = qr/(?:\&[[:upper:]]+)/;
my $proplist_re = qr/(?:$prop_re+)/;

# class parse_class:
# /$attr::cat_re/g;


if(!$gramfile) { 
	*INPUT = *STDIN;
}
elsif($gramfile eq "-"){
    *INPUT = *STDIN;
}
else {
	open(INPUT, $gramfile) or die("Unable to open: $gramfile!");
}

while(<INPUT>)
{
    s/#.*//;
    s/^\s+//;
    s/\s+$//;
    if(/^AGR\s+(\S+)\s+(\S+)$/)
    {
	push @{$agr{$1}}, $2;
    }
    elsif(/^GOV\s+(\S+)\s+(\S+)$/)
    {
	push @{$gov{$1}}, attr::parse($2);
    }
    elsif(/^ROLE\s+\S+$/)
    {
	$roles{$_}=1;
	print OUTPUT "$_\n";
    }
    elsif(/^SGL\s+\S+$/)
    {
	++$nsgl;
	print OUTPUT "$_\n";
    }
    elsif(/^REQ\s+(\S+)\s+(\S+)$/)
    {
	print OUTPUT "#$_\n";
	my $cat = attr::parse $1;
	for my $atomcat (keys %cats)
	{
	    if(attr::match @$cat, @{$cats{$atomcat}})
	    {
		print OUTPUT "REQ ".$atomcat." $2\n";
		++$nreq;
	    }
	}
    }
    elsif(/^LEFT\s+\S+$/)
    {
	++$nleft;
	print OUTPUT "$_\n";
    }
    elsif(/^RIGHT\s+\S+$/)
    {
	++$nright;
	print OUTPUT "$_\n";
    }
    elsif(/^INIT\s+[[:lower:]]\S*$/)
    {
	++$ninitr;
	s/INIT/INITR/;
	print OUTPUT "$_\n";
    }
    elsif(/^FIN\s+[[:lower:]]\S*$/)
    {
	++$nfinr;
	s/FIN/FINR/;
	print OUTPUT "$_\n";
    }
    elsif(/^INIT\s+[[:upper:]]+[+-]$/)
    {
	++$ninitf;
	s/INIT/INITF/;
	s/[+-]//g;
	print OUTPUT "$_\n";
    }
    elsif(/^FIN\s+[[:upper:]]+$/)
    {
	++$nfinf;
	s/FIN/FINF/;
	s/[+-]//g;
	print OUTPUT "$_\n";
    }
    # elsif(/^INIT\s+([[:upper:]]\S*)$/)
    # {
    # 	print OUTPUT "#$_\n";
    # 	my $cat = attr::parse $1;
    # 	for my $atomcat (keys %cats)
    # 	{
    # 	    if(attr::match @$cat, @{$cats{$atomcat}})
    # 	    {
    # 		print OUTPUT "INITC ".$atomcat."\n";
    # 		++$ninitc;
    # 	    }
    # 	}
    # }
    # elsif(/^FIN\s+([[:upper:]]\S*)$/)
    # {
    # 	print OUTPUT "#$_\n";
    # 	my $cat = attr::parse $1;
    # 	for my $atomcat (keys %cats)
    # 	{
    # 	    if(attr::match @$cat, @{$cats{$atomcat}})
    # 	    {
    # 		print OUTPUT "FINC ".$atomcat."\n";
    # 		++$nfinc;
    # 	    }
    # 	}
    # }
    elsif(my ($hs,$hfs,$ds,$dfs,$r,$rprops) = /^LINK\s+($cats_re)((?:;$avlist_re)?)\s+($cats_re)((?:;$avlist_re)?)\s+($role_re)((?:$proplist_re)?)$/)
    {
    	print OUTPUT "#$_\n";
    	for my $h ($hs =~ /$attr::cat_re/g)
    	{
    	    for my $d ($ds =~ /$attr::cat_re/g)
    	    {
    		addlinks($h,$hfs,$d,$dfs,$r,$rprops);
    	    }
    	}
    }
    # elsif(my ($hs,$ds,$fs,$r) = /^LINK\s+($cats_re)\s+($cats_re)\s+(\S+)\s+(\S+)$/)
    # {
    # 	print OUTPUT "#$_\n";
    # 	for my $h ($hs =~ /$attr::cat_re/g)
    # 	{
    # 	    for my $d ($ds =~ /$attr::cat_re/g)
    # 	    {
    # 		addlinks1($h,$d,$fs,$r);
    # 	    }
    # 	}
    # }
    elsif(/^FLAG\s+\S+$/)
    {
	++$nflag;
	print OUTPUT "$_\n"
    }
    elsif(/^SET\s+(\S+)\s+(\S+)$/)
    {
	print OUTPUT "#$_\n";
	my $cat = attr::parse $1;
	my $flag = $2;
	for my $atomcat (keys %cats)
	{
	    if(attr::match @$cat, @{$cats{$atomcat}})
	    {
		print OUTPUT "SET ".$atomcat." $flag\n";
		++$nset;
	    }
	}
    }
    elsif(/^PASS\s+\S+\s+\S+$/)
    {
	++$npass;
	print OUTPUT "$_\n"
    }
    elsif(/^CONSTR[IE]\s+\S+\s+\S+$/)
    {
	++$nconstr;
	print OUTPUT "$_\n"
    }
    elsif(/^LONG\s+(\S+)((\s+<\S+)*)((\s+\S+>)*)$/)
    {
	++$nlong;
	my $rel = $1;
	my $ups = $2;
	my $downs = $4;

	$ups =~ s/<//g;
	$ups =~ s/^\s+//;
	my @up = split(/\s+/,$ups);

	$downs =~ s/>//g;
	$downs =~ s/^\s+//;
	my @down = split(/\s+/,$downs);
	print OUTPUT "LONG $rel " . join(",",@up) . "^" . join(",",@down) . "\n";
    }
    elsif(my ($cl,$cs) = /^CLASS\s+(\S+)\s*\:\s*(.*)/)
    {
	print OUTPUT "#$_\n";
	for my $c ($cs =~ /\S+/g)
	{
	    my $cat = attr::parse $c;
	
	    for my $atomcat (sort(keys %cats))
	    {
		if(attr::match @$cat, @{$cats{$atomcat}})
		{
		    print OUTPUT "CLASS $cl $atomcat\n";
		    ++$nclass;
		}
	    }
	}
    }
    elsif(/^$/) {
	# pomijamy puste linie oraz komentarze
	}
	else
    {
	print STDERR "Illegal format: $_\n";
    }
}


sub addlinks
{
    my ($h,$hfs,$d,$dfs,$r,$rprops) = @_;

    for my $a (@{$agr{$r}}) { print OUTPUT "#AGR $r $a\n"; }
    for my $c (@{$gov{$r}}) { print OUTPUT "#GOV $r ".attr::unparse(@$c)."\n"; }
    my $head = attr::parse $h;
    my $dep = attr::parse $d;
    
    for my $atomhead (keys %cats)
    {
	if(attr::match @$head, @{$cats{$atomhead}})
	{
	  DEP:
	    for my $atomdep (keys %cats)
	    {
		next DEP if ! attr::match @$dep, @{$cats{$atomdep}};
		
		for my $a (@{$agr{$r}})
		{
		    next DEP if ! attr::agree(@{$cats{$atomhead}},@{$cats{$atomdep}},$a);
		}
		
		for my $c (@{$gov{$r}})
		{
		    next DEP if ! attr::match(@$c,@{$cats{$atomdep}});
		}
		
		print OUTPUT "LINK $atomhead$hfs $atomdep$dfs $r$rprops\n";
		++$nlink;
		
	    }
	}
    }
}


sub addlinks1
{
    my ($h,$d,$fs,$r) = @_;

    for my $a (@{$agr{$r}}) { print OUTPUT "#AGR $r $a\n"; }
    for my $c (@{$gov{$r}}) { print OUTPUT "#GOV $r ".attr::unparse(@$c)."\n"; }
    my $head = attr::parse $h;
    my $dep = attr::parse $d;
    
    for my $atomhead (keys %cats)
    {
	if(attr::match @$head, @{$cats{$atomhead}})
	{
	  DEP:
	    for my $atomdep (keys %cats)
	    {
		next DEP if ! attr::match @$dep, @{$cats{$atomdep}};
		
		for my $a (@{$agr{$r}})
		{
		    next DEP if ! attr::agree(@{$cats{$atomhead}},@{$cats{$atomdep}},$a);
		}
		
		for my $c (@{$gov{$r}})
		{
		    next DEP if ! attr::match(@$c,@{$cats{$atomdep}});
		}
		
		print OUTPUT "LINK $atomhead $atomdep $fs $r\n";
		++$nlink;
		
	    }
	}
    }
}


printf STDERR "%6d CAT   statements\n", 0+keys(%cats);
printf STDERR "%6d ROLE  statements\n", 0+keys(%roles);
printf STDERR "%6d SGL   statements\n", $nsgl;
printf STDERR "%6d REQ   statements\n", $nreq;
printf STDERR "%6d LEFT  statements\n", $nleft;
printf STDERR "%6d RIGHT statements\n", $nright;
printf STDERR "%6d INITR statements\n", $ninitr;
printf STDERR "%6d FINR  statements\n", $nfinr;
printf STDERR "%6d INITF statements\n", $ninitf;
printf STDERR "%6d FINF  statements\n", $nfinf;
printf STDERR "%6d INITC statements\n", $ninitc;
printf STDERR "%6d FINC  statements\n", $nfinc;
printf STDERR "%6d LINK  statements\n", $nlink;
printf STDERR "%6d CLASS statements\n", $nclass;
printf STDERR "%6d FLAG  statements\n", $nflag;
printf STDERR "%6d SET   statements\n", $nset;
printf STDERR "%6d PASS  statements\n", $npass;


sub extractcats
{
    my $file = shift;
    open DICFILE, "$file";
    while(<DICFILE>)
    {
	while(/,([^[:space:];]+)/g)
	{
	    my $cat=$1;
	    next if !$cat || exists $cats{$cat};
	    $ncat++;
	    print OUTPUT "CAT $1\n";
	    $cats{$cat}=attr::parse($cat);
	}
    }
    close DICFILE;
}


sub loadcats
{
    my $file = shift;
    open CATFILE, "$file";
    while(<CATFILE>)
    {
	tr/ \t\n//d;
	next if !$_ || exists $cats{$_};
	print OUTPUT "CAT $_\n";
	++$ncat;
	$cats{$_}=attr::parse($_);
    }
    close CATFILE;
}

