#!/usr/bin/perl

#package:	UAM Text Tools
#component:	dgc (dg compiler)
#version:	1.0
#author:	Tomasz Obrebski

use lib "/usr/local/lib/utt";
use lib "$ENV{'HOME'}/.local/lib/utt";

#use strict;
use Getopt::Long;
use Data::Dumper;
use attr;
use File::HomeDir;
use Parse::RecDescent;

$::RD_HINT=1;
# use List::MoreUtils;

my $systemconfigfile='/etc/utt/dgc.conf';
my $userconfigfile=0; #home()."/.utt/dgc.conf";

Getopt::Long::Configure('no_ignore_case_always');

my $help=0;
my $catfile=0;
my $dicfile=0;
my $gramfile=0;
my $outputfile=0;

#read configuration files###########################
my $file;
foreach $file ($systemconfigfile, $userconfigfile){
  if(open(CONFIG, $file)){
        while (<CONFIG>) {
                chomp; s/#.*//; s/^\s+//; s/\s+$//;
                next unless length;
                my ($name, $value) = split(/\s*=\s*/, $_, 2);
                if(($name eq "categories")or($name eq "c"))      { $catfile=$value; }
                elsif(($name eq "dictionary")or($name eq "d"))   { $dicfile=$value; }
                elsif(($name eq "grammar")or($name eq "g"))      { $gramfile=$value; }
                elsif(($name eq "outputfile")or($name eq "o"))   { $outputfile=$value; }
                elsif(($name eq "help")or($name eq "h"))         { $help=1; }

        }
        close CONFIG;
  }
}
#########################################################

GetOptions("help|h" => \$help,
	   "categories|c=s" => \$catfile,
	   "dictionary|d=s" => \$dicfile,
	   "grammar|g=s" => \$gramfile,
	   "outputfile|o=s" => \$outputfile);

my $homedir = $ENV{'HOME'};
$catfile =~ s/~/$homedir/;
$dicfile =~ s/~/$homedir/;
$gramfile =~ s/~/$homedir/;
$outputfile =~ s/~/$homedir/;


if($help)
{
    print <<'END'
Usage: dgc [OPTIONS]

Options:
   --categories -c filename	List of syntactic categories.
   --dictionary -d filename     Dictionary.
   --grammar -g filename	List of grammar rules.
   --outputfile -o filename	Output file name.
   --help -h			Help.
END
;
    exit 0;
}

die("At least one of --cats and --dic must be given.\n") if !$catfile && !$dicfile;


our %in;      #gramatyka wejciowa
our %idx;     #indeks gramatyki wejciowej (niektre stwierdzenia)
our %out;     #gramatyka wyjciowa
our %class;   #tablica klas

our $attr_re       = $attr::attr_re;
our $cat_re        = $attr::cat_re;
our $cats_re       = qr/(?:$attr::cat_re\s*(?:,\s*$attr::cat_re)*)/;
our $class_re      = qr/(?:\@\w+)/;
our $av_re         = $attr::av_re;
our $avlist_re     = $attr::avlist_re;
our $role_re       = qr/(?:[[:lower:][:digit:]_]+)/;
our $prop_re       = qr/(?:[[:upper:]]+)/;
our $proplist_re   = qr/(?:(?:\&$prop_re)+)/;

my $inputlineno=0;

our $statementgrammar = q(

statement : statement1 ";" { $item[1] }

statement1: /cat/i     acat              { ['cat',     { cat=>$item{acat}, catexp=>attr::parse($item{acat}) },       $item{acat}] }
          | /flag/i    flag              { ['flag',    { flag=>$item{flag} },                                        $item{flag}] }
          | /role/i    role              { ['role',    { role=>$item{role} },                                        $item{role}] }
          | /left/i    role              { ['left',    { role=>$item{role} },                                                  0] }
          | /right/i   role              { ['right',   { role=>$item{role} },                                                  0] }
          | /sgl/i     role              { ['sgl',     { role=>$item{role} },                                                  0] }
          | /req/i     xcat role         { ['req',     { cats=>$item{xcat}, role=>$item{role} },                               0] }
          | /agr/i     role attr         { ['agr',     { role=>$item{role}, attr=>$item{attr} },                     $item{role}] }
          | /gov/i     role xcat         { ['gov',     { role=>$item{role}, cats=>$item{xcat} },                     $item{role}] }
          | /init/i    flagconstr        { ['initf',   { flag=>$item{flagconstr} },                                            0] }
          | /fin/i     flagconstr        { ['finf',    { flag=>$item{flagconstr} },                                            0] }
          | /init/i    role              { ['initr',   { role=>$item{role} },                                                  0] }
          | /fin/i     role              { ['finr',    { role=>$item{role} },                                                  0] }
          | /set/i     xcat flag         { ['set',     { cats=>$item{xcat}, flag=>$item{flag} },                               0] }
          | /pass/i    role flag         { ['pass',    { role=>$item{role}, flag=>$item{flag} },                               0] }
          | /constre/i role role         { ['constre', { role1=>$item[2], role2=>$item[3] },                                   0] }
          | /constri/i role role         { ['constri', { role1=>$item[2], role2=>$item[3] },                                   0] }

          | /link/i    xcat optflags(?) xcat optflags(?) role prop(s?)
                                         { ['link', { hcats=>$item[2], hflagconstr=>$item[3], 
                                                      dcats=>$item[4], dflagconstr=>$item[5],
                                                      role=>$item[6], props=>$item[7] },                                       0] }

          | /long/i role role(s? /,/) '^' role(s? /,/)
                                         { ['long', { rel=>$item[2], up=>$item[3], down=>$item[5] },                           0] }

          | /class/i classname '=' xcat  { ['class', { name=>$item{classname}, cats=>$item{xcat} },             $item{classname}] }

acat:       /$attr::cat_re/

attr:       /$attr::attr_re/

xcat:       classexpr

role:       /\w+/

flag:       /\w+/

optflags:   "//" flagconstr { $item[2] }

flagconstr: /\w+[+-]/

prop:       '&' /\w+/ { $item[2] }

classname:  /\$\w+[+-]/

classexpr  : classexpr1 '|' classexpr   { main::union($item[1],$item[3]) }
           | classexpr1 '~' classexpr   { main::intersection( $item[1], main::complement($item[3]) ) }
           | classexpr1

classexpr1 : classexpr2 '&' classexpr1  { main::intersection($item[1],$item[3]) }
           | classexpr2

classexpr2 : '~' classexpr2            { main::complement($item[2]) }
           | classexpr3

classexpr3 : classexpr4 '/' /$attr::avlist_re/  { main::intersection($item[1], [main::extension('*/' . $item[3])] ) }
           | classexpr4

classexpr4 : class
           | cat
           | '(' classexpr ')'          { $item[2] }

class :    classname                    { $main::class{$item[1]} or @{[]} }

cat :      /$main::cat_re/              { [main::extension($item[1])] }

);

our $statementparser = Parse::RecDescent->new($statementgrammar);

sub register
{
    my ($src, $statement, $data, $index) = @_ ;
    $data->{line} = $inputlineno;
    $data->{src} = $src;
    push @{$in{$statement}}, $data;
    push @{$idx{$statement}{$index}}, $data if($index);

    if ($statement eq 'class') { $class{ $data->{name} } = $data->{cats} }
}


if(!$outputfile)          { *OUTPUT = *STDOUT; }
elsif($outputfile eq "-") { *OUTPUT = *STDOUT; }
else                      { open(OUTPUT, ">$outputfile") or die("Can't open output file: $outputfile!"); }

if(!$gramfile)            { *INPUT = *STDIN; }
elsif($gramfile eq "-")   { *INPUT = *STDIN; }
else                      { open(INPUT, "cat $gramfile | m4 |") or die("Unable to open: $gramfile!"); }

# *INPUT = *STDIN; ############### TYMCZASOWO

loadcats($catfile) if $catfile;
extractcats($dicfile) if $dicfile;


# CZYTANIE GRAMATYKI DGC

while(<INPUT>)
{
    $inputlineno++;
    s/#.*//;
    s/^\s+//;
    s/\s+$//;
    s/\s+/ /g;
    next unless $_;
    my $result = $statementparser->statement("$_;");

    # print "#input line $inputlineno\n";
    # print Dumper($result);

    if($result) { register($_, @{$result}) } else { print STDERR "ERROR at line $inputlineno\n" }
}
    

# GENEROWANIE GRAMATYKI DGP

my $inline = 0;
my $outline = 0;


# print Dumper($idx{gov}->{subj});


for my $x (@{$in{cat}})    { print_outin("CAT $x->{cat}", $x); }

for my $x (@{$in{flag}})   { print_outin("FLAG $x->{flag}", $x); }

for my $x (@{$in{role}})   { print_outin("ROLE $x->{role}", $x); }

for my $x (@{$in{long}})    { print_outin("LONG $x->{rel} " . join(",",@{$x->{up}}) . "^" . join(",",@{$x->{down}}), $x) }

for my $x (@{$in{left}})   { print_outin("LEFT $x->{role}", $x)  if chk_role($x->{role}, $x) }

for my $x (@{$in{right}})  { print_outin("RIGHT $x->{role}", $x) if chk_role($x->{role}, $x) }

for my $x (@{$in{sgl}})    { print_outin("SGL $x->{role}", $x)   if chk_role($x->{role}, $x) }

for my $x (@{$in{req}})
{
    if( chk_role($x->{role}, $x) )
    {
	for my $atomcat (map{$_->{cat}} @{$x->{cats}})
	{
	    print_outin("REQ $atomcat $x->{role}", $x);
	}
    }
}

for my $x (@{$in{initr}}) { print_outin("INITR $x->{role}", $x)    if chk_role($x->{role}, $x) }

for my $x (@{$in{finr}}) { print_outin("FINR $x->{role}", $x)      if chk_role($x->{role}, $x) }

for my $x (@{$in{initf}}) { print_outin("INITF $x->{flag}", $x) } # SPRAWDZI CZY FLAGA JEST ZADEKLAROWANA

for my $x (@{$in{finf}}) { print_outin("FINF $x->{flag}", $x); } # SPRAWDZI CZY FLAGA JEST ZADEKLAROWANA

for my $x (@{$in{set}})
{
    for my $atomcat (map{$_->{cat}} @{$x->{cats}})
    {
	print_outin("SET $atomcat $x->{flag}", $x);
    }	
}

for my $x (@{$in{pass}})    { print_outin("PASS $x->{role} $x->{flag}", $x); }

for my $x (@{$in{constre}}) { print_outin("CONSTRE $x->{role1} $x->{role2}", $x) if chk_role($x->{role1}, $x) & chk_role($x->{role2}, $x) }

for my $x (@{$in{constri}}) { print_outin("CONSTRI $x->{role1} $x->{role2}", $x) if chk_role($x->{role1}, $x) & chk_role($x->{role2}, $x) }

for my $x (@{$in{link}})
{
    my @agrs = @{ $idx{agr}->{$x->{role} } or [] };
    my @govs = @{ $idx{gov}->{$x->{role} } or [] };

    my @deps = (@govs > 0) ? @{ intersection( $x->{dcats}, map { $_->{cats} } @govs ) } : @{ $x->{dcats} } ;

    for my $head ( @{ $x->{hcats} } )
    {
      DEP:
	for my $dep (@deps)
	{
	    for my $agr (@agrs)
	    {
		next DEP unless attr::agree(@{$head->{catexp}},@{$dep->{catexp}},$agr->{attr});
	    }
	    my $hflagconstr = @{$x->{hflagconstr}} ? "//@{$x->{hflagconstr}}" : "";
	    my $dflagconstr = @{$x->{dflagconstr}} ? "//@{$x->{dflagconstr}}" : "";
	    my $props = join(map { "\&$_" } $x->{props});
	    print_outin("LINK $head->{cat}$hflagconstr $dep->{cat}$dflagconstr $x->{role}$props",$x, @agrs, @govs);
	}
    }
}


sub chk_role
{
    ($role, $statement_details) = @_;
    if($idx{role}{$role}) { 1; } else { print_error("undefined role", $statement_details); 0; }
}

sub print_outin
{
    my ($out,@in) = (shift, @_);
    print OUTPUT "$out\t\t#";
    printf OUTPUT " %04d@\"%s\"", $_->{line}, $_->{src} foreach @in;
    print OUTPUT "\n";
}

sub print_error
{
    my ($message,@in) = (shift,@_);
    print STDERR "ERROR: $message in statement ";
    printf STDERR " %04d@\"%s\"", $_->{line}, $_->{src} foreach @in;
    print STDERR "\n";
}


sub extractcats
{
    my $file = shift;
    open DICFILE, "$file";
    while(<DICFILE>)
    {
	while(/,([^[:space:];]+)/g)
	{
	    my $cat=$1;
	    next if !$cat; # || exists $cats{$cat};
#	    print OUTPUT "CAT $1\n";
	    register('cat',     {src=>"CAT $cat", cat=>"$cat", catexp=>attr::parse($cat)},                 $cat);
	}
    }
    close DICFILE;
}


sub loadcats
{
    my $file = shift;
    open CATFILE, "$file";
    while(<CATFILE>)
    {
	tr/ \t\n//d;
	next if !$_; # || exists $cats{$_};
#	print OUTPUT "CAT $_\n";
	register("CAT $_", 'cat',     {cat=>"$_", catexp=>attr::parse($_)},                 $_);
    }
    close CATFILE;
}

sub extension
{
    my $cat = shift;
    my $catexp = attr::parse($cat);
    grep { attr::match(@{$_->{catexp}},@{$catexp}) } @{$in{cat}};
}

sub uniq  { my %seen; grep { ! $seen{$_}++ } @_ }
sub union { [ uniq( map { @{$_} } @_ ) ] }
sub intersection { my $n=@_; my %seen; [ grep { ++$seen{$_} == $n } map { @{$_} } @_ ] }
sub complement   { my %exclude;   for $c (@{shift()}) { $exclude{$c}++ };   [ grep { ! $exclude{$_} } @{$in{cat}} ] }

# printf STDERR "%6d CAT   statements\n", 0+keys(%cats);
# printf STDERR "%6d ROLE  statements\n", 0+keys(%role);
# printf STDERR "%6d SGL   statements\n", @sgl+0;
# printf STDERR "%6d REQ   statements\n", @req+0;
# printf STDERR "%6d LEFT  statements\n", $nleft;
# printf STDERR "%6d RIGHT statements\n", $nright;
# printf STDERR "%6d INITR statements\n", $ninitr;
# printf STDERR "%6d FINR  statements\n", $nfinr;
# printf STDERR "%6d INITF statements\n", $ninitf;
# printf STDERR "%6d FINF  statements\n", $nfinf;
# printf STDERR "%6d INITC statements\n", $ninitc;
# printf STDERR "%6d FINC  statements\n", $nfinc;
# printf STDERR "%6d LINK  statements\n", $nlink;
# printf STDERR "%6d CLASS statements\n", $nclass;
# printf STDERR "%6d FLAG  statements\n", $nflag;
# printf STDERR "%6d SET   statements\n", $nset;
# printf STDERR "%6d PASS  statements\n", $npass;


##################################################################################

# while(<INPUT>)
# {
#     $inputlineno++;
#     s/#.*//;
#     s/^\s+//;
#     s/\s+$//;
#     s/\s+/ /g;
#     if   (/^CAT ($cat_re)$/)         { register('cat',     {src=>$&, cat=>attr::parse($1)},                 $1); }
#     elsif(/^FLAG (\S+)$/)            { register('flag',    {src=>$&, flag=>$1},                             $1); }
#     elsif(/^ROLE (\S+)$/)            { register('role',    {src=>$&, role=>$1},                             $1); }
#     elsif(/^LEFT (\S+)$/)            { register('left',    {src=>$&, role=>$1},                              0); }
#     elsif(/^RIGHT (\S+)$/)           { register('right',   {src=>$&, role=>$1},                              0); }
#     elsif(/^SGL (\S+)$/)             { register('sgl',     {src=>$&, role=>$1},                              0); }
#     elsif(/^REQ (\S+) (\S+)$/)       { register('req',     {src=>$&, cat=>$1, role=>$2},                     0); }
#     elsif(/^AGR (\S+) (\S+)$/)       { register('agr',     {src=>$&, role=>$1, attr=>$2},                   $1); }
#     elsif(/^GOV (\S+) (\S+)$/)       { register('gov',     {src=>$&, role=>$1, cat=>$2, catexp=>attr::parse($2)},       $1); }
#     elsif(/^INIT ($role_re)$/)       { register('initr',   {src=>$&, role=>$1},                              0); }
#     elsif(/^FIN ($role_re)$/)        { register('finr',    {src=>$&, role=>$1},                              0); }
#     elsif(/^INIT ($av_re)$/)         { register('initf',   {src=>$&, flag=>$1},                              0); }
#     elsif(/^FIN ($av_re)$/)          { register('finf',    {src=>$&, flag=>$1},                              0); }
#     elsif(/^SET ($cat_re)\s+(\S+)$/) { register('set',     {src=>$&, cat=>$1, flag=>$2},                     0); }
#     elsif(/^PASS (\S+)\s+(\S+)$/)    { register('pass',    {src=>$&, role=>$1, flag=>$2},                    0); }
#     elsif(/^CONSTRE (\S+)\s+(\S+)$/) { register('constre', {src=>$&, role1=>$1, role2=>$2},                  0); }
#     elsif(/^CONSTRI (\S+)\s+(\S+)$/) { register('constri', {src=>$&, role1=>$1, role2=>$2},                  0); }

#     elsif(my ($hs,$hfs,$ds,$dfs,$r,$rprops) = /^LINK\s+($cats_re)((?:;$avlist_re)?)\s+($cats_re)((?:;$avlist_re)?)\s+($role_re)((?:$proplist_re)?)$/)
#                                        { register('link', {src=>$&, hs=>$hs, hfs=>$hfs, ds=>$ds, dfs=>$dfs, r=>$r, props=>$rprops},0) }
#     elsif(/^LONG\s+(\S+)((\s+<\S+)*)((\s+\S+>)*)$/)
#     {
# 	my $rel = $1;
# 	my $ups = $2;
# 	my $downs = $4;

# 	$ups =~ s/<//g;
# 	$ups =~ s/^\s+//;
# 	my @up = split(/\s+/,$ups) or ();

# 	$downs =~ s/>//g;
# 	$downs =~ s/^\s+//;
# 	my @down = split(/\s+/,$downs) or ();

# 	register('long', {src=>$&, rel=>$rel, up=>\@up, down=>\@down},0);

# 	print OUTPUT "LONG $rel " . join(",",@up) . "^" . join(",",@down) . "\n";
#     }
#     elsif(my ($cl,$cs) = /^CLASS\s+(\S+)\s*\=(.*)$/)
#     {
# 	$class{$1} = $classparser->classexpr($2);
#     }
#     elsif(/^$/)
#     {
# 	# pomijamy puste linie oraz komentarze
#     }
#     else
#     {
# 	print STDERR "Illegal format: $_\n";
#     }
# }



# sub is_cat  { shift =~ /$attr::cat_re/; }
# sub is_role { $role{shift}; }
# sub is_flag { $flag{shift}; }


# sub print_in
# {
#     my $data = shift();
#     printf "in@%04d ", $data->{line};
#     print $data->{src};
# }

# sub print_out
# {
#     printf "out@%08d ", $outline++;
#     print @_;
# }

# sub addlinks
# {
#     my ($l, $h,$hfs,$d,$dfs,$r,$rprops) = @_;

#     my @heads = extension($h);
#     my @deps = extension($d);

#     my @deps_gov;
#   DEP_GOV:
#     for my $dep (@deps)
#     {
# 	for my $gov (@govs)
# 	{
# 	    next DEP_GOV unless attr::match(@{$dep->{catexp}},@{$gov->{catexp}});
# 	}
# 	push @deps_gov, $dep;
#     }
    
#     for my $head (@heads)
#     {
#       DEP:
# 	for my $dep (@deps_gov)
# 	{
# 	    for my $agr (@agrs)
# 	    {
# 		next DEP unless attr::agree(@{$head->{catexp}},@{$dep->{catexp}},$agr->{attr});
# 	    }
# 	    print_outin("LINK $head->{cat}$hfs $dep->{cat}$dfs $r$rprops",$l, @agrs,@govs);
# 	}
#     }
# }




    # elsif(/^INIT\s+([[:upper:]]\S*)$/)
    # {
    # 	print OUTPUT "#$_\n";
    # 	my $cat = attr::parse $1;
    # 	for my $atomcat (keys %cats)
    # 	{
    # 	    if(attr::match @$cat, @{$cats{$atomcat}})
    # 	    {
    # 		print OUTPUT "INITC ".$atomcat."\n";
    # 		++$ninitc;
    # 	    }
    # 	}
    # }
    # elsif(/^FIN\s+([[:upper:]]\S*)$/)
    # {
    # 	print OUTPUT "#$_\n";
    # 	my $cat = attr::parse $1;
    # 	for my $atomcat (keys %cats)
    # 	{
    # 	    if(attr::match @$cat, @{$cats{$atomcat}})
    # 	    {
    # 		print OUTPUT "FINC ".$atomcat."\n";
    # 		++$nfinc;
    # 	    }
    # 	}
    # }
