#!/usr/bin/perl

#package:	UAM Text Tools
#component:	dgc (dg compiler)
#version:	1.0
#author:	Tomasz Obrebski

use lib "/usr/local/lib/utt";
use lib "$ENV{'HOME'}/.local/lib/utt";

#use strict;
use Getopt::Long;
use Data::Dumper;
use attr;
use File::HomeDir;
use Parse::RecDescent;

$::RD_HINT=1;
# use List::MoreUtils;

my $systemconfigfile='/etc/utt/dgc.conf';
my $userconfigfile=0; #home()."/.utt/dgc.conf";

Getopt::Long::Configure('no_ignore_case_always');

my $help=0;
my $catfile=0;
my $dicfile=0;
my $gramfile=0;
my $outputfile=0;

#read configuration files###########################
my $file;
foreach $file ($systemconfigfile, $userconfigfile){
  if(open(CONFIG, $file)){
        while (<CONFIG>) {
                chomp; s/#.*//; s/^\s+//; s/\s+$//;
                next unless length;
                my ($name, $value) = split(/\s*=\s*/, $_, 2);
                if(($name eq "categories")or($name eq "c"))      { $catfile=$value; }
                elsif(($name eq "dictionary")or($name eq "d"))   { $dicfile=$value; }
                elsif(($name eq "grammar")or($name eq "g"))      { $gramfile=$value; }
                elsif(($name eq "outputfile")or($name eq "o"))   { $outputfile=$value; }
                elsif(($name eq "help")or($name eq "h"))         { $help=1; }

        }
        close CONFIG;
  }
}
#########################################################

GetOptions("help|h" => \$help,
	   "categories|c=s" => \$catfile,
	   "dictionary|d=s" => \$dicfile,
	   "grammar|g=s" => \$gramfile,
	   "outputfile|o=s" => \$outputfile);

my $homedir = $ENV{'HOME'};
$catfile =~ s/~/$homedir/;
$dicfile =~ s/~/$homedir/;
$gramfile =~ s/~/$homedir/;
$outputfile =~ s/~/$homedir/;


if($help)
{
    print <<'END'
Usage: dgc [OPTIONS]

Options:
   --categories -c filename	List of syntactic categories.
   --dictionary -d filename     Dictionary.
   --grammar -g filename	List of grammar rules.
   --outputfile -o filename	Output file name.
   --help -h			Help.
END
;
    exit 0;
}

die("At least one of --cats and --dic must be given.\n") if !$catfile && !$dicfile;


our %in;      #gramatyka wejściowa
our %idx;     #indeks gramatyki wejściowej (niektóre stwierdzenia)
our %out;     #gramatyka wyjściowa
our %class;   #tablica klas

our $attr_re       = $attr::attr_re;
our $cat_re        = $attr::cat_re;
our $cats_re       = qr/(?:$attr::cat_re\s*(?:,\s*$attr::cat_re)*)/;
our $class_re      = qr/(?:\@\w+)/;
our $av_re         = $attr::av_re;
our $avlist_re     = $attr::avlist_re;
our $role_re       = qr/(?:[[:lower:][:digit:]_]+)/;
our $prop_re       = qr/(?:[[:upper:]]+)/;
our $proplist_re   = qr/(?:(?:\&$prop_re)+)/;

my $inputlineno=0;

our $statementgrammar = q(

statement : statement1 ";" { $item[1] }

statement1: /cat/i     acat              { ['cat',     { cat=>$item{acat}, catexp=>attr::parse($item{acat}) },       $item{acat}] }
          | /flag/i    flag              { ['flag',    { flag=>$item{flag} },                                        $item{flag}] }
          | /role/i    role              { ['role',    { role=>$item{role} },                                        $item{role}] }
          | /left/i    role              { ['left',    { role=>$item{role} },                                                  0] }
          | /right/i   role              { ['right',   { role=>$item{role} },                                                  0] }
          | /sgl/i     role              { ['sgl',     { role=>$item{role} },                                                  0] }
          | /req/i     xcat role         { ['req',     { cats=>$item{xcat}, role=>$item{role} },                               0] }
          | /agr/i     role attr         { ['agr',     { role=>$item{role}, attr=>$item{attr} },                     $item{role}] }
          | /gov/i     role xcat         { ['gov',     { role=>$item{role}, cats=>$item{xcat} },                     $item{role}] }
          | /init/i    flagconstr        { ['initf',   { flag=>$item{flagconstr} },                                            0] }
          | /fin/i     flagconstr        { ['finf',    { flag=>$item{flagconstr} },                                            0] }
          | /init/i    role              { ['initr',   { role=>$item{role} },                                                  0] }
          | /fin/i     role              { ['finr',    { role=>$item{role} },                                                  0] }
          | /set/i     xcat flag         { ['set',     { cats=>$item{xcat}, flag=>$item{flag} },                               0] }
          | /pass/i    role flag         { ['pass',    { role=>$item{role}, flag=>$item{flag} },                               0] }
          | /constre/i role role         { ['constre', { role1=>$item[2], role2=>$item[3] },                                   0] }
          | /constri/i role role         { ['constri', { role1=>$item[2], role2=>$item[3] },                                   0] }

          | /link/i    xcat optflags(?) xcat optflags(?) role prop(s?)
                                         { ['link', { hcats=>$item[2], hflagconstr=>$item[3], 
                                                      dcats=>$item[4], dflagconstr=>$item[5],
                                                      role=>$item[6], props=>$item[7] },                                       0] }

          | /long/i role role(s? /,/) '^' role(s? /,/)
                                         { ['long', { rel=>$item[2], up=>$item[3], down=>$item[5] },                           0] }

          | /class/i classname '=' xcat  { ['class', { name=>$item{classname}, cats=>$item{xcat} },             $item{classname}] }

acat:       /$attr::cat_re/

attr:       /$attr::attr_re/

xcat:       classexpr

role:       /\w+/

flag:       /\w+/

optflags:   "//" flagconstr { $item[2] }

flagconstr: /\w+[+-]/

prop:       '&' /\w+/ { $item[2] }

classname:  /\$\w+[+-]/

classexpr  : classexpr1 '|' classexpr   { main::union($item[1],$item[3]) }
           | classexpr1 '~' classexpr   { main::intersection( $item[1], main::complement($item[3]) ) }
           | classexpr1

classexpr1 : classexpr2 '&' classexpr1  { main::intersection($item[1],$item[3]) }
           | classexpr2

classexpr2 : '~' classexpr2            { main::complement($item[2]) }
           | classexpr3

classexpr3 : classexpr4 '/' /$attr::avlist_re/  { main::intersection($item[1], [main::extension('*/' . $item[3])] ) }
           | classexpr4

classexpr4 : class
           | cat
           | '(' classexpr ')'          { $item[2] }

class :    classname                    { $main::class{$item[1]} or @{[]} }

cat :      /$main::cat_re/              { [main::extension($item[1])] }

);

our $statementparser = Parse::RecDescent->new($statementgrammar);

sub register
{
    my ($src, $statement, $data, $index) = @_ ;
    $data->{line} = $inputlineno;
    $data->{src} = $src;
    push @{$in{$statement}}, $data;
    push @{$idx{$statement}{$index}}, $data if($index);

    if ($statement eq 'class') { $class{ $data->{name} } = $data->{cats} }
}


if(!$outputfile)          { *OUTPUT = *STDOUT; }
elsif($outputfile eq "-") { *OUTPUT = *STDOUT; }
else                      { open(OUTPUT, ">$outputfile") or die("Can't open output file: $outputfile!"); }

if(!$gramfile)            { *INPUT = *STDIN; }
elsif($gramfile eq "-")   { *INPUT = *STDIN; }
else                      { open(INPUT, "cat $gramfile | m4 |") or die("Unable to open: $gramfile!"); }

# *INPUT = *STDIN; ############### TYMCZASOWO

loadcats($catfile) if $catfile;
extractcats($dicfile) if $dicfile;


# CZYTANIE GRAMATYKI DGC

while(<INPUT>)
{
    $inputlineno++;
    s/#.*//;
    s/^\s+//;
    s/\s+$//;
    s/\s+/ /g;
    next unless $_;
    my $result = $statementparser->statement("$_;");

    # print "#input line $inputlineno\n";
    # print Dumper($result);

    if($result) { register($_, @{$result}) } else { print STDERR "ERROR at line $inputlineno\n" }
}
    

# GENEROWANIE GRAMATYKI DGP

my $inline = 0;
my $outline = 0;


# print Dumper($idx{gov}->{subj});


for my $x (@{$in{cat}})    { print_outin("CAT $x->{cat}", $x); }

for my $x (@{$in{flag}})   { print_outin("FLAG $x->{flag}", $x); }

for my $x (@{$in{role}})   { print_outin("ROLE $x->{role}", $x); }

for my $x (@{$in{long}})    { print_outin("LONG $x->{rel} " . join(",",@{$x->{up}}) . "^" . join(",",@{$x->{down}}), $x) }

for my $x (@{$in{left}})   { print_outin("LEFT $x->{role}", $x)  if chk_role($x->{role}, $x) }

for my $x (@{$in{right}})  { print_outin("RIGHT $x->{role}", $x) if chk_role($x->{role}, $x) }

for my $x (@{$in{sgl}})    { print_outin("SGL $x->{role}", $x)   if chk_role($x->{role}, $x) }

for my $x (@{$in{req}})
{
    if( chk_role($x->{role}, $x) )
    {
	for my $atomcat (map{$_->{cat}} @{$x->{cats}})
	{
	    print_outin("REQ $atomcat $x->{role}", $x);
	}
    }
}

for my $x (@{$in{initr}}) { print_outin("INITR $x->{role}", $x)    if chk_role($x->{role}, $x) }

for my $x (@{$in{finr}}) { print_outin("FINR $x->{role}", $x)      if chk_role($x->{role}, $x) }

for my $x (@{$in{initf}}) { print_outin("INITF $x->{flag}", $x) } # SPRAWDZIĆ CZY FLAGA JEST ZADEKLAROWANA

for my $x (@{$in{finf}}) { print_outin("FINF $x->{flag}", $x); } # SPRAWDZIĆ CZY FLAGA JEST ZADEKLAROWANA

for my $x (@{$in{set}})
{
    for my $atomcat (map{$_->{cat}} @{$x->{cats}})
    {
	print_outin("SET $atomcat $x->{flag}", $x);
    }	
}

for my $x (@{$in{pass}})    { print_outin("PASS $x->{role} $x->{flag}", $x); }

for my $x (@{$in{constre}}) { print_outin("CONSTRE $x->{role1} $x->{role2}", $x) if chk_role($x->{role1}, $x) & chk_role($x->{role2}, $x) }

for my $x (@{$in{constri}}) { print_outin("CONSTRI $x->{role1} $x->{role2}", $x) if chk_role($x->{role1}, $x) & chk_role($x->{role2}, $x) }

for my $x (@{$in{link}})
{
    my @agrs = @{ $idx{agr}->{$x->{role} } or [] };
    my @govs = @{ $idx{gov}->{$x->{role} } or [] };

    my @deps = (@govs > 0) ? @{ intersection( $x->{dcats}, map { $_->{cats} } @govs ) } : @{ $x->{dcats} } ;

    for my $head ( @{ $x->{hcats} } )
    {
      DEP:
	for my $dep (@deps)
	{
	    for my $agr (@agrs)
	    {
		next DEP unless attr::agree(@{$head->{catexp}},@{$dep->{catexp}},$agr->{attr});
	    }
	    my $hflagconstr = @{$x->{hflagconstr}} ? "//@{$x->{hflagconstr}}" : "";
	    my $dflagconstr = @{$x->{dflagconstr}} ? "//@{$x->{dflagconstr}}" : "";
	    my $props = join('',map("\&$_", @{$x->{props}}));

	    print_outin("LINK $head->{cat}$hflagconstr $dep->{cat}$dflagconstr $x->{role}$props",$x, @agrs, @govs);
	}
    }
}


sub chk_role
{
    ($role, $statement_details) = @_;
    if($idx{role}{$role}) { 1; } else { print_error("undefined role", $statement_details); 0; }
}

sub print_outin
{
    my ($out,@in) = (shift, @_);
    print OUTPUT "$out\t\t#";
    printf OUTPUT " %04d@\"%s\"", $_->{line}, $_->{src} foreach @in;
    print OUTPUT "\n";
}

sub print_error
{
    my ($message,@in) = (shift,@_);
    print STDERR "ERROR: $message in statement ";
    printf STDERR " %04d@\"%s\"", $_->{line}, $_->{src} foreach @in;
    print STDERR "\n";
}


sub extractcats
{
    my $file = shift;
    open DICFILE, "$file";
    while(<DICFILE>)
    {
	while(/,([^[:space:];]+)/g)
	{
	    my $cat=$1;
	    next if !$cat; # || exists $cats{$cat};
#	    print OUTPUT "CAT $1\n";
	    register('cat',     {src=>"CAT $cat", cat=>"$cat", catexp=>attr::parse($cat)},                 $cat);
	}
    }
    close DICFILE;
}


sub loadcats
{
    my $file = shift;
    open CATFILE, "$file";
    while(<CATFILE>)
    {
	tr/ \t\n//d;
	next if !$_; # || exists $cats{$_};
#	print OUTPUT "CAT $_\n";
	register("CAT $_", 'cat',     {cat=>"$_", catexp=>attr::parse($_)},                 $_);
    }
    close CATFILE;
}

sub extension
{
    my $cat = shift;
    my $catexp = attr::parse($cat);
    grep { attr::match(@{$_->{catexp}},@{$catexp}) } @{$in{cat}};
}

sub uniq  { my %seen; grep { ! $seen{$_}++ } @_ }
sub union { [ uniq( map { @{$_} } @_ ) ] }
sub intersection { my $n=@_; my %seen; [ grep { ++$seen{$_} == $n } map { @{$_} } @_ ] }
sub complement   { my %exclude;   for $c (@{shift()}) { $exclude{$c}++ };   [ grep { ! $exclude{$_} } @{$in{cat}} ] }

