#!/usr/bin/perl

#package:        UAM Text Tools
#component:      ser (pattern search tool)
#version:	 1.0
#author:         Tomasz Obrebski

use strict;
use Getopt::Long;
use File::Temp;
use File::HomeDir;

# Directory expected to hold the stock flex template (ser.l.template) and the
# stock m4 macro file (terms.m4) used when none is configured.
my $LIB_DIR = '/usr/local/lib/utt';

# Configuration files: system-wide first, then the per-user one.
my $systemconfigfile = '/etc/utt/ser.conf';
my $userconfigfile   = home() . '/.utt/ser.conf';

# Turn off case-insensitive option matching so that -m and -M stay distinct.
Getopt::Long::Configure('no_ignore_case_always');

# Boolean switches (0 = off).
# NOTE(review): $no_markers is parsed but not used anywhere else in the
# visible script (the help text marks it as TODO).
my ($help, $only_matching, $no_markers, $flex) = (0, 0, 0, 0);

# Valued options; 0 means "not set".
my ($pattern, $macros, $flextemplate, $tags) = (0, 0, 0, 0);

# Name of the field carrying morphological information; passed to m4 as
# the MORFIELD definition.
my $morfield = 'lem';

#read configuration files###########################
# Each configuration file consists of "name = value" lines.  Everything from
# a '#' to the end of the line is a comment, surrounding whitespace is
# ignored and empty lines are skipped.  The system-wide file is read first
# and the per-user file second, so the latter overrides the former.
# A file that cannot be opened is silently skipped.

# Keys whose value is copied into the corresponding setting.
my %config_value_for = (
    'pattern'       => \$pattern,
    'e'             => \$pattern,
    'morph'         => \$morfield,
    'macros'        => \$macros,
    'flex-template' => \$flextemplate,
    'tags'          => \$tags,
);

# Keys that act as switches: their mere presence turns the setting on,
# whatever value (if any) follows the '='.
my %config_flag_for = (
    'only-matching' => \$only_matching,
    'm'             => \$only_matching,
    'no-markers'    => \$no_markers,
    'M'             => \$no_markers,
    'flex'          => \$flex,
    'help'          => \$help,
    'h'             => \$help,
);

foreach my $config_file ($systemconfigfile, $userconfigfile) {
    # Three-argument open with a lexical handle: the file name can never be
    # mistaken for an open mode, and the handle cannot leak into other code.
    open(my $config_fh, '<', $config_file) or next;

    while (my $line = <$config_fh>) {
        chomp $line;
        $line =~ s/#.*//;      # strip comment
        $line =~ s/^\s+//;     # strip leading whitespace
        $line =~ s/\s+$//;     # strip trailing whitespace
        next unless length $line;

        # Split on the first '=' only, so the value may itself contain '='.
        my ($name, $value) = split(/\s*=\s*/, $line, 2);

        if (exists $config_value_for{$name}) {
            ${ $config_value_for{$name} } = $value;
        }
        elsif (exists $config_flag_for{$name}) {
            ${ $config_flag_for{$name} } = 1;
        }
        # Unknown keys are ignored.
    }

    close $config_fh;
}
#########################################################

# Command-line options override the values taken from the configuration
# files.  GetOptions reports each offending option on STDERR and returns
# false when parsing failed; stop in that case instead of carrying on with
# a half-parsed command line.
GetOptions(
    'pattern|e=s'     => \$pattern,
    'morph=s'         => \$morfield,
    'only-matching|m' => \$only_matching,
    'no-markers|M'    => \$no_markers,
    'macros=s'        => \$macros,
    'flex-template=s' => \$flextemplate,
    'tags=s'          => \$tags,
    'flex'            => \$flex,
    'help|h'          => \$help,
) or die("$0: invalid command-line options (try --help).\n");

# --help / -h: print the usage summary and leave with a success status.
# The heredoc is single-quoted, so the text is emitted verbatim.
if ($help) {
    print <<'END';
Usage: ser [OPTIONS] [file ..]

Options:
   --help -h                      Help.
   --pattern=PATTERN -e	PATTERN   Search pattern.
   --morph=STRING                 Field containing morphological information (default 'lem').
   --macros=FILE                  Read macrodefinitions from FILE.
   --flex-template=FILE           Read flex code template from FILE.
   --tags=STRING                  Morphosyntactic tag format.
   --only-matching -m		  Print only fragments matching PATTERN.
   --no-markers -M                Do not print BOM and EOM markers [TODO].
   --flex                         Print only the generated flex code and exit.
END
    exit 0;
}


# A search pattern is mandatory (note: the pattern "0" counts as missing).
die("$0: no pattern given.\n") if !$pattern;

# No flex template configured: fall back to the one installed in $LIB_DIR.
if (!$flextemplate) {
    my $stock_template = "$LIB_DIR/ser.l.template";
    die("$0: flex template file not found") unless -e $stock_template;
    $flextemplate = $stock_template;
}

# No macro file configured: fall back to the one installed in $LIB_DIR.
if (!$macros) {
    my $stock_macros = "$LIB_DIR/terms.m4";
    die("$0: macro file not found") unless -e $stock_macros;
    $macros = $stock_macros;
}

# TODO (original author: "how should this condition be written?"):
# verify that the "$tags.tag2re" program is available.  The condition is
# still the constant 1, so this check never fires.
die("$0: $tags.tag2re program not found") unless 1;

# The tagset format must be given.
if (!$tags) {
    die("$0: undefined tagset format (tags option missing)");
}


# Prepare the user's pattern for the `echo "..." | m4` pipeline further down.
# The three substitutions below are order-dependent: the backslash tripling
# also applies to the backslashes inserted by the comma rule, but not to the
# one inserted by the whitespace rule, which runs after it.

# Disabled experiment kept from the original source:
#$pattern =~ s/cat\(([^)]+)\)/'cat('.pre($1).')'/ge;

# Quoting escaped commas: every "\," becomes "\`\`\,''".
# Original author's note: DOES NOT WORK.
# NOTE(review): ` and ' are m4's default quote characters, so this is
# presumably meant to keep the comma from acting as an m4 argument
# separator -- confirm before relying on it.
$pattern =~ s/\\,/\\`\\`\\,''/g;

# Protecting backslashes: every single backslash becomes three backslashes.
# NOTE(review): presumably compensates for the unescaping done by the shell
# (inside double quotes) and by later processing -- confirm.
$pattern =~ s/\\/\\\\\\/g;

# Discarding spaces: every run of whitespace is replaced by the three
# characters \`' .
# NOTE(review): presumably an empty m4 quoted string once the shell has
# removed the backslash -- confirm.
# The trailing "#`" comment only balances the backtick in the replacement.
$pattern =~ s/\s+/\\`'/g; #` 


# Run the escaped pattern through m4 together with the macro definitions.
# m4 reads the macro file first and then the pattern from standard input
# ("-"); ENDOFSEGMENT and MORFIELD are predefined for the macros.
# m4's diagnostics are discarded (2>/dev/null); only the exit status is used.
my $m4_command = "echo \"$pattern\" | m4 --define=ENDOFSEGMENT=\\\\n"
               . " --define=MORFIELD=$morfield $macros - 2>/dev/null";

my $flexpattern = qx($m4_command);

# A non-zero exit code from the pipeline means the pattern was rejected.
if (($? >> 8) != 0) {
    die("Incorrect pattern (m4).");
}

# Drop the trailing newline of the m4 output.
chomp $flexpattern;

# <> expansion
# Every <...> fragment is piped to the external "$tags.tag2re" program and
# replaced by that program's output (presumably a regular expression for
# the tag description -- see the tag2re program for details).
# A missing or failing tag2re would otherwise silently substitute an empty
# string and produce a wrong scanner, so the exit status is checked here.
# NOTE(review): the fragment is passed to the shell unquoted, exactly as
# before; shell metacharacters inside <...> are interpreted by the shell.
$flexpattern =~ s{<([^>]+)>}{
    my $tag_description = $1;
    my $tag_expansion   = `echo $tag_description | $tags.tag2re`;
    die("$0: $tags.tag2re not found or failed (tag description: $tag_description)\n")
        if $? != 0;
    $tag_expansion;
}ge;

# Translate Perl-style shortcuts in the pattern into explicit flex character
# classes.  The rules are applied one after another, in the order listed.

# Building blocks of the classes.  The non-ASCII characters are reproduced
# exactly as found in the file (presumably Polish letters in a single-byte
# encoding -- TODO confirm the intended encoding).
my $space_set = " \\t\\n\\r\\f";
my $lower_set = "a-z±æê³ñó¶¼¿";
my $upper_set = "A-Z¡ÆÊ£ÑÓ¦¬¯";
my $word_set  = "${lower_set}${upper_set}0-9_";

my @class_rules = (
    # restricting the value of the . special symbol
    # NOTE(review): this rewrites every dot, including one preceded by a
    # backslash.
    [ qr/\./,  "[^${space_set}]" ],

    # perl-like shortcuts for character classes
    [ qr/\\s/, "[ \\t]" ],
    [ qr/\\S/, "[^${space_set}]" ],
    [ qr/\\d/, "[0-9]" ],
    [ qr/\\D/, "[^0-9${space_set}]" ],
    [ qr/\\w/, "[${word_set}]" ],
    [ qr/\\W/, "[^${word_set}${space_set}]" ],

    # extensions
    [ qr/\\l/, "[${lower_set}]" ],    # lowercase letter
    [ qr/\\L/, "[${upper_set}]" ],    # uppercase letter

    # protecting slash
    [ qr{/},   "\\/" ],
);

for my $class_rule (@class_rules) {
    my ($shortcut, $expansion) = @{$class_rule};
    $flexpattern =~ s/$shortcut/$expansion/g;
}

# Action substituted for DEFAULTACTION in the flex template: with
# --only-matching it is empty, otherwise it is 'ECHO'.
my $defaultaction = ($only_matching) ? '' : 'ECHO';

# Intermediate files: flex source (.l), generated C source (.c) and the
# compiled scanner.  Only the file names are kept; the handles returned by
# tempfile() are discarded.
# TODO (original author): eventually the intermediate files should live in
# a dedicated temporary directory.
my (undef, $tmpfile_l) = File::Temp::tempfile(SUFFIX => '.l');
my (undef, $tmpfile_c) = File::Temp::tempfile(SUFFIX => '.c');
my (undef, $tmpfile_x) = File::Temp::tempfile();

# Instantiate the flex template with m4, defining PATTERN and DEFAULTACTION.
# (Original author's note: running this command in backticks did not work,
# hence system.)
# The exit status is checked so that a failed m4 run does not hand an empty
# or partial file to flex; the temporary files are removed before bailing out.
system("m4 \"--define=PATTERN=$flexpattern\" \"--define=DEFAULTACTION=$defaultaction\" $flextemplate > $tmpfile_l") == 0
    or do {
        unlink $tmpfile_l, $tmpfile_c, $tmpfile_x;
        die("$0: m4 failed to instantiate the flex template $flextemplate.\n");
    };

# --flex: print the generated flex source to standard output and stop
# without building or running the scanner.
if ($flex) {
    my $dump_error;

    if (open(my $flex_fh, '<', $tmpfile_l)) {
        print while <$flex_fh>;
        close $flex_fh;
    }
    else {
        # Remember the reason now; unlink below may overwrite $!.
        $dump_error = "$0: unable to open file $tmpfile_l: $!\n";
    }

    # The temporary files were previously left behind on this exit path.
    unlink $tmpfile_l, $tmpfile_c, $tmpfile_x;

    die($dump_error) if defined $dump_error;
    exit(0);
}

# Build the scanner (flex -> C -> executable), run it, then remove the
# intermediate files.  Each build step is checked: without the checks a
# failed flex or cc run went unnoticed and the script then tried to execute
# a missing or invalid binary.  Standard output of the build tools is
# discarded, as it was when they were run in backticks.
my @build_artifacts = ($tmpfile_l, $tmpfile_c, $tmpfile_x);

if (system("flex -o$tmpfile_c $tmpfile_l >/dev/null") != 0) {
    unlink @build_artifacts;
    die("$0: flex failed to process the generated scanner specification.\n");
}

if (system("cc -O3 -o $tmpfile_x $tmpfile_c -lfl >/dev/null") != 0) {
    unlink @build_artifacts;
    die("$0: cc failed to compile the generated scanner.\n");
}

# The scanner inherits this script's standard input and output.
system($tmpfile_x);

unlink @build_artifacts;
