#!/usr/bin/perl

#package:	UAM Text Tools
#component:	ser (pattern search tool)
#version:	1.0
#author:	Tomasz Obrebski

# ser: translates a search pattern into a flex scanner (via m4 macros and a
# flex template), compiles the scanner with cc and runs it.
#
# Options are read first from the system and user configuration files and
# then from the command line, so command-line values take precedence.
#
# NOTE(review): the usage text advertises [file ..] arguments, but nothing
# in this script reads @ARGV after option parsing and the scanner is started
# without arguments -- confirm whether named files are meant to be processed.
#
# NOTE(review): the pattern and several option values are interpolated into
# shell command lines below.  The escaping steps rely on the shell's
# double-quote processing, so the commands are deliberately left unchanged;
# do not feed this script patterns from untrusted sources.

use strict;
use warnings;
use Getopt::Long;
use File::Spec;
use File::Temp;
use File::HomeDir;

my $LIB_DIR="/usr/local/lib/utt";
my $systemconfigfile='/usr/local/etc/utt/ser.conf';

# home() may return undef; fall back to an empty prefix as the original
# string concatenation effectively did.
my $homedir=home();
$homedir='' unless defined $homedir;
my $userconfigfile=$homedir."/.utt/ser.conf";

Getopt::Long::Configure('no_ignore_case_always');

my $help=0;
my $pattern=0;
my $only_matching=0;
my $no_markers=0;      # accepted but not used yet (marked [TODO] in the help)
my $macros=0;
my $flextemplate=0;
my $flex=0;
my $morfield='lem';
my $tags=0;

# Names of the temporary files created so far; removed in the END block so
# that they do not leak on --flex, on die() or on normal termination.
my @tmpfiles;

END {
    unlink @tmpfiles if @tmpfiles;
}

# Creates an empty temporary file (arguments are passed to
# File::Temp::tempfile), registers it for removal at exit and returns its
# name.  The handle is closed at once: only the name is handed to the
# external tools.
sub new_tmpfile {
    my @tempfile_args = @_;
    my ($fh, $name) = File::Temp::tempfile(@tempfile_args);
    close $fh;
    push @tmpfiles, $name;
    return $name;
}

# Returns the location of the executable $name, or nothing if it cannot be
# found.  A name containing a slash is checked as given; any other name is
# looked up in the directories listed in PATH.
sub find_program {
    my ($name) = @_;
    if ($name =~ m{/}) {
        return $name if -f $name && -x _;
        return;
    }
    for my $dir (File::Spec->path()) {
        my $candidate = File::Spec->catfile($dir, $name);
        return $candidate if -f $candidate && -x _;
    }
    return;
}

#read configuration files###########################

# Configuration entries that carry a value ("name = value").
my %value_target = (
    'pattern'       => \$pattern,
    'e'             => \$pattern,
    'morph'         => \$morfield,
    'macros'        => \$macros,
    'flex-template' => \$flextemplate,
    'tags'          => \$tags,
);

# Configuration entries that act as switches: their presence sets the flag.
my %flag_target = (
    'only-matching' => \$only_matching,
    'm'             => \$only_matching,
    'no-markers'    => \$no_markers,
    'M'             => \$no_markers,
    'flex'          => \$flex,
    'help'          => \$help,
    'h'             => \$help,
);

foreach my $file ($systemconfigfile, $userconfigfile){
    # A missing or unreadable configuration file is silently skipped.
    open(my $config, '<', $file) or next;
    while (my $line = <$config>) {
        chomp $line;
        $line =~ s/#.*//;      # strip comments
        $line =~ s/^\s+//;
        $line =~ s/\s+$//;
        next unless length $line;
        my ($name, $value) = split(/\s*=\s*/, $line, 2);
        if(exists $value_target{$name}){
            ${ $value_target{$name} }=$value;
        }
        elsif(exists $flag_target{$name}){
            ${ $flag_target{$name} }=1;
        }
        # unknown entries are ignored
    }
    close $config;
}
#########################################################

GetOptions("pattern|e=s" => \$pattern,
           "morph=s" => \$morfield,
           "only-matching|m" => \$only_matching,
           "no-markers|M" => \$no_markers,
           "macros=s" => \$macros,
           "flex-template=s" => \$flextemplate,
           "tags=s" => \$tags,
           "flex" => \$flex,
           "help|h" => \$help);

if($help)
{
    print <<'END';
Usage: ser [OPTIONS] [file ..]

Options:
   --help -h                      Help.
   --pattern=PATTERN -e PATTERN   Search pattern.
   --morph=STRING                 Field containing morphological information (default 'lem').
   --macros=FILE                  Read macrodefinitions from FILE.
   --flex-template=FILE           Read flex code template from FILE.
   --tags=STRING                  Morphosyntactic tag format.
   --only-matching -m             Print only fragments matching PATTERN.
   --no-markers -M                Do not print BOM and EOM markers [TODO].
   --flex                         Print only the generated flex code and exit.
END
    exit 0;
}

die("$0: no pattern given.\n") unless $pattern;

# Fall back to the installed flex template when none was given.
unless ($flextemplate) {
    my $default_template="$LIB_DIR/ser.l.template";
    die("$0: flex template file not found") unless -e $default_template;
    $flextemplate=$default_template;
}

# Fall back to the installed macro file when none was given.
unless ($macros) {
    my $default_macros="$LIB_DIR/terms.m4";
    die("$0: macro file not found") unless -e $default_macros;
    $macros=$default_macros;
}

# The tagset format must be known before the converter name can be built.
die("$0: undefined tagset format (tags option missing)") unless $tags;

# The <...> expansion below pipes tag descriptions through "<tags>.tag2re",
# so make sure that program can actually be found.
my $tag2re="$tags.tag2re";
die("$0: $tag2re program not found") unless defined find_program($tag2re);

#$pattern =~ s/cat\(([^)]+)\)/'cat('.pre($1).')'/ge;

# quoting escaped commas (marked by the author as not working)
$pattern =~ s/\\,/\\`\\`\\,''/g;

# protecting backslash
$pattern =~ s/\\/\\\\\\/g;

# discarding spaces
$pattern =~ s/\s+/\\`'/g; #`

# Expand the pattern macros with m4; m4 diagnostics are discarded.
my $flexpattern = `echo \"$pattern\" | m4 --define=ENDOFSEGMENT=\\\\n --define=MORFIELD=$morfield $macros - 2>/dev/null`;

die("Incorrect pattern (m4).") if $? != 0;

chomp $flexpattern;

# <> expansion
$flexpattern =~ s/<([^>]+)>/`echo $1 | $tags.tag2re`/ge;

# restricting the value of the . special symbol
$flexpattern =~ s/\./[^ \\t\\n\\r\\f]/g;

# perl-like shortcuts for character classes
# perl exact
# (the non-ASCII bytes in the classes below are presumably the Polish
# letters in the file's original 8-bit encoding -- keep them unchanged)
$flexpattern =~ s/\\s/[ \\t]/g;
$flexpattern =~ s/\\S/[^ \\t\\n\\r\\f]/g;
$flexpattern =~ s/\\d/[0-9]/g;
$flexpattern =~ s/\\D/[^0-9 \\t\\n\\r\\f]/g;
$flexpattern =~ s/\\w/[a-z±æê³ñ󶼿A-Z¡ÆÊ£ÑÓ¦¬¯0-9_]/g;
$flexpattern =~ s/\\W/[^a-z±æê³ñ󶼿A-Z¡ÆÊ£ÑÓ¦¬¯0-9_ \\t\\n\\r\\f]/g;
# extensions
$flexpattern =~ s/\\l/[a-z±æê³ñ󶼿]/g; #lowercase letter
$flexpattern =~ s/\\L/[A-Z¡ÆÊ£ÑÓ¦¬¯]/g; #uppercase letter

# protecting slash
$flexpattern =~ s/\//\\\//g;

# Text that does not match is echoed unless only matches were requested.
my $defaultaction = ($only_matching) ? '' : 'ECHO';

# Generate the flex source from the template.
# (author's note: this did not work when run inside backticks)
my $tmpfile_l = new_tmpfile(SUFFIX=>'.l');

system("m4 \"--define=PATTERN=$flexpattern\" \"--define=DEFAULTACTION=$defaultaction\" $flextemplate > $tmpfile_l") == 0
    or die("$0: m4 failed to generate the flex code.\n");

if($flex)
{
    # --flex: print the generated flex code and stop.
    open(my $generated, '<', $tmpfile_l)
        or die("$0: unable to open file $tmpfile_l: $!\n");
    while (my $line = <$generated>) {
        print $line;
    }
    close $generated;
    exit(0);
}

# Intermediate C source and executable; created only when actually needed.
my $tmpfile_c = new_tmpfile(SUFFIX=>'.c');
my $tmpfile_x = new_tmpfile();

# Backticks keep any standard output of the tools away from our own output.
`flex -o$tmpfile_c $tmpfile_l`;
die("$0: flex failed to generate the scanner source.\n") if $? != 0;

`cc -O3 -o $tmpfile_x $tmpfile_c -lfl`;
die("$0: cc failed to compile the scanner.\n") if $? != 0;

# Run the scanner; it inherits our standard input and output.
my $run_status = system($tmpfile_x);
die("$0: cannot run the generated scanner: $!\n") if $run_status == -1;

# Temporary files are removed by the END block above.