- Timestamp:
- 05/07/08 15:19:14 (17 years ago)
- Branches:
- master, help
- Children:
- a7b254c
- Parents:
- f5d3b20
- git-author:
- obrebski <obrebski@…> (05/07/08 15:19:14)
- git-committer:
- obrebski <obrebski@…> (05/07/08 15:19:14)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
app/src/mar/mar
r20b4e44 r7562131 13 13 use strict; 14 14 use Getopt::Long; 15 use File::HomeDir; 15 16 16 17 use attr; 18 19 20 my $LIB_DIR="/usr/local/lib/utt"; 21 my $systemconfigfile='/usr/local/etc/utt/mar.conf'; 22 my $userconfigfile=home()."/.utt/mar.conf"; 17 23 18 24 Getopt::Long::Configure('no_ignore_case_always'); … … 26 32 my $eos="seg(EOS)"; 27 33 my $explicit_space=0; 28 29 #this is our help function to cut the re to get another tag 30 #it takes only one argument which is our patern (after m4 processing) 31 #returns: the first root-level brace with content 32 sub cutRe 33 { 34 my $i = 0; 35 my $level = 0; 36 my $text = $_[0]; 37 my $temp; 38 for( $i =0; $i < (length $text);$i++) 39 { 40 $temp = substr($text, $i,1); 41 if( $temp eq "(") 42 {#we have an opening 43 $level++; 44 } 45 elsif ( $temp eq ")") 46 {#we close 47 $level--; 48 } 49 if ( $level == 0) 50 { 51 $temp = substr($text,0,$i+1); 52 last; 53 } 54 } 55 $temp; 56 } 57 58 #the same function as above althought it returns everything after the 59 #first root level brace 60 sub restRe 61 { 62 my $i = 0; 63 my $level = 0; 64 my $text = $_[0]; 65 my $temp; 66 for( $i =0; $i < (length $text);$i++) 67 { 68 $temp = substr($text, $i,1); 69 if( $temp eq "(") 70 {#we have an opening 71 $level++; 72 } 73 elsif ( $temp eq ")") 74 {#we close 75 $level--; 76 } 77 if ( $level == 0) 78 { #we cut everything in the begining 79 $temp = substr($text,$i+1); 80 last; 81 } 82 } 83 $temp; 84 } 34 my $morfield='lem'; 35 my $tags=0; 36 37 #read configuration files########################### 38 my $file; 39 foreach $file ($systemconfigfile, $userconfigfile){ 40 if(open(CONFIG, $file)){ 41 while (<CONFIG>) { 42 chomp; 43 s/#.*//; 44 s/^\s+//; 45 s/\s+$//; 46 next unless length; 47 my ($name, $value) = split(/\s*=\s*/, $_, 2); 48 if(($name eq "pattern")or($name eq "e")){ 49 $pattern=$value; 50 } 51 elsif($name eq "eos"){ 52 $eos=$value; 53 } 54 elsif($name eq "macros"){ 55 $macrofile=$value; 56 } 57 elsif($name eq "tags"){ 58 $tags=$value; 
59 } 60 elsif($name eq "morph"){ 61 $morfield=$value; 62 } 63 elsif($name eq "command"){ 64 $command=1; 65 } 66 elsif($name eq "action"){ 67 $action=$value; 68 } 69 elsif($name eq "space"){ 70 $explicit_space=1; 71 } 72 elsif(($name eq "help")or($name eq "h")){ 73 $help=1; 74 } 75 76 } 77 close CONFIG; 78 } 79 } 80 ######################################################### 85 81 86 82 GetOptions("pattern|e=s" => \$pattern, … … 93 89 "space|s" => \$explicit_space 94 90 ); 91 92 93 95 94 96 95 if($help) … … 141 140 die("$0: no pattern given. Run with -h to get help.\n") unless $pattern || $action !~ /g/; 142 141 143 die("$0: macro file not found") unless -e "terms.m4" and $macrofile="terms.m4"; 142 die("$0: macro file not found") unless 143 $macrofile or 144 -e "$LIB_DIR/terms.m4" and $macrofile="$LIB_DIR/terms.m4"; 144 145 145 146 my $preproc = ($action =~ /p/) ? ' fla | ' : ''; 146 147 147 148 my $postproc = ($action =~ /P/) ? ' | unfla ' : ''; 149 150 151 #this is our help function to cut the re to get another tag 152 #it takes only one argument which is our patern (after m4 processing) 153 #returns: the first root-level brace with content 154 sub cutRe 155 { 156 my $i = 0; 157 my $level = 0; 158 my $text = $_[0]; 159 my $temp; 160 for( $i =0; $i < (length $text);$i++) 161 { 162 $temp = substr($text, $i,1); 163 if( $temp eq "(") 164 {#we have an opening 165 $level++; 166 } 167 elsif ( $temp eq ")") 168 {#we close 169 $level--; 170 } 171 if ( $level == 0) 172 { 173 $temp = substr($text,0,$i+1); 174 last; 175 } 176 } 177 $temp; 178 } 179 180 #the same function as above althought it returns everything after the 181 #first root level brace 182 sub restRe 183 { 184 my $i = 0; 185 my $level = 0; 186 my $text = $_[0]; 187 my $temp; 188 for( $i =0; $i < (length $text);$i++) 189 { 190 $temp = substr($text, $i,1); 191 if( $temp eq "(") 192 {#we have an opening 193 $level++; 194 } 195 elsif ( $temp eq ")") 196 {#we close 197 $level--; 198 } 199 if ( $level == 0) 200 { #we cut 
everything in the begining 201 $temp = substr($text,$i+1); 202 last; 203 } 204 } 205 $temp; 206 } 207 148 208 149 209 #here we are preparing re for extended matching … … 186 246 $patternmod =~ s/(\{\d*),(\d*\})/\1\\`\\`,''\2/g; 187 247 #print "After m4:".$re."\n"; 188 my $re = `echo \"$patternmod\" | m4 --define=ENDOFSEGMENT='[[:cntrl:]]' $macrofile - 2>/dev/null`; 248 249 my $re = `echo \"$patternmod\" | m4 --define=ENDOFSEGMENT='[[:cntrl:]]' --define=MORFIELD=$morfield $macrofile - 2>/dev/null`; 189 250 190 251 die("Incorrect pattern (m4).") if $? >> 8; … … 195 256 # <> expansion 196 257 197 $re =~ s/<([^>]+)>/`echo $1 | .\/terms\.tag2re`/ge; 258 $re =~ s/<([^>]+)>/`echo $1 | $tags.tag2re`/ge; 198 259 199 260 # Perl-like special sequences
Note: See TracChangeset
for help on using the changeset viewer.