- Timestamp:
- 12/17/14 12:13:11 (10 years ago)
- Branches:
- master
- Children:
- 854bece
- Parents:
- d484a32
- git-author:
- Tomasz Obrebski <obrebski@…> (12/17/14 12:10:45)
- git-committer:
- Tomasz Obrebski <obrebski@…> (12/17/14 12:13:11)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
src/tre/tre
r0a58b3f racbabee 13 13 [ '--format', '-F', GetoptLong::REQUIRED_ARGUMENT ], 14 14 [ '--info', '-I', GetoptLong::REQUIRED_ARGUMENT ], 15 [ '--dgpids', GetoptLong::NO_ARGUMENT ], 16 [ '--graph', GetoptLong::NO_ARGUMENT ], 17 [ '--uniq', '-u', GetoptLong::NO_ARGUMENT ], 18 [ '--utt', GetoptLong::NO_ARGUMENT ], 15 19 [ '--span', '-s', GetoptLong::REQUIRED_ARGUMENT ], 16 20 [ '--maxsize', GetoptLong::REQUIRED_ARGUMENT ], 17 21 [ '--forest', GetoptLong::NO_ARGUMENT ], 18 [ '--ground', GetoptLong::NO_ARGUMENT ],19 22 [ '--only-trees','-t', GetoptLong::NO_ARGUMENT ]) 20 23 21 $helptext= 22 "The program generates trees from the graph output by dgp. dgp must\n"+ 23 "must be run with '--info=ds' option.\n\n"+ 24 "Command: tre [options]\n\n"+ 25 "Options:\n"+ 26 "--help -h Print help (this text) and exit.\n"+ 27 "--debug -d Verbose output. For developers only.\n"+ 28 "--format=s -F s Output format. Recognized values:\n"+ 29 " a root + list of arcs\n"+ 30 " p parenthesized notation\n"+ 31 " h human readable indented tree format\n"+ 32 " Multiple values are allowed. (default p)\n"+ 33 "--info=s -I s Information printed. Recognized values:\n"+ 34 " n node identifier\n"+ 35 " f surface form\n"+ 36 " m morphological information\n"+ 37 " l arc labels\n"+ 38 "--only-trees -t Do not copy input. Print trees only.\n" 24 $helptext = <<END 25 The program generates trees from the graph output by dgp. dgp must be run 26 with '--info=ds' option. 27 28 Command: tre [options] 29 30 Options: 31 --help -h Print help (this text) and exit. 32 --debug -d Verbose output. For developers only. 33 --format=s -F s Output format. Recognized values: 34 a root + list of arcs 35 p parenthesized notation 36 h human readable indented format 37 c CONLL format 38 Multiple values are allowed. (default p) 39 --info=s -I s Information printed. Recognized values: 40 n node identifier 41 f surface form 42 m morphological information 43 l arc labels\ 44 --gphids Used gph node identifiers (default: linear) 45 --dgpids Used dgp node identifiers (default: linear) 46 --graph Do not generate trees, just print the graph. 47 --uniq -u Remove duplicate trees. 48 --utt UTT formatted output. 49 50 END 39 51 40 52 $DEBUG=false 41 53 $FORMAT='p' 42 54 $INFO='DEFAULT' 43 $ ONLYTREES=false55 $UTTOUTPUT=false 44 56 $START=nil 45 57 $END=nil 46 58 $FOREST=false 47 59 $MAXSIZE=nil 60 $GPHIDS=false 61 $DGPIDS=false 62 $GRAPH==false 48 63 49 64 opts.each do |opt, arg| … … 58 73 when '--info' 59 74 $INFO=arg 60 when '--only-trees' 61 $ONLYTREES=true 75 when '--gphids' 76 $GPHIDS=true 77 when '--dgpids' 78 $DGPIDS=true 79 when '--graph' 80 $GRAPH=true 81 when '--uniq' 82 $UNIQ=true 83 when '--utt' 84 $UTTOUTPUT=true 62 85 when '--forest' 63 86 $FOREST=true 64 when '--ground'65 $GROUND=true66 87 when '--maxsize' 67 88 $MAXSIZE=arg.to_i … … 76 97 case $FORMAT 77 98 when 'p','a' 78 $INFO=' nl'99 $INFO='fl' 79 100 when 'h' 80 101 $INFO='fmnl' … … 95 116 for line in input 96 117 seg=Seg.new(line) 97 print line unless $ONLYTREES ||seg.field(3) == 'EOS'118 print line if $UTTOUTPUT && seg.field(3) == 'EOS' 98 119 99 120 if dgp=seg['dgp'] … … 122 143 123 144 $pref = "#{seg[1]} #{seg[2]} SYN *" 124 125 145 parsegraph(nodes) 126 127 146 set_ord #(0...(nodes.length)).each{|i| set_distance_from_i i } 128 129 147 printgraph if $DEBUG 130 131 if $GROUND 132 printground 148 if $GRAPH 149 if $FORMAT =~ /c/ 150 printconll 151 else 152 printground 153 end 133 154 else 134 155 thetrees = $FOREST ? genforest : gentrees 135 136 output_trees thetrees 137 138 print line unless $ONLYTREES 139 140 $gphid=[] # POWTÓRZENIE 156 outputs = output_trees thetrees 157 outputs = outputs.sort.uniq if $UNIQ 158 print outputs.join 159 print line if $UTTOUTPUT 160 $gphid=[] 141 161 $form=[] 142 162 $lem=[] … … 154 174 155 175 def output_trees trees 176 177 outputs = [] 178 156 179 for t in trees 157 180 $count += 1 … … 160 183 t1=t 161 184 162 span = $FOREST ? " span:" + (ground_tree_min(t1).to_s + ","+ground_tree_max(t1).to_s)+";" : "" 185 # span = $FOREST ? " span:" + (ground_tree_min(t1).to_s + ","+ground_tree_max(t1).to_s)+";" : "" 186 # case $FORMAT 187 # when /a/ 188 # outputs << "#{$pref} tre:#{$count}#{span} #{arc_output(t1)}\n" 189 # when /p/ 190 # outputs << "#{$pref}#{span} tre:#{$count} par:#{par_output(t1)}\n" 191 # when /h/ 192 # outputs << "#\n# tree #{$count}\n# ------\n#{dgp_output(t1,0)}" 193 # when /c/ 194 # outputs << conll_output(t1,0) 195 # end 196 163 197 case $FORMAT 164 198 when /a/ 165 print "#{$pref} tre:#{$count}#{span} #{arcsinfo(t1[0],t1[1])}" 166 # print arcsinfo(t1[0],t1[1]) 167 print "\n" 199 outputs << "#{arc_output(t1)}\n" 168 200 when /p/ 169 print "#{$pref}#{span} tre:#{$count} par:" 170 printpar(t1[0],t1[1]) 171 print "\n" 201 outputs << "#{par_output(t1)}\n" 172 202 when /h/ 173 print "#\n# tree #{$count}\n# ------\n" 174 printtree_dgp(t1[0],t1[1],0) 175 end 176 end 177 end 178 203 outputs << human_output(t1,0) 204 when /c/ 205 outputs << conll_output(t1,0) 206 end 207 208 end 209 210 outputs 211 212 end 213 214 def id_output id 215 if $DGPIDS then id elsif $GPHIDS then $gphid[id] else $ord1[$gphid[id]] end 216 end 179 217 180 218 def nodeinfo(id) … … 186 224 end 187 225 if $INFO =~ /n/ 188 info += gphid.to_s226 info += id_output(id).to_s 189 227 info += '.' if $INFO =~ /[fm]/ 190 228 end … … 200 238 201 239 202 def arcsinfo(root,arcs) 240 def arc_output(tree) 241 root, arcs = tree 203 242 "head:#{nodeinfo(root)} links:" + arcs.map{|a| "(#{($INFO =~ /l/) ? a[2]+":" : ""}#{nodeinfo(a[0])}-#{nodeinfo(a[1])})"}.join("") 204 # for a in arcs205 # print ';'206 # print "#{a[2]}:" if $INFO =~ /l/207 # print nodeinfo(a[0])+'-'+nodeinfo(a[1])208 # end209 243 end 210 244 … … 221 255 end 222 256 223 def printtree_dgp(root,arcs,o) 257 def human_output(tree,o) 258 root, arcs = tree 259 output = '' 224 260 if o==0 225 print"%-16s" % "root: "226 end 227 print nodeinfo(root),"\n"261 output += "%-16s" % "root: " 262 end 263 output += nodeinfo(root) + "\n" 228 264 for arc in arcs.select{ |a| a[0]==root }.sort{|a,b| a[1]<=>b[1] } 229 print " "*(o+1) 230 print "%-16s" % (arc[2]+": ") 231 printtree_dgp(arc[1],arcs,o+1) 232 end 233 end 234 235 # old: 236 # def printpar(root,arcs) 237 # print nodeinfo(root) 238 # deps = arcs.select{ |a| a[0]==root }.sort{|a,b| a[1]<=>b[1] } 239 # unless deps == [] 240 # print '(' 241 # cont=false 242 # for arc in deps 243 # if cont then print ',' else cont=true end 244 # print arc[2],':' if $INFO =~ /l/ 245 # printpar(arc[1],arcs) 246 # end 247 # print ')' 248 # end 249 # end 250 251 def printpar(root,arcs) 252 265 output += " "*(o+1) 266 output += "%-16s" % (arc[2]+": ") 267 output += human_output([arc[1],arcs],o+1) 268 end 269 output 270 end 271 272 def conll_output(tree,o) 273 root,arcs = tree 274 nodes = ([root] + arcs.map{|a| a[1]}).sort{|a,b| $gphid[a] <=> $gphid[b]} 275 conll_lines = [] 276 for i in nodes 277 gphid = $gphid[i] 278 id = $ord1[gphid] 279 form = $form[gphid] 280 /^(?<lemma>.*),(?<cpostag>[^\/]*)(\/(?<feats>.+))?/ =~ $lem[gphid] 281 thearcs = arcs.select{|a| a[1]==i }.map{|a| [$ord1[$gphid[a[0]]],a[2]] } 282 thearcs = [[0,'root']] if thearcs.empty? 283 for a in thearcs 284 head,deprel = a 285 conll_lines << [id,form,lemma,cpostag,cpostag,feats,head,deprel,nil,nil].map{|s| s ? s.to_s : "_"}.join("\t") 286 end 287 end 288 conll_lines.join("\n") + "\n\n" 289 end 290 291 def par_output(tree) 292 root, arcs = tree 253 293 ldeps = arcs.select{|a| a[0]==root and $gphid[a[1]] < $gphid[root]}.sort{|a,b| $gphid[a[1]]<=>$gphid[b[1]] } 254 294 rdeps = arcs.select{|a| a[0]==root and $gphid[a[1]] > $gphid[root]}.sort{|a,b| $gphid[a[1]]<=>$gphid[b[1]] } 255 295 256 for arc in ldeps 257 print ' (' 258 print arc[2].upcase if $INFO =~ /l/ 259 printpar(arc[1],arcs) 260 print ')' 261 end 262 263 print ' ',nodeinfo(root) 264 265 for arc in rdeps 266 print ' (' 267 print arc[2].upcase if $INFO =~ /l/ 268 printpar(arc[1],arcs) 269 print ')' 270 end 296 output = '' 297 298 output_left = ldeps.map{|arc| ' (' + (($INFO =~ /l/) ? arc[2].upcase : '') + par_output([arc[1],arcs]) + ')'}.join 299 output_right = rdeps.map{|arc| ' (' + (($INFO =~ /l/) ? arc[2].upcase : '') + par_output([arc[1],arcs]) + ')'}.join 300 301 # for arc in ldeps 302 # output += ' (' 303 # output += arc[2].upcase if $INFO =~ /l/ 304 # output += par_output(arc[1],arcs) 305 # output += ')' 306 # end 307 308 # print ' ',nodeinfo(root) 309 310 # for arc in rdeps 311 # print ' (' 312 # print arc[2].upcase if $INFO =~ /l/ 313 # printpar(arc[1],arcs) 314 # print ')' 315 # end 316 317 output_left + ' ' + nodeinfo(root) + output_right 318 271 319 end 272 320 … … 467 515 end 468 516 517 def printconll 518 for i in 1...($form.length-1) 519 id = $ord1[i] 520 form = $form[i] 521 /^(?<lemma>.*),(?<cpostag>[^\/]*)(\/(?<feats>.+))?/ =~ $lem[i] 522 arcs = $arcs.select{|a| $ord1[$gphid[a[1]]] == $ord1[i]}.map{|a| [$ord1[$gphid[a[0]]],a[2]]}.sort.uniq 523 arcs = [[0,'root']] if arcs.empty? 524 for a in arcs 525 head,deprel = a 526 puts [id,form,lemma,cpostag,cpostag,feats,head,deprel,nil,nil].map{|s| s ? s.to_s : "_"}.join("\t") 527 end 528 end 529 puts 530 end 531 469 532 470 533 def set_to_s(s) "{#{s.join(',')}}" end
Note: See TracChangeset
for help on using the changeset viewer.