Index: src/tre/tre
===================================================================
--- src/tre/tre	(revision 0a58b3f83413607e1ef21a807e29a3236a8dd615)
+++ src/tre/tre	(revision acbabee74272f626d34fbc8e42126d06c76c7903)
@@ -13,37 +13,52 @@
 [ '--format',   '-F',   GetoptLong::REQUIRED_ARGUMENT ],
 [ '--info',     '-I',   GetoptLong::REQUIRED_ARGUMENT ],
+[ '--dgpids',           GetoptLong::NO_ARGUMENT ],
+[ '--graph',            GetoptLong::NO_ARGUMENT ],
+[ '--uniq',     '-u',   GetoptLong::NO_ARGUMENT ],
+[ '--utt',              GetoptLong::NO_ARGUMENT ],
 [ '--span',     '-s',   GetoptLong::REQUIRED_ARGUMENT ],
 [ '--maxsize',          GetoptLong::REQUIRED_ARGUMENT ],
 [ '--forest',           GetoptLong::NO_ARGUMENT ],
-[ '--ground',           GetoptLong::NO_ARGUMENT ],
 [ '--only-trees','-t',  GetoptLong::NO_ARGUMENT ])
 
-$helptext=
-"The program generates trees from the graph output by dgp. dgp must\n"+
-"must be run with '--info=ds' option.\n\n"+
-"Command:       tre [options]\n\n"+
-"Options:\n"+
-"--help         -h      Print help (this text) and exit.\n"+
-"--debug        -d      Verbose output. For developers only.\n"+
-"--format=s     -F s    Output format. Recognized values:\n"+
-"                               a       root + list of arcs\n"+
-"                               p       parenthesized notation\n"+
-"                               h       human readable indented tree format\n"+
-"                       Multiple values are allowed. (default p)\n"+
-"--info=s       -I s    Information printed. Recognized values:\n"+
-"                               n       node identifier\n"+
-"                               f       surface form\n"+
-"                               m       morphological information\n"+
-"                               l       arc labels\n"+
-"--only-trees   -t      Do not copy input. Print trees only.\n"
+$helptext = <<END
+The program generates trees from the graph output by dgp. dgp must be run 
+with '--info=ds' option.
+
+Command:       tre [options]
+
+Options:
+--help         -h      Print help (this text) and exit.
+--debug        -d      Verbose output. For developers only.
+--format=s     -F s    Output format. Recognized values:
+                               a       root + list of arcs
+                               p       parenthesized notation
+                               h       human readable indented format
+                               c       CONLL format
+                       Multiple values are allowed. (default p)
+--info=s       -I s    Information printed. Recognized values:
+                               n       node identifier
+                               f       surface form
+                               m       morphological information
+                               l       arc labels
+--gphids               Use gph node identifiers (default: linear)
+--dgpids               Use dgp node identifiers (default: linear)
+--graph                Do not generate trees, just print the graph.
+--uniq         -u      Remove duplicate trees.
+--utt                  UTT formatted output.
+
+END
 
 $DEBUG=false
 $FORMAT='p'
 $INFO='DEFAULT'
-$ONLYTREES=false
+$UTTOUTPUT=false
 $START=nil
 $END=nil
 $FOREST=false
 $MAXSIZE=nil
+$GPHIDS=false                # switched on by --gphids
+$DGPIDS=false                # switched on by --dgpids
+$GRAPH=false; $UNIQ=false    # switched on by --graph / --uniq
 
 opts.each do |opt, arg|
@@ -58,10 +73,16 @@
   when '--info'
     $INFO=arg
-  when '--only-trees'
-    $ONLYTREES=true
+  when '--gphids'
+    $GPHIDS=true
+  when '--dgpids'
+    $DGPIDS=true
+  when '--graph'
+    $GRAPH=true
+  when '--uniq'
+    $UNIQ=true
+  when '--utt'
+    $UTTOUTPUT=true
   when '--forest'
     $FOREST=true
-  when '--ground'
-    $GROUND=true
   when '--maxsize'
     $MAXSIZE=arg.to_i
@@ -76,5 +97,5 @@
   case $FORMAT
     when 'p','a'
-    $INFO='nl'
+    $INFO='fl'
     when 'h'
     $INFO='fmnl'
@@ -95,5 +116,5 @@
   for line in input
     seg=Seg.new(line)
-    print line unless $ONLYTREES || seg.field(3) == 'EOS'
+    print line if $UTTOUTPUT && seg.field(3) != 'EOS'  # in UTT mode copy every non-EOS input line through
     
     if dgp=seg['dgp']
@@ -122,21 +143,20 @@
 
         $pref = "#{seg[1]} #{seg[2]} SYN *"
-
         parsegraph(nodes)
-
         set_ord #(0...(nodes.length)).each{|i| set_distance_from_i i }
-
         printgraph if $DEBUG
-
-        if $GROUND
-          printground
+        if $GRAPH
+          if $FORMAT =~ /c/
+            printconll
+          else
+            printground
+          end
         else
           thetrees = $FOREST ? genforest : gentrees
-          
-          output_trees thetrees
-          
-          print line unless $ONLYTREES
-          
-          $gphid=[]   # POWTÓRZENIE
+          outputs = output_trees thetrees
+          outputs = outputs.sort.uniq if $UNIQ
+          print outputs.join
+          print line if $UTTOUTPUT
+          $gphid=[]
           $form=[]
           $lem=[] 
@@ -154,4 +174,7 @@
 
 def output_trees trees
+  
+  outputs = []
+  
   for t in trees
     $count += 1
@@ -160,21 +183,36 @@
     t1=t
 
-    span = $FOREST ? " span:" + (ground_tree_min(t1).to_s + ","+ground_tree_max(t1).to_s)+";" : ""
+    # span = $FOREST ? " span:" + (ground_tree_min(t1).to_s + ","+ground_tree_max(t1).to_s)+";" : ""
+    # case $FORMAT
+    # when /a/
+    #   outputs << "#{$pref} tre:#{$count}#{span} #{arc_output(t1)}\n"
+    # when /p/
+    #   outputs << "#{$pref}#{span} tre:#{$count} par:#{par_output(t1)}\n"
+    # when /h/
+    #   outputs << "#\n# tree #{$count}\n# ------\n#{dgp_output(t1,0)}"
+    # when /c/
+    #   outputs << conll_output(t1,0)
+    # end
+
     case $FORMAT
     when /a/
-      print "#{$pref} tre:#{$count}#{span} #{arcsinfo(t1[0],t1[1])}"
-#       print arcsinfo(t1[0],t1[1])
-      print "\n"
+      outputs << "#{arc_output(t1)}\n"
     when /p/
-      print "#{$pref}#{span} tre:#{$count} par:"
-      printpar(t1[0],t1[1])
-      print "\n"
+      outputs << "#{par_output(t1)}\n"
     when /h/
-      print "#\n# tree #{$count}\n# ------\n"
-      printtree_dgp(t1[0],t1[1],0)
-    end
-  end
-end
-
+      outputs << human_output(t1,0)
+    when /c/
+      outputs << conll_output(t1,0)
+    end
+
+  end
+
+  outputs
+
+end
+
+def id_output id  # Identifier to print for node id: id itself with --dgpids, $gphid[id] with --gphids, otherwise $ord1[$gphid[id]].
+  if $DGPIDS then id elsif $GPHIDS then $gphid[id] else $ord1[$gphid[id]] end
+end
 
 def nodeinfo(id)
@@ -186,5 +224,5 @@
   end
   if $INFO =~ /n/
-    info += gphid.to_s                           
+    info += id_output(id).to_s                           
     info += '.' if $INFO =~ /[fm]/
   end
@@ -200,11 +238,7 @@
 
 
-def arcsinfo(root,arcs)
+def arc_output(tree)
+  root, arcs = tree
   "head:#{nodeinfo(root)} links:" + arcs.map{|a| "(#{($INFO =~ /l/) ? a[2]+":" : ""}#{nodeinfo(a[0])}-#{nodeinfo(a[1])})"}.join("")
-#   for a in arcs
-#     print ';'
-#     print "#{a[2]}:" if $INFO =~ /l/
-#       print nodeinfo(a[0])+'-'+nodeinfo(a[1])
-#   end
 end
 
@@ -221,52 +255,66 @@
 end
 
-def printtree_dgp(root,arcs,o)
+def human_output(tree,o)
+  root, arcs = tree
+  output = ''
   if o==0
-        print "%-16s" % "root: "
-  end
-  print nodeinfo(root),"\n"
+        output += "%-16s" % "root: "
+  end
+  output += nodeinfo(root) + "\n"
   for arc in arcs.select{ |a| a[0]==root }.sort{|a,b| a[1]<=>b[1] }
-    print "   "*(o+1)
-    print "%-16s" % (arc[2]+": ")
-    printtree_dgp(arc[1],arcs,o+1)
-  end
-end
-
-# old:
-# def printpar(root,arcs)
-#   print nodeinfo(root)
-#   deps = arcs.select{ |a| a[0]==root }.sort{|a,b| a[1]<=>b[1] }
-#   unless deps == []
-#     print '('
-#     cont=false
-#     for arc in deps
-#       if cont then print ',' else cont=true end
-#       print arc[2],':' if $INFO =~ /l/
-#       printpar(arc[1],arcs)
-#     end
-#     print ')'
-#   end
-# end
-
-def printpar(root,arcs)
-  
+    output += "   "*(o+1)
+    output += "%-16s" % (arc[2]+": ")
+    output += human_output([arc[1],arcs],o+1)
+  end
+  output
+end
+
+def conll_output(tree,o)  # Returns one tree ([root, arcs]) as tab-separated 10-field rows; the parameter o is not used in this body.
+  root,arcs = tree
+  nodes = ([root] + arcs.map{|a| a[1]}).sort{|a,b| $gphid[a] <=> $gphid[b]}  # root plus a[1] of every arc, ordered by $gphid
+  conll_lines = []
+  for i in nodes
+    gphid = $gphid[i]
+    id = $ord1[gphid]
+    form = $form[gphid]
+    /^(?<lemma>.*),(?<cpostag>[^\/]*)(\/(?<feats>.+))?/ =~ $lem[gphid]  # named captures become the locals lemma, cpostag, feats (nil when unmatched)
+    thearcs = arcs.select{|a| a[1]==i }.map{|a| [$ord1[$gphid[a[0]]],a[2]] }  # [head id, label] for every arc whose a[1] is this node
+    thearcs = [[0,'root']] if thearcs.empty?  # no such arc: head 0 with relation 'root'
+    for a in thearcs
+      head,deprel = a
+      conll_lines << [id,form,lemma,cpostag,cpostag,feats,head,deprel,nil,nil].map{|s| s ? s.to_s : "_"}.join("\t")  # nil/false fields are written as "_"
+    end
+  end
+  conll_lines.join("\n") + "\n\n"  # rows separated by newlines, followed by one empty line
+end
+
+def par_output(tree)
+  root, arcs = tree
   ldeps = arcs.select{|a| a[0]==root and $gphid[a[1]] < $gphid[root]}.sort{|a,b| $gphid[a[1]]<=>$gphid[b[1]] }
   rdeps = arcs.select{|a| a[0]==root and $gphid[a[1]] > $gphid[root]}.sort{|a,b| $gphid[a[1]]<=>$gphid[b[1]] }
 
-  for arc in ldeps
-    print ' ('
-    print arc[2].upcase if $INFO =~ /l/
-    printpar(arc[1],arcs)
-    print ')'
-  end
-
-  print ' ',nodeinfo(root)
-
-  for arc in rdeps
-    print ' ('
-    print arc[2].upcase if $INFO =~ /l/
-    printpar(arc[1],arcs)
-    print ')'
-  end
+  # Parenthesised rendering of the subtree rooted at root.  The result is
+  # returned as a string laid out as
+  #   <left dependents> <head> <right dependents>
+  # ldeps/rdeps (computed above) are the arcs leaving root whose dependent
+  # has a smaller/larger $gphid than root, each sorted by that $gphid.
+
+  # Renders one dependent arc as " (LABEL subtree)".
+  # The label (arc[2]) is upper-cased and emitted only when $INFO contains
+  # 'l'; the subtree comes from recursing on the dependent (arc[1]) with the
+  # same arc list.
+  render_dep = lambda do |arc|
+    label = ($INFO =~ /l/) ? arc[2].upcase : ''
+    ' (' + label + par_output([arc[1], arcs]) + ')'
+  end
+
+  output_left  = ldeps.map{|arc| render_dep.call(arc)}.join
+  output_right = rdeps.map{|arc| render_dep.call(arc)}.join
+
+  # Every piece carries its own leading space, so the returned string always
+  # begins with a space: either " (" from the first left dependent or the
+  # separator placed in front of the head.
+  output_left + ' ' + nodeinfo(root) + output_right
+
 end
 
@@ -467,4 +515,19 @@
 end
 
+def printconll  # Prints the current graph ($form, $lem, $arcs) to stdout as tab-separated 10-field rows, then an empty line.
+  for i in 1...($form.length-1)  # exclusive range: i runs from 1 to $form.length-2; NOTE(review): presumably the first and last entries are boundary nodes - confirm
+    id = $ord1[i]
+    form = $form[i]
+    /^(?<lemma>.*),(?<cpostag>[^\/]*)(\/(?<feats>.+))?/ =~ $lem[i]  # named captures become the locals lemma, cpostag, feats (nil when unmatched)
+    arcs = $arcs.select{|a| $ord1[$gphid[a[1]]] == $ord1[i]}.map{|a| [$ord1[$gphid[a[0]]],a[2]]}.sort.uniq  # distinct, sorted [head id, label] pairs of arcs whose a[1] maps to this node's $ord1 value
+    arcs = [[0,'root']] if arcs.empty?  # no such arc: head 0 with relation 'root'
+    for a in arcs
+      head,deprel = a
+      puts [id,form,lemma,cpostag,cpostag,feats,head,deprel,nil,nil].map{|s| s ? s.to_s : "_"}.join("\t")  # nil/false fields are written as "_"
+    end
+  end
+  puts  # empty line after the last row
+end
+
 
 def set_to_s(s) "{#{s.join(',')}}" end
