#!/usr/bin/ruby -I /usr/local/lib/utt -I $HOME/.local/lib/utt

$: << "#{ENV['HOME']}/.local/lib/utt"
$: << "/usr/local/lib/utt"

require 'getoptlong'
require 'seg.rb'

opts = GetoptLong.new(
[ '--help',     '-h',   GetoptLong::NO_ARGUMENT ],
[ '--debug',    '-d',   GetoptLong::NO_ARGUMENT ],
[ '--format',   '-F',   GetoptLong::REQUIRED_ARGUMENT ],
[ '--info',     '-I',   GetoptLong::REQUIRED_ARGUMENT ],
[ '--only-trees','-t',  GetoptLong::NO_ARGUMENT ])

$helptext=
"The program generates trees from the graph output by dgp. dgp must\n"+
"must be run with '-i ds' option.\n\n"+
"Command:       tre [options]\n\n"+
"Options:\n"+
"--help         -h      Print help (this text) and exit.\n"+
"--debug        -d      Verbose output. For developers only.\n"+
"--format=s     -F s    Output format. Recognized values:\n"+
"                               a       root + list of arcs\n"+
"                               p       parenthesized notation\n"+
"                               h       human readable indented tree format\n"+
"                       Multiple values are allowed. (default p)\n"+
"--info=s       -I s    Information printed. Recognized values:\n"+
"                               n       node identifier\n"+
"                               f       surface form\n"+
"                               m       morphological information\n"+
"                               l       arc labels\n"+
"--only-trees   -t      Do not copy input. Print trees only.\n"

$DEBUG=false
$FORMAT='p'
$INFO='DEFAULT'
$ONLYTREES=false

opts.each do |opt, arg|
  case opt
  when '--help'
    print $helptext
    exit 0
  when '--debug'
    $DEBUG=true
  when '--format'
    $FORMAT=arg
  when '--info'
    $INFO=arg
  when '--only-trees'
    $ONLYTREES=true
  else
    print "Unknown option #{opt}. Ignored.\n"
  end
end

if $INFO=='DEFAULT'
  case $FORMAT
    when 'p','a'
    $INFO='nl'
    when 'h'
    $INFO='fmnl'
  end
end

$dgpsep=';'

def tre(input)
  $gphid=[]
  $form=[]
  $lem=[]
  nodes=[]
  count=0
  seg=Seg.new
  for line in input
    print line unless $ONLYTREES
    seg.set(line)
    if dgp=seg['dgp']
      if nodes==[] && seg[3]!='BOS'
        print "A sentence must start with BOS segment. Aborting.\n"
        return
      end

      id=dgp[/^\d+/].to_i

      if gph=seg['gph']
        $gphid[id]=gph[/^\d+/].to_i
      else
        print "No gph field. Aborting.\n"
        return
      end

      $form[$gphid[id]]=seg[4]
      $lem[$gphid[id]]=seg['lem']
              
      nodes[id] = [seg[1].to_i,dgp]

      if seg[3]=='EOS'
        $pref = "#{seg[1]} #{seg[2]} SYN *"
        parsegraph(nodes)
        printgraph if $DEBUG
        $thetrees=[]
        gentrees2
        for t in $thetrees
          count += 1
          t1=ground(t)
          case $FORMAT
          when /a/
            print "#{$pref} tre:#{count} arc:"
            printarcs(t1[0],t1[1])
            print "\n"
          when /p/
            print "#{$pref} tre:#{count} par:"
            printpar(t1[0],t1[1])
            print "\n"
          when /h/
            print "#\n# tree #{count}\n# ------\n"
            printtree(t1[0],t1[1],0)
          end
        end
        nodes=[]
      end
    end
  end
end


def nodeinfo(id)
  info=""
  if $INFO =~ /n/
    info += id.to_s                           
    info += '.' if $INFO =~ /[fm]/
  end
  if $INFO =~ /f/
    info += $form[id] 
    info += ';' if $INFO =~ /m/
  end
  if $INFO =~ /m/
    info += $lem[id]  
  end
  info
end


def printarcs(root,arcs)
  print nodeinfo(root)
  for a in arcs
    print ';'
    print "#{a[2]}:" if $INFO =~ /l/
      print nodeinfo(a[0])+'-'+nodeinfo(a[1])
  end
end

def printtree(root,arcs,o)
  if o==0
        print "# %-16s" % "root: "
  end
  print nodeinfo(root),"\n"
  for arc in arcs.select{ |a| a[0]==root }.sort{|a,b| a[1]<=>b[1] }
    print '# ',"   "*(o+1)
    print "%-16s" % (arc[2]+": ")
    printtree(arc[1],arcs,o+1)
  end
end

def printpar(root,arcs)
  print nodeinfo(root)
  deps = arcs.select{ |a| a[0]==root }.sort{|a,b| a[1]<=>b[1] }
  unless deps == []
    print '('
    cont=false
    for arc in deps
      if cont then print ',' else cont=true end
      print arc[2],':' if $INFO =~ /l/
      printpar(arc[1],arcs)
    end
    print ')'
  end
end


def parsegraph(nodes)

  $n   =nodes.length
  $sat =[];

  $vis =[];
  $succ=[];
  $lhs =[];
  $arcs=[];
  $pos=[]

  for dgp in nodes

    parts  = dgp[1].split($dgpsep,6)

    if parts[3]==nil || parts[4]==nil || parts[5]==nil
      $stderr.print "ERR: tre requires dgp be called with '--info s' option. Aborting.\n"
      exit
    end

    i      = parts[0].to_i
    $pos[i] = dgp[0].to_i
    $sat << i if parts[1]=="s"
    $arcs |= parts[2].split(',').map{ |a| case a 
                                          when /\-\-(\w+)-(\d+)\/(\d+)/
                                            [i, $2.to_i, $1, $3.to_i]
                                          when /\+\+(\d+)-(\w+)\/(\d+)/
                                            [$1.to_i, i, $2, $3.to_i]
                                          end }
    $succ |= parts[3][1..-2].split(',').map{|x| [x.to_i,i]}
    $vis  |= parts[4][1..-2].split(',').map{|x| [x.to_i,i]} 
    $lhs  |= parts[5][1..-2].split(',').map{|x| [x.to_i,i]} + [[i,i]]

  end
end


def ground(t)
  [ $gphid[t[0]] , t[1].map{|a| [$gphid[a[0]],$gphid[a[1]],a[2]]} ]
end  


def gentrees2()
  $thetrees=[];
  bos=0; eos=$n-1;
  roots = (1...eos).select{|i| $vis.include? [i,eos]}.select{|i| $vis.include? [bos,i]}
  if $DEBUG then print "ROOTS: #{roots.inspect}\n" end
  for i in roots
    $theroot=i
    for r in buildR(i , eos, [])
      (rmin,rmax,rtree) = r
      buildR(bos, rmin, rtree)
    end
  end
end


def buildR(min, max, tree)
  if $DEBUG then print "buildR--#{min}--#{max}--#{tree.inspect}\n" end
  trees=[]
  for a in $arcs.select{|a| a[0]==max && $vis.include?([min,a[1]]) }
    if $DEBUG then print "ARC: #{a.inspect}\n" end
    for r in buildR(a[1],a[3],tree+[a])
      (rmin,rmax,rarcs) = r
      for l in buildR(min,rmin,rarcs)
        (lmin,lmax,larcs) = l
        trees << [lmin,rmax,larcs]
      end
    end
  end
  for i in (0...$n).select{|i| $succ.include?([i,max])}.select{|i| $lhs.include?([min,i])}
    for l in buildL(min,i,tree)
      (lmin,lmax,larcs) = l
      trees << [lmin,lmax,larcs]
    end
  end
  trees  
end
    

def buildL(min,max,tree)
  if $DEBUG then print "buildL--#{min}--#{max}--#{tree.inspect}\n" end
  if $pos[min]==$pos[max]
    if min==0 && max==0
      $thetrees.push [$theroot,tree]
      if $DEBUG then print "adding tree: #{tree.inspect}\n" end
    end
    return [[max,max,tree]]
  end
  trees=[]
  for arc in $arcs.select{|a| a[1]==max && $lhs.include?([min,a[0]]) }
    if $DEBUG then print "ARC: #{arc.inspect}\n" end
    for r in buildR(arc[3],max,tree+[arc])
      (rmin,rmax,rarcs) = r
      for l in buildL(min,rmin,rarcs)
        (lmin,lmax,larcs) = l
        trees << [lmin,lmax,larcs]
      end
    end
  end
  trees
end


def printgraph()
  
  print "N:    #{$n}\n"
  print "SAT:  #{set_to_s($sat)}\n"
  print "SUCC: #{rel_to_s($succ)}\n"
  print "VIS:  #{rel_to_s($vis)}\n"
  print "LHS:  #{rel_to_s($lhs)}\n"
  print "ARCS: #{arcs_to_s($arcs)}\n"
end

def set_to_s(s) "{#{s.join(',')}}" end
def rel_to_s(r) "{#{r.map{|p| "(#{p[0]},#{p[1]})"}.join(',')}}" end
def arc_to_s(q) "-#{q[0]}-#{q[2]}-#{q[1]}/#{q[3]}" end
def arcs_to_s(a) "{#{a.map{|q| arc_to_s(q)}.join(',')}}" end

######################################################################

tre($stdin)
