#!/usr/bin/perl

#package:	UAM Text Tools
#component:	tags for utt
#version:	1.0
#author:	Tomasz Obrebski

use strict;
use locale;

my $input = <>;
chomp $input;

our $pos_re    = qr/(?:[[:upper:]]+)/;
our $attr_re   = qr/(?:[[:upper:]]+)/;
our $val_re    = qr/(?:[[:lower:][:digit:]+?!*-]|<[^>\n]+>)/;
our $av_re     = qr/(?:$attr_re$val_re+)/;
our $avlist_re = qr/(?:$av_re+)/;
our $cat_re    = qr/(?:$pos_re(?:\/$avlist_re)?)/;

print pre($input);

sub parse ($)
{
    my ($dstr)=@_;
    my $avs={};
    my ($cat,$attrlist) = split '/', $dstr;
  ATTR:
    while( $attrlist =~ /($attr_re)($val_re+)/g )
    {
	my ($attrstr,$valstr)=($1,$2);
	my %vals;
	while($valstr =~ /$val_re/g)
	{
	    my $val = $&;
	    next ATTR if $val eq '*';
	    $val =~ s/^<([[:lower:]])>$/$1/;
	    $vals{$val}=1;
	}
	
	$avs->{$attrstr} = \%vals; # dlaczego to dziala? %vals jest lokalne
    }
    [$cat, $avs];
}

sub unparse (\@)
{
    my ($cat,$avs)= @{shift @_};
    my $dstr=$cat;
    my @attrs = keys %$avs;
    if(@attrs)
    {
	$dstr .= '/';
	for my $attr ( sort @attrs )
	{
	    $dstr .= $attr . (join '', sort keys %{$avs->{$attr}});
	}
    }
    $dstr;
}

sub canonize ($)
{
    unparse @{parse shift} ;
}

sub pre
{
    my $pos_res    = '[[:upper:]]+';
    my $attr_res   = '[[:upper:]]+';
    my $val_res    = '[[:lower:][:digit:]+?!*-]|<[^>\n[:cntrl:]]+>';
    my $av_res     = "$attr_res($val_res)+";
    my $avlist_res = "($av_res)+";

    my $pat = canonize(shift);
    my $ret;
    my ($pos,$avlist) = split /\//, $pat;
    $ret = $pos.'(\/';
    while ($avlist =~ /($attr_res)(${val_res}+)/g)
    {
	my $attr = $1;
	my $vals = $2;
	my $vals = "($val_res)*(".join('|',($vals =~ /$val_res/g)).")($val_res)*";
	$ret .= "($av_res)*$attr$vals";
    }
    $ret .= "($av_res)*)?";
    return $ret;
}

