#!/usr/bin/perl -w

my $TOOLX = "c:/Toolx";

# Converts OASIS 9401 socats to XML catalogs

# Copyright 2002 John Cowan and contributors.

# John Cowan asserts the moral right to be known
# as the author of this software.
# MathEngine plc cleaned and extend this code.

# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 
#    1. Redistributions of source code must retain the above copyright
#        notice, this list of conditions and the following disclaimer.
# 
#    2. Redistributions in binary form must reproduce the above copyright
#       notice, this list of conditions and the following disclaimer in the
#       documentation and/or other materials provided with the distribution.

# THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.

# IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

# The views and conclusions contained in the software and documentation
# are those of the authors and should not be interpreted as representing
# official policies, either expressed or implied, of the auhor and/or
# the contributors.

use strict;
$/ = undef;

my $I = "  ";			# one indent level

my $lines = 1;			# current line number
my $token;			# current token type
my $value;			# current token value
my @commentq;			# queue of comments
my $kw;				# current keyword
my @args;			# arguments to current keyword
my $groups;			# number of groups entered

# start xml catalog
sub init
  {
    print "<?xml version=\"1.0\"?>\n";
    print "<!DOCTYPE catalog\n";
    print "${I}PUBLIC \"-//OASIS//DTD Entity Resolution XML Catalog V1.0//EN\"\n";
    print "${I}\"file:$TOOLX/sgml/XML-Catalog-1.0/catalog.dtd\">\n\n";
    print "<catalog\n";
    print "${I}xmlns=\"urn:oasis:names:tc:entity:xmlns:xml:catalog\"\n";
    print "${I}xmlns:soc=\"urn:oasis:names:tc:entity:xmlns:tr9401:catalog\"\n";
    print "${I}xmlns:unk=\"urn:oasis:names:tc:entity:xmlns:unknown\">\n";
  }

# wrap up xml catalog
sub fin
  {
    while ($groups--)
      {
    print "${I}</group>\n";
      }

    print "</catalog>\n";
}

$_ = <>;

init();

recover:
scan();

while ($token ne "EOF")
  {
    parse();
    emit();
    dequeue();
  }

fin();

# Scan the next token into $token, putting its value in $value
sub scan
  {
  rescan:
    # whitespace
    if (($value) = /^([ \t\r\n]+)/)
      {
    $_ = $';
    $lines++ while $value =~ s/\n//;
      }

    # comment
    if (/^--/)
      {
    ($value) = /^--([^-]+(?:-[^-]+)*)--/;
    $_ = $';
    push @commentq, $value;

    $lines++ while $value =~ s/\n//;
    goto rescan;
      }

    # end of input
    if ($_ eq "")
      {
    $token = "EOF";
    return;
      }

    # non-string
    if (($value) = /^([^\"\' \t\r\n]+)/)
      {
    $_ = $';
    $token = ($value =~ /[\\\/.<>]/) ? "NONSYM" : "SYM";

    my $uct = uc($value);

    if ($uct eq "OVERRIDE" || $uct eq "SYSTEM"
        || $uct eq "DELEGATE" || $uct eq "PUBLIC"
        || $uct eq "DTDDECL" || $uct eq "ENTITY"
        || $uct eq "DOCTYPE" || $uct eq "LINKTYPE"
        || $uct eq "NOTATION" || $uct eq "SGMLDECL"
        || $uct eq "DOCUMENT" || $uct eq "BASE"
        || $uct eq "CATALOG")
          {
            $value = $uct;
            $token = "KW";
      }

    return;
      }

    # double-quoted string
    if (($value) = /^\"([^\"]*)\"/)
      {
    $_ = $';
    $token = "LIT";
    return;
      }

    # single-quoted string
    if (($value) = /^\'([^\']*)\'/)
      {
    $_ = $';
    $token = "LIT";
    return;
      }

    die "can't happen '$_'";
  }

# Syntax error in input
sub yammer
  {
    my ($msg) = @_;
    warn "$msg at line $lines\n";
    goto recover;
  }

# Parse tokens into xcatalog entries
sub parse
  {
    $kw = $value;
    @args = ();

    # unknown keyword
    if ($token eq "SYM")
      {
    while (1) {
      scan();
      last if $token eq "KW" || $token eq "EOF";
      last if $token eq "SYM" && @args != 0;
      push @args, $value;
    }
    return;
      }

    yammer "$value not a valid keyword" unless $token eq "KW";

    scan();

    if ($kw eq "PUBLIC" || $kw eq "DTDDECL")
      {
    yammer "$value not a public id" unless $token eq "LIT";
    push @args, $value;
    scan();
    push @args, $value;
    scan();
    return;
      }

    if ($kw eq "ENTITY" || $kw eq "DOCTYPE"
    || $kw eq "LINKTYPE" || $kw eq "NOTATION")
      {
    push @args, $value;
    scan();
    push @args, $value;
    scan();
    return;
      }

    if ($kw eq "SGMLDECL" || $kw eq "DOCUMENT"
    || $kw eq "BASE" || $kw eq "CATALOG")
      {
    push @args, $value;
    scan();
    return;
      }

    if ($kw eq "SYSTEM")
      {
    yammer "$value not a system id" unless $token eq "LIT";
    push @args, $value;
    scan();
    push @args, $value;
    scan();
    return;
      }

    if ($kw eq "DELEGATE")
      {
    yammer "$value not a partial public id"
      unless $token eq "LIT";
    push @args, $value;
    scan();
    push @args, $value;
    scan();
    return;
      }

    if ($kw eq "OVERRIDE")
      {
    $value = uc($value);
    yammer "OVERRIDE requires YES or NO"
      unless $value eq "YES" || $value eq "NO";
    push @args, $value;
    scan();
    return;
      }

    die "can't happen";
  }

# Emit the XML catalog entry
sub emit
  {
    my $arg;

    foreach $arg (@args)
      {
    $arg = ($arg =~ /\"/) ? "'$arg'" : "\"$arg\"";
      }

    # General directives.

    if ($kw eq "OVERRIDE")
      {
        my $prefer = ($args[0] =~ /YES/)
          ? "\"public\"" : "\"system\"";
        print "\n${I}<group prefer=$prefer>\n";
        $groups++;
      }
    elsif ($kw eq "BASE")
      {
        print "\n${I}<group xml:base=$args[0]>\n";
        $groups++;
      }
    elsif ($kw eq "CATALOG")
      {
        print "\n${I}${I}<nextCatalog catalog=$args[0]/>\n";
      }
    elsif ($kw eq "DELEGATE")
      {
        print "${I}${I}<delegatePublic\n${I}${I}${I}publicIdStartString=$args[0]";
        print "\n${I}${I}${I}catalog=$args[1]/>\n";
      }
    elsif ($kw eq "DOCTYPE")
      {
        print "${I}${I}<soc:doctype\n${I}${I}${I}name=$args[0]\n${I}${I}${I}uri=$args[1]/>\n";
      }

    # Entity mappings

    elsif ($kw eq "PUBLIC")
      {
        print "${I}${I}<public\n${I}${I}${I}publicId=$args[0]\n${I}${I}${I}uri=$args[1]/>\n";
      }
    elsif ($kw eq "SYSTEM")
      {
    print "${I}${I}<system\n${I}${I}${I}systemId=$args[0]\n",
            "${I}${I}${I}uri=$args[1]/>\n";

        if ($args[0] =~ m/^"http:/)
        {
            print "${I}${I}<uri\n${I}${I}${I}name=$args[0]\n",
                "${I}${I}${I}uri=$args[1]/>\n"
        }
      }

    # TR9401 stuff that has no correspondence in the XML Catalog spec.
    # Namespaced to 'soc:', but perhaps better to just comment it out.

    elsif ($kw eq "SGMLDECL")
      {
        print "${I}${I}<!-- soc:sgmldecl\n${I}${I}${I}uri=$args[0]/ -->\n";
      }
    elsif ($kw eq "DTDDECL")
      {
        print "${I}${I}<!-- soc:dtddecl\n${I}${I}${I}publicId=$args[0]\n${I}${I}${I}uri=$args[1]/ -->\n";
      }
    elsif ($kw eq "ENTITY")
      {
        print "${I}${I}<!-- soc:entity\n${I}${I}${I}name=$args[0]\n${I}${I}${I}uri=$args[1]/ -->\n";
      }
    elsif ($kw eq "NOTATION")
      {
        print "${I}${I}<!-- soc:notation\n${I}${I}${I}name=$args[0]\n${I}${I}${I}uri=$args[1]/ -->\n";
      }
    elsif ($kw eq "LINKTYPE")
      {
        print "${I}${I}<!-- soc:linktype\n${I}${I}${I}name=$args[0]\n${I}${I}${I}uri=$args[1]/ -->\n";
      }
    elsif ($kw eq "DOCUMENT")
      {
        print "${I}${I}<!-- soc:document\n${I}${I}${I}uri=$args[0]/ -->\n";
      }
      else
      {
        print "${I}${I}\n<!-- unk:$kw ";
        my $i;
        for ($i = 0; $i <= $#args; $i++)
          {
            print "${I}${I}${I}arg$i=$args[$i]\n";
          }
        print "${I}${I}/ -->\n\n";
      }
  }

# dequeue comments
sub dequeue
  {
    my $comment;

    foreach $comment (@commentq)
      {
    print "${I}${I}<!--$comment-->\n";
      }

    @commentq = ();
  }
