#!/usr/bin/perl

use strict;
use XML::Writer;
use LWP::UserAgent;
use Date::Format;

my $media_url='http://www.isi.edu/in-notes/iana/assignments/media-types/media-types';
my $date=time2str("%Y-%m-%d", time);



# makes a mimeType schema file
# by
# Thomas Krichel (krichel@openlib.org) - 2002-09-16
# Simeon Warner (simeon@cs.cornell.edu) - 2002-09-11

my $documentation="schema for mime types";
$documentation.=" generated on $date";
$documentation.=" from $media_url,";
$documentation.=" write to Thomas Krichel (krichel\@openlib.org) or Simeon M. Warner (simeon\@cs.cornell.edu)\n";

my $file;
my $ua = LWP::UserAgent->new;
my $req = HTTP::Request->new(GET => $media_url);
my $res = $ua->request($req);
if ($res->is_success) {
    $file=$res->content;
} else {
    die "could not download types file\n";
}


my @lines=split(/\n/,$file); 

# complete types
my @mimetypes; 
# main types only
my @maintypes;
my $ctype=undef;
my $start;
foreach(@lines) { 
    $start=m/----            -------/ unless $start;
    next unless $start;
    last if (/The "media-types"/);
    chomp;
    next unless (m/\S/);
    if (m/^(\S*)\s+(\S+)\s*(\S*.*)$/) {
	my ($type,$subtype,$ref)=($1,$2,$3);
	# do not consider start line
	if($type=~/^-+$/) {
	    next;
	}
	elsif($type) {
	    push(@maintypes,$type);
	}
	$ctype=$type if ($type=~m/\S/);
	next if ($subtype=~/^\[/); #just type and ref
	push (@mimetypes,"$ctype/$subtype");
    } else {
	warn "can't parse: '$_'\n";
    }
}



my $out = new IO::File("> mime-types.xsd") ;

my  $x = new XML::Writer(OUTPUT => $out,DATA_MODE => 1, DATA_INDENT => 1);
$x->xmlDecl();

$x->startTag("xs:schema","xmlns:xs" => "http://www.w3.org/2001/XMLSchema",
	     "elementFormDefault" => "qualified");

$x->startTag("xs:annotation");
$x->startTag("xs:documentation");
$x->characters("$documentation");
$x->endTag();
$x->endTag();

#
# do x-types
#
my $maintype;
my $xcount;
foreach $maintype (@maintypes) {
    my $regex=&compose($maintype);
    $regex.="/[xX]\\-[a-zA-Z0-9\\.\\-\\+$_]+";
    $xcount++;
    $x->startTag("xs:simpleType", "name" => "xmime_".$xcount);
    $x->startTag("xs:restriction", "base"=> "xs:string");
    $x->startTag("xs:pattern", "value"=>$regex);
    $x->endTag();    
    $x->endTag();    
    $x->endTag();
}

#
# do normal types
#
my $count;
my $mimetype;
foreach $mimetype (@mimetypes) {
    $count++;
    my $regex=&compose($mimetype);
    $x->startTag("xs:simpleType", "name" => "mime_".$count);
    $x->comment($mimetype);
    $x->startTag("xs:restriction", "base"=> "xs:string");
    $x->startTag("xs:pattern", "value"=>$regex);
    $x->endTag();    
    $x->endTag();    
    $x->endTag();
}

#
# do union type
#
my $counter;
my $members;
while($counter<$xcount) {
    $counter++;
    $members.="xmime_$counter ";
}
$counter=0;
while($counter<$count) {
    $counter++;
    $members.="mime_$counter ";
}
$x->startTag("xs:simpleType", "name" => "mimeType");
$x->startTag("xs:union", "memberTypes" => $members);
$x->endTag();    
$x->endTag();

$x->endTag();
$out->close();


#
# compose regular expression string
#
sub compose {
    my @letters=split('',$_[0]) ;
    my $regex;
    my $letter;
    foreach $letter (@letters) {
	if($letter=~/[a-z]/) {
	    $regex.="[".$letter.uc($letter)."]";
	}
	elsif($letter=~/[A-Z]/) {
	    $regex.="[".$letter.lc($letter)."]";
	}
	elsif($letter=~/\./) {
	    $regex.='\.';
	}
	elsif($letter=~/\+/) {
	    $regex.='\+';
	}
	elsif($letter=~/-/) {
	    $regex.='\-';
	}
	else {
	    $regex.=$letter;
	}
    }
    return "$regex";
}
