#!/usr/bin/perl use strict; use XML::Writer; use LWP::UserAgent; use Date::Format; my $media_url='http://www.isi.edu/in-notes/iana/assignments/media-types/media-types'; my $date=time2str("%Y-%m-%d", time); # makes a mimeType schema file # by # Thomas Krichel (krichel@openlib.org) - 2002-09-16 # Simeon Warner (simeon@cs.cornell.edu) - 2002-09-11 my $documentation="schema for mime types"; $documentation.=" generated on $date"; $documentation.=" from $media_url,"; $documentation.=" write to Thomas Krichel (krichel\@openlib.org) or Simeon M. Warner (simeon\@cs.cornell.edu)\n"; my $file; my $ua = LWP::UserAgent->new; my $req = HTTP::Request->new(GET => $media_url); my $res = $ua->request($req); if ($res->is_success) { $file=$res->content; } else { die "could not download types file\n"; } my @lines=split(/\n/,$file); # complete types my @mimetypes; # main types only my @maintypes; my $ctype=undef; my $start; foreach(@lines) { $start=m/---- -------/ unless $start; next unless $start; last if (/The "media-types"/); chomp; next unless (m/\S/); if (m/^(\S*)\s+(\S+)\s*(\S*.*)$/) { my ($type,$subtype,$ref)=($1,$2,$3); # do not consider start line if($type=~/^-+$/) { next; } elsif($type) { push(@maintypes,$type); } $ctype=$type if ($type=~m/\S/); next if ($subtype=~/^\[/); #just type and ref push (@mimetypes,"$ctype/$subtype"); } else { warn "can't parse: '$_'\n"; } } my $out = new IO::File("> mime-types.xsd") ; my $x = new XML::Writer(OUTPUT => $out,DATA_MODE => 1, DATA_INDENT => 1); $x->xmlDecl(); $x->startTag("xs:schema","xmlns:xs" => "http://www.w3.org/2001/XMLSchema", "elementFormDefault" => "qualified"); $x->startTag("xs:annotation"); $x->startTag("xs:documentation"); $x->characters("$documentation"); $x->endTag(); $x->endTag(); # # do x-types # my $maintype; my $xcount; foreach $maintype (@maintypes) { my $regex=&compose($maintype); $regex.="/[xX]\\-[a-zA-Z0-9\\.\\-\\+$_]+"; $xcount++; $x->startTag("xs:simpleType", "name" => "xmime_".$xcount); $x->startTag("xs:restriction", "base"=> "xs:string"); $x->startTag("xs:pattern", "value"=>$regex); $x->endTag(); $x->endTag(); $x->endTag(); } # # do normal types # my $count; my $mimetype; foreach $mimetype (@mimetypes) { $count++; my $regex=&compose($mimetype); $x->startTag("xs:simpleType", "name" => "mime_".$count); $x->comment($mimetype); $x->startTag("xs:restriction", "base"=> "xs:string"); $x->startTag("xs:pattern", "value"=>$regex); $x->endTag(); $x->endTag(); $x->endTag(); } # # do union type # my $counter; my $members; while($counter<$xcount) { $counter++; $members.="xmime_$counter "; } $counter=0; while($counter<$count) { $counter++; $members.="mime_$counter "; } $x->startTag("xs:simpleType", "name" => "mimeType"); $x->startTag("xs:union", "memberTypes" => $members); $x->endTag(); $x->endTag(); $x->endTag(); $out->close(); # # compose regular expression string # sub compose { my @letters=split('',$_[0]) ; my $regex; my $letter; foreach $letter (@letters) { if($letter=~/[a-z]/) { $regex.="[".$letter.uc($letter)."]"; } elsif($letter=~/[A-Z]/) { $regex.="[".$letter.lc($letter)."]"; } elsif($letter=~/\./) { $regex.='\.'; } elsif($letter=~/\+/) { $regex.='\+'; } elsif($letter=~/-/) { $regex.='\-'; } else { $regex.=$letter; } } return "$regex"; }