#!/usr/bin/perl
#
# Builds pacs2003.xsd, an XML Schema listing PACS 2003 classification codes.
#
# Steps:
#   1. Optionally re-download the plain-text PACS listing with w3mir.
#   2. Parse all.txt into a code => description map.
#   3. Write a schema with a simple type "pacs2003Element" (one
#      xs:enumeration per code, the description as xs:documentation) and a
#      list type "pacs2003" whose items are pacs2003Element.
#   4. Delete all.txt and recode the schema from ISO-8859-1 to UTF-8.
#
# Lines of the listing that do not look like a code entry are echoed to
# STDOUT so they can be inspected by hand.

use strict;
use warnings;

use IO::File;
use LWP::Simple;
use XML::Writer;

my $url         = "http://www.aip.org/pacs/pacs03/all.txt";
my $input_file  = 'all.txt';
my $schema_file = 'pacs2003.xsd';

# renew the download of the web page
my $renew = 1;
if ($renew) {
    # NOTE(review): the rest of the script assumes this leaves all.txt in
    # the current directory -- confirm against the w3mir options used.
    system("w3mir -drr -r $url 2>> /dev/null") == 0
        or warn "w3mir exited with status $?\n";
}

# Read the whole listing in one go; the clean-up below needs to look across
# line boundaries.
open(my $in, '<', $input_file)
    or die "cannot open $input_file: $!";
my $text = join('', <$in>);
close($in);

$text =~ s/\r//g;         # drop carriage returns
$text =~ s/\n\n/\n/g;     # collapse doubled newlines
$text =~ s/\n {13}//g;    # a line break followed by 13 spaces is removed,
                          # gluing that line onto the previous one

# Map each PACS code to its description.
my %pacs;
foreach my $line (split(/\n/, $text)) {
    if ($line =~ / (\d{2}\.[^ ]*) +(.*)/) {
        $pacs{$1} = $2;
    }
    else {
        # Not a code entry: show it instead of dropping it silently.
        print "$line\n";
    }
}

my $out = IO::File->new($schema_file, 'w')
    or die "cannot write $schema_file: $!";
my $x = XML::Writer->new(OUTPUT => $out, DATA_MODE => 1, DATA_INDENT => 1);

$x->xmlDecl();
$x->startTag('xs:schema',
    'xmlns:xs'             => "http://www.w3.org/2001/XMLSchema",
    'elementFormDefault'   => "unqualified",
    'attributeFormDefault' => "unqualified",
    'xmlns'                => "http://amf.openlib.org",
    'targetNamespace'      => "http://amf.openlib.org");

# create the element type: one enumeration value per PACS code
$x->startTag('xs:simpleType', 'name' => "pacs2003Element");
$x->startTag('xs:restriction', 'base' => "xs:string");
foreach my $code (sort keys %pacs) {
    $x->startTag('xs:enumeration', 'value' => $code);
    $x->startTag('xs:annotation');
    $x->startTag('xs:documentation');
    $x->characters($pacs{$code});
    $x->endTag('xs:documentation');
    $x->endTag('xs:annotation');
    $x->endTag('xs:enumeration');
}
$x->endTag('xs:restriction');
$x->endTag('xs:simpleType');

# create the list type
$x->startTag('xs:simpleType', 'name' => "pacs2003");
$x->emptyTag('xs:list', 'itemType' => "pacs2003Element");
$x->endTag('xs:simpleType');

$x->endTag('xs:schema');
$out->close
    or die "cannot close $schema_file: $!";

unlink($input_file)
    or warn "cannot remove $input_file: $!\n";

# The listing is treated as ISO-8859-1; convert the schema in place to UTF-8.
system('recode', 'iso-8859-1..utf-8', $schema_file) == 0
    or die "recode of $schema_file failed with status $?\n";

exit;