CSCI 5733
XML Application Development
Summer 2003
Suggested Solution to Homework #1
(1) For example (not fully documented):
use strict;
use CGI;
use LWP::Simple qw/get/;
$|++;
#
# Kwok-Bun Yue
# h1sol.pl: suggested solution for HW #1, Summer 2003, CSCI 5733
# XML Application Development.
# See: http://dcm.uhcl.edu/yue/courses/xml/Summer2003/hw/h1.asp
#
# Main script: merge the quotations found in a remote CSV document and a
# remote XML document, keep those matching the optional "author",
# "subject" and "ranking" request parameters, and emit them as XML.
my $q = CGI->new;

# Porting Constants
# URLs of input CSV (MS DOS Excel format) and XML documents.
my $CSV_URL = "http://dcm.uhcl.edu/yue/courses/xml/Summer2003/hw/h1dat1.csv";
# my $CSV_URL = "http://dcm.uhcl.edu/yue/courses/xml/Summer2003/hw/h1dat1alt.csv";
my $XML_URL = "http://dcm.uhcl.edu/yue/courses/xml/Summer2003/hw/h1dat2.xml";

# Get HTTP parameters: ranking, author and subject.
my $ranking = $q->param("ranking");
my $author  = $q->param("author");
my $subject = $q->param("subject");

# Retrieve contents of the input CSV and XML documents.
# get() yields undef when a download fails; fall back to an empty
# document so that the script still answers with a (possibly empty) result.
my $inputCsv = get($CSV_URL);
my $inputXml = get($XML_URL);
$inputCsv = "" unless defined $inputCsv;
$inputXml = "" unless defined $inputXml;

# Convert to local \n.
$inputCsv =~ s/\015?\012/\n/g;
$inputXml =~ s/\015?\012/\n/g;

# Get quotations from the CSV document and XML encode their values.
# Values taken from the XML document are copied as they appear in its
# markup, so they are not encoded a second time.
my @result = getQuotationsFromCSV($inputCsv);
for my $csvRecord (@result) {
    %{$csvRecord} = xmlEncodeHash(%{$csvRecord});
}
push @result, getQuotationsFromXML($inputXml);

# Output result. The HTTP header must be terminated by an empty line
# before the response body starts.
print "Content-type:text/xml\n\n";
print "<?xml version=\"1.0\"?>\n";

# Print root start tag. The filter values come straight from the request,
# so they are XML encoded before being written into attribute values.
# NOTE(review): the DTD given in part (2) declares the root element as
# "quotations" while this script emits "quotation" -- confirm which name
# the assignment requires.
print "<quotation";
if ($author) {
    print " author=\"", xmlEncodeOne($author), "\"";
}
if ($subject) {
    print " subject=\"", xmlEncodeOne($subject), "\"";
}
if ($ranking) {
    print " ranking=\"", xmlEncodeOne($ranking), "\"";
}
print ">\n";

# Print one <quote> element per matching record. An attribute that was
# used as a filter already appears on the root element and is therefore
# not repeated on the individual quotes.
foreach my $record (@result) {
    next unless matchQuote($author, $subject, $ranking, %$record);
    print " <quote";
    print " author=\"", $record->{"author"}, "\""
        unless $author;
    print " subject=\"", $record->{"subject"}, "\""
        unless $subject;
    print " ranking=\"", $record->{"ranking"}, "\""
        unless $ranking;
    print ">", $record->{"quote"}, "</quote>\n";
}

# Print root end tag.
print "</quotation>";
exit 0; # main
#
# getQuotationsFromCSV
#   Turn the text of a CSV document into a list of records.
#   The first line supplies the field names; each following line supplies
#   the values of one record, split with getOneQuotationFromCSV.
#   Returns a list of hash references, one per data line, keyed by the
#   field names. A line with fewer values than names leaves the remaining
#   fields undefined. No error handling.
#
sub getQuotationsFromCSV {
    my ($csvText) = @_;

    # The header line is separated from the data lines in one step.
    my ($headerLine, @dataLines) = split /\n/, $csvText;
    my @fieldNames = getOneQuotationFromCSV($headerLine);

    my @records;
    for my $dataLine (@dataLines) {
        my @values = getOneQuotationFromCSV($dataLine);

        # Pair names and values positionally through a hash slice.
        my %row;
        @row{@fieldNames} = @values;
        push @records, \%row;
    }
    @records;
} # getQuotationsFromCSV
#
# getOneQuotationFromCSV
#   Break down a single line in CSV format into an array of field values.
#
#   Arguments:
#     $line      - one CSV line, without its line terminator.
#     $delimiter - optional field separator. The line is scanned one
#                  character at a time, so only a single-character
#                  separator can ever match. Defaults to "," when it is
#                  omitted or empty.
#   Returns: the list of field values. There is always at least one
#   element, because the text after the last separator (possibly empty)
#   is returned as the final field.
#
#   A field may be enclosed in double quotes; inside such a field the
#   separator is ordinary text and "" stands for one literal ".
#   No error handling.
#   Existing modules, such as CSV.pm can be used as an alternative.
#
sub getOneQuotationFromCSV {
    my $line = shift;
    my $delimiter = shift;
    # Test for definedness and length rather than truth, so that a
    # separator of "0" is honoured instead of being replaced by ",".
    $delimiter = "," unless defined $delimiter && length $delimiter;
    my @result;    # result array to be returned.

    # State:
    #   0: start state (beginning of a field).
    #   1: normal: the first character is not a ".
    #      This state is never assigned below: unquoted characters are
    #      collected while staying in state 0, so a " met later in such a
    #      field still switches to state 2.
    #   2: quoted: the first character is a ".
    #   3: a " is encountered within state 2.
    #      A second " yields one literal " and returns to state 2;
    #      any other character x is kept as is ("x becomes x).
    my $state = 0;
    my $currentField = "";
    foreach my $char (split //, $line) {
        if ("\"" eq $char) {
            if ($state == 0) {
                $state = 2;
            }
            elsif ($state == 2) {
                $state = 3;
            }
            elsif ($state == 3) {
                $currentField .= "\"";
                $state = 2;
            }
            # State 1 is not possible if there is no error.
        }
        elsif ($delimiter eq $char) {
            if ($state == 2) {
                # Inside quotes the separator is part of the value.
                $currentField .= $delimiter;
            }
            else {
                # States 0, 1 and 3: the field is complete
                # (an empty field if state == 0 and nothing was collected).
                push @result, $currentField;
                $state = 0;
                $currentField = "";
            }
        }
        else { # other characters.
            $currentField .= $char;
        } # else
    } # foreach

    # push remaining contents
    push @result, $currentField;
    return @result;
} # getOneQuotationFromCSV
#
# getQuotationsFromXML
#   Pull the quotations out of an XML string with regular expressions
#   instead of an XML parser. No error handling.
#   Every <quote ...>text</quote> element that carries at least one
#   attribute becomes a hash: key "quote" holds the element text and each
#   attribute name maps to its (single- or double-quoted) value.
#   Returns the list of references to these hashes, in document order.
#
sub getQuotationsFromXML {
    my ($xml) = @_;
    my @quotes;

    # Drop every newline so that an element may span several lines.
    $xml =~ tr/\n//d;

    while ($xml =~ /<quote\s+(.*?)>(.*?)<\/quote>/g) {
        my ($attributeText, $body) = ($1, $2);
        my %quote = (quote => $body);

        # Each attribute contributes three captures: name, quote character
        # and value. Collect them all, then consume them in triples.
        my @pieces = $attributeText =~ /\s*(\w+)\s*=\s*(["'])(.*?)\2/g;
        while (my ($name, undef, $value) = splice @pieces, 0, 3) {
            $quote{$name} = $value;
        }
        push @quotes, \%quote;
    }
    @quotes;
} # getQuotationsFromXML
# XML Encode one string.
#   Replaces the five characters that have predefined XML entities
#   (& < > " ') with the corresponding entity references, so that the
#   result can be used as element text or inside a quoted attribute value.
#   Returns the encoded copy; an undefined argument is returned unchanged.
sub xmlEncodeOne {
    my $result = shift;
    return $result unless defined $result;

    # The ampersand must be handled first; otherwise the "&" of the
    # entity references produced below would be encoded again.
    $result =~ s/&/&amp;/g;
    $result =~ s/</&lt;/g;
    $result =~ s/>/&gt;/g;
    $result =~ s/"/&quot;/g;
    $result =~ s/'/&apos;/g;
    return $result;
} # xmlEncodeOne
# XML Encode the values of a hash.
#   Takes the hash as a flat key/value list and returns a new key/value
#   list in which every value has been passed through xmlEncodeOne.
#   Keys are left untouched.
sub xmlEncodeHash {
    my %plain = @_;
    my %encoded;
    while (my ($key, $value) = each %plain) {
        $encoded{$key} = xmlEncodeOne($value);
    }
    %encoded;
} # xmlEncodeHash
# Decide whether a record passes the author, subject and ranking filters.
#   Arguments: the author, subject and ranking filter values, followed by
#   the record as a flat key/value list.
#   A filter that is false in Perl's sense (undef, "" or "0") is ignored.
#   Returns 0 as soon as a filter that is set differs (string comparison)
#   from the record's field of the same name, and 1 otherwise.
sub matchQuote {
    my ($author, $subject, $ranking, %record) = @_;

    my %wanted = (
        author  => $author,
        subject => $subject,
        ranking => $ranking,
    );
    for my $field (qw(author subject ranking)) {
        my $filter = $wanted{$field};
        next unless $filter;
        return 0 if $filter ne $record{$field};
    }
    return 1;
} # matchQuote;
(2) For example
<!ELEMENT quotations (quote*)>
<!ATTLIST quotations
author CDATA #IMPLIED
subject CDATA #IMPLIED
ranking NMTOKEN #IMPLIED>
<!ELEMENT quote (#PCDATA)>
<!ATTLIST quote
author CDATA #IMPLIED
subject CDATA #IMPLIED
ranking NMTOKEN #IMPLIED>