(1 and 2) For example:
# Sample roster: every record is "name;major;grade".
my @students = (
    "Bun Yue;CSCI;A",
    "Joe Go;SWEN;B+",
    "Kenny Bee;CSCI;B",
    "Sadegh Davari;SWEN;A",
    "Mary Matalin;CSCI;C",
);

# Group the records into major => { name => grade }.
my %result = hashify(@students);

# List the majors alphabetically, each followed by its students
# (also alphabetically) and their grades.
for my $major (sort keys %result) {
    my $gradeOf = $result{$major};
    print "Major $major:\n";
    print " $_ => $gradeOf->{$_}\n" for sort keys %$gradeOf;
}
exit 0;
# Build a two-level hash from a list of student records.
#   Arguments: strings of the form "name;major;grade".
#   Returns:   a hash mapping each major to a reference to a hash of
#              name => grade for the students in that major.
# A later record for the same name and major replaces the earlier grade.
sub hashify {
    my %gradesByMajor;
    for my $record (@_) {
        my ($name, $major, $grade) = split /;/, $record;
        # Autovivification creates the inner hash the first time a
        # major is seen, so no existence test is needed.
        $gradesByMajor{$major}{$name} = $grade;
    }
    return %gradesByMajor;
}
# Output:
#Major CSCI:
# Bun Yue => A
# Kenny Bee => B
# Mary Matalin => C
#Major SWEN:
# Joe Go => B+
# Sadegh Davari => A
(3) For example,
use strict;
use LWP::Simple;
use LWP::UserAgent;
use URI;
use HTML::LinkExtor;
#
# PageDownloader.pl url dir report.txt
# Bun Yue February, 2001
#
# This program extracts and saves the page specified by the full url
# into the directory dir and generates a report to the file
# report.txt. The program creates the subdirectory dir in the
# current directory and generates an error message if the
# directory already exists. The program saves the page
# as main.html. It also extracts all html links (with
# extensions .htm or .html) and saves them in the
# subdirectory as link00001.html, link00002.html, and
# so on.
#
# Constant strings for output html filenames.
my $MAIN_PAGE_FILENAME = 'main.html';
my $LINK_PAGE_FILENAME_PREFIX = 'link';
my $LINK_PAGE_LEADING_DIGITS = 5;

# Link page extensions that should be extracted, written as a regex
# alternation without the leading dot.
my $LINK_PAGE_EXTENSION = 'html|htm';

# Get command line arguments:
# (input url, input directory for storing page files and report,
# report file name)
@ARGV < 3 && die "Usage: PageDownloader.pl url dir report.txt\n";
my ($url, $dir, $reportFileName) = @ARGV;

# Get the URL page. get() returns undef on failure, so test for
# definedness: an empty page is still a successful download.
my $urlContents = get($url);
defined $urlContents
    or die "Error: Unsuccessful to obtain the page for $url. Check validity of the url.\n";

# Create the subdirectory under the current directory.
-e $dir
    && die "The directory name $dir already exist as a directory or file name. Please use another directory name.\n";
mkdir($dir, 0777) or die "Can't create the directory $dir.\n";
chdir($dir) or die "Can't change to the directory $dir.\n";

# Open report file. It is opened after the chdir, so it is created
# inside the new subdirectory. Three-argument open keeps characters in
# the user-supplied name from being read as an open mode.
open(my $report, '>', $reportFileName)
    or die "Can't open report file $reportFileName.\n";

# Save the main page.
open(my $output, '>', $MAIN_PAGE_FILENAME)
    or die "Can't open output file $MAIN_PAGE_FILENAME.\n";
print $output $urlContents;
close($output)
    or die "Can't write output file $MAIN_PAGE_FILENAME.\n";

print $report "This report: $reportFileName\n"
    . "URL for extraction: $url\n"
    . "Subdirectory: $dir\n"
    . "Extraction time: " . scalar(localtime) . "\n\n";
print $report "Main URL $url is saved as: $MAIN_PAGE_FILENAME.\n\n";

# Variables for parsing links.
my $savedLinkReport = "";    # Report string for link page saving.
my @savedLinks = ();         # URL of link pages saved successfully.
                             # For future uses.
my @failedLinks = ();        # URL of link pages failed to be saved.

# Parsing links.
my $parser = HTML::LinkExtor->new;
$parser->parse($urlContents);
$parser->eof;    # Signal end of document so buffered text is parsed too.

foreach my $parsedTag ($parser->links()) {
    # Get the link tag.
    my ($tag, %attr) = @$parsedTag;
    next if $tag ne 'a';
    my $linkUrl = $attr{'href'};
    next unless defined $linkUrl;

    # Skip mailto link.
    next if $linkUrl =~ /^mailto:/i;

    # Skip url that does not have the extensions. The alternation is
    # grouped and preceded by a literal dot so that only a real
    # ".html"/".htm" suffix matches.
    next unless $linkUrl =~ /\.(?:$LINK_PAGE_EXTENSION)$/i;

    # Resolve relative links against the url of the main page. Links
    # that are already absolute are returned unchanged.
    $linkUrl = URI->new_abs($linkUrl, $url)->as_string;

    my $linkUrlContents = get($linkUrl);
    unless (defined $linkUrlContents) {
        push @failedLinks, $linkUrl;
        next;
    }

    # Get link file name, numbered by its position among saved links.
    my $linkFileName = $LINK_PAGE_FILENAME_PREFIX
        . digitString(scalar(@savedLinks) + 1, $LINK_PAGE_LEADING_DIGITS)
        . ".html";

    # Write the link file. A page that cannot be written to disk is
    # reported as failed instead of being counted as saved.
    my $linkFile;
    my $written = open($linkFile, '>', $linkFileName)
        && (print {$linkFile} $linkUrlContents)
        && close($linkFile);
    unless ($written) {
        push @failedLinks, $linkUrl;
        next;
    }

    # Get the file successfully.
    push @savedLinks, $linkUrl;

    # Update string for the report file.
    $savedLinkReport .= "$linkUrl => $linkFileName.\n";
}

# Print report heading and statistics.
print $report "Total links: "
    . (scalar(@savedLinks) + scalar(@failedLinks)) . "\n";
print $report "Total pages saved: " . scalar(@savedLinks) . "\n";
print $report "Total pages failed to be saved: "
    . scalar(@failedLinks) . "\n\n";

# Print saved links.
print $report "Saved links are: \n\n" . $savedLinkReport . "\n";

# Print failed links
if (@failedLinks) {
    print $report "Links that cannot be saved: \n\n";
    foreach my $failedLink (@failedLinks) {
        print $report "$failedLink\n";
    }
}
close($report) or die "Can't write report file $reportFileName.\n";
exit 0; # main
# Return the number num as a string of at least numDigits characters,
# padded on the left with zeros; e.g. digitString(7, 5) is "00007".
# A num that already has numDigits or more characters is returned
# unchanged rather than cut down, so two different numbers can never
# produce the same string.
sub digitString {
    my ($num, $numDigits) = @_;
    my $padLength = $numDigits - length $num;
    return $padLength > 0 ? ("0" x $padLength) . $num : "$num";
}