(1) For example:
#!/opt/gnu/bin/perl
use strict;
$|++;
use LWP::Simple;
use LWP::UserAgent;
use URI::URL;
use CGI qw/:form :html param header/;
my $url_link = param("url");
my $url_doc = get($url_link);
print header, start_html("Check Links Result"),
h2("Result of links checking"), "\n";
if (not defined $url_doc) {
print "Sorry, the link you submitted,
<font color=green>$url_link</font> " .
"is broken.\n" . end_html;
exit 0;
}
$url_link =~ /(.*)\//;
my $base_dir = $1 . "/";
my $num_links = 0;
my $num_good_links = 0;
my $num_bad_links = 0;
my @links = ();
my @link_names = ();
my @goodlinkp = ();
my $doc = $url_doc;
my $ua = new LWP::UserAgent;
# Find links.
while ($doc =~ /<A HREF=("?)([^\s>]*)\1[^>]*>/i)
{
# check protocol.
$_ = $2;
$doc = $';
next if (/^mailto:/);
if (/^http:/) {
$links[$num_links]
= $_;
}
else {
$links[$num_links]
= $base_dir . $_ ;
}
$link_names[$num_links] = $_;
if ($goodlinkp[$num_links] =
$ua->request(new HTTP::Request('GET', $links[$num_links]))->is_success)
{
$num_good_links++;
}
else {
$num_bad_links++;
}
$num_links++;
}
# Print links
if ($num_links == 0) {
print "There is no link in <font
color=green>$url_link</font>.\n";
}
else {
print p,
"There are $num_links links in the submitted url: ",
"<font color=green>$url_link</font>: ",
"$num_good_links are good and $num_bad_links are broken.",
p,
"<UL>";
for (my $i=0; $i < $num_links;
$i++) {
if ($goodlinkp[$i])
{
print "<li><A HREF=\"$links[$i]\">$link_names[$i]</A>";
}
else {
print "<li>$link_names[$i] <FONT COLOR=\"RED\">is broken.</FONT>";
}
}
}
print end_html;
Alternatively, you may use the HTML packages. Note that the results are somewhat different as repeated links are counted as one and links within comments are not counted.
#!/opt/gnu/bin/perl
use strict;
$|++;
use LWP::Simple;
use LWP::UserAgent;
use URI;
use HTML::LinkExtor;
use CGI qw(:standard);
my $url_link = param("url");
my $url_doc = get($url_link);
print header, start_html("Check Links Result"),
h2("Result of links checking"), "\n";
if (not defined $url_doc) {
print "Sorry, the link you submitted,
<font color=green>$url_link</font> " .
"is broken.\n" . end_html;
exit 0;
}
# Get base directory.
$url_link =~ /(.*)\//;
my $base_dir = $1 . "/";
# Parse links.
my @links = ();
my $parser = HTML::LinkExtor->new;
$parser->parse($url_doc);
my @parsed_tags = $parser->links();
foreach (@parsed_tags) {
my ($tag, %attr) = @$_;
push(@links, $attr{'href'})
if $tag eq 'a';
}
my $num_links = 0;
my $num_good_links = 0;
my $num_bad_links = 0;
my @link_names = ();
my @goodlinkp = ();
my $ua = new LWP::UserAgent;
# Find links.
foreach (@links) {
next if (/^mailto:/);
if (/^http:/) {
$links[$num_links]
= $_;
}
else {
$links[$num_links]
= $base_dir . $_ ;
}
$link_names[$num_links] = $_;
if ($goodlinkp[$num_links] =
$ua->request(new HTTP::Request('GET', $links[$num_links]))->is_success)
{
$num_good_links++;
}
else {
$num_bad_links++;
}
$num_links++;
}
# Print links
if ($num_links == 0) {
print "There is no link in <font
color=green>$url_link</font>.\n";
}
else {
print p,
"There are $num_links links in the submitted url: ",
"<font color=green>$url_link</font>: ",
"$num_good_links are good and $num_bad_links are broken.",
p,
"<UL>";
for (my $i=0; $i < $num_links;
$i++) {
if ($goodlinkp[$i])
{
print "<li><A HREF=\"$links[$i]\">$link_names[$i]</A>", br;
}
else {
print "<li>$link_names[$i] <FONT COLOR=\"RED\">is broken.</FONT>",
br;
}
}
print "</UL>";
}
print end_html;