(1) For example:
use strict;
$|++;
use LWP::Simple qw/get/;
use CGI;
use URI::Escape;
#
# h4q1.pl
# by K. Yue August
2, 2000
#
# This program displays
a form to allow users to select a language from a predefined list.
# The program then displays
the Web interest on the lanuage by showing the number of hits,
# the number of positive
sentiment pages (with language 'rules' found), the number of negative
# sentiment pages (with
language 'sucks' found) and the rules/sucks ratio.
#
# Constants
my $LANGUAGE_STR = "language";
my @LANGUAGES = ('HTML', 'Java', 'Perl',
'C++', 'Javascript', 'XML', 'PHP', 'Visual Basic', 'VBScript', 'XSL');
# Search engine URL: add
keyword at the end.
my $SEARCH_ENGINE_NAME = "google.com";
my $SEARCH_ENGINE_URL = "http://sce.uhcl.edu/yue/courses/csci4230/Fall2000/h4query.pl?hl=en&safe=off&q=";
# Pattern of number of
hits from the result of the search engine.
# Example: Google results
<b>1-10</b> of about <b>11,300,000</b> for
my $RESULT_PATTERN = 'Google results <b>\d+-\d+<\/b>
of about <b>([\d,]+)<\/b> for';
my $RULES = "rules";
my $SUCKS = "sucks";
# Get parameters
my $q = new CGI;
my $language = $q->param($LANGUAGE_STR);
# Main logic.
if ($language) {
displayResult($language);
}
else {
displayInitialPage();
}
exit 0; #
End Main
# Initial Page.
sub displayInitialPage {
print $q->header,
$q->start_html(-title=>"Interests of Language in the Web",
-bgcolor=>"#ccccff"),
$q->h2("Interests of Language in the Web"),
$q->p,
"This page gauges the interests of various languages/technologies in the
Web by finding " .
"the number of Web pages of the languages through Google.com. " .
"It also gauges the sentiment on these languages",
$q->p,
"Select the language and find the result",
languageForm(),
$q->end_html;
} # End
displayInitialPage
# Return a string corresponding
to the initial form.
sub languageForm {
my $result = $q->start_form()
.
"Select language \n" .
$q->scrolling_list(-name=>$LANGUAGE_STR,
-values=>\@LANGUAGES,
-default=>[$LANGUAGES[0]],
-size=>1) .
" " .
$q->submit() .
$q->end_form();
$result;
} # End
languageForm
# Display result
sub displayResult {
my $language = shift;
my $numHits = getKeywordHitCount($language);
my $numRules = getKeywordHitCount("$language
$RULES");
my $numSucks = getKeywordHitCount("$language
$SUCKS");
print $q->header,
$q->start_html(-title=>"Web Interest on $language",
-bgcolor=>"#ccccff"),
$q->h2("Web Interest on $language"),
$q->p;
if ($numHits < 0) {
print "Sorry, cannot get hit counts from $SEARCH_ENGINE_NAME.";
}
else {
print "From $SEARCH_ENGINE_NAME, the following statistics are compiled
for $language at " .
(localtime),
":\n",
$q->p,
"Number of page hits: $numHits." ,
$q->br,
"Number of positive sentiment pages (with keyword '$language $RULES'):
$numRules." ,
$q->br,
"Number of negative sentiment pages (with keyword '$language $SUCKS'):
$numSucks." ,
$q->br;
if ($numSucks > 0) {
$numRules=~s/,//g;
$numSucks=~ s/,//g;
print "Rules/Sucks Ratio (1.0 is neutral; the higher the more compliementary):
",
sprintf("%0.2f", ($numRules/$numSucks));
}
}
print $q->p,
languageForm(),
$q->end_html();
} # End
displayResult.
# Get the keyword hit counts.
sub getKeywordHitCount {
my $keyword = shift;
my $urlContents = get($SEARCH_ENGINE_URL
. uri_escape($keyword, "^a-zA-Z0-9"));
if ($urlContents) {
if ($urlContents =~ /$RESULT_PATTERN/i) {
return $1;
}
}
-1; #
Cannot get count.
} # End
getKeywordHitCount