CSCI 4230
Internet Applicaiton Development
Fall 2000
Suggested Solution to Homework #4

(1)    For example:

use strict;
$|++;
use LWP::Simple qw/get/;
use CGI;
use URI::Escape;

#
#    h4q1.pl
#    by K. Yue  August 2, 2000
#
#    This program displays a form to allow users to select a language from a predefined list.
#    The program then displays the Web interest on the lanuage by showing the number of hits,
#    the number of positive sentiment pages (with language 'rules' found), the number of negative
#    sentiment pages (with language 'sucks' found) and the rules/sucks ratio.
#
#    Constants
my $LANGUAGE_STR = "language";
my @LANGUAGES = ('HTML', 'Java', 'Perl', 'C++', 'Javascript', 'XML', 'PHP', 'Visual Basic', 'VBScript', 'XSL');
#    Search engine URL: add keyword at the end.
my $SEARCH_ENGINE_NAME = "google.com";
my $SEARCH_ENGINE_URL = "http://nas.uhcl.edu/yue/courses/csci4230/Fall2000/h4query.pl?hl=en&safe=off&q=";
#    Pattern of number of hits from the result of the search engine.
#    Example: Google results <b>1-10</b> of about <b>11,300,000</b> for
my $RESULT_PATTERN = 'Google results <b>\d+-\d+<\/b> of about <b>([\d,]+)<\/b> for';
my $RULES = "rules";
my $SUCKS = "sucks";

#    Get parameters
my $q = new CGI;
my $language = $q->param($LANGUAGE_STR);

#    Main logic.
if ($language) {
    displayResult($language);
}
else {
    displayInitialPage();
}
exit 0;    #    End Main

#    Initial Page.
sub displayInitialPage {
    print $q->header,
          $q->start_html(-title=>"Interests of Language in the Web",
                         -bgcolor=>"#ccccff"),
          $q->h2("Interests of Language in the Web"),
          $q->p,
          "This page gauges the interests of various languages/technologies in the Web by finding " .
          "the number of Web pages of the languages through Google.com. " .
          "It also gauges the sentiment on these languages",
          $q->p,
          "Select the language and find the result",
          languageForm(),
          $q->end_html;
}    #    End displayInitialPage

#    Return a string corresponding to the initial form.
sub languageForm {
    my $result = $q->start_form() .
                 "Select language &nbsp;\n" .
                 $q->scrolling_list(-name=>$LANGUAGE_STR,
                                    -values=>\@LANGUAGES,
                                    -default=>[$LANGUAGES[0]],
                                    -size=>1) .
                 "&nbsp;&nbsp;" .
                 $q->submit() .
                 $q->end_form();
    $result;
}    #    End languageForm

#    Display result
sub displayResult {
    my $language = shift;
    my $numHits = getKeywordHitCount($language);
    my $numRules = getKeywordHitCount("$language $RULES");
    my $numSucks = getKeywordHitCount("$language $SUCKS");

    print $q->header,
          $q->start_html(-title=>"Web Interest on $language",
                         -bgcolor=>"#ccccff"),
          $q->h2("Web Interest on $language"),
          $q->p;
    if ($numHits < 0) {
        print "Sorry, cannot get hit counts from $SEARCH_ENGINE_NAME.";
    }
    else {
        print "From $SEARCH_ENGINE_NAME, the following statistics are compiled for $language at " .
        (localtime),
        ":\n",
        $q->p,
        "Number of page hits: $numHits." ,
        $q->br,
        "Number of positive sentiment pages (with keyword '$language $RULES'): $numRules." ,
        $q->br,
        "Number of negative sentiment pages (with keyword '$language $SUCKS'): $numSucks." ,
        $q->br;
        if ($numSucks > 0) {
            $numRules=~s/,//g;
            $numSucks=~ s/,//g;
            print "Rules/Sucks Ratio (1.0 is neutral; the higher the more compliementary): ",
                  sprintf("%0.2f", ($numRules/$numSucks));
    }
    }

    print $q->p,
          languageForm(),
          $q->end_html();
}    #    End displayResult.

#    Get the keyword hit counts.
sub getKeywordHitCount {
   my $keyword = shift;
   my $urlContents = get($SEARCH_ENGINE_URL . uri_escape($keyword, "^a-zA-Z0-9"));
   if ($urlContents) {
        if ($urlContents =~ /$RESULT_PATTERN/i) {
            return $1;
        }
    }
    -1;  #    Cannot get count.
}    #    End getKeywordHitCount