CSCI 5733
XML Application Development
Spring 2006
Suggested Solution to Homework #2

(1) For example:

(2)

(a) For example:

(b) Add an optional baseURL child element to <urlset>. Relax the minimum length restriction on <url>, possibly by removing it altogether. The data type may remain anyURI.

(c) Pros of allowing relative URLs: a smaller sitemap file; the URLs are easier and less error-prone to generate.
     Cons: it is more difficult to obtain the actual absolute URLs.

(3) For example:

package courses.xml.Spring2006;

/*--
      Kwok-Bun Yue
      Feb 1, 2006

      This program is a suggested solution for homework
      #2, question 3 of CSCI 5733 XML Application Development, Spring 2006 at UHCL. 
      See:
      http://dcm.cl.uh.edu/yue/courses/xml/Spring2006/hw/h2.asp (question 3)

      Minimal documentation
**/
import java.util.*;
import java.io.*;
//   import java.net.*;

import org.xml.sax.*;
import org.xml.sax.helpers.DefaultHandler;

import javax.xml.parsers.SAXParserFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;

public class XPathCounter
{   //   Testing purpose
   public static void main(String argv[])
   {   if (argv.length != 1)
      {   System.err.println("Usage: java XPathCounter inputfile.xml");
         System.exit(1);
      }
     
      String inFile = argv[0];
      XPathCounterSaxHandler handler = new XPathCounterSaxHandler();
      SAXParserFactory factory = SAXParserFactory.newInstance();
      try
      {   SAXParser saxParser = factory.newSAXParser();
         saxParser.parse(new File(inFile), handler);
         HashMap<String, Integer> xPathCounts =  handler.getXPathCounts();
        
         if (xPathCounts.isEmpty())
         {   //   This should not be executed. If there is no exception, xPathCounts should
            //   not be empty.
            System.out.println("The input file " + inFile + " contains no XPaths." +
               " It may not be well formed.");
         }
         else
         {   //   Print out result:
            System.out.println("XPaths found in the input file " + inFile + ": \n");
            Iterator iterator = (new TreeSet<String>(xPathCounts.keySet())).iterator();
            while (iterator.hasNext())
            {   String xPath = (String) iterator.next();
               System.out.println(xPath + ": " + xPathCounts.get(xPath));
            }
         }
      }
      catch (Throwable e)
      {   System.out.println("The input file " + inFile + " contains no XPaths." +
               " It may not be well formed.");
         e.printStackTrace();
      }
   }   //   main.
};   //   XPathCounter

/*--
      Kwok-Bun Yue
      Feb 1, 2006

      This class is used as the default handler for SAX for
      implementing homework #2 of CSCI 5733, Spring 2006 at UHCL.  See:
      http://dcm.cl.uh.edu/yue/courses/xml/Spring2006/hw/h2.asp.

      The method getXPathCounts returns a HashMap containing all
      XPaths found and their number of occurrences.

      Minimal documentation.
**/
/**
 * SAX handler that records, for every element and attribute reported to it,
 * a simple XPath (for example {@code /root/item} or {@code /root/item/@id})
 * and how many times that XPath has been seen.
 */
class XPathCounterSaxHandler extends DefaultHandler
{
   //   Data members
   //
   //   All XPaths found in the XML document and their counts.
   private final HashMap<String, Integer> xPathCounts = new HashMap<String, Integer>();
   //   The stack of XPaths from the outermost open element down to the
   //   current element; the top entry is the current element's XPath.
   private final Stack<String> xPaths = new Stack<String>();

   //   constructors
   public XPathCounterSaxHandler() {   }

   /**
    * Returns the XPaths recorded so far and their occurrence counts.
    *
    * @return the handler's own map (not a copy), keyed by XPath string.
    */
   public HashMap<String, Integer> getXPathCounts() {
      return this.xPathCounts;
   }   //   getXPathCounts

   //   Handler methods:

   /**
    * Counts the XPath of the element being opened and of each of its
    * attributes, and makes that element the current one.
    *
    * @param namespaceURI namespace URI of the element (not used here).
    * @param lName        local name; when empty, {@code qName} is used instead.
    * @param qName        qualified name of the element.
    * @param attrs        the element's attributes; may be {@code null}, in
    *                     which case no attribute XPaths are counted.
    */
   @Override
   public void startElement(String namespaceURI,
      String lName,
      String qName,
      Attributes attrs)
      throws SAXException
   {
      //   The current element's XPath is its parent's XPath (empty for the
      //   outermost element) followed by "/" and the element name.
      String parentXPath = xPaths.empty() ? "" : xPaths.peek();
      String currentPath = parentXPath + "/" + effectiveName(lName, qName);

      xPaths.push(currentPath);
      countOccurrence(currentPath);

      if (attrs == null) {
         return;
      }

      //   Attributes are counted under "<element XPath>/@<attribute name>".
      for (int i = 0; i < attrs.getLength(); i++) {
         String aName = effectiveName(attrs.getLocalName(i), attrs.getQName(i));
         countOccurrence(currentPath + "/@" + aName);
      }
   }   //   startElement

   /**
    * Leaves the current element by removing its XPath from the stack, so
    * that its parent becomes the current element again.
    *
    * @param namespaceURI namespace URI of the element (not used here).
    * @param lName        local name of the element (not used here).
    * @param qName        qualified name of the element (not used here).
    */
   @Override
   public void endElement(String namespaceURI,
      String lName,
      String qName)
      throws SAXException
   {
      xPaths.pop();
   }   //   endElement

   //   Picks the name to use in an XPath: the local name when one was
   //   supplied, otherwise the qualified name.
   private static String effectiveName(String localName, String qName)
   {
      if (localName == null || "".equals(localName)) {
         return qName;
      }
      return localName;
   }   //   effectiveName

   //   Adds one to the count stored for xPath, starting from 1 the first
   //   time the XPath is seen.
   private void countOccurrence(String xPath)
   {
      Integer previous = xPathCounts.get(xPath);
      int updated = (previous == null) ? 1 : previous.intValue() + 1;
      xPathCounts.put(xPath, Integer.valueOf(updated));
   }   //   countOccurrence
}   //   XPathCounterSaxHandler