CSCI 5733
XML Application Development
Spring 2006
Suggested Solution to Homework #3

(1) For example:

package courses.xml.Spring2006;

/*--
      Kwok-Bun Yue
      Feb 14, 2006

      This program is a suggested solution for homework
      #3, question 1 of CSCI 5733 XML Application Development, Spring 2006 at UHCL. 

      See:
      http://dcm.cl.uh.edu/yue/courses/xml/Spring2006/hw/h3.asp (question 1)

      Minimal documentation
**/
import javax.xml.parsers.*;

import org.xml.sax.SAXException; 
import org.xml.sax.SAXParseException; 
import org.w3c.dom.*;

import java.io.*;
import java.util.*;

/**
 * Command-line tool that decides whether two XML files have equal contents.
 *
 * Two documents are equal when their root elements are equal. Two elements are
 * equal when they have the same tag name, the same set of attributes
 * (order-independent, same values) and pairwise-equal child content, where
 * child content is the sequence of child elements and text runs.
 *
 * Comments and processing instructions are not considered. The program does
 * not handle XML namespaces or entity references. All whitespace is removed
 * from text before it is compared, so text that differs only in whitespace
 * (including whitespace between words) compares equal.
 */
public class EqualXML {
   /**
    * Reads the two XML files named on the command line and prints to standard
    * output whether their contents (elements, attributes and text) are equal.
    * Prints a usage line to standard error unless exactly two arguments are
    * given, and a diagnostic plus stack trace if either file cannot be parsed.
    *
    * @param argv the two XML file names to compare
    */
   public static void main(String argv[]) {
      if (argv.length != 2) {
         System.err.println("usage: java EqualXML XMLfile1 XMLfile2");
         return;
      }

      DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
      Document document1;
      Document document2;
      //   Only parsing is guarded here, so that a failure in the comparison
      //   itself is never misreported as a parse error.
      try {
         DocumentBuilder builder = factory.newDocumentBuilder();
         //   parse() either returns a document or throws; no null check needed.
         document1 = builder.parse(new File(argv[0]));
         document2 = builder.parse(new File(argv[1]));
      } catch (ParserConfigurationException e) {
         reportParseFailure(e);
         return;
      } catch (SAXException e) {
         reportParseFailure(e);
         return;
      } catch (IOException e) {
         reportParseFailure(e);
         return;
      }

      String verdict = equalXMLDocument(document1, document2) ? "equal" : "different";
      System.out.println("Contents (elements, attributes and text) of the two input XML files, " +
                  argv[0] + " and " + argv[1] + ", are " + verdict + ".\n");
   }

   /**
    * Reports on standard error that the input files could not be parsed.
    *
    * @param e the exception raised while building the parser or parsing
    */
   private static void reportParseFailure(Exception e) {
      System.err.println("Cannot parse the files ");
      e.printStackTrace();
   }   //   reportParseFailure

   /**
    * Whether the two XML documents are equal, i.e. whether their document
    * (root) elements are equal.
    *
    * @param document1 the first parsed document
    * @param document2 the second parsed document
    * @return true if the root elements are equal
    */
   private static boolean equalXMLDocument(Document document1, Document document2) {
      return equalXMLElement(document1.getDocumentElement(), document2.getDocumentElement());
   }   //   equalXMLDocument

   /**
    * Whether the two elements are equal: same tag name, equal attribute sets
    * and pairwise-equal compacted children (see compactList). Child elements
    * are compared recursively; text runs are compared as strings.
    *
    * @param element1 the first element
    * @param element2 the second element
    * @return true if the two elements have equal contents
    */
   private static boolean equalXMLElement(Element element1, Element element2) {
      //   Check element name equality.
      if (!element1.getTagName().equals(element2.getTagName())) {
         return false;
      }

      //   Check elements having equal attribute sets.
      if (!equalAttributeSet(element1.getAttributes(), element2.getAttributes())) {
         return false;
      }

      //   Compact the children lists, retaining only element nodes and
      //   coalesced text, which is stored as String.
      List<Object> children1 = compactList(element1.getChildNodes());
      List<Object> children2 = compactList(element2.getChildNodes());
      if (children1.size() != children2.size()) {
         return false;
      }

      for (int i = 0; i < children1.size(); i++) {
         Object child1 = children1.get(i);
         Object child2 = children2.get(i);
         if ((child1 instanceof Element) && (child2 instanceof Element)) {
            if (!equalXMLElement((Element) child1, (Element) child2)) {
               return false;
            }
         }
         else if ((child1 instanceof String) && (child2 instanceof String)) {
            if (!child1.equals(child2)) {
               return false;
            }
         }
         else {
            //   An element on one side and text on the other.
            return false;
         }
      }
      return true;
   }   //   equalXMLElement

   /**
    * Whether two attribute sets are equal: same number of attributes, and
    * every attribute of the first set is present in the second with the same
    * value. Attribute order is irrelevant.
    *
    * @param attributes1 the attributes of the first element
    * @param attributes2 the attributes of the second element
    * @return true if both sets hold the same name/value pairs
    */
   private static boolean equalAttributeSet(NamedNodeMap attributes1, NamedNodeMap attributes2) {
      if (attributes1.getLength() != attributes2.getLength()) {
         return false;
      }
      //   With equal sizes, checking containment in one direction is enough.
      for (int i = 0; i < attributes1.getLength(); i++) {
         Attr attr1 = (Attr) attributes1.item(i);
         Attr attr2 = (Attr) attributes2.getNamedItem(attr1.getName());
         if ((attr2 == null) || !attr1.getValue().equals(attr2.getValue())) {
            return false;
         }
      }
      return true;
   }   //   equalAttributeSet

   /**
    * Returns a list of either Element or String built from a list of nodes.
    * Element nodes are retained. Text and CDATA nodes between two elements
    * are coalesced into a single String (see addList for how that String is
    * normalized). All other node types, including comments and processing
    * instructions, are skipped and do not break up a text run.
    *
    * @param nodes the child nodes of an element
    * @return the compacted children, in document order
    */
   private static List<Object> compactList(NodeList nodes) {
      List<Object> result = new ArrayList<Object>();
      StringBuilder buffer = new StringBuilder();

      for (int i = 0; i < nodes.getLength(); i++) {
         Node node = nodes.item(i);

         switch (node.getNodeType())
         {
         case Node.ELEMENT_NODE:
            //   An element ends the current text run.
            addList(result, buffer);
            buffer.setLength(0);
            result.add(node);
            break;
         case Node.TEXT_NODE:
         case Node.CDATA_SECTION_NODE:
            buffer.append(node.getNodeValue());
            break;
         default:
            //   Comments, processing instructions and any other node types
            //   are not considered and leave the pending text untouched.
            break;
         }
      }
      //   Flush any text that follows the last element.
      addList(result, buffer);
      return result;
   }   //   compactList

   /**
    * Adds the text collected in buffer to result as a String, after removing
    * every whitespace character from it. Nothing is added if no
    * non-whitespace text remains. The buffer itself is not modified.
    *
    * @param result the list being built by compactList
    * @param buffer the text collected since the last element
    */
   private static void addList(List<Object> result,
                        StringBuilder buffer) {
      String bufferString = buffer.toString().replaceAll("\\s","");
      if (!bufferString.equals("")) {
         result.add(bufferString);
      }
   }   //   addList
}   //   EqualXML