Back to index

wims  3.65+svn20090927
TrivialDOM.java
Go to the documentation of this file.
00001 /*
00002     WIMSchem Elements: Chemistry molecular diagram drawing tool.
00003     
00004     (c) 2008 Dr. Alex M. Clark
00005     
00006     Released as GNUware, under the Gnu Public License (GPL)
00007     
00008     See www.gnu.org for details.
00009 */
00010 
00011 package WIMSchem.ds;
00012 
00013 import WIMSchem.*;
00014 
00015 import java.io.*;
00016 import java.util.*;
00017 
00018 /*
00019     An incredibly lightweight implementation of DOM-style access to XML content. Only a subset of XML is supported, namely
00020     simple combinations of elements, attributes and text. Overly sophisticated input files may break the reader. Also, some
00021     of the pedantic XML treatment of whitespace is simplified (which suits the rest of this application nicely). Malformed XML
00022     should, by and large, generate vaguely helpful explanations.
00023 */
00024 
/**
 * Minimal DOM-style container for simple XML: elements, attributes, plain text and CDATA.
 *
 * <p>A document is a tree of {@link Node} elements whose children are either further nodes or
 * {@link Text} leaves. {@link #ReadXML(BufferedReader)} parses a stream into such a tree and
 * {@link #WriteXML(Writer,TrivialDOM)} serialises it again. The two are symmetric: the five predefined
 * entities written by the escape helpers are decoded again on reading, and CDATA content is written
 * and read verbatim.
 *
 * <p>Comments, processing instructions and the XML declaration are skipped on input. DTDs, namespaces
 * and custom entities are not interpreted. Instances are not synchronised.
 */
public class TrivialDOM
{
    /** Value returned by {@code Node.ChildType} when the child is an element. */
    public static final int TYPE_NODE=1;
    /** Value returned by {@code Node.ChildType} when the child is a text leaf. */
    public static final int TYPE_TEXT=2;

    // message used whenever the input stops in the middle of a construct
    private static final String EOF_MESSAGE="ReadXML: unexpected end of file during parsing";
    // longest entity body (between '&' and ';') that the reader will try to decode
    private static final int MAX_ENTITY_LENGTH=10;

    /**
     * An XML element: a name, a set of attributes and an ordered list of children, each of which is
     * either a {@code Node} or a {@code Text}.
     */
    class Node
    {
        private Node parentNode=null;
        private String nodeName;
        // insertion-ordered, so attributes are written back in the order they were set or read
        private Map<String,String> nodeAttr;
        private List<Object> children;

        /** Creates a detached element with the given tag name, no attributes and no children. */
        public Node(String NodeName)
        {
            nodeName=NodeName;
            nodeAttr=new LinkedHashMap<String,String>();
            children=new ArrayList<Object>();
        }

        /** Returns the element this node was attached to, or null if it has never been attached. */
        public Node Parent() {return parentNode;}
        /** Records the parent pointer only; does not add this node to the parent's child list. */
        public void SetParent(Node Parent) {parentNode=Parent;}

        public String NodeName() {return nodeName;}
        public void SetNodeName(String Name) {nodeName=Name;}

        /** Returns the value of the named attribute, or null if it is not set. */
        public String Attribute(String Attr) {return nodeAttr.get(Attr);}

        /**
         * Sets (or replaces) an attribute.
         * @throws NullPointerException if the name or the value is null
         */
        public void SetAttribute(String Attr,String Value)
        {
            // nulls were never storable; keep rejecting them up front rather than failing later on output
            if (Attr==null || Value==null) throw new NullPointerException("attribute name and value must not be null");
            nodeAttr.put(Attr,Value);
        }

        /** Returns the attribute names in the order they were first set. */
        public String[] GetAttributeNames()
        {
            return nodeAttr.keySet().toArray(new String[nodeAttr.size()]);
        }

        public int NumChildren() {return children.size();}

        /** Returns {@link #TYPE_NODE} or {@link #TYPE_TEXT} for child N, or 0 for anything else. */
        public int ChildType(int N)
        {
            Object child=children.get(N);
            if (child instanceof Node) return TYPE_NODE;
            if (child instanceof Text) return TYPE_TEXT;
            return 0;
        }
        /** Returns child N as an element; throws ClassCastException if it is a text leaf. */
        public Node GetChildNode(int N) {return (Node)children.get(N);}
        /** Returns child N as a text leaf; throws ClassCastException if it is an element. */
        public Text GetChildText(int N) {return (Text)children.get(N);}

        /** Removes all children; attributes are left untouched. */
        public void Clear() {children.clear();}
        public void DeleteChild(int N) {children.remove(N);}

        /** Replaces all children with a single text leaf. */
        public void SetText(String Txt,boolean Preserve)
        {
            Clear();
            AppendText(Txt,Preserve);
        }

        /** Returns the concatenated text of this element and all of its descendants, in document order. */
        public String GetText()
        {
            StringBuilder buf=new StringBuilder();
            for (int n=0;n<NumChildren();n++)
            {
                if (ChildType(n)==TYPE_TEXT) buf.append(GetChildText(n).Get());
                else buf.append(GetChildNode(n).GetText());
            }
            return buf.toString();
        }

        public void AppendChild(Node Nod) {Nod.SetParent(this); children.add(Nod);}
        public void AppendChild(Text Txt) {Txt.SetParent(this); children.add(Txt);}
        public void InsertChild(int N,Node Nod) {Nod.SetParent(this); children.add(N,Nod);}
        public void InsertChild(int N,Text Txt) {Txt.SetParent(this); children.add(N,Txt);}

        /** Creates a new element with the given name, appends it as the last child and returns it. */
        public Node AppendNode(String Name)
        {
            Node nod=new Node(Name);
            AppendChild(nod);
            return nod;
        }
        /** Creates a new text leaf, appends it as the last child and returns it. */
        public Text AppendText(String Txt,boolean Preserve)
        {
            Text txt=new Text(Txt,Preserve);
            AppendChild(txt);
            return txt;
        }
    }

    /** A run of character data belonging to an element. */
    class Text
    {
        private Node parentNode=null;
        private String text;
        private boolean preserve; // if true, is CDATA type; otherwise may be freely trimmed for whitespace

        public Text(String Text,boolean Preserve) {text=Text; preserve=Preserve;}

        public Node Parent() {return parentNode;}
        public void SetParent(Node Parent) {parentNode=Parent;}

        public String Get() {return text;}
        public void Set(String Txt) {text=Txt;}
        /** True if this text is written as a CDATA section. */
        public boolean Preserve() {return preserve;}
    }

    /** Creates a detached element; attach it with {@code AppendChild} or {@code InsertChild}. */
    public Node CreateNode(String Name) {return new Node(Name);}
    /** Creates a detached text leaf; attach it with {@code AppendChild} or {@code InsertChild}. */
    public Text CreateText(String Text,boolean Preserve) {return new Text(Text,Preserve);}

    Node doc=null;

    // constructors

    /** Creates a DOM with no document element; {@link #Document()} returns null. */
    public TrivialDOM() {}

    /** Creates a DOM whose document element has the given name. */
    public TrivialDOM(String DocName)
    {
        doc=new Node(DocName);
    }
    /** Creates a DOM around an existing element, which becomes the document element. */
    public TrivialDOM(Node DocNode)
    {
        doc=DocNode;
    }

    /** Returns the document (root) element, or null if none has been set. */
    public Node Document() {return doc;}

    /** Returns the document serialised as XML text, exactly as {@link #WriteXML} would emit it. */
    @Override
    public String toString()
    {
        StringWriter out=new StringWriter();
        try {WriteXML(out,this);}
        catch (IOException e) {return e.getMessage();} // a StringWriter does not fail; kept so toString never throws
        return out.toString();
    }

    // parsing input files

    /**
     * Parses XML text into a new DOM.
     *
     * <p>The input is first cut into chunks (tags, text, CDATA sections, comments, processing
     * instructions) and the chunks are then assembled into the node tree. Non-CDATA text is trimmed and
     * whitespace-only text is dropped. Entity references in text and attribute values are decoded.
     * Attribute values may use either double or single quotes.
     *
     * @param in source of the XML text; it is read to the end but not closed
     * @return the parsed document; if the input contains no element, the document node is named "unknown"
     * @throws IOException on a read failure, or if the input is truncated, has mismatched or unclosed
     *         tags, malformed attributes, more than one root element, or text outside the root
     */
    public static TrivialDOM ReadXML(BufferedReader in) throws IOException
    {
        return buildTree(tokenize(in));
    }

    // PART 1: read the input one character at a time and carve it up into chunks, each either a complete piece of
    // markup ("<...>") or the plain text between two pieces of markup
    private static List<String> tokenize(Reader in) throws IOException
    {
        List<String> chunks=new ArrayList<String>();
        StringBuilder text=new StringBuilder();
        int ich=in.read();
        while (ich>=0)
        {
            if (ich=='<')
            {
                chunks.add(readMarkup(in));
                ich=in.read();
                continue;
            }

            text.setLength(0);
            while (ich>=0 && ich!='<') {text.append((char)ich); ich=in.read();}

            if (ich>=0) chunks.add(text.toString());
            // trailing whitespace at the end of the file is fine; anything else means the input was cut short
            else if (text.toString().trim().length()>0) throw new IOException(EOF_MESSAGE);
        }
        return chunks;
    }

    // reads one piece of markup whose opening '<' has already been consumed, and returns it including both delimiters;
    // CDATA runs to "]]>", comments run to "-->", anything else runs to the first '>' that is not inside quotes
    private static String readMarkup(Reader in) throws IOException
    {
        StringBuilder buf=new StringBuilder("<");
        char quote=0; // the quote character currently open inside a tag, or 0 if none
        while (true)
        {
            int ich=in.read();
            if (ich<0) throw new IOException(EOF_MESSAGE);
            char ch=(char)ich;
            buf.append(ch);

            if (startsWith(buf,"<!["))
            {
                if (endsWith(buf,"]]>")) return buf.toString();
            }
            else if (startsWith(buf,"<!-"))
            {
                // "<!---->" is the shortest comment: the closing dashes may not be shared with the opening ones
                if (buf.length()>=7 && endsWith(buf,"-->")) return buf.toString();
            }
            else if (quote!=0)
            {
                if (ch==quote) quote=0;
            }
            else if (ch=='"' || ch=='\'') quote=ch;
            else if (ch=='>') return buf.toString();
        }
    }

    private static boolean startsWith(StringBuilder buf,String prefix)
    {
        if (buf.length()<prefix.length()) return false;
        for (int n=0;n<prefix.length();n++) if (buf.charAt(n)!=prefix.charAt(n)) return false;
        return true;
    }

    private static boolean endsWith(StringBuilder buf,String suffix)
    {
        int off=buf.length()-suffix.length();
        if (off<0) return false;
        for (int n=0;n<suffix.length();n++) if (buf.charAt(off+n)!=suffix.charAt(n)) return false;
        return true;
    }

    // PART 2: analyze the chunks and build up the node tree
    private static TrivialDOM buildTree(List<String> chunks) throws IOException
    {
        TrivialDOM xml=new TrivialDOM("unknown");
        Node node=null; // the element currently open, or null when outside the root
        boolean rootSeen=false;

        for (String chunk : chunks)
        {
            if (chunk.trim().length()==0) continue; // ignore chunks which are pure whitespace

            if (isStartTag(chunk))
            {
                String tag=chunk.substring(1,chunk.length()-1);
                boolean isclosed=tag.endsWith("/");
                if (isclosed) tag=tag.substring(0,tag.length()-1);

                int nameEnd=0;
                while (nameEnd<tag.length() && !Character.isWhitespace(tag.charAt(nameEnd))) nameEnd++;
                String name=tag.substring(0,nameEnd);

                Node newNode;
                if (node==null)
                {
                    // a second top-level element must not silently take over the existing document node
                    if (rootSeen) throw new IOException("Multiple root elements: ["+snip(chunk)+"].");
                    rootSeen=true;
                    newNode=xml.Document();
                    newNode.SetNodeName(name);
                }
                else newNode=node.AppendNode(name);

                readAttributes(tag,nameEnd,newNode);

                if (!isclosed) node=newNode;
            }
            else if (chunk.startsWith("</"))
            {
                if (node==null) throw new IOException("Unexpected end tag: ["+snip(chunk)+"].");
                String name=chunk.substring(2,chunk.length()-1).trim();
                if (!name.equals(node.NodeName()))
                    throw new IOException("Closing tag does not match opening tag: ["+snip(name)+"].");
                node=node.Parent();
            }
            else if (chunk.startsWith("<![CDATA["))
            {
                if (node==null) throw new IOException("Unexpected CDATA node: ["+snip(chunk)+"].");
                if (!chunk.endsWith("]]>")) throw new IOException("CDATA node not ended: ["+snip(chunk)+"].");
                node.AppendText(chunk.substring(9,chunk.length()-3),true); // CDATA is taken verbatim
            }
            else if (chunk.startsWith("<!--"))
            {
                if (!chunk.endsWith("-->")) throw new IOException("Unterminated comment: ["+snip(chunk)+"].");
            }
            else if (chunk.startsWith("<?")) {} // ignore
            else if (chunk.startsWith("<")) throw new IOException("Unexpected angle bracket, near: ["+snip(chunk)+"].");
            else
            {
                if (node==null) throw new IOException("Misplaced text-like block: ["+snip(chunk)+"].");
                node.AppendText(unescape(chunk.trim()),false);
            }
        }

        if (node!=null) throw new IOException("Unclosed element at end of file: ["+snip(node.NodeName())+"].");

        return xml;
    }

    // true if the chunk looks like "<name ...>" or "<name .../>"
    private static boolean isStartTag(String chunk)
    {
        if (chunk.length()<2 || chunk.charAt(0)!='<' || !chunk.endsWith(">")) return false;
        char ch=chunk.charAt(1);
        return Character.isLetter(ch) || ch=='_' || ch==':';
    }

    // parses the name="value" pairs found in tag from position pos onward, and stores them on target; values may be
    // enclosed in either kind of quote and may contain whitespace
    private static void readAttributes(String tag,int pos,Node target) throws IOException
    {
        final int len=tag.length();
        while (true)
        {
            while (pos<len && Character.isWhitespace(tag.charAt(pos))) pos++;
            if (pos>=len) return;

            int eq=tag.indexOf('=',pos);
            String key=eq<0 ? "" : tag.substring(pos,eq).trim();
            boolean badKey=key.length()==0;
            for (int n=0;n<key.length() && !badKey;n++) badKey=Character.isWhitespace(key.charAt(n));
            if (badKey) throw new IOException("Malformatted attribute: ["+snip(tag.substring(pos))+"].");

            int start=eq+1;
            while (start<len && Character.isWhitespace(tag.charAt(start))) start++;
            char quote=start<len ? tag.charAt(start) : 0;
            int end=(quote=='"' || quote=='\'') ? tag.indexOf(quote,start+1) : -1;
            if (end<0) throw new IOException("Malformed attribute value: ["+snip(tag.substring(pos))+"].");

            target.SetAttribute(key,unescape(tag.substring(start+1,end)));
            pos=end+1;
        }
    }

    // replaces the predefined entities and numeric character references with the characters they stand for; anything
    // that is not a recognised reference is copied through unchanged
    private static String unescape(String s)
    {
        if (s.indexOf('&')<0) return s;

        final int len=s.length();
        StringBuilder out=new StringBuilder(len);
        int pos=0;
        while (pos<len)
        {
            char ch=s.charAt(pos);
            String decoded=null;
            int semi=-1;
            if (ch=='&')
            {
                // only look a short distance ahead, so a stray '&' in a long text stays cheap
                int limit=Math.min(len,pos+2+MAX_ENTITY_LENGTH);
                for (int n=pos+1;n<limit;n++) if (s.charAt(n)==';') {semi=n; break;}
                if (semi>=0) decoded=decodeEntity(s.substring(pos+1,semi));
            }
            if (decoded==null) {out.append(ch); pos++;}
            else {out.append(decoded); pos=semi+1;}
        }
        return out.toString();
    }

    // returns the text for an entity body such as "lt", "#65" or "#x41", or null if it is not recognised
    private static String decodeEntity(String name)
    {
        if (name.equals("lt")) return "<";
        if (name.equals("gt")) return ">";
        if (name.equals("amp")) return "&";
        if (name.equals("quot")) return "\"";
        if (name.equals("apos")) return "'";

        if (name.length()<2 || name.charAt(0)!='#') return null;
        boolean hex=name.charAt(1)=='x' || name.charAt(1)=='X';
        int radix=hex ? 16 : 10;
        String digits=name.substring(hex ? 2 : 1);
        if (digits.length()==0) return null;
        for (int n=0;n<digits.length();n++) if (Character.digit(digits.charAt(n),radix)<0) return null;
        try
        {
            int code=Integer.parseInt(digits,radix);
            if (code<=0 || !Character.isValidCodePoint(code)) return null;
            return new String(Character.toChars(code));
        }
        catch (NumberFormatException e) {return null;} // too many digits to be a character
    }

    // chop a string off if it's too big to go in an exception
    private static String snip(String str)
    {
        if (str.length()<60) return str;
        return str.substring(0,60)+"...";
    }

    // writing output files

    /**
     * Writes the XML declaration followed by the document tree, then flushes the writer.
     * If the DOM has no document element, only the declaration is written.
     *
     * @param out destination; flushed but not closed
     * @param dom the document to serialise
     * @throws IOException if the writer fails
     */
    public static void WriteXML(Writer out,TrivialDOM dom) throws IOException
    {
        out.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
        if (dom.Document()!=null) RecursiveWriteNode(out,dom.Document(),0);
        out.flush();
    }

    // writes one element and everything beneath it, indented by one space per nesting level
    private static void RecursiveWriteNode(Writer out,Node nod,int Level) throws IOException
    {
        // emit the node tag & attributes

        for (int n=0;n<Level;n++) out.write(" ");
        out.write("<"+nod.NodeName());
        String[] attr=nod.GetAttributeNames();
        for (int n=0;n<attr.length;n++) out.write(" "+attr[n]+"=\""+EscapeAttr(nod.Attribute(attr[n]))+"\"");

        // special case for empty nodes
        if (nod.NumChildren()==0) {out.write("/>\n"); return;}

        out.write(">");

        // a lone text child, or leading CDATA, is kept on the same line as the tags so that no whitespace is added to it
        boolean doIndent=true;
        if (nod.NumChildren()==1 && nod.ChildType(0)==TYPE_TEXT) doIndent=false;
        else if (nod.NumChildren()>0 && nod.ChildType(0)==TYPE_TEXT && nod.GetChildText(0).Preserve()) doIndent=false;

        if (doIndent) out.write("\n");

        // emit the child nodes

        for (int n=0;n<nod.NumChildren();n++)
        {
            if (nod.ChildType(n)==TYPE_TEXT)
            {
                Text txt=nod.GetChildText(n);
                if (doIndent) for (int i=0;i<Level+1;i++) out.write(" ");
                if (txt.Preserve())
                {
                    // CDATA content is literal, so it must not be entity-escaped; the only forbidden sequence is the
                    // terminator itself, which is split across two adjacent sections
                    out.write("<![CDATA[");
                    out.write(txt.Get().replace("]]>","]]]]><![CDATA[>"));
                    out.write("]]>");
                }
                else out.write(EscapeText(txt.Get()));
                if (doIndent) out.write("\n");
            }
            else RecursiveWriteNode(out,nod.GetChildNode(n),Level+1);
        }

        // emit the closing tag

        if (doIndent) for (int n=0;n<Level;n++) out.write(" ");
        out.write("</"+nod.NodeName()+">\n");
    }

    // miscellaneous

    /**
     * Makes a string safe to place inside a quoted attribute value, by replacing
     * {@code & < > " '} with their entity references.
     *
     * @param S the raw value; must not be null
     * @return the escaped value
     */
    public static String EscapeAttr(String S)
    {
        return escape(S,true);
    }

    /**
     * Makes a string safe to use as general XML text, by replacing {@code & < >} with their entity
     * references.
     *
     * @param S the raw text; must not be null
     * @return the escaped text
     */
    public static String EscapeText(String S)
    {
        return escape(S,false);
    }

    // single pass over the input, so an '&' introduced by one replacement is never escaped a second time
    private static String escape(String S,boolean quotes)
    {
        StringBuilder out=new StringBuilder(S.length()+16);
        for (int n=0;n<S.length();n++)
        {
            char ch=S.charAt(n);
            if (ch=='&') out.append("&amp;");
            else if (ch=='<') out.append("&lt;");
            else if (ch=='>') out.append("&gt;");
            else if (quotes && ch=='"') out.append("&quot;");
            else if (quotes && ch=='\'') out.append("&apos;");
            else out.append(ch);
        }
        return out.toString();
    }
}