Back to index

wims  3.65+svn20090927
XmlReader.java
Go to the documentation of this file.
00001 package rene.util.xml;
00002 
00003 import java.io.BufferedInputStream;
00004 import java.io.BufferedReader;
00005 import java.io.ByteArrayInputStream;
00006 import java.io.FileInputStream;
00007 import java.io.IOException;
00008 import java.io.InputStream;
00009 import java.io.InputStreamReader;
00010 import java.io.UnsupportedEncodingException;
00011 
00012 import rene.util.SimpleByteBuffer;
00013 import rene.util.SimpleStringBuffer;
00014 import rene.util.list.ListElement;
00015 
00016 public class XmlReader
00017 {      BufferedReader In;
00018        SimpleStringBuffer buf=new SimpleStringBuffer(10000);
00019 
00020        public XmlReader ()
00021        {      In=null;
00022        }
00023 
00024        public XmlReader (BufferedReader in)
00025        {      In=in;
00026        }
00027 
00028        public XmlReader (InputStream in)
00029               throws XmlReaderException
00030        {      try
00031               {      // read the file into a buffer
00032                      BufferedInputStream rin=new BufferedInputStream(in);
00033                      SimpleByteBuffer bb=new SimpleByteBuffer(10000);
00034                      while (true)
00035                      {      int k=rin.read();
00036                             if (k<0) break;
00037                             bb.append((byte)k);
00038                      }
00039                      rin.close();
00040                      byte b[]=bb.getByteArray();
00041                      
00042                      // Try to open an ASCII stream, or a default stream
00043                      ByteArrayInputStream bin=new ByteArrayInputStream(b);
00044                      XmlReader R=null;
00045                      try
00046                      {      R=new XmlReader(new BufferedReader(new InputStreamReader(bin,"ASCII")));
00047                      }
00048                      catch (UnsupportedEncodingException ex)
00049                      {      R=new XmlReader(new BufferedReader(new InputStreamReader(bin)));
00050                      }             
00051                                           
00052                      // Determine the encoding
00053                      String Encoding=null;
00054                      while (true)
00055                      {      while (true)
00056                             {      int c=R.read();
00057                                    if (c==-1) throw new Exception("<?xml> tag not found");
00058                                    if (c=='<') break;
00059                             }
00060                             if (R.found("?xml"))
00061                             {      String s=R.scanFor("?>");
00062                                    if (s==null) throw new Exception("<?xml> tag error");
00063                                    int n=s.indexOf("encoding=\"");
00064                                    if (n>=0)
00065                                    {      n+="encoding=\"".length();
00066                                           s=s.substring(n);
00067                                           int m=s.indexOf('\"');
00068                                           if (m<0) throw new Exception("Closing bracket missing");
00069                                           Encoding=s.substring(0,m).toUpperCase();
00070                                           if (Encoding.equals("UTF-8")) Encoding="UTF8";
00071                                                  // for IE5 !
00072                                           break;
00073                                    }
00074                                    break;
00075                             }
00076                      }
00077                      
00078                      // Open a stream with this encoding
00079                      bin=new ByteArrayInputStream(b);
00080                      if (Encoding==null)
00081                             In=new BufferedReader(new InputStreamReader(bin));
00082                      else
00083                             try 
00084                             {      In=new BufferedReader(new InputStreamReader(
00085                                           bin,Encoding));
00086                             }
00087                             catch (UnsupportedEncodingException e)
00088                             {      try
00089                                    {      In=new BufferedReader(new InputStreamReader(
00090                                                  bin,"ASCII"));
00091                                    }
00092                                    catch (UnsupportedEncodingException ex)
00093                                    {      In=new BufferedReader(new InputStreamReader(bin));
00094                                    }                                  
00095                             }
00096               }
00097               catch (Exception e)
00098               {      throw new XmlReaderException(e.toString());
00099               }
00100        }
00101 
00102        public void init (InputStream in)
00103               throws XmlReaderException
00104        {      try
00105               {      // read the file into a buffer
00106                      BufferedInputStream rin=new BufferedInputStream(in);
00107                      SimpleByteBuffer bb=new SimpleByteBuffer(10000);
00108                      while (true)
00109                      {      int k=rin.read();
00110                             if (k<0) break;
00111                             bb.append((byte)k);
00112                      }
00113                      rin.close();
00114                      byte b[]=bb.getByteArray();
00115                      
00116                      // Try to open an ASCII stream, or a default stream
00117                      ByteArrayInputStream bin=new ByteArrayInputStream(b);
00118                      XmlReader R=null;
00119                      try
00120                      {      R=new XmlReader(new BufferedReader(new InputStreamReader(bin,"ASCII")));
00121                      }
00122                      catch (UnsupportedEncodingException ex)
00123                      {      R=new XmlReader(new BufferedReader(new InputStreamReader(bin)));
00124                      }             
00125                                           
00126                      // Determine the encoding
00127                      String Encoding=null;
00128                      while (true)
00129                      {      while (true)
00130                             {      int c=R.read();
00131                                    if (c==-1) throw new Exception("<?xml> tag not found");
00132                                    if (c=='<') break;
00133                             }
00134                             if (R.found("?xml"))
00135                             {      String s=R.scanFor("?>");
00136                                    if (s==null) throw new Exception("<?xml> tag error");
00137                                    int n=s.indexOf("encoding=\"");
00138                                    if (n>=0)
00139                                    {      n+="encoding=\"".length();
00140                                           s=s.substring(n);
00141                                           int m=s.indexOf('\"');
00142                                           if (m<0) throw new Exception("Closing bracket missing");
00143                                           Encoding=s.substring(0,m).toUpperCase();
00144                                           if (Encoding.equals("UTF-8")) Encoding="UTF8";
00145                                                  // for IE5 !
00146                                           break;
00147                                    }
00148                                    break;
00149                             }
00150                      }
00151                      
00152                      // Open a stream with this encoding
00153                      bin=new ByteArrayInputStream(b);
00154                      if (Encoding==null)
00155                             In=new BufferedReader(new InputStreamReader(bin));
00156                      else
00157                             try 
00158                             {      In=new BufferedReader(new InputStreamReader(
00159                                           bin,Encoding));
00160                             }
00161                             catch (UnsupportedEncodingException e)
00162                             {      try
00163                                    {      In=new BufferedReader(new InputStreamReader(
00164                                                  bin,"ASCII"));
00165                                    }
00166                                    catch (UnsupportedEncodingException ex)
00167                                    {      In=new BufferedReader(new InputStreamReader(bin));
00168                                    }                                  
00169                             }
00170               }
00171               catch (Exception e)
00172               {      throw new XmlReaderException(e.toString());
00173               }
00174        }
00175 
00181        public XmlTree scan ()
00182               throws XmlReaderException
00183        {      while (true)
00184               {      while (true)
00185                      {      int c=read();
00186                             if (c==-1) return null;
00187                             if (c=='<') break;
00188                      }
00189                      if (found("?xml"))
00190                      {      String s=scanFor("?>");
00191                             if (s==null) return null;
00192                             XmlTree t=new XmlTree(new XmlTagRoot());
00193                             t.addchild(new XmlTree(new XmlTagPI(s)));
00194                             scanContent(t);
00195                             return t;
00196                      }
00197               }
00198        }
00199        
00200        public void scanContent (XmlTree t)
00201               throws XmlReaderException
00202        {      //System.out.println("Sanning for "+t.getTag().name()+" ("+
00203               //     t.getTag().countParams()+")");
00204               while (true)
00205               {      String s=scanFor('<');
00206                      if (s==null)
00207                      {      if (t.getTag() instanceof XmlTagRoot) return;
00208                             exception("File ended surprisingly");
00209                      }
00210                      if (!empty(s))
00211                      {      t.addchild(new XmlTree(new XmlTagText(
00212                                    XmlTranslator.toText(s))));
00213                      }
00214                      if (found("!--"))
00215                      {      s=scanFor("-->");
00216                             continue;
00217                      }
00218                      if (found("!"))
00219                      {      s=scanTagFor('>');
00220                             continue;
00221                      }
00222                      if (found("?"))
00223                      {      s=scanTagFor("?>");
00224                             t.addchild(new XmlTree(new XmlTagPI(s)));
00225                             continue;
00226                      }
00227                      s=scanTagFor('>');
00228                      if (s==null)
00229                             exception("> missing");
00230                      if (s.startsWith("/"))
00231                      {      if (s.substring(1).equals(t.getTag().name()))
00232                                    return;
00233                             else 
00234                                    exception("End tag without start tag");
00235                      }
00236                      if (s.endsWith("/"))
00237                      {      t.addchild(new XmlTree(new XmlTag(s.substring(0,s.length()-1))));
00238                      }
00239                      else
00240                      {      XmlTree t0=new XmlTree(new XmlTag(s));
00241                             scanContent(t0);
00242                             t.addchild(t0);
00243                      }
00244               }
00245        }
00246 
00247        public boolean empty (String s)
00248        {      int n=s.length();
00249               for (int i=0; i<n; i++)
00250               {      char c=s.charAt(i);
00251                      if (c!=' ' && c!='\n' && c!='\t') return false;
00252               }
00253               return true;
00254        }
00255 
00260        public int skipBlanks ()
00261               throws XmlReaderException
00262        {      while (true)
00263               {      int c=read();
00264                      if (c==' ' || c=='\t' || c=='\n') continue;
00265                      else return c;
00266               }
00267        }
00268 
00273        public String scanFor (char end)
00274               throws XmlReaderException
00275        {      buf.clear();
00276               int c=read();
00277               while (c!=end)
00278               {      buf.append((char)c);
00279                      c=read();
00280                      if (c<0) return null;
00281               }
00282               return buf.toString();
00283        }
00284        
00289        public String scanFor (String s)
00290               throws XmlReaderException
00291        {      buf.clear();
00292               while (!found(s))
00293               {      int c=read();
00294                      if (c<0) return null;
00295                      buf.append((char)c);
00296               }
00297               for (int i=0; i<s.length(); i++) read();
00298               return buf.toString();
00299        }
00300        
00305        public String scanTagFor (char end)
00306               throws XmlReaderException
00307        {      buf.clear();
00308               int c=read();
00309               while (c!=end)
00310               {      if (c=='\"')
00311                      {      buf.append((char)c);
00312                             while (true)
00313                             {      c=read();
00314                                    if (c<0) return null;
00315                                    if (c=='\"') break;
00316                                    buf.append((char)c);
00317                             }
00318                             buf.append((char)c);
00319                      }
00320                      else if (c=='\'')
00321                      {      buf.append((char)c);
00322                             while (true)
00323                             {      c=read();
00324                                    if (c<0) return null;
00325                                    if (c=='\'') break;
00326                                    buf.append((char)c);
00327                             }
00328                             buf.append((char)c);
00329                      }
00330                      else buf.append((char)c);
00331                      c=read();
00332                      if (c<0) return null;
00333               }
00334               return buf.toString();
00335        }
00336        
00341        public String scanTagFor (String s)
00342               throws XmlReaderException
00343        {      buf.clear();
00344               while (!found(s))
00345               {      int c=read();
00346                      if (c<0) return null;
00347                      if (c=='\"')
00348                      {      buf.append((char)c);
00349                             while (true)
00350                             {      c=read();
00351                                    if (c<0) return null;
00352                                    if (c=='\"') break;
00353                                    buf.append((char)c);
00354                             }
00355                             buf.append((char)c);
00356                      }
00357                      else if (c=='\'')
00358                      {      buf.append((char)c);
00359                             while (true)
00360                             {      c=read();
00361                                    if (c<0) return null;
00362                                    if (c=='\'') break;
00363                                    buf.append((char)c);
00364                             }
00365                             buf.append((char)c);
00366                      }
00367                      else buf.append((char)c);
00368               }
00369               for (int i=0; i<s.length(); i++) read();
00370               return buf.toString();
00371        }
00372        
00373        String Line=null;
00374        int LinePos;
00375        
00376        public int read ()
00377               throws XmlReaderException
00378        {      try
00379               {      if (Line==null)
00380                      {      Line=In.readLine();
00381                             LinePos=0;
00382                             // System.out.println("Read --> "+Line);
00383                      }
00384                      if (Line==null) return -1;
00385                      if (LinePos>=Line.length())
00386                      {      Line=null;
00387                             return '\n';
00388                      }
00389                      return Line.charAt(LinePos++);
00390               }
00391               catch (Exception e)
00392               {      return -1;
00393               }
00394        }
00395 
00399        public boolean found (String s)
00400        {      int n=s.length();
00401               if (LinePos+n>Line.length()) return false;
00402               for (int i=0; i<n; i++)
00403                      if (s.charAt(i)!=Line.charAt(LinePos+i)) return false;
00404               return true;
00405        }
00406        
00407        public void exception (String s)
00408               throws XmlReaderException
00409        {      throw new XmlReaderException(s,Line,LinePos);
00410        }
00411        
00415        public static void main (String args[])
00416        {      try
00417               {      BufferedReader in=new BufferedReader(
00418                             new InputStreamReader(
00419                             new FileInputStream("rene\\util\\xml\\test.xml"),"UTF8"));
00420                      XmlReader reader=new XmlReader(in);
00421                      XmlTree tree=reader.scan();
00422                      in.close();
00423                      print(tree);
00424               }
00425               catch (XmlReaderException e)
00426               {      System.out.println(e.toString()+"\n"+
00427                             e.getLine()+"\n"+"Position : "+e.getPos());
00428               }
00429               catch (IOException e)
00430               {      System.out.println(e);
00431               }
00432        }
00433        
00434        public static void print (XmlTree t)
00435        {      XmlTag tag=t.getTag();
00436               System.out.print("<"+tag.name());
00437               for (int i=0; i<tag.countParams(); i++)
00438               {      System.out.print(" "+tag.getParam(i)+"=\""+tag.getValue(i)+"\"");
00439               }
00440               System.out.println(">");
00441               ListElement el=t.children().first();
00442               while (el!=null)
00443               {      print((XmlTree)(el.content()));
00444                      el=el.next();
00445               }
00446               System.out.println("</"+tag.name()+">");
00447        }
00448        
00449        public static boolean testXml (String s)
00450        {      int i=0;
00451               while (i<s.length())
00452               {      char c=s.charAt(i);
00453                      if (c=='<') break;
00454                      i++;
00455               }
00456               if (i>=s.length()) return false;
00457               if (s.substring(i).startsWith("<?xml") || s.substring(i).startsWith("< ?xml") ) return true;
00458               return false;
00459        }
00460 }