Back to index

lightning-sunbird  0.9+nobinonly
nsRDFParserUtils.cpp
Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
00002 /* ***** BEGIN LICENSE BLOCK *****
00003  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00004  *
00005  * The contents of this file are subject to the Mozilla Public License Version
00006  * 1.1 (the "License"); you may not use this file except in compliance with
00007  * the License. You may obtain a copy of the License at
00008  * http://www.mozilla.org/MPL/
00009  *
00010  * Software distributed under the License is distributed on an "AS IS" basis,
00011  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00012  * for the specific language governing rights and limitations under the
00013  * License.
00014  *
00015  * The Original Code is Mozilla Communicator client code.
00016  *
00017  * The Initial Developer of the Original Code is
00018  * Netscape Communications Corporation.
00019  * Portions created by the Initial Developer are Copyright (C) 1998
00020  * the Initial Developer. All Rights Reserved.
00021  *
00022  * Contributor(s):
00023  *
00024  * Alternatively, the contents of this file may be used under the terms of
00025  * either of the GNU General Public License Version 2 or later (the "GPL"),
00026  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00027  * in which case the provisions of the GPL or the LGPL are applicable instead
00028  * of those above. If you wish to allow use of your version of this file only
00029  * under the terms of either the GPL or the LGPL, and not to allow others to
00030  * use your version of this file under the terms of the MPL, indicate your
00031  * decision by deleting the provisions above and replace them with the notice
00032  * and other provisions required by the GPL or the LGPL. If you do not delete
00033  * the provisions above, a recipient may use your version of this file under
00034  * the terms of any one of the MPL, the GPL or the LGPL.
00035  *
00036  * ***** END LICENSE BLOCK ***** */
00037 
00038 /*
00039 
00040   Some useful parsing routines.
00041 
00042   This isn't the best place for them: I wish that they'd go into some
00043   shared area (like mozilla/base).
00044 
00045  */
00046 
00047 #include <stdlib.h> // XXX for atoi(), maybe this should go into nsCRT?
00048 #include "nsIURL.h"
00049 #include "nsString.h"
00050 #include "nsRDFParserUtils.h"
00051 
00052 // XXX This totally sucks. I wish that mozilla/base had this code.
00053 PRUnichar
00054 nsRDFParserUtils::EntityToUnicode(const char* buf)
00055 {
00056     if ((buf[0] == 'g') &&
00057         (buf[1] == 't') &&
00058         (buf[2] == '\0'))
00059         return PRUnichar('>');
00060 
00061     if ((buf[0] == 'l') &&
00062         (buf[1] == 't') &&
00063         (buf[2] == '\0'))
00064         return PRUnichar('<');
00065 
00066     if ((buf[0] == 'a') &&
00067         (buf[1] == 'm') &&
00068         (buf[2] == 'p') &&
00069         (buf[3] == '\0'))
00070         return PRUnichar('&');
00071 
00072     if ((buf[0] == 'a') &&
00073         (buf[1] == 'p') &&
00074         (buf[2] == 'o') &&
00075         (buf[3] == 's') &&
00076         (buf[4] == '\0'))
00077         return PRUnichar('\'');
00078 
00079     if ((buf[0] == 'q') &&
00080         (buf[1] == 'u') &&
00081         (buf[2] == 'o') &&
00082         (buf[3] == 't') &&
00083         (buf[4] == '\0'))
00084         return PRUnichar('"');
00085 
00086     NS_NOTYETIMPLEMENTED("look this up in the declared-entity table");
00087     return PRUnichar('?');
00088 }
00089 
00090 // XXX Code copied from nsHTMLContentSink. It should be shared.
00091 void
00092 nsRDFParserUtils::StripAndConvert(nsString& aResult)
00093 {
00094     if ( !aResult.IsEmpty() ) {
00095       // Strip quotes if present
00096       PRUnichar first = aResult.First();
00097       if ((first == '"') || (first == '\'')) {
00098           if (aResult.Last() == first) {
00099               aResult.Cut(0, 1);
00100               PRInt32 pos = aResult.Length() - 1;
00101               if (pos >= 0) {
00102                   aResult.Cut(pos, 1);
00103               }
00104           } else {
00105               // Mismatched quotes - leave them in
00106           }
00107       }
00108     }
00109 
00110     // Reduce any entities
00111     // XXX Note: as coded today, this will only convert well formed
00112     // entities.  This may not be compatible enough.
00113     // XXX there is a table in navigator that translates some numeric entities
00114     // should we be doing that? If so then it needs to live in two places (bad)
00115     // so we should add a translate numeric entity method from the parser...
00116     char cbuf[100];
00117     PRUint32 i = 0;
00118     while (i < aResult.Length()) {
00119         // If we have the start of an entity (and it's not at the end of
00120         // our string) then translate the entity into it's unicode value.
00121         if ((aResult.CharAt(i++) == '&') && (i < aResult.Length())) {
00122             PRInt32 start = i - 1;
00123             PRUnichar e = aResult.CharAt(i);
00124             if (e == '#') {
00125                 // Convert a numeric character reference
00126                 i++;
00127                 char* cp = cbuf;
00128                 char* limit = cp + sizeof(cbuf) - 1;
00129                 PRBool ok = PR_FALSE;
00130                 PRUint32 slen = aResult.Length();
00131                 while ((i < slen) && (cp < limit)) {
00132                     PRUnichar f = aResult.CharAt(i);
00133                     if (f == ';') {
00134                         i++;
00135                         ok = PR_TRUE;
00136                         break;
00137                     }
00138                     if ((f >= '0') && (f <= '9')) {
00139                         *cp++ = char(f);
00140                         i++;
00141                         continue;
00142                     }
00143                     break;
00144                 }
00145                 if (!ok || (cp == cbuf)) {
00146                     continue;
00147                 }
00148                 *cp = '\0';
00149                 if (cp - cbuf > 5) {
00150                     continue;
00151                 }
00152                 PRInt32 ch = PRInt32( ::atoi(cbuf) );
00153                 if (ch > 65535) {
00154                     continue;
00155                 }
00156 
00157                 // Remove entity from string and replace it with the integer
00158                 // value.
00159                 aResult.Cut(start, i - start);
00160                 aResult.Insert(PRUnichar(ch), start);
00161                 i = start + 1;
00162             }
00163             else if (((e >= 'A') && (e <= 'Z')) ||
00164                      ((e >= 'a') && (e <= 'z'))) {
00165                 // Convert a named entity
00166                 i++;
00167                 char* cp = cbuf;
00168                 char* limit = cp + sizeof(cbuf) - 1;
00169                 *cp++ = char(e);
00170                 PRBool ok = PR_FALSE;
00171                 PRUint32 slen = aResult.Length();
00172                 while ((i < slen) && (cp < limit)) {
00173                     PRUnichar f = aResult.CharAt(i);
00174                     if (f == ';') {
00175                         i++;
00176                         ok = PR_TRUE;
00177                         break;
00178                     }
00179                     if (((f >= '0') && (f <= '9')) ||
00180                         ((f >= 'A') && (f <= 'Z')) ||
00181                         ((f >= 'a') && (f <= 'z'))) {
00182                         *cp++ = char(f);
00183                         i++;
00184                         continue;
00185                     }
00186                     break;
00187                 }
00188                 if (!ok || (cp == cbuf)) {
00189                     continue;
00190                 }
00191                 *cp = '\0';
00192                 PRInt32 ch;
00193 
00194                 // XXX Um, here's where we should be converting a
00195                 // named entity. I removed this to avoid a link-time
00196                 // dependency on core raptor.
00197                 ch = EntityToUnicode(cbuf);
00198 
00199                 if (ch < 0) {
00200                     continue;
00201                 }
00202 
00203                 // Remove entity from string and replace it with the integer
00204                 // value.
00205                 aResult.Cut(start, i - start);
00206                 aResult.Insert(PRUnichar(ch), start);
00207                 i = start + 1;
00208             }
00209             else if (e == '{') {
00210                 // Convert a script entity
00211                 // XXX write me!
00212                 NS_NOTYETIMPLEMENTED("convert a script entity");
00213             }
00214         }
00215     }
00216 }
00217 
00218 nsresult
00219 nsRDFParserUtils::GetQuotedAttributeValue(const nsString& aSource,
00220                                           const nsString& aAttribute,
00221                                           nsString& aValue)
00222 {
00223 static const char kQuote = '\"';
00224 static const char kApostrophe = '\'';
00225 
00226     PRInt32 offset;
00227     PRInt32 endOffset = -1;
00228     nsresult result = NS_OK;
00229 
00230     offset = aSource.Find(aAttribute, 0);
00231     if (-1 != offset) {
00232         offset = aSource.FindChar('=', offset);
00233 
00234         PRUnichar next = aSource.CharAt(++offset);
00235         if (kQuote == next) {
00236             endOffset = aSource.FindChar(kQuote, ++offset);
00237         }
00238         else if (kApostrophe == next) {
00239             endOffset = aSource.FindChar(kApostrophe, ++offset);
00240         }
00241 
00242         if (-1 != endOffset) {
00243             aSource.Mid(aValue, offset, endOffset-offset);
00244         }
00245         else {
00246             // Mismatched quotes - return an error
00247             result = NS_ERROR_FAILURE;
00248         }
00249     }
00250     else {
00251         aValue.Truncate();
00252     }
00253 
00254     return result;
00255 }
00256