Back to index

lightning-sunbird  0.9+nobinonly
nsMorkReader.cpp
Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /* ***** BEGIN LICENSE BLOCK *****
00003  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00004  *
00005  * The contents of this file are subject to the Mozilla Public License Version
00006  * 1.1 (the "License"); you may not use this file except in compliance with
00007  * the License. You may obtain a copy of the License at
00008  * http://www.mozilla.org/MPL/
00009  *
00010  * Software distributed under the License is distributed on an "AS IS" basis,
00011  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00012  * for the specific language governing rights and limitations under the
00013  * License.
00014  *
00015  * The Original Code is the Mork Reader.
00016  *
00017  * The Initial Developer of the Original Code is
00018  * Google Inc.
00019  * Portions created by the Initial Developer are Copyright (C) 2006
00020  * the Initial Developer. All Rights Reserved.
00021  *
00022  * Contributor(s):
00023  *   Brian Ryner <bryner@brianryner.com> (original author)
00024  *
00025  * Alternatively, the contents of this file may be used under the terms of
00026  * either the GNU General Public License Version 2 or later (the "GPL"), or
00027  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00028  * in which case the provisions of the GPL or the LGPL are applicable instead
00029  * of those above. If you wish to allow use of your version of this file only
00030  * under the terms of either the GPL or the LGPL, and not to allow others to
00031  * use your version of this file under the terms of the MPL, indicate your
00032  * decision by deleting the provisions above and replace them with the notice
00033  * and other provisions required by the GPL or the LGPL. If you do not delete
00034  * the provisions above, a recipient may use your version of this file under
00035  * the terms of any one of the MPL, the GPL or the LGPL.
00036  *
00037  * ***** END LICENSE BLOCK ***** */
00038 
00039 #include "nsMorkReader.h"
00040 #include "prio.h"
00041 #include "nsNetUtil.h"
00042 #include "nsVoidArray.h"
00043 
00044 // A FixedString implementation that can hold 2 80-character lines
00045 class nsCLineString : public nsFixedCString
00046 {
00047 public:
00048   nsCLineString() : fixed_string_type(mStorage, sizeof(mStorage), 0) {}
00049   explicit nsCLineString(const substring_type& str)
00050     : fixed_string_type(mStorage, sizeof(mStorage), 0)
00051   {
00052     Assign(str);
00053   }
00054 
00055 private:
00056   char_type mStorage[160];
00057 };
00058 
00059 // Convert a hex character (0-9, A-F) to its corresponding byte value.
00060 // The character pointed to by 'c' is modified in place.
00061 inline PRBool
00062 ConvertChar(char *c)
00063 {
00064   char c1 = *c;
00065   if ('0' <= c1 && c1 <= '9') {
00066     *c = c1 - '0';
00067     return PR_TRUE;
00068   }
00069   if ('A' <= c1 && c1 <= 'F') {
00070     *c = c1 - 'A' + 10;
00071     return PR_TRUE;
00072   }
00073   return PR_FALSE;
00074 }
00075 
00076 // Unescape a Mork value.  Mork uses $xx escaping to encode non-ASCII
00077 // characters.  Additionally, '$' and '\' are backslash-escaped.
00078 // The result of the unescape is in returned into aResult.
00079 
00080 static void
00081 MorkUnescape(const nsCSubstring &aString, nsCString &aResult)
00082 {
00083   PRUint32 len = aString.Length();
00084 
00085   // We optimize for speed over space here -- size the result buffer to
00086   // the size of the source, which is an upper bound on the size of the
00087   // unescaped string.
00088   // FIXME: Mork assume there will never be errors
00089   if (!EnsureStringLength(aResult, len)) {
00090     aResult.Truncate();
00091     return; // out of memory.
00092   }
00093 
00094   char *result = aResult.BeginWriting();
00095   const char *source = aString.BeginReading();
00096   const char *sourceEnd = source + len;
00097 
00098   const char *startPos = nsnull;
00099   PRUint32 bytes;
00100   for (; source < sourceEnd; ++source) {
00101     char c = *source;
00102     if (c == '\\') {
00103       if (startPos) {
00104         bytes = source - startPos;
00105         memcpy(result, startPos, bytes);
00106         result += bytes;
00107         startPos = nsnull;
00108       }
00109       if (source < sourceEnd - 1) {
00110         *(result++) = *(++source);
00111       }
00112     } else if (c == '$') {
00113       if (startPos) {
00114         bytes = source - startPos;
00115         memcpy(result, startPos, bytes);
00116         result += bytes;
00117         startPos = nsnull;
00118       }
00119       if (source < sourceEnd - 2) {
00120         // Would be nice to use ToInteger() here, but it currently
00121         // requires a null-terminated string.
00122         char c2 = *(++source);
00123         char c3 = *(++source);
00124         if (ConvertChar(&c2) && ConvertChar(&c3)) {
00125           *(result++) = ((c2 << 4) | c3);
00126         }
00127       }
00128     } else if (!startPos) {
00129       startPos = source;
00130     }
00131   }
00132   if (startPos) {
00133     bytes = source - startPos;
00134     memcpy(result, startPos, bytes);
00135     result += bytes;
00136   }
00137   aResult.SetLength(result - aResult.BeginReading());
00138 }
00139 
00140 nsresult
00141 nsMorkReader::Init()
00142 {
00143   NS_ENSURE_TRUE(mValueMap.Init(), NS_ERROR_OUT_OF_MEMORY);
00144   NS_ENSURE_TRUE(mTable.Init(), NS_ERROR_OUT_OF_MEMORY);
00145   return NS_OK;
00146 }
00147 
00148 PR_STATIC_CALLBACK(PLDHashOperator)
00149 DeleteStringArray(const nsCSubstring& aKey,
00150                   nsTArray<nsCString> *aData,
00151                   void *aUserArg)
00152 {
00153   delete aData;
00154   return PL_DHASH_NEXT;
00155 }
00156 
00157 nsMorkReader::~nsMorkReader()
00158 {
00159   mTable.EnumerateRead(DeleteStringArray, nsnull);
00160 }
00161 
00162 struct AddColumnClosure
00163 {
00164   AddColumnClosure(nsTArray<nsMorkReader::MorkColumn> *a,
00165                    nsMorkReader::IndexMap *c)
00166     : array(a), columnMap(c), result(NS_OK) {}
00167 
00168   nsTArray<nsMorkReader::MorkColumn> *array;
00169   nsMorkReader::IndexMap *columnMap;
00170   nsresult result;
00171 };
00172 
00173 PR_STATIC_CALLBACK(PLDHashOperator)
00174 AddColumn(const nsCSubstring &id, nsCString name, void *userData)
00175 {
00176   AddColumnClosure *closure = NS_STATIC_CAST(AddColumnClosure*, userData);
00177   nsTArray<nsMorkReader::MorkColumn> *array = closure->array;
00178 
00179   if (!array->AppendElement(nsMorkReader::MorkColumn(id, name)) ||
00180       !closure->columnMap->Put(id, array->Length() - 1)) {
00181     closure->result = NS_ERROR_OUT_OF_MEMORY;
00182     return PL_DHASH_STOP;
00183   }
00184 
00185   return PL_DHASH_NEXT;
00186 }
00187 
00188 nsresult
00189 nsMorkReader::Read(nsIFile *aFile)
00190 {
00191   nsCOMPtr<nsIFileInputStream> stream =
00192     do_CreateInstance(NS_LOCALFILEINPUTSTREAM_CONTRACTID);
00193   NS_ENSURE_TRUE(stream, NS_ERROR_FAILURE);
00194 
00195   nsresult rv = stream->Init(aFile, PR_RDONLY, 0, 0);
00196   NS_ENSURE_SUCCESS(rv, rv);
00197 
00198   mStream = do_QueryInterface(stream);
00199   NS_ASSERTION(mStream, "file input stream must impl nsILineInputStream");
00200 
00201   nsCLineString line;
00202   rv = ReadLine(line);
00203   if (!line.EqualsLiteral("// <!-- <mdb:mork:z v=\"1.4\"/> -->")) {
00204     return NS_ERROR_FAILURE; // unexpected file format
00205   }
00206 
00207   IndexMap columnMap;
00208   NS_ENSURE_TRUE(columnMap.Init(), NS_ERROR_OUT_OF_MEMORY);
00209 
00210   while (NS_SUCCEEDED(ReadLine(line))) {
00211     // Trim off leading spaces
00212     PRUint32 idx = 0, len = line.Length();
00213     while (idx < len && line[idx] == ' ') {
00214       ++idx;
00215     }
00216     if (idx >= len) {
00217       continue;
00218     }
00219 
00220     const nsCSubstring &l = Substring(line, idx);
00221 
00222     // Look at the line to figure out what section type this is
00223     if (StringBeginsWith(l, NS_LITERAL_CSTRING("< <(a=c)>"))) {
00224       // Column map.  We begin by creating a hash of column id to column name.
00225       StringMap columnNameMap;
00226       NS_ENSURE_TRUE(columnNameMap.Init(), NS_ERROR_OUT_OF_MEMORY);
00227 
00228       rv = ParseMap(l, &columnNameMap);
00229       NS_ENSURE_SUCCESS(rv, rv);
00230 
00231       // Now that we have the list of columns, we put them into a flat array.
00232       // Rows will have value arrays of the same size, with indexes that
00233       // correspond to the columns array.  As we insert each column into the
00234       // array, we also make an entry in columnMap so that we can look up the
00235       // index given the column id.
00236       mColumns.SetCapacity(columnNameMap.Count());
00237 
00238       AddColumnClosure closure(&mColumns, &columnMap);
00239       columnNameMap.EnumerateRead(AddColumn, &closure);
00240       if (NS_FAILED(closure.result)) {
00241         return closure.result;
00242       }
00243     } else if (StringBeginsWith(l, NS_LITERAL_CSTRING("<("))) {
00244       // Value map
00245       rv = ParseMap(l, &mValueMap);
00246       NS_ENSURE_SUCCESS(rv, rv);
00247     } else if (l[0] == '{' || l[0] == '[') {
00248       // Table / table row
00249       rv = ParseTable(l, columnMap);
00250       NS_ENSURE_SUCCESS(rv, rv);
00251     } else {
00252       // Don't know, hopefully don't care
00253     }
00254   }
00255 
00256   return NS_OK;
00257 }
00258 
00259 void
00260 nsMorkReader::EnumerateRows(RowEnumerator aCallback, void *aUserData) const
00261 {
00262   // Constify the table values
00263   typedef const nsDataHashtable<IDKey, const nsTArray<nsCString>* > ConstTable;
00264   NS_REINTERPRET_CAST(ConstTable*, &mTable)->EnumerateRead(aCallback,
00265                                                            aUserData);
00266 }
00267 
00268 // Parses a key/value map of the form
00269 // <(k1=v1)(k2=v2)...>
00270 
00271 nsresult
00272 nsMorkReader::ParseMap(const nsCSubstring &aLine, StringMap *aMap)
00273 {
00274   nsCLineString line(aLine);
00275   nsCAutoString key;
00276   nsresult rv = NS_OK;
00277 
00278   // If the first line is the a=c line (column map), just skip over it.
00279   if (StringBeginsWith(line, NS_LITERAL_CSTRING("< <(a=c)>"))) {
00280     rv = ReadLine(line);
00281   }
00282 
00283   for (; NS_SUCCEEDED(rv); rv = ReadLine(line)) {
00284     PRUint32 idx = 0;
00285     PRUint32 len = line.Length();
00286     PRUint32 tokenStart;
00287 
00288     while (idx < len) {
00289       switch (line[idx++]) {
00290       case '(':
00291         // Beginning of a key/value pair
00292         if (!key.IsEmpty()) {
00293           NS_WARNING("unterminated key/value pair?");
00294           key.Truncate(0);
00295         }
00296 
00297         tokenStart = idx;
00298         while (idx < len && line[idx] != '=') {
00299           ++idx;
00300         }
00301         key = Substring(line, tokenStart, idx - tokenStart);
00302         break;
00303       case '=':
00304         {
00305           // Beginning of the value
00306           if (key.IsEmpty()) {
00307             NS_WARNING("stray value");
00308             break;
00309           }
00310 
00311           tokenStart = idx;
00312           while (idx < len && line[idx] != ')') {
00313             if (line[idx] == '\\') {
00314               ++idx; // skip escaped ')' characters
00315             }
00316             ++idx;
00317           }
00318           PRUint32 tokenEnd = PR_MIN(idx, len);
00319           ++idx;
00320 
00321           nsCString value;
00322           MorkUnescape(Substring(line, tokenStart, tokenEnd - tokenStart),
00323                        value);
00324           aMap->Put(key, value);
00325           key.Truncate(0);
00326           break;
00327         }
00328       case '>':
00329         // End of the map.
00330         NS_WARN_IF_FALSE(key.IsEmpty(),
00331                          "map terminates inside of key/value pair");
00332         return NS_OK;
00333       }
00334     }
00335   }
00336 
00337   // We ran out of lines and the map never terminated.  This probably indicates
00338   // a parsing error.
00339   NS_WARNING("didn't find end of key/value map");
00340   return NS_ERROR_FAILURE;
00341 }
00342 
00343 // Parses a table row of the form [123(^45^67)..]
00344 // (row id 123 has the value with id 67 for the column with id 45).
00345 // A '^' prefix for a column or value references an entry in the column or
00346 // value map.  '=' is used as the separator when the value is a literal.
00347 
00348 nsresult
00349 nsMorkReader::ParseTable(const nsCSubstring &aLine, const IndexMap &aColumnMap)
00350 {
00351   nsCLineString line(aLine);
00352   const PRUint32 columnCount = mColumns.Length(); // total number of columns
00353 
00354   PRInt32 columnIndex = -1; // column index of the cell we're parsing
00355   // value array for the row we're parsing
00356   nsTArray<nsCString> *currentRow = nsnull;
00357   PRBool inMetaRow = PR_FALSE;
00358 
00359   do {
00360     PRUint32 idx = 0;
00361     PRUint32 len = line.Length();
00362     PRUint32 tokenStart, tokenEnd;
00363 
00364     while (idx < len) {
00365       switch (line[idx++]) {
00366       case '{':
00367         // This marks the beginning of a table section.  There's a lot of
00368         // junk before the first row that looks like cell values but isn't.
00369         // Skip to the first '['.
00370         while (idx < len && line[idx] != '[') {
00371           if (line[idx] == '{') {
00372             inMetaRow = PR_TRUE; // the meta row is enclosed in { }
00373           } else if (line[idx] == '}') {
00374             inMetaRow = PR_FALSE;
00375           }
00376           ++idx;
00377         }
00378         break;
00379       case '[':
00380         {
00381           // Start of a new row.  Consume the row id, up to the first '('.
00382           // Row edits also have a table namespace, separated from the row id
00383           // by a colon.  We don't make use of the namespace, but we need to
00384           // make sure not to consider it part of the row id.
00385           if (currentRow) {
00386             NS_WARNING("unterminated row?");
00387             currentRow = nsnull;
00388           }
00389 
00390           // Check for a '-' at the start of the id.  This signifies that
00391           // if the row already exists, we should delete all columns from it
00392           // before adding the new values.
00393           PRBool cutColumns;
00394           if (idx < len && line[idx] == '-') {
00395             cutColumns = PR_TRUE;
00396             ++idx;
00397           } else {
00398             cutColumns = PR_FALSE;
00399           }
00400 
00401           tokenStart = idx;
00402           while (idx < len &&
00403                  line[idx] != '(' &&
00404                  line[idx] != ']' &&
00405                  line[idx] != ':') {
00406             ++idx;
00407           }
00408           tokenEnd = idx;
00409           while (idx < len && line[idx] != '(' && line[idx] != ']') {
00410             ++idx;
00411           }
00412           
00413           if (inMetaRow) {
00414             mMetaRow = NewVoidStringArray(columnCount);
00415             NS_ENSURE_TRUE(mMetaRow, NS_ERROR_OUT_OF_MEMORY);
00416             currentRow = mMetaRow;
00417           } else {
00418             const nsCSubstring& row = Substring(line, tokenStart,
00419                                                 tokenEnd - tokenStart);
00420             if (!mTable.Get(row, &currentRow)) {
00421               currentRow = NewVoidStringArray(columnCount);
00422               NS_ENSURE_TRUE(currentRow, NS_ERROR_OUT_OF_MEMORY);
00423 
00424               NS_ENSURE_TRUE(mTable.Put(row, currentRow),
00425                              NS_ERROR_OUT_OF_MEMORY);
00426             }
00427           }
00428           if (cutColumns) {
00429             // Set all of the columns to void
00430             // (this differentiates them from columns which are empty strings).
00431             for (PRUint32 i = 0; i < columnCount; ++i) {
00432               currentRow->ElementAt(i).SetIsVoid(PR_TRUE);
00433             }
00434           }
00435           break;
00436         }
00437       case ']':
00438         // We're done with the row
00439         currentRow = nsnull;
00440         inMetaRow = PR_FALSE;
00441         break;
00442       case '(':
00443         {
00444           if (!currentRow) {
00445             NS_WARNING("cell value outside of row");
00446             break;
00447           }
00448 
00449           NS_WARN_IF_FALSE(columnIndex == -1, "unterminated cell?");
00450 
00451           PRBool columnIsAtom;
00452           if (line[idx] == '^') {
00453             columnIsAtom = PR_TRUE;
00454             ++idx; // this is not part of the column id, advance past it
00455           } else {
00456             columnIsAtom = PR_FALSE;
00457           }
00458           tokenStart = idx;
00459           while (idx < len && line[idx] != '^' && line[idx] != '=') {
00460             if (line[idx] == '\\') {
00461               ++idx; // skip escaped characters
00462             }
00463             ++idx;
00464           }
00465 
00466           tokenEnd = PR_MIN(idx, len);
00467 
00468           nsCAutoString column;
00469           const nsCSubstring &colValue =
00470             Substring(line, tokenStart, tokenEnd - tokenStart);
00471           if (columnIsAtom) {
00472             column.Assign(colValue);
00473           } else {
00474             MorkUnescape(colValue, column);
00475           }
00476 
00477           if (!aColumnMap.Get(colValue, &columnIndex)) {
00478             NS_WARNING("Column not in column map, discarding it");
00479             columnIndex = -1;
00480           }
00481         }
00482         break;
00483       case '=':
00484       case '^':
00485         {
00486           if (columnIndex == -1) {
00487             NS_WARNING("stray ^ or = marker");
00488             break;
00489           }
00490 
00491           PRBool valueIsAtom = (line[idx - 1] == '^');
00492           tokenStart = idx - 1;  // include the '=' or '^' marker in the value
00493           while (idx < len && line[idx] != ')') {
00494             if (line[idx] == '\\') {
00495               ++idx; // skip escaped characters
00496             }
00497             ++idx;
00498           }
00499           tokenEnd = PR_MIN(idx, len);
00500           ++idx;
00501 
00502           const nsCSubstring &value =
00503             Substring(line, tokenStart, tokenEnd - tokenStart);
00504           if (valueIsAtom) {
00505             (*currentRow)[columnIndex] = value;
00506           } else {
00507             nsCAutoString value2;
00508             MorkUnescape(value, value2);
00509             (*currentRow)[columnIndex] = value2;
00510           }
00511           columnIndex = -1;
00512         }
00513         break;
00514       }
00515     }
00516   } while (currentRow && NS_SUCCEEDED(ReadLine(line)));
00517 
00518   return NS_OK;
00519 }
00520 
00521 nsresult
00522 nsMorkReader::ReadLine(nsCString &aLine)
00523 {
00524   PRBool res;
00525   nsresult rv = mStream->ReadLine(aLine, &res);
00526   NS_ENSURE_SUCCESS(rv, rv);
00527   if (!res) {
00528     return NS_ERROR_NOT_AVAILABLE;
00529   }
00530 
00531   while (!aLine.IsEmpty() &&  aLine.Last() == '\\') {
00532     // There is a continuation for this line.  Read it and append.
00533     nsCLineString line2;
00534     rv = mStream->ReadLine(line2, &res);
00535     NS_ENSURE_SUCCESS(rv, rv);
00536     if (!res) {
00537       return NS_ERROR_NOT_AVAILABLE;
00538     }
00539     aLine.Truncate(aLine.Length() - 1);
00540     aLine.Append(line2);
00541   }
00542 
00543   return NS_OK;
00544 }
00545 
00546 void
00547 nsMorkReader::NormalizeValue(nsCString &aValue) const
00548 {
00549   PRUint32 len = aValue.Length();
00550   if (len == 0) {
00551     return;
00552   }
00553   const nsCSubstring &str = Substring(aValue, 1, len - 1);
00554   char c = aValue[0];
00555   if (c == '^') {
00556     if (!mValueMap.Get(str, &aValue)) {
00557       aValue.Truncate(0);
00558     }
00559   } else if (c == '=') {
00560     aValue.Assign(str);
00561   } else {
00562     aValue.Truncate(0);
00563   }
00564 }
00565 
00566 /* static */ nsTArray<nsCString>*
00567 nsMorkReader::NewVoidStringArray(PRInt32 aCount)
00568 {
00569   nsAutoPtr< nsTArray<nsCString> > array(new nsTArray<nsCString>(aCount));
00570   NS_ENSURE_TRUE(array, nsnull);
00571 
00572   for (PRInt32 i = 0; i < aCount; ++i) {
00573     nsCString *elem = array->AppendElement();
00574     NS_ENSURE_TRUE(elem, nsnull);
00575     elem->SetIsVoid(PR_TRUE);
00576   }
00577 
00578   return array.forget();
00579 }