Back to index

lightning-sunbird  0.9+nobinonly
nsDirIndexParser.cpp
Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /* ***** BEGIN LICENSE BLOCK *****
00003  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00004  *
00005  * The contents of this file are subject to the Mozilla Public License Version
00006  * 1.1 (the "License"); you may not use this file except in compliance with
00007  * the License. You may obtain a copy of the License at
00008  * http://www.mozilla.org/MPL/
00009  *
00010  * Software distributed under the License is distributed on an "AS IS" basis,
00011  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00012  * for the specific language governing rights and limitations under the
00013  * License.
00014  *
00015  * The Original Code is the Mozilla Communicator client code.
00016  *
00017  * The Initial Developer of the Original Code is
00018  * Netscape Communications Corporation.
00019  * Portions created by the Initial Developer are Copyright (C) 1998-2001
00020  * the Initial Developer. All Rights Reserved.
00021  *
00022  * Contributor(s):
00023  *   Chris Waterson           <waterson@netscape.com>
00024  *   Robert John Churchill    <rjc@netscape.com>
00025  *   Pierre Phaneuf           <pp@ludusdesign.com>
00026  *   Bradley Baetz            <bbaetz@cs.mcgill.ca>
00027  *
00028  * Alternatively, the contents of this file may be used under the terms of
00029  * either the GNU General Public License Version 2 or later (the "GPL"), or
00030  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00031  * in which case the provisions of the GPL or the LGPL are applicable instead
00032  * of those above. If you wish to allow use of your version of this file only
00033  * under the terms of either the GPL or the LGPL, and not to allow others to
00034  * use your version of this file under the terms of the MPL, indicate your
00035  * decision by deleting the provisions above and replace them with the notice
00036  * and other provisions required by the GPL or the LGPL. If you do not delete
00037  * the provisions above, a recipient may use your version of this file under
00038  * the terms of any one of the MPL, the GPL or the LGPL.
00039  *
00040  * ***** END LICENSE BLOCK ***** */
00041 
00042 /* This parsing code originally lived in xpfe/components/directory/ - bbaetz */
00043 
00044 #include "prprf.h"
00045 
00046 #include "nsDirIndexParser.h"
00047 #include "nsReadableUtils.h"
00048 #include "nsDirIndex.h"
00049 #include "nsEscape.h"
00050 #include "nsIServiceManager.h"
00051 #include "nsIInputStream.h"
00052 #include "nsIChannel.h"
00053 #include "nsIURI.h"
00054 #include "nsCRT.h"
00055 #include "nsIPrefService.h"
00056 #include "nsIPrefBranch.h"
00057 #include "nsIPrefLocalizedString.h"
00058 
00059 NS_IMPL_THREADSAFE_ISUPPORTS3(nsDirIndexParser,
00060                               nsIRequestObserver,
00061                               nsIStreamListener,
00062                               nsIDirIndexParser)
00063 
00064 nsDirIndexParser::nsDirIndexParser() {
00065 }
00066 
00067 nsresult
00068 nsDirIndexParser::Init() {
00069   mLineStart = 0;
00070   mHasDescription = PR_FALSE;
00071   mFormat = nsnull;
00072 
00073   // get default charset to be used for directory listings (fallback to
00074   // ISO-8859-1 if pref is unavailable).
00075   NS_NAMED_LITERAL_CSTRING(kFallbackEncoding, "ISO-8859-1");
00076   nsXPIDLString defCharset;
00077   nsCOMPtr<nsIPrefBranch> prefs(do_GetService(NS_PREFSERVICE_CONTRACTID));
00078   if (prefs) {
00079     nsCOMPtr<nsIPrefLocalizedString> prefVal;
00080     prefs->GetComplexValue("intl.charset.default",
00081                            NS_GET_IID(nsIPrefLocalizedString),
00082                            getter_AddRefs(prefVal));
00083     if (prefVal)
00084       prefVal->ToString(getter_Copies(defCharset));
00085   }
00086   if (!defCharset.IsEmpty())
00087     LossyCopyUTF16toASCII(defCharset, mEncoding); // charset labels are always ASCII
00088   else
00089     mEncoding.Assign(kFallbackEncoding);
00090  
00091   nsresult rv;
00092   // XXX not threadsafe
00093   if (gRefCntParser++ == 0)
00094     rv = CallGetService(NS_ITEXTTOSUBURI_CONTRACTID, &gTextToSubURI);
00095   else
00096     rv = NS_OK;
00097 
00098   return rv;
00099 }
00100 
00101 nsDirIndexParser::~nsDirIndexParser() {
00102   delete[] mFormat;
00103   // XXX not threadsafe
00104   if (--gRefCntParser == 0) {
00105     NS_IF_RELEASE(gTextToSubURI);
00106   }
00107 }
00108 
00109 NS_IMETHODIMP
00110 nsDirIndexParser::SetListener(nsIDirIndexListener* aListener) {
00111   mListener = aListener;
00112   return NS_OK;
00113 }
00114 
00115 NS_IMETHODIMP
00116 nsDirIndexParser::GetListener(nsIDirIndexListener** aListener) {
00117   NS_IF_ADDREF(*aListener = mListener.get());
00118   return NS_OK;
00119 }
00120 
00121 NS_IMETHODIMP
00122 nsDirIndexParser::GetComment(char** aComment) {
00123   *aComment = ToNewCString(mComment);
00124 
00125   if (!*aComment)
00126     return NS_ERROR_OUT_OF_MEMORY;
00127   
00128   return NS_OK;
00129 }
00130 
00131 NS_IMETHODIMP
00132 nsDirIndexParser::SetEncoding(const char* aEncoding) {
00133   mEncoding.Assign(aEncoding);
00134   return NS_OK;
00135 }
00136 
00137 NS_IMETHODIMP
00138 nsDirIndexParser::GetEncoding(char** aEncoding) {
00139   *aEncoding = ToNewCString(mEncoding);
00140 
00141   if (!*aEncoding)
00142     return NS_ERROR_OUT_OF_MEMORY;
00143 
00144   return NS_OK;
00145 }
00146 
00147 NS_IMETHODIMP
00148 nsDirIndexParser::OnStartRequest(nsIRequest* aRequest, nsISupports* aCtxt) {
00149   return NS_OK;
00150 }
00151 
00152 NS_IMETHODIMP
00153 nsDirIndexParser::OnStopRequest(nsIRequest *aRequest, nsISupports *aCtxt,
00154                                 nsresult aStatusCode) {
00155   // Finish up
00156   if (mBuf.Length() > (PRUint32) mLineStart) {
00157     ProcessData(aRequest, aCtxt);
00158   }
00159 
00160   return NS_OK;
00161 }
00162 
00163 nsDirIndexParser::Field
00164 nsDirIndexParser::gFieldTable[] = {
00165   { "Filename", FIELD_FILENAME },
00166   { "Description", FIELD_DESCRIPTION },
00167   { "Content-Length", FIELD_CONTENTLENGTH },
00168   { "Last-Modified", FIELD_LASTMODIFIED },
00169   { "Content-Type", FIELD_CONTENTTYPE },
00170   { "File-Type", FIELD_FILETYPE },
00171   { nsnull, FIELD_UNKNOWN }
00172 };
00173 
00174 nsrefcnt nsDirIndexParser::gRefCntParser = 0;
00175 nsITextToSubURI *nsDirIndexParser::gTextToSubURI;
00176 
00177 nsresult
00178 nsDirIndexParser::ParseFormat(const char* aFormatStr) {
00179   // Parse a "200" format line, and remember the fields and their
00180   // ordering in mFormat. Multiple 200 lines stomp on each other.
00181 
00182   delete[] mFormat;
00183 
00184   // Lets find out how many elements we have.
00185   // easier to do this then realloc
00186   const char* pos = aFormatStr;
00187   int num = 0;
00188   do {
00189     while (*pos && nsCRT::IsAsciiSpace(PRUnichar(*pos)))
00190       ++pos;
00191     
00192     ++num;
00193 
00194     if (! *pos)
00195       break;
00196 
00197     while (*pos && !nsCRT::IsAsciiSpace(PRUnichar(*pos)))
00198       ++pos;
00199 
00200   } while (*pos);
00201 
00202   mFormat = new int[num+1];
00203   mFormat[num] = -1;
00204   
00205   int formatNum=0;
00206   do {
00207     while (*aFormatStr && nsCRT::IsAsciiSpace(PRUnichar(*aFormatStr)))
00208       ++aFormatStr;
00209     
00210     if (! *aFormatStr)
00211       break;
00212 
00213     nsCAutoString name;
00214     PRInt32     len = 0;
00215     while (aFormatStr[len] && !nsCRT::IsAsciiSpace(PRUnichar(aFormatStr[len])))
00216       ++len;
00217     name.SetCapacity(len + 1);
00218     name.Append(aFormatStr, len);
00219     aFormatStr += len;
00220     
00221     // Okay, we're gonna monkey with the nsStr. Bold!
00222     name.SetLength(nsUnescapeCount(name.BeginWriting()));
00223 
00224     // All tokens are case-insensitive - http://www.mozilla.org/projects/netlib/dirindexformat.html
00225     if (name.LowerCaseEqualsLiteral("description"))
00226       mHasDescription = PR_TRUE;
00227     
00228     for (Field* i = gFieldTable; i->mName; ++i) {
00229       if (name.EqualsIgnoreCase(i->mName)) {
00230         mFormat[formatNum] = i->mType;
00231         ++formatNum;
00232         break;
00233       }
00234     }
00235 
00236   } while (*aFormatStr);
00237   
00238   return NS_OK;
00239 }
00240 
00241 nsresult
00242 nsDirIndexParser::ParseData(nsIDirIndex *aIdx, char* aDataStr) {
00243   // Parse a "201" data line, using the field ordering specified in
00244   // mFormat.
00245 
00246   if (!mFormat) {
00247     // Ignore if we haven't seen a format yet.
00248     return NS_OK;
00249   }
00250 
00251   nsresult rv = NS_OK;
00252 
00253   nsCAutoString filename;
00254 
00255   for (PRInt32 i = 0; mFormat[i] != -1; ++i) {
00256     // If we've exhausted the data before we run out of fields, just
00257     // bail.
00258     if (! *aDataStr)
00259       break;
00260 
00261     while (*aDataStr && nsCRT::IsAsciiSpace(*aDataStr))
00262       ++aDataStr;
00263 
00264     char    *value = aDataStr;
00265 
00266     if (*aDataStr == '"' || *aDataStr == '\'') {
00267       // it's a quoted string. snarf everything up to the next quote character
00268       const char quotechar = *(aDataStr++);
00269       ++value;
00270       while (*aDataStr && *aDataStr != quotechar)
00271         ++aDataStr;
00272       *aDataStr++ = '\0';
00273 
00274       if (! aDataStr) {
00275         NS_WARNING("quoted value not terminated");
00276       }
00277     } else {
00278       // it's unquoted. snarf until we see whitespace.
00279       value = aDataStr;
00280       while (*aDataStr && (!nsCRT::IsAsciiSpace(*aDataStr)))
00281         ++aDataStr;
00282       *aDataStr++ = '\0';
00283     }
00284 
00285     fieldType t = fieldType(mFormat[i]);
00286     switch (t) {
00287     case FIELD_FILENAME: {
00288       // don't unescape at this point, so that UnEscapeAndConvert() can
00289       filename = value;
00290       
00291       PRBool  success = PR_FALSE;
00292       
00293       nsAutoString entryuri;
00294       
00295       if (gTextToSubURI) {
00296         PRUnichar   *result = nsnull;
00297         if (NS_SUCCEEDED(rv = gTextToSubURI->UnEscapeAndConvert(mEncoding.get(), filename.get(),
00298                                                                 &result)) && (result)) {
00299           if (*result) {
00300             aIdx->SetLocation(filename.get());
00301             if (!mHasDescription)
00302               aIdx->SetDescription(result);
00303             success = PR_TRUE;
00304           }
00305           Recycle(result);
00306         } else {
00307           NS_WARNING("UnEscapeAndConvert error");
00308         }
00309       }
00310       
00311       if (success == PR_FALSE) {
00312         // if unsuccessfully at charset conversion, then
00313         // just fallback to unescape'ing in-place
00314         // XXX - this shouldn't be using UTF8, should it?
00315         // when can we fail to get the service, anyway? - bbaetz
00316         aIdx->SetLocation(filename.get());
00317         if (!mHasDescription) {
00318           aIdx->SetDescription(NS_ConvertUTF8toUCS2(value).get());
00319         }
00320       }
00321     }
00322       break;
00323     case FIELD_DESCRIPTION:
00324       nsUnescape(value);
00325       aIdx->SetDescription(NS_ConvertUTF8toUCS2(value).get());
00326       break;
00327     case FIELD_CONTENTLENGTH:
00328       {
00329         PRInt64 len;
00330         PRInt32 status = PR_sscanf(value, "%lld", &len);
00331         if (status == 1)
00332           aIdx->SetSize(len);
00333         else
00334           aIdx->SetSize(LL_MAXUINT); // LL_MAXUINT means unknown
00335       }
00336       break;
00337     case FIELD_LASTMODIFIED:
00338       {
00339         PRTime tm;
00340         nsUnescape(value);
00341         if (PR_ParseTimeString(value, PR_FALSE, &tm) == PR_SUCCESS) {
00342           aIdx->SetLastModified(tm);
00343         }
00344       }
00345       break;
00346     case FIELD_CONTENTTYPE:
00347       aIdx->SetContentType(value);
00348       break;
00349     case FIELD_FILETYPE:
00350       // unescape in-place
00351       nsUnescape(value);
00352       if (!nsCRT::strcasecmp(value, "directory")) {
00353         aIdx->SetType(nsIDirIndex::TYPE_DIRECTORY);
00354       } else if (!nsCRT::strcasecmp(value, "file")) {
00355         aIdx->SetType(nsIDirIndex::TYPE_FILE);
00356       } else if (!nsCRT::strcasecmp(value, "symbolic-link")) {
00357         aIdx->SetType(nsIDirIndex::TYPE_SYMLINK);
00358       } else {
00359         aIdx->SetType(nsIDirIndex::TYPE_UNKNOWN);
00360       }
00361       break;
00362     case FIELD_UNKNOWN:
00363       // ignore
00364       break;
00365     }
00366   }
00367 
00368   return NS_OK;
00369 }
00370 
00371 NS_IMETHODIMP
00372 nsDirIndexParser::OnDataAvailable(nsIRequest *aRequest, nsISupports *aCtxt,
00373                                   nsIInputStream *aStream,
00374                                   PRUint32 aSourceOffset,
00375                                   PRUint32 aCount) {
00376   if (aCount < 1)
00377     return NS_OK;
00378   
00379   PRInt32 len = mBuf.Length();
00380   
00381   // Ensure that our mBuf has capacity to hold the data we're about to
00382   // read.
00383   if (!EnsureStringLength(mBuf, len + aCount))
00384     return NS_ERROR_OUT_OF_MEMORY;
00385 
00386   // Now read the data into our buffer.
00387   nsresult rv;
00388   PRUint32 count;
00389   rv = aStream->Read(mBuf.BeginWriting() + len, aCount, &count);
00390   if (NS_FAILED(rv)) return rv;
00391 
00392   // Set the string's length according to the amount of data we've read.
00393   // Note: we know this to work on nsCString. This isn't guaranteed to
00394   //       work on other strings.
00395   mBuf.SetLength(len + count);
00396 
00397   return ProcessData(aRequest, aCtxt);
00398 }
00399 
00400 nsresult
00401 nsDirIndexParser::ProcessData(nsIRequest *aRequest, nsISupports *aCtxt) {
00402   if (!mListener)
00403     return NS_ERROR_FAILURE;
00404   
00405   PRInt32     numItems = 0;
00406   
00407   while(PR_TRUE) {
00408     ++numItems;
00409     
00410     PRInt32             eol = mBuf.FindCharInSet("\n\r", mLineStart);
00411     if (eol < 0)        break;
00412     mBuf.SetCharAt(PRUnichar('\0'), eol);
00413     
00414     const char  *line = mBuf.get() + mLineStart;
00415     
00416     PRInt32 lineLen = eol - mLineStart;
00417     mLineStart = eol + 1;
00418     
00419     if (lineLen >= 4) {
00420       nsresult  rv;
00421       const char        *buf = line;
00422       
00423       if (buf[0] == '1') {
00424         if (buf[1] == '0') {
00425           if (buf[2] == '0' && buf[3] == ':') {
00426             // 100. Human-readable comment line. Ignore
00427           } else if (buf[2] == '1' && buf[3] == ':') {
00428             // 101. Human-readable information line.
00429             mComment.Append(buf + 4);
00430 
00431             char    *value = ((char *)buf) + 4;
00432             nsUnescape(value);
00433             mListener->OnInformationAvailable(aRequest, aCtxt, NS_ConvertUTF8toUTF16(value));
00434 
00435           } else if (buf[2] == '2' && buf[3] == ':') {
00436             // 102. Human-readable information line, HTML.
00437             mComment.Append(buf + 4);
00438           }
00439         }
00440       } else if (buf[0] == '2') {
00441         if (buf[1] == '0') {
00442           if (buf[2] == '0' && buf[3] == ':') {
00443             // 200. Define field names
00444             rv = ParseFormat(buf + 4);
00445             if (NS_FAILED(rv)) {
00446               return rv;
00447             }
00448           } else if (buf[2] == '1' && buf[3] == ':') {
00449             // 201. Field data
00450             nsCOMPtr<nsIDirIndex> idx = do_CreateInstance("@mozilla.org/dirIndex;1",&rv);
00451             if (NS_FAILED(rv))
00452               return rv;
00453             
00454             rv = ParseData(idx, ((char *)buf) + 4);
00455             if (NS_FAILED(rv)) {
00456               return rv;
00457             }
00458 
00459             mListener->OnIndexAvailable(aRequest, aCtxt, idx);
00460           }
00461         }
00462       } else if (buf[0] == '3') {
00463         if (buf[1] == '0') {
00464           if (buf[2] == '0' && buf[3] == ':') {
00465             // 300. Self-referring URL
00466           } else if (buf[2] == '1' && buf[3] == ':') {
00467             // 301. OUR EXTENSION - encoding
00468             int i = 4;
00469             while (buf[i] && nsCRT::IsAsciiSpace(buf[i]))
00470               ++i;
00471             
00472             if (buf[i])
00473               SetEncoding(buf+i);
00474           }
00475         }
00476       }
00477     }
00478   }
00479   
00480   return NS_OK;
00481 }