Back to index

lightning-sunbird  0.9+nobinonly
nsURLHelper.cpp
Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
00002 /* vim:set ts=4 sw=4 sts=4 et cindent: */
00003 /* ***** BEGIN LICENSE BLOCK *****
00004  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00005  *
00006  * The contents of this file are subject to the Mozilla Public License Version
00007  * 1.1 (the "License"); you may not use this file except in compliance with
00008  * the License. You may obtain a copy of the License at
00009  * http://www.mozilla.org/MPL/
00010  *
00011  * Software distributed under the License is distributed on an "AS IS" basis,
00012  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00013  * for the specific language governing rights and limitations under the
00014  * License.
00015  *
00016  * The Original Code is mozilla.org code.
00017  *
00018  * The Initial Developer of the Original Code is
00019  * Andreas Otte.
00020  * Portions created by the Initial Developer are Copyright (C) 2000
00021  * the Initial Developer. All Rights Reserved.
00022  *
00023  * Contributor(s):
00024  *   Darin Fisher <darin@netscape.com>
00025  *
00026  * Alternatively, the contents of this file may be used under the terms of
00027  * either the GNU General Public License Version 2 or later (the "GPL"), or
00028  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00029  * in which case the provisions of the GPL or the LGPL are applicable instead
00030  * of those above. If you wish to allow use of your version of this file only
00031  * under the terms of either the GPL or the LGPL, and not to allow others to
00032  * use your version of this file under the terms of the MPL, indicate your
00033  * decision by deleting the provisions above and replace them with the notice
00034  * and other provisions required by the GPL or the LGPL. If you do not delete
00035  * the provisions above, a recipient may use your version of this file under
00036  * the terms of any one of the MPL, the GPL or the LGPL.
00037  *
00038  * ***** END LICENSE BLOCK ***** */
00039 
00040 #include "nsURLHelper.h"
00041 #include "nsReadableUtils.h"
00042 #include "nsIServiceManager.h"
00043 #include "nsIIOService.h"
00044 #include "nsIURLParser.h"
00045 #include "nsIURI.h"
00046 #include "nsMemory.h"
00047 #include "nsEscape.h"
00048 #include "nsCOMPtr.h"
00049 #include "nsCRT.h"
00050 #include "nsNetCID.h"
00051 #include "netCore.h"
00052 #include "prprf.h"
00053 
00054 //----------------------------------------------------------------------------
00055 // Init/Shutdown
00056 //----------------------------------------------------------------------------
00057 
00058 static PRBool gInitialized = PR_FALSE;
00059 static nsIURLParser *gNoAuthURLParser = nsnull;
00060 static nsIURLParser *gAuthURLParser = nsnull;
00061 static nsIURLParser *gStdURLParser = nsnull;
00062 
00063 static void
00064 InitGlobals()
00065 {
00066     nsCOMPtr<nsIURLParser> parser;
00067 
00068     parser = do_GetService(NS_NOAUTHURLPARSER_CONTRACTID);
00069     NS_ASSERTION(parser, "failed getting 'noauth' url parser");
00070     if (parser) {
00071         gNoAuthURLParser = parser.get();
00072         NS_ADDREF(gNoAuthURLParser);
00073     }
00074 
00075     parser = do_GetService(NS_AUTHURLPARSER_CONTRACTID);
00076     NS_ASSERTION(parser, "failed getting 'auth' url parser");
00077     if (parser) {
00078         gAuthURLParser = parser.get();
00079         NS_ADDREF(gAuthURLParser);
00080     }
00081 
00082     parser = do_GetService(NS_STDURLPARSER_CONTRACTID);
00083     NS_ASSERTION(parser, "failed getting 'std' url parser");
00084     if (parser) {
00085         gStdURLParser = parser.get();
00086         NS_ADDREF(gStdURLParser);
00087     }
00088 
00089     gInitialized = PR_TRUE;
00090 }
00091 
00092 void
00093 net_ShutdownURLHelper()
00094 {
00095     if (gInitialized) {
00096         NS_IF_RELEASE(gNoAuthURLParser);
00097         NS_IF_RELEASE(gAuthURLParser);
00098         NS_IF_RELEASE(gStdURLParser);
00099         gInitialized = PR_FALSE;
00100     }
00101 }
00102 
00103 //----------------------------------------------------------------------------
00104 // nsIURLParser getters
00105 //----------------------------------------------------------------------------
00106 
00107 nsIURLParser *
00108 net_GetAuthURLParser()
00109 {
00110     if (!gInitialized)
00111         InitGlobals();
00112     return gAuthURLParser;
00113 }
00114 
00115 nsIURLParser *
00116 net_GetNoAuthURLParser()
00117 {
00118     if (!gInitialized)
00119         InitGlobals();
00120     return gNoAuthURLParser;
00121 }
00122 
00123 nsIURLParser *
00124 net_GetStdURLParser()
00125 {
00126     if (!gInitialized)
00127         InitGlobals();
00128     return gStdURLParser;
00129 }
00130 
00131 //----------------------------------------------------------------------------
00132 // file:// URL parsing
00133 //----------------------------------------------------------------------------
00134 
00135 nsresult
00136 net_ParseFileURL(const nsACString &inURL,
00137                  nsACString &outDirectory,
00138                  nsACString &outFileBaseName,
00139                  nsACString &outFileExtension)
00140 {
00141     nsresult rv;
00142 
00143     outDirectory.Truncate();
00144     outFileBaseName.Truncate();
00145     outFileExtension.Truncate();
00146 
00147     const nsPromiseFlatCString &flatURL = PromiseFlatCString(inURL);
00148     const char *url = flatURL.get();
00149     
00150     PRUint32 schemeBeg, schemeEnd;
00151     rv = net_ExtractURLScheme(flatURL, &schemeBeg, &schemeEnd, nsnull);
00152     if (NS_FAILED(rv)) return rv;
00153 
00154     if (strncmp(url + schemeBeg, "file", schemeEnd - schemeBeg) != 0) {
00155         NS_ERROR("must be a file:// url");
00156         return NS_ERROR_UNEXPECTED;
00157     }
00158 
00159     nsIURLParser *parser = net_GetNoAuthURLParser();
00160     NS_ENSURE_TRUE(parser, NS_ERROR_UNEXPECTED);
00161 
00162     PRUint32 pathPos, filepathPos, directoryPos, basenamePos, extensionPos;
00163     PRInt32 pathLen, filepathLen, directoryLen, basenameLen, extensionLen;
00164 
00165     // invoke the parser to extract the URL path
00166     rv = parser->ParseURL(url, flatURL.Length(),
00167                           nsnull, nsnull, // dont care about scheme
00168                           nsnull, nsnull, // dont care about authority
00169                           &pathPos, &pathLen);
00170     if (NS_FAILED(rv)) return rv;
00171 
00172     // invoke the parser to extract filepath from the path
00173     rv = parser->ParsePath(url + pathPos, pathLen,
00174                            &filepathPos, &filepathLen,
00175                            nsnull, nsnull,  // dont care about param
00176                            nsnull, nsnull,  // dont care about query
00177                            nsnull, nsnull); // dont care about ref
00178     if (NS_FAILED(rv)) return rv;
00179 
00180     filepathPos += pathPos;
00181 
00182     // invoke the parser to extract the directory and filename from filepath
00183     rv = parser->ParseFilePath(url + filepathPos, filepathLen,
00184                                &directoryPos, &directoryLen,
00185                                &basenamePos, &basenameLen,
00186                                &extensionPos, &extensionLen);
00187     if (NS_FAILED(rv)) return rv;
00188 
00189     if (directoryLen > 0)
00190         outDirectory = Substring(inURL, filepathPos + directoryPos, directoryLen);
00191     if (basenameLen > 0)
00192         outFileBaseName = Substring(inURL, filepathPos + basenamePos, basenameLen);
00193     if (extensionLen > 0)
00194         outFileExtension = Substring(inURL, filepathPos + extensionPos, extensionLen);
00195     // since we are using a no-auth url parser, there will never be a host
00196     // XXX not strictly true... file://localhost/foo/bar.html is a valid URL
00197 
00198     return NS_OK;
00199 }
00200 
00201 //----------------------------------------------------------------------------
00202 // path manipulation functions
00203 //----------------------------------------------------------------------------
00204 
00205 // Normalize |path| in place: drop "/./", resolve "/../" against the preceding segment, and rewrite %2E/%2e as '.' ahead of the last '/'.
00206 // Only the part before the first '?' or '#' is rewritten; whatever follows is copied through unchanged.
00207 void 
00208 net_CoalesceDirs(netCoalesceFlags flags, char* path)
00209 {
00210     /* Stolen from the old netlib's mkparse.c.
00211      * NET_COALESCE_DOUBLE_SLASH_IS_ROOT: a leading "//" or "/%2F" is remembered as a root marker; NET_COALESCE_ALLOW_RELATIVE_ROOT: ".." above the root is kept instead of dropped.
00212      * modifies a url of the form   /foo/../foo1  ->  /foo1
00213      *                       and    /foo/./foo1   ->  /foo/foo1
00214      *                       and    /foo/foo1/..  ->  /foo/
00215      */
00216     char *fwdPtr = path; // read cursor
00217     char *urlPtr = path; // write cursor; the result is compacted over the input
00218     char *lastslash = path; // last '/' before any '?' or '#' (or |path| itself if there is none)
00219     PRUint32 traversal = 0; // bumped per counted '/' copied below, decremented per "/.." resolved
00220     PRUint32 special_ftp_len = 0; // length of a leading "//" (2) or "/%2F" (4) root marker, else 0
00221 
00222     /* Remember if this url is a special ftp one: */
00223     if (flags & NET_COALESCE_DOUBLE_SLASH_IS_ROOT) 
00224     {
00225        /* some schemes (for example ftp) have the peculiarity that 
00226           the path can begin // or /%2F to mark the root of the 
00227           servers filesystem, a simple / only marks the root relative 
00228           to the user logging in. We remember the length of the marker */
00229         if (nsCRT::strncasecmp(path,"/%2F",4) == 0)
00230             special_ftp_len = 4;
00231         else if (nsCRT::strncmp(path,"//",2) == 0 )
00232             special_ftp_len = 2; 
00233     }
00234 
00235     /* find the end of the path part: the first # or ?, or the terminator */
00236     for(; (*fwdPtr != '\0') && 
00237             (*fwdPtr != '?') && 
00238             (*fwdPtr != '#'); ++fwdPtr)
00239     {
00240     }
00241 
00242     /* found nothing, but go back one only */
00243     /* if there is something to go back to */
00244     if (fwdPtr != path && *fwdPtr == '\0')
00245     {
00246         --fwdPtr;
00247     }
00248 
00249     /* search backwards for the last slash */
00250     for(; (fwdPtr != path) && 
00251             (*fwdPtr != '/'); --fwdPtr)
00252     {
00253     }
00254     lastslash = fwdPtr;
00255     fwdPtr = path;
00256 
00257     /* replace all %2E or %2e with . in the path */
00258     /* but stop at lastslash unless it points at the terminator */
00259     for(; (*fwdPtr != '\0') && 
00260             (*fwdPtr != '?') && 
00261             (*fwdPtr != '#') &&
00262             (*lastslash == '\0' || fwdPtr != lastslash); ++fwdPtr)
00263     {
00264         if (*fwdPtr == '%' && *(fwdPtr+1) == '2' && 
00265             (*(fwdPtr+2) == 'E' || *(fwdPtr+2) == 'e'))
00266         {
00267             *urlPtr++ = '.';
00268             ++fwdPtr; // together with the loop's ++fwdPtr this skips all three chars of "%2E"
00269             ++fwdPtr;
00270         } 
00271         else 
00272         {
00273             *urlPtr++ = *fwdPtr;
00274         }
00275     }
00276     // Copy everything that was not scanned above (from lastslash, '?' or '#' onwards)
00277     for (; *fwdPtr != '\0'; ++fwdPtr)
00278     {
00279         *urlPtr++ = *fwdPtr;
00280     }
00281     *urlPtr = '\0';  // terminate the url 
00282 
00283     // start again, this time for real 
00284     fwdPtr = path;
00285     urlPtr = path;
00286 
00287     for(; (*fwdPtr != '\0') && 
00288             (*fwdPtr != '?') && 
00289             (*fwdPtr != '#'); ++fwdPtr)
00290     {
00291         if (*fwdPtr == '/' && *(fwdPtr+1) == '.' && *(fwdPtr+2) == '/' )
00292         {
00293             // remove . followed by slash
00294             ++fwdPtr; // skips "/."; the following '/' is handled by the next iteration
00295         }
00296         else if(*fwdPtr == '/' && *(fwdPtr+1) == '.' && *(fwdPtr+2) == '.' && 
00297                 (*(fwdPtr+3) == '/' || 
00298                     *(fwdPtr+3) == '\0' || // This will take care of 
00299                     *(fwdPtr+3) == '?' ||  // something like foo/bar/..#sometag
00300                     *(fwdPtr+3) == '#'))
00301         {
00302             // remove foo/.. 
00303             // reverse the urlPtr to the previous slash if possible
00304             // if url does not allow relative root then drop .. above root 
00305             // otherwise retain them in the path 
00306             if(traversal > 0 || !(flags & 
00307                                   NET_COALESCE_ALLOW_RELATIVE_ROOT))
00308             { 
00309                 if (urlPtr != path)
00310                     urlPtr--; // we must be going back at least by one 
00311                 for(;*urlPtr != '/' && urlPtr != path; urlPtr--)
00312                     ;  // null body 
00313                 --traversal; // count back (can only pass below 0 when the flag is unset, where the test above ignores traversal)
00314                 // forward the fwdPtr past the ../
00315                 fwdPtr += 2;
00316                 // if we have reached the beginning of the path
00317                 // while searching for the previous / and we remember
00318                 // that it is an url that begins with /%2F then
00319                 // advance urlPtr again by 3 chars because /%2F already 
00320                 // marks the root of the path
00321                 if (urlPtr == path && special_ftp_len > 3) 
00322                 {
00323                     ++urlPtr;
00324                     ++urlPtr;
00325                     ++urlPtr;
00326                 }
00327                 // special case if we have reached the end 
00328                 // to preserve the last /
00329                 if (*fwdPtr == '.' && *(fwdPtr+1) == '\0')
00330                     ++urlPtr;
00331             } 
00332             else 
00333             {
00334                 // there are too many /.. in this path, just copy them instead.
00335                 // forward the urlPtr past the /.. and copying it
00336 
00337                 // However if we remember it is an url that starts with
00338                 // /%2F and urlPtr just points at the "F" of "/%2F" then do 
00339                 // not overwrite it with the /, just copy .. and move forward
00340                 // urlPtr. 
00341                 if (special_ftp_len > 3 && urlPtr == path+special_ftp_len-1)
00342                     ++urlPtr;
00343                 else 
00344                     *urlPtr++ = *fwdPtr;
00345                 ++fwdPtr;
00346                 *urlPtr++ = *fwdPtr;
00347                 ++fwdPtr;
00348                 *urlPtr++ = *fwdPtr;
00349             }
00350         }
00351         else
00352         {
00353             // count the hierarchy, but only if we have not reached
00354             // the root of some special urls with a special root marker 
00355             if (*fwdPtr == '/' &&  *(fwdPtr+1) != '.' &&
00356                (special_ftp_len != 2 || *(fwdPtr+1) != '/'))
00357                 traversal++;
00358             // copy the url incrementally 
00359             *urlPtr++ = *fwdPtr;
00360         }
00361     }
00362     // Copy remaining stuff from the # or ? onwards
00363     for (; *fwdPtr != '\0'; ++fwdPtr)
00364     {
00365         *urlPtr++ = *fwdPtr;
00366     }
00367     *urlPtr = '\0';  // terminate the url 
00368 
00369     /* 
00370      *  Now lets remove trailing . case
00371      *     /foo/foo1/.   ->  /foo/foo1/
00372      */
00373 
00374     if ((urlPtr > (path+1)) && (*(urlPtr-1) == '.') && (*(urlPtr-2) == '/'))
00375         *(urlPtr-1) = '\0';
00376 }
00377 
00378 nsresult
00379 net_ResolveRelativePath(const nsACString &relativePath,
00380                         const nsACString &basePath,
00381                         nsACString &result)
00382 {
00383     nsCAutoString name;
00384     nsCAutoString path(basePath);
00385     PRBool needsDelim = PR_FALSE;
00386 
00387     if ( !path.IsEmpty() ) {
00388         PRUnichar last = path.Last();
00389         needsDelim = !(last == '/');
00390     }
00391 
00392     nsACString::const_iterator beg, end;
00393     relativePath.BeginReading(beg);
00394     relativePath.EndReading(end);
00395 
00396     PRBool stop = PR_FALSE;
00397     char c;
00398     for (; !stop; ++beg) {
00399         c = (beg == end) ? '\0' : *beg;
00400         //printf("%c [name=%s] [path=%s]\n", c, name.get(), path.get());
00401         switch (c) {
00402           case '\0':
00403           case '#':
00404           case ';':
00405           case '?':
00406             stop = PR_TRUE;
00407             // fall through...
00408           case '/':
00409             // delimiter found
00410             if (name.EqualsLiteral("..")) {
00411                 // pop path
00412                 // If we already have the delim at end, then
00413                 //  skip over that when searching for next one to the left
00414                 PRInt32 offset = path.Length() - (needsDelim ? 1 : 2);
00415                 // First check for errors
00416                 if (offset < 0 ) 
00417                     return NS_ERROR_MALFORMED_URI;
00418                 PRInt32 pos = path.RFind("/", PR_FALSE, offset);
00419                 if (pos >= 0)
00420                     path.Truncate(pos + 1);
00421                 else
00422                     path.Truncate();
00423             }
00424             else if (name.IsEmpty() || name.EqualsLiteral(".")) {
00425                 // do nothing
00426             }
00427             else {
00428                 // append name to path
00429                 if (needsDelim)
00430                     path += '/';
00431                 path += name;
00432                 needsDelim = PR_TRUE;
00433             }
00434             name.Truncate();
00435             break;
00436 
00437           default:
00438             // append char to name
00439             name += c;
00440         }
00441     }
00442     // append anything left on relativePath (e.g. #..., ;..., ?...)
00443     if (c != '\0')
00444         path += Substring(--beg, end);
00445 
00446     result = path;
00447     return NS_OK;
00448 }
00449 
00450 //----------------------------------------------------------------------------
00451 // scheme fu
00452 //----------------------------------------------------------------------------
00453 
00454 /* Extract URI-Scheme if possible */
00455 nsresult
00456 net_ExtractURLScheme(const nsACString &inURI,
00457                      PRUint32 *startPos, 
00458                      PRUint32 *endPos,
00459                      nsACString *scheme)
00460 {
00461     // search for something up to a colon, and call it the scheme
00462     const nsPromiseFlatCString &flatURI = PromiseFlatCString(inURI);
00463     const char* uri_start = flatURI.get();
00464     const char* uri = uri_start;
00465 
00466     if (!uri)
00467         return NS_ERROR_MALFORMED_URI;
00468 
00469     // skip leading white space
00470     while (nsCRT::IsAsciiSpace(*uri))
00471         uri++;
00472 
00473     PRUint32 start = uri - uri_start;
00474     if (startPos) {
00475         *startPos = start;
00476     }
00477 
00478     PRUint32 length = 0;
00479     char c;
00480     while ((c = *uri++) != '\0') {
00481         // First char must be Alpha
00482         if (length == 0 && nsCRT::IsAsciiAlpha(c)) {
00483             length++;
00484         } 
00485         // Next chars can be alpha + digit + some special chars
00486         else if (length > 0 && (nsCRT::IsAsciiAlpha(c) || 
00487                  nsCRT::IsAsciiDigit(c) || c == '+' || 
00488                  c == '.' || c == '-')) {
00489             length++;
00490         }
00491         // stop if colon reached but not as first char
00492         else if (c == ':' && length > 0) {
00493             if (endPos) {
00494                 *endPos = start + length;
00495             }
00496 
00497             if (scheme)
00498                 scheme->Assign(Substring(inURI, start, length));
00499             return NS_OK;
00500         }
00501         else 
00502             break;
00503     }
00504     return NS_ERROR_MALFORMED_URI;
00505 }
00506 
00507 PRBool
00508 net_IsValidScheme(const char *scheme, PRUint32 schemeLen)
00509 {
00510     // first char must be alpha
00511     if (!nsCRT::IsAsciiAlpha(*scheme))
00512         return PR_FALSE;
00513 
00514     // nsCStrings may have embedded nulls -- reject those too
00515     for (; schemeLen; ++scheme, --schemeLen) {
00516         if (!(nsCRT::IsAsciiAlpha(*scheme) ||
00517               nsCRT::IsAsciiDigit(*scheme) ||
00518               *scheme == '+' ||
00519               *scheme == '.' ||
00520               *scheme == '-'))
00521             return PR_FALSE;
00522     }
00523 
00524     return PR_TRUE;
00525 }
00526 
00527 PRBool
00528 net_FilterURIString(const char *str, nsACString& result)
00529 {
00530     NS_PRECONDITION(str, "Must have a non-null string!");
00531     PRBool writing = PR_FALSE;
00532     result.Truncate();
00533     const char *p = str;
00534 
00535     // Remove leading spaces, tabs, CR, LF if any.
00536     while (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n') {
00537         writing = PR_TRUE;
00538         str = p + 1;
00539         p++;
00540     }
00541 
00542     while (*p) {
00543         if (*p == '\t' || *p == '\r' || *p == '\n') {
00544             writing = PR_TRUE;
00545             // append chars up to but not including *p
00546             if (p > str)
00547                 result.Append(str, p - str);
00548             str = p + 1;
00549         }
00550         p++;
00551     }
00552 
00553     // Remove trailing spaces if any
00554     while (((p-1) >= str) && (*(p-1) == ' ')) {
00555         writing = PR_TRUE;
00556         p--;
00557     }
00558 
00559     if (writing && p > str)
00560         result.Append(str, p - str);
00561 
00562     return writing;
00563 }
00564 
00565 #if defined(XP_WIN) || defined(XP_OS2)
00566 PRBool
00567 net_NormalizeFileURL(const nsACString &aURL, nsCString &aResultBuf)
00568 {
00569     PRBool writing = PR_FALSE;
00570 
00571     nsACString::const_iterator beginIter, endIter;
00572     aURL.BeginReading(beginIter);
00573     aURL.EndReading(endIter);
00574 
00575     const char *s, *begin = beginIter.get();
00576 
00577     for (s = begin; s != endIter.get(); ++s)
00578     {
00579         if (*s == '\\')
00580         {
00581             writing = PR_TRUE;
00582             if (s > begin)
00583                 aResultBuf.Append(begin, s - begin);
00584             aResultBuf += '/';
00585             begin = s + 1;
00586         }
00587     }
00588     if (writing && s > begin)
00589         aResultBuf.Append(begin, s - begin);
00590 
00591     return writing;
00592 }
00593 #endif
00594 
00595 //----------------------------------------------------------------------------
00596 // miscellaneous (i.e., stuff that should really be elsewhere)
00597 //----------------------------------------------------------------------------
00598 
// Lower-case |c| in place if it is an ASCII upper-case letter; any other
// value is left alone.
static inline
void ToLower(char &c)
{
    if (c >= 'A' && c <= 'Z')
        c += 'a' - 'A';
}
00605 
00606 void
00607 net_ToLowerCase(char *str, PRUint32 length)
00608 {
00609     for (char *end = str + length; str < end; ++str)
00610         ToLower(*str);
00611 }
00612 
00613 void
00614 net_ToLowerCase(char *str)
00615 {
00616     for (; *str; ++str)
00617         ToLower(*str);
00618 }
00619 
// Scan [iter, stop) forwards for the first character that occurs in the
// NUL-terminated |set|.  The scan also ends at an embedded NUL.
// Returns a pointer to the match, or to the position where the scan ended
// (|stop| or the NUL) when there is none.
char *
net_FindCharInSet(const char *iter, const char *stop, const char *set)
{
    while (iter != stop && *iter != '\0') {
        const char *candidate = set;
        while (*candidate != '\0') {
            if (*candidate == *iter)
                return (char *) iter;
            ++candidate;
        }
        ++iter;
    }
    return (char *) iter;
}
00631 
// Scan [stop, iter) backwards, starting at iter - 1, for the last character
// that occurs in the NUL-terminated |set|.
// Returns a pointer to the match, or stop - 1 when there is none.
char *
net_RFindCharInSet(const char *stop, const char *iter, const char *set)
{
    const char *cur = iter - 1;
    const char *const limit = stop - 1;

    while (cur != limit) {
        const char *candidate = set;
        while (*candidate != '\0') {
            if (*candidate == *cur)
                return (char *) cur;
            ++candidate;
        }
        --cur;
    }
    return (char *) cur;
}
00645 
// Scan [iter, stop) forwards for the first character that does NOT occur in
// the NUL-terminated |set|.
// Returns a pointer to that character, or |stop| when every character of the
// range is in |set|.  An empty range (iter == stop) yields |stop| without
// reading any character, matching what net_RFindCharNotInSet does for an
// empty range; previously *stop was examined and the scan could run on past
// |stop|.
char *
net_FindCharNotInSet(const char *iter, const char *stop, const char *set)
{
    while (iter != stop) {
        // look *iter up in the set
        const char *s = set;
        while (*s != '\0' && *s != *iter)
            ++s;
        if (*s == '\0')
            break;      // *iter is not in the set: found it
        ++iter;
    }
    return (char *) iter;
}
00659 
// Scan [stop, iter) backwards, starting at iter - 1, for the last character
// that does NOT occur in the NUL-terminated |set|.
// Returns a pointer to that character, or stop - 1 when every character of
// the range is in |set| (or the range is empty).
char *
net_RFindCharNotInSet(const char *stop, const char *iter, const char *set)
{
    const char *cur = iter - 1;
    const char *const limit = stop - 1;

    while (cur != limit) {
        // look *cur up in the set
        const char *s = set;
        while (*s != '\0' && *s != *cur)
            ++s;
        if (*s == '\0')
            break;      // *cur is not in the set: found it
        --cur;
    }
    return (char *) cur;
}
00679 
00680 #define HTTP_LWS " \t"
00681 
00682 // Return the index of the closing quote of the string, if any
00683 static PRUint32
00684 net_FindStringEnd(const nsCString& flatStr,
00685                   PRUint32 stringStart,
00686                   char stringDelim)
00687 {
00688     NS_ASSERTION(stringStart < flatStr.Length() &&
00689                  flatStr.CharAt(stringStart) == stringDelim &&
00690                  (stringDelim == '"' || stringDelim == '\''),
00691                  "Invalid stringStart");
00692 
00693     const char set[] = { stringDelim, '\\', '\0' };
00694     do {
00695         // stringStart points to either the start quote or the last
00696         // escaped char (the char following a '\\')
00697                 
00698         // Write to searchStart here, so that when we get back to the
00699         // top of the loop right outside this one we search from the
00700         // right place.
00701         PRUint32 stringEnd = flatStr.FindCharInSet(set, stringStart + 1);
00702         if (stringEnd == PRUint32(kNotFound))
00703             return flatStr.Length();
00704 
00705         if (flatStr.CharAt(stringEnd) == '\\') {
00706             // Hit a backslash-escaped char.  Need to skip over it.
00707             stringStart = stringEnd + 1;
00708             if (stringStart == flatStr.Length())
00709                 return stringStart;
00710 
00711             // Go back to looking for the next escape or the string end
00712             continue;
00713         }
00714 
00715         return stringEnd;
00716 
00717     } while (PR_TRUE);
00718 
00719     NS_NOTREACHED("How did we get here?");
00720     return flatStr.Length();
00721 }
00722                   
00723 
00724 static PRUint32
00725 net_FindMediaDelimiter(const nsCString& flatStr,
00726                        PRUint32 searchStart,
00727                        char delimiter)
00728 {
00729     do {
00730         // searchStart points to the spot from which we should start looking
00731         // for the delimiter.
00732         const char delimStr[] = { delimiter, '"', '\'', '\0' };
00733         PRUint32 curDelimPos = flatStr.FindCharInSet(delimStr, searchStart);
00734         if (curDelimPos == PRUint32(kNotFound))
00735             return flatStr.Length();
00736             
00737         char ch = flatStr.CharAt(curDelimPos);
00738         if (ch == delimiter) {
00739             // Found delimiter
00740             return curDelimPos;
00741         }
00742 
00743         // We hit the start of a quoted string.  Look for its end.
00744         searchStart = net_FindStringEnd(flatStr, curDelimPos, ch);
00745         if (searchStart == flatStr.Length())
00746             return searchStart;
00747 
00748         ++searchStart;
00749 
00750         // searchStart now points to the first char after the end of the
00751         // string, so just go back to the top of the loop and look for
00752         // |delimiter| again.
00753     } while (PR_TRUE);
00754 
00755     NS_NOTREACHED("How did we get here?");
00756     return flatStr.Length();
00757 }
00758 
00759 static void
00760 net_ParseMediaType(const nsACString &aMediaTypeStr,
00761                    nsACString       &aContentType,
00762                    nsACString       &aContentCharset,
00763                    PRBool           *aHadCharset)
00764 {
00765     const nsCString& flatStr = PromiseFlatCString(aMediaTypeStr);
00766     const char* start = flatStr.get();
00767     const char* end = start + flatStr.Length();
00768 
00769     // Trim LWS leading and trailing whitespace from type.  We include '(' in
00770     // the trailing trim set to catch media-type comments, which are not at all
00771     // standard, but may occur in rare cases.
00772     const char* type = net_FindCharNotInSet(start, end, HTTP_LWS);
00773     const char* typeEnd = net_FindCharInSet(type, end, HTTP_LWS ";(");
00774 
00775     const char* charset = "";
00776     const char* charsetEnd = charset;
00777 
00778     // Iterate over parameters
00779     PRBool typeHasCharset = PR_FALSE;
00780     PRUint32 paramStart = flatStr.FindChar(';', typeEnd - start);
00781     if (paramStart != PRUint32(kNotFound)) {
00782         // We have parameters.  Iterate over them.
00783         PRUint32 curParamStart = paramStart + 1;
00784         do {
00785             PRUint32 curParamEnd =
00786                 net_FindMediaDelimiter(flatStr, curParamStart, ';');
00787 
00788             const char* paramName = net_FindCharNotInSet(start + curParamStart,
00789                                                          start + curParamEnd,
00790                                                          HTTP_LWS);
00791             static const char charsetStr[] = "charset=";
00792             if (PL_strncasecmp(paramName, charsetStr,
00793                                sizeof(charsetStr) - 1) == 0) {
00794                 charset = paramName + sizeof(charsetStr) - 1;
00795                 charsetEnd = start + curParamEnd;
00796                 typeHasCharset = PR_TRUE;
00797             }
00798 
00799             curParamStart = curParamEnd + 1;
00800         } while (curParamStart < flatStr.Length());
00801     }
00802 
00803     if (typeHasCharset) {
00804         // Trim LWS leading and trailing whitespace from charset.  We include
00805         // '(' in the trailing trim set to catch media-type comments, which are
00806         // not at all standard, but may occur in rare cases.
00807         charset = net_FindCharNotInSet(charset, charsetEnd, HTTP_LWS);
00808         if (*charset == '"' || *charset == '\'') {
00809             charsetEnd =
00810                 start + net_FindStringEnd(flatStr, charset - start, *charset);
00811             charset++;
00812             NS_ASSERTION(charsetEnd >= charset, "Bad charset parsing");
00813         } else {
00814             charsetEnd = net_FindCharInSet(charset, charsetEnd, HTTP_LWS ";(");
00815         }
00816     }
00817 
00818     // if the server sent "*/*", it is meaningless, so do not store it.
00819     // also, if type is the same as aContentType, then just update the
00820     // charset.  however, if charset is empty and aContentType hasn't
00821     // changed, then don't wipe-out an existing aContentCharset.  We
00822     // also want to reject a mime-type if it does not include a slash.
00823     // some servers give junk after the charset parameter, which may
00824     // include a comma, so this check makes us a bit more tolerant.
00825 
00826     if (type != typeEnd && strncmp(type, "*/*", typeEnd - type) != 0 &&
00827         memchr(type, '/', typeEnd - type) != NULL) {
00828         // Common case here is that aContentType is empty
00829         PRBool eq = !aContentType.IsEmpty() &&
00830             aContentType.Equals(Substring(type, typeEnd),
00831                                 nsCaseInsensitiveCStringComparator());
00832         if (!eq) {
00833             aContentType.Assign(type, typeEnd - type);
00834             ToLowerCase(aContentType);
00835         }
00836         if ((!eq && *aHadCharset) || typeHasCharset) {
00837             *aHadCharset = PR_TRUE;
00838             aContentCharset.Assign(charset, charsetEnd - charset);
00839         }
00840     }
00841 }
00842 
00843 #undef HTTP_LWS
00844 
00845 void
00846 net_ParseContentType(const nsACString &aHeaderStr,
00847                      nsACString       &aContentType,
00848                      nsACString       &aContentCharset,
00849                      PRBool           *aHadCharset)
00850 {
00851     //
00852     // Augmented BNF (from RFC 2616 section 3.7):
00853     //
00854     //   header-value = media-type *( LWS "," LWS media-type )
00855     //   media-type   = type "/" subtype *( LWS ";" LWS parameter )
00856     //   type         = token
00857     //   subtype      = token
00858     //   parameter    = attribute "=" value
00859     //   attribute    = token
00860     //   value        = token | quoted-string
00861     //   
00862     //
00863     // Examples:
00864     //
00865     //   text/html
00866     //   text/html, text/html
00867     //   text/html,text/html; charset=ISO-8859-1
00868     //   text/html,text/html; charset="ISO-8859-1"
00869     //   text/html;charset=ISO-8859-1, text/html
00870     //   text/html;charset='ISO-8859-1', text/html
00871     //   application/octet-stream
00872     //
00873 
00874     *aHadCharset = PR_FALSE;
00875     const nsCString& flatStr = PromiseFlatCString(aHeaderStr);
00876     
00877     // iterate over media-types.  Note that ',' characters can happen
00878     // inside quoted strings, so we need to watch out for that.
00879     PRUint32 curTypeStart = 0;
00880     do {
00881         // curTypeStart points to the start of the current media-type.  We want
00882         // to look for its end.
00883         PRUint32 curTypeEnd =
00884             net_FindMediaDelimiter(flatStr, curTypeStart, ',');
00885         
00886         // At this point curTypeEnd points to the spot where the media-type
00887         // starting at curTypeEnd ends.  Time to parse that!
00888         net_ParseMediaType(Substring(flatStr, curTypeStart,
00889                                      curTypeEnd - curTypeStart),
00890                            aContentType, aContentCharset, aHadCharset);
00891 
00892         // And let's move on to the next media-type
00893         curTypeStart = curTypeEnd + 1;
00894     } while (curTypeStart < flatStr.Length());
00895 }
00896 
00897 PRBool
00898 net_IsValidHostName(const nsCSubstring &host)
00899 {
00900     const char *end = host.EndReading();
00901     // Control Characters and !\"#%&'()*,/;<=>?@\\^{|}~
00902     // if one of these chars is found return false
00903     return net_FindCharInSet(host.BeginReading(), end,
00904                              // Control characters and space
00905                              "\x01\x02\x03\x04\x05\x06\x07\x08"
00906                              "\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"
00907                              "\x11\x12\x13\x14\x15\x16\x17\x18"
00908                              "\x19\x1a\x1b\x1c\x1d\x1e\x1f\x20"
00909                              // !   "   #   %   &   '   (   ) 
00910                              "\x21\x22\x23\x25\x26\x27\x28\x29"
00911                              // *   ,   /   ;   <   =   >   ?
00912                              "\x2a\x2c\x2f\x3b\x3c\x3d\x3e\x3f"
00913                              // @   \   ^   {   |   }   ~  DEL
00914                              "\x40\x5c\x5e\x7b\x7c\x7d\x7e\x7f") == end;
00915 }