Back to index

lightning-sunbird  0.9+nobinonly
mozTXTToHTMLConv.cpp
Go to the documentation of this file.
00001 /* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /* ***** BEGIN LICENSE BLOCK *****
00003  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00004  *
00005  * The contents of this file are subject to the Mozilla Public License Version
00006  * 1.1 (the "License"); you may not use this file except in compliance with
00007  * the License. You may obtain a copy of the License at
00008  * http://www.mozilla.org/MPL/
00009  *
00010  * Software distributed under the License is distributed on an "AS IS" basis,
00011  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00012  * for the specific language governing rights and limitations under the
00013  * License.
00014  *
00015  * The Original Code is the Mozilla Text to HTML converter code.
00016  *
00017  * The Initial Developer of the Original Code is
00018  * Ben Bucksch <http://www.bucksch.org>.
00019  * Portions created by the Initial Developer are Copyright (C) 1999, 2000
00020  * the Initial Developer. All Rights Reserved.
00021  *
00022  * Contributor(s):
00023  *
00024  * Alternatively, the contents of this file may be used under the terms of
00025  * either the GNU General Public License Version 2 or later (the "GPL"), or
00026  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00027  * in which case the provisions of the GPL or the LGPL are applicable instead
00028  * of those above. If you wish to allow use of your version of this file only
00029  * under the terms of either the GPL or the LGPL, and not to allow others to
00030  * use your version of this file under the terms of the MPL, indicate your
00031  * decision by deleting the provisions above and replace them with the notice
00032  * and other provisions required by the GPL or the LGPL. If you do not delete
00033  * the provisions above, a recipient may use your version of this file under
00034  * the terms of any one of the MPL, the GPL or the LGPL.
00035  *
00036  * ***** END LICENSE BLOCK ***** */
00037 
00038 #include "mozTXTToHTMLConv.h"
00039 #include "nsIServiceManager.h"
00040 #include "nsNetCID.h"
00041 #include "nsReadableUtils.h"
00042 #include "nsUnicharUtils.h"
00043 #include "nsCRT.h"
00044 #include "nsIExternalProtocolHandler.h"
00045 
// Class ID of the I/O service. CheckURLAndCreateHTML() hands it to
// do_GetService() the first time mIOService is needed.
00046 static NS_DEFINE_CID(kIOServiceCID, NS_IOSERVICE_CID);
00047 
00048 #ifdef DEBUG_BenB_Perf
00049 #include "prtime.h"
00050 #include "prinrval.h"
00051 #endif
00052 
// Multiplicative growth factor. NOTE(review): not referenced in the part of
// the file visible here; presumably used when pre-sizing output buffers --
// confirm against the remaining code.
00053 const PRFloat64 growthRate = 1.2;
00054 
00055 // Bug 183111, editor now replaces multiple spaces with leading
00056 // 0xA0's and a single ending space, so need to treat 0xA0's as spaces.
00057 // 0xA0 is the Latin1/Unicode character for "non-breaking space (nbsp)"
00058 // Also recognize the Japanese ideographic space 0x3000 as a space.
00059 static inline PRBool IsSpace(const PRUnichar aChar)
00060 {
00061   return (nsCRT::IsAsciiSpace(aChar) || aChar == 0xA0 || aChar == 0x3000);
00062 }
00063 
00064 // Escape Char will take ch, escape it and append the result to 
00065 // aStringToAppendTo
00066 void
00067 mozTXTToHTMLConv::EscapeChar(const PRUnichar ch, nsString& aStringToAppendTo)
00068 {
00069     switch (ch)
00070     {
00071     case '<':
00072       aStringToAppendTo.AppendLiteral("&lt;");
00073       break;
00074     case '>':
00075       aStringToAppendTo.AppendLiteral("&gt;");
00076       break;
00077     case '&':
00078       aStringToAppendTo.AppendLiteral("&amp;");
00079       break;
00080     default:
00081       aStringToAppendTo += ch;
00082     }
00083 
00084     return;
00085 }
00086 
00087 // EscapeStr takes the passed in string and
00088 // escapes it IN PLACE.
00089 void 
00090 mozTXTToHTMLConv::EscapeStr(nsString& aInString)
00091 {
00092   // the replace substring routines
00093   // don't seem to work if you have a character
00094   // in the in string that is also in the replacement
00095   // string! =(
00096   //aInString.ReplaceSubstring("&", "&amp;");
00097   //aInString.ReplaceSubstring("<", "&lt;");
00098   //aInString.ReplaceSubstring(">", "&gt;");
00099   for (PRUint32 i = 0; i < aInString.Length();)
00100   {
00101     switch (aInString[i])
00102     {
00103     case '<':
00104       aInString.Cut(i, 1);
00105       aInString.Insert(NS_LITERAL_STRING("&lt;"), i);
00106       i += 4; // skip past the integers we just added
00107       break;
00108     case '>':
00109       aInString.Cut(i, 1);
00110       aInString.Insert(NS_LITERAL_STRING("&gt;"), i);
00111       i += 4; // skip past the integers we just added
00112       break;
00113     case '&':
00114       aInString.Cut(i, 1);
00115       aInString.Insert(NS_LITERAL_STRING("&amp;"), i);
00116       i += 5; // skip past the integers we just added
00117       break;
00118     default:
00119       i++;
00120     }
00121   }
00122 }
00123 
00124 void 
00125 mozTXTToHTMLConv::UnescapeStr(const PRUnichar * aInString, PRInt32 aStartPos, PRInt32 aLength, nsString& aOutString)
00126 {
00127   const PRUnichar * subString = nsnull;
00128   for (PRUint32 i = aStartPos; PRInt32(i) - aStartPos < aLength;)
00129   {
00130     PRInt32 remainingChars = i - aStartPos;
00131     if (aInString[i] == '&')
00132     {
00133       subString = &aInString[i];
00134       if (!nsCRT::strncmp(subString, NS_LITERAL_STRING("&lt;").get(), MinInt(4, aLength - remainingChars)))
00135       {
00136         aOutString.Append(PRUnichar('<'));
00137         i += 4;
00138       }
00139       else if (!nsCRT::strncmp(subString, NS_LITERAL_STRING("&gt;").get(), MinInt(4, aLength - remainingChars)))
00140       {
00141         aOutString.Append(PRUnichar('>'));
00142         i += 4;
00143       }
00144       else if (!nsCRT::strncmp(subString, NS_LITERAL_STRING("&amp;").get(), MinInt(5, aLength - remainingChars)))
00145       {
00146         aOutString.Append(PRUnichar('&'));
00147         i += 5;
00148       }
00149       else
00150       {
00151         aOutString += aInString[i];
00152         i++;
00153       }
00154     }
00155     else
00156     {
00157       aOutString += aInString[i];
00158       i++;
00159     }
00160   }
00161 }
00162 
00163 void
00164 mozTXTToHTMLConv::CompleteAbbreviatedURL(const PRUnichar * aInString, PRInt32 aInLength, 
00165                                          const PRUint32 pos, nsString& aOutString)
00166 {
00167   NS_ASSERTION(pos < aInLength, "bad args to CompleteAbbreviatedURL, see bug #190851");
00168   if (pos >= aInLength)
00169     return;
00170 
00171   if (aInString[pos] == '@')
00172   {
00173     // only pre-pend a mailto url if the string contains a .domain in it..
00174     //i.e. we want to linkify johndoe@foo.com but not "let's meet @8pm"
00175     nsDependentString inString(aInString, aInLength);
00176     if (inString.FindChar('.', pos) != kNotFound) // if we have a '.' after the @ sign....
00177     {
00178       aOutString.AssignLiteral("mailto:");
00179       aOutString += aInString;
00180     }
00181   }
00182   else if (aInString[pos] == '.')
00183   {
00184     if (ItMatchesDelimited(aInString, aInLength,
00185                            NS_LITERAL_STRING("www.").get(), 4, LT_IGNORE, LT_IGNORE))
00186     {
00187       aOutString.AssignLiteral("http://");
00188       aOutString += aInString;
00189     }
00190     else if (ItMatchesDelimited(aInString,aInLength, NS_LITERAL_STRING("ftp.").get(), 4, LT_IGNORE, LT_IGNORE))
00191     { 
00192       aOutString.AssignLiteral("ftp://");
00193       aOutString += aInString;
00194     }
00195   }
00196 }
00197 
00198 PRBool
00199 mozTXTToHTMLConv::FindURLStart(const PRUnichar * aInString, PRInt32 aInLength,
00200                                const PRUint32 pos, const modetype check,
00201                                PRUint32& start)
00202 {
00203   switch(check)
00204   { // no breaks, because end of blocks is never reached
00205   case RFC1738:
00206   {
00207     if (!nsCRT::strncmp(&aInString[MaxInt(pos - 4, 0)], NS_LITERAL_STRING("<URL:").get(), 5))
00208     {
00209       start = pos + 1;
00210       return PR_TRUE;
00211     }
00212     else
00213       return PR_FALSE;
00214   }
00215   case RFC2396E:
00216   {
00217     nsString temp(aInString, aInLength);
00218     PRInt32 i = pos <= 0 ? kNotFound : temp.RFindCharInSet(NS_LITERAL_STRING("<>\"").get(), pos - 1);
00219     if (i != kNotFound && (temp[PRUint32(i)] == '<' ||
00220                            temp[PRUint32(i)] == '"'))
00221     {
00222       start = PRUint32(++i);
00223       return start < pos;
00224     }
00225     else
00226       return PR_FALSE;
00227   }
00228   case freetext:
00229   {
00230     PRInt32 i = pos - 1;
00231     for (; i >= 0 && (
00232          nsCRT::IsAsciiAlpha(aInString[PRUint32(i)]) ||
00233          nsCRT::IsAsciiDigit(aInString[PRUint32(i)]) ||
00234          aInString[PRUint32(i)] == '+' ||
00235          aInString[PRUint32(i)] == '-' ||
00236          aInString[PRUint32(i)] == '.'
00237          ); i--)
00238       ;
00239     if (++i >= 0 && i < pos && nsCRT::IsAsciiAlpha(aInString[PRUint32(i)]))
00240     {
00241       start = PRUint32(i);
00242       return PR_TRUE;
00243     }
00244     else
00245       return PR_FALSE;
00246   }
00247   case abbreviated:
00248   {
00249     PRInt32 i = pos - 1;
00250     // This disallows non-ascii-characters for email.
00251     // Currently correct, but revisit later after standards changed.
00252     PRBool isEmail = aInString[pos] == (PRUnichar)'@';
00253     // These chars mark the start of the URL
00254     for (; i >= 0
00255              && aInString[PRUint32(i)] != '>' && aInString[PRUint32(i)] != '<'
00256              && aInString[PRUint32(i)] != '"' && aInString[PRUint32(i)] != '\''
00257              && aInString[PRUint32(i)] != '`' && aInString[PRUint32(i)] != ','
00258              && aInString[PRUint32(i)] != '{' && aInString[PRUint32(i)] != '['
00259              && aInString[PRUint32(i)] != '(' && aInString[PRUint32(i)] != '|'
00260              && aInString[PRUint32(i)] != '\\'
00261              && !IsSpace(aInString[PRUint32(i)])
00262              && (!isEmail || nsCRT::IsAscii(aInString[PRUint32(i)]))
00263          ; i--)
00264       ;
00265     if
00266       (
00267         ++i >= 0 && i < pos
00268           &&
00269           (
00270             nsCRT::IsAsciiAlpha(aInString[PRUint32(i)]) ||
00271             nsCRT::IsAsciiDigit(aInString[PRUint32(i)])
00272           )
00273       )
00274     {
00275       start = PRUint32(i);
00276       return PR_TRUE;
00277     }
00278     else
00279       return PR_FALSE;
00280   }
00281   default:
00282     return PR_FALSE;
00283   } //switch
00284 }
00285 
00286 PRBool
00287 mozTXTToHTMLConv::FindURLEnd(const PRUnichar * aInString, PRInt32 aInStringLength, const PRUint32 pos,
00288            const modetype check, const PRUint32 start, PRUint32& end)
00289 {
00290   switch(check)
00291   { // no breaks, because end of blocks is never reached
00292   case RFC1738:
00293   case RFC2396E:
00294   {
00295     nsString temp(aInString, aInStringLength);
00296 
00297     PRInt32 i = temp.FindCharInSet(NS_LITERAL_STRING("<>\"").get(), pos + 1);
00298     if (i != kNotFound && temp[PRUint32(i--)] ==
00299         (check == RFC1738 || temp[start - 1] == '<' ? '>' : '"'))
00300     {
00301       end = PRUint32(i);
00302       return end > pos;
00303     }
00304     else
00305       return PR_FALSE;
00306   }
00307   case freetext:
00308   case abbreviated:
00309   {
00310     PRUint32 i = pos + 1;
00311     PRBool isEmail = aInString[pos] == (PRUnichar)'@';
00312     PRBool haveOpeningBracket = PR_FALSE;
00313     for (; PRInt32(i) < aInStringLength; i++)
00314     {
00315       // These chars mark the end of the URL
00316       if (aInString[i] == '>' || aInString[i] == '<' ||
00317           aInString[i] == '"' || aInString[i] == '`' ||
00318           aInString[i] == '}' || aInString[i] == ']' ||
00319           aInString[i] == '{' || aInString[i] == '[' ||
00320           aInString[i] == '|' ||
00321           (aInString[i] == ')' && !haveOpeningBracket) ||
00322           IsSpace(aInString[i])    )
00323           break;
00324       // Disallow non-ascii-characters for email.
00325       // Currently correct, but revisit later after standards changed.
00326       if (isEmail && (
00327             aInString[i] == '(' || aInString[i] == '\'' ||
00328             !nsCRT::IsAscii(aInString[i])       ))
00329           break;
00330       if (aInString[i] == '(')
00331         haveOpeningBracket = PR_TRUE;
00332     }
00333     // These chars are allowed in the middle of the URL, but not at end.
00334     // Technically they are, but are used in normal text after the URL.
00335     while (--i > pos && (
00336              aInString[i] == '.' || aInString[i] == ',' || aInString[i] == ';' ||
00337              aInString[i] == '!' || aInString[i] == '?' || aInString[i] == '-' ||
00338              aInString[i] == '\''
00339              ))
00340         ;
00341     if (i > pos)
00342     {
00343       end = i;
00344       return PR_TRUE;
00345     }
00346     else
00347       return PR_FALSE;
00348   }
00349   default:
00350     return PR_FALSE;
00351   } //switch
00352 }
00353 
00354 void
00355 mozTXTToHTMLConv::CalculateURLBoundaries(const PRUnichar * aInString, PRInt32 aInStringLength, 
00356      const PRUint32 pos, const PRUint32 whathasbeendone,
00357      const modetype check, const PRUint32 start, const PRUint32 end,
00358      nsString& txtURL, nsString& desc,
00359      PRInt32& replaceBefore, PRInt32& replaceAfter)
00360 {
00361   PRUint32 descstart = start;
00362   switch(check)
00363   {
00364   case RFC1738:
00365   {
00366     descstart = start - 5;
00367     desc.Append(&aInString[descstart], end - descstart + 2);  // include "<URL:" and ">"
00368     replaceAfter = end - pos + 1;
00369   } break;
00370   case RFC2396E:
00371   {
00372     descstart = start - 1;
00373     desc.Append(&aInString[descstart], end - descstart + 2); // include brackets
00374     replaceAfter = end - pos + 1;
00375   } break;
00376   case freetext:
00377   case abbreviated:
00378   {
00379     descstart = start;
00380     desc.Append(&aInString[descstart], end - start + 1); // don't include brackets  
00381     replaceAfter = end - pos;
00382   } break;
00383   default: break;
00384   } //switch
00385 
00386   EscapeStr(desc);
00387 
00388   txtURL.Append(&aInString[start], end - start + 1);
00389   txtURL.StripWhitespace();
00390 
00391   // FIX ME
00392   nsAutoString temp2;
00393   ScanTXT(&aInString[descstart], pos - descstart, ~kURLs /*prevents loop*/ & whathasbeendone, temp2);
00394   replaceBefore = temp2.Length();
00395   return;
00396 }
00397 
00398 PRBool mozTXTToHTMLConv::ShouldLinkify(const nsCString& aURL)
00399 {
00400   if (!mIOService)
00401     return PR_FALSE;
00402 
00403   nsCAutoString scheme;
00404   nsresult rv = mIOService->ExtractScheme(aURL, scheme);
00405   if(NS_FAILED(rv))
00406     return PR_FALSE;
00407 
00408   // Get the handler for this scheme.
00409   nsCOMPtr<nsIProtocolHandler> handler;    
00410   rv = mIOService->GetProtocolHandler(scheme.get(), getter_AddRefs(handler));
00411   if(NS_FAILED(rv))
00412     return PR_FALSE;
00413 
00414   // Is it an external protocol handler? If not, linkify it.
00415   nsCOMPtr<nsIExternalProtocolHandler> externalHandler = do_QueryInterface(handler, &rv);
00416   if (!externalHandler)
00417    return PR_TRUE; // handler is built-in, linkify it!
00418 
00419   // If external app exists for the scheme then linkify it.
00420   PRBool exists;
00421   rv = externalHandler->ExternalAppExistsForScheme(scheme, &exists);
00422   return(NS_SUCCEEDED(rv) && exists);
00423 }
00424 
00425 PRBool
00426 mozTXTToHTMLConv::CheckURLAndCreateHTML(
00427      const nsString& txtURL, const nsString& desc, const modetype mode,
00428      nsString& outputHTML)
00429 {
00430   // Create *uri from txtURL
00431   nsCOMPtr<nsIURI> uri;
00432   nsresult rv = NS_OK;
00433   if (!mIOService)
00434     mIOService = do_GetService(kIOServiceCID, &rv);
00435   
00436   if (NS_FAILED(rv) || !mIOService)
00437     return PR_FALSE;
00438 
00439   // See if the url should be linkified.
00440   NS_ConvertUCS2toUTF8 utf8URL(txtURL);
00441   if (!ShouldLinkify(utf8URL))
00442     return PR_FALSE;
00443 
00444   // it would be faster if we could just check to see if there is a protocol
00445   // handler for the url and return instead of actually trying to create a url...
00446   rv = mIOService->NewURI(utf8URL, nsnull, nsnull, getter_AddRefs(uri));
00447 
00448   // Real work
00449   if (NS_SUCCEEDED(rv) && uri)
00450   {
00451     outputHTML.AssignLiteral("<a class=\"moz-txt-link-");
00452     switch(mode)
00453     {
00454     case RFC1738:
00455       outputHTML.AppendLiteral("rfc1738");
00456       break;
00457     case RFC2396E:
00458       outputHTML.AppendLiteral("rfc2396E");
00459       break;
00460     case freetext:
00461       outputHTML.AppendLiteral("freetext");
00462       break;
00463     case abbreviated:
00464       outputHTML.AppendLiteral("abbreviated");
00465       break;
00466     default: break;
00467     }
00468     outputHTML.AppendLiteral("\" href=\"");
00469     outputHTML += txtURL;
00470     outputHTML.AppendLiteral("\">");
00471     outputHTML += desc;
00472     outputHTML.AppendLiteral("</a>");
00473     return PR_TRUE;
00474   }
00475   else
00476     return PR_FALSE;
00477 }
00478 
00479 NS_IMETHODIMP mozTXTToHTMLConv::FindURLInPlaintext(const PRUnichar * aInString, PRInt32 aInLength, PRInt32 aPos, PRInt32 * aStartPos, PRInt32 * aEndPos)
00480 {
00481   // call FindURL on the passed in string
00482   nsAutoString outputHTML; // we'll ignore the generated output HTML
00483 
00484   *aStartPos = -1;
00485   *aEndPos = -1;
00486 
00487   FindURL(aInString, aInLength, aPos, kURLs, outputHTML, *aStartPos, *aEndPos);
00488 
00489   return NS_OK;
00490 }
00491 
00492 PRBool
00493 mozTXTToHTMLConv::FindURL(const PRUnichar * aInString, PRInt32 aInLength, const PRUint32 pos,
00494      const PRUint32 whathasbeendone,
00495      nsString& outputHTML, PRInt32& replaceBefore, PRInt32& replaceAfter)
00496 {
00497   enum statetype {unchecked, invalid, startok, endok, success};
00498   static const modetype ranking[] = {RFC1738, RFC2396E, freetext, abbreviated};
00499 
00500   statetype state[mozTXTToHTMLConv_lastMode + 1]; // 0(=unknown)..lastMode
00501   /* I don't like this abuse of enums as index for the array,
00502      but I don't know a better method */
00503 
00504   // Define, which modes to check
00505   /* all modes but abbreviated are checked for text[pos] == ':',
00506      only abbreviated for '.', RFC2396E and abbreviated for '@' */
00507   for (modetype iState = unknown; iState <= mozTXTToHTMLConv_lastMode;
00508        iState = modetype(iState + 1))
00509     state[iState] = aInString[pos] == ':' ? unchecked : invalid;
00510   switch (aInString[pos])
00511   {
00512   case '@':
00513     state[RFC2396E] = unchecked;
00514     // no break here
00515   case '.':
00516     state[abbreviated] = unchecked;
00517     break;
00518   case ':':
00519     state[abbreviated] = invalid;
00520     break;
00521   default:
00522     break;
00523   }
00524 
00525   // Test, first successful mode wins, sequence defined by |ranking|
00526   PRInt32 iCheck = 0;  // the currently tested modetype
00527   modetype check = ranking[iCheck];
00528   for (; iCheck < mozTXTToHTMLConv_numberOfModes && state[check] != success;
00529        iCheck++)
00530     /* check state from last run.
00531        If this is the first, check this one, which isn't = success yet */
00532   {
00533     check = ranking[iCheck];
00534 
00535     PRUint32 start, end;
00536 
00537     if (state[check] == unchecked)
00538       if (FindURLStart(aInString, aInLength, pos, check, start))
00539         state[check] = startok;
00540 
00541     if (state[check] == startok)
00542       if (FindURLEnd(aInString, aInLength, pos, check, start, end))
00543         state[check] = endok;
00544 
00545     if (state[check] == endok)
00546     {
00547       nsAutoString txtURL, desc;
00548       PRInt32 resultReplaceBefore, resultReplaceAfter;
00549 
00550       CalculateURLBoundaries(aInString, aInLength, pos, whathasbeendone, check, start, end,
00551                              txtURL, desc,
00552                              resultReplaceBefore, resultReplaceAfter);
00553 
00554       if (aInString[pos] != ':')
00555       {
00556         nsAutoString temp = txtURL;
00557         txtURL.SetLength(0);
00558         CompleteAbbreviatedURL(temp.get(),temp.Length(), pos - start, txtURL);
00559       }
00560 
00561       if (!txtURL.IsEmpty() && CheckURLAndCreateHTML(txtURL, desc, check,
00562                                                      outputHTML))
00563       {
00564         replaceBefore = resultReplaceBefore;
00565         replaceAfter = resultReplaceAfter;
00566         state[check] = success;
00567       }
00568     } // if
00569   } // for
00570   return state[check] == success;
00571 }
00572 
00573 PRBool
00574 mozTXTToHTMLConv::ItMatchesDelimited(const PRUnichar * aInString,
00575     PRInt32 aInLength, const PRUnichar* rep, PRInt32 aRepLen,
00576     LIMTYPE before, LIMTYPE after)
00577 {
00578 
00579   // this little method gets called a LOT. I found we were spending a
00580   // lot of time just calculating the length of the variable "rep"
00581   // over and over again every time we called it. So we're now passing
00582   // an integer in here.
00583   PRInt32 textLen = aInLength;
00584 
00585   if
00586     (
00587       (before == LT_IGNORE && (after == LT_IGNORE || after == LT_DELIMITER))
00588         && textLen < aRepLen ||
00589       (before != LT_IGNORE || after != LT_IGNORE && after != LT_DELIMITER)
00590         && textLen < aRepLen + 1 ||
00591       before != LT_IGNORE && after != LT_IGNORE && after != LT_DELIMITER
00592         && textLen < aRepLen + 2
00593     )
00594     return PR_FALSE;
00595 
00596   PRUnichar text0 = aInString[0];
00597   PRUnichar textAfterPos = aInString[aRepLen + (before == LT_IGNORE ? 0 : 1)];
00598 
00599   if
00600     (
00601       before == LT_ALPHA
00602         && !nsCRT::IsAsciiAlpha(text0) ||
00603       before == LT_DIGIT
00604         && !nsCRT::IsAsciiDigit(text0) ||
00605       before == LT_DELIMITER
00606         &&
00607         (
00608           nsCRT::IsAsciiAlpha(text0) ||
00609           nsCRT::IsAsciiDigit(text0) ||
00610           text0 == *rep
00611         ) ||
00612       after == LT_ALPHA
00613         && !nsCRT::IsAsciiAlpha(textAfterPos) ||
00614       after == LT_DIGIT
00615         && !nsCRT::IsAsciiDigit(textAfterPos) ||
00616       after == LT_DELIMITER
00617         &&
00618         (
00619           nsCRT::IsAsciiAlpha(textAfterPos) ||
00620           nsCRT::IsAsciiDigit(textAfterPos) ||
00621           textAfterPos == *rep
00622         ) ||
00623         !Substring(Substring(aInString, aInString+aInLength),
00624                    (before == LT_IGNORE ? 0 : 1),
00625                    aRepLen).Equals(Substring(rep, rep+aRepLen),
00626                                    nsCaseInsensitiveStringComparator())
00627     )
00628     return PR_FALSE;
00629 
00630   return PR_TRUE;
00631 }
00632 
00633 PRUint32
00634 mozTXTToHTMLConv::NumberOfMatches(const PRUnichar * aInString, PRInt32 aInStringLength, 
00635      const PRUnichar* rep, PRInt32 aRepLen, LIMTYPE before, LIMTYPE after)
00636 {
00637   PRUint32 result = 0;
00638 
00639   for (PRInt32 i = 0; i < aInStringLength; i++)
00640   {
00641     const PRUnichar * indexIntoString = &aInString[i];
00642     if (ItMatchesDelimited(indexIntoString, aInStringLength - i, rep, aRepLen, before, after))
00643       result++;
00644   }
00645   return result;
00646 }
00647 
00648 
00649 // NOTE: the converted html for the phrase is appended to aOutString
00650 // tagHTML and attributeHTML are plain ASCII (literal strings, in fact)
00651 PRBool
00652 mozTXTToHTMLConv::StructPhraseHit(const PRUnichar * aInString, PRInt32 aInStringLength, PRBool col0,
00653      const PRUnichar* tagTXT, PRInt32 aTagTXTLen, 
00654      const char* tagHTML, const char* attributeHTML,
00655      nsString& aOutString, PRUint32& openTags)
00656 {
00657   /* We're searching for the following pattern:
00658      LT_DELIMITER - "*" - ALPHA -
00659      [ some text (maybe more "*"-pairs) - ALPHA ] "*" - LT_DELIMITER.
00660      <strong> is only inserted, if existance of a pair could be verified
00661      We use the first opening/closing tag, if we can choose */
00662 
00663   const PRUnichar * newOffset = aInString;
00664   PRInt32 newLength = aInStringLength;
00665   if (!col0) // skip the first element?
00666   {
00667     newOffset = &aInString[1];
00668     newLength = aInStringLength - 1;
00669   }
00670 
00671   // opening tag
00672   if
00673     (
00674       ItMatchesDelimited(aInString, aInStringLength, tagTXT, aTagTXTLen, 
00675            (col0 ? LT_IGNORE : LT_DELIMITER), LT_ALPHA) // is opening tag
00676         && NumberOfMatches(newOffset, newLength, tagTXT, aTagTXTLen, 
00677               LT_ALPHA, LT_DELIMITER)  // remaining closing tags
00678               > openTags
00679     )
00680   {
00681     openTags++;
00682     aOutString.AppendLiteral("<");
00683     aOutString.AppendASCII(tagHTML);
00684     aOutString.Append(PRUnichar(' '));
00685     aOutString.AppendASCII(attributeHTML);
00686     aOutString.AppendLiteral("><span class=\"moz-txt-tag\">");
00687     aOutString.Append(tagTXT);
00688     aOutString.AppendLiteral("</span>");
00689     return PR_TRUE;
00690   }
00691 
00692   // closing tag
00693   else if (openTags > 0
00694        && ItMatchesDelimited(aInString, aInStringLength, tagTXT, aTagTXTLen, LT_ALPHA, LT_DELIMITER))
00695   {
00696     openTags--;
00697     aOutString.AppendLiteral("<span class=\"moz-txt-tag\">");
00698     aOutString.Append(tagTXT);
00699     aOutString.AppendLiteral("</span></");
00700     aOutString.AppendASCII(tagHTML);
00701     aOutString.Append(PRUnichar('>'));
00702     return PR_TRUE;
00703   }
00704 
00705   return PR_FALSE;
00706 }
00707 
00708 
00709 PRBool
00710 mozTXTToHTMLConv::SmilyHit(const PRUnichar * aInString, PRInt32 aLength, PRBool col0,
00711          const char* tagTXT, const char* imageName,
00712          nsString& outputHTML, PRInt32& glyphTextLen)
00713 {
00714   if ( !aInString || !tagTXT || !imageName )
00715       return PR_FALSE;
00716 
00717   PRInt32  tagLen = nsCRT::strlen(tagTXT);
00718  
00719   PRUint32 delim = (col0 ? 0 : 1) + tagLen;
00720 
00721   if
00722     (
00723       (col0 || IsSpace(aInString[0]))
00724         &&
00725         (
00726           aLength <= PRInt32(delim) ||
00727           IsSpace(aInString[delim]) ||
00728           aLength > PRInt32(delim + 1)
00729             &&
00730             (
00731               aInString[delim] == '.' ||
00732               aInString[delim] == ',' ||
00733               aInString[delim] == ';' ||
00734               aInString[delim] == '8' ||
00735               aInString[delim] == '>' ||
00736               aInString[delim] == '!' ||
00737               aInString[delim] == '?'
00738             )
00739             && IsSpace(aInString[delim + 1])
00740         )
00741         && ItMatchesDelimited(aInString, aLength, NS_ConvertASCIItoUCS2(tagTXT).get(), tagLen, 
00742                               col0 ? LT_IGNORE : LT_DELIMITER, LT_IGNORE)
00743                // Note: tests at different pos for LT_IGNORE and LT_DELIMITER
00744     )
00745   {
00746     if (!col0)
00747     {
00748       outputHTML.Truncate();
00749       outputHTML.Append(PRUnichar(' '));
00750     }
00751 
00752     outputHTML.AppendLiteral("<span class=\""); // <span class="
00753     AppendASCIItoUTF16(imageName, outputHTML);        // smiley-frown
00754     outputHTML.AppendLiteral("\"><span> ");     // "> <span> 
00755     AppendASCIItoUTF16(tagTXT, outputHTML);           // alt text
00756     outputHTML.AppendLiteral(" </span></span>"); // </span></span>
00757     glyphTextLen = (col0 ? 0 : 1) + tagLen;
00758     return PR_TRUE;
00759   }
00760 
00761   return PR_FALSE;
00762 }
00763 
00764 // the glyph is appended to aOutputString instead of the original string...
00765 PRBool
00766 mozTXTToHTMLConv::GlyphHit(const PRUnichar * aInString, PRInt32 aInLength, PRBool col0,
00767          nsString& aOutputString, PRInt32& glyphTextLen)
00768 {
00769   MOZ_TIMER_START(mGlyphHitTimer);
00770 
00771   PRUnichar text0 = aInString[0]; 
00772   PRUnichar text1 = aInString[1];
00773   PRUnichar firstChar = (col0 ? text0 : text1);
00774 
00775   // temporary variable used to store the glyph html text
00776   nsAutoString outputHTML;
00777   PRBool bTestSmilie;
00778   PRBool bArg;
00779   int i;
00780 
00781   // refactor some of this mess to avoid code duplication and speed execution a bit
00782   // there are two cases that need to be tried one after another. To avoid a lot of
00783   // duplicate code, rolling into a loop
00784 
00785   i = 0;
00786   while ( i < 2 )
00787   {
00788     bTestSmilie = PR_FALSE;
00789     if ( !i && (firstChar == ':' || firstChar == ';' || firstChar == '=' || firstChar == '>' || firstChar == '8' || firstChar == 'O'))
00790     {
00791         // first test passed
00792 
00793         bTestSmilie = PR_TRUE;
00794         bArg = col0;
00795     }
00796     if ( i && col0 && ( text1 == ':' || text1 == ';' || text1 == '=' || text1 == '>' || text1 == '8' || text1 == 'O' ) )
00797     {
00798         // second test passed
00799 
00800         bTestSmilie = PR_TRUE;
00801         bArg = PR_FALSE;
00802     }
00803     if ( bTestSmilie && (
00804           SmilyHit(aInString, aInLength, bArg,
00805                    ":-)",
00806                    "moz-smiley-s1", // smile
00807                    outputHTML, glyphTextLen) ||
00808   
00809           SmilyHit(aInString, aInLength, bArg,
00810                    ":)",
00811                    "moz-smiley-s1", // smile
00812                    outputHTML, glyphTextLen) ||
00813           
00814           SmilyHit(aInString, aInLength, bArg,
00815                    ":-D",
00816                    "moz-smiley-s5", // laughing
00817                    outputHTML, glyphTextLen) ||
00818           
00819           SmilyHit(aInString, aInLength, bArg,
00820                    ":-(",
00821                    "moz-smiley-s2", // frown
00822                    outputHTML, glyphTextLen) ||
00823           
00824           SmilyHit(aInString, aInLength, bArg,
00825                    ":(",
00826                    "moz-smiley-s2", // frown
00827                    outputHTML, glyphTextLen) ||
00828           
00829           SmilyHit(aInString, aInLength, bArg,
00830                    ":-[",
00831                    "moz-smiley-s6", // embarassed
00832                    outputHTML, glyphTextLen) ||
00833           
00834           SmilyHit(aInString, aInLength, bArg,
00835                    ";-)",
00836                    "moz-smiley-s3", // wink
00837                    outputHTML, glyphTextLen) ||
00838 
00839           SmilyHit(aInString, aInLength, col0,
00840                    ";)",
00841                    "moz-smiley-s3", // wink
00842                    outputHTML, glyphTextLen) ||
00843           
00844           SmilyHit(aInString, aInLength, bArg,
00845                    ":-\\",
00846                    "moz-smiley-s7", // undecided
00847                    outputHTML, glyphTextLen) ||
00848           
00849           SmilyHit(aInString, aInLength, bArg,
00850                    ":-P",
00851                    "moz-smiley-s4", // tongue
00852                    outputHTML, glyphTextLen) ||
00853                    
00854           SmilyHit(aInString, aInLength, bArg,
00855                    ";-P",
00856                    "moz-smiley-s4", // tongue
00857                    outputHTML, glyphTextLen) ||  
00858          
00859           SmilyHit(aInString, aInLength, bArg,
00860                    "=-O",
00861                    "moz-smiley-s8", // surprise
00862                    outputHTML, glyphTextLen) ||
00863          
00864           SmilyHit(aInString, aInLength, bArg,
00865                    ":-*",
00866                    "moz-smiley-s9", // kiss
00867                    outputHTML, glyphTextLen) ||
00868          
00869           SmilyHit(aInString, aInLength, bArg,
00870                    ">:o",
00871                    "moz-smiley-s10", // yell
00872                    outputHTML, glyphTextLen) ||
00873           
00874           SmilyHit(aInString, aInLength, bArg,
00875                    ">:-o",
00876                    "moz-smiley-yell", // yell
00877                    outputHTML, glyphTextLen) ||
00878         
00879           SmilyHit(aInString, aInLength, bArg,
00880                    "8-)",
00881                    "moz-smiley-s11", // cool
00882                    outputHTML, glyphTextLen) ||
00883          
00884           SmilyHit(aInString, aInLength, bArg,
00885                    ":-$",
00886                    "moz-smiley-s12", // money
00887                    outputHTML, glyphTextLen) ||
00888          
00889           SmilyHit(aInString, aInLength, bArg,
00890                    ":-!",
00891                    "moz-smiley-s13", // foot
00892                    outputHTML, glyphTextLen) ||
00893          
00894           SmilyHit(aInString, aInLength, bArg,
00895                    "O:-)",
00896                    "moz-smiley-s14", // innocent
00897                    outputHTML, glyphTextLen) ||
00898          
00899           SmilyHit(aInString, aInLength, bArg,
00900                    ":'(",
00901                    "moz-smiley-s15", // cry
00902                    outputHTML, glyphTextLen) ||
00903          
00904           SmilyHit(aInString, aInLength, bArg,
00905                    ":-X",
00906                    "moz-smiley-s16", // sealed
00907                    outputHTML, glyphTextLen) 
00908         )
00909     )
00910     {
00911         aOutputString.Append(outputHTML);
00912         MOZ_TIMER_STOP(mGlyphHitTimer);
00913         return PR_TRUE;
00914     }
00915     i++;
00916   }
00917   if (text0 == '\f')
00918   {
00919       aOutputString.AppendLiteral("<span class='moz-txt-formfeed'></span>");
00920       glyphTextLen = 1;
00921       MOZ_TIMER_STOP(mGlyphHitTimer);
00922       return PR_TRUE;
00923   }
00924   if (text0 == '+' || text1 == '+')
00925   {
00926     if (ItMatchesDelimited(aInString, aInLength,
00927                            NS_LITERAL_STRING(" +/-").get(), 4,
00928                            LT_IGNORE, LT_IGNORE))
00929     {
00930       aOutputString.AppendLiteral(" &plusmn;");
00931       glyphTextLen = 4;
00932       MOZ_TIMER_STOP(mGlyphHitTimer);
00933       return PR_TRUE;
00934     }
00935     if (col0 && ItMatchesDelimited(aInString, aInLength,
00936                                    NS_LITERAL_STRING("+/-").get(), 3,
00937                                    LT_IGNORE, LT_IGNORE))
00938     {
00939       aOutputString.AppendLiteral("&plusmn;");
00940       glyphTextLen = 3;
00941       MOZ_TIMER_STOP(mGlyphHitTimer);
00942       return PR_TRUE;
00943     }
00944   }
00945 
00946   // x^2  =>  x<sup>2</sup>,   also handle powers x^-2,  x^0.5
00947   // implement regular expression /[\dA-Za-z\)\]}]\^-?\d+(\.\d+)*[^\dA-Za-z]/
00948   if    
00949     (
00950       text1 == '^'
00951       && 
00952       (
00953         nsCRT::IsAsciiDigit(text0) || nsCRT::IsAsciiAlpha(text0) || 
00954         text0 == ')' || text0 == ']' || text0 == '}'
00955       )
00956       &&
00957       (
00958         2 < aInLength && nsCRT::IsAsciiDigit(aInString[2]) ||
00959         3 < aInLength && aInString[2] == '-' && nsCRT::IsAsciiDigit(aInString[3])
00960       )
00961     )
00962   {
00963     // Find first non-digit
00964     PRInt32 delimPos = 3;  // skip "^" and first digit (or '-')
00965     for (; delimPos < aInLength
00966            &&
00967            (
00968              nsCRT::IsAsciiDigit(aInString[delimPos]) || 
00969              aInString[delimPos] == '.' && delimPos + 1 < aInLength &&
00970                nsCRT::IsAsciiDigit(aInString[delimPos + 1])
00971            );
00972          delimPos++)
00973       ;
00974 
00975     if (delimPos < aInLength && nsCRT::IsAsciiAlpha(aInString[delimPos]))
00976     {
00977       MOZ_TIMER_STOP(mGlyphHitTimer);
00978       return PR_FALSE;
00979     }
00980 
00981     outputHTML.Truncate();
00982     outputHTML += text0;
00983     outputHTML.AppendLiteral("<sup class=\"moz-txt-sup\">");
00984 
00985     aOutputString.Append(outputHTML);
00986     aOutputString.Append(&aInString[2], delimPos - 2);
00987     aOutputString.AppendLiteral("</sup>");
00988 
00989     glyphTextLen = delimPos /* - 1 + 1 */ ;
00990     MOZ_TIMER_STOP(mGlyphHitTimer);
00991     return PR_TRUE;
00992   }
00993   /*
00994    The following strings are not substituted:
00995    |TXT   |HTML     |Reason
00996    +------+---------+----------
00997     ->     &rarr;    Bug #454
00998     =>     &rArr;    ditto
00999     <-     &larr;    ditto
01000     <=     &lArr;    ditto
01001     (tm)   &trade;   ditto
01002     1/4    &frac14;  is triggered by 1/4 Part 1, 2/4 Part 2, ...
01003     3/4    &frac34;  ditto
01004     1/2    &frac12;  similar
01005   */
01006   MOZ_TIMER_STOP(mGlyphHitTimer);
01007   return PR_FALSE;
01008 }
01009 
01010 /***************************************************************************
01011   Library-internal Interface
01012 ****************************************************************************/
01013 
01014 mozTXTToHTMLConv::mozTXTToHTMLConv()
01015 {
01016   MOZ_TIMER_RESET(mScanTXTTimer);
01017   MOZ_TIMER_RESET(mGlyphHitTimer);
01018   MOZ_TIMER_RESET(mTotalMimeTime);
01019   MOZ_TIMER_START(mTotalMimeTime);
01020 }
01021 
01022 mozTXTToHTMLConv::~mozTXTToHTMLConv() 
01023 {
01024   MOZ_TIMER_STOP(mTotalMimeTime);
01025   MOZ_TIMER_DEBUGLOG(("MIME Total Processing Time: "));
01026   MOZ_TIMER_PRINT(mTotalMimeTime);
01027   
01028   MOZ_TIMER_DEBUGLOG(("mozTXTToHTMLConv::ScanTXT(): "));
01029   MOZ_TIMER_PRINT(mScanTXTTimer);
01030 
01031   MOZ_TIMER_DEBUGLOG(("mozTXTToHTMLConv::GlyphHit(): "));
01032   MOZ_TIMER_PRINT(mGlyphHitTimer);
01033 }
01034 
// XPCOM macro supplying the nsISupports implementation for this class
// (presumably AddRef/Release/QueryInterface).
// NOTE(review): the second argument names the class itself rather than an
// interface - confirm that the class declaration provides an IID.
NS_IMPL_ISUPPORTS1(mozTXTToHTMLConv, mozTXTToHTMLConv)
01036 
01037 PRInt32
01038 mozTXTToHTMLConv::CiteLevelTXT(const PRUnichar *line,
01039                                 PRUint32& logLineStart)
01040 {
01041   PRInt32 result = 0;
01042   PRInt32 lineLength = nsCRT::strlen(line);
01043 
01044   PRBool moreCites = PR_TRUE;
01045   while (moreCites)
01046   {
01047     /* E.g. the following lines count as quote:
01048 
01049        > text
01050        //#ifdef QUOTE_RECOGNITION_AGGRESSIVE
01051        >text
01052        //#ifdef QUOTE_RECOGNITION_AGGRESSIVE
01053            > text
01054        ] text
01055        USER> text
01056        USER] text
01057        //#endif
01058 
01059        logLineStart is the position of "t" in this example
01060     */
01061     PRUint32 i = logLineStart;
01062 
01063 #ifdef QUOTE_RECOGNITION_AGGRESSIVE
01064     for (; PRInt32(i) < lineLength && IsSpace(line[i]); i++)
01065       ;
01066     for (; PRInt32(i) < lineLength && nsCRT::IsAsciiAlpha(line[i])
01067                                    && nsCRT::IsUpper(line[i])   ; i++)
01068       ;
01069     if (PRInt32(i) < lineLength && (line[i] == '>' || line[i] == ']'))
01070 #else
01071     if (PRInt32(i) < lineLength && line[i] == '>')
01072 #endif
01073     {
01074       i++;
01075       if (PRInt32(i) < lineLength && line[i] == ' ')
01076         i++;
01077       // sendmail/mbox
01078       // Placed here for performance increase
01079       const PRUnichar * indexString = &line[logLineStart];
01080            // here, |logLineStart < lineLength| is always true
01081       PRUint32 minlength = MinInt(6,nsCRT::strlen(indexString));
01082       if (Substring(indexString,
01083                     indexString+minlength).Equals(Substring(NS_LITERAL_STRING(">From "), 0, minlength),
01084                                                   nsCaseInsensitiveStringComparator()))
01085         //XXX RFC2646
01086         moreCites = PR_FALSE;
01087       else
01088       {
01089         result++;
01090         logLineStart = i;
01091       }
01092     }
01093     else
01094       moreCites = PR_FALSE;
01095   }
01096 
01097   return result;
01098 }
01099 
01100 void
01101 mozTXTToHTMLConv::ScanTXT(const PRUnichar * aInString, PRInt32 aInStringLength, PRUint32 whattodo, nsString& aOutString)
01102 {
01103   PRBool doURLs = whattodo & kURLs;
01104   PRBool doGlyphSubstitution = whattodo & kGlyphSubstitution;
01105   PRBool doStructPhrase = whattodo & kStructPhrase;
01106 
01107   MOZ_TIMER_START(mScanTXTTimer);
01108 
01109   PRUint32 structPhrase_strong = 0;  // Number of currently open tags
01110   PRUint32 structPhrase_underline = 0;
01111   PRUint32 structPhrase_italic = 0;
01112   PRUint32 structPhrase_code = 0;
01113 
01114   nsAutoString outputHTML;  // moved here for performance increase
01115 
01116   for(PRUint32 i = 0; PRInt32(i) < aInStringLength;)
01117   {
01118     if (doGlyphSubstitution)
01119     {
01120       PRInt32 glyphTextLen;
01121       if (GlyphHit(&aInString[i], aInStringLength - i, i == 0, aOutString, glyphTextLen))
01122       {
01123         i += glyphTextLen;
01124         continue;
01125       }
01126     }
01127 
01128     if (doStructPhrase)
01129     {
01130       const PRUnichar * newOffset = aInString;
01131       PRInt32 newLength = aInStringLength;
01132       if (i > 0 ) // skip the first element?
01133       {
01134         newOffset = &aInString[i-1];
01135         newLength = aInStringLength - i + 1;
01136       }
01137 
01138       switch (aInString[i]) // Performance increase
01139       {
01140       case '*':
01141         if (StructPhraseHit(newOffset, newLength, i == 0,
01142                             NS_LITERAL_STRING("*").get(), 1,
01143                             "b", "class=\"moz-txt-star\"",
01144                             aOutString, structPhrase_strong))
01145         {
01146           i++;
01147           continue;
01148         }
01149         break;
01150       case '/':
01151         if (StructPhraseHit(newOffset, newLength, i == 0,
01152                             NS_LITERAL_STRING("/").get(), 1,
01153                             "i", "class=\"moz-txt-slash\"",
01154                             aOutString, structPhrase_italic))
01155         {
01156           i++;
01157           continue;
01158         }
01159         break;
01160       case '_':
01161         if (StructPhraseHit(newOffset, newLength, i == 0,
01162                             NS_LITERAL_STRING("_").get(), 1,
01163                             "span" /* <u> is deprecated */,
01164                             "class=\"moz-txt-underscore\"",
01165                             aOutString, structPhrase_underline))
01166         {
01167           i++;
01168           continue;
01169         }
01170         break;
01171       case '|':
01172         if (StructPhraseHit(newOffset, newLength, i == 0,
01173                             NS_LITERAL_STRING("|").get(), 1,
01174                             "code", "class=\"moz-txt-verticalline\"",
01175                             aOutString, structPhrase_code))
01176         {
01177           i++;
01178           continue;
01179         }
01180         break;
01181       }
01182     }
01183 
01184     if (doURLs)
01185     {
01186       switch (aInString[i])
01187       {
01188       case ':':
01189       case '@':
01190       case '.':
01191         if ( (i == 0 || ((i > 0) && aInString[i - 1] != ' ')) && aInString[i +1] != ' ') // Performance increase
01192         {
01193           PRInt32 replaceBefore;
01194           PRInt32 replaceAfter;
01195           if (FindURL(aInString, aInStringLength, i, whattodo,
01196                       outputHTML, replaceBefore, replaceAfter)
01197                   && structPhrase_strong + structPhrase_italic +
01198                        structPhrase_underline + structPhrase_code == 0
01199                        /* workaround for bug #19445 */ )
01200           {
01201             aOutString.Cut(aOutString.Length() - replaceBefore, replaceBefore);
01202             aOutString += outputHTML;
01203             i += replaceAfter + 1;
01204             continue;
01205           }
01206         }
01207         break;
01208       } //switch
01209     }
01210 
01211     switch (aInString[i])
01212     {
01213     // Special symbols
01214     case '<':
01215     case '>':
01216     case '&':
01217       EscapeChar(aInString[i], aOutString);
01218       i++;
01219       break;
01220     // Normal characters
01221     default:
01222       aOutString += aInString[i];
01223       i++;
01224       break;
01225     }
01226   }
01227 
01228   MOZ_TIMER_STOP(mScanTXTTimer);
01229 }
01230 
01231 void
01232 mozTXTToHTMLConv::ScanHTML(nsString& aInString, PRUint32 whattodo, nsString &aOutString)
01233 { 
01234   // some common variables we were recalculating
01235   // every time inside the for loop...
01236   PRInt32 lengthOfInString = aInString.Length();
01237   const PRUnichar * uniBuffer = aInString.get();
01238 
01239 #ifdef DEBUG_BenB_Perf
01240   PRTime parsing_start = PR_IntervalNow();
01241 #endif
01242 
01243   // Look for simple entities not included in a tags and scan them.
01244   /* Skip all tags ("<[...]>") and content in an a tag ("<a[...]</a>")
01245      or in a tag ("<!--[...]-->").
01246      Unescape the rest (text between tags) and pass it to ScanTXT. */
01247   for (PRInt32 i = 0; PRUint32(i) < lengthOfInString;)
01248   {
01249     if (aInString[i] == '<')  // html tag
01250     {
01251       PRUint32 start = PRUint32(i);
01252       if (nsCRT::ToLower((char)aInString[PRUint32(i) + 1]) == 'a')
01253            // if a tag, skip until </a>
01254       {
01255         i = aInString.Find("</a>", PR_TRUE, i);
01256         if (i == kNotFound)
01257           i = lengthOfInString;
01258         else
01259           i += 4;
01260       }
01261       else if (aInString[PRUint32(i) + 1] == '!' && aInString[PRUint32(i) + 2] == '-' &&
01262         aInString[PRUint32(i) + 3] == '-')
01263           //if out-commended code, skip until -->
01264       {
01265         i = aInString.Find("-->", PR_FALSE, i);
01266         if (i == kNotFound)
01267           i = lengthOfInString;
01268         else
01269           i += 3;
01270 
01271       }
01272       else  // just skip tag (attributes etc.)
01273       {
01274         i = aInString.FindChar('>', i);
01275         if (i == kNotFound)
01276           i = lengthOfInString;
01277         else
01278           i++;
01279       }
01280       aOutString.Append(&uniBuffer[start], PRUint32(i) - start);
01281     }
01282     else
01283     {
01284       PRUint32 start = PRUint32(i);
01285       i = aInString.FindChar('<', i);
01286       if (i == kNotFound)
01287         i = lengthOfInString;
01288   
01289       nsString tempString;     
01290       tempString.SetCapacity(PRUint32((PRUint32(i) - start) * growthRate));
01291       UnescapeStr(uniBuffer, start, PRUint32(i) - start, tempString);
01292       ScanTXT(tempString.get(), tempString.Length(), whattodo, aOutString);
01293     }
01294   }
01295 
01296 #ifdef DEBUG_BenB_Perf
01297   printf("ScanHTML time:    %d ms\n", PR_IntervalToMilliseconds(PR_IntervalNow() - parsing_start));
01298 #endif
01299 }
01300 
01301 /****************************************************************************
01302   XPCOM Interface
01303 *****************************************************************************/
01304 
01305 NS_IMETHODIMP
01306 mozTXTToHTMLConv::Convert(nsIInputStream *aFromStream,
01307                           const char *aFromType,
01308                           const char *aToType,
01309                           nsISupports *aCtxt, nsIInputStream **_retval)
01310 {
01311   return NS_ERROR_NOT_IMPLEMENTED;
01312 }
01313 
01314 NS_IMETHODIMP
01315 mozTXTToHTMLConv::AsyncConvertData(const char *aFromType,
01316                                    const char *aToType,
01317                                    nsIStreamListener *aListener, nsISupports *aCtxt) {
01318   return NS_ERROR_NOT_IMPLEMENTED;
01319 }
01320 
01321 NS_IMETHODIMP
01322 mozTXTToHTMLConv::OnDataAvailable(nsIRequest* request, nsISupports *ctxt,
01323                                  nsIInputStream *inStr, PRUint32 sourceOffset,
01324                                  PRUint32 count)
01325 {
01326   return NS_ERROR_NOT_IMPLEMENTED;
01327 }
01328 
01329 NS_IMETHODIMP
01330 mozTXTToHTMLConv::OnStartRequest(nsIRequest* request, nsISupports *ctxt)
01331 {
01332   return NS_ERROR_NOT_IMPLEMENTED;
01333 }
01334 
01335 NS_IMETHODIMP
01336 mozTXTToHTMLConv::OnStopRequest(nsIRequest* request, nsISupports *ctxt,
01337                                 nsresult aStatus)
01338 {
01339   return NS_ERROR_NOT_IMPLEMENTED;
01340 }
01341 
01342 NS_IMETHODIMP
01343 mozTXTToHTMLConv::CiteLevelTXT(const PRUnichar *line, PRUint32 *logLineStart,
01344                             PRUint32 *_retval)
01345 {
01346    if (!logLineStart || !_retval || !line)
01347      return NS_ERROR_NULL_POINTER;
01348    *_retval = CiteLevelTXT(line, *logLineStart);
01349    return NS_OK;
01350 }
01351 
01352 NS_IMETHODIMP
01353 mozTXTToHTMLConv::ScanTXT(const PRUnichar *text, PRUint32 whattodo,
01354                         PRUnichar **_retval)
01355 {
01356   NS_ENSURE_ARG(text);
01357 
01358   // FIX ME!!!
01359   nsString outString;
01360   PRInt32 inLength = nsCRT::strlen(text);
01361   // by setting a large capacity up front, we save time
01362   // when appending characters to the output string because we don't
01363   // need to reallocate and re-copy the characters already in the out String.
01364   NS_ASSERTION(inLength, "ScanTXT passed 0 length string");
01365   if (inLength == 0) {
01366     *_retval = nsCRT::strdup(text);
01367     return NS_OK;
01368   }
01369 
01370   outString.SetCapacity(PRUint32(inLength * growthRate));
01371   ScanTXT(text, inLength, whattodo, outString);
01372 
01373   *_retval = ToNewUnicode(outString);
01374   return *_retval ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
01375 }
01376 
01377 NS_IMETHODIMP
01378 mozTXTToHTMLConv::ScanHTML(const PRUnichar *text, PRUint32 whattodo,
01379                          PRUnichar **_retval)
01380 {
01381   NS_ENSURE_ARG(text);
01382 
01383   // FIX ME!!!
01384   nsString outString;
01385   nsString inString (text); // look at this nasty extra copy of the entire input buffer!
01386   outString.SetCapacity(PRUint32(inString.Length() * growthRate));
01387 
01388   ScanHTML(inString, whattodo, outString);
01389   *_retval = ToNewUnicode(outString);
01390   return *_retval ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
01391 }
01392 
01393 nsresult
01394 MOZ_NewTXTToHTMLConv(mozTXTToHTMLConv** aConv)
01395 {
01396     NS_PRECONDITION(aConv != nsnull, "null ptr");
01397     if (!aConv)
01398       return NS_ERROR_NULL_POINTER;
01399 
01400     *aConv = new mozTXTToHTMLConv();
01401     if (!*aConv)
01402       return NS_ERROR_OUT_OF_MEMORY;
01403 
01404     NS_ADDREF(*aConv);
01405     //    return (*aConv)->Init();
01406     return NS_OK;
01407 }