Back to index

lightning-sunbird  0.9+nobinonly
mozTXTToHTMLConv.h
Go to the documentation of this file.
00001 /* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /* ***** BEGIN LICENSE BLOCK *****
00003  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00004  *
00005  * The contents of this file are subject to the Mozilla Public License Version
00006  * 1.1 (the "License"); you may not use this file except in compliance with
00007  * the License. You may obtain a copy of the License at
00008  * http://www.mozilla.org/MPL/
00009  *
00010  * Software distributed under the License is distributed on an "AS IS" basis,
00011  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00012  * for the specific language governing rights and limitations under the
00013  * License.
00014  *
00015  * The Original Code is the Mozilla Text to HTML converter code.
00016  *
00017  * The Initial Developer of the Original Code is
00018  * Ben Bucksch <http://www.bucksch.org>.
00019  * Portions created by the Initial Developer are Copyright (C) 1999, 2000
00020  * the Initial Developer. All Rights Reserved.
00021  *
00022  * Contributor(s):
00023  *
00024  * Alternatively, the contents of this file may be used under the terms of
00025  * either the GNU General Public License Version 2 or later (the "GPL"), or
00026  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00027  * in which case the provisions of the GPL or the LGPL are applicable instead
00028  * of those above. If you wish to allow use of your version of this file only
00029  * under the terms of either the GPL or the LGPL, and not to allow others to
00030  * use your version of this file under the terms of the MPL, indicate your
00031  * decision by deleting the provisions above and replace them with the notice
00032  * and other provisions required by the GPL or the LGPL. If you do not delete
00033  * the provisions above, a recipient may use your version of this file under
00034  * the terms of any one of the MPL, the GPL or the LGPL.
00035  *
00036  * ***** END LICENSE BLOCK ***** */
00037 
00042 #ifndef _mozTXTToHTMLConv_h__
00043 #define _mozTXTToHTMLConv_h__
00044 
00045 #include "mozITXTToHTMLConv.h"
00046 #include "nsIIOService.h"
00047 #include "nsString.h"
00048 #include "nsTimer.h"
00049 #include "nsCOMPtr.h"
00050 
00051 
00052 class mozTXTToHTMLConv : public mozITXTToHTMLConv
00053 {
      // mozTXTToHTMLConv: concrete class deriving from mozITXTToHTMLConv. The
      // license header of this file describes it as "the Mozilla Text to HTML
      // converter code".
      //
      // NOTE(review): the listing numbers embedded in this file skip in several
      // places (e.g. 00071 -> 00075), presumably where the original doc comments
      // were stripped. The notes added below are derived from the declarations
      // only; anything marked "presumably" / "TODO confirm" must be verified
      // against the implementation before being relied upon.
00054 
00055 
00057 public:
00059 
00060   mozTXTToHTMLConv();
00061   virtual ~mozTXTToHTMLConv();
        // Macro-declared members; the expansion is defined outside this file.
00062   NS_DECL_ISUPPORTS
00063 
00064   // XXX Is this really needed?  This isn't an interface.
00065   NS_DEFINE_STATIC_IID_ACCESSOR(MOZITXTTOHTMLCONV_IID)
00066 
        // Four more macro-declared member groups. Going by the macro names these
        // declare the methods of mozITXTToHTMLConv, nsIRequestObserver,
        // nsIStreamListener and nsIStreamConverter - TODO confirm against the
        // generated interface headers.
00067   NS_DECL_MOZITXTTOHTMLCONV
00068   NS_DECL_NSIREQUESTOBSERVER
00069   NS_DECL_NSISTREAMLISTENER
00070   NS_DECL_NSISTREAMCONVERTER
00071 
        // Takes a PRUnichar buffer with an explicit length, a PRUint32 "whattodo"
        // selector and a mutable output string. Presumably converts plain text to
        // HTML into aOutString, with whattodo selecting the conversions to apply -
        // TODO confirm.
00075   void ScanTXT(const PRUnichar * aInString, PRInt32 aInStringLength, PRUint32 whattodo, nsString& aOutString);
00076 
        // Same shape as ScanTXT but the input is an nsString passed by non-const
        // reference. Presumably the input already contains HTML - TODO confirm,
        // including whether aInString is modified.
00080   void ScanHTML(nsString& aInString, PRUint32 whattodo, nsString &aOutString);
00081 
        // Returns a PRInt32 and takes logLineStart by reference. The name suggests
        // it reports the citation (quoting) level of `line` - TODO confirm what is
        // written to logLineStart.
00085   PRInt32 CiteLevelTXT(const PRUnichar * line,PRUint32& logLineStart);
00086 
00087 
00088   // Timing!
        // Three members declared through the MOZ_TIMER_DECLARE macro (presumably
        // provided by nsTimer.h, which this header includes - confirm).
00089   MOZ_TIMER_DECLARE(mScanTXTTimer)
00090   MOZ_TIMER_DECLARE(mGlyphHitTimer)
00091   MOZ_TIMER_DECLARE(mTotalMimeTime)
00092 
00093 
00094 
00095 protected:
00097   nsCOMPtr<nsIIOService> mIOService; // for performance reasons, cache the netwerk service...
        // Takes the input buffer, a position within it and an output string.
        // Presumably expands an abbreviated URL found around `pos` into
        // aOutString - TODO confirm.
00109   void CompleteAbbreviatedURL(const PRUnichar * aInString, PRInt32 aInLength, 
00110                               const PRUint32 pos, nsString& aOutString);
00111 
00112 
00114 private:
00116 
        // Kinds of limitation that can be requested for the text before/after a
        // match; used as the `before` and `after` parameters of ItMatchesDelimited
        // and NumberOfMatches below.
00117   enum LIMTYPE
00118   {
00119     LT_IGNORE,     // limitation not checked
00120     LT_DELIMITER,  // not alphanumeric and not rep[0]. End of text is also ok.
00121     LT_ALPHA,      // alpha char
00122     LT_DIGIT       // presumably: digit char (by analogy with LT_ALPHA) - confirm
00123   };
00124 
        // Returns PRBool. Presumably tests whether `rep` (aRepLen units) matches in
        // aInString subject to the `before`/`after` limitations - TODO confirm
        // where in aInString the match is anchored.
00137   PRBool ItMatchesDelimited(const PRUnichar * aInString, PRInt32 aInLength,
00138       const PRUnichar * rep, PRInt32 aRepLen, LIMTYPE before, LIMTYPE after);
00139 
        // Same parameters as ItMatchesDelimited, but returns a PRUint32; going by
        // the name, a count of matches within the input - TODO confirm.
00144   PRUint32 NumberOfMatches(const PRUnichar * aInString, PRInt32 aInStringLength,
00145       const PRUnichar* rep, PRInt32 aRepLen, LIMTYPE before, LIMTYPE after);
00146 
        // Takes one character and a string to append to (per the parameter name).
        // Presumably appends the HTML-escaped form of `ch` - TODO confirm.
00155   void EscapeChar(const PRUnichar ch, nsString& aStringToAppendto);
00156 
        // Takes the string by non-const reference and returns nothing, so any
        // result must be delivered through aInString itself. Presumably escapes it
        // in place - TODO confirm.
00160   void EscapeStr(nsString& aInString);
00161 
        // Takes a buffer, a start position and a length, plus an output string.
        // Presumably the inverse of the escaping helpers above for that range -
        // TODO confirm.
00169   void UnescapeStr(const PRUnichar * aInString, PRInt32 aStartPos, PRInt32 aLength, nsString& aOutString);
00170 
        // Returns PRBool and has three by-reference outputs: outputHTML,
        // replaceBefore and replaceAfter. Presumably looks for a URL around `pos`
        // and reports the generated HTML plus how much surrounding text it
        // replaces - TODO confirm. The comment on mozTXTToHTMLConv_lastMode at the
        // end of this file names FindURL as its only user.
00194   PRBool FindURL(const PRUnichar * aInString, PRInt32 aInLength, const PRUint32 pos,
00195           const PRUint32 whathasbeendone,
00196           nsString& outputHTML, PRInt32& replaceBefore, PRInt32& replaceAfter);
00197 
        // URL recognition modes, passed as `check` / `mode` to FindURLStart,
        // FindURLEnd, CalculateURLBoundaries and CheckURLAndCreateHTML below.
        // No enumerator has an explicit value, so unknown == 0 ... abbreviated == 4.
00198   enum modetype {
00199          unknown,
00200          RFC1738,          /* Check, if RFC1738, APPENDIX compliant,
00201                               like "<URL:http://www.mozilla.org>". */
00202          RFC2396E,         /* RFC2396, APPENDIX E allows anglebrackets (like
00203                               "<http://www.mozilla.org>") (without "URL:") or
00204                               quotation marks(like ""http://www.mozilla.org"").
00205                               Also allow email addresses without scheme,
00206                               e.g. "<mozilla@bucksch.org>" */
00207          freetext,         /* assume heading scheme
00208                               with "[a-zA-Z][a-zA-Z0-9+\-\.]*:" like "news:"
00209                               (see RFC2396, Section 3.1).
00210                               Certain characters (see code) or any whitespace
00211                               (including linebreaks) end the URL.
00212                               Other certain (punctuation) characters (see code)
00213                               at the end are stripped off. */
00214          abbreviated       /* Similar to freetext, but without scheme, e.g.
00215                              "www.mozilla.org", "ftp.mozilla.org" and
00216                               "mozilla@bucksch.org". */
00217       /* RFC1738 and RFC2396E type URLs may use multiple lines,
00218          whitespace is stripped. Special characters like ")" stay intact.*/
00219   };
00220 
        // Returns PRBool; writes `start` by reference. Presumably locates the start
        // of a URL of kind `check` relative to `pos` - TODO confirm.
00228   PRBool FindURLStart(const PRUnichar * aInString, PRInt32 aInLength, const PRUint32 pos,
00229                              const modetype check, PRUint32& start);
00230 
        // Counterpart of FindURLStart: takes the already-known `start` by value and
        // writes `end` by reference - TODO confirm whether `end` is inclusive.
00238   PRBool FindURLEnd(const PRUnichar * aInString, PRInt32 aInStringLength, const PRUint32 pos,
00239            const modetype check, const PRUint32 start, PRUint32& end);
00240 
        // Takes the start/end found above and fills four by-reference outputs:
        // txtURL, desc, replaceBefore and replaceAfter. Exact meaning of each is
        // not visible from this header - TODO confirm.
00251   void CalculateURLBoundaries(const PRUnichar * aInString, PRInt32 aInStringLength, 
00252      const PRUint32 pos, const PRUint32 whathasbeendone,
00253      const modetype check, const PRUint32 start, const PRUint32 end,
00254      nsString& txtURL, nsString& desc,
00255      PRInt32& replaceBefore, PRInt32& replaceAfter);
00256 
        // Returns PRBool; reads txtURL, desc and mode, writes outputHTML.
        // Presumably validates the URL and builds the link markup - TODO confirm.
00262   PRBool CheckURLAndCreateHTML(
00263        const nsString& txtURL, const nsString& desc, const modetype mode,
00264        nsString& outputHTML);
00265 
        // Returns PRBool; takes a text tag (tagTXT, aTagTxtLen) and the HTML tag
        // and attribute to use for it, and updates aOutputString and openTags by
        // reference. Presumably handles structured-phrase markup - TODO confirm
        // the meaning of col0 and openTags.
00284   PRBool StructPhraseHit(const PRUnichar * aInString, PRInt32 aInStringLength, PRBool col0,
00285      const PRUnichar* tagTXT,
00286      PRInt32 aTagTxtLen, 
00287      const char* tagHTML, const char* attributeHTML,
00288      nsString& aOutputString, PRUint32& openTags);
00289 
        // Returns PRBool; takes a text tag and an image name, writes outputHTML
        // and glyphTextLen by reference. Presumably matches one smiley at the start
        // of aInString - TODO confirm.
00297   PRBool
00298          SmilyHit(const PRUnichar * aInString, PRInt32 aLength, PRBool col0,
00299          const char* tagTXT, const char* imageName,
00300          nsString& outputHTML, PRInt32& glyphTextLen);
00301 
        // Returns PRBool; writes aOutString and glyphTextLen by reference.
        // Presumably the general glyph matcher of which SmilyHit is one case -
        // TODO confirm.
00317   PRBool GlyphHit(const PRUnichar * aInString, PRInt32 aInLength, PRBool col0,
00318        nsString& aOutString, PRInt32& glyphTextLen);
00319 
        // Returns PRBool for a given URL string; the name suggests it decides
        // whether aURL should be turned into a link - TODO confirm the criteria.
00324   PRBool ShouldLinkify(const nsCString& aURL);
00325 };
00326 
00327 // It's said, that Win32 and Mac don't like static const members
      // (presumably why these are namespace-scope constants rather than members
      // of mozTXTToHTMLConv). With the modetype enumerators numbered from 0
      // (unknown), 4 is the value of the last enumerator, abbreviated.
00328 const PRInt32 mozTXTToHTMLConv_lastMode = 4;
00329                                // Needed (only) by mozTXTToHTMLConv::FindURL
00330 const PRInt32 mozTXTToHTMLConv_numberOfModes = 4;  // ditto; unknown not counted
00331 
00332 #endif