Back to index

lightning-sunbird  0.9+nobinonly
nsPlainTextSerializer.cpp
Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /* ***** BEGIN LICENSE BLOCK *****
00003  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00004  *
00005  * The contents of this file are subject to the Mozilla Public License Version
00006  * 1.1 (the "License"); you may not use this file except in compliance with
00007  * the License. You may obtain a copy of the License at
00008  * http://www.mozilla.org/MPL/
00009  *
00010  * Software distributed under the License is distributed on an "AS IS" basis,
00011  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00012  * for the specific language governing rights and limitations under the
00013  * License.
00014  *
00015  * The Original Code is mozilla.org code.
00016  *
00017  * The Initial Developer of the Original Code is
00018  * Netscape Communications Corporation.
00019  * Portions created by the Initial Developer are Copyright (C) 1998
00020  * the Initial Developer. All Rights Reserved.
00021  *
00022  * Contributor(s):
00023  *   Daniel Bratell <bratell@lysator.liu.se>
00024  *   Ben Bucksch <mozilla@bucksch.org>
00025  *
00026  * Alternatively, the contents of this file may be used under the terms of
00027  * either of the GNU General Public License Version 2 or later (the "GPL"),
00028  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00029  * in which case the provisions of the GPL or the LGPL are applicable instead
00030  * of those above. If you wish to allow use of your version of this file only
00031  * under the terms of either the GPL or the LGPL, and not to allow others to
00032  * use your version of this file under the terms of the MPL, indicate your
00033  * decision by deleting the provisions above and replace them with the notice
00034  * and other provisions required by the GPL or the LGPL. If you do not delete
00035  * the provisions above, a recipient may use your version of this file under
00036  * the terms of any one of the MPL, the GPL or the LGPL.
00037  *
00038  * ***** END LICENSE BLOCK ***** */
00039 
00040 #include "nsPlainTextSerializer.h"
00041 #include "nsILineBreakerFactory.h"
00042 #include "nsLWBrkCIID.h"
00043 #include "nsIServiceManager.h"
00044 #include "nsHTMLAtoms.h"
00045 #include "nsIDOMText.h"
00046 #include "nsIDOMCDATASection.h"
00047 #include "nsIDOMElement.h"
00048 #include "nsINameSpaceManager.h"
00049 #include "nsITextContent.h"
00050 #include "nsTextFragment.h"
00051 #include "nsContentUtils.h"
00052 #include "nsReadableUtils.h"
00053 #include "nsUnicharUtils.h"
00054 #include "nsCRT.h"
00055 #include "nsIParserService.h"
00056 
00057 static NS_DEFINE_CID(kLWBrkCID, NS_LWBRK_CID);
00058 
00059 #define PREF_STRUCTS "converter.html2txt.structs"
00060 #define PREF_HEADER_STRATEGY "converter.html2txt.header_strategy"
00061 
00062 static const  PRInt32 kTabSize=4;
00063 static const  PRInt32 kOLNumberWidth = 3;
00064 static const  PRInt32 kIndentSizeHeaders = 2;  /* Indention of h1, if
00065                                                 mHeaderStrategy = 1 or = 2.
00066                                                 Indention of other headers
00067                                                 is derived from that.
00068                                                 XXX center h1? */
00069 static const  PRInt32 kIndentIncrementHeaders = 2;  /* If mHeaderStrategy = 1,
00070                                                 indent h(x+1) this many
00071                                                 columns more than h(x) */
00072 static const  PRInt32 kIndentSizeList = (kTabSize > kOLNumberWidth+3) ? kTabSize: kOLNumberWidth+3;
00073                                // Indention of non-first lines of ul and ol
00074 static const  PRInt32 kIndentSizeDD = kTabSize;  // Indention of <dd>
00075 
00076 static PRInt32 HeaderLevel(eHTMLTags aTag);
00077 static PRInt32 GetUnicharWidth(PRUnichar ucs);
00078 static PRInt32 GetUnicharStringWidth(const PRUnichar* pwcs, PRInt32 n);
00079 
00080 // Someday may want to make this non-const:
00081 static const PRUint32 TagStackSize = 500;
00082 static const PRUint32 OLStackSize = 100;
00083 
00084 nsresult NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer)
00085 {
00086   nsPlainTextSerializer* it = new nsPlainTextSerializer();
00087   if (!it) {
00088     return NS_ERROR_OUT_OF_MEMORY;
00089   }
00090 
00091   return CallQueryInterface(it, aSerializer);
00092 }
00093 
00094 nsPlainTextSerializer::nsPlainTextSerializer()
00095   : kSpace(NS_LITERAL_STRING(" ")) // Init of "constant"
00096 {
00097 
00098   mOutputString = nsnull;
00099   mInHead = PR_FALSE;
00100   mAtFirstColumn = PR_TRUE;
00101   mIndent = 0;
00102   mCiteQuoteLevel = 0;
00103   mStructs = PR_TRUE;       // will be read from prefs later
00104   mHeaderStrategy = 1 /*indent increasingly*/;   // ditto
00105   mQuotesPreformatted = PR_FALSE;                // ditto
00106   mDontWrapAnyQuotes = PR_FALSE;                 // ditto
00107   mHasWrittenCiteBlockquote = PR_FALSE;
00108   mSpanLevel = 0;
00109   for (PRInt32 i = 0; i <= 6; i++) {
00110     mHeaderCounter[i] = 0;
00111   }
00112 
00113   // Line breaker
00114   mWrapColumn = 72;     // XXX magic number, we expect someone to reset this
00115   mCurrentLineWidth = 0;
00116 
00117   // Flow
00118   mEmptyLines = 1; // The start of the document is an "empty line" in itself,
00119   mInWhitespace = PR_TRUE;
00120   mPreFormatted = PR_FALSE;
00121   mStartedOutput = PR_FALSE;
00122 
00123   // initialize the tag stack to zero:
00124   mTagStack = new nsHTMLTag[TagStackSize];
00125   mTagStackIndex = 0;
00126   mIgnoreAboveIndex = (PRUint32)kNotFound;
00127 
00128   // initialize the OL stack, where numbers for ordered lists are kept:
00129   mOLStack = new PRInt32[OLStackSize];
00130   mOLStackIndex = 0;
00131 
00132   mULCount = 0;
00133 }
00134 
00135 nsPlainTextSerializer::~nsPlainTextSerializer()
00136 {
00137   delete[] mTagStack;
00138   delete[] mOLStack;
00139 }
00140 
00141 NS_IMPL_ISUPPORTS4(nsPlainTextSerializer, 
00142                    nsIContentSerializer,
00143                    nsIContentSink,
00144                    nsIHTMLContentSink,
00145                    nsIHTMLToTextSink)
00146 
00147 
00148 NS_IMETHODIMP 
00149 nsPlainTextSerializer::Init(PRUint32 aFlags, PRUint32 aWrapColumn,
00150                             const char* aCharSet, PRBool aIsCopying)
00151 {
00152 #ifdef DEBUG
00153   // Check if the major control flags are set correctly.
00154   if(aFlags & nsIDocumentEncoder::OutputFormatFlowed) {
00155     NS_ASSERTION(aFlags & nsIDocumentEncoder::OutputFormatted,
00156                  "If you want format=flowed, you must combine it with "
00157                  "nsIDocumentEncoder::OutputFormatted");
00158   }
00159 
00160   if(aFlags & nsIDocumentEncoder::OutputFormatted) {
00161     NS_ASSERTION(!(aFlags & nsIDocumentEncoder::OutputPreformatted),
00162                  "Can't do formatted and preformatted output at the same time!");
00163   }
00164 #endif
00165 
00166   NS_ENSURE_TRUE(nsContentUtils::GetParserServiceWeakRef(),
00167                  NS_ERROR_UNEXPECTED);
00168 
00169   nsresult rv;
00170   
00171   mFlags = aFlags;
00172   mWrapColumn = aWrapColumn;
00173 
00174   // Only create a linebreaker if we will handle wrapping.
00175   if (MayWrap()) {
00176     nsCOMPtr<nsILineBreakerFactory> lf(do_GetService(kLWBrkCID, &rv));
00177     if (NS_SUCCEEDED(rv)) {
00178       nsAutoString lbarg;
00179       rv = lf->GetBreaker(lbarg, getter_AddRefs(mLineBreaker));
00180       if (NS_FAILED(rv)) return NS_ERROR_FAILURE;
00181     }
00182   }
00183 
00184   // Set the line break character:
00185   if ((mFlags & nsIDocumentEncoder::OutputCRLineBreak)
00186       && (mFlags & nsIDocumentEncoder::OutputLFLineBreak)) {
00187     // Windows
00188     mLineBreak.AssignLiteral("\r\n");
00189   }
00190   else if (mFlags & nsIDocumentEncoder::OutputCRLineBreak) {
00191     // Mac
00192     mLineBreak.Assign(PRUnichar('\r'));
00193   }
00194   else if (mFlags & nsIDocumentEncoder::OutputLFLineBreak) {
00195     // Unix/DOM
00196     mLineBreak.Assign(PRUnichar('\n'));
00197   }
00198   else {
00199     // Platform/default
00200     mLineBreak.AssignLiteral(NS_LINEBREAK);
00201   }
00202 
00203   mLineBreakDue = PR_FALSE;
00204   mFloatingLines = -1;
00205 
00206   if (mFlags & nsIDocumentEncoder::OutputFormatted) {
00207     // Get some prefs that controls how we do formatted output
00208     mStructs = nsContentUtils::GetBoolPref(PREF_STRUCTS, mStructs);
00209 
00210     mHeaderStrategy =
00211       nsContentUtils::GetIntPref(PREF_HEADER_STRATEGY, mHeaderStrategy);
00212 
00213     // The quotesPreformatted pref is a temporary measure. See bug 69638.
00214     mQuotesPreformatted =
00215       nsContentUtils::GetBoolPref("editor.quotesPreformatted",
00216                                   mQuotesPreformatted);
00217 
00218     // DontWrapAnyQuotes is set according to whether plaintext mail
00219     // is wrapping to window width -- see bug 134439.
00220     // We'll only want this if we're wrapping and formatted.
00221     if (mFlags & nsIDocumentEncoder::OutputWrap || mWrapColumn > 0) {
00222       mDontWrapAnyQuotes =
00223         nsContentUtils::GetBoolPref("mail.compose.wrap_to_window_width",
00224                                     mDontWrapAnyQuotes);
00225     }
00226   }
00227 
00228   // XXX We should let the caller pass this in.
00229   if (nsContentUtils::GetBoolPref("browser.frames.enabled")) {
00230     mFlags &= ~nsIDocumentEncoder::OutputNoFramesContent;
00231   }
00232   else {
00233     mFlags |= nsIDocumentEncoder::OutputNoFramesContent;
00234   }
00235 
00236   return NS_OK;
00237 }
00238 
00239 PRBool
00240 nsPlainTextSerializer::GetLastBool(const nsVoidArray& aStack)
00241 {
00242   PRUint32 size = aStack.Count();
00243   if (size == 0) {
00244     return PR_FALSE;
00245   }
00246   return (aStack.ElementAt(size-1) != NS_REINTERPRET_CAST(void*, PR_FALSE));
00247 }
00248 
00249 void
00250 nsPlainTextSerializer::SetLastBool(nsVoidArray& aStack, PRBool aValue)
00251 {
00252   PRUint32 size = aStack.Count();
00253   if (size > 0) {
00254     aStack.ReplaceElementAt(NS_REINTERPRET_CAST(void*, aValue), size-1);
00255   }
00256   else {
00257     NS_ERROR("There is no \"Last\" value");
00258   }
00259 }
00260 
00261 void
00262 nsPlainTextSerializer::PushBool(nsVoidArray& aStack, PRBool aValue)
00263 {
00264     aStack.AppendElement(NS_REINTERPRET_CAST(void*, aValue));
00265 }
00266 
00267 PRBool
00268 nsPlainTextSerializer::PopBool(nsVoidArray& aStack)
00269 {
00270   PRBool returnValue = PR_FALSE;
00271   PRUint32 size = aStack.Count();
00272   if (size > 0) {
00273     returnValue = (aStack.ElementAt(size-1) != NS_REINTERPRET_CAST(void*, PR_FALSE));
00274     aStack.RemoveElementAt(size-1);
00275   }
00276   return returnValue;
00277 }
00278 
00279 NS_IMETHODIMP
00280 nsPlainTextSerializer::Initialize(nsAString* aOutString,
00281                                   PRUint32 aFlags, PRUint32 aWrapCol)
00282 {
00283   nsresult rv = Init(aFlags, aWrapCol, nsnull, PR_FALSE);
00284   NS_ENSURE_SUCCESS(rv, rv);
00285 
00286   // XXX This is wrong. It violates XPCOM string ownership rules.
00287   // We're only getting away with this because instances of this
00288   // class are restricted to single function scope.
00289   mOutputString = aOutString;
00290 
00291   return NS_OK;
00292 }
00293 
00294 NS_IMETHODIMP 
00295 nsPlainTextSerializer::AppendText(nsIDOMText* aText, 
00296                                   PRInt32 aStartOffset,
00297                                   PRInt32 aEndOffset, 
00298                                   nsAString& aStr)
00299 {
00300   if (mIgnoreAboveIndex != (PRUint32)kNotFound) {
00301     return NS_OK;
00302   }
00303     
00304   NS_ASSERTION(aStartOffset >= 0, "Negative start offset for text fragment!");
00305   if ( aStartOffset < 0 )
00306     return NS_ERROR_INVALID_ARG;
00307 
00308   NS_ENSURE_ARG(aText);
00309 
00310   nsresult rv = NS_OK;
00311   PRInt32 length = 0;
00312   nsAutoString textstr;
00313 
00314   nsCOMPtr<nsITextContent> content = do_QueryInterface(aText);
00315   if (!content) return NS_ERROR_FAILURE;
00316   
00317   const nsTextFragment* frag = content->Text();
00318 
00319   if (frag) {
00320     PRInt32 endoffset = (aEndOffset == -1) ? frag->GetLength() : aEndOffset;
00321     NS_ASSERTION(aStartOffset <= endoffset, "A start offset is beyond the end of the text fragment!");
00322 
00323     length = endoffset - aStartOffset;
00324     if (length <= 0) {
00325       return NS_OK;
00326     }
00327 
00328     if (frag->Is2b()) {
00329       textstr.Assign(frag->Get2b() + aStartOffset, length);
00330     }
00331     else {
00332       textstr.AssignWithConversion(frag->Get1b()+aStartOffset, length);
00333     }
00334   }
00335 
00336   mOutputString = &aStr;
00337 
00338   // We have to split the string across newlines
00339   // to match parser behavior
00340   PRInt32 start = 0;
00341   PRInt32 offset = textstr.FindCharInSet("\n\r");
00342   while (offset != kNotFound) {
00343 
00344     if(offset>start) {
00345       // Pass in the line
00346       rv = DoAddLeaf(nsnull,
00347                      eHTMLTag_text,
00348                      Substring(textstr, start, offset-start));
00349       if (NS_FAILED(rv)) break;
00350     }
00351 
00352     // Pass in a newline
00353     rv = DoAddLeaf(nsnull, eHTMLTag_newline, mLineBreak);
00354     if (NS_FAILED(rv)) break;
00355     
00356     start = offset+1;
00357     offset = textstr.FindCharInSet("\n\r", start);
00358   }
00359 
00360   // Consume the last bit of the string if there's any left
00361   if (NS_SUCCEEDED(rv) && start < length) {
00362     if (start) {
00363       rv = DoAddLeaf(nsnull,
00364                      eHTMLTag_text,
00365                      Substring(textstr, start, length-start));
00366     }
00367     else {
00368       rv = DoAddLeaf(nsnull, eHTMLTag_text, textstr);
00369     }
00370   }
00371   
00372   mOutputString = nsnull;
00373 
00374   return rv;
00375 }
00376 
00377 NS_IMETHODIMP
00378 nsPlainTextSerializer::AppendCDATASection(nsIDOMCDATASection* aCDATASection,
00379                                           PRInt32 aStartOffset,
00380                                           PRInt32 aEndOffset,
00381                                           nsAString& aStr)
00382 {
00383   return AppendText(aCDATASection, aStartOffset, aEndOffset, aStr);
00384 }
00385 
00386 NS_IMETHODIMP
00387 nsPlainTextSerializer::AppendElementStart(nsIDOMElement *aElement,
00388                                           PRBool aHasChildren,
00389                                           nsAString& aStr)
00390 {
00391   NS_ENSURE_ARG(aElement);
00392 
00393   mContent = do_QueryInterface(aElement);
00394   if (!mContent) return NS_ERROR_FAILURE;
00395 
00396   nsresult rv;
00397   PRInt32 id = GetIdForContent(mContent);
00398 
00399   PRBool isContainer = IsContainer(id);
00400 
00401   mOutputString = &aStr;
00402 
00403   if (isContainer) {
00404     rv = DoOpenContainer(nsnull, id);
00405   }
00406   else {
00407     nsAutoString empty;
00408     rv = DoAddLeaf(nsnull, id, empty);
00409   }
00410 
00411   mContent = 0;
00412   mOutputString = nsnull;
00413 
00414   if (!mInHead && id == eHTMLTag_head)
00415     mInHead = PR_TRUE;    
00416 
00417   return rv;
00418 } 
00419  
00420 NS_IMETHODIMP 
00421 nsPlainTextSerializer::AppendElementEnd(nsIDOMElement *aElement,
00422                                         nsAString& aStr)
00423 {
00424   NS_ENSURE_ARG(aElement);
00425 
00426   mContent = do_QueryInterface(aElement);
00427   if (!mContent) return NS_ERROR_FAILURE;
00428 
00429   nsresult rv;
00430   PRInt32 id = GetIdForContent(mContent);
00431 
00432   PRBool isContainer = IsContainer(id);
00433 
00434   mOutputString = &aStr;
00435 
00436   rv = NS_OK;
00437   if (isContainer) {
00438     rv = DoCloseContainer(id);
00439   }
00440 
00441   mContent = 0;
00442   mOutputString = nsnull;
00443 
00444   if (mInHead && id == eHTMLTag_head)
00445     mInHead = PR_FALSE;    
00446 
00447   return rv;
00448 }
00449 
00450 NS_IMETHODIMP 
00451 nsPlainTextSerializer::Flush(nsAString& aStr)
00452 {
00453   mOutputString = &aStr;
00454   FlushLine();
00455   mOutputString = nsnull;
00456   return NS_OK;
00457 }
00458 
00459 NS_IMETHODIMP
00460 nsPlainTextSerializer::AppendDocumentStart(nsIDOMDocument *aDocument,
00461                                              nsAString& aStr)
00462 {
00463   return NS_OK;
00464 }
00465 
00466 NS_IMETHODIMP
00467 nsPlainTextSerializer::OpenContainer(const nsIParserNode& aNode)
00468 {
00469   PRInt32 type = aNode.GetNodeType();
00470 
00471   return DoOpenContainer(&aNode, type);
00472 }
00473 
00474 NS_IMETHODIMP 
00475 nsPlainTextSerializer::CloseContainer(const nsHTMLTag aTag)
00476 {
00477   return DoCloseContainer(aTag);
00478 }
00479 
00480 NS_IMETHODIMP 
00481 nsPlainTextSerializer::AddHeadContent(const nsIParserNode& aNode)
00482 {
00483   if (eHTMLTag_title == aNode.GetNodeType()) {
00484     // XXX collect the skipped content
00485     return NS_OK;
00486   }
00487   OpenHead(aNode);
00488   nsresult rv = AddLeaf(aNode);
00489   CloseHead();
00490   return rv;
00491 }
00492 
00493 NS_IMETHODIMP 
00494 nsPlainTextSerializer::AddLeaf(const nsIParserNode& aNode)
00495 {
00496   if (mIgnoreAboveIndex != (PRUint32)kNotFound) {
00497     return NS_OK;
00498   }
00499 
00500   eHTMLTags type = (eHTMLTags)aNode.GetNodeType();
00501   const nsAString& text = aNode.GetText();
00502 
00503   if ((type == eHTMLTag_text) ||
00504       (type == eHTMLTag_whitespace) ||
00505       (type == eHTMLTag_newline)) {
00506     // Copy the text out, stripping out CRs
00507     nsAutoString str;
00508     PRUint32 length;
00509     str.SetCapacity(text.Length());
00510     nsReadingIterator<PRUnichar> srcStart, srcEnd;
00511     length = nsContentUtils::CopyNewlineNormalizedUnicodeTo(text.BeginReading(srcStart), text.EndReading(srcEnd), str);
00512     str.SetLength(length);
00513     return DoAddLeaf(&aNode, type, str);
00514   }
00515   else {
00516     return DoAddLeaf(&aNode, type, text);
00517   }
00518 }
00519 
00520 NS_IMETHODIMP
00521 nsPlainTextSerializer::OpenHTML(const nsIParserNode& aNode)
00522 {
00523   return OpenContainer(aNode);
00524 }
00525 
00526 NS_IMETHODIMP 
00527 nsPlainTextSerializer::CloseHTML()
00528 {
00529   return CloseContainer(eHTMLTag_html);
00530 }
00531 
00532 NS_IMETHODIMP 
00533 nsPlainTextSerializer::OpenHead(const nsIParserNode& aNode)
00534 {
00535   mInHead = PR_TRUE;
00536   return NS_OK;
00537 }
00538 
00539 NS_IMETHODIMP 
00540 nsPlainTextSerializer::CloseHead()
00541 {
00542   mInHead = PR_FALSE;
00543   return NS_OK;
00544 }
00545 
00546 NS_IMETHODIMP 
00547 nsPlainTextSerializer::OpenBody(const nsIParserNode& aNode)
00548 {
00549   return OpenContainer(aNode);
00550 }
00551 
00552 NS_IMETHODIMP 
00553 nsPlainTextSerializer::CloseBody()
00554 {
00555   return CloseContainer(eHTMLTag_body);
00556 }
00557 
00558 NS_IMETHODIMP 
00559 nsPlainTextSerializer::OpenForm(const nsIParserNode& aNode)
00560 {
00561   return OpenContainer(aNode);
00562 }
00563 
00564 NS_IMETHODIMP 
00565 nsPlainTextSerializer::CloseForm()
00566 {
00567   return CloseContainer(eHTMLTag_form);
00568 }
00569 
00570 NS_IMETHODIMP 
00571 nsPlainTextSerializer::OpenMap(const nsIParserNode& aNode)
00572 {
00573   return OpenContainer(aNode);
00574 }
00575 
00576 NS_IMETHODIMP 
00577 nsPlainTextSerializer::CloseMap()
00578 {
00579   return CloseContainer(eHTMLTag_map);
00580 }
00581 
00582 NS_IMETHODIMP 
00583 nsPlainTextSerializer::OpenFrameset(const nsIParserNode& aNode)
00584 {
00585   return OpenContainer(aNode);
00586 }
00587 
00588 NS_IMETHODIMP 
00589 nsPlainTextSerializer::CloseFrameset()
00590 {
00591   return CloseContainer(eHTMLTag_frameset);
00592 }
00593 
00594 NS_IMETHODIMP
00595 nsPlainTextSerializer::IsEnabled(PRInt32 aTag, PRBool* aReturn)
00596 {
00597   nsHTMLTag theHTMLTag = nsHTMLTag(aTag);
00598 
00599   if (theHTMLTag == eHTMLTag_script) {
00600     *aReturn = !(mFlags & nsIDocumentEncoder::OutputNoScriptContent);
00601   }
00602   else if (theHTMLTag == eHTMLTag_frameset) {
00603     *aReturn = !(mFlags & nsIDocumentEncoder::OutputNoFramesContent);
00604   }
00605   else {
00606     *aReturn = PR_FALSE;
00607   }
00608 
00609   return NS_OK;
00610 }
00611 
00616 nsresult
00617 nsPlainTextSerializer::DoOpenContainer(const nsIParserNode* aNode, PRInt32 aTag)
00618 {
00619   if (mFlags & nsIDocumentEncoder::OutputRaw) {
00620     // Raw means raw.  Don't even think about doing anything fancy
00621     // here like indenting, adding line breaks or any other
00622     // characters such as list item bullets, quote characters
00623     // around <q>, etc.  I mean it!  Don't make me smack you!
00624 
00625     return NS_OK;
00626   }
00627 
00628   eHTMLTags type = (eHTMLTags)aTag;
00629 
00630   if (mTagStackIndex < TagStackSize) {
00631     mTagStack[mTagStackIndex++] = type;
00632   }
00633 
00634   if (mIgnoreAboveIndex != (PRUint32)kNotFound) {
00635     return NS_OK;
00636   }
00637 
00638   // Reset this so that <blockquote type=cite> doesn't affect the whitespace
00639   // above random <pre>s below it.
00640   mHasWrittenCiteBlockquote = mHasWrittenCiteBlockquote && aTag == eHTMLTag_pre;
00641 
00642   PRBool isInCiteBlockquote = PR_FALSE;
00643 
00644   // XXX special-case <blockquote type=cite> so that we don't add additional
00645   // newlines before the text.
00646   if (aTag == eHTMLTag_blockquote) {
00647     nsAutoString value;
00648     nsresult rv = GetAttributeValue(aNode, nsHTMLAtoms::type, value);
00649     isInCiteBlockquote = NS_SUCCEEDED(rv) && value.EqualsIgnoreCase("cite");
00650   }
00651 
00652   if (mLineBreakDue && !isInCiteBlockquote)
00653     EnsureVerticalSpace(mFloatingLines);
00654 
00655   // Check if this tag's content that should not be output
00656   if ((type == eHTMLTag_noscript &&
00657        !(mFlags & nsIDocumentEncoder::OutputNoScriptContent)) ||
00658       ((type == eHTMLTag_iframe || type == eHTMLTag_noframes) &&
00659        !(mFlags & nsIDocumentEncoder::OutputNoFramesContent))) {
00660     // Ignore everything that follows the current tag in 
00661     // question until a matching end tag is encountered.
00662     mIgnoreAboveIndex = mTagStackIndex - 1;
00663     return NS_OK;
00664   }
00665 
00666   if (type == eHTMLTag_body) {
00667     // Try to figure out here whether we have a
00668     // preformatted style attribute.
00669     //
00670     // Trigger on the presence of a "-moz-pre-wrap" in the
00671     // style attribute. That's a very simplistic way to do
00672     // it, but better than nothing.
00673     // Also set mWrapColumn to the value given there
00674     // (which arguably we should only do if told to do so).
00675     nsAutoString style;
00676     PRInt32 whitespace;
00677     if(NS_SUCCEEDED(GetAttributeValue(aNode, nsHTMLAtoms::style, style)) &&
00678        (kNotFound != (whitespace = style.Find("white-space:")))) {
00679 
00680       if (kNotFound != style.Find("-moz-pre-wrap", PR_TRUE, whitespace)) {
00681 #ifdef DEBUG_preformatted
00682         printf("Set mPreFormatted based on style moz-pre-wrap\n");
00683 #endif
00684         mPreFormatted = PR_TRUE;
00685         PRInt32 widthOffset = style.Find("width:");
00686         if (widthOffset >= 0) {
00687           // We have to search for the ch before the semicolon,
00688           // not for the semicolon itself, because nsString::ToInteger()
00689           // considers 'c' to be a valid numeric char (even if radix=10)
00690           // but then gets confused if it sees it next to the number
00691           // when the radix specified was 10, and returns an error code.
00692           PRInt32 semiOffset = style.Find("ch", widthOffset+6);
00693           PRInt32 length = (semiOffset > 0 ? semiOffset - widthOffset - 6
00694                             : style.Length() - widthOffset);
00695           nsAutoString widthstr;
00696           style.Mid(widthstr, widthOffset+6, length);
00697           PRInt32 err;
00698           PRInt32 col = widthstr.ToInteger(&err);
00699 
00700           if (NS_SUCCEEDED(err)) {
00701             mWrapColumn = (PRUint32)col;
00702 #ifdef DEBUG_preformatted
00703             printf("Set wrap column to %d based on style\n", mWrapColumn);
00704 #endif
00705           }
00706         }
00707       }
00708       else if (kNotFound != style.Find("pre", PR_TRUE, whitespace)) {
00709 #ifdef DEBUG_preformatted
00710         printf("Set mPreFormatted based on style pre\n");
00711 #endif
00712         mPreFormatted = PR_TRUE;
00713         mWrapColumn = 0;
00714       }
00715     } 
00716     else {
00717       mPreFormatted = PR_FALSE;
00718     }
00719 
00720     return NS_OK;
00721   }
00722 
00723   if (!DoOutput()) {
00724     return NS_OK;
00725   }
00726 
00727   if (type == eHTMLTag_p)
00728     EnsureVerticalSpace(1);
00729   else if (type == eHTMLTag_pre) {
00730     if (GetLastBool(mIsInCiteBlockquote))
00731       EnsureVerticalSpace(0);
00732     else if (mHasWrittenCiteBlockquote) {
00733       EnsureVerticalSpace(0);
00734       mHasWrittenCiteBlockquote = PR_FALSE;
00735     }
00736     else
00737       EnsureVerticalSpace(1);
00738   }
00739   else if (type == eHTMLTag_tr) {
00740     PushBool(mHasWrittenCellsForRow, PR_FALSE);
00741   }
00742   else if (type == eHTMLTag_td || type == eHTMLTag_th) {
00743     // We must make sure that the content of two table cells get a
00744     // space between them.
00745 
00746     // To make the separation between cells most obvious and
00747     // importable, we use a TAB.
00748     if (GetLastBool(mHasWrittenCellsForRow)) {
00749       // Bypass |Write| so that the TAB isn't compressed away.
00750       AddToLine(NS_LITERAL_STRING("\t").get(), 1);
00751       mInWhitespace = PR_TRUE;
00752     }
00753     else if (mHasWrittenCellsForRow.Count() == 0) {
00754       // We don't always see a <tr> (nor a <table>) before the <td> if we're
00755       // copying part of a table
00756       PushBool(mHasWrittenCellsForRow, PR_TRUE); // will never be popped
00757     }
00758     else {
00759       SetLastBool(mHasWrittenCellsForRow, PR_TRUE);
00760     }
00761   }
00762   else if (type == eHTMLTag_ul) {
00763     // Indent here to support nested lists, which aren't included in li :-(
00764     EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0);
00765          // Must end the current line before we change indention
00766     mIndent += kIndentSizeList;
00767     mULCount++;
00768   }
00769   else if (type == eHTMLTag_ol) {
00770     EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0);
00771     // Must end the current line before we change indention
00772     if (mOLStackIndex < OLStackSize) {
00773       nsAutoString startAttr;
00774       PRInt32 startVal = 1;
00775       if(NS_SUCCEEDED(GetAttributeValue(aNode, nsHTMLAtoms::start, startAttr))){
00776         PRInt32 rv = 0;
00777         startVal = startAttr.ToInteger(&rv);
00778         if (NS_FAILED(rv))
00779           startVal = 1;
00780       }
00781       mOLStack[mOLStackIndex++] = startVal;
00782     }
00783     mIndent += kIndentSizeList;  // see ul
00784   }
00785   else if (type == eHTMLTag_li) {
00786     if (mTagStackIndex > 1 && IsInOL()) {
00787       if (mOLStackIndex > 0) {
00788         nsAutoString valueAttr;
00789         if(NS_SUCCEEDED(GetAttributeValue(aNode, nsHTMLAtoms::value, valueAttr))){
00790           PRInt32 rv = 0;
00791           PRInt32 valueAttrVal = valueAttr.ToInteger(&rv);
00792           if (NS_SUCCEEDED(rv))
00793             mOLStack[mOLStackIndex-1] = valueAttrVal;
00794         }
00795         // This is what nsBulletFrame does for OLs:
00796         mInIndentString.AppendInt(mOLStack[mOLStackIndex-1]++, 10);
00797       }
00798       else {
00799         mInIndentString.Append(PRUnichar('#'));
00800       }
00801 
00802       mInIndentString.Append(PRUnichar('.'));
00803 
00804     }
00805     else {
00806       static char bulletCharArray[] = "*o+#";
00807       PRUint32 index = mULCount > 0 ? (mULCount - 1) : 3;
00808       char bulletChar = bulletCharArray[index % 4];
00809       mInIndentString.Append(PRUnichar(bulletChar));
00810     }
00811     
00812     mInIndentString.Append(PRUnichar(' '));
00813   }
00814   else if (type == eHTMLTag_dl) {
00815     EnsureVerticalSpace(1);
00816   }
00817   else if (type == eHTMLTag_dt) {
00818     EnsureVerticalSpace(0);
00819   }
00820   else if (type == eHTMLTag_dd) {
00821     EnsureVerticalSpace(0);
00822     mIndent += kIndentSizeDD;
00823   }
00824   else if (type == eHTMLTag_span) {
00825     ++mSpanLevel;
00826   }
00827   else if (type == eHTMLTag_blockquote) {
00828     // Push
00829     PushBool(mIsInCiteBlockquote, isInCiteBlockquote);
00830     if (isInCiteBlockquote) {
00831       EnsureVerticalSpace(0);
00832       mCiteQuoteLevel++;
00833     }
00834     else {
00835       EnsureVerticalSpace(1);
00836       mIndent += kTabSize; // Check for some maximum value?
00837     }
00838   }
00839 
00840   // Else make sure we'll separate block level tags,
00841   // even if we're about to leave, before doing any other formatting.
00842   else if (IsBlockLevel(aTag)) {
00843     EnsureVerticalSpace(0);
00844   }
00845 
00847   if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) {
00848     return NS_OK;
00849   }
00851   // The rest of this routine is formatted output stuff,
00852   // which we should skip if we're not formatted:
00854 
00855   // Push on stack
00856   PRBool currentNodeIsConverted = IsCurrentNodeConverted(aNode);
00857   PushBool(mCurrentNodeIsConverted, currentNodeIsConverted);
00858 
00859   if (type == eHTMLTag_h1 || type == eHTMLTag_h2 ||
00860       type == eHTMLTag_h3 || type == eHTMLTag_h4 ||
00861       type == eHTMLTag_h5 || type == eHTMLTag_h6)
00862   {
00863     EnsureVerticalSpace(2);
00864     if (mHeaderStrategy == 2) {  // numbered
00865       mIndent += kIndentSizeHeaders;
00866       // Caching
00867       PRInt32 level = HeaderLevel(type);
00868       // Increase counter for current level
00869       mHeaderCounter[level]++;
00870       // Reset all lower levels
00871       PRInt32 i;
00872 
00873       for (i = level + 1; i <= 6; i++) {
00874         mHeaderCounter[i] = 0;
00875       }
00876 
00877       // Construct numbers
00878       nsAutoString leadup;
00879       for (i = 1; i <= level; i++) {
00880         leadup.AppendInt(mHeaderCounter[i]);
00881         leadup.Append(PRUnichar('.'));
00882       }
00883       leadup.Append(PRUnichar(' '));
00884       Write(leadup);
00885     }
00886     else if (mHeaderStrategy == 1) { // indent increasingly
00887       mIndent += kIndentSizeHeaders;
00888       for (PRInt32 i = HeaderLevel(type); i > 1; i--) {
00889            // for h(x), run x-1 times
00890         mIndent += kIndentIncrementHeaders;
00891       }
00892     }
00893   }
00894   else if (type == eHTMLTag_a && !currentNodeIsConverted) {
00895     nsAutoString url;
00896     if (NS_SUCCEEDED(GetAttributeValue(aNode, nsHTMLAtoms::href, url))
00897         && !url.IsEmpty()) {
00898       mURL = url;
00899     }
00900   }
00901   else if (type == eHTMLTag_q) {
00902     Write(NS_LITERAL_STRING("\""));
00903   }
00904   else if (type == eHTMLTag_sup && mStructs && !currentNodeIsConverted) {
00905     Write(NS_LITERAL_STRING("^"));
00906   }
00907   else if (type == eHTMLTag_sub && mStructs && !currentNodeIsConverted) { 
00908     Write(NS_LITERAL_STRING("_"));
00909   }
00910   else if (type == eHTMLTag_code && mStructs && !currentNodeIsConverted) {
00911     Write(NS_LITERAL_STRING("|"));
00912   }
00913   else if ((type == eHTMLTag_strong || type == eHTMLTag_b)
00914            && mStructs && !currentNodeIsConverted) {
00915     Write(NS_LITERAL_STRING("*"));
00916   }
00917   else if ((type == eHTMLTag_em || type == eHTMLTag_i)
00918            && mStructs && !currentNodeIsConverted) {
00919     Write(NS_LITERAL_STRING("/"));
00920   }
00921   else if (type == eHTMLTag_u && mStructs && !currentNodeIsConverted) {
00922     Write(NS_LITERAL_STRING("_"));
00923   }
00924 
00925   return NS_OK;
00926 }
00927 
00928 nsresult
00929 nsPlainTextSerializer::DoCloseContainer(PRInt32 aTag)
00930 {
00931   if (mFlags & nsIDocumentEncoder::OutputRaw) {
00932     // Raw means raw.  Don't even think about doing anything fancy
00933     // here like indenting, adding line breaks or any other
00934     // characters such as list item bullets, quote characters
00935     // around <q>, etc.  I mean it!  Don't make me smack you!
00936 
00937     return NS_OK;
00938   }
00939 
00940   if (mTagStackIndex > 0) {
00941     --mTagStackIndex;
00942   }
00943 
00944   if (mTagStackIndex >= mIgnoreAboveIndex) {
00945     if (mTagStackIndex == mIgnoreAboveIndex) {
00946       // We're dealing with the close tag whose matching
00947       // open tag had set the mIgnoreAboveIndex value.
00948       // Reset mIgnoreAboveIndex before discarding this tag.
00949       mIgnoreAboveIndex = (PRUint32)kNotFound;
00950     }
00951     return NS_OK;
00952   }
00953 
00954   eHTMLTags type = (eHTMLTags)aTag;
00955   // End current line if we're ending a block level tag
00956   if((type == eHTMLTag_body) || (type == eHTMLTag_html)) {
00957     // We want the output to end with a new line,
00958     // but in preformatted areas like text fields,
00959     // we can't emit newlines that weren't there.
00960     // So add the newline only in the case of formatted output.
00961     if (mFlags & nsIDocumentEncoder::OutputFormatted) {
00962       EnsureVerticalSpace(0);
00963     }
00964     else {
00965       FlushLine();
00966     }
00967     // We won't want to do anything with these in formatted mode either,
00968     // so just return now:
00969     return NS_OK;
00970   }
00971   else if (type == eHTMLTag_tr) {
00972     PopBool(mHasWrittenCellsForRow);
00973     // Should always end a line, but get no more whitespace
00974     if (mFloatingLines < 0)
00975       mFloatingLines = 0;
00976     mLineBreakDue = PR_TRUE;
00977   } 
00978   else if ((type == eHTMLTag_li) ||
00979            (type == eHTMLTag_dt)) {
00980     // Items that should always end a line, but get no more whitespace
00981     if (mFloatingLines < 0)
00982       mFloatingLines = 0;
00983     mLineBreakDue = PR_TRUE;
00984   } 
00985   else if (type == eHTMLTag_pre) {
00986     mFloatingLines = GetLastBool(mIsInCiteBlockquote) ? 0 : 1;
00987     mLineBreakDue = PR_TRUE;
00988   }
00989   else if (type == eHTMLTag_ul) {
00990     FlushLine();
00991     mIndent -= kIndentSizeList;
00992     if (--mULCount + mOLStackIndex == 0) {
00993       mFloatingLines = 1;
00994       mLineBreakDue = PR_TRUE;
00995     }
00996   }
00997   else if (type == eHTMLTag_ol) {
00998     FlushLine(); // Doing this after decreasing OLStackIndex would be wrong.
00999     mIndent -= kIndentSizeList;
01000     mOLStackIndex--;
01001     if (mULCount + mOLStackIndex == 0) {
01002       mFloatingLines = 1;
01003       mLineBreakDue = PR_TRUE;
01004     }
01005   }  
01006   else if (type == eHTMLTag_dl) {
01007     mFloatingLines = 1;
01008     mLineBreakDue = PR_TRUE;
01009   }
01010   else if (type == eHTMLTag_dd) {
01011     FlushLine();
01012     mIndent -= kIndentSizeDD;
01013   }
01014   else if (type == eHTMLTag_span) {
01015     --mSpanLevel;
01016   }
01017   else if (type == eHTMLTag_div) {
01018     if (mFloatingLines < 0)
01019       mFloatingLines = 0;
01020     mLineBreakDue = PR_TRUE;
01021   }
01022   else if (type == eHTMLTag_blockquote) {
01023     FlushLine();    // Is this needed?
01024 
01025     // Pop
01026     PRBool isInCiteBlockquote = PopBool(mIsInCiteBlockquote);
01027 
01028     if (isInCiteBlockquote) {
01029       mCiteQuoteLevel--;
01030       mFloatingLines = 0;
01031       mHasWrittenCiteBlockquote = PR_TRUE;
01032     }
01033     else {
01034       mIndent -= kTabSize;
01035       mFloatingLines = 1;
01036     }
01037     mLineBreakDue = PR_TRUE;
01038   }
01039   else if (IsBlockLevel(aTag)
01040            && type != eHTMLTag_script
01041            && type != eHTMLTag_doctypeDecl
01042            && type != eHTMLTag_markupDecl) {
01043     // All other blocks get 1 vertical space after them
01044     // in formatted mode, otherwise 0.
01045     // This is hard. Sometimes 0 is a better number, but
01046     // how to know?
01047     if (mFlags & nsIDocumentEncoder::OutputFormatted)
01048       EnsureVerticalSpace(1);
01049     else {
01050       if (mFloatingLines < 0)
01051         mFloatingLines = 0;
01052       mLineBreakDue = PR_TRUE;
01053     }
01054   }
01055 
01057   if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) {
01058     return NS_OK;
01059   }
01061   // The rest of this routine is formatted output stuff,
01062   // which we should skip if we're not formatted:
01064 
01065   // Pop the currentConverted stack
01066   PRBool currentNodeIsConverted = PopBool(mCurrentNodeIsConverted);
01067   
01068   if (type == eHTMLTag_h1 || type == eHTMLTag_h2 ||
01069       type == eHTMLTag_h3 || type == eHTMLTag_h4 ||
01070       type == eHTMLTag_h5 || type == eHTMLTag_h6) {
01071     
01072     if (mHeaderStrategy) {  /*numbered or indent increasingly*/ 
01073       mIndent -= kIndentSizeHeaders;
01074     }
01075     if (mHeaderStrategy == 1 /*indent increasingly*/ ) {
01076       for (PRInt32 i = HeaderLevel(type); i > 1; i--) {
01077            // for h(x), run x-1 times
01078         mIndent -= kIndentIncrementHeaders;
01079       }
01080     }
01081     EnsureVerticalSpace(1);
01082   }
01083   else if (type == eHTMLTag_a && !currentNodeIsConverted && !mURL.IsEmpty()) {
01084     nsAutoString temp; 
01085     temp.AssignLiteral(" <");
01086     temp += mURL;
01087     temp.Append(PRUnichar('>'));
01088     Write(temp);
01089     mURL.Truncate();
01090   }
01091   else if (type == eHTMLTag_q) {
01092     Write(NS_LITERAL_STRING("\""));
01093   }
01094   else if ((type == eHTMLTag_sup || type == eHTMLTag_sub) 
01095            && mStructs && !currentNodeIsConverted) {
01096     Write(kSpace);
01097   }
01098   else if (type == eHTMLTag_code && mStructs && !currentNodeIsConverted) {
01099     Write(NS_LITERAL_STRING("|"));
01100   }
01101   else if ((type == eHTMLTag_strong || type == eHTMLTag_b)
01102            && mStructs && !currentNodeIsConverted) {
01103     Write(NS_LITERAL_STRING("*"));
01104   }
01105   else if ((type == eHTMLTag_em || type == eHTMLTag_i)
01106            && mStructs && !currentNodeIsConverted) {
01107     Write(NS_LITERAL_STRING("/"));
01108   }
01109   else if (type == eHTMLTag_u && mStructs && !currentNodeIsConverted) {
01110     Write(NS_LITERAL_STRING("_"));
01111   }
01112 
01113   return NS_OK;
01114 }
01115 
01120 nsresult
01121 nsPlainTextSerializer::DoAddLeaf(const nsIParserNode *aNode, PRInt32 aTag, 
01122                                  const nsAString& aText)
01123 {
01124   // If we don't want any output, just return
01125   if (!DoOutput()) {
01126     return NS_OK;
01127   }
01128 
01129   if (aTag != eHTMLTag_whitespace && aTag != eHTMLTag_newline) {
01130     // Make sure to reset this, since it's no longer true.
01131     mHasWrittenCiteBlockquote = PR_FALSE;
01132   }
01133   
01134   if (mLineBreakDue)
01135     EnsureVerticalSpace(mFloatingLines);
01136 
01137   eHTMLTags type = (eHTMLTags)aTag;
01138   
01139   if ((mTagStackIndex > 1 &&
01140        mTagStack[mTagStackIndex-2] == eHTMLTag_select) ||
01141       (mTagStackIndex > 0 &&
01142         mTagStack[mTagStackIndex-1] == eHTMLTag_select)) {
01143     // Don't output the contents of SELECT elements;
01144     // Might be nice, eventually, to output just the selected element.
01145     // Read more in bug 31994.
01146     return NS_OK;
01147   }
01148   else if (mTagStackIndex > 0 && mTagStack[mTagStackIndex-1] == eHTMLTag_script) {
01149     // Don't output the contents of <script> tags;
01150     return NS_OK;
01151   }
01152   else if (type == eHTMLTag_text) {
01153     /* Check, if we are in a link (symbolized with mURL containing the URL)
01154        and the text is equal to the URL. In that case we don't want to output
01155        the URL twice so we scrap the text in mURL. */
01156     if (!mURL.IsEmpty() && mURL.Equals(aText)) {
01157       mURL.Truncate();
01158     }
01159     Write(aText);
01160   }
01161   else if (type == eHTMLTag_entity) {
01162     nsIParserService* parserService =
01163       nsContentUtils::GetParserServiceWeakRef();
01164     if (parserService) {
01165       nsAutoString str(aText);
01166       PRInt32 entity;
01167       parserService->HTMLConvertEntityToUnicode(str, &entity);
01168       if (entity == -1 && 
01169           !str.IsEmpty() &&
01170           str.First() == (PRUnichar) '#') {
01171         PRInt32 err = 0;
01172         entity = str.ToInteger(&err, kAutoDetect);  // NCR
01173       }
01174       nsAutoString temp;
01175       temp.Append(PRUnichar(entity));
01176       Write(temp);
01177     }
01178   }
01179   else if (type == eHTMLTag_br) {
01180     // Another egregious editor workaround, see bug 38194:
01181     // ignore the bogus br tags that the editor sticks here and there.
01182     nsAutoString typeAttr;
01183     if (NS_FAILED(GetAttributeValue(aNode, nsHTMLAtoms::type, typeAttr))
01184         || !typeAttr.EqualsLiteral("_moz")) {
01185       EnsureVerticalSpace(mEmptyLines+1);
01186     }
01187   }
01188   else if (type == eHTMLTag_whitespace) {
01189     // The only times we want to pass along whitespace from the original
01190     // html source are if we're forced into preformatted mode via flags,
01191     // or if we're prettyprinting and we're inside a <pre>.
01192     // Otherwise, either we're collapsing to minimal text, or we're
01193     // prettyprinting to mimic the html format, and in neither case
01194     // does the formatting of the html source help us.
01195     // One exception: at the very beginning of a selection,
01196     // we want to preserve whitespace.
01197     if (mFlags & nsIDocumentEncoder::OutputPreformatted ||
01198         (mPreFormatted && !mWrapColumn) ||
01199         IsInPre()) {
01200       Write(aText);
01201     }
01202     else if(!mInWhitespace ||
01203             (!mStartedOutput
01204              && mFlags | nsIDocumentEncoder::OutputSelectionOnly)) {
01205       mInWhitespace = PR_FALSE;
01206       Write(kSpace);
01207       mInWhitespace = PR_TRUE;
01208     }
01209   }
01210   else if (type == eHTMLTag_newline) {
01211     if (mFlags & nsIDocumentEncoder::OutputPreformatted ||
01212         (mPreFormatted && !mWrapColumn) ||
01213         IsInPre()) {
01214       EnsureVerticalSpace(mEmptyLines+1);
01215     }
01216     else {
01217       Write(kSpace);
01218     }
01219   }
01220   else if (type == eHTMLTag_hr &&
01221            (mFlags & nsIDocumentEncoder::OutputFormatted)) {
01222     EnsureVerticalSpace(0);
01223 
01224     // Make a line of dashes as wide as the wrap width
01225     // XXX honoring percentage would be nice
01226     nsAutoString line;
01227     PRUint32 width = (mWrapColumn > 0 ? mWrapColumn : 25);
01228     while (line.Length() < width) {
01229       line.Append(PRUnichar('-'));
01230     }
01231     Write(line);
01232 
01233     EnsureVerticalSpace(0);
01234   }
01235   else if (type == eHTMLTag_img) {
01236     /* Output (in decreasing order of preference)
01237        alt, title or nothing */
01238     // See <http://www.w3.org/TR/REC-html40/struct/objects.html#edef-IMG>
01239     nsAutoString imageDescription;
01240     if (NS_SUCCEEDED(GetAttributeValue(aNode,
01241                                        nsHTMLAtoms::alt,
01242                                        imageDescription))) {
01243       // If the alt attribute has an empty value (|alt=""|), output nothing
01244     }
01245     else if (NS_SUCCEEDED(GetAttributeValue(aNode,
01246                                             nsHTMLAtoms::title,
01247                                             imageDescription))
01248              && !imageDescription.IsEmpty()) {
01249       imageDescription = NS_LITERAL_STRING(" [") +
01250                          imageDescription +
01251                          NS_LITERAL_STRING("] ");
01252     }
01253    
01254     Write(imageDescription);
01255   }
01256 
01257 
01258   return NS_OK;
01259 }
01260 
01268 void
01269 nsPlainTextSerializer::EnsureVerticalSpace(PRInt32 noOfRows)
01270 {
01271   // If we have something in the indent we probably want to output
01272   // it and it's not included in the count for empty lines so we don't
01273   // realize that we should start a new line.
01274   if(noOfRows >= 0 && !mInIndentString.IsEmpty()) {
01275     EndLine(PR_FALSE);
01276   }
01277 
01278   while(mEmptyLines < noOfRows) {
01279     EndLine(PR_FALSE);
01280   }
01281   mLineBreakDue = PR_FALSE;
01282   mFloatingLines = -1;
01283 }
01284 
01293 void
01294 nsPlainTextSerializer::FlushLine()
01295 {
01296   if(!mCurrentLine.IsEmpty()) {
01297     if(mAtFirstColumn) {
01298       OutputQuotesAndIndent(); // XXX: Should we always do this? Bug?
01299     }
01300 
01301     Output(mCurrentLine);
01302     mAtFirstColumn = mAtFirstColumn && mCurrentLine.IsEmpty();
01303     mCurrentLine.Truncate();
01304     mCurrentLineWidth = 0;
01305   }
01306 }
01307 
01314 void 
01315 nsPlainTextSerializer::Output(nsString& aString)
01316 {
01317   if (!aString.IsEmpty()) {
01318     mStartedOutput = PR_TRUE;
01319   }
01320 
01321   // First, replace all nbsp characters with spaces,
01322   // which the unicode encoder won't do for us.
01323   static PRUnichar nbsp = 160;
01324   static PRUnichar space = ' ';
01325   aString.ReplaceChar(nbsp, space);
01326 
01327   mOutputString->Append(aString);
01328 }
01329 
01336 void
01337 nsPlainTextSerializer::AddToLine(const PRUnichar * aLineFragment, 
01338                                  PRInt32 aLineFragmentLength)
01339 {
01340   PRUint32 prefixwidth = (mCiteQuoteLevel > 0 ? mCiteQuoteLevel + 1:0)+mIndent;
01341   
01342   if (mLineBreakDue)
01343     EnsureVerticalSpace(mFloatingLines);
01344 
01345   PRInt32 linelength = mCurrentLine.Length();
01346   if(0 == linelength) {
01347     if(0 == aLineFragmentLength) {
01348       // Nothing at all. Are you kidding me?
01349       return;
01350     }
01351 
01352     if(mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
01353       if(
01354          (
01355           '>' == aLineFragment[0] ||
01356           ' ' == aLineFragment[0] ||
01357           !nsCRT::strncmp(aLineFragment, NS_LITERAL_STRING("From ").get(), 5)
01358           )
01359          && mCiteQuoteLevel == 0  // We space-stuff quoted lines anyway
01360          )
01361         {
01362           // Space stuffing a la RFC 2646 (format=flowed).
01363           mCurrentLine.Append(PRUnichar(' '));
01364           
01365           if(MayWrap()) {
01366             mCurrentLineWidth += GetUnicharWidth(' ');
01367 #ifdef DEBUG_wrapping
01368             NS_ASSERTION(GetUnicharStringWidth(mCurrentLine.get(),
01369                                                mCurrentLine.Length()) ==
01370                          (PRInt32)mCurrentLineWidth,
01371                          "mCurrentLineWidth and reality out of sync!");
01372 #endif
01373           }
01374         }
01375     }
01376     mEmptyLines=-1;
01377   }
01378     
01379   mCurrentLine.Append(aLineFragment, aLineFragmentLength);
01380   if(MayWrap()) {
01381     mCurrentLineWidth += GetUnicharStringWidth(aLineFragment,
01382                                                aLineFragmentLength);
01383 #ifdef DEBUG_wrapping
01384     NS_ASSERTION(GetUnicharstringWidth(mCurrentLine.get(),
01385                                        mCurrentLine.Length()) ==
01386                  (PRInt32)mCurrentLineWidth,
01387                  "mCurrentLineWidth and reality out of sync!");
01388 #endif
01389   }
01390 
01391   linelength = mCurrentLine.Length();
01392 
01393   //  Wrap?
01394   if(MayWrap())
01395   {
01396 #ifdef DEBUG_wrapping
01397     NS_ASSERTION(GetUnicharstringWidth(mCurrentLine.get(),
01398                                   mCurrentLine.Length()) ==
01399                  (PRInt32)mCurrentLineWidth,
01400                  "mCurrentLineWidth and reality out of sync!");
01401 #endif
01402     // Yes, wrap!
01403     // The "+4" is to avoid wrap lines that only would be a couple
01404     // of letters too long. We give this bonus only if the
01405     // wrapcolumn is more than 20.
01406     PRUint32 bonuswidth = (mWrapColumn > 20) ? 4 : 0;
01407 
01408     // XXX: Should calculate prefixwidth with GetUnicharStringWidth
01409     while(mCurrentLineWidth+prefixwidth > mWrapColumn+bonuswidth) {
01410       // Must wrap. Let's find a good place to do that.
01411       nsresult result = NS_OK;
01412       
01413       // We go from the end removing one letter at a time until
01414       // we have a reasonable width
01415       PRInt32 goodSpace = mCurrentLine.Length();
01416       PRUint32 width = mCurrentLineWidth;
01417       while(goodSpace > 0 && (width+prefixwidth > mWrapColumn)) {
01418         goodSpace--;
01419         width -= GetUnicharWidth(mCurrentLine[goodSpace]);
01420       }
01421 
01422       goodSpace++;
01423       
01424       PRBool oNeedMoreText;
01425       if (nsnull != mLineBreaker) {
01426         result = mLineBreaker->Prev(mCurrentLine.get(), 
01427                                     mCurrentLine.Length(), goodSpace,
01428                                     (PRUint32 *) &goodSpace, &oNeedMoreText);
01429         if (oNeedMoreText) {
01430           goodSpace = -1;
01431         }
01432         else if (nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace-1))) {
01433           --goodSpace;    // adjust the position since line breaker returns a position next to space
01434         }
01435       }
01436       // fallback if the line breaker is unavailable or failed
01437       if (nsnull == mLineBreaker || NS_FAILED(result)) {
01438         goodSpace = mWrapColumn-prefixwidth;
01439         while (goodSpace >= 0 &&
01440                !nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) {
01441           goodSpace--;
01442         }
01443       }
01444       
01445       nsAutoString restOfLine;
01446       if (goodSpace < 0) {
01447         // If we don't found a good place to break, accept long line and
01448         // try to find another place to break
01449         goodSpace=(prefixwidth>mWrapColumn+1)?1:mWrapColumn-prefixwidth+1;
01450         result = NS_OK;
01451         if (nsnull != mLineBreaker) {
01452           result = mLineBreaker->Next(mCurrentLine.get(), 
01453                                       mCurrentLine.Length(), goodSpace,
01454                                       (PRUint32 *) &goodSpace, &oNeedMoreText);
01455         }
01456         // fallback if the line breaker is unavailable or failed
01457         if (nsnull == mLineBreaker || NS_FAILED(result)) {
01458           goodSpace=(prefixwidth>mWrapColumn)?1:mWrapColumn-prefixwidth;
01459           while (goodSpace < linelength &&
01460                  !nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) {
01461             goodSpace++;
01462           }
01463         }
01464       }
01465       
01466       if((goodSpace < linelength) && (goodSpace > 0)) {
01467         // Found a place to break
01468 
01469         // -1 (trim a char at the break position)
01470         // only if the line break was a space.
01471         if (nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) {
01472           mCurrentLine.Right(restOfLine, linelength-goodSpace-1);
01473         }
01474         else {
01475           mCurrentLine.Right(restOfLine, linelength-goodSpace);
01476         }
01477         mCurrentLine.Truncate(goodSpace); 
01478         EndLine(PR_TRUE);
01479         mCurrentLine.Truncate();
01480         // Space stuff new line?
01481         if(mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
01482           if(
01483               !restOfLine.IsEmpty()
01484               &&
01485               (
01486                 restOfLine[0] == '>' ||
01487                 restOfLine[0] == ' ' ||
01488                 StringBeginsWith(restOfLine, NS_LITERAL_STRING("From "))
01489               )
01490               && mCiteQuoteLevel == 0  // We space-stuff quoted lines anyway
01491             )
01492           {
01493             // Space stuffing a la RFC 2646 (format=flowed).
01494             mCurrentLine.Append(PRUnichar(' '));
01495             //XXX doesn't seem to work correctly for ' '
01496           }
01497         }
01498         mCurrentLine.Append(restOfLine);
01499         mCurrentLineWidth = GetUnicharStringWidth(mCurrentLine.get(),
01500                                                   mCurrentLine.Length());
01501         linelength = mCurrentLine.Length();
01502         mEmptyLines = -1;
01503       } 
01504       else {
01505         // Nothing to do. Hopefully we get more data later
01506         // to use for a place to break line
01507         break;
01508       }
01509     }
01510   } 
01511   else {
01512     // No wrapping.
01513   }
01514 }
01515 
01522 void
01523 nsPlainTextSerializer::EndLine(PRBool aSoftlinebreak)
01524 {
01525   PRUint32 currentlinelength = mCurrentLine.Length();
01526 
01527   if(aSoftlinebreak && 0 == currentlinelength) {
01528     // No meaning
01529     return;
01530   }
01531   
01532   // In non-preformatted mode, remove SPACE from the end
01533   // of the line, unless we got "-- " in a format=flowed
01534   // output. "-- " is the sig delimiter by convention and
01535   // shouldn't be touched even in format=flowed
01536   // (see RFC 2646). We only check for "-- " when it's a hard line
01537   // break for obvious reasons.
01538   if(!(mFlags & nsIDocumentEncoder::OutputPreformatted) &&
01539      (aSoftlinebreak || !mCurrentLine.EqualsLiteral("-- "))) {
01540     // Remove SPACE:s from the end of the line.
01541     while(currentlinelength > 0 &&
01542           mCurrentLine[currentlinelength-1] == ' ') {
01543       --currentlinelength;
01544     }
01545     mCurrentLine.SetLength(currentlinelength);
01546   }
01547   
01548   if(aSoftlinebreak &&
01549      (mFlags & nsIDocumentEncoder::OutputFormatFlowed) &&
01550      (mIndent == 0)) {
01551     // Add the soft part of the soft linebreak (RFC 2646 4.1)
01552     // We only do this when there is no indentation since format=flowed
01553     // lines and indentation doesn't work well together.
01554     mCurrentLine.Append(PRUnichar(' '));
01555   }
01556 
01557   if(aSoftlinebreak) {
01558     mEmptyLines=0;
01559   } 
01560   else {
01561     // Hard break
01562     if(!mCurrentLine.IsEmpty() || !mInIndentString.IsEmpty()) {
01563       mEmptyLines=-1;
01564     }
01565 
01566     mEmptyLines++;
01567   }
01568 
01569   if(mAtFirstColumn) {
01570     // If we don't have anything "real" to output we have to
01571     // make sure the indent doesn't end in a space since that
01572     // would trick a format=flowed-aware receiver.
01573     PRBool stripTrailingSpaces = mCurrentLine.IsEmpty();
01574     OutputQuotesAndIndent(stripTrailingSpaces);
01575   }
01576 
01577   mCurrentLine.Append(mLineBreak);
01578   Output(mCurrentLine);
01579   mCurrentLine.Truncate();
01580   mCurrentLineWidth = 0;
01581   mAtFirstColumn=PR_TRUE;
01582   mInWhitespace=PR_TRUE;
01583   mLineBreakDue = PR_FALSE;
01584   mFloatingLines = -1;
01585 }
01586 
01587 
01593 void
01594 nsPlainTextSerializer::OutputQuotesAndIndent(PRBool stripTrailingSpaces /* = PR_FALSE */)
01595 {
01596   nsAutoString stringToOutput;
01597   
01598   // Put the mail quote "> " chars in, if appropriate:
01599   if (mCiteQuoteLevel > 0) {
01600     nsAutoString quotes;
01601     for(int i=0; i < mCiteQuoteLevel; i++) {
01602       quotes.Append(PRUnichar('>'));
01603     }
01604     if (!mCurrentLine.IsEmpty()) {
01605       /* Better don't output a space here, if the line is empty,
01606          in case a recieving f=f-aware UA thinks, this were a flowed line,
01607          which it isn't - it's just empty.
01608          (Flowed lines may be joined with the following one,
01609          so the empty line may be lost completely.) */
01610       quotes.Append(PRUnichar(' '));
01611     }
01612     stringToOutput = quotes;
01613     mAtFirstColumn = PR_FALSE;
01614   }
01615   
01616   // Indent if necessary
01617   PRInt32 indentwidth = mIndent - mInIndentString.Length();
01618   if (indentwidth > 0
01619       && (!mCurrentLine.IsEmpty() || !mInIndentString.IsEmpty())
01620       // Don't make empty lines look flowed
01621       ) {
01622     nsAutoString spaces;
01623     for (int i=0; i < indentwidth; ++i)
01624       spaces.Append(PRUnichar(' '));
01625     stringToOutput += spaces;
01626     mAtFirstColumn = PR_FALSE;
01627   }
01628   
01629   if(!mInIndentString.IsEmpty()) {
01630     stringToOutput += mInIndentString;
01631     mAtFirstColumn = PR_FALSE;
01632     mInIndentString.Truncate();
01633   }
01634 
01635   if(stripTrailingSpaces) {
01636     PRInt32 lineLength = stringToOutput.Length();
01637     while(lineLength > 0 &&
01638           ' ' == stringToOutput[lineLength-1]) {
01639       --lineLength;
01640     }
01641     stringToOutput.SetLength(lineLength);
01642   }
01643 
01644   if(!stringToOutput.IsEmpty()) {
01645     Output(stringToOutput);
01646   }
01647     
01648 }
01649 
01655 void
01656 nsPlainTextSerializer::Write(const nsAString& aString)
01657 {
01658 #ifdef DEBUG_wrapping
01659   printf("Write(%s): wrap col = %d\n",
01660          NS_ConvertUCS2toUTF8(aString).get(), mWrapColumn);
01661 #endif
01662 
01663   PRInt32 bol = 0;
01664   PRInt32 newline;
01665   
01666   PRInt32 totLen = aString.Length();
01667 
01668   // If the string is empty, do nothing:
01669   if (totLen <= 0) return;
01670 
01671   // We have two major codepaths here. One that does preformatted text and one
01672   // that does normal formatted text. The one for preformatted text calls
01673   // Output directly while the other code path goes through AddToLine.
01674   if ((mPreFormatted && !mWrapColumn) || IsInPre()
01675       || ((((!mQuotesPreformatted && mSpanLevel > 0) || mDontWrapAnyQuotes))
01676           && mEmptyLines >= 0 && aString.First() == PRUnichar('>'))) {
01677     // No intelligent wrapping.
01678 
01679     // This mustn't be mixed with intelligent wrapping without clearing
01680     // the mCurrentLine buffer before!!!
01681     NS_WARN_IF_FALSE(mCurrentLine.IsEmpty(),
01682                  "Mixed wrapping data and nonwrapping data on the same line");
01683     if (!mCurrentLine.IsEmpty()) {
01684       FlushLine();
01685     }
01686 
01687     // Put the mail quote "> " chars in, if appropriate.
01688     // Have to put it in before every line.
01689     while(bol<totLen) {
01690       PRBool outputQuotes = mAtFirstColumn;
01691       PRBool atFirstColumn = mAtFirstColumn;
01692       PRBool outputLineBreak = PR_FALSE;
01693 
01694       // Find one of '\n' or '\r' using iterators since nsAString
01695       // doesn't have the old FindCharInSet function.
01696       nsAString::const_iterator iter;           aString.BeginReading(iter);
01697       nsAString::const_iterator done_searching; aString.EndReading(done_searching);
01698       iter.advance(bol); 
01699       PRInt32 new_newline = bol;
01700       newline = kNotFound;
01701       while(iter != done_searching) {
01702         if('\n' == *iter || '\r' == *iter) {
01703           newline = new_newline;
01704           break;
01705         }
01706         ++new_newline;
01707         ++iter;
01708       }
01709 
01710       // Done searching
01711       if(newline == kNotFound) {
01712         // No new lines.
01713         nsAutoString stringpart(Substring(aString, bol, totLen - bol));
01714         if(!stringpart.IsEmpty()) {
01715           PRUnichar lastchar = stringpart[stringpart.Length()-1];
01716           if((lastchar == '\t') || (lastchar == ' ') ||
01717              (lastchar == '\r') ||(lastchar == '\n')) {
01718             mInWhitespace = PR_TRUE;
01719           } 
01720           else {
01721             mInWhitespace = PR_FALSE;
01722           }
01723         }
01724         mCurrentLine.Assign(stringpart);
01725         mEmptyLines=-1;
01726         atFirstColumn = mAtFirstColumn && (totLen-bol)==0;
01727         bol = totLen;
01728       } 
01729       else {
01730         // There is a newline
01731         nsAutoString stringpart(Substring(aString, bol, newline-bol));
01732         mInWhitespace = PR_TRUE;
01733         mCurrentLine.Assign(stringpart);
01734         outputLineBreak = PR_TRUE;
01735         mEmptyLines=0;
01736         atFirstColumn = PR_TRUE;
01737         bol = newline+1;
01738         if('\r' == *iter && bol < totLen && '\n' == *++iter) {
01739           // There was a CRLF in the input. This used to be illegal and
01740           // stripped by the parser. Apparently not anymore. Let's skip
01741           // over the LF.
01742           bol++;
01743         }
01744       }
01745 
01746       if(outputQuotes) {
01747         // Note: this call messes with mAtFirstColumn
01748         OutputQuotesAndIndent();
01749       }
01750 
01751       Output(mCurrentLine);
01752       if (outputLineBreak) {
01753         Output(mLineBreak);
01754       }
01755       mAtFirstColumn = atFirstColumn;
01756     }
01757 
01758     // Reset mCurrentLine.
01759     mCurrentLine.Truncate();
01760 
01761 #ifdef DEBUG_wrapping
01762     printf("No wrapping: newline is %d, totLen is %d\n",
01763            newline, totLen);
01764 #endif
01765     return;
01766   }
01767 
01768   // XXX Copy necessary to use nsString methods and gain
01769   // access to underlying buffer
01770   nsAutoString str(aString);
01771 
01772   // Intelligent handling of text
01773   // If needed, strip out all "end of lines"
01774   // and multiple whitespace between words
01775   PRInt32 nextpos;
01776   nsAutoString tempstr;
01777   const PRUnichar * offsetIntoBuffer = nsnull;
01778   
01779   while (bol < totLen) {    // Loop over lines
01780     // Find a place where we may have to do whitespace compression
01781     nextpos = str.FindCharInSet(" \t\n\r", bol);
01782 #ifdef DEBUG_wrapping
01783     nsAutoString remaining;
01784     str.Right(remaining, totLen - bol);
01785     foo = ToNewCString(remaining);
01786     //    printf("Next line: bol = %d, newlinepos = %d, totLen = %d, string = '%s'\n",
01787     //           bol, nextpos, totLen, foo);
01788     nsMemory::Free(foo);
01789 #endif
01790 
01791     if(nextpos == kNotFound) {
01792       // The rest of the string
01793       offsetIntoBuffer = str.get() + bol;
01794       AddToLine(offsetIntoBuffer, totLen-bol);
01795       bol=totLen;
01796       mInWhitespace=PR_FALSE;
01797     } 
01798     else {
01799       // There's still whitespace left in the string
01800       if (nextpos != 0 && (nextpos + 1) < totLen) {
01801         offsetIntoBuffer = str.get() + nextpos;
01802         // skip '\n' if it is between CJ chars
01803         if (offsetIntoBuffer[0] == '\n' && IS_CJ_CHAR(offsetIntoBuffer[-1]) && IS_CJ_CHAR(offsetIntoBuffer[1])) {
01804           offsetIntoBuffer = str.get() + bol;
01805           AddToLine(offsetIntoBuffer, nextpos-bol);
01806           bol = nextpos + 1;
01807           continue;
01808         }
01809       }
01810       // If we're already in whitespace and not preformatted, just skip it:
01811       if (mInWhitespace && (nextpos == bol) && !mPreFormatted &&
01812           !(mFlags & nsIDocumentEncoder::OutputPreformatted)) {
01813         // Skip whitespace
01814         bol++;
01815         continue;
01816       }
01817 
01818       if(nextpos == bol) {
01819         // Note that we are in whitespace.
01820         mInWhitespace = PR_TRUE;
01821         offsetIntoBuffer = str.get() + nextpos;
01822         AddToLine(offsetIntoBuffer, 1);
01823         bol++;
01824         continue;
01825       }
01826       
01827       mInWhitespace = PR_TRUE;
01828       
01829       offsetIntoBuffer = str.get() + bol;
01830       if(mPreFormatted || (mFlags & nsIDocumentEncoder::OutputPreformatted)) {
01831         // Preserve the real whitespace character
01832         nextpos++;
01833         AddToLine(offsetIntoBuffer, nextpos-bol);
01834         bol = nextpos;
01835       } 
01836       else {
01837         // Replace the whitespace with a space
01838         AddToLine(offsetIntoBuffer, nextpos-bol);
01839         AddToLine(kSpace.get(),1);
01840         bol = nextpos + 1; // Let's eat the whitespace
01841       }
01842     }
01843   } // Continue looping over the string
01844 }
01845 
01846 
01851 nsresult
01852 nsPlainTextSerializer::GetAttributeValue(const nsIParserNode* aNode,
01853                                          nsIAtom* aName,
01854                                          nsString& aValueRet)
01855 {
01856   if (mContent) {
01857     if (NS_CONTENT_ATTR_NOT_THERE != mContent->GetAttr(kNameSpaceID_None,
01858                                                        aName, aValueRet)) {
01859       return NS_OK;
01860     }
01861   }
01862   else if (aNode) {
01863     nsAutoString name; 
01864     aName->ToString(name);
01865 
01866     PRInt32 count = aNode->GetAttributeCount();
01867     for (PRInt32 i=0;i<count;i++) {
01868       const nsAString& key = aNode->GetKeyAt(i);
01869       if (key.Equals(name, nsCaseInsensitiveStringComparator())) {
01870         aValueRet = aNode->GetValueAt(i);
01871         return NS_OK;
01872       }
01873     }
01874   }
01875 
01876   return NS_ERROR_NOT_AVAILABLE;
01877 }
01878 
01883 PRBool 
01884 nsPlainTextSerializer::IsCurrentNodeConverted(const nsIParserNode* aNode)
01885 {
01886   nsAutoString value;
01887   nsresult rv = GetAttributeValue(aNode, nsHTMLAtoms::kClass, value);
01888   return (NS_SUCCEEDED(rv) &&
01889           (value.EqualsIgnoreCase("moz-txt", 7) ||
01890            value.EqualsIgnoreCase("\"moz-txt", 8)));
01891 }
01892 
01893 
01894 // static
01895 PRInt32
01896 nsPlainTextSerializer::GetIdForContent(nsIContent* aContent)
01897 {
01898   if (!aContent->IsContentOfType(nsIContent::eHTML)) {
01899     return eHTMLTag_unknown;
01900   }
01901 
01902   nsIParserService* parserService = nsContentUtils::GetParserServiceWeakRef();
01903 
01904   return parserService ? parserService->HTMLAtomTagToId(aContent->Tag()) :
01905                          eHTMLTag_unknown;
01906 }
01907 
01912 PRBool 
01913 nsPlainTextSerializer::IsBlockLevel(PRInt32 aId)
01914 {
01915   PRBool isBlock = PR_FALSE;
01916 
01917   nsIParserService* parserService = nsContentUtils::GetParserServiceWeakRef();
01918   if (parserService) {
01919     parserService->IsBlock(aId, isBlock);
01920   }
01921 
01922   return isBlock;
01923 }
01924 
01928 PRBool 
01929 nsPlainTextSerializer::IsContainer(PRInt32 aId)
01930 {
01931   PRBool isContainer = PR_FALSE;
01932 
01933   nsIParserService* parserService = nsContentUtils::GetParserServiceWeakRef();
01934   if (parserService) {
01935     parserService->IsContainer(aId, isContainer);
01936   }
01937 
01938   return isContainer;
01939 }
01940 
01948 PRBool
01949 nsPlainTextSerializer::IsInPre()
01950 {
01951   PRInt32 i = mTagStackIndex;
01952   while(i > 0) {
01953     if(mTagStack[i-1] == eHTMLTag_pre)
01954       return PR_TRUE;
01955     if(IsBlockLevel(mTagStack[i-1])) {
01956       // We assume that every other block overrides a <pre>
01957       return PR_FALSE;
01958     }
01959     --i;
01960   }
01961 
01962   // Not a <pre> in the whole stack
01963   return PR_FALSE;
01964 }
01965 
01970 PRBool
01971 nsPlainTextSerializer::IsInOL()
01972 {
01973   PRInt32 i = mTagStackIndex;
01974   while(--i >= 0) {
01975     if(mTagStack[i] == eHTMLTag_ol)
01976       return PR_TRUE;
01977     if (mTagStack[i] == eHTMLTag_ul) {
01978       // If a UL is reached first, LI belongs the UL nested in OL.
01979       return PR_FALSE;
01980     }
01981   }
01982   // We may reach here for orphan LI's.
01983   return PR_FALSE;
01984 }
01985 
01986 /*
01987   @return 0 = no header, 1 = h1, ..., 6 = h6
01988 */
01989 PRInt32 HeaderLevel(eHTMLTags aTag)
01990 {
01991   PRInt32 result;
01992   switch (aTag)
01993   {
01994     case eHTMLTag_h1:
01995       result = 1; break;
01996     case eHTMLTag_h2:
01997       result = 2; break;
01998     case eHTMLTag_h3:
01999       result = 3; break;
02000     case eHTMLTag_h4:
02001       result = 4; break;
02002     case eHTMLTag_h5:
02003       result = 5; break;
02004     case eHTMLTag_h6:
02005       result = 6; break;
02006     default:
02007       result = 0; break;
02008   }
02009   return result;
02010 }
02011 
02012 
02013 /*
02014  * This is an implementation of GetUnicharWidth() and
02015  * GetUnicharStringWidth() as defined in
02016  * "The Single UNIX Specification, Version 2, The Open Group, 1997"
02017  * <http://www.UNIX-systems.org/online.html>
02018  *
02019  * Markus Kuhn -- 2000-02-08 -- public domain
02020  *
02021  * Minor alterations to fit Mozilla's data types by Daniel Bratell
02022  */
02023 
02024 /* These functions define the column width of an ISO 10646 character
02025  * as follows:
02026  *
02027  *    - The null character (U+0000) has a column width of 0.
02028  *
02029  *    - Other C0/C1 control characters and DEL will lead to a return
02030  *      value of -1.
02031  *
02032  *    - Non-spacing and enclosing combining characters (general
02033  *      category code Mn or Me in the Unicode database) have a
02034  *      column width of 0.
02035  *
02036  *    - Spacing characters in the East Asian Wide (W) or East Asian
02037  *      FullWidth (F) category as defined in Unicode Technical
02038  *      Report #11 have a column width of 2.
02039  *
02040  *    - All remaining characters (including all printable
02041  *      ISO 8859-1 and WGL4 characters, Unicode control characters,
02042  *      etc.) have a column width of 1.
02043  *
02044  * This implementation assumes that wchar_t characters are encoded
02045  * in ISO 10646.
02046  */
02047 
02048 PRInt32 GetUnicharWidth(PRUnichar ucs)
02049 {
02050   /* sorted list of non-overlapping intervals of non-spacing characters */
02051   static const struct interval {
02052     PRUint16 first;
02053     PRUint16 last;
02054   } combining[] = {
02055     { 0x0300, 0x034E }, { 0x0360, 0x0362 }, { 0x0483, 0x0486 },
02056     { 0x0488, 0x0489 }, { 0x0591, 0x05A1 }, { 0x05A3, 0x05B9 },
02057     { 0x05BB, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
02058     { 0x05C4, 0x05C4 }, { 0x064B, 0x0655 }, { 0x0670, 0x0670 },
02059     { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
02060     { 0x0711, 0x0711 }, { 0x0730, 0x074A }, { 0x07A6, 0x07B0 },
02061     { 0x0901, 0x0902 }, { 0x093C, 0x093C }, { 0x0941, 0x0948 },
02062     { 0x094D, 0x094D }, { 0x0951, 0x0954 }, { 0x0962, 0x0963 },
02063     { 0x0981, 0x0981 }, { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 },
02064     { 0x09CD, 0x09CD }, { 0x09E2, 0x09E3 }, { 0x0A02, 0x0A02 },
02065     { 0x0A3C, 0x0A3C }, { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 },
02066     { 0x0A4B, 0x0A4D }, { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 },
02067     { 0x0ABC, 0x0ABC }, { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 },
02068     { 0x0ACD, 0x0ACD }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C },
02069     { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D },
02070     { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 },
02071     { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 },
02072     { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBF, 0x0CBF },
02073     { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD }, { 0x0D41, 0x0D43 },
02074     { 0x0D4D, 0x0D4D }, { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 },
02075     { 0x0DD6, 0x0DD6 }, { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A },
02076     { 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 },
02077     { 0x0EBB, 0x0EBC }, { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 },
02078     { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 },
02079     { 0x0F71, 0x0F7E }, { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 },
02080     { 0x0F90, 0x0F97 }, { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 },
02081     { 0x102D, 0x1030 }, { 0x1032, 0x1032 }, { 0x1036, 0x1037 },
02082     { 0x1039, 0x1039 }, { 0x1058, 0x1059 }, { 0x17B7, 0x17BD },
02083     { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x18A9, 0x18A9 },
02084     { 0x20D0, 0x20E3 }, { 0x302A, 0x302F }, { 0x3099, 0x309A },
02085     { 0xFB1E, 0xFB1E }, { 0xFE20, 0xFE23 }
02086   };
02087   PRInt32 min = 0;
02088   PRInt32 max = sizeof(combining) / sizeof(struct interval) - 1;
02089   PRInt32 mid;
02090 
02091   /* test for 8-bit control characters */
02092   if (ucs == 0)
02093     return 0;
02094   if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0))
02095     return -1;
02096 
02097   /* first quick check for Latin-1 etc. characters */
02098   if (ucs < combining[0].first)
02099     return 1;
02100 
02101   /* binary search in table of non-spacing characters */
02102   while (max >= min) {
02103     mid = (min + max) / 2;
02104     if (combining[mid].last < ucs)
02105       min = mid + 1;
02106     else if (combining[mid].first > ucs)
02107       max = mid - 1;
02108     else if (combining[mid].first <= ucs && combining[mid].last >= ucs)
02109       return 0;
02110   }
02111 
02112   /* if we arrive here, ucs is not a combining or C0/C1 control character */
02113 
02114   /* fast test for majority of non-wide scripts */
02115   if (ucs < 0x1100)
02116     return 1;
02117 
02118   return 1 +
02119     ((ucs >= 0x1100 && ucs <= 0x115f) || /* Hangul Jamo */
02120      (ucs >= 0x2e80 && ucs <= 0xa4cf && (ucs & ~0x0011) != 0x300a &&
02121       ucs != 0x303f) ||                  /* CJK ... Yi */
02122      (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */
02123      (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility Ideographs */
02124      (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */
02125      (ucs >= 0xff00 && ucs <= 0xff5f) || /* Fullwidth Forms */
02126      (ucs >= 0xffe0 && ucs <= 0xffe6));
02127 }
02128 
02129 
02130 PRInt32 GetUnicharStringWidth(const PRUnichar* pwcs, PRInt32 n)
02131 {
02132   PRInt32 w, width = 0;
02133 
02134   for (;*pwcs && n-- > 0; pwcs++)
02135     if ((w = GetUnicharWidth(*pwcs)) < 0)
02136       ++width; // Taking 1 as the width of non-printable character, for bug# 94475.
02137     else
02138       width += w;
02139 
02140   return width;
02141 }
02142