Back to index

lightning-sunbird  0.9+nobinonly
nsHTMLContentSerializer.cpp
Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /* ***** BEGIN LICENSE BLOCK *****
00003  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00004  *
00005  * The contents of this file are subject to the Mozilla Public License Version
00006  * 1.1 (the "License"); you may not use this file except in compliance with
00007  * the License. You may obtain a copy of the License at
00008  * http://www.mozilla.org/MPL/
00009  *
00010  * Software distributed under the License is distributed on an "AS IS" basis,
00011  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00012  * for the specific language governing rights and limitations under the
00013  * License.
00014  *
00015  * The Original Code is mozilla.org code.
00016  *
00017  * The Initial Developer of the Original Code is
00018  * Netscape Communications Corporation.
00019  * Portions created by the Initial Developer are Copyright (C) 1998
00020  * the Initial Developer. All Rights Reserved.
00021  *
00022  * Contributor(s):
00023  *
00024  * Alternatively, the contents of this file may be used under the terms of
00025  * either of the GNU General Public License Version 2 or later (the "GPL"),
00026  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00027  * in which case the provisions of the GPL or the LGPL are applicable instead
00028  * of those above. If you wish to allow use of your version of this file only
00029  * under the terms of either the GPL or the LGPL, and not to allow others to
00030  * use your version of this file under the terms of the MPL, indicate your
00031  * decision by deleting the provisions above and replace them with the notice
00032  * and other provisions required by the GPL or the LGPL. If you do not delete
00033  * the provisions above, a recipient may use your version of this file under
00034  * the terms of any one of the MPL, the GPL or the LGPL.
00035  *
00036  * ***** END LICENSE BLOCK ***** */
00037 
00038 #include "nsHTMLContentSerializer.h"
00039 
00040 #include "nsIDOMElement.h"
00041 #include "nsIDOMText.h"
00042 #include "nsIContent.h"
00043 #include "nsIDocument.h"
00044 #include "nsIDOMDocument.h"
00045 #include "nsINameSpaceManager.h"
00046 #include "nsString.h"
00047 #include "nsUnicharUtils.h"
00048 #include "nsXPIDLString.h"
00049 #include "nsIServiceManager.h"
00050 #include "nsIDocumentEncoder.h"
00051 #include "nsLayoutAtoms.h"
00052 #include "nsHTMLAtoms.h"
00053 #include "nsIURI.h"
00054 #include "nsNetUtil.h"
00055 #include "nsEscape.h"
00056 #include "nsITextToSubURI.h"
00057 #include "nsCRT.h"
00058 #include "nsIParserService.h"
00059 #include "nsContentUtils.h"
00060 #include "nsILineBreakerFactory.h"
00061 #include "nsLWBrkCIID.h"
00062 
// Literal string fragments used while writing markup.
// kLessThan, kGreaterThan and kEndTag delimit the start/end tags written by
// AppendElementStart() and AppendElementEnd().
// NOTE(review): kIndentStr (two spaces) is presumably the per-level indent
// unit; its use is outside this part of the file -- confirm.
00063 #define kIndentStr NS_LITERAL_STRING("  ")
00064 #define kLessThan NS_LITERAL_STRING("<")
00065 #define kGreaterThan NS_LITERAL_STRING(">")
00066 #define kEndTag NS_LITERAL_STRING("</")
00067 
// kMozStr: SerializeAttributes() drops attributes whose name is '_' or '-'
// followed by this text.
00068 static const char kMozStr[] = "moz";
// Class ID used by AppendText() to fetch the line breaker factory service
// when the owner document does not supply a line breaker.
00069 static NS_DEFINE_CID(kLWBrkCID, NS_LWBRK_CID);
00070 
// NOTE(review): presumably the line length threshold consulted by
// HasLongLines(); that helper is not visible here -- confirm.
00071 static const PRInt32 kLongLineLen = 128;
00072 
00073 nsresult NS_NewHTMLContentSerializer(nsIContentSerializer** aSerializer)
00074 {
00075   nsHTMLContentSerializer* it = new nsHTMLContentSerializer();
00076   if (!it) {
00077     return NS_ERROR_OUT_OF_MEMORY;
00078   }
00079 
00080   return CallQueryInterface(it, aSerializer);
00081 }
00082 
// Puts the serializer into its initial state. Per-run settings (flags, wrap
// column, charset, line break string, ...) are assigned later by Init().
00083 nsHTMLContentSerializer::nsHTMLContentSerializer()
00084 : mIndent(0),
00085   mColPos(0),                             // output column; set back to 0 whenever a line break is written
00086   mInBody(PR_FALSE),                      // becomes true once AppendElementStart() sees <body>
00087   mAddSpace(PR_FALSE),                    // true while a collapsed space is pending and not yet written
00088   mMayIgnoreLineBreakSequence(PR_FALSE),  // set right after this class writes a line break itself
00089   mInCDATA(PR_FALSE),                     // set by AppendElementStart() for script/style/noscript/noframes
00090   mNeedLineBreaker(PR_TRUE)               // makes the first AppendText() call look up a line breaker
00091 {
00092 }
00093 
00094 nsHTMLContentSerializer::~nsHTMLContentSerializer()
00095 {
00096   NS_ASSERTION(mOLStateStack.Count() == 0, "Expected OL State stack to be empty");
00097   if (mOLStateStack.Count() > 0){
00098     for (PRInt32 i = 0; i < mOLStateStack.Count(); i++){
00099       olState* state = (olState*)mOLStateStack[i];
00100       delete state;
00101       mOLStateStack.RemoveElementAt(i);
00102     }
00103   }
00104 }
00105 
00106 NS_IMETHODIMP 
00107 nsHTMLContentSerializer::Init(PRUint32 aFlags, PRUint32 aWrapColumn,
00108                               const char* aCharSet, PRBool aIsCopying)
00109 {
00110   mFlags = aFlags;
00111   if (!aWrapColumn) {
00112     mMaxColumn = 72;
00113   }
00114   else {
00115     mMaxColumn = aWrapColumn;
00116   }
00117 
00118   mIsCopying = aIsCopying;
00119   mIsFirstChildOfOL = PR_FALSE;
00120   mDoFormat = (mFlags & nsIDocumentEncoder::OutputFormatted) ? PR_TRUE
00121                                                              : PR_FALSE;
00122   mBodyOnly = (mFlags & nsIDocumentEncoder::OutputBodyOnly) ? PR_TRUE
00123                                                             : PR_FALSE;
00124   // Set the line break character:
00125   if ((mFlags & nsIDocumentEncoder::OutputCRLineBreak)
00126       && (mFlags & nsIDocumentEncoder::OutputLFLineBreak)) { // Windows
00127     mLineBreak.AssignLiteral("\r\n");
00128   }
00129   else if (mFlags & nsIDocumentEncoder::OutputCRLineBreak) { // Mac
00130     mLineBreak.AssignLiteral("\r");
00131   }
00132   else if (mFlags & nsIDocumentEncoder::OutputLFLineBreak) { // Unix/DOM
00133     mLineBreak.AssignLiteral("\n");
00134   }
00135   else {
00136     mLineBreak.AssignLiteral(NS_LINEBREAK);         // Platform/default
00137   }
00138 
00139   mPreLevel = 0;
00140 
00141   mCharSet = aCharSet;
00142 
00143   // set up entity converter if we are going to need it
00144   if (mFlags & nsIDocumentEncoder::OutputEncodeW3CEntities) {
00145     mEntityConverter = do_CreateInstance(NS_ENTITYCONVERTER_CONTRACTID);
00146   }
00147 
00148   return NS_OK;
00149 }
00150 
00151 NS_IMETHODIMP 
00152 nsHTMLContentSerializer::AppendText(nsIDOMText* aText, 
00153                                     PRInt32 aStartOffset,
00154                                     PRInt32 aEndOffset,
00155                                     nsAString& aStr)
00156 {
00157   NS_ENSURE_ARG(aText);
00158 
00159   if (mNeedLineBreaker) {
00160     mNeedLineBreaker = PR_FALSE;
00161 
00162     nsCOMPtr<nsIDOMDocument> domDoc;
00163     aText->GetOwnerDocument(getter_AddRefs(domDoc));
00164     nsCOMPtr<nsIDocument> document = do_QueryInterface(domDoc);
00165     if (document) {
00166       mLineBreaker = document->GetLineBreaker();
00167     }
00168 
00169     if (!mLineBreaker) {
00170       nsresult rv;
00171       nsCOMPtr<nsILineBreakerFactory> lf(do_GetService(kLWBrkCID, &rv));
00172       if (NS_SUCCEEDED(rv)) {
00173         rv = lf->GetBreaker(EmptyString(), getter_AddRefs(mLineBreaker));
00174         // Ignore result value.
00175         // If we are unable to obtain a line breaker,
00176         // we will use our simple fallback logic.
00177       }
00178     }
00179   }
00180 
00181   nsAutoString data;
00182 
00183   nsresult rv;
00184   rv = AppendTextData((nsIDOMNode*)aText, aStartOffset, 
00185                       aEndOffset, data, PR_TRUE, PR_FALSE);
00186   if (NS_FAILED(rv))
00187     return NS_ERROR_FAILURE;
00188 
00189   if (mPreLevel > 0) {
00190     AppendToStringConvertLF(data, aStr);
00191   }
00192   else if (mFlags & nsIDocumentEncoder::OutputRaw) {
00193     PRInt32 lastNewlineOffset = data.RFindChar('\n');
00194     AppendToString(data, aStr);
00195     if (lastNewlineOffset != kNotFound)
00196       mColPos = data.Length() - lastNewlineOffset;
00197   }
00198   else if (!mDoFormat) {
00199     PRInt32 lastNewlineOffset = kNotFound;
00200     PRBool hasLongLines = HasLongLines(data, lastNewlineOffset);
00201     if (hasLongLines) {
00202       // We have long lines, rewrap
00203       AppendToStringWrapped(data, aStr, PR_FALSE);
00204       if (lastNewlineOffset != kNotFound)
00205         mColPos = data.Length() - lastNewlineOffset;
00206     }
00207     else {
00208       AppendToStringConvertLF(data, aStr);
00209     }
00210   }
00211   else {
00212     AppendToStringWrapped(data, aStr, PR_FALSE);
00213   }
00214 
00215   return NS_OK;
00216 }
00217 
00218 void nsHTMLContentSerializer::AppendWrapped_WhitespaceSequence(
00219                                nsASingleFragmentString::const_char_iterator &aPos,
00220                                const nsASingleFragmentString::const_char_iterator aEnd,
00221                                const nsASingleFragmentString::const_char_iterator aSequenceStart,
00222                                PRBool &aMayIgnoreStartOfLineWhitespaceSequence,
00223                                nsAString &aOutputStr)
00224 {
00225   // Handle the complete sequence of whitespace.
00226   // Continue to iterate until we find the first non-whitespace char.
00227   // Updates "aPos" to point to the first unhandled char.
00228   // Also updates the aMayIgnoreStartOfLineWhitespaceSequence flag,
00229   // as well as the other "global" state flags.
00230 
00231   PRBool sawBlankOrTab = PR_FALSE;
00232   PRBool leaveLoop = PR_FALSE;
00233 
00234   do {
00235     switch (*aPos) {
00236       case ' ':
00237       case '\t':
00238         sawBlankOrTab = PR_TRUE;
00239         // no break
00240       case '\n':
00241         ++aPos;
00242         // do not increase mColPos,
00243         // because we will reduce the whitespace to a single char
00244         break;
00245       default:
00246         leaveLoop = PR_TRUE;
00247         break;
00248     }
00249   } while (!leaveLoop && aPos < aEnd);
00250 
00251   if (mAddSpace) {
00252     // if we had previously been asked to add space,
00253     // our situation has not changed
00254   }
00255   else if (!sawBlankOrTab && mMayIgnoreLineBreakSequence) {
00256     // nothing to do
00257     mMayIgnoreLineBreakSequence = PR_FALSE;
00258   }
00259   else if (aMayIgnoreStartOfLineWhitespaceSequence) {
00260     // nothing to do
00261     aMayIgnoreStartOfLineWhitespaceSequence = PR_FALSE;
00262   }
00263   else {
00264     if (sawBlankOrTab) {
00265       if (mColPos + 1 >= mMaxColumn) {
00266         // no much sense in delaying, we only have one slot left,
00267         // let's write a break now
00268         aOutputStr.Append(mLineBreak);
00269         mColPos = 0;
00270       }
00271       else {
00272         // do not write out yet, we may write out either a space or a linebreak
00273         // let's delay writing it out until we know more
00274 
00275         mAddSpace = PR_TRUE;
00276         ++mColPos; // eat a slot of available space
00277       }
00278     }
00279     else {
00280       // Asian text usually does not contain spaces, therefore we should not
00281       // transform a linebreak into a space.
00282       // Since we only saw linebreaks, but no spaces or tabs,
00283       // let's write a linebreak now.
00284       aOutputStr.Append(mLineBreak);
00285       mMayIgnoreLineBreakSequence = PR_TRUE;
00286       mColPos = 0;
00287     }
00288   }
00289 }
00290 
// Writes one run of non-whitespace characters starting at aPos, wrapping the
// line when the run would cross mMaxColumn.
//
//   aPos            in/out: start of the run on entry; first unhandled char
//                   on exit.
//   aEnd            end of the input buffer.
//   aSequenceStart  start of the run; used to rescan and to append the text.
//   aMayIgnoreStartOfLineWhitespaceSequence
//                   in/out: cleared on entry, set again when this function
//                   ends the line with a break found by the line breaker.
//   aOutputStr      receives the text, pending space and line breaks.
//
// Outcomes, in the order they are tried:
//   1. The run ends (whitespace or aEnd) before mMaxColumn is reached: write
//      the pending space (mAddSpace), if any, followed by the run.
//   2. The run did not start at column 0 and a space is pending: write a line
//      break instead of that space and rescan the run from aSequenceStart.
//   3. Ask mLineBreaker for a break position (Prev(), then Next()); if one is
//      found, write the text up to it followed by a line break.
//   4. Otherwise write the whole run unbroken up to the next whitespace.
00291 void nsHTMLContentSerializer::AppendWrapped_NonWhitespaceSequence(
00292                                nsASingleFragmentString::const_char_iterator &aPos,
00293                                const nsASingleFragmentString::const_char_iterator aEnd,
00294                                const nsASingleFragmentString::const_char_iterator aSequenceStart,
00295                                PRBool &aMayIgnoreStartOfLineWhitespaceSequence,
00296                                nsAString& aOutputStr)
00297 {
00298   mMayIgnoreLineBreakSequence = PR_FALSE;
00299   aMayIgnoreStartOfLineWhitespaceSequence = PR_FALSE;
00300 
00301   // Handle the complete sequence of non-whitespace in this block
00302   // Iterate until we find the first whitespace char or an aEnd condition
00303   // Updates "aPos" to point to the first unhandled char.
00304   // Also updates the aMayIgnoreStartOfLineWhitespaceSequence flag,
00305   // as well as the other "global" state flags.
00306 
00307   PRBool thisSequenceStartsAtBeginningOfLine = !mColPos;
00308   PRBool onceAgainBecauseWeAddedBreakInFront;
00309   PRBool foundWhitespaceInLoop;
00310 
00311   do {
00312     onceAgainBecauseWeAddedBreakInFront = PR_FALSE;
00313     foundWhitespaceInLoop = PR_FALSE;
00314 
            // Scan forward, counting columns, until whitespace, the max
            // column, or the end of the buffer stops us.
00315     do {
00316       if (*aPos == ' ' || *aPos == '\t' || *aPos == '\n') {
00317         foundWhitespaceInLoop = PR_TRUE;
00318         break;
00319       }
00320 
00321       ++aPos;
00322       ++mColPos;
00323     } while (mColPos < mMaxColumn && aPos < aEnd);
00324 
00325     if (aPos == aEnd || foundWhitespaceInLoop) {
00326       // there is enough room for the complete block we found
00327 
00328       if (mAddSpace) {
00329         aOutputStr.Append(PRUnichar(' '));
00330         mAddSpace = PR_FALSE;
00331       }
00332 
00333       aOutputStr.Append(aSequenceStart, aPos - aSequenceStart);
00334       // We have not yet reached the max column, we will continue to
00335       // fill the current line in the next outer loop iteration.
00336     }
00337     else { // mColPos == mMaxColumn
00338       if (!thisSequenceStartsAtBeginningOfLine && mAddSpace) {
00339         // We can avoid wrapping inside this sequence: turn the pending
              // space into a line break and rescan the sequence from its
              // start on the new line.
00340 
00341         aOutputStr.Append(mLineBreak);
00342         mAddSpace = PR_FALSE;
00343         aPos = aSequenceStart;
00344         mColPos = 0;
00345         thisSequenceStartsAtBeginningOfLine = PR_TRUE;
00346         onceAgainBecauseWeAddedBreakInFront = PR_TRUE;
00347       }
00348       else {
00349         // we must wrap
00350 
00351         PRBool foundWrapPosition = PR_FALSE;
00352 
00353         if (mLineBreaker) { // we have a line breaker helper object
00354           PRUint32 wrapPosition;
00355           PRBool needMoreText;
00356           nsresult rv;
00357 
                  // First look for a break opportunity before the current
                  // position; only if that fails, look for one after it.
00358           rv = mLineBreaker->Prev(aSequenceStart,
00359                                   (aEnd - aSequenceStart),
00360                                   (aPos - aSequenceStart) + 1,
00361                                   &wrapPosition,
00362                                   &needMoreText);
00363           if (NS_SUCCEEDED(rv) && !needMoreText && wrapPosition > 0) {
00364             foundWrapPosition = PR_TRUE;
00365           }
00366           else {
00367             rv = mLineBreaker->Next(aSequenceStart,
00368                                     (aEnd - aSequenceStart),
00369                                     (aPos - aSequenceStart),
00370                                     &wrapPosition,
00371                                     &needMoreText);
00372             if (NS_SUCCEEDED(rv) && !needMoreText && wrapPosition > 0) {
00373               foundWrapPosition = PR_TRUE;
00374             }
00375           }
00376 
00377           if (foundWrapPosition) {
00378             if (mAddSpace) {
00379               aOutputStr.Append(PRUnichar(' '));
00380               mAddSpace = PR_FALSE;
00381             }
00382 
                    // Write the text up to the break position, end the line,
                    // and leave the remainder for the caller's next iteration.
00383             aOutputStr.Append(aSequenceStart, wrapPosition);
00384             aOutputStr.Append(mLineBreak);
00385             aPos = aSequenceStart + wrapPosition;
00386             mColPos = 0;
00387             aMayIgnoreStartOfLineWhitespaceSequence = PR_TRUE;
00388             mMayIgnoreLineBreakSequence = PR_TRUE;
00389           }
00390         }
00391 
00392         if (!mLineBreaker || !foundWrapPosition) {
00393           // try some simple fallback logic
00394           // go forward up to the next whitespace position,
00395           // in the worst case this will be all the rest of the data
00396 
00397           do {
00398             if (*aPos == ' ' || *aPos == '\t' || *aPos == '\n') {
00399               break;
00400             }
00401 
00402             ++aPos;
00403             ++mColPos;
00404           } while (aPos < aEnd);
00405 
00406           if (mAddSpace) {
00407             aOutputStr.Append(PRUnichar(' '));
00408             mAddSpace = PR_FALSE;
00409           }
00410 
00411           aOutputStr.Append(aSequenceStart, aPos - aSequenceStart);
00412         }
00413       }
00414     }
00415   } while (onceAgainBecauseWeAddedBreakInFront);
00416 }
00417 
00418 void 
00419 nsHTMLContentSerializer::AppendToStringWrapped(const nsASingleFragmentString& aStr,
00420                                                nsAString& aOutputStr,
00421                                                PRBool aTranslateEntities)
00422 {
00423   nsASingleFragmentString::const_char_iterator pos, end, sequenceStart;
00424 
00425   aStr.BeginReading(pos);
00426   aStr.EndReading(end);
00427 
00428   // if the current line already has text on it, such as a tag,
00429   // leading whitespace is significant
00430 
00431   PRBool mayIgnoreStartOfLineWhitespaceSequence = !mColPos;
00432 
00433   while (pos < end) {
00434     sequenceStart = pos;
00435 
00436     // if beginning of a whitespace sequence
00437     if (*pos == ' ' || *pos == '\n' || *pos == '\t') {
00438       AppendWrapped_WhitespaceSequence(pos, end, sequenceStart, 
00439         mayIgnoreStartOfLineWhitespaceSequence, aOutputStr);
00440     }
00441     else { // any other non-whitespace char
00442       AppendWrapped_NonWhitespaceSequence(pos, end, sequenceStart, 
00443         mayIgnoreStartOfLineWhitespaceSequence, aOutputStr);
00444     }
00445   }
00446 }
00447 
00448 NS_IMETHODIMP
00449 nsHTMLContentSerializer::AppendDocumentStart(nsIDOMDocument *aDocument,
00450                                              nsAString& aStr)
00451 {
00452   return NS_OK;
00453 }
00454 
00455 PRBool
00456 nsHTMLContentSerializer::IsJavaScript(nsIAtom* aAttrNameAtom, const nsAString& aValueString)
00457 {
00458   if (aAttrNameAtom == nsHTMLAtoms::href ||
00459       aAttrNameAtom == nsHTMLAtoms::src) {
00460     static const char kJavaScript[] = "javascript";
00461     PRInt32 pos = aValueString.FindChar(':');
00462     if (pos < (PRInt32)(sizeof kJavaScript - 1))
00463         return PR_FALSE;
00464     nsAutoString scheme(Substring(aValueString, 0, pos));
00465     scheme.StripWhitespace();
00466     if ((scheme.Length() == (sizeof kJavaScript - 1)) &&
00467         scheme.EqualsIgnoreCase(kJavaScript))
00468       return PR_TRUE;
00469     else
00470       return PR_FALSE;  
00471   }
00472 
00473   PRBool result = 
00474                  (aAttrNameAtom == nsLayoutAtoms::onblur)      || (aAttrNameAtom == nsLayoutAtoms::onchange)
00475               || (aAttrNameAtom == nsLayoutAtoms::onclick)     || (aAttrNameAtom == nsLayoutAtoms::ondblclick)
00476               || (aAttrNameAtom == nsLayoutAtoms::onfocus)     || (aAttrNameAtom == nsLayoutAtoms::onkeydown)
00477               || (aAttrNameAtom == nsLayoutAtoms::onkeypress)  || (aAttrNameAtom == nsLayoutAtoms::onkeyup)
00478               || (aAttrNameAtom == nsLayoutAtoms::onload)      || (aAttrNameAtom == nsLayoutAtoms::onmousedown)
00479               || (aAttrNameAtom == nsLayoutAtoms::onpageshow)  || (aAttrNameAtom == nsLayoutAtoms::onpagehide)
00480               || (aAttrNameAtom == nsLayoutAtoms::onmousemove) || (aAttrNameAtom == nsLayoutAtoms::onmouseout)
00481               || (aAttrNameAtom == nsLayoutAtoms::onmouseover) || (aAttrNameAtom == nsLayoutAtoms::onmouseup)
00482               || (aAttrNameAtom == nsLayoutAtoms::onreset)     || (aAttrNameAtom == nsLayoutAtoms::onselect)
00483               || (aAttrNameAtom == nsLayoutAtoms::onsubmit)    || (aAttrNameAtom == nsLayoutAtoms::onunload)
00484               || (aAttrNameAtom == nsLayoutAtoms::onabort)     || (aAttrNameAtom == nsLayoutAtoms::onerror)
00485               || (aAttrNameAtom == nsLayoutAtoms::onpaint)     || (aAttrNameAtom == nsLayoutAtoms::onresize)
00486               || (aAttrNameAtom == nsLayoutAtoms::onscroll)    || (aAttrNameAtom == nsLayoutAtoms::onbroadcast)
00487               || (aAttrNameAtom == nsLayoutAtoms::onclose)     || (aAttrNameAtom == nsLayoutAtoms::oncontextmenu)
00488               || (aAttrNameAtom == nsLayoutAtoms::oncommand)   || (aAttrNameAtom == nsLayoutAtoms::oncommandupdate)
00489               || (aAttrNameAtom == nsLayoutAtoms::ondragdrop)  || (aAttrNameAtom == nsLayoutAtoms::ondragenter)
00490               || (aAttrNameAtom == nsLayoutAtoms::ondragexit)  || (aAttrNameAtom == nsLayoutAtoms::ondraggesture)
00491               || (aAttrNameAtom == nsLayoutAtoms::ondragover)  || (aAttrNameAtom == nsLayoutAtoms::oninput);
00492   return result;
00493 }
00494 
00495 nsresult 
00496 nsHTMLContentSerializer::EscapeURI(const nsAString& aURI, nsAString& aEscapedURI)
00497 {
00498   // URL escape %xx cannot be used in JS.
00499   // No escaping if the scheme is 'javascript'.
00500   if (IsJavaScript(nsHTMLAtoms::href, aURI)) {
00501     aEscapedURI = aURI;
00502     return NS_OK;
00503   }
00504 
00505   // nsITextToSubURI does charset convert plus uri escape
00506   // This is needed to convert to a document charset which is needed to support existing browsers.
00507   // But we eventually want to use UTF-8 instead of a document charset, then the code would be much simpler.
00508   // See HTML 4.01 spec, "Appendix B.2.1 Non-ASCII characters in URI attribute values"
00509   nsCOMPtr<nsITextToSubURI> textToSubURI;
00510   nsAutoString uri(aURI); // in order to use FindCharInSet()
00511   nsresult rv = NS_OK;
00512 
00513 
00514   if (!mCharSet.IsEmpty() && !IsASCII(uri)) {
00515     textToSubURI = do_GetService(NS_ITEXTTOSUBURI_CONTRACTID, &rv);
00516     NS_ENSURE_SUCCESS(rv, rv);
00517   }
00518 
00519   PRInt32 start = 0;
00520   PRInt32 end;
00521   nsAutoString part;
00522   nsXPIDLCString escapedURI;
00523   aEscapedURI.Truncate(0);
00524 
00525   // Loop and escape parts by avoiding escaping reserved characters (and '%', '#' ).
00526   while ((end = uri.FindCharInSet("%#;/?:@&=+$,", start)) != -1) {
00527     part = Substring(aURI, start, (end-start));
00528     if (textToSubURI && !IsASCII(part)) {
00529       rv = textToSubURI->ConvertAndEscape(mCharSet.get(), part.get(), getter_Copies(escapedURI));
00530       NS_ENSURE_SUCCESS(rv, rv);
00531     }
00532     else {
00533       escapedURI.Adopt(nsEscape(NS_ConvertUCS2toUTF8(part).get(), url_Path));
00534     }
00535     AppendASCIItoUTF16(escapedURI, aEscapedURI);
00536 
00537     // Append a reserved character without escaping.
00538     part = Substring(aURI, end, 1);
00539     aEscapedURI.Append(part);
00540     start = end + 1;
00541   }
00542 
00543   if (start < (PRInt32) aURI.Length()) {
00544     // Escape the remaining part.
00545     part = Substring(aURI, start, aURI.Length()-start);
00546     if (textToSubURI) {
00547       rv = textToSubURI->ConvertAndEscape(mCharSet.get(), part.get(), getter_Copies(escapedURI));
00548       NS_ENSURE_SUCCESS(rv, rv);
00549     }
00550     else {
00551       escapedURI.Adopt(nsEscape(NS_ConvertUCS2toUTF8(part).get(), url_Path));
00552     }
00553     AppendASCIItoUTF16(escapedURI, aEscapedURI);
00554   }
00555 
00556   return rv;
00557 }
00558 
00559 void 
00560 nsHTMLContentSerializer::SerializeAttributes(nsIContent* aContent,
00561                                              nsIAtom* aTagName,
00562                                              nsAString& aStr)
00563 {
00564   nsresult rv;
00565   PRUint32 index, count;
00566   nsAutoString nameStr, valueStr;
00567   PRInt32 namespaceID;
00568   nsCOMPtr<nsIAtom> attrName, attrPrefix;
00569 
00570   count = aContent->GetAttrCount();
00571 
00572   NS_NAMED_LITERAL_STRING(_mozStr, "_moz");
00573 
00574   // Loop backward over the attributes, since the order they are stored in is
00575   // the opposite of the order they were parsed in (see bug 213347 for reason).
00576   // index is unsigned, hence index >= 0 is always true.
00577   for (index = count; index > 0; ) {
00578     --index;
00579     aContent->GetAttrNameAt(index, 
00580                             &namespaceID,
00581                             getter_AddRefs(attrName),
00582                             getter_AddRefs(attrPrefix));
00583 
00584     // Filter out any attribute starting with [-|_]moz
00585     const char* sharedName;
00586     attrName->GetUTF8String(&sharedName);
00587     if ((('_' == *sharedName) || ('-' == *sharedName)) &&
00588         !nsCRT::strncmp(sharedName+1, kMozStr, PRUint32(sizeof(kMozStr)-1))) {
00589       continue;
00590     }
00591     aContent->GetAttr(namespaceID, attrName, valueStr);
00592 
00593     // 
00594     // Filter out special case of <br type="_moz"> or <br _moz*>,
00595     // used by the editor.  Bug 16988.  Yuck.
00596     //
00597     if (aTagName == nsHTMLAtoms::br && attrName == nsHTMLAtoms::type &&
00598         StringBeginsWith(valueStr, _mozStr)) {
00599       continue;
00600     }
00601 
00602     if (mIsCopying && mIsFirstChildOfOL && (aTagName == nsHTMLAtoms::li) && 
00603         (attrName == nsHTMLAtoms::value)){
00604       // This is handled separately in SerializeLIValueAttribute()
00605       continue;
00606     }
00607     PRBool isJS = IsJavaScript(attrName, valueStr);
00608     
00609     if (((attrName == nsHTMLAtoms::href) || 
00610          (attrName == nsHTMLAtoms::src))) {
00611       // Make all links absolute when converting only the selection:
00612       if (mFlags & nsIDocumentEncoder::OutputAbsoluteLinks) {
00613         // Would be nice to handle OBJECT and APPLET tags,
00614         // but that gets more complicated since we have to
00615         // search the tag list for CODEBASE as well.
00616         // For now, just leave them relative.
00617         nsCOMPtr<nsIURI> uri = aContent->GetBaseURI();
00618         if (uri) {
00619           nsAutoString absURI;
00620           rv = NS_MakeAbsoluteURI(absURI, valueStr, uri);
00621           if (NS_SUCCEEDED(rv)) {
00622             valueStr = absURI;
00623           }
00624         }
00625       }
00626       // Need to escape URI.
00627       nsAutoString tempURI(valueStr);
00628       if (!isJS && NS_FAILED(EscapeURI(tempURI, valueStr)))
00629         valueStr = tempURI;
00630     }
00631 
00632     attrName->ToString(nameStr);
00633     
00634     /*If we already crossed the MaxColumn limit or 
00635     * if this attr name-value pair(including a space,=,opening and closing quotes) is greater than MaxColumn limit
00636     * then start the attribute from a new line.
00637     */
00638 
00639     if (mDoFormat
00640         && (mColPos >= mMaxColumn
00641             || ((PRInt32)(mColPos + nameStr.Length() +
00642                           valueStr.Length() + 4) > mMaxColumn))) {
00643         aStr.Append(mLineBreak);
00644         mColPos = 0;
00645     }
00646 
00647     // Expand shorthand attribute.
00648     if (IsShorthandAttr(attrName, aTagName) && valueStr.IsEmpty()) {
00649       valueStr = nameStr;
00650     }
00651     SerializeAttr(EmptyString(), nameStr, valueStr, aStr, !isJS);
00652   }
00653 }
00654 
00655 NS_IMETHODIMP
00656 nsHTMLContentSerializer::AppendElementStart(nsIDOMElement *aElement,
00657                                             PRBool aHasChildren,
00658                                             nsAString& aStr)
00659 {
00660   NS_ENSURE_ARG(aElement);
00661   
00662   nsCOMPtr<nsIContent> content = do_QueryInterface(aElement);
00663   if (!content) return NS_ERROR_FAILURE;
00664   
00665   // The _moz_dirty attribute is emitted by the editor to
00666   // indicate that this element should be pretty printed
00667   // even if we're not in pretty printing mode
00668   PRBool hasDirtyAttr = content->HasAttr(kNameSpaceID_None,
00669                                          nsLayoutAtoms::mozdirty);
00670 
00671   nsIAtom *name = content->Tag();
00672 
00673   if (name == nsHTMLAtoms::br && mPreLevel > 0
00674       && (mFlags & nsIDocumentEncoder::OutputNoFormattingInPre)) {
00675     AppendToString(mLineBreak, aStr);
00676     mMayIgnoreLineBreakSequence = PR_TRUE;
00677     mColPos = 0;
00678     return NS_OK;
00679   }
00680 
00681   if (name == nsHTMLAtoms::body) {
00682     mInBody = PR_TRUE;
00683   }
00684 
00685   if (LineBreakBeforeOpen(name, hasDirtyAttr)) {
00686     AppendToString(mLineBreak, aStr);
00687     mMayIgnoreLineBreakSequence = PR_TRUE;
00688     mColPos = 0;
00689     mAddSpace = PR_FALSE;
00690   }
00691   else if (mAddSpace) {
00692     AppendToString(PRUnichar(' '), aStr);
00693     mAddSpace = PR_FALSE;
00694   }
00695   else {
00696     MaybeAddNewline(aStr);
00697   }
00698   // Always reset to avoid false newlines in case MaybeAddNewline wasn't
00699   // called
00700   mAddNewline = PR_FALSE;
00701 
00702   StartIndentation(name, hasDirtyAttr, aStr);
00703 
00704   if (name == nsHTMLAtoms::pre ||
00705       name == nsHTMLAtoms::script ||
00706       name == nsHTMLAtoms::style) {
00707     mPreLevel++;
00708   }
00709   
00710   AppendToString(kLessThan, aStr);
00711 
00712   nsAutoString nameStr;
00713   name->ToString(nameStr);
00714   AppendToString(nameStr.get(), -1, aStr);
00715 
00716   // Need to keep track of OL and LI elements in order to get ordinal number 
00717   // for the LI.
00718   if (mIsCopying && name == nsHTMLAtoms::ol){
00719     // We are copying and current node is an OL;
00720     // Store it's start attribute value in olState->startVal.
00721     nsAutoString start;
00722     PRInt32 startAttrVal = 0;
00723     aElement->GetAttribute(NS_LITERAL_STRING("start"), start);
00724     if (!start.IsEmpty()){
00725       PRInt32 rv = 0;
00726       startAttrVal = start.ToInteger(&rv);
00727       //If OL has "start" attribute, first LI element has to start with that value
00728       //Therefore subtracting 1 as all the LI elements are incrementing it before using it;
00729       //In failure of ToInteger(), default StartAttrValue to 0.
00730       if (NS_SUCCEEDED(rv))
00731         startAttrVal--; 
00732       else
00733         startAttrVal = 0;
00734     }
00735     olState* state = new olState(startAttrVal, PR_TRUE);
00736     if (state)
00737       mOLStateStack.AppendElement(state);
00738   }
00739 
00740   if (mIsCopying && name == nsHTMLAtoms::li) {
00741     mIsFirstChildOfOL = IsFirstChildOfOL(aElement);
00742     if (mIsFirstChildOfOL){
00743       // If OL is parent of this LI, serialize attributes in different manner.
00744       SerializeLIValueAttribute(aElement, aStr);
00745     }
00746   }
00747 
00748   // Even LI passed above have to go through this 
00749   // for serializing attributes other than "value".
00750   SerializeAttributes(content, name, aStr);
00751 
00752   AppendToString(kGreaterThan, aStr);
00753 
00754   if (LineBreakAfterOpen(name, hasDirtyAttr)) {
00755     AppendToString(mLineBreak, aStr);
00756     mMayIgnoreLineBreakSequence = PR_TRUE;
00757     mColPos = 0;
00758   }
00759 
00760   if (name == nsHTMLAtoms::script ||
00761       name == nsHTMLAtoms::style ||
00762       name == nsHTMLAtoms::noscript ||
00763       name == nsHTMLAtoms::noframes) {
00764     mInCDATA = PR_TRUE;
00765   }
00766 
00767   return NS_OK;
00768 }
00769   
00770 NS_IMETHODIMP 
00771 nsHTMLContentSerializer::AppendElementEnd(nsIDOMElement *aElement,
00772                                           nsAString& aStr)
00773 {
00774   NS_ENSURE_ARG(aElement);
00775 
00776   nsCOMPtr<nsIContent> content = do_QueryInterface(aElement);
00777   if (!content) return NS_ERROR_FAILURE;
00778 
00779   PRBool hasDirtyAttr = content->HasAttr(kNameSpaceID_None,
00780                                          nsLayoutAtoms::mozdirty);
00781 
00782   nsIAtom *name = content->Tag();
00783 
00784   if (name == nsHTMLAtoms::pre ||
00785       name == nsHTMLAtoms::script ||
00786       name == nsHTMLAtoms::style) {
00787     mPreLevel--;
00788   }
00789 
00790   if (mIsCopying && (name == nsHTMLAtoms::ol)){
00791     NS_ASSERTION((mOLStateStack.Count() > 0), "Cannot have an empty OL Stack");
00792     /* Though at this point we must always have an state to be deleted as all 
00793     the OL opening tags are supposed to push an olState object to the stack*/
00794     if (mOLStateStack.Count() > 0) {
00795       olState* state = (olState*)mOLStateStack.ElementAt(mOLStateStack.Count() -1);
00796       mOLStateStack.RemoveElementAt(mOLStateStack.Count() -1);
00797       delete state;
00798     }
00799   }
00800   
00801   nsIParserService* parserService = nsContentUtils::GetParserServiceWeakRef();
00802 
00803   if (parserService && (name != nsHTMLAtoms::style)) {
00804     PRBool isContainer;
00805 
00806     parserService->IsContainer(parserService->HTMLAtomTagToId(name),
00807                                isContainer);
00808     if (!isContainer) return NS_OK;
00809   }
00810 
00811   if (LineBreakBeforeClose(name, hasDirtyAttr)) {
00812     AppendToString(mLineBreak, aStr);
00813     mMayIgnoreLineBreakSequence = PR_TRUE;
00814     mColPos = 0;
00815     mAddSpace = PR_FALSE;
00816   }
00817   else if (mAddSpace) {
00818     AppendToString(PRUnichar(' '), aStr);
00819     mAddSpace = PR_FALSE;
00820   }
00821 
00822   EndIndentation(name, hasDirtyAttr, aStr);
00823 
00824   nsAutoString nameStr;
00825   name->ToString(nameStr);
00826 
00827   AppendToString(kEndTag, aStr);
00828   AppendToString(nameStr.get(), -1, aStr);
00829   AppendToString(kGreaterThan, aStr);
00830 
00831   if (LineBreakAfterClose(name, hasDirtyAttr)) {
00832     AppendToString(mLineBreak, aStr);
00833     mMayIgnoreLineBreakSequence = PR_TRUE;
00834     mColPos = 0;
00835   }
00836   else {
00837     MaybeFlagNewline(aElement);
00838   }
00839 
00840   mInCDATA = PR_FALSE;
00841 
00842   return NS_OK;
00843 }
00844 
00845 void
00846 nsHTMLContentSerializer::AppendToString(const PRUnichar* aStr,
00847                                         PRInt32 aLength,
00848                                         nsAString& aOutputStr)
00849 {
00850   if (mBodyOnly && !mInBody) {
00851     return;
00852   }
00853 
00854   PRInt32 length = (aLength == -1) ? nsCRT::strlen(aStr) : aLength;
00855   
00856   mColPos += length;
00857 
00858   aOutputStr.Append(aStr, length);
00859 }
00860 
00861 void 
00862 nsHTMLContentSerializer::AppendToString(const PRUnichar aChar,
00863                                         nsAString& aOutputStr)
00864 {
00865   if (mBodyOnly && !mInBody) {
00866     return;
00867   }
00868 
00869   mColPos += 1;
00870 
00871   aOutputStr.Append(aChar);
00872 }
00873 
00874 static const PRUint16 kValNBSP = 160;
00875 static const char kEntityNBSP[] = "nbsp";
00876 
00877 static const PRUint16 kGTVal = 62;
00878 static const char* kEntities[] = {
00879   "", "", "", "", "", "", "", "", "", "",
00880   "", "", "", "", "", "", "", "", "", "",
00881   "", "", "", "", "", "", "", "", "", "",
00882   "", "", "", "", "", "", "", "", "amp", "",
00883   "", "", "", "", "", "", "", "", "", "",
00884   "", "", "", "", "", "", "", "", "", "",
00885   "lt", "", "gt"
00886 };
00887 
00888 static const char* kAttrEntities[] = {
00889   "", "", "", "", "", "", "", "", "", "",
00890   "", "", "", "", "", "", "", "", "", "",
00891   "", "", "", "", "", "", "", "", "", "",
00892   "", "", "", "", "quot", "", "", "", "amp", "",
00893   "", "", "", "", "", "", "", "", "", "",
00894   "", "", "", "", "", "", "", "", "", "",
00895   "lt", "", "gt"
00896 };
00897 
00898 void
00899 nsHTMLContentSerializer::AppendToString(const nsAString& aStr,
00900                                         nsAString& aOutputStr,
00901                                         PRBool aTranslateEntities,
00902                                         PRBool aIncrColumn)
00903 {
00904   if (mBodyOnly && !mInBody) {
00905     return;
00906   }
00907 
00908   if (aIncrColumn) {
00909     mColPos += aStr.Length();
00910   }
00911 
00912   if (aTranslateEntities && !mInCDATA) {
00913     if (mFlags & (nsIDocumentEncoder::OutputEncodeBasicEntities  |
00914                   nsIDocumentEncoder::OutputEncodeLatin1Entities |
00915                   nsIDocumentEncoder::OutputEncodeHTMLEntities   |
00916                   nsIDocumentEncoder::OutputEncodeW3CEntities)) {
00917       nsIParserService* parserService =
00918         nsContentUtils::GetParserServiceWeakRef();
00919 
00920       if (!parserService) {
00921         NS_ERROR("Can't get parser service");
00922         return;
00923       }
00924 
00925       nsReadingIterator<PRUnichar> done_reading;
00926       aStr.EndReading(done_reading);
00927 
00928       // for each chunk of |aString|...
00929       PRUint32 advanceLength = 0;
00930       nsReadingIterator<PRUnichar> iter;
00931 
00932       const char **entityTable = mInAttribute ? kAttrEntities : kEntities;
00933 
00934       for (aStr.BeginReading(iter); 
00935            iter != done_reading; 
00936            iter.advance(PRInt32(advanceLength))) {
00937         PRUint32 fragmentLength = iter.size_forward();
00938         PRUint32 lengthReplaced = 0; // the number of UTF-16 codepoints
00939                                      //  replaced by a particular entity
00940         const PRUnichar* c = iter.get();
00941         const PRUnichar* fragmentStart = c;
00942         const PRUnichar* fragmentEnd = c + fragmentLength;
00943         const char* entityText = nsnull;
00944         nsCAutoString entityReplacement;
00945         char* fullEntityText = nsnull;
00946 
00947         advanceLength = 0;
00948         // for each character in this chunk, check if it
00949         // needs to be replaced
00950         for (; c < fragmentEnd; c++, advanceLength++) {
00951           PRUnichar val = *c;
00952           if (val == kValNBSP) {
00953             entityText = kEntityNBSP;
00954             break;
00955           }
00956           else if ((val <= kGTVal) && (entityTable[val][0] != 0)) {
00957             entityText = entityTable[val];
00958             break;
00959           } else if (val > 127 &&
00960                     ((val < 256 &&
00961                       mFlags & nsIDocumentEncoder::OutputEncodeLatin1Entities) ||
00962                       mFlags & nsIDocumentEncoder::OutputEncodeHTMLEntities)) {
00963             parserService->HTMLConvertUnicodeToEntity(val, entityReplacement);
00964 
00965             if (!entityReplacement.IsEmpty()) {
00966               entityText = entityReplacement.get();
00967               break;
00968             }
00969           }
00970           else if (val > 127 && 
00971                    mFlags & nsIDocumentEncoder::OutputEncodeW3CEntities &&
00972                    mEntityConverter) {
00973             if (IS_HIGH_SURROGATE(val) &&
00974                 c + 1 < fragmentEnd &&
00975                 IS_LOW_SURROGATE(*(c + 1))) {
00976               PRUint32 valUTF32 = SURROGATE_TO_UCS4(val, *(++c));
00977               if (NS_SUCCEEDED(mEntityConverter->ConvertUTF32ToEntity(valUTF32,
00978                                nsIEntityConverter::entityW3C, &fullEntityText))) {
00979                 lengthReplaced = 2;
00980                 break;
00981               }
00982               else {
00983                 advanceLength++;
00984               }
00985             }
00986             else if (NS_SUCCEEDED(mEntityConverter->ConvertToEntity(val,
00987                                   nsIEntityConverter::entityW3C, 
00988                                   &fullEntityText))) {
00989               lengthReplaced = 1;
00990               break;
00991             }
00992           }
00993         }
00994 
00995         aOutputStr.Append(fragmentStart, advanceLength);
00996         if (entityText) {
00997           aOutputStr.Append(PRUnichar('&'));
00998           AppendASCIItoUTF16(entityText, aOutputStr);
00999           aOutputStr.Append(PRUnichar(';'));
01000           advanceLength++;
01001         }
01002         // if it comes from nsIEntityConverter, it already has '&' and ';'
01003         else if (fullEntityText) {
01004           AppendASCIItoUTF16(fullEntityText, aOutputStr);
01005           nsMemory::Free(fullEntityText);
01006           advanceLength += lengthReplaced;
01007         }
01008       }
01009     } else {
01010       nsXMLContentSerializer::AppendToString(aStr, aOutputStr, aTranslateEntities, aIncrColumn);
01011     }
01012 
01013     return;
01014   }
01015 
01016   aOutputStr.Append(aStr);
01017 }
01018 
01019 void
01020 nsHTMLContentSerializer::AppendToStringConvertLF(const nsAString& aStr,
01021                                                  nsAString& aOutputStr)
01022 {
01023   // Convert line-endings to mLineBreak
01024   PRUint32 start = 0;
01025   PRUint32 theLen = aStr.Length();
01026   while (start < theLen) {
01027     PRInt32 eol = aStr.FindChar('\n', start);
01028     if (eol == kNotFound) {
01029       nsDependentSubstring dataSubstring(aStr, start, theLen - start);
01030       AppendToString(dataSubstring, aOutputStr);
01031       start = theLen;
01032     }
01033     else {
01034       nsDependentSubstring dataSubstring(aStr, start, eol - start);
01035       AppendToString(dataSubstring, aOutputStr);
01036       AppendToString(mLineBreak, aOutputStr);
01037       start = eol + 1;
01038       if (start == theLen)
01039         mColPos = 0;
01040     }
01041   }
01042 }
01043 
01044 PRBool
01045 nsHTMLContentSerializer::LineBreakBeforeOpen(nsIAtom* aName, 
01046                                              PRBool aHasDirtyAttr)
01047 {
01048   if ((!mDoFormat && !aHasDirtyAttr) || mPreLevel || !mColPos ||
01049       (mFlags & nsIDocumentEncoder::OutputRaw)) {
01050     return PR_FALSE;
01051   }
01052         
01053   if (aName == nsHTMLAtoms::title ||
01054       aName == nsHTMLAtoms::meta  ||
01055       aName == nsHTMLAtoms::link  ||
01056       aName == nsHTMLAtoms::style ||
01057       aName == nsHTMLAtoms::select ||
01058       aName == nsHTMLAtoms::option ||
01059       aName == nsHTMLAtoms::script ||
01060       aName == nsHTMLAtoms::html) {
01061     return PR_TRUE;
01062   }
01063   else {
01064     nsIParserService* parserService =
01065       nsContentUtils::GetParserServiceWeakRef();
01066     
01067     if (parserService) {
01068       PRBool res;
01069       parserService->IsBlock(parserService->HTMLAtomTagToId(aName), res);
01070       return res;
01071     }
01072   }
01073 
01074   return PR_FALSE;
01075 }
01076 
01077 PRBool 
01078 nsHTMLContentSerializer::LineBreakAfterOpen(nsIAtom* aName, 
01079                                             PRBool aHasDirtyAttr)
01080 {
01081   if ((!mDoFormat && !aHasDirtyAttr) || mPreLevel ||
01082       (mFlags & nsIDocumentEncoder::OutputRaw)) {
01083     return PR_FALSE;
01084   }
01085 
01086   if ((aName == nsHTMLAtoms::html) ||
01087       (aName == nsHTMLAtoms::head) ||
01088       (aName == nsHTMLAtoms::body) ||
01089       (aName == nsHTMLAtoms::ul) ||
01090       (aName == nsHTMLAtoms::ol) ||
01091       (aName == nsHTMLAtoms::dl) ||
01092       (aName == nsHTMLAtoms::table) ||
01093       (aName == nsHTMLAtoms::tbody) ||
01094       (aName == nsHTMLAtoms::tr) ||
01095       (aName == nsHTMLAtoms::br) ||
01096       (aName == nsHTMLAtoms::meta) ||
01097       (aName == nsHTMLAtoms::link) ||
01098       (aName == nsHTMLAtoms::script) ||
01099       (aName == nsHTMLAtoms::select) ||
01100       (aName == nsHTMLAtoms::map) ||
01101       (aName == nsHTMLAtoms::area) ||
01102       (aName == nsHTMLAtoms::style)) {
01103     return PR_TRUE;
01104   }
01105 
01106   return PR_FALSE;
01107 }
01108 
01109 PRBool 
01110 nsHTMLContentSerializer::LineBreakBeforeClose(nsIAtom* aName, 
01111                                               PRBool aHasDirtyAttr)
01112 {
01113   if ((!mDoFormat && !aHasDirtyAttr) || mPreLevel || !mColPos ||
01114       (mFlags & nsIDocumentEncoder::OutputRaw)) {
01115     return PR_FALSE;
01116   }
01117 
01118   if ((aName == nsHTMLAtoms::html) ||
01119       (aName == nsHTMLAtoms::head) ||
01120       (aName == nsHTMLAtoms::body) ||
01121       (aName == nsHTMLAtoms::ul) ||
01122       (aName == nsHTMLAtoms::ol) ||
01123       (aName == nsHTMLAtoms::dl) ||
01124       (aName == nsHTMLAtoms::select) ||
01125       (aName == nsHTMLAtoms::table) ||
01126       (aName == nsHTMLAtoms::tbody)) {
01127     return PR_TRUE;
01128   }
01129   
01130   return PR_FALSE;
01131 }
01132 
01133 PRBool 
01134 nsHTMLContentSerializer::LineBreakAfterClose(nsIAtom* aName, 
01135                                              PRBool aHasDirtyAttr)
01136 {
01137   if ((!mDoFormat && !aHasDirtyAttr) || mPreLevel ||
01138       (mFlags & nsIDocumentEncoder::OutputRaw)) {
01139     return PR_FALSE;
01140   }
01141 
01142   if ((aName == nsHTMLAtoms::html) ||
01143       (aName == nsHTMLAtoms::head) ||
01144       (aName == nsHTMLAtoms::body) ||
01145       (aName == nsHTMLAtoms::tr) ||
01146       (aName == nsHTMLAtoms::th) ||
01147       (aName == nsHTMLAtoms::td) ||
01148       (aName == nsHTMLAtoms::pre) ||
01149       (aName == nsHTMLAtoms::title) ||
01150       (aName == nsHTMLAtoms::li) ||
01151       (aName == nsHTMLAtoms::dt) ||
01152       (aName == nsHTMLAtoms::dd) ||
01153       (aName == nsHTMLAtoms::blockquote) ||
01154       (aName == nsHTMLAtoms::select) ||
01155       (aName == nsHTMLAtoms::option) ||
01156       (aName == nsHTMLAtoms::p) ||
01157       (aName == nsHTMLAtoms::map) ||
01158       (aName == nsHTMLAtoms::div)) {
01159     return PR_TRUE;
01160   }
01161   else {
01162     nsIParserService* parserService =
01163       nsContentUtils::GetParserServiceWeakRef();
01164     
01165     if (parserService) {
01166       PRBool res;
01167       parserService->IsBlock(parserService->HTMLAtomTagToId(aName), res);
01168       return res;
01169     }
01170   }
01171 
01172   return PR_FALSE;
01173 }
01174 
01175 void
01176 nsHTMLContentSerializer::StartIndentation(nsIAtom* aName,
01177                                           PRBool aHasDirtyAttr,
01178                                           nsAString& aStr)
01179 {
01180   if ((mDoFormat || aHasDirtyAttr) && !mPreLevel && !mColPos) {
01181     for (PRInt32 i = mIndent; --i >= 0; ) {
01182       AppendToString(kIndentStr, aStr);
01183     }
01184   }
01185 
01186   if ((aName == nsHTMLAtoms::head) ||
01187       (aName == nsHTMLAtoms::table) ||
01188       (aName == nsHTMLAtoms::tr) ||
01189       (aName == nsHTMLAtoms::ul) ||
01190       (aName == nsHTMLAtoms::ol) ||
01191       (aName == nsHTMLAtoms::dl) ||
01192       (aName == nsHTMLAtoms::tbody) ||
01193       (aName == nsHTMLAtoms::form) ||
01194       (aName == nsHTMLAtoms::frameset) ||
01195       (aName == nsHTMLAtoms::blockquote) ||
01196       (aName == nsHTMLAtoms::li) ||
01197       (aName == nsHTMLAtoms::dt) ||
01198       (aName == nsHTMLAtoms::dd)) {
01199     mIndent++;
01200   }
01201 }
01202 
01203 void
01204 nsHTMLContentSerializer::EndIndentation(nsIAtom* aName,
01205                                         PRBool aHasDirtyAttr,
01206                                         nsAString& aStr)
01207 {
01208   if ((aName == nsHTMLAtoms::head) ||
01209       (aName == nsHTMLAtoms::table) ||
01210       (aName == nsHTMLAtoms::tr) ||
01211       (aName == nsHTMLAtoms::ul) ||
01212       (aName == nsHTMLAtoms::ol) ||
01213       (aName == nsHTMLAtoms::dl) ||
01214       (aName == nsHTMLAtoms::li) ||
01215       (aName == nsHTMLAtoms::tbody) ||
01216       (aName == nsHTMLAtoms::form) ||
01217       (aName == nsHTMLAtoms::blockquote) ||
01218       (aName == nsHTMLAtoms::dt) ||
01219       (aName == nsHTMLAtoms::dd) ||
01220       (aName == nsHTMLAtoms::frameset)) {
01221     mIndent--;
01222   }
01223 
01224   if ((mDoFormat || aHasDirtyAttr) && !mPreLevel && !mColPos) {
01225     for (PRInt32 i = mIndent; --i >= 0; ) {
01226       AppendToString(kIndentStr, aStr);
01227     }
01228   }
01229 }
01230 
01231 // See if the string has any lines longer than longLineLen:
01232 // if so, we presume formatting is wonky (e.g. the node has been edited)
01233 // and we'd better rewrap the whole text node.
01234 PRBool 
01235 nsHTMLContentSerializer::HasLongLines(const nsString& text, PRInt32& aLastNewlineOffset)
01236 {
01237   PRUint32 start=0;
01238   PRUint32 theLen=text.Length();
01239   PRBool rv = PR_FALSE;
01240   aLastNewlineOffset = kNotFound;
01241   for (start = 0; start < theLen; )
01242   {
01243     PRInt32 eol = text.FindChar('\n', start);
01244     if (eol < 0) {
01245       eol = text.Length();
01246     }
01247     else {
01248       aLastNewlineOffset = eol;
01249     }
01250     if (PRInt32(eol - start) > kLongLineLen)
01251       rv = PR_TRUE;
01252     start = eol+1;
01253   }
01254   return rv;
01255 }
01256 
01257 void 
01258 nsHTMLContentSerializer::SerializeLIValueAttribute(nsIDOMElement* aElement,
01259                                                    nsAString& aStr)
01260 {
01261   // We are copying and we are at the "first" LI node of OL in selected range.
01262   // It may not be the first LI child of OL but it's first in the selected range.
01263   // Note that we get into this condition only once per a OL.
01264   nsCOMPtr<nsIDOMNode> node = do_QueryInterface(aElement);
01265   PRBool found = PR_FALSE;
01266   nsIDOMNode* currNode = node;
01267   nsAutoString valueStr;
01268   PRInt32 offset = 0;
01269   olState defaultOLState(0, PR_FALSE);
01270   olState* state = nsnull;
01271   if (mOLStateStack.Count() > 0) 
01272     state = (olState*)mOLStateStack.ElementAt(mOLStateStack.Count()-1);
01273   /* Though we should never reach to a "state" as null or mOLStateStack.Count() == 0 
01274   at this point as all LI are supposed to be inside some OL and OL tag should have 
01275   pushed a state to the olStateStack.*/
01276   if (!state || mOLStateStack.Count() == 0)
01277     state = &defaultOLState;
01278   PRInt32 startVal = state->startVal;
01279   state->isFirstListItem = PR_FALSE;
01280   // Traverse previous siblings until we find one with "value" attribute.
01281   // offset keeps track of how many previous siblings we had tocurrNode traverse.
01282   while (currNode && !found) {
01283     nsCOMPtr<nsIDOMElement> currElement = do_QueryInterface(currNode);
01284     // currElement may be null if it were a text node.
01285     if (currElement) {
01286       nsAutoString tagName;
01287       currElement->GetTagName(tagName);
01288       if (tagName.LowerCaseEqualsLiteral("li")) {
01289         currElement->GetAttribute(NS_LITERAL_STRING("value"), valueStr);
01290         if (valueStr.IsEmpty())
01291           offset++;
01292         else {
01293           found = PR_TRUE;
01294           PRInt32 rv = 0;
01295           startVal = valueStr.ToInteger(&rv); 
01296         }
01297       }
01298     }
01299     currNode->GetPreviousSibling(&currNode);
01300   }
01301   // If LI was not having "value", Set the "value" attribute for it.
01302   // Note that We are at the first LI in the selected range of OL.
01303   if (offset == 0 && found) {
01304     // offset = 0 => LI itself has the value attribute and we did not need to traverse back.
01305     // Just serialize value attribute like other tags.
01306     SerializeAttr(EmptyString(), NS_LITERAL_STRING("value"), valueStr, aStr, PR_FALSE);
01307   }
01308   else if (offset == 1 && !found) {
01309     /*(offset = 1 && !found) means either LI is the first child node of OL 
01310     and LI is not having "value" attribute. 
01311     In that case we would not like to set "value" attribute to reduce the changes.
01312     */
01313      //do nothing...
01314   }
01315   else if (offset > 0) {
01316     // Set value attribute.
01317     nsAutoString valueStr;
01318 
01319     //As serializer needs to use this valueAttr we are creating here, 
01320     valueStr.AppendInt(startVal + offset);
01321     SerializeAttr(EmptyString(), NS_LITERAL_STRING("value"), valueStr, aStr, PR_FALSE);
01322   }
01323 }
01324 
01325 PRBool
01326 nsHTMLContentSerializer::IsFirstChildOfOL(nsIDOMElement* aElement){
01327   nsCOMPtr<nsIDOMNode> node = do_QueryInterface(aElement);
01328   nsAutoString parentName;
01329   {
01330     nsCOMPtr<nsIDOMNode> parentNode;
01331     node->GetParentNode(getter_AddRefs(parentNode));
01332     if (parentNode)
01333       parentNode->GetNodeName(parentName);
01334     else
01335       return PR_FALSE;
01336   }
01337   
01338   if (parentName.LowerCaseEqualsLiteral("ol")) {
01339     olState defaultOLState(0, PR_FALSE);
01340     olState* state = nsnull;
01341     if (mOLStateStack.Count() > 0) 
01342       state = (olState*)mOLStateStack.ElementAt(mOLStateStack.Count()-1);
01343     /* Though we should never reach to a "state" as null at this point as 
01344     all LI are supposed to be inside some OL and OL tag should have pushed
01345     a state to the mOLStateStack.*/
01346     if (!state)
01347       state = &defaultOLState;
01348     
01349     if (state->isFirstListItem)
01350       return PR_TRUE;
01351 
01352     return PR_FALSE;
01353   }
01354   else
01355     return PR_FALSE;
01356 }