Back to index

lightning-sunbird  0.9+nobinonly
Convert.cpp
Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
00002  * ***** BEGIN LICENSE BLOCK *****
00003  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00004  *
00005  * The contents of this file are subject to the Mozilla Public License Version
00006  * 1.1 (the "License"); you may not use this file except in compliance with
00007  * the License. You may obtain a copy of the License at
00008  * http://www.mozilla.org/MPL/
00009  *
00010  * Software distributed under the License is distributed on an "AS IS" basis,
00011  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00012  * for the specific language governing rights and limitations under the
00013  * License.
00014  *
00015  * The Original Code is Mozilla Communicator client code, released
00016  * March 31, 1998.
00017  *
00018  * The Initial Developer of the Original Code is
00019  * Netscape Communications Corporation.
00020  * Portions created by the Initial Developer are Copyright (C) 1998-1999
00021  * the Initial Developer. All Rights Reserved.
00022  *
00023  * Contributor(s):
00024  *   Akkana Peck.
00025  *
00026  * Alternatively, the contents of this file may be used under the terms of
00027  * either of the GNU General Public License Version 2 or later (the "GPL"),
00028  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00029  * in which case the provisions of the GPL or the LGPL are applicable instead
00030  * of those above. If you wish to allow use of your version of this file only
00031  * under the terms of either the GPL or the LGPL, and not to allow others to
00032  * use your version of this file under the terms of the MPL, indicate your
00033  * decision by deleting the provisions above and replace them with the notice
00034  * and other provisions required by the GPL or the LGPL. If you do not delete
00035  * the provisions above, a recipient may use your version of this file under
00036  * the terms of any one of the MPL, the GPL or the LGPL.
00037  *
00038  * ***** END LICENSE BLOCK ***** */
00039 
00040 #include <ctype.h>      // for isdigit()
00041 
00042 #include "nsXPCOM.h"
00043 #include "nsParserCIID.h"
00044 #include "nsIParser.h"
00045 #include "nsIHTMLContentSink.h"
00046 #include "nsIContentSerializer.h"
00047 #include "nsLayoutCID.h"
00048 #include "nsIHTMLToTextSink.h"
00049 #include "nsIComponentManager.h"
00050 #include "nsIServiceManager.h"
00051 #include "nsIComponentRegistrar.h"
00052 #include "nsReadableUtils.h"
00053 #include "nsCRT.h"
00054 
00055 static NS_DEFINE_IID(kIParserIID, NS_IPARSER_IID);  // interface ID of nsIParser; not referenced by the code visible in this file
00056 static NS_DEFINE_CID(kParserCID, NS_PARSER_CID);    // class ID used by HTML2text() to instantiate the parser
00057 
00058 int
00059 Compare(nsString& str, nsString& aFileName)
00060 {
00061   // Open the file in a Unix-centric way,
00062   // until I find out how to use nsFileSpec:
00063   char* filename = ToNewCString(aFileName);
00064   FILE* file = fopen(filename, "r");
00065   if (!file)
00066   {
00067     fprintf(stderr, "Can't open file %s", filename);
00068     perror(" ");
00069     delete[] filename;
00070     return 2;
00071   }
00072   delete[] filename;
00073 
00074   // Inefficiently read from the file:
00075   nsString inString;
00076   int c;
00077   int index = 0;
00078   int different = 0;
00079   while ((c = getc(file)) != EOF)
00080   {
00081     inString.Append(PRUnichar(c));
00082     // CVS isn't doing newline comparisons on these files for some reason.
00083     // So compensate for possible newline problems in the CVS file:
00084     if (c == '\n' && str[index] == '\r')
00085       ++index;
00086     if (c != str[index++])
00087     {
00088       //printf("Comparison failed at char %d: generated was %d, file had %d\n",
00089       //       index, (int)str[index-1], (int)c);
00090       different = index;
00091       break;
00092     }
00093   }
00094   if (file != stdin)
00095     fclose(file);
00096 
00097   if (!different)
00098     return 0;
00099   else
00100   {
00101     nsAutoString left;
00102     str.Left(left, different);
00103     char* cstr = ToNewUTF8String(left);
00104     printf("Comparison failed at char %d:\n-----\n%s\n-----\n",
00105            different, cstr);
00106     Recycle(cstr);
00107     return 1;
00108   }
00109 }
00110 
00111 //----------------------------------------------------------------------
00112 // Convert html on stdin to either plaintext or (if toHTML) html
00113 //----------------------------------------------------------------------
00114 nsresult
00115 HTML2text(nsString& inString, nsString& inType, nsString& outType,
00116           int flags, int wrapCol, nsString& compareAgainst)
00117 {
00118   nsresult rv = NS_OK;
00119 
00120   nsString outString;
00121 
00122   // Create a parser
00123   nsIParser* parser;
00124   rv = CallCreateInstance(kParserCID, &parser);
00125   if (NS_FAILED(rv))
00126   {
00127     printf("Unable to create a parser : 0x%x\n", rv);
00128     return NS_ERROR_FAILURE;
00129   }
00130 
00131   // Create the appropriate output sink
00132 #ifdef USE_SERIALIZER
00133   nsCAutoString progId(NS_CONTENTSERIALIZER_CONTRACTID_PREFIX);
00134   progId.AppendWithConversion(outType);
00135 
00136   // The syntax used here doesn't work
00137   nsCOMPtr<nsIContentSerializer> mSerializer;
00138   mSerializer = do_CreateInstance(NS_STATIC_CAST(const char *, progId));
00139   NS_ENSURE_TRUE(mSerializer, NS_ERROR_NOT_IMPLEMENTED);
00140 
00141   mSerializer->Init(flags, wrapCol);
00142 
00143   nsCOMPtr<nsIHTMLContentSink> sink (do_QueryInterface(mSerializer));
00144   if (!sink)
00145   {
00146     printf("Couldn't get content sink!\n");
00147     return NS_ERROR_UNEXPECTED;
00148   }
00149 #else /* USE_SERIALIZER */
00150   nsCOMPtr<nsIContentSink> sink;
00151   if (!inType.EqualsLiteral("text/html")
00152       || !outType.EqualsLiteral("text/plain"))
00153   {
00154     char* in = ToNewCString(inType);
00155     char* out = ToNewCString(outType);
00156     printf("Don't know how to convert from %s to %s\n", in, out);
00157     Recycle(in);
00158     Recycle(out);
00159     return NS_ERROR_FAILURE;
00160   }
00161 
00162   sink = do_CreateInstance(NS_PLAINTEXTSINK_CONTRACTID);
00163   NS_ENSURE_TRUE(sink, NS_ERROR_FAILURE);
00164 
00165   nsCOMPtr<nsIHTMLToTextSink> textSink(do_QueryInterface(sink));
00166   NS_ENSURE_TRUE(textSink, NS_ERROR_FAILURE);
00167 
00168   textSink->Initialize(&outString, flags, wrapCol);
00169 #endif /* USE_SERIALIZER */
00170 
00171   parser->SetContentSink(sink);
00172    nsCOMPtr<nsIDTD> dtd;
00173   if (inType.EqualsLiteral("text/html")) {
00174     static NS_DEFINE_CID(kNavDTDCID, NS_CNAVDTD_CID);
00175     dtd = do_CreateInstance(kNavDTDCID, &rv);
00176   }
00177   else
00178   {
00179     printf("Don't know how to deal with non-html input!\n");
00180     return NS_ERROR_NOT_IMPLEMENTED;
00181   }
00182   if (NS_FAILED(rv))
00183   {
00184     printf("Couldn't create new HTML DTD: 0x%x\n", rv);
00185     return rv;
00186   }
00187 
00188   parser->RegisterDTD(dtd);
00189 
00190   rv = parser->Parse(inString, 0, NS_LossyConvertUCS2toASCII(inType), PR_FALSE, PR_TRUE);
00191   if (NS_FAILED(rv))
00192   {
00193     printf("Parse() failed! 0x%x\n", rv);
00194     return rv;
00195   }
00196   NS_RELEASE(parser);
00197 
00198   if (compareAgainst.Length() > 0)
00199     return Compare(outString, compareAgainst);
00200 
00201   char* charstar = ToNewUTF8String(outString);
00202   printf("Output string is:\n--------------------\n%s--------------------\n",
00203          charstar);
00204   delete[] charstar;
00205 
00206   return NS_OK;
00207 }
00208 
00209 //----------------------------------------------------------------------
00210 
00211 int main(int argc, char** argv)
00212 {
00213   nsString inType(NS_LITERAL_STRING("text/html"));
00214   nsString outType(NS_LITERAL_STRING("text/plain"));
00215   int wrapCol = 72;
00216   int flags = 0;
00217   nsString compareAgainst;
00218 
00219 
00220   // Skip over progname arg:
00221   const char* progname = argv[0];
00222   --argc; ++argv;
00223 
00224   // Process flags
00225   while (argc > 0 && argv[0][0] == '-')
00226   {
00227     switch (argv[0][1])
00228     {
00229       case 'h':
00230         printf("\
00231 Usage: %s [-i intype] [-o outtype] [-f flags] [-w wrapcol] [-c comparison_file] infile\n\
00232 \tIn/out types are mime types (e.g. text/html)\n\
00233 \tcomparison_file is a file against which to compare the output\n\
00234 \n\
00235 \tDefaults are -i text/html -o text/plain -f 0 -w 72 [stdin]\n",
00236                progname);
00237         exit(0);
00238 
00239         case 'i':
00240         if (argv[0][2] != '\0')
00241           inType.AssignWithConversion(argv[0]+2);
00242         else {
00243           inType.AssignWithConversion(argv[1]);
00244           --argc;
00245           ++argv;
00246         }
00247         break;
00248 
00249       case 'o':
00250         if (argv[0][2] != '\0')
00251           outType.AssignWithConversion(argv[0]+2);
00252         else {
00253           outType.AssignWithConversion(argv[1]);
00254           --argc;
00255           ++argv;
00256         }
00257         break;
00258 
00259       case 'w':
00260         if (isdigit(argv[0][2]))
00261           wrapCol = atoi(argv[0]+2);
00262         else {
00263           wrapCol = atoi(argv[1]);
00264           --argc;
00265           ++argv;
00266         }
00267         break;
00268 
00269       case 'f':
00270         if (isdigit(argv[0][2]))
00271           flags = atoi(argv[0]+2);
00272         else {
00273           flags = atoi(argv[1]);
00274           --argc;
00275           ++argv;
00276         }
00277         break;
00278 
00279       case 'c':
00280         if (argv[0][2] != '\0')
00281           compareAgainst.AssignWithConversion(argv[0]+2);
00282         else {
00283           compareAgainst.AssignWithConversion(argv[1]);
00284           --argc;
00285           ++argv;
00286         }
00287         break;
00288     }
00289     ++argv;
00290     --argc;
00291   }
00292 
00293   FILE* file = 0;
00294   if (argc > 0)         // read from a file
00295   {
00296     // Open the file in a Unix-centric way,
00297     // until I find out how to use nsFileSpec:
00298     file = fopen(argv[0], "r");
00299     if (!file)
00300     {
00301       fprintf(stderr, "Can't open file %s", argv[0]);
00302       perror(" ");
00303       exit(1);
00304     }
00305   }
00306   else
00307     file = stdin;
00308 
00309   nsresult ret;
00310   {
00311     nsCOMPtr<nsIServiceManager> servMan;
00312     NS_InitXPCOM2(getter_AddRefs(servMan), nsnull, nsnull);
00313     nsCOMPtr<nsIComponentRegistrar> registrar = do_QueryInterface(servMan);
00314     NS_ASSERTION(registrar, "Null nsIComponentRegistrar");
00315     registrar->AutoRegister(nsnull);
00316 
00317     // Read in the string: very inefficient, but who cares?
00318     nsString inString;
00319     int c;
00320     while ((c = getc(file)) != EOF)
00321       inString.Append(PRUnichar(c));
00322 
00323     if (file != stdin)
00324       fclose(file);
00325 
00326     ret = HTML2text(inString, inType, outType, flags, wrapCol, compareAgainst);
00327   } // this scopes the nsCOMPtrs
00328   // no nsCOMPtrs are allowed to be alive when you call NS_ShutdownXPCOM
00329   nsresult rv = NS_ShutdownXPCOM( NULL );
00330   NS_ASSERTION(NS_SUCCEEDED(rv), "NS_ShutdownXPCOM failed");
00331   return ret;
00332 }