Back to index

lightning-sunbird  0.9+nobinonly
nsDebugRobot.cpp
Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /* ***** BEGIN LICENSE BLOCK *****
00003  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00004  *
00005  * The contents of this file are subject to the Mozilla Public License Version
00006  * 1.1 (the "License"); you may not use this file except in compliance with
00007  * the License. You may obtain a copy of the License at
00008  * http://www.mozilla.org/MPL/
00009  *
00010  * Software distributed under the License is distributed on an "AS IS" basis,
00011  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00012  * for the specific language governing rights and limitations under the
00013  * License.
00014  *
00015  * The Original Code is mozilla.org code.
00016  *
00017  * The Initial Developer of the Original Code is
00018  * Netscape Communications Corporation.
00019  * Portions created by the Initial Developer are Copyright (C) 1998
00020  * the Initial Developer. All Rights Reserved.
00021  *
00022  * Contributor(s):
00023  *   Pierre Phaneuf <pp@ludusdesign.com>
00024  *
00025  * Alternatively, the contents of this file may be used under the terms of
00026  * either of the GNU General Public License Version 2 or later (the "GPL"),
00027  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00028  * in which case the provisions of the GPL or the LGPL are applicable instead
00029  * of those above. If you wish to allow use of your version of this file only
00030  * under the terms of either the GPL or the LGPL, and not to allow others to
00031  * use your version of this file under the terms of the MPL, indicate your
00032  * decision by deleting the provisions above and replace them with the notice
00033  * and other provisions required by the GPL or the LGPL. If you do not delete
00034  * the provisions above, a recipient may use your version of this file under
00035  * the terms of any one of the MPL, the GPL or the LGPL.
00036  *
00037  * ***** END LICENSE BLOCK ***** */
00038 #include "nsIRobotSink.h"
00039 #include "nsIRobotSinkObserver.h"
00040 #include "nsIParser.h"
00041 #include "nsIDocShell.h"
00042 #include "nsIWebNavigation.h" 
00043 #include "nsIWebProgress.h"
00044 #include "nsIWebProgressListener.h"
00045 #include "nsWeakReference.h"
00046 #include "nsVoidArray.h"
00047 #include "nsString.h"
00048 #include "nsReadableUtils.h"
00049 #include "nsIURL.h"
00050 #include "nsIServiceManager.h"
00051 #include "nsIURL.h"
00052 #include "nsIIOService.h"
00053 #include "nsNetCID.h"
00054 #include "nsIComponentManager.h"
00055 #include "nsParserCIID.h"
00056 #include "nsIInterfaceRequestor.h"
00057 #include "nsIInterfaceRequestorUtils.h"
00058 
00059 static NS_DEFINE_CID(kIOServiceCID, NS_IOSERVICE_CID);
00060 
00061 class RobotSinkObserver : public nsIRobotSinkObserver {
00062 public:
00063   RobotSinkObserver() {
00064   }
00065 
00066   virtual ~RobotSinkObserver() {
00067   }
00068 
00069   NS_DECL_ISUPPORTS
00070 
00071   NS_IMETHOD ProcessLink(const nsString& aURLSpec);
00072   NS_IMETHOD VerifyDirectory (const char * verify_dir);
00073     
00074 };
00075 
00076 static nsVoidArray * g_workList;
00077 static nsVoidArray * g_duplicateList;
00078 static int g_iProcessed;
00079 static int g_iMaxProcess = 5000;
00080 static PRBool g_bHitTop;
00081 static PRBool g_bReadyForNextUrl;
00082 
00083 NS_IMPL_ISUPPORTS1(RobotSinkObserver, nsIRobotSinkObserver)
00084 
00085 NS_IMETHODIMP RobotSinkObserver::VerifyDirectory(const char * verify_dir)
00086 {
00087    return NS_OK;
00088 }
00089 
00090 NS_IMETHODIMP RobotSinkObserver::ProcessLink(const nsString& aURLSpec)
00091 {
00092   if (!g_bHitTop) {
00093      
00094      nsAutoString str;
00095      // Geez this is ugly. temporary hack to only process html files
00096      str.Truncate();
00097      nsString(aURLSpec).Right(str,1);
00098      if (!str.EqualsLiteral("/"))
00099      {
00100         str.Truncate();
00101         nsString(aURLSpec).Right(str,4);
00102         if (!str.EqualsLiteral("html"))
00103         {
00104            str.Truncate();
00105            nsString(aURLSpec).Right(str,3);
00106            if (!str.EqualsLiteral("htm"))
00107               return NS_OK;
00108         }
00109      }
00110      PRInt32 nCount = g_duplicateList->Count();
00111      if (nCount > 0)
00112      {
00113         for (PRInt32 n = 0; n < nCount; ++n)
00114         {
00115            nsString * pstr = (nsString *)g_duplicateList->ElementAt(n);
00116            if (pstr->Equals(aURLSpec)) {
00117               fputs ("Robot: (duplicate '",stdout);
00118               fputs (NS_LossyConvertUCS2toASCII(aURLSpec).get(),stdout);
00119               fputs ("')\n",stdout);
00120               return NS_OK;
00121            }
00122         }
00123      }
00124      g_duplicateList->AppendElement(new nsString(aURLSpec));
00125      str.Truncate();
00126      nsString(aURLSpec).Left(str,5);
00127      if (str.EqualsLiteral("http:")) {
00128         ++g_iProcessed;
00129         if (g_iProcessed == (g_iMaxProcess > 0 ? g_iMaxProcess-1 : 0))
00130            g_bHitTop = PR_TRUE;
00131         g_workList->AppendElement(new nsString(aURLSpec));
00132      }
00133      else {
00134         fputs ("Robot: (cannot process URL types '",stdout);
00135         fputs (NS_LossyConvertUCS2toASCII(aURLSpec).get(),stdout);
00136         fputs ("')\n",stdout);
00137      }
00138   }
00139   return NS_OK;
00140 }
00141 
00142 extern "C" NS_EXPORT void SetVerificationDirectory(char * verify_dir);
00143 
00144 class CStreamListener:  public nsIWebProgressListener,
00145                         public nsSupportsWeakReference
00146 {
00147 public:
00148   CStreamListener() {
00149   }
00150 
00151   virtual ~CStreamListener() {
00152   }
00153 
00154   NS_DECL_ISUPPORTS
00155 
00156   // nsIWebProgressListener
00157   NS_DECL_NSIWEBPROGRESSLISTENER
00158 };
00159 
00160 // nsIWebProgressListener implementation
00161 NS_IMETHODIMP
00162 CStreamListener::OnStateChange(nsIWebProgress* aWebProgress, 
00163                    nsIRequest *aRequest, 
00164                    PRUint32 progressStateFlags, 
00165                    nsresult aStatus)
00166 {
00167     if (progressStateFlags & nsIWebProgressListener::STATE_IS_DOCUMENT)
00168         if (progressStateFlags & nsIWebProgressListener::STATE_STOP) {
00169             fputs("done.\n",stdout);
00170             g_bReadyForNextUrl = PR_TRUE;
00171         }
00172     return NS_OK;
00173 }
00174 
00175 NS_IMETHODIMP
00176 CStreamListener::OnProgressChange(nsIWebProgress *aWebProgress,
00177                                      nsIRequest *aRequest,
00178                                      PRInt32 aCurSelfProgress,
00179                                      PRInt32 aMaxSelfProgress,
00180                                      PRInt32 aCurTotalProgress,
00181                                      PRInt32 aMaxTotalProgress)
00182 {
00183     NS_NOTREACHED("notification excluded in AddProgressListener(...)");
00184     return NS_OK;
00185 }
00186 
00187 NS_IMETHODIMP
00188 CStreamListener::OnLocationChange(nsIWebProgress* aWebProgress,
00189                       nsIRequest* aRequest,
00190                       nsIURI *location)
00191 {
00192     NS_NOTREACHED("notification excluded in AddProgressListener(...)");
00193     return NS_OK;
00194 }
00195 
00196 
00197 NS_IMETHODIMP
00198 CStreamListener::OnStatusChange(nsIWebProgress* aWebProgress,
00199                     nsIRequest* aRequest,
00200                     nsresult aStatus,
00201                     const PRUnichar* aMessage)
00202 {
00203     NS_NOTREACHED("notification excluded in AddProgressListener(...)");
00204     return NS_OK;
00205 }
00206 
00207 
00208 NS_IMETHODIMP
00209 CStreamListener::OnSecurityChange(nsIWebProgress *aWebProgress, 
00210                       nsIRequest *aRequest, 
00211                       PRUint32 state)
00212 {
00213     NS_NOTREACHED("notification excluded in AddProgressListener(...)");
00214     return NS_OK;
00215 }
00216 
00217 NS_IMPL_ISUPPORTS2(CStreamListener,
00218                    nsIWebProgressListener,
00219                    nsISupportsWeakReference)
00220 
00221 extern "C" NS_EXPORT void DumpVectorRecord(void);
00222 //----------------------------------------------------------------------
00223 extern "C" NS_EXPORT int DebugRobot(
00224    nsVoidArray * workList, 
00225    nsIDocShell * docShell, 
00226    int iMaxLoads, 
00227    char * verify_dir,
00228    void (*yieldProc )(const char *)
00229    )
00230 {
00231   int iCount = 1;
00232   CStreamListener * pl = new CStreamListener; 
00233   NS_ADDREF(pl);
00234 
00235   if (nsnull==workList)
00236      return -1;
00237   g_iMaxProcess = iMaxLoads;
00238   g_iProcessed = 0;
00239   g_bHitTop = PR_FALSE;
00240   g_duplicateList = new nsVoidArray();
00241   RobotSinkObserver* myObserver = new RobotSinkObserver();
00242   NS_ADDREF(myObserver);
00243   g_workList = workList;
00244 
00245   for (;;) {
00246     PRInt32 n = g_workList->Count();
00247     if (0 == n) {
00248       break;
00249     }
00250     nsString* urlName = (nsString*) g_workList->ElementAt(n - 1);
00251     g_workList->RemoveElementAt(n - 1);
00252 
00253     // Create url
00254     nsIURI* url;
00255     nsresult rv;
00256     nsCOMPtr<nsIIOService> service(do_GetService(kIOServiceCID, &rv));
00257     if (NS_FAILED(rv)) return rv;
00258 
00259     nsIURI *uri = nsnull;
00260     NS_ConvertUCS2toUTF8 uriStr(*urlName);
00261     rv = service->NewURI(uriStr, nsnull, nsnull, &uri);
00262     if (NS_FAILED(rv)) return rv;
00263 
00264     rv = uri->QueryInterface(NS_GET_IID(nsIURI), (void**)&url);
00265     NS_RELEASE(uri);
00266     if (NS_OK != rv) {
00267       printf("invalid URL: '");
00268       fputs(uriStr.get(), stdout);
00269       printf("'\n");
00270       NS_RELEASE(myObserver);
00271       return -1;
00272     }
00273 
00274     char str_num[25];
00275     sprintf (str_num,"%d",iCount++);
00276     fputs ("Robot: parsing(",stdout);
00277     fputs (str_num,stdout);
00278     fputs (") ",stdout);
00279     fputs (NS_LossyConvertUCS2toASCII(*urlName).get(),stdout);
00280     fputs ("...",stdout);
00281 
00282     delete urlName;
00283 
00284     nsIParser* parser;
00285 
00286     static NS_DEFINE_IID(kCParserIID, NS_IPARSER_IID);
00287     static NS_DEFINE_CID(kCParserCID, NS_PARSER_CID);
00288 
00289     rv = CallCreateInstance(kCParserCID, &parser);
00290     if (NS_FAILED(rv)) {
00291       printf("can't make parser\n");
00292       NS_RELEASE(myObserver);
00293       return -1;
00294     }
00295 
00296     nsIRobotSink* sink;
00297     rv = NS_NewRobotSink(&sink);
00298     if (NS_OK != rv) {
00299       printf("can't make parser\n");
00300       NS_RELEASE(myObserver);
00301       return -1;
00302     }
00303     sink->Init(url);
00304     sink->AddObserver(myObserver);
00305 
00306     parser->SetContentSink(sink);
00307     g_bReadyForNextUrl = PR_FALSE;  
00308 
00309     parser->Parse(url, nsnull,PR_TRUE);/* XXX hook up stream listener here! */
00310     while (!g_bReadyForNextUrl) {
00311       if (yieldProc != NULL) {
00312         nsCAutoString spec;
00313         (void)url->GetSpec(spec);
00314         (*yieldProc)(spec.get());
00315       }
00316     }
00317     g_bReadyForNextUrl = PR_FALSE;
00318     if (docShell) {
00319       nsCOMPtr<nsIWebProgress> progress(do_GetInterface(docShell, &rv));
00320       if (NS_FAILED(rv)) return rv;
00321 
00322       (void) progress->AddProgressListener(pl, nsIWebProgress::NOTIFY_STATE_DOCUMENT);
00323 
00324       nsCAutoString spec;
00325       (void)url->GetSpec(spec);
00326       NS_ConvertUTF8toUCS2 theSpec(spec);
00327       nsCOMPtr<nsIWebNavigation> webNav(do_QueryInterface(docShell));
00328       webNav->LoadURI(theSpec.get(),
00329                       nsIWebNavigation::LOAD_FLAGS_NONE,
00330                       nsnull,
00331                       nsnull,
00332                       nsnull);/* XXX hook up stream listener here! */
00333       while (!g_bReadyForNextUrl) {
00334         if (yieldProc != NULL) {
00335           (void)url->GetSpec(spec);
00336           (*yieldProc)(spec.get());
00337         }
00338       }
00339     }  
00340 
00341     NS_RELEASE(sink);
00342     NS_RELEASE(parser);
00343     NS_RELEASE(url);
00344   }
00345 
00346   fputs ("Robot completed.\n", stdout);
00347 
00348   NS_RELEASE(pl);
00349   NS_RELEASE(myObserver);
00350 
00351   return 0;
00352 }