Back to index

lightning-sunbird  0.9+nobinonly
nsCyrillicDetector.cpp
Go to the documentation of this file.
00001 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /* ***** BEGIN LICENSE BLOCK *****
00003  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00004  *
00005  * The contents of this file are subject to the Mozilla Public License Version
00006  * 1.1 (the "License"); you may not use this file except in compliance with
00007  * the License. You may obtain a copy of the License at
00008  * http://www.mozilla.org/MPL/
00009  *
00010  * Software distributed under the License is distributed on an "AS IS" basis,
00011  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00012  * for the specific language governing rights and limitations under the
00013  * License.
00014  *
00015  * The Original Code is mozilla.org code.
00016  *
00017  * The Initial Developer of the Original Code is
00018  * Netscape Communications Corporation.
00019  * Portions created by the Initial Developer are Copyright (C) 1998
00020  * the Initial Developer. All Rights Reserved.
00021  *
00022  * Contributor(s):
00023  *   Pierre Phaneuf <pp@ludusdesign.com>
00024  *
00025  * Alternatively, the contents of this file may be used under the terms of
00026  * either of the GNU General Public License Version 2 or later (the "GPL"),
00027  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00028  * in which case the provisions of the GPL or the LGPL are applicable instead
00029  * of those above. If you wish to allow use of your version of this file only
00030  * under the terms of either the GPL or the LGPL, and not to allow others to
00031  * use your version of this file under the terms of the MPL, indicate your
00032  * decision by deleting the provisions above and replace them with the notice
00033  * and other provisions required by the GPL or the LGPL. If you do not delete
00034  * the provisions above, a recipient may use your version of this file under
00035  * the terms of any one of the MPL, the GPL or the LGPL.
00036  *
00037  * ***** END LICENSE BLOCK ***** */
00038 #include "nscore.h"
00039 #include "nsCyrillicProb.h"
00040 #include <stdio.h>
00041 
00042 #include "nsCOMPtr.h"
00043 #include "nsISupports.h"
00044 #include "nsICharsetDetector.h"
00045 #include "nsCharDetDll.h"
00046 #include "nsCyrillicDetector.h"
00047 
00048 //----------------------------------------------------------------------
00049 // Interface nsISupports [implementation]
// One NS_IMPL_ISUPPORTS1 invocation per XPCOM-facing detector class, each
// naming the single interface that class is paired with:
//   nsCyrXPCOMDetector        -> nsICharsetDetector
//   nsCyrXPCOMStringDetector  -> nsIStringCharsetDetector
// NOTE(review): the macro itself is defined outside this file; presumably it
// expands to the nsISupports method definitions for the class -- confirm
// against the XPCOM headers.
00050 NS_IMPL_ISUPPORTS1(nsCyrXPCOMDetector, nsICharsetDetector)
00051 NS_IMPL_ISUPPORTS1(nsCyrXPCOMStringDetector, nsIStringCharsetDetector)
00052 
00053 void nsCyrillicDetector::HandleData(const char* aBuf, PRUint32 aLen)
00054 {
00055    PRUint8 cls;
00056    const char* b;
00057    PRUint32 i;
00058    if(mDone) 
00059       return;
00060    for(i=0, b=aBuf;i<aLen;i++,b++)
00061    {
00062      for(PRUintn j=0;j<mItems;j++)
00063      {
00064         if( 0x80 & *b)
00065            cls = mCyrillicClass[j][(*b) & 0x7F];
00066         else 
00067            cls = 0;
00068         NS_ASSERTION( cls <= 32 , "illegal character class");
00069         mProb[j] += gCyrillicProb[mLastCls[j]][cls];
00070         mLastCls[j] = cls;
00071      } 
00072    }
00073    // We now only based on the first block we receive
00074    DataEnd();
00075 }
00076 
00077 //---------------------------------------------------------------------
00078 #define THRESHOLD_RATIO 1.5f
00079 void nsCyrillicDetector::DataEnd()
00080 {
00081    PRUint32 max=0;
00082    PRUint8  maxIdx=0;
00083    PRUint8 j;
00084    if(mDone) 
00085       return;
00086    for(j=0;j<mItems;j++) {
00087       if(mProb[j] > max)
00088       {
00089            max = mProb[j];
00090            maxIdx= j;
00091       }
00092    }
00093 
00094    if( 0 == max ) // if we didn't get any 8 bits data 
00095      return;
00096 
00097 #ifdef DEBUG
00098    for(j=0;j<mItems;j++) 
00099       printf("Charset %s->\t%d\n", mCharsets[j], mProb[j]);
00100 #endif
00101    this->Report(mCharsets[maxIdx]);
00102    mDone = PR_TRUE;
00103 }
00104 
00105 //---------------------------------------------------------------------
00106 nsCyrXPCOMDetector:: nsCyrXPCOMDetector(PRUint8 aItems, 
00107                       const PRUint8 ** aCyrillicClass, 
00108                       const char **aCharsets)
00109             : nsCyrillicDetector(aItems, aCyrillicClass, aCharsets)
00110 {
00111     mObserver = nsnull;
00112 }
00113 
00114 //---------------------------------------------------------------------
00115 nsCyrXPCOMDetector::~nsCyrXPCOMDetector() 
00116 {
00117 }
00118 
00119 //---------------------------------------------------------------------
00120 NS_IMETHODIMP nsCyrXPCOMDetector::Init(
00121   nsICharsetDetectionObserver* aObserver)
00122 {
00123   NS_ASSERTION(mObserver == nsnull , "Init twice");
00124   if(nsnull == aObserver)
00125      return NS_ERROR_ILLEGAL_VALUE;
00126 
00127   mObserver = aObserver;
00128   return NS_OK;
00129 }
00130 
00131 //----------------------------------------------------------
00132 NS_IMETHODIMP nsCyrXPCOMDetector::DoIt(
00133   const char* aBuf, PRUint32 aLen, PRBool* oDontFeedMe)
00134 {
00135   NS_ASSERTION(mObserver != nsnull , "have not init yet");
00136 
00137   if((nsnull == aBuf) || (nsnull == oDontFeedMe))
00138      return NS_ERROR_ILLEGAL_VALUE;
00139 
00140   this->HandleData(aBuf, aLen);
00141   *oDontFeedMe = PR_FALSE;
00142   return NS_OK;
00143 }
00144 
00145 //----------------------------------------------------------
00146 NS_IMETHODIMP nsCyrXPCOMDetector::Done()
00147 {
00148   NS_ASSERTION(mObserver != nsnull , "have not init yet");
00149   this->DataEnd();
00150   return NS_OK;
00151 }
00152 
00153 //----------------------------------------------------------
00154 void nsCyrXPCOMDetector::Report(const char* aCharset)
00155 {
00156   NS_ASSERTION(mObserver != nsnull , "have not init yet");
00157   mObserver->Notify(aCharset, eBestAnswer);
00158 }
00159 
00160 //---------------------------------------------------------------------
00161 nsCyrXPCOMStringDetector:: nsCyrXPCOMStringDetector(PRUint8 aItems, 
00162                       const PRUint8 ** aCyrillicClass, 
00163                       const char **aCharsets)
00164             : nsCyrillicDetector(aItems, aCyrillicClass, aCharsets)
00165 {
00166 }
00167 
00168 //---------------------------------------------------------------------
00169 nsCyrXPCOMStringDetector::~nsCyrXPCOMStringDetector() 
00170 {
00171 }
00172 
00173 //---------------------------------------------------------------------
00174 void nsCyrXPCOMStringDetector::Report(const char *aCharset) 
00175 {
00176    mResult = aCharset;
00177 }
00178 
00179 //---------------------------------------------------------------------
00180 NS_IMETHODIMP nsCyrXPCOMStringDetector::DoIt(const char* aBuf, PRUint32 aLen, 
00181                      const char** oCharset, nsDetectionConfident &oConf)
00182 {
00183    mResult = nsnull;
00184    mDone = PR_FALSE;
00185    this->HandleData(aBuf, aLen); 
00186    this->DataEnd();
00187    *oCharset=mResult;
00188    oConf = eBestAnswer;
00189    return NS_OK;
00190 }
00191        
00192