Back to index

lightning-sunbird  0.9+nobinonly
nsSpellCheckUtils.cpp
Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
00002  *
00003  * ***** BEGIN LICENSE BLOCK *****
00004  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00005  *
00006  * The contents of this file are subject to the Mozilla Public License Version
00007  * 1.1 (the "License"); you may not use this file except in compliance with
00008  * the License. You may obtain a copy of the License at
00009  * http://www.mozilla.org/MPL/
00010  *
00011  * Software distributed under the License is distributed on an "AS IS" basis,
00012  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00013  * for the specific language governing rights and limitations under the
00014  * License.
00015  *
00016  * The Original Code is the Mozilla browser.
00017  *
00018  * The Initial Developer of the Original Code is
00019  * Netscape Communications, Inc.
00020  * Portions created by the Initial Developer are Copyright (C) 1999
00021  * the Initial Developer. All Rights Reserved.
00022  *
00023  * Contributor(s):
00024  *
00025  * Alternatively, the contents of this file may be used under the terms of
00026  * either the GNU General Public License Version 2 or later (the "GPL"), or
00027  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00028  * in which case the provisions of the GPL or the LGPL are applicable instead
00029  * of those above. If you wish to allow use of your version of this file only
00030  * under the terms of either the GPL or the LGPL, and not to allow others to
00031  * use your version of this file under the terms of the MPL, indicate your
00032  * decision by deleting the provisions above and replace them with the notice
00033  * and other provisions required by the GPL or the LGPL. If you do not delete
00034  * the provisions above, a recipient may use your version of this file under
00035  * the terms of any one of the MPL, the GPL or the LGPL.
00036  *
00037  * ***** END LICENSE BLOCK ***** */
00038 
00039 #include <stdio.h>
00040 #include "nsSpellCheckUtils.h"
00041 
00042 #include "nsReadableUtils.h"
00043 #include "nsICharsetConverterManager.h"
00044 #include "nsIPlatformCharset.h"
00045 #include "nsIServiceManager.h"
00046 
00047 #include "nsISpellChecker.h"
00048 #include "nsITextServicesDocument.h"
00049 
00050 #include "nsIServiceManager.h"
00051 #include "nsIWordBreakerFactory.h" // nsIWordBreaker
00052 #include "nsLWBrkCIID.h"
00053 
00054 
00055 /* XXX The platform-specific #defines of IS_NBSP_CHAR are unnecessary and
00056  *     inaccurate. We should be doing the whitespace conversion on the UTF-16
00057  *     buffer before the call to aUnicodeEncoder->Convert().
00058  *
00059  * XXX Should we be converting any other whitespace characters to spaces?
00060  *
00061  *     See comments in bug 211343.
00062  */
// Byte-level test for a no-break space in an already encoded (non-Unicode)
// buffer: XP_MAC builds look for 0xca, every other build for 0xa0.
//
// The argument is converted to unsigned char before the comparison. Comparing
// the raw argument instead would never match a plain or signed char holding
// that byte, because such a char promotes to a negative int.
#ifdef XP_MAC
#define IS_NBSP_CHAR(c) (static_cast<unsigned char>(c) == 0xca)
#else
#define IS_NBSP_CHAR(c) (static_cast<unsigned char>(c) == 0xa0)
#endif
00068 
00069 nsresult
00070 nsSpellCheckUtils::ReadStringIntoBuffer(nsIUnicodeEncoder* aUnicodeEncoder,
00071                                         const PRUnichar*   aStr, 
00072                                         CharBuffer*        aBuf)
00073 {
00074   NS_ENSURE_ARG_POINTER(aUnicodeEncoder);
00075   NS_ENSURE_ARG_POINTER(aBuf);
00076 
00077   if (!aStr || !*aStr) {
00078     return NS_OK;
00079   }
00080 
00081   aBuf->mDataLength = 0;
00082 
00083   PRInt32 unicodeLength = nsCRT::strlen(aStr);
00084 
00085   // Estimate a string length after the conversion.
00086   PRInt32 estimatedLength, stringLength;
00087   nsresult result = aUnicodeEncoder->GetMaxLength(aStr, unicodeLength, &estimatedLength);
00088   NS_ENSURE_SUCCESS(result, result);
00089 
00090   result = aBuf->AssureCapacity(estimatedLength + 1);
00091   NS_ENSURE_SUCCESS(result, result);
00092 
00093   // Convert from unicode.
00094   stringLength = estimatedLength;
00095   result = aUnicodeEncoder->Convert(aStr, &unicodeLength, aBuf->mData, &stringLength);
00096   NS_ENSURE_SUCCESS(result, result);
00097 
00098   // Terminate the conversion (e.g. put escape sequence for JIS).
00099   PRInt32 finLen = estimatedLength - stringLength;
00100   if (finLen)
00101   {
00102     NS_ENSURE_TRUE(finLen > 0, NS_ERROR_FAILURE);
00103     result = aUnicodeEncoder->Finish(&aBuf->mData[stringLength], &finLen);
00104     NS_ENSURE_SUCCESS(result, result);
00105   }
00106 
00107   aBuf->mDataLength = stringLength + finLen;
00108   aBuf->mData[aBuf->mDataLength] = '\0';
00109   for (unsigned char* p = (unsigned char*) aBuf->mData; *p ; p++)
00110     if( IS_NBSP_CHAR(*p) )  
00111       *p = ' '; 
00112 
00113   return NS_OK;
00114 }
00115 
00116 nsresult
00117 nsSpellCheckUtils::ReadStringIntoBuffer(nsIUnicodeEncoder* aUnicodeEncoder,
00118                                         const nsString*    aStr, 
00119                                         CharBuffer*        aBuf)
00120 {
00121   NS_ENSURE_ARG_POINTER(aUnicodeEncoder);
00122   NS_ENSURE_ARG_POINTER(aStr);
00123   NS_ENSURE_ARG_POINTER(aBuf);
00124   return ReadStringIntoBuffer(aUnicodeEncoder, aStr->get(), aBuf);
00125 }
00126 
00127 nsresult
00128 nsSpellCheckUtils::CreateUnicodeConverters(const PRUnichar*    aCharset,
00129                                            nsIUnicodeEncoder** aUnicodeEncoder,
00130                                            nsIUnicodeDecoder** aUnicodeDecoder)
00131 {
00132   NS_ENSURE_ARG_POINTER(aCharset);
00133   NS_ENSURE_ARG_POINTER(aUnicodeEncoder);
00134   NS_ENSURE_ARG_POINTER(aUnicodeDecoder);
00135 
00136   nsresult rv;
00137 
00138   nsCOMPtr <nsICharsetConverterManager> ccm = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
00139   NS_ENSURE_SUCCESS(rv, rv);
00140 
00141   NS_LossyConvertUCS2toASCII charset(aCharset);
00142 
00143   rv = ccm->GetUnicodeDecoder(charset.get(), aUnicodeDecoder);
00144   NS_ENSURE_SUCCESS(rv, rv);
00145 
00146   rv = ccm->GetUnicodeEncoder(charset.get(), aUnicodeEncoder);
00147   NS_ENSURE_SUCCESS(rv, rv);
00148 
00149   // Set the error behavior, in case a character cannot be mapped.
00150   rv = (*aUnicodeEncoder)->SetOutputErrorBehavior(nsIUnicodeEncoder::kOnError_Replace, nsnull, ' ');
00151 
00152   return rv;
00153 }
00154 
00155 nsresult
00156 nsSpellCheckUtils::LoadTextBlockIntoBuffer(nsITextServicesDocument* aTxtSvcDoc,
00157                                            nsISpellChecker*        aSpellChecker,
00158                                            CharBuffer&             aCharBuf,
00159                                            nsString&               aText, 
00160                                            PRUint32&               aOffset)
00161 {
00162   NS_ENSURE_ARG_POINTER(aTxtSvcDoc);
00163   NS_ENSURE_ARG_POINTER(aSpellChecker);
00164 
00165   nsCOMPtr<nsIUnicodeEncoder> unicodeEncoder;
00166   nsXPIDLString charSet;
00167   nsresult result = aSpellChecker->GetCharset(getter_Copies(charSet));
00168   if (NS_SUCCEEDED(result)) 
00169   {
00170     nsCOMPtr<nsIUnicodeDecoder> unicodeDecoder;
00171     result = nsSpellCheckUtils::CreateUnicodeConverters(charSet, 
00172                                                         getter_AddRefs(unicodeEncoder), 
00173                                                         getter_AddRefs(unicodeDecoder));
00174     NS_ENSURE_TRUE(unicodeEncoder, NS_ERROR_NULL_POINTER);
00175   }
00176 
00177   if (aCharBuf.mData)
00178     aCharBuf.mData[0]  = '\0';
00179 
00180   nsString str;
00181   aCharBuf.mDataLength = 0;
00182 
00183   result = aTxtSvcDoc->GetCurrentTextBlock(&str);
00184   NS_ENSURE_SUCCESS(result, result);
00185 
00186   result = nsSpellCheckUtils::ReadStringIntoBuffer(unicodeEncoder, &str, &aCharBuf);
00187   NS_ENSURE_SUCCESS(result, result);
00188 
00189   if (aCharBuf.mDataLength < 1)
00190   {
00191     // The document could be empty, so return
00192     // NS_OK!
00193     return NS_OK;
00194   }
00195 
00196   aText.AssignWithConversion(aCharBuf.mData);
00197 
00198   return NS_OK;
00199 }
00200 
00201 nsresult 
00202 nsSpellCheckUtils::GetWordBreaker(nsIWordBreaker** aResult) 
00203 {
00204   NS_ENSURE_ARG_POINTER(aResult);
00205 
00206   // no line breaker, find a default one
00207   nsresult result;
00208   nsCOMPtr<nsIWordBreakerFactory> wbf(do_GetService(NS_LWBRK_CONTRACTID, &result));
00209   if (NS_SUCCEEDED(result)) 
00210   {
00211     nsAutoString wbarg;
00212     result = wbf->GetBreaker(wbarg, aResult);
00213     NS_IF_ADDREF(*aResult);
00214   }
00215   return result;
00216 }
00217 
00218 #ifdef NS_DEBUG
00219 nsresult
00220 nsSpellCheckUtils::DumpWords(nsIWordBreaker* aWordBreaker, 
00221                              const PRUnichar*      aText, 
00222                              const PRUint32&       aTextLen)
00223 {
00224   PRUint32 offset = 0;
00225   PRUint32 wlen   = 0;
00226   for (int i=0;i<7;i++) printf("**********");
00227   printf("\n");
00228   for (i=0;i<7;i++) printf("0123456789");
00229   printf("\n");
00230   char* line = strdup(NS_LossyConvertUCS2toASCII(aText).get());
00231   for (i=0;i<aTextLen;i++)
00232     if (line[i] < 32) 
00233       putc('_', stdout);
00234     else 
00235       putc(line[i], stdout);
00236   printf("\n");
00237   //printf("%s\n", NS_LossyConvertUCS2toASCII(aText).get());
00238   free(line);
00239 
00240   while (offset < aTextLen) 
00241   {
00242     PRUint32 begin = 0;
00243     PRUint32 end   = 0;
00244     nsresult result = aWordBreaker->FindWord(aText, aTextLen, offset, &begin, &end);
00245     NS_ENSURE_SUCCESS(result, result);
00246     wlen = end - begin;
00247     printf("%d  %d  l:%d ", begin, end, wlen);
00248     const PRUnichar* start = (const PRUnichar*)(aText+offset);
00249     PRUnichar* word = nsCRT::strndup(start, wlen);
00250     nsString str(word);
00251     printf("[%s]\n", NS_LossyConvertUCS2toASCII(str).get());
00252     nsMemory::Free(word);
00253     offset = end;
00254   }
00255   for (i=0;i<7;i++) printf("**********");
00256   printf("\n");
00257   return NS_OK;
00258 }
00259 #endif