Back to index

lightning-sunbird  0.9+nobinonly
nsSemanticUnitScanner.cpp
Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /* ***** BEGIN LICENSE BLOCK *****
00003  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00004  *
00005  * The contents of this file are subject to the Mozilla Public License Version
00006  * 1.1 (the "License"); you may not use this file except in compliance with
00007  * the License. You may obtain a copy of the License at
00008  * http://www.mozilla.org/MPL/
00009  *
00010  * Software distributed under the License is distributed on an "AS IS" basis,
00011  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00012  * for the specific language governing rights and limitations under the
00013  * License.
00014  *
00015  * The Original Code is mozilla.org code.
00016  *
00017  * The Initial Developer of the Original Code is
00018  * Netscape Communications Corporation.
00019  * Portions created by the Initial Developer are Copyright (C) 1998
00020  * the Initial Developer. All Rights Reserved.
00021  *
00022  * Contributor(s):
00023  *
00024  * Alternatively, the contents of this file may be used under the terms of
00025  * either of the GNU General Public License Version 2 or later (the "GPL"),
00026  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00027  * in which case the provisions of the GPL or the LGPL are applicable instead
00028  * of those above. If you wish to allow use of your version of this file only
00029  * under the terms of either the GPL or the LGPL, and not to allow others to
00030  * use your version of this file under the terms of the MPL, indicate your
00031  * decision by deleting the provisions above and replace them with the notice
00032  * and other provisions required by the GPL or the LGPL. If you do not delete
00033  * the provisions above, a recipient may use your version of this file under
00034  * the terms of any one of the MPL, the GPL or the LGPL.
00035  *
00036  * ***** END LICENSE BLOCK ***** */
00037 
00038 #include "nsSemanticUnitScanner.h"
00039 #include "prmem.h"
00040 
// NOTE(review): presumably the XPCOM helper macro that supplies the
// nsISupports boilerplate for nsSemanticUnitScanner, with
// nsISemanticUnitScanner as its single interface -- confirm against the
// macro's definition, which is not part of this file.
00041 NS_IMPL_ISUPPORTS1(nsSemanticUnitScanner, nsISemanticUnitScanner)
00042 
00043 nsSemanticUnitScanner::nsSemanticUnitScanner() : nsSampleWordBreaker()
00044 {
00045   /* member initializers and constructor code */
00046 }
00047 
00048 nsSemanticUnitScanner::~nsSemanticUnitScanner()
00049 {
00050   /* destructor code */
00051 }
00052 
00053 
00054 /* void start (in string characterSet); */
00055 NS_IMETHODIMP nsSemanticUnitScanner::Start(const char *characterSet)
00056 {
00057     // do nothing for now.
00058     return NS_OK;
00059 }
00060 
00061 /* void next (in wstring text, in long length, in long pos, out boolean hasMoreUnits, out long begin, out long end); */
00062 NS_IMETHODIMP nsSemanticUnitScanner::Next(const PRUnichar *text, PRInt32 length, PRInt32 pos, PRBool isLastBuffer, PRInt32 *begin, PRInt32 *end, PRBool *_retval)
00063 {
00064     // xxx need to bullet proff and check input pointer 
00065     //  make sure begin, end and _retval is not nsnull here
00066 
00067     // if we reach the end, just return
00068     if (pos >= length) {
00069        *begin = pos;
00070        *end = pos;
00071        *_retval = PR_FALSE;
00072        return NS_OK;
00073     }
00074 
00075     PRUint8 char_class = nsSampleWordBreaker::GetClass(text[pos]);
00076 
00077     // if we are in chinese mode, return one han letter at a time
00078     // we should not do this if we are in Japanese or Korean mode
00079     if (kWbClassHanLetter == char_class) {
00080        *begin = pos;
00081        *end = pos+1;
00082        *_retval = PR_TRUE;
00083        return NS_OK;
00084     }
00085 
00086     PRUint32 next;
00087     PRBool needMoreText;
00088     // find the next "word"
00089     nsresult res = NextWord(text, (PRUint32) length, (PRUint32) pos, 
00090         &next, &needMoreText);
00091 
00092     NS_ASSERTION(NS_SUCCEEDED(res), "nsSampleWordBreaker::Next failed");
00093     if (NS_FAILED(res)) 
00094         return res;
00095 
00096     // if we don't have enough text to make decision, return 
00097     if (needMoreText) {
00098        *begin = pos;
00099        *end = isLastBuffer ? length : pos;
00100        *_retval = isLastBuffer;
00101        return NS_OK;
00102     } 
00103     
00104     // if what we got is space or punct, look at the next break
00105     if ((char_class == kWbClassSpace) || (char_class == kWbClassPunct)) {
00106         // if the next "word" is not letters, 
00107         // call itself recursively with the new pos
00108         return Next(text, length, next, isLastBuffer, begin, end, _retval);
00109     }
00110 
00111     // for the rest, return 
00112     *begin = pos;
00113     *end = next;
00114     *_retval = PR_TRUE;
00115     return NS_OK;
00116 }
00117