Back to index

lightning-sunbird  0.9+nobinonly
nsUnicodeNormalizer.cpp
Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
00002 
00003 /* This file is modified from JPNIC's mDNKit, it is under both MPL and 
00004  * JPNIC's license.
00005  */
00006 
00007  /* ***** BEGIN LICENSE BLOCK *****
00008  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00009  *
00010  * The contents of this file are subject to the Mozilla Public License Version
00011  * 1.1 (the "License"); you may not use this file except in compliance with
00012  * the License. You may obtain a copy of the License at
00013  * http://www.mozilla.org/MPL/
00014  *
00015  * Software distributed under the License is distributed on an "AS IS" basis,
00016  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00017  * for the specific language governing rights and limitations under the
00018  * License.
00019  *
00020  * The Original Code is Unicode case conversion helpers.
00021  *
00022  * The Initial Developer of the Original Code is
00023  * Netscape Communications Corp..
00024  * Portions created by the Initial Developer are Copyright (C) 2002
00025  * the Initial Developer. All Rights Reserved.
00026  *
00027  * Contributor(s):
00028  *
00029  * Alternatively, the contents of this file may be used under the terms of
00030  * either the GNU General Public License Version 2 or later (the "GPL"), or
00031  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00032  * in which case the provisions of the GPL or the LGPL are applicable instead
00033  * of those above. If you wish to allow use of your version of this file only
00034  * under the terms of either the GPL or the LGPL, and not to allow others to
00035  * use your version of this file under the terms of the MPL, indicate your
00036  * decision by deleting the provisions above and replace them with the notice
00037  * and other provisions required by the GPL or the LGPL. If you do not delete
00038  * the provisions above, a recipient may use your version of this file under
00039  * the terms of any one of the MPL, the GPL or the LGPL.
00040  *
00041  * ***** END LICENSE BLOCK ***** */
00042 
00043 /*
00044  * Copyright (c) 2000,2002 Japan Network Information Center.
00045  * All rights reserved.
00046  *  
00047  * By using this file, you agree to the terms and conditions set forth bellow.
00048  * 
00049  *                   LICENSE TERMS AND CONDITIONS 
00050  * 
00051  * The following License Terms and Conditions apply, unless a different
00052  * license is obtained from Japan Network Information Center ("JPNIC"),
00053  * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
00054  * Chiyoda-ku, Tokyo 101-0047, Japan.
00055  * 
00056  * 1. Use, Modification and Redistribution (including distribution of any
00057  *    modified or derived work) in source and/or binary forms is permitted
00058  *    under this License Terms and Conditions.
00059  * 
00060  * 2. Redistribution of source code must retain the copyright notices as they
00061  *    appear in each source code file, this License Terms and Conditions.
00062  * 
00063  * 3. Redistribution in binary form must reproduce the Copyright Notice,
00064  *    this License Terms and Conditions, in the documentation and/or other
00065  *    materials provided with the distribution.  For the purposes of binary
00066  *    distribution the "Copyright Notice" refers to the following language:
00067  *    "Copyright (c) 2000-2002 Japan Network Information Center.  All rights reserved."
00068  * 
00069  * 4. The name of JPNIC may not be used to endorse or promote products
00070  *    derived from this Software without specific prior written approval of
00071  *    JPNIC.
00072  * 
00073  * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
00074  *    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00075  *    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
00076  *    PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL JPNIC BE LIABLE
00077  *    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
00078  *    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
00079  *    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
00080  *    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
00081  *    WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
00082  *    OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
00083  *    ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
00084  */
00085 
00086 #include <stdlib.h>
00087 #include <string.h>
00088 
00089 #include "nsUnicharUtils.h"
00090 #include "nsMemory.h"
00091 #include "nsCRT.h"
00092 #include "nsUnicodeNormalizer.h"
00093 #include "nsString.h"
00094 #include "nsReadableUtils.h"
00095 
00096 NS_IMPL_ISUPPORTS1(nsUnicodeNormalizer, nsIUnicodeNormalizer)
00097 
00098 
00099 nsUnicodeNormalizer::nsUnicodeNormalizer()
00100 {
00101 }
00102 
00103 nsUnicodeNormalizer::~nsUnicodeNormalizer()
00104 {
00105 }
00106 
00107 
00108 
/*
 * Status codes private to this file.
 *
 *   NS_ERROR_UNORM_MOREOUTPUT  the caller-supplied output buffer is too
 *                              small to hold the result.
 *   NS_SUCCESS_UNORM_NOTFOUND  no decomposition / composition is defined
 *                              for the given input.
 */
#define NS_ERROR_UNORM_MOREOUTPUT \
    NS_ERROR_GENERATE_FAILURE(NS_ERROR_MODULE_GENERAL, 0x21)
#define NS_SUCCESS_UNORM_NOTFOUND \
    NS_ERROR_GENERATE_SUCCESS(NS_ERROR_MODULE_GENERAL, 0x11)

/* Bit set on the final element of a decomposition sequence. */
#define END_BIT 0x80000000

/*
 * Constants for arithmetic Hangul decomposition/composition
 * (taken from the Unicode book).
 *
 *   SBase / SLast  first precomposed syllable / one past the last one
 *   LBase, VBase   first leading consonant, first vowel
 *   TBase          one before the first trailing consonant
 *   LCount, VCount, TCount  number of L, V and T values
 */
#define SBase  0xac00
#define LBase  0x1100
#define VBase  0x1161
#define TBase  0x11a7
#define LCount 19
#define VCount 21
#define TCount 28
#define SLast  (SBase + LCount * VCount * TCount)
00131 
00132 struct composition {
00133        PRUint32 c2;  /* 2nd character */
00134        PRUint32 comp;       /* composed character */
00135 };
00136 
00137 
00138 #include "normalization_data.h"
00139 
/*
 * Macros for the multi-level index tables.
 *
 * A value 'v' is split into three bit fields (IDX0 / IDX1 / IDX2) whose
 * widths come from <mprefix>_BITS_1 and <mprefix>_BITS_2.  The first two
 * fields are resolved through <vprefix>_imap, the result selects an entry
 * of <vprefix>_table, and the last field indexes that entry's 'tbl' array.
 */

/* Token-pasting helpers that build the table / constant names. */
#define BITS1(mprefix) mprefix ## _BITS_1
#define BITS2(mprefix) mprefix ## _BITS_2

#define IMAP(vprefix) vprefix ## _imap
#define DMAP(vprefix) vprefix ## _table
#define SEQ(vprefix)  vprefix ## _seq

/* Bit-field extraction for explicit field widths. */
#define IDX_0(v, bits1, bits2) ((v) >> ((bits1) + (bits2)))
#define IDX_1(v, bits1, bits2) (((v) >> (bits2)) & ((1 << (bits1)) - 1))
#define IDX_2(v, bits1, bits2) ((v) & ((1 << (bits2)) - 1))

/* Bit-field extraction using the widths that belong to 'mprefix'. */
#define IDX0(mprefix, v) IDX_0(v, BITS1(mprefix), BITS2(mprefix))
#define IDX1(mprefix, v) IDX_1(v, BITS1(mprefix), BITS2(mprefix))
#define IDX2(mprefix, v) IDX_2(v, BITS1(mprefix), BITS2(mprefix))

/* The actual three-level lookup. */
#define LOOKUPTBL(vprefix, mprefix, v) \
    DMAP(vprefix)[ \
        IMAP(vprefix)[ \
            IMAP(vprefix)[IDX0(mprefix, v)] + IDX1(mprefix, v) \
        ] \
    ].tbl[IDX2(mprefix, v)]
00164 
00165 static PRInt32
00166 canonclass(PRUint32 c) {
00167        /* Look up canonicalclass table. */
00168        return (LOOKUPTBL(canon_class, CANON_CLASS, c));
00169 }
00170 
00171 static PRInt32
00172 decompose_char(PRUint32 c, const PRUint32 **seqp)
00173 {
00174        /* Look up decomposition table. */
00175        PRInt32 seqidx = LOOKUPTBL(decompose, DECOMP, c);
00176        *seqp = SEQ(decompose) + (seqidx & ~DECOMP_COMPAT);
00177        return (seqidx);
00178 }
00179 
00180 static PRInt32
00181 compose_char(PRUint32 c,
00182                             const struct composition **compp)
00183 {
00184        /* Look up composition table. */
00185        PRInt32 seqidx = LOOKUPTBL(compose, CANON_COMPOSE, c);
00186        *compp = SEQ(compose) + (seqidx & 0xffff);
00187        return (seqidx >> 16);
00188 }
00189 
00190 static nsresult
00191 mdn__unicode_decompose(PRInt32 compat, PRUint32 *v, size_t vlen,
00192                      PRUint32 c, PRInt32 *decomp_lenp)
00193 {
00194        PRUint32 *vorg = v;
00195        PRInt32 seqidx;
00196        const PRUint32 *seq;
00197 
00198        //assert(v != NULL && vlen >= 0 && decomp_lenp != NULL);
00199 
00200        /*
00201         * First, check for Hangul.
00202         */
00203        if (SBase <= c && c < SLast) {
00204               PRInt32 idx, t_offset, v_offset, l_offset;
00205 
00206               idx = c - SBase;
00207               t_offset = idx % TCount;
00208               idx /= TCount;
00209               v_offset = idx % VCount;
00210               l_offset = idx / VCount;
00211               if ((t_offset == 0 && vlen < 2) || (t_offset > 0 && vlen < 3))
00212                      return (NS_ERROR_UNORM_MOREOUTPUT);
00213               *v++ = LBase + l_offset;
00214               *v++ = VBase + v_offset;
00215               if (t_offset > 0)
00216                      *v++ = TBase + t_offset;
00217               *decomp_lenp = v - vorg;
00218               return (NS_OK);
00219        }
00220 
00221        /*
00222         * Look up decomposition table.  If no decomposition is defined
00223         * or if it is a compatibility decomosition when canonical
00224         * decomposition requested, return 'NS_SUCCESS_UNORM_NOTFOUND'.
00225         */
00226        seqidx = decompose_char(c, &seq);
00227        if (seqidx == 0 || (compat == 0 && (seqidx & DECOMP_COMPAT) != 0))
00228               return (NS_SUCCESS_UNORM_NOTFOUND);
00229        
00230        /*
00231         * Copy the decomposed sequence.  The end of the sequence are
00232         * marked with END_BIT.
00233         */
00234        do {
00235               PRUint32 c;
00236               PRInt32 dlen;
00237               nsresult r;
00238 
00239               c = *seq & ~END_BIT;
00240 
00241               /* Decompose recursively. */
00242               r = mdn__unicode_decompose(compat, v, vlen, c, &dlen);
00243               if (r == NS_OK) {
00244                      v += dlen;
00245                      vlen -= dlen;
00246               } else if (r == NS_SUCCESS_UNORM_NOTFOUND) {
00247                      if (vlen < 1)
00248                             return (NS_ERROR_UNORM_MOREOUTPUT);
00249                      *v++ = c;
00250                      vlen--;
00251               } else {
00252                      return (r);
00253               }
00254 
00255        } while ((*seq++ & END_BIT) == 0);
00256        
00257        *decomp_lenp = v - vorg;
00258 
00259        return (NS_OK);
00260 }
00261 
00262 static PRInt32
00263 mdn__unicode_iscompositecandidate(PRUint32 c)
00264 {
00265        const struct composition *dummy;
00266 
00267        /* Check for Hangul */
00268        if ((LBase <= c && c < LBase + LCount) || (SBase <= c && c < SLast))
00269               return (1);
00270 
00271        /*
00272         * Look up composition table.  If there are no composition
00273         * that begins with the given character, it is not a
00274         * composition candidate.
00275         */
00276        if (compose_char(c, &dummy) == 0)
00277               return (0);
00278        else
00279               return (1);
00280 }
00281 
00282 static nsresult
00283 mdn__unicode_compose(PRUint32 c1, PRUint32 c2, PRUint32 *compp)
00284 {
00285        PRInt32 n;
00286        PRInt32 lo, hi;
00287        const struct composition *cseq;
00288 
00289        //assert(compp != NULL);
00290 
00291        /*
00292         * Check for Hangul.
00293         */
00294        if (LBase <= c1 && c1 < LBase + LCount &&
00295            VBase <= c2 && c2 < VBase + VCount) {
00296               /*
00297                * Hangul L and V.
00298                */
00299               *compp = SBase +
00300                      ((c1 - LBase) * VCount + (c2 - VBase)) * TCount;
00301               return (NS_OK);
00302        } else if (SBase <= c1 && c1 < SLast &&
00303                  TBase <= c2 && c2 < TBase + TCount &&
00304                  (c1 - SBase) % TCount == 0) {
00305               /*
00306                * Hangul LV and T.
00307                */
00308               *compp = c1 + (c2 - TBase);
00309               return (NS_OK);
00310        }
00311 
00312        /*
00313         * Look up composition table.  If the result is 0, no composition
00314         * is defined.  Otherwise, upper 16bits of the result contains
00315         * the number of composition that begins with 'c1', and the lower
00316         * 16bits is the offset in 'compose_seq'.
00317         */
00318        if ((n = compose_char(c1, &cseq)) == 0)
00319               return (NS_SUCCESS_UNORM_NOTFOUND);
00320 
00321        /*
00322         * The composite sequences are sorted by the 2nd character 'c2'.
00323         * So we can use binary search.
00324         */
00325        lo = 0;
00326        hi = n - 1;
00327        while (lo <= hi) {
00328               PRInt32 mid = (lo + hi) / 2;
00329 
00330               if (cseq[mid].c2 < c2) {
00331                      lo = mid + 1;
00332               } else if (cseq[mid].c2 > c2) {
00333                      hi = mid - 1;
00334               } else {
00335                      *compp = cseq[mid].comp;
00336                      return (NS_OK);
00337               }
00338        }
00339        return (NS_SUCCESS_UNORM_NOTFOUND);
00340 }
00341 
00342 
00343 #define WORKBUF_SIZE        128
00344 #define WORKBUF_SIZE_MAX    10000
00345 
00346 typedef struct {
00347        PRInt32 cur;         /* pointing now processing character */
00348        PRInt32 last;        /* pointing just after the last character */
00349        PRInt32 size;        /* size of UCS and CLASS array */
00350        PRUint32 *ucs;       /* UCS-4 characters */
00351        PRInt32 *cclass;            /* and their canonical classes */
00352        PRUint32 ucs_buf[WORKBUF_SIZE];    /* local buffer */
00353        PRInt32 class_buf[WORKBUF_SIZE];          /* ditto */
00354 } workbuf_t;
00355 
00356 static nsresult      decompose(workbuf_t *wb, PRUint32 c, PRInt32 compat);
00357 static void          get_class(workbuf_t *wb);
00358 static void          reorder(workbuf_t *wb);
00359 static void          compose(workbuf_t *wb);
00360 static nsresult flush_before_cur(workbuf_t *wb, nsAString& aToStr);
00361 static void          workbuf_init(workbuf_t *wb);
00362 static void          workbuf_free(workbuf_t *wb);
00363 static nsresult      workbuf_extend(workbuf_t *wb);
00364 static nsresult      workbuf_append(workbuf_t *wb, PRUint32 c);
00365 static void          workbuf_shift(workbuf_t *wb, PRInt32 shift);
00366 static void          workbuf_removevoid(workbuf_t *wb);
00367 
00368 
00369 static nsresult
00370 mdn_normalize(PRBool do_composition, PRBool compat,
00371          const nsAString& aSrcStr, nsAString& aToStr)
00372 {
00373        workbuf_t wb;
00374        nsresult r = NS_OK;
00375        /*
00376         * Initialize working buffer.
00377         */
00378        workbuf_init(&wb);
00379 
00380        nsAString::const_iterator start, end;
00381        aSrcStr.BeginReading(start); 
00382        aSrcStr.EndReading(end); 
00383 
00384        while (start != end) {
00385               PRUint32 c;
00386               PRUnichar curChar;
00387 
00388               //assert(wb.cur == wb.last);
00389 
00390               /*
00391                * Get one character from 'from'.
00392                */
00393               curChar= *start++;
00394 
00395               if (IS_HIGH_SURROGATE(curChar) && start != end && IS_LOW_SURROGATE(*(start)) ) {
00396                      c = SURROGATE_TO_UCS4(curChar, *start);
00397                      ++start;
00398               } else {
00399                      c = curChar;
00400               }
00401 
00402               /*
00403                * Decompose it.
00404                */
00405               if ((r = decompose(&wb, c, compat)) != NS_OK)
00406                      break;
00407 
00408               /*
00409                * Get canonical class.
00410                */
00411               get_class(&wb);
00412 
00413               /*
00414                * Reorder & compose.
00415                */
00416               for (; wb.cur < wb.last; wb.cur++) {
00417                      if (wb.cur == 0) {
00418                             continue;
00419                      } else if (wb.cclass[wb.cur] > 0) {
00420                             /*
00421                              * This is not a starter. Try reordering.
00422                              * Note that characters up to it are
00423                              * already in canonical order.
00424                              */
00425                             reorder(&wb);
00426                             continue;
00427                      }
00428 
00429                      /*
00430                       * This is a starter character, and there are
00431                       * some characters before it.  Those characters
00432                       * have been reordered properly, and
00433                       * ready for composition.
00434                       */
00435                      if (do_composition && wb.cclass[0] == 0)
00436                             compose(&wb);
00437 
00438                      /*
00439                       * If CUR points to a starter character,
00440                       * then process of characters before CUR are
00441                       * already finished, because any further
00442                       * reordering/composition for them are blocked
00443                       * by the starter CUR points.
00444                       */
00445                      if (wb.cur > 0 && wb.cclass[wb.cur] == 0) {
00446                             /* Flush everything before CUR. */
00447                             r = flush_before_cur(&wb, aToStr);
00448                             if (r != NS_OK)
00449                                    break;
00450                      }
00451               }
00452        }
00453 
00454        if (r == NS_OK) {
00455               if (do_composition && wb.cur > 0 && wb.cclass[0] == 0) {
00456                      /*
00457                       * There is some characters left in WB.
00458                       * They are ordered, but not composed yet.
00459                       * Now CUR points just after the last character in WB,
00460                       * and since compose() tries to compose characters
00461                       * between top and CUR inclusive, we must make CUR
00462                       * one character back during compose().
00463                       */
00464                      wb.cur--;
00465                      compose(&wb);
00466                      wb.cur++;
00467               }
00468               /*
00469                * Call this even when WB.CUR == 0, to make TO
00470                * NUL-terminated.
00471                */
00472               r = flush_before_cur(&wb, aToStr);
00473        }
00474 
00475        workbuf_free(&wb);
00476 
00477        return (r);
00478 }
00479 
00480 static nsresult
00481 decompose(workbuf_t *wb, PRUint32 c, PRInt32 compat) {
00482        nsresult r;
00483        PRInt32 dec_len;
00484 
00485 again:
00486        r = mdn__unicode_decompose(compat, wb->ucs + wb->last,
00487                                wb->size - wb->last, c, &dec_len);
00488        switch (r) {
00489        case NS_OK:
00490               wb->last += dec_len;
00491               return (NS_OK);
00492        case NS_SUCCESS_UNORM_NOTFOUND:
00493               return (workbuf_append(wb, c));
00494        case NS_ERROR_UNORM_MOREOUTPUT:
00495               if ((r = workbuf_extend(wb)) != NS_OK)
00496                      return (r);
00497               if (wb->size > WORKBUF_SIZE_MAX) {
00498                      // "mdn__unormalize_form*: " "working buffer too large\n"
00499                      return (NS_ERROR_FAILURE);
00500               }
00501               goto again;
00502        default:
00503               return (r);
00504        }
00505        /* NOTREACHED */
00506 }
00507 
00508 static void          
00509 get_class(workbuf_t *wb) {
00510        PRInt32 i;
00511 
00512        for (i = wb->cur; i < wb->last; i++)
00513               wb->cclass[i] = canonclass(wb->ucs[i]);
00514 }
00515 
00516 static void
00517 reorder(workbuf_t *wb) {
00518        PRUint32 c;
00519        PRInt32 i;
00520        PRInt32 cclass;
00521 
00522        //assert(wb != NULL);
00523 
00524        i = wb->cur;
00525        c = wb->ucs[i];
00526        cclass = wb->cclass[i];
00527 
00528        while (i > 0 && wb->cclass[i - 1] > cclass) {
00529               wb->ucs[i] = wb->ucs[i - 1];
00530               wb->cclass[i] =wb->cclass[i - 1];
00531               i--;
00532               wb->ucs[i] = c;
00533               wb->cclass[i] = cclass;
00534        }
00535 }
00536 
00537 static void
00538 compose(workbuf_t *wb) {
00539        PRInt32 cur;
00540        PRUint32 *ucs;
00541        PRInt32 *cclass;
00542        PRInt32 last_class;
00543        PRInt32 nvoids;
00544        PRInt32 i;
00545 
00546        //assert(wb != NULL && wb->cclass[0] == 0);
00547 
00548        cur = wb->cur;
00549        ucs = wb->ucs;
00550        cclass = wb->cclass;
00551 
00552        /*
00553         * If there are no decomposition sequence that begins with
00554         * the top character, composition is impossible.
00555         */
00556        if (!mdn__unicode_iscompositecandidate(ucs[0]))
00557               return;
00558 
00559        last_class = 0;
00560        nvoids = 0;
00561        for (i = 1; i <= cur; i++) {
00562               PRUint32 c;
00563               PRInt32 cl = cclass[i];
00564 
00565               if ((last_class < cl || cl == 0) &&
00566                   mdn__unicode_compose(ucs[0], ucs[i],
00567                                     &c) == NS_OK) {
00568                      /*
00569                       * Replace the top character with the composed one.
00570                       */
00571                      ucs[0] = c;
00572                      cclass[0] = canonclass(c);
00573 
00574                      cclass[i] = -1;      /* void this character */
00575                      nvoids++;
00576               } else {
00577                      last_class = cl;
00578               }
00579        }
00580 
00581        /* Purge void characters, if any. */
00582        if (nvoids > 0)
00583               workbuf_removevoid(wb);
00584 }
00585 
00586 static nsresult
00587 flush_before_cur(workbuf_t *wb, nsAString& aToStr) 
00588 {
00589        PRInt32 i;
00590 
00591        for (i = 0; i < wb->cur; i++) {
00592               if (!IS_IN_BMP(wb->ucs[i])) {
00593                      aToStr.Append((PRUnichar)H_SURROGATE(wb->ucs[i]));
00594                      aToStr.Append((PRUnichar)L_SURROGATE(wb->ucs[i]));
00595               } else {
00596                      aToStr.Append((PRUnichar)(wb->ucs[i]));
00597               }
00598        }
00599 
00600        workbuf_shift(wb, wb->cur);
00601 
00602        return (NS_OK);
00603 }
00604 
00605 static void
00606 workbuf_init(workbuf_t *wb) {
00607        wb->cur = 0;
00608        wb->last = 0;
00609        wb->size = WORKBUF_SIZE;
00610        wb->ucs = wb->ucs_buf;
00611        wb->cclass = wb->class_buf;
00612 }
00613 
00614 static void
00615 workbuf_free(workbuf_t *wb) {
00616        if (wb->ucs != wb->ucs_buf) {
00617               nsMemory::Free(wb->ucs);
00618               nsMemory::Free(wb->cclass);
00619        }
00620 }
00621 
00622 static nsresult
00623 workbuf_extend(workbuf_t *wb) {
00624        PRInt32 newsize = wb->size * 3;
00625 
00626        if (wb->ucs == wb->ucs_buf) {
00627               wb->ucs = (PRUint32*)nsMemory::Alloc(sizeof(wb->ucs[0]) * newsize);
00628               if (!wb->ucs)
00629                      return NS_ERROR_OUT_OF_MEMORY;
00630               wb->cclass = (PRInt32*)nsMemory::Alloc(sizeof(wb->cclass[0]) * newsize);
00631               if (!wb->cclass) {
00632                      nsMemory::Free(wb->ucs);
00633                      wb->ucs = NULL;
00634                      return NS_ERROR_OUT_OF_MEMORY;
00635               }
00636        } else {
00637               void* buf = nsMemory::Realloc(wb->ucs, sizeof(wb->ucs[0]) * newsize);
00638               if (!buf)
00639                      return NS_ERROR_OUT_OF_MEMORY;
00640               wb->ucs = (PRUint32*)buf;
00641               buf = nsMemory::Realloc(wb->cclass, sizeof(wb->cclass[0]) * newsize);
00642               if (!buf)
00643                      return NS_ERROR_OUT_OF_MEMORY;
00644               wb->cclass = (PRInt32*)buf;
00645        }
00646        return (NS_OK);
00647 }
00648 
00649 static nsresult
00650 workbuf_append(workbuf_t *wb, PRUint32 c) {
00651        nsresult r;
00652 
00653        if (wb->last >= wb->size && (r = workbuf_extend(wb)) != NS_OK)
00654               return (r);
00655        wb->ucs[wb->last++] = c;
00656        return (NS_OK);
00657 }
00658 
00659 static void
00660 workbuf_shift(workbuf_t *wb, PRInt32 shift) {
00661        PRInt32 nmove;
00662 
00663        //assert(wb != NULL && wb->cur >= shift);
00664 
00665        nmove = wb->last - shift;
00666        memmove(&wb->ucs[0], &wb->ucs[shift],
00667                     nmove * sizeof(wb->ucs[0]));
00668        memmove(&wb->cclass[0], &wb->cclass[shift],
00669                     nmove * sizeof(wb->cclass[0]));
00670        wb->cur -= shift;
00671        wb->last -= shift;
00672 }
00673 
00674 static void
00675 workbuf_removevoid(workbuf_t *wb) {
00676        PRInt32 i, j;
00677        PRInt32 last = wb->last;
00678 
00679        for (i = j = 0; i < last; i++) {
00680               if (wb->cclass[i] >= 0) {
00681                      if (j < i) {
00682                             wb->ucs[j] = wb->ucs[i];
00683                             wb->cclass[j] = wb->cclass[i];
00684                      }
00685                      j++;
00686               }
00687        }
00688        wb->cur -= last - j;
00689        wb->last = j;
00690 }
00691 
00692 nsresult  
00693 nsUnicodeNormalizer::NormalizeUnicodeNFD( const nsAString& aSrc, nsAString& aDest)
00694 {
00695   return mdn_normalize(PR_FALSE, PR_FALSE, aSrc, aDest);
00696 }
00697 
00698 nsresult  
00699 nsUnicodeNormalizer::NormalizeUnicodeNFC( const nsAString& aSrc, nsAString& aDest)
00700 {
00701   return mdn_normalize(PR_TRUE, PR_FALSE, aSrc, aDest);
00702 }
00703 
00704 nsresult  
00705 nsUnicodeNormalizer::NormalizeUnicodeNFKD( const nsAString& aSrc, nsAString& aDest)
00706 {
00707   return mdn_normalize(PR_FALSE, PR_TRUE, aSrc, aDest);
00708 }
00709 
00710 nsresult  
00711 nsUnicodeNormalizer::NormalizeUnicodeNFKC( const nsAString& aSrc, nsAString& aDest)
00712 {
00713   return mdn_normalize(PR_TRUE, PR_TRUE, aSrc, aDest);
00714 }
00715