Back to index

lightning-sunbird  0.9+nobinonly
ucdata.c
Go to the documentation of this file.
00001 /*
00002  * Copyright 1996, 1997, 1998 Computing Research Labs,
00003  * New Mexico State University
00004  *
00005  * Permission is hereby granted, free of charge, to any person obtaining a
00006  * copy of this software and associated documentation files (the "Software"),
00007  * to deal in the Software without restriction, including without limitation
00008  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
00009  * and/or sell copies of the Software, and to permit persons to whom the
00010  * Software is furnished to do so, subject to the following conditions:
00011  *
00012  * The above copyright notice and this permission notice shall be included in
00013  * all copies or substantial portions of the Software.
00014  *
00015  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
00016  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00017  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
00018  * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
00019  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
00020  * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
00021  * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
00022  */
00023 #ifndef lint
00024 #ifdef __GNUC__
00025 static char rcsid[] __attribute__ ((unused)) = "$Id: ucdata.c,v 1.1 1999/01/08 00:19:11 ftang%netscape.com Exp $";
00026 #else
00027 static char rcsid[] = "$Id: ucdata.c,v 1.1 1999/01/08 00:19:11 ftang%netscape.com Exp $";
00028 #endif
00029 #endif
00030 
00031 #include <stdio.h>
00032 #include <stdlib.h>
00033 #include <string.h>
00034 #ifndef WIN32
00035 #include <unistd.h>
00036 #endif
00037 
00038 #include "ucdata.h"
00039 
00040 /**************************************************************************
00041  *
00042  * Miscellaneous types, data, and support functions.
00043  *
00044  **************************************************************************/
00045 
/*
 * Header read from the start of every data file handled by this module.
 * The loaders compare `bom' against 0xfffe to decide whether the file
 * contents have to be byte swapped.
 */
typedef struct {
    unsigned short bom;         /* Byte order mark written with the file. */
    unsigned short cnt;         /* Node count; its unit depends on the file. */
    union {
        unsigned long bytes;    /* Payload size in bytes. */
        unsigned short len[2];  /* Pair of table lengths (case mappings). */
    } size;
} _ucheader_t;
00054 
/*
 * Single-bit masks indexed by bit position: masks32[k] has only bit k set,
 * for k in the range 0..31.
 */
static unsigned long masks32[32] = {
    1UL <<  0, 1UL <<  1, 1UL <<  2, 1UL <<  3,
    1UL <<  4, 1UL <<  5, 1UL <<  6, 1UL <<  7,
    1UL <<  8, 1UL <<  9, 1UL << 10, 1UL << 11,
    1UL << 12, 1UL << 13, 1UL << 14, 1UL << 15,
    1UL << 16, 1UL << 17, 1UL << 18, 1UL << 19,
    1UL << 20, 1UL << 21, 1UL << 22, 1UL << 23,
    1UL << 24, 1UL << 25, 1UL << 26, 1UL << 27,
    1UL << 28, 1UL << 29, 1UL << 30, 1UL << 31
};
00066 
/*
 * Byte swapping helpers for 16-bit and 32-bit quantities.
 *
 * Every byte is masked individually.  Without the mask on the high byte,
 * endian_short() applied to a negative `short' would smear the sign
 * extension over the swapped result.  endian_long() works on an unsigned
 * long copy of its argument so that no signed value is shifted into the
 * sign bit and any bits above the low 32 are discarded.
 *
 * Both macros evaluate their argument more than once; do not pass
 * expressions with side effects.
 */
#define endian_short(cc) ((((cc) >> 8) & 0xff) | (((cc) & 0xff) << 8))
#define endian_long(cc) ((((unsigned long) (cc) & 0xff) << 24) | \
                         ((((unsigned long) (cc) >> 8) & 0xff) << 16) | \
                         ((((unsigned long) (cc) >> 16) & 0xff) << 8) | \
                         (((unsigned long) (cc) >> 24) & 0xff))
00070 
/*
 * Try to open `filename' in each directory of the colon separated list
 * `paths', in order, using the fopen() mode `mode'.
 *
 * Returns the first stream that opens successfully, or 0 when `filename'
 * is null or empty, `paths' is null or empty, or no directory yields the
 * file.  A directory whose combined "dir/filename" name does not fit in
 * the local buffer is skipped instead of overrunning the buffer.
 */
static FILE *
#ifdef __STDC__
_ucopenfile(char *paths, char *filename, char *mode)
#else
_ucopenfile(paths, filename, mode)
char *paths, *filename, *mode;
#endif
{
    FILE *f;
    char *dp, *end, path[BUFSIZ];
    size_t dirlen, namelen;

    if (filename == 0 || *filename == 0)
      return 0;

    namelen = strlen(filename);

    dp = paths;
    while (dp && *dp) {
        /*
         * Find the end of the current directory component.
         */
        for (end = dp; *end && *end != ':'; end++) ;
        dirlen = (size_t) (end - dp);

        /*
         * The buffer must hold the directory, a '/', the file name and the
         * terminating null byte.  The first test keeps the subtraction in
         * the second one from wrapping around.
         */
        if (namelen < sizeof(path) - 1 &&
            dirlen < sizeof(path) - 1 - namelen) {
            memcpy(path, dp, dirlen);
            path[dirlen] = '/';
            memcpy(path + dirlen + 1, filename, namelen + 1);

            if ((f = fopen(path, mode)) != 0)
              return f;
        }

        /*
         * Step over the separator, if there is one.
         */
        dp = (*end == ':') ? end + 1 : end;
    }

    return 0;
}
00106 
00107 /**************************************************************************
00108  *
00109  * Support for the character properties.
00110  *
00111  **************************************************************************/
00112 
00113 static unsigned long  _ucprop_size;
00114 static unsigned short *_ucprop_offsets;
00115 static unsigned long  *_ucprop_ranges;
00116 
00117 static void
00118 #ifdef __STDC__
00119 _ucprop_load(char *paths, int reload)
00120 #else
00121 _ucprop_load(paths, reload)
00122 char *paths;
00123 int reload;
00124 #endif
00125 {
00126     FILE *in;
00127     unsigned long size, i;
00128     _ucheader_t hdr;
00129 
00130     if (_ucprop_size > 0) {
00131         if (!reload)
00132           /*
00133            * The character properties have already been loaded.
00134            */
00135           return;
00136 
00137         /*
00138          * Unload the current character property data in preparation for
00139          * loading a new copy.  Only the first array has to be deallocated
00140          * because all the memory for the arrays is allocated as a single
00141          * block.
00142          */
00143         free((char *) _ucprop_offsets);
00144         _ucprop_size = 0;
00145     }
00146 
00147     if ((in = _ucopenfile(paths, "ctype.dat", "rb")) == 0)
00148       return;
00149 
00150     /*
00151      * Load the header.
00152      */
00153     fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
00154 
00155     if (hdr.bom == 0xfffe) {
00156         hdr.cnt = endian_short(hdr.cnt);
00157         hdr.size.bytes = endian_long(hdr.size.bytes);
00158     }
00159 
00160     if ((_ucprop_size = hdr.cnt) == 0) {
00161         fclose(in);
00162         return;
00163     }
00164 
00165     /*
00166      * Allocate all the storage needed for the lookup table.
00167      */
00168     _ucprop_offsets = (unsigned short *) malloc(hdr.size.bytes);
00169 
00170     /*
00171      * Calculate the offset into the storage for the ranges.  The offsets
00172      * array is on a 4-byte boundary and one larger than the value provided in
00173      * the header count field.  This means the offset to the ranges must be
00174      * calculated after aligning the count to a 4-byte boundary.
00175      */
00176     if ((size = ((hdr.cnt + 1) * sizeof(unsigned short))) & 3)
00177       size += 4 - (size & 3);
00178     size >>= 1;
00179     _ucprop_ranges = (unsigned long *) (_ucprop_offsets + size);
00180 
00181     /*
00182      * Load the offset array.
00183      */
00184     fread((char *) _ucprop_offsets, sizeof(unsigned short), size, in);
00185 
00186     /*
00187      * Do an endian swap if necessary.  Don't forget there is an extra node on
00188      * the end with the final index.
00189      */
00190     if (hdr.bom == 0xfffe) {
00191         for (i = 0; i <= _ucprop_size; i++)
00192           _ucprop_offsets[i] = endian_short(_ucprop_offsets[i]);
00193     }
00194 
00195     /*
00196      * Load the ranges.  The number of elements is in the last array position
00197      * of the offsets.
00198      */
00199     fread((char *) _ucprop_ranges, sizeof(unsigned long),
00200           _ucprop_offsets[_ucprop_size], in);
00201 
00202     fclose(in);
00203 
00204     /*
00205      * Do an endian swap if necessary.
00206      */
00207     if (hdr.bom == 0xfffe) {
00208         for (i = 0; i < _ucprop_offsets[_ucprop_size]; i++)
00209           _ucprop_ranges[i] = endian_long(_ucprop_ranges[i]);
00210     }
00211 }
00212 
00213 static void
00214 #ifdef __STDC__
00215 _ucprop_unload(void)
00216 #else
00217 _ucprop_unload()
00218 #endif
00219 {
00220     if (_ucprop_size == 0)
00221       return;
00222 
00223     /*
00224      * Only need to free the offsets because the memory is allocated as a
00225      * single block.
00226      */
00227     free((char *) _ucprop_offsets);
00228     _ucprop_size = 0;
00229 }
00230 
00231 static int
00232 #ifdef __STDC__
00233 _ucprop_lookup(unsigned long code, unsigned long n)
00234 #else
00235 _ucprop_lookup(code, n)
00236 unsigned long code, n;
00237 #endif
00238 {
00239     long l, r, m;
00240 
00241     /*
00242      * There is an extra node on the end of the offsets to allow this routine
00243      * to work right.  If the index is 0xffff, then there are no nodes for the
00244      * property.
00245      */
00246     if ((l = _ucprop_offsets[n]) == 0xffff)
00247       return 0;
00248 
00249     /*
00250      * Locate the next offset that is not 0xffff.  The sentinel at the end of
00251      * the array is the max index value.
00252      */
00253     for (m = 1;
00254          n + m < _ucprop_size && _ucprop_offsets[n + m] == 0xffff; m++) ;
00255 
00256     r = _ucprop_offsets[n + m] - 1;
00257 
00258     while (l <= r) {
00259         /*
00260          * Determine a "mid" point and adjust to make sure the mid point is at
00261          * the beginning of a range pair.
00262          */
00263         m = (l + r) >> 1;
00264         m -= (m & 1);
00265         if (code > _ucprop_ranges[m + 1])
00266           l = m + 2;
00267         else if (code < _ucprop_ranges[m])
00268           r = m - 2;
00269         else if (code >= _ucprop_ranges[m] && code <= _ucprop_ranges[m + 1])
00270           return 1;
00271     }
00272     return 0;
00273 }
00274 
00275 int
00276 #ifdef __STDC__
00277 ucisprop(unsigned long code, unsigned long mask1, unsigned long mask2)
00278 #else
00279 ucisprop(code, mask1, mask2)
00280 unsigned long code, mask1, mask2;
00281 #endif
00282 {
00283     unsigned long i;
00284 
00285     if (mask1 == 0 && mask2 == 0)
00286       return 0;
00287 
00288     for (i = 0; mask1 && i < 32; i++) {
00289         if ((mask1 & masks32[i]) && _ucprop_lookup(code, i))
00290           return 1;
00291     }
00292 
00293     for (i = 32; mask2 && i < _ucprop_size; i++) {
00294         if ((mask2 & masks32[i & 31]) && _ucprop_lookup(code, i))
00295           return 1;
00296     }
00297 
00298     return 0;
00299 }
00300 
00301 /**************************************************************************
00302  *
00303  * Support for case mapping.
00304  *
00305  **************************************************************************/
00306 
00307 static unsigned long _uccase_size;
00308 static unsigned short _uccase_len[2];
00309 static unsigned long *_uccase_map;
00310 
00311 static void
00312 #ifdef __STDC__
00313 _uccase_load(char *paths, int reload)
00314 #else
00315 _uccase_load(paths, reload)
00316 char *paths;
00317 int reload;
00318 #endif
00319 {
00320     FILE *in;
00321     unsigned long i;
00322     _ucheader_t hdr;
00323 
00324     if (_uccase_size > 0) {
00325         if (!reload)
00326           /*
00327            * The case mappings have already been loaded.
00328            */
00329           return;
00330 
00331         free((char *) _uccase_map);
00332         _uccase_size = 0;
00333     }
00334 
00335     if ((in = _ucopenfile(paths, "case.dat", "rb")) == 0)
00336       return;
00337 
00338     /*
00339      * Load the header.
00340      */
00341     fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
00342 
00343     if (hdr.bom == 0xfffe) {
00344         hdr.cnt = endian_short(hdr.cnt);
00345         hdr.size.len[0] = endian_short(hdr.size.len[0]);
00346         hdr.size.len[1] = endian_short(hdr.size.len[1]);
00347     }
00348 
00349     /*
00350      * Set the node count and lengths of the upper and lower case mapping
00351      * tables.
00352      */
00353     _uccase_size = hdr.cnt * 3;
00354     _uccase_len[0] = hdr.size.len[0] * 3;
00355     _uccase_len[1] = hdr.size.len[1] * 3;
00356 
00357     _uccase_map = (unsigned long *)
00358         malloc(_uccase_size * sizeof(unsigned long));
00359 
00360     /*
00361      * Load the case mapping table.
00362      */
00363     fread((char *) _uccase_map, sizeof(unsigned long), _uccase_size, in);
00364 
00365     /*
00366      * Do an endian swap if necessary.
00367      */
00368     if (hdr.bom == 0xfffe) {
00369         for (i = 0; i < _uccase_size; i++)
00370           _uccase_map[i] = endian_long(_uccase_map[i]);
00371     }
00372 }
00373 
00374 static void
00375 #ifdef __STDC__
00376 _uccase_unload(void)
00377 #else
00378 _uccase_unload()
00379 #endif
00380 {
00381     if (_uccase_size == 0)
00382       return;
00383 
00384     free((char *) _uccase_map);
00385     _uccase_size = 0;
00386 }
00387 
00388 static unsigned long
00389 #ifdef __STDC__
00390 _uccase_lookup(unsigned long code, long l, long r, int field)
00391 #else
00392 _uccase_lookup(code, l, r, field)
00393 unsigned long code;
00394 long l, r;
00395 int field;
00396 #endif
00397 {
00398     long m;
00399 
00400     /*
00401      * Do the binary search.
00402      */
00403     while (l <= r) {
00404         /*
00405          * Determine a "mid" point and adjust to make sure the mid point is at
00406          * the beginning of a case mapping triple.
00407          */
00408         m = (l + r) >> 1;
00409         m -= (m % 3);
00410         if (code > _uccase_map[m])
00411           l = m + 3;
00412         else if (code < _uccase_map[m])
00413           r = m - 3;
00414         else if (code == _uccase_map[m])
00415           return _uccase_map[m + field];
00416     }
00417 
00418     return code;
00419 }
00420 
00421 unsigned long
00422 #ifdef __STDC__
00423 uctoupper(unsigned long code)
00424 #else
00425 uctoupper(code)
00426 unsigned long code;
00427 #endif
00428 {
00429     int field;
00430     long l, r;
00431 
00432     if (ucisupper(code))
00433       return code;
00434 
00435     if (ucislower(code)) {
00436         /*
00437          * The character is lower case.
00438          */
00439         field = 1;
00440         l = _uccase_len[0];
00441         r = (l + _uccase_len[1]) - 1;
00442     } else {
00443         /*
00444          * The character is title case.
00445          */
00446         field = 2;
00447         l = _uccase_len[0] + _uccase_len[1];
00448         r = _uccase_size - 1;
00449     }
00450     return _uccase_lookup(code, l, r, field);
00451 }
00452 
00453 unsigned long
00454 #ifdef __STDC__
00455 uctolower(unsigned long code)
00456 #else
00457 uctolower(code)
00458 unsigned long code;
00459 #endif
00460 {
00461     int field;
00462     long l, r;
00463 
00464     if (ucislower(code))
00465       return code;
00466 
00467     if (ucisupper(code)) {
00468         /*
00469          * The character is upper case.
00470          */
00471         field = 1;
00472         l = 0;
00473         r = _uccase_len[0] - 1;
00474     } else {
00475         /*
00476          * The character is title case.
00477          */
00478         field = 2;
00479         l = _uccase_len[0] + _uccase_len[1];
00480         r = _uccase_size - 1;
00481     }
00482     return _uccase_lookup(code, l, r, field);
00483 }
00484 
00485 unsigned long
00486 #ifdef __STDC__
00487 uctotitle(unsigned long code)
00488 #else
00489 uctotitle(code)
00490 unsigned long code;
00491 #endif
00492 {
00493     int field;
00494     long l, r;
00495 
00496     if (ucistitle(code))
00497       return code;
00498 
00499     /*
00500      * The offset will always be the same for converting to title case.
00501      */
00502     field = 2;
00503 
00504     if (ucisupper(code)) {
00505         /*
00506          * The character is upper case.
00507          */
00508         l = 0;
00509         r = _uccase_len[0] - 1;
00510     } else {
00511         /*
00512          * The character is lower case.
00513          */
00514         l = _uccase_len[0];
00515         r = (l + _uccase_len[1]) - 1;
00516     }
00517     return _uccase_lookup(code, l, r, field);
00518 }
00519 
00520 /**************************************************************************
00521  *
00522  * Support for decompositions.
00523  *
00524  **************************************************************************/
00525 
00526 static unsigned long  _ucdcmp_size;
00527 static unsigned long *_ucdcmp_nodes;
00528 static unsigned long *_ucdcmp_decomp;
00529 
00530 static void
00531 #ifdef __STDC__
00532 _ucdcmp_load(char *paths, int reload)
00533 #else
00534 _ucdcmp_load(paths, reload)
00535 char *paths;
00536 int reload;
00537 #endif
00538 {
00539     FILE *in;
00540     unsigned long size, i;
00541     _ucheader_t hdr;
00542 
00543     if (_ucdcmp_size > 0) {
00544         if (!reload)
00545           /*
00546            * The decompositions have already been loaded.
00547            */
00548           return;
00549 
00550         free((char *) _ucdcmp_nodes);
00551         _ucdcmp_size = 0;
00552     }
00553 
00554     if ((in = _ucopenfile(paths, "decomp.dat", "rb")) == 0)
00555       return;
00556 
00557     /*
00558      * Load the header.
00559      */
00560     fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
00561 
00562     if (hdr.bom == 0xfffe) {
00563         hdr.cnt = endian_short(hdr.cnt);
00564         hdr.size.bytes = endian_long(hdr.size.bytes);
00565     }
00566 
00567     _ucdcmp_size = hdr.cnt << 1;
00568     _ucdcmp_nodes = (unsigned long *) malloc(hdr.size.bytes);
00569     _ucdcmp_decomp = _ucdcmp_nodes + (_ucdcmp_size + 1);
00570 
00571     /*
00572      * Read the decomposition data in.
00573      */
00574     size = hdr.size.bytes / sizeof(unsigned long);
00575     fread((char *) _ucdcmp_nodes, sizeof(unsigned long), size, in);
00576 
00577     /*
00578      * Do an endian swap if necessary.
00579      */
00580     if (hdr.bom == 0xfffe) {
00581         for (i = 0; i < size; i++)
00582           _ucdcmp_nodes[i] = endian_long(_ucdcmp_nodes[i]);
00583     }        
00584 }
00585 
00586 static void
00587 #ifdef __STDC__
00588 _ucdcmp_unload(void)
00589 #else
00590 _ucdcmp_unload()
00591 #endif
00592 {
00593     if (_ucdcmp_size == 0)
00594       return;
00595 
00596     /*
00597      * Only need to free the offsets because the memory is allocated as a
00598      * single block.
00599      */
00600     free((char *) _ucdcmp_nodes);
00601     _ucdcmp_size = 0;
00602 }
00603 
00604 int
00605 #ifdef __STDC__
00606 ucdecomp(unsigned long code, unsigned long *num, unsigned long **decomp)
00607 #else
00608 ucdecomp(code, num, decomp)
00609 unsigned long code, *num, **decomp;
00610 #endif
00611 {
00612     long l, r, m;
00613 
00614     l = 0;
00615     r = _ucdcmp_nodes[_ucdcmp_size] - 1;
00616 
00617     while (l <= r) {
00618         /*
00619          * Determine a "mid" point and adjust to make sure the mid point is at
00620          * the beginning of a code+offset pair.
00621          */
00622         m = (l + r) >> 1;
00623         m -= (m & 1);
00624         if (code > _ucdcmp_nodes[m])
00625           l = m + 2;
00626         else if (code < _ucdcmp_nodes[m])
00627           r = m - 2;
00628         else if (code == _ucdcmp_nodes[m]) {
00629             *num = _ucdcmp_nodes[m + 3] - _ucdcmp_nodes[m + 1];
00630             *decomp = &_ucdcmp_decomp[_ucdcmp_nodes[m + 1]];
00631             return 1;
00632         }
00633     }
00634     return 0;
00635 }
00636 
/*
 * Decompose a Hangul syllable arithmetically.
 *
 * Returns 0 if ucishangul() rejects `code'.  Otherwise stores the parts in
 * decomp[0..2], sets `*num' to 3, or to 2 when the third part is the bare
 * base value 0x11a7, and returns 1.  `decomp' must have room for three
 * elements.
 */
int
#ifdef __STDC__
ucdecomp_hangul(unsigned long code, unsigned long *num, unsigned long decomp[])
#else
ucdecomp_hangul(code, num, decomp)
unsigned long code, *num, decomp[];
#endif
{
    unsigned long syllable, trail;

    if (!ucishangul(code))
      return 0;

    /*
     * Index of the syllable relative to the start of the block.
     */
    syllable = code - 0xac00;
    trail = syllable % 28;

    decomp[0] = 0x1100 + syllable / 588;
    decomp[1] = 0x1161 + (syllable % 588) / 28;
    decomp[2] = 0x11a7 + trail;

    *num = (trail != 0) ? 3 : 2;

    return 1;
}
00656 
00657 /**************************************************************************
00658  *
00659  * Support for combining classes.
00660  *
00661  **************************************************************************/
00662 
00663 static unsigned long  _uccmcl_size;
00664 static unsigned long *_uccmcl_nodes;
00665 
00666 static void
00667 #ifdef __STDC__
00668 _uccmcl_load(char *paths, int reload)
00669 #else
00670 _uccmcl_load(paths, reload)
00671 char *paths;
00672 int reload;
00673 #endif
00674 {
00675     FILE *in;
00676     unsigned long i;
00677     _ucheader_t hdr;
00678 
00679     if (_uccmcl_size > 0) {
00680         if (!reload)
00681           /*
00682            * The combining classes have already been loaded.
00683            */
00684           return;
00685 
00686         free((char *) _uccmcl_nodes);
00687         _uccmcl_size = 0;
00688     }
00689 
00690     if ((in = _ucopenfile(paths, "cmbcl.dat", "rb")) == 0)
00691       return;
00692 
00693     /*
00694      * Load the header.
00695      */
00696     fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
00697 
00698     if (hdr.bom == 0xfffe) {
00699         hdr.cnt = endian_short(hdr.cnt);
00700         hdr.size.bytes = endian_long(hdr.size.bytes);
00701     }
00702 
00703     _uccmcl_size = hdr.cnt * 3;
00704     _uccmcl_nodes = (unsigned long *) malloc(hdr.size.bytes);
00705 
00706     /*
00707      * Read the combining classes in.
00708      */
00709     fread((char *) _uccmcl_nodes, sizeof(unsigned long), _uccmcl_size, in);
00710 
00711     /*
00712      * Do an endian swap if necessary.
00713      */
00714     if (hdr.bom == 0xfffe) {
00715         for (i = 0; i < _uccmcl_size; i++)
00716           _uccmcl_nodes[i] = endian_long(_uccmcl_nodes[i]);
00717     }        
00718 }
00719 
00720 static void
00721 #ifdef __STDC__
00722 _uccmcl_unload(void)
00723 #else
00724 _uccmcl_unload()
00725 #endif
00726 {
00727     if (_uccmcl_size == 0)
00728       return;
00729 
00730     free((char *) _uccmcl_nodes);
00731     _uccmcl_size = 0;
00732 }
00733 
00734 unsigned long
00735 #ifdef __STDC__
00736 uccombining_class(unsigned long code)
00737 #else
00738 uccombining_class(code)
00739 unsigned long code;
00740 #endif
00741 {
00742     long l, r, m;
00743 
00744     l = 0;
00745     r = _uccmcl_size - 1;
00746 
00747     while (l <= r) {
00748         m = (l + r) >> 1;
00749         m -= (m % 3);
00750         if (code > _uccmcl_nodes[m + 1])
00751           l = m + 3;
00752         else if (code < _uccmcl_nodes[m])
00753           r = m - 3;
00754         else if (code >= _uccmcl_nodes[m] && code <= _uccmcl_nodes[m + 1])
00755           return _uccmcl_nodes[m + 2];
00756     }
00757     return 0;
00758 }
00759 
00760 /**************************************************************************
00761  *
00762  * Support for numeric values.
00763  *
00764  **************************************************************************/
00765 
00766 static unsigned long *_ucnum_nodes;
00767 static unsigned long _ucnum_size;
00768 static short *_ucnum_vals;
00769 
00770 static void
00771 #ifdef __STDC__
00772 _ucnumb_load(char *paths, int reload)
00773 #else
00774 _ucnumb_load(paths, reload)
00775 char *paths;
00776 int reload;
00777 #endif
00778 {
00779     FILE *in;
00780     unsigned long size, i;
00781     _ucheader_t hdr;
00782 
00783     if (_ucnum_size > 0) {
00784         if (!reload)
00785           /*
00786            * The numbers have already been loaded.
00787            */
00788           return;
00789 
00790         free((char *) _ucnum_nodes);
00791         _ucnum_size = 0;
00792     }
00793 
00794     if ((in = _ucopenfile(paths, "num.dat", "rb")) == 0)
00795       return;
00796 
00797     /*
00798      * Load the header.
00799      */
00800     fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
00801 
00802     if (hdr.bom == 0xfffe) {
00803         hdr.cnt = endian_short(hdr.cnt);
00804         hdr.size.bytes = endian_long(hdr.size.bytes);
00805     }
00806 
00807     _ucnum_size = hdr.cnt;
00808     _ucnum_nodes = (unsigned long *) malloc(hdr.size.bytes);
00809     _ucnum_vals = (short *) (_ucnum_nodes + _ucnum_size);
00810 
00811     /*
00812      * Read the combining classes in.
00813      */
00814     fread((char *) _ucnum_nodes, sizeof(unsigned char), hdr.size.bytes, in);
00815 
00816     /*
00817      * Do an endian swap if necessary.
00818      */
00819     if (hdr.bom == 0xfffe) {
00820         for (i = 0; i < _ucnum_size; i++)
00821           _ucnum_nodes[i] = endian_long(_ucnum_nodes[i]);
00822 
00823         /*
00824          * Determine the number of values that have to be adjusted.
00825          */
00826         size = (hdr.size.bytes -
00827                 (_ucnum_size * (sizeof(unsigned long) << 1))) /
00828             sizeof(short);
00829 
00830         for (i = 0; i < size; i++)
00831           _ucnum_vals[i] = endian_short(_ucnum_vals[i]);
00832     }        
00833 }
00834 
00835 static void
00836 #ifdef __STDC__
00837 _ucnumb_unload(void)
00838 #else
00839 _ucnumb_unload()
00840 #endif
00841 {
00842     if (_ucnum_size == 0)
00843       return;
00844 
00845     free((char *) _ucnum_nodes);
00846     _ucnum_size = 0;
00847 }
00848 
00849 int
00850 #ifdef __STDC__
00851 ucnumber_lookup(unsigned long code, struct ucnumber *num)
00852 #else
00853 ucnumber_lookup(code, num)
00854 unsigned long code;
00855 struct ucnumber *num;
00856 #endif
00857 {
00858     long l, r, m;
00859     short *vp;
00860 
00861     l = 0;
00862     r = _ucnum_size - 1;
00863     while (l <= r) {
00864         /*
00865          * Determine a "mid" point and adjust to make sure the mid point is at
00866          * the beginning of a code+offset pair.
00867          */
00868         m = (l + r) >> 1;
00869         m -= (m & 1);
00870         if (code > _ucnum_nodes[m])
00871           l = m + 2;
00872         else if (code < _ucnum_nodes[m])
00873           r = m - 2;
00874         else {
00875             vp = _ucnum_vals + _ucnum_nodes[m + 1];
00876             num->numerator = (int) *vp++;
00877             num->denominator = (int) *vp;
00878             return 1;
00879         }
00880     }
00881     return 0;
00882 }
00883 
00884 int
00885 #ifdef __STDC__
00886 ucdigit_lookup(unsigned long code, int *digit)
00887 #else
00888 ucdigit_lookup(code, digit)
00889 unsigned long code;
00890 int *digit;
00891 #endif
00892 {
00893     long l, r, m;
00894     short *vp;
00895 
00896     l = 0;
00897     r = _ucnum_size - 1;
00898     while (l <= r) {
00899         /*
00900          * Determine a "mid" point and adjust to make sure the mid point is at
00901          * the beginning of a code+offset pair.
00902          */
00903         m = (l + r) >> 1;
00904         m -= (m & 1);
00905         if (code > _ucnum_nodes[m])
00906           l = m + 2;
00907         else if (code < _ucnum_nodes[m])
00908           r = m - 2;
00909         else {
00910             vp = _ucnum_vals + _ucnum_nodes[m + 1];
00911             if (*vp == *(vp + 1)) {
00912               *digit = *vp;
00913               return 1;
00914             }
00915             return 0;
00916         }
00917     }
00918     return 0;
00919 }
00920 
00921 struct ucnumber
00922 #ifdef __STDC__
00923 ucgetnumber(unsigned long code)
00924 #else
00925 ucgetnumber(code)
00926 unsigned long code;
00927 #endif
00928 {
00929     struct ucnumber num;
00930 
00931     /*
00932      * Initialize with some arbitrary value, because the caller simply cannot
00933      * tell for sure if the code is a number without calling the ucisnumber()
00934      * macro before calling this function.
00935      */
00936     num.numerator = num.denominator = -111;
00937 
00938     (void) ucnumber_lookup(code, &num);
00939 
00940     return num;
00941 }
00942 
/*
 * Return the digit value of `code', or -111 when the lookup fails; the
 * caller cannot tell a miss from a real value without checking the code
 * first.
 */
int
#ifdef __STDC__
ucgetdigit(unsigned long code)
#else
ucgetdigit(code)
unsigned long code;
#endif
{
    /*
     * Arbitrary placeholder, kept when the lookup fails.
     */
    int value = -111;

    (void) ucdigit_lookup(code, &value);

    return value;
}
00964 
00965 /**************************************************************************
00966  *
00967  * Setup and cleanup routines.
00968  *
00969  **************************************************************************/
00970 
00971 void
00972 #ifdef __STDC__
00973 ucdata_load(char *paths, int masks)
00974 #else
00975 ucdata_load(paths, masks)
00976 char *paths;
00977 int masks;
00978 #endif
00979 {
00980     if (masks & UCDATA_CTYPE)
00981       _ucprop_load(paths, 0);
00982     if (masks & UCDATA_CASE)
00983       _uccase_load(paths, 0);
00984     if (masks & UCDATA_DECOMP)
00985       _ucdcmp_load(paths, 0);
00986     if (masks & UCDATA_CMBCL)
00987       _uccmcl_load(paths, 0);
00988     if (masks & UCDATA_NUM)
00989       _ucnumb_load(paths, 0);
00990 }
00991 
00992 void
00993 #ifdef __STDC__
00994 ucdata_unload(int masks)
00995 #else
00996 ucdata_unload(masks)
00997 int masks;
00998 #endif
00999 {
01000     if (masks & UCDATA_CTYPE)
01001       _ucprop_unload();
01002     if (masks & UCDATA_CASE)
01003       _uccase_unload();
01004     if (masks & UCDATA_DECOMP)
01005       _ucdcmp_unload();
01006     if (masks & UCDATA_CMBCL)
01007       _uccmcl_unload();
01008     if (masks & UCDATA_NUM)
01009       _ucnumb_unload();
01010 }
01011 
01012 void
01013 #ifdef __STDC__
01014 ucdata_reload(char *paths, int masks)
01015 #else
01016 ucdata_reload(paths, masks)
01017 char *paths;
01018 int masks;
01019 #endif
01020 {
01021     if (masks & UCDATA_CTYPE)
01022       _ucprop_load(paths, 1);
01023     if (masks & UCDATA_CASE)
01024       _uccase_load(paths, 1);
01025     if (masks & UCDATA_DECOMP)
01026       _ucdcmp_load(paths, 1);
01027     if (masks & UCDATA_CMBCL)
01028       _uccmcl_load(paths, 1);
01029     if (masks & UCDATA_NUM)
01030       _ucnumb_load(paths, 1);
01031 }
01032 
#ifdef TEST

/*
 * Small self-test driver, only built when TEST is defined.  It loads the
 * data files from the current directory, prints the results of a handful
 * of lookups and returns 0.
 *
 * NOTE(review): ucdata_setup() and ucdata_cleanup() are not defined in
 * this file; presumably they come from ucdata.h -- confirm.
 */
int
#ifdef __STDC__
main(void)
#else
main()
#endif
{
    int dig;
    unsigned long i, lo, *dec;
    struct ucnumber num;

    ucdata_setup(".");

    if (ucisweak(0x30))
      printf("WEAK\n");
    else
      printf("NOT WEAK\n");

    printf("LOWER 0x%04lX\n", uctolower(0xff3a));
    printf("UPPER 0x%04lX\n", uctoupper(0xff5a));

    if (ucisalpha(0x1d5))
      printf("ALPHA\n");
    else
      printf("NOT ALPHA\n");

    if (ucisupper(0x1d5)) {
        printf("UPPER\n");
        lo = uctolower(0x1d5);
        printf("0x%04lx\n", lo);
        lo = uctotitle(0x1d5);
        printf("0x%04lx\n", lo);
    } else
      printf("NOT UPPER\n");

    if (ucistitle(0x1d5))
      printf("TITLE\n");
    else
      printf("NOT TITLE\n");

    if (uciscomposite(0x1d5))
      printf("COMPOSITE\n");
    else
      printf("NOT COMPOSITE\n");

    if (ucdecomp(0x1d5, &lo, &dec)) {
        for (i = 0; i < lo; i++)
          printf("0x%04lx ", dec[i]);
        putchar('\n');
    }

    /*
     * `lo' is unsigned long, so it is printed with %lu.
     */
    if ((lo = uccombining_class(0x41)) != 0)
      printf("0x41 CCL %lu\n", lo);

    if (ucisxdigit(0xfeff))
      printf("0xFEFF HEX DIGIT\n");
    else
      printf("0xFEFF NOT HEX DIGIT\n");

    if (ucisdefined(0x10000))
      printf("0x10000 DEFINED\n");
    else
      printf("0x10000 NOT DEFINED\n");

    if (ucnumber_lookup(0x30, &num)) {
        if (num.numerator != num.denominator)
          printf("UCNUMBER: 0x30 = %d/%d\n", num.numerator, num.denominator);
        else
          printf("UCNUMBER: 0x30 = %d\n", num.numerator);
    } else
      printf("UCNUMBER: 0x30 NOT A NUMBER\n");

    if (ucnumber_lookup(0xbc, &num)) {
        if (num.numerator != num.denominator)
          printf("UCNUMBER: 0xbc = %d/%d\n", num.numerator, num.denominator);
        else
          printf("UCNUMBER: 0xbc = %d\n", num.numerator);
    } else
      printf("UCNUMBER: 0xbc NOT A NUMBER\n");

    if (ucnumber_lookup(0xff19, &num)) {
        if (num.numerator != num.denominator)
          printf("UCNUMBER: 0xff19 = %d/%d\n", num.numerator, num.denominator);
        else
          printf("UCNUMBER: 0xff19 = %d\n", num.numerator);
    } else
      printf("UCNUMBER: 0xff19 NOT A NUMBER\n");

    if (ucnumber_lookup(0x4e00, &num)) {
        if (num.numerator != num.denominator)
          printf("UCNUMBER: 0x4e00 = %d/%d\n", num.numerator, num.denominator);
        else
          printf("UCNUMBER: 0x4e00 = %d\n", num.numerator);
    } else
      printf("UCNUMBER: 0x4e00 NOT A NUMBER\n");

    if (ucdigit_lookup(0x06f9, &dig))
      printf("UCDIGIT: 0x6f9 = %d\n", dig);
    else
      printf("UCDIGIT: 0x6f9 NOT A NUMBER\n");

    dig = ucgetdigit(0x0969);
    printf("UCGETDIGIT: 0x969 = %d\n", dig);

    num = ucgetnumber(0x30);
    if (num.numerator != num.denominator)
      printf("UCGETNUMBER: 0x30 = %d/%d\n", num.numerator, num.denominator);
    else
      printf("UCGETNUMBER: 0x30 = %d\n", num.numerator);

    num = ucgetnumber(0xbc);
    if (num.numerator != num.denominator)
      printf("UCGETNUMBER: 0xbc = %d/%d\n", num.numerator, num.denominator);
    else
      printf("UCGETNUMBER: 0xbc = %d\n", num.numerator);

    num = ucgetnumber(0xff19);
    if (num.numerator != num.denominator)
      printf("UCGETNUMBER: 0xff19 = %d/%d\n", num.numerator, num.denominator);
    else
      printf("UCGETNUMBER: 0xff19 = %d\n", num.numerator);

    ucdata_cleanup();
    return 0;
}

#endif /* TEST */