Back to index

im-sdk  12.3.91
converter.c
Go to the documentation of this file.
00001 /*
00002 Copyright 1990-2003 Sun Microsystems, Inc. All Rights Reserved.
00003 
00004 Permission is hereby granted, free of charge, to any person obtaining a
00005 copy of this software and associated documentation files (the
00006 "Software"), to deal in the Software without restriction, including
00007 without limitation the rights to use, copy, modify, merge, publish,
00008 distribute, sublicense, and/or sell copies of the Software, and to
00009 permit persons to whom the Software is furnished to do so, subject to
00010 the following conditions: The above copyright notice and this
00011 permission notice shall be included in all copies or substantial
00012 portions of the Software.
00013 
00014 
00015 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
00016 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00017 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
00018 IN NO EVENT SHALL THE OPEN GROUP OR SUN MICROSYSTEMS, INC. BE LIABLE
00019 FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
00020 CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH
00021 THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE EVEN IF
00022 ADVISED IN ADVANCE OF THE POSSIBILITY OF SUCH DAMAGES.
00023 
00024 
00025 Except as contained in this notice, the names of The Open Group and/or
00026 Sun Microsystems, Inc. shall not be used in advertising or otherwise to
00027 promote the sale, use or other dealings in this Software without prior
00028 written authorization from The Open Group and/or Sun Microsystems,
00029 Inc., as applicable.
00030 
00031 
00032 X Window System is a trademark of The Open Group
00033 
00034 OSF/1, OSF/Motif and Motif are registered trademarks, and OSF, the OSF
00035 logo, LBX, X Window System, and Xinerama are trademarks of the Open
00036 Group. All other trademarks and registered trademarks mentioned herein
00037 are the property of their respective owners. No right, title or
00038 interest in or to any trademark, service mark, logo or trade name of
00039 Sun Microsystems, Inc. or its licensors is granted.
00040 
00041 */
00042 
00043 #include <ctype.h>
00044 #include <stdio.h>
00045 #include <string.h>
00046 #include <stdlib.h>
00047 
/* Status codes returned by the parsing and list-handling routines below. */
enum {
    OK                = 0,   /* success */
    ERROR             = -1,  /* generic failure; also used for lines to be skipped silently */
    ERROR_HZCODE      = -2,  /* reported by the reader as "hz_code error" */
    ERROR_CANGJIECODE = -3   /* reported by the reader as "cangjie_code error" */
};
00054 
/*
 * One record of the text-format input file: a cangjie key sequence and
 * the hanzi byte string it maps to.
 */
typedef struct {
    char cangjie_code[6];     /* cangjie key sequence; the parser stores at most 5 characters */
    unsigned char *hz_str;    /* heap-allocated, NUL-terminated hanzi byte string */
} TTextItemStruct;

/* Pointer alias for a single record. */
typedef TTextItemStruct *PTextItemStruct;
00060 
00061 typedef struct {
00062        long num_items;
00063        PTextItemStruct *items;
00064 } TTextItemListStruct;
00065 
/* Number of list slots allocated initially and added on each growth step. */
#define LEN_BUFFER_ALLOC 5000
00067 
00068 
/*
 * Return a pointer to the first character of `s` that is neither a blank
 * nor a tab (this may be the terminating NUL).
 */
char *skip_space(char *s)
{
    return s + strspn(s, " \t");
}
00074 
/*
 * Return a pointer to the first whitespace character in `s` (as classified
 * by isspace(), which includes '\n'), or to the terminating NUL if there
 * is none.
 */
char *to_space(char *s)
{
    /* <ctype.h> functions require an unsigned char value; a plain char
       holding a byte >= 0x80 may be negative. */
    while (*s != '\0' && !isspace((unsigned char)*s))
        s++;
    return s;
}
00080 
/*
Contract for the line parser (see parse_line_for_cangjie below):
parse the line into gbk_code, cangjie_code and frequency.

Returns OK on success;
returns ERROR_HZCODE if the gbk_code is invalid;
returns ERROR_CANGJIECODE if the cangjie_code is invalid.
*/
00088 
/*
 * Combine two hexadecimal digit characters into one byte value.
 *
 * high_code: character for the upper nibble.  A decimal digit has 0x8
 *            added to its value before shifting (so '2' becomes 0xA),
 *            a letter 'A'-'F' / 'a'-'f' maps to 0xA-0xF unchanged.
 * low_code:  character for the lower nibble, converted as ordinary hex.
 *
 * Returns the combined value masked to 0..255, e.g. ('2','1') -> 0xA1.
 * Characters that are not hex digits are not rejected; their raw
 * character value is used in the computation.
 *
 * NOTE(review): the +0x8 on the upper nibble presumably turns a 7-bit
 * code into its high-bit-set EUC form -- confirm against the data format.
 */
int get_hex_code(char high_code, char low_code)
{
    unsigned char high, low;

    /* Cast before toupper(): passing a negative char is undefined. */
    high = (unsigned char)toupper((unsigned char)high_code);
    if (high >= '0' && high <= '9') {
        high = high - '0' + 0x8;
    } else if (high >= 'A' && high <= 'F') {
        high = high - 'A' + 0xa;
    }
    high <<= 4;   /* truncates to 8 bits on assignment */

    low = (unsigned char)toupper((unsigned char)low_code);
    if (low >= '0' && low <= '9') {
        low = low - '0';
    } else if (low >= 'A' && low <= 'F') {
        low = low - 'A' + 0xa;
    }

    return((high | low) & 0x00ff);
}
00110 
00111 int parse_line_for_cangjie(char *line_buf, TTextItemStruct *item)
00112 {
00113        unsigned char hz_code[6], hanzi_str[5], qu_code;
00114        char *ptr, *frequence_ptr;
00115        int i, cur_pos;
00116 
00117        ptr = skip_space(line_buf);
00118 
00119        if (*ptr == '\n' || *ptr == '#') 
00120               return(ERROR);
00121 
00122        /* get gbk code information */
00123        memset(hz_code, 0, 6);
00124        memcpy(hz_code, ptr, 5);
00125 
00126        cur_pos = 0;
00127        qu_code = toupper(hz_code[0]);
00128        if (qu_code >= '2' && qu_code <= '7') {
00129               hanzi_str[cur_pos++] = 0x8e;
00130               hanzi_str[cur_pos++] = 0xa1 + (qu_code - '1');
00131        } else if (qu_code >= 'A' && qu_code <= 'F') {
00132               hanzi_str[cur_pos++] = 0x8e;
00133               hanzi_str[cur_pos++] = 0xaa + (qu_code - 'A');
00134        }
00135        hanzi_str[cur_pos++] = get_hex_code(hz_code[1], hz_code[2]);
00136        hanzi_str[cur_pos++] = get_hex_code(hz_code[3], hz_code[4]);
00137        hanzi_str[cur_pos] = 0;
00138 
00139        item->hz_str = (unsigned char *)strdup(hanzi_str);
00140 
00141        /* get cangjie code information */
00142        ptr += 5;
00143        ptr = skip_space(ptr);
00144 
00145        i = 0;
00146        while(*ptr && *ptr != '\n' && !isspace(*ptr)) {
00147               if (i < 5) {
00148                      if (*ptr & 0x80) 
00149                             return(ERROR_CANGJIECODE);
00150               
00151                      item->cangjie_code[i] = tolower(*ptr);
00152               }
00153               *ptr ++;
00154               i++;
00155        } 
00156 
00157        return(OK);
00158 }
00159 
00160 int TextItemList_Alloc(TTextItemListStruct *ItemList, int num_alloced)
00161 {
00162        int i;
00163 
00164        ItemList->num_items = 0;
00165        ItemList->items = malloc(num_alloced * sizeof(PTextItemStruct*));
00166 
00167        if (ItemList->items == NULL) {
00168               fprintf(stderr, "Error: TextItemList malloc\n");
00169               return(ERROR);
00170        }
00171 
00172        for (i=0; i<num_alloced; i++)
00173               ItemList->items[i] = NULL;
00174 
00175        ItemList->num_items = num_alloced;
00176        
00177        return(OK);
00178 }
00179 
00180 int TextItemList_ReAlloc(TTextItemListStruct *ItemList, int num_alloced)
00181 {
00182        int i;
00183 
00184        ItemList->items = realloc((void *)ItemList->items, num_alloced * sizeof(PTextItemStruct*));
00185 
00186        if (ItemList->items == NULL) {
00187               fprintf(stderr, "Error: TextItemList realloc\n");
00188               ItemList->num_items = 0;
00189               return(ERROR);
00190        }
00191 
00192        for (i=ItemList->num_items; i<num_alloced; i++)
00193               ItemList->items[i] = NULL;
00194 
00195        ItemList->num_items = num_alloced;
00196        
00197        return(OK);
00198 }
00199 
00200 int TextItemList_Free(TTextItemListStruct *ItemList)
00201 {
00202        int i;
00203 
00204        for (i=0; i<ItemList->num_items; i++) {
00205               if (ItemList->items[i]->hz_str != NULL) 
00206                      free((char *)ItemList->items[i]->hz_str);
00207 
00208               if (ItemList->items[i] != NULL)
00209                      free((char *)ItemList->items[i]);
00210        }
00211 
00212        if (ItemList->items != NULL)
00213               free((char *)ItemList->items);
00214 
00215        ItemList->num_items = 0;
00216 
00217        return(OK);
00218 }
00219 
00220 int TextItem_Compare_By_CangJieCode(const void *p1, const void *p2)
00221 {
00222        TTextItemStruct *item1 = *((TTextItemStruct **)p1);
00223        TTextItemStruct *item2 = *((TTextItemStruct **)p2);
00224        int ret;
00225 
00226        ret = strncmp(item1->cangjie_code, item2->cangjie_code, 4);
00227 
00228        return(ret);
00229 }
00230 
00231 int TextItemList_Sort(TTextItemListStruct *ItemList, int(*compare)(const void *, const void *))
00232 {
00233        qsort((void *)ItemList->items, ItemList->num_items, sizeof(PTextItemStruct), compare);
00234 }
00235 
00236 int TextItemList_Print_For_Single(TTextItemListStruct *ItemList)
00237 {
00238        int i;
00239        char *cangjie_code;
00240        unsigned char *hz_str;
00241        long frequence;
00242 
00243        for (i=0; i<ItemList->num_items; i++) {
00244               if (ItemList->items[i] == NULL) {
00245                      continue;
00246               }
00247 
00248               cangjie_code = ItemList->items[i]->cangjie_code;
00249               hz_str = ItemList->items[i]->hz_str;
00250 
00251               printf("%s\t%s\n",  cangjie_code, (char *)hz_str);
00252 /*
00253               printf("%s%s\n",  (char *)hz_str, cangjie_code);
00254 */
00255        }
00256 }
00257 
00258 int Read_TextItemList_From_File(char *file_name, TTextItemListStruct *ItemList, int(*parse_line)(char *, TTextItemStruct *))
00259 {
00260        FILE *ifile;
00261        char line_buf[256];
00262 
00263        int num_malloc_items = LEN_BUFFER_ALLOC;
00264        int num_items = 0;
00265        int line_no = 0;
00266        int ret;
00267 
00268        ifile = fopen(file_name, "r");
00269        if (ifile == NULL) {
00270               fprintf(stderr, "Error: open file %s\n", file_name);
00271               return(ERROR);
00272        }
00273 
00274        TextItemList_Alloc(ItemList, num_malloc_items);
00275 
00276        while(fgets(line_buf, 256, ifile) != NULL) {
00277 
00278               line_no++;
00279 
00280               /* realloc buffer for items */
00281               if (num_items >= num_malloc_items) {
00282                      num_malloc_items += LEN_BUFFER_ALLOC;
00283                      ret = TextItemList_ReAlloc(ItemList, num_malloc_items);
00284                      if (ret == ERROR) 
00285                             break;
00286               }
00287               
00288               if (ItemList->items[num_items] == NULL) {
00289                      ItemList->items[num_items] = (TTextItemStruct *)calloc(1, sizeof(TTextItemStruct));
00290                      if (ItemList->items[num_items] == NULL) {
00291                             fprintf(stderr, "Error:  No Memory for TextItemList\n");
00292                             break;
00293                      }
00294               }
00295 
00296               ret = parse_line(line_buf, ItemList->items[num_items]);
00297               if (ret != 0) {
00298                      if (ret == ERROR_HZCODE) {
00299                             fprintf(stderr, "Error: hz_code error in line %d\n", line_no);
00300                      } else if (ret == ERROR_CANGJIECODE) {
00301                             fprintf(stderr, "Error: cangjie_code error in line %d\n", line_no);
00302                      }
00303                      continue;
00304               } 
00305 
00306               num_items++;
00307        }
00308 
00309        TextItemList_ReAlloc(ItemList, num_items);
00310 
00311        fclose(ifile);
00312        return(OK);
00313 }
00314 
00315 int main(int argc, char **argv)
00316 {
00317        int ret = OK;
00318        char *cangjie_file;
00319 
00320        TTextItemListStruct CangJie_ItemList;
00321 
00322        if (argc != 2) {
00323               printf("Usage:  %s cangjie_file\n", argv[0]);
00324               exit(-1);
00325        }
00326 
00327        cangjie_file = argv[1];
00328 
00329        ret = Read_TextItemList_From_File(cangjie_file, 
00330                                      &CangJie_ItemList, 
00331                                      parse_line_for_cangjie);
00332        if (ret == ERROR) {
00333               TextItemList_Free(&CangJie_ItemList);
00334               exit(-1);
00335        }
00336 
00337 /*
00338        TextItemList_Sort(&CangJie_ItemList, TextItem_Compare_By_CangJieCode);
00339 */
00340        TextItemList_Print_For_Single(&CangJie_ItemList);
00341        TextItemList_Free(&CangJie_ItemList);
00342 
00343        return(OK);
00344 }