Back to index

im-sdk  12.3.91
converter.c
Go to the documentation of this file.
00001 /*
00002 Copyright 1990-2003 Sun Microsystems, Inc. All Rights Reserved.
00003 
00004 Permission is hereby granted, free of charge, to any person obtaining a
00005 copy of this software and associated documentation files (the
00006 "Software"), to deal in the Software without restriction, including
00007 without limitation the rights to use, copy, modify, merge, publish,
00008 distribute, sublicense, and/or sell copies of the Software, and to
00009 permit persons to whom the Software is furnished to do so, subject to
00010 the following conditions: The above copyright notice and this
00011 permission notice shall be included in all copies or substantial
00012 portions of the Software.
00013 
00014 
00015 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
00016 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00017 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
00018 IN NO EVENT SHALL THE OPEN GROUP OR SUN MICROSYSTEMS, INC. BE LIABLE
00019 FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
00020 CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH
00021 THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE EVEN IF
00022 ADVISED IN ADVANCE OF THE POSSIBILITY OF SUCH DAMAGES.
00023 
00024 
00025 Except as contained in this notice, the names of The Open Group and/or
00026 Sun Microsystems, Inc. shall not be used in advertising or otherwise to
00027 promote the sale, use or other dealings in this Software without prior
00028 written authorization from The Open Group and/or Sun Microsystems,
00029 Inc., as applicable.
00030 
00031 
00032 X Window System is a trademark of The Open Group
00033 
00034 OSF/1, OSF/Motif and Motif are registered trademarks, and OSF, the OSF
00035 logo, LBX, X Window System, and Xinerama are trademarks of the Open
00036 Group. All other trademarks and registered trademarks mentioned herein
00037 are the property of their respective owners. No right, title or
00038 interest in or to any trademark, service mark, logo or trade name of
00039 Sun Microsystems, Inc. or its licensors is granted.
00040 
00041 */
00042 
00043 #include <ctype.h>
00044 #include <stdio.h>
00045 #include <string.h>
00046 #include <stdlib.h>
00047 
/* Status codes returned by the routines in this file. */
enum {
    OK               =  0,  /* success */
    ERROR            = -1,  /* generic failure; also used for lines to be skipped silently */
    ERROR_ZHICODE    = -2,  /* reported by the file reader as an "hz_code error" */
    ERROR_ZHUYINCODE = -3   /* reported by the file reader as a "Zhi error" */
};
00054 
/*
 * One record of the text-format input file: a character string (Zhi)
 * and up to ten associated strings (Yin).  Every pointer is either NULL
 * or a heap string owned by the record.
 */
typedef struct TTextItemStruct {
    unsigned char *Zhi;      /* encoded character, NUL-terminated */
    unsigned char *Yin[10];  /* key sequences; unused slots stay NULL */
} TTextItemStruct;

typedef TTextItemStruct *PTextItemStruct;
00060 
00061 typedef struct {
00062        long num_items;
00063        PTextItemStruct *items;
00064 } TTextItemListStruct;
00065 
/* Number of item slots allocated initially and added on each growth step. */
#define LEN_BUFFER_ALLOC 5000
00067 
00068 
/*
 * Skip leading blanks.
 *
 * Returns a pointer to the first character of s that is neither a space
 * nor a tab; this may be the terminating NUL.  Newlines are not skipped.
 */
char *skip_space(char *s)
{
    char *cursor = s;

    while (*cursor == ' ' || *cursor == '\t')
        cursor++;

    return cursor;
}
00074 
/*
 * Advance to the next whitespace character.
 *
 * Returns a pointer to the first character of s for which isspace() is
 * true (this includes '\n'), or to the terminating NUL if there is none.
 */
char *to_space(char *s)
{
    /*
     * isspace() requires a value representable as unsigned char (or EOF).
     * The input contains bytes with the high bit set, which are negative
     * where plain char is signed, so convert before the call.
     */
    while (*s != '\0' && !isspace((unsigned char)*s))
        s++;

    return s;
}
00080 
00081 /*
00082 parse the line into gbk_code, Zhi and frequence:
00083 
00084 return OK if success;
00085 return ERROR_ZHICODE  if gbk_code error;
00086 return ERROR_ZHUYINCODE if Zhi error;
00087 */
00088 
/*
 * Combine two hexadecimal digit characters into one byte value.
 *
 * This is not a plain hex conversion: a high digit in '0'..'9' is mapped
 * to (digit + 8) before being shifted into the upper nibble, so for
 * '0'..'7' the result has bit 7 set (e.g. "23" -> 0xA3).  High digits
 * 'A'..'F' (either case) map to 0xA..0xF as usual.  For '8' and '9' the
 * sum exceeds one nibble and the excess is discarded by the 8-bit shift.
 * The low digit is converted normally.
 *
 * Characters that are not hex digits are not rejected; their character
 * code is used as-is in the computation.
 *
 * Returns a value in the range 0..255.
 */
int get_hex_code(char high_code, char low_code)
{
    unsigned char high, low;

    /* toupper() needs an unsigned char value; plain char may be negative. */
    high = (unsigned char)toupper((unsigned char)high_code);
    if (high >= '0' && high <= '9') {
        high = (unsigned char)(high - '0' + 0x8);
    } else if (high >= 'A' && high <= 'F') {
        high = (unsigned char)(high - 'A' + 0xa);
    }
    high = (unsigned char)(high << 4);

    low = (unsigned char)toupper((unsigned char)low_code);
    if (low >= '0' && low <= '9') {
        low = (unsigned char)(low - '0');
    } else if (low >= 'A' && low <= 'F') {
        low = (unsigned char)(low - 'A' + 0xa);
    }

    return (high | low) & 0x00ff;
}
00110 
00111 int parse_line_for_zhuyin(char *line_buf, TTextItemStruct *item)
00112 {
00113        unsigned char hz_code[7], hanzi_str[5], qu_code;
00114        char key_sequence[5];
00115        char *ptr, *yin_ptr;
00116        int i, cur_pos, yin_len;
00117 
00118        ptr = skip_space(line_buf);
00119 
00120        if (*ptr == '\n' || *ptr == '#') 
00121               return(ERROR);
00122 
00123        /* get gbk code information */
00124        memset(hz_code, 0, 7);
00125        memcpy(hz_code, ptr, 6);
00126 
00127        /* parse "1-2345" to Chinese encode character */
00128        cur_pos = 0;
00129        qu_code = toupper(hz_code[0]);
00130        if (qu_code >= '2' && qu_code <= '7') {
00131               hanzi_str[cur_pos++] = 0x8e;
00132               hanzi_str[cur_pos++] = 0xa1 + (qu_code - '1');
00133        } else if (qu_code >= 'A' && qu_code <= 'F') {
00134               hanzi_str[cur_pos++] = 0x8e;
00135               hanzi_str[cur_pos++] = 0xaa + (qu_code - 'A');
00136        }
00137        hanzi_str[cur_pos++] = get_hex_code(hz_code[2], hz_code[3]);
00138        hanzi_str[cur_pos++] = get_hex_code(hz_code[4], hz_code[5]);
00139        hanzi_str[cur_pos] = 0;
00140 
00141        item->Zhi = (unsigned char *)strdup(hanzi_str);
00142 
00143        /* get zhuyin code information */
00144        ptr += 6;
00145        ptr = yin_ptr = skip_space(ptr);
00146 
00147        /* convert '()' to space */
00148        while(*ptr && (*ptr != '\n') && !(*ptr >='0' && *ptr <='9')) {
00149               if (*ptr == '(' || *ptr == ')')
00150                      *ptr = ' ';
00151               ptr++;
00152        }
00153        *ptr = 0;
00154 
00155        i = 0;
00156        while(*yin_ptr) {
00157               ZhuyinSymbolSequenceToStandardKeySequence(yin_ptr, key_sequence, &yin_len);
00158 
00159               if (yin_len == 0) break;
00160 
00161               if (key_sequence[0]) {
00162                      item->Yin[i] = (unsigned char *)strdup(key_sequence);
00163                      i++;
00164                      if (i>10) break;
00165               }
00166               yin_ptr += yin_len;
00167               yin_ptr = skip_space(yin_ptr);
00168        }
00169        return(OK);
00170 }
00171 
00172 int TextItemList_Alloc(TTextItemListStruct *ItemList, int num_alloced)
00173 {
00174        int i;
00175 
00176        ItemList->num_items = 0;
00177        ItemList->items = malloc(num_alloced * sizeof(PTextItemStruct*));
00178 
00179        if (ItemList->items == NULL) {
00180               fprintf(stderr, "Error: TextItemList malloc\n");
00181               return(ERROR);
00182        }
00183 
00184        for (i=0; i<num_alloced; i++)
00185               ItemList->items[i] = NULL;
00186 
00187        ItemList->num_items = num_alloced;
00188        
00189        return(OK);
00190 }
00191 
00192 int TextItemList_ReAlloc(TTextItemListStruct *ItemList, int num_alloced)
00193 {
00194        int i;
00195 
00196        ItemList->items = realloc((void *)ItemList->items, num_alloced * sizeof(PTextItemStruct*));
00197 
00198        if (ItemList->items == NULL) {
00199               fprintf(stderr, "Error: TextItemList realloc\n");
00200               ItemList->num_items = 0;
00201               return(ERROR);
00202        }
00203 
00204        for (i=ItemList->num_items; i<num_alloced; i++)
00205               ItemList->items[i] = NULL;
00206 
00207        ItemList->num_items = num_alloced;
00208        
00209        return(OK);
00210 }
00211 
00212 int TextItemList_Init(TTextItemListStruct *ItemList)
00213 {
00214        ItemList->num_items = 0;
00215        ItemList->items = NULL;
00216 }
00217 
00218 int TextItemList_Free(TTextItemListStruct *ItemList)
00219 {
00220        int i, j;
00221 
00222        for (i=0; i<ItemList->num_items; i++) {
00223               if (ItemList->items[i]->Zhi != NULL) 
00224                      free((char *)ItemList->items[i]->Zhi);
00225 
00226               for  (j=0; j<10; j++) {
00227                      if (ItemList->items[i]->Yin[j] != NULL) 
00228                             free((char *)ItemList->items[i]->Yin[j]);
00229               }
00230 
00231               if (ItemList->items[i] != NULL)
00232                      free((char *)ItemList->items[i]);
00233        }
00234 
00235        if (ItemList->items != NULL)
00236               free((char *)ItemList->items);
00237 
00238        ItemList->num_items = 0;
00239 
00240        return(OK);
00241 }
00242 
00243 int TextItem_Compare_By_ZhuyinCode(const void *p1, const void *p2)
00244 {
00245        TTextItemStruct *item1 = *((TTextItemStruct **)p1);
00246        TTextItemStruct *item2 = *((TTextItemStruct **)p2);
00247        int ret;
00248 
00249        ret = strncmp(item1->Zhi, item2->Zhi, 4);
00250 
00251        return(ret);
00252 }
00253 
00254 int TextItemList_Sort(TTextItemListStruct *ItemList, int(*compare)(const void *, const void *))
00255 {
00256        qsort((void *)ItemList->items, ItemList->num_items, sizeof(PTextItemStruct), compare);
00257 }
00258 
00259 int TextItemList_Print_For_Single(TTextItemListStruct *ItemList)
00260 {
00261        int i, j;
00262        unsigned char *Zhi;
00263        unsigned char *Yin;
00264        long frequence;
00265 
00266        for (i=0; i<ItemList->num_items; i++) {
00267               if (ItemList->items[i] == NULL) {
00268                      continue;
00269               }
00270 
00271               Zhi = ItemList->items[i]->Zhi;
00272               if (!Zhi || !*Zhi) break;
00273 
00274               for (j=0; j<10; j++) {
00275                      Yin = ItemList->items[i]->Yin[j];
00276 
00277                      if (!Yin || !*Yin) break;
00278 
00279                      printf("%s\t%s\n", (char *)Yin,  Zhi);
00280               }
00281        }
00282 }
00283 
00284 int Read_TextItemList_From_File(char *file_name, TTextItemListStruct *ItemList, int(*parse_line)(char *, TTextItemStruct *))
00285 {
00286        FILE *ifile;
00287        char line_buf[256];
00288 
00289        int num_malloc_items = LEN_BUFFER_ALLOC;
00290        int num_items = 0;
00291        int line_no = 0;
00292        int ret;
00293 
00294        ifile = fopen(file_name, "r");
00295        if (ifile == NULL) {
00296               fprintf(stderr, "Error: open file %s\n", file_name);
00297               return(ERROR);
00298        }
00299 
00300        TextItemList_Alloc(ItemList, num_malloc_items);
00301 
00302        while(fgets(line_buf, 256, ifile) != NULL) {
00303 
00304               line_no++;
00305 
00306               /* realloc buffer for items */
00307               if (num_items >= num_malloc_items) {
00308                      num_malloc_items += LEN_BUFFER_ALLOC;
00309                      ret = TextItemList_ReAlloc(ItemList, num_malloc_items);
00310                      if (ret == ERROR) 
00311                             break;
00312               }
00313               
00314               if (ItemList->items[num_items] == NULL) {
00315                      ItemList->items[num_items] = (TTextItemStruct *)calloc(1, sizeof(TTextItemStruct));
00316                      if (ItemList->items[num_items] == NULL) {
00317                             fprintf(stderr, "Error:  No Memory for TextItemList\n");
00318                             break;
00319                      }
00320               }
00321 
00322               ret = parse_line(line_buf, ItemList->items[num_items]);
00323               if (ret != 0) {
00324                      if (ret == ERROR_ZHICODE) {
00325                             fprintf(stderr, "Error: hz_code error in line %d\n", line_no);
00326                      } else if (ret == ERROR_ZHUYINCODE) {
00327                             fprintf(stderr, "Error: Zhi error in line %d\n", line_no);
00328                      }
00329                      continue;
00330               } 
00331 
00332               num_items++;
00333        }
00334 
00335        TextItemList_ReAlloc(ItemList, num_items);
00336 
00337        fclose(ifile);
00338        return(OK);
00339 }
00340 
00341 int main(int argc, char **argv)
00342 {
00343        int ret = OK;
00344        char *zhuyin_file;
00345 
00346        TTextItemListStruct Zhuyin_ItemList;
00347 
00348        if (argc != 2) {
00349               printf("Usage:  %s zhuyin_file\n", argv[0]);
00350               exit(-1);
00351        }
00352 
00353        zhuyin_file = argv[1];
00354 
00355        ret = Read_TextItemList_From_File(zhuyin_file, 
00356                                      &Zhuyin_ItemList, 
00357                                      parse_line_for_zhuyin);
00358        if (ret == ERROR) {
00359               TextItemList_Free(&Zhuyin_ItemList);
00360               exit(-1);
00361        }
00362 
00363 /*
00364        TextItemList_Sort(&Zhuyin_ItemList, TextItem_Compare_By_ZhuyinCode);
00365 */
00366        TextItemList_Print_For_Single(&Zhuyin_ItemList);
00367        TextItemList_Free(&Zhuyin_ItemList);
00368 
00369        return(OK);
00370 }