Back to index

im-sdk  12.3.91
ucstoutf.c
Go to the documentation of this file.
00001 #include <stdio.h>
00002 #include <stdlib.h>
00003 #include <wchar.h>
00004 #include <errno.h>
00005 #include <locale.h>
00006 #include <string.h>
00007 
00008 static char mapfile[256];       /* map file path: built by create_mapfile() or copied from argv[1] in main(), then passed to fill_in_lex_skeleton() */
00009 static char after_depend[100];  /* "special" or "INITIAL", chosen by check_special_midword_consonant(); emitted after BEGIN in the <depend> vowel rules */
00010 
/*
 * Copy a header file verbatim onto the map file that is being written.
 *
 * name - path of the language specific header; when it cannot be opened
 *        the file "default.hdr" in the current directory is used instead.
 * fp1  - destination stream, already open for writing.
 *
 * Terminates the program with exit(-1) when neither header can be opened.
 */
static void
input_extra_header_to_mapfile(char *name, FILE *fp1) {
    FILE *tmp;
    char buf[256];

    tmp = fopen(name, "r");
    if (tmp == NULL)
        tmp = fopen("default.hdr", "r");
    if (tmp == NULL) {
        (void)fprintf(stderr,"<lang>.hdr or default.hdr header file for mapfile not found\n");
        exit(-1);
    }

    /* A freshly opened stream already sits at offset 0, no rewind needed. */
    while (fgets(buf, sizeof buf, tmp) != NULL)
        (void)fputs(buf, fp1);

    /* The header stream is private to this function: release it here. */
    (void)fclose(tmp);
}
00025 
00026 static void 
00027 create_mapfile( char *infile) {
00028        FILE *fp, *fp1;
00029        char buf[256];
00030        char *t;
00031        int put_space=1;
00032        char *b, *lang;
00033        char e[256];
00034        long l1=0;
00035 
00036 
00037        (void)sprintf(mapfile,"%s.%s",infile,"mapfile");
00038        fp = fopen(infile,"r");
00039        fp1= fopen(mapfile,"w");
00040        rewind(fp);
00041        (void)strcpy(e,mapfile);
00042        lang=(char *)strchr(e,'.');
00043        if(lang) {
00044               *lang='\0';
00045               (void)strcat(e,".hdr");
00046        } 
00047        input_extra_header_to_mapfile(e,fp1);
00048        while ( fgets(buf, 256, fp)) {
00049               if ((t=(char *)strtok(buf, " \t"))!= NULL) {
00050                      if ((*t) == '#' || (*t) == '%') continue;
00051                      b=(char*)strdup(t);
00052                      if(strcmp(b,"special_midword_consonant\n")==0)
00053                             put_space=0;
00054                      else if(strcmp(b,"vowel\n")==0)
00055                             put_space=1;
00056                      (void)fprintf(fp1,"%s", b);
00057                      if((t=(char *)strtok(NULL," \t"))!=NULL) {
00058                             b=(char*)strdup(t);
00059                             l1=strtol(t,(char**)NULL, 16);
00060                             if(l1) {
00061                                    (void)fprintf(fp1,"         ");
00062                                    (void)fputwc(l1, fp1);
00063                                    l1=strtol(t,(char**)NULL, 16);
00064                             } 
00065                             if ((t=(char *)strchr(b,','))!=NULL)  (void)fprintf(fp1," ");
00066                             while ((t=(char *)strtok(NULL," \t"))!=NULL) {
00067                                    if(put_space)
00068                                           (void)fprintf(fp1,"         ");
00069                                    l1=strtol(t,(char**)NULL, 16);
00070                                    if(l1) (void)fputwc(l1, fp1);
00071                             }
00072                             (void)fprintf(fp1,"\n");
00073                      }
00074               }      
00075        }
00076        (void)fclose(fp);
00077        (void)fclose(fp1);
00078 }
00079 
/*
 * Print the command line synopsis on stderr and terminate with exit(-1).
 *
 * prog - program name shown in the synopsis (main passes argv[0]).
 */
static void
usage(char *prog)
{
    FILE *err = stderr;

    (void)fprintf(err, "Usage: %s [-c lang.ucs]/[utf-8 map file]\n", prog);
    exit(-1);
}
00085 
00086 static void
00087 check_special_midword_consonant(FILE *in) {
00088        char buf[256];
00089        char *t;
00090        rewind(in);
00091        while ( fgets(buf, 256, in)) {
00092               if ((t=(char *)strtok(buf, " \t\n"))!= NULL) {
00093                      if(strcmp(t,"special_midword_consonant")==0) {
00094                             (void)strcpy(after_depend,"special");
00095                             return;
00096                      }
00097               }
00098        }
00099        (void)strcpy(after_depend,"INITIAL");
00100 }
00101 static void
00102 output_token_rules(FILE *mapp, FILE *outp) {
00103        char buf[256];
00104        int gen_flag=0;
00105        char *t, *b, *c;
00106 
00107        rewind(mapp);
00108        while ( fgets(buf, 256, mapp)) {
00109               if ((t=(char *)strtok(buf, " \t\n"))!= NULL) {
00110                      b=(char*)strdup(t);
00111                      if(strncmp(b,"virama", 6)==0) {
00112                             gen_flag=0;
00113                             continue;
00114                      } else if ((strcmp(b,"other")==0) || (strcmp(b,"special_other")==0)) {
00115                             gen_flag=1;
00116                             continue;
00117                      } else if ((strcmp(b,"vowel")==0) || (strcmp(b,"special_vowel")==0)) {
00118                             gen_flag=2;
00119                             continue;
00120                      } else if ((strcmp(b,"consonant")==0) || (strcmp(b,"special_consonant")==0)) {
00121                             gen_flag=3;
00122                             continue;
00123                      } else if ((strcmp(b,"digit")==0) || (strcmp(b,"special_digit")==0)) {
00124                             gen_flag=4;
00125                             continue;
00126                      } else if ((strcmp(b,"special_midword_consonant")==0)) {
00127                             gen_flag=5;
00128                             continue;
00129                      }
00130                      if ((t=(char *)strtok(NULL," \t\n"))==NULL)
00131                             t = (char*)strdup("");
00132                      if(gen_flag==1)
00133                             (void)fprintf(outp,"%s             { (void)strcpy(lang_str,\"%s\"); return(OTHER); }\n", b, t);
00134                      else if(gen_flag==2) {
00135                             c=(char *)strtok(NULL," \t\n");
00136                             if(!c) c=(char *)strdup("");
00137                             (void)fprintf(outp,"<depend>%s     { BEGIN %s; (void)strcpy(lang_str,\"%s\"); return(DEP_VOWEL); }\n", b, after_depend, c);
00138                             (void)fprintf(outp,"%s             { (void)strcpy(lang_str,\"%s\"); return(INDEP_VOWEL); }\n", b, t);
00139                      } else if(gen_flag==3) {
00140                             (void)fprintf(outp,"%s             { BEGIN depend; (void)strcpy(lang_str,\"%s\"); return(CONS); }\n", b, t);
00141                      } else if(gen_flag==4) {
00142                             (void)fprintf(outp,"%s             { (void)strcpy(lang_str,\"%s\"); return(DIGIT); }\n", b, t);
00143                      } else if(gen_flag==5) {
00144                             (void)fprintf(outp,"<depend,special>%s    { BEGIN depend; (void)strcpy(lang_str,\"%s\"); return(CONS); }\n", b, t);
00145                      }
00146               }
00147        }
00148 }
/*
 * Emit the VIRAMA_CHAR and HAS_HALF_FORM definitions derived from the map
 * file.
 *
 * mapp - map file stream open for reading; rewound before use.
 * outp - lex source being generated.
 *
 * The first non-blank line after a line whose first token starts with
 * "virama" supplies, as its second token, the string written as VIRAMA_CHAR.
 * HAS_HALF_FORM is written as 1 when any line's first token starts with
 * "has_half_form", otherwise as 0.
 *
 * Terminates with exit(-1) when the line after the virama keyword has no
 * second token.  Tokens are used in place inside the line buffer, so
 * nothing is allocated.
 */
static void
output_virama(FILE *mapp, FILE *outp) {
    char line[256];
    int virama_found = 0;
    int has_half_form = 0;
    char *key, *val;

    rewind(mapp);
    while (fgets(line, sizeof line, mapp) != NULL) {
        key = strtok(line, " \t\n");
        if (key == NULL)
            continue;

        if (!has_half_form && strncmp(key, "has_half_form", 13) == 0)
            has_half_form = 1;

        if (!virama_found && strncmp(key, "virama", 6) == 0) {
            /* Section keyword seen: the next non-blank line holds the mapping. */
            virama_found = 1;
            continue;
        }
        if (!virama_found)
            continue;

        val = strtok(NULL, " \t\n");
        if (val == NULL) {
            (void)fprintf(stderr,"Incomplete virama mapping in map file\n");
            exit(-1);
        }
        (void)fprintf(outp,"#define VIRAMA_CHAR   \"%s\"\n", val);
        virama_found = 0;
    }
    (void)fprintf(outp,"#define HAS_HALF_FORM        %d\n", has_half_form);
}
00179        
/*
 * Generate a lex source file by merging a skeleton with rules derived from
 * a map file.
 *
 * skel_file - path of the lex skeleton.
 * map_file  - path of the UTF-8 map file.
 *
 * Output name: the map file name cut at its first '.' plus ".l"; when the
 * name has no '.', "<map_file><skel_file>.merged".
 *
 * Skeleton lines are copied through with these exceptions:
 *   - lines containing only white space are not copied;
 *   - "#define VIRAMA_CHAR ..." is replaced by the output of output_virama();
 *   - a "#define" with nothing after it is dropped;
 *   - after the first "%%" line the token rules from output_token_rules()
 *     and three fixed catch-all rules are inserted.
 *
 * Terminates with exit(-1) when a file cannot be opened or written, or when
 * the output name does not fit its buffer.
 */
static void
fill_in_lex_skeleton(char *skel_file, char *map_file) {
    FILE *skel, *map, *out;
    char line[256];
    char copy[256];
    char lexsource[128];
    char *tok;
    const char *dot;
    int sect_over = 0;
    int len;

    skel = fopen(skel_file, "r");
    if (skel == NULL) {
        (void)fprintf(stderr,"Skeleton file lex.skel does not exist\n");
        exit(-1);
    }
    map = fopen(map_file, "r");
    if (map == NULL) {
        (void)fprintf(stderr,"Cannot open map file %s\n", map_file);
        exit(-1);
    }

    dot = strchr(map_file, '.');
    if (dot == NULL)
        len = snprintf(lexsource, sizeof lexsource, "%s%s.merged", map_file, skel_file);
    else
        len = snprintf(lexsource, sizeof lexsource, "%.*s.l", (int)(dot - map_file), map_file);
    if (len < 0 || (size_t)len >= sizeof lexsource) {
        (void)fprintf(stderr,"Output file name derived from %s is too long\n", map_file);
        exit(-1);
    }

    out = fopen(lexsource, "w");
    if (out == NULL) {
        (void)fprintf(stderr,"Cannot create lex source file %s\n", lexsource);
        exit(-1);
    }

    while (fgets(line, sizeof line, skel) != NULL) {
        /* strtok cuts `line` apart; `copy` keeps the text to write out. */
        (void)strcpy(copy, line);
        tok = strtok(line, " \t\n");
        if (tok == NULL)
            continue;

        if (strcmp(tok, "#define") == 0) {
            tok = strtok(NULL, " \t");
            if (tok == NULL)
                continue;
            if (strcmp(tok, "VIRAMA_CHAR") == 0)
                output_virama(map, out);
            else
                (void)fputs(copy, out);
        } else if (strcmp(tok, "%%") == 0 && !sect_over) {
            /* Only the first "%%" opens the rules section. */
            sect_over = 1;
            (void)fputs(copy, out);
            check_special_midword_consonant(map);
            output_token_rules(map, out);
            (void)fprintf(out,"{space}|\\n   { BEGIN INITIAL; return(WORD_START); }\n");
            (void)fprintf(out,"[a-zA-Z]             { return(ALPHA_NOT_MAPPED); }\n");
            (void)fprintf(out,".             { BEGIN INITIAL; return(NOT_MAPPED); }\n");
        } else {
            (void)fputs(copy, out);
        }
    }

    (void)fclose(skel);
    (void)fclose(map);
    if (fclose(out) != 0) {
        (void)fprintf(stderr,"Error writing lex source file %s\n", lexsource);
        exit(-1);
    }
}
00234 
00235 int 
00236 main(int argc, char *argv[]) {
00237 
00238   /*
00239        locale=setlocale(LC_ALL,"");
00240        if(strstr(locale,"UTF-8")==NULL) {
00241               (void)fprintf(stderr,"Invoke the program %s in a UTF-8 locale\n", argv[0]);
00242               exit(-1);
00243 
00244        }
00245   */
00246        if(!argv[1]) {
00247               usage(argv[0]);
00248        } else if (argv[1][0]=='-') {
00249               if(argv[1][1]=='c' && argv[2] )
00250                      create_mapfile(argv[2]);
00251               else
00252                      usage(argv[0]);
00253        } else {
00254               (void)strcpy(mapfile,argv[1]);
00255        }
00256 
00257        fill_in_lex_skeleton("lex.skel", mapfile );
00258        return(0);
00259 }