Back to index

lightning-sunbird  0.9+nobinonly
csutil.cpp
Go to the documentation of this file.
00001 #include <stdlib.h>
00002 #include <string.h>
00003 #include <stdio.h>
00004 #include "csutil.hxx"
00005 
00006 #include "nsCOMPtr.h"
00007 #include "nsServiceManagerUtils.h"
00008 #include "nsIUnicodeEncoder.h"
00009 #include "nsIUnicodeDecoder.h"
00010 #include "nsICaseConversion.h"
00011 #include "nsICharsetConverterManager.h"
00012 #include "nsUnicharUtilCIID.h"
00013 #include "nsUnicharUtils.h"
00014 
00015 static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
00016 static NS_DEFINE_CID(kUnicharUtilCID, NS_UNICHARUTIL_CID);
00017 
00018 #ifdef __SUNPRO_CC // for SunONE Studio compiler
00019 using namespace std;
00020 #endif
00021 
// Split the next token off *stringp at a single delimiter character.
// Acts like strsep(), except that the delimiter is one char (not a set of
// chars) and the token is handed back as a freshly malloc()ed, NUL-terminated
// copy that the caller must free(); the input string itself is not modified.
//
// On success *stringp is advanced just past the delimiter that ended the
// token, or to the terminating NUL when no delimiter was left.
// Returns NULL, leaving *stringp untouched, when the remaining string is
// empty, when stringp or *stringp is NULL, or when the allocation fails.
char * mystrsep(char ** stringp, const char delim)
{
  if (!stringp || !*stringp) return NULL;

  char * mp = *stringp;
  size_t n = strlen(mp);
  if (n == 0) return NULL;

  // the token ends at the first delimiter, or at the end of the string
  char * dp = (char *) memchr(mp, (int)((unsigned char) delim), n);
  size_t nc = dp ? (size_t)(dp - mp) : n;

  char * rv = (char *) malloc(nc + 1);
  if (!rv) return NULL;
  memcpy(rv, mp, nc);
  rv[nc] = '\0';

  // skip the delimiter itself, or park on the terminating NUL
  *stringp = dp ? dp + 1 : mp + n;
  return rv;
}
00050 
00051 
// ANSI replacement for strdup(): returns a malloc()ed copy of s, including
// its terminating NUL, that the caller must free().
// Returns NULL when s is NULL or when the allocation fails.
char * mystrdup(const char * s)
{
  if (!s) return NULL;

  // keep the size in size_t: strlen() returns size_t and storing it in an
  // int would truncate the length of very long strings
  size_t bytes = strlen(s) + 1;   // +1 for the terminating NUL
  char * d = (char *) malloc(bytes);
  if (d) memcpy(d, s, bytes);
  return d;
}
00063 
00064 
// Remove a cross-platform line terminator from the end of s, in place.
// A trailing '\n' or '\r' is cut off; when that happened, a '\r' directly
// in front of it is cut off as well, so "\n", "\r" and "\r\n" endings are
// all handled.  Strings that do not end in a line terminator are left
// untouched (a '\r' elsewhere in the string is ordinary data).
// A NULL s is ignored.
void mychomp(char * s)
{
  if (!s) return;

  size_t k = strlen(s);
  if (k == 0) return;

  // nothing to strip unless the very last character ends a line
  char last = s[k - 1];
  if (last != '\r' && last != '\n') return;
  s[k - 1] = '\0';

  // second half of a two-character ending such as "\r\n"
  if (k > 1 && s[k - 2] == '\r') s[k - 2] = '\0';
}
00072 
00073 
// ANSI strdup of the reverse of a string: returns a malloc()ed,
// NUL-terminated copy of s with its characters in reverse order.
// The caller must free() the result.
// Returns NULL when s is NULL or when the allocation fails.
char * myrevstrdup(const char * s)
{
    if (!s) return NULL;

    size_t sl = strlen(s);
    char * d = (char *) malloc(sl + 1);
    if (!d) return NULL;

    // Walk by index rather than by a pointer running backwards: a backwards
    // pointer would have to step to one element before s to end the loop,
    // which is undefined behaviour (and happens at once for an empty s).
    for (size_t i = 0; i < sl; i++) d[i] = s[sl - 1 - i];
    d[sl] = '\0';
    return d;
}
00090 
#if 0
// Disabled variant of isSubset(): the surrounding #if 0 keeps it out of the
// build.  It checks the prefix relation with strlen() and strncmp().
// return 1 if s1 is a leading subset of s2
int isSubset(const char * s1, const char * s2)
{
  int l1 = strlen(s1);
  int l2 = strlen(s2);
  // s1 is longer than s2, so it cannot be a prefix of it
  if (l1 > l2) return 0;
  // compare only the first l1 characters of s2 against s1
  if (strncmp(s2,s1,l1) == 0) return 1;
  return 0;
}
#endif
00102 
00103 
// Prefix test: returns 1 when every character of s1 matches the character at
// the same position in s2 (s1 is a leading subset of s2), otherwise 0.
// An empty s1 is a prefix of anything.
int isSubset(const char * s1, const char * s2)
{
  for (; *s1 != '\0'; ++s1, ++s2) {
    // first mismatch (including s2 ending early) settles it
    if (*s1 != *s2) return 0;
  }
  return 1;
}
00113 
00114 
// Reverse prefix test: s1 is read forwards while s2 is read backwards,
// starting at end_of_s2, for at most len characters.
// Returns 1 when all of s1 was matched that way, otherwise 0 (a mismatch, or
// len ran out before the end of s1 was reached).
int isRevSubset(const char * s1, const char * end_of_s2, int len)
{
  for (; len > 0 && *s1 != '\0'; ++s1, --end_of_s2, --len) {
    if (*s1 != *end_of_s2) return 0;
  }
  // success only if s1 was consumed completely
  return (*s1 == '\0') ? 1 : 0;
}
00125 
00126 
#if 0
// Not needed in mozilla
// The three helpers below are excluded from the build by the surrounding
// #if 0.  Each one fetches a conversion table with get_current_cs(encoding)
// and writes the converted copy of p into the caller-supplied buffer d.
// NOTE(review): none of them checks the returned table for NULL or releases
// it -- to be sorted out before this code is ever re-enabled.

// convert null terminated string to all caps using encoding 
void enmkallcap(char * d, const char * p, const char * encoding)
{
  struct cs_info * csconv = get_current_cs(encoding);
  while (*p != '\0') { 
    // index the table with the unsigned byte value of the character
    *d++ = csconv[((unsigned char) *p)].cupper;
    p++;
  }
  *d = '\0';
}


// convert null terminated string to all lower case using encoding
void enmkallsmall(char * d, const char * p, const char * encoding)
{
  struct cs_info * csconv = get_current_cs(encoding);
  while (*p != '\0') { 
    // index the table with the unsigned byte value of the character
    *d++ = csconv[((unsigned char) *p)].clower;
    p++;
  }
  *d = '\0';
}


// convert null terminated string to have initial capital using encoding
void enmkinitcap(char * d, const char * p, const char * encoding)
{
  struct cs_info * csconv = get_current_cs(encoding);
  // copy the whole string (with its NUL), then upper-case the first char only
  memcpy(d,p,(strlen(p)+1));
  if (*p != '\0') *d= csconv[((unsigned char)*p)].cupper;
}
#endif
00161 
00162 // convert null terminated string to all caps 
00163 void mkallcap(char * p, const struct cs_info * csconv)
00164 {
00165   while (*p != '\0') { 
00166     *p = csconv[((unsigned char) *p)].cupper;
00167     p++;
00168   }
00169 }
00170 
00171 
00172 // convert null terminated string to all little
00173 void mkallsmall(char * p, const struct cs_info * csconv)
00174 {
00175   while (*p != '\0') { 
00176     *p = csconv[((unsigned char) *p)].clower;
00177     p++;
00178   }
00179 }
00180 
00181 
00182 // convert null terminated string to have intial capital
00183 void mkinitcap(char * p, const struct cs_info * csconv)
00184 {
00185   if (*p != '\0') *p = csconv[((unsigned char)*p)].cupper;
00186 }
00187 
00188 
00189 // XXX This function was rewritten for mozilla. Instead of storing the
00190 // conversion tables static in this file, create them when needed
00191 // with help the mozilla backend.
00192 struct cs_info * get_current_cs(const char * es) {
00193   struct cs_info *ccs;
00194 
00195   nsCOMPtr<nsIUnicodeEncoder> encoder; 
00196   nsCOMPtr<nsIUnicodeDecoder> decoder; 
00197   nsCOMPtr<nsICaseConversion> caseConv;
00198 
00199   nsresult rv;
00200   nsCOMPtr<nsICharsetConverterManager> ccm = do_GetService(kCharsetConverterManagerCID, &rv);
00201   if (NS_FAILED(rv))
00202     return nsnull;
00203 
00204   rv = ccm->GetUnicodeEncoder(es, getter_AddRefs(encoder));
00205   if (encoder && NS_SUCCEEDED(rv))
00206     encoder->SetOutputErrorBehavior(encoder->kOnError_Replace, nsnull, '?');
00207   if (NS_FAILED(rv))
00208     return nsnull;
00209   rv = ccm->GetUnicodeDecoder(es, getter_AddRefs(decoder));
00210 
00211   caseConv = do_GetService(kUnicharUtilCID, &rv);
00212   if (NS_FAILED(rv))
00213     return nsnull;
00214 
00215   ccs = (struct cs_info *) malloc(256 * sizeof(cs_info));
00216 
00217   PRInt32 charLength = 256;
00218   PRInt32 uniLength = 512;
00219   char *source = (char *)malloc(charLength * sizeof(char));
00220   PRUnichar *uni = (PRUnichar *)malloc(uniLength * sizeof(PRUnichar));
00221   char *lower = (char *)malloc(charLength * sizeof(char));
00222   char *upper = (char *)malloc(charLength * sizeof(char));
00223 
00224   // Create a long string of all chars.
00225   unsigned int i;
00226   for (i = 0x00; i <= 0xff ; ++i) {
00227     source[i] = i;
00228   }
00229 
00230   // Convert this long string to unicode
00231   rv = decoder->Convert(source, &charLength, uni, &uniLength);
00232 
00233   // Do case conversion stuff, and convert back.
00234   caseConv->ToUpper(uni, uni, uniLength);
00235   encoder->Convert(uni, &uniLength, upper, &charLength);
00236 
00237   uniLength = 512;
00238   charLength = 256;
00239   rv = decoder->Convert(source, &charLength, uni, &uniLength);
00240   caseConv->ToLower(uni, uni, uniLength);
00241   encoder->Convert(uni, &uniLength, lower, &charLength);
00242 
00243   // Store
00244   for (i = 0x00; i <= 0xff ; ++i) {
00245     ccs[i].cupper = upper[i];
00246     ccs[i].clower = lower[i];
00247     
00248     if (ccs[i].clower != (unsigned char)i)
00249       ccs[i].ccase = true;
00250     else
00251       ccs[i].ccase = false;
00252       
00253   }
00254 
00255   free(source);
00256   free(uni);
00257   free(lower);
00258   free(upper);
00259 
00260   return ccs;
00261 }
00262 
00263 
// Default encodings per language: each entry pairs a language code with the
// name of an encoding.  get_default_enc() searches this table linearly and
// derives the entry count from sizeof, so entries can simply be appended.
struct lang_map lang2enc[] = {
  {"ca","ISO8859-1"},
  {"cs","ISO8859-2"},
  {"da","ISO8859-1"},
  {"de","ISO8859-1"},
  {"el","ISO8859-7"},
  {"en","ISO8859-1"},
  {"es","ISO8859-1"},
  {"fr","ISO8859-1"},
  {"hr","ISO8859-2"},
  {"hu","ISO8859-2"},
  {"it","ISO8859-1"},
  {"la","ISO8859-1"},
  {"lv","ISO8859-13"},
  {"nl","ISO8859-1"},
  {"pl","ISO8859-2"},
  {"pt","ISO8859-1"},
  {"sv","ISO8859-1"},
  {"ru","KOI8-R"},
  {"bg","microsoft-cp1251"},
};
00285 
00286 
00287 const char * get_default_enc(const char * lang) {
00288   int n = sizeof(lang2enc) / sizeof(lang2enc[0]);
00289   for (int i = 0; i < n; i++) {
00290     if (strcmp(lang,lang2enc[i].lang) == 0) {
00291       return lang2enc[i].def_enc;
00292     }
00293   }
00294   return NULL;
00295 }