Back to index

lightning-sunbird  0.9+nobinonly
Defines | Functions
nsClassicDetectors.cpp File Reference
#include "nsIFactory.h"
#include "nsISupports.h"
#include "nsClassicCharDetDll.h"
#include "pratom.h"
#include "nsICharsetDetector.h"
#include "nsICharsetDetectionObserver.h"
#include "nsIStringCharsetDetector.h"
#include "nsClassicDetectors.h"

Go to the source code of this file.

Defines

/* EUC-JP single-shift lead bytes; isEUCJP dispatches on them and isSJIS
   special-cases the same two values. */
#define SS2   0x8E /* Single Shift 2 */
#define SS3   0x8F /* Single Shift 3 */
/* True for byte values below 0x80. */
#define IsRoman(c)   ((c) < 0x80)
/* Second-byte range accepted by isSJIS: 0x40..0xFC inclusive. */
#define IsSJIS2ndByte(c)   (((c) > 0x3F) && ((c) < 0xFD))
/* Lower part of that range: 0x40..0xA0 inclusive. */
#define IsLoSJIS2ndByte(c)   (((c) > 0x3F) && ((c) < 0xA1))
/* Upper part of that range: 0xA1..0xFC inclusive. */
#define IsHiSJIS2ndByte(c)   (((c) > 0xA0) && ((c) < 0xFD))
/* Byte accepted after SS2 by isEUCJP: 0xA1..0xDF inclusive. */
#define IsEUCJPKana(b1)   (((b1) > 0xA0) && ((b1) < 0xE0))
/* Trailing-byte range checked by isEUCJP: 0xA1..0xFE inclusive. */
#define IsEUCJPKanji(b1or2)   (((b1or2) > 0xA0) && ((b1or2) < 0xFF))
/* Tri-state result codes returned by isSJIS and isEUCJP. */
#define YES   1
#define NO   0
#define MAYBE   -1

Functions

static int isSJIS (const unsigned char *cp, PRInt32 len)
static int isEUCJP (const unsigned char *cp, PRInt32 len)
static nsresult JA_AutoCharsetDetectBuffer (const char *aBuffer, const PRInt32 aLen, char *aCharset)

Define Documentation

#define IsEUCJPKana (   b1)    (((b1) > 0xA0) && ((b1) < 0xE0))

Definition at line 59 of file nsClassicDetectors.cpp.

#define IsEUCJPKanji (   b1or2)    (((b1or2) > 0xA0) && ((b1or2) < 0xFF))

Definition at line 60 of file nsClassicDetectors.cpp.

#define IsHiSJIS2ndByte (   c)    (((c) > 0xA0) && ((c) < 0xFD))

Definition at line 58 of file nsClassicDetectors.cpp.

#define IsLoSJIS2ndByte (   c)    (((c) > 0x3F) && ((c) < 0xA1))

Definition at line 57 of file nsClassicDetectors.cpp.

#define IsRoman (   c)    ((c) < 0x80)

Definition at line 55 of file nsClassicDetectors.cpp.

#define IsSJIS2ndByte (   c)    (((c) > 0x3F) && ((c) < 0xFD))

Definition at line 56 of file nsClassicDetectors.cpp.

#define MAYBE   -1

Definition at line 64 of file nsClassicDetectors.cpp.

#define NO   0

Definition at line 63 of file nsClassicDetectors.cpp.

#define SS2   0x8E /* Single Shift 2 */

Definition at line 53 of file nsClassicDetectors.cpp.

#define SS3   0x8F /* Single Shift 3 */

Definition at line 54 of file nsClassicDetectors.cpp.

#define YES   1

Definition at line 62 of file nsClassicDetectors.cpp.


Function Documentation

static int isEUCJP ( const unsigned char *  cp,
PRInt32  len 
) [static]

Definition at line 108 of file nsClassicDetectors.cpp.

{
       /*
        * Scan the len bytes at cp and judge whether they can be EUC-JP.
        *
        * Returns:
        *   NO    - a byte sequence that is illegal in EUC-JP was found.
        *   YES   - a two-byte sequence with lead byte 0xF0..0xFE was found
        *           (isSJIS rejects those lead bytes).
        *   MAYBE - the buffer ended, possibly in the middle of a multi-byte
        *           sequence, without any byte ruling EUC-JP in or out.
        */
       while (len) {
              if (IsRoman(*cp)) {                       /* 7-bit byte */
                     cp++, len--;
              } else if (*cp == SS2) {                  /* SS2 + one kana byte */
                     if (len < 2)
                            break;                      /* buffer ended w/1of2 byte EUCJP */
                     if (!IsEUCJPKana(cp[1]))
                            return NO;                  /* invalid 2of2 byte EUCJP */
                     cp += 2, len -= 2;                 /* valid half-width kana */
              } else if (*cp == SS3) {                  /* SS3 + two kanji bytes */
                     if (len < 2)
                            break;                      /* buffer ended w/1of3 byte EUCJP */
                     if (!IsEUCJPKanji(cp[1]))
                            return NO;                  /* invalid 2of3 byte EUCJP */
                     if (len < 3)
                            break;                      /* buffer ended w/2of3 byte EUCJP */
                     if (!IsEUCJPKanji(cp[2]))
                            return NO;                  /* invalid 3of3 byte EUCJP */
                     /* Consume all three bytes. Advancing by only two would
                        leave the trailing byte to be re-read as a new lead
                        byte and reject valid input. */
                     cp += 3, len -= 3;                 /* valid 3 byte EUCJP */
              } else if (*cp == 0xA0) {                 /* illegal EUCJP byte */
#if ALLOW_NBSP
                     cp++, len--;                       /* allow nbsp */
#else
                     return NO;
#endif
              } else if (*cp < 0xF0) {                  /* two-byte EUCJP (overlaps SJIS) */
                     /* NOTE(review): this branch also takes lead bytes
                        0x80..0x9F other than SS2/SS3; kept as in the
                        original. */
                     if (len < 2)
                            break;                      /* buffer ended w/1of2 byte EUCJP */
                     if (!IsEUCJPKanji(cp[1]))
                            return NO;                  /* invalid 2of2 byte EUCJP */
                     cp += 2, len -= 2;                 /* valid 2 byte EUCJP */
              } else if (*cp < 0xFF) {                  /* lead byte 0xF0..0xFE */
                     if (len < 2)
                            break;                      /* buffer ended w/1of2 byte EUCJP */
                     if (!IsEUCJPKanji(cp[1]))
                            return NO;                  /* invalid 2of2 byte EUCJP */
                     return YES;                        /* valid 2 byte EUCJP, invalid SJIS */
              } else {
                     return NO;                         /* invalid EUCJP 1st byte: 0xFF */
              }
       }
       return MAYBE;
}

Here is the caller graph for this function:

static int isSJIS ( const unsigned char *  cp,
PRInt32  len 
) [static]

Definition at line 67 of file nsClassicDetectors.cpp.

{
       /*
        * Scan the len bytes at cp and judge whether they can be Shift_JIS.
        *
        * Returns:
        *   NO    - a byte sequence that is illegal in Shift_JIS was found.
        *   YES   - a two-byte sequence with lead byte 0x81..0x9F was found
        *           that is not also shaped like an SS2/SS3 sequence.
        *   MAYBE - the buffer ended, possibly in the middle of a two-byte
        *           sequence, without any byte ruling Shift_JIS in or out.
        */
       while (len) {
              if (IsRoman(*cp)) {                       /* 7-bit byte */
                     cp++, len--;
              } else if (*cp == 0x80) {                 /* illegal SJIS 1st byte */
                     return NO;
              } else if (*cp < 0xA0) {                  /* byte 1 of 2byte SJIS 1st range */
                     if (len < 2)
                            break;                      /* buffer ended w/1of2 byte SJIS */
                     if (!IsSJIS2ndByte(cp[1]))
                            return NO;                  /* invalid SJIS 2nd byte */
                     /* A lead byte of 0x8E/0x8F (the SS2/SS3 values) followed
                        by a byte above 0xA0 stays ambiguous; every other
                        valid pair in this range is reported as SJIS. */
                     if ((*cp != 0x8E && *cp != 0x8F) || (cp[1] <= 0xA0))
                            return YES;
                     cp += 2, len -= 2;                 /* valid 2 byte SJIS */
              } else if (*cp == 0xA0) {                 /* illegal SJIS byte */
#if ALLOW_NBSP
                     cp++, len--;                       /* allow nbsp */
#else
                     /* Without this the branch consumes nothing and the loop
                        never terminates on a 0xA0 byte; reject it, as
                        isEUCJP does. */
                     return NO;
#endif
              } else if (*cp < 0xE0) {                  /* SJIS half-width kana */
                     cp++, len--;
              } else if (*cp < 0xF0) {                  /* byte 1 of 2byte SJIS 2nd range */
                     if (len < 2)
                            break;                      /* buffer ended w/1of2 byte SJIS */
                     if (!IsSJIS2ndByte(cp[1]))
                            return NO;                  /* invalid SJIS 2nd byte */
                     cp += 2, len -= 2;                 /* valid 2 byte SJIS */
              } else {
                     return NO;                         /* invalid SJIS 1st byte */
              }
       }
       return MAYBE;                                    /* No illegal SJIS values found */
}

Here is the caller graph for this function:

static nsresult JA_AutoCharsetDetectBuffer ( const char *  aBuffer,
const PRInt32  aLen,
char *  aCharset 
) [static]

Definition at line 166 of file nsClassicDetectors.cpp.

{
  // Writes the name of the detected charset into aCharset and returns NS_OK.
  // The default below is what remains for input with no ESC and no 8-bit
  // byte, and for 8-bit input that both Japanese checkers reject.
  PL_strcpy(aCharset, "ISO-8859-1");

  // Single pass over the buffer: an ESC byte settles the answer at once,
  // otherwise remember whether any byte above 127 was seen.
  PRBool sawEightBit = PR_FALSE;
  for (int pos = 0; pos < aLen; ++pos) {
    if (aBuffer[pos] == 0x1B) {
      PL_strcpy(aCharset, "ISO-2022-JP");
      return NS_OK;
    }
    if ((unsigned char) aBuffer[pos] > 127) {
      sawEightBit = PR_TRUE;
    }
  }

  if (!sawEightBit) {
    return NS_OK;
  }

  // use old japanese auto detect code
  const int sjisVerdict = isSJIS((unsigned char *) aBuffer, aLen);
  const int eucVerdict = isEUCJP((unsigned char *) aBuffer, aLen);

  // Both checkers return only YES, NO or MAYBE. A definite YES wins, with
  // EUC-JP consulted first; after that a MAYBE from EUC-JP outranks a MAYBE
  // from Shift_JIS, and two NOs leave the default untouched.
  if (YES == eucVerdict) {
    PL_strcpy(aCharset, "EUC-JP");
  }
  else if (YES == sjisVerdict) {
    PL_strcpy(aCharset, "Shift_JIS");
  }
  else if (MAYBE == eucVerdict) {
    PL_strcpy(aCharset, "EUC-JP");
  }
  else if (MAYBE == sjisVerdict) {
    PL_strcpy(aCharset, "Shift_JIS");
  }

  return NS_OK;
}

Here is the call graph for this function:

Here is the caller graph for this function: