Back to index

php5  5.3.10
Defines | Enumerations | Functions | Variables
gb18030.c File Reference
#include "regenc.h"

Go to the source code of this file.

Defines

#define DEBUG_GB18030(arg)

Enumerations

enum  { C1, C2, C4, CM }
enum  state {
  S_START, S_one_C2, S_one_C4, S_one_CM,
  S_odd_CM_one_CX, S_even_CM_one_CX, S_one_CMC4, S_odd_CMC4,
  S_one_C4_odd_CMC4, S_even_CMC4, S_one_C4_even_CMC4, S_odd_CM_odd_CMC4,
  S_even_CM_odd_CMC4, S_odd_CM_even_CMC4, S_even_CM_even_CMC4, S_odd_C4CM,
  S_one_CM_odd_C4CM, S_even_C4CM, S_one_CM_even_C4CM, S_even_CM_odd_C4CM,
  S_odd_CM_odd_C4CM, S_even_CM_even_C4CM, S_odd_CM_even_C4CM
}

Functions

static int gb18030_mbc_enc_len (const UChar *p)
static OnigCodePoint gb18030_mbc_to_code (const UChar *p, const UChar *end)
static int gb18030_code_to_mbc (OnigCodePoint code, UChar *buf)
static int gb18030_mbc_to_normalize (OnigAmbigType flag, const UChar **pp, const UChar *end, UChar *lower)
static int gb18030_is_mbc_ambiguous (OnigAmbigType flag, const UChar **pp, const UChar *end)
static int gb18030_is_code_ctype (OnigCodePoint code, unsigned int ctype)
static UChar * gb18030_left_adjust_char_head (const UChar *start, const UChar *s)
static int gb18030_is_allowed_reverse_match (const UChar *s, const UChar *end)

Variables

static const char GB18030_MAP []
OnigEncodingType OnigEncodingGB18030

Define Documentation

#define DEBUG_GB18030 (   arg)

Definition at line 34 of file gb18030.c.


Enumeration Type Documentation

anonymous enum
Enumerator:
C1 
C2 
C4 
CM 

Definition at line 39 of file gb18030.c.

     {
  /* Byte classes: these are the values stored in GB18030_MAP and compared
     against by the functions in this file. */
  C1, /* one-byte char */
  C2, /* one-byte or second of two-byte char */
  C4, /* one-byte or second or fourth of four-byte char */
  CM  /* first of two- or four-byte char or second of two-byte char */
};
enum state
Enumerator:
S_START 
S_one_C2 
S_one_C4 
S_one_CM 
S_odd_CM_one_CX 
S_even_CM_one_CX 
S_one_CMC4 
S_odd_CMC4 
S_one_C4_odd_CMC4 
S_even_CMC4 
S_one_C4_even_CMC4 
S_odd_CM_odd_CMC4 
S_even_CM_odd_CMC4 
S_odd_CM_even_CMC4 
S_even_CM_even_CMC4 
S_odd_C4CM 
S_one_CM_odd_C4CM 
S_even_C4CM 
S_one_CM_even_C4CM 
S_even_CM_odd_C4CM 
S_odd_CM_odd_C4CM 
S_even_CM_even_C4CM 
S_odd_CM_even_C4CM 

Definition at line 110 of file gb18030.c.


Function Documentation

static int gb18030_code_to_mbc ( OnigCodePoint  code,
UChar * buf 
) [static]

Definition at line 85 of file gb18030.c.

Here is the call graph for this function:

static int gb18030_is_allowed_reverse_match ( const UChar * s,
const UChar * end 
) [static]

Definition at line 470 of file gb18030.c.

{
  /* TRUE only when the byte at s has class C1 in GB18030_MAP; every other
     class yields FALSE. The end argument is not consulted. */
  return GB18030_MAP[*s] == C1 ? TRUE : FALSE;
}
static int gb18030_is_code_ctype ( OnigCodePoint  code,
unsigned int  ctype 
) [static]

Definition at line 105 of file gb18030.c.

Here is the call graph for this function:

static int gb18030_is_mbc_ambiguous ( OnigAmbigType  flag,
const UChar **  pp,
const UChar * end 
) [static]

Definition at line 99 of file gb18030.c.

Here is the call graph for this function:

static UChar* gb18030_left_adjust_char_head ( const UChar * start,
const UChar * s 
) [static]

Definition at line 145 of file gb18030.c.

{
  /*
   * Scans backwards from s toward start, classifying each byte through
   * GB18030_MAP and feeding the class into a state machine. The state names
   * and the per-case comments record the sequence of byte classes seen so far
   * (leftmost = most recently read). As soon as a byte is read that settles
   * the question, the function returns s moved back by 0, 1, 2 or 3 bytes.
   * If start is reached first, the switch after the loop picks the offset
   * from the final state.
   */
  const UChar *p;
  enum state state = S_START;

  DEBUG_GB18030(("----------------\n"));
  /* NOTE(review): when the loop runs to completion, p is decremented to
     start - 1 before the p >= start test fails — confirm that forming this
     pointer is acceptable for every caller's buffer. */
  for (p = s; p >= start; p--) {
    DEBUG_GB18030(("state %d --(%02x)-->\n", state, *p));
    switch (state) {
    case S_START:
      switch (GB18030_MAP[*p]) {
      case C1:
       return (UChar *)s;
      case C2:
       state = S_one_C2; /* C2 */
       break;
      case C4:
       state = S_one_C4; /* C4 */
       break;
      case CM:
       state = S_one_CM; /* CM */
       break;
      }
      break;
    case S_one_C2: /* C2 */
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
      case C4:
       return (UChar *)s;
      case CM:
       state = S_odd_CM_one_CX; /* CM C2 */
       break;
      }
      break;
    case S_one_C4: /* C4 */
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
      case C4:
       return (UChar *)s;
      case CM:
       state = S_one_CMC4;
       break;
      }
      break;
    case S_one_CM: /* CM */
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
       return (UChar *)s;
      case C4:
       state = S_odd_C4CM;
       break;
      case CM:
       state = S_odd_CM_one_CX; /* CM CM */
       break;
      }
      break;

    case S_odd_CM_one_CX: /* CM C2 */ /* CM CM */ /* CM CM CM C4 */
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
      case C4:
       return (UChar *)(s - 1);
      case CM:
       state = S_even_CM_one_CX;
       break;
      }
      break;
    case S_even_CM_one_CX: /* CM CM C2 */ /* CM CM CM */ /* CM CM C4 */
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
      case C4:
       return (UChar *)s;
      case CM:
       state = S_odd_CM_one_CX;
       break;
      }
      break;

    case S_one_CMC4: /* CM C4 */
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
       return (UChar *)(s - 1);
      case C4:
       state = S_one_C4_odd_CMC4; /* C4 CM C4 */
       break;
      case CM:
       state = S_even_CM_one_CX; /* CM CM C4 */
       break;
      }
      break;
    case S_odd_CMC4: /* CM C4 CM C4 CM C4 */
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
       return (UChar *)(s - 1);
      case C4:
       state = S_one_C4_odd_CMC4;
       break;
      case CM:
       state = S_odd_CM_odd_CMC4;
       break;
      }
      break;
    case S_one_C4_odd_CMC4: /* C4 CM C4 */
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
      case C4:
       return (UChar *)(s - 1);
      case CM:
       state = S_even_CMC4; /* CM C4 CM C4 */
       break;
      }
      break;
    case S_even_CMC4: /* CM C4 CM C4 */
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
       return (UChar *)(s - 3);
      case C4:
       state = S_one_C4_even_CMC4;
       break;
      case CM:
       state = S_odd_CM_even_CMC4;
       break;
      }
      break;
    case S_one_C4_even_CMC4: /* C4 CM C4 CM C4 */
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
      case C4:
       return (UChar *)(s - 3);
      case CM:
       state = S_odd_CMC4;
       break;
      }
      break;

    case S_odd_CM_odd_CMC4: /* CM CM C4 CM C4 CM C4 */
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
      case C4:
       return (UChar *)(s - 3);
      case CM:
       state = S_even_CM_odd_CMC4;
       break;
      }
      break;
    case S_even_CM_odd_CMC4: /* CM CM CM C4 CM C4 CM C4 */
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
      case C4:
       return (UChar *)(s - 1);
      case CM:
       state = S_odd_CM_odd_CMC4;
       break;
      }
      break;

    case S_odd_CM_even_CMC4: /* CM CM C4 CM C4 */
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
      case C4:
       return (UChar *)(s - 1);
      case CM:
       state = S_even_CM_even_CMC4;
       break;
      }
      break;
    case S_even_CM_even_CMC4: /* CM CM CM C4 CM C4 */
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
      case C4:
       return (UChar *)(s - 3);
      case CM:
       state = S_odd_CM_even_CMC4;
       break;
      }
      break;

    case S_odd_C4CM: /* C4 CM */  /* C4 CM C4 CM C4 CM*/
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
      case C4:
       return (UChar *)s;
      case CM:
       state = S_one_CM_odd_C4CM; /* CM C4 CM */
       break;
      }
      break;
    case S_one_CM_odd_C4CM: /* CM C4 CM */ /* CM C4 CM C4 CM C4 CM */
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
       return (UChar *)(s - 2); /* |CM C4 CM */
      case C4:
       state = S_even_C4CM;
       break;
      case CM:
       state = S_even_CM_odd_C4CM;
       break;
      }
      break;
    case S_even_C4CM: /* C4 CM C4 CM */
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
      case C4:
       return (UChar *)(s - 2);  /* C4|CM C4 CM */
      case CM:
       state = S_one_CM_even_C4CM;
       break;
      }
      break;
    case S_one_CM_even_C4CM: /* CM C4 CM C4 CM */
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
       return (UChar *)(s - 0);  /*|CM C4 CM C4|CM */
      case C4:
       state = S_odd_C4CM;
       break;
      case CM:
       state = S_even_CM_even_C4CM;
       break;
      }
      break;

    case S_even_CM_odd_C4CM: /* CM CM C4 CM */
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
      case C4:
       return (UChar *)(s - 0); /* |CM CM|C4|CM */
      case CM:
       state = S_odd_CM_odd_C4CM;
       break;
      }
      break;
    case S_odd_CM_odd_C4CM: /* CM CM CM C4 CM */
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
      case C4:
       return (UChar *)(s - 2); /* |CM CM|CM C4 CM */
      case CM:
       state = S_even_CM_odd_C4CM;
       break;
      }
      break;

    case S_even_CM_even_C4CM: /* CM CM C4 CM C4 CM */
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
      case C4:
       return (UChar *)(s - 2); /* |CM CM|C4|CM C4 CM */
      case CM:
       state = S_odd_CM_even_C4CM;
       break;
      }
      break;
    case S_odd_CM_even_C4CM: /* CM CM CM C4 CM C4 CM */
      switch (GB18030_MAP[*p]) {
      case C1:
      case C2:
      case C4:
       return (UChar *)(s - 0);  /* |CM CM|CM C4 CM C4|CM */
      case CM:
       state = S_even_CM_even_C4CM;
       break;
      }
      break;
    }
  }

  DEBUG_GB18030(("state %d\n", state));
  /* start was reached without an early return. Each state maps to the same
     offset that state returns inside the loop when it meets a byte class
     that ends the scan. */
  switch (state) {
  case S_START:             return (UChar *)(s - 0);
  case S_one_C2:            return (UChar *)(s - 0);
  case S_one_C4:            return (UChar *)(s - 0);
  case S_one_CM:            return (UChar *)(s - 0);

  case S_odd_CM_one_CX:     return (UChar *)(s - 1);
  case S_even_CM_one_CX:    return (UChar *)(s - 0);

  case S_one_CMC4:          return (UChar *)(s - 1);
  case S_odd_CMC4:          return (UChar *)(s - 1);
  case S_one_C4_odd_CMC4:   return (UChar *)(s - 1);
  case S_even_CMC4:         return (UChar *)(s - 3);
  case S_one_C4_even_CMC4:  return (UChar *)(s - 3);

  case S_odd_CM_odd_CMC4:   return (UChar *)(s - 3);
  case S_even_CM_odd_CMC4:  return (UChar *)(s - 1);

  case S_odd_CM_even_CMC4:  return (UChar *)(s - 1);
  case S_even_CM_even_CMC4: return (UChar *)(s - 3);

  case S_odd_C4CM:          return (UChar *)(s - 0);
  case S_one_CM_odd_C4CM:   return (UChar *)(s - 2);
  case S_even_C4CM:         return (UChar *)(s - 2);
  case S_one_CM_even_C4CM:  return (UChar *)(s - 0);

  case S_even_CM_odd_C4CM:  return (UChar *)(s - 0);
  case S_odd_CM_odd_C4CM:   return (UChar *)(s - 2);
  case S_even_CM_even_C4CM: return (UChar *)(s - 2);
  case S_odd_CM_even_C4CM:  return (UChar *)(s - 0);
  }

  return (UChar* )s;  /* never come here. (escape warning) */
}
static int gb18030_mbc_enc_len ( const UChar * p) [static]

Definition at line 66 of file gb18030.c.

{
  /* Returns 1, 2 or 4, decided by the GB18030_MAP classes of the byte at p
     and, when that byte is CM, of the byte after it. */
  if (GB18030_MAP[*p] != CM)
    return 1;
  /* NOTE(review): the next byte is read with no end-of-buffer check in this
     function — presumably callers guarantee p[1] is readable; confirm. */
  p++;
  if (GB18030_MAP[*p] == C4)
    return 4;
  if (GB18030_MAP[*p] == C1)
    return 1; /* illegal sequence */
  return 2;
}
static OnigCodePoint gb18030_mbc_to_code ( const UChar * p,
const UChar * end 
) [static]

Definition at line 79 of file gb18030.c.

Here is the call graph for this function:

static int gb18030_mbc_to_normalize ( OnigAmbigType  flag,
const UChar **  pp,
const UChar * end,
UChar * lower 
) [static]

Definition at line 91 of file gb18030.c.

Here is the call graph for this function:


Variable Documentation

const char GB18030_MAP[] [static]
Initial value:
 {
  /* Byte class for each of the 256 byte values; one row per 16 values,
     in ascending order. */
  C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, /* 0x00-0x0F */
  C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, /* 0x10-0x1F */
  C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, /* 0x20-0x2F */
  C4, C4, C4, C4, C4, C4, C4, C4, C4, C4, C1, C1, C1, C1, C1, C1, /* 0x30-0x3F: 0x30-0x39 are C4 */
  C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, /* 0x40-0x4F */
  C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, /* 0x50-0x5F */
  C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, /* 0x60-0x6F */
  C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C1, /* 0x70-0x7F: 0x7F is C1 */
  C2, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, /* 0x80-0x8F: 0x80 is C2 */
  CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, /* 0x90-0x9F */
  CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, /* 0xA0-0xAF */
  CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, /* 0xB0-0xBF */
  CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, /* 0xC0-0xCF */
  CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, /* 0xD0-0xDF */
  CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, /* 0xE0-0xEF */
  CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, C1 /* 0xF0-0xFF: 0xFF is C1 */
}

Definition at line 46 of file gb18030.c.