Back to index

python3.2  3.2.2
Classes | Defines | Typedefs | Functions | Variables
_codecs_iso2022.c File Reference
#include "cjkcodecs.h"
#include "alg_jisx0201.h"
#include "emu_jisx0213_2000.h"
#include "mappings_jisx0213_pair.h"

Go to the source code of this file.

Classes

struct  iso2022_designation
struct  iso2022_config

Defines

#define USING_IMPORTED_MAPS
#define USING_BINARY_PAIR_SEARCH
#define EXTERN_JISX0213_PAIR
#define EMULATE_JISX0213_2000_ENCODE_INVALID   MAP_UNMAPPABLE
#define EMULATE_JISX0213_2000_DECODE_INVALID   MAP_UNMAPPABLE
#define ESC   0x1B
#define SO   0x0E
#define SI   0x0F
#define LF   0x0A
#define MAX_ESCSEQLEN   16
#define CHARSET_ISO8859_1   'A'
#define CHARSET_ASCII   'B'
#define CHARSET_ISO8859_7   'F'
#define CHARSET_JISX0201_K   'I'
#define CHARSET_JISX0201_R   'J'
#define CHARSET_GB2312   ('A'|CHARSET_DBCS)
#define CHARSET_JISX0208   ('B'|CHARSET_DBCS)
#define CHARSET_KSX1001   ('C'|CHARSET_DBCS)
#define CHARSET_JISX0212   ('D'|CHARSET_DBCS)
#define CHARSET_GB2312_8565   ('E'|CHARSET_DBCS)
#define CHARSET_CNS11643_1   ('G'|CHARSET_DBCS)
#define CHARSET_CNS11643_2   ('H'|CHARSET_DBCS)
#define CHARSET_JISX0213_2000_1   ('O'|CHARSET_DBCS)
#define CHARSET_JISX0213_2   ('P'|CHARSET_DBCS)
#define CHARSET_JISX0213_2004_1   ('Q'|CHARSET_DBCS)
#define CHARSET_JISX0208_O   ('@'|CHARSET_DBCS)
#define CHARSET_DBCS   0x80
#define ESCMARK(mark)   ((mark) & 0x7f)
#define IS_ESCEND(c)   (((c) >= 'A' && (c) <= 'Z') || (c) == '@')
#define IS_ISO2022ESC(c2)
#define MAP_UNMAPPABLE   0xFFFF
#define MAP_MULTIPLE_AVAIL   0xFFFE /* for JIS X 0213 */
#define F_SHIFTED   0x01
#define F_ESCTHROUGHOUT   0x02
#define STATE_SETG(dn, v)   ((state)->c[dn]) = (v);
#define STATE_GETG(dn)   ((state)->c[dn])
#define STATE_G0   STATE_GETG(0)
#define STATE_G1   STATE_GETG(1)
#define STATE_G2   STATE_GETG(2)
#define STATE_G3   STATE_GETG(3)
#define STATE_SETG0(v)   STATE_SETG(0, v)
#define STATE_SETG1(v)   STATE_SETG(1, v)
#define STATE_SETG2(v)   STATE_SETG(2, v)
#define STATE_SETG3(v)   STATE_SETG(3, v)
#define STATE_SETFLAG(f)   ((state)->c[4]) |= (f);
#define STATE_GETFLAG(f)   ((state)->c[4] & (f))
#define STATE_CLEARFLAG(f)   ((state)->c[4]) &= ~(f);
#define STATE_CLEARFLAGS()   ((state)->c[4]) = 0;
#define ISO2022_CONFIG   ((const struct iso2022_config *)config)
#define CONFIG_ISSET(flag)   (ISO2022_CONFIG->flags & (flag))
#define CONFIG_DESIGNATIONS   (ISO2022_CONFIG->designations)
#define NO_SHIFT   0x01
#define USE_G2   0x02
#define USE_JISX0208_EXT   0x04
#define ISO8859_7_DECODE(c, assi)
#define ENCMAP(enc)   static const encode_map *enc##_encmap = NULL;
#define DECMAP(enc)   static const decode_map *enc##_decmap = NULL;
#define config   ((void *)2000)
#define REGISTRY_KSX1001_G0
#define REGISTRY_KSX1001_G1
#define REGISTRY_JISX0201_R
#define REGISTRY_JISX0201_K
#define REGISTRY_JISX0208
#define REGISTRY_JISX0208_O
#define REGISTRY_JISX0212
#define REGISTRY_JISX0213_2000_1
#define REGISTRY_JISX0213_2000_1_PAIRONLY
#define REGISTRY_JISX0213_2000_2
#define REGISTRY_JISX0213_2004_1
#define REGISTRY_JISX0213_2004_1_PAIRONLY
#define REGISTRY_JISX0213_2004_2
#define REGISTRY_GB2312
#define REGISTRY_CNS11643_1
#define REGISTRY_CNS11643_2
#define REGISTRY_ISO8859_1
#define REGISTRY_ISO8859_7
#define REGISTRY_SENTINEL   { 0, }
#define CONFIGDEF(var, attrs)
#define ISO2022_CODEC(variation)

Typedefs

typedef int(* iso2022_init_func )(void)
typedef ucs4_t(* iso2022_decode_func )(const unsigned char *data)
typedef DBCHAR(* iso2022_encode_func )(const ucs4_t *data, Py_ssize_t *length)

Functions

 CODEC_INIT (iso2022)
 ENCODER_INIT (iso2022)
 ENCODER_RESET (iso2022)
 ENCODER (iso2022)
 DECODER_INIT (iso2022)
 DECODER_RESET (iso2022)
static Py_ssize_t iso2022processesc (const void *config, MultibyteCodec_State *state, const unsigned char **inbuf, Py_ssize_t *inleft)
static Py_ssize_t iso2022processg2 (const void *config, MultibyteCodec_State *state, const unsigned char **inbuf, Py_ssize_t *inleft, Py_UNICODE **outbuf, Py_ssize_t *outleft)
 DECODER (iso2022)
static int ksx1001_init (void)
static ucs4_t ksx1001_decoder (const unsigned char *data)
static DBCHAR ksx1001_encoder (const ucs4_t *data, Py_ssize_t *length)
static int jisx0208_init (void)
static ucs4_t jisx0208_decoder (const unsigned char *data)
static DBCHAR jisx0208_encoder (const ucs4_t *data, Py_ssize_t *length)
static int jisx0212_init (void)
static ucs4_t jisx0212_decoder (const unsigned char *data)
static DBCHAR jisx0212_encoder (const ucs4_t *data, Py_ssize_t *length)
static int jisx0213_init (void)
static ucs4_t jisx0213_2000_1_decoder (const unsigned char *data)
static ucs4_t jisx0213_2000_2_decoder (const unsigned char *data)
static ucs4_t jisx0213_2004_1_decoder (const unsigned char *data)
static ucs4_t jisx0213_2004_2_decoder (const unsigned char *data)
static DBCHAR jisx0213_encoder (const ucs4_t *data, Py_ssize_t *length, void *config)
static DBCHAR jisx0213_2000_1_encoder (const ucs4_t *data, Py_ssize_t *length)
static DBCHAR jisx0213_2000_1_encoder_paironly (const ucs4_t *data, Py_ssize_t *length)
static DBCHAR jisx0213_2000_2_encoder (const ucs4_t *data, Py_ssize_t *length)
static DBCHAR jisx0213_2004_1_encoder (const ucs4_t *data, Py_ssize_t *length)
static DBCHAR jisx0213_2004_1_encoder_paironly (const ucs4_t *data, Py_ssize_t *length)
static DBCHAR jisx0213_2004_2_encoder (const ucs4_t *data, Py_ssize_t *length)
static ucs4_t jisx0201_r_decoder (const unsigned char *data)
static DBCHAR jisx0201_r_encoder (const ucs4_t *data, Py_ssize_t *length)
static ucs4_t jisx0201_k_decoder (const unsigned char *data)
static DBCHAR jisx0201_k_encoder (const ucs4_t *data, Py_ssize_t *length)
static int gb2312_init (void)
static ucs4_t gb2312_decoder (const unsigned char *data)
static DBCHAR gb2312_encoder (const ucs4_t *data, Py_ssize_t *length)
static ucs4_t dummy_decoder (const unsigned char *data)
static DBCHAR dummy_encoder (const ucs4_t *data, Py_ssize_t *length)

Variables

static struct iso2022_designation []

Class Documentation

struct iso2022_designation

Definition at line 108 of file _codecs_iso2022.c.

Class Members
iso2022_decode_func decoder
iso2022_encode_func encoder
iso2022_init_func initializer
unsigned char mark
unsigned char plane
unsigned char width
struct iso2022_config

Definition at line 117 of file _codecs_iso2022.c.

Collaboration diagram for iso2022_config:
Class Members
struct iso2022_designation * designations
int flags

Define Documentation

#define CHARSET_ASCII   'B'

Definition at line 43 of file _codecs_iso2022.c.

#define CHARSET_CNS11643_1   ('G'|CHARSET_DBCS)

Definition at line 53 of file _codecs_iso2022.c.

#define CHARSET_CNS11643_2   ('H'|CHARSET_DBCS)

Definition at line 54 of file _codecs_iso2022.c.

#define CHARSET_DBCS   0x80

Definition at line 60 of file _codecs_iso2022.c.

#define CHARSET_GB2312   ('A'|CHARSET_DBCS)

Definition at line 48 of file _codecs_iso2022.c.

#define CHARSET_GB2312_8565   ('E'|CHARSET_DBCS)

Definition at line 52 of file _codecs_iso2022.c.

#define CHARSET_ISO8859_1   'A'

Definition at line 42 of file _codecs_iso2022.c.

#define CHARSET_ISO8859_7   'F'

Definition at line 44 of file _codecs_iso2022.c.

#define CHARSET_JISX0201_K   'I'

Definition at line 45 of file _codecs_iso2022.c.

#define CHARSET_JISX0201_R   'J'

Definition at line 46 of file _codecs_iso2022.c.

#define CHARSET_JISX0208   ('B'|CHARSET_DBCS)

Definition at line 49 of file _codecs_iso2022.c.

#define CHARSET_JISX0208_O   ('@'|CHARSET_DBCS)

Definition at line 58 of file _codecs_iso2022.c.

#define CHARSET_JISX0212   ('D'|CHARSET_DBCS)

Definition at line 51 of file _codecs_iso2022.c.

#define CHARSET_JISX0213_2   ('P'|CHARSET_DBCS)

Definition at line 56 of file _codecs_iso2022.c.

#define CHARSET_JISX0213_2000_1   ('O'|CHARSET_DBCS)

Definition at line 55 of file _codecs_iso2022.c.

#define CHARSET_JISX0213_2004_1   ('Q'|CHARSET_DBCS)

Definition at line 57 of file _codecs_iso2022.c.

#define CHARSET_KSX1001   ('C'|CHARSET_DBCS)

Definition at line 50 of file _codecs_iso2022.c.

#define config   ((void *)2000)

Definition at line 707 of file _codecs_iso2022.c.

#define CONFIG_DESIGNATIONS   (ISO2022_CONFIG->designations)

Definition at line 95 of file _codecs_iso2022.c.

#define CONFIG_ISSET (   flag)    (ISO2022_CONFIG->flags & (flag))

Definition at line 94 of file _codecs_iso2022.c.

#define CONFIGDEF (   var,
  attrs 
)
Value:
static const struct iso2022_config iso2022_##var##_config = {       \
        attrs, iso2022_##var##_designations                             \
    };

Definition at line 1062 of file _codecs_iso2022.c.

#define DECMAP (   enc)    static const decode_map *enc##_decmap = NULL;

Definition at line 542 of file _codecs_iso2022.c.

#define EMULATE_JISX0213_2000_DECODE_INVALID   MAP_UNMAPPABLE

Definition at line 11 of file _codecs_iso2022.c.

#define EMULATE_JISX0213_2000_ENCODE_INVALID   MAP_UNMAPPABLE

Definition at line 10 of file _codecs_iso2022.c.

#define ENCMAP (   enc)    static const encode_map *enc##_encmap = NULL;

Definition at line 541 of file _codecs_iso2022.c.

#define ESC   0x1B

Definition at line 35 of file _codecs_iso2022.c.

#define ESCMARK (   mark)    ((mark) & 0x7f)

Definition at line 61 of file _codecs_iso2022.c.

#define EXTERN_JISX0213_PAIR

Definition at line 9 of file _codecs_iso2022.c.

#define F_ESCTHROUGHOUT   0x02

Definition at line 74 of file _codecs_iso2022.c.

#define F_SHIFTED   0x01

Definition at line 73 of file _codecs_iso2022.c.

#define IS_ESCEND (   c)    (((c) >= 'A' && (c) <= 'Z') || (c) == '@')

Definition at line 63 of file _codecs_iso2022.c.

#define IS_ISO2022ESC (   c2)
Value:
((c2) == '(' || (c2) == ')' || (c2) == '$' || \
         (c2) == '.' || (c2) == '&')

Definition at line 64 of file _codecs_iso2022.c.

#define ISO2022_CODEC (   variation)
Value:
{              \
    "iso2022_" #variation,                      \
    &iso2022_##variation##_config,              \
    iso2022_codec_init,                         \
    _STATEFUL_METHODS(iso2022)                  \
},

Definition at line 1114 of file _codecs_iso2022.c.

#define ISO2022_CONFIG   ((const struct iso2022_config *)config)

Definition at line 93 of file _codecs_iso2022.c.

#define ISO8859_7_DECODE (   c,
  assi 
)
Value:
if ((c) < 0xa0) (assi) = (c);                                       \
    else if ((c) < 0xc0 && (0x288f3bc9L & (1L << ((c)-0xa0))))          \
        (assi) = (c);                                                   \
    else if ((c) >= 0xb4 && (c) <= 0xfe && ((c) >= 0xd4 ||              \
             (0xbffffd77L & (1L << ((c)-0xb4)))))                       \
        (assi) = 0x02d0 + (c);                                          \
    else if ((c) == 0xa1) (assi) = 0x2018;                              \
    else if ((c) == 0xa2) (assi) = 0x2019;                              \
    else if ((c) == 0xaf) (assi) = 0x2015;

Definition at line 379 of file _codecs_iso2022.c.

#define LF   0x0A

Definition at line 38 of file _codecs_iso2022.c.

#define MAP_MULTIPLE_AVAIL   0xFFFE /* for JIS X 0213 */

Definition at line 71 of file _codecs_iso2022.c.

#define MAP_UNMAPPABLE   0xFFFF

Definition at line 70 of file _codecs_iso2022.c.

#define MAX_ESCSEQLEN   16

Definition at line 40 of file _codecs_iso2022.c.

#define NO_SHIFT   0x01

Definition at line 98 of file _codecs_iso2022.c.

#define REGISTRY_CNS11643_1
Value:
{ CHARSET_CNS11643_1, 1, 2,             \
                  cns11643_init,                                        \
                  cns11643_1_decoder, cns11643_1_encoder }

Definition at line 1051 of file _codecs_iso2022.c.

#define REGISTRY_CNS11643_2
Value:
{ CHARSET_CNS11643_2, 2, 2,             \
                  cns11643_init,                                        \
                  cns11643_2_decoder, cns11643_2_encoder }

Definition at line 1054 of file _codecs_iso2022.c.

#define REGISTRY_GB2312
Value:
{ CHARSET_GB2312, 0, 2,                 \
                  gb2312_init,                                          \
                  gb2312_decoder, gb2312_encoder }

Definition at line 1048 of file _codecs_iso2022.c.

#define REGISTRY_ISO8859_1
Value:

Definition at line 1057 of file _codecs_iso2022.c.

#define REGISTRY_ISO8859_7
Value:

Definition at line 1059 of file _codecs_iso2022.c.

#define REGISTRY_JISX0201_K
Value:
{ CHARSET_JISX0201_K, 0, 1,             \
                  NULL,                                                 \
                  jisx0201_k_decoder, jisx0201_k_encoder }

Definition at line 1012 of file _codecs_iso2022.c.

#define REGISTRY_JISX0201_R
Value:
{ CHARSET_JISX0201_R, 0, 1,             \
                  NULL,                                                 \
                  jisx0201_r_decoder, jisx0201_r_encoder }

Definition at line 1009 of file _codecs_iso2022.c.

#define REGISTRY_JISX0208
Value:
{ CHARSET_JISX0208, 0, 2,               \
                  jisx0208_init,                                        \
                  jisx0208_decoder, jisx0208_encoder }

Definition at line 1015 of file _codecs_iso2022.c.

#define REGISTRY_JISX0208_O
Value:
{ CHARSET_JISX0208_O, 0, 2,             \
                  jisx0208_init,                                        \
                  jisx0208_decoder, jisx0208_encoder }

Definition at line 1018 of file _codecs_iso2022.c.

#define REGISTRY_JISX0212
Value:
{ CHARSET_JISX0212, 0, 2,               \
                  jisx0212_init,                                        \
                  jisx0212_decoder, jisx0212_encoder }

Definition at line 1021 of file _codecs_iso2022.c.

#define REGISTRY_JISX0213_2000_1
Value:
{ CHARSET_JISX0213_2000_1, 0, 2,       \
                  jisx0213_init,                                        \
                  jisx0213_2000_1_decoder,                              \
                  jisx0213_2000_1_encoder }

Definition at line 1024 of file _codecs_iso2022.c.

#define REGISTRY_JISX0213_2000_1_PAIRONLY
Value:
{ CHARSET_JISX0213_2000_1, 0, 2, \
                  jisx0213_init,                                        \
                  jisx0213_2000_1_decoder,                              \
                  jisx0213_2000_1_encoder_paironly }

Definition at line 1028 of file _codecs_iso2022.c.

#define REGISTRY_JISX0213_2000_2
Value:
{ CHARSET_JISX0213_2, 0, 2,            \
                  jisx0213_init,                                        \
                  jisx0213_2000_2_decoder,                              \
                  jisx0213_2000_2_encoder }

Definition at line 1032 of file _codecs_iso2022.c.

#define REGISTRY_JISX0213_2004_1
Value:
{ CHARSET_JISX0213_2004_1, 0, 2,       \
                  jisx0213_init,                                        \
                  jisx0213_2004_1_decoder,                              \
                  jisx0213_2004_1_encoder }

Definition at line 1036 of file _codecs_iso2022.c.

#define REGISTRY_JISX0213_2004_1_PAIRONLY
Value:
{ CHARSET_JISX0213_2004_1, 0, 2, \
                  jisx0213_init,                                        \
                  jisx0213_2004_1_decoder,                              \
                  jisx0213_2004_1_encoder_paironly }

Definition at line 1040 of file _codecs_iso2022.c.

#define REGISTRY_JISX0213_2004_2
Value:
{ CHARSET_JISX0213_2, 0, 2,            \
                  jisx0213_init,                                        \
                  jisx0213_2004_2_decoder,                              \
                  jisx0213_2004_2_encoder }

Definition at line 1044 of file _codecs_iso2022.c.

#define REGISTRY_KSX1001_G0
Value:
{ CHARSET_KSX1001, 0, 2,                \
                  ksx1001_init,                                         \
                  ksx1001_decoder, ksx1001_encoder }

Definition at line 1003 of file _codecs_iso2022.c.

#define REGISTRY_KSX1001_G1
Value:
{ CHARSET_KSX1001, 1, 2,                \
                  ksx1001_init,                                         \
                  ksx1001_decoder, ksx1001_encoder }

Definition at line 1006 of file _codecs_iso2022.c.

#define REGISTRY_SENTINEL   { 0, }

Definition at line 1061 of file _codecs_iso2022.c.

#define SI   0x0F

Definition at line 37 of file _codecs_iso2022.c.

#define SO   0x0E

Definition at line 36 of file _codecs_iso2022.c.

#define STATE_CLEARFLAG (   f)    ((state)->c[4]) &= ~(f);

Definition at line 90 of file _codecs_iso2022.c.

#define STATE_CLEARFLAGS ( )    ((state)->c[4]) = 0;

Definition at line 91 of file _codecs_iso2022.c.

#define STATE_G0   STATE_GETG(0)

Definition at line 79 of file _codecs_iso2022.c.

#define STATE_G1   STATE_GETG(1)

Definition at line 80 of file _codecs_iso2022.c.

#define STATE_G2   STATE_GETG(2)

Definition at line 81 of file _codecs_iso2022.c.

#define STATE_G3   STATE_GETG(3)

Definition at line 82 of file _codecs_iso2022.c.

#define STATE_GETFLAG (   f)    ((state)->c[4] & (f))

Definition at line 89 of file _codecs_iso2022.c.

#define STATE_GETG (   dn)    ((state)->c[dn])

Definition at line 77 of file _codecs_iso2022.c.

#define STATE_SETFLAG (   f)    ((state)->c[4]) |= (f);

Definition at line 88 of file _codecs_iso2022.c.

#define STATE_SETG (   dn,
  v 
)    ((state)->c[dn]) = (v);

Definition at line 76 of file _codecs_iso2022.c.

#define STATE_SETG0 (   v)    STATE_SETG(0, v)

Definition at line 83 of file _codecs_iso2022.c.

#define STATE_SETG1 (   v)    STATE_SETG(1, v)

Definition at line 84 of file _codecs_iso2022.c.

#define STATE_SETG2 (   v)    STATE_SETG(2, v)

Definition at line 85 of file _codecs_iso2022.c.

#define STATE_SETG3 (   v)    STATE_SETG(3, v)

Definition at line 86 of file _codecs_iso2022.c.

#define USE_G2   0x02

Definition at line 99 of file _codecs_iso2022.c.

#define USE_JISX0208_EXT   0x04

Definition at line 100 of file _codecs_iso2022.c.

#define USING_BINARY_PAIR_SEARCH

Definition at line 8 of file _codecs_iso2022.c.

#define USING_IMPORTED_MAPS

Definition at line 7 of file _codecs_iso2022.c.


Typedef Documentation

typedef ucs4_t(* iso2022_decode_func)(const unsigned char *data)

Definition at line 105 of file _codecs_iso2022.c.

typedef DBCHAR(* iso2022_encode_func)(const ucs4_t *data, Py_ssize_t *length)

Definition at line 106 of file _codecs_iso2022.c.

typedef int(* iso2022_init_func)(void)

Definition at line 104 of file _codecs_iso2022.c.


Function Documentation

CODEC_INIT ( iso2022  )

Definition at line 124 of file _codecs_iso2022.c.

{
    /* Run the initializer of every designation configured for this codec.
     * The designation table ends at the first entry whose mark is 0.
     * Entries without an initializer are skipped.  Returns -1 as soon as
     * an initializer reports failure (non-zero), otherwise 0. */
    const struct iso2022_designation *entry = CONFIG_DESIGNATIONS;

    while (entry->mark) {
        if (entry->initializer != NULL) {
            if (entry->initializer() != 0)
                return -1;
        }
        entry++;
    }
    return 0;
}
DECODER ( iso2022  )

Definition at line 421 of file _codecs_iso2022.c.

{
    /* Main decode loop: consumes input bytes one unit at a time and
     * dispatches on escape sequences, shift controls and the currently
     * designated character set.
     *
     * IN1/IN2, WRITE*, NEXT*, REQUIRE_INBUF and WRITEUCS4 are macros
     * defined outside this listing (presumably cjkcodecs.h); they may
     * return from this function on buffer shortage -- confirm there.
     *
     * Visible return values: 0 once the input is exhausted, 1 for a byte
     * >= 0x80, dsg->width for an unmappable sequence, or the non-zero
     * value handed back by iso2022processesc / iso2022processg2. */

    /* Last designation found by the table scan below; lets consecutive
     * characters of the same charset skip the scan. */
    const struct iso2022_designation *dsgcache = NULL;

    while (inleft > 0) {
        unsigned char c = IN1;
        Py_ssize_t err;

        if (STATE_GETFLAG(F_ESCTHROUGHOUT)) {
            /* ESC throughout mode:
             * for non-iso2022 escape sequences */
            WRITE1(c) /* assume as ISO-8859-1 */
            NEXT(1, 1)
            /* The pass-through ends with the first escape-final byte
             * ('A'..'Z' or '@'). */
            if (IS_ESCEND(c)) {
                STATE_CLEARFLAG(F_ESCTHROUGHOUT)
            }
            continue;
        }

        switch (c) {
        case ESC:
            REQUIRE_INBUF(2)
            if (IS_ISO2022ESC(IN2)) {
                /* Second byte is one of ( ) $ . & : treat it as a
                 * designation sequence. */
                err = iso2022processesc(config, state,
                                        inbuf, &inleft);
                if (err != 0)
                    return err;
            }
            else if (CONFIG_ISSET(USE_G2) && IN2 == 'N') {/* SS2 */
                REQUIRE_INBUF(3)
                err = iso2022processg2(config, state,
                    inbuf, &inleft, outbuf, &outleft);
                if (err != 0)
                    return err;
            }
            else {
                /* Not a sequence handled here: emit ESC as-is and copy
                 * the following bytes through until an escape-final
                 * byte is seen (handled at the top of the loop). */
                WRITE1(ESC)
                STATE_SETFLAG(F_ESCTHROUGHOUT)
                NEXT(1, 1)
            }
            break;
        case SI:
            /* With NO_SHIFT configured, SI is passed through like any
             * other control byte; otherwise it clears the shift flag. */
            if (CONFIG_ISSET(NO_SHIFT))
                goto bypass;
            STATE_CLEARFLAG(F_SHIFTED)
            NEXT_IN(1)
            break;
        case SO:
            /* With NO_SHIFT configured, SO is passed through; otherwise
             * it sets the shift flag. */
            if (CONFIG_ISSET(NO_SHIFT))
                goto bypass;
            STATE_SETFLAG(F_SHIFTED)
            NEXT_IN(1)
            break;
        case LF:
            /* A line feed also clears the shift flag. */
            STATE_CLEARFLAG(F_SHIFTED)
            WRITE1(LF)
            NEXT(1, 1)
            break;
        default:
            if (c < 0x20) /* C0 */
                goto bypass;
            else if (c >= 0x80)
                return 1;
            else {
                const struct iso2022_designation *dsg;
                unsigned char charset;
                ucs4_t decoded;

                /* Shifted state selects the G1 slot, otherwise G0. */
                if (STATE_GETFLAG(F_SHIFTED))
                    charset = STATE_G1;
                else
                    charset = STATE_G0;

                if (charset == CHARSET_ASCII) {
                    /* Also the jump target for control bytes above:
                     * the byte is copied to the output unchanged. */
bypass:                                 WRITE1(c)
                                        NEXT(1, 1)
                                        break;
                                }

                                if (dsgcache != NULL &&
                                    dsgcache->mark == charset)
                                        dsg = dsgcache;
                                else {
                                        /* Linear scan for the entry whose
                                         * mark equals charset.  The end-of-
                                         * table test is compiled only under
                                         * Py_DEBUG, so other builds rely on
                                         * charset always being present in
                                         * the table.
                                         * NOTE(review): iso2022processesc
                                         * rejects undesignated charsets;
                                         * confirm the initial state is
                                         * covered as well. */
                                        for (dsg = CONFIG_DESIGNATIONS;
                                             dsg->mark != charset
#ifdef Py_DEBUG
                                                && dsg->mark != '\0'
#endif
                                             ;dsg++)
                                                /* noop */;
                                        assert(dsg->mark != '\0');
                                        dsgcache = dsg;
                                }

                                /* dsg->width input bytes are needed for
                                 * one character of this charset. */
                                REQUIRE_INBUF(dsg->width)
                                decoded = dsg->decoder(*inbuf);
                                if (decoded == MAP_UNMAPPABLE)
                                        return dsg->width;

                                /* Three output shapes, chosen by the
                                 * magnitude of the decoded value. */
                                if (decoded < 0x10000) {
                                        WRITE1(decoded)
                                        NEXT_OUT(1)
                                }
                                else if (decoded < 0x30000) {
                                        /* No NEXT_OUT here; presumably
                                         * WRITEUCS4 advances the output
                                         * itself -- confirm. */
                                        WRITEUCS4(decoded)
                                }
                                else { /* JIS X 0213 pairs */
                    /* Two output units packed into one value: upper
                     * 16 bits first, lower 16 bits second. */
                    WRITE2(decoded >> 16, decoded & 0xffff)
                    NEXT_OUT(2)
                }
                NEXT_IN(dsg->width)
            }
            break;
        }
    }
    return 0;
}

Here is the call graph for this function:

DECODER_INIT ( iso2022  )
DECODER_RESET ( iso2022  )

Definition at line 295 of file _codecs_iso2022.c.

static ucs4_t dummy_decoder ( const unsigned char *  data) [static]

Definition at line 990 of file _codecs_iso2022.c.

{
    /* Placeholder decoder: ignores its input and reports every
     * sequence as unmappable. */
    return MAP_UNMAPPABLE;
}
static DBCHAR dummy_encoder ( const ucs4_t *  data,
Py_ssize_t *  length 
) [static]

Definition at line 996 of file _codecs_iso2022.c.

{
    /* Placeholder encoder: ignores its arguments and reports every
     * character as unmappable. */
    return MAP_UNMAPPABLE;
}
ENCODER ( iso2022  )

Definition at line 156 of file _codecs_iso2022.c.

{
    /* Main encode loop: for each input character, emit whatever
     * designation / shift sequences are needed to reach a charset that
     * can represent it, then emit the encoded byte(s).
     *
     * WRITE*, NEXT*, IN1/IN2, DECODE_SURROGATE and GET_INSIZE are macros
     * defined outside this listing (presumably cjkcodecs.h); they may
     * return from this function on buffer shortage -- confirm there.
     *
     * Visible return values: 0 when the input is exhausted, 1 when no
     * configured designation can encode the character, MBERR_TOOFEW when
     * a second input unit is needed but absent and MBENC_FLUSH is not
     * set, MBERR_INTERNAL for a designation whose plane is not 0 or 1. */
    while (inleft > 0) {
        const struct iso2022_designation *dsg;
        DBCHAR encoded;
        ucs4_t c = **inbuf;
        Py_ssize_t insize;

        if (c < 0x80) {
            /* Characters below 0x80 are written as-is, after making
             * sure G0 is ASCII and the shift flag is cleared. */
            if (STATE_G0 != CHARSET_ASCII) {
                WRITE3(ESC, '(', 'B')
                STATE_SETG0(CHARSET_ASCII)
                NEXT_OUT(3)
            }
            if (STATE_GETFLAG(F_SHIFTED)) {
                WRITE1(SI)
                STATE_CLEARFLAG(F_SHIFTED)
                NEXT_OUT(1)
            }
            WRITE1((unsigned char)c)
            NEXT(1, 1)
            continue;
        }

        DECODE_SURROGATE(c)
        insize = GET_INSIZE(c);

        /* Try each configured designation in table order and stop at
         * the first whose encoder accepts the character. */
        encoded = MAP_UNMAPPABLE;
        for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) {
            Py_ssize_t length = 1;
            encoded = dsg->encoder(&c, &length);
            if (encoded == MAP_MULTIPLE_AVAIL) {
                /* this implementation won't work for pair
                 * of non-bmp characters. */
                /* The encoder is asked again: with length 2 when a
                 * second input unit is available, or with length -1
                 * when the input ends here and MBENC_FLUSH is set. */
                if (inleft < 2) {
                    if (!(flags & MBENC_FLUSH))
                        return MBERR_TOOFEW;
                    length = -1;
                }
                else
                    length = 2;
#if Py_UNICODE_SIZE == 2
                /* When Py_UNICODE_SIZE is 2, the two input units are
                 * widened into a ucs4_t pair before being handed to
                 * the encoder. */
                if (length == 2) {
                    ucs4_t u4in[2];
                    u4in[0] = (ucs4_t)IN1;
                    u4in[1] = (ucs4_t)IN2;
                    encoded = dsg->encoder(u4in, &length);
                } else
                    encoded = dsg->encoder(&c, &length);
#else
                encoded = dsg->encoder(&c, &length);
#endif
                if (encoded != MAP_UNMAPPABLE) {
                    /* The encoder reports through length how many
                     * input units it consumed. */
                    insize = length;
                    break;
                }
            }
            else if (encoded != MAP_UNMAPPABLE)
                break;
        }

        /* Reaching the table terminator means no designation matched. */
        if (!dsg->mark)
            return 1;
        assert(dsg->width == 1 || dsg->width == 2);

        switch (dsg->plane) {
        case 0: /* G0 */
            if (STATE_GETFLAG(F_SHIFTED)) {
                WRITE1(SI)
                STATE_CLEARFLAG(F_SHIFTED)
                NEXT_OUT(1)
            }
            if (STATE_G0 != dsg->mark) {
                /* Designation sequence forms: ESC ( F for single-byte
                 * sets, the 3-byte ESC $ F for CHARSET_JISX0208 only,
                 * and ESC $ ( F for every other double-byte set. */
                if (dsg->width == 1) {
                    WRITE3(ESC, '(', ESCMARK(dsg->mark))
                    STATE_SETG0(dsg->mark)
                    NEXT_OUT(3)
                }
                else if (dsg->mark == CHARSET_JISX0208) {
                    WRITE3(ESC, '$', ESCMARK(dsg->mark))
                    STATE_SETG0(dsg->mark)
                    NEXT_OUT(3)
                }
                else {
                    WRITE4(ESC, '$', '(',
                        ESCMARK(dsg->mark))
                    STATE_SETG0(dsg->mark)
                    NEXT_OUT(4)
                }
            }
            break;
        case 1: /* G1 */
            /* Designate into G1 first if needed, then emit SO unless
             * the shift flag is already set. */
            if (STATE_G1 != dsg->mark) {
                if (dsg->width == 1) {
                    WRITE3(ESC, ')', ESCMARK(dsg->mark))
                    STATE_SETG1(dsg->mark)
                    NEXT_OUT(3)
                }
                else {
                    WRITE4(ESC, '$', ')',
                        ESCMARK(dsg->mark))
                    STATE_SETG1(dsg->mark)
                    NEXT_OUT(4)
                }
            }
            if (!STATE_GETFLAG(F_SHIFTED)) {
                WRITE1(SO)
                STATE_SETFLAG(F_SHIFTED)
                NEXT_OUT(1)
            }
            break;
        default: /* G2 and G3 is not supported: no encoding in
                  * CJKCodecs are using them yet */
            return MBERR_INTERNAL;
        }

        /* Emit the character itself: one byte, or high byte then low
         * byte for double-byte sets. */
        if (dsg->width == 1) {
            WRITE1((unsigned char)encoded)
            NEXT_OUT(1)
        }
        else {
            WRITE2(encoded >> 8, encoded & 0xff)
            NEXT_OUT(2)
        }
        NEXT_IN(insize)
    }

    return 0;
}
ENCODER_INIT ( iso2022  )
ENCODER_RESET ( iso2022  )

Definition at line 141 of file _codecs_iso2022.c.

{
    /* Bring the output stream back to its base state: emit SI if the
     * shift flag is set, and ESC ( B if G0 is not ASCII, updating the
     * codec state to match.  Returns 0 on the visible path; WRITE* are
     * macros defined outside this listing and may return early --
     * confirm in cjkcodecs.h. */
    if (STATE_GETFLAG(F_SHIFTED)) {
        WRITE1(SI)
        NEXT_OUT(1)
        STATE_CLEARFLAG(F_SHIFTED)
    }
    if (STATE_G0 != CHARSET_ASCII) {
        WRITE3(ESC, '(', 'B')
        NEXT_OUT(3)
        STATE_SETG0(CHARSET_ASCII)
    }
    return 0;
}
static ucs4_t gb2312_decoder ( const unsigned char *  data) [static]

Definition at line 965 of file _codecs_iso2022.c.

{
    /* Decode one character from the byte pair data[0], data[1] via the
     * gb2312 decode map; returns MAP_UNMAPPABLE when the lookup fails.
     * TRYMAP_DEC is defined outside this listing and evidently opens an
     * if-statement, since an else branch follows it. */
    ucs4_t u;
    TRYMAP_DEC(gb2312, u, data[0], data[1])
        return u;
    else
        return MAP_UNMAPPABLE;
}
static DBCHAR gb2312_encoder ( const ucs4_t *  data,
Py_ssize_t *  length 
) [static]

Definition at line 975 of file _codecs_iso2022.c.

{
    /* Encode *data through the gbcommon map.  Code points at or above
     * 0x10000 are never looked up.  A map hit is accepted only when bit
     * 15 of the entry is clear; everything else is MAP_UNMAPPABLE. */
    DBCHAR code;

    assert(*length == 1);

    if (*data >= 0x10000)
        return MAP_UNMAPPABLE;

    TRYMAP_ENC(gbcommon, code, *data) {
        if ((code & 0x8000) == 0)
            return code;
    }
    return MAP_UNMAPPABLE;
}
static int gb2312_init ( void  ) [static]

Definition at line 952 of file _codecs_iso2022.c.

{
    /* One-time import of the maps used by the GB 2312 encoder/decoder.
     * Returns 0 on success (or when already done), -1 when either
     * import reports failure; the second import is attempted only if
     * the first one succeeded. */
    static int initialized = 0;

    if (!initialized) {
        if (IMPORT_MAP(cn, gbcommon, &gbcommon_encmap, NULL))
            return -1;
        if (IMPORT_MAP(cn, gb2312, NULL, &gb2312_decmap))
            return -1;
    }
    initialized = 1;
    return 0;
}
static Py_ssize_t iso2022processesc ( const void *  config,
MultibyteCodec_State *  state,
const unsigned char **  inbuf,
Py_ssize_t *  inleft 
) [static]

Definition at line 303 of file _codecs_iso2022.c.

{
    /* Parse the designation escape sequence starting at *inbuf, record
     * the designated charset in the codec state and consume the
     * sequence.
     *
     * Returns 0 on success (input pointer and count advanced by the
     * sequence length), MBERR_TOOFEW when the input ends before a final
     * byte is found, 1 when no final byte appears within MAX_ESCSEQLEN
     * bytes, and otherwise the sequence length for a sequence that is
     * malformed or names a charset not configured for this codec.
     *
     * IN2/IN3/IN4 are macros defined outside this listing; presumably
     * they read (*inbuf)[1..3] -- confirm in cjkcodecs.h. */
    unsigned char charset, designation;
    Py_ssize_t i, esclen;

    /* Scan for the final byte ('A'..'Z' or '@').  esclen is assigned
     * only on the break path; leaving the loop any other way makes
     * i >= MAX_ESCSEQLEN, which returns below before esclen is read. */
    for (i = 1;i < MAX_ESCSEQLEN;i++) {
        if (i >= *inleft)
            return MBERR_TOOFEW;
        if (IS_ESCEND((*inbuf)[i])) {
            esclen = i + 1;
            break;
        }
        /* With USE_JISX0208_EXT, a "&@" pair is stepped over so that
         * its '@' is not taken as the final byte; together with the
         * loop's i++ the scan resumes three positions further on. */
        else if (CONFIG_ISSET(USE_JISX0208_EXT) && i+1 < *inleft &&
                 (*inbuf)[i] == '&' && (*inbuf)[i+1] == '@')
            i += 2;
    }

    if (i >= MAX_ESCSEQLEN)
        return 1; /* unterminated escape sequence */

    switch (esclen) {
    case 3:
        /* ESC $ F designates a double-byte set into G0; ESC ( F,
         * ESC ) F and (with USE_G2) ESC . F designate a single-byte
         * set into G0, G1 and G2 respectively. */
        if (IN2 == '$') {
            charset = IN3 | CHARSET_DBCS;
            designation = 0;
        }
        else {
            charset = IN3;
            if (IN2 == '(') designation = 0;
            else if (IN2 == ')') designation = 1;
            else if (CONFIG_ISSET(USE_G2) && IN2 == '.')
                designation = 2;
            else return 3;
        }
        break;
    case 4:
        /* ESC $ ( F / ESC $ ) F: double-byte set into G0 / G1. */
        if (IN2 != '$')
            return 4;

        charset = IN4 | CHARSET_DBCS;
        if (IN3 == '(') designation = 0;
        else if (IN3 == ')') designation = 1;
        else return 4;
        break;
    case 6: /* designation with prefix */
        /* Only the form whose last three bytes are ESC $ B is accepted,
         * and only with USE_JISX0208_EXT; it is treated like ESC $ B. */
        if (CONFIG_ISSET(USE_JISX0208_EXT) &&
            (*inbuf)[3] == ESC && (*inbuf)[4] == '$' &&
            (*inbuf)[5] == 'B') {
            charset = 'B' | CHARSET_DBCS;
            designation = 0;
        }
        else
            return 6;
        break;
    default:
        return esclen;
    }

    /* raise error when the charset is not designated for this encoding */
    if (charset != CHARSET_ASCII) {
        const struct iso2022_designation *dsg;

        for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++)
            if (dsg->mark == charset)
                break;
        if (!dsg->mark)
            return esclen;
    }

    /* Commit: store the charset in slot G<designation> and skip the
     * whole sequence in the input. */
    STATE_SETG(designation, charset)
    *inleft -= esclen;
    (*inbuf) += esclen;
    return 0;
}

Here is the caller graph for this function:

static Py_ssize_t iso2022processg2 ( const void *  config,
MultibyteCodec_State *  state,
const unsigned char **  inbuf,
Py_ssize_t *  inleft,
Py_UNICODE **  outbuf,
Py_ssize_t *  outleft 
) [static]

Definition at line 391 of file _codecs_iso2022.c.

{
    /* Decode the single character of a 3-byte single-shift sequence
     * using the charset currently held in the G2 slot.
     *
     * Returns 0 on success (3 input bytes consumed, 1 output unit
     * produced), 3 when the byte is not valid for the G2 charset, and
     * MBERR_INTERNAL when G2 holds a charset not handled here.
     *
     * IN3 and OUT1 are macros defined outside this listing; presumably
     * IN3 is the third input byte and OUT1 stores to **outbuf --
     * confirm in cjkcodecs.h.
     * NOTE(review): no check of *outleft is visible before the output
     * store in this function; confirm the caller guarantees room. */
    /* not written to use encoder, decoder functions because only few
     * encodings use G2 designations in CJKCodecs */
    if (STATE_G2 == CHARSET_ISO8859_1) {
        /* The 7-bit byte is moved into the upper half (+0x80). */
        if (IN3 < 0x80)
            OUT1(IN3 + 0x80)
        else
            return 3;
    }
    else if (STATE_G2 == CHARSET_ISO8859_7) {
        /* ISO8859_7_DECODE expands to an if / else-if chain, so the
         * else below fires when none of its branches matched. */
        ISO8859_7_DECODE(IN3 ^ 0x80, **outbuf)
        else return 3;
    }
    else if (STATE_G2 == CHARSET_ASCII) {
        if (IN3 & 0x80) return 3;
        else **outbuf = IN3;
    }
    else
        return MBERR_INTERNAL;

    (*inbuf) += 3;
    *inleft -= 3;
    (*outbuf) += 1;
    *outleft -= 1;
    return 0;
}

Here is the caller graph for this function:

static ucs4_t jisx0201_k_decoder ( const unsigned char *  data) [static]

Definition at line 934 of file _codecs_iso2022.c.

{
    /* Decode one byte with JISX0201_K_DECODE after toggling bit 7 of
     * the input (^ 0x80); returns MAP_UNMAPPABLE when the macro's
     * conditions do not match.  The macro is defined outside this
     * listing (presumably alg_jisx0201.h) and evidently ends in an open
     * if-chain, hence the bare else. */
    ucs4_t u;
    JISX0201_K_DECODE(*data ^ 0x80, u)
    else return MAP_UNMAPPABLE;
    return u;
}
static DBCHAR jisx0201_k_encoder ( const ucs4_t *  data,
Py_ssize_t *  length 
) [static]

Definition at line 943 of file _codecs_iso2022.c.

{
    /* Encode *data with JISX0201_K_ENCODE and return the result lowered
     * by 0x80 (the counterpart of the ^ 0x80 applied in
     * jisx0201_k_decoder); returns MAP_UNMAPPABLE when the macro's
     * conditions do not match.  The macro is defined outside this
     * listing and evidently ends in an open if-chain, hence the bare
     * else.  The length argument is not used. */
    DBCHAR coded;
    JISX0201_K_ENCODE(*data, coded)
    else return MAP_UNMAPPABLE;
    return coded - 0x80;
}
static ucs4_t jisx0201_r_decoder ( const unsigned char *  data) [static]

Definition at line 916 of file _codecs_iso2022.c.

{
    /* Decode one byte with JISX0201_R_DECODE; returns MAP_UNMAPPABLE
     * when the macro's conditions do not match.  The macro is defined
     * outside this listing (presumably alg_jisx0201.h) and evidently
     * ends in an open if-chain, hence the bare else. */
    ucs4_t u;
    JISX0201_R_DECODE(*data, u)
    else return MAP_UNMAPPABLE;
    return u;
}
static DBCHAR jisx0201_r_encoder ( const ucs4_t *  data,
Py_ssize_t *  length 
) [static]

Definition at line 925 of file _codecs_iso2022.c.

{
    /* Encode *data with JISX0201_R_ENCODE; returns MAP_UNMAPPABLE when
     * the macro's conditions do not match.  The macro is defined
     * outside this listing and evidently ends in an open if-chain,
     * hence the bare else.  The length argument is not used. */
    DBCHAR coded;
    JISX0201_R_ENCODE(*data, coded)
    else return MAP_UNMAPPABLE;
    return coded;
}
static ucs4_t jisx0208_decoder ( const unsigned char *  data) [static]

Definition at line 617 of file _codecs_iso2022.c.

{
    /* Decode one character from the byte pair data[0], data[1].
     * The pair 0x21 0x40 is special-cased to 0xff3c before the jisx0208
     * decode map is consulted; returns MAP_UNMAPPABLE when the lookup
     * fails.  TRYMAP_DEC is defined outside this listing and evidently
     * opens an if-statement, since it sits between two else keywords. */
    ucs4_t u;
    if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
        return 0xff3c;
    else TRYMAP_DEC(jisx0208, u, data[0], data[1])
        return u;
    else
        return MAP_UNMAPPABLE;
}
static DBCHAR jisx0208_encoder ( const ucs4_t *  data,
Py_ssize_t *  length
) [static]

Definition at line 629 of file _codecs_iso2022.c.

{
    /* Encode the single code point at *data.
     * Returns the code found in the jisxcommon table, or MAP_UNMAPPABLE
     * when the code point is >= 0x10000, is not in the table, or maps to
     * an entry with bit 0x8000 set. */
    DBCHAR coded;
    /* Callers must pass exactly one code point. */
    assert(*length == 1);
    if (*data < 0x10000) {
        /* Encoder-side counterpart of the 0x21 0x40 special case in
         * jisx0208_decoder. */
        if (*data == 0xff3c) /* F/W REVERSE SOLIDUS */
            return 0x2140;
        else TRYMAP_ENC(jisxcommon, coded, *data) {
            /* Only entries with bit 0x8000 clear are accepted here;
             * jisx0212_encoder accepts the entries with the bit set. */
            if (!(coded & 0x8000))
                return coded;
        }
    }
    return MAP_UNMAPPABLE;
}
static int jisx0208_init ( void  ) [static]

Definition at line 604 of file _codecs_iso2022.c.

{
    /* Set to 1 once both maps have been imported; later calls then
     * return 0 without importing again. */
    static int initialized = 0;

    if (initialized)
        return 0;

    /* A non-zero result from either import is reported as -1 and leaves
     * the flag clear, so the next call tries again. */
    if (IMPORT_MAP(jp, jisxcommon, &jisxcommon_encmap, NULL))
        return -1;
    if (IMPORT_MAP(jp, jisx0208, NULL, &jisx0208_decmap))
        return -1;

    initialized = 1;
    return 0;
}

Here is the caller graph for this function:

static ucs4_t jisx0212_decoder ( const unsigned char *  data) [static]

Definition at line 658 of file _codecs_iso2022.c.

{
    /* Decode the two-byte sequence data[0], data[1] through the jisx0212
     * map. Returns the decoded code point, or MAP_UNMAPPABLE when the
     * lookup fails. */
    ucs4_t u;
    /* TRYMAP_DEC supplies the `if`; on a hit the result is in u. */
    TRYMAP_DEC(jisx0212, u, data[0], data[1])
        return u;
    else
        return MAP_UNMAPPABLE;
}
static DBCHAR jisx0212_encoder ( const ucs4_t *  data,
Py_ssize_t *  length
) [static]

Definition at line 668 of file _codecs_iso2022.c.

{
    /* Encode the single code point at *data.
     * Returns the jisxcommon entry with bit 0x8000 stripped, or
     * MAP_UNMAPPABLE when the code point is >= 0x10000, is not in the
     * table, or maps to an entry without bit 0x8000. */
    DBCHAR coded;
    /* Callers must pass exactly one code point. */
    assert(*length == 1);
    if (*data < 0x10000) {
        TRYMAP_ENC(jisxcommon, coded, *data) {
            /* Only entries flagged with 0x8000 are accepted here;
             * jisx0208_encoder accepts the unflagged entries. */
            if (coded & 0x8000)
                return coded & 0x7fff;
        }
    }
    return MAP_UNMAPPABLE;
}
static int jisx0212_init ( void  ) [static]

Definition at line 645 of file _codecs_iso2022.c.

{
    /* Set to 1 once both maps have been imported; later calls then
     * return 0 without importing again. */
    static int initialized = 0;

    if (initialized)
        return 0;

    /* A non-zero result from either import is reported as -1 and leaves
     * the flag clear, so the next call tries again. */
    if (IMPORT_MAP(jp, jisxcommon, &jisxcommon_encmap, NULL))
        return -1;
    if (IMPORT_MAP(jp, jisx0212, NULL, &jisx0212_decmap))
        return -1;

    initialized = 1;
    return 0;
}
static ucs4_t jisx0213_2000_1_decoder ( const unsigned char *  data) [static]

Definition at line 709 of file _codecs_iso2022.c.

{
    /* Decode the two-byte sequence data[0], data[1].
     * The steps below form one if/else chain and are tried in order:
     * the JIS X 0213:2000 emulation macro, the 0x21 0x40 special case,
     * then the jisx0208, jisx0213_1_bmp, jisx0213_1_emp and
     * jisx0213_pair maps.
     * Returns the decoded value, or MAP_UNMAPPABLE when nothing matches. */
    ucs4_t u;
    EMULATE_JISX0213_2000_DECODE_PLANE1(u, data[0], data[1])
    else if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
        return 0xff3c;
    /* A trailing `;` is the empty body of a successful lookup: u is set
     * and control continues at `return u`. */
    else TRYMAP_DEC(jisx0208, u, data[0], data[1]);
    else TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]);
    /* Hits in the _emp map get 0x20000 ORed into the looked-up value. */
    else TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1])
        u |= 0x20000;
    else TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]);
    else
        return MAP_UNMAPPABLE;
    return u;
}
static DBCHAR jisx0213_2000_1_encoder ( const ucs4_t *  data,
Py_ssize_t *  length
) [static]

Definition at line 822 of file _codecs_iso2022.c.

{
    /* Run the shared JIS X 0213 encoder with the (void *)2000 config and
     * keep only results that do not carry the 0x8000 flag. */
    const DBCHAR result = jisx0213_encoder(data, length, (void *)2000);

    /* The two sentinels are forwarded untouched; they are excluded first
     * because both (0xFFFF and 0xFFFE) have bit 0x8000 set. */
    if (result != MAP_UNMAPPABLE && result != MAP_MULTIPLE_AVAIL &&
        (result & 0x8000))
        return MAP_UNMAPPABLE;

    return result;
}

Here is the call graph for this function:

static DBCHAR jisx0213_2000_1_encoder_paironly ( const ucs4_t *  data,
Py_ssize_t *  length
) [static]

Definition at line 834 of file _codecs_iso2022.c.

{
    /* Variant of the 2000 plane-1 encoder that only yields a code for a
     * two-code-point pair. The incoming *length is saved first because
     * jisx0213_encoder may overwrite it. */
    const Py_ssize_t requested = *length;
    const DBCHAR result = jisx0213_encoder(data, length, (void *)2000);

    /* Single code point: only "multiple candidates" is passed on. */
    if (requested == 1)
        return (result == MAP_MULTIPLE_AVAIL) ? MAP_MULTIPLE_AVAIL
                                              : MAP_UNMAPPABLE;

    /* Pair: accept the code only if the encoder left *length at 2. */
    if (requested == 2 && *length == 2)
        return result;

    return MAP_UNMAPPABLE;
}

Here is the call graph for this function:

static ucs4_t jisx0213_2000_2_decoder ( const unsigned char *  data) [static]

Definition at line 726 of file _codecs_iso2022.c.

{
    /* Decode the two-byte sequence data[0], data[1] via the emulation
     * macro and then the jisx0213_2_bmp / jisx0213_2_emp maps.
     * Returns the decoded value, or MAP_UNMAPPABLE when neither map has
     * an entry. */
    ucs4_t u;
    /* NOTE(review): unlike jisx0213_2000_1_decoder, there is no `else`
     * between the emulation macro and the first TRYMAP_DEC, so the lookup
     * below runs regardless of what the macro did. If the macro assigns
     * to u, a successful lookup would overwrite it -- confirm against the
     * macro definition in emu_jisx0213_2000.h. */
    EMULATE_JISX0213_2000_DECODE_PLANE2(u, data[0], data[1])
    /* A trailing `;` is the empty body of a successful lookup. */
    TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]);
    /* Hits in the _emp map get 0x20000 ORed into the looked-up value. */
    else TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1])
        u |= 0x20000;
    else
        return MAP_UNMAPPABLE;
    return u;
}
static DBCHAR jisx0213_2000_2_encoder ( const ucs4_t *  data,
Py_ssize_t *  length
) [static]

Definition at line 857 of file _codecs_iso2022.c.

{
    /* Run the shared JIS X 0213 encoder with the (void *)2000 config and
     * keep only results that carry the 0x8000 flag, returned with the
     * flag stripped. */
    const DBCHAR result = jisx0213_encoder(data, length, (void *)2000);

    /* Sentinels are forwarded untouched; they are checked first because
     * both (0xFFFF and 0xFFFE) have bit 0x8000 set. */
    if (result == MAP_UNMAPPABLE || result == MAP_MULTIPLE_AVAIL)
        return result;

    if ((result & 0x8000) == 0)
        return MAP_UNMAPPABLE;

    return result & 0x7fff;
}

Here is the call graph for this function:

static ucs4_t jisx0213_2004_1_decoder ( const unsigned char *  data) [static]

Definition at line 740 of file _codecs_iso2022.c.

{
    /* Decode the two-byte sequence data[0], data[1].
     * The steps below form one if/else chain and are tried in order:
     * the 0x21 0x40 special case, then the jisx0208, jisx0213_1_bmp,
     * jisx0213_1_emp and jisx0213_pair maps.
     * Returns the decoded value, or MAP_UNMAPPABLE when nothing matches. */
    ucs4_t u;
    if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
        return 0xff3c;
    /* A trailing `;` is the empty body of a successful lookup: u is set
     * and control continues at `return u`. */
    else TRYMAP_DEC(jisx0208, u, data[0], data[1]);
    else TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]);
    /* Hits in the _emp map get 0x20000 ORed into the looked-up value. */
    else TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1])
        u |= 0x20000;
    else TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]);
    else
        return MAP_UNMAPPABLE;
    return u;
}
static DBCHAR jisx0213_2004_1_encoder ( const ucs4_t *  data,
Py_ssize_t *  length
) [static]

Definition at line 869 of file _codecs_iso2022.c.

{
    /* Run the shared JIS X 0213 encoder with a NULL config and keep only
     * results that do not carry the 0x8000 flag. */
    const DBCHAR result = jisx0213_encoder(data, length, NULL);

    /* The two sentinels are forwarded untouched; they are excluded first
     * because both (0xFFFF and 0xFFFE) have bit 0x8000 set. */
    if (result != MAP_UNMAPPABLE && result != MAP_MULTIPLE_AVAIL &&
        (result & 0x8000))
        return MAP_UNMAPPABLE;

    return result;
}

Here is the call graph for this function:

static DBCHAR jisx0213_2004_1_encoder_paironly ( const ucs4_t *  data,
Py_ssize_t *  length
) [static]

Definition at line 881 of file _codecs_iso2022.c.

{
    /* Variant of the 2004 plane-1 encoder that only yields a code for a
     * two-code-point pair. The incoming *length is saved first because
     * jisx0213_encoder may overwrite it. */
    const Py_ssize_t requested = *length;
    const DBCHAR result = jisx0213_encoder(data, length, NULL);

    /* Single code point: only "multiple candidates" is passed on. */
    if (requested == 1)
        return (result == MAP_MULTIPLE_AVAIL) ? MAP_MULTIPLE_AVAIL
                                              : MAP_UNMAPPABLE;

    /* Pair: accept the code only if the encoder left *length at 2. */
    if (requested == 2 && *length == 2)
        return result;

    return MAP_UNMAPPABLE;
}

Here is the call graph for this function:

static ucs4_t jisx0213_2004_2_decoder ( const unsigned char *  data) [static]

Definition at line 756 of file _codecs_iso2022.c.

{
    /* Decode the two-byte sequence data[0], data[1] through the
     * jisx0213_2_bmp map, then the jisx0213_2_emp map.
     * Returns the decoded value, or MAP_UNMAPPABLE when neither map has
     * an entry. */
    ucs4_t u;
    /* A trailing `;` is the empty body of a successful lookup. */
    TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]);
    /* Hits in the _emp map get 0x20000 ORed into the looked-up value. */
    else TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1])
        u |= 0x20000;
    else
        return MAP_UNMAPPABLE;
    return u;
}
static DBCHAR jisx0213_2004_2_encoder ( const ucs4_t *  data,
Py_ssize_t *  length
) [static]

Definition at line 904 of file _codecs_iso2022.c.

{
    /* Run the shared JIS X 0213 encoder with a NULL config and keep only
     * results that carry the 0x8000 flag, returned with the flag
     * stripped. */
    const DBCHAR result = jisx0213_encoder(data, length, NULL);

    /* Sentinels are forwarded untouched; they are checked first because
     * both (0xFFFF and 0xFFFE) have bit 0x8000 set. */
    if (result == MAP_UNMAPPABLE || result == MAP_MULTIPLE_AVAIL)
        return result;

    if ((result & 0x8000) == 0)
        return MAP_UNMAPPABLE;

    return result & 0x7fff;
}

Here is the call graph for this function:

static DBCHAR jisx0213_encoder ( const ucs4_t *  data,
Py_ssize_t *  length,
void *  config
) [static]

Definition at line 768 of file _codecs_iso2022.c.

{
    /* Shared JIS X 0213 encoder behind the jisx0213_2000_* and
     * jisx0213_2004_* wrappers.
     *
     * data:   data[0] is the code point to encode; data[1] is read only
     *         when *length == 2.
     * length: in/out. On entry it selects the mode (1, 2 or -1, see the
     *         cases below); it is rewritten to 1 when a pair lookup falls
     *         back to the first code point alone and in the -1 case.
     * config: not referenced by name in this body; presumably consumed by
     *         the EMULATE_JISX0213_2000_* macros -- TODO confirm.
     *
     * Returns the code, MAP_MULTIPLE_AVAIL when the jisx0213_bmp entry is
     * MULTIC, or MAP_UNMAPPABLE. */
    DBCHAR coded;

    switch (*length) {
    case 1: /* first character */
        if (*data >= 0x10000) {
            /* Above 0xFFFF only code points whose upper 16 bits match
             * those of 0x20000 are considered; the map is keyed by the
             * low 16 bits. */
            if ((*data) >> 16 == 0x20000 >> 16) {
                EMULATE_JISX0213_2000_ENCODE_EMP(coded, *data)
                else TRYMAP_ENC(jisx0213_emp, coded,
                                (*data) & 0xffff)
                    return coded;
            }
            return MAP_UNMAPPABLE;
        }

        /* Code points below 0x10000: emulation macro first, then the
         * jisx0213_bmp map, then jisxcommon. */
        EMULATE_JISX0213_2000_ENCODE_BMP(coded, *data)
        else TRYMAP_ENC(jisx0213_bmp, coded, *data) {
            if (coded == MULTIC)
                return MAP_MULTIPLE_AVAIL;
        }
        else TRYMAP_ENC(jisxcommon, coded, *data) {
            /* jisxcommon entries flagged with 0x8000 are rejected. */
            if (coded & 0x8000)
                return MAP_UNMAPPABLE;
        }
        else
            return MAP_UNMAPPABLE;
        return coded;
    case 2: /* second character of unicode pair */
        coded = find_pairencmap((ucs2_t)data[0], (ucs2_t)data[1],
                        jisx0213_pair_encmap, JISX0213_ENCPAIRS);
        if (coded == DBCINV) {
            /* No entry for the pair: report that only one code point is
             * used and retry with data[0] alone. */
            *length = 1;
            coded = find_pairencmap((ucs2_t)data[0], 0,
                      jisx0213_pair_encmap, JISX0213_ENCPAIRS);
            if (coded == DBCINV)
                return MAP_UNMAPPABLE;
        }
        else
            return coded;
        /* fall through: reached only after a successful single lookup
         * above. The -1 case repeats the same assignment and lookup and
         * returns its result. */
    case -1: /* flush unterminated */
        *length = 1;
        coded = find_pairencmap((ucs2_t)data[0], 0,
                        jisx0213_pair_encmap, JISX0213_ENCPAIRS);
        if (coded == DBCINV)
            return MAP_UNMAPPABLE;
        else
            return coded;
    default:
        return MAP_UNMAPPABLE;
    }
}

Here is the caller graph for this function:

static int jisx0213_init ( void  ) [static]

Definition at line 682 of file _codecs_iso2022.c.

{
    /* Set to 1 once every map below has been imported; later calls then
     * return 0 without importing again. */
    static int initialized = 0;

    if (initialized)
        return 0;

    /* The chain short-circuits: the first non-zero result skips the
     * remaining imports, is reported as -1 and leaves the flag clear, so
     * the next call tries again. jisx0208_init() runs first. */
    if (jisx0208_init()
        || IMPORT_MAP(jp, jisx0213_bmp, &jisx0213_bmp_encmap, NULL)
        || IMPORT_MAP(jp, jisx0213_1_bmp, NULL, &jisx0213_1_bmp_decmap)
        || IMPORT_MAP(jp, jisx0213_2_bmp, NULL, &jisx0213_2_bmp_decmap)
        || IMPORT_MAP(jp, jisx0213_emp, &jisx0213_emp_encmap, NULL)
        || IMPORT_MAP(jp, jisx0213_1_emp, NULL, &jisx0213_1_emp_decmap)
        || IMPORT_MAP(jp, jisx0213_2_emp, NULL, &jisx0213_2_emp_decmap)
        || IMPORT_MAP(jp, jisx0213_pair, &jisx0213_pair_encmap,
                      &jisx0213_pair_decmap))
        return -1;

    initialized = 1;
    return 0;
}

Here is the call graph for this function:

static ucs4_t ksx1001_decoder ( const unsigned char *  data) [static]

Definition at line 581 of file _codecs_iso2022.c.

{
    /* Decode the two-byte sequence data[0], data[1] through the ksx1001
     * map. Returns the decoded code point, or MAP_UNMAPPABLE when the
     * lookup fails. */
    ucs4_t u;
    /* TRYMAP_DEC supplies the `if`; on a hit the result is in u. */
    TRYMAP_DEC(ksx1001, u, data[0], data[1])
        return u;
    else
        return MAP_UNMAPPABLE;
}
static DBCHAR ksx1001_encoder ( const ucs4_t *  data,
Py_ssize_t *  length
) [static]

Definition at line 591 of file _codecs_iso2022.c.

{
    /* Encode the single code point at *data through the cp949 map.
     * Returns the code, or MAP_UNMAPPABLE when the code point is
     * >= 0x10000, is not in the map, or maps to an entry with bit 0x8000
     * set. */
    DBCHAR coded;
    /* Callers must pass exactly one code point. */
    assert(*length == 1);
    if (*data < 0x10000) {
        /* TRYMAP_ENC supplies the outer `if`; the nested `if` is its
         * body and accepts only entries with bit 0x8000 clear. */
        TRYMAP_ENC(cp949, coded, *data)
            if (!(coded & 0x8000))
                return coded;
    }
    return MAP_UNMAPPABLE;
}
static int ksx1001_init ( void  ) [static]

Definition at line 568 of file _codecs_iso2022.c.

{
    /* Set to 1 once both maps have been imported; later calls then
     * return 0 without importing again. */
    static int initialized = 0;

    if (initialized)
        return 0;

    /* A non-zero result from either import is reported as -1 and leaves
     * the flag clear, so the next call tries again. */
    if (IMPORT_MAP(kr, cp949, &cp949_encmap, NULL))
        return -1;
    if (IMPORT_MAP(kr, ksx1001, NULL, &ksx1001_decmap))
        return -1;

    initialized = 1;
    return 0;
}

Variable Documentation

static struct iso2022_designation [static]
Initial value:

Definition at line 1067 of file _codecs_iso2022.c.