Back to index

python3.2  3.2.2
Classes | Functions | Variables
email.charset Namespace Reference

Classes

class  Charset

Functions

def add_charset
def add_alias
def add_codec
def _encode

Variables

list __all__
int QP = 1
int BASE64 = 2
int SHORTEST = 3
int RFC2047_CHROME_LEN = 7
string DEFAULT_CHARSET = 'us-ascii'
string UNKNOWN8BIT = 'unknown-8bit'
string EMPTYSTRING = ''
dictionary CHARSETS
dictionary ALIASES
dictionary CODEC_MAP

Function Documentation

def email.charset._encode (   string,
  codec 
) [private]

Definition at line 159 of file charset.py.

00159 
00160 def _encode(string, codec):
00161     if codec == UNKNOWN8BIT:
00162         return string.encode('ascii', 'surrogateescape')
00163     else:
00164         return string.encode(codec)
00165 
00166 


Here is the caller graph for this function:

def email.charset.add_alias (   alias,
  canonical 
)
Add a character set alias.

alias is the alias name, e.g. latin-1
canonical is the character set's canonical name, e.g. iso-8859-1

Definition at line 137 of file charset.py.

00137 
00138 def add_alias(alias, canonical):
00139     """Add a character set alias.
00140 
00141     alias is the alias name, e.g. latin-1
00142     canonical is the character set's canonical name, e.g. iso-8859-1
00143     """
00144     ALIASES[alias] = canonical
00145 

def email.charset.add_charset (   charset,
  header_enc = None,
  body_enc = None,
  output_charset = None 
)
Add character set properties to the global registry.

charset is the input character set, and must be the canonical name of a
character set.

Optional header_enc and body_enc are each either Charset.QP for
quoted-printable, Charset.BASE64 for base64 encoding, Charset.SHORTEST for
the shortest of qp or base64 encoding, or None for no encoding.  SHORTEST
is only valid for header_enc.  They describe how message headers and
message bodies in the input charset are to be encoded.  Default is no
encoding.

Optional output_charset is the character set that the output should be
in.  Conversions will proceed from input charset, to Unicode, to the
output charset when the method Charset.convert() is called.  The default
is to output in the same character set as the input.

Both charset (the input character set) and output_charset must have Unicode codec entries in
the module's charset-to-codec mapping; use add_codec(charset, codecname)
to add codecs the module does not know about.  See the codecs module's
documentation for more information.

Definition at line 109 of file charset.py.

00109 
00110 def add_charset(charset, header_enc=None, body_enc=None, output_charset=None):
00111     """Add character set properties to the global registry.
00112 
00113     charset is the input character set, and must be the canonical name of a
00114     character set.
00115 
00116     Optional header_enc and body_enc is either Charset.QP for
00117     quoted-printable, Charset.BASE64 for base64 encoding, Charset.SHORTEST for
00118     the shortest of qp or base64 encoding, or None for no encoding.  SHORTEST
00119     is only valid for header_enc.  It describes how message headers and
00120     message bodies in the input charset are to be encoded.  Default is no
00121     encoding.
00122 
00123     Optional output_charset is the character set that the output should be
00124     in.  Conversions will proceed from input charset, to Unicode, to the
00125     output charset when the method Charset.convert() is called.  The default
00126     is to output in the same character set as the input.
00127 
00128     Both input_charset and output_charset must have Unicode codec entries in
00129     the module's charset-to-codec mapping; use add_codec(charset, codecname)
00130     to add codecs the module does not know about.  See the codecs module's
00131     documentation for more information.
00132     """
00133     if body_enc == SHORTEST:
00134         raise ValueError('SHORTEST not allowed for body_enc')
00135     CHARSETS[charset] = (header_enc, body_enc, output_charset)
00136 

def email.charset.add_codec (   charset,
  codecname 
)
Add a codec that maps characters in the given charset to/from Unicode.

charset is the canonical name of a character set.  codecname is the name
of a Python codec, as appropriate for the second argument to the str()
built-in (unicode() in Python 2), or to the encode() method of a string.

Definition at line 146 of file charset.py.

00146 
00147 def add_codec(charset, codecname):
00148     """Add a codec that map characters in the given charset to/from Unicode.
00149 
00150     charset is the canonical name of a character set.  codecname is the name
00151     of a Python codec, as appropriate for the second argument to the unicode()
00152     built-in, or to the encode() method of a Unicode string.
00153     """
00154     CODEC_MAP[charset] = codecname
00155 
00156 
00157 
00158 # Convenience function for encoding strings, taking into account
# that they might be unknown-8bit (ie: have surrogate-escaped bytes)

Variable Documentation

list email.charset.__all__

Initial value:
00001 [
00002     'Charset',
00003     'add_alias',
00004     'add_charset',
00005     'add_codec',
00006     ]

Definition at line 5 of file charset.py.

dictionary email.charset.ALIASES

Initial value:
00001 {
00002     'latin_1': 'iso-8859-1',
00003     'latin-1': 'iso-8859-1',
00004     'latin_2': 'iso-8859-2',
00005     'latin-2': 'iso-8859-2',
00006     'latin_3': 'iso-8859-3',
00007     'latin-3': 'iso-8859-3',
00008     'latin_4': 'iso-8859-4',
00009     'latin-4': 'iso-8859-4',
00010     'latin_5': 'iso-8859-9',
00011     'latin-5': 'iso-8859-9',
00012     'latin_6': 'iso-8859-10',
00013     'latin-6': 'iso-8859-10',
00014     'latin_7': 'iso-8859-13',
00015     'latin-7': 'iso-8859-13',
00016     'latin_8': 'iso-8859-14',
00017     'latin-8': 'iso-8859-14',
00018     'latin_9': 'iso-8859-15',
00019     'latin-9': 'iso-8859-15',
00020     'latin_10':'iso-8859-16',
00021     'latin-10':'iso-8859-16',
00022     'cp949':   'ks_c_5601-1987',
00023     'euc_jp':  'euc-jp',
00024     'euc_kr':  'euc-kr',
00025     'ascii':   'us-ascii',
00026     }

Definition at line 68 of file charset.py.

int email.charset.BASE64 = 2

Definition at line 24 of file charset.py.

dictionary email.charset.CHARSETS

Initial value:
00001 {
00002     # input        header enc  body enc output conv
00003     'iso-8859-1':  (QP,        QP,      None),
00004     'iso-8859-2':  (QP,        QP,      None),
00005     'iso-8859-3':  (QP,        QP,      None),
00006     'iso-8859-4':  (QP,        QP,      None),
00007     # iso-8859-5 is Cyrillic, and not especially used
00008     # iso-8859-6 is Arabic, also not particularly used
00009     # iso-8859-7 is Greek, QP will not make it readable
00010     # iso-8859-8 is Hebrew, QP will not make it readable
00011     'iso-8859-9':  (QP,        QP,      None),
00012     'iso-8859-10': (QP,        QP,      None),
00013     # iso-8859-11 is Thai, QP will not make it readable
00014     'iso-8859-13': (QP,        QP,      None),
00015     'iso-8859-14': (QP,        QP,      None),
00016     'iso-8859-15': (QP,        QP,      None),
00017     'iso-8859-16': (QP,        QP,      None),
00018     'windows-1252':(QP,        QP,      None),
00019     'viscii':      (QP,        QP,      None),
00020     'us-ascii':    (None,      None,    None),
00021     'big5':        (BASE64,    BASE64,  None),
00022     'gb2312':      (BASE64,    BASE64,  None),
00023     'euc-jp':      (BASE64,    None,    'iso-2022-jp'),
00024     'shift_jis':   (BASE64,    None,    'iso-2022-jp'),
00025     'iso-2022-jp': (BASE64,    None,    None),
00026     'koi8-r':      (BASE64,    BASE64,  None),
00027     'utf-8':       (SHORTEST,  BASE64, 'utf-8'),
00028     }

Definition at line 37 of file charset.py.

dictionary email.charset.CODEC_MAP

Initial value:
00001 {
00002     'gb2312':      'eucgb2312_cn',
00003     'big5':        'big5_tw',
00004     # Hack: We don't want *any* conversion for stuff marked us-ascii, as all
00005     # sorts of garbage might be sent to us in the guise of 7-bit us-ascii.
00006     # Let that stuff pass through without conversion to/from Unicode.
00007     'us-ascii':    None,
00008     }

Definition at line 97 of file charset.py.

string email.charset.DEFAULT_CHARSET = 'us-ascii'

Definition at line 30 of file charset.py.

string email.charset.EMPTYSTRING = ''

Definition at line 32 of file charset.py.

int email.charset.QP = 1

Definition at line 23 of file charset.py.

int email.charset.RFC2047_CHROME_LEN = 7

Definition at line 28 of file charset.py.

int email.charset.SHORTEST = 3

Definition at line 25 of file charset.py.

string email.charset.UNKNOWN8BIT = 'unknown-8bit'

Definition at line 31 of file charset.py.