Back to index

python3.2  3.2.2
Classes | Defines | Typedefs | Enumerations | Functions | Variables
unicodeobject.c File Reference
#include "Python.h"
#include "ucnhash.h"
#include "stringlib/unicodedefs.h"
#include "stringlib/fastsearch.h"
#include "stringlib/count.h"
#include "stringlib/find.h"
#include "stringlib/partition.h"
#include "stringlib/split.h"
#include "stringlib/localeutil.h"
#include "stringlib/string_format.h"

Go to the source code of this file.

Classes

struct  encoding_map
struct  unicodeiterobject

Defines

#define PY_SSIZE_T_CLEAN
#define PyUnicode_MAXFREELIST   1024
#define KEEPALIVE_SIZE_LIMIT   9
#define BYTEORDER_IS_LITTLE_ENDIAN
#define BLOOM_MASK   unsigned long
#define BLOOM_ADD(mask, ch)   ((mask |= (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
#define BLOOM(mask, ch)   ((mask & (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
#define BLOOM_LINEBREAK(ch)
#define BLOOM_MEMBER(mask, chr, set, setlen)   BLOOM(mask, chr) && unicode_member(chr, set, setlen)
#define IS_BASE64(c)
#define FROM_BASE64(c)
#define TO_BASE64(n)   ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"[(n) & 0x3f])
#define DECODE_DIRECT(c)   ((c) <= 127 && (c) != '+')
#define ENCODE_DIRECT(c, directO, directWS)
#define LONG_PTR_MASK   (size_t) (SIZEOF_LONG - 1)
#define MAX_SHORT_UNICHARS   300 /* largest size we'll do on the stack */
#define STORECHAR(CH)
#define OFF   0
#define STORECHAR(CH)
#define _Py_InsertThousandsGrouping   _PyUnicode_InsertThousandsGrouping
#define _Py_InsertThousandsGroupingLocale   _PyUnicode_InsertThousandsGroupingLocale
#define ADJUST_INDICES(start, end, len)
#define TEST_COND(cond)   ((cond) ? Py_True : Py_False)
#define LEFTSTRIP   0
#define RIGHTSTRIP   1
#define BOTHSTRIP   2
#define STRIPNAME(i)   (stripformat[i]+3)
#define FORMATBUFLEN   (size_t)10

Typedefs

typedef enum charmapencode_result charmapencode_result

Enumerations

enum  charmapencode_result { enc_SUCCESS, enc_FAILED, enc_EXCEPTION }

Functions

static PyObject * unicode_encode_call_errorhandler (const char *errors, PyObject **errorHandler, const char *encoding, const char *reason, const Py_UNICODE *unicode, Py_ssize_t size, PyObject **exceptionObject, Py_ssize_t startpos, Py_ssize_t endpos, Py_ssize_t *newpos)
static void raise_encode_exception (PyObject **exceptionObject, const char *encoding, const Py_UNICODE *unicode, Py_ssize_t size, Py_ssize_t startpos, Py_ssize_t endpos, const char *reason)
Py_UNICODE PyUnicode_GetMax (void)
 Py_LOCAL_INLINE (BLOOM_MASK)
 Py_LOCAL_INLINE (int)
static int unicode_resize (register PyUnicodeObject *unicode, Py_ssize_t length)
static PyUnicodeObject * _PyUnicode_New (Py_ssize_t length)
static void unicode_dealloc (register PyUnicodeObject *unicode)
static int _PyUnicode_Resize (PyUnicodeObject **unicode, Py_ssize_t length)
int PyUnicode_Resize (PyObject **unicode, Py_ssize_t length)
PyObject * PyUnicode_FromUnicode (const Py_UNICODE *u, Py_ssize_t size)
PyObject * PyUnicode_FromStringAndSize (const char *u, Py_ssize_t size)
PyObject * PyUnicode_FromString (const char *u)
PyObject * PyUnicode_FromOrdinal (int ordinal)
PyObject * PyUnicode_FromObject (register PyObject *obj)
PyObject * PyUnicode_FromEncodedObject (register PyObject *obj, const char *encoding, const char *errors)
static int normalize_encoding (const char *encoding, char *lower, size_t lower_len)
PyObject * PyUnicode_Decode (const char *s, Py_ssize_t size, const char *encoding, const char *errors)
PyObject * PyUnicode_AsDecodedObject (PyObject *unicode, const char *encoding, const char *errors)
PyObject * PyUnicode_AsDecodedUnicode (PyObject *unicode, const char *encoding, const char *errors)
PyObject * PyUnicode_Encode (const Py_UNICODE *s, Py_ssize_t size, const char *encoding, const char *errors)
PyObject * PyUnicode_AsEncodedObject (PyObject *unicode, const char *encoding, const char *errors)
PyObject * PyUnicode_EncodeFSDefault (PyObject *unicode)
PyObject * PyUnicode_AsEncodedString (PyObject *unicode, const char *encoding, const char *errors)
PyObject * PyUnicode_AsEncodedUnicode (PyObject *unicode, const char *encoding, const char *errors)
PyObject * _PyUnicode_AsDefaultEncodedString (PyObject *unicode, const char *errors)
PyObject * PyUnicode_DecodeFSDefault (const char *s)
PyObject * PyUnicode_DecodeFSDefaultAndSize (const char *s, Py_ssize_t size)
int PyUnicode_FSConverter (PyObject *arg, void *addr)
int PyUnicode_FSDecoder (PyObject *arg, void *addr)
char * _PyUnicode_AsStringAndSize (PyObject *unicode, Py_ssize_t *psize)
char * _PyUnicode_AsString (PyObject *unicode)
Py_UNICODE * PyUnicode_AsUnicode (PyObject *unicode)
Py_ssize_t PyUnicode_GetSize (PyObject *unicode)
const char * PyUnicode_GetDefaultEncoding (void)
static void make_decode_exception (PyObject **exceptionObject, const char *encoding, const char *input, Py_ssize_t length, Py_ssize_t startpos, Py_ssize_t endpos, const char *reason)
static int unicode_decode_call_errorhandler (const char *errors, PyObject **errorHandler, const char *encoding, const char *reason, const char **input, const char **inend, Py_ssize_t *startinpos, Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr, PyUnicodeObject **output, Py_ssize_t *outpos, Py_UNICODE **outptr)
PyObject * PyUnicode_DecodeUTF7 (const char *s, Py_ssize_t size, const char *errors)
PyObject * PyUnicode_DecodeUTF7Stateful (const char *s, Py_ssize_t size, const char *errors, Py_ssize_t *consumed)
PyObject * PyUnicode_EncodeUTF7 (const Py_UNICODE *s, Py_ssize_t size, int base64SetO, int base64WhiteSpace, const char *errors)
PyObject * PyUnicode_DecodeUTF8 (const char *s, Py_ssize_t size, const char *errors)
PyObject * PyUnicode_DecodeUTF8Stateful (const char *s, Py_ssize_t size, const char *errors, Py_ssize_t *consumed)
PyObject * PyUnicode_EncodeUTF8 (const Py_UNICODE *s, Py_ssize_t size, const char *errors)
PyObject * PyUnicode_AsUTF8String (PyObject *unicode)
PyObject * PyUnicode_DecodeUTF32 (const char *s, Py_ssize_t size, const char *errors, int *byteorder)
PyObject * PyUnicode_DecodeUTF32Stateful (const char *s, Py_ssize_t size, const char *errors, int *byteorder, Py_ssize_t *consumed)
PyObject * PyUnicode_EncodeUTF32 (const Py_UNICODE *s, Py_ssize_t size, const char *errors, int byteorder)
PyObject * PyUnicode_AsUTF32String (PyObject *unicode)
PyObject * PyUnicode_DecodeUTF16 (const char *s, Py_ssize_t size, const char *errors, int *byteorder)
PyObject * PyUnicode_DecodeUTF16Stateful (const char *s, Py_ssize_t size, const char *errors, int *byteorder, Py_ssize_t *consumed)
PyObject * PyUnicode_EncodeUTF16 (const Py_UNICODE *s, Py_ssize_t size, const char *errors, int byteorder)
PyObject * PyUnicode_AsUTF16String (PyObject *unicode)
PyObject * PyUnicode_DecodeUnicodeEscape (const char *s, Py_ssize_t size, const char *errors)
 Py_LOCAL_INLINE (const Py_UNICODE *)
PyObject * PyUnicode_EncodeUnicodeEscape (const Py_UNICODE *s, Py_ssize_t size)
PyObject * PyUnicode_AsUnicodeEscapeString (PyObject *unicode)
PyObject * PyUnicode_DecodeRawUnicodeEscape (const char *s, Py_ssize_t size, const char *errors)
PyObject * PyUnicode_EncodeRawUnicodeEscape (const Py_UNICODE *s, Py_ssize_t size)
PyObject * PyUnicode_AsRawUnicodeEscapeString (PyObject *unicode)
PyObject * _PyUnicode_DecodeUnicodeInternal (const char *s, Py_ssize_t size, const char *errors)
PyObject * PyUnicode_DecodeLatin1 (const char *s, Py_ssize_t size, const char *errors)
static void make_encode_exception (PyObject **exceptionObject, const char *encoding, const Py_UNICODE *unicode, Py_ssize_t size, Py_ssize_t startpos, Py_ssize_t endpos, const char *reason)
static PyObject * unicode_encode_ucs1 (const Py_UNICODE *p, Py_ssize_t size, const char *errors, int limit)
PyObject * PyUnicode_EncodeLatin1 (const Py_UNICODE *p, Py_ssize_t size, const char *errors)
PyObject * PyUnicode_AsLatin1String (PyObject *unicode)
PyObject * PyUnicode_DecodeASCII (const char *s, Py_ssize_t size, const char *errors)
PyObject * PyUnicode_EncodeASCII (const Py_UNICODE *p, Py_ssize_t size, const char *errors)
PyObject * PyUnicode_AsASCIIString (PyObject *unicode)
PyObject * PyUnicode_DecodeCharmap (const char *s, Py_ssize_t size, PyObject *mapping, const char *errors)
static PyObject * encoding_map_size (PyObject *obj, PyObject *args)
static void encoding_map_dealloc (PyObject *o)
PyObject * PyUnicode_BuildEncodingMap (PyObject *string)
static int encoding_map_lookup (Py_UNICODE c, PyObject *mapping)
static PyObject * charmapencode_lookup (Py_UNICODE c, PyObject *mapping)
static int charmapencode_resize (PyObject **outobj, Py_ssize_t *outpos, Py_ssize_t requiredsize)
static charmapencode_result charmapencode_output (Py_UNICODE c, PyObject *mapping, PyObject **outobj, Py_ssize_t *outpos)
static int charmap_encoding_error (const Py_UNICODE *p, Py_ssize_t size, Py_ssize_t *inpos, PyObject *mapping, PyObject **exceptionObject, int *known_errorHandler, PyObject **errorHandler, const char *errors, PyObject **res, Py_ssize_t *respos)
PyObject * PyUnicode_EncodeCharmap (const Py_UNICODE *p, Py_ssize_t size, PyObject *mapping, const char *errors)
PyObject * PyUnicode_AsCharmapString (PyObject *unicode, PyObject *mapping)
static void make_translate_exception (PyObject **exceptionObject, const Py_UNICODE *unicode, Py_ssize_t size, Py_ssize_t startpos, Py_ssize_t endpos, const char *reason)
static void raise_translate_exception (PyObject **exceptionObject, const Py_UNICODE *unicode, Py_ssize_t size, Py_ssize_t startpos, Py_ssize_t endpos, const char *reason)
static PyObject * unicode_translate_call_errorhandler (const char *errors, PyObject **errorHandler, const char *reason, const Py_UNICODE *unicode, Py_ssize_t size, PyObject **exceptionObject, Py_ssize_t startpos, Py_ssize_t endpos, Py_ssize_t *newpos)
static int charmaptranslate_lookup (Py_UNICODE c, PyObject *mapping, PyObject **result)
static int charmaptranslate_makespace (PyObject **outobj, Py_UNICODE **outp, Py_ssize_t requiredsize)
static int charmaptranslate_output (const Py_UNICODE *startinp, const Py_UNICODE *curinp, Py_ssize_t insize, PyObject *mapping, PyObject **outobj, Py_UNICODE **outp, PyObject **res)
PyObject * PyUnicode_TranslateCharmap (const Py_UNICODE *p, Py_ssize_t size, PyObject *mapping, const char *errors)
PyObject * PyUnicode_Translate (PyObject *str, PyObject *mapping, const char *errors)
PyObject * PyUnicode_TransformDecimalToASCII (Py_UNICODE *s, Py_ssize_t length)
int PyUnicode_EncodeDecimal (Py_UNICODE *s, Py_ssize_t length, char *output, const char *errors)
Py_ssize_t PyUnicode_Count (PyObject *str, PyObject *substr, Py_ssize_t start, Py_ssize_t end)
Py_ssize_t PyUnicode_Find (PyObject *str, PyObject *sub, Py_ssize_t start, Py_ssize_t end, int direction)
static int tailmatch (PyUnicodeObject *self, PyUnicodeObject *substring, Py_ssize_t start, Py_ssize_t end, int direction)
Py_ssize_t PyUnicode_Tailmatch (PyObject *str, PyObject *substr, Py_ssize_t start, Py_ssize_t end, int direction)
static PyObject * fixup (PyUnicodeObject *self, int(*fixfct)(PyUnicodeObject *s))
static int fixupper (PyUnicodeObject *self)
static int fixlower (PyUnicodeObject *self)
static int fixswapcase (PyUnicodeObject *self)
static int fixcapitalize (PyUnicodeObject *self)
static int fixtitle (PyUnicodeObject *self)
PyObject * PyUnicode_Join (PyObject *separator, PyObject *seq)
static PyUnicodeObject * pad (PyUnicodeObject *self, Py_ssize_t left, Py_ssize_t right, Py_UNICODE fill)
PyObject * PyUnicode_Splitlines (PyObject *string, int keepends)
static PyObject * split (PyUnicodeObject *self, PyUnicodeObject *substring, Py_ssize_t maxcount)
static PyObject * rsplit (PyUnicodeObject *self, PyUnicodeObject *substring, Py_ssize_t maxcount)
static PyObject * replace (PyUnicodeObject *self, PyUnicodeObject *str1, PyUnicodeObject *str2, Py_ssize_t maxcount)
 PyDoc_STRVAR (title__doc__,"S.title() -> str\n\ \n\ Return a titlecased version of S, i.e. words start with title case\n\ characters, all remaining cased characters have lower case.")
static PyObject * unicode_title (PyUnicodeObject *self)
 PyDoc_STRVAR (capitalize__doc__,"S.capitalize() -> str\n\ \n\ Return a capitalized version of S, i.e. make the first character\n\ have upper case and the rest lower case.")
static PyObject * unicode_capitalize (PyUnicodeObject *self)
static int convert_uc (PyObject *obj, void *addr)
 PyDoc_STRVAR (center__doc__,"S.center(width[, fillchar]) -> str\n\ \n\ Return S centered in a string of length width. Padding is\n\ done using the specified fill character (default is a space)")
static PyObject * unicode_center (PyUnicodeObject *self, PyObject *args)
static int unicode_compare (PyUnicodeObject *str1, PyUnicodeObject *str2)
int PyUnicode_Compare (PyObject *left, PyObject *right)
int PyUnicode_CompareWithASCIIString (PyObject *uni, const char *str)
PyObject * PyUnicode_RichCompare (PyObject *left, PyObject *right, int op)
int PyUnicode_Contains (PyObject *container, PyObject *element)
PyObject * PyUnicode_Concat (PyObject *left, PyObject *right)
void PyUnicode_Append (PyObject **pleft, PyObject *right)
void PyUnicode_AppendAndDel (PyObject **pleft, PyObject *right)
 PyDoc_STRVAR (count__doc__,"S.count(sub[, start[, end]]) -> int\n\ \n\ Return the number of non-overlapping occurrences of substring sub in\n\ string S[start:end]. Optional arguments start and end are\n\ interpreted as in slice notation.")
static PyObject * unicode_count (PyUnicodeObject *self, PyObject *args)
 PyDoc_STRVAR (encode__doc__,"S.encode(encoding='utf-8', errors='strict') -> bytes\n\ \n\ Encode S using the codec registered for encoding. Default encoding\n\ is 'utf-8'. errors may be given to set a different error\n\ handling scheme. Default is 'strict' meaning that encoding errors raise\n\ a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\ 'xmlcharrefreplace' as well as any other name registered with\n\ codecs.register_error that can handle UnicodeEncodeErrors.")
static PyObject * unicode_encode (PyUnicodeObject *self, PyObject *args, PyObject *kwargs)
 PyDoc_STRVAR (expandtabs__doc__,"S.expandtabs([tabsize]) -> str\n\ \n\ Return a copy of S where all tab characters are expanded using spaces.\n\ If tabsize is not given, a tab size of 8 characters is assumed.")
static PyObject * unicode_expandtabs (PyUnicodeObject *self, PyObject *args)
 PyDoc_STRVAR (find__doc__,"S.find(sub[, start[, end]]) -> int\n\ \n\ Return the lowest index in S where substring sub is found,\n\ such that sub is contained within S[start:end]. Optional\n\ arguments start and end are interpreted as in slice notation.\n\ \n\ Return -1 on failure.")
static PyObject * unicode_find (PyUnicodeObject *self, PyObject *args)
static PyObject * unicode_getitem (PyUnicodeObject *self, Py_ssize_t index)
static Py_hash_t unicode_hash (PyUnicodeObject *self)
 PyDoc_STRVAR (index__doc__,"S.index(sub[, start[, end]]) -> int\n\ \n\ Like S.find() but raise ValueError when the substring is not found.")
static PyObject * unicode_index (PyUnicodeObject *self, PyObject *args)
 PyDoc_STRVAR (islower__doc__,"S.islower() -> bool\n\ \n\ Return True if all cased characters in S are lowercase and there is\n\ at least one cased character in S, False otherwise.")
static PyObject * unicode_islower (PyUnicodeObject *self)
 PyDoc_STRVAR (isupper__doc__,"S.isupper() -> bool\n\ \n\ Return True if all cased characters in S are uppercase and there is\n\ at least one cased character in S, False otherwise.")
static PyObject * unicode_isupper (PyUnicodeObject *self)
 PyDoc_STRVAR (istitle__doc__,"S.istitle() -> bool\n\ \n\ Return True if S is a titlecased string and there is at least one\n\ character in S, i.e. upper- and titlecase characters may only\n\ follow uncased characters and lowercase characters only cased ones.\n\ Return False otherwise.")
static PyObject * unicode_istitle (PyUnicodeObject *self)
 PyDoc_STRVAR (isspace__doc__,"S.isspace() -> bool\n\ \n\ Return True if all characters in S are whitespace\n\ and there is at least one character in S, False otherwise.")
static PyObject * unicode_isspace (PyUnicodeObject *self)
 PyDoc_STRVAR (isalpha__doc__,"S.isalpha() -> bool\n\ \n\ Return True if all characters in S are alphabetic\n\ and there is at least one character in S, False otherwise.")
static PyObject * unicode_isalpha (PyUnicodeObject *self)
 PyDoc_STRVAR (isalnum__doc__,"S.isalnum() -> bool\n\ \n\ Return True if all characters in S are alphanumeric\n\ and there is at least one character in S, False otherwise.")
static PyObject * unicode_isalnum (PyUnicodeObject *self)
 PyDoc_STRVAR (isdecimal__doc__,"S.isdecimal() -> bool\n\ \n\ Return True if there are only decimal characters in S,\n\ False otherwise.")
static PyObject * unicode_isdecimal (PyUnicodeObject *self)
 PyDoc_STRVAR (isdigit__doc__,"S.isdigit() -> bool\n\ \n\ Return True if all characters in S are digits\n\ and there is at least one character in S, False otherwise.")
static PyObject * unicode_isdigit (PyUnicodeObject *self)
 PyDoc_STRVAR (isnumeric__doc__,"S.isnumeric() -> bool\n\ \n\ Return True if there are only numeric characters in S,\n\ False otherwise.")
static PyObject * unicode_isnumeric (PyUnicodeObject *self)
static Py_UCS4 decode_ucs4 (const Py_UNICODE *s, Py_ssize_t *i, Py_ssize_t size)
int PyUnicode_IsIdentifier (PyObject *self)
 PyDoc_STRVAR (isidentifier__doc__,"S.isidentifier() -> bool\n\ \n\ Return True if S is a valid identifier according\n\ to the language definition.")
static PyObject * unicode_isidentifier (PyObject *self)
 PyDoc_STRVAR (isprintable__doc__,"S.isprintable() -> bool\n\ \n\ Return True if all characters in S are considered\n\ printable in repr() or S is empty, False otherwise.")
static PyObject * unicode_isprintable (PyObject *self)
 PyDoc_STRVAR (join__doc__,"S.join(iterable) -> str\n\ \n\ Return a string which is the concatenation of the strings in the\n\ iterable. The separator between elements is S.")
static PyObject * unicode_join (PyObject *self, PyObject *data)
static Py_ssize_t unicode_length (PyUnicodeObject *self)
 PyDoc_STRVAR (ljust__doc__,"S.ljust(width[, fillchar]) -> str\n\ \n\ Return S left-justified in a Unicode string of length width. Padding is\n\ done using the specified fill character (default is a space).")
static PyObject * unicode_ljust (PyUnicodeObject *self, PyObject *args)
 PyDoc_STRVAR (lower__doc__,"S.lower() -> str\n\ \n\ Return a copy of the string S converted to lowercase.")
static PyObject * unicode_lower (PyUnicodeObject *self)
PyObject * _PyUnicode_XStrip (PyUnicodeObject *self, int striptype, PyObject *sepobj)
static PyObject * do_strip (PyUnicodeObject *self, int striptype)
static PyObject * do_argstrip (PyUnicodeObject *self, int striptype, PyObject *args)
 PyDoc_STRVAR (strip__doc__,"S.strip([chars]) -> str\n\ \n\ Return a copy of the string S with leading and trailing\n\ whitespace removed.\n\ If chars is given and not None, remove characters in chars instead.")
static PyObject * unicode_strip (PyUnicodeObject *self, PyObject *args)
 PyDoc_STRVAR (lstrip__doc__,"S.lstrip([chars]) -> str\n\ \n\ Return a copy of the string S with leading whitespace removed.\n\ If chars is given and not None, remove characters in chars instead.")
static PyObject * unicode_lstrip (PyUnicodeObject *self, PyObject *args)
 PyDoc_STRVAR (rstrip__doc__,"S.rstrip([chars]) -> str\n\ \n\ Return a copy of the string S with trailing whitespace removed.\n\ If chars is given and not None, remove characters in chars instead.")
static PyObject * unicode_rstrip (PyUnicodeObject *self, PyObject *args)
static PyObject * unicode_repeat (PyUnicodeObject *str, Py_ssize_t len)
PyObject * PyUnicode_Replace (PyObject *obj, PyObject *subobj, PyObject *replobj, Py_ssize_t maxcount)
 PyDoc_STRVAR (replace__doc__,"S.replace(old, new[, count]) -> str\n\ \n\ Return a copy of S with all occurrences of substring\n\ old replaced by new. If the optional argument count is\n\ given, only the first count occurrences are replaced.")
static PyObject * unicode_replace (PyUnicodeObject *self, PyObject *args)
static PyObject * unicode_repr (PyObject *unicode)
 PyDoc_STRVAR (rfind__doc__,"S.rfind(sub[, start[, end]]) -> int\n\ \n\ Return the highest index in S where substring sub is found,\n\ such that sub is contained within S[start:end]. Optional\n\ arguments start and end are interpreted as in slice notation.\n\ \n\ Return -1 on failure.")
static PyObject * unicode_rfind (PyUnicodeObject *self, PyObject *args)
 PyDoc_STRVAR (rindex__doc__,"S.rindex(sub[, start[, end]]) -> int\n\ \n\ Like S.rfind() but raise ValueError when the substring is not found.")
static PyObject * unicode_rindex (PyUnicodeObject *self, PyObject *args)
 PyDoc_STRVAR (rjust__doc__,"S.rjust(width[, fillchar]) -> str\n\ \n\ Return S right-justified in a string of length width. Padding is\n\ done using the specified fill character (default is a space).")
static PyObject * unicode_rjust (PyUnicodeObject *self, PyObject *args)
PyObject * PyUnicode_Split (PyObject *s, PyObject *sep, Py_ssize_t maxsplit)
 PyDoc_STRVAR (split__doc__,"S.split([sep[, maxsplit]]) -> list of strings\n\ \n\ Return a list of the words in S, using sep as the\n\ delimiter string. If maxsplit is given, at most maxsplit\n\ splits are done. If sep is not specified or is None, any\n\ whitespace string is a separator and empty strings are\n\ removed from the result.")
static PyObject * unicode_split (PyUnicodeObject *self, PyObject *args)
PyObject * PyUnicode_Partition (PyObject *str_in, PyObject *sep_in)
PyObject * PyUnicode_RPartition (PyObject *str_in, PyObject *sep_in)
 PyDoc_STRVAR (partition__doc__,"S.partition(sep) -> (head, sep, tail)\n\ \n\ Search for the separator sep in S, and return the part before it,\n\ the separator itself, and the part after it. If the separator is not\n\ found, return S and two empty strings.")
static PyObject * unicode_partition (PyUnicodeObject *self, PyObject *separator)
 PyDoc_STRVAR (rpartition__doc__,"S.rpartition(sep) -> (head, sep, tail)\n\ \n\ Search for the separator sep in S, starting at the end of S, and return\n\ the part before it, the separator itself, and the part after it. If the\n\ separator is not found, return two empty strings and S.")
static PyObject * unicode_rpartition (PyUnicodeObject *self, PyObject *separator)
PyObject * PyUnicode_RSplit (PyObject *s, PyObject *sep, Py_ssize_t maxsplit)
 PyDoc_STRVAR (rsplit__doc__,"S.rsplit([sep[, maxsplit]]) -> list of strings\n\ \n\ Return a list of the words in S, using sep as the\n\ delimiter string, starting at the end of the string and\n\ working to the front. If maxsplit is given, at most maxsplit\n\ splits are done. If sep is not specified, any whitespace string\n\ is a separator.")
static PyObject * unicode_rsplit (PyUnicodeObject *self, PyObject *args)
 PyDoc_STRVAR (splitlines__doc__,"S.splitlines([keepends]) -> list of strings\n\ \n\ Return a list of the lines in S, breaking at line boundaries.\n\ Line breaks are not included in the resulting list unless keepends\n\ is given and true.")
static PyObject * unicode_splitlines (PyUnicodeObject *self, PyObject *args)
static PyObject * unicode_str (PyObject *self)
 PyDoc_STRVAR (swapcase__doc__,"S.swapcase() -> str\n\ \n\ Return a copy of S with uppercase characters converted to lowercase\n\ and vice versa.")
static PyObject * unicode_swapcase (PyUnicodeObject *self)
 PyDoc_STRVAR (maketrans__doc__,"str.maketrans(x[, y[, z]]) -> dict (static method)\n\ \n\ Return a translation table usable for str.translate().\n\ If there is only one argument, it must be a dictionary mapping Unicode\n\ ordinals (integers) or characters to Unicode ordinals, strings or None.\n\ Character keys will be then converted to ordinals.\n\ If there are two arguments, they must be strings of equal length, and\n\ in the resulting dictionary, each character in x will be mapped to the\n\ character at the same position in y. If there is a third argument, it\n\ must be a string, whose characters will be mapped to None in the result.")
static PyObject * unicode_maketrans (PyUnicodeObject *null, PyObject *args)
 PyDoc_STRVAR (translate__doc__,"S.translate(table) -> str\n\ \n\ Return a copy of the string S, where all characters have been mapped\n\ through the given translation table, which must be a mapping of\n\ Unicode ordinals to Unicode ordinals, strings, or None.\n\ Unmapped characters are left untouched. Characters mapped to None\n\ are deleted.")
static PyObject * unicode_translate (PyUnicodeObject *self, PyObject *table)
 PyDoc_STRVAR (upper__doc__,"S.upper() -> str\n\ \n\ Return a copy of S converted to uppercase.")
static PyObject * unicode_upper (PyUnicodeObject *self)
 PyDoc_STRVAR (zfill__doc__,"S.zfill(width) -> str\n\ \n\ Pad a numeric string S with zeros on the left, to fill a field\n\ of the specified width. The string S is never truncated.")
static PyObject * unicode_zfill (PyUnicodeObject *self, PyObject *args)
 PyDoc_STRVAR (startswith__doc__,"S.startswith(prefix[, start[, end]]) -> bool\n\ \n\ Return True if S starts with the specified prefix, False otherwise.\n\ With optional start, test S beginning at that position.\n\ With optional end, stop comparing S at that position.\n\ prefix can also be a tuple of strings to try.")
static PyObject * unicode_startswith (PyUnicodeObject *self, PyObject *args)
 PyDoc_STRVAR (endswith__doc__,"S.endswith(suffix[, start[, end]]) -> bool\n\ \n\ Return True if S ends with the specified suffix, False otherwise.\n\ With optional start, test S beginning at that position.\n\ With optional end, stop comparing S at that position.\n\ suffix can also be a tuple of strings to try.")
static PyObject * unicode_endswith (PyUnicodeObject *self, PyObject *args)
 PyDoc_STRVAR (format__doc__,"S.format(*args, **kwargs) -> str\n\ \n\ Return a formatted version of S, using substitutions from args and kwargs.\n\ The substitutions are identified by braces ('{' and '}').")
 PyDoc_STRVAR (format_map__doc__,"S.format_map(mapping) -> str\n\ \n\ Return a formatted version of S, using substitutions from mapping.\n\ The substitutions are identified by braces ('{' and '}').")
static PyObject * unicode__format__ (PyObject *self, PyObject *args)
 PyDoc_STRVAR (p_format__doc__,"S.__format__(format_spec) -> str\n\ \n\ Return a formatted version of S as described by format_spec.")
static PyObject * unicode__sizeof__ (PyUnicodeObject *v)
 PyDoc_STRVAR (sizeof__doc__,"S.__sizeof__() -> size of S in memory, in bytes")
static PyObject * unicode_getnewargs (PyUnicodeObject *v)
static PyObject * unicode_mod (PyObject *v, PyObject *w)
static PyObject * unicode_subscript (PyUnicodeObject *self, PyObject *item)
static PyObject * getnextarg (PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
static PyObject * formatfloat (PyObject *v, int flags, int prec, int type)
static PyObject * formatlong (PyObject *val, int flags, int prec, int type)
static int formatchar (Py_UNICODE *buf, size_t buflen, PyObject *v)
PyObject * PyUnicode_Format (PyObject *format, PyObject *args)
static PyObject * unicode_subtype_new (PyTypeObject *type, PyObject *args, PyObject *kwds)
static PyObject * unicode_new (PyTypeObject *type, PyObject *args, PyObject *kwds)
 PyDoc_STRVAR (unicode_doc,"str(string[, encoding[, errors]]) -> str\n\ \n\ Create a new string object from the given encoded string.\n\ encoding defaults to the current default string encoding.\n\ errors can be 'strict', 'replace' or 'ignore' and defaults to 'strict'.")
static PyObject * unicode_iter (PyObject *seq)
void _PyUnicode_Init (void)
int PyUnicode_ClearFreeList (void)
void _PyUnicode_Fini (void)
void PyUnicode_InternInPlace (PyObject **p)
void PyUnicode_InternImmortal (PyObject **p)
PyObject * PyUnicode_InternFromString (const char *cp)
void _Py_ReleaseInternedUnicodeStrings (void)
static void unicodeiter_dealloc (unicodeiterobject *it)
static int unicodeiter_traverse (unicodeiterobject *it, visitproc visit, void *arg)
static PyObject * unicodeiter_next (unicodeiterobject *it)
static PyObject * unicodeiter_len (unicodeiterobject *it)
 PyDoc_STRVAR (length_hint_doc,"Private method returning an estimate of len(list(it)).")
size_t Py_UNICODE_strlen (const Py_UNICODE *u)
Py_UNICODE * Py_UNICODE_strcpy (Py_UNICODE *s1, const Py_UNICODE *s2)
Py_UNICODE * Py_UNICODE_strncpy (Py_UNICODE *s1, const Py_UNICODE *s2, size_t n)
Py_UNICODE * Py_UNICODE_strcat (Py_UNICODE *s1, const Py_UNICODE *s2)
int Py_UNICODE_strcmp (const Py_UNICODE *s1, const Py_UNICODE *s2)
int Py_UNICODE_strncmp (const Py_UNICODE *s1, const Py_UNICODE *s2, size_t n)
Py_UNICODE * Py_UNICODE_strchr (const Py_UNICODE *s, Py_UNICODE c)
Py_UNICODE * Py_UNICODE_strrchr (const Py_UNICODE *s, Py_UNICODE c)
Py_UNICODE * PyUnicode_AsUnicodeCopy (PyObject *object)
PyMODINIT_FUNC PyInit__string (void)

Variables

static PyObject * interned
static PyUnicodeObject * free_list
static int numfree
static PyUnicodeObject * unicode_empty
static PyUnicodeObject * unicode_latin1 [256]
const unsigned char _Py_ascii_whitespace []
static unsigned char ascii_linebreak []
static BLOOM_MASK bloom_linebreak
static char utf7_category [128]
static char utf8_code_length [256]
static _PyUnicode_Name_CAPI * ucnhash_CAPI = NULL
static const char * hexdigits = "0123456789abcdef"
static PyMethodDef encoding_map_methods []
static PyTypeObject EncodingMapType
static const char * stripformat [] = {"|O:lstrip", "|O:rstrip", "|O:strip"}
static PyMethodDef unicode_methods []
static PyNumberMethods unicode_as_number
static PySequenceMethods unicode_as_sequence
static PyMappingMethods unicode_as_mapping
PyTypeObject PyUnicode_Type
static PyMethodDef unicodeiter_methods []
PyTypeObject PyUnicodeIter_Type
static PyMethodDef _string_methods []
static struct PyModuleDef

Class Documentation

struct encoding_map

Definition at line 5336 of file unicodeobject.c.

Class Members
int count2
int count3
PyObject_HEAD unsigned char level1
unsigned char level23
struct unicodeiterobject

Definition at line 10188 of file unicodeobject.c.

Collaboration diagram for unicodeiterobject:
Class Members
PyObject_HEAD Py_ssize_t it_index
PyUnicodeObject * it_seq

Define Documentation

#define _Py_InsertThousandsGrouping   _PyUnicode_InsertThousandsGrouping

Definition at line 6426 of file unicodeobject.c.

#define _Py_InsertThousandsGroupingLocale   _PyUnicode_InsertThousandsGroupingLocale

Definition at line 6427 of file unicodeobject.c.

#define ADJUST_INDICES (   start,
  end,
  len 
)
Value:
if (end > len)                              \
        end = len;                              \
    else if (end < 0) {                         \
        end += len;                             \
        if (end < 0)                            \
            end = 0;                            \
    }                                           \
    if (start < 0) {                            \
        start += len;                           \
        if (start < 0)                          \
            start = 0;                          \
    }

Definition at line 6431 of file unicodeobject.c.

#define BLOOM (   mask,
  ch 
)    ((mask & (1UL << ((ch) & (BLOOM_WIDTH - 1)))))

Definition at line 220 of file unicodeobject.c.

#define BLOOM_ADD (   mask,
  ch 
)    ((mask |= (1UL << ((ch) & (BLOOM_WIDTH - 1)))))

Definition at line 219 of file unicodeobject.c.

#define BLOOM_LINEBREAK (   ch)
Value:
((ch) < 128U ? ascii_linebreak[(ch)] :                              \
     (BLOOM(bloom_linebreak, (ch)) && Py_UNICODE_ISLINEBREAK(ch)))

Definition at line 222 of file unicodeobject.c.

#define BLOOM_MASK   unsigned long

Definition at line 215 of file unicodeobject.c.

#define BLOOM_MEMBER (   mask,
  chr,
  set,
  setlen 
)    BLOOM(mask, chr) && unicode_member(chr, set, setlen)

Definition at line 251 of file unicodeobject.c.

#define BOTHSTRIP   2

Definition at line 8111 of file unicodeobject.c.

#define BYTEORDER_IS_LITTLE_ENDIAN

Definition at line 78 of file unicodeobject.c.

#define DECODE_DIRECT (   c)    ((c) <= 127 && (c) != '+')

Definition at line 2161 of file unicodeobject.c.

#define ENCODE_DIRECT (   c,
  directO,
  directWS 
)
Value:
((c) < 128 && (c) > 0 &&                            \
     ((utf7_category[(c)] == 0) ||                      \
      (directWS && (utf7_category[(c)] == 2)) ||        \
      (directO && (utf7_category[(c)] == 1))))

Definition at line 2204 of file unicodeobject.c.

#define FORMATBUFLEN   (size_t)10

Definition at line 9442 of file unicodeobject.c.

#define FROM_BASE64 (   c)
Value:
(((c) >= 'A' && (c) <= 'Z') ? (c) - 'A' :                           \
     ((c) >= 'a' && (c) <= 'z') ? (c) - 'a' + 26 :                      \
     ((c) >= '0' && (c) <= '9') ? (c) - '0' + 52 :                      \
     (c) == '+' ? 62 : 63)

Definition at line 2145 of file unicodeobject.c.

#define IS_BASE64 (   c)
Value:
(((c) >= 'A' && (c) <= 'Z') ||     \
     ((c) >= 'a' && (c) <= 'z') ||     \
     ((c) >= '0' && (c) <= '9') ||     \
     (c) == '+' || (c) == '/')

Definition at line 2137 of file unicodeobject.c.

#define KEEPALIVE_SIZE_LIMIT   9

Definition at line 71 of file unicodeobject.c.

#define LEFTSTRIP   0

Definition at line 8109 of file unicodeobject.c.

#define LONG_PTR_MASK   (size_t) (SIZEOF_LONG - 1)

Definition at line 2547 of file unicodeobject.c.

#define MAX_SHORT_UNICHARS   300 /* largest size we'll do on the stack */
#define OFF   0

Definition at line 42 of file unicodeobject.c.

#define PyUnicode_MAXFREELIST   1024

Definition at line 52 of file unicodeobject.c.

#define RIGHTSTRIP   1

Definition at line 8110 of file unicodeobject.c.

#define STORECHAR (   CH)
Value:
do {                                        \
        p[iorder[3]] = ((CH) >> 24) & 0xff;     \
        p[iorder[2]] = ((CH) >> 16) & 0xff;     \
        p[iorder[1]] = ((CH) >> 8) & 0xff;      \
        p[iorder[0]] = (CH) & 0xff;             \
        p += 4;                                 \
    } while(0)
#define STORECHAR (   CH)
Value:
do {                                        \
        p[ihi] = ((CH) >> 8) & 0xff;            \
        p[ilo] = (CH) & 0xff;                   \
        p += 2;                                 \
    } while(0)
#define STRIPNAME (   i)    (stripformat[i]+3)

Definition at line 8116 of file unicodeobject.c.

#define TEST_COND (   cond)    ((cond) ? Py_True : Py_False)

Definition at line 7291 of file unicodeobject.c.

#define TO_BASE64 (   n)    ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"[(n) & 0x3f])

Definition at line 2153 of file unicodeobject.c.


Typedef Documentation


Enumeration Type Documentation

Enumerator:
enc_SUCCESS 
enc_FAILED 
enc_EXCEPTION 

Definition at line 5606 of file unicodeobject.c.


Function Documentation

Definition at line 10134 of file unicodeobject.c.

{
    PyObject *key_list;
    PyUnicodeObject *str;
    Py_ssize_t idx, count;
    Py_ssize_t immortal_total = 0;
    Py_ssize_t mortal_total = 0;

    /* Nothing to release unless the interned table exists and is a dict. */
    if (interned == NULL || !PyDict_Check(interned))
        return;

    key_list = PyDict_Keys(interned);
    if (key_list == NULL || !PyList_Check(key_list)) {
        PyErr_Clear();
        return;
    }

    /* This routine exists to assist leak detectors.  The interned strings
       are not forcibly freed: each one gets back the references that
       interning took from it, its state is reset, and afterwards the
       interned dict itself is cleared and released. */

    count = PyList_GET_SIZE(key_list);
    fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
            count);
    idx = 0;
    while (idx < count) {
        str = (PyUnicodeObject *) PyList_GET_ITEM(key_list, idx);
        if (str->state == SSTATE_INTERNED_IMMORTAL) {
            Py_REFCNT(str) += 1;
            immortal_total += str->length;
        }
        else if (str->state == SSTATE_INTERNED_MORTAL) {
            Py_REFCNT(str) += 2;
            mortal_total += str->length;
        }
        else if (str->state != SSTATE_NOT_INTERNED) {
            /* Any value other than the three known states is corruption. */
            Py_FatalError("Inconsistent interned string state.");
        }
        /* SSTATE_NOT_INTERNED: XXX shouldn't happen; nothing to give back. */
        str->state = SSTATE_NOT_INTERNED;
        idx++;
    }
    fprintf(stderr, "total size of all interned strings: "
            "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
            "mortal/immortal\n", mortal_total, immortal_total);
    Py_DECREF(key_list);
    PyDict_Clear(interned);
    Py_DECREF(interned);
    interned = NULL;
}

Here is the call graph for this function:

Here is the caller graph for this function:

PyObject* _PyUnicode_AsDefaultEncodedString ( PyObject unicode,
const char *  errors 
)

Definition at line 1800 of file unicodeobject.c.

{
    PyUnicodeObject *u = (PyUnicodeObject *)unicode;
    PyObject *encoded = u->defenc;

    /* Fast path: the UTF-8 encoding is already cached on the object. */
    if (encoded != NULL)
        return encoded;

    /* Only the default (NULL) error mode is supported here. */
    if (errors != NULL)
        Py_FatalError("non-NULL encoding in _PyUnicode_AsDefaultEncodedString");

    encoded = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
                                   PyUnicode_GET_SIZE(unicode),
                                   NULL);
    /* On success remember the result in the object; the very same object is
       handed back to the caller without taking an additional reference. */
    if (encoded != NULL)
        u->defenc = encoded;
    return encoded;
}

Here is the call graph for this function:

Here is the caller graph for this function:

char* _PyUnicode_AsString ( PyObject unicode)

Definition at line 1967 of file unicodeobject.c.

{
    /* Delegate to the sized variant, passing NULL so no length is stored. */
    char *result = _PyUnicode_AsStringAndSize(unicode, NULL);

    return result;
}

Here is the call graph for this function:

char* _PyUnicode_AsStringAndSize ( PyObject unicode,
Py_ssize_t psize 
)

Definition at line 1951 of file unicodeobject.c.

{
    /* Returns the character buffer of the object's cached default-encoded
       bytes, optionally storing its length in *psize.  Returns NULL when
       the argument is not a unicode object or the encoding step fails. */
    if (PyUnicode_Check(unicode)) {
        PyObject *encoded = _PyUnicode_AsDefaultEncodedString(unicode, NULL);

        if (encoded == NULL)
            return NULL;
        if (psize != NULL)
            *psize = PyBytes_GET_SIZE(encoded);
        return PyBytes_AS_STRING(encoded);
    }

    PyErr_BadArgument();
    return NULL;
}

Here is the call graph for this function:

Here is the caller graph for this function:

PyObject* _PyUnicode_DecodeUnicodeInternal ( const char *  s,
Py_ssize_t  size,
const char *  errors 
)

Definition at line 4381 of file unicodeobject.c.

{
    /* Decode a buffer holding raw Py_UNICODE units ("unicode_internal").
     *
     * s/size  - input bytes and their count
     * errors  - name of the error handler (passed on to
     *           unicode_decode_call_errorhandler)
     *
     * Returns a new unicode object, or NULL after an error handler or an
     * allocation reported failure.
     */
    const char *starts = s;
    Py_ssize_t startinpos;
    Py_ssize_t endinpos;
    Py_ssize_t outpos;
    PyUnicodeObject *v;
    Py_UNICODE *p;
    const char *end;
    const char *reason;
    PyObject *errorHandler = NULL;
    PyObject *exc = NULL;

#ifdef Py_UNICODE_WIDE
    Py_UNICODE unimax = PyUnicode_GetMax();
#endif

    /* XXX overflow detection missing */
    /* Round up so a trailing partial unit still has an output slot. */
    v = _PyUnicode_New((size+Py_UNICODE_SIZE-1)/ Py_UNICODE_SIZE);
    if (v == NULL)
        goto onError;
    if (PyUnicode_GetSize((PyObject *)v) == 0)
        return (PyObject *)v;
    p = PyUnicode_AS_UNICODE(v);
    end = s + size;

    while (s < end) {
        /* Decide whether a full unit is available BEFORE copying it:
           copying sizeof(Py_UNICODE) bytes from a shorter tail would read
           past the end of the caller's buffer. */
        const int truncated = (end - s < Py_UNICODE_SIZE);

        if (!truncated)
            memcpy(p, s, sizeof(Py_UNICODE));
        /* We have to sanity check the raw data, otherwise doom looms for
           some malformed UCS-4 data. */
        if (truncated
#ifdef Py_UNICODE_WIDE
            || *p > unimax || *p < 0
#endif
            )
        {
            startinpos = s - starts;
            if (truncated) {
                endinpos = end-starts;
                reason = "truncated input";
            }
            else {
                endinpos = s - starts + Py_UNICODE_SIZE;
                reason = "illegal code point (> 0x10FFFF)";
            }
            outpos = p - PyUnicode_AS_UNICODE(v);
            /* The handler receives the addresses of s, v and p and may
               update them (e.g. to skip input or emit a replacement). */
            if (unicode_decode_call_errorhandler(
                    errors, &errorHandler,
                    "unicode_internal", reason,
                    &starts, &end, &startinpos, &endinpos, &exc, &s,
                    &v, &outpos, &p)) {
                goto onError;
            }
        }
        else {
            p++;
            s += Py_UNICODE_SIZE;
        }
    }

    /* Trim the result to the number of units actually produced. */
    if (_PyUnicode_Resize(&v, p - PyUnicode_AS_UNICODE(v)) < 0)
        goto onError;
    Py_XDECREF(errorHandler);
    Py_XDECREF(exc);
    return (PyObject *)v;

  onError:
    Py_XDECREF(v);
    Py_XDECREF(errorHandler);
    Py_XDECREF(exc);
    return NULL;
}

Here is the call graph for this function:

Here is the caller graph for this function:

Definition at line 10050 of file unicodeobject.c.

Here is the call graph for this function:

Definition at line 9991 of file unicodeobject.c.

{
    int slot;

    /* XXX - move this array to unicodectype.c ? */
    /* Characters used to seed the line-break bloom filter below. */
    Py_UNICODE linebreak[] = {
        0x000A, /* LINE FEED */
        0x000D, /* CARRIAGE RETURN */
        0x001C, /* FILE SEPARATOR */
        0x001D, /* GROUP SEPARATOR */
        0x001E, /* RECORD SEPARATOR */
        0x0085, /* NEXT LINE */
        0x2028, /* LINE SEPARATOR */
        0x2029, /* PARAGRAPH SEPARATOR */
    };

    /* Reset the free list and create the shared empty string; without the
       latter nothing else can be set up, so bail out quietly. */
    free_list = NULL;
    numfree = 0;
    unicode_empty = _PyUnicode_New(0);
    if (unicode_empty == NULL)
        return;

    /* Start with an empty cache of single Latin-1 character objects. */
    slot = 0;
    while (slot < 256) {
        unicode_latin1[slot] = NULL;
        slot++;
    }

    if (PyType_Ready(&PyUnicode_Type) < 0)
        Py_FatalError("Can't initialize 'unicode'");

    /* Build the bloom filter consulted by BLOOM_LINEBREAK. */
    bloom_linebreak = make_bloom_mask(
        linebreak, sizeof(linebreak) / sizeof(linebreak[0])
        );

    PyType_Ready(&EncodingMapType);
}

Here is the call graph for this function:

static PyUnicodeObject* _PyUnicode_New ( Py_ssize_t  length) [static]

Definition at line 315 of file unicodeobject.c.

{
    /* Allocate a unicode object with room for `length` characters plus a
     * terminating zero.  Returns the shared empty string (with a new
     * reference) for length 0 once it exists, otherwise a recycled
     * free-list object or a freshly allocated one.  Returns NULL with a
     * memory error set when the size check or an allocation fails.
     */
    register PyUnicodeObject *unicode;

    /* Optimization for empty strings */
    if (length == 0 && unicode_empty != NULL) {
        Py_INCREF(unicode_empty);
        return unicode_empty;
    }

    /* Ensure we won't overflow the size. */
    if (length > ((PY_SSIZE_T_MAX / sizeof(Py_UNICODE)) - 1)) {
        return (PyUnicodeObject *)PyErr_NoMemory();
    }

    /* Unicode freelist & memory allocation */
    if (free_list) {
        /* Pop the head of the free list; the link to the next free object
           is read from the start of the object's own memory. */
        unicode = free_list;
        free_list = *(PyUnicodeObject **)unicode;
        numfree--;
        if (unicode->str) {
            /* Keep-Alive optimization: we only upsize the buffer,
               never downsize it. */
            if ((unicode->length < length) &&
                unicode_resize(unicode, length) < 0) {
                /* Growing failed: drop the buffer so the common
                   "!unicode->str" check below reports the error. */
                PyObject_DEL(unicode->str);
                unicode->str = NULL;
            }
        }
        else {
            /* Recycled object without a buffer: allocate length + 1
               elements (the extra one holds the terminator). */
            size_t new_size = sizeof(Py_UNICODE) * ((size_t)length + 1);
            unicode->str = (Py_UNICODE*) PyObject_MALLOC(new_size);
        }
        PyObject_INIT(unicode, &PyUnicode_Type);
    }
    else {
        /* Free list is empty: allocate both the object and its buffer. */
        size_t new_size;
        unicode = PyObject_New(PyUnicodeObject, &PyUnicode_Type);
        if (unicode == NULL)
            return NULL;
        new_size = sizeof(Py_UNICODE) * ((size_t)length + 1);
        unicode->str = (Py_UNICODE*) PyObject_MALLOC(new_size);
    }

    if (!unicode->str) {
        PyErr_NoMemory();
        goto onError;
    }
    /* Initialize the first element to guard against cases where
     * the caller fails before initializing str -- unicode_resize()
     * reads str[0], and the Keep-Alive optimization can keep memory
     * allocated for str alive across a call to unicode_dealloc(unicode).
     * We don't want unicode_resize to read uninitialized memory in
     * that case.
     */
    unicode->str[0] = 0;
    unicode->str[length] = 0;
    unicode->length = length;
    unicode->hash = -1;
    unicode->state = 0;
    unicode->defenc = NULL;
    return unicode;

  onError:
    /* The object exists but has no buffer: undo the reference
       bookkeeping and release the object itself. */
    /* XXX UNREF/NEWREF interface should be more symmetrical */
    _Py_DEC_REFTOTAL;
    _Py_ForgetReference((PyObject *)unicode);
    PyObject_Del(unicode);
    return NULL;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int _PyUnicode_Resize ( PyUnicodeObject **  unicode,
Py_ssize_t  length 
) [static]

Definition at line 432 of file unicodeobject.c.

{
    register PyUnicodeObject *old;

    /* Validate the arguments: a non-NULL slot holding an unshared
       (refcount 1) unicode object and a non-negative target length. */
    if (unicode == NULL) {
        PyErr_BadInternalCall();
        return -1;
    }
    old = *unicode;
    if (old == NULL || !PyUnicode_Check(old) || Py_REFCNT(old) != 1 || length < 0) {
        PyErr_BadInternalCall();
        return -1;
    }

    /* Common case: the object can be resized in place, so *unicode does
       not need to change.  This covers every object except the shared
       empty string and length-1 strings whose size would actually change. */
    if (old->length == length ||
        (old != unicode_empty && old->length != 1))
        return unicode_resize(old, length);

    /* unicode_empty and single character objects are shared and must not
       be modified: hand back a fresh object carrying the same content. */
    {
        PyUnicodeObject *fresh = _PyUnicode_New(length);
        Py_ssize_t ncopy;

        if (fresh == NULL)
            return -1;
        ncopy = length < old->length ? length : old->length;
        Py_UNICODE_COPY(fresh->str, old->str, ncopy);
        Py_DECREF(*unicode);
        *unicode = fresh;
        return 0;
    }
}

Here is the call graph for this function:

Here is the caller graph for this function:

PyObject* _PyUnicode_XStrip ( PyUnicodeObject self,
int  striptype,
PyObject sepobj 
)

Definition at line 8120 of file unicodeobject.c.

{
    Py_UNICODE *chars = PyUnicode_AS_UNICODE(self);
    Py_ssize_t nchars = PyUnicode_GET_SIZE(self);
    Py_UNICODE *sep = PyUnicode_AS_UNICODE(sepobj);
    Py_ssize_t seplen = PyUnicode_GET_SIZE(sepobj);
    Py_ssize_t left = 0;
    Py_ssize_t right = nchars;

    /* Bloom mask over the separator set: a cheap pre-test before the
       exact membership check performed by BLOOM_MEMBER. */
    BLOOM_MASK sepmask = make_bloom_mask(sep, seplen);

    /* Advance past leading separator characters unless only the right
       side is being stripped. */
    if (striptype != RIGHTSTRIP) {
        while (left < nchars &&
               BLOOM_MEMBER(sepmask, chars[left], sep, seplen)) {
            left++;
        }
    }

    /* Retreat over trailing separator characters unless only the left
       side is being stripped; never move below `left`. */
    if (striptype != LEFTSTRIP) {
        while (right - 1 >= left &&
               BLOOM_MEMBER(sepmask, chars[right - 1], sep, seplen)) {
            right--;
        }
    }

    /* Something was removed, or self is a subclass instance: build a new
       string from the remaining slice. */
    if (left != 0 || right != nchars || !PyUnicode_CheckExact(self))
        return PyUnicode_FromUnicode(chars + left, right - left);

    /* Nothing stripped from an exact unicode object: reuse it. */
    Py_INCREF(self);
    return (PyObject*)self;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int charmap_encoding_error ( const Py_UNICODE p,
Py_ssize_t  size,
Py_ssize_t inpos,
PyObject mapping,
PyObject **  exceptionObject,
int known_errorHandler,
PyObject **  errorHandler,
const char *  errors,
PyObject **  res,
Py_ssize_t respos 
) [static]

Definition at line 5674 of file unicodeobject.c.

{
    /* Handle an unencodable character at p[*inpos] during charmap encoding.
     *
     * Collects the run of consecutive unencodable characters starting at
     * *inpos, then applies the error policy named by `errors` (the decoded
     * policy is cached in *known_errorHandler; -1 means "not decoded yet").
     * Replacement output is appended to *res at *respos and *inpos is moved
     * past the handled input.
     *
     * Returns 0 on success and -1 on failure.
     */
    PyObject *repunicode = NULL; /* initialize to prevent gcc warning */
    Py_ssize_t repsize;
    Py_ssize_t newpos;
    Py_UNICODE *uni2;
    /* startpos for collecting unencodable chars */
    Py_ssize_t collstartpos = *inpos;
    Py_ssize_t collendpos = *inpos+1;
    Py_ssize_t collpos;
    char *encoding = "charmap";
    char *reason = "character maps to <undefined>";
    charmapencode_result x;

    /* find all unencodable characters */
    while (collendpos < size) {
        PyObject *rep;
        if (Py_TYPE(mapping) == &EncodingMapType) {
            /* Fast path for EncodingMap objects; -1 means "unmapped". */
            int mapped = encoding_map_lookup(p[collendpos], mapping);
            if (mapped != -1)
                break;
            ++collendpos;
            continue;
        }

        rep = charmapencode_lookup(p[collendpos], mapping);
        if (rep==NULL)
            return -1;
        else if (rep!=Py_None) {
            Py_DECREF(rep);
            break;
        }
        Py_DECREF(rep);
        ++collendpos;
    }
    /* cache callback name lookup
     * (if not done yet, i.e. it's the first error) */
    if (*known_errorHandler==-1) {
        if ((errors==NULL) || (!strcmp(errors, "strict")))
            *known_errorHandler = 1;
        else if (!strcmp(errors, "replace"))
            *known_errorHandler = 2;
        else if (!strcmp(errors, "ignore"))
            *known_errorHandler = 3;
        else if (!strcmp(errors, "xmlcharrefreplace"))
            *known_errorHandler = 4;
        else
            *known_errorHandler = 0;
    }
    switch (*known_errorHandler) {
    case 1: /* strict */
        raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason);
        return -1;
    case 2: /* replace */
        /* Emit one '?' per unencodable character. */
        for (collpos = collstartpos; collpos<collendpos; ++collpos) {
            x = charmapencode_output('?', mapping, res, respos);
            if (x==enc_EXCEPTION) {
                return -1;
            }
            else if (x==enc_FAILED) {
                raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason);
                return -1;
            }
        }
        /* fall through */
    case 3: /* ignore */
        *inpos = collendpos;
        break;
    case 4: /* xmlcharrefreplace */
        /* Emit "&#NNN;" for each unencodable character, pushing every
           character of the reference through the mapping. */
        for (collpos = collstartpos; collpos < collendpos; ++collpos) {
            char buffer[2+29+1+1];
            char *cp;
            sprintf(buffer, "&#%d;", (int)p[collpos]);
            for (cp = buffer; *cp; ++cp) {
                x = charmapencode_output(*cp, mapping, res, respos);
                if (x==enc_EXCEPTION)
                    return -1;
                else if (x==enc_FAILED) {
                    raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason);
                    return -1;
                }
            }
        }
        *inpos = collendpos;
        break;
    default:
        /* Custom handler: it returns a replacement object (owned by us
           from here on) and the position at which to resume. */
        repunicode = unicode_encode_call_errorhandler(errors, errorHandler,
                                                      encoding, reason, p, size, exceptionObject,
                                                      collstartpos, collendpos, &newpos);
        if (repunicode == NULL)
            return -1;
        if (PyBytes_Check(repunicode)) {
            /* Directly copy bytes result to output. */
            Py_ssize_t outsize = PyBytes_Size(*res);
            Py_ssize_t requiredsize;
            repsize = PyBytes_Size(repunicode);
            requiredsize = *respos + repsize;
            if (requiredsize > outsize)
                /* Make room for all additional bytes. */
                if (charmapencode_resize(res, respos, requiredsize)) {
                    Py_DECREF(repunicode);
                    return -1;
                }
            memcpy(PyBytes_AsString(*res) + *respos,
                   PyBytes_AsString(repunicode),  repsize);
            *respos += repsize;
            *inpos = newpos;
            Py_DECREF(repunicode);
            break;
        }
        /* generate replacement  */
        repsize = PyUnicode_GET_SIZE(repunicode);
        for (uni2 = PyUnicode_AS_UNICODE(repunicode); repsize-->0; ++uni2) {
            x = charmapencode_output(*uni2, mapping, res, respos);
            if (x==enc_EXCEPTION) {
                /* Release the replacement on this exit path too, exactly
                   as the enc_FAILED path does; otherwise it leaks. */
                Py_DECREF(repunicode);
                return -1;
            }
            else if (x==enc_FAILED) {
                Py_DECREF(repunicode);
                raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason);
                return -1;
            }
        }
        *inpos = newpos;
        Py_DECREF(repunicode);
    }
    return 0;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static PyObject* charmapencode_lookup ( Py_UNICODE  c,
PyObject mapping 
) [static]

Definition at line 5551 of file unicodeobject.c.

{
    /* Look up character c in `mapping` (keyed by its integer value).
       Yields a new reference to None when the character is unmapped, to an
       int in range(256), or to a bytes object; NULL signals an error. */
    PyObject *key = PyLong_FromLong((long)c);
    PyObject *item;

    if (key == NULL)
        return NULL;
    item = PyObject_GetItem(mapping, key);
    Py_DECREF(key);

    if (item == NULL) {
        /* Only lookup failures are tolerated; anything else propagates. */
        if (!PyErr_ExceptionMatches(PyExc_LookupError))
            return NULL;
        /* No mapping found means: mapping is undefined. */
        PyErr_Clear();
        item = Py_None;
        Py_INCREF(item);
        return item;
    }

    /* None and bytes results are passed through unchanged. */
    if (item == Py_None || PyBytes_Check(item))
        return item;

    if (PyLong_Check(item)) {
        long value = PyLong_AS_LONG(item);

        if (value >= 0 && value <= 255)
            return item;
        PyErr_SetString(PyExc_TypeError,
                        "character mapping must be in range(256)");
        Py_DECREF(item);
        return NULL;
    }

    /* wrong return value */
    PyErr_Format(PyExc_TypeError,
                 "character mapping must return integer, bytes or None, not %.400s",
                 item->ob_type->tp_name);
    Py_DECREF(item);
    return NULL;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static charmapencode_result charmapencode_output ( Py_UNICODE  c,
PyObject mapping,
PyObject **  outobj,
Py_ssize_t outpos 
) [static]

Definition at line 5616 of file unicodeobject.c.

{
    /* Encode character c through `mapping` and append the result to the
       bytes object *outobj at offset *outpos, growing it when needed.
       Returns enc_SUCCESS, enc_FAILED (character is unmapped) or
       enc_EXCEPTION (lookup or resize raised). */
    PyObject *rep;
    char *dest;
    Py_ssize_t capacity = PyBytes_GET_SIZE(*outobj);
    Py_ssize_t needed;

    /* Fast path: EncodingMap objects yield the byte value directly. */
    if (Py_TYPE(mapping) == &EncodingMapType) {
        int byte = encoding_map_lookup(c, mapping);

        if (byte == -1)
            return enc_FAILED;
        needed = *outpos + 1;
        if (capacity < needed &&
            charmapencode_resize(outobj, outpos, needed))
            return enc_EXCEPTION;
        dest = PyBytes_AS_STRING(*outobj);
        dest[(*outpos)++] = (char)byte;
        return enc_SUCCESS;
    }

    /* Generic path: the mapping produces None, an int or a bytes object. */
    rep = charmapencode_lookup(c, mapping);
    if (rep == NULL)
        return enc_EXCEPTION;
    if (rep == Py_None) {
        Py_DECREF(rep);
        return enc_FAILED;
    }

    if (PyLong_Check(rep)) {
        /* A single byte value. */
        needed = *outpos + 1;
        if (capacity < needed &&
            charmapencode_resize(outobj, outpos, needed)) {
            Py_DECREF(rep);
            return enc_EXCEPTION;
        }
        dest = PyBytes_AS_STRING(*outobj);
        dest[(*outpos)++] = (char)PyLong_AS_LONG(rep);
    }
    else {
        /* A bytes replacement of arbitrary length. */
        const char *repchars = PyBytes_AS_STRING(rep);
        Py_ssize_t repsize = PyBytes_GET_SIZE(rep);

        needed = *outpos + repsize;
        if (capacity < needed &&
            charmapencode_resize(outobj, outpos, needed)) {
            Py_DECREF(rep);
            return enc_EXCEPTION;
        }
        dest = PyBytes_AS_STRING(*outobj);
        memcpy(dest + *outpos, repchars, repsize);
        *outpos += repsize;
    }
    Py_DECREF(rep);
    return enc_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int charmapencode_resize ( PyObject **  outobj,
Py_ssize_t outpos,
Py_ssize_t  requiredsize 
) [static]

Definition at line 5595 of file unicodeobject.c.

{
    /* Grow *outobj to at least `requiredsize` bytes, but never to less
       than twice its current size, so repeated appends stay cheap.
       Returns 0 on success and -1 when the resize fails. */
    Py_ssize_t doubled = 2 * PyBytes_GET_SIZE(*outobj);
    Py_ssize_t newsize = (requiredsize < doubled) ? doubled : requiredsize;

    return _PyBytes_Resize(outobj, newsize) ? -1 : 0;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int charmaptranslate_lookup ( Py_UNICODE  c,
PyObject mapping,
PyObject **  result 
) [static]

Definition at line 5981 of file unicodeobject.c.

{
    /* Look up character c in `mapping` for str.translate()-style mapping.
     *
     * On success returns 0 and stores in *result:
     *   NULL        - no entry (caller should copy the character 1:1)
     *   None        - entry is None
     *   int object  - a code point within 0..PyUnicode_GetMax()
     *   str object  - a replacement string
     * Non-NULL results are the object returned by PyObject_GetItem().
     * Returns -1 with an exception set on lookup errors, out-of-range
     * integers, or results of any other type.
     */
    PyObject *w = PyLong_FromLong((long)c);
    PyObject *x;

    if (w == NULL)
        return -1;
    x = PyObject_GetItem(mapping, w);
    Py_DECREF(w);
    if (x == NULL) {
        if (PyErr_ExceptionMatches(PyExc_LookupError)) {
            /* No mapping found means: use 1:1 mapping. */
            PyErr_Clear();
            *result = NULL;
            return 0;
        } else
            return -1;
    }
    else if (x == Py_None) {
        *result = x;
        return 0;
    }
    else if (PyLong_Check(x)) {
        long value = PyLong_AS_LONG(x);
        long max = PyUnicode_GetMax();
        if (value < 0 || value > max) {
            /* The %x conversion consumes a C int from the argument list,
               so the long bound must be narrowed explicitly; passing a
               long here would be a varargs type mismatch. */
            PyErr_Format(PyExc_TypeError,
                         "character mapping must be in range(0x%x)",
                         (int)(max + 1));
            Py_DECREF(x);
            return -1;
        }
        *result = x;
        return 0;
    }
    else if (PyUnicode_Check(x)) {
        *result = x;
        return 0;
    }
    else {
        /* wrong return value */
        PyErr_SetString(PyExc_TypeError,
                        "character mapping must return integer, None or str");
        Py_DECREF(x);
        return -1;
    }
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int charmaptranslate_makespace ( PyObject **  outobj,
Py_UNICODE **  outp,
Py_ssize_t  requiredsize 
) [static]

Definition at line 6031 of file unicodeobject.c.

{
    /* Make sure *outobj can hold `requiredsize` characters.  When it has
       to grow, at least double it, and re-point *outp at the same offset
       inside the (possibly relocated) buffer.  Returns 0 on success and
       -1 when the resize fails. */
    Py_ssize_t cursize = PyUnicode_GET_SIZE(*outobj);
    Py_ssize_t used;

    if (requiredsize <= cursize)
        return 0;

    /* Remember how far the output cursor is from the buffer start. */
    used = *outp - PyUnicode_AS_UNICODE(*outobj);

    /* exponentially overallocate to minimize reallocations */
    if (requiredsize < 2 * cursize)
        requiredsize = 2 * cursize;

    if (PyUnicode_Resize(outobj, requiredsize) < 0)
        return -1;
    *outp = PyUnicode_AS_UNICODE(*outobj) + used;
    return 0;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int charmaptranslate_output ( const Py_UNICODE startinp,
const Py_UNICODE curinp,
Py_ssize_t  insize,
PyObject mapping,
PyObject **  outobj,
Py_UNICODE **  outp,
PyObject **  res 
) [static]

Definition at line 6054 of file unicodeobject.c.

{
    /* Translate the character at *curinp through `mapping` and write the
       outcome at *outp, advancing *outp.  The looked-up object is left in
       *res for the caller.  Returns 0 on success, -1 on failure. */
    if (charmaptranslate_lookup(*curinp, mapping, res))
        return -1;

    if (*res == NULL) {
        /* not found => default to 1:1 mapping */
        **outp = *curinp;
        ++*outp;
        return 0;
    }

    /* None produces no output at all. */
    if (*res == Py_None)
        return 0;

    if (PyLong_Check(*res)) {
        /* no overflow check, because we know that the space is enough */
        **outp = (Py_UNICODE)PyLong_AS_LONG(*res);
        ++*outp;
        return 0;
    }

    if (!PyUnicode_Check(*res))
        return -1;

    {
        Py_ssize_t replen = PyUnicode_GET_SIZE(*res);

        if (replen == 1) {
            /* no overflow check, because we know that the space is enough */
            **outp = *PyUnicode_AS_UNICODE(*res);
            ++*outp;
        }
        else if (replen != 0) {
            /* more than one character: output written so far, plus the
               input still to be processed, plus the extra characters of
               this replacement */
            Py_ssize_t needed = (*outp-PyUnicode_AS_UNICODE(*outobj)) +
                (insize - (curinp-startinp)) +
                replen - 1;

            if (charmaptranslate_makespace(outobj, outp, needed))
                return -1;
            memcpy(*outp, PyUnicode_AS_UNICODE(*res), sizeof(Py_UNICODE)*replen);
            *outp += replen;
        }
    }
    return 0;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int convert_uc ( PyObject obj,
void addr 
) [static]

Definition at line 7126 of file unicodeobject.c.

{
    /* Converter: turn `obj` into a single Py_UNICODE fill character stored
       at `addr`.  Returns 1 on success, 0 with TypeError set otherwise. */
    PyObject *converted = PyUnicode_FromObject(obj);
    Py_UNICODE *data;

    if (converted == NULL) {
        PyErr_SetString(PyExc_TypeError,
                        "The fill character cannot be converted to Unicode");
        return 0;
    }

    if (PyUnicode_GET_SIZE(converted) == 1) {
        data = PyUnicode_AS_UNICODE(converted);
        *(Py_UNICODE *)addr = data[0];
        Py_DECREF(converted);
        return 1;
    }

    PyErr_SetString(PyExc_TypeError,
                    "The fill character must be exactly one character long");
    Py_DECREF(converted);
    return 0;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static Py_UCS4 decode_ucs4 ( const Py_UNICODE s,
Py_ssize_t i,
Py_ssize_t  size 
) [static]

Definition at line 7976 of file unicodeobject.c.

{
    /* Read one code point from s starting at index *i and advance *i past
       it.  The caller must guarantee *i < size (asserted below). */
    Py_UCS4 ch;
    assert(*i < size);
    ch = s[(*i)++];
#ifndef Py_UNICODE_WIDE
    /* Without Py_UNICODE_WIDE: when ch is a high surrogate
       (0xD800..0xDBFF) and the next unit exists and is a low surrogate
       (0xDC00..0xDFFF), consume it as well and combine the pair.
       The constant 0x35fdc00 equals (0xD800 << 10) + 0xDC00 - 0x10000,
       i.e. it removes both surrogate bases and adds the 0x10000 offset. */
    if ((ch & 0xfffffc00) == 0xd800 &&
        *i < size
        && (s[*i] & 0xFFFFFC00) == 0xDC00)
        ch = ((Py_UCS4)ch << 10UL) + (Py_UCS4)(s[(*i)++]) - 0x35fdc00;
#endif
    return ch;
}

Here is the caller graph for this function:

static PyObject* do_argstrip ( PyUnicodeObject self,
int  striptype,
PyObject args 
) [static]

Definition at line 8185 of file unicodeobject.c.

{
    /* Shared argument handling for the strip family: parse the optional
       separator argument and dispatch on its kind. */
    PyObject *sep = NULL;

    if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
        return NULL;

    /* No separator (or None): fall back to whitespace stripping. */
    if (sep == NULL || sep == Py_None)
        return do_strip(self, striptype);

    if (!PyUnicode_Check(sep)) {
        PyErr_Format(PyExc_TypeError,
                     "%s arg must be None or str",
                     STRIPNAME(striptype));
        return NULL;
    }

    return _PyUnicode_XStrip(self, striptype, sep);
}

Here is the call graph for this function:

Here is the caller graph for this function:

static PyObject* do_strip ( PyUnicodeObject self,
int  striptype 
) [static]

Definition at line 8155 of file unicodeobject.c.

{
    /* Strip whitespace from the side(s) selected by striptype. */
    Py_UNICODE *chars = PyUnicode_AS_UNICODE(self);
    Py_ssize_t nchars = PyUnicode_GET_SIZE(self);
    Py_ssize_t left = 0;
    Py_ssize_t right = nchars;

    /* Skip leading whitespace unless only the right side is stripped. */
    if (striptype != RIGHTSTRIP) {
        while (left < nchars && Py_UNICODE_ISSPACE(chars[left])) {
            left++;
        }
    }

    /* Skip trailing whitespace unless only the left side is stripped;
       never move below `left`. */
    if (striptype != LEFTSTRIP) {
        while (right - 1 >= left && Py_UNICODE_ISSPACE(chars[right - 1])) {
            right--;
        }
    }

    /* Something was removed, or self is a subclass instance: build a new
       string from the remaining slice. */
    if (left != 0 || right != nchars || !PyUnicode_CheckExact(self))
        return PyUnicode_FromUnicode(chars + left, right - left);

    /* Nothing stripped from an exact unicode object: reuse it. */
    Py_INCREF(self);
    return (PyObject*)self;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static void encoding_map_dealloc ( PyObject o) [static]

Definition at line 5358 of file unicodeobject.c.

{
    /* Deallocator: releases the memory of the object itself; no other
       cleanup is performed here. */
    PyObject_FREE(o);
}
static int encoding_map_lookup ( Py_UNICODE  c,
PyObject mapping 
) [static]

Definition at line 5515 of file unicodeobject.c.

{
    /* Three-level table lookup of character c in an encoding_map.
       The character is split into a 5-bit (for c <= 0xFFFF), a 4-bit and a
       7-bit index.  Returns the table entry for c, 0 for the NUL
       character, or -1 when c has no entry. */
    struct encoding_map *emap = (struct encoding_map*)mapping;
    int idx1 = c>>11;
    int idx2 = (c>>7) & 0xF;
    int idx3 = c & 0x7F;
    int entry;

#ifdef Py_UNICODE_WIDE
    /* The tables only cover characters up to 0xFFFF. */
    if (c > 0xFFFF)
        return -1;
#endif
    if (c == 0)
        return 0;

    /* level 1: 0xFF marks an absent second-level block */
    entry = emap->level1[idx1];
    if (entry == 0xFF)
        return -1;

    /* level 2: 0xFF marks an absent third-level block */
    entry = emap->level23[16*entry + idx2];
    if (entry == 0xFF)
        return -1;

    /* level 3: stored after the count2 second-level blocks; 0 means
       "no entry" */
    entry = emap->level23[16*emap->count2 + 128*entry + idx3];
    return (entry == 0) ? -1 : entry;
}

Here is the caller graph for this function:

static PyObject* encoding_map_size ( PyObject obj,
PyObject args 
) [static]

Definition at line 5344 of file unicodeobject.c.

{
    /* Report a size for the map: the struct itself (less one byte) plus
       16 bytes per level-2 block and 128 bytes per level-3 block. */
    struct encoding_map *emap = (struct encoding_map*)obj;
    size_t total = sizeof(*emap) - 1 + 16*emap->count2 +
                   128*emap->count3;

    return PyLong_FromLong(total);
}

Here is the call graph for this function:

static int fixcapitalize ( PyUnicodeObject self) [static]

Definition at line 6653 of file unicodeobject.c.

{
    /* In-place capitalize: upper-case the first character when it is
       lower-case, and lower-case every later upper-case character.
       Returns 1 if anything was modified, 0 otherwise. */
    Py_UNICODE *chars = self->str;
    Py_ssize_t nchars = self->length;
    Py_ssize_t k;
    int changed = 0;

    if (nchars == 0)
        return 0;

    if (Py_UNICODE_ISLOWER(chars[0])) {
        chars[0] = Py_UNICODE_TOUPPER(chars[0]);
        changed = 1;
    }

    for (k = 1; k < nchars; k++) {
        if (Py_UNICODE_ISUPPER(chars[k])) {
            chars[k] = Py_UNICODE_TOLOWER(chars[k]);
            changed = 1;
        }
    }
    return changed;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int fixlower ( PyUnicodeObject self) [static]

Definition at line 6611 of file unicodeobject.c.

{
    /* In-place lower-casing of every character.
       Returns 1 if anything was modified, 0 otherwise. */
    Py_UNICODE *cur = self->str;
    Py_UNICODE *stop = cur + self->length;
    int changed = 0;

    for (; cur < stop; cur++) {
        const Py_UNICODE lowered = Py_UNICODE_TOLOWER(*cur);

        if (lowered != *cur) {
            *cur = lowered;
            changed = 1;
        }
    }

    return changed;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int fixswapcase ( PyUnicodeObject self) [static]

Definition at line 6632 of file unicodeobject.c.

{
    /* In-place case swap: upper-case characters become lower-case and
       vice versa.  Returns 1 if anything was modified, 0 otherwise. */
    Py_UNICODE *chars = self->str;
    Py_ssize_t nchars = self->length;
    Py_ssize_t k;
    int changed = 0;

    for (k = 0; k < nchars; k++) {
        if (Py_UNICODE_ISUPPER(chars[k])) {
            chars[k] = Py_UNICODE_TOLOWER(chars[k]);
            changed = 1;
        }
        else if (Py_UNICODE_ISLOWER(chars[k])) {
            chars[k] = Py_UNICODE_TOUPPER(chars[k]);
            changed = 1;
        }
    }

    return changed;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int fixtitle ( PyUnicodeObject self) [static]

Definition at line 6677 of file unicodeobject.c.

{
    /* Title-case the buffer in place: a character that follows a cased
       character (lower, upper or title case) is lower-cased, any other
       character is title-cased.

       Returns 1 if at least one character was actually changed and 0
       otherwise, matching the contract the other fix* helpers follow, so
       that a caller such as fixup() can hand back the original object when
       nothing changed instead of always keeping a copy. */
    register Py_UNICODE *p = PyUnicode_AS_UNICODE(self);
    register Py_UNICODE *e;
    int previous_is_cased;
    int status = 0;

    /* Shortcut for single character strings */
    if (PyUnicode_GET_SIZE(self) == 1) {
        Py_UNICODE ch = Py_UNICODE_TOTITLE(*p);
        if (*p != ch) {
            *p = ch;
            return 1;
        }
        else
            return 0;
    }

    e = p + PyUnicode_GET_SIZE(self);
    previous_is_cased = 0;
    for (; p < e; p++) {
        register const Py_UNICODE ch = *p;
        Py_UNICODE mapped;

        if (previous_is_cased)
            mapped = Py_UNICODE_TOLOWER(ch);
        else
            mapped = Py_UNICODE_TOTITLE(ch);

        /* Only report (and store) a real modification. */
        if (mapped != ch) {
            *p = mapped;
            status = 1;
        }

        /* The casedness test uses the original character, not the mapped
           one. */
        if (Py_UNICODE_ISLOWER(ch) ||
            Py_UNICODE_ISUPPER(ch) ||
            Py_UNICODE_ISTITLE(ch))
            previous_is_cased = 1;
        else
            previous_is_cased = 0;
    }
    return status;
}

Here is the caller graph for this function:

static PyObject* fixup ( PyUnicodeObject *  self,
int(*)(PyUnicodeObject *s)  fixfct 
) [static]

Definition at line 6566 of file unicodeobject.c.

{
    /* Run fixfct over a fresh copy of self's buffer and return the result.
       fixfct is expected to return non-zero when it modified the buffer.
       If it reports no change and self is an exact unicode object, the copy
       is dropped and a new reference to self is returned instead (to save
       space, not time).  Returns NULL if the copy cannot be allocated. */
    PyUnicodeObject *copy;

    copy = (PyUnicodeObject *)PyUnicode_FromUnicode(NULL, self->length);
    if (copy == NULL)
        return NULL;

    Py_UNICODE_COPY(copy->str, self->str, self->length);

    if (fixfct(copy) || !PyUnicode_CheckExact(self))
        return (PyObject *)copy;

    /* Nothing changed: hand back the original and discard the copy. */
    Py_INCREF(self);
    Py_DECREF(copy);
    return (PyObject *)self;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int fixupper ( PyUnicodeObject self) [static]

Definition at line 6590 of file unicodeobject.c.

{
    /* Upper-case the buffer in place.  Returns 1 if at least one character
       was replaced by a different one, 0 if the buffer is unchanged. */
    Py_UNICODE *cur = self->str;
    Py_ssize_t remaining;
    int changed = 0;

    for (remaining = self->length; remaining > 0; remaining--, cur++) {
        const Py_UNICODE raised = Py_UNICODE_TOUPPER(*cur);

        if (raised == *cur)
            continue;
        *cur = raised;
        changed = 1;
    }
    return changed;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int formatchar ( Py_UNICODE buf,
size_t  buflen,
PyObject v 
) [static]

Definition at line 9380 of file unicodeobject.c.

{
    /* Store the character for a %c argument into buf.

       v is either a str holding exactly one character (or, on narrow
       builds, exactly one valid surrogate pair) or an integer in
       range(0x110000).  Returns the number of Py_UNICODE units stored
       (1 or 2); the stored units are always followed by a NUL.  On failure
       an exception is set and -1 is returned.  buflen is not consulted. */
    /* presume that the buffer is at least 3 characters long */
    if (PyUnicode_Check(v)) {
        if (PyUnicode_GET_SIZE(v) == 1) {
            buf[0] = PyUnicode_AS_UNICODE(v)[0];
            buf[1] = '\0';
            return 1;
        }
#ifndef Py_UNICODE_WIDE
        if (PyUnicode_GET_SIZE(v) == 2) {
            /* Decode a valid surrogate pair */
            int c0 = PyUnicode_AS_UNICODE(v)[0];
            int c1 = PyUnicode_AS_UNICODE(v)[1];
            if (0xD800 <= c0 && c0 <= 0xDBFF &&
                0xDC00 <= c1 && c1 <= 0xDFFF) {
                buf[0] = c0;
                buf[1] = c1;
                buf[2] = '\0';
                return 2;
            }
        }
#endif
        goto onError;
    }
    else {
        /* Integer input truncated to a character */
        long x;
        x = PyLong_AsLong(v);
        /* A conversion failure is reported as the generic TypeError below. */
        if (x == -1 && PyErr_Occurred())
            goto onError;

        if (x < 0 || x > 0x10ffff) {
            PyErr_SetString(PyExc_OverflowError,
                            "%c arg not in range(0x110000)");
            return -1;
        }

#ifndef Py_UNICODE_WIDE
        if (x > 0xffff) {
            /* Split a non-BMP code point into a surrogate pair. */
            x -= 0x10000;
            buf[0] = (Py_UNICODE)(0xD800 | (x >> 10));
            buf[1] = (Py_UNICODE)(0xDC00 | (x & 0x3FF));
            /* Terminate like every other successful path does. */
            buf[2] = '\0';
            return 2;
        }
#endif
        buf[0] = (Py_UNICODE) x;
        buf[1] = '\0';
        return 1;
    }

  onError:
    PyErr_SetString(PyExc_TypeError,
                    "%c requires int or char");
    return -1;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static PyObject* formatfloat ( PyObject v,
int  flags,
int  prec,
int  type 
) [static]

Definition at line 9341 of file unicodeobject.c.

{
    /* Convert v to a C double and format it with PyOS_double_to_string
       using the given format code and precision (6 when prec is negative);
       F_ALT in flags selects the alternate form.  Returns a new str, or
       NULL with an exception set. */
    PyObject *text;
    char *repr;
    double value = PyFloat_AsDouble(v);

    if (value == -1.0 && PyErr_Occurred())
        return NULL;

    repr = PyOS_double_to_string(value, type, (prec < 0) ? 6 : prec,
                                 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
    if (repr == NULL)
        return NULL;

    text = PyUnicode_FromStringAndSize(repr, strlen(repr));
    /* The formatted buffer is released with PyMem_Free once converted. */
    PyMem_Free(repr);
    return text;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static PyObject* formatlong ( PyObject val,
int  flags,
int  prec,
int  type 
) [static]

Definition at line 9364 of file unicodeobject.c.

{
    /* Format an integer through _PyBytes_FormatLong and convert the
       resulting character range into a str.  Returns NULL on failure. */
    PyObject *holder;   /* object returned by the bytes formatter */
    PyObject *text;
    char *digits;
    int ndigits;

    holder = _PyBytes_FormatLong(val, flags, prec, type, &digits, &ndigits);
    if (holder == NULL)
        return NULL;

    /* Convert before the holder is released. */
    text = PyUnicode_FromStringAndSize(digits, ndigits);
    Py_DECREF(holder);
    return text;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static PyObject* getnextarg ( PyObject args,
Py_ssize_t  arglen,
Py_ssize_t p_argidx 
) [static]

Definition at line 9323 of file unicodeobject.c.

{
    /* Fetch the next format argument and advance *p_argidx.  When arglen
       is negative, args itself is returned; otherwise the item at the
       current index is fetched with PyTuple_GetItem.  If the index has
       reached arglen, a TypeError is set and NULL is returned. */
    Py_ssize_t idx = *p_argidx;

    if (idx >= arglen) {
        PyErr_SetString(PyExc_TypeError,
                        "not enough arguments for format string");
        return NULL;
    }

    *p_argidx = idx + 1;
    return (arglen < 0) ? args : PyTuple_GetItem(args, idx);
}

Here is the call graph for this function:

Here is the caller graph for this function:

static void make_decode_exception ( PyObject **  exceptionObject,
const char *  encoding,
const char *  input,
Py_ssize_t  length,
Py_ssize_t  startpos,
Py_ssize_t  endpos,
const char *  reason 
) [static]

Definition at line 2003 of file unicodeobject.c.

{
    /* Create *exceptionObject as a UnicodeDecodeError if it does not exist
       yet; otherwise update its start, end and reason.  If any update
       fails, the exception object is released and *exceptionObject is set
       to NULL. */
    if (*exceptionObject == NULL) {
        *exceptionObject = PyUnicodeDecodeError_Create(
            encoding, input, length, startpos, endpos, reason);
        return;
    }

    if (PyUnicodeDecodeError_SetStart(*exceptionObject, startpos) ||
        PyUnicodeDecodeError_SetEnd(*exceptionObject, endpos) ||
        PyUnicodeDecodeError_SetReason(*exceptionObject, reason)) {
        Py_DECREF(*exceptionObject);
        *exceptionObject = NULL;
    }
}

Here is the call graph for this function:

Here is the caller graph for this function:

static void make_encode_exception ( PyObject **  exceptionObject,
const char *  encoding,
const Py_UNICODE unicode,
Py_ssize_t  size,
Py_ssize_t  startpos,
Py_ssize_t  endpos,
const char *  reason 
) [static]

Definition at line 4501 of file unicodeobject.c.

{
    /* Create *exceptionObject as a UnicodeEncodeError if it does not exist
       yet; otherwise update its start, end and reason.  If any update
       fails, the exception object is released and *exceptionObject is set
       to NULL. */
    if (*exceptionObject == NULL) {
        *exceptionObject = PyUnicodeEncodeError_Create(
            encoding, unicode, size, startpos, endpos, reason);
        return;
    }

    if (PyUnicodeEncodeError_SetStart(*exceptionObject, startpos) ||
        PyUnicodeEncodeError_SetEnd(*exceptionObject, endpos) ||
        PyUnicodeEncodeError_SetReason(*exceptionObject, reason)) {
        Py_DECREF(*exceptionObject);
        *exceptionObject = NULL;
    }
}

Here is the call graph for this function:

Here is the caller graph for this function:

static void make_translate_exception ( PyObject **  exceptionObject,
const Py_UNICODE unicode,
Py_ssize_t  size,
Py_ssize_t  startpos,
Py_ssize_t  endpos,
const char *  reason 
) [static]

Definition at line 5886 of file unicodeobject.c.

{
    /* Create *exceptionObject as a UnicodeTranslateError if it does not
       exist yet; otherwise update its start, end and reason.  If any update
       fails, the exception object is released and *exceptionObject is set
       to NULL. */
    if (*exceptionObject == NULL) {
        *exceptionObject = PyUnicodeTranslateError_Create(
            unicode, size, startpos, endpos, reason);
        return;
    }

    if (PyUnicodeTranslateError_SetStart(*exceptionObject, startpos) ||
        PyUnicodeTranslateError_SetEnd(*exceptionObject, endpos) ||
        PyUnicodeTranslateError_SetReason(*exceptionObject, reason)) {
        Py_DECREF(*exceptionObject);
        *exceptionObject = NULL;
    }
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int normalize_encoding ( const char *  encoding,
char *  lower,
size_t  lower_len 
) [static]

Definition at line 1438 of file unicodeobject.c.

{
    /* Copy encoding into lower, converting uppercase characters to
       lowercase and '_' to '-', and NUL-terminate the result.

       lower_len is the capacity of lower in bytes, terminator included.
       Returns 1 on success and 0 if the normalized name does not fit; in
       that case the contents of lower are not NUL-terminated and must not
       be used. */
    const char *e;
    char *l;
    char *l_end;

    /* A zero-sized buffer cannot even hold the terminator; bail out before
       forming &lower[lower_len - 1], whose index would wrap around. */
    if (lower_len == 0)
        return 0;

    e = encoding;
    l = lower;
    /* Last usable slot: reserved for the terminating NUL. */
    l_end = &lower[lower_len - 1];
    while (*e) {
        if (l == l_end)
            return 0;
        if (Py_ISUPPER(*e)) {
            *l++ = Py_TOLOWER(*e++);
        }
        else if (*e == '_') {
            *l++ = '-';
            e++;
        }
        else {
            *l++ = *e++;
        }
    }
    *l = '\0';
    return 1;
}

Here is the caller graph for this function:

static PyUnicodeObject* pad ( PyUnicodeObject self,
Py_ssize_t  left,
Py_ssize_t  right,
Py_UNICODE  fill 
) [static]

Definition at line 6828 of file unicodeobject.c.

{
    /* Build a new string consisting of `left` fill characters, the contents
       of self, and `right` fill characters.  Negative pad counts are
       treated as 0.  If no padding is requested and self is an exact
       unicode object, a new reference to self is returned instead.
       Raises OverflowError (returning NULL) if the total length would
       exceed PY_SSIZE_T_MAX; returns NULL if allocation fails. */
    PyUnicodeObject *padded;

    if (left < 0)
        left = 0;
    if (right < 0)
        right = 0;

    if (left == 0 && right == 0 && PyUnicode_CheckExact(self)) {
        Py_INCREF(self);
        return self;
    }

    /* Check each addition against the limit before performing it. */
    if (left > PY_SSIZE_T_MAX - self->length ||
        right > PY_SSIZE_T_MAX - (left + self->length)) {
        PyErr_SetString(PyExc_OverflowError, "padded string is too long");
        return NULL;
    }

    padded = _PyUnicode_New(left + self->length + right);
    if (padded == NULL)
        return NULL;

    if (left)
        Py_UNICODE_FILL(padded->str, fill, left);
    Py_UNICODE_COPY(padded->str + left, self->str, self->length);
    if (right)
        Py_UNICODE_FILL(padded->str + left + self->length, fill, right);

    return padded;
}

Here is the call graph for this function:

Here is the caller graph for this function:

Definition at line 226 of file unicodeobject.c.

{
    /* Fold every one of the len characters at ptr into a simple
       bloom-style bitmask and return it (0 when len is not positive). */
    BLOOM_MASK bits = 0;
    Py_ssize_t pos = 0;

    while (pos < len) {
        BLOOM_ADD(bits, ptr[pos]);
        pos++;
    }

    return bits;
}

Definition at line 240 of file unicodeobject.c.

{
    /* Linear membership test: 1 if chr equals one of the first setlen
       entries of set, 0 otherwise. */
    Py_ssize_t pos = 0;

    while (pos < setlen) {
        if (set[pos] == chr)
            return 1;
        pos++;
    }

    return 0;
}

Definition at line 3989 of file unicodeobject.c.

{
    /* Like wcschr, but bounded by size rather than by a NUL character:
       returns a pointer to the first of the size elements at s that equals
       ch, or NULL if there is none. */
    for (; size > 0; size--, s++) {
        if (*s == ch)
            return s;
    }

    return NULL;
}

Definition at line 10331 of file unicodeobject.c.

{
    /* Append the NUL-terminated s2 to the end of the NUL-terminated s1 and
       return s1.  The end of s1 is located with Py_UNICODE_strlen and the
       copy is done by Py_UNICODE_strcpy. */
    Py_UNICODE *tail = s1 + Py_UNICODE_strlen(s1);

    Py_UNICODE_strcpy(tail, s2);
    return s1;
}

Here is the call graph for this function:

Definition at line 10371 of file unicodeobject.c.

{
    /* Return a pointer to the first element equal to c in the
       NUL-terminated s, or NULL if c does not occur before the
       terminator. */
    const Py_UNICODE *cur = s;

    while (*cur) {
        if (*cur == c)
            return (Py_UNICODE*)cur;
        cur++;
    }
    return NULL;
}

Here is the caller graph for this function:

Definition at line 10340 of file unicodeobject.c.

{
    /* Compare two NUL-terminated strings element by element.  Returns -1
       or +1 at the first differing pair; if one string is a prefix of the
       other the longer one compares greater; returns 0 when both end
       together. */
    for (; *s1 && *s2; s1++, s2++) {
        if (*s1 != *s2)
            return (*s1 < *s2) ? -1 : +1;
    }
    /* At least one string has ended. */
    if (*s1)
        return 1;
    if (*s2)
        return -1;
    return 0;
}

Here is the caller graph for this function:

Definition at line 10313 of file unicodeobject.c.

{
    /* Copy s2, including its terminating NUL, to s1 and return s1. */
    Py_UNICODE *dst = s1;

    do {
        *dst = *s2++;
    } while (*dst++);
    return s1;
}

Here is the caller graph for this function:

Definition at line 10304 of file unicodeobject.c.

{
    /* Count the elements that precede the terminating NUL in u.
       NOTE(review): the counter is a plain int; confirm against the
       declared return type that very long strings cannot overflow it. */
    int count = 0;

    for (; *u; u++)
        count++;
    return count;
}

Here is the caller graph for this function:

Definition at line 10354 of file unicodeobject.c.

{
    /* Compare at most n elements of two strings.  Returns -1 or +1 at the
       first differing pair, and 0 if a NUL is reached in both at the same
       position or n elements compared equal. */
    while (n != 0) {
        const Py_UNICODE a = *s1++;
        const Py_UNICODE b = *s2++;

        if (a != b)
            return (a < b) ? -1 : +1;
        if (a == '\0')
            return 0;
        n--;
    }
    return 0;
}

Here is the caller graph for this function:

Definition at line 10321 of file unicodeobject.c.

{
    /* Copy elements from s2 to s1 until a NUL has been copied or n
       elements have been written, whichever comes first, and return s1.

       The bound is tested before each store, so no more than n elements
       are ever written (none when n is 0).  The previous form stored first
       and tested afterwards, writing up to n + 1 elements.  As before, the
       destination is not NUL-padded, and it is not NUL-terminated when s2
       has n or more elements before its terminator. */
    Py_UNICODE *u = s1;

    while (n-- > 0 && (*u++ = *s2++))
        ;
    return s1;
}

Here is the caller graph for this function:

Definition at line 10381 of file unicodeobject.c.

{
    /* Return a pointer to the last element equal to c in the
       NUL-terminated s, or NULL if there is none.  The scan starts at the
       terminator (located with Py_UNICODE_strlen) and walks backwards; the
       terminator itself is never compared. */
    const Py_UNICODE *cur;

    for (cur = s + Py_UNICODE_strlen(s); cur != s; ) {
        --cur;
        if (*cur == c)
            return (Py_UNICODE*)cur;
    }
    return NULL;
}

Here is the call graph for this function:

Here is the caller graph for this function:

PyDoc_STRVAR ( title__doc__  ,
"S.title() -> str\n\\n\Return a titlecased version of  S,
i.e.words start with title case\n\  characters,
all remaining cased characters have lower case."   
)
PyDoc_STRVAR ( capitalize__doc__  ,
"S.capitalize() -> str\n\\n\Return a capitalized version of  S,
i.e.make the first character\n\have upper case and the rest lower case."   
)
PyDoc_STRVAR ( center__doc__  ,
"S.center(width[, fillchar]) -> str\n\\n\Return S centered in a string of length width. Padding is\n\done using the specified fill character (default is a space)"   
)
PyDoc_STRVAR ( count__doc__  ,
"S.count(sub[, start[, end]]) -> int\n\\n\Return the number of non-overlapping occurrences of substring sub in\n\string S[start:end].  Optional arguments start and end are\n\interpreted as in slice notation."
)
PyDoc_STRVAR ( encode__doc__  ,
"S.encode(encoding='utf-8', errors='strict') -> bytes\n\\n\Encode S using the codec registered for encoding. Default encoding\n\is 'utf-8'. errors may be given to set a different error\n\handling scheme. Default is 'strict' meaning that encoding errors raise\n\a UnicodeEncodeError. Other possible values are 'ignore'  ,
'replace'and\n\'xmlcharrefreplace'as well as any other name registered with\n\codecs.register_error that can handle UnicodeEncodeErrors."   
)
PyDoc_STRVAR ( expandtabs__doc__  ,
"S.expandtabs([tabsize]) -> str\n\\n\Return a copy of S where all tab characters are expanded using spaces.\n\If tabsize is not  given,
a tab size of 8 characters is assumed."   
)
PyDoc_STRVAR ( find__doc__  ,
"S.find(sub[, start[, end]]) -> int\n\\n\Return the lowest index in S where substring sub is found,\n\such that sub is contained within S[start:end].  Optional\n\arguments start and end are interpreted as in slice notation.\n\\n\Return -1 on failure."
)
PyDoc_STRVAR ( index__doc__  ,
"S.index(sub[, start[, end]]) -> int\n\\n\Like S.find() but raise ValueError when the substring is not found."   
)
PyDoc_STRVAR ( islower__doc__  ,
"S.islower() -> bool\n\\n\Return True if all cased characters in S are lowercase and there is\n\at least one cased character in  S,
False otherwise."   
)
PyDoc_STRVAR ( isupper__doc__  ,
"S.isupper() -> bool\n\\n\Return True if all cased characters in S are uppercase and there is\n\at least one cased character in  S,
False otherwise."   
)
PyDoc_STRVAR ( istitle__doc__  ,
"S.istitle() -> bool\n\\n\Return True if S is a titlecased string and there is at least one\n\character in  S,
i.e.upper-and titlecase characters may only\n\follow uncased characters and lowercase characters only cased ones.\n\Return False otherwise."   
)
PyDoc_STRVAR ( isspace__doc__  ,
"S.isspace() -> bool\n\\n\Return True if all characters in S are whitespace\n\and there is at least one character in  S,
False otherwise."   
)
PyDoc_STRVAR ( isalpha__doc__  ,
"S.isalpha() -> bool\n\\n\Return True if all characters in S are alphabetic\n\and there is at least one character in  S,
False otherwise."   
)
PyDoc_STRVAR ( isalnum__doc__  ,
"S.isalnum() -> bool\n\\n\Return True if all characters in S are alphanumeric\n\and there is at least one character in  S,
False otherwise."   
)
PyDoc_STRVAR ( isdecimal__doc__  ,
"S.isdecimal() -> bool\n\\n\Return True if there are only decimal characters in  S,
\n\False otherwise."   
)
PyDoc_STRVAR ( isdigit__doc__  ,
"S.isdigit() -> bool\n\\n\Return True if all characters in S are digits\n\and there is at least one character in  S,
False otherwise."   
)
PyDoc_STRVAR ( isnumeric__doc__  ,
"S.isnumeric() -> bool\n\\n\Return True if there are only numeric characters in  S,
\n\False otherwise."   
)
PyDoc_STRVAR ( isidentifier__doc__  ,
"S.isidentifier() -> bool\n\\n\Return True if S is a valid identifier according\n\to the language definition."   
)
PyDoc_STRVAR ( isprintable__doc__  ,
"S.isprintable() -> bool\n\\n\Return True if all characters in S are considered\n\printable in repr() or S is  empty,
False otherwise."   
)
PyDoc_STRVAR ( join__doc__  ,
"S.join(iterable) -> str\n\\n\Return a string which is the concatenation of the strings in the\n\iterable. The separator between elements is S."   
)
PyDoc_STRVAR ( ljust__doc__  ,
"S.ljust(width[, fillchar]) -> str\n\\n\Return S left-justified in a Unicode string of length width. Padding is\n\done using the specified fill character (default is a space)."   
)
PyDoc_STRVAR ( lower__doc__  ,
"S.lower() -> str\n\\n\Return a copy of the string S converted to lowercase."   
)
PyDoc_STRVAR ( strip__doc__  ,
"S.strip([chars]) -> str\n\\n\Return a copy of the string S with leading and trailing\n\whitespace removed.\n\If chars is given and not  None,
remove characters in chars instead."   
)
PyDoc_STRVAR ( lstrip__doc__  ,
"S.lstrip([chars]) -> str\n\\n\Return a copy of the string S with leading whitespace removed.\n\If chars is given and not  None,
remove characters in chars instead."   
)
PyDoc_STRVAR ( rstrip__doc__  ,
"S.rstrip([chars]) -> str\n\\n\Return a copy of the string S with trailing whitespace removed.\n\If chars is given and not  None,
remove characters in chars instead."   
)
PyDoc_STRVAR ( replace__doc__  ,
"S.replace(old, new[, count]) -> str\n\\n\Return a copy of S with all occurrences of substring\n\old replaced by new. If the optional argument count is\n\  given,
only the first count occurrences are replaced."   
)
PyDoc_STRVAR ( rfind__doc__  ,
"S.rfind(sub[, start[, end]]) -> int\n\\n\Return the highest index in S where substring sub is found,\n\such that sub is contained within S[start:end].  Optional\n\arguments start and end are interpreted as in slice notation.\n\\n\Return -1 on failure."
)
PyDoc_STRVAR ( rindex__doc__  ,
"S.rindex(sub[, start[, end]]) -> int\n\\n\Like S.rfind() but raise ValueError when the substring is not found."   
)
PyDoc_STRVAR ( rjust__doc__  ,
"S.rjust(width[, fillchar]) -> str\n\\n\Return S right-justified in a string of length width. Padding is\n\done using the specified fill character (default is a space)."   
)
PyDoc_STRVAR ( split__doc__  ,
"S.split([sep[, maxsplit]]) -> list of strings\n\\n\Return a list of the words in  S,
using sep as the\n\delimiter string.If maxsplit is  given,
at most maxsplit\n\splits are done.If sep is not specified or is  None,
any\n\whitespace string is a separator and empty strings are\n\removed from the result."   
)
PyDoc_STRVAR ( partition__doc__  ,
"S.partition(sep) -> (head, sep, tail)\n\\n\Search for the separator sep in  S,
and return the part before  it,
\n\the separator  itself,
and the part after it.If the separator is not\n\  found,
return S and two empty strings."   
)
PyDoc_STRVAR ( rpartition__doc__  ,
"S.rpartition(sep) -> (head, sep, tail)\n\\n\Search for the separator sep in  S,
starting at the end of  S,
and return\n\the part before  it,
the separator  itself,
and the part after it.If the\n\separator is not  found,
return two empty strings and S."   
)
PyDoc_STRVAR ( rsplit__doc__  ,
"S.rsplit([sep[, maxsplit]]) -> list of strings\n\\n\Return a list of the words in  S,
using sep as the\n\delimiter  string,
starting at the end of the string and\n\working to the front.If maxsplit is  given,
at most maxsplit\n\splits are done.If sep is not  specified,
any whitespace string\n\is a separator."   
)
PyDoc_STRVAR ( splitlines__doc__  ,
"S.splitlines([keepends]) -> list of strings\n\\n\Return a list of the lines in  S,
breaking at line boundaries.\n\Line breaks are not included in the resulting list unless keepends\n\is given and true."   
)
PyDoc_STRVAR ( swapcase__doc__  ,
"S.swapcase() -> str\n\\n\Return a copy of S with uppercase characters converted to lowercase\n\and vice versa."   
)
PyDoc_STRVAR ( maketrans__doc__  ,
"str.maketrans(x[, y[, z]]) -> dict (static method)\n\\n\Return a translation table usable for str.translate().\n\If there is only one  argument,
it must be a dictionary mapping Unicode\n\ordinals(integers) or characters to Unicode  ordinals,
strings or None.\n\Character keys will be then converted to ordinals.\n\If there are two  arguments,
they must be strings of equal  length,
and\n\in the resulting  dictionary,
each character in x will be mapped to the\n\character at the same position in y.If there is a third  argument,
it\n\must be a  string,
whose characters will be mapped to None in the result."   
)
PyDoc_STRVAR ( translate__doc__  ,
"S.translate(table) -> str\n\\n\Return a copy of the string  S,
where all characters have been mapped\n\through the given translation  table,
which must be a mapping of\n\Unicode ordinals to Unicode  ordinals,
strings  ,
or None.\n\Unmapped characters are left untouched.Characters mapped to None\n\are deleted."   
)
PyDoc_STRVAR ( upper__doc__  ,
"S.upper() -> str\n\\n\Return a copy of S converted to uppercase."   
)
PyDoc_STRVAR ( zfill__doc__  ,
"S.zfill(width) -> str\n\\n\Pad a numeric string S with zeros on the  left,
to fill a field\n\of the specified width.The string S is never truncated."   
)
PyDoc_STRVAR ( startswith__doc__  ,
"S.startswith(prefix[, start[, end]]) -> bool\n\\n\Return True if S starts with the specified  prefix,
False otherwise.\n\With optional  start,
test S beginning at that position.\n\With optional  end,
stop comparing S at that position.\n\prefix can also be a tuple of strings to try."   
)
PyDoc_STRVAR ( endswith__doc__  ,
"S.endswith(suffix[, start[, end]]) -> bool\n\\n\Return True if S ends with the specified  suffix,
False otherwise.\n\With optional  start,
test S beginning at that position.\n\With optional  end,
stop comparing S at that position.\n\suffix can also be a tuple of strings to try."   
)
PyDoc_STRVAR ( format__doc__  ,
"S.format(*args, **kwargs) -> str\n\\n\Return a formatted version of  S,
using substitutions from args and kwargs.\n\The substitutions are identified by braces('{'and '}')."   
)
PyDoc_STRVAR ( format_map__doc__  ,
"S.format_map(mapping) -> str\n\\n\Return a formatted version of  S,
using substitutions from mapping.\n\The substitutions are identified by braces('{'and '}')."   
)
PyDoc_STRVAR ( p_format__doc__  ,
"S.__format__(format_spec) -> str\n\\n\Return a formatted version of S as described by format_spec."   
)
PyDoc_STRVAR ( sizeof__doc__  ,
"S.__sizeof__() -> size of S in  memory,
in bytes"
)
PyDoc_STRVAR ( unicode_doc  ,
"str(string[, encoding[, errors]]) -> str\n\\n\Create a new string object from the given encoded string.\n\encoding defaults to the current default string encoding.\n\errors can be 'strict'  ,
'replace'or 'ignore'and defaults to 'strict'."   
)
PyDoc_STRVAR ( length_hint_doc  ,
"Private method returning an estimate of len(list(it))."   
)

Definition at line 10440 of file unicodeobject.c.

{
    /* Build the module object from the _string_module definition and hand
       the result of PyModule_Create straight back to the caller. */
    return PyModule_Create(&_string_module);
}
void PyUnicode_Append ( PyObject **  pleft,
PyObject right 
)

Definition at line 7423 of file unicodeobject.c.

{
    /* Replace *pleft with the concatenation of *pleft and right.

       Does nothing if *pleft is NULL.  The reference previously held in
       *pleft is always released.  If right is NULL or *pleft is not a
       unicode object, *pleft is set to NULL; otherwise it receives the
       result of PyUnicode_Concat, which may itself be NULL on failure. */
    PyObject *joined;

    if (*pleft == NULL)
        return;

    if (right != NULL && PyUnicode_Check(*pleft)) {
        joined = PyUnicode_Concat(*pleft, right);
        Py_DECREF(*pleft);
        *pleft = joined;
        return;
    }

    Py_DECREF(*pleft);
    *pleft = NULL;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void PyUnicode_AppendAndDel ( PyObject **  pleft,
PyObject right 
)

Definition at line 7439 of file unicodeobject.c.

{
    /* Same as PyUnicode_Append, but additionally releases the caller's
       reference to right (which may be NULL). */
    PyUnicode_Append(pleft, right);
    if (right != NULL)
        Py_DECREF(right);
}

Here is the call graph for this function:

Here is the caller graph for this function:

Definition at line 4878 of file unicodeobject.c.

{
    /* Encode a unicode object with PyUnicode_EncodeASCII, passing NULL as
       the errors argument.  A non-unicode argument is rejected via
       PyErr_BadArgument and NULL is returned. */
    if (PyUnicode_Check(unicode))
        return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(unicode),
                                     PyUnicode_GET_SIZE(unicode),
                                     NULL);

    PyErr_BadArgument();
    return NULL;
}

Here is the call graph for this function:

Here is the caller graph for this function:

PyObject* PyUnicode_AsCharmapString ( PyObject unicode,
PyObject mapping 
)

Definition at line 5872 of file unicodeobject.c.

{
    /* Encode a unicode object through the given character mapping with
       PyUnicode_EncodeCharmap, passing NULL as the errors argument.  A
       non-unicode argument or a NULL mapping is rejected via
       PyErr_BadArgument and NULL is returned. */
    if (PyUnicode_Check(unicode) && mapping != NULL)
        return PyUnicode_EncodeCharmap(PyUnicode_AS_UNICODE(unicode),
                                       PyUnicode_GET_SIZE(unicode),
                                       mapping,
                                       NULL);

    PyErr_BadArgument();
    return NULL;
}

Here is the call graph for this function:

PyObject* PyUnicode_AsDecodedObject ( PyObject unicode,
const char *  encoding,
const char *  errors 
)

Definition at line 1523 of file unicodeobject.c.

{
    /* Decode a unicode object through the codec registry and return
       whatever object the codec produces.  A NULL encoding selects
       PyUnicode_GetDefaultEncoding().  Returns NULL with an exception set
       if unicode is not a unicode object or the codec fails. */
    if (!PyUnicode_Check(unicode)) {
        PyErr_BadArgument();
        return NULL;
    }

    if (encoding == NULL)
        encoding = PyUnicode_GetDefaultEncoding();

    /* Decode via the codec registry; a NULL result is passed through. */
    return PyCodec_Decode(unicode, encoding, errors);
}

Here is the call graph for this function:

PyObject* PyUnicode_AsDecodedUnicode ( PyObject unicode,
const char *  encoding,
const char *  errors 
)

Definition at line 1547 of file unicodeobject.c.

{
    /* Decode a unicode object through the codec registry, requiring the
       result to be a unicode object.  A NULL encoding selects
       PyUnicode_GetDefaultEncoding().  Returns NULL with an exception set
       if unicode is not a unicode object, the codec fails, or the codec
       returns some other type (TypeError). */
    PyObject *decoded;

    if (!PyUnicode_Check(unicode)) {
        PyErr_BadArgument();
        return NULL;
    }

    if (encoding == NULL)
        encoding = PyUnicode_GetDefaultEncoding();

    /* Decode via the codec registry */
    decoded = PyCodec_Decode(unicode, encoding, errors);
    if (decoded == NULL)
        return NULL;

    if (PyUnicode_Check(decoded))
        return decoded;

    PyErr_Format(PyExc_TypeError,
                 "decoder did not return a str object (type=%.400s)",
                 Py_TYPE(decoded)->tp_name);
    Py_DECREF(decoded);
    return NULL;
}

Here is the call graph for this function:

PyObject* PyUnicode_AsEncodedObject ( PyObject unicode,
const char *  encoding,
const char *  errors 
)

Definition at line 1593 of file unicodeobject.c.

{
    /* Encode a unicode object through the codec registry and return
       whatever object the codec produces.  A NULL encoding selects
       PyUnicode_GetDefaultEncoding().  Returns NULL with an exception set
       if unicode is not a unicode object or the codec fails. */
    if (!PyUnicode_Check(unicode)) {
        PyErr_BadArgument();
        return NULL;
    }

    if (encoding == NULL)
        encoding = PyUnicode_GetDefaultEncoding();

    /* Encode via the codec registry; a NULL result is passed through. */
    return PyCodec_Encode(unicode, encoding, errors);
}

Here is the call graph for this function:

PyObject* PyUnicode_AsEncodedString ( PyObject unicode,
const char *  encoding,
const char *  errors 
)

Definition at line 1682 of file unicodeobject.c.

{
    /* Encode a unicode object to bytes.

       A NULL encoding selects PyUnicode_GetDefaultEncoding().  A few
       common encodings are dispatched directly to the built-in encoders;
       everything else goes through the codec registry.  Returns a new
       bytes object, or NULL with an exception set if unicode is not a
       unicode object, the encoder fails, or the codec returns something
       that is neither bytes nor bytearray (TypeError).  A bytearray result
       is converted to bytes after issuing a RuntimeWarning. */
    PyObject *v;
    char lower[11];  /* Enough for any encoding shortcut */

    if (!PyUnicode_Check(unicode)) {
        PyErr_BadArgument();
        return NULL;
    }

    if (encoding == NULL)
        encoding = PyUnicode_GetDefaultEncoding();

    /* Shortcuts for common default encodings */
    /* normalize_encoding() fails when the name does not fit in lower; no
       shortcut is attempted in that case. */
    if (normalize_encoding(encoding, lower, sizeof(lower))) {
        if (strcmp(lower, "utf-8") == 0)
            return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
                                        PyUnicode_GET_SIZE(unicode),
                                        errors);
        else if ((strcmp(lower, "latin-1") == 0) ||
                 (strcmp(lower, "iso-8859-1") == 0))
            return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(unicode),
                                          PyUnicode_GET_SIZE(unicode),
                                          errors);
#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
        else if (strcmp(lower, "mbcs") == 0)
            return PyUnicode_EncodeMBCS(PyUnicode_AS_UNICODE(unicode),
                                        PyUnicode_GET_SIZE(unicode),
                                        errors);
#endif
        else if (strcmp(lower, "ascii") == 0)
            return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(unicode),
                                         PyUnicode_GET_SIZE(unicode),
                                         errors);
    }
    /* During bootstrap, we may need to find the encodings
       package, to load the file system encoding, and require the
       file system encoding in order to load the encodings
       package.

       Break out of this dependency by assuming that the path to
       the encodings module is ASCII-only.  XXX could try wcstombs
       instead, if the file system encoding is the locale's
       encoding. */
    if (Py_FileSystemDefaultEncoding &&
             strcmp(encoding, Py_FileSystemDefaultEncoding) == 0 &&
             !PyThreadState_GET()->interp->codecs_initialized)
        return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(unicode),
                                     PyUnicode_GET_SIZE(unicode),
                                     errors);

    /* Encode via the codec registry */
    v = PyCodec_Encode(unicode, encoding, errors);
    if (v == NULL)
        return NULL;

    /* The normal path */
    if (PyBytes_Check(v))
        return v;

    /* If the codec returns a buffer, raise a warning and convert to bytes */
    if (PyByteArray_Check(v)) {
        int error;
        PyObject *b;

        error = PyErr_WarnFormat(PyExc_RuntimeWarning, 1,
            "encoder %s returned bytearray instead of bytes",
            encoding);
        /* A non-zero result from the warning call aborts the conversion. */
        if (error) {
            Py_DECREF(v);
            return NULL;
        }

        /* Copy the bytearray contents into a new bytes object; b may be
           NULL if that allocation fails. */
        b = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(v), Py_SIZE(v));
        Py_DECREF(v);
        return b;
    }

    /* Any other result type is an error. */
    PyErr_Format(PyExc_TypeError,
                 "encoder did not return a bytes object (type=%.400s)",
                 Py_TYPE(v)->tp_name);
    Py_DECREF(v);
    return NULL;
}

Here is the call graph for this function:

Here is the caller graph for this function:

PyObject* PyUnicode_AsEncodedUnicode ( PyObject unicode,
const char *  encoding,
const char *  errors 
)

Definition at line 1769 of file unicodeobject.c.

{
    /* Run `unicode` through the codec named by `encoding` and insist that
       the codec hands back a str object.  Returns the codec result, or
       NULL after an error has been reported. */
    PyObject *encoded;

    if (!PyUnicode_Check(unicode)) {
        PyErr_BadArgument();
        return NULL;
    }

    /* No encoding given: fall back to the default one. */
    if (encoding == NULL)
        encoding = PyUnicode_GetDefaultEncoding();

    /* Encode via the codec registry */
    encoded = PyCodec_Encode(unicode, encoding, errors);
    if (encoded == NULL)
        return NULL;

    if (PyUnicode_Check(encoded))
        return encoded;

    /* The codec produced something other than str: report its type name
       before dropping our reference to it. */
    PyErr_Format(PyExc_TypeError,
                 "encoder did not return an str object (type=%.400s)",
                 Py_TYPE(encoded)->tp_name);
    Py_DECREF(encoded);
    return NULL;
}

Here is the call graph for this function:

Definition at line 4799 of file unicodeobject.c.

{
    /* Encode a str object with PyUnicode_EncodeLatin1, passing NULL as the
       error-handling argument.  Any other argument type is reported via
       PyErr_BadArgument() and yields NULL. */
    if (PyUnicode_Check(unicode))
        return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(unicode),
                                      PyUnicode_GET_SIZE(unicode),
                                      NULL);

    PyErr_BadArgument();
    return NULL;
}

Here is the call graph for this function:

Definition at line 4366 of file unicodeobject.c.

{
    /* Encode a str object with PyUnicode_EncodeRawUnicodeEscape.  A
       non-str argument is reported via PyErr_BadArgument() and yields
       NULL. */
    if (PyUnicode_Check(unicode))
        return PyUnicode_EncodeRawUnicodeEscape(PyUnicode_AS_UNICODE(unicode),
                                                PyUnicode_GET_SIZE(unicode));

    PyErr_BadArgument();
    return NULL;
}

Here is the call graph for this function:

Definition at line 1972 of file unicodeobject.c.

{
    /* Hand back the object's own Py_UNICODE buffer (no copy is made here).
       A non-str argument is reported via PyErr_BadArgument() and yields
       NULL. */
    if (PyUnicode_Check(unicode))
        return PyUnicode_AS_UNICODE(unicode);

    PyErr_BadArgument();
    return NULL;
}

Here is the call graph for this function:

Here is the caller graph for this function:

Definition at line 10394 of file unicodeobject.c.

{
    /* Return a freshly PyMem_Malloc'ed copy of the string's Py_UNICODE
       buffer, including the trailing nul character.

       Returns NULL with an error set when `object` is not a str object,
       when the byte size would overflow, or when the allocation fails.
       The copy is allocated with PyMem_Malloc, so the caller presumably
       releases it with PyMem_Free -- confirm against the API docs. */
    PyUnicodeObject *unicode;
    Py_UNICODE *copy;
    Py_ssize_t size;

    /* Validate the argument before casting it, as the sibling
       PyUnicode_As* functions do; the macros below assume a str object. */
    if (!PyUnicode_Check(object)) {
        PyErr_BadArgument();
        return NULL;
    }
    unicode = (PyUnicodeObject *)object;

    /* Ensure we won't overflow the size. */
    if (PyUnicode_GET_SIZE(unicode) > ((PY_SSIZE_T_MAX / sizeof(Py_UNICODE)) - 1)) {
        PyErr_NoMemory();
        return NULL;
    }
    size = PyUnicode_GET_SIZE(unicode) + 1; /* copy the nul character */
    size *= sizeof(Py_UNICODE);             /* element count -> byte count */
    copy = PyMem_Malloc(size);
    if (copy == NULL) {
        PyErr_NoMemory();
        return NULL;
    }
    memcpy(copy, PyUnicode_AS_UNICODE(unicode), size);
    return copy;
}

Here is the call graph for this function:

Definition at line 4147 of file unicodeobject.c.

{
    /* Encode a str object with PyUnicode_EncodeUnicodeEscape.  A non-str
       argument is reported via PyErr_BadArgument() and yields NULL. */
    if (PyUnicode_Check(unicode))
        return PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(unicode),
                                             PyUnicode_GET_SIZE(unicode));

    PyErr_BadArgument();
    return NULL;
}

Here is the call graph for this function:

Definition at line 3732 of file unicodeobject.c.

{
    /* Encode a str object with PyUnicode_EncodeUTF16, passing NULL for the
       error-handling argument and 0 for the final argument.  A non-str
       argument is reported via PyErr_BadArgument() and yields NULL. */
    if (PyUnicode_Check(unicode))
        return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(unicode),
                                     PyUnicode_GET_SIZE(unicode),
                                     NULL,
                                     0);

    PyErr_BadArgument();
    return NULL;
}

Here is the call graph for this function:

Definition at line 3342 of file unicodeobject.c.

{
    /* Encode a str object with PyUnicode_EncodeUTF32, passing NULL for the
       error-handling argument and 0 for the final argument.  A non-str
       argument is reported via PyErr_BadArgument() and yields NULL. */
    if (PyUnicode_Check(unicode))
        return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(unicode),
                                     PyUnicode_GET_SIZE(unicode),
                                     NULL,
                                     0);

    PyErr_BadArgument();
    return NULL;
}

Here is the call graph for this function:

Definition at line 3068 of file unicodeobject.c.

{
    /* Encode a str object with PyUnicode_EncodeUTF8, passing NULL for the
       error-handling argument.  A non-str argument is reported via
       PyErr_BadArgument() and yields NULL. */
    if (PyUnicode_Check(unicode))
        return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
                                    PyUnicode_GET_SIZE(unicode),
                                    NULL);

    PyErr_BadArgument();
    return NULL;
}

Here is the call graph for this function:

Here is the caller graph for this function:

Definition at line 5408 of file unicodeobject.c.

{
    /* Build an encoding-side lookup object from a 256-entry decoding table
       `string` (index = byte value, element = code point).

       Returns NULL when `string` is not a str of exactly 256 characters or
       when an allocation fails.  Otherwise returns either a dict mapping
       code point -> byte value (the fallback), or an EncodingMapType
       object holding a three-level trie indexed by the code point's
       bits 15..11, 10..7 and 6..0. */
    Py_UNICODE *decode;
    PyObject *result;
    struct encoding_map *mresult;
    int i;
    int need_dict = 0;
    /* Scratch tables used only to count how many second- and third-level
       trie blocks are needed; 0xFF marks "no block assigned yet". */
    unsigned char level1[32];
    unsigned char level2[512];
    unsigned char *mlevel1, *mlevel2, *mlevel3;
    int count2 = 0, count3 = 0;

    if (!PyUnicode_Check(string) || PyUnicode_GetSize(string) != 256) {
        PyErr_BadArgument();
        return NULL;
    }
    decode = PyUnicode_AS_UNICODE(string);
    memset(level1, 0xFF, sizeof level1);
    memset(level2, 0xFF, sizeof level2);

    /* If there isn't a one-to-one mapping of NULL to \0,
       or if there are non-BMP characters, we need to use
       a mapping dictionary. */
    if (decode[0] != 0)
        need_dict = 1;
    /* First pass: count the distinct level-2 and level-3 blocks touched by
       the mapped code points (entry 0 was handled above). */
    for (i = 1; i < 256; i++) {
        int l1, l2;
        if (decode[i] == 0
#ifdef Py_UNICODE_WIDE
            || decode[i] > 0xFFFF
#endif
            ) {
            need_dict = 1;
            break;
        }
        if (decode[i] == 0xFFFE)
            /* unmapped character */
            continue;
        l1 = decode[i] >> 11;   /* top 5 bits of a 16-bit code point: 0..31 */
        l2 = decode[i] >> 7;    /* top 9 bits of a 16-bit code point: 0..511 */
        if (level1[l1] == 0xFF)
            level1[l1] = count2++;
        if (level2[l2] == 0xFF)
            level2[l2] = count3++;
    }

    /* Block numbers are stored in unsigned chars and 0xFF is the "unused"
       marker, so that many blocks cannot be represented in the trie. */
    if (count2 >= 0xFF || count3 >= 0xFF)
        need_dict = 1;

    if (need_dict) {
        /* Fallback: plain dict {code point: byte value}.  Note that this
           `result` shadows the outer variable of the same name. */
        PyObject *result = PyDict_New();
        PyObject *key, *value;
        if (!result)
            return NULL;
        for (i = 0; i < 256; i++) {
            key = value = NULL;
            key = PyLong_FromLong(decode[i]);
            value = PyLong_FromLong(i);
            if (!key || !value)
                goto failed1;
            if (PyDict_SetItem(result, key, value) == -1)
                goto failed1;
            Py_DECREF(key);
            Py_DECREF(value);
        }
        return result;
      failed1:
        /* key and/or value may be NULL here, hence the XDECREFs. */
        Py_XDECREF(key);
        Py_XDECREF(value);
        Py_DECREF(result);
        return NULL;
    }

    /* Create a three-level trie */
    /* Space for 16 bytes per level-2 block and 128 bytes per level-3 block
       follows the struct.  NOTE(review): the "- 1" presumably accounts for
       a one-byte trailing array declared in struct encoding_map -- confirm
       against the struct definition. */
    result = PyObject_MALLOC(sizeof(struct encoding_map) +
                             16*count2 + 128*count3 - 1);
    if (!result)
        return PyErr_NoMemory();
    PyObject_Init(result, &EncodingMapType);
    mresult = (struct encoding_map*)result;
    mresult->count2 = count2;
    mresult->count3 = count3;
    mlevel1 = mresult->level1;
    mlevel2 = mresult->level23;
    mlevel3 = mresult->level23 + 16*count2;   /* level 3 follows level 2 */
    memcpy(mlevel1, level1, 32);
    memset(mlevel2, 0xFF, 16*count2);
    memset(mlevel3, 0, 128*count3);
    /* Second pass: assign level-3 block numbers again from zero and store
       the byte value for every mapped code point. */
    count3 = 0;
    for (i = 1; i < 256; i++) {
        int o1, o2, o3, i2, i3;
        if (decode[i] == 0xFFFE)
            /* unmapped character */
            continue;
        o1 = decode[i]>>11;             /* index into level 1 */
        o2 = (decode[i]>>7) & 0xF;      /* slot within the level-2 block */
        i2 = 16*mlevel1[o1] + o2;
        if (mlevel2[i2] == 0xFF)
            mlevel2[i2] = count3++;
        o3 = decode[i] & 0x7F;          /* slot within the level-3 block */
        i3 = 128*mlevel2[i2] + o3;
        mlevel3[i3] = i;
    }
    return result;
}

Here is the call graph for this function:

Here is the caller graph for this function:

Definition at line 10030 of file unicodeobject.c.

{
    /* Release every object on the unicode free list and report how many
       entries the list held on entry (the value of numfree). */
    const int released = numfree;
    PyUnicodeObject *node = free_list;

    while (node != NULL) {
        /* The link to the next entry is stored in the first pointer-sized
           word of each free object; fetch it before the object is freed. */
        PyUnicodeObject *next = *(PyUnicodeObject **)node;

        if (node->str)
            PyObject_DEL(node->str);
        Py_XDECREF(node->defenc);
        PyObject_Del(node);
        numfree--;

        node = next;
    }
    free_list = NULL;
    assert(numfree == 0);
    return released;
}

Here is the caller graph for this function:

int PyUnicode_Compare ( PyObject *  left,
PyObject *  right 
)

Definition at line 7257 of file unicodeobject.c.

{
    /* Compare two str objects via unicode_compare().  If either operand is
       not a str, set a TypeError naming both types and return -1. */
    if (!PyUnicode_Check(left) || !PyUnicode_Check(right)) {
        PyErr_Format(PyExc_TypeError,
                     "Can't compare %.100s and %.100s",
                     left->ob_type->tp_name,
                     right->ob_type->tp_name);
        return -1;
    }

    return unicode_compare((PyUnicodeObject *)left,
                           (PyUnicodeObject *)right);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PyUnicode_CompareWithASCIIString ( PyObject *  uni,
const char *  str 
)

Definition at line 7271 of file unicodeobject.c.

{
    /* Compare the str object `uni` with the nul-terminated C string `str`.
       Returns -1, 0 or 1 when `uni` is respectively less than, equal to or
       greater than `str`.  `uni` must be a str object; this is only
       checked by an assert. */

    /* The index is compared with PyUnicode_GET_SIZE() below, so it uses
       Py_ssize_t; a plain int could overflow on very long strings. */
    Py_ssize_t i;
    Py_UNICODE *id;
    assert(PyUnicode_Check(uni));
    id = PyUnicode_AS_UNICODE(uni);
    /* Compare Unicode string and source character set string */
    for (i = 0; id[i] && str[i]; i++)
        if (id[i] != str[i])
            return ((int)id[i] < (int)str[i]) ? -1 : 1;
    /* This check keeps Python strings that end in '\0' from comparing equal
     to C strings identical up to that point. */
    if (PyUnicode_GET_SIZE(uni) != i || id[i])
        return 1; /* uni is longer */
    if (str[i])
        return -1; /* str is longer */
    return 0;
}

Here is the caller graph for this function:

PyObject* PyUnicode_Concat ( PyObject *  left,
PyObject *  right 
)

Definition at line 7382 of file unicodeobject.c.

{
    /* Concatenate two objects after coercing each with
       PyUnicode_FromObject().  Returns a new reference, or NULL if a
       coercion or the allocation of the result fails. */
    PyUnicodeObject *lhs, *rhs, *joined;

    /* Coerce the two arguments */
    lhs = (PyUnicodeObject *)PyUnicode_FromObject(left);
    if (lhs == NULL)
        return NULL;
    rhs = (PyUnicodeObject *)PyUnicode_FromObject(right);
    if (rhs == NULL) {
        Py_DECREF(lhs);
        return NULL;
    }

    /* Shortcuts: when one side is the shared empty string, the other
       coerced operand already is the answer. */
    if (rhs == unicode_empty) {
        Py_DECREF(rhs);
        return (PyObject *)lhs;
    }
    if (lhs == unicode_empty) {
        Py_DECREF(lhs);
        return (PyObject *)rhs;
    }

    /* General case: allocate the result and copy both halves into it. */
    joined = _PyUnicode_New(lhs->length + rhs->length);
    if (joined != NULL) {
        Py_UNICODE_COPY(joined->str, lhs->str, lhs->length);
        Py_UNICODE_COPY(joined->str + lhs->length, rhs->str, rhs->length);
    }

    /* The coerced operands are released on success and failure alike. */
    Py_DECREF(lhs);
    Py_DECREF(rhs);
    return (PyObject *)joined;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PyUnicode_Contains ( PyObject *  container,
PyObject *  element 
)

Definition at line 7351 of file unicodeobject.c.

{
    /* Test whether `element` occurs in `container`.  Returns the result of
       stringlib_contains_obj(), or -1 when either argument cannot be
       coerced with PyUnicode_FromObject(). */
    PyObject *haystack, *needle;
    int found = -1;

    /* Coerce the element first; any failure here is replaced by a
       TypeError naming the offending type. */
    needle = PyUnicode_FromObject(element);
    if (needle == NULL) {
        PyErr_Format(PyExc_TypeError,
                     "'in <string>' requires string as left operand, not %s",
                     element->ob_type->tp_name);
        return -1;
    }

    haystack = PyUnicode_FromObject(container);
    if (haystack != NULL) {
        found = stringlib_contains_obj(haystack, needle);
        Py_DECREF(haystack);
    }
    Py_DECREF(needle);

    return found;
}

Here is the call graph for this function:

Here is the caller graph for this function:

Py_ssize_t PyUnicode_Count ( PyObject *  str,
PyObject *  substr,
Py_ssize_t  start,
Py_ssize_t  end 
)

Definition at line 6445 of file unicodeobject.c.

{
    /* Count occurrences of `substr` within str[start:end] using
       stringlib_count().  Returns -1 when either argument cannot be
       coerced with PyUnicode_FromObject(). */
    PyUnicodeObject *haystack, *needle;
    Py_ssize_t occurrences = -1;

    haystack = (PyUnicodeObject *)PyUnicode_FromObject(str);
    if (haystack == NULL)
        return -1;

    needle = (PyUnicodeObject *)PyUnicode_FromObject(substr);
    if (needle != NULL) {
        /* Adjust start/end against the string length before slicing. */
        ADJUST_INDICES(start, end, haystack->length);
        occurrences = stringlib_count(haystack->str + start, end - start,
                                      needle->str, needle->length,
                                      PY_SSIZE_T_MAX);
        Py_DECREF(needle);
    }
    Py_DECREF(haystack);

    return occurrences;
}

Here is the call graph for this function:

PyObject* PyUnicode_Decode ( const char *  s,
Py_ssize_t  size,
const char *  encoding,
const char *  errors 
)

Definition at line 1467 of file unicodeobject.c.

{
    /* Decode `size` bytes at `s` using the codec named `encoding` (the
       default encoding when NULL).  Well-known encodings are dispatched
       straight to their C decoders; everything else goes through the
       codec registry and must produce a str object. */
    PyObject *view, *decoded;
    Py_buffer info;
    char lower[11];  /* Enough for any encoding shortcut */

    if (encoding == NULL)
        encoding = PyUnicode_GetDefaultEncoding();

    /* Shortcuts for common default encodings */
    if (normalize_encoding(encoding, lower, sizeof(lower))) {
        if (strcmp(lower, "utf-8") == 0)
            return PyUnicode_DecodeUTF8(s, size, errors);
        if (strcmp(lower, "latin-1") == 0 ||
            strcmp(lower, "iso-8859-1") == 0)
            return PyUnicode_DecodeLatin1(s, size, errors);
#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
        if (strcmp(lower, "mbcs") == 0)
            return PyUnicode_DecodeMBCS(s, size, errors);
#endif
        if (strcmp(lower, "ascii") == 0)
            return PyUnicode_DecodeASCII(s, size, errors);
        if (strcmp(lower, "utf-16") == 0)
            return PyUnicode_DecodeUTF16(s, size, errors, 0);
        if (strcmp(lower, "utf-32") == 0)
            return PyUnicode_DecodeUTF32(s, size, errors, 0);
    }

    /* Decode via the codec registry: wrap the raw bytes in a memoryview
       and hand that to the codec. */
    if (PyBuffer_FillInfo(&info, NULL, (void *)s, size, 1, PyBUF_FULL_RO) < 0)
        return NULL;
    view = PyMemoryView_FromBuffer(&info);
    if (view == NULL)
        return NULL;

    decoded = PyCodec_Decode(view, encoding, errors);
    if (decoded != NULL && !PyUnicode_Check(decoded)) {
        /* The codec returned a non-str object: report it and discard. */
        PyErr_Format(PyExc_TypeError,
                     "decoder did not return a str object (type=%.400s)",
                     Py_TYPE(decoded)->tp_name);
        Py_DECREF(decoded);
        decoded = NULL;
    }
    Py_DECREF(view);
    return decoded;
}

Here is the call graph for this function:

Here is the caller graph for this function:

PyObject* PyUnicode_DecodeASCII ( const char *  s,
Py_ssize_t  size,
const char *  errors 
)

Definition at line 4812 of file unicodeobject.c.

{
    /* Decode `size` bytes at `s` as ASCII.  Bytes >= 128 are passed to the
       decoding error handler selected by `errors`.  Returns a new str
       object, or NULL on error. */
    const char *starts = s;
    const char *e;
    PyUnicodeObject *v;
    Py_UNICODE *p;
    Py_ssize_t startinpos, endinpos, outpos;
    Py_ssize_t written;
    PyObject *errorHandler = NULL;
    PyObject *exc = NULL;

    /* ASCII is equivalent to the first 128 ordinals in Unicode. */
    if (size == 1 && *(unsigned char*)s < 128) {
        Py_UNICODE r = *(unsigned char*)s;
        return PyUnicode_FromUnicode(&r, 1);
    }

    v = _PyUnicode_New(size);
    if (v == NULL)
        goto onError;
    if (size == 0)
        return (PyObject *)v;

    p = PyUnicode_AS_UNICODE(v);
    for (e = s + size; s < e; ) {
        const unsigned char c = (unsigned char)*s;

        if (c < 128) {
            /* Plain ASCII: copy the ordinal through unchanged. */
            *p++ = c;
            ++s;
            continue;
        }

        /* Non-ASCII byte: describe the one-byte error span and let the
           error handler decide; it receives the addresses of the input
           and output cursors and of the result object. */
        startinpos = s-starts;
        endinpos = startinpos + 1;
        outpos = p - (Py_UNICODE *)PyUnicode_AS_UNICODE(v);
        if (unicode_decode_call_errorhandler(
                errors, &errorHandler,
                "ascii", "ordinal not in range(128)",
                &starts, &e, &startinpos, &endinpos, &exc, &s,
                &v, &outpos, &p))
            goto onError;
    }

    /* Trim the result if fewer characters were written than allocated. */
    written = p - PyUnicode_AS_UNICODE(v);
    if (written < PyUnicode_GET_SIZE(v) && _PyUnicode_Resize(&v, written) < 0)
        goto onError;

    Py_XDECREF(errorHandler);
    Py_XDECREF(exc);
    return (PyObject *)v;

  onError:
    Py_XDECREF(v);
    Py_XDECREF(errorHandler);
    Py_XDECREF(exc);
    return NULL;
}

Here is the call graph for this function:

Here is the caller graph for this function:

PyObject* PyUnicode_DecodeCharmap ( const char *  s,
Py_ssize_t  size,
PyObject *  mapping,
const char *  errors 
)

Definition at line 5173 of file unicodeobject.c.

{
    /* Decode `size` bytes at `s` through a character map.

       `mapping` may be NULL (decode as Latin-1), an exact str object used
       as a lookup table indexed by byte value, or any object supporting
       item lookup that maps byte ordinals to an integer, a str, or None.
       Undefined mappings are passed to the decoding error handler selected
       by `errors`.  Returns a new str object, or NULL on error. */
    const char *starts = s;
    Py_ssize_t startinpos;
    Py_ssize_t endinpos;
    Py_ssize_t outpos;
    const char *e;
    PyUnicodeObject *v;
    Py_UNICODE *p;
    /* Spare output room already reserved by earlier resizes for
       one-to-many mappings. */
    Py_ssize_t extrachars = 0;
    PyObject *errorHandler = NULL;
    PyObject *exc = NULL;
    Py_UNICODE *mapstring = NULL;
    Py_ssize_t maplen = 0;

    /* Default to Latin-1 */
    if (mapping == NULL)
        return PyUnicode_DecodeLatin1(s, size, errors);

    /* Start with one output character per input byte. */
    v = _PyUnicode_New(size);
    if (v == NULL)
        goto onError;
    if (size == 0)
        return (PyObject *)v;
    p = PyUnicode_AS_UNICODE(v);
    e = s + size;
    if (PyUnicode_CheckExact(mapping)) {
        /* Fast path: the mapping is a str used as a direct lookup table. */
        mapstring = PyUnicode_AS_UNICODE(mapping);
        maplen = PyUnicode_GET_SIZE(mapping);
        while (s < e) {
            unsigned char ch = *s;
            Py_UNICODE x = 0xfffe; /* illegal value */

            /* Bytes beyond the end of the table keep the 0xfffe marker
               and are therefore treated as undefined. */
            if (ch < maplen)
                x = mapstring[ch];

            if (x == 0xfffe) {
                /* undefined mapping */
                outpos = p-PyUnicode_AS_UNICODE(v);
                startinpos = s-starts;
                endinpos = startinpos+1;
                if (unicode_decode_call_errorhandler(
                        errors, &errorHandler,
                        "charmap", "character maps to <undefined>",
                        &starts, &e, &startinpos, &endinpos, &exc, &s,
                        &v, &outpos, &p)) {
                    goto onError;
                }
                /* s and p were handed to the error handler by address, so
                   they are not advanced here. */
                continue;
            }
            *p++ = x;
            ++s;
        }
    }
    else {
        /* Generic path: look every byte up with PyObject_GetItem(). */
        while (s < e) {
            unsigned char ch = *s;
            PyObject *w, *x;

            /* Get mapping (char ordinal -> integer, Unicode char or None) */
            w = PyLong_FromLong((long)ch);
            if (w == NULL)
                goto onError;
            x = PyObject_GetItem(mapping, w);
            Py_DECREF(w);
            if (x == NULL) {
                if (PyErr_ExceptionMatches(PyExc_LookupError)) {
                    /* No mapping found means: mapping is undefined. */
                    PyErr_Clear();
                    x = Py_None;
                    Py_INCREF(x);
                } else
                    goto onError;
            }

            /* Apply mapping */
            if (PyLong_Check(x)) {
                /* Integer result: a single code point in range(65536). */
                long value = PyLong_AS_LONG(x);
                if (value < 0 || value > 65535) {
                    PyErr_SetString(PyExc_TypeError,
                                    "character mapping must be in range(65536)");
                    Py_DECREF(x);
                    goto onError;
                }
                *p++ = (Py_UNICODE)value;
            }
            else if (x == Py_None) {
                /* undefined mapping */
                outpos = p-PyUnicode_AS_UNICODE(v);
                startinpos = s-starts;
                endinpos = startinpos+1;
                if (unicode_decode_call_errorhandler(
                        errors, &errorHandler,
                        "charmap", "character maps to <undefined>",
                        &starts, &e, &startinpos, &endinpos, &exc, &s,
                        &v, &outpos, &p)) {
                    Py_DECREF(x);
                    goto onError;
                }
                Py_DECREF(x);
                /* As above: s and p were passed to the handler by address. */
                continue;
            }
            else if (PyUnicode_Check(x)) {
                Py_ssize_t targetsize = PyUnicode_GET_SIZE(x);

                if (targetsize == 1)
                    /* 1-1 mapping */
                    *p++ = *PyUnicode_AS_UNICODE(x);

                else if (targetsize > 1) {
                    /* 1-n mapping */
                    if (targetsize > extrachars) {
                        /* resize first */
                        /* Remember the write offset: the resize may move
                           the buffer, so p is recomputed afterwards. */
                        Py_ssize_t oldpos = p - PyUnicode_AS_UNICODE(v);
                        /* Grow by the shortfall plus 4 * targetsize of
                           slack for later one-to-many mappings. */
                        Py_ssize_t needed = (targetsize - extrachars) + \
                            (targetsize << 2);
                        extrachars += needed;
                        /* XXX overflow detection missing */
                        if (_PyUnicode_Resize(&v,
                                              PyUnicode_GET_SIZE(v) + needed) < 0) {
                            Py_DECREF(x);
                            goto onError;
                        }
                        p = PyUnicode_AS_UNICODE(v) + oldpos;
                    }
                    Py_UNICODE_COPY(p,
                                    PyUnicode_AS_UNICODE(x),
                                    targetsize);
                    p += targetsize;
                    extrachars -= targetsize;
                }
                /* 1-0 mapping: skip the character */
            }
            else {
                /* wrong return value */
                PyErr_SetString(PyExc_TypeError,
                                "character mapping must return integer, None or str");
                Py_DECREF(x);
                goto onError;
            }
            Py_DECREF(x);
            ++s;
        }
    }
    /* Trim the result if fewer characters were written than allocated. */
    if (p - PyUnicode_AS_UNICODE(v) < PyUnicode_GET_SIZE(v))
        if (_PyUnicode_Resize(&v, p - PyUnicode_AS_UNICODE(v)) < 0)
            goto onError;
    Py_XDECREF(errorHandler);
    Py_XDECREF(exc);
    return (PyObject *)v;

  onError:
    Py_XDECREF(errorHandler);
    Py_XDECREF(exc);
    Py_XDECREF(v);
    return NULL;
}

Here is the call graph for this function:

Here is the caller graph for this function:

Definition at line 1818 of file unicodeobject.c.

Here is the call graph for this function:

Here is the caller graph for this function:

Definition at line 1824 of file unicodeobject.c.

{
    /* Decode `size` bytes at `s` as a filename.  Which decoder is used
       depends on the platform and on whether the filesystem codec has
       been initialized for the current interpreter. */
#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
    return PyUnicode_DecodeMBCS(s, size, NULL);
#elif defined(__APPLE__)
    return PyUnicode_DecodeUTF8(s, size, "surrogateescape");
#else
    PyInterpreterState *interp = PyThreadState_GET()->interp;
    wchar_t *wide;
    PyObject *decoded;
    size_t wide_len;

    /* Bootstrap check: if the filesystem codec is implemented in Python, we
       cannot use it to encode and decode filenames before it is loaded.
       Loading the Python codec requires encoding at least its own filename.
       Use the C version of the locale codec until the codec registry is
       initialized and the Python codec is loaded.

       Py_FileSystemDefaultEncoding is shared between all interpreters, so
       we cannot rely on it alone: also check interp->fscodec_initialized
       for subinterpreters. */
    if (Py_FileSystemDefaultEncoding && interp->fscodec_initialized)
        return PyUnicode_Decode(s, size,
                                Py_FileSystemDefaultEncoding,
                                "surrogateescape");

    /* locale encoding with surrogateescape */

    /* The locale path works on a nul-terminated C string, so the input
       must end at s[size] and must not contain an earlier nul. */
    if (s[size] != '\0' || size != strlen(s)) {
        PyErr_SetString(PyExc_TypeError, "embedded NUL character");
        return NULL;
    }

    wide = _Py_char2wchar(s, &wide_len);
    if (wide == NULL)
        return PyErr_NoMemory();

    decoded = PyUnicode_FromWideChar(wide, wide_len);
    PyMem_Free(wide);
    return decoded;
#endif
}

Here is the call graph for this function:

Here is the caller graph for this function:

PyObject* PyUnicode_DecodeLatin1 ( const char *  s,
Py_ssize_t  size,
const char *  errors 
)

Definition at line 4459 of file unicodeobject.c.

{
    /* Decode `size` bytes at `s` as Latin-1: every byte becomes the code
       point with the same ordinal.  `errors` is not consulted here.
       Returns a new str object, or NULL if the allocation fails. */
    PyUnicodeObject *v;
    Py_UNICODE *p;
    const char *e;

    /* Latin-1 is equivalent to the first 256 ordinals in Unicode. */
    if (size == 1) {
        Py_UNICODE r = *(unsigned char*)s;
        return PyUnicode_FromUnicode(&r, 1);
    }

    v = _PyUnicode_New(size);
    if (v == NULL)
        return NULL;
    if (size == 0)
        return (PyObject *)v;
    p = PyUnicode_AS_UNICODE(v);
    e = s + size;
    /* Unrolling the copy makes it much faster by reducing the looping
       overhead. This is similar to what many memcpy() implementations do.

       The loop tests the remaining length instead of comparing against a
       precomputed "e - 4": for inputs shorter than four bytes that pointer
       would lie before the start of the buffer, which is undefined
       behaviour in C even if it is never dereferenced. */
    while (e - s >= 4) {
        p[0] = (unsigned char) s[0];
        p[1] = (unsigned char) s[1];
        p[2] = (unsigned char) s[2];
        p[3] = (unsigned char) s[3];
        s += 4;
        p += 4;
    }
    /* Copy the remaining 0..3 bytes one at a time. */
    while (s < e)
        *p++ = (unsigned char) *s++;
    return (PyObject *)v;
}

Here is the call graph for this function:

Here is the caller graph for this function:

PyObject* PyUnicode_DecodeRawUnicodeEscape ( const char *  s,
Py_ssize_t  size,
const char *  errors 
)

Definition at line 4161 of file unicodeobject.c.

{
    /* Decode `size` bytes at `s` using the raw-unicode-escape rules: every
       byte is taken as the code point with the same ordinal, except that
       \uXXXX and \UXXXXXXXX preceded by an odd number of backslashes are
       replaced by the code point they spell.  Malformed or out-of-range
       escapes are passed to the decoding error handler selected by
       `errors`.  Returns a new str object, or NULL on error. */
    const char *starts = s;
    Py_ssize_t startinpos;
    Py_ssize_t endinpos;
    Py_ssize_t outpos;
    PyUnicodeObject *v;
    Py_UNICODE *p;
    const char *end;
    const char *bs;
    PyObject *errorHandler = NULL;
    PyObject *exc = NULL;

    /* Escaped strings will always be longer than the resulting
       Unicode string, so we start with size here and then reduce the
       length after conversion to the true value. (But decoding error
       handler might have to resize the string) */
    v = _PyUnicode_New(size);
    if (v == NULL)
        goto onError;
    if (size == 0)
        return (PyObject *)v;
    p = PyUnicode_AS_UNICODE(v);
    end = s + size;
    while (s < end) {
        unsigned char c;
        Py_UCS4 x;
        int i;
        int count;

        /* Non-escape characters are interpreted as Unicode ordinals */
        if (*s != '\\') {
            *p++ = (unsigned char)*s++;
            continue;
        }
        startinpos = s-starts;

        /* \u-escapes are only interpreted iff the number of leading
           backslashes is odd */
        bs = s;
        for (;s < end;) {
            if (*s != '\\')
                break;
            *p++ = (unsigned char)*s++;
        }
        if (((s - bs) & 1) == 0 ||
            s >= end ||
            (*s != 'u' && *s != 'U')) {
            continue;
        }
        /* The last backslash copied above belongs to the escape itself:
           take it back out of the output. */
        p--;
        count = *s=='u' ? 4 : 8;
        s++;

        /* \uXXXX with 4 hex digits, \Uxxxxxxxx with 8 */
        outpos = p-PyUnicode_AS_UNICODE(v);
        for (x = 0, i = 0; i < count; ++i, ++s) {
            /* Check the input bound before reading: an escape cut short by
               the end of the input is reported as truncated instead of
               reading past the buffer. */
            if (s >= end || !Py_ISXDIGIT((unsigned char)*s)) {
                endinpos = s-starts;
                if (unicode_decode_call_errorhandler(
                        errors, &errorHandler,
                        "rawunicodeescape", "truncated \\uXXXX",
                        &starts, &end, &startinpos, &endinpos, &exc, &s,
                        &v, &outpos, &p))
                    goto onError;
                goto nextByte;
            }
            c = (unsigned char)*s;
            /* Shift in the next hex digit. */
            x = (x<<4) & ~0xF;
            if (c >= '0' && c <= '9')
                x += c - '0';
            else if (c >= 'a' && c <= 'f')
                x += 10 + c - 'a';
            else
                x += 10 + c - 'A';
        }
        if (x <= 0xffff)
            /* UCS-2 character */
            *p++ = (Py_UNICODE) x;
        else if (x <= 0x10ffff) {
            /* UCS-4 character. Either store directly, or as
               surrogate pair. */
#ifdef Py_UNICODE_WIDE
            *p++ = (Py_UNICODE) x;
#else
            x -= 0x10000L;
            *p++ = 0xD800 + (Py_UNICODE) (x >> 10);
            *p++ = 0xDC00 + (Py_UNICODE) (x & 0x03FF);
#endif
        } else {
            /* Value above 0x10ffff: hand the escape to the error handler. */
            endinpos = s-starts;
            outpos = p-PyUnicode_AS_UNICODE(v);
            if (unicode_decode_call_errorhandler(
                    errors, &errorHandler,
                    "rawunicodeescape", "\\Uxxxxxxxx out of range",
                    &starts, &end, &startinpos, &endinpos, &exc, &s,
                    &v, &outpos, &p))
                goto onError;
        }
      nextByte:
        ;
    }
    /* Shrink the result to the number of characters actually written. */
    if (_PyUnicode_Resize(&v, p - PyUnicode_AS_UNICODE(v)) < 0)
        goto onError;
    Py_XDECREF(errorHandler);
    Py_XDECREF(exc);
    return (PyObject *)v;

  onError:
    Py_XDECREF(v);
    Py_XDECREF(errorHandler);
    Py_XDECREF(exc);
    return NULL;
}

Here is the call graph for this function:

Here is the caller graph for this function:

PyObject* PyUnicode_DecodeUnicodeEscape ( const char *  s,
Py_ssize_t  size,
const char *  errors 
)

Definition at line 3748 of file unicodeobject.c.

{
    const char *starts = s;
    Py_ssize_t startinpos;
    Py_ssize_t endinpos;
    Py_ssize_t outpos;
    int i;
    PyUnicodeObject *v;
    Py_UNICODE *p;
    const char *end;
    char* message;
    Py_UCS4 chr = 0xffffffff; /* in case 'getcode' messes up */
    PyObject *errorHandler = NULL;
    PyObject *exc = NULL;

    /* Escaped strings will always be longer than the resulting
       Unicode string, so we start with size here and then reduce the
       length after conversion to the true value.
       (but if the error callback returns a long replacement string
       we'll have to allocate more space) */
    v = _PyUnicode_New(size);
    if (v == NULL)
        goto onError;
    if (size == 0)
        return (PyObject *)v;

    p = PyUnicode_AS_UNICODE(v);
    end = s + size;

    while (s < end) {
        unsigned char c;
        Py_UNICODE x;
        int digits;

        /* Non-escape characters are interpreted as Unicode ordinals */
        if (*s != '\\') {
            *p++ = (unsigned char) *s++;
            continue;
        }

        startinpos = s-starts;
        /* \ - Escapes */
        s++;
        c = *s++;
        if (s > end)
            c = '\0'; /* Invalid after \ */
        switch (c) {

            /* \x escapes */
        case '\n': break;
        case '\\': *p++ = '\\'; break;
        case '\'': *p++ = '\''; break;
        case '\"': *p++ = '\"'; break;
        case 'b': *p++ = '\b'; break;
        case 'f': *p++ = '\014'; break; /* FF */
        case 't': *p++ = '\t'; break;
        case 'n': *p++ = '\n'; break;
        case 'r': *p++ = '\r'; break;
        case 'v': *p++ = '\013'; break; /* VT */
        case 'a': *p++ = '\007'; break; /* BEL, not classic C */

            /* \OOO (octal) escapes */
        case '0': case '1': case '2': case '3':
        case '4': case '5': case '6': case '7':
            x = s[-1] - '0';
            if (s < end && '0' <= *s && *s <= '7') {
                x = (x<<3) + *s++ - '0';
                if (s < end && '0' <= *s && *s <= '7')
                    x = (x<<3) + *s++ - '0';
            }
            *p++ = x;
            break;

            /* hex escapes */
            /* \xXX */
        case 'x':
            digits = 2;
            message = "truncated \\xXX escape";
            goto hexescape;

            /* \uXXXX */
        case 'u':
            digits = 4;
            message = "truncated \\uXXXX escape";
            goto hexescape;

            /* \UXXXXXXXX */
        case 'U':
            digits = 8;
            message = "truncated \\UXXXXXXXX escape";
        hexescape:
            chr = 0;
            outpos = p-PyUnicode_AS_UNICODE(v);
            if (s+digits>end) {
                endinpos = size;
                if (unicode_decode_call_errorhandler(
                        errors, &errorHandler,
                        "unicodeescape", "end of string in escape sequence",
                        &starts, &end, &startinpos, &endinpos, &exc, &s,
                        &v, &outpos, &p))
                    goto onError;
                goto nextByte;
            }
            for (i = 0; i < digits; ++i) {
                c = (unsigned char) s[i];
                if (!Py_ISXDIGIT(c)) {
                    endinpos = (s+i+1)-starts;
                    if (unicode_decode_call_errorhandler(
                            errors, &errorHandler,
                            "unicodeescape", message,
                            &starts, &end, &startinpos, &endinpos, &exc, &s,
                            &v, &outpos, &p))
                        goto onError;
                    goto nextByte;
                }
                chr = (chr<<4) & ~0xF;
                if (c >= '0' && c <= '9')
                    chr += c - '0';
                else if (c >= 'a' && c <= 'f')
                    chr += 10 + c - 'a';
                else
                    chr += 10 + c - 'A';
            }
            s += i;
            if (chr == 0xffffffff && PyErr_Occurred())
                /* _decoding_error will have already written into the
                   target buffer. */
                break;
        store:
            /* when we get here, chr is a 32-bit unicode character */
            if (chr <= 0xffff)
                /* UCS-2 character */
                *p++ = (Py_UNICODE) chr;
            else if (chr <= 0x10ffff) {
                /* UCS-4 character. Either store directly, or as
                   surrogate pair. */
#ifdef Py_UNICODE_WIDE
                *p++ = chr;
#else
                chr -= 0x10000L;
                *p++ = 0xD800 + (Py_UNICODE) (chr >> 10);
                *p++ = 0xDC00 + (Py_UNICODE) (chr & 0x03FF);
#endif
            } else {
                endinpos = s-starts;
                outpos = p-PyUnicode_AS_UNICODE(v);
                if (unicode_decode_call_errorhandler(
                        errors, &errorHandler,
                        "unicodeescape", "illegal Unicode character",
                        &starts, &end, &startinpos, &endinpos, &exc, &s,
                        &v, &outpos, &p))
                    goto onError;
            }
            break;

            /* \N{name} */
        case 'N':
            message = "malformed \\N character escape";
            if (ucnhash_CAPI == NULL) {
                /* load the unicode data module */
                ucnhash_CAPI = (_PyUnicode_Name_CAPI *)PyCapsule_Import(PyUnicodeData_CAPSULE_NAME, 1);
                if (ucnhash_CAPI == NULL)
                    goto ucnhashError;
            }
            if (*s == '{') {
                const char *start = s+1;
                /* look for the closing brace */
                while (*s != '}' && s < end)
                    s++;
                if (s > start && s < end && *s == '}') {
                    /* found a name.  look it up in the unicode database */
                    message = "unknown Unicode character name";
                    s++;
                    if (ucnhash_CAPI->getcode(NULL, start, (int)(s-start-1), &chr))
                        goto store;
                }
            }
            endinpos = s-starts;
            outpos = p-PyUnicode_AS_UNICODE(v);
            if (unicode_decode_call_errorhandler(
                    errors, &errorHandler,
                    "unicodeescape", message,
                    &starts, &end, &startinpos, &endinpos, &exc, &s,
                    &v, &outpos, &p))
                goto onError;
            break;

        default:
            if (s > end) {
                message = "\\ at end of string";
                s--;
                endinpos = s-starts;
                outpos = p-PyUnicode_AS_UNICODE(v);
                if (unicode_decode_call_errorhandler(
                        errors, &errorHandler,
                        "unicodeescape", message,
                        &starts, &end, &startinpos, &endinpos, &exc, &s,
                        &v, &outpos, &p))
                    goto onError;
            }
            else {
                *p++ = '\\';
                *p++ = (unsigned char)s[-1];
            }
            break;
        }
      nextByte:
        ;
    }
    if (_PyUnicode_Resize(&v, p - PyUnicode_AS_UNICODE(v)) < 0)
        goto onError;
    Py_XDECREF(errorHandler);
    Py_XDECREF(exc);
    return (PyObject *)v;

  ucnhashError:
    PyErr_SetString(
        PyExc_UnicodeError,
        "\\N escapes not supported (can't load unicodedata module)"
        );
    Py_XDECREF(v);
    Py_XDECREF(errorHandler);
    Py_XDECREF(exc);
    return NULL;

  onError:
    Py_XDECREF(v);
    Py_XDECREF(errorHandler);
    Py_XDECREF(exc);
    return NULL;
}

Here is the call graph for this function:

Here is the caller graph for this function:

PyObject* PyUnicode_DecodeUTF16 ( const char *  s,
Py_ssize_t  size,
const char *  errors,
int *  byteorder 
)

Definition at line 3357 of file unicodeobject.c.

Here is the call graph for this function:

Here is the caller graph for this function:

PyObject* PyUnicode_DecodeUTF16Stateful ( const char *  s,
Py_ssize_t  size,
const char *  errors,
int *  byteorder,
Py_ssize_t *  consumed 
)

Definition at line 3383 of file unicodeobject.c.

{
    /* Decode size bytes of UTF-16 data starting at s into a new unicode
       object.

       byteorder (may be NULL) selects the byte order on entry: -1 forces
       little endian, 1 forces big endian, 0 means native order unless a
       leading BOM says otherwise.  The order actually used is stored back
       through it once decoding is finished.

       consumed (may be NULL) switches on stateful decoding: an odd
       trailing byte, or a lead surrogate whose trail surrogate has not
       arrived yet, is left undecoded instead of being reported as an
       error, and the number of bytes decoded is stored through it.

       Malformed input is handed to unicode_decode_call_errorhandler()
       together with the errors policy.  Returns the new object, or NULL
       when an error was raised. */
    const char *starts = s;
    Py_ssize_t startinpos;
    Py_ssize_t endinpos;
    Py_ssize_t outpos;
    PyUnicodeObject *unicode;
    Py_UNICODE *p;
    const unsigned char *q, *e, *aligned_end;
    int bo = 0;       /* assume native ordering by default */
    int native_ordering = 0;
    const char *errmsg = "";
    /* Offsets from q for retrieving byte pairs in the right order. */
#ifdef BYTEORDER_IS_LITTLE_ENDIAN
    int ihi = 1, ilo = 0;
#else
    int ihi = 0, ilo = 1;
#endif
    PyObject *errorHandler = NULL;
    PyObject *exc = NULL;

    /* Note: size will always be longer than the resulting Unicode
       character count */
    unicode = _PyUnicode_New(size);
    if (!unicode)
        return NULL;
    if (size == 0) {
        /* Nothing was decoded; still report that to a stateful caller
           rather than leaving *consumed untouched. */
        if (consumed)
            *consumed = 0;
        return (PyObject *)unicode;
    }

    /* Unpack UTF-16 encoded data */
    p = unicode->str;
    q = (unsigned char *)s;
    /* e points at the LAST input byte (not one past it), so "q < e" means
       that at least two bytes are still available at q. */
    e = q + size - 1;

    if (byteorder)
        bo = *byteorder;

    /* Check for BOM marks (U+FEFF) in the input and adjust current
       byte order setting accordingly. In native mode, the leading BOM
       mark is skipped, in all other modes, it is copied to the output
       stream as-is (giving a ZWNBSP character). */
    if (bo == 0) {
        if (size >= 2) {
            const Py_UNICODE bom = (q[ihi] << 8) | q[ilo];
#ifdef BYTEORDER_IS_LITTLE_ENDIAN
            if (bom == 0xFEFF) {
                q += 2;
                bo = -1;
            }
            else if (bom == 0xFFFE) {
                q += 2;
                bo = 1;
            }
#else
            if (bom == 0xFEFF) {
                q += 2;
                bo = 1;
            }
            else if (bom == 0xFFFE) {
                q += 2;
                bo = -1;
            }
#endif
        }
    }

    if (bo == -1) {
        /* force LE */
        ihi = 1;
        ilo = 0;
    }
    else if (bo == 1) {
        /* force BE */
        ihi = 0;
        ilo = 1;
    }
#ifdef BYTEORDER_IS_LITTLE_ENDIAN
    native_ordering = ilo < ihi;
#else
    native_ordering = ilo > ihi;
#endif

    /* Upper bound for the long-at-a-time fast path: e rounded down to a
       multiple of the size of a C 'long'. */
    aligned_end = (const unsigned char *) ((size_t) e & ~LONG_PTR_MASK);
    while (q < e) {
        Py_UNICODE ch;
        /* First check for possible aligned read of a C 'long'. Unaligned
           reads are more expensive, better to defer to another iteration. */
        if (!((size_t) q & LONG_PTR_MASK)) {
            /* Fast path for runs of non-surrogate chars. */
            register const unsigned char *_q = q;
            Py_UNICODE *_p = p;
            if (native_ordering) {
                /* Native ordering is simple: as long as the input cannot
                   possibly contain a surrogate char, do an unrolled copy
                   of several 16-bit code points to the target object.
                   The non-surrogate check is done on several input bytes
                   at a time (as many as a C 'long' can contain). */
                while (_q < aligned_end) {
                    unsigned long data = * (unsigned long *) _q;
                    if (data & FAST_CHAR_MASK)
                        break;
                    _p[0] = ((unsigned short *) _q)[0];
                    _p[1] = ((unsigned short *) _q)[1];
#if (SIZEOF_LONG == 8)
                    _p[2] = ((unsigned short *) _q)[2];
                    _p[3] = ((unsigned short *) _q)[3];
#endif
                    _q += SIZEOF_LONG;
                    _p += SIZEOF_LONG / 2;
                }
            }
            else {
                /* Byteswapped ordering is similar, but we must decompose
                   the copy bytewise, and take care of zero'ing out the
                   upper bytes if the target object is in 32-bit units
                   (that is, in UCS-4 builds). */
                while (_q < aligned_end) {
                    unsigned long data = * (unsigned long *) _q;
                    if (data & SWAPPED_FAST_CHAR_MASK)
                        break;
                    /* Zero upper bytes in UCS-4 builds */
#if (Py_UNICODE_SIZE > 2)
                    _p[0] = 0;
                    _p[1] = 0;
#if (SIZEOF_LONG == 8)
                    _p[2] = 0;
                    _p[3] = 0;
#endif
#endif
                    /* Issue #4916; UCS-4 builds on big endian machines must
                       fill the two last bytes of each 4-byte unit. */
#if (!defined(BYTEORDER_IS_LITTLE_ENDIAN) && Py_UNICODE_SIZE > 2)
# define OFF 2
#else
# define OFF 0
#endif
                    ((unsigned char *) _p)[OFF + 1] = _q[0];
                    ((unsigned char *) _p)[OFF + 0] = _q[1];
                    ((unsigned char *) _p)[OFF + 1 + Py_UNICODE_SIZE] = _q[2];
                    ((unsigned char *) _p)[OFF + 0 + Py_UNICODE_SIZE] = _q[3];
#if (SIZEOF_LONG == 8)
                    ((unsigned char *) _p)[OFF + 1 + 2 * Py_UNICODE_SIZE] = _q[4];
                    ((unsigned char *) _p)[OFF + 0 + 2 * Py_UNICODE_SIZE] = _q[5];
                    ((unsigned char *) _p)[OFF + 1 + 3 * Py_UNICODE_SIZE] = _q[6];
                    ((unsigned char *) _p)[OFF + 0 + 3 * Py_UNICODE_SIZE] = _q[7];
#endif
#undef OFF
                    _q += SIZEOF_LONG;
                    _p += SIZEOF_LONG / 2;
                }
            }
            p = _p;
            q = _q;
            /* The fast path may have eaten every complete code unit. */
            if (q >= e)
                break;
        }
        ch = (q[ihi] << 8) | q[ilo];

        q += 2;

        if (ch < 0xD800 || ch > 0xDFFF) {
            *p++ = ch;
            continue;
        }

        /* UTF-16 code pair: ch is a surrogate.  Reading its partner needs
           two more bytes, i.e. q < e.  With q == e only a single byte is
           left, and fetching a code unit from it would read past the end
           of the input, so that case is treated as end of data as well. */
        if (q >= e) {
            if (consumed && ch <= 0xDBFF) {
                /* Stateful decoding: a lead surrogate separated from its
                   trail surrogate is not an error.  Step back over it so
                   it is reported as not consumed and can be decoded once
                   the rest of the pair is available. */
                q -= 2;
                break;
            }
            errmsg = "unexpected end of data";
            startinpos = (((const char *)q) - 2) - starts;
            endinpos = ((const char *)e) + 1 - starts;
            goto utf16Error;
        }
        if (0xD800 <= ch && ch <= 0xDBFF) {
            Py_UNICODE ch2 = (q[ihi] << 8) | q[ilo];
            q += 2;
            if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {
#ifndef Py_UNICODE_WIDE
                /* Narrow build: keep the pair as two code units. */
                *p++ = ch;
                *p++ = ch2;
#else
                /* Wide build: combine the pair into one code point. */
                *p++ = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000;
#endif
                continue;
            }
            else {
                /* Lead surrogate not followed by a trail surrogate; only
                   the lead surrogate's two bytes are flagged. */
                errmsg = "illegal UTF-16 surrogate";
                startinpos = (((const char *)q)-4)-starts;
                endinpos = startinpos+2;
                goto utf16Error;
            }

        }
        /* A trail surrogate without a preceding lead surrogate. */
        errmsg = "illegal encoding";
        startinpos = (((const char *)q)-2)-starts;
        endinpos = startinpos+2;
        /* Fall through to report the error */

      utf16Error:
        outpos = p - PyUnicode_AS_UNICODE(unicode);
        if (unicode_decode_call_errorhandler(
                errors,
                &errorHandler,
                "utf16", errmsg,
                &starts,
                (const char **)&e,
                &startinpos,
                &endinpos,
                &exc,
                (const char **)&q,
                &unicode,
                &outpos,
                &p))
            goto onError;
    }
    /* remaining byte at the end? (size should be even) */
    if (e == q) {
        if (!consumed) {
            errmsg = "truncated data";
            startinpos = ((const char *)q) - starts;
            endinpos = ((const char *)e) + 1 - starts;
            outpos = p - PyUnicode_AS_UNICODE(unicode);
            if (unicode_decode_call_errorhandler(
                    errors,
                    &errorHandler,
                    "utf16", errmsg,
                    &starts,
                    (const char **)&e,
                    &startinpos,
                    &endinpos,
                    &exc,
                    (const char **)&q,
                    &unicode,
                    &outpos,
                    &p))
                goto onError;
            /* The remaining input chars are ignored if the callback
               chooses to skip the input */
        }
    }

    if (byteorder)
        *byteorder = bo;

    if (consumed)
        *consumed = (const char *)q-starts;

    /* Adjust length */
    if (_PyUnicode_Resize(&unicode, p - unicode->str) < 0)
        goto onError;

    Py_XDECREF(errorHandler);
    Py_XDECREF(exc);
    return (PyObject *)unicode;

  onError:
    Py_DECREF(unicode);
    Py_XDECREF(errorHandler);
    Py_XDECREF(exc);
    return NULL;
}

Here is the call graph for this function:

Here is the caller graph for this function:

PyObject* PyUnicode_DecodeUTF32 ( const char *  s,
Py_ssize_t  size,
const char *  errors,
int *  byteorder 
)

Definition at line 3082 of file unicodeobject.c.

Here is the call graph for this function:

Here is the caller graph for this function:

PyObject* PyUnicode_DecodeUTF32Stateful ( const char *  s,
Py_ssize_t  size,
const char *  errors,
int *  byteorder,
Py_ssize_t *  consumed 
)

Definition at line 3091 of file unicodeobject.c.

{
    /* Decode size bytes of UTF-32 data starting at s into a new unicode
       object.

       byteorder (may be NULL) selects the byte order on entry: -1 forces
       little endian, 1 forces big endian, 0 means native order unless a
       leading BOM says otherwise.  The order actually used is stored back
       through it once decoding is finished.

       consumed (may be NULL) switches on stateful decoding: up to three
       trailing bytes that do not form a complete 4-byte unit are left
       undecoded instead of being reported as an error, and the number of
       bytes decoded is stored through it.

       Malformed input is handed to unicode_decode_call_errorhandler()
       together with the errors policy.  Returns the new object, or NULL
       when an error was raised. */
    const char *starts = s;
    Py_ssize_t startinpos;
    Py_ssize_t endinpos;
    Py_ssize_t outpos;
    PyUnicodeObject *unicode;
    Py_UNICODE *p;
#ifndef Py_UNICODE_WIDE
    int pairs = 0;
    const unsigned char *qq;
#else
    const int pairs = 0;
#endif
    const unsigned char *q, *e;
    int bo = 0;       /* assume native ordering by default */
    const char *errmsg = "";
    /* Offsets from q for retrieving bytes in the right order. */
#ifdef BYTEORDER_IS_LITTLE_ENDIAN
    int iorder[] = {0, 1, 2, 3};
#else
    int iorder[] = {3, 2, 1, 0};
#endif
    PyObject *errorHandler = NULL;
    PyObject *exc = NULL;

    q = (unsigned char *)s;
    e = q + size;   /* one past the last input byte */

    if (byteorder)
        bo = *byteorder;

    /* Check for BOM marks (U+FEFF) in the input and adjust current
       byte order setting accordingly. In native mode, the leading BOM
       mark is skipped, in all other modes, it is copied to the output
       stream as-is (giving a ZWNBSP character). */
    if (bo == 0) {
        if (size >= 4) {
            /* Widen each byte before shifting: shifting a promoted int
               holding a value >= 0x80 left by 24 overflows a signed int. */
            const Py_UCS4 bom = ((Py_UCS4)q[iorder[3]] << 24) |
                ((Py_UCS4)q[iorder[2]] << 16) |
                ((Py_UCS4)q[iorder[1]] << 8) | (Py_UCS4)q[iorder[0]];
#ifdef BYTEORDER_IS_LITTLE_ENDIAN
            if (bom == 0x0000FEFF) {
                q += 4;
                bo = -1;
            }
            else if (bom == 0xFFFE0000) {
                q += 4;
                bo = 1;
            }
#else
            if (bom == 0x0000FEFF) {
                q += 4;
                bo = 1;
            }
            else if (bom == 0xFFFE0000) {
                q += 4;
                bo = -1;
            }
#endif
        }
    }

    if (bo == -1) {
        /* force LE */
        iorder[0] = 0;
        iorder[1] = 1;
        iorder[2] = 2;
        iorder[3] = 3;
    }
    else if (bo == 1) {
        /* force BE */
        iorder[0] = 3;
        iorder[1] = 2;
        iorder[2] = 1;
        iorder[3] = 0;
    }

    /* On narrow builds we split characters outside the BMP into two
       codepoints => count how much extra space we need. */
#ifndef Py_UNICODE_WIDE
    /* Only look at complete 4-byte units: a truncated tail must not be
       indexed, since bytes 2 and 3 of it may lie beyond e. */
    for (qq = q; e - qq >= 4; qq += 4)
        if (qq[iorder[2]] != 0 || qq[iorder[3]] != 0)
            pairs++;
#endif

    /* This might be one too many, because of a BOM */
    unicode = _PyUnicode_New((size+3)/4+pairs);
    if (!unicode)
        return NULL;
    if (size == 0) {
        /* Nothing was decoded; still report that to a stateful caller
           rather than leaving *consumed untouched. */
        if (consumed)
            *consumed = 0;
        return (PyObject *)unicode;
    }

    /* Unpack UTF-32 encoded data */
    p = unicode->str;

    while (q < e) {
        Py_UCS4 ch;
        /* remaining bytes at the end? (size should be divisible by 4) */
        if (e-q<4) {
            /* Stateful decoding leaves the incomplete unit unconsumed. */
            if (consumed)
                break;
            errmsg = "truncated data";
            startinpos = ((const char *)q)-starts;
            endinpos = ((const char *)e)-starts;
            goto utf32Error;
            /* The remaining input chars are ignored if the callback
               chooses to skip the input */
        }
        ch = ((Py_UCS4)q[iorder[3]] << 24) | ((Py_UCS4)q[iorder[2]] << 16) |
            ((Py_UCS4)q[iorder[1]] << 8) | (Py_UCS4)q[iorder[0]];

        if (ch >= 0x110000)
        {
            errmsg = "codepoint not in range(0x110000)";
            startinpos = ((const char *)q)-starts;
            endinpos = startinpos+4;
            goto utf32Error;
        }
#ifndef Py_UNICODE_WIDE
        if (ch >= 0x10000)
        {
            /* Narrow build: store as a surrogate pair. */
            *p++ = 0xD800 | ((ch-0x10000) >> 10);
            *p++ = 0xDC00 | ((ch-0x10000) & 0x3FF);
        }
        else
#endif
            *p++ = ch;
        q += 4;
        continue;
      utf32Error:
        outpos = p-PyUnicode_AS_UNICODE(unicode);
        if (unicode_decode_call_errorhandler(
                errors, &errorHandler,
                "utf32", errmsg,
                &starts, (const char **)&e, &startinpos, &endinpos, &exc, (const char **)&q,
                &unicode, &outpos, &p))
            goto onError;
    }

    if (byteorder)
        *byteorder = bo;

    if (consumed)
        *consumed = (const char *)q-starts;

    /* Adjust length */
    if (_PyUnicode_Resize(&unicode, p - unicode->str) < 0)
        goto onError;

    Py_XDECREF(errorHandler);
    Py_XDECREF(exc);
    return (PyObject *)unicode;

  onError:
    Py_DECREF(unicode);
    Py_XDECREF(errorHandler);
    Py_XDECREF(exc);
    return NULL;
}

Here is the call graph for this function:

Here is the caller graph for this function:

PyObject* PyUnicode_DecodeUTF7 ( const char *  s,
Py_ssize_t  size,
const char *  errors 
)

Definition at line 2210 of file unicodeobject.c.

Here is the call graph for this function:

PyObject* PyUnicode_DecodeUTF7Stateful ( const char *  s,
Py_ssize_t  size,
const char *  errors,
Py_ssize_t *  consumed 
)

Definition at line 2224 of file unicodeobject.c.

{
    /* Decode size bytes of UTF-7 data starting at s into a new unicode
       object.

       Input is either copied directly or, between a '+' and the first
       non-base-64 character, accumulated as base-64 bits and emitted 16
       bits at a time as UTF-16 code units.

       consumed (may be NULL) switches on stateful decoding: if the input
       ends inside a shift sequence, no error is raised; the output
       produced since that sequence started is dropped and *consumed is
       set to the offset of the '+' that opened it.

       Malformed input is handed to unicode_decode_call_errorhandler()
       together with the errors policy.  Returns the new object, or NULL
       when an error was raised. */
    const char *starts = s;
    Py_ssize_t startinpos;
    Py_ssize_t endinpos;
    Py_ssize_t outpos;
    const char *e;
    PyUnicodeObject *unicode;
    Py_UNICODE *p;
    const char *errmsg = "";
    int inShift = 0;                /* non-zero while inside a base-64 section */
    Py_UNICODE *shiftOutStart;      /* output position when the current shift began */
    unsigned int base64bits = 0;    /* number of bits pending in base64buffer */
    unsigned long base64buffer = 0; /* pending base-64 bits, newest lowest */
    Py_UNICODE surrogate = 0;       /* pending first surrogate, 0 if none */
    PyObject *errorHandler = NULL;
    PyObject *exc = NULL;

    /* One output unit per input byte is used as the initial allocation. */
    unicode = _PyUnicode_New(size);
    if (!unicode)
        return NULL;
    if (size == 0) {
        if (consumed)
            *consumed = 0;
        return (PyObject *)unicode;
    }

    p = unicode->str;
    shiftOutStart = p;
    e = s + size;

    while (s < e) {
        Py_UNICODE ch;
        /* Re-entry point: jumped to from the end-of-input error path below
           when the error handler leaves s before e. */
      restart:
        ch = (unsigned char) *s;

        if (inShift) { /* in a base-64 section */
            if (IS_BASE64(ch)) { /* consume a base-64 character */
                base64buffer = (base64buffer << 6) | FROM_BASE64(ch);
                base64bits += 6;
                s++;
                if (base64bits >= 16) {
                    /* we have enough bits for a UTF-16 value */
                    Py_UNICODE outCh = (Py_UNICODE)
                                       (base64buffer >> (base64bits-16));
                    base64bits -= 16;
                    base64buffer &= (1 << base64bits) - 1; /* clear high bits */
                    if (surrogate) {
                        /* expecting a second surrogate */
                        if (outCh >= 0xDC00 && outCh <= 0xDFFF) {
#ifdef Py_UNICODE_WIDE
                            /* wide build: combine the pair into one code point */
                            *p++ = (((surrogate & 0x3FF)<<10)
                                    | (outCh & 0x3FF)) + 0x10000;
#else
                            /* narrow build: keep the pair as two units */
                            *p++ = surrogate;
                            *p++ = outCh;
#endif
                            surrogate = 0;
                        }
                        else {
                            /* the pending first surrogate is discarded */
                            surrogate = 0;
                            errmsg = "second surrogate missing";
                            goto utf7Error;
                        }
                    }
                    else if (outCh >= 0xD800 && outCh <= 0xDBFF) {
                        /* first surrogate */
                        surrogate = outCh;
                    }
                    else if (outCh >= 0xDC00 && outCh <= 0xDFFF) {
                        errmsg = "unexpected second surrogate";
                        goto utf7Error;
                    }
                    else {
                        *p++ = outCh;
                    }
                }
            }
            else { /* now leaving a base-64 section */
                inShift = 0;
                s++;
                if (surrogate) {
                    errmsg = "second surrogate missing at end of shift sequence";
                    goto utf7Error;
                }
                if (base64bits > 0) { /* left-over bits */
                    if (base64bits >= 6) {
                        /* We've seen at least one base-64 character */
                        errmsg = "partial character in shift sequence";
                        goto utf7Error;
                    }
                    else {
                        /* Some bits remain; they should be zero */
                        if (base64buffer != 0) {
                            errmsg = "non-zero padding bits in shift sequence";
                            goto utf7Error;
                        }
                    }
                }
                if (ch != '-') {
                    /* '-' is absorbed; other terminating
                       characters are preserved */
                    *p++ = ch;
                }
            }
        }
        else if ( ch == '+' ) {
            /* remember where the shift sequence starts; used for error
               positions and for the stateful back-off at end of input */
            startinpos = s-starts;
            s++; /* consume '+' */
            if (s < e && *s == '-') { /* '+-' encodes '+' */
                s++;
                *p++ = '+';
            }
            else { /* begin base64-encoded section */
                inShift = 1;
                shiftOutStart = p;
                base64bits = 0;
            }
        }
        else if (DECODE_DIRECT(ch)) { /* character decodes as itself */
            *p++ = ch;
            s++;
        }
        else {
            startinpos = s-starts;
            s++;
            errmsg = "unexpected special character";
            goto utf7Error;
        }
        continue;
        /* Shared error exit for the loop body: the reported range runs
           from startinpos up to the current input position. */
utf7Error:
        outpos = p-PyUnicode_AS_UNICODE(unicode);
        endinpos = s-starts;
        if (unicode_decode_call_errorhandler(
                errors, &errorHandler,
                "utf7", errmsg,
                &starts, &e, &startinpos, &endinpos, &exc, &s,
                &unicode, &outpos, &p))
            goto onError;
    }

    /* end of string */

    if (inShift && !consumed) { /* in shift sequence, no more to follow */
        /* if we're in an inconsistent state, that's an error */
        if (surrogate ||
                (base64bits >= 6) ||
                (base64bits > 0 && base64buffer != 0)) {
            outpos = p-PyUnicode_AS_UNICODE(unicode);
            endinpos = size;
            if (unicode_decode_call_errorhandler(
                    errors, &errorHandler,
                    "utf7", "unterminated shift sequence",
                    &starts, &e, &startinpos, &endinpos, &exc, &s,
                    &unicode, &outpos, &p))
                goto onError;
            /* the handler may have moved s back before e: keep decoding */
            if (s < e)
                goto restart;
        }
    }

    /* return state */
    if (consumed) {
        if (inShift) {
            /* input ended inside a shift sequence: discard what it has
               produced so far and report the sequence as unconsumed */
            p = shiftOutStart; /* back off output */
            *consumed = startinpos;
        }
        else {
            *consumed = s-starts;
        }
    }

    /* Trim the object to the number of units actually written. */
    if (_PyUnicode_Resize(&unicode, p - PyUnicode_AS_UNICODE(unicode)) < 0)
        goto onError;

    Py_XDECREF(errorHandler);
    Py_XDECREF(exc);
    return (PyObject *)unicode;

  onError:
    Py_XDECREF(errorHandler);
    Py_XDECREF(exc);
    Py_DECREF(unicode);
    return NULL;
}

Here is the call graph for this function:

Here is the caller graph for this function:

PyObject* PyUnicode_DecodeUTF8 ( const char *  s,
Py_ssize_t  size,
const char *  errors 
)

Definition at line 2539 of file unicodeobject.c.

Here is the call graph for this function: