Back to index

python3.2  3.2.2
_sre.c
Go to the documentation of this file.
00001 /*
00002  * Secret Labs' Regular Expression Engine
00003  *
00004  * regular expression matching engine
00005  *
00006  * partial history:
00007  * 1999-10-24 fl  created (based on existing template matcher code)
00008  * 2000-03-06 fl  first alpha, sort of
00009  * 2000-08-01 fl  fixes for 1.6b1
00010  * 2000-08-07 fl  use PyOS_CheckStack() if available
00011  * 2000-09-20 fl  added expand method
00012  * 2001-03-20 fl  lots of fixes for 2.1b2
00013  * 2001-04-15 fl  export copyright as Python attribute, not global
00014  * 2001-04-28 fl  added __copy__ methods (work in progress)
00015  * 2001-05-14 fl  fixes for 1.5.2 compatibility
00016  * 2001-07-01 fl  added BIGCHARSET support (from Martin von Loewis)
00017  * 2001-10-18 fl  fixed group reset issue (from Matthew Mueller)
00018  * 2001-10-20 fl  added split primitive; reenable unicode for 1.6/2.0/2.1
00019  * 2001-10-21 fl  added sub/subn primitive
00020  * 2001-10-24 fl  added finditer primitive (for 2.2 only)
00021  * 2001-12-07 fl  fixed memory leak in sub/subn (Guido van Rossum)
00022  * 2002-11-09 fl  fixed empty sub/subn return type
00023  * 2003-04-18 mvl fully support 4-byte codes
00024  * 2003-10-17 gn  implemented non recursive scheme
00025  *
00026  * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
00027  *
00028  * This version of the SRE library can be redistributed under CNRI's
00029  * Python 1.6 license.  For any other use, please contact Secret Labs
00030  * AB (info@pythonware.com).
00031  *
00032  * Portions of this engine have been developed in cooperation with
00033  * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
00034  * other compatibility work.
00035  */
00036 
00037 #ifndef SRE_RECURSIVE
00038 
00039 static char copyright[] =
00040     " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
00041 
00042 #define PY_SSIZE_T_CLEAN
00043 
00044 #include "Python.h"
00045 #include "structmember.h" /* offsetof */
00046 
00047 #include "sre.h"
00048 
00049 #include <ctype.h>
00050 
00051 /* name of this module, minus the leading underscore */
00052 #if !defined(SRE_MODULE)
00053 #define SRE_MODULE "sre"
00054 #endif
00055 
00056 #define SRE_PY_MODULE "re"
00057 
00058 /* defining this one enables tracing */
00059 #undef VERBOSE
00060 
00061 /* defining this enables unicode support (default under 1.6a1 and later) */
00062 #define HAVE_UNICODE
00063 
00064 /* -------------------------------------------------------------------- */
00065 /* optional features */
00066 
00067 /* enables fast searching */
00068 #define USE_FAST_SEARCH
00069 
00070 /* enables copy/deepcopy handling (work in progress) */
00071 #undef USE_BUILTIN_COPY
00072 
00073 #if PY_VERSION_HEX < 0x01060000
00074 #define PyObject_DEL(op) PyMem_DEL((op))
00075 #endif
00076 
00077 /* -------------------------------------------------------------------- */
00078 
00079 #if defined(_MSC_VER)
00080 #pragma optimize("agtw", on) /* doesn't seem to make much difference... */
00081 #pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
00082 /* fastest possible local call under MSVC */
00083 #define LOCAL(type) static __inline type __fastcall
00084 #elif defined(USE_INLINE)
00085 #define LOCAL(type) static inline type
00086 #else
00087 #define LOCAL(type) static type
00088 #endif
00089 
00090 /* error codes */
00091 #define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
00092 #define SRE_ERROR_STATE -2 /* illegal state */
00093 #define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
00094 #define SRE_ERROR_MEMORY -9 /* out of memory */
00095 #define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
00096 
00097 #if defined(VERBOSE)
00098 #define TRACE(v) printf v
00099 #else
00100 #define TRACE(v)
00101 #endif
00102 
00103 /* -------------------------------------------------------------------- */
00104 /* search engine state */
00105 
00106 /* default character predicates (run sre_chars.py to regenerate tables) */
00107 
00108 #define SRE_DIGIT_MASK 1
00109 #define SRE_SPACE_MASK 2
00110 #define SRE_LINEBREAK_MASK 4
00111 #define SRE_ALNUM_MASK 8
00112 #define SRE_WORD_MASK 16
00113 
00114 /* FIXME: this assumes ASCII.  create tables in init_sre() instead */
00115 
00116 static char sre_char_info[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,
00117 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
00118 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25,
00119 25, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
00120 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0,
00121 0, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
00122 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 };
00123 
00124 static char sre_char_lower[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
00125 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
00126 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
00127 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
00128 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
00129 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
00130 122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105,
00131 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
00132 120, 121, 122, 123, 124, 125, 126, 127 };
00133 
00134 #define SRE_IS_DIGIT(ch)\
00135     ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
00136 #define SRE_IS_SPACE(ch)\
00137     ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
00138 #define SRE_IS_LINEBREAK(ch)\
00139     ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
00140 #define SRE_IS_ALNUM(ch)\
00141     ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
00142 #define SRE_IS_WORD(ch)\
00143     ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
00144 
00145 static unsigned int sre_lower(unsigned int ch)
00146 {
00147     return ((ch) < 128 ? (unsigned int)sre_char_lower[ch] : ch);
00148 }
00149 
00150 /* locale-specific character predicates */
00151 /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
00152  * warnings when c's type supports only numbers < N+1 */
00153 #define SRE_LOC_IS_DIGIT(ch) (!((ch) & ~255) ? isdigit((ch)) : 0)
00154 #define SRE_LOC_IS_SPACE(ch) (!((ch) & ~255) ? isspace((ch)) : 0)
00155 #define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
00156 #define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
00157 #define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
00158 
/* Lower-case `ch` with the C library's tolower().  Only values below
   256 are handed to tolower(); anything larger is returned as-is. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256)
        return ch;
    return (unsigned int)tolower((int)ch);
}
00163 
00164 /* unicode-specific character predicates */
00165 
00166 #if defined(HAVE_UNICODE)
00167 
00168 #define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL((Py_UNICODE)(ch))
00169 #define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
00170 #define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
00171 #define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
00172 #define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
00173 
00174 static unsigned int sre_lower_unicode(unsigned int ch)
00175 {
00176     return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
00177 }
00178 
00179 #endif
00180 
00181 LOCAL(int)
00182 sre_category(SRE_CODE category, unsigned int ch)
00183 {
00184     switch (category) {
00185 
00186     case SRE_CATEGORY_DIGIT:
00187         return SRE_IS_DIGIT(ch);
00188     case SRE_CATEGORY_NOT_DIGIT:
00189         return !SRE_IS_DIGIT(ch);
00190     case SRE_CATEGORY_SPACE:
00191         return SRE_IS_SPACE(ch);
00192     case SRE_CATEGORY_NOT_SPACE:
00193         return !SRE_IS_SPACE(ch);
00194     case SRE_CATEGORY_WORD:
00195         return SRE_IS_WORD(ch);
00196     case SRE_CATEGORY_NOT_WORD:
00197         return !SRE_IS_WORD(ch);
00198     case SRE_CATEGORY_LINEBREAK:
00199         return SRE_IS_LINEBREAK(ch);
00200     case SRE_CATEGORY_NOT_LINEBREAK:
00201         return !SRE_IS_LINEBREAK(ch);
00202 
00203     case SRE_CATEGORY_LOC_WORD:
00204         return SRE_LOC_IS_WORD(ch);
00205     case SRE_CATEGORY_LOC_NOT_WORD:
00206         return !SRE_LOC_IS_WORD(ch);
00207 
00208 #if defined(HAVE_UNICODE)
00209     case SRE_CATEGORY_UNI_DIGIT:
00210         return SRE_UNI_IS_DIGIT(ch);
00211     case SRE_CATEGORY_UNI_NOT_DIGIT:
00212         return !SRE_UNI_IS_DIGIT(ch);
00213     case SRE_CATEGORY_UNI_SPACE:
00214         return SRE_UNI_IS_SPACE(ch);
00215     case SRE_CATEGORY_UNI_NOT_SPACE:
00216         return !SRE_UNI_IS_SPACE(ch);
00217     case SRE_CATEGORY_UNI_WORD:
00218         return SRE_UNI_IS_WORD(ch);
00219     case SRE_CATEGORY_UNI_NOT_WORD:
00220         return !SRE_UNI_IS_WORD(ch);
00221     case SRE_CATEGORY_UNI_LINEBREAK:
00222         return SRE_UNI_IS_LINEBREAK(ch);
00223     case SRE_CATEGORY_UNI_NOT_LINEBREAK:
00224         return !SRE_UNI_IS_LINEBREAK(ch);
00225 #else
00226     case SRE_CATEGORY_UNI_DIGIT:
00227         return SRE_IS_DIGIT(ch);
00228     case SRE_CATEGORY_UNI_NOT_DIGIT:
00229         return !SRE_IS_DIGIT(ch);
00230     case SRE_CATEGORY_UNI_SPACE:
00231         return SRE_IS_SPACE(ch);
00232     case SRE_CATEGORY_UNI_NOT_SPACE:
00233         return !SRE_IS_SPACE(ch);
00234     case SRE_CATEGORY_UNI_WORD:
00235         return SRE_LOC_IS_WORD(ch);
00236     case SRE_CATEGORY_UNI_NOT_WORD:
00237         return !SRE_LOC_IS_WORD(ch);
00238     case SRE_CATEGORY_UNI_LINEBREAK:
00239         return SRE_IS_LINEBREAK(ch);
00240     case SRE_CATEGORY_UNI_NOT_LINEBREAK:
00241         return !SRE_IS_LINEBREAK(ch);
00242 #endif
00243     }
00244     return 0;
00245 }
00246 
00247 /* helpers */
00248 
00249 static void
00250 data_stack_dealloc(SRE_STATE* state)
00251 {
00252     if (state->data_stack) {
00253         PyMem_FREE(state->data_stack);
00254         state->data_stack = NULL;
00255     }
00256     state->data_stack_size = state->data_stack_base = 0;
00257 }
00258 
00259 static int
00260 data_stack_grow(SRE_STATE* state, Py_ssize_t size)
00261 {
00262     Py_ssize_t minsize, cursize;
00263     minsize = state->data_stack_base+size;
00264     cursize = state->data_stack_size;
00265     if (cursize < minsize) {
00266         void* stack;
00267         cursize = minsize+minsize/4+1024;
00268         TRACE(("allocate/grow stack %d\n", cursize));
00269         stack = PyMem_REALLOC(state->data_stack, cursize);
00270         if (!stack) {
00271             data_stack_dealloc(state);
00272             return SRE_ERROR_MEMORY;
00273         }
00274         state->data_stack = (char *)stack;
00275         state->data_stack_size = cursize;
00276     }
00277     return 0;
00278 }
00279 
00280 /* generate 8-bit version */
00281 
00282 #define SRE_CHAR unsigned char
00283 #define SRE_AT sre_at
00284 #define SRE_COUNT sre_count
00285 #define SRE_CHARSET sre_charset
00286 #define SRE_INFO sre_info
00287 #define SRE_MATCH sre_match
00288 #define SRE_MATCH_CONTEXT sre_match_context
00289 #define SRE_SEARCH sre_search
00290 #define SRE_LITERAL_TEMPLATE sre_literal_template
00291 
00292 #if defined(HAVE_UNICODE)
00293 
00294 #define SRE_RECURSIVE
00295 #include "_sre.c"
00296 #undef SRE_RECURSIVE
00297 
00298 #undef SRE_LITERAL_TEMPLATE
00299 #undef SRE_SEARCH
00300 #undef SRE_MATCH
00301 #undef SRE_MATCH_CONTEXT
00302 #undef SRE_INFO
00303 #undef SRE_CHARSET
00304 #undef SRE_COUNT
00305 #undef SRE_AT
00306 #undef SRE_CHAR
00307 
00308 /* generate 16-bit unicode version */
00309 
00310 #define SRE_CHAR Py_UNICODE
00311 #define SRE_AT sre_uat
00312 #define SRE_COUNT sre_ucount
00313 #define SRE_CHARSET sre_ucharset
00314 #define SRE_INFO sre_uinfo
00315 #define SRE_MATCH sre_umatch
00316 #define SRE_MATCH_CONTEXT sre_umatch_context
00317 #define SRE_SEARCH sre_usearch
00318 #define SRE_LITERAL_TEMPLATE sre_uliteral_template
00319 #endif
00320 
00321 #endif /* SRE_RECURSIVE */
00322 
00323 /* -------------------------------------------------------------------- */
00324 /* String matching engine */
00325 
00326 /* the following section is compiled twice, with different character
00327    settings */
00328 
00329 LOCAL(int)
00330 SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
00331 {
00332     /* check if pointer is at given position */
00333 
00334     Py_ssize_t thisp, thatp;
00335 
00336     switch (at) {
00337 
00338     case SRE_AT_BEGINNING:
00339     case SRE_AT_BEGINNING_STRING:
00340         return ((void*) ptr == state->beginning);
00341 
00342     case SRE_AT_BEGINNING_LINE:
00343         return ((void*) ptr == state->beginning ||
00344                 SRE_IS_LINEBREAK((int) ptr[-1]));
00345 
00346     case SRE_AT_END:
00347         return (((void*) (ptr+1) == state->end &&
00348                  SRE_IS_LINEBREAK((int) ptr[0])) ||
00349                 ((void*) ptr == state->end));
00350 
00351     case SRE_AT_END_LINE:
00352         return ((void*) ptr == state->end ||
00353                 SRE_IS_LINEBREAK((int) ptr[0]));
00354 
00355     case SRE_AT_END_STRING:
00356         return ((void*) ptr == state->end);
00357 
00358     case SRE_AT_BOUNDARY:
00359         if (state->beginning == state->end)
00360             return 0;
00361         thatp = ((void*) ptr > state->beginning) ?
00362             SRE_IS_WORD((int) ptr[-1]) : 0;
00363         thisp = ((void*) ptr < state->end) ?
00364             SRE_IS_WORD((int) ptr[0]) : 0;
00365         return thisp != thatp;
00366 
00367     case SRE_AT_NON_BOUNDARY:
00368         if (state->beginning == state->end)
00369             return 0;
00370         thatp = ((void*) ptr > state->beginning) ?
00371             SRE_IS_WORD((int) ptr[-1]) : 0;
00372         thisp = ((void*) ptr < state->end) ?
00373             SRE_IS_WORD((int) ptr[0]) : 0;
00374         return thisp == thatp;
00375 
00376     case SRE_AT_LOC_BOUNDARY:
00377         if (state->beginning == state->end)
00378             return 0;
00379         thatp = ((void*) ptr > state->beginning) ?
00380             SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
00381         thisp = ((void*) ptr < state->end) ?
00382             SRE_LOC_IS_WORD((int) ptr[0]) : 0;
00383         return thisp != thatp;
00384 
00385     case SRE_AT_LOC_NON_BOUNDARY:
00386         if (state->beginning == state->end)
00387             return 0;
00388         thatp = ((void*) ptr > state->beginning) ?
00389             SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
00390         thisp = ((void*) ptr < state->end) ?
00391             SRE_LOC_IS_WORD((int) ptr[0]) : 0;
00392         return thisp == thatp;
00393 
00394 #if defined(HAVE_UNICODE)
00395     case SRE_AT_UNI_BOUNDARY:
00396         if (state->beginning == state->end)
00397             return 0;
00398         thatp = ((void*) ptr > state->beginning) ?
00399             SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
00400         thisp = ((void*) ptr < state->end) ?
00401             SRE_UNI_IS_WORD((int) ptr[0]) : 0;
00402         return thisp != thatp;
00403 
00404     case SRE_AT_UNI_NON_BOUNDARY:
00405         if (state->beginning == state->end)
00406             return 0;
00407         thatp = ((void*) ptr > state->beginning) ?
00408             SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
00409         thisp = ((void*) ptr < state->end) ?
00410             SRE_UNI_IS_WORD((int) ptr[0]) : 0;
00411         return thisp == thatp;
00412 #endif
00413 
00414     }
00415 
00416     return 0;
00417 }
00418 
00419 LOCAL(int)
00420 SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
00421 {
00422     /* check if character is a member of the given set */
00423 
00424     int ok = 1;
00425 
00426     for (;;) {
00427         switch (*set++) {
00428 
00429         case SRE_OP_FAILURE:
00430             return !ok;
00431 
00432         case SRE_OP_LITERAL:
00433             /* <LITERAL> <code> */
00434             if (ch == set[0])
00435                 return ok;
00436             set++;
00437             break;
00438 
00439         case SRE_OP_CATEGORY:
00440             /* <CATEGORY> <code> */
00441             if (sre_category(set[0], (int) ch))
00442                 return ok;
00443             set += 1;
00444             break;
00445 
00446         case SRE_OP_CHARSET:
00447             if (sizeof(SRE_CODE) == 2) {
00448                 /* <CHARSET> <bitmap> (16 bits per code word) */
00449                 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
00450                     return ok;
00451                 set += 16;
00452             }
00453             else {
00454                 /* <CHARSET> <bitmap> (32 bits per code word) */
00455                 if (ch < 256 && (set[ch >> 5] & (1 << (ch & 31))))
00456                     return ok;
00457                 set += 8;
00458             }
00459             break;
00460 
00461         case SRE_OP_RANGE:
00462             /* <RANGE> <lower> <upper> */
00463             if (set[0] <= ch && ch <= set[1])
00464                 return ok;
00465             set += 2;
00466             break;
00467 
00468         case SRE_OP_NEGATE:
00469             ok = !ok;
00470             break;
00471 
00472         case SRE_OP_BIGCHARSET:
00473             /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
00474         {
00475             Py_ssize_t count, block;
00476             count = *(set++);
00477 
00478             if (sizeof(SRE_CODE) == 2) {
00479                 block = ((unsigned char*)set)[ch >> 8];
00480                 set += 128;
00481                 if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15)))
00482                     return ok;
00483                 set += count*16;
00484             }
00485             else {
00486                 /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
00487                  * warnings when c's type supports only numbers < N+1 */
00488                 if (!(ch & ~65535))
00489                     block = ((unsigned char*)set)[ch >> 8];
00490                 else
00491                     block = -1;
00492                 set += 64;
00493                 if (block >=0 &&
00494                     (set[block*8 + ((ch & 255)>>5)] & (1 << (ch & 31))))
00495                     return ok;
00496                 set += count*8;
00497             }
00498             break;
00499         }
00500 
00501         default:
00502             /* internal error -- there's not much we can do about it
00503                here, so let's just pretend it didn't match... */
00504             return 0;
00505         }
00506     }
00507 }
00508 
00509 LOCAL(Py_ssize_t) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern);
00510 
00511 LOCAL(Py_ssize_t)
00512 SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
00513 {
00514     SRE_CODE chr;
00515     SRE_CHAR* ptr = (SRE_CHAR *)state->ptr;
00516     SRE_CHAR* end = (SRE_CHAR *)state->end;
00517     Py_ssize_t i;
00518 
00519     /* adjust end */
00520     if (maxcount < end - ptr && maxcount != 65535)
00521         end = ptr + maxcount;
00522 
00523     switch (pattern[0]) {
00524 
00525     case SRE_OP_IN:
00526         /* repeated set */
00527         TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
00528         while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
00529             ptr++;
00530         break;
00531 
00532     case SRE_OP_ANY:
00533         /* repeated dot wildcard. */
00534         TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
00535         while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
00536             ptr++;
00537         break;
00538 
00539     case SRE_OP_ANY_ALL:
00540         /* repeated dot wildcard.  skip to the end of the target
00541            string, and backtrack from there */
00542         TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
00543         ptr = end;
00544         break;
00545 
00546     case SRE_OP_LITERAL:
00547         /* repeated literal */
00548         chr = pattern[1];
00549         TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
00550         while (ptr < end && (SRE_CODE) *ptr == chr)
00551             ptr++;
00552         break;
00553 
00554     case SRE_OP_LITERAL_IGNORE:
00555         /* repeated literal */
00556         chr = pattern[1];
00557         TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
00558         while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
00559             ptr++;
00560         break;
00561 
00562     case SRE_OP_NOT_LITERAL:
00563         /* repeated non-literal */
00564         chr = pattern[1];
00565         TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
00566         while (ptr < end && (SRE_CODE) *ptr != chr)
00567             ptr++;
00568         break;
00569 
00570     case SRE_OP_NOT_LITERAL_IGNORE:
00571         /* repeated non-literal */
00572         chr = pattern[1];
00573         TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
00574         while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
00575             ptr++;
00576         break;
00577 
00578     default:
00579         /* repeated single character pattern */
00580         TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
00581         while ((SRE_CHAR*) state->ptr < end) {
00582             i = SRE_MATCH(state, pattern);
00583             if (i < 0)
00584                 return i;
00585             if (!i)
00586                 break;
00587         }
00588         TRACE(("|%p|%p|COUNT %d\n", pattern, ptr,
00589                (SRE_CHAR*) state->ptr - ptr));
00590         return (SRE_CHAR*) state->ptr - ptr;
00591     }
00592 
00593     TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, ptr - (SRE_CHAR*) state->ptr));
00594     return ptr - (SRE_CHAR*) state->ptr;
00595 }
00596 
00597 #if 0 /* not used in this release */
00598 LOCAL(int)
00599 SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
00600 {
00601     /* check if an SRE_OP_INFO block matches at the current position.
00602        returns the number of SRE_CODE objects to skip if successful, 0
00603        if no match */
00604 
00605     SRE_CHAR* end = state->end;
00606     SRE_CHAR* ptr = state->ptr;
00607     Py_ssize_t i;
00608 
00609     /* check minimal length */
00610     if (pattern[3] && (end - ptr) < pattern[3])
00611         return 0;
00612 
00613     /* check known prefix */
00614     if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
00615         /* <length> <skip> <prefix data> <overlap data> */
00616         for (i = 0; i < pattern[5]; i++)
00617             if ((SRE_CODE) ptr[i] != pattern[7 + i])
00618                 return 0;
00619         return pattern[0] + 2 * pattern[6];
00620     }
00621     return pattern[0];
00622 }
00623 #endif
00624 
00625 /* The macros below should be used to protect recursive SRE_MATCH()
00626  * calls that *failed* and do *not* return immediately (IOW, those
00627  * that will backtrack). Explaining:
00628  *
00629  * - Recursive SRE_MATCH() returned true: that's usually a success
00630  *   (besides atypical cases like ASSERT_NOT), therefore there's no
00631  *   reason to restore lastmark;
00632  *
00633  * - Recursive SRE_MATCH() returned false but the current SRE_MATCH()
00634  *   is returning to the caller: If the current SRE_MATCH() is the
00635  *   top function of the recursion, returning false will be a matching
00636  *   failure, and it doesn't matter where lastmark is pointing to.
00637  *   If it's *not* the top function, it will be a recursive SRE_MATCH()
00638  *   failure by itself, and the calling SRE_MATCH() will have to deal
00639  *   with the failure by the same rules explained here (it will restore
00640  *   lastmark by itself if necessary);
00641  *
00642  * - Recursive SRE_MATCH() returned false, and will continue the
00643  *   outside 'for' loop: must be protected when breaking, since the next
00644  *   OP could potentially depend on lastmark;
00645  *
00646  * - Recursive SRE_MATCH() returned false, and will be called again
00647  *   inside a local for/while loop: must be protected between each
00648  *   loop iteration, since the recursive SRE_MATCH() could do anything,
00649  *   and could potentially depend on lastmark.
00650  *
00651  * For more information, check the discussion at SF patch #712900.
00652  */
00653 #define LASTMARK_SAVE()     \
00654     do { \
00655         ctx->lastmark = state->lastmark; \
00656         ctx->lastindex = state->lastindex; \
00657     } while (0)
00658 #define LASTMARK_RESTORE()  \
00659     do { \
00660         state->lastmark = ctx->lastmark; \
00661         state->lastindex = ctx->lastindex; \
00662     } while (0)
00663 
00664 #define RETURN_ERROR(i) do { return i; } while(0)
00665 #define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
00666 #define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
00667 
00668 #define RETURN_ON_ERROR(i) \
00669     do { if (i < 0) RETURN_ERROR(i); } while (0)
00670 #define RETURN_ON_SUCCESS(i) \
00671     do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
00672 #define RETURN_ON_FAILURE(i) \
00673     do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
00674 
00675 #define SFY(x) #x
00676 
00677 #define DATA_STACK_ALLOC(state, type, ptr) \
00678 do { \
00679     alloc_pos = state->data_stack_base; \
00680     TRACE(("allocating %s in %d (%d)\n", \
00681            SFY(type), alloc_pos, sizeof(type))); \
00682     if (state->data_stack_size < alloc_pos+sizeof(type)) { \
00683         int j = data_stack_grow(state, sizeof(type)); \
00684         if (j < 0) return j; \
00685         if (ctx_pos != -1) \
00686             DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
00687     } \
00688     ptr = (type*)(state->data_stack+alloc_pos); \
00689     state->data_stack_base += sizeof(type); \
00690 } while (0)
00691 
00692 #define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
00693 do { \
00694     TRACE(("looking up %s at %d\n", SFY(type), pos)); \
00695     ptr = (type*)(state->data_stack+pos); \
00696 } while (0)
00697 
00698 #define DATA_STACK_PUSH(state, data, size) \
00699 do { \
00700     TRACE(("copy data in %p to %d (%d)\n", \
00701            data, state->data_stack_base, size)); \
00702     if (state->data_stack_size < state->data_stack_base+size) { \
00703         int j = data_stack_grow(state, size); \
00704         if (j < 0) return j; \
00705         if (ctx_pos != -1) \
00706             DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
00707     } \
00708     memcpy(state->data_stack+state->data_stack_base, data, size); \
00709     state->data_stack_base += size; \
00710 } while (0)
00711 
00712 #define DATA_STACK_POP(state, data, size, discard) \
00713 do { \
00714     TRACE(("copy data to %p from %d (%d)\n", \
00715            data, state->data_stack_base-size, size)); \
00716     memcpy(data, state->data_stack+state->data_stack_base-size, size); \
00717     if (discard) \
00718         state->data_stack_base -= size; \
00719 } while (0)
00720 
00721 #define DATA_STACK_POP_DISCARD(state, size) \
00722 do { \
00723     TRACE(("discard data from %d (%d)\n", \
00724            state->data_stack_base-size, size)); \
00725     state->data_stack_base -= size; \
00726 } while(0)
00727 
00728 #define DATA_PUSH(x) \
00729     DATA_STACK_PUSH(state, (x), sizeof(*(x)))
00730 #define DATA_POP(x) \
00731     DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
00732 #define DATA_POP_DISCARD(x) \
00733     DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
00734 #define DATA_ALLOC(t,p) \
00735     DATA_STACK_ALLOC(state, t, p)
00736 #define DATA_LOOKUP_AT(t,p,pos) \
00737     DATA_STACK_LOOKUP_AT(state,t,p,pos)
00738 
00739 #define MARK_PUSH(lastmark) \
00740     do if (lastmark > 0) { \
00741         i = lastmark; /* ctx->lastmark may change if reallocated */ \
00742         DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
00743     } while (0)
00744 #define MARK_POP(lastmark) \
00745     do if (lastmark > 0) { \
00746         DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \
00747     } while (0)
00748 #define MARK_POP_KEEP(lastmark) \
00749     do if (lastmark > 0) { \
00750         DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \
00751     } while (0)
00752 #define MARK_POP_DISCARD(lastmark) \
00753     do if (lastmark > 0) { \
00754         DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \
00755     } while (0)
00756 
00757 #define JUMP_NONE            0
00758 #define JUMP_MAX_UNTIL_1     1
00759 #define JUMP_MAX_UNTIL_2     2
00760 #define JUMP_MAX_UNTIL_3     3
00761 #define JUMP_MIN_UNTIL_1     4
00762 #define JUMP_MIN_UNTIL_2     5
00763 #define JUMP_MIN_UNTIL_3     6
00764 #define JUMP_REPEAT          7
00765 #define JUMP_REPEAT_ONE_1    8
00766 #define JUMP_REPEAT_ONE_2    9
00767 #define JUMP_MIN_REPEAT_ONE  10
00768 #define JUMP_BRANCH          11
00769 #define JUMP_ASSERT          12
00770 #define JUMP_ASSERT_NOT      13
00771 
00772 #define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
00773     DATA_ALLOC(SRE_MATCH_CONTEXT, nextctx); \
00774     nextctx->last_ctx_pos = ctx_pos; \
00775     nextctx->jump = jumpvalue; \
00776     nextctx->pattern = nextpattern; \
00777     ctx_pos = alloc_pos; \
00778     ctx = nextctx; \
00779     goto entrance; \
00780     jumplabel: \
00781     while (0) /* gcc doesn't like labels at end of scopes */ \
00782 
00783 typedef struct {
00784     Py_ssize_t last_ctx_pos;
00785     Py_ssize_t jump;
00786     SRE_CHAR* ptr;
00787     SRE_CODE* pattern;
00788     Py_ssize_t count;
00789     Py_ssize_t lastmark;
00790     Py_ssize_t lastindex;
00791     union {
00792         SRE_CODE chr;
00793         SRE_REPEAT* rep;
00794     } u;
00795 } SRE_MATCH_CONTEXT;
00796 
00797 /* check if string matches the given pattern.  returns <0 for
00798    error, 0 for failure, and 1 for success */
00799 LOCAL(Py_ssize_t)
00800 SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
00801 {
00802     SRE_CHAR* end = (SRE_CHAR *)state->end;
00803     Py_ssize_t alloc_pos, ctx_pos = -1;
00804     Py_ssize_t i, ret = 0;
00805     Py_ssize_t jump;
00806     unsigned int sigcount=0;
00807 
00808     SRE_MATCH_CONTEXT* ctx;
00809     SRE_MATCH_CONTEXT* nextctx;
00810 
00811     TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
00812 
00813     DATA_ALLOC(SRE_MATCH_CONTEXT, ctx);
00814     ctx->last_ctx_pos = -1;
00815     ctx->jump = JUMP_NONE;
00816     ctx->pattern = pattern;
00817     ctx_pos = alloc_pos;
00818 
00819 entrance:
00820 
00821     ctx->ptr = (SRE_CHAR *)state->ptr;
00822 
00823     if (ctx->pattern[0] == SRE_OP_INFO) {
00824         /* optimization info block */
00825         /* <INFO> <1=skip> <2=flags> <3=min> ... */
00826         if (ctx->pattern[3] && (end - ctx->ptr) < ctx->pattern[3]) {
00827             TRACE(("reject (got %d chars, need %d)\n",
00828                    (end - ctx->ptr), ctx->pattern[3]));
00829             RETURN_FAILURE;
00830         }
00831         ctx->pattern += ctx->pattern[1] + 1;
00832     }
00833 
00834     for (;;) {
00835         ++sigcount;
00836         if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals())
00837             RETURN_ERROR(SRE_ERROR_INTERRUPTED);
00838 
00839         switch (*ctx->pattern++) {
00840 
00841         case SRE_OP_MARK:
00842             /* set mark */
00843             /* <MARK> <gid> */
00844             TRACE(("|%p|%p|MARK %d\n", ctx->pattern,
00845                    ctx->ptr, ctx->pattern[0]));
00846             i = ctx->pattern[0];
00847             if (i & 1)
00848                 state->lastindex = i/2 + 1;
00849             if (i > state->lastmark) {
00850                 /* state->lastmark is the highest valid index in the
00851                    state->mark array.  If it is increased by more than 1,
00852                    the intervening marks must be set to NULL to signal
00853                    that these marks have not been encountered. */
00854                 Py_ssize_t j = state->lastmark + 1;
00855                 while (j < i)
00856                     state->mark[j++] = NULL;
00857                 state->lastmark = i;
00858             }
00859             state->mark[i] = ctx->ptr;
00860             ctx->pattern++;
00861             break;
00862 
00863         case SRE_OP_LITERAL:
00864             /* match literal string */
00865             /* <LITERAL> <code> */
00866             TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern,
00867                    ctx->ptr, *ctx->pattern));
00868             if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0])
00869                 RETURN_FAILURE;
00870             ctx->pattern++;
00871             ctx->ptr++;
00872             break;
00873 
00874         case SRE_OP_NOT_LITERAL:
00875             /* match anything that is not literal character */
00876             /* <NOT_LITERAL> <code> */
00877             TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern,
00878                    ctx->ptr, *ctx->pattern));
00879             if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0])
00880                 RETURN_FAILURE;
00881             ctx->pattern++;
00882             ctx->ptr++;
00883             break;
00884 
00885         case SRE_OP_SUCCESS:
00886             /* end of pattern */
00887             TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr));
00888             state->ptr = ctx->ptr;
00889             RETURN_SUCCESS;
00890 
00891         case SRE_OP_AT:
00892             /* match at given position */
00893             /* <AT> <code> */
00894             TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern));
00895             if (!SRE_AT(state, ctx->ptr, *ctx->pattern))
00896                 RETURN_FAILURE;
00897             ctx->pattern++;
00898             break;
00899 
00900         case SRE_OP_CATEGORY:
00901             /* match at given category */
00902             /* <CATEGORY> <code> */
00903             TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern,
00904                    ctx->ptr, *ctx->pattern));
00905             if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0]))
00906                 RETURN_FAILURE;
00907             ctx->pattern++;
00908             ctx->ptr++;
00909             break;
00910 
00911         case SRE_OP_ANY:
00912             /* match anything (except a newline) */
00913             /* <ANY> */
00914             TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr));
00915             if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0]))
00916                 RETURN_FAILURE;
00917             ctx->ptr++;
00918             break;
00919 
00920         case SRE_OP_ANY_ALL:
00921             /* match anything */
00922             /* <ANY_ALL> */
00923             TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr));
00924             if (ctx->ptr >= end)
00925                 RETURN_FAILURE;
00926             ctx->ptr++;
00927             break;
00928 
00929         case SRE_OP_IN:
00930             /* match set member (or non_member) */
00931             /* <IN> <skip> <set> */
00932             TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr));
00933             if (ctx->ptr >= end || !SRE_CHARSET(ctx->pattern + 1, *ctx->ptr))
00934                 RETURN_FAILURE;
00935             ctx->pattern += ctx->pattern[0];
00936             ctx->ptr++;
00937             break;
00938 
00939         case SRE_OP_LITERAL_IGNORE:
00940             TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
00941                    ctx->pattern, ctx->ptr, ctx->pattern[0]));
00942             if (ctx->ptr >= end ||
00943                 state->lower(*ctx->ptr) != state->lower(*ctx->pattern))
00944                 RETURN_FAILURE;
00945             ctx->pattern++;
00946             ctx->ptr++;
00947             break;
00948 
00949         case SRE_OP_NOT_LITERAL_IGNORE:
00950             TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
00951                    ctx->pattern, ctx->ptr, *ctx->pattern));
00952             if (ctx->ptr >= end ||
00953                 state->lower(*ctx->ptr) == state->lower(*ctx->pattern))
00954                 RETURN_FAILURE;
00955             ctx->pattern++;
00956             ctx->ptr++;
00957             break;
00958 
00959         case SRE_OP_IN_IGNORE:
00960             TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr));
00961             if (ctx->ptr >= end
00962                 || !SRE_CHARSET(ctx->pattern+1,
00963                                 (SRE_CODE)state->lower(*ctx->ptr)))
00964                 RETURN_FAILURE;
00965             ctx->pattern += ctx->pattern[0];
00966             ctx->ptr++;
00967             break;
00968 
00969         case SRE_OP_JUMP:
00970         case SRE_OP_INFO:
00971             /* jump forward */
00972             /* <JUMP> <offset> */
00973             TRACE(("|%p|%p|JUMP %d\n", ctx->pattern,
00974                    ctx->ptr, ctx->pattern[0]));
00975             ctx->pattern += ctx->pattern[0];
00976             break;
00977 
00978         case SRE_OP_BRANCH:
00979             /* alternation */
00980             /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
00981             TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr));
00982             LASTMARK_SAVE();
00983             ctx->u.rep = state->repeat;
00984             if (ctx->u.rep)
00985                 MARK_PUSH(ctx->lastmark);
00986             for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) {
00987                 if (ctx->pattern[1] == SRE_OP_LITERAL &&
00988                     (ctx->ptr >= end ||
00989                      (SRE_CODE) *ctx->ptr != ctx->pattern[2]))
00990                     continue;
00991                 if (ctx->pattern[1] == SRE_OP_IN &&
00992                     (ctx->ptr >= end ||
00993                      !SRE_CHARSET(ctx->pattern + 3, (SRE_CODE) *ctx->ptr)))
00994                     continue;
00995                 state->ptr = ctx->ptr;
00996                 DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1);
00997                 if (ret) {
00998                     if (ctx->u.rep)
00999                         MARK_POP_DISCARD(ctx->lastmark);
01000                     RETURN_ON_ERROR(ret);
01001                     RETURN_SUCCESS;
01002                 }
01003                 if (ctx->u.rep)
01004                     MARK_POP_KEEP(ctx->lastmark);
01005                 LASTMARK_RESTORE();
01006             }
01007             if (ctx->u.rep)
01008                 MARK_POP_DISCARD(ctx->lastmark);
01009             RETURN_FAILURE;
01010 
01011         case SRE_OP_REPEAT_ONE:
01012             /* match repeated sequence (maximizing regexp) */
01013 
01014             /* this operator only works if the repeated item is
01015                exactly one character wide, and we're not already
01016                collecting backtracking points.  for other cases,
01017                use the MAX_REPEAT operator */
01018 
01019             /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
01020 
01021             TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
01022                    ctx->pattern[1], ctx->pattern[2]));
01023 
01024             if (ctx->ptr + ctx->pattern[1] > end)
01025                 RETURN_FAILURE; /* cannot match */
01026 
01027             state->ptr = ctx->ptr;
01028 
01029             ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[2]);
01030             RETURN_ON_ERROR(ret);
01031             DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
01032             ctx->count = ret;
01033             ctx->ptr += ctx->count;
01034 
01035             /* when we arrive here, count contains the number of
01036                matches, and ctx->ptr points to the tail of the target
01037                string.  check if the rest of the pattern matches,
01038                and backtrack if not. */
01039 
01040             if (ctx->count < (Py_ssize_t) ctx->pattern[1])
01041                 RETURN_FAILURE;
01042 
01043             if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
01044                 /* tail is empty.  we're finished */
01045                 state->ptr = ctx->ptr;
01046                 RETURN_SUCCESS;
01047             }
01048 
01049             LASTMARK_SAVE();
01050 
01051             if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) {
01052                 /* tail starts with a literal. skip positions where
01053                    the rest of the pattern cannot possibly match */
01054                 ctx->u.chr = ctx->pattern[ctx->pattern[0]+1];
01055                 for (;;) {
01056                     while (ctx->count >= (Py_ssize_t) ctx->pattern[1] &&
01057                            (ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) {
01058                         ctx->ptr--;
01059                         ctx->count--;
01060                     }
01061                     if (ctx->count < (Py_ssize_t) ctx->pattern[1])
01062                         break;
01063                     state->ptr = ctx->ptr;
01064                     DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
01065                             ctx->pattern+ctx->pattern[0]);
01066                     if (ret) {
01067                         RETURN_ON_ERROR(ret);
01068                         RETURN_SUCCESS;
01069                     }
01070 
01071                     LASTMARK_RESTORE();
01072 
01073                     ctx->ptr--;
01074                     ctx->count--;
01075                 }
01076 
01077             } else {
01078                 /* general case */
01079                 while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) {
01080                     state->ptr = ctx->ptr;
01081                     DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
01082                             ctx->pattern+ctx->pattern[0]);
01083                     if (ret) {
01084                         RETURN_ON_ERROR(ret);
01085                         RETURN_SUCCESS;
01086                     }
01087                     ctx->ptr--;
01088                     ctx->count--;
01089                     LASTMARK_RESTORE();
01090                 }
01091             }
01092             RETURN_FAILURE;
01093 
01094         case SRE_OP_MIN_REPEAT_ONE:
01095             /* match repeated sequence (minimizing regexp) */
01096 
01097             /* this operator only works if the repeated item is
01098                exactly one character wide, and we're not already
01099                collecting backtracking points.  for other cases,
01100                use the MIN_REPEAT operator */
01101 
01102             /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
01103 
01104             TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
01105                    ctx->pattern[1], ctx->pattern[2]));
01106 
01107             if (ctx->ptr + ctx->pattern[1] > end)
01108                 RETURN_FAILURE; /* cannot match */
01109 
01110             state->ptr = ctx->ptr;
01111 
01112             if (ctx->pattern[1] == 0)
01113                 ctx->count = 0;
01114             else {
01115                 /* count using pattern min as the maximum */
01116                 ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[1]);
01117                 RETURN_ON_ERROR(ret);
01118                 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
01119                 if (ret < (Py_ssize_t) ctx->pattern[1])
01120                     /* didn't match minimum number of times */
01121                     RETURN_FAILURE;
01122                 /* advance past minimum matches of repeat */
01123                 ctx->count = ret;
01124                 ctx->ptr += ctx->count;
01125             }
01126 
01127             if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
01128                 /* tail is empty.  we're finished */
01129                 state->ptr = ctx->ptr;
01130                 RETURN_SUCCESS;
01131 
01132             } else {
01133                 /* general case */
01134                 LASTMARK_SAVE();
01135                 while ((Py_ssize_t)ctx->pattern[2] == 65535
01136                        || ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
01137                     state->ptr = ctx->ptr;
01138                     DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
01139                             ctx->pattern+ctx->pattern[0]);
01140                     if (ret) {
01141                         RETURN_ON_ERROR(ret);
01142                         RETURN_SUCCESS;
01143                     }
01144                     state->ptr = ctx->ptr;
01145                     ret = SRE_COUNT(state, ctx->pattern+3, 1);
01146                     RETURN_ON_ERROR(ret);
01147                     DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
01148                     if (ret == 0)
01149                         break;
01150                     assert(ret == 1);
01151                     ctx->ptr++;
01152                     ctx->count++;
01153                     LASTMARK_RESTORE();
01154                 }
01155             }
01156             RETURN_FAILURE;
01157 
01158         case SRE_OP_REPEAT:
01159             /* create repeat context.  all the hard work is done
01160                by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
01161             /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
01162             TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr,
01163                    ctx->pattern[1], ctx->pattern[2]));
01164 
01165             /* install new repeat context */
01166             ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep));
01167             if (!ctx->u.rep) {
01168                 PyErr_NoMemory();
01169                 RETURN_FAILURE;
01170             }
01171             ctx->u.rep->count = -1;
01172             ctx->u.rep->pattern = ctx->pattern;
01173             ctx->u.rep->prev = state->repeat;
01174             ctx->u.rep->last_ptr = NULL;
01175             state->repeat = ctx->u.rep;
01176 
01177             state->ptr = ctx->ptr;
01178             DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]);
01179             state->repeat = ctx->u.rep->prev;
01180             PyObject_FREE(ctx->u.rep);
01181 
01182             if (ret) {
01183                 RETURN_ON_ERROR(ret);
01184                 RETURN_SUCCESS;
01185             }
01186             RETURN_FAILURE;
01187 
01188         case SRE_OP_MAX_UNTIL:
01189             /* maximizing repeat */
01190             /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
01191 
01192             /* FIXME: we probably need to deal with zero-width
01193                matches in here... */
01194 
01195             ctx->u.rep = state->repeat;
01196             if (!ctx->u.rep)
01197                 RETURN_ERROR(SRE_ERROR_STATE);
01198 
01199             state->ptr = ctx->ptr;
01200 
01201             ctx->count = ctx->u.rep->count+1;
01202 
01203             TRACE(("|%p|%p|MAX_UNTIL %d\n", ctx->pattern,
01204                    ctx->ptr, ctx->count));
01205 
01206             if (ctx->count < ctx->u.rep->pattern[1]) {
01207                 /* not enough matches */
01208                 ctx->u.rep->count = ctx->count;
01209                 DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
01210                         ctx->u.rep->pattern+3);
01211                 if (ret) {
01212                     RETURN_ON_ERROR(ret);
01213                     RETURN_SUCCESS;
01214                 }
01215                 ctx->u.rep->count = ctx->count-1;
01216                 state->ptr = ctx->ptr;
01217                 RETURN_FAILURE;
01218             }
01219 
01220             if ((ctx->count < ctx->u.rep->pattern[2] ||
01221                 ctx->u.rep->pattern[2] == 65535) &&
01222                 state->ptr != ctx->u.rep->last_ptr) {
01223                 /* we may have enough matches, but if we can
01224                    match another item, do so */
01225                 ctx->u.rep->count = ctx->count;
01226                 LASTMARK_SAVE();
01227                 MARK_PUSH(ctx->lastmark);
01228                 /* zero-width match protection */
01229                 DATA_PUSH(&ctx->u.rep->last_ptr);
01230                 ctx->u.rep->last_ptr = state->ptr;
01231                 DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
01232                         ctx->u.rep->pattern+3);
01233                 DATA_POP(&ctx->u.rep->last_ptr);
01234                 if (ret) {
01235                     MARK_POP_DISCARD(ctx->lastmark);
01236                     RETURN_ON_ERROR(ret);
01237                     RETURN_SUCCESS;
01238                 }
01239                 MARK_POP(ctx->lastmark);
01240                 LASTMARK_RESTORE();
01241                 ctx->u.rep->count = ctx->count-1;
01242                 state->ptr = ctx->ptr;
01243             }
01244 
01245             /* cannot match more repeated items here.  make sure the
01246                tail matches */
01247             state->repeat = ctx->u.rep->prev;
01248             DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern);
01249             RETURN_ON_SUCCESS(ret);
01250             state->repeat = ctx->u.rep;
01251             state->ptr = ctx->ptr;
01252             RETURN_FAILURE;
01253 
01254         case SRE_OP_MIN_UNTIL:
01255             /* minimizing repeat */
01256             /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
01257 
01258             ctx->u.rep = state->repeat;
01259             if (!ctx->u.rep)
01260                 RETURN_ERROR(SRE_ERROR_STATE);
01261 
01262             state->ptr = ctx->ptr;
01263 
01264             ctx->count = ctx->u.rep->count+1;
01265 
01266             TRACE(("|%p|%p|MIN_UNTIL %d %p\n", ctx->pattern,
01267                    ctx->ptr, ctx->count, ctx->u.rep->pattern));
01268 
01269             if (ctx->count < ctx->u.rep->pattern[1]) {
01270                 /* not enough matches */
01271                 ctx->u.rep->count = ctx->count;
01272                 DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
01273                         ctx->u.rep->pattern+3);
01274                 if (ret) {
01275                     RETURN_ON_ERROR(ret);
01276                     RETURN_SUCCESS;
01277                 }
01278                 ctx->u.rep->count = ctx->count-1;
01279                 state->ptr = ctx->ptr;
01280                 RETURN_FAILURE;
01281             }
01282 
01283             LASTMARK_SAVE();
01284 
01285             /* see if the tail matches */
01286             state->repeat = ctx->u.rep->prev;
01287             DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern);
01288             if (ret) {
01289                 RETURN_ON_ERROR(ret);
01290                 RETURN_SUCCESS;
01291             }
01292 
01293             state->repeat = ctx->u.rep;
01294             state->ptr = ctx->ptr;
01295 
01296             LASTMARK_RESTORE();
01297 
01298             if (ctx->count >= ctx->u.rep->pattern[2]
01299                 && ctx->u.rep->pattern[2] != 65535)
01300                 RETURN_FAILURE;
01301 
01302             ctx->u.rep->count = ctx->count;
01303             DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
01304                     ctx->u.rep->pattern+3);
01305             if (ret) {
01306                 RETURN_ON_ERROR(ret);
01307                 RETURN_SUCCESS;
01308             }
01309             ctx->u.rep->count = ctx->count-1;
01310             state->ptr = ctx->ptr;
01311             RETURN_FAILURE;
01312 
01313         case SRE_OP_GROUPREF:
01314             /* match backreference */
01315             TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern,
01316                    ctx->ptr, ctx->pattern[0]));
01317             i = ctx->pattern[0];
01318             {
01319                 Py_ssize_t groupref = i+i;
01320                 if (groupref >= state->lastmark) {
01321                     RETURN_FAILURE;
01322                 } else {
01323                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
01324                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
01325                     if (!p || !e || e < p)
01326                         RETURN_FAILURE;
01327                     while (p < e) {
01328                         if (ctx->ptr >= end || *ctx->ptr != *p)
01329                             RETURN_FAILURE;
01330                         p++; ctx->ptr++;
01331                     }
01332                 }
01333             }
01334             ctx->pattern++;
01335             break;
01336 
01337         case SRE_OP_GROUPREF_IGNORE:
01338             /* match backreference */
01339             TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern,
01340                    ctx->ptr, ctx->pattern[0]));
01341             i = ctx->pattern[0];
01342             {
01343                 Py_ssize_t groupref = i+i;
01344                 if (groupref >= state->lastmark) {
01345                     RETURN_FAILURE;
01346                 } else {
01347                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
01348                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
01349                     if (!p || !e || e < p)
01350                         RETURN_FAILURE;
01351                     while (p < e) {
01352                         if (ctx->ptr >= end ||
01353                             state->lower(*ctx->ptr) != state->lower(*p))
01354                             RETURN_FAILURE;
01355                         p++; ctx->ptr++;
01356                     }
01357                 }
01358             }
01359             ctx->pattern++;
01360             break;
01361 
01362         case SRE_OP_GROUPREF_EXISTS:
01363             TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern,
01364                    ctx->ptr, ctx->pattern[0]));
01365             /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
01366             i = ctx->pattern[0];
01367             {
01368                 Py_ssize_t groupref = i+i;
01369                 if (groupref >= state->lastmark) {
01370                     ctx->pattern += ctx->pattern[1];
01371                     break;
01372                 } else {
01373                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
01374                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
01375                     if (!p || !e || e < p) {
01376                         ctx->pattern += ctx->pattern[1];
01377                         break;
01378                     }
01379                 }
01380             }
01381             ctx->pattern += 2;
01382             break;
01383 
01384         case SRE_OP_ASSERT:
01385             /* assert subpattern */
01386             /* <ASSERT> <skip> <back> <pattern> */
01387             TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,
01388                    ctx->ptr, ctx->pattern[1]));
01389             state->ptr = ctx->ptr - ctx->pattern[1];
01390             if (state->ptr < state->beginning)
01391                 RETURN_FAILURE;
01392             DO_JUMP(JUMP_ASSERT, jump_assert, ctx->pattern+2);
01393             RETURN_ON_FAILURE(ret);
01394             ctx->pattern += ctx->pattern[0];
01395             break;
01396 
01397         case SRE_OP_ASSERT_NOT:
01398             /* assert not subpattern */
01399             /* <ASSERT_NOT> <skip> <back> <pattern> */
01400             TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,
01401                    ctx->ptr, ctx->pattern[1]));
01402             state->ptr = ctx->ptr - ctx->pattern[1];
01403             if (state->ptr >= state->beginning) {
01404                 DO_JUMP(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
01405                 if (ret) {
01406                     RETURN_ON_ERROR(ret);
01407                     RETURN_FAILURE;
01408                 }
01409             }
01410             ctx->pattern += ctx->pattern[0];
01411             break;
01412 
01413         case SRE_OP_FAILURE:
01414             /* immediate failure */
01415             TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr));
01416             RETURN_FAILURE;
01417 
01418         default:
01419             TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr,
01420                    ctx->pattern[-1]));
01421             RETURN_ERROR(SRE_ERROR_ILLEGAL);
01422         }
01423     }
01424 
01425 exit:
01426     ctx_pos = ctx->last_ctx_pos;
01427     jump = ctx->jump;
01428     DATA_POP_DISCARD(ctx);
01429     if (ctx_pos == -1)
01430         return ret;
01431     DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
01432 
01433     switch (jump) {
01434         case JUMP_MAX_UNTIL_2:
01435             TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr));
01436             goto jump_max_until_2;
01437         case JUMP_MAX_UNTIL_3:
01438             TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr));
01439             goto jump_max_until_3;
01440         case JUMP_MIN_UNTIL_2:
01441             TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr));
01442             goto jump_min_until_2;
01443         case JUMP_MIN_UNTIL_3:
01444             TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr));
01445             goto jump_min_until_3;
01446         case JUMP_BRANCH:
01447             TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr));
01448             goto jump_branch;
01449         case JUMP_MAX_UNTIL_1:
01450             TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr));
01451             goto jump_max_until_1;
01452         case JUMP_MIN_UNTIL_1:
01453             TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr));
01454             goto jump_min_until_1;
01455         case JUMP_REPEAT:
01456             TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr));
01457             goto jump_repeat;
01458         case JUMP_REPEAT_ONE_1:
01459             TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr));
01460             goto jump_repeat_one_1;
01461         case JUMP_REPEAT_ONE_2:
01462             TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr));
01463             goto jump_repeat_one_2;
01464         case JUMP_MIN_REPEAT_ONE:
01465             TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr));
01466             goto jump_min_repeat_one;
01467         case JUMP_ASSERT:
01468             TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr));
01469             goto jump_assert;
01470         case JUMP_ASSERT_NOT:
01471             TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr));
01472             goto jump_assert_not;
01473         case JUMP_NONE:
01474             TRACE(("|%p|%p|RETURN %d\n", ctx->pattern, ctx->ptr, ret));
01475             break;
01476     }
01477 
01478     return ret; /* should never get here */
01479 }
01480 
01481 LOCAL(Py_ssize_t)
01482 SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
01483 {
01484     SRE_CHAR* ptr = (SRE_CHAR *)state->start;
01485     SRE_CHAR* end = (SRE_CHAR *)state->end;
01486     Py_ssize_t status = 0;
01487     Py_ssize_t prefix_len = 0;
01488     Py_ssize_t prefix_skip = 0;
01489     SRE_CODE* prefix = NULL;
01490     SRE_CODE* charset = NULL;
01491     SRE_CODE* overlap = NULL;
01492     int flags = 0;
01493 
01494     if (pattern[0] == SRE_OP_INFO) {
01495         /* optimization info block */
01496         /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
01497 
01498         flags = pattern[2];
01499 
01500         if (pattern[3] > 1) {
01501             /* adjust end point (but make sure we leave at least one
01502                character in there, so literal search will work) */
01503             end -= pattern[3]-1;
01504             if (end <= ptr)
01505                 end = ptr+1;
01506         }
01507 
01508         if (flags & SRE_INFO_PREFIX) {
01509             /* pattern starts with a known prefix */
01510             /* <length> <skip> <prefix data> <overlap data> */
01511             prefix_len = pattern[5];
01512             prefix_skip = pattern[6];
01513             prefix = pattern + 7;
01514             overlap = prefix + prefix_len - 1;
01515         } else if (flags & SRE_INFO_CHARSET)
01516             /* pattern starts with a character from a known set */
01517             /* <charset> */
01518             charset = pattern + 5;
01519 
01520         pattern += 1 + pattern[1];
01521     }
01522 
01523     TRACE(("prefix = %p %d %d\n", prefix, prefix_len, prefix_skip));
01524     TRACE(("charset = %p\n", charset));
01525 
01526 #if defined(USE_FAST_SEARCH)
01527     if (prefix_len > 1) {
01528         /* pattern starts with a known prefix.  use the overlap
01529            table to skip forward as fast as we possibly can */
01530         Py_ssize_t i = 0;
01531         end = (SRE_CHAR *)state->end;
01532         while (ptr < end) {
01533             for (;;) {
01534                 if ((SRE_CODE) ptr[0] != prefix[i]) {
01535                     if (!i)
01536                         break;
01537                     else
01538                         i = overlap[i];
01539                 } else {
01540                     if (++i == prefix_len) {
01541                         /* found a potential match */
01542                         TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
01543                         state->start = ptr + 1 - prefix_len;
01544                         state->ptr = ptr + 1 - prefix_len + prefix_skip;
01545                         if (flags & SRE_INFO_LITERAL)
01546                             return 1; /* we got all of it */
01547                         status = SRE_MATCH(state, pattern + 2*prefix_skip);
01548                         if (status != 0)
01549                             return status;
01550                         /* close but no cigar -- try again */
01551                         i = overlap[i];
01552                     }
01553                     break;
01554                 }
01555             }
01556             ptr++;
01557         }
01558         return 0;
01559     }
01560 #endif
01561 
01562     if (pattern[0] == SRE_OP_LITERAL) {
01563         /* pattern starts with a literal character.  this is used
01564            for short prefixes, and if fast search is disabled */
01565         SRE_CODE chr = pattern[1];
01566         end = (SRE_CHAR *)state->end;
01567         for (;;) {
01568             while (ptr < end && (SRE_CODE) ptr[0] != chr)
01569                 ptr++;
01570             if (ptr >= end)
01571                 return 0;
01572             TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
01573             state->start = ptr;
01574             state->ptr = ++ptr;
01575             if (flags & SRE_INFO_LITERAL)
01576                 return 1; /* we got all of it */
01577             status = SRE_MATCH(state, pattern + 2);
01578             if (status != 0)
01579                 break;
01580         }
01581     } else if (charset) {
01582         /* pattern starts with a character from a known set */
01583         end = (SRE_CHAR *)state->end;
01584         for (;;) {
01585             while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
01586                 ptr++;
01587             if (ptr >= end)
01588                 return 0;
01589             TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
01590             state->start = ptr;
01591             state->ptr = ptr;
01592             status = SRE_MATCH(state, pattern);
01593             if (status != 0)
01594                 break;
01595             ptr++;
01596         }
01597     } else
01598         /* general case */
01599         while (ptr <= end) {
01600             TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
01601             state->start = state->ptr = ptr++;
01602             status = SRE_MATCH(state, pattern);
01603             if (status != 0)
01604                 break;
01605         }
01606 
01607     return status;
01608 }
01609 
01610 LOCAL(int)
01611 SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, Py_ssize_t len)
01612 {
01613     /* check if given string is a literal template (i.e. no escapes) */
01614     while (len-- > 0)
01615         if (*ptr++ == '\\')
01616             return 0;
01617     return 1;
01618 }
01619 
01620 #if !defined(SRE_RECURSIVE)
01621 
01622 /* -------------------------------------------------------------------- */
01623 /* factories and destructors */
01624 
01625 /* see sre.h for object declarations */
01626 static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, int);
01627 static PyObject*pattern_scanner(PatternObject*, PyObject*);
01628 
01629 static PyObject *
01630 sre_codesize(PyObject* self, PyObject *unused)
01631 {
01632     return Py_BuildValue("l", sizeof(SRE_CODE));
01633 }
01634 
01635 static PyObject *
01636 sre_getlower(PyObject* self, PyObject* args)
01637 {
01638     int character, flags;
01639     if (!PyArg_ParseTuple(args, "ii", &character, &flags))
01640         return NULL;
01641     if (flags & SRE_FLAG_LOCALE)
01642         return Py_BuildValue("i", sre_lower_locale(character));
01643     if (flags & SRE_FLAG_UNICODE)
01644 #if defined(HAVE_UNICODE)
01645         return Py_BuildValue("i", sre_lower_unicode(character));
01646 #else
01647         return Py_BuildValue("i", sre_lower_locale(character));
01648 #endif
01649     return Py_BuildValue("i", sre_lower(character));
01650 }
01651 
01652 LOCAL(void)
01653 state_reset(SRE_STATE* state)
01654 {
01655     /* FIXME: dynamic! */
01656     /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
01657 
01658     state->lastmark = -1;
01659     state->lastindex = -1;
01660 
01661     state->repeat = NULL;
01662 
01663     data_stack_dealloc(state);
01664 }
01665 
01666 static void*
01667 getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize)
01668 {
01669     /* given a python object, return a data pointer, a length (in
01670        characters), and a character size.  return NULL if the object
01671        is not a string (or not compatible) */
01672 
01673     PyBufferProcs *buffer;
01674     Py_ssize_t size, bytes;
01675     int charsize;
01676     void* ptr;
01677     Py_buffer view;
01678 
01679     /* Unicode objects do not support the buffer API. So, get the data
01680        directly instead. */
01681     if (PyUnicode_Check(string)) {
01682         ptr = (void *)PyUnicode_AS_DATA(string);
01683         *p_length = PyUnicode_GET_SIZE(string);
01684         *p_charsize = sizeof(Py_UNICODE);
01685         return ptr;
01686     }
01687 
01688     /* get pointer to string buffer */
01689     view.len = -1;
01690     buffer = Py_TYPE(string)->tp_as_buffer;
01691     if (!buffer || !buffer->bf_getbuffer ||
01692         (*buffer->bf_getbuffer)(string, &view, PyBUF_SIMPLE) < 0) {
01693             PyErr_SetString(PyExc_TypeError, "expected string or buffer");
01694             return NULL;
01695     }
01696 
01697     /* determine buffer size */
01698     bytes = view.len;
01699     ptr = view.buf;
01700 
01701     /* Release the buffer immediately --- possibly dangerous
01702        but doing something else would require some re-factoring
01703     */
01704     PyBuffer_Release(&view);
01705 
01706     if (bytes < 0) {
01707         PyErr_SetString(PyExc_TypeError, "buffer has negative size");
01708         return NULL;
01709     }
01710 
01711     /* determine character size */
01712     size = PyObject_Size(string);
01713 
01714     if (PyBytes_Check(string) || bytes == size)
01715         charsize = 1;
01716 #if defined(HAVE_UNICODE)
01717     else if (bytes == (Py_ssize_t) (size * sizeof(Py_UNICODE)))
01718         charsize = sizeof(Py_UNICODE);
01719 #endif
01720     else {
01721         PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
01722         return NULL;
01723     }
01724 
01725     *p_length = size;
01726     *p_charsize = charsize;
01727 
01728     if (ptr == NULL) {
01729             PyErr_SetString(PyExc_ValueError,
01730                             "Buffer is NULL");
01731     }
01732     return ptr;
01733 }
01734 
01735 LOCAL(PyObject*)
01736 state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
01737            Py_ssize_t start, Py_ssize_t end)
01738 {
01739     /* prepare state object */
01740 
01741     Py_ssize_t length;
01742     int charsize;
01743     void* ptr;
01744 
01745     memset(state, 0, sizeof(SRE_STATE));
01746 
01747     state->lastmark = -1;
01748     state->lastindex = -1;
01749 
01750     ptr = getstring(string, &length, &charsize);
01751     if (!ptr)
01752         return NULL;
01753 
01754        if (charsize == 1 && pattern->charsize > 1) {
01755               PyErr_SetString(PyExc_TypeError,
01756                      "can't use a string pattern on a bytes-like object");
01757               return NULL;
01758        }
01759        if (charsize > 1 && pattern->charsize == 1) {
01760               PyErr_SetString(PyExc_TypeError,
01761                      "can't use a bytes pattern on a string-like object");
01762               return NULL;
01763        }
01764 
01765     /* adjust boundaries */
01766     if (start < 0)
01767         start = 0;
01768     else if (start > length)
01769         start = length;
01770 
01771     if (end < 0)
01772         end = 0;
01773     else if (end > length)
01774         end = length;
01775 
01776     state->charsize = charsize;
01777 
01778     state->beginning = ptr;
01779 
01780     state->start = (void*) ((char*) ptr + start * state->charsize);
01781     state->end = (void*) ((char*) ptr + end * state->charsize);
01782 
01783     Py_INCREF(string);
01784     state->string = string;
01785     state->pos = start;
01786     state->endpos = end;
01787 
01788     if (pattern->flags & SRE_FLAG_LOCALE)
01789         state->lower = sre_lower_locale;
01790     else if (pattern->flags & SRE_FLAG_UNICODE)
01791 #if defined(HAVE_UNICODE)
01792         state->lower = sre_lower_unicode;
01793 #else
01794         state->lower = sre_lower_locale;
01795 #endif
01796     else
01797         state->lower = sre_lower;
01798 
01799     return string;
01800 }
01801 
01802 LOCAL(void)
01803 state_fini(SRE_STATE* state)
01804 {
01805     Py_XDECREF(state->string);
01806     data_stack_dealloc(state);
01807 }
01808 
/* calculate offset from start of string: turns a pointer into the
   target buffer (member) into a character index, by dividing its byte
   distance from state->beginning by state->charsize */
#define STATE_OFFSET(state, member)\
    (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
01812 
01813 LOCAL(PyObject*)
01814 state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
01815 {
01816     Py_ssize_t i, j;
01817 
01818     index = (index - 1) * 2;
01819 
01820     if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
01821         if (empty)
01822             /* want empty string */
01823             i = j = 0;
01824         else {
01825             Py_INCREF(Py_None);
01826             return Py_None;
01827         }
01828     } else {
01829         i = STATE_OFFSET(state, state->mark[index]);
01830         j = STATE_OFFSET(state, state->mark[index+1]);
01831     }
01832 
01833     return PySequence_GetSlice(string, i, j);
01834 }
01835 
01836 static void
01837 pattern_error(int status)
01838 {
01839     switch (status) {
01840     case SRE_ERROR_RECURSION_LIMIT:
01841         PyErr_SetString(
01842             PyExc_RuntimeError,
01843             "maximum recursion limit exceeded"
01844             );
01845         break;
01846     case SRE_ERROR_MEMORY:
01847         PyErr_NoMemory();
01848         break;
01849     case SRE_ERROR_INTERRUPTED:
01850     /* An exception has already been raised, so let it fly */
01851         break;
01852     default:
01853         /* other error codes indicate compiler/engine bugs */
01854         PyErr_SetString(
01855             PyExc_RuntimeError,
01856             "internal error in regular expression engine"
01857             );
01858     }
01859 }
01860 
01861 static void
01862 pattern_dealloc(PatternObject* self)
01863 {
01864     if (self->weakreflist != NULL)
01865         PyObject_ClearWeakRefs((PyObject *) self);
01866     Py_XDECREF(self->pattern);
01867     Py_XDECREF(self->groupindex);
01868     Py_XDECREF(self->indexgroup);
01869     PyObject_DEL(self);
01870 }
01871 
01872 static PyObject*
01873 pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
01874 {
01875     SRE_STATE state;
01876     int status;
01877 
01878     PyObject* string;
01879     Py_ssize_t start = 0;
01880     Py_ssize_t end = PY_SSIZE_T_MAX;
01881     static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
01882     if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:match", kwlist,
01883                                      &string, &start, &end))
01884         return NULL;
01885 
01886     string = state_init(&state, self, string, start, end);
01887     if (!string)
01888         return NULL;
01889 
01890     state.ptr = state.start;
01891 
01892     TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
01893 
01894     if (state.charsize == 1) {
01895         status = sre_match(&state, PatternObject_GetCode(self));
01896     } else {
01897 #if defined(HAVE_UNICODE)
01898         status = sre_umatch(&state, PatternObject_GetCode(self));
01899 #endif
01900     }
01901 
01902     TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
01903     if (PyErr_Occurred())
01904         return NULL;
01905 
01906     state_fini(&state);
01907 
01908     return pattern_new_match(self, &state, status);
01909 }
01910 
01911 static PyObject*
01912 pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
01913 {
01914     SRE_STATE state;
01915     int status;
01916 
01917     PyObject* string;
01918     Py_ssize_t start = 0;
01919     Py_ssize_t end = PY_SSIZE_T_MAX;
01920     static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
01921     if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:search", kwlist,
01922                                      &string, &start, &end))
01923         return NULL;
01924 
01925     string = state_init(&state, self, string, start, end);
01926     if (!string)
01927         return NULL;
01928 
01929     TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
01930 
01931     if (state.charsize == 1) {
01932         status = sre_search(&state, PatternObject_GetCode(self));
01933     } else {
01934 #if defined(HAVE_UNICODE)
01935         status = sre_usearch(&state, PatternObject_GetCode(self));
01936 #endif
01937     }
01938 
01939     TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
01940 
01941     state_fini(&state);
01942 
01943     if (PyErr_Occurred())
01944         return NULL;
01945 
01946     return pattern_new_match(self, &state, status);
01947 }
01948 
01949 static PyObject*
01950 call(char* module, char* function, PyObject* args)
01951 {
01952     PyObject* name;
01953     PyObject* mod;
01954     PyObject* func;
01955     PyObject* result;
01956 
01957     if (!args)
01958         return NULL;
01959     name = PyUnicode_FromString(module);
01960     if (!name)
01961         return NULL;
01962     mod = PyImport_Import(name);
01963     Py_DECREF(name);
01964     if (!mod)
01965         return NULL;
01966     func = PyObject_GetAttrString(mod, function);
01967     Py_DECREF(mod);
01968     if (!func)
01969         return NULL;
01970     result = PyObject_CallObject(func, args);
01971     Py_DECREF(func);
01972     Py_DECREF(args);
01973     return result;
01974 }
01975 
#ifdef USE_BUILTIN_COPY
static int
deepcopy(PyObject** object, PyObject* memo)
{
    /* replace *object by the result of copy.deepcopy(*object, memo).
       returns 1 on success; on failure returns 0 and leaves *object
       untouched. */
    PyObject* call_args;
    PyObject* replacement;

    call_args = PyTuple_Pack(2, *object, memo);
    replacement = call("copy", "deepcopy", call_args);
    if (replacement == NULL)
        return 0;

    /* swap the old reference for the copy */
    Py_DECREF(*object);
    *object = replacement;

    return 1; /* success */
}
#endif
01995 
01996 static PyObject*
01997 join_list(PyObject* list, PyObject* string)
01998 {
01999     /* join list elements */
02000 
02001     PyObject* joiner;
02002 #if PY_VERSION_HEX >= 0x01060000
02003     PyObject* function;
02004     PyObject* args;
02005 #endif
02006     PyObject* result;
02007 
02008     joiner = PySequence_GetSlice(string, 0, 0);
02009     if (!joiner)
02010         return NULL;
02011 
02012     if (PyList_GET_SIZE(list) == 0) {
02013         Py_DECREF(list);
02014         return joiner;
02015     }
02016 
02017 #if PY_VERSION_HEX >= 0x01060000
02018     function = PyObject_GetAttrString(joiner, "join");
02019     if (!function) {
02020         Py_DECREF(joiner);
02021         return NULL;
02022     }
02023     args = PyTuple_New(1);
02024     if (!args) {
02025         Py_DECREF(function);
02026         Py_DECREF(joiner);
02027         return NULL;
02028     }
02029     PyTuple_SET_ITEM(args, 0, list);
02030     result = PyObject_CallObject(function, args);
02031     Py_DECREF(args); /* also removes list */
02032     Py_DECREF(function);
02033 #else
02034     result = call(
02035         "string", "join",
02036         PyTuple_Pack(2, list, joiner)
02037         );
02038 #endif
02039     Py_DECREF(joiner);
02040 
02041     return result;
02042 }
02043 
static PyObject*
pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
{
    /* findall(source[, pos[, endpos]]): search repeatedly through the
       target and collect one list item per match.  the item is the
       matched slice when the pattern has no groups, the slice of group
       1 when it has exactly one group, and a tuple of all group slices
       otherwise (groups without a span contribute an empty slice).
       returns the list, or NULL with an exception set. */

    SRE_STATE state;
    PyObject* list;
    int status;
    Py_ssize_t i, b, e;

    PyObject* string;
    Py_ssize_t start = 0;
    Py_ssize_t end = PY_SSIZE_T_MAX;
    static char* kwlist[] = { "source", "pos", "endpos", NULL };
    if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:findall", kwlist,
                                     &string, &start, &end))
        return NULL;

    string = state_init(&state, self, string, start, end);
    if (!string)
        return NULL;

    list = PyList_New(0);
    if (!list) {
        state_fini(&state);
        return NULL;
    }

    /* "<=" rather than "<": one more search is attempted with the start
       position at the very end of the range */
    while (state.start <= state.end) {

        PyObject* item;

        state_reset(&state);

        state.ptr = state.start;

        if (state.charsize == 1) {
            status = sre_search(&state, PatternObject_GetCode(self));
        } else {
#if defined(HAVE_UNICODE)
            status = sre_usearch(&state, PatternObject_GetCode(self));
#endif
        }

       if (PyErr_Occurred())
           goto error;

        /* 0 means no further match; negative values are engine errors */
        if (status <= 0) {
            if (status == 0)
                break;
            pattern_error(status);
            goto error;
        }

        /* don't bother to build a match object */
        switch (self->groups) {
        case 0:
            /* no groups: the item is the whole match */
            b = STATE_OFFSET(&state, state.start);
            e = STATE_OFFSET(&state, state.ptr);
            item = PySequence_GetSlice(string, b, e);
            if (!item)
                goto error;
            break;
        case 1:
            /* one group: the item is that group's slice */
            item = state_getslice(&state, 1, string, 1);
            if (!item)
                goto error;
            break;
        default:
            /* several groups: the item is a tuple of their slices */
            item = PyTuple_New(self->groups);
            if (!item)
                goto error;
            for (i = 0; i < self->groups; i++) {
                PyObject* o = state_getslice(&state, i+1, string, 1);
                if (!o) {
                    Py_DECREF(item);
                    goto error;
                }
                PyTuple_SET_ITEM(item, i, o);
            }
            break;
        }

        status = PyList_Append(list, item);
        Py_DECREF(item);
        if (status < 0)
            goto error;

        /* continue after this match; an empty match advances by one
           character so the loop always makes progress */
        if (state.ptr == state.start)
            state.start = (void*) ((char*) state.ptr + state.charsize);
        else
            state.start = state.ptr;

    }

    state_fini(&state);
    return list;

error:
    Py_DECREF(list);
    state_fini(&state);
    return NULL;

}
02146 
#if PY_VERSION_HEX >= 0x02020000
static PyObject*
pattern_finditer(PatternObject* pattern, PyObject* args)
{
    /* build a scanner for the given arguments and wrap its "search"
       method in a callable-iterator that stops at the first None.
       returns the iterator, or NULL with an exception set. */
    PyObject* scanner;
    PyObject* bound_search;
    PyObject* result = NULL;

    scanner = pattern_scanner(pattern, args);
    if (scanner == NULL)
        return NULL;

    /* the attribute lookup result is all we keep of the scanner here */
    bound_search = PyObject_GetAttrString(scanner, "search");
    Py_DECREF(scanner);

    if (bound_search != NULL) {
        result = PyCallIter_New(bound_search, Py_None);
        Py_DECREF(bound_search);
    }

    return result;
}
#endif
02170 
static PyObject*
pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
{
    /* split(source[, maxsplit]): split the target at each non-empty
       match.  the list receives the text before each match followed by
       one entry per group (None for groups without a span), and
       finally the text after the last match.  maxsplit == 0 means no
       limit.  returns the list, or NULL with an exception set. */

    SRE_STATE state;
    PyObject* list;
    PyObject* item;
    int status;
    Py_ssize_t n;
    Py_ssize_t i;
    void* last;

    PyObject* string;
    Py_ssize_t maxsplit = 0;
    static char* kwlist[] = { "source", "maxsplit", NULL };
    if (!PyArg_ParseTupleAndKeywords(args, kw, "O|n:split", kwlist,
                                     &string, &maxsplit))
        return NULL;

    string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
    if (!string)
        return NULL;

    list = PyList_New(0);
    if (!list) {
        state_fini(&state);
        return NULL;
    }

    /* n counts the splits made so far; last points just past the
       previous split point */
    n = 0;
    last = state.start;

    while (!maxsplit || n < maxsplit) {

        state_reset(&state);

        state.ptr = state.start;

        if (state.charsize == 1) {
            status = sre_search(&state, PatternObject_GetCode(self));
        } else {
#if defined(HAVE_UNICODE)
            status = sre_usearch(&state, PatternObject_GetCode(self));
#endif
        }

       if (PyErr_Occurred())
           goto error;

        /* 0 means no further match; negative values are engine errors */
        if (status <= 0) {
            if (status == 0)
                break;
            pattern_error(status);
            goto error;
        }

        /* an empty match never splits: retry one character further on,
           unless the previous split point is already at the end */
        if (state.start == state.ptr) {
            if (last == state.end)
                break;
            /* skip one character */
            state.start = (void*) ((char*) state.ptr + state.charsize);
            continue;
        }

        /* get segment before this match */
        item = PySequence_GetSlice(
            string, STATE_OFFSET(&state, last),
            STATE_OFFSET(&state, state.start)
            );
        if (!item)
            goto error;
        status = PyList_Append(list, item);
        Py_DECREF(item);
        if (status < 0)
            goto error;

        /* add groups (if any) */
        for (i = 0; i < self->groups; i++) {
            item = state_getslice(&state, i+1, string, 0);
            if (!item)
                goto error;
            status = PyList_Append(list, item);
            Py_DECREF(item);
            if (status < 0)
                goto error;
        }

        n = n + 1;

        /* resume right after this match */
        last = state.start = state.ptr;

    }

    /* get segment following last match (even if empty) */
    item = PySequence_GetSlice(
        string, STATE_OFFSET(&state, last), state.endpos
        );
    if (!item)
        goto error;
    status = PyList_Append(list, item);
    Py_DECREF(item);
    if (status < 0)
        goto error;

    state_fini(&state);
    return list;

error:
    Py_DECREF(list);
    state_fini(&state);
    return NULL;

}
02283 
02284 static PyObject*
02285 pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
02286              Py_ssize_t count, Py_ssize_t subn)
02287 {
02288     SRE_STATE state;
02289     PyObject* list;
02290     PyObject* item;
02291     PyObject* filter;
02292     PyObject* args;
02293     PyObject* match;
02294     void* ptr;
02295     int status;
02296     Py_ssize_t n;
02297     Py_ssize_t i, b, e;
02298     int bint;
02299     int filter_is_callable;
02300 
02301     if (PyCallable_Check(ptemplate)) {
02302         /* sub/subn takes either a function or a template */
02303         filter = ptemplate;
02304         Py_INCREF(filter);
02305         filter_is_callable = 1;
02306     } else {
02307         /* if not callable, check if it's a literal string */
02308         int literal;
02309         ptr = getstring(ptemplate, &n, &bint);
02310         b = bint;
02311         if (ptr) {
02312             if (b == 1) {
02313                   literal = sre_literal_template((unsigned char *)ptr, n);
02314             } else {
02315 #if defined(HAVE_UNICODE)
02316                   literal = sre_uliteral_template((Py_UNICODE *)ptr, n);
02317 #endif
02318             }
02319         } else {
02320             PyErr_Clear();
02321             literal = 0;
02322         }
02323         if (literal) {
02324             filter = ptemplate;
02325             Py_INCREF(filter);
02326             filter_is_callable = 0;
02327         } else {
02328             /* not a literal; hand it over to the template compiler */
02329             filter = call(
02330                 SRE_PY_MODULE, "_subx",
02331                 PyTuple_Pack(2, self, ptemplate)
02332                 );
02333             if (!filter)
02334                 return NULL;
02335             filter_is_callable = PyCallable_Check(filter);
02336         }
02337     }
02338 
02339     string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
02340     if (!string) {
02341         Py_DECREF(filter);
02342         return NULL;
02343     }
02344 
02345     list = PyList_New(0);
02346     if (!list) {
02347         Py_DECREF(filter);
02348         state_fini(&state);
02349         return NULL;
02350     }
02351 
02352     n = i = 0;
02353 
02354     while (!count || n < count) {
02355 
02356         state_reset(&state);
02357 
02358         state.ptr = state.start;
02359 
02360         if (state.charsize == 1) {
02361             status = sre_search(&state, PatternObject_GetCode(self));
02362         } else {
02363 #if defined(HAVE_UNICODE)
02364             status = sre_usearch(&state, PatternObject_GetCode(self));
02365 #endif
02366         }
02367 
02368        if (PyErr_Occurred())
02369            goto error;
02370 
02371         if (status <= 0) {
02372             if (status == 0)
02373                 break;
02374             pattern_error(status);
02375             goto error;
02376         }
02377 
02378         b = STATE_OFFSET(&state, state.start);
02379         e = STATE_OFFSET(&state, state.ptr);
02380 
02381         if (i < b) {
02382             /* get segment before this match */
02383             item = PySequence_GetSlice(string, i, b);
02384             if (!item)
02385                 goto error;
02386             status = PyList_Append(list, item);
02387             Py_DECREF(item);
02388             if (status < 0)
02389                 goto error;
02390 
02391         } else if (i == b && i == e && n > 0)
02392             /* ignore empty match on latest position */
02393             goto next;
02394 
02395         if (filter_is_callable) {
02396             /* pass match object through filter */
02397             match = pattern_new_match(self, &state, 1);
02398             if (!match)
02399                 goto error;
02400             args = PyTuple_Pack(1, match);
02401             if (!args) {
02402                 Py_DECREF(match);
02403                 goto error;
02404             }
02405             item = PyObject_CallObject(filter, args);
02406             Py_DECREF(args);
02407             Py_DECREF(match);
02408             if (!item)
02409                 goto error;
02410         } else {
02411             /* filter is literal string */
02412             item = filter;
02413             Py_INCREF(item);
02414         }
02415 
02416         /* add to list */
02417         if (item != Py_None) {
02418             status = PyList_Append(list, item);
02419             Py_DECREF(item);
02420             if (status < 0)
02421                 goto error;
02422         }
02423 
02424         i = e;
02425         n = n + 1;
02426 
02427 next:
02428         /* move on */
02429         if (state.ptr == state.start)
02430             state.start = (void*) ((char*) state.ptr + state.charsize);
02431         else
02432             state.start = state.ptr;
02433 
02434     }
02435 
02436     /* get segment following last match */
02437     if (i < state.endpos) {
02438         item = PySequence_GetSlice(string, i, state.endpos);
02439         if (!item)
02440             goto error;
02441         status = PyList_Append(list, item);
02442         Py_DECREF(item);
02443         if (status < 0)
02444             goto error;
02445     }
02446 
02447     state_fini(&state);
02448 
02449     Py_DECREF(filter);
02450 
02451     /* convert list to single string (also removes list) */
02452     item = join_list(list, string);
02453 
02454     if (!item)
02455         return NULL;
02456 
02457     if (subn)
02458         return Py_BuildValue("Ni", item, n);
02459 
02460     return item;
02461 
02462 error:
02463     Py_DECREF(list);
02464     state_fini(&state);
02465     Py_DECREF(filter);
02466     return NULL;
02467 
02468 }
02469 
02470 static PyObject*
02471 pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
02472 {
02473     PyObject* ptemplate;
02474     PyObject* string;
02475     Py_ssize_t count = 0;
02476     static char* kwlist[] = { "repl", "string", "count", NULL };
02477     if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:sub", kwlist,
02478                                      &ptemplate, &string, &count))
02479         return NULL;
02480 
02481     return pattern_subx(self, ptemplate, string, count, 0);
02482 }
02483 
02484 static PyObject*
02485 pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
02486 {
02487     PyObject* ptemplate;
02488     PyObject* string;
02489     Py_ssize_t count = 0;
02490     static char* kwlist[] = { "repl", "string", "count", NULL };
02491     if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:subn", kwlist,
02492                                      &ptemplate, &string, &count))
02493         return NULL;
02494 
02495     return pattern_subx(self, ptemplate, string, count, 1);
02496 }
02497 
02498 static PyObject*
02499 pattern_copy(PatternObject* self, PyObject *unused)
02500 {
02501 #ifdef USE_BUILTIN_COPY
02502     PatternObject* copy;
02503     int offset;
02504 
02505     copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
02506     if (!copy)
02507         return NULL;
02508 
02509     offset = offsetof(PatternObject, groups);
02510 
02511     Py_XINCREF(self->groupindex);
02512     Py_XINCREF(self->indexgroup);
02513     Py_XINCREF(self->pattern);
02514 
02515     memcpy((char*) copy + offset, (char*) self + offset,
02516            sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
02517     copy->weakreflist = NULL;
02518 
02519     return (PyObject*) copy;
02520 #else
02521     PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
02522     return NULL;
02523 #endif
02524 }
02525 
02526 static PyObject*
02527 pattern_deepcopy(PatternObject* self, PyObject* memo)
02528 {
02529 #ifdef USE_BUILTIN_COPY
02530     PatternObject* copy;
02531 
02532     copy = (PatternObject*) pattern_copy(self);
02533     if (!copy)
02534         return NULL;
02535 
02536     if (!deepcopy(&copy->groupindex, memo) ||
02537         !deepcopy(&copy->indexgroup, memo) ||
02538         !deepcopy(&copy->pattern, memo)) {
02539         Py_DECREF(copy);
02540         return NULL;
02541     }
02542 
02543 #else
02544     PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
02545     return NULL;
02546 #endif
02547 }
02548 
/* docstrings for the pattern type and its methods; they are attached
   through the pattern_methods table and the tp_doc slot of Pattern_Type */

PyDoc_STRVAR(pattern_match_doc,
"match(string[, pos[, endpos]]) --> match object or None.\n\
    Matches zero or more characters at the beginning of the string");

PyDoc_STRVAR(pattern_search_doc,
"search(string[, pos[, endpos]]) --> match object or None.\n\
    Scan through string looking for a match, and return a corresponding\n\
    MatchObject instance. Return None if no position in the string matches.");

PyDoc_STRVAR(pattern_split_doc,
"split(string[, maxsplit = 0])  --> list.\n\
    Split string by the occurrences of pattern.");

PyDoc_STRVAR(pattern_findall_doc,
"findall(string[, pos[, endpos]]) --> list.\n\
   Return a list of all non-overlapping matches of pattern in string.");

PyDoc_STRVAR(pattern_finditer_doc,
"finditer(string[, pos[, endpos]]) --> iterator.\n\
    Return an iterator over all non-overlapping matches for the \n\
    RE pattern in string. For each match, the iterator returns a\n\
    match object.");

PyDoc_STRVAR(pattern_sub_doc,
"sub(repl, string[, count = 0]) --> newstring\n\
    Return the string obtained by replacing the leftmost non-overlapping\n\
    occurrences of pattern in string by the replacement repl.");

PyDoc_STRVAR(pattern_subn_doc,
"subn(repl, string[, count = 0]) --> (newstring, number of subs)\n\
    Return the tuple (new_string, number_of_subs_made) found by replacing\n\
    the leftmost non-overlapping occurrences of pattern with the\n\
    replacement repl.");

PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
02584 
/* method table for pattern objects.  the entries flagged
   METH_VARARGS|METH_KEYWORDS accept keyword arguments; "finditer" and
   "scanner" take positional arguments only, "__copy__" takes none and
   "__deepcopy__" takes exactly one (the memo).  "scanner", "__copy__"
   and "__deepcopy__" are registered without a docstring. */
static PyMethodDef pattern_methods[] = {
    {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS,
       pattern_match_doc},
    {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS,
       pattern_search_doc},
    {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS,
       pattern_sub_doc},
    {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS,
       pattern_subn_doc},
    {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS,
       pattern_split_doc},
    {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS,
       pattern_findall_doc},
#if PY_VERSION_HEX >= 0x02020000
    {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS,
       pattern_finditer_doc},
#endif
    {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS},
    {"__copy__", (PyCFunction) pattern_copy, METH_NOARGS},
    {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_O},
    {NULL, NULL}  /* sentinel */
};
02607 
/* read-only attributes of pattern objects, exposed straight from the
   PatternObject fields; PAT_OFF(x) is the byte offset of field x */
#define PAT_OFF(x) offsetof(PatternObject, x)
static PyMemberDef pattern_members[] = {
    {"pattern",    T_OBJECT,    PAT_OFF(pattern),       READONLY},
    {"flags",      T_INT,       PAT_OFF(flags),         READONLY},
    {"groups",     T_PYSSIZET,  PAT_OFF(groups),        READONLY},
    {"groupindex", T_OBJECT,    PAT_OFF(groupindex),    READONLY},
    {NULL}  /* Sentinel */
};
02616 
/* type object for compiled patterns.  it is a variable-size type: each
   instance is sizeof(PatternObject) plus one SRE_CODE item per code
   unit.  weak references are supported through the weakreflist field;
   slots after tp_members are left zero-initialized. */
static PyTypeObject Pattern_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_" SRE_MODULE ".SRE_Pattern",
    sizeof(PatternObject), sizeof(SRE_CODE),
    (destructor)pattern_dealloc,   /* tp_dealloc */
    0,                             /* tp_print */
    0,                             /* tp_getattr */
    0,                             /* tp_setattr */
    0,                             /* tp_reserved */
    0,                             /* tp_repr */
    0,                             /* tp_as_number */
    0,                             /* tp_as_sequence */
    0,                             /* tp_as_mapping */
    0,                             /* tp_hash */
    0,                             /* tp_call */
    0,                             /* tp_str */
    0,                             /* tp_getattro */
    0,                             /* tp_setattro */
    0,                             /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,                   /* tp_flags */
    pattern_doc,                   /* tp_doc */
    0,                             /* tp_traverse */
    0,                             /* tp_clear */
    0,                             /* tp_richcompare */
    offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
    0,                             /* tp_iter */
    0,                             /* tp_iternext */
    pattern_methods,               /* tp_methods */
    pattern_members,               /* tp_members */
};
02647 
02648 static int _validate(PatternObject *self); /* Forward */
02649 
02650 static PyObject *
02651 _compile(PyObject* self_, PyObject* args)
02652 {
02653     /* "compile" pattern descriptor to pattern object */
02654 
02655     PatternObject* self;
02656     Py_ssize_t i, n;
02657 
02658     PyObject* pattern;
02659     int flags = 0;
02660     PyObject* code;
02661     Py_ssize_t groups = 0;
02662     PyObject* groupindex = NULL;
02663     PyObject* indexgroup = NULL;
02664     if (!PyArg_ParseTuple(args, "OiO!|nOO", &pattern, &flags,
02665                           &PyList_Type, &code, &groups,
02666                           &groupindex, &indexgroup))
02667         return NULL;
02668 
02669     n = PyList_GET_SIZE(code);
02670     /* coverity[ampersand_in_size] */
02671     self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
02672     if (!self)
02673         return NULL;
02674     self->weakreflist = NULL;
02675     self->pattern = NULL;
02676     self->groupindex = NULL;
02677     self->indexgroup = NULL;
02678 
02679     self->codesize = n;
02680 
02681     for (i = 0; i < n; i++) {
02682         PyObject *o = PyList_GET_ITEM(code, i);
02683         unsigned long value = PyLong_AsUnsignedLong(o);
02684         self->code[i] = (SRE_CODE) value;
02685         if ((unsigned long) self->code[i] != value) {
02686             PyErr_SetString(PyExc_OverflowError,
02687                             "regular expression code size limit exceeded");
02688             break;
02689         }
02690     }
02691 
02692     if (PyErr_Occurred()) {
02693         Py_DECREF(self);
02694         return NULL;
02695     }
02696 
02697        if (pattern == Py_None)
02698               self->charsize = -1;
02699        else {
02700               Py_ssize_t p_length;
02701               if (!getstring(pattern, &p_length, &self->charsize)) {
02702                      Py_DECREF(self);
02703                      return NULL;
02704               }
02705        }
02706 
02707     Py_INCREF(pattern);
02708     self->pattern = pattern;
02709 
02710     self->flags = flags;
02711 
02712     self->groups = groups;
02713 
02714     Py_XINCREF(groupindex);
02715     self->groupindex = groupindex;
02716 
02717     Py_XINCREF(indexgroup);
02718     self->indexgroup = indexgroup;
02719 
02720     self->weakreflist = NULL;
02721 
02722     if (!_validate(self)) {
02723         Py_DECREF(self);
02724         return NULL;
02725     }
02726 
02727     return (PyObject*) self;
02728 }
02729 
02730 /* -------------------------------------------------------------------- */
02731 /* Code validation */
02732 
02733 /* To learn more about this code, have a look at the _compile() function in
02734    Lib/sre_compile.py.  The validation functions below check the code array
02735    for conformance with the code patterns generated there.
02736 
02737    The nice thing about the generated code is that it is position-independent:
02738    all jumps are relative jumps forward.  Also, jumps don't cross each other:
02739    the target of a later jump is always earlier than the target of an earlier
02740    jump.  IOW, this is okay:
02741 
02742    J---------J-------T--------T
02743     \         \_____/        /
02744      \______________________/
02745 
02746    but this is not:
02747 
02748    J---------J-------T--------T
02749     \_________\_____/        /
02750                \____________/
02751 
02752    It also helps that SRE_CODE is always an unsigned type, either 2 bytes or 4
02753    bytes wide (the latter if Python is compiled for "wide" unicode support).
02754 */
02755 
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v)
#endif

/* Report failure: return 0 from the enclosing validator function */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)

/* Extract opcode, argument, or skip count from code array.

   The macros below expect `code` and `end` (SRE_CODE pointers) and the
   destination variable (`op`, `arg` or `skip`) to be in scope.  Each one
   FAILs instead of reading at or past `end`. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
/* GET_SKIP_ADJ reads a skip count and accepts it only if the target
   code+skip-adj lies within [code, end].  The test compares distances
   rather than pointers: forming an out-of-range pointer is undefined
   behaviour, so a "code+skip-adj < code" wraparound test may be removed
   by the compiler.  A skip smaller than adj wraps to a huge unsigned
   value in the subtraction and therefore fails the same comparison. */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip)\n", (unsigned long)skip));  \
        if ((size_t)skip - (size_t)(adj) >              \
            (size_t)(end - code))                       \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
02796 
02797 static int
02798 _validate_charset(SRE_CODE *code, SRE_CODE *end)
02799 {
02800     /* Some variables are manipulated by the macros above */
02801     SRE_CODE op;
02802     SRE_CODE arg;
02803     SRE_CODE offset;
02804     int i;
02805 
02806     while (code < end) {
02807         GET_OP;
02808         switch (op) {
02809 
02810         case SRE_OP_NEGATE:
02811             break;
02812 
02813         case SRE_OP_LITERAL:
02814             GET_ARG;
02815             break;
02816 
02817         case SRE_OP_RANGE:
02818             GET_ARG;
02819             GET_ARG;
02820             break;
02821 
02822         case SRE_OP_CHARSET:
02823             offset = 32/sizeof(SRE_CODE); /* 32-byte bitmap */
02824             if (code+offset < code || code+offset > end)
02825                 FAIL;
02826             code += offset;
02827             break;
02828 
02829         case SRE_OP_BIGCHARSET:
02830             GET_ARG; /* Number of blocks */
02831             offset = 256/sizeof(SRE_CODE); /* 256-byte table */
02832             if (code+offset < code || code+offset > end)
02833                 FAIL;
02834             /* Make sure that each byte points to a valid block */
02835             for (i = 0; i < 256; i++) {
02836                 if (((unsigned char *)code)[i] >= arg)
02837                     FAIL;
02838             }
02839             code += offset;
02840             offset = arg * 32/sizeof(SRE_CODE); /* 32-byte bitmap times arg */
02841             if (code+offset < code || code+offset > end)
02842                 FAIL;
02843             code += offset;
02844             break;
02845 
02846         case SRE_OP_CATEGORY:
02847             GET_ARG;
02848             switch (arg) {
02849             case SRE_CATEGORY_DIGIT:
02850             case SRE_CATEGORY_NOT_DIGIT:
02851             case SRE_CATEGORY_SPACE:
02852             case SRE_CATEGORY_NOT_SPACE:
02853             case SRE_CATEGORY_WORD:
02854             case SRE_CATEGORY_NOT_WORD:
02855             case SRE_CATEGORY_LINEBREAK:
02856             case SRE_CATEGORY_NOT_LINEBREAK:
02857             case SRE_CATEGORY_LOC_WORD:
02858             case SRE_CATEGORY_LOC_NOT_WORD:
02859             case SRE_CATEGORY_UNI_DIGIT:
02860             case SRE_CATEGORY_UNI_NOT_DIGIT:
02861             case SRE_CATEGORY_UNI_SPACE:
02862             case SRE_CATEGORY_UNI_NOT_SPACE:
02863             case SRE_CATEGORY_UNI_WORD:
02864             case SRE_CATEGORY_UNI_NOT_WORD:
02865             case SRE_CATEGORY_UNI_LINEBREAK:
02866             case SRE_CATEGORY_UNI_NOT_LINEBREAK:
02867                 break;
02868             default:
02869                 FAIL;
02870             }
02871             break;
02872 
02873         default:
02874             FAIL;
02875 
02876         }
02877     }
02878 
02879     return 1;
02880 }
02881 
02882 static int
02883 _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
02884 {
02885     /* Some variables are manipulated by the macros above */
02886     SRE_CODE op;
02887     SRE_CODE arg;
02888     SRE_CODE skip;
02889 
02890     VTRACE(("code=%p, end=%p\n", code, end));
02891 
02892     if (code > end)
02893         FAIL;
02894 
02895     while (code < end) {
02896         GET_OP;
02897         switch (op) {
02898 
02899         case SRE_OP_MARK:
02900             /* We don't check whether marks are properly nested; the
02901                sre_match() code is robust even if they don't, and the worst
02902                you can get is nonsensical match results. */
02903             GET_ARG;
02904             if (arg > 2*groups+1) {
02905                 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
02906                 FAIL;
02907             }
02908             break;
02909 
02910         case SRE_OP_LITERAL:
02911         case SRE_OP_NOT_LITERAL:
02912         case SRE_OP_LITERAL_IGNORE:
02913         case SRE_OP_NOT_LITERAL_IGNORE:
02914             GET_ARG;
02915             /* The arg is just a character, nothing to check */
02916             break;
02917 
02918         case SRE_OP_SUCCESS:
02919         case SRE_OP_FAILURE:
02920             /* Nothing to check; these normally end the matching process */
02921             break;
02922 
02923         case SRE_OP_AT:
02924             GET_ARG;
02925             switch (arg) {
02926             case SRE_AT_BEGINNING:
02927             case SRE_AT_BEGINNING_STRING:
02928             case SRE_AT_BEGINNING_LINE:
02929             case SRE_AT_END:
02930             case SRE_AT_END_LINE:
02931             case SRE_AT_END_STRING:
02932             case SRE_AT_BOUNDARY:
02933             case SRE_AT_NON_BOUNDARY:
02934             case SRE_AT_LOC_BOUNDARY:
02935             case SRE_AT_LOC_NON_BOUNDARY:
02936             case SRE_AT_UNI_BOUNDARY:
02937             case SRE_AT_UNI_NON_BOUNDARY:
02938                 break;
02939             default:
02940                 FAIL;
02941             }
02942             break;
02943 
02944         case SRE_OP_ANY:
02945         case SRE_OP_ANY_ALL:
02946             /* These have no operands */
02947             break;
02948 
02949         case SRE_OP_IN:
02950         case SRE_OP_IN_IGNORE:
02951             GET_SKIP;
02952             /* Stop 1 before the end; we check the FAILURE below */
02953             if (!_validate_charset(code, code+skip-2))
02954                 FAIL;
02955             if (code[skip-2] != SRE_OP_FAILURE)
02956                 FAIL;
02957             code += skip-1;
02958             break;
02959 
02960         case SRE_OP_INFO:
02961             {
02962                 /* A minimal info field is
02963                    <INFO> <1=skip> <2=flags> <3=min> <4=max>;
02964                    If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
02965                    more follows. */
02966                 SRE_CODE flags, min, max, i;
02967                 SRE_CODE *newcode;
02968                 GET_SKIP;
02969                 newcode = code+skip-1;
02970                 GET_ARG; flags = arg;
02971                 GET_ARG; min = arg;
02972                 GET_ARG; max = arg;
02973                 /* Check that only valid flags are present */
02974                 if ((flags & ~(SRE_INFO_PREFIX |
02975                                SRE_INFO_LITERAL |
02976                                SRE_INFO_CHARSET)) != 0)
02977                     FAIL;
02978                 /* PREFIX and CHARSET are mutually exclusive */
02979                 if ((flags & SRE_INFO_PREFIX) &&
02980                     (flags & SRE_INFO_CHARSET))
02981                     FAIL;
02982                 /* LITERAL implies PREFIX */
02983                 if ((flags & SRE_INFO_LITERAL) &&
02984                     !(flags & SRE_INFO_PREFIX))
02985                     FAIL;
02986                 /* Validate the prefix */
02987                 if (flags & SRE_INFO_PREFIX) {
02988                     SRE_CODE prefix_len, prefix_skip;
02989                     GET_ARG; prefix_len = arg;
02990                     GET_ARG; prefix_skip = arg;
02991                     /* Here comes the prefix string */
02992                     if (code+prefix_len < code || code+prefix_len > newcode)
02993                         FAIL;
02994                     code += prefix_len;
02995                     /* And here comes the overlap table */
02996                     if (code+prefix_len < code || code+prefix_len > newcode)
02997                         FAIL;
02998                     /* Each overlap value should be < prefix_len */
02999                     for (i = 0; i < prefix_len; i++) {
03000                         if (code[i] >= prefix_len)
03001                             FAIL;
03002                     }
03003                     code += prefix_len;
03004                 }
03005                 /* Validate the charset */
03006                 if (flags & SRE_INFO_CHARSET) {
03007                     if (!_validate_charset(code, newcode-1))
03008                         FAIL;
03009                     if (newcode[-1] != SRE_OP_FAILURE)
03010                         FAIL;
03011                     code = newcode;
03012                 }
03013                 else if (code != newcode) {
03014                   VTRACE(("code=%p, newcode=%p\n", code, newcode));
03015                     FAIL;
03016                 }
03017             }
03018             break;
03019 
03020         case SRE_OP_BRANCH:
03021             {
03022                 SRE_CODE *target = NULL;
03023                 for (;;) {
03024                     GET_SKIP;
03025                     if (skip == 0)
03026                         break;
03027                     /* Stop 2 before the end; we check the JUMP below */
03028                     if (!_validate_inner(code, code+skip-3, groups))
03029                         FAIL;
03030                     code += skip-3;
03031                     /* Check that it ends with a JUMP, and that each JUMP
03032                        has the same target */
03033                     GET_OP;
03034                     if (op != SRE_OP_JUMP)
03035                         FAIL;
03036                     GET_SKIP;
03037                     if (target == NULL)
03038                         target = code+skip-1;
03039                     else if (code+skip-1 != target)
03040                         FAIL;
03041                 }
03042             }
03043             break;
03044 
03045         case SRE_OP_REPEAT_ONE:
03046         case SRE_OP_MIN_REPEAT_ONE:
03047             {
03048                 SRE_CODE min, max;
03049                 GET_SKIP;
03050                 GET_ARG; min = arg;
03051                 GET_ARG; max = arg;
03052                 if (min > max)
03053                     FAIL;
03054 #ifdef Py_UNICODE_WIDE
03055                 if (max > 65535)
03056                     FAIL;
03057 #endif
03058                 if (!_validate_inner(code, code+skip-4, groups))
03059                     FAIL;
03060                 code += skip-4;
03061                 GET_OP;
03062                 if (op != SRE_OP_SUCCESS)
03063                     FAIL;
03064             }
03065             break;
03066 
03067         case SRE_OP_REPEAT:
03068             {
03069                 SRE_CODE min, max;
03070                 GET_SKIP;
03071                 GET_ARG; min = arg;
03072                 GET_ARG; max = arg;
03073                 if (min > max)
03074                     FAIL;
03075 #ifdef Py_UNICODE_WIDE
03076                 if (max > 65535)
03077                     FAIL;
03078 #endif
03079                 if (!_validate_inner(code, code+skip-3, groups))
03080                     FAIL;
03081                 code += skip-3;
03082                 GET_OP;
03083                 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
03084                     FAIL;
03085             }
03086             break;
03087 
03088         case SRE_OP_GROUPREF:
03089         case SRE_OP_GROUPREF_IGNORE:
03090             GET_ARG;
03091             if (arg >= groups)
03092                 FAIL;
03093             break;
03094 
03095         case SRE_OP_GROUPREF_EXISTS:
03096             /* The regex syntax for this is: '(?(group)then|else)', where
03097                'group' is either an integer group number or a group name,
03098                'then' and 'else' are sub-regexes, and 'else' is optional. */
03099             GET_ARG;
03100             if (arg >= groups)
03101                 FAIL;
03102             GET_SKIP_ADJ(1);
03103             code--; /* The skip is relative to the first arg! */
03104             /* There are two possibilities here: if there is both a 'then'
03105                part and an 'else' part, the generated code looks like:
03106 
03107                GROUPREF_EXISTS
03108                <group>
03109                <skipyes>
03110                ...then part...
03111                JUMP
03112                <skipno>
03113                (<skipyes> jumps here)
03114                ...else part...
03115                (<skipno> jumps here)
03116 
03117                If there is only a 'then' part, it looks like:
03118 
03119                GROUPREF_EXISTS
03120                <group>
03121                <skip>
03122                ...then part...
03123                (<skip> jumps here)
03124 
03125                There is no direct way to decide which it is, and we don't want
03126                to allow arbitrary jumps anywhere in the code; so we just look
03127                for a JUMP opcode preceding our skip target.
03128             */
03129             if (skip >= 3 && code+skip-3 >= code &&
03130                 code[skip-3] == SRE_OP_JUMP)
03131             {
03132                 VTRACE(("both then and else parts present\n"));
03133                 if (!_validate_inner(code+1, code+skip-3, groups))
03134                     FAIL;
03135                 code += skip-2; /* Position after JUMP, at <skipno> */
03136                 GET_SKIP;
03137                 if (!_validate_inner(code, code+skip-1, groups))
03138                     FAIL;
03139                 code += skip-1;
03140             }
03141             else {
03142                 VTRACE(("only a then part present\n"));
03143                 if (!_validate_inner(code+1, code+skip-1, groups))
03144                     FAIL;
03145                 code += skip-1;
03146             }
03147             break;
03148 
03149         case SRE_OP_ASSERT:
03150         case SRE_OP_ASSERT_NOT:
03151             GET_SKIP;
03152             GET_ARG; /* 0 for lookahead, width for lookbehind */
03153             code--; /* Back up over arg to simplify math below */
03154             if (arg & 0x80000000)
03155                 FAIL; /* Width too large */
03156             /* Stop 1 before the end; we check the SUCCESS below */
03157             if (!_validate_inner(code+1, code+skip-2, groups))
03158                 FAIL;
03159             code += skip-2;
03160             GET_OP;
03161             if (op != SRE_OP_SUCCESS)
03162                 FAIL;
03163             break;
03164 
03165         default:
03166             FAIL;
03167 
03168         }
03169     }
03170 
03171     VTRACE(("okay\n"));
03172     return 1;
03173 }
03174 
03175 static int
03176 _validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
03177 {
03178     if (groups < 0 || groups > 100 || code >= end || end[-1] != SRE_OP_SUCCESS)
03179         FAIL;
03180     if (groups == 0)  /* fix for simplejson */
03181         groups = 100; /* 100 groups should always be safe */
03182     return _validate_inner(code, end-1, groups);
03183 }
03184 
03185 static int
03186 _validate(PatternObject *self)
03187 {
03188     if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
03189     {
03190         PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
03191         return 0;
03192     }
03193     else
03194         VTRACE(("Success!\n"));
03195     return 1;
03196 }
03197 
03198 /* -------------------------------------------------------------------- */
03199 /* match methods */
03200 
/* Destructor for match objects: drops the references held in regs,
   string and pattern, then frees the object itself.  regs and string are
   released with Py_XDECREF, so either may be NULL at this point. */
03201 static void
03202 match_dealloc(MatchObject* self)
03203 {
03204     Py_XDECREF(self->regs);
03205     Py_XDECREF(self->string);
03206     Py_DECREF(self->pattern);
03207     PyObject_DEL(self);
03208 }
03209 
03210 static PyObject*
03211 match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
03212 {
03213     if (index < 0 || index >= self->groups) {
03214         /* raise IndexError if we were given a bad group number */
03215         PyErr_SetString(
03216             PyExc_IndexError,
03217             "no such group"
03218             );
03219         return NULL;
03220     }
03221 
03222     index *= 2;
03223 
03224     if (self->string == Py_None || self->mark[index] < 0) {
03225         /* return default value if the string or group is undefined */
03226         Py_INCREF(def);
03227         return def;
03228     }
03229 
03230     return PySequence_GetSlice(
03231         self->string, self->mark[index], self->mark[index+1]
03232         );
03233 }
03234 
03235 static Py_ssize_t
03236 match_getindex(MatchObject* self, PyObject* index)
03237 {
03238     Py_ssize_t i;
03239 
03240     if (index == NULL)
03241        /* Default value */
03242        return 0;
03243 
03244     if (PyLong_Check(index))
03245         return PyLong_AsSsize_t(index);
03246 
03247     i = -1;
03248 
03249     if (self->pattern->groupindex) {
03250         index = PyObject_GetItem(self->pattern->groupindex, index);
03251         if (index) {
03252             if (PyLong_Check(index))
03253                 i = PyLong_AsSsize_t(index);
03254             Py_DECREF(index);
03255         } else
03256             PyErr_Clear();
03257     }
03258 
03259     return i;
03260 }
03261 
03262 static PyObject*
03263 match_getslice(MatchObject* self, PyObject* index, PyObject* def)
03264 {
03265     return match_getslice_by_index(self, match_getindex(self, index), def);
03266 }
03267 
03268 static PyObject*
03269 match_expand(MatchObject* self, PyObject* ptemplate)
03270 {
03271     /* delegate to Python code */
03272     return call(
03273         SRE_PY_MODULE, "_expand",
03274         PyTuple_Pack(3, self->pattern, self, ptemplate)
03275         );
03276 }
03277 
03278 static PyObject*
03279 match_group(MatchObject* self, PyObject* args)
03280 {
03281     PyObject* result;
03282     Py_ssize_t i, size;
03283 
03284     size = PyTuple_GET_SIZE(args);
03285 
03286     switch (size) {
03287     case 0:
03288         result = match_getslice(self, Py_False, Py_None);
03289         break;
03290     case 1:
03291         result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
03292         break;
03293     default:
03294         /* fetch multiple items */
03295         result = PyTuple_New(size);
03296         if (!result)
03297             return NULL;
03298         for (i = 0; i < size; i++) {
03299             PyObject* item = match_getslice(
03300                 self, PyTuple_GET_ITEM(args, i), Py_None
03301                 );
03302             if (!item) {
03303                 Py_DECREF(result);
03304                 return NULL;
03305             }
03306             PyTuple_SET_ITEM(result, i, item);
03307         }
03308         break;
03309     }
03310     return result;
03311 }
03312 
03313 static PyObject*
03314 match_groups(MatchObject* self, PyObject* args, PyObject* kw)
03315 {
03316     PyObject* result;
03317     Py_ssize_t index;
03318 
03319     PyObject* def = Py_None;
03320     static char* kwlist[] = { "default", NULL };
03321     if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
03322         return NULL;
03323 
03324     result = PyTuple_New(self->groups-1);
03325     if (!result)
03326         return NULL;
03327 
03328     for (index = 1; index < self->groups; index++) {
03329         PyObject* item;
03330         item = match_getslice_by_index(self, index, def);
03331         if (!item) {
03332             Py_DECREF(result);
03333             return NULL;
03334         }
03335         PyTuple_SET_ITEM(result, index-1, item);
03336     }
03337 
03338     return result;
03339 }
03340 
03341 static PyObject*
03342 match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
03343 {
03344     PyObject* result;
03345     PyObject* keys;
03346     Py_ssize_t index;
03347 
03348     PyObject* def = Py_None;
03349     static char* kwlist[] = { "default", NULL };
03350     if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
03351         return NULL;
03352 
03353     result = PyDict_New();
03354     if (!result || !self->pattern->groupindex)
03355         return result;
03356 
03357     keys = PyMapping_Keys(self->pattern->groupindex);
03358     if (!keys)
03359         goto failed;
03360 
03361     for (index = 0; index < PyList_GET_SIZE(keys); index++) {
03362         int status;
03363         PyObject* key;
03364         PyObject* value;
03365         key = PyList_GET_ITEM(keys, index);
03366         if (!key)
03367             goto failed;
03368         value = match_getslice(self, key, def);
03369         if (!value) {
03370             Py_DECREF(key);
03371             goto failed;
03372         }
03373         status = PyDict_SetItem(result, key, value);
03374         Py_DECREF(value);
03375         if (status < 0)
03376             goto failed;
03377     }
03378 
03379     Py_DECREF(keys);
03380 
03381     return result;
03382 
03383 failed:
03384     Py_XDECREF(keys);
03385     Py_DECREF(result);
03386     return NULL;
03387 }
03388 
03389 static PyObject*
03390 match_start(MatchObject* self, PyObject* args)
03391 {
03392     Py_ssize_t index;
03393 
03394     PyObject* index_ = NULL;
03395     if (!PyArg_UnpackTuple(args, "start", 0, 1, &index_))
03396         return NULL;
03397 
03398     index = match_getindex(self, index_);
03399 
03400     if (index < 0 || index >= self->groups) {
03401         PyErr_SetString(
03402             PyExc_IndexError,
03403             "no such group"
03404             );
03405         return NULL;
03406     }
03407 
03408     /* mark is -1 if group is undefined */
03409     return Py_BuildValue("i", self->mark[index*2]);
03410 }
03411 
03412 static PyObject*
03413 match_end(MatchObject* self, PyObject* args)
03414 {
03415     Py_ssize_t index;
03416 
03417     PyObject* index_ = NULL;
03418     if (!PyArg_UnpackTuple(args, "end", 0, 1, &index_))
03419         return NULL;
03420 
03421     index = match_getindex(self, index_);
03422 
03423     if (index < 0 || index >= self->groups) {
03424         PyErr_SetString(
03425             PyExc_IndexError,
03426             "no such group"
03427             );
03428         return NULL;
03429     }
03430 
03431     /* mark is -1 if group is undefined */
03432     return Py_BuildValue("i", self->mark[index*2+1]);
03433 }
03434 
03435 LOCAL(PyObject*)
03436 _pair(Py_ssize_t i1, Py_ssize_t i2)
03437 {
03438     PyObject* pair;
03439     PyObject* item;
03440 
03441     pair = PyTuple_New(2);
03442     if (!pair)
03443         return NULL;
03444 
03445     item = PyLong_FromSsize_t(i1);
03446     if (!item)
03447         goto error;
03448     PyTuple_SET_ITEM(pair, 0, item);
03449 
03450     item = PyLong_FromSsize_t(i2);
03451     if (!item)
03452         goto error;
03453     PyTuple_SET_ITEM(pair, 1, item);
03454 
03455     return pair;
03456 
03457   error:
03458     Py_DECREF(pair);
03459     return NULL;
03460 }
03461 
03462 static PyObject*
03463 match_span(MatchObject* self, PyObject* args)
03464 {
03465     Py_ssize_t index;
03466 
03467     PyObject* index_ = NULL;
03468     if (!PyArg_UnpackTuple(args, "span", 0, 1, &index_))
03469         return NULL;
03470 
03471     index = match_getindex(self, index_);
03472 
03473     if (index < 0 || index >= self->groups) {
03474         PyErr_SetString(
03475             PyExc_IndexError,
03476             "no such group"
03477             );
03478         return NULL;
03479     }
03480 
03481     /* marks are -1 if group is undefined */
03482     return _pair(self->mark[index*2], self->mark[index*2+1]);
03483 }
03484 
03485 static PyObject*
03486 match_regs(MatchObject* self)
03487 {
03488     PyObject* regs;
03489     PyObject* item;
03490     Py_ssize_t index;
03491 
03492     regs = PyTuple_New(self->groups);
03493     if (!regs)
03494         return NULL;
03495 
03496     for (index = 0; index < self->groups; index++) {
03497         item = _pair(self->mark[index*2], self->mark[index*2+1]);
03498         if (!item) {
03499             Py_DECREF(regs);
03500             return NULL;
03501         }
03502         PyTuple_SET_ITEM(regs, index, item);
03503     }
03504 
03505     Py_INCREF(regs);
03506     self->regs = regs;
03507 
03508     return regs;
03509 }
03510 
03511 static PyObject*
03512 match_copy(MatchObject* self, PyObject *unused)
03513 {
03514 #ifdef USE_BUILTIN_COPY
03515     MatchObject* copy;
03516     Py_ssize_t slots, offset;
03517 
03518     slots = 2 * (self->pattern->groups+1);
03519 
03520     copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
03521     if (!copy)
03522         return NULL;
03523 
03524     /* this value a constant, but any compiler should be able to
03525        figure that out all by itself */
03526     offset = offsetof(MatchObject, string);
03527 
03528     Py_XINCREF(self->pattern);
03529     Py_XINCREF(self->string);
03530     Py_XINCREF(self->regs);
03531 
03532     memcpy((char*) copy + offset, (char*) self + offset,
03533            sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset);
03534 
03535     return (PyObject*) copy;
03536 #else
03537     PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
03538     return NULL;
03539 #endif
03540 }
03541 
03542 static PyObject*
03543 match_deepcopy(MatchObject* self, PyObject* memo)
03544 {
03545 #ifdef USE_BUILTIN_COPY
03546     MatchObject* copy;
03547 
03548     copy = (MatchObject*) match_copy(self);
03549     if (!copy)
03550         return NULL;
03551 
03552     if (!deepcopy((PyObject**) &copy->pattern, memo) ||
03553         !deepcopy(&copy->string, memo) ||
03554         !deepcopy(&copy->regs, memo)) {
03555         Py_DECREF(copy);
03556         return NULL;
03557     }
03558 
03559 #else
03560     PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
03561     return NULL;
03562 #endif
03563 }
03564 
03565 static PyMethodDef match_methods[] = {
03566     {"group", (PyCFunction) match_group, METH_VARARGS},
03567     {"start", (PyCFunction) match_start, METH_VARARGS},
03568     {"end", (PyCFunction) match_end, METH_VARARGS},
03569     {"span", (PyCFunction) match_span, METH_VARARGS},
03570     {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS},
03571     {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS},
03572     {"expand", (PyCFunction) match_expand, METH_O},
03573     {"__copy__", (PyCFunction) match_copy, METH_NOARGS},
03574     {"__deepcopy__", (PyCFunction) match_deepcopy, METH_O},
03575     {NULL, NULL}
03576 };
03577 
03578 static PyObject *
03579 match_lastindex_get(MatchObject *self)
03580 {
03581     if (self->lastindex >= 0)
03582        return Py_BuildValue("i", self->lastindex);
03583     Py_INCREF(Py_None);
03584     return Py_None;
03585 }
03586 
03587 static PyObject *
03588 match_lastgroup_get(MatchObject *self)
03589 {
03590     if (self->pattern->indexgroup && self->lastindex >= 0) {
03591         PyObject* result = PySequence_GetItem(
03592             self->pattern->indexgroup, self->lastindex
03593             );
03594         if (result)
03595             return result;
03596         PyErr_Clear();
03597     }
03598     Py_INCREF(Py_None);
03599     return Py_None;
03600 }
03601 
03602 static PyObject *
03603 match_regs_get(MatchObject *self)
03604 {
03605     if (self->regs) {
03606         Py_INCREF(self->regs);
03607         return self->regs;
03608     } else
03609         return match_regs(self);
03610 }
03611 
03612 static PyGetSetDef match_getset[] = {
03613     {"lastindex", (getter)match_lastindex_get, (setter)NULL},
03614     {"lastgroup", (getter)match_lastgroup_get, (setter)NULL},
03615     {"regs",      (getter)match_regs_get,      (setter)NULL},
03616     {NULL}
03617 };
03618 
03619 #define MATCH_OFF(x) offsetof(MatchObject, x)
03620 static PyMemberDef match_members[] = {
03621     {"string",  T_OBJECT,   MATCH_OFF(string),  READONLY},
03622     {"re",      T_OBJECT,   MATCH_OFF(pattern), READONLY},
03623     {"pos",     T_PYSSIZET, MATCH_OFF(pos),     READONLY},
03624     {"endpos",  T_PYSSIZET, MATCH_OFF(endpos),  READONLY},
03625     {NULL}
03626 };
03627 
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
03630 
03631 static PyTypeObject Match_Type = {
03632     PyVarObject_HEAD_INIT(NULL,0)
03633     "_" SRE_MODULE ".SRE_Match",
03634     sizeof(MatchObject), sizeof(Py_ssize_t),
03635     (destructor)match_dealloc,     /* tp_dealloc */
03636     0,                      /* tp_print */
03637     0,                      /* tp_getattr */
03638     0,                      /* tp_setattr */
03639     0,                      /* tp_reserved */
03640     0,                      /* tp_repr */
03641     0,                      /* tp_as_number */
03642     0,                      /* tp_as_sequence */
03643     0,                      /* tp_as_mapping */
03644     0,                      /* tp_hash */
03645     0,                      /* tp_call */
03646     0,                      /* tp_str */
03647     0,                      /* tp_getattro */
03648     0,                      /* tp_setattro */
03649     0,                      /* tp_as_buffer */
03650     Py_TPFLAGS_DEFAULT,            /* tp_flags */
03651     0,                      /* tp_doc */
03652     0,                      /* tp_traverse */
03653     0,                      /* tp_clear */
03654     0,                      /* tp_richcompare */
03655     0,                      /* tp_weaklistoffset */
03656     0,                      /* tp_iter */
03657     0,                      /* tp_iternext */
03658     match_methods,          /* tp_methods */
03659     match_members,          /* tp_members */
03660     match_getset,            /* tp_getset */
03661 };
03662 
03663 static PyObject*
03664 pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
03665 {
03666     /* create match object (from state object) */
03667 
03668     MatchObject* match;
03669     Py_ssize_t i, j;
03670     char* base;
03671     int n;
03672 
03673     if (status > 0) {
03674 
03675         /* create match object (with room for extra group marks) */
03676         /* coverity[ampersand_in_size] */
03677         match = PyObject_NEW_VAR(MatchObject, &Match_Type,
03678                                  2*(pattern->groups+1));
03679         if (!match)
03680             return NULL;
03681 
03682         Py_INCREF(pattern);
03683         match->pattern = pattern;
03684 
03685         Py_INCREF(state->string);
03686         match->string = state->string;
03687 
03688         match->regs = NULL;
03689         match->groups = pattern->groups+1;
03690 
03691         /* fill in group slices */
03692 
03693         base = (char*) state->beginning;
03694         n = state->charsize;
03695 
03696         match->mark[0] = ((char*) state->start - base) / n;
03697         match->mark[1] = ((char*) state->ptr - base) / n;
03698 
03699         for (i = j = 0; i < pattern->groups; i++, j+=2)
03700             if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
03701                 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
03702                 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
03703             } else
03704                 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
03705 
03706         match->pos = state->pos;
03707         match->endpos = state->endpos;
03708 
03709         match->lastindex = state->lastindex;
03710 
03711         return (PyObject*) match;
03712 
03713     } else if (status == 0) {
03714 
03715         /* no match */
03716         Py_INCREF(Py_None);
03717         return Py_None;
03718 
03719     }
03720 
03721     /* internal error */
03722     pattern_error(status);
03723     return NULL;
03724 }
03725 
03726 
03727 /* -------------------------------------------------------------------- */
03728 /* scanner methods (experimental) */
03729 
03730 static void
03731 scanner_dealloc(ScannerObject* self)
03732 {
03733     state_fini(&self->state);
03734     Py_XDECREF(self->pattern);
03735     PyObject_DEL(self);
03736 }
03737 
03738 static PyObject*
03739 scanner_match(ScannerObject* self, PyObject *unused)
03740 {
03741     SRE_STATE* state = &self->state;
03742     PyObject* match;
03743     int status;
03744 
03745     state_reset(state);
03746 
03747     state->ptr = state->start;
03748 
03749     if (state->charsize == 1) {
03750         status = sre_match(state, PatternObject_GetCode(self->pattern));
03751     } else {
03752 #if defined(HAVE_UNICODE)
03753         status = sre_umatch(state, PatternObject_GetCode(self->pattern));
03754 #endif
03755     }
03756     if (PyErr_Occurred())
03757         return NULL;
03758 
03759     match = pattern_new_match((PatternObject*) self->pattern,
03760                                state, status);
03761 
03762     if (status == 0 || state->ptr == state->start)
03763         state->start = (void*) ((char*) state->ptr + state->charsize);
03764     else
03765         state->start = state->ptr;
03766 
03767     return match;
03768 }
03769 
03770 
03771 static PyObject*
03772 scanner_search(ScannerObject* self, PyObject *unused)
03773 {
03774     SRE_STATE* state = &self->state;
03775     PyObject* match;
03776     int status;
03777 
03778     state_reset(state);
03779 
03780     state->ptr = state->start;
03781 
03782     if (state->charsize == 1) {
03783         status = sre_search(state, PatternObject_GetCode(self->pattern));
03784     } else {
03785 #if defined(HAVE_UNICODE)
03786         status = sre_usearch(state, PatternObject_GetCode(self->pattern));
03787 #endif
03788     }
03789     if (PyErr_Occurred())
03790         return NULL;
03791 
03792     match = pattern_new_match((PatternObject*) self->pattern,
03793                                state, status);
03794 
03795     if (status == 0 || state->ptr == state->start)
03796         state->start = (void*) ((char*) state->ptr + state->charsize);
03797     else
03798         state->start = state->ptr;
03799 
03800     return match;
03801 }
03802 
03803 static PyMethodDef scanner_methods[] = {
03804     {"match", (PyCFunction) scanner_match, METH_NOARGS},
03805     {"search", (PyCFunction) scanner_search, METH_NOARGS},
03806     {NULL, NULL}
03807 };
03808 
03809 #define SCAN_OFF(x) offsetof(ScannerObject, x)
03810 static PyMemberDef scanner_members[] = {
03811     {"pattern",      T_OBJECT,     SCAN_OFF(pattern),   READONLY},
03812     {NULL}  /* Sentinel */
03813 };
03814 
03815 static PyTypeObject Scanner_Type = {
03816     PyVarObject_HEAD_INIT(NULL, 0)
03817     "_" SRE_MODULE ".SRE_Scanner",
03818     sizeof(ScannerObject), 0,
03819     (destructor)scanner_dealloc,/* tp_dealloc */
03820     0,                      /* tp_print */
03821     0,                      /* tp_getattr */
03822     0,                      /* tp_setattr */
03823     0,                      /* tp_reserved */
03824     0,                      /* tp_repr */
03825     0,                      /* tp_as_number */
03826     0,                      /* tp_as_sequence */
03827     0,                      /* tp_as_mapping */
03828     0,                      /* tp_hash */
03829     0,                      /* tp_call */
03830     0,                      /* tp_str */
03831     0,                      /* tp_getattro */
03832     0,                      /* tp_setattro */
03833     0,                      /* tp_as_buffer */
03834     Py_TPFLAGS_DEFAULT,            /* tp_flags */
03835     0,                      /* tp_doc */
03836     0,                      /* tp_traverse */
03837     0,                      /* tp_clear */
03838     0,                      /* tp_richcompare */
03839     0,                      /* tp_weaklistoffset */
03840     0,                      /* tp_iter */
03841     0,                      /* tp_iternext */
03842     scanner_methods,        /* tp_methods */
03843     scanner_members,        /* tp_members */
03844     0,                      /* tp_getset */
03845 };
03846 
03847 static PyObject*
03848 pattern_scanner(PatternObject* pattern, PyObject* args)
03849 {
03850     /* create search state object */
03851 
03852     ScannerObject* self;
03853 
03854     PyObject* string;
03855     Py_ssize_t start = 0;
03856     Py_ssize_t end = PY_SSIZE_T_MAX;
03857     if (!PyArg_ParseTuple(args, "O|nn:scanner", &string, &start, &end))
03858         return NULL;
03859 
03860     /* create scanner object */
03861     self = PyObject_NEW(ScannerObject, &Scanner_Type);
03862     if (!self)
03863         return NULL;
03864     self->pattern = NULL;
03865 
03866     string = state_init(&self->state, pattern, string, start, end);
03867     if (!string) {
03868         Py_DECREF(self);
03869         return NULL;
03870     }
03871 
03872     Py_INCREF(pattern);
03873     self->pattern = (PyObject*) pattern;
03874 
03875     return (PyObject*) self;
03876 }
03877 
03878 static PyMethodDef _functions[] = {
03879     {"compile", _compile, METH_VARARGS},
03880     {"getcodesize", sre_codesize, METH_NOARGS},
03881     {"getlower", sre_getlower, METH_VARARGS},
03882     {NULL, NULL}
03883 };
03884 
03885 static struct PyModuleDef sremodule = {
03886        PyModuleDef_HEAD_INIT,
03887        "_" SRE_MODULE,
03888        NULL,
03889        -1,
03890        _functions,
03891        NULL,
03892        NULL,
03893        NULL,
03894        NULL
03895 };
03896 
03897 PyMODINIT_FUNC PyInit__sre(void)
03898 {
03899     PyObject* m;
03900     PyObject* d;
03901     PyObject* x;
03902 
03903     /* Patch object types */
03904     if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
03905         PyType_Ready(&Scanner_Type))
03906         return NULL;
03907 
03908     m = PyModule_Create(&sremodule);
03909     if (m == NULL)
03910        return NULL;
03911     d = PyModule_GetDict(m);
03912 
03913     x = PyLong_FromLong(SRE_MAGIC);
03914     if (x) {
03915         PyDict_SetItemString(d, "MAGIC", x);
03916         Py_DECREF(x);
03917     }
03918 
03919     x = PyLong_FromLong(sizeof(SRE_CODE));
03920     if (x) {
03921         PyDict_SetItemString(d, "CODESIZE", x);
03922         Py_DECREF(x);
03923     }
03924 
03925     x = PyUnicode_FromString(copyright);
03926     if (x) {
03927         PyDict_SetItemString(d, "copyright", x);
03928         Py_DECREF(x);
03929     }
03930     return m;
03931 }
03932 
03933 #endif /* !defined(SRE_RECURSIVE) */
03934 
03935 /* vim:ts=4:sw=4:et
03936 */