Back to index

php5  5.3.10
pcre_get.c
Go to the documentation of this file.
00001 /*************************************************
00002 *      Perl-Compatible Regular Expressions       *
00003 *************************************************/
00004 
00005 /* PCRE is a library of functions to support regular expressions whose syntax
00006 and semantics are as close as possible to those of the Perl 5 language.
00007 
00008                        Written by Philip Hazel
00009            Copyright (c) 1997-2008 University of Cambridge
00010 
00011 -----------------------------------------------------------------------------
00012 Redistribution and use in source and binary forms, with or without
00013 modification, are permitted provided that the following conditions are met:
00014 
00015     * Redistributions of source code must retain the above copyright notice,
00016       this list of conditions and the following disclaimer.
00017 
00018     * Redistributions in binary form must reproduce the above copyright
00019       notice, this list of conditions and the following disclaimer in the
00020       documentation and/or other materials provided with the distribution.
00021 
00022     * Neither the name of the University of Cambridge nor the names of its
00023       contributors may be used to endorse or promote products derived from
00024       this software without specific prior written permission.
00025 
00026 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
00027 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00028 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00029 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
00030 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
00031 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
00032 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
00033 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
00034 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
00035 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00036 POSSIBILITY OF SUCH DAMAGE.
00037 -----------------------------------------------------------------------------
00038 */
00039 
00040 
00041 /* This module contains some convenience functions for extracting substrings
00042 from the subject string after a regex match has succeeded. The original idea
00043 for these functions came from Scott Wimer. */
00044 
00045 
00046 #include "config.h"
00047 
00048 #include "pcre_internal.h"
00049 
00050 
00051 /*************************************************
00052 *           Find number for named string         *
00053 *************************************************/
00054 
00055 /* This function is used by the get_first_set() function below, as well
00056 as being generally available. It assumes that names are unique.
00057 
00058 Arguments:
00059   code        the compiled regex
00060   stringname  the name whose number is required
00061 
00062 Returns:      the number of the named parentheses, or a negative number
00063                 (PCRE_ERROR_NOSUBSTRING) if not found
00064 */
00065 
00066 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
00067 pcre_get_stringnumber(const pcre *code, const char *stringname)
00068 {
00069 int rc;
00070 int entrysize;
00071 int top, bot;
00072 uschar *nametable;
00073 
00074 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
00075   return rc;
00076 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
00077 
00078 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
00079   return rc;
00080 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
00081   return rc;
00082 
00083 bot = 0;
00084 while (top > bot)
00085   {
00086   int mid = (top + bot) / 2;
00087   uschar *entry = nametable + entrysize*mid;
00088   int c = strcmp(stringname, (char *)(entry + 2));
00089   if (c == 0) return (entry[0] << 8) + entry[1];
00090   if (c > 0) bot = mid + 1; else top = mid;
00091   }
00092 
00093 return PCRE_ERROR_NOSUBSTRING;
00094 }
00095 
00096 
00097 
00098 /*************************************************
00099 *     Find (multiple) entries for named string   *
00100 *************************************************/
00101 
00102 /* This is used by the get_first_set() function below, as well as being
00103 generally available. It is used when duplicated names are permitted.
00104 
00105 Arguments:
00106   code        the compiled regex
00107   stringname  the name whose entries are required
00108   firstptr    where to put the pointer to the first entry
00109   lastptr     where to put the pointer to the last entry
00110 
00111 Returns:      the length of each entry, or a negative number
00112                 (PCRE_ERROR_NOSUBSTRING) if not found
00113 */
00114 
00115 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
00116 pcre_get_stringtable_entries(const pcre *code, const char *stringname,
00117   char **firstptr, char **lastptr)
00118 {
00119 int rc;
00120 int entrysize;
00121 int top, bot;
00122 uschar *nametable, *lastentry;
00123 
00124 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
00125   return rc;
00126 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
00127 
00128 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
00129   return rc;
00130 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
00131   return rc;
00132 
00133 lastentry = nametable + entrysize * (top - 1);
00134 bot = 0;
00135 while (top > bot)
00136   {
00137   int mid = (top + bot) / 2;
00138   uschar *entry = nametable + entrysize*mid;
00139   int c = strcmp(stringname, (char *)(entry + 2));
00140   if (c == 0)
00141     {
00142     uschar *first = entry;
00143     uschar *last = entry;
00144     while (first > nametable)
00145       {
00146       if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break;
00147       first -= entrysize;
00148       }
00149     while (last < lastentry)
00150       {
00151       if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break;
00152       last += entrysize;
00153       }
00154     *firstptr = (char *)first;
00155     *lastptr = (char *)last;
00156     return entrysize;
00157     }
00158   if (c > 0) bot = mid + 1; else top = mid;
00159   }
00160 
00161 return PCRE_ERROR_NOSUBSTRING;
00162 }
00163 
00164 
00165 
00166 /*************************************************
00167 *    Find first set of multiple named strings    *
00168 *************************************************/
00169 
00170 /* This function allows for duplicate names in the table of named substrings.
00171 It returns the number of the first one that was set in a pattern match.
00172 
00173 Arguments:
00174   code         the compiled regex
00175   stringname   the name of the capturing substring
00176   ovector      the vector of matched substrings
00177 
00178 Returns:       the number of the first that is set,
00179                or the number of the first one in the table if none are set,
00180                or a negative number on error
00181 */
00182 
00183 static int
00184 get_first_set(const pcre *code, const char *stringname, int *ovector)
00185 {
00186 const real_pcre *re = (const real_pcre *)code;
00187 int entrysize;
00188 char *first, *last;
00189 uschar *entry;
00190 if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
00191   return pcre_get_stringnumber(code, stringname);
00192 entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
00193 if (entrysize <= 0) return entrysize;
00194 for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize)
00195   {
00196   int n = (entry[0] << 8) + entry[1];
00197   if (ovector[n*2] >= 0) return n;
00198   }
00199 return (first[0] << 8) + first[1];
00200 }
00201 
00202 
00203 
00204 
00205 /*************************************************
00206 *      Copy captured string to given buffer      *
00207 *************************************************/
00208 
00209 /* This function copies a single captured substring into a given buffer.
00210 Note that we use memcpy() rather than strncpy() in case there are binary zeros
00211 in the string.
00212 
00213 Arguments:
00214   subject        the subject string that was matched
00215   ovector        pointer to the offsets table
00216   stringcount    the number of substrings that were captured
00217                    (i.e. the yield of the pcre_exec call, unless
00218                    that was zero, in which case it should be 1/3
00219                    of the offset table size)
00220   stringnumber   the number of the required substring
00221   buffer         where to put the substring
00222   size           the size of the buffer
00223 
00224 Returns:         if successful:
00225                    the length of the copied string, not including the zero
00226                    that is put on the end; can be zero
00227                  if not successful:
00228                    PCRE_ERROR_NOMEMORY (-6) buffer too small
00229                    PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
00230 */
00231 
00232 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
00233 pcre_copy_substring(const char *subject, int *ovector, int stringcount,
00234   int stringnumber, char *buffer, int size)
00235 {
00236 int yield;
00237 if (stringnumber < 0 || stringnumber >= stringcount)
00238   return PCRE_ERROR_NOSUBSTRING;
00239 stringnumber *= 2;
00240 yield = ovector[stringnumber+1] - ovector[stringnumber];
00241 if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
00242 memcpy(buffer, subject + ovector[stringnumber], yield);
00243 buffer[yield] = 0;
00244 return yield;
00245 }
00246 
00247 
00248 
00249 /*************************************************
00250 *   Copy named captured string to given buffer   *
00251 *************************************************/
00252 
00253 /* This function copies a single captured substring into a given buffer,
00254 identifying it by name. If the regex permits duplicate names, the first
00255 substring that is set is chosen.
00256 
00257 Arguments:
00258   code           the compiled regex
00259   subject        the subject string that was matched
00260   ovector        pointer to the offsets table
00261   stringcount    the number of substrings that were captured
00262                    (i.e. the yield of the pcre_exec call, unless
00263                    that was zero, in which case it should be 1/3
00264                    of the offset table size)
00265   stringname     the name of the required substring
00266   buffer         where to put the substring
00267   size           the size of the buffer
00268 
00269 Returns:         if successful:
00270                    the length of the copied string, not including the zero
00271                    that is put on the end; can be zero
00272                  if not successful:
00273                    PCRE_ERROR_NOMEMORY (-6) buffer too small
00274                    PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
00275 */
00276 
00277 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
00278 pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
00279   int stringcount, const char *stringname, char *buffer, int size)
00280 {
00281 int n = get_first_set(code, stringname, ovector);
00282 if (n <= 0) return n;
00283 return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
00284 }
00285 
00286 
00287 
00288 /*************************************************
00289 *      Copy all captured strings to new store    *
00290 *************************************************/
00291 
00292 /* This function gets one chunk of store and builds a list of pointers and all
00293 of the captured substrings in it. A NULL pointer is put on the end of the list.
00294 
00295 Arguments:
00296   subject        the subject string that was matched
00297   ovector        pointer to the offsets table
00298   stringcount    the number of substrings that were captured
00299                    (i.e. the yield of the pcre_exec call, unless
00300                    that was zero, in which case it should be 1/3
00301                    of the offset table size)
00302   listptr        set to point to the list of pointers
00303 
00304 Returns:         if successful: 0
00305                  if not successful:
00306                    PCRE_ERROR_NOMEMORY (-6) failed to get store
00307 */
00308 
00309 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
00310 pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
00311   const char ***listptr)
00312 {
00313 int i;
00314 int size = sizeof(char *);
00315 int double_count = stringcount * 2;
00316 char **stringlist;
00317 char *p;
00318 
00319 for (i = 0; i < double_count; i += 2)
00320   size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
00321 
00322 stringlist = (char **)(pcre_malloc)(size);
00323 if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
00324 
00325 *listptr = (const char **)stringlist;
00326 p = (char *)(stringlist + stringcount + 1);
00327 
00328 for (i = 0; i < double_count; i += 2)
00329   {
00330   int len = ovector[i+1] - ovector[i];
00331   memcpy(p, subject + ovector[i], len);
00332   *stringlist++ = p;
00333   p += len;
00334   *p++ = 0;
00335   }
00336 
00337 *stringlist = NULL;
00338 return 0;
00339 }
00340 
00341 
00342 
00343 /*************************************************
00344 *   Free store obtained by get_substring_list    *
00345 *************************************************/
00346 
00347 /* This function exists for the benefit of people calling PCRE from non-C
00348 programs that can call its functions, but not free() or (pcre_free)() directly.
00349 
00350 Argument:   the result of a previous pcre_get_substring_list()
00351 Returns:    nothing
00352 */
00353 
00354 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
00355 pcre_free_substring_list(const char **pointer)
00356 {
00357 (pcre_free)((void *)pointer);
00358 }
00359 
00360 
00361 
00362 /*************************************************
00363 *      Copy captured string to new store         *
00364 *************************************************/
00365 
00366 /* This function copies a single captured substring into a piece of new
00367 store.
00368 
00369 Arguments:
00370   subject        the subject string that was matched
00371   ovector        pointer to the offsets table
00372   stringcount    the number of substrings that were captured
00373                    (i.e. the yield of the pcre_exec call, unless
00374                    that was zero, in which case it should be 1/3
00375                    of the offset table size)
00376   stringnumber   the number of the required substring
00377   stringptr      where to put a pointer to the substring
00378 
00379 Returns:         if successful:
00380                    the length of the string, not including the zero that
00381                    is put on the end; can be zero
00382                  if not successful:
00383                    PCRE_ERROR_NOMEMORY (-6) failed to get store
00384                    PCRE_ERROR_NOSUBSTRING (-7) substring not present
00385 */
00386 
00387 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
00388 pcre_get_substring(const char *subject, int *ovector, int stringcount,
00389   int stringnumber, const char **stringptr)
00390 {
00391 int yield;
00392 char *substring;
00393 if (stringnumber < 0 || stringnumber >= stringcount)
00394   return PCRE_ERROR_NOSUBSTRING;
00395 stringnumber *= 2;
00396 yield = ovector[stringnumber+1] - ovector[stringnumber];
00397 substring = (char *)(pcre_malloc)(yield + 1);
00398 if (substring == NULL) return PCRE_ERROR_NOMEMORY;
00399 memcpy(substring, subject + ovector[stringnumber], yield);
00400 substring[yield] = 0;
00401 *stringptr = substring;
00402 return yield;
00403 }
00404 
00405 
00406 
00407 /*************************************************
00408 *   Copy named captured string to new store      *
00409 *************************************************/
00410 
00411 /* This function copies a single captured substring, identified by name, into
00412 new store. If the regex permits duplicate names, the first substring that is
00413 set is chosen.
00414 
00415 Arguments:
00416   code           the compiled regex
00417   subject        the subject string that was matched
00418   ovector        pointer to the offsets table
00419   stringcount    the number of substrings that were captured
00420                    (i.e. the yield of the pcre_exec call, unless
00421                    that was zero, in which case it should be 1/3
00422                    of the offset table size)
00423   stringname     the name of the required substring
00424   stringptr      where to put the pointer
00425 
00426 Returns:         if successful:
00427                    the length of the copied string, not including the zero
00428                    that is put on the end; can be zero
00429                  if not successful:
00430                    PCRE_ERROR_NOMEMORY (-6) couldn't get memory
00431                    PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
00432 */
00433 
00434 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
00435 pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
00436   int stringcount, const char *stringname, const char **stringptr)
00437 {
00438 int n = get_first_set(code, stringname, ovector);
00439 if (n <= 0) return n;
00440 return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
00441 }
00442 
00443 
00444 
00445 
00446 /*************************************************
00447 *       Free store obtained by get_substring     *
00448 *************************************************/
00449 
00450 /* This function exists for the benefit of people calling PCRE from non-C
00451 programs that can call its functions, but not free() or (pcre_free)() directly.
00452 
00453 Argument:   the result of a previous pcre_get_substring()
00454 Returns:    nothing
00455 */
00456 
00457 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
00458 pcre_free_substring(const char *pointer)
00459 {
00460 (pcre_free)((void *)pointer);
00461 }
00462 
00463 /* End of pcre_get.c */