Back to index

php5  5.3.10
regext.c
Go to the documentation of this file.
00001 /**********************************************************************
00002   regext.c -  Oniguruma (regular expression library)
00003 **********************************************************************/
00004 /*-
00005  * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
00006  * All rights reserved.
00007  *
00008  * Redistribution and use in source and binary forms, with or without
00009  * modification, are permitted provided that the following conditions
00010  * are met:
00011  * 1. Redistributions of source code must retain the above copyright
00012  *    notice, this list of conditions and the following disclaimer.
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in the
00015  *    documentation and/or other materials provided with the distribution.
00016  *
00017  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
00018  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00019  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00020  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
00021  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00022  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00023  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00024  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00025  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00026  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00027  * SUCH DAMAGE.
00028  */
00029 
00030 #include "regint.h"
00031 
00032 static void
00033 conv_ext0be32(const UChar* s, const UChar* end, UChar* conv)
00034 {
00035   while (s < end) {
00036     *conv++ = '\0';
00037     *conv++ = '\0';
00038     *conv++ = '\0';
00039     *conv++ = *s++;
00040   }
00041 }
00042 
00043 static void
00044 conv_ext0le32(const UChar* s, const UChar* end, UChar* conv)
00045 {
00046   while (s < end) {
00047     *conv++ = *s++;
00048     *conv++ = '\0';
00049     *conv++ = '\0';
00050     *conv++ = '\0';
00051   }
00052 }
00053 
00054 static void
00055 conv_ext0be(const UChar* s, const UChar* end, UChar* conv)
00056 {
00057   while (s < end) {
00058     *conv++ = '\0';
00059     *conv++ = *s++;
00060   }
00061 }
00062 
00063 static void
00064 conv_ext0le(const UChar* s, const UChar* end, UChar* conv)
00065 {
00066   while (s < end) {
00067     *conv++ = *s++;
00068     *conv++ = '\0';
00069   }
00070 }
00071 
00072 static void
00073 conv_swap4bytes(const UChar* s, const UChar* end, UChar* conv)
00074 {
00075   while (s < end) {
00076     *conv++ = s[3];
00077     *conv++ = s[2];
00078     *conv++ = s[1];
00079     *conv++ = s[0];
00080     s += 4;
00081   }
00082 }
00083 
00084 static void
00085 conv_swap2bytes(const UChar* s, const UChar* end, UChar* conv)
00086 {
00087   while (s < end) {
00088     *conv++ = s[1];
00089     *conv++ = s[0];
00090     s += 2;
00091   }
00092 }
00093 
00094 static int
00095 conv_encoding(OnigEncoding from, OnigEncoding to, const UChar* s, const UChar* end,
00096               UChar** conv, UChar** conv_end)
00097 {
00098   int len = end - s;
00099 
00100   if (to == ONIG_ENCODING_UTF16_BE) {
00101     if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
00102       *conv = (UChar* )xmalloc(len * 2);
00103       CHECK_NULL_RETURN_VAL(*conv, ONIGERR_MEMORY);
00104       *conv_end = *conv + (len * 2);
00105       conv_ext0be(s, end, *conv);
00106       return 0;
00107     }
00108     else if (from == ONIG_ENCODING_UTF16_LE) {
00109     swap16:
00110       *conv = (UChar* )xmalloc(len);
00111       CHECK_NULL_RETURN_VAL(*conv, ONIGERR_MEMORY);
00112       *conv_end = *conv + len;
00113       conv_swap2bytes(s, end, *conv);
00114       return 0;
00115     }
00116   }
00117   else if (to == ONIG_ENCODING_UTF16_LE) {
00118     if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
00119       *conv = (UChar* )xmalloc(len * 2);
00120       CHECK_NULL_RETURN_VAL(*conv, ONIGERR_MEMORY);
00121       *conv_end = *conv + (len * 2);
00122       conv_ext0le(s, end, *conv);
00123       return 0;
00124     }
00125     else if (from == ONIG_ENCODING_UTF16_BE) {
00126       goto swap16;
00127     }
00128   }
00129   if (to == ONIG_ENCODING_UTF32_BE) {
00130     if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
00131       *conv = (UChar* )xmalloc(len * 4);
00132       CHECK_NULL_RETURN_VAL(*conv, ONIGERR_MEMORY);
00133       *conv_end = *conv + (len * 4);
00134       conv_ext0be32(s, end, *conv);
00135       return 0;
00136     }
00137     else if (from == ONIG_ENCODING_UTF32_LE) {
00138     swap32:
00139       *conv = (UChar* )xmalloc(len);
00140       CHECK_NULL_RETURN_VAL(*conv, ONIGERR_MEMORY);
00141       *conv_end = *conv + len;
00142       conv_swap4bytes(s, end, *conv);
00143       return 0;
00144     }
00145   }
00146   else if (to == ONIG_ENCODING_UTF32_LE) {
00147     if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
00148       *conv = (UChar* )xmalloc(len * 4);
00149       CHECK_NULL_RETURN_VAL(*conv, ONIGERR_MEMORY);
00150       *conv_end = *conv + (len * 4);
00151       conv_ext0le32(s, end, *conv);
00152       return 0;
00153     }
00154     else if (from == ONIG_ENCODING_UTF32_BE) {
00155       goto swap32;
00156     }
00157   }
00158 
00159   return ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION;
00160 }
00161 
00162 extern int
00163 onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
00164                 OnigCompileInfo* ci, OnigErrorInfo* einfo)
00165 {
00166   int r;
00167   UChar *cpat, *cpat_end;
00168 
00169   if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
00170 
00171   if (ci->pattern_enc != ci->target_enc) {
00172     r = conv_encoding(ci->pattern_enc, ci->target_enc, pattern, pattern_end,
00173                       &cpat, &cpat_end);
00174     if (r) return r;
00175   }
00176   else {
00177     cpat     = (UChar* )pattern;
00178     cpat_end = (UChar* )pattern_end;
00179   }
00180 
00181   r = onig_alloc_init(reg, ci->option, ci->ambig_flag, ci->target_enc,
00182                       ci->syntax);
00183   if (r) goto err;
00184 
00185   r = onig_compile(*reg, cpat, cpat_end, einfo);
00186   if (r) {
00187     onig_free(*reg);
00188     *reg = NULL;
00189   }
00190 
00191  err:
00192   if (cpat != pattern) xfree(cpat);
00193 
00194   return r;
00195 }
00196 
00197 #ifdef USE_RECOMPILE_API
00198 extern int
00199 onig_recompile_deluxe(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
00200                       OnigCompileInfo* ci, OnigErrorInfo* einfo)
00201 {
00202   int r;
00203   regex_t *new_reg;
00204 
00205   r = onig_new_deluxe(&new_reg, pattern, pattern_end, ci, einfo);
00206   if (r) return r;
00207   if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
00208     onig_transfer(reg, new_reg);
00209   }
00210   else {
00211     onig_chain_link_add(reg, new_reg);
00212   }
00213   return 0;
00214 }
00215 #endif