Back to index

php5  5.3.10
collator_convert.c
Go to the documentation of this file.
00001 /*
00002    +----------------------------------------------------------------------+
00003    | PHP Version 5                                                        |
00004    +----------------------------------------------------------------------+
00005    | This source file is subject to version 3.01 of the PHP license,      |
00006    | that is bundled with this package in the file LICENSE, and is        |
00007    | available through the world-wide-web at the following url:           |
00008    | http://www.php.net/license/3_01.txt                                  |
00009    | If you did not receive a copy of the PHP license and are unable to   |
00010    | obtain it through the world-wide-web, please send a note to          |
00011    | license@php.net so we can mail you a copy immediately.               |
00012    +----------------------------------------------------------------------+
00013    | Authors: Vadim Savchuk <vsavchuk@productengine.com>                  |
00014    |          Dmitry Lakhtyuk <dlakhtyuk@productengine.com>               |
00015    +----------------------------------------------------------------------+
00016  */
00017 
00018 #ifdef HAVE_CONFIG_H
00019 #include "config.h"
00020 #endif
00021 
00022 #include "php_intl.h"
00023 #include "collator_class.h"
00024 #include "collator_is_numeric.h"
00025 #include "collator_convert.h"
00026 #include "intl_convert.h"
00027 
00028 #include <unicode/ustring.h>
00029 #include <php.h>
00030 
/* Expands to an extra ",0" argument for the cast_object() handler call on
 * PHP 5.0/5.1 and to nothing on later versions, so one call site compiles
 * against both handler signatures.
 */
#if (PHP_MAJOR_VERSION == 5 && PHP_MINOR_VERSION <= 1)
#define CAST_OBJECT_SHOULD_FREE ,0
#else
#define CAST_OBJECT_SHOULD_FREE
#endif

/* Bail-out helper: add a reference to `retval` and return it from the
 * enclosing function. Callers in this file release the returned zval with
 * zval_ptr_dtor() whether or not a conversion took place, hence the extra
 * reference on the "returned unchanged" path.
 *
 * Wrapped in do/while(0) so that `COLLATOR_CONVERT_RETURN_FAILED( x );`
 * is exactly one statement and stays correct inside an unbraced if/else.
 */
#define COLLATOR_CONVERT_RETURN_FAILED(retval) do { \
		zval_add_ref( &(retval) );                  \
		return (retval);                            \
	} while( 0 )
00041 
00042 /* {{{ collator_convert_hash_item_from_utf8_to_utf16 */
00043 static void collator_convert_hash_item_from_utf8_to_utf16(
00044        HashTable* hash, int hashKeyType, char* hashKey, ulong hashIndex,
00045        UErrorCode* status )
00046 {
00047        const char* old_val;
00048        int         old_val_len;
00049        UChar*      new_val      = NULL;
00050        int         new_val_len  = 0;
00051        zval**      hashData     = NULL;
00052        zval*       znew_val     = NULL;
00053 
00054        /* Get current hash item. */
00055        zend_hash_get_current_data( hash, (void**) &hashData );
00056 
00057        /* Process string values only. */
00058        if( Z_TYPE_P( *hashData ) != IS_STRING )
00059               return;
00060 
00061        old_val     = Z_STRVAL_P( *hashData );
00062        old_val_len = Z_STRLEN_P( *hashData );
00063 
00064        /* Convert it from UTF-8 to UTF-16LE and save the result to new_val[_len]. */
00065        intl_convert_utf8_to_utf16( &new_val, &new_val_len, old_val, old_val_len, status );
00066        if( U_FAILURE( *status ) )
00067               return;
00068 
00069        /* Update current hash item with the converted value. */
00070        MAKE_STD_ZVAL( znew_val );
00071        ZVAL_STRINGL( znew_val, (char*)new_val, UBYTES(new_val_len), FALSE );
00072 
00073        if( hashKeyType == HASH_KEY_IS_STRING )
00074        {
00075               zend_hash_update( hash, hashKey, strlen( hashKey ) + 1,
00076                      (void*) &znew_val, sizeof(zval*), NULL );
00077        }
00078        else /* hashKeyType == HASH_KEY_IS_LONG */
00079        {
00080               zend_hash_index_update( hash, hashIndex,
00081                      (void*) &znew_val, sizeof(zval*), NULL );
00082        }
00083 }
00084 /* }}} */
00085 
00086 /* {{{ collator_convert_hash_item_from_utf16_to_utf8 */
00087 static void collator_convert_hash_item_from_utf16_to_utf8(
00088        HashTable* hash, int hashKeyType, char* hashKey, ulong hashIndex,
00089        UErrorCode* status )
00090 {
00091        const char* old_val;
00092        int         old_val_len;
00093        char*       new_val      = NULL;
00094        int         new_val_len  = 0;
00095        zval**      hashData     = NULL;
00096        zval*       znew_val     = NULL;
00097 
00098        /* Get current hash item. */
00099        zend_hash_get_current_data( hash, (void**) &hashData );
00100 
00101        /* Process string values only. */
00102        if( Z_TYPE_P( *hashData ) != IS_STRING )
00103               return;
00104 
00105        old_val     = Z_STRVAL_P( *hashData );
00106        old_val_len = Z_STRLEN_P( *hashData );
00107 
00108        /* Convert it from UTF-16LE to UTF-8 and save the result to new_val[_len]. */
00109        intl_convert_utf16_to_utf8( &new_val, &new_val_len,
00110               (UChar*)old_val, UCHARS(old_val_len), status );
00111        if( U_FAILURE( *status ) )
00112               return;
00113 
00114        /* Update current hash item with the converted value. */
00115        MAKE_STD_ZVAL( znew_val );
00116        ZVAL_STRINGL( znew_val, (char*)new_val, new_val_len, FALSE );
00117 
00118        if( hashKeyType == HASH_KEY_IS_STRING )
00119        {
00120               zend_hash_update( hash, hashKey, strlen( hashKey ) + 1,
00121                      (void*) &znew_val, sizeof(zval*), NULL );
00122        }
00123        else /* hashKeyType == HASH_KEY_IS_LONG */
00124        {
00125               zend_hash_index_update( hash, hashIndex,
00126                      (void*) &znew_val, sizeof(zval*), NULL );
00127        }
00128 }
00129 /* }}} */
00130 
00131 /* {{{ collator_convert_hash_from_utf8_to_utf16
00132  *  Convert values of the given hash from UTF-8 encoding to UTF-16LE.
00133  */
00134 void collator_convert_hash_from_utf8_to_utf16( HashTable* hash, UErrorCode* status )
00135 {
00136        ulong    hashIndex    = 0;
00137        char*    hashKey      = NULL;
00138        int      hashKeyType  = 0;
00139 
00140        zend_hash_internal_pointer_reset( hash );
00141        while( ( hashKeyType = zend_hash_get_current_key( hash, &hashKey, &hashIndex, 0 ) )
00142                      != HASH_KEY_NON_EXISTANT )
00143        {
00144               /* Convert current hash item from UTF-8 to UTF-16LE. */
00145               collator_convert_hash_item_from_utf8_to_utf16(
00146                      hash, hashKeyType, hashKey, hashIndex, status );
00147               if( U_FAILURE( *status ) )
00148                      return;
00149 
00150               /* Proceed to the next item. */
00151               zend_hash_move_forward( hash );
00152        }
00153 }
00154 /* }}} */
00155 
00156 /* {{{ collator_convert_hash_from_utf16_to_utf8
00157  * Convert values of the given hash from UTF-16LE encoding to UTF-8.
00158  */
00159 void collator_convert_hash_from_utf16_to_utf8( HashTable* hash, UErrorCode* status )
00160 {
00161        ulong    hashIndex    = 0;
00162        char*    hashKey      = NULL;
00163        int      hashKeyType  = 0;
00164 
00165        zend_hash_internal_pointer_reset( hash );
00166        while( ( hashKeyType = zend_hash_get_current_key( hash, &hashKey, &hashIndex, 0 ) )
00167                      != HASH_KEY_NON_EXISTANT )
00168        {
00169               /* Convert current hash item from UTF-16LE to UTF-8. */
00170               collator_convert_hash_item_from_utf16_to_utf8(
00171                      hash, hashKeyType, hashKey, hashIndex, status );
00172               if( U_FAILURE( *status ) ) {
00173                      return;
00174               }
00175 
00176               /* Proceed to the next item. */
00177               zend_hash_move_forward( hash );
00178        }
00179 }
00180 /* }}} */
00181 
00182 /* {{{ collator_convert_zstr_utf16_to_utf8
00183  *
00184  * Convert string from utf16 to utf8.
00185  *
00186  * @param  zval* utf16_zval String to convert.
00187  *
00188  * @return zval* Converted string.
00189  */
00190 zval* collator_convert_zstr_utf16_to_utf8( zval* utf16_zval )
00191 {
00192        zval* utf8_zval   = NULL;
00193        char* str         = NULL;
00194        int   str_len     = 0;
00195        UErrorCode status = U_ZERO_ERROR;
00196 
00197        /* Convert to utf8 then. */
00198        intl_convert_utf16_to_utf8( &str, &str_len,
00199               (UChar*) Z_STRVAL_P(utf16_zval), UCHARS( Z_STRLEN_P(utf16_zval) ), &status );
00200        if( U_FAILURE( status ) )
00201               php_error( E_WARNING, "Error converting utf16 to utf8 in collator_convert_zval_utf16_to_utf8()" );
00202 
00203        ALLOC_INIT_ZVAL( utf8_zval );
00204        ZVAL_STRINGL( utf8_zval, str, str_len, FALSE );
00205 
00206        return utf8_zval;
00207 }
00208 /* }}} */
00209 
00210 /* {{{ collator_convert_zstr_utf8_to_utf16
00211  *
00212  * Convert string from utf8 to utf16.
00213  *
00214  * @param  zval* utf8_zval String to convert.
00215  *
00216  * @return zval* Converted string.
00217  */
00218 zval* collator_convert_zstr_utf8_to_utf16( zval* utf8_zval )
00219 {
00220        zval* zstr        = NULL;
00221        UChar* ustr       = NULL;
00222        int    ustr_len   = 0;
00223        UErrorCode status = U_ZERO_ERROR;
00224 
00225        /* Convert the string to UTF-16. */
00226        intl_convert_utf8_to_utf16(
00227                      &ustr, &ustr_len,
00228                      Z_STRVAL_P( utf8_zval ), Z_STRLEN_P( utf8_zval ),
00229                      &status );
00230        if( U_FAILURE( status ) )
00231               php_error( E_WARNING, "Error casting object to string in collator_convert_zstr_utf8_to_utf16()" );
00232 
00233        /* Set string. */
00234        ALLOC_INIT_ZVAL( zstr );
00235        ZVAL_STRINGL( zstr, (char*)ustr, UBYTES(ustr_len), FALSE );
00236 
00237        return zstr;
00238 }
00239 /* }}} */
00240 
00241 /* {{{ collator_convert_object_to_string
00242  * Convert object to UTF16-encoded string.
00243  */
00244 zval* collator_convert_object_to_string( zval* obj TSRMLS_DC )
00245 {
00246        zval* zstr        = NULL;
00247        UErrorCode status = U_ZERO_ERROR;
00248        UChar* ustr       = NULL;
00249        int    ustr_len   = 0;
00250 
00251        /* Bail out if it's not an object. */
00252        if( Z_TYPE_P( obj ) != IS_OBJECT )
00253        {
00254               COLLATOR_CONVERT_RETURN_FAILED( obj );
00255        }
00256 
00257        /* Try object's handlers. */
00258        if( Z_OBJ_HT_P(obj)->get )
00259        {
00260               zstr = Z_OBJ_HT_P(obj)->get( obj TSRMLS_CC );
00261 
00262               switch( Z_TYPE_P( zstr ) )
00263               {
00264                      case IS_OBJECT:
00265                             {
00266                                    /* Bail out. */
00267                                    zval_ptr_dtor( &zstr );
00268                                    COLLATOR_CONVERT_RETURN_FAILED( obj );
00269                             } break;
00270 
00271                      case IS_STRING:
00272                             break;
00273 
00274                      default:
00275                             {
00276                                    convert_to_string( zstr );
00277                             } break;
00278               }
00279        }
00280        else if( Z_OBJ_HT_P(obj)->cast_object )
00281        {
00282               ALLOC_INIT_ZVAL( zstr );
00283 
00284               if( Z_OBJ_HT_P(obj)->cast_object( obj, zstr, IS_STRING CAST_OBJECT_SHOULD_FREE TSRMLS_CC ) == FAILURE )
00285               {
00286                      /* cast_object failed => bail out. */
00287                      zval_ptr_dtor( &zstr );
00288                      COLLATOR_CONVERT_RETURN_FAILED( obj );
00289               }
00290        }
00291 
00292        /* Object wasn't successfuly converted => bail out. */
00293        if( zstr == NULL )
00294        {
00295               COLLATOR_CONVERT_RETURN_FAILED( obj );
00296        }
00297 
00298        /* Convert the string to UTF-16. */
00299        intl_convert_utf8_to_utf16(
00300                      &ustr, &ustr_len,
00301                      Z_STRVAL_P( zstr ), Z_STRLEN_P( zstr ),
00302                      &status );
00303        if( U_FAILURE( status ) )
00304               php_error( E_WARNING, "Error casting object to string in collator_convert_object_to_string()" );
00305 
00306        /* Cleanup zstr to hold utf16 string. */
00307        zval_dtor( zstr );
00308 
00309        /* Set string. */
00310        ZVAL_STRINGL( zstr, (char*)ustr, UBYTES(ustr_len), FALSE );
00311 
00312        /* Don't free ustr cause it's set in zstr without copy.
00313         * efree( ustr );
00314         */
00315 
00316        return zstr;
00317 }
00318 /* }}} */
00319 
00320 /* {{{ collator_convert_string_to_number
00321  *
00322  * Convert string to number.
00323  *
00324  * @param  zval* str String to convert.
00325  *
00326  * @return zval* Number. If str is not numeric string return number zero.
00327  */
00328 zval* collator_convert_string_to_number( zval* str )
00329 {
00330        zval* num = collator_convert_string_to_number_if_possible( str );
00331        if( num == str )
00332        {
00333               /* String wasn't converted => return zero. */
00334               zval_ptr_dtor( &num );
00335 
00336               ALLOC_INIT_ZVAL( num );
00337               ZVAL_LONG( num, 0 );
00338        }
00339 
00340        return num;
00341 }
00342 /* }}} */
00343 
00344 /* {{{ collator_convert_string_to_double
00345  *
00346  * Convert string to double.
00347  *
00348  * @param  zval* str String to convert.
00349  *
00350  * @return zval* Number. If str is not numeric string return number zero.
00351  */
00352 zval* collator_convert_string_to_double( zval* str )
00353 {
00354        zval* num = collator_convert_string_to_number( str );
00355        if( Z_TYPE_P(num) == IS_LONG )
00356        {
00357               ZVAL_DOUBLE( num, Z_LVAL_P( num ) );
00358        }
00359 
00360        return num;
00361 }
00362 /* }}} */
00363 
00364 /* {{{ collator_convert_string_to_number_if_possible
00365  *
00366  * Convert string to numer.
00367  *
00368  * @param  zval* str String to convert.
00369  *
00370  * @return zval* Number if str is numeric string. Otherwise
00371  *               original str param.
00372  */
00373 zval* collator_convert_string_to_number_if_possible( zval* str )
00374 {
00375        zval* num      = NULL;
00376        int is_numeric = 0;
00377        long lval      = 0;
00378        double dval    = 0;
00379 
00380        if( Z_TYPE_P( str ) != IS_STRING )
00381        {
00382               COLLATOR_CONVERT_RETURN_FAILED( str );
00383        }
00384 
00385        if( ( is_numeric = collator_is_numeric( (UChar*) Z_STRVAL_P(str), UCHARS( Z_STRLEN_P(str) ), &lval, &dval, 1 ) ) )
00386        {
00387               ALLOC_INIT_ZVAL( num );
00388 
00389               if( is_numeric == IS_LONG )
00390                      Z_LVAL_P(num) = lval;
00391               if( is_numeric == IS_DOUBLE )
00392                      Z_DVAL_P(num) = dval;
00393 
00394               Z_TYPE_P(num) = is_numeric;
00395        }
00396        else
00397        {
00398               COLLATOR_CONVERT_RETURN_FAILED( str );
00399        }
00400 
00401        return num;
00402 }
00403 /* }}} */
00404 
00405 /* {{{ collator_make_printable_zval
00406  *
00407  * Returns string from input zval.
00408  *
00409  * @param  zval* arg zval to get string from
00410  *
00411  * @return zval* UTF16 string.
00412  */
00413 zval* collator_make_printable_zval( zval* arg )
00414 {
00415        zval arg_copy;
00416        int use_copy = 0;
00417        zval* str    = NULL;
00418 
00419        if( Z_TYPE_P(arg) != IS_STRING )
00420        {
00421               zend_make_printable_zval(arg, &arg_copy, &use_copy);
00422 
00423               if( use_copy )
00424               {
00425                      str = collator_convert_zstr_utf8_to_utf16( &arg_copy );
00426                      zval_dtor( &arg_copy );
00427               }
00428               else
00429               {
00430                      str = collator_convert_zstr_utf8_to_utf16( arg );
00431               }
00432        }
00433        else
00434        {
00435               COLLATOR_CONVERT_RETURN_FAILED( arg );
00436        }
00437 
00438        return str;
00439 }
00440 /* }}} */
00441 
00442 /* {{{ collator_normalize_sort_argument
00443  *
00444  * Normalize argument to use in sort's compare function.
00445  *
00446  * @param  zval* arg Sort's argument to normalize.
00447  *
00448  * @return zval* Normalized copy of arg or unmodified arg
00449  *               if normalization is not needed.
00450  */
00451 zval* collator_normalize_sort_argument( zval* arg )
00452 {
00453        zval* n_arg = NULL;
00454 
00455        if( Z_TYPE_P( arg ) != IS_STRING )
00456        {
00457               /* If its not a string then nothing to do.
00458                * Return original arg.
00459                */
00460               COLLATOR_CONVERT_RETURN_FAILED( arg );
00461        }
00462 
00463        /* Try convert to number. */
00464        n_arg = collator_convert_string_to_number_if_possible( arg );
00465 
00466        if( n_arg == arg )
00467        {
00468               /* Conversion to number failed. */
00469               zval_ptr_dtor( &n_arg );
00470 
00471               /* Convert string to utf8. */
00472               n_arg = collator_convert_zstr_utf16_to_utf8( arg );
00473        }
00474 
00475        return n_arg;
00476 }
00477 /* }}} */
00478 /*
00479  * Local variables:
00480  * tab-width: 4
00481  * c-basic-offset: 4
00482  * End:
00483  * vim600: noet sw=4 ts=4 fdm=marker
00484  * vim<600: noet sw=4 ts=4
00485  */