Back to index

php5  5.3.10
Functions
normalizer_normalize.h File Reference
#include <php.h>
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Functions

 PHP_FUNCTION (normalizer_normalize)
 PHP_FUNCTION (normalizer_is_normalized)

Function Documentation

PHP_FUNCTION (normalizer_normalize) — definition at line 33 of file normalizer_normalize.c.

{
       /*
        * Body of PHP_FUNCTION(normalizer_normalize).
        *
        * Parses a string plus an optional normalization form ("s|l"), converts
        * the string from UTF-8 to UTF-16, normalizes it with unorm_normalize()
        * and converts the result back to UTF-8, which becomes the return value.
        *
        * Return value seen by the caller:
        *   - the normalized string on success;
        *   - FALSE when the parameters cannot be parsed, the form is not one of
        *     the NORMALIZER_* constants handled below, a UTF-8 <-> UTF-16
        *     conversion fails, or the second normalization attempt fails;
        *   - NULL when the first normalization attempt fails with an error
        *     other than U_BUFFER_OVERFLOW_ERROR (kept for backward
        *     compatibility with existing callers).
        *
        * Every failure path records the error through the intl_error_* API.
        */
       char*                input = NULL;
       /* form is optional, defaults to NORMALIZER_DEFAULT */
       long                 form = NORMALIZER_DEFAULT;
       int                  input_len = 0;

       UChar*               uinput = NULL;
       int                  uinput_len = 0;
       /* Multiplier applied to the input length to size the first output buffer. */
       int                  expansion_factor = 1;
       UErrorCode           status = U_ZERO_ERROR;

       UChar*               uret_buf = NULL;
       int                  uret_len = 0;

       char*                ret_buf = NULL;
       int32_t              ret_len = 0;

       int32_t              size_needed;

       intl_error_reset( NULL TSRMLS_CC );

       /* Parse parameters. */
       if( zend_parse_method_parameters( ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "s|l",
                            &input, &input_len, &form ) == FAILURE )
       {
              intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
                                           "normalizer_normalize: unable to parse input params", 0 TSRMLS_CC );

              RETURN_FALSE;
       }

       /* Validate the form; decomposing forms get a larger initial buffer. */
       switch(form) {
              case NORMALIZER_FORM_D:
              case NORMALIZER_FORM_KD:
                     expansion_factor = 3;
                     break;
              case NORMALIZER_NONE:
              case NORMALIZER_FORM_C:
              case NORMALIZER_FORM_KC:
                     /* keep the default expansion factor of 1 */
                     break;
              default:
                     intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
                                          "normalizer_normalize: illegal normalization form", 0 TSRMLS_CC );
                     RETURN_FALSE;
       }

       /*
        * Normalize string (converting it to UTF-16 first).
        */

       /* First convert the string to UTF-16. */
       intl_convert_utf8_to_utf16(&uinput, &uinput_len, input, input_len, &status );

       if( U_FAILURE( status ) )
       {
              /* Set global error code. */
              intl_error_set_code( NULL, status TSRMLS_CC );

              /* Set error messages. */
              intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC );
              if (uinput) {
                     efree( uinput );
              }
              RETURN_FALSE;
       }


       /* Allocate memory for the destination buffer for normalization.
        * One extra UChar is allocated beyond the capacity reported to ICU.
        * NOTE(review): uinput_len * expansion_factor is a plain int
        * multiplication and is not guarded against overflow for huge inputs.
        */
       uret_len = uinput_len * expansion_factor;
       uret_buf = eumalloc( uret_len + 1 );

       /* normalize */
       size_needed = unorm_normalize( uinput, uinput_len, form, (int32_t) 0 /* options */, uret_buf, uret_len, &status);

       /* Bail out if an unexpected error occurred.
        * U_BUFFER_OVERFLOW_ERROR only means that the target buffer is not large
        * enough; that case is handled by the retry below. ICU warning codes
        * such as U_STRING_NOT_TERMINATED_WARNING do not satisfy U_FAILURE(),
        * so they need no explicit exclusion here.
        */
       if( U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR ) {
              /* Record the failure so intl_get_error_code()/_message() report it. */
              intl_error_set_code( NULL, status TSRMLS_CC );
              intl_error_set_custom_msg( NULL, "Error normalizing string", 0 TSRMLS_CC );
              efree( uret_buf );
              efree( uinput );
              /* NULL (not FALSE) is the historical return value of this path. */
              RETURN_NULL();
       }

       if ( size_needed > uret_len ) {
              /* realloc does not seem to work properly - memory is corrupted
               * uret_buf =  eurealloc(uret_buf, size_needed + 1);
               */
              efree( uret_buf );
              uret_buf = eumalloc( size_needed + 1 );
              uret_len = size_needed;

              /* Clear the overflow error left by the first attempt. */
              status = U_ZERO_ERROR;

              /* try normalize again */
              size_needed = unorm_normalize( uinput, uinput_len, form, (int32_t) 0 /* options */, uret_buf, uret_len, &status);

              /* Bail out if an unexpected error occurred. */
              if( U_FAILURE(status)  ) {
                     /* Set global error code. */
                     intl_error_set_code( NULL, status TSRMLS_CC );

                     /* Set error messages. */
                     intl_error_set_custom_msg( NULL,"Error normalizing string", 0 TSRMLS_CC );
                     efree( uret_buf );
                     efree( uinput );
                     RETURN_FALSE;
              }
       }

       efree( uinput );

       /* the buffer we actually used */
       uret_len = size_needed;

       /* Convert normalized string from UTF-16 to UTF-8. */
       intl_convert_utf16_to_utf8( &ret_buf, &ret_len, uret_buf, uret_len, &status );
       efree( uret_buf );
       if( U_FAILURE( status ) )
       {
              intl_error_set( NULL, status,
                            "normalizer_normalize: error converting normalized text UTF-8", 0 TSRMLS_CC );
              RETURN_FALSE;
       }

       /* Return it. The last argument is FALSE, i.e. no copy is requested;
        * presumably the return value takes ownership of ret_buf, which is why
        * it is not freed here - confirm against the Zend API.
        */
       RETVAL_STRINGL( ret_buf, ret_len, FALSE );
}

Here is the call graph for this function:

PHP_FUNCTION (normalizer_is_normalized) — definition at line 171 of file normalizer_normalize.c.

{
       /*
        * Body of PHP_FUNCTION(normalizer_is_normalized).
        *
        * Parses a string plus an optional normalization form ("s|l"), converts
        * the string from UTF-8 to UTF-16 and asks
        * unorm_isNormalizedWithOptions() whether it is already in that form.
        *
        * Returns TRUE when ICU reports the string as normalized. Returns FALSE
        * when it is not normalized and also on every error (unparsable
        * parameters, unsupported form, conversion failure, ICU failure); the
        * error paths additionally record the error through the intl_error_*
        * API so callers can tell the two cases apart.
        */
       char*         input = NULL;
       /* form is optional, defaults to NORMALIZER_DEFAULT */
       long          form = NORMALIZER_DEFAULT;
       int           input_len = 0;

       UChar*        uinput = NULL;
       int           uinput_len = 0;
       UErrorCode    status = U_ZERO_ERROR;

       UBool         uret = FALSE;

       intl_error_reset( NULL TSRMLS_CC );

       /* Parse parameters. */
       if( zend_parse_method_parameters( ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "s|l",
                            &input, &input_len, &form) == FAILURE )
       {
              intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
                            "normalizer_is_normalized: unable to parse input params", 0 TSRMLS_CC );

              RETURN_FALSE;
       }

       switch(form) {
              /* case NORMALIZER_NONE: not allowed - doesn't make sense */

              case NORMALIZER_FORM_D:
              case NORMALIZER_FORM_KD:
              case NORMALIZER_FORM_C:
              case NORMALIZER_FORM_KC:
                     break;
              default:
                     /* The message names this function, not normalizer_normalize. */
                     intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
                                          "normalizer_is_normalized: illegal normalization form", 0 TSRMLS_CC );
                     RETURN_FALSE;
       }


       /*
        * Test normalization of string (converting it to UTF-16 first).
        */

       /* First convert the string to UTF-16. */
       intl_convert_utf8_to_utf16(&uinput, &uinput_len, input, input_len, &status );

       if( U_FAILURE( status ) )
       {
              /* Set global error code. */
              intl_error_set_code( NULL, status TSRMLS_CC );

              /* Set error messages. */
              intl_error_set_custom_msg( NULL, "Error converting string to UTF-16.", 0 TSRMLS_CC );
              if (uinput) {
                     efree( uinput );
              }
              RETURN_FALSE;
       }


       /* test string */
       uret = unorm_isNormalizedWithOptions( uinput, uinput_len, form, (int32_t) 0 /* options */, &status);

       /* The UTF-16 copy is no longer needed, whatever the outcome. */
       efree( uinput );

       /* Bail out if an unexpected error occurred. */
       if( U_FAILURE(status)  ) {
              /* Set global error code, as the conversion failure path above does. */
              intl_error_set_code( NULL, status TSRMLS_CC );

              /* Set error messages. */
              intl_error_set_custom_msg( NULL,"Error testing if string is the given normalization form.", 0 TSRMLS_CC );
              RETURN_FALSE;
       }

       if ( uret ) {
              RETURN_TRUE;
       }

       RETURN_FALSE;
}

Here is the call graph for this function: