Back to index

php5  5.3.10
php_unicode.h
Go to the documentation of this file.
00001 /*
00002    +----------------------------------------------------------------------+
00003    | PHP Version 5                                                        |
00004    +----------------------------------------------------------------------+
00005    | Copyright (c) 1997-2012 The PHP Group                                |
00006    +----------------------------------------------------------------------+
00007    | This source file is subject to version 3.01 of the PHP license,      |
00008    | that is bundled with this package in the file LICENSE, and is        |
00009    | available through the world-wide-web at the following url:           |
00010    | http://www.php.net/license/3_01.txt                                  |
00011    | If you did not receive a copy of the PHP license and are unable to   |
00012    | obtain it through the world-wide-web, please send a note to          |
00013    | license@php.net so we can mail you a copy immediately.               |
00014    +----------------------------------------------------------------------+
00015    | Author: Wez Furlong (wez@thebrainroom.com)                           |
00016    +----------------------------------------------------------------------+
00017 
00018        Based on code from ucdata-2.5, which has the following Copyright:
00019    
00020        Copyright 2001 Computing Research Labs, New Mexico State University
00021  
00022        Permission is hereby granted, free of charge, to any person obtaining a
00023        copy of this software and associated documentation files (the "Software"),
00024        to deal in the Software without restriction, including without limitation
00025        the rights to use, copy, modify, merge, publish, distribute, sublicense,
00026        and/or sell copies of the Software, and to permit persons to whom the
00027        Software is furnished to do so, subject to the following conditions:
00028  
00029        The above copyright notice and this permission notice shall be included in
00030        all copies or substantial portions of the Software.
00031 */
00032 
00033 #ifndef PHP_UNICODE_H
00034 #define PHP_UNICODE_H
00035 
00036 #if HAVE_MBSTRING
/*
 * Property bits accepted in the `mask1' argument of php_unicode_is_prop().
 * Each constant occupies its own bit, so several of them can be OR'ed
 * together into a single mask.
 */

/* Marks */
#define UC_MN 0x00000001 /* non-spacing                */
#define UC_MC 0x00000002 /* spacing combining          */
#define UC_ME 0x00000004 /* enclosing                  */

/* Numbers */
#define UC_ND 0x00000008 /* decimal digit              */
#define UC_NL 0x00000010 /* letter                     */
#define UC_NO 0x00000020 /* other                      */

/* Separators */
#define UC_ZS 0x00000040 /* space                      */
#define UC_ZL 0x00000080 /* line                       */
#define UC_ZP 0x00000100 /* paragraph                  */

/* Other */
#define UC_CC 0x00000200 /* control                    */
#define UC_CF 0x00000400 /* format                     */
#define UC_OS 0x00000800 /* surrogate                  */
#define UC_CO 0x00001000 /* private use                */
#define UC_CN 0x00002000 /* not assigned               */

/* Letters */
#define UC_LU 0x00004000 /* uppercase                  */
#define UC_LL 0x00008000 /* lowercase                  */
#define UC_LT 0x00010000 /* titlecase                  */
#define UC_LM 0x00020000 /* modifier                   */
#define UC_LO 0x00040000 /* other                      */

/* Punctuation */
#define UC_PC 0x00080000 /* connector                  */
#define UC_PD 0x00100000 /* dash                       */
#define UC_PS 0x00200000 /* open                       */
#define UC_PE 0x00400000 /* close                      */
#define UC_PO 0x00800000 /* other                      */

/* Symbols */
#define UC_SM 0x01000000 /* math                       */
#define UC_SC 0x02000000 /* currency                   */
#define UC_SK 0x04000000 /* modifier                   */
#define UC_SO 0x08000000 /* other                      */

/* Directionality */
#define UC_L  0x10000000 /* left-to-right              */
#define UC_R  0x20000000 /* right-to-left              */
#define UC_EN 0x40000000 /* European number            */
#define UC_ES 0x80000000 /* European number separator  */
00073 
/*
 * Property bits accepted in the `mask2' argument of php_unicode_is_prop().
 * These share numeric values with the `mask1' bits, so they are only
 * meaningful when passed in the `mask2' position.
 */

/* Number and separator classes */
#define UC_ET 0x00000001 /* European number terminator */
#define UC_AN 0x00000002 /* Arabic number              */
#define UC_CS 0x00000004 /* common number separator    */
#define UC_B  0x00000008 /* block separator            */
#define UC_S  0x00000010 /* segment separator          */
#define UC_WS 0x00000020 /* whitespace                 */
#define UC_ON 0x00000040 /* other neutrals             */

/* Implementation specific character properties */
#define UC_CM 0x00000080 /* composite                  */
#define UC_NB 0x00000100 /* non-breaking               */
#define UC_SY 0x00000200 /* symmetric                  */
#define UC_HD 0x00000400 /* hex digit                  */
#define UC_QM 0x00000800 /* quote mark                 */
#define UC_MR 0x00001000 /* mirroring                  */
#define UC_SS 0x00002000 /* space, other               */

#define UC_CP 0x00004000 /* defined                    */

/* Added for UnicodeData-2.1.3 */
#define UC_PI 0x00008000 /* punctuation, initial       */
#define UC_PF 0x00010000 /* punctuation, final         */
00103 
00104 MBSTRING_API int php_unicode_is_prop(unsigned long code, unsigned long mask1,
00105               unsigned long mask2);
00106 MBSTRING_API char *php_unicode_convert_case(int case_mode, const char *srcstr, size_t srclen, size_t *retlen,
00107               const char *src_encoding TSRMLS_DC);
00108 
/*
 * Case conversion modes.
 * NOTE(review): presumably the values accepted as `case_mode' by
 * php_unicode_convert_case() -- confirm against the implementation.
 */
#define PHP_UNICODE_CASE_UPPER 0
#define PHP_UNICODE_CASE_LOWER 1
#define PHP_UNICODE_CASE_TITLE 2
00112 
/*
 * ctype-style character class predicates.  Each macro hands `cc' to
 * php_unicode_is_prop() together with the property bits that make up the
 * class: `mask1' bits go in the second argument, `mask2' bits in the third.
 */
#define php_unicode_is_alpha(cc) php_unicode_is_prop((cc), UC_LU|UC_LL|UC_LM|UC_LO|UC_LT, 0)
#define php_unicode_is_digit(cc) php_unicode_is_prop((cc), UC_ND, 0)
#define php_unicode_is_alnum(cc) php_unicode_is_prop((cc), UC_LU|UC_LL|UC_LM|UC_LO|UC_LT|UC_ND, 0)
#define php_unicode_is_cntrl(cc) php_unicode_is_prop((cc), UC_CC|UC_CF, 0)
/*
 * UC_SS is a `mask2' bit and therefore has to be passed in the third
 * argument; in the `mask1' slot its value (0x00002000) would be read as
 * UC_CN ("Other, Not Assigned") instead.
 */
#define php_unicode_is_space(cc) php_unicode_is_prop((cc), UC_ZS, UC_SS)
#define php_unicode_is_blank(cc) php_unicode_is_prop((cc), UC_ZS, 0)
#define php_unicode_is_punct(cc) php_unicode_is_prop((cc), UC_PD|UC_PS|UC_PE|UC_PO, UC_PI|UC_PF)
/* Marks, numbers, letters, punctuation and symbols; no separators. */
#define php_unicode_is_graph(cc) php_unicode_is_prop((cc), UC_MN|UC_MC|UC_ME|UC_ND|UC_NL|UC_NO|\
                               UC_LU|UC_LL|UC_LT|UC_LM|UC_LO|UC_PC|UC_PD|\
                               UC_PS|UC_PE|UC_PO|UC_SM|UC_SC|UC_SK|\
                               UC_SO, UC_PI|UC_PF)
/* Same bits as php_unicode_is_graph() plus the space separator (UC_ZS). */
#define php_unicode_is_print(cc) php_unicode_is_prop((cc), UC_MN|UC_MC|UC_ME|UC_ND|UC_NL|UC_NO|\
                               UC_LU|UC_LL|UC_LT|UC_LM|UC_LO|UC_PC|UC_PD|\
                               UC_PS|UC_PE|UC_PO|UC_SM|UC_SC|UC_SK|\
                               UC_SO|UC_ZS, UC_PI|UC_PF)
#define php_unicode_is_upper(cc) php_unicode_is_prop((cc), UC_LU, 0)
#define php_unicode_is_lower(cc) php_unicode_is_prop((cc), UC_LL, 0)
#define php_unicode_is_title(cc) php_unicode_is_prop((cc), UC_LT, 0)
#define php_unicode_is_xdigit(cc) php_unicode_is_prop((cc), 0, UC_HD)
00132 
/* Control (UC_CC) and format (UC_CF) characters, tested separately. */
#define php_unicode_is_isocntrl(cc)     php_unicode_is_prop((cc), UC_CC, 0)
#define php_unicode_is_fmtcntrl(cc)     php_unicode_is_prop((cc), UC_CF, 0)

/* Predicates built from `mask1' bits. */
#define php_unicode_is_symbol(cc)       php_unicode_is_prop((cc), UC_SM|UC_SC|UC_SO|UC_SK, 0)
#define php_unicode_is_number(cc)       php_unicode_is_prop((cc), UC_ND|UC_NO|UC_NL, 0)
#define php_unicode_is_nonspacing(cc)   php_unicode_is_prop((cc), UC_MN, 0)
#define php_unicode_is_openpunct(cc)    php_unicode_is_prop((cc), UC_PS, 0)
#define php_unicode_is_closepunct(cc)   php_unicode_is_prop((cc), UC_PE, 0)

/* Initial/final punctuation live in `mask2'. */
#define php_unicode_is_initialpunct(cc) php_unicode_is_prop((cc), 0, UC_PI)
#define php_unicode_is_finalpunct(cc)   php_unicode_is_prop((cc), 0, UC_PF)

/* Predicates for the implementation specific `mask2' bits. */
#define php_unicode_is_composite(cc)    php_unicode_is_prop((cc), 0, UC_CM)
#define php_unicode_is_hex(cc)          php_unicode_is_prop((cc), 0, UC_HD)
#define php_unicode_is_quote(cc)        php_unicode_is_prop((cc), 0, UC_QM)
#define php_unicode_is_symmetric(cc)    php_unicode_is_prop((cc), 0, UC_SY)
#define php_unicode_is_mirroring(cc)    php_unicode_is_prop((cc), 0, UC_MR)
#define php_unicode_is_nonbreaking(cc)  php_unicode_is_prop((cc), 0, UC_NB)
00150 
/*
 * Directionality predicates.  The strong classes (UC_L, UC_R) and two of the
 * weak ones (UC_EN, UC_ES) are `mask1' bits; the remaining weak classes and
 * all neutral classes are `mask2' bits.
 */
#define php_unicode_is_rtl(cc)       php_unicode_is_prop((cc), UC_R, 0)
#define php_unicode_is_ltr(cc)       php_unicode_is_prop((cc), UC_L, 0)
#define php_unicode_is_strong(cc)    php_unicode_is_prop((cc), UC_L|UC_R, 0)
#define php_unicode_is_weak(cc)      php_unicode_is_prop((cc), UC_EN|UC_ES, UC_ET|UC_AN|UC_CS)
#define php_unicode_is_neutral(cc)   php_unicode_is_prop((cc), 0, UC_B|UC_S|UC_WS|UC_ON)
#define php_unicode_is_separator(cc) php_unicode_is_prop((cc), 0, UC_B|UC_S)
00160 
/*
 * Other macros inspired by John Cowan.  Apart from php_unicode_is_mark(),
 * which accepts any of the three mark bits, each one tests a single `mask1'
 * bit.
 */
#define php_unicode_is_mark(cc)        php_unicode_is_prop((cc), UC_MN|UC_MC|UC_ME, 0)
#define php_unicode_is_modif(cc)       php_unicode_is_prop((cc), UC_LM, 0)
#define php_unicode_is_letnum(cc)      php_unicode_is_prop((cc), UC_NL, 0)
#define php_unicode_is_connect(cc)     php_unicode_is_prop((cc), UC_PC, 0)
#define php_unicode_is_dash(cc)        php_unicode_is_prop((cc), UC_PD, 0)
#define php_unicode_is_math(cc)        php_unicode_is_prop((cc), UC_SM, 0)
#define php_unicode_is_currency(cc)    php_unicode_is_prop((cc), UC_SC, 0)
#define php_unicode_is_modifsymbol(cc) php_unicode_is_prop((cc), UC_SK, 0)
#define php_unicode_is_nsmark(cc)      php_unicode_is_prop((cc), UC_MN, 0)
#define php_unicode_is_spmark(cc)      php_unicode_is_prop((cc), UC_MC, 0)
#define php_unicode_is_enclosing(cc)   php_unicode_is_prop((cc), UC_ME, 0)
#define php_unicode_is_private(cc)     php_unicode_is_prop((cc), UC_CO, 0)
#define php_unicode_is_surrogate(cc)   php_unicode_is_prop((cc), UC_OS, 0)
#define php_unicode_is_lsep(cc)        php_unicode_is_prop((cc), UC_ZL, 0)
#define php_unicode_is_psep(cc)        php_unicode_is_prop((cc), UC_ZP, 0)
00179 
/*
 * Identifier predicates.  The "part" class is the "start" class widened by
 * the UC_MN, UC_MC, UC_ND, UC_PC and UC_CF bits.
 */
#define php_unicode_is_identstart(cc) \
    php_unicode_is_prop((cc), UC_LU|UC_LL|UC_LT|UC_LO|UC_NL, 0)
#define php_unicode_is_identpart(cc) \
    php_unicode_is_prop((cc), UC_LU|UC_LL|UC_LT|UC_LO|UC_NL|\
                              UC_MN|UC_MC|UC_ND|UC_PC|UC_CF, 0)
00183 
/*
 * Defined/undefined tests on the UC_CP (`mask2') bit.  The negated form is
 * wrapped in parentheses so the macro always expands to one self-contained
 * expression, whatever surrounds it at the call site.
 */
#define php_unicode_is_defined(cc) php_unicode_is_prop((cc), 0, UC_CP)
#define php_unicode_is_undefined(cc) (!php_unicode_is_prop((cc), 0, UC_CP))
00186 
/*
 * Other miscellaneous character property macros.  These compare `cc'
 * directly against fixed code point ranges and do not go through
 * php_unicode_is_prop().  `cc' appears more than once in each expansion, so
 * pass an argument without side effects.
 */
#define php_unicode_is_han(cc) \
    ((0x4e00 <= (cc) && (cc) <= 0x9fff) || \
     (0xf900 <= (cc) && (cc) <= 0xfaff))
#define php_unicode_is_hangul(cc) \
    (0xac00 <= (cc) && (cc) <= 0xd7ff)
00193 
00194 
00195 #endif
00196 
00197 
00198 #endif /* PHP_UNICODE_H */
00199 
00200 
00201