2005-08-11 23:35:03 +00:00
/*
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +
| Zend Engine |
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +
2006-01-05 02:35:02 +00:00
| Copyright ( c ) 1998 - 2006 Zend Technologies Ltd . ( http : //www.zend.com) |
2005-08-11 23:35:03 +00:00
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +
| This source file is subject to version 2.00 of the Zend license , |
2006-02-21 08:00:39 +00:00
| that is bundled with this package in the file LICENSE , and is |
2006-01-01 13:25:34 +00:00
| available through the world - wide - web at |
2005-08-11 23:35:03 +00:00
| http : //www.zend.com/license/2_00.txt. |
| If you did not receive a copy of the Zend license and are unable to |
| obtain it through the world - wide - web , please send a note to |
| license @ zend . com so we can mail you a copy immediately . |
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +
| Authors : Andrei Zmievski < andrei @ php . net > |
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +
*/
# include "zend.h"
# include "zend_globals.h"
# include "zend_operators.h"
2006-03-24 07:38:07 +00:00
# include "zend_exceptions.h"
2005-08-11 23:35:03 +00:00
# include "zend_API.h"
# include "zend_unicode.h"
# include <unicode/unorm.h>
# ifdef ZTS
ZEND_API ts_rsrc_id unicode_globals_id ;
# else
ZEND_API zend_unicode_globals unicode_globals ;
# endif
2006-03-24 07:38:07 +00:00
ZEND_API zend_class_entry * unicodeConversionException ;
2005-08-11 23:35:03 +00:00
/* {{{ zend_set_converter_error_mode */
2006-03-26 06:19:24 +00:00
/*
 * Install on conv the ICU error callback selected by error_mode.
 *
 * direction chooses which side of the converter is configured:
 * ZEND_FROM_UNICODE replaces the from-Unicode callback, any other value
 * replaces the to-Unicode callback. Only the low byte of error_mode is
 * examined. The UErrorCode filled in by ICU is never inspected, so a failed
 * installation is silent. An unknown mode trips assert(0) and leaves the
 * converter untouched.
 */
void zend_set_converter_error_mode(UConverter *conv, zend_conv_direction direction, uint16_t error_mode)
{
	UErrorCode status = U_ZERO_ERROR;
	uint16_t mode = error_mode & 0xff;

	if (direction == ZEND_FROM_UNICODE) {
		switch (mode) {
			case ZEND_CONV_ERROR_STOP:
				ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status);
				return;

			case ZEND_CONV_ERROR_SKIP:
				ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_SKIP, UCNV_SKIP_STOP_ON_ILLEGAL, NULL, NULL, &status);
				return;

			case ZEND_CONV_ERROR_SUBST:
				ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_SUBSTITUTE, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &status);
				return;

			case ZEND_CONV_ERROR_ESCAPE_UNICODE:
				ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE, NULL, NULL, &status);
				return;

			case ZEND_CONV_ERROR_ESCAPE_ICU:
				ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU, NULL, NULL, &status);
				return;

			case ZEND_CONV_ERROR_ESCAPE_JAVA:
				ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA, NULL, NULL, &status);
				return;

			case ZEND_CONV_ERROR_ESCAPE_XML_DEC:
				ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, NULL, NULL, &status);
				return;

			case ZEND_CONV_ERROR_ESCAPE_XML_HEX:
				ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX, NULL, NULL, &status);
				return;

			default:
				assert(0);
				return;
		}
	}

	/* Every other direction configures the to-Unicode side. */
	switch (mode) {
		case ZEND_CONV_ERROR_STOP:
			ucnv_setToUCallBack(conv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &status);
			break;

		case ZEND_CONV_ERROR_SKIP:
			ucnv_setToUCallBack(conv, UCNV_TO_U_CALLBACK_SKIP, UCNV_SKIP_STOP_ON_ILLEGAL, NULL, NULL, &status);
			break;

		case ZEND_CONV_ERROR_SUBST:
			ucnv_setToUCallBack(conv, UCNV_TO_U_CALLBACK_SUBSTITUTE, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &status);
			break;

		case ZEND_CONV_ERROR_ESCAPE_UNICODE:
			ucnv_setToUCallBack(conv, UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE, NULL, NULL, &status);
			break;

		case ZEND_CONV_ERROR_ESCAPE_ICU:
			ucnv_setToUCallBack(conv, UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU, NULL, NULL, &status);
			break;

		case ZEND_CONV_ERROR_ESCAPE_JAVA:
			/*
			 * Deliberately the C escape rather than the Java one for this
			 * direction, so that no additional constant has to be exposed.
			 */
			ucnv_setToUCallBack(conv, UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C, NULL, NULL, &status);
			break;

		case ZEND_CONV_ERROR_ESCAPE_XML_DEC:
			ucnv_setToUCallBack(conv, UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, NULL, NULL, &status);
			break;

		case ZEND_CONV_ERROR_ESCAPE_XML_HEX:
			ucnv_setToUCallBack(conv, UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX, NULL, NULL, &status);
			break;

		default:
			assert(0);
			break;
	}
}
/* }}} */
/* {{{ zend_set_converter_subst_char */
2006-03-26 01:48:33 +00:00
/*
 * Set the substitution sequence of conv from the NUL-terminated UTF-16
 * string subst_char.
 *
 * The string is first encoded with conv itself into an 8-byte scratch
 * buffer, then handed to ucnv_setSubstChars(). An empty string is a no-op.
 * Failures are reported with E_WARNING and leave the converter's
 * substitution sequence as it was.
 */
void zend_set_converter_subst_char(UConverter *conv, UChar *subst_char)
{
	char encoded[8];
	char *payload = encoded;
	int8_t encoded_len = 8;
	int bom_len;
	UErrorCode status = U_ZERO_ERROR;
	UErrorCode cb_status = U_ZERO_ERROR;
	const void *saved_context;
	UConverterFromUCallback saved_cb;
	int32_t uchar_count = u_strlen(subst_char);

	if (uchar_count == 0) {
		return;
	}

	/*
	 * Encode with the STOP callback so an unencodable character surfaces as
	 * an error; the previous callback is put back right afterwards.
	 */
	ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_STOP, NULL, &saved_cb, &saved_context, &cb_status);
	encoded_len = ucnv_fromUChars(conv, encoded, encoded_len, subst_char, uchar_count, &status);
	ucnv_setFromUCallBack(conv, saved_cb, saved_context, NULL, NULL, &cb_status);

	if (U_FAILURE(status)) {
		zend_error(E_WARNING, " Could not set substitution character for the converter ");
		return;
	}

	/* skip BOM for UTF-16/32 converters */
	switch (ucnv_getType(conv)) {
		case UCNV_UTF16:
			bom_len = 2;
			break;
		case UCNV_UTF32:
			bom_len = 4;
			break;
		default:
			bom_len = 0;
			break;
	}
	payload += bom_len;
	encoded_len -= bom_len;

	ucnv_setSubstChars(conv, payload, encoded_len, &status);
	if (status == U_ILLEGAL_ARGUMENT_ERROR) {
		zend_error(E_WARNING, " Substitution character byte sequence is too short or long for this converter ");
	}
}
/* }}} */
/* {{{ zend_set_converter_encoding */
/*
 * Make *converter a converter for the given encoding.
 *
 * Returns FAILURE when converter is NULL or ucnv_open() fails; in the latter
 * case the existing converter is left in place. Returns SUCCESS otherwise.
 * On a successful switch the previous converter, if any, is closed.
 */
int zend_set_converter_encoding(UConverter **converter, const char *encoding)
{
	UErrorCode status = U_ZERO_ERROR;
	UConverter *replacement;

	if (converter == NULL) {
		return FAILURE;
	}

	/*
	 * Nothing to do when the requested encoding names the converter that is
	 * already installed.
	 */
	if (*converter != NULL && encoding != NULL && encoding[0] != '\0') {
		const char *current_name = ucnv_getName(*converter, &status);

		status = U_ZERO_ERROR; /* discard whatever ucnv_getName() reported */
		if (ucnv_compareNames(current_name, encoding) == 0) {
			return SUCCESS;
		}
	}

	/*
	 * A NULL encoding makes ucnv_open() return a converter for the default
	 * platform encoding, as determined by ucnv_getDefaultName().
	 */
	replacement = ucnv_open(encoding, &status);
	if (U_FAILURE(status)) {
		return FAILURE;
	}

	if (*converter != NULL) {
		ucnv_close(*converter);
	}
	*converter = replacement;

	return SUCCESS;
}
/* }}} */
/* {{{ zend_copy_converter */
/*
 * Point *target at a converter for the same encoding as source.
 *
 * Only the encoding name is carried over: the name is read from source and
 * passed to zend_set_converter_encoding(). Returns FAILURE if the name cannot
 * be obtained, otherwise whatever zend_set_converter_encoding() returns.
 * source must not be NULL.
 */
int zend_copy_converter(UConverter **target, UConverter *source)
{
	UErrorCode status = U_ZERO_ERROR;
	const char *source_name;

	assert(source != NULL);

	source_name = ucnv_getName(source, &status);

	return U_FAILURE(status) ? FAILURE : zend_set_converter_encoding(target, source_name);
}
/* }}} */
/* {{{ zend_convert_to_unicode */
2006-03-26 06:19:24 +00:00
/*
 * Convert source_len bytes at source to UTF-16 using conv.
 *
 * *target receives a freshly (re)allocated, zero-terminated UChar buffer and
 * *target_len the number of UChars written, even when *status ends up as a
 * failure, so callers can use the partial result. The return value is the
 * number of source bytes consumed. If *status already signals failure on
 * entry, nothing is touched and 0 is returned.
 */
ZEND_API int zend_convert_to_unicode(UConverter *conv, UChar **target, int *target_len, const char *source, int source_len, UErrorCode *status)
{
	UChar *out_buf = NULL;
	UChar *write_pos;
	const char *read_pos = source;
	int32_t capacity = 0;
	int32_t produced = 0;
	UConverterType conv_type;

	if (U_FAILURE(*status)) {
		return 0;
	}

	ucnv_resetToUnicode(conv);

	conv_type = ucnv_getType(conv);
	if (conv_type == UCNV_SBCS || conv_type == UCNV_LATIN_1 || conv_type == UCNV_US_ASCII) {
		/* Single-byte charsets: one input byte yields one output UChar. */
		capacity = source_len;
	} else {
		/*
		 * Initial guess for everything else: 1.25 UChars per 2 source bytes,
		 * plus 2, once the input is longer than 2 bytes. Multi-byte text
		 * (GB2312 was the sample) usually still contains single-byte
		 * characters, and 1.25 kept reallocations rare without
		 * over-allocating much. When the guess is too low the loop below
		 * grows the buffer by a factor of 1.33 plus 1, which stays cheap
		 * for short inputs without needing many iterations.
		 */
		capacity = (source_len > 2) ? ((source_len >> 1) + (source_len >> 3) + 2) : source_len;
	}

	for (;;) {
		/* One extra UChar is always reserved for the terminator. */
		out_buf = eurealloc(out_buf, capacity + 1);
		write_pos = out_buf + produced;

		ucnv_toUnicode(conv, &write_pos, out_buf + capacity, &read_pos, source + source_len, NULL, TRUE, status);
		produced = (int32_t) (write_pos - out_buf);

		if (*status != U_BUFFER_OVERFLOW_ERROR) {
			break;
		}

		/* Out of room: enlarge and resume where the converter stopped. */
		capacity = (capacity * 1.33) + 1;
		*status = U_ZERO_ERROR;
	}

	/*
	 * The buffer is handed back even on failure; the caller may want the
	 * partially converted string.
	 */
	out_buf[produced] = 0;
	*target = out_buf;
	*target_len = produced;

	return read_pos - source;
}
/* }}} */
/* {{{ zend_convert_from_unicode */
2006-03-24 07:38:07 +00:00
/*
 * Convert source_len UChars at source to the encoding of conv.
 *
 * *target receives a freshly (re)allocated, zero-terminated byte buffer and
 * *target_len the number of bytes written, even when *status ends up as a
 * failure. The return value is the number of UChars consumed. If *status
 * already signals failure on entry, nothing is touched and 0 is returned.
 */
ZEND_API int zend_convert_from_unicode(UConverter *conv, char **target, int *target_len, const UChar *source, int source_len, UErrorCode *status)
{
	char *out_buf = NULL;
	char *write_pos;
	const UChar *read_pos = source;
	int32_t capacity = 0;
	int32_t produced = 0;

	if (U_FAILURE(*status)) {
		return 0;
	}

	ucnv_resetFromUnicode(conv);

	/* Start from the converter's maximum bytes-per-character. */
	capacity = ucnv_getMaxCharSize(conv) * source_len;

	for (;;) {
		/* One extra byte is always reserved for the terminator. */
		out_buf = erealloc(out_buf, capacity + 1);
		write_pos = out_buf + produced;

		ucnv_fromUnicode(conv, &write_pos, out_buf + capacity, &read_pos, source + source_len, NULL, TRUE, status);
		produced = (int32_t) (write_pos - out_buf);

		if (*status != U_BUFFER_OVERFLOW_ERROR) {
			break;
		}

		/* Out of room: add a fixed 64 bytes and resume. */
		capacity += 64;
		*status = U_ZERO_ERROR;
	}

	/*
	 * The buffer is handed back even on failure; the caller may want the
	 * partially converted string.
	 */
	out_buf[produced] = 0; /* NULL-terminate the output string */
	*target = out_buf;
	*target_len = produced;

	return read_pos - source;
}
/* }}} */
/* {{{ zend_convert_encodings */
/*
 * Transcode source_len bytes at source from the encoding of source_conv to
 * the encoding of target_conv, going through a UTF-16 pivot buffer.
 *
 * *target receives the (re)allocated result and *target_len its length in
 * bytes; the result is followed by a terminator as wide as the target
 * converter's minimum character size. A conversion failure is left in
 * *status and additionally reported as E_NOTICE. If *status already signals
 * failure on entry the function returns without doing anything.
 */
ZEND_API void zend_convert_encodings(UConverter *target_conv, UConverter *source_conv,
									 char **target, int *target_len,
									 const char *source, int source_len, UErrorCode *status)
{
	char *out_buf = NULL;
	char *write_pos;
	const char *read_pos = source;
	int32_t capacity = 0;
	int32_t produced = 0;
	int8_t terminator_size;
	UChar pivot_buf[1024], *pivot_src, *pivot_dst;

	if (U_FAILURE(*status)) {
		return;
	}

	terminator_size = ucnv_getMinCharSize(target_conv);
	capacity = source_len + terminator_size;

	ucnv_resetToUnicode(source_conv);
	ucnv_resetFromUnicode(target_conv);
	pivot_src = pivot_dst = pivot_buf;

	for (;;) {
		out_buf = (char *) erealloc(out_buf, capacity);
		write_pos = out_buf + produced;

		/* The output limit keeps terminator_size bytes free at the end. */
		ucnv_convertEx(target_conv, source_conv, &write_pos, out_buf + capacity - terminator_size,
					   &read_pos, source + source_len, pivot_buf, &pivot_src, &pivot_dst, pivot_buf + 1024, FALSE, TRUE, status);
		produced = (int32_t) (write_pos - out_buf);

		if (*status != U_BUFFER_OVERFLOW_ERROR) {
			break;
		}

		/* Out of room: add a fixed 1024 bytes and resume. */
		capacity += 1024;
		*status = U_ZERO_ERROR;
	}

	memset(out_buf + produced, 0, terminator_size); /* NULL-terminate the output string */
	*target = out_buf;
	*target_len = produced;

	/* Report the conversion error */
	if (U_FAILURE(*status)) {
		zend_error(E_NOTICE, " Error converting from codepage string to Unicode: %s ", u_errorName(*status));
	}
}
/* }}} */
2006-03-24 07:38:07 +00:00
/* {{{ zend_raise_conversion_error_ex */
2006-03-26 06:19:24 +00:00
/*
 * Report a conversion failure for conv, naming the offending input.
 *
 * message           - leading text of the report; nothing happens if NULL.
 * conv              - converter that failed; when NULL only message is
 *                     reported.
 * dir               - ZEND_FROM_UNICODE reports the failing code point, any
 *                     other direction reports the failing bytes in hex.
 * error_char_offset - offset just past the failing input; the reported
 *                     offset is this value minus the width of the failing
 *                     input.
 * use_exception     - non-zero throws unicodeConversionException, zero
 *                     raises E_WARNING.
 */
ZEND_API void zend_raise_conversion_error_ex(char *message, UConverter *conv, zend_conv_direction dir, int error_char_offset, int use_exception TSRMLS_DC)
{
	const char *conv_name;
	UErrorCode status = U_ZERO_ERROR;

	if (!message) {
		return;
	}

	if (!conv) {
		if (use_exception) {
			zend_throw_exception_ex(unicodeConversionException, 0 TSRMLS_CC, " %s ", message);
		} else {
			zend_error(E_WARNING, " %s ", message);
		}
		return;
	}

	conv_name = ucnv_getName(conv, &status);
	/*
	 * UTODO
	 * use some other standard than MIME? or fallback onto IANA? or use
	 * internal converter name? ponder
	 *
	 * The standard name must be passed without padding, otherwise the lookup
	 * can never match and conv_name is always NULL.
	 */
	conv_name = ucnv_getStandardName(conv_name, "MIME", &status);
	status = U_ZERO_ERROR;

	if (dir == ZEND_FROM_UNICODE) {
		UChar err_char[U16_MAX_LENGTH];
		/* Capacity is counted in UChars, not bytes. */
		int8_t err_char_len = sizeof(err_char) / sizeof(err_char[0]);
		UChar32 codepoint = 0;
		char *message_fmt = "%s (converter %s failed on character {U+%04X} at offset %d) ";

		ucnv_getInvalidUChars(conv, err_char, &err_char_len, &status);
		/* Only read err_char when ICU actually filled it in. */
		if (!U_FAILURE(status) && err_char_len > 0) {
			codepoint = (err_char_len < 2) ? err_char[0] : U16_GET_SUPPLEMENTARY(err_char[0], err_char[1]);
		}

		if (use_exception) {
			zend_throw_exception_ex(unicodeConversionException, 0 TSRMLS_CC, message_fmt, message, conv_name ? conv_name : " <unknown> ", codepoint, error_char_offset - 1);
		} else {
			zend_error(E_WARNING, message_fmt, message, conv_name ? conv_name : " ", codepoint, error_char_offset - 1);
		}
	} else {
		static const char hex_digits[] = "0123456789ABCDEF";
		char err_char[8];
		/* "0xNN" per byte plus a ',' between bytes, plus the terminator. */
		char buf[sizeof(err_char) * 5 + 1];
		int8_t err_char_len = sizeof(err_char);
		char *message_fmt = "%s (converter %s failed on bytes (%s) at offset %d) ";
		char *p;
		int i;

		ucnv_getInvalidChars(conv, err_char, &err_char_len, &status);
		/* On failure the buffer contents are not valid; dump nothing. */
		if (U_FAILURE(status) || err_char_len < 0) {
			err_char_len = 0;
		}

		p = buf;
		for (i = 0; i < err_char_len; i++) {
			unsigned char byte = (unsigned char) err_char[i];

			if (i > 0) {
				*p++ = ',';
			}
			*p++ = '0';
			*p++ = 'x';
			*p++ = hex_digits[byte >> 4];
			*p++ = hex_digits[byte & 0x0f];
		}
		*p = '\0'; /* also yields an empty string when there are no bytes */

		if (use_exception) {
			zend_throw_exception_ex(unicodeConversionException, 0 TSRMLS_CC, message_fmt, message, conv_name ? conv_name : " <unknown> ", buf, error_char_offset - err_char_len);
		} else {
			zend_error(E_WARNING, message_fmt, message, conv_name ? conv_name : " ", buf, error_char_offset - err_char_len);
		}
	}
}
/* }}} */
2005-08-11 23:35:03 +00:00
/* {{{ zval_unicode_to_string */
/*
 * Convert a Unicode zval in place to a binary string zval using conv.
 *
 * On success the zval takes ownership of the converted buffer and SUCCESS is
 * returned. On failure the error is raised (as an exception when
 * UG(from_error_mode) has ZEND_CONV_ERROR_EXCEPTION set, otherwise as a
 * warning), the zval becomes an empty string and FAILURE is returned. The
 * original Unicode buffer is freed in both cases.
 */
ZEND_API int zval_unicode_to_string(zval *string, UConverter *conv TSRMLS_DC)
{
	UErrorCode status = U_ZERO_ERROR;
	UChar *ustr = Z_USTRVAL_P(string);
	int ustr_len = Z_USTRLEN_P(string);
	char *bin = NULL;
	int bin_len;
	int consumed;

	consumed = zend_convert_from_unicode(conv, &bin, &bin_len, ustr, ustr_len, &status);

	if (!U_FAILURE(status)) {
		ZVAL_STRINGL(string, bin, bin_len, 0);
		efree((UChar *) ustr);
		return SUCCESS;
	}

	{
		/* Translate consumed UTF-16 units into a code point offset. */
		int32_t offset = u_countChar32(ustr, consumed);

		zend_raise_conversion_error_ex(" Could not convert Unicode string to binary string ", conv, ZEND_FROM_UNICODE, offset, (UG(from_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC);
	}

	if (bin) {
		efree(bin);
	}
	ZVAL_EMPTY_STRING(string);
	efree((UChar *) ustr);

	return FAILURE;
}
/* }}} */
2006-01-17 12:18:53 +00:00
/* {{{ zval_string_to_unicode_ex */
2006-03-27 06:02:42 +00:00
/*
 * Convert a binary string zval in place to a Unicode zval using conv.
 *
 * On success the zval takes ownership of the converted buffer and SUCCESS is
 * returned. On failure the error is raised (as an exception when
 * UG(to_error_mode) has ZEND_CONV_ERROR_EXCEPTION set, otherwise as a
 * warning), the zval becomes an empty Unicode string and FAILURE is
 * returned. The original byte buffer is freed in both cases.
 */
ZEND_API int zval_string_to_unicode_ex(zval *string, UConverter *conv TSRMLS_DC)
{
	UErrorCode status = U_ZERO_ERROR;
	char *bin = Z_STRVAL_P(string);
	int bin_len = Z_STRLEN_P(string);
	UChar *ustr = NULL;
	int ustr_len;
	int consumed;

	consumed = zend_convert_to_unicode(conv, &ustr, &ustr_len, bin, bin_len, &status);

	if (!U_FAILURE(status)) {
		ZVAL_UNICODEL(string, ustr, ustr_len, 0);
		efree(bin);
		return SUCCESS;
	}

	/* The number of consumed bytes is passed on as the error offset. */
	zend_raise_conversion_error_ex(" Could not convert binary string to Unicode string ", conv, ZEND_TO_UNICODE, consumed, (UG(to_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC);

	if (ustr) {
		efree(ustr);
	}
	ZVAL_EMPTY_UNICODE(string);
	efree(bin);

	return FAILURE;
}
/* }}} */
2006-01-17 12:18:53 +00:00
/* {{{ zval_string_to_unicode */
/*
 * Convert a binary string zval in place to Unicode, using the converter
 * that ZEND_U_CONVERTER() yields for UG(runtime_encoding_conv). Returns what
 * zval_string_to_unicode_ex() returns.
 */
ZEND_API int zval_string_to_unicode(zval *string TSRMLS_DC)
{
	UConverter *runtime_conv = ZEND_U_CONVERTER(UG(runtime_encoding_conv));

	return zval_string_to_unicode_ex(string, runtime_conv TSRMLS_CC);
}
/* }}} */
2005-08-11 23:35:03 +00:00
/* {{{ zend_cmp_unicode_and_string */
/*
 * Compare the Unicode string ustr with the byte string str (len bytes).
 *
 * str is converted to UTF-16 with the runtime encoding converter and the
 * result is compared against ustr with u_memcmp() over the converted
 * length, so ustr must provide at least that many UChars. Returns the
 * u_memcmp() result, or FALSE when the conversion fails.
 *
 * NOTE(review): FALSE is 0, which is also what u_memcmp() yields for equal
 * data, so a failed conversion looks like a match to the caller -- confirm
 * that this is intended.
 */
ZEND_API int zend_cmp_unicode_and_string(UChar *ustr, char *str, uint len)
{
	UErrorCode status = U_ZERO_ERROR;
	UChar *converted = NULL;
	int converted_len;
	int cmp;
	TSRMLS_FETCH();

	zend_convert_to_unicode(ZEND_U_CONVERTER(UG(runtime_encoding_conv)), &converted, &converted_len, str, len, &status);

	cmp = U_FAILURE(status) ? FALSE : u_memcmp(ustr, converted, converted_len);

	/* The temporary conversion buffer is released on both paths. */
	efree(converted);

	return cmp;
}
/* }}} */
/* {{{ zend_cmp_unicode_and_literal */
/*
* Compare a Unicode string and an ASCII literal . Because ASCII maps nicely onto Unicode
2006-03-27 19:15:58 +00:00
* range U + 0000 . . U + 007F , we can simply cast ASCII chars to Unicode values and avoid
2005-08-11 23:35:03 +00:00
* memory allocation .
*/
2006-03-02 13:12:45 +00:00
ZEND_API int zend_cmp_unicode_and_literal ( UChar * ustr , int ulen , char * str , int slen )
2005-08-11 23:35:03 +00:00
{
2006-03-24 10:25:50 +00:00
int result ;
uint len = MIN ( ulen , slen ) ;
2006-03-27 19:15:58 +00:00
/* UTODO: make sure we're only comparing against ASCII values here (< 0x80) */
2006-03-24 10:25:50 +00:00
while ( len - - ) {
result = ( int ) ( uint16_t ) * ustr - ( int ) ( uint16_t ) * str ;
if ( result ! = 0 )
return result ;
ustr + + ;
str + + ;
}
return ulen - slen ;
2005-08-11 23:35:03 +00:00
}
/* }}} */
/* {{{ zend_is_valid_identifier */
2006-03-02 13:12:45 +00:00
/*
 * Check whether the len UTF-16 units at ident form a valid identifier.
 *
 * The first code point must have the XID_Start property and every later one
 * XID_Continue; '_' (U+005F) is explicitly accepted at any position, which
 * is what allows a leading underscore. Returns 1 if every code point passes
 * (including the case of an empty input) and 0 otherwise.
 */
ZEND_API int zend_is_valid_identifier(UChar *ident, int len)
{
	UChar32 cp;
	int32_t pos = 0;
	int32_t limit = len;
	UProperty wanted = UCHAR_XID_START;

	while (pos < limit) {
		/* Decodes one code point and advances pos past it. */
		U16_NEXT(ident, pos, limit, cp);

		if (cp != 0x5f && !u_hasBinaryProperty(cp, wanted)) {
			return 0;
		}

		/* Everything after the first code point only needs XID_Continue. */
		wanted = UCHAR_XID_CONTINUE;
	}

	return 1;
}
/* }}} */
/* {{{ zend_normalize_string */
2006-03-02 13:12:45 +00:00
/*
 * Normalize src (src_len UChars) to NFKC.
 *
 * On success *dest receives a newly allocated, zero-terminated buffer and
 * *dest_len its length. On failure the temporary buffer is freed, *dest and
 * *dest_len are left untouched and the error is available in *status.
 * *status is reset on entry.
 */
static inline void zend_normalize_string(UChar **dest, int32_t *dest_len, UChar *src, int src_len, UErrorCode *status)
{
	UChar *out_buf = NULL;
	int32_t out_len = src_len;

	/*
	 * unorm_normalize() returns the length it needs, so after an overflow
	 * the next pass allocates exactly that much.
	 */
	do {
		*status = U_ZERO_ERROR;
		out_buf = eurealloc(out_buf, out_len + 1);
		out_len = unorm_normalize(src, src_len, UNORM_NFKC, 0, out_buf, out_len, status);
	} while (*status == U_BUFFER_OVERFLOW_ERROR);

	if (!U_SUCCESS(*status)) {
		efree(out_buf);
		return;
	}

	out_buf[out_len] = 0;
	*dest = out_buf;
	*dest_len = out_len;
}
/* }}} */
/* {{{ zend_case_fold_string */
2006-02-21 20:12:43 +00:00
/*
 * Case-fold src (src_len UChars) with u_strFoldCase() and the given options.
 *
 * On success *dest receives a newly allocated, zero-terminated buffer and
 * *dest_len its length. On failure the temporary buffer is freed, *dest and
 * *dest_len are left untouched and the error is available in *status.
 * *status is reset on entry.
 */
ZEND_API void zend_case_fold_string(UChar **dest, int *dest_len, UChar *src, int src_len, uint32_t options, UErrorCode *status)
{
	UChar *out_buf = NULL;
	int32_t out_len = src_len;

	/*
	 * u_strFoldCase() returns the length it needs, so after an overflow the
	 * next pass allocates exactly that much.
	 */
	do {
		*status = U_ZERO_ERROR;
		out_buf = eurealloc(out_buf, out_len + 1);
		out_len = u_strFoldCase(out_buf, out_len, src, src_len, options, status);
	} while (*status == U_BUFFER_OVERFLOW_ERROR);

	if (!U_SUCCESS(*status)) {
		efree(out_buf);
		return;
	}

	out_buf[out_len] = 0;
	*dest = out_buf;
	*dest_len = out_len;
}
/* }}} */
/* {{{ zend_normalize_identifier */
2006-02-21 20:12:43 +00:00
/*
 * Bring an identifier into canonical form: NFKC-normalize it and, when
 * fold_case is set, case-fold it and normalize again if folding broke the
 * normalization.
 *
 * Returns 1 on success with the result in *dest / *dest_len, 0 on failure
 * (intermediate buffers are released, *dest is not written). When no step
 * had to change anything, *dest is the caller's own ident pointer rather
 * than a copy, so the caller must compare pointers before freeing.
 */
ZEND_API int zend_normalize_identifier(UChar **dest, int *dest_len, UChar *ident, int ident_len, zend_bool fold_case)
{
	UChar *produced = NULL;
	UChar *caller_ident = ident;
	int32_t produced_len;
	UErrorCode status = U_ZERO_ERROR;

	/* Step 1: normalize unless the quick check says it already is. */
	if (unorm_quickCheck(ident, ident_len, UNORM_NFKC, &status) != UNORM_YES) {
		zend_normalize_string(&produced, &produced_len, ident, ident_len, &status);
		if (U_FAILURE(status)) {
			return 0;
		}
		ident = produced;
		ident_len = produced_len;
	}

	if (!fold_case) {
		*dest = ident;
		*dest_len = ident_len;
		return 1;
	}

	/* Step 2: case-fold; the input is released if step 1 allocated it. */
	zend_case_fold_string(&produced, &produced_len, ident, ident_len, U_FOLD_CASE_DEFAULT, &status);
	if (ident != caller_ident) {
		efree(ident);
	}
	if (U_FAILURE(status)) {
		return 0;
	}
	ident = produced;
	ident_len = produced_len;

	/* Step 3: folding may denormalize, so check and normalize once more. */
	if (unorm_quickCheck(ident, ident_len, UNORM_NFKC, &status) != UNORM_YES) {
		zend_normalize_string(&produced, &produced_len, ident, ident_len, &status);
		if (ident != caller_ident) {
			efree(ident);
		}
		if (U_FAILURE(status)) {
			return 0;
		}
		ident = produced;
		ident_len = produced_len;
	}

	*dest = ident;
	*dest_len = ident_len;

	return 1;
}
/* }}} */
2006-03-24 16:45:18 +00:00
/* {{{ zend_register_unicode_exceptions */
2006-03-24 07:38:07 +00:00
void zend_register_unicode_exceptions ( TSRMLS_D )
{
2006-03-24 10:25:50 +00:00
zend_class_entry ce ;
2006-03-24 07:38:07 +00:00
2006-03-24 10:25:50 +00:00
INIT_CLASS_ENTRY ( ce , " UnicodeConversionException " , NULL ) ;
unicodeConversionException = zend_register_internal_class_ex ( & ce , zend_exception_get_default ( TSRMLS_C ) , NULL TSRMLS_CC ) ;
2006-03-24 07:38:07 +00:00
}
2006-03-24 16:45:18 +00:00
/* }}} */
2006-03-24 07:38:07 +00:00
2006-04-20 21:56:43 +00:00
/*
 * Wrap an ICU collator in a newly allocated zend_collator whose reference
 * count starts at 1. The wrapper stores the coll pointer as given.
 */
zend_collator *zend_collator_create(UCollator *coll)
{
	zend_collator *wrapper = emalloc(sizeof(*wrapper));

	wrapper->refcount = 1;
	wrapper->coll = coll;

	return wrapper;
}
/*
 * Drop one reference to zcoll. When the count reaches zero the wrapped ICU
 * collator is closed and the wrapper itself is freed.
 */
void zend_collator_destroy(zend_collator *zcoll)
{
	if (--zcoll->refcount != 0) {
		return;
	}

	ucol_close(zcoll->coll);
	efree(zcoll);
}
2006-03-24 10:25:50 +00:00
/*
* Local variables :
* tab - width : 4
* c - basic - offset : 4
* indent - tabs - mode : t
* End :
2006-03-24 16:45:18 +00:00
* vim : noet sw = 4 ts = 4 fdm = marker
2006-03-24 10:25:50 +00:00
*/