mirror of
https://github.com/php/php-src.git
synced 2024-10-21 00:12:34 +00:00
533 lines
15 KiB
C
Executable File
533 lines
15 KiB
C
Executable File
/*
|
|
+----------------------------------------------------------------------+
|
|
| PHP Version 5 |
|
|
+----------------------------------------------------------------------+
|
|
| This source file is subject to version 3.01 of the PHP license, |
|
|
| that is bundled with this package in the file LICENSE, and is |
|
|
| available through the world-wide-web at the following url: |
|
|
| http://www.php.net/license/3_01.txt |
|
|
| If you did not receive a copy of the PHP license and are unable to |
|
|
| obtain it through the world-wide-web, please send a note to |
|
|
| license@php.net so we can mail you a copy immediately. |
|
|
+----------------------------------------------------------------------+
|
|
| Authors: Vadim Savchuk <vsavchuk@productengine.com> |
|
|
| Dmitry Lakhtyuk <dlakhtyuk@productengine.com> |
|
|
+----------------------------------------------------------------------+
|
|
*/
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
#include "config.h"
|
|
#endif
|
|
|
|
#include "php_intl.h"
|
|
#include "collator.h"
|
|
#include "collator_class.h"
|
|
#include "collator_sort.h"
|
|
#include "collator_convert.h"
|
|
#include "intl_convert.h"
|
|
|
|
#if !defined(HAVE_PTRDIFF_T) && !defined(_PTRDIFF_T_DEFINED)
|
|
typedef long ptrdiff_t;
|
|
#endif
|
|
|
|
/**
|
|
* Declare 'index' which will point to sort key in sort key
|
|
* buffer.
|
|
*/
|
|
typedef struct _collator_sort_key_index {
|
|
char* key; // pointer to sort key
|
|
zval** zstr; // pointer to original string(hash-item)
|
|
} collator_sort_key_index_t;
|
|
|
|
ZEND_EXTERN_MODULE_GLOBALS( intl )
|
|
|
|
static const size_t DEF_SORT_KEYS_BUF_SIZE = 1048576;
|
|
static const size_t DEF_SORT_KEYS_BUF_INCREMENT = 1048576;
|
|
|
|
static const size_t DEF_SORT_KEYS_INDX_BUF_SIZE = 1048576;
|
|
static const size_t DEF_SORT_KEYS_INDX_BUF_INCREMENT = 1048576;
|
|
|
|
static const size_t DEF_UTF16_BUF_SIZE = 1024;
|
|
|
|
/* {{{ collator_regular_compare_function */
|
|
static int collator_regular_compare_function(zval *result, zval *op1, zval *op2 TSRMLS_DC)
|
|
{
|
|
Collator_object* co = NULL;
|
|
|
|
int rc = SUCCESS;
|
|
|
|
zval* str1 = collator_convert_object_to_string( op1 TSRMLS_CC );
|
|
zval* str2 = collator_convert_object_to_string( op2 TSRMLS_CC );
|
|
|
|
zval* num1 = NULL;
|
|
zval* num2 = NULL;
|
|
zval* norm1 = NULL;
|
|
zval* norm2 = NULL;
|
|
|
|
// If both args are strings AND either of args is not numeric string
|
|
// then use ICU-compare. Otherwise PHP-compare.
|
|
if( Z_TYPE_P(str1) == IS_STRING && Z_TYPE_P(str2) == IS_STRING &&
|
|
( str1 == ( num1 = collator_convert_string_to_number_if_possible( str1 ) ) ||
|
|
str2 == ( num2 = collator_convert_string_to_number_if_possible( str2 ) ) ) )
|
|
{
|
|
// Fetch collator object.
|
|
co = (Collator_object *) zend_object_store_get_object( INTL_G(current_collator) TSRMLS_CC );
|
|
|
|
// Compare the strings using ICU.
|
|
result->value.lval = ucol_strcoll(
|
|
co->ucoll,
|
|
INTL_Z_STRVAL_P(str1), INTL_Z_STRLEN_P(str1),
|
|
INTL_Z_STRVAL_P(str2), INTL_Z_STRLEN_P(str2) );
|
|
result->type = IS_LONG;
|
|
}
|
|
else
|
|
{
|
|
// num1 is set if str1 and str2 are strings.
|
|
if( num1 )
|
|
{
|
|
if( num1 == str1 )
|
|
{
|
|
// str1 is string but not numeric string
|
|
// just convert it to utf8.
|
|
norm1 = collator_convert_zstr_utf16_to_utf8( str1 );
|
|
|
|
// num2 is not set but str2 is string => do normalization.
|
|
norm2 = collator_normalize_sort_argument( str2 );
|
|
}
|
|
else
|
|
{
|
|
// str1 is numeric strings => passthru to PHP-compare.
|
|
zval_add_ref( &num1 );
|
|
norm1 = num1;
|
|
|
|
// str2 is numeric strings => passthru to PHP-compare.
|
|
zval_add_ref( &num2 );
|
|
norm2 = num2;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// num1 is not set if str1 or str2 is not a string => do normalization.
|
|
norm1 = collator_normalize_sort_argument( str1 );
|
|
|
|
// if num1 is not set then num2 is not set as well => do normalization.
|
|
norm2 = collator_normalize_sort_argument( str2 );
|
|
}
|
|
|
|
rc = compare_function( result, norm1, norm2 TSRMLS_CC );
|
|
|
|
zval_ptr_dtor( &norm1 );
|
|
zval_ptr_dtor( &norm2 );
|
|
}
|
|
|
|
if( num1 )
|
|
zval_ptr_dtor( &num1 );
|
|
|
|
if( num2 )
|
|
zval_ptr_dtor( &num2 );
|
|
|
|
zval_ptr_dtor( &str1 );
|
|
zval_ptr_dtor( &str2 );
|
|
|
|
return rc;
|
|
}
|
|
/* }}} */
|
|
|
|
/* {{{ collator_numeric_compare_function
|
|
* Convert input args to double and compare it.
|
|
*/
|
|
static int collator_numeric_compare_function(zval *result, zval *op1, zval *op2 TSRMLS_DC)
|
|
{
|
|
int rc = SUCCESS;
|
|
zval* num1 = NULL;
|
|
zval* num2 = NULL;
|
|
|
|
if( Z_TYPE_P(op1) == IS_STRING )
|
|
{
|
|
num1 = collator_convert_string_to_double( op1 );
|
|
op1 = num1;
|
|
}
|
|
|
|
if( Z_TYPE_P(op2) == IS_STRING )
|
|
{
|
|
num2 = collator_convert_string_to_double( op2 );
|
|
op2 = num2;
|
|
}
|
|
|
|
rc = numeric_compare_function( result, op1, op2 TSRMLS_CC);
|
|
|
|
if( num1 )
|
|
zval_ptr_dtor( &num1 );
|
|
if( num2 )
|
|
zval_ptr_dtor( &num2 );
|
|
|
|
return rc;
|
|
}
|
|
/* }}} */
|
|
|
|
/* {{{ collator_icu_compare_function
|
|
* Direct use of ucol_strcoll.
|
|
*/
|
|
static int collator_icu_compare_function(zval *result, zval *op1, zval *op2 TSRMLS_DC)
|
|
{
|
|
int rc = SUCCESS;
|
|
Collator_object* co = NULL;
|
|
zval* str1 = NULL;
|
|
zval* str2 = NULL;
|
|
|
|
str1 = collator_make_printable_zval( op1 );
|
|
str2 = collator_make_printable_zval( op2 );
|
|
|
|
// Fetch collator object.
|
|
co = (Collator_object *) zend_object_store_get_object( INTL_G(current_collator) TSRMLS_CC );
|
|
|
|
// Compare the strings using ICU.
|
|
result->value.lval = ucol_strcoll(
|
|
co->ucoll,
|
|
INTL_Z_STRVAL_P(str1), INTL_Z_STRLEN_P(str1),
|
|
INTL_Z_STRVAL_P(str2), INTL_Z_STRLEN_P(str2) );
|
|
result->type = IS_LONG;
|
|
|
|
zval_ptr_dtor( &str1 );
|
|
zval_ptr_dtor( &str2 );
|
|
|
|
return rc;
|
|
}
|
|
/* }}} */
|
|
|
|
/* {{{ collator_compare_func
|
|
* Taken from PHP5 source (array_data_compare).
|
|
*/
|
|
static int collator_compare_func( const void* a, const void* b TSRMLS_DC )
|
|
{
|
|
Bucket *f;
|
|
Bucket *s;
|
|
zval result;
|
|
zval *first;
|
|
zval *second;
|
|
|
|
f = *((Bucket **) a);
|
|
s = *((Bucket **) b);
|
|
|
|
first = *((zval **) f->pData);
|
|
second = *((zval **) s->pData);
|
|
|
|
if( INTL_G(compare_func)( &result, first, second TSRMLS_CC) == FAILURE )
|
|
return 0;
|
|
|
|
if( Z_TYPE(result) == IS_DOUBLE )
|
|
{
|
|
if( Z_DVAL(result) < 0 )
|
|
return -1;
|
|
else if( Z_DVAL(result) > 0 )
|
|
return 1;
|
|
else
|
|
return 0;
|
|
}
|
|
|
|
convert_to_long(&result);
|
|
|
|
if( Z_LVAL(result) < 0 )
|
|
return -1;
|
|
else if( Z_LVAL(result) > 0 )
|
|
return 1;
|
|
|
|
return 0;
|
|
}
|
|
/* }}} */
|
|
|
|
/* {{{ collator_cmp_sort_keys
|
|
* Compare sort keys
|
|
*/
|
|
static int collator_cmp_sort_keys( const void *p1, const void *p2 TSRMLS_DC )
|
|
{
|
|
char* key1 = ((collator_sort_key_index_t*)p1)->key;
|
|
char* key2 = ((collator_sort_key_index_t*)p2)->key;
|
|
|
|
return strcmp( key1, key2 );
|
|
}
|
|
/* }}} */
|
|
|
|
/* {{{ collator_get_compare_function
|
|
* Choose compare function according to sort flags.
|
|
*/
|
|
static collator_compare_func_t collator_get_compare_function( const long sort_flags )
|
|
{
|
|
collator_compare_func_t func;
|
|
|
|
switch( sort_flags )
|
|
{
|
|
case COLLATOR_SORT_NUMERIC:
|
|
func = collator_numeric_compare_function;
|
|
break;
|
|
|
|
case COLLATOR_SORT_STRING:
|
|
func = collator_icu_compare_function;
|
|
break;
|
|
|
|
case COLLATOR_SORT_REGULAR:
|
|
default:
|
|
func = collator_regular_compare_function;
|
|
break;
|
|
}
|
|
|
|
return func;
|
|
}
|
|
/* }}} */
|
|
|
|
/* {{{ collator_sort_internal
|
|
* Common code shared by collator_sort() and collator_asort() API functions.
|
|
*/
|
|
static void collator_sort_internal( int renumber, INTERNAL_FUNCTION_PARAMETERS )
|
|
{
|
|
zval* array = NULL;
|
|
HashTable* hash = NULL;
|
|
zval* saved_collator = NULL;
|
|
long sort_flags = COLLATOR_SORT_REGULAR;
|
|
|
|
COLLATOR_METHOD_INIT_VARS
|
|
|
|
// Parse parameters.
|
|
if( zend_parse_method_parameters( ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "Oa|l",
|
|
&object, Collator_ce_ptr, &array, &sort_flags ) == FAILURE )
|
|
{
|
|
intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
|
|
"collator_sort_internal: unable to parse input params", 0 TSRMLS_CC );
|
|
|
|
RETURN_FALSE;
|
|
}
|
|
|
|
// Fetch the object.
|
|
COLLATOR_METHOD_FETCH_OBJECT;
|
|
|
|
// Set 'compare function' according to sort flags.
|
|
INTL_G(compare_func) = collator_get_compare_function( sort_flags );
|
|
|
|
hash = HASH_OF( array );
|
|
|
|
// Convert strings in the specified array from UTF-8 to UTF-16.
|
|
collator_convert_hash_from_utf8_to_utf16( hash, COLLATOR_ERROR_CODE_P( co ) );
|
|
COLLATOR_CHECK_STATUS( co, "Error converting hash from UTF-8 to UTF-16" );
|
|
|
|
// Save specified collator in the request-global (?) variable.
|
|
saved_collator = INTL_G( current_collator );
|
|
INTL_G( current_collator ) = object;
|
|
|
|
// Sort specified array.
|
|
zend_hash_sort( hash, zend_qsort, collator_compare_func, renumber TSRMLS_CC );
|
|
|
|
// Restore saved collator.
|
|
INTL_G( current_collator ) = saved_collator;
|
|
|
|
// Convert strings in the specified array back to UTF-8.
|
|
collator_convert_hash_from_utf16_to_utf8( hash, COLLATOR_ERROR_CODE_P( co ) );
|
|
COLLATOR_CHECK_STATUS( co, "Error converting hash from UTF-16 to UTF-8" );
|
|
|
|
RETURN_TRUE;
|
|
}
|
|
/* }}} */
|
|
|
|
/* {{{ proto bool Collator::sort( Collator $coll, array(string) $arr [, int $sort_flags] )
|
|
* Sort array using specified collator. }}} */
|
|
/* {{{ proto bool collator_sort( Collator $coll, array(string) $arr [, int $sort_flags] )
|
|
* Sort array using specified collator.
|
|
*/
|
|
PHP_FUNCTION( collator_sort )
|
|
{
|
|
collator_sort_internal( TRUE, INTERNAL_FUNCTION_PARAM_PASSTHRU );
|
|
}
|
|
/* }}} */
|
|
|
|
/* {{{ proto bool Collator::sortWithSortKeys( Collator $coll, array(string) $arr )
|
|
* Equivalent to standard PHP sort using Collator.
|
|
* Uses ICU ucol_getSortKey for performance. }}} */
|
|
/* {{{ proto bool collator_sort_with_sort_keys( Collator $coll, array(string) $arr )
|
|
* Equivalent to standard PHP sort using Collator.
|
|
* Uses ICU ucol_getSortKey for performance.
|
|
*/
|
|
PHP_FUNCTION( collator_sort_with_sort_keys )
|
|
{
|
|
zval* array = NULL;
|
|
HashTable* hash = NULL;
|
|
zval** hashData = NULL; // currently processed item of input hash
|
|
|
|
char* sortKeyBuf = NULL; // buffer to store sort keys
|
|
uint32_t sortKeyBufSize = DEF_SORT_KEYS_BUF_SIZE; // buffer size
|
|
ptrdiff_t sortKeyBufOffset = 0; // pos in buffer to store sort key
|
|
int32_t sortKeyLen = 0; // the length of currently processing key
|
|
uint32_t bufLeft = 0;
|
|
uint32_t bufIncrement = 0;
|
|
|
|
collator_sort_key_index_t* sortKeyIndxBuf = NULL; // buffer to store 'indexes' which will be passed to 'qsort'
|
|
uint32_t sortKeyIndxBufSize = DEF_SORT_KEYS_INDX_BUF_SIZE;
|
|
uint32_t sortKeyIndxSize = sizeof( collator_sort_key_index_t );
|
|
|
|
uint32_t sortKeyCount = 0;
|
|
uint32_t j = 0;
|
|
|
|
UChar* utf16_buf = NULL; // tmp buffer to hold current processing string in utf-16
|
|
int utf16_buf_size = DEF_UTF16_BUF_SIZE; // the length of utf16_buf
|
|
int utf16_len = 0; // length of converted string
|
|
|
|
HashTable* sortedHash = NULL;
|
|
|
|
COLLATOR_METHOD_INIT_VARS
|
|
|
|
// Parse parameters.
|
|
if( zend_parse_method_parameters( ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "Oa",
|
|
&object, Collator_ce_ptr, &array ) == FAILURE )
|
|
{
|
|
intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
|
|
"collator_sort_with_sort_keys: unable to parse input params", 0 TSRMLS_CC );
|
|
|
|
RETURN_FALSE;
|
|
}
|
|
|
|
// Fetch the object.
|
|
COLLATOR_METHOD_FETCH_OBJECT;
|
|
|
|
|
|
/*
|
|
* Sort specified array.
|
|
*/
|
|
hash = HASH_OF( array );
|
|
|
|
if( !hash || zend_hash_num_elements( hash ) == 0 )
|
|
RETURN_TRUE;
|
|
|
|
// Create bufers
|
|
sortKeyBuf = ecalloc( sortKeyBufSize, sizeof( char ) );
|
|
sortKeyIndxBuf = ecalloc( sortKeyIndxBufSize, sizeof( uint8_t ) );
|
|
utf16_buf = eumalloc( utf16_buf_size );
|
|
|
|
// Iterate through input hash and create a sort key for each value.
|
|
zend_hash_internal_pointer_reset( hash );
|
|
while( zend_hash_get_current_data( hash, (void**) &hashData ) == SUCCESS )
|
|
{
|
|
// Convert current hash item from UTF-8 to UTF-16LE and save the result to utf16_buf.
|
|
|
|
utf16_len = utf16_buf_size;
|
|
|
|
// Process string values only.
|
|
if( Z_TYPE_PP( hashData ) == IS_STRING )
|
|
{
|
|
intl_convert_utf8_to_utf16( &utf16_buf, &utf16_len, Z_STRVAL_PP( hashData ), Z_STRLEN_PP( hashData ), COLLATOR_ERROR_CODE_P( co ) );
|
|
|
|
if( U_FAILURE( COLLATOR_ERROR_CODE( co ) ) )
|
|
{
|
|
intl_error_set_code( NULL, COLLATOR_ERROR_CODE( co ) TSRMLS_CC );
|
|
intl_errors_set_custom_msg( COLLATOR_ERROR_P( co ), "Sort with sort keys failed", 0 TSRMLS_CC );
|
|
|
|
if( utf16_buf )
|
|
efree( utf16_buf );
|
|
|
|
efree( sortKeyIndxBuf );
|
|
efree( sortKeyBuf );
|
|
|
|
RETURN_FALSE;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// Set empty string
|
|
utf16_len = 0;
|
|
utf16_buf[utf16_len] = 0;
|
|
}
|
|
|
|
if( (utf16_len + 1) > utf16_buf_size )
|
|
utf16_buf_size = utf16_len + 1;
|
|
|
|
// Get sort key, reallocating the buffer if needed.
|
|
bufLeft = sortKeyBufSize - sortKeyBufOffset;
|
|
|
|
sortKeyLen = ucol_getSortKey( co->ucoll,
|
|
utf16_buf,
|
|
utf16_len,
|
|
(uint8_t*)sortKeyBuf + sortKeyBufOffset,
|
|
bufLeft );
|
|
|
|
// check for sortKeyBuf overflow, increasing its size of the buffer if needed
|
|
if( sortKeyLen > bufLeft )
|
|
{
|
|
bufIncrement = ( sortKeyLen > DEF_SORT_KEYS_BUF_INCREMENT ) ? sortKeyLen : DEF_SORT_KEYS_BUF_INCREMENT;
|
|
|
|
sortKeyBufSize += bufIncrement;
|
|
bufLeft += bufIncrement;
|
|
|
|
sortKeyBuf = erealloc( sortKeyBuf, sortKeyBufSize );
|
|
|
|
sortKeyLen = ucol_getSortKey( co->ucoll, utf16_buf, utf16_len, (uint8_t*)sortKeyBuf + sortKeyBufOffset, bufLeft );
|
|
}
|
|
|
|
// check sortKeyIndxBuf overflow, increasing its size of the buffer if needed
|
|
if( ( sortKeyCount + 1 ) * sortKeyIndxSize > sortKeyIndxBufSize )
|
|
{
|
|
bufIncrement = ( sortKeyIndxSize > DEF_SORT_KEYS_INDX_BUF_INCREMENT ) ? sortKeyIndxSize : DEF_SORT_KEYS_INDX_BUF_INCREMENT;
|
|
|
|
sortKeyIndxBufSize += bufIncrement;
|
|
|
|
sortKeyIndxBuf = erealloc( sortKeyIndxBuf, sortKeyIndxBufSize );
|
|
}
|
|
|
|
sortKeyIndxBuf[sortKeyCount].key = (char*)sortKeyBufOffset; // remeber just offset, cause address
|
|
// of 'sortKeyBuf' may be changed due to realloc.
|
|
sortKeyIndxBuf[sortKeyCount].zstr = hashData;
|
|
|
|
sortKeyBufOffset += sortKeyLen;
|
|
++sortKeyCount;
|
|
|
|
zend_hash_move_forward( hash );
|
|
}
|
|
|
|
// update ptrs to point to valid keys.
|
|
for( j = 0; j < sortKeyCount; j++ )
|
|
sortKeyIndxBuf[j].key = sortKeyBuf + (ptrdiff_t)sortKeyIndxBuf[j].key;
|
|
|
|
// sort it
|
|
zend_qsort( sortKeyIndxBuf, sortKeyCount, sortKeyIndxSize, collator_cmp_sort_keys TSRMLS_CC );
|
|
|
|
// for resulting hash we'll assign new hash keys rather then reordering
|
|
ALLOC_HASHTABLE( sortedHash );
|
|
zend_hash_init( sortedHash, 0, NULL, ZVAL_PTR_DTOR, 0 );
|
|
|
|
for( j = 0; j < sortKeyCount; j++ )
|
|
{
|
|
zval_add_ref( sortKeyIndxBuf[j].zstr );
|
|
zend_hash_next_index_insert( sortedHash, sortKeyIndxBuf[j].zstr, sizeof(zval **), NULL );
|
|
}
|
|
|
|
// Save sorted hash into return variable.
|
|
zval_dtor( array );
|
|
(array)->value.ht = sortedHash;
|
|
(array)->type = IS_ARRAY;
|
|
|
|
if( utf16_buf )
|
|
efree( utf16_buf );
|
|
|
|
efree( sortKeyIndxBuf );
|
|
efree( sortKeyBuf );
|
|
|
|
RETURN_TRUE;
|
|
}
|
|
/* }}} */
|
|
|
|
/* {{{ proto bool Collator::asort( Collator $coll, array(string) $arr )
|
|
* Sort array using specified collator, maintaining index association. }}} */
|
|
/* {{{ proto bool collator_asort( Collator $coll, array(string) $arr )
|
|
* Sort array using specified collator, maintaining index association.
|
|
*/
|
|
PHP_FUNCTION( collator_asort )
|
|
{
|
|
collator_sort_internal( FALSE, INTERNAL_FUNCTION_PARAM_PASSTHRU );
|
|
}
|
|
/* }}} */
|
|
|
|
/*
|
|
* Local variables:
|
|
* tab-width: 4
|
|
* c-basic-offset: 4
|
|
* End:
|
|
* vim600: noet sw=4 ts=4 fdm=marker
|
|
* vim<600: noet sw=4 ts=4
|
|
*/
|