php-src/ext/intl/collator/collator_sort.c
Felipe Pena eb1837d9de MFH:
- Added arginfo
- Fixed WS
- Changed C++ comments to C comments
2008-08-11 19:48:00 +00:00

534 lines
15 KiB
C
Executable File

/*
+----------------------------------------------------------------------+
| PHP Version 5 |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| http://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Authors: Vadim Savchuk <vsavchuk@productengine.com> |
| Dmitry Lakhtyuk <dlakhtyuk@productengine.com> |
+----------------------------------------------------------------------+
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "php_intl.h"
#include "collator.h"
#include "collator_class.h"
#include "collator_sort.h"
#include "collator_convert.h"
#include "intl_convert.h"
#if !defined(HAVE_PTRDIFF_T) && !defined(_PTRDIFF_T_DEFINED)
typedef long ptrdiff_t;
#endif
/**
* Declare 'index' which will point to sort key in sort key
* buffer.
*/
typedef struct _collator_sort_key_index {
char* key; /* pointer to sort key */
zval** zstr; /* pointer to original string(hash-item) */
} collator_sort_key_index_t;
ZEND_EXTERN_MODULE_GLOBALS( intl )
static const size_t DEF_SORT_KEYS_BUF_SIZE = 1048576;
static const size_t DEF_SORT_KEYS_BUF_INCREMENT = 1048576;
static const size_t DEF_SORT_KEYS_INDX_BUF_SIZE = 1048576;
static const size_t DEF_SORT_KEYS_INDX_BUF_INCREMENT = 1048576;
static const size_t DEF_UTF16_BUF_SIZE = 1024;
/* {{{ collator_regular_compare_function */
static int collator_regular_compare_function(zval *result, zval *op1, zval *op2 TSRMLS_DC)
{
Collator_object* co = NULL;
int rc = SUCCESS;
zval* str1 = collator_convert_object_to_string( op1 TSRMLS_CC );
zval* str2 = collator_convert_object_to_string( op2 TSRMLS_CC );
zval* num1 = NULL;
zval* num2 = NULL;
zval* norm1 = NULL;
zval* norm2 = NULL;
/* If both args are strings AND either of args is not numeric string
* then use ICU-compare. Otherwise PHP-compare. */
if( Z_TYPE_P(str1) == IS_STRING && Z_TYPE_P(str2) == IS_STRING &&
( str1 == ( num1 = collator_convert_string_to_number_if_possible( str1 ) ) ||
str2 == ( num2 = collator_convert_string_to_number_if_possible( str2 ) ) ) )
{
/* Fetch collator object. */
co = (Collator_object *) zend_object_store_get_object( INTL_G(current_collator) TSRMLS_CC );
/* Compare the strings using ICU. */
result->value.lval = ucol_strcoll(
co->ucoll,
INTL_Z_STRVAL_P(str1), INTL_Z_STRLEN_P(str1),
INTL_Z_STRVAL_P(str2), INTL_Z_STRLEN_P(str2) );
result->type = IS_LONG;
}
else
{
/* num1 is set if str1 and str2 are strings. */
if( num1 )
{
if( num1 == str1 )
{
/* str1 is string but not numeric string
* just convert it to utf8.
*/
norm1 = collator_convert_zstr_utf16_to_utf8( str1 );
/* num2 is not set but str2 is string => do normalization. */
norm2 = collator_normalize_sort_argument( str2 );
}
else
{
/* str1 is numeric strings => passthru to PHP-compare. */
zval_add_ref( &num1 );
norm1 = num1;
/* str2 is numeric strings => passthru to PHP-compare. */
zval_add_ref( &num2 );
norm2 = num2;
}
}
else
{
/* num1 is not set if str1 or str2 is not a string => do normalization. */
norm1 = collator_normalize_sort_argument( str1 );
/* if num1 is not set then num2 is not set as well => do normalization. */
norm2 = collator_normalize_sort_argument( str2 );
}
rc = compare_function( result, norm1, norm2 TSRMLS_CC );
zval_ptr_dtor( &norm1 );
zval_ptr_dtor( &norm2 );
}
if( num1 )
zval_ptr_dtor( &num1 );
if( num2 )
zval_ptr_dtor( &num2 );
zval_ptr_dtor( &str1 );
zval_ptr_dtor( &str2 );
return rc;
}
/* }}} */
/* {{{ collator_numeric_compare_function
* Convert input args to double and compare it.
*/
static int collator_numeric_compare_function(zval *result, zval *op1, zval *op2 TSRMLS_DC)
{
int rc = SUCCESS;
zval* num1 = NULL;
zval* num2 = NULL;
if( Z_TYPE_P(op1) == IS_STRING )
{
num1 = collator_convert_string_to_double( op1 );
op1 = num1;
}
if( Z_TYPE_P(op2) == IS_STRING )
{
num2 = collator_convert_string_to_double( op2 );
op2 = num2;
}
rc = numeric_compare_function( result, op1, op2 TSRMLS_CC);
if( num1 )
zval_ptr_dtor( &num1 );
if( num2 )
zval_ptr_dtor( &num2 );
return rc;
}
/* }}} */
/* {{{ collator_icu_compare_function
* Direct use of ucol_strcoll.
*/
static int collator_icu_compare_function(zval *result, zval *op1, zval *op2 TSRMLS_DC)
{
int rc = SUCCESS;
Collator_object* co = NULL;
zval* str1 = NULL;
zval* str2 = NULL;
str1 = collator_make_printable_zval( op1 );
str2 = collator_make_printable_zval( op2 );
/* Fetch collator object. */
co = (Collator_object *) zend_object_store_get_object( INTL_G(current_collator) TSRMLS_CC );
/* Compare the strings using ICU. */
result->value.lval = ucol_strcoll(
co->ucoll,
INTL_Z_STRVAL_P(str1), INTL_Z_STRLEN_P(str1),
INTL_Z_STRVAL_P(str2), INTL_Z_STRLEN_P(str2) );
result->type = IS_LONG;
zval_ptr_dtor( &str1 );
zval_ptr_dtor( &str2 );
return rc;
}
/* }}} */
/* {{{ collator_compare_func
* Taken from PHP5 source (array_data_compare).
*/
static int collator_compare_func( const void* a, const void* b TSRMLS_DC )
{
Bucket *f;
Bucket *s;
zval result;
zval *first;
zval *second;
f = *((Bucket **) a);
s = *((Bucket **) b);
first = *((zval **) f->pData);
second = *((zval **) s->pData);
if( INTL_G(compare_func)( &result, first, second TSRMLS_CC) == FAILURE )
return 0;
if( Z_TYPE(result) == IS_DOUBLE )
{
if( Z_DVAL(result) < 0 )
return -1;
else if( Z_DVAL(result) > 0 )
return 1;
else
return 0;
}
convert_to_long(&result);
if( Z_LVAL(result) < 0 )
return -1;
else if( Z_LVAL(result) > 0 )
return 1;
return 0;
}
/* }}} */
/* {{{ collator_cmp_sort_keys
* Compare sort keys
*/
static int collator_cmp_sort_keys( const void *p1, const void *p2 TSRMLS_DC )
{
char* key1 = ((collator_sort_key_index_t*)p1)->key;
char* key2 = ((collator_sort_key_index_t*)p2)->key;
return strcmp( key1, key2 );
}
/* }}} */
/* {{{ collator_get_compare_function
* Choose compare function according to sort flags.
*/
static collator_compare_func_t collator_get_compare_function( const long sort_flags )
{
collator_compare_func_t func;
switch( sort_flags )
{
case COLLATOR_SORT_NUMERIC:
func = collator_numeric_compare_function;
break;
case COLLATOR_SORT_STRING:
func = collator_icu_compare_function;
break;
case COLLATOR_SORT_REGULAR:
default:
func = collator_regular_compare_function;
break;
}
return func;
}
/* }}} */
/* {{{ collator_sort_internal
* Common code shared by collator_sort() and collator_asort() API functions.
*/
static void collator_sort_internal( int renumber, INTERNAL_FUNCTION_PARAMETERS )
{
zval* array = NULL;
HashTable* hash = NULL;
zval* saved_collator = NULL;
long sort_flags = COLLATOR_SORT_REGULAR;
COLLATOR_METHOD_INIT_VARS
/* Parse parameters. */
if( zend_parse_method_parameters( ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "Oa|l",
&object, Collator_ce_ptr, &array, &sort_flags ) == FAILURE )
{
intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
"collator_sort_internal: unable to parse input params", 0 TSRMLS_CC );
RETURN_FALSE;
}
/* Fetch the object. */
COLLATOR_METHOD_FETCH_OBJECT;
/* Set 'compare function' according to sort flags. */
INTL_G(compare_func) = collator_get_compare_function( sort_flags );
hash = HASH_OF( array );
/* Convert strings in the specified array from UTF-8 to UTF-16. */
collator_convert_hash_from_utf8_to_utf16( hash, COLLATOR_ERROR_CODE_P( co ) );
COLLATOR_CHECK_STATUS( co, "Error converting hash from UTF-8 to UTF-16" );
/* Save specified collator in the request-global (?) variable. */
saved_collator = INTL_G( current_collator );
INTL_G( current_collator ) = object;
/* Sort specified array. */
zend_hash_sort( hash, zend_qsort, collator_compare_func, renumber TSRMLS_CC );
/* Restore saved collator. */
INTL_G( current_collator ) = saved_collator;
/* Convert strings in the specified array back to UTF-8. */
collator_convert_hash_from_utf16_to_utf8( hash, COLLATOR_ERROR_CODE_P( co ) );
COLLATOR_CHECK_STATUS( co, "Error converting hash from UTF-16 to UTF-8" );
RETURN_TRUE;
}
/* }}} */
/* {{{ proto bool Collator::sort( Collator $coll, array(string) $arr [, int $sort_flags] )
* Sort array using specified collator. }}} */
/* {{{ proto bool collator_sort( Collator $coll, array(string) $arr [, int $sort_flags] )
* Sort array using specified collator.
*/
PHP_FUNCTION( collator_sort )
{
collator_sort_internal( TRUE, INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
/* {{{ proto bool Collator::sortWithSortKeys( Collator $coll, array(string) $arr )
* Equivalent to standard PHP sort using Collator.
* Uses ICU ucol_getSortKey for performance. }}} */
/* {{{ proto bool collator_sort_with_sort_keys( Collator $coll, array(string) $arr )
* Equivalent to standard PHP sort using Collator.
* Uses ICU ucol_getSortKey for performance.
*/
PHP_FUNCTION( collator_sort_with_sort_keys )
{
zval* array = NULL;
HashTable* hash = NULL;
zval** hashData = NULL; /* currently processed item of input hash */
char* sortKeyBuf = NULL; /* buffer to store sort keys */
uint32_t sortKeyBufSize = DEF_SORT_KEYS_BUF_SIZE; /* buffer size */
ptrdiff_t sortKeyBufOffset = 0; /* pos in buffer to store sort key */
int32_t sortKeyLen = 0; /* the length of currently processing key */
uint32_t bufLeft = 0;
uint32_t bufIncrement = 0;
collator_sort_key_index_t* sortKeyIndxBuf = NULL; /* buffer to store 'indexes' which will be passed to 'qsort' */
uint32_t sortKeyIndxBufSize = DEF_SORT_KEYS_INDX_BUF_SIZE;
uint32_t sortKeyIndxSize = sizeof( collator_sort_key_index_t );
uint32_t sortKeyCount = 0;
uint32_t j = 0;
UChar* utf16_buf = NULL; /* tmp buffer to hold current processing string in utf-16 */
int utf16_buf_size = DEF_UTF16_BUF_SIZE; /* the length of utf16_buf */
int utf16_len = 0; /* length of converted string */
HashTable* sortedHash = NULL;
COLLATOR_METHOD_INIT_VARS
/* Parse parameters. */
if( zend_parse_method_parameters( ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "Oa",
&object, Collator_ce_ptr, &array ) == FAILURE )
{
intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
"collator_sort_with_sort_keys: unable to parse input params", 0 TSRMLS_CC );
RETURN_FALSE;
}
/* Fetch the object. */
COLLATOR_METHOD_FETCH_OBJECT;
/*
* Sort specified array.
*/
hash = HASH_OF( array );
if( !hash || zend_hash_num_elements( hash ) == 0 )
RETURN_TRUE;
/* Create bufers */
sortKeyBuf = ecalloc( sortKeyBufSize, sizeof( char ) );
sortKeyIndxBuf = ecalloc( sortKeyIndxBufSize, sizeof( uint8_t ) );
utf16_buf = eumalloc( utf16_buf_size );
/* Iterate through input hash and create a sort key for each value. */
zend_hash_internal_pointer_reset( hash );
while( zend_hash_get_current_data( hash, (void**) &hashData ) == SUCCESS )
{
/* Convert current hash item from UTF-8 to UTF-16LE and save the result to utf16_buf. */
utf16_len = utf16_buf_size;
/* Process string values only. */
if( Z_TYPE_PP( hashData ) == IS_STRING )
{
intl_convert_utf8_to_utf16( &utf16_buf, &utf16_len, Z_STRVAL_PP( hashData ), Z_STRLEN_PP( hashData ), COLLATOR_ERROR_CODE_P( co ) );
if( U_FAILURE( COLLATOR_ERROR_CODE( co ) ) )
{
intl_error_set_code( NULL, COLLATOR_ERROR_CODE( co ) TSRMLS_CC );
intl_errors_set_custom_msg( COLLATOR_ERROR_P( co ), "Sort with sort keys failed", 0 TSRMLS_CC );
if( utf16_buf )
efree( utf16_buf );
efree( sortKeyIndxBuf );
efree( sortKeyBuf );
RETURN_FALSE;
}
}
else
{
/* Set empty string */
utf16_len = 0;
utf16_buf[utf16_len] = 0;
}
if( (utf16_len + 1) > utf16_buf_size )
utf16_buf_size = utf16_len + 1;
/* Get sort key, reallocating the buffer if needed. */
bufLeft = sortKeyBufSize - sortKeyBufOffset;
sortKeyLen = ucol_getSortKey( co->ucoll,
utf16_buf,
utf16_len,
(uint8_t*)sortKeyBuf + sortKeyBufOffset,
bufLeft );
/* check for sortKeyBuf overflow, increasing its size of the buffer if needed */
if( sortKeyLen > bufLeft )
{
bufIncrement = ( sortKeyLen > DEF_SORT_KEYS_BUF_INCREMENT ) ? sortKeyLen : DEF_SORT_KEYS_BUF_INCREMENT;
sortKeyBufSize += bufIncrement;
bufLeft += bufIncrement;
sortKeyBuf = erealloc( sortKeyBuf, sortKeyBufSize );
sortKeyLen = ucol_getSortKey( co->ucoll, utf16_buf, utf16_len, (uint8_t*)sortKeyBuf + sortKeyBufOffset, bufLeft );
}
/* check sortKeyIndxBuf overflow, increasing its size of the buffer if needed */
if( ( sortKeyCount + 1 ) * sortKeyIndxSize > sortKeyIndxBufSize )
{
bufIncrement = ( sortKeyIndxSize > DEF_SORT_KEYS_INDX_BUF_INCREMENT ) ? sortKeyIndxSize : DEF_SORT_KEYS_INDX_BUF_INCREMENT;
sortKeyIndxBufSize += bufIncrement;
sortKeyIndxBuf = erealloc( sortKeyIndxBuf, sortKeyIndxBufSize );
}
sortKeyIndxBuf[sortKeyCount].key = (char*)sortKeyBufOffset; /* remeber just offset, cause address */
/* of 'sortKeyBuf' may be changed due to realloc. */
sortKeyIndxBuf[sortKeyCount].zstr = hashData;
sortKeyBufOffset += sortKeyLen;
++sortKeyCount;
zend_hash_move_forward( hash );
}
/* update ptrs to point to valid keys. */
for( j = 0; j < sortKeyCount; j++ )
sortKeyIndxBuf[j].key = sortKeyBuf + (ptrdiff_t)sortKeyIndxBuf[j].key;
/* sort it */
zend_qsort( sortKeyIndxBuf, sortKeyCount, sortKeyIndxSize, collator_cmp_sort_keys TSRMLS_CC );
/* for resulting hash we'll assign new hash keys rather then reordering */
ALLOC_HASHTABLE( sortedHash );
zend_hash_init( sortedHash, 0, NULL, ZVAL_PTR_DTOR, 0 );
for( j = 0; j < sortKeyCount; j++ )
{
zval_add_ref( sortKeyIndxBuf[j].zstr );
zend_hash_next_index_insert( sortedHash, sortKeyIndxBuf[j].zstr, sizeof(zval **), NULL );
}
/* Save sorted hash into return variable. */
zval_dtor( array );
(array)->value.ht = sortedHash;
(array)->type = IS_ARRAY;
if( utf16_buf )
efree( utf16_buf );
efree( sortKeyIndxBuf );
efree( sortKeyBuf );
RETURN_TRUE;
}
/* }}} */
/* {{{ proto bool Collator::asort( Collator $coll, array(string) $arr )
* Sort array using specified collator, maintaining index association. }}} */
/* {{{ proto bool collator_asort( Collator $coll, array(string) $arr )
* Sort array using specified collator, maintaining index association.
*/
PHP_FUNCTION( collator_asort )
{
collator_sort_internal( FALSE, INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
/*
* Local variables:
* tab-width: 4
* c-basic-offset: 4
* End:
* vim600: noet sw=4 ts=4 fdm=marker
* vim<600: noet sw=4 ts=4
*/