Add UConverter class (ICU's UConverter API)

RFC at http://wiki.php.net/rfc/uconverter
This commit is contained in:
Sara Golemon 2012-12-05 15:07:36 -08:00 committed by Gustavo Lopes
parent 37c304b5db
commit 1faddd15d9
13 changed files with 1366 additions and 0 deletions

View File

@ -34,6 +34,7 @@ if test "$PHP_INTL" != "no"; then
common/common_error.c \
common/common_enum.cpp \
common/common_date.cpp \
converter/converter.c \
formatter/formatter.c \
formatter/formatter_main.c \
formatter/formatter_class.c \
@ -86,6 +87,7 @@ if test "$PHP_INTL" != "no"; then
idn/idn.c \
$icu_spoof_src, $ext_shared,,$ICU_INCS -Wno-write-strings)
PHP_ADD_BUILD_DIR($ext_builddir/collator)
PHP_ADD_BUILD_DIR($ext_builddir/converter)
PHP_ADD_BUILD_DIR($ext_builddir/common)
PHP_ADD_BUILD_DIR($ext_builddir/formatter)
PHP_ADD_BUILD_DIR($ext_builddir/normalizer)

View File

@ -26,6 +26,9 @@ if (PHP_INTL != "no") {
common_enum.cpp \
common_date.cpp \
", "intl");
ADD_SOURCES(configure_module_dirname + "/converter", "\
converter.c \
", "intl");
ADD_SOURCES(configure_module_dirname + "/formatter", "\
formatter.c \
formatter_attr.c \

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,28 @@
/*
+----------------------------------------------------------------------+
| PHP Version 5 |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| http://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Authors: Sara Golemon <pollita@php.net> |
+----------------------------------------------------------------------+
*/
/* Include guard for the intl converter header. */
#ifndef PHP_INTL_CONVERTER_H
#define PHP_INTL_CONVERTER_H
/* Pull in config.h only when the build defines HAVE_CONFIG_H. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "php.h"
/*
 * Module-startup hook for the converter component.
 *
 * Takes the standard INIT_FUNC_ARGS parameter list, so a MINIT handler can
 * forward its own arguments with INIT_FUNC_ARGS_PASSTHRU.
 *
 * NOTE(review): presumably registers the UConverter class and its constants;
 * the implementation is not visible from this header -- confirm in
 * converter.c. The meaning of the int return value is likewise not shown
 * here; TODO confirm.
 */
int php_converter_minit(INIT_FUNC_ARGS);
#endif /* PHP_INTL_CONVERTER_H */

View File

@ -34,6 +34,8 @@
#include "collator/collator_create.h"
#include "collator/collator_error.h"
#include "converter/converter.h"
#include "formatter/formatter.h"
#include "formatter/formatter_class.h"
#include "formatter/formatter_attr.h"
@ -986,6 +988,9 @@ PHP_MINIT_FUNCTION( intl )
/* Global error handling. */
intl_error_init( NULL TSRMLS_CC );
/* 'Converter' class for codepage conversions */
php_converter_minit(INIT_FUNC_ARGS_PASSTHRU);
return SUCCESS;
}
/* }}} */

View File

@ -0,0 +1,21 @@
--TEST--
UConverter Enumerations
--SKIPIF--
<?php
// Emit the skip marker when the intl extension is not loaded.
if (!extension_loaded('intl')) {
    echo 'skip';
}
?>
--FILE--
<?php
// The list returned by getAvailable() is expected to hold more than 100 names.
$converters = UConverter::getAvailable();
var_dump(100 < count($converters));

// Spot-check a few names that the test expects to be present in that list.
foreach (array('UTF-7', 'CESU-8', 'ISO-8859-1') as $expectedName) {
    var_dump(in_array($expectedName, $converters));
}

// The aliases reported for 'latin1' are expected to include ISO-8859-1.
$latin1Aliases = UConverter::getAliases('latin1');
var_dump(in_array('ISO-8859-1', $latin1Aliases));
--EXPECT--
bool(true)
bool(true)
bool(true)
bool(true)
bool(true)

View File

@ -0,0 +1,17 @@
--TEST--
Basic UConverter::transcode() usage
--SKIPIF--
<?php
// Emit the skip marker when the intl extension is not loaded.
if (!extension_loaded('intl')) {
    echo 'skip';
}
?>
--FILE--
<?php
// Plain ASCII text: the expected output is the same 23-byte string.
$asciiResult = UConverter::transcode("This is an ascii string", 'utf-8', 'latin1');
var_dump($asciiResult);

// urlencode so that non-ascii shows up parsable in phpt file
foreach (array("Espa\xF1ol", "Stra\xDFa") as $latin1Word) {
    var_dump(urlencode(UConverter::transcode($latin1Word, 'utf-8', 'latin1')));
}

// Single koi8-r byte 0xE4, dumped as hex so the utf-8 result stays printable.
$koi8Result = UConverter::transcode("\xE4", 'utf-8', 'koi8-r');
var_dump(bin2hex($koi8Result));
--EXPECT--
string(23) "This is an ascii string"
string(12) "Espa%C3%B1ol"
string(11) "Stra%C3%9Fa"
string(4) "d094"

View File

@ -0,0 +1,31 @@
--TEST--
Basic UConverter::transcode() w/ Substitution
--SKIPIF--
<?php
// Emit the skip marker when the intl extension is not loaded.
if (!extension_loaded('intl')) {
    echo 'skip';
}
?>
--INI--
intl.use_exceptions=false
--FILE--
<?php
// Two utf-8 inputs converted to ascii: one plain ASCII string, and one that
// contains the three-byte sequence E2 98 83, which needs substitution.
$inputs = array(
    "This is an ascii string",
    "Snowman: (\xE2\x98\x83)",
);

// Try each candidate substitution string against both inputs, in order.
foreach (array('?', '', '??') as $subst) {
    $opts = array('to_subst' => $subst);
    foreach ($inputs as $input) {
        $ret = UConverter::transcode($input, 'ascii', 'utf-8', $opts);
        if ($ret !== NULL) {
            var_dump($ret);
            continue;
        }
        // A NULL result signals failure; report the intl error message instead.
        echo "Error: ", intl_get_error_message(), "\n";
    }
}
--EXPECTF--
string(23) "This is an ascii string"
string(12) "Snowman: (?)"
Error: transcode() returned error 1: U_ILLEGAL_ARGUMENT_ERROR: U_ILLEGAL_ARGUMENT_ERROR
Error: transcode() returned error 1: U_ILLEGAL_ARGUMENT_ERROR: U_ILLEGAL_ARGUMENT_ERROR
Error: transcode() returned error 1: U_ILLEGAL_ARGUMENT_ERROR: U_ILLEGAL_ARGUMENT_ERROR
Error: transcode() returned error 1: U_ILLEGAL_ARGUMENT_ERROR: U_ILLEGAL_ARGUMENT_ERROR

View File

@ -0,0 +1,18 @@
--TEST--
UConverter Algorithmic converters
--SKIPIF--
<?php
// Emit the skip marker when the intl extension is not loaded.
if (!extension_loaded('intl')) {
    echo 'skip';
}
?>
--FILE--
<?php
// The constructor takes (destination, source): the first argument is matched
// against getDestinationType() and the second against getSourceType() below.
$converter = new UConverter('utf-8', 'latin1');
var_dump($converter->getSourceType() === UConverter::LATIN_1);
var_dump($converter->getDestinationType() === UConverter::UTF8);

// Second pairing: utf-32be source, koi8-r destination (expected type: SBCS).
$converter = new UConverter('koi8-r', 'utf-32be');
var_dump($converter->getSourceType() === UConverter::UTF32_BigEndian);
var_dump($converter->getDestinationType() === UConverter::SBCS);
--EXPECT--
bool(true)
bool(true)
bool(true)
bool(true)

View File

@ -0,0 +1,21 @@
--TEST--
Basic UConverter::convert() usage
--SKIPIF--
<?php
// Emit the skip marker when the intl extension is not loaded.
if (!extension_loaded('intl')) {
    echo 'skip';
}
?>
--FILE--
<?php
// Converter with utf-8 as destination and latin1 as source.
$latin1ToUtf8 = new UConverter('utf-8', 'latin1');

$plain = $latin1ToUtf8->convert("This is an ascii string");
var_dump($plain);

// urlencode so that non-ascii shows up parsable in phpt file
$withTilde = $latin1ToUtf8->convert("Espa\xF1ol"); // U+00F1 LATIN SMALL LETTER N WITH TILDE
var_dump(urlencode($withTilde));

$withSharpS = $latin1ToUtf8->convert("Stra\xDFa"); // U+00DF LATIN SMALL LETTER SHARP S
var_dump(urlencode($withSharpS));

// Passing true as the second argument runs the conversion in reverse.
$reversed = $latin1ToUtf8->convert("Stra\xC3\x9Fa", true); // Reverse prior op
var_dump(urlencode($reversed));

// Separate converter with koi8-r as the source encoding.
$koi8ToUtf8 = new UConverter('utf-8', 'koi8-r');
$cyrillic = $koi8ToUtf8->convert("\xE4"); // U+0414 CYRILLIC CAPITAL LETTER DE
var_dump(bin2hex($cyrillic));
--EXPECT--
string(23) "This is an ascii string"
string(12) "Espa%C3%B1ol"
string(11) "Stra%C3%9Fa"
string(8) "Stra%DFa"
string(4) "d094"

View File

@ -0,0 +1,52 @@
--TEST--
UConverter::convert() w/ Callback Reasons
--SKIPIF--
<?php
// Emit the skip marker when the intl extension is not loaded.
if (!extension_loaded('intl')) {
    echo 'skip';
}
?>
--FILE--
<?php
/**
 * UConverter subclass that prints the reason for every conversion callback
 * and then defers to the parent implementation.
 */
class MyConverter extends UConverter {
    /**
     * Called during conversion from source encoding to internal UChar representation
     *
     * Prints the textual form of $reason, then returns the parent's result.
     */
    public function toUCallback($reason, $source, $codeUnits, &$error) {
        $reasonName = UConverter::reasonText($reason);
        echo "toUCallback(", $reasonName, ", ...)\n";
        $result = parent::toUCallback($reason, $source, $codeUnits, $error);
        return $result;
    }

    /**
     * Called during conversion from internal UChar to destination encoding
     *
     * Prints the textual form of $reason, then returns the parent's result.
     */
    public function fromUCallback($reason, $source, $codePoint, &$error) {
        $reasonName = UConverter::reasonText($reason);
        echo "fromUCallback(", $reasonName, ", ...)\n";
        $result = parent::fromUCallback($reason, $source, $codePoint, $error);
        return $result;
    }
}
// Convert three utf-8 inputs to ascii. The results are discarded; only the
// callback trace printed by MyConverter is of interest.
$converter = new MyConverter('ascii', 'utf-8');
$words = array("regular", "irregul\xC1\xA1r", "\xC2\xA1unsupported!");
for ($i = 0; $i < count($words); $i++) {
    $converter->convert($words[$i]);
}
--EXPECT--
toUCallback(REASON_RESET, ...)
toUCallback(REASON_RESET, ...)
fromUCallback(REASON_RESET, ...)
fromUCallback(REASON_RESET, ...)
toUCallback(REASON_RESET, ...)
toUCallback(REASON_ILLEGAL, ...)
toUCallback(REASON_RESET, ...)
toUCallback(REASON_ILLEGAL, ...)
fromUCallback(REASON_RESET, ...)
fromUCallback(REASON_UNASSIGNED, ...)
fromUCallback(REASON_RESET, ...)
fromUCallback(REASON_UNASSIGNED, ...)
toUCallback(REASON_RESET, ...)
toUCallback(REASON_RESET, ...)
fromUCallback(REASON_RESET, ...)
fromUCallback(REASON_UNASSIGNED, ...)
fromUCallback(REASON_RESET, ...)
fromUCallback(REASON_UNASSIGNED, ...)
toUCallback(REASON_CLOSE, ...)
fromUCallback(REASON_CLOSE, ...)
toUCallback(REASON_CLOSE, ...)
fromUCallback(REASON_CLOSE, ...)

View File

@ -0,0 +1,40 @@
--TEST--
UConverter::convert() w/ Callback Return Values
--SKIPIF--
<?php
// Emit the skip marker when the intl extension is not loaded.
if (!extension_loaded('intl')) {
    echo 'skip';
}
?>
--FILE--
<?php
/**
 * UConverter subclass whose callbacks return each kind of replacement value
 * (NULL, string, integer, array) for selected inputs.
 */
class MyConverter extends UConverter {
    /**
     * Called during conversion from source encoding to internal UChar representation
     *
     * Clears $error, then picks a replacement based on the offending code
     * units. Anything not listed yields NULL.
     */
    public function toUCallback($reason, $source, $codeUnits, &$error) {
        $error = U_ZERO_ERROR;
        $replacement = NULL;
        switch ($codeUnits) {
            case "\x80":
                $replacement = NULL;
                break;
            case "\x81":
                $replacement = 'a';
                break;
            case "\x82":
                $replacement = ord('b');
                break;
            case "\x83":
                $replacement = array('c');
                break;
        }
        return $replacement;
    }

    /**
     * Called during conversion from internal UChar to destination encoding
     *
     * Clears $error, then picks a replacement based on the offending code
     * point. Anything not listed yields NULL.
     */
    public function fromUCallback($reason, $source, $codePoint, &$error) {
        $error = U_ZERO_ERROR;
        $replacement = NULL;
        switch ($codePoint) {
            case 0x00F1:
                $replacement = "A";
                break;
            case 0x00F2:
                $replacement = ord("B");
                break;
            case 0x00F3:
                $replacement = array("C");
                break;
            case 0x00F4:
                $replacement = NULL;
                break;
        }
        return $replacement;
    }
}
$converter = new MyConverter('ascii', 'utf-8');

// This line will trigger toUCallback
$fromInvalidBytes = $converter->convert("\x80\x81\x82\x83");
var_dump($fromInvalidBytes);

// This line will trigger fromUCallback
$fromUnmappable = $converter->convert("\xC3\xB1\xC3\xB2\xC3\xB3\xC3\xB4");
var_dump($fromUnmappable);
--EXPECT--
string(3) "abc"
string(3) "ABC"

View File

@ -0,0 +1,24 @@
--TEST--
Basic UConverter::convert() w/ Substitution
--SKIPIF--
<?php
// Emit the skip marker when the intl extension is not loaded.
if (!extension_loaded('intl')) {
    echo 'skip';
}
?>
--INI--
intl.use_exceptions=false
--FILE--
<?php
// Converter with ascii as destination and utf-8 as source.
$converter = new UConverter('ascii', 'utf-8');

// Try each candidate substitution string. Only run the conversions when the
// converter accepts it; otherwise note the rejection and move on.
foreach (array('?', '', '<unknown>') as $subst) {
    if ($converter->setSubstChars($subst)) {
        var_dump($converter->convert("This is an ascii string"));
        var_dump($converter->convert("Snowman: (\xE2\x98\x83)"));
    } else {
        echo "**Disallowed\n";
    }
}
--EXPECT--
string(23) "This is an ascii string"
string(12) "Snowman: (?)"
**Disallowed
**Disallowed