mirror of
https://github.com/php/php-src.git
synced 2024-09-22 02:17:32 +00:00
Add UConverter class (ICU's UConverter API)
RFC at http://wiki.php.net/rfc/uconverter
This commit is contained in:
parent
37c304b5db
commit
1faddd15d9
@ -34,6 +34,7 @@ if test "$PHP_INTL" != "no"; then
|
||||
common/common_error.c \
|
||||
common/common_enum.cpp \
|
||||
common/common_date.cpp \
|
||||
converter/converter.c \
|
||||
formatter/formatter.c \
|
||||
formatter/formatter_main.c \
|
||||
formatter/formatter_class.c \
|
||||
@ -86,6 +87,7 @@ if test "$PHP_INTL" != "no"; then
|
||||
idn/idn.c \
|
||||
$icu_spoof_src, $ext_shared,,$ICU_INCS -Wno-write-strings)
|
||||
PHP_ADD_BUILD_DIR($ext_builddir/collator)
|
||||
PHP_ADD_BUILD_DIR($ext_builddir/converter)
|
||||
PHP_ADD_BUILD_DIR($ext_builddir/common)
|
||||
PHP_ADD_BUILD_DIR($ext_builddir/formatter)
|
||||
PHP_ADD_BUILD_DIR($ext_builddir/normalizer)
|
||||
|
@ -26,6 +26,9 @@ if (PHP_INTL != "no") {
|
||||
common_enum.cpp \
|
||||
common_date.cpp \
|
||||
", "intl");
|
||||
ADD_SOURCES(configure_module_dirname + "/converter", "\
|
||||
converter.c \
|
||||
", "intl");
|
||||
ADD_SOURCES(configure_module_dirname + "/formatter", "\
|
||||
formatter.c \
|
||||
formatter_attr.c \
|
||||
|
1104
ext/intl/converter/converter.c
Normal file
1104
ext/intl/converter/converter.c
Normal file
File diff suppressed because it is too large
Load Diff
28
ext/intl/converter/converter.h
Normal file
28
ext/intl/converter/converter.h
Normal file
@ -0,0 +1,28 @@
|
||||
/*
|
||||
+----------------------------------------------------------------------+
|
||||
| PHP Version 5 |
|
||||
+----------------------------------------------------------------------+
|
||||
| This source file is subject to version 3.01 of the PHP license, |
|
||||
| that is bundled with this package in the file LICENSE, and is |
|
||||
| available through the world-wide-web at the following url: |
|
||||
| http://www.php.net/license/3_01.txt |
|
||||
| If you did not receive a copy of the PHP license and are unable to |
|
||||
| obtain it through the world-wide-web, please send a note to |
|
||||
| license@php.net so we can mail you a copy immediately. |
|
||||
+----------------------------------------------------------------------+
|
||||
| Authors: Sara Golemon <pollita@php.net> |
|
||||
+----------------------------------------------------------------------+
|
||||
*/
|
||||
|
||||
#ifndef PHP_INTL_CONVERTER_H
|
||||
#define PHP_INTL_CONVERTER_H
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "php.h"
|
||||
|
||||
int php_converter_minit(INIT_FUNC_ARGS);
|
||||
|
||||
#endif /* PHP_INTL_CONVERTER_H */
|
@ -34,6 +34,8 @@
|
||||
#include "collator/collator_create.h"
|
||||
#include "collator/collator_error.h"
|
||||
|
||||
#include "converter/converter.h"
|
||||
|
||||
#include "formatter/formatter.h"
|
||||
#include "formatter/formatter_class.h"
|
||||
#include "formatter/formatter_attr.h"
|
||||
@ -986,6 +988,9 @@ PHP_MINIT_FUNCTION( intl )
|
||||
/* Global error handling. */
|
||||
intl_error_init( NULL TSRMLS_CC );
|
||||
|
||||
/* 'UConverter' class for codepage conversions */
|
||||
php_converter_minit(INIT_FUNC_ARGS_PASSTHRU);
|
||||
|
||||
return SUCCESS;
|
||||
}
|
||||
/* }}} */
|
||||
|
21
ext/intl/tests/uconverter_enum.phpt
Normal file
21
ext/intl/tests/uconverter_enum.phpt
Normal file
@ -0,0 +1,21 @@
|
||||
--TEST--
|
||||
UConverter Enumerations
|
||||
--SKIPIF--
|
||||
<?php if( !extension_loaded( 'intl' ) ) print 'skip'; ?>
|
||||
--FILE--
|
||||
<?php
|
||||
$avail = UConverter::getAvailable();
|
||||
var_dump(count($avail) > 100);
|
||||
var_dump(in_array('UTF-7', $avail));
|
||||
var_dump(in_array('CESU-8', $avail));
|
||||
var_dump(in_array('ISO-8859-1', $avail));
|
||||
|
||||
$latin1 = UConverter::getAliases('latin1');
|
||||
var_dump(in_array('ISO-8859-1', $latin1));
|
||||
|
||||
--EXPECT--
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
17
ext/intl/tests/uconverter_func_basic.phpt
Normal file
17
ext/intl/tests/uconverter_func_basic.phpt
Normal file
@ -0,0 +1,17 @@
|
||||
--TEST--
|
||||
Basic UConverter::transcode() usage
|
||||
--SKIPIF--
|
||||
<?php if( !extension_loaded( 'intl' ) ) print 'skip'; ?>
|
||||
--FILE--
|
||||
<?php
|
||||
var_dump(UConverter::transcode("This is an ascii string", 'utf-8', 'latin1'));
|
||||
// urlencode so that non-ascii shows up parsable in phpt file
|
||||
var_dump(urlencode(UConverter::transcode("Espa\xF1ol", 'utf-8', 'latin1')));
|
||||
var_dump(urlencode(UConverter::transcode("Stra\xDFa", 'utf-8', 'latin1')));
|
||||
|
||||
var_dump(bin2hex(UConverter::transcode("\xE4", 'utf-8', 'koi8-r')));
|
||||
--EXPECT--
|
||||
string(23) "This is an ascii string"
|
||||
string(12) "Espa%C3%B1ol"
|
||||
string(11) "Stra%C3%9Fa"
|
||||
string(4) "d094"
|
31
ext/intl/tests/uconverter_func_subst.phpt
Normal file
31
ext/intl/tests/uconverter_func_subst.phpt
Normal file
@ -0,0 +1,31 @@
|
||||
--TEST--
|
||||
Basic UConverter::transcode() w/ Substitution
|
||||
--SKIPIF--
|
||||
<?php if( !extension_loaded( 'intl' ) ) print 'skip'; ?>
|
||||
--INI--
|
||||
intl.use_exceptions=false
|
||||
--FILE--
|
||||
<?php
|
||||
foreach(array('?','','??') as $subst) {
|
||||
$opts = array('to_subst' => $subst);
|
||||
$ret = UConverter::transcode("This is an ascii string", 'ascii', 'utf-8', $opts);
|
||||
if ($ret === NULL) {
|
||||
echo "Error: ", intl_get_error_message(), "\n";
|
||||
} else {
|
||||
var_dump($ret);
|
||||
}
|
||||
$ret = UConverter::transcode("Snowman: (\xE2\x98\x83)", 'ascii', 'utf-8', $opts);
|
||||
if ($ret === NULL) {
|
||||
echo "Error: ", intl_get_error_message(), "\n";
|
||||
} else {
|
||||
var_dump($ret);
|
||||
}
|
||||
}
|
||||
|
||||
--EXPECTF--
|
||||
string(23) "This is an ascii string"
|
||||
string(12) "Snowman: (?)"
|
||||
Error: transcode() returned error 1: U_ILLEGAL_ARGUMENT_ERROR: U_ILLEGAL_ARGUMENT_ERROR
|
||||
Error: transcode() returned error 1: U_ILLEGAL_ARGUMENT_ERROR: U_ILLEGAL_ARGUMENT_ERROR
|
||||
Error: transcode() returned error 1: U_ILLEGAL_ARGUMENT_ERROR: U_ILLEGAL_ARGUMENT_ERROR
|
||||
Error: transcode() returned error 1: U_ILLEGAL_ARGUMENT_ERROR: U_ILLEGAL_ARGUMENT_ERROR
|
18
ext/intl/tests/uconverter_oop_algo.phpt
Normal file
18
ext/intl/tests/uconverter_oop_algo.phpt
Normal file
@ -0,0 +1,18 @@
|
||||
--TEST--
|
||||
UConverter Algorithmic converters
|
||||
--SKIPIF--
|
||||
<?php if( !extension_loaded( 'intl' ) ) print 'skip'; ?>
|
||||
--FILE--
|
||||
<?php
|
||||
$c = new UConverter('utf-8', 'latin1');
|
||||
var_dump(UConverter::LATIN_1 === $c->getSourceType());
|
||||
var_dump(UConverter::UTF8 === $c->getDestinationType());
|
||||
|
||||
$c = new UConverter('koi8-r', 'utf-32be');
|
||||
var_dump(UConverter::UTF32_BigEndian === $c->getSourceType());
|
||||
var_dump(UConverter::SBCS === $c->getDestinationType());
|
||||
--EXPECT--
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
21
ext/intl/tests/uconverter_oop_basic.phpt
Normal file
21
ext/intl/tests/uconverter_oop_basic.phpt
Normal file
@ -0,0 +1,21 @@
|
||||
--TEST--
|
||||
Basic UConverter::convert() usage
|
||||
--SKIPIF--
|
||||
<?php if( !extension_loaded( 'intl' ) ) print 'skip'; ?>
|
||||
--FILE--
|
||||
<?php
|
||||
$c = new UConverter('utf-8', 'latin1');
|
||||
var_dump($c->convert("This is an ascii string"));
|
||||
// urlencode so that non-ascii shows up parsable in phpt file
|
||||
var_dump(urlencode($c->convert("Espa\xF1ol"))); // U+00F1 LATIN SMALL LETTER N WITH TILDE
|
||||
var_dump(urlencode($c->convert("Stra\xDFa"))); // U+00DF LATIN SMALL LETTER SHARP S
|
||||
var_dump(urlencode($c->convert("Stra\xC3\x9Fa", true))); // Reverse prior op
|
||||
|
||||
$k = new UConverter('utf-8', 'koi8-r');
|
||||
var_dump(bin2hex($k->convert("\xE4"))); // U+0414 CYRILLIC CAPITAL LETTER DE
|
||||
--EXPECT--
|
||||
string(23) "This is an ascii string"
|
||||
string(12) "Espa%C3%B1ol"
|
||||
string(11) "Stra%C3%9Fa"
|
||||
string(8) "Stra%DFa"
|
||||
string(4) "d094"
|
52
ext/intl/tests/uconverter_oop_callback.phpt
Normal file
52
ext/intl/tests/uconverter_oop_callback.phpt
Normal file
@ -0,0 +1,52 @@
|
||||
--TEST--
|
||||
UConverter::convert() w/ Callback Reasons
|
||||
--SKIPIF--
|
||||
<?php if( !extension_loaded( 'intl' ) ) print 'skip'; ?>
|
||||
--FILE--
|
||||
<?php
|
||||
/**
* Test subclass of UConverter that echoes the reason of every callback
* invocation (rendered with UConverter::reasonText()) and then delegates to
* the parent implementation, returning the parent's result unchanged.
*/
class MyConverter extends UConverter {
|
||||
/**
|
||||
* Called during conversion from source encoding to internal UChar representation
*
* Prints "toUCallback(<reason text>, ...)" and forwards all four arguments,
* including the by-reference $error, to parent::toUCallback().
|
||||
*/
|
||||
public function toUCallback($reason, $source, $codeUnits, &$error) {
|
||||
echo "toUCallback(", UConverter::reasonText($reason), ", ...)\n";
|
||||
return parent::toUCallback($reason, $source, $codeUnits, $error);
|
||||
}
|
||||
|
||||
/**
|
||||
* Called during conversion from internal UChar to destination encoding
*
* Prints "fromUCallback(<reason text>, ...)" and forwards all four arguments,
* including the by-reference $error, to parent::fromUCallback().
|
||||
*/
|
||||
public function fromUCallback($reason, $source, $codePoint, &$error) {
|
||||
echo "fromUCallback(", UConverter::reasonText($reason), ", ...)\n";
|
||||
return parent::fromUCallback($reason, $source, $codePoint, $error);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
$c = new MyConverter('ascii', 'utf-8');
|
||||
foreach(array("regular", "irregul\xC1\xA1r", "\xC2\xA1unsupported!") as $word) {
|
||||
$c->convert($word);
|
||||
}
|
||||
--EXPECT--
|
||||
toUCallback(REASON_RESET, ...)
|
||||
toUCallback(REASON_RESET, ...)
|
||||
fromUCallback(REASON_RESET, ...)
|
||||
fromUCallback(REASON_RESET, ...)
|
||||
toUCallback(REASON_RESET, ...)
|
||||
toUCallback(REASON_ILLEGAL, ...)
|
||||
toUCallback(REASON_RESET, ...)
|
||||
toUCallback(REASON_ILLEGAL, ...)
|
||||
fromUCallback(REASON_RESET, ...)
|
||||
fromUCallback(REASON_UNASSIGNED, ...)
|
||||
fromUCallback(REASON_RESET, ...)
|
||||
fromUCallback(REASON_UNASSIGNED, ...)
|
||||
toUCallback(REASON_RESET, ...)
|
||||
toUCallback(REASON_RESET, ...)
|
||||
fromUCallback(REASON_RESET, ...)
|
||||
fromUCallback(REASON_UNASSIGNED, ...)
|
||||
fromUCallback(REASON_RESET, ...)
|
||||
fromUCallback(REASON_UNASSIGNED, ...)
|
||||
toUCallback(REASON_CLOSE, ...)
|
||||
fromUCallback(REASON_CLOSE, ...)
|
||||
toUCallback(REASON_CLOSE, ...)
|
||||
fromUCallback(REASON_CLOSE, ...)
|
40
ext/intl/tests/uconverter_oop_callback_return.phpt
Normal file
40
ext/intl/tests/uconverter_oop_callback_return.phpt
Normal file
@ -0,0 +1,40 @@
|
||||
--TEST--
|
||||
UConverter::convert() w/ Callback Return Values
|
||||
--SKIPIF--
|
||||
<?php if( !extension_loaded( 'intl' ) ) print 'skip'; ?>
|
||||
--FILE--
|
||||
<?php
|
||||
/**
* Test subclass of UConverter whose callbacks return each kind of value
* exercised by this test: NULL, a string, an integer and an array.
* Per the test's expected output ("abc" / "ABC"), the NULL return adds
* nothing to the result while the other three each add one character.
*/
class MyConverter extends UConverter {
|
||||
/**
* Called during conversion from source encoding to internal UChar representation
*
* Maps the code units "\x80".."\x83" to the replacement values below.
* Any other value of $codeUnits falls out of the switch, so the method
* returns NULL implicitly.
*/
public function toUCallback($reason, $source, $codeUnits, &$error) {
|
||||
// Overwrite the by-reference error code with U_ZERO_ERROR.
// NOTE(review): presumably this marks the sequence as handled so the
// returned replacement is used - confirm against the UConverter docs.
$error = U_ZERO_ERROR;
|
||||
switch ($codeUnits) {
|
||||
case "\x80": return NULL;
|
||||
case "\x81": return 'a';
|
||||
case "\x82": return ord('b');
|
||||
case "\x83": return array('c');
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Called during conversion from internal UChar to destination encoding
*
* Maps the code points 0x00F1..0x00F4 to the replacement values below.
* Any other value of $codePoint falls out of the switch, so the method
* returns NULL implicitly.
|
||||
*/
|
||||
public function fromUCallback($reason, $source, $codePoint, &$error) {
|
||||
// Overwrite the by-reference error code with U_ZERO_ERROR.
// NOTE(review): presumably this marks the code point as handled so the
// returned replacement is used - confirm against the UConverter docs.
$error = U_ZERO_ERROR;
|
||||
switch ($codePoint) {
|
||||
case 0x00F1: return "A";
|
||||
case 0x00F2: return ord("B");
|
||||
case 0x00F3: return array("C");
|
||||
case 0x00F4: return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
$c = new MyConverter('ascii', 'utf-8');
|
||||
// This line will trigger toUCallback
|
||||
var_dump($c->convert("\x80\x81\x82\x83"));
|
||||
// This line will trigger fromUCallback
|
||||
var_dump($c->convert("\xC3\xB1\xC3\xB2\xC3\xB3\xC3\xB4"));
|
||||
--EXPECT--
|
||||
string(3) "abc"
|
||||
string(3) "ABC"
|
24
ext/intl/tests/uconverter_oop_subst.phpt
Normal file
24
ext/intl/tests/uconverter_oop_subst.phpt
Normal file
@ -0,0 +1,24 @@
|
||||
--TEST--
|
||||
Basic UConverter::convert() w/ Substitution
|
||||
--SKIPIF--
|
||||
<?php if( !extension_loaded( 'intl' ) ) print 'skip'; ?>
|
||||
--INI--
|
||||
intl.use_exceptions=false
|
||||
--FILE--
|
||||
<?php
|
||||
$c = new UConverter('ascii', 'utf-8');
|
||||
|
||||
foreach(array('?','','<unknown>') as $subst) {
|
||||
if (!$c->setSubstChars($subst)) {
|
||||
echo "**Disallowed\n";
|
||||
continue;
|
||||
}
|
||||
var_dump($c->convert("This is an ascii string"));
|
||||
var_dump($c->convert("Snowman: (\xE2\x98\x83)"));
|
||||
}
|
||||
|
||||
--EXPECT--
|
||||
string(23) "This is an ascii string"
|
||||
string(12) "Snowman: (?)"
|
||||
**Disallowed
|
||||
**Disallowed
|
Loading…
Reference in New Issue
Block a user