php-src/ext/mbstring/ucgendat/uctest.php
Peter Kokot 02294f0c84 Make PHP development tools files and scripts executable
This patch makes several scripts and PHP development tool files
executable and adds proper shebangs to the PHP scripts.

The `#!/usr/bin/env php` shebang allows running the script via
`./script.php` and uses env to locate the PHP binary on the system.
At the same time, the script can still be run with a user-defined
PHP location using `php script.php`.
2018-08-29 20:58:17 +02:00

133 lines
3.8 KiB
PHP
Executable File

#!/usr/bin/env php
<?php error_reporting(E_ALL);

// The three data files are looked up in the directory that contains this script.
$dir = __DIR__;
$unicodeDataFile = "$dir/UnicodeData.txt";
$caseFoldingFile = "$dir/CaseFolding.txt";
$specialCasingFile = "$dir/SpecialCasing.txt";

// Stop at the first missing input file; no checks are run in that case.
$files = [$unicodeDataFile, $caseFoldingFile, $specialCasingFile];
foreach ($files as $file) {
    if (file_exists($file)) {
        continue;
    }
    echo "File $file does not exist.\n";
    return;
}

// Feed the full contents of each data file to its matching check.
testUnicodeData(file_get_contents($unicodeDataFile));
testCaseFolding(file_get_contents($caseFoldingFile));
testSpecialCasing(file_get_contents($specialCasingFile));
/**
 * Lazily splits the text of a data file into records.
 *
 * Everything from the first '#' on a line is treated as a comment and dropped.
 * Lines that are empty after trimming are skipped. Each remaining line is split
 * on ';' and every field is trimmed.
 *
 * @param string $input Complete contents of the data file.
 * @return Generator Yields one array of trimmed string fields per data line.
 */
function parseDataFile(string $input) {
    foreach (explode("\n", $input) as $rawLine) {
        // Keep only the part in front of the first '#'.
        [$content] = explode('#', $rawLine, 2);
        $content = trim($content);
        if ($content !== '') {
            yield array_map('trim', explode(';', $content));
        }
    }
}
/**
 * Parses a whitespace-separated list of hexadecimal code points.
 *
 * Tokens are separated by runs of whitespace and empty tokens are ignored, so
 * an empty field yields an empty list rather than a spurious U+0000 entry, and
 * repeated or surrounding spaces do not inject extra zero code points.
 *
 * @param string $strCodes Hex code points such as "0053 0073"; may be empty.
 * @return int[] The code points in their original order.
 */
function parseCodes(string $strCodes) : array {
    // PREG_SPLIT_NO_EMPTY drops the empty pieces produced by an empty string or
    // by leading, trailing or doubled separators.
    $tokens = preg_split('/\s+/', $strCodes, -1, PREG_SPLIT_NO_EMPTY);
    if ($tokens === false) {
        return [];
    }

    $codes = [];
    foreach ($tokens as $strCode) {
        $codes[] = intval($strCode, 16);
    }
    return $codes;
}
/**
 * Compares mb_convert_case() on a single code point with an expected result.
 *
 * The expected string is the concatenation of the characters for $newCodes.
 * When the converted character differs from it, one line of the form
 * "<type>: <actual> != <expected>" is printed; nothing is printed on a match.
 *
 * @param int   $type     Case-conversion mode passed to mb_convert_case().
 * @param int   $origCode Code point of the character to convert.
 * @param int[] $newCodes Code points of the expected result, in order.
 */
function testCaseMap($type, int $origCode, array $newCodes) {
    $newStr = implode('', array_map('mb_chr', $newCodes));
    $mbNewStr = mb_convert_case(mb_chr($origCode), $type);
    if ($mbNewStr === $newStr) {
        return;
    }
    echo "$type: $mbNewStr != $newStr\n";
}
/**
 * Checks a one-to-one case mapping through testCaseMap().
 *
 * A $newCode of 0 means no mapping was given; the character is then expected
 * to map to itself.
 *
 * @param int $type     Case-conversion mode passed through to testCaseMap().
 * @param int $origCode Code point of the character to convert.
 * @param int $newCode  Expected code point, or 0 when there is no mapping.
 */
function testSimpleCaseMap($type, int $origCode, int $newCode) {
    testCaseMap($type, $origCode, [$newCode ?: $origCode]);
}
/**
 * Checks the simple upper- and lower-case mappings found in UnicodeData.txt.
 *
 * Every record is expected to have 15 fields. Field 0 is the code point, and
 * fields 12 and 13 are read as the hexadecimal simple upper-case and lower-case
 * mappings. An empty mapping field parses to 0, which testSimpleCaseMap()
 * treats as "maps to itself".
 *
 * @param string $input Complete contents of UnicodeData.txt.
 */
function testUnicodeData(string $input) {
    foreach (parseDataFile($input) as $fields) {
        assert(count($fields) === 15);

        $code = intval($fields[0], 16);
        $upperCase = intval($fields[12], 16);
        $lowerCase = intval($fields[13], 16);
        testSimpleCaseMap(MB_CASE_UPPER_SIMPLE, $code, $upperCase);
        testSimpleCaseMap(MB_CASE_LOWER_SIMPLE, $code, $lowerCase);

        // Unfortunately MB_CASE_TITLE does not actually return the title case, even when passed
        // only a single character. It does ad-hoc magic based on the character class, so that
        // certain characters, such as roman numerals or circled characters will not be
        // title-cased. The title-case mapping (field 14) is therefore not checked here.

        // The full upper-case and case-fold conversions are still run on every code point,
        // but their results are not compared against anything.
        $chr = mb_chr($code);
        mb_strtoupper($chr);
        mb_convert_case($chr, MB_CASE_FOLD);
    }
}
/**
 * Checks the case-folding mappings found in CaseFolding.txt.
 *
 * Every record is expected to have 4 fields: code point, status, mapping and a
 * trailing field. Status 'C' and 'S' records are checked as single-code-point
 * mappings with MB_CASE_FOLD_SIMPLE. Status 'F' records are checked as
 * multi-code-point mappings with MB_CASE_FOLD. Any other status is skipped.
 *
 * @param string $input Complete contents of CaseFolding.txt.
 */
function testCaseFolding(string $input) {
    foreach (parseDataFile($input) as $fields) {
        assert(count($fields) == 4);
        $code = intval($fields[0], 16);

        switch ($fields[1]) {
            case 'C':
            case 'S':
                testSimpleCaseMap(MB_CASE_FOLD_SIMPLE, $code, intval($fields[2], 16));
                break;

            case 'F':
                testCaseMap(MB_CASE_FOLD, $code, parseCodes($fields[2]));
                break;
        }
    }
}
/**
 * Checks the unconditional full case mappings found in SpecialCasing.txt.
 *
 * Every record is expected to have at least 5 fields: code point, lower-case
 * mapping, title-case mapping, upper-case mapping and condition. Records with
 * a non-empty condition are skipped. The rest are checked with MB_CASE_LOWER,
 * MB_CASE_UPPER and MB_CASE_TITLE.
 *
 * @param string $input Complete contents of SpecialCasing.txt.
 */
function testSpecialCasing(string $input) {
    foreach (parseDataFile($input) as $fields) {
        assert(count($fields) >= 5);

        [$codeHex, $lowerHex, $titleHex, $upperHex, $cond] = $fields;
        $code = intval($codeHex, 16);
        $lower = parseCodes($lowerHex);
        $title = parseCodes($titleHex);
        $upper = parseCodes($upperHex);

        // We don't support conditional mappings, so only unconditional ones are checked.
        if (!$cond) {
            testCaseMap(MB_CASE_LOWER, $code, $lower);
            testCaseMap(MB_CASE_UPPER, $code, $upper);
            testCaseMap(MB_CASE_TITLE, $code, $title);
        }
    }
}