mirror of
https://github.com/php/php-src.git
synced 2024-10-01 06:46:08 +00:00
406 lines
11 KiB
Plaintext
406 lines
11 KiB
Plaintext
|
--TEST--
|
||
|
Translation of HTML entities for encoding ISO-8859-5
|
||
|
--FILE--
|
||
|
<?php
|
||
|
// Lookup table for the upper half of ISO-8859-5, listed in byte order
// (0xA0 through 0xFF).
//   key   : Unicode code point
//   value : [single-byte value in the table, Unicode character name]
$arr = [
    0x00A0 => [0xA0, "NO-BREAK SPACE"],
    0x0401 => [0xA1, "CYRILLIC CAPITAL LETTER IO"],
    0x0402 => [0xA2, "CYRILLIC CAPITAL LETTER DJE"],
    0x0403 => [0xA3, "CYRILLIC CAPITAL LETTER GJE"],
    0x0404 => [0xA4, "CYRILLIC CAPITAL LETTER UKRAINIAN IE"],
    0x0405 => [0xA5, "CYRILLIC CAPITAL LETTER DZE"],
    0x0406 => [0xA6, "CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I"],
    0x0407 => [0xA7, "CYRILLIC CAPITAL LETTER YI"],
    0x0408 => [0xA8, "CYRILLIC CAPITAL LETTER JE"],
    0x0409 => [0xA9, "CYRILLIC CAPITAL LETTER LJE"],
    0x040A => [0xAA, "CYRILLIC CAPITAL LETTER NJE"],
    0x040B => [0xAB, "CYRILLIC CAPITAL LETTER TSHE"],
    0x040C => [0xAC, "CYRILLIC CAPITAL LETTER KJE"],
    0x00AD => [0xAD, "SOFT HYPHEN"],
    0x040E => [0xAE, "CYRILLIC CAPITAL LETTER SHORT U"],
    0x040F => [0xAF, "CYRILLIC CAPITAL LETTER DZHE"],

    0x0410 => [0xB0, "CYRILLIC CAPITAL LETTER A"],
    0x0411 => [0xB1, "CYRILLIC CAPITAL LETTER BE"],
    0x0412 => [0xB2, "CYRILLIC CAPITAL LETTER VE"],
    0x0413 => [0xB3, "CYRILLIC CAPITAL LETTER GHE"],
    0x0414 => [0xB4, "CYRILLIC CAPITAL LETTER DE"],
    0x0415 => [0xB5, "CYRILLIC CAPITAL LETTER IE"],
    0x0416 => [0xB6, "CYRILLIC CAPITAL LETTER ZHE"],
    0x0417 => [0xB7, "CYRILLIC CAPITAL LETTER ZE"],
    0x0418 => [0xB8, "CYRILLIC CAPITAL LETTER I"],
    0x0419 => [0xB9, "CYRILLIC CAPITAL LETTER SHORT I"],
    0x041A => [0xBA, "CYRILLIC CAPITAL LETTER KA"],
    0x041B => [0xBB, "CYRILLIC CAPITAL LETTER EL"],
    0x041C => [0xBC, "CYRILLIC CAPITAL LETTER EM"],
    0x041D => [0xBD, "CYRILLIC CAPITAL LETTER EN"],
    0x041E => [0xBE, "CYRILLIC CAPITAL LETTER O"],
    0x041F => [0xBF, "CYRILLIC CAPITAL LETTER PE"],

    0x0420 => [0xC0, "CYRILLIC CAPITAL LETTER ER"],
    0x0421 => [0xC1, "CYRILLIC CAPITAL LETTER ES"],
    0x0422 => [0xC2, "CYRILLIC CAPITAL LETTER TE"],
    0x0423 => [0xC3, "CYRILLIC CAPITAL LETTER U"],
    0x0424 => [0xC4, "CYRILLIC CAPITAL LETTER EF"],
    0x0425 => [0xC5, "CYRILLIC CAPITAL LETTER HA"],
    0x0426 => [0xC6, "CYRILLIC CAPITAL LETTER TSE"],
    0x0427 => [0xC7, "CYRILLIC CAPITAL LETTER CHE"],
    0x0428 => [0xC8, "CYRILLIC CAPITAL LETTER SHA"],
    0x0429 => [0xC9, "CYRILLIC CAPITAL LETTER SHCHA"],
    0x042A => [0xCA, "CYRILLIC CAPITAL LETTER HARD SIGN"],
    0x042B => [0xCB, "CYRILLIC CAPITAL LETTER YERU"],
    0x042C => [0xCC, "CYRILLIC CAPITAL LETTER SOFT SIGN"],
    0x042D => [0xCD, "CYRILLIC CAPITAL LETTER E"],
    0x042E => [0xCE, "CYRILLIC CAPITAL LETTER YU"],
    0x042F => [0xCF, "CYRILLIC CAPITAL LETTER YA"],

    0x0430 => [0xD0, "CYRILLIC SMALL LETTER A"],
    0x0431 => [0xD1, "CYRILLIC SMALL LETTER BE"],
    0x0432 => [0xD2, "CYRILLIC SMALL LETTER VE"],
    0x0433 => [0xD3, "CYRILLIC SMALL LETTER GHE"],
    0x0434 => [0xD4, "CYRILLIC SMALL LETTER DE"],
    0x0435 => [0xD5, "CYRILLIC SMALL LETTER IE"],
    0x0436 => [0xD6, "CYRILLIC SMALL LETTER ZHE"],
    0x0437 => [0xD7, "CYRILLIC SMALL LETTER ZE"],
    0x0438 => [0xD8, "CYRILLIC SMALL LETTER I"],
    0x0439 => [0xD9, "CYRILLIC SMALL LETTER SHORT I"],
    0x043A => [0xDA, "CYRILLIC SMALL LETTER KA"],
    0x043B => [0xDB, "CYRILLIC SMALL LETTER EL"],
    0x043C => [0xDC, "CYRILLIC SMALL LETTER EM"],
    0x043D => [0xDD, "CYRILLIC SMALL LETTER EN"],
    0x043E => [0xDE, "CYRILLIC SMALL LETTER O"],
    0x043F => [0xDF, "CYRILLIC SMALL LETTER PE"],

    0x0440 => [0xE0, "CYRILLIC SMALL LETTER ER"],
    0x0441 => [0xE1, "CYRILLIC SMALL LETTER ES"],
    0x0442 => [0xE2, "CYRILLIC SMALL LETTER TE"],
    0x0443 => [0xE3, "CYRILLIC SMALL LETTER U"],
    0x0444 => [0xE4, "CYRILLIC SMALL LETTER EF"],
    0x0445 => [0xE5, "CYRILLIC SMALL LETTER HA"],
    0x0446 => [0xE6, "CYRILLIC SMALL LETTER TSE"],
    0x0447 => [0xE7, "CYRILLIC SMALL LETTER CHE"],
    0x0448 => [0xE8, "CYRILLIC SMALL LETTER SHA"],
    0x0449 => [0xE9, "CYRILLIC SMALL LETTER SHCHA"],
    0x044A => [0xEA, "CYRILLIC SMALL LETTER HARD SIGN"],
    0x044B => [0xEB, "CYRILLIC SMALL LETTER YERU"],
    0x044C => [0xEC, "CYRILLIC SMALL LETTER SOFT SIGN"],
    0x044D => [0xED, "CYRILLIC SMALL LETTER E"],
    0x044E => [0xEE, "CYRILLIC SMALL LETTER YU"],
    0x044F => [0xEF, "CYRILLIC SMALL LETTER YA"],

    0x2116 => [0xF0, "NUMERO SIGN"],
    0x0451 => [0xF1, "CYRILLIC SMALL LETTER IO"],
    0x0452 => [0xF2, "CYRILLIC SMALL LETTER DJE"],
    0x0453 => [0xF3, "CYRILLIC SMALL LETTER GJE"],
    0x0454 => [0xF4, "CYRILLIC SMALL LETTER UKRAINIAN IE"],
    0x0455 => [0xF5, "CYRILLIC SMALL LETTER DZE"],
    0x0456 => [0xF6, "CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I"],
    0x0457 => [0xF7, "CYRILLIC SMALL LETTER YI"],
    0x0458 => [0xF8, "CYRILLIC SMALL LETTER JE"],
    0x0459 => [0xF9, "CYRILLIC SMALL LETTER LJE"],
    0x045A => [0xFA, "CYRILLIC SMALL LETTER NJE"],
    0x045B => [0xFB, "CYRILLIC SMALL LETTER TSHE"],
    0x045C => [0xFC, "CYRILLIC SMALL LETTER KJE"],
    0x00A7 => [0xFD, "SECTION SIGN"],
    0x045E => [0xFE, "CYRILLIC SMALL LETTER SHORT U"],
    0x045F => [0xFF, "CYRILLIC SMALL LETTER DZHE"],
];
|
||
|
|
||
|
// For every table row, print two lines followed by a blank separator:
//   1. the character name, the numeric reference built from the Unicode code
//      point, and a hex dump of what html_entity_decode() returns for it
//      under ISO-8859-5;
//   2. the numeric reference built from the byte value itself, and the
//      decoder's result -- shown verbatim when it still begins with "&#",
//      hex-dumped otherwise.
foreach ($arr as $codePoint => $row) {
    $byteValue = $row[0];
    $charName = $row[1];

    $unicodeRef = sprintf("&#x%X;", $codePoint);
    $fromUnicode = html_entity_decode($unicodeRef, ENT_QUOTES, 'ISO-8859-5');
    printf("%s: %s => %s\n", $charName, $unicodeRef, bin2hex($fromUnicode));

    $byteRef = sprintf("&#x%X;", $byteValue);
    $fromByte = html_entity_decode($byteRef, ENT_QUOTES, 'ISO-8859-5');
    // The second character is only inspected once the first is known to be "&".
    $leftAsReference = $fromByte[0] === "&" && $fromByte[1] === "#";
    printf("%s => %s\n\n", $byteRef, $leftAsReference ? $fromByte : bin2hex($fromByte));
}
|
||
|
--EXPECT--
|
||
|
NO-BREAK SPACE: &#xA0; => a0
&#xA0; => a0

CYRILLIC CAPITAL LETTER IO: &#x401; => a1
&#xA1; => &#xA1;

CYRILLIC CAPITAL LETTER DJE: &#x402; => a2
&#xA2; => &#xA2;

CYRILLIC CAPITAL LETTER GJE: &#x403; => a3
&#xA3; => &#xA3;

CYRILLIC CAPITAL LETTER UKRAINIAN IE: &#x404; => a4
&#xA4; => &#xA4;

CYRILLIC CAPITAL LETTER DZE: &#x405; => a5
&#xA5; => &#xA5;

CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I: &#x406; => a6
&#xA6; => &#xA6;

CYRILLIC CAPITAL LETTER YI: &#x407; => a7
&#xA7; => fd

CYRILLIC CAPITAL LETTER JE: &#x408; => a8
&#xA8; => &#xA8;

CYRILLIC CAPITAL LETTER LJE: &#x409; => a9
&#xA9; => &#xA9;

CYRILLIC CAPITAL LETTER NJE: &#x40A; => aa
&#xAA; => &#xAA;

CYRILLIC CAPITAL LETTER TSHE: &#x40B; => ab
&#xAB; => &#xAB;

CYRILLIC CAPITAL LETTER KJE: &#x40C; => ac
&#xAC; => &#xAC;

SOFT HYPHEN: &#xAD; => ad
&#xAD; => ad

CYRILLIC CAPITAL LETTER SHORT U: &#x40E; => ae
&#xAE; => &#xAE;

CYRILLIC CAPITAL LETTER DZHE: &#x40F; => af
&#xAF; => &#xAF;
|
||
|
|
||
|
CYRILLIC CAPITAL LETTER A: &#x410; => b0
&#xB0; => &#xB0;

CYRILLIC CAPITAL LETTER BE: &#x411; => b1
&#xB1; => &#xB1;

CYRILLIC CAPITAL LETTER VE: &#x412; => b2
&#xB2; => &#xB2;

CYRILLIC CAPITAL LETTER GHE: &#x413; => b3
&#xB3; => &#xB3;

CYRILLIC CAPITAL LETTER DE: &#x414; => b4
&#xB4; => &#xB4;

CYRILLIC CAPITAL LETTER IE: &#x415; => b5
&#xB5; => &#xB5;

CYRILLIC CAPITAL LETTER ZHE: &#x416; => b6
&#xB6; => &#xB6;

CYRILLIC CAPITAL LETTER ZE: &#x417; => b7
&#xB7; => &#xB7;

CYRILLIC CAPITAL LETTER I: &#x418; => b8
&#xB8; => &#xB8;

CYRILLIC CAPITAL LETTER SHORT I: &#x419; => b9
&#xB9; => &#xB9;

CYRILLIC CAPITAL LETTER KA: &#x41A; => ba
&#xBA; => &#xBA;

CYRILLIC CAPITAL LETTER EL: &#x41B; => bb
&#xBB; => &#xBB;

CYRILLIC CAPITAL LETTER EM: &#x41C; => bc
&#xBC; => &#xBC;

CYRILLIC CAPITAL LETTER EN: &#x41D; => bd
&#xBD; => &#xBD;

CYRILLIC CAPITAL LETTER O: &#x41E; => be
&#xBE; => &#xBE;

CYRILLIC CAPITAL LETTER PE: &#x41F; => bf
&#xBF; => &#xBF;
|
||
|
|
||
|
CYRILLIC CAPITAL LETTER ER: &#x420; => c0
&#xC0; => &#xC0;

CYRILLIC CAPITAL LETTER ES: &#x421; => c1
&#xC1; => &#xC1;

CYRILLIC CAPITAL LETTER TE: &#x422; => c2
&#xC2; => &#xC2;

CYRILLIC CAPITAL LETTER U: &#x423; => c3
&#xC3; => &#xC3;

CYRILLIC CAPITAL LETTER EF: &#x424; => c4
&#xC4; => &#xC4;

CYRILLIC CAPITAL LETTER HA: &#x425; => c5
&#xC5; => &#xC5;

CYRILLIC CAPITAL LETTER TSE: &#x426; => c6
&#xC6; => &#xC6;

CYRILLIC CAPITAL LETTER CHE: &#x427; => c7
&#xC7; => &#xC7;

CYRILLIC CAPITAL LETTER SHA: &#x428; => c8
&#xC8; => &#xC8;

CYRILLIC CAPITAL LETTER SHCHA: &#x429; => c9
&#xC9; => &#xC9;

CYRILLIC CAPITAL LETTER HARD SIGN: &#x42A; => ca
&#xCA; => &#xCA;

CYRILLIC CAPITAL LETTER YERU: &#x42B; => cb
&#xCB; => &#xCB;

CYRILLIC CAPITAL LETTER SOFT SIGN: &#x42C; => cc
&#xCC; => &#xCC;

CYRILLIC CAPITAL LETTER E: &#x42D; => cd
&#xCD; => &#xCD;

CYRILLIC CAPITAL LETTER YU: &#x42E; => ce
&#xCE; => &#xCE;

CYRILLIC CAPITAL LETTER YA: &#x42F; => cf
&#xCF; => &#xCF;
|
||
|
|
||
|
CYRILLIC SMALL LETTER A: &#x430; => d0
&#xD0; => &#xD0;

CYRILLIC SMALL LETTER BE: &#x431; => d1
&#xD1; => &#xD1;

CYRILLIC SMALL LETTER VE: &#x432; => d2
&#xD2; => &#xD2;

CYRILLIC SMALL LETTER GHE: &#x433; => d3
&#xD3; => &#xD3;

CYRILLIC SMALL LETTER DE: &#x434; => d4
&#xD4; => &#xD4;

CYRILLIC SMALL LETTER IE: &#x435; => d5
&#xD5; => &#xD5;

CYRILLIC SMALL LETTER ZHE: &#x436; => d6
&#xD6; => &#xD6;

CYRILLIC SMALL LETTER ZE: &#x437; => d7
&#xD7; => &#xD7;

CYRILLIC SMALL LETTER I: &#x438; => d8
&#xD8; => &#xD8;

CYRILLIC SMALL LETTER SHORT I: &#x439; => d9
&#xD9; => &#xD9;

CYRILLIC SMALL LETTER KA: &#x43A; => da
&#xDA; => &#xDA;

CYRILLIC SMALL LETTER EL: &#x43B; => db
&#xDB; => &#xDB;

CYRILLIC SMALL LETTER EM: &#x43C; => dc
&#xDC; => &#xDC;

CYRILLIC SMALL LETTER EN: &#x43D; => dd
&#xDD; => &#xDD;

CYRILLIC SMALL LETTER O: &#x43E; => de
&#xDE; => &#xDE;

CYRILLIC SMALL LETTER PE: &#x43F; => df
&#xDF; => &#xDF;
|
||
|
|
||
|
CYRILLIC SMALL LETTER ER: &#x440; => e0
&#xE0; => &#xE0;

CYRILLIC SMALL LETTER ES: &#x441; => e1
&#xE1; => &#xE1;

CYRILLIC SMALL LETTER TE: &#x442; => e2
&#xE2; => &#xE2;

CYRILLIC SMALL LETTER U: &#x443; => e3
&#xE3; => &#xE3;

CYRILLIC SMALL LETTER EF: &#x444; => e4
&#xE4; => &#xE4;

CYRILLIC SMALL LETTER HA: &#x445; => e5
&#xE5; => &#xE5;

CYRILLIC SMALL LETTER TSE: &#x446; => e6
&#xE6; => &#xE6;

CYRILLIC SMALL LETTER CHE: &#x447; => e7
&#xE7; => &#xE7;

CYRILLIC SMALL LETTER SHA: &#x448; => e8
&#xE8; => &#xE8;

CYRILLIC SMALL LETTER SHCHA: &#x449; => e9
&#xE9; => &#xE9;

CYRILLIC SMALL LETTER HARD SIGN: &#x44A; => ea
&#xEA; => &#xEA;

CYRILLIC SMALL LETTER YERU: &#x44B; => eb
&#xEB; => &#xEB;

CYRILLIC SMALL LETTER SOFT SIGN: &#x44C; => ec
&#xEC; => &#xEC;

CYRILLIC SMALL LETTER E: &#x44D; => ed
&#xED; => &#xED;

CYRILLIC SMALL LETTER YU: &#x44E; => ee
&#xEE; => &#xEE;

CYRILLIC SMALL LETTER YA: &#x44F; => ef
&#xEF; => &#xEF;
|
||
|
|
||
|
NUMERO SIGN: &#x2116; => f0
&#xF0; => &#xF0;

CYRILLIC SMALL LETTER IO: &#x451; => 2623783435313b
&#xF1; => &#xF1;

CYRILLIC SMALL LETTER DJE: &#x452; => 2623783435323b
&#xF2; => &#xF2;

CYRILLIC SMALL LETTER GJE: &#x453; => 2623783435333b
&#xF3; => &#xF3;

CYRILLIC SMALL LETTER UKRAINIAN IE: &#x454; => 2623783435343b
&#xF4; => &#xF4;

CYRILLIC SMALL LETTER DZE: &#x455; => 2623783435353b
&#xF5; => &#xF5;

CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I: &#x456; => 2623783435363b
&#xF6; => &#xF6;

CYRILLIC SMALL LETTER YI: &#x457; => 2623783435373b
&#xF7; => &#xF7;

CYRILLIC SMALL LETTER JE: &#x458; => 2623783435383b
&#xF8; => &#xF8;

CYRILLIC SMALL LETTER LJE: &#x459; => 2623783435393b
&#xF9; => &#xF9;

CYRILLIC SMALL LETTER NJE: &#x45A; => 2623783435413b
&#xFA; => &#xFA;

CYRILLIC SMALL LETTER TSHE: &#x45B; => 2623783435423b
&#xFB; => &#xFB;

CYRILLIC SMALL LETTER KJE: &#x45C; => 2623783435433b
&#xFC; => &#xFC;

SECTION SIGN: &#xA7; => fd
&#xFD; => &#xFD;

CYRILLIC SMALL LETTER SHORT U: &#x45E; => 2623783435453b
&#xFE; => &#xFE;

CYRILLIC SMALL LETTER DZHE: &#x45F; => 2623783435463b
&#xFF; => &#xFF;
|
||
|
|
||
|
|