Fix #70091: Phar does not mark UTF-8 filenames in ZIP archives

The default encoding of filenames in a ZIP archive is IBM Code Page
437.  Phar, however, only supports UTF-8 filenames.  Therefore we have
to mark filenames as being stored in UTF-8 by setting the general
purpose bit 11 (the language encoding flag).

The effect of not setting this bit for non-ASCII filenames can be seen
in popular tools like 7-Zip and UnZip, but not when extracting the
archives via ext/phar (which is agnostic to the filename encoding), or
via ext/zip (which guesses the encoding).  Thus we add a somewhat
brittle low-level test case.

Closes GH-6630.
This commit is contained in:
Christoph M. Becker 2021-01-26 16:50:04 +01:00
parent 94af11d5e1
commit 6a0b889f57
3 changed files with 67 additions and 0 deletions

1
NEWS
View File

@ -5,6 +5,7 @@ PHP NEWS
- Phar:
. Fixed bug #75850 (Unclear error message wrt. __halt_compiler() w/o
semicolon) (cmb)
. Fixed bug #70091 (Phar does not mark UTF-8 filenames in ZIP archives). (cmb)
- Zip:
. Fixed bug #80648 (Fix for bug 79296 should be based on runtime version).

View File

@ -0,0 +1,60 @@
--TEST--
Bug #70091 (Phar does not mark UTF-8 filenames in ZIP archives)
--SKIPIF--
<?php
if (!extension_loaded('phar')) die('skip phar extension not available');
if (!extension_loaded('zlib')) die('skip zlib extension not available');
?>
--FILE--
<?php
// Build a ZIP archive holding one non-ASCII and one plain ASCII entry, then
// release the PharData object before the file is inspected byte by byte.
$archive = __DIR__ . '/bug70091.zip';
$phar = new PharData($archive);
$phar->addFromString('föö', '');
$phar->addFromString('foo', '');
unset($phar);

// Each row is: absolute file offset of a header, number of bytes to read,
// and the offset of the general purpose bit flag field within that header.
// The offsets are hard-coded for the exact layout of this two-entry archive;
// the expected signatures below show that the first two rows are local file
// headers (504b0304, flags at byte 6) and the last two are central directory
// headers (504b0102, flags at byte 8).
$headers = [
    [0, 8, 6],
    [53, 8, 6],
    [104, 10, 8],
    [173, 10, 8],
];

$stream = fopen($archive, 'r');
foreach ($headers as list($offset, $length, $flagsAt)) {
    fseek($stream, $offset);
    $data = fread($stream, $length);
    // 'n' decodes a big-endian 16-bit value, while ZIP stores the field in
    // little-endian order, so bit 11 (0x0800) is reported as int(8).
    var_dump(unpack('H8sig/@' . $flagsAt . '/nflags', $data));
}
?>
--EXPECT--
array(2) {
["sig"]=>
string(8) "504b0304"
["flags"]=>
int(8)
}
array(2) {
["sig"]=>
string(8) "504b0304"
["flags"]=>
int(8)
}
array(2) {
["sig"]=>
string(8) "504b0102"
["flags"]=>
int(8)
}
array(2) {
["sig"]=>
string(8) "504b0102"
["flags"]=>
int(8)
}
--CLEAN--
<?php
@unlink(__DIR__ . '/bug70091.zip');
?>

View File

@ -829,6 +829,7 @@ static int phar_zip_changed_apply_int(phar_entry_info *entry, void *arg) /* {{{
zend_off_t offset;
int not_really_modified = 0;
p = (struct _phar_zip_pass*) arg;
uint16_t general_purpose_flags;
if (entry->is_mounted) {
return ZEND_HASH_APPLY_KEEP;
@ -878,6 +879,11 @@ static int phar_zip_changed_apply_int(phar_entry_info *entry, void *arg) /* {{{
memcpy(central.datestamp, local.datestamp, sizeof(local.datestamp));
PHAR_SET_16(central.filename_len, entry->filename_len + (entry->is_dir ? 1 : 0));
PHAR_SET_16(local.filename_len, entry->filename_len + (entry->is_dir ? 1 : 0));
// set language encoding flag (all filenames have to be UTF-8 anyway)
general_purpose_flags = PHAR_GET_16(central.flags);
PHAR_SET_16(central.flags, general_purpose_flags | (1 << 11));
general_purpose_flags = PHAR_GET_16(local.flags);
PHAR_SET_16(local.flags, general_purpose_flags | (1 << 11));
PHAR_SET_32(central.offset, php_stream_tell(p->filefp));
/* do extra field for perms later */