- Require declare(encoding=...) to come before any opcodes. Read source
  comments for more info.
- Op arrays now know which script encoding they were compiled from.
- Use this information to intelligently convert inline HTML blocks to
  the output encoding. Currently it opens and closes a new converter for
  each block, but we can optimize it.
This commit is contained in:
Andrei Zmievski 2005-08-24 20:42:09 +00:00
parent 64dbc03c93
commit d0d6a1f16f
8 changed files with 99 additions and 11 deletions

View File

@ -156,6 +156,7 @@ void init_compiler(TSRMLS_D)
zend_init_compiler_data_structures(TSRMLS_C);
zend_init_rsrc_list(TSRMLS_C);
zend_hash_init(&CG(filenames_table), 5, NULL, (dtor_func_t) free_estring, 0);
zend_hash_init(&CG(script_encodings_table), 5, NULL, (dtor_func_t) free_estring, 0);
zend_llist_init(&CG(open_files), sizeof(zend_file_handle), (void (*)(void *)) zend_file_handle_dtor, 0);
CG(unclean_shutdown) = 0;
}
@ -170,6 +171,7 @@ void shutdown_compiler(TSRMLS_D)
zend_stack_destroy(&CG(object_stack));
zend_stack_destroy(&CG(declare_stack));
zend_stack_destroy(&CG(list_stack));
zend_hash_destroy(&CG(script_encodings_table));
zend_hash_destroy(&CG(filenames_table));
zend_llist_destroy(&CG(open_files));
}
@ -215,6 +217,34 @@ ZEND_API zend_bool zend_is_compiling(TSRMLS_D)
}
/*
 * Record new_script_enc as the encoding name of the script currently being
 * compiled and return the stored copy of that name.
 *
 * Names are kept in CG(script_encodings_table), keyed by the name itself
 * (including its terminating NUL). If the name is already present, the
 * existing stored string is reused; otherwise a fresh estrndup() copy is
 * inserted. In both cases CG(script_encoding) is pointed at the stored
 * string, and that same pointer is returned to the caller.
 *
 * new_script_enc must be a valid NUL-terminated string; it is passed to
 * strlen() without a NULL check.
 */
ZEND_API char *zend_set_compiled_script_encoding(char *new_script_enc TSRMLS_DC)
{
	int key_len = strlen(new_script_enc) + 1;	/* hash key covers the trailing NUL */
	char **stored;
	char *copy;

	if (zend_hash_find(&CG(script_encodings_table), new_script_enc, key_len, (void **) &stored) != SUCCESS) {
		/* First time this name is seen: store a private copy in the table. */
		copy = estrndup(new_script_enc, key_len - 1);
		zend_hash_update(&CG(script_encodings_table), new_script_enc, key_len, &copy, sizeof(char *), (void **) &stored);
		CG(script_encoding) = copy;
		return copy;
	}

	/* Already known: reuse the string held by the table. */
	CG(script_encoding) = *stored;
	return *stored;
}
/*
 * Point CG(script_encoding) back at a previously obtained encoding name.
 *
 * The pointer is stored as-is; no lookup, copy, or validation is done here.
 */
ZEND_API void zend_restore_compiled_script_encoding(char *original_script_enc TSRMLS_DC)
{
	CG(script_encoding) = original_script_enc;
}
/*
 * Return the current value of CG(script_encoding), i.e. the encoding name
 * most recently installed by the set/restore functions. The pointer is
 * returned directly, not copied.
 */
ZEND_API char *zend_get_compiled_script_encoding(TSRMLS_D)
{
	return CG(script_encoding);
}
static zend_uint get_temporary_variable(zend_op_array *op_array)
{
return (op_array->T)++ * sizeof(temp_variable);
@ -3764,14 +3794,30 @@ void zend_do_declare_stmt(znode *var, znode *val TSRMLS_DC)
convert_to_long(&val->u.constant);
CG(declarables).ticks = val->u.constant;
} else if (UG(unicode) && ZEND_U_EQUAL(Z_TYPE(var->u.constant), Z_UNIVAL(var->u.constant), Z_UNILEN(var->u.constant), "encoding", sizeof("encoding")-1)) {
UErrorCode status = U_ZERO_ERROR;
if (val->u.constant.type == IS_CONSTANT) {
zend_error(E_COMPILE_ERROR, "Cannot use constants as encoding");
}
/*
* Check that the pragma comes before any opcodes. If the compilation
* got as far as this, the previous portion of the script must have been
* parseable according to the .ini script_encoding setting. We still
* want to tell them to put declare() at the top.
*/
if (CG(active_op_array)->last > 0) {
zend_error(E_COMPILE_ERROR, "Encoding declaration pragma has to be the very first statement in the script");
}
convert_to_string(&val->u.constant);
if (zend_prepare_scanner_converters(Z_STRVAL(val->u.constant), 1 TSRMLS_CC) == FAILURE) {
zend_error(E_COMPILE_WARNING, "Unsupported encoding [%s]", Z_STRVAL(val->u.constant));
}
zend_set_compiled_script_encoding((char*)ucnv_getName(LANG_SCNG(output_conv), &status) TSRMLS_CC);
/*
* Because we require declare(encoding=...) to be the very first thing,
* we can safely cache the script encoding in the op array here.
*/
CG(active_op_array)->script_encoding = zend_get_compiled_script_encoding(TSRMLS_C);
efree(val->u.constant.value.str.val);
}
zval_dtor(&var->u.constant);

View File

@ -209,6 +209,7 @@ struct _zend_op_array {
zend_bool uses_this;
char *filename;
char *script_encoding;
zend_uint line_start;
zend_uint line_end;
char *doc_comment;
@ -329,6 +330,9 @@ ZEND_API void zend_restore_compiled_filename(char *original_compiled_filename TS
ZEND_API char *zend_get_compiled_filename(TSRMLS_D);
ZEND_API int zend_get_compiled_lineno(TSRMLS_D);
ZEND_API int zend_get_scanned_file_offset(TSRMLS_D);
ZEND_API char *zend_set_compiled_script_encoding(char *new_script_enc TSRMLS_DC);
ZEND_API void zend_restore_compiled_script_encoding(char *original_script_enc TSRMLS_DC);
ZEND_API char *zend_get_compiled_script_encoding(TSRMLS_D);
ZEND_API char* zend_get_compiled_variable_name(zend_op_array *op_array, zend_uint var, int* name_len);

View File

@ -133,6 +133,9 @@ struct _zend_compiler_globals {
zend_uchar literal_type;
HashTable script_encodings_table;
char *script_encoding;
#ifdef ZTS
HashTable *global_function_table;
HashTable *global_class_table;

View File

@ -28,6 +28,7 @@ typedef struct _zend_lex_state {
zend_file_handle *in;
uint lineno;
char *filename;
char *script_encoding;
UConverter *input_conv; /* converter for flex input */
UConverter *output_conv; /* converter for data from flex output */

View File

@ -209,6 +209,7 @@ ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
BEGIN(lex_state->state);
CG(zend_lineno) = lex_state->lineno;
zend_restore_compiled_filename(lex_state->filename TSRMLS_CC);
zend_restore_compiled_script_encoding(lex_state->script_encoding TSRMLS_CC);
if (SCNG(input_conv)) {
ucnv_close(SCNG(input_conv));
@ -758,6 +759,7 @@ ZEND_API int open_file_for_scanning(zend_file_handle *file_handle TSRMLS_DC)
}
zend_set_compiled_filename(file_path TSRMLS_CC);
zend_set_compiled_script_encoding((char*)ucnv_getName(SCNG(output_conv), &status) TSRMLS_CC);
if (CG(start_lineno)) {
CG(zend_lineno) = CG(start_lineno);
@ -875,6 +877,7 @@ zend_op_array *compile_filename(int type, zval *filename TSRMLS_DC)
ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_DC)
{
const char *encoding;
UErrorCode status = U_ZERO_ERROR;
if (Z_TYPE_P(str) == IS_UNICODE) {
convert_to_string_with_converter(str, UG(utf8_conv));
@ -895,6 +898,7 @@ ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_D
yy_scan_buffer(str->value.str.val, str->value.str.len+2 TSRMLS_CC);
zend_set_compiled_filename(filename TSRMLS_CC);
zend_set_compiled_script_encoding((char*)ucnv_getName(SCNG(output_conv), &status) TSRMLS_CC);
CG(zend_lineno) = 1;
CG(increment_lineno) = 0;
return SUCCESS;
@ -1937,7 +1941,7 @@ NEWLINE ("\r"|"\n"|"\r\n")
if (func_name) {
len += u_strlen((UChar*)func_name);
} else {
func_name = EMPTY_STR;
func_name = (char*)EMPTY_STR;
}
zendlval->value.str.len = len;
Z_USTRVAL_P(zendlval) = eumalloc(len+1);

View File

@ -77,6 +77,7 @@ void init_op_array(zend_op_array *op_array, zend_uchar type, int initial_ops_siz
op_array->function_name = NULL;
op_array->filename = zend_get_compiled_filename(TSRMLS_C);
op_array->script_encoding = zend_get_compiled_script_encoding(TSRMLS_C);
op_array->doc_comment = NULL;
op_array->doc_comment_len = 0;

View File

@ -894,9 +894,13 @@ ZEND_VM_HANDLER(40, ZEND_ECHO, CONST|TMP|VAR|CV, ANY)
/* Convert inline HTML blocks to the output encoding, but only if necessary. */
if (opline->extended_value &&
strcmp(ucnv_getName(ZEND_U_CONVERTER(UG(output_encoding_conv)), &status),
ucnv_getName(ZEND_U_CONVERTER(UG(script_encoding_conv)), &status))) {
EG(active_op_array)->script_encoding)) {
zval z_conv;
zend_convert_encodings(ZEND_U_CONVERTER(UG(output_encoding_conv)), ZEND_U_CONVERTER(UG(script_encoding_conv)), &z_conv.value.str.val, &z_conv.value.str.len, z->value.str.val, z->value.str.len, &status);
UConverter *script_enc_conv = NULL;
if (zend_set_converter_encoding(&script_enc_conv, EG(active_op_array)->script_encoding) == FAILURE) {
zend_error(E_ERROR, "Unsupported encoding [%d]", EG(active_op_array)->script_encoding);
}
zend_convert_encodings(ZEND_U_CONVERTER(UG(output_encoding_conv)), script_enc_conv, &z_conv.value.str.val, &z_conv.value.str.len, z->value.str.val, z->value.str.len, &status);
z_conv.type = IS_BINARY;
if (U_SUCCESS(status)) {
zend_print_variable(&z_conv);
@ -904,6 +908,7 @@ ZEND_VM_HANDLER(40, ZEND_ECHO, CONST|TMP|VAR|CV, ANY)
zend_error(E_WARNING, "Could not convert inline HTML for output");
}
zval_dtor(&z_conv);
ucnv_close(script_enc_conv);
} else {
zend_print_variable(z);
}

View File

@ -1357,9 +1357,14 @@ static int ZEND_ECHO_SPEC_CONST_HANDLER(ZEND_OPCODE_HANDLER_ARGS)
/* Convert inline HTML blocks to the output encoding, but only if necessary. */
if (opline->extended_value &&
strcmp(ucnv_getName(ZEND_U_CONVERTER(UG(output_encoding_conv)), &status),
ucnv_getName(ZEND_U_CONVERTER(UG(script_encoding_conv)), &status))) {
EG(active_op_array)->script_encoding)) {
zval z_conv;
zend_convert_encodings(ZEND_U_CONVERTER(UG(output_encoding_conv)), ZEND_U_CONVERTER(UG(script_encoding_conv)), &z_conv.value.str.val, &z_conv.value.str.len, z->value.str.val, z->value.str.len, &status);
UConverter *script_enc_conv = NULL;
if (zend_set_converter_encoding(&script_enc_conv, EG(active_op_array)->script_encoding) == FAILURE) {
zend_error(E_ERROR, "Unsupported encoding [%d]", EG(active_op_array)->script_encoding);
}
printf("converting %d bytes of T_INLINE_HTML\n", z->value.str.len);
zend_convert_encodings(ZEND_U_CONVERTER(UG(output_encoding_conv)), script_enc_conv, &z_conv.value.str.val, &z_conv.value.str.len, z->value.str.val, z->value.str.len, &status);
z_conv.type = IS_BINARY;
if (U_SUCCESS(status)) {
zend_print_variable(&z_conv);
@ -1367,6 +1372,7 @@ static int ZEND_ECHO_SPEC_CONST_HANDLER(ZEND_OPCODE_HANDLER_ARGS)
zend_error(E_WARNING, "Could not convert inline HTML for output");
}
zval_dtor(&z_conv);
ucnv_close(script_enc_conv);
} else {
zend_print_variable(z);
}
@ -3864,9 +3870,14 @@ static int ZEND_ECHO_SPEC_TMP_HANDLER(ZEND_OPCODE_HANDLER_ARGS)
/* Convert inline HTML blocks to the output encoding, but only if necessary. */
if (opline->extended_value &&
strcmp(ucnv_getName(ZEND_U_CONVERTER(UG(output_encoding_conv)), &status),
ucnv_getName(ZEND_U_CONVERTER(UG(script_encoding_conv)), &status))) {
EG(active_op_array)->script_encoding)) {
zval z_conv;
zend_convert_encodings(ZEND_U_CONVERTER(UG(output_encoding_conv)), ZEND_U_CONVERTER(UG(script_encoding_conv)), &z_conv.value.str.val, &z_conv.value.str.len, z->value.str.val, z->value.str.len, &status);
UConverter *script_enc_conv = NULL;
if (zend_set_converter_encoding(&script_enc_conv, EG(active_op_array)->script_encoding) == FAILURE) {
zend_error(E_ERROR, "Unsupported encoding [%d]", EG(active_op_array)->script_encoding);
}
printf("converting %d bytes of T_INLINE_HTML\n", z->value.str.len);
zend_convert_encodings(ZEND_U_CONVERTER(UG(output_encoding_conv)), script_enc_conv, &z_conv.value.str.val, &z_conv.value.str.len, z->value.str.val, z->value.str.len, &status);
z_conv.type = IS_BINARY;
if (U_SUCCESS(status)) {
zend_print_variable(&z_conv);
@ -3874,6 +3885,7 @@ static int ZEND_ECHO_SPEC_TMP_HANDLER(ZEND_OPCODE_HANDLER_ARGS)
zend_error(E_WARNING, "Could not convert inline HTML for output");
}
zval_dtor(&z_conv);
ucnv_close(script_enc_conv);
} else {
zend_print_variable(z);
}
@ -6907,9 +6919,14 @@ static int ZEND_ECHO_SPEC_VAR_HANDLER(ZEND_OPCODE_HANDLER_ARGS)
/* Convert inline HTML blocks to the output encoding, but only if necessary. */
if (opline->extended_value &&
strcmp(ucnv_getName(ZEND_U_CONVERTER(UG(output_encoding_conv)), &status),
ucnv_getName(ZEND_U_CONVERTER(UG(script_encoding_conv)), &status))) {
EG(active_op_array)->script_encoding)) {
zval z_conv;
zend_convert_encodings(ZEND_U_CONVERTER(UG(output_encoding_conv)), ZEND_U_CONVERTER(UG(script_encoding_conv)), &z_conv.value.str.val, &z_conv.value.str.len, z->value.str.val, z->value.str.len, &status);
UConverter *script_enc_conv = NULL;
if (zend_set_converter_encoding(&script_enc_conv, EG(active_op_array)->script_encoding) == FAILURE) {
zend_error(E_ERROR, "Unsupported encoding [%d]", EG(active_op_array)->script_encoding);
}
printf("converting %d bytes of T_INLINE_HTML\n", z->value.str.len);
zend_convert_encodings(ZEND_U_CONVERTER(UG(output_encoding_conv)), script_enc_conv, &z_conv.value.str.val, &z_conv.value.str.len, z->value.str.val, z->value.str.len, &status);
z_conv.type = IS_BINARY;
if (U_SUCCESS(status)) {
zend_print_variable(&z_conv);
@ -6917,6 +6934,7 @@ static int ZEND_ECHO_SPEC_VAR_HANDLER(ZEND_OPCODE_HANDLER_ARGS)
zend_error(E_WARNING, "Could not convert inline HTML for output");
}
zval_dtor(&z_conv);
ucnv_close(script_enc_conv);
} else {
zend_print_variable(z);
}
@ -19602,9 +19620,14 @@ static int ZEND_ECHO_SPEC_CV_HANDLER(ZEND_OPCODE_HANDLER_ARGS)
/* Convert inline HTML blocks to the output encoding, but only if necessary. */
if (opline->extended_value &&
strcmp(ucnv_getName(ZEND_U_CONVERTER(UG(output_encoding_conv)), &status),
ucnv_getName(ZEND_U_CONVERTER(UG(script_encoding_conv)), &status))) {
EG(active_op_array)->script_encoding)) {
zval z_conv;
zend_convert_encodings(ZEND_U_CONVERTER(UG(output_encoding_conv)), ZEND_U_CONVERTER(UG(script_encoding_conv)), &z_conv.value.str.val, &z_conv.value.str.len, z->value.str.val, z->value.str.len, &status);
UConverter *script_enc_conv = NULL;
if (zend_set_converter_encoding(&script_enc_conv, EG(active_op_array)->script_encoding) == FAILURE) {
zend_error(E_ERROR, "Unsupported encoding [%d]", EG(active_op_array)->script_encoding);
}
printf("converting %d bytes of T_INLINE_HTML\n", z->value.str.len);
zend_convert_encodings(ZEND_U_CONVERTER(UG(output_encoding_conv)), script_enc_conv, &z_conv.value.str.val, &z_conv.value.str.len, z->value.str.val, z->value.str.len, &status);
z_conv.type = IS_BINARY;
if (U_SUCCESS(status)) {
zend_print_variable(&z_conv);
@ -19612,6 +19635,7 @@ static int ZEND_ECHO_SPEC_CV_HANDLER(ZEND_OPCODE_HANDLER_ARGS)
zend_error(E_WARNING, "Could not convert inline HTML for output");
}
zval_dtor(&z_conv);
ucnv_close(script_enc_conv);
} else {
zend_print_variable(z);
}