From 4df3dd767912ea129c7874f4b991767d31a44479 Mon Sep 17 00:00:00 2001 From: Arnaud Le Blanc Date: Fri, 8 Jul 2022 14:47:46 +0200 Subject: [PATCH] Reduce memory allocated by var_export, json_encode, serialize, and other (#8902) smart_str uses an over-allocated string to optimize for append operations. Functions that use smart_str tend to return the over-allocated string directly. This results in unnecessary memory usage, especially for small strings. The overhead can be up to 231 bytes for strings smaller than that, and 4095 for other strings. This can be avoided for strings smaller than `4096 - zend_string header size - 1` by reallocating the string. This change introduces `smart_str_trim_to_size()`, and calls it in `smart_str_extract()`. Functions that use `smart_str` are updated to use `smart_str_extract()`. Fixes GH-8896 --- NEWS | 9 +++++++-- UPGRADING.INTERNALS | 7 +++++++ Zend/zend.c | 3 +-- Zend/zend_smart_str.h | 15 ++++++++++++++- Zend/zend_smart_str_public.h | 1 + ext/dom/documenttype.c | 3 +-- ext/filter/sanitizing_filters.c | 3 +-- ext/iconv/iconv.c | 14 +++----------- ext/json/json.c | 6 +----- ext/mbstring/php_mbregex.c | 9 +++------ ext/session/session.c | 5 ++--- ext/soap/php_sdl.c | 6 ++---- ext/spl/spl_array.c | 2 +- ext/spl/spl_dllist.c | 4 +--- ext/spl/spl_observer.c | 2 +- ext/standard/http.c | 8 +------- ext/standard/string.c | 3 +-- ext/standard/var.c | 8 ++------ 18 files changed, 50 insertions(+), 58 deletions(-) diff --git a/NEWS b/NEWS index d9569ed0fd5..6f96a91ea90 100644 --- a/NEWS +++ b/NEWS @@ -2,11 +2,16 @@ PHP NEWS ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ?? ??? ????, PHP 8.2.0beta1 +- Core: + . Reduced the memory footprint of strings returned by var_export(), + json_encode(), serialize(), iconv_*(), mb_ereg*(), session_create_id(), + http_build_query(), strstr(), Reflection*::__toString(). (Arnaud) + - CLI: - - Updated the mime-type table for the builtin-server. (Ayesh Karunaratne) + . Updated the mime-type table for the builtin-server. (Ayesh Karunaratne) - FPM: - - Added listen.setfib pool option to set route FIB on FreeBSD. (David Carlier) + . Added listen.setfib pool option to set route FIB on FreeBSD. (David Carlier) 07 Jul 2022, PHP 8.2.0alpha3 diff --git a/UPGRADING.INTERNALS b/UPGRADING.INTERNALS index 2ceab0da617..e0aebcbb6b5 100644 --- a/UPGRADING.INTERNALS +++ b/UPGRADING.INTERNALS @@ -39,6 +39,13 @@ PHP 8.2 INTERNALS UPGRADE NOTES - zend_object_do_operation_t * Added a new zero_position argument to php_stream_fopen_from_fd_rel to reflect if this a newly created file so the current file offset needs not to be checked. +* Added smart_str_trim_to_size(). The function trims the memory allocated for the + string. This can considerably reduce the memory footprint of strings smaller + than approximately 4096 bytes. +* smart_str_extract() and the spprintf family of functions now use + smart_str_trim_to_size() before returning the string. +* It is recommended to use smart_str_extract() or smart_str_trim_to_size() when + using the smart_str API. ======================== 2. Build system changes diff --git a/Zend/zend.c b/Zend/zend.c index 7d0eacda58c..ef1fe3d9d05 100644 --- a/Zend/zend.c +++ b/Zend/zend.c @@ -272,8 +272,7 @@ ZEND_API zend_string *zend_vstrpprintf(size_t max_len, const char *format, va_li ZSTR_LEN(buf.s) = max_len; } - smart_str_0(&buf); - return buf.s; + return smart_str_extract(&buf); } /* }}} */ diff --git a/Zend/zend_smart_str.h b/Zend/zend_smart_str.h index 80fe28615bf..cf28016993f 100644 --- a/Zend/zend_smart_str.h +++ b/Zend/zend_smart_str.h @@ -25,6 +25,10 @@ smart_str_appendl_ex((dest), (src), strlen(src), (what)) #define smart_str_appends(dest, src) \ smart_str_appendl((dest), (src), strlen(src)) +#define smart_str_extract(dest) \ + smart_str_extract_ex((dest), 0) +#define smart_str_trim_to_size(dest) \ + smart_str_trim_to_size_ex((dest), 0) #define smart_str_extend(dest, len) \ smart_str_extend_ex((dest), (len), 0) #define smart_str_appendc(dest, c) \ @@ -101,10 +105,19 @@ static zend_always_inline size_t smart_str_get_len(smart_str *str) { return str->s ? ZSTR_LEN(str->s) : 0; } -static zend_always_inline zend_string *smart_str_extract(smart_str *str) { +static zend_always_inline void smart_str_trim_to_size_ex(smart_str *str, bool persistent) +{ + if (str->s && str->a > ZSTR_LEN(str->s)) { + str->s = zend_string_realloc(str->s, ZSTR_LEN(str->s), persistent); + str->a = ZSTR_LEN(str->s); + } +} + +static zend_always_inline zend_string *smart_str_extract_ex(smart_str *str, bool persistent) { if (str->s) { zend_string *res; smart_str_0(str); + smart_str_trim_to_size_ex(str, persistent); res = str->s; str->s = NULL; return res; diff --git a/Zend/zend_smart_str_public.h b/Zend/zend_smart_str_public.h index 8b37db5bd1e..e81a6839b3b 100644 --- a/Zend/zend_smart_str_public.h +++ b/Zend/zend_smart_str_public.h @@ -18,6 +18,7 @@ #define ZEND_SMART_STR_PUBLIC_H typedef struct { + /** See smart_str_extract() */ zend_string *s; size_t a; } smart_str; diff --git a/ext/dom/documenttype.c b/ext/dom/documenttype.c index 960744ef1a0..b046b05f80e 100644 --- a/ext/dom/documenttype.c +++ b/ext/dom/documenttype.c @@ -188,8 +188,7 @@ int dom_documenttype_internal_subset_read(dom_object *obj, zval *retval) } if (ret_buf.s) { - smart_str_0(&ret_buf); - ZVAL_NEW_STR(retval, ret_buf.s); + ZVAL_STR(retval, smart_str_extract(&ret_buf)); return SUCCESS; } } diff --git a/ext/filter/sanitizing_filters.c b/ext/filter/sanitizing_filters.c index 7c57171e9e8..eac384172a6 100644 --- a/ext/filter/sanitizing_filters.c +++ b/ext/filter/sanitizing_filters.c @@ -46,9 +46,8 @@ static void php_filter_encode_html(zval *value, const unsigned char *chars) s++; } - smart_str_0(&str); zval_ptr_dtor(value); - ZVAL_NEW_STR(value, str.s); + ZVAL_STR(value, smart_str_extract(&str)); } static const unsigned char hexchars[] = "0123456789ABCDEF"; diff --git a/ext/iconv/iconv.c b/ext/iconv/iconv.c index 6fce7a92e34..c0286de7bb1 100644 --- a/ext/iconv/iconv.c +++ b/ext/iconv/iconv.c @@ -1845,7 +1845,7 @@ PHP_FUNCTION(iconv_substr) _php_iconv_show_error(err, GENERIC_SUPERSET_NAME, charset); if (err == PHP_ICONV_ERR_SUCCESS && retval.s != NULL) { - RETURN_NEW_STR(retval.s); + RETURN_STR(smart_str_extract(&retval)); } smart_str_free(&retval); RETURN_FALSE; @@ -2038,11 +2038,7 @@ PHP_FUNCTION(iconv_mime_encode) _php_iconv_show_error(err, out_charset, in_charset); if (err == PHP_ICONV_ERR_SUCCESS) { - if (retval.s != NULL) { - RETVAL_STR(retval.s); - } else { - RETVAL_EMPTY_STRING(); - } + RETVAL_STR(smart_str_extract(&retval)); } else { smart_str_free(&retval); RETVAL_FALSE; @@ -2083,11 +2079,7 @@ PHP_FUNCTION(iconv_mime_decode) _php_iconv_show_error(err, charset, "???"); if (err == PHP_ICONV_ERR_SUCCESS) { - if (retval.s != NULL) { - RETVAL_STR(retval.s); - } else { - RETVAL_EMPTY_STRING(); - } + RETVAL_STR(smart_str_extract(&retval)); } else { smart_str_free(&retval); RETVAL_FALSE; diff --git a/ext/json/json.c b/ext/json/json.c index 8ece58d79a5..4fac95fab39 100644 --- a/ext/json/json.c +++ b/ext/json/json.c @@ -212,11 +212,7 @@ PHP_FUNCTION(json_encode) } } - smart_str_0(&buf); /* copy? */ - if (buf.s) { - RETURN_NEW_STR(buf.s); - } - RETURN_EMPTY_STRING(); + RETURN_STR(smart_str_extract(&buf)); } /* }}} */ diff --git a/ext/mbstring/php_mbregex.c b/ext/mbstring/php_mbregex.c index 58652688044..99dc91e34dc 100644 --- a/ext/mbstring/php_mbregex.c +++ b/ext/mbstring/php_mbregex.c @@ -1144,13 +1144,10 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp if (err <= -2) { smart_str_free(&out_buf); - RETVAL_FALSE; - } else if (out_buf.s) { - smart_str_0(&out_buf); - RETVAL_STR(out_buf.s); - } else { - RETVAL_EMPTY_STRING(); + RETURN_FALSE; } + + RETURN_STR(smart_str_extract(&out_buf)); } /* }}} */ diff --git a/ext/session/session.c b/ext/session/session.c index 4f7dd5f5776..cbfbda1941b 100644 --- a/ext/session/session.c +++ b/ext/session/session.c @@ -1464,7 +1464,7 @@ PHPAPI zend_result php_session_reset_id(void) /* {{{ */ smart_str_0(&var); if (sid) { zval_ptr_dtor_str(sid); - ZVAL_NEW_STR(sid, var.s); + ZVAL_STR(sid, smart_str_extract(&var)); } else { REGISTER_STRINGL_CONSTANT("SID", ZSTR_VAL(var.s), ZSTR_LEN(var.s), 0); smart_str_free(&var); @@ -2362,8 +2362,7 @@ PHP_FUNCTION(session_create_id) php_error_docref(NULL, E_WARNING, "Failed to create new ID"); RETURN_FALSE; } - smart_str_0(&id); - RETVAL_NEW_STR(id.s); + RETVAL_STR(smart_str_extract(&id)); } /* }}} */ diff --git a/ext/soap/php_sdl.c b/ext/soap/php_sdl.c index c78a7de9bef..be42f9502d8 100644 --- a/ext/soap/php_sdl.c +++ b/ext/soap/php_sdl.c @@ -3261,8 +3261,7 @@ sdlPtr get_sdl(zval *this_ptr, char *uri, zend_long cache_wsdl) smart_str_appends(&proxy,Z_STRVAL_P(proxy_host)); smart_str_appends(&proxy,":"); smart_str_append_long(&proxy,Z_LVAL_P(proxy_port)); - smart_str_0(&proxy); - ZVAL_NEW_STR(&str_proxy, proxy.s); + ZVAL_STR(&str_proxy, smart_str_extract(&proxy)); if (!context) { context = php_stream_context_alloc(); @@ -3304,8 +3303,7 @@ sdlPtr get_sdl(zval *this_ptr, char *uri, zend_long cache_wsdl) http_context_headers(context, has_authorization, has_proxy_authorization, 0, &headers); } - smart_str_0(&headers); - ZVAL_NEW_STR(&str_headers, headers.s); + ZVAL_STR(&str_headers, smart_str_extract(&headers)); php_stream_context_set_option(context, "http", "header", &str_headers); zval_ptr_dtor(&str_headers); } diff --git a/ext/spl/spl_array.c b/ext/spl/spl_array.c index f4b1f8c4279..ddf85b58d1d 100644 --- a/ext/spl/spl_array.c +++ b/ext/spl/spl_array.c @@ -1554,7 +1554,7 @@ PHP_METHOD(ArrayObject, serialize) /* done */ PHP_VAR_SERIALIZE_DESTROY(var_hash); - RETURN_NEW_STR(buf.s); + RETURN_STR(smart_str_extract(&buf)); } /* }}} */ /* {{{ unserialize the object */ diff --git a/ext/spl/spl_dllist.c b/ext/spl/spl_dllist.c index 2c7534a0322..eda54bfc8a0 100644 --- a/ext/spl/spl_dllist.c +++ b/ext/spl/spl_dllist.c @@ -1032,12 +1032,10 @@ PHP_METHOD(SplDoublyLinkedList, serialize) current = next; } - smart_str_0(&buf); - /* done */ PHP_VAR_SERIALIZE_DESTROY(var_hash); - RETURN_NEW_STR(buf.s); + RETURN_STR(smart_str_extract(&buf)); } /* }}} */ /* {{{ Unserializes storage */ diff --git a/ext/spl/spl_observer.c b/ext/spl/spl_observer.c index 4c6cef6cb06..1e64d072099 100644 --- a/ext/spl/spl_observer.c +++ b/ext/spl/spl_observer.c @@ -808,7 +808,7 @@ PHP_METHOD(SplObjectStorage, serialize) /* done */ PHP_VAR_SERIALIZE_DESTROY(var_hash); - RETURN_NEW_STR(buf.s); + RETURN_STR(smart_str_extract(&buf)); } /* }}} */ /* {{{ Unserializes storage */ diff --git a/ext/standard/http.c b/ext/standard/http.c index e0e95d8d6d5..95e583343aa 100644 --- a/ext/standard/http.c +++ b/ext/standard/http.c @@ -238,12 +238,6 @@ PHP_FUNCTION(http_build_query) php_url_encode_hash_ex(HASH_OF(formdata), &formstr, prefix, prefix_len, NULL, 0, NULL, 0, (Z_TYPE_P(formdata) == IS_OBJECT ? formdata : NULL), arg_sep, (int)enc_type); - if (!formstr.s) { - RETURN_EMPTY_STRING(); - } - - smart_str_0(&formstr); - - RETURN_NEW_STR(formstr.s); + RETURN_STR(smart_str_extract(&formstr)); } /* }}} */ diff --git a/ext/standard/string.c b/ext/standard/string.c index aad6db427d4..37347c2bce5 100644 --- a/ext/standard/string.c +++ b/ext/standard/string.c @@ -2891,8 +2891,7 @@ static void php_strtr_array(zval *return_value, zend_string *input, HashTable *p if (result.s) { smart_str_appendl(&result, str + old_pos, slen - old_pos); - smart_str_0(&result); - RETVAL_NEW_STR(result.s); + RETVAL_STR(smart_str_extract(&result)); } else { smart_str_free(&result); RETVAL_STR_COPY(input); diff --git a/ext/standard/var.c b/ext/standard/var.c index 54f8657da80..9421a5bb74d 100644 --- a/ext/standard/var.c +++ b/ext/standard/var.c @@ -645,7 +645,7 @@ PHP_FUNCTION(var_export) smart_str_0 (&buf); if (return_output) { - RETURN_NEW_STR(buf.s); + RETURN_STR(smart_str_extract(&buf)); } else { PHPWRITE(ZSTR_VAL(buf.s), ZSTR_LEN(buf.s)); smart_str_free(&buf); @@ -1318,11 +1318,7 @@ PHP_FUNCTION(serialize) RETURN_THROWS(); } - if (buf.s) { - RETURN_NEW_STR(buf.s); - } else { - RETURN_EMPTY_STRING(); - } + RETURN_STR(smart_str_extract(&buf)); } /* }}} */