Improve performance of WeakReference/WeakMap. Avoid hash collisions on pointers. (#7690)

Shift pointers by ZEND_MM_ALIGNMENT_LOG2
to avoid the noticeable performance degradation caused by hash table collisions.
in `EG(weakrefs)` and zend_weakmap->ht

On 64-bit platforms, pointers are usually aligned to at least 8 bytes,
so only one in 8 hash buckets were actually getting used.
(With the metadata needed to track allocations,
alignment might be at least 16 bytes in practice)

Address review comments, add optimization

Make it public for any extensions that need to work with EG(weakrefs)
for instrumentation, debugging, etc. (e.g. zend_test)

PHP 8.1 and previous releases would use the raw pointer value as a hash key instead.
This commit is contained in:
Tyson Andre 2021-11-27 19:52:30 -05:00 committed by GitHub
parent c4a1c0c18c
commit 7504cf189b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 89 additions and 48 deletions

View File

@ -35,8 +35,16 @@ typedef struct _zend_weakmap_iterator {
uint32_t ht_iter;
} zend_weakmap_iterator;
/* The EG(weakrefs) ht is a map from object address a tagged pointer, that may be one of
* zend_weakref*, zend_weakmap* or HashTable*. */
/* EG(weakrefs) is a map from a key corresponding to a zend_object pointer to all the WeakReference and/or WeakMap entries relating to that pointer.
*
* 1. For a single WeakReference,
* the HashTable's corresponding value's tag is a ZEND_WEAKREF_TAG_REF and the pointer is a singleton WeakReference instance (zend_weakref *) for that zend_object pointer (from WeakReference::create()).
* 2. For a single WeakMap, the HashTable's corresponding value's tag is a ZEND_WEAKREF_TAG_MAP and the pointer is a WeakMap instance (zend_weakmap *).
* 3. For multiple values associated with the same zend_object pointer, the HashTable entry's tag is a ZEND_WEAKREF_TAG_HT with a HashTable mapping
* tagged pointers of at most 1 WeakReference and 1 or more WeakMaps to the same tagged pointer.
*
* ZEND_MM_ALIGNED_OFFSET_LOG2 is at least 2 on supported architectures (pointers to the objects in question are aligned to 4 bytes (1<<2) even on 32-bit systems),
* i.e. the least two significant bits of the pointer can be used as a tag (ZEND_WEAKREF_TAG_*). */
#define ZEND_WEAKREF_TAG_REF 0
#define ZEND_WEAKREF_TAG_MAP 1
#define ZEND_WEAKREF_TAG_HT 2
@ -56,38 +64,40 @@ static zend_object_handlers zend_weakmap_handlers;
#define zend_weakmap_fetch(z) zend_weakmap_from(Z_OBJ_P(z))
static inline void zend_weakref_unref_single(
void *ptr, uintptr_t tag, zend_ulong obj_addr)
void *ptr, uintptr_t tag, zend_object *object)
{
if (tag == ZEND_WEAKREF_TAG_REF) {
/* Unreferencing WeakReference (at ptr) singleton that pointed to object. */
zend_weakref *wr = ptr;
wr->referent = NULL;
} else {
/* unreferencing WeakMap entry (at ptr) with a key of object. */
ZEND_ASSERT(tag == ZEND_WEAKREF_TAG_MAP);
zend_hash_index_del((HashTable *) ptr, obj_addr);
zend_hash_index_del((HashTable *) ptr, zend_object_to_weakref_key(object));
}
}
static void zend_weakref_unref(zend_ulong obj_addr, void *tagged_ptr) {
static void zend_weakref_unref(zend_object *object, void *tagged_ptr) {
void *ptr = ZEND_WEAKREF_GET_PTR(tagged_ptr);
uintptr_t tag = ZEND_WEAKREF_GET_TAG(tagged_ptr);
if (tag == ZEND_WEAKREF_TAG_HT) {
HashTable *ht = ptr;
ZEND_HASH_MAP_FOREACH_PTR(ht, tagged_ptr) {
zend_weakref_unref_single(
ZEND_WEAKREF_GET_PTR(tagged_ptr), ZEND_WEAKREF_GET_TAG(tagged_ptr), obj_addr);
ZEND_WEAKREF_GET_PTR(tagged_ptr), ZEND_WEAKREF_GET_TAG(tagged_ptr), object);
} ZEND_HASH_FOREACH_END();
zend_hash_destroy(ht);
FREE_HASHTABLE(ht);
} else {
zend_weakref_unref_single(ptr, tag, obj_addr);
zend_weakref_unref_single(ptr, tag, object);
}
}
static void zend_weakref_register(zend_object *object, void *payload) {
GC_ADD_FLAGS(object, IS_OBJ_WEAKLY_REFERENCED);
zend_ulong obj_addr = (zend_ulong) object;
zval *zv = zend_hash_index_lookup(&EG(weakrefs), obj_addr);
zend_ulong obj_key = zend_object_to_weakref_key(object);
zval *zv = zend_hash_index_lookup(&EG(weakrefs), obj_key);
if (Z_TYPE_P(zv) == IS_NULL) {
ZVAL_PTR(zv, payload);
return;
@ -105,25 +115,28 @@ static void zend_weakref_register(zend_object *object, void *payload) {
zend_hash_init(ht, 0, NULL, NULL, 0);
zend_hash_index_add_new_ptr(ht, (zend_ulong) tagged_ptr, tagged_ptr);
zend_hash_index_add_new_ptr(ht, (zend_ulong) payload, payload);
zend_hash_index_update_ptr(
&EG(weakrefs), obj_addr, ZEND_WEAKREF_ENCODE(ht, ZEND_WEAKREF_TAG_HT));
/* Replace the single WeakMap or WeakReference entry in EG(weakrefs) with a HashTable with 2 entries in place. */
ZVAL_PTR(zv, ZEND_WEAKREF_ENCODE(ht, ZEND_WEAKREF_TAG_HT));
}
static void zend_weakref_unregister(zend_object *object, void *payload, bool weakref_free) {
zend_ulong obj_addr = (zend_ulong) object;
void *tagged_ptr = zend_hash_index_find_ptr(&EG(weakrefs), obj_addr);
zend_ulong obj_key = zend_object_to_weakref_key(object);
void *tagged_ptr = zend_hash_index_find_ptr(&EG(weakrefs), obj_key);
ZEND_ASSERT(tagged_ptr && "Weakref not registered?");
void *ptr = ZEND_WEAKREF_GET_PTR(tagged_ptr);
uintptr_t tag = ZEND_WEAKREF_GET_TAG(tagged_ptr);
if (tag != ZEND_WEAKREF_TAG_HT) {
ZEND_ASSERT(tagged_ptr == payload);
zend_hash_index_del(&EG(weakrefs), obj_addr);
zend_hash_index_del(&EG(weakrefs), obj_key);
GC_DEL_FLAGS(object, IS_OBJ_WEAKLY_REFERENCED);
/* Do this last, as it may destroy the object. */
if (weakref_free) {
zend_weakref_unref_single(ptr, tag, obj_addr);
zend_weakref_unref_single(ptr, tag, object);
} else {
/* The optimization of skipping unref is only used in the destructor of WeakMap */
ZEND_ASSERT(ZEND_WEAKREF_GET_TAG(payload) == ZEND_WEAKREF_TAG_MAP);
}
return;
}
@ -139,18 +152,21 @@ static void zend_weakref_unregister(zend_object *object, void *payload, bool wea
GC_DEL_FLAGS(object, IS_OBJ_WEAKLY_REFERENCED);
zend_hash_destroy(ht);
FREE_HASHTABLE(ht);
zend_hash_index_del(&EG(weakrefs), obj_addr);
zend_hash_index_del(&EG(weakrefs), obj_key);
}
/* Do this last, as it may destroy the object. */
if (weakref_free) {
zend_weakref_unref_single(
ZEND_WEAKREF_GET_PTR(payload), ZEND_WEAKREF_GET_TAG(payload), obj_addr);
ZEND_WEAKREF_GET_PTR(payload), ZEND_WEAKREF_GET_TAG(payload), object);
} else {
/* The optimization of skipping unref is only used in the destructor of WeakMap */
ZEND_ASSERT(ZEND_WEAKREF_GET_TAG(payload) == ZEND_WEAKREF_TAG_MAP);
}
}
ZEND_API zval *zend_weakrefs_hash_add(HashTable *ht, zend_object *key, zval *pData) {
zval *zv = zend_hash_index_add(ht, (zend_ulong) key, pData);
zval *zv = zend_hash_index_add(ht, zend_object_to_weakref_key(key), pData);
if (zv) {
zend_weakref_register(key, ZEND_WEAKREF_ENCODE(ht, ZEND_WEAKREF_TAG_MAP));
}
@ -158,7 +174,7 @@ ZEND_API zval *zend_weakrefs_hash_add(HashTable *ht, zend_object *key, zval *pDa
}
ZEND_API zend_result zend_weakrefs_hash_del(HashTable *ht, zend_object *key) {
zval *zv = zend_hash_index_find(ht, (zend_ulong) key);
zval *zv = zend_hash_index_find(ht, zend_object_to_weakref_key(key));
if (zv) {
zend_weakref_unregister(key, ZEND_WEAKREF_ENCODE(ht, ZEND_WEAKREF_TAG_MAP), 1);
return SUCCESS;
@ -170,17 +186,19 @@ void zend_weakrefs_init(void) {
zend_hash_init(&EG(weakrefs), 8, NULL, NULL, 0);
}
/* This is called when the object is garbage collected
* to remove all WeakReference and WeakMap entries weakly referencing that object. */
void zend_weakrefs_notify(zend_object *object) {
/* Annoyingly we can't use the HT destructor here, because we need access to the key (which
* is the object address), which is not provided to the dtor. */
zend_ulong obj_addr = (zend_ulong) object;
void *tagged_ptr = zend_hash_index_find_ptr(&EG(weakrefs), obj_addr);
const zend_ulong obj_key = zend_object_to_weakref_key(object);
void *tagged_ptr = zend_hash_index_find_ptr(&EG(weakrefs), obj_key);
#if ZEND_DEBUG
ZEND_ASSERT(tagged_ptr && "Tracking of the IS_OBJ_WEAKLY_REFERENCE flag should be precise");
#endif
if (tagged_ptr) {
zend_weakref_unref(obj_addr, tagged_ptr);
zend_hash_index_del(&EG(weakrefs), obj_addr);
zend_weakref_unref(object, tagged_ptr);
zend_hash_index_del(&EG(weakrefs), obj_key);
}
}
@ -199,7 +217,7 @@ static zend_object* zend_weakref_new(zend_class_entry *ce) {
}
static zend_always_inline bool zend_weakref_find(zend_object *referent, zval *return_value) {
void *tagged_ptr = zend_hash_index_find_ptr(&EG(weakrefs), (zend_ulong) referent);
void *tagged_ptr = zend_hash_index_find_ptr(&EG(weakrefs), zend_object_to_weakref_key(referent));
if (!tagged_ptr) {
return 0;
}
@ -295,13 +313,13 @@ static zend_object *zend_weakmap_create_object(zend_class_entry *ce)
static void zend_weakmap_free_obj(zend_object *object)
{
zend_weakmap *wm = zend_weakmap_from(object);
zend_ulong obj_addr;
ZEND_HASH_MAP_FOREACH_NUM_KEY(&wm->ht, obj_addr) {
zend_ulong obj_key;
ZEND_HASH_MAP_FOREACH_NUM_KEY(&wm->ht, obj_key) {
/* Optimization: Don't call zend_weakref_unref_single to free individual entries from wm->ht when unregistering (which would do a hash table lookup, call zend_hash_index_del, and skip over any bucket collisions).
* Let freeing the corresponding values for WeakMap entries be done in zend_hash_destroy, freeing objects sequentially.
* The performance difference is notable for larger WeakMaps with worse cache locality. */
zend_weakref_unregister(
(zend_object *) obj_addr, ZEND_WEAKREF_ENCODE(&wm->ht, ZEND_WEAKREF_TAG_MAP), 0);
zend_weakref_key_to_object(obj_key), ZEND_WEAKREF_ENCODE(&wm->ht, ZEND_WEAKREF_TAG_MAP), 0);
} ZEND_HASH_FOREACH_END();
zend_hash_destroy(&wm->ht);
zend_object_std_dtor(&wm->std);
@ -320,12 +338,12 @@ static zval *zend_weakmap_read_dimension(zend_object *object, zval *offset, int
}
zend_weakmap *wm = zend_weakmap_from(object);
zend_object *obj_key = Z_OBJ_P(offset);
zval *zv = zend_hash_index_find(&wm->ht, (zend_ulong) obj_key);
zend_object *obj_addr = Z_OBJ_P(offset);
zval *zv = zend_hash_index_find(&wm->ht, zend_object_to_weakref_key(obj_addr));
if (zv == NULL) {
if (type != BP_VAR_IS) {
zend_throw_error(NULL,
"Object %s#%d not contained in WeakMap", ZSTR_VAL(obj_key->ce->name), obj_key->handle);
"Object %s#%d not contained in WeakMap", ZSTR_VAL(obj_addr->ce->name), obj_addr->handle);
return NULL;
}
return NULL;
@ -350,10 +368,11 @@ static void zend_weakmap_write_dimension(zend_object *object, zval *offset, zval
}
zend_weakmap *wm = zend_weakmap_from(object);
zend_object *obj_key = Z_OBJ_P(offset);
zend_object *obj_addr = Z_OBJ_P(offset);
zend_ulong obj_key = zend_object_to_weakref_key(obj_addr);
Z_TRY_ADDREF_P(value);
zval *zv = zend_hash_index_find(&wm->ht, (zend_ulong) obj_key);
zval *zv = zend_hash_index_find(&wm->ht, obj_key);
if (zv) {
/* Because the destructors can have side effects such as resizing or rehashing the WeakMap storage,
* free the zval only after overwriting the original value. */
@ -364,8 +383,8 @@ static void zend_weakmap_write_dimension(zend_object *object, zval *offset, zval
return;
}
zend_weakref_register(obj_key, ZEND_WEAKREF_ENCODE(&wm->ht, ZEND_WEAKREF_TAG_MAP));
zend_hash_index_add_new(&wm->ht, (zend_ulong) obj_key, value);
zend_weakref_register(obj_addr, ZEND_WEAKREF_ENCODE(&wm->ht, ZEND_WEAKREF_TAG_MAP));
zend_hash_index_add_new(&wm->ht, obj_key, value);
}
/* int return and check_empty due to Object Handler API */
@ -377,7 +396,7 @@ static int zend_weakmap_has_dimension(zend_object *object, zval *offset, int che
}
zend_weakmap *wm = zend_weakmap_from(object);
zval *zv = zend_hash_index_find(&wm->ht, (zend_ulong) Z_OBJ_P(offset));
zval *zv = zend_hash_index_find(&wm->ht, zend_object_to_weakref_key(Z_OBJ_P(offset)));
if (!zv) {
return 0;
}
@ -396,13 +415,13 @@ static void zend_weakmap_unset_dimension(zend_object *object, zval *offset)
}
zend_weakmap *wm = zend_weakmap_from(object);
zend_object *obj_key = Z_OBJ_P(offset);
if (!zend_hash_index_exists(&wm->ht, (zend_ulong) Z_OBJ_P(offset))) {
zend_object *obj_addr = Z_OBJ_P(offset);
if (!zend_hash_index_exists(&wm->ht, zend_object_to_weakref_key(obj_addr))) {
/* Object not in WeakMap, do nothing. */
return;
}
zend_weakref_unregister(obj_key, ZEND_WEAKREF_ENCODE(&wm->ht, ZEND_WEAKREF_TAG_MAP), 1);
zend_weakref_unregister(obj_addr, ZEND_WEAKREF_ENCODE(&wm->ht, ZEND_WEAKREF_TAG_MAP), 1);
}
static int zend_weakmap_count_elements(zend_object *object, zend_long *count)
@ -423,10 +442,10 @@ static HashTable *zend_weakmap_get_properties_for(zend_object *object, zend_prop
ALLOC_HASHTABLE(ht);
zend_hash_init(ht, zend_hash_num_elements(&wm->ht), NULL, ZVAL_PTR_DTOR, 0);
zend_ulong obj_addr;
zend_ulong obj_key;
zval *val;
ZEND_HASH_MAP_FOREACH_NUM_KEY_VAL(&wm->ht, obj_addr, val) {
zend_object *obj = (zend_object*)obj_addr;
ZEND_HASH_MAP_FOREACH_NUM_KEY_VAL(&wm->ht, obj_key, val) {
zend_object *obj = zend_weakref_key_to_object(obj_key);
zval pair;
array_init(&pair);
@ -460,11 +479,11 @@ static zend_object *zend_weakmap_clone_obj(zend_object *old_object)
zend_weakmap *new_wm = zend_weakmap_from(new_object);
zend_hash_copy(&new_wm->ht, &old_wm->ht, NULL);
zend_ulong obj_addr;
zend_ulong obj_key;
zval *val;
ZEND_HASH_MAP_FOREACH_NUM_KEY_VAL(&new_wm->ht, obj_addr, val) {
ZEND_HASH_MAP_FOREACH_NUM_KEY_VAL(&new_wm->ht, obj_key, val) {
zend_weakref_register(
(zend_object *) obj_addr, ZEND_WEAKREF_ENCODE(new_wm, ZEND_WEAKREF_TAG_MAP));
zend_weakref_key_to_object(obj_key), ZEND_WEAKREF_ENCODE(new_wm, ZEND_WEAKREF_TAG_MAP));
zval_add_ref(val);
} ZEND_HASH_FOREACH_END();
return new_object;
@ -511,7 +530,7 @@ static void zend_weakmap_iterator_get_current_key(zend_object_iterator *obj_iter
ZEND_ASSERT(0 && "Must have integer key");
}
ZVAL_OBJ_COPY(key, (zend_object *) num_key);
ZVAL_OBJ_COPY(key, zend_weakref_key_to_object(num_key));
}
static void zend_weakmap_iterator_move_forward(zend_object_iterator *obj_iter)

View File

@ -17,6 +17,8 @@
#ifndef ZEND_WEAKREFS_H
#define ZEND_WEAKREFS_H
#include "zend_alloc.h"
BEGIN_EXTERN_C()
extern ZEND_API zend_class_entry *zend_ce_weakref;
@ -40,6 +42,26 @@ static zend_always_inline void *zend_weakrefs_hash_add_ptr(HashTable *ht, zend_o
}
}
/* Because php uses the raw numbers as a hash function, raw pointers will lead to hash collisions.
* We have a guarantee that the lowest ZEND_MM_ALIGNED_OFFSET_LOG2 bits of a pointer are zero.
*
* E.g. On most 64-bit platforms, pointers are aligned to 8 bytes, so the least significant 3 bits are always 0 and can be discarded.
*
* NOTE: This function is only used for EG(weakrefs) and zend_weakmap->ht.
* It is not used for the HashTable instances associated with ZEND_WEAKREF_TAG_HT tags (created in zend_weakref_register, which uses ZEND_WEAKREF_ENCODE instead).
* The ZEND_WEAKREF_TAG_HT instances are used to disambiguate between multiple weak references to the same zend_object.
*/
static zend_always_inline zend_ulong zend_object_to_weakref_key(const zend_object *object)
{
ZEND_ASSERT(((uintptr_t)object) % ZEND_MM_ALIGNMENT == 0);
return ((uintptr_t) object) >> ZEND_MM_ALIGNMENT_LOG2;
}
static zend_always_inline zend_object *zend_weakref_key_to_object(zend_ulong key)
{
return (zend_object *) (((uintptr_t) key) << ZEND_MM_ALIGNMENT_LOG2);
}
END_EXTERN_C()
#endif

View File

@ -509,9 +509,9 @@ PHP_RINIT_FUNCTION(zend_test)
PHP_RSHUTDOWN_FUNCTION(zend_test)
{
zend_ulong objptr;
ZEND_HASH_FOREACH_NUM_KEY(&ZT_G(global_weakmap), objptr) {
zend_weakrefs_hash_del(&ZT_G(global_weakmap), (zend_object *)(uintptr_t)objptr);
zend_ulong obj_key;
ZEND_HASH_FOREACH_NUM_KEY(&ZT_G(global_weakmap), obj_key) {
zend_weakrefs_hash_del(&ZT_G(global_weakmap), zend_weakref_key_to_object(obj_key));
} ZEND_HASH_FOREACH_END();
zend_hash_destroy(&ZT_G(global_weakmap));
return SUCCESS;