Minor performance improvement to SADD and HSET (#12019)

For sets and hashes that will eventually be stored as the hash encoding, it's much faster to immediately convert them to their hash encoding and then perform the insertions since it avoids the O(N) search and frequent reallocations. This change checks the number of arguments in the incoming command, and converts the data-structure if the number of new entries exceeds the listpack-max-entries configuration. This can cause us to over-allocate memory if their are duplicate entries in the input, which is unexpected.

unstable

Summary:
  throughput summary: 805.54 requests per second
  latency summary (msec):
          avg       min       p50       p95       p99       max
       61.908    25.680    68.351    73.279    75.967    79.295
hset-improvement

Summary:
  throughput summary: 4701.46 requests per second
  latency summary (msec):
          avg       min       p50       p95       p99       max
       10.546     0.832    11.959    12.471    13.119    14.967
This commit is contained in:
Madelyn Olson 2023-05-08 16:11:20 -07:00 committed by GitHub
parent 51af17e3cf
commit a129a60181
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 39 additions and 7 deletions

View File

@ -3069,7 +3069,7 @@ void dismissMemoryInChild(void);
int restartServer(int flags, mstime_t delay);
/* Set data type */
robj *setTypeCreate(sds value);
robj *setTypeCreate(sds value, size_t size_hint);
int setTypeAdd(robj *subject, sds value);
int setTypeAddAux(robj *set, char *str, size_t len, int64_t llval, int str_is_sds);
int setTypeRemove(robj *subject, sds value);

View File

@ -43,6 +43,16 @@ void hashTypeTryConversion(robj *o, robj **argv, int start, int end) {
if (o->encoding != OBJ_ENCODING_LISTPACK) return;
/* We guess that most of the values in the input are unique, so
* if there are enough arguments we create a pre-sized hash, which
* might overallocate memory if their are duplicates. */
size_t new_fields = (end - start + 1) / 2;
if (new_fields > server.hash_max_listpack_entries) {
hashTypeConvert(o, OBJ_ENCODING_HT);
dictExpand(o->ptr, new_fields);
return;
}
for (i = start; i <= end; i++) {
if (!sdsEncodedObject(argv[i]))
continue;

View File

@ -39,11 +39,31 @@ void sunionDiffGenericCommand(client *c, robj **setkeys, int setnum,
/* Factory method to return a set that *can* hold "value". When the object has
* an integer-encodable value, an intset will be returned. Otherwise a regular
* hash table. */
robj *setTypeCreate(sds value) {
if (isSdsRepresentableAsLongLong(value,NULL) == C_OK)
* hash table.
*
* The size hint indicates approximately how many items will be added which is
* used to determine the initial representation. */
robj *setTypeCreate(sds value, size_t size_hint) {
if (isSdsRepresentableAsLongLong(value,NULL) == C_OK && size_hint < server.set_max_intset_entries)
return createIntsetObject();
return createSetListpackObject();
if (size_hint < server.set_max_listpack_entries)
return createSetListpackObject();
/* We may oversize the set by using the hint if the hint is not accurate,
* but we will assume this is accpetable to maximize performance. */
robj *o = createSetObject();
dictExpand(o->ptr, size_hint);
return o;
}
/* Check if the existing set should be converted to another encoding based off the
* the size hint. */
void setTypeMaybeConvert(robj *set, size_t size_hint) {
if ((set->encoding == OBJ_ENCODING_LISTPACK && size_hint >= server.set_max_listpack_entries)
|| (set->encoding == OBJ_ENCODING_INTSET && size_hint >= server.set_max_intset_entries))
{
setTypeConvertAndExpand(set, OBJ_ENCODING_HT, size_hint, 1);
}
}
/* Return the maximum number of entries to store in an intset. */
@ -590,8 +610,10 @@ void saddCommand(client *c) {
if (checkType(c,set,OBJ_SET)) return;
if (set == NULL) {
set = setTypeCreate(c->argv[2]->ptr);
set = setTypeCreate(c->argv[2]->ptr, c->argc - 2);
dbAdd(c->db,c->argv[1],set);
} else {
setTypeMaybeConvert(set, c->argc - 2);
}
for (j = 2; j < c->argc; j++) {
@ -672,7 +694,7 @@ void smoveCommand(client *c) {
/* Create the destination set when it doesn't exist */
if (!dstset) {
dstset = setTypeCreate(ele->ptr);
dstset = setTypeCreate(ele->ptr, 1);
dbAdd(c->db,c->argv[2],dstset);
}