diff --git a/src/server.h b/src/server.h index bced3dbb8..af6847f13 100644 --- a/src/server.h +++ b/src/server.h @@ -3069,7 +3069,7 @@ void dismissMemoryInChild(void); int restartServer(int flags, mstime_t delay); /* Set data type */ -robj *setTypeCreate(sds value); +robj *setTypeCreate(sds value, size_t size_hint); int setTypeAdd(robj *subject, sds value); int setTypeAddAux(robj *set, char *str, size_t len, int64_t llval, int str_is_sds); int setTypeRemove(robj *subject, sds value); diff --git a/src/t_hash.c b/src/t_hash.c index 6005f4442..f7d5af649 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -43,6 +43,16 @@ void hashTypeTryConversion(robj *o, robj **argv, int start, int end) { if (o->encoding != OBJ_ENCODING_LISTPACK) return; + /* We guess that most of the values in the input are unique, so + * if there are enough arguments we create a pre-sized hash, which + * might overallocate memory if their are duplicates. */ + size_t new_fields = (end - start + 1) / 2; + if (new_fields > server.hash_max_listpack_entries) { + hashTypeConvert(o, OBJ_ENCODING_HT); + dictExpand(o->ptr, new_fields); + return; + } + for (i = start; i <= end; i++) { if (!sdsEncodedObject(argv[i])) continue; diff --git a/src/t_set.c b/src/t_set.c index ba32ae51c..56ad95a53 100644 --- a/src/t_set.c +++ b/src/t_set.c @@ -39,11 +39,31 @@ void sunionDiffGenericCommand(client *c, robj **setkeys, int setnum, /* Factory method to return a set that *can* hold "value". When the object has * an integer-encodable value, an intset will be returned. Otherwise a regular - * hash table. */ -robj *setTypeCreate(sds value) { - if (isSdsRepresentableAsLongLong(value,NULL) == C_OK) + * hash table. + * + * The size hint indicates approximately how many items will be added which is + * used to determine the initial representation. */ +robj *setTypeCreate(sds value, size_t size_hint) { + if (isSdsRepresentableAsLongLong(value,NULL) == C_OK && size_hint < server.set_max_intset_entries) return createIntsetObject(); - return createSetListpackObject(); + if (size_hint < server.set_max_listpack_entries) + return createSetListpackObject(); + + /* We may oversize the set by using the hint if the hint is not accurate, + * but we will assume this is accpetable to maximize performance. */ + robj *o = createSetObject(); + dictExpand(o->ptr, size_hint); + return o; +} + +/* Check if the existing set should be converted to another encoding based off the + * the size hint. */ +void setTypeMaybeConvert(robj *set, size_t size_hint) { + if ((set->encoding == OBJ_ENCODING_LISTPACK && size_hint >= server.set_max_listpack_entries) + || (set->encoding == OBJ_ENCODING_INTSET && size_hint >= server.set_max_intset_entries)) + { + setTypeConvertAndExpand(set, OBJ_ENCODING_HT, size_hint, 1); + } } /* Return the maximum number of entries to store in an intset. */ @@ -590,8 +610,10 @@ void saddCommand(client *c) { if (checkType(c,set,OBJ_SET)) return; if (set == NULL) { - set = setTypeCreate(c->argv[2]->ptr); + set = setTypeCreate(c->argv[2]->ptr, c->argc - 2); dbAdd(c->db,c->argv[1],set); + } else { + setTypeMaybeConvert(set, c->argc - 2); } for (j = 2; j < c->argc; j++) { @@ -672,7 +694,7 @@ void smoveCommand(client *c) { /* Create the destination set when it doesn't exist */ if (!dstset) { - dstset = setTypeCreate(ele->ptr); + dstset = setTypeCreate(ele->ptr, 1); dbAdd(c->db,c->argv[2],dstset); }