From 1bd8a084354138191871101dc10dfc420ec87c66 Mon Sep 17 00:00:00 2001 From: jamespedwards42 Date: Sun, 29 May 2016 15:53:24 -0700 Subject: [PATCH 0001/1722] Fix modules intro typos. --- src/modules/INTRO.md | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/modules/INTRO.md b/src/modules/INTRO.md index c64a50078..44c5510eb 100644 --- a/src/modules/INTRO.md +++ b/src/modules/INTRO.md @@ -162,7 +162,7 @@ There are a few functions in order to work with string objects: const char *RedisModule_StringPtrLen(RedisModuleString *string, size_t *len); -The above function accesses a string by returning its pointer and setting its +The above function accesses a string by returning its pointer and setting its length in `len`. You should never write to a string object pointer, as you can see from the `const` pointer qualifier. @@ -344,7 +344,7 @@ section). # Releasing call reply objects -Reply objects must be freed using `RedisModule_FreeCallRelpy`. For arrays, +Reply objects must be freed using `RedisModule_FreeCallReply`. For arrays, you need to free only the top level reply, not the nested replies. Currently the module implementation provides a protection in order to avoid crashing if you free a nested reply object for error, however this feature @@ -623,7 +623,7 @@ access) for speed. The API will return a pointer and a length, so that's possible to access and, if needed, modify the string directly. size_t len, j; - char *myptr = RedisModule_StringDMA(key,REDISMODULE_WRITE,&len); + char *myptr = RedisModule_StringDMA(key,&len,REDISMODULE_WRITE); for (j = 0; j < len; j++) myptr[j] = 'A'; In the above example we write directly on the string. 
Note that if you want @@ -783,4 +783,3 @@ Documentation missing, please check the following functions inside `module.c`: RedisModule_IsKeysPositionRequest(ctx); RedisModule_KeyAtPos(ctx,pos); - From 343b54cdbec837fff68847ccc5195cc09e38d049 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 15 Jun 2016 17:15:18 +0200 Subject: [PATCH 0002/1722] Test TOUCH and new TTL / TYPE behavior about object access time. --- tests/test_helper.tcl | 1 + tests/unit/introspection-2.tcl | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+) create mode 100644 tests/unit/introspection-2.tcl diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl index d31829480..5f114c5dc 100644 --- a/tests/test_helper.tcl +++ b/tests/test_helper.tcl @@ -46,6 +46,7 @@ set ::all_tests { unit/scripting unit/maxmemory unit/introspection + unit/introspection-2 unit/limits unit/obuf-limits unit/bitops diff --git a/tests/unit/introspection-2.tcl b/tests/unit/introspection-2.tcl new file mode 100644 index 000000000..350a8a016 --- /dev/null +++ b/tests/unit/introspection-2.tcl @@ -0,0 +1,23 @@ +start_server {tags {"introspection"}} { + test {TTL and TYPYE do not alter the last access time of a key} { + r set foo bar + after 3000 + r ttl foo + r type foo + assert {[r object idletime foo] >= 2} + } + + test {TOUCH alters the last access time of a key} { + r set foo bar + after 3000 + r touch foo + assert {[r object idletime foo] < 2} + } + + test {TOUCH returns the number of existing keys specified} { + r flushdb + r set key1 1 + r set key2 2 + r touch key0 key1 key2 key3 + } 2 +} From 4f2bf52409d5b692f23534bd05665cfb7003f108 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 16 Jun 2016 12:54:33 +0200 Subject: [PATCH 0003/1722] Minor aesthetic fixes to PR #3264. Comment format fixed + local var modified from camel case to underscore separators as Redis code base normally does (camel case is mostly used for global symbols like structure names, function names, global vars, ...). 
--- src/bitops.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/bitops.c b/src/bitops.c index 9ae52c81b..302e811d2 100644 --- a/src/bitops.c +++ b/src/bitops.c @@ -907,7 +907,7 @@ void bitfieldCommand(client *c) { struct bitfieldOp *ops = NULL; /* Array of ops to execute at end. */ int owtype = BFOVERFLOW_WRAP; /* Overflow type. */ int readonly = 1; - long highestWriteOffset = 0; + long higest_write_offset = 0; for (j = 2; j < c->argc; j++) { int remargs = c->argc-j-1; /* Remaining args other than current. */ @@ -957,7 +957,7 @@ void bitfieldCommand(client *c) { if (opcode != BITFIELDOP_GET) { readonly = 0; - highestWriteOffset = bitoffset + bits - 1; + higest_write_offset = bitoffset + bits - 1; /* INCRBY and SET require another argument. */ if (getLongLongFromObjectOrReply(c,c->argv[j+3],&i64,NULL) != C_OK){ zfree(ops); @@ -979,15 +979,15 @@ void bitfieldCommand(client *c) { } if (readonly) { - /* Lookup for read is ok if key doesn't exit, but errors - * if it's not a string*/ + /* Lookup for read is ok if key doesn't exit, but errors + * if it's not a string. */ o = lookupKeyRead(c->db,c->argv[1]); if (o != NULL && checkType(c,o,OBJ_STRING)) return; } else { /* Lookup by making room up to the farest bit reached by * this operation. */ if ((o = lookupStringForBitCommand(c, - highestWriteOffset)) == NULL) return; + higest_write_offset)) == NULL) return; } addReplyMultiBulkLen(c,numops); From 975c5bc7fea810085f11c25efecfc04f349ae795 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 16 Jun 2016 15:53:57 +0200 Subject: [PATCH 0004/1722] RESTORE: accept RDB dumps with older versions. Reference issue #3218. Checking the code I can't find a reason why the original RESTORE code was so opinionated about restoring only the current version. 
The code in `rdb.c` appears to be capable, as always, of restoring data from older versions of Redis, and the only place where the current version is needed in order to correctly restore data is while loading the opcodes, not the values themselves, as happens in the case of RESTORE. For the above reasons, this commit enables RESTORE to accept older versions of values payloads. --- src/cluster.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cluster.c b/src/cluster.c index 1f19db3e4..9289f6782 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -4535,7 +4535,7 @@ int verifyDumpPayload(unsigned char *p, size_t len) { /* Verify RDB version */ rdbver = (footer[1] << 8) | footer[0]; - if (rdbver != RDB_VERSION) return C_ERR; + if (rdbver > RDB_VERSION) return C_ERR; /* Verify CRC64 */ crc = crc64(0,p,len-8); From 1f0bd013026657a498e4b889a56568af22b93f10 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 16 Jun 2016 17:23:31 +0200 Subject: [PATCH 0005/1722] redis-cli: really connect to the right server. I recently introduced populating the autocomplete help array with the COMMAND command if available. However this was performed before parsing the arguments, defaulting to instance 6379. After the connection is performed it remains stable. The effect is that if there is an instance running on port 6379, whatever port you specify is ignored and 6379 is connected to instead. The right port will be selected only after a reconnection. Close #3314. 
--- src/redis-cli.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 027a2658c..17fb53394 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2591,13 +2591,16 @@ int main(int argc, char **argv) { else config.output = OUTPUT_STANDARD; config.mb_delim = sdsnew("\n"); - cliInitHelp(); - cliIntegrateHelp(); firstarg = parseOptions(argc,argv); argc -= firstarg; argv += firstarg; + /* Initialize the help and, if possible, use the COMMAND command in order + * to retrieve missing entries. */ + cliInitHelp(); + cliIntegrateHelp(); + /* Latency mode */ if (config.latency_mode) { if (cliConnect(0) == REDIS_ERR) exit(1); From 9d450c4e7c6e17af6d5281a19263493ffe815553 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 16 Jun 2016 19:24:34 +0200 Subject: [PATCH 0006/1722] Fix Sentinel pending commands counting. This bug's most noticeable effect was an inability of Redis to reconfigure back old masters to slaves after they are reachable again after a failover. This was due to failing to reset the count of the pending commands properly, so the master appeared forever down. It was introduced in Redis 3.2's new Sentinel connection sharing feature, which is a lot more complex than the 3.0 code, but more scalable. Many thanks to people reporting the issue, and especially to @sskorgal for investigating the issue in depth. Hopefully closes #3285. 
--- src/sentinel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/sentinel.c b/src/sentinel.c index 6c48f3ed9..f8ebd0c6f 100644 --- a/src/sentinel.c +++ b/src/sentinel.c @@ -1910,6 +1910,7 @@ void sentinelReconnectInstance(sentinelRedisInstance *ri) { link->cc->errstr); instanceLinkCloseConnection(link,link->cc); } else { + link->pending_commands = 0; link->cc_conn_time = mstime(); link->cc->data = link; redisAeAttach(server.el,link->cc); From 42a2e12656ce949fb6db52f964a2218750c2e296 Mon Sep 17 00:00:00 2001 From: Misha Nasledov Date: Thu, 16 Jun 2016 16:50:53 -0700 Subject: [PATCH 0007/1722] Fix incorrect comment for checkForSentinelMode function --- src/server.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/server.c b/src/server.c index e2a636258..06244081f 100644 --- a/src/server.c +++ b/src/server.c @@ -3825,7 +3825,7 @@ void setupSignalHandlers(void) { void memtest(size_t megabytes, int passes); /* Returns 1 if there is --sentinel among the arguments or if - * argv[0] is exactly "redis-sentinel". */ + * argv[0] contains "redis-sentinel". */ int checkForSentinelMode(int argc, char **argv) { int j; From ce9d66d6e4103e9b69c71e61be08065c4f332f9d Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Mon, 20 Jun 2016 23:08:06 +0300 Subject: [PATCH 0008/1722] Use const in Redis Module API where possible. 
--- src/debug.c | 14 +++++++------- src/intset.c | 2 +- src/intset.h | 2 +- src/module.c | 6 +++--- src/object.c | 4 ++-- src/quicklist.c | 2 +- src/quicklist.h | 2 +- src/redismodule.h | 6 +++--- src/server.h | 22 +++++++++++----------- src/t_hash.c | 4 ++-- src/t_list.c | 2 +- src/t_set.c | 6 +++--- src/t_zset.c | 4 ++-- 13 files changed, 38 insertions(+), 38 deletions(-) diff --git a/src/debug.c b/src/debug.c index 1e179caff..f3e109479 100644 --- a/src/debug.c +++ b/src/debug.c @@ -550,7 +550,7 @@ void debugCommand(client *c) { /* =========================== Crash handling ============================== */ -void _serverAssert(char *estr, char *file, int line) { +void _serverAssert(const char *estr, const char *file, int line) { bugReportStart(); serverLog(LL_WARNING,"=== ASSERTION FAILED ==="); serverLog(LL_WARNING,"==> %s:%d '%s' is not true",file,line,estr); @@ -563,7 +563,7 @@ void _serverAssert(char *estr, char *file, int line) { *((char*)-1) = 'x'; } -void _serverAssertPrintClientInfo(client *c) { +void _serverAssertPrintClientInfo(const client *c) { int j; bugReportStart(); @@ -587,7 +587,7 @@ void _serverAssertPrintClientInfo(client *c) { } } -void serverLogObjectDebugInfo(robj *o) { +void serverLogObjectDebugInfo(const robj *o) { serverLog(LL_WARNING,"Object type: %d", o->type); serverLog(LL_WARNING,"Object encoding: %d", o->encoding); serverLog(LL_WARNING,"Object refcount: %d", o->refcount); @@ -607,23 +607,23 @@ void serverLogObjectDebugInfo(robj *o) { } else if (o->type == OBJ_ZSET) { serverLog(LL_WARNING,"Sorted set size: %d", (int) zsetLength(o)); if (o->encoding == OBJ_ENCODING_SKIPLIST) - serverLog(LL_WARNING,"Skiplist level: %d", (int) ((zset*)o->ptr)->zsl->level); + serverLog(LL_WARNING,"Skiplist level: %d", (int) ((const zset*)o->ptr)->zsl->level); } } -void _serverAssertPrintObject(robj *o) { +void _serverAssertPrintObject(const robj *o) { bugReportStart(); serverLog(LL_WARNING,"=== ASSERTION FAILED OBJECT CONTEXT ==="); 
serverLogObjectDebugInfo(o); } -void _serverAssertWithInfo(client *c, robj *o, char *estr, char *file, int line) { +void _serverAssertWithInfo(const client *c, const robj *o, const char *estr, const char *file, int line) { if (c) _serverAssertPrintClientInfo(c); if (o) _serverAssertPrintObject(o); _serverAssert(estr,file,line); } -void _serverPanic(char *msg, char *file, int line) { +void _serverPanic(const char *msg, const char *file, int line) { bugReportStart(); serverLog(LL_WARNING,"------------------------------------------------"); serverLog(LL_WARNING,"!!! Software Failure. Press left mouse button to continue"); diff --git a/src/intset.c b/src/intset.c index b0a597fc7..30ea85344 100644 --- a/src/intset.c +++ b/src/intset.c @@ -272,7 +272,7 @@ uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value) { } /* Return intset length */ -uint32_t intsetLen(intset *is) { +uint32_t intsetLen(const intset *is) { return intrev32ifbe(is->length); } diff --git a/src/intset.h b/src/intset.h index 30a854f89..8119e6636 100644 --- a/src/intset.h +++ b/src/intset.h @@ -44,7 +44,7 @@ intset *intsetRemove(intset *is, int64_t value, int *success); uint8_t intsetFind(intset *is, int64_t value); int64_t intsetRandom(intset *is); uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value); -uint32_t intsetLen(intset *is); +uint32_t intsetLen(const intset *is); size_t intsetBlobLen(intset *is); #ifdef REDIS_TEST diff --git a/src/module.c b/src/module.c index 54f279075..65063338d 100644 --- a/src/module.c +++ b/src/module.c @@ -687,7 +687,7 @@ void RM_FreeString(RedisModuleCtx *ctx, RedisModuleString *str) { /* Given a string module object, this function returns the string pointer * and length of the string. The returned pointer and length should only * be used for read only accesses and never modified. 
*/ -const char *RM_StringPtrLen(RedisModuleString *str, size_t *len) { +const char *RM_StringPtrLen(const RedisModuleString *str, size_t *len) { if (len) *len = sdslen(str->ptr); return str->ptr; } @@ -696,7 +696,7 @@ const char *RM_StringPtrLen(RedisModuleString *str, size_t *len) { * Returns REDISMODULE_OK on success. If the string can't be parsed * as a valid, strict long long (no spaces before/after), REDISMODULE_ERR * is returned. */ -int RM_StringToLongLong(RedisModuleString *str, long long *ll) { +int RM_StringToLongLong(const RedisModuleString *str, long long *ll) { return string2ll(str->ptr,sdslen(str->ptr),ll) ? REDISMODULE_OK : REDISMODULE_ERR; } @@ -704,7 +704,7 @@ int RM_StringToLongLong(RedisModuleString *str, long long *ll) { /* Convert the string into a double, storing it at `*d`. * Returns REDISMODULE_OK on success or REDISMODULE_ERR if the string is * not a valid string representation of a double value. */ -int RM_StringToDouble(RedisModuleString *str, double *d) { +int RM_StringToDouble(const RedisModuleString *str, double *d) { int retval = getDoubleFromObject(str,d); return (retval == C_OK) ? 
REDISMODULE_OK : REDISMODULE_ERR; } diff --git a/src/object.c b/src/object.c index b9e5667ef..9d1d4b7e9 100644 --- a/src/object.c +++ b/src/object.c @@ -539,7 +539,7 @@ size_t stringObjectLen(robj *o) { } } -int getDoubleFromObject(robj *o, double *target) { +int getDoubleFromObject(const robj *o, double *target) { double value; char *eptr; @@ -550,7 +550,7 @@ int getDoubleFromObject(robj *o, double *target) { if (sdsEncodedObject(o)) { errno = 0; value = strtod(o->ptr, &eptr); - if (isspace(((char*)o->ptr)[0]) || + if (isspace(((const char*)o->ptr)[0]) || eptr[0] != '\0' || (errno == ERANGE && (value == HUGE_VAL || value == -HUGE_VAL || value == 0)) || diff --git a/src/quicklist.c b/src/quicklist.c index be02e3276..adf9ba1de 100644 --- a/src/quicklist.c +++ b/src/quicklist.c @@ -149,7 +149,7 @@ REDIS_STATIC quicklistNode *quicklistCreateNode(void) { } /* Return cached quicklist count */ -unsigned int quicklistCount(quicklist *ql) { return ql->count; } +unsigned int quicklistCount(const quicklist *ql) { return ql->count; } /* Free entire quicklist. 
*/ void quicklistRelease(quicklist *quicklist) { diff --git a/src/quicklist.h b/src/quicklist.h index e040368e5..8f3875900 100644 --- a/src/quicklist.h +++ b/src/quicklist.h @@ -154,7 +154,7 @@ int quicklistPopCustom(quicklist *quicklist, int where, unsigned char **data, void *(*saver)(unsigned char *data, unsigned int sz)); int quicklistPop(quicklist *quicklist, int where, unsigned char **data, unsigned int *sz, long long *slong); -unsigned int quicklistCount(quicklist *ql); +unsigned int quicklistCount(const quicklist *ql); int quicklistCompare(unsigned char *p1, unsigned char *p2, int p2_len); size_t quicklistGetLzf(const quicklistNode *node, void **data); diff --git a/src/redismodule.h b/src/redismodule.h index 618b39e49..6151e9fea 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -123,7 +123,7 @@ RedisModuleCallReply *REDISMODULE_API_FUNC(RedisModule_CallReplyArrayElement)(Re RedisModuleString *REDISMODULE_API_FUNC(RedisModule_CreateString)(RedisModuleCtx *ctx, const char *ptr, size_t len); RedisModuleString *REDISMODULE_API_FUNC(RedisModule_CreateStringFromLongLong)(RedisModuleCtx *ctx, long long ll); void REDISMODULE_API_FUNC(RedisModule_FreeString)(RedisModuleCtx *ctx, RedisModuleString *str); -const char *REDISMODULE_API_FUNC(RedisModule_StringPtrLen)(RedisModuleString *str, size_t *len); +const char *REDISMODULE_API_FUNC(RedisModule_StringPtrLen)(const RedisModuleString *str, size_t *len); int REDISMODULE_API_FUNC(RedisModule_ReplyWithError)(RedisModuleCtx *ctx, const char *err); int REDISMODULE_API_FUNC(RedisModule_ReplyWithSimpleString)(RedisModuleCtx *ctx, const char *msg); int REDISMODULE_API_FUNC(RedisModule_ReplyWithArray)(RedisModuleCtx *ctx, long len); @@ -133,8 +133,8 @@ int REDISMODULE_API_FUNC(RedisModule_ReplyWithString)(RedisModuleCtx *ctx, Redis int REDISMODULE_API_FUNC(RedisModule_ReplyWithNull)(RedisModuleCtx *ctx); int REDISMODULE_API_FUNC(RedisModule_ReplyWithDouble)(RedisModuleCtx *ctx, double d); int 
REDISMODULE_API_FUNC(RedisModule_ReplyWithCallReply)(RedisModuleCtx *ctx, RedisModuleCallReply *reply); -int REDISMODULE_API_FUNC(RedisModule_StringToLongLong)(RedisModuleString *str, long long *ll); -int REDISMODULE_API_FUNC(RedisModule_StringToDouble)(RedisModuleString *str, double *d); +int REDISMODULE_API_FUNC(RedisModule_StringToLongLong)(const RedisModuleString *str, long long *ll); +int REDISMODULE_API_FUNC(RedisModule_StringToDouble)(const RedisModuleString *str, double *d); void REDISMODULE_API_FUNC(RedisModule_AutoMemory)(RedisModuleCtx *ctx); int REDISMODULE_API_FUNC(RedisModule_Replicate)(RedisModuleCtx *ctx, const char *cmdname, const char *fmt, ...); int REDISMODULE_API_FUNC(RedisModule_ReplicateVerbatim)(RedisModuleCtx *ctx); diff --git a/src/server.h b/src/server.h index 10fbf3237..cd5ac77f3 100644 --- a/src/server.h +++ b/src/server.h @@ -1048,8 +1048,8 @@ struct redisServer { long long latency_monitor_threshold; dict *latency_events; /* Assert & bug reporting */ - char *assert_failed; - char *assert_file; + const char *assert_failed; + const char *assert_file; int assert_line; int bug_report_start; /* True if bug report header was already logged. */ int watchdog_period; /* Software watchdog period in ms. 
0 = off */ @@ -1245,7 +1245,7 @@ void addReplyStatusFormat(client *c, const char *fmt, ...); void listTypeTryConversion(robj *subject, robj *value); void listTypePush(robj *subject, robj *value, int where); robj *listTypePop(robj *subject, int where); -unsigned long listTypeLength(robj *subject); +unsigned long listTypeLength(const robj *subject); listTypeIterator *listTypeInitIterator(robj *subject, long index, unsigned char direction); void listTypeReleaseIterator(listTypeIterator *li); int listTypeNext(listTypeIterator *li, listTypeEntry *entry); @@ -1305,7 +1305,7 @@ int getLongFromObjectOrReply(client *c, robj *o, long *target, const char *msg); int checkType(client *c, robj *o, int type); int getLongLongFromObjectOrReply(client *c, robj *o, long long *target, const char *msg); int getDoubleFromObjectOrReply(client *c, robj *o, double *target, const char *msg); -int getDoubleFromObject(robj *o, double *target); +int getDoubleFromObject(const robj *o, double *target); int getLongLongFromObject(robj *o, long long *target); int getLongDoubleFromObject(robj *o, long double *target); int getLongDoubleFromObjectOrReply(client *c, robj *o, long double *target, const char *msg); @@ -1406,7 +1406,7 @@ void zzlNext(unsigned char *zl, unsigned char **eptr, unsigned char **sptr); void zzlPrev(unsigned char *zl, unsigned char **eptr, unsigned char **sptr); unsigned char *zzlFirstInRange(unsigned char *zl, zrangespec *range); unsigned char *zzlLastInRange(unsigned char *zl, zrangespec *range); -unsigned int zsetLength(robj *zobj); +unsigned int zsetLength(const robj *zobj); void zsetConvert(robj *zobj, int encoding); void zsetConvertToZiplistIfNeeded(robj *zobj, size_t maxelelen); int zsetScore(robj *zobj, sds member, double *score); @@ -1479,7 +1479,7 @@ int setTypeNext(setTypeIterator *si, sds *sdsele, int64_t *llele); sds setTypeNextObject(setTypeIterator *si); int setTypeRandomElement(robj *setobj, sds *sdsele, int64_t *llele); unsigned long setTypeRandomElements(robj 
*set, unsigned long count, robj *aux_set); -unsigned long setTypeSize(robj *subject); +unsigned long setTypeSize(const robj *subject); void setTypeConvert(robj *subject, int enc); /* Hash data type */ @@ -1492,7 +1492,7 @@ void hashTypeTryConversion(robj *subject, robj **argv, int start, int end); void hashTypeTryObjectEncoding(robj *subject, robj **o1, robj **o2); int hashTypeExists(robj *o, sds key); int hashTypeDelete(robj *o, sds key); -unsigned long hashTypeLength(robj *o); +unsigned long hashTypeLength(const robj *o); hashTypeIterator *hashTypeInitIterator(robj *subject); void hashTypeReleaseIterator(hashTypeIterator *hi); int hashTypeNext(hashTypeIterator *hi); @@ -1799,11 +1799,11 @@ void *realloc(void *ptr, size_t size) __attribute__ ((deprecated)); #endif /* Debugging stuff */ -void _serverAssertWithInfo(client *c, robj *o, char *estr, char *file, int line); -void _serverAssert(char *estr, char *file, int line); -void _serverPanic(char *msg, char *file, int line); +void _serverAssertWithInfo(const client *c, const robj *o, const char *estr, const char *file, int line); +void _serverAssert(const char *estr, const char *file, int line); +void _serverPanic(const char *msg, const char *file, int line); void bugReportStart(void); -void serverLogObjectDebugInfo(robj *o); +void serverLogObjectDebugInfo(const robj *o); void sigsegvHandler(int sig, siginfo_t *info, void *secret); sds genRedisInfoString(char *section); void enableWatchdog(int period); diff --git a/src/t_hash.c b/src/t_hash.c index c75b391d7..a49559336 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -308,13 +308,13 @@ int hashTypeDelete(robj *o, sds field) { } /* Return the number of elements in a hash. 
*/ -unsigned long hashTypeLength(robj *o) { +unsigned long hashTypeLength(const robj *o) { unsigned long length = ULONG_MAX; if (o->encoding == OBJ_ENCODING_ZIPLIST) { length = ziplistLen(o->ptr) / 2; } else if (o->encoding == OBJ_ENCODING_HT) { - length = dictSize((dict*)o->ptr); + length = dictSize((const dict*)o->ptr); } else { serverPanic("Unknown hash encoding"); } diff --git a/src/t_list.c b/src/t_list.c index f9969fa2e..a0a30998d 100644 --- a/src/t_list.c +++ b/src/t_list.c @@ -71,7 +71,7 @@ robj *listTypePop(robj *subject, int where) { return value; } -unsigned long listTypeLength(robj *subject) { +unsigned long listTypeLength(const robj *subject) { if (subject->encoding == OBJ_ENCODING_QUICKLIST) { return quicklistCount(subject->ptr); } else { diff --git a/src/t_set.c b/src/t_set.c index db5c544b6..ddd82b8b0 100644 --- a/src/t_set.c +++ b/src/t_set.c @@ -219,11 +219,11 @@ int setTypeRandomElement(robj *setobj, sds *sdsele, int64_t *llele) { return setobj->encoding; } -unsigned long setTypeSize(robj *subject) { +unsigned long setTypeSize(const robj *subject) { if (subject->encoding == OBJ_ENCODING_HT) { - return dictSize((dict*)subject->ptr); + return dictSize((const dict*)subject->ptr); } else if (subject->encoding == OBJ_ENCODING_INTSET) { - return intsetLen((intset*)subject->ptr); + return intsetLen((const intset*)subject->ptr); } else { serverPanic("Unknown set encoding"); } diff --git a/src/t_zset.c b/src/t_zset.c index 7c96cf63a..c61ba8089 100644 --- a/src/t_zset.c +++ b/src/t_zset.c @@ -1100,12 +1100,12 @@ unsigned char *zzlDeleteRangeByRank(unsigned char *zl, unsigned int start, unsig * Common sorted set API *----------------------------------------------------------------------------*/ -unsigned int zsetLength(robj *zobj) { +unsigned int zsetLength(const robj *zobj) { int length = -1; if (zobj->encoding == OBJ_ENCODING_ZIPLIST) { length = zzlLength(zobj->ptr); } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) { - length = 
((zset*)zobj->ptr)->zsl->length; + length = ((const zset*)zobj->ptr)->zsl->length; } else { serverPanic("Unknown sorted set encoding"); } From fa96f33bef787c5fe62f0353c387f39ef46eaf0e Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Tue, 21 Jun 2016 10:22:19 +0300 Subject: [PATCH 0009/1722] Fix occasional RM_OpenKey() crashes. --- src/module.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/module.c b/src/module.c index 54f279075..6a4b95705 100644 --- a/src/module.c +++ b/src/module.c @@ -1047,6 +1047,7 @@ void *RM_OpenKey(RedisModuleCtx *ctx, robj *keyname, int mode) { kp->value = value; kp->iter = NULL; kp->mode = mode; + kp->ztype = REDISMODULE_ZSET_RANGE_NONE; RM_ZsetRangeStop(kp); autoMemoryAdd(ctx,REDISMODULE_AM_KEY,kp); return (void*)kp; From 5b5b2f86c582cd270cd84641c658d5d47be3a39c Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Wed, 22 Jun 2016 07:30:06 +0300 Subject: [PATCH 0010/1722] Cleanup: remove zset reset function from RM_ZsetRangeStop(). --- src/module.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/src/module.c b/src/module.c index 6a4b95705..ef99fe4d0 100644 --- a/src/module.c +++ b/src/module.c @@ -163,7 +163,8 @@ void RM_CloseKey(RedisModuleKey *key); void autoMemoryCollect(RedisModuleCtx *ctx); robj **moduleCreateArgvFromUserFormat(const char *cmdname, const char *fmt, int *argcp, int *flags, va_list ap); void moduleReplicateMultiIfNeeded(RedisModuleCtx *ctx); -void RM_ZsetRangeStop(RedisModuleKey *key); +void RM_ZsetRangeStop(RedisModuleKey *kp); +static void zsetKeyReset(RedisModuleKey *key); /* -------------------------------------------------------------------------- * Heap allocation raw functions @@ -1047,8 +1048,7 @@ void *RM_OpenKey(RedisModuleCtx *ctx, robj *keyname, int mode) { kp->value = value; kp->iter = NULL; kp->mode = mode; - kp->ztype = REDISMODULE_ZSET_RANGE_NONE; - RM_ZsetRangeStop(kp); + zsetKeyReset(kp); autoMemoryAdd(ctx,REDISMODULE_AM_KEY,kp); return 
(void*)kp; } @@ -1434,19 +1434,25 @@ int RM_ZsetScore(RedisModuleKey *key, RedisModuleString *ele, double *score) { * Key API for Sorted Set iterator * -------------------------------------------------------------------------- */ -/* Stop a sorted set iteration. */ -void RM_ZsetRangeStop(RedisModuleKey *key) { - /* Free resources if needed. */ - if (key->ztype == REDISMODULE_ZSET_RANGE_LEX) - zslFreeLexRange(&key->zlrs); - /* Setup sensible values so that misused iteration API calls when an - * iterator is not active will result into something more sensible - * than crashing. */ +static void zsetKeyReset(RedisModuleKey *key) +{ key->ztype = REDISMODULE_ZSET_RANGE_NONE; key->zcurrent = NULL; key->zer = 1; } +/* Stop a sorted set iteration. */ +void RM_ZsetRangeStop(RedisModuleKey *key) { + /* Free resources if needed. */ + if (key->ztype == REDISMODULE_ZSET_RANGE_LEX) { + zslFreeLexRange(&key->zlrs); + } + /* Setup sensible values so that misused iteration API calls when an + * iterator is not active will result into something more sensible + * than crashing. */ + zsetKeyReset(key); +} + /* Return the "End of range" flag value to signal the end of the iteration. */ int RM_ZsetRangeEndReached(RedisModuleKey *key) { return key->zer; From aa345bf65944ffd690f5e0717ec1b6d60536c5da Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 22 Jun 2016 15:24:51 +0200 Subject: [PATCH 0011/1722] Modules doc: hint about replacing libc malloc calls. --- src/modules/TYPES.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/modules/TYPES.md b/src/modules/TYPES.md index cd870c141..1c31950fa 100644 --- a/src/modules/TYPES.md +++ b/src/modules/TYPES.md @@ -354,4 +354,18 @@ allocation functions provided by the module API is exactly compatible with `malloc()`, `realloc()`, `free()` and `strdup()`, so converting the libraries in order to use these functions should be trivial. 
+In case you have an external library that uses libc `malloc()`, and you want +to avoid replacing manually all the calls with the Redis Modules API calls, +an approach could be to use simple macros in order to replace the libc calls +with the Redis API calls. Something like this could work: + #define malloc RedisModule_Alloc + #define realloc RedisModule_Realloc + #define free RedisModule_Free + #define strdup RedisModule_Strdup + +However take in mind that mixing libc calls with Redis API calls will result +into troubles and crashes, so if you replace calls using macros, you need to +make sure that all the calls are correctly replaced, and that the code with +the substituted calls will never, for example, attempt to call +`RedisModule_Free()` with a pointer allocated using libc `malloc()`. From bdc079dca40e2eb18949828f02cf34828381845e Mon Sep 17 00:00:00 2001 From: Dvir Volk Date: Wed, 22 Jun 2016 17:32:41 +0300 Subject: [PATCH 0012/1722] added RM_Calloc implementation --- src/module.c | 11 ++++++++++- src/redismodule.h | 3 +++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/module.c b/src/module.c index 54f279075..56f225ab4 100644 --- a/src/module.c +++ b/src/module.c @@ -172,11 +172,19 @@ void RM_ZsetRangeStop(RedisModuleKey *key); /* Use like malloc(). Memory allocated with this function is reported in * Redis INFO memory, used for keys eviction according to maxmemory settings * and in general is taken into account as memory allocated by Redis. - * You should avoid to use malloc(). */ + * You should avoid using malloc(). */ void *RM_Alloc(size_t bytes) { return zmalloc(bytes); } +/* Use like calloc(). Memory allocated with this function is reported in + * Redis INFO memory, used for keys eviction according to maxmemory settings + * and in general is taken into account as memory allocated by Redis. + * You should avoid using calloc() directly. 
*/ +void *RM_Calloc(size_t nmemb, size_t size) { + return zcalloc(nmemb*size); +} + /* Use like realloc() for memory obtained with RedisModule_Alloc(). */ void* RM_Realloc(void *ptr, size_t bytes) { return zrealloc(ptr,bytes); @@ -2791,6 +2799,7 @@ int moduleRegisterApi(const char *funcname, void *funcptr) { void moduleRegisterCoreAPI(void) { server.moduleapi = dictCreate(&moduleAPIDictType,NULL); REGISTER_API(Alloc); + REGISTER_API(Calloc); REGISTER_API(Realloc); REGISTER_API(Free); REGISTER_API(Strdup); diff --git a/src/redismodule.h b/src/redismodule.h index 618b39e49..080d3fd10 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -96,9 +96,11 @@ typedef void (*RedisModuleTypeFreeFunc)(void *value); #define REDISMODULE_API_FUNC(x) (*x) + void *REDISMODULE_API_FUNC(RedisModule_Alloc)(size_t bytes); void *REDISMODULE_API_FUNC(RedisModule_Realloc)(void *ptr, size_t bytes); void REDISMODULE_API_FUNC(RedisModule_Free)(void *ptr); +void REDISMODULE_API_FUNC(RedisModule_Calloc)(size_t nmemb, size_t size); char *REDISMODULE_API_FUNC(RedisModule_Strdup)(const char *str); int REDISMODULE_API_FUNC(RedisModule_GetApi)(const char *, void *); int REDISMODULE_API_FUNC(RedisModule_CreateCommand)(RedisModuleCtx *ctx, const char *name, RedisModuleCmdFunc cmdfunc, const char *strflags, int firstkey, int lastkey, int keystep); @@ -187,6 +189,7 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int void *getapifuncptr = ((void**)ctx)[0]; RedisModule_GetApi = (int (*)(const char *, void *)) (unsigned long)getapifuncptr; REDISMODULE_GET_API(Alloc); + REDISMODULE_GET_API(Calloc); REDISMODULE_GET_API(Free); REDISMODULE_GET_API(Realloc); REDISMODULE_GET_API(Strdup); From 159c09f29e7a9372fdb1f025d491d919a63489fa Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Wed, 22 Jun 2016 20:57:24 +0300 Subject: [PATCH 0013/1722] Add RedisModule_CreateStringFromString(). 
--- src/module.c | 12 ++++++++++++ src/modules/API.md | 10 ++++++++++ src/object.c | 2 +- src/redismodule.h | 2 ++ src/server.h | 2 +- 5 files changed, 26 insertions(+), 2 deletions(-) diff --git a/src/module.c b/src/module.c index 54f279075..25cdda1cd 100644 --- a/src/module.c +++ b/src/module.c @@ -673,6 +673,17 @@ RedisModuleString *RM_CreateStringFromLongLong(RedisModuleCtx *ctx, long long ll return RM_CreateString(ctx,buf,len); } +/* Like RedisModule_CreatString(), but creates a string starting from another + * RedisModuleString. + * + * The returned string must be released with RedisModule_FreeString() or by + * enabling automatic memory management. */ +RedisModuleString *RM_CreateStringFromString(RedisModuleCtx *ctx, const RedisModuleString *str) { + RedisModuleString *o = dupStringObject(str); + autoMemoryAdd(ctx,REDISMODULE_AM_STRING,o); + return o; +} + /* Free a module string object obtained with one of the Redis modules API calls * that return new string objects. * @@ -2828,6 +2839,7 @@ void moduleRegisterCoreAPI(void) { REGISTER_API(CreateStringFromCallReply); REGISTER_API(CreateString); REGISTER_API(CreateStringFromLongLong); + REGISTER_API(CreateStringFromString); REGISTER_API(FreeString); REGISTER_API(StringPtrLen); REGISTER_API(AutoMemory); diff --git a/src/modules/API.md b/src/modules/API.md index e03edf6af..634f4b23f 100644 --- a/src/modules/API.md +++ b/src/modules/API.md @@ -179,6 +179,16 @@ integer instead of taking a buffer and its length. The returned string must be released with `RedisModule_FreeString()` or by enabling automatic memory management. +## `RM_CreateStringFromString` + + RedisModuleString *RM_CreateStringFromString(RedisModuleCtx *ctx, const RedisModuleString *str); + +Like `RedisModule_CreatString()`, but creates a string starting from an existing +RedisModuleString. + +The returned string must be released with `RedisModule_FreeString()` or by +enabling automatic memory management. 
+ ## `RM_FreeString` void RM_FreeString(RedisModuleCtx *ctx, RedisModuleString *str); diff --git a/src/object.c b/src/object.c index b9e5667ef..cf523d08d 100644 --- a/src/object.c +++ b/src/object.c @@ -147,7 +147,7 @@ robj *createStringObjectFromLongDouble(long double value, int humanfriendly) { * will always result in a fresh object that is unshared (refcount == 1). * * The resulting object always has refcount set to 1. */ -robj *dupStringObject(robj *o) { +robj *dupStringObject(const robj *o) { robj *d; serverAssert(o->type == OBJ_STRING); diff --git a/src/redismodule.h b/src/redismodule.h index 618b39e49..18a5c14f3 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -122,6 +122,7 @@ size_t REDISMODULE_API_FUNC(RedisModule_CallReplyLength)(RedisModuleCallReply *r RedisModuleCallReply *REDISMODULE_API_FUNC(RedisModule_CallReplyArrayElement)(RedisModuleCallReply *reply, size_t idx); RedisModuleString *REDISMODULE_API_FUNC(RedisModule_CreateString)(RedisModuleCtx *ctx, const char *ptr, size_t len); RedisModuleString *REDISMODULE_API_FUNC(RedisModule_CreateStringFromLongLong)(RedisModuleCtx *ctx, long long ll); +RedisModuleString *REDISMODULE_API_FUNC(RedisModule_CreateStringFromString)(RedisModuleCtx *ctx, const RedisModuleString *str); void REDISMODULE_API_FUNC(RedisModule_FreeString)(RedisModuleCtx *ctx, RedisModuleString *str); const char *REDISMODULE_API_FUNC(RedisModule_StringPtrLen)(RedisModuleString *str, size_t *len); int REDISMODULE_API_FUNC(RedisModule_ReplyWithError)(RedisModuleCtx *ctx, const char *err); @@ -225,6 +226,7 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int REDISMODULE_GET_API(CreateStringFromCallReply); REDISMODULE_GET_API(CreateString); REDISMODULE_GET_API(CreateStringFromLongLong); + REDISMODULE_GET_API(CreateStringFromString); REDISMODULE_GET_API(FreeString); REDISMODULE_GET_API(StringPtrLen); REDISMODULE_GET_API(AutoMemory); diff --git a/src/server.h b/src/server.h index 10fbf3237..a81b49983 
100644 --- a/src/server.h +++ b/src/server.h @@ -1285,7 +1285,7 @@ robj *createObject(int type, void *ptr); robj *createStringObject(const char *ptr, size_t len); robj *createRawStringObject(const char *ptr, size_t len); robj *createEmbeddedStringObject(const char *ptr, size_t len); -robj *dupStringObject(robj *o); +robj *dupStringObject(const robj *o); int isSdsRepresentableAsLongLong(sds s, long long *llval); int isObjectRepresentableAsLongLong(robj *o, long long *llongval); robj *tryObjectEncoding(robj *o); From 7bc37d864f2a3e128f6a806d72d2dc6cefdc453e Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 23 Jun 2016 09:09:51 +0200 Subject: [PATCH 0014/1722] Modules: implement zig-zag scanning in autoMemoryFreed(). Most of the time to check the last element is the way to go, however there are patterns where the contrary is the best choice. Zig-zag scanning implemented in this commmit always checks the obvious element first (the last added -- think at a loop where the last element allocated gets freed again and again), and continues checking one element in the head and one in the tail. Thanks to @dvisrky that fixed the original implementation of the function and proposed zig zag scanning. 
--- src/module.c | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/src/module.c b/src/module.c index ed178cecc..d77a8f956 100644 --- a/src/module.c +++ b/src/module.c @@ -610,23 +610,27 @@ void autoMemoryAdd(RedisModuleCtx *ctx, int type, void *ptr) { void autoMemoryFreed(RedisModuleCtx *ctx, int type, void *ptr) { if (!(ctx->flags & REDISMODULE_CTX_AUTO_MEMORY)) return; - int j; - for (j = ctx->amqueue_used - 1; j >= 0; j--) { - if (ctx->amqueue[j].type == type && - ctx->amqueue[j].ptr == ptr) - { - ctx->amqueue[j].type = REDISMODULE_AM_FREED; - - /* Switch the freed element and the top element, to avoid growing - * the queue unnecessarily if we allocate/free in a loop */ - if (j != ctx->amqueue_used-1) { - ctx->amqueue[j] = ctx->amqueue[ctx->amqueue_used-1]; - } - /* Reduce the size of the queue because we either moved the top - * element elsewhere or freed it */ - ctx->amqueue_used--; + int count = (ctx->amqueue_used+1)/2; + for (int j = 0; j < count; j++) { + for (int side = 0; side < 2; side++) { + /* For side = 0 check right side of the array, for + * side = 1 check the left side instead (zig-zag scanning). */ + int i = (side == 0) ? (ctx->amqueue_used - 1 - j) : j; + if (ctx->amqueue[i].type == type && + ctx->amqueue[i].ptr == ptr) + { + ctx->amqueue[i].type = REDISMODULE_AM_FREED; - break; + /* Switch the freed element and the top element, to avoid growing + * the queue unnecessarily if we allocate/free in a loop */ + if (i != ctx->amqueue_used-1) { + ctx->amqueue[i] = ctx->amqueue[ctx->amqueue_used-1]; + } + /* Reduce the size of the queue because we either moved the top + * element elsewhere or freed it */ + ctx->amqueue_used--; + return; + } } } } From 4b8cd3ab71f4ba0fb9c9f6a37380f3c904803e5f Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 23 Jun 2016 09:38:30 +0200 Subject: [PATCH 0015/1722] Commit change in autoMemoryFreed(): first -> last. 
It's more natural to call the last entry added as "last", the original commet got me confused until I actually read the code. --- src/module.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/module.c b/src/module.c index d77a8f956..3bff47793 100644 --- a/src/module.c +++ b/src/module.c @@ -621,11 +621,12 @@ void autoMemoryFreed(RedisModuleCtx *ctx, int type, void *ptr) { { ctx->amqueue[i].type = REDISMODULE_AM_FREED; - /* Switch the freed element and the top element, to avoid growing + /* Switch the freed element and the last element, to avoid growing * the queue unnecessarily if we allocate/free in a loop */ if (i != ctx->amqueue_used-1) { ctx->amqueue[i] = ctx->amqueue[ctx->amqueue_used-1]; } + /* Reduce the size of the queue because we either moved the top * element elsewhere or freed it */ ctx->amqueue_used--; From 84f1e03f9a382d60a948fef3e196f8fa63b81cec Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Wed, 15 Jun 2016 16:27:16 +0300 Subject: [PATCH 0016/1722] Add RedisModule_Log() logging API function. --- src/module.c | 25 +++++++++++++++++++++++++ src/modules/API.md | 8 ++++++++ src/redismodule.h | 9 +++++++++ 3 files changed, 42 insertions(+) diff --git a/src/module.c b/src/module.c index 3bff47793..a71d442ca 100644 --- a/src/module.c +++ b/src/module.c @@ -2768,6 +2768,30 @@ void RM_EmitAOF(RedisModuleIO *io, const char *cmdname, const char *fmt, ...) { return; } +/* -------------------------------------------------------------------------- + * Logging + * -------------------------------------------------------------------------- */ + +/* Produces a log message to the standard Redis log. */ +void RM_Log(RedisModuleCtx *ctx, int level, const char *fmt, ...) 
+{ + va_list ap; + char msg[LOG_MAX_LEN]; + size_t name_len; + + if ((level&0xff) < server.verbosity) return; + if (!ctx->module) return; /* Can only log if module is initialized */ + + name_len = snprintf(msg, sizeof(msg),"%s: ", ctx->module->name); + + va_start(ap, fmt); + vsnprintf(msg + name_len, sizeof(msg) - name_len, fmt, ap); + va_end(ap); + + serverLogRaw(level,msg); +} + + /* -------------------------------------------------------------------------- * Modules API internals * -------------------------------------------------------------------------- */ @@ -2886,6 +2910,7 @@ void moduleRegisterCoreAPI(void) { REGISTER_API(SaveDouble); REGISTER_API(LoadDouble); REGISTER_API(EmitAOF); + REGISTER_API(Log); } /* Global initialization at Redis startup. */ diff --git a/src/modules/API.md b/src/modules/API.md index e03edf6af..24768c5b8 100644 --- a/src/modules/API.md +++ b/src/modules/API.md @@ -1115,3 +1115,11 @@ by a module. The command works exactly like `RedisModule_Call()` in the way the parameters are passed, but it does not return anything as the error handling is performed by Redis itself. +## `RM_Log` + + void RM_Log(RedisModuleCtx *ctx, int level, const char *fmt, ...); + +Produce a log message into the standard Redis log. All standard Redis logging +configuration applies here. Messages can only be logged after a module has +initialized, and are prefixed by the name of the module. Log level is +specified using the REDISMODULE_LOG_* macros. 
diff --git a/src/redismodule.h b/src/redismodule.h index 618b39e49..aa43a7367 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -68,6 +68,13 @@ #define REDISMODULE_POSITIVE_INFINITE (1.0/0.0) #define REDISMODULE_NEGATIVE_INFINITE (-1.0/0.0) +/* Logging levels */ +#define REDISMODULE_LOG_DEBUG 0 +#define REDISMODULE_LOG_VERBOSE 1 +#define REDISMODULE_LOG_NOTICE 2 +#define REDISMODULE_LOG_WARNING 3 + + /* ------------------------- End of common defines ------------------------ */ #ifndef REDISMODULE_CORE @@ -180,6 +187,7 @@ RedisModuleString *REDISMODULE_API_FUNC(RedisModule_LoadString)(RedisModuleIO *i char *REDISMODULE_API_FUNC(RedisModule_LoadStringBuffer)(RedisModuleIO *io, size_t *lenptr); void REDISMODULE_API_FUNC(RedisModule_SaveDouble)(RedisModuleIO *io, double value); double REDISMODULE_API_FUNC(RedisModule_LoadDouble)(RedisModuleIO *io); +void REDISMODULE_API_FUNC(RedisModule_Log)(RedisModuleCtx *ctx, int level, const char *fmt, ...); /* This is included inline inside each Redis module. */ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int apiver) __attribute__((unused)); @@ -270,6 +278,7 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int REDISMODULE_GET_API(SaveDouble); REDISMODULE_GET_API(LoadDouble); REDISMODULE_GET_API(EmitAOF); + REDISMODULE_GET_API(Log); RedisModule_SetModuleAttribs(ctx,name,ver,apiver); return REDISMODULE_OK; From 4900e98d70c99861e7aefb49b759508ba5be96eb Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 23 Jun 2016 12:11:30 +0200 Subject: [PATCH 0017/1722] Modules: changes to logging function. This commit changes what provided by PR #3315 (merged) in order to let the user specify the log level as a string. 
The define could be also used, but when this happens, they must be decoupled from the defines in the Redis core, like in the other part of the Redis modules implementations, so that a switch statement (or a function) remaps between the two, otherwise we are no longer free to change the internal Redis defines. --- src/module.c | 29 +++++++++++++++++++++++------ src/modules/API.md | 20 +++++++++++++++----- src/redismodule.h | 9 +-------- 3 files changed, 39 insertions(+), 19 deletions(-) diff --git a/src/module.c b/src/module.c index a71d442ca..dff7feb37 100644 --- a/src/module.c +++ b/src/module.c @@ -2772,17 +2772,35 @@ void RM_EmitAOF(RedisModuleIO *io, const char *cmdname, const char *fmt, ...) { * Logging * -------------------------------------------------------------------------- */ -/* Produces a log message to the standard Redis log. */ -void RM_Log(RedisModuleCtx *ctx, int level, const char *fmt, ...) -{ +/* Produces a log message to the standard Redis log, the format accepts + * printf-alike specifiers, while level is a string describing the log + * level to use when emitting the log, and must be one of the following: + * + * * "debug" + * * "verbose" + * * "notice" + * * "warning" + * + * If the specified log level is invalid, verbose is used by default. + * There is a fixed limit to the length of the log line this function is able + * to emit, this limti is not specified but is guaranteed to be more than + * a few lines of text. + */ +void RM_Log(RedisModuleCtx *ctx, const char *levelstr, const char *fmt, ...) 
{ va_list ap; char msg[LOG_MAX_LEN]; size_t name_len; + int level; - if ((level&0xff) < server.verbosity) return; if (!ctx->module) return; /* Can only log if module is initialized */ - name_len = snprintf(msg, sizeof(msg),"%s: ", ctx->module->name); + if (!strcasecmp(levelstr,"debug")) level = LL_DEBUG; + else if (!strcasecmp(levelstr,"verbose")) level = LL_VERBOSE; + else if (!strcasecmp(levelstr,"notice")) level = LL_NOTICE; + else if (!strcasecmp(levelstr,"warning")) level = LL_WARNING; + else level = LL_VERBOSE; /* Default. */ + + name_len = snprintf(msg, sizeof(msg),"<%s> ", ctx->module->name); va_start(ap, fmt); vsnprintf(msg + name_len, sizeof(msg) - name_len, fmt, ap); @@ -2791,7 +2809,6 @@ void RM_Log(RedisModuleCtx *ctx, int level, const char *fmt, ...) serverLogRaw(level,msg); } - /* -------------------------------------------------------------------------- * Modules API internals * -------------------------------------------------------------------------- */ diff --git a/src/modules/API.md b/src/modules/API.md index 24768c5b8..021b2aa10 100644 --- a/src/modules/API.md +++ b/src/modules/API.md @@ -1117,9 +1117,19 @@ handling is performed by Redis itself. ## `RM_Log` - void RM_Log(RedisModuleCtx *ctx, int level, const char *fmt, ...); + void RM_Log(RedisModuleCtx *ctx, const char *levelstr, const char *fmt, ...); + +Produces a log message to the standard Redis log, the format accepts +printf-alike specifiers, while level is a string describing the log +level to use when emitting the log, and must be one of the following: + +* "debug" +* "verbose" +* "notice" +* "warning" + +If the specified log level is invalid, verbose is used by default. +There is a fixed limit to the length of the log line this function is able +to emit, this limti is not specified but is guaranteed to be more than +a few lines of text. -Produce a log message into the standard Redis log. All standard Redis logging -configuration applies here. 
Messages can only be logged after a module has -initialized, and are prefixed by the name of the module. Log level is -specified using the REDISMODULE_LOG_* macros. diff --git a/src/redismodule.h b/src/redismodule.h index aa43a7367..f376c36c1 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -68,13 +68,6 @@ #define REDISMODULE_POSITIVE_INFINITE (1.0/0.0) #define REDISMODULE_NEGATIVE_INFINITE (-1.0/0.0) -/* Logging levels */ -#define REDISMODULE_LOG_DEBUG 0 -#define REDISMODULE_LOG_VERBOSE 1 -#define REDISMODULE_LOG_NOTICE 2 -#define REDISMODULE_LOG_WARNING 3 - - /* ------------------------- End of common defines ------------------------ */ #ifndef REDISMODULE_CORE @@ -187,7 +180,7 @@ RedisModuleString *REDISMODULE_API_FUNC(RedisModule_LoadString)(RedisModuleIO *i char *REDISMODULE_API_FUNC(RedisModule_LoadStringBuffer)(RedisModuleIO *io, size_t *lenptr); void REDISMODULE_API_FUNC(RedisModule_SaveDouble)(RedisModuleIO *io, double value); double REDISMODULE_API_FUNC(RedisModule_LoadDouble)(RedisModuleIO *io); -void REDISMODULE_API_FUNC(RedisModule_Log)(RedisModuleCtx *ctx, int level, const char *fmt, ...); +void REDISMODULE_API_FUNC(RedisModule_Log)(RedisModuleCtx *ctx, const char *level, const char *fmt, ...); /* This is included inline inside each Redis module. */ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int apiver) __attribute__((unused)); From 7b19bd3bb364d370f32f6af7b691321f6e5a3cdc Mon Sep 17 00:00:00 2001 From: tielei <43289893@qq.com> Date: Thu, 23 Jun 2016 19:53:56 +0800 Subject: [PATCH 0018/1722] A string with 21 chars is not representable as a 64-bit integer. --- src/object.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/object.c b/src/object.c index 9d1d4b7e9..ab927688b 100644 --- a/src/object.c +++ b/src/object.c @@ -385,10 +385,10 @@ robj *tryObjectEncoding(robj *o) { if (o->refcount > 1) return o; /* Check if we can represent this string as a long integer. 
- * Note that we are sure that a string larger than 21 chars is not + * Note that we are sure that a string larger than 20 chars is not * representable as a 32 nor 64 bit integer. */ len = sdslen(s); - if (len <= 21 && string2l(s,len,&value)) { + if (len <= 20 && string2l(s,len,&value)) { /* This object is encodable as a long. Try to use a shared object. * Note that we avoid using shared integers when maxmemory is used * because every object needs to have a private LRU field for the LRU From 3d42fa5b7523a9ee950c20cd480bc07e9d32e111 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 23 Jun 2016 16:12:59 +0200 Subject: [PATCH 0019/1722] Minor change to conform PR #3331 to Redis code base style. Also avoid "static" in order to have symbols during crashes. --- src/module.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/module.c b/src/module.c index 62efa373a..f5921d8f2 100644 --- a/src/module.c +++ b/src/module.c @@ -1457,9 +1457,8 @@ static void zsetKeyReset(RedisModuleKey *key) /* Stop a sorted set iteration. */ void RM_ZsetRangeStop(RedisModuleKey *key) { /* Free resources if needed. */ - if (key->ztype == REDISMODULE_ZSET_RANGE_LEX) { + if (key->ztype == REDISMODULE_ZSET_RANGE_LEX) zslFreeLexRange(&key->zlrs); - } /* Setup sensible values so that misused iteration API calls when an * iterator is not active will result into something more sensible * than crashing. */ From b98fc4bfee2b035633e07c9f05e43189f539feee Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 23 Jun 2016 16:18:14 +0200 Subject: [PATCH 0020/1722] Actually remove static from #3331. I forgot -a when amending in the previous commit. 
--- src/module.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/module.c b/src/module.c index f5921d8f2..fa866aa23 100644 --- a/src/module.c +++ b/src/module.c @@ -1447,8 +1447,7 @@ int RM_ZsetScore(RedisModuleKey *key, RedisModuleString *ele, double *score) { * Key API for Sorted Set iterator * -------------------------------------------------------------------------- */ -static void zsetKeyReset(RedisModuleKey *key) -{ +void zsetKeyReset(RedisModuleKey *key) { key->ztype = REDISMODULE_ZSET_RANGE_NONE; key->zcurrent = NULL; key->zer = 1; From 67950680b9ae92e0f1c675cd8c803143f9a99f13 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 23 Jun 2016 16:20:48 +0200 Subject: [PATCH 0021/1722] Modules: mention RedisModule_Calloc() in the doc. --- src/modules/INTRO.md | 1 + 1 file changed, 1 insertion(+) diff --git a/src/modules/INTRO.md b/src/modules/INTRO.md index 1ba972f82..e5576b7fc 100644 --- a/src/modules/INTRO.md +++ b/src/modules/INTRO.md @@ -812,6 +812,7 @@ specific functions, that are exact replacements for `malloc`, `free`, void *RedisModule_Alloc(size_t bytes); void* RedisModule_Realloc(void *ptr, size_t bytes); void RedisModule_Free(void *ptr); + void RedisModule_Calloc(size_t nmemb, size_t size); char *RedisModule_Strdup(const char *str); They work exactly like their `libc` equivalent calls, however they use From feee7d547a6d190ff0ca9870700984ee77caa9cc Mon Sep 17 00:00:00 2001 From: rojingeorge Date: Thu, 23 Jun 2016 21:58:03 +0530 Subject: [PATCH 0022/1722] Display the nodes summary once the cluster is established using redis-trib.rb Display the nodes summary once the cluster is established using redis-trib.rb After the cluster meet and join was done, when the summary was shown, it was giving info regarding the nodes. This fix ensures that confusion where the slaves were shown as masters. Fix would be to reset the nodes and reload the cluster information before checking the cluster status after creating it. 
--- src/redis-trib.rb | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/redis-trib.rb b/src/redis-trib.rb index 68d46bdf8..b40b5decb 100755 --- a/src/redis-trib.rb +++ b/src/redis-trib.rb @@ -1305,6 +1305,11 @@ class RedisTrib sleep 1 wait_cluster_join flush_nodes_config # Useful for the replicas + # Reset the node information, so that when the + # final summary is listed in check_cluster about the newly created cluster + # all the nodes would get properly listed as slaves or masters + reset_nodes + load_cluster_info_from_node(argv[0]) check_cluster end From 0c94a88075c84d14cc8598734aec9444a9f2da91 Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Thu, 23 Jun 2016 22:30:32 +0300 Subject: [PATCH 0023/1722] Fix RedisModule_Calloc() definition typo. --- src/redismodule.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/redismodule.h b/src/redismodule.h index f1aaea49b..fd9e46dc6 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -100,7 +100,7 @@ typedef void (*RedisModuleTypeFreeFunc)(void *value); void *REDISMODULE_API_FUNC(RedisModule_Alloc)(size_t bytes); void *REDISMODULE_API_FUNC(RedisModule_Realloc)(void *ptr, size_t bytes); void REDISMODULE_API_FUNC(RedisModule_Free)(void *ptr); -void REDISMODULE_API_FUNC(RedisModule_Calloc)(size_t nmemb, size_t size); +void *REDISMODULE_API_FUNC(RedisModule_Calloc)(size_t nmemb, size_t size); char *REDISMODULE_API_FUNC(RedisModule_Strdup)(const char *str); int REDISMODULE_API_FUNC(RedisModule_GetApi)(const char *, void *); int REDISMODULE_API_FUNC(RedisModule_CreateCommand)(RedisModuleCtx *ctx, const char *name, RedisModuleCmdFunc cmdfunc, const char *strflags, int firstkey, int lastkey, int keystep); From c147eaf8f8ff4f204be957262338eb6217c031f0 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 27 Jun 2016 18:02:33 +0200 Subject: [PATCH 0024/1722] Fix quicklistReplaceAtIndex() by updating the quicklist ziplist size. 
The quicklist takes a cached version of the ziplist representation size in bytes. The implementation must update this length every time the underlying ziplist changes. However quicklistReplaceAtIndex() failed to fix the length. During LSET calls, the size of the ziplist blob and the cached size inside the quicklist diverged. Later, when this size is used in an authoritative way, for example during nodes splitting in order to copy the nodes, we end with a duplicated node that may contain random garbage. This commit should fix issue #3343, however several problems were found reviewing the quicklist.c code in search of this bug that should be addressed soon or later. For example: 1. To take a cached ziplist length is fragile since failing to update it leads to this kind of issues. 2. The node splitting code needs auditing. For example it works just for a side effect of ziplistDeleteRange() to be able to cope with a wrong count of elements to remove. The code inside quicklist.c assumes that -1 means "delete till the end" while actually it's just a count of how many elements to delete, and is an unsigned count. So -1 gets converted into the maximum integer, and just by chance the ziplist code stops deleting elements after there are no more to delete. 3. Node splitting is extremely inefficient, it copies the node and removes elements from both nodes even when actually there is to move a single entry from one node to the other, or when the new resulting node is empty at all so there is nothing to copy but just to create a new node. However at least for Redis 3.2 to introduce fresh code inside quicklist.c may be even more risky, so instead I'm writing a better fuzzy tester to stress the internals a bit more in order to anticipate other possible bugs. This bug was found using a fuzzy tester written after having some clue about where the bug could be. The tester eventually created a ~2000 commands sequence able to always crash Redis. 
I wrote a better version of the tester that searched for the smallest sequence that could crash Redis automatically. Later this smaller sequence was minimized by removing random commands till it still crashed the server. This resulted into a sequence of 7 commands. With this small sequence it was just a matter of filling the code with enough printf() to understand enough state to fix the bug. --- src/quicklist.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/quicklist.c b/src/quicklist.c index adf9ba1de..9cb052525 100644 --- a/src/quicklist.c +++ b/src/quicklist.c @@ -671,6 +671,7 @@ int quicklistReplaceAtIndex(quicklist *quicklist, long index, void *data, /* quicklistIndex provides an uncompressed node */ entry.node->zl = ziplistDelete(entry.node->zl, &entry.zi); entry.node->zl = ziplistInsert(entry.node->zl, entry.zi, data, sz); + quicklistNodeUpdateSz(entry.node); quicklistCompress(quicklist, entry.node); return 1; } else { From 2f3b3ae8964d926c8b1e666eee21f2fe0de4ca86 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 28 Jun 2016 09:26:28 +0200 Subject: [PATCH 0025/1722] Regression test for issue #3343 exact min crash sequence. Note: it was verified that it can crash the test suite without the patch applied. 
--- tests/unit/type/list-3.tcl | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/unit/type/list-3.tcl b/tests/unit/type/list-3.tcl index ece6ea2d5..744f70371 100644 --- a/tests/unit/type/list-3.tcl +++ b/tests/unit/type/list-3.tcl @@ -13,6 +13,22 @@ start_server { assert_equal [r lindex l 1] [lindex $mylist 1] } + test {Regression for quicklist #3343 bug} { + r del mylist + r lpush mylist 401 + r lpush mylist 392 + r rpush mylist [string repeat x 5105]"799" + r lset mylist -1 [string repeat x 1014]"702" + r lpop mylist + r lset mylist -1 [string repeat x 4149]"852" + r linsert mylist before 401 [string repeat x 9927]"12" + r lrange mylist 0 -1 + r ping ; # It's enough if the server is still alive + } {PONG} + + test {Stress tester for #3343-alike bugs} { + } + tags {slow} { test {ziplist implementation: value encoding and backlink} { if {$::accurate} {set iterations 100} else {set iterations 10} From 30e2f37ab7baf1ea56999a1148ad34f508393c8b Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 28 Jun 2016 09:33:36 +0200 Subject: [PATCH 0026/1722] Stress tester WIP. --- tests/unit/type/list-3.tcl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/unit/type/list-3.tcl b/tests/unit/type/list-3.tcl index 744f70371..7888409ec 100644 --- a/tests/unit/type/list-3.tcl +++ b/tests/unit/type/list-3.tcl @@ -27,6 +27,9 @@ start_server { } {PONG} test {Stress tester for #3343-alike bugs} { + for {set j 0} {$j < 100} {incr j} { + puts [randomInt 10] + } } tags {slow} { From 163f95978b094875838356f660a1e432381bd638 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 28 Jun 2016 09:42:20 +0200 Subject: [PATCH 0027/1722] Test: new randomized stress tester for #3343 alike bugs. 
--- tests/unit/type/list-3.tcl | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/tests/unit/type/list-3.tcl b/tests/unit/type/list-3.tcl index 7888409ec..b5bd48cb0 100644 --- a/tests/unit/type/list-3.tcl +++ b/tests/unit/type/list-3.tcl @@ -27,8 +27,33 @@ start_server { } {PONG} test {Stress tester for #3343-alike bugs} { - for {set j 0} {$j < 100} {incr j} { - puts [randomInt 10] + r del key + for {set j 0} {$j < 10000} {incr j} { + set op [randomInt 6] + set small_signed_count [expr 5-[randomInt 10]] + if {[randomInt 2] == 0} { + set ele [randomInt 1000] + } else { + set ele [string repeat x [randomInt 10000]][randomInt 1000] + } + switch $op { + 0 {r lpush key $ele} + 1 {r rpush key $ele} + 2 {r lpop key} + 3 {r rpop key} + 4 { + catch {r lset key $small_signed_count $ele} + } + 5 { + set otherele [randomInt 1000] + if {[randomInt 2] == 0} { + set where before + } else { + set where after + } + r linsert key $where $otherele $ele + } + } } } From 679290d6321cf7d1b46360ba5276a4a61e5bedf7 Mon Sep 17 00:00:00 2001 From: Yusaku Kaneta Date: Wed, 29 Jun 2016 18:09:16 +0900 Subject: [PATCH 0028/1722] Fix the firstkey, lastkey, and keystep of moduleCommand --- src/server.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/server.c b/src/server.c index 06244081f..aad068423 100644 --- a/src/server.c +++ b/src/server.c @@ -123,7 +123,7 @@ struct redisServer server; /* server global state */ * are not fast commands. */ struct redisCommand redisCommandTable[] = { - {"module",moduleCommand,-2,"as",0,NULL,1,1,1,0,0}, + {"module",moduleCommand,-2,"as",0,NULL,0,0,0,0,0}, {"get",getCommand,2,"rF",0,NULL,1,1,1,0,0}, {"set",setCommand,-3,"wm",0,NULL,1,1,1,0,0}, {"setnx",setnxCommand,3,"wmF",0,NULL,1,1,1,0,0}, From 3d4399e1600018b1e84da21b1f4c0d0b7295147e Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 30 Jun 2016 23:44:44 +0200 Subject: [PATCH 0029/1722] In Redis RDB check: initial POC. 
So far we used an external program (later executed within Redis) and parser in order to check RDB files for correctness. This forces, at each RDB format update, to have two copies of the same format implementation that are hard to keep in sync. Morover the former RDB checker only checked the very high-level format of the file, without actually trying to load things in memory. Certain corruptions can only be handled by really loading key-value pairs. This first commit attempts to unify the Redis RDB loadig code with the task of checking the RDB file for correctness. More work is needed but it looks like a sounding direction so far. --- src/redis-check-rdb.c | 855 ++++++++++-------------------------------- src/server.c | 2 +- 2 files changed, 193 insertions(+), 664 deletions(-) diff --git a/src/redis-check-rdb.c b/src/redis-check-rdb.c index 0723d2af4..c7886b0d4 100644 --- a/src/redis-check-rdb.c +++ b/src/redis-check-rdb.c @@ -1,6 +1,5 @@ /* - * Copyright (c) 2009-2012, Pieter Noordhuis - * Copyright (c) 2009-2012, Salvatore Sanfilippo + * Copyright (c) 2016, Salvatore Sanfilippo * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -28,696 +27,226 @@ * POSSIBILITY OF SUCH DAMAGE. */ - #include "server.h" #include "rdb.h" -#include -#include -#include -#include -#include -#include -#include "lzf.h" -#include "crc64.h" -#define ERROR(...) { \ - serverLog(LL_WARNING, __VA_ARGS__); \ - exit(1); \ +#include + +void rdbLoadProgressCallback(rio *r, const void *buf, size_t len); +long long rdbLoadMillisecondTime(rio *rdb); + +struct { + rio *rio; + robj *key; /* Current key we are reading. */ + unsigned long keys; /* Number of keys processed. */ + unsigned long expires; /* Number of keys with an expire. */ + unsigned long already_expired; /* Number of keys already expired. */ + int doing; /* The state while reading the RDB. 
*/ +} rdbstate; + +#define RDB_CHECK_DOING_START 0 +#define RDB_CHECK_DOING_READ_EXPIRE 1 +#define RDB_CHECK_DOING_READ_KEY 2 +#define RDB_CHECK_DOING_READ_VALUE 3 + +/* Called on RDB errors. Provides details about the RDB and the offset + * we were when the error was detected. */ +void rdbCheckError(const char *fmt, ...) { + char msg[1024]; + va_list ap; + + va_start(ap, fmt); + vsnprintf(msg, sizeof(msg), fmt, ap); + va_end(ap); + + printf("*** RDB CHECK FAILED: %s ***\n", msg); + printf("AT RDB OFFSET: %llu\n", + (unsigned long long) (rdbstate.rio ? + rdbstate.rio->processed_bytes : 0)); + if (rdbstate.key) + printf("READING KEY: %s\n", (char*)rdbstate.key->ptr); } -/* data type to hold offset in file and size */ -typedef struct { - void *data; - size_t size; - size_t offset; -} pos; +/* Print informations during RDB checking. */ +void rdbCheckInfo(const char *fmt, ...) { + char msg[1024]; + va_list ap; -static unsigned char level = 0; -static pos positions[16]; + va_start(ap, fmt); + vsnprintf(msg, sizeof(msg), fmt, ap); + va_end(ap); -#define CURR_OFFSET (positions[level].offset) - -/* Hold a stack of errors */ -typedef struct { - char error[16][1024]; - size_t offset[16]; - size_t level; -} errors_t; -static errors_t errors; - -#define SHIFT_ERROR(provided_offset, ...) { \ - sprintf(errors.error[errors.level], __VA_ARGS__); \ - errors.offset[errors.level] = provided_offset; \ - errors.level++; \ + printf("[offset %llu] %s\n", + (unsigned long long) (rdbstate.rio ? + rdbstate.rio->processed_bytes : 0), msg); } -/* Data type to hold opcode with optional key name an success status */ -typedef struct { - char* key; - int type; - char success; -} entry; +/* During RDB check we setup a special signal handler for memory violations + * and similar conditions, so that we can log the offending part of the RDB + * if the crash is due to broken content. 
*/ +void rdbCheckHandleCrash(int sig, siginfo_t *info, void *secret) { + UNUSED(sig); + UNUSED(info); + UNUSED(secret); -#define MAX_TYPES_NUM 256 -#define MAX_TYPE_NAME_LEN 16 -/* store string types for output */ -static char types[MAX_TYPES_NUM][MAX_TYPE_NAME_LEN]; - -/* Return true if 't' is a valid object type. */ -static int rdbCheckType(unsigned char t) { - /* In case a new object type is added, update the following - * condition as necessary. */ - return - (t >= RDB_TYPE_HASH_ZIPMAP && t <= RDB_TYPE_HASH_ZIPLIST) || - t <= RDB_TYPE_HASH || - t >= RDB_OPCODE_EXPIRETIME_MS; + rdbCheckError("Server crash checking the specified RDB file!"); + exit(1); } -/* when number of bytes to read is negative, do a peek */ -static int readBytes(void *target, long num) { - char peek = (num < 0) ? 1 : 0; - num = (num < 0) ? -num : num; +void rdbCheckSetupSignals(void) { + struct sigaction act; - pos p = positions[level]; - if (p.offset + num > p.size) { - return 0; - } else { - memcpy(target, (void*)((size_t)p.data + p.offset), num); - if (!peek) positions[level].offset += num; - } - return 1; -} - -int processHeader(void) { - char buf[10] = "_________"; - int dump_version; - - if (!readBytes(buf, 9)) { - ERROR("Cannot read header"); - } - - /* expect the first 5 bytes to equal REDIS */ - if (memcmp(buf,"REDIS",5) != 0) { - ERROR("Wrong signature in header"); - } - - dump_version = (int)strtol(buf + 5, NULL, 10); - if (dump_version < 1 || dump_version > 6) { - ERROR("Unknown RDB format version: %d", dump_version); - } - return dump_version; -} - -static int loadType(entry *e) { - uint32_t offset = CURR_OFFSET; - - /* this byte needs to qualify as type */ - unsigned char t; - if (readBytes(&t, 1)) { - if (rdbCheckType(t)) { - e->type = t; - return 1; - } else { - SHIFT_ERROR(offset, "Unknown type (0x%02x)", t); - } - } else { - SHIFT_ERROR(offset, "Could not read type"); - } - - /* failure */ - return 0; -} - -static int peekType() { - unsigned char t; - if (readBytes(&t, -1) 
&& (rdbCheckType(t))) - return t; - return -1; -} - -/* discard time, just consume the bytes */ -static int processTime(int type) { - uint32_t offset = CURR_OFFSET; - unsigned char t[8]; - int timelen = (type == RDB_OPCODE_EXPIRETIME_MS) ? 8 : 4; - - if (readBytes(t,timelen)) { - return 1; - } else { - SHIFT_ERROR(offset, "Could not read time"); - } - - /* failure */ - return 0; -} - -static uint64_t loadLength(int *isencoded) { - unsigned char buf[2]; - uint32_t len; - int type; - - if (isencoded) *isencoded = 0; - if (!readBytes(buf, 1)) return RDB_LENERR; - type = (buf[0] & 0xC0) >> 6; - if (type == RDB_6BITLEN) { - /* Read a 6 bit len */ - return buf[0] & 0x3F; - } else if (type == RDB_ENCVAL) { - /* Read a 6 bit len encoding type */ - if (isencoded) *isencoded = 1; - return buf[0] & 0x3F; - } else if (type == RDB_14BITLEN) { - /* Read a 14 bit len */ - if (!readBytes(buf+1,1)) return RDB_LENERR; - return ((buf[0] & 0x3F) << 8) | buf[1]; - } else if (buf[0] == RDB_32BITLEN) { - /* Read a 32 bit len */ - if (!readBytes(&len, 4)) return RDB_LENERR; - return ntohl(len); - } else if (buf[0] == RDB_64BITLEN) { - /* Read a 64 bit len */ - if (!readBytes(&len, 8)) return RDB_LENERR; - return ntohu64(len); - } else { - return RDB_LENERR; - } -} - -static char *loadIntegerObject(int enctype) { - uint32_t offset = CURR_OFFSET; - unsigned char enc[4]; - long long val; - - if (enctype == RDB_ENC_INT8) { - uint8_t v; - if (!readBytes(enc, 1)) return NULL; - v = enc[0]; - val = (int8_t)v; - } else if (enctype == RDB_ENC_INT16) { - uint16_t v; - if (!readBytes(enc, 2)) return NULL; - v = enc[0]|(enc[1]<<8); - val = (int16_t)v; - } else if (enctype == RDB_ENC_INT32) { - uint32_t v; - if (!readBytes(enc, 4)) return NULL; - v = enc[0]|(enc[1]<<8)|(enc[2]<<16)|(enc[3]<<24); - val = (int32_t)v; - } else { - SHIFT_ERROR(offset, "Unknown integer encoding (0x%02x)", enctype); - return NULL; - } - - /* convert val into string */ - char *buf; - buf = zmalloc(sizeof(char) * 128); - 
sprintf(buf, "%lld", val); - return buf; -} - -static char* loadLzfStringObject() { - uint64_t slen, clen; - char *c, *s; - - if ((clen = loadLength(NULL)) == RDB_LENERR) return NULL; - if ((slen = loadLength(NULL)) == RDB_LENERR) return NULL; - - c = zmalloc(clen); - if (!readBytes(c, clen)) { - zfree(c); - return NULL; - } - - s = zmalloc(slen+1); - if (lzf_decompress(c,clen,s,slen) == 0) { - zfree(c); zfree(s); - return NULL; - } - - zfree(c); - return s; -} - -/* returns NULL when not processable, char* when valid */ -static char* loadStringObject() { - uint64_t offset = CURR_OFFSET; - uint64_t len; - int isencoded; - - len = loadLength(&isencoded); - if (isencoded) { - switch(len) { - case RDB_ENC_INT8: - case RDB_ENC_INT16: - case RDB_ENC_INT32: - return loadIntegerObject(len); - case RDB_ENC_LZF: - return loadLzfStringObject(); - default: - /* unknown encoding */ - SHIFT_ERROR(offset, "Unknown string encoding (0x%02llx)", - (unsigned long long) len); - return NULL; - } - } - - if (len == RDB_LENERR) return NULL; - - char *buf = zmalloc(sizeof(char) * (len+1)); - if (buf == NULL) return NULL; - buf[len] = '\0'; - if (!readBytes(buf, len)) { - zfree(buf); - return NULL; - } - return buf; -} - -static int processStringObject(char** store) { - unsigned long offset = CURR_OFFSET; - char *key = loadStringObject(); - if (key == NULL) { - SHIFT_ERROR(offset, "Error reading string object"); - zfree(key); - return 0; - } - - if (store != NULL) { - *store = key; - } else { - zfree(key); - } - return 1; -} - -static double* loadDoubleValue() { - char buf[256]; - unsigned char len; - double* val; - - if (!readBytes(&len,1)) return NULL; - - val = zmalloc(sizeof(double)); - switch(len) { - case 255: *val = R_NegInf; return val; - case 254: *val = R_PosInf; return val; - case 253: *val = R_Nan; return val; - default: - if (!readBytes(buf, len)) { - zfree(val); - return NULL; - } - buf[len] = '\0'; - sscanf(buf, "%lg", val); - return val; - } -} - -static int 
processDoubleValue(double** store) { - unsigned long offset = CURR_OFFSET; - double *val = loadDoubleValue(); - if (val == NULL) { - SHIFT_ERROR(offset, "Error reading double value"); - zfree(val); - return 0; - } - - if (store != NULL) { - *store = val; - } else { - zfree(val); - } - return 1; -} - -static int loadPair(entry *e) { - uint64_t offset = CURR_OFFSET; - uint64_t i; - - /* read key first */ - char *key; - if (processStringObject(&key)) { - e->key = key; - } else { - SHIFT_ERROR(offset, "Error reading entry key"); - return 0; - } - - uint64_t length = 0; - if (e->type == RDB_TYPE_LIST || - e->type == RDB_TYPE_SET || - e->type == RDB_TYPE_ZSET || - e->type == RDB_TYPE_HASH) { - if ((length = loadLength(NULL)) == RDB_LENERR) { - SHIFT_ERROR(offset, "Error reading %s length", types[e->type]); - return 0; - } - } - - switch(e->type) { - case RDB_TYPE_STRING: - case RDB_TYPE_HASH_ZIPMAP: - case RDB_TYPE_LIST_ZIPLIST: - case RDB_TYPE_SET_INTSET: - case RDB_TYPE_ZSET_ZIPLIST: - case RDB_TYPE_HASH_ZIPLIST: - if (!processStringObject(NULL)) { - SHIFT_ERROR(offset, "Error reading entry value"); - return 0; - } - break; - case RDB_TYPE_LIST: - case RDB_TYPE_SET: - for (i = 0; i < length; i++) { - offset = CURR_OFFSET; - if (!processStringObject(NULL)) { - SHIFT_ERROR(offset, "Error reading element at index %llu (length: %llu)", - (unsigned long long) i, (unsigned long long) length); - return 0; - } - } - break; - case RDB_TYPE_ZSET: - for (i = 0; i < length; i++) { - offset = CURR_OFFSET; - if (!processStringObject(NULL)) { - SHIFT_ERROR(offset, "Error reading element key at index %llu (length: %llu)", - (unsigned long long) i, (unsigned long long) length); - return 0; - } - offset = CURR_OFFSET; - if (!processDoubleValue(NULL)) { - SHIFT_ERROR(offset, "Error reading element value at index %llu (length: %llu)", - (unsigned long long) i, (unsigned long long) length); - return 0; - } - } - break; - case RDB_TYPE_HASH: - for (i = 0; i < length; i++) { - offset = 
CURR_OFFSET; - if (!processStringObject(NULL)) { - SHIFT_ERROR(offset, "Error reading element key at index %llu (length: %llu)", - (unsigned long long) i, (unsigned long long) length); - return 0; - } - offset = CURR_OFFSET; - if (!processStringObject(NULL)) { - SHIFT_ERROR(offset, "Error reading element value at index %llu (length: %llu)", - (unsigned long long) i, (unsigned long long) length); - return 0; - } - } - break; - default: - SHIFT_ERROR(offset, "Type not implemented"); - return 0; - } - /* because we're done, we assume success */ - e->success = 1; - return 1; -} - -static entry loadEntry() { - entry e = { NULL, -1, 0 }; - uint64_t length, offset[4]; - - /* reset error container */ - errors.level = 0; - - offset[0] = CURR_OFFSET; - if (!loadType(&e)) { - return e; - } - - offset[1] = CURR_OFFSET; - if (e.type == RDB_OPCODE_SELECTDB) { - if ((length = loadLength(NULL)) == RDB_LENERR) { - SHIFT_ERROR(offset[1], "Error reading database number"); - return e; - } - if (length > 63) { - SHIFT_ERROR(offset[1], "Database number out of range (%llu)", - (unsigned long long) length); - return e; - } - } else if (e.type == RDB_OPCODE_EOF) { - if (positions[level].offset < positions[level].size) { - SHIFT_ERROR(offset[0], "Unexpected EOF"); - } else { - e.success = 1; - } - return e; - } else { - /* optionally consume expire */ - if (e.type == RDB_OPCODE_EXPIRETIME || - e.type == RDB_OPCODE_EXPIRETIME_MS) { - if (!processTime(e.type)) return e; - if (!loadType(&e)) return e; - } - - offset[1] = CURR_OFFSET; - if (!loadPair(&e)) { - SHIFT_ERROR(offset[1], "Error for type %s", types[e.type]); - return e; - } - } - - /* all entries are followed by a valid type: - * e.g. 
a new entry, SELECTDB, EXPIRE, EOF */ - offset[2] = CURR_OFFSET; - if (peekType() == -1) { - SHIFT_ERROR(offset[2], "Followed by invalid type"); - SHIFT_ERROR(offset[0], "Error for type %s", types[e.type]); - e.success = 0; - } else { - e.success = 1; - } - - return e; -} - -static void printCentered(int indent, int width, char* body) { - char head[256], tail[256]; - memset(head, '\0', 256); - memset(tail, '\0', 256); - - memset(head, '=', indent); - memset(tail, '=', width - 2 - indent - strlen(body)); - serverLog(LL_WARNING, "%s %s %s", head, body, tail); -} - -static void printValid(uint64_t ops, uint64_t bytes) { - char body[80]; - sprintf(body, "Processed %llu valid opcodes (in %llu bytes)", - (unsigned long long) ops, (unsigned long long) bytes); - printCentered(4, 80, body); -} - -static void printSkipped(uint64_t bytes, uint64_t offset) { - char body[80]; - sprintf(body, "Skipped %llu bytes (resuming at 0x%08llx)", - (unsigned long long) bytes, (unsigned long long) offset); - printCentered(4, 80, body); -} - -static void printErrorStack(entry *e) { - unsigned int i; - char body[64]; - - if (e->type == -1) { - sprintf(body, "Error trace"); - } else if (e->type >= 253) { - sprintf(body, "Error trace (%s)", types[e->type]); - } else if (!e->key) { - sprintf(body, "Error trace (%s: (unknown))", types[e->type]); - } else { - char tmp[41]; - strncpy(tmp, e->key, 40); - - /* display truncation at the last 3 chars */ - if (strlen(e->key) > 40) { - memset(&tmp[37], '.', 3); - } - - /* display unprintable characters as ? 
*/ - for (i = 0; i < strlen(tmp); i++) { - if (tmp[i] <= 32) tmp[i] = '?'; - } - sprintf(body, "Error trace (%s: %s)", types[e->type], tmp); - } - - printCentered(4, 80, body); - - /* display error stack */ - for (i = 0; i < errors.level; i++) { - serverLog(LL_WARNING, "0x%08lx - %s", - (unsigned long) errors.offset[i], errors.error[i]); - } -} - -void process(void) { - uint64_t num_errors = 0, num_valid_ops = 0, num_valid_bytes = 0; - entry entry = { NULL, -1, 0 }; - int dump_version = processHeader(); - - /* Exclude the final checksum for RDB >= 5. Will be checked at the end. */ - if (dump_version >= 5) { - if (positions[0].size < 8) { - serverLog(LL_WARNING, "RDB version >= 5 but no room for checksum."); - exit(1); - } - positions[0].size -= 8; - } - - level = 1; - while(positions[0].offset < positions[0].size) { - positions[1] = positions[0]; - - entry = loadEntry(); - if (!entry.success) { - printValid(num_valid_ops, num_valid_bytes); - printErrorStack(&entry); - num_errors++; - num_valid_ops = 0; - num_valid_bytes = 0; - - /* search for next valid entry */ - uint64_t offset = positions[0].offset + 1; - int i = 0; - - while (!entry.success && offset < positions[0].size) { - positions[1].offset = offset; - - /* find 3 consecutive valid entries */ - for (i = 0; i < 3; i++) { - entry = loadEntry(); - if (!entry.success) break; - } - /* check if we found 3 consecutive valid entries */ - if (i < 3) { - offset++; - } - } - - /* print how many bytes we have skipped to find a new valid opcode */ - if (offset < positions[0].size) { - printSkipped(offset - positions[0].offset, offset); - } - - positions[0].offset = offset; - } else { - num_valid_ops++; - num_valid_bytes += positions[1].offset - positions[0].offset; - - /* advance position */ - positions[0] = positions[1]; - } - zfree(entry.key); - } - - /* because there is another potential error, - * print how many valid ops we have processed */ - printValid(num_valid_ops, num_valid_bytes); - - /* expect an eof */ - if 
(entry.type != RDB_OPCODE_EOF) { - /* last byte should be EOF, add error */ - errors.level = 0; - SHIFT_ERROR(positions[0].offset, "Expected EOF, got %s", types[entry.type]); - - /* this is an EOF error so reset type */ - entry.type = -1; - printErrorStack(&entry); - - num_errors++; - } - - /* Verify checksum */ - if (dump_version >= 5) { - uint64_t crc = crc64(0,positions[0].data,positions[0].size); - uint64_t crc2; - unsigned char *p = (unsigned char*)positions[0].data+positions[0].size; - crc2 = ((uint64_t)p[0] << 0) | - ((uint64_t)p[1] << 8) | - ((uint64_t)p[2] << 16) | - ((uint64_t)p[3] << 24) | - ((uint64_t)p[4] << 32) | - ((uint64_t)p[5] << 40) | - ((uint64_t)p[6] << 48) | - ((uint64_t)p[7] << 56); - if (crc != crc2) { - SHIFT_ERROR(positions[0].offset, "RDB CRC64 does not match."); - } else { - serverLog(LL_WARNING, "CRC64 checksum is OK"); - } - } - - /* print summary on errors */ - if (num_errors) { - serverLog(LL_WARNING, "Total unprocessable opcodes: %llu", - (unsigned long long) num_errors); - } + sigemptyset(&act.sa_mask); + act.sa_flags = SA_NODEFER | SA_RESETHAND | SA_SIGINFO; +#warning "Uncomment here" +// act.sa_sigaction = rdbCheckHandleCrash; + sigaction(SIGSEGV, &act, NULL); + sigaction(SIGBUS, &act, NULL); + sigaction(SIGFPE, &act, NULL); + sigaction(SIGILL, &act, NULL); } +/* Check the specified RDB file. 
*/ int redis_check_rdb(char *rdbfilename) { - int fd; - off_t size; - struct stat stat; - void *data; + uint64_t dbid; + int type, rdbver; + char buf[1024]; + long long expiretime, now = mstime(); + FILE *fp; + rio rdb; - fd = open(rdbfilename, O_RDONLY); - if (fd < 1) { - ERROR("Cannot open file: %s", rdbfilename); + if ((fp = fopen(rdbfilename,"r")) == NULL) return C_ERR; + + rioInitWithFile(&rdb,fp); + rdbstate.rio = &rdb; + rdb.update_cksum = rdbLoadProgressCallback; + if (rioRead(&rdb,buf,9) == 0) goto eoferr; + buf[9] = '\0'; + if (memcmp(buf,"REDIS",5) != 0) { + rdbCheckError("Wrong signature trying to load DB from file"); + return 1; } - if (fstat(fd, &stat) == -1) { - ERROR("Cannot stat: %s", rdbfilename); - } else { - size = stat.st_size; + rdbver = atoi(buf+5); + if (rdbver < 1 || rdbver > RDB_VERSION) { + rdbCheckError("Can't handle RDB format version %d",rdbver); + return 1; } - if (sizeof(size_t) == sizeof(int32_t) && size >= INT_MAX) { - ERROR("Cannot check dump files >2GB on a 32-bit platform"); + startLoading(fp); + while(1) { + robj *key, *val; + expiretime = -1; + + /* Read type. */ + if ((type = rdbLoadType(&rdb)) == -1) goto eoferr; + + /* Handle special types. */ + if (type == RDB_OPCODE_EXPIRETIME) { + /* EXPIRETIME: load an expire associated with the next key + * to load. Note that after loading an expire we need to + * load the actual type, and continue. */ + if ((expiretime = rdbLoadTime(&rdb)) == -1) goto eoferr; + /* We read the time so we need to read the object type again. */ + if ((type = rdbLoadType(&rdb)) == -1) goto eoferr; + /* the EXPIRETIME opcode specifies time in seconds, so convert + * into milliseconds. */ + expiretime *= 1000; + } else if (type == RDB_OPCODE_EXPIRETIME_MS) { + /* EXPIRETIME_MS: milliseconds precision expire times introduced + * with RDB v3. Like EXPIRETIME but no with more precision. 
*/ + if ((expiretime = rdbLoadMillisecondTime(&rdb)) == -1) goto eoferr; + /* We read the time so we need to read the object type again. */ + if ((type = rdbLoadType(&rdb)) == -1) goto eoferr; + } else if (type == RDB_OPCODE_EOF) { + /* EOF: End of file, exit the main loop. */ + break; + } else if (type == RDB_OPCODE_SELECTDB) { + /* SELECTDB: Select the specified database. */ + if ((dbid = rdbLoadLen(&rdb,NULL)) == RDB_LENERR) + goto eoferr; + rdbCheckInfo("Selecting DB ID %d", dbid); + continue; /* Read type again. */ + } else if (type == RDB_OPCODE_RESIZEDB) { + /* RESIZEDB: Hint about the size of the keys in the currently + * selected data base, in order to avoid useless rehashing. */ + uint64_t db_size, expires_size; + if ((db_size = rdbLoadLen(&rdb,NULL)) == RDB_LENERR) + goto eoferr; + if ((expires_size = rdbLoadLen(&rdb,NULL)) == RDB_LENERR) + goto eoferr; + continue; /* Read type again. */ + } else if (type == RDB_OPCODE_AUX) { + /* AUX: generic string-string fields. Use to add state to RDB + * which is backward compatible. Implementations of RDB loading + * are requierd to skip AUX fields they don't understand. + * + * An AUX field is composed of two strings: key and value. */ + robj *auxkey, *auxval; + if ((auxkey = rdbLoadStringObject(&rdb)) == NULL) goto eoferr; + if ((auxval = rdbLoadStringObject(&rdb)) == NULL) goto eoferr; + + rdbCheckInfo("%s = '%s'", (char*)auxkey->ptr, (char*)auxval->ptr); + decrRefCount(auxkey); + decrRefCount(auxval); + continue; /* Read type again. */ + } + + /* Read key */ + if ((key = rdbLoadStringObject(&rdb)) == NULL) goto eoferr; + rdbstate.key = key; + rdbstate.keys++; + /* Read value */ + if ((val = rdbLoadObject(type,&rdb)) == NULL) goto eoferr; + /* Check if the key already expired. This function is used when loading + * an RDB file from disk, either at startup, or when an RDB was + * received from the master. In the latter case, the master is + * responsible for key expiry. 
If we would expire keys here, the + * snapshot taken by the master may not be reflected on the slave. */ + if (server.masterhost == NULL && expiretime != -1 && expiretime < now) + rdbstate.already_expired++; + if (expiretime != -1) rdbstate.expires++; + rdbstate.key = NULL; + decrRefCount(key); + decrRefCount(val); + } + /* Verify the checksum if RDB version is >= 5 */ + if (rdbver >= 5 && server.rdb_checksum) { + uint64_t cksum, expected = rdb.cksum; + + if (rioRead(&rdb,&cksum,8) == 0) goto eoferr; + memrev64ifbe(&cksum); + if (cksum == 0) { + rdbCheckInfo("RDB file was saved with checksum disabled: no check performed."); + } else if (cksum != expected) { + rdbCheckError("RDB CRC error"); + } } - data = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0); - if (data == MAP_FAILED) { - ERROR("Cannot mmap: %s", rdbfilename); - } - - /* Initialize static vars */ - positions[0].data = data; - positions[0].size = size; - positions[0].offset = 0; - errors.level = 0; - - /* Object types */ - sprintf(types[RDB_TYPE_STRING], "STRING"); - sprintf(types[RDB_TYPE_LIST], "LIST"); - sprintf(types[RDB_TYPE_SET], "SET"); - sprintf(types[RDB_TYPE_ZSET], "ZSET"); - sprintf(types[RDB_TYPE_HASH], "HASH"); - - /* Object types only used for dumping to disk */ - sprintf(types[RDB_OPCODE_EXPIRETIME], "EXPIRETIME"); - sprintf(types[RDB_OPCODE_SELECTDB], "SELECTDB"); - sprintf(types[RDB_OPCODE_EOF], "EOF"); - - process(); - - munmap(data, size); - close(fd); + fclose(fp); return 0; + +eoferr: /* unexpected end of file is handled here with a fatal exit */ + rdbCheckError("Unexpected EOF reading RDB file"); + return 1; } /* RDB check main: called form redis.c when Redis is executed with the - * redis-check-rdb alias. */ + * redis-check-rdb alias. + * + * The function never returns, but exits with the status code according + * to success (RDB is sane) or error (RDB is corrupted). 
*/ int redis_check_rdb_main(char **argv, int argc) { if (argc != 2) { fprintf(stderr, "Usage: %s \n", argv[0]); exit(1); } - serverLog(LL_WARNING, "Checking RDB file %s", argv[1]); - exit(redis_check_rdb(argv[1])); - return 0; + createSharedObjects(); /* Needed for loading. */ + server.loading_process_events_interval_bytes = 0; + rdbCheckInfo("Checking RDB file %s", argv[1]); + rdbCheckSetupSignals(); + int retval = redis_check_rdb(argv[1]); + if (retval == 0) { + rdbCheckInfo("\\o/ RDB looks OK! \\o/"); + } + exit(retval); } diff --git a/src/server.c b/src/server.c index 06244081f..69eb89390 100644 --- a/src/server.c +++ b/src/server.c @@ -4033,7 +4033,7 @@ int main(int argc, char **argv) { * the program main. However the program is part of the Redis executable * so that we can easily execute an RDB check on loading errors. */ if (strstr(argv[0],"redis-check-rdb") != NULL) - exit(redis_check_rdb_main(argv,argc)); + redis_check_rdb_main(argv,argc); if (argc >= 2) { j = 1; /* First option to parse in argv[] */ From 9aacdebf63026b29203c3d5630b862a549c6aaf5 Mon Sep 17 00:00:00 2001 From: sskorgal Date: Fri, 1 Jul 2016 10:42:22 +0530 Subject: [PATCH 0030/1722] Fix for redis_cli printing default DB when select command fails. --- src/redis-cli.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 17fb53394..070388380 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -916,7 +916,7 @@ static int cliSendCommand(int argc, char **argv, int repeat) { return REDIS_ERR; } else { /* Store database number when SELECT was successfully executed. 
*/ - if (!strcasecmp(command,"select") && argc == 2) { + if (!strcasecmp(command,"select") && argc == 2 && config.last_cmd_type != REDIS_REPLY_ERROR) { config.dbnum = atoi(argv[1]); cliRefreshPrompt(); } else if (!strcasecmp(command,"auth") && argc == 2) { From c67f1a46e3b1eafe5258c0542f2281cf9b05e062 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 1 Jul 2016 09:36:52 +0200 Subject: [PATCH 0031/1722] In Redis RDB check: better error reporting. --- src/rdb.c | 17 +++++++++--- src/redis-check-rdb.c | 64 +++++++++++++++++++++++++++++++++++++------ src/server.c | 2 +- src/server.h | 2 +- 4 files changed, 71 insertions(+), 14 deletions(-) diff --git a/src/rdb.c b/src/rdb.c index 6d29f80ce..300d15602 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -43,11 +43,20 @@ #define rdbExitReportCorruptRDB(reason) rdbCheckThenExit(reason, __LINE__); +extern int rdbCheckMode; +void rdbCheckError(const char *fmt, ...); + void rdbCheckThenExit(char *reason, int where) { - serverLog(LL_WARNING, "Corrupt RDB detected at rdb.c:%d (%s). " - "Running 'redis-check-rdb %s'", - where, reason, server.rdb_filename); - redis_check_rdb(server.rdb_filename); + if (!rdbCheckMode) { + serverLog(LL_WARNING, "Corrupt RDB detected at rdb.c:%d (%s). " + "Running 'redis-check-rdb %s'", + where, reason, server.rdb_filename); + char *argv[2] = {"",server.rdb_filename}; + redis_check_rdb_main(2,argv); + } else { + rdbCheckError("Internal error in RDB reading function at rdb.c:%d (%s)", + where, reason); + } exit(1); } diff --git a/src/redis-check-rdb.c b/src/redis-check-rdb.c index c7886b0d4..a9d21b093 100644 --- a/src/redis-check-rdb.c +++ b/src/redis-check-rdb.c @@ -32,8 +32,10 @@ #include +void createSharedObjects(void); void rdbLoadProgressCallback(rio *r, const void *buf, size_t len); long long rdbLoadMillisecondTime(rio *rdb); +int rdbCheckMode = 0; struct { rio *rio; @@ -44,10 +46,34 @@ struct { int doing; /* The state while reading the RDB. 
*/ } rdbstate; +/* At every loading step try to remember what we were about to do, so that + * we can log this information when an error is encountered. */ #define RDB_CHECK_DOING_START 0 -#define RDB_CHECK_DOING_READ_EXPIRE 1 -#define RDB_CHECK_DOING_READ_KEY 2 -#define RDB_CHECK_DOING_READ_VALUE 3 +#define RDB_CHECK_DOING_READ_TYPE 1 +#define RDB_CHECK_DOING_READ_EXPIRE 2 +#define RDB_CHECK_DOING_READ_KEY 3 +#define RDB_CHECK_DOING_READ_OBJECT_VALUE 4 +#define RDB_CHECK_DOING_CHECK_SUM 5 +#define RDB_CHECK_DOING_READ_LEN 6 +#define RDB_CHECK_DOING_READ_AUX 7 + +char *rdb_check_doing_string[] = { + "start", + "read-type", + "read-expire", + "read-key", + "read-object-value", + "check-sum", + "read-len", + "read-aux" +}; + +/* Show a few stats collected into 'rdbstate' */ +void rdbShowGenericInfo(void) { + printf("[info] %lu keys read\n", rdbstate.keys); + printf("[info] %lu expires\n", rdbstate.expires); + printf("[info] %lu already expired\n", rdbstate.already_expired); +} /* Called on RDB errors. Provides details about the RDB and the offset * we were when the error was detected. */ @@ -59,12 +85,16 @@ void rdbCheckError(const char *fmt, ...) { vsnprintf(msg, sizeof(msg), fmt, ap); va_end(ap); - printf("*** RDB CHECK FAILED: %s ***\n", msg); - printf("AT RDB OFFSET: %llu\n", + printf("--- RDB ERROR DETECTED ---\n"); + printf("[offset %llu] %s\n", (unsigned long long) (rdbstate.rio ? - rdbstate.rio->processed_bytes : 0)); + rdbstate.rio->processed_bytes : 0), msg); + printf("[additional info] While doing: %s\n", + rdb_check_doing_string[rdbstate.doing]); if (rdbstate.key) - printf("READING KEY: %s\n", (char*)rdbstate.key->ptr); + printf("[additional info] Reading key '%s'\n", + (char*)rdbstate.key->ptr); + rdbShowGenericInfo(); } /* Print informations during RDB checking. */ @@ -138,15 +168,18 @@ int redis_check_rdb(char *rdbfilename) { expiretime = -1; /* Read type. 
*/ + rdbstate.doing = RDB_CHECK_DOING_READ_TYPE; if ((type = rdbLoadType(&rdb)) == -1) goto eoferr; /* Handle special types. */ if (type == RDB_OPCODE_EXPIRETIME) { + rdbstate.doing = RDB_CHECK_DOING_READ_EXPIRE; /* EXPIRETIME: load an expire associated with the next key * to load. Note that after loading an expire we need to * load the actual type, and continue. */ if ((expiretime = rdbLoadTime(&rdb)) == -1) goto eoferr; /* We read the time so we need to read the object type again. */ + rdbstate.doing = RDB_CHECK_DOING_READ_TYPE; if ((type = rdbLoadType(&rdb)) == -1) goto eoferr; /* the EXPIRETIME opcode specifies time in seconds, so convert * into milliseconds. */ @@ -154,14 +187,17 @@ int redis_check_rdb(char *rdbfilename) { } else if (type == RDB_OPCODE_EXPIRETIME_MS) { /* EXPIRETIME_MS: milliseconds precision expire times introduced * with RDB v3. Like EXPIRETIME but no with more precision. */ + rdbstate.doing = RDB_CHECK_DOING_READ_EXPIRE; if ((expiretime = rdbLoadMillisecondTime(&rdb)) == -1) goto eoferr; /* We read the time so we need to read the object type again. */ + rdbstate.doing = RDB_CHECK_DOING_READ_TYPE; if ((type = rdbLoadType(&rdb)) == -1) goto eoferr; } else if (type == RDB_OPCODE_EOF) { /* EOF: End of file, exit the main loop. */ break; } else if (type == RDB_OPCODE_SELECTDB) { /* SELECTDB: Select the specified database. */ + rdbstate.doing = RDB_CHECK_DOING_READ_LEN; if ((dbid = rdbLoadLen(&rdb,NULL)) == RDB_LENERR) goto eoferr; rdbCheckInfo("Selecting DB ID %d", dbid); @@ -170,6 +206,7 @@ int redis_check_rdb(char *rdbfilename) { /* RESIZEDB: Hint about the size of the keys in the currently * selected data base, in order to avoid useless rehashing. 
*/ uint64_t db_size, expires_size; + rdbstate.doing = RDB_CHECK_DOING_READ_LEN; if ((db_size = rdbLoadLen(&rdb,NULL)) == RDB_LENERR) goto eoferr; if ((expires_size = rdbLoadLen(&rdb,NULL)) == RDB_LENERR) @@ -182,6 +219,7 @@ int redis_check_rdb(char *rdbfilename) { * * An AUX field is composed of two strings: key and value. */ robj *auxkey, *auxval; + rdbstate.doing = RDB_CHECK_DOING_READ_AUX; if ((auxkey = rdbLoadStringObject(&rdb)) == NULL) goto eoferr; if ((auxval = rdbLoadStringObject(&rdb)) == NULL) goto eoferr; @@ -189,13 +227,20 @@ int redis_check_rdb(char *rdbfilename) { decrRefCount(auxkey); decrRefCount(auxval); continue; /* Read type again. */ + } else { + if (!rdbIsObjectType(type)) { + rdbCheckError("Invalid object type: %d", type); + return 1; + } } /* Read key */ + rdbstate.doing = RDB_CHECK_DOING_READ_KEY; if ((key = rdbLoadStringObject(&rdb)) == NULL) goto eoferr; rdbstate.key = key; rdbstate.keys++; /* Read value */ + rdbstate.doing = RDB_CHECK_DOING_READ_OBJECT_VALUE; if ((val = rdbLoadObject(type,&rdb)) == NULL) goto eoferr; /* Check if the key already expired. This function is used when loading * an RDB file from disk, either at startup, or when an RDB was @@ -213,6 +258,7 @@ int redis_check_rdb(char *rdbfilename) { if (rdbver >= 5 && server.rdb_checksum) { uint64_t cksum, expected = rdb.cksum; + rdbstate.doing = RDB_CHECK_DOING_CHECK_SUM; if (rioRead(&rdb,&cksum,8) == 0) goto eoferr; memrev64ifbe(&cksum); if (cksum == 0) { @@ -235,18 +281,20 @@ eoferr: /* unexpected end of file is handled here with a fatal exit */ * * The function never returns, but exits with the status code according * to success (RDB is sane) or error (RDB is corrupted). */ -int redis_check_rdb_main(char **argv, int argc) { +int redis_check_rdb_main(int argc, char **argv) { if (argc != 2) { fprintf(stderr, "Usage: %s \n", argv[0]); exit(1); } createSharedObjects(); /* Needed for loading. 
*/ server.loading_process_events_interval_bytes = 0; + rdbCheckMode = 1; rdbCheckInfo("Checking RDB file %s", argv[1]); rdbCheckSetupSignals(); int retval = redis_check_rdb(argv[1]); if (retval == 0) { rdbCheckInfo("\\o/ RDB looks OK! \\o/"); + rdbShowGenericInfo(); } exit(retval); } diff --git a/src/server.c b/src/server.c index 69eb89390..49150045a 100644 --- a/src/server.c +++ b/src/server.c @@ -4033,7 +4033,7 @@ int main(int argc, char **argv) { * the program main. However the program is part of the Redis executable * so that we can easily execute an RDB check on loading errors. */ if (strstr(argv[0],"redis-check-rdb") != NULL) - redis_check_rdb_main(argv,argc); + redis_check_rdb_main(argc,argv); if (argc >= 2) { j = 1; /* First option to parse in argv[] */ diff --git a/src/server.h b/src/server.h index c1963bf21..9d3fc2d33 100644 --- a/src/server.h +++ b/src/server.h @@ -1597,7 +1597,7 @@ void sentinelIsRunning(void); /* redis-check-rdb */ int redis_check_rdb(char *rdbfilename); -int redis_check_rdb_main(char **argv, int argc); +int redis_check_rdb_main(int argc, char **argv); /* Scripting */ void scriptingInit(int setup); From 80920cba7351e78c6d6b6406e0b1844fa15e36df Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 1 Jul 2016 09:52:35 +0200 Subject: [PATCH 0032/1722] In Redis RDB check: minor output message changes. --- src/redis-check-rdb.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/redis-check-rdb.c b/src/redis-check-rdb.c index a9d21b093..9c1715232 100644 --- a/src/redis-check-rdb.c +++ b/src/redis-check-rdb.c @@ -223,7 +223,8 @@ int redis_check_rdb(char *rdbfilename) { if ((auxkey = rdbLoadStringObject(&rdb)) == NULL) goto eoferr; if ((auxval = rdbLoadStringObject(&rdb)) == NULL) goto eoferr; - rdbCheckInfo("%s = '%s'", (char*)auxkey->ptr, (char*)auxval->ptr); + rdbCheckInfo("AUX FIELD %s = '%s'", + (char*)auxkey->ptr, (char*)auxval->ptr); decrRefCount(auxkey); decrRefCount(auxval); continue; /* Read type again. 
*/ @@ -265,6 +266,8 @@ int redis_check_rdb(char *rdbfilename) { rdbCheckInfo("RDB file was saved with checksum disabled: no check performed."); } else if (cksum != expected) { rdbCheckError("RDB CRC error"); + } else { + rdbCheckInfo("Checksum OK"); } } From d882ccb6a277113c912daeacbfa51f9917319bf5 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 1 Jul 2016 09:55:18 +0200 Subject: [PATCH 0033/1722] Added a trivial program to randomly corrupt RDB files in /utils. --- utils/corrupt_rdb.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 utils/corrupt_rdb.c diff --git a/utils/corrupt_rdb.c b/utils/corrupt_rdb.c new file mode 100644 index 000000000..7ba9caeee --- /dev/null +++ b/utils/corrupt_rdb.c @@ -0,0 +1,44 @@ +/* Trivia program to corrupt an RDB file in order to check the RDB check + * program behavior and effectiveness. + * + * Copyright (C) 2016 Salvatore Sanfilippo. + * This software is released in the 3-clause BSD license. */ + +#include +#include +#include +#include +#include +#include + +int main(int argc, char **argv) { + struct stat stat; + int fd, cycles; + + if (argc != 3) { + fprintf(stderr,"Usage: \n"); + exit(1); + } + + srand(time(NULL)); + cycles = atoi(argv[2]); + fd = open("dump.rdb",O_RDWR); + if (fd == -1) { + perror("open"); + exit(1); + } + fstat(fd,&stat); + + while(cycles--) { + unsigned char buf[32]; + unsigned long offset = rand()%stat.st_size; + int writelen = 1+rand()%31; + int j; + + for (j = 0; j < writelen; j++) buf[j] = (char)rand(); + lseek(fd,offset,SEEK_SET); + printf("Writing %d bytes at offset %lu\n", writelen, offset); + write(fd,buf,writelen); + } + return 0; +} From 247cf937bb0c6177d41e056fd390e3bebc3ee1b3 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 1 Jul 2016 11:40:40 +0200 Subject: [PATCH 0034/1722] In Redis RDB check: log object type on error. 
--- src/redis-check-rdb.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/src/redis-check-rdb.c b/src/redis-check-rdb.c index 9c1715232..47897d788 100644 --- a/src/redis-check-rdb.c +++ b/src/redis-check-rdb.c @@ -40,6 +40,7 @@ int rdbCheckMode = 0; struct { rio *rio; robj *key; /* Current key we are reading. */ + int key_type; /* Current key type if != -1. */ unsigned long keys; /* Number of keys processed. */ unsigned long expires; /* Number of keys with an expire. */ unsigned long already_expired; /* Number of keys already expired. */ @@ -68,6 +69,23 @@ char *rdb_check_doing_string[] = { "read-aux" }; +char *rdb_type_string[] = { + "string", + "list-linked", + "set-hashtable", + "zset-v1", + "hash-hashtable", + "zset-v2", + "module-value", + "","", + "hash-zipmap", + "list-ziplist", + "set-intset", + "zset-ziplist", + "hash-ziplist", + "quicklist" +}; + /* Show a few stats collected into 'rdbstate' */ void rdbShowGenericInfo(void) { printf("[info] %lu keys read\n", rdbstate.keys); @@ -94,6 +112,12 @@ void rdbCheckError(const char *fmt, ...) { if (rdbstate.key) printf("[additional info] Reading key '%s'\n", (char*)rdbstate.key->ptr); + if (rdbstate.key_type != -1) + printf("[additional info] Reading type %d (%s)\n", + rdbstate.key_type, + ((unsigned)rdbstate.key_type < + sizeof(rdb_type_string)/sizeof(char*)) ? 
+ rdb_type_string[rdbstate.key_type] : "unknown"); rdbShowGenericInfo(); } @@ -128,8 +152,7 @@ void rdbCheckSetupSignals(void) { sigemptyset(&act.sa_mask); act.sa_flags = SA_NODEFER | SA_RESETHAND | SA_SIGINFO; -#warning "Uncomment here" -// act.sa_sigaction = rdbCheckHandleCrash; + act.sa_sigaction = rdbCheckHandleCrash; sigaction(SIGSEGV, &act, NULL); sigaction(SIGBUS, &act, NULL); sigaction(SIGFPE, &act, NULL); @@ -233,6 +256,7 @@ int redis_check_rdb(char *rdbfilename) { rdbCheckError("Invalid object type: %d", type); return 1; } + rdbstate.key_type = type; } /* Read key */ @@ -254,6 +278,7 @@ int redis_check_rdb(char *rdbfilename) { rdbstate.key = NULL; decrRefCount(key); decrRefCount(val); + rdbstate.key_type = -1; } /* Verify the checksum if RDB version is >= 5 */ if (rdbver >= 5 && server.rdb_checksum) { From ae3ce0eba30d2ff1045dac3814536fc43db3039a Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 1 Jul 2016 11:59:25 +0200 Subject: [PATCH 0035/1722] In Redis RDB check: log decompression errors. --- src/rdb.c | 6 +++++- src/redis-check-rdb.c | 19 ++++++++++++++++++- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/src/rdb.c b/src/rdb.c index 300d15602..9b4dcd027 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -45,6 +45,7 @@ extern int rdbCheckMode; void rdbCheckError(const char *fmt, ...); +void rdbCheckSetError(const char *fmt, ...); void rdbCheckThenExit(char *reason, int where) { if (!rdbCheckMode) { @@ -341,7 +342,10 @@ void *rdbLoadLzfStringObject(rio *rdb, int flags, size_t *lenptr) { /* Load the compressed representation and uncompress it to target. 
*/ if (rioRead(rdb,c,clen) == 0) goto err; - if (lzf_decompress(c,clen,val,len) == 0) goto err; + if (lzf_decompress(c,clen,val,len) == 0) { + if (rdbCheckMode) rdbCheckSetError("Invalid LZF compressed string"); + goto err; + } zfree(c); if (plain || sds) { diff --git a/src/redis-check-rdb.c b/src/redis-check-rdb.c index 47897d788..e123b7f34 100644 --- a/src/redis-check-rdb.c +++ b/src/redis-check-rdb.c @@ -45,6 +45,8 @@ struct { unsigned long expires; /* Number of keys with an expire. */ unsigned long already_expired; /* Number of keys already expired. */ int doing; /* The state while reading the RDB. */ + int error_set; /* True if error is populated. */ + char error[1024]; } rdbstate; /* At every loading step try to remember what we were about to do, so that @@ -135,6 +137,17 @@ void rdbCheckInfo(const char *fmt, ...) { rdbstate.rio->processed_bytes : 0), msg); } +/* Used inside rdb.c in order to log specific errors happening inside + * the RDB loading internals. */ +void rdbCheckSetError(const char *fmt, ...) { + va_list ap; + + va_start(ap, fmt); + vsnprintf(rdbstate.error, sizeof(rdbstate.error), fmt, ap); + va_end(ap); + rdbstate.error_set = 1; +} + /* During RDB check we setup a special signal handler for memory violations * and similar conditions, so that we can log the offending part of the RDB * if the crash is due to broken content. */ @@ -300,7 +313,11 @@ int redis_check_rdb(char *rdbfilename) { return 0; eoferr: /* unexpected end of file is handled here with a fatal exit */ - rdbCheckError("Unexpected EOF reading RDB file"); + if (rdbstate.error_set) { + rdbCheckError(rdbstate.error); + } else { + rdbCheckError("Unexpected EOF reading RDB file"); + } return 1; } From 3ede1e6d95a3590ac0ddc9646bf64b6660357a75 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 1 Jul 2016 15:26:55 +0200 Subject: [PATCH 0036/1722] In Redis RDB check: more details in error reportings. 
--- src/rdb.c | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/src/rdb.c b/src/rdb.c index 9b4dcd027..3b7cec6d7 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -41,22 +41,29 @@ #include #include -#define rdbExitReportCorruptRDB(reason) rdbCheckThenExit(reason, __LINE__); +#define rdbExitReportCorruptRDB(...) rdbCheckThenExit(__LINE__,__VA_ARGS__) extern int rdbCheckMode; void rdbCheckError(const char *fmt, ...); void rdbCheckSetError(const char *fmt, ...); -void rdbCheckThenExit(char *reason, int where) { - if (!rdbCheckMode) { - serverLog(LL_WARNING, "Corrupt RDB detected at rdb.c:%d (%s). " - "Running 'redis-check-rdb %s'", - where, reason, server.rdb_filename); +void rdbCheckThenExit(int linenum, char *reason, ...) { + va_list ap; + char msg[1024]; + int len; + + len = snprintf(msg,sizeof(msg), + "Internal error in RDB reading function at rdb.c:%d -> ", linenum); + va_start(ap,reason); + vsnprintf(msg+len,sizeof(msg)-len,reason,ap); + va_end(ap); + + if (!rdbCheckMode) { + serverLog(LL_WARNING, "%s", msg); char *argv[2] = {"",server.rdb_filename}; redis_check_rdb_main(2,argv); } else { - rdbCheckError("Internal error in RDB reading function at rdb.c:%d (%s)", - where, reason); + rdbCheckError("%s",msg); } exit(1); } @@ -172,7 +179,8 @@ int rdbLoadLenByRef(rio *rdb, int *isencoded, uint64_t *lenptr) { if (rioRead(rdb,&len,8) == 0) return -1; *lenptr = ntohu64(len); } else { - rdbExitReportCorruptRDB("Unknown length encoding in rdbLoadLen()"); + rdbExitReportCorruptRDB( + "Unknown length encoding %d in rdbLoadLen()",type); return -1; /* Never reached. 
*/ } return 0; @@ -240,7 +248,7 @@ void *rdbLoadIntegerObject(rio *rdb, int enctype, int flags, size_t *lenptr) { val = (int32_t)v; } else { val = 0; /* anti-warning */ - rdbExitReportCorruptRDB("Unknown RDB integer encoding type"); + rdbExitReportCorruptRDB("Unknown RDB integer encoding type %d",enctype); } if (plain || sds) { char buf[LONG_STR_SIZE], *p; @@ -457,7 +465,7 @@ void *rdbGenericLoadStringObject(rio *rdb, int flags, size_t *lenptr) { case RDB_ENC_LZF: return rdbLoadLzfStringObject(rdb,flags,lenptr); default: - rdbExitReportCorruptRDB("Unknown RDB encoding type"); + rdbExitReportCorruptRDB("Unknown RDB string encoding type %d",len); } } @@ -1288,7 +1296,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) { hashTypeConvert(o, OBJ_ENCODING_HT); break; default: - rdbExitReportCorruptRDB("Unknown encoding"); + rdbExitReportCorruptRDB("Unknown RDB encoding type %d",rdbtype); break; } } else if (rdbtype == RDB_TYPE_MODULE) { @@ -1313,7 +1321,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) { } o = createModuleObject(mt,ptr); } else { - rdbExitReportCorruptRDB("Unknown object type"); + rdbExitReportCorruptRDB("Unknown RDB encoding type %d",rdbtype); } return o; } From f503ca0c695802cdd33b2881e85de49f60606d1a Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 4 Jul 2016 12:08:37 +0200 Subject: [PATCH 0037/1722] Make tcp-keepalive default to 300 in internal conf. We already changed the default in the redis.conf template, but I forgot to change the internal config as well. --- src/server.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/server.h b/src/server.h index 9d3fc2d33..3ad196462 100644 --- a/src/server.h +++ b/src/server.h @@ -114,7 +114,7 @@ typedef long long mstime_t; /* millisecond time type. */ #define CONFIG_DEFAULT_CLUSTER_ANNOUNCE_BUS_PORT 0 /* Use +10000 offset. 
*/ #define CONFIG_DEFAULT_DAEMONIZE 0 #define CONFIG_DEFAULT_UNIX_SOCKET_PERM 0 -#define CONFIG_DEFAULT_TCP_KEEPALIVE 0 +#define CONFIG_DEFAULT_TCP_KEEPALIVE 300 #define CONFIG_DEFAULT_PROTECTED_MODE 1 #define CONFIG_DEFAULT_LOGFILE "" #define CONFIG_DEFAULT_SYSLOG_ENABLED 0 From 1ce90bd39f065004d9e5be6cbff2f835b52719a5 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 4 Jul 2016 12:41:25 +0200 Subject: [PATCH 0038/1722] Fix test for new RDB checksum failure message. --- tests/integration/rdb.tcl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/rdb.tcl b/tests/integration/rdb.tcl index f2de3504d..2ed47cc58 100644 --- a/tests/integration/rdb.tcl +++ b/tests/integration/rdb.tcl @@ -89,7 +89,7 @@ close $fd start_server_and_kill_it [list "dir" $server_path] { test {Server should not start if RDB is corrupted} { wait_for_condition 50 100 { - [string match {*RDB checksum*} \ + [string match {*CRC error*} \ [exec tail -n10 < [dict get $srv stdout]]] } else { fail "Server started even if RDB was corrupted!" From e1ecc3ab8e5470c256fed30e30b573334a4142d2 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 4 Jul 2016 16:09:07 +0200 Subject: [PATCH 0039/1722] CONFIG GET is now no longer case sensitive. Like CONFIG SET always was. Close #3369. --- src/config.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/config.c b/src/config.c index 77029b934..683ec8719 100644 --- a/src/config.c +++ b/src/config.c @@ -1067,7 +1067,7 @@ badfmt: /* Bad format errors */ *----------------------------------------------------------------------------*/ #define config_get_string_field(_name,_var) do { \ - if (stringmatch(pattern,_name,0)) { \ + if (stringmatch(pattern,_name,1)) { \ addReplyBulkCString(c,_name); \ addReplyBulkCString(c,_var ? 
_var : ""); \ matches++; \ @@ -1075,7 +1075,7 @@ badfmt: /* Bad format errors */ } while(0); #define config_get_bool_field(_name,_var) do { \ - if (stringmatch(pattern,_name,0)) { \ + if (stringmatch(pattern,_name,1)) { \ addReplyBulkCString(c,_name); \ addReplyBulkCString(c,_var ? "yes" : "no"); \ matches++; \ @@ -1083,7 +1083,7 @@ badfmt: /* Bad format errors */ } while(0); #define config_get_numerical_field(_name,_var) do { \ - if (stringmatch(pattern,_name,0)) { \ + if (stringmatch(pattern,_name,1)) { \ ll2string(buf,sizeof(buf),_var); \ addReplyBulkCString(c,_name); \ addReplyBulkCString(c,buf); \ @@ -1092,7 +1092,7 @@ badfmt: /* Bad format errors */ } while(0); #define config_get_enum_field(_name,_var,_enumvar) do { \ - if (stringmatch(pattern,_name,0)) { \ + if (stringmatch(pattern,_name,1)) { \ addReplyBulkCString(c,_name); \ addReplyBulkCString(c,configEnumGetNameOrUnknown(_enumvar,_var)); \ matches++; \ @@ -1215,12 +1215,12 @@ void configGetCommand(client *c) { /* Everything we can't handle with macros follows. */ - if (stringmatch(pattern,"appendonly",0)) { + if (stringmatch(pattern,"appendonly",1)) { addReplyBulkCString(c,"appendonly"); addReplyBulkCString(c,server.aof_state == AOF_OFF ? 
"no" : "yes"); matches++; } - if (stringmatch(pattern,"dir",0)) { + if (stringmatch(pattern,"dir",1)) { char buf[1024]; if (getcwd(buf,sizeof(buf)) == NULL) @@ -1230,7 +1230,7 @@ void configGetCommand(client *c) { addReplyBulkCString(c,buf); matches++; } - if (stringmatch(pattern,"save",0)) { + if (stringmatch(pattern,"save",1)) { sds buf = sdsempty(); int j; @@ -1246,7 +1246,7 @@ void configGetCommand(client *c) { sdsfree(buf); matches++; } - if (stringmatch(pattern,"client-output-buffer-limit",0)) { + if (stringmatch(pattern,"client-output-buffer-limit",1)) { sds buf = sdsempty(); int j; @@ -1264,14 +1264,14 @@ void configGetCommand(client *c) { sdsfree(buf); matches++; } - if (stringmatch(pattern,"unixsocketperm",0)) { + if (stringmatch(pattern,"unixsocketperm",1)) { char buf[32]; snprintf(buf,sizeof(buf),"%o",server.unixsocketperm); addReplyBulkCString(c,"unixsocketperm"); addReplyBulkCString(c,buf); matches++; } - if (stringmatch(pattern,"slaveof",0)) { + if (stringmatch(pattern,"slaveof",1)) { char buf[256]; addReplyBulkCString(c,"slaveof"); @@ -1283,7 +1283,7 @@ void configGetCommand(client *c) { addReplyBulkCString(c,buf); matches++; } - if (stringmatch(pattern,"notify-keyspace-events",0)) { + if (stringmatch(pattern,"notify-keyspace-events",1)) { robj *flagsobj = createObject(OBJ_STRING, keyspaceEventsFlagsToString(server.notify_keyspace_events)); @@ -1292,7 +1292,7 @@ void configGetCommand(client *c) { decrRefCount(flagsobj); matches++; } - if (stringmatch(pattern,"bind",0)) { + if (stringmatch(pattern,"bind",1)) { sds aux = sdsjoin(server.bindaddr,server.bindaddr_count," "); addReplyBulkCString(c,"bind"); From b5cef76b741aae2cdcba22e92750fe5a4a458dfb Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 4 Jul 2016 18:45:24 +0200 Subject: [PATCH 0040/1722] Sentinel: fix cross-master Sentinel address update. 
This commit both fixes the crash reported with issue #3364 and also properly closes the old links after the Sentinel address for the other masters gets updated. The two problems were: 1. The Sentinel that switched address may not monitor all the masters, it is possible that there is no match, and the 'match' variable is NULL. Now we check for no match and 'continue' to the next master. 2. By inspecting the code because of issue "1" I noticed that there was a problem in the code that disconnects the link of the Sentinel that needs the address update. Basically link->disconnected is non-zero even if just *a single link* (cc -- command link or pc -- pubsub link) are disconnected, so to check with if (link->disconnected) in order to close the links risks to leave one link connected. I was able to manually reproduce the crash at "1" and verify that the commit resolves the issue. Close #3364. --- src/sentinel.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/sentinel.c b/src/sentinel.c index f8ebd0c6f..baf6f9cbd 100644 --- a/src/sentinel.c +++ b/src/sentinel.c @@ -1062,11 +1062,18 @@ int sentinelUpdateSentinelAddressInAllMasters(sentinelRedisInstance *ri) { sentinelRedisInstance *master = dictGetVal(de), *match; match = getSentinelRedisInstanceByAddrAndRunID(master->sentinels, NULL,0,ri->runid); - if (match->link->disconnected == 0) { + /* If there is no match, this master does not know about this + * Sentinel, try with the next one. */ + if (match == NULL) continue; + + /* Disconnect the old links if connected. */ + if (match->link->cc != NULL) instanceLinkCloseConnection(match->link,match->link->cc); + if (match->link->pc != NULL) instanceLinkCloseConnection(match->link,match->link->pc); - } + if (match == ri) continue; /* Address already updated for it. */ + /* Update the address of the matching Sentinel by copying the address * of the Sentinel object that received the address update. 
*/ releaseSentinelAddr(match->addr); From 14ee3418e57ae7cb0c5d21aba23789189155a647 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 5 Jul 2016 15:18:40 +0200 Subject: [PATCH 0041/1722] redis-cli: check SELECT reply type just in state updated. In issues #3361 / #3365 a problem was reported / fixed with redis-cli not updating correctly the current DB on error after SELECT. In theory this bug was fixed in 0042fb0e, but actually the commit only fixed the prompt updating, not the fact the state was set in a wrong way. This commit removes the check in the prompt update, now that hopefully it is the state that is correct, there is no longer need for this check. --- src/redis-cli.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 070388380..6aacecc77 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -161,7 +161,7 @@ static void cliRefreshPrompt(void) { len = anetFormatAddr(config.prompt, sizeof(config.prompt), config.hostip, config.hostport); /* Add [dbnum] if needed */ - if (config.dbnum != 0 && config.last_cmd_type != REDIS_REPLY_ERROR) + if (config.dbnum != 0) len += snprintf(config.prompt+len,sizeof(config.prompt)-len,"[%d]", config.dbnum); snprintf(config.prompt+len,sizeof(config.prompt)-len,"> "); From d1f5aa7f8706bd62e89555f3f64dfd2c1d15a45a Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 6 Jul 2016 11:43:33 +0200 Subject: [PATCH 0042/1722] getLongLongFromObject: use string2ll() instead of strict_strtoll(). strict_strtoll() has a bug that reports the empty string as ok and parses it as zero. Apparently nobody ever replaced this old call with the faster/saner string2ll() which is used otherwise in the rest of the Redis core. This commit close #3333. 
--- src/object.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/src/object.c b/src/object.c index 447e5fc30..ad29c1bd2 100644 --- a/src/object.c +++ b/src/object.c @@ -619,20 +619,6 @@ int getLongDoubleFromObjectOrReply(client *c, robj *o, long double *target, cons return C_OK; } -/* Helper function for getLongLongFromObject(). The function parses the string - * as a long long value in a strict way (no spaces before/after). On success - * C_OK is returned, otherwise C_ERR is returned. */ -int strict_strtoll(char *str, long long *vp) { - char *eptr; - long long value; - - errno = 0; - value = strtoll(str, &eptr, 10); - if (isspace(str[0]) || eptr[0] != '\0' || errno == ERANGE) return C_ERR; - if (vp) *vp = value; - return C_OK; -} - int getLongLongFromObject(robj *o, long long *target) { long long value; @@ -641,7 +627,7 @@ int getLongLongFromObject(robj *o, long long *target) { } else { serverAssertWithInfo(NULL,o,o->type == OBJ_STRING); if (sdsEncodedObject(o)) { - if (strict_strtoll(o->ptr,&value) == C_ERR) return C_ERR; + if (string2ll(o->ptr,sdslen(o->ptr),&value) == 0) return C_ERR; } else if (o->encoding == OBJ_ENCODING_INT) { value = (long)o->ptr; } else { From 669db62ae1e699805c3acd47747428037850303b Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 6 Jul 2016 11:50:13 +0200 Subject: [PATCH 0043/1722] Regression test for issue #3333. 
--- tests/unit/expire.tcl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/unit/expire.tcl b/tests/unit/expire.tcl index ff3dacb33..0a50dd31b 100644 --- a/tests/unit/expire.tcl +++ b/tests/unit/expire.tcl @@ -198,4 +198,10 @@ start_server {tags {"expire"}} { r set foo b lsort [r keys *] } {a e foo s t} + + test {EXPIRE with empty string as TTL should report an error} { + r set foo bar + catch {r expire foo ""} e + set e + } {*not an integer*} } From 262bc9d0950c4341f9b88b88502886f0b4bf2a9b Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 6 Jul 2016 12:24:45 +0200 Subject: [PATCH 0044/1722] Generate Makefile.dep at every build. Normally we used to update it from time to time. Too fragile... better to generate dependencies at every run and delete them on 'make clean'. --- .gitignore | 1 + src/Makefile | 15 ++-- src/Makefile.dep | 186 ----------------------------------------------- 3 files changed, 7 insertions(+), 195 deletions(-) delete mode 100644 src/Makefile.dep diff --git a/.gitignore b/.gitignore index 3d346fbcf..a188cfc82 100644 --- a/.gitignore +++ b/.gitignore @@ -27,3 +27,4 @@ deps/lua/src/liblua.a .make-* .prerequisites *.dSYM +Makefile.dep diff --git a/src/Makefile b/src/Makefile index 89355984c..5c2ef9f25 100644 --- a/src/Makefile +++ b/src/Makefile @@ -142,16 +142,13 @@ all: $(REDIS_SERVER_NAME) $(REDIS_SENTINEL_NAME) $(REDIS_CLI_NAME) $(REDIS_BENCH @echo "Hint: It's a good idea to run 'make test' ;)" @echo "" +Makefile.dep: + -$(REDIS_CC) -MM *.c > Makefile.dep 2> /dev/null || true + +-include Makefile.dep + .PHONY: all -# Deps (use make dep to generate this) -include Makefile.dep - -dep: - $(REDIS_CC) -MM *.c > Makefile.dep - -.PHONY: dep - persist-settings: distclean echo STD=$(STD) >> .make-settings echo WARN=$(WARN) >> .make-settings @@ -211,7 +208,7 @@ $(REDIS_CHECK_AOF_NAME): $(REDIS_CHECK_AOF_OBJ) $(REDIS_CC) -c $< clean: - rm -rf $(REDIS_SERVER_NAME) $(REDIS_SENTINEL_NAME) $(REDIS_CLI_NAME) $(REDIS_BENCHMARK_NAME) 
$(REDIS_CHECK_RDB_NAME) $(REDIS_CHECK_AOF_NAME) *.o *.gcda *.gcno *.gcov redis.info lcov-html + rm -rf $(REDIS_SERVER_NAME) $(REDIS_SENTINEL_NAME) $(REDIS_CLI_NAME) $(REDIS_BENCHMARK_NAME) $(REDIS_CHECK_RDB_NAME) $(REDIS_CHECK_AOF_NAME) *.o *.gcda *.gcno *.gcov redis.info lcov-html Makefile.dep .PHONY: clean diff --git a/src/Makefile.dep b/src/Makefile.dep deleted file mode 100644 index 90784a2e7..000000000 --- a/src/Makefile.dep +++ /dev/null @@ -1,186 +0,0 @@ -adlist.o: adlist.c adlist.h zmalloc.h -ae.o: ae.c ae.h zmalloc.h config.h ae_kqueue.c ae_epoll.c ae_select.c ae_evport.c -ae_epoll.o: ae_epoll.c -ae_evport.o: ae_evport.c -ae_kqueue.o: ae_kqueue.c -ae_select.o: ae_select.c -anet.o: anet.c fmacros.h anet.h -aof.o: aof.c server.h fmacros.h config.h solarisfixes.h \ - ../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h latency.h \ - sparkline.h quicklist.h zipmap.h sha1.h endianconv.h crc64.h rdb.h rio.h \ - bio.h -bio.o: bio.c server.h fmacros.h config.h solarisfixes.h \ - ../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h latency.h \ - sparkline.h quicklist.h zipmap.h sha1.h endianconv.h crc64.h rdb.h rio.h \ - bio.h -bitops.o: bitops.c server.h fmacros.h config.h solarisfixes.h \ - ../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h latency.h \ - sparkline.h quicklist.h zipmap.h sha1.h endianconv.h crc64.h rdb.h rio.h -blocked.o: blocked.c server.h fmacros.h config.h solarisfixes.h \ - ../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h latency.h \ - sparkline.h quicklist.h zipmap.h sha1.h endianconv.h crc64.h rdb.h rio.h -cluster.o: cluster.c server.h fmacros.h config.h solarisfixes.h \ - ../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h 
sds.h dict.h \ - adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h latency.h \ - sparkline.h quicklist.h zipmap.h sha1.h endianconv.h crc64.h rdb.h rio.h \ - cluster.h -config.o: config.c server.h fmacros.h config.h solarisfixes.h \ - ../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h latency.h \ - sparkline.h quicklist.h zipmap.h sha1.h endianconv.h crc64.h rdb.h rio.h \ - cluster.h -crc16.o: crc16.c server.h fmacros.h config.h solarisfixes.h \ - ../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h latency.h \ - sparkline.h quicklist.h zipmap.h sha1.h endianconv.h crc64.h rdb.h rio.h -crc64.o: crc64.c -db.o: db.c server.h fmacros.h config.h solarisfixes.h \ - ../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h latency.h \ - sparkline.h quicklist.h zipmap.h sha1.h endianconv.h crc64.h rdb.h rio.h \ - cluster.h atomicvar.h -debug.o: debug.c server.h fmacros.h config.h solarisfixes.h \ - ../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h latency.h \ - sparkline.h quicklist.h zipmap.h sha1.h endianconv.h crc64.h rdb.h rio.h \ - bio.h -dict.o: dict.c fmacros.h dict.h zmalloc.h redisassert.h -endianconv.o: endianconv.c -geo.o: geo.c geo.h server.h fmacros.h config.h solarisfixes.h \ - ../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h latency.h \ - sparkline.h quicklist.h zipmap.h sha1.h endianconv.h crc64.h rdb.h rio.h \ - ../deps/geohash-int/geohash_helper.h ../deps/geohash-int/geohash.h -hyperloglog.o: hyperloglog.c server.h fmacros.h config.h solarisfixes.h \ - ../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h 
ziplist.h intset.h version.h util.h latency.h \ - sparkline.h quicklist.h zipmap.h sha1.h endianconv.h crc64.h rdb.h rio.h -intset.o: intset.c intset.h zmalloc.h endianconv.h config.h -latency.o: latency.c server.h fmacros.h config.h solarisfixes.h \ - ../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h latency.h \ - sparkline.h quicklist.h zipmap.h sha1.h endianconv.h crc64.h rdb.h rio.h -lazyfree.o: lazyfree.c server.h fmacros.h config.h solarisfixes.h \ - ../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h latency.h \ - sparkline.h quicklist.h zipmap.h sha1.h endianconv.h crc64.h rdb.h rio.h \ - bio.h atomicvar.h cluster.h -lzf_c.o: lzf_c.c lzfP.h -lzf_d.o: lzf_d.c lzfP.h -memtest.o: memtest.c config.h -multi.o: multi.c server.h fmacros.h config.h solarisfixes.h \ - ../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h latency.h \ - sparkline.h quicklist.h zipmap.h sha1.h endianconv.h crc64.h rdb.h rio.h -networking.o: networking.c server.h fmacros.h config.h solarisfixes.h \ - ../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h latency.h \ - sparkline.h quicklist.h zipmap.h sha1.h endianconv.h crc64.h rdb.h rio.h -notify.o: notify.c server.h fmacros.h config.h solarisfixes.h \ - ../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h latency.h \ - sparkline.h quicklist.h zipmap.h sha1.h endianconv.h crc64.h rdb.h rio.h -object.o: object.c server.h fmacros.h config.h solarisfixes.h \ - ../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h latency.h \ - sparkline.h quicklist.h zipmap.h sha1.h 
endianconv.h crc64.h rdb.h rio.h -pqsort.o: pqsort.c -pubsub.o: pubsub.c server.h fmacros.h config.h solarisfixes.h \ - ../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h latency.h \ - sparkline.h quicklist.h zipmap.h sha1.h endianconv.h crc64.h rdb.h rio.h -quicklist.o: quicklist.c quicklist.h zmalloc.h ziplist.h util.h sds.h \ - lzf.h -rand.o: rand.c -rdb.o: rdb.c server.h fmacros.h config.h solarisfixes.h \ - ../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h latency.h \ - sparkline.h quicklist.h zipmap.h sha1.h endianconv.h crc64.h rdb.h rio.h \ - lzf.h -redis-benchmark.o: redis-benchmark.c fmacros.h ../deps/hiredis/sds.h ae.h \ - ../deps/hiredis/hiredis.h adlist.h zmalloc.h -redis-check-aof.o: redis-check-aof.c fmacros.h config.h -redis-check-rdb.o: redis-check-rdb.c server.h fmacros.h config.h \ - solarisfixes.h ../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h \ - sds.h dict.h adlist.h zmalloc.h anet.h ziplist.h intset.h version.h \ - util.h latency.h sparkline.h quicklist.h zipmap.h sha1.h endianconv.h \ - crc64.h rdb.h rio.h lzf.h -redis-cli.o: redis-cli.c fmacros.h version.h ../deps/hiredis/hiredis.h \ - ../deps/hiredis/sds.h zmalloc.h ../deps/linenoise/linenoise.h help.h \ - anet.h ae.h -release.o: release.c release.h version.h crc64.h -replication.o: replication.c server.h fmacros.h config.h solarisfixes.h \ - ../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h latency.h \ - sparkline.h quicklist.h zipmap.h sha1.h endianconv.h crc64.h rdb.h rio.h -rio.o: rio.c fmacros.h rio.h sds.h util.h crc64.h config.h server.h \ - solarisfixes.h ../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h \ - dict.h adlist.h zmalloc.h anet.h ziplist.h intset.h version.h latency.h \ - sparkline.h quicklist.h zipmap.h sha1.h 
endianconv.h rdb.h -scripting.o: scripting.c server.h fmacros.h config.h solarisfixes.h \ - ../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h latency.h \ - sparkline.h quicklist.h zipmap.h sha1.h endianconv.h crc64.h rdb.h rio.h \ - rand.h cluster.h ../deps/lua/src/lauxlib.h ../deps/lua/src/lua.h \ - ../deps/lua/src/lualib.h -sds.o: sds.c sds.h sdsalloc.h zmalloc.h -sentinel.o: sentinel.c server.h fmacros.h config.h solarisfixes.h \ - ../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h latency.h \ - sparkline.h quicklist.h zipmap.h sha1.h endianconv.h crc64.h rdb.h rio.h \ - ../deps/hiredis/hiredis.h ../deps/hiredis/async.h \ - ../deps/hiredis/hiredis.h -server.o: server.c server.h fmacros.h config.h solarisfixes.h \ - ../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h latency.h \ - sparkline.h quicklist.h zipmap.h sha1.h endianconv.h crc64.h rdb.h rio.h \ - cluster.h slowlog.h bio.h asciilogo.h -setproctitle.o: setproctitle.c -sha1.o: sha1.c solarisfixes.h sha1.h config.h -slowlog.o: slowlog.c server.h fmacros.h config.h solarisfixes.h \ - ../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h latency.h \ - sparkline.h quicklist.h zipmap.h sha1.h endianconv.h crc64.h rdb.h rio.h \ - slowlog.h -sort.o: sort.c server.h fmacros.h config.h solarisfixes.h \ - ../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h latency.h \ - sparkline.h quicklist.h zipmap.h sha1.h endianconv.h crc64.h rdb.h rio.h \ - pqsort.h -sparkline.o: sparkline.c server.h fmacros.h config.h solarisfixes.h \ - ../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h 
ziplist.h intset.h version.h util.h latency.h \ - sparkline.h quicklist.h zipmap.h sha1.h endianconv.h crc64.h rdb.h rio.h -syncio.o: syncio.c server.h fmacros.h config.h solarisfixes.h \ - ../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h latency.h \ - sparkline.h quicklist.h zipmap.h sha1.h endianconv.h crc64.h rdb.h rio.h -t_hash.o: t_hash.c server.h fmacros.h config.h solarisfixes.h \ - ../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h latency.h \ - sparkline.h quicklist.h zipmap.h sha1.h endianconv.h crc64.h rdb.h rio.h -t_list.o: t_list.c server.h fmacros.h config.h solarisfixes.h \ - ../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h latency.h \ - sparkline.h quicklist.h zipmap.h sha1.h endianconv.h crc64.h rdb.h rio.h -t_set.o: t_set.c server.h fmacros.h config.h solarisfixes.h \ - ../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h latency.h \ - sparkline.h quicklist.h zipmap.h sha1.h endianconv.h crc64.h rdb.h rio.h -t_string.o: t_string.c server.h fmacros.h config.h solarisfixes.h \ - ../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h latency.h \ - sparkline.h quicklist.h zipmap.h sha1.h endianconv.h crc64.h rdb.h rio.h -t_zset.o: t_zset.c server.h fmacros.h config.h solarisfixes.h \ - ../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h latency.h \ - sparkline.h quicklist.h zipmap.h sha1.h endianconv.h crc64.h rdb.h rio.h -util.o: util.c fmacros.h util.h sds.h sha1.h -ziplist.o: ziplist.c zmalloc.h util.h sds.h ziplist.h endianconv.h \ - config.h redisassert.h -zipmap.o: 
zipmap.c zmalloc.h endianconv.h config.h -zmalloc.o: zmalloc.c config.h zmalloc.h atomicvar.h From d14ec7e97593e336e1b46687135a9b06a372f5c1 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 6 Jul 2016 12:56:43 +0200 Subject: [PATCH 0045/1722] Makefile: don't build dependencies file for clean, distclean. --- src/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Makefile b/src/Makefile index 5c2ef9f25..480ebef63 100644 --- a/src/Makefile +++ b/src/Makefile @@ -16,6 +16,7 @@ release_hdr := $(shell sh -c './mkreleasehdr.sh') uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not') OPTIMIZATION?=-O2 DEPENDENCY_TARGETS=hiredis linenoise lua geohash-int +NODEPS:=clean distclean # Default settings STD=-std=c99 -pedantic -DREDIS_STATIC='' @@ -145,7 +146,9 @@ all: $(REDIS_SERVER_NAME) $(REDIS_SENTINEL_NAME) $(REDIS_CLI_NAME) $(REDIS_BENCH Makefile.dep: -$(REDIS_CC) -MM *.c > Makefile.dep 2> /dev/null || true +ifeq (0, $(words $(findstring $(MAKECMDGOALS), $(NODEPS)))) -include Makefile.dep +endif .PHONY: all From 0df4deada40aa00916892e08ddbef472ff126893 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 6 Jul 2016 15:24:06 +0200 Subject: [PATCH 0046/1722] Expire and LRU related code moved into different files. 
--- src/Makefile | 2 +- src/db.c | 128 -------------- src/object.c | 12 -- src/server.c | 492 --------------------------------------------------- src/server.h | 3 + 5 files changed, 4 insertions(+), 633 deletions(-) diff --git a/src/Makefile b/src/Makefile index 480ebef63..2ee01a463 100644 --- a/src/Makefile +++ b/src/Makefile @@ -128,7 +128,7 @@ endif REDIS_SERVER_NAME=redis-server REDIS_SENTINEL_NAME=redis-sentinel -REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o geo.o lazyfree.o module.o +REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o geo.o lazyfree.o module.o evict.o expire.o REDIS_GEOHASH_OBJ=../deps/geohash-int/geohash.o ../deps/geohash-int/geohash_helper.o REDIS_CLI_NAME=redis-cli REDIS_CLI_OBJ=anet.o adlist.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o diff --git a/src/db.c b/src/db.c index 4db7d890f..03615fd34 100644 --- a/src/db.c +++ b/src/db.c @@ -1010,134 +1010,6 @@ int expireIfNeeded(redisDb *db, robj *key) { dbSyncDelete(db,key); } -/*----------------------------------------------------------------------------- - * Expires Commands - 
*----------------------------------------------------------------------------*/ - -/* This is the generic command implementation for EXPIRE, PEXPIRE, EXPIREAT - * and PEXPIREAT. Because the commad second argument may be relative or absolute - * the "basetime" argument is used to signal what the base time is (either 0 - * for *AT variants of the command, or the current time for relative expires). - * - * unit is either UNIT_SECONDS or UNIT_MILLISECONDS, and is only used for - * the argv[2] parameter. The basetime is always specified in milliseconds. */ -void expireGenericCommand(client *c, long long basetime, int unit) { - robj *key = c->argv[1], *param = c->argv[2]; - long long when; /* unix time in milliseconds when the key will expire. */ - - if (getLongLongFromObjectOrReply(c, param, &when, NULL) != C_OK) - return; - - if (unit == UNIT_SECONDS) when *= 1000; - when += basetime; - - /* No key, return zero. */ - if (lookupKeyWrite(c->db,key) == NULL) { - addReply(c,shared.czero); - return; - } - - /* EXPIRE with negative TTL, or EXPIREAT with a timestamp into the past - * should never be executed as a DEL when load the AOF or in the context - * of a slave instance. - * - * Instead we take the other branch of the IF statement setting an expire - * (possibly in the past) and wait for an explicit DEL from the master. */ - if (when <= mstime() && !server.loading && !server.masterhost) { - robj *aux; - - int deleted = server.lazyfree_lazy_expire ? dbAsyncDelete(c->db,key) : - dbSyncDelete(c->db,key); - serverAssertWithInfo(c,key,deleted); - server.dirty++; - - /* Replicate/AOF this as an explicit DEL or UNLINK. */ - aux = server.lazyfree_lazy_expire ? 
shared.unlink : shared.del; - rewriteClientCommandVector(c,2,aux,key); - signalModifiedKey(c->db,key); - notifyKeyspaceEvent(NOTIFY_GENERIC,"del",key,c->db->id); - addReply(c, shared.cone); - return; - } else { - setExpire(c->db,key,when); - addReply(c,shared.cone); - signalModifiedKey(c->db,key); - notifyKeyspaceEvent(NOTIFY_GENERIC,"expire",key,c->db->id); - server.dirty++; - return; - } -} - -void expireCommand(client *c) { - expireGenericCommand(c,mstime(),UNIT_SECONDS); -} - -void expireatCommand(client *c) { - expireGenericCommand(c,0,UNIT_SECONDS); -} - -void pexpireCommand(client *c) { - expireGenericCommand(c,mstime(),UNIT_MILLISECONDS); -} - -void pexpireatCommand(client *c) { - expireGenericCommand(c,0,UNIT_MILLISECONDS); -} - -void ttlGenericCommand(client *c, int output_ms) { - long long expire, ttl = -1; - - /* If the key does not exist at all, return -2 */ - if (lookupKeyReadWithFlags(c->db,c->argv[1],LOOKUP_NOTOUCH) == NULL) { - addReplyLongLong(c,-2); - return; - } - /* The key exists. Return -1 if it has no expire, or the actual - * TTL value otherwise. */ - expire = getExpire(c->db,c->argv[1]); - if (expire != -1) { - ttl = expire-mstime(); - if (ttl < 0) ttl = 0; - } - if (ttl == -1) { - addReplyLongLong(c,-1); - } else { - addReplyLongLong(c,output_ms ? ttl : ((ttl+500)/1000)); - } -} - -void ttlCommand(client *c) { - ttlGenericCommand(c, 0); -} - -void pttlCommand(client *c) { - ttlGenericCommand(c, 1); -} - -void persistCommand(client *c) { - dictEntry *de; - - de = dictFind(c->db->dict,c->argv[1]->ptr); - if (de == NULL) { - addReply(c,shared.czero); - } else { - if (removeExpire(c->db,c->argv[1])) { - addReply(c,shared.cone); - server.dirty++; - } else { - addReply(c,shared.czero); - } - } -} - -/* TOUCH key1 [key2 key3 ... 
keyN] */ -void touchCommand(client *c) { - int touched = 0; - for (int j = 1; j < c->argc; j++) - if (lookupKeyRead(c->db,c->argv[j]) != NULL) touched++; - addReplyLongLong(c,touched); -} - /* ----------------------------------------------------------------------------- * API to get key arguments from commands * ---------------------------------------------------------------------------*/ diff --git a/src/object.c b/src/object.c index ad29c1bd2..ec886f1f6 100644 --- a/src/object.c +++ b/src/object.c @@ -682,18 +682,6 @@ char *strEncoding(int encoding) { } } -/* Given an object returns the min number of milliseconds the object was never - * requested, using an approximated LRU algorithm. */ -unsigned long long estimateObjectIdleTime(robj *o) { - unsigned long long lruclock = LRU_CLOCK(); - if (lruclock >= o->lru) { - return (lruclock - o->lru) * LRU_CLOCK_RESOLUTION; - } else { - return (lruclock + (LRU_CLOCK_MAX - o->lru)) * - LRU_CLOCK_RESOLUTION; - } -} - /* This is a helper function for the OBJECT command. We need to lookup keys * without any modification of LRU or other parameters. */ robj *objectCommandLookup(client *c, robj *key) { diff --git a/src/server.c b/src/server.c index 49150045a..4d6f9f1ab 100644 --- a/src/server.c +++ b/src/server.c @@ -738,186 +738,6 @@ void updateDictResizePolicy(void) { /* ======================= Cron: called every 100 ms ======================== */ -/* Helper function for the activeExpireCycle() function. - * This function will try to expire the key that is stored in the hash table - * entry 'de' of the 'expires' hash table of a Redis database. - * - * If the key is found to be expired, it is removed from the database and - * 1 is returned. Otherwise no operation is performed and 0 is returned. - * - * When a key is expired, server.stat_expiredkeys is incremented. - * - * The parameter 'now' is the current time in milliseconds as is passed - * to the function to avoid too many gettimeofday() syscalls. 
*/ -int activeExpireCycleTryExpire(redisDb *db, dictEntry *de, long long now) { - long long t = dictGetSignedIntegerVal(de); - if (now > t) { - sds key = dictGetKey(de); - robj *keyobj = createStringObject(key,sdslen(key)); - - propagateExpire(db,keyobj,server.lazyfree_lazy_expire); - if (server.lazyfree_lazy_expire) - dbAsyncDelete(db,keyobj); - else - dbSyncDelete(db,keyobj); - notifyKeyspaceEvent(NOTIFY_EXPIRED, - "expired",keyobj,db->id); - decrRefCount(keyobj); - server.stat_expiredkeys++; - return 1; - } else { - return 0; - } -} - -/* Try to expire a few timed out keys. The algorithm used is adaptive and - * will use few CPU cycles if there are few expiring keys, otherwise - * it will get more aggressive to avoid that too much memory is used by - * keys that can be removed from the keyspace. - * - * No more than CRON_DBS_PER_CALL databases are tested at every - * iteration. - * - * This kind of call is used when Redis detects that timelimit_exit is - * true, so there is more work to do, and we do it more incrementally from - * the beforeSleep() function of the event loop. - * - * Expire cycle type: - * - * If type is ACTIVE_EXPIRE_CYCLE_FAST the function will try to run a - * "fast" expire cycle that takes no longer than EXPIRE_FAST_CYCLE_DURATION - * microseconds, and is not repeated again before the same amount of time. - * - * If type is ACTIVE_EXPIRE_CYCLE_SLOW, that normal expire cycle is - * executed, where the time limit is a percentage of the REDIS_HZ period - * as specified by the REDIS_EXPIRELOOKUPS_TIME_PERC define. */ - -void activeExpireCycle(int type) { - /* This function has some global state in order to continue the work - * incrementally across calls. */ - static unsigned int current_db = 0; /* Last DB tested. */ - static int timelimit_exit = 0; /* Time limit hit in previous call? */ - static long long last_fast_cycle = 0; /* When last fast cycle ran. 
*/ - - int j, iteration = 0; - int dbs_per_call = CRON_DBS_PER_CALL; - long long start = ustime(), timelimit; - - if (type == ACTIVE_EXPIRE_CYCLE_FAST) { - /* Don't start a fast cycle if the previous cycle did not exited - * for time limt. Also don't repeat a fast cycle for the same period - * as the fast cycle total duration itself. */ - if (!timelimit_exit) return; - if (start < last_fast_cycle + ACTIVE_EXPIRE_CYCLE_FAST_DURATION*2) return; - last_fast_cycle = start; - } - - /* We usually should test CRON_DBS_PER_CALL per iteration, with - * two exceptions: - * - * 1) Don't test more DBs than we have. - * 2) If last time we hit the time limit, we want to scan all DBs - * in this iteration, as there is work to do in some DB and we don't want - * expired keys to use memory for too much time. */ - if (dbs_per_call > server.dbnum || timelimit_exit) - dbs_per_call = server.dbnum; - - /* We can use at max ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC percentage of CPU time - * per iteration. Since this function gets called with a frequency of - * server.hz times per second, the following is the max amount of - * microseconds we can spend in this function. */ - timelimit = 1000000*ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC/server.hz/100; - timelimit_exit = 0; - if (timelimit <= 0) timelimit = 1; - - if (type == ACTIVE_EXPIRE_CYCLE_FAST) - timelimit = ACTIVE_EXPIRE_CYCLE_FAST_DURATION; /* in microseconds. */ - - for (j = 0; j < dbs_per_call; j++) { - int expired; - redisDb *db = server.db+(current_db % server.dbnum); - - /* Increment the DB now so we are sure if we run out of time - * in the current DB we'll restart from the next. This allows to - * distribute the time evenly across DBs. */ - current_db++; - - /* Continue to expire if at the end of the cycle more than 25% - * of the keys were expired. */ - do { - unsigned long num, slots; - long long now, ttl_sum; - int ttl_samples; - - /* If there is nothing to expire try next DB ASAP. 
*/ - if ((num = dictSize(db->expires)) == 0) { - db->avg_ttl = 0; - break; - } - slots = dictSlots(db->expires); - now = mstime(); - - /* When there are less than 1% filled slots getting random - * keys is expensive, so stop here waiting for better times... - * The dictionary will be resized asap. */ - if (num && slots > DICT_HT_INITIAL_SIZE && - (num*100/slots < 1)) break; - - /* The main collection cycle. Sample random keys among keys - * with an expire set, checking for expired ones. */ - expired = 0; - ttl_sum = 0; - ttl_samples = 0; - - if (num > ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP) - num = ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP; - - while (num--) { - dictEntry *de; - long long ttl; - - if ((de = dictGetRandomKey(db->expires)) == NULL) break; - ttl = dictGetSignedIntegerVal(de)-now; - if (activeExpireCycleTryExpire(db,de,now)) expired++; - if (ttl > 0) { - /* We want the average TTL of keys yet not expired. */ - ttl_sum += ttl; - ttl_samples++; - } - } - - /* Update the average TTL stats for this database. */ - if (ttl_samples) { - long long avg_ttl = ttl_sum/ttl_samples; - - /* Do a simple running average with a few samples. - * We just use the current estimate with a weight of 2% - * and the previous estimate with a weight of 98%. */ - if (db->avg_ttl == 0) db->avg_ttl = avg_ttl; - db->avg_ttl = (db->avg_ttl/50)*49 + (avg_ttl/50); - } - - /* We can't block forever here even if there are many keys to - * expire. So after a given amount of milliseconds return to the - * caller waiting for the other active expire cycle. */ - iteration++; - if ((iteration & 0xf) == 0) { /* check once every 16 iterations. */ - long long elapsed = ustime()-start; - - latencyAddSampleIfNeeded("expire-cycle",elapsed/1000); - if (elapsed > timelimit) timelimit_exit = 1; - } - if (timelimit_exit) return; - /* We don't repeat the cycle if there are less than 25% of keys - * found expired in the current DB. 
*/ - } while (expired > ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP/4); - } -} - -unsigned int getLRUClock(void) { - return (mstime()/LRU_CLOCK_RESOLUTION) & LRU_CLOCK_MAX; -} - /* Add a sample to the operations per second array of samples. */ void trackInstantaneousMetric(int metric, long long current_reading) { long long t = mstime() - server.inst_metric[metric].last_sample_time; @@ -3339,318 +3159,6 @@ void monitorCommand(client *c) { addReply(c,shared.ok); } -/* ============================ Maxmemory directive ======================== */ - -/* freeMemoryIfNeeded() gets called when 'maxmemory' is set on the config - * file to limit the max memory used by the server, before processing a - * command. - * - * The goal of the function is to free enough memory to keep Redis under the - * configured memory limit. - * - * The function starts calculating how many bytes should be freed to keep - * Redis under the limit, and enters a loop selecting the best keys to - * evict accordingly to the configured policy. - * - * If all the bytes needed to return back under the limit were freed the - * function returns C_OK, otherwise C_ERR is returned, and the caller - * should block the execution of commands that will result in more memory - * used by the server. - * - * ------------------------------------------------------------------------ - * - * LRU approximation algorithm - * - * Redis uses an approximation of the LRU algorithm that runs in constant - * memory. Every time there is a key to expire, we sample N keys (with - * N very small, usually in around 5) to populate a pool of best keys to - * evict of M keys (the pool size is defined by MAXMEMORY_EVICTION_POOL_SIZE). - * - * The N keys sampled are added in the pool of good keys to expire (the one - * with an old access time) if they are better than one of the current keys - * in the pool. - * - * After the pool is populated, the best key we have in the pool is expired. 
- * However note that we don't remove keys from the pool when they are deleted - * so the pool may contain keys that no longer exist. - * - * When we try to evict a key, and all the entries in the pool don't exist - * we populate it again. This time we'll be sure that the pool has at least - * one key that can be evicted, if there is at least one key that can be - * evicted in the whole database. */ - -/* Create a new eviction pool. */ -struct evictionPoolEntry *evictionPoolAlloc(void) { - struct evictionPoolEntry *ep; - int j; - - ep = zmalloc(sizeof(*ep)*MAXMEMORY_EVICTION_POOL_SIZE); - for (j = 0; j < MAXMEMORY_EVICTION_POOL_SIZE; j++) { - ep[j].idle = 0; - ep[j].key = NULL; - } - return ep; -} - -/* This is an helper function for freeMemoryIfNeeded(), it is used in order - * to populate the evictionPool with a few entries every time we want to - * expire a key. Keys with idle time smaller than one of the current - * keys are added. Keys are always added if there are free entries. - * - * We insert keys on place in ascending order, so keys with the smaller - * idle time are on the left, and keys with the higher idle time on the - * right. */ - -#define EVICTION_SAMPLES_ARRAY_SIZE 16 -void evictionPoolPopulate(dict *sampledict, dict *keydict, struct evictionPoolEntry *pool) { - int j, k, count; - dictEntry *_samples[EVICTION_SAMPLES_ARRAY_SIZE]; - dictEntry **samples; - - /* Try to use a static buffer: this function is a big hit... - * Note: it was actually measured that this helps. 
*/ - if (server.maxmemory_samples <= EVICTION_SAMPLES_ARRAY_SIZE) { - samples = _samples; - } else { - samples = zmalloc(sizeof(samples[0])*server.maxmemory_samples); - } - - count = dictGetSomeKeys(sampledict,samples,server.maxmemory_samples); - for (j = 0; j < count; j++) { - unsigned long long idle; - sds key; - robj *o; - dictEntry *de; - - de = samples[j]; - key = dictGetKey(de); - /* If the dictionary we are sampling from is not the main - * dictionary (but the expires one) we need to lookup the key - * again in the key dictionary to obtain the value object. */ - if (sampledict != keydict) de = dictFind(keydict, key); - o = dictGetVal(de); - idle = estimateObjectIdleTime(o); - - /* Insert the element inside the pool. - * First, find the first empty bucket or the first populated - * bucket that has an idle time smaller than our idle time. */ - k = 0; - while (k < MAXMEMORY_EVICTION_POOL_SIZE && - pool[k].key && - pool[k].idle < idle) k++; - if (k == 0 && pool[MAXMEMORY_EVICTION_POOL_SIZE-1].key != NULL) { - /* Can't insert if the element is < the worst element we have - * and there are no empty buckets. */ - continue; - } else if (k < MAXMEMORY_EVICTION_POOL_SIZE && pool[k].key == NULL) { - /* Inserting into empty position. No setup needed before insert. */ - } else { - /* Inserting in the middle. Now k points to the first element - * greater than the element to insert. */ - if (pool[MAXMEMORY_EVICTION_POOL_SIZE-1].key == NULL) { - /* Free space on the right? Insert at k shifting - * all the elements from k to end to the right. */ - memmove(pool+k+1,pool+k, - sizeof(pool[0])*(MAXMEMORY_EVICTION_POOL_SIZE-k-1)); - } else { - /* No free space on right? Insert at k-1 */ - k--; - /* Shift all elements on the left of k (included) to the - * left, so we discard the element with smaller idle time. 
*/ - sdsfree(pool[0].key); - memmove(pool,pool+1,sizeof(pool[0])*k); - } - } - pool[k].key = sdsdup(key); - pool[k].idle = idle; - } - if (samples != _samples) zfree(samples); -} - -int freeMemoryIfNeeded(void) { - size_t mem_reported, mem_used, mem_tofree, mem_freed; - int slaves = listLength(server.slaves); - mstime_t latency, eviction_latency; - long long delta; - - /* Check if we are over the memory usage limit. If we are not, no need - * to subtract the slaves output buffers. We can just return ASAP. */ - mem_reported = zmalloc_used_memory(); - if (mem_reported <= server.maxmemory) return C_OK; - - /* Remove the size of slaves output buffers and AOF buffer from the - * count of used memory. */ - mem_used = mem_reported; - if (slaves) { - listIter li; - listNode *ln; - - listRewind(server.slaves,&li); - while((ln = listNext(&li))) { - client *slave = listNodeValue(ln); - unsigned long obuf_bytes = getClientOutputBufferMemoryUsage(slave); - if (obuf_bytes > mem_used) - mem_used = 0; - else - mem_used -= obuf_bytes; - } - } - if (server.aof_state != AOF_OFF) { - mem_used -= sdslen(server.aof_buf); - mem_used -= aofRewriteBufferSize(); - } - - /* Check if we are still over the memory limit. */ - if (mem_used <= server.maxmemory) return C_OK; - - /* Compute how much memory we need to free. */ - mem_tofree = mem_used - server.maxmemory; - mem_freed = 0; - - if (server.maxmemory_policy == MAXMEMORY_NO_EVICTION) - goto cant_free; /* We need to free memory, but policy forbids. 
*/ - - latencyStartMonitor(latency); - while (mem_freed < mem_tofree) { - int j, k, keys_freed = 0; - - for (j = 0; j < server.dbnum; j++) { - long bestval = 0; /* just to prevent warning */ - sds bestkey = NULL; - dictEntry *de; - redisDb *db = server.db+j; - dict *dict; - - if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_LRU || - server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM) - { - dict = server.db[j].dict; - } else { - dict = server.db[j].expires; - } - if (dictSize(dict) == 0) continue; - - /* volatile-random and allkeys-random policy */ - if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM || - server.maxmemory_policy == MAXMEMORY_VOLATILE_RANDOM) - { - de = dictGetRandomKey(dict); - bestkey = dictGetKey(de); - } - - /* volatile-lru and allkeys-lru policy */ - else if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_LRU || - server.maxmemory_policy == MAXMEMORY_VOLATILE_LRU) - { - struct evictionPoolEntry *pool = db->eviction_pool; - - while(bestkey == NULL) { - evictionPoolPopulate(dict, db->dict, db->eviction_pool); - /* Go backward from best to worst element to evict. */ - for (k = MAXMEMORY_EVICTION_POOL_SIZE-1; k >= 0; k--) { - if (pool[k].key == NULL) continue; - de = dictFind(dict,pool[k].key); - - /* Remove the entry from the pool. */ - sdsfree(pool[k].key); - /* Shift all elements on its right to left. */ - memmove(pool+k,pool+k+1, - sizeof(pool[0])*(MAXMEMORY_EVICTION_POOL_SIZE-k-1)); - /* Clear the element on the right which is empty - * since we shifted one position to the left. */ - pool[MAXMEMORY_EVICTION_POOL_SIZE-1].key = NULL; - pool[MAXMEMORY_EVICTION_POOL_SIZE-1].idle = 0; - - /* If the key exists, is our pick. Otherwise it is - * a ghost and we need to try the next element. */ - if (de) { - bestkey = dictGetKey(de); - break; - } else { - /* Ghost... 
*/ - continue; - } - } - } - } - - /* volatile-ttl */ - else if (server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL) { - for (k = 0; k < server.maxmemory_samples; k++) { - sds thiskey; - long thisval; - - de = dictGetRandomKey(dict); - thiskey = dictGetKey(de); - thisval = (long) dictGetVal(de); - - /* Expire sooner (minor expire unix timestamp) is better - * candidate for deletion */ - if (bestkey == NULL || thisval < bestval) { - bestkey = thiskey; - bestval = thisval; - } - } - } - - /* Finally remove the selected key. */ - if (bestkey) { - robj *keyobj = createStringObject(bestkey,sdslen(bestkey)); - propagateExpire(db,keyobj,server.lazyfree_lazy_eviction); - /* We compute the amount of memory freed by db*Delete() alone. - * It is possible that actually the memory needed to propagate - * the DEL in AOF and replication link is greater than the one - * we are freeing removing the key, but we can't account for - * that otherwise we would never exit the loop. - * - * AOF and Output buffer memory will be freed eventually so - * we only care about memory used by the key space. */ - delta = (long long) zmalloc_used_memory(); - latencyStartMonitor(eviction_latency); - if (server.lazyfree_lazy_eviction) - dbAsyncDelete(db,keyobj); - else - dbSyncDelete(db,keyobj); - latencyEndMonitor(eviction_latency); - latencyAddSampleIfNeeded("eviction-del",eviction_latency); - latencyRemoveNestedEvent(latency,eviction_latency); - delta -= (long long) zmalloc_used_memory(); - mem_freed += delta; - server.stat_evictedkeys++; - notifyKeyspaceEvent(NOTIFY_EVICTED, "evicted", - keyobj, db->id); - decrRefCount(keyobj); - keys_freed++; - - /* When the memory to free starts to be big enough, we may - * start spending so much time here that is impossible to - * deliver data to the slaves fast enough, so we force the - * transmission here inside the loop. 
*/ - if (slaves) flushSlavesOutputBuffers(); - } - } - if (!keys_freed) { - latencyEndMonitor(latency); - latencyAddSampleIfNeeded("eviction-cycle",latency); - goto cant_free; /* nothing to free... */ - } - } - latencyEndMonitor(latency); - latencyAddSampleIfNeeded("eviction-cycle",latency); - return C_OK; - -cant_free: - /* We are here if we are not able to reclaim memory. There is only one - * last thing we can try: check if the lazyfree thread has jobs in queue - * and wait... */ - while(bioPendingJobsOfType(BIO_LAZY_FREE)) { - if (((mem_reported - zmalloc_used_memory()) + mem_freed) >= mem_tofree) - break; - usleep(1000); - } - return C_ERR; -} - /* =================================== Main! ================================ */ #ifdef __linux__ diff --git a/src/server.h b/src/server.h index 3ad196462..a238f41e8 100644 --- a/src/server.h +++ b/src/server.h @@ -1613,6 +1613,9 @@ void replyToBlockedClientTimedOut(client *c); int getTimeoutFromObjectOrReply(client *c, robj *object, mstime_t *timeout, int unit); void disconnectAllBlockedClients(void); +/* expire.c -- Handling of expired keys */ +void activeExpireCycle(int type); + /* Git SHA1 */ char *redisGitSHA1(void); char *redisGitDirty(void); From 0ed805f7f4826ff5af54a1df6dab32f15a74af9b Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 6 Jul 2016 15:28:18 +0200 Subject: [PATCH 0047/1722] Add expire.c and evict.c. --- src/evict.c | 364 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/expire.c | 354 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 718 insertions(+) create mode 100644 src/evict.c create mode 100644 src/expire.c diff --git a/src/evict.c b/src/evict.c new file mode 100644 index 000000000..c35b10b8f --- /dev/null +++ b/src/evict.c @@ -0,0 +1,364 @@ +/* Maxmemory directive handling (LRU eviction and other policies). + * + * ---------------------------------------------------------------------------- + * + * Copyright (c) 2009-2016, Salvatore Sanfilippo + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Redis nor the names of its contributors may be used + * to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "server.h" +#include "bio.h" + +/* Return the LRU clock, based on the clock resolution. This is a time + * in a reduced-bits format that can be used to set and check the + * object->lru field of redisObject structures. */ +unsigned int getLRUClock(void) { + return (mstime()/LRU_CLOCK_RESOLUTION) & LRU_CLOCK_MAX; +} + +/* Given an object returns the min number of milliseconds the object was never + * requested, using an approximated LRU algorithm. 
*/ +unsigned long long estimateObjectIdleTime(robj *o) { + unsigned long long lruclock = LRU_CLOCK(); + if (lruclock >= o->lru) { + return (lruclock - o->lru) * LRU_CLOCK_RESOLUTION; + } else { + return (lruclock + (LRU_CLOCK_MAX - o->lru)) * + LRU_CLOCK_RESOLUTION; + } +} + +/* freeMemoryIfNeeded() gets called when 'maxmemory' is set on the config + * file to limit the max memory used by the server, before processing a + * command. + * + * The goal of the function is to free enough memory to keep Redis under the + * configured memory limit. + * + * The function starts calculating how many bytes should be freed to keep + * Redis under the limit, and enters a loop selecting the best keys to + * evict accordingly to the configured policy. + * + * If all the bytes needed to return back under the limit were freed the + * function returns C_OK, otherwise C_ERR is returned, and the caller + * should block the execution of commands that will result in more memory + * used by the server. + * + * ------------------------------------------------------------------------ + * + * LRU approximation algorithm + * + * Redis uses an approximation of the LRU algorithm that runs in constant + * memory. Every time there is a key to expire, we sample N keys (with + * N very small, usually in around 5) to populate a pool of best keys to + * evict of M keys (the pool size is defined by MAXMEMORY_EVICTION_POOL_SIZE). + * + * The N keys sampled are added in the pool of good keys to expire (the one + * with an old access time) if they are better than one of the current keys + * in the pool. + * + * After the pool is populated, the best key we have in the pool is expired. + * However note that we don't remove keys from the pool when they are deleted + * so the pool may contain keys that no longer exist. + * + * When we try to evict a key, and all the entries in the pool don't exist + * we populate it again. 
This time we'll be sure that the pool has at least + * one key that can be evicted, if there is at least one key that can be + * evicted in the whole database. */ + +/* Create a new eviction pool. */ +struct evictionPoolEntry *evictionPoolAlloc(void) { + struct evictionPoolEntry *ep; + int j; + + ep = zmalloc(sizeof(*ep)*MAXMEMORY_EVICTION_POOL_SIZE); + for (j = 0; j < MAXMEMORY_EVICTION_POOL_SIZE; j++) { + ep[j].idle = 0; + ep[j].key = NULL; + } + return ep; +} + +/* This is an helper function for freeMemoryIfNeeded(), it is used in order + * to populate the evictionPool with a few entries every time we want to + * expire a key. Keys with idle time smaller than one of the current + * keys are added. Keys are always added if there are free entries. + * + * We insert keys on place in ascending order, so keys with the smaller + * idle time are on the left, and keys with the higher idle time on the + * right. */ + +#define EVICTION_SAMPLES_ARRAY_SIZE 16 +void evictionPoolPopulate(dict *sampledict, dict *keydict, struct evictionPoolEntry *pool) { + int j, k, count; + dictEntry *_samples[EVICTION_SAMPLES_ARRAY_SIZE]; + dictEntry **samples; + + /* Try to use a static buffer: this function is a big hit... + * Note: it was actually measured that this helps. */ + if (server.maxmemory_samples <= EVICTION_SAMPLES_ARRAY_SIZE) { + samples = _samples; + } else { + samples = zmalloc(sizeof(samples[0])*server.maxmemory_samples); + } + + count = dictGetSomeKeys(sampledict,samples,server.maxmemory_samples); + for (j = 0; j < count; j++) { + unsigned long long idle; + sds key; + robj *o; + dictEntry *de; + + de = samples[j]; + key = dictGetKey(de); + /* If the dictionary we are sampling from is not the main + * dictionary (but the expires one) we need to lookup the key + * again in the key dictionary to obtain the value object. 
*/ + if (sampledict != keydict) de = dictFind(keydict, key); + o = dictGetVal(de); + idle = estimateObjectIdleTime(o); + + /* Insert the element inside the pool. + * First, find the first empty bucket or the first populated + * bucket that has an idle time smaller than our idle time. */ + k = 0; + while (k < MAXMEMORY_EVICTION_POOL_SIZE && + pool[k].key && + pool[k].idle < idle) k++; + if (k == 0 && pool[MAXMEMORY_EVICTION_POOL_SIZE-1].key != NULL) { + /* Can't insert if the element is < the worst element we have + * and there are no empty buckets. */ + continue; + } else if (k < MAXMEMORY_EVICTION_POOL_SIZE && pool[k].key == NULL) { + /* Inserting into empty position. No setup needed before insert. */ + } else { + /* Inserting in the middle. Now k points to the first element + * greater than the element to insert. */ + if (pool[MAXMEMORY_EVICTION_POOL_SIZE-1].key == NULL) { + /* Free space on the right? Insert at k shifting + * all the elements from k to end to the right. */ + memmove(pool+k+1,pool+k, + sizeof(pool[0])*(MAXMEMORY_EVICTION_POOL_SIZE-k-1)); + } else { + /* No free space on right? Insert at k-1 */ + k--; + /* Shift all elements on the left of k (included) to the + * left, so we discard the element with smaller idle time. */ + sdsfree(pool[0].key); + memmove(pool,pool+1,sizeof(pool[0])*k); + } + } + pool[k].key = sdsdup(key); + pool[k].idle = idle; + } + if (samples != _samples) zfree(samples); +} + +int freeMemoryIfNeeded(void) { + size_t mem_reported, mem_used, mem_tofree, mem_freed; + int slaves = listLength(server.slaves); + mstime_t latency, eviction_latency; + long long delta; + + /* Check if we are over the memory usage limit. If we are not, no need + * to subtract the slaves output buffers. We can just return ASAP. */ + mem_reported = zmalloc_used_memory(); + if (mem_reported <= server.maxmemory) return C_OK; + + /* Remove the size of slaves output buffers and AOF buffer from the + * count of used memory. 
*/ + mem_used = mem_reported; + if (slaves) { + listIter li; + listNode *ln; + + listRewind(server.slaves,&li); + while((ln = listNext(&li))) { + client *slave = listNodeValue(ln); + unsigned long obuf_bytes = getClientOutputBufferMemoryUsage(slave); + if (obuf_bytes > mem_used) + mem_used = 0; + else + mem_used -= obuf_bytes; + } + } + if (server.aof_state != AOF_OFF) { + mem_used -= sdslen(server.aof_buf); + mem_used -= aofRewriteBufferSize(); + } + + /* Check if we are still over the memory limit. */ + if (mem_used <= server.maxmemory) return C_OK; + + /* Compute how much memory we need to free. */ + mem_tofree = mem_used - server.maxmemory; + mem_freed = 0; + + if (server.maxmemory_policy == MAXMEMORY_NO_EVICTION) + goto cant_free; /* We need to free memory, but policy forbids. */ + + latencyStartMonitor(latency); + while (mem_freed < mem_tofree) { + int j, k, keys_freed = 0; + + for (j = 0; j < server.dbnum; j++) { + long bestval = 0; /* just to prevent warning */ + sds bestkey = NULL; + dictEntry *de; + redisDb *db = server.db+j; + dict *dict; + + if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_LRU || + server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM) + { + dict = server.db[j].dict; + } else { + dict = server.db[j].expires; + } + if (dictSize(dict) == 0) continue; + + /* volatile-random and allkeys-random policy */ + if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM || + server.maxmemory_policy == MAXMEMORY_VOLATILE_RANDOM) + { + de = dictGetRandomKey(dict); + bestkey = dictGetKey(de); + } + + /* volatile-lru and allkeys-lru policy */ + else if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_LRU || + server.maxmemory_policy == MAXMEMORY_VOLATILE_LRU) + { + struct evictionPoolEntry *pool = db->eviction_pool; + + while(bestkey == NULL) { + evictionPoolPopulate(dict, db->dict, db->eviction_pool); + /* Go backward from best to worst element to evict. 
*/ + for (k = MAXMEMORY_EVICTION_POOL_SIZE-1; k >= 0; k--) { + if (pool[k].key == NULL) continue; + de = dictFind(dict,pool[k].key); + + /* Remove the entry from the pool. */ + sdsfree(pool[k].key); + /* Shift all elements on its right to left. */ + memmove(pool+k,pool+k+1, + sizeof(pool[0])*(MAXMEMORY_EVICTION_POOL_SIZE-k-1)); + /* Clear the element on the right which is empty + * since we shifted one position to the left. */ + pool[MAXMEMORY_EVICTION_POOL_SIZE-1].key = NULL; + pool[MAXMEMORY_EVICTION_POOL_SIZE-1].idle = 0; + + /* If the key exists, is our pick. Otherwise it is + * a ghost and we need to try the next element. */ + if (de) { + bestkey = dictGetKey(de); + break; + } else { + /* Ghost... */ + continue; + } + } + } + } + + /* volatile-ttl */ + else if (server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL) { + for (k = 0; k < server.maxmemory_samples; k++) { + sds thiskey; + long thisval; + + de = dictGetRandomKey(dict); + thiskey = dictGetKey(de); + thisval = (long) dictGetVal(de); + + /* Expire sooner (minor expire unix timestamp) is better + * candidate for deletion */ + if (bestkey == NULL || thisval < bestval) { + bestkey = thiskey; + bestval = thisval; + } + } + } + + /* Finally remove the selected key. */ + if (bestkey) { + robj *keyobj = createStringObject(bestkey,sdslen(bestkey)); + propagateExpire(db,keyobj,server.lazyfree_lazy_eviction); + /* We compute the amount of memory freed by db*Delete() alone. + * It is possible that actually the memory needed to propagate + * the DEL in AOF and replication link is greater than the one + * we are freeing removing the key, but we can't account for + * that otherwise we would never exit the loop. + * + * AOF and Output buffer memory will be freed eventually so + * we only care about memory used by the key space. 
*/ + delta = (long long) zmalloc_used_memory(); + latencyStartMonitor(eviction_latency); + if (server.lazyfree_lazy_eviction) + dbAsyncDelete(db,keyobj); + else + dbSyncDelete(db,keyobj); + latencyEndMonitor(eviction_latency); + latencyAddSampleIfNeeded("eviction-del",eviction_latency); + latencyRemoveNestedEvent(latency,eviction_latency); + delta -= (long long) zmalloc_used_memory(); + mem_freed += delta; + server.stat_evictedkeys++; + notifyKeyspaceEvent(NOTIFY_EVICTED, "evicted", + keyobj, db->id); + decrRefCount(keyobj); + keys_freed++; + + /* When the memory to free starts to be big enough, we may + * start spending so much time here that is impossible to + * deliver data to the slaves fast enough, so we force the + * transmission here inside the loop. */ + if (slaves) flushSlavesOutputBuffers(); + } + } + if (!keys_freed) { + latencyEndMonitor(latency); + latencyAddSampleIfNeeded("eviction-cycle",latency); + goto cant_free; /* nothing to free... */ + } + } + latencyEndMonitor(latency); + latencyAddSampleIfNeeded("eviction-cycle",latency); + return C_OK; + +cant_free: + /* We are here if we are not able to reclaim memory. There is only one + * last thing we can try: check if the lazyfree thread has jobs in queue + * and wait... */ + while(bioPendingJobsOfType(BIO_LAZY_FREE)) { + if (((mem_reported - zmalloc_used_memory()) + mem_freed) >= mem_tofree) + break; + usleep(1000); + } + return C_ERR; +} + diff --git a/src/expire.c b/src/expire.c new file mode 100644 index 000000000..ccfa959ef --- /dev/null +++ b/src/expire.c @@ -0,0 +1,354 @@ +/* Implementation of EXPIRE (keys with fixed time to live). + * + * ---------------------------------------------------------------------------- + * + * Copyright (c) 2009-2016, Salvatore Sanfilippo + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Redis nor the names of its contributors may be used + * to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "server.h" + +/*----------------------------------------------------------------------------- + * Incremental collection of expired keys. + * + * When keys are accessed they are expired on-access. However we need a + * mechanism in order to ensure keys are eventually removed when expired even + * if no access is performed on them. + *----------------------------------------------------------------------------*/ + +/* Helper function for the activeExpireCycle() function. 
+ * This function will try to expire the key that is stored in the hash table + * entry 'de' of the 'expires' hash table of a Redis database. + * + * If the key is found to be expired, it is removed from the database and + * 1 is returned. Otherwise no operation is performed and 0 is returned. + * + * When a key is expired, server.stat_expiredkeys is incremented. + * + * The parameter 'now' is the current time in milliseconds as is passed + * to the function to avoid too many gettimeofday() syscalls. */ +int activeExpireCycleTryExpire(redisDb *db, dictEntry *de, long long now) { + long long t = dictGetSignedIntegerVal(de); + if (now > t) { + sds key = dictGetKey(de); + robj *keyobj = createStringObject(key,sdslen(key)); + + propagateExpire(db,keyobj,server.lazyfree_lazy_expire); + if (server.lazyfree_lazy_expire) + dbAsyncDelete(db,keyobj); + else + dbSyncDelete(db,keyobj); + notifyKeyspaceEvent(NOTIFY_EXPIRED, + "expired",keyobj,db->id); + decrRefCount(keyobj); + server.stat_expiredkeys++; + return 1; + } else { + return 0; + } +} + +/* Try to expire a few timed out keys. The algorithm used is adaptive and + * will use few CPU cycles if there are few expiring keys, otherwise + * it will get more aggressive to avoid that too much memory is used by + * keys that can be removed from the keyspace. + * + * No more than CRON_DBS_PER_CALL databases are tested at every + * iteration. + * + * This kind of call is used when Redis detects that timelimit_exit is + * true, so there is more work to do, and we do it more incrementally from + * the beforeSleep() function of the event loop. + * + * Expire cycle type: + * + * If type is ACTIVE_EXPIRE_CYCLE_FAST the function will try to run a + * "fast" expire cycle that takes no longer than EXPIRE_FAST_CYCLE_DURATION + * microseconds, and is not repeated again before the same amount of time. 
+ * + * If type is ACTIVE_EXPIRE_CYCLE_SLOW, that normal expire cycle is + * executed, where the time limit is a percentage of the REDIS_HZ period + * as specified by the REDIS_EXPIRELOOKUPS_TIME_PERC define. */ + +void activeExpireCycle(int type) { + /* This function has some global state in order to continue the work + * incrementally across calls. */ + static unsigned int current_db = 0; /* Last DB tested. */ + static int timelimit_exit = 0; /* Time limit hit in previous call? */ + static long long last_fast_cycle = 0; /* When last fast cycle ran. */ + + int j, iteration = 0; + int dbs_per_call = CRON_DBS_PER_CALL; + long long start = ustime(), timelimit; + + if (type == ACTIVE_EXPIRE_CYCLE_FAST) { + /* Don't start a fast cycle if the previous cycle did not exited + * for time limt. Also don't repeat a fast cycle for the same period + * as the fast cycle total duration itself. */ + if (!timelimit_exit) return; + if (start < last_fast_cycle + ACTIVE_EXPIRE_CYCLE_FAST_DURATION*2) return; + last_fast_cycle = start; + } + + /* We usually should test CRON_DBS_PER_CALL per iteration, with + * two exceptions: + * + * 1) Don't test more DBs than we have. + * 2) If last time we hit the time limit, we want to scan all DBs + * in this iteration, as there is work to do in some DB and we don't want + * expired keys to use memory for too much time. */ + if (dbs_per_call > server.dbnum || timelimit_exit) + dbs_per_call = server.dbnum; + + /* We can use at max ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC percentage of CPU time + * per iteration. Since this function gets called with a frequency of + * server.hz times per second, the following is the max amount of + * microseconds we can spend in this function. */ + timelimit = 1000000*ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC/server.hz/100; + timelimit_exit = 0; + if (timelimit <= 0) timelimit = 1; + + if (type == ACTIVE_EXPIRE_CYCLE_FAST) + timelimit = ACTIVE_EXPIRE_CYCLE_FAST_DURATION; /* in microseconds. 
*/ + + for (j = 0; j < dbs_per_call; j++) { + int expired; + redisDb *db = server.db+(current_db % server.dbnum); + + /* Increment the DB now so we are sure if we run out of time + * in the current DB we'll restart from the next. This allows to + * distribute the time evenly across DBs. */ + current_db++; + + /* Continue to expire if at the end of the cycle more than 25% + * of the keys were expired. */ + do { + unsigned long num, slots; + long long now, ttl_sum; + int ttl_samples; + + /* If there is nothing to expire try next DB ASAP. */ + if ((num = dictSize(db->expires)) == 0) { + db->avg_ttl = 0; + break; + } + slots = dictSlots(db->expires); + now = mstime(); + + /* When there are less than 1% filled slots getting random + * keys is expensive, so stop here waiting for better times... + * The dictionary will be resized asap. */ + if (num && slots > DICT_HT_INITIAL_SIZE && + (num*100/slots < 1)) break; + + /* The main collection cycle. Sample random keys among keys + * with an expire set, checking for expired ones. */ + expired = 0; + ttl_sum = 0; + ttl_samples = 0; + + if (num > ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP) + num = ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP; + + while (num--) { + dictEntry *de; + long long ttl; + + if ((de = dictGetRandomKey(db->expires)) == NULL) break; + ttl = dictGetSignedIntegerVal(de)-now; + if (activeExpireCycleTryExpire(db,de,now)) expired++; + if (ttl > 0) { + /* We want the average TTL of keys yet not expired. */ + ttl_sum += ttl; + ttl_samples++; + } + } + + /* Update the average TTL stats for this database. */ + if (ttl_samples) { + long long avg_ttl = ttl_sum/ttl_samples; + + /* Do a simple running average with a few samples. + * We just use the current estimate with a weight of 2% + * and the previous estimate with a weight of 98%. */ + if (db->avg_ttl == 0) db->avg_ttl = avg_ttl; + db->avg_ttl = (db->avg_ttl/50)*49 + (avg_ttl/50); + } + + /* We can't block forever here even if there are many keys to + * expire. 
So after a given amount of milliseconds return to the + * caller waiting for the other active expire cycle. */ + iteration++; + if ((iteration & 0xf) == 0) { /* check once every 16 iterations. */ + long long elapsed = ustime()-start; + + latencyAddSampleIfNeeded("expire-cycle",elapsed/1000); + if (elapsed > timelimit) timelimit_exit = 1; + } + if (timelimit_exit) return; + /* We don't repeat the cycle if there are less than 25% of keys + * found expired in the current DB. */ + } while (expired > ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP/4); + } +} + +/*----------------------------------------------------------------------------- + * Expires Commands + *----------------------------------------------------------------------------*/ + +/* This is the generic command implementation for EXPIRE, PEXPIRE, EXPIREAT + * and PEXPIREAT. Because the commad second argument may be relative or absolute + * the "basetime" argument is used to signal what the base time is (either 0 + * for *AT variants of the command, or the current time for relative expires). + * + * unit is either UNIT_SECONDS or UNIT_MILLISECONDS, and is only used for + * the argv[2] parameter. The basetime is always specified in milliseconds. */ +void expireGenericCommand(client *c, long long basetime, int unit) { + robj *key = c->argv[1], *param = c->argv[2]; + long long when; /* unix time in milliseconds when the key will expire. */ + + if (getLongLongFromObjectOrReply(c, param, &when, NULL) != C_OK) + return; + + if (unit == UNIT_SECONDS) when *= 1000; + when += basetime; + + /* No key, return zero. */ + if (lookupKeyWrite(c->db,key) == NULL) { + addReply(c,shared.czero); + return; + } + + /* EXPIRE with negative TTL, or EXPIREAT with a timestamp into the past + * should never be executed as a DEL when load the AOF or in the context + * of a slave instance. + * + * Instead we take the other branch of the IF statement setting an expire + * (possibly in the past) and wait for an explicit DEL from the master. 
*/ + if (when <= mstime() && !server.loading && !server.masterhost) { + robj *aux; + + int deleted = server.lazyfree_lazy_expire ? dbAsyncDelete(c->db,key) : + dbSyncDelete(c->db,key); + serverAssertWithInfo(c,key,deleted); + server.dirty++; + + /* Replicate/AOF this as an explicit DEL or UNLINK. */ + aux = server.lazyfree_lazy_expire ? shared.unlink : shared.del; + rewriteClientCommandVector(c,2,aux,key); + signalModifiedKey(c->db,key); + notifyKeyspaceEvent(NOTIFY_GENERIC,"del",key,c->db->id); + addReply(c, shared.cone); + return; + } else { + setExpire(c->db,key,when); + addReply(c,shared.cone); + signalModifiedKey(c->db,key); + notifyKeyspaceEvent(NOTIFY_GENERIC,"expire",key,c->db->id); + server.dirty++; + return; + } +} + +/* EXPIRE key seconds */ +void expireCommand(client *c) { + expireGenericCommand(c,mstime(),UNIT_SECONDS); +} + +/* EXPIREAT key time */ +void expireatCommand(client *c) { + expireGenericCommand(c,0,UNIT_SECONDS); +} + +/* PEXPIRE key milliseconds */ +void pexpireCommand(client *c) { + expireGenericCommand(c,mstime(),UNIT_MILLISECONDS); +} + +/* PEXPIREAT key ms_time */ +void pexpireatCommand(client *c) { + expireGenericCommand(c,0,UNIT_MILLISECONDS); +} + +/* Implements TTL and PTTL */ +void ttlGenericCommand(client *c, int output_ms) { + long long expire, ttl = -1; + + /* If the key does not exist at all, return -2 */ + if (lookupKeyReadWithFlags(c->db,c->argv[1],LOOKUP_NOTOUCH) == NULL) { + addReplyLongLong(c,-2); + return; + } + /* The key exists. Return -1 if it has no expire, or the actual + * TTL value otherwise. */ + expire = getExpire(c->db,c->argv[1]); + if (expire != -1) { + ttl = expire-mstime(); + if (ttl < 0) ttl = 0; + } + if (ttl == -1) { + addReplyLongLong(c,-1); + } else { + addReplyLongLong(c,output_ms ? 
ttl : ((ttl+500)/1000)); + } +} + +/* TTL key */ +void ttlCommand(client *c) { + ttlGenericCommand(c, 0); +} + +/* PTTL key */ +void pttlCommand(client *c) { + ttlGenericCommand(c, 1); +} + +/* PERSIST key */ +void persistCommand(client *c) { + dictEntry *de; + + de = dictFind(c->db->dict,c->argv[1]->ptr); + if (de == NULL) { + addReply(c,shared.czero); + } else { + if (removeExpire(c->db,c->argv[1])) { + addReply(c,shared.cone); + server.dirty++; + } else { + addReply(c,shared.czero); + } + } +} + +/* TOUCH key1 [key2 key3 ... keyN] */ +void touchCommand(client *c) { + int touched = 0; + for (int j = 1; j < c->argc; j++) + if (lookupKeyRead(c->db,c->argv[j]) != NULL) touched++; + addReplyLongLong(c,touched); +} + From d7322b6f2f6f953a0ee7026a45a7915331edf992 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 6 Jul 2016 16:02:38 +0200 Subject: [PATCH 0048/1722] geohash.c and geohash_helper.c are part of Redis. They were under /deps since they originate from a different source tree, however at this point they are very modified and we took ownership of both the files making changes, fixing bugs, so there is no upgrade path from the original code tree. Given that, better to move the code under /src with proper dependencies and with a more simpler editing experience. 
--- deps/Makefile | 7 ------- deps/geohash-int/Makefile | 23 ---------------------- src/Makefile | 11 +++++------ {deps/geohash-int => src}/geohash.c | 6 +++--- {deps/geohash-int => src}/geohash.h | 0 {deps/geohash-int => src}/geohash_helper.c | 8 ++++---- {deps/geohash-int => src}/geohash_helper.h | 0 7 files changed, 12 insertions(+), 43 deletions(-) delete mode 100644 deps/geohash-int/Makefile rename {deps/geohash-int => src}/geohash.c (98%) rename {deps/geohash-int => src}/geohash.h (100%) rename {deps/geohash-int => src}/geohash_helper.c (98%) rename {deps/geohash-int => src}/geohash_helper.h (100%) diff --git a/deps/Makefile b/deps/Makefile index 1c10bce9e..e148a331c 100644 --- a/deps/Makefile +++ b/deps/Makefile @@ -36,7 +36,6 @@ distclean: -(cd hiredis && $(MAKE) clean) > /dev/null || true -(cd linenoise && $(MAKE) clean) > /dev/null || true -(cd lua && $(MAKE) clean) > /dev/null || true - -(cd geohash-int && $(MAKE) clean) > /dev/null || true -(cd jemalloc && [ -f Makefile ] && $(MAKE) distclean) > /dev/null || true -(rm -f .make-*) @@ -82,9 +81,3 @@ jemalloc: .make-prerequisites cd jemalloc && $(MAKE) CFLAGS="$(JEMALLOC_CFLAGS)" LDFLAGS="$(JEMALLOC_LDFLAGS)" lib/libjemalloc.a .PHONY: jemalloc - -geohash-int: .make-prerequisites - @printf '%b %b\n' $(MAKECOLOR)MAKE$(ENDCOLOR) $(BINCOLOR)$@$(ENDCOLOR) - cd geohash-int && $(MAKE) - -.PHONY: geohash-int diff --git a/deps/geohash-int/Makefile b/deps/geohash-int/Makefile deleted file mode 100644 index b7c259577..000000000 --- a/deps/geohash-int/Makefile +++ /dev/null @@ -1,23 +0,0 @@ -STD= -WARN= -Wall -OPT= -O2 - -R_CFLAGS= $(STD) $(WARN) $(OPT) $(DEBUG) $(CFLAGS) -R_LDFLAGS= $(LDFLAGS) -DEBUG= -g - -R_CC=$(CC) $(R_CFLAGS) -R_LD=$(CC) $(R_LDFLAGS) - -all: geohash.o geohash_helper.o - -.PHONY: all - -geohash.o: geohash.h geohash.c -geohash_helper.o: geohash.h geohash_helper.h geohash_helper.c - -.c.o: - $(R_CC) -c $< - -clean: - rm -f *.o diff --git a/src/Makefile b/src/Makefile index 2ee01a463..325402ec2 
100644 --- a/src/Makefile +++ b/src/Makefile @@ -15,12 +15,12 @@ release_hdr := $(shell sh -c './mkreleasehdr.sh') uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not') OPTIMIZATION?=-O2 -DEPENDENCY_TARGETS=hiredis linenoise lua geohash-int +DEPENDENCY_TARGETS=hiredis linenoise lua NODEPS:=clean distclean # Default settings STD=-std=c99 -pedantic -DREDIS_STATIC='' -WARN=-Wall -W +WARN=-Wall -W -Wno-missing-field-initializers OPT=$(OPTIMIZATION) PREFIX?=/usr/local @@ -54,7 +54,7 @@ endif # Override default settings if possible -include .make-settings -FINAL_CFLAGS=$(STD) $(WARN) $(OPT) $(DEBUG) $(CFLAGS) $(REDIS_CFLAGS) -I../deps/geohash-int +FINAL_CFLAGS=$(STD) $(WARN) $(OPT) $(DEBUG) $(CFLAGS) $(REDIS_CFLAGS) FINAL_LDFLAGS=$(LDFLAGS) $(REDIS_LDFLAGS) $(DEBUG) FINAL_LIBS=-lm DEBUG=-g -ggdb @@ -128,8 +128,7 @@ endif REDIS_SERVER_NAME=redis-server REDIS_SENTINEL_NAME=redis-sentinel -REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o geo.o lazyfree.o module.o evict.o expire.o -REDIS_GEOHASH_OBJ=../deps/geohash-int/geohash.o ../deps/geohash-int/geohash_helper.o +REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o 
hyperloglog.o latency.o sparkline.o redis-check-rdb.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o REDIS_CLI_NAME=redis-cli REDIS_CLI_OBJ=anet.o adlist.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o REDIS_BENCHMARK_NAME=redis-benchmark @@ -182,7 +181,7 @@ endif # redis-server $(REDIS_SERVER_NAME): $(REDIS_SERVER_OBJ) - $(REDIS_LD) -o $@ $^ ../deps/hiredis/libhiredis.a ../deps/lua/src/liblua.a $(REDIS_GEOHASH_OBJ) $(FINAL_LIBS) + $(REDIS_LD) -o $@ $^ ../deps/hiredis/libhiredis.a ../deps/lua/src/liblua.a $(FINAL_LIBS) # redis-sentinel $(REDIS_SENTINEL_NAME): $(REDIS_SERVER_NAME) diff --git a/deps/geohash-int/geohash.c b/src/geohash.c similarity index 98% rename from deps/geohash-int/geohash.c rename to src/geohash.c index d3bc7de25..a5e1dffbf 100644 --- a/deps/geohash-int/geohash.c +++ b/src/geohash.c @@ -151,7 +151,7 @@ int geohashEncode(GeoHashRange *long_range, GeoHashRange *lat_range, } int geohashEncodeType(double longitude, double latitude, uint8_t step, GeoHashBits *hash) { - GeoHashRange r[2] = { { 0 } }; + GeoHashRange r[2] = {{0}}; geohashGetCoordRange(&r[0], &r[1]); return geohashEncode(&r[0], &r[1], longitude, latitude, step, hash); } @@ -194,7 +194,7 @@ int geohashDecode(const GeoHashRange long_range, const GeoHashRange lat_range, } int geohashDecodeType(const GeoHashBits hash, GeoHashArea *area) { - GeoHashRange r[2] = { { 0 } }; + GeoHashRange r[2] = {{0}}; geohashGetCoordRange(&r[0], &r[1]); return geohashDecode(r[0], r[1], hash, area); } @@ -211,7 +211,7 @@ int geohashDecodeAreaToLongLat(const GeoHashArea *area, double *xy) { } int geohashDecodeToLongLatType(const GeoHashBits hash, double *xy) { - GeoHashArea area = { { 0 } }; + GeoHashArea area = {{0}}; if (!xy || !geohashDecodeType(hash, &area)) return 0; return geohashDecodeAreaToLongLat(&area, xy); diff --git a/deps/geohash-int/geohash.h b/src/geohash.h similarity index 100% rename from deps/geohash-int/geohash.h rename to src/geohash.h diff --git 
a/deps/geohash-int/geohash_helper.c b/src/geohash_helper.c similarity index 98% rename from deps/geohash-int/geohash_helper.c rename to src/geohash_helper.c index 4b8894676..da8833fa8 100644 --- a/deps/geohash-int/geohash_helper.c +++ b/src/geohash_helper.c @@ -110,10 +110,10 @@ int geohashBoundingBox(double longitude, double latitude, double radius_meters, GeoHashRadius geohashGetAreasByRadius(double longitude, double latitude, double radius_meters) { GeoHashRange long_range, lat_range; - GeoHashRadius radius = { { 0 } }; - GeoHashBits hash = { 0 }; - GeoHashNeighbors neighbors = { { 0 } }; - GeoHashArea area = { { 0 } }; + GeoHashRadius radius = {{0}}; + GeoHashBits hash = {0,0}; + GeoHashNeighbors neighbors = {{0}}; + GeoHashArea area = {{0}}; double min_lon, max_lon, min_lat, max_lat; double bounds[4]; int steps; diff --git a/deps/geohash-int/geohash_helper.h b/src/geohash_helper.h similarity index 100% rename from deps/geohash-int/geohash_helper.h rename to src/geohash_helper.h From 173cdcaccd378af5e46d11870eaf07c8f7ca1805 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 6 Jul 2016 16:31:11 +0200 Subject: [PATCH 0049/1722] Fix definition of M_PI in geohash_helper.c. Without the right feature macros M_PI is not defined in math.h. 
--- src/geohash_helper.c | 2 ++ src/geohash_helper.h | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/geohash_helper.c b/src/geohash_helper.c index da8833fa8..585d1005b 100644 --- a/src/geohash_helper.c +++ b/src/geohash_helper.c @@ -34,7 +34,9 @@ * https://github.com/yinqiwen/ardb/blob/d42503/src/geo/geohash_helper.cpp */ +#include "fmacros.h" #include "geohash_helper.h" +#include #define D_R (M_PI / 180.0) #define R_MAJOR 6378137.0 diff --git a/src/geohash_helper.h b/src/geohash_helper.h index bff111dbe..eb0dda38a 100644 --- a/src/geohash_helper.h +++ b/src/geohash_helper.h @@ -32,7 +32,6 @@ #ifndef GEOHASH_HELPER_HPP_ #define GEOHASH_HELPER_HPP_ -#include #include "geohash.h" #define GZERO(s) s.bits = s.step = 0; From ceaa949cb8388772cedb0697b020c94f3329f529 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 6 Jul 2016 16:38:05 +0200 Subject: [PATCH 0050/1722] Fix signess issue in geohashEstimateStepsByRadius(). --- src/geohash_helper.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/geohash_helper.c b/src/geohash_helper.c index 585d1005b..6e8ca4070 100644 --- a/src/geohash_helper.c +++ b/src/geohash_helper.c @@ -80,7 +80,8 @@ uint8_t geohashEstimateStepsByRadius(double range_meters, double lat) { int geohashBitsComparator(const GeoHashBits *a, const GeoHashBits *b) { /* If step not equal, compare on step. Else, compare on bits. */ - return a->step != b->step ? a->step - b->step : a->bits - b->bits; + return a->step != b->step ? (a->step - b->step) : + ((int64_t)a->bits - (int64_t)b->bits); } int geohashBoundingBox(double longitude, double latitude, double radius_meters, From 6430ab2861e945a1aa430cb22b3fb8d465cc85b4 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 6 Jul 2016 16:39:23 +0200 Subject: [PATCH 0051/1722] Remove dead code from geohash_helper.c. The function removed also had potential bugs related to signess of the expression, and is not used anyway. 
--- src/geohash_helper.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/geohash_helper.c b/src/geohash_helper.c index 6e8ca4070..a65759bc4 100644 --- a/src/geohash_helper.c +++ b/src/geohash_helper.c @@ -78,12 +78,6 @@ uint8_t geohashEstimateStepsByRadius(double range_meters, double lat) { return step; } -int geohashBitsComparator(const GeoHashBits *a, const GeoHashBits *b) { - /* If step not equal, compare on step. Else, compare on bits. */ - return a->step != b->step ? (a->step - b->step) : - ((int64_t)a->bits - (int64_t)b->bits); -} - int geohashBoundingBox(double longitude, double latitude, double radius_meters, double *bounds) { if (!bounds) return 0; From 4e3fea4f1a86709d16151324b2aa127bc8826d09 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 6 Jul 2016 19:00:43 +0200 Subject: [PATCH 0052/1722] Fix redis_check_rdb() return value. --- src/redis-check-rdb.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/redis-check-rdb.c b/src/redis-check-rdb.c index e123b7f34..8da860a08 100644 --- a/src/redis-check-rdb.c +++ b/src/redis-check-rdb.c @@ -172,7 +172,8 @@ void rdbCheckSetupSignals(void) { sigaction(SIGILL, &act, NULL); } -/* Check the specified RDB file. */ +/* Check the specified RDB file. Return 0 if the RDB looks sane, otherwise + * 1 is returned. */ int redis_check_rdb(char *rdbfilename) { uint64_t dbid; int type, rdbver; @@ -181,7 +182,7 @@ int redis_check_rdb(char *rdbfilename) { FILE *fp; rio rdb; - if ((fp = fopen(rdbfilename,"r")) == NULL) return C_ERR; + if ((fp = fopen(rdbfilename,"r")) == NULL) return 1; rioInitWithFile(&rdb,fp); rdbstate.rio = &rdb; From 3b5a3c72492c82e3535d59dd1166df616fe86ca2 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 6 Jul 2016 19:05:44 +0200 Subject: [PATCH 0053/1722] redis_check_rdb_main(): create shared objects only if needed. Otherwise Valgrind will complain a memory leak under certain tests where RDB checking is invoked from within Redis. 
--- src/redis-check-rdb.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/redis-check-rdb.c b/src/redis-check-rdb.c index 8da860a08..53fb67b80 100644 --- a/src/redis-check-rdb.c +++ b/src/redis-check-rdb.c @@ -332,7 +332,11 @@ int redis_check_rdb_main(int argc, char **argv) { fprintf(stderr, "Usage: %s \n", argv[0]); exit(1); } - createSharedObjects(); /* Needed for loading. */ + /* In order to call the loading functions we need to create the shared + * integer objects, however since this function may be called from + * an already initialized Redis instance, check if we really need to. */ + if (shared.integers[0] == NULL) + createSharedObjects(); server.loading_process_events_interval_bytes = 0; rdbCheckMode = 1; rdbCheckInfo("Checking RDB file %s", argv[1]); From 2e7ab9a360c84bdf6e88f258cf6b39407aae4383 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 6 Jul 2016 19:12:24 +0200 Subject: [PATCH 0054/1722] redis_check_rdb(): the rio structure must be global. The rio structure is referenced in the global 'riostate' structure in order for the logging functions to be always able to access the state of the "pseudo-loading" of the RDB, needed for the check. Courtesy of Valgrind. --- src/redis-check-rdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/redis-check-rdb.c b/src/redis-check-rdb.c index 53fb67b80..08be40f6a 100644 --- a/src/redis-check-rdb.c +++ b/src/redis-check-rdb.c @@ -180,7 +180,7 @@ int redis_check_rdb(char *rdbfilename) { char buf[1024]; long long expiretime, now = mstime(); FILE *fp; - rio rdb; + static rio rdb; /* Pointed by global struct riostate. */ if ((fp = fopen(rdbfilename,"r")) == NULL) return 1; From c35d315148e0c28cd3040c8ee798c4deff687bb3 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 11 Jul 2016 16:22:09 +0200 Subject: [PATCH 0055/1722] LRU: test-lru.rb improved in different ways. 1. Scan keys with pause to account for actual LRU precision. 2. Test cross-DB with 100 keys allocated in DB1. 
3. Output results that don't fluctuate depending on number of keys. 4. Output results in percentage to make more sense. 5. Save file instead of outputting to STDOUT. 6. Support running multiple times with average of outputs. 7. Label each square (DIV) with its ID as HTML title. --- utils/lru/README | 10 +- utils/lru/test-lru.rb | 272 +++++++++++++++++++++++++++--------------- 2 files changed, 182 insertions(+), 100 deletions(-) diff --git a/utils/lru/README b/utils/lru/README index 288189e3e..f043b2979 100644 --- a/utils/lru/README +++ b/utils/lru/README @@ -3,11 +3,17 @@ Redis approximated LRU algorithm against the theoretical output of true LRU algorithm. In order to use the program you need to recompile Redis setting the define -REDIS_LRU_CLOCK_RESOLUTION to 1, by editing redis.h. +REDIS_LRU_CLOCK_RESOLUTION to 1, by editing the file server.h. This allows to execute the program in a fast way since the 1 ms resolution is enough for all the objects to have a different enough time stamp during the test. The program is executed like this: - ruby test-lru.rb > /tmp/lru.html + ruby test-lru.rb /tmp/lru.html + +You can optionally specify a number of times to run, so that the program +will output averages of different runs, by adding an additional argument. +For instance in order to run the test 10 times use: + + ruby test-lru.rb /tmp/lru.html 10 diff --git a/utils/lru/test-lru.rb b/utils/lru/test-lru.rb index ee0527ef4..dadc6d505 100644 --- a/utils/lru/test-lru.rb +++ b/utils/lru/test-lru.rb @@ -1,112 +1,188 @@ require 'rubygems' require 'redis' -r = Redis.new -r.config("SET","maxmemory","2000000") -r.config("SET","maxmemory-policy","allkeys-lru") -r.config("SET","maxmemory-samples",5) -r.config("RESETSTAT") -r.flushall +$runs = []; # Remember the error rate of each run for average purposes. -puts < - - -
+    .new {
+        border: 1px green solid;
+    }
+
+    .otherdb {
+        border: 1px red solid;
+    }
+
+    .ex {
+        background-color: #666;
+    }
+    
+    
 EOF
 
-# Fill
-oldsize = r.dbsize
-id = 0
-while true
-    id += 1
-    r.set(id,"foo")
-    newsize = r.dbsize
-    break if newsize == oldsize
-    oldsize = newsize
-end
-
-inserted = r.dbsize
-first_set_max_id = id
-puts "#{r.dbsize} keys inserted"
-
-# Access keys sequentially
-
-puts "Access keys sequentially"
-(1..first_set_max_id).each{|id|
-    r.get(id)
-#    sleep 0.001
-}
-
-# Insert more 50% keys. We expect that the new keys
-half = inserted/2
-puts "Insert enough keys to evict half the keys we inserted"
-add = 0
-while true
-    add += 1
-    id += 1
-    r.set(id,"foo")
-    break if r.info['evicted_keys'].to_i >= half
-end
-
-puts "#{add} additional keys added."
-puts "#{r.dbsize} keys in DB"
-
-# Check if evicted keys respect LRU
-# We consider errors from 1 to N progressively more serious as they violate
-# more the access pattern.
-
-errors = 0
-e = 1
-edecr = 1.0/(first_set_max_id/2)
-(1..(first_set_max_id/2)).each{|id|
-    e -= edecr if e > 0
-    e = 0 if e < 0
-    if r.exists(id)
-        errors += e
-    end
-}
-
-puts "#{errors} errors!"
-puts "
" - -# Generate the graphical representation -(1..id).each{|id| - # Mark first set and added items in a different way. - c = "box" - if id <= first_set_max_id - c << " old" - else - c << " new" + # Fill the DB up to the first eviction. + oldsize = r.dbsize + id = 0 + while true + id += 1 + r.set(id,"foo") + newsize = r.dbsize + break if newsize == oldsize # A key was evicted? Stop. + oldsize = newsize end - # Add class if exists - c << " ex" if r.exists(id) - puts "
" -} + inserted = r.dbsize + first_set_max_id = id + html << "#{r.dbsize} keys inserted" -# Close HTML page + # Access keys sequentially, so that in theory the first part will be expired + # and the latter part will not, according to perfect LRU. -puts < - + STDERR.puts "Access keys sequentially" + (1..first_set_max_id).each{|id| + r.get(id) + sleep 0.001 + STDERR.print(".") if (id % 150) == 0 + } + STDERR.puts + + # Insert more 50% keys. We expect that the new keys will rarely be expired + # since their last access time is recent compared to the others. + # + # Note that we insert the first 100 keys of the new set into DB1 instead + # of DB0, so that we can try how cross-DB eviction works. + half = inserted/2 + html << "Insert enough keys to evict half the keys we inserted" + add = 0 + + otherdb_start_idx = id+1 + otherdb_end_idx = id+100 + while true + add += 1 + id += 1 + if id >= otherdb_start_idx && id <= otherdb_end_idx + r.select(1) + r.set(id,"foo") + r.select(0) + else + r.set(id,"foo") + end + break if r.info['evicted_keys'].to_i >= half + end + + html << "#{add} additional keys added." + html << "#{r.dbsize} keys in DB" + + # Check if evicted keys respect LRU + # We consider errors from 1 to N progressively more serious as they violate + # more the access pattern. + + errors = 0 + e = 1 + error_per_key = 100000.0/first_set_max_id + half_set_size = first_set_max_id/2 + maxerr = 0 + (1..(first_set_max_id/2)).each{|id| + if id >= otherdb_start_idx && id <= otherdb_end_idx + r.select(1) + exists = r.exists(id) + r.select(0) + else + exists = r.exists(id) + end + if id < first_set_max_id/2 + thiserr = error_per_key * ((half_set_size-id).to_f/half_set_size) + maxerr += thiserr + errors += thiserr if exists + elsif id >= first_set_max_id/2 + thiserr = error_per_key * ((id-half_set_size).to_f/half_set_size) + maxerr += thiserr + errors += thiserr if !exists + end + } + errors = errors*100/maxerr + + STDERR.puts "Test finished with #{errors}% error! 
Generating HTML on stdout." + + html << "#{errors}% error!" + html << "
" + $runs << errors + + # Generate the graphical representation + (1..id).each{|id| + # Mark first set and added items in a different way. + c = "box" + if id >= otherdb_start_idx && id <= otherdb_end_idx + c << " otherdb" + elsif id <= first_set_max_id + c << " old" + else + c << " new" + end + + # Add class if exists + if id >= otherdb_start_idx && id <= otherdb_end_idx + r.select(1) + exists = r.exists(id) + r.select(0) + else + exists = r.exists(id) + end + + c << " ex" if exists + html << "
" + } + + # Close HTML page + + html << < + EOF + + f = File.open(filename,"w") + f.write(html) + f.close +end + +def print_avg + avg = ($runs.reduce {|a,b| a+b}) / $runs.length + puts "#{$runs.length} runs, AVG is #{avg}" +end + +if ARGV.length < 1 + STDERR.puts "Usage: ruby test-lru.rb [num-runs]" + exit 1 +end + +filename = ARGV[0] +numruns = 1 + +numruns = ARGV[1].to_i if ARGV.length == 2 + +numruns.times { + testit(filename) + print_avg if numruns != 1 +} From bbac261b64810bd60ac064ee8f3f85e97fe992ef Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 11 Jul 2016 16:26:02 +0200 Subject: [PATCH 0056/1722] LRU: Fix output fixes to new test-lru.rb. --- utils/lru/test-lru.rb | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/utils/lru/test-lru.rb b/utils/lru/test-lru.rb index dadc6d505..1a6baf467 100644 --- a/utils/lru/test-lru.rb +++ b/utils/lru/test-lru.rb @@ -7,7 +7,7 @@ def testit(filename) r = Redis.new r.config("SET","maxmemory","2000000") r.config("SET","maxmemory-policy","allkeys-lru") - r.config("SET","maxmemory-samples",10) + r.config("SET","maxmemory-samples",5) r.config("RESETSTAT") r.flushall @@ -55,7 +55,7 @@ EOF inserted = r.dbsize first_set_max_id = id - html << "#{r.dbsize} keys inserted" + html << "#{r.dbsize} keys inserted.\n" # Access keys sequentially, so that in theory the first part will be expired # and the latter part will not, according to perfect LRU. @@ -74,7 +74,7 @@ EOF # Note that we insert the first 100 keys of the new set into DB1 instead # of DB0, so that we can try how cross-DB eviction works. half = inserted/2 - html << "Insert enough keys to evict half the keys we inserted" + html << "Insert enough keys to evict half the keys we inserted.\n" add = 0 otherdb_start_idx = id+1 @@ -92,8 +92,8 @@ EOF break if r.info['evicted_keys'].to_i >= half end - html << "#{add} additional keys added." 
- html << "#{r.dbsize} keys in DB" + html << "#{add} additional keys added.\n" + html << "#{r.dbsize} keys in DB.\n" # Check if evicted keys respect LRU # We consider errors from 1 to N progressively more serious as they violate @@ -126,7 +126,7 @@ EOF STDERR.puts "Test finished with #{errors}% error! Generating HTML on stdout." - html << "#{errors}% error!" + html << "#{errors}% error!\n" html << "" $runs << errors From f708bf2a141671cd7d6d8453e6cdbe9c4f782e13 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 11 Jul 2016 19:18:17 +0200 Subject: [PATCH 0057/1722] Remove useless memmove() from freeMemoryIfNeeded(). We start from the end of the pool to the initial item, zero-ing every entry we use or every ghost entry, there is nothing to memmove since to the right everything should be already set to NULL. --- src/evict.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/evict.c b/src/evict.c index c35b10b8f..bc3c9de24 100644 --- a/src/evict.c +++ b/src/evict.c @@ -264,13 +264,8 @@ int freeMemoryIfNeeded(void) { /* Remove the entry from the pool. */ sdsfree(pool[k].key); - /* Shift all elements on its right to left. */ - memmove(pool+k,pool+k+1, - sizeof(pool[0])*(MAXMEMORY_EVICTION_POOL_SIZE-k-1)); - /* Clear the element on the right which is empty - * since we shifted one position to the left. */ - pool[MAXMEMORY_EVICTION_POOL_SIZE-1].key = NULL; - pool[MAXMEMORY_EVICTION_POOL_SIZE-1].idle = 0; + pool[k].key = NULL; + pool[k].idle = 0; /* If the key exists, is our pick. Otherwise it is * a ghost and we need to try the next element. */ From f07cc50f8973c72a863a57eeea82d8af011fc057 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 12 Jul 2016 11:22:41 +0200 Subject: [PATCH 0058/1722] redis-benchmark: new option to show server errors on stdout. Disabled by default, can be activated with -e. Maybe the reverse was more safe but departs from the past behavior. 
--- src/redis-benchmark.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/redis-benchmark.c b/src/redis-benchmark.c index 2829df913..50905c872 100644 --- a/src/redis-benchmark.c +++ b/src/redis-benchmark.c @@ -65,6 +65,7 @@ static struct config { int randomkeys_keyspacelen; int keepalive; int pipeline; + int showerrors; long long start; long long totlatency; long long *latency; @@ -212,6 +213,16 @@ static void readHandler(aeEventLoop *el, int fd, void *privdata, int mask) { exit(1); } + if (config.showerrors) { + static time_t lasterr_time = 0; + time_t now = time(NULL); + redisReply *r = reply; + if (r->type == REDIS_REPLY_ERROR && lasterr_time != now) { + lasterr_time = now; + printf("Error from server: %s\n", r->str); + } + } + freeReplyObject(reply); /* This is an OK for prefix commands such as auth and select.*/ if (c->prefix_pending > 0) { @@ -227,7 +238,7 @@ static void readHandler(aeEventLoop *el, int fd, void *privdata, int mask) { c->randptr[j] -= c->prefixlen; c->prefixlen = 0; } - continue; + continue; } if (config.requests_finished < config.requests) @@ -518,6 +529,8 @@ int parseOptions(int argc, const char **argv) { config.loop = 1; } else if (!strcmp(argv[i],"-I")) { config.idlemode = 1; + } else if (!strcmp(argv[i],"-e")) { + config.showerrors = 1; } else if (!strcmp(argv[i],"-t")) { if (lastarg) goto invalid; /* We get the list of tests to run as a string in the form @@ -569,6 +582,8 @@ usage: " is executed. Default tests use this to hit random keys in the\n" " specified range.\n" " -P Pipeline requests. Default 1 (no pipeline).\n" +" -e If server replies with errors, show them on stdout.\n" +" (no more than 1 error per second is displayed)\n" " -q Quiet. Just show query/sec values\n" " --csv Output in CSV format\n" " -l Loop. 
Run the tests forever\n" @@ -649,6 +664,7 @@ int main(int argc, const char **argv) { config.keepalive = 1; config.datasize = 3; config.pipeline = 1; + config.showerrors = 0; config.randomkeys = 0; config.randomkeys_keyspacelen = 0; config.quiet = 0; From 9f47c5a53bbc9ca356cf86a45b73371ea34c505d Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 8 Jul 2016 14:17:11 +0200 Subject: [PATCH 0059/1722] LRU: use C99 variable len stack array in evictionPoolPopulate(). --- src/evict.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/src/evict.c b/src/evict.c index bc3c9de24..dbec6b4b5 100644 --- a/src/evict.c +++ b/src/evict.c @@ -115,16 +115,7 @@ struct evictionPoolEntry *evictionPoolAlloc(void) { #define EVICTION_SAMPLES_ARRAY_SIZE 16 void evictionPoolPopulate(dict *sampledict, dict *keydict, struct evictionPoolEntry *pool) { int j, k, count; - dictEntry *_samples[EVICTION_SAMPLES_ARRAY_SIZE]; - dictEntry **samples; - - /* Try to use a static buffer: this function is a big hit... - * Note: it was actually measured that this helps. */ - if (server.maxmemory_samples <= EVICTION_SAMPLES_ARRAY_SIZE) { - samples = _samples; - } else { - samples = zmalloc(sizeof(samples[0])*server.maxmemory_samples); - } + dictEntry *samples[server.maxmemory_samples]; count = dictGetSomeKeys(sampledict,samples,server.maxmemory_samples); for (j = 0; j < count; j++) { @@ -175,7 +166,6 @@ void evictionPoolPopulate(dict *sampledict, dict *keydict, struct evictionPoolEn pool[k].key = sdsdup(key); pool[k].idle = idle; } - if (samples != _samples) zfree(samples); } int freeMemoryIfNeeded(void) { From 4ef07b3f9d8753cdcd63957de30772d49adfeb0d Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 7 Jul 2016 15:01:58 +0200 Subject: [PATCH 0060/1722] Move prototype of evictionPoolAlloc() in server.h. 
--- src/server.c | 2 -- src/server.h | 3 +++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/server.c b/src/server.c index 4d6f9f1ab..362df2bd0 100644 --- a/src/server.c +++ b/src/server.c @@ -299,8 +299,6 @@ struct redisCommand redisCommandTable[] = { {"latency",latencyCommand,-2,"aslt",0,NULL,0,0,0,0,0} }; -struct evictionPoolEntry *evictionPoolAlloc(void); - /*============================ Utility functions ============================ */ /* Low level logging. To use only for very big messages, otherwise diff --git a/src/server.h b/src/server.h index a238f41e8..4faa9de72 100644 --- a/src/server.h +++ b/src/server.h @@ -1616,6 +1616,9 @@ void disconnectAllBlockedClients(void); /* expire.c -- Handling of expired keys */ void activeExpireCycle(int type); +/* evict.c -- maxmemory handling and LRU eviction. */ +struct evictionPoolEntry *evictionPoolAlloc(void); + /* Git SHA1 */ char *redisGitSHA1(void); char *redisGitDirty(void); From 8464262b2813a08f2fe9c79eec21d02ea18c86ea Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 7 Jul 2016 15:04:25 +0200 Subject: [PATCH 0061/1722] Move the struct evictionPoolEntry() into only file using it. Local scope is always better when possible. --- src/evict.c | 21 +++++++++++++++++++++ src/server.h | 13 +------------ 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/src/evict.c b/src/evict.c index dbec6b4b5..5e08274e4 100644 --- a/src/evict.c +++ b/src/evict.c @@ -33,6 +33,27 @@ #include "server.h" #include "bio.h" +/* ---------------------------------------------------------------------------- + * Data structures + * --------------------------------------------------------------------------*/ + +/* To improve the quality of the LRU approximation we take a set of keys + * that are good candidate for eviction across freeMemoryIfNeeded() calls. + * + * Entries inside the eviciton pool are taken ordered by idle time, putting + * greater idle times to the right (ascending order). 
+ * + * Empty entries have the key pointer set to NULL. */ +#define MAXMEMORY_EVICTION_POOL_SIZE 16 +struct evictionPoolEntry { + unsigned long long idle; /* Object idle time. */ + sds key; /* Key name. */ +}; + +/* ---------------------------------------------------------------------------- + * Implementation of eviction, aging and LRU + * --------------------------------------------------------------------------*/ + /* Return the LRU clock, based on the clock resolution. This is a time * in a reduced-bits format that can be used to set and check the * object->lru field of redisObject structures. */ diff --git a/src/server.h b/src/server.h index 4faa9de72..b7e9f54f0 100644 --- a/src/server.h +++ b/src/server.h @@ -550,18 +550,7 @@ typedef struct redisObject { _var.ptr = _ptr; \ } while(0) -/* To improve the quality of the LRU approximation we take a set of keys - * that are good candidate for eviction across freeMemoryIfNeeded() calls. - * - * Entries inside the eviciton pool are taken ordered by idle time, putting - * greater idle times to the right (ascending order). - * - * Empty entries have the key pointer set to NULL. */ -#define MAXMEMORY_EVICTION_POOL_SIZE 16 -struct evictionPoolEntry { - unsigned long long idle; /* Object idle time. */ - sds key; /* Key name. */ -}; +struct evictionPoolEntry; /* Defined in evict.c */ /* Redis database representation. There are multiple databases identified * by integers from 0 (the default database) up to the max configured From 8934a2a78a9fe4f3baf68f4373eaf520f7264951 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 12 Jul 2016 12:31:37 +0200 Subject: [PATCH 0062/1722] LRU: cache SDS strings in the eviction pool. To destroy and recreate the pool[].key element is slow, so we allocate in pool[].cached SDS strings that can account up to 255 chars keys and try to reuse them. This provides a solid 20% performance improvement in real world workload alike benchmarks. 
--- src/evict.c | 42 +++++++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/src/evict.c b/src/evict.c index 5e08274e4..5e2e20121 100644 --- a/src/evict.c +++ b/src/evict.c @@ -44,10 +44,12 @@ * greater idle times to the right (ascending order). * * Empty entries have the key pointer set to NULL. */ -#define MAXMEMORY_EVICTION_POOL_SIZE 16 +#define EVPOOL_SIZE 16 +#define EVPOOL_CACHED_SDS_SIZE 255 struct evictionPoolEntry { unsigned long long idle; /* Object idle time. */ sds key; /* Key name. */ + sds cached; /* Cached SDS object for key name. */ }; /* ---------------------------------------------------------------------------- @@ -96,7 +98,7 @@ unsigned long long estimateObjectIdleTime(robj *o) { * Redis uses an approximation of the LRU algorithm that runs in constant * memory. Every time there is a key to expire, we sample N keys (with * N very small, usually in around 5) to populate a pool of best keys to - * evict of M keys (the pool size is defined by MAXMEMORY_EVICTION_POOL_SIZE). + * evict of M keys (the pool size is defined by EVPOOL_SIZE). * * The N keys sampled are added in the pool of good keys to expire (the one * with an old access time) if they are better than one of the current keys @@ -116,10 +118,11 @@ struct evictionPoolEntry *evictionPoolAlloc(void) { struct evictionPoolEntry *ep; int j; - ep = zmalloc(sizeof(*ep)*MAXMEMORY_EVICTION_POOL_SIZE); - for (j = 0; j < MAXMEMORY_EVICTION_POOL_SIZE; j++) { + ep = zmalloc(sizeof(*ep)*EVPOOL_SIZE); + for (j = 0; j < EVPOOL_SIZE; j++) { ep[j].idle = 0; ep[j].key = NULL; + ep[j].cached = sdsnewlen(NULL,EVPOOL_CACHED_SDS_SIZE); } return ep; } @@ -158,33 +161,45 @@ void evictionPoolPopulate(dict *sampledict, dict *keydict, struct evictionPoolEn * First, find the first empty bucket or the first populated * bucket that has an idle time smaller than our idle time. 
*/ k = 0; - while (k < MAXMEMORY_EVICTION_POOL_SIZE && + while (k < EVPOOL_SIZE && pool[k].key && pool[k].idle < idle) k++; - if (k == 0 && pool[MAXMEMORY_EVICTION_POOL_SIZE-1].key != NULL) { + if (k == 0 && pool[EVPOOL_SIZE-1].key != NULL) { /* Can't insert if the element is < the worst element we have * and there are no empty buckets. */ continue; - } else if (k < MAXMEMORY_EVICTION_POOL_SIZE && pool[k].key == NULL) { + } else if (k < EVPOOL_SIZE && pool[k].key == NULL) { /* Inserting into empty position. No setup needed before insert. */ } else { /* Inserting in the middle. Now k points to the first element * greater than the element to insert. */ - if (pool[MAXMEMORY_EVICTION_POOL_SIZE-1].key == NULL) { + if (pool[EVPOOL_SIZE-1].key == NULL) { /* Free space on the right? Insert at k shifting * all the elements from k to end to the right. */ memmove(pool+k+1,pool+k, - sizeof(pool[0])*(MAXMEMORY_EVICTION_POOL_SIZE-k-1)); + sizeof(pool[0])*(EVPOOL_SIZE-k-1)); } else { /* No free space on right? Insert at k-1 */ k--; /* Shift all elements on the left of k (included) to the * left, so we discard the element with smaller idle time. */ - sdsfree(pool[0].key); + if (pool[0].key != pool[0].cached) sdsfree(pool[0].key); memmove(pool,pool+1,sizeof(pool[0])*k); } } - pool[k].key = sdsdup(key); + + /* Try to reuse the cached SDS string allocated in the pool entry, + * because allocating and deallocating this object is costly + * (according to the profiler, not my fantasy. Remember: + * premature optimizbla bla bla bla. */ + int klen = sdslen(key); + if (klen > EVPOOL_CACHED_SDS_SIZE) { + pool[k].key = sdsdup(key); + } else { + memcpy(pool[k].cached,key,klen+1); + sdssetlen(pool[k].cached,klen); + pool[k].key = pool[k].cached; + } pool[k].idle = idle; } } @@ -269,12 +284,13 @@ int freeMemoryIfNeeded(void) { while(bestkey == NULL) { evictionPoolPopulate(dict, db->dict, db->eviction_pool); /* Go backward from best to worst element to evict. 
*/ - for (k = MAXMEMORY_EVICTION_POOL_SIZE-1; k >= 0; k--) { + for (k = EVPOOL_SIZE-1; k >= 0; k--) { if (pool[k].key == NULL) continue; de = dictFind(dict,pool[k].key); /* Remove the entry from the pool. */ - sdsfree(pool[k].key); + if (pool[k].key != pool[k].cached) + sdsfree(pool[k].key); pool[k].key = NULL; pool[k].idle = 0; From bb5b1c599192112b54de577a29923f821a074f5e Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 13 Jul 2016 10:45:37 +0200 Subject: [PATCH 0063/1722] LRU: Make cross-database choices for eviction. The LRU eviction code used to make local choices: for each DB visited it selected the best key to evict. This was repeated for each DB. However this means that there could be DBs with very frequently accessed keys that are targeted by the LRU algorithm while there were other DBs with many better candidates to expire. This commit attempts to fix this problem for the LRU policy. However the TTL policy is still not fixed by this commit. The TTL policy will be fixed in a successive commit. This is an initial (partial because of TTL policy) fix for issue #2647. --- src/evict.c | 258 +++++++++++++++++++++++++++++++-------------------- src/server.c | 2 +- src/server.h | 5 +- 3 files changed, 158 insertions(+), 107 deletions(-) diff --git a/src/evict.c b/src/evict.c index 5e2e20121..6ce3aef0d 100644 --- a/src/evict.c +++ b/src/evict.c @@ -50,8 +50,11 @@ struct evictionPoolEntry { unsigned long long idle; /* Object idle time. */ sds key; /* Key name. */ sds cached; /* Cached SDS object for key name. */ + int dbid; /* Key DB number. */ }; +static struct evictionPoolEntry *EvictionPoolLRU; + /* ---------------------------------------------------------------------------- * Implementation of eviction, aging and LRU * --------------------------------------------------------------------------*/ @@ -114,7 +117,7 @@ unsigned long long estimateObjectIdleTime(robj *o) { * evicted in the whole database. */ /* Create a new eviction pool. 
*/ -struct evictionPoolEntry *evictionPoolAlloc(void) { +void evictionPoolAlloc(void) { struct evictionPoolEntry *ep; int j; @@ -123,8 +126,9 @@ struct evictionPoolEntry *evictionPoolAlloc(void) { ep[j].idle = 0; ep[j].key = NULL; ep[j].cached = sdsnewlen(NULL,EVPOOL_CACHED_SDS_SIZE); + ep[j].dbid = 0; } - return ep; + EvictionPoolLRU = ep; } /* This is an helper function for freeMemoryIfNeeded(), it is used in order @@ -136,8 +140,7 @@ struct evictionPoolEntry *evictionPoolAlloc(void) { * idle time are on the left, and keys with the higher idle time on the * right. */ -#define EVICTION_SAMPLES_ARRAY_SIZE 16 -void evictionPoolPopulate(dict *sampledict, dict *keydict, struct evictionPoolEntry *pool) { +void evictionPoolPopulate(int dbid, dict *sampledict, dict *keydict, struct evictionPoolEntry *pool) { int j, k, count; dictEntry *samples[server.maxmemory_samples]; @@ -176,15 +179,21 @@ void evictionPoolPopulate(dict *sampledict, dict *keydict, struct evictionPoolEn if (pool[EVPOOL_SIZE-1].key == NULL) { /* Free space on the right? Insert at k shifting * all the elements from k to end to the right. */ + + /* Save SDS before overwriting. */ + sds cached = pool[EVPOOL_SIZE-1].cached; memmove(pool+k+1,pool+k, sizeof(pool[0])*(EVPOOL_SIZE-k-1)); + pool[k].cached = cached; } else { /* No free space on right? Insert at k-1 */ k--; /* Shift all elements on the left of k (included) to the * left, so we discard the element with smaller idle time. */ + sds cached = pool[0].cached; /* Save SDS before overwriting. 
*/ if (pool[0].key != pool[0].cached) sdsfree(pool[0].key); memmove(pool,pool+1,sizeof(pool[0])*k); + pool[k].cached = cached; } } @@ -201,6 +210,7 @@ void evictionPoolPopulate(dict *sampledict, dict *keydict, struct evictionPoolEn pool[k].key = pool[k].cached; } pool[k].idle = idle; + pool[k].dbid = dbid; } } @@ -249,119 +259,161 @@ int freeMemoryIfNeeded(void) { latencyStartMonitor(latency); while (mem_freed < mem_tofree) { - int j, k, keys_freed = 0; + int j, k, i, keys_freed = 0; + static int next_db = 0; + sds bestkey = NULL; + int bestdbid; + redisDb *db; + dict *dict; + dictEntry *de; - for (j = 0; j < server.dbnum; j++) { - long bestval = 0; /* just to prevent warning */ - sds bestkey = NULL; - dictEntry *de; - redisDb *db = server.db+j; - dict *dict; + if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_LRU || + server.maxmemory_policy == MAXMEMORY_VOLATILE_LRU) + { + struct evictionPoolEntry *pool = EvictionPoolLRU; - if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_LRU || - server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM) - { - dict = server.db[j].dict; - } else { - dict = server.db[j].expires; + while(bestkey == NULL) { + unsigned long total_keys = 0, keys; + + /* We don't want to make local-db choices when expiring keys, + * so to start populate the eviction pool sampling keys from + * every DB. */ + for (i = 0; i < server.dbnum; i++) { + db = server.db+i; + dict = (server.maxmemory_policy == MAXMEMORY_ALLKEYS_LRU) ? + db->dict : db->expires; + if ((keys = dictSize(dict)) != 0) { + evictionPoolPopulate(i, dict, db->dict, pool); + total_keys += keys; + } + } + if (!total_keys) break; /* No keys to evict. */ + + /* Go backward from best to worst element to evict. 
*/ + for (k = EVPOOL_SIZE-1; k >= 0; k--) { + if (pool[k].key == NULL) continue; + bestdbid = pool[k].dbid; + + if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_LRU) { + de = dictFind(server.db[pool[k].dbid].dict, + pool[k].key); + } else { + de = dictFind(server.db[pool[k].dbid].expires, + pool[k].key); + } + + /* Remove the entry from the pool. */ + if (pool[k].key != pool[k].cached) + sdsfree(pool[k].key); + pool[k].key = NULL; + pool[k].idle = 0; + + /* If the key exists, is our pick. Otherwise it is + * a ghost and we need to try the next element. */ + if (de) { + bestkey = dictGetKey(de); + break; + } else { + /* Ghost... Iterate again. */ + } + } } - if (dictSize(dict) == 0) continue; + } - /* volatile-random and allkeys-random policy */ - if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM || - server.maxmemory_policy == MAXMEMORY_VOLATILE_RANDOM) - { - de = dictGetRandomKey(dict); - bestkey = dictGetKey(de); + /* volatile-random and allkeys-random policy */ + else if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM || + server.maxmemory_policy == MAXMEMORY_VOLATILE_RANDOM) + { + /* When evicting a random key, we try to evict a key for + * each DB, so we use the static 'next_db' variable to + * incrementally visit all DBs. */ + for (i = 0; i < server.dbnum; i++) { + j = (++next_db) % server.dbnum; + db = server.db+j; + dict = (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM) ? + db->dict : db->expires; + if (dictSize(dict) != 0) { + de = dictGetRandomKey(dict); + bestkey = dictGetKey(de); + bestdbid = j; + break; + } } + } - /* volatile-lru and allkeys-lru policy */ - else if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_LRU || - server.maxmemory_policy == MAXMEMORY_VOLATILE_LRU) - { - struct evictionPoolEntry *pool = db->eviction_pool; + /* volatile-ttl */ + else if (server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL) { + long bestttl = 0; /* Initialized to avoid warning. 
*/ - while(bestkey == NULL) { - evictionPoolPopulate(dict, db->dict, db->eviction_pool); - /* Go backward from best to worst element to evict. */ - for (k = EVPOOL_SIZE-1; k >= 0; k--) { - if (pool[k].key == NULL) continue; - de = dictFind(dict,pool[k].key); + /* In this policy we scan a single DB per iteration (visiting + * a different DB per call), expiring the key with the smallest + * TTL among the few sampled. + * + * Note that this algorithm makes local-DB choices, and should + * use a pool and code more similr to the one used in the + * LRU eviction policies in the future. */ + for (i = 0; i < server.dbnum; i++) { + j = (++next_db) % server.dbnum; + db = server.db+j; + dict = db->expires; + if (dictSize(dict) != 0) { + for (k = 0; k < server.maxmemory_samples; k++) { + sds thiskey; + long thisttl; - /* Remove the entry from the pool. */ - if (pool[k].key != pool[k].cached) - sdsfree(pool[k].key); - pool[k].key = NULL; - pool[k].idle = 0; + de = dictGetRandomKey(dict); + thiskey = dictGetKey(de); + thisttl = (long) dictGetVal(de); - /* If the key exists, is our pick. Otherwise it is - * a ghost and we need to try the next element. */ - if (de) { - bestkey = dictGetKey(de); - break; - } else { - /* Ghost... */ - continue; + /* Keys expiring sooner (smaller unix timestamp) are + * better candidates for deletion */ + if (bestkey == NULL || thisttl < bestttl) { + bestkey = thiskey; + bestttl = thisttl; + bestdbid = j; } } } } - - /* volatile-ttl */ - else if (server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL) { - for (k = 0; k < server.maxmemory_samples; k++) { - sds thiskey; - long thisval; - - de = dictGetRandomKey(dict); - thiskey = dictGetKey(de); - thisval = (long) dictGetVal(de); - - /* Expire sooner (minor expire unix timestamp) is better - * candidate for deletion */ - if (bestkey == NULL || thisval < bestval) { - bestkey = thiskey; - bestval = thisval; - } - } - } - - /* Finally remove the selected key. 
*/ - if (bestkey) { - robj *keyobj = createStringObject(bestkey,sdslen(bestkey)); - propagateExpire(db,keyobj,server.lazyfree_lazy_eviction); - /* We compute the amount of memory freed by db*Delete() alone. - * It is possible that actually the memory needed to propagate - * the DEL in AOF and replication link is greater than the one - * we are freeing removing the key, but we can't account for - * that otherwise we would never exit the loop. - * - * AOF and Output buffer memory will be freed eventually so - * we only care about memory used by the key space. */ - delta = (long long) zmalloc_used_memory(); - latencyStartMonitor(eviction_latency); - if (server.lazyfree_lazy_eviction) - dbAsyncDelete(db,keyobj); - else - dbSyncDelete(db,keyobj); - latencyEndMonitor(eviction_latency); - latencyAddSampleIfNeeded("eviction-del",eviction_latency); - latencyRemoveNestedEvent(latency,eviction_latency); - delta -= (long long) zmalloc_used_memory(); - mem_freed += delta; - server.stat_evictedkeys++; - notifyKeyspaceEvent(NOTIFY_EVICTED, "evicted", - keyobj, db->id); - decrRefCount(keyobj); - keys_freed++; - - /* When the memory to free starts to be big enough, we may - * start spending so much time here that is impossible to - * deliver data to the slaves fast enough, so we force the - * transmission here inside the loop. */ - if (slaves) flushSlavesOutputBuffers(); - } } + + /* Finally remove the selected key. */ + if (bestkey) { + db = server.db+bestdbid; + robj *keyobj = createStringObject(bestkey,sdslen(bestkey)); + propagateExpire(db,keyobj,server.lazyfree_lazy_eviction); + /* We compute the amount of memory freed by db*Delete() alone. + * It is possible that actually the memory needed to propagate + * the DEL in AOF and replication link is greater than the one + * we are freeing removing the key, but we can't account for + * that otherwise we would never exit the loop. 
+ * + * AOF and Output buffer memory will be freed eventually so + * we only care about memory used by the key space. */ + delta = (long long) zmalloc_used_memory(); + latencyStartMonitor(eviction_latency); + if (server.lazyfree_lazy_eviction) + dbAsyncDelete(db,keyobj); + else + dbSyncDelete(db,keyobj); + latencyEndMonitor(eviction_latency); + latencyAddSampleIfNeeded("eviction-del",eviction_latency); + latencyRemoveNestedEvent(latency,eviction_latency); + delta -= (long long) zmalloc_used_memory(); + mem_freed += delta; + server.stat_evictedkeys++; + notifyKeyspaceEvent(NOTIFY_EVICTED, "evicted", + keyobj, db->id); + decrRefCount(keyobj); + keys_freed++; + + /* When the memory to free starts to be big enough, we may + * start spending so much time here that is impossible to + * deliver data to the slaves fast enough, so we force the + * transmission here inside the loop. */ + if (slaves) flushSlavesOutputBuffers(); + } + if (!keys_freed) { latencyEndMonitor(latency); latencyAddSampleIfNeeded("eviction-cycle",latency); diff --git a/src/server.c b/src/server.c index 362df2bd0..f8847f646 100644 --- a/src/server.c +++ b/src/server.c @@ -1748,10 +1748,10 @@ void initServer(void) { server.db[j].blocking_keys = dictCreate(&keylistDictType,NULL); server.db[j].ready_keys = dictCreate(&objectKeyPointerValueDictType,NULL); server.db[j].watched_keys = dictCreate(&keylistDictType,NULL); - server.db[j].eviction_pool = evictionPoolAlloc(); server.db[j].id = j; server.db[j].avg_ttl = 0; } + evictionPoolAlloc(); /* Initialize the LRU keys pool. 
*/ server.pubsub_channels = dictCreate(&keylistDictType,NULL); server.pubsub_patterns = listCreate(); listSetFreeMethod(server.pubsub_patterns,freePubsubPattern); diff --git a/src/server.h b/src/server.h index b7e9f54f0..0354877fc 100644 --- a/src/server.h +++ b/src/server.h @@ -558,10 +558,9 @@ struct evictionPoolEntry; /* Defined in evict.c */ typedef struct redisDb { dict *dict; /* The keyspace for this DB */ dict *expires; /* Timeout of keys with a timeout set */ - dict *blocking_keys; /* Keys with clients waiting for data (BLPOP) */ + dict *blocking_keys; /* Keys with clients waiting for data (BLPOP)*/ dict *ready_keys; /* Blocked keys that received a PUSH */ dict *watched_keys; /* WATCHED keys for MULTI/EXEC CAS */ - struct evictionPoolEntry *eviction_pool; /* Eviction pool of keys */ int id; /* Database ID */ long long avg_ttl; /* Average TTL, just for stats */ } redisDb; @@ -1606,7 +1605,7 @@ void disconnectAllBlockedClients(void); void activeExpireCycle(int type); /* evict.c -- maxmemory handling and LRU eviction. */ -struct evictionPoolEntry *evictionPoolAlloc(void); +void evictionPoolAlloc(void); /* Git SHA1 */ char *redisGitSHA1(void); From cd969573ed73951b14ac6be34f677eb7c73a29b7 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 14 Jul 2016 15:21:48 +0200 Subject: [PATCH 0064/1722] LFU: Simulation of the algorithm planned for Redis. We have 24 total bits of space in each object in order to implement an LFU (Least Frequently Used) eviction policy. We split the 24 bits into two fields: 8 bits 16 bits +--------+----------------+ | LOG_C | Last decr time | +--------+----------------+ LOG_C is a logarithmic counter that provides an indication of the access frequency. However this field must also be deceremented otherwise what used to be a frequently accessed key in the past, will remain ranked like that forever, while we want the algorithm to adapt to access pattern changes. 
So the remaining 16 bits are used in order to store the "decrement time", a reduced-precision unix time (we take 16 bits of the time converted in minutes since we don't care about wrapping around) where the LOG_C counter is halved if it has an high value, or just decremented if it has a low value. New keys don't start at zero, in order to have the ability to collect some accesses before being trashed away, so they start at COUNTER_INIT_VAL. The logaritmic increment performed on LOG_C takes care of COUNTER_INIT_VAL when incrementing the key, so that keys starting at COUNTER_INIT_VAL (or having a smaller value) have a very high chance of being incremented on access. The simulation starts with a power-law access pattern, and later converts into a flat access pattern in order to see how the algorithm adapts. Currenty the decrement operation period is 1 minute, however note that it is not guaranteed that each key will be scanned 1 time every minute, so the actual frequency can be lower. However under high load, we access 3/5 keys every newly inserted key (because of how Redis eviction works). This is a work in progress at this point to evaluate if this works well. --- utils/lru/lfu-simulation.c | 163 +++++++++++++++++++++++++++++++++++++ 1 file changed, 163 insertions(+) create mode 100644 utils/lru/lfu-simulation.c diff --git a/utils/lru/lfu-simulation.c b/utils/lru/lfu-simulation.c new file mode 100644 index 000000000..ebfc434ac --- /dev/null +++ b/utils/lru/lfu-simulation.c @@ -0,0 +1,163 @@ +#include +#include +#include +#include + +int decr_every = 1; +int keyspace_size = 1000000; +time_t switch_after = 30; /* Switch access pattern after N seconds. */ + +struct entry { + /* Field that the LFU Redis implementation will have (we have + * 24 bits of total space in the object->lru field). */ + uint8_t counter; /* Logarithmic counter. */ + uint16_t decrtime; /* (Reduced precision) time of last decrement. */ + + /* Fields only useful for visualization. 
*/ + uint64_t hits; /* Number of real accesses. */ + time_t ctime; /* Key creation time. */ +}; + +#define to_16bit_minutes(x) ((x/60) & 65535) +#define COUNTER_INIT_VAL 5 + +/* Compute the difference in minutes between two 16 bit minutes times + * obtained with to_16bit_minutes(). Since they can wrap around if + * we detect the overflow we account for it as if the counter wrapped + * a single time. */ +uint16_t minutes_diff(uint16_t now, uint16_t prev) { + if (now >= prev) return now-prev; + return 65535-prev+now; +} + +/* Increment a couter logaritmically: the greatest is its value, the + * less likely is that the counter is really incremented. + * The maximum value of the counter is saturated at 255. */ +uint8_t log_incr(uint8_t counter) { + if (counter == 255) return counter; + double r = (double)rand()/RAND_MAX; + double baseval = counter-COUNTER_INIT_VAL; + if (baseval < 0) baseval = 0; + double limit = 1.0/(baseval*10+1); + if (r < limit) counter++; + return counter; +} + +/* Simulate an access to an entry. */ +void access_entry(struct entry *e) { + e->counter = log_incr(e->counter); + e->hits++; +} + +/* Return the entry LFU value and as a side effect decrement the + * entry value if the decrement time was reached. */ +uint8_t scan_entry(struct entry *e) { + if (minutes_diff(to_16bit_minutes(time(NULL)),e->decrtime) + >= decr_every) + { + if (e->counter) { + if (e->counter > COUNTER_INIT_VAL*2) { + e->counter /= 2; + } else { + e->counter--; + } + } + e->decrtime = to_16bit_minutes(time(NULL)); + } + return e->counter; +} + +/* Print the entry info. 
*/ +void show_entry(long pos, struct entry *e) { + char *tag = "normal "; + + if (pos >= 10 && pos <= 14) tag = "new no access"; + if (pos >= 15 && pos <= 19) tag = "new accessed "; + if (pos >= keyspace_size -5) tag= "old no access"; + + printf("%ld] <%s> frequency:%d decrtime:%d [%lu hits | age:%ld seconds]\n", + pos, tag, e->counter, e->decrtime, (unsigned long)e->hits, + time(NULL) - e->ctime); +} + +int main(void) { + time_t start = time(NULL); + time_t new_entry_time = start; + time_t display_time = start; + struct entry *entries = malloc(sizeof(*entries)*keyspace_size); + long j; + + /* Initialize. */ + for (j = 0; j < keyspace_size; j++) { + entries[j].counter = COUNTER_INIT_VAL; + entries[j].decrtime = to_16bit_minutes(start); + entries[j].hits = 0; + entries[j].ctime = time(NULL); + } + + while(1) { + time_t now = time(NULL); + long idx; + + /* Scan N random entries (simulates the eviction under maxmemory). */ + for (j = 0; j < 3; j++) { + scan_entry(entries+(rand()%keyspace_size)); + } + + /* Access a random entry: use a power-law access pattern up to + * 'switch_after' seconds. Then revert to flat access pattern. */ + if (now-start < switch_after) { + /* Power law. */ + #if 0 + idx = keyspace_size; + while(rand() % 2 && idx > 1) idx /= 2; + idx = rand() % idx; + #endif + idx = 1; + while((rand() % 21) != 0 && idx < keyspace_size) idx *= 2; + if (idx > keyspace_size) idx = keyspace_size; + idx = rand() % idx; + } else { + /* Flat. */ + idx = rand() % keyspace_size; + } + + /* Never access entries between position 10 and 14, so that + * we simulate what happens to new entries that are never + * accessed VS new entries which are accessed in positions + * 15-19. + * + * Also never access last 5 entry, so that we have keys which + * are never recreated (old), and never accessed. 
*/ + if ((idx < 10 || idx > 14) && (idx < keyspace_size-5)) + access_entry(entries+idx); + + /* Simulate the addition of new entries at positions between + * 10 and 19, a random one every 10 seconds. */ + if (new_entry_time == now) { + idx = 10+(rand()%10); + entries[idx].counter = COUNTER_INIT_VAL; + entries[idx].decrtime = to_16bit_minutes(start); + entries[idx].hits = 0; + entries[idx].ctime = time(NULL); + new_entry_time = now+10; + } + + /* Show the first 20 entries and the last 20 entries. */ + if (display_time != now) { + printf("=============================\n"); + printf("Current minutes time: %d\n", (int)to_16bit_minutes(now)); + printf("Access method: %s\n", + (now-start < switch_after) ? "power-law" : "flat"); + + for (j = 0; j < 20; j++) + show_entry(j,entries+j); + + for (j = keyspace_size-20; j < keyspace_size; j++) + show_entry(j,entries+j); + display_time = now; + } + } + return 0; +} + From c8a6d75f1ca2629d26b4e6ab80979cc13fcdd021 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 14 Jul 2016 15:51:51 +0200 Subject: [PATCH 0065/1722] LRU simulator: fix new entry creation. --- utils/lru/lfu-simulation.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/lru/lfu-simulation.c b/utils/lru/lfu-simulation.c index ebfc434ac..fe7a38354 100644 --- a/utils/lru/lfu-simulation.c +++ b/utils/lru/lfu-simulation.c @@ -75,7 +75,7 @@ void show_entry(long pos, struct entry *e) { if (pos >= 15 && pos <= 19) tag = "new accessed "; if (pos >= keyspace_size -5) tag= "old no access"; - printf("%ld] <%s> frequency:%d decrtime:%d [%lu hits | age:%ld seconds]\n", + printf("%ld] <%s> frequency:%d decrtime:%d [%lu hits | age:%ld sec]\n", pos, tag, e->counter, e->decrtime, (unsigned long)e->hits, time(NULL) - e->ctime); } @@ -134,7 +134,7 @@ int main(void) { /* Simulate the addition of new entries at positions between * 10 and 19, a random one every 10 seconds. 
*/ - if (new_entry_time == now) { + if (new_entry_time <= now) { idx = 10+(rand()%10); entries[idx].counter = COUNTER_INIT_VAL; entries[idx].decrtime = to_16bit_minutes(start); From 1aaeeaa5378820828bfe49c474a8caa69ce5d674 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 14 Jul 2016 15:55:17 +0200 Subject: [PATCH 0066/1722] LRU simulator: fix new entry creation decr time. --- utils/lru/lfu-simulation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/lru/lfu-simulation.c b/utils/lru/lfu-simulation.c index fe7a38354..2305fb3a2 100644 --- a/utils/lru/lfu-simulation.c +++ b/utils/lru/lfu-simulation.c @@ -137,7 +137,7 @@ int main(void) { if (new_entry_time <= now) { idx = 10+(rand()%10); entries[idx].counter = COUNTER_INIT_VAL; - entries[idx].decrtime = to_16bit_minutes(start); + entries[idx].decrtime = to_16bit_minutes(time(NULL)); entries[idx].hits = 0; entries[idx].ctime = time(NULL); new_entry_time = now+10; From 8192fc3a8ce307b078c4224b0b733383b6595565 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 14 Jul 2016 16:06:36 +0200 Subject: [PATCH 0067/1722] LFU simulator: remove dead code. --- utils/lru/lfu-simulation.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/utils/lru/lfu-simulation.c b/utils/lru/lfu-simulation.c index 2305fb3a2..6aa5911ac 100644 --- a/utils/lru/lfu-simulation.c +++ b/utils/lru/lfu-simulation.c @@ -108,11 +108,6 @@ int main(void) { * 'switch_after' seconds. Then revert to flat access pattern. */ if (now-start < switch_after) { /* Power law. */ - #if 0 - idx = keyspace_size; - while(rand() % 2 && idx > 1) idx /= 2; - idx = rand() % idx; - #endif idx = 1; while((rand() % 21) != 0 && idx < keyspace_size) idx *= 2; if (idx > keyspace_size) idx = keyspace_size; From 13c8765e00a4b081e1b083faca4326171f9c3f96 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 15 Jul 2016 12:12:52 +0200 Subject: [PATCH 0068/1722] LFU: Redis object level implementation. 
Implementation of LFU maxmemory policy for anything related to Redis objects. Still no actual eviction implemented. --- src/config.c | 2 ++ src/db.c | 8 ++++- src/evict.c | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/object.c | 24 ++++++++++---- src/server.h | 30 ++++++++++++----- 5 files changed, 142 insertions(+), 15 deletions(-) diff --git a/src/config.c b/src/config.c index 683ec8719..f443ce22a 100644 --- a/src/config.c +++ b/src/config.c @@ -45,9 +45,11 @@ typedef struct configEnum { configEnum maxmemory_policy_enum[] = { {"volatile-lru", MAXMEMORY_VOLATILE_LRU}, + {"volatile-lfu", MAXMEMORY_VOLATILE_LFU}, {"volatile-random",MAXMEMORY_VOLATILE_RANDOM}, {"volatile-ttl",MAXMEMORY_VOLATILE_TTL}, {"allkeys-lru",MAXMEMORY_ALLKEYS_LRU}, + {"allkeys-lfu",MAXMEMORY_ALLKEYS_LFU}, {"allkeys-random",MAXMEMORY_ALLKEYS_RANDOM}, {"noeviction",MAXMEMORY_NO_EVICTION}, {NULL, 0} diff --git a/src/db.c b/src/db.c index 03615fd34..b00227d81 100644 --- a/src/db.c +++ b/src/db.c @@ -53,7 +53,13 @@ robj *lookupKey(redisDb *db, robj *key, int flags) { server.aof_child_pid == -1 && !(flags & LOOKUP_NOTOUCH)) { - val->lru = LRU_CLOCK(); + if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) { + unsigned long ldt = val->lru >> 8; + unsigned long counter = LFULogIncr(val->lru & 255); + val->lru = (ldt << 8) | counter; + } else { + val->lru = LRU_CLOCK(); + } } return val; } else { diff --git a/src/evict.c b/src/evict.c index 6ce3aef0d..753a2ac81 100644 --- a/src/evict.c +++ b/src/evict.c @@ -214,6 +214,99 @@ void evictionPoolPopulate(int dbid, dict *sampledict, dict *keydict, struct evic } } +/* ---------------------------------------------------------------------------- + * LFU (Least Frequently Used) implementation. + + * We have 24 total bits of space in each object in order to implement + * an LFU (Least Frequently Used) eviction policy, since we re-use the + * LRU field for this purpose. 
+ * + * We split the 24 bits into two fields: + * + * 16 bits 8 bits + * +----------------+--------+ + * + Last decr time | LOG_C | + * +----------------+--------+ + * + * LOG_C is a logarithmic counter that provides an indication of the access + * frequency. However this field must also be decremented otherwise what used + * to be a frequently accessed key in the past, will remain ranked like that + * forever, while we want the algorithm to adapt to access pattern changes. + * + * So the remaining 16 bits are used in order to store the "decrement time", + * a reduced-precision Unix time (we take 16 bits of the time converted + * in minutes since we don't care about wrapping around) where the LOG_C + * counter is halved if it has an high value, or just decremented if it + * has a low value. + * + * New keys don't start at zero, in order to have the ability to collect + * some accesses before being trashed away, so they start at COUNTER_INIT_VAL. + * The logarithmic increment performed on LOG_C takes care of COUNTER_INIT_VAL + * when incrementing the key, so that keys starting at COUNTER_INIT_VAL + * (or having a smaller value) have a very high chance of being incremented + * on access. + * + * During decrement, the value of the logarithmic counter is halved if + * its current value is greater than two times the COUNTER_INIT_VAL, otherwise + * it is just decremented by one. + * --------------------------------------------------------------------------*/ + +/* Return the current time in minutes, just taking the least significant + * 16 bits. The returned time is suitable to be stored as LDT (last decrement + * time) for the LFU implementation. */ +unsigned long LFUGetTimeInMinutes(void) { + return (server.unixtime/60) & 65535; +} + +/* Given an object last decrement time, compute the minimum number of minutes + * that elapsed since the last decrement. 
Handle overflow (ldt greater than + * the current 16 bits minutes time) considering the time as wrapping + * exactly once. */ +unsigned long LFUTimeElapsed(unsigned long ldt) { + unsigned long now = LFUGetTimeInMinutes(); + if (now > ldt) return now-ldt; + return 65535-ldt+now; +} + +/* Logarithmically increment a counter. The greater is the current counter value + * the less likely is that it gets really implemented. Saturate it at 255. */ +#define LFU_LOG_FACTOR 10 +uint8_t LFULogIncr(uint8_t counter) { + if (counter == 255) return 255; + double r = (double)rand()/RAND_MAX; + double baseval = counter - LFU_INIT_VAL; + if (baseval < 0) baseval = 0; + double p = 1.0/(baseval*LFU_LOG_FACTOR+1); + if (r < p) counter++; + return counter; +} + +/* If the object decrement time is reached, decrement the LFU counter and + * update the decrement time field. Return the object frequency counter. + * + * This function is used in order to scan the dataset for the best object + * to fit: as we check for the candidate, we incrementally decrement the + * counter of the scanned objects if needed. */ +#define LFU_DECR_INTERVAL 1 +unsigned long LFUDecrAndReturn(robj *o) { + unsigned long ldt = o->lru >> 8; + unsigned long counter = o->lru & 255; + if (LFUTimeElapsed(ldt) > LFU_DECR_INTERVAL && counter) { + if (counter > LFU_INIT_VAL*2) { + counter /= 2; + } else { + counter--; + } + o->lru = (LFUGetTimeInMinutes()<<8) | counter; + } + return counter; +} + +/* ---------------------------------------------------------------------------- + * The external API for eviction: freeMemroyIfNeeded() is called by the + * server when there is data to add in order to make space if needed. 
+ * --------------------------------------------------------------------------*/ + int freeMemoryIfNeeded(void) { size_t mem_reported, mem_used, mem_tofree, mem_freed; int slaves = listLength(server.slaves); diff --git a/src/object.c b/src/object.c index ec886f1f6..8f19f60f2 100644 --- a/src/object.c +++ b/src/object.c @@ -43,8 +43,13 @@ robj *createObject(int type, void *ptr) { o->ptr = ptr; o->refcount = 1; - /* Set the LRU to the current lruclock (minutes resolution). */ - o->lru = LRU_CLOCK(); + /* Set the LRU to the current lruclock (minutes resolution), or + * alternatively the LFU counter. */ + if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) { + o->lru = (LFUGetTimeInMinutes()<<8) | LFU_INIT_VAL; + } else { + o->lru = LRU_CLOCK(); + } return o; } @@ -82,7 +87,11 @@ robj *createEmbeddedStringObject(const char *ptr, size_t len) { o->encoding = OBJ_ENCODING_EMBSTR; o->ptr = sh+1; o->refcount = 1; - o->lru = LRU_CLOCK(); + if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) { + o->lru = (LFUGetTimeInMinutes()<<8) | LFU_INIT_VAL; + } else { + o->lru = LRU_CLOCK(); + } sh->len = len; sh->alloc = len; @@ -394,8 +403,7 @@ robj *tryObjectEncoding(robj *o) { * because every object needs to have a private LRU field for the LRU * algorithm to work well. */ if ((server.maxmemory == 0 || - (server.maxmemory_policy != MAXMEMORY_VOLATILE_LRU && - server.maxmemory_policy != MAXMEMORY_ALLKEYS_LRU)) && + !(server.maxmemory & MAXMEMORY_FLAG_NO_SHARED_INTEGERS)) && value >= 0 && value < OBJ_SHARED_INTEGERS) { @@ -715,8 +723,12 @@ void objectCommand(client *c) { if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nullbulk)) == NULL) return; addReplyLongLong(c,estimateObjectIdleTime(o)/1000); + } else if (!strcasecmp(c->argv[1]->ptr,"freq") && c->argc == 3) { + if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nullbulk)) + == NULL) return; + addReplyLongLong(c,o->lru&255); } else { - addReplyError(c,"Syntax error. 
Try OBJECT (refcount|encoding|idletime)"); + addReplyError(c,"Syntax error. Try OBJECT (refcount|encoding|idletime|freq)"); } } diff --git a/src/server.h b/src/server.h index 0354877fc..6a9fb2a09 100644 --- a/src/server.h +++ b/src/server.h @@ -341,13 +341,22 @@ typedef long long mstime_t; /* millisecond time type. */ #define SET_OP_DIFF 1 #define SET_OP_INTER 2 -/* Redis maxmemory strategies */ -#define MAXMEMORY_VOLATILE_LRU 0 -#define MAXMEMORY_VOLATILE_TTL 1 -#define MAXMEMORY_VOLATILE_RANDOM 2 -#define MAXMEMORY_ALLKEYS_LRU 3 -#define MAXMEMORY_ALLKEYS_RANDOM 4 -#define MAXMEMORY_NO_EVICTION 5 +/* Redis maxmemory strategies. Instead of using just incremental number + * for this defines, we use a set of flags so that testing for certain + * properties common to multiple policies is faster. */ +#define MAXMEMORY_FLAG_LRU (1<<0) +#define MAXMEMORY_FLAG_LFU (1<<1) +#define MAXMEMORY_FLAG_NO_SHARED_INTEGERS \ + (MAXMEMORY_FLAG_LRU|MAXMEMORY_FLAG_LFU) +#define MAXMEMORY_VOLATILE_LRU ((0<<8)|MAXMEMORY_FLAG_LRU) +#define MAXMEMORY_VOLATILE_LFU ((1<<8)|MAXMEMORY_FLAG_LFU) +#define MAXMEMORY_VOLATILE_TTL (2<<8) +#define MAXMEMORY_VOLATILE_RANDOM (3<<8) +#define MAXMEMORY_ALLKEYS_LRU ((4<<8)|MAXMEMORY_FLAG_LRU) +#define MAXMEMORY_ALLKEYS_LFU ((5<<8)|MAXMEMORY_FLAG_LFU) +#define MAXMEMORY_ALLKEYS_RANDOM (6<<8) +#define MAXMEMORY_NO_EVICTION (7<<8) + #define CONFIG_DEFAULT_MAXMEMORY_POLICY MAXMEMORY_NO_EVICTION /* Scripting */ @@ -528,7 +537,9 @@ typedef struct RedisModuleIO { typedef struct redisObject { unsigned type:4; unsigned encoding:4; - unsigned lru:LRU_BITS; /* lru time (relative to server.lruclock) */ + unsigned lru:LRU_BITS; /* LRU time (relative to server.lruclock) or + * LFU data (least significant 8 bits frequency + * and most significant 16 bits decreas time). */ int refcount; void *ptr; } robj; @@ -1606,6 +1617,9 @@ void activeExpireCycle(int type); /* evict.c -- maxmemory handling and LRU eviction. 
*/ void evictionPoolAlloc(void); +#define LFU_INIT_VAL 5 +unsigned long LFUGetTimeInMinutes(void); +uint8_t LFULogIncr(uint8_t value); /* Git SHA1 */ char *redisGitSHA1(void); From a7cd95aa8b768875cf40b37e035bec303b43a3ca Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 18 Jul 2016 10:53:02 +0200 Subject: [PATCH 0069/1722] redis-cli LRU test mode: remove newline from key names. --- src/redis-cli.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 6aacecc77..7010e5e5a 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2402,7 +2402,7 @@ long long powerLawRand(long long min, long long max, double alpha) { /* Generates a key name among a set of lru_test_sample_size keys, using * an 80-20 distribution. */ void LRUTestGenKey(char *buf, size_t buflen) { - snprintf(buf, buflen, "lru:%lld\n", + snprintf(buf, buflen, "lru:%lld", powerLawRand(1, config.lru_test_sample_size, 6.2)); } From f934b0bbaece780e2f2c6805d6081296d8613632 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 18 Jul 2016 10:56:11 +0200 Subject: [PATCH 0070/1722] redis-cli LRU test mode: randomize value of key when setting. This way it is possible from an observer to tell when the key is replaced with a new one having the same name. --- src/redis-cli.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 7010e5e5a..d1735d638 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2424,8 +2424,11 @@ static void LRUTestMode(void) { while(mstime() - start_cycle < 1000) { /* Write cycle. 
*/ for (j = 0; j < LRU_CYCLE_PIPELINE_SIZE; j++) { + char val[6]; + val[5] = '\0'; + for (int i = 0; i < 5; i++) val[i] = 'A'+rand()%('z'-'A'); LRUTestGenKey(key,sizeof(key)); - redisAppendCommand(context, "SET %s val",key); + redisAppendCommand(context, "SET %s %s",key,val); } for (j = 0; j < LRU_CYCLE_PIPELINE_SIZE; j++) redisGetReply(context, (void**)&reply); From 4bc127a2bd1fc552da970bc80255c092ee6e72d8 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 18 Jul 2016 13:49:31 +0200 Subject: [PATCH 0071/1722] LFU: Initial naive eviction cycle. It is possible to get better results by using the pool like in the LRU case. Also from tests during the morning I believe the current implementation has issues in the frequency decay function that should decrease the counter at periodic intervals. --- src/db.c | 9 ++++++++- src/evict.c | 36 +++++++++++++++++++++++++++++++++--- src/object.c | 8 ++++++++ 3 files changed, 49 insertions(+), 4 deletions(-) diff --git a/src/db.c b/src/db.c index b00227d81..d33c810b3 100644 --- a/src/db.c +++ b/src/db.c @@ -175,7 +175,14 @@ void dbOverwrite(redisDb *db, robj *key, robj *val) { dictEntry *de = dictFind(db->dict,key->ptr); serverAssertWithInfo(NULL,key,de != NULL); - dictReplace(db->dict, key->ptr, val); + if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) { + robj *old = dictGetVal(de); + int saved_lru = old->lru; + dictReplace(db->dict, key->ptr, val); + val->lru = saved_lru; + } else { + dictReplace(db->dict, key->ptr, val); + } } /* High level Set operation. 
This function can be used in order to set diff --git a/src/evict.c b/src/evict.c index 753a2ac81..b48892b41 100644 --- a/src/evict.c +++ b/src/evict.c @@ -294,6 +294,7 @@ unsigned long LFUDecrAndReturn(robj *o) { if (LFUTimeElapsed(ldt) > LFU_DECR_INTERVAL && counter) { if (counter > LFU_INIT_VAL*2) { counter /= 2; + if (counter < LFU_INIT_VAL*2) counter = LFU_INIT_VAL*2; } else { counter--; } @@ -360,9 +361,7 @@ int freeMemoryIfNeeded(void) { dict *dict; dictEntry *de; - if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_LRU || - server.maxmemory_policy == MAXMEMORY_VOLATILE_LRU) - { + if (server.maxmemory_policy & MAXMEMORY_FLAG_LRU) { struct evictionPoolEntry *pool = EvictionPoolLRU; while(bestkey == NULL) { @@ -470,6 +469,37 @@ int freeMemoryIfNeeded(void) { } } + /* allkeys-lfu and volatile-lfu */ + else if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) { + long bestfreq = 0; /* Initialized to avoid warning. */ + + for (i = 0; i < server.dbnum; i++) { + j = (++next_db) % server.dbnum; + db = server.db+j; + dict = (server.maxmemory_policy == MAXMEMORY_ALLKEYS_LFU) ? + db->dict : db->expires; + if (dictSize(dict) != 0) { + for (k = 0; k < server.maxmemory_samples; k++) { + sds thiskey; + long thisfreq; + + de = dictGetRandomKey(dict); + thiskey = dictGetKey(de); + robj *o = dictFetchValue(db->dict,thiskey); + thisfreq = LFUDecrAndReturn(o); + + /* Keys with a smaller access frequency are + * better candidates for deletion */ + if (bestkey == NULL || thisfreq < bestfreq) { + bestkey = thiskey; + bestfreq = thisfreq; + bestdbid = j; + } + } + } + } + } + /* Finally remove the selected key. 
*/ if (bestkey) { db = server.db+bestdbid; diff --git a/src/object.c b/src/object.c index 8f19f60f2..c64d810b0 100644 --- a/src/object.c +++ b/src/object.c @@ -722,10 +722,18 @@ void objectCommand(client *c) { } else if (!strcasecmp(c->argv[1]->ptr,"idletime") && c->argc == 3) { if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nullbulk)) == NULL) return; + if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) { + addReplyError(c,"An LFU maxmemory policy is selected, idle time not tracked. Please note that when switching between policies at runtime LRU and LFU data will take some time to adjust."); + return; + } addReplyLongLong(c,estimateObjectIdleTime(o)/1000); } else if (!strcasecmp(c->argv[1]->ptr,"freq") && c->argc == 3) { if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nullbulk)) == NULL) return; + if (server.maxmemory_policy & MAXMEMORY_FLAG_LRU) { + addReplyError(c,"An LRU maxmemory policy is selected, access frequency not tracked. Please note that when switching between policies at runtime LRU and LFU data will take some time to adjust."); + return; + } addReplyLongLong(c,o->lru&255); } else { addReplyError(c,"Syntax error. Try OBJECT (refcount|encoding|idletime|freq)"); From c262acb656da0dacceecab450f2cc3cd71263845 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 18 Jul 2016 14:19:38 +0200 Subject: [PATCH 0072/1722] LFU: Fix bugs in frequency decay code. --- src/evict.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/evict.c b/src/evict.c index b48892b41..3025b3e9b 100644 --- a/src/evict.c +++ b/src/evict.c @@ -264,7 +264,7 @@ unsigned long LFUGetTimeInMinutes(void) { * exactly once. 
*/ unsigned long LFUTimeElapsed(unsigned long ldt) { unsigned long now = LFUGetTimeInMinutes(); - if (now > ldt) return now-ldt; + if (now >= ldt) return now-ldt; return 65535-ldt+now; } @@ -291,7 +291,7 @@ uint8_t LFULogIncr(uint8_t counter) { unsigned long LFUDecrAndReturn(robj *o) { unsigned long ldt = o->lru >> 8; unsigned long counter = o->lru & 255; - if (LFUTimeElapsed(ldt) > LFU_DECR_INTERVAL && counter) { + if (LFUTimeElapsed(ldt) >= LFU_DECR_INTERVAL && counter) { if (counter > LFU_INIT_VAL*2) { counter /= 2; if (counter < LFU_INIT_VAL*2) counter = LFU_INIT_VAL*2; From ab78df558e5d41265b6dd9616a5e9ed5fb3f6323 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 18 Jul 2016 18:17:57 +0200 Subject: [PATCH 0073/1722] LFU: Use the LRU pool for the LFU algorithm. Verified to have better real world performances with power-law access patterns because of the data accumulated across calls. --- src/evict.c | 61 ++++++++++++++++++++++------------------------------- 1 file changed, 25 insertions(+), 36 deletions(-) diff --git a/src/evict.c b/src/evict.c index 3025b3e9b..d791415a5 100644 --- a/src/evict.c +++ b/src/evict.c @@ -43,11 +43,15 @@ * Entries inside the eviciton pool are taken ordered by idle time, putting * greater idle times to the right (ascending order). * + * When an LFU policy is used instead, a reverse frequency indication is used + * instead of the idle time, so that we still evict by larger value (larger + * inverse frequency means to evict keys with the least frequent accesses). + * * Empty entries have the key pointer set to NULL. */ #define EVPOOL_SIZE 16 #define EVPOOL_CACHED_SDS_SIZE 255 struct evictionPoolEntry { - unsigned long long idle; /* Object idle time. */ + unsigned long long idle; /* Object idle time (inverse frequency for LFU) */ sds key; /* Key name. */ sds cached; /* Cached SDS object for key name. */ int dbid; /* Key DB number. 
*/ @@ -55,6 +59,8 @@ struct evictionPoolEntry { static struct evictionPoolEntry *EvictionPoolLRU; +unsigned long LFUDecrAndReturn(robj *o); + /* ---------------------------------------------------------------------------- * Implementation of eviction, aging and LRU * --------------------------------------------------------------------------*/ @@ -158,7 +164,18 @@ void evictionPoolPopulate(int dbid, dict *sampledict, dict *keydict, struct evic * again in the key dictionary to obtain the value object. */ if (sampledict != keydict) de = dictFind(keydict, key); o = dictGetVal(de); - idle = estimateObjectIdleTime(o); + if (server.maxmemory_policy & MAXMEMORY_FLAG_LRU) { + idle = estimateObjectIdleTime(o); + } else { + /* When we use an LRU policy, we sort the keys by idle time + * so that we expire keys starting from greater idle time. + * However when the policy is an LFU one, we have a frequency + * estimation, and we want to evict keys with lower frequency + * first. So inside the pool we put objects using the inverted + * frequency subtracting the actual frequency to the maximum + * frequency of 255. */ + idle = 255-LFUDecrAndReturn(o); + } /* Insert the element inside the pool. * First, find the first empty bucket or the first populated @@ -361,7 +378,7 @@ int freeMemoryIfNeeded(void) { dict *dict; dictEntry *de; - if (server.maxmemory_policy & MAXMEMORY_FLAG_LRU) { + if (server.maxmemory_policy & (MAXMEMORY_FLAG_LRU|MAXMEMORY_FLAG_LFU)) { struct evictionPoolEntry *pool = EvictionPoolLRU; while(bestkey == NULL) { @@ -372,7 +389,8 @@ int freeMemoryIfNeeded(void) { * every DB. */ for (i = 0; i < server.dbnum; i++) { db = server.db+i; - dict = (server.maxmemory_policy == MAXMEMORY_ALLKEYS_LRU) ? + dict = (server.maxmemory_policy == MAXMEMORY_ALLKEYS_LRU || + server.maxmemory_policy == MAXMEMORY_ALLKEYS_LFU) ? 
db->dict : db->expires; if ((keys = dictSize(dict)) != 0) { evictionPoolPopulate(i, dict, db->dict, pool); @@ -386,7 +404,9 @@ int freeMemoryIfNeeded(void) { if (pool[k].key == NULL) continue; bestdbid = pool[k].dbid; - if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_LRU) { + if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_LRU || + server.maxmemory_policy == MAXMEMORY_ALLKEYS_LFU) + { de = dictFind(server.db[pool[k].dbid].dict, pool[k].key); } else { @@ -469,37 +489,6 @@ int freeMemoryIfNeeded(void) { } } - /* allkeys-lfu and volatile-lfu */ - else if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) { - long bestfreq = 0; /* Initialized to avoid warning. */ - - for (i = 0; i < server.dbnum; i++) { - j = (++next_db) % server.dbnum; - db = server.db+j; - dict = (server.maxmemory_policy == MAXMEMORY_ALLKEYS_LFU) ? - db->dict : db->expires; - if (dictSize(dict) != 0) { - for (k = 0; k < server.maxmemory_samples; k++) { - sds thiskey; - long thisfreq; - - de = dictGetRandomKey(dict); - thiskey = dictGetKey(de); - robj *o = dictFetchValue(db->dict,thiskey); - thisfreq = LFUDecrAndReturn(o); - - /* Keys with a smaller access frequency are - * better candidates for deletion */ - if (bestkey == NULL || thisfreq < bestfreq) { - bestkey = thiskey; - bestfreq = thisfreq; - bestdbid = j; - } - } - } - } - } - /* Finally remove the selected key. */ if (bestkey) { db = server.db+bestdbid; From 0e1b0d627651705376c43c7a4d4a0a2ef3b4eb68 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 20 Jul 2016 15:00:35 +0200 Subject: [PATCH 0074/1722] LFU: make counter log factor and decay time configurable. 
--- redis.conf | 82 +++++++++++++++++++++++++++++++++++++++++++++------- src/config.c | 16 ++++++++++ src/evict.c | 5 ++-- src/server.c | 2 ++ src/server.h | 4 +++ 5 files changed, 95 insertions(+), 14 deletions(-) diff --git a/redis.conf b/redis.conf index 67cd50245..05158b4e7 100644 --- a/redis.conf +++ b/redis.conf @@ -492,7 +492,7 @@ slave-priority 100 ############################## MEMORY MANAGEMENT ################################ -# Don't use more memory than the specified amount of bytes. +# Set a memory usage limit to the specified amount of bytes. # When the memory limit is reached Redis will try to remove keys # according to the eviction policy selected (see maxmemory-policy). # @@ -501,8 +501,8 @@ slave-priority 100 # that would use more memory, like SET, LPUSH, and so on, and will continue # to reply to read-only commands like GET. # -# This option is usually useful when using Redis as an LRU cache, or to set -# a hard memory limit for an instance (using the 'noeviction' policy). +# This option is usually useful when using Redis as an LRU or LFU cache, or to +# set a hard memory limit for an instance (using the 'noeviction' policy). # # WARNING: If you have slaves attached to an instance with maxmemory on, # the size of the output buffers needed to feed the slaves are subtracted @@ -520,12 +520,20 @@ slave-priority 100 # MAXMEMORY POLICY: how Redis will select what to remove when maxmemory # is reached. You can select among five behaviors: # -# volatile-lru -> remove the key with an expire set using an LRU algorithm -# allkeys-lru -> remove any key according to the LRU algorithm -# volatile-random -> remove a random key with an expire set -# allkeys-random -> remove a random key, any key -# volatile-ttl -> remove the key with the nearest expire time (minor TTL) -# noeviction -> don't expire at all, just return an error on write operations +# volatile-lru -> Evict using approximated LRU among the keys with an expire set. 
+# allkeys-lru -> Evict any key using approximated LRU. +# volatile-lfu -> Evict using approximated LFU among the keys with an expire set. +# allkeys-lfu -> Evict any key using approximated LFU. +# volatile-random -> Remove a random key among the ones with an expire set. +# allkeys-random -> Remove a random key, any key. +# volatile-ttl -> Remove the key with the nearest expire time (minor TTL) +# noeviction -> Don't evict anything, just return an error on write operations. +# +# LRU means Least Recently Used +# LFU means Least Frequently Used +# +# Both LRU, LFU and volatile-ttl are implemented using approximated +# randomized algorithms. # # Note: with any of the above policies, Redis will return an error on write # operations, when there are no suitable keys for eviction. @@ -540,14 +548,14 @@ slave-priority 100 # # maxmemory-policy noeviction -# LRU and minimal TTL algorithms are not precise algorithms but approximated +# LRU, LFU and minimal TTL algorithms are not precise algorithms but approximated # algorithms (in order to save memory), so you can tune it for speed or # accuracy. For default Redis will check five keys and pick the one that was # used less recently, you can change the sample size using the following # configuration directive. # # The default of 5 produces good enough results. 10 Approximates very closely -# true LRU but costs a bit more CPU. 3 is very fast but not very accurate. +# true LRU but costs more CPU. 3 is faster but not very accurate. # # maxmemory-samples 5 @@ -1113,3 +1121,55 @@ hz 10 # in order to commit the file to the disk more incrementally and avoid # big latency spikes. aof-rewrite-incremental-fsync yes + +# Redis LFU eviction (see maxmemory setting) can be tuned. However it is a good +# idea to start with the default settings and only change them after investigating +# how to improve the performances and how the keys LFU change over time, which +# is possible to inspect via the OBJECT FREQ command. 
+# +# There are two tunable parameters in the Redis LFU implementation: the +# counter logarithm factor and the counter decay time. It is important to +# understand what the two parameters mean before changing them. +# +# The LFU counter is just 8 bits per key, it's maximum value is 255, so Redis +# uses a probabilistic increment with logarithmic behavior. Given the value +# of the old counter, when a key is accessed, the counter is incremented in +# this way: +# +# 1. A random number R between 0 and 1 is extracted. +# 2. A probability P is calculated as 1/(old_value*lfu_log_factor+1). +# 3. The counter is incremented only if R < P. +# +# The default lfu-log-factor is 10. This is a table of how the frequency +# counter changes with a different number of accesses with different +# logarithmic factors: +# +# +--------+------------+------------+------------+------------+------------+ +# | factor | 100 hits | 1000 hits | 100K hits | 1M hits | 10M hits | +# +--------+------------+------------+------------+------------+------------+ +# | 0 | 104 | 255 | 255 | 255 | 255 | +# +--------+------------+------------+------------+------------+------------+ +# | 1 | 18 | 49 | 255 | 255 | 255 | +# +--------+------------+------------+------------+------------+------------+ +# | 10 | 10 | 18 | 142 | 255 | 255 | +# +--------+------------+------------+------------+------------+------------+ +# | 100 | 8 | 11 | 49 | 143 | 255 | +# +--------+------------+------------+------------+------------+------------+ +# +# NOTE: The above table was obtained by running the following commands: +# +# redis-benchmark -n 1000000 incr foo +# redis-cli object freq foo +# +# NOTE 2: The counter initial value is 5 in order to give new objects a chance +# to accumulate hits. +# +# The counter decay time is the time, in minutes, that must elapse in order +# for the key counter to be divided by two (or decremented if it has a value +# less <= 10). +# +# The default value for the lfu-decay-time is 1. 
A Special value of 0 means to +# decay the counter every time it happens to be scanned. +# +# lfu-log-factor 10 +# lfu-decay-time 1 diff --git a/src/config.c b/src/config.c index f443ce22a..686f80cfd 100644 --- a/src/config.c +++ b/src/config.c @@ -324,6 +324,18 @@ void loadServerConfigFromString(char *config) { err = "maxmemory-samples must be 1 or greater"; goto loaderr; } + } else if (!strcasecmp(argv[0],"lfu-log-factor") && argc == 2) { + server.lfu_log_factor = atoi(argv[1]); + if (server.maxmemory_samples < 0) { + err = "lfu-log-factor must be 0 or greater"; + goto loaderr; + } + } else if (!strcasecmp(argv[0],"lfu-decay-time") && argc == 2) { + server.lfu_decay_time = atoi(argv[1]); + if (server.maxmemory_samples < 1) { + err = "lfu-decay-time must be 0 or greater"; + goto loaderr; + } } else if (!strcasecmp(argv[0],"slaveof") && argc == 3) { slaveof_linenum = linenum; server.masterhost = sdsnew(argv[1]); @@ -955,6 +967,10 @@ void configSetCommand(client *c) { "tcp-keepalive",server.tcpkeepalive,0,LLONG_MAX) { } config_set_numerical_field( "maxmemory-samples",server.maxmemory_samples,1,LLONG_MAX) { + } config_set_numerical_field( + "lfu-log-factor",server.lfu_log_factor,0,LLONG_MAX) { + } config_set_numerical_field( + "lfu-decay-time",server.lfu_decay_time,0,LLONG_MAX) { } config_set_numerical_field( "timeout",server.maxidletime,0,LONG_MAX) { } config_set_numerical_field( diff --git a/src/evict.c b/src/evict.c index d791415a5..4a4ba2ea9 100644 --- a/src/evict.c +++ b/src/evict.c @@ -287,13 +287,12 @@ unsigned long LFUTimeElapsed(unsigned long ldt) { /* Logarithmically increment a counter. The greater is the current counter value * the less likely is that it gets really implemented. Saturate it at 255. 
*/ -#define LFU_LOG_FACTOR 10 uint8_t LFULogIncr(uint8_t counter) { if (counter == 255) return 255; double r = (double)rand()/RAND_MAX; double baseval = counter - LFU_INIT_VAL; if (baseval < 0) baseval = 0; - double p = 1.0/(baseval*LFU_LOG_FACTOR+1); + double p = 1.0/(baseval*server.lfu_log_factor+1); if (r < p) counter++; return counter; } @@ -308,7 +307,7 @@ uint8_t LFULogIncr(uint8_t counter) { unsigned long LFUDecrAndReturn(robj *o) { unsigned long ldt = o->lru >> 8; unsigned long counter = o->lru & 255; - if (LFUTimeElapsed(ldt) >= LFU_DECR_INTERVAL && counter) { + if (LFUTimeElapsed(ldt) >= server.lfu_decay_time && counter) { if (counter > LFU_INIT_VAL*2) { counter /= 2; if (counter < LFU_INIT_VAL*2) counter = LFU_INIT_VAL*2; diff --git a/src/server.c b/src/server.c index f8847f646..abb98edfd 100644 --- a/src/server.c +++ b/src/server.c @@ -1341,6 +1341,8 @@ void initServerConfig(void) { server.maxmemory = CONFIG_DEFAULT_MAXMEMORY; server.maxmemory_policy = CONFIG_DEFAULT_MAXMEMORY_POLICY; server.maxmemory_samples = CONFIG_DEFAULT_MAXMEMORY_SAMPLES; + server.lfu_log_factor = CONFIG_DEFAULT_LFU_LOG_FACTOR; + server.lfu_decay_time = CONFIG_DEFAULT_LFU_DECAY_TIME; server.hash_max_ziplist_entries = OBJ_HASH_MAX_ZIPLIST_ENTRIES; server.hash_max_ziplist_value = OBJ_HASH_MAX_ZIPLIST_VALUE; server.list_max_ziplist_size = OBJ_LIST_MAX_ZIPLIST_SIZE; diff --git a/src/server.h b/src/server.h index 6a9fb2a09..266c5336d 100644 --- a/src/server.h +++ b/src/server.h @@ -129,6 +129,8 @@ typedef long long mstime_t; /* millisecond time type. 
*/ #define CONFIG_DEFAULT_REPL_DISABLE_TCP_NODELAY 0 #define CONFIG_DEFAULT_MAXMEMORY 0 #define CONFIG_DEFAULT_MAXMEMORY_SAMPLES 5 +#define CONFIG_DEFAULT_LFU_LOG_FACTOR 10 +#define CONFIG_DEFAULT_LFU_DECAY_TIME 1 #define CONFIG_DEFAULT_AOF_FILENAME "appendonly.aof" #define CONFIG_DEFAULT_AOF_NO_FSYNC_ON_REWRITE 0 #define CONFIG_DEFAULT_AOF_LOAD_TRUNCATED 1 @@ -981,6 +983,8 @@ struct redisServer { unsigned long long maxmemory; /* Max number of memory bytes to use */ int maxmemory_policy; /* Policy for key eviction */ int maxmemory_samples; /* Pricision of random sampling */ + unsigned int lfu_log_factor; /* LFU logarithmic counter factor. */ + unsigned int lfu_decay_time; /* LFU counter decay factor. */ /* Blocked clients */ unsigned int bpop_blocked_clients; /* Number of clients blocked by lists */ list *unblocked_clients; /* list of clients to unblock before next loop */ From 760725aa5bfb6a7f12c931d897d2f781c395a5f7 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 20 Jul 2016 19:02:20 +0200 Subject: [PATCH 0075/1722] test-lru.rb: support for testing volatile-ttl policy. --- utils/lru/test-lru.rb | 61 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 48 insertions(+), 13 deletions(-) diff --git a/utils/lru/test-lru.rb b/utils/lru/test-lru.rb index 1a6baf467..d511e206f 100644 --- a/utils/lru/test-lru.rb +++ b/utils/lru/test-lru.rb @@ -2,11 +2,16 @@ require 'rubygems' require 'redis' $runs = []; # Remember the error rate of each run for average purposes. 
+$o = {}; # Options set parsing arguments def testit(filename) r = Redis.new r.config("SET","maxmemory","2000000") - r.config("SET","maxmemory-policy","allkeys-lru") + if $o[:ttl] + r.config("SET","maxmemory-policy","volatile-ttl") + else + r.config("SET","maxmemory-policy","allkeys-lru") + end r.config("SET","maxmemory-samples",5) r.config("RESETSTAT") r.flushall @@ -47,7 +52,11 @@ EOF id = 0 while true id += 1 - r.set(id,"foo") + begin + r.set(id,"foo") + rescue + break + end newsize = r.dbsize break if newsize == oldsize # A key was evicted? Stop. oldsize = newsize @@ -60,12 +69,20 @@ EOF # Access keys sequentially, so that in theory the first part will be expired # and the latter part will not, according to perfect LRU. - STDERR.puts "Access keys sequentially" - (1..first_set_max_id).each{|id| - r.get(id) - sleep 0.001 - STDERR.print(".") if (id % 150) == 0 - } + if $o[:ttl] + STDERR.puts "Set increasing expire value" + (1..first_set_max_id).each{|id| + r.expire(id,1000+id) + STDERR.print(".") if (id % 150) == 0 + } + else + STDERR.puts "Access keys sequentially" + (1..first_set_max_id).each{|id| + r.get(id) + sleep 0.001 + STDERR.print(".") if (id % 150) == 0 + } + end STDERR.puts # Insert more 50% keys. We expect that the new keys will rarely be expired @@ -173,16 +190,34 @@ def print_avg end if ARGV.length < 1 - STDERR.puts "Usage: ruby test-lru.rb [num-runs]" + STDERR.puts "Usage: ruby test-lru.rb [--runs ] [--ttl]" + STDERR.puts "Options:" + STDERR.puts " --runs Execute the test times." + STDERR.puts " --ttl Set keys with increasing TTL values" + STDERR.puts " (starting from 1000 seconds) in order to" + STDERR.puts " test the volatile-lru policy." 
exit 1 end filename = ARGV[0] -numruns = 1 +$o[:numruns] = 1 -numruns = ARGV[1].to_i if ARGV.length == 2 +# Options parsing +i = 1 +while i < ARGV.length + if ARGV[i] == '--runs' + $o[:numruns] = ARGV[i+1].to_i + i+= 1 + elsif ARGV[i] == '--ttl' + $o[:ttl] = true + else + STDERR.puts "Unknown option #{ARGV[i]}" + exit 1 + end + i+= 1 +end -numruns.times { +$o[:numruns].times { testit(filename) - print_avg if numruns != 1 + print_avg if $o[:numruns] != 1 } From 4376529929841ce369a0ccedbaa6816e5f313ebd Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 20 Jul 2016 19:53:27 +0200 Subject: [PATCH 0076/1722] Volatile-ttl eviction policy implemented in terms of the pool. Precision of the eviction improved sensibly. Also this allows us to have a single code path for most eviction types. --- src/evict.c | 65 ++++++++++++++++------------------------------------ src/server.h | 8 ++++--- 2 files changed, 25 insertions(+), 48 deletions(-) diff --git a/src/evict.c b/src/evict.c index 4a4ba2ea9..802997ce8 100644 --- a/src/evict.c +++ b/src/evict.c @@ -159,14 +159,21 @@ void evictionPoolPopulate(int dbid, dict *sampledict, dict *keydict, struct evic de = samples[j]; key = dictGetKey(de); + /* If the dictionary we are sampling from is not the main * dictionary (but the expires one) we need to lookup the key * again in the key dictionary to obtain the value object. */ - if (sampledict != keydict) de = dictFind(keydict, key); - o = dictGetVal(de); + if (server.maxmemory_policy != MAXMEMORY_VOLATILE_TTL) { + if (sampledict != keydict) de = dictFind(keydict, key); + o = dictGetVal(de); + } + + /* Calculate the idle time according to the policy. This is called + * idle just because the code initially handled LRU, but is in fact + * just a score where an higher score means better candidate. 
*/ if (server.maxmemory_policy & MAXMEMORY_FLAG_LRU) { idle = estimateObjectIdleTime(o); - } else { + } else if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) { /* When we use an LRU policy, we sort the keys by idle time * so that we expire keys starting from greater idle time. * However when the policy is an LFU one, we have a frequency @@ -175,6 +182,11 @@ void evictionPoolPopulate(int dbid, dict *sampledict, dict *keydict, struct evic * frequency subtracting the actual frequency to the maximum * frequency of 255. */ idle = 255-LFUDecrAndReturn(o); + } else if (server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL) { + /* In this case the sooner the expire the better. */ + idle = ULLONG_MAX - (long)dictGetVal(de); + } else { + serverPanic("Unknown eviction policy in evictionPoolPopulate()"); } /* Insert the element inside the pool. @@ -377,7 +389,9 @@ int freeMemoryIfNeeded(void) { dict *dict; dictEntry *de; - if (server.maxmemory_policy & (MAXMEMORY_FLAG_LRU|MAXMEMORY_FLAG_LFU)) { + if (server.maxmemory_policy & (MAXMEMORY_FLAG_LRU|MAXMEMORY_FLAG_LFU) || + server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL) + { struct evictionPoolEntry *pool = EvictionPoolLRU; while(bestkey == NULL) { @@ -388,8 +402,7 @@ int freeMemoryIfNeeded(void) { * every DB. */ for (i = 0; i < server.dbnum; i++) { db = server.db+i; - dict = (server.maxmemory_policy == MAXMEMORY_ALLKEYS_LRU || - server.maxmemory_policy == MAXMEMORY_ALLKEYS_LFU) ? + dict = (server.maxmemory_policy & MAXMEMORY_FLAG_ALLKEYS) ? 
db->dict : db->expires; if ((keys = dictSize(dict)) != 0) { evictionPoolPopulate(i, dict, db->dict, pool); @@ -403,9 +416,7 @@ int freeMemoryIfNeeded(void) { if (pool[k].key == NULL) continue; bestdbid = pool[k].dbid; - if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_LRU || - server.maxmemory_policy == MAXMEMORY_ALLKEYS_LFU) - { + if (server.maxmemory_policy & MAXMEMORY_FLAG_ALLKEYS) { de = dictFind(server.db[pool[k].dbid].dict, pool[k].key); } else { @@ -452,42 +463,6 @@ int freeMemoryIfNeeded(void) { } } - /* volatile-ttl */ - else if (server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL) { - long bestttl = 0; /* Initialized to avoid warning. */ - - /* In this policy we scan a single DB per iteration (visiting - * a different DB per call), expiring the key with the smallest - * TTL among the few sampled. - * - * Note that this algorithm makes local-DB choices, and should - * use a pool and code more similr to the one used in the - * LRU eviction policies in the future. */ - for (i = 0; i < server.dbnum; i++) { - j = (++next_db) % server.dbnum; - db = server.db+j; - dict = db->expires; - if (dictSize(dict) != 0) { - for (k = 0; k < server.maxmemory_samples; k++) { - sds thiskey; - long thisttl; - - de = dictGetRandomKey(dict); - thiskey = dictGetKey(de); - thisttl = (long) dictGetVal(de); - - /* Keys expiring sooner (smaller unix timestamp) are - * better candidates for deletion */ - if (bestkey == NULL || thisttl < bestttl) { - bestkey = thiskey; - bestttl = thisttl; - bestdbid = j; - } - } - } - } - } - /* Finally remove the selected key. */ if (bestkey) { db = server.db+bestdbid; diff --git a/src/server.h b/src/server.h index 266c5336d..4e34453e5 100644 --- a/src/server.h +++ b/src/server.h @@ -348,15 +348,17 @@ typedef long long mstime_t; /* millisecond time type. */ * properties common to multiple policies is faster. 
*/ #define MAXMEMORY_FLAG_LRU (1<<0) #define MAXMEMORY_FLAG_LFU (1<<1) +#define MAXMEMORY_FLAG_ALLKEYS (1<<2) #define MAXMEMORY_FLAG_NO_SHARED_INTEGERS \ (MAXMEMORY_FLAG_LRU|MAXMEMORY_FLAG_LFU) + #define MAXMEMORY_VOLATILE_LRU ((0<<8)|MAXMEMORY_FLAG_LRU) #define MAXMEMORY_VOLATILE_LFU ((1<<8)|MAXMEMORY_FLAG_LFU) #define MAXMEMORY_VOLATILE_TTL (2<<8) #define MAXMEMORY_VOLATILE_RANDOM (3<<8) -#define MAXMEMORY_ALLKEYS_LRU ((4<<8)|MAXMEMORY_FLAG_LRU) -#define MAXMEMORY_ALLKEYS_LFU ((5<<8)|MAXMEMORY_FLAG_LFU) -#define MAXMEMORY_ALLKEYS_RANDOM (6<<8) +#define MAXMEMORY_ALLKEYS_LRU ((4<<8)|MAXMEMORY_FLAG_LRU|MAXMEMORY_FLAG_ALLKEYS) +#define MAXMEMORY_ALLKEYS_LFU ((5<<8)|MAXMEMORY_FLAG_LFU|MAXMEMORY_FLAG_ALLKEYS) +#define MAXMEMORY_ALLKEYS_RANDOM ((6<<8)|MAXMEMORY_FLAG_ALLKEYS) #define MAXMEMORY_NO_EVICTION (7<<8) #define CONFIG_DEFAULT_MAXMEMORY_POLICY MAXMEMORY_NO_EVICTION From b98c2df5a00ac64c9d7013676cd47556c1993079 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 21 Jul 2016 11:14:15 +0200 Subject: [PATCH 0077/1722] Fix maxmemory shared integer check bug introduced with LFU. --- src/object.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/object.c b/src/object.c index c64d810b0..a7c1e4c21 100644 --- a/src/object.c +++ b/src/object.c @@ -403,7 +403,7 @@ robj *tryObjectEncoding(robj *o) { * because every object needs to have a private LRU field for the LRU * algorithm to work well. */ if ((server.maxmemory == 0 || - !(server.maxmemory & MAXMEMORY_FLAG_NO_SHARED_INTEGERS)) && + !(server.maxmemory_policy & MAXMEMORY_FLAG_NO_SHARED_INTEGERS)) && value >= 0 && value < OBJ_SHARED_INTEGERS) { From 1c2fb8a0df382a43306f6dde3c0c7ad22ccab2c5 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 21 Jul 2016 12:10:56 +0200 Subject: [PATCH 0078/1722] Replication: start BGSAVE for replication always in replicationCron(). This makes the replication code conceptually simpler by removing the synchronous BGSAVE trigger in syncCommand(). 
This also means that socket and disk BGSAVE targets are handled by the same code. --- src/replication.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/src/replication.c b/src/replication.c index 8aa0d807a..fa062653c 100644 --- a/src/replication.c +++ b/src/replication.c @@ -647,7 +647,7 @@ void syncCommand(client *c) { } else { /* No way, we need to wait for the next BGSAVE in order to * register differences. */ - serverLog(LL_NOTICE,"Waiting for next BGSAVE for SYNC"); + serverLog(LL_NOTICE,"Can't attach the slave to the current BGSAVE. Waiting for next BGSAVE for SYNC"); } /* CASE 2: BGSAVE is in progress, with socket target. */ @@ -657,7 +657,7 @@ void syncCommand(client *c) { /* There is an RDB child process but it is writing directly to * children sockets. We need to wait for the next BGSAVE * in order to synchronize. */ - serverLog(LL_NOTICE,"Waiting for next BGSAVE for SYNC"); + serverLog(LL_NOTICE,"Current BGSAVE has socket target. Waiting for next BGSAVE for SYNC"); /* CASE 3: There is no BGSAVE is progress. */ } else { @@ -671,7 +671,7 @@ void syncCommand(client *c) { /* Target is disk (or the slave is not capable of supporting * diskless replication) and we don't have a BGSAVE in progress, * let's start one. */ - if (startBgsaveForReplication(c->slave_capa) != C_OK) return; + serverLog(LL_NOTICE,"No BGSAVE in progress. Starting one ASAP"); } } @@ -2307,13 +2307,12 @@ void replicationCron(void) { replicationScriptCacheFlush(); } - /* If we are using diskless replication and there are slaves waiting - * in WAIT_BGSAVE_START state, check if enough seconds elapsed and - * start a BGSAVE. + /* Start a BGSAVE good for replication if we have slaves in + * WAIT_BGSAVE_START state. * - * This code is also useful to trigger a BGSAVE if the diskless - * replication was turned off with CONFIG SET, while there were already - * slaves in WAIT_BGSAVE_START state. 
*/ + * In case of diskless replication, we make sure to wait the specified + * number of seconds (according to configuration) so that other slaves + * have the time to arrive before we start streaming. */ if (server.rdb_child_pid == -1 && server.aof_child_pid == -1) { time_t idle, max_idle = 0; int slaves_waiting = 0; @@ -2333,9 +2332,13 @@ void replicationCron(void) { } } - if (slaves_waiting && max_idle > server.repl_diskless_sync_delay) { - /* Start a BGSAVE. Usually with socket target, or with disk target - * if there was a recent socket -> disk config change. */ + if (slaves_waiting && + (!server.repl_diskless_sync || + max_idle > server.repl_diskless_sync_delay)) + { + /* Start the BGSAVE. The called function may start a + * BGSAVE with socket target or disk target depending on the + * configuration and slaves capabilities. */ startBgsaveForReplication(mincapa); } } From 8f6844d7dd08dd8bdcfeef04e92ade283b95803e Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 21 Jul 2016 18:34:53 +0200 Subject: [PATCH 0079/1722] Avoid simultaneous RDB and AOF child process. This patch, written in collaboration with Oran Agra (@oranagra) is a companion to 780a8b1. Together the two patches should avoid that the AOF and RDB saving processes can be spawned at the same time. Previously conditions that could lead to two saving processes at the same time were: 1. When AOF is enabled via CONFIG SET and an RDB saving process is already active. 2. When the SYNC command decides to start an RDB saving process ASAP in order to serve a new slave that cannot partially resynchronize (but only if we have a disk target for replication, for diskless replication there is not such a problem). Condition "1" is not very severe but "2" can happen often and is definitely good at degrading Redis performances in an unexpected way. The two commits have the effect of always spawning RDB savings for replication in replicationCron() instead of attempting to start an RDB save synchronously. 
Moreover when a BGSAVE or AOF rewrite must be performed, they are instead just postponed using flags that will try to perform such operations ASAP. Finally the BGSAVE command was modified in order to accept a SCHEDULE option so that if an AOF rewrite is in progress, when this option is given, the command no longer returns an error, but instead schedules an RDB rewrite operation for when it will be possible to start it. --- src/aof.c | 7 +++++-- src/rdb.c | 28 +++++++++++++++++++++++++--- src/server.c | 23 ++++++++++++++++++++--- src/server.h | 1 + 4 files changed, 51 insertions(+), 8 deletions(-) diff --git a/src/aof.c b/src/aof.c index aa726d33b..6a92a0cd9 100644 --- a/src/aof.c +++ b/src/aof.c @@ -251,7 +251,10 @@ int startAppendOnly(void) { strerror(errno)); return C_ERR; } - if (rewriteAppendOnlyFileBackground() == C_ERR) { + if (server.rdb_child_pid != -1) { + server.aof_rewrite_scheduled = 1; + serverLog(LL_WARNING,"AOF was enabled but there is already a child process saving an RDB file on disk. An AOF background was scheduled to start when possible."); + } else if (rewriteAppendOnlyFileBackground() == C_ERR) { close(server.aof_fd); serverLog(LL_WARNING,"Redis needs to enable the AOF but can't trigger a background AOF rewrite operation. 
Check the above logs for more info about the error."); return C_ERR; @@ -1273,7 +1276,7 @@ int rewriteAppendOnlyFileBackground(void) { pid_t childpid; long long start; - if (server.aof_child_pid != -1) return C_ERR; + if (server.aof_child_pid != -1 || server.rdb_child_pid != -1) return C_ERR; if (aofCreatePipes() != C_OK) return C_ERR; start = ustime(); if ((childpid = fork()) == 0) { diff --git a/src/rdb.c b/src/rdb.c index 3b7cec6d7..859297943 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -997,7 +997,7 @@ int rdbSaveBackground(char *filename) { pid_t childpid; long long start; - if (server.rdb_child_pid != -1) return C_ERR; + if (server.aof_child_pid != -1 || server.rdb_child_pid != -1) return C_ERR; server.dirty_before_bgsave = server.dirty; server.lastbgsave_try = time(NULL); @@ -1687,7 +1687,7 @@ int rdbSaveToSlavesSockets(void) { long long start; int pipefds[2]; - if (server.rdb_child_pid != -1) return C_ERR; + if (server.aof_child_pid != -1 || server.rdb_child_pid != -1) return C_ERR; /* Before to fork, create a pipe that will be used in order to * send back to the parent the IDs of the slaves that successfully @@ -1842,11 +1842,33 @@ void saveCommand(client *c) { } } +/* BGSAVE [SCHEDULE] */ void bgsaveCommand(client *c) { + int schedule = 0; + + /* The SCHEDULE option changes the behavior of BGSAVE when an AOF rewrite + * is in progress. Instead of returning an error a BGSAVE gets scheduled. */ + if (c->argc > 1) { + if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"schedule")) { + schedule = 1; + } else { + addReply(c,shared.syntaxerr); + return; + } + } + if (server.rdb_child_pid != -1) { addReplyError(c,"Background save already in progress"); } else if (server.aof_child_pid != -1) { - addReplyError(c,"Can't BGSAVE while AOF log rewriting is in progress"); + if (schedule) { + server.rdb_bgsave_scheduled = 1; + addReplyStatus(c,"Background saving scheduled"); + } else { + addReplyError(c, + "An AOF log rewriting in progress: can't BGSAVE right now. 
" + "Use BGSAVE SCHEDULE in order to schedule a BGSAVE whenver " + "possible."); + } } else if (rdbSaveBackground(server.rdb_filename) == C_OK) { addReplyStatus(c,"Background saving started"); } else { diff --git a/src/server.c b/src/server.c index abb98edfd..f9806d280 100644 --- a/src/server.c +++ b/src/server.c @@ -233,7 +233,7 @@ struct redisCommand redisCommandTable[] = { {"ping",pingCommand,-1,"tF",0,NULL,0,0,0,0,0}, {"echo",echoCommand,2,"F",0,NULL,0,0,0,0,0}, {"save",saveCommand,1,"as",0,NULL,0,0,0,0,0}, - {"bgsave",bgsaveCommand,1,"a",0,NULL,0,0,0,0,0}, + {"bgsave",bgsaveCommand,-1,"a",0,NULL,0,0,0,0,0}, {"bgrewriteaof",bgrewriteaofCommand,1,"a",0,NULL,0,0,0,0,0}, {"shutdown",shutdownCommand,-1,"alt",0,NULL,0,0,0,0,0}, {"lastsave",lastsaveCommand,1,"RF",0,NULL,0,0,0,0,0}, @@ -1113,8 +1113,8 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { /* Clear the paused clients flag if needed. */ clientsArePaused(); /* Don't check return value, just use the side effect. */ - /* Replication cron function -- used to reconnect to master and - * to detect transfer failures. */ + /* Replication cron function -- used to reconnect to master, + * detect transfer failures, start background RDB transfers and so forth. */ run_with_period(1000) replicationCron(); /* Run the Redis Cluster cron. */ @@ -1132,6 +1132,22 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { migrateCloseTimedoutSockets(); } + /* Start a scheduled BGSAVE if the corresponding flag is set. This is + * useful when we are forced to postpone a BGSAVE because an AOF + * rewrite is in progress. + * + * Note: this code must be after the replicationCron() call above so + * make sure when refactoring this file to keep this order. This is useful + * because we want to give priority to RDB savings for replication. 
*/ + if (server.rdb_child_pid == -1 && server.aof_child_pid == -1 && + server.rdb_bgsave_scheduled && + (server.unixtime-server.lastbgsave_try > CONFIG_BGSAVE_RETRY_DELAY || + server.lastbgsave_status == C_OK)) + { + if (rdbSaveBackground(server.rdb_filename) == C_OK) + server.rdb_bgsave_scheduled = 0; + } + server.cronloops++; return 1000/server.hz; } @@ -1762,6 +1778,7 @@ void initServer(void) { server.rdb_child_pid = -1; server.aof_child_pid = -1; server.rdb_child_type = RDB_CHILD_TYPE_NONE; + server.rdb_bgsave_scheduled = 0; aofRewriteBufferReset(); server.aof_buf = sdsempty(); server.lastsave = time(NULL); /* At startup we consider the DB saved. */ diff --git a/src/server.h b/src/server.h index 4e34453e5..534b59bd5 100644 --- a/src/server.h +++ b/src/server.h @@ -918,6 +918,7 @@ struct redisServer { time_t lastbgsave_try; /* Unix time of last attempted bgsave */ time_t rdb_save_time_last; /* Time used by last RDB save run. */ time_t rdb_save_time_start; /* Current RDB save start time. */ + int rdb_bgsave_scheduled; /* BGSAVE when possible if true. */ int rdb_child_type; /* Type of save by active child. */ int lastbgsave_status; /* C_OK or C_ERR */ int stop_writes_on_bgsave_err; /* Don't allow writes if can't BGSAVE */ From f844032233a100fafc6cc1eb0333cb8c8d7ef21b Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 22 Jul 2016 10:51:25 +0200 Subject: [PATCH 0080/1722] Sentinel: check Slave INFO state more often when disconnected. During the initial handshake with the master a slave will report to have a very high disconnection time from its master (since technically it was disconnected since forever, so the current UNIX time in seconds is reported). However when the slave is connected again the Sentinel may re-scan the INFO output again only after 10 seconds, which is a long time. During this time Sentinels will consider this instance unable to failover, so a useless delay is introduced. 
Actually this hardly happened in practice because when a slave's master is down, the INFO period for slaves changes to 1 second. However when a manual failover is attempted immediately after adding slaves (like in the case of the Sentinel unit test), this problem may happen. This commit changes the INFO period to 1 second even in the case the slave's master is not down, but the slave reported to be disconnected from the master (by publishing, last time we checked, a master disconnection time field in INFO). This change is required as a result of an unrelated change in the replication code that adds a small delay in the master-slave first synchronization. --- src/sentinel.c | 10 ++++++++-- tests/sentinel/tests/05-manual.tcl | 3 ++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/sentinel.c b/src/sentinel.c index baf6f9cbd..0168aa637 100644 --- a/src/sentinel.c +++ b/src/sentinel.c @@ -2579,9 +2579,15 @@ void sentinelSendPeriodicCommands(sentinelRedisInstance *ri) { /* If this is a slave of a master in O_DOWN condition we start sending * it INFO every second, instead of the usual SENTINEL_INFO_PERIOD * period. In this state we want to closely monitor slaves in case they - * are turned into masters by another Sentinel, or by the sysadmin. */ + * are turned into masters by another Sentinel, or by the sysadmin. + * + * Similarly we monitor the INFO output more often if the slave reports + * to be disconnected from the master, so that we can have a fresh + * disconnection time figure. 
*/ if ((ri->flags & SRI_SLAVE) && - (ri->master->flags & (SRI_O_DOWN|SRI_FAILOVER_IN_PROGRESS))) { + ((ri->master->flags & (SRI_O_DOWN|SRI_FAILOVER_IN_PROGRESS)) || + (ri->master_link_down_time != 0))) + { info_period = 1000; } else { info_period = SENTINEL_INFO_PERIOD; diff --git a/tests/sentinel/tests/05-manual.tcl b/tests/sentinel/tests/05-manual.tcl index 1a60d814b..5214fdce1 100644 --- a/tests/sentinel/tests/05-manual.tcl +++ b/tests/sentinel/tests/05-manual.tcl @@ -6,7 +6,8 @@ test "Manual failover works" { set old_port [RI $master_id tcp_port] set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] assert {[lindex $addr 1] == $old_port} - S 0 SENTINEL FAILOVER mymaster + catch {S 0 SENTINEL FAILOVER mymaster} reply + assert {$reply eq "OK"} foreach_sentinel_id id { wait_for_condition 1000 50 { [lindex [S $id SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] 1] != $old_port From 7534ce9996fbecbe4a809dedb75d469e29906c9c Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 22 Jul 2016 16:39:22 +0200 Subject: [PATCH 0081/1722] Sentinel: new test unit 07 that tests master down conditions. 
--- tests/sentinel/tests/07-down-conditions.tcl | 68 +++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 tests/sentinel/tests/07-down-conditions.tcl diff --git a/tests/sentinel/tests/07-down-conditions.tcl b/tests/sentinel/tests/07-down-conditions.tcl new file mode 100644 index 000000000..a60656e59 --- /dev/null +++ b/tests/sentinel/tests/07-down-conditions.tcl @@ -0,0 +1,68 @@ +# Test conditions where an instance is considered to be down + +source "../tests/includes/init-tests.tcl" + +proc ensure_master_up {} { + wait_for_condition 1000 50 { + [dict get [S 4 sentinel master mymaster] flags] eq "master" + } else { + fail "Master flags are not just 'master'" + } +} + +proc ensure_master_down {} { + wait_for_condition 1000 50 { + [string match *down* \ + [dict get [S 4 sentinel master mymaster] flags]] + } else { + fail "Master is not flagged SDOWN" + } +} + +test "Crash the majority of Sentinels to prevent failovers for this unit" { + for {set id 0} {$id < $quorum} {incr id} { + kill_instance sentinel $id + } +} + +test "SDOWN is triggered by non-responding but not crashed instance" { + lassign [S 4 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] host port + ensure_master_up + exec ../../../src/redis-cli -h $host -p $port debug sleep 10 > /dev/null & + ensure_master_down + ensure_master_up +} + +test "SDOWN is triggered by crashed instance" { + lassign [S 4 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] host port + ensure_master_up + kill_instance redis 0 + ensure_master_down + restart_instance redis 0 + ensure_master_up +} + +test "SDOWN is triggered by masters advertising as slaves" { + ensure_master_up + R 0 slaveof 127.0.0.1 34567 + ensure_master_down + R 0 slaveof no one + ensure_master_up +} + +test "SDOWN is triggered by misconfigured instance repling with errors" { + ensure_master_up + set orig_dir [lindex [R 0 config get dir] 1] + set orig_save [lindex [R 0 config get save] 1] + # Set dir to / and filename to "tmp" to make sure it will fail. 
+ R 0 config set dir / + R 0 config set dbfilename tmp + R 0 config set save "1000000 1000000" + R 0 bgsave + ensure_master_down + R 0 config set save $orig_save + R 0 config set dir $orig_dir + R 0 config set dbfilename dump.rdb + R 0 bgsave + ensure_master_up +} From 0de6a2702bc5a8eb6f327132e12a5a149c207a36 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 22 Jul 2016 17:03:18 +0200 Subject: [PATCH 0082/1722] Replication: when possible start RDB saving ASAP. In a previous commit the replication code was changed in order to centralize the BGSAVE for replication trigger in replicationCron(), however after further testings, the 1 second delay imposed by this change is not acceptable. So now the BGSAVE is only delayed if the AOF rewriting process is active. However past comments made sure that replicationCron() is always able to trigger the BGSAVE when needed, making the code generally more robust. The new code is more similar to the initial @oranagra patch where the BGSAVE was delayed only if an AOF rewrite was in progress. Trivia: delaying the BGSAVE uncovered a minor Sentinel issue that is now fixed. --- src/replication.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/replication.c b/src/replication.c index fa062653c..1d40677e5 100644 --- a/src/replication.c +++ b/src/replication.c @@ -666,12 +666,18 @@ void syncCommand(client *c) { * replicationCron() since we want to delay its start a * few seconds to wait for more slaves to arrive. */ if (server.repl_diskless_sync_delay) - serverLog(LL_NOTICE,"Delay next BGSAVE for SYNC"); + serverLog(LL_NOTICE,"Delay next BGSAVE for diskless SYNC"); } else { /* Target is disk (or the slave is not capable of supporting * diskless replication) and we don't have a BGSAVE in progress, * let's start one. */ - serverLog(LL_NOTICE,"No BGSAVE in progress. 
Starting one ASAP"); + if (server.aof_child_pid == -1) { + startBgsaveForReplication(c->slave_capa); + } else { + serverLog(LL_NOTICE, + "No BGSAVE in progress, but an AOF rewrite is active. " + "BGSAVE for replication delayed"); + } } } From 976c2425c4eccb5cd3a890b59ca2e1df82304dfd Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 27 Jul 2016 11:07:23 +0200 Subject: [PATCH 0083/1722] Multiple GEORADIUS bugs fixed. By grepping the continuous integration errors log a number of GEORADIUS tests failures were detected. Fortunately when a GEORADIUS failure happens, the test suite logs enough information in order to reproduce the problem: the PRNG seed, coordinates and radius of the query. By reproducing the issues, three different bugs were discovered and fixed in this commit. This commit also improves the already good reporting of the fuzzer and adds the failure vectors as regression tests. The issues found: 1. We need larger squares around the poles in order to cover the area requested by the user. There were already checks in order to use a smaller step (larger squares) but the limit set (+/- 67 degrees) is not enough in certain edge cases, so 66 is used now. 2. Even near the equator, when the search area center is very near the edge of the square, the north, south, west or east square may not be able to fully cover the specified radius. Now a test is performed at the edge of the initial guessed search area, and larger squares are used in case the test fails. 3. Because of rounding errors between Redis and Tcl, sometimes the test signaled false positives. This is now addressed. Whenever possible the original code was improved a bit in other ways. A debugging example stanza was added in order to make the next debugging session simpler when the next bug is found. 
--- src/debugmacro.h | 41 ++++++++++++++++++++ src/geo.c | 3 +- src/geohash.c | 4 +- src/geohash.h | 2 +- src/geohash_helper.c | 89 ++++++++++++++++++++++++++++++++++++-------- tests/unit/geo.tcl | 81 +++++++++++++++++++++++++++++++++++++++- 6 files changed, 199 insertions(+), 21 deletions(-) create mode 100644 src/debugmacro.h diff --git a/src/debugmacro.h b/src/debugmacro.h new file mode 100644 index 000000000..df237bad3 --- /dev/null +++ b/src/debugmacro.h @@ -0,0 +1,41 @@ +/* This file contains debugging macros to be used when investigating issues. + * + * ----------------------------------------------------------------------------- + * + * Copyright (c) 2016, Salvatore Sanfilippo + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Redis nor the names of its contributors may be used + * to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#define D(...) \ + do { \ + FILE *fp = fopen("/tmp/log.txt","a"); \ + fprintf(fp,"%s:%s:%d:\t", __FILE__, __FUNCTION__, __LINE__); \ + fprintf(fp,__VA_ARGS__); \ + fprintf(fp,"\n"); \ + fclose(fp); \ + } while (0); diff --git a/src/geo.c b/src/geo.c index 28cb433dc..331d22435 100644 --- a/src/geo.c +++ b/src/geo.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2014, Matt Stancliff . - * Copyright (c) 2015, Salvatore Sanfilippo . + * Copyright (c) 2015-2016, Salvatore Sanfilippo . * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -30,6 +30,7 @@ #include "geo.h" #include "geohash_helper.h" +#include "debugmacro.h" /* Things exported from t_zset.c only for geo.c, since it is the only other * part of Redis that requires close zset introspection. */ diff --git a/src/geohash.c b/src/geohash.c index a5e1dffbf..1ae7a7e05 100644 --- a/src/geohash.c +++ b/src/geohash.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2013-2014, yinqiwen * Copyright (c) 2014, Matt Stancliff . - * Copyright (c) 2015, Salvatore Sanfilippo . + * Copyright (c) 2015-2016, Salvatore Sanfilippo . * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -118,7 +118,7 @@ void geohashGetCoordRange(GeoHashRange *long_range, GeoHashRange *lat_range) { lat_range->min = GEO_LAT_MIN; } -int geohashEncode(GeoHashRange *long_range, GeoHashRange *lat_range, +int geohashEncode(const GeoHashRange *long_range, const GeoHashRange *lat_range, double longitude, double latitude, uint8_t step, GeoHashBits *hash) { /* Check basic arguments sanity. */ diff --git a/src/geohash.h b/src/geohash.h index c2f57bed0..ed2ef9336 100644 --- a/src/geohash.h +++ b/src/geohash.h @@ -95,7 +95,7 @@ typedef struct { * -1:failed */ void geohashGetCoordRange(GeoHashRange *long_range, GeoHashRange *lat_range); -int geohashEncode(GeoHashRange *long_range, GeoHashRange *lat_range, +int geohashEncode(const GeoHashRange *long_range, const GeoHashRange *lat_range, double longitude, double latitude, uint8_t step, GeoHashBits *hash); int geohashEncodeType(double longitude, double latitude, diff --git a/src/geohash_helper.c b/src/geohash_helper.c index a65759bc4..139bcea11 100644 --- a/src/geohash_helper.c +++ b/src/geohash_helper.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2013-2014, yinqiwen * Copyright (c) 2014, Matt Stancliff . - * Copyright (c) 2015, Salvatore Sanfilippo . + * Copyright (c) 2015-2016, Salvatore Sanfilippo . * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -36,6 +36,7 @@ #include "fmacros.h" #include "geohash_helper.h" +#include "debugmacro.h" #include #define D_R (M_PI / 180.0) @@ -56,8 +57,8 @@ const double MERCATOR_MIN = -20037726.37; static inline double deg_rad(double ang) { return ang * D_R; } static inline double rad_deg(double ang) { return ang / D_R; } -/* You must *ONLY* estimate steps when you are encoding. - * If you are decoding, always decode to GEO_STEP_MAX (26). */ +/* This function is used in order to estimate the step (bits precision) + * of the 9 search area boxes during radius queries. 
*/ uint8_t geohashEstimateStepsByRadius(double range_meters, double lat) { if (range_meters == 0) return 26; int step = 1; @@ -65,12 +66,15 @@ uint8_t geohashEstimateStepsByRadius(double range_meters, double lat) { range_meters *= 2; step++; } - step -= 2; /* Make sure range is included in the worst case. */ + step -= 2; /* Make sure range is included in most of the base cases. */ + /* Wider range torwards the poles... Note: it is possible to do better * than this approximation by computing the distance between meridians * at this latitude, but this does the trick for now. */ - if (lat > 67 || lat < -67) step--; - if (lat > 80 || lat < -80) step--; + if (lat > 66 || lat < -66) { + step--; + if (lat > 80 || lat < -80) step--; + } /* Frame to valid range. */ if (step < 1) step = 1; @@ -105,12 +109,14 @@ int geohashBoundingBox(double longitude, double latitude, double radius_meters, return 1; } +/* Return a set of areas (center + 8) that are able to cover a range query + * for the specified position and radius. 
*/ GeoHashRadius geohashGetAreasByRadius(double longitude, double latitude, double radius_meters) { GeoHashRange long_range, lat_range; - GeoHashRadius radius = {{0}}; - GeoHashBits hash = {0,0}; - GeoHashNeighbors neighbors = {{0}}; - GeoHashArea area = {{0}}; + GeoHashRadius radius; + GeoHashBits hash; + GeoHashNeighbors neighbors; + GeoHashArea area; double min_lon, max_lon, min_lat, max_lat; double bounds[4]; int steps; @@ -123,12 +129,65 @@ GeoHashRadius geohashGetAreasByRadius(double longitude, double latitude, double steps = geohashEstimateStepsByRadius(radius_meters,latitude); - geohashGetCoordRange(&long_range, &lat_range); - geohashEncode(&long_range, &lat_range, longitude, latitude, steps, &hash); - geohashNeighbors(&hash, &neighbors); - geohashGetCoordRange(&long_range, &lat_range); - geohashDecode(long_range, lat_range, hash, &area); + geohashGetCoordRange(&long_range,&lat_range); + geohashEncode(&long_range,&lat_range,longitude,latitude,steps,&hash); + geohashNeighbors(&hash,&neighbors); + geohashDecode(long_range,lat_range,hash,&area); + /* Check if the step is enough at the limits of the covered area. + * Sometimes when the search area is near an edge of the + * area, the estimated step is not small enough, since one of the + * north / south / west / east square is too near to the search area + * to cover everything. 
*/ + int decrease_step = 0; + { + GeoHashArea north, south, east, west; + + geohashDecode(long_range, lat_range, neighbors.north, &north); + geohashDecode(long_range, lat_range, neighbors.south, &south); + geohashDecode(long_range, lat_range, neighbors.east, &east); + geohashDecode(long_range, lat_range, neighbors.west, &west); + + if (geohashGetDistance(longitude,latitude,longitude,north.latitude.max) + < radius_meters) decrease_step = 1; + if (geohashGetDistance(longitude,latitude,longitude,south.latitude.min) + < radius_meters) decrease_step = 1; + if (geohashGetDistance(longitude,latitude,east.longitude.max,latitude) + < radius_meters) decrease_step = 1; + if (geohashGetDistance(longitude,latitude,west.longitude.min,latitude) + < radius_meters) decrease_step = 1; + } + + if (decrease_step) { + steps--; + geohashEncode(&long_range,&lat_range,longitude,latitude,steps,&hash); + geohashNeighbors(&hash,&neighbors); + geohashDecode(long_range,lat_range,hash,&area); + } + + /* Example debug info. This turns to be very useful every time there is + * to investigate radius search potential bugs. So better to leave it + * here. */ + if (0) { + GeoHashArea myarea = {{0}}; + geohashDecode(long_range, lat_range, neighbors.west, &myarea); + + /* Dump West. */ + D("Neighbors"); + D("area.longitude.min: %f\n", myarea.longitude.min); + D("area.longitude.max: %f\n", myarea.longitude.max); + D("area.latitude.min: %f\n", myarea.latitude.min); + D("area.latitude.max: %f\n", myarea.latitude.max); + + /* Dump center square. */ + D("Area"); + D("area.longitude.min: %f\n", area.longitude.min); + D("area.longitude.max: %f\n", area.longitude.max); + D("area.latitude.min: %f\n", area.latitude.min); + D("area.latitude.max: %f\n", area.latitude.max); + } + + /* Exclude the search areas that are useless. 
*/ if (area.latitude.min < min_lat) { GZERO(neighbors.south); GZERO(neighbors.south_west); diff --git a/tests/unit/geo.tcl b/tests/unit/geo.tcl index 3aa06c99c..a08726d2e 100644 --- a/tests/unit/geo.tcl +++ b/tests/unit/geo.tcl @@ -1,4 +1,4 @@ -# Helper functins to simulate search-in-radius in the Tcl side in order to +# Helper functions to simulate search-in-radius in the Tcl side in order to # verify the Redis implementation with a fuzzy test. proc geo_degrad deg {expr {$deg*atan(1)*8/360}} @@ -23,6 +23,44 @@ proc geo_random_point {lonvar latvar} { set lat [expr {-70 + rand()*140}] } +# Return elements non common to both the lists. +# This code is from http://wiki.tcl.tk/15489 +proc compare_lists {List1 List2} { + set DiffList {} + foreach Item $List1 { + if {[lsearch -exact $List2 $Item] == -1} { + lappend DiffList $Item + } + } + foreach Item $List2 { + if {[lsearch -exact $List1 $Item] == -1} { + if {[lsearch -exact $DiffList $Item] == -1} { + lappend DiffList $Item + } + } + } + return $DiffList +} + +# The following list represents sets of random seed, search position +# and radius that caused bugs in the past. It is used by the randomized +# test later as a starting point. When the regression vectors are scanned +# the code reverts to using random data. 
+# +# The format is: seed km lon lat +set regression_vectors { + {1412 156 149.29737817929004 15.95807862745508} + {441574 143 59.235461856813856 66.269555127373678} + {160645 187 -101.88575239939883 49.061997951502917} + {750269 154 -90.187939661642517 66.615930412251487} + {342880 145 163.03472387745728 64.012747720821181} + {729955 143 137.86663517256579 63.986745399416776} + {939895 151 59.149620271823181 65.204186651485145} + {1412 156 149.29737817929004 15.95807862745508} + {564862 149 84.062063109158544 -65.685403922426232} +} +set rv_idx 0 + start_server {tags {"geo"}} { test {GEOADD create} { r geoadd nyc -73.9454966 40.747533 "lic market" @@ -183,16 +221,25 @@ start_server {tags {"geo"}} { } test {GEOADD + GEORANGE randomized test} { - set attempt 10 + set attempt 20 while {[incr attempt -1]} { + set rv [lindex $regression_vectors $rv_idx] + incr rv_idx + unset -nocomplain debuginfo set srand_seed [randomInt 1000000] + if {$rv ne {}} {set srand_seed [lindex $rv 0]} lappend debuginfo "srand_seed is $srand_seed" expr {srand($srand_seed)} ; # If you need a reproducible run r del mypoints set radius_km [expr {[randomInt 200]+10}] + if {$rv ne {}} {set radius_km [lindex $rv 1]} set radius_m [expr {$radius_km*1000}] geo_random_point search_lon search_lat + if {$rv ne {}} { + set search_lon [lindex $rv 2] + set search_lat [lindex $rv 3] + } lappend debuginfo "Search area: $search_lon,$search_lat $radius_km km" set tcl_result {} set argv {} @@ -208,10 +255,40 @@ start_server {tags {"geo"}} { set res [lsort [r georadius mypoints $search_lon $search_lat $radius_km km]] set res2 [lsort $tcl_result] set test_result OK + if {$res != $res2} { + set rounding_errors 0 + set diff [compare_lists $res $res2] + foreach place $diff { + set mydist [geo_distance $lon $lat $search_lon $search_lat] + set mydist [expr $mydist/1000] + if {($mydist / $radius_km) > 0.999} {incr rounding_errors} + } + # Make sure this is a real error and not a rounidng issue. 
+ if {[llength $diff] == $rounding_errors} { + set res $res2; # Error silenced + } + } + + if {$res != $res2} { + set diff [compare_lists $res $res2] + puts "*** Possible problem in GEO radius query ***" puts "Redis: $res" puts "Tcl : $res2" + puts "Diff : $diff" puts [join $debuginfo "\n"] + foreach place $diff { + if {[lsearch -exact $res2 $place] != -1} { + set where "(only in Tcl)" + } else { + set where "(only in Redis)" + } + lassign [lindex [r geopos mypoints $place] 0] lon lat + set mydist [geo_distance $lon $lat $search_lon $search_lat] + set mydist [expr $mydist/1000] + puts "$place -> [r geopos mypoints $place] $mydist $where" + if {($mydist / $radius_km) > 0.999} {incr rounding_errors} + } set test_result FAIL } unset -nocomplain debuginfo From f8a3861465769586fd0a127f168a1a5baabf6491 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 27 Jul 2016 16:41:20 +0200 Subject: [PATCH 0084/1722] Ability of slave to announce arbitrary ip/port to master. This feature is useful, especially in deployments using Sentinel in order to setup Redis HA, where the slave is executed with NAT or port forwarding, so that the auto-detected port/ip addresses, as listed in the "INFO replication" output of the master, or as provided by the "ROLE" command, don't match the real addresses at which the slave is reachable for connections. --- redis.conf | 29 ++++++++++++++++++++++ src/config.c | 19 +++++++++++++++ src/networking.c | 1 + src/replication.c | 62 ++++++++++++++++++++++++++++++++++++++++++----- src/server.c | 12 ++++++--- src/server.h | 23 ++++++++++++------ 6 files changed, 129 insertions(+), 17 deletions(-) diff --git a/redis.conf b/redis.conf index 05158b4e7..b9217fdb4 100644 --- a/redis.conf +++ b/redis.conf @@ -443,6 +443,35 @@ slave-priority 100 # By default min-slaves-to-write is set to 0 (feature disabled) and # min-slaves-max-lag is set to 10. +# A Redis master is able to list the address and port of the attached +# slaves in different ways. 
For example the "INFO replication" section +# offers this information, which is used, among other tools, by +# Redis Sentinel in order to discover slave instances. +# Another place where this info is available is in the output of the +# "ROLE" command of a masteer. +# +# The listed IP and address normally reported by a slave is obtained +# in the following way: +# +# IP: The address is auto detected by checking the peer address +# of the socket used by the slave to connect with the master. +# +# Port: The port is communicated by the slave during the replication +# handshake, and is normally the port that the slave is using to +# list for connections. +# +# However when port forwarding or Network Address Translation (NAT) is +# used, the slave may be actually reachable via different IP and port +# pairs. The following two options can be used by a slave in order to +# report to its master a specific set of IP and port, so that both INFO +# and ROLE will report those values. +# +# There is no need to use both the options if you need to override just +# the port or the IP address. 
+# +# slave-announce-ip 5.5.5.5 +# slave-announce-port 1234 + ################################## SECURITY ################################### # Require clients to issue AUTH before processing any other diff --git a/src/config.c b/src/config.c index 686f80cfd..dd21a0aca 100644 --- a/src/config.c +++ b/src/config.c @@ -633,6 +633,16 @@ void loadServerConfigFromString(char *config) { } } else if (!strcasecmp(argv[0],"slave-priority") && argc == 2) { server.slave_priority = atoi(argv[1]); + } else if (!strcasecmp(argv[0],"slave-announce-ip") && argc == 2) { + zfree(server.slave_announce_ip); + server.slave_announce_ip = zstrdup(argv[1]); + } else if (!strcasecmp(argv[0],"slave-announce-port") && argc == 2) { + server.slave_announce_port = atoi(argv[1]); + if (server.slave_announce_port < 0 || + server.slave_announce_port > 65535) + { + err = "Invalid port"; goto loaderr; + } } else if (!strcasecmp(argv[0],"min-slaves-to-write") && argc == 2) { server.repl_min_slaves_to_write = atoi(argv[1]); if (server.repl_min_slaves_to_write < 0) { @@ -925,6 +935,9 @@ void configSetCommand(client *c) { if (flags == -1) goto badfmt; server.notify_keyspace_events = flags; + } config_set_special_field("slave-announce-ip") { + zfree(server.slave_announce_ip); + server.slave_announce_ip = ((char*)o->ptr)[0] ? zstrdup(o->ptr) : NULL; /* Boolean fields. * config_set_bool_field(name,var). 
*/ @@ -1013,6 +1026,8 @@ void configSetCommand(client *c) { "repl-diskless-sync-delay",server.repl_diskless_sync_delay,0,LLONG_MAX) { } config_set_numerical_field( "slave-priority",server.slave_priority,0,LLONG_MAX) { + } config_set_numerical_field( + "slave-announce-port",server.slave_announce_port,0,65535) { } config_set_numerical_field( "min-slaves-to-write",server.repl_min_slaves_to_write,0,LLONG_MAX) { refreshGoodSlavesCount(); @@ -1133,6 +1148,7 @@ void configGetCommand(client *c) { config_get_string_field("unixsocket",server.unixsocket); config_get_string_field("logfile",server.logfile); config_get_string_field("pidfile",server.pidfile); + config_get_string_field("slave-announce-ip",server.slave_announce_ip); /* Numerical values */ config_get_numerical_field("maxmemory",server.maxmemory); @@ -1177,6 +1193,7 @@ void configGetCommand(client *c) { config_get_numerical_field("maxclients",server.maxclients); config_get_numerical_field("watchdog-period",server.watchdog_period); config_get_numerical_field("slave-priority",server.slave_priority); + config_get_numerical_field("slave-announce-port",server.slave_announce_port); config_get_numerical_field("min-slaves-to-write",server.repl_min_slaves_to_write); config_get_numerical_field("min-slaves-max-lag",server.repl_min_slaves_max_lag); config_get_numerical_field("hz",server.hz); @@ -1865,6 +1882,7 @@ int rewriteConfig(char *path) { rewriteConfigOctalOption(state,"unixsocketperm",server.unixsocketperm,CONFIG_DEFAULT_UNIX_SOCKET_PERM); rewriteConfigNumericalOption(state,"timeout",server.maxidletime,CONFIG_DEFAULT_CLIENT_TIMEOUT); rewriteConfigNumericalOption(state,"tcp-keepalive",server.tcpkeepalive,CONFIG_DEFAULT_TCP_KEEPALIVE); + rewriteConfigNumericalOption(state,"slave-announce-port",server.slave_announce_port,CONFIG_DEFAULT_SLAVE_ANNOUNCE_PORT); rewriteConfigEnumOption(state,"loglevel",server.verbosity,loglevel_enum,CONFIG_DEFAULT_VERBOSITY); 
rewriteConfigStringOption(state,"logfile",server.logfile,CONFIG_DEFAULT_LOGFILE); rewriteConfigYesNoOption(state,"syslog-enabled",server.syslog_enabled,CONFIG_DEFAULT_SYSLOG_ENABLED); @@ -1878,6 +1896,7 @@ int rewriteConfig(char *path) { rewriteConfigStringOption(state,"dbfilename",server.rdb_filename,CONFIG_DEFAULT_RDB_FILENAME); rewriteConfigDirOption(state); rewriteConfigSlaveofOption(state); + rewriteConfigStringOption(state,"slave-announce-ip",server.slave_announce_ip,CONFIG_DEFAULT_SLAVE_ANNOUNCE_IP); rewriteConfigStringOption(state,"masterauth",server.masterauth,NULL); rewriteConfigStringOption(state,"cluster-announce-ip",server.cluster_announce_ip,NULL); rewriteConfigYesNoOption(state,"slave-serve-stale-data",server.repl_serve_stale_data,CONFIG_DEFAULT_SLAVE_SERVE_STALE_DATA); diff --git a/src/networking.c b/src/networking.c index 242022a03..723cd599d 100644 --- a/src/networking.c +++ b/src/networking.c @@ -109,6 +109,7 @@ client *createClient(int fd) { c->repl_ack_off = 0; c->repl_ack_time = 0; c->slave_listening_port = 0; + c->slave_ip[0] = '\0'; c->slave_capa = SLAVE_CAPA_NONE; c->reply = listCreate(); c->reply_bytes = 0; diff --git a/src/replication.c b/src/replication.c index 1d40677e5..471ad1a03 100644 --- a/src/replication.c +++ b/src/replication.c @@ -47,7 +47,7 @@ int cancelReplicationHandshake(void); /* Return the pointer to a string representing the slave ip:listening_port * pair. Mostly useful for logging, since we want to log a slave using its - * IP address and it's listening port which is more clear for the user, for + * IP address and its listening port which is more clear for the user, for * example: "Closing connection with slave 10.1.2.3:6380". 
*/ char *replicationGetSlaveName(client *c) { static char buf[NET_PEER_ID_LEN]; @@ -55,7 +55,12 @@ char *replicationGetSlaveName(client *c) { ip[0] = '\0'; buf[0] = '\0'; - if (anetPeerToString(c->fd,ip,sizeof(ip),NULL) != -1) { + if (c->slave_ip[0] != '\0' || + anetPeerToString(c->fd,ip,sizeof(ip),NULL) != -1) + { + /* Note that the 'ip' buffer is always larger than 'c->slave_ip' */ + if (c->slave_ip[0] != '\0') memcpy(ip,c->slave_ip,sizeof(c->slave_ip)); + if (c->slave_listening_port) anetFormatAddr(buf,sizeof(buf),ip,c->slave_listening_port); else @@ -717,6 +722,15 @@ void replconfCommand(client *c) { &port,NULL) != C_OK)) return; c->slave_listening_port = port; + } else if (!strcasecmp(c->argv[j]->ptr,"ip-address")) { + sds ip = c->argv[j+1]->ptr; + if (sdslen(ip) < sizeof(c->slave_ip)) { + memcpy(c->slave_ip,ip,sdslen(ip)+1); + } else { + addReplyErrorFormat(c,"REPLCONF ip-address provided by " + "slave instance is too long: %zd bytes", sdslen(ip)); + return; + } } else if (!strcasecmp(c->argv[j]->ptr,"capa")) { /* Ignore capabilities not understood by this master. */ if (!strcasecmp(c->argv[j+1]->ptr,"eof")) @@ -1462,7 +1476,8 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) { /* Set the slave port, so that Master's INFO command can list the * slave listening port correctly. */ if (server.repl_state == REPL_STATE_SEND_PORT) { - sds port = sdsfromlonglong(server.port); + sds port = sdsfromlonglong(server.slave_announce_port ? + server.slave_announce_port : server.port); err = sendSynchronousCommand(SYNC_CMD_WRITE,fd,"REPLCONF", "listening-port",port, NULL); sdsfree(port); @@ -1482,6 +1497,37 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) { "REPLCONF listening-port: %s", err); } sdsfree(err); + server.repl_state = REPL_STATE_SEND_IP; + } + + /* Skip REPLCONF ip-address if there is no slave-announce-ip option set. 
*/ + if (server.repl_state == REPL_STATE_SEND_IP && + server.slave_announce_ip == NULL) + { + server.repl_state = REPL_STATE_SEND_CAPA; + } + + /* Set the slave ip, so that Master's INFO command can list the + * slave IP address port correctly in case of port forwarding or NAT. */ + if (server.repl_state == REPL_STATE_SEND_IP) { + err = sendSynchronousCommand(SYNC_CMD_WRITE,fd,"REPLCONF", + "ip-address",server.slave_announce_ip, NULL); + if (err) goto write_error; + sdsfree(err); + server.repl_state = REPL_STATE_RECEIVE_IP; + return; + } + + /* Receive REPLCONF ip-address reply. */ + if (server.repl_state == REPL_STATE_RECEIVE_IP) { + err = sendSynchronousCommand(SYNC_CMD_READ,fd,NULL); + /* Ignore the error if any, not all the Redis versions support + * REPLCONF listening-port. */ + if (err[0] == '-') { + serverLog(LL_NOTICE,"(Non critical) Master does not understand " + "REPLCONF ip-address: %s", err); + } + sdsfree(err); server.repl_state = REPL_STATE_SEND_CAPA; } @@ -1787,12 +1833,16 @@ void roleCommand(client *c) { listRewind(server.slaves,&li); while((ln = listNext(&li))) { client *slave = ln->value; - char ip[NET_IP_STR_LEN]; + char ip[NET_IP_STR_LEN], *slaveip = slave->slave_ip; - if (anetPeerToString(slave->fd,ip,sizeof(ip),NULL) == -1) continue; + if (slaveip[0] == '\0') { + if (anetPeerToString(slave->fd,ip,sizeof(ip),NULL) == -1) + continue; + slaveip = ip; + } if (slave->replstate != SLAVE_STATE_ONLINE) continue; addReplyMultiBulkLen(c,3); - addReplyBulkCString(c,ip); + addReplyBulkCString(c,slaveip); addReplyBulkLongLong(c,slave->slave_listening_port); addReplyBulkLongLong(c,slave->repl_ack_off); slaves++; diff --git a/src/server.c b/src/server.c index f9806d280..c8eaebdea 100644 --- a/src/server.c +++ b/src/server.c @@ -1412,6 +1412,8 @@ void initServerConfig(void) { server.repl_min_slaves_to_write = CONFIG_DEFAULT_MIN_SLAVES_TO_WRITE; server.repl_min_slaves_max_lag = CONFIG_DEFAULT_MIN_SLAVES_MAX_LAG; server.slave_priority = 
CONFIG_DEFAULT_SLAVE_PRIORITY; + server.slave_announce_ip = CONFIG_DEFAULT_SLAVE_ANNOUNCE_IP; + server.slave_announce_port = CONFIG_DEFAULT_SLAVE_ANNOUNCE_PORT; server.master_repl_offset = 0; /* Replication partial resync backlog */ @@ -3056,11 +3058,15 @@ sds genRedisInfoString(char *section) { while((ln = listNext(&li))) { client *slave = listNodeValue(ln); char *state = NULL; - char ip[NET_IP_STR_LEN]; + char ip[NET_IP_STR_LEN], *slaveip = slave->slave_ip; int port; long lag = 0; - if (anetPeerToString(slave->fd,ip,sizeof(ip),&port) == -1) continue; + if (slaveip[0] == '\0') { + if (anetPeerToString(slave->fd,ip,sizeof(ip),&port) == -1) + continue; + slaveip = ip; + } switch(slave->replstate) { case SLAVE_STATE_WAIT_BGSAVE_START: case SLAVE_STATE_WAIT_BGSAVE_END: @@ -3080,7 +3086,7 @@ sds genRedisInfoString(char *section) { info = sdscatprintf(info, "slave%d:ip=%s,port=%d,state=%s," "offset=%lld,lag=%ld\r\n", - slaveid,ip,slave->slave_listening_port,state, + slaveid,slaveip,slave->slave_listening_port,state, slave->repl_ack_off, lag); slaveid++; } diff --git a/src/server.h b/src/server.h index 534b59bd5..f3f6b4ddd 100644 --- a/src/server.h +++ b/src/server.h @@ -126,6 +126,8 @@ typedef long long mstime_t; /* millisecond time type. */ #define CONFIG_DEFAULT_REPL_DISKLESS_SYNC_DELAY 5 #define CONFIG_DEFAULT_SLAVE_SERVE_STALE_DATA 1 #define CONFIG_DEFAULT_SLAVE_READ_ONLY 1 +#define CONFIG_DEFAULT_SLAVE_ANNOUNCE_IP NULL +#define CONFIG_DEFAULT_SLAVE_ANNOUNCE_PORT 0 #define CONFIG_DEFAULT_REPL_DISABLE_TCP_NODELAY 0 #define CONFIG_DEFAULT_MAXMEMORY 0 #define CONFIG_DEFAULT_MAXMEMORY_SAMPLES 5 @@ -267,13 +269,15 @@ typedef long long mstime_t; /* millisecond time type. 
*/ #define REPL_STATE_RECEIVE_AUTH 5 /* Wait for AUTH reply */ #define REPL_STATE_SEND_PORT 6 /* Send REPLCONF listening-port */ #define REPL_STATE_RECEIVE_PORT 7 /* Wait for REPLCONF reply */ -#define REPL_STATE_SEND_CAPA 8 /* Send REPLCONF capa */ -#define REPL_STATE_RECEIVE_CAPA 9 /* Wait for REPLCONF reply */ -#define REPL_STATE_SEND_PSYNC 10 /* Send PSYNC */ -#define REPL_STATE_RECEIVE_PSYNC 11 /* Wait for PSYNC reply */ +#define REPL_STATE_SEND_IP 8 /* Send REPLCONF ip-address */ +#define REPL_STATE_RECEIVE_IP 9 /* Wait for REPLCONF reply */ +#define REPL_STATE_SEND_CAPA 10 /* Send REPLCONF capa */ +#define REPL_STATE_RECEIVE_CAPA 11 /* Wait for REPLCONF reply */ +#define REPL_STATE_SEND_PSYNC 12 /* Send PSYNC */ +#define REPL_STATE_RECEIVE_PSYNC 13 /* Wait for PSYNC reply */ /* --- End of handshake states --- */ -#define REPL_STATE_TRANSFER 12 /* Receiving .rdb from master */ -#define REPL_STATE_CONNECTED 13 /* Connected to master */ +#define REPL_STATE_TRANSFER 14 /* Receiving .rdb from master */ +#define REPL_STATE_CONNECTED 15 /* Connected to master */ /* State of slaves from the POV of the master. Used in client->replstate. * In SEND_BULK and ONLINE state the slave receives new updates @@ -665,7 +669,8 @@ typedef struct client { copying this slave output buffer should use. */ char replrunid[CONFIG_RUN_ID_SIZE+1]; /* Master run id if is a master. */ - int slave_listening_port; /* As configured with: SLAVECONF listening-port */ + int slave_listening_port; /* As configured with: REPLCONF listening-port */ + char slave_ip[NET_IP_STR_LEN]; /* Optionally given by REPLCONF ip-address */ int slave_capa; /* Slave capabilities: SLAVE_CAPA_* bitwise OR. */ multiState mstate; /* MULTI/EXEC state */ int btype; /* Type of blocking op if CLIENT_BLOCKED. */ @@ -971,7 +976,9 @@ struct redisServer { time_t repl_down_since; /* Unix time at which link with master went down */ int repl_disable_tcp_nodelay; /* Disable TCP_NODELAY after SYNC? 
*/ int slave_priority; /* Reported in INFO and used by Sentinel. */ - char repl_master_runid[CONFIG_RUN_ID_SIZE+1]; /* Master run id for PSYNC. */ + int slave_announce_port; /* Give the master this listening port. */ + char *slave_announce_ip; /* Give the master this ip address. */ + char repl_master_runid[CONFIG_RUN_ID_SIZE+1]; /* Master run id for PSYNC.*/ long long repl_master_initial_offset; /* Master PSYNC offset. */ int repl_slave_lazy_flush; /* Lazy FLUSHALL before loading DB? */ /* Replication script cache. */ From 2c80701c7db1590ae5a3f084c0225e0c2016c602 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 28 Jul 2016 14:15:31 +0200 Subject: [PATCH 0085/1722] Changelog format modified to be less verbose. --- utils/releasetools/changelog.tcl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/utils/releasetools/changelog.tcl b/utils/releasetools/changelog.tcl index bf0ad999e..4b5424ce2 100755 --- a/utils/releasetools/changelog.tcl +++ b/utils/releasetools/changelog.tcl @@ -21,6 +21,10 @@ append template "\n\n" set date [clock format [clock seconds]] set template [string map [list %ver% $ver %date% $date] $template] -append template [exec git log $branch~30..$branch "--format=format:+-------------------------------------------------------------------------------%n| %s%n| By %an, %ai%n+--------------------------------------------------------------------------------%nhttps://github.com/antirez/redis/commit/%H%n%n%b" --stat] +append template [exec git log $branch~30..$branch "--format=format:%an in commit %h:%n %s" --shortstat] + +#Older, more verbose version. 
+# +#append template [exec git log $branch~30..$branch "--format=format:+-------------------------------------------------------------------------------%n| %s%n| By %an, %ai%n+--------------------------------------------------------------------------------%nhttps://github.com/antirez/redis/commit/%H%n%n%b" --stat] puts $template From 53cc8d7ce66b971d540341d279aa7b8843bcf482 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 29 Jul 2016 11:28:16 +0200 Subject: [PATCH 0086/1722] Update linenoise to fix insecure redis-cli history file creation. The problem was fixed in antirez/linenoise repository applying a patch contributed by @lamby. Here the new version is updated in the Redis source tree. Close #1418 Close #3322 --- deps/linenoise/linenoise.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/deps/linenoise/linenoise.c b/deps/linenoise/linenoise.c index a807d9b8a..fce14a7c5 100644 --- a/deps/linenoise/linenoise.c +++ b/deps/linenoise/linenoise.c @@ -111,6 +111,7 @@ #include #include #include +#include #include #include #include @@ -1160,10 +1161,14 @@ int linenoiseHistorySetMaxLen(int len) { /* Save the history in the specified file. On success 0 is returned * otherwise -1 is returned. */ int linenoiseHistorySave(const char *filename) { - FILE *fp = fopen(filename,"w"); + mode_t old_umask = umask(S_IXUSR|S_IRWXG|S_IRWXO); + FILE *fp; int j; + fp = fopen(filename,"w"); + umask(old_umask); if (fp == NULL) return -1; + chmod(filename,S_IRUSR|S_IWUSR); for (j = 0; j < history_len; j++) fprintf(fp,"%s\n",history[j]); fclose(fp); From 964cea9fbaf979d9ff63400c24da89f3b8186580 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 2 Aug 2016 10:32:36 +0200 Subject: [PATCH 0087/1722] Remove extra "-" from ASCII horizontal bar in comment. 
--- src/modules/hellotype.c | 2 +- src/modules/helloworld.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/modules/hellotype.c b/src/modules/hellotype.c index a9c2d20fc..b33ed81cd 100644 --- a/src/modules/hellotype.c +++ b/src/modules/hellotype.c @@ -5,7 +5,7 @@ * works, how a new data type is created, and how to write basic methods * for RDB loading, saving and AOF rewriting. * - * ------------------------------------------------------------------------------ + * ----------------------------------------------------------------------------- * * Copyright (c) 2016, Salvatore Sanfilippo * All rights reserved. diff --git a/src/modules/helloworld.c b/src/modules/helloworld.c index 8d657a52b..4d6f8782d 100644 --- a/src/modules/helloworld.c +++ b/src/modules/helloworld.c @@ -4,7 +4,7 @@ * This module does not do anything useful, if not for a few commands. The * examples are designed in order to show the API. * - * ------------------------------------------------------------------------------ + * ----------------------------------------------------------------------------- * * Copyright (c) 2016, Salvatore Sanfilippo * All rights reserved. From 7d67db4b9f5c29f60ade7a1bc26d4c43e61fc0e5 Mon Sep 17 00:00:00 2001 From: Qu Chen Date: Mon, 1 Aug 2016 12:16:17 -0700 Subject: [PATCH 0088/1722] Fix a bug to delay bgsave while AOF rewrite in progress for replication --- src/replication.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/replication.c b/src/replication.c index 471ad1a03..fb96fac1b 100644 --- a/src/replication.c +++ b/src/replication.c @@ -676,7 +676,7 @@ void syncCommand(client *c) { /* Target is disk (or the slave is not capable of supporting * diskless replication) and we don't have a BGSAVE in progress, * let's start one. 
*/ - if (server.aof_child_pid != -1) { + if (server.aof_child_pid == -1) { startBgsaveForReplication(c->slave_capa); } else { serverLog(LL_NOTICE, From 5a522292494d72a9a3fe789bc0fc310bae99f55c Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 2 Aug 2016 15:29:04 +0200 Subject: [PATCH 0089/1722] Modules: StringAppendBuffer() and ability to retain strings. RedisModule_StringRetain() allows, when automatic memory management is on, to keep string objects living after the callback returns. Can also be used in order to use Redis reference counting of objects inside modules. The reason why this is useful is that sometimes when implementing new data types we want to reference RedisModuleString objects inside the module private data structures, so those string objects must be valid after the callback returns even if not referenced inside the Redis key space. --- src/module.c | 85 +++++++++++++++++++++++++++++++++++++++++--- src/modules/Makefile | 7 +++- src/redismodule.h | 4 +++ 3 files changed, 91 insertions(+), 5 deletions(-) diff --git a/src/module.c b/src/module.c index e3603e1d7..4d1d88001 100644 --- a/src/module.c +++ b/src/module.c @@ -615,9 +615,12 @@ void autoMemoryAdd(RedisModuleCtx *ctx, int type, void *ptr) { } /* Mark an object as freed in the auto release queue, so that users can still - * free things manually if they want. */ -void autoMemoryFreed(RedisModuleCtx *ctx, int type, void *ptr) { - if (!(ctx->flags & REDISMODULE_CTX_AUTO_MEMORY)) return; + * free things manually if they want. + * + * The function returns 1 if the object was actually found in the auto memory + * pool, otherwise 0 is returned. 
*/ +int autoMemoryFreed(RedisModuleCtx *ctx, int type, void *ptr) { + if (!(ctx->flags & REDISMODULE_CTX_AUTO_MEMORY)) return 0; int count = (ctx->amqueue_used+1)/2; for (int j = 0; j < count; j++) { @@ -639,10 +642,11 @@ void autoMemoryFreed(RedisModuleCtx *ctx, int type, void *ptr) { /* Reduce the size of the queue because we either moved the top * element elsewhere or freed it */ ctx->amqueue_used--; - return; + return 1; } } } + return 0; } /* Release all the objects in queue. */ @@ -717,6 +721,43 @@ void RM_FreeString(RedisModuleCtx *ctx, RedisModuleString *str) { autoMemoryFreed(ctx,REDISMODULE_AM_STRING,str); } +/* Every call to this function, will make the string 'str' requiring + * an additional call to RedisModule_FreeString() in order to really + * free the string. Note that the automatic freeing of the string obtained + * enabling modules automatic memory management counts for one + * RedisModule_FreeString() call (it is just executed automatically). + * + * Normally you want to call this function when, at the same time + * the following conditions are true: + * + * 1) You have automatic memory management enabled. + * 2) You want to create string objects. + * 3) Those string objects you create need to live *after* the callback + * function(for example a command implementation) creating them returns. + * + * Usually you want this in order to store the created string object + * into your own data structure, for example when implementing a new data + * type. + * + * Note that when memory management is turned off, you don't need + * any call to RetainString() since creating a string will always result + * into a string that lives after the callback function returns, if + * no FreeString() call is performed. 
*/ +void RM_RetainString(RedisModuleCtx *ctx, RedisModuleString *str) { + if (!autoMemoryFreed(ctx,REDISMODULE_AM_STRING,str)) { + /* Increment the string reference counting only if we can't + * just remove the object from the list of objects that should + * be reclaimed. Why we do that, instead of just incrementing + * the refcount in any case, and let the automatic FreeString() + * call at the end to bring the refcount back at the desired + * value? Because this way we ensure that the object refcount + * value is 1 (instead of going to 2 to be dropped later to 1) + * after the call to this function. This is needed for functions + * like RedisModule_StringAppendBuffer() to work. */ + incrRefCount(str); + } +} + /* Given a string module object, this function returns the string pointer * and length of the string. The returned pointer and length should only * be used for read only accesses and never modified. */ @@ -742,6 +783,40 @@ int RM_StringToDouble(const RedisModuleString *str, double *d) { return (retval == C_OK) ? REDISMODULE_OK : REDISMODULE_ERR; } +/* Return the (possibly modified in encoding) input 'str' object if + * the string is unshared, otherwise NULL is returned. */ +RedisModuleString *moduleAssertUnsharedString(RedisModuleString *str) { + if (str->refcount != 1) { + serverLog(LL_WARNING, + "Module attempted to use an in-place string modify operation " + "with a string referenced multiple times. Please check the code " + "for API usage correctness."); + return NULL; + } + if (str->encoding == OBJ_ENCODING_EMBSTR) { + /* Note: here we "leak" the additional allocation that was + * used in order to store the embedded string in the object. */ + str->ptr = sdsnewlen(str->ptr,sdslen(str->ptr)); + str->encoding = OBJ_ENCODING_RAW; + } else if (str->encoding == OBJ_ENCODING_INT) { + /* Convert the string from integer to raw encoding. 
*/ + str->ptr = sdsfromlonglong((long)str->ptr); + str->encoding = OBJ_ENCODING_RAW; + } + return str; +} + +/* Append the specified buffere to the string 'str'. The string must be a + * string created by the user that is referenced only a single time, otherwise + * REDISMODULE_ERR is returend and the operation is not performed. */ +int RM_StringAppendBuffer(RedisModuleCtx *ctx, RedisModuleString *str, const char *buf, size_t len) { + UNUSED(ctx); + str = moduleAssertUnsharedString(str); + if (str == NULL) return REDISMODULE_ERR; + str->ptr = sdscatlen(str->ptr,buf,len); + return REDISMODULE_OK; +} + /* -------------------------------------------------------------------------- * Reply APIs * @@ -2954,6 +3029,8 @@ void moduleRegisterCoreAPI(void) { REGISTER_API(LoadDouble); REGISTER_API(EmitAOF); REGISTER_API(Log); + REGISTER_API(StringAppendBuffer); + REGISTER_API(RetainString); } /* Global initialization at Redis startup. */ diff --git a/src/modules/Makefile b/src/modules/Makefile index ecac4683f..1a52d65b0 100644 --- a/src/modules/Makefile +++ b/src/modules/Makefile @@ -13,7 +13,7 @@ endif .SUFFIXES: .c .so .xo .o -all: helloworld.so hellotype.so +all: helloworld.so hellotype.so testmodule.so .c.xo: $(CC) -I. 
$(CFLAGS) $(SHOBJ_CFLAGS) -fPIC -c $< -o $@ @@ -28,5 +28,10 @@ hellotype.xo: ../redismodule.h hellotype.so: hellotype.xo $(LD) -o $@ $< $(SHOBJ_LDFLAGS) $(LIBS) -lc +testmodule.xo: ../redismodule.h + +testmodule.so: testmodule.xo + $(LD) -o $@ $< $(SHOBJ_LDFLAGS) $(LIBS) -lc + clean: rm -rf *.xo *.so diff --git a/src/redismodule.h b/src/redismodule.h index fd9e46dc6..b368049d1 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -184,6 +184,8 @@ char *REDISMODULE_API_FUNC(RedisModule_LoadStringBuffer)(RedisModuleIO *io, size void REDISMODULE_API_FUNC(RedisModule_SaveDouble)(RedisModuleIO *io, double value); double REDISMODULE_API_FUNC(RedisModule_LoadDouble)(RedisModuleIO *io); void REDISMODULE_API_FUNC(RedisModule_Log)(RedisModuleCtx *ctx, const char *level, const char *fmt, ...); +int REDISMODULE_API_FUNC(RedisModule_StringAppendBuffer)(RedisModuleCtx *ctx, RedisModuleString *str, const char *buf, size_t len); +void REDISMODULE_API_FUNC(RedisModule_RetainString)(RedisModuleCtx *ctx, RedisModuleString *str); /* This is included inline inside each Redis module. */ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int apiver) __attribute__((unused)); @@ -277,6 +279,8 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int REDISMODULE_GET_API(LoadDouble); REDISMODULE_GET_API(EmitAOF); REDISMODULE_GET_API(Log); + REDISMODULE_GET_API(StringAppendBuffer); + REDISMODULE_GET_API(RetainString); RedisModule_SetModuleAttribs(ctx,name,ver,apiver); return REDISMODULE_OK; From 89e24b8f4ab0b5602761fb79a74c08bbb967d90d Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 3 Aug 2016 10:23:03 +0200 Subject: [PATCH 0090/1722] Modules: initial draft for a testing module. 
--- src/module.c | 201 +++++++++++++++++++++------------------ src/modules/testmodule.c | 125 ++++++++++++++++++++++++ src/redismodule.h | 2 + 3 files changed, 235 insertions(+), 93 deletions(-) create mode 100644 src/modules/testmodule.c diff --git a/src/module.c b/src/module.c index 4d1d88001..03bee27ba 100644 --- a/src/module.c +++ b/src/module.c @@ -766,6 +766,10 @@ const char *RM_StringPtrLen(const RedisModuleString *str, size_t *len) { return str->ptr; } +/* -------------------------------------------------------------------------- + * Higher level string operations + * ------------------------------------------------------------------------- */ + /* Convert the string into a long long integer, storing it at `*ll`. * Returns REDISMODULE_OK on success. If the string can't be parsed * as a valid, strict long long (no spaces before/after), REDISMODULE_ERR @@ -783,6 +787,13 @@ int RM_StringToDouble(const RedisModuleString *str, double *d) { return (retval == C_OK) ? REDISMODULE_OK : REDISMODULE_ERR; } +/* Compare two string objects, returning -1, 0 or 1 respectively if + * a < b, a == b, a > b. Strings are compared byte by byte as two + * binary blobs without any encoding care / collation attempt. */ +int RM_StringCompare(RedisModuleString *a, RedisModuleString *b) { + return compareStringObjects(a,b); +} + /* Return the (possibly modified in encoding) input 'str' object if * the string is unshared, otherwise NULL is returned. */ RedisModuleString *moduleAssertUnsharedString(RedisModuleString *str) { @@ -2940,100 +2951,9 @@ int moduleRegisterApi(const char *funcname, void *funcptr) { #define REGISTER_API(name) \ moduleRegisterApi("RedisModule_" #name, (void *)(unsigned long)RM_ ## name) -/* Register all the APIs we export. 
*/ -void moduleRegisterCoreAPI(void) { - server.moduleapi = dictCreate(&moduleAPIDictType,NULL); - REGISTER_API(Alloc); - REGISTER_API(Calloc); - REGISTER_API(Realloc); - REGISTER_API(Free); - REGISTER_API(Strdup); - REGISTER_API(CreateCommand); - REGISTER_API(SetModuleAttribs); - REGISTER_API(WrongArity); - REGISTER_API(ReplyWithLongLong); - REGISTER_API(ReplyWithError); - REGISTER_API(ReplyWithSimpleString); - REGISTER_API(ReplyWithArray); - REGISTER_API(ReplySetArrayLength); - REGISTER_API(ReplyWithString); - REGISTER_API(ReplyWithStringBuffer); - REGISTER_API(ReplyWithNull); - REGISTER_API(ReplyWithCallReply); - REGISTER_API(ReplyWithDouble); - REGISTER_API(GetSelectedDb); - REGISTER_API(SelectDb); - REGISTER_API(OpenKey); - REGISTER_API(CloseKey); - REGISTER_API(KeyType); - REGISTER_API(ValueLength); - REGISTER_API(ListPush); - REGISTER_API(ListPop); - REGISTER_API(StringToLongLong); - REGISTER_API(StringToDouble); - REGISTER_API(Call); - REGISTER_API(CallReplyProto); - REGISTER_API(FreeCallReply); - REGISTER_API(CallReplyInteger); - REGISTER_API(CallReplyType); - REGISTER_API(CallReplyLength); - REGISTER_API(CallReplyArrayElement); - REGISTER_API(CallReplyStringPtr); - REGISTER_API(CreateStringFromCallReply); - REGISTER_API(CreateString); - REGISTER_API(CreateStringFromLongLong); - REGISTER_API(CreateStringFromString); - REGISTER_API(FreeString); - REGISTER_API(StringPtrLen); - REGISTER_API(AutoMemory); - REGISTER_API(Replicate); - REGISTER_API(ReplicateVerbatim); - REGISTER_API(DeleteKey); - REGISTER_API(StringSet); - REGISTER_API(StringDMA); - REGISTER_API(StringTruncate); - REGISTER_API(SetExpire); - REGISTER_API(GetExpire); - REGISTER_API(ZsetAdd); - REGISTER_API(ZsetIncrby); - REGISTER_API(ZsetScore); - REGISTER_API(ZsetRem); - REGISTER_API(ZsetRangeStop); - REGISTER_API(ZsetFirstInScoreRange); - REGISTER_API(ZsetLastInScoreRange); - REGISTER_API(ZsetFirstInLexRange); - REGISTER_API(ZsetLastInLexRange); - REGISTER_API(ZsetRangeCurrentElement); - 
REGISTER_API(ZsetRangeNext); - REGISTER_API(ZsetRangePrev); - REGISTER_API(ZsetRangeEndReached); - REGISTER_API(HashSet); - REGISTER_API(HashGet); - REGISTER_API(IsKeysPositionRequest); - REGISTER_API(KeyAtPos); - REGISTER_API(GetClientId); - REGISTER_API(PoolAlloc); - REGISTER_API(CreateDataType); - REGISTER_API(ModuleTypeSetValue); - REGISTER_API(ModuleTypeGetType); - REGISTER_API(ModuleTypeGetValue); - REGISTER_API(SaveUnsigned); - REGISTER_API(LoadUnsigned); - REGISTER_API(SaveSigned); - REGISTER_API(LoadSigned); - REGISTER_API(SaveString); - REGISTER_API(SaveStringBuffer); - REGISTER_API(LoadString); - REGISTER_API(LoadStringBuffer); - REGISTER_API(SaveDouble); - REGISTER_API(LoadDouble); - REGISTER_API(EmitAOF); - REGISTER_API(Log); - REGISTER_API(StringAppendBuffer); - REGISTER_API(RetainString); -} - /* Global initialization at Redis startup. */ +void moduleRegisterCoreAPI(void); + void moduleInitModulesSystem(void) { server.loadmodule_queue = listCreate(); modules = dictCreate(&modulesDictType,NULL); @@ -3222,3 +3142,98 @@ void moduleCommand(client *c) { addReply(c,shared.syntaxerr); } } + +/* Register all the APIs we export. Keep this function at the end of the + * file so that's easy to seek it to add new entries. 
*/ +void moduleRegisterCoreAPI(void) { + server.moduleapi = dictCreate(&moduleAPIDictType,NULL); + REGISTER_API(Alloc); + REGISTER_API(Calloc); + REGISTER_API(Realloc); + REGISTER_API(Free); + REGISTER_API(Strdup); + REGISTER_API(CreateCommand); + REGISTER_API(SetModuleAttribs); + REGISTER_API(WrongArity); + REGISTER_API(ReplyWithLongLong); + REGISTER_API(ReplyWithError); + REGISTER_API(ReplyWithSimpleString); + REGISTER_API(ReplyWithArray); + REGISTER_API(ReplySetArrayLength); + REGISTER_API(ReplyWithString); + REGISTER_API(ReplyWithStringBuffer); + REGISTER_API(ReplyWithNull); + REGISTER_API(ReplyWithCallReply); + REGISTER_API(ReplyWithDouble); + REGISTER_API(GetSelectedDb); + REGISTER_API(SelectDb); + REGISTER_API(OpenKey); + REGISTER_API(CloseKey); + REGISTER_API(KeyType); + REGISTER_API(ValueLength); + REGISTER_API(ListPush); + REGISTER_API(ListPop); + REGISTER_API(StringToLongLong); + REGISTER_API(StringToDouble); + REGISTER_API(Call); + REGISTER_API(CallReplyProto); + REGISTER_API(FreeCallReply); + REGISTER_API(CallReplyInteger); + REGISTER_API(CallReplyType); + REGISTER_API(CallReplyLength); + REGISTER_API(CallReplyArrayElement); + REGISTER_API(CallReplyStringPtr); + REGISTER_API(CreateStringFromCallReply); + REGISTER_API(CreateString); + REGISTER_API(CreateStringFromLongLong); + REGISTER_API(CreateStringFromString); + REGISTER_API(FreeString); + REGISTER_API(StringPtrLen); + REGISTER_API(AutoMemory); + REGISTER_API(Replicate); + REGISTER_API(ReplicateVerbatim); + REGISTER_API(DeleteKey); + REGISTER_API(StringSet); + REGISTER_API(StringDMA); + REGISTER_API(StringTruncate); + REGISTER_API(SetExpire); + REGISTER_API(GetExpire); + REGISTER_API(ZsetAdd); + REGISTER_API(ZsetIncrby); + REGISTER_API(ZsetScore); + REGISTER_API(ZsetRem); + REGISTER_API(ZsetRangeStop); + REGISTER_API(ZsetFirstInScoreRange); + REGISTER_API(ZsetLastInScoreRange); + REGISTER_API(ZsetFirstInLexRange); + REGISTER_API(ZsetLastInLexRange); + REGISTER_API(ZsetRangeCurrentElement); + 
REGISTER_API(ZsetRangeNext); + REGISTER_API(ZsetRangePrev); + REGISTER_API(ZsetRangeEndReached); + REGISTER_API(HashSet); + REGISTER_API(HashGet); + REGISTER_API(IsKeysPositionRequest); + REGISTER_API(KeyAtPos); + REGISTER_API(GetClientId); + REGISTER_API(PoolAlloc); + REGISTER_API(CreateDataType); + REGISTER_API(ModuleTypeSetValue); + REGISTER_API(ModuleTypeGetType); + REGISTER_API(ModuleTypeGetValue); + REGISTER_API(SaveUnsigned); + REGISTER_API(LoadUnsigned); + REGISTER_API(SaveSigned); + REGISTER_API(LoadSigned); + REGISTER_API(SaveString); + REGISTER_API(SaveStringBuffer); + REGISTER_API(LoadString); + REGISTER_API(LoadStringBuffer); + REGISTER_API(SaveDouble); + REGISTER_API(LoadDouble); + REGISTER_API(EmitAOF); + REGISTER_API(Log); + REGISTER_API(StringAppendBuffer); + REGISTER_API(RetainString); + REGISTER_API(StringCompare); +} diff --git a/src/modules/testmodule.c b/src/modules/testmodule.c new file mode 100644 index 000000000..1cd843b3d --- /dev/null +++ b/src/modules/testmodule.c @@ -0,0 +1,125 @@ +/* Module designed to test the Redis modules subsystem. + * + * ----------------------------------------------------------------------------- + * + * Copyright (c) 2016, Salvatore Sanfilippo + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Redis nor the names of its contributors may be used + * to endorse or promote products derived from this software without + * specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "../redismodule.h" + +/* ------------------------------- Test units ------------------------------- */ + +/* TEST.STRING.APPEND -- Test appending to an existing string object. */ +int TestStringAppend(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + RedisModuleString *s = RedisModule_CreateString(ctx,"foo",3); + RedisModule_StringAppendBuffer(ctx,s,"bar",3); + RedisModule_ReplyWithString(ctx,s); + RedisModule_FreeString(ctx,s); + return REDISMODULE_OK; +} + +/* TEST.STRING.APPEND.AM -- Test append with retain when auto memory is on. */ +int TestStringAppendAM(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + RedisModule_AutoMemory(ctx); + RedisModuleString *s = RedisModule_CreateString(ctx,"foo",3); + RedisModule_RetainString(ctx,s); + RedisModule_StringAppendBuffer(ctx,s,"bar",3); + RedisModule_ReplyWithString(ctx,s); + RedisModule_FreeString(ctx,s); + return REDISMODULE_OK; +} + +/* ----------------------------- Test framework ----------------------------- */ + +/* Return 1 if the reply matches the specified string, otherwise log errors + * in the server log and return 0. 
*/ +int TestAssertStringReply(RedisModuleCtx *ctx, RedisModuleCallReply *reply, char *str, size_t len) { + RedisModuleString *mystr, *expected; + + if (RedisModule_CallReplyType(reply) != REDISMODULE_REPLY_STRING) { + RedisModule_Log(ctx,"warning","Unexpected reply type %d", + RedisModule_CallReplyType(reply)); + return 0; + } + mystr = RedisModule_CreateStringFromCallReply(reply); + expected = RedisModule_CreateString(ctx,str,len); + if (RedisModule_StringCompare(mystr,expected) != 0) { + const char *mystr_ptr = RedisModule_StringPtrLen(mystr,NULL); + const char *expected_ptr = RedisModule_StringPtrLen(expected,NULL); + RedisModule_Log(ctx,"warning", + "Unexpected string reply '%s' (instead of '%s')", + mystr_ptr, expected_ptr); + return 0; + } + return 1; +} + +#define T(name,...) \ + do { \ + RedisModule_Log(ctx,"warning","Testing %s", name); \ + reply = RedisModule_Call(ctx,name,__VA_ARGS__); \ + } while (0); + +/* TEST.IT -- Run all the tests. */ +int TestIt(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + RedisModule_AutoMemory(ctx); + RedisModuleCallReply *reply; + + T("test.string.append",""); + if (!TestAssertStringReply(ctx,reply,"foobar",6)) goto fail; + + T("test.string.append.am",""); + if (!TestAssertStringReply(ctx,reply,"foobar",6)) goto fail; + + RedisModule_ReplyWithSimpleString(ctx,"ALL TESTS PASSED"); + return REDISMODULE_OK; + +fail: + RedisModule_ReplyWithSimpleString(ctx, + "SOME TEST NOT PASSED! 
Check server logs"); + return REDISMODULE_OK; +} + +int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + if (RedisModule_Init(ctx,"test",1,REDISMODULE_APIVER_1) + == REDISMODULE_ERR) return REDISMODULE_ERR; + + if (RedisModule_CreateCommand(ctx,"test.string.append", + TestStringAppend,"write deny-oom",1,1,1) == REDISMODULE_ERR) + return REDISMODULE_ERR; + + if (RedisModule_CreateCommand(ctx,"test.string.append.am", + TestStringAppendAM,"write deny-oom",1,1,1) == REDISMODULE_ERR) + return REDISMODULE_ERR; + + if (RedisModule_CreateCommand(ctx,"test.it", + TestIt,"readonly",1,1,1) == REDISMODULE_ERR) + return REDISMODULE_ERR; + + return REDISMODULE_OK; +} diff --git a/src/redismodule.h b/src/redismodule.h index b368049d1..0a35cf047 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -186,6 +186,7 @@ double REDISMODULE_API_FUNC(RedisModule_LoadDouble)(RedisModuleIO *io); void REDISMODULE_API_FUNC(RedisModule_Log)(RedisModuleCtx *ctx, const char *level, const char *fmt, ...); int REDISMODULE_API_FUNC(RedisModule_StringAppendBuffer)(RedisModuleCtx *ctx, RedisModuleString *str, const char *buf, size_t len); void REDISMODULE_API_FUNC(RedisModule_RetainString)(RedisModuleCtx *ctx, RedisModuleString *str); +int REDISMODULE_API_FUNC(RedisModule_StringCompare)(RedisModuleString *a, RedisModuleString *b); /* This is included inline inside each Redis module. 
*/ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int apiver) __attribute__((unused)); @@ -281,6 +282,7 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int REDISMODULE_GET_API(Log); REDISMODULE_GET_API(StringAppendBuffer); REDISMODULE_GET_API(RetainString); + REDISMODULE_GET_API(StringCompare); RedisModule_SetModuleAttribs(ctx,name,ver,apiver); return REDISMODULE_OK; From fa5d8e218ec7e85935ba71ad68b4467b276cd72c Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 3 Aug 2016 10:56:26 +0200 Subject: [PATCH 0091/1722] Fix comment over 80 cols. --- src/server.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/server.c b/src/server.c index c8eaebdea..4ae4ac222 100644 --- a/src/server.c +++ b/src/server.c @@ -1111,7 +1111,7 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { freeClientsInAsyncFreeQueue(); /* Clear the paused clients flag if needed. */ - clientsArePaused(); /* Don't check return value, just use the side effect. */ + clientsArePaused(); /* Don't check return value, just use the side effect.*/ /* Replication cron function -- used to reconnect to master, * detect transfer failures, start background RDB transfers and so forth. */ From 954ac5c7562ff034603bce58ddc845c509fb9d9f Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 3 Aug 2016 11:12:13 +0200 Subject: [PATCH 0092/1722] Security: Cross Protocol Scripting protection. This is an attempt at mitigating problems due to cross protocol scripting, an attack targeting services using line oriented protocols like Redis that can accept HTTP requests as valid protocol, by discarding the invalid parts and accepting the payloads sent, for example, via a POST request. For this to be effective, when we detect POST and Host: and terminate the connection asynchronously, the networking code was modified in order to never process further input. 
It was later verified that in a pipelined request containing a POST command, the successive commands are not executed. --- src/networking.c | 26 ++++++++++++++++++++++++-- src/server.c | 2 ++ src/server.h | 1 + 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/src/networking.c b/src/networking.c index 723cd599d..2be40ae15 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1242,8 +1242,10 @@ void processInputBuffer(client *c) { /* CLIENT_CLOSE_AFTER_REPLY closes the connection once the reply is * written to the client. Make sure to not let the reply grow after - * this flag has been set (i.e. don't process more commands). */ - if (c->flags & CLIENT_CLOSE_AFTER_REPLY) break; + * this flag has been set (i.e. don't process more commands). + * + * The same applies for clients we want to terminate ASAP. */ + if (c->flags & (CLIENT_CLOSE_AFTER_REPLY|CLIENT_CLOSE_ASAP)) break; /* Determine request type when unknown. */ if (!c->reqtype) { @@ -1610,6 +1612,26 @@ void clientCommand(client *c) { } } +/* This callback is bound to POST and "Host:" command names. Those are not + * really commands, but are used in security attacks in order to talk to + * Redis instances via HTTP, with a technique called "cross protocol scripting" + * which exploits the fact that services like Redis will discard invalid + * HTTP headers and will process what follows. + * + * As a protection against this attack, Redis will terminate the connection + * when a POST or "Host:" header is seen, and will log the event from + * time to time (to avoid creating a DOS as a result of too many logs). */ +void securityWarningCommand(client *c) { + static time_t logged_time; + time_t now = time(NULL); + + if (labs(now-logged_time) > 60) { + serverLog(LL_WARNING,"Possible SECURITY ATTACK detected. It looks like somebody is sending POST or Host: commands to Redis. This is likely due to an attacker attempting to use Cross Protocol Scripting to compromise your Redis instance. 
Connection aborted."); + logged_time = now; + } + freeClientAsync(c); +} + /* Rewrite the command vector of the client. All the new objects ref count * is incremented. The old command vector is freed, and the old objects * ref count is decremented. */ diff --git a/src/server.c b/src/server.c index 4ae4ac222..a77582592 100644 --- a/src/server.c +++ b/src/server.c @@ -296,6 +296,8 @@ struct redisCommand redisCommandTable[] = { {"pfcount",pfcountCommand,-2,"r",0,NULL,1,-1,1,0,0}, {"pfmerge",pfmergeCommand,-2,"wm",0,NULL,1,-1,1,0,0}, {"pfdebug",pfdebugCommand,-3,"w",0,NULL,0,0,0,0,0}, + {"post",securityWarningCommand,-1,"lt",0,NULL,0,0,0,0,0}, + {"host:",securityWarningCommand,-1,"lt",0,NULL,0,0,0,0,0}, {"latency",latencyCommand,-2,"aslt",0,NULL,0,0,0,0,0} }; diff --git a/src/server.h b/src/server.h index f3f6b4ddd..d410d5b2a 100644 --- a/src/server.h +++ b/src/server.h @@ -1812,6 +1812,7 @@ void pfmergeCommand(client *c); void pfdebugCommand(client *c); void latencyCommand(client *c); void moduleCommand(client *c); +void securityWarningCommand(client *c); #if defined(__GNUC__) void *calloc(size_t count, size_t size) __attribute__ ((deprecated)); From 935fb1eae2795bc819b9608dcf1b152edc047e08 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 3 Aug 2016 18:09:36 +0200 Subject: [PATCH 0093/1722] Modules: handle NULL replies more gracefully. After all crashing at every API misuse makes everybody's life more complex. --- src/module.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/module.c b/src/module.c index 03bee27ba..e0bc417d6 100644 --- a/src/module.c +++ b/src/module.c @@ -762,6 +762,11 @@ void RM_RetainString(RedisModuleCtx *ctx, RedisModuleString *str) { * and length of the string. The returned pointer and length should only * be used for read only accesses and never modified. 
*/ const char *RM_StringPtrLen(const RedisModuleString *str, size_t *len) { + if (str == NULL) { + const char *errmsg = "(NULL string reply referenced in module)"; + if (len) *len = strlen(errmsg); + return errmsg; + } if (len) *len = sdslen(str->ptr); return str->ptr; } @@ -2203,6 +2208,7 @@ void RM_FreeCallReply(RedisModuleCallReply *reply) { /* Return the reply type. */ int RM_CallReplyType(RedisModuleCallReply *reply) { + if (!reply) return REDISMODULE_REPLY_UNKNOWN; return reply->type; } From 3ed474aa05c551cebc83d9f5968b81df8342be98 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 3 Aug 2016 18:10:11 +0200 Subject: [PATCH 0094/1722] Modules: basic call/reply tests in test module. --- src/modules/testmodule.c | 71 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/src/modules/testmodule.c b/src/modules/testmodule.c index 1cd843b3d..a1a42f43b 100644 --- a/src/modules/testmodule.c +++ b/src/modules/testmodule.c @@ -31,9 +31,48 @@ */ #include "../redismodule.h" +#include + +/* --------------------------------- Helpers -------------------------------- */ + +/* Return true if the reply and the C null term string matches. */ +int TestMatchReply(RedisModuleCallReply *reply, char *str) { + RedisModuleString *mystr; + mystr = RedisModule_CreateStringFromCallReply(reply); + if (!mystr) return 0; + const char *ptr = RedisModule_StringPtrLen(mystr,NULL); + return strcmp(ptr,str) == 0; +} /* ------------------------------- Test units ------------------------------- */ +/* TEST.CALL -- Test Call() API. 
*/ +int TestCall(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + RedisModule_AutoMemory(ctx); + RedisModuleCallReply *reply; + + RedisModule_Call(ctx,"DEL","c","mylist"); + RedisModuleString *mystr = RedisModule_CreateString(ctx,"foo",3); + RedisModule_Call(ctx,"RPUSH","csl","mylist",mystr,(long long)1234); + reply = RedisModule_Call(ctx,"LRANGE","ccc","mylist","0","-1"); + long long items = RedisModule_CallReplyLength(reply); + if (items != 2) goto fail; + + RedisModuleCallReply *item0, *item1; + + item0 = RedisModule_CallReplyArrayElement(reply,0); + item1 = RedisModule_CallReplyArrayElement(reply,1); + if (!TestMatchReply(item0,"foo")) goto fail; + if (!TestMatchReply(item1,"1234")) goto fail; + + RedisModule_ReplyWithSimpleString(ctx,"OK"); + return REDISMODULE_OK; + +fail: + RedisModule_ReplyWithSimpleString(ctx,"ERR"); + return REDISMODULE_OK; +} + /* TEST.STRING.APPEND -- Test appending to an existing string object. */ int TestStringAppend(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { RedisModuleString *s = RedisModule_CreateString(ctx,"foo",3); @@ -79,6 +118,24 @@ int TestAssertStringReply(RedisModuleCtx *ctx, RedisModuleCallReply *reply, char return 1; } +/* Return 1 if the reply matches the specified integer, otherwise log errors + * in the server log and return 0. */ +int TestAssertIntegerReply(RedisModuleCtx *ctx, RedisModuleCallReply *reply, long long expected) { + if (RedisModule_CallReplyType(reply) != REDISMODULE_REPLY_INTEGER) { + RedisModule_Log(ctx,"warning","Unexpected reply type %d", + RedisModule_CallReplyType(reply)); + return 0; + } + long long val = RedisModule_CallReplyInteger(reply); + if (val != expected) { + RedisModule_Log(ctx,"warning", + "Unexpected integer reply '%lld' (instead of '%lld')", + val, expected); + return 0; + } + return 1; +} + #define T(name,...) 
\ do { \ RedisModule_Log(ctx,"warning","Testing %s", name); \ @@ -90,6 +147,16 @@ int TestIt(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { RedisModule_AutoMemory(ctx); RedisModuleCallReply *reply; + /* Make sure the DB is empty before to proceed. */ + T("dbsize",""); + if (!TestAssertIntegerReply(ctx,reply,0)) goto fail; + + T("ping",""); + if (!TestAssertStringReply(ctx,reply,"PONG",4)) goto fail; + + T("test.call",""); + if (!TestAssertStringReply(ctx,reply,"OK",2)) goto fail; + T("test.string.append",""); if (!TestAssertStringReply(ctx,reply,"foobar",6)) goto fail; @@ -109,6 +176,10 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) if (RedisModule_Init(ctx,"test",1,REDISMODULE_APIVER_1) == REDISMODULE_ERR) return REDISMODULE_ERR; + if (RedisModule_CreateCommand(ctx,"test.call", + TestCall,"write deny-oom",1,1,1) == REDISMODULE_ERR) + return REDISMODULE_ERR; + if (RedisModule_CreateCommand(ctx,"test.string.append", TestStringAppend,"write deny-oom",1,1,1) == REDISMODULE_ERR) return REDISMODULE_ERR; From eb3640db0030000e7054113abd40bb5cf07265bb Mon Sep 17 00:00:00 2001 From: Guo Xiao Date: Thu, 4 Aug 2016 15:09:42 +0800 Subject: [PATCH 0095/1722] Use the standard predefined identifier __func__ (since C99) Fix warning: ISO C does not support '__FUNCTION__' predefined identifier [-Wpedantic] --- src/debugmacro.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/debugmacro.h b/src/debugmacro.h index df237bad3..ded2d2667 100644 --- a/src/debugmacro.h +++ b/src/debugmacro.h @@ -34,7 +34,7 @@ #define D(...) 
\ do { \ FILE *fp = fopen("/tmp/log.txt","a"); \ - fprintf(fp,"%s:%s:%d:\t", __FILE__, __FUNCTION__, __LINE__); \ + fprintf(fp,"%s:%s:%d:\t", __FILE__, __func__, __LINE__); \ fprintf(fp,__VA_ARGS__); \ fprintf(fp,"\n"); \ fclose(fp); \ From 3bf40950e1b609b9196e6ccb47a5bdea6b8beea2 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 9 Aug 2016 11:07:32 +0200 Subject: [PATCH 0096/1722] RDB AOF preamble: WIP 1. --- src/aof.c | 79 ++++++++++++++++++++++++++++++++-------------------- src/rdb.c | 23 +++++++++++---- src/rdb.h | 3 ++ src/server.h | 2 ++ 4 files changed, 72 insertions(+), 35 deletions(-) diff --git a/src/aof.c b/src/aof.c index 6a92a0cd9..39229b5df 100644 --- a/src/aof.c +++ b/src/aof.c @@ -989,7 +989,7 @@ int rewriteHashObject(rio *r, robj *key, robj *o) { } /* Call the module type callback in order to rewrite a data type - * taht is exported by a module and is not handled by Redis itself. + * that is exported by a module and is not handled by Redis itself. * The function returns 0 on error, 1 on success. */ int rewriteModuleObject(rio *r, robj *key, robj *o) { RedisModuleIO io; @@ -1015,37 +1015,11 @@ ssize_t aofReadDiffFromParent(void) { return total; } -/* Write a sequence of commands able to fully rebuild the dataset into - * "filename". Used both by REWRITEAOF and BGREWRITEAOF. - * - * In order to minimize the number of commands needed in the rewritten - * log Redis uses variadic commands when possible, such as RPUSH, SADD - * and ZADD. However at max AOF_REWRITE_ITEMS_PER_CMD items per time - * are inserted using a single command. */ -int rewriteAppendOnlyFile(char *filename) { +void rewriteAppendOnlyFileRio(rio *aof) { dictIterator *di = NULL; dictEntry *de; - rio aof; - FILE *fp; - char tmpfile[256]; - int j; - long long now = mstime(); - char byte; size_t processed = 0; - /* Note that we have to use a different temp name here compared to the - * one used by rewriteAppendOnlyFileBackground() function. 
*/ - snprintf(tmpfile,256,"temp-rewriteaof-%d.aof", (int) getpid()); - fp = fopen(tmpfile,"w"); - if (!fp) { - serverLog(LL_WARNING, "Opening the temp file for AOF rewrite in rewriteAppendOnlyFile(): %s", strerror(errno)); - return C_ERR; - } - - server.aof_child_diff = sdsempty(); - rioInitWithFile(&aof,fp); - if (server.aof_rewrite_incremental_fsync) - rioSetAutoSync(&aof,AOF_AUTOSYNC_BYTES); for (j = 0; j < server.dbnum; j++) { char selectcmd[] = "*2\r\n$6\r\nSELECT\r\n"; redisDb *db = server.db+j; @@ -1105,7 +1079,7 @@ int rewriteAppendOnlyFile(char *filename) { if (rioWriteBulkLongLong(&aof,expiretime) == 0) goto werr; } /* Read some diff from the parent process from time to time. */ - if (aof.processed_bytes > processed+1024*10) { + if (aof.processed_bytes > processed+AOF_READ_DIFF_INTERVAL_BYTES) { processed = aof.processed_bytes; aofReadDiffFromParent(); } @@ -1113,6 +1087,52 @@ int rewriteAppendOnlyFile(char *filename) { dictReleaseIterator(di); di = NULL; } + return C_OK; + +werr: + if (di) dictReleaseIterator(di); + return C_ERR; +} + +/* Write a sequence of commands able to fully rebuild the dataset into + * "filename". Used both by REWRITEAOF and BGREWRITEAOF. + * + * In order to minimize the number of commands needed in the rewritten + * log Redis uses variadic commands when possible, such as RPUSH, SADD + * and ZADD. However at max AOF_REWRITE_ITEMS_PER_CMD items per time + * are inserted using a single command. */ +int rewriteAppendOnlyFile(char *filename) { + rio aof; + FILE *fp; + char tmpfile[256]; + int j; + long long now = mstime(); + char byte; + + /* Note that we have to use a different temp name here compared to the + * one used by rewriteAppendOnlyFileBackground() function. 
*/ + snprintf(tmpfile,256,"temp-rewriteaof-%d.aof", (int) getpid()); + fp = fopen(tmpfile,"w"); + if (!fp) { + serverLog(LL_WARNING, "Opening the temp file for AOF rewrite in rewriteAppendOnlyFile(): %s", strerror(errno)); + return C_ERR; + } + + server.aof_child_diff = sdsempty(); + rioInitWithFile(&aof,fp); + + if (server.aof_rewrite_incremental_fsync) + rioSetAutoSync(&aof,AOF_AUTOSYNC_BYTES); + + if (server.aof_use_rdb_prefix) { + int error; + if (rdbSaveRio(&rdb,&error,RDB_SAVE_AOF_PREAMBLE) == C_ERR) { + errno = error; + goto werr; + } + } else { + rewriteAppendOnlyFileRio(&aof); + } /* Do an initial slow fsync here while the parent is still sending * data, in order to make the next final fsync faster. */ @@ -1178,7 +1198,6 @@ werr: serverLog(LL_WARNING,"Write error writing append only file on disk: %s", strerror(errno)); fclose(fp); unlink(tmpfile); - if (di) dictReleaseIterator(di); return C_ERR; } diff --git a/src/rdb.c b/src/rdb.c index 859297943..83e5868cd 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -818,14 +818,16 @@ int rdbSaveAuxFieldStrInt(rio *rdb, char *key, long long val) { } /* Save a few default AUX fields with information about the RDB generated. */ -int rdbSaveInfoAuxFields(rio *rdb) { +int rdbSaveInfoAuxFields(rio *rdb, int flags) { int redis_bits = (sizeof(void*) == 8) ? 64 : 32; + int aof_preamble = (flags & RDB_SAVE_AOF_PREAMBLE) != 0; /* Add a few fields about the state when the RDB was created. 
*/ if (rdbSaveAuxFieldStrStr(rdb,"redis-ver",REDIS_VERSION) == -1) return -1; if (rdbSaveAuxFieldStrInt(rdb,"redis-bits",redis_bits) == -1) return -1; if (rdbSaveAuxFieldStrInt(rdb,"ctime",time(NULL)) == -1) return -1; if (rdbSaveAuxFieldStrInt(rdb,"used-mem",zmalloc_used_memory()) == -1) return -1; + if (rdbSaveAuxFieldStrInt(rdb,"aof-preamble",aof_preamble)) return -1; return 1; } @@ -837,19 +839,20 @@ int rdbSaveInfoAuxFields(rio *rdb) { * When the function returns C_ERR and if 'error' is not NULL, the * integer pointed by 'error' is set to the value of errno just after the I/O * error. */ -int rdbSaveRio(rio *rdb, int *error) { +int rdbSaveRio(rio *rdb, int *error, int flags) { dictIterator *di = NULL; dictEntry *de; char magic[10]; int j; long long now = mstime(); uint64_t cksum; + size_t processed = 0; if (server.rdb_checksum) rdb->update_cksum = rioGenericUpdateChecksum; snprintf(magic,sizeof(magic),"REDIS%04d",RDB_VERSION); if (rdbWriteRaw(rdb,magic,9) == -1) goto werr; - if (rdbSaveInfoAuxFields(rdb) == -1) goto werr; + if (rdbSaveInfoAuxFields(rdb,flags) == -1) goto werr; for (j = 0; j < server.dbnum; j++) { redisDb *db = server.db+j; @@ -886,6 +889,16 @@ int rdbSaveRio(rio *rdb, int *error) { initStaticStringObject(key,keystr); expire = getExpire(db,&key); if (rdbSaveKeyValuePair(rdb,&key,o,expire,now) == -1) goto werr; + + /* When this RDB is produced as part of an AOF rewrite, move + * accumulated diff from parent to child while rewriting in + * order to have a smaller final write. 
*/ + if (flags & RDB_SAVE_AOF_PREAMBLE && + rdb.processed_bytes > processed+AOF_READ_DIFF_INTERVAL_BYTES) + { + processed = rdb.processed_bytes; + aofReadDiffFromParent(); + } } dictReleaseIterator(di); } @@ -923,7 +936,7 @@ int rdbSaveRioWithEOFMark(rio *rdb, int *error) { if (rioWrite(rdb,"$EOF:",5) == 0) goto werr; if (rioWrite(rdb,eofmark,RDB_EOF_MARK_SIZE) == 0) goto werr; if (rioWrite(rdb,"\r\n",2) == 0) goto werr; - if (rdbSaveRio(rdb,error) == C_ERR) goto werr; + if (rdbSaveRio(rdb,error,RDB_SAVE_NONE) == C_ERR) goto werr; if (rioWrite(rdb,eofmark,RDB_EOF_MARK_SIZE) == 0) goto werr; return C_OK; @@ -955,7 +968,7 @@ int rdbSave(char *filename) { } rioInitWithFile(&rdb,fp); - if (rdbSaveRio(&rdb,&error) == C_ERR) { + if (rdbSaveRio(&rdb,&error,RDB_SAVE_NONE) == C_ERR) { errno = error; goto werr; } diff --git a/src/rdb.h b/src/rdb.h index a71ecb16e..2c9a99850 100644 --- a/src/rdb.h +++ b/src/rdb.h @@ -106,6 +106,9 @@ #define RDB_LOAD_PLAIN (1<<1) #define RDB_LOAD_SDS (1<<2) +#define RDB_SAVE_NONE 0 +#define RDB_SAVE_AOF_PREAMBLE (1<<0) + int rdbSaveType(rio *rdb, unsigned char type); int rdbLoadType(rio *rdb); int rdbSaveTime(rio *rdb, time_t t); diff --git a/src/server.h b/src/server.h index d410d5b2a..2bc985cbb 100644 --- a/src/server.h +++ b/src/server.h @@ -93,6 +93,7 @@ typedef long long mstime_t; /* millisecond time type. 
*/ #define AOF_REWRITE_PERC 100 #define AOF_REWRITE_MIN_SIZE (64*1024*1024) #define AOF_REWRITE_ITEMS_PER_CMD 64 +#define AOF_READ_DIFF_INTERVAL_BYTES (1024*10) #define CONFIG_DEFAULT_SLOWLOG_LOG_SLOWER_THAN 10000 #define CONFIG_DEFAULT_SLOWLOG_MAX_LEN 128 #define CONFIG_DEFAULT_MAX_CLIENTS 10000 @@ -1365,6 +1366,7 @@ void stopLoading(void); /* RDB persistence */ #include "rdb.h" +int rdbSaveRio(rio *rdb, int *error, int flags); /* AOF persistence */ void flushAppendOnlyFile(int force); From 95ce1aa378a5b55990e7980a33ff5fa080974a1f Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 9 Aug 2016 16:41:40 +0200 Subject: [PATCH 0097/1722] RDB AOF preamble: WIP 2. --- src/aof.c | 46 +++++++++++++++++++++------------------------- src/config.c | 9 +++++++++ src/rdb.c | 6 +++--- src/server.c | 1 + src/server.h | 3 +++ 5 files changed, 37 insertions(+), 28 deletions(-) diff --git a/src/aof.c b/src/aof.c index 39229b5df..104d8fe39 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1015,10 +1015,12 @@ ssize_t aofReadDiffFromParent(void) { return total; } -void rewriteAppendOnlyFileRio(rio *aof) { +int rewriteAppendOnlyFileRio(rio *aof) { dictIterator *di = NULL; dictEntry *de; size_t processed = 0; + long long now = mstime(); + int j; for (j = 0; j < server.dbnum; j++) { char selectcmd[] = "*2\r\n$6\r\nSELECT\r\n"; @@ -1026,14 +1028,10 @@ void rewriteAppendOnlyFileRio(rio *aof) { dict *d = db->dict; if (dictSize(d) == 0) continue; di = dictGetSafeIterator(d); - if (!di) { - fclose(fp); - return C_ERR; - } /* SELECT the new DB */ - if (rioWrite(&aof,selectcmd,sizeof(selectcmd)-1) == 0) goto werr; - if (rioWriteBulkLongLong(&aof,j) == 0) goto werr; + if (rioWrite(aof,selectcmd,sizeof(selectcmd)-1) == 0) goto werr; + if (rioWriteBulkLongLong(aof,j) == 0) goto werr; /* Iterate this DB writing every entry */ while((de = dictNext(di)) != NULL) { @@ -1054,33 +1052,33 @@ void rewriteAppendOnlyFileRio(rio *aof) { if (o->type == OBJ_STRING) { /* Emit a SET command */ char 
cmd[]="*3\r\n$3\r\nSET\r\n"; - if (rioWrite(&aof,cmd,sizeof(cmd)-1) == 0) goto werr; + if (rioWrite(aof,cmd,sizeof(cmd)-1) == 0) goto werr; /* Key and value */ - if (rioWriteBulkObject(&aof,&key) == 0) goto werr; - if (rioWriteBulkObject(&aof,o) == 0) goto werr; + if (rioWriteBulkObject(aof,&key) == 0) goto werr; + if (rioWriteBulkObject(aof,o) == 0) goto werr; } else if (o->type == OBJ_LIST) { - if (rewriteListObject(&aof,&key,o) == 0) goto werr; + if (rewriteListObject(aof,&key,o) == 0) goto werr; } else if (o->type == OBJ_SET) { - if (rewriteSetObject(&aof,&key,o) == 0) goto werr; + if (rewriteSetObject(aof,&key,o) == 0) goto werr; } else if (o->type == OBJ_ZSET) { - if (rewriteSortedSetObject(&aof,&key,o) == 0) goto werr; + if (rewriteSortedSetObject(aof,&key,o) == 0) goto werr; } else if (o->type == OBJ_HASH) { - if (rewriteHashObject(&aof,&key,o) == 0) goto werr; + if (rewriteHashObject(aof,&key,o) == 0) goto werr; } else if (o->type == OBJ_MODULE) { - if (rewriteModuleObject(&aof,&key,o) == 0) goto werr; + if (rewriteModuleObject(aof,&key,o) == 0) goto werr; } else { serverPanic("Unknown object type"); } /* Save the expire time */ if (expiretime != -1) { char cmd[]="*3\r\n$9\r\nPEXPIREAT\r\n"; - if (rioWrite(&aof,cmd,sizeof(cmd)-1) == 0) goto werr; - if (rioWriteBulkObject(&aof,&key) == 0) goto werr; - if (rioWriteBulkLongLong(&aof,expiretime) == 0) goto werr; + if (rioWrite(aof,cmd,sizeof(cmd)-1) == 0) goto werr; + if (rioWriteBulkObject(aof,&key) == 0) goto werr; + if (rioWriteBulkLongLong(aof,expiretime) == 0) goto werr; } /* Read some diff from the parent process from time to time. 
*/ - if (aof.processed_bytes > processed+AOF_READ_DIFF_INTERVAL_BYTES) { - processed = aof.processed_bytes; + if (aof->processed_bytes > processed+AOF_READ_DIFF_INTERVAL_BYTES) { + processed = aof->processed_bytes; aofReadDiffFromParent(); } } @@ -1105,8 +1103,6 @@ int rewriteAppendOnlyFile(char *filename) { rio aof; FILE *fp; char tmpfile[256]; - int j; - long long now = mstime(); char byte; /* Note that we have to use a different temp name here compared to the @@ -1124,14 +1120,14 @@ int rewriteAppendOnlyFile(char *filename) { if (server.aof_rewrite_incremental_fsync) rioSetAutoSync(&aof,AOF_AUTOSYNC_BYTES); - if (server.aof_use_rdb_prefix) { + if (server.aof_use_rdb_preamble) { int error; - if (rdbSaveRio(&rdb,&error,RDB_SAVE_AOF_PREAMBLE) == C_ERR) { + if (rdbSaveRio(&aof,&error,RDB_SAVE_AOF_PREAMBLE) == C_ERR) { errno = error; goto werr; } } else { - rewriteAppendOnlyFileRio(&aof); + if (rewriteAppendOnlyFileRio(&aof) == C_ERR) goto werr; } /* Do an initial slow fsync here while the parent is still sending diff --git a/src/config.c b/src/config.c index dd21a0aca..1d81180b7 100644 --- a/src/config.c +++ b/src/config.c @@ -475,6 +475,10 @@ void loadServerConfigFromString(char *config) { if ((server.aof_load_truncated = yesnotoi(argv[1])) == -1) { err = "argument must be 'yes' or 'no'"; goto loaderr; } + } else if (!strcasecmp(argv[0],"aof-use-rdb-preamble") && argc == 2) { + if ((server.aof_use_rdb_preamble = yesnotoi(argv[1])) == -1) { + err = "argument must be 'yes' or 'no'"; goto loaderr; + } } else if (!strcasecmp(argv[0],"requirepass") && argc == 2) { if (strlen(argv[1]) > CONFIG_AUTHPASS_MAX_LEN) { err = "Password is longer than CONFIG_AUTHPASS_MAX_LEN"; @@ -953,6 +957,8 @@ void configSetCommand(client *c) { "aof-rewrite-incremental-fsync",server.aof_rewrite_incremental_fsync) { } config_set_bool_field( "aof-load-truncated",server.aof_load_truncated) { + } config_set_bool_field( + "aof-use-rdb-preamble",server.aof_use_rdb_preamble) { } 
config_set_bool_field( "slave-serve-stale-data",server.repl_serve_stale_data) { } config_set_bool_field( @@ -1227,6 +1233,8 @@ void configGetCommand(client *c) { server.aof_rewrite_incremental_fsync); config_get_bool_field("aof-load-truncated", server.aof_load_truncated); + config_get_bool_field("aof-use-rdb-preamble", + server.aof_use_rdb_preamble); config_get_bool_field("lazyfree-lazy-eviction", server.lazyfree_lazy_eviction); config_get_bool_field("lazyfree-lazy-expire", @@ -1947,6 +1955,7 @@ int rewriteConfig(char *path) { rewriteConfigNumericalOption(state,"hz",server.hz,CONFIG_DEFAULT_HZ); rewriteConfigYesNoOption(state,"aof-rewrite-incremental-fsync",server.aof_rewrite_incremental_fsync,CONFIG_DEFAULT_AOF_REWRITE_INCREMENTAL_FSYNC); rewriteConfigYesNoOption(state,"aof-load-truncated",server.aof_load_truncated,CONFIG_DEFAULT_AOF_LOAD_TRUNCATED); + rewriteConfigYesNoOption(state,"aof-use-rdb-preamble",server.aof_use_rdb_preamble,CONFIG_DEFAULT_AOF_USE_RDB_PREAMBLE); rewriteConfigEnumOption(state,"supervised",server.supervised_mode,supervised_mode_enum,SUPERVISED_NONE); rewriteConfigYesNoOption(state,"lazyfree-lazy-eviction",server.lazyfree_lazy_eviction,CONFIG_DEFAULT_LAZYFREE_LAZY_EVICTION); rewriteConfigYesNoOption(state,"lazyfree-lazy-expire",server.lazyfree_lazy_expire,CONFIG_DEFAULT_LAZYFREE_LAZY_EXPIRE); diff --git a/src/rdb.c b/src/rdb.c index 83e5868cd..570e20f4d 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -827,7 +827,7 @@ int rdbSaveInfoAuxFields(rio *rdb, int flags) { if (rdbSaveAuxFieldStrInt(rdb,"redis-bits",redis_bits) == -1) return -1; if (rdbSaveAuxFieldStrInt(rdb,"ctime",time(NULL)) == -1) return -1; if (rdbSaveAuxFieldStrInt(rdb,"used-mem",zmalloc_used_memory()) == -1) return -1; - if (rdbSaveAuxFieldStrInt(rdb,"aof-preamble",aof_preamble)) return -1; + if (rdbSaveAuxFieldStrInt(rdb,"aof-preamble",aof_preamble) == -1) return -1; return 1; } @@ -894,9 +894,9 @@ int rdbSaveRio(rio *rdb, int *error, int flags) { * accumulated diff from parent to 
child while rewriting in * order to have a smaller final write. */ if (flags & RDB_SAVE_AOF_PREAMBLE && - rdb.processed_bytes > processed+AOF_READ_DIFF_INTERVAL_BYTES) + rdb->processed_bytes > processed+AOF_READ_DIFF_INTERVAL_BYTES) { - processed = rdb.processed_bytes; + processed = rdb->processed_bytes; aofReadDiffFromParent(); } } diff --git a/src/server.c b/src/server.c index a77582592..d7ce68852 100644 --- a/src/server.c +++ b/src/server.c @@ -1345,6 +1345,7 @@ void initServerConfig(void) { server.aof_flush_postponed_start = 0; server.aof_rewrite_incremental_fsync = CONFIG_DEFAULT_AOF_REWRITE_INCREMENTAL_FSYNC; server.aof_load_truncated = CONFIG_DEFAULT_AOF_LOAD_TRUNCATED; + server.aof_use_rdb_preamble = CONFIG_DEFAULT_AOF_USE_RDB_PREAMBLE; server.pidfile = NULL; server.rdb_filename = zstrdup(CONFIG_DEFAULT_RDB_FILENAME); server.aof_filename = zstrdup(CONFIG_DEFAULT_AOF_FILENAME); diff --git a/src/server.h b/src/server.h index 2bc985cbb..a5f0ee1a6 100644 --- a/src/server.h +++ b/src/server.h @@ -137,6 +137,7 @@ typedef long long mstime_t; /* millisecond time type. */ #define CONFIG_DEFAULT_AOF_FILENAME "appendonly.aof" #define CONFIG_DEFAULT_AOF_NO_FSYNC_ON_REWRITE 0 #define CONFIG_DEFAULT_AOF_LOAD_TRUNCATED 1 +#define CONFIG_DEFAULT_AOF_USE_RDB_PREAMBLE 0 #define CONFIG_DEFAULT_ACTIVE_REHASHING 1 #define CONFIG_DEFAULT_AOF_REWRITE_INCREMENTAL_FSYNC 1 #define CONFIG_DEFAULT_MIN_SLAVES_TO_WRITE 0 @@ -901,6 +902,7 @@ struct redisServer { int aof_last_write_status; /* C_OK or C_ERR */ int aof_last_write_errno; /* Valid if aof_last_write_status is ERR */ int aof_load_truncated; /* Don't stop on unexpected AOF EOF. */ + int aof_use_rdb_preamble; /* Use RDB preamble on AOF rewrites. */ /* AOF pipes used to communicate between parent and child during rewrite. 
*/ int aof_pipe_write_data_to_child; int aof_pipe_read_data_from_parent; @@ -1379,6 +1381,7 @@ int startAppendOnly(void); void backgroundRewriteDoneHandler(int exitcode, int bysignal); void aofRewriteBufferReset(void); unsigned long aofRewriteBufferSize(void); +ssize_t aofReadDiffFromParent(void); /* Sorted sets data type */ From 96d57f8d557b1a83dc42985359689893566cfd33 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 11 Aug 2016 15:27:23 +0200 Subject: [PATCH 0098/1722] RDB AOF preamble: WIP 3 (RDB loading refactoring). --- src/rdb.c | 67 +++++++++++++++++++++++++++++++------------------------ 1 file changed, 38 insertions(+), 29 deletions(-) diff --git a/src/rdb.c b/src/rdb.c index 570e20f4d..3b37b3835 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -1386,67 +1386,61 @@ void rdbLoadProgressCallback(rio *r, const void *buf, size_t len) { } } -int rdbLoad(char *filename) { +/* Load an RDB file from the rio stream 'rdb'. On success C_OK is returned, + * otherwise C_ERR is returned and 'errno' is set accordingly. 
*/ +int rdbLoadRio(rio *rdb) { uint64_t dbid; int type, rdbver; redisDb *db = server.db+0; char buf[1024]; long long expiretime, now = mstime(); - FILE *fp; - rio rdb; - if ((fp = fopen(filename,"r")) == NULL) return C_ERR; - - rioInitWithFile(&rdb,fp); - rdb.update_cksum = rdbLoadProgressCallback; - rdb.max_processing_chunk = server.loading_process_events_interval_bytes; - if (rioRead(&rdb,buf,9) == 0) goto eoferr; + rdb->update_cksum = rdbLoadProgressCallback; + rdb->max_processing_chunk = server.loading_process_events_interval_bytes; + if (rioRead(rdb,buf,9) == 0) goto eoferr; buf[9] = '\0'; if (memcmp(buf,"REDIS",5) != 0) { - fclose(fp); serverLog(LL_WARNING,"Wrong signature trying to load DB from file"); errno = EINVAL; return C_ERR; } rdbver = atoi(buf+5); if (rdbver < 1 || rdbver > RDB_VERSION) { - fclose(fp); serverLog(LL_WARNING,"Can't handle RDB format version %d",rdbver); errno = EINVAL; return C_ERR; } - startLoading(fp); while(1) { robj *key, *val; expiretime = -1; /* Read type. */ - if ((type = rdbLoadType(&rdb)) == -1) goto eoferr; + if ((type = rdbLoadType(rdb)) == -1) goto eoferr; /* Handle special types. */ if (type == RDB_OPCODE_EXPIRETIME) { /* EXPIRETIME: load an expire associated with the next key * to load. Note that after loading an expire we need to * load the actual type, and continue. */ - if ((expiretime = rdbLoadTime(&rdb)) == -1) goto eoferr; + if ((expiretime = rdbLoadTime(rdb)) == -1) goto eoferr; /* We read the time so we need to read the object type again. */ - if ((type = rdbLoadType(&rdb)) == -1) goto eoferr; + if ((type = rdbLoadType(rdb)) == -1) goto eoferr; /* the EXPIRETIME opcode specifies time in seconds, so convert * into milliseconds. */ expiretime *= 1000; } else if (type == RDB_OPCODE_EXPIRETIME_MS) { /* EXPIRETIME_MS: milliseconds precision expire times introduced * with RDB v3. Like EXPIRETIME but no with more precision. 
*/ - if ((expiretime = rdbLoadMillisecondTime(&rdb)) == -1) goto eoferr; + if ((expiretime = rdbLoadMillisecondTime(rdb)) == -1) goto eoferr; /* We read the time so we need to read the object type again. */ - if ((type = rdbLoadType(&rdb)) == -1) goto eoferr; + if ((type = rdbLoadType(rdb)) == -1) goto eoferr; } else if (type == RDB_OPCODE_EOF) { /* EOF: End of file, exit the main loop. */ break; } else if (type == RDB_OPCODE_SELECTDB) { /* SELECTDB: Select the specified database. */ - if ((dbid = rdbLoadLen(&rdb,NULL)) == RDB_LENERR) + if ((dbid = rdbLoadLen(rdb,NULL)) == RDB_LENERR) goto eoferr; if (dbid >= (unsigned)server.dbnum) { serverLog(LL_WARNING, @@ -1461,9 +1455,9 @@ int rdbLoad(char *filename) { /* RESIZEDB: Hint about the size of the keys in the currently * selected data base, in order to avoid useless rehashing. */ uint64_t db_size, expires_size; - if ((db_size = rdbLoadLen(&rdb,NULL)) == RDB_LENERR) + if ((db_size = rdbLoadLen(rdb,NULL)) == RDB_LENERR) goto eoferr; - if ((expires_size = rdbLoadLen(&rdb,NULL)) == RDB_LENERR) + if ((expires_size = rdbLoadLen(rdb,NULL)) == RDB_LENERR) goto eoferr; dictExpand(db->dict,db_size); dictExpand(db->expires,expires_size); @@ -1475,8 +1469,8 @@ int rdbLoad(char *filename) { * * An AUX field is composed of two strings: key and value. 
*/ robj *auxkey, *auxval; - if ((auxkey = rdbLoadStringObject(&rdb)) == NULL) goto eoferr; - if ((auxval = rdbLoadStringObject(&rdb)) == NULL) goto eoferr; + if ((auxkey = rdbLoadStringObject(rdb)) == NULL) goto eoferr; + if ((auxval = rdbLoadStringObject(rdb)) == NULL) goto eoferr; if (((char*)auxkey->ptr)[0] == '%') { /* All the fields with a name staring with '%' are considered @@ -1498,9 +1492,9 @@ int rdbLoad(char *filename) { } /* Read key */ - if ((key = rdbLoadStringObject(&rdb)) == NULL) goto eoferr; + if ((key = rdbLoadStringObject(rdb)) == NULL) goto eoferr; /* Read value */ - if ((val = rdbLoadObject(type,&rdb)) == NULL) goto eoferr; + if ((val = rdbLoadObject(type,rdb)) == NULL) goto eoferr; /* Check if the key already expired. This function is used when loading * an RDB file from disk, either at startup, or when an RDB was * received from the master. In the latter case, the master is @@ -1521,9 +1515,9 @@ int rdbLoad(char *filename) { } /* Verify the checksum if RDB version is >= 5 */ if (rdbver >= 5 && server.rdb_checksum) { - uint64_t cksum, expected = rdb.cksum; + uint64_t cksum, expected = rdb->cksum; - if (rioRead(&rdb,&cksum,8) == 0) goto eoferr; + if (rioRead(rdb,&cksum,8) == 0) goto eoferr; memrev64ifbe(&cksum); if (cksum == 0) { serverLog(LL_WARNING,"RDB file was saved with checksum disabled: no check performed."); @@ -1532,9 +1526,6 @@ int rdbLoad(char *filename) { rdbExitReportCorruptRDB("RDB CRC error"); } } - - fclose(fp); - stopLoading(); return C_OK; eoferr: /* unexpected end of file is handled here with a fatal exit */ @@ -1543,6 +1534,24 @@ eoferr: /* unexpected end of file is handled here with a fatal exit */ return C_ERR; /* Just to avoid warning */ } +/* Like rdbLoadRio() but takes a filename instead of a rio stream. The + * filename is open for reading and a rio stream object created in order + * to do the actual loading. Moreover the ETA displayed in the INFO + * output is initialized and finalized. 
*/ +int rdbLoad(char *filename) { + FILE *fp; + rio rdb; + int retval; + + if ((fp = fopen(filename,"r")) == NULL) return C_ERR; + startLoading(fp); + rioInitWithFile(&rdb,fp); + retval = rdbLoadRio(&rdb); + fclose(fp); + stopLoading(); + return retval; +} + /* A background saving child (BGSAVE) terminated its work. Handle this. * This function covers the case of actual BGSAVEs. */ void backgroundSaveDoneHandlerDisk(int exitcode, int bysignal) { From 982df150f805ecad019d6cb17d91c03f65c98664 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 11 Aug 2016 15:42:28 +0200 Subject: [PATCH 0099/1722] RDB AOF preamble: WIP 4 (Mixed RDB/AOF loading). --- src/aof.c | 40 +++++++++++++++++++++++++++++++++------- src/rdb.h | 1 + 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/src/aof.c b/src/aof.c index 104d8fe39..5523066b5 100644 --- a/src/aof.c +++ b/src/aof.c @@ -616,19 +616,23 @@ int loadAppendOnlyFile(char *filename) { struct redis_stat sb; int old_aof_state = server.aof_state; long loops = 0; - off_t valid_up_to = 0; /* Offset of the latest well-formed command loaded. */ - - if (fp && redis_fstat(fileno(fp),&sb) != -1 && sb.st_size == 0) { - server.aof_current_size = 0; - fclose(fp); - return C_ERR; - } + off_t valid_up_to = 0; /* Offset of latest well-formed command loaded. */ if (fp == NULL) { serverLog(LL_WARNING,"Fatal error: can't open the append log file for reading: %s",strerror(errno)); exit(1); } + /* Handle a zero-length AOF file as a special case. An emtpy AOF file + * is a valid AOF because an empty server with AOF enabled will create + * a zero length file at startup, that will remain like that if no write + * operation is received. */ + if (fp && redis_fstat(fileno(fp),&sb) != -1 && sb.st_size == 0) { + server.aof_current_size = 0; + fclose(fp); + return C_ERR; + } + /* Temporarily disable AOF, to prevent EXEC from feeding a MULTI * to the same file we're about to read. 
*/ server.aof_state = AOF_OFF; @@ -636,6 +640,28 @@ int loadAppendOnlyFile(char *filename) { fakeClient = createFakeClient(); startLoading(fp); + /* Check if this AOF file has an RDB preamble. In that case we need to + * load the RDB file and later continue loading the AOF tail. */ + char sig[5]; /* "REDIS" */ + if (fread(sig,1,5,fp) != 5 || memcmp(sig,"REDIS",5) != 0) { + /* No RDB preamble, seek back at 0 offset. */ + if (fseek(fp,0,SEEK_SET) == -1) goto readerr; + } else { + /* RDB preamble. Pass loading the RDB functions. */ + rio rdb; + + serverLog(LL_NOTICE,"Reading RDB preamble from AOF file..."); + if (fseek(fp,0,SEEK_SET) == -1) goto readerr; + rioInitWithFile(&rdb,fp); + if (rdbLoadRio(&rdb) != C_OK) { + serverLog(LL_WARNING,"Error reading the RDB preamble of the AOF file, AOF loading aborted"); + goto readerr; + } else { + serverLog(LL_NOTICE,"Reading the remaining AOF tail..."); + } + } + + /* Read the actual AOF file, in REPL format, command by command. */ while(1) { int argc, j; unsigned long len; diff --git a/src/rdb.h b/src/rdb.h index 2c9a99850..cd1d65392 100644 --- a/src/rdb.h +++ b/src/rdb.h @@ -134,5 +134,6 @@ ssize_t rdbSaveRawString(rio *rdb, unsigned char *s, size_t len); void *rdbGenericLoadStringObject(rio *rdb, int flags, size_t *lenptr); int rdbSaveBinaryDoubleValue(rio *rdb, double val); int rdbLoadBinaryDoubleValue(rio *rdb, double *val); +int rdbLoadRio(rio *rdb); #endif From 02a360eca0dc6d83738955a525c43c355dea3dfd Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 11 Aug 2016 19:53:01 +0200 Subject: [PATCH 0100/1722] Sentinel example config: warn about protected mode. 
--- sentinel.conf | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/sentinel.conf b/sentinel.conf index 39d1044e2..0e1b266ed 100644 --- a/sentinel.conf +++ b/sentinel.conf @@ -1,5 +1,21 @@ # Example sentinel.conf +# *** IMPORTANT *** +# +# By default Sentinel will not be reachable from interfaces different than +# localhost, either use the 'bind' directive to bind to a list of network +# interfaces, or disable protected mode with "protected-mode no" by +# adding it to this configuration file. +# +# Before doing that MAKE SURE the instance is protected from the outside +# world via firewalling or other means. +# +# For example you may use one of the following: +# +# bind 127.0.0.1 192.168.1.1 +# +# protected-mode no + # port # The port that this sentinel instance will run on port 26379 @@ -178,4 +194,3 @@ sentinel failover-timeout mymaster 180000 # # sentinel client-reconfig-script mymaster /var/redis/reconfig.sh - From cfe0810936bf66b724e92d33abde906c9ba7bb3e Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 24 Aug 2016 15:33:39 +0200 Subject: [PATCH 0101/1722] Document RDB preamble in AOF rewrites in redis.conf. --- redis.conf | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/redis.conf b/redis.conf index b9217fdb4..a7b7f3a97 100644 --- a/redis.conf +++ b/redis.conf @@ -755,6 +755,20 @@ auto-aof-rewrite-min-size 64mb # will be found. aof-load-truncated yes +# When rewriting the AOF file, Redis is able to use an RDB preamble in the +# AOF file for faster rewrites and recoveries. When this option is turned +# on the rewritten AOF file is composed of two different stanzas: +# +# [RDB file][AOF tail] +# +# When loading Redis recognizes that the AOF file starts with the "REDIS" +# string and loads the prefixed RDB file, and continues loading the AOF +# tail. +# +# This is currently turned off by default in order to avoid the surprise +# of a format change, but will at some point be used as the default. 
+aof-use-rdb-preamble no + ################################ LUA SCRIPTING ############################### # Max execution time of a Lua script in milliseconds. From cf5154f72ef909a2a0923a4092dd2875f170bd9a Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 24 Aug 2016 15:39:39 +0200 Subject: [PATCH 0102/1722] RDB AOF preamble: test it in the aofrw unit. --- tests/unit/aofrw.tcl | 107 ++++++++++++++++++++++--------------------- 1 file changed, 55 insertions(+), 52 deletions(-) diff --git a/tests/unit/aofrw.tcl b/tests/unit/aofrw.tcl index 4fdbdc6c6..c5430eedc 100644 --- a/tests/unit/aofrw.tcl +++ b/tests/unit/aofrw.tcl @@ -4,60 +4,63 @@ start_server {tags {"aofrw"}} { r config set auto-aof-rewrite-percentage 0 ; # Disable auto-rewrite. waitForBgrewriteaof r - test {AOF rewrite during write load} { - # Start a write load for 10 seconds - set master [srv 0 client] - set master_host [srv 0 host] - set master_port [srv 0 port] - set load_handle0 [start_write_load $master_host $master_port 10] - set load_handle1 [start_write_load $master_host $master_port 10] - set load_handle2 [start_write_load $master_host $master_port 10] - set load_handle3 [start_write_load $master_host $master_port 10] - set load_handle4 [start_write_load $master_host $master_port 10] + foreach rdbpre {yes no} { + r config set aof-use-rdb-preamble $rdbpre + test "AOF rewrite during write load: RDB preamble=$rdbpre" { + # Start a write load for 10 seconds + set master [srv 0 client] + set master_host [srv 0 host] + set master_port [srv 0 port] + set load_handle0 [start_write_load $master_host $master_port 10] + set load_handle1 [start_write_load $master_host $master_port 10] + set load_handle2 [start_write_load $master_host $master_port 10] + set load_handle3 [start_write_load $master_host $master_port 10] + set load_handle4 [start_write_load $master_host $master_port 10] - # Make sure the instance is really receiving data - wait_for_condition 50 100 { - [r dbsize] > 0 - } else { - fail "No write load 
detected." + # Make sure the instance is really receiving data + wait_for_condition 50 100 { + [r dbsize] > 0 + } else { + fail "No write load detected." + } + + # After 3 seconds, start a rewrite, while the write load is still + # active. + after 3000 + r bgrewriteaof + waitForBgrewriteaof r + + # Let it run a bit more so that we'll append some data to the new + # AOF. + after 1000 + + # Stop the processes generating the load if they are still active + stop_write_load $load_handle0 + stop_write_load $load_handle1 + stop_write_load $load_handle2 + stop_write_load $load_handle3 + stop_write_load $load_handle4 + + # Make sure that we remain the only connected client. + # This step is needed to make sure there are no pending writes + # that will be processed between the two "debug digest" calls. + wait_for_condition 50 100 { + [llength [split [string trim [r client list]] "\n"]] == 1 + } else { + puts [r client list] + fail "Clients generating loads are not disconnecting" + } + + # Get the data set digest + set d1 [r debug digest] + + # Load the AOF + r debug loadaof + set d2 [r debug digest] + + # Make sure they are the same + assert {$d1 eq $d2} } - - # After 3 seconds, start a rewrite, while the write load is still - # active. - after 3000 - r bgrewriteaof - waitForBgrewriteaof r - - # Let it run a bit more so that we'll append some data to the new - # AOF. - after 1000 - - # Stop the processes generating the load if they are still active - stop_write_load $load_handle0 - stop_write_load $load_handle1 - stop_write_load $load_handle2 - stop_write_load $load_handle3 - stop_write_load $load_handle4 - - # Make sure that we remain the only connected client. - # This step is needed to make sure there are no pending writes - # that will be processed between the two "debug digest" calls. 
- wait_for_condition 50 100 { - [llength [split [string trim [r client list]] "\n"]] == 1 - } else { - puts [r client list] - fail "Clients generating loads are not disconnecting" - } - - # Get the data set digest - set d1 [r debug digest] - - # Load the AOF - r debug loadaof - set d2 [r debug digest] - - # Make sure they are the same - assert {$d1 eq $d2} } } From 982a34d0677a9565655a036c487657e1c4d5c21c Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 1 Sep 2016 11:04:22 +0200 Subject: [PATCH 0103/1722] sds: don't check for impossible string size in 32 bit systems. --- src/sds.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/sds.c b/src/sds.c index 26e90a6db..eafa13c29 100644 --- a/src/sds.c +++ b/src/sds.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "sds.h" #include "sdsalloc.h" @@ -61,8 +62,10 @@ static inline char sdsReqType(size_t string_size) { return SDS_TYPE_8; if (string_size < 1<<16) return SDS_TYPE_16; +#if (LONG_MAX == LLONG_MAX) if (string_size < 1ll<<32) return SDS_TYPE_32; +#endif return SDS_TYPE_64; } From b462b22e598df4dcd2b5734e803c0f9d70365320 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 1 Sep 2016 11:08:44 +0200 Subject: [PATCH 0104/1722] Fix rdb.c var types when calling rdbLoadLen(). Technically as soon as Redis 64 bit gets proper support for loading collections and/or DBs with more than 2^32 elements, the 32 bit version should be modified in order to check if what we read from rdbLoadLen() overflows. This would only apply to huge RDB files created with a 64 bit instance and later loaded into a 32 bit instance. --- src/rdb.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/rdb.c b/src/rdb.c index 859297943..58cde1f28 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -1052,7 +1052,7 @@ void rdbRemoveTempFile(pid_t childpid) { * On success a newly allocated object is returned, otherwise NULL. 
*/ robj *rdbLoadObject(int rdbtype, rio *rdb) { robj *o = NULL, *ele, *dec; - size_t len; + uint64_t len; unsigned int i; if (rdbtype == RDB_TYPE_STRING) { @@ -1119,7 +1119,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) { } } else if (rdbtype == RDB_TYPE_ZSET_2 || rdbtype == RDB_TYPE_ZSET) { /* Read list/set value. */ - size_t zsetlen; + uint64_t zsetlen; size_t maxelelen = 0; zset *zs; @@ -1154,7 +1154,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) { maxelelen <= server.zset_max_ziplist_value) zsetConvert(o,OBJ_ENCODING_ZIPLIST); } else if (rdbtype == RDB_TYPE_HASH) { - size_t len; + uint64_t len; int ret; sds field, value; From ab03414ea5137d0e8c105c944854f2db7d7c790d Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 7 Sep 2016 10:32:57 +0200 Subject: [PATCH 0105/1722] dict.c benchmark. --- src/Makefile | 5 ++++- src/dict.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index 325402ec2..6bd8d8d66 100644 --- a/src/Makefile +++ b/src/Makefile @@ -203,6 +203,9 @@ $(REDIS_BENCHMARK_NAME): $(REDIS_BENCHMARK_OBJ) $(REDIS_CHECK_AOF_NAME): $(REDIS_CHECK_AOF_OBJ) $(REDIS_LD) -o $@ $^ $(FINAL_LIBS) +dict-benchmark: dict.c zmalloc.c sds.c + $(REDIS_CC) $(FINAL_CFLAGS) dict.c zmalloc.c sds.c -D DICT_BENCHMARK_MAIN -o dict-benchmark + # Because the jemalloc.h header is generated as a part of the jemalloc build, # building it should complete before building any other object. Instead of # depending on a single artifact, build all dependencies first. 
@@ -210,7 +213,7 @@ $(REDIS_CHECK_AOF_NAME): $(REDIS_CHECK_AOF_OBJ) $(REDIS_CC) -c $< clean: - rm -rf $(REDIS_SERVER_NAME) $(REDIS_SENTINEL_NAME) $(REDIS_CLI_NAME) $(REDIS_BENCHMARK_NAME) $(REDIS_CHECK_RDB_NAME) $(REDIS_CHECK_AOF_NAME) *.o *.gcda *.gcno *.gcov redis.info lcov-html Makefile.dep + rm -rf $(REDIS_SERVER_NAME) $(REDIS_SENTINEL_NAME) $(REDIS_CLI_NAME) $(REDIS_BENCHMARK_NAME) $(REDIS_CHECK_RDB_NAME) $(REDIS_CHECK_AOF_NAME) *.o *.gcda *.gcno *.gcov redis.info lcov-html Makefile.dep dict-benchmark .PHONY: clean diff --git a/src/dict.c b/src/dict.c index 887a0f65e..39432956b 100644 --- a/src/dict.c +++ b/src/dict.c @@ -45,7 +45,11 @@ #include "dict.h" #include "zmalloc.h" +#ifndef DICT_BENCHMARK_MAIN #include "redisassert.h" +#else +#include +#endif /* Using dictEnableResize() / dictDisableResize() we make possible to * enable/disable resizing of the hash table as needed. This is very important @@ -1083,3 +1087,55 @@ void dictGetStats(char *buf, size_t bufsize, dict *d) { /* Make sure there is a NULL term at the end. 
*/ if (orig_bufsize) orig_buf[orig_bufsize-1] = '\0'; } + +/* ------------------------------- Benchmark ---------------------------------*/ + +#ifdef DICT_BENCHMARK_MAIN + +#include "sds.h" + +unsigned int hashCallback(const void *key) { + return dictGenHashFunction((unsigned char*)key, sdslen((char*)key)); +} + +int compareCallback(void *privdata, const void *key1, const void *key2) { + int l1,l2; + DICT_NOTUSED(privdata); + + l1 = sdslen((sds)key1); + l2 = sdslen((sds)key2); + if (l1 != l2) return 0; + return memcmp(key1, key2, l1) == 0; +} + +void freeCallback(void *privdata, void *val) { + DICT_NOTUSED(privdata); + + sdsfree(val); +} + +dictType BenchmarkDictType = { + hashCallback, + NULL, + NULL, + compareCallback, + freeCallback, + NULL +}; + +int main(void) { + long j; + long hits = 0, misses = 0; + long long start, elapsed; + dict *dict = dictCreate(&BenchmarkDictType,NULL); + + start = timeInMilliseconds(); + for (j = 0; j < 5000000; j++) { + int retval = dictAdd(dict,sdsfromlonglong(j),(void*)j); + assert(retval == DICT_OK); + } + elapsed = timeInMilliseconds()-start; + printf("Inserting 5M items: %lld ms\n", elapsed); + assert(dictSize(dict) == 5000000); +} +#endif From bd8a49019495e698645bb9ba67c4f4743815f7e8 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 7 Sep 2016 10:44:19 +0200 Subject: [PATCH 0106/1722] dict.c benchmark: take optional count argument. 
--- src/dict.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/dict.c b/src/dict.c index 39432956b..cbd47527f 100644 --- a/src/dict.c +++ b/src/dict.c @@ -1123,19 +1123,27 @@ dictType BenchmarkDictType = { NULL }; -int main(void) { +/* dict-benchmark [count] */ +int main(int argc, char **argv) { long j; long hits = 0, misses = 0; long long start, elapsed; dict *dict = dictCreate(&BenchmarkDictType,NULL); + long count = 0; + + if (argc == 2) { + count = strtol(argv[1],NULL,10); + } else { + count = 5000000; + } start = timeInMilliseconds(); - for (j = 0; j < 5000000; j++) { + for (j = 0; j < count; j++) { int retval = dictAdd(dict,sdsfromlonglong(j),(void*)j); assert(retval == DICT_OK); } elapsed = timeInMilliseconds()-start; printf("Inserting 5M items: %lld ms\n", elapsed); - assert(dictSize(dict) == 5000000); + assert((long)dictSize(dict) == count); } #endif From b97146827db9f448448d34cd74c64c8b56ab68e9 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 7 Sep 2016 10:53:24 +0200 Subject: [PATCH 0107/1722] dict.c benchmark improvements. 
--- src/dict.c | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/src/dict.c b/src/dict.c index cbd47527f..f1cafe2e2 100644 --- a/src/dict.c +++ b/src/dict.c @@ -1123,10 +1123,15 @@ dictType BenchmarkDictType = { NULL }; +#define start_benchmark() start = timeInMilliseconds() +#define end_benchmark(msg) do { \ + elapsed = timeInMilliseconds()-start; \ + printf(msg ": %ld items in %lld ms\n", count, elapsed); \ +} while(0); + /* dict-benchmark [count] */ int main(int argc, char **argv) { long j; - long hits = 0, misses = 0; long long start, elapsed; dict *dict = dictCreate(&BenchmarkDictType,NULL); long count = 0; @@ -1137,13 +1142,31 @@ int main(int argc, char **argv) { count = 5000000; } - start = timeInMilliseconds(); + start_benchmark(); for (j = 0; j < count; j++) { int retval = dictAdd(dict,sdsfromlonglong(j),(void*)j); assert(retval == DICT_OK); } - elapsed = timeInMilliseconds()-start; - printf("Inserting 5M items: %lld ms\n", elapsed); + end_benchmark("Inserting"); assert((long)dictSize(dict) == count); + + start_benchmark(); + for (j = 0; j < count; j++) { + sds key = sdsfromlonglong(rand() % count); + dictEntry *de = dictFind(dict,key); + assert(de != NULL); + sdsfree(key); + } + end_benchmark("Accessing existing"); + + start_benchmark(); + for (j = 0; j < count; j++) { + sds key = sdsfromlonglong(rand() % count); + key[0] = 'X'; + dictEntry *de = dictFind(dict,key); + assert(de == NULL); + sdsfree(key); + } + end_benchmark("Accessing missing"); } #endif From a65fd55d3f5a4fde9ab3ab872995c76d4276bef0 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 7 Sep 2016 11:06:03 +0200 Subject: [PATCH 0108/1722] dict.c benchmark: finish rehashing before testing lookups. 
--- src/dict.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/dict.c b/src/dict.c index f1cafe2e2..ae9516ffa 100644 --- a/src/dict.c +++ b/src/dict.c @@ -1150,6 +1150,11 @@ int main(int argc, char **argv) { end_benchmark("Inserting"); assert((long)dictSize(dict) == count); + /* Wait for rehashing. */ + while (dictIsRehashing(dict)) { + dictRehashMilliseconds(dict,100); + } + start_benchmark(); for (j = 0; j < count; j++) { sds key = sdsfromlonglong(rand() % count); From 259a9091683e94c15e916501497a7a668f3105c2 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 7 Sep 2016 12:34:37 +0200 Subject: [PATCH 0109/1722] dict.c benchmark: mixed del/insert benchmark. --- src/dict.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/dict.c b/src/dict.c index ae9516ffa..b15ae4876 100644 --- a/src/dict.c +++ b/src/dict.c @@ -1173,5 +1173,16 @@ int main(int argc, char **argv) { sdsfree(key); } end_benchmark("Accessing missing"); + + start_benchmark(); + for (j = 0; j < count; j++) { + sds key = sdsfromlonglong(j); + int retval = dictDelete(dict,key); + assert(retval == DICT_OK); + key[0] += 17; /* Change first number to letter. */ + retval = dictAdd(dict,key,(void*)j); + assert(retval == DICT_OK); + } + end_benchmark("Removing and adding"); } #endif From 229466fdf1ddfcceb833af59d71d32dbbd326f6a Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 7 Sep 2016 15:27:25 +0200 Subject: [PATCH 0110/1722] dict.c benchmark minor improvements. 
--- src/dict.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/dict.c b/src/dict.c index b15ae4876..e37b659e6 100644 --- a/src/dict.c +++ b/src/dict.c @@ -1155,6 +1155,24 @@ int main(int argc, char **argv) { dictRehashMilliseconds(dict,100); } + start_benchmark(); + for (j = 0; j < count; j++) { + sds key = sdsfromlonglong(j); + dictEntry *de = dictFind(dict,key); + assert(de != NULL); + sdsfree(key); + } + end_benchmark("Linear access of existing elements"); + + start_benchmark(); + for (j = 0; j < count; j++) { + sds key = sdsfromlonglong(j); + dictEntry *de = dictFind(dict,key); + assert(de != NULL); + sdsfree(key); + } + end_benchmark("Linear access of existing elements (2nd round)"); + start_benchmark(); for (j = 0; j < count; j++) { sds key = sdsfromlonglong(rand() % count); @@ -1162,7 +1180,7 @@ int main(int argc, char **argv) { assert(de != NULL); sdsfree(key); } - end_benchmark("Accessing existing"); + end_benchmark("Random access of existing elements"); start_benchmark(); for (j = 0; j < count; j++) { From 7698b97ca54a8e98d8481f94b5371a106072a61c Mon Sep 17 00:00:00 2001 From: oranagra Date: Mon, 4 Jul 2016 23:28:32 +0300 Subject: [PATCH 0111/1722] crash log - add hex dump of function code --- src/debug.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/debug.c b/src/debug.c index f3e109479..3d88819f1 100644 --- a/src/debug.c +++ b/src/debug.c @@ -39,6 +39,9 @@ #include #include #include "bio.h" +#include +#define _GNU_SOURCE +#include #endif /* HAVE_BACKTRACE */ #ifdef __CYGWIN__ @@ -669,6 +672,8 @@ static void *getMcontextEip(ucontext_t *uc) { return (void*) uc->uc_mcontext.gregs[16]; /* Linux 64 */ #elif defined(__ia64__) /* Linux IA64 */ return (void*) uc->uc_mcontext.sc_ip; + #elif defined(__arm__) /* Linux ARM */ + return (void*) uc->uc_mcontext.arm_pc; #endif #else return NULL; @@ -1036,6 +1041,23 @@ void sigsegvHandler(int sig, siginfo_t *info, void *secret) { /* 
free(messages); Don't call free() with possibly corrupted memory. */ if (server.daemonize && server.supervised == 0) unlink(server.pidfile); + if (eip != NULL) { + Dl_info info; + if (dladdr(eip, &info) != 0) + { + serverLog(LL_WARNING, + "symbol: %s (base %p), in module: %s (base: %p)", + info.dli_sname, info.dli_saddr, info.dli_fname, info.dli_fbase); + size_t len = (long)eip - (long)info.dli_saddr; + long sz = sysconf(_SC_PAGESIZE); + if (len < 1<<13) { /* we don't have functions over 8k (verified) */ + long end = ((long)eip + sz) & ~(sz-1); /* round up to page boundary */ + len = end - (long)info.dli_saddr; + serverLogHexDump(LL_WARNING, "dump of function", info.dli_saddr ,len); + } + } + } + /* Make sure we exit with the right signal at the end. So for instance * the core will be dumped if enabled. */ sigemptyset (&act.sa_mask); From 3e4e2fd08a772d5e24e3fdde642bc0ba0ae28d02 Mon Sep 17 00:00:00 2001 From: wyx Date: Fri, 9 Sep 2016 10:22:57 +0800 Subject: [PATCH 0112/1722] fix memory error on module unload --- src/module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/module.c b/src/module.c index e0bc417d6..feab67dfb 100644 --- a/src/module.c +++ b/src/module.c @@ -3085,7 +3085,7 @@ int moduleUnload(sds name) { /* Remove from list of modules. */ serverLog(LL_NOTICE,"Module %s unloaded",module->name); - dictDelete(modules,module->name); + dictDeleteNoFree(modules,module->name); moduleFreeModuleStructure(module); return REDISMODULE_OK; From de5ac381795a6c5b1ab4f1ef937a9b56baf539e4 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 9 Sep 2016 10:59:29 +0200 Subject: [PATCH 0113/1722] crash log - improve code dump with more info and called symbols. 
--- src/debug.c | 79 +++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 59 insertions(+), 20 deletions(-) diff --git a/src/debug.c b/src/debug.c index 3d88819f1..82638f832 100644 --- a/src/debug.c +++ b/src/debug.c @@ -975,6 +975,32 @@ int memtest_test_linux_anonymous_maps(void) { } #endif +/* Scans the (assumed) x86 code starting at addr, for a max of `len` + * bytes, searching for E8 (callq) opcodes, and dumping the symbols + * and the call offset if they appear to be valid. */ +void dumpX86Calls(void *addr, size_t len) { + size_t j; + unsigned char *p = addr; + Dl_info info; + /* Hash table to best-effort avoid printing the same symbol + * multiple times. */ + unsigned long ht[256] = {0}; + + if (len < 5) return; + for (j = 0; j < len-4; j++) { + if (p[j] != 0xE8) continue; /* Not an E8 CALL opcode. */ + unsigned long target = (unsigned long)addr+j+5; + target += *((int32_t*)(p+j+1)); + if (dladdr((void*)target, &info) != 0 && info.dli_sname != NULL) { + if (ht[target&0xff] != target) { + printf("Function at 0x%lx is %s\n",target,info.dli_sname); + ht[target&0xff] = target; + } + j += 4; /* Skip the 32 bit immediate. */ + } + } +} + void sigsegvHandler(int sig, siginfo_t *info, void *secret) { ucontext_t *uc = (ucontext_t*) secret; void *eip = getMcontextEip(uc); @@ -1025,39 +1051,52 @@ void sigsegvHandler(int sig, siginfo_t *info, void *secret) { bioKillThreads(); if (memtest_test_linux_anonymous_maps()) { serverLogRaw(LL_WARNING|LL_RAW, - "!!! MEMORY ERROR DETECTED! Check your memory ASAP !!!"); + "!!! MEMORY ERROR DETECTED! Check your memory ASAP !!!\n"); } else { serverLogRaw(LL_WARNING|LL_RAW, - "Fast memory test PASSED, however your memory can still be broken. Please run a memory test for several hours if possible."); + "Fast memory test PASSED, however your memory can still be broken. 
Please run a memory test for several hours if possible.\n"); } #endif + if (eip != NULL) { + Dl_info info; + if (dladdr(eip, &info) != 0) { + serverLog(LL_WARNING|LL_RAW, + "\n------ DUMPING CODE AROUND EIP ------\n" + "Symbol: %s (base: %p)\n" + "Module: %s (base %p)\n" + "$ xxd -r -p /tmp/dump.hex /tmp/dump.bin\n" + "$ objdump --adjust-vma=%p -D -b binary -m i386:x86-64 /tmp/dump.bin\n" + "------\n", + info.dli_sname, info.dli_saddr, info.dli_fname, info.dli_fbase, + info.dli_saddr); + size_t len = (long)eip - (long)info.dli_saddr; + unsigned long sz = sysconf(_SC_PAGESIZE); + if (len < 1<<13) { /* we don't have functions over 8k (verified) */ + /* Find the address of the next page, which is our "safety" + * limit when dumping. Then try to dump just 128 bytes more + * than EIP if there is room, or stop sooner. */ + unsigned long next = ((unsigned long)eip + sz) & ~(sz-1); + unsigned long end = (unsigned long)eip + 128; + if (end > next) end = next; + len = end - (unsigned long)info.dli_saddr; + serverLogHexDump(LL_WARNING, "dump of function", + info.dli_saddr ,len); + dumpX86Calls(info.dli_saddr,len); + } + } + } + serverLogRaw(LL_WARNING|LL_RAW, "\n=== REDIS BUG REPORT END. Make sure to include from START to END. ===\n\n" " Please report the crash by opening an issue on github:\n\n" " http://github.com/antirez/redis/issues\n\n" " Suspect RAM error? Use redis-server --test-memory to verify it.\n\n" ); + /* free(messages); Don't call free() with possibly corrupted memory. 
*/ if (server.daemonize && server.supervised == 0) unlink(server.pidfile); - if (eip != NULL) { - Dl_info info; - if (dladdr(eip, &info) != 0) - { - serverLog(LL_WARNING, - "symbol: %s (base %p), in module: %s (base: %p)", - info.dli_sname, info.dli_saddr, info.dli_fname, info.dli_fbase); - size_t len = (long)eip - (long)info.dli_saddr; - long sz = sysconf(_SC_PAGESIZE); - if (len < 1<<13) { /* we don't have functions over 8k (verified) */ - long end = ((long)eip + sz) & ~(sz-1); /* round up to page boundary */ - len = end - (long)info.dli_saddr; - serverLogHexDump(LL_WARNING, "dump of function", info.dli_saddr ,len); - } - } - } - /* Make sure we exit with the right signal at the end. So for instance * the core will be dumped if enabled. */ sigemptyset (&act.sa_mask); @@ -1075,7 +1114,7 @@ void serverLogHexDump(int level, char *descr, void *value, size_t len) { unsigned char *v = value; char charset[] = "0123456789abcdef"; - serverLog(level,"%s (hexdump):", descr); + serverLog(level,"%s (hexdump of %zu bytes):", descr, len); b = buf; while(len) { b[0] = charset[(*v)>>4]; From fef4253c313fd71b2d52911efcda2f54f0fc7378 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 9 Sep 2016 11:15:10 +0200 Subject: [PATCH 0114/1722] debug.c: no need to define _GNU_SOURCE, is defined in fmacros.h. 
--- src/debug.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/debug.c b/src/debug.c index 82638f832..6f0b5e702 100644 --- a/src/debug.c +++ b/src/debug.c @@ -40,7 +40,6 @@ #include #include "bio.h" #include -#define _GNU_SOURCE #include #endif /* HAVE_BACKTRACE */ From c7dbd7d6c5092f41a8cd5d453d944b88a3811807 Mon Sep 17 00:00:00 2001 From: oranagra Date: Sun, 21 Aug 2016 20:39:15 +0300 Subject: [PATCH 0115/1722] fix tcp binding when IPv6 is unsupported --- src/anet.c | 2 +- src/server.c | 22 +++++++++++++--------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/anet.c b/src/anet.c index 1728f3eb9..ef1711d06 100644 --- a/src/anet.c +++ b/src/anet.c @@ -486,7 +486,7 @@ static int _anetTcpServer(char *err, int port, char *bindaddr, int af, int backl goto end; } if (p == NULL) { - anetSetError(err, "unable to bind socket"); + anetSetError(err, "unable to bind socket, errno: %d", errno); goto error; } diff --git a/src/server.c b/src/server.c index a77582592..fe8364d00 100644 --- a/src/server.c +++ b/src/server.c @@ -1636,6 +1636,7 @@ int listenToPort(int port, int *fds, int *count) { if (server.bindaddr_count == 0) server.bindaddr[0] = NULL; for (j = 0; j < server.bindaddr_count || j == 0; j++) { if (server.bindaddr[j] == NULL) { + int unsupported = 0; /* Bind * for both IPv6 and IPv4, we enter here only if * server.bindaddr_count == 0. */ fds[*count] = anetTcp6Server(server.neterr,port,NULL, @@ -1643,19 +1644,22 @@ int listenToPort(int port, int *fds, int *count) { if (fds[*count] != ANET_ERR) { anetNonBlock(NULL,fds[*count]); (*count)++; + } else if (errno == EAFNOSUPPORT) + unsupported++; - /* Bind the IPv4 address as well. */ - fds[*count] = anetTcpServer(server.neterr,port,NULL, - server.tcp_backlog); - if (fds[*count] != ANET_ERR) { - anetNonBlock(NULL,fds[*count]); - (*count)++; - } - } + /* Bind the IPv4 address as well. 
*/ + fds[*count] = anetTcpServer(server.neterr,port,NULL, + server.tcp_backlog); + if (fds[*count] != ANET_ERR) { + anetNonBlock(NULL,fds[*count]); + (*count)++; + } else if (errno == EAFNOSUPPORT) + unsupported++; /* Exit the loop if we were able to bind * on IPv4 and IPv6, + * or if one is unsupported by th OS. * otherwise fds[*count] will be ANET_ERR and we'll print an * error and return to the caller with an error. */ - if (*count == 2) break; + if (*count + unsupported == 2) break; } else if (strchr(server.bindaddr[j],':')) { /* Bind IPv6 address. */ fds[*count] = anetTcp6Server(server.neterr,port,server.bindaddr[j], From 9edd074a1d86c0bed718282ce61a70b0b08a3940 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 9 Sep 2016 14:59:48 +0200 Subject: [PATCH 0116/1722] fix the fix for the TCP binding. This commit attempts to fix a problem with PR #3467. --- src/server.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/src/server.c b/src/server.c index fe8364d00..c737a470f 100644 --- a/src/server.c +++ b/src/server.c @@ -1644,19 +1644,24 @@ int listenToPort(int port, int *fds, int *count) { if (fds[*count] != ANET_ERR) { anetNonBlock(NULL,fds[*count]); (*count)++; - } else if (errno == EAFNOSUPPORT) + } else if (errno == EAFNOSUPPORT) { unsupported++; + serverLog(LL_WARNING,"Not listening to IPv6: unsupproted"); + } - /* Bind the IPv4 address as well. */ - fds[*count] = anetTcpServer(server.neterr,port,NULL, - server.tcp_backlog); - if (fds[*count] != ANET_ERR) { - anetNonBlock(NULL,fds[*count]); - (*count)++; - } else if (errno == EAFNOSUPPORT) - unsupported++; + if (*count == 1 || unsupported) { + /* Bind the IPv4 address as well. 
*/ + fds[*count] = anetTcpServer(server.neterr,port,NULL, + server.tcp_backlog); + if (fds[*count] != ANET_ERR) { + anetNonBlock(NULL,fds[*count]); + (*count)++; + } else if (errno == EAFNOSUPPORT) { + unsupported++; + serverLog(LL_WARNING,"Not listening to IPv4: unsupproted"); + } + } /* Exit the loop if we were able to bind * on IPv4 and IPv6, - * or if one is unsupported by th OS. * otherwise fds[*count] will be ANET_ERR and we'll print an * error and return to the caller with an error. */ if (*count + unsupported == 2) break; From c72557349af0a3920829e14ca16def06e4067f85 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 9 Sep 2016 16:01:29 +0200 Subject: [PATCH 0117/1722] Example modules: Add C99 standard to cflags. --- src/modules/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/modules/Makefile b/src/modules/Makefile index 1a52d65b0..3cd51023f 100644 --- a/src/modules/Makefile +++ b/src/modules/Makefile @@ -4,10 +4,10 @@ uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not') # Compile flags for linux / osx ifeq ($(uname_S),Linux) - SHOBJ_CFLAGS ?= -fno-common -g -ggdb + SHOBJ_CFLAGS ?= -fno-common -g -ggdb -std=c99 SHOBJ_LDFLAGS ?= -shared else - SHOBJ_CFLAGS ?= -dynamic -fno-common -g -ggdb + SHOBJ_CFLAGS ?= -dynamic -fno-common -g -ggdb -std=c99 SHOBJ_LDFLAGS ?= -bundle -undefined dynamic_lookup endif From 1f3cd7087fa1fd982d3b6683320de77ce9c14cbe Mon Sep 17 00:00:00 2001 From: Alexander Zhukov Date: Mon, 12 Sep 2016 13:53:47 +0300 Subject: [PATCH 0118/1722] Fix an article usage --- redis.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/redis.conf b/redis.conf index a7b7f3a97..53d519658 100644 --- a/redis.conf +++ b/redis.conf @@ -818,7 +818,7 @@ lua-time-limit 5000 # A slave of a failing master will avoid to start a failover if its data # looks too old. 
# -# There is no simple way for a slave to actually have a exact measure of +# There is no simple way for a slave to actually have an exact measure of # its "data age", so the following two checks are performed: # # 1) If there are multiple slaves able to failover, they exchange messages From 1ef16debfbc5bdf169208d2a98404007ced45d6a Mon Sep 17 00:00:00 2001 From: oranagra Date: Mon, 9 May 2016 18:01:09 +0300 Subject: [PATCH 0119/1722] Optimize repeated keyname hashing. (Change cherry-picked and modified by @antirez from a larger commit provided by @oranagra in PR #3223). --- src/dict.c | 75 ++++++++++++++++++++++++-------------------------- src/dict.h | 16 +++++------ src/sentinel.c | 10 +++---- src/t_set.c | 2 +- src/t_zset.c | 12 ++++---- 5 files changed, 56 insertions(+), 59 deletions(-) diff --git a/src/dict.c b/src/dict.c index e37b659e6..2d9ac35a2 100644 --- a/src/dict.c +++ b/src/dict.c @@ -66,23 +66,11 @@ static unsigned int dict_force_resize_ratio = 5; static int _dictExpandIfNeeded(dict *ht); static unsigned long _dictNextPower(unsigned long size); -static int _dictKeyIndex(dict *ht, const void *key); +static int _dictKeyIndex(dict *ht, const void *key, unsigned int hash, dictEntry **existing); static int _dictInit(dict *ht, dictType *type, void *privDataPtr); /* -------------------------- hash functions -------------------------------- */ -/* Thomas Wang's 32 bit Mix Function */ -unsigned int dictIntHashFunction(unsigned int key) -{ - key += ~(key << 15); - key ^= (key >> 10); - key += (key << 3); - key ^= (key >> 6); - key += ~(key << 11); - key ^= (key >> 16); - return key; -} - static uint32_t dict_hash_function_seed = 5381; void dictSetHashFunctionSeed(uint32_t seed) { @@ -325,29 +313,32 @@ static void _dictRehashStep(dict *d) { /* Add an element to the target hash table */ int dictAdd(dict *d, void *key, void *val) { - dictEntry *entry = dictAddRaw(d,key); + dictEntry *entry = dictAddRaw(d,key,NULL); if (!entry) return DICT_ERR; dictSetVal(d, entry, 
val); return DICT_OK; } -/* Low level add. This function adds the entry but instead of setting - * a value returns the dictEntry structure to the user, that will make - * sure to fill the value field as he wishes. +/* Low level add or find: + * This function adds the entry but instead of setting a value returns the + * dictEntry structure to the user, that will make sure to fill the value + * field as he wishes. * * This function is also directly exposed to the user API to be called * mainly in order to store non-pointers inside the hash value, example: * - * entry = dictAddRaw(dict,mykey); + * entry = dictAddRaw(dict,mykey,NULL); * if (entry != NULL) dictSetSignedIntegerVal(entry,1000); * * Return values: * - * If key already exists NULL is returned. + * If key already exists NULL is returned, and "*existing" is populated + * with the existing entry if existing is not NULL. + * * If key was added, the hash entry is returned to be manipulated by the caller. */ -dictEntry *dictAddRaw(dict *d, void *key) +dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing) { int index; dictEntry *entry; @@ -357,7 +348,7 @@ dictEntry *dictAddRaw(dict *d, void *key) /* Get the index of the new element, or -1 if * the element already exists. */ - if ((index = _dictKeyIndex(d, key)) == -1) + if ((index = _dictKeyIndex(d, key, dictHashKey(d,key), existing)) == -1) return NULL; /* Allocate the memory and store the new entry. @@ -375,41 +366,45 @@ dictEntry *dictAddRaw(dict *d, void *key) return entry; } -/* Add an element, discarding the old if the key already exists. +/* Add or Overwrite: + * Add an element, discarding the old value if the key already exists. * Return 1 if the key was added from scratch, 0 if there was already an * element with such key and dictReplace() just performed a value update * operation. */ int dictReplace(dict *d, void *key, void *val) { - dictEntry *entry, auxentry; + dictEntry *entry, *existing, auxentry; /* Try to add the element. 
If the key * does not exists dictAdd will suceed. */ - if (dictAdd(d, key, val) == DICT_OK) + entry = dictAddRaw(d,key,&existing); + if (entry) { + dictSetVal(d, entry, val); return 1; - /* It already exists, get the entry */ - entry = dictFind(d, key); + } + /* Set the new value and free the old one. Note that it is important * to do that in this order, as the value may just be exactly the same * as the previous one. In this context, think to reference counting, * you want to increment (set), and then decrement (free), and not the * reverse. */ - auxentry = *entry; - dictSetVal(d, entry, val); + auxentry = *existing; + dictSetVal(d, existing, val); dictFreeVal(d, &auxentry); return 0; } -/* dictReplaceRaw() is simply a version of dictAddRaw() that always +/* Add or Find: + * dictReplaceRaw() is simply a version of dictAddRaw() that always * returns the hash entry of the specified key, even if the key already * exists and can't be added (in that case the entry of the already * existing key is returned.) * * See dictAddRaw() for more information. */ dictEntry *dictReplaceRaw(dict *d, void *key) { - dictEntry *entry = dictFind(d,key); - - return entry ? entry : dictAddRaw(d,key); + dictEntry *entry, *existing; + entry = dictAddRaw(d,key,&existing); + return entry ? entry : existing; } /* Search and remove an element */ @@ -966,27 +961,29 @@ static unsigned long _dictNextPower(unsigned long size) /* Returns the index of a free slot that can be populated with * a hash entry for the given 'key'. - * If the key already exists, -1 is returned. + * If the key already exists, -1 is returned + * and the optional output parameter may be filled. * * Note that if we are in the process of rehashing the hash table, the * index is always returned in the context of the second (new) hash table. 
*/ -static int _dictKeyIndex(dict *d, const void *key) +static int _dictKeyIndex(dict *d, const void *key, unsigned int hash, dictEntry **existing) { - unsigned int h, idx, table; + unsigned int idx, table; dictEntry *he; + if (existing) *existing = NULL; /* Expand the hash table if needed */ if (_dictExpandIfNeeded(d) == DICT_ERR) return -1; - /* Compute the key hash value */ - h = dictHashKey(d, key); for (table = 0; table <= 1; table++) { - idx = h & d->ht[table].sizemask; + idx = hash & d->ht[table].sizemask; /* Search if this slot does not already contain the given key */ he = d->ht[table].table[idx]; while(he) { - if (key==he->key || dictCompareKeys(d, key, he->key)) + if (key==he->key || dictCompareKeys(d, key, he->key)) { + if (existing) *existing = he; return -1; + } he = he->next; } if (!dictIsRehashing(d)) break; diff --git a/src/dict.h b/src/dict.h index 967a238b6..739de68ef 100644 --- a/src/dict.h +++ b/src/dict.h @@ -106,19 +106,19 @@ typedef void (dictScanFunction)(void *privdata, const dictEntry *de); #define dictSetVal(d, entry, _val_) do { \ if ((d)->type->valDup) \ - entry->v.val = (d)->type->valDup((d)->privdata, _val_); \ + (entry)->v.val = (d)->type->valDup((d)->privdata, _val_); \ else \ - entry->v.val = (_val_); \ + (entry)->v.val = (_val_); \ } while(0) #define dictSetSignedIntegerVal(entry, _val_) \ - do { entry->v.s64 = _val_; } while(0) + do { (entry)->v.s64 = _val_; } while(0) #define dictSetUnsignedIntegerVal(entry, _val_) \ - do { entry->v.u64 = _val_; } while(0) + do { (entry)->v.u64 = _val_; } while(0) #define dictSetDoubleVal(entry, _val_) \ - do { entry->v.d = _val_; } while(0) + do { (entry)->v.d = _val_; } while(0) #define dictFreeKey(d, entry) \ if ((d)->type->keyDestructor) \ @@ -126,9 +126,9 @@ typedef void (dictScanFunction)(void *privdata, const dictEntry *de); #define dictSetKey(d, entry, _key_) do { \ if ((d)->type->keyDup) \ - entry->key = (d)->type->keyDup((d)->privdata, _key_); \ + (entry)->key = 
(d)->type->keyDup((d)->privdata, _key_); \ else \ - entry->key = (_key_); \ + (entry)->key = (_key_); \ } while(0) #define dictCompareKeys(d, key1, key2) \ @@ -150,7 +150,7 @@ typedef void (dictScanFunction)(void *privdata, const dictEntry *de); dict *dictCreate(dictType *type, void *privDataPtr); int dictExpand(dict *d, unsigned long size); int dictAdd(dict *d, void *key, void *val); -dictEntry *dictAddRaw(dict *d, void *key); +dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing); int dictReplace(dict *d, void *key, void *val); dictEntry *dictReplaceRaw(dict *d, void *key); int dictDelete(dict *d, const void *key); diff --git a/src/sentinel.c b/src/sentinel.c index 0168aa637..235611546 100644 --- a/src/sentinel.c +++ b/src/sentinel.c @@ -3640,15 +3640,15 @@ struct sentinelLeader { /* Helper function for sentinelGetLeader, increment the counter * relative to the specified runid. */ int sentinelLeaderIncr(dict *counters, char *runid) { - dictEntry *de = dictFind(counters,runid); + dictEntry *existing, *de; uint64_t oldval; - if (de) { - oldval = dictGetUnsignedIntegerVal(de); - dictSetUnsignedIntegerVal(de,oldval+1); + de = dictAddRaw(counters,runid,&existing); + if (existing) { + oldval = dictGetUnsignedIntegerVal(existing); + dictSetUnsignedIntegerVal(existing,oldval+1); return oldval+1; } else { - de = dictAddRaw(counters,runid); serverAssert(de != NULL); dictSetUnsignedIntegerVal(de,1); return 1; diff --git a/src/t_set.c b/src/t_set.c index ddd82b8b0..d5a801e11 100644 --- a/src/t_set.c +++ b/src/t_set.c @@ -53,7 +53,7 @@ int setTypeAdd(robj *subject, sds value) { long long llval; if (subject->encoding == OBJ_ENCODING_HT) { dict *ht = subject->ptr; - dictEntry *de = dictAddRaw(ht,value); + dictEntry *de = dictAddRaw(ht,value,NULL); if (de) { dictSetKey(ht,de,sdsdup(value)); dictSetVal(ht,de,NULL); diff --git a/src/t_zset.c b/src/t_zset.c index c61ba8089..81e6e57c5 100644 --- a/src/t_zset.c +++ b/src/t_zset.c @@ -2262,7 +2262,7 @@ void 
zunionInterGenericCommand(client *c, robj *dstkey, int op) { } else if (op == SET_OP_UNION) { dict *accumulator = dictCreate(&setAccumulatorDictType,NULL); dictIterator *di; - dictEntry *de; + dictEntry *de, *existing; double score; if (setnum) { @@ -2283,16 +2283,16 @@ void zunionInterGenericCommand(client *c, robj *dstkey, int op) { if (isnan(score)) score = 0; /* Search for this element in the accumulating dictionary. */ - de = dictFind(accumulator,zuiSdsFromValue(&zval)); + de = dictAddRaw(accumulator,zuiSdsFromValue(&zval),&existing); /* If we don't have it, we need to create a new entry. */ - if (de == NULL) { + if (!existing) { tmp = zuiNewSdsFromValue(&zval); /* Remember the longest single element encountered, * to understand if it's possible to convert to ziplist * at the end. */ if (sdslen(tmp) > maxelelen) maxelelen = sdslen(tmp); - /* Add the element with its initial score. */ - de = dictAddRaw(accumulator,tmp); + /* Update the element with its initial score. */ + dictSetKey(accumulator, de, tmp); dictSetDoubleVal(de,score); } else { /* Update the score with the score of the new instance @@ -2301,7 +2301,7 @@ void zunionInterGenericCommand(client *c, robj *dstkey, int op) { * Here we access directly the dictEntry double * value inside the union as it is a big speedup * compared to using the getDouble/setDouble API. */ - zunionInterAggregate(&de->v.d,score,aggregate); + zunionInterAggregate(&existing->v.d,score,aggregate); } } zuiClearIterator(&src[i]); From d02841879048a0ff8c0f6bec28b8252d23bbddce Mon Sep 17 00:00:00 2001 From: oranagra Date: Mon, 9 May 2016 18:01:09 +0300 Subject: [PATCH 0120/1722] Adding objectComputeSize() function. 
--- src/object.c | 113 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) diff --git a/src/object.c b/src/object.c index a7c1e4c21..903143f4e 100644 --- a/src/object.c +++ b/src/object.c @@ -36,6 +36,8 @@ #define strtold(a,b) ((long double)strtod((a),(b))) #endif +/* ===================== Creation and parsing of objects ==================== */ + robj *createObject(int type, void *ptr) { robj *o = zmalloc(sizeof(*o)); o->type = type; @@ -690,6 +692,117 @@ char *strEncoding(int encoding) { } } +/* ========================== Objects introspection ========================= */ + +/* Returns the size in bytes consumed by the key's value in RAM */ +size_t objectComputeSize(robj *o) { + robj *ele; + list *l; + listNode *ln; + dict *d; + dictIterator *di; + listIter li; + struct dictEntry *de; + size_t asize = 0, elesize; + + if (o->type == OBJ_STRING) { + if(o->encoding == OBJ_ENCODING_INT) { + asize = sizeof(*o); + } + else if(o->encoding == OBJ_ENCODING_RAW) { + asize = sdsAllocSize(o->ptr)+sizeof(*o); + } else if(o->encoding == OBJ_ENCODING_EMBSTR) { + asize = sdslen(o->ptr)+2+sizeof(*o); + } else { + serverPanic("Unknown string encoding"); + } + } else if (o->type == OBJ_LIST) { + if (o->encoding == OBJ_ENCODING_QUICKLIST) { + quicklist *ql = o->ptr; + quicklistNode *node = ql->head; + asize = sizeof(*o)+sizeof(quicklist); + do { + asize += sizeof(quicklistNode)+ziplistBlobLen(node->zl); + } while ((node = node->next)); + } else if (o->encoding == OBJ_ENCODING_ZIPLIST) { + asize = sizeof(*o)+ziplistBlobLen(o->ptr); + } else if (o->encoding == OBJ_ENCODING_LINKEDLIST) { + l = o->ptr; + asize = sizeof(*o)+sizeof(list); + listRewind(l,&li); + while((ln = listNext(&li))) { + ele = ln->value; + elesize = (ele->encoding == OBJ_ENCODING_RAW) ? 
+ (sizeof(*o)+sdsAllocSize(ele->ptr)) : sizeof(*o); + asize += (sizeof(listNode)+elesize); + } + } else { + serverPanic("Unknown list encoding"); + } + } else if (o->type == OBJ_SET) { + if (o->encoding == OBJ_ENCODING_HT) { + d = o->ptr; + di = dictGetIterator(d); + asize = sizeof(*o)+sizeof(dict)+(sizeof(struct dictEntry*)*dictSlots(d)); + while((de = dictNext(di)) != NULL) { + ele = dictGetKey(de); + elesize = (ele->encoding == OBJ_ENCODING_RAW) ? + (sizeof(*o)+sdsAllocSize(ele->ptr)) : sizeof(*o); + asize += (sizeof(struct dictEntry)+elesize); + } + dictReleaseIterator(di); + } else if (o->encoding == OBJ_ENCODING_INTSET) { + intset *is = o->ptr; + asize = sizeof(*o)+sizeof(*is)+is->encoding*is->length; + } else { + serverPanic("Unknown set encoding"); + } + } else if (o->type == OBJ_ZSET) { + if (o->encoding == OBJ_ENCODING_ZIPLIST) { + asize = sizeof(*o)+(ziplistBlobLen(o->ptr)); + } else if (o->encoding == OBJ_ENCODING_SKIPLIST) { + d = ((zset*)o->ptr)->dict; + di = dictGetIterator(d); + asize = sizeof(*o)+sizeof(zset)+(sizeof(struct dictEntry*)*dictSlots(d)); + while((de = dictNext(di)) != NULL) { + ele = dictGetKey(de); + elesize = (ele->encoding == OBJ_ENCODING_RAW) ? + (sizeof(*o)+sdsAllocSize(ele->ptr)) : sizeof(*o); + asize += (sizeof(struct dictEntry)+elesize); + asize += sizeof(zskiplistNode)*dictSize(d); + } + dictReleaseIterator(di); + } else { + serverPanic("Unknown sorted set encoding"); + } + } else if (o->type == OBJ_HASH) { + if (o->encoding == OBJ_ENCODING_ZIPLIST) { + asize = sizeof(*o)+(ziplistBlobLen(o->ptr)); + } else if (o->encoding == OBJ_ENCODING_HT) { + d = o->ptr; + di = dictGetIterator(d); + asize = sizeof(*o)+sizeof(dict)+(sizeof(struct dictEntry*)*dictSlots(d)); + while((de = dictNext(di)) != NULL) { + ele = dictGetKey(de); + elesize = (ele->encoding == OBJ_ENCODING_RAW) ? + (sizeof(*o)+sdsAllocSize(ele->ptr)) : sizeof(*o); + ele = dictGetVal(de); + elesize = (ele->encoding == OBJ_ENCODING_RAW) ? 
+ (sizeof(*o)+sdsAllocSize(ele->ptr)) : sizeof(*o); + asize += (sizeof(struct dictEntry)+elesize); + } + dictReleaseIterator(di); + } else { + serverPanic("Unknown hash encoding"); + } + } else { + serverPanic("Unknown object type"); + } + return asize; +} + +/* ============================ The OBJECT command ========================== */ + /* This is a helper function for the OBJECT command. We need to lookup keys * without any modification of LRU or other parameters. */ robj *objectCommandLookup(client *c, robj *key) { From 33ed0a71cdb2fbb65e63e71ac54f4c0d71395baf Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 13 Sep 2016 10:26:36 +0200 Subject: [PATCH 0121/1722] objectComputeSize(): estimate collections sampling N elements. For most tasks, we need the memory estimation to be O(1) by default. This commit also implements an initial MEMORY command. Note that objectComputeSize() takes the number of samples to check as argument, so MEMORY should be able to get the sample size as option to make precision VS CPU tradeoff tunable. Related to: PR #3223. --- src/object.c | 78 ++++++++++++++++++++++++++++++++-------------------- src/server.c | 1 + src/server.h | 3 +- 3 files changed, 51 insertions(+), 31 deletions(-) diff --git a/src/object.c b/src/object.c index 903143f4e..cb5d1818c 100644 --- a/src/object.c +++ b/src/object.c @@ -694,16 +694,17 @@ char *strEncoding(int encoding) { /* ========================== Objects introspection ========================= */ -/* Returns the size in bytes consumed by the key's value in RAM */ -size_t objectComputeSize(robj *o) { +/* Returns the size in bytes consumed by the key's value in RAM. + * Note that the returned value is just an approximation, especially in the + * case of aggregated data types where only "sample_size" elements + * are checked and averaged to estimate the total size. */ +#define OBJ_COMPUTE_SIZE_DEF_SAMPLES 5 /* Default sample size. 
*/ +size_t objectComputeSize(robj *o, size_t sample_size) { robj *ele; - list *l; - listNode *ln; dict *d; dictIterator *di; - listIter li; struct dictEntry *de; - size_t asize = 0, elesize; + size_t asize = 0, elesize = 0, samples = 0; if (o->type == OBJ_STRING) { if(o->encoding == OBJ_ENCODING_INT) { @@ -722,20 +723,12 @@ size_t objectComputeSize(robj *o) { quicklistNode *node = ql->head; asize = sizeof(*o)+sizeof(quicklist); do { - asize += sizeof(quicklistNode)+ziplistBlobLen(node->zl); - } while ((node = node->next)); + elesize += sizeof(quicklistNode)+ziplistBlobLen(node->zl); + samples++; + } while ((node = node->next) && samples < sample_size); + asize += (double)elesize/samples*listTypeLength(o); } else if (o->encoding == OBJ_ENCODING_ZIPLIST) { asize = sizeof(*o)+ziplistBlobLen(o->ptr); - } else if (o->encoding == OBJ_ENCODING_LINKEDLIST) { - l = o->ptr; - asize = sizeof(*o)+sizeof(list); - listRewind(l,&li); - while((ln = listNext(&li))) { - ele = ln->value; - elesize = (ele->encoding == OBJ_ENCODING_RAW) ? - (sizeof(*o)+sdsAllocSize(ele->ptr)) : sizeof(*o); - asize += (sizeof(listNode)+elesize); - } } else { serverPanic("Unknown list encoding"); } @@ -744,13 +737,15 @@ size_t objectComputeSize(robj *o) { d = o->ptr; di = dictGetIterator(d); asize = sizeof(*o)+sizeof(dict)+(sizeof(struct dictEntry*)*dictSlots(d)); - while((de = dictNext(di)) != NULL) { + while((de = dictNext(di)) != NULL && samples < sample_size) { ele = dictGetKey(de); - elesize = (ele->encoding == OBJ_ENCODING_RAW) ? + elesize += (ele->encoding == OBJ_ENCODING_RAW) ? 
(sizeof(*o)+sdsAllocSize(ele->ptr)) : sizeof(*o); - asize += (sizeof(struct dictEntry)+elesize); + elesize += sizeof(struct dictEntry); + samples++; } dictReleaseIterator(di); + if (samples) asize += (double)elesize/samples*dictSize(d); } else if (o->encoding == OBJ_ENCODING_INTSET) { intset *is = o->ptr; asize = sizeof(*o)+sizeof(*is)+is->encoding*is->length; @@ -764,14 +759,16 @@ size_t objectComputeSize(robj *o) { d = ((zset*)o->ptr)->dict; di = dictGetIterator(d); asize = sizeof(*o)+sizeof(zset)+(sizeof(struct dictEntry*)*dictSlots(d)); - while((de = dictNext(di)) != NULL) { + while((de = dictNext(di)) != NULL && samples < sample_size) { ele = dictGetKey(de); - elesize = (ele->encoding == OBJ_ENCODING_RAW) ? + elesize += (ele->encoding == OBJ_ENCODING_RAW) ? (sizeof(*o)+sdsAllocSize(ele->ptr)) : sizeof(*o); - asize += (sizeof(struct dictEntry)+elesize); - asize += sizeof(zskiplistNode)*dictSize(d); + elesize += sizeof(struct dictEntry); + elesize += sizeof(zskiplistNode)*dictSize(d); + samples++; } dictReleaseIterator(di); + if (samples) asize += (double)elesize/samples*dictSize(d); } else { serverPanic("Unknown sorted set encoding"); } @@ -782,16 +779,19 @@ size_t objectComputeSize(robj *o) { d = o->ptr; di = dictGetIterator(d); asize = sizeof(*o)+sizeof(dict)+(sizeof(struct dictEntry*)*dictSlots(d)); - while((de = dictNext(di)) != NULL) { + while((de = dictNext(di)) != NULL && samples < sample_size) { ele = dictGetKey(de); - elesize = (ele->encoding == OBJ_ENCODING_RAW) ? + elesize += (ele->encoding == OBJ_ENCODING_RAW) ? (sizeof(*o)+sdsAllocSize(ele->ptr)) : sizeof(*o); ele = dictGetVal(de); - elesize = (ele->encoding == OBJ_ENCODING_RAW) ? + elesize += (ele->encoding == OBJ_ENCODING_RAW) ? 
(sizeof(*o)+sdsAllocSize(ele->ptr)) : sizeof(*o); - asize += (sizeof(struct dictEntry)+elesize); + elesize += sizeof(struct dictEntry); + samples++; + printf("%zu samples: %zu usage\n", samples, elesize); } dictReleaseIterator(di); + if (samples) asize += (double)elesize/samples*dictSize(d); } else { serverPanic("Unknown hash encoding"); } @@ -801,7 +801,7 @@ size_t objectComputeSize(robj *o) { return asize; } -/* ============================ The OBJECT command ========================== */ +/* ======================= The OBJECT and MEMORY commands =================== */ /* This is a helper function for the OBJECT command. We need to lookup keys * without any modification of LRU or other parameters. */ @@ -853,3 +853,21 @@ void objectCommand(client *c) { } } +/* The memory command will eventually be a complete interface for the + * memory introspection capabilities of Redis. + * + * Usage: MEMORY usage */ +void memoryCommand(client *c) { + robj *o; + + if (!strcasecmp(c->argv[1]->ptr,"usage") && c->argc == 3) { + if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nullbulk)) + == NULL) return; + size_t usage = objectComputeSize(o,OBJ_COMPUTE_SIZE_DEF_SAMPLES); + usage += sdsAllocSize(c->argv[1]->ptr); + usage += sizeof(dictEntry); + addReplyLongLong(c,usage); + } else { + addReplyError(c,"Syntax error. 
Try MEMORY usage "); + } +} diff --git a/src/server.c b/src/server.c index e794ad132..d143bf0c8 100644 --- a/src/server.c +++ b/src/server.c @@ -274,6 +274,7 @@ struct redisCommand redisCommandTable[] = { {"readwrite",readwriteCommand,1,"F",0,NULL,0,0,0,0,0}, {"dump",dumpCommand,2,"r",0,NULL,1,1,1,0,0}, {"object",objectCommand,3,"r",0,NULL,2,2,2,0,0}, + {"memory",memoryCommand,3,"r",0,NULL,0,0,0,0,0}, {"client",clientCommand,-2,"as",0,NULL,0,0,0,0,0}, {"eval",evalCommand,-3,"s",0,evalGetKeys,0,0,0,0,0}, {"evalsha",evalShaCommand,-3,"s",0,evalGetKeys,0,0,0,0,0}, diff --git a/src/server.h b/src/server.h index a5f0ee1a6..281989107 100644 --- a/src/server.h +++ b/src/server.h @@ -532,7 +532,7 @@ typedef struct RedisModuleIO { #define OBJ_ENCODING_INT 1 /* Encoded as integer */ #define OBJ_ENCODING_HT 2 /* Encoded as hash table */ #define OBJ_ENCODING_ZIPMAP 3 /* Encoded as zipmap */ -#define OBJ_ENCODING_LINKEDLIST 4 /* Encoded as regular linked list */ +#define OBJ_ENCODING_LINKEDLIST 4 /* No longer used: old list encoding. */ #define OBJ_ENCODING_ZIPLIST 5 /* Encoded as ziplist */ #define OBJ_ENCODING_INTSET 6 /* Encoded as intset */ #define OBJ_ENCODING_SKIPLIST 7 /* Encoded as skiplist */ @@ -1792,6 +1792,7 @@ void readonlyCommand(client *c); void readwriteCommand(client *c); void dumpCommand(client *c); void objectCommand(client *c); +void memoryCommand(client *c); void clientCommand(client *c); void evalCommand(client *c); void evalShaCommand(client *c); From a114054182649d7f11572b34bce528760bcf433a Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 13 Sep 2016 17:39:22 +0200 Subject: [PATCH 0122/1722] MEMORY OVERHEAD implemented (using Oran Agra initial implementation). This code was extracted from @oranagra PR #3223 and modified in order to provide only certain amounts of information compared to the original code. It was also moved from DEBUG to the newly introduced MEMORY command. Thanks to Oran for the implementation and the PR. 
It implements detailed memory usage stats that can be useful in both provisioning and troubleshooting memory usage in Redis. --- src/object.c | 102 ++++++++++++++++++++++++++++++++++++++++++++++++++- src/server.c | 3 +- src/server.h | 1 + 3 files changed, 104 insertions(+), 2 deletions(-) diff --git a/src/object.c b/src/object.c index cb5d1818c..3b627e90e 100644 --- a/src/object.c +++ b/src/object.c @@ -867,7 +867,107 @@ void memoryCommand(client *c) { usage += sdsAllocSize(c->argv[1]->ptr); usage += sizeof(dictEntry); addReplyLongLong(c,usage); + } else if (!strcasecmp(c->argv[1]->ptr,"overhead") && c->argc == 2) { + int j; + size_t mem_total = 0; + size_t mem = 0; + size_t zmalloc_used = zmalloc_used_memory(); + + int toplevel_keys = 8; + void *tlk = addDeferredMultiBulkLength(c); + + addReplyBulkCString(c,"total.allocated"); + addReplyLongLong(c,zmalloc_used); + + addReplyBulkCString(c,"startup.allocated"); + addReplyLongLong(c,server.initial_memory_usage); + mem_total += server.initial_memory_usage; + + mem = 0; + if (server.repl_backlog) + mem += zmalloc_size(server.repl_backlog); + addReplyBulkCString(c,"replication.backlog"); + addReplyLongLong(c,mem); + mem_total += mem; + + mem = 0; + if (listLength(server.slaves)) { + listIter li; + listNode *ln; + + listRewind(server.slaves,&li); + while((ln = listNext(&li))) { + client *client = listNodeValue(ln); + mem += getClientOutputBufferMemoryUsage(client); + mem += sdsAllocSize(client->querybuf); + mem += sizeof(client); + } + } + addReplyBulkCString(c,"clients.slaves"); + addReplyLongLong(c,mem); + mem_total+=mem; + + mem = 0; + if (listLength(server.clients)) { + listIter li; + listNode *ln; + + listRewind(server.clients,&li); + while((ln = listNext(&li))) { + client *client = listNodeValue(ln); + if (client->flags & CLIENT_SLAVE) + continue; + mem += getClientOutputBufferMemoryUsage(client); + mem += sdsAllocSize(client->querybuf); + mem += sizeof(client); + } + } + addReplyBulkCString(c,"clients.normal"); + 
addReplyLongLong(c,mem); + mem_total+=mem; + + mem = 0; + if (server.aof_state != AOF_OFF) { + mem += sdslen(server.aof_buf); + mem += aofRewriteBufferSize(); + } + addReplyBulkCString(c,"aof.buffer"); + addReplyLongLong(c,mem); + mem_total+=mem; + + for (j = 0; j < server.dbnum; j++) { + redisDb *db = server.db+j; + long long keyscount = dictSize(db->dict); + if (keyscount==0) continue; + + char dbname[32]; + toplevel_keys++; + snprintf(dbname,sizeof(dbname),"db.%d",j); + addReplyBulkCString(c,dbname); + addReplyMultiBulkLen(c,4); + + mem = dictSize(db->dict) * sizeof(dictEntry) + + dictSlots(db->dict) * sizeof(dictEntry*) + + dictSize(db->dict) * sizeof(robj); + addReplyBulkCString(c,"overhead.hashtable.main"); + addReplyLongLong(c,mem); + mem_total+=mem; + + mem = dictSize(db->expires) * sizeof(dictEntry) + + dictSlots(db->expires) * sizeof(dictEntry*); + addReplyBulkCString(c,"overhead.hashtable.expires"); + addReplyLongLong(c,mem); + mem_total+=mem; + } + + addReplyBulkCString(c,"overhead.total"); + addReplyLongLong(c,mem_total); + + addReplyBulkCString(c,"dataset"); + addReplyLongLong(c,zmalloc_used - mem_total); + + setDeferredMultiBulkLength(c,tlk,toplevel_keys*2); } else { - addReplyError(c,"Syntax error. Try MEMORY usage "); + addReplyError(c,"Syntax error. 
Try MEMORY [usage ] | [overhead]"); } } diff --git a/src/server.c b/src/server.c index d143bf0c8..91c17f650 100644 --- a/src/server.c +++ b/src/server.c @@ -274,7 +274,7 @@ struct redisCommand redisCommandTable[] = { {"readwrite",readwriteCommand,1,"F",0,NULL,0,0,0,0,0}, {"dump",dumpCommand,2,"r",0,NULL,1,1,1,0,0}, {"object",objectCommand,3,"r",0,NULL,2,2,2,0,0}, - {"memory",memoryCommand,3,"r",0,NULL,0,0,0,0,0}, + {"memory",memoryCommand,-2,"r",0,NULL,0,0,0,0,0}, {"client",clientCommand,-2,"as",0,NULL,0,0,0,0,0}, {"eval",evalCommand,-3,"s",0,evalGetKeys,0,0,0,0,0}, {"evalsha",evalShaCommand,-3,"s",0,evalGetKeys,0,0,0,0,0}, @@ -1860,6 +1860,7 @@ void initServer(void) { slowlogInit(); latencyMonitorInit(); bioInit(); + server.initial_memory_usage = zmalloc_used_memory(); } /* Populates the Redis Command Table starting from the hard coded list diff --git a/src/server.h b/src/server.h index 281989107..0d104425c 100644 --- a/src/server.h +++ b/src/server.h @@ -801,6 +801,7 @@ struct redisServer { int cronloops; /* Number of times the cron function run */ char runid[CONFIG_RUN_ID_SIZE+1]; /* ID always different at every exec. */ int sentinel_mode; /* True if this instance is a Sentinel. */ + size_t initial_memory_usage; /* Bytes used after initialization. */ /* Modules */ dict *moduleapi; /* Exported APIs dictionary for modules. */ list *loadmodule_queue; /* List of modules to load at startup. */ From 40cf4d9a0aa90b4ee4ce1938b26172c65999b0b0 Mon Sep 17 00:00:00 2001 From: oranagra Date: Mon, 9 May 2016 18:01:09 +0300 Subject: [PATCH 0123/1722] dict.c: introduce dictUnlink(). Notes by @antirez: This patch was picked from a larger commit by Oran and adapted to change the API a bit. The basic idea is to avoid double lookups when there is to use the value of the deleted entry. BEFORE: entry = dictFind( ... ); /* 1st lookup. */ /* Do somethjing with the entry. */ dictDelete(...); /* 2nd lookup. */ AFTER: entry = dictUnlink( ... ); /* 1st lookup. 
*/ /* Do something with the entry. */ dictFreeUnlinkedEntry(entry); /* No lookups! */ --- src/dict.c | 50 +++++++++++++++++++++++++++++++++++++++++--------- src/dict.h | 3 ++- src/module.c | 3 ++- 3 files changed, 45 insertions(+), 11 deletions(-) diff --git a/src/dict.c b/src/dict.c index 2d9ac35a2..322490283 100644 --- a/src/dict.c +++ b/src/dict.c @@ -407,14 +407,15 @@ dictEntry *dictReplaceRaw(dict *d, void *key) { return entry ? entry : existing; } -/* Search and remove an element */ -static int dictGenericDelete(dict *d, const void *key, int nofree) -{ +/* Search and remove an element. This is a helper function for + * dictDelete() and dictUnlink(), please check the top comment + * of those functions. */ +static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) { unsigned int h, idx; dictEntry *he, *prevHe; int table; - if (d->ht[0].size == 0) return DICT_ERR; /* d->ht[0].table is NULL */ + if (d->ht[0].used == 0) return NULL; if (dictIsRehashing(d)) _dictRehashStep(d); h = dictHashKey(d, key); @@ -432,27 +433,58 @@ static int dictGenericDelete(dict *d, const void *key, int nofree) if (!nofree) { dictFreeKey(d, he); dictFreeVal(d, he); + zfree(he); } - zfree(he); d->ht[table].used--; - return DICT_OK; + return he; } prevHe = he; he = he->next; } if (!dictIsRehashing(d)) break; } - return DICT_ERR; /* not found */ + return NULL; /* not found */ } +/* Remove an element, returning DICT_OK on success or DICT_ERR if the + * element was not found. */ int dictDelete(dict *ht, const void *key) { - return dictGenericDelete(ht,key,0); + return dictGenericDelete(ht,key,0) ? DICT_OK : DICT_ERR; } -int dictDeleteNoFree(dict *ht, const void *key) { +/* Remove an element from the table, but without actually releasing + * the key, value and dictionary entry. The dictionary entry is returned + * if the element was found (and unlinked from the table), and the user + * should later call `dictFreeUnlinkedEntry()` with it in order to release it. 
+ * Otherwise if the key is not found, NULL is returned. + * + * This function is useful when we want to remove something from the hash + * table but want to use its value before actually deleting the entry. + * Without this function the pattern would require two lookups: + * + * entry = dictFind(...); + * // Do something with entry + * dictDelete(dictionary,entry); + * + * Thanks to this function it is possible to avoid this, and use + * instead: + * + * entry = dictUnlink(dictionary,entry); + * // Do something with entry + * dictFreeUnlinkedEntry(entry); // <- This does not need to lookup again. + */ +dictEntry *dictUnlink(dict *ht, const void *key) { return dictGenericDelete(ht,key,1); } +/* You need to call this function to really free the entry after a call + * to dictUnlink(). */ +void dictFreeUnlinkedEntry(dict *d, dictEntry *he) { + dictFreeKey(d, he); + dictFreeVal(d, he); + zfree(he); +} + /* Destroy an entire dictionary */ int _dictClear(dict *d, dictht *ht, void(callback)(void *)) { unsigned long i; diff --git a/src/dict.h b/src/dict.h index 739de68ef..406fa36d8 100644 --- a/src/dict.h +++ b/src/dict.h @@ -154,7 +154,8 @@ dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing); int dictReplace(dict *d, void *key, void *val); dictEntry *dictReplaceRaw(dict *d, void *key); int dictDelete(dict *d, const void *key); -int dictDeleteNoFree(dict *d, const void *key); +dictEntry *dictUnlink(dict *ht, const void *key); +void dictFreeUnlinkedEntry(dict *d, dictEntry *he); void dictRelease(dict *d); dictEntry * dictFind(dict *d, const void *key); void *dictFetchValue(dict *d, const void *key); diff --git a/src/module.c b/src/module.c index feab67dfb..3c757a5cb 100644 --- a/src/module.c +++ b/src/module.c @@ -3085,7 +3085,8 @@ int moduleUnload(sds name) { /* Remove from list of modules. 
*/ serverLog(LL_NOTICE,"Module %s unloaded",module->name); - dictDeleteNoFree(modules,module->name); + dictDelete(modules,module->name); + module->name = NULL; /* The name was already freed by dictDelete(). */ moduleFreeModuleStructure(module); return REDISMODULE_OK; From e027e9e85deb07231390c88bc02e54c17f41dcd4 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 14 Sep 2016 15:10:53 +0200 Subject: [PATCH 0124/1722] Apply the new dictUnlink() where possible. Optimizations suggested and originally implemented by @oranagra. Re-applied by @antirez using the modified API. --- src/dict.c | 3 ++- src/lazyfree.c | 5 +++-- src/t_zset.c | 9 ++++----- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/dict.c b/src/dict.c index 322490283..5e327a794 100644 --- a/src/dict.c +++ b/src/dict.c @@ -478,8 +478,9 @@ dictEntry *dictUnlink(dict *ht, const void *key) { } /* You need to call this function to really free the entry after a call - * to dictUnlink(). */ + * to dictUnlink(). It's safe to call this function with 'he' = NULL. */ void dictFreeUnlinkedEntry(dict *d, dictEntry *he) { + if (he == NULL) return; dictFreeKey(d, he); dictFreeVal(d, he); zfree(he); diff --git a/src/lazyfree.c b/src/lazyfree.c index dba3f00e2..c05252159 100644 --- a/src/lazyfree.c +++ b/src/lazyfree.c @@ -57,7 +57,7 @@ int dbAsyncDelete(redisDb *db, robj *key) { /* If the value is composed of a few allocations, to free in a lazy way * is actually just slower... So under a certain limit we just free * the object synchronously. */ - dictEntry *de = dictFind(db->dict,key->ptr); + dictEntry *de = dictUnlink(db->dict,key->ptr); if (de) { robj *val = dictGetVal(de); size_t free_effort = lazyfreeGetFreeEffort(val); @@ -73,7 +73,8 @@ int dbAsyncDelete(redisDb *db, robj *key) { /* Release the key-val pair, or just the key if we set the val * field to NULL in order to lazy free it later. 
*/ - if (dictDelete(db->dict,key->ptr) == DICT_OK) { + if (de) { + dictFreeUnlinkedEntry(db->dict,de); if (server.cluster_enabled) slotToKeyDel(key); return 1; } else { diff --git a/src/t_zset.c b/src/t_zset.c index 81e6e57c5..8d905be02 100644 --- a/src/t_zset.c +++ b/src/t_zset.c @@ -1387,7 +1387,7 @@ int zsetDel(robj *zobj, sds ele) { dictEntry *de; double score; - de = dictFind(zs->dict,ele); + de = dictUnlink(zs->dict,ele); if (de != NULL) { /* Get the score in order to delete from the skiplist later. */ score = *(double*)dictGetVal(de); @@ -1397,12 +1397,11 @@ int zsetDel(robj *zobj, sds ele) { * actually releases the SDS string representing the element, * which is shared between the skiplist and the hash table, so * we need to delete from the skiplist as the final step. */ - int retval1 = dictDelete(zs->dict,ele); + dictFreeUnlinkedEntry(zs->dict,de); /* Delete from skiplist. */ - int retval2 = zslDelete(zs->zsl,score,ele,NULL); - - serverAssert(retval1 == DICT_OK && retval2); + int retval = zslDelete(zs->zsl,score,ele,NULL); + serverAssert(retval); if (htNeedsResize(zs->dict)) dictResize(zs->dict); return 1; From 28942ddac70f10f7ae78b253a374a031af837a31 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 14 Sep 2016 16:41:05 +0200 Subject: [PATCH 0125/1722] Trim comment to 80 cols. --- src/sentinel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sentinel.c b/src/sentinel.c index 235611546..1f47dd337 100644 --- a/src/sentinel.c +++ b/src/sentinel.c @@ -3674,7 +3674,7 @@ char *sentinelGetLeader(sentinelRedisInstance *master, uint64_t epoch) { serverAssert(master->flags & (SRI_O_DOWN|SRI_FAILOVER_IN_PROGRESS)); counters = dictCreate(&leaderVotesDictType,NULL); - voters = dictSize(master->sentinels)+1; /* All the other sentinels and me. 
*/ + voters = dictSize(master->sentinels)+1; /* All the other sentinels and me.*/ /* Count other sentinels votes */ di = dictGetIterator(master->sentinels); From 42acf62e5fa60a463252108fdce55ae9d92c01ac Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 14 Sep 2016 16:43:38 +0200 Subject: [PATCH 0126/1722] dict.c: dictReplaceRaw() -> dictAddOrFind(). What they say about "naming things" in programming? --- src/db.c | 2 +- src/dict.c | 4 ++-- src/dict.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/db.c b/src/db.c index d33c810b3..c7940d735 100644 --- a/src/db.c +++ b/src/db.c @@ -943,7 +943,7 @@ void setExpire(redisDb *db, robj *key, long long when) { /* Reuse the sds from the main dict in the expire dict */ kde = dictFind(db->dict,key->ptr); serverAssertWithInfo(NULL,key,kde != NULL); - de = dictReplaceRaw(db->expires,dictGetKey(kde)); + de = dictAddOrFind(db->expires,dictGetKey(kde)); dictSetSignedIntegerVal(de,when); } diff --git a/src/dict.c b/src/dict.c index 5e327a794..04dfae6cc 100644 --- a/src/dict.c +++ b/src/dict.c @@ -395,13 +395,13 @@ int dictReplace(dict *d, void *key, void *val) } /* Add or Find: - * dictReplaceRaw() is simply a version of dictAddRaw() that always + * dictAddOrFind() is simply a version of dictAddRaw() that always * returns the hash entry of the specified key, even if the key already * exists and can't be added (in that case the entry of the already * existing key is returned.) * * See dictAddRaw() for more information. */ -dictEntry *dictReplaceRaw(dict *d, void *key) { +dictEntry *dictAddOrFind(dict *d, void *key) { dictEntry *entry, *existing; entry = dictAddRaw(d,key,&existing); return entry ? 
entry : existing; diff --git a/src/dict.h b/src/dict.h index 406fa36d8..04b247a25 100644 --- a/src/dict.h +++ b/src/dict.h @@ -151,8 +151,8 @@ dict *dictCreate(dictType *type, void *privDataPtr); int dictExpand(dict *d, unsigned long size); int dictAdd(dict *d, void *key, void *val); dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing); +dictEntry *dictAddOrFind(dict *d, void *key); int dictReplace(dict *d, void *key, void *val); -dictEntry *dictReplaceRaw(dict *d, void *key); int dictDelete(dict *d, const void *key); dictEntry *dictUnlink(dict *ht, const void *key); void dictFreeUnlinkedEntry(dict *d, dictEntry *he); From 8a81227859a3062ea50f9f97d03c0bc24fdbc430 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 15 Sep 2016 09:37:55 +0200 Subject: [PATCH 0127/1722] MEMORY OVERHEAD refactored into a generic API. --- src/object.c | 201 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 129 insertions(+), 72 deletions(-) diff --git a/src/object.c b/src/object.c index 3b627e90e..8cf86235f 100644 --- a/src/object.c +++ b/src/object.c @@ -853,6 +853,120 @@ void objectCommand(client *c) { } } +/* This structure is returned by the getMemoryOverheadData() function in + * order to return memory overhead information. */ +struct memoh { + size_t total_allocated; + size_t startup_allocated; + size_t repl_backlog; + size_t clients_slaves; + size_t clients_normal; + size_t aof_buffer; + size_t overhead_total; + size_t dataset; + size_t num_dbs; + struct { + size_t dbid; + size_t overhead_ht_main; + size_t overhead_ht_expires; + } *db; +}; + +/* Release data obtained with getMemoryOverheadData(). */ +void freeMemoryOverheadData(struct memoh *mh) { + zfree(mh->db); + zfree(mh); +} + +/* Return a struct memoh filled with memory overhead information used + * for the MEMORY OVERHEAD and INFO command. The returned structure + * pointer should be freed calling freeMemoryOverheadData(). 
*/ +struct memoh *getMemoryOverheadData(void) { + int j; + size_t mem_total = 0; + size_t mem = 0; + size_t zmalloc_used = zmalloc_used_memory(); + struct memoh *mh = zcalloc(sizeof(*mh)); + + mh->total_allocated = zmalloc_used; + mh->startup_allocated = server.initial_memory_usage; + mem_total += server.initial_memory_usage; + + mem = 0; + if (server.repl_backlog) + mem += zmalloc_size(server.repl_backlog); + mh->repl_backlog = mem; + mem_total += mem; + + mem = 0; + if (listLength(server.slaves)) { + listIter li; + listNode *ln; + + listRewind(server.slaves,&li); + while((ln = listNext(&li))) { + client *client = listNodeValue(ln); + mem += getClientOutputBufferMemoryUsage(client); + mem += sdsAllocSize(client->querybuf); + mem += sizeof(client); + } + } + mh->clients_slaves = mem; + mem_total+=mem; + + mem = 0; + if (listLength(server.clients)) { + listIter li; + listNode *ln; + + listRewind(server.clients,&li); + while((ln = listNext(&li))) { + client *client = listNodeValue(ln); + if (client->flags & CLIENT_SLAVE) + continue; + mem += getClientOutputBufferMemoryUsage(client); + mem += sdsAllocSize(client->querybuf); + mem += sizeof(client); + } + } + mh->clients_normal = mem; + mem_total+=mem; + + mem = 0; + if (server.aof_state != AOF_OFF) { + mem += sdslen(server.aof_buf); + mem += aofRewriteBufferSize(); + } + mh->aof_buffer = mem; + mem_total+=mem; + + for (j = 0; j < server.dbnum; j++) { + redisDb *db = server.db+j; + long long keyscount = dictSize(db->dict); + if (keyscount==0) continue; + + mh->db = zrealloc(mh->db,sizeof(mh->db[0])*(mh->num_dbs+1)); + mh->db[mh->num_dbs].dbid = j; + + mem = dictSize(db->dict) * sizeof(dictEntry) + + dictSlots(db->dict) * sizeof(dictEntry*) + + dictSize(db->dict) * sizeof(robj); + mh->db[mh->num_dbs].overhead_ht_main = mem; + mem_total+=mem; + + mem = dictSize(db->expires) * sizeof(dictEntry) + + dictSlots(db->expires) * sizeof(dictEntry*); + mh->db[mh->num_dbs].overhead_ht_expires = mem; + mem_total+=mem; + + 
mh->num_dbs++; + } + + mh->overhead_total = mem_total; + mh->dataset = zmalloc_used - mem_total; + return mh; +} + /* The memory command will eventually be a complete interface for the * memory introspection capabilities of Redis. * @@ -868,105 +982,48 @@ void memoryCommand(client *c) { usage += sizeof(dictEntry); addReplyLongLong(c,usage); } else if (!strcasecmp(c->argv[1]->ptr,"overhead") && c->argc == 2) { - int j; - size_t mem_total = 0; - size_t mem = 0; - size_t zmalloc_used = zmalloc_used_memory(); + struct memoh *mh = getMemoryOverheadData(); - int toplevel_keys = 8; - void *tlk = addDeferredMultiBulkLength(c); + addReplyMultiBulkLen(c,(8+mh->num_dbs)*2); addReplyBulkCString(c,"total.allocated"); - addReplyLongLong(c,zmalloc_used); + addReplyLongLong(c,mh->total_allocated); addReplyBulkCString(c,"startup.allocated"); - addReplyLongLong(c,server.initial_memory_usage); - mem_total += server.initial_memory_usage; + addReplyLongLong(c,mh->startup_allocated); - mem = 0; - if (server.repl_backlog) - mem += zmalloc_size(server.repl_backlog); addReplyBulkCString(c,"replication.backlog"); - addReplyLongLong(c,mem); - mem_total += mem; + addReplyLongLong(c,mh->repl_backlog); - mem = 0; - if (listLength(server.slaves)) { - listIter li; - listNode *ln; - - listRewind(server.slaves,&li); - while((ln = listNext(&li))) { - client *client = listNodeValue(ln); - mem += getClientOutputBufferMemoryUsage(client); - mem += sdsAllocSize(client->querybuf); - mem += sizeof(client); - } - } addReplyBulkCString(c,"clients.slaves"); - addReplyLongLong(c,mem); - mem_total+=mem; + addReplyLongLong(c,mh->clients_slaves); - mem = 0; - if (listLength(server.clients)) { - listIter li; - listNode *ln; - - listRewind(server.clients,&li); - while((ln = listNext(&li))) { - client *client = listNodeValue(ln); - if (client->flags & CLIENT_SLAVE) - continue; - mem += getClientOutputBufferMemoryUsage(client); - mem += sdsAllocSize(client->querybuf); - mem += sizeof(client); - } - } 
addReplyBulkCString(c,"clients.normal"); - addReplyLongLong(c,mem); - mem_total+=mem; + addReplyLongLong(c,mh->clients_normal); - mem = 0; - if (server.aof_state != AOF_OFF) { - mem += sdslen(server.aof_buf); - mem += aofRewriteBufferSize(); - } addReplyBulkCString(c,"aof.buffer"); - addReplyLongLong(c,mem); - mem_total+=mem; - - for (j = 0; j < server.dbnum; j++) { - redisDb *db = server.db+j; - long long keyscount = dictSize(db->dict); - if (keyscount==0) continue; + addReplyLongLong(c,mh->aof_buffer); + for (size_t j = 0; j < mh->num_dbs; j++) { char dbname[32]; - toplevel_keys++; - snprintf(dbname,sizeof(dbname),"db.%d",j); + snprintf(dbname,sizeof(dbname),"db.%zd",mh->db[j].dbid); addReplyBulkCString(c,dbname); addReplyMultiBulkLen(c,4); - mem = dictSize(db->dict) * sizeof(dictEntry) + - dictSlots(db->dict) * sizeof(dictEntry*) + - dictSize(db->dict) * sizeof(robj); addReplyBulkCString(c,"overhead.hashtable.main"); - addReplyLongLong(c,mem); - mem_total+=mem; + addReplyLongLong(c,mh->db[j].overhead_ht_main); - mem = dictSize(db->expires) * sizeof(dictEntry) + - dictSlots(db->expires) * sizeof(dictEntry*); addReplyBulkCString(c,"overhead.hashtable.expires"); - addReplyLongLong(c,mem); - mem_total+=mem; + addReplyLongLong(c,mh->db[j].overhead_ht_expires); } addReplyBulkCString(c,"overhead.total"); - addReplyLongLong(c,mem_total); + addReplyLongLong(c,mh->overhead_total); addReplyBulkCString(c,"dataset"); - addReplyLongLong(c,zmalloc_used - mem_total); + addReplyLongLong(c,mh->dataset); - setDeferredMultiBulkLength(c,tlk,toplevel_keys*2); + freeMemoryOverheadData(mh); } else { addReplyError(c,"Syntax error. Try MEMORY [usage ] | [overhead]"); } From 2eeedfe97f0c5b88851b08f9c6b0f9bb9bf9fceb Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 15 Sep 2016 09:42:51 +0200 Subject: [PATCH 0128/1722] C struct memoh renamed redisMemOverhead. API prototypes added. 
--- src/object.c | 33 +++++++-------------------------- src/server.h | 21 +++++++++++++++++++++ 2 files changed, 28 insertions(+), 26 deletions(-) diff --git a/src/object.c b/src/object.c index 8cf86235f..ddc49dfd0 100644 --- a/src/object.c +++ b/src/object.c @@ -853,40 +853,21 @@ void objectCommand(client *c) { } } -/* This structure is returned by the getMemoryOverheadData() function in - * order to return memory overhead information. */ -struct memoh { - size_t total_allocated; - size_t startup_allocated; - size_t repl_backlog; - size_t clients_slaves; - size_t clients_normal; - size_t aof_buffer; - size_t overhead_total; - size_t dataset; - size_t num_dbs; - struct { - size_t dbid; - size_t overhead_ht_main; - size_t overhead_ht_expires; - } *db; -}; - /* Release data obtained with getMemoryOverheadData(). */ -void freeMemoryOverheadData(struct memoh *mh) { +void freeMemoryOverheadData(struct redisMemOverhead *mh) { zfree(mh->db); zfree(mh); } -/* Return a struct memoh filled with memory overhead information used - * for the MEMORY OVERHEAD and INFO command. The returned structure - * pointer should be freed calling freeMemoryOverheadData(). */ -struct memoh *getMemoryOverheadData(void) { +/* Return a struct redisMemOverhead filled with memory overhead + * information used for the MEMORY OVERHEAD and INFO command. The returned + * structure pointer should be freed calling freeMemoryOverheadData(). 
*/ +struct redisMemOverhead *getMemoryOverheadData(void) { int j; size_t mem_total = 0; size_t mem = 0; size_t zmalloc_used = zmalloc_used_memory(); - struct memoh *mh = zcalloc(sizeof(*mh)); + struct redisMemOverhead *mh = zcalloc(sizeof(*mh)); mh->total_allocated = zmalloc_used; mh->startup_allocated = server.initial_memory_usage; @@ -982,7 +963,7 @@ void memoryCommand(client *c) { usage += sizeof(dictEntry); addReplyLongLong(c,usage); } else if (!strcasecmp(c->argv[1]->ptr,"overhead") && c->argc == 2) { - struct memoh *mh = getMemoryOverheadData(); + struct redisMemOverhead *mh = getMemoryOverheadData(); addReplyMultiBulkLen(c,(8+mh->num_dbs)*2); diff --git a/src/server.h b/src/server.h index 0d104425c..fb094507b 100644 --- a/src/server.h +++ b/src/server.h @@ -769,6 +769,25 @@ typedef struct redisOpArray { int numops; } redisOpArray; +/* This structure is returned by the getMemoryOverheadData() function in + * order to return memory overhead information. */ +struct redisMemOverhead { + size_t total_allocated; + size_t startup_allocated; + size_t repl_backlog; + size_t clients_slaves; + size_t clients_normal; + size_t aof_buffer; + size_t overhead_total; + size_t dataset; + size_t num_dbs; + struct { + size_t dbid; + size_t overhead_ht_main; + size_t overhead_ht_expires; + } *db; +}; + /*----------------------------------------------------------------------------- * Global server state *----------------------------------------------------------------------------*/ @@ -1481,6 +1500,8 @@ void updateCachedTime(void); void resetServerStats(void); unsigned int getLRUClock(void); const char *evictPolicyToString(void); +struct redisMemOverhead *getMemoryOverheadData(void); +void freeMemoryOverheadData(struct redisMemOverhead *mh); #define RESTART_SERVER_NONE 0 #define RESTART_SERVER_GRACEFULLY (1<<0) /* Do proper shutdown. 
*/ From a427affe3051ea45ab4b6c6468bb77195a9861db Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 15 Sep 2016 10:33:23 +0200 Subject: [PATCH 0129/1722] INFO: new memory reporting fields added. --- src/server.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/server.c b/src/server.c index 91c17f650..fbc2b0a4d 100644 --- a/src/server.c +++ b/src/server.c @@ -2812,6 +2812,7 @@ sds genRedisInfoString(char *section) { size_t total_system_mem = server.system_memory_size; const char *evict_policy = evictPolicyToString(); long long memory_lua = (long long)lua_gc(server.lua,LUA_GCCOUNT,0)*1024; + struct redisMemOverhead *mh = getMemoryOverheadData(); /* Peak memory is updated from time to time by serverCron() so it * may happen that the instantaneous value is slightly bigger than @@ -2827,6 +2828,10 @@ sds genRedisInfoString(char *section) { bytesToHuman(used_memory_rss_hmem,server.resident_set_size); bytesToHuman(maxmemory_hmem,server.maxmemory); + size_t net_usage = 1; + if (zmalloc_used > mh->startup_allocated) + net_usage = zmalloc_used - mh->startup_allocated; + if (sections++) info = sdscat(info,"\r\n"); info = sdscatprintf(info, "# Memory\r\n" @@ -2836,6 +2841,10 @@ sds genRedisInfoString(char *section) { "used_memory_rss_human:%s\r\n" "used_memory_peak:%zu\r\n" "used_memory_peak_human:%s\r\n" + "used_memory_overhead:%zu\r\n" + "used_memory_startup:%zu\r\n" + "used_memory_dataset:%zu\r\n" + "used_memory_dataset_perc:%.2f%%\r\n" "total_system_memory:%lu\r\n" "total_system_memory_human:%s\r\n" "used_memory_lua:%lld\r\n" @@ -2852,6 +2861,10 @@ sds genRedisInfoString(char *section) { used_memory_rss_hmem, server.stat_peak_memory, peak_hmem, + mh->overhead_total, + mh->startup_allocated, + mh->dataset, + (float)mh->dataset*100/net_usage, (unsigned long)total_system_mem, total_system_hmem, memory_lua, @@ -2862,7 +2875,8 @@ sds genRedisInfoString(char *section) { zmalloc_get_fragmentation_ratio(server.resident_set_size), ZMALLOC_LIB, 
lazyfreeGetPendingObjectsCount() - ); + ); + freeMemoryOverheadData(mh); } /* Persistence */ From 033a49e23dd1f2a53f7b88c40f2ce4db437cd2c5 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 15 Sep 2016 15:25:05 +0200 Subject: [PATCH 0130/1722] MEMORY USAGE: SAMPLES option added + fixes to size computation. The new SAMPLES option is added, defaulting to 5, and with 0 being a special value to scan the whole set of elements. Fixes to the object size computation were made since the original PR assumed data structures still contaning robj structures, while now after the lazyfree changes, are all SDS strings. --- src/object.c | 45 +++++++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/src/object.c b/src/object.c index ddc49dfd0..e47d663e0 100644 --- a/src/object.c +++ b/src/object.c @@ -700,7 +700,7 @@ char *strEncoding(int encoding) { * are checked and averaged to estimate the total size. */ #define OBJ_COMPUTE_SIZE_DEF_SAMPLES 5 /* Default sample size. */ size_t objectComputeSize(robj *o, size_t sample_size) { - robj *ele; + sds ele, ele2; dict *d; dictIterator *di; struct dictEntry *de; @@ -709,8 +709,7 @@ size_t objectComputeSize(robj *o, size_t sample_size) { if (o->type == OBJ_STRING) { if(o->encoding == OBJ_ENCODING_INT) { asize = sizeof(*o); - } - else if(o->encoding == OBJ_ENCODING_RAW) { + } else if(o->encoding == OBJ_ENCODING_RAW) { asize = sdsAllocSize(o->ptr)+sizeof(*o); } else if(o->encoding == OBJ_ENCODING_EMBSTR) { asize = sdslen(o->ptr)+2+sizeof(*o); @@ -739,9 +738,7 @@ size_t objectComputeSize(robj *o, size_t sample_size) { asize = sizeof(*o)+sizeof(dict)+(sizeof(struct dictEntry*)*dictSlots(d)); while((de = dictNext(di)) != NULL && samples < sample_size) { ele = dictGetKey(de); - elesize += (ele->encoding == OBJ_ENCODING_RAW) ? 
- (sizeof(*o)+sdsAllocSize(ele->ptr)) : sizeof(*o); - elesize += sizeof(struct dictEntry); + elesize += sizeof(struct dictEntry) + sdsAllocSize(ele); samples++; } dictReleaseIterator(di); @@ -761,10 +758,8 @@ size_t objectComputeSize(robj *o, size_t sample_size) { asize = sizeof(*o)+sizeof(zset)+(sizeof(struct dictEntry*)*dictSlots(d)); while((de = dictNext(di)) != NULL && samples < sample_size) { ele = dictGetKey(de); - elesize += (ele->encoding == OBJ_ENCODING_RAW) ? - (sizeof(*o)+sdsAllocSize(ele->ptr)) : sizeof(*o); - elesize += sizeof(struct dictEntry); - elesize += sizeof(zskiplistNode)*dictSize(d); + elesize += sdsAllocSize(ele); + elesize += sizeof(struct dictEntry) + sizeof(zskiplistNode); samples++; } dictReleaseIterator(di); @@ -781,14 +776,10 @@ size_t objectComputeSize(robj *o, size_t sample_size) { asize = sizeof(*o)+sizeof(dict)+(sizeof(struct dictEntry*)*dictSlots(d)); while((de = dictNext(di)) != NULL && samples < sample_size) { ele = dictGetKey(de); - elesize += (ele->encoding == OBJ_ENCODING_RAW) ? - (sizeof(*o)+sdsAllocSize(ele->ptr)) : sizeof(*o); - ele = dictGetVal(de); - elesize += (ele->encoding == OBJ_ENCODING_RAW) ? 
- (sizeof(*o)+sdsAllocSize(ele->ptr)) : sizeof(*o); + ele2 = dictGetVal(de); + elesize += sdsAllocSize(ele) + sdsAllocSize(ele2); elesize += sizeof(struct dictEntry); samples++; - printf("%zu samples: %zu usage\n", samples, elesize); } dictReleaseIterator(di); if (samples) asize += (double)elesize/samples*dictSize(d); @@ -955,10 +946,28 @@ struct redisMemOverhead *getMemoryOverheadData(void) { void memoryCommand(client *c) { robj *o; - if (!strcasecmp(c->argv[1]->ptr,"usage") && c->argc == 3) { + if (!strcasecmp(c->argv[1]->ptr,"usage") && c->argc >= 3) { + long long samples = OBJ_COMPUTE_SIZE_DEF_SAMPLES; + for (int j = 3; j < c->argc; j++) { + if (!strcasecmp(c->argv[j]->ptr,"samples") && + j+1 < c->argc) + { + if (getLongLongFromObjectOrReply(c,c->argv[j+1],&samples,NULL) + == C_ERR) return; + if (samples < 0) { + addReply(c,shared.syntaxerr); + return; + } + if (samples == 0) samples = LLONG_MAX;; + j++; /* skip option argument. */ + } else { + addReply(c,shared.syntaxerr); + return; + } + } if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nullbulk)) == NULL) return; - size_t usage = objectComputeSize(o,OBJ_COMPUTE_SIZE_DEF_SAMPLES); + size_t usage = objectComputeSize(o,samples); usage += sdsAllocSize(c->argv[1]->ptr); usage += sizeof(dictEntry); addReplyLongLong(c,usage); From a08f8acfc49ae2aee893bbbcebd7826f15b098df Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 15 Sep 2016 17:33:11 +0200 Subject: [PATCH 0131/1722] MEMORY command: HELP + dataset percentage (like in INFO). 
--- src/object.c | 21 ++++++++++++++++++--- src/server.c | 6 +----- src/server.h | 1 + 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/src/object.c b/src/object.c index e47d663e0..02d023248 100644 --- a/src/object.c +++ b/src/object.c @@ -936,6 +936,12 @@ struct redisMemOverhead *getMemoryOverheadData(void) { mh->overhead_total = mem_total; mh->dataset = zmalloc_used - mem_total; + + size_t net_usage = 1; + if (zmalloc_used > mh->startup_allocated) + net_usage = zmalloc_used - mh->startup_allocated; + mh->dataset_perc = (float)mh->dataset*100/net_usage; + return mh; } @@ -974,7 +980,7 @@ void memoryCommand(client *c) { } else if (!strcasecmp(c->argv[1]->ptr,"overhead") && c->argc == 2) { struct redisMemOverhead *mh = getMemoryOverheadData(); - addReplyMultiBulkLen(c,(8+mh->num_dbs)*2); + addReplyMultiBulkLen(c,(9+mh->num_dbs)*2); addReplyBulkCString(c,"total.allocated"); addReplyLongLong(c,mh->total_allocated); @@ -1010,11 +1016,20 @@ void memoryCommand(client *c) { addReplyBulkCString(c,"overhead.total"); addReplyLongLong(c,mh->overhead_total); - addReplyBulkCString(c,"dataset"); + addReplyBulkCString(c,"dataset.bytes"); addReplyLongLong(c,mh->dataset); + addReplyBulkCString(c,"dataset.percentage"); + addReplyDouble(c,mh->dataset_perc); + freeMemoryOverheadData(mh); + } else if (!strcasecmp(c->argv[1]->ptr,"help") && c->argc == 2) { + addReplyMultiBulkLen(c,2); + addReplyBulkCString(c, + "MEMORY USAGE [SAMPLES ] - Estimate memory usage of key"); + addReplyBulkCString(c, + "MEMORY OVERHEAD - Show memory usage details"); } else { - addReplyError(c,"Syntax error. Try MEMORY [usage ] | [overhead]"); + addReplyError(c,"Syntax error. 
Try MEMORY HELP"); } } diff --git a/src/server.c b/src/server.c index fbc2b0a4d..556ea965b 100644 --- a/src/server.c +++ b/src/server.c @@ -2828,10 +2828,6 @@ sds genRedisInfoString(char *section) { bytesToHuman(used_memory_rss_hmem,server.resident_set_size); bytesToHuman(maxmemory_hmem,server.maxmemory); - size_t net_usage = 1; - if (zmalloc_used > mh->startup_allocated) - net_usage = zmalloc_used - mh->startup_allocated; - if (sections++) info = sdscat(info,"\r\n"); info = sdscatprintf(info, "# Memory\r\n" @@ -2864,7 +2860,7 @@ sds genRedisInfoString(char *section) { mh->overhead_total, mh->startup_allocated, mh->dataset, - (float)mh->dataset*100/net_usage, + mh->dataset_perc, (unsigned long)total_system_mem, total_system_hmem, memory_lua, diff --git a/src/server.h b/src/server.h index fb094507b..3f2028f83 100644 --- a/src/server.h +++ b/src/server.h @@ -780,6 +780,7 @@ struct redisMemOverhead { size_t aof_buffer; size_t overhead_total; size_t dataset; + float dataset_perc; size_t num_dbs; struct { size_t dbid; From 65281a94a51852a432261b09149b1c074d85bd78 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 15 Sep 2016 17:43:13 +0200 Subject: [PATCH 0132/1722] objectComputeSize(): skiplist nodes have different sizes. The size of the node depends on the node level, however it is not stored into the node itself, is an implicit information, so we use zmalloc_size() in order to compute the sorted set size. 
--- src/object.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/object.c b/src/object.c index 02d023248..cfeba5a43 100644 --- a/src/object.c +++ b/src/object.c @@ -754,15 +754,15 @@ size_t objectComputeSize(robj *o, size_t sample_size) { asize = sizeof(*o)+(ziplistBlobLen(o->ptr)); } else if (o->encoding == OBJ_ENCODING_SKIPLIST) { d = ((zset*)o->ptr)->dict; - di = dictGetIterator(d); + zskiplist *zsl = ((zset*)o->ptr)->zsl; + zskiplistNode *znode = zsl->header->level[0].forward; asize = sizeof(*o)+sizeof(zset)+(sizeof(struct dictEntry*)*dictSlots(d)); - while((de = dictNext(di)) != NULL && samples < sample_size) { - ele = dictGetKey(de); - elesize += sdsAllocSize(ele); - elesize += sizeof(struct dictEntry) + sizeof(zskiplistNode); + while(znode != NULL && samples < sample_size) { + elesize += sdsAllocSize(znode->ele); + elesize += sizeof(struct dictEntry) + zmalloc_size(znode); samples++; + znode = znode->level[0].forward; } - dictReleaseIterator(di); if (samples) asize += (double)elesize/samples*dictSize(d); } else { serverPanic("Unknown sorted set encoding"); From d28b77b15fd78c825c1693265c9265ab712556d3 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 16 Sep 2016 10:11:52 +0200 Subject: [PATCH 0133/1722] Group MEMORY command related APIs together in the source code. --- src/object.c | 106 +++++++++++++++++++++++++-------------------------- 1 file changed, 53 insertions(+), 53 deletions(-) diff --git a/src/object.c b/src/object.c index cfeba5a43..401e88126 100644 --- a/src/object.c +++ b/src/object.c @@ -692,7 +692,7 @@ char *strEncoding(int encoding) { } } -/* ========================== Objects introspection ========================= */ +/* =========================== Memory introspection ========================== */ /* Returns the size in bytes consumed by the key's value in RAM. 
* Note that the returned value is just an approximation, especially in the @@ -792,58 +792,6 @@ size_t objectComputeSize(robj *o, size_t sample_size) { return asize; } -/* ======================= The OBJECT and MEMORY commands =================== */ - -/* This is a helper function for the OBJECT command. We need to lookup keys - * without any modification of LRU or other parameters. */ -robj *objectCommandLookup(client *c, robj *key) { - dictEntry *de; - - if ((de = dictFind(c->db->dict,key->ptr)) == NULL) return NULL; - return (robj*) dictGetVal(de); -} - -robj *objectCommandLookupOrReply(client *c, robj *key, robj *reply) { - robj *o = objectCommandLookup(c,key); - - if (!o) addReply(c, reply); - return o; -} - -/* Object command allows to inspect the internals of an Redis Object. - * Usage: OBJECT */ -void objectCommand(client *c) { - robj *o; - - if (!strcasecmp(c->argv[1]->ptr,"refcount") && c->argc == 3) { - if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nullbulk)) - == NULL) return; - addReplyLongLong(c,o->refcount); - } else if (!strcasecmp(c->argv[1]->ptr,"encoding") && c->argc == 3) { - if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nullbulk)) - == NULL) return; - addReplyBulkCString(c,strEncoding(o->encoding)); - } else if (!strcasecmp(c->argv[1]->ptr,"idletime") && c->argc == 3) { - if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nullbulk)) - == NULL) return; - if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) { - addReplyError(c,"An LFU maxmemory policy is selected, idle time not tracked. 
Please note that when switching between policies at runtime LRU and LFU data will take some time to adjust."); - return; - } - addReplyLongLong(c,estimateObjectIdleTime(o)/1000); - } else if (!strcasecmp(c->argv[1]->ptr,"freq") && c->argc == 3) { - if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nullbulk)) - == NULL) return; - if (server.maxmemory_policy & MAXMEMORY_FLAG_LRU) { - addReplyError(c,"An LRU maxmemory policy is selected, access frequency not tracked. Please note that when switching between policies at runtime LRU and LFU data will take some time to adjust."); - return; - } - addReplyLongLong(c,o->lru&255); - } else { - addReplyError(c,"Syntax error. Try OBJECT (refcount|encoding|idletime|freq)"); - } -} - /* Release data obtained with getMemoryOverheadData(). */ void freeMemoryOverheadData(struct redisMemOverhead *mh) { zfree(mh->db); @@ -945,6 +893,58 @@ struct redisMemOverhead *getMemoryOverheadData(void) { return mh; } +/* ======================= The OBJECT and MEMORY commands =================== */ + +/* This is a helper function for the OBJECT command. We need to lookup keys + * without any modification of LRU or other parameters. */ +robj *objectCommandLookup(client *c, robj *key) { + dictEntry *de; + + if ((de = dictFind(c->db->dict,key->ptr)) == NULL) return NULL; + return (robj*) dictGetVal(de); +} + +robj *objectCommandLookupOrReply(client *c, robj *key, robj *reply) { + robj *o = objectCommandLookup(c,key); + + if (!o) addReply(c, reply); + return o; +} + +/* Object command allows to inspect the internals of an Redis Object. 
+ * Usage: OBJECT */ +void objectCommand(client *c) { + robj *o; + + if (!strcasecmp(c->argv[1]->ptr,"refcount") && c->argc == 3) { + if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nullbulk)) + == NULL) return; + addReplyLongLong(c,o->refcount); + } else if (!strcasecmp(c->argv[1]->ptr,"encoding") && c->argc == 3) { + if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nullbulk)) + == NULL) return; + addReplyBulkCString(c,strEncoding(o->encoding)); + } else if (!strcasecmp(c->argv[1]->ptr,"idletime") && c->argc == 3) { + if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nullbulk)) + == NULL) return; + if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) { + addReplyError(c,"An LFU maxmemory policy is selected, idle time not tracked. Please note that when switching between policies at runtime LRU and LFU data will take some time to adjust."); + return; + } + addReplyLongLong(c,estimateObjectIdleTime(o)/1000); + } else if (!strcasecmp(c->argv[1]->ptr,"freq") && c->argc == 3) { + if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nullbulk)) + == NULL) return; + if (server.maxmemory_policy & MAXMEMORY_FLAG_LRU) { + addReplyError(c,"An LRU maxmemory policy is selected, access frequency not tracked. Please note that when switching between policies at runtime LRU and LFU data will take some time to adjust."); + return; + } + addReplyLongLong(c,o->lru&255); + } else { + addReplyError(c,"Syntax error. Try OBJECT (refcount|encoding|idletime|freq)"); + } +} + /* The memory command will eventually be a complete interface for the * memory introspection capabilities of Redis. * From 03c1271cf2bf67c350dd21561acf7a3a7a3b8483 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 16 Sep 2016 10:26:23 +0200 Subject: [PATCH 0134/1722] Memory related subcommands of DEBUG moved to MEMORY. 
--- src/debug.c | 36 ------------------------------------ src/object.c | 43 ++++++++++++++++++++++++++++++++++++++++--- src/redis-cli.c | 5 +++-- 3 files changed, 43 insertions(+), 41 deletions(-) diff --git a/src/debug.c b/src/debug.c index 6f0b5e702..bceae8b3b 100644 --- a/src/debug.c +++ b/src/debug.c @@ -252,14 +252,6 @@ void computeDatasetDigest(unsigned char *final) { } } -#if defined(USE_JEMALLOC) -void inputCatSds(void *result, const char *str) { - /* result is actually a (sds *), so re-cast it here */ - sds *info = (sds *)result; - *info = sdscat(*info, str); -} -#endif - void debugCommand(client *c) { if (c->argc == 1) { addReplyError(c,"You must specify a subcommand for DEBUG. Try DEBUG HELP for info."); @@ -303,10 +295,6 @@ void debugCommand(client *c) { "structsize -- Return the size of different Redis core C structures."); blen++; addReplyStatus(c, "htstats -- Return hash table statistics of the specified Redis database."); - blen++; addReplyStatus(c, - "jemalloc info -- Show internal jemalloc statistics."); - blen++; addReplyStatus(c, - "jemalloc purge -- Force jemalloc to release unused memory."); setDeferredMultiBulkLength(c,blenp,blen); } else if (!strcasecmp(c->argv[1]->ptr,"segfault")) { *((char*)-1) = 'x'; @@ -520,30 +508,6 @@ void debugCommand(client *c) { stats = sdscat(stats,buf); addReplyBulkSds(c,stats); - } else if (!strcasecmp(c->argv[1]->ptr,"jemalloc") && c->argc == 3) { -#if defined(USE_JEMALLOC) - if (!strcasecmp(c->argv[2]->ptr, "info")) { - sds info = sdsempty(); - je_malloc_stats_print(inputCatSds, &info, NULL); - addReplyBulkSds(c, info); - } else if (!strcasecmp(c->argv[2]->ptr, "purge")) { - char tmp[32]; - unsigned narenas = 0; - size_t sz = sizeof(unsigned); - if (!je_mallctl("arenas.narenas", &narenas, &sz, NULL, 0)) { - sprintf(tmp, "arena.%d.purge", narenas); - if (!je_mallctl(tmp, NULL, 0, NULL, 0)) { - addReply(c, shared.ok); - return; - } - } - addReplyError(c, "Error purging dirty pages"); - } else { - 
addReplyErrorFormat(c, "Valid jemalloc debug fields: info, purge"); - } -#else - addReplyErrorFormat(c, "jemalloc support not available"); -#endif } else { addReplyErrorFormat(c, "Unknown DEBUG subcommand or wrong number of arguments for '%s'", (char*)c->argv[1]->ptr); diff --git a/src/object.c b/src/object.c index 401e88126..bb1a3eb55 100644 --- a/src/object.c +++ b/src/object.c @@ -893,6 +893,14 @@ struct redisMemOverhead *getMemoryOverheadData(void) { return mh; } +/* Helper for "MEMORY allocator-stats", used as a callback for the jemalloc + * stats output. */ +void inputCatSds(void *result, const char *str) { + /* result is actually a (sds *), so re-cast it here */ + sds *info = (sds *)result; + *info = sdscat(*info, str); +} + /* ======================= The OBJECT and MEMORY commands =================== */ /* This is a helper function for the OBJECT command. We need to lookup keys @@ -1023,12 +1031,41 @@ void memoryCommand(client *c) { addReplyDouble(c,mh->dataset_perc); freeMemoryOverheadData(mh); + } else if (!strcasecmp(c->argv[1]->ptr,"allocator-stats") && c->argc == 2) { +#if defined(USE_JEMALLOC) + sds info = sdsempty(); + je_malloc_stats_print(inputCatSds, &info, NULL); + addReplyBulkSds(c, info); +#else + addReplyBulkCString(c,"Stats not supported for the current allocator"); +#endif + } else if (!strcasecmp(c->argv[1]->ptr,"purge") && c->argc == 2) { +#if defined(USE_JEMALLOC) + char tmp[32]; + unsigned narenas = 0; + size_t sz = sizeof(unsigned); + if (!je_mallctl("arenas.narenas", &narenas, &sz, NULL, 0)) { + sprintf(tmp, "arena.%d.purge", narenas); + if (!je_mallctl(tmp, NULL, 0, NULL, 0)) { + addReply(c, shared.ok); + return; + } + } + addReplyError(c, "Error purging dirty pages"); +#else + addReply(c, shared.ok); + /* Nothing to do for other allocators. 
*/ +#endif } else if (!strcasecmp(c->argv[1]->ptr,"help") && c->argc == 2) { - addReplyMultiBulkLen(c,2); + addReplyMultiBulkLen(c,4); addReplyBulkCString(c, - "MEMORY USAGE [SAMPLES ] - Estimate memory usage of key"); +"MEMORY USAGE [SAMPLES ] - Estimate memory usage of key"); addReplyBulkCString(c, - "MEMORY OVERHEAD - Show memory usage details"); +"MEMORY OVERHEAD - Show memory usage details"); + addReplyBulkCString(c, +"MEMORY PURGE - Ask the allocator to release memory"); + addReplyBulkCString(c, +"MEMORY ALLOCATOR-STATS - Show allocator internal stats"); } else { addReplyError(c,"Syntax error. Try MEMORY HELP"); } diff --git a/src/redis-cli.c b/src/redis-cli.c index d1735d638..2036e7f46 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -843,8 +843,9 @@ static int cliSendCommand(int argc, char **argv, int repeat) { output_raw = 0; if (!strcasecmp(command,"info") || (argc >= 2 && !strcasecmp(command,"debug") && - ((!strcasecmp(argv[1],"jemalloc") && !strcasecmp(argv[2],"info")) || - !strcasecmp(argv[1],"htstats"))) || + !strcasecmp(argv[1],"htstats")) || + (argc >= 2 && !strcasecmp(command,"memory") && + !strcasecmp(argv[1],"allocator-stats")) || (argc == 2 && !strcasecmp(command,"cluster") && (!strcasecmp(argv[1],"nodes") || !strcasecmp(argv[1],"info"))) || From 00c0c40ba5e7ef12b674232a662a9e73acfcf379 Mon Sep 17 00:00:00 2001 From: oranagra Date: Wed, 18 May 2016 08:08:43 +0300 Subject: [PATCH 0135/1722] add zmalloc used mem to DEBUG SDSLEN --- src/debug.c | 8 +++++--- src/server.h | 2 ++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/debug.c b/src/debug.c index bceae8b3b..e00526577 100644 --- a/src/debug.c +++ b/src/debug.c @@ -409,12 +409,14 @@ void debugCommand(client *c) { addReplyError(c,"Not an sds encoded string."); } else { addReplyStatusFormat(c, - "key_sds_len:%lld, key_sds_avail:%lld, " - "val_sds_len:%lld, val_sds_avail:%lld", + "key_sds_len:%lld, key_sds_avail:%lld, key_zmalloc: %lld, " + "val_sds_len:%lld, 
val_sds_avail:%lld, val_zmalloc: %lld", (long long) sdslen(key), (long long) sdsavail(key), + (long long) sdsZmallocSize(key), (long long) sdslen(val->ptr), - (long long) sdsavail(val->ptr)); + (long long) sdsavail(val->ptr), + (long long) getStringObjectSdsUsedMemory(val)); } } else if (!strcasecmp(c->argv[1]->ptr,"populate") && (c->argc == 3 || c->argc == 4)) { diff --git a/src/server.h b/src/server.h index 3f2028f83..6d5edb5ae 100644 --- a/src/server.h +++ b/src/server.h @@ -1242,6 +1242,8 @@ void addReplyHumanLongDouble(client *c, long double d); void addReplyLongLong(client *c, long long ll); void addReplyMultiBulkLen(client *c, long length); void copyClientOutputBuffer(client *dst, client *src); +size_t sdsZmallocSize(sds s); +size_t getStringObjectSdsUsedMemory(robj *o); void *dupClientReplyValue(void *o); void getClientsMaxBuffers(unsigned long *longest_output_list, unsigned long *biggest_input_buffer); From 29a75e3d62146d2c6b297ffecceddf3a38d07453 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 16 Sep 2016 10:43:19 +0200 Subject: [PATCH 0136/1722] Provide percentage of memory peak used info. 
--- src/object.c | 10 +++++++++- src/server.c | 2 ++ src/server.h | 2 ++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/object.c b/src/object.c index bb1a3eb55..110f7d1c6 100644 --- a/src/object.c +++ b/src/object.c @@ -810,6 +810,7 @@ struct redisMemOverhead *getMemoryOverheadData(void) { mh->total_allocated = zmalloc_used; mh->startup_allocated = server.initial_memory_usage; + mh->peak_allocated = server.stat_peak_memory; mem_total += server.initial_memory_usage; mem = 0; @@ -889,6 +890,7 @@ struct redisMemOverhead *getMemoryOverheadData(void) { if (zmalloc_used > mh->startup_allocated) net_usage = zmalloc_used - mh->startup_allocated; mh->dataset_perc = (float)mh->dataset*100/net_usage; + mh->peak_perc = (float)zmalloc_used*100/mh->peak_allocated; return mh; } @@ -988,7 +990,10 @@ void memoryCommand(client *c) { } else if (!strcasecmp(c->argv[1]->ptr,"overhead") && c->argc == 2) { struct redisMemOverhead *mh = getMemoryOverheadData(); - addReplyMultiBulkLen(c,(9+mh->num_dbs)*2); + addReplyMultiBulkLen(c,(11+mh->num_dbs)*2); + + addReplyBulkCString(c,"peak.allocated"); + addReplyLongLong(c,mh->peak_allocated); addReplyBulkCString(c,"total.allocated"); addReplyLongLong(c,mh->total_allocated); @@ -1030,6 +1035,9 @@ void memoryCommand(client *c) { addReplyBulkCString(c,"dataset.percentage"); addReplyDouble(c,mh->dataset_perc); + addReplyBulkCString(c,"peak.percentage"); + addReplyDouble(c,mh->peak_perc); + freeMemoryOverheadData(mh); } else if (!strcasecmp(c->argv[1]->ptr,"allocator-stats") && c->argc == 2) { #if defined(USE_JEMALLOC) diff --git a/src/server.c b/src/server.c index 556ea965b..2a37e431d 100644 --- a/src/server.c +++ b/src/server.c @@ -2837,6 +2837,7 @@ sds genRedisInfoString(char *section) { "used_memory_rss_human:%s\r\n" "used_memory_peak:%zu\r\n" "used_memory_peak_human:%s\r\n" + "used_memory_peak_perc:%.2f%%\r\n" "used_memory_overhead:%zu\r\n" "used_memory_startup:%zu\r\n" "used_memory_dataset:%zu\r\n" @@ -2857,6 +2858,7 @@ sds 
genRedisInfoString(char *section) { used_memory_rss_hmem, server.stat_peak_memory, peak_hmem, + mh->peak_perc, mh->overhead_total, mh->startup_allocated, mh->dataset, diff --git a/src/server.h b/src/server.h index 6d5edb5ae..f6d18cd3c 100644 --- a/src/server.h +++ b/src/server.h @@ -772,6 +772,7 @@ typedef struct redisOpArray { /* This structure is returned by the getMemoryOverheadData() function in * order to return memory overhead information. */ struct redisMemOverhead { + size_t peak_allocated; size_t total_allocated; size_t startup_allocated; size_t repl_backlog; @@ -781,6 +782,7 @@ struct redisMemOverhead { size_t overhead_total; size_t dataset; float dataset_perc; + float peak_perc; size_t num_dbs; struct { size_t dbid; From e31078c7b1a4937ba19be847e3c75a21b62ec0b6 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 16 Sep 2016 16:36:53 +0200 Subject: [PATCH 0137/1722] MEMORY DOCTOR initial implementation. --- src/object.c | 104 +++++++++++++++++++++++++++++++++++++++++++++--- src/redis-cli.c | 3 +- src/server.c | 2 +- src/server.h | 3 ++ 4 files changed, 104 insertions(+), 8 deletions(-) diff --git a/src/object.c b/src/object.c index 110f7d1c6..976c46a4e 100644 --- a/src/object.c +++ b/src/object.c @@ -811,6 +811,8 @@ struct redisMemOverhead *getMemoryOverheadData(void) { mh->total_allocated = zmalloc_used; mh->startup_allocated = server.initial_memory_usage; mh->peak_allocated = server.stat_peak_memory; + mh->fragmentation = + zmalloc_get_fragmentation_ratio(server.resident_set_size); mem_total += server.initial_memory_usage; mem = 0; @@ -866,6 +868,7 @@ struct redisMemOverhead *getMemoryOverheadData(void) { long long keyscount = dictSize(db->dict); if (keyscount==0) continue; + mh->total_keys += keyscount; mh->db = zrealloc(mh->db,sizeof(mh->db[0])*(mh->num_dbs+1)); mh->db[mh->num_dbs].dbid = j; @@ -885,12 +888,15 @@ struct redisMemOverhead *getMemoryOverheadData(void) { mh->overhead_total = mem_total; mh->dataset = zmalloc_used - mem_total; + mh->peak_perc 
= (float)zmalloc_used*100/mh->peak_allocated; + /* Metrics computed after subtracting the startup memory from + * the total memory. */ size_t net_usage = 1; if (zmalloc_used > mh->startup_allocated) net_usage = zmalloc_used - mh->startup_allocated; mh->dataset_perc = (float)mh->dataset*100/net_usage; - mh->peak_perc = (float)zmalloc_used*100/mh->peak_allocated; + mh->bytes_per_key = mh->total_keys ? (net_usage / mh->total_keys) : 0; return mh; } @@ -903,6 +909,80 @@ void inputCatSds(void *result, const char *str) { *info = sdscat(*info, str); } +/* This implements MEMORY DOCTOR. An human readable analysis of the Redis + * memory condition. */ +sds getMemoryDoctorReport(void) { + int empty = 0; /* Instance is empty or almost empty. */ + int big_peak = 0; /* Memory peak is much larger than used mem. */ + int high_frag = 0; /* High fragmentation. */ + int big_slave_buf = 0; /* Slave buffers are too big. */ + int big_client_buf = 0; /* Client buffers are too big. */ + int num_reports = 0; + struct redisMemOverhead *mh = getMemoryOverheadData(); + + if (mh->total_allocated < (1024*1024*5)) { + empty = 1; + num_reports++; + } else { + /* Peak is > 150% of current used memory? */ + if (((float)mh->peak_allocated / mh->total_allocated) > 1.5) { + big_peak = 1; + num_reports++; + } + + /* Fragmentation is higher than 1.4? */ + if (mh->fragmentation > 1.4) { + high_frag = 1; + num_reports++; + } + + /* Clients using more than 200k each average? */ + long numslaves = listLength(server.slaves); + long numclients = listLength(server.clients)-numslaves; + if (mh->clients_normal / numclients > (1024*200)) { + big_client_buf = 1; + num_reports++; + } + + /* Slaves using more than 10 MB each? */ + if (mh->clients_slaves / numslaves > (1024*1024*10)) { + big_slave_buf = 1; + num_reports++; + } + } + + sds s; + if (num_reports == 0) { + s = sdsnew( + "Hi Sam, I can't find any memory issue in your instnace. 
" + "I can only account for what occurs on this base."); + } else if (empty == 1) { + s = sdsnew( + "Hi Sam, this instance is empty or is using very little memory, " + "my issues detector can't be used in these conditions. " + "Please, leave for your mission on Earth and fill it with some data. " + "The new Sam and I will be back to our programming as soon as I " + "finished rebooting."); + } else { + s = sdsnew("Sam, I detected a few issues in this Redis instance memory implants:\n\n"); + if (big_peak) { + s = sdscat(s," * Peak memory: In the past this instance used more than 150% the memory that is currently using. The allocator is normally not able to release memory after a peak, so you can expect to see a big fragmentation ratio, however this is actually harmless and is only due to the memory peak, and if the Redis instance Resident Set Size (RSS) is currently bigger than expected, the memory will be used as soon as you fill the Redis instance with more data. If the memory peak was only occasional and you want to try to reclaim memory, please try the MEMORY PURGE command, otherwise the only other option is to shutdown and restart the instance.\n\n"); + } + if (high_frag) { + s = sdscatprintf(s," * High fragmentation: This instance has a memory fragmentation greater than 1.4 (this means that the Resident Set Size of the Redis process is much larger than the sum of the logical allocations Redis performed). This problem is usually due either to a large peak memory (check if there is a peak memory entry above in the report) or may result from a workload that causes the allocator to fragment memory a lot. If the problem is a large peak memory, then there is no issue. Otherwise, make sure you are using the Jemalloc allocator and not the default libc malloc. 
Note: The currently used allocator is: %s\n\n", ZMALLOC_LIB); + } + if (big_slave_buf) { + s = sdscat(s," * Big slave buffers: The slave output buffers in this instance are greater than 10MB for each slave (on average). This likely means that there is some slave instance that is struggling receiving data, either because it is too slow or because of networking issues. As a result, data piles on the master output buffers. Please try to identify what slave is not receiving data correctly and why. You can use the INFO output in order to check the slaves delays and the CLIENT LIST command to check the output buffers of each slave.\n\n"); + } + if (big_client_buf) { + s = sdscat(s," * Big client buffers: The clients output buffers in this instance are greater than 200K per client (on average). This may result from different causes, like Pub/Sub clients subscribed to channels bot not receiving data fast enough, so that data piles on the Redis instance output buffer, or clients sending commands with large replies or very large sequences of commands in the same pipeline. Please use the CLIENT LIST command in order to investigate the issue if it causes problems in your instance, or to understand better why certain clients are using a big amount of memory.\n\n"); + } + s = sdscat(s,"I'm here to keep you safe, Sam. I want to help you.\n"); + } + freeMemoryOverheadData(mh); + return s; +} + /* ======================= The OBJECT and MEMORY commands =================== */ /* This is a helper function for the OBJECT command. 
We need to lookup keys @@ -987,10 +1067,10 @@ void memoryCommand(client *c) { usage += sdsAllocSize(c->argv[1]->ptr); usage += sizeof(dictEntry); addReplyLongLong(c,usage); - } else if (!strcasecmp(c->argv[1]->ptr,"overhead") && c->argc == 2) { + } else if (!strcasecmp(c->argv[1]->ptr,"stats") && c->argc == 2) { struct redisMemOverhead *mh = getMemoryOverheadData(); - addReplyMultiBulkLen(c,(11+mh->num_dbs)*2); + addReplyMultiBulkLen(c,(14+mh->num_dbs)*2); addReplyBulkCString(c,"peak.allocated"); addReplyLongLong(c,mh->peak_allocated); @@ -1029,6 +1109,12 @@ void memoryCommand(client *c) { addReplyBulkCString(c,"overhead.total"); addReplyLongLong(c,mh->overhead_total); + addReplyBulkCString(c,"keys.count"); + addReplyLongLong(c,mh->total_keys); + + addReplyBulkCString(c,"keys.bytes-per-key"); + addReplyLongLong(c,mh->bytes_per_key); + addReplyBulkCString(c,"dataset.bytes"); addReplyLongLong(c,mh->dataset); @@ -1038,8 +1124,11 @@ void memoryCommand(client *c) { addReplyBulkCString(c,"peak.percentage"); addReplyDouble(c,mh->peak_perc); + addReplyBulkCString(c,"fragmentation"); + addReplyDouble(c,mh->fragmentation); + freeMemoryOverheadData(mh); - } else if (!strcasecmp(c->argv[1]->ptr,"allocator-stats") && c->argc == 2) { + } else if (!strcasecmp(c->argv[1]->ptr,"malloc-stats") && c->argc == 2) { #if defined(USE_JEMALLOC) sds info = sdsempty(); je_malloc_stats_print(inputCatSds, &info, NULL); @@ -1047,6 +1136,9 @@ void memoryCommand(client *c) { #else addReplyBulkCString(c,"Stats not supported for the current allocator"); #endif + } else if (!strcasecmp(c->argv[1]->ptr,"doctor") && c->argc == 2) { + sds report = getMemoryDoctorReport(); + addReplyBulkSds(c,report); } else if (!strcasecmp(c->argv[1]->ptr,"purge") && c->argc == 2) { #if defined(USE_JEMALLOC) char tmp[32]; @@ -1069,11 +1161,11 @@ void memoryCommand(client *c) { addReplyBulkCString(c, "MEMORY USAGE [SAMPLES ] - Estimate memory usage of key"); addReplyBulkCString(c, -"MEMORY OVERHEAD - Show memory usage 
details"); +"MEMORY STATS - Show memory usage details"); addReplyBulkCString(c, "MEMORY PURGE - Ask the allocator to release memory"); addReplyBulkCString(c, -"MEMORY ALLOCATOR-STATS - Show allocator internal stats"); +"MEMORY MALLOC-STATS - Show allocator internal stats"); } else { addReplyError(c,"Syntax error. Try MEMORY HELP"); } diff --git a/src/redis-cli.c b/src/redis-cli.c index 2036e7f46..4bbf41a6f 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -845,7 +845,8 @@ static int cliSendCommand(int argc, char **argv, int repeat) { (argc >= 2 && !strcasecmp(command,"debug") && !strcasecmp(argv[1],"htstats")) || (argc >= 2 && !strcasecmp(command,"memory") && - !strcasecmp(argv[1],"allocator-stats")) || + (!strcasecmp(argv[1],"allocator-stats") || + !strcasecmp(argv[1],"doctor"))) || (argc == 2 && !strcasecmp(command,"cluster") && (!strcasecmp(argv[1],"nodes") || !strcasecmp(argv[1],"info"))) || diff --git a/src/server.c b/src/server.c index 2a37e431d..36be973e1 100644 --- a/src/server.c +++ b/src/server.c @@ -2870,7 +2870,7 @@ sds genRedisInfoString(char *section) { server.maxmemory, maxmemory_hmem, evict_policy, - zmalloc_get_fragmentation_ratio(server.resident_set_size), + mh->fragmentation, ZMALLOC_LIB, lazyfreeGetPendingObjectsCount() ); diff --git a/src/server.h b/src/server.h index f6d18cd3c..0af439cc5 100644 --- a/src/server.h +++ b/src/server.h @@ -781,8 +781,11 @@ struct redisMemOverhead { size_t aof_buffer; size_t overhead_total; size_t dataset; + size_t total_keys; + size_t bytes_per_key; float dataset_perc; float peak_perc; + float fragmentation; size_t num_dbs; struct { size_t dbid; From 18a863b911997df9e4bedbec210d83aef58b9182 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 16 Sep 2016 16:40:25 +0200 Subject: [PATCH 0138/1722] Surround allocator name with quotes in MEMORY DOCTOR output. 
--- src/object.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/object.c b/src/object.c index 976c46a4e..f224329ec 100644 --- a/src/object.c +++ b/src/object.c @@ -969,7 +969,7 @@ sds getMemoryDoctorReport(void) { s = sdscat(s," * Peak memory: In the past this instance used more than 150% the memory that is currently using. The allocator is normally not able to release memory after a peak, so you can expect to see a big fragmentation ratio, however this is actually harmless and is only due to the memory peak, and if the Redis instance Resident Set Size (RSS) is currently bigger than expected, the memory will be used as soon as you fill the Redis instance with more data. If the memory peak was only occasional and you want to try to reclaim memory, please try the MEMORY PURGE command, otherwise the only other option is to shutdown and restart the instance.\n\n"); } if (high_frag) { - s = sdscatprintf(s," * High fragmentation: This instance has a memory fragmentation greater than 1.4 (this means that the Resident Set Size of the Redis process is much larger than the sum of the logical allocations Redis performed). This problem is usually due either to a large peak memory (check if there is a peak memory entry above in the report) or may result from a workload that causes the allocator to fragment memory a lot. If the problem is a large peak memory, then there is no issue. Otherwise, make sure you are using the Jemalloc allocator and not the default libc malloc. Note: The currently used allocator is: %s\n\n", ZMALLOC_LIB); + s = sdscatprintf(s," * High fragmentation: This instance has a memory fragmentation greater than 1.4 (this means that the Resident Set Size of the Redis process is much larger than the sum of the logical allocations Redis performed). 
This problem is usually due either to a large peak memory (check if there is a peak memory entry above in the report) or may result from a workload that causes the allocator to fragment memory a lot. If the problem is a large peak memory, then there is no issue. Otherwise, make sure you are using the Jemalloc allocator and not the default libc malloc. Note: The currently used allocator is \"%s\".\n\n", ZMALLOC_LIB); } if (big_slave_buf) { s = sdscat(s," * Big slave buffers: The slave output buffers in this instance are greater than 10MB for each slave (on average). This likely means that there is some slave instance that is struggling receiving data, either because it is too slow or because of networking issues. As a result, data piles on the master output buffers. Please try to identify what slave is not receiving data correctly and why. You can use the INFO output in order to check the slaves delays and the CLIENT LIST command to check the output buffers of each slave.\n\n"); From 0bbc67861611f7bdad2388e2b511fbc0d2e0201f Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 16 Sep 2016 16:52:00 +0200 Subject: [PATCH 0139/1722] Typo fixed from MEMORY DOCTOR output. --- src/object.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/object.c b/src/object.c index f224329ec..75839926e 100644 --- a/src/object.c +++ b/src/object.c @@ -954,7 +954,7 @@ sds getMemoryDoctorReport(void) { sds s; if (num_reports == 0) { s = sdsnew( - "Hi Sam, I can't find any memory issue in your instnace. " + "Hi Sam, I can't find any memory issue in your instance. " "I can only account for what occurs on this base."); } else if (empty == 1) { s = sdsnew( From f8eb8712441cdc232fcefbab76e3e5cd8bc7ea89 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 19 Sep 2016 09:47:35 +0200 Subject: [PATCH 0140/1722] redis-cli: "allocator-stats" -> "malloc-stats". It was changed in Redis but not in redis-cli. Thanks to @oranagra for signaling. 
--- src/redis-cli.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 4bbf41a6f..054c2fc7c 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -845,7 +845,7 @@ static int cliSendCommand(int argc, char **argv, int repeat) { (argc >= 2 && !strcasecmp(command,"debug") && !strcasecmp(argv[1],"htstats")) || (argc >= 2 && !strcasecmp(command,"memory") && - (!strcasecmp(argv[1],"allocator-stats") || + (!strcasecmp(argv[1],"malloc-stats") || !strcasecmp(argv[1],"doctor"))) || (argc == 2 && !strcasecmp(command,"cluster") && (!strcasecmp(argv[1],"nodes") || From 1525b7380f12b2314626886d34b1c0fa12aa6833 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 19 Sep 2016 10:28:05 +0200 Subject: [PATCH 0141/1722] zmalloc: zmalloc_get_smap_bytes_by_field() modified to work for any PID. The goal is to get copy-on-write amount of the child from the parent. --- src/aof.c | 2 +- src/latency.c | 2 +- src/rdb.c | 4 ++-- src/zmalloc.c | 24 ++++++++++++++++++------ src/zmalloc.h | 4 ++-- 5 files changed, 24 insertions(+), 12 deletions(-) diff --git a/src/aof.c b/src/aof.c index 5523066b5..fc261db21 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1328,7 +1328,7 @@ int rewriteAppendOnlyFileBackground(void) { redisSetProcTitle("redis-aof-rewrite"); snprintf(tmpfile,256,"temp-rewriteaof-bg-%d.aof", (int) getpid()); if (rewriteAppendOnlyFile(tmpfile) == C_OK) { - size_t private_dirty = zmalloc_get_private_dirty(); + size_t private_dirty = zmalloc_get_private_dirty(-1); if (private_dirty) { serverLog(LL_NOTICE, diff --git a/src/latency.c b/src/latency.c index 6f8b2a59f..53e0ec7be 100644 --- a/src/latency.c +++ b/src/latency.c @@ -79,7 +79,7 @@ int THPIsEnabled(void) { * value of the function is non-zero, the process is being targeted by * THP support, and is likely to have memory usage / latency issues. 
*/ int THPGetAnonHugePagesSize(void) { - return zmalloc_get_smap_bytes_by_field("AnonHugePages:"); + return zmalloc_get_smap_bytes_by_field("AnonHugePages:",-1); } /* ---------------------------- Latency API --------------------------------- */ diff --git a/src/rdb.c b/src/rdb.c index 0cda23c5d..fd80fb3c9 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -1024,7 +1024,7 @@ int rdbSaveBackground(char *filename) { redisSetProcTitle("redis-rdb-bgsave"); retval = rdbSave(filename); if (retval == C_OK) { - size_t private_dirty = zmalloc_get_private_dirty(); + size_t private_dirty = zmalloc_get_private_dirty(-1); if (private_dirty) { serverLog(LL_NOTICE, @@ -1761,7 +1761,7 @@ int rdbSaveToSlavesSockets(void) { retval = C_ERR; if (retval == C_OK) { - size_t private_dirty = zmalloc_get_private_dirty(); + size_t private_dirty = zmalloc_get_private_dirty(-1); if (private_dirty) { serverLog(LL_NOTICE, diff --git a/src/zmalloc.c b/src/zmalloc.c index ab4af99e2..367258746 100644 --- a/src/zmalloc.c +++ b/src/zmalloc.c @@ -304,15 +304,26 @@ float zmalloc_get_fragmentation_ratio(size_t rss) { * /proc/self/smaps. The field must be specified with trailing ":" as it * apperas in the smaps output. * - * Example: zmalloc_get_smap_bytes_by_field("Rss:"); + * If a pid is specified, the information is extracted for such a pid, + * otherwise if pid is -1 the information is reported is about the + * current process. 
+ * + * Example: zmalloc_get_smap_bytes_by_field("Rss:",-1); */ #if defined(HAVE_PROC_SMAPS) -size_t zmalloc_get_smap_bytes_by_field(char *field) { +size_t zmalloc_get_smap_bytes_by_field(char *field, long pid) { char line[1024]; size_t bytes = 0; - FILE *fp = fopen("/proc/self/smaps","r"); int flen = strlen(field); + if (pid == -1) { + FILE *fp = fopen("/proc/self/smaps","r"); + } else { + char filename[128]; + snprintf(filename,sizeof(filename),"/proc/%ld/smaps",pid); + FILE *fp = fopen(filename,"r"); + } + if (!fp) return 0; while(fgets(line,sizeof(line),fp) != NULL) { if (strncmp(line,field,flen) == 0) { @@ -327,14 +338,15 @@ size_t zmalloc_get_smap_bytes_by_field(char *field) { return bytes; } #else -size_t zmalloc_get_smap_bytes_by_field(char *field) { +size_t zmalloc_get_smap_bytes_by_field(char *field, long pid) { ((void) field); + ((void) pid); return 0; } #endif -size_t zmalloc_get_private_dirty(void) { - return zmalloc_get_smap_bytes_by_field("Private_Dirty:"); +size_t zmalloc_get_private_dirty(long pid) { + return zmalloc_get_smap_bytes_by_field("Private_Dirty:",pid); } /* Returns the size of physical memory (RAM) in bytes. diff --git a/src/zmalloc.h b/src/zmalloc.h index a47ea6ccf..9badf8f4c 100644 --- a/src/zmalloc.h +++ b/src/zmalloc.h @@ -75,8 +75,8 @@ void zmalloc_enable_thread_safeness(void); void zmalloc_set_oom_handler(void (*oom_handler)(size_t)); float zmalloc_get_fragmentation_ratio(size_t rss); size_t zmalloc_get_rss(void); -size_t zmalloc_get_private_dirty(void); -size_t zmalloc_get_smap_bytes_by_field(char *field); +size_t zmalloc_get_private_dirty(long pid); +size_t zmalloc_get_smap_bytes_by_field(char *field, long pid); size_t zmalloc_get_memory_size(void); void zlibc_free(void *ptr); From 477d5e0c7078688a3a38d742c6200e93d57ac3f8 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 19 Sep 2016 10:34:39 +0200 Subject: [PATCH 0142/1722] zmalloc: Make fp var non local to fix build. 
--- src/zmalloc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/zmalloc.c b/src/zmalloc.c index 367258746..22bf84fce 100644 --- a/src/zmalloc.c +++ b/src/zmalloc.c @@ -315,13 +315,14 @@ size_t zmalloc_get_smap_bytes_by_field(char *field, long pid) { char line[1024]; size_t bytes = 0; int flen = strlen(field); + FILE *fp; if (pid == -1) { - FILE *fp = fopen("/proc/self/smaps","r"); + fp = fopen("/proc/self/smaps","r"); } else { char filename[128]; snprintf(filename,sizeof(filename),"/proc/%ld/smaps",pid); - FILE *fp = fopen(filename,"r"); + fp = fopen(filename,"r"); } if (!fp) return 0; From 5ac4559da33b389ec2edfe0ec0e7e24857683aa5 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 19 Sep 2016 13:45:20 +0200 Subject: [PATCH 0143/1722] Child -> Parent pipe for COW info transferring. --- src/Makefile | 2 +- src/aof.c | 5 +++ src/childinfo.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++++ src/rdb.c | 10 ++++++ src/server.c | 16 ++++++++-- src/server.h | 19 +++++++++++ 6 files changed, 132 insertions(+), 3 deletions(-) create mode 100644 src/childinfo.c diff --git a/src/Makefile b/src/Makefile index 6bd8d8d66..2bf3c9347 100644 --- a/src/Makefile +++ b/src/Makefile @@ -128,7 +128,7 @@ endif REDIS_SERVER_NAME=redis-server REDIS_SENTINEL_NAME=redis-sentinel -REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o +REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o 
sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o REDIS_CLI_NAME=redis-cli REDIS_CLI_OBJ=anet.o adlist.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o REDIS_BENCHMARK_NAME=redis-benchmark diff --git a/src/aof.c b/src/aof.c index fc261db21..e52b7e214 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1319,6 +1319,7 @@ int rewriteAppendOnlyFileBackground(void) { if (server.aof_child_pid != -1 || server.rdb_child_pid != -1) return C_ERR; if (aofCreatePipes() != C_OK) return C_ERR; + openChildInfoPipe(); start = ustime(); if ((childpid = fork()) == 0) { char tmpfile[256]; @@ -1335,6 +1336,9 @@ int rewriteAppendOnlyFileBackground(void) { "AOF rewrite: %zu MB of memory used by copy-on-write", private_dirty/(1024*1024)); } + + server.child_info_data.cow_size = private_dirty; + sendChildInfo(CHILD_INFO_TYPE_AOF); exitFromChild(0); } else { exitFromChild(1); @@ -1345,6 +1349,7 @@ int rewriteAppendOnlyFileBackground(void) { server.stat_fork_rate = (double) zmalloc_used_memory() * 1000000 / server.stat_fork_time / (1024*1024*1024); /* GB per second. */ latencyAddSampleIfNeeded("fork",server.stat_fork_time/1000); if (childpid == -1) { + closeChildInfoPipe(); serverLog(LL_WARNING, "Can't rewrite append only file in background: fork: %s", strerror(errno)); diff --git a/src/childinfo.c b/src/childinfo.c new file mode 100644 index 000000000..123c20421 --- /dev/null +++ b/src/childinfo.c @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2016, Salvatore Sanfilippo + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Redis nor the names of its contributors may be used + * to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "server.h" +#include + +/* Open a child-parent channel used in order to move information about the + * RDB / AOF saving process from the child to the parent (for instance + * the amount of copy on write memory used) */ +void openChildInfoPipe(void) { + if (pipe(server.child_info_pipe) == -1) { + /* On error our two file descriptors should be still set to -1, + * but we call anyway closeChildInfoPipe() since it can't hurt.
*/ + closeChildInfoPipe(); + } else if (anetNonBlock(NULL,server.child_info_pipe[0]) != ANET_OK) { + closeChildInfoPipe(); + } +} + +/* Close the pipes opened with openChildInfoPipe(). */ +void closeChildInfoPipe(void) { + if (server.child_info_pipe[0] != -1 || + server.child_info_pipe[1] != -1) + { + close(server.child_info_pipe[0]); + close(server.child_info_pipe[1]); + server.child_info_pipe[0] = -1; + server.child_info_pipe[1] = -1; + } +} + +/* Send COW data to parent. The child should call this function after populating + * the corresponding fields it wants to send (according to the process type). */ +void sendChildInfo(int ptype) { + if (server.child_info_pipe[1] == -1) return; + server.child_info_data.magic = CHILD_INFO_MAGIC; + server.child_info_data.process_type = ptype; + ssize_t wlen = sizeof(server.child_info_data); + if (write(server.child_info_pipe[1],&server.child_info_data,wlen) != wlen) { + /* Nothing to do on error, this will be detected by the other side. */ + } +} + +/* Receive COW data from child.
*/ +void receiveChildInfo(void) { + if (server.child_info_pipe[0] == -1) return; + ssize_t wlen = sizeof(server.child_info_data); + if (read(server.child_info_pipe[0],&server.child_info_data,wlen) == wlen && + server.child_info_data.magic == CHILD_INFO_MAGIC) + { + if (server.child_info_data.process_type == CHILD_INFO_TYPE_RDB) { + server.stat_rdb_cow_bytes = server.child_info_data.cow_size; + } else if (server.child_info_data.process_type == CHILD_INFO_TYPE_AOF) { + server.stat_aof_cow_bytes = server.child_info_data.cow_size; + } + } +} diff --git a/src/rdb.c b/src/rdb.c index fd80fb3c9..5e03dbab5 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -1014,6 +1014,7 @@ int rdbSaveBackground(char *filename) { server.dirty_before_bgsave = server.dirty; server.lastbgsave_try = time(NULL); + openChildInfoPipe(); start = ustime(); if ((childpid = fork()) == 0) { @@ -1031,6 +1032,9 @@ int rdbSaveBackground(char *filename) { "RDB: %zu MB of memory used by copy-on-write", private_dirty/(1024*1024)); } + + server.child_info_data.cow_size = private_dirty; + sendChildInfo(CHILD_INFO_TYPE_RDB); } exitFromChild((retval == C_OK) ? 0 : 1); } else { @@ -1039,6 +1043,7 @@ int rdbSaveBackground(char *filename) { server.stat_fork_rate = (double) zmalloc_used_memory() * 1000000 / server.stat_fork_time / (1024*1024*1024); /* GB per second. */ latencyAddSampleIfNeeded("fork",server.stat_fork_time/1000); if (childpid == -1) { + closeChildInfoPipe(); server.lastbgsave_status = C_ERR; serverLog(LL_WARNING,"Can't save in background: fork: %s", strerror(errno)); @@ -1744,6 +1749,7 @@ int rdbSaveToSlavesSockets(void) { } /* Create the child process. 
*/ + openChildInfoPipe(); start = ustime(); if ((childpid = fork()) == 0) { /* Child */ @@ -1769,6 +1775,9 @@ int rdbSaveToSlavesSockets(void) { private_dirty/(1024*1024)); } + server.child_info_data.cow_size = private_dirty; + sendChildInfo(CHILD_INFO_TYPE_RDB); + /* If we are returning OK, at least one slave was served * with the RDB file as expected, so we need to send a report * to the parent via the pipe. The format of the message is: @@ -1837,6 +1846,7 @@ int rdbSaveToSlavesSockets(void) { } close(pipefds[0]); close(pipefds[1]); + closeChildInfoPipe(); } else { serverLog(LL_NOTICE,"Background RDB transfer started by pid %d", childpid); diff --git a/src/server.c b/src/server.c index 36be973e1..77087b56d 100644 --- a/src/server.c +++ b/src/server.c @@ -1046,8 +1046,10 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { (int) server.aof_child_pid); } else if (pid == server.rdb_child_pid) { backgroundSaveDoneHandler(exitcode,bysignal); + if (!bysignal && exitcode == 0) receiveChildInfo(); } else if (pid == server.aof_child_pid) { backgroundRewriteDoneHandler(exitcode,bysignal); + if (!bysignal && exitcode == 0) receiveChildInfo(); } else { if (!ldbRemoveChild(pid)) { serverLog(LL_WARNING, @@ -1056,6 +1058,7 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { } } updateDictResizePolicy(); + closeChildInfoPipe(); } } else { /* If there is not a background saving/rewrite in progress check if @@ -1794,6 +1797,9 @@ void initServer(void) { server.aof_child_pid = -1; server.rdb_child_type = RDB_CHILD_TYPE_NONE; server.rdb_bgsave_scheduled = 0; + server.child_info_pipe[0] = -1; + server.child_info_pipe[1] = -1; + server.child_info_data.magic = 0; aofRewriteBufferReset(); server.aof_buf = sdsempty(); server.lastsave = time(NULL); /* At startup we consider the DB saved. */ @@ -1805,6 +1811,8 @@ void initServer(void) { /* A few stats we don't want to reset: server startup time, and peak mem. 
*/ server.stat_starttime = time(NULL); server.stat_peak_memory = 0; + server.stat_rdb_cow_bytes = 0; + server.stat_aof_cow_bytes = 0; server.resident_set_size = 0; server.lastbgsave_status = C_OK; server.aof_last_write_status = C_OK; @@ -2889,13 +2897,15 @@ sds genRedisInfoString(char *section) { "rdb_last_bgsave_status:%s\r\n" "rdb_last_bgsave_time_sec:%jd\r\n" "rdb_current_bgsave_time_sec:%jd\r\n" + "rdb_last_cow_size:%zu\r\n" "aof_enabled:%d\r\n" "aof_rewrite_in_progress:%d\r\n" "aof_rewrite_scheduled:%d\r\n" "aof_last_rewrite_time_sec:%jd\r\n" "aof_current_rewrite_time_sec:%jd\r\n" "aof_last_bgrewrite_status:%s\r\n" - "aof_last_write_status:%s\r\n", + "aof_last_write_status:%s\r\n" + "aof_last_cow_size:%zu\r\n", server.loading, server.dirty, server.rdb_child_pid != -1, @@ -2904,6 +2914,7 @@ sds genRedisInfoString(char *section) { (intmax_t)server.rdb_save_time_last, (intmax_t)((server.rdb_child_pid == -1) ? -1 : time(NULL)-server.rdb_save_time_start), + server.stat_rdb_cow_bytes, server.aof_state != AOF_OFF, server.aof_child_pid != -1, server.aof_rewrite_scheduled, @@ -2911,7 +2922,8 @@ sds genRedisInfoString(char *section) { (intmax_t)((server.aof_child_pid == -1) ? -1 : time(NULL)-server.aof_rewrite_time_start), (server.aof_lastbgrewrite_status == C_OK) ? "ok" : "err", - (server.aof_last_write_status == C_OK) ? "ok" : "err"); + (server.aof_last_write_status == C_OK) ? "ok" : "err", + server.stat_aof_cow_bytes); if (server.aof_state != AOF_OFF) { info = sdscatprintf(info, diff --git a/src/server.h b/src/server.h index 0af439cc5..8aad8f983 100644 --- a/src/server.h +++ b/src/server.h @@ -806,6 +806,10 @@ struct clusterState; #undef hz #endif +#define CHILD_INFO_MAGIC 0xC17DDA7A12345678LL +#define CHILD_INFO_TYPE_RDB 0 +#define CHILD_INFO_TYPE_AOF 1 + struct redisServer { /* General */ pid_t pid; /* Main process pid. */ @@ -884,6 +888,8 @@ struct redisServer { size_t resident_set_size; /* RSS sampled in serverCron(). 
*/ long long stat_net_input_bytes; /* Bytes read from network. */ long long stat_net_output_bytes; /* Bytes written to network. */ + size_t stat_rdb_cow_bytes; /* Copy on write bytes during RDB saving. */ + size_t stat_aof_cow_bytes; /* Copy on write bytes during AOF rewrite. */ /* The following two are used to track instantaneous metrics, like * number of operations per second, network traffic. */ struct { @@ -958,6 +964,13 @@ struct redisServer { int stop_writes_on_bgsave_err; /* Don't allow writes if can't BGSAVE */ int rdb_pipe_write_result_to_parent; /* RDB pipes used to return the state */ int rdb_pipe_read_result_from_child; /* of each slave in diskless SYNC. */ + /* Pipe and data structures for child -> parent info sharing. */ + int child_info_pipe[2]; /* Pipe used to write the child_info_data. */ + struct { + int process_type; /* AOF or RDB child? */ + size_t cow_size; /* Copy on write size. */ + unsigned long long magic; /* Magic value to make sure data is valid. */ + } child_info_data; /* Propagation of commands in AOF / replication */ redisOpArray also_propagate; /* Additional command to propagate. */ /* Logging */ @@ -1411,6 +1424,12 @@ void aofRewriteBufferReset(void); unsigned long aofRewriteBufferSize(void); ssize_t aofReadDiffFromParent(void); +/* Child info */ +void openChildInfoPipe(void); +void closeChildInfoPipe(void); +void sendChildInfo(int process_type); +void receiveChildInfo(void); + /* Sorted sets data type */ /* Input flags. */ From de5aa838dd1003f6c71f7380eb9b3c79fda63a9e Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 19 Sep 2016 14:11:17 +0200 Subject: [PATCH 0144/1722] Clear child data when opening the pipes. This is important both to reset the magic to 0, so that it will not match if the structure is not explicitly set, and to initialize other things we may add like counters and such. 
--- src/childinfo.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/childinfo.c b/src/childinfo.c index 123c20421..719025e8c 100644 --- a/src/childinfo.c +++ b/src/childinfo.c @@ -40,6 +40,8 @@ void openChildInfoPipe(void) { closeChildInfoPipe(); } else if (anetNonBlock(NULL,server.child_info_pipe[0]) != ANET_OK) { closeChildInfoPipe(); + } else { + memset(&server.child_info_data,0,sizeof(server.child_info_data)); } } From cc88e14c87c0a9a55c973959b4ff794c2bc6c74b Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 20 Sep 2016 17:22:30 +0200 Subject: [PATCH 0145/1722] dict.c: fix dictGenericDelete() return ASAP condition. Recently we moved the "return ASAP" condition for the Delete() function from checking .size to checking .used, which is smarter, however while testing the first table alone always works to ensure the dict is totally empty, when we test the .size field, testing .used requires testing both T0 and T1, since a rehashing could be in progress. --- src/dict.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/dict.c b/src/dict.c index 04dfae6cc..b9b2390f1 100644 --- a/src/dict.c +++ b/src/dict.c @@ -415,7 +415,8 @@ static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) { dictEntry *he, *prevHe; int table; - if (d->ht[0].used == 0) return NULL; + if (d->ht[0].used == 0 && d->ht[1].used == 0) return NULL; + if (dictIsRehashing(d)) _dictRehashStep(d); h = dictHashKey(d, key); From 57009a1ebe0de55473ee10cf6fa4892c17976b12 Mon Sep 17 00:00:00 2001 From: Dvir Volk Date: Wed, 21 Sep 2016 12:30:38 +0300 Subject: [PATCH 0146/1722] added RM_CreateStringPrintf --- src/module.c | 25 +++++++++++++++++++++++-- src/modules/testmodule.c | 26 ++++++++++++++++++++++++++ src/object.c | 2 +- src/redismodule.h | 2 ++ 4 files changed, 52 insertions(+), 3 deletions(-) diff --git a/src/module.c b/src/module.c index 3c757a5cb..b94c98acf 100644 --- a/src/module.c +++ b/src/module.c @@ -681,13 +681,33 @@ void
autoMemoryCollect(RedisModuleCtx *ctx) { * * The string is created by copying the `len` bytes starting * at `ptr`. No reference is retained to the passed buffer. */ -RedisModuleString *RM_CreateString(RedisModuleCtx *ctx, const char *ptr, size_t len) -{ +RedisModuleString *RM_CreateString(RedisModuleCtx *ctx, const char *ptr, size_t len) { RedisModuleString *o = createStringObject(ptr,len); autoMemoryAdd(ctx,REDISMODULE_AM_STRING,o); return o; } + +/* Create a new module string object from a printf format and arguments. + * The returned string must be freed with RedisModule_FreeString(), unless automatic + * memory is enabled. + * + * The string is created using the sds formatter function sdscatvprintf() */ +RedisModuleString *RM_CreateStringPrintf(RedisModuleCtx *ctx, const char *fmt, ...) { + sds s = sdsempty(); + + va_list ap; + va_start(ap, fmt); + s = sdscatvprintf(s, fmt, ap); + va_end(ap); + + RedisModuleString *o = createObject(OBJ_STRING, s); + autoMemoryAdd(ctx,REDISMODULE_AM_STRING,o); + + return o; +} + + /* Like RedisModule_CreatString(), but creates a string starting from a long long * integer instead of taking a buffer and its length. * @@ -3194,6 +3214,7 @@ void moduleRegisterCoreAPI(void) { REGISTER_API(CreateString); REGISTER_API(CreateStringFromLongLong); REGISTER_API(CreateStringFromString); + REGISTER_API(CreateStringPrintf); REGISTER_API(FreeString); REGISTER_API(StringPtrLen); REGISTER_API(AutoMemory); diff --git a/src/modules/testmodule.c b/src/modules/testmodule.c index a1a42f43b..db809a8e6 100644 --- a/src/modules/testmodule.c +++ b/src/modules/testmodule.c @@ -93,6 +93,25 @@ int TestStringAppendAM(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) return REDISMODULE_OK; } +/* TEST.STRING.PRINTF -- Test string formatting. 
*/ +int TestStringPrintf(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + RedisModule_AutoMemory(ctx); + if (argc < 3) { + return RedisModule_WrongArity(ctx); + } + RedisModuleString *s = RedisModule_CreateStringPrintf(ctx, + "Got %d args. argv[1]: %s, argv[2]: %s", + argc, + RedisModule_StringPtrLen(argv[1], NULL), + RedisModule_StringPtrLen(argv[2], NULL) + ); + + RedisModule_ReplyWithString(ctx,s); + + return REDISMODULE_OK; +} + + /* ----------------------------- Test framework ----------------------------- */ /* Return 1 if the reply matches the specified string, otherwise log errors @@ -163,6 +182,9 @@ int TestIt(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { T("test.string.append.am",""); if (!TestAssertStringReply(ctx,reply,"foobar",6)) goto fail; + T("test.string.printf", "cc", "foo", "bar"); + if (!TestAssertStringReply(ctx,reply,"Got 3 args. argv[1]: foo, argv[2]: bar",38)) goto fail; + RedisModule_ReplyWithSimpleString(ctx,"ALL TESTS PASSED"); return REDISMODULE_OK; @@ -188,6 +210,10 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) TestStringAppendAM,"write deny-oom",1,1,1) == REDISMODULE_ERR) return REDISMODULE_ERR; + if (RedisModule_CreateCommand(ctx,"test.string.printf", + TestStringPrintf,"write deny-oom",1,1,1) == REDISMODULE_ERR) + return REDISMODULE_ERR; + if (RedisModule_CreateCommand(ctx,"test.it", TestIt,"readonly",1,1,1) == REDISMODULE_ERR) return REDISMODULE_ERR; diff --git a/src/object.c b/src/object.c index 75839926e..210e980e7 100644 --- a/src/object.c +++ b/src/object.c @@ -75,7 +75,7 @@ robj *makeObjectShared(robj *o) { /* Create a string object with encoding OBJ_ENCODING_RAW, that is a plain * string object where o->ptr points to a proper sds string. 
*/ robj *createRawStringObject(const char *ptr, size_t len) { - return createObject(OBJ_STRING,sdsnewlen(ptr,len)); + return createObject(OBJ_STRING, sdsnewlen(ptr,len)); } /* Create a string object with encoding OBJ_ENCODING_EMBSTR, that is diff --git a/src/redismodule.h b/src/redismodule.h index 0a35cf047..75b13ad52 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -125,6 +125,7 @@ RedisModuleCallReply *REDISMODULE_API_FUNC(RedisModule_CallReplyArrayElement)(Re RedisModuleString *REDISMODULE_API_FUNC(RedisModule_CreateString)(RedisModuleCtx *ctx, const char *ptr, size_t len); RedisModuleString *REDISMODULE_API_FUNC(RedisModule_CreateStringFromLongLong)(RedisModuleCtx *ctx, long long ll); RedisModuleString *REDISMODULE_API_FUNC(RedisModule_CreateStringFromString)(RedisModuleCtx *ctx, const RedisModuleString *str); +RedisModuleString *REDISMODULE_API_FUNC(RedisModule_CreateStringPrintf)(RedisModuleCtx *ctx, const char *fmt, ...); void REDISMODULE_API_FUNC(RedisModule_FreeString)(RedisModuleCtx *ctx, RedisModuleString *str); const char *REDISMODULE_API_FUNC(RedisModule_StringPtrLen)(const RedisModuleString *str, size_t *len); int REDISMODULE_API_FUNC(RedisModule_ReplyWithError)(RedisModuleCtx *ctx, const char *err); @@ -234,6 +235,7 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int REDISMODULE_GET_API(CreateString); REDISMODULE_GET_API(CreateStringFromLongLong); REDISMODULE_GET_API(CreateStringFromString); + REDISMODULE_GET_API(CreateStringPrintf); REDISMODULE_GET_API(FreeString); REDISMODULE_GET_API(StringPtrLen); REDISMODULE_GET_API(AutoMemory); From 00a4156fdf2b12fa94d51b10247937d9469f4016 Mon Sep 17 00:00:00 2001 From: charsyam Date: Sat, 24 Sep 2016 15:53:57 +0900 Subject: [PATCH 0147/1722] Simple change just using slaves instead of server.slaves --- src/replication.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/replication.c b/src/replication.c index fb96fac1b..67091dd0b 100644 --- 
a/src/replication.c +++ b/src/replication.c @@ -244,7 +244,7 @@ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) { } /* Write the command to every slave. */ - listRewind(server.slaves,&li); + listRewind(slaves,&li); while((ln = listNext(&li))) { client *slave = ln->value; From 915683cee943e74992e9aed4509b12b7d1f02e35 Mon Sep 17 00:00:00 2001 From: antirez Date: Sun, 25 Sep 2016 22:48:41 +0200 Subject: [PATCH 0148/1722] Security: CONFIG SET client-output-buffer-limit overflow fixed. This commit fixes a vulnerability reported by Cory Duplantis of Cisco Talos, see TALOS-2016-0206 for reference. CONFIG SET client-output-buffer-limit accepts as client class "master" which is actually only used to implement CLIENT KILL. The "master" class has ID 3. What happens is that the global structure: server.client_obuf_limits[class] Is accessed with class = 3. However it is a 3 elements array, so writing the 4th element means to write up to 24 bytes of memory *after* the end of the array, since the structure is defined as: typedef struct clientBufferLimitsConfig { unsigned long long hard_limit_bytes; unsigned long long soft_limit_bytes; time_t soft_limit_seconds; } clientBufferLimitsConfig; EVALUATION OF IMPACT: Checking what's past the boundaries of the array in the global 'server' structure, we find AOF state fields: clientBufferLimitsConfig client_obuf_limits[CLIENT_TYPE_OBUF_COUNT]; /* AOF persistence */ int aof_state; /* AOF_(ON|OFF|WAIT_REWRITE) */ int aof_fsync; /* Kind of fsync() policy */ char *aof_filename; /* Name of the AOF file */ int aof_no_fsync_on_rewrite; /* Don't fsync if a rewrite is in prog. */ int aof_rewrite_perc; /* Rewrite AOF if % growth is > M and... */ off_t aof_rewrite_min_size; /* the AOF file is at least N bytes. */ off_t aof_rewrite_base_size; /* AOF size on latest startup or rewrite. */ off_t aof_current_size; /* AOF current size.
*/ Writing to most of these fields should be harmless and only cause problems in Redis persistence that should not escalate to security problems. However unfortunately writing to "aof_filename" could be potentially a security issue depending on the access pattern. Searching for "aof.filename" accesses in the source code returns many different usages of the field, including using it as input for open(), logging to the Redis log file or syslog, and calling the rename() syscall. It looks possible that attacks could lead at least to information disclosure of the state and data inside Redis. However note that the attacker must already have access to the server. But, worse than that, it looks possible that being able to change the AOF filename can be used to mount more powerful attacks: like overwriting random files with AOF data (easily a potential security issue as demonstrated here: http://antirez.com/news/96), or even more subtle attacks where the AOF filename is changed to a path where a malicious AOF file is loaded in order to exploit other potential issues when the AOF parser is fed with untrusted input (no known issue currently). The fix checks the places where the 'master' class is specified in order to access configuration data structures, and return an error in these cases. WHO IS AT RISK? The "master" client class was introduced in Redis in Jul 28 2015. Every Redis instance released past this date is not vulnerable while all the releases after this date are. Notably: Redis 3.0.x is NOT vulnerable. Redis 3.2.x IS vulnerable. Redis unstable is vulnerable. In order for the instance to be at risk, at least one of the following conditions must be true: 1. The attacker can access Redis remotely and is able to send the CONFIG SET command (often banned in managed Redis instances). 2. The attacker is able to control the "redis.conf" file and can wait or trigger a server restart. The problem was fixed 26th September 2016 in all the releases affected.
--- src/config.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/config.c b/src/config.c index 1d81180b7..8f3b81a19 100644 --- a/src/config.c +++ b/src/config.c @@ -616,8 +616,9 @@ void loadServerConfigFromString(char *config) { unsigned long long hard, soft; int soft_seconds; - if (class == -1) { - err = "Unrecognized client limit class"; + if (class == -1 || class == CLIENT_TYPE_MASTER) { + err = "Unrecognized client limit class: the user specified " + "an invalid one, or 'master' which has no buffer limits."; goto loaderr; } hard = memtoll(argv[2],NULL); @@ -906,7 +907,8 @@ void configSetCommand(client *c) { long val; if ((j % 4) == 0) { - if (getClientTypeByName(v[j]) == -1) { + int class = getClientTypeByName(v[j]); + if (class == -1 || class == CLIENT_TYPE_MASTER) { sdsfreesplitres(v,vlen); goto badfmt; } From 707413234f6cff5015509eede21bdd7104a4de60 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 27 Sep 2016 00:29:47 +0200 Subject: [PATCH 0149/1722] debug.c: include dlfcn.h regardless of BACKTRACE support. --- src/debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/debug.c b/src/debug.c index e00526577..d48caedcc 100644 --- a/src/debug.c +++ b/src/debug.c @@ -33,6 +33,7 @@ #include #include +#include #ifdef HAVE_BACKTRACE #include @@ -40,7 +41,6 @@ #include #include "bio.h" #include -#include #endif /* HAVE_BACKTRACE */ #ifdef __CYGWIN__ From f73d3e71dcd56425aa169a13b7e3c02e2ea9ddc4 Mon Sep 17 00:00:00 2001 From: antirez Date: Sun, 2 Oct 2016 11:01:36 +0200 Subject: [PATCH 0150/1722] Add compiler optimizations to example module makefile. 
--- src/modules/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/modules/Makefile b/src/modules/Makefile index 3cd51023f..554642ffd 100644 --- a/src/modules/Makefile +++ b/src/modules/Makefile @@ -4,10 +4,10 @@ uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not') # Compile flags for linux / osx ifeq ($(uname_S),Linux) - SHOBJ_CFLAGS ?= -fno-common -g -ggdb -std=c99 + SHOBJ_CFLAGS ?= -fno-common -g -ggdb -std=c99 -O2 SHOBJ_LDFLAGS ?= -shared else - SHOBJ_CFLAGS ?= -dynamic -fno-common -g -ggdb -std=c99 + SHOBJ_CFLAGS ?= -dynamic -fno-common -g -ggdb -std=c99 -O2 SHOBJ_LDFLAGS ?= -bundle -undefined dynamic_lookup endif From 77e9a88be3524cde45df741409315fd3c98cf6fa Mon Sep 17 00:00:00 2001 From: antirez Date: Sun, 2 Oct 2016 16:51:37 +0200 Subject: [PATCH 0151/1722] Modules: API to log from module I/O callbacks. --- src/module.c | 54 +++++++++++++++++++++++++++++++++-------------- src/redismodule.h | 2 ++ 2 files changed, 40 insertions(+), 16 deletions(-) diff --git a/src/module.c b/src/module.c index b94c98acf..ab63f8b04 100644 --- a/src/module.c +++ b/src/module.c @@ -2908,7 +2908,30 @@ void RM_EmitAOF(RedisModuleIO *io, const char *cmdname, const char *fmt, ...) { * Logging * -------------------------------------------------------------------------- */ -/* Produces a log message to the standard Redis log, the format accepts +/* This is the low level function implementing both: + * + * RM_Log() + * RM_LogIOError() + * + */ +void RM_LogRaw(RedisModule *module, const char *levelstr, const char *fmt, va_list ap) { + char msg[LOG_MAX_LEN]; + size_t name_len; + int level; + + if (!strcasecmp(levelstr,"debug")) level = LL_DEBUG; + else if (!strcasecmp(levelstr,"verbose")) level = LL_VERBOSE; + else if (!strcasecmp(levelstr,"notice")) level = LL_NOTICE; + else if (!strcasecmp(levelstr,"warning")) level = LL_WARNING; + else level = LL_VERBOSE; /* Default. 
*/ + + name_len = snprintf(msg, sizeof(msg),"<%s> ", module->name); + vsnprintf(msg + name_len, sizeof(msg) - name_len, fmt, ap); + serverLogRaw(level,msg); +} + +/* + * Produces a log message to the standard Redis log, the format accepts * printf-alike specifiers, while level is a string describing the log * level to use when emitting the log, and must be one of the following: * @@ -2923,26 +2946,24 @@ void RM_EmitAOF(RedisModuleIO *io, const char *cmdname, const char *fmt, ...) { * a few lines of text. */ void RM_Log(RedisModuleCtx *ctx, const char *levelstr, const char *fmt, ...) { - va_list ap; - char msg[LOG_MAX_LEN]; - size_t name_len; - int level; - if (!ctx->module) return; /* Can only log if module is initialized */ - if (!strcasecmp(levelstr,"debug")) level = LL_DEBUG; - else if (!strcasecmp(levelstr,"verbose")) level = LL_VERBOSE; - else if (!strcasecmp(levelstr,"notice")) level = LL_NOTICE; - else if (!strcasecmp(levelstr,"warning")) level = LL_WARNING; - else level = LL_VERBOSE; /* Default. */ - - name_len = snprintf(msg, sizeof(msg),"<%s> ", ctx->module->name); - + va_list ap; va_start(ap, fmt); - vsnprintf(msg + name_len, sizeof(msg) - name_len, fmt, ap); + RM_LogRaw(ctx->module,levelstr,fmt,ap); va_end(ap); +} - serverLogRaw(level,msg); +/* Log errors from RDB / AOF serialization callbacks. + * + * This function should be used when a callback is returning a critical + * error to the caller since cannot load or save the data for some + * critical reason. */ +void RM_LogIOError(RedisModuleIO *io, const char *levelstr, const char *fmt, ...) 
{ + va_list ap; + va_start(ap, fmt); + RM_LogRaw(io->type->module,levelstr,fmt,ap); + va_end(ap); } /* -------------------------------------------------------------------------- @@ -3261,6 +3282,7 @@ void moduleRegisterCoreAPI(void) { REGISTER_API(LoadDouble); REGISTER_API(EmitAOF); REGISTER_API(Log); + REGISTER_API(LogIOError); REGISTER_API(StringAppendBuffer); REGISTER_API(RetainString); REGISTER_API(StringCompare); diff --git a/src/redismodule.h b/src/redismodule.h index 75b13ad52..dcc0aa1ea 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -185,6 +185,7 @@ char *REDISMODULE_API_FUNC(RedisModule_LoadStringBuffer)(RedisModuleIO *io, size void REDISMODULE_API_FUNC(RedisModule_SaveDouble)(RedisModuleIO *io, double value); double REDISMODULE_API_FUNC(RedisModule_LoadDouble)(RedisModuleIO *io); void REDISMODULE_API_FUNC(RedisModule_Log)(RedisModuleCtx *ctx, const char *level, const char *fmt, ...); +void REDISMODULE_API_FUNC(RedisModule_LogIOError)(RedisModuleIO *io, const char *levelstr, const char *fmt, ...); int REDISMODULE_API_FUNC(RedisModule_StringAppendBuffer)(RedisModuleCtx *ctx, RedisModuleString *str, const char *buf, size_t len); void REDISMODULE_API_FUNC(RedisModule_RetainString)(RedisModuleCtx *ctx, RedisModuleString *str); int REDISMODULE_API_FUNC(RedisModule_StringCompare)(RedisModuleString *a, RedisModuleString *b); @@ -282,6 +283,7 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int REDISMODULE_GET_API(LoadDouble); REDISMODULE_GET_API(EmitAOF); REDISMODULE_GET_API(Log); + REDISMODULE_GET_API(LogIOError); REDISMODULE_GET_API(StringAppendBuffer); REDISMODULE_GET_API(RetainString); REDISMODULE_GET_API(StringCompare); From 80df924169b6918f40cfb7d25f790a115e34ccf6 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 3 Oct 2016 00:08:35 +0200 Subject: [PATCH 0152/1722] Modules: API to save/load single precision floating point numbers. When double precision is not needed, to take 2x space in the serialization is not good. 
--- src/module.c | 27 +++++++++++++++++++++++++++ src/rdb.c | 17 +++++++++++++++-- src/rdb.h | 2 ++ src/redismodule.h | 4 ++++ 4 files changed, 48 insertions(+), 2 deletions(-) diff --git a/src/module.c b/src/module.c index ab63f8b04..e7594c166 100644 --- a/src/module.c +++ b/src/module.c @@ -2849,6 +2849,31 @@ double RM_LoadDouble(RedisModuleIO *io) { return value; } +/* In the context of the rdb_save method of a module data type, saves a float + * value to the RDB file. The float can be a valid number, a NaN or infinity. + * It is possible to load back the value with RedisModule_LoadFloat(). */ +void RM_SaveFloat(RedisModuleIO *io, float value) { + if (io->error) return; + int retval = rdbSaveBinaryFloatValue(io->rio, value); + if (retval == -1) { + io->error = 1; + } else { + io->bytes += retval; + } +} + +/* In the context of the rdb_save method of a module data type, loads back the + * float value saved by RedisModule_SaveFloat(). */ +float RM_LoadFloat(RedisModuleIO *io) { + float value; + int retval = rdbLoadBinaryFloatValue(io->rio, &value); + if (retval == -1) { + moduleRDBLoadError(io); + return 0; /* Never reached. */ + } + return value; +} + /* -------------------------------------------------------------------------- * AOF API for modules data types * -------------------------------------------------------------------------- */ @@ -3280,6 +3305,8 @@ void moduleRegisterCoreAPI(void) { REGISTER_API(LoadStringBuffer); REGISTER_API(SaveDouble); REGISTER_API(LoadDouble); + REGISTER_API(SaveFloat); + REGISTER_API(LoadFloat); REGISTER_API(EmitAOF); REGISTER_API(Log); REGISTER_API(LogIOError); diff --git a/src/rdb.c b/src/rdb.c index 5e03dbab5..252044344 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -567,17 +567,30 @@ int rdbLoadDoubleValue(rio *rdb, double *val) { * Return -1 on error, the size of the serialized value on success. 
*/ int rdbSaveBinaryDoubleValue(rio *rdb, double val) { memrev64ifbe(&val); - return rdbWriteRaw(rdb,&val,8); + return rdbWriteRaw(rdb,&val,sizeof(val)); } /* Loads a double from RDB 8 or greater. See rdbSaveBinaryDoubleValue() for * more info. On error -1 is returned, otherwise 0. */ int rdbLoadBinaryDoubleValue(rio *rdb, double *val) { - if (rioRead(rdb,val,8) == 0) return -1; + if (rioRead(rdb,val,sizeof(*val)) == 0) return -1; memrev64ifbe(val); return 0; } +/* Like rdbSaveBinaryDoubleValue() but single precision. */ +int rdbSaveBinaryFloatValue(rio *rdb, float val) { + memrev32ifbe(&val); + return rdbWriteRaw(rdb,&val,sizeof(val)); +} + +/* Like rdbLoadBinaryDoubleValue() but single precision. */ +int rdbLoadBinaryFloatValue(rio *rdb, float *val) { + if (rioRead(rdb,val,sizeof(*val)) == 0) return -1; + memrev32ifbe(val); + return 0; +} + /* Save the object type of object "o". */ int rdbSaveObjectType(rio *rdb, robj *o) { switch (o->type) { diff --git a/src/rdb.h b/src/rdb.h index cd1d65392..60c52a7c1 100644 --- a/src/rdb.h +++ b/src/rdb.h @@ -134,6 +134,8 @@ ssize_t rdbSaveRawString(rio *rdb, unsigned char *s, size_t len); void *rdbGenericLoadStringObject(rio *rdb, int flags, size_t *lenptr); int rdbSaveBinaryDoubleValue(rio *rdb, double val); int rdbLoadBinaryDoubleValue(rio *rdb, double *val); +int rdbSaveBinaryFloatValue(rio *rdb, float val); +int rdbLoadBinaryFloatValue(rio *rdb, float *val); int rdbLoadRio(rio *rdb); #endif diff --git a/src/redismodule.h b/src/redismodule.h index dcc0aa1ea..346110090 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -184,6 +184,8 @@ RedisModuleString *REDISMODULE_API_FUNC(RedisModule_LoadString)(RedisModuleIO *i char *REDISMODULE_API_FUNC(RedisModule_LoadStringBuffer)(RedisModuleIO *io, size_t *lenptr); void REDISMODULE_API_FUNC(RedisModule_SaveDouble)(RedisModuleIO *io, double value); double REDISMODULE_API_FUNC(RedisModule_LoadDouble)(RedisModuleIO *io); +void 
REDISMODULE_API_FUNC(RedisModule_SaveFloat)(RedisModuleIO *io, float value); +float REDISMODULE_API_FUNC(RedisModule_LoadFloat)(RedisModuleIO *io); void REDISMODULE_API_FUNC(RedisModule_Log)(RedisModuleCtx *ctx, const char *level, const char *fmt, ...); void REDISMODULE_API_FUNC(RedisModule_LogIOError)(RedisModuleIO *io, const char *levelstr, const char *fmt, ...); int REDISMODULE_API_FUNC(RedisModule_StringAppendBuffer)(RedisModuleCtx *ctx, RedisModuleString *str, const char *buf, size_t len); @@ -281,6 +283,8 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int REDISMODULE_GET_API(LoadStringBuffer); REDISMODULE_GET_API(SaveDouble); REDISMODULE_GET_API(LoadDouble); + REDISMODULE_GET_API(SaveFloat); + REDISMODULE_GET_API(LoadFloat); REDISMODULE_GET_API(EmitAOF); REDISMODULE_GET_API(Log); REDISMODULE_GET_API(LogIOError); From 185e565bec92c948b65d0d337380eff7c51cc350 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 6 Oct 2016 08:48:21 +0200 Subject: [PATCH 0153/1722] Copyright notice added to module.c. --- src/module.c | 29 +++++++++++++++++++++++++++++ src/server.c | 2 +- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/src/module.c b/src/module.c index e7594c166..c339303b5 100644 --- a/src/module.c +++ b/src/module.c @@ -1,3 +1,32 @@ +/* + * Copyright (c) 2016, Salvatore Sanfilippo + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Redis nor the names of its contributors may be used + * to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + #include "server.h" #include "cluster.h" #include diff --git a/src/server.c b/src/server.c index 77087b56d..36b04abfb 100644 --- a/src/server.c +++ b/src/server.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009-2012, Salvatore Sanfilippo + * Copyright (c) 2009-2016, Salvatore Sanfilippo * All rights reserved. * * Redistribution and use in source and binary forms, with or without From 6b0d47796ff7faf179bcba52d4f14f6db215e111 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 6 Oct 2016 17:05:38 +0200 Subject: [PATCH 0154/1722] Module: Ability to get context from IO context. It was noted by @dvirsky that it is not possible to use string functions when writing the AOF file. This sometimes is critical since the command rewriting may need to be built in the context of the AOF callback, and without access to the context, and the limited types that the AOF production functions will accept, this can be an issue. 
Moreover there are other needs that we can't anticipate regarding the ability to use Redis Modules APIs using the context in order to build representations to emit AOF / RDB. Because of this a new API was added that allows the user to get a temporary context from the IO context. The context is auto released if obtained when the RDB / AOF callback returns. Calling multiple times the function to get the context, always returns the same one, since it is invalid to have more than a single context. --- src/aof.c | 4 ++++ src/module.c | 15 +++++++++++++++ src/rdb.c | 4 ++++ src/redismodule.h | 3 +++ src/server.h | 4 ++++ 5 files changed, 30 insertions(+) diff --git a/src/aof.c b/src/aof.c index e52b7e214..c75153cc7 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1023,6 +1023,10 @@ int rewriteModuleObject(rio *r, robj *key, robj *o) { moduleType *mt = mv->type; moduleInitIOContext(io,mt,r); mt->aof_rewrite(&io,key,mv->value); + if (io.ctx) { + moduleFreeContext(io.ctx); + zfree(io.ctx); + } return io.error ? 0 : 1; } diff --git a/src/module.c b/src/module.c index c339303b5..742d9b974 100644 --- a/src/module.c +++ b/src/module.c @@ -2958,6 +2958,20 @@ void RM_EmitAOF(RedisModuleIO *io, const char *cmdname, const char *fmt, ...) { return; } +/* -------------------------------------------------------------------------- + * IO context handling + * -------------------------------------------------------------------------- */ + +RedisModuleCtx *RM_GetContextFromIO(RedisModuleIO *io) { + if (io->ctx) return io->ctx; /* Can't have more than one... 
*/ + RedisModuleCtx ctxtemplate = REDISMODULE_CTX_INIT; + io->ctx = zmalloc(sizeof(*io)); + *(io->ctx) = ctxtemplate; + io->ctx->module = io->type->module; + io->ctx->client = NULL; + return io->ctx; +} + /* -------------------------------------------------------------------------- * Logging * -------------------------------------------------------------------------- */ @@ -3342,4 +3356,5 @@ void moduleRegisterCoreAPI(void) { REGISTER_API(StringAppendBuffer); REGISTER_API(RetainString); REGISTER_API(StringCompare); + REGISTER_API(GetContextFromIO); } diff --git a/src/rdb.c b/src/rdb.c index 252044344..29f880dac 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -770,6 +770,10 @@ ssize_t rdbSaveObject(rio *rdb, robj *o) { /* Then write the module-specific representation. */ mt->rdb_save(&io,mv->value); + if (io.ctx) { + moduleFreeContext(io.ctx); + zfree(io.ctx); + } return io.error ? -1 : (ssize_t)io.bytes; } else { serverPanic("Unknown object type"); diff --git a/src/redismodule.h b/src/redismodule.h index 346110090..9b3c061ac 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -191,6 +191,7 @@ void REDISMODULE_API_FUNC(RedisModule_LogIOError)(RedisModuleIO *io, const char int REDISMODULE_API_FUNC(RedisModule_StringAppendBuffer)(RedisModuleCtx *ctx, RedisModuleString *str, const char *buf, size_t len); void REDISMODULE_API_FUNC(RedisModule_RetainString)(RedisModuleCtx *ctx, RedisModuleString *str); int REDISMODULE_API_FUNC(RedisModule_StringCompare)(RedisModuleString *a, RedisModuleString *b); +RedisModuleCtx *REDISMODULE_API_FUNC(RM_GetContextFromIO)(RedisModuleIO *io); /* This is included inline inside each Redis module. 
*/ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int apiver) __attribute__((unused)); @@ -291,6 +292,8 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int REDISMODULE_GET_API(StringAppendBuffer); REDISMODULE_GET_API(RetainString); REDISMODULE_GET_API(StringCompare); + REDISMODULE_GET_API(GetIOContext); + REDISMODULE_GET_API(FreeIOContext); RedisModule_SetModuleAttribs(ctx,name,ver,apiver); return REDISMODULE_OK; diff --git a/src/server.h b/src/server.h index 8aad8f983..21e32824d 100644 --- a/src/server.h +++ b/src/server.h @@ -463,6 +463,7 @@ typedef long long mstime_t; /* millisecond time type. */ struct RedisModule; struct RedisModuleIO; struct RedisModuleDigest; +struct RedisModuleCtx; struct redisObject; /* Each module type implementation should export a set of methods in order @@ -516,6 +517,7 @@ typedef struct RedisModuleIO { rio *rio; /* Rio stream. */ moduleType *type; /* Module type doing the operation. */ int error; /* True if error condition happened. */ + struct RedisModuleCtx *ctx; /* Optional context, via GetIOContext() call. */ } RedisModuleIO; #define moduleInitIOContext(iovar,mtype,rioptr) do { \ @@ -523,6 +525,7 @@ typedef struct RedisModuleIO { iovar.type = mtype; \ iovar.bytes = 0; \ iovar.error = 0; \ + iovar.ctx = NULL; \ } while(0); /* Objects encoding. Some kind of objects like Strings and Hashes can be @@ -1222,6 +1225,7 @@ int *moduleGetCommandKeysViaAPI(struct redisCommand *cmd, robj **argv, int argc, int *numkeys); moduleType *moduleTypeLookupModuleByID(uint64_t id); void moduleTypeNameByID(char *name, uint64_t moduleid); +void moduleFreeContext(struct RedisModuleCtx *ctx); /* Utils */ long long ustime(void); From 089e4bb667a7386bc7366757493526800e6a11cd Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 6 Oct 2016 17:10:47 +0200 Subject: [PATCH 0155/1722] Fix name of misspelled function. 
--- src/server.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/server.h b/src/server.h index 21e32824d..b9c46b810 100644 --- a/src/server.h +++ b/src/server.h @@ -517,7 +517,7 @@ typedef struct RedisModuleIO { rio *rio; /* Rio stream. */ moduleType *type; /* Module type doing the operation. */ int error; /* True if error condition happened. */ - struct RedisModuleCtx *ctx; /* Optional context, via GetIOContext() call. */ + struct RedisModuleCtx *ctx; /* Optional context, see RM_GetContextFromIO()*/ } RedisModuleIO; #define moduleInitIOContext(iovar,mtype,rioptr) do { \ From 9558e60f9ca5ffdbb5d835e153f5372e8e2c35e2 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 6 Oct 2016 18:26:02 +0200 Subject: [PATCH 0156/1722] Fix typos in GetContextFromIO API declaration. --- src/redismodule.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/redismodule.h b/src/redismodule.h index 9b3c061ac..eaf5dfab1 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -191,7 +191,7 @@ void REDISMODULE_API_FUNC(RedisModule_LogIOError)(RedisModuleIO *io, const char int REDISMODULE_API_FUNC(RedisModule_StringAppendBuffer)(RedisModuleCtx *ctx, RedisModuleString *str, const char *buf, size_t len); void REDISMODULE_API_FUNC(RedisModule_RetainString)(RedisModuleCtx *ctx, RedisModuleString *str); int REDISMODULE_API_FUNC(RedisModule_StringCompare)(RedisModuleString *a, RedisModuleString *b); -RedisModuleCtx *REDISMODULE_API_FUNC(RM_GetContextFromIO)(RedisModuleIO *io); +RedisModuleCtx *REDISMODULE_API_FUNC(RedisModule_GetContextFromIO)(RedisModuleIO *io); /* This is included inline inside each Redis module. 
*/ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int apiver) __attribute__((unused)); @@ -292,8 +292,7 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int REDISMODULE_GET_API(StringAppendBuffer); REDISMODULE_GET_API(RetainString); REDISMODULE_GET_API(StringCompare); - REDISMODULE_GET_API(GetIOContext); - REDISMODULE_GET_API(FreeIOContext); + REDISMODULE_GET_API(GetContextFromIO); RedisModule_SetModuleAttribs(ctx,name,ver,apiver); return REDISMODULE_OK; From 925752a13d9fa5dbb699be634025838fca8f7208 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 7 Oct 2016 11:55:35 +0200 Subject: [PATCH 0157/1722] Module: API to block clients with threading support. Just a draft to align the main ideas, never executed code. Compiles. --- src/blocked.c | 7 +- src/module.c | 180 ++++++++++++++++++++++++++++++++++++++++++++++---- src/server.c | 4 ++ src/server.h | 9 +++ 4 files changed, 187 insertions(+), 13 deletions(-) diff --git a/src/blocked.c b/src/blocked.c index d22872548..54b26b713 100644 --- a/src/blocked.c +++ b/src/blocked.c @@ -136,6 +136,8 @@ void unblockClient(client *c) { unblockClientWaitingData(c); } else if (c->btype == BLOCKED_WAIT) { unblockClientWaitingReplicas(c); + } else if (c->btype == BLOCKED_MODULE) { + unblockClientFromModule(c); } else { serverPanic("Unknown btype in unblockClient()."); } @@ -153,12 +155,15 @@ void unblockClient(client *c) { } /* This function gets called when a blocked client timed out in order to - * send it a reply of some kind. */ + * send it a reply of some kind. After this function is called, + * unblockClient() will be called with the same client as argument. 
*/ void replyToBlockedClientTimedOut(client *c) { if (c->btype == BLOCKED_LIST) { addReply(c,shared.nullmultibulk); } else if (c->btype == BLOCKED_WAIT) { addReplyLongLong(c,replicationCountAcksByOffset(c->bpop.reploffset)); + } else if (c->btype == BLOCKED_MODULE) { + moduleBlockedClientTimedOut(c); } else { serverPanic("Unknown btype in replyToBlockedClientTimedOut()."); } diff --git a/src/module.c b/src/module.c index 742d9b974..9a939d2fc 100644 --- a/src/module.c +++ b/src/module.c @@ -105,6 +105,7 @@ struct RedisModuleCtx { int flags; /* REDISMODULE_CTX_... flags. */ void **postponed_arrays; /* To set with RM_ReplySetArrayLength(). */ int postponed_arrays_count; /* Number of entries in postponed_arrays. */ + void *blocked_privdata; /* Privdata set when unblocking a clinet. */ /* Used if there is the REDISMODULE_CTX_KEYS_POS_REQUEST flag set. */ int *keys_pos; @@ -114,10 +115,12 @@ struct RedisModuleCtx { }; typedef struct RedisModuleCtx RedisModuleCtx; -#define REDISMODULE_CTX_INIT {(void*)(unsigned long)&RM_GetApi, NULL, NULL, NULL, 0, 0, 0, NULL, 0, NULL, 0, NULL} +#define REDISMODULE_CTX_INIT {(void*)(unsigned long)&RM_GetApi, NULL, NULL, NULL, 0, 0, 0, NULL, 0, NULL, NULL, 0, NULL} #define REDISMODULE_CTX_MULTI_EMITTED (1<<0) #define REDISMODULE_CTX_AUTO_MEMORY (1<<1) #define REDISMODULE_CTX_KEYS_POS_REQUEST (1<<2) +#define REDISMODULE_CTX_BLOCKED_REPLY (1<<3) +#define REDISMODULE_CTX_BLOCKED_TIMEOUT (1<<4) /* This represents a Redis key opened with RM_OpenKey(). */ struct RedisModuleKey { @@ -183,6 +186,23 @@ typedef struct RedisModuleCallReply { } val; } RedisModuleCallReply; +/* Structure representing a blocked client. We get a pointer to such + * an object when blocking from modules. */ +typedef struct RedisModuleBlockedClient { + client *client; /* Pointer to the blocked client. or NULL if the client + was destroyed during the life of this object. */ + RedisModule *module; /* Module blocking the client. 
*/ + RedisModuleCmdFunc reply_callback; /* Reply callback on normal completion.*/ + RedisModuleCmdFunc timeout_callback; /* Reply callback on timeout. */ + void (*free_privdata)(void *); /* privdata cleanup callback. */ + void *privdata; /* Module private data that may be used by the reply + or timeout callback. It is set via the + RedisModule_UnblockClient() API. */ +} RedisModuleBlockedClient; + +static pthread_mutex_t moduleUnblockedClientsMutex = PTHREAD_MUTEX_INITIALIZER; +static list *moduleUnblockedClients; + /* -------------------------------------------------------------------------- * Prototypes * -------------------------------------------------------------------------- */ @@ -403,6 +423,26 @@ void moduleFreeContext(RedisModuleCtx *ctx) { } } +/* Helper function for when a command callback is called, in order to handle + * details needed to correctly replicate commands. */ +void moduleHandlePropagationAfterCommandCallback(RedisModuleCtx *ctx) { + client *c = ctx->client; + + /* We don't want any automatic propagation here since in modules we handle + * replication / AOF propagation in explicit ways. */ + preventCommandPropagation(c); + + /* Handle the replication of the final EXEC, since whatever a command + * emits is always wrappered around MULTI/EXEC. */ + if (ctx->flags & REDISMODULE_CTX_MULTI_EMITTED) { + robj *propargv[1]; + propargv[0] = createStringObject("EXEC",4); + alsoPropagate(server.execCommand,c->db->id,propargv,1, + PROPAGATE_AOF|PROPAGATE_REPL); + decrRefCount(propargv[0]); + } +} + /* This Redis command binds the normal Redis command invocation with commands * exported by modules. */ void RedisModuleCommandDispatcher(client *c) { @@ -412,17 +452,7 @@ void RedisModuleCommandDispatcher(client *c) { ctx.module = cp->module; ctx.client = c; cp->func(&ctx,(void**)c->argv,c->argc); - preventCommandPropagation(c); - - /* Handle the replication of the final EXEC, since whatever a command - * emits is always wrappered around MULTI/EXEC. 
*/ - if (ctx.flags & REDISMODULE_CTX_MULTI_EMITTED) { - robj *propargv[1]; - propargv[0] = createStringObject("EXEC",4); - alsoPropagate(server.execCommand,c->db->id,propargv,1, - PROPAGATE_AOF|PROPAGATE_REPL); - decrRefCount(propargv[0]); - } + moduleHandlePropagationAfterCommandCallback(&ctx); moduleFreeContext(&ctx); } @@ -3034,6 +3064,130 @@ void RM_LogIOError(RedisModuleIO *io, const char *levelstr, const char *fmt, ... va_end(ap); } +/* -------------------------------------------------------------------------- + * Blocking clients from modules + * -------------------------------------------------------------------------- */ + +/* This is called from blocked.c in order to unblock a client: may be called + * for multiple reasons while the client is in the middle of being blocked + * because the client is terminated, but is also called for cleanup when a + * client is unblocked in a clean way after replaying. + * + * What we do here is just to set the client to NULL in the redis module + * blocked client handle. This way if the client is terminated while there + * is a pending threaded operation involving the blocked client, we'll know + * that the client no longer exists and no reply callback should be called. + * + * The structure RedisModuleBlockedClient will be always deallocated when + * running the list of clients blocked by a module that need to be unblocked. 
*/ +void unblockClientFromModule(client *c) { + RedisModuleBlockedClient *bc = c->bpop.module_blocked_handle; + bc->client = NULL; +} + +int RM_BlockClient(RedisModuleCtx *ctx, RedisModuleCmdFunc reply_callback, RedisModuleCmdFunc timeout_callback, void (*free_privdata)(void*), long long timeout_ms) { + client *c = ctx->client; + c->bpop.module_blocked_handle = zmalloc(sizeof(RedisModuleBlockedClient)); + RedisModuleBlockedClient *bc = c->bpop.module_blocked_handle; + + bc->client = c; + bc->module = ctx->module; + bc->reply_callback = reply_callback; + bc->timeout_callback = timeout_callback; + bc->free_privdata = free_privdata; + bc->privdata = NULL; + c->bpop.timeout = timeout_ms; + + blockClient(c,BLOCKED_MODULE); + return REDISMODULE_OK; +} + +/* Unblock a client blocked by `RedisModule_BlockedClient`. This will trigger + * the reply callbacks to be called in order to reply to the client. + * The 'privdata' argument will be accessible by the reply callback, so + * the caller of this function can pass any value that is needed in order to + * actually reply to the client. + * + * A common usage for 'privdata' is a thread that computes something that + * needs to be passed to the client, included but not limited some slow + * to compute reply or some reply obtained via networking. + * + * Note: this function can be called from threads spawned by the module. */ +int RM_UnblockClient(RedisModuleBlockedClient *bc, void *privdata) { + pthread_mutex_lock(&moduleUnblockedClientsMutex); + bc->privdata = privdata; + listAddNodeTail(moduleUnblockedClients,bc); + pthread_mutex_unlock(&moduleUnblockedClientsMutex); + return REDISMODULE_OK; +} + +/* This function will check the moduleUnblockedClients queue in order to + * call the reply callback and really unblock the client. 
+ * + * Clients end into this list because of calls to RM_UnblockClient(), + * however it is possible that while the module was doing work for the + * blocked client, it was terminated by Redis (for timeout or other reasons). + * When this happens the RedisModuleBlockedClient structure in the queue + * will have the 'client' field set to NULL. */ +void moduleHandleBlockedClients(void) { + listNode *ln; + RedisModuleBlockedClient *bc; + + pthread_mutex_lock(&moduleUnblockedClientsMutex); + while (listLength(moduleUnblockedClients)) { + ln = listFirst(moduleUnblockedClients); + bc = ln->value; + client *c = bc->client; + listDelNode(server.unblocked_clients,ln); + + if (c != NULL) { + RedisModuleCtx ctx = REDISMODULE_CTX_INIT; + ctx.flags |= REDISMODULE_CTX_BLOCKED_REPLY; + ctx.blocked_privdata = bc->privdata; + ctx.module = bc->module; + ctx.client = bc->client; + bc->reply_callback(&ctx,(void**)c->argv,c->argc); + moduleHandlePropagationAfterCommandCallback(&ctx); + moduleFreeContext(&ctx); + } + if (bc->privdata && bc->free_privdata) + bc->free_privdata(bc->privdata); + zfree(bc); + } + pthread_mutex_unlock(&moduleUnblockedClientsMutex); +} + +/* Called when our client timed out. After this function unblockClient() + * is called, and it will invalidate the blocked client. So this function + * does not need to do any cleanup. Eventually the module will call the + * API to unblock the client and the memory will be released. */ +void moduleBlockedClientTimedOut(client *c) { + RedisModuleBlockedClient *bc = c->bpop.module_blocked_handle; + RedisModuleCtx ctx = REDISMODULE_CTX_INIT; + ctx.flags |= REDISMODULE_CTX_BLOCKED_TIMEOUT; + ctx.module = bc->module; + ctx.client = bc->client; + bc->timeout_callback(&ctx,(void**)c->argv,c->argc); + moduleFreeContext(&ctx); +} + +/* Return non-zero if a module command was called in order to fill the + * reply for a blocked client. 
*/ +int RM_IsBlockedReplyRequest(RedisModuleCtx *ctx) { + return (ctx->flags & REDISMODULE_CTX_BLOCKED_REPLY) != 0; +} + +/* Return non-zero if a module command was called in order to fill the + * reply for a blocked client that timed out. */ +int RM_IsBlockedTimeoutRequest(RedisModuleCtx *ctx) { + return (ctx->flags & REDISMODULE_CTX_BLOCKED_TIMEOUT) != 0; +} + +/* Get the privata data set by RedisModule_UnblockClient() */ +void *RM_GetBlockedClientPrivateData(RedisModuleCtx *ctx) { + return ctx->blocked_privdata; +} + /* -------------------------------------------------------------------------- * Modules API internals * -------------------------------------------------------------------------- */ @@ -3070,6 +3224,8 @@ int moduleRegisterApi(const char *funcname, void *funcptr) { void moduleRegisterCoreAPI(void); void moduleInitModulesSystem(void) { + moduleUnblockedClients = listCreate(); + server.loadmodule_queue = listCreate(); modules = dictCreate(&modulesDictType,NULL); moduleRegisterCoreAPI(); diff --git a/src/server.c b/src/server.c index 36b04abfb..a05491852 100644 --- a/src/server.c +++ b/src/server.c @@ -1195,6 +1195,10 @@ void beforeSleep(struct aeEventLoop *eventLoop) { if (listLength(server.clients_waiting_acks)) processClientsWaitingReplicas(); + /* Check if there are clients unblocked by modules that implement + * blocking commands. */ + moduleHandleBlockedClients(); + /* Try to process pending commands for clients that were just unblocked. */ if (listLength(server.unblocked_clients)) processUnblockedClients(); diff --git a/src/server.h b/src/server.h index b9c46b810..69ee52e64 100644 --- a/src/server.h +++ b/src/server.h @@ -245,6 +245,7 @@ typedef long long mstime_t; /* millisecond time type. */ #define BLOCKED_NONE 0 /* Not blocked, no CLIENT_BLOCKED flag set. */ #define BLOCKED_LIST 1 /* BLPOP & co. */ #define BLOCKED_WAIT 2 /* WAIT for synchronous replication. */ +#define BLOCKED_MODULE 3 /* Blocked by a loadable module. 
*/ /* Client request types */ #define PROTO_REQ_INLINE 1 @@ -619,6 +620,11 @@ typedef struct blockingState { /* BLOCKED_WAIT */ int numreplicas; /* Number of replicas we are waiting for ACK. */ long long reploffset; /* Replication offset to reach. */ + + /* BLOCKED_MODULE */ + void *module_blocked_handle; /* RedisModuleBlockedClient structure. + which is opaque for the Redis core, only + handled in module.c. */ } blockingState; /* The following structure represents a node in the server.ready_keys list, @@ -1226,6 +1232,9 @@ int *moduleGetCommandKeysViaAPI(struct redisCommand *cmd, robj **argv, int argc, moduleType *moduleTypeLookupModuleByID(uint64_t id); void moduleTypeNameByID(char *name, uint64_t moduleid); void moduleFreeContext(struct RedisModuleCtx *ctx); +void unblockClientFromModule(client *c); +void moduleHandleBlockedClients(void); +void moduleBlockedClientTimedOut(client *c); /* Utils */ long long ustime(void); From c10a839e8216b53151e1a94b3e3aa1608612a73e Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 7 Oct 2016 13:07:13 +0200 Subject: [PATCH 0158/1722] Enable warning in example modules Makefile. 
--- src/modules/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/modules/Makefile b/src/modules/Makefile index 554642ffd..1027b2e0e 100644 --- a/src/modules/Makefile +++ b/src/modules/Makefile @@ -4,10 +4,10 @@ uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not') # Compile flags for linux / osx ifeq ($(uname_S),Linux) - SHOBJ_CFLAGS ?= -fno-common -g -ggdb -std=c99 -O2 + SHOBJ_CFLAGS ?= -W -Wall -fno-common -g -ggdb -std=c99 -O2 SHOBJ_LDFLAGS ?= -shared else - SHOBJ_CFLAGS ?= -dynamic -fno-common -g -ggdb -std=c99 -O2 + SHOBJ_CFLAGS ?= -W -Wall -dynamic -fno-common -g -ggdb -std=c99 -O2 SHOBJ_LDFLAGS ?= -bundle -undefined dynamic_lookup endif From 2371a8b3b32a26712f17b315d7336888f4af2d45 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 7 Oct 2016 13:10:29 +0200 Subject: [PATCH 0159/1722] Modules: introduce warning suppression macro for unused args. --- src/modules/hellotype.c | 5 +++++ src/modules/testmodule.c | 15 +++++++++++++++ src/redismodule.h | 2 ++ 3 files changed, 22 insertions(+) diff --git a/src/modules/hellotype.c b/src/modules/hellotype.c index b33ed81cd..535eb88e1 100644 --- a/src/modules/hellotype.c +++ b/src/modules/hellotype.c @@ -227,6 +227,8 @@ void HelloTypeAofRewrite(RedisModuleIO *aof, RedisModuleString *key, void *value } void HelloTypeDigest(RedisModuleDigest *digest, void *value) { + REDISMODULE_NOT_USED(digest); + REDISMODULE_NOT_USED(value); /* TODO: The DIGEST module interface is yet not implemented. */ } @@ -237,6 +239,9 @@ void HelloTypeFree(void *value) { /* This function must be present on each Redis module. It is used in order to * register the commands into the Redis server. 
*/ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + REDISMODULE_NOT_USED(argv); + REDISMODULE_NOT_USED(argc); + if (RedisModule_Init(ctx,"hellotype",1,REDISMODULE_APIVER_1) == REDISMODULE_ERR) return REDISMODULE_ERR; diff --git a/src/modules/testmodule.c b/src/modules/testmodule.c index db809a8e6..8da45c0ea 100644 --- a/src/modules/testmodule.c +++ b/src/modules/testmodule.c @@ -48,6 +48,9 @@ int TestMatchReply(RedisModuleCallReply *reply, char *str) { /* TEST.CALL -- Test Call() API. */ int TestCall(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + REDISMODULE_NOT_USED(argv); + REDISMODULE_NOT_USED(argc); + RedisModule_AutoMemory(ctx); RedisModuleCallReply *reply; @@ -75,6 +78,9 @@ fail: /* TEST.STRING.APPEND -- Test appending to an existing string object. */ int TestStringAppend(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + REDISMODULE_NOT_USED(argv); + REDISMODULE_NOT_USED(argc); + RedisModuleString *s = RedisModule_CreateString(ctx,"foo",3); RedisModule_StringAppendBuffer(ctx,s,"bar",3); RedisModule_ReplyWithString(ctx,s); @@ -84,6 +90,9 @@ int TestStringAppend(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { /* TEST.STRING.APPEND.AM -- Test append with retain when auto memory is on. */ int TestStringAppendAM(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + REDISMODULE_NOT_USED(argv); + REDISMODULE_NOT_USED(argc); + RedisModule_AutoMemory(ctx); RedisModuleString *s = RedisModule_CreateString(ctx,"foo",3); RedisModule_RetainString(ctx,s); @@ -163,6 +172,9 @@ int TestAssertIntegerReply(RedisModuleCtx *ctx, RedisModuleCallReply *reply, lon /* TEST.IT -- Run all the tests. 
*/ int TestIt(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + REDISMODULE_NOT_USED(argv); + REDISMODULE_NOT_USED(argc); + RedisModule_AutoMemory(ctx); RedisModuleCallReply *reply; @@ -195,6 +207,9 @@ fail: } int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + REDISMODULE_NOT_USED(argv); + REDISMODULE_NOT_USED(argc); + if (RedisModule_Init(ctx,"test",1,REDISMODULE_APIVER_1) == REDISMODULE_ERR) return REDISMODULE_ERR; diff --git a/src/redismodule.h b/src/redismodule.h index eaf5dfab1..4743fa98c 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -68,6 +68,8 @@ #define REDISMODULE_POSITIVE_INFINITE (1.0/0.0) #define REDISMODULE_NEGATIVE_INFINITE (-1.0/0.0) +#define REDISMODULE_NOT_USED(V) ((void) V) + /* ------------------------- End of common defines ------------------------ */ #ifndef REDISMODULE_CORE From e102e93c9d29dbdd7b60cc9ceabf44c06c591ffb Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 7 Oct 2016 13:48:05 +0200 Subject: [PATCH 0160/1722] Modules: blocking commands WIP: API exported, a first example. --- src/module.c | 23 +++++++++++++++++++++-- src/modules/Makefile | 7 ++++++- src/redismodule.h | 11 +++++++++++ 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/src/module.c b/src/module.c index 9a939d2fc..0cba99872 100644 --- a/src/module.c +++ b/src/module.c @@ -3085,7 +3085,21 @@ void unblockClientFromModule(client *c) { bc->client = NULL; } -int RM_BlockClient(RedisModuleCtx *ctx, RedisModuleCmdFunc reply_callback, RedisModuleCmdFunc timeout_callback, void (*free_privdata)(void*), long long timeout_ms) { +/* Block a client in the context of a blocking command, returning an handle + * which will be used, later, in order to block the client with a call to + * RedisModule_UnblockClient(). The arguments specify callback functions + * and a timeout after which the client is unblocked. 
+ * + * The callbacks are called in the following contexts: + * + * reply_callback: called after a successful RedisModule_UnblockClient() call + * in order to reply to the client and unblock it. + * reply_timeout: called when the timeout is reached in order to send an + * error to the client. + * free_privdata: called in order to free the privata data that is passed + * by RedisModule_UnblockClient() call. + */ +RedisModuleBlockedClient *RM_BlockClient(RedisModuleCtx *ctx, RedisModuleCmdFunc reply_callback, RedisModuleCmdFunc timeout_callback, void (*free_privdata)(void*), long long timeout_ms) { client *c = ctx->client; c->bpop.module_blocked_handle = zmalloc(sizeof(RedisModuleBlockedClient)); RedisModuleBlockedClient *bc = c->bpop.module_blocked_handle; @@ -3099,7 +3113,7 @@ int RM_BlockClient(RedisModuleCtx *ctx, RedisModuleCmdFunc reply_callback, Redis c->bpop.timeout = timeout_ms; blockClient(c,BLOCKED_MODULE); - return REDISMODULE_OK; + return bc; } /* Unblock a client blocked by `RedisModule_BlockedClient`. This will trigger @@ -3513,4 +3527,9 @@ void moduleRegisterCoreAPI(void) { REGISTER_API(RetainString); REGISTER_API(StringCompare); REGISTER_API(GetContextFromIO); + REGISTER_API(BlockClient); + REGISTER_API(UnblockClient); + REGISTER_API(IsBlockedReplyRequest); + REGISTER_API(IsBlockedTimeoutRequest); + REGISTER_API(GetBlockedClientPrivateData); } diff --git a/src/modules/Makefile b/src/modules/Makefile index 1027b2e0e..066e65e9b 100644 --- a/src/modules/Makefile +++ b/src/modules/Makefile @@ -13,7 +13,7 @@ endif .SUFFIXES: .c .so .xo .o -all: helloworld.so hellotype.so testmodule.so +all: helloworld.so hellotype.so helloblock.so testmodule.so .c.xo: $(CC) -I. 
$(CFLAGS) $(SHOBJ_CFLAGS) -fPIC -c $< -o $@ @@ -28,6 +28,11 @@ hellotype.xo: ../redismodule.h hellotype.so: hellotype.xo $(LD) -o $@ $< $(SHOBJ_LDFLAGS) $(LIBS) -lc +helloblock.xo: ../redismodule.h + +helloblock.so: helloblock.xo + $(LD) -o $@ $< $(SHOBJ_LDFLAGS) $(LIBS) -lpthread -lc + testmodule.xo: ../redismodule.h testmodule.so: testmodule.xo diff --git a/src/redismodule.h b/src/redismodule.h index 4743fa98c..e931ecd07 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -84,6 +84,7 @@ typedef struct RedisModuleCallReply RedisModuleCallReply; typedef struct RedisModuleIO RedisModuleIO; typedef struct RedisModuleType RedisModuleType; typedef struct RedisModuleDigest RedisModuleDigest; +typedef struct RedisModuleBlockedClient RedisModuleBlockedClient; typedef int (*RedisModuleCmdFunc) (RedisModuleCtx *ctx, RedisModuleString **argv, int argc); @@ -194,6 +195,11 @@ int REDISMODULE_API_FUNC(RedisModule_StringAppendBuffer)(RedisModuleCtx *ctx, Re void REDISMODULE_API_FUNC(RedisModule_RetainString)(RedisModuleCtx *ctx, RedisModuleString *str); int REDISMODULE_API_FUNC(RedisModule_StringCompare)(RedisModuleString *a, RedisModuleString *b); RedisModuleCtx *REDISMODULE_API_FUNC(RedisModule_GetContextFromIO)(RedisModuleIO *io); +RedisModuleBlockedClient *REDISMODULE_API_FUNC(RedisModule_BlockClient)(RedisModuleCtx *ctx, RedisModuleCmdFunc reply_callback, RedisModuleCmdFunc timeout_callback, void (*free_privdata)(void*), long long timeout_ms); +int REDISMODULE_API_FUNC(RedisModule_UnblockClient)(RedisModuleBlockedClient *bc, void *privdata); +int REDISMODULE_API_FUNC(RedisModule_IsBlockedReplyRequest)(RedisModuleCtx *ctx); +int REDISMODULE_API_FUNC(RedisModule_IsBlockedTimeoutRequest)(RedisModuleCtx *ctx); +void *REDISMODULE_API_FUNC(RedisModule_GetBlockedClientPrivateData)(RedisModuleCtx *ctx); /* This is included inline inside each Redis module. 
*/ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int apiver) __attribute__((unused)); @@ -295,6 +301,11 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int REDISMODULE_GET_API(RetainString); REDISMODULE_GET_API(StringCompare); REDISMODULE_GET_API(GetContextFromIO); + REDISMODULE_GET_API(BlockClient); + REDISMODULE_GET_API(UnblockClient); + REDISMODULE_GET_API(IsBlockedReplyRequest); + REDISMODULE_GET_API(IsBlockedTimeoutRequest); + REDISMODULE_GET_API(GetBlockedClientPrivateData); RedisModule_SetModuleAttribs(ctx,name,ver,apiver); return REDISMODULE_OK; From 5a1a9e13f273840c3e172942c2aaa922ac07951c Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 7 Oct 2016 16:34:19 +0200 Subject: [PATCH 0161/1722] Modules: RM_Milliseconds() API added. --- src/module.c | 6 ++++++ src/redismodule.h | 2 ++ 2 files changed, 8 insertions(+) diff --git a/src/module.c b/src/module.c index 0cba99872..2bf07aee0 100644 --- a/src/module.c +++ b/src/module.c @@ -648,6 +648,11 @@ void RM_SetModuleAttribs(RedisModuleCtx *ctx, const char *name, int ver, int api ctx->module = module; } +/* Return the current UNIX time in milliseconds. 
*/ +long long RM_Milliseconds(void) { + return mstime(); +} + /* -------------------------------------------------------------------------- * Automatic memory management for modules * -------------------------------------------------------------------------- */ @@ -3532,4 +3537,5 @@ void moduleRegisterCoreAPI(void) { REGISTER_API(IsBlockedReplyRequest); REGISTER_API(IsBlockedTimeoutRequest); REGISTER_API(GetBlockedClientPrivateData); + REGISTER_API(Milliseconds); } diff --git a/src/redismodule.h b/src/redismodule.h index e931ecd07..104de9586 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -200,6 +200,7 @@ int REDISMODULE_API_FUNC(RedisModule_UnblockClient)(RedisModuleBlockedClient *bc int REDISMODULE_API_FUNC(RedisModule_IsBlockedReplyRequest)(RedisModuleCtx *ctx); int REDISMODULE_API_FUNC(RedisModule_IsBlockedTimeoutRequest)(RedisModuleCtx *ctx); void *REDISMODULE_API_FUNC(RedisModule_GetBlockedClientPrivateData)(RedisModuleCtx *ctx); +long long REDISMODULE_API_FUNC(RedisModule_Milliseconds)(void); /* This is included inline inside each Redis module. */ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int apiver) __attribute__((unused)); @@ -306,6 +307,7 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int REDISMODULE_GET_API(IsBlockedReplyRequest); REDISMODULE_GET_API(IsBlockedTimeoutRequest); REDISMODULE_GET_API(GetBlockedClientPrivateData); + REDISMODULE_GET_API(Milliseconds); RedisModule_SetModuleAttribs(ctx,name,ver,apiver); return REDISMODULE_OK; From 7ac5f45831017ac85e6386914839a3c17db95ca9 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 7 Oct 2016 16:34:40 +0200 Subject: [PATCH 0162/1722] Modules: fixes to the blocking commands API: examples now works. 
--- src/module.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/module.c b/src/module.c index 2bf07aee0..a3e755c29 100644 --- a/src/module.c +++ b/src/module.c @@ -3115,7 +3115,7 @@ RedisModuleBlockedClient *RM_BlockClient(RedisModuleCtx *ctx, RedisModuleCmdFunc bc->timeout_callback = timeout_callback; bc->free_privdata = free_privdata; bc->privdata = NULL; - c->bpop.timeout = timeout_ms; + c->bpop.timeout = timeout_ms ? (mstime()+timeout_ms) : 0; blockClient(c,BLOCKED_MODULE); return bc; @@ -3157,7 +3157,11 @@ void moduleHandleBlockedClients(void) { ln = listFirst(moduleUnblockedClients); bc = ln->value; client *c = bc->client; - listDelNode(server.unblocked_clients,ln); + listDelNode(moduleUnblockedClients,ln); + pthread_mutex_unlock(&moduleUnblockedClientsMutex); + + /* Release the lock during the loop, as long as we don't + * touch the shared list. */ if (c != NULL) { RedisModuleCtx ctx = REDISMODULE_CTX_INIT; @@ -3172,6 +3176,10 @@ void moduleHandleBlockedClients(void) { if (bc->privdata && bc->free_privdata) bc->free_privdata(bc->privdata); zfree(bc); + if (c != NULL) unblockClient(bc->client); + + /* Lock again before to iterate the loop. */ + pthread_mutex_lock(&moduleUnblockedClientsMutex); } pthread_mutex_unlock(&moduleUnblockedClientsMutex); } From 33223ded21c62c97db22eb8b35bb41406bde4932 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 7 Oct 2016 16:35:06 +0200 Subject: [PATCH 0163/1722] Modules: blocking command example added. --- src/modules/helloblock.c | 115 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 src/modules/helloblock.c diff --git a/src/modules/helloblock.c b/src/modules/helloblock.c new file mode 100644 index 000000000..f3764be46 --- /dev/null +++ b/src/modules/helloblock.c @@ -0,0 +1,115 @@ +/* Helloblock module -- An example of blocking command implementation + * with threads. 
+ * + * ----------------------------------------------------------------------------- + * + * Copyright (c) 2016, Salvatore Sanfilippo + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Redis nor the names of its contributors may be used + * to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "../redismodule.h" +#include +#include +#include +#include + +/* Reply callback for blocking command HELLO.BLOCK */ +int HelloBlock_Reply(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + int *myint = RedisModule_GetBlockedClientPrivateData(ctx); + return RedisModule_ReplyWithLongLong(ctx,*myint); +} + +/* Timeout callback for blocking command HELLO.BLOCK */ +int HelloBlock_Timeout(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + return RedisModule_ReplyWithSimpleString(ctx,"Request timedout"); +} + +/* Private data freeing callback for HELLO.BLOCK command. */ +void HelloBlock_FreeData(void *privdata) { + RedisModule_Free(privdata); +} + +/* The thread entry point that actually executes the blocking part + * of the command HELLO.BLOCK. */ +void *HelloBlock_ThreadMain(void *arg) { + void **targ = arg; + RedisModuleBlockedClient *bc = targ[0]; + long long delay = (unsigned long)targ[1]; + RedisModule_Free(targ); + + sleep(delay); + int *r = RedisModule_Alloc(sizeof(int)); + *r = rand(); + RedisModule_UnblockClient(bc,r); + return NULL; +} + +/* HELLO.BLOCK -- Block for seconds, then reply with + * a random number. Timeout is the command timeout, so that you can test + * what happens when the delay is greater than the timeout. */ +int HelloBlock_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + if (argc != 3) return RedisModule_WrongArity(ctx); + long long delay; + long long timeout; + + if (RedisModule_StringToLongLong(argv[1],&delay) != REDISMODULE_OK) { + return RedisModule_ReplyWithError(ctx,"ERR invalid count"); + } + + if (RedisModule_StringToLongLong(argv[2],&timeout) != REDISMODULE_OK) { + return RedisModule_ReplyWithError(ctx,"ERR invalid count"); + } + + pthread_t tid; + RedisModuleBlockedClient *bc = RedisModule_BlockClient(ctx,HelloBlock_Reply,HelloBlock_Timeout,HelloBlock_FreeData,timeout); + + /* Now that we setup a blocking client, we need to pass the control + * to the thread. 
However we need to pass arguments to the thread: + * the delay and a reference to the blocked client handle. */ + void **targ = RedisModule_Alloc(sizeof(void*)*2); + targ[0] = bc; + targ[1] = (void*)(unsigned long) delay; + + if (pthread_create(&tid,NULL,HelloBlock_ThreadMain,targ) != 0) { + /* RedisModule_BlockedClientAbort(bc); */ + return RedisModule_ReplyWithError(ctx,"-ERR Can't start thread"); + } + return REDISMODULE_OK; +} + +/* This function must be present on each Redis module. It is used in order to + * register the commands into the Redis server. */ +int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + if (RedisModule_Init(ctx,"helloblock",1,REDISMODULE_APIVER_1) + == REDISMODULE_ERR) return REDISMODULE_ERR; + + if (RedisModule_CreateCommand(ctx,"hello.block", + HelloBlock_RedisCommand,"",0,0,0) == REDISMODULE_ERR) + return REDISMODULE_ERR; + + return REDISMODULE_OK; +} From 8e814916c335f6bf421376d67ce3d5f7d0856402 Mon Sep 17 00:00:00 2001 From: yyoshiki41 Date: Mon, 10 Oct 2016 01:13:20 +0900 Subject: [PATCH 0164/1722] Refactor redis-trib.rb --- src/redis-trib.rb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/redis-trib.rb b/src/redis-trib.rb index b40b5decb..39db97947 100755 --- a/src/redis-trib.rb +++ b/src/redis-trib.rb @@ -1445,7 +1445,7 @@ class RedisTrib xputs ">>> Importing data from #{source_addr} to cluster #{argv[1]}" use_copy = opt['copy'] use_replace = opt['replace'] - + # Check the existing cluster. 
load_cluster_info_from_node(argv[0]) check_cluster @@ -1669,7 +1669,6 @@ ALLOWED_OPTIONS={ def show_help puts "Usage: redis-trib \n\n" COMMANDS.each{|k,v| - o = "" puts " #{k.ljust(15)} #{v[2]}" if ALLOWED_OPTIONS[k] ALLOWED_OPTIONS[k].each{|optname,has_arg| From 834a914b39c320f539646009acda4fccc4c4db02 Mon Sep 17 00:00:00 2001 From: jybaek Date: Thu, 13 Oct 2016 15:17:07 +0900 Subject: [PATCH 0165/1722] Remove Duplicate Processing --- src/redis-cli.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 054c2fc7c..7d34e7f97 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -628,7 +628,6 @@ sds sdscatcolor(sds o, char *s, size_t len, char *color) { int bold = strstr(color,"bold") != NULL; int ccode = 37; /* Defaults to white. */ if (strstr(color,"red")) ccode = 31; - else if (strstr(color,"red")) ccode = 31; else if (strstr(color,"green")) ccode = 32; else if (strstr(color,"yellow")) ccode = 33; else if (strstr(color,"blue")) ccode = 34; From 626270fc828e8675b77834d9683db538d15d2c13 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 13 Oct 2016 12:41:48 +0200 Subject: [PATCH 0166/1722] Example modules: remove warnings about types and not used args. 
--- src/modules/helloblock.c | 7 +++++++ src/modules/helloworld.c | 8 ++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/modules/helloblock.c b/src/modules/helloblock.c index f3764be46..f76b3beb5 100644 --- a/src/modules/helloblock.c +++ b/src/modules/helloblock.c @@ -39,12 +39,16 @@ /* Reply callback for blocking command HELLO.BLOCK */ int HelloBlock_Reply(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + REDISMODULE_NOT_USED(argv); + REDISMODULE_NOT_USED(argc); int *myint = RedisModule_GetBlockedClientPrivateData(ctx); return RedisModule_ReplyWithLongLong(ctx,*myint); } /* Timeout callback for blocking command HELLO.BLOCK */ int HelloBlock_Timeout(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + REDISMODULE_NOT_USED(argv); + REDISMODULE_NOT_USED(argc); return RedisModule_ReplyWithSimpleString(ctx,"Request timedout"); } @@ -104,6 +108,9 @@ int HelloBlock_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int a /* This function must be present on each Redis module. It is used in order to * register the commands into the Redis server. */ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + REDISMODULE_NOT_USED(argv); + REDISMODULE_NOT_USED(argc); + if (RedisModule_Init(ctx,"helloblock",1,REDISMODULE_APIVER_1) == REDISMODULE_ERR) return REDISMODULE_ERR; diff --git a/src/modules/helloworld.c b/src/modules/helloworld.c index 4d6f8782d..4e30af2a0 100644 --- a/src/modules/helloworld.c +++ b/src/modules/helloworld.c @@ -46,6 +46,8 @@ * fetch the currently selected DB, the other in order to send the client * an integer reply as response. 
*/ int HelloSimple_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + REDISMODULE_NOT_USED(argv); + REDISMODULE_NOT_USED(argc); RedisModule_ReplyWithLongLong(ctx,RedisModule_GetSelectedDb(ctx)); return REDISMODULE_OK; } @@ -237,6 +239,8 @@ int HelloRandArray_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, i * comments the function implementation). */ int HelloRepl1_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + REDISMODULE_NOT_USED(argv); + REDISMODULE_NOT_USED(argc); RedisModuleCallReply *reply; RedisModule_AutoMemory(ctx); @@ -519,7 +523,7 @@ int HelloLeftPad_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int /* If the string is already larger than the target len, just return * the string itself. */ - if (strlen >= padlen) + if (strlen >= (size_t)padlen) return RedisModule_ReplyWithString(ctx,argv[1]); /* Padding must be a single character in this simple implementation. */ @@ -530,7 +534,7 @@ int HelloLeftPad_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int /* Here we use our pool allocator, for our throw-away allocation. */ padlen -= strlen; char *buf = RedisModule_PoolAlloc(ctx,padlen+strlen); - for (size_t j = 0; j < padlen; j++) buf[j] = *ch; + for (long long j = 0; j < padlen; j++) buf[j] = *ch; memcpy(buf+padlen,str,strlen); RedisModule_ReplyWithStringBuffer(ctx,buf,padlen+strlen); From f34ceb42c08138ee8e0bffe153572f638b825d48 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 13 Oct 2016 12:48:36 +0200 Subject: [PATCH 0167/1722] module.c: trim comment to 80 cols. --- src/module.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/module.c b/src/module.c index a3e755c29..91cd53090 100644 --- a/src/module.c +++ b/src/module.c @@ -752,11 +752,11 @@ RedisModuleString *RM_CreateString(RedisModuleCtx *ctx, const char *ptr, size_t } -/* Create a new module string object from a printf format and arguments. 
- * The returned string must be freed with RedisModule_FreeString(), unless automatic - * memory is enabled. +/* Create a new module string object from a printf format and arguments. + * The returned string must be freed with RedisModule_FreeString(), unless + * automatic memory is enabled. * - * The string is created using the sds formatter function sdscatvprintf() */ + * The string is created using the sds formatter function sdscatvprintf(). */ RedisModuleString *RM_CreateStringPrintf(RedisModuleCtx *ctx, const char *fmt, ...) { sds s = sdsempty(); From 6b22a3f9f2997ad083fbf65e1798f702f111c0fc Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 13 Oct 2016 16:57:28 +0200 Subject: [PATCH 0168/1722] Modules: blocking API documented. --- src/modules/BLOCK.md | 265 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 265 insertions(+) create mode 100644 src/modules/BLOCK.md diff --git a/src/modules/BLOCK.md b/src/modules/BLOCK.md new file mode 100644 index 000000000..d4f3c93bc --- /dev/null +++ b/src/modules/BLOCK.md @@ -0,0 +1,265 @@ +Blocking commands in Redis modules +=== + +Redis has a few blocking commands among the built-in set of commands. +One of the most used is `BLPOP` (or the symmetric `BRPOP`) which blocks +waiting for elements arriving in a list. + +The interesting fact about blocking commands is that they do not block +the whole server, but just the client calling them. Usually the reason to +block is that we expect some external event to happen: this can be +some change in the Redis data structures like in the `BLPOP` case, a +long computation happening in a thread, to receive some data from the +network, and so forth. + +Redis modules have the ability to implement blocking commands as well, +this documentation shows how the API works and describes a few patterns +that can be used in order to model blocking commands. + +How blocking and resuming works. 
+--- + +_Note: You may want to check the `helloblock.c` example in the Redis source tree +inside the `src/modules` directory, for a simple to understand example +on how the blocking API is applied._ + +In Redis modules, commands are implemented by callback functions that +are invoked by the Redis core when the specific command is called +by the user. Normally the callback terminates its execution sending +some reply to the client. Using the following function instead, the +function implementing the module command may request that the client +is put into the blocked state: + + RedisModuleBlockedClient *RedisModule_BlockClient(RedisModuleCtx *ctx, RedisModuleCmdFunc reply_callback, RedisModuleCmdFunc timeout_callback, void (*free_privdata)(void*), long long timeout_ms); + +The function returns a `RedisModuleBlockedClient` object, which is later +used in order to unblock the client. The arguments have the following +meaning: + +* `ctx` is the command execution context as usually in the rest of the API. +* `reply_callback` is the callback, having the same prototype of a normal command function, that is called when the client is unblocked in order to return a reply to the client. +* `timeout_callback` is the callback, having the same prototype of a normal command function that is called when the client reached the `ms` timeout. +* `free_privdata` is the callback that is called in order to free the private data. Private data is a pointer to some data that is passed between the API used to unblock the client, to the callback that will send the reply to the client. We'll see how this mechanism works later in this document. +* `ms` is the timeout in milliseconds. When the timeout is reached, the timeout callback is called and the client is automatically aborted. 
+
+Once a client is blocked, it can be unblocked with the following API:
+
+    int RedisModule_UnblockClient(RedisModuleBlockedClient *bc, void *privdata);
+
+The function takes as argument the blocked client object returned by
+the previous call to `RedisModule_BlockClient()`, and unblocks the client.
+Immediately before the client gets unblocked, the `reply_callback` function
+specified when the client was blocked is called: this function will
+have access to the `privdata` pointer used here.
+
+IMPORTANT: The above function is thread safe, and can be called from within
+a thread doing some work in order to implement the command that blocked
+the client.
+
+The `privdata` data will be freed automatically using the `free_privdata`
+callback when the client is unblocked. This is useful **since the reply
+callback may never be called** in case the client times out or disconnects
+from the server, so it's important that it's up to an external function
+to have the responsibility to free the data passed if needed.
+
+To better understand how the API works, we can imagine writing a command
+that blocks a client for one second, and then sends "Hello!" as a reply.
+
+Note: arity checks and other unimportant things are not implemented
+in this command, in order to keep the example simple.
+
+    int Example_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv,
+                             int argc)
+    {
+        RedisModuleBlockedClient *bc =
+            RedisModule_BlockClient(ctx,reply_func,timeout_func,NULL,0);
+
+        pthread_t tid;
+        pthread_create(&tid,NULL,threadmain,bc);
+
+        return REDISMODULE_OK;
+    }
+
+    void *threadmain(void *arg) {
+        RedisModuleBlockedClient *bc = arg;
+
+        sleep(1); /* Wait one second and unblock. */
+        RedisModule_UnblockClient(bc,NULL);
+    }
+
+The above command blocks the client ASAP, spawning a thread that will
+wait a second and will unblock the client. Let's check the reply and
+timeout callbacks, which are in our case very similar, since they
+just reply to the client with a different reply type.
+
+    int reply_func(RedisModuleCtx *ctx, RedisModuleString **argv,
+                   int argc)
+    {
+        return RedisModule_ReplyWithSimpleString(ctx,"Hello!");
+    }
+
+    int timeout_func(RedisModuleCtx *ctx, RedisModuleString **argv,
+                     int argc)
+    {
+        return RedisModule_ReplyWithNull(ctx);
+    }
+
+The reply callback just sends the "Hello!" string to the client.
+The important bit here is that the reply callback is called when the
+client is unblocked from the thread.
+
+The timeout command returns `NULL`, as it often happens with actual
+Redis blocking commands timing out.
+
+Passing reply data when unblocking
+---
+
+The above example is simple to understand but lacks an important
+real world aspect of an actual blocking command implementation: often
+the reply function will need to know what to reply to the client,
+and this information is often provided as the client is unblocked.
+
+We could modify the above example so that the thread generates a
+random number after waiting one second. You can think of it as an
+actually expensive operation of some kind. Then this random number
+can be passed to the reply function so that we return it to the command
+caller. In order to make this work, we modify the functions as follows:
+
+    void *threadmain(void *arg) {
+        RedisModuleBlockedClient *bc = arg;
+
+        sleep(1); /* Wait one second and unblock. */
+
+        long *mynumber = RedisModule_Alloc(sizeof(long));
+        *mynumber = rand();
+        RedisModule_UnblockClient(bc,mynumber);
+    }
+
+As you can see, now the unblocking call is passing some private data,
+that is the `mynumber` pointer, to the reply callback. 
In order to
+obtain this private data, the reply callback will use the following
+function:
+
+    void *RedisModule_GetBlockedClientPrivateData(RedisModuleCtx *ctx);
+
+So our reply callback is modified like this:
+
+    int reply_func(RedisModuleCtx *ctx, RedisModuleString **argv,
+                   int argc)
+    {
+        long *mynumber = RedisModule_GetBlockedClientPrivateData(ctx);
+        /* IMPORTANT: don't free mynumber here, but in the
+         * free privdata callback. */
+        return RedisModule_ReplyWithLongLong(ctx,*mynumber);
+    }
+
+Note that we also need to pass a `free_privdata` function when blocking
+the client with `RedisModule_BlockClient()`, since the allocated
+long value must be freed. Our callback will look like the following:
+
+    void free_privdata(void *privdata) {
+        RedisModule_Free(privdata);
+    }
+
+NOTE: It is important to stress that the private data is best freed in the
+`free_privdata` callback because the reply function may not be called
+if the client disconnects or times out.
+
+Also note that the private data is also accessible from the timeout
+callback, always using the `GetBlockedClientPrivateData()` API.
+
+Aborting the blocking of a client
+---
+
+One problem that sometimes arises is that we need to allocate resources
+in order to implement the non blocking command. So we block the client,
+then, for example, try to create a thread, but the thread creation function
+returns an error. What to do in such a condition in order to recover? We
+don't want to leave the client blocked, nor do we want to call `UnblockClient()`
+because this will trigger the reply callback to be called.
+
+In this case the best thing to do is to use the following function:
+
+    int RedisModule_AbortBlock(RedisModuleBlockedClient *bc);
+
+Practically this is how to use it:
+
+    int Example_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv,
+                             int argc)
+    {
+        RedisModuleBlockedClient *bc =
+            RedisModule_BlockClient(ctx,reply_func,timeout_func,NULL,0);
+
+        pthread_t tid;
+        if (pthread_create(&tid,NULL,threadmain,bc) != 0) {
+            RedisModule_AbortBlock(bc);
+            RedisModule_ReplyWithError(ctx,"Sorry can't create a thread");
+        }
+
+        return REDISMODULE_OK;
+    }
+
+The client will be unblocked but the reply callback will not be called.
+
+Implementing the command, reply and timeout callback using a single function
+---
+
+The following functions can be used in order to implement the reply and
+timeout callbacks with the same function that implements the primary command
+function:
+
+    int RedisModule_IsBlockedReplyRequest(RedisModuleCtx *ctx);
+    int RedisModule_IsBlockedTimeoutRequest(RedisModuleCtx *ctx);
+
+So I could rewrite the example command without using a separate
+reply and timeout callback:
+
+    int Example_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv,
+                             int argc)
+    {
+        if (RedisModule_IsBlockedReplyRequest(ctx)) {
+            long *mynumber = RedisModule_GetBlockedClientPrivateData(ctx);
+            return RedisModule_ReplyWithLongLong(ctx,*mynumber);
+        } else if (RedisModule_IsBlockedTimeoutRequest(ctx)) {
+            return RedisModule_ReplyWithNull(ctx);
+        }
+
+        RedisModuleBlockedClient *bc =
+            RedisModule_BlockClient(ctx,reply_func,timeout_func,NULL,0);
+
+        pthread_t tid;
+        if (pthread_create(&tid,NULL,threadmain,bc) != 0) {
+            RedisModule_AbortBlock(bc);
+            RedisModule_ReplyWithError(ctx,"Sorry can't create a thread");
+        }
+
+        return REDISMODULE_OK;
+    }
+
+Functionally it is the same but there are people that will prefer the less
+verbose implementation that concentrates most of the command logic in a
+single function.
+ +Working on copies of data inside a thread +--- + +An interesting pattern in order to work with threads implementing the +slow part of a command, is to work with a copy of the data, so that +while some operation is performed in a key, the user continues to see +the old version. However when the thread terminated its work, the +representations are swapped and the new, processed version, is used. + +An example of this approach is the +[Neural Redis module](https://github.com/antirez/neural-redis) +where neural networks are trained in different threads while the +user can still execute and inspect their older versions. + +Future work +--- + +An API is work in progress right now in order to allow Redis modules APIs +to be called in a safe way from threads, so that the threaded command +can access the data space and do incremental operations. + +There is no ETA for this feature but it may appear in the course of the +Redis 4.0 release at some point. From a7af7a1f8e3ea3c29b1b74fb679b8e2ebaa436f3 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 13 Oct 2016 16:57:40 +0200 Subject: [PATCH 0169/1722] Modules: AbortBlock() API implemented. --- src/module.c | 10 +++++++++- src/modules/INTRO.md | 1 + src/redismodule.h | 2 ++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/module.c b/src/module.c index 91cd53090..66c09a426 100644 --- a/src/module.c +++ b/src/module.c @@ -3140,6 +3140,13 @@ int RM_UnblockClient(RedisModuleBlockedClient *bc, void *privdata) { return REDISMODULE_OK; } +/* Abort a blocked client blocking operation: the client will be unblocked + * without firing the reply callback. */ +int RM_AbortBlock(RedisModuleBlockedClient *bc) { + bc->reply_callback = NULL; + return RM_UnblockClient(bc,NULL); +} + /* This function will check the moduleUnblockedClients queue in order to * call the reply callback and really unblock the client. 
* @@ -3163,7 +3170,7 @@ void moduleHandleBlockedClients(void) { /* Release the lock during the loop, as long as we don't * touch the shared list. */ - if (c != NULL) { + if (c != NULL && bc->reply_callback != NULL) { RedisModuleCtx ctx = REDISMODULE_CTX_INIT; ctx.flags |= REDISMODULE_CTX_BLOCKED_REPLY; ctx.blocked_privdata = bc->privdata; @@ -3545,5 +3552,6 @@ void moduleRegisterCoreAPI(void) { REGISTER_API(IsBlockedReplyRequest); REGISTER_API(IsBlockedTimeoutRequest); REGISTER_API(GetBlockedClientPrivateData); + REGISTER_API(AbortBlock); REGISTER_API(Milliseconds); } diff --git a/src/modules/INTRO.md b/src/modules/INTRO.md index e5576b7fc..3ac6a4673 100644 --- a/src/modules/INTRO.md +++ b/src/modules/INTRO.md @@ -6,6 +6,7 @@ The modules documentation is composed of the following files: * `INTRO.md` (this file). An overview about Redis Modules system and API. It's a good idea to start your reading here. * `API.md` is generated from module.c top comments of RedisMoule functions. It is a good reference in order to understand how each function works. * `TYPES.md` covers the implementation of native data types into modules. +* `BLOCK.md` shows how to write blocking commands that will not reply immediately, but will block the client, without blocking the Redis server, and will provide a reply whenever will be possible. 
Redis modules make possible to extend Redis functionality using external modules, implementing new Redis commands at a speed and with features diff --git a/src/redismodule.h b/src/redismodule.h index 104de9586..a07c09488 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -200,6 +200,7 @@ int REDISMODULE_API_FUNC(RedisModule_UnblockClient)(RedisModuleBlockedClient *bc int REDISMODULE_API_FUNC(RedisModule_IsBlockedReplyRequest)(RedisModuleCtx *ctx); int REDISMODULE_API_FUNC(RedisModule_IsBlockedTimeoutRequest)(RedisModuleCtx *ctx); void *REDISMODULE_API_FUNC(RedisModule_GetBlockedClientPrivateData)(RedisModuleCtx *ctx); +int REDISMODULE_API_FUNC(RedisModule_AbortBlock)(RedisModuleBlockedClient *bc); long long REDISMODULE_API_FUNC(RedisModule_Milliseconds)(void); /* This is included inline inside each Redis module. */ @@ -307,6 +308,7 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int REDISMODULE_GET_API(IsBlockedReplyRequest); REDISMODULE_GET_API(IsBlockedTimeoutRequest); REDISMODULE_GET_API(GetBlockedClientPrivateData); + REDISMODULE_GET_API(AbortBlock); REDISMODULE_GET_API(Milliseconds); RedisModule_SetModuleAttribs(ctx,name,ver,apiver); From 0d55b08656b8991416ef92e3bad984b3d7120103 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 13 Oct 2016 17:00:29 +0200 Subject: [PATCH 0170/1722] Modules: use RedisModule_AbortBlock() in the example. 
--- src/modules/helloblock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/modules/helloblock.c b/src/modules/helloblock.c index f76b3beb5..71ec9b121 100644 --- a/src/modules/helloblock.c +++ b/src/modules/helloblock.c @@ -99,7 +99,7 @@ int HelloBlock_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int a targ[1] = (void*)(unsigned long) delay; if (pthread_create(&tid,NULL,HelloBlock_ThreadMain,targ) != 0) { - /* RedisModule_BlockedClientAbort(bc); */ + RedisModule_AbortBlock(bc); return RedisModule_ReplyWithError(ctx,"-ERR Can't start thread"); } return REDISMODULE_OK; From a89f98b8029b49e910455e0058777444dfcf5a4c Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 14 Oct 2016 15:28:04 +0200 Subject: [PATCH 0171/1722] SWAPDB command. This new command swaps two Redis databases, so that immediately all the clients connected to a given DB will see the data of the other DB, and the other way around. Example: SWAPDB 0 1 This will swap DB 0 with DB 1. All the clients connected with DB 0 will immediately see the new data, exactly like all the clients connected with DB 1 will see the data that was formerly of DB 0. MOTIVATION AND HISTORY --- The command was recently demanded by Pedro Melo, but was suggested in the past multiple times, and always refused by me. The reason why it was asked: Imagine you have clients operating in DB 0. At the same time, you create a new version of the dataset in DB 1. When the new version of the dataset is available, you immediately want to swap the two views, so that the clients will transparently use the new version of the data. At the same time you'll likely destroy the DB 1 dataset (that contains the old data) and start to build a new version, to repeat the process. This is an interesting pattern, but the reason why I always opposed to implement this, was that FLUSHDB was a blocking command in Redis before Redis 4.0 improvements. 
Now we have FLUSHDB ASYNC that releases the old data in O(1) from the point of view of the client, to reclaim memory incrementally in a different thread. At this point, the pattern can really be supported without latency spikes, so I'm providing this implementation for the users to comment. In case a very compelling argument will be made against this new command it may be removed. BEHAVIOR WITH BLOCKING OPERATIONS --- If a client is blocking for a list in a given DB, after the swap it will still be blocked in the same DB ID, since this is the most logical thing to do: if I was blocked for a list push to list "foo", even after the swap I want still a LPUSH to reach the key "foo" in the same DB in order to unblock. However an interesting thing happens when a client is, for instance, blocked waiting for new elements in list "foo" of DB 0. Then the DB 0 and 1 are swapped with SWAPDB. However the DB 1 happened to have a list called "foo" containing elements. When this happens, this implementation can correctly unblock the client. It is possible that there are subtle corner cases that are not covered in the implementation, but since the command is self-contained from the POV of the implementation and the Redis core, it cannot cause anything bad if not used. Tests and documentation are yet to be provided. 
--- src/db.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++- src/server.c | 1 + src/server.h | 1 + 3 files changed, 88 insertions(+), 1 deletion(-) diff --git a/src/db.c b/src/db.c index c7940d735..268e7c384 100644 --- a/src/db.c +++ b/src/db.c @@ -471,7 +471,7 @@ void selectCommand(client *c) { return; } if (selectDb(c,id) == C_ERR) { - addReplyError(c,"invalid DB index"); + addReplyError(c,"DB index is out of range"); } else { addReply(c,shared.ok); } @@ -926,6 +926,91 @@ void moveCommand(client *c) { addReply(c,shared.cone); } +/* Helper function for dbSwapDatabases(): scans the list of keys that have + * one or more blocked clients for B[LR]POP or other list blocking commands + * and signal the keys are ready if they are lists. See the comment where + * the function is used for more info. */ +void scanDatabaseForReadyLists(redisDb *db) { + dictEntry *de; + dictIterator *di = dictGetSafeIterator(db->blocking_keys); + while((de = dictNext(di)) != NULL) { + robj *key = dictGetKey(de); + robj *value = lookupKey(db,key,LOOKUP_NOTOUCH); + if (value && value->type == OBJ_LIST) + signalListAsReady(db, key); + } + dictReleaseIterator(di); +} + +/* Swap two databases at runtime so that all clients will magically see + * the new database even if already connected. Note that the client + * structure c->db points to a given DB, so we need to be smarter and + * swap the underlying referenced structures, otherwise we would need + * to fix all the references to the Redis DB structure. + * + * Returns C_ERR if at least one of the DB ids are out of range, otherwise + * C_OK is returned. */ +int dbSwapDatabases(int id1, int id2) { + if (id1 < 0 || id1 >= server.dbnum || + id2 < 0 || id2 >= server.dbnum) return C_ERR; + if (id1 == id2) return C_OK; + redisDb aux = server.db[id1]; + redisDb *db1 = &server.db[id1], *db2 = &server.db[id2]; + + /* Swap hash tables. 
Note that we don't swap blocking_keys, + * ready_keys and watched_keys, since we want clients to + * remain in the same DB they were. */ + db1->dict = db2->dict; + db1->expires = db2->expires; + db1->avg_ttl = db2->avg_ttl; + + db2->dict = aux.dict; + db2->expires = aux.expires; + db2->avg_ttl = aux.avg_ttl; + + /* Now we need to handle clients blocked on lists: as an effect + * of swapping the two DBs, a client that was waiting for list + * X in a given DB, may now actually be unblocked if X happens + * to exist in the new version of the DB, after the swap. + * + * However normally we only do this check for efficiency reasons + * in dbAdd() when a list is created. So here we need to rescan + * the list of clients blocked on lists and signal lists as ready + * if needed. */ + scanDatabaseForReadyLists(db1); + scanDatabaseForReadyLists(db2); + return C_OK; +} + +/* SWAPDB db1 db2 */ +void swapdbCommand(client *c) { + long id1, id2; + + /* Not allowed in cluster mode: we have just DB 0 there. */ + if (server.cluster_enabled) { + addReplyError(c,"SWAPDB is not allowed in cluster mode"); + return; + } + + /* Get the two DBs indexes. */ + if (getLongFromObjectOrReply(c, c->argv[1], &id1, + "invalid first DB index") != C_OK) + return; + + if (getLongFromObjectOrReply(c, c->argv[2], &id2, + "invalid second DB index") != C_OK) + return; + + /* Swap... 
*/ + if (dbSwapDatabases(id1,id2) == C_ERR) { + addReplyError(c,"DB index is out of range"); + return; + } else { + server.dirty++; + addReply(c,shared.ok); + } +} + /*----------------------------------------------------------------------------- * Expires API *----------------------------------------------------------------------------*/ diff --git a/src/server.c b/src/server.c index a05491852..7e9b962b3 100644 --- a/src/server.c +++ b/src/server.c @@ -219,6 +219,7 @@ struct redisCommand redisCommandTable[] = { {"msetnx",msetnxCommand,-3,"wm",0,NULL,1,-1,2,0,0}, {"randomkey",randomkeyCommand,1,"rR",0,NULL,0,0,0,0,0}, {"select",selectCommand,2,"lF",0,NULL,0,0,0,0,0}, + {"swapdb",swapdbCommand,3,"wF",0,NULL,0,0,0,0,0}, {"move",moveCommand,3,"wF",0,NULL,1,1,1,0,0}, {"rename",renameCommand,3,"w",0,NULL,1,2,1,0,0}, {"renamenx",renamenxCommand,3,"wF",0,NULL,1,2,1,0,0}, diff --git a/src/server.h b/src/server.h index 69ee52e64..b5dbaf0a5 100644 --- a/src/server.h +++ b/src/server.h @@ -1731,6 +1731,7 @@ void incrbyCommand(client *c); void decrbyCommand(client *c); void incrbyfloatCommand(client *c); void selectCommand(client *c); +void swapdbCommand(client *c); void randomkeyCommand(client *c); void keysCommand(client *c); void scanCommand(client *c); From 96dad4da1fc458293d3cbb7dcbe8f49686dd16c5 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 14 Oct 2016 15:48:11 +0200 Subject: [PATCH 0172/1722] Fix SELECT test, broken cause change in error msg. 
--- tests/unit/other.tcl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/other.tcl b/tests/unit/other.tcl index 2f5773930..1d21b561a 100644 --- a/tests/unit/other.tcl +++ b/tests/unit/other.tcl @@ -52,7 +52,7 @@ start_server {tags {"other"}} { test {SELECT an out of range DB} { catch {r select 1000000} err set _ $err - } {*invalid*} + } {*index is out of range*} tags {consistency} { if {![catch {package require sha1}]} { From 5d5ce20ee6827bd584f31f471f18ff8c883a56b3 Mon Sep 17 00:00:00 2001 From: Pedro Melo Date: Mon, 17 Oct 2016 12:50:04 +0100 Subject: [PATCH 0173/1722] Fixes compilation on MacOS 10.8.5, Clang tags/Apple/clang-421.0.57 Redis fails to compile on MacOS 10.8.5 with Clang 4, version 421.0.57 (based on LLVM 3.1svn). When compiling zmalloc.c, we get these warnings: CC zmalloc.o zmalloc.c:109:5: warning: implicit declaration of function '__atomic_add_fetch' is invalid in C99 [-Wimplicit-function-declaration] update_zmalloc_stat_alloc(zmalloc_size(ptr)); ^ zmalloc.c:75:9: note: expanded from macro 'update_zmalloc_stat_alloc' atomicIncr(used_memory,__n,used_memory_mutex); \ ^ ./atomicvar.h:57:37: note: expanded from macro 'atomicIncr' #define atomicIncr(var,count,mutex) __atomic_add_fetch(&var,(count),__ATOMIC_RELAXED) ^ zmalloc.c:145:5: warning: implicit declaration of function '__atomic_sub_fetch' is invalid in C99 [-Wimplicit-function-declaration] update_zmalloc_stat_free(oldsize); ^ zmalloc.c:85:9: note: expanded from macro 'update_zmalloc_stat_free' atomicDecr(used_memory,__n,used_memory_mutex); \ ^ ./atomicvar.h:58:37: note: expanded from macro 'atomicDecr' #define atomicDecr(var,count,mutex) __atomic_sub_fetch(&var,(count),__ATOMIC_RELAXED) ^ zmalloc.c:205:9: warning: implicit declaration of function '__atomic_load_n' is invalid in C99 [-Wimplicit-function-declaration] atomicGet(used_memory,um,used_memory_mutex); ^ ./atomicvar.h:60:14: note: expanded from macro 'atomicGet' dstvar = __atomic_load_n(&var,__ATOMIC_RELAXED); 
\ ^ 3 warnings generated. Also on lazyfree.c: CC lazyfree.o lazyfree.c:68:13: warning: implicit declaration of function '__atomic_add_fetch' is invalid in C99 [-Wimplicit-function-declaration] atomicIncr(lazyfree_objects,1,lazyfree_objects_mutex); ^ ./atomicvar.h:57:37: note: expanded from macro 'atomicIncr' #define atomicIncr(var,count,mutex) __atomic_add_fetch(&var,(count),__ATOMIC_RELAXED) ^ lazyfree.c:111:5: warning: implicit declaration of function '__atomic_sub_fetch' is invalid in C99 [-Wimplicit-function-declaration] atomicDecr(lazyfree_objects,1,lazyfree_objects_mutex); ^ ./atomicvar.h:58:37: note: expanded from macro 'atomicDecr' #define atomicDecr(var,count,mutex) __atomic_sub_fetch(&var,(count),__ATOMIC_RELAXED) ^ 2 warnings generated. Then in the linking stage: LINK redis-server Undefined symbols for architecture x86_64: "___atomic_add_fetch", referenced from: _zmalloc in zmalloc.o _zcalloc in zmalloc.o _zrealloc in zmalloc.o _dbAsyncDelete in lazyfree.o _emptyDbAsync in lazyfree.o _slotToKeyFlushAsync in lazyfree.o "___atomic_load_n", referenced from: _zmalloc_used_memory in zmalloc.o _zmalloc_get_fragmentation_ratio in zmalloc.o "___atomic_sub_fetch", referenced from: _zrealloc in zmalloc.o _zfree in zmalloc.o _lazyfreeFreeObjectFromBioThread in lazyfree.o _lazyfreeFreeDatabaseFromBioThread in lazyfree.o _lazyfreeFreeSlotsMapFromBioThread in lazyfree.o ld: symbol(s) not found for architecture x86_64 clang: error: linker command failed with exit code 1 (use -v to see invocation) make[1]: *** [redis-server] Error 1 make: *** [all] Error 2 With this patch, the compilation is sucessful, no warnings. Running `make test` we get a almost clean bill of health. 
Test pass with one exception: [err]: Check for memory leaks (pid 52793) in tests/unit/dump.tcl [err]: Check for memory leaks (pid 53103) in tests/unit/auth.tcl [err]: Check for memory leaks (pid 53117) in tests/unit/auth.tcl [err]: Check for memory leaks (pid 53131) in tests/unit/protocol.tcl [err]: Check for memory leaks (pid 53145) in tests/unit/protocol.tcl [ok]: Check for memory leaks (pid 53160) [err]: Check for memory leaks (pid 53175) in tests/unit/scan.tcl [ok]: Check for memory leaks (pid 53189) [err]: Check for memory leaks (pid 53221) in tests/unit/type/incr.tcl . . . Full debug log (289MB, uncompressed) available at https://dl.dropboxusercontent.com/u/75548/logs/redis-debug-log-macos-10.8.5.log.xz Most if not all of the memory leak tests fail. Not sure if this is related. They are the only ones that fail. I belive they are not related, but just the memory leak detector is not working properly on 10.8.5. Signed-off-by: Pedro Melo --- src/atomicvar.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/atomicvar.h b/src/atomicvar.h index 3489972d2..0655ba5c4 100644 --- a/src/atomicvar.h +++ b/src/atomicvar.h @@ -51,7 +51,7 @@ #ifndef __ATOMIC_VAR_H #define __ATOMIC_VAR_H -#if defined(__ATOMIC_RELAXED) +#if defined(__ATOMIC_RELAXED) && (!defined(__clang__) || __apple_build_version__ > 4210057) /* Implementation using __atomic macros. */ #define atomicIncr(var,count,mutex) __atomic_add_fetch(&var,(count),__ATOMIC_RELAXED) From 0a051ea1c15cd6bff6458ff691b6c9049f293de1 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 17 Oct 2016 16:41:39 +0200 Subject: [PATCH 0174/1722] Better target MacOS on __atomic macros conditional compilation. 
--- src/atomicvar.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/atomicvar.h b/src/atomicvar.h index 0655ba5c4..745966062 100644 --- a/src/atomicvar.h +++ b/src/atomicvar.h @@ -51,8 +51,9 @@ #ifndef __ATOMIC_VAR_H #define __ATOMIC_VAR_H -#if defined(__ATOMIC_RELAXED) && (!defined(__clang__) || __apple_build_version__ > 4210057) +#if defined(__ATOMIC_RELAXED) && (!defined(__clang__) || !defined(__APPLE__) || __apple_build_version__ > 4210057) /* Implementation using __atomic macros. */ +#warning "Hey" #define atomicIncr(var,count,mutex) __atomic_add_fetch(&var,(count),__ATOMIC_RELAXED) #define atomicDecr(var,count,mutex) __atomic_sub_fetch(&var,(count),__ATOMIC_RELAXED) From f487fca5027d3f18196871df3834969d98e38f90 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 19 Oct 2016 10:43:40 +0200 Subject: [PATCH 0175/1722] Remove "Hey!" warning... --- src/atomicvar.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/atomicvar.h b/src/atomicvar.h index 745966062..4aa8fa173 100644 --- a/src/atomicvar.h +++ b/src/atomicvar.h @@ -53,7 +53,6 @@ #if defined(__ATOMIC_RELAXED) && (!defined(__clang__) || !defined(__APPLE__) || __apple_build_version__ > 4210057) /* Implementation using __atomic macros. */ -#warning "Hey" #define atomicIncr(var,count,mutex) __atomic_add_fetch(&var,(count),__ATOMIC_RELAXED) #define atomicDecr(var,count,mutex) __atomic_sub_fetch(&var,(count),__ATOMIC_RELAXED) From 0ab42c2c00c39838039368e0802f29d4cee23c2e Mon Sep 17 00:00:00 2001 From: sunhe Date: Sat, 22 Oct 2016 01:54:46 +0800 Subject: [PATCH 0176/1722] bitops.c/bitfieldCommand: update higest_write_offset with check --- src/bitops.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/bitops.c b/src/bitops.c index 302e811d2..46eee22c3 100644 --- a/src/bitops.c +++ b/src/bitops.c @@ -907,7 +907,7 @@ void bitfieldCommand(client *c) { struct bitfieldOp *ops = NULL; /* Array of ops to execute at end. */ int owtype = BFOVERFLOW_WRAP; /* Overflow type. 
*/ int readonly = 1; - long higest_write_offset = 0; + size_t higest_write_offset = 0; for (j = 2; j < c->argc; j++) { int remargs = c->argc-j-1; /* Remaining args other than current. */ @@ -957,7 +957,8 @@ void bitfieldCommand(client *c) { if (opcode != BITFIELDOP_GET) { readonly = 0; - higest_write_offset = bitoffset + bits - 1; + if (higest_write_offset < bitoffset + bits - 1) + higest_write_offset = bitoffset + bits - 1; /* INCRBY and SET require another argument. */ if (getLongLongFromObjectOrReply(c,c->argv[j+3],&i64,NULL) != C_OK){ zfree(ops); From c9ffc440c8748dece37d0fa8b8f98aa4fd2cf1b8 Mon Sep 17 00:00:00 2001 From: Michel Tresseras Date: Mon, 24 Oct 2016 17:27:48 +0200 Subject: [PATCH 0177/1722] Typo --- redis.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/redis.conf b/redis.conf index a7b7f3a97..3a66f23d4 100644 --- a/redis.conf +++ b/redis.conf @@ -448,7 +448,7 @@ slave-priority 100 # offers this information, which is used, among other tools, by # Redis Sentinel in order to discover slave instances. # Another place where this info is available is in the output of the -# "ROLE" command of a masteer. +# "ROLE" command of a master. 
# # The listed IP and address normally reported by a slave is obtained # in the following way: From 2025738ce208d4e2479b97e9332d766219b37dae Mon Sep 17 00:00:00 2001 From: jybaek Date: Fri, 28 Oct 2016 10:42:54 +0900 Subject: [PATCH 0178/1722] Add missing fclose() --- src/redis-cli.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/redis-cli.c b/src/redis-cli.c index 054c2fc7c..cbb41f3f1 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -1257,6 +1257,7 @@ void cliLoadPreferences(void) { if (argc > 0) cliSetPreferences(argv,argc,0); sdsfreesplitres(argv,argc); } + fclose(fp); } sdsfree(rcfile); } From 450449344565a4b3e453f4073b62a14fcae856c8 Mon Sep 17 00:00:00 2001 From: deep Date: Fri, 28 Oct 2016 19:47:29 +0800 Subject: [PATCH 0179/1722] fix a bug for quicklistDup() function --- src/quicklist.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/quicklist.c b/src/quicklist.c index 9cb052525..c8b72743c 100644 --- a/src/quicklist.c +++ b/src/quicklist.c @@ -1192,12 +1192,12 @@ quicklist *quicklistDup(quicklist *orig) { current = current->next) { quicklistNode *node = quicklistCreateNode(); - if (node->encoding == QUICKLIST_NODE_ENCODING_LZF) { - quicklistLZF *lzf = (quicklistLZF *)node->zl; + if (current->encoding == QUICKLIST_NODE_ENCODING_LZF) { + quicklistLZF *lzf = (quicklistLZF *)current->zl; size_t lzf_sz = sizeof(*lzf) + lzf->sz; node->zl = zmalloc(lzf_sz); memcpy(node->zl, current->zl, lzf_sz); - } else if (node->encoding == QUICKLIST_NODE_ENCODING_RAW) { + } else if (current->encoding == QUICKLIST_NODE_ENCODING_RAW) { node->zl = zmalloc(current->sz); memcpy(node->zl, current->zl, current->sz); } From 7365268efafb5c77340fd834d489907d94b215a5 Mon Sep 17 00:00:00 2001 From: Guy Benoish Date: Mon, 31 Oct 2016 15:08:17 +0200 Subject: [PATCH 0180/1722] Fixed wrong sizeof(client) in object.c --- src/object.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/object.c b/src/object.c index 
210e980e7..4d14ae8d9 100644 --- a/src/object.c +++ b/src/object.c @@ -828,9 +828,9 @@ struct redisMemOverhead *getMemoryOverheadData(void) { listRewind(server.slaves,&li); while((ln = listNext(&li))) { - client *client = listNodeValue(ln); - mem += getClientOutputBufferMemoryUsage(client); - mem += sdsAllocSize(client->querybuf); + client *c = listNodeValue(ln); + mem += getClientOutputBufferMemoryUsage(c); + mem += sdsAllocSize(c->querybuf); mem += sizeof(client); } } @@ -844,11 +844,11 @@ struct redisMemOverhead *getMemoryOverheadData(void) { listRewind(server.clients,&li); while((ln = listNext(&li))) { - client *client = listNodeValue(ln); - if (client->flags & CLIENT_SLAVE) + client *c = listNodeValue(ln); + if (c->flags & CLIENT_SLAVE) continue; - mem += getClientOutputBufferMemoryUsage(client); - mem += sdsAllocSize(client->querybuf); + mem += getClientOutputBufferMemoryUsage(c); + mem += sdsAllocSize(c->querybuf); mem += sizeof(client); } } From 47396311df4efe116ff96be618d3cb65cc5d4889 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 31 Oct 2016 15:46:58 +0100 Subject: [PATCH 0181/1722] Test: regression test for #3564 added. 
--- tests/unit/bitfield.tcl | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/unit/bitfield.tcl b/tests/unit/bitfield.tcl index 26e47db0f..d76452b1b 100644 --- a/tests/unit/bitfield.tcl +++ b/tests/unit/bitfield.tcl @@ -189,4 +189,13 @@ start_server {tags {"bitops"}} { r set bits 1 r bitfield bits get u1 0 } {0} + + test {BITFIELD regression for #3564} { + for {set j 0} {$j < 10} {incr j} { + r del mystring + set res [r BITFIELD mystring SET i8 0 10 SET i8 64 10 INCRBY i8 10 99900] + assert {$res eq {0 0 60}} + } + r del mystring + } } From 3b7e7aa13b544d510a1afef34ff7b66c9cfb928d Mon Sep 17 00:00:00 2001 From: Dvir Volk Date: Mon, 31 Oct 2016 18:48:16 +0200 Subject: [PATCH 0182/1722] fixed sizeof in allocating io RedisModuleCtx* --- src/module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/module.c b/src/module.c index 66c09a426..876513cbe 100644 --- a/src/module.c +++ b/src/module.c @@ -3000,7 +3000,7 @@ void RM_EmitAOF(RedisModuleIO *io, const char *cmdname, const char *fmt, ...) { RedisModuleCtx *RM_GetContextFromIO(RedisModuleIO *io) { if (io->ctx) return io->ctx; /* Can't have more than one... */ RedisModuleCtx ctxtemplate = REDISMODULE_CTX_INIT; - io->ctx = zmalloc(sizeof(*io)); + io->ctx = zmalloc(sizeof(RedisModuleCtx)); *(io->ctx) = ctxtemplate; io->ctx->module = io->type->module; io->ctx->client = NULL; From 90508582541827f44d665a6a44a5beedd54b22f9 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 2 Nov 2016 15:14:48 +0100 Subject: [PATCH 0183/1722] redis-cli typo fixed: perferences -> preferences. Thanks to @qiaodaimadelaowang for signaling the issue. Close #3585. 
--- src/redis-cli.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index cbb41f3f1..85ca404fb 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -336,7 +336,7 @@ static void cliOutputGenericHelp(void) { " \"help \" to get a list of possible help topics\n" " \"quit\" to exit\n" "\n" - "To set redis-cli perferences:\n" + "To set redis-cli preferences:\n" " \":set hints\" enable online hints\n" " \":set nohints\" disable online hints\n" "Set your preferences in ~/.redisclirc\n", @@ -1222,7 +1222,7 @@ static sds *cliSplitArgs(char *line, int *argc) { } } -/* Set the CLI perferences. This function is invoked when an interactive +/* Set the CLI preferences. This function is invoked when an interactive * ":command" is called, or when reading ~/.redisclirc file, in order to * set user preferences. */ void cliSetPreferences(char **argv, int argc, int interactive) { From ba91b5dde4eec46425e1a67ae1ed1c8e80922fd9 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 9 Nov 2016 11:31:06 +0100 Subject: [PATCH 0184/1722] PSYNC2: different improvements to Redis replication. The gist of the changes is that now, partial resynchronizations between slaves and masters (without the need of a full resync with RDB transfer and so forth), work in a number of cases when it was impossible in the past. For instance: 1. When a slave is promoted to mastrer, the slaves of the old master can partially resynchronize with the new master. 2. Chained slalves (slaves of slaves) can be moved to replicate to other slaves or the master itsef, without requiring a full resync. 3. The master itself, after being turned into a slave, is able to partially resynchronize with the new master, when it joins replication again. In order to obtain this, the following main changes were operated: * Slaves also take a replication backlog, not just masters. * Same stream replication for all the slaves and sub slaves. 
The replication stream is identical from the top level master to its slaves and is also the same from the slaves to their sub-slaves and so forth. This means that if a slave is later promoted to master, it has the same replication backlong, and can partially resynchronize with its slaves (that were previously slaves of the old master). * A given replication history is no longer identified by the `runid` of a Redis node. There is instead a `replication ID` which changes every time the instance has a new history no longer coherent with the past one. So, for example, slaves publish the same replication history of their master, however when they are turned into masters, they publish a new replication ID, but still remember the old ID, so that they are able to partially resynchronize with slaves of the old master (up to a given offset). * The replication protocol was slightly modified so that a new extended +CONTINUE reply from the master is able to inform the slave of a replication ID change. * REPLCONF CAPA is used in order to notify masters that a slave is able to understand the new +CONTINUE reply. * The RDB file was extended with an auxiliary field that is able to select a given DB after loading in the slave, so that the slave can continue receiving the replication stream from the point it was disconnected without requiring the master to insert "SELECT" statements. This is useful in order to guarantee the "same stream" property, because the slave must be able to accumulate an identical backlog. * Slave pings to sub-slaves are now sent in a special form, when the top-level master is disconnected, in order to don't interfer with the replication stream. We just use out of band "\n" bytes as in other parts of the Redis protocol. 
An old design document is available here: https://gist.github.com/antirez/ae068f95c0d084891305 However the implementation is not identical to the description because during the work to implement it, different changes were needed in order to make things working well. --- redis.conf | 4 + src/aof.c | 4 +- src/db.c | 2 +- src/debug.c | 4 +- src/networking.c | 26 ++- src/rdb.c | 49 ++++-- src/rdb.h | 10 +- src/replication.c | 418 +++++++++++++++++++++++++++++++++++----------- src/server.c | 19 ++- src/server.h | 47 ++++-- 10 files changed, 440 insertions(+), 143 deletions(-) diff --git a/redis.conf b/redis.conf index a7b7f3a97..bce5332e0 100644 --- a/redis.conf +++ b/redis.conf @@ -402,6 +402,10 @@ repl-disable-tcp-nodelay no # need to elapse, starting from the time the last slave disconnected, for # the backlog buffer to be freed. # +# Note that slaves never free the backlog for timeout, since they may be +# promoted to masters later, and should be able to correctly "partially +# resynchronize" with the slaves: hence they should always accumulate backlog. +# # A value of 0 means to never release the backlog. 
# # repl-backlog-ttl 3600 diff --git a/src/aof.c b/src/aof.c index c75153cc7..07d8561da 100644 --- a/src/aof.c +++ b/src/aof.c @@ -653,7 +653,7 @@ int loadAppendOnlyFile(char *filename) { serverLog(LL_NOTICE,"Reading RDB preamble from AOF file..."); if (fseek(fp,0,SEEK_SET) == -1) goto readerr; rioInitWithFile(&rdb,fp); - if (rdbLoadRio(&rdb) != C_OK) { + if (rdbLoadRio(&rdb,NULL) != C_OK) { serverLog(LL_WARNING,"Error reading the RDB preamble of the AOF file, AOF loading aborted"); goto readerr; } else { @@ -1152,7 +1152,7 @@ int rewriteAppendOnlyFile(char *filename) { if (server.aof_use_rdb_preamble) { int error; - if (rdbSaveRio(&aof,&error,RDB_SAVE_AOF_PREAMBLE) == C_ERR) { + if (rdbSaveRio(&aof,&error,RDB_SAVE_AOF_PREAMBLE,NULL) == C_ERR) { errno = error; goto werr; } diff --git a/src/db.c b/src/db.c index 268e7c384..55ae663c2 100644 --- a/src/db.c +++ b/src/db.c @@ -413,7 +413,7 @@ void flushallCommand(client *c) { /* Normally rdbSave() will reset dirty, but we don't want this here * as otherwise FLUSHALL will not be replicated nor put into the AOF. 
*/ int saved_dirty = server.dirty; - rdbSave(server.rdb_filename); + rdbSave(server.rdb_filename,NULL); server.dirty = saved_dirty; } server.dirty++; diff --git a/src/debug.c b/src/debug.c index d48caedcc..f4689d532 100644 --- a/src/debug.c +++ b/src/debug.c @@ -320,12 +320,12 @@ void debugCommand(client *c) { if (c->argc >= 3) c->argv[2] = tryObjectEncoding(c->argv[2]); serverAssertWithInfo(c,c->argv[0],1 == 2); } else if (!strcasecmp(c->argv[1]->ptr,"reload")) { - if (rdbSave(server.rdb_filename) != C_OK) { + if (rdbSave(server.rdb_filename,NULL) != C_OK) { addReply(c,shared.err); return; } emptyDb(-1,EMPTYDB_NO_FLAGS,NULL); - if (rdbLoad(server.rdb_filename) != C_OK) { + if (rdbLoad(server.rdb_filename,NULL) != C_OK) { addReplyError(c,"Error trying to load the RDB dump"); return; } diff --git a/src/networking.c b/src/networking.c index 2be40ae15..b2cec8631 100644 --- a/src/networking.c +++ b/src/networking.c @@ -352,6 +352,14 @@ void addReplySds(client *c, sds s) { } } +/* This low level function just adds whatever protocol you send it to the + * client buffer, trying the static buffer initially, and using the string + * of objects if not possible. + * + * It is efficient because does not create an SDS object nor an Redis object + * if not needed. The object will only be created by calling + * _addReplyStringToList() if we fail to extend the existing tail object + * in the list of objects. */ void addReplyString(client *c, const char *s, size_t len) { if (prepareClientToWrite(c) != C_OK) return; if (_addReplyToBuffer(c,s,len) != C_OK) @@ -1022,7 +1030,7 @@ int processInlineBuffer(client *c) { char *newline; int argc, j; sds *argv, aux; - size_t querylen; + size_t querylen, protolen; /* Search for end of line */ newline = strchr(c->querybuf,'\n'); @@ -1035,6 +1043,7 @@ int processInlineBuffer(client *c) { } return C_ERR; } + protolen = (newline - c->querybuf)+1; /* Total protocol bytes of command. */ /* Handle the \r\n case. 
*/ if (newline && newline != c->querybuf && *(newline-1) == '\r') @@ -1057,6 +1066,15 @@ int processInlineBuffer(client *c) { if (querylen == 0 && c->flags & CLIENT_SLAVE) c->repl_ack_time = server.unixtime; + /* Newline from masters can be used to prevent timeouts, but should + * not affect the replication offset since they are always sent + * "out of band" directly writing to the socket and without passing + * from the output buffers. */ + if (querylen == 0 && c->flags & CLIENT_MASTER) { + c->reploff -= protolen; + while (protolen--) chopReplicationBacklog(); + } + /* Leave data after the first line of the query in the buffer */ sdsrange(c->querybuf,querylen+2,-1); @@ -1321,7 +1339,11 @@ void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) { sdsIncrLen(c->querybuf,nread); c->lastinteraction = server.unixtime; - if (c->flags & CLIENT_MASTER) c->reploff += nread; + if (c->flags & CLIENT_MASTER) { + c->reploff += nread; + replicationFeedSlavesFromMasterStream(server.slaves, + c->querybuf+qblen,nread); + } server.stat_net_input_bytes += nread; if (sdslen(c->querybuf) > server.client_max_querybuf_len) { sds ci = catClientInfoString(sdsempty(),c), bytes = sdsempty(); diff --git a/src/rdb.c b/src/rdb.c index 29f880dac..aa9c631de 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -835,7 +835,7 @@ int rdbSaveAuxFieldStrInt(rio *rdb, char *key, long long val) { } /* Save a few default AUX fields with information about the RDB generated. */ -int rdbSaveInfoAuxFields(rio *rdb, int flags) { +int rdbSaveInfoAuxFields(rio *rdb, int flags, rdbSaveInfo *rsi) { int redis_bits = (sizeof(void*) == 8) ? 
64 : 32; int aof_preamble = (flags & RDB_SAVE_AOF_PREAMBLE) != 0; @@ -844,6 +844,16 @@ int rdbSaveInfoAuxFields(rio *rdb, int flags) { if (rdbSaveAuxFieldStrInt(rdb,"redis-bits",redis_bits) == -1) return -1; if (rdbSaveAuxFieldStrInt(rdb,"ctime",time(NULL)) == -1) return -1; if (rdbSaveAuxFieldStrInt(rdb,"used-mem",zmalloc_used_memory()) == -1) return -1; + + /* Handle saving options that generate aux fields. */ + if (rsi) { + if (rsi->repl_stream_db && + rdbSaveAuxFieldStrInt(rdb,"repl-stream-db",rsi->repl_stream_db) + == -1) + { + return -1; + } + } if (rdbSaveAuxFieldStrInt(rdb,"aof-preamble",aof_preamble) == -1) return -1; return 1; } @@ -856,7 +866,7 @@ int rdbSaveInfoAuxFields(rio *rdb, int flags) { * When the function returns C_ERR and if 'error' is not NULL, the * integer pointed by 'error' is set to the value of errno just after the I/O * error. */ -int rdbSaveRio(rio *rdb, int *error, int flags) { +int rdbSaveRio(rio *rdb, int *error, int flags, rdbSaveInfo *rsi) { dictIterator *di = NULL; dictEntry *de; char magic[10]; @@ -869,7 +879,7 @@ int rdbSaveRio(rio *rdb, int *error, int flags) { rdb->update_cksum = rioGenericUpdateChecksum; snprintf(magic,sizeof(magic),"REDIS%04d",RDB_VERSION); if (rdbWriteRaw(rdb,magic,9) == -1) goto werr; - if (rdbSaveInfoAuxFields(rdb,flags) == -1) goto werr; + if (rdbSaveInfoAuxFields(rdb,flags,rsi) == -1) goto werr; for (j = 0; j < server.dbnum; j++) { redisDb *db = server.db+j; @@ -945,7 +955,7 @@ werr: * While the suffix is the 40 bytes hex string we announced in the prefix. * This way processes receiving the payload can understand when it ends * without doing any processing of the content. 
*/ -int rdbSaveRioWithEOFMark(rio *rdb, int *error) { +int rdbSaveRioWithEOFMark(rio *rdb, int *error, rdbSaveInfo *rsi) { char eofmark[RDB_EOF_MARK_SIZE]; getRandomHexChars(eofmark,RDB_EOF_MARK_SIZE); @@ -953,7 +963,7 @@ int rdbSaveRioWithEOFMark(rio *rdb, int *error) { if (rioWrite(rdb,"$EOF:",5) == 0) goto werr; if (rioWrite(rdb,eofmark,RDB_EOF_MARK_SIZE) == 0) goto werr; if (rioWrite(rdb,"\r\n",2) == 0) goto werr; - if (rdbSaveRio(rdb,error,RDB_SAVE_NONE) == C_ERR) goto werr; + if (rdbSaveRio(rdb,error,RDB_SAVE_NONE,rsi) == C_ERR) goto werr; if (rioWrite(rdb,eofmark,RDB_EOF_MARK_SIZE) == 0) goto werr; return C_OK; @@ -964,7 +974,7 @@ werr: /* Write error. */ } /* Save the DB on disk. Return C_ERR on error, C_OK on success. */ -int rdbSave(char *filename) { +int rdbSave(char *filename, rdbSaveInfo *rsi) { char tmpfile[256]; char cwd[MAXPATHLEN]; /* Current working dir path for error messages. */ FILE *fp; @@ -985,7 +995,7 @@ int rdbSave(char *filename) { } rioInitWithFile(&rdb,fp); - if (rdbSaveRio(&rdb,&error,RDB_SAVE_NONE) == C_ERR) { + if (rdbSaveRio(&rdb,&error,RDB_SAVE_NONE,rsi) == C_ERR) { errno = error; goto werr; } @@ -1023,7 +1033,7 @@ werr: return C_ERR; } -int rdbSaveBackground(char *filename) { +int rdbSaveBackground(char *filename, rdbSaveInfo *rsi) { pid_t childpid; long long start; @@ -1040,7 +1050,7 @@ int rdbSaveBackground(char *filename) { /* Child */ closeListeningSockets(0); redisSetProcTitle("redis-rdb-bgsave"); - retval = rdbSave(filename); + retval = rdbSave(filename,rsi); if (retval == C_OK) { size_t private_dirty = zmalloc_get_private_dirty(-1); @@ -1410,7 +1420,7 @@ void rdbLoadProgressCallback(rio *r, const void *buf, size_t len) { /* Load an RDB file from the rio stream 'rdb'. On success C_OK is returned, * otherwise C_ERR is returned and 'errno' is set accordingly. 
*/ -int rdbLoadRio(rio *rdb) { +int rdbLoadRio(rio *rdb, rdbSaveInfo *rsi) { uint64_t dbid; int type, rdbver; redisDb *db = server.db+0; @@ -1501,6 +1511,8 @@ int rdbLoadRio(rio *rdb) { serverLog(LL_NOTICE,"RDB '%s': %s", (char*)auxkey->ptr, (char*)auxval->ptr); + } else if (!strcasecmp(auxkey->ptr,"repl-stream-db")) { + if (rsi) rsi->repl_stream_db = atoi(auxval->ptr); } else { /* We ignore fields we don't understand, as by AUX field * contract. */ @@ -1559,8 +1571,11 @@ eoferr: /* unexpected end of file is handled here with a fatal exit */ /* Like rdbLoadRio() but takes a filename instead of a rio stream. The * filename is open for reading and a rio stream object created in order * to do the actual loading. Moreover the ETA displayed in the INFO - * output is initialized and finalized. */ -int rdbLoad(char *filename) { + * output is initialized and finalized. + * + * If you pass an 'rsi' structure initialized with RDB_SAVE_INFO_INIT, the + * loading code will fill the information fields in the structure. */ +int rdbLoad(char *filename, rdbSaveInfo *rsi) { FILE *fp; rio rdb; int retval; @@ -1568,7 +1583,7 @@ int rdbLoad(char *filename) { if ((fp = fopen(filename,"r")) == NULL) return C_ERR; startLoading(fp); rioInitWithFile(&rdb,fp); - retval = rdbLoadRio(&rdb); + retval = rdbLoadRio(&rdb,rsi); fclose(fp); stopLoading(); return retval; @@ -1721,7 +1736,7 @@ void backgroundSaveDoneHandler(int exitcode, int bysignal) { /* Spawn an RDB child that writes the RDB to the sockets of the slaves * that are currently in SLAVE_STATE_WAIT_BGSAVE_START state. 
*/ -int rdbSaveToSlavesSockets(void) { +int rdbSaveToSlavesSockets(rdbSaveInfo *rsi) { int *fds; uint64_t *clientids; int numfds; @@ -1779,7 +1794,7 @@ int rdbSaveToSlavesSockets(void) { closeListeningSockets(0); redisSetProcTitle("redis-rdb-to-slaves"); - retval = rdbSaveRioWithEOFMark(&slave_sockets,NULL); + retval = rdbSaveRioWithEOFMark(&slave_sockets,NULL,rsi); if (retval == C_OK && rioFlush(&slave_sockets) == 0) retval = C_ERR; @@ -1884,7 +1899,7 @@ void saveCommand(client *c) { addReplyError(c,"Background save already in progress"); return; } - if (rdbSave(server.rdb_filename) == C_OK) { + if (rdbSave(server.rdb_filename,NULL) == C_OK) { addReply(c,shared.ok); } else { addReply(c,shared.err); @@ -1918,7 +1933,7 @@ void bgsaveCommand(client *c) { "Use BGSAVE SCHEDULE in order to schedule a BGSAVE whenver " "possible."); } - } else if (rdbSaveBackground(server.rdb_filename) == C_OK) { + } else if (rdbSaveBackground(server.rdb_filename,NULL) == C_OK) { addReplyStatus(c,"Background saving started"); } else { addReply(c,shared.err); diff --git a/src/rdb.h b/src/rdb.h index 60c52a7c1..efe932255 100644 --- a/src/rdb.h +++ b/src/rdb.h @@ -118,11 +118,11 @@ uint64_t rdbLoadLen(rio *rdb, int *isencoded); int rdbLoadLenByRef(rio *rdb, int *isencoded, uint64_t *lenptr); int rdbSaveObjectType(rio *rdb, robj *o); int rdbLoadObjectType(rio *rdb); -int rdbLoad(char *filename); -int rdbSaveBackground(char *filename); -int rdbSaveToSlavesSockets(void); +int rdbLoad(char *filename, rdbSaveInfo *rsi); +int rdbSaveBackground(char *filename, rdbSaveInfo *rsi); +int rdbSaveToSlavesSockets(rdbSaveInfo *rsi); void rdbRemoveTempFile(pid_t childpid); -int rdbSave(char *filename); +int rdbSave(char *filename, rdbSaveInfo *rsi); ssize_t rdbSaveObject(rio *rdb, robj *o); size_t rdbSavedObjectLen(robj *o); robj *rdbLoadObject(int type, rio *rdb); @@ -136,6 +136,6 @@ int rdbSaveBinaryDoubleValue(rio *rdb, double val); int rdbLoadBinaryDoubleValue(rio *rdb, double *val); int 
rdbSaveBinaryFloatValue(rio *rdb, float val); int rdbLoadBinaryFloatValue(rio *rdb, float *val); -int rdbLoadRio(rio *rdb); +int rdbLoadRio(rio *rdb, rdbSaveInfo *rsi); #endif diff --git a/src/replication.c b/src/replication.c index 67091dd0b..a98d0d35e 100644 --- a/src/replication.c +++ b/src/replication.c @@ -39,6 +39,7 @@ void replicationDiscardCachedMaster(void); void replicationResurrectCachedMaster(int newfd); +void replicationCacheMasterUsingMyself(void); void replicationSendAck(void); void putSlaveOnline(client *slave); int cancelReplicationHandshake(void); @@ -79,11 +80,6 @@ void createReplicationBacklog(void) { server.repl_backlog = zmalloc(server.repl_backlog_size); server.repl_backlog_histlen = 0; server.repl_backlog_idx = 0; - /* When a new backlog buffer is created, we increment the replication - * offset by one to make sure we'll not be able to PSYNC with any - * previous slave. This is needed because we avoid incrementing the - * master_repl_offset if no backlog exists nor slaves are attached. */ - server.master_repl_offset++; /* We don't have any data inside our buffer, but virtually the first * byte we have is the next byte that will be generated for the @@ -153,6 +149,22 @@ void feedReplicationBacklog(void *ptr, size_t len) { server.repl_backlog_histlen + 1; } +/* Remove the last byte from the replication backlog. This + * is useful when we receive an out of band "\n" to keep the connection + * alive but don't want to count it as replication stream. + * + * As a side effect this function adjusts the master replication offset + * of this instance to account for the missing byte. 
*/ +void chopReplicationBacklog(void) { + if (!server.repl_backlog || !server.repl_backlog_histlen) return; + if (server.repl_backlog_idx == 0) + server.repl_backlog_idx = server.repl_backlog_size-1; + else + server.repl_backlog_idx--; + server.master_repl_offset--; + server.repl_backlog_histlen--; +} + /* Wrapper for feedReplicationBacklog() that takes Redis string objects * as input. */ void feedReplicationBacklogWithObject(robj *o) { @@ -170,12 +182,24 @@ void feedReplicationBacklogWithObject(robj *o) { feedReplicationBacklog(p,len); } +/* Propagate write commands to slaves, and populate the replication backlog + * as well. This function is used if the instance is a master: we use + * the commands received by our clients in order to create the replication + * stream. Instead if the instance is a slave and has sub-slaves attached, + * we use replicationFeedSlavesFromMaster() */ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) { listNode *ln; listIter li; int j, len; char llstr[LONG_STR_SIZE]; + /* If the instance is not a top level master, return ASAP: we'll just proxy + * the stream of data we receive from our master instead, in order to + * propagate *identical* replication stream. In this way this slave can + * advertise the same replication ID as the master (since it shares the + * master replication history and has the same backlog and offsets). */ + if (server.masterhost != NULL) return; + /* If there aren't slaves, and there is no backlog buffer to populate, * we can return ASAP. */ if (server.repl_backlog == NULL && listLength(slaves) == 0) return; @@ -265,6 +289,32 @@ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) { } } +/* This function is used in order to proxy what we receive from our master + * to our sub-slaves. 
*/ +#include +void replicationFeedSlavesFromMasterStream(list *slaves, char *buf, size_t buflen) { + listNode *ln; + listIter li; + + { + printf("%zu:",buflen); + for (size_t j = 0; j < buflen; j++) { + printf("%c", isprint(buf[j]) ? buf[j] : '.'); + } + printf("\n"); + } + + if (server.repl_backlog) feedReplicationBacklog(buf,buflen); + listRewind(slaves,&li); + while((ln = listNext(&li))) { + client *slave = ln->value; + + /* Don't feed slaves that are still waiting for BGSAVE to start */ + if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) continue; + addReplyString(slave,buf,buflen); + } +} + void replicationFeedMonitors(client *c, list *monitors, int dictid, robj **argv, int argc) { listNode *ln; listIter li; @@ -329,7 +379,7 @@ long long addReplyReplicationBacklog(client *c, long long offset) { skip = offset - server.repl_backlog_off; serverLog(LL_DEBUG, "[PSYNC] Skipping: %lld", skip); - /* Point j to the oldest byte, that is actaully our + /* Point j to the oldest byte, that is actually our * server.repl_backlog_off byte. */ j = (server.repl_backlog_idx + (server.repl_backlog_size-server.repl_backlog_histlen)) % @@ -361,18 +411,14 @@ long long addReplyReplicationBacklog(client *c, long long offset) { * the BGSAVE process started and before executing any other command * from clients. */ long long getPsyncInitialOffset(void) { - long long psync_offset = server.master_repl_offset; - /* Add 1 to psync_offset if it the replication backlog does not exists - * as when it will be created later we'll increment the offset by one. 
*/ - if (server.repl_backlog == NULL) psync_offset++; - return psync_offset; + return server.master_repl_offset; } /* Send a FULLRESYNC reply in the specific case of a full resynchronization, * as a side effect setup the slave for a full sync in different ways: * - * 1) Remember, into the slave client structure, the offset we sent - * here, so that if new slaves will later attach to the same + * 1) Remember, into the slave client structure, the replication offset + * we sent here, so that if new slaves will later attach to the same * background RDB saving process (by duplicating this client output * buffer), we can get the right offset from this slave. * 2) Set the replication state of the slave to WAIT_BGSAVE_END so that @@ -392,14 +438,14 @@ int replicationSetupSlaveForFullResync(client *slave, long long offset) { slave->replstate = SLAVE_STATE_WAIT_BGSAVE_END; /* We are going to accumulate the incremental changes for this * slave as well. Set slaveseldb to -1 in order to force to re-emit - * a SLEECT statement in the replication stream. */ + * a SELECT statement in the replication stream. */ server.slaveseldb = -1; /* Don't send this reply to slaves that approached us with * the old SYNC command. */ if (!(slave->flags & CLIENT_PRE_PSYNC)) { buflen = snprintf(buf,sizeof(buf),"+FULLRESYNC %s %lld\r\n", - server.runid,offset); + server.replid,offset); if (write(slave->fd,buf,buflen) != buflen) { freeClientAsync(slave); return C_ERR; @@ -415,19 +461,32 @@ int replicationSetupSlaveForFullResync(client *slave, long long offset) { * with the usual full resync. */ int masterTryPartialResynchronization(client *c) { long long psync_offset, psync_len; - char *master_runid = c->argv[1]->ptr; + char *master_replid = c->argv[1]->ptr; char buf[128]; int buflen; - /* Is the runid of this master the same advertised by the wannabe slave - * via PSYNC? If runid changed this master is a different instance and - * there is no way to continue. 
*/ - if (strcasecmp(master_runid, server.runid)) { + /* Parse the replication offset asked by the slave. Go to full sync + * on parse error: this should never happen but we try to handle + * it in a robust way compared to aborting. */ + if (getLongLongFromObjectOrReply(c,c->argv[2],&psync_offset,NULL) != + C_OK) goto need_full_resync; + + /* Is the replication ID of this master the same advertised by the wannabe + * slave via PSYNC? If the replication ID changed this master has a + * different replication history, and there is no way to continue. + * + * Note that there are two potentially valid replication IDs: the ID1 + * and the ID2. The ID2 however is only valid up to a specific offset. */ + if (strcasecmp(master_replid, server.replid) && + (strcasecmp(master_replid, server.replid2) || + psync_offset > server.second_replid_offset)) + { /* Run id "?" is used by slaves that want to force a full resync. */ - if (master_runid[0] != '?') { + if (master_replid[0] != '?') { serverLog(LL_NOTICE,"Partial resynchronization not accepted: " - "Runid mismatch (Client asked for runid '%s', my runid is '%s')", - master_runid, server.runid); + "Replication ID mismatch (Slave asked for '%s', my replication " + "ID is '%s')", + master_replid, server.replid); } else { serverLog(LL_NOTICE,"Full resync requested by slave %s", replicationGetSlaveName(c)); @@ -436,8 +495,6 @@ int masterTryPartialResynchronization(client *c) { } /* We still have the data our slave is asking for? */ - if (getLongLongFromObjectOrReply(c,c->argv[2],&psync_offset,NULL) != - C_OK) goto need_full_resync; if (!server.repl_backlog || psync_offset < server.repl_backlog_off || psync_offset > (server.repl_backlog_off + server.repl_backlog_histlen)) @@ -463,7 +520,11 @@ int masterTryPartialResynchronization(client *c) { /* We can't use the connection buffers since they are used to accumulate * new commands at this stage. But we are sure the socket send buffer is * empty so this write will never fail actually. 
*/ - buflen = snprintf(buf,sizeof(buf),"+CONTINUE\r\n"); + if (c->slave_capa & SLAVE_CAPA_PSYNC2) { + buflen = snprintf(buf,sizeof(buf),"+CONTINUE %s\r\n", server.replid); + } else { + buflen = snprintf(buf,sizeof(buf),"+CONTINUE\r\n"); + } if (write(c->fd,buf,buflen) != buflen) { freeClientAsync(c); return C_OK; @@ -515,10 +576,18 @@ int startBgsaveForReplication(int mincapa) { serverLog(LL_NOTICE,"Starting BGSAVE for SYNC with target: %s", socket_target ? "slaves sockets" : "disk"); + rdbSaveInfo rsi = RDB_SAVE_INFO_INIT; + /* If we are saving for a chained slave (that is, if we are, + * in turn, a slave of another instance), make sure after + * loadig the RDB, our slaves select the right DB: we'll just + * send the replication stream we receive from our master, so + * no way to send SELECT commands. */ + if (server.master) rsi.repl_stream_db = server.master->db->id; + if (socket_target) - retval = rdbSaveToSlavesSockets(); + retval = rdbSaveToSlavesSockets(&rsi); else - retval = rdbSaveBackground(server.rdb_filename); + retval = rdbSaveBackground(server.rdb_filename,&rsi); /* If we failed to BGSAVE, remove the slaves waiting for a full * resynchorinization from the list of salves, inform them with @@ -589,22 +658,22 @@ void syncCommand(client *c) { * when this happens masterTryPartialResynchronization() already * replied with: * - * +FULLRESYNC + * +FULLRESYNC * - * So the slave knows the new runid and offset to try a PSYNC later + * So the slave knows the new replid and offset to try a PSYNC later * if the connection with the master is lost. */ if (!strcasecmp(c->argv[0]->ptr,"psync")) { if (masterTryPartialResynchronization(c) == C_OK) { server.stat_sync_partial_ok++; return; /* No full resync needed, return. 
*/ } else { - char *master_runid = c->argv[1]->ptr; + char *master_replid = c->argv[1]->ptr; /* Increment stats for failed PSYNCs, but only if the - * runid is not "?", as this is used by slaves to force a full + * replid is not "?", as this is used by slaves to force a full * resync on purpose when they are not albe to partially * resync. */ - if (master_runid[0] != '?') server.stat_sync_partial_err++; + if (master_replid[0] != '?') server.stat_sync_partial_err++; } } else { /* If a slave uses SYNC, we are dealing with an old implementation @@ -625,6 +694,16 @@ void syncCommand(client *c) { c->flags |= CLIENT_SLAVE; listAddNodeTail(server.slaves,c); + /* Create the replication backlog if needed. */ + if (listLength(server.slaves) == 1 && server.repl_backlog == NULL) { + /* When we create the backlog from scratch, we always use a new + * replication ID and clear the ID2, since there is no valid + * past history. */ + changeReplicationId(); + clearReplicationId2(); + createReplicationBacklog(); + } + /* CASE 1: BGSAVE is in progress, with disk target. */ if (server.rdb_child_pid != -1 && server.rdb_child_type == RDB_CHILD_TYPE_DISK) @@ -685,9 +764,6 @@ void syncCommand(client *c) { } } } - - if (listLength(server.slaves) == 1 && server.repl_backlog == NULL) - createReplicationBacklog(); return; } @@ -735,6 +811,8 @@ void replconfCommand(client *c) { /* Ignore capabilities not understood by this master. */ if (!strcasecmp(c->argv[j+1]->ptr,"eof")) c->slave_capa |= SLAVE_CAPA_EOF; + else if (!strcasecmp(c->argv[j+1]->ptr,"psync2")) + c->slave_capa |= SLAVE_CAPA_PSYNC2; } else if (!strcasecmp(c->argv[j]->ptr,"ack")) { /* REPLCONF ACK is used by slave to inform the master the amount * of replication stream that it processed so far. It is an @@ -928,6 +1006,43 @@ void updateSlavesWaitingBgsave(int bgsaveerr, int type) { if (startbgsave) startBgsaveForReplication(mincapa); } +/* Change the current instance replication ID with a new, random one. 
+ * This will prevent successful PSYNCs between this master and other + * slaves, so the command should be called when something happens that + * alters the current story of the dataset. */ +void changeReplicationId(void) { + getRandomHexChars(server.replid,CONFIG_RUN_ID_SIZE); + server.replid[CONFIG_RUN_ID_SIZE] = '\0'; +} + +/* Clear (invalidate) the secondary replication ID. This happens, for + * example, after a full resynchronization, when we start a new replication + * history. */ +void clearReplicationId2(void) { + memset(server.replid2,'0',sizeof(server.replid)); + server.replid2[CONFIG_RUN_ID_SIZE] = '\0'; + server.second_replid_offset = -1; +} + +/* Use the current replication ID / offset as secondary replication + * ID, and change the current one in order to start a new history. + * This should be used when an instance is switched from slave to master + * so that it can serve PSYNC requests performed using the master + * replication ID. */ +void shiftReplicationId(void) { + memcpy(server.replid2,server.replid,sizeof(server.replid)); + /* We set the second replid offset to the master offset + 1, since + * the slave will ask for the first byte it has not yet received, so + * we need to add one to the offset: for example if, as a slave, we are + * sure we have the same history as the master for 50 bytes, after we + * are turned into a master, we can accept a PSYNC request with offset + * 51, since the slave asking has the same history up to the 50th + * byte, and is asking for the new bytes starting at offset 51. */ + server.second_replid_offset = server.master_repl_offset+1; + changeReplicationId(); + serverLog(LL_WARNING,"Setting secondary replication ID to %s, valid up to offset: %lld. 
New replication ID is %s", server.replid2, server.second_replid_offset, server.replid); +} + /* ----------------------------------- SLAVE -------------------------------- */ /* Returns 1 if the given replication state is a handshake state, @@ -965,18 +1080,18 @@ void replicationEmptyDbCallback(void *privdata) { /* Once we have a link with the master and the synchroniziation was * performed, this function materializes the master client we store * at server.master, starting from the specified file descriptor. */ -void replicationCreateMasterClient(int fd) { +void replicationCreateMasterClient(int fd, int dbid) { server.master = createClient(fd); server.master->flags |= CLIENT_MASTER; server.master->authenticated = 1; - server.repl_state = REPL_STATE_CONNECTED; - server.master->reploff = server.repl_master_initial_offset; - memcpy(server.master->replrunid, server.repl_master_runid, - sizeof(server.repl_master_runid)); + server.master->reploff = server.master_initial_offset; + memcpy(server.master->replid, server.master_replid, + sizeof(server.master_replid)); /* If master offset is set to -1, this master is old and is not * PSYNC capable, so we flag it accordingly. */ if (server.master->reploff == -1) server.master->flags |= CLIENT_PRE_PSYNC; + if (dbid != -1) selectDb(server.master,dbid); } /* Asynchronously read the SYNC payload we receive from a master */ @@ -1137,7 +1252,8 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) { * time for non blocking loading. 
*/ aeDeleteFileEvent(server.el,server.repl_transfer_s,AE_READABLE); serverLog(LL_NOTICE, "MASTER <-> SLAVE sync: Loading DB in memory"); - if (rdbLoad(server.rdb_filename) != C_OK) { + rdbSaveInfo rsi = RDB_SAVE_INFO_INIT; + if (rdbLoad(server.rdb_filename,&rsi) != C_OK) { serverLog(LL_WARNING,"Failed trying to load the MASTER synchronization DB from disk"); cancelReplicationHandshake(); return; @@ -1145,7 +1261,20 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) { /* Final setup of the connected slave <- master link */ zfree(server.repl_transfer_tmpfile); close(server.repl_transfer_fd); - replicationCreateMasterClient(server.repl_transfer_s); + replicationCreateMasterClient(server.repl_transfer_s,rsi.repl_stream_db); + server.repl_state = REPL_STATE_CONNECTED; + /* After a full resynchronization we use the replication ID and + * offset of the master. The secondary ID / offset are cleared since + * we are starting a new history. */ + memcpy(server.replid,server.master->replid,sizeof(server.replid)); + server.master_repl_offset = server.master->reploff; + clearReplicationId2(); + /* Let's create the replication backlog if needed. Slaves need to + * accumulate the backlog regardless of the fact they have sub-slaves + * or not, in order to behave correctly if they are promoted to + * masters after a failover. */ + if (server.repl_backlog == NULL) createReplicationBacklog(); + serverLog(LL_NOTICE, "MASTER <-> SLAVE sync: Finished with success"); /* Restart the AOF subsystem now that we finished the sync. This * will trigger an AOF rewrite, and when done will start appending @@ -1270,7 +1399,7 @@ char *sendSynchronousCommand(int flags, int fd, ...) { * * 1) As a side effect of the function call the function removes the readable * event handler from "fd", unless the return value is PSYNC_WAIT_REPLY. 
- * 2) server.repl_master_initial_offset is set to the right value according + * 2) server.master_initial_offset is set to the right value according * to the master reply. This will be used to populate the 'server.master' * structure replication offset. */ @@ -1281,31 +1410,31 @@ char *sendSynchronousCommand(int flags, int fd, ...) { #define PSYNC_FULLRESYNC 3 #define PSYNC_NOT_SUPPORTED 4 int slaveTryPartialResynchronization(int fd, int read_reply) { - char *psync_runid; + char *psync_replid; char psync_offset[32]; sds reply; /* Writing half */ if (!read_reply) { - /* Initially set repl_master_initial_offset to -1 to mark the current + /* Initially set master_initial_offset to -1 to mark the current * master run_id and offset as not valid. Later if we'll be able to do * a FULL resync using the PSYNC command we'll set the offset at the * right value, so that this information will be propagated to the * client structure representing the master into server.master. */ - server.repl_master_initial_offset = -1; + server.master_initial_offset = -1; if (server.cached_master) { - psync_runid = server.cached_master->replrunid; + psync_replid = server.cached_master->replid; snprintf(psync_offset,sizeof(psync_offset),"%lld", server.cached_master->reploff+1); - serverLog(LL_NOTICE,"Trying a partial resynchronization (request %s:%s).", psync_runid, psync_offset); + serverLog(LL_NOTICE,"Trying a partial resynchronization (request %s:%s).", psync_replid, psync_offset); } else { serverLog(LL_NOTICE,"Partial resynchronization not possible (no cached master)"); - psync_runid = "?"; + psync_replid = "?"; memcpy(psync_offset,"-1",3); } /* Issue the PSYNC command */ - reply = sendSynchronousCommand(SYNC_CMD_WRITE,fd,"PSYNC",psync_runid,psync_offset,NULL); + reply = sendSynchronousCommand(SYNC_CMD_WRITE,fd,"PSYNC",psync_replid,psync_offset,NULL); if (reply != NULL) { serverLog(LL_WARNING,"Unable to send PSYNC to master: %s",reply); sdsfree(reply); @@ -1327,31 +1456,31 @@ int 
slaveTryPartialResynchronization(int fd, int read_reply) { aeDeleteFileEvent(server.el,fd,AE_READABLE); if (!strncmp(reply,"+FULLRESYNC",11)) { - char *runid = NULL, *offset = NULL; + char *replid = NULL, *offset = NULL; /* FULL RESYNC, parse the reply in order to extract the run id * and the replication offset. */ - runid = strchr(reply,' '); - if (runid) { - runid++; - offset = strchr(runid,' '); + replid = strchr(reply,' '); + if (replid) { + replid++; + offset = strchr(replid,' '); if (offset) offset++; } - if (!runid || !offset || (offset-runid-1) != CONFIG_RUN_ID_SIZE) { + if (!replid || !offset || (offset-replid-1) != CONFIG_RUN_ID_SIZE) { serverLog(LL_WARNING, "Master replied with wrong +FULLRESYNC syntax."); /* This is an unexpected condition, actually the +FULLRESYNC * reply means that the master supports PSYNC, but the reply * format seems wrong. To stay safe we blank the master - * runid to make sure next PSYNCs will fail. */ - memset(server.repl_master_runid,0,CONFIG_RUN_ID_SIZE+1); + * replid to make sure next PSYNCs will fail. */ + memset(server.master_replid,0,CONFIG_RUN_ID_SIZE+1); } else { - memcpy(server.repl_master_runid, runid, offset-runid-1); - server.repl_master_runid[CONFIG_RUN_ID_SIZE] = '\0'; - server.repl_master_initial_offset = strtoll(offset,NULL,10); + memcpy(server.master_replid, replid, offset-replid-1); + server.master_replid[CONFIG_RUN_ID_SIZE] = '\0'; + server.master_initial_offset = strtoll(offset,NULL,10); serverLog(LL_NOTICE,"Full resync from master: %s:%lld", - server.repl_master_runid, - server.repl_master_initial_offset); + server.master_replid, + server.master_initial_offset); } /* We are going to full resync, discard the cached master structure. */ replicationDiscardCachedMaster(); @@ -1360,9 +1489,40 @@ int slaveTryPartialResynchronization(int fd, int read_reply) { } if (!strncmp(reply,"+CONTINUE",9)) { - /* Partial resync was accepted, set the replication state accordingly */ + /* Partial resync was accepted. 
*/ serverLog(LL_NOTICE, "Successful partial resynchronization with master."); + + /* Check the new replication ID advertised by the master. If it + * changed, we need to set the new ID as primary ID, and set our + * secondary ID as the old master ID up to the current offset, so + * that our sub-slaves will be able to PSYNC with us after a + * disconnection. */ + char *start = reply+10; + char *end = reply+9; + while(end[0] != '\r' && end[0] != '\n' && end[0] != '\0') end++; + if (end-start == CONFIG_RUN_ID_SIZE) { + char new[CONFIG_RUN_ID_SIZE+1]; + memcpy(new,start,CONFIG_RUN_ID_SIZE); + new[CONFIG_RUN_ID_SIZE] = '\0'; + + if (strcmp(new,server.cached_master->replid)) { + /* Master ID changed. */ + serverLog(LL_WARNING,"Master replication ID changed to %s",new); + + /* Set the old ID as our ID2, up to the current offset+1. */ + memcpy(server.replid2,server.cached_master->replid, + sizeof(server.replid2)); + server.second_replid_offset = server.master_repl_offset+1; + + /* Update the cached master ID and our own primary ID to the + * new one. */ + memcpy(server.replid,new,sizeof(server.replid)); + memcpy(server.cached_master->replid,new,sizeof(server.replid)); + } + } + + /* Setup the replication to continue. */ sdsfree(reply); replicationResurrectCachedMaster(fd); return PSYNC_CONTINUE; @@ -1386,6 +1546,8 @@ int slaveTryPartialResynchronization(int fd, int read_reply) { return PSYNC_NOT_SUPPORTED; } +/* This handler fires when the non blocking connect was able to + * establish a connection with the master. */ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) { char tmpfile[256], *err = NULL; int dfd, maxtries = 5; @@ -1402,7 +1564,8 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) { return; } - /* Check for errors in the socket. */ + /* Check for errors in the socket: after a non blocking connect() we + * may find that the socket is in error state. 
*/ if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &sockerr, &errlen) == -1) sockerr = errno; if (sockerr) { @@ -1531,13 +1694,15 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) { server.repl_state = REPL_STATE_SEND_CAPA; } - /* Inform the master of our capabilities. While we currently send - * just one capability, it is possible to chain new capabilities here - * in the form of REPLCONF capa X capa Y capa Z ... + /* Inform the master of our (slave) capabilities. + * + * EOF: supports EOF-style RDB transfer for diskless replication. + * PSYNC2: supports PSYNC v2, so understands +CONTINUE . + * * The master will ignore capabilities it does not understand. */ if (server.repl_state == REPL_STATE_SEND_CAPA) { err = sendSynchronousCommand(SYNC_CMD_WRITE,fd,"REPLCONF", - "capa","eof",NULL); + "capa","eof","capa","psync2",NULL); if (err) goto write_error; sdsfree(err); server.repl_state = REPL_STATE_RECEIVE_CAPA; @@ -1591,14 +1756,14 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) { } /* PSYNC failed or is not supported: we want our slaves to resync with us - * as well, if we have any (chained replication case). The mater may - * transfer us an entirely different data set and we have no way to - * incrementally feed our slaves after that. */ + * as well, if we have any sub-slaves. The master may transfer us an + * entirely different data set and we have no way to incrementally feed + * our slaves after that. */ disconnectSlaves(); /* Force our slaves to resync with us as well. */ freeReplicationBacklog(); /* Don't allow our chained slaves to PSYNC. */ /* Fall back to SYNC if needed. Otherwise psync_result == PSYNC_FULLRESYNC - * and the server.repl_master_runid and repl_master_initial_offset are + * and the server.master_replid and master_initial_offset are * already populated. 
*/ if (psync_result == PSYNC_NOT_SUPPORTED) { serverLog(LL_NOTICE,"Retrying with SYNC..."); @@ -1727,15 +1892,23 @@ int cancelReplicationHandshake(void) { /* Set replication to the specified master address and port. */ void replicationSetMaster(char *ip, int port) { + int was_master = server.masterhost == NULL; + sdsfree(server.masterhost); server.masterhost = sdsnew(ip); server.masterport = port; - if (server.master) freeClient(server.master); + if (server.master) { + freeClient(server.master); + } disconnectAllBlockedClients(); /* Clients blocked in master, now slave. */ - disconnectSlaves(); /* Force our slaves to resync with us as well. */ - replicationDiscardCachedMaster(); /* Don't try a PSYNC. */ - freeReplicationBacklog(); /* Don't allow our chained slaves to PSYNC. */ + + /* Force our slaves to resync with us as well. They may hopefully be able + * to partially resync with us, but we can notify the replid change. */ + disconnectSlaves(); cancelReplicationHandshake(); + /* Before destroying our master state, create a cached master using + * our own parameters, to later PSYNC with the new master. */ + if (was_master) replicationCacheMasterUsingMyself(); server.repl_state = REPL_STATE_CONNECT; server.master_repl_offset = 0; server.repl_down_since = 0; @@ -1746,20 +1919,26 @@ void replicationUnsetMaster(void) { if (server.masterhost == NULL) return; /* Nothing to do. */ sdsfree(server.masterhost); server.masterhost = NULL; - if (server.master) { - if (listLength(server.slaves) == 0) { - /* If this instance is turned into a master and there are no - * slaves, it inherits the replication offset from the master. - * Under certain conditions this makes replicas comparable by - * replication offset to understand what is the most updated. 
*/ - server.master_repl_offset = server.master->reploff; - freeReplicationBacklog(); - } - freeClient(server.master); - } + /* When a slave is turned into a master, the current replication ID + * (that was inherited from the master at synchronization time) is + * used as secondary ID up to the current offset, and a new replication + * ID is created to continue with a new replication history. */ + shiftReplicationId(); + if (server.master) freeClient(server.master); replicationDiscardCachedMaster(); cancelReplicationHandshake(); + /* Disconnecting all the slaves is required: we need to inform slaves + * of the replication ID change (see shiftReplicationId() call). However + * the slaves will be able to partially resync with us, so it will be + * a very fast reconnection. */ + disconnectSlaves(); server.repl_state = REPL_STATE_NONE; + + /* We need to make sure the new master will start the replication stream + * with a SELECT statement. This is forced after a full resync, but + * with PSYNC version 2, there is no need for full resync after a + * master switch. */ + server.slaveseldb = -1; } /* This function is called when the slave lose the connection with the @@ -1931,6 +2110,31 @@ void replicationCacheMaster(client *c) { replicationHandleMasterDisconnection(); } +/* This function is called when a master is turned into a slave, in order to + * create from scratch a cached master for the new client, that will allow + * to PSYNC with the slave that was promoted as the new master after a + * failover. + * + * Assuming this instance was previously the master instance of the new master, + * the new master will accept its replication ID, and potentially also the + * current offset if no data was lost during the failover. So we use our + * current replication ID and offset in order to synthesize a cached master. 
*/ +void replicationCacheMasterUsingMyself(void) { + /* The master client we create can be set to any DBID, because + * the new master will start its replication stream with SELECT. */ + server.master_initial_offset = server.master_repl_offset; + replicationCreateMasterClient(-1,-1); + + /* Use our own ID / offset. */ + memcpy(server.master->replid, server.replid, sizeof(server.replid)); + + /* Set as cached master. */ + unlinkClient(server.master); + server.cached_master = server.master; + server.master = NULL; + serverLog(LL_NOTICE,"Before turning into a slave, using my master parameters to synthesize a cached master: I may be able to synchronize with the new master with just a partial transfer."); +} + /* Free a cached master, called when there are no longer the conditions for * a partial resync on reconnection. */ void replicationDiscardCachedMaster(void) { @@ -2290,7 +2494,9 @@ void replicationCron(void) { robj *ping_argv[1]; /* First, send PING according to ping_slave_period. */ - if ((replication_cron_loops % server.repl_ping_slave_period) == 0) { + if ((replication_cron_loops % server.repl_ping_slave_period) == 0 && + listLength(server.slaves)) + { ping_argv[0] = createStringObject("PING",4); replicationFeedSlaves(server.slaves, server.slaveseldb, ping_argv, 1); @@ -2299,20 +2505,32 @@ void replicationCron(void) { /* Second, send a newline to all the slaves in pre-synchronization * stage, that is, slaves waiting for the master to create the RDB file. + * + * Also send the a newline to all the chained slaves we have, if we lost + * connection from our master, to keep the slaves aware that their + * master is online. This is needed since sub-slaves only receive proxied + * data from top-level masters, so there is no explicit pinging in order + * to avoid altering the replication offsets. This special out of band + * pings (newlines) can be sent, they will have no effect in the offset. 
+ * * The newline will be ignored by the slave but will refresh the - * last-io timer preventing a timeout. In this case we ignore the + * last interaction timer preventing a timeout. In this case we ignore the * ping period and refresh the connection once per second since certain * timeouts are set at a few seconds (example: PSYNC response). */ listRewind(server.slaves,&li); while((ln = listNext(&li))) { client *slave = ln->value; - if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START || + int is_presync = + (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START || (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_END && - server.rdb_child_type != RDB_CHILD_TYPE_SOCKET)) - { + server.rdb_child_type != RDB_CHILD_TYPE_SOCKET)); + int is_subslave = server.masterhost && server.master == NULL && + slave->replstate == SLAVE_STATE_ONLINE; + + if (is_presync || is_subslave) { if (write(slave->fd, "\n", 1) == -1) { - /* Don't worry, it's just a ping. */ + /* Don't worry about socket errors, it's just a ping. */ } } } @@ -2337,10 +2555,14 @@ void replicationCron(void) { } } - /* If we have no attached slaves and there is a replication backlog - * using memory, free it after some (configured) time. */ + /* If this is a master without attached slaves and there is a replication + * backlog active, in order to reclaim memory we can free it after some + * (configured) time. Note that this cannot be done for slaves: slaves + * without sub-slaves attached should still accumulate data into the + * backlog, in order to reply to PSYNC queries if they are turned into + * masters after a failover. 
*/ if (listLength(server.slaves) == 0 && server.repl_backlog_time_limit && - server.repl_backlog) + server.repl_backlog && server.masterhost == NULL) { time_t idle = server.unixtime - server.repl_no_slaves_since; diff --git a/src/server.c b/src/server.c index 7e9b962b3..b94490a33 100644 --- a/src/server.c +++ b/src/server.c @@ -1079,7 +1079,7 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { { serverLog(LL_NOTICE,"%d changes in %d seconds. Saving...", sp->changes, (int)sp->seconds); - rdbSaveBackground(server.rdb_filename); + rdbSaveBackground(server.rdb_filename,NULL); break; } } @@ -1151,7 +1151,7 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { (server.unixtime-server.lastbgsave_try > CONFIG_BGSAVE_RETRY_DELAY || server.lastbgsave_status == C_OK)) { - if (rdbSaveBackground(server.rdb_filename) == C_OK) + if (rdbSaveBackground(server.rdb_filename,NULL) == C_OK) server.rdb_bgsave_scheduled = 0; } @@ -1309,10 +1309,11 @@ void initServerConfig(void) { int j; getRandomHexChars(server.runid,CONFIG_RUN_ID_SIZE); + server.runid[CONFIG_RUN_ID_SIZE] = '\0'; + changeReplicationId(); server.configfile = NULL; server.executable = NULL; server.hz = CONFIG_DEFAULT_HZ; - server.runid[CONFIG_RUN_ID_SIZE] = '\0'; server.arch_bits = (sizeof(long) == 8) ? 64 : 32; server.port = CONFIG_DEFAULT_SERVER_PORT; server.tcp_backlog = CONFIG_DEFAULT_TCP_BACKLOG; @@ -1409,7 +1410,7 @@ void initServerConfig(void) { server.masterport = 6379; server.master = NULL; server.cached_master = NULL; - server.repl_master_initial_offset = -1; + server.master_initial_offset = -1; server.repl_state = REPL_STATE_NONE; server.repl_syncio_timeout = CONFIG_REPL_SYNCIO_TIMEOUT; server.repl_serve_stale_data = CONFIG_DEFAULT_SLAVE_SERVE_STALE_DATA; @@ -2471,7 +2472,7 @@ int prepareForShutdown(int flags) { if ((server.saveparamslen > 0 && !nosave) || save) { serverLog(LL_NOTICE,"Saving the final RDB snapshot before exiting."); /* Snapshotting. 
Perform a SYNC SAVE and exit */ - if (rdbSave(server.rdb_filename) != C_OK) { + if (rdbSave(server.rdb_filename,NULL) != C_OK) { /* Ooops.. error saving! The best we can do is to continue * operating. Note that if there was a background saving process, * in the next cron() Redis will be notified that the background @@ -3135,12 +3136,18 @@ sds genRedisInfoString(char *section) { } } info = sdscatprintf(info, + "master_replid:%s\r\n" + "master_replid2:%s\r\n" "master_repl_offset:%lld\r\n" + "second_repl_offset:%lld\r\n" "repl_backlog_active:%d\r\n" "repl_backlog_size:%lld\r\n" "repl_backlog_first_byte_offset:%lld\r\n" "repl_backlog_histlen:%lld\r\n", + server.replid, + server.replid2, server.master_repl_offset, + server.second_replid_offset, server.repl_backlog != NULL, server.repl_backlog_size, server.repl_backlog_off, @@ -3416,7 +3423,7 @@ void loadDataFromDisk(void) { if (loadAppendOnlyFile(server.aof_filename) == C_OK) serverLog(LL_NOTICE,"DB loaded from append only file: %.3f seconds",(float)(ustime()-start)/1000000); } else { - if (rdbLoad(server.rdb_filename) == C_OK) { + if (rdbLoad(server.rdb_filename,NULL) == C_OK) { serverLog(LL_NOTICE,"DB loaded from disk: %.3f seconds", (float)(ustime()-start)/1000000); } else if (errno != ENOENT) { diff --git a/src/server.h b/src/server.h index b5dbaf0a5..8aa1d6fcb 100644 --- a/src/server.h +++ b/src/server.h @@ -293,7 +293,8 @@ typedef long long mstime_t; /* millisecond time type. */ /* Slave capabilities. */ #define SLAVE_CAPA_NONE 0 -#define SLAVE_CAPA_EOF (1<<0) /* Can parse the RDB EOF streaming format. */ +#define SLAVE_CAPA_EOF (1<<0) /* Can parse the RDB EOF streaming format. */ +#define SLAVE_CAPA_PSYNC2 (1<<1) /* Supports PSYNC2 protocol. */ /* Synchronous read timeout - slave side */ #define CONFIG_REPL_SYNCIO_TIMEOUT 5 @@ -679,8 +680,8 @@ typedef struct client { long long psync_initial_offset; /* FULLRESYNC reply offset other slaves copying this slave output buffer should use. 
*/ - char replrunid[CONFIG_RUN_ID_SIZE+1]; /* Master run id if is a master. */ - int slave_listening_port; /* As configured with: REPLCONF listening-port */ + char replid[CONFIG_RUN_ID_SIZE+1]; /* Master replication ID (if master). */ + int slave_listening_port; /* As configured with: SLAVECONF listening-port */ char slave_ip[NET_IP_STR_LEN]; /* Optionally given by REPLCONF ip-address */ int slave_capa; /* Slave capabilities: SLAVE_CAPA_* bitwise OR. */ multiState mstate; /* MULTI/EXEC state */ @@ -803,6 +804,20 @@ struct redisMemOverhead { } *db; }; +/* This structure can be optionally passed to RDB save/load functions in + * order to implement additional functionalities, by storing and loading + * metadata to the RDB file. + * + * Currently the only use is to select a DB at load time, useful in + * replication in order to make sure that chained slaves (slaves of slaves) + * select the correct DB and are able to accept the stream coming from the + * top-level master. */ +typedef struct rdbSaveInfo { + int repl_stream_db; /* DB to select in server.master client. */ +} rdbSaveInfo; + +#define RDB_SAVE_INFO_INIT {-1} + /*----------------------------------------------------------------------------- * Global server state *----------------------------------------------------------------------------*/ @@ -988,15 +1003,19 @@ struct redisServer { char *syslog_ident; /* Syslog ident */ int syslog_facility; /* Syslog facility */ /* Replication (master) */ + char replid[CONFIG_RUN_ID_SIZE+1]; /* My current replication ID. */ + char replid2[CONFIG_RUN_ID_SIZE+1]; /* replid inherited from master*/ + long long master_repl_offset; /* My current replication offset */ + long long second_replid_offset; /* Accept offsets up to this for replid2. 
*/ int slaveseldb; /* Last SELECTed DB in replication output */ - long long master_repl_offset; /* Global replication offset */ int repl_ping_slave_period; /* Master pings the slave every N seconds */ char *repl_backlog; /* Replication backlog for partial syncs */ long long repl_backlog_size; /* Backlog circular buffer size */ long long repl_backlog_histlen; /* Backlog actual data length */ - long long repl_backlog_idx; /* Backlog circular buffer current offset */ - long long repl_backlog_off; /* Replication offset of first byte in the - backlog buffer. */ + long long repl_backlog_idx; /* Backlog circular buffer current offset, + that is the next byte will'll write to.*/ + long long repl_backlog_off; /* Replication "master offset" of first + byte in the replication backlog buffer.*/ time_t repl_backlog_time_limit; /* Time without slaves after the backlog gets released. */ time_t repl_no_slaves_since; /* We have no slaves since that time. @@ -1029,8 +1048,11 @@ struct redisServer { int slave_priority; /* Reported in INFO and used by Sentinel. */ int slave_announce_port; /* Give the master this listening port. */ char *slave_announce_ip; /* Give the master this ip address. */ - char repl_master_runid[CONFIG_RUN_ID_SIZE+1]; /* Master run id for PSYNC.*/ - long long repl_master_initial_offset; /* Master PSYNC offset. */ + /* The following two fields is where we store master PSYNC replid/offset + * while the PSYNC is in progress. At the end we'll copy the fields into + * the server->master client structure. */ + char master_replid[CONFIG_RUN_ID_SIZE+1]; /* Master PSYNC runid. */ + long long master_initial_offset; /* Master PSYNC offset. */ int repl_slave_lazy_flush; /* Lazy FLUSHALL before loading DB? */ /* Replication script cache. */ dict *repl_scriptcache_dict; /* SHA1 all slaves are aware of. 
*/ @@ -1259,6 +1281,7 @@ void acceptHandler(aeEventLoop *el, int fd, void *privdata, int mask); void acceptTcpHandler(aeEventLoop *el, int fd, void *privdata, int mask); void acceptUnixHandler(aeEventLoop *el, int fd, void *privdata, int mask); void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask); +void addReplyString(client *c, const char *s, size_t len); void addReplyBulk(client *c, robj *obj); void addReplyBulkCString(client *c, const char *s); void addReplyBulkCBuffer(client *c, const void *p, size_t len); @@ -1393,6 +1416,7 @@ ssize_t syncReadLine(int fd, char *ptr, ssize_t size, long long timeout); /* Replication */ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc); +void replicationFeedSlavesFromMasterStream(list *slaves, char *buf, size_t buflen); void replicationFeedMonitors(client *c, list *monitors, int dictid, robj **argv, int argc); void updateSlavesWaitingBgsave(int bgsaveerr, int type); void replicationCron(void); @@ -1414,6 +1438,9 @@ long long replicationGetSlaveOffset(void); char *replicationGetSlaveName(client *c); long long getPsyncInitialOffset(void); int replicationSetupSlaveForFullResync(client *slave, long long offset); +void changeReplicationId(void); +void clearReplicationId2(void); +void chopReplicationBacklog(void); /* Generic persistence functions */ void startLoading(FILE *fp); @@ -1422,7 +1449,7 @@ void stopLoading(void); /* RDB persistence */ #include "rdb.h" -int rdbSaveRio(rio *rdb, int *error, int flags); +int rdbSaveRio(rio *rdb, int *error, int flags, rdbSaveInfo *rsi); /* AOF persistence */ void flushAppendOnlyFile(int force); From cd5255eea438b8765b2696f759e0b14a3e76f0cf Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 9 Nov 2016 15:36:25 +0100 Subject: [PATCH 0185/1722] PSYNC2: Wrap debugging code with if(0) --- src/replication.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/replication.c b/src/replication.c index a98d0d35e..1962b3a42 100644 --- 
a/src/replication.c +++ b/src/replication.c @@ -296,7 +296,9 @@ void replicationFeedSlavesFromMasterStream(list *slaves, char *buf, size_t bufle listNode *ln; listIter li; - { + /* Debugging: this is handy to see the stream sent from master + * to slaves. Disabled with if(0). */ + if (0) { printf("%zu:",buflen); for (size_t j = 0; j < buflen; j++) { printf("%c", isprint(buf[j]) ? buf[j] : '.'); From ba7002b9bb2fc62c51a7a1957f8dd09fd6725797 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 10 Nov 2016 12:35:29 +0100 Subject: [PATCH 0186/1722] PSYNC2: Save replication ID/offset on RDB file. This means that stopping a slave and restarting it will still make it able to PSYNC with the master. Moreover the master itself will retain its ID/offset, in case it gets turned into a slave, or if a slave will try to PSYNC with it with an exactly updated offset (otherwise there is no backlog). This change was possible thanks to PSYNC v2 that makes saving the current replication state much simpler. --- src/rdb.c | 9 +++++++++ src/replication.c | 1 - src/server.c | 13 ++++++++++++- src/server.h | 9 ++++++++- 4 files changed, 29 insertions(+), 3 deletions(-) diff --git a/src/rdb.c b/src/rdb.c index aa9c631de..765e13374 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -855,6 +855,8 @@ int rdbSaveInfoAuxFields(rio *rdb, int flags, rdbSaveInfo *rsi) { } } if (rdbSaveAuxFieldStrInt(rdb,"aof-preamble",aof_preamble) == -1) return -1; + if (rdbSaveAuxFieldStrStr(rdb,"repl-id",server.replid) == -1) return -1; + if (rdbSaveAuxFieldStrInt(rdb,"repl-offset",server.master_repl_offset) == -1) return -1; return 1; } @@ -1513,6 +1515,13 @@ int rdbLoadRio(rio *rdb, rdbSaveInfo *rsi) { (char*)auxval->ptr); } else if (!strcasecmp(auxkey->ptr,"repl-stream-db")) { if (rsi) rsi->repl_stream_db = atoi(auxval->ptr); + } else if (!strcasecmp(auxkey->ptr,"repl-id")) { + if (rsi && sdslen(auxval->ptr) == CONFIG_RUN_ID_SIZE) { + memcpy(rsi->repl_id,auxval->ptr,CONFIG_RUN_ID_SIZE+1); + rsi->repl_id_is_set = 1; + } + 
} else if (!strcasecmp(auxkey->ptr,"repl-offset")) { + if (rsi) rsi->repl_offset = strtoll(auxval->ptr,NULL,10); } else { /* We ignore fields we don't understand, as by AUX field * contract. */ diff --git a/src/replication.c b/src/replication.c index 1962b3a42..1a9b2e574 100644 --- a/src/replication.c +++ b/src/replication.c @@ -39,7 +39,6 @@ void replicationDiscardCachedMaster(void); void replicationResurrectCachedMaster(int newfd); -void replicationCacheMasterUsingMyself(void); void replicationSendAck(void); void putSlaveOnline(client *slave); int cancelReplicationHandshake(void); diff --git a/src/server.c b/src/server.c index b94490a33..d17ded9b0 100644 --- a/src/server.c +++ b/src/server.c @@ -3423,9 +3423,20 @@ void loadDataFromDisk(void) { if (loadAppendOnlyFile(server.aof_filename) == C_OK) serverLog(LL_NOTICE,"DB loaded from append only file: %.3f seconds",(float)(ustime()-start)/1000000); } else { - if (rdbLoad(server.rdb_filename,NULL) == C_OK) { + rdbSaveInfo rsi = RDB_SAVE_INFO_INIT; + if (rdbLoad(server.rdb_filename,&rsi) == C_OK) { serverLog(LL_NOTICE,"DB loaded from disk: %.3f seconds", (float)(ustime()-start)/1000000); + + /* Restore the replication ID / offset from the RDB file. */ + if (rsi.repl_id_is_set && rsi.repl_offset != -1) { + memcpy(server.replid,rsi.repl_id,sizeof(server.replid)); + server.master_repl_offset = rsi.repl_offset; + /* If we are a slave, create a cached master from this + * information, in order to allow partial resynchronizations + * with masters. */ + if (server.masterhost) replicationCacheMasterUsingMyself(); + } } else if (errno != ENOENT) { serverLog(LL_WARNING,"Fatal error loading the DB: %s. Exiting.",strerror(errno)); exit(1); diff --git a/src/server.h b/src/server.h index 8aa1d6fcb..b7f909933 100644 --- a/src/server.h +++ b/src/server.h @@ -813,10 +813,16 @@ struct redisMemOverhead { * select the correct DB and are able to accept the stream coming from the * top-level master. 
*/ typedef struct rdbSaveInfo { + /* Used saving and loading. */ int repl_stream_db; /* DB to select in server.master client. */ + + /* Used only loading. */ + int repl_id_is_set; /* True if repl_id field is set. */ + char repl_id[CONFIG_RUN_ID_SIZE+1]; /* Replication ID. */ + long long repl_offset; /* Replication offset. */ } rdbSaveInfo; -#define RDB_SAVE_INFO_INIT {-1} +#define RDB_SAVE_INFO_INIT {-1,0,"000000000000000000000000000000",-1} /*----------------------------------------------------------------------------- * Global server state @@ -1441,6 +1447,7 @@ int replicationSetupSlaveForFullResync(client *slave, long long offset); void changeReplicationId(void); void clearReplicationId2(void); void chopReplicationBacklog(void); +void replicationCacheMasterUsingMyself(void); /* Generic persistence functions */ void startLoading(FILE *fp); From 358e51d80a3b1dbb08dfe246acb670d8f717f0be Mon Sep 17 00:00:00 2001 From: oranagra Date: Thu, 10 Nov 2016 16:59:52 +0200 Subject: [PATCH 0187/1722] fix unsigned int overflow in adjustOpenFilesLimit --- src/server.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/server.c b/src/server.c index 7e9b962b3..ed6d7d683 100644 --- a/src/server.c +++ b/src/server.c @@ -1570,9 +1570,9 @@ void adjustOpenFilesLimit(void) { if (bestlimit < oldlimit) bestlimit = oldlimit; if (bestlimit < maxfiles) { - int old_maxclients = server.maxclients; - server.maxclients = bestlimit-CONFIG_MIN_RESERVED_FDS; - if (server.maxclients < 1) { + unsigned int old_maxclients = server.maxclients; + server.maxclients = bestlimit-CONFIG_MIN_RESERVED_FDS; /* NOTICE: server.maxclients is unsigned */ + if (bestlimit <= CONFIG_MIN_RESERVED_FDS) { serverLog(LL_WARNING,"Your current 'ulimit -n' " "of %llu is not enough for the server to start. 
" "Please increase your open file limit to at least " From 43adf0a308acbb3ae9ba0381ac45f054ed83f973 Mon Sep 17 00:00:00 2001 From: hylepo Date: Fri, 11 Nov 2016 10:33:48 +0800 Subject: [PATCH 0188/1722] Update redis-benchmark.c Fixing typo in the usage of redis-benchmark --- src/redis-benchmark.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/redis-benchmark.c b/src/redis-benchmark.c index 50905c872..f382019a4 100644 --- a/src/redis-benchmark.c +++ b/src/redis-benchmark.c @@ -565,7 +565,7 @@ invalid: usage: printf( -"Usage: redis-benchmark [-h ] [-p ] [-c ] [-n [-k ]\n\n" +"Usage: redis-benchmark [-h ] [-p ] [-c ] [-n ] [-k ]\n\n" " -h Server hostname (default 127.0.0.1)\n" " -p Server port (default 6379)\n" " -s Server socket (overrides host and port)\n" From c715f0e4be3463152273eaea29a677a95310bfb7 Mon Sep 17 00:00:00 2001 From: deep011 Date: Wed, 16 Nov 2016 11:04:33 +0800 Subject: [PATCH 0189/1722] fix a possible bug for 'replconf getack' --- src/replication.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/replication.c b/src/replication.c index 67091dd0b..7a0a99c6a 100644 --- a/src/replication.c +++ b/src/replication.c @@ -758,7 +758,7 @@ void replconfCommand(client *c) { /* REPLCONF GETACK is used in order to request an ACK ASAP * to the slave. */ if (server.masterhost && server.master) replicationSendAck(); - /* Note: this command does not reply anything! */ + return; } else { addReplyErrorFormat(c,"Unrecognized REPLCONF option: %s", (char*)c->argv[j]->ptr); From 704ab7234b84a65a72dd8c585736386ed91e6da7 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 16 Nov 2016 14:13:13 +0100 Subject: [PATCH 0190/1722] Cluster: handle zero bytes at the end of nodes.conf. 
--- src/cluster.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cluster.c b/src/cluster.c index 9289f6782..bedf5f81e 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -129,7 +129,7 @@ int clusterLoadConfig(char *filename) { /* Skip blank lines, they can be created either by users manually * editing nodes.conf or by the config writing process if stopped * before the truncate() call. */ - if (line[0] == '\n') continue; + if (line[0] == '\n' || line[0] == '\0') continue; /* Split the line into arguments for processing. */ argv = sdssplitargs(line,&argc); From 1de180cd82dc78e27f3fe4fcba1f160278b88c00 Mon Sep 17 00:00:00 2001 From: oranagra Date: Wed, 16 Nov 2016 21:30:59 +0200 Subject: [PATCH 0191/1722] when a slave loads an RDB, stop an AOFRW fork before flusing db and parsing rdb file, to avoid a CoW disaster. --- src/replication.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/src/replication.c b/src/replication.c index 67091dd0b..84ef48561 100644 --- a/src/replication.c +++ b/src/replication.c @@ -979,6 +979,18 @@ void replicationCreateMasterClient(int fd) { server.master->flags |= CLIENT_PRE_PSYNC; } +void restartAOF() { + int retry = 10; + while (retry-- && startAppendOnly() == C_ERR) { + serverLog(LL_WARNING,"Failed enabling the AOF after successful master synchronization! Trying it again in one second."); + sleep(1); + } + if (!retry) { + serverLog(LL_WARNING,"FATAL: this slave instance finished the synchronization with its master, but the AOF can't be turned on. 
Exiting now."); + exit(1); + } +} + /* Asynchronously read the SYNC payload we receive from a master */ #define REPL_MAX_WRITTEN_BEFORE_FSYNC (1024*1024*8) /* 8 MB */ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) { @@ -1120,12 +1132,15 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) { } if (eof_reached) { + int aof_is_enabled = server.aof_state != AOF_OFF; if (rename(server.repl_transfer_tmpfile,server.rdb_filename) == -1) { serverLog(LL_WARNING,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno)); cancelReplicationHandshake(); return; } serverLog(LL_NOTICE, "MASTER <-> SLAVE sync: Flushing old data"); + if(aof_is_enabled) /* we need to stop any AOFRW fork before flusing and parsing RDB, otherwise we'll create a CoW disaster */ + stopAppendOnly(); signalFlushedDb(-1); emptyDb( -1, @@ -1140,6 +1155,8 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) { if (rdbLoad(server.rdb_filename) != C_OK) { serverLog(LL_WARNING,"Failed trying to load the MASTER synchronization DB from disk"); cancelReplicationHandshake(); + if (aof_is_enabled) /* re-enable so that on the next attempt, we can detect that AOF was enabled */ + restartAOF(); return; } /* Final setup of the connected slave <- master link */ @@ -1150,19 +1167,8 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) { /* Restart the AOF subsystem now that we finished the sync. This * will trigger an AOF rewrite, and when done will start appending * to the new file. */ - if (server.aof_state != AOF_OFF) { - int retry = 10; - - stopAppendOnly(); - while (retry-- && startAppendOnly() == C_ERR) { - serverLog(LL_WARNING,"Failed enabling the AOF after successful master synchronization! 
Trying it again in one second."); - sleep(1); - } - if (!retry) { - serverLog(LL_WARNING,"FATAL: this slave instance finished the synchronization with its master, but the AOF can't be turned on. Exiting now."); - exit(1); - } - } + if (aof_is_enabled) + restartAOF(); } return; From eb5c80460e83d777b9ec34ea785727b0fae4b372 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 18 Nov 2016 13:10:29 +0100 Subject: [PATCH 0192/1722] Test: WAIT tests added in wait.tcl unit. --- tests/test_helper.tcl | 1 + tests/unit/wait.tcl | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 tests/unit/wait.tcl diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl index 5f114c5dc..2f8021e66 100644 --- a/tests/test_helper.tcl +++ b/tests/test_helper.tcl @@ -55,6 +55,7 @@ set ::all_tests { unit/memefficiency unit/hyperloglog unit/lazyfree + unit/wait } # Index to the next test to run in the ::all_tests list. set ::next_test 0 diff --git a/tests/unit/wait.tcl b/tests/unit/wait.tcl new file mode 100644 index 000000000..e2f5d2942 --- /dev/null +++ b/tests/unit/wait.tcl @@ -0,0 +1,42 @@ +start_server {tags {"wait"}} { +start_server {} { + set slave [srv 0 client] + set slave_host [srv 0 host] + set slave_port [srv 0 port] + set master [srv -1 client] + set master_host [srv -1 host] + set master_port [srv -1 port] + + test {Setup slave} { + $slave slaveof $master_host $master_port + wait_for_condition 50 100 { + [s 0 master_link_status] eq {up} + } else { + fail "Replication not started." 
+ } + } + + test {WAIT should acknowledge 1 additional copy of the data} { + $master set foo 0 + $master incr foo + $master incr foo + $master incr foo + assert {[$master wait 1 5000] == 1} + assert {[$slave get foo] == 3} + } + + test {WAIT should not acknowledge 2 additional copies of the data} { + $master incr foo + assert {[$master wait 2 1000] <= 1} + } + + test {WAIT should not acknowledge 1 additional copy if slave is blocked} { + exec src/redis-cli -h $slave_host -p $slave_port debug sleep 5 > /dev/null 2> /dev/null & + after 1000 ;# Give redis-cli the time to execute the command. + $master set foo 0 + $master incr foo + $master incr foo + $master incr foo + assert {[$master wait 1 3000] == 0} + } +}} From b2dc3a3d9c19d5975d6eeb883b0287aab3d5488a Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 23 Nov 2016 16:12:20 +0100 Subject: [PATCH 0193/1722] PSYNC2: bugfixing pre release. 1. Master replication offset was cleared after switching configuration to some other slave, since it was assumed you can't PSYNC after a switch. Note the case anymore and when we successfully PSYNC we need to have our offset untouched. 2. Secondary replication ID was not reset to "000..." pattern at startup. 3. Master in error state replying -LOADING or other transient errors forced the slave to discard the cached master and full resync. This is now fixed. 4. Better logging of what's happening on failed PSYNCs. --- src/replication.c | 49 ++++++++++++++++++++++++++++++++++++----------- src/server.c | 1 + 2 files changed, 39 insertions(+), 11 deletions(-) diff --git a/src/replication.c b/src/replication.c index a3b4ab54c..818b7463b 100644 --- a/src/replication.c +++ b/src/replication.c @@ -484,10 +484,18 @@ int masterTryPartialResynchronization(client *c) { { /* Run id "?" is used by slaves that want to force a full resync. 
*/ if (master_replid[0] != '?') { - serverLog(LL_NOTICE,"Partial resynchronization not accepted: " - "Replication ID mismatch (Slave asked for '%s', my replication " - "ID is '%s')", - master_replid, server.replid); + if (strcasecmp(master_replid, server.replid) && + strcasecmp(master_replid, server.replid2)) + { + serverLog(LL_NOTICE,"Partial resynchronization not accepted: " + "Replication ID mismatch (Slave asked for '%s', my " + "replication IDs are '%s' and '%s')", + master_replid, server.replid, server.replid2); + } else { + serverLog(LL_NOTICE,"Partial resynchronization not accepted: " + "Requested offset for second ID was %lld, but I can reply " + "up to %lld", psync_offset, server.second_replid_offset); + } } else { serverLog(LL_NOTICE,"Full resync requested by slave %s", replicationGetSlaveName(c)); @@ -638,7 +646,7 @@ void syncCommand(client *c) { /* Refuse SYNC requests if we are a slave but the link with our master * is not ok... */ if (server.masterhost && server.repl_state != REPL_STATE_CONNECTED) { - addReplyError(c,"Can't SYNC while not connected with my master"); + addReplySds(c,sdsnew("-NOMASTERLINK Can't SYNC while not connected with my master\r\n")); return; } @@ -1393,8 +1401,9 @@ char *sendSynchronousCommand(int flags, int fd, ...) { * offset is saved. * PSYNC_NOT_SUPPORTED: If the server does not understand PSYNC at all and * the caller should fall back to SYNC. - * PSYNC_WRITE_ERR: There was an error writing the command to the socket. + * PSYNC_WRITE_ERROR: There was an error writing the command to the socket. * PSYNC_WAIT_REPLY: Call again the function with read_reply set to 1. + * PSYNC_TRY_LATER: Master is currently in a transient error condition. * * Notable side effects: * @@ -1410,6 +1419,7 @@ char *sendSynchronousCommand(int flags, int fd, ...) 
{ #define PSYNC_CONTINUE 2 #define PSYNC_FULLRESYNC 3 #define PSYNC_NOT_SUPPORTED 4 +#define PSYNC_TRY_LATER 5 int slaveTryPartialResynchronization(int fd, int read_reply) { char *psync_replid; char psync_offset[32]; @@ -1529,9 +1539,21 @@ int slaveTryPartialResynchronization(int fd, int read_reply) { return PSYNC_CONTINUE; } - /* If we reach this point we received either an error since the master does - * not understand PSYNC, or an unexpected reply from the master. - * Return PSYNC_NOT_SUPPORTED to the caller in both cases. */ + /* If we reach this point we received either an error (since the master does + * not understand PSYNC or because it is in a special state and cannot + * serve our request), or an unexpected reply from the master. + * + * Return PSYNC_NOT_SUPPORTED on errors we don't understand, otherwise + * return PSYNC_TRY_LATER if we believe this is a transient error. */ + + if (!strncmp(reply,"-NOMASTERLINK",13) || + !strncmp(reply,"-LOADING",8)) + { + serverLog(LL_NOTICE, + "Master is currently unable to PSYNC " + "but should be in the future: %s", reply); + return PSYNC_TRY_LATER; + } if (strncmp(reply,"-ERR",4)) { /* If it's not an error, log the unexpected event. */ @@ -1748,6 +1770,12 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) { psync_result = slaveTryPartialResynchronization(fd,1); if (psync_result == PSYNC_WAIT_REPLY) return; /* Try again later... */ + /* If the master is in an transient error, we should try to PSYNC + * from scratch later, so go to the error path. This happens when + * the server is loading the dataset or is not connected with its + * master and so forth. */ + if (psync_result == PSYNC_TRY_LATER) goto write_error; + /* Note: if PSYNC does not return WAIT_REPLY, it will take care of * uninstalling the read handler from the file descriptor. 
*/ @@ -1757,7 +1785,7 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) { } /* PSYNC failed or is not supported: we want our slaves to resync with us - * as well, if we have any sub-slaves. The mater may transfer us an + * as well, if we have any sub-slaves. The master may transfer us an * entirely different data set and we have no way to incrementally feed * our slaves after that. */ disconnectSlaves(); /* Force our slaves to resync with us as well. */ @@ -1911,7 +1939,6 @@ void replicationSetMaster(char *ip, int port) { * our own parameters, to later PSYNC with the new master. */ if (was_master) replicationCacheMasterUsingMyself(); server.repl_state = REPL_STATE_CONNECT; - server.master_repl_offset = 0; server.repl_down_since = 0; } diff --git a/src/server.c b/src/server.c index d17ded9b0..309516683 100644 --- a/src/server.c +++ b/src/server.c @@ -1311,6 +1311,7 @@ void initServerConfig(void) { getRandomHexChars(server.runid,CONFIG_RUN_ID_SIZE); server.runid[CONFIG_RUN_ID_SIZE] = '\0'; changeReplicationId(); + clearReplicationId2(); server.configfile = NULL; server.executable = NULL; server.hz = CONFIG_DEFAULT_HZ; From 67a41fe3165a511c5a0a2db901a9bcfe2a0bf768 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 24 Nov 2016 11:05:19 +0100 Subject: [PATCH 0194/1722] Modules: fix client blocking calls access to invalid struct field. We already have reference to the client pointer, no need to access the already freed structure. Close #3634. --- src/module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/module.c b/src/module.c index 876513cbe..07f88eff5 100644 --- a/src/module.c +++ b/src/module.c @@ -3183,7 +3183,7 @@ void moduleHandleBlockedClients(void) { if (bc->privdata && bc->free_privdata) bc->free_privdata(bc->privdata); zfree(bc); - if (c != NULL) unblockClient(bc->client); + if (c != NULL) unblockClient(c); /* Lock again before to iterate the loop. 
*/ pthread_mutex_lock(&moduleUnblockedClientsMutex); From a751287188fee768659b2a9893d1c214c377a6c9 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 24 Nov 2016 15:48:18 +0100 Subject: [PATCH 0195/1722] PSYNC2: on transient error jump to error, not write_error. --- src/replication.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/replication.c b/src/replication.c index 818b7463b..e0c2cca6f 100644 --- a/src/replication.c +++ b/src/replication.c @@ -1774,7 +1774,7 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) { * from scratch later, so go to the error path. This happens when * the server is loading the dataset or is not connected with its * master and so forth. */ - if (psync_result == PSYNC_TRY_LATER) goto write_error; + if (psync_result == PSYNC_TRY_LATER) goto error; /* Note: if PSYNC does not return WAIT_REPLY, it will take care of * uninstalling the read handler from the file descriptor. */ From b4299286876db177fc862cc4484f57172145a8ad Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 25 Nov 2016 10:55:16 +0100 Subject: [PATCH 0196/1722] Better protocol errors logging. --- src/networking.c | 39 +++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/src/networking.c b/src/networking.c index b2cec8631..6aeb7ce24 100644 --- a/src/networking.c +++ b/src/networking.c @@ -30,8 +30,9 @@ #include "server.h" #include #include +#include -static void setProtocolError(client *c, int pos); +static void setProtocolError(const char *errstr, client *c, int pos); /* Return the size consumed from the allocator, for the specified SDS string, * including internal fragmentation. 
This function is used in order to compute @@ -1039,7 +1040,7 @@ int processInlineBuffer(client *c) { if (newline == NULL) { if (sdslen(c->querybuf) > PROTO_INLINE_MAX_SIZE) { addReplyError(c,"Protocol error: too big inline request"); - setProtocolError(c,0); + setProtocolError("too big inline request",c,0); } return C_ERR; } @@ -1056,7 +1057,7 @@ int processInlineBuffer(client *c) { sdsfree(aux); if (argv == NULL) { addReplyError(c,"Protocol error: unbalanced quotes in request"); - setProtocolError(c,0); + setProtocolError("unbalanced quotes in inline request",c,0); return C_ERR; } @@ -1099,11 +1100,29 @@ int processInlineBuffer(client *c) { /* Helper function. Trims query buffer to make the function that processes * multi bulk requests idempotent. */ -static void setProtocolError(client *c, int pos) { +#define PROTO_DUMP_LEN 128 +static void setProtocolError(const char *errstr, client *c, int pos) { if (server.verbosity <= LL_VERBOSE) { sds client = catClientInfoString(sdsempty(),c); + + /* Sample some protocol to given an idea about what was inside. */ + char buf[256]; + if (sdslen(c->querybuf) < PROTO_DUMP_LEN) { + snprintf(buf,sizeof(buf),"Query buffer during protocol error: '%s'", c->querybuf); + } else { + snprintf(buf,sizeof(buf),"Query buffer during protocol error: '%.*s' (... more %zu bytes ...) '%.*s'", PROTO_DUMP_LEN/2, c->querybuf, sdslen(c->querybuf)-PROTO_DUMP_LEN, PROTO_DUMP_LEN/2, c->querybuf+sdslen(c->querybuf)-PROTO_DUMP_LEN/2); + } + + /* Remove non printable chars. */ + char *p = buf; + while (*p != '\0') { + if (!isprint(*p)) *p = '.'; + p++; + } + + /* Log all the client and protocol info. */ serverLog(LL_VERBOSE, - "Protocol error from client: %s", client); + "Protocol error (%s) from client: %s. 
%s", errstr, client, buf); sdsfree(client); } c->flags |= CLIENT_CLOSE_AFTER_REPLY; @@ -1124,7 +1143,7 @@ int processMultibulkBuffer(client *c) { if (newline == NULL) { if (sdslen(c->querybuf) > PROTO_INLINE_MAX_SIZE) { addReplyError(c,"Protocol error: too big mbulk count string"); - setProtocolError(c,0); + setProtocolError("too big mbulk count string",c,0); } return C_ERR; } @@ -1139,7 +1158,7 @@ int processMultibulkBuffer(client *c) { ok = string2ll(c->querybuf+1,newline-(c->querybuf+1),&ll); if (!ok || ll > 1024*1024) { addReplyError(c,"Protocol error: invalid multibulk length"); - setProtocolError(c,pos); + setProtocolError("invalid mbulk count",c,pos); return C_ERR; } @@ -1165,7 +1184,7 @@ int processMultibulkBuffer(client *c) { if (sdslen(c->querybuf) > PROTO_INLINE_MAX_SIZE) { addReplyError(c, "Protocol error: too big bulk count string"); - setProtocolError(c,0); + setProtocolError("too big bulk count string",c,0); return C_ERR; } break; @@ -1179,14 +1198,14 @@ int processMultibulkBuffer(client *c) { addReplyErrorFormat(c, "Protocol error: expected '$', got '%c'", c->querybuf[pos]); - setProtocolError(c,pos); + setProtocolError("expected $ but got something else",c,pos); return C_ERR; } ok = string2ll(c->querybuf+pos+1,newline-(c->querybuf+pos+1),&ll); if (!ok || ll < 0 || ll > 512*1024*1024) { addReplyError(c,"Protocol error: invalid bulk length"); - setProtocolError(c,pos); + setProtocolError("invalid bulk length",c,pos); return C_ERR; } From 893f757ebce6df049a82d566c71da4bd13c81119 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 28 Nov 2016 10:13:24 +0100 Subject: [PATCH 0197/1722] PSYNC2: Test (WIP). This is the PSYNC2 test that helped find issues in the code, and that still can show a protocol desync from time to time. Work is in progress in order to find the issue. For now the test is not enabled in "make test" and must be run manually. 
--- tests/integration/psync2.tcl | 127 +++++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 tests/integration/psync2.tcl diff --git a/tests/integration/psync2.tcl b/tests/integration/psync2.tcl new file mode 100644 index 000000000..46a08dad1 --- /dev/null +++ b/tests/integration/psync2.tcl @@ -0,0 +1,127 @@ +start_server {tags {"wait"}} { +start_server {} { +start_server {} { +start_server {} { +start_server {} { + for {set j 0} {$j < 5} {incr j} { + set R($j) [srv [expr 0-$j] client] + set R_host($j) [srv [expr 0-$j] host] + set R_port($j) [srv [expr 0-$j] port] + puts "Log file: [srv [expr 0-$j] stdout]" + } + + set master_id 0 ; # Current master + set start_time [clock seconds] ; # Test start time + set counter_value 0 ; # Current value of the Redis counter "x" + + # Config + set duration 60 ; # Total test seconds + + set genload 1 ; # Load master with writes at every cycle + + set genload_time 5000 ; # Writes duration time in ms + + set disconnect 1 ; # Break replication link between random + # master and slave instances while the + # master is loaded with writes. + + set disconnect_period 1000 ; # Disconnect repl link every N ms. + + while {([clock seconds]-$start_time) < $duration} { + + # Create a random replication layout. + # Start with switching master (this simulates a failover). + + # 1) Select the new master. 
+ set master_id [randomInt 5] + set used [list $master_id] + test "PSYNC2: \[NEW LAYOUT\] Set #$master_id as master" { + $R($master_id) slaveof no one + if {$counter_value == 0} { + $R($master_id) set x $counter_value + } + } + + # 2) Attach all the slaves to a random instance + while {[llength $used] != 5} { + while 1 { + set slave_id [randomInt 5] + if {[lsearch -exact $used $slave_id] == -1} break + } + set rand [randomInt [llength $used]] + set mid [lindex $used $rand] + set master_host $R_host($mid) + set master_port $R_port($mid) + + test "PSYNC2: Set #$slave_id to replicate from #$mid" { + $R($slave_id) slaveof $master_host $master_port + } + lappend used $slave_id + } + + # 3) Increment the counter and wait for all the instances + # to converge. + test "PSYNC2: cluster is consistent after failover" { + $R($master_id) incr x; incr counter_value + for {set j 0} {$j < 5} {incr j} { + wait_for_condition 50 1000 { + [$R($j) get x] == $counter_value + } else { + fail "Instance #$j x variable is inconsistent" + } + } + } + + # 4) Generate load while breaking the connection of random + # slave-master pairs. 
+ test "PSYNC2: generate load while killing replication links" { + set t [clock milliseconds] + set next_break [expr {$t+$disconnect_period}] + while {[clock milliseconds]-$t < $genload_time} { + if {$genload} { + $R($master_id) incr x; incr counter_value + } + if {[clock milliseconds] == $next_break} { + set next_break \ + [expr {[clock milliseconds]+$disconnect_period}] + set slave_id [randomInt 5] + if {$disconnect} { + $R($slave_id) client kill type master + } + } + } + } + + # 5) Increment the counter and wait for all the instances + set x [$R($master_id) get x] + test "PSYNC2: cluster is consistent after load (x = $x)" { + for {set j 0} {$j < 5} {incr j} { + wait_for_condition 50 1000 { + [$R($j) get x] == $counter_value + } else { + fail "Instance #$j x variable is inconsistent" + } + } + } + + # Put down the old master so that it cannot generate more + # replication stream, this way in the next master switch, the time at + # which we move slaves away is not important, each will have full + # history (otherwise PINGs will make certain slaves have more history), + # and sometimes a full resync will be needed. + $R($master_id) slaveof 127.0.0.1 0 ;# We use port zero to make it fail. + + for {set j 0} {$j < 5} {incr j} { + puts "$j: sync_full: [status $R($j) sync_full]" + puts "$j: id1 : [status $R($j) master_replid]:[status $R($j) master_repl_offset]" + puts "$j: id2 : [status $R($j) master_replid2]:[status $R($j) second_repl_offset]" + puts "$j: backlog : firstbyte=[status $R($j) repl_backlog_first_byte_offset] len=[status $R($j) repl_backlog_histlen]" + puts "---" + } + } + +# XXXXXXXXXXXX + while 1 { puts -nonewline .; flush stdout; after 1000} +# XXXXXXXXXXXX + +}}}}} From 5639ca070ba518787535e47ee39cecb44fda3446 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 28 Nov 2016 17:54:04 +0100 Subject: [PATCH 0198/1722] PSYNC2: stop sending newlines to sub-slaves when master is down. 
This actually includes two changes: 1) No newlines to take the master-slave link up when the upstream master is down. Doing this is dangerous because the sub-slave often is received replication protocol for an half-command, so can't receive newlines without desyncing the replication link, even with the code in order to cancel out the bytes that PSYNC2 was using. Moreover this is probably also not needed/sane, because anyway the slave can keep serving requests, and because if it's configured to don't serve stale data, it's a good idea, actually, to break the link. 2) When a +CONTINUE with a different ID is received, we now break connection with the sub-slaves: they need to be notified as well. This was part of the original specification but for some reason it was not implemented in the code, and was alter found as a PSYNC2 bug in the integration testing. --- src/networking.c | 12 +----------- src/replication.c | 23 ++++------------------- tests/integration/psync2.tcl | 1 + 3 files changed, 6 insertions(+), 30 deletions(-) diff --git a/src/networking.c b/src/networking.c index 6aeb7ce24..343a910e2 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1031,7 +1031,7 @@ int processInlineBuffer(client *c) { char *newline; int argc, j; sds *argv, aux; - size_t querylen, protolen; + size_t querylen; /* Search for end of line */ newline = strchr(c->querybuf,'\n'); @@ -1044,7 +1044,6 @@ int processInlineBuffer(client *c) { } return C_ERR; } - protolen = (newline - c->querybuf)+1; /* Total protocol bytes of command. */ /* Handle the \r\n case. 
*/ if (newline && newline != c->querybuf && *(newline-1) == '\r') @@ -1067,15 +1066,6 @@ int processInlineBuffer(client *c) { if (querylen == 0 && c->flags & CLIENT_SLAVE) c->repl_ack_time = server.unixtime; - /* Newline from masters can be used to prevent timeouts, but should - * not affect the replication offset since they are always sent - * "out of band" directly writing to the socket and without passing - * from the output buffers. */ - if (querylen == 0 && c->flags & CLIENT_MASTER) { - c->reploff -= protolen; - while (protolen--) chopReplicationBacklog(); - } - /* Leave data after the first line of the query in the buffer */ sdsrange(c->querybuf,querylen+2,-1); diff --git a/src/replication.c b/src/replication.c index e0c2cca6f..f22072a06 100644 --- a/src/replication.c +++ b/src/replication.c @@ -148,22 +148,6 @@ void feedReplicationBacklog(void *ptr, size_t len) { server.repl_backlog_histlen + 1; } -/* Remove the last byte from the replication backlog. This - * is useful when we receive an out of band "\n" to keep the connection - * alive but don't want to count it as replication stream. - * - * As a side effect this function adjusts the master replication offset - * of this instance to account for the missing byte. */ -void chopReplicationBacklog(void) { - if (!server.repl_backlog || !server.repl_backlog_histlen) return; - if (server.repl_backlog_idx == 0) - server.repl_backlog_idx = server.repl_backlog_size-1; - else - server.repl_backlog_idx--; - server.master_repl_offset--; - server.repl_backlog_histlen--; -} - /* Wrapper for feedReplicationBacklog() that takes Redis string objects * as input. */ void feedReplicationBacklogWithObject(robj *o) { @@ -1530,6 +1514,9 @@ int slaveTryPartialResynchronization(int fd, int read_reply) { * new one. */ memcpy(server.replid,new,sizeof(server.replid)); memcpy(server.cached_master->replid,new,sizeof(server.replid)); + + /* Disconnect all the sub-slaves: they need to be notified. 
*/ + disconnectSlaves(); } } @@ -2553,10 +2540,8 @@ void replicationCron(void) { (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START || (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_END && server.rdb_child_type != RDB_CHILD_TYPE_SOCKET)); - int is_subslave = server.masterhost && server.master == NULL && - slave->replstate == SLAVE_STATE_ONLINE; - if (is_presync || is_subslave) { + if (is_presync) { if (write(slave->fd, "\n", 1) == -1) { /* Don't worry about socket errors, it's just a ping. */ } diff --git a/tests/integration/psync2.tcl b/tests/integration/psync2.tcl index 46a08dad1..0857381c1 100644 --- a/tests/integration/psync2.tcl +++ b/tests/integration/psync2.tcl @@ -87,6 +87,7 @@ start_server {} { set slave_id [randomInt 5] if {$disconnect} { $R($slave_id) client kill type master + puts "+++ Breaking link for slave #$slave_id" } } } From f687d4fb9859ed67a206897a0c4cb4fd3a02de19 Mon Sep 17 00:00:00 2001 From: andyli Date: Tue, 29 Nov 2016 16:34:41 +0800 Subject: [PATCH 0199/1722] Modify MIN->MAX --- src/cluster.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cluster.c b/src/cluster.c index bedf5f81e..0a5a0799c 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -2774,7 +2774,7 @@ void clusterHandleSlaveFailover(void) { * and wait for replies), and the failover retry time (the time to wait * before trying to get voted again). * - * Timeout is MIN(NODE_TIMEOUT*2,2000) milliseconds. + * Timeout is MAX(NODE_TIMEOUT*2,2000) milliseconds. * Retry is two times the Timeout. */ auth_timeout = server.cluster_node_timeout*2; From 49af69dfdd60980c2f67f4bfb37bfd42335b1662 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 29 Nov 2016 10:22:40 +0100 Subject: [PATCH 0200/1722] PSYNC2 test: modify the test for production. 
--- tests/integration/psync2.tcl | 50 ++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/tests/integration/psync2.tcl b/tests/integration/psync2.tcl index 0857381c1..c6bcc0074 100644 --- a/tests/integration/psync2.tcl +++ b/tests/integration/psync2.tcl @@ -3,18 +3,15 @@ start_server {} { start_server {} { start_server {} { start_server {} { - for {set j 0} {$j < 5} {incr j} { - set R($j) [srv [expr 0-$j] client] - set R_host($j) [srv [expr 0-$j] host] - set R_port($j) [srv [expr 0-$j] port] - puts "Log file: [srv [expr 0-$j] stdout]" - } - set master_id 0 ; # Current master set start_time [clock seconds] ; # Test start time set counter_value 0 ; # Current value of the Redis counter "x" # Config + set debug_msg 0 ; # Enable additional debug messages + + set no_exit 0; ; # Do not exit at end of the test + set duration 60 ; # Total test seconds set genload 1 ; # Load master with writes at every cycle @@ -27,6 +24,13 @@ start_server {} { set disconnect_period 1000 ; # Disconnect repl link every N ms. + for {set j 0} {$j < 5} {incr j} { + set R($j) [srv [expr 0-$j] client] + set R_host($j) [srv [expr 0-$j] host] + set R_port($j) [srv [expr 0-$j] port] + if {$debug_msg} {puts "Log file: [srv [expr 0-$j] stdout]"} + } + while {([clock seconds]-$start_time) < $duration} { # Create a random replication layout. @@ -87,7 +91,9 @@ start_server {} { set slave_id [randomInt 5] if {$disconnect} { $R($slave_id) client kill type master - puts "+++ Breaking link for slave #$slave_id" + if {$debug_msg} { + puts "+++ Breaking link for slave #$slave_id" + } } } } @@ -112,17 +118,27 @@ start_server {} { # and sometimes a full resync will be needed. $R($master_id) slaveof 127.0.0.1 0 ;# We use port zero to make it fail. 
- for {set j 0} {$j < 5} {incr j} { - puts "$j: sync_full: [status $R($j) sync_full]" - puts "$j: id1 : [status $R($j) master_replid]:[status $R($j) master_repl_offset]" - puts "$j: id2 : [status $R($j) master_replid2]:[status $R($j) second_repl_offset]" - puts "$j: backlog : firstbyte=[status $R($j) repl_backlog_first_byte_offset] len=[status $R($j) repl_backlog_histlen]" - puts "---" + if {$debug_msg} { + for {set j 0} {$j < 5} {incr j} { + puts "$j: sync_full: [status $R($j) sync_full]" + puts "$j: id1 : [status $R($j) master_replid]:[status $R($j) master_repl_offset]" + puts "$j: id2 : [status $R($j) master_replid2]:[status $R($j) second_repl_offset]" + puts "$j: backlog : firstbyte=[status $R($j) repl_backlog_first_byte_offset] len=[status $R($j) repl_backlog_histlen]" + puts "---" + } + } + + test "PSYNC2: total sum of full synchronizations is exactly 4" { + set sum 0 + for {set j 0} {$j < 5} {incr j} { + incr sum [status $R($j) sync_full] + } + assert {$sum == 4} } } -# XXXXXXXXXXXX - while 1 { puts -nonewline .; flush stdout; after 1000} -# XXXXXXXXXXXX + if {$no_exit} { + while 1 { puts -nonewline .; flush stdout; after 1000} + } }}}}} From d262113e5dc87311c0b0c3d79ac8131bff04e76e Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 29 Nov 2016 10:24:46 +0100 Subject: [PATCH 0201/1722] PSYNC2: Minor memory leak reading -NOMASTERLINK master reply fixed. 
--- src/replication.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/replication.c b/src/replication.c index f22072a06..28a37786c 100644 --- a/src/replication.c +++ b/src/replication.c @@ -1539,6 +1539,7 @@ int slaveTryPartialResynchronization(int fd, int read_reply) { serverLog(LL_NOTICE, "Master is currently unable to PSYNC " "but should be in the future: %s", reply); + sdsfree(reply); return PSYNC_TRY_LATER; } From 855b0c05e791126734d08b6b0100c375cf807d66 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 29 Nov 2016 10:25:42 +0100 Subject: [PATCH 0202/1722] PSYNC2 test: test added to the default tests. --- tests/test_helper.tcl | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl index 2f8021e66..fdfe6a01b 100644 --- a/tests/test_helper.tcl +++ b/tests/test_helper.tcl @@ -41,6 +41,7 @@ set ::all_tests { integration/rdb integration/convert-zipmap-hash-on-load integration/logging + integration/psync2 unit/pubsub unit/slowlog unit/scripting From 17c02cee7d0f282ea65a04011826350e16181220 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 29 Nov 2016 10:27:53 +0100 Subject: [PATCH 0203/1722] PSYNC2 test: 20 seconds are enough... --- tests/integration/psync2.tcl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/integration/psync2.tcl b/tests/integration/psync2.tcl index c6bcc0074..1c0403fb0 100644 --- a/tests/integration/psync2.tcl +++ b/tests/integration/psync2.tcl @@ -12,7 +12,7 @@ start_server {} { set no_exit 0; ; # Do not exit at end of the test - set duration 60 ; # Total test seconds + set duration 20 ; # Total test seconds set genload 1 ; # Load master with writes at every cycle @@ -31,7 +31,11 @@ start_server {} { if {$debug_msg} {puts "Log file: [srv [expr 0-$j] stdout]"} } + set cycle 1 while {([clock seconds]-$start_time) < $duration} { + test "PSYNC2: --- CYCLE $cycle ---" { + incr cycle + } # Create a random replication layout. 
# Start with switching master (this simulates a failover). From 02b570290e86c0455a000cf4104ac2341117e2eb Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 29 Nov 2016 11:15:12 +0100 Subject: [PATCH 0204/1722] PSYNC2 test: check ability to resync after restart. --- tests/integration/psync2.tcl | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/tests/integration/psync2.tcl b/tests/integration/psync2.tcl index 1c0403fb0..d91969e3e 100644 --- a/tests/integration/psync2.tcl +++ b/tests/integration/psync2.tcl @@ -1,4 +1,4 @@ -start_server {tags {"wait"}} { +start_server {tags {"psync2"}} { start_server {} { start_server {} { start_server {} { @@ -141,6 +141,40 @@ start_server {} { } } + test "PSYNC2: Bring the master back again for next test" { + $R($master_id) slaveof no one + set master_host $R_host($master_id) + set master_port $R_port($master_id) + for {set j 0} {$j < 5} {incr j} { + if {$j == $master_id} continue + $R($j) slaveof $master_host $master_port + } + + # Wait for slaves to sync + wait_for_condition 50 1000 { + [status $R($master_id) connected_slaves] == 4 + } else { + fail "Slave not reconnecting" + } + } + + test "PSYNC2: Partial resync after restart using RDB aux fields" { + # Pick a random slave + set slave_id [expr {($master_id+1)%5}] + set sync_count [status $R($master_id) sync_full] + catch { + $R($slave_id) config rewrite + $R($slave_id) debug restart + } + wait_for_condition 50 1000 { + [status $R($master_id) connected_slaves] == 4 + } else { + fail "Slave not reconnecting" + } + set new_sync_count [status $R($master_id) sync_full] + assert {$sync_count == $new_sync_count} + } + if {$no_exit} { while 1 { puts -nonewline .; flush stdout; after 1000} } From 7a045a9a6bb87eda004b21c78b612795753a3abd Mon Sep 17 00:00:00 2001 From: Dvir Volk Date: Wed, 30 Nov 2016 11:49:49 +0200 Subject: [PATCH 0205/1722] fix memory corruption on RM_FreeCallReply --- src/module.c | 4 +++- 1 file changed, 3 
insertions(+), 1 deletion(-) diff --git a/src/module.c b/src/module.c index 07f88eff5..08b653e1c 100644 --- a/src/module.c +++ b/src/module.c @@ -2286,8 +2286,10 @@ void RM_FreeCallReply_Rec(RedisModuleCallReply *reply, int freenested){ * to have the first level function to return on nested replies, but only * if called by the module API. */ void RM_FreeCallReply(RedisModuleCallReply *reply) { + + RedisModuleCtx *ctx = reply->ctx; RM_FreeCallReply_Rec(reply,0); - autoMemoryFreed(reply->ctx,REDISMODULE_AM_REPLY,reply); + autoMemoryFreed(ctx,REDISMODULE_AM_REPLY,reply); } /* Return the reply type. */ From a52b7158359a45c823993b8db6bf31d922360796 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 30 Nov 2016 11:13:55 +0100 Subject: [PATCH 0206/1722] Modules: change type registration API to use a struct of methods. --- src/module.c | 49 ++++++++++++++++++++++++++++++++++------- src/modules/hellotype.c | 10 ++++++++- src/redismodule.h | 14 +++++++++++- src/server.h | 2 ++ 4 files changed, 65 insertions(+), 10 deletions(-) diff --git a/src/module.c b/src/module.c index 07f88eff5..36e23a47c 100644 --- a/src/module.c +++ b/src/module.c @@ -2666,7 +2666,7 @@ void moduleTypeNameByID(char *name, uint64_t moduleid) { /* Register a new data type exported by the module. The parameters are the * following. Please for in depth documentation check the modules API - * documentation, especially the INTRO.md file. + * documentation, especially the TYPES.md file. * * * **name**: A 9 characters data type name that MUST be unique in the Redis * Modules ecosystem. Be creative... and there will be no collisions. Use @@ -2685,12 +2685,31 @@ void moduleTypeNameByID(char *name, uint64_t moduleid) { * still load old data produced by an older version if the rdb_load * callback is able to check the encver value and act accordingly. * The encver must be a positive value between 0 and 1023. 
+ * * **typemethods_ptr** is a pointer to a RedisModuleTypeMethods structure + * that should be populated with the methods callbacks and structure + * version, like in the following example: + * + * RedisModuleTypeMethods tm = { + * .version = REDISMODULE_TYPE_METHOD_VERSION, + * .rdb_load = myType_RDBLoadCallBack, + * .rdb_save = myType_RDBSaveCallBack, + * .aof_rewrite = myType_AOFRewriteCallBack, + * .free = myType_FreeCallBack, + * + * // Optional fields + * .digest = myType_DigestCallBack, + * .mem_usage = myType_MemUsageCallBack, + * } + * * * **rdb_load**: A callback function pointer that loads data from RDB files. * * **rdb_save**: A callback function pointer that saves data to RDB files. * * **aof_rewrite**: A callback function pointer that rewrites data as commands. * * **digest**: A callback function pointer that is used for `DEBUG DIGEST`. * * **free**: A callback function pointer that can free a type value. * + * The **digest* and **mem_usage** methods should currently be omitted since + * they are not yet implemented inside the Redis modules core. + * * Note: the module name "AAAAAAAAA" is reserved and produces an error, it * happens to be pretty lame as well. 
* @@ -2709,19 +2728,33 @@ void moduleTypeNameByID(char *name, uint64_t moduleid) { * BalancedTreeType = RM_CreateDataType(...); * } */ -moduleType *RM_CreateDataType(RedisModuleCtx *ctx, const char *name, int encver, moduleTypeLoadFunc rdb_load, moduleTypeSaveFunc rdb_save, moduleTypeRewriteFunc aof_rewrite, moduleTypeDigestFunc digest, moduleTypeFreeFunc free) { +moduleType *RM_CreateDataType(RedisModuleCtx *ctx, const char *name, int encver, void *typemethods_ptr) { uint64_t id = moduleTypeEncodeId(name,encver); if (id == 0) return NULL; if (moduleTypeLookupModuleByName(name) != NULL) return NULL; - moduleType *mt = zmalloc(sizeof(*mt)); + long typemethods_version = ((long*)typemethods_ptr)[0]; + if (typemethods_version == 0) return NULL; + + struct typemethods { + uint64_t version; + moduleTypeLoadFunc rdb_load; + moduleTypeSaveFunc rdb_save; + moduleTypeRewriteFunc aof_rewrite; + moduleTypeDigestFunc digest; + moduleTypeMemUsageFunc mem_usage; + moduleTypeFreeFunc free; + } *tms = (struct typemethods*) typemethods_ptr; + + moduleType *mt = zcalloc(sizeof(*mt)); mt->id = id; mt->module = ctx->module; - mt->rdb_load = rdb_load; - mt->rdb_save = rdb_save; - mt->aof_rewrite = aof_rewrite; - mt->digest = digest; - mt->free = free; + mt->rdb_load = tms->rdb_load; + mt->rdb_save = tms->rdb_save; + mt->aof_rewrite = tms->aof_rewrite; + mt->mem_usage = tms->mem_usage; + mt->digest = tms->digest; + mt->free = tms->free; memcpy(mt->name,name,sizeof(mt->name)); listAddNodeTail(ctx->module->types,mt); return mt; diff --git a/src/modules/hellotype.c b/src/modules/hellotype.c index 535eb88e1..02a5bb477 100644 --- a/src/modules/hellotype.c +++ b/src/modules/hellotype.c @@ -245,7 +245,15 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) if (RedisModule_Init(ctx,"hellotype",1,REDISMODULE_APIVER_1) == REDISMODULE_ERR) return REDISMODULE_ERR; - HelloType = 
RedisModule_CreateDataType(ctx,"hellotype",0,HelloTypeRdbLoad,HelloTypeRdbSave,HelloTypeAofRewrite,HelloTypeDigest,HelloTypeFree); + RedisModuleTypeMethods tm = { + .version = REDISMODULE_TYPE_METHOD_VERSION, + .rdb_load = HelloTypeRdbLoad, + .rdb_save = HelloTypeRdbSave, + .aof_rewrite = HelloTypeAofRewrite, + .free = HelloTypeFree + }; + + HelloType = RedisModule_CreateDataType(ctx,"hellotype",0,&tm); if (HelloType == NULL) return REDISMODULE_ERR; if (RedisModule_CreateCommand(ctx,"hellotype.insert", diff --git a/src/redismodule.h b/src/redismodule.h index a07c09488..4ecf3c8f2 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -91,9 +91,21 @@ typedef int (*RedisModuleCmdFunc) (RedisModuleCtx *ctx, RedisModuleString **argv typedef void *(*RedisModuleTypeLoadFunc)(RedisModuleIO *rdb, int encver); typedef void (*RedisModuleTypeSaveFunc)(RedisModuleIO *rdb, void *value); typedef void (*RedisModuleTypeRewriteFunc)(RedisModuleIO *aof, RedisModuleString *key, void *value); +typedef size_t (*RedisModuleTypeMemUsageFunc)(void *value); typedef void (*RedisModuleTypeDigestFunc)(RedisModuleDigest *digest, void *value); typedef void (*RedisModuleTypeFreeFunc)(void *value); +#define REDISMODULE_TYPE_METHOD_VERSION 1 +typedef struct RedisModuleTypeMethods { + uint64_t version; + RedisModuleTypeLoadFunc rdb_load; + RedisModuleTypeSaveFunc rdb_save; + RedisModuleTypeRewriteFunc aof_rewrite; + RedisModuleTypeMemUsageFunc mem_usage; + RedisModuleTypeRewriteFunc digest; + RedisModuleTypeFreeFunc free; +} RedisModuleTypeMethods; + #define REDISMODULE_GET_API(name) \ RedisModule_GetApi("RedisModule_" #name, ((void **)&RedisModule_ ## name)) @@ -172,7 +184,7 @@ int REDISMODULE_API_FUNC(RedisModule_IsKeysPositionRequest)(RedisModuleCtx *ctx) void REDISMODULE_API_FUNC(RedisModule_KeyAtPos)(RedisModuleCtx *ctx, int pos); unsigned long long REDISMODULE_API_FUNC(RedisModule_GetClientId)(RedisModuleCtx *ctx); void *REDISMODULE_API_FUNC(RedisModule_PoolAlloc)(RedisModuleCtx *ctx, 
size_t bytes); -RedisModuleType *REDISMODULE_API_FUNC(RedisModule_CreateDataType)(RedisModuleCtx *ctx, const char *name, int encver, RedisModuleTypeLoadFunc rdb_load, RedisModuleTypeSaveFunc rdb_save, RedisModuleTypeRewriteFunc aof_rewrite, RedisModuleTypeDigestFunc digest, RedisModuleTypeFreeFunc free); +RedisModuleType *REDISMODULE_API_FUNC(RedisModule_CreateDataType)(RedisModuleCtx *ctx, const char *name, int encver, RedisModuleTypeMethods *typemethods); int REDISMODULE_API_FUNC(RedisModule_ModuleTypeSetValue)(RedisModuleKey *key, RedisModuleType *mt, void *value); RedisModuleType *REDISMODULE_API_FUNC(RedisModule_ModuleTypeGetType)(RedisModuleKey *key); void *REDISMODULE_API_FUNC(RedisModule_ModuleTypeGetValue)(RedisModuleKey *key); diff --git a/src/server.h b/src/server.h index b7f909933..1ad862003 100644 --- a/src/server.h +++ b/src/server.h @@ -476,6 +476,7 @@ typedef void *(*moduleTypeLoadFunc)(struct RedisModuleIO *io, int encver); typedef void (*moduleTypeSaveFunc)(struct RedisModuleIO *io, void *value); typedef void (*moduleTypeRewriteFunc)(struct RedisModuleIO *io, struct redisObject *key, void *value); typedef void (*moduleTypeDigestFunc)(struct RedisModuleDigest *digest, void *value); +typedef size_t (*moduleTypeMemUsageFunc)(void *value); typedef void (*moduleTypeFreeFunc)(void *value); /* The module type, which is referenced in each value of a given type, defines @@ -486,6 +487,7 @@ typedef struct RedisModuleType { moduleTypeLoadFunc rdb_load; moduleTypeSaveFunc rdb_save; moduleTypeRewriteFunc aof_rewrite; + moduleTypeMemUsageFunc mem_usage; moduleTypeDigestFunc digest; moduleTypeFreeFunc free; char name[10]; /* 9 bytes name + null term. Charset: A-Z a-z 0-9 _- */ From 729eef96f1f9eca7d7837ef6aca479f53199cf77 Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Wed, 30 Nov 2016 22:05:59 +0200 Subject: [PATCH 0207/1722] Fix typo in RedisModuleTypeMethods declaration. 
--- src/redismodule.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/redismodule.h b/src/redismodule.h index 4ecf3c8f2..186e284c0 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -102,7 +102,7 @@ typedef struct RedisModuleTypeMethods { RedisModuleTypeSaveFunc rdb_save; RedisModuleTypeRewriteFunc aof_rewrite; RedisModuleTypeMemUsageFunc mem_usage; - RedisModuleTypeRewriteFunc digest; + RedisModuleTypeDigestFunc digest; RedisModuleTypeFreeFunc free; } RedisModuleTypeMethods; From 497fb6a0fdf5047a96159cebeaae3e930d63d1f8 Mon Sep 17 00:00:00 2001 From: Chris Lamb Date: Thu, 1 Dec 2016 12:10:14 +0000 Subject: [PATCH 0208/1722] src/rdb.c: Correct "whenver" -> "whenever" typo. --- src/rdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rdb.c b/src/rdb.c index 765e13374..b81d0808a 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -1939,7 +1939,7 @@ void bgsaveCommand(client *c) { } else { addReplyError(c, "An AOF log rewriting in progress: can't BGSAVE right now. " - "Use BGSAVE SCHEDULE in order to schedule a BGSAVE whenver " + "Use BGSAVE SCHEDULE in order to schedule a BGSAVE whenever " "possible."); } } else if (rdbSaveBackground(server.rdb_filename,NULL) == C_OK) { From acd9aea8bb1f26c08246fa825d5afcd42cd58e3a Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 2 Dec 2016 10:21:20 +0100 Subject: [PATCH 0209/1722] PSYNC2: Do not accept WAIT in slave instances. No longer makes sense since writable slaves only do local writes now: writes are no longer passed to sub-slaves in the stream. --- src/replication.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/replication.c b/src/replication.c index 28a37786c..94287f9d0 100644 --- a/src/replication.c +++ b/src/replication.c @@ -2362,6 +2362,11 @@ void waitCommand(client *c) { long numreplicas, ackreplicas; long long offset = c->woff; + if (server.masterhost) { + addReplyError(c,"WAIT cannot be used with slave instances. 
Please also note that since Redis 4.0 if a slave is configured to be writable (which is not the default) writes to slaves are just local and are not propagated."); + return; + } + /* Argument parsing. */ if (getLongFromObjectOrReply(c,c->argv[1],&numreplicas,NULL) != C_OK) return; From d22d2b7b7f62a159adef62bd25e17064e32812af Mon Sep 17 00:00:00 2001 From: Itamar Haber Date: Fri, 2 Dec 2016 18:19:36 +0200 Subject: [PATCH 0210/1722] Verify pairs are provided after subcommands Fixes https://github.com/antirez/redis/issues/3639 --- src/t_zset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/t_zset.c b/src/t_zset.c index 8d905be02..d36fa30ae 100644 --- a/src/t_zset.c +++ b/src/t_zset.c @@ -1521,7 +1521,7 @@ void zaddGenericCommand(client *c, int flags) { /* After the options, we expect to have an even number of args, since * we expect any number of score-element pairs. */ elements = c->argc-scoreidx; - if (elements % 2) { + if (elements % 2 || !elements) { addReply(c,shared.syntaxerr); return; } From 1a5a20596959e320100bd2fb735ab230cc3b68b4 Mon Sep 17 00:00:00 2001 From: cbgbt Date: Thu, 1 Dec 2016 02:33:01 -0800 Subject: [PATCH 0211/1722] cli: Only print elapsed time on OUTPUT_STANDARD --- src/redis-cli.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 85ca404fb..d25b7febb 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -1344,7 +1344,9 @@ static void repl(void) { } elapsed = mstime()-start_time; - if (elapsed >= 500) { + if (elapsed >= 500 && + config.output == OUTPUT_STANDARD) + { printf("(%.2fs)\n",(double)elapsed/1000); } } From 81b343c386c4d61d79eb8e107f60ed0a977f71c3 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 5 Dec 2016 14:02:32 +0100 Subject: [PATCH 0212/1722] Geo: fix computation of bounding box. A bug was reported in the context in issue #3631. 
The root cause of the bug was that certain neighbor boxes were zeroed after the "inside the bounding box or not" check, simply because the bounding box computation function was wrong. A few debugging infos where enhanced and moved in other parts of the code. A check to avoid steps=0 was added, but is unrelated to this issue and I did not verified it was an actual bug in practice. --- src/geo.c | 25 ++++++++++++++++++++- src/geohash_helper.c | 52 ++++++++------------------------------------ 2 files changed, 33 insertions(+), 44 deletions(-) diff --git a/src/geo.c b/src/geo.c index 331d22435..46022bdde 100644 --- a/src/geo.c +++ b/src/geo.c @@ -326,6 +326,7 @@ int membersOfGeoHashBox(robj *zobj, GeoHashBits hash, geoArray *ga, double lon, int membersOfAllNeighbors(robj *zobj, GeoHashRadius n, double lon, double lat, double radius, geoArray *ga) { GeoHashBits neighbors[9]; unsigned int i, count = 0, last_processed = 0; + int debugmsg = 0; neighbors[0] = n.hash; neighbors[1] = n.neighbors.north; @@ -340,8 +341,26 @@ int membersOfAllNeighbors(robj *zobj, GeoHashRadius n, double lon, double lat, d /* For each neighbor (*and* our own hashbox), get all the matching * members and add them to the potential result list. */ for (i = 0; i < sizeof(neighbors) / sizeof(*neighbors); i++) { - if (HASHISZERO(neighbors[i])) + if (HASHISZERO(neighbors[i])) { + if (debugmsg) D("neighbors[%d] is zero",i); continue; + } + + /* Debugging info. */ + if (debugmsg) { + GeoHashRange long_range, lat_range; + geohashGetCoordRange(&long_range,&lat_range); + GeoHashArea myarea = {{0}}; + geohashDecode(long_range, lat_range, neighbors[i], &myarea); + + /* Dump center square. 
*/ + D("neighbors[%d]:\n",i); + D("area.longitude.min: %f\n", myarea.longitude.min); + D("area.longitude.max: %f\n", myarea.longitude.max); + D("area.latitude.min: %f\n", myarea.latitude.min); + D("area.latitude.max: %f\n", myarea.latitude.max); + D("\n"); + } /* When a huge Radius (in the 5000 km range or more) is used, * adjacent neighbors can be the same, leading to duplicated @@ -350,7 +369,11 @@ int membersOfAllNeighbors(robj *zobj, GeoHashRadius n, double lon, double lat, d if (last_processed && neighbors[i].bits == neighbors[last_processed].bits && neighbors[i].step == neighbors[last_processed].step) + { + if (debugmsg) + D("Skipping processing of %d, same as previous\n",i); continue; + } count += membersOfGeoHashBox(zobj, neighbors[i], ga, lon, lat, radius); last_processed = i; } diff --git a/src/geohash_helper.c b/src/geohash_helper.c index 139bcea11..77d8ab392 100644 --- a/src/geohash_helper.c +++ b/src/geohash_helper.c @@ -82,30 +82,18 @@ uint8_t geohashEstimateStepsByRadius(double range_meters, double lat) { return step; } +/* Return the bounding box of the search area centered at latitude,longitude + * having a radius of radius_meter. bounds[0] - bounds[2] is the minimum + * and maxium longitude, while bounds[1] - bounds[3] is the minimum and + * maximum latitude. 
*/ int geohashBoundingBox(double longitude, double latitude, double radius_meters, double *bounds) { if (!bounds) return 0; - double lonr, latr; - lonr = deg_rad(longitude); - latr = deg_rad(latitude); - - if (radius_meters > EARTH_RADIUS_IN_METERS) - radius_meters = EARTH_RADIUS_IN_METERS; - double distance = radius_meters / EARTH_RADIUS_IN_METERS; - double min_latitude = latr - distance; - double max_latitude = latr + distance; - - /* Note: we're being lazy and not accounting for coordinates near poles */ - double min_longitude, max_longitude; - double difference_longitude = asin(sin(distance) / cos(latr)); - min_longitude = lonr - difference_longitude; - max_longitude = lonr + difference_longitude; - - bounds[0] = rad_deg(min_longitude); - bounds[1] = rad_deg(min_latitude); - bounds[2] = rad_deg(max_longitude); - bounds[3] = rad_deg(max_latitude); + bounds[0] = longitude - rad_deg(radius_meters/EARTH_RADIUS_IN_METERS/cos(deg_rad(latitude))); + bounds[2] = longitude + rad_deg(radius_meters/EARTH_RADIUS_IN_METERS/cos(deg_rad(latitude))); + bounds[1] = latitude - rad_deg(radius_meters/EARTH_RADIUS_IN_METERS); + bounds[3] = latitude + rad_deg(radius_meters/EARTH_RADIUS_IN_METERS); return 1; } @@ -158,35 +146,13 @@ GeoHashRadius geohashGetAreasByRadius(double longitude, double latitude, double < radius_meters) decrease_step = 1; } - if (decrease_step) { + if (steps > 1 && decrease_step) { steps--; geohashEncode(&long_range,&lat_range,longitude,latitude,steps,&hash); geohashNeighbors(&hash,&neighbors); geohashDecode(long_range,lat_range,hash,&area); } - /* Example debug info. This turns to be very useful every time there is - * to investigate radius search potential bugs. So better to leave it - * here. */ - if (0) { - GeoHashArea myarea = {{0}}; - geohashDecode(long_range, lat_range, neighbors.west, &myarea); - - /* Dump West. 
*/ - D("Neighbors"); - D("area.longitude.min: %f\n", myarea.longitude.min); - D("area.longitude.max: %f\n", myarea.longitude.max); - D("area.latitude.min: %f\n", myarea.latitude.min); - D("area.latitude.max: %f\n", myarea.latitude.max); - - /* Dump center square. */ - D("Area"); - D("area.longitude.min: %f\n", area.longitude.min); - D("area.longitude.max: %f\n", area.longitude.max); - D("area.latitude.min: %f\n", area.latitude.min); - D("area.latitude.max: %f\n", area.latitude.max); - } - /* Exclude the search areas that are useless. */ if (area.latitude.min < min_lat) { GZERO(neighbors.south); From 664cf834c15ea750b3e87c6a1954d985bd2f3131 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 5 Dec 2016 14:15:08 +0100 Subject: [PATCH 0213/1722] Geo: improve fuzz test. The test now uses more diverse radius sizes, especially sizes near or greater the whole earth surface are used, that are known to trigger edge cases. Moreover the PRNG seeding was probably resulting into the same sequence tested over and over again, now seeding unsing the current unix time in milliseconds. Related to #3631. 
--- tests/unit/geo.tcl | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tests/unit/geo.tcl b/tests/unit/geo.tcl index a08726d2e..44a0ae1bc 100644 --- a/tests/unit/geo.tcl +++ b/tests/unit/geo.tcl @@ -221,18 +221,26 @@ start_server {tags {"geo"}} { } test {GEOADD + GEORANGE randomized test} { - set attempt 20 + set attempt 30 while {[incr attempt -1]} { set rv [lindex $regression_vectors $rv_idx] incr rv_idx unset -nocomplain debuginfo - set srand_seed [randomInt 1000000] + set srand_seed [clock milliseconds] if {$rv ne {}} {set srand_seed [lindex $rv 0]} lappend debuginfo "srand_seed is $srand_seed" expr {srand($srand_seed)} ; # If you need a reproducible run r del mypoints - set radius_km [expr {[randomInt 200]+10}] + + if {[randomInt 10] == 0} { + # From time to time use very big radiuses + set radius_km [expr {[randomInt 50000]+10}] + } else { + # Normally use a few - ~200km radiuses to stress + # test the code the most in edge cases. + set radius_km [expr {[randomInt 200]+10}] + } if {$rv ne {}} {set radius_km [lindex $rv 1]} set radius_m [expr {$radius_km*1000}] geo_random_point search_lon search_lat From 9fd91769e923c9464d99336e0165d4114d8fbafa Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 2 Dec 2016 16:35:24 +0100 Subject: [PATCH 0214/1722] Modules: API doc updated (auto generated). --- src/modules/API.md | 197 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 190 insertions(+), 7 deletions(-) diff --git a/src/modules/API.md b/src/modules/API.md index 9c7ada9dd..8659f7951 100644 --- a/src/modules/API.md +++ b/src/modules/API.md @@ -7,7 +7,16 @@ Use like malloc(). Memory allocated with this function is reported in Redis INFO memory, used for keys eviction according to maxmemory settings and in general is taken into account as memory allocated by Redis. -You should avoid to use malloc(). +You should avoid using malloc(). + +## `RM_Calloc` + + void *RM_Calloc(size_t nmemb, size_t size); + +Use like calloc(). 
Memory allocated with this function is reported in +Redis INFO memory, used for keys eviction according to maxmemory settings +and in general is taken into account as memory allocated by Redis. +You should avoid using calloc() directly. ## `RM_Realloc` @@ -150,6 +159,12 @@ Called by `RM_Init()` to setup the `ctx->module` structure. This is an internal function, Redis modules developers don't need to use it. +## `RM_Milliseconds` + + long long RM_Milliseconds(void); + +Return the current UNIX time in milliseconds. + ## `RM_AutoMemory` void RM_AutoMemory(RedisModuleCtx *ctx); @@ -169,6 +184,16 @@ with `RedisModule_FreeString()`, unless automatic memory is enabled. The string is created by copying the `len` bytes starting at `ptr`. No reference is retained to the passed buffer. +## `RM_CreateStringPrintf` + + RedisModuleString *RM_CreateStringPrintf(RedisModuleCtx *ctx, const char *fmt, ...); + +Create a new module string object from a printf format and arguments. +The returned string must be freed with `RedisModule_FreeString()`, unless +automatic memory is enabled. + +The string is created using the sds formatter function sdscatvprintf(). + ## `RM_CreateStringFromLongLong` RedisModuleString *RM_CreateStringFromLongLong(RedisModuleCtx *ctx, long long ll); @@ -183,7 +208,7 @@ enabling automatic memory management. RedisModuleString *RM_CreateStringFromString(RedisModuleCtx *ctx, const RedisModuleString *str); -Like `RedisModule_CreatString()`, but creates a string starting from an existing +Like `RedisModule_CreatString()`, but creates a string starting from another RedisModuleString. The returned string must be released with `RedisModule_FreeString()` or by @@ -200,9 +225,36 @@ It is possible to call this function even when automatic memory management is enabled. In that case the string will be released ASAP and removed from the pool of string to release at the end. 
+## `RM_RetainString` + + void RM_RetainString(RedisModuleCtx *ctx, RedisModuleString *str); + +Every call to this function, will make the string 'str' requiring +an additional call to `RedisModule_FreeString()` in order to really +free the string. Note that the automatic freeing of the string obtained +enabling modules automatic memory management counts for one +`RedisModule_FreeString()` call (it is just executed automatically). + +Normally you want to call this function when, at the same time +the following conditions are true: + +1) You have automatic memory management enabled. +2) You want to create string objects. +3) Those string objects you create need to live *after* the callback + function(for example a command implementation) creating them returns. + +Usually you want this in order to store the created string object +into your own data structure, for example when implementing a new data +type. + +Note that when memory management is turned off, you don't need +any call to RetainString() since creating a string will always result +into a string that lives after the callback function returns, if +no FreeString() call is performed. + ## `RM_StringPtrLen` - const char *RM_StringPtrLen(RedisModuleString *str, size_t *len); + const char *RM_StringPtrLen(const RedisModuleString *str, size_t *len); Given a string module object, this function returns the string pointer and length of the string. The returned pointer and length should only @@ -210,7 +262,7 @@ be used for read only accesses and never modified. ## `RM_StringToLongLong` - int RM_StringToLongLong(RedisModuleString *str, long long *ll); + int RM_StringToLongLong(const RedisModuleString *str, long long *ll); Convert the string into a long long integer, storing it at `*ll`. Returns `REDISMODULE_OK` on success. If the string can't be parsed @@ -219,12 +271,28 @@ is returned. 
## `RM_StringToDouble` - int RM_StringToDouble(RedisModuleString *str, double *d); + int RM_StringToDouble(const RedisModuleString *str, double *d); Convert the string into a double, storing it at `*d`. Returns `REDISMODULE_OK` on success or `REDISMODULE_ERR` if the string is not a valid string representation of a double value. +## `RM_StringCompare` + + int RM_StringCompare(RedisModuleString *a, RedisModuleString *b); + +Compare two string objects, returning -1, 0 or 1 respectively if +a < b, a == b, a > b. Strings are compared byte by byte as two +binary blobs without any encoding care / collation attempt. + +## `RM_StringAppendBuffer` + + int RM_StringAppendBuffer(RedisModuleCtx *ctx, RedisModuleString *str, const char *buf, size_t len); + +Append the specified buffere to the string 'str'. The string must be a +string created by the user that is referenced only a single time, otherwise +`REDISMODULE_ERR` is returend and the operation is not performed. + ## `RM_WrongArity` int RM_WrongArity(RedisModuleCtx *ctx); @@ -951,11 +1019,11 @@ that returned the reply object. ## `RM_CreateDataType` - moduleType *RM_CreateDataType(RedisModuleCtx *ctx, const char *name, int encver, moduleTypeLoadFunc rdb_load, moduleTypeSaveFunc rdb_save, moduleTypeRewriteFunc aof_rewrite, moduleTypeDigestFunc digest, moduleTypeFreeFunc free); + moduleType *RM_CreateDataType(RedisModuleCtx *ctx, const char *name, int encver, void *typemethods_ptr); Register a new data type exported by the module. The parameters are the following. Please for in depth documentation check the modules API -documentation, especially the INTRO.md file. +documentation, especially the TYPES.md file. * **name**: A 9 characters data type name that MUST be unique in the Redis Modules ecosystem. Be creative... and there will be no collisions. Use @@ -974,12 +1042,31 @@ documentation, especially the INTRO.md file. 
still load old data produced by an older version if the rdb_load callback is able to check the encver value and act accordingly. The encver must be a positive value between 0 and 1023. +* **typemethods_ptr** is a pointer to a RedisModuleTypeMethods structure + that should be populated with the methods callbacks and structure + version, like in the following example: + + RedisModuleTypeMethods tm = { + .version = `REDISMODULE_TYPE_METHOD_VERSION`, + .rdb_load = myType_RDBLoadCallBack, + .rdb_save = myType_RDBSaveCallBack, + .aof_rewrite = myType_AOFRewriteCallBack, + .free = myType_FreeCallBack, + + // Optional fields + .digest = myType_DigestCallBack, + .mem_usage = myType_MemUsageCallBack, + } + * **rdb_load**: A callback function pointer that loads data from RDB files. * **rdb_save**: A callback function pointer that saves data to RDB files. * **aof_rewrite**: A callback function pointer that rewrites data as commands. * **digest**: A callback function pointer that is used for `DEBUG DIGEST`. * **free**: A callback function pointer that can free a type value. +The **digest* and **mem_usage** methods should currently be omitted since +they are not yet implemented inside the Redis modules core. + Note: the module name "AAAAAAAAA" is reserved and produces an error, it happens to be pretty lame as well. @@ -1115,6 +1202,21 @@ It is possible to load back the value with `RedisModule_LoadDouble()`. In the context of the rdb_save method of a module data type, loads back the double value saved by `RedisModule_SaveDouble()`. +## `RM_SaveFloat` + + void RM_SaveFloat(RedisModuleIO *io, float value); + +In the context of the rdb_save method of a module data type, saves a float +value to the RDB file. The float can be a valid number, a NaN or infinity. +It is possible to load back the value with `RedisModule_LoadFloat()`. 
+ +## `RM_LoadFloat` + + float RM_LoadFloat(RedisModuleIO *io); + +In the context of the rdb_save method of a module data type, loads back the +float value saved by `RedisModule_SaveFloat()`. + ## `RM_EmitAOF` void RM_EmitAOF(RedisModuleIO *io, const char *cmdname, const char *fmt, ...); @@ -1125,10 +1227,20 @@ by a module. The command works exactly like `RedisModule_Call()` in the way the parameters are passed, but it does not return anything as the error handling is performed by Redis itself. +## `RM_LogRaw` + + void RM_LogRaw(RedisModule *module, const char *levelstr, const char *fmt, va_list ap); + +This is the low level function implementing both: + + `RM_Log()` + `RM_LogIOError()` + ## `RM_Log` void RM_Log(RedisModuleCtx *ctx, const char *levelstr, const char *fmt, ...); +/* Produces a log message to the standard Redis log, the format accepts printf-alike specifiers, while level is a string describing the log level to use when emitting the log, and must be one of the following: @@ -1143,3 +1255,74 @@ There is a fixed limit to the length of the log line this function is able to emit, this limti is not specified but is guaranteed to be more than a few lines of text. +## `RM_LogIOError` + + void RM_LogIOError(RedisModuleIO *io, const char *levelstr, const char *fmt, ...); + +Log errors from RDB / AOF serialization callbacks. + +This function should be used when a callback is returning a critical +error to the caller since cannot load or save the data for some +critical reason. + +## `RM_BlockClient` + + RedisModuleBlockedClient *RM_BlockClient(RedisModuleCtx *ctx, RedisModuleCmdFunc reply_callback, RedisModuleCmdFunc timeout_callback, void (*free_privdata)(void*), long long timeout_ms); + +Block a client in the context of a blocking command, returning an handle +which will be used, later, in order to block the client with a call to +`RedisModule_UnblockClient()`. The arguments specify callback functions +and a timeout after which the client is unblocked. 
+ +The callbacks are called in the following contexts: + +reply_callback: called after a successful `RedisModule_UnblockClient()` call + in order to reply to the client and unblock it. +reply_timeout: called when the timeout is reached in order to send an + error to the client. +free_privdata: called in order to free the privata data that is passed + by `RedisModule_UnblockClient()` call. + +## `RM_UnblockClient` + + int RM_UnblockClient(RedisModuleBlockedClient *bc, void *privdata); + +Unblock a client blocked by ``RedisModule_BlockedClient``. This will trigger +the reply callbacks to be called in order to reply to the client. +The 'privdata' argument will be accessible by the reply callback, so +the caller of this function can pass any value that is needed in order to +actually reply to the client. + +A common usage for 'privdata' is a thread that computes something that +needs to be passed to the client, included but not limited some slow +to compute reply or some reply obtained via networking. + +Note: this function can be called from threads spawned by the module. + +## `RM_AbortBlock` + + int RM_AbortBlock(RedisModuleBlockedClient *bc); + +Abort a blocked client blocking operation: the client will be unblocked +without firing the reply callback. + +## `RM_IsBlockedReplyRequest` + + int RM_IsBlockedReplyRequest(RedisModuleCtx *ctx); + +Return non-zero if a module command was called in order to fill the +reply for a blocked client. + +## `RM_IsBlockedTimeoutRequest` + + int RM_IsBlockedTimeoutRequest(RedisModuleCtx *ctx); + +Return non-zero if a module command was called in order to fill the +reply for a blocked client that timed out. 
+ +## `RM_GetBlockedClientPrivateData` + + void *RM_GetBlockedClientPrivateData(RedisModuleCtx *ctx); + +Get the privata data set by `RedisModule_UnblockClient()` + From 4ca7de5ed887fba737be38eb853eb13fa04aa7d8 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 2 Dec 2016 16:30:42 +0100 Subject: [PATCH 0215/1722] Modules: types doc updated to new API. --- src/modules/TYPES.md | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/src/modules/TYPES.md b/src/modules/TYPES.md index 1c31950fa..4e663b870 100644 --- a/src/modules/TYPES.md +++ b/src/modules/TYPES.md @@ -47,23 +47,25 @@ be stored in the global variable. #define MYTYPE_ENCODING_VERSION 0 int RedisModule_OnLoad(RedisModuleCtx *ctx) { - MyType = RedisModule_CreateDataType("MyType-AZ", MYTYPE_ENCODING_VERSION, - MyTypeRDBLoad, MyTypeRDBSave, MyTypeAOFRewrite, MyTypeDigest, - MyTypeFree); + RedisModuleTypeMethods tm = { + .version = REDISMODULE_TYPE_METHOD_VERSION, + .rdb_load = MyTypeRDBLoad, + .rdb_save = MyTypeRDBSave, + .aof_rewrite = MyTypeAOFRewrite, + .free = MyTypeFree + }; + + MyType = RedisModule_CreateDataType("MyType-AZ", + MYTYPE_ENCODING_VERSION, &tm); if (MyType == NULL) return REDISMODULE_ERR; } As you can see from the example above, a single API call is needed in order to register the new type. However a number of function pointers are passed as -arguments. The prototype of `RedisModule_CreateDataType` is the following: - - moduleType *RedisModule_CreateDataType(RedisModuleCtx *ctx, - const char *name, int encver, - moduleTypeLoadFunc rdb_load, - moduleTypeSaveFunc rdb_save, - moduleTypeRewriteFunc aof_rewrite, - moduleTypeDigestFunc digest, - moduleTypeFreeFunc free); +arguments. Certain are optionals while some are mandatory. The above set +of methods *must* be passed, while `.digest` and `.mem_usage` are optional +and are currently not actually supported by the modules internals, so for +now you can just ignore them. 
The `ctx` argument is the context that we receive in the `OnLoad` function. The type `name` is a 9 character name in the character set that includes @@ -74,6 +76,9 @@ ecosystem, so be creative, use both lower-case and upper case if it makes sense, and try to use the convention of mixing the type name with the name of the author of the module, to create a 9 character unique name. +**NOTE:** It is very important that the name is exactly 9 chars or the +registration of the type will fail. Read more to understand why. + For example if I'm building a *b-tree* data structure and my name is *antirez* I'll call my type **btree1-az**. The name, converted to a 64 bit integer, is stored inside the RDB file when saving the type, and will be used when the @@ -95,12 +100,14 @@ there is data found for a different encoding version (and the encoding version is passed as argument to `rdb_load`), so that the module can still load old RDB files. -The remaining arguments `rdb_load`, `rdb_save`, `aof_rewrite`, `digest` and -`free` are all callbacks with the following prototypes and uses: +The last argument is a structure used in order to pass the type methods to the +registration function: `rdb_load`, `rdb_save`, `aof_rewrite`, `digest` and +`free` and `mem_usage` are all callbacks with the following prototypes and uses: typedef void *(*RedisModuleTypeLoadFunc)(RedisModuleIO *rdb, int encver); typedef void (*RedisModuleTypeSaveFunc)(RedisModuleIO *rdb, void *value); typedef void (*RedisModuleTypeRewriteFunc)(RedisModuleIO *aof, RedisModuleString *key, void *value); + typedef size_t (*RedisModuleTypeMemUsageFunc)(void *value); typedef void (*RedisModuleTypeDigestFunc)(RedisModuleDigest *digest, void *value); typedef void (*RedisModuleTypeFreeFunc)(void *value); @@ -108,6 +115,7 @@ The remaining arguments `rdb_load`, `rdb_save`, `aof_rewrite`, `digest` and * `rdb_save` is called when saving data to the RDB file. 
* `aof_rewrite` is called when the AOF is being rewritten, and the module needs to tell Redis what is the sequence of commands to recreate the content of a given key. * `digest` is called when `DEBUG DIGEST` is executed and a key holding this module type is found. Currently this is not yet implemented so the function ca be left empty. +* `mem_usage` is called when the `MEMORY` command ask for the total memory consumed by a specific key, and is used in order to get the amount of bytes used by the module value. * `free` is called when a key with the module native type is deleted via `DEL` or in any other mean, in order to let the module reclaim the memory associated with such a value. Ok, but *why* modules types require a 9 characters name? From c64b709956cc5e9a2aa5d65726ec323ea5b710ce Mon Sep 17 00:00:00 2001 From: itamar Date: Mon, 5 Dec 2016 18:34:38 +0200 Subject: [PATCH 0216/1722] Corrects a couple of omissions in the modules docs --- src/modules/API.md | 1 + src/modules/TYPES.md | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/modules/API.md b/src/modules/API.md index 8659f7951..e90429e3b 100644 --- a/src/modules/API.md +++ b/src/modules/API.md @@ -1062,6 +1062,7 @@ documentation, especially the TYPES.md file. * **rdb_save**: A callback function pointer that saves data to RDB files. * **aof_rewrite**: A callback function pointer that rewrites data as commands. * **digest**: A callback function pointer that is used for `DEBUG DIGEST`. +* **mem_usage**: A callback function pointer that is used for `MEMORY`. * **free**: A callback function pointer that can free a type value. The **digest* and **mem_usage** methods should currently be omitted since diff --git a/src/modules/TYPES.md b/src/modules/TYPES.md index 4e663b870..4d497356a 100644 --- a/src/modules/TYPES.md +++ b/src/modules/TYPES.md @@ -55,7 +55,7 @@ be stored in the global variable. 
.free = MyTypeFree }; - MyType = RedisModule_CreateDataType("MyType-AZ", + MyType = RedisModule_CreateDataType(ctx, "MyType-AZ", MYTYPE_ENCODING_VERSION, &tm); if (MyType == NULL) return REDISMODULE_ERR; } @@ -115,7 +115,7 @@ registration function: `rdb_load`, `rdb_save`, `aof_rewrite`, `digest` and * `rdb_save` is called when saving data to the RDB file. * `aof_rewrite` is called when the AOF is being rewritten, and the module needs to tell Redis what is the sequence of commands to recreate the content of a given key. * `digest` is called when `DEBUG DIGEST` is executed and a key holding this module type is found. Currently this is not yet implemented so the function ca be left empty. -* `mem_usage` is called when the `MEMORY` command ask for the total memory consumed by a specific key, and is used in order to get the amount of bytes used by the module value. +* `mem_usage` is called when the `MEMORY` command asks for the total memory consumed by a specific key, and is used in order to get the amount of bytes used by the module value. * `free` is called when a key with the module native type is deleted via `DEL` or in any other mean, in order to let the module reclaim the memory associated with such a value. Ok, but *why* modules types require a 9 characters name? From a62b2a97a079adfd8078be617d3642effaf44f08 Mon Sep 17 00:00:00 2001 From: wangshaonan Date: Tue, 6 Dec 2016 03:11:27 +0000 Subject: [PATCH 0217/1722] Add '\n' to MEMORY DOCTOR command output message when num_reports is 0 or empty is 1 --- src/object.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/object.c b/src/object.c index 4d14ae8d9..1ae37c9d1 100644 --- a/src/object.c +++ b/src/object.c @@ -955,14 +955,14 @@ sds getMemoryDoctorReport(void) { if (num_reports == 0) { s = sdsnew( "Hi Sam, I can't find any memory issue in your instance. 
" - "I can only account for what occurs on this base."); + "I can only account for what occurs on this base.\n"); } else if (empty == 1) { s = sdsnew( "Hi Sam, this instance is empty or is using very little memory, " "my issues detector can't be used in these conditions. " "Please, leave for your mission on Earth and fill it with some data. " "The new Sam and I will be back to our programming as soon as I " - "finished rebooting."); + "finished rebooting.\n"); } else { s = sdsnew("Sam, I detected a few issues in this Redis instance memory implants:\n\n"); if (big_peak) { From 31624d82af6f88fb4dd6d0230af1830cd847b2cd Mon Sep 17 00:00:00 2001 From: Jan-Erik Rediger Date: Thu, 8 Dec 2016 14:27:21 +0100 Subject: [PATCH 0218/1722] Reset the ttl for additional keys Before, if a previous key had a TTL set but the current one didn't, the TTL was reused and thus resulted in wrong expirations set. This behaviour was experienced, when `MigrateDefaultPipeline` in redis-trib was set to >1 Fixes #3655 --- src/cluster.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cluster.c b/src/cluster.c index bedf5f81e..4f3b38e99 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -4841,6 +4841,7 @@ try_again: /* Create RESTORE payload and generate the protocol to call the command. */ for (j = 0; j < num_keys; j++) { + ttl = 0; expireat = getExpire(c->db,kv[j]); if (expireat != -1) { ttl = expireat-mstime(); From e4ec8f9b98bf1ba406e602a1b6e58d336e03faa6 Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Mon, 12 Dec 2016 20:18:40 +0200 Subject: [PATCH 0219/1722] Fix redis-cli rare crash. This happens if the server (mysteriously) returns an unexpected response to the COMMAND command. --- src/redis-cli.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/redis-cli.c b/src/redis-cli.c index 85ca404fb..ac4358220 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -275,6 +275,10 @@ static void cliIntegrateHelp(void) { * don't already match what we have. 
*/ for (size_t j = 0; j < reply->elements; j++) { redisReply *entry = reply->element[j]; + if (entry->type != REDIS_REPLY_ARRAY || entry->elements < 4 || + entry->element[0]->type != REDIS_REPLY_STRING || + entry->element[1]->type != REDIS_REPLY_INTEGER || + entry->element[3]->type != REDIS_REPLY_INTEGER) return; char *cmdname = entry->element[0]->str; int i; From a8a74bb8a53bb8aedffeee0b2a0ea8ddd610861b Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 13 Dec 2016 10:20:06 +0100 Subject: [PATCH 0220/1722] Replication: fix the infamous key leakage of writable slaves + EXPIRE. BACKGROUND AND USE CASEj Redis slaves are normally write only, however the supprot a "writable" mode which is very handy when scaling reads on slaves, that actually need write operations in order to access data. For instance imagine having slaves replicating certain Sets keys from the master. When accessing the data on the slave, we want to peform intersections between such Sets values. However we don't want to intersect each time: to cache the intersection for some time often is a good idea. To do so, it is possible to setup a slave as a writable slave, and perform the intersection on the slave side, perhaps setting a TTL on the resulting key so that it will expire after some time. THE BUG Problem: in order to have a consistent replication, expiring of keys in Redis replication is up to the master, that synthesize DEL operations to send in the replication stream. However slaves logically expire keys by hiding them from read attempts from clients so that if the master did not promptly sent a DEL, the client still see logically expired keys as non existing. Because slaves don't actively expire keys by actually evicting them but just masking from the POV of read operations, if a key is created in a writable slave, and an expire is set, the key will be leaked forever: 1. No DEL will be received from the master, which does not know about such a key at all. 2. 
No eviction will be performed by the slave, since it needs to disable eviction because it's up to masters, otherwise consistency of data is lost. THE FIX In order to fix the problem, the slave should be able to tag keys that were created in the slave side and have an expire set in some way. My solution involved using a unique additional dictionary created by the writable slave only if needed. The dictionary is obviously keyed by the key name that we need to track: all the keys that are set with an expire directly by a client writing to the slave are tracked. The value in the dictionary is a bitmap of all the DBs where such a key name needs to be tracked, so that we can use a single dictionary to track keys in all the DBs used by the slave (actually this limits the solution to the first 64 DBs, but the default with Redis is to use 16 DBs). This solution allows to pay both a small complexity and CPU penalty, which is zero when the feature is not used, actually. The slave-side eviction is encapsulated in code which is not coupled with the rest of the Redis core, if not for the hook to track the keys. TODO I'm doing the first smoke tests to see if the feature works as expected: so far so good. Unit tests should be added before merging into the 4.0 branch. 
--- src/cluster.c | 2 +- src/db.c | 19 +++++-- src/expire.c | 135 ++++++++++++++++++++++++++++++++++++++++++++++++- src/module.c | 2 +- src/rdb.c | 2 +- src/server.c | 5 +- src/server.h | 5 +- src/t_string.c | 2 +- 8 files changed, 161 insertions(+), 11 deletions(-) diff --git a/src/cluster.c b/src/cluster.c index bedf5f81e..0f4fd20e4 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -4617,7 +4617,7 @@ void restoreCommand(client *c) { /* Create the key and set the TTL if any */ dbAdd(c->db,c->argv[1],obj); - if (ttl) setExpire(c->db,c->argv[1],mstime()+ttl); + if (ttl) setExpire(c,c->db,c->argv[1],mstime()+ttl); signalModifiedKey(c->db,c->argv[1]); addReply(c,shared.ok); server.dirty++; diff --git a/src/db.c b/src/db.c index 55ae663c2..90a75fcfe 100644 --- a/src/db.c +++ b/src/db.c @@ -190,7 +190,9 @@ void dbOverwrite(redisDb *db, robj *key, robj *val) { * * 1) The ref count of the value object is incremented. * 2) clients WATCHing for the destination key notified. - * 3) The expire time of the key is reset (the key is made persistent). */ + * 3) The expire time of the key is reset (the key is made persistent). + * + * All the new keys in the database should be craeted via this interface. 
*/ void setKey(redisDb *db, robj *key, robj *val) { if (lookupKeyWrite(db,key) == NULL) { dbAdd(db,key,val); @@ -330,6 +332,7 @@ long long emptyDb(int dbnum, int flags, void(callback)(void*)) { slotToKeyFlush(); } } + if (dbnum == -1) flushSlaveKeysWithExpireList(); return removed; } @@ -851,7 +854,7 @@ void renameGenericCommand(client *c, int nx) { dbDelete(c->db,c->argv[2]); } dbAdd(c->db,c->argv[2],o); - if (expire != -1) setExpire(c->db,c->argv[2],expire); + if (expire != -1) setExpire(c,c->db,c->argv[2],expire); dbDelete(c->db,c->argv[1]); signalModifiedKey(c->db,c->argv[1]); signalModifiedKey(c->db,c->argv[2]); @@ -917,7 +920,7 @@ void moveCommand(client *c) { return; } dbAdd(dst,c->argv[1],o); - if (expire != -1) setExpire(dst,c->argv[1],expire); + if (expire != -1) setExpire(c,dst,c->argv[1],expire); incrRefCount(o); /* OK! key moved, free the entry in the source DB */ @@ -1022,7 +1025,11 @@ int removeExpire(redisDb *db, robj *key) { return dictDelete(db->expires,key->ptr) == DICT_OK; } -void setExpire(redisDb *db, robj *key, long long when) { +/* Set an expire to the specified key. If the expire is set in the context + * of an user calling a command 'c' is the client, otherwise 'c' is set + * to NULL. The 'when' parameter is the absolute unix time in milliseconds + * after which the key will no longer be considered valid. 
*/ +void setExpire(client *c, redisDb *db, robj *key, long long when) { dictEntry *kde, *de; /* Reuse the sds from the main dict in the expire dict */ @@ -1030,6 +1037,10 @@ void setExpire(redisDb *db, robj *key, long long when) { serverAssertWithInfo(NULL,key,kde != NULL); de = dictAddOrFind(db->expires,dictGetKey(kde)); dictSetSignedIntegerVal(de,when); + + int writable_slave = server.masterhost && server.repl_slave_ro == 0; + if (c && writable_slave && !(c->flags & CLIENT_MASTER)) + rememberSlaveKeyWithExpire(db,key); } /* Return the expire time of the specified key, or -1 if no expire diff --git a/src/expire.c b/src/expire.c index ccfa959ef..4dd51cfbc 100644 --- a/src/expire.c +++ b/src/expire.c @@ -216,6 +216,139 @@ void activeExpireCycle(int type) { } } +/*----------------------------------------------------------------------------- + * Expires of keys crated in writable slaves + * + * Normally slaves do not process expires: they wait the masters to synthesize + * DEL operations in order to retain consistency. However writable slaves are + * an exception: if a key is created in the slave and an expire is assigned + * to it, we need a way to expire such a key, since the master does not know + * anything about such a key. + * + * In order to do so, we track keys created in the slave side with an expire + * set, and call the expireSlaveKeys() function from time to time in order to + * reclaim the keys if they already expired. + * + * Note that the use case we are trying to cover here, is a popular one where + * slaves are put in writable mode in order to compute slow operations in + * the slave side that are mostly useful to actually read data in a more + * processed way. Think at sets intersections in a tmp key, with an expire so + * that it is also used as a cache to avoid intersecting every time. + * + * This implementation is currently not perfect but a lot better than leaking + * the keys as implemented in 3.2. 
+ *----------------------------------------------------------------------------*/ + +/* The dictionary where we remember key names and database ID of keys we may + * want to expire from the slave. Since this function is not often used we + * don't even care to initialize the database at startup. We'll do it once + * the feature is used the first time, that is, when rememberSlaveKeyWithExpire() + * is called. + * + * The dictionary has an SDS string representing the key as the hash table + * key, while the value is a 64 bit unsigned integer with the bits corresponding + * to the DB where the keys may exist set to 1. Currently the keys created + * with a DB id > 63 are not expired, but a trivial fix is to set the bitmap + * to the max 64 bit unsigned value when we know there is a key with a DB + * ID greater than 63, and check all the configured DBs in such a case. */ +dict *slaveKeysWithExpire = NULL; + +/* Check the set of keys created by the master with an expire set in order to + * check if they should be evicted. */ +void expireSlaveKeys(void) { + if (slaveKeysWithExpire == NULL || + dictSize(slaveKeysWithExpire) == 0) return; + + int cycles = 0, noexpire = 0; + mstime_t start = mstime(); + while(1) { + dictEntry *de = dictGetRandomKey(slaveKeysWithExpire); + sds keyname = dictGetKey(de); + uint64_t dbids = dictGetUnsignedIntegerVal(de); + uint64_t new_dbids = 0; + + /* Check the key against every database corresponding to the + * bits set in the value bitmap. */ + int dbid = 0; + while(dbids && dbid < server.dbnum) { + if ((dbids & 1) != 0) { + redisDb *db = server.db+dbid; + dictEntry *expire = dictFind(db->expires,keyname); + int expired = 0; + + if (expire && + activeExpireCycleTryExpire(server.db+dbid,expire,start)) + { + expired = 1; + } + + /* If the key was not expired in this DB, we need to set the + * corresponding bit in the new bitmap we set as value. 
+ * At the end of the loop if the bitmap is zero, it means we + * no longer need to keep track of this key. */ + if (expire && !expired) { + noexpire++; + new_dbids |= (uint64_t)1 << dbid; + } + } + dbid++; + dbids >>= 1; + } + + /* Set the new bitmap as value of the key, in the dictionary + * of keys with an expire set directly in the writable slave. Otherwise + * if the bitmap is zero, we no longer need to keep track of it. */ + if (new_dbids) + dictSetUnsignedIntegerVal(de,new_dbids); + else + dictDelete(slaveKeysWithExpire,keyname); + + /* Stop conditions: found 3 keys we cna't expire in a row or + * time limit was reached. */ + cycles++; + if (noexpire > 3) break; + if ((cycles % 64) == 0 && mstime()-start > 1) break; + if (dictSize(slaveKeysWithExpire) == 0) break; + } +} + +/* Track keys that received an EXPIRE or similar command in the context + * of a writable slave. */ +void rememberSlaveKeyWithExpire(redisDb *db, robj *key) { + if (slaveKeysWithExpire == NULL) + slaveKeysWithExpire = dictCreate(&keyptrDictType,NULL); + if (db->id > 63) return; + + dictEntry *de = dictAddOrFind(slaveKeysWithExpire,key->ptr); + /* If the entry was just created, set it to a copy of the SDS string + * representing the key: we don't want to need to take those keys + * in sync with the main DB. The keys will be removed by expireSlaveKeys() + * as it scans to find keys to remove. */ + if (de->key == key->ptr) { + de->key = sdsdup(key->ptr); + dictSetUnsignedIntegerVal(de,0); + } + + uint64_t dbids = dictGetUnsignedIntegerVal(de); + dbids |= (uint64_t)1 << db->id; + dictSetUnsignedIntegerVal(de,dbids); +} + +/* Remove the keys in the hash table. We need to do that when data is + * flushed from the server. We may receive new keys from the master with + * the same name/db and it is no longer a good idea to expire them. 
+ * + * Note: technically we should handle the case of a single DB being flushed + * but it is not worth it since anyway race conditions using the same set + * of key names in a wriatable slave and in its master will lead to + * inconsistencies. This is just a best-effort thing we do. */ +void flushSlaveKeysWithExpireList(void) { + if (slaveKeysWithExpire) { + dictRelease(slaveKeysWithExpire); + slaveKeysWithExpire = NULL; + } +} + /*----------------------------------------------------------------------------- * Expires Commands *----------------------------------------------------------------------------*/ @@ -265,7 +398,7 @@ void expireGenericCommand(client *c, long long basetime, int unit) { addReply(c, shared.cone); return; } else { - setExpire(c->db,key,when); + setExpire(c,c->db,key,when); addReply(c,shared.cone); signalModifiedKey(c->db,key); notifyKeyspaceEvent(NOTIFY_GENERIC,"expire",key,c->db->id); diff --git a/src/module.c b/src/module.c index 52b15fa34..5f85bf311 100644 --- a/src/module.c +++ b/src/module.c @@ -1342,7 +1342,7 @@ int RM_SetExpire(RedisModuleKey *key, mstime_t expire) { return REDISMODULE_ERR; if (expire != REDISMODULE_NO_EXPIRE) { expire += mstime(); - setExpire(key->db,key->key,expire); + setExpire(key->ctx->client,key->db,key->key,expire); } else { removeExpire(key->db,key->key); } diff --git a/src/rdb.c b/src/rdb.c index b81d0808a..2689b172d 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -1552,7 +1552,7 @@ int rdbLoadRio(rio *rdb, rdbSaveInfo *rsi) { dbAdd(db,key,val); /* Set the expire time if needed */ - if (expiretime != -1) setExpire(db,key,expiretime); + if (expiretime != -1) setExpire(NULL,db,key,expiretime); decrRefCount(key); } diff --git a/src/server.c b/src/server.c index 309516683..4d204027c 100644 --- a/src/server.c +++ b/src/server.c @@ -870,8 +870,11 @@ void clientsCron(void) { void databasesCron(void) { /* Expire keys by random sampling. Not required for slaves * as master will synthesize DELs for us. 
*/ - if (server.active_expire_enabled && server.masterhost == NULL) + if (server.active_expire_enabled && server.masterhost == NULL) { activeExpireCycle(ACTIVE_EXPIRE_CYCLE_SLOW); + } else if (server.masterhost != NULL) { + expireSlaveKeys(); + } /* Perform hash tables rehashing if needed, but only if there are no * other processes saving the DB on disk. Otherwise rehashing is bad diff --git a/src/server.h b/src/server.h index 1ad862003..0bd344e9d 100644 --- a/src/server.h +++ b/src/server.h @@ -1648,7 +1648,7 @@ int removeExpire(redisDb *db, robj *key); void propagateExpire(redisDb *db, robj *key, int lazy); int expireIfNeeded(redisDb *db, robj *key); long long getExpire(redisDb *db, robj *key); -void setExpire(redisDb *db, robj *key, long long when); +void setExpire(client *c, redisDb *db, robj *key, long long when); robj *lookupKey(redisDb *db, robj *key, int flags); robj *lookupKeyRead(redisDb *db, robj *key); robj *lookupKeyWrite(redisDb *db, robj *key); @@ -1731,6 +1731,9 @@ void disconnectAllBlockedClients(void); /* expire.c -- Handling of expired keys */ void activeExpireCycle(int type); +void expireSlaveKeys(void); +void rememberSlaveKeyWithExpire(redisDb *db, robj *key); +void flushSlaveKeysWithExpireList(void); /* evict.c -- maxmemory handling and LRU eviction. 
*/ void evictionPoolAlloc(void); diff --git a/src/t_string.c b/src/t_string.c index 8c737c4e3..75375f446 100644 --- a/src/t_string.c +++ b/src/t_string.c @@ -85,7 +85,7 @@ void setGenericCommand(client *c, int flags, robj *key, robj *val, robj *expire, } setKey(c->db,key,val); server.dirty++; - if (expire) setExpire(c->db,key,mstime()+milliseconds); + if (expire) setExpire(c,c->db,key,mstime()+milliseconds); notifyKeyspaceEvent(NOTIFY_STRING,"set",key,c->db->id); if (expire) notifyKeyspaceEvent(NOTIFY_GENERIC, "expire",key,c->db->id); From 1ae5a1326665a6cabac28f3a9e5bfb8aaaecec1f Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 13 Dec 2016 12:21:15 +0100 Subject: [PATCH 0221/1722] Fix created->created typo in expire.c --- src/expire.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/expire.c b/src/expire.c index 4dd51cfbc..f590237b7 100644 --- a/src/expire.c +++ b/src/expire.c @@ -217,7 +217,7 @@ void activeExpireCycle(int type) { } /*----------------------------------------------------------------------------- - * Expires of keys crated in writable slaves + * Expires of keys created in writable slaves * * Normally slaves do not process expires: they wait the masters to synthesize * DEL operations in order to retain consistency. However writable slaves are From 3f302b9ecfa5993cb93d61e4fa5a72380aefc13a Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 13 Dec 2016 16:02:29 +0100 Subject: [PATCH 0222/1722] INFO: show num of slave-expires keys tracked. --- src/expire.c | 6 ++++++ src/server.c | 6 ++++-- src/server.h | 1 + 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/expire.c b/src/expire.c index f590237b7..b05bf9f14 100644 --- a/src/expire.c +++ b/src/expire.c @@ -334,6 +334,12 @@ void rememberSlaveKeyWithExpire(redisDb *db, robj *key) { dictSetUnsignedIntegerVal(de,dbids); } +/* Return the number of keys we are tracking. 
*/ +size_t getSlaveKeyWithExpireCount(void) { + if (slaveKeysWithExpire == NULL) return 0; + return dictSize(slaveKeysWithExpire); +} + /* Remove the keys in the hash table. We need to do that when data is * flushed from the server. We may receive new keys from the master with * the same name/db and it is no longer a good idea to expire them. diff --git a/src/server.c b/src/server.c index 4d204027c..0dc62c2b0 100644 --- a/src/server.c +++ b/src/server.c @@ -3008,7 +3008,8 @@ sds genRedisInfoString(char *section) { "pubsub_channels:%ld\r\n" "pubsub_patterns:%lu\r\n" "latest_fork_usec:%lld\r\n" - "migrate_cached_sockets:%ld\r\n", + "migrate_cached_sockets:%ld\r\n" + "slave_expires_tracked_keys:%zu\r\n", server.stat_numconnections, server.stat_numcommands, getInstantaneousMetric(STATS_METRIC_COMMAND), @@ -3027,7 +3028,8 @@ sds genRedisInfoString(char *section) { dictSize(server.pubsub_channels), listLength(server.pubsub_patterns), server.stat_fork_time, - dictSize(server.migrate_cached_sockets)); + dictSize(server.migrate_cached_sockets), + getSlaveKeyWithExpireCount()); } /* Replication */ diff --git a/src/server.h b/src/server.h index 0bd344e9d..5d3be585a 100644 --- a/src/server.h +++ b/src/server.h @@ -1734,6 +1734,7 @@ void activeExpireCycle(int type); void expireSlaveKeys(void); void rememberSlaveKeyWithExpire(redisDb *db, robj *key); void flushSlaveKeysWithExpireList(void); +size_t getSlaveKeyWithExpireCount(void); /* evict.c -- maxmemory handling and LRU eviction. */ void evictionPoolAlloc(void); From 03119a0831010de89a934d8a408d39454bb3e478 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 13 Dec 2016 16:27:13 +0100 Subject: [PATCH 0223/1722] Writable slaves expires: fix leak in key tracking. We need to use a dictionary type that frees the key, since we copy the keys in the dictionary we use to track expires created in the slave side. 
--- src/expire.c | 13 +++++++++++-- src/server.h | 5 +++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/expire.c b/src/expire.c index b05bf9f14..637139f63 100644 --- a/src/expire.c +++ b/src/expire.c @@ -315,8 +315,17 @@ void expireSlaveKeys(void) { /* Track keys that received an EXPIRE or similar command in the context * of a writable slave. */ void rememberSlaveKeyWithExpire(redisDb *db, robj *key) { - if (slaveKeysWithExpire == NULL) - slaveKeysWithExpire = dictCreate(&keyptrDictType,NULL); + if (slaveKeysWithExpire == NULL) { + static dictType dt = { + dictSdsHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCompare, /* key compare */ + dictSdsDestructor, /* key destructor */ + NULL /* val destructor */ + }; + slaveKeysWithExpire = dictCreate(&dt,NULL); + } if (db->id > 63) return; dictEntry *de = dictAddOrFind(slaveKeysWithExpire,key->ptr); diff --git a/src/server.h b/src/server.h index 5d3be585a..2a61ea419 100644 --- a/src/server.h +++ b/src/server.h @@ -1742,6 +1742,11 @@ void evictionPoolAlloc(void); unsigned long LFUGetTimeInMinutes(void); uint8_t LFULogIncr(uint8_t value); +/* Keys hashing / comparison functions for dict.c hash tables. */ +unsigned int dictSdsHash(const void *key); +int dictSdsKeyCompare(void *privdata, const void *key1, const void *key2); +void dictSdsDestructor(void *privdata, void *val); + /* Git SHA1 */ char *redisGitSHA1(void); char *redisGitDirty(void); From 403ed3368a4e78f57f0713e771dace3727a324f2 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 13 Dec 2016 16:28:12 +0100 Subject: [PATCH 0224/1722] Writable slaves expires: unit test. 
--- tests/integration/replication-3.tcl | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/integration/replication-3.tcl b/tests/integration/replication-3.tcl index 0fcbad45b..50dcb9a9a 100644 --- a/tests/integration/replication-3.tcl +++ b/tests/integration/replication-3.tcl @@ -30,6 +30,18 @@ start_server {tags {"repl"}} { } assert_equal [r debug digest] [r -1 debug digest] } + + test {Slave is able to evict keys created in writable slaves} { + r -1 select 5 + assert {[r -1 dbsize] == 0} + r -1 config set slave-read-only no + r -1 set key1 1 ex 5 + r -1 set key2 2 ex 5 + r -1 set key3 3 ex 5 + assert {[r -1 dbsize] == 3} + after 6000 + r -1 dbsize + } {0} } } From 768220fcdc0b403367b8d6d9291feb7e50359ee1 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 14 Dec 2016 12:42:04 +0100 Subject: [PATCH 0225/1722] MIGRATE: Remove upfront ttl initialization. After the fix for #3673 the ttl var is always initialized inside the loop itself, so the early initialization is not needed. Variables declaration also moved to a more local scope. --- src/cluster.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/cluster.c b/src/cluster.c index b14f7d0d2..ddd9937c6 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -4750,7 +4750,6 @@ void migrateCommand(client *c) { int copy, replace, j; long timeout; long dbid; - long long ttl, expireat; robj **ov = NULL; /* Objects to migrate. */ robj **kv = NULL; /* Key names. */ robj **newargv = NULL; /* Used to rewrite the command as DEL ... keys ... */ @@ -4765,7 +4764,6 @@ void migrateCommand(client *c) { /* Initialization */ copy = 0; replace = 0; - ttl = 0; /* Parse additional options */ for (j = 6; j < c->argc; j++) { @@ -4841,8 +4839,9 @@ try_again: /* Create RESTORE payload and generate the protocol to call the command. 
*/ for (j = 0; j < num_keys; j++) { - ttl = 0; - expireat = getExpire(c->db,kv[j]); + long long ttl = 0; + long long expireat = getExpire(c->db,kv[j]); + if (expireat != -1) { ttl = expireat-mstime(); if (ttl < 1) ttl = 1; From d8f4c209abec17bf4bff1ef19255b57f8decfe6b Mon Sep 17 00:00:00 2001 From: Dvir Volk Date: Thu, 15 Dec 2016 00:07:20 +0200 Subject: [PATCH 0226/1722] fixed stop condition in RM_ZsetRangeNext and RM_ZsetRangePrev --- src/module.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/module.c b/src/module.c index 5f85bf311..a5b3d52ae 100644 --- a/src/module.c +++ b/src/module.c @@ -1858,12 +1858,12 @@ int RM_ZsetRangeNext(RedisModuleKey *key) { } else { /* Are we still within the range? */ if (key->ztype == REDISMODULE_ZSET_RANGE_SCORE && - !zslValueLteMax(ln->score,&key->zrs)) + !zslValueLteMax(next->score,&key->zrs)) { key->zer = 1; return 0; } else if (key->ztype == REDISMODULE_ZSET_RANGE_LEX) { - if (!zslLexValueLteMax(ln->ele,&key->zlrs)) { + if (!zslLexValueLteMax(next->ele,&key->zlrs)) { key->zer = 1; return 0; } @@ -1921,7 +1921,7 @@ int RM_ZsetRangePrev(RedisModuleKey *key) { } else { /* Are we still within the range? */ if (key->ztype == REDISMODULE_ZSET_RANGE_SCORE && - !zslValueGteMin(ln->score,&key->zrs)) + !zslValueGteMin(prev->score,&key->zrs)) { key->zer = 1; return 0; From d02963eec95fcdff37c71535706e8ee24844cde3 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 16 Dec 2016 09:02:50 +0100 Subject: [PATCH 0227/1722] DEBUG: new "ziplist" subcommand added. Dumps a ziplist on stdout. The commit improves ziplistRepr() and adds a new debugging subcommand so that we can trigger the dump directly from the Redis API. This command capability was used while investigating issue #3684. 
--- src/debug.c | 14 ++++++++++++++ src/server.h | 2 ++ src/ziplist.c | 31 ++++++++++++++++++------------- src/ziplist.h | 1 + 4 files changed, 35 insertions(+), 13 deletions(-) diff --git a/src/debug.c b/src/debug.c index f4689d532..b8ad4e511 100644 --- a/src/debug.c +++ b/src/debug.c @@ -280,6 +280,8 @@ void debugCommand(client *c) { blen++; addReplyStatus(c, "sdslen -- Show low level SDS string info representing key and value."); blen++; addReplyStatus(c, + "ziplist -- Show low level info about the ziplist encoding."); + blen++; addReplyStatus(c, "populate [prefix] -- Create string keys named key:. If a prefix is specified is used instead of the 'key' prefix."); blen++; addReplyStatus(c, "digest -- Outputs an hex signature representing the current DB content."); @@ -418,6 +420,18 @@ void debugCommand(client *c) { (long long) sdsavail(val->ptr), (long long) getStringObjectSdsUsedMemory(val)); } + } else if (!strcasecmp(c->argv[1]->ptr,"ziplist") && c->argc == 3) { + robj *o; + + if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nokeyerr)) + == NULL) return; + + if (o->encoding != OBJ_ENCODING_ZIPLIST) { + addReplyError(c,"Not an sds encoded string."); + } else { + ziplistRepr(o->ptr); + addReplyStatus(c,"Ziplist structure printed on stdout"); + } } else if (!strcasecmp(c->argv[1]->ptr,"populate") && (c->argc == 3 || c->argc == 4)) { long keys, j; diff --git a/src/server.h b/src/server.h index 2a61ea419..7ff151de8 100644 --- a/src/server.h +++ b/src/server.h @@ -1655,6 +1655,8 @@ robj *lookupKeyWrite(redisDb *db, robj *key); robj *lookupKeyReadOrReply(client *c, robj *key, robj *reply); robj *lookupKeyWriteOrReply(client *c, robj *key, robj *reply); robj *lookupKeyReadWithFlags(redisDb *db, robj *key, int flags); +robj *objectCommandLookup(client *c, robj *key); +robj *objectCommandLookupOrReply(client *c, robj *key, robj *reply); #define LOOKUP_NONE 0 #define LOOKUP_NOTOUCH (1<<0) void dbAdd(redisDb *db, robj *key, robj *val); diff --git a/src/ziplist.c 
b/src/ziplist.c index 7428d30e9..f7d7877f7 100644 --- a/src/ziplist.c +++ b/src/ziplist.c @@ -1029,7 +1029,7 @@ void ziplistRepr(unsigned char *zl) { printf( "{total bytes %d} " - "{length %u}\n" + "{num entries %u}\n" "{tail offset %u}\n", intrev32ifbe(ZIPLIST_BYTES(zl)), intrev16ifbe(ZIPLIST_LENGTH(zl)), @@ -1038,16 +1038,15 @@ void ziplistRepr(unsigned char *zl) { while(*p != ZIP_END) { zipEntry(p, &entry); printf( - "{" - "addr 0x%08lx, " - "index %2d, " - "offset %5ld, " - "rl: %5u, " - "hs %2u, " - "pl: %5u, " - "pls: %2u, " - "payload %5u" - "} ", + "{\n" + "\taddr 0x%08lx,\n" + "\tindex %2d,\n" + "\toffset %5ld,\n" + "\thdr+entry len: %5u,\n" + "\thdr len%2u,\n" + "\tprevrawlen: %5u,\n" + "\tprevrawlensize: %2u,\n" + "\tpayload %5u\n", (long unsigned)p, index, (unsigned long) (p-zl), @@ -1056,8 +1055,14 @@ void ziplistRepr(unsigned char *zl) { entry.prevrawlen, entry.prevrawlensize, entry.len); + printf("\tbytes: "); + for (unsigned int i = 0; i < entry.headersize+entry.len; i++) { + printf("%02x|",p[i]); + } + printf("\n"); p += entry.headersize; if (ZIP_IS_STR(entry.encoding)) { + printf("\t[str]"); if (entry.len > 40) { if (fwrite(p,40,1,stdout) == 0) perror("fwrite"); printf("..."); @@ -1066,9 +1071,9 @@ void ziplistRepr(unsigned char *zl) { fwrite(p,entry.len,1,stdout) == 0) perror("fwrite"); } } else { - printf("%lld", (long long) zipLoadInteger(p,entry.encoding)); + printf("\t[int]%lld", (long long) zipLoadInteger(p,entry.encoding)); } - printf("\n"); + printf("\n}\n"); p += entry.len; index++; } diff --git a/src/ziplist.h b/src/ziplist.h index ae96823f9..964a47f6d 100644 --- a/src/ziplist.h +++ b/src/ziplist.h @@ -48,6 +48,7 @@ unsigned int ziplistCompare(unsigned char *p, unsigned char *s, unsigned int sle unsigned char *ziplistFind(unsigned char *p, unsigned char *vstr, unsigned int vlen, unsigned int skip); unsigned int ziplistLen(unsigned char *zl); size_t ziplistBlobLen(unsigned char *zl); +void ziplistRepr(unsigned char *zl); #ifdef REDIS_TEST 
int ziplistTest(int argc, char *argv[]); From 044068eb7f71e4faa271c8e95f85712d8962a354 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 16 Dec 2016 09:04:57 +0100 Subject: [PATCH 0228/1722] ziplist.c explanation of format improved a bit. --- src/ziplist.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/src/ziplist.c b/src/ziplist.c index f7d7877f7..684f8ccf8 100644 --- a/src/ziplist.c +++ b/src/ziplist.c @@ -8,24 +8,31 @@ * * ---------------------------------------------------------------------------- * - * ZIPLIST OVERALL LAYOUT: - * The general layout of the ziplist is as follows: - * + * ZIPLIST OVERALL LAYOUT * - * is an unsigned integer to hold the number of bytes that the - * ziplist occupies. This value needs to be stored to be able to resize the + * The general layout of the ziplist is as follows: + * + * ... + * + * All fields are stored in little endian. + * + * is an unsigned integer to hold the number of bytes that + * the ziplist occupies. This value needs to be stored to be able to resize the * entire structure without the need to traverse it first. * - * is the offset to the last entry in the list. This allows a pop - * operation on the far side of the list without the need for full traversal. + * is the offset to the last entry in the list. This allows + * a pop operation on the far side of the list without the need for full + * traversal. * - * is the number of entries.When this value is larger than 2**16-2, - * we need to traverse the entire list to know how many items it holds. + * is the number of entries. When this value is larger + * than 2^16-2, we need to traverse the entire list to know how many items it + * holds. * - * is a single byte special value, equal to 255, which indicates the - * end of the list. + * is a single byte special value, equal to 255, which + * indicates the end of the list. 
+ * + * ZIPLIST ENTRIES * - * ZIPLIST ENTRIES: * Every entry in the ziplist is prefixed by a header that contains two pieces * of information. First, the length of the previous entry is stored to be * able to traverse the list from back to front. Second, the encoding with an From c1545d2f236be821bf3d64ccc39870bd0c7ebe8e Mon Sep 17 00:00:00 2001 From: Harish Murthy Date: Fri, 9 Dec 2016 14:28:19 +0530 Subject: [PATCH 0229/1722] LogLog-Beta Algorithm support within HLL Config option to use LogLog-Beta Algorithm for Cardinality --- src/config.c | 11 +++++++- src/hyperloglog.c | 66 ++++++++++++++++++++++++++++++----------------- src/server.c | 1 + src/server.h | 2 ++ 4 files changed, 55 insertions(+), 25 deletions(-) diff --git a/src/config.c b/src/config.c index 8f3b81a19..6f4559e6c 100644 --- a/src/config.c +++ b/src/config.c @@ -688,6 +688,10 @@ void loadServerConfigFromString(char *config) { err = sentinelHandleConfiguration(argv+1,argc-1); if (err) goto loaderr; } + } else if (!strcasecmp(argv[0],"hll-use-loglogbeta") && argc == 2) { + if ((server.hll_use_loglogbeta = yesnotoi(argv[1])) == -1) { + err = "argument must be 'yes' or 'no'"; goto loaderr; + } } else { err = "Bad directive or wrong number of arguments"; goto loaderr; } @@ -980,7 +984,9 @@ void configSetCommand(client *c) { } config_set_bool_field( "slave-lazy-flush",server.repl_slave_lazy_flush) { } config_set_bool_field( - "no-appendfsync-on-rewrite",server.aof_no_fsync_on_rewrite) { + "no-appendfsync-on-rewrite",server.aof_no_fsync_on_rewrite) { + } config_set_bool_field( + "hll-use-loglogbeta",server.hll_use_loglogbeta) { /* Numerical fields. 
* config_set_numerical_field(name,var,min,max) */ @@ -1245,6 +1251,8 @@ void configGetCommand(client *c) { server.lazyfree_lazy_server_del); config_get_bool_field("slave-lazy-flush", server.repl_slave_lazy_flush); + config_get_bool_field("hll-use-loglogbeta", + server.hll_use_loglogbeta); /* Enum values */ config_get_enum_field("maxmemory-policy", @@ -1963,6 +1971,7 @@ int rewriteConfig(char *path) { rewriteConfigYesNoOption(state,"lazyfree-lazy-expire",server.lazyfree_lazy_expire,CONFIG_DEFAULT_LAZYFREE_LAZY_EXPIRE); rewriteConfigYesNoOption(state,"lazyfree-lazy-server-del",server.lazyfree_lazy_server_del,CONFIG_DEFAULT_LAZYFREE_LAZY_SERVER_DEL); rewriteConfigYesNoOption(state,"slave-lazy-flush",server.repl_slave_lazy_flush,CONFIG_DEFAULT_SLAVE_LAZY_FLUSH); + rewriteConfigYesNoOption(state,"hll-use-loglogbeta",server.hll_use_loglogbeta,CONFIG_DEFAULT_HLL_USE_LOGLOGBETA); /* Rewrite Sentinel config if in Sentinel mode. */ if (server.sentinel_mode) rewriteConfigSentinelOption(state); diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 8ccc16be2..67a928729 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -993,33 +993,51 @@ uint64_t hllCount(struct hllhdr *hdr, int *invalid) { } else { serverPanic("Unknown HyperLogLog encoding in hllCount()"); } + + if(server.hll_use_loglogbeta) { + /* For loglog-beta there is a single formula to compute + * cardinality for the enture range + */ - /* Muliply the inverse of E for alpha_m * m^2 to have the raw estimate. */ - E = (1/E)*alpha*m*m; + double zl = log(ez + 1); + double beta = -0.370393911*ez + + 0.070471823*zl + + 0.17393686*pow(zl,2) + + 0.16339839*pow(zl,3) + + -0.09237745*pow(zl,4) + + 0.03738027*pow(zl,5) + + -0.005384159*pow(zl,6) + + 0.00042419*pow(zl,7); + + E = alpha*m*(m-ez)*(1/(E+beta)); + } else { + /* Muliply the inverse of E for alpha_m * m^2 to have the raw estimate. */ + E = (1/E)*alpha*m*m; - /* Use the LINEARCOUNTING algorithm for small cardinalities. 
- * For larger values but up to 72000 HyperLogLog raw approximation is - * used since linear counting error starts to increase. However HyperLogLog - * shows a strong bias in the range 2.5*16384 - 72000, so we try to - * compensate for it. */ - if (E < m*2.5 && ez != 0) { - E = m*log(m/ez); /* LINEARCOUNTING() */ - } else if (m == 16384 && E < 72000) { - /* We did polynomial regression of the bias for this range, this - * way we can compute the bias for a given cardinality and correct - * according to it. Only apply the correction for P=14 that's what - * we use and the value the correction was verified with. */ - double bias = 5.9119*1.0e-18*(E*E*E*E) - -1.4253*1.0e-12*(E*E*E)+ - 1.2940*1.0e-7*(E*E) - -5.2921*1.0e-3*E+ - 83.3216; - E -= E*(bias/100); + /* Use the LINEARCOUNTING algorithm for small cardinalities. + * For larger values but up to 72000 HyperLogLog raw approximation is + * used since linear counting error starts to increase. However HyperLogLog + * shows a strong bias in the range 2.5*16384 - 72000, so we try to + * compensate for it. */ + if (E < m*2.5 && ez != 0) { + E = m*log(m/ez); /* LINEARCOUNTING() */ + } else if (m == 16384 && E < 72000) { + /* We did polynomial regression of the bias for this range, this + * way we can compute the bias for a given cardinality and correct + * according to it. Only apply the correction for P=14 that's what + * we use and the value the correction was verified with. */ + double bias = 5.9119*1.0e-18*(E*E*E*E) + -1.4253*1.0e-12*(E*E*E)+ + 1.2940*1.0e-7*(E*E) + -5.2921*1.0e-3*E+ + 83.3216; + E -= E*(bias/100); + } + /* We don't apply the correction for E > 1/30 of 2^32 since we use + * a 64 bit function and 6 bit counters. To apply the correction for + * 1/30 of 2^64 is not needed since it would require a huge set + * to approach such a value. */ } - /* We don't apply the correction for E > 1/30 of 2^32 since we use - * a 64 bit function and 6 bit counters. 
To apply the correction for - * 1/30 of 2^64 is not needed since it would require a huge set - * to approach such a value. */ return (uint64_t) E; } diff --git a/src/server.c b/src/server.c index 0dc62c2b0..f20497009 100644 --- a/src/server.c +++ b/src/server.c @@ -1400,6 +1400,7 @@ void initServerConfig(void) { server.lazyfree_lazy_eviction = CONFIG_DEFAULT_LAZYFREE_LAZY_EVICTION; server.lazyfree_lazy_expire = CONFIG_DEFAULT_LAZYFREE_LAZY_EXPIRE; server.lazyfree_lazy_server_del = CONFIG_DEFAULT_LAZYFREE_LAZY_SERVER_DEL; + server.hll_use_loglogbeta = CONFIG_DEFAULT_HLL_USE_LOGLOGBETA; server.lruclock = getLRUClock(); resetServerSaveParams(); diff --git a/src/server.h b/src/server.h index 7ff151de8..07df986c6 100644 --- a/src/server.h +++ b/src/server.h @@ -151,6 +151,7 @@ typedef long long mstime_t; /* millisecond time type. */ #define CONFIG_DEFAULT_LAZYFREE_LAZY_EVICTION 0 #define CONFIG_DEFAULT_LAZYFREE_LAZY_EXPIRE 0 #define CONFIG_DEFAULT_LAZYFREE_LAZY_SERVER_DEL 0 +#define CONFIG_DEFAULT_HLL_USE_LOGLOGBETA 0 #define ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP 20 /* Loopkups per loop. */ #define ACTIVE_EXPIRE_CYCLE_FAST_DURATION 1000 /* Microseconds */ @@ -1149,6 +1150,7 @@ struct redisServer { int watchdog_period; /* Software watchdog period in ms. 0 = off */ /* System hardware info */ size_t system_memory_size; /* Total memory in system as reported by OS */ + int hll_use_loglogbeta; /* Use loglog-beta algorithm for HLL */ }; typedef struct pubsubPattern { From c3c1c53df3922f48772d87cc3c44d7681096aa98 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 16 Dec 2016 10:56:40 +0100 Subject: [PATCH 0230/1722] Fix HLL gnuplot graph generator script for new redis-rb versions. The PFADD now takes an array and has mandatory two arguments. 
--- utils/hyperloglog/hll-gnuplot-graph.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/hyperloglog/hll-gnuplot-graph.rb b/utils/hyperloglog/hll-gnuplot-graph.rb index 745baddcf..6c7596d17 100644 --- a/utils/hyperloglog/hll-gnuplot-graph.rb +++ b/utils/hyperloglog/hll-gnuplot-graph.rb @@ -30,7 +30,7 @@ def run_experiment(r,seed,max,step) elements << ele i += 1 } - r.pfadd('hll',*elements) + r.pfadd('hll',elements) approx = r.pfcount('hll') err = approx-i rel_err = 100.to_f*err/i From be49812350100f3fcce9807f18faba0cb6af1c5f Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 16 Dec 2016 10:58:19 +0100 Subject: [PATCH 0231/1722] Use llroundl() before converting loglog-beta output to integer. Otherwise for small cardinalities the algorithm will output something like, for example, 4.99 for a candinality of 5, that will be converted to 4 producing a huge error. --- src/hyperloglog.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 67a928729..6dde6298a 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -993,9 +993,9 @@ uint64_t hllCount(struct hllhdr *hdr, int *invalid) { } else { serverPanic("Unknown HyperLogLog encoding in hllCount()"); } - + if(server.hll_use_loglogbeta) { - /* For loglog-beta there is a single formula to compute + /* For loglog-beta there is a single formula to compute * cardinality for the enture range */ @@ -1008,8 +1008,8 @@ uint64_t hllCount(struct hllhdr *hdr, int *invalid) { 0.03738027*pow(zl,5) + -0.005384159*pow(zl,6) + 0.00042419*pow(zl,7); - - E = alpha*m*(m-ez)*(1/(E+beta)); + + E = llroundl(alpha*m*(m-ez)*(1/(E+beta))); } else { /* Muliply the inverse of E for alpha_m * m^2 to have the raw estimate. */ E = (1/E)*alpha*m*m; From 6820b5cae110809b3a89571f0753a9cbf09368ee Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 16 Dec 2016 11:05:10 +0100 Subject: [PATCH 0232/1722] Switch PFCOUNT to LogLog-Beta algorithm. 
The new algorithm provides the same speed with a smaller error for cardinalities in the range 0-100k. Before switching, the new and old algorithm behavior was studied in details in the context of issue #3677. You can find a few graphs and motivations there. --- src/config.c | 11 +-------- src/hyperloglog.c | 57 ++++++++++++----------------------------------- src/server.c | 1 - src/server.h | 2 -- 4 files changed, 15 insertions(+), 56 deletions(-) diff --git a/src/config.c b/src/config.c index 6f4559e6c..8f3b81a19 100644 --- a/src/config.c +++ b/src/config.c @@ -688,10 +688,6 @@ void loadServerConfigFromString(char *config) { err = sentinelHandleConfiguration(argv+1,argc-1); if (err) goto loaderr; } - } else if (!strcasecmp(argv[0],"hll-use-loglogbeta") && argc == 2) { - if ((server.hll_use_loglogbeta = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } } else { err = "Bad directive or wrong number of arguments"; goto loaderr; } @@ -984,9 +980,7 @@ void configSetCommand(client *c) { } config_set_bool_field( "slave-lazy-flush",server.repl_slave_lazy_flush) { } config_set_bool_field( - "no-appendfsync-on-rewrite",server.aof_no_fsync_on_rewrite) { - } config_set_bool_field( - "hll-use-loglogbeta",server.hll_use_loglogbeta) { + "no-appendfsync-on-rewrite",server.aof_no_fsync_on_rewrite) { /* Numerical fields. 
* config_set_numerical_field(name,var,min,max) */ @@ -1251,8 +1245,6 @@ void configGetCommand(client *c) { server.lazyfree_lazy_server_del); config_get_bool_field("slave-lazy-flush", server.repl_slave_lazy_flush); - config_get_bool_field("hll-use-loglogbeta", - server.hll_use_loglogbeta); /* Enum values */ config_get_enum_field("maxmemory-policy", @@ -1971,7 +1963,6 @@ int rewriteConfig(char *path) { rewriteConfigYesNoOption(state,"lazyfree-lazy-expire",server.lazyfree_lazy_expire,CONFIG_DEFAULT_LAZYFREE_LAZY_EXPIRE); rewriteConfigYesNoOption(state,"lazyfree-lazy-server-del",server.lazyfree_lazy_server_del,CONFIG_DEFAULT_LAZYFREE_LAZY_SERVER_DEL); rewriteConfigYesNoOption(state,"slave-lazy-flush",server.repl_slave_lazy_flush,CONFIG_DEFAULT_SLAVE_LAZY_FLUSH); - rewriteConfigYesNoOption(state,"hll-use-loglogbeta",server.hll_use_loglogbeta,CONFIG_DEFAULT_HLL_USE_LOGLOGBETA); /* Rewrite Sentinel config if in Sentinel mode. */ if (server.sentinel_mode) rewriteConfigSentinelOption(state); diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 6dde6298a..0800bf59d 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -994,50 +994,21 @@ uint64_t hllCount(struct hllhdr *hdr, int *invalid) { serverPanic("Unknown HyperLogLog encoding in hllCount()"); } - if(server.hll_use_loglogbeta) { - /* For loglog-beta there is a single formula to compute - * cardinality for the enture range - */ + /* Apply loglog-beta to the raw estimate. 
See: + * "LogLog-Beta and More: A New Algorithm for Cardinality Estimation + * Based on LogLog Counting" Jason Qin, Denys Kim, Yumei Tung + * arXiv:1612.02284 */ + double zl = log(ez + 1); + double beta = -0.370393911*ez + + 0.070471823*zl + + 0.17393686*pow(zl,2) + + 0.16339839*pow(zl,3) + + -0.09237745*pow(zl,4) + + 0.03738027*pow(zl,5) + + -0.005384159*pow(zl,6) + + 0.00042419*pow(zl,7); - double zl = log(ez + 1); - double beta = -0.370393911*ez + - 0.070471823*zl + - 0.17393686*pow(zl,2) + - 0.16339839*pow(zl,3) + - -0.09237745*pow(zl,4) + - 0.03738027*pow(zl,5) + - -0.005384159*pow(zl,6) + - 0.00042419*pow(zl,7); - - E = llroundl(alpha*m*(m-ez)*(1/(E+beta))); - } else { - /* Muliply the inverse of E for alpha_m * m^2 to have the raw estimate. */ - E = (1/E)*alpha*m*m; - - /* Use the LINEARCOUNTING algorithm for small cardinalities. - * For larger values but up to 72000 HyperLogLog raw approximation is - * used since linear counting error starts to increase. However HyperLogLog - * shows a strong bias in the range 2.5*16384 - 72000, so we try to - * compensate for it. */ - if (E < m*2.5 && ez != 0) { - E = m*log(m/ez); /* LINEARCOUNTING() */ - } else if (m == 16384 && E < 72000) { - /* We did polynomial regression of the bias for this range, this - * way we can compute the bias for a given cardinality and correct - * according to it. Only apply the correction for P=14 that's what - * we use and the value the correction was verified with. */ - double bias = 5.9119*1.0e-18*(E*E*E*E) - -1.4253*1.0e-12*(E*E*E)+ - 1.2940*1.0e-7*(E*E) - -5.2921*1.0e-3*E+ - 83.3216; - E -= E*(bias/100); - } - /* We don't apply the correction for E > 1/30 of 2^32 since we use - * a 64 bit function and 6 bit counters. To apply the correction for - * 1/30 of 2^64 is not needed since it would require a huge set - * to approach such a value. 
*/ - } + E = llroundl(alpha*m*(m-ez)*(1/(E+beta))); return (uint64_t) E; } diff --git a/src/server.c b/src/server.c index f20497009..0dc62c2b0 100644 --- a/src/server.c +++ b/src/server.c @@ -1400,7 +1400,6 @@ void initServerConfig(void) { server.lazyfree_lazy_eviction = CONFIG_DEFAULT_LAZYFREE_LAZY_EVICTION; server.lazyfree_lazy_expire = CONFIG_DEFAULT_LAZYFREE_LAZY_EXPIRE; server.lazyfree_lazy_server_del = CONFIG_DEFAULT_LAZYFREE_LAZY_SERVER_DEL; - server.hll_use_loglogbeta = CONFIG_DEFAULT_HLL_USE_LOGLOGBETA; server.lruclock = getLRUClock(); resetServerSaveParams(); diff --git a/src/server.h b/src/server.h index 07df986c6..7ff151de8 100644 --- a/src/server.h +++ b/src/server.h @@ -151,7 +151,6 @@ typedef long long mstime_t; /* millisecond time type. */ #define CONFIG_DEFAULT_LAZYFREE_LAZY_EVICTION 0 #define CONFIG_DEFAULT_LAZYFREE_LAZY_EXPIRE 0 #define CONFIG_DEFAULT_LAZYFREE_LAZY_SERVER_DEL 0 -#define CONFIG_DEFAULT_HLL_USE_LOGLOGBETA 0 #define ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP 20 /* Loopkups per loop. */ #define ACTIVE_EXPIRE_CYCLE_FAST_DURATION 1000 /* Microseconds */ @@ -1150,7 +1149,6 @@ struct redisServer { int watchdog_period; /* Software watchdog period in ms. 0 = off */ /* System hardware info */ size_t system_memory_size; /* Total memory in system as reported by OS */ - int hll_use_loglogbeta; /* Use loglog-beta algorithm for HLL */ }; typedef struct pubsubPattern { From 8bd0dbd8055ba499330fd7e739923bba45ea284c Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 16 Dec 2016 17:48:33 +0100 Subject: [PATCH 0233/1722] Hopefully improve code comments for issue #3616. 
This commit also contains other changes in order to conform the code to the Redis core style, specifically 80 chars max per line, smart conditionals in the same line: if (that) do_this(); --- src/replication.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/replication.c b/src/replication.c index f26c142a9..df2e23f3a 100644 --- a/src/replication.c +++ b/src/replication.c @@ -1241,14 +1241,16 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) { if (eof_reached) { int aof_is_enabled = server.aof_state != AOF_OFF; + if (rename(server.repl_transfer_tmpfile,server.rdb_filename) == -1) { serverLog(LL_WARNING,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno)); cancelReplicationHandshake(); return; } serverLog(LL_NOTICE, "MASTER <-> SLAVE sync: Flushing old data"); - if(aof_is_enabled) /* we need to stop any AOFRW fork before flusing and parsing RDB, otherwise we'll create a CoW disaster */ - stopAppendOnly(); + /* We need to stop any AOFRW fork before flusing and parsing + * RDB, otherwise we'll create a copy-on-write disaster. */ + if(aof_is_enabled) stopAppendOnly(); signalFlushedDb(-1); emptyDb( -1, @@ -1264,8 +1266,9 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) { if (rdbLoad(server.rdb_filename,&rsi) != C_OK) { serverLog(LL_WARNING,"Failed trying to load the MASTER synchronization DB from disk"); cancelReplicationHandshake(); - if (aof_is_enabled) /* re-enable so that on the next attempt, we can detect that AOF was enabled */ - restartAOF(); + /* Re-enable the AOF if we disabled it earlier, in order to restore + * the original configuration. */ + if (aof_is_enabled) restartAOF(); return; } /* Final setup of the connected slave <- master link */ @@ -1289,10 +1292,8 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) { /* Restart the AOF subsystem now that we finished the sync. 
This * will trigger an AOF rewrite, and when done will start appending * to the new file. */ - if (aof_is_enabled) - restartAOF(); + if (aof_is_enabled) restartAOF(); } - return; error: From 0af9df2aa5b7b4ed2ed10474af8fdc5968c4c65b Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 19 Dec 2016 08:53:25 +0100 Subject: [PATCH 0234/1722] adjustOpenFilesLimit() comment made hopefully more clear. --- src/server.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/server.c b/src/server.c index 3d5e29bed..aa5b65862 100644 --- a/src/server.c +++ b/src/server.c @@ -1576,7 +1576,10 @@ void adjustOpenFilesLimit(void) { if (bestlimit < maxfiles) { unsigned int old_maxclients = server.maxclients; - server.maxclients = bestlimit-CONFIG_MIN_RESERVED_FDS; /* NOTICE: server.maxclients is unsigned */ + server.maxclients = bestlimit-CONFIG_MIN_RESERVED_FDS; + /* maxclients is unsigned so may overflow: in order + * to check if maxclients is now logically less than 1 + * we test indirectly via bestlimit. */ if (bestlimit <= CONFIG_MIN_RESERVED_FDS) { serverLog(LL_WARNING,"Your current 'ulimit -n' " "of %llu is not enough for the server to start. " From 03f1cc2023d3707b084c15530abc62174f150dd9 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 19 Dec 2016 16:41:47 +0100 Subject: [PATCH 0235/1722] Only show Redis logo if logging to stdout / TTY. You can still force the logo in the normal logs. For motivations, check issue #3112. For me the reason is that actually the logo is nice to have in interactive sessions, but inside the logs kinda loses its usefulness, but for the ability of users to recognize restarts easily: for this reason the new startup sequence shows a one liner ASCII "wave" so that there is still a bit of visual clue. Startup logging was modified in order to log events in more obvious ways, and to log more events. Also certain important informations are now more easy to parse/grep since they are printed in field=value style. 
The option --always-show-logo in redis.conf was added, defaulting to no. --- redis.conf | 8 ++++++++ src/config.c | 4 ++++ src/server.c | 41 ++++++++++++++++++++++++++++++----------- src/server.h | 2 ++ 4 files changed, 44 insertions(+), 11 deletions(-) diff --git a/redis.conf b/redis.conf index adc09020c..648483c1f 100644 --- a/redis.conf +++ b/redis.conf @@ -185,6 +185,14 @@ logfile "" # dbid is a number between 0 and 'databases'-1 databases 16 +# By default Redis shows an ASCII art logo only when started to log to the +# standard output and if the standard output is a TTY. Basically this means +# that normally a logo is displayed only in interactive sessions. +# +# However it is possible to force the pre-4.0 behavior and always show a +# ASCII art logo in startup logs by setting the following option to yes. +always-show-logo yes + ################################ SNAPSHOTTING ################################ # # Save the DB on disk: diff --git a/src/config.c b/src/config.c index 8f3b81a19..54af5bfe0 100644 --- a/src/config.c +++ b/src/config.c @@ -283,6 +283,10 @@ void loadServerConfigFromString(char *config) { } fclose(logfp); } + } else if (!strcasecmp(argv[0],"always-show-logo") && argc == 2) { + if ((server.always_show_logo = yesnotoi(argv[1])) == -1) { + err = "argument must be 'yes' or 'no'"; goto loaderr; + } } else if (!strcasecmp(argv[0],"syslog-enabled") && argc == 2) { if ((server.syslog_enabled = yesnotoi(argv[1])) == -1) { err = "argument must be 'yes' or 'no'"; goto loaderr; diff --git a/src/server.c b/src/server.c index aa5b65862..4390360c4 100644 --- a/src/server.c +++ b/src/server.c @@ -1400,6 +1400,7 @@ void initServerConfig(void) { server.lazyfree_lazy_eviction = CONFIG_DEFAULT_LAZYFREE_LAZY_EVICTION; server.lazyfree_lazy_expire = CONFIG_DEFAULT_LAZYFREE_LAZY_EXPIRE; server.lazyfree_lazy_server_del = CONFIG_DEFAULT_LAZYFREE_LAZY_SERVER_DEL; + server.always_show_logo = CONFIG_DEFAULT_ALWAYS_SHOW_LOGO; server.lruclock = getLRUClock(); 
resetServerSaveParams(); @@ -3335,15 +3336,18 @@ void redisAsciiArt(void) { else if (server.sentinel_mode) mode = "sentinel"; else mode = "standalone"; - if (server.syslog_enabled) { + /* Show the ASCII logo if: log file is stdout AND stdout is a + * tty AND syslog logging is disabled. Also show logo if the user + * forced us to do so via redis.conf. */ + int show_logo = ((!server.syslog_enabled && + server.logfile[0] == '\0' && + isatty(fileno(stdout))) || + server.always_show_logo); + + if (!show_logo) { serverLog(LL_NOTICE, - "Redis %s (%s/%d) %s bit, %s mode, port %d, pid %ld ready to start.", - REDIS_VERSION, - redisGitSHA1(), - strtol(redisGitDirty(),NULL,10) > 0, - (sizeof(long) == 8) ? "64" : "32", - mode, server.port, - (long) getpid() + "Running mode=%s, port=%d.", + mode, server.port ); } else { snprintf(buf,1024*16,ascii_logo, @@ -3700,8 +3704,23 @@ int main(int argc, char **argv) { resetServerSaveParams(); loadServerConfig(configfile,options); sdsfree(options); - } else { + } + +// serverLog(LL_WARNING, "_.~\"(_.~\"(_.~\"(_.~\"(_.~\"( Redis is starting..."); + serverLog(LL_WARNING, "oO0OoO0OoO0Oo Redis is starting oO0OoO0OoO0Oo"); + + serverLog(LL_WARNING, + "Redis version=%s, bits=%d, commit=%s, modified=%d, pid=%d, just started", + REDIS_VERSION, + (sizeof(long) == 8) ? 64 : 32, + redisGitSHA1(), + strtol(redisGitDirty(),NULL,10) > 0, + (int)getpid()); + + if (argc == 1) { serverLog(LL_WARNING, "Warning: no config file specified, using the default config. In order to specify a config file use %s /path/to/%s.conf", argv[0], server.sentinel_mode ? "sentinel" : "redis"); + } else { + serverLog(LL_WARNING, "Configuration loaded"); } server.supervised = redisIsSupervised(server.supervised_mode); @@ -3716,7 +3735,7 @@ int main(int argc, char **argv) { if (!server.sentinel_mode) { /* Things not needed when running in Sentinel mode. 
*/ - serverLog(LL_WARNING,"Server started, Redis version " REDIS_VERSION); + serverLog(LL_WARNING,"Server initialized"); #ifdef __linux__ linuxMemoryWarnings(); #endif @@ -3731,7 +3750,7 @@ int main(int argc, char **argv) { } } if (server.ipfd_count > 0) - serverLog(LL_NOTICE,"The server is now ready to accept connections on port %d", server.port); + serverLog(LL_NOTICE,"Ready to accept connections"); if (server.sofd > 0) serverLog(LL_NOTICE,"The server is now ready to accept connections at %s", server.unixsocket); } else { diff --git a/src/server.h b/src/server.h index 7ff151de8..140897c18 100644 --- a/src/server.h +++ b/src/server.h @@ -151,6 +151,7 @@ typedef long long mstime_t; /* millisecond time type. */ #define CONFIG_DEFAULT_LAZYFREE_LAZY_EVICTION 0 #define CONFIG_DEFAULT_LAZYFREE_LAZY_EXPIRE 0 #define CONFIG_DEFAULT_LAZYFREE_LAZY_SERVER_DEL 0 +#define CONFIG_DEFAULT_ALWAYS_SHOW_LOGO 0 #define ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP 20 /* Loopkups per loop. */ #define ACTIVE_EXPIRE_CYCLE_FAST_DURATION 1000 /* Microseconds */ @@ -863,6 +864,7 @@ struct redisServer { char runid[CONFIG_RUN_ID_SIZE+1]; /* ID always different at every exec. */ int sentinel_mode; /* True if this instance is a Sentinel. */ size_t initial_memory_usage; /* Bytes used after initialization. */ + int always_show_logo; /* Show logo even for non-stdout logging. */ /* Modules */ dict *moduleapi; /* Exported APIs dictionary for modules. */ list *loadmodule_queue; /* List of modules to load at startup. */ From d2d95729fe7635da25244d395000b8bfc8bd10c4 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 19 Dec 2016 16:45:18 +0100 Subject: [PATCH 0236/1722] Remove first version of ASCII wave, later discarded. 
--- src/server.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/server.c b/src/server.c index 4390360c4..f6868832d 100644 --- a/src/server.c +++ b/src/server.c @@ -3706,9 +3706,7 @@ int main(int argc, char **argv) { sdsfree(options); } -// serverLog(LL_WARNING, "_.~\"(_.~\"(_.~\"(_.~\"(_.~\"( Redis is starting..."); serverLog(LL_WARNING, "oO0OoO0OoO0Oo Redis is starting oO0OoO0OoO0Oo"); - serverLog(LL_WARNING, "Redis version=%s, bits=%d, commit=%s, modified=%d, pid=%d, just started", REDIS_VERSION, From 1ffd2f315b147a68652b0c4933721a772532a32e Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 19 Dec 2016 16:49:58 +0100 Subject: [PATCH 0237/1722] Fix test "server is up" detection after logging changes. --- tests/assets/default.conf | 1 + tests/support/server.tcl | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/assets/default.conf b/tests/assets/default.conf index 81f8470bc..d7b8a75c6 100644 --- a/tests/assets/default.conf +++ b/tests/assets/default.conf @@ -1,5 +1,6 @@ # Redis configuration for testing. 
+always-show-logo yes notify-keyspace-events KEA daemonize no pidfile /var/run/redis.pid diff --git a/tests/support/server.tcl b/tests/support/server.tcl index 19d6c5152..c36b30775 100644 --- a/tests/support/server.tcl +++ b/tests/support/server.tcl @@ -278,7 +278,7 @@ proc start_server {options {code undefined}} { while 1 { # check that the server actually started and is ready for connections - if {[exec grep "ready to accept" | wc -l < $stdout] > 0} { + if {[exec grep -i "Ready to accept" | wc -l < $stdout] > 0} { break } after 10 From 5aa18b3547dc3bcd6f308c4f086406f120fefff6 Mon Sep 17 00:00:00 2001 From: Justin Carvalho Date: Mon, 19 Dec 2016 17:37:41 -0500 Subject: [PATCH 0238/1722] Fix missing brackets around encoding variable in ZIP_DECODE_LENGTH macro --- src/ziplist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ziplist.c b/src/ziplist.c index 684f8ccf8..a0939f640 100644 --- a/src/ziplist.c +++ b/src/ziplist.c @@ -249,7 +249,7 @@ static unsigned int zipEncodeLength(unsigned char *p, unsigned char encoding, un } else if ((encoding) == ZIP_STR_14B) { \ (lensize) = 2; \ (len) = (((ptr)[0] & 0x3f) << 8) | (ptr)[1]; \ - } else if (encoding == ZIP_STR_32B) { \ + } else if ((encoding) == ZIP_STR_32B) { \ (lensize) = 5; \ (len) = ((ptr)[1] << 24) | \ ((ptr)[2] << 16) | \ From ebab46b1fb8789916ee32b39e46d5bd619a76e12 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 20 Dec 2016 10:12:38 +0100 Subject: [PATCH 0239/1722] Geo: fix edge case return values for uniformity. There were two cases outlined in issue #3512 and PR #3551 where the Geo API returned unexpected results: empty strings where NULL replies were expected, or a single null reply where an array was expected. This violates the Redis principle that Redis replies for existing keys or elements should be indistinguishable. 
This is technically an API breakage so will be merged only into 4.0 and specified in the changelog in the list of breaking compatibilities, even if it is not very likely that actual code will be affected, hopefully, since with the past behavior basically there was to acconut for *both* the possibilities, and the new behavior is always one of the two, but in a consistent way. --- src/geo.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/geo.c b/src/geo.c index 46022bdde..e1d026f1e 100644 --- a/src/geo.c +++ b/src/geo.c @@ -161,7 +161,7 @@ double extractDistanceOrReply(client *c, robj **argv, addReplyError(c,"radius cannot be negative"); return -1; } - + double to_meters = extractUnitOrReply(c,argv[1]); if (to_meters < 0) { return -1; @@ -738,16 +738,15 @@ void geoposCommand(client *c) { int j; /* Look up the requested zset */ - robj *zobj = NULL; - if ((zobj = lookupKeyReadOrReply(c, c->argv[1], shared.emptymultibulk)) - == NULL || checkType(c, zobj, OBJ_ZSET)) return; + robj *zobj = lookupKeyRead(c->db, c->argv[1]); + if (zobj && checkType(c, zobj, OBJ_ZSET)) return; /* Report elements one after the other, using a null bulk reply for * missing elements. */ addReplyMultiBulkLen(c,c->argc-2); for (j = 2; j < c->argc; j++) { double score; - if (zsetScore(zobj, c->argv[j]->ptr, &score) == C_ERR) { + if (!zobj || zsetScore(zobj, c->argv[j]->ptr, &score) == C_ERR) { addReply(c,shared.nullmultibulk); } else { /* Decode... */ @@ -782,7 +781,7 @@ void geodistCommand(client *c) { /* Look up the requested zset */ robj *zobj = NULL; - if ((zobj = lookupKeyReadOrReply(c, c->argv[1], shared.emptybulk)) + if ((zobj = lookupKeyReadOrReply(c, c->argv[1], shared.nullbulk)) == NULL || checkType(c, zobj, OBJ_ZSET)) return; /* Get the scores. We need both otherwise NULL is returned. 
*/ From db1e1e63ce54b2775ac5cfeb7d8b4bce18e6093d Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 20 Dec 2016 10:19:06 +0100 Subject: [PATCH 0240/1722] Geo: fix GEOHASH return value for consistency. The same thing observed in #3551 by gnethercutt also fixed for GEOHASH as the original PR did. --- src/geo.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/geo.c b/src/geo.c index e1d026f1e..8423931af 100644 --- a/src/geo.c +++ b/src/geo.c @@ -684,16 +684,15 @@ void geohashCommand(client *c) { int j; /* Look up the requested zset */ - robj *zobj = NULL; - if ((zobj = lookupKeyReadOrReply(c, c->argv[1], shared.emptymultibulk)) - == NULL || checkType(c, zobj, OBJ_ZSET)) return; + robj *zobj = lookupKeyRead(c->db, c->argv[1]); + if (zobj && checkType(c, zobj, OBJ_ZSET)) return; /* Geohash elements one after the other, using a null bulk reply for * missing elements. */ addReplyMultiBulkLen(c,c->argc-2); for (j = 2; j < c->argc; j++) { double score; - if (zsetScore(zobj, c->argv[j]->ptr, &score) == C_ERR) { + if (!zobj || zsetScore(zobj, c->argv[j]->ptr, &score) == C_ERR) { addReply(c,shared.nullbulk); } else { /* The internal format we use for geocoding is a bit different From 29fbdde45fbb181078e948fd42f063132ef425f8 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 20 Dec 2016 12:22:11 +0100 Subject: [PATCH 0241/1722] Geo: fuzzy test inconsistency report fixed to show all points. We need to report all the points added into the set, not just the ones matching the Tcl distance algo. 
--- tests/unit/geo.tcl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/unit/geo.tcl b/tests/unit/geo.tcl index 44a0ae1bc..fdbfbf139 100644 --- a/tests/unit/geo.tcl +++ b/tests/unit/geo.tcl @@ -254,10 +254,11 @@ start_server {tags {"geo"}} { for {set j 0} {$j < 20000} {incr j} { geo_random_point lon lat lappend argv $lon $lat "place:$j" - if {[geo_distance $lon $lat $search_lon $search_lat] < $radius_m} { + set distance [geo_distance $lon $lat $search_lon $search_lat] + if {$distance < $radius_m} { lappend tcl_result "place:$j" - lappend debuginfo "place:$j $lon $lat [expr {[geo_distance $lon $lat $search_lon $search_lat]/1000}] km" } + lappend debuginfo "place:$j $lon $lat [expr {$distance/1000}] km" } r geoadd mypoints {*}$argv set res [lsort [r georadius mypoints $search_lon $search_lat $radius_km km]] From 8155b2fca445bae92ce4908a095c15adde6c3514 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 21 Dec 2016 12:11:56 +0100 Subject: [PATCH 0242/1722] deps/hiredis updated to latest version. Close #3687. 
--- deps/hiredis/.gitignore | 1 + deps/hiredis/.travis.yml | 35 +- deps/hiredis/CHANGELOG.md | 125 +++++ deps/hiredis/Makefile | 107 +++- deps/hiredis/README.md | 203 ++++--- deps/hiredis/adapters/glib.h | 153 ++++++ deps/hiredis/adapters/ivykis.h | 81 +++ deps/hiredis/adapters/libevent.h | 24 +- deps/hiredis/adapters/libuv.h | 7 +- deps/hiredis/adapters/macosx.h | 114 ++++ deps/hiredis/adapters/qt.h | 135 +++++ deps/hiredis/appveyor.yml | 36 ++ deps/hiredis/async.c | 51 +- deps/hiredis/async.h | 3 + deps/hiredis/dict.c | 4 +- deps/hiredis/examples/example-glib.c | 73 +++ deps/hiredis/examples/example-ivykis.c | 58 ++ deps/hiredis/examples/example-macosx.c | 66 +++ deps/hiredis/examples/example-qt.cpp | 46 ++ deps/hiredis/examples/example-qt.h | 32 ++ deps/hiredis/examples/example.c | 2 +- deps/hiredis/fmacros.h | 15 +- deps/hiredis/hiredis.c | 719 +++++++------------------ deps/hiredis/hiredis.h | 153 +++--- deps/hiredis/net.c | 132 ++++- deps/hiredis/net.h | 8 +- deps/hiredis/read.c | 525 ++++++++++++++++++ deps/hiredis/read.h | 111 ++++ deps/hiredis/sds.c | 6 +- deps/hiredis/sds.h | 2 +- deps/hiredis/sdsalloc.h | 8 +- deps/hiredis/test.c | 120 ++++- deps/hiredis/win32.h | 42 ++ deps/hiredis/zmalloc.h | 13 - 34 files changed, 2393 insertions(+), 817 deletions(-) create mode 100644 deps/hiredis/adapters/glib.h create mode 100644 deps/hiredis/adapters/ivykis.h create mode 100644 deps/hiredis/adapters/macosx.h create mode 100644 deps/hiredis/adapters/qt.h create mode 100644 deps/hiredis/appveyor.yml create mode 100644 deps/hiredis/examples/example-glib.c create mode 100644 deps/hiredis/examples/example-ivykis.c create mode 100644 deps/hiredis/examples/example-macosx.c create mode 100644 deps/hiredis/examples/example-qt.cpp create mode 100644 deps/hiredis/examples/example-qt.h create mode 100644 deps/hiredis/read.c create mode 100644 deps/hiredis/read.h create mode 100644 deps/hiredis/win32.h delete mode 100644 deps/hiredis/zmalloc.h diff --git 
a/deps/hiredis/.gitignore b/deps/hiredis/.gitignore index 0c166a02e..c44b5c537 100644 --- a/deps/hiredis/.gitignore +++ b/deps/hiredis/.gitignore @@ -4,3 +4,4 @@ /*.so /*.dylib /*.a +/*.pc diff --git a/deps/hiredis/.travis.yml b/deps/hiredis/.travis.yml index 030427ff4..ad08076d8 100644 --- a/deps/hiredis/.travis.yml +++ b/deps/hiredis/.travis.yml @@ -1,6 +1,39 @@ language: c +sudo: false compiler: - gcc - clang -script: make && make check +os: + - linux + - osx + +before_script: + - if [ "$TRAVIS_OS_NAME" == "osx" ] ; then brew update; brew install redis; fi + +addons: + apt: + packages: + - libc6-dbg + - libc6-dev + - libc6:i386 + - libc6-dev-i386 + - libc6-dbg:i386 + - gcc-multilib + - valgrind + +env: + - CFLAGS="-Werror" + - PRE="valgrind --track-origins=yes --leak-check=full" + - TARGET="32bit" TARGET_VARS="32bit-vars" CFLAGS="-Werror" + - TARGET="32bit" TARGET_VARS="32bit-vars" PRE="valgrind --track-origins=yes --leak-check=full" + +matrix: + exclude: + - os: osx + env: PRE="valgrind --track-origins=yes --leak-check=full" + + - os: osx + env: TARGET="32bit" TARGET_VARS="32bit-vars" PRE="valgrind --track-origins=yes --leak-check=full" + +script: make $TARGET CFLAGS="$CFLAGS" && make check PRE="$PRE" && make $TARGET_VARS hiredis-example diff --git a/deps/hiredis/CHANGELOG.md b/deps/hiredis/CHANGELOG.md index 268b15cd5..f92bcb3c9 100644 --- a/deps/hiredis/CHANGELOG.md +++ b/deps/hiredis/CHANGELOG.md @@ -1,3 +1,128 @@ +### 1.0.0 (unreleased) + +**Fixes**: + +* Catch a buffer overflow when formatting the error message +* Import latest upstream sds. This breaks applications that are linked against the old hiredis v0.13 +* Fix warnings, when compiled with -Wshadow +* Make hiredis compile in Cygwin on Windows, now CI-tested + +**BREAKING CHANGES**: + +* Change `redisReply.len` to `size_t`, as it denotes the the size of a string + +User code should compare this to `size_t` values as well. 
+If it was used to compare to other values, casting might be necessary or can be removed, if casting was applied before. + +* Remove backwards compatibility macro's + +This removes the following old function aliases, use the new name now: + +| Old | New | +| --------------------------- | ---------------------- | +| redisReplyReaderCreate | redisReaderCreate | +| redisReplyReaderCreate | redisReaderCreate | +| redisReplyReaderFree | redisReaderFree | +| redisReplyReaderFeed | redisReaderFeed | +| redisReplyReaderGetReply | redisReaderGetReply | +| redisReplyReaderSetPrivdata | redisReaderSetPrivdata | +| redisReplyReaderGetObject | redisReaderGetObject | +| redisReplyReaderGetError | redisReaderGetError | + +* The `DEBUG` variable in the Makefile was renamed to `DEBUG_FLAGS` + +Previously it broke some builds for people that had `DEBUG` set to some arbitrary value, +due to debugging other software. +By renaming we avoid unintentional name clashes. + +Simply rename `DEBUG` to `DEBUG_FLAGS` in your environment to make it working again. + +### 0.13.3 (2015-09-16) + +* Revert "Clear `REDIS_CONNECTED` flag when connection is closed". +* Make tests pass on FreeBSD (Thanks, Giacomo Olgeni) + + +If the `REDIS_CONNECTED` flag is cleared, +the async onDisconnect callback function will never be called. +This causes problems as the disconnect is never reported back to the user. + +### 0.13.2 (2015-08-25) + +* Prevent crash on pending replies in async code (Thanks, @switch-st) +* Clear `REDIS_CONNECTED` flag when connection is closed (Thanks, Jerry Jacobs) +* Add MacOS X addapter (Thanks, @dizzus) +* Add Qt adapter (Thanks, Pietro Cerutti) +* Add Ivykis adapter (Thanks, Gergely Nagy) + +All adapters are provided as is and are only tested where possible. + +### 0.13.1 (2015-05-03) + +This is a bug fix release. +The new `reconnect` method introduced new struct members, which clashed with pre-defined names in pre-C99 code. 
+Another commit forced C99 compilation just to make it work, but of course this is not desirable for outside projects. +Other non-C99 code can now use hiredis as usual again. +Sorry for the inconvenience. + +* Fix memory leak in async reply handling (Salvatore Sanfilippo) +* Rename struct member to avoid name clash with pre-c99 code (Alex Balashov, ncopa) + +### 0.13.0 (2015-04-16) + +This release adds a minimal Windows compatibility layer. +The parser, standalone since v0.12.0, can now be compiled on Windows +(and thus used in other client libraries as well) + +* Windows compatibility layer for parser code (tzickel) +* Properly escape data printed to PKGCONF file (Dan Skorupski) +* Fix tests when assert() undefined (Keith Bennett, Matt Stancliff) +* Implement a reconnect method for the client context, this changes the structure of `redisContext` (Aaron Bedra) + +### 0.12.1 (2015-01-26) + +* Fix `make install`: DESTDIR support, install all required files, install PKGCONF in proper location +* Fix `make test` as 32 bit build on 64 bit platform + +### 0.12.0 (2015-01-22) + +* Add optional KeepAlive support + +* Try again on EINTR errors + +* Add libuv adapter + +* Add IPv6 support + +* Remove possiblity of multiple close on same fd + +* Add ability to bind source address on connect + +* Add redisConnectFd() and redisFreeKeepFd() + +* Fix getaddrinfo() memory leak + +* Free string if it is unused (fixes memory leak) + +* Improve redisAppendCommandArgv performance 2.5x + +* Add support for SO_REUSEADDR + +* Fix redisvFormatCommand format parsing + +* Add GLib 2.0 adapter + +* Refactor reading code into read.c + +* Fix errno error buffers to not clobber errors + +* Generate pkgconf during build + +* Silence _BSD_SOURCE warnings + +* Improve digit counting for multibulk creation + + ### 0.11.0 * Increase the maximum multi-bulk reply depth to 7. 
diff --git a/deps/hiredis/Makefile b/deps/hiredis/Makefile index ddcc4e4f6..9a4de8360 100644 --- a/deps/hiredis/Makefile +++ b/deps/hiredis/Makefile @@ -3,13 +3,25 @@ # Copyright (C) 2010-2011 Pieter Noordhuis # This file is released under the BSD license, see the COPYING file -OBJ=net.o hiredis.o sds.o async.o -EXAMPLES=hiredis-example hiredis-example-libevent hiredis-example-libev +OBJ=net.o hiredis.o sds.o async.o read.o +EXAMPLES=hiredis-example hiredis-example-libevent hiredis-example-libev hiredis-example-glib TESTS=hiredis-test LIBNAME=libhiredis +PKGCONFNAME=hiredis.pc -HIREDIS_MAJOR=0 -HIREDIS_MINOR=11 +HIREDIS_MAJOR=$(shell grep HIREDIS_MAJOR hiredis.h | awk '{print $$3}') +HIREDIS_MINOR=$(shell grep HIREDIS_MINOR hiredis.h | awk '{print $$3}') +HIREDIS_PATCH=$(shell grep HIREDIS_PATCH hiredis.h | awk '{print $$3}') +HIREDIS_SONAME=$(shell grep HIREDIS_SONAME hiredis.h | awk '{print $$3}') + +# Installation related variables and target +PREFIX?=/usr/local +INCLUDE_PATH?=include/hiredis +LIBRARY_PATH?=lib +PKGCONF_PATH?=pkgconfig +INSTALL_INCLUDE_PATH= $(DESTDIR)$(PREFIX)/$(INCLUDE_PATH) +INSTALL_LIBRARY_PATH= $(DESTDIR)$(PREFIX)/$(LIBRARY_PATH) +INSTALL_PKGCONF_PATH= $(INSTALL_LIBRARY_PATH)/$(PKGCONF_PATH) # redis-server configuration used for testing REDIS_PORT=56379 @@ -25,15 +37,16 @@ export REDIS_TEST_CONFIG # Fallback to gcc when $CC is not in $PATH. 
CC:=$(shell sh -c 'type $(CC) >/dev/null 2>/dev/null && echo $(CC) || echo gcc') +CXX:=$(shell sh -c 'type $(CXX) >/dev/null 2>/dev/null && echo $(CXX) || echo g++') OPTIMIZATION?=-O3 WARNINGS=-Wall -W -Wstrict-prototypes -Wwrite-strings -DEBUG?= -g -ggdb -REAL_CFLAGS=$(OPTIMIZATION) -fPIC $(CFLAGS) $(WARNINGS) $(DEBUG) $(ARCH) +DEBUG_FLAGS?= -g -ggdb +REAL_CFLAGS=$(OPTIMIZATION) -fPIC $(CFLAGS) $(WARNINGS) $(DEBUG_FLAGS) $(ARCH) REAL_LDFLAGS=$(LDFLAGS) $(ARCH) DYLIBSUFFIX=so STLIBSUFFIX=a -DYLIB_MINOR_NAME=$(LIBNAME).$(DYLIBSUFFIX).$(HIREDIS_MAJOR).$(HIREDIS_MINOR) +DYLIB_MINOR_NAME=$(LIBNAME).$(DYLIBSUFFIX).$(HIREDIS_SONAME) DYLIB_MAJOR_NAME=$(LIBNAME).$(DYLIBSUFFIX).$(HIREDIS_MAJOR) DYLIBNAME=$(LIBNAME).$(DYLIBSUFFIX) DYLIB_MAKE_CMD=$(CC) -shared -Wl,-soname,$(DYLIB_MINOR_NAME) -o $(DYLIBNAME) $(LDFLAGS) @@ -49,19 +62,20 @@ ifeq ($(uname_S),SunOS) endif ifeq ($(uname_S),Darwin) DYLIBSUFFIX=dylib - DYLIB_MINOR_NAME=$(LIBNAME).$(HIREDIS_MAJOR).$(HIREDIS_MINOR).$(DYLIBSUFFIX) - DYLIB_MAJOR_NAME=$(LIBNAME).$(HIREDIS_MAJOR).$(DYLIBSUFFIX) + DYLIB_MINOR_NAME=$(LIBNAME).$(HIREDIS_SONAME).$(DYLIBSUFFIX) DYLIB_MAKE_CMD=$(CC) -shared -Wl,-install_name,$(DYLIB_MINOR_NAME) -o $(DYLIBNAME) $(LDFLAGS) endif -all: $(DYLIBNAME) +all: $(DYLIBNAME) $(STLIBNAME) hiredis-test $(PKGCONFNAME) # Deps (use make dep to generate this) -net.o: net.c fmacros.h net.h hiredis.h -async.o: async.c async.h hiredis.h sds.h dict.c dict.h -hiredis.o: hiredis.c fmacros.h hiredis.h net.h sds.h +async.o: async.c fmacros.h async.h hiredis.h read.h sds.h net.h dict.c dict.h +dict.o: dict.c fmacros.h dict.h +hiredis.o: hiredis.c fmacros.h hiredis.h read.h sds.h net.h +net.o: net.c fmacros.h net.h hiredis.h read.h sds.h +read.o: read.c fmacros.h read.h sds.h sds.o: sds.c sds.h -test.o: test.c hiredis.h +test.o: test.c fmacros.h hiredis.h read.h sds.h $(DYLIBNAME): $(OBJ) $(DYLIB_MAKE_CMD) $(OBJ) @@ -79,6 +93,15 @@ hiredis-example-libevent: examples/example-libevent.c adapters/libevent.h $(STLI 
hiredis-example-libev: examples/example-libev.c adapters/libev.h $(STLIBNAME) $(CC) -o examples/$@ $(REAL_CFLAGS) $(REAL_LDFLAGS) -I. $< -lev $(STLIBNAME) +hiredis-example-glib: examples/example-glib.c adapters/glib.h $(STLIBNAME) + $(CC) -o examples/$@ $(REAL_CFLAGS) $(REAL_LDFLAGS) $(shell pkg-config --cflags --libs glib-2.0) -I. $< $(STLIBNAME) + +hiredis-example-ivykis: examples/example-ivykis.c adapters/ivykis.h $(STLIBNAME) + $(CC) -o examples/$@ $(REAL_CFLAGS) $(REAL_LDFLAGS) -I. $< -livykis $(STLIBNAME) + +hiredis-example-macosx: examples/example-macosx.c adapters/macosx.h $(STLIBNAME) + $(CC) -o examples/$@ $(REAL_CFLAGS) $(REAL_LDFLAGS) -I. $< -framework CoreFoundation $(STLIBNAME) + ifndef AE_DIR hiredis-example-ae: @echo "Please specify AE_DIR (e.g. /src)" @@ -94,7 +117,20 @@ hiredis-example-libuv: @false else hiredis-example-libuv: examples/example-libuv.c adapters/libuv.h $(STLIBNAME) - $(CC) -o examples/$@ $(REAL_CFLAGS) $(REAL_LDFLAGS) -I. -I$(LIBUV_DIR)/include $< $(LIBUV_DIR)/.libs/libuv.a -lpthread $(STLIBNAME) + $(CC) -o examples/$@ $(REAL_CFLAGS) $(REAL_LDFLAGS) -I. -I$(LIBUV_DIR)/include $< $(LIBUV_DIR)/.libs/libuv.a -lpthread -lrt $(STLIBNAME) +endif + +ifeq ($(and $(QT_MOC),$(QT_INCLUDE_DIR),$(QT_LIBRARY_DIR)),) +hiredis-example-qt: + @echo "Please specify QT_MOC, QT_INCLUDE_DIR AND QT_LIBRARY_DIR" + @false +else +hiredis-example-qt: examples/example-qt.cpp adapters/qt.h $(STLIBNAME) + $(QT_MOC) adapters/qt.h -I. -I$(QT_INCLUDE_DIR) -I$(QT_INCLUDE_DIR)/QtCore | \ + $(CXX) -x c++ -o qt-adapter-moc.o -c - $(REAL_CFLAGS) -I. -I$(QT_INCLUDE_DIR) -I$(QT_INCLUDE_DIR)/QtCore + $(QT_MOC) examples/example-qt.h -I. -I$(QT_INCLUDE_DIR) -I$(QT_INCLUDE_DIR)/QtCore | \ + $(CXX) -x c++ -o qt-example-moc.o -c - $(REAL_CFLAGS) -I. -I$(QT_INCLUDE_DIR) -I$(QT_INCLUDE_DIR)/QtCore + $(CXX) -o examples/$@ $(REAL_CFLAGS) $(REAL_LDFLAGS) -I. 
-I$(QT_INCLUDE_DIR) -I$(QT_INCLUDE_DIR)/QtCore -L$(QT_LIBRARY_DIR) qt-adapter-moc.o qt-example-moc.o $< -pthread $(STLIBNAME) -lQtCore endif hiredis-example: examples/example.c $(STLIBNAME) @@ -103,14 +139,16 @@ hiredis-example: examples/example.c $(STLIBNAME) examples: $(EXAMPLES) hiredis-test: test.o $(STLIBNAME) - $(CC) -o $@ $(REAL_LDFLAGS) $< $(STLIBNAME) + +hiredis-%: %.o $(STLIBNAME) + $(CC) $(REAL_CFLAGS) -o $@ $(REAL_LDFLAGS) $< $(STLIBNAME) test: hiredis-test ./hiredis-test check: hiredis-test @echo "$$REDIS_TEST_CONFIG" | $(REDIS_SERVER) - - ./hiredis-test -h 127.0.0.1 -p $(REDIS_PORT) -s /tmp/hiredis-test-redis.sock || \ + $(PRE) ./hiredis-test -h 127.0.0.1 -p $(REDIS_PORT) -s /tmp/hiredis-test-redis.sock || \ ( kill `cat /tmp/hiredis-test-redis.pid` && false ) kill `cat /tmp/hiredis-test-redis.pid` @@ -118,29 +156,38 @@ check: hiredis-test $(CC) -std=c99 -pedantic -c $(REAL_CFLAGS) $< clean: - rm -rf $(DYLIBNAME) $(STLIBNAME) $(TESTS) examples/hiredis-example* *.o *.gcda *.gcno *.gcov + rm -rf $(DYLIBNAME) $(STLIBNAME) $(TESTS) $(PKGCONFNAME) examples/hiredis-example* *.o *.gcda *.gcno *.gcov dep: $(CC) -MM *.c -# Installation related variables and target -PREFIX?=/usr/local -INSTALL_INCLUDE_PATH= $(PREFIX)/include/hiredis -INSTALL_LIBRARY_PATH= $(PREFIX)/lib - ifeq ($(uname_S),SunOS) INSTALL?= cp -r endif INSTALL?= cp -a -install: $(DYLIBNAME) $(STLIBNAME) +$(PKGCONFNAME): hiredis.h + @echo "Generating $@ for pkgconfig..." + @echo prefix=$(PREFIX) > $@ + @echo exec_prefix=\$${prefix} >> $@ + @echo libdir=$(PREFIX)/$(LIBRARY_PATH) >> $@ + @echo includedir=$(PREFIX)/$(INCLUDE_PATH) >> $@ + @echo >> $@ + @echo Name: hiredis >> $@ + @echo Description: Minimalistic C client library for Redis. 
>> $@ + @echo Version: $(HIREDIS_MAJOR).$(HIREDIS_MINOR).$(HIREDIS_PATCH) >> $@ + @echo Libs: -L\$${libdir} -lhiredis >> $@ + @echo Cflags: -I\$${includedir} -D_FILE_OFFSET_BITS=64 >> $@ + +install: $(DYLIBNAME) $(STLIBNAME) $(PKGCONFNAME) mkdir -p $(INSTALL_INCLUDE_PATH) $(INSTALL_LIBRARY_PATH) - $(INSTALL) hiredis.h async.h adapters $(INSTALL_INCLUDE_PATH) + $(INSTALL) hiredis.h async.h read.h sds.h adapters $(INSTALL_INCLUDE_PATH) $(INSTALL) $(DYLIBNAME) $(INSTALL_LIBRARY_PATH)/$(DYLIB_MINOR_NAME) - cd $(INSTALL_LIBRARY_PATH) && ln -sf $(DYLIB_MINOR_NAME) $(DYLIB_MAJOR_NAME) - cd $(INSTALL_LIBRARY_PATH) && ln -sf $(DYLIB_MAJOR_NAME) $(DYLIBNAME) + cd $(INSTALL_LIBRARY_PATH) && ln -sf $(DYLIB_MINOR_NAME) $(DYLIBNAME) $(INSTALL) $(STLIBNAME) $(INSTALL_LIBRARY_PATH) + mkdir -p $(INSTALL_PKGCONF_PATH) + $(INSTALL) $(PKGCONFNAME) $(INSTALL_PKGCONF_PATH) 32bit: @echo "" @@ -148,6 +195,10 @@ install: $(DYLIBNAME) $(STLIBNAME) @echo "" $(MAKE) CFLAGS="-m32" LDFLAGS="-m32" +32bit-vars: + $(eval CFLAGS=-m32) + $(eval LDFLAGS=-m32) + gprof: $(MAKE) CFLAGS="-pg" LDFLAGS="-pg" @@ -163,4 +214,4 @@ coverage: gcov noopt: $(MAKE) OPTIMIZATION="" -.PHONY: all test check clean dep install 32bit gprof gcov noopt +.PHONY: all test check clean dep install 32bit 32bit-vars gprof gcov noopt diff --git a/deps/hiredis/README.md b/deps/hiredis/README.md index dba4a8c8e..01223ea59 100644 --- a/deps/hiredis/README.md +++ b/deps/hiredis/README.md @@ -1,11 +1,13 @@ [![Build Status](https://travis-ci.org/redis/hiredis.png)](https://travis-ci.org/redis/hiredis) +**This Readme reflects the latest changed in the master branch. See [v0.13.3](https://github.com/redis/hiredis/tree/v0.13.3) for the Readme and documentation for the latest release.** + # HIREDIS Hiredis is a minimalistic C client library for the [Redis](http://redis.io/) database. 
It is minimalistic because it just adds minimal support for the protocol, but -at the same time it uses an high level printf-alike API in order to make it +at the same time it uses a high level printf-alike API in order to make it much higher level than otherwise suggested by its minimal code base and the lack of explicit bindings for every Redis command. @@ -20,7 +22,15 @@ Redis version >= 1.2.0. The library comes with multiple APIs. There is the *synchronous API*, the *asynchronous API* and the *reply parsing API*. -## UPGRADING +## Upgrading to `1.0.0` + +Version 1.0.0 marks a stable release of hiredis. +It includes some minor breaking changes, mostly to make the exposed API more uniform and self-explanatory. +It also bundles the updated `sds` library, to sync up with upstream and Redis. +For most applications a recompile against the new hiredis should be enough. +For code changes see the [Changelog](CHANGELOG.md). + +## Upgrading from `<0.9.0` Version 0.9.0 is a major overhaul of hiredis in every aspect. However, upgrading existing code using hiredis should not be a big pain. The key thing to keep in mind when @@ -31,51 +41,62 @@ the stateless 0.0.1 that only has a file descriptor to work with. To consume the synchronous API, there are only a few function calls that need to be introduced: - redisContext *redisConnect(const char *ip, int port); - void *redisCommand(redisContext *c, const char *format, ...); - void freeReplyObject(void *reply); +```c +redisContext *redisConnect(const char *ip, int port); +void *redisCommand(redisContext *c, const char *format, ...); +void freeReplyObject(void *reply); +``` ### Connecting The function `redisConnect` is used to create a so-called `redisContext`. The context is where Hiredis holds state for a connection. The `redisContext` -struct has an integer `err` field that is non-zero when an the connection is in +struct has an integer `err` field that is non-zero when the connection is in an error state. 
The field `errstr` will contain a string with a description of the error. More information on errors can be found in the **Errors** section. After trying to connect to Redis using `redisConnect` you should check the `err` field to see if establishing the connection was successful: - - redisContext *c = redisConnect("127.0.0.1", 6379); - if (c != NULL && c->err) { +```c +redisContext *c = redisConnect("127.0.0.1", 6379); +if (c == NULL || c->err) { + if (c) { printf("Error: %s\n", c->errstr); // handle error + } else { + printf("Can't allocate redis context\n"); } +} +``` + +*Note: A `redisContext` is not thread-safe.* ### Sending commands There are several ways to issue commands to Redis. The first that will be introduced is `redisCommand`. This function takes a format similar to printf. In the simplest form, it is used like this: - - reply = redisCommand(context, "SET foo bar"); +```c +reply = redisCommand(context, "SET foo bar"); +``` The specifier `%s` interpolates a string in the command, and uses `strlen` to determine the length of the string: - - reply = redisCommand(context, "SET foo %s", value); - +```c +reply = redisCommand(context, "SET foo %s", value); +``` When you need to pass binary safe strings in a command, the `%b` specifier can be used. Together with a pointer to the string, it requires a `size_t` length argument of the string: - - reply = redisCommand(context, "SET foo %b", value, (size_t) valuelen); - +```c +reply = redisCommand(context, "SET foo %b", value, (size_t) valuelen); +``` Internally, Hiredis splits the command in different arguments and will convert it to the protocol used to communicate with Redis. 
One or more spaces separates arguments, so you can use the specifiers anywhere in an argument: - - reply = redisCommand(context, "SET key:%s %s", myid, value); +```c +reply = redisCommand(context, "SET key:%s %s", myid, value); +``` ### Using replies @@ -114,11 +135,11 @@ was received: Redis may reply with nested arrays but this is fully supported. Replies should be freed using the `freeReplyObject()` function. -Note that this function will take care of freeing sub-replies objects +Note that this function will take care of freeing sub-reply objects contained in arrays and nested arrays, so there is no need for the user to free the sub replies (it is actually harmful and will corrupt the memory). -**Important:** the current version of hiredis (0.10.0) free's replies when the +**Important:** the current version of hiredis (0.10.0) frees replies when the asynchronous API is used. This means you should not call `freeReplyObject` when you use this API. The reply is cleaned up by hiredis _after_ the callback returns. This behavior will probably change in future releases, so make sure to @@ -127,19 +148,19 @@ keep an eye on the changelog when upgrading (see issue #39). ### Cleaning up To disconnect and free the context the following function can be used: - - void redisFree(redisContext *c); - -This function immediately closes the socket and then free's the allocations done in +```c +void redisFree(redisContext *c); +``` +This function immediately closes the socket and then frees the allocations done in creating the context. ### Sending commands (cont'd) Together with `redisCommand`, the function `redisCommandArgv` can be used to issue commands. 
It has the following prototype: - - void *redisCommandArgv(redisContext *c, int argc, const char **argv, const size_t *argvlen); - +```c +void *redisCommandArgv(redisContext *c, int argc, const char **argv, const size_t *argvlen); +``` It takes the number of arguments `argc`, an array of strings `argv` and the lengths of the arguments `argvlen`. For convenience, `argvlen` may be set to `NULL` and the function will use `strlen(3)` on every argument to determine its length. Obviously, when any of the arguments @@ -169,10 +190,10 @@ The function `redisGetReply` is exported as part of the Hiredis API and can be u is expected on the socket. To pipeline commands, the only things that needs to be done is filling up the output buffer. For this cause, two commands can be used that are identical to the `redisCommand` family, apart from not returning a reply: - - void redisAppendCommand(redisContext *c, const char *format, ...); - void redisAppendCommandArgv(redisContext *c, int argc, const char **argv, const size_t *argvlen); - +```c +void redisAppendCommand(redisContext *c, const char *format, ...); +void redisAppendCommandArgv(redisContext *c, int argc, const char **argv, const size_t *argvlen); +``` After calling either function one or more times, `redisGetReply` can be used to receive the subsequent replies. The return value for this function is either `REDIS_OK` or `REDIS_ERR`, where the latter means an error occurred while reading a reply. 
Just as with the other commands, @@ -180,24 +201,24 @@ the `err` field in the context can be used to find out what the cause of this er The following examples shows a simple pipeline (resulting in only a single call to `write(2)` and a single call to `read(2)`): - - redisReply *reply; - redisAppendCommand(context,"SET foo bar"); - redisAppendCommand(context,"GET foo"); - redisGetReply(context,&reply); // reply for SET - freeReplyObject(reply); - redisGetReply(context,&reply); // reply for GET - freeReplyObject(reply); - +```c +redisReply *reply; +redisAppendCommand(context,"SET foo bar"); +redisAppendCommand(context,"GET foo"); +redisGetReply(context,&reply); // reply for SET +freeReplyObject(reply); +redisGetReply(context,&reply); // reply for GET +freeReplyObject(reply); +``` This API can also be used to implement a blocking subscriber: - - reply = redisCommand(context,"SUBSCRIBE foo"); +```c +reply = redisCommand(context,"SUBSCRIBE foo"); +freeReplyObject(reply); +while(redisGetReply(context,&reply) == REDIS_OK) { + // consume message freeReplyObject(reply); - while(redisGetReply(context,&reply) == REDIS_OK) { - // consume message - freeReplyObject(reply); - } - +} +``` ### Errors When a function call is not successful, depending on the function either `NULL` or `REDIS_ERR` is @@ -237,58 +258,62 @@ should be checked after creation to see if there were errors creating the connec Because the connection that will be created is non-blocking, the kernel is not able to instantly return if the specified host and port is able to accept a connection. 
- redisAsyncContext *c = redisAsyncConnect("127.0.0.1", 6379); - if (c->err) { - printf("Error: %s\n", c->errstr); - // handle error - } +*Note: A `redisAsyncContext` is not thread-safe.* + +```c +redisAsyncContext *c = redisAsyncConnect("127.0.0.1", 6379); +if (c->err) { + printf("Error: %s\n", c->errstr); + // handle error +} +``` The asynchronous context can hold a disconnect callback function that is called when the connection is disconnected (either because of an error or per user request). This function should have the following prototype: - - void(const redisAsyncContext *c, int status); - +```c +void(const redisAsyncContext *c, int status); +``` On a disconnect, the `status` argument is set to `REDIS_OK` when disconnection was initiated by the user, or `REDIS_ERR` when the disconnection was caused by an error. When it is `REDIS_ERR`, the `err` field in the context can be accessed to find out the cause of the error. -The context object is always free'd after the disconnect callback fired. When a reconnect is needed, +The context object is always freed after the disconnect callback fired. When a reconnect is needed, the disconnect callback is a good point to do so. Setting the disconnect callback can only be done once per context. For subsequent calls it will return `REDIS_ERR`. The function to set the disconnect callback has the following prototype: - - int redisAsyncSetDisconnectCallback(redisAsyncContext *ac, redisDisconnectCallback *fn); - +```c +int redisAsyncSetDisconnectCallback(redisAsyncContext *ac, redisDisconnectCallback *fn); +``` ### Sending commands and their callbacks In an asynchronous context, commands are automatically pipelined due to the nature of an event loop. Therefore, unlike the synchronous API, there is only a single way to send commands. Because commands are sent to Redis asynchronously, issuing a command requires a callback function that is called when the reply is received. 
Reply callbacks should have the following prototype: - - void(redisAsyncContext *c, void *reply, void *privdata); - +```c +void(redisAsyncContext *c, void *reply, void *privdata); +``` The `privdata` argument can be used to curry arbitrary data to the callback from the point where the command is initially queued for execution. The functions that can be used to issue commands in an asynchronous context are: - - int redisAsyncCommand( - redisAsyncContext *ac, redisCallbackFn *fn, void *privdata, - const char *format, ...); - int redisAsyncCommandArgv( - redisAsyncContext *ac, redisCallbackFn *fn, void *privdata, - int argc, const char **argv, const size_t *argvlen); - +```c +int redisAsyncCommand( + redisAsyncContext *ac, redisCallbackFn *fn, void *privdata, + const char *format, ...); +int redisAsyncCommandArgv( + redisAsyncContext *ac, redisCallbackFn *fn, void *privdata, + int argc, const char **argv, const size_t *argvlen); +``` Both functions work like their blocking counterparts. The return value is `REDIS_OK` when the command was successfully added to the output buffer and `REDIS_ERR` otherwise. Example: when the connection is being disconnected per user-request, no new commands may be added to the output buffer and `REDIS_ERR` is returned on calls to the `redisAsyncCommand` family. -If the reply for a command with a `NULL` callback is read, it is immediately free'd. When the callback -for a command is non-`NULL`, the memory is free'd immediately following the callback: the reply is only +If the reply for a command with a `NULL` callback is read, it is immediately freed. When the callback +for a command is non-`NULL`, the memory is freed immediately following the callback: the reply is only valid for the duration of the callback. All pending callbacks are called with a `NULL` reply when the context encountered an error. 
@@ -296,14 +321,14 @@ All pending callbacks are called with a `NULL` reply when the context encountere ### Disconnecting An asynchronous connection can be terminated using: - - void redisAsyncDisconnect(redisAsyncContext *ac); - +```c +void redisAsyncDisconnect(redisAsyncContext *ac); +``` When this function is called, the connection is **not** immediately terminated. Instead, new commands are no longer accepted and the connection is only terminated when all pending commands have been written to the socket, their respective replies have been read and their respective callbacks have been executed. After this, the disconnection callback is executed with the -`REDIS_OK` status and the context object is free'd. +`REDIS_OK` status and the context object is freed. ### Hooking it up to event library *X* @@ -316,12 +341,12 @@ Hiredis comes with a reply parsing API that makes it easy for writing higher level language bindings. The reply parsing API consists of the following functions: - - redisReader *redisReaderCreate(void); - void redisReaderFree(redisReader *reader); - int redisReaderFeed(redisReader *reader, const char *buf, size_t len); - int redisReaderGetReply(redisReader *reader, void **reply); - +```c +redisReader *redisReaderCreate(void); +void redisReaderFree(redisReader *reader); +int redisReaderFeed(redisReader *reader, const char *buf, size_t len); +int redisReaderGetReply(redisReader *reader, void **reply); +``` The same set of functions are used internally by hiredis when creating a normal Redis context, the above API just exposes it to the user for a direct usage. @@ -361,7 +386,7 @@ Both when using the Reader API directly or when using it indirectly via a normal Redis context, the redisReader structure uses a buffer in order to accumulate data from the server. 
Usually this buffer is destroyed when it is empty and is larger than 16 -kb in order to avoid wasting memory in unused buffers +KiB in order to avoid wasting memory in unused buffers However when working with very big payloads destroying the buffer may slow down performances considerably, so it is possible to modify the max size of @@ -371,9 +396,9 @@ value for an idle buffer, so the buffer will never get freed. For instance if you have a normal Redis context you can set the maximum idle buffer to zero (unlimited) just with: - - context->reader->maxbuf = 0; - +```c +context->reader->maxbuf = 0; +``` This should be done only in order to maximize performances when working with large payloads. The context should be set back to `REDIS_READER_MAX_BUF` again as soon as possible in order to prevent allocation of useless memory. @@ -381,4 +406,6 @@ as soon as possible in order to prevent allocation of useless memory. ## AUTHORS Hiredis was written by Salvatore Sanfilippo (antirez at gmail) and -Pieter Noordhuis (pcnoordhuis at gmail) and is released under the BSD license. +Pieter Noordhuis (pcnoordhuis at gmail) and is released under the BSD license. 
+Hiredis is currently maintained by Matt Stancliff (matt at genges dot com) and +Jan-Erik Rediger (janerik at fnordig dot com) diff --git a/deps/hiredis/adapters/glib.h b/deps/hiredis/adapters/glib.h new file mode 100644 index 000000000..e0a6411d3 --- /dev/null +++ b/deps/hiredis/adapters/glib.h @@ -0,0 +1,153 @@ +#ifndef __HIREDIS_GLIB_H__ +#define __HIREDIS_GLIB_H__ + +#include + +#include "../hiredis.h" +#include "../async.h" + +typedef struct +{ + GSource source; + redisAsyncContext *ac; + GPollFD poll_fd; +} RedisSource; + +static void +redis_source_add_read (gpointer data) +{ + RedisSource *source = (RedisSource *)data; + g_return_if_fail(source); + source->poll_fd.events |= G_IO_IN; + g_main_context_wakeup(g_source_get_context((GSource *)data)); +} + +static void +redis_source_del_read (gpointer data) +{ + RedisSource *source = (RedisSource *)data; + g_return_if_fail(source); + source->poll_fd.events &= ~G_IO_IN; + g_main_context_wakeup(g_source_get_context((GSource *)data)); +} + +static void +redis_source_add_write (gpointer data) +{ + RedisSource *source = (RedisSource *)data; + g_return_if_fail(source); + source->poll_fd.events |= G_IO_OUT; + g_main_context_wakeup(g_source_get_context((GSource *)data)); +} + +static void +redis_source_del_write (gpointer data) +{ + RedisSource *source = (RedisSource *)data; + g_return_if_fail(source); + source->poll_fd.events &= ~G_IO_OUT; + g_main_context_wakeup(g_source_get_context((GSource *)data)); +} + +static void +redis_source_cleanup (gpointer data) +{ + RedisSource *source = (RedisSource *)data; + + g_return_if_fail(source); + + redis_source_del_read(source); + redis_source_del_write(source); + /* + * It is not our responsibility to remove ourself from the + * current main loop. However, we will remove the GPollFD. 
+ */ + if (source->poll_fd.fd >= 0) { + g_source_remove_poll((GSource *)data, &source->poll_fd); + source->poll_fd.fd = -1; + } +} + +static gboolean +redis_source_prepare (GSource *source, + gint *timeout_) +{ + RedisSource *redis = (RedisSource *)source; + *timeout_ = -1; + return !!(redis->poll_fd.events & redis->poll_fd.revents); +} + +static gboolean +redis_source_check (GSource *source) +{ + RedisSource *redis = (RedisSource *)source; + return !!(redis->poll_fd.events & redis->poll_fd.revents); +} + +static gboolean +redis_source_dispatch (GSource *source, + GSourceFunc callback, + gpointer user_data) +{ + RedisSource *redis = (RedisSource *)source; + + if ((redis->poll_fd.revents & G_IO_OUT)) { + redisAsyncHandleWrite(redis->ac); + redis->poll_fd.revents &= ~G_IO_OUT; + } + + if ((redis->poll_fd.revents & G_IO_IN)) { + redisAsyncHandleRead(redis->ac); + redis->poll_fd.revents &= ~G_IO_IN; + } + + if (callback) { + return callback(user_data); + } + + return TRUE; +} + +static void +redis_source_finalize (GSource *source) +{ + RedisSource *redis = (RedisSource *)source; + + if (redis->poll_fd.fd >= 0) { + g_source_remove_poll(source, &redis->poll_fd); + redis->poll_fd.fd = -1; + } +} + +static GSource * +redis_source_new (redisAsyncContext *ac) +{ + static GSourceFuncs source_funcs = { + .prepare = redis_source_prepare, + .check = redis_source_check, + .dispatch = redis_source_dispatch, + .finalize = redis_source_finalize, + }; + redisContext *c = &ac->c; + RedisSource *source; + + g_return_val_if_fail(ac != NULL, NULL); + + source = (RedisSource *)g_source_new(&source_funcs, sizeof *source); + source->ac = ac; + source->poll_fd.fd = c->fd; + source->poll_fd.events = 0; + source->poll_fd.revents = 0; + g_source_add_poll((GSource *)source, &source->poll_fd); + + ac->ev.addRead = redis_source_add_read; + ac->ev.delRead = redis_source_del_read; + ac->ev.addWrite = redis_source_add_write; + ac->ev.delWrite = redis_source_del_write; + ac->ev.cleanup = 
redis_source_cleanup; + ac->ev.data = source; + + return (GSource *)source; +} + +#endif /* __HIREDIS_GLIB_H__ */ diff --git a/deps/hiredis/adapters/ivykis.h b/deps/hiredis/adapters/ivykis.h new file mode 100644 index 000000000..6a12a868a --- /dev/null +++ b/deps/hiredis/adapters/ivykis.h @@ -0,0 +1,81 @@ +#ifndef __HIREDIS_IVYKIS_H__ +#define __HIREDIS_IVYKIS_H__ +#include +#include "../hiredis.h" +#include "../async.h" + +typedef struct redisIvykisEvents { + redisAsyncContext *context; + struct iv_fd fd; +} redisIvykisEvents; + +static void redisIvykisReadEvent(void *arg) { + redisAsyncContext *context = (redisAsyncContext *)arg; + redisAsyncHandleRead(context); +} + +static void redisIvykisWriteEvent(void *arg) { + redisAsyncContext *context = (redisAsyncContext *)arg; + redisAsyncHandleWrite(context); +} + +static void redisIvykisAddRead(void *privdata) { + redisIvykisEvents *e = (redisIvykisEvents*)privdata; + iv_fd_set_handler_in(&e->fd, redisIvykisReadEvent); +} + +static void redisIvykisDelRead(void *privdata) { + redisIvykisEvents *e = (redisIvykisEvents*)privdata; + iv_fd_set_handler_in(&e->fd, NULL); +} + +static void redisIvykisAddWrite(void *privdata) { + redisIvykisEvents *e = (redisIvykisEvents*)privdata; + iv_fd_set_handler_out(&e->fd, redisIvykisWriteEvent); +} + +static void redisIvykisDelWrite(void *privdata) { + redisIvykisEvents *e = (redisIvykisEvents*)privdata; + iv_fd_set_handler_out(&e->fd, NULL); +} + +static void redisIvykisCleanup(void *privdata) { + redisIvykisEvents *e = (redisIvykisEvents*)privdata; + + iv_fd_unregister(&e->fd); + free(e); +} + +static int redisIvykisAttach(redisAsyncContext *ac) { + redisContext *c = &(ac->c); + redisIvykisEvents *e; + + /* Nothing should be attached when something is already attached */ + if (ac->ev.data != NULL) + return REDIS_ERR; + + /* Create container for context and r/w events */ + e = (redisIvykisEvents*)malloc(sizeof(*e)); + e->context = ac; + + /* Register functions to start/stop listening 
for events */ + ac->ev.addRead = redisIvykisAddRead; + ac->ev.delRead = redisIvykisDelRead; + ac->ev.addWrite = redisIvykisAddWrite; + ac->ev.delWrite = redisIvykisDelWrite; + ac->ev.cleanup = redisIvykisCleanup; + ac->ev.data = e; + + /* Initialize and install read/write events */ + IV_FD_INIT(&e->fd); + e->fd.fd = c->fd; + e->fd.handler_in = redisIvykisReadEvent; + e->fd.handler_out = redisIvykisWriteEvent; + e->fd.handler_err = NULL; + e->fd.cookie = e->context; + + iv_fd_register(&e->fd); + + return REDIS_OK; +} +#endif diff --git a/deps/hiredis/adapters/libevent.h b/deps/hiredis/adapters/libevent.h index 1c2b271bb..273d8b2dd 100644 --- a/deps/hiredis/adapters/libevent.h +++ b/deps/hiredis/adapters/libevent.h @@ -30,13 +30,13 @@ #ifndef __HIREDIS_LIBEVENT_H__ #define __HIREDIS_LIBEVENT_H__ -#include +#include #include "../hiredis.h" #include "../async.h" typedef struct redisLibeventEvents { redisAsyncContext *context; - struct event rev, wev; + struct event *rev, *wev; } redisLibeventEvents; static void redisLibeventReadEvent(int fd, short event, void *arg) { @@ -53,28 +53,28 @@ static void redisLibeventWriteEvent(int fd, short event, void *arg) { static void redisLibeventAddRead(void *privdata) { redisLibeventEvents *e = (redisLibeventEvents*)privdata; - event_add(&e->rev,NULL); + event_add(e->rev,NULL); } static void redisLibeventDelRead(void *privdata) { redisLibeventEvents *e = (redisLibeventEvents*)privdata; - event_del(&e->rev); + event_del(e->rev); } static void redisLibeventAddWrite(void *privdata) { redisLibeventEvents *e = (redisLibeventEvents*)privdata; - event_add(&e->wev,NULL); + event_add(e->wev,NULL); } static void redisLibeventDelWrite(void *privdata) { redisLibeventEvents *e = (redisLibeventEvents*)privdata; - event_del(&e->wev); + event_del(e->wev); } static void redisLibeventCleanup(void *privdata) { redisLibeventEvents *e = (redisLibeventEvents*)privdata; - event_del(&e->rev); - event_del(&e->wev); + event_del(e->rev); + event_del(e->wev); 
free(e); } @@ -99,10 +99,10 @@ static int redisLibeventAttach(redisAsyncContext *ac, struct event_base *base) { ac->ev.data = e; /* Initialize and install read/write events */ - event_set(&e->rev,c->fd,EV_READ,redisLibeventReadEvent,e); - event_set(&e->wev,c->fd,EV_WRITE,redisLibeventWriteEvent,e); - event_base_set(base,&e->rev); - event_base_set(base,&e->wev); + e->rev = event_new(base, c->fd, EV_READ, redisLibeventReadEvent, e); + e->wev = event_new(base, c->fd, EV_WRITE, redisLibeventWriteEvent, e); + event_add(e->rev, NULL); + event_add(e->wev, NULL); return REDIS_OK; } #endif diff --git a/deps/hiredis/adapters/libuv.h b/deps/hiredis/adapters/libuv.h index a1967f4fd..ff08c25e1 100644 --- a/deps/hiredis/adapters/libuv.h +++ b/deps/hiredis/adapters/libuv.h @@ -1,5 +1,6 @@ #ifndef __HIREDIS_LIBUV_H__ #define __HIREDIS_LIBUV_H__ +#include #include #include "../hiredis.h" #include "../async.h" @@ -11,7 +12,6 @@ typedef struct redisLibuvEvents { int events; } redisLibuvEvents; -int redisLibuvAttach(redisAsyncContext*, uv_loop_t*); static void redisLibuvPoll(uv_poll_t* handle, int status, int events) { redisLibuvEvents* p = (redisLibuvEvents*)handle->data; @@ -20,10 +20,10 @@ static void redisLibuvPoll(uv_poll_t* handle, int status, int events) { return; } - if (events & UV_READABLE) { + if (p->context != NULL && (events & UV_READABLE)) { redisAsyncHandleRead(p->context); } - if (events & UV_WRITABLE) { + if (p->context != NULL && (events & UV_WRITABLE)) { redisAsyncHandleWrite(p->context); } } @@ -83,6 +83,7 @@ static void on_close(uv_handle_t* handle) { static void redisLibuvCleanup(void *privdata) { redisLibuvEvents* p = (redisLibuvEvents*)privdata; + p->context = NULL; // indicate that context might no longer exist uv_close((uv_handle_t*)&p->handle, on_close); } diff --git a/deps/hiredis/adapters/macosx.h b/deps/hiredis/adapters/macosx.h new file mode 100644 index 000000000..72121f606 --- /dev/null +++ b/deps/hiredis/adapters/macosx.h @@ -0,0 +1,114 @@ +// +// 
Created by Дмитрий Бахвалов on 13.07.15. +// Copyright (c) 2015 Dmitry Bakhvalov. All rights reserved. +// + +#ifndef __HIREDIS_MACOSX_H__ +#define __HIREDIS_MACOSX_H__ + +#include + +#include "../hiredis.h" +#include "../async.h" + +typedef struct { + redisAsyncContext *context; + CFSocketRef socketRef; + CFRunLoopSourceRef sourceRef; +} RedisRunLoop; + +static int freeRedisRunLoop(RedisRunLoop* redisRunLoop) { + if( redisRunLoop != NULL ) { + if( redisRunLoop->sourceRef != NULL ) { + CFRunLoopSourceInvalidate(redisRunLoop->sourceRef); + CFRelease(redisRunLoop->sourceRef); + } + if( redisRunLoop->socketRef != NULL ) { + CFSocketInvalidate(redisRunLoop->socketRef); + CFRelease(redisRunLoop->socketRef); + } + free(redisRunLoop); + } + return REDIS_ERR; +} + +static void redisMacOSAddRead(void *privdata) { + RedisRunLoop *redisRunLoop = (RedisRunLoop*)privdata; + CFSocketEnableCallBacks(redisRunLoop->socketRef, kCFSocketReadCallBack); +} + +static void redisMacOSDelRead(void *privdata) { + RedisRunLoop *redisRunLoop = (RedisRunLoop*)privdata; + CFSocketDisableCallBacks(redisRunLoop->socketRef, kCFSocketReadCallBack); +} + +static void redisMacOSAddWrite(void *privdata) { + RedisRunLoop *redisRunLoop = (RedisRunLoop*)privdata; + CFSocketEnableCallBacks(redisRunLoop->socketRef, kCFSocketWriteCallBack); +} + +static void redisMacOSDelWrite(void *privdata) { + RedisRunLoop *redisRunLoop = (RedisRunLoop*)privdata; + CFSocketDisableCallBacks(redisRunLoop->socketRef, kCFSocketWriteCallBack); +} + +static void redisMacOSCleanup(void *privdata) { + RedisRunLoop *redisRunLoop = (RedisRunLoop*)privdata; + freeRedisRunLoop(redisRunLoop); +} + +static void redisMacOSAsyncCallback(CFSocketRef __unused s, CFSocketCallBackType callbackType, CFDataRef __unused address, const void __unused *data, void *info) { + redisAsyncContext* context = (redisAsyncContext*) info; + + switch (callbackType) { + case kCFSocketReadCallBack: + redisAsyncHandleRead(context); + break; + + case 
kCFSocketWriteCallBack: + redisAsyncHandleWrite(context); + break; + + default: + break; + } +} + +static int redisMacOSAttach(redisAsyncContext *redisAsyncCtx, CFRunLoopRef runLoop) { + redisContext *redisCtx = &(redisAsyncCtx->c); + + /* Nothing should be attached when something is already attached */ + if( redisAsyncCtx->ev.data != NULL ) return REDIS_ERR; + + RedisRunLoop* redisRunLoop = (RedisRunLoop*) calloc(1, sizeof(RedisRunLoop)); + if( !redisRunLoop ) return REDIS_ERR; + + /* Setup redis stuff */ + redisRunLoop->context = redisAsyncCtx; + + redisAsyncCtx->ev.addRead = redisMacOSAddRead; + redisAsyncCtx->ev.delRead = redisMacOSDelRead; + redisAsyncCtx->ev.addWrite = redisMacOSAddWrite; + redisAsyncCtx->ev.delWrite = redisMacOSDelWrite; + redisAsyncCtx->ev.cleanup = redisMacOSCleanup; + redisAsyncCtx->ev.data = redisRunLoop; + + /* Initialize and install read/write events */ + CFSocketContext socketCtx = { 0, redisAsyncCtx, NULL, NULL, NULL }; + + redisRunLoop->socketRef = CFSocketCreateWithNative(NULL, redisCtx->fd, + kCFSocketReadCallBack | kCFSocketWriteCallBack, + redisMacOSAsyncCallback, + &socketCtx); + if( !redisRunLoop->socketRef ) return freeRedisRunLoop(redisRunLoop); + + redisRunLoop->sourceRef = CFSocketCreateRunLoopSource(NULL, redisRunLoop->socketRef, 0); + if( !redisRunLoop->sourceRef ) return freeRedisRunLoop(redisRunLoop); + + CFRunLoopAddSource(runLoop, redisRunLoop->sourceRef, kCFRunLoopDefaultMode); + + return REDIS_OK; +} + +#endif + diff --git a/deps/hiredis/adapters/qt.h b/deps/hiredis/adapters/qt.h new file mode 100644 index 000000000..5cc02e6ce --- /dev/null +++ b/deps/hiredis/adapters/qt.h @@ -0,0 +1,135 @@ +/*- + * Copyright (C) 2014 Pietro Cerutti + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef __HIREDIS_QT_H__ +#define __HIREDIS_QT_H__ +#include +#include "../async.h" + +static void RedisQtAddRead(void *); +static void RedisQtDelRead(void *); +static void RedisQtAddWrite(void *); +static void RedisQtDelWrite(void *); +static void RedisQtCleanup(void *); + +class RedisQtAdapter : public QObject { + + Q_OBJECT + + friend + void RedisQtAddRead(void * adapter) { + RedisQtAdapter * a = static_cast(adapter); + a->addRead(); + } + + friend + void RedisQtDelRead(void * adapter) { + RedisQtAdapter * a = static_cast(adapter); + a->delRead(); + } + + friend + void RedisQtAddWrite(void * adapter) { + RedisQtAdapter * a = static_cast(adapter); + a->addWrite(); + } + + friend + void RedisQtDelWrite(void * adapter) { + RedisQtAdapter * a = static_cast(adapter); + a->delWrite(); + } + + friend + void RedisQtCleanup(void * adapter) { + RedisQtAdapter * a = static_cast(adapter); + a->cleanup(); + } + + public: + RedisQtAdapter(QObject * parent = 0) + : QObject(parent), m_ctx(0), m_read(0), m_write(0) { } + + ~RedisQtAdapter() { + if (m_ctx != 0) { + m_ctx->ev.data = NULL; + } + } + + int setContext(redisAsyncContext * ac) { + if (ac->ev.data != NULL) { + return REDIS_ERR; + } + m_ctx = ac; + m_ctx->ev.data = this; + m_ctx->ev.addRead = RedisQtAddRead; + m_ctx->ev.delRead = RedisQtDelRead; + m_ctx->ev.addWrite = RedisQtAddWrite; + m_ctx->ev.delWrite = RedisQtDelWrite; + m_ctx->ev.cleanup = RedisQtCleanup; + return REDIS_OK; + } + + private: + void addRead() { + if (m_read) return; + m_read = new QSocketNotifier(m_ctx->c.fd, QSocketNotifier::Read, 0); + connect(m_read, SIGNAL(activated(int)), this, SLOT(read())); + } + + void delRead() { + if (!m_read) return; + delete m_read; + m_read = 0; + } + + void addWrite() { + if (m_write) return; + m_write = new QSocketNotifier(m_ctx->c.fd, QSocketNotifier::Write, 0); + connect(m_write, SIGNAL(activated(int)), this, SLOT(write())); + } + + void delWrite() { + if (!m_write) return; + delete m_write; + m_write = 0; + 
} + + void cleanup() { + delRead(); + delWrite(); + } + + private slots: + void read() { redisAsyncHandleRead(m_ctx); } + void write() { redisAsyncHandleWrite(m_ctx); } + + private: + redisAsyncContext * m_ctx; + QSocketNotifier * m_read; + QSocketNotifier * m_write; +}; + +#endif /* !__HIREDIS_QT_H__ */ diff --git a/deps/hiredis/appveyor.yml b/deps/hiredis/appveyor.yml new file mode 100644 index 000000000..06bbef117 --- /dev/null +++ b/deps/hiredis/appveyor.yml @@ -0,0 +1,36 @@ +# Appveyor configuration file for CI build of hiredis on Windows (under Cygwin) +environment: + matrix: + - CYG_ROOT: C:\cygwin64 + CYG_SETUP: setup-x86_64.exe + CYG_MIRROR: http://cygwin.mirror.constant.com + CYG_CACHE: C:\cygwin64\var\cache\setup + CYG_BASH: C:\cygwin64\bin\bash + CC: gcc + - CYG_ROOT: C:\cygwin + CYG_SETUP: setup-x86.exe + CYG_MIRROR: http://cygwin.mirror.constant.com + CYG_CACHE: C:\cygwin\var\cache\setup + CYG_BASH: C:\cygwin\bin\bash + CC: gcc + TARGET: 32bit + TARGET_VARS: 32bit-vars + +# Cache Cygwin files to speed up build +cache: + - '%CYG_CACHE%' +clone_depth: 1 + +# Attempt to ensure we don't try to convert line endings to Win32 CRLF as this will cause build to fail +init: + - git config --global core.autocrlf input + +# Install needed build dependencies +install: + - ps: 'Start-FileDownload "http://cygwin.com/$env:CYG_SETUP" -FileName "$env:CYG_SETUP"' + - '%CYG_SETUP% --quiet-mode --no-shortcuts --only-site --root "%CYG_ROOT%" --site "%CYG_MIRROR%" --local-package-dir "%CYG_CACHE%" --packages automake,bison,gcc-core,libtool,make,gettext-devel,gettext,intltool,pkg-config,clang,llvm > NUL 2>&1' + - '%CYG_BASH% -lc "cygcheck -dc cygwin"' + +build_script: + - 'echo building...' 
+ - '%CYG_BASH% -lc "cd $APPVEYOR_BUILD_FOLDER; exec 0c); ac->err = c->err; ac->errstr = c->errstr; @@ -173,6 +176,14 @@ redisAsyncContext *redisAsyncConnectBind(const char *ip, int port, return ac; } +redisAsyncContext *redisAsyncConnectBindWithReuse(const char *ip, int port, + const char *source_addr) { + redisContext *c = redisConnectBindNonBlockWithReuse(ip,port,source_addr); + redisAsyncContext *ac = redisAsyncInitialize(c); + __redisAsyncCopyError(ac); + return ac; +} + redisAsyncContext *redisAsyncConnectUnix(const char *path) { redisContext *c; redisAsyncContext *ac; @@ -407,7 +418,8 @@ void redisProcessCallbacks(redisAsyncContext *ac) { if (reply == NULL) { /* When the connection is being disconnected and there are * no more replies, this is the cue to really disconnect. */ - if (c->flags & REDIS_DISCONNECTING && sdslen(c->obuf) == 0) { + if (c->flags & REDIS_DISCONNECTING && sdslen(c->obuf) == 0 + && ac->replies.head == NULL) { __redisAsyncDisconnect(ac); return; } @@ -477,7 +489,7 @@ void redisProcessCallbacks(redisAsyncContext *ac) { } /* Internal helper function to detect socket status the first time a read or - * write event fires. When connecting was not succesful, the connect callback + * write event fires. When connecting was not successful, the connect callback * is called with a REDIS_ERR status and the context is free'd. */ static int __redisAsyncHandleConnect(redisAsyncContext *ac) { redisContext *c = &(ac->c); @@ -551,8 +563,8 @@ void redisAsyncHandleWrite(redisAsyncContext *ac) { /* Sets a pointer to the first argument and its length starting at p. Returns * the number of bytes to skip to get to the following argument. 
*/ -static char *nextArgument(char *start, char **str, size_t *len) { - char *p = start; +static const char *nextArgument(const char *start, const char **str, size_t *len) { + const char *p = start; if (p[0] != '$') { p = strchr(p,'$'); if (p == NULL) return NULL; @@ -568,14 +580,15 @@ static char *nextArgument(char *start, char **str, size_t *len) { /* Helper function for the redisAsyncCommand* family of functions. Writes a * formatted command to the output buffer and registers the provided callback * function with the context. */ -static int __redisAsyncCommand(redisAsyncContext *ac, redisCallbackFn *fn, void *privdata, char *cmd, size_t len) { +static int __redisAsyncCommand(redisAsyncContext *ac, redisCallbackFn *fn, void *privdata, const char *cmd, size_t len) { redisContext *c = &(ac->c); redisCallback cb; int pvariant, hasnext; - char *cstr, *astr; + const char *cstr, *astr; size_t clen, alen; - char *p; + const char *p; sds sname; + int ret; /* Don't accept new commands when the connection is about to be closed. */ if (c->flags & (REDIS_DISCONNECTING | REDIS_FREEING)) return REDIS_ERR; @@ -599,9 +612,11 @@ static int __redisAsyncCommand(redisAsyncContext *ac, redisCallbackFn *fn, void while ((p = nextArgument(p,&astr,&alen)) != NULL) { sname = sdsnewlen(astr,alen); if (pvariant) - dictReplace(ac->sub.patterns,sname,&cb); + ret = dictReplace(ac->sub.patterns,sname,&cb); else - dictReplace(ac->sub.channels,sname,&cb); + ret = dictReplace(ac->sub.channels,sname,&cb); + + if (ret == 0) sdsfree(sname); } } else if (strncasecmp(cstr,"unsubscribe\r\n",13) == 0) { /* It is only useful to call (P)UNSUBSCRIBE when the context is @@ -637,6 +652,11 @@ int redisvAsyncCommand(redisAsyncContext *ac, redisCallbackFn *fn, void *privdat int len; int status; len = redisvFormatCommand(&cmd,format,ap); + + /* We don't want to pass -1 or -2 to future functions as a length. 
*/ + if (len < 0) + return REDIS_ERR; + status = __redisAsyncCommand(ac,fn,privdata,cmd,len); free(cmd); return status; @@ -652,11 +672,16 @@ int redisAsyncCommand(redisAsyncContext *ac, redisCallbackFn *fn, void *privdata } int redisAsyncCommandArgv(redisAsyncContext *ac, redisCallbackFn *fn, void *privdata, int argc, const char **argv, const size_t *argvlen) { - char *cmd; + sds cmd; int len; int status; - len = redisFormatCommandArgv(&cmd,argc,argv,argvlen); + len = redisFormatSdsCommandArgv(&cmd,argc,argv,argvlen); status = __redisAsyncCommand(ac,fn,privdata,cmd,len); - free(cmd); + sdsfree(cmd); + return status; +} + +int redisAsyncFormattedCommand(redisAsyncContext *ac, redisCallbackFn *fn, void *privdata, const char *cmd, size_t len) { + int status = __redisAsyncCommand(ac,fn,privdata,cmd,len); return status; } diff --git a/deps/hiredis/async.h b/deps/hiredis/async.h index 8a2cf1ecd..59cbf469b 100644 --- a/deps/hiredis/async.h +++ b/deps/hiredis/async.h @@ -103,6 +103,8 @@ typedef struct redisAsyncContext { /* Functions that proxy to hiredis */ redisAsyncContext *redisAsyncConnect(const char *ip, int port); redisAsyncContext *redisAsyncConnectBind(const char *ip, int port, const char *source_addr); +redisAsyncContext *redisAsyncConnectBindWithReuse(const char *ip, int port, + const char *source_addr); redisAsyncContext *redisAsyncConnectUnix(const char *path); int redisAsyncSetConnectCallback(redisAsyncContext *ac, redisConnectCallback *fn); int redisAsyncSetDisconnectCallback(redisAsyncContext *ac, redisDisconnectCallback *fn); @@ -118,6 +120,7 @@ void redisAsyncHandleWrite(redisAsyncContext *ac); int redisvAsyncCommand(redisAsyncContext *ac, redisCallbackFn *fn, void *privdata, const char *format, va_list ap); int redisAsyncCommand(redisAsyncContext *ac, redisCallbackFn *fn, void *privdata, const char *format, ...); int redisAsyncCommandArgv(redisAsyncContext *ac, redisCallbackFn *fn, void *privdata, int argc, const char **argv, const size_t *argvlen); 
+int redisAsyncFormattedCommand(redisAsyncContext *ac, redisCallbackFn *fn, void *privdata, const char *cmd, size_t len); #ifdef __cplusplus } diff --git a/deps/hiredis/dict.c b/deps/hiredis/dict.c index 79b1041ca..e17a62546 100644 --- a/deps/hiredis/dict.c +++ b/deps/hiredis/dict.c @@ -161,7 +161,7 @@ static int dictReplace(dict *ht, void *key, void *val) { dictEntry *entry, auxentry; /* Try to add the element. If the key - * does not exists dictAdd will suceed. */ + * does not exists dictAdd will succeed. */ if (dictAdd(ht, key, val) == DICT_OK) return 1; /* It already exists, get the entry */ @@ -293,7 +293,7 @@ static void dictReleaseIterator(dictIterator *iter) { /* Expand the hash table if needed */ static int _dictExpandIfNeeded(dict *ht) { - /* If the hash table is empty expand it to the intial size, + /* If the hash table is empty expand it to the initial size, * if the table is "full" dobule its size. */ if (ht->size == 0) return dictExpand(ht, DICT_HT_INITIAL_SIZE); diff --git a/deps/hiredis/examples/example-glib.c b/deps/hiredis/examples/example-glib.c new file mode 100644 index 000000000..d6e10f8e8 --- /dev/null +++ b/deps/hiredis/examples/example-glib.c @@ -0,0 +1,73 @@ +#include + +#include +#include +#include + +static GMainLoop *mainloop; + +static void +connect_cb (const redisAsyncContext *ac G_GNUC_UNUSED, + int status) +{ + if (status != REDIS_OK) { + g_printerr("Failed to connect: %s\n", ac->errstr); + g_main_loop_quit(mainloop); + } else { + g_printerr("Connected...\n"); + } +} + +static void +disconnect_cb (const redisAsyncContext *ac G_GNUC_UNUSED, + int status) +{ + if (status != REDIS_OK) { + g_error("Failed to disconnect: %s", ac->errstr); + } else { + g_printerr("Disconnected...\n"); + g_main_loop_quit(mainloop); + } +} + +static void +command_cb(redisAsyncContext *ac, + gpointer r, + gpointer user_data G_GNUC_UNUSED) +{ + redisReply *reply = r; + + if (reply) { + g_print("REPLY: %s\n", reply->str); + } + + redisAsyncDisconnect(ac); +} + 
+gint +main (gint argc G_GNUC_UNUSED, + gchar *argv[] G_GNUC_UNUSED) +{ + redisAsyncContext *ac; + GMainContext *context = NULL; + GSource *source; + + ac = redisAsyncConnect("127.0.0.1", 6379); + if (ac->err) { + g_printerr("%s\n", ac->errstr); + exit(EXIT_FAILURE); + } + + source = redis_source_new(ac); + mainloop = g_main_loop_new(context, FALSE); + g_source_attach(source, context); + + redisAsyncSetConnectCallback(ac, connect_cb); + redisAsyncSetDisconnectCallback(ac, disconnect_cb); + redisAsyncCommand(ac, command_cb, NULL, "SET key 1234"); + redisAsyncCommand(ac, command_cb, NULL, "GET key"); + + g_main_loop_run(mainloop); + + return EXIT_SUCCESS; +} diff --git a/deps/hiredis/examples/example-ivykis.c b/deps/hiredis/examples/example-ivykis.c new file mode 100644 index 000000000..67affcef3 --- /dev/null +++ b/deps/hiredis/examples/example-ivykis.c @@ -0,0 +1,58 @@ +#include +#include +#include +#include + +#include +#include +#include + +void getCallback(redisAsyncContext *c, void *r, void *privdata) { + redisReply *reply = r; + if (reply == NULL) return; + printf("argv[%s]: %s\n", (char*)privdata, reply->str); + + /* Disconnect after receiving the reply to GET */ + redisAsyncDisconnect(c); +} + +void connectCallback(const redisAsyncContext *c, int status) { + if (status != REDIS_OK) { + printf("Error: %s\n", c->errstr); + return; + } + printf("Connected...\n"); +} + +void disconnectCallback(const redisAsyncContext *c, int status) { + if (status != REDIS_OK) { + printf("Error: %s\n", c->errstr); + return; + } + printf("Disconnected...\n"); +} + +int main (int argc, char **argv) { + signal(SIGPIPE, SIG_IGN); + + iv_init(); + + redisAsyncContext *c = redisAsyncConnect("127.0.0.1", 6379); + if (c->err) { + /* Let *c leak for now... 
*/ + printf("Error: %s\n", c->errstr); + return 1; + } + + redisIvykisAttach(c); + redisAsyncSetConnectCallback(c,connectCallback); + redisAsyncSetDisconnectCallback(c,disconnectCallback); + redisAsyncCommand(c, NULL, NULL, "SET key %b", argv[argc-1], strlen(argv[argc-1])); + redisAsyncCommand(c, getCallback, (char*)"end-1", "GET key"); + + iv_main(); + + iv_deinit(); + + return 0; +} diff --git a/deps/hiredis/examples/example-macosx.c b/deps/hiredis/examples/example-macosx.c new file mode 100644 index 000000000..bc84ed5ba --- /dev/null +++ b/deps/hiredis/examples/example-macosx.c @@ -0,0 +1,66 @@ +// +// Created by Дмитрий Бахвалов on 13.07.15. +// Copyright (c) 2015 Dmitry Bakhvalov. All rights reserved. +// + +#include + +#include +#include +#include + +void getCallback(redisAsyncContext *c, void *r, void *privdata) { + redisReply *reply = r; + if (reply == NULL) return; + printf("argv[%s]: %s\n", (char*)privdata, reply->str); + + /* Disconnect after receiving the reply to GET */ + redisAsyncDisconnect(c); +} + +void connectCallback(const redisAsyncContext *c, int status) { + if (status != REDIS_OK) { + printf("Error: %s\n", c->errstr); + return; + } + printf("Connected...\n"); +} + +void disconnectCallback(const redisAsyncContext *c, int status) { + if (status != REDIS_OK) { + printf("Error: %s\n", c->errstr); + return; + } + CFRunLoopStop(CFRunLoopGetCurrent()); + printf("Disconnected...\n"); +} + +int main (int argc, char **argv) { + signal(SIGPIPE, SIG_IGN); + + CFRunLoopRef loop = CFRunLoopGetCurrent(); + if( !loop ) { + printf("Error: Cannot get current run loop\n"); + return 1; + } + + redisAsyncContext *c = redisAsyncConnect("127.0.0.1", 6379); + if (c->err) { + /* Let *c leak for now... 
*/ + printf("Error: %s\n", c->errstr); + return 1; + } + + redisMacOSAttach(c, loop); + + redisAsyncSetConnectCallback(c,connectCallback); + redisAsyncSetDisconnectCallback(c,disconnectCallback); + + redisAsyncCommand(c, NULL, NULL, "SET key %b", argv[argc-1], strlen(argv[argc-1])); + redisAsyncCommand(c, getCallback, (char*)"end-1", "GET key"); + + CFRunLoopRun(); + + return 0; +} + diff --git a/deps/hiredis/examples/example-qt.cpp b/deps/hiredis/examples/example-qt.cpp new file mode 100644 index 000000000..f524c3f3d --- /dev/null +++ b/deps/hiredis/examples/example-qt.cpp @@ -0,0 +1,46 @@ +#include +using namespace std; + +#include +#include + +#include "example-qt.h" + +void getCallback(redisAsyncContext *, void * r, void * privdata) { + + redisReply * reply = static_cast(r); + ExampleQt * ex = static_cast(privdata); + if (reply == nullptr || ex == nullptr) return; + + cout << "key: " << reply->str << endl; + + ex->finish(); +} + +void ExampleQt::run() { + + m_ctx = redisAsyncConnect("localhost", 6379); + + if (m_ctx->err) { + cerr << "Error: " << m_ctx->errstr << endl; + redisAsyncFree(m_ctx); + emit finished(); + } + + m_adapter.setContext(m_ctx); + + redisAsyncCommand(m_ctx, NULL, NULL, "SET key %s", m_value); + redisAsyncCommand(m_ctx, getCallback, this, "GET key"); +} + +int main (int argc, char **argv) { + + QCoreApplication app(argc, argv); + + ExampleQt example(argv[argc-1]); + + QObject::connect(&example, SIGNAL(finished()), &app, SLOT(quit())); + QTimer::singleShot(0, &example, SLOT(run())); + + return app.exec(); +} diff --git a/deps/hiredis/examples/example-qt.h b/deps/hiredis/examples/example-qt.h new file mode 100644 index 000000000..374f47666 --- /dev/null +++ b/deps/hiredis/examples/example-qt.h @@ -0,0 +1,32 @@ +#ifndef __HIREDIS_EXAMPLE_QT_H +#define __HIREDIS_EXAMPLE_QT_H + +#include + +class ExampleQt : public QObject { + + Q_OBJECT + + public: + ExampleQt(const char * value, QObject * parent = 0) + : QObject(parent), m_value(value) {} + + 
signals: + void finished(); + + public slots: + void run(); + + private: + void finish() { emit finished(); } + + private: + const char * m_value; + redisAsyncContext * m_ctx; + RedisQtAdapter m_adapter; + + friend + void getCallback(redisAsyncContext *, void *, void *); +}; + +#endif /* !__HIREDIS_EXAMPLE_QT_H */ diff --git a/deps/hiredis/examples/example.c b/deps/hiredis/examples/example.c index 25226a807..4d494c55a 100644 --- a/deps/hiredis/examples/example.c +++ b/deps/hiredis/examples/example.c @@ -57,7 +57,7 @@ int main(int argc, char **argv) { for (j = 0; j < 10; j++) { char buf[64]; - snprintf(buf,64,"%d",j); + snprintf(buf,64,"%u",j); reply = redisCommand(c,"LPUSH mylist element-%s", buf); freeReplyObject(reply); } diff --git a/deps/hiredis/fmacros.h b/deps/hiredis/fmacros.h index 6a41aa176..14fed6060 100644 --- a/deps/hiredis/fmacros.h +++ b/deps/hiredis/fmacros.h @@ -1,23 +1,24 @@ #ifndef __HIREDIS_FMACRO_H #define __HIREDIS_FMACRO_H -#if !defined(_BSD_SOURCE) +#if defined(__linux__) #define _BSD_SOURCE +#define _DEFAULT_SOURCE #endif -#if defined(_AIX) -#define _ALL_SOURCE +#if defined(__CYGWIN__) +#include #endif #if defined(__sun__) #define _POSIX_C_SOURCE 200112L -#elif defined(__linux__) || defined(__OpenBSD__) || defined(__NetBSD__) -#define _XOPEN_SOURCE 600 #else -#define _XOPEN_SOURCE +#if !(defined(__APPLE__) && defined(__MACH__)) +#define _XOPEN_SOURCE 600 +#endif #endif -#if __APPLE__ && __MACH__ +#if defined(__APPLE__) && defined(__MACH__) #define _OSX #endif diff --git a/deps/hiredis/hiredis.c b/deps/hiredis/hiredis.c index 2afee5666..18bdfc99c 100644 --- a/deps/hiredis/hiredis.c +++ b/deps/hiredis/hiredis.c @@ -1,6 +1,8 @@ /* * Copyright (c) 2009-2011, Salvatore Sanfilippo - * Copyright (c) 2010-2011, Pieter Noordhuis + * Copyright (c) 2010-2014, Pieter Noordhuis + * Copyright (c) 2015, Matt Stancliff , + * Jan-Erik Rediger * * All rights reserved. 
* @@ -73,6 +75,9 @@ void freeReplyObject(void *reply) { redisReply *r = reply; size_t j; + if (r == NULL) + return; + switch(r->type) { case REDIS_REPLY_INTEGER: break; /* Nothing to free */ @@ -183,504 +188,23 @@ static void *createNilObject(const redisReadTask *task) { return r; } -static void __redisReaderSetError(redisReader *r, int type, const char *str) { - size_t len; - - if (r->reply != NULL && r->fn && r->fn->freeObject) { - r->fn->freeObject(r->reply); - r->reply = NULL; - } - - /* Clear input buffer on errors. */ - if (r->buf != NULL) { - sdsfree(r->buf); - r->buf = NULL; - r->pos = r->len = 0; - } - - /* Reset task stack. */ - r->ridx = -1; - - /* Set error. */ - r->err = type; - len = strlen(str); - len = len < (sizeof(r->errstr)-1) ? len : (sizeof(r->errstr)-1); - memcpy(r->errstr,str,len); - r->errstr[len] = '\0'; -} - -static size_t chrtos(char *buf, size_t size, char byte) { - size_t len = 0; - - switch(byte) { - case '\\': - case '"': - len = snprintf(buf,size,"\"\\%c\"",byte); - break; - case '\n': len = snprintf(buf,size,"\"\\n\""); break; - case '\r': len = snprintf(buf,size,"\"\\r\""); break; - case '\t': len = snprintf(buf,size,"\"\\t\""); break; - case '\a': len = snprintf(buf,size,"\"\\a\""); break; - case '\b': len = snprintf(buf,size,"\"\\b\""); break; - default: - if (isprint(byte)) - len = snprintf(buf,size,"\"%c\"",byte); - else - len = snprintf(buf,size,"\"\\x%02x\"",(unsigned char)byte); - break; - } - - return len; -} - -static void __redisReaderSetErrorProtocolByte(redisReader *r, char byte) { - char cbuf[8], sbuf[128]; - - chrtos(cbuf,sizeof(cbuf),byte); - snprintf(sbuf,sizeof(sbuf), - "Protocol error, got %s as reply type byte", cbuf); - __redisReaderSetError(r,REDIS_ERR_PROTOCOL,sbuf); -} - -static void __redisReaderSetErrorOOM(redisReader *r) { - __redisReaderSetError(r,REDIS_ERR_OOM,"Out of memory"); -} - -static char *readBytes(redisReader *r, unsigned int bytes) { - char *p; - if (r->len-r->pos >= bytes) { - p = 
r->buf+r->pos; - r->pos += bytes; - return p; - } - return NULL; -} - -/* Find pointer to \r\n. */ -static char *seekNewline(char *s, size_t len) { - int pos = 0; - int _len = len-1; - - /* Position should be < len-1 because the character at "pos" should be - * followed by a \n. Note that strchr cannot be used because it doesn't - * allow to search a limited length and the buffer that is being searched - * might not have a trailing NULL character. */ - while (pos < _len) { - while(pos < _len && s[pos] != '\r') pos++; - if (s[pos] != '\r') { - /* Not found. */ - return NULL; - } else { - if (s[pos+1] == '\n') { - /* Found. */ - return s+pos; - } else { - /* Continue searching. */ - pos++; - } - } - } - return NULL; -} - -/* Read a long long value starting at *s, under the assumption that it will be - * terminated by \r\n. Ambiguously returns -1 for unexpected input. */ -static long long readLongLong(char *s) { - long long v = 0; - int dec, mult = 1; - char c; - - if (*s == '-') { - mult = -1; - s++; - } else if (*s == '+') { - mult = 1; - s++; - } - - while ((c = *(s++)) != '\r') { - dec = c - '0'; - if (dec >= 0 && dec < 10) { - v *= 10; - v += dec; - } else { - /* Should not happen... */ - return -1; - } - } - - return mult*v; -} - -static char *readLine(redisReader *r, int *_len) { - char *p, *s; - int len; - - p = r->buf+r->pos; - s = seekNewline(p,(r->len-r->pos)); - if (s != NULL) { - len = s-(r->buf+r->pos); - r->pos += len+2; /* skip \r\n */ - if (_len) *_len = len; - return p; - } - return NULL; -} - -static void moveToNextTask(redisReader *r) { - redisReadTask *cur, *prv; - while (r->ridx >= 0) { - /* Return a.s.a.p. when the stack is now empty. 
*/ - if (r->ridx == 0) { - r->ridx--; - return; - } - - cur = &(r->rstack[r->ridx]); - prv = &(r->rstack[r->ridx-1]); - assert(prv->type == REDIS_REPLY_ARRAY); - if (cur->idx == prv->elements-1) { - r->ridx--; - } else { - /* Reset the type because the next item can be anything */ - assert(cur->idx < prv->elements); - cur->type = -1; - cur->elements = -1; - cur->idx++; - return; - } - } -} - -static int processLineItem(redisReader *r) { - redisReadTask *cur = &(r->rstack[r->ridx]); - void *obj; - char *p; - int len; - - if ((p = readLine(r,&len)) != NULL) { - if (cur->type == REDIS_REPLY_INTEGER) { - if (r->fn && r->fn->createInteger) - obj = r->fn->createInteger(cur,readLongLong(p)); - else - obj = (void*)REDIS_REPLY_INTEGER; - } else { - /* Type will be error or status. */ - if (r->fn && r->fn->createString) - obj = r->fn->createString(cur,p,len); - else - obj = (void*)(size_t)(cur->type); - } - - if (obj == NULL) { - __redisReaderSetErrorOOM(r); - return REDIS_ERR; - } - - /* Set reply if this is the root object. */ - if (r->ridx == 0) r->reply = obj; - moveToNextTask(r); - return REDIS_OK; - } - - return REDIS_ERR; -} - -static int processBulkItem(redisReader *r) { - redisReadTask *cur = &(r->rstack[r->ridx]); - void *obj = NULL; - char *p, *s; - long len; - unsigned long bytelen; - int success = 0; - - p = r->buf+r->pos; - s = seekNewline(p,r->len-r->pos); - if (s != NULL) { - p = r->buf+r->pos; - bytelen = s-(r->buf+r->pos)+2; /* include \r\n */ - len = readLongLong(p); - - if (len < 0) { - /* The nil object can always be created. */ - if (r->fn && r->fn->createNil) - obj = r->fn->createNil(cur); - else - obj = (void*)REDIS_REPLY_NIL; - success = 1; - } else { - /* Only continue when the buffer contains the entire bulk item. 
*/ - bytelen += len+2; /* include \r\n */ - if (r->pos+bytelen <= r->len) { - if (r->fn && r->fn->createString) - obj = r->fn->createString(cur,s+2,len); - else - obj = (void*)REDIS_REPLY_STRING; - success = 1; - } - } - - /* Proceed when obj was created. */ - if (success) { - if (obj == NULL) { - __redisReaderSetErrorOOM(r); - return REDIS_ERR; - } - - r->pos += bytelen; - - /* Set reply if this is the root object. */ - if (r->ridx == 0) r->reply = obj; - moveToNextTask(r); - return REDIS_OK; - } - } - - return REDIS_ERR; -} - -static int processMultiBulkItem(redisReader *r) { - redisReadTask *cur = &(r->rstack[r->ridx]); - void *obj; - char *p; - long elements; - int root = 0; - - /* Set error for nested multi bulks with depth > 7 */ - if (r->ridx == 8) { - __redisReaderSetError(r,REDIS_ERR_PROTOCOL, - "No support for nested multi bulk replies with depth > 7"); - return REDIS_ERR; - } - - if ((p = readLine(r,NULL)) != NULL) { - elements = readLongLong(p); - root = (r->ridx == 0); - - if (elements == -1) { - if (r->fn && r->fn->createNil) - obj = r->fn->createNil(cur); - else - obj = (void*)REDIS_REPLY_NIL; - - if (obj == NULL) { - __redisReaderSetErrorOOM(r); - return REDIS_ERR; - } - - moveToNextTask(r); - } else { - if (r->fn && r->fn->createArray) - obj = r->fn->createArray(cur,elements); - else - obj = (void*)REDIS_REPLY_ARRAY; - - if (obj == NULL) { - __redisReaderSetErrorOOM(r); - return REDIS_ERR; - } - - /* Modify task stack when there are more than 0 elements. */ - if (elements > 0) { - cur->elements = elements; - cur->obj = obj; - r->ridx++; - r->rstack[r->ridx].type = -1; - r->rstack[r->ridx].elements = -1; - r->rstack[r->ridx].idx = 0; - r->rstack[r->ridx].obj = NULL; - r->rstack[r->ridx].parent = cur; - r->rstack[r->ridx].privdata = r->privdata; - } else { - moveToNextTask(r); - } - } - - /* Set reply if this is the root object. 
*/ - if (root) r->reply = obj; - return REDIS_OK; - } - - return REDIS_ERR; -} - -static int processItem(redisReader *r) { - redisReadTask *cur = &(r->rstack[r->ridx]); - char *p; - - /* check if we need to read type */ - if (cur->type < 0) { - if ((p = readBytes(r,1)) != NULL) { - switch (p[0]) { - case '-': - cur->type = REDIS_REPLY_ERROR; - break; - case '+': - cur->type = REDIS_REPLY_STATUS; - break; - case ':': - cur->type = REDIS_REPLY_INTEGER; - break; - case '$': - cur->type = REDIS_REPLY_STRING; - break; - case '*': - cur->type = REDIS_REPLY_ARRAY; - break; - default: - __redisReaderSetErrorProtocolByte(r,*p); - return REDIS_ERR; - } - } else { - /* could not consume 1 byte */ - return REDIS_ERR; - } - } - - /* process typed item */ - switch(cur->type) { - case REDIS_REPLY_ERROR: - case REDIS_REPLY_STATUS: - case REDIS_REPLY_INTEGER: - return processLineItem(r); - case REDIS_REPLY_STRING: - return processBulkItem(r); - case REDIS_REPLY_ARRAY: - return processMultiBulkItem(r); - default: - assert(NULL); - return REDIS_ERR; /* Avoid warning. */ - } -} - -redisReader *redisReaderCreate(void) { - redisReader *r; - - r = calloc(sizeof(redisReader),1); - if (r == NULL) - return NULL; - - r->err = 0; - r->errstr[0] = '\0'; - r->fn = &defaultFunctions; - r->buf = sdsempty(); - r->maxbuf = REDIS_READER_MAX_BUF; - if (r->buf == NULL) { - free(r); - return NULL; - } - - r->ridx = -1; - return r; -} - -void redisReaderFree(redisReader *r) { - if (r->reply != NULL && r->fn && r->fn->freeObject) - r->fn->freeObject(r->reply); - if (r->buf != NULL) - sdsfree(r->buf); - free(r); -} - -int redisReaderFeed(redisReader *r, const char *buf, size_t len) { - sds newbuf; - - /* Return early when this reader is in an erroneous state. */ - if (r->err) - return REDIS_ERR; - - /* Copy the provided buffer. */ - if (buf != NULL && len >= 1) { - /* Destroy internal buffer when it is empty and is quite large. 
*/ - if (r->len == 0 && r->maxbuf != 0 && sdsavail(r->buf) > r->maxbuf) { - sdsfree(r->buf); - r->buf = sdsempty(); - r->pos = 0; - - /* r->buf should not be NULL since we just free'd a larger one. */ - assert(r->buf != NULL); - } - - newbuf = sdscatlen(r->buf,buf,len); - if (newbuf == NULL) { - __redisReaderSetErrorOOM(r); - return REDIS_ERR; - } - - r->buf = newbuf; - r->len = sdslen(r->buf); - } - - return REDIS_OK; -} - -int redisReaderGetReply(redisReader *r, void **reply) { - /* Default target pointer to NULL. */ - if (reply != NULL) - *reply = NULL; - - /* Return early when this reader is in an erroneous state. */ - if (r->err) - return REDIS_ERR; - - /* When the buffer is empty, there will never be a reply. */ - if (r->len == 0) - return REDIS_OK; - - /* Set first item to process when the stack is empty. */ - if (r->ridx == -1) { - r->rstack[0].type = -1; - r->rstack[0].elements = -1; - r->rstack[0].idx = -1; - r->rstack[0].obj = NULL; - r->rstack[0].parent = NULL; - r->rstack[0].privdata = r->privdata; - r->ridx = 0; - } - - /* Process items in reply. */ - while (r->ridx >= 0) - if (processItem(r) != REDIS_OK) - break; - - /* Return ASAP when an error occurred. */ - if (r->err) - return REDIS_ERR; - - /* Discard part of the buffer when we've consumed at least 1k, to avoid - * doing unnecessary calls to memmove() in sds.c. */ - if (r->pos >= 1024) { - sdsrange(r->buf,r->pos,-1); - r->pos = 0; - r->len = sdslen(r->buf); - } - - /* Emit a reply when there is one. */ - if (r->ridx == -1) { - if (reply != NULL) - *reply = r->reply; - r->reply = NULL; - } - return REDIS_OK; -} - -/* Calculate the number of bytes needed to represent an integer as string. */ -static int intlen(int i) { - int len = 0; - if (i < 0) { - len++; - i = -i; - } - do { - len++; - i /= 10; - } while(i); - return len; +/* Return the number of digits of 'v' when converted to string in radix 10. + * Implementation borrowed from link in redis/src/util.c:string2ll(). 
*/ +static uint32_t countDigits(uint64_t v) { + uint32_t result = 1; + for (;;) { + if (v < 10) return result; + if (v < 100) return result + 1; + if (v < 1000) return result + 2; + if (v < 10000) return result + 3; + v /= 10000U; + result += 4; + } } /* Helper that calculates the bulk length given a certain string length. */ static size_t bulklen(size_t len) { - return 1+intlen(len)+2+len+2; + return 1+countDigits(len)+2+len+2; } int redisvFormatCommand(char **target, const char *format, va_list ap) { @@ -692,6 +216,7 @@ int redisvFormatCommand(char **target, const char *format, va_list ap) { char **curargv = NULL, **newargv = NULL; int argc = 0; int totlen = 0; + int error_type = 0; /* 0 = no error; -1 = memory error; -2 = format error */ int j; /* Abort if there is not target to set */ @@ -708,19 +233,19 @@ int redisvFormatCommand(char **target, const char *format, va_list ap) { if (*c == ' ') { if (touched) { newargv = realloc(curargv,sizeof(char*)*(argc+1)); - if (newargv == NULL) goto err; + if (newargv == NULL) goto memory_err; curargv = newargv; curargv[argc++] = curarg; totlen += bulklen(sdslen(curarg)); /* curarg is put in argv so it can be overwritten. 
*/ curarg = sdsempty(); - if (curarg == NULL) goto err; + if (curarg == NULL) goto memory_err; touched = 0; } } else { newarg = sdscatlen(curarg,c,1); - if (newarg == NULL) goto err; + if (newarg == NULL) goto memory_err; curarg = newarg; touched = 1; } @@ -751,17 +276,14 @@ int redisvFormatCommand(char **target, const char *format, va_list ap) { /* Try to detect printf format */ { static const char intfmts[] = "diouxX"; + static const char flags[] = "#0-+ "; char _format[16]; const char *_p = c+1; size_t _l = 0; va_list _cpy; /* Flags */ - if (*_p != '\0' && *_p == '#') _p++; - if (*_p != '\0' && *_p == '0') _p++; - if (*_p != '\0' && *_p == '-') _p++; - if (*_p != '\0' && *_p == ' ') _p++; - if (*_p != '\0' && *_p == '+') _p++; + while (*_p != '\0' && strchr(flags,*_p) != NULL) _p++; /* Field width */ while (*_p != '\0' && isdigit(*_p)) _p++; @@ -829,7 +351,7 @@ int redisvFormatCommand(char **target, const char *format, va_list ap) { fmt_invalid: va_end(_cpy); - goto err; + goto format_err; fmt_valid: _l = (_p+1)-c; @@ -848,7 +370,7 @@ int redisvFormatCommand(char **target, const char *format, va_list ap) { } } - if (newarg == NULL) goto err; + if (newarg == NULL) goto memory_err; curarg = newarg; touched = 1; @@ -860,7 +382,7 @@ int redisvFormatCommand(char **target, const char *format, va_list ap) { /* Add the last argument if needed */ if (touched) { newargv = realloc(curargv,sizeof(char*)*(argc+1)); - if (newargv == NULL) goto err; + if (newargv == NULL) goto memory_err; curargv = newargv; curargv[argc++] = curarg; totlen += bulklen(sdslen(curarg)); @@ -872,11 +394,11 @@ int redisvFormatCommand(char **target, const char *format, va_list ap) { curarg = NULL; /* Add bytes needed to hold multi bulk count */ - totlen += 1+intlen(argc)+2; + totlen += 1+countDigits(argc)+2; /* Build the command at protocol level */ cmd = malloc(totlen+1); - if (cmd == NULL) goto err; + if (cmd == NULL) goto memory_err; pos = sprintf(cmd,"*%d\r\n",argc); for (j = 0; j < argc; j++) { 
@@ -894,20 +416,29 @@ int redisvFormatCommand(char **target, const char *format, va_list ap) { *target = cmd; return totlen; -err: - while(argc--) - sdsfree(curargv[argc]); - free(curargv); +format_err: + error_type = -2; + goto cleanup; - if (curarg != NULL) - sdsfree(curarg); +memory_err: + error_type = -1; + goto cleanup; + +cleanup: + if (curargv) { + while(argc--) + sdsfree(curargv[argc]); + free(curargv); + } + + sdsfree(curarg); /* No need to check cmd since it is the last statement that can fail, * but do it anyway to be as defensive as possible. */ if (cmd != NULL) free(cmd); - return -1; + return error_type; } /* Format a command according to the Redis protocol. This function @@ -928,9 +459,69 @@ int redisFormatCommand(char **target, const char *format, ...) { va_start(ap,format); len = redisvFormatCommand(target,format,ap); va_end(ap); + + /* The API says "-1" means bad result, but we now also return "-2" in some + * cases. Force the return value to always be -1. */ + if (len < 0) + len = -1; + return len; } +/* Format a command according to the Redis protocol using an sds string and + * sdscatfmt for the processing of arguments. This function takes the + * number of arguments, an array with arguments and an array with their + * lengths. If the latter is set to NULL, strlen will be used to compute the + * argument lengths. + */ +int redisFormatSdsCommandArgv(sds *target, int argc, const char **argv, + const size_t *argvlen) +{ + sds cmd; + unsigned long long totlen; + int j; + size_t len; + + /* Abort on a NULL target */ + if (target == NULL) + return -1; + + /* Calculate our total size */ + totlen = 1+countDigits(argc)+2; + for (j = 0; j < argc; j++) { + len = argvlen ? 
argvlen[j] : strlen(argv[j]); + totlen += bulklen(len); + } + + /* Use an SDS string for command construction */ + cmd = sdsempty(); + if (cmd == NULL) + return -1; + + /* We already know how much storage we need */ + cmd = sdsMakeRoomFor(cmd, totlen); + if (cmd == NULL) + return -1; + + /* Construct command */ + cmd = sdscatfmt(cmd, "*%i\r\n", argc); + for (j=0; j < argc; j++) { + len = argvlen ? argvlen[j] : strlen(argv[j]); + cmd = sdscatfmt(cmd, "$%u\r\n", len); + cmd = sdscatlen(cmd, argv[j], len); + cmd = sdscatlen(cmd, "\r\n", sizeof("\r\n")-1); + } + + assert(sdslen(cmd)==totlen); + + *target = cmd; + return totlen; +} + +void redisFreeSdsCommand(sds cmd) { + sdsfree(cmd); +} + /* Format a command according to the Redis protocol. This function takes the * number of arguments, an array with arguments and an array with their * lengths. If the latter is set to NULL, strlen will be used to compute the @@ -942,8 +533,12 @@ int redisFormatCommandArgv(char **target, int argc, const char **argv, const siz size_t len; int totlen, j; + /* Abort on a NULL target */ + if (target == NULL) + return -1; + /* Calculate number of bytes needed for the command */ - totlen = 1+intlen(argc)+2; + totlen = 1+countDigits(argc)+2; for (j = 0; j < argc; j++) { len = argvlen ? argvlen[j] : strlen(argv[j]); totlen += bulklen(len); @@ -970,6 +565,10 @@ int redisFormatCommandArgv(char **target, int argc, const char **argv, const siz return totlen; } +void redisFreeCommand(char *cmd) { + free(cmd); +} + void __redisSetError(redisContext *c, int type, const char *str) { size_t len; @@ -982,10 +581,14 @@ void __redisSetError(redisContext *c, int type, const char *str) { } else { /* Only REDIS_ERR_IO may lack a description! 
*/ assert(type == REDIS_ERR_IO); - strerror_r(errno,c->errstr,sizeof(c->errstr)); + __redis_strerror_r(errno, c->errstr, sizeof(c->errstr)); } } +redisReader *redisReaderCreate(void) { + return redisReaderCreateWithFunctions(&defaultFunctions); +} + static redisContext *redisContextInit(void) { redisContext *c; @@ -997,24 +600,72 @@ static redisContext *redisContextInit(void) { c->errstr[0] = '\0'; c->obuf = sdsempty(); c->reader = redisReaderCreate(); + c->tcp.host = NULL; + c->tcp.source_addr = NULL; + c->unix_sock.path = NULL; + c->timeout = NULL; + + if (c->obuf == NULL || c->reader == NULL) { + redisFree(c); + return NULL; + } + return c; } void redisFree(redisContext *c) { + if (c == NULL) + return; if (c->fd > 0) close(c->fd); if (c->obuf != NULL) sdsfree(c->obuf); if (c->reader != NULL) redisReaderFree(c->reader); + if (c->tcp.host) + free(c->tcp.host); + if (c->tcp.source_addr) + free(c->tcp.source_addr); + if (c->unix_sock.path) + free(c->unix_sock.path); + if (c->timeout) + free(c->timeout); free(c); } int redisFreeKeepFd(redisContext *c) { - int fd = c->fd; - c->fd = -1; - redisFree(c); - return fd; + int fd = c->fd; + c->fd = -1; + redisFree(c); + return fd; +} + +int redisReconnect(redisContext *c) { + c->err = 0; + memset(c->errstr, '\0', strlen(c->errstr)); + + if (c->fd > 0) { + close(c->fd); + } + + sdsfree(c->obuf); + redisReaderFree(c->reader); + + c->obuf = sdsempty(); + c->reader = redisReaderCreate(); + + if (c->connection_type == REDIS_CONN_TCP) { + return redisContextConnectBindTcp(c, c->tcp.host, c->tcp.port, + c->timeout, c->tcp.source_addr); + } else if (c->connection_type == REDIS_CONN_UNIX) { + return redisContextConnectUnix(c, c->unix_sock.path, c->timeout); + } else { + /* Something bad happened here and shouldn't have. There isn't + enough information in the context to reconnect. */ + __redisSetError(c,REDIS_ERR_OTHER,"Not enough information to reconnect"); + } + + return REDIS_ERR; } /* Connect to a Redis instance. 
On error the field error in the returned @@ -1064,6 +715,15 @@ redisContext *redisConnectBindNonBlock(const char *ip, int port, return c; } +redisContext *redisConnectBindNonBlockWithReuse(const char *ip, int port, + const char *source_addr) { + redisContext *c = redisContextInit(); + c->flags &= ~REDIS_BLOCK; + c->flags |= REDIS_REUSEADDR; + redisContextConnectBindTcp(c,ip,port,NULL,source_addr); + return c; +} + redisContext *redisConnectUnix(const char *path) { redisContext *c; @@ -1162,10 +822,10 @@ int redisBufferRead(redisContext *c) { /* Write the output buffer to the socket. * * Returns REDIS_OK when the buffer is empty, or (a part of) the buffer was - * succesfully written to the socket. When the buffer is empty after the + * successfully written to the socket. When the buffer is empty after the * write operation, "done" is set to 1 (if given). * - * Returns REDIS_ERR if an error occured trying to write and sets + * Returns REDIS_ERR if an error occurred trying to write and sets * c->errstr to hold the appropriate error string. */ int redisBufferWrite(redisContext *c, int *done) { @@ -1274,6 +934,9 @@ int redisvAppendCommand(redisContext *c, const char *format, va_list ap) { if (len == -1) { __redisSetError(c,REDIS_ERR_OOM,"Out of memory"); return REDIS_ERR; + } else if (len == -2) { + __redisSetError(c,REDIS_ERR_OTHER,"Invalid format string"); + return REDIS_ERR; } if (__redisAppendCommand(c,cmd,len) != REDIS_OK) { @@ -1296,21 +959,21 @@ int redisAppendCommand(redisContext *c, const char *format, ...) 
{ } int redisAppendCommandArgv(redisContext *c, int argc, const char **argv, const size_t *argvlen) { - char *cmd; + sds cmd; int len; - len = redisFormatCommandArgv(&cmd,argc,argv,argvlen); + len = redisFormatSdsCommandArgv(&cmd,argc,argv,argvlen); if (len == -1) { __redisSetError(c,REDIS_ERR_OOM,"Out of memory"); return REDIS_ERR; } if (__redisAppendCommand(c,cmd,len) != REDIS_OK) { - free(cmd); + sdsfree(cmd); return REDIS_ERR; } - free(cmd); + sdsfree(cmd); return REDIS_OK; } @@ -1321,7 +984,7 @@ int redisAppendCommandArgv(redisContext *c, int argc, const char **argv, const s * context is non-blocking, the "reply" pointer will not be used and the * command is simply appended to the write buffer. * - * Returns the reply when a reply was succesfully retrieved. Returns NULL + * Returns the reply when a reply was successfully retrieved. Returns NULL * otherwise. When NULL is returned in a blocking context, the error field * in the context will be set. */ diff --git a/deps/hiredis/hiredis.h b/deps/hiredis/hiredis.h index 7700f4b89..423d5e504 100644 --- a/deps/hiredis/hiredis.h +++ b/deps/hiredis/hiredis.h @@ -1,6 +1,8 @@ /* * Copyright (c) 2009-2011, Salvatore Sanfilippo - * Copyright (c) 2010-2011, Pieter Noordhuis + * Copyright (c) 2010-2014, Pieter Noordhuis + * Copyright (c) 2015, Matt Stancliff , + * Jan-Erik Rediger * * All rights reserved. * @@ -31,26 +33,16 @@ #ifndef __HIREDIS_H #define __HIREDIS_H -#include /* for size_t */ +#include "read.h" #include /* for va_list */ #include /* for struct timeval */ +#include /* uintXX_t, etc */ +#include "sds.h" /* for sds */ #define HIREDIS_MAJOR 0 -#define HIREDIS_MINOR 11 -#define HIREDIS_PATCH 0 - -#define REDIS_ERR -1 -#define REDIS_OK 0 - -/* When an error occurs, the err flag in a context is set to hold the type of - * error that occured. REDIS_ERR_IO means there was an I/O error and you - * should use the "errno" variable to find out what is wrong. 
- * For other values, the "errstr" field will hold a description. */ -#define REDIS_ERR_IO 1 /* Error in read or write */ -#define REDIS_ERR_EOF 3 /* End of file */ -#define REDIS_ERR_PROTOCOL 4 /* Protocol error */ -#define REDIS_ERR_OOM 5 /* Out of memory */ -#define REDIS_ERR_OTHER 2 /* Everything else... */ +#define HIREDIS_MINOR 13 +#define HIREDIS_PATCH 3 +#define HIREDIS_SONAME 0.13 /* Connection type can be blocking or non-blocking and is set in the * least significant bit of the flags field in redisContext. */ @@ -79,17 +71,39 @@ /* Flag that is set when monitor mode is active */ #define REDIS_MONITORING 0x40 -#define REDIS_REPLY_STRING 1 -#define REDIS_REPLY_ARRAY 2 -#define REDIS_REPLY_INTEGER 3 -#define REDIS_REPLY_NIL 4 -#define REDIS_REPLY_STATUS 5 -#define REDIS_REPLY_ERROR 6 - -#define REDIS_READER_MAX_BUF (1024*16) /* Default max unused reader buffer. */ +/* Flag that is set when we should set SO_REUSEADDR before calling bind() */ +#define REDIS_REUSEADDR 0x80 #define REDIS_KEEPALIVE_INTERVAL 15 /* seconds */ +/* number of times we retry to connect in the case of EADDRNOTAVAIL and + * SO_REUSEADDR is being used. */ +#define REDIS_CONNECT_RETRIES 10 + +/* strerror_r has two completely different prototypes and behaviors + * depending on system issues, so we need to operate on the error buffer + * differently depending on which strerror_r we're using. */ +#ifndef _GNU_SOURCE +/* "regular" POSIX strerror_r that does the right thing. */ +#define __redis_strerror_r(errno, buf, len) \ + do { \ + strerror_r((errno), (buf), (len)); \ + } while (0) +#else +/* "bad" GNU strerror_r we need to clean up after. */ +#define __redis_strerror_r(errno, buf, len) \ + do { \ + char *err_str = strerror_r((errno), (buf), (len)); \ + /* If return value _isn't_ the start of the buffer we passed in, \ + * then GNU strerror_r returned an internal static buffer and we \ + * need to copy the result into our private buffer. 
*/ \ + if (err_str != (buf)) { \ + strncpy((buf), err_str, ((len) - 1)); \ + buf[(len)-1] = '\0'; \ + } \ + } while (0) +#endif + #ifdef __cplusplus extern "C" { #endif @@ -98,61 +112,13 @@ extern "C" { typedef struct redisReply { int type; /* REDIS_REPLY_* */ long long integer; /* The integer when type is REDIS_REPLY_INTEGER */ - int len; /* Length of string */ + size_t len; /* Length of string */ char *str; /* Used for both REDIS_REPLY_ERROR and REDIS_REPLY_STRING */ size_t elements; /* number of elements, for REDIS_REPLY_ARRAY */ struct redisReply **element; /* elements vector for REDIS_REPLY_ARRAY */ } redisReply; -typedef struct redisReadTask { - int type; - int elements; /* number of elements in multibulk container */ - int idx; /* index in parent (array) object */ - void *obj; /* holds user-generated value for a read task */ - struct redisReadTask *parent; /* parent task */ - void *privdata; /* user-settable arbitrary field */ -} redisReadTask; - -typedef struct redisReplyObjectFunctions { - void *(*createString)(const redisReadTask*, char*, size_t); - void *(*createArray)(const redisReadTask*, int); - void *(*createInteger)(const redisReadTask*, long long); - void *(*createNil)(const redisReadTask*); - void (*freeObject)(void*); -} redisReplyObjectFunctions; - -/* State for the protocol parser */ -typedef struct redisReader { - int err; /* Error flags, 0 when there is no error */ - char errstr[128]; /* String representation of error when applicable */ - - char *buf; /* Read buffer */ - size_t pos; /* Buffer cursor */ - size_t len; /* Buffer length */ - size_t maxbuf; /* Max length of unused buffer */ - - redisReadTask rstack[9]; - int ridx; /* Index of current read task */ - void *reply; /* Temporary reply pointer */ - - redisReplyObjectFunctions *fn; - void *privdata; -} redisReader; - -/* Public API for the protocol parser. 
*/ redisReader *redisReaderCreate(void); -void redisReaderFree(redisReader *r); -int redisReaderFeed(redisReader *r, const char *buf, size_t len); -int redisReaderGetReply(redisReader *r, void **reply); - -/* Backwards compatibility, can be removed on big version bump. */ -#define redisReplyReaderCreate redisReaderCreate -#define redisReplyReaderFree redisReaderFree -#define redisReplyReaderFeed redisReaderFeed -#define redisReplyReaderGetReply redisReaderGetReply -#define redisReplyReaderSetPrivdata(_r, _p) (int)(((redisReader*)(_r))->privdata = (_p)) -#define redisReplyReaderGetObject(_r) (((redisReader*)(_r))->reply) -#define redisReplyReaderGetError(_r) (((redisReader*)(_r))->errstr) /* Function to free the reply objects hiredis returns by default. */ void freeReplyObject(void *reply); @@ -161,6 +127,14 @@ void freeReplyObject(void *reply); int redisvFormatCommand(char **target, const char *format, va_list ap); int redisFormatCommand(char **target, const char *format, ...); int redisFormatCommandArgv(char **target, int argc, const char **argv, const size_t *argvlen); +int redisFormatSdsCommandArgv(sds *target, int argc, const char ** argv, const size_t *argvlen); +void redisFreeCommand(char *cmd); +void redisFreeSdsCommand(sds cmd); + +enum redisConnectionType { + REDIS_CONN_TCP, + REDIS_CONN_UNIX +}; /* Context for a connection to Redis */ typedef struct redisContext { @@ -170,16 +144,45 @@ typedef struct redisContext { int flags; char *obuf; /* Write buffer */ redisReader *reader; /* Protocol reader */ + + enum redisConnectionType connection_type; + struct timeval *timeout; + + struct { + char *host; + char *source_addr; + int port; + } tcp; + + struct { + char *path; + } unix_sock; + } redisContext; redisContext *redisConnect(const char *ip, int port); redisContext *redisConnectWithTimeout(const char *ip, int port, const struct timeval tv); redisContext *redisConnectNonBlock(const char *ip, int port); -redisContext *redisConnectBindNonBlock(const char *ip, 
int port, const char *source_addr); +redisContext *redisConnectBindNonBlock(const char *ip, int port, + const char *source_addr); +redisContext *redisConnectBindNonBlockWithReuse(const char *ip, int port, + const char *source_addr); redisContext *redisConnectUnix(const char *path); redisContext *redisConnectUnixWithTimeout(const char *path, const struct timeval tv); redisContext *redisConnectUnixNonBlock(const char *path); redisContext *redisConnectFd(int fd); + +/** + * Reconnect the given context using the saved information. + * + * This re-uses the exact same connect options as in the initial connection. + * host, ip (or path), timeout and bind address are reused, + * flags are used unmodified from the existing context. + * + * Returns REDIS_OK on successful connect or REDIS_ERR otherwise. + */ +int redisReconnect(redisContext *c); + int redisSetTimeout(redisContext *c, const struct timeval tv); int redisEnableKeepAlive(redisContext *c); void redisFree(redisContext *c); diff --git a/deps/hiredis/net.c b/deps/hiredis/net.c index bdb84ceed..7d4120985 100644 --- a/deps/hiredis/net.c +++ b/deps/hiredis/net.c @@ -1,7 +1,9 @@ /* Extracted from anet.c to work properly with Hiredis error reporting. * - * Copyright (c) 2006-2011, Salvatore Sanfilippo - * Copyright (c) 2010-2011, Pieter Noordhuis + * Copyright (c) 2009-2011, Salvatore Sanfilippo + * Copyright (c) 2010-2014, Pieter Noordhuis + * Copyright (c) 2015, Matt Stancliff , + * Jan-Erik Rediger * * All rights reserved. 
* @@ -47,6 +49,7 @@ #include #include #include +#include #include "net.h" #include "sds.h" @@ -67,7 +70,7 @@ static void __redisSetErrorFromErrno(redisContext *c, int type, const char *pref if (prefix != NULL) len = snprintf(buf,sizeof(buf),"%s: ",prefix); - strerror_r(errno,buf+len,sizeof(buf)-len); + __redis_strerror_r(errno, (char *)(buf + len), sizeof(buf) - len); __redisSetError(c,type,buf); } @@ -138,7 +141,7 @@ int redisKeepAlive(redisContext *c, int interval) { return REDIS_ERR; } #else -#ifndef __sun +#if defined(__GLIBC__) && !defined(__FreeBSD_kernel__) val = interval; if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &val, sizeof(val)) < 0) { __redisSetError(c,REDIS_ERR_OTHER,strerror(errno)); @@ -175,19 +178,15 @@ static int redisSetTcpNoDelay(redisContext *c) { #define __MAX_MSEC (((LONG_MAX) - 999) / 1000) -static int redisContextWaitReady(redisContext *c, const struct timeval *timeout) { - struct pollfd wfd[1]; - long msec; - - msec = -1; - wfd[0].fd = c->fd; - wfd[0].events = POLLOUT; +static int redisContextTimeoutMsec(redisContext *c, long *result) +{ + const struct timeval *timeout = c->timeout; + long msec = -1; /* Only use timeout when not NULL. 
*/ if (timeout != NULL) { if (timeout->tv_usec > 1000000 || timeout->tv_sec > __MAX_MSEC) { - __redisSetErrorFromErrno(c, REDIS_ERR_IO, NULL); - redisContextCloseFd(c); + *result = msec; return REDIS_ERR; } @@ -198,6 +197,16 @@ static int redisContextWaitReady(redisContext *c, const struct timeval *timeout) } } + *result = msec; + return REDIS_OK; +} + +static int redisContextWaitReady(redisContext *c, long msec) { + struct pollfd wfd[1]; + + wfd[0].fd = c->fd; + wfd[0].events = POLLOUT; + if (errno == EINPROGRESS) { int res; @@ -256,10 +265,57 @@ int redisContextSetTimeout(redisContext *c, const struct timeval tv) { static int _redisContextConnectTcp(redisContext *c, const char *addr, int port, const struct timeval *timeout, const char *source_addr) { - int s, rv; + int s, rv, n; char _port[6]; /* strlen("65535"); */ struct addrinfo hints, *servinfo, *bservinfo, *p, *b; int blocking = (c->flags & REDIS_BLOCK); + int reuseaddr = (c->flags & REDIS_REUSEADDR); + int reuses = 0; + long timeout_msec = -1; + + servinfo = NULL; + c->connection_type = REDIS_CONN_TCP; + c->tcp.port = port; + + /* We need to take possession of the passed parameters + * to make them reusable for a reconnect. + * We also carefully check we don't free data we already own, + * as in the case of the reconnect method. + * + * This is a bit ugly, but atleast it works and doesn't leak memory. 
+ **/ + if (c->tcp.host != addr) { + if (c->tcp.host) + free(c->tcp.host); + + c->tcp.host = strdup(addr); + } + + if (timeout) { + if (c->timeout != timeout) { + if (c->timeout == NULL) + c->timeout = malloc(sizeof(struct timeval)); + + memcpy(c->timeout, timeout, sizeof(struct timeval)); + } + } else { + if (c->timeout) + free(c->timeout); + c->timeout = NULL; + } + + if (redisContextTimeoutMsec(c, &timeout_msec) != REDIS_OK) { + __redisSetError(c, REDIS_ERR_IO, "Invalid timeout specified"); + goto error; + } + + if (source_addr == NULL) { + free(c->tcp.source_addr); + c->tcp.source_addr = NULL; + } else if (c->tcp.source_addr != source_addr) { + free(c->tcp.source_addr); + c->tcp.source_addr = strdup(source_addr); + } snprintf(_port, 6, "%d", port); memset(&hints,0,sizeof(hints)); @@ -271,7 +327,7 @@ static int _redisContextConnectTcp(redisContext *c, const char *addr, int port, * as this would add latency to every connect. Otherwise a more sensible * route could be: Use IPv6 if both addresses are available and there is IPv6 * connectivity. 
*/ - if ((rv = getaddrinfo(addr,_port,&hints,&servinfo)) != 0) { + if ((rv = getaddrinfo(c->tcp.host,_port,&hints,&servinfo)) != 0) { hints.ai_family = AF_INET6; if ((rv = getaddrinfo(addr,_port,&hints,&servinfo)) != 0) { __redisSetError(c,REDIS_ERR_OTHER,gai_strerror(rv)); @@ -279,21 +335,31 @@ static int _redisContextConnectTcp(redisContext *c, const char *addr, int port, } } for (p = servinfo; p != NULL; p = p->ai_next) { +addrretry: if ((s = socket(p->ai_family,p->ai_socktype,p->ai_protocol)) == -1) continue; c->fd = s; if (redisSetBlocking(c,0) != REDIS_OK) goto error; - if (source_addr) { + if (c->tcp.source_addr) { int bound = 0; /* Using getaddrinfo saves us from self-determining IPv4 vs IPv6 */ - if ((rv = getaddrinfo(source_addr, NULL, &hints, &bservinfo)) != 0) { + if ((rv = getaddrinfo(c->tcp.source_addr, NULL, &hints, &bservinfo)) != 0) { char buf[128]; snprintf(buf,sizeof(buf),"Can't get addr: %s",gai_strerror(rv)); __redisSetError(c,REDIS_ERR_OTHER,buf); goto error; } + + if (reuseaddr) { + n = 1; + if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (char*) &n, + sizeof(n)) < 0) { + goto error; + } + } + for (b = bservinfo; b != NULL; b = b->ai_next) { if (bind(s,b->ai_addr,b->ai_addrlen) != -1) { bound = 1; @@ -314,8 +380,15 @@ static int _redisContextConnectTcp(redisContext *c, const char *addr, int port, continue; } else if (errno == EINPROGRESS && !blocking) { /* This is ok. 
*/ + } else if (errno == EADDRNOTAVAIL && reuseaddr) { + if (++reuses >= REDIS_CONNECT_RETRIES) { + goto error; + } else { + redisContextCloseFd(c); + goto addrretry; + } } else { - if (redisContextWaitReady(c,timeout) != REDIS_OK) + if (redisContextWaitReady(c,timeout_msec) != REDIS_OK) goto error; } } @@ -356,19 +429,40 @@ int redisContextConnectBindTcp(redisContext *c, const char *addr, int port, int redisContextConnectUnix(redisContext *c, const char *path, const struct timeval *timeout) { int blocking = (c->flags & REDIS_BLOCK); struct sockaddr_un sa; + long timeout_msec = -1; if (redisCreateSocket(c,AF_LOCAL) < 0) return REDIS_ERR; if (redisSetBlocking(c,0) != REDIS_OK) return REDIS_ERR; + c->connection_type = REDIS_CONN_UNIX; + if (c->unix_sock.path != path) + c->unix_sock.path = strdup(path); + + if (timeout) { + if (c->timeout != timeout) { + if (c->timeout == NULL) + c->timeout = malloc(sizeof(struct timeval)); + + memcpy(c->timeout, timeout, sizeof(struct timeval)); + } + } else { + if (c->timeout) + free(c->timeout); + c->timeout = NULL; + } + + if (redisContextTimeoutMsec(c,&timeout_msec) != REDIS_OK) + return REDIS_ERR; + sa.sun_family = AF_LOCAL; strncpy(sa.sun_path,path,sizeof(sa.sun_path)-1); if (connect(c->fd, (struct sockaddr*)&sa, sizeof(sa)) == -1) { if (errno == EINPROGRESS && !blocking) { /* This is ok. */ } else { - if (redisContextWaitReady(c,timeout) != REDIS_OK) + if (redisContextWaitReady(c,timeout_msec) != REDIS_OK) return REDIS_ERR; } } diff --git a/deps/hiredis/net.h b/deps/hiredis/net.h index 3763ab089..2f1a0bf85 100644 --- a/deps/hiredis/net.h +++ b/deps/hiredis/net.h @@ -1,7 +1,9 @@ /* Extracted from anet.c to work properly with Hiredis error reporting. * - * Copyright (c) 2006-2011, Salvatore Sanfilippo - * Copyright (c) 2010-2011, Pieter Noordhuis + * Copyright (c) 2009-2011, Salvatore Sanfilippo + * Copyright (c) 2010-2014, Pieter Noordhuis + * Copyright (c) 2015, Matt Stancliff , + * Jan-Erik Rediger * * All rights reserved. 
* @@ -35,7 +37,7 @@ #include "hiredis.h" -#if defined(__sun) || defined(_AIX) +#if defined(__sun) #define AF_LOCAL AF_UNIX #endif diff --git a/deps/hiredis/read.c b/deps/hiredis/read.c new file mode 100644 index 000000000..50333b534 --- /dev/null +++ b/deps/hiredis/read.c @@ -0,0 +1,525 @@ +/* + * Copyright (c) 2009-2011, Salvatore Sanfilippo + * Copyright (c) 2010-2011, Pieter Noordhuis + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Redis nor the names of its contributors may be used + * to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include "fmacros.h" +#include +#include +#ifndef _MSC_VER +#include +#endif +#include +#include +#include + +#include "read.h" +#include "sds.h" + +static void __redisReaderSetError(redisReader *r, int type, const char *str) { + size_t len; + + if (r->reply != NULL && r->fn && r->fn->freeObject) { + r->fn->freeObject(r->reply); + r->reply = NULL; + } + + /* Clear input buffer on errors. */ + if (r->buf != NULL) { + sdsfree(r->buf); + r->buf = NULL; + r->pos = r->len = 0; + } + + /* Reset task stack. */ + r->ridx = -1; + + /* Set error. */ + r->err = type; + len = strlen(str); + len = len < (sizeof(r->errstr)-1) ? len : (sizeof(r->errstr)-1); + memcpy(r->errstr,str,len); + r->errstr[len] = '\0'; +} + +static size_t chrtos(char *buf, size_t size, char byte) { + size_t len = 0; + + switch(byte) { + case '\\': + case '"': + len = snprintf(buf,size,"\"\\%c\"",byte); + break; + case '\n': len = snprintf(buf,size,"\"\\n\""); break; + case '\r': len = snprintf(buf,size,"\"\\r\""); break; + case '\t': len = snprintf(buf,size,"\"\\t\""); break; + case '\a': len = snprintf(buf,size,"\"\\a\""); break; + case '\b': len = snprintf(buf,size,"\"\\b\""); break; + default: + if (isprint(byte)) + len = snprintf(buf,size,"\"%c\"",byte); + else + len = snprintf(buf,size,"\"\\x%02x\"",(unsigned char)byte); + break; + } + + return len; +} + +static void __redisReaderSetErrorProtocolByte(redisReader *r, char byte) { + char cbuf[8], sbuf[128]; + + chrtos(cbuf,sizeof(cbuf),byte); + snprintf(sbuf,sizeof(sbuf), + "Protocol error, got %s as reply type byte", cbuf); + __redisReaderSetError(r,REDIS_ERR_PROTOCOL,sbuf); +} + +static void __redisReaderSetErrorOOM(redisReader *r) { + __redisReaderSetError(r,REDIS_ERR_OOM,"Out of memory"); +} + +static char *readBytes(redisReader *r, unsigned int bytes) { + char *p; + if (r->len-r->pos >= bytes) { + p = r->buf+r->pos; + r->pos += bytes; + return p; + } + return NULL; +} + +/* Find pointer to \r\n. 
*/ +static char *seekNewline(char *s, size_t len) { + int pos = 0; + int _len = len-1; + + /* Position should be < len-1 because the character at "pos" should be + * followed by a \n. Note that strchr cannot be used because it doesn't + * allow to search a limited length and the buffer that is being searched + * might not have a trailing NULL character. */ + while (pos < _len) { + while(pos < _len && s[pos] != '\r') pos++; + if (pos==_len) { + /* Not found. */ + return NULL; + } else { + if (s[pos+1] == '\n') { + /* Found. */ + return s+pos; + } else { + /* Continue searching. */ + pos++; + } + } + } + return NULL; +} + +/* Read a long long value starting at *s, under the assumption that it will be + * terminated by \r\n. Ambiguously returns -1 for unexpected input. */ +static long long readLongLong(char *s) { + long long v = 0; + int dec, mult = 1; + char c; + + if (*s == '-') { + mult = -1; + s++; + } else if (*s == '+') { + mult = 1; + s++; + } + + while ((c = *(s++)) != '\r') { + dec = c - '0'; + if (dec >= 0 && dec < 10) { + v *= 10; + v += dec; + } else { + /* Should not happen... */ + return -1; + } + } + + return mult*v; +} + +static char *readLine(redisReader *r, int *_len) { + char *p, *s; + int len; + + p = r->buf+r->pos; + s = seekNewline(p,(r->len-r->pos)); + if (s != NULL) { + len = s-(r->buf+r->pos); + r->pos += len+2; /* skip \r\n */ + if (_len) *_len = len; + return p; + } + return NULL; +} + +static void moveToNextTask(redisReader *r) { + redisReadTask *cur, *prv; + while (r->ridx >= 0) { + /* Return a.s.a.p. when the stack is now empty. 
*/ + if (r->ridx == 0) { + r->ridx--; + return; + } + + cur = &(r->rstack[r->ridx]); + prv = &(r->rstack[r->ridx-1]); + assert(prv->type == REDIS_REPLY_ARRAY); + if (cur->idx == prv->elements-1) { + r->ridx--; + } else { + /* Reset the type because the next item can be anything */ + assert(cur->idx < prv->elements); + cur->type = -1; + cur->elements = -1; + cur->idx++; + return; + } + } +} + +static int processLineItem(redisReader *r) { + redisReadTask *cur = &(r->rstack[r->ridx]); + void *obj; + char *p; + int len; + + if ((p = readLine(r,&len)) != NULL) { + if (cur->type == REDIS_REPLY_INTEGER) { + if (r->fn && r->fn->createInteger) + obj = r->fn->createInteger(cur,readLongLong(p)); + else + obj = (void*)REDIS_REPLY_INTEGER; + } else { + /* Type will be error or status. */ + if (r->fn && r->fn->createString) + obj = r->fn->createString(cur,p,len); + else + obj = (void*)(size_t)(cur->type); + } + + if (obj == NULL) { + __redisReaderSetErrorOOM(r); + return REDIS_ERR; + } + + /* Set reply if this is the root object. */ + if (r->ridx == 0) r->reply = obj; + moveToNextTask(r); + return REDIS_OK; + } + + return REDIS_ERR; +} + +static int processBulkItem(redisReader *r) { + redisReadTask *cur = &(r->rstack[r->ridx]); + void *obj = NULL; + char *p, *s; + long len; + unsigned long bytelen; + int success = 0; + + p = r->buf+r->pos; + s = seekNewline(p,r->len-r->pos); + if (s != NULL) { + p = r->buf+r->pos; + bytelen = s-(r->buf+r->pos)+2; /* include \r\n */ + len = readLongLong(p); + + if (len < 0) { + /* The nil object can always be created. */ + if (r->fn && r->fn->createNil) + obj = r->fn->createNil(cur); + else + obj = (void*)REDIS_REPLY_NIL; + success = 1; + } else { + /* Only continue when the buffer contains the entire bulk item. 
*/ + bytelen += len+2; /* include \r\n */ + if (r->pos+bytelen <= r->len) { + if (r->fn && r->fn->createString) + obj = r->fn->createString(cur,s+2,len); + else + obj = (void*)REDIS_REPLY_STRING; + success = 1; + } + } + + /* Proceed when obj was created. */ + if (success) { + if (obj == NULL) { + __redisReaderSetErrorOOM(r); + return REDIS_ERR; + } + + r->pos += bytelen; + + /* Set reply if this is the root object. */ + if (r->ridx == 0) r->reply = obj; + moveToNextTask(r); + return REDIS_OK; + } + } + + return REDIS_ERR; +} + +static int processMultiBulkItem(redisReader *r) { + redisReadTask *cur = &(r->rstack[r->ridx]); + void *obj; + char *p; + long elements; + int root = 0; + + /* Set error for nested multi bulks with depth > 7 */ + if (r->ridx == 8) { + __redisReaderSetError(r,REDIS_ERR_PROTOCOL, + "No support for nested multi bulk replies with depth > 7"); + return REDIS_ERR; + } + + if ((p = readLine(r,NULL)) != NULL) { + elements = readLongLong(p); + root = (r->ridx == 0); + + if (elements == -1) { + if (r->fn && r->fn->createNil) + obj = r->fn->createNil(cur); + else + obj = (void*)REDIS_REPLY_NIL; + + if (obj == NULL) { + __redisReaderSetErrorOOM(r); + return REDIS_ERR; + } + + moveToNextTask(r); + } else { + if (r->fn && r->fn->createArray) + obj = r->fn->createArray(cur,elements); + else + obj = (void*)REDIS_REPLY_ARRAY; + + if (obj == NULL) { + __redisReaderSetErrorOOM(r); + return REDIS_ERR; + } + + /* Modify task stack when there are more than 0 elements. */ + if (elements > 0) { + cur->elements = elements; + cur->obj = obj; + r->ridx++; + r->rstack[r->ridx].type = -1; + r->rstack[r->ridx].elements = -1; + r->rstack[r->ridx].idx = 0; + r->rstack[r->ridx].obj = NULL; + r->rstack[r->ridx].parent = cur; + r->rstack[r->ridx].privdata = r->privdata; + } else { + moveToNextTask(r); + } + } + + /* Set reply if this is the root object. 
*/ + if (root) r->reply = obj; + return REDIS_OK; + } + + return REDIS_ERR; +} + +static int processItem(redisReader *r) { + redisReadTask *cur = &(r->rstack[r->ridx]); + char *p; + + /* check if we need to read type */ + if (cur->type < 0) { + if ((p = readBytes(r,1)) != NULL) { + switch (p[0]) { + case '-': + cur->type = REDIS_REPLY_ERROR; + break; + case '+': + cur->type = REDIS_REPLY_STATUS; + break; + case ':': + cur->type = REDIS_REPLY_INTEGER; + break; + case '$': + cur->type = REDIS_REPLY_STRING; + break; + case '*': + cur->type = REDIS_REPLY_ARRAY; + break; + default: + __redisReaderSetErrorProtocolByte(r,*p); + return REDIS_ERR; + } + } else { + /* could not consume 1 byte */ + return REDIS_ERR; + } + } + + /* process typed item */ + switch(cur->type) { + case REDIS_REPLY_ERROR: + case REDIS_REPLY_STATUS: + case REDIS_REPLY_INTEGER: + return processLineItem(r); + case REDIS_REPLY_STRING: + return processBulkItem(r); + case REDIS_REPLY_ARRAY: + return processMultiBulkItem(r); + default: + assert(NULL); + return REDIS_ERR; /* Avoid warning. */ + } +} + +redisReader *redisReaderCreateWithFunctions(redisReplyObjectFunctions *fn) { + redisReader *r; + + r = calloc(sizeof(redisReader),1); + if (r == NULL) + return NULL; + + r->err = 0; + r->errstr[0] = '\0'; + r->fn = fn; + r->buf = sdsempty(); + r->maxbuf = REDIS_READER_MAX_BUF; + if (r->buf == NULL) { + free(r); + return NULL; + } + + r->ridx = -1; + return r; +} + +void redisReaderFree(redisReader *r) { + if (r->reply != NULL && r->fn && r->fn->freeObject) + r->fn->freeObject(r->reply); + if (r->buf != NULL) + sdsfree(r->buf); + free(r); +} + +int redisReaderFeed(redisReader *r, const char *buf, size_t len) { + sds newbuf; + + /* Return early when this reader is in an erroneous state. */ + if (r->err) + return REDIS_ERR; + + /* Copy the provided buffer. */ + if (buf != NULL && len >= 1) { + /* Destroy internal buffer when it is empty and is quite large. 
*/ + if (r->len == 0 && r->maxbuf != 0 && sdsavail(r->buf) > r->maxbuf) { + sdsfree(r->buf); + r->buf = sdsempty(); + r->pos = 0; + + /* r->buf should not be NULL since we just free'd a larger one. */ + assert(r->buf != NULL); + } + + newbuf = sdscatlen(r->buf,buf,len); + if (newbuf == NULL) { + __redisReaderSetErrorOOM(r); + return REDIS_ERR; + } + + r->buf = newbuf; + r->len = sdslen(r->buf); + } + + return REDIS_OK; +} + +int redisReaderGetReply(redisReader *r, void **reply) { + /* Default target pointer to NULL. */ + if (reply != NULL) + *reply = NULL; + + /* Return early when this reader is in an erroneous state. */ + if (r->err) + return REDIS_ERR; + + /* When the buffer is empty, there will never be a reply. */ + if (r->len == 0) + return REDIS_OK; + + /* Set first item to process when the stack is empty. */ + if (r->ridx == -1) { + r->rstack[0].type = -1; + r->rstack[0].elements = -1; + r->rstack[0].idx = -1; + r->rstack[0].obj = NULL; + r->rstack[0].parent = NULL; + r->rstack[0].privdata = r->privdata; + r->ridx = 0; + } + + /* Process items in reply. */ + while (r->ridx >= 0) + if (processItem(r) != REDIS_OK) + break; + + /* Return ASAP when an error occurred. */ + if (r->err) + return REDIS_ERR; + + /* Discard part of the buffer when we've consumed at least 1k, to avoid + * doing unnecessary calls to memmove() in sds.c. */ + if (r->pos >= 1024) { + sdsrange(r->buf,r->pos,-1); + r->pos = 0; + r->len = sdslen(r->buf); + } + + /* Emit a reply when there is one. */ + if (r->ridx == -1) { + if (reply != NULL) + *reply = r->reply; + r->reply = NULL; + } + return REDIS_OK; +} diff --git a/deps/hiredis/read.h b/deps/hiredis/read.h new file mode 100644 index 000000000..2988aa453 --- /dev/null +++ b/deps/hiredis/read.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2009-2011, Salvatore Sanfilippo + * Copyright (c) 2010-2011, Pieter Noordhuis + * + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Redis nor the names of its contributors may be used + * to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + + +#ifndef __HIREDIS_READ_H +#define __HIREDIS_READ_H +#include /* for size_t */ + +#define REDIS_ERR -1 +#define REDIS_OK 0 + +/* When an error occurs, the err flag in a context is set to hold the type of + * error that occurred. REDIS_ERR_IO means there was an I/O error and you + * should use the "errno" variable to find out what is wrong. + * For other values, the "errstr" field will hold a description. 
*/ +#define REDIS_ERR_IO 1 /* Error in read or write */ +#define REDIS_ERR_EOF 3 /* End of file */ +#define REDIS_ERR_PROTOCOL 4 /* Protocol error */ +#define REDIS_ERR_OOM 5 /* Out of memory */ +#define REDIS_ERR_OTHER 2 /* Everything else... */ + +#define REDIS_REPLY_STRING 1 +#define REDIS_REPLY_ARRAY 2 +#define REDIS_REPLY_INTEGER 3 +#define REDIS_REPLY_NIL 4 +#define REDIS_REPLY_STATUS 5 +#define REDIS_REPLY_ERROR 6 + +#define REDIS_READER_MAX_BUF (1024*16) /* Default max unused reader buffer. */ + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct redisReadTask { + int type; + int elements; /* number of elements in multibulk container */ + int idx; /* index in parent (array) object */ + void *obj; /* holds user-generated value for a read task */ + struct redisReadTask *parent; /* parent task */ + void *privdata; /* user-settable arbitrary field */ +} redisReadTask; + +typedef struct redisReplyObjectFunctions { + void *(*createString)(const redisReadTask*, char*, size_t); + void *(*createArray)(const redisReadTask*, int); + void *(*createInteger)(const redisReadTask*, long long); + void *(*createNil)(const redisReadTask*); + void (*freeObject)(void*); +} redisReplyObjectFunctions; + +typedef struct redisReader { + int err; /* Error flags, 0 when there is no error */ + char errstr[128]; /* String representation of error when applicable */ + + char *buf; /* Read buffer */ + size_t pos; /* Buffer cursor */ + size_t len; /* Buffer length */ + size_t maxbuf; /* Max length of unused buffer */ + + redisReadTask rstack[9]; + int ridx; /* Index of current read task */ + void *reply; /* Temporary reply pointer */ + + redisReplyObjectFunctions *fn; + void *privdata; +} redisReader; + +/* Public API for the protocol parser. 
*/ +redisReader *redisReaderCreateWithFunctions(redisReplyObjectFunctions *fn); +void redisReaderFree(redisReader *r); +int redisReaderFeed(redisReader *r, const char *buf, size_t len); +int redisReaderGetReply(redisReader *r, void **reply); + +#define redisReaderSetPrivdata(_r, _p) (int)(((redisReader*)(_r))->privdata = (_p)) +#define redisReaderGetObject(_r) (((redisReader*)(_r))->reply) +#define redisReaderGetError(_r) (((redisReader*)(_r))->errstr) + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/deps/hiredis/sds.c b/deps/hiredis/sds.c index e3dd67352..923ffd82f 100644 --- a/deps/hiredis/sds.c +++ b/deps/hiredis/sds.c @@ -89,9 +89,9 @@ sds sdsnewlen(const void *init, size_t initlen) { unsigned char *fp; /* flags pointer. */ sh = s_malloc(hdrlen+initlen+1); + if (sh == NULL) return NULL; if (!init) memset(sh, 0, hdrlen+initlen+1); - if (sh == NULL) return NULL; s = (char*)sh+hdrlen; fp = ((unsigned char*)s)-1; switch(type) { @@ -577,14 +577,12 @@ sds sdscatprintf(sds s, const char *fmt, ...) { * %% - Verbatim "%" character. */ sds sdscatfmt(sds s, char const *fmt, ...) { - size_t initlen = sdslen(s); const char *f = fmt; int i; va_list ap; va_start(ap,fmt); - f = fmt; /* Next format specifier byte to process. */ - i = initlen; /* Position of the next byte to write to dest str. */ + i = sdslen(s); /* Position of the next byte to write to dest str. 
*/ while(*f) { char next, *str; size_t l; diff --git a/deps/hiredis/sds.h b/deps/hiredis/sds.h index 394f8b52e..13be75a9f 100644 --- a/deps/hiredis/sds.h +++ b/deps/hiredis/sds.h @@ -79,7 +79,7 @@ struct __attribute__ ((__packed__)) sdshdr64 { #define SDS_TYPE_64 4 #define SDS_TYPE_MASK 7 #define SDS_TYPE_BITS 3 -#define SDS_HDR_VAR(T,s) struct sdshdr##T *sh = (void*)((s)-(sizeof(struct sdshdr##T))); +#define SDS_HDR_VAR(T,s) struct sdshdr##T *sh = (struct sdshdr##T *)((s)-(sizeof(struct sdshdr##T))); #define SDS_HDR(T,s) ((struct sdshdr##T *)((s)-(sizeof(struct sdshdr##T)))) #define SDS_TYPE_5_LEN(f) ((f)>>SDS_TYPE_BITS) diff --git a/deps/hiredis/sdsalloc.h b/deps/hiredis/sdsalloc.h index 531d41929..f43023c48 100644 --- a/deps/hiredis/sdsalloc.h +++ b/deps/hiredis/sdsalloc.h @@ -1,6 +1,7 @@ /* SDSLib 2.0 -- A C dynamic strings library * * Copyright (c) 2006-2015, Salvatore Sanfilippo + * Copyright (c) 2015, Oran Agra * Copyright (c) 2015, Redis Labs, Inc * All rights reserved. * @@ -36,7 +37,6 @@ * the include of your alternate allocator if needed (not needed in order * to use the default libc allocator). */ -#include "zmalloc.h" -#define s_malloc zmalloc -#define s_realloc zrealloc -#define s_free zfree +#define s_malloc malloc +#define s_realloc realloc +#define s_free free diff --git a/deps/hiredis/test.c b/deps/hiredis/test.c index 2cc35a46f..a23d60676 100644 --- a/deps/hiredis/test.c +++ b/deps/hiredis/test.c @@ -11,6 +11,7 @@ #include #include "hiredis.h" +#include "net.h" enum connection_type { CONN_TCP, @@ -29,7 +30,7 @@ struct config { struct { const char *path; - } unix; + } unix_sock; }; /* The following lines make up our testing "framework" :) */ @@ -43,6 +44,13 @@ static long long usec(void) { return (((long long)tv.tv_sec)*1000000)+tv.tv_usec; } +/* The assert() calls below have side effects, so we need assert() + * even if we are compiling without asserts (-DNDEBUG). 
*/ +#ifdef NDEBUG +#undef assert +#define assert(e) (void)(e) +#endif + static redisContext *select_database(redisContext *c) { redisReply *reply; @@ -51,7 +59,7 @@ static redisContext *select_database(redisContext *c) { assert(reply != NULL); freeReplyObject(reply); - /* Make sure the DB is empty */ + /* Make sure the DB is emtpy */ reply = redisCommand(c,"DBSIZE"); assert(reply != NULL); if (reply->type == REDIS_REPLY_INTEGER && reply->integer == 0) { @@ -89,10 +97,10 @@ static redisContext *connect(struct config config) { if (config.type == CONN_TCP) { c = redisConnect(config.tcp.host, config.tcp.port); } else if (config.type == CONN_UNIX) { - c = redisConnectUnix(config.unix.path); + c = redisConnectUnix(config.unix_sock.path); } else if (config.type == CONN_FD) { /* Create a dummy connection just to get an fd to inherit */ - redisContext *dummy_ctx = redisConnectUnix(config.unix.path); + redisContext *dummy_ctx = redisConnectUnix(config.unix_sock.path); if (dummy_ctx) { int fd = disconnect(dummy_ctx, 1); printf("Connecting to inherited fd %d\n", fd); @@ -107,6 +115,7 @@ static redisContext *connect(struct config config) { exit(1); } else if (c->err) { printf("Connection error: %s\n", c->errstr); + redisFree(c); exit(1); } @@ -215,6 +224,22 @@ static void test_format_commands(void) { test_cond(strncmp(cmd,"*3\r\n$3\r\nSET\r\n$7\r\nfoo\0xxx\r\n$3\r\nbar\r\n",len) == 0 && len == 4+4+(3+2)+4+(7+2)+4+(3+2)); free(cmd); + + sds sds_cmd; + + sds_cmd = sdsempty(); + test("Format command into sds by passing argc/argv without lengths: "); + len = redisFormatSdsCommandArgv(&sds_cmd,argc,argv,NULL); + test_cond(strncmp(sds_cmd,"*3\r\n$3\r\nSET\r\n$3\r\nfoo\r\n$3\r\nbar\r\n",len) == 0 && + len == 4+4+(3+2)+4+(3+2)+4+(3+2)); + sdsfree(sds_cmd); + + sds_cmd = sdsempty(); + test("Format command into sds by passing argc/argv with lengths: "); + len = redisFormatSdsCommandArgv(&sds_cmd,argc,argv,lens); + 
test_cond(strncmp(sds_cmd,"*3\r\n$3\r\nSET\r\n$7\r\nfoo\0xxx\r\n$3\r\nbar\r\n",len) == 0 && + len == 4+4+(3+2)+4+(7+2)+4+(3+2)); + sdsfree(sds_cmd); } static void test_append_formatted_commands(struct config config) { @@ -318,16 +343,31 @@ static void test_reply_reader(void) { redisReaderFree(reader); } +static void test_free_null(void) { + void *redisCtx = NULL; + void *reply = NULL; + + test("Don't fail when redisFree is passed a NULL value: "); + redisFree(redisCtx); + test_cond(redisCtx == NULL); + + test("Don't fail when freeReplyObject is passed a NULL value: "); + freeReplyObject(reply); + test_cond(reply == NULL); +} + static void test_blocking_connection_errors(void) { redisContext *c; test("Returns error when host cannot be resolved: "); - c = redisConnect((char*)"idontexist.local", 6379); + c = redisConnect((char*)"idontexist.test", 6379); test_cond(c->err == REDIS_ERR_OTHER && (strcmp(c->errstr,"Name or service not known") == 0 || - strcmp(c->errstr,"Can't resolve: idontexist.local") == 0 || + strcmp(c->errstr,"Can't resolve: idontexist.test") == 0 || strcmp(c->errstr,"nodename nor servname provided, or not known") == 0 || strcmp(c->errstr,"No address associated with hostname") == 0 || + strcmp(c->errstr,"Temporary failure in name resolution") == 0 || + strcmp(c->errstr,"hostname nor servname provided, or not known") == 0 || strcmp(c->errstr,"no address associated with name") == 0)); redisFree(c); @@ -337,7 +377,7 @@ static void test_blocking_connection_errors(void) { strcmp(c->errstr,"Connection refused") == 0); redisFree(c); - test("Returns error when the unix socket path doesn't accept connections: "); + test("Returns error when the unix_sock socket path doesn't accept connections: "); c = redisConnectUnix((char*)"/tmp/idontexist.sock"); test_cond(c->err == REDIS_ERR_IO); /* Don't care about the message... 
*/ redisFree(c); @@ -421,6 +461,52 @@ static void test_blocking_connection(struct config config) { disconnect(c, 0); } +static void test_blocking_connection_timeouts(struct config config) { + redisContext *c; + redisReply *reply; + ssize_t s; + const char *cmd = "DEBUG SLEEP 3\r\n"; + struct timeval tv; + + c = connect(config); + test("Successfully completes a command when the timeout is not exceeded: "); + reply = redisCommand(c,"SET foo fast"); + freeReplyObject(reply); + tv.tv_sec = 0; + tv.tv_usec = 10000; + redisSetTimeout(c, tv); + reply = redisCommand(c, "GET foo"); + test_cond(reply != NULL && reply->type == REDIS_REPLY_STRING && memcmp(reply->str, "fast", 4) == 0); + freeReplyObject(reply); + disconnect(c, 0); + + c = connect(config); + test("Does not return a reply when the command times out: "); + s = write(c->fd, cmd, strlen(cmd)); + tv.tv_sec = 0; + tv.tv_usec = 10000; + redisSetTimeout(c, tv); + reply = redisCommand(c, "GET foo"); + test_cond(s > 0 && reply == NULL && c->err == REDIS_ERR_IO && strcmp(c->errstr, "Resource temporarily unavailable") == 0); + freeReplyObject(reply); + + test("Reconnect properly reconnects after a timeout: "); + redisReconnect(c); + reply = redisCommand(c, "PING"); + test_cond(reply != NULL && reply->type == REDIS_REPLY_STATUS && strcmp(reply->str, "PONG") == 0); + freeReplyObject(reply); + + test("Reconnect properly uses owned parameters: "); + config.tcp.host = "foo"; + config.unix_sock.path = "foo"; + redisReconnect(c); + reply = redisCommand(c, "PING"); + test_cond(reply != NULL && reply->type == REDIS_REPLY_STATUS && strcmp(reply->str, "PONG") == 0); + freeReplyObject(reply); + + disconnect(c, 0); +} + static void test_blocking_io_errors(struct config config) { redisContext *c; redisReply *reply; @@ -444,7 +530,7 @@ static void test_blocking_io_errors(struct config config) { test("Returns I/O error when the connection is lost: "); reply = redisCommand(c,"QUIT"); - if (major >= 2 && minor > 0) { + if (major > 2 || 
(major == 2 && minor > 0)) { /* > 2.0 returns OK on QUIT and read() should be issued once more * to know the descriptor is at EOF. */ test_cond(strcasecmp(reply->str,"OK") == 0 && @@ -482,7 +568,8 @@ static void test_invalid_timeout_errors(struct config config) { c = redisConnectWithTimeout(config.tcp.host, config.tcp.port, config.tcp.timeout); - test_cond(c->err == REDIS_ERR_IO); + test_cond(c->err == REDIS_ERR_IO && strcmp(c->errstr, "Invalid timeout specified") == 0); + redisFree(c); test("Set error when an invalid timeout sec value is given to redisConnectWithTimeout: "); @@ -491,8 +578,7 @@ static void test_invalid_timeout_errors(struct config config) { c = redisConnectWithTimeout(config.tcp.host, config.tcp.port, config.tcp.timeout); - test_cond(c->err == REDIS_ERR_IO); - + test_cond(c->err == REDIS_ERR_IO && strcmp(c->errstr, "Invalid timeout specified") == 0); redisFree(c); } @@ -666,7 +752,7 @@ int main(int argc, char **argv) { .host = "127.0.0.1", .port = 6379 }, - .unix = { + .unix_sock = { .path = "/tmp/redis.sock" } }; @@ -687,7 +773,7 @@ int main(int argc, char **argv) { cfg.tcp.port = atoi(argv[0]); } else if (argc >= 2 && !strcmp(argv[0],"-s")) { argv++; argc--; - cfg.unix.path = argv[0]; + cfg.unix_sock.path = argv[0]; } else if (argc >= 1 && !strcmp(argv[0],"--skip-throughput")) { throughput = 0; } else if (argc >= 1 && !strcmp(argv[0],"--skip-inherit-fd")) { @@ -702,27 +788,31 @@ int main(int argc, char **argv) { test_format_commands(); test_reply_reader(); test_blocking_connection_errors(); + test_free_null(); printf("\nTesting against TCP connection (%s:%d):\n", cfg.tcp.host, cfg.tcp.port); cfg.type = CONN_TCP; test_blocking_connection(cfg); + test_blocking_connection_timeouts(cfg); test_blocking_io_errors(cfg); test_invalid_timeout_errors(cfg); test_append_formatted_commands(cfg); if (throughput) test_throughput(cfg); - printf("\nTesting against Unix socket connection (%s):\n", cfg.unix.path); + printf("\nTesting against Unix socket connection 
(%s):\n", cfg.unix_sock.path); cfg.type = CONN_UNIX; test_blocking_connection(cfg); + test_blocking_connection_timeouts(cfg); test_blocking_io_errors(cfg); if (throughput) test_throughput(cfg); if (test_inherit_fd) { - printf("\nTesting against inherited fd (%s):\n", cfg.unix.path); + printf("\nTesting against inherited fd (%s):\n", cfg.unix_sock.path); cfg.type = CONN_FD; test_blocking_connection(cfg); } + if (fails) { printf("*** %d TESTS FAILED ***\n", fails); return 1; diff --git a/deps/hiredis/win32.h b/deps/hiredis/win32.h new file mode 100644 index 000000000..1a27c18f2 --- /dev/null +++ b/deps/hiredis/win32.h @@ -0,0 +1,42 @@ +#ifndef _WIN32_HELPER_INCLUDE +#define _WIN32_HELPER_INCLUDE +#ifdef _MSC_VER + +#ifndef inline +#define inline __inline +#endif + +#ifndef va_copy +#define va_copy(d,s) ((d) = (s)) +#endif + +#ifndef snprintf +#define snprintf c99_snprintf + +__inline int c99_vsnprintf(char* str, size_t size, const char* format, va_list ap) +{ + int count = -1; + + if (size != 0) + count = _vsnprintf_s(str, size, _TRUNCATE, format, ap); + if (count == -1) + count = _vscprintf(format, ap); + + return count; +} + +__inline int c99_snprintf(char* str, size_t size, const char* format, ...) +{ + int count; + va_list ap; + + va_start(ap, format); + count = c99_vsnprintf(str, size, format, ap); + va_end(ap); + + return count; +} +#endif + +#endif +#endif \ No newline at end of file diff --git a/deps/hiredis/zmalloc.h b/deps/hiredis/zmalloc.h deleted file mode 100644 index 99b87ace9..000000000 --- a/deps/hiredis/zmalloc.h +++ /dev/null @@ -1,13 +0,0 @@ -/* Drop in replacement for zmalloc.h in order to just use libc malloc without - * any wrappering. 
*/ - -#ifndef ZMALLOC_H -#define ZMALLOC_H - -#define zmalloc malloc -#define zrealloc realloc -#define zcalloc(x) calloc(x,1) -#define zfree free -#define zstrdup strdup - -#endif From 8528bbaf6c064a486e52165d6ac7f10cc8fd0dcb Mon Sep 17 00:00:00 2001 From: oranagra Date: Sat, 24 Dec 2016 17:27:58 +0200 Subject: [PATCH 0243/1722] fix rare assertion in DEBUG DIGEST getExpire calls dictFind which can do rehashing. found by calling computeDatasetDigest from serverCron and running the test suite. --- src/debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/debug.c b/src/debug.c index b8ad4e511..2fd382fc0 100644 --- a/src/debug.c +++ b/src/debug.c @@ -126,7 +126,7 @@ void computeDatasetDigest(unsigned char *final) { redisDb *db = server.db+j; if (dictSize(db->dict) == 0) continue; - di = dictGetIterator(db->dict); + di = dictGetSafeIterator(db->dict); /* hash the DB id, so the same dataset moved in a different * DB will lead to a different digest */ From 53511a429cd6e09584ea2e31d6d4a911e182a80a Mon Sep 17 00:00:00 2001 From: oranagra Date: Fri, 30 Dec 2016 03:37:52 +0200 Subject: [PATCH 0244/1722] active memory defragmentation --- deps/jemalloc/src/jemalloc.c | 32 +++ redis.conf | 20 ++ src/Makefile | 2 +- src/config.c | 58 ++++ src/db.c | 2 +- src/debug.c | 15 +- src/defrag.c | 527 +++++++++++++++++++++++++++++++++++ src/dict.c | 33 +++ src/dict.h | 5 +- src/server.c | 29 +- src/server.h | 18 ++ src/zmalloc.c | 20 ++ src/zmalloc.h | 2 + 13 files changed, 755 insertions(+), 8 deletions(-) create mode 100644 src/defrag.c diff --git a/deps/jemalloc/src/jemalloc.c b/deps/jemalloc/src/jemalloc.c index 5a2d32406..fe77c2475 100644 --- a/deps/jemalloc/src/jemalloc.c +++ b/deps/jemalloc/src/jemalloc.c @@ -2591,3 +2591,35 @@ jemalloc_postfork_child(void) } /******************************************************************************/ + +/* Helps the application decide if a pointer is worth re-allocating in order to reduce fragmentation. 
+ * returns 0 if the allocation is in the currently active run, + * or when it is not causing any frag issue (large or huge bin) + * returns the bin utilization and run utilization both in fixed point 16:16. + * If the application decides to re-allocate it should use MALLOCX_TCACHE_NONE when doing so. */ +JEMALLOC_EXPORT int JEMALLOC_NOTHROW +je_get_defrag_hint(void* ptr, int *bin_util, int *run_util) { + int defrag = 0; + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (likely(chunk != ptr)) { /* indication that this is not a HUGE alloc */ + size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + size_t mapbits = arena_mapbits_get(chunk, pageind); + if (likely((mapbits & CHUNK_MAP_LARGE) == 0)) { /* indication that this is not a LARGE alloc */ + arena_t *arena = extent_node_arena_get(&chunk->node); + size_t rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, pageind); + arena_run_t *run = &arena_miscelm_get(chunk, rpages_ind)->run; + arena_bin_t *bin = &arena->bins[run->binind]; + malloc_mutex_lock(&bin->lock); + /* runs that are in the same chunk in as the current chunk, are likely to be the next currun */ + if (chunk != (arena_chunk_t *)CHUNK_ADDR2BASE(bin->runcur)) { + arena_bin_info_t *bin_info = &arena_bin_info[run->binind]; + size_t availregs = bin_info->nregs * bin->stats.curruns; + *bin_util = (bin->stats.curregs<<16) / availregs; + *run_util = ((bin_info->nregs - run->nfree)<<16) / bin_info->nregs; + defrag = 1; + } + malloc_mutex_unlock(&bin->lock); + } + } + return defrag; +} diff --git a/redis.conf b/redis.conf index 14b5ca979..18ba9fb3f 100644 --- a/redis.conf +++ b/redis.conf @@ -1228,3 +1228,23 @@ aof-rewrite-incremental-fsync yes # # lfu-log-factor 10 # lfu-decay-time 1 + +########################### ACTIVE DEFRAGMENTATION ####################### + +# enabled active defragmentation +# activedefrag yes + +# minimum amount of fragmentation waste to start active defrag +# active-defrag-ignore-bytes 100mb + +# 
minimum percentage of fragmentation to start active defrag +# active-defrag-threshold-lower 10 + +# maximum percentage of fragmentation at which we use maximum effort +# active-defrag-threshold-upper 100 + +# minimal effort for defrag in CPU percentage +# active-defrag-cycle-min 25 + +# maximal effort for defrag in CPU percentage +# active-defrag-cycle-max 75 diff --git a/src/Makefile b/src/Makefile index 2bf3c9347..3f445f40f 100644 --- a/src/Makefile +++ b/src/Makefile @@ -128,7 +128,7 @@ endif REDIS_SERVER_NAME=redis-server REDIS_SENTINEL_NAME=redis-sentinel -REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o +REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o REDIS_CLI_NAME=redis-cli REDIS_CLI_OBJ=anet.o adlist.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o REDIS_BENCHMARK_NAME=redis-benchmark diff --git a/src/config.c b/src/config.c index 54af5bfe0..4e2c74e5d 100644 --- a/src/config.c +++ 
b/src/config.c @@ -423,6 +423,10 @@ void loadServerConfigFromString(char *config) { if ((server.repl_slave_lazy_flush = yesnotoi(argv[1])) == -1) { err = "argument must be 'yes' or 'no'"; goto loaderr; } + } else if (!strcasecmp(argv[0],"activedefrag") && argc == 2) { + if ((server.active_defrag_enabled = yesnotoi(argv[1])) == -1) { + err = "argument must be 'yes' or 'no'"; goto loaderr; + } } else if (!strcasecmp(argv[0],"daemonize") && argc == 2) { if ((server.daemonize = yesnotoi(argv[1])) == -1) { err = "argument must be 'yes' or 'no'"; goto loaderr; @@ -499,6 +503,36 @@ void loadServerConfigFromString(char *config) { } zfree(server.rdb_filename); server.rdb_filename = zstrdup(argv[1]); + } else if (!strcasecmp(argv[0],"active-defrag-threshold-lower") && argc == 2) { + server.active_defrag_threshold_lower = atoi(argv[1]); + if (server.active_defrag_threshold_lower < 0) { + err = "active-defrag-threshold-lower must be 0 or greater"; + goto loaderr; + } + } else if (!strcasecmp(argv[0],"active-defrag-threshold-upper") && argc == 2) { + server.active_defrag_threshold_upper = atoi(argv[1]); + if (server.active_defrag_threshold_upper < 0) { + err = "active-defrag-threshold-upper must be 0 or greater"; + goto loaderr; + } + } else if (!strcasecmp(argv[0],"active-defrag-ignore-bytes") && argc == 2) { + server.active_defrag_ignore_bytes = memtoll(argv[1], NULL); + if (server.active_defrag_ignore_bytes <= 0) { + err = "active-defrag-ignore-bytes must above 0"; + goto loaderr; + } + } else if (!strcasecmp(argv[0],"active-defrag-cycle-min") && argc == 2) { + server.active_defrag_cycle_min = atoi(argv[1]); + if (server.active_defrag_cycle_min < 1 || server.active_defrag_cycle_min > 99) { + err = "active-defrag-cycle-min must be between 1 and 99"; + goto loaderr; + } + } else if (!strcasecmp(argv[0],"active-defrag-cycle-max") && argc == 2) { + server.active_defrag_cycle_max = atoi(argv[1]); + if (server.active_defrag_cycle_max < 1 || server.active_defrag_cycle_max > 99) { + 
err = "active-defrag-cycle-max must be between 1 and 99"; + goto loaderr; + } } else if (!strcasecmp(argv[0],"hash-max-ziplist-entries") && argc == 2) { server.hash_max_ziplist_entries = memtoll(argv[1], NULL); } else if (!strcasecmp(argv[0],"hash-max-ziplist-value") && argc == 2) { @@ -971,6 +1005,8 @@ void configSetCommand(client *c) { "slave-read-only",server.repl_slave_ro) { } config_set_bool_field( "activerehashing",server.activerehashing) { + } config_set_bool_field( + "activedefrag",server.active_defrag_enabled) { } config_set_bool_field( "protected-mode",server.protected_mode) { } config_set_bool_field( @@ -998,6 +1034,16 @@ void configSetCommand(client *c) { "lfu-decay-time",server.lfu_decay_time,0,LLONG_MAX) { } config_set_numerical_field( "timeout",server.maxidletime,0,LONG_MAX) { + } config_set_numerical_field( + "active-defrag-threshold-lower",server.active_defrag_threshold_lower,0,1000) { + } config_set_numerical_field( + "active-defrag-threshold-upper",server.active_defrag_threshold_upper,0,1000) { + } config_set_memory_field( + "active-defrag-ignore-bytes",server.active_defrag_ignore_bytes) { + } config_set_numerical_field( + "active-defrag-cycle-min",server.active_defrag_cycle_min,1,99) { + } config_set_numerical_field( + "active-defrag-cycle-max",server.active_defrag_cycle_max,1,99) { } config_set_numerical_field( "auto-aof-rewrite-percentage",server.aof_rewrite_perc,0,LLONG_MAX){ } config_set_numerical_field( @@ -1166,6 +1212,11 @@ void configGetCommand(client *c) { config_get_numerical_field("maxmemory",server.maxmemory); config_get_numerical_field("maxmemory-samples",server.maxmemory_samples); config_get_numerical_field("timeout",server.maxidletime); + config_get_numerical_field("active-defrag-threshold-lower",server.active_defrag_threshold_lower); + config_get_numerical_field("active-defrag-threshold-upper",server.active_defrag_threshold_upper); + config_get_numerical_field("active-defrag-ignore-bytes",server.active_defrag_ignore_bytes); + 
config_get_numerical_field("active-defrag-cycle-min",server.active_defrag_cycle_min); + config_get_numerical_field("active-defrag-cycle-max",server.active_defrag_cycle_max); config_get_numerical_field("auto-aof-rewrite-percentage", server.aof_rewrite_perc); config_get_numerical_field("auto-aof-rewrite-min-size", @@ -1230,6 +1281,7 @@ void configGetCommand(client *c) { config_get_bool_field("rdbcompression", server.rdb_compression); config_get_bool_field("rdbchecksum", server.rdb_checksum); config_get_bool_field("activerehashing", server.activerehashing); + config_get_bool_field("activedefrag", server.active_defrag_enabled); config_get_bool_field("protected-mode", server.protected_mode); config_get_bool_field("repl-disable-tcp-nodelay", server.repl_disable_tcp_nodelay); @@ -1930,6 +1982,11 @@ int rewriteConfig(char *path) { rewriteConfigBytesOption(state,"maxmemory",server.maxmemory,CONFIG_DEFAULT_MAXMEMORY); rewriteConfigEnumOption(state,"maxmemory-policy",server.maxmemory_policy,maxmemory_policy_enum,CONFIG_DEFAULT_MAXMEMORY_POLICY); rewriteConfigNumericalOption(state,"maxmemory-samples",server.maxmemory_samples,CONFIG_DEFAULT_MAXMEMORY_SAMPLES); + rewriteConfigNumericalOption(state,"active-defrag-threshold-lower",server.active_defrag_threshold_lower,CONFIG_DEFAULT_DEFRAG_THRESHOLD_LOWER); + rewriteConfigNumericalOption(state,"active-defrag-threshold-upper",server.active_defrag_threshold_upper,CONFIG_DEFAULT_DEFRAG_THRESHOLD_UPPER); + rewriteConfigBytesOption(state,"active-defrag-ignore-bytes",server.active_defrag_ignore_bytes,CONFIG_DEFAULT_DEFRAG_IGNORE_BYTES); + rewriteConfigNumericalOption(state,"active-defrag-cycle-min",server.active_defrag_cycle_min,CONFIG_DEFAULT_DEFRAG_CYCLE_MIN); + rewriteConfigNumericalOption(state,"active-defrag-cycle-max",server.active_defrag_cycle_max,CONFIG_DEFAULT_DEFRAG_CYCLE_MAX); rewriteConfigYesNoOption(state,"appendonly",server.aof_state != AOF_OFF,0); 
rewriteConfigStringOption(state,"appendfilename",server.aof_filename,CONFIG_DEFAULT_AOF_FILENAME); rewriteConfigEnumOption(state,"appendfsync",server.aof_fsync,aof_fsync_enum,CONFIG_DEFAULT_AOF_FSYNC); @@ -1956,6 +2013,7 @@ int rewriteConfig(char *path) { rewriteConfigNumericalOption(state,"zset-max-ziplist-value",server.zset_max_ziplist_value,OBJ_ZSET_MAX_ZIPLIST_VALUE); rewriteConfigNumericalOption(state,"hll-sparse-max-bytes",server.hll_sparse_max_bytes,CONFIG_DEFAULT_HLL_SPARSE_MAX_BYTES); rewriteConfigYesNoOption(state,"activerehashing",server.activerehashing,CONFIG_DEFAULT_ACTIVE_REHASHING); + rewriteConfigYesNoOption(state,"activedefrag",server.active_defrag_enabled,CONFIG_DEFAULT_ACTIVE_DEFRAG); rewriteConfigYesNoOption(state,"protected-mode",server.protected_mode,CONFIG_DEFAULT_PROTECTED_MODE); rewriteConfigClientoutputbufferlimitOption(state); rewriteConfigNumericalOption(state,"hz",server.hz,CONFIG_DEFAULT_HZ); diff --git a/src/db.c b/src/db.c index 90a75fcfe..a21437c76 100644 --- a/src/db.c +++ b/src/db.c @@ -665,7 +665,7 @@ void scanGenericCommand(client *c, robj *o, unsigned long cursor) { privdata[0] = keys; privdata[1] = o; do { - cursor = dictScan(ht, cursor, scanCallback, privdata); + cursor = dictScan(ht, cursor, scanCallback, NULL, privdata); } while (cursor && maxiterations-- && listLength(keys) < (unsigned long)count); diff --git a/src/debug.c b/src/debug.c index b8ad4e511..5098d2b64 100644 --- a/src/debug.c +++ b/src/debug.c @@ -282,7 +282,7 @@ void debugCommand(client *c) { blen++; addReplyStatus(c, "ziplist -- Show low level info about the ziplist encoding."); blen++; addReplyStatus(c, - "populate [prefix] -- Create string keys named key:. If a prefix is specified is used instead of the 'key' prefix."); + "populate [prefix] [size] -- Create string keys named key:. 
If a prefix is specified is used instead of the 'key' prefix."); blen++; addReplyStatus(c, "digest -- Outputs an hex signature representing the current DB content."); blen++; addReplyStatus(c, @@ -433,7 +433,7 @@ void debugCommand(client *c) { addReplyStatus(c,"Ziplist structure printed on stdout"); } } else if (!strcasecmp(c->argv[1]->ptr,"populate") && - (c->argc == 3 || c->argc == 4)) { + c->argc >= 3 && c->argc <= 5) { long keys, j; robj *key, *val; char buf[128]; @@ -442,15 +442,24 @@ void debugCommand(client *c) { return; dictExpand(c->db->dict,keys); for (j = 0; j < keys; j++) { + long valsize = 0; snprintf(buf,sizeof(buf),"%s:%lu", (c->argc == 3) ? "key" : (char*)c->argv[3]->ptr, j); key = createStringObject(buf,strlen(buf)); + if (c->argc == 5) + if (getLongFromObjectOrReply(c, c->argv[4], &valsize, NULL) != C_OK) + return; if (lookupKeyWrite(c->db,key) != NULL) { decrRefCount(key); continue; } snprintf(buf,sizeof(buf),"value:%lu",j); - val = createStringObject(buf,strlen(buf)); + if (valsize==0) + val = createStringObject(buf,strlen(buf)); + else { + val = createStringObject(NULL,valsize); + memset(val->ptr, 0, valsize); + } dbAdd(c->db,key,val); signalModifiedKey(c->db,key); decrRefCount(key); diff --git a/src/defrag.c b/src/defrag.c new file mode 100644 index 000000000..663196c31 --- /dev/null +++ b/src/defrag.c @@ -0,0 +1,527 @@ +/* + * Active memory defragmentation + * Try to find key / value allocations that need to be re-allocated in order + * to reduce external fragmentation. + * We do that by scanning the keyspace and for each pointer we have, we can try to + * ask the allocator if moving it to a new address will help reduce fragmentation. + * + * Copyright (c) 2017, Oran Agra + * Copyright (c) 2017, Redis Labs, Inc + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Redis nor the names of its contributors may be used + * to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "server.h" +#include +#include +#include + +#if defined(USE_JEMALLOC) && defined(MALLOCX_TCACHE_NONE) + +/* this method was added to jemalloc in order to help us understand which + * pointers are worthwhile moving and which aren't */ +int je_get_defrag_hint(void* ptr, int *bin_util, int *run_util); + +/* Defrag helper for generic allocations. + * + * returns NULL in case the allocatoin wasn't moved. 
+ * when it returns a non-null value, the old pointer was already released + * and should NOT be accessed. */ +void* activeDefragAlloc(void *ptr) { + int bin_util, run_util; + size_t size; + void *newptr; + if(!je_get_defrag_hint(ptr, &bin_util, &run_util)) { + server.stat_active_defrag_misses++; + return NULL; + } + /* if this run is more utilized than the average utilization in this bin (or it is full), skip it. + * this will eventually move all the allocations from relatively empty runs into relatively full runs. */ + if (run_util > bin_util || run_util == 1<<16) { + server.stat_active_defrag_misses++; + return NULL; + } + /* move this allocation to a new allocation. + * make sure not to use the thread cache. so that we don't get back the same pointers we try to free */ + size = zmalloc_size(ptr); + newptr = zmalloc_no_tcache(size); + memcpy(newptr, ptr, size); + zfree_no_tcache(ptr); + return newptr; +} + +/*Defrag helper for sds strings + * + * returns NULL in case the allocatoin wasn't moved. + * when it returns a non-null value, the old pointer was already released + * and should NOT be accessed. */ +sds activeDefragSds(sds sdsptr) { + void* ptr = sdsAllocPtr(sdsptr); + void* newptr = activeDefragAlloc(ptr); + if (newptr) { + size_t offset = sdsptr - (char*)ptr; + sdsptr = (char*)newptr + offset; + return sdsptr; + } + return NULL; +} + +/* Defrag helper for robj and/or string objects + * + * returns NULL in case the allocatoin wasn't moved. + * when it returns a non-null value, the old pointer was already released + * and should NOT be accessed. 
*/ +robj *activeDefragStringOb(robj* ob) { + robj *ret = NULL; + if (ob->refcount!=1) + return NULL; + + /* try to defrag robj (only if not an EMBSTR type (handled below) */ + if (ob->type!=OBJ_STRING || ob->encoding!=OBJ_ENCODING_EMBSTR) { + if ((ret = activeDefragAlloc(ob))) + ob = ret; + } + + /* try to defrag string object */ + if (ob->type == OBJ_STRING) { + if(ob->encoding==OBJ_ENCODING_RAW) { + sds newsds = activeDefragSds((sds)ob->ptr); + if (newsds) { + ob->ptr = newsds; + /* we don't need to change the return value here. + * we can return NULL if 'ret' is still NULL (since the object pointer itself wasn't changed). + * but we set return value to ob as an indication that we defragged a pointer (for stats). + * NOTE: if ret is already set and the robj was moved, then our stats will be a bit off + * since two pointers were moved, but we show only one in the stats */ + ret = ob; + } + } else if (ob->encoding==OBJ_ENCODING_EMBSTR) { + /* the sds is embedded in the object allocation, calculate the offset and update the pointer in the new allocation */ + long ofs = (intptr_t)ob->ptr - (intptr_t)ob; + if ((ret = activeDefragAlloc(ob))) { + ret->ptr = (void*)((intptr_t)ret + ofs); + } + } else if (ob->encoding!=OBJ_ENCODING_INT) { + serverPanic("Unknown string encoding"); + } + } + return ret; +} + +/* Defrag helper for dictEntries to be used during dict iteration (called on each step). + * returns a stat of how many pointers were moved. */ +int dictIterDefragEntry(dictIterator *iter) { + /* This function is a little bit dirty since it messes with the internals of the dict and it's iterator, + * but the benefit is that it is very easy to use, and require no other chagnes in the dict. */ + int defragged = 0; + dictht *ht; + /* handle the next entry (if there is one), and update the pointer in the current entry. 
*/ + if (iter->nextEntry) { + dictEntry *newde = activeDefragAlloc(iter->nextEntry); + if (newde) { + defragged++; + iter->nextEntry = newde; + iter->entry->next = newde; + } + } + /* handle the case of the first entry in the hash bucket. */ + ht = &iter->d->ht[iter->table]; + if (ht->table[iter->index] == iter->entry) { + dictEntry *newde = activeDefragAlloc(iter->entry); + if (newde) { + iter->entry = newde; + ht->table[iter->index] = newde; + defragged++; + } + } + return defragged; +} + +/* Defrag helper for dict main allocations (dict struct, and hash tables). + * receives a pointer to the dict* and implicitly updates it when the dict struct itself was moved. + * returns a stat of how many pointers were moved. */ +int dictDefragTables(dict** dictRef) { + dict *d = *dictRef; + dictEntry **newtable; + int defragged = 0; + /* handle the dict struct */ + dict *newd = activeDefragAlloc(d); + if (newd) + defragged++, *dictRef = d = newd; + /* handle the first hash table */ + newtable = activeDefragAlloc(d->ht[0].table); + if (newtable) + defragged++, d->ht[0].table = newtable; + /* handle the second hash table */ + if (d->ht[1].table) { + newtable = activeDefragAlloc(d->ht[1].table); + if (newtable) + defragged++, d->ht[1].table = newtable; + } + return defragged; +} + +/* Internal function used by zslDefrag */ +void zslUpdateNode(zskiplist *zsl, zskiplistNode *oldnode, zskiplistNode *newnode, zskiplistNode **update) { + int i; + for (i = 0; i < zsl->level; i++) { + if (update[i]->level[i].forward == oldnode) + update[i]->level[i].forward = newnode; + } + if (zsl->header==oldnode) + zsl->header = newnode; + if (zsl->tail==oldnode) + zsl->tail = newnode; + if (newnode->level[0].forward) { + serverAssert(newnode->level[0].forward->backward==oldnode); + newnode->level[0].forward->backward = newnode; + } +} + +/* Defrag helper for sorted set. + * Update the robj pointer, defrag the struct and return the new score reference. 
+ * we may not access oldele pointer (not even the pointer stored in the skiplist), as it was already freed. + * newele may be null, in which case we only need to defrag the skiplist, but not update the obj pointer. + * when return value is non-NULL, it is the score reference that must be updated in the dict record. */ +double *zslDefrag(zskiplist *zsl, double score, sds oldele, sds newele) { + zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x, *newx; + int i; + sds ele = newele? newele: oldele; + + /* find the skiplist node referring to the object that was moved, + * and all pointers that need to be updated if we'll end up moving the skiplist node. */ + x = zsl->header; + for (i = zsl->level-1; i >= 0; i--) { + while (x->level[i].forward && + x->level[i].forward->ele != oldele && /* make sure not to access the ->obj pointer if it matches oldele */ + (x->level[i].forward->score < score || + (x->level[i].forward->score == score && + sdscmp(x->level[i].forward->ele,ele) < 0))) + x = x->level[i].forward; + update[i] = x; + } + + /* update the robj pointer inside the skip list record. */ + x = x->level[0].forward; + serverAssert(x && score == x->score && x->ele==oldele); + if (newele) + x->ele = newele; + + /* try to defrag the skiplist record itself */ + newx = activeDefragAlloc(x); + if (newx) { + zslUpdateNode(zsl, x, newx, update); + return &newx->score; + } + return NULL; +} + +/* for each key we scan in the main dict, this function will attempt to defrag all the various pointers it has. + * returns a stat of how many pointers were moved. 
*/ +int defargKey(redisDb *db, dictEntry *de) { + sds keysds = dictGetKey(de); + robj *newob, *ob; + unsigned char *newzl; + dict *d; + dictIterator *di; + int defragged = 0; + sds newsds; + + /* try to defrag the key name */ + newsds = activeDefragSds(keysds); + if (newsds) { + de->key = newsds; + if (dictSize(db->expires)) { + /* Dirty code: + * i can't search in db->expires for that key after i already released the pointer it holds + * it won't be able to do the string compare */ + unsigned int hash = dictGetHash(db->dict, newsds); + dictReplaceKeyPtr(db->expires, keysds, newsds, hash); + } + defragged++; + } + + /* try to defrag robj and / or string value */ + ob = dictGetVal(de); + if ((newob = activeDefragStringOb(ob))) { + de->v.val = newob; + ob = newob; + defragged++; + } + + if (ob->type == OBJ_STRING) { + /* already handled in activeDefragStringOb */ + } else if (ob->type == OBJ_LIST) { + if (ob->encoding == OBJ_ENCODING_QUICKLIST) { + quicklist *ql = ob->ptr, *newql; + quicklistNode *node = ql->head, *newnode; + if ((newql = activeDefragAlloc(ql))) + defragged++, ob->ptr = ql = newql; + do { + if ((newnode = activeDefragAlloc(node))) { + if (newnode->prev) + newnode->prev->next = newnode; + else + ql->head = newnode; + if (newnode->next) + newnode->next->prev = newnode; + else + ql->tail = newnode; + node = newnode; + defragged++; + } + if ((newzl = activeDefragAlloc(node->zl))) + defragged++, node->zl = newzl; + } while ((node = node->next)); + } else if (ob->encoding == OBJ_ENCODING_ZIPLIST) { + if ((newzl = activeDefragAlloc(ob->ptr))) + defragged++, ob->ptr = newzl; + } else { + serverPanic("Unknown list encoding"); + } + } else if (ob->type == OBJ_SET) { + if (ob->encoding == OBJ_ENCODING_HT) { + d = ob->ptr; + di = dictGetIterator(d); + while((de = dictNext(di)) != NULL) { + sds sdsele = dictGetKey(de); + if ((newsds = activeDefragSds(sdsele))) + defragged++, de->key = newsds; + defragged += dictIterDefragEntry(di); + } + dictReleaseIterator(di); 
+ dictDefragTables((dict**)&ob->ptr); + } else if (ob->encoding == OBJ_ENCODING_INTSET) { + intset *is = ob->ptr; + intset *newis = activeDefragAlloc(is); + if (newis) + defragged++, ob->ptr = newis; + } else { + serverPanic("Unknown set encoding"); + } + } else if (ob->type == OBJ_ZSET) { + if (ob->encoding == OBJ_ENCODING_ZIPLIST) { + if ((newzl = activeDefragAlloc(ob->ptr))) + defragged++, ob->ptr = newzl; + } else if (ob->encoding == OBJ_ENCODING_SKIPLIST) { + zset *zs = (zset*)ob->ptr; + zset *newzs = activeDefragAlloc(zs); + zskiplist *newzsl; + if (newzs) + defragged++, ob->ptr = zs = newzs; + newzsl = activeDefragAlloc(zs->zsl); + if (newzsl) + defragged++, zs->zsl = newzsl; + d = zs->dict; + di = dictGetIterator(d); + while((de = dictNext(di)) != NULL) { + double* newscore; + sds sdsele = dictGetKey(de); + if ((newsds = activeDefragSds(sdsele))) + defragged++, de->key = newsds; + newscore = zslDefrag(zs->zsl, *(double*)dictGetVal(de), sdsele, newsds); + if (newscore) { + dictSetVal(d, de, newscore); + defragged++; + } + defragged += dictIterDefragEntry(di); + } + dictReleaseIterator(di); + dictDefragTables(&zs->dict); + } else { + serverPanic("Unknown sorted set encoding"); + } + } else if (ob->type == OBJ_HASH) { + if (ob->encoding == OBJ_ENCODING_ZIPLIST) { + if ((newzl = activeDefragAlloc(ob->ptr))) + defragged++, ob->ptr = newzl; + } else if (ob->encoding == OBJ_ENCODING_HT) { + d = ob->ptr; + di = dictGetIterator(d); + while((de = dictNext(di)) != NULL) { + sds sdsele = dictGetKey(de); + if ((newsds = activeDefragSds(sdsele))) + defragged++, de->key = newsds; + sdsele = dictGetVal(de); + if ((newsds = activeDefragSds(sdsele))) + defragged++, de->v.val = newsds; + defragged += dictIterDefragEntry(di); + } + dictReleaseIterator(di); + dictDefragTables((dict**)&ob->ptr); + } else { + serverPanic("Unknown hash encoding"); + } + } else { + serverPanic("Unknown object type"); + } + return defragged; +} + +/* defrag scan callback for the main db dictionary 
*/ +void defragScanCallback(void *privdata, const dictEntry *de) { + /* TODO: defrag the dictEntry (and also the entriy in expire dict). */ + int defragged = defargKey((redisDb*)privdata, (dictEntry*)de); + server.stat_active_defrag_hits += defragged; + if(defragged) + server.stat_active_defrag_key_hits++; + else + server.stat_active_defrag_key_misses++; +} + +/* defrag scan callback for for each hash table bicket, + * used in order to defrag the dictEntry allocations */ +void defragDictBucketCallback(void *privdata, dictEntry **bucketref) { + UNUSED(privdata); + while(*bucketref) { + dictEntry *de = *bucketref, *newde; + if ((newde = activeDefragAlloc(de))) { + *bucketref = newde; + } + bucketref = &(*bucketref)->next; + } +} + +/* Utility function to get the fragmentation ratio from jemalloc. + * it is critical to do that by comparing only heap maps that belown to jemalloc, and skip ones the jemalloc keeps as spare. + * since we use this fragmentation ratio in order to decide if a defrag action should be taken or not, + * a false detection can cause the defragmenter to waste a lot of CPU without the possibility of getting any results. */ +float getAllocatorFragmentation(size_t *out_frag_bytes) { + size_t epoch = 1, allocated = 0, resident = 0, active = 0, sz = sizeof(size_t); + je_mallctl("epoch", &epoch, &sz, &epoch, sz); /* Update the statistics cached by mallctl. 
*/ + je_mallctl("stats.resident", &resident, &sz, NULL, 0); /* unlike RSS, this does not include RSS from shared libraries and other non heap mappings */ + je_mallctl("stats.active", &active, &sz, NULL, 0); /* unlike resident, this doesn't not include the pages jemalloc reserves for re-use (purge will clean that) */ + je_mallctl("stats.allocated", &allocated, &sz, NULL, 0); /* unlike zmalloc_used_memory, this matches the stats.resident by taking into account all allocations done by this process (not only zmalloc) */ + float frag_pct = ((float)active / allocated)*100 - 100; + size_t frag_bytes = active - allocated; + float rss_pct = ((float)resident / allocated)*100 - 100; + size_t rss_bytes = resident - allocated; + if(out_frag_bytes) + *out_frag_bytes = frag_bytes; + serverLog(LL_DEBUG, + "allocated=%zu, active=%zu, resident=%zu, frag=%.0f%% (%.0f%% rss), frag_bytes=%zu (%zu%% rss)", + allocated, active, resident, frag_pct, rss_pct, frag_bytes, rss_bytes); + return frag_pct; +} + +#define INTERPOLATE(x, x1, x2, y1, y2) ( (y1) + ((x)-(x1)) * ((y2)-(y1)) / ((x2)-(x1)) ) +#define LIMIT(y, min, max) ((y)<(min)? min: ((y)>(max)? max: (y))) + +/* Perform incremental defragmentation work from the serverCron. + * This works in a similar way to activeExpireCycle, in the sense that + * we do incremental work across calls. */ +void activeDefragCycle(void) { + static int current_db = -1; + static unsigned long cursor = 0; + static redisDb *db = NULL; + static long long start_scan, start_stat; + unsigned int iterations = 0; + unsigned long long defragged = server.stat_active_defrag_hits; + long long start, timelimit; + + if (server.aof_child_pid!=-1 || server.rdb_child_pid!=-1) + return; /* defragging memory while there's a fork will just do damage. 
*/ + + /* once a second, check if we the fragmentation justfies starting a scan or making it more aggressive */ + run_with_period(1000) { + size_t frag_bytes; + float frag_pct = getAllocatorFragmentation(&frag_bytes); + /* if we're not already running, and below the threshold, exit. */ + if (!server.active_defrag_running) { + if(frag_pct < server.active_defrag_threshold_lower || frag_bytes < server.active_defrag_ignore_bytes) + return; + } + + /* calculate the adaptive aggressiveness of the defrag */ + int cpu_pct = INTERPOLATE(frag_pct, server.active_defrag_threshold_lower, server.active_defrag_threshold_upper, + server.active_defrag_cycle_min, server.active_defrag_cycle_max); + cpu_pct = LIMIT(cpu_pct, server.active_defrag_cycle_min, server.active_defrag_cycle_max); + /* we allow increasing the aggressiveness during a scan, but don't reduce it */ + if (!server.active_defrag_running || cpu_pct > server.active_defrag_running) { + server.active_defrag_running = cpu_pct; + serverLog(LL_VERBOSE, + "Starting active defrag, frag=%.0f%%, frag_bytes=%zu, cpu=%d%%", + frag_pct, frag_bytes, cpu_pct); + } + } + if (!server.active_defrag_running) + return; + + /* See activeExpireCycle for how timelimit is handled. 
*/ + start = ustime(); + timelimit = 1000000*server.active_defrag_running/server.hz/100; + if (timelimit <= 0) timelimit = 1; + + do { + if (!cursor) { + /* Move on to next database, and stop if we reached the last one */ + if (++current_db >= server.dbnum) { + long long now = ustime(); + size_t frag_bytes; + float frag_pct = getAllocatorFragmentation(&frag_bytes); + serverLog(LL_VERBOSE, + "Active defrag done in %dms, reallocated=%d, frag=%.0f%%, frag_bytes=%zu", + (int)((now - start_scan)/1000), (int)(server.stat_active_defrag_hits - start_stat), frag_pct, frag_bytes); + + start_scan = now; + current_db = -1; + cursor = 0; + db = NULL; + server.active_defrag_running = 0; + return; + } + else if (current_db==0) { + /* start a scan from the first database */ + start_scan = ustime(); + start_stat = server.stat_active_defrag_hits; + } + + db = &server.db[current_db]; + cursor = 0; + } + + do { + cursor = dictScan(db->dict, cursor, defragScanCallback, defragDictBucketCallback, db); + /* once in 16 scan iterations, or 1000 pointer reallocations (if we have a lot of pointers in one hash bucket), + * check if we reached the tiem limit */ + if (cursor && (++iterations > 16 || server.stat_active_defrag_hits - defragged > 1000)) { + if ((ustime() - start) > timelimit) { + return; + } + iterations = 0; + defragged = server.stat_active_defrag_hits; + } + } while(cursor); + } while(1); +} + +#else /* USE_JEMALLOC */ + +void activeDefragCycle(void) { + /* not implemented yet*/ +} + +#endif \ No newline at end of file diff --git a/src/dict.c b/src/dict.c index b9b2390f1..7b093ac57 100644 --- a/src/dict.c +++ b/src/dict.c @@ -885,6 +885,7 @@ static unsigned long rev(unsigned long v) { unsigned long dictScan(dict *d, unsigned long v, dictScanFunction *fn, + dictScanBucketFunction* bucketfn, void *privdata) { dictht *t0, *t1; @@ -898,6 +899,7 @@ unsigned long dictScan(dict *d, m0 = t0->sizemask; /* Emit entries at cursor */ + if (bucketfn) bucketfn(privdata, &t0->table[v & m0]); de 
= t0->table[v & m0]; while (de) { next = de->next; @@ -919,6 +921,7 @@ unsigned long dictScan(dict *d, m1 = t1->sizemask; /* Emit entries at cursor */ + if (bucketfn) bucketfn(privdata, &t0->table[v & m0]); de = t0->table[v & m0]; while (de) { next = de->next; @@ -930,6 +933,7 @@ unsigned long dictScan(dict *d, * of the index pointed to by the cursor in the smaller table */ do { /* Emit entries at cursor */ + if (bucketfn) bucketfn(privdata, &t1->table[v & m1]); de = t1->table[v & m1]; while (de) { next = de->next; @@ -1040,6 +1044,35 @@ void dictDisableResize(void) { dict_can_resize = 0; } +unsigned int dictGetHash(dict *d, const void *key) { + return dictHashKey(d, key); +} + +/* Replace an old key pointer in the dictionary with a new pointer. + * oldkey is a dead pointer and should not be accessed. + * the hash value should be provided using dictGetHash. + * no string / key comparison is performed. + * return value is the dictEntry if found, or NULL if not found. */ +dictEntry *dictReplaceKeyPtr(dict *d, const void *oldptr, void *newptr, unsigned int hash) { + dictEntry *he; + unsigned int idx, table; + + if (d->ht[0].used + d->ht[1].used == 0) return NULL; /* dict is empty */ + for (table = 0; table <= 1; table++) { + idx = hash & d->ht[table].sizemask; + he = d->ht[table].table[idx]; + while(he) { + if (oldptr==he->key) { + he->key = newptr; + return he; + } + he = he->next; + } + if (!dictIsRehashing(d)) return NULL; + } + return NULL; +} + /* ------------------------------- Debugging ---------------------------------*/ #define DICT_STATS_VECTLEN 50 diff --git a/src/dict.h b/src/dict.h index 04b247a25..fcb68d998 100644 --- a/src/dict.h +++ b/src/dict.h @@ -95,6 +95,7 @@ typedef struct dictIterator { } dictIterator; typedef void (dictScanFunction)(void *privdata, const dictEntry *de); +typedef void (dictScanBucketFunction)(void *privdata, dictEntry **bucketref); /* This is the initial size of every hash table */ #define DICT_HT_INITIAL_SIZE 4 @@ -176,7 +177,9 
@@ int dictRehash(dict *d, int n); int dictRehashMilliseconds(dict *d, int ms); void dictSetHashFunctionSeed(unsigned int initval); unsigned int dictGetHashFunctionSeed(void); -unsigned long dictScan(dict *d, unsigned long v, dictScanFunction *fn, void *privdata); +unsigned long dictScan(dict *d, unsigned long v, dictScanFunction *fn, dictScanBucketFunction *bucketfn, void *privdata); +unsigned int dictGetHash(dict *d, const void *key); +dictEntry *dictReplaceKeyPtr(dict *d, const void *oldptr, void *newptr, unsigned int hash); /* Hash table types */ extern dictType dictTypeHeapStringCopyKey; diff --git a/src/server.c b/src/server.c index f6868832d..8bf6510de 100644 --- a/src/server.c +++ b/src/server.c @@ -876,6 +876,10 @@ void databasesCron(void) { expireSlaveKeys(); } + /* Defrag keys gradually. */ + if (server.active_defrag_enabled) + activeDefragCycle(); + /* Perform hash tables rehashing if needed, but only if there are no * other processes saving the DB on disk. Otherwise rehashing is bad * as will cause a lot of copy-on-write of memory pages. 
*/ @@ -1332,6 +1336,12 @@ void initServerConfig(void) { server.maxidletime = CONFIG_DEFAULT_CLIENT_TIMEOUT; server.tcpkeepalive = CONFIG_DEFAULT_TCP_KEEPALIVE; server.active_expire_enabled = 1; + server.active_defrag_enabled = CONFIG_DEFAULT_ACTIVE_DEFRAG; + server.active_defrag_ignore_bytes = CONFIG_DEFAULT_DEFRAG_IGNORE_BYTES; + server.active_defrag_threshold_lower = CONFIG_DEFAULT_DEFRAG_THRESHOLD_LOWER; + server.active_defrag_threshold_upper = CONFIG_DEFAULT_DEFRAG_THRESHOLD_UPPER; + server.active_defrag_cycle_min = CONFIG_DEFAULT_DEFRAG_CYCLE_MIN; + server.active_defrag_cycle_max = CONFIG_DEFAULT_DEFRAG_CYCLE_MAX; server.client_max_querybuf_len = PROTO_MAX_QUERYBUF_LEN; server.saveparams = NULL; server.loading = 0; @@ -1368,6 +1378,7 @@ void initServerConfig(void) { server.rdb_checksum = CONFIG_DEFAULT_RDB_CHECKSUM; server.stop_writes_on_bgsave_err = CONFIG_DEFAULT_STOP_WRITES_ON_BGSAVE_ERROR; server.activerehashing = CONFIG_DEFAULT_ACTIVE_REHASHING; + server.active_defrag_running = 0; server.notify_keyspace_events = 0; server.maxclients = CONFIG_DEFAULT_MAX_CLIENTS; server.bpop_blocked_clients = 0; @@ -1718,6 +1729,10 @@ void resetServerStats(void) { server.stat_evictedkeys = 0; server.stat_keyspace_misses = 0; server.stat_keyspace_hits = 0; + server.stat_active_defrag_hits = 0; + server.stat_active_defrag_misses = 0; + server.stat_active_defrag_key_hits = 0; + server.stat_active_defrag_key_misses = 0; server.stat_fork_time = 0; server.stat_fork_rate = 0; server.stat_rejected_conn = 0; @@ -2873,6 +2888,7 @@ sds genRedisInfoString(char *section) { "maxmemory_policy:%s\r\n" "mem_fragmentation_ratio:%.2f\r\n" "mem_allocator:%s\r\n" + "active_defrag_running:%d\r\n" "lazyfree_pending_objects:%zu\r\n", zmalloc_used, hmem, @@ -2894,6 +2910,7 @@ sds genRedisInfoString(char *section) { evict_policy, mh->fragmentation, ZMALLOC_LIB, + server.active_defrag_running, lazyfreeGetPendingObjectsCount() ); freeMemoryOverheadData(mh); @@ -3013,7 +3030,11 @@ sds 
genRedisInfoString(char *section) { "pubsub_patterns:%lu\r\n" "latest_fork_usec:%lld\r\n" "migrate_cached_sockets:%ld\r\n" - "slave_expires_tracked_keys:%zu\r\n", + "slave_expires_tracked_keys:%zu\r\n" + "active_defrag_hits:%lld\r\n" + "active_defrag_misses:%lld\r\n" + "active_defrag_key_hits:%lld\r\n" + "active_defrag_key_misses:%lld\r\n", server.stat_numconnections, server.stat_numcommands, getInstantaneousMetric(STATS_METRIC_COMMAND), @@ -3033,7 +3054,11 @@ sds genRedisInfoString(char *section) { listLength(server.pubsub_patterns), server.stat_fork_time, dictSize(server.migrate_cached_sockets), - getSlaveKeyWithExpireCount()); + getSlaveKeyWithExpireCount(), + server.stat_active_defrag_hits, + server.stat_active_defrag_misses, + server.stat_active_defrag_key_hits, + server.stat_active_defrag_key_misses); } /* Replication */ diff --git a/src/server.h b/src/server.h index 140897c18..17e12d9d6 100644 --- a/src/server.h +++ b/src/server.h @@ -152,6 +152,12 @@ typedef long long mstime_t; /* millisecond time type. */ #define CONFIG_DEFAULT_LAZYFREE_LAZY_EXPIRE 0 #define CONFIG_DEFAULT_LAZYFREE_LAZY_SERVER_DEL 0 #define CONFIG_DEFAULT_ALWAYS_SHOW_LOGO 0 +#define CONFIG_DEFAULT_ACTIVE_DEFRAG 1 +#define CONFIG_DEFAULT_DEFRAG_THRESHOLD_LOWER 10 /* don't defrag when fragmentation is below 10% */ +#define CONFIG_DEFAULT_DEFRAG_THRESHOLD_UPPER 100 /* maximum defrag force at 100% fragmentation */ +#define CONFIG_DEFAULT_DEFRAG_IGNORE_BYTES (100<<20) /* don't defrag if frag overhead is below 100mb */ +#define CONFIG_DEFAULT_DEFRAG_CYCLE_MIN 25 /* 25% CPU min (at lower threshold) */ +#define CONFIG_DEFAULT_DEFRAG_CYCLE_MAX 75 /* 75% CPU max (at upper threshold) */ #define ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP 20 /* Loopkups per loop. 
*/ #define ACTIVE_EXPIRE_CYCLE_FAST_DURATION 1000 /* Microseconds */ @@ -857,6 +863,7 @@ struct redisServer { unsigned lruclock:LRU_BITS; /* Clock for LRU eviction */ int shutdown_asap; /* SHUTDOWN needed ASAP */ int activerehashing; /* Incremental rehash in serverCron() */ + int active_defrag_running; /* Active defragmentation running (holds current scan aggressiveness) */ char *requirepass; /* Pass for AUTH command, or NULL */ char *pidfile; /* PID file path */ int arch_bits; /* 32 or 64 depending on sizeof(long) */ @@ -908,6 +915,10 @@ struct redisServer { long long stat_evictedkeys; /* Number of evicted keys (maxmemory) */ long long stat_keyspace_hits; /* Number of successful lookups of keys */ long long stat_keyspace_misses; /* Number of failed lookups of keys */ + long long stat_active_defrag_hits; /* number of allocations moved */ + long long stat_active_defrag_misses; /* number of allocations scanned but not moved */ + long long stat_active_defrag_key_hits; /* number of keys with moved allocations */ + long long stat_active_defrag_key_misses;/* number of keys scanned and not moved */ size_t stat_peak_memory; /* Max used memory record */ long long stat_fork_time; /* Time needed to perform latest fork() */ double stat_fork_rate; /* Fork rate in GB/sec. */ @@ -937,6 +948,12 @@ struct redisServer { int maxidletime; /* Client timeout in seconds */ int tcpkeepalive; /* Set SO_KEEPALIVE if non-zero. */ int active_expire_enabled; /* Can be disabled for testing purposes. 
*/ + int active_defrag_enabled; + size_t active_defrag_ignore_bytes; /* minimum amount of fragmentation waste to start active defrag */ + int active_defrag_threshold_lower; /* minimum percentage of fragmentation to start active defrag */ + int active_defrag_threshold_upper; /* maximum percentage of fragmentation at which we use maximum effort */ + int active_defrag_cycle_min; /* minimal effort for defrag in CPU percentage */ + int active_defrag_cycle_max; /* maximal effort for defrag in CPU percentage */ size_t client_max_querybuf_len; /* Limit for client query buffer length */ int dbnum; /* Total number of configured DBs */ int supervised; /* 1 if supervised, 0 otherwise. */ @@ -1576,6 +1593,7 @@ void adjustOpenFilesLimit(void); void closeListeningSockets(int unlink_unix_socket); void updateCachedTime(void); void resetServerStats(void); +void activeDefragCycle(void); unsigned int getLRUClock(void); const char *evictPolicyToString(void); struct redisMemOverhead *getMemoryOverheadData(void); diff --git a/src/zmalloc.c b/src/zmalloc.c index 22bf84fce..974e4fc12 100644 --- a/src/zmalloc.c +++ b/src/zmalloc.c @@ -66,6 +66,8 @@ void zlibc_free(void *ptr) { #define calloc(count,size) je_calloc(count,size) #define realloc(ptr,size) je_realloc(ptr,size) #define free(ptr) je_free(ptr) +#define mallocx(size,flags) je_mallocx(size,flags) +#define dallocx(ptr,flags) je_dallocx(ptr,flags) #endif #define update_zmalloc_stat_alloc(__n) do { \ @@ -115,6 +117,24 @@ void *zmalloc(size_t size) { #endif } +/* Allocation and free functions that bypass the thread cache + * and go straight to the allocator arena bins. 
+ * Currently implemented only for jemalloc */ +#if defined(USE_JEMALLOC) && defined(MALLOCX_TCACHE_NONE) +void *zmalloc_no_tcache(size_t size) { + void *ptr = mallocx(size+PREFIX_SIZE, MALLOCX_TCACHE_NONE); + if (!ptr) zmalloc_oom_handler(size); + update_zmalloc_stat_alloc(zmalloc_size(ptr)); + return ptr; +} + +void zfree_no_tcache(void *ptr) { + if (ptr == NULL) return; + update_zmalloc_stat_free(zmalloc_size(ptr)); + dallocx(ptr, MALLOCX_TCACHE_NONE); +} +#endif + void *zcalloc(size_t size) { void *ptr = calloc(1, size+PREFIX_SIZE); diff --git a/src/zmalloc.h b/src/zmalloc.h index 9badf8f4c..2f7015490 100644 --- a/src/zmalloc.h +++ b/src/zmalloc.h @@ -69,6 +69,8 @@ void *zmalloc(size_t size); void *zcalloc(size_t size); void *zrealloc(void *ptr, size_t size); void zfree(void *ptr); +void zfree_no_tcache(void *ptr); +void *zmalloc_no_tcache(size_t size); char *zstrdup(const char *s); size_t zmalloc_used_memory(void); void zmalloc_enable_thread_safeness(void); From 763f49243d6b0800a0d469d7cc85534194575d77 Mon Sep 17 00:00:00 2001 From: oranagra Date: Mon, 2 Jan 2017 09:42:32 +0200 Subject: [PATCH 0245/1722] active defrag improvements --- src/debug.c | 3 +- src/defrag.c | 82 ++++++++++++++++++++++++++++++++-------------------- src/dict.c | 20 ++++++------- src/dict.h | 2 +- 4 files changed, 63 insertions(+), 44 deletions(-) diff --git a/src/debug.c b/src/debug.c index 5098d2b64..0026594d3 100644 --- a/src/debug.c +++ b/src/debug.c @@ -457,8 +457,9 @@ void debugCommand(client *c) { if (valsize==0) val = createStringObject(buf,strlen(buf)); else { + int buflen = strlen(buf); val = createStringObject(NULL,valsize); - memset(val->ptr, 0, valsize); + memcpy(val->ptr, buf, valsize<=buflen? 
valsize: buflen); } dbAdd(c->db,key,val); signalModifiedKey(c->db,key); diff --git a/src/defrag.c b/src/defrag.c index 663196c31..2f2f8fd07 100644 --- a/src/defrag.c +++ b/src/defrag.c @@ -94,15 +94,17 @@ sds activeDefragSds(sds sdsptr) { * returns NULL in case the allocatoin wasn't moved. * when it returns a non-null value, the old pointer was already released * and should NOT be accessed. */ -robj *activeDefragStringOb(robj* ob) { +robj *activeDefragStringOb(robj* ob, int *defragged) { robj *ret = NULL; if (ob->refcount!=1) return NULL; /* try to defrag robj (only if not an EMBSTR type (handled below) */ if (ob->type!=OBJ_STRING || ob->encoding!=OBJ_ENCODING_EMBSTR) { - if ((ret = activeDefragAlloc(ob))) + if ((ret = activeDefragAlloc(ob))) { ob = ret; + (*defragged)++; + } } /* try to defrag string object */ @@ -111,18 +113,14 @@ robj *activeDefragStringOb(robj* ob) { sds newsds = activeDefragSds((sds)ob->ptr); if (newsds) { ob->ptr = newsds; - /* we don't need to change the return value here. - * we can return NULL if 'ret' is still NULL (since the object pointer itself wasn't changed). - * but we set return value to ob as an indication that we defragged a pointer (for stats). 
- * NOTE: if ret is already set and the robj was moved, then our stats will be a bit off - * since two pointers were moved, but we show only one in the stats */ - ret = ob; + (*defragged)++; } } else if (ob->encoding==OBJ_ENCODING_EMBSTR) { /* the sds is embedded in the object allocation, calculate the offset and update the pointer in the new allocation */ long ofs = (intptr_t)ob->ptr - (intptr_t)ob; if ((ret = activeDefragAlloc(ob))) { ret->ptr = (void*)((intptr_t)ret + ofs); + (*defragged)++; } } else if (ob->encoding!=OBJ_ENCODING_INT) { serverPanic("Unknown string encoding"); @@ -191,18 +189,18 @@ void zslUpdateNode(zskiplist *zsl, zskiplistNode *oldnode, zskiplistNode *newnod if (update[i]->level[i].forward == oldnode) update[i]->level[i].forward = newnode; } - if (zsl->header==oldnode) - zsl->header = newnode; - if (zsl->tail==oldnode) - zsl->tail = newnode; + serverAssert(zsl->header!=oldnode); if (newnode->level[0].forward) { serverAssert(newnode->level[0].forward->backward==oldnode); newnode->level[0].forward->backward = newnode; + } else { + serverAssert(zsl->tail==oldnode); + zsl->tail = newnode; } } /* Defrag helper for sorted set. - * Update the robj pointer, defrag the struct and return the new score reference. + * Update the robj pointer, defrag the skiplist struct and return the new score reference. * we may not access oldele pointer (not even the pointer stored in the skiplist), as it was already freed. * newele may be null, in which case we only need to defrag the skiplist, but not update the obj pointer. * when return value is non-NULL, it is the score reference that must be updated in the dict record. 
*/ @@ -229,7 +227,7 @@ double *zslDefrag(zskiplist *zsl, double score, sds oldele, sds newele) { serverAssert(x && score == x->score && x->ele==oldele); if (newele) x->ele = newele; - + /* try to defrag the skiplist record itself */ newx = activeDefragAlloc(x); if (newx) { @@ -239,6 +237,28 @@ double *zslDefrag(zskiplist *zsl, double score, sds oldele, sds newele) { return NULL; } +/* Utility function that replaces an old key pointer in the dictionary with a new pointer. + * Additionally, we try to defrag the dictEntry in that dict. + * oldkey mey be a dead pointer and should not be accessed (we get a pre-calculated hash value). + * newkey may be null if the key pointer wasn't moved. + * return value is the the dictEntry if found, or NULL if not found. + * NOTE: this is very ugly code, but it let's us avoid the complication of doing a scan on another dict. */ +dictEntry* replaceSateliteDictKeyPtrAndOrDifragDictEntry(dict *d, sds oldkey, sds newkey, unsigned int hash, int *defragged) { + dictEntry **deref = dictFindEntryRefByPtrAndHash(d, oldkey, hash); + if (deref) { + dictEntry *de = *deref; + dictEntry *newde = activeDefragAlloc(de); + if (newde) { + de = *deref = newde; + (*defragged)++; + } + if (newkey) + de->key = newkey; + return de; + } + return NULL; +} + /* for each key we scan in the main dict, this function will attempt to defrag all the various pointers it has. * returns a stat of how many pointers were moved. 
*/ int defargKey(redisDb *db, dictEntry *de) { @@ -252,24 +272,21 @@ int defargKey(redisDb *db, dictEntry *de) { /* try to defrag the key name */ newsds = activeDefragSds(keysds); - if (newsds) { - de->key = newsds; - if (dictSize(db->expires)) { - /* Dirty code: - * i can't search in db->expires for that key after i already released the pointer it holds - * it won't be able to do the string compare */ - unsigned int hash = dictGetHash(db->dict, newsds); - dictReplaceKeyPtr(db->expires, keysds, newsds, hash); - } - defragged++; + if (newsds) + defragged++, de->key = newsds; + if (dictSize(db->expires)) { + /* Dirty code: + * i can't search in db->expires for that key after i already released the pointer it holds + * it won't be able to do the string compare */ + unsigned int hash = dictGetHash(db->dict, de->key); + replaceSateliteDictKeyPtrAndOrDifragDictEntry(db->expires, keysds, newsds, hash, &defragged); } /* try to defrag robj and / or string value */ ob = dictGetVal(de); - if ((newob = activeDefragStringOb(ob))) { + if ((newob = activeDefragStringOb(ob, &defragged))) { de->v.val = newob; ob = newob; - defragged++; } if (ob->type == OBJ_STRING) { @@ -328,13 +345,15 @@ int defargKey(redisDb *db, dictEntry *de) { defragged++, ob->ptr = newzl; } else if (ob->encoding == OBJ_ENCODING_SKIPLIST) { zset *zs = (zset*)ob->ptr; - zset *newzs = activeDefragAlloc(zs); + zset *newzs; zskiplist *newzsl; - if (newzs) + struct zskiplistNode *newheader; + if ((newzs = activeDefragAlloc(zs))) defragged++, ob->ptr = zs = newzs; - newzsl = activeDefragAlloc(zs->zsl); - if (newzsl) + if ((newzsl = activeDefragAlloc(zs->zsl))) defragged++, zs->zsl = newzsl; + if ((newheader = activeDefragAlloc(zs->zsl->header))) + defragged++, zs->zsl->header = newheader; d = zs->dict; di = dictGetIterator(d); while((de = dictNext(di)) != NULL) { @@ -383,7 +402,6 @@ int defargKey(redisDb *db, dictEntry *de) { /* defrag scan callback for the main db dictionary */ void defragScanCallback(void 
*privdata, const dictEntry *de) { - /* TODO: defrag the dictEntry (and also the entriy in expire dict). */ int defragged = defargKey((redisDb*)privdata, (dictEntry*)de); server.stat_active_defrag_hits += defragged; if(defragged) @@ -524,4 +542,4 @@ void activeDefragCycle(void) { /* not implemented yet*/ } -#endif \ No newline at end of file +#endif diff --git a/src/dict.c b/src/dict.c index 7b093ac57..59aef7724 100644 --- a/src/dict.c +++ b/src/dict.c @@ -1048,25 +1048,25 @@ unsigned int dictGetHash(dict *d, const void *key) { return dictHashKey(d, key); } -/* Replace an old key pointer in the dictionary with a new pointer. +/* Finds the dictEntry reference by using pointer and pre-calculated hash. * oldkey is a dead pointer and should not be accessed. * the hash value should be provided using dictGetHash. * no string / key comparison is performed. - * return value is the dictEntry if found, or NULL if not found. */ -dictEntry *dictReplaceKeyPtr(dict *d, const void *oldptr, void *newptr, unsigned int hash) { - dictEntry *he; + * return value is the reference to the dictEntry if found, or NULL if not found. 
*/ +dictEntry **dictFindEntryRefByPtrAndHash(dict *d, const void *oldptr, unsigned int hash) { + dictEntry *he, **heref; unsigned int idx, table; if (d->ht[0].used + d->ht[1].used == 0) return NULL; /* dict is empty */ for (table = 0; table <= 1; table++) { idx = hash & d->ht[table].sizemask; - he = d->ht[table].table[idx]; + heref = &d->ht[table].table[idx]; + he = *heref; while(he) { - if (oldptr==he->key) { - he->key = newptr; - return he; - } - he = he->next; + if (oldptr==he->key) + return heref; + heref = &he->next; + he = *heref; } if (!dictIsRehashing(d)) return NULL; } diff --git a/src/dict.h b/src/dict.h index fcb68d998..60a423a2c 100644 --- a/src/dict.h +++ b/src/dict.h @@ -179,7 +179,7 @@ void dictSetHashFunctionSeed(unsigned int initval); unsigned int dictGetHashFunctionSeed(void); unsigned long dictScan(dict *d, unsigned long v, dictScanFunction *fn, dictScanBucketFunction *bucketfn, void *privdata); unsigned int dictGetHash(dict *d, const void *key); -dictEntry *dictReplaceKeyPtr(dict *d, const void *oldptr, void *newptr, unsigned int hash); +dictEntry **dictFindEntryRefByPtrAndHash(dict *d, const void *oldptr, unsigned int hash); /* Hash table types */ extern dictType dictTypeHeapStringCopyKey; From 9c77672458009ffb7017fe16a8781be2de3aef4b Mon Sep 17 00:00:00 2001 From: Jan-Erik Rediger Date: Sun, 8 Jan 2017 18:27:00 +0100 Subject: [PATCH 0246/1722] Initialize help only in repl mode --- src/redis-cli.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index ac4358220..2a9dff712 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -1273,6 +1273,11 @@ static void repl(void) { int argc; sds *argv; + /* Initialize the help and, if possible, use the COMMAND command in order + * to retrieve missing entries. 
*/ + cliInitHelp(); + cliIntegrateHelp(); + config.interactive = 1; linenoiseSetMultiLine(1); linenoiseSetCompletionCallback(completionCallback); @@ -2606,11 +2611,6 @@ int main(int argc, char **argv) { argc -= firstarg; argv += firstarg; - /* Initialize the help and, if possible, use the COMMAND command in order - * to retrieve missing entries. */ - cliInitHelp(); - cliIntegrateHelp(); - /* Latency mode */ if (config.latency_mode) { if (cliConnect(0) == REDIS_ERR) exit(1); From e4f25b015cdf77379b8de3ff75dbb3196d387303 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 10 Jan 2017 11:25:39 +0100 Subject: [PATCH 0247/1722] Defrag: activate it only if running modified version of Jemalloc. This commit also includes minor aesthetic changes like removal of trailing spaces. --- .../include/jemalloc/jemalloc_macros.h.in | 4 ++ src/defrag.c | 44 +++++++++---------- src/zmalloc.c | 4 +- src/zmalloc.h | 14 +++++- 4 files changed, 40 insertions(+), 26 deletions(-) diff --git a/deps/jemalloc/include/jemalloc/jemalloc_macros.h.in b/deps/jemalloc/include/jemalloc/jemalloc_macros.h.in index 7f64d9ff9..a7028db34 100644 --- a/deps/jemalloc/include/jemalloc/jemalloc_macros.h.in +++ b/deps/jemalloc/include/jemalloc/jemalloc_macros.h.in @@ -100,3 +100,7 @@ # define JEMALLOC_RESTRICT_RETURN # define JEMALLOC_ALLOCATOR #endif + +/* This version of Jemalloc, modified for Redis, has the je_get_defrag_hint() + * function. */ +#define JEMALLOC_FRAG_HINT diff --git a/src/defrag.c b/src/defrag.c index 2f2f8fd07..d96e5db57 100644 --- a/src/defrag.c +++ b/src/defrag.c @@ -39,14 +39,14 @@ #include #include -#if defined(USE_JEMALLOC) && defined(MALLOCX_TCACHE_NONE) +#ifdef HAVE_DEFRAG /* this method was added to jemalloc in order to help us understand which * pointers are worthwhile moving and which aren't */ int je_get_defrag_hint(void* ptr, int *bin_util, int *run_util); /* Defrag helper for generic allocations. - * + * * returns NULL in case the allocatoin wasn't moved. 
* when it returns a non-null value, the old pointer was already released * and should NOT be accessed. */ @@ -58,13 +58,13 @@ void* activeDefragAlloc(void *ptr) { server.stat_active_defrag_misses++; return NULL; } - /* if this run is more utilized than the average utilization in this bin (or it is full), skip it. + /* if this run is more utilized than the average utilization in this bin (or it is full), skip it. * this will eventually move all the allocations from relatively empty runs into relatively full runs. */ if (run_util > bin_util || run_util == 1<<16) { server.stat_active_defrag_misses++; return NULL; } - /* move this allocation to a new allocation. + /* move this allocation to a new allocation. * make sure not to use the thread cache. so that we don't get back the same pointers we try to free */ size = zmalloc_size(ptr); newptr = zmalloc_no_tcache(size); @@ -74,7 +74,7 @@ void* activeDefragAlloc(void *ptr) { } /*Defrag helper for sds strings - * + * * returns NULL in case the allocatoin wasn't moved. * when it returns a non-null value, the old pointer was already released * and should NOT be accessed. */ @@ -90,7 +90,7 @@ sds activeDefragSds(sds sdsptr) { } /* Defrag helper for robj and/or string objects - * + * * returns NULL in case the allocatoin wasn't moved. * when it returns a non-null value, the old pointer was already released * and should NOT be accessed. */ @@ -221,7 +221,7 @@ double *zslDefrag(zskiplist *zsl, double score, sds oldele, sds newele) { x = x->level[i].forward; update[i] = x; } - + /* update the robj pointer inside the skip list record. */ x = x->level[0].forward; serverAssert(x && score == x->score && x->ele==oldele); @@ -243,7 +243,7 @@ double *zslDefrag(zskiplist *zsl, double score, sds oldele, sds newele) { * newkey may be null if the key pointer wasn't moved. * return value is the the dictEntry if found, or NULL if not found. * NOTE: this is very ugly code, but it let's us avoid the complication of doing a scan on another dict. 
*/ -dictEntry* replaceSateliteDictKeyPtrAndOrDifragDictEntry(dict *d, sds oldkey, sds newkey, unsigned int hash, int *defragged) { +dictEntry* replaceSateliteDictKeyPtrAndOrDefragDictEntry(dict *d, sds oldkey, sds newkey, unsigned int hash, int *defragged) { dictEntry **deref = dictFindEntryRefByPtrAndHash(d, oldkey, hash); if (deref) { dictEntry *de = *deref; @@ -269,7 +269,7 @@ int defargKey(redisDb *db, dictEntry *de) { dictIterator *di; int defragged = 0; sds newsds; - + /* try to defrag the key name */ newsds = activeDefragSds(keysds); if (newsds) @@ -279,7 +279,7 @@ int defargKey(redisDb *db, dictEntry *de) { * i can't search in db->expires for that key after i already released the pointer it holds * it won't be able to do the string compare */ unsigned int hash = dictGetHash(db->dict, de->key); - replaceSateliteDictKeyPtrAndOrDifragDictEntry(db->expires, keysds, newsds, hash, &defragged); + replaceSateliteDictKeyPtrAndOrDefragDictEntry(db->expires, keysds, newsds, hash, &defragged); } /* try to defrag robj and / or string value */ @@ -334,7 +334,7 @@ int defargKey(redisDb *db, dictEntry *de) { } else if (ob->encoding == OBJ_ENCODING_INTSET) { intset *is = ob->ptr; intset *newis = activeDefragAlloc(is); - if (newis) + if (newis) defragged++, ob->ptr = newis; } else { serverPanic("Unknown set encoding"); @@ -407,7 +407,7 @@ void defragScanCallback(void *privdata, const dictEntry *de) { if(defragged) server.stat_active_defrag_key_hits++; else - server.stat_active_defrag_key_misses++; + server.stat_active_defrag_key_misses++; } /* defrag scan callback for for each hash table bicket, @@ -439,8 +439,8 @@ float getAllocatorFragmentation(size_t *out_frag_bytes) { size_t rss_bytes = resident - allocated; if(out_frag_bytes) *out_frag_bytes = frag_bytes; - serverLog(LL_DEBUG, - "allocated=%zu, active=%zu, resident=%zu, frag=%.0f%% (%.0f%% rss), frag_bytes=%zu (%zu%% rss)", + serverLog(LL_DEBUG, + "allocated=%zu, active=%zu, resident=%zu, frag=%.0f%% (%.0f%% rss), 
frag_bytes=%zu (%zu%% rss)", allocated, active, resident, frag_pct, rss_pct, frag_bytes, rss_bytes); return frag_pct; } @@ -459,10 +459,10 @@ void activeDefragCycle(void) { unsigned int iterations = 0; unsigned long long defragged = server.stat_active_defrag_hits; long long start, timelimit; - + if (server.aof_child_pid!=-1 || server.rdb_child_pid!=-1) return; /* defragging memory while there's a fork will just do damage. */ - + /* once a second, check if we the fragmentation justfies starting a scan or making it more aggressive */ run_with_period(1000) { size_t frag_bytes; @@ -472,16 +472,16 @@ void activeDefragCycle(void) { if(frag_pct < server.active_defrag_threshold_lower || frag_bytes < server.active_defrag_ignore_bytes) return; } - + /* calculate the adaptive aggressiveness of the defrag */ - int cpu_pct = INTERPOLATE(frag_pct, server.active_defrag_threshold_lower, server.active_defrag_threshold_upper, + int cpu_pct = INTERPOLATE(frag_pct, server.active_defrag_threshold_lower, server.active_defrag_threshold_upper, server.active_defrag_cycle_min, server.active_defrag_cycle_max); cpu_pct = LIMIT(cpu_pct, server.active_defrag_cycle_min, server.active_defrag_cycle_max); /* we allow increasing the aggressiveness during a scan, but don't reduce it */ if (!server.active_defrag_running || cpu_pct > server.active_defrag_running) { server.active_defrag_running = cpu_pct; - serverLog(LL_VERBOSE, - "Starting active defrag, frag=%.0f%%, frag_bytes=%zu, cpu=%d%%", + serverLog(LL_VERBOSE, + "Starting active defrag, frag=%.0f%%, frag_bytes=%zu, cpu=%d%%", frag_pct, frag_bytes, cpu_pct); } } @@ -500,7 +500,7 @@ void activeDefragCycle(void) { long long now = ustime(); size_t frag_bytes; float frag_pct = getAllocatorFragmentation(&frag_bytes); - serverLog(LL_VERBOSE, + serverLog(LL_VERBOSE, "Active defrag done in %dms, reallocated=%d, frag=%.0f%%, frag_bytes=%zu", (int)((now - start_scan)/1000), (int)(server.stat_active_defrag_hits - start_stat), frag_pct, frag_bytes); @@ 
-536,7 +536,7 @@ void activeDefragCycle(void) { } while(1); } -#else /* USE_JEMALLOC */ +#else /* HAVE_DEFRAG */ void activeDefragCycle(void) { /* not implemented yet*/ diff --git a/src/zmalloc.c b/src/zmalloc.c index 974e4fc12..f71ce2c9e 100644 --- a/src/zmalloc.c +++ b/src/zmalloc.c @@ -119,8 +119,8 @@ void *zmalloc(size_t size) { /* Allocation and free functions that bypass the thread cache * and go straight to the allocator arena bins. - * Currently implemented only for jemalloc */ -#if defined(USE_JEMALLOC) && defined(MALLOCX_TCACHE_NONE) + * Currently implemented only for jemalloc. Used for online defragmentation. */ +#ifdef HAVE_DEFRAG void *zmalloc_no_tcache(size_t size) { void *ptr = mallocx(size+PREFIX_SIZE, MALLOCX_TCACHE_NONE); if (!ptr) zmalloc_oom_handler(size); diff --git a/src/zmalloc.h b/src/zmalloc.h index 2f7015490..b6d4e1d97 100644 --- a/src/zmalloc.h +++ b/src/zmalloc.h @@ -65,12 +65,17 @@ #define ZMALLOC_LIB "libc" #endif +/* We can enable the Redis defrag capabilities only if we are using Jemalloc + * and the version used is our special version modified for Redis having + * the ability to return per-allocation fragmentation hints. 
*/ +#if defined(USE_JEMALLOC) && defined(JEMALLOC_FRAG_HINT) +#define HAVE_DEFRAG +#endif + void *zmalloc(size_t size); void *zcalloc(size_t size); void *zrealloc(void *ptr, size_t size); void zfree(void *ptr); -void zfree_no_tcache(void *ptr); -void *zmalloc_no_tcache(size_t size); char *zstrdup(const char *s); size_t zmalloc_used_memory(void); void zmalloc_enable_thread_safeness(void); @@ -82,6 +87,11 @@ size_t zmalloc_get_smap_bytes_by_field(char *field, long pid); size_t zmalloc_get_memory_size(void); void zlibc_free(void *ptr); +#ifdef HAVE_DEFRAG +void zfree_no_tcache(void *ptr); +void *zmalloc_no_tcache(size_t size); +#endif + #ifndef HAVE_MALLOC_SIZE size_t zmalloc_size(void *ptr); #endif From da55ed00329629b91f1d0bfb9635d2dea322d758 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 10 Jan 2017 11:32:52 +0100 Subject: [PATCH 0248/1722] Defrag: fix comments & code to conform to the Redis code base. Don't go over 80 cols. Start with captial letter, capital letter afer point, end comment with a point and so forth. No actual code behavior touched at all. --- src/defrag.c | 138 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 84 insertions(+), 54 deletions(-) diff --git a/src/defrag.c b/src/defrag.c index d96e5db57..ddfd6e2dd 100644 --- a/src/defrag.c +++ b/src/defrag.c @@ -58,14 +58,16 @@ void* activeDefragAlloc(void *ptr) { server.stat_active_defrag_misses++; return NULL; } - /* if this run is more utilized than the average utilization in this bin (or it is full), skip it. - * this will eventually move all the allocations from relatively empty runs into relatively full runs. */ + /* if this run is more utilized than the average utilization in this bin + * (or it is full), skip it. This will eventually move all the allocations + * from relatively empty runs into relatively full runs. */ if (run_util > bin_util || run_util == 1<<16) { server.stat_active_defrag_misses++; return NULL; } /* move this allocation to a new allocation. 
- * make sure not to use the thread cache. so that we don't get back the same pointers we try to free */ + * make sure not to use the thread cache. so that we don't get back the same + * pointers we try to free */ size = zmalloc_size(ptr); newptr = zmalloc_no_tcache(size); memcpy(newptr, ptr, size); @@ -99,7 +101,7 @@ robj *activeDefragStringOb(robj* ob, int *defragged) { if (ob->refcount!=1) return NULL; - /* try to defrag robj (only if not an EMBSTR type (handled below) */ + /* try to defrag robj (only if not an EMBSTR type (handled below). */ if (ob->type!=OBJ_STRING || ob->encoding!=OBJ_ENCODING_EMBSTR) { if ((ret = activeDefragAlloc(ob))) { ob = ret; @@ -116,7 +118,8 @@ robj *activeDefragStringOb(robj* ob, int *defragged) { (*defragged)++; } } else if (ob->encoding==OBJ_ENCODING_EMBSTR) { - /* the sds is embedded in the object allocation, calculate the offset and update the pointer in the new allocation */ + /* The sds is embedded in the object allocation, calculate the + * offset and update the pointer in the new allocation. */ long ofs = (intptr_t)ob->ptr - (intptr_t)ob; if ((ret = activeDefragAlloc(ob))) { ret->ptr = (void*)((intptr_t)ret + ofs); @@ -129,14 +132,16 @@ robj *activeDefragStringOb(robj* ob, int *defragged) { return ret; } -/* Defrag helper for dictEntries to be used during dict iteration (called on each step). - * returns a stat of how many pointers were moved. */ +/* Defrag helper for dictEntries to be used during dict iteration (called on + * each step). Teturns a stat of how many pointers were moved. */ int dictIterDefragEntry(dictIterator *iter) { - /* This function is a little bit dirty since it messes with the internals of the dict and it's iterator, - * but the benefit is that it is very easy to use, and require no other chagnes in the dict. 
*/ + /* This function is a little bit dirty since it messes with the internals + * of the dict and it's iterator, but the benefit is that it is very easy + * to use, and require no other chagnes in the dict. */ int defragged = 0; dictht *ht; - /* handle the next entry (if there is one), and update the pointer in the current entry. */ + /* Handle the next entry (if there is one), and update the pointer in the + * current entry. */ if (iter->nextEntry) { dictEntry *newde = activeDefragAlloc(iter->nextEntry); if (newde) { @@ -159,8 +164,8 @@ int dictIterDefragEntry(dictIterator *iter) { } /* Defrag helper for dict main allocations (dict struct, and hash tables). - * receives a pointer to the dict* and implicitly updates it when the dict struct itself was moved. - * returns a stat of how many pointers were moved. */ + * receives a pointer to the dict* and implicitly updates it when the dict + * struct itself was moved. Returns a stat of how many pointers were moved. */ int dictDefragTables(dict** dictRef) { dict *d = *dictRef; dictEntry **newtable; @@ -200,10 +205,12 @@ void zslUpdateNode(zskiplist *zsl, zskiplistNode *oldnode, zskiplistNode *newnod } /* Defrag helper for sorted set. - * Update the robj pointer, defrag the skiplist struct and return the new score reference. - * we may not access oldele pointer (not even the pointer stored in the skiplist), as it was already freed. - * newele may be null, in which case we only need to defrag the skiplist, but not update the obj pointer. - * when return value is non-NULL, it is the score reference that must be updated in the dict record. */ + * Update the robj pointer, defrag the skiplist struct and return the new score + * reference. We may not access oldele pointer (not even the pointer stored in + * the skiplist), as it was already freed. Newele may be null, in which case we + * only need to defrag the skiplist, but not update the obj pointer. 
+ * When return value is non-NULL, it is the score reference that must be updated + * in the dict record. */ double *zslDefrag(zskiplist *zsl, double score, sds oldele, sds newele) { zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x, *newx; int i; @@ -214,7 +221,9 @@ double *zslDefrag(zskiplist *zsl, double score, sds oldele, sds newele) { x = zsl->header; for (i = zsl->level-1; i >= 0; i--) { while (x->level[i].forward && - x->level[i].forward->ele != oldele && /* make sure not to access the ->obj pointer if it matches oldele */ + x->level[i].forward->ele != oldele && /* make sure not to access the + ->obj pointer if it matches + oldele */ (x->level[i].forward->score < score || (x->level[i].forward->score == score && sdscmp(x->level[i].forward->ele,ele) < 0))) @@ -237,12 +246,13 @@ double *zslDefrag(zskiplist *zsl, double score, sds oldele, sds newele) { return NULL; } -/* Utility function that replaces an old key pointer in the dictionary with a new pointer. - * Additionally, we try to defrag the dictEntry in that dict. - * oldkey mey be a dead pointer and should not be accessed (we get a pre-calculated hash value). - * newkey may be null if the key pointer wasn't moved. - * return value is the the dictEntry if found, or NULL if not found. - * NOTE: this is very ugly code, but it let's us avoid the complication of doing a scan on another dict. */ +/* Utility function that replaces an old key pointer in the dictionary with a + * new pointer. Additionally, we try to defrag the dictEntry in that dict. + * Oldkey mey be a dead pointer and should not be accessed (we get a + * pre-calculated hash value). Newkey may be null if the key pointer wasn't + * moved. Return value is the the dictEntry if found, or NULL if not found. + * NOTE: this is very ugly code, but it let's us avoid the complication of + * doing a scan on another dict. 
*/ dictEntry* replaceSateliteDictKeyPtrAndOrDefragDictEntry(dict *d, sds oldkey, sds newkey, unsigned int hash, int *defragged) { dictEntry **deref = dictFindEntryRefByPtrAndHash(d, oldkey, hash); if (deref) { @@ -259,8 +269,9 @@ dictEntry* replaceSateliteDictKeyPtrAndOrDefragDictEntry(dict *d, sds oldkey, sd return NULL; } -/* for each key we scan in the main dict, this function will attempt to defrag all the various pointers it has. - * returns a stat of how many pointers were moved. */ +/* for each key we scan in the main dict, this function will attempt to defrag + * all the various pointers it has. Returns a stat of how many pointers were + * moved. */ int defargKey(redisDb *db, dictEntry *de) { sds keysds = dictGetKey(de); robj *newob, *ob; @@ -270,19 +281,19 @@ int defargKey(redisDb *db, dictEntry *de) { int defragged = 0; sds newsds; - /* try to defrag the key name */ + /* Try to defrag the key name. */ newsds = activeDefragSds(keysds); if (newsds) defragged++, de->key = newsds; if (dictSize(db->expires)) { /* Dirty code: - * i can't search in db->expires for that key after i already released the pointer it holds - * it won't be able to do the string compare */ + * I can't search in db->expires for that key after i already released + * the pointer it holds it won't be able to do the string compare */ unsigned int hash = dictGetHash(db->dict, de->key); replaceSateliteDictKeyPtrAndOrDefragDictEntry(db->expires, keysds, newsds, hash, &defragged); } - /* try to defrag robj and / or string value */ + /* Try to defrag robj and / or string value. */ ob = dictGetVal(de); if ((newob = activeDefragStringOb(ob, &defragged))) { de->v.val = newob; @@ -290,7 +301,7 @@ int defargKey(redisDb *db, dictEntry *de) { } if (ob->type == OBJ_STRING) { - /* already handled in activeDefragStringOb */ + /* Already handled in activeDefragStringOb. 
*/ } else if (ob->type == OBJ_LIST) { if (ob->encoding == OBJ_ENCODING_QUICKLIST) { quicklist *ql = ob->ptr, *newql; @@ -400,7 +411,7 @@ int defargKey(redisDb *db, dictEntry *de) { return defragged; } -/* defrag scan callback for the main db dictionary */ +/* Defrag scan callback for the main db dictionary. */ void defragScanCallback(void *privdata, const dictEntry *de) { int defragged = defargKey((redisDb*)privdata, (dictEntry*)de); server.stat_active_defrag_hits += defragged; @@ -410,8 +421,8 @@ void defragScanCallback(void *privdata, const dictEntry *de) { server.stat_active_defrag_key_misses++; } -/* defrag scan callback for for each hash table bicket, - * used in order to defrag the dictEntry allocations */ +/* Defrag scan callback for for each hash table bicket, + * used in order to defrag the dictEntry allocations. */ void defragDictBucketCallback(void *privdata, dictEntry **bucketref) { UNUSED(privdata); while(*bucketref) { @@ -424,15 +435,24 @@ void defragDictBucketCallback(void *privdata, dictEntry **bucketref) { } /* Utility function to get the fragmentation ratio from jemalloc. - * it is critical to do that by comparing only heap maps that belown to jemalloc, and skip ones the jemalloc keeps as spare. - * since we use this fragmentation ratio in order to decide if a defrag action should be taken or not, - * a false detection can cause the defragmenter to waste a lot of CPU without the possibility of getting any results. */ + * It is critical to do that by comparing only heap maps that belown to + * jemalloc, and skip ones the jemalloc keeps as spare. Since we use this + * fragmentation ratio in order to decide if a defrag action should be taken + * or not, a false detection can cause the defragmenter to waste a lot of CPU + * without the possibility of getting any results. 
*/ float getAllocatorFragmentation(size_t *out_frag_bytes) { size_t epoch = 1, allocated = 0, resident = 0, active = 0, sz = sizeof(size_t); - je_mallctl("epoch", &epoch, &sz, &epoch, sz); /* Update the statistics cached by mallctl. */ - je_mallctl("stats.resident", &resident, &sz, NULL, 0); /* unlike RSS, this does not include RSS from shared libraries and other non heap mappings */ - je_mallctl("stats.active", &active, &sz, NULL, 0); /* unlike resident, this doesn't not include the pages jemalloc reserves for re-use (purge will clean that) */ - je_mallctl("stats.allocated", &allocated, &sz, NULL, 0); /* unlike zmalloc_used_memory, this matches the stats.resident by taking into account all allocations done by this process (not only zmalloc) */ + /* Update the statistics cached by mallctl. */ + je_mallctl("epoch", &epoch, &sz, &epoch, sz); + /* Unlike RSS, this does not include RSS from shared libraries and other non + * heap mappings. */ + je_mallctl("stats.resident", &resident, &sz, NULL, 0); + /* Unlike resident, this doesn't not include the pages jemalloc reserves + * for re-use (purge will clean that). */ + je_mallctl("stats.active", &active, &sz, NULL, 0); + /* Unlike zmalloc_used_memory, this matches the stats.resident by taking + * into account all allocations done by this process (not only zmalloc). */ + je_mallctl("stats.allocated", &allocated, &sz, NULL, 0); float frag_pct = ((float)active / allocated)*100 - 100; size_t frag_bytes = active - allocated; float rss_pct = ((float)resident / allocated)*100 - 100; @@ -461,24 +481,33 @@ void activeDefragCycle(void) { long long start, timelimit; if (server.aof_child_pid!=-1 || server.rdb_child_pid!=-1) - return; /* defragging memory while there's a fork will just do damage. */ + return; /* Defragging memory while there's a fork will just do damage. 
*/ - /* once a second, check if we the fragmentation justfies starting a scan or making it more aggressive */ + /* Once a second, check if we the fragmentation justfies starting a scan + * or making it more aggressive. */ run_with_period(1000) { size_t frag_bytes; float frag_pct = getAllocatorFragmentation(&frag_bytes); - /* if we're not already running, and below the threshold, exit. */ + /* If we're not already running, and below the threshold, exit. */ if (!server.active_defrag_running) { if(frag_pct < server.active_defrag_threshold_lower || frag_bytes < server.active_defrag_ignore_bytes) return; } - /* calculate the adaptive aggressiveness of the defrag */ - int cpu_pct = INTERPOLATE(frag_pct, server.active_defrag_threshold_lower, server.active_defrag_threshold_upper, - server.active_defrag_cycle_min, server.active_defrag_cycle_max); - cpu_pct = LIMIT(cpu_pct, server.active_defrag_cycle_min, server.active_defrag_cycle_max); - /* we allow increasing the aggressiveness during a scan, but don't reduce it */ - if (!server.active_defrag_running || cpu_pct > server.active_defrag_running) { + /* Calculate the adaptive aggressiveness of the defrag */ + int cpu_pct = INTERPOLATE(frag_pct, + server.active_defrag_threshold_lower, + server.active_defrag_threshold_upper, + server.active_defrag_cycle_min, + server.active_defrag_cycle_max); + cpu_pct = LIMIT(cpu_pct, + server.active_defrag_cycle_min, + server.active_defrag_cycle_max); + /* We allow increasing the aggressiveness during a scan, but don't + * reduce it. */ + if (!server.active_defrag_running || + cpu_pct > server.active_defrag_running) + { server.active_defrag_running = cpu_pct; serverLog(LL_VERBOSE, "Starting active defrag, frag=%.0f%%, frag_bytes=%zu, cpu=%d%%", @@ -495,7 +524,7 @@ void activeDefragCycle(void) { do { if (!cursor) { - /* Move on to next database, and stop if we reached the last one */ + /* Move on to next database, and stop if we reached the last one. 
*/ if (++current_db >= server.dbnum) { long long now = ustime(); size_t frag_bytes; @@ -512,7 +541,7 @@ void activeDefragCycle(void) { return; } else if (current_db==0) { - /* start a scan from the first database */ + /* Start a scan from the first database. */ start_scan = ustime(); start_stat = server.stat_active_defrag_hits; } @@ -523,8 +552,9 @@ void activeDefragCycle(void) { do { cursor = dictScan(db->dict, cursor, defragScanCallback, defragDictBucketCallback, db); - /* once in 16 scan iterations, or 1000 pointer reallocations (if we have a lot of pointers in one hash bucket), - * check if we reached the tiem limit */ + /* Once in 16 scan iterations, or 1000 pointer reallocations + * (if we have a lot of pointers in one hash bucket), check if we + * reached the tiem limit. */ if (cursor && (++iterations > 16 || server.stat_active_defrag_hits - defragged > 1000)) { if ((ustime() - start) > timelimit) { return; @@ -539,7 +569,7 @@ void activeDefragCycle(void) { #else /* HAVE_DEFRAG */ void activeDefragCycle(void) { - /* not implemented yet*/ + /* Not implemented yet. */ } #endif From f11a12e915d01a7e1464bc7493e2ce6440722310 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 11 Jan 2017 15:36:48 +0100 Subject: [PATCH 0249/1722] Defrag: do not crash on empty quicklist. 
--- src/defrag.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/defrag.c b/src/defrag.c index ddfd6e2dd..dee879349 100644 --- a/src/defrag.c +++ b/src/defrag.c @@ -308,7 +308,7 @@ int defargKey(redisDb *db, dictEntry *de) { quicklistNode *node = ql->head, *newnode; if ((newql = activeDefragAlloc(ql))) defragged++, ob->ptr = ql = newql; - do { + while (node) { if ((newnode = activeDefragAlloc(node))) { if (newnode->prev) newnode->prev->next = newnode; @@ -323,7 +323,8 @@ int defargKey(redisDb *db, dictEntry *de) { } if ((newzl = activeDefragAlloc(node->zl))) defragged++, node->zl = newzl; - } while ((node = node->next)); + node = node->next; + } } else if (ob->encoding == OBJ_ENCODING_ZIPLIST) { if ((newzl = activeDefragAlloc(ob->ptr))) defragged++, ob->ptr = newzl; From 57c80d1fcaca1f7192719e4424e40e5cb8a5172c Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 11 Jan 2017 15:37:19 +0100 Subject: [PATCH 0250/1722] Defrag: fix function name typo defarg -> defrag. --- src/defrag.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/defrag.c b/src/defrag.c index dee879349..cc7dfdf7f 100644 --- a/src/defrag.c +++ b/src/defrag.c @@ -272,7 +272,7 @@ dictEntry* replaceSateliteDictKeyPtrAndOrDefragDictEntry(dict *d, sds oldkey, sd /* for each key we scan in the main dict, this function will attempt to defrag * all the various pointers it has. Returns a stat of how many pointers were * moved. */ -int defargKey(redisDb *db, dictEntry *de) { +int defragKey(redisDb *db, dictEntry *de) { sds keysds = dictGetKey(de); robj *newob, *ob; unsigned char *newzl; @@ -414,7 +414,7 @@ int defargKey(redisDb *db, dictEntry *de) { /* Defrag scan callback for the main db dictionary. 
*/ void defragScanCallback(void *privdata, const dictEntry *de) { - int defragged = defargKey((redisDb*)privdata, (dictEntry*)de); + int defragged = defragKey((redisDb*)privdata, (dictEntry*)de); server.stat_active_defrag_hits += defragged; if(defragged) server.stat_active_defrag_key_hits++; From eabcf0fed852615edbbd7f686e0edefdf369b2e4 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 11 Jan 2017 15:43:08 +0100 Subject: [PATCH 0251/1722] Defrag: not enabled by default. Error on CONFIG SET if not available. --- src/config.c | 10 ++++++++++ src/server.h | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/config.c b/src/config.c index 4e2c74e5d..83651877c 100644 --- a/src/config.c +++ b/src/config.c @@ -1007,6 +1007,16 @@ void configSetCommand(client *c) { "activerehashing",server.activerehashing) { } config_set_bool_field( "activedefrag",server.active_defrag_enabled) { +#ifndef HAVE_DEFRAG + if (server.active_defrag_enabled) { + server.active_defrag_enabled = 0; + addReplyError(c, + "Active defragmentation cannot be enabled: it requires a " + "Redis server compiled with a modified Jemalloc like the " + "one shipped by default with the Redis source distribution"); + return; + } +#endif } config_set_bool_field( "protected-mode",server.protected_mode) { } config_set_bool_field( diff --git a/src/server.h b/src/server.h index 17e12d9d6..bbc121b72 100644 --- a/src/server.h +++ b/src/server.h @@ -152,7 +152,7 @@ typedef long long mstime_t; /* millisecond time type. 
*/ #define CONFIG_DEFAULT_LAZYFREE_LAZY_EXPIRE 0 #define CONFIG_DEFAULT_LAZYFREE_LAZY_SERVER_DEL 0 #define CONFIG_DEFAULT_ALWAYS_SHOW_LOGO 0 -#define CONFIG_DEFAULT_ACTIVE_DEFRAG 1 +#define CONFIG_DEFAULT_ACTIVE_DEFRAG 0 #define CONFIG_DEFAULT_DEFRAG_THRESHOLD_LOWER 10 /* don't defrag when fragmentation is below 10% */ #define CONFIG_DEFAULT_DEFRAG_THRESHOLD_UPPER 100 /* maximum defrag force at 100% fragmentation */ #define CONFIG_DEFAULT_DEFRAG_IGNORE_BYTES (100<<20) /* don't defrag if frag overhead is below 100mb */ From 701a69ed4d8f41930777f5c0d57a3173d1f09fae Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 11 Jan 2017 17:24:49 +0100 Subject: [PATCH 0252/1722] Defrag: document the feature in redis.conf. --- redis.conf | 55 ++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 49 insertions(+), 6 deletions(-) diff --git a/redis.conf b/redis.conf index 18ba9fb3f..c54dba392 100644 --- a/redis.conf +++ b/redis.conf @@ -1230,21 +1230,64 @@ aof-rewrite-incremental-fsync yes # lfu-decay-time 1 ########################### ACTIVE DEFRAGMENTATION ####################### +# +# WARNING THIS FEATURE IS EXPERIMENTAL. However it was stress tested +# even in production and manually tested by multiple engineers for some +# time. +# +# What is active defragmentation? +# ------------------------------- +# +# Active (online) defragmentation allows a Redis server to compact the +# spaces left between small allocations and deallocations of data in memory, +# thus allowing to reclaim back memory. +# +# Fragmentation is a natural process that happens with every allocator (but +# less so with Jemalloc, fortunately) and certain workloads. Normally a server +# restart is needed in order to lower the fragmentation, or at least to flush +# away all the data and create it again. However thanks to this feature +# implemented by Oran Agra for Redis 4.0 this process can happen at runtime +# in an "hot" way, while the server is running. 
+# +# Basically when the fragmentation is over a certain level (see the +# configuration options below) Redis will start to create new copies of the +# values in contiguous memory regions by exploiting certain specific Jemalloc +# features (in order to understand if an allocation is causing fragmentation +# and to allocate it in a better place), and at the same time, will release the +# old copies of the data. This process, repeated incrementally for all the keys +# will cause the fragmentation to drop back to normal values. +# +# Important things to understand: +# +# 1. This feature is disabled by default, and only works if you compiled Redis +# to use the copy of Jemalloc we ship with the source code of Redis. +# This is the default with Linux builds. +# +# 2. You never need to enable this feature if you don't have fragmentation +# issues. +# +# 3. Once you experience fragmentation, you can enable this feature when +# needed with the command "CONFIG SET activedefrag yes". +# +# The configuration parameters are able to fine tune the behavior of the +# defragmentation process. If you are not sure about what they mean it is +# a good idea to leave the defaults untouched. 
-# enabled active defragmentation +# Enabled active defragmentation # activedefrag yes -# minimum amount of fragmentation waste to start active defrag +# Minimum amount of fragmentation waste to start active defrag # active-defrag-ignore-bytes 100mb -# minimum percentage of fragmentation to start active defrag +# Minimum percentage of fragmentation to start active defrag # active-defrag-threshold-lower 10 -# maximum percentage of fragmentation at which we use maximum effort +# Maximum percentage of fragmentation at which we use maximum effort # active-defrag-threshold-upper 100 -# minimal effort for defrag in CPU percentage +# Minimal effort for defrag in CPU percentage # active-defrag-cycle-min 25 -# maximal effort for defrag in CPU percentage +# Maximal effort for defrag in CPU percentage # active-defrag-cycle-max 75 + From fa59d5235674121aaf6bbc1510341deb673c0d4d Mon Sep 17 00:00:00 2001 From: Guy Benoish Date: Wed, 11 Jan 2017 19:24:19 +0200 Subject: [PATCH 0253/1722] enlarged buffer given to ld2string --- src/object.c | 2 +- src/t_hash.c | 2 +- src/util.h | 5 +++++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/object.c b/src/object.c index 1ae37c9d1..3e197c579 100644 --- a/src/object.c +++ b/src/object.c @@ -145,7 +145,7 @@ robj *createStringObjectFromLongLong(long long value) { * * The 'humanfriendly' option is used for INCRBYFLOAT and HINCRBYFLOAT. 
*/ robj *createStringObjectFromLongDouble(long double value, int humanfriendly) { - char buf[256]; + char buf[MAX_LONG_DOUBLE_CHARS]; int len = ld2string(buf,sizeof(buf),value,humanfriendly); return createStringObject(buf,len); } diff --git a/src/t_hash.c b/src/t_hash.c index a49559336..f9ff8e4ee 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -620,7 +620,7 @@ void hincrbyfloatCommand(client *c) { value += incr; - char buf[256]; + char buf[MAX_LONG_DOUBLE_CHARS]; int len = ld2string(buf,sizeof(buf),value,1); new = sdsnewlen(buf,len); hashTypeSet(o,c->argv[2]->ptr,new,HASH_SET_TAKE_VALUE); diff --git a/src/util.h b/src/util.h index d7784495b..91acde047 100644 --- a/src/util.h +++ b/src/util.h @@ -33,6 +33,11 @@ #include #include "sds.h" +/* The maximum number of characters needed to represent a long double + * as a string (long double has a huge range). + * This should be the size of the buffer given to ld2string */ +#define MAX_LONG_DOUBLE_CHARS 5*1024 + int stringmatchlen(const char *p, int plen, const char *s, int slen, int nocase); int stringmatch(const char *p, const char *s, int nocase); long long memtoll(const char *p, int *err); From d4593397a7025ff6a8f27913c97ade87ce87d97a Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 12 Jan 2017 09:08:43 +0100 Subject: [PATCH 0254/1722] MEMORY USAGE: support for modules data types. As a side effect of supporting it, we no longer crash when MEMORY USAGE is called against a module data type. Close #3637. 
--- src/module.c | 2 +- src/modules/hellotype.c | 11 +++++++---- src/object.c | 8 ++++++++ 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/module.c b/src/module.c index a5b3d52ae..1fbc5094f 100644 --- a/src/module.c +++ b/src/module.c @@ -2743,8 +2743,8 @@ moduleType *RM_CreateDataType(RedisModuleCtx *ctx, const char *name, int encver, moduleTypeLoadFunc rdb_load; moduleTypeSaveFunc rdb_save; moduleTypeRewriteFunc aof_rewrite; - moduleTypeDigestFunc digest; moduleTypeMemUsageFunc mem_usage; + moduleTypeDigestFunc digest; moduleTypeFreeFunc free; } *tms = (struct typemethods*) typemethods_ptr; diff --git a/src/modules/hellotype.c b/src/modules/hellotype.c index 02a5bb477..1aa6c5e63 100644 --- a/src/modules/hellotype.c +++ b/src/modules/hellotype.c @@ -226,10 +226,12 @@ void HelloTypeAofRewrite(RedisModuleIO *aof, RedisModuleString *key, void *value } } -void HelloTypeDigest(RedisModuleDigest *digest, void *value) { - REDISMODULE_NOT_USED(digest); - REDISMODULE_NOT_USED(value); - /* TODO: The DIGEST module interface is yet not implemented. */ +/* The goal of this function is to return the amount of memory used by + * the HelloType value. 
*/ +size_t HelloTypeMemUsage(void *value) { + struct HelloTypeObject *hto = value; + struct HelloTypeNode *node = hto->head; + return sizeof(*hto) + sizeof(*node)*hto->len; } void HelloTypeFree(void *value) { @@ -250,6 +252,7 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) .rdb_load = HelloTypeRdbLoad, .rdb_save = HelloTypeRdbSave, .aof_rewrite = HelloTypeAofRewrite, + .mem_usage = HelloTypeMemUsage, .free = HelloTypeFree }; diff --git a/src/object.c b/src/object.c index 1ae37c9d1..125c1a595 100644 --- a/src/object.c +++ b/src/object.c @@ -786,6 +786,14 @@ size_t objectComputeSize(robj *o, size_t sample_size) { } else { serverPanic("Unknown hash encoding"); } + } else if (o->type == OBJ_MODULE) { + moduleValue *mv = o->ptr; + moduleType *mt = mv->type; + if (mt->mem_usage != NULL) { + asize = mt->mem_usage(mv->value); + } else { + asize = 0; + } } else { serverPanic("Unknown object type"); } From 46f2263a45901cbdaedcb4e22881406244cbecd6 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 12 Jan 2017 09:50:40 +0100 Subject: [PATCH 0255/1722] Defrag: don't crash when a module value is encountered. --- src/defrag.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/defrag.c b/src/defrag.c index cc7dfdf7f..4a1dcefe4 100644 --- a/src/defrag.c +++ b/src/defrag.c @@ -406,6 +406,9 @@ int defragKey(redisDb *db, dictEntry *de) { } else { serverPanic("Unknown hash encoding"); } + } else if (ob->type == OBJ_MODULE) { + /* Currently defragmenting modules private data types + * is not supported. */ } else { serverPanic("Unknown object type"); } From c0837ddbcca65cda5ca32ca99f555a9046fbd339 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 12 Jan 2017 12:47:46 +0100 Subject: [PATCH 0256/1722] Use const in modules types mem_usage method. As suggested by @itamarhaber. 
--- src/modules/hellotype.c | 4 ++-- src/redismodule.h | 2 +- src/server.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/modules/hellotype.c b/src/modules/hellotype.c index 1aa6c5e63..027155d45 100644 --- a/src/modules/hellotype.c +++ b/src/modules/hellotype.c @@ -228,8 +228,8 @@ void HelloTypeAofRewrite(RedisModuleIO *aof, RedisModuleString *key, void *value /* The goal of this function is to return the amount of memory used by * the HelloType value. */ -size_t HelloTypeMemUsage(void *value) { - struct HelloTypeObject *hto = value; +size_t HelloTypeMemUsage(const void *value) { + const struct HelloTypeObject *hto = value; struct HelloTypeNode *node = hto->head; return sizeof(*hto) + sizeof(*node)*hto->len; } diff --git a/src/redismodule.h b/src/redismodule.h index 186e284c0..fcd6aeca3 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -91,7 +91,7 @@ typedef int (*RedisModuleCmdFunc) (RedisModuleCtx *ctx, RedisModuleString **argv typedef void *(*RedisModuleTypeLoadFunc)(RedisModuleIO *rdb, int encver); typedef void (*RedisModuleTypeSaveFunc)(RedisModuleIO *rdb, void *value); typedef void (*RedisModuleTypeRewriteFunc)(RedisModuleIO *aof, RedisModuleString *key, void *value); -typedef size_t (*RedisModuleTypeMemUsageFunc)(void *value); +typedef size_t (*RedisModuleTypeMemUsageFunc)(const void *value); typedef void (*RedisModuleTypeDigestFunc)(RedisModuleDigest *digest, void *value); typedef void (*RedisModuleTypeFreeFunc)(void *value); diff --git a/src/server.h b/src/server.h index bbc121b72..96bbb9ccf 100644 --- a/src/server.h +++ b/src/server.h @@ -483,7 +483,7 @@ typedef void *(*moduleTypeLoadFunc)(struct RedisModuleIO *io, int encver); typedef void (*moduleTypeSaveFunc)(struct RedisModuleIO *io, void *value); typedef void (*moduleTypeRewriteFunc)(struct RedisModuleIO *io, struct redisObject *key, void *value); typedef void (*moduleTypeDigestFunc)(struct RedisModuleDigest *digest, void *value); -typedef size_t 
(*moduleTypeMemUsageFunc)(void *value); +typedef size_t (*moduleTypeMemUsageFunc)(const void *value); typedef void (*moduleTypeFreeFunc)(void *value); /* The module type, which is referenced in each value of a given type, defines From eee378422496c4e9ddfcdd3193325c9a71b53019 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 13 Jan 2017 11:47:09 +0100 Subject: [PATCH 0257/1722] Ziplist: remove static from functions, they prevent good crash reports. --- src/ziplist.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/ziplist.c b/src/ziplist.c index a0939f640..f270cdbf9 100644 --- a/src/ziplist.c +++ b/src/ziplist.c @@ -187,7 +187,7 @@ typedef struct zlentry { void ziplistRepr(unsigned char *zl); /* Return bytes needed to store integer encoded by 'encoding' */ -static unsigned int zipIntSize(unsigned char encoding) { +unsigned int zipIntSize(unsigned char encoding) { switch(encoding) { case ZIP_INT_8B: return 1; case ZIP_INT_16B: return 2; @@ -202,7 +202,7 @@ static unsigned int zipIntSize(unsigned char encoding) { /* Encode the length 'rawlen' writing it in 'p'. If p is NULL it just returns * the amount of bytes required to encode such a length. */ -static unsigned int zipEncodeLength(unsigned char *p, unsigned char encoding, unsigned int rawlen) { +unsigned int zipEncodeLength(unsigned char *p, unsigned char encoding, unsigned int rawlen) { unsigned char len = 1, buf[5]; if (ZIP_IS_STR(encoding)) { @@ -266,7 +266,7 @@ static unsigned int zipEncodeLength(unsigned char *p, unsigned char encoding, un /* Encode the length of the previous entry and write it to "p". Return the * number of bytes needed to encode this length if "p" is NULL. */ -static unsigned int zipPrevEncodeLength(unsigned char *p, unsigned int len) { +unsigned int zipPrevEncodeLength(unsigned char *p, unsigned int len) { if (p == NULL) { return (len < ZIP_BIGLEN) ? 
1 : sizeof(len)+1; } else { @@ -284,7 +284,7 @@ static unsigned int zipPrevEncodeLength(unsigned char *p, unsigned int len) { /* Encode the length of the previous entry and write it to "p". This only * uses the larger encoding (required in __ziplistCascadeUpdate). */ -static void zipPrevEncodeLengthForceLarge(unsigned char *p, unsigned int len) { +void zipPrevEncodeLengthForceLarge(unsigned char *p, unsigned int len) { if (p == NULL) return; p[0] = ZIP_BIGLEN; memcpy(p+1,&len,sizeof(len)); @@ -316,14 +316,14 @@ static void zipPrevEncodeLengthForceLarge(unsigned char *p, unsigned int len) { /* Return the difference in number of bytes needed to store the length of the * previous element 'len', in the entry pointed to by 'p'. */ -static int zipPrevLenByteDiff(unsigned char *p, unsigned int len) { +int zipPrevLenByteDiff(unsigned char *p, unsigned int len) { unsigned int prevlensize; ZIP_DECODE_PREVLENSIZE(p, prevlensize); return zipPrevEncodeLength(NULL, len) - prevlensize; } /* Return the total number of bytes used by the entry pointed to by 'p'. */ -static unsigned int zipRawEntryLength(unsigned char *p) { +unsigned int zipRawEntryLength(unsigned char *p) { unsigned int prevlensize, encoding, lensize, len; ZIP_DECODE_PREVLENSIZE(p, prevlensize); ZIP_DECODE_LENGTH(p + prevlensize, encoding, lensize, len); @@ -332,7 +332,7 @@ static unsigned int zipRawEntryLength(unsigned char *p) { /* Check if string pointed to by 'entry' can be encoded as an integer. * Stores the integer value in 'v' and its encoding in 'encoding'. 
*/ -static int zipTryEncoding(unsigned char *entry, unsigned int entrylen, long long *v, unsigned char *encoding) { +int zipTryEncoding(unsigned char *entry, unsigned int entrylen, long long *v, unsigned char *encoding) { long long value; if (entrylen >= 32 || entrylen == 0) return 0; @@ -359,7 +359,7 @@ static int zipTryEncoding(unsigned char *entry, unsigned int entrylen, long long } /* Store integer 'value' at 'p', encoded as 'encoding' */ -static void zipSaveInteger(unsigned char *p, int64_t value, unsigned char encoding) { +void zipSaveInteger(unsigned char *p, int64_t value, unsigned char encoding) { int16_t i16; int32_t i32; int64_t i64; @@ -389,7 +389,7 @@ static void zipSaveInteger(unsigned char *p, int64_t value, unsigned char encodi } /* Read integer encoded as 'encoding' from 'p' */ -static int64_t zipLoadInteger(unsigned char *p, unsigned char encoding) { +int64_t zipLoadInteger(unsigned char *p, unsigned char encoding) { int16_t i16; int32_t i32; int64_t i64, ret = 0; @@ -421,7 +421,7 @@ static int64_t zipLoadInteger(unsigned char *p, unsigned char encoding) { } /* Return a struct with all information about an entry. */ -static void zipEntry(unsigned char *p, zlentry *e) { +void zipEntry(unsigned char *p, zlentry *e) { ZIP_DECODE_PREVLEN(p, e->prevrawlensize, e->prevrawlen); ZIP_DECODE_LENGTH(p + e->prevrawlensize, e->encoding, e->lensize, e->len); @@ -441,7 +441,7 @@ unsigned char *ziplistNew(void) { } /* Resize the ziplist. */ -static unsigned char *ziplistResize(unsigned char *zl, unsigned int len) { +unsigned char *ziplistResize(unsigned char *zl, unsigned int len) { zl = zrealloc(zl,len); ZIPLIST_BYTES(zl) = intrev32ifbe(len); zl[len-1] = ZIP_END; @@ -468,7 +468,7 @@ static unsigned char *ziplistResize(unsigned char *zl, unsigned int len) { * * The pointer "p" points to the first entry that does NOT need to be * updated, i.e. consecutive fields MAY need an update. 
*/ -static unsigned char *__ziplistCascadeUpdate(unsigned char *zl, unsigned char *p) { +unsigned char *__ziplistCascadeUpdate(unsigned char *zl, unsigned char *p) { size_t curlen = intrev32ifbe(ZIPLIST_BYTES(zl)), rawlen, rawlensize; size_t offset, noffset, extra; unsigned char *np; @@ -530,7 +530,7 @@ static unsigned char *__ziplistCascadeUpdate(unsigned char *zl, unsigned char *p } /* Delete "num" entries, starting at "p". Returns pointer to the ziplist. */ -static unsigned char *__ziplistDelete(unsigned char *zl, unsigned char *p, unsigned int num) { +unsigned char *__ziplistDelete(unsigned char *zl, unsigned char *p, unsigned int num) { unsigned int i, totlen, deleted = 0; size_t offset; int nextdiff = 0; @@ -590,7 +590,7 @@ static unsigned char *__ziplistDelete(unsigned char *zl, unsigned char *p, unsig } /* Insert item at "p". */ -static unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen) { +unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen) { size_t curlen = intrev32ifbe(ZIPLIST_BYTES(zl)), reqlen; unsigned int prevlensize, prevlen = 0; size_t offset; From c6dfff5b61f9555ab25260e5efe07271e1f519a8 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 18 Jan 2017 17:05:10 +0100 Subject: [PATCH 0258/1722] serverPanic(): allow printf() alike formatting. 
This is of great interest because it allows us to print debugging information that could be useful when debugging, like in the following example:
Press left mouse button to continue"); - serverLog(LL_WARNING,"Guru Meditation: %s #%s:%d",msg,file,line); + serverLog(LL_WARNING,"Guru Meditation: %s #%s:%d",fmtmsg,file,line); #ifdef HAVE_BACKTRACE serverLog(LL_WARNING,"(forcing SIGSEGV in order to print the stack trace)"); #endif diff --git a/src/server.h b/src/server.h index 96bbb9ccf..30d8be849 100644 --- a/src/server.h +++ b/src/server.h @@ -435,7 +435,7 @@ typedef long long mstime_t; /* millisecond time type. */ /* We can print the stacktrace, so our assert is defined this way: */ #define serverAssertWithInfo(_c,_o,_e) ((_e)?(void)0 : (_serverAssertWithInfo(_c,_o,#_e,__FILE__,__LINE__),_exit(1))) #define serverAssert(_e) ((_e)?(void)0 : (_serverAssert(#_e,__FILE__,__LINE__),_exit(1))) -#define serverPanic(_e) _serverPanic(#_e,__FILE__,__LINE__),_exit(1) +#define serverPanic(...) _serverPanic(__FILE__,__LINE__,__VA_ARGS__),_exit(1) /*----------------------------------------------------------------------------- * Data types @@ -1960,7 +1960,7 @@ void *realloc(void *ptr, size_t size) __attribute__ ((deprecated)); /* Debugging stuff */ void _serverAssertWithInfo(const client *c, const robj *o, const char *estr, const char *file, int line); void _serverAssert(const char *estr, const char *file, int line); -void _serverPanic(const char *msg, const char *file, int line); +void _serverPanic(const char *file, int line, const char *msg, ...); void bugReportStart(void); void serverLogObjectDebugInfo(const robj *o); void sigsegvHandler(int sig, siginfo_t *info, void *secret); From d3516e69c7dd7d2511d10641f213c045bd219de1 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 18 Jan 2017 17:11:57 +0100 Subject: [PATCH 0259/1722] Add panic() into redisassert.h. This header file is for libs, like ziplist.c, that we want to leave almost separted from the core. 
The panic() calls will be easy to delete in order to use such files outside, but the debugging info we gain is very valuable compared to simple assertions where it is not possible to print debugging info.
*/ - if (mh->clients_slaves / numslaves > (1024*1024*10)) { + if (numslaves > 0 && mh->clients_slaves / numslaves > (1024*1024*10)) { big_slave_buf = 1; num_reports++; } From 362c8f80b12c1093455ebeb34daf1545e7a32d06 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 30 Jan 2017 09:58:34 +0100 Subject: [PATCH 0262/1722] Jemalloc updated to 4.4.0. The original jemalloc source tree was modified to: 1. Remove the configure error that prevents nested builds. 2. Insert the Redis private Jemalloc API in order to allow the Redis fragmentation function to work. --- deps/jemalloc/.appveyor.yml | 28 + deps/jemalloc/.gitignore | 16 + deps/jemalloc/.travis.yml | 29 + deps/jemalloc/COPYING | 4 +- deps/jemalloc/ChangeLog | 220 ++ deps/jemalloc/INSTALL | 26 + deps/jemalloc/Makefile.in | 128 +- deps/jemalloc/README | 2 +- deps/jemalloc/VERSION | 2 +- deps/jemalloc/bin/jeprof.in | 131 +- deps/jemalloc/{ => build-aux}/config.guess | 174 +- deps/jemalloc/{ => build-aux}/config.sub | 76 +- deps/jemalloc/{ => build-aux}/install-sh | 0 deps/jemalloc/configure | 1391 ++++++++++- deps/jemalloc/configure.ac | 436 +++- deps/jemalloc/doc/html.xsl.in | 1 + deps/jemalloc/doc/jemalloc.3 | 859 ++++--- deps/jemalloc/doc/jemalloc.html | 1535 +++++------- deps/jemalloc/doc/jemalloc.xml.in | 540 +++-- deps/jemalloc/doc/stylesheet.xsl | 7 +- .../include/jemalloc/internal/arena.h | 586 +++-- .../include/jemalloc/internal/assert.h | 45 + .../include/jemalloc/internal/atomic.h | 4 +- .../jemalloc/include/jemalloc/internal/base.h | 11 +- .../include/jemalloc/internal/bitmap.h | 76 +- .../include/jemalloc/internal/chunk.h | 38 +- .../include/jemalloc/internal/chunk_dss.h | 10 +- .../include/jemalloc/internal/chunk_mmap.h | 4 +- deps/jemalloc/include/jemalloc/internal/ckh.h | 6 +- deps/jemalloc/include/jemalloc/internal/ctl.h | 29 +- .../include/jemalloc/internal/extent.h | 43 +- .../jemalloc/include/jemalloc/internal/hash.h | 33 +- .../jemalloc/include/jemalloc/internal/huge.h | 21 +- 
.../jemalloc/internal/jemalloc_internal.h.in | 440 ++-- .../internal/jemalloc_internal_decls.h | 11 + .../internal/jemalloc_internal_defs.h.in | 73 +- deps/jemalloc/include/jemalloc/internal/mb.h | 10 +- .../include/jemalloc/internal/mutex.h | 62 +- .../include/jemalloc/internal/nstime.h | 48 + .../include/jemalloc/internal/pages.h | 7 +- deps/jemalloc/include/jemalloc/internal/ph.h | 345 +++ .../jemalloc/internal/private_symbols.txt | 252 +- .../jemalloc/include/jemalloc/internal/prng.h | 193 +- .../jemalloc/include/jemalloc/internal/prof.h | 86 +- deps/jemalloc/include/jemalloc/internal/rb.h | 208 +- .../include/jemalloc/internal/rtree.h | 160 +- .../include/jemalloc/internal/size_classes.sh | 50 +- .../include/jemalloc/internal/smoothstep.h | 246 ++ .../include/jemalloc/internal/smoothstep.sh | 115 + .../jemalloc/include/jemalloc/internal/spin.h | 51 + .../include/jemalloc/internal/stats.h | 14 + .../include/jemalloc/internal/tcache.h | 159 +- .../include/jemalloc/internal/ticker.h | 75 + deps/jemalloc/include/jemalloc/internal/tsd.h | 164 +- .../jemalloc/include/jemalloc/internal/util.h | 214 +- .../include/jemalloc/internal/valgrind.h | 40 +- .../include/jemalloc/internal/witness.h | 266 +++ .../include/jemalloc/jemalloc_defs.h.in | 8 + .../include/jemalloc/jemalloc_macros.h.in | 61 +- deps/jemalloc/include/msvc_compat/strings.h | 30 + .../include/msvc_compat/windows_extra.h | 22 +- deps/jemalloc/jemalloc.pc.in | 2 +- deps/jemalloc/msvc/ReadMe.txt | 24 + deps/jemalloc/msvc/jemalloc_vc2015.sln | 63 + .../projects/vc2015/jemalloc/jemalloc.vcxproj | 402 ++++ .../vc2015/jemalloc/jemalloc.vcxproj.filters | 272 +++ .../vc2015/test_threads/test_threads.cpp | 89 + .../vc2015/test_threads/test_threads.h | 3 + .../vc2015/test_threads/test_threads.vcxproj | 327 +++ .../test_threads/test_threads.vcxproj.filters | 26 + .../vc2015/test_threads/test_threads_main.cpp | 12 + deps/jemalloc/src/arena.c | 2057 +++++++++++------ deps/jemalloc/src/base.c | 73 +- 
deps/jemalloc/src/bitmap.c | 59 +- deps/jemalloc/src/chunk.c | 428 ++-- deps/jemalloc/src/chunk_dss.c | 188 +- deps/jemalloc/src/chunk_mmap.c | 18 +- deps/jemalloc/src/ckh.c | 43 +- deps/jemalloc/src/ctl.c | 789 ++++--- deps/jemalloc/src/extent.c | 80 +- deps/jemalloc/src/huge.c | 238 +- deps/jemalloc/src/jemalloc.c | 1500 +++++++----- deps/jemalloc/src/mutex.c | 23 +- deps/jemalloc/src/nstime.c | 194 ++ deps/jemalloc/src/pages.c | 177 +- deps/jemalloc/src/prng.c | 2 + deps/jemalloc/src/prof.c | 678 +++--- deps/jemalloc/src/quarantine.c | 50 +- deps/jemalloc/src/rtree.c | 9 +- deps/jemalloc/src/spin.c | 2 + deps/jemalloc/src/stats.c | 1250 +++++++--- deps/jemalloc/src/tcache.c | 170 +- deps/jemalloc/src/ticker.c | 2 + deps/jemalloc/src/tsd.c | 28 +- deps/jemalloc/src/util.c | 42 +- deps/jemalloc/src/witness.c | 136 ++ deps/jemalloc/src/zone.c | 198 +- .../test/include/test/jemalloc_test.h.in | 80 +- deps/jemalloc/test/include/test/mtx.h | 2 + deps/jemalloc/test/include/test/test.h | 4 + deps/jemalloc/test/include/test/timer.h | 19 +- .../jemalloc/test/integration/MALLOCX_ARENA.c | 4 +- .../jemalloc/test/integration/aligned_alloc.c | 20 +- deps/jemalloc/test/integration/allocated.c | 17 +- deps/jemalloc/test/integration/chunk.c | 98 +- deps/jemalloc/test/integration/mallocx.c | 108 +- deps/jemalloc/test/integration/overflow.c | 8 +- .../test/integration/posix_memalign.c | 20 +- deps/jemalloc/test/integration/rallocx.c | 86 +- deps/jemalloc/test/integration/sdallocx.c | 4 +- deps/jemalloc/test/integration/thread_arena.c | 10 +- .../test/integration/thread_tcache_enabled.c | 39 +- deps/jemalloc/test/integration/xallocx.c | 120 +- deps/jemalloc/test/src/mtx.c | 7 + deps/jemalloc/test/src/test.c | 56 +- deps/jemalloc/test/src/timer.c | 45 +- deps/jemalloc/test/stress/microbench.c | 3 +- deps/jemalloc/test/unit/a0.c | 19 + deps/jemalloc/test/unit/arena_reset.c | 159 ++ deps/jemalloc/test/unit/bitmap.c | 26 +- deps/jemalloc/test/unit/ckh.c | 8 +- 
deps/jemalloc/test/unit/decay.c | 374 +++ deps/jemalloc/test/unit/fork.c | 64 + deps/jemalloc/test/unit/hash.c | 36 +- deps/jemalloc/test/unit/junk.c | 17 +- deps/jemalloc/test/unit/junk_alloc.c | 2 +- deps/jemalloc/test/unit/junk_free.c | 2 +- deps/jemalloc/test/unit/mallctl.c | 319 ++- deps/jemalloc/test/unit/math.c | 4 + deps/jemalloc/test/unit/nstime.c | 227 ++ deps/jemalloc/test/unit/pack.c | 206 ++ deps/jemalloc/test/unit/pages.c | 27 + deps/jemalloc/test/unit/ph.c | 290 +++ deps/jemalloc/test/unit/prng.c | 263 +++ deps/jemalloc/test/unit/prof_accum.c | 5 +- deps/jemalloc/test/unit/prof_active.c | 5 +- deps/jemalloc/test/unit/prof_gdump.c | 13 +- deps/jemalloc/test/unit/prof_idump.c | 5 +- deps/jemalloc/test/unit/prof_reset.c | 16 +- deps/jemalloc/test/unit/prof_thread_name.c | 22 +- deps/jemalloc/test/unit/rb.c | 60 +- deps/jemalloc/test/unit/run_quantize.c | 149 ++ deps/jemalloc/test/unit/size_classes.c | 105 +- deps/jemalloc/test/unit/smoothstep.c | 106 + deps/jemalloc/test/unit/stats.c | 241 +- deps/jemalloc/test/unit/ticker.c | 76 + deps/jemalloc/test/unit/tsd.c | 13 +- deps/jemalloc/test/unit/util.c | 89 +- deps/jemalloc/test/unit/witness.c | 278 +++ deps/jemalloc/test/unit/zero.c | 16 +- 150 files changed, 17225 insertions(+), 6342 deletions(-) create mode 100644 deps/jemalloc/.appveyor.yml create mode 100644 deps/jemalloc/.travis.yml rename deps/jemalloc/{ => build-aux}/config.guess (90%) rename deps/jemalloc/{ => build-aux}/config.sub (95%) rename deps/jemalloc/{ => build-aux}/install-sh (100%) create mode 100644 deps/jemalloc/include/jemalloc/internal/assert.h create mode 100644 deps/jemalloc/include/jemalloc/internal/nstime.h create mode 100644 deps/jemalloc/include/jemalloc/internal/ph.h create mode 100644 deps/jemalloc/include/jemalloc/internal/smoothstep.h create mode 100755 deps/jemalloc/include/jemalloc/internal/smoothstep.sh create mode 100644 deps/jemalloc/include/jemalloc/internal/spin.h create mode 100644 
deps/jemalloc/include/jemalloc/internal/ticker.h create mode 100644 deps/jemalloc/include/jemalloc/internal/witness.h create mode 100644 deps/jemalloc/msvc/ReadMe.txt create mode 100644 deps/jemalloc/msvc/jemalloc_vc2015.sln create mode 100644 deps/jemalloc/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj create mode 100644 deps/jemalloc/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters create mode 100755 deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads.cpp create mode 100644 deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads.h create mode 100644 deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads.vcxproj create mode 100644 deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads.vcxproj.filters create mode 100644 deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads_main.cpp create mode 100644 deps/jemalloc/src/nstime.c create mode 100644 deps/jemalloc/src/prng.c create mode 100644 deps/jemalloc/src/spin.c mode change 100644 => 100755 deps/jemalloc/src/stats.c mode change 100644 => 100755 deps/jemalloc/src/tcache.c create mode 100644 deps/jemalloc/src/ticker.c mode change 100644 => 100755 deps/jemalloc/src/util.c create mode 100644 deps/jemalloc/src/witness.c mode change 100644 => 100755 deps/jemalloc/test/integration/MALLOCX_ARENA.c mode change 100644 => 100755 deps/jemalloc/test/integration/allocated.c mode change 100644 => 100755 deps/jemalloc/test/integration/mallocx.c mode change 100644 => 100755 deps/jemalloc/test/integration/overflow.c mode change 100644 => 100755 deps/jemalloc/test/integration/rallocx.c mode change 100644 => 100755 deps/jemalloc/test/integration/thread_arena.c mode change 100644 => 100755 deps/jemalloc/test/integration/thread_tcache_enabled.c mode change 100644 => 100755 deps/jemalloc/test/integration/xallocx.c create mode 100644 deps/jemalloc/test/unit/a0.c create mode 100755 deps/jemalloc/test/unit/arena_reset.c create mode 100755 deps/jemalloc/test/unit/decay.c create mode 100644 
deps/jemalloc/test/unit/fork.c mode change 100644 => 100755 deps/jemalloc/test/unit/mallctl.c create mode 100644 deps/jemalloc/test/unit/nstime.c create mode 100644 deps/jemalloc/test/unit/pack.c create mode 100644 deps/jemalloc/test/unit/pages.c create mode 100644 deps/jemalloc/test/unit/ph.c create mode 100644 deps/jemalloc/test/unit/prng.c mode change 100644 => 100755 deps/jemalloc/test/unit/prof_accum.c mode change 100644 => 100755 deps/jemalloc/test/unit/prof_active.c mode change 100644 => 100755 deps/jemalloc/test/unit/prof_gdump.c mode change 100644 => 100755 deps/jemalloc/test/unit/prof_idump.c mode change 100644 => 100755 deps/jemalloc/test/unit/prof_reset.c mode change 100644 => 100755 deps/jemalloc/test/unit/prof_thread_name.c create mode 100644 deps/jemalloc/test/unit/run_quantize.c mode change 100644 => 100755 deps/jemalloc/test/unit/size_classes.c create mode 100644 deps/jemalloc/test/unit/smoothstep.c mode change 100644 => 100755 deps/jemalloc/test/unit/stats.c create mode 100644 deps/jemalloc/test/unit/ticker.c create mode 100644 deps/jemalloc/test/unit/witness.c diff --git a/deps/jemalloc/.appveyor.yml b/deps/jemalloc/.appveyor.yml new file mode 100644 index 000000000..ddd5c5711 --- /dev/null +++ b/deps/jemalloc/.appveyor.yml @@ -0,0 +1,28 @@ +version: '{build}' + +environment: + matrix: + - MSYSTEM: MINGW64 + CPU: x86_64 + MSVC: amd64 + - MSYSTEM: MINGW32 + CPU: i686 + MSVC: x86 + - MSYSTEM: MINGW64 + CPU: x86_64 + - MSYSTEM: MINGW32 + CPU: i686 + +install: + - set PATH=c:\msys64\%MSYSTEM%\bin;c:\msys64\usr\bin;%PATH% + - if defined MSVC call "c:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" %MSVC% + - if defined MSVC pacman --noconfirm -Rsc mingw-w64-%CPU%-gcc gcc + - pacman --noconfirm -Suy mingw-w64-%CPU%-make + +build_script: + - bash -c "autoconf" + - bash -c "./configure" + - mingw32-make -j3 + - file lib/jemalloc.dll + - mingw32-make -j3 tests + - mingw32-make -k check diff --git a/deps/jemalloc/.gitignore 
b/deps/jemalloc/.gitignore index d0e393619..08278d087 100644 --- a/deps/jemalloc/.gitignore +++ b/deps/jemalloc/.gitignore @@ -73,3 +73,19 @@ test/include/test/jemalloc_test_defs.h /test/unit/*.out /VERSION + +*.pdb +*.sdf +*.opendb +*.opensdf +*.cachefile +*.suo +*.user +*.sln.docstates +*.tmp +/msvc/Win32/ +/msvc/x64/ +/msvc/projects/*/*/Debug*/ +/msvc/projects/*/*/Release*/ +/msvc/projects/*/*/Win32/ +/msvc/projects/*/*/x64/ diff --git a/deps/jemalloc/.travis.yml b/deps/jemalloc/.travis.yml new file mode 100644 index 000000000..1fed4f8e6 --- /dev/null +++ b/deps/jemalloc/.travis.yml @@ -0,0 +1,29 @@ +language: c + +matrix: + include: + - os: linux + compiler: gcc + - os: linux + compiler: gcc + env: + - EXTRA_FLAGS=-m32 + addons: + apt: + packages: + - gcc-multilib + - os: osx + compiler: clang + - os: osx + compiler: clang + env: + - EXTRA_FLAGS=-m32 + +before_script: + - autoconf + - ./configure${EXTRA_FLAGS:+ CC="$CC $EXTRA_FLAGS"} + - make -j3 + - make -j3 tests + +script: + - make check diff --git a/deps/jemalloc/COPYING b/deps/jemalloc/COPYING index 611968cda..104b1f8b0 100644 --- a/deps/jemalloc/COPYING +++ b/deps/jemalloc/COPYING @@ -1,10 +1,10 @@ Unless otherwise specified, files in the jemalloc source distribution are subject to the following license: -------------------------------------------------------------------------------- -Copyright (C) 2002-2015 Jason Evans . +Copyright (C) 2002-2016 Jason Evans . All rights reserved. Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved. -Copyright (C) 2009-2015 Facebook, Inc. All rights reserved. +Copyright (C) 2009-2016 Facebook, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: diff --git a/deps/jemalloc/ChangeLog b/deps/jemalloc/ChangeLog index e3b0a5190..f75edd933 100644 --- a/deps/jemalloc/ChangeLog +++ b/deps/jemalloc/ChangeLog @@ -4,6 +4,226 @@ brevity. 
Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc +* 4.4.0 (December 3, 2016) + + New features: + - Add configure support for *-*-linux-android. (@cferris1000, @jasone) + - Add the --disable-syscall configure option, for use on systems that place + security-motivated limitations on syscall(2). (@jasone) + - Add support for Debian GNU/kFreeBSD. (@thesam) + + Optimizations: + - Add extent serial numbers and use them where appropriate as a sort key that + is higher priority than address, so that the allocation policy prefers older + extents. This tends to improve locality (decrease fragmentation) when + memory grows downward. (@jasone) + - Refactor madvise(2) configuration so that MADV_FREE is detected and utilized + on Linux 4.5 and newer. (@jasone) + - Mark partially purged arena chunks as non-huge-page. This improves + interaction with Linux's transparent huge page functionality. (@jasone) + + Bug fixes: + - Fix size class computations for edge conditions involving extremely large + allocations. This regression was first released in 4.0.0. (@jasone, + @ingvarha) + - Remove overly restrictive assertions related to the cactive statistic. This + regression was first released in 4.1.0. (@jasone) + - Implement a more reliable detection scheme for os_unfair_lock on macOS. + (@jszakmeister) + +* 4.3.1 (November 7, 2016) + + Bug fixes: + - Fix a severe virtual memory leak. This regression was first released in + 4.3.0. (@interwq, @jasone) + - Refactor atomic and prng APIs to restore support for 32-bit platforms that + use pre-C11 toolchains, e.g. FreeBSD's mips. (@jasone) + +* 4.3.0 (November 4, 2016) + + This is the first release that passes the test suite for multiple Windows + configurations, thanks in large part to @glandium setting up continuous + integration via AppVeyor (and Travis CI for Linux and OS X). + + New features: + - Add "J" (JSON) support to malloc_stats_print(). (@jasone) + - Add Cray compiler support. 
(@ronawho) + + Optimizations: + - Add/use adaptive spinning for bootstrapping and radix tree node + initialization. (@jasone) + + Bug fixes: + - Fix large allocation to search starting in the optimal size class heap, + which can substantially reduce virtual memory churn and fragmentation. This + regression was first released in 4.0.0. (@mjp41, @jasone) + - Fix stats.arenas..nthreads accounting. (@interwq) + - Fix and simplify decay-based purging. (@jasone) + - Make DSS (sbrk(2)-related) operations lockless, which resolves potential + deadlocks during thread exit. (@jasone) + - Fix over-sized allocation of radix tree leaf nodes. (@mjp41, @ogaun, + @jasone) + - Fix over-sized allocation of arena_t (plus associated stats) data + structures. (@jasone, @interwq) + - Fix EXTRA_CFLAGS to not affect configuration. (@jasone) + - Fix a Valgrind integration bug. (@ronawho) + - Disallow 0x5a junk filling when running in Valgrind. (@jasone) + - Fix a file descriptor leak on Linux. This regression was first released in + 4.2.0. (@vsarunas, @jasone) + - Fix static linking of jemalloc with glibc. (@djwatson) + - Use syscall(2) rather than {open,read,close}(2) during boot on Linux. This + works around other libraries' system call wrappers performing reentrant + allocation. (@kspinka, @Whissi, @jasone) + - Fix OS X default zone replacement to work with OS X 10.12. (@glandium, + @jasone) + - Fix cached memory management to avoid needless commit/decommit operations + during purging, which resolves permanent virtual memory map fragmentation + issues on Windows. (@mjp41, @jasone) + - Fix TSD fetches to avoid (recursive) allocation. This is relevant to + non-TLS and Windows configurations. (@jasone) + - Fix malloc_conf overriding to work on Windows. (@jasone) + - Forcibly disable lazy-lock on Windows (was forcibly *enabled*). (@jasone) + +* 4.2.1 (June 8, 2016) + + Bug fixes: + - Fix bootstrapping issues for configurations that require allocation during + tsd initialization (e.g. 
--disable-tls). (@cferris1000, @jasone) + - Fix gettimeofday() version of nstime_update(). (@ronawho) + - Fix Valgrind regressions in calloc() and chunk_alloc_wrapper(). (@ronawho) + - Fix potential VM map fragmentation regression. (@jasone) + - Fix opt_zero-triggered in-place huge reallocation zeroing. (@jasone) + - Fix heap profiling context leaks in reallocation edge cases. (@jasone) + +* 4.2.0 (May 12, 2016) + + New features: + - Add the arena..reset mallctl, which makes it possible to discard all of + an arena's allocations in a single operation. (@jasone) + - Add the stats.retained and stats.arenas..retained statistics. (@jasone) + - Add the --with-version configure option. (@jasone) + - Support --with-lg-page values larger than actual page size. (@jasone) + + Optimizations: + - Use pairing heaps rather than red-black trees for various hot data + structures. (@djwatson, @jasone) + - Streamline fast paths of rtree operations. (@jasone) + - Optimize the fast paths of calloc() and [m,d,sd]allocx(). (@jasone) + - Decommit unused virtual memory if the OS does not overcommit. (@jasone) + - Specify MAP_NORESERVE on Linux if [heuristic] overcommit is active, in order + to avoid unfortunate interactions during fork(2). (@jasone) + + Bug fixes: + - Fix chunk accounting related to triggering gdump profiles. (@jasone) + - Link against librt for clock_gettime(2) if glibc < 2.17. (@jasone) + - Scale leak report summary according to sampling probability. (@jasone) + +* 4.1.1 (May 3, 2016) + + This bugfix release resolves a variety of mostly minor issues, though the + bitmap fix is critical for 64-bit Windows. + + Bug fixes: + - Fix the linear scan version of bitmap_sfu() to shift by the proper amount + even when sizeof(long) is not the same as sizeof(void *), as on 64-bit + Windows. (@jasone) + - Fix hashing functions to avoid unaligned memory accesses (and resulting + crashes). This is relevant at least to some ARM-based platforms. 
+ (@rkmisra) + - Fix fork()-related lock rank ordering reversals. These reversals were + unlikely to cause deadlocks in practice except when heap profiling was + enabled and active. (@jasone) + - Fix various chunk leaks in OOM code paths. (@jasone) + - Fix malloc_stats_print() to print opt.narenas correctly. (@jasone) + - Fix MSVC-specific build/test issues. (@rustyx, @yuslepukhin) + - Fix a variety of test failures that were due to test fragility rather than + core bugs. (@jasone) + +* 4.1.0 (February 28, 2016) + + This release is primarily about optimizations, but it also incorporates a lot + of portability-motivated refactoring and enhancements. Many people worked on + this release, to an extent that even with the omission here of minor changes + (see git revision history), and of the people who reported and diagnosed + issues, so much of the work was contributed that starting with this release, + changes are annotated with author credits to help reflect the collaborative + effort involved. + + New features: + - Implement decay-based unused dirty page purging, a major optimization with + mallctl API impact. This is an alternative to the existing ratio-based + unused dirty page purging, and is intended to eventually become the sole + purging mechanism. New mallctls: + + opt.purge + + opt.decay_time + + arena..decay + + arena..decay_time + + arenas.decay_time + + stats.arenas..decay_time + (@jasone, @cevans87) + - Add --with-malloc-conf, which makes it possible to embed a default + options string during configuration. This was motivated by the desire to + specify --with-malloc-conf=purge:decay , since the default must remain + purge:ratio until the 5.0.0 release. (@jasone) + - Add MS Visual Studio 2015 support. (@rustyx, @yuslepukhin) + - Make *allocx() size class overflow behavior defined. 
The maximum + size class is now less than PTRDIFF_MAX to protect applications against + numerical overflow, and all allocation functions are guaranteed to indicate + errors rather than potentially crashing if the request size exceeds the + maximum size class. (@jasone) + - jeprof: + + Add raw heap profile support. (@jasone) + + Add --retain and --exclude for backtrace symbol filtering. (@jasone) + + Optimizations: + - Optimize the fast path to combine various bootstrapping and configuration + checks and execute more streamlined code in the common case. (@interwq) + - Use linear scan for small bitmaps (used for small object tracking). In + addition to speeding up bitmap operations on 64-bit systems, this reduces + allocator metadata overhead by approximately 0.2%. (@djwatson) + - Separate arena_avail trees, which substantially speeds up run tree + operations. (@djwatson) + - Use memoization (boot-time-computed table) for run quantization. Separate + arena_avail trees reduced the importance of this optimization. (@jasone) + - Attempt mmap-based in-place huge reallocation. This can dramatically speed + up incremental huge reallocation. (@jasone) + + Incompatible changes: + - Make opt.narenas unsigned rather than size_t. (@jasone) + + Bug fixes: + - Fix stats.cactive accounting regression. (@rustyx, @jasone) + - Handle unaligned keys in hash(). This caused problems for some ARM systems. + (@jasone, @cferris1000) + - Refactor arenas array. In addition to fixing a fork-related deadlock, this + makes arena lookups faster and simpler. (@jasone) + - Move retained memory allocation out of the default chunk allocation + function, to a location that gets executed even if the application installs + a custom chunk allocation function. This resolves a virtual memory leak. + (@buchgr) + - Fix a potential tsd cleanup leak. (@cferris1000, @jasone) + - Fix run quantization. In practice this bug had no impact unless + applications requested memory with alignment exceeding one page. 
+ (@jasone, @djwatson) + - Fix LinuxThreads-specific bootstrapping deadlock. (Cosmin Paraschiv) + - jeprof: + + Don't discard curl options if timeout is not defined. (@djwatson) + + Detect failed profile fetches. (@djwatson) + - Fix stats.arenas..{dss,lg_dirty_mult,decay_time,pactive,pdirty} for + --disable-stats case. (@jasone) + +* 4.0.4 (October 24, 2015) + + This bugfix release fixes another xallocx() regression. No other regressions + have come to light in over a month, so this is likely a good starting point + for people who prefer to wait for "dot one" releases with all the major issues + shaken out. + + Bug fixes: + - Fix xallocx(..., MALLOCX_ZERO to zero the last full trailing page of large + allocations that have been randomly assigned an offset of 0 when + --enable-cache-oblivious configure option is enabled. + * 4.0.3 (September 24, 2015) This bugfix release continues the trend of xallocx() and heap profiling fixes. diff --git a/deps/jemalloc/INSTALL b/deps/jemalloc/INSTALL index 8d3968745..cce3ed711 100644 --- a/deps/jemalloc/INSTALL +++ b/deps/jemalloc/INSTALL @@ -35,6 +35,10 @@ any of the following arguments (not a definitive list) to 'configure': will cause files to be installed into /usr/local/include, /usr/local/lib, and /usr/local/man. +--with-version=..--g + Use the specified version string rather than trying to generate one (if in + a git repository) or use existing the VERSION file (if present). + --with-rpath= Embed one or more library paths, so that libjemalloc can find the libraries it is linked to. This works only on ELF-based systems. @@ -84,6 +88,14 @@ any of the following arguments (not a definitive list) to 'configure': versions of jemalloc can coexist in the same installation directory. For example, libjemalloc.so.0 becomes libjemalloc.so.0. +--with-malloc-conf= + Embed as a run-time options string that is processed prior to + the malloc_conf global variable, the /etc/malloc.conf symlink, and the + MALLOC_CONF environment variable. 
For example, to change the default chunk + size to 256 KiB: + + --with-malloc-conf=lg_chunk:18 + --disable-cc-silence Disable code that silences non-useful compiler warnings. This is mainly useful during development when auditing the set of warnings that are being @@ -194,6 +206,11 @@ any of the following arguments (not a definitive list) to 'configure': most extreme case increases physical memory usage for the 16 KiB size class to 20 KiB. +--disable-syscall + Disable use of syscall(2) rather than {open,read,write,close}(2). This is + intended as a workaround for systems that place security limitations on + syscall(2). + --with-xslroot= Specify where to find DocBook XSL stylesheets when building the documentation. @@ -315,6 +332,15 @@ LDFLAGS="?" PATH="?" 'configure' uses this to find programs. +In some cases it may be necessary to work around configuration results that do +not match reality. For example, Linux 4.5 added support for the MADV_FREE flag +to madvise(2), which can cause problems if building on a host with MADV_FREE +support and deploying to a target without. To work around this, use a cache +file to override the relevant configuration variable defined in configure.ac, +e.g.: + + echo "je_cv_madv_free=no" > config.cache && ./configure -C + === Advanced compilation ======================================================= To build only parts of jemalloc, use the following targets: diff --git a/deps/jemalloc/Makefile.in b/deps/jemalloc/Makefile.in index 1ac6f2926..c70536391 100644 --- a/deps/jemalloc/Makefile.in +++ b/deps/jemalloc/Makefile.in @@ -24,11 +24,11 @@ abs_objroot := @abs_objroot@ # Build parameters. 
CPPFLAGS := @CPPFLAGS@ -I$(srcroot)include -I$(objroot)include -CFLAGS := @CFLAGS@ +EXTRA_CFLAGS := @EXTRA_CFLAGS@ +CFLAGS := @CFLAGS@ $(EXTRA_CFLAGS) LDFLAGS := @LDFLAGS@ EXTRA_LDFLAGS := @EXTRA_LDFLAGS@ LIBS := @LIBS@ -TESTLIBS := @TESTLIBS@ RPATH_EXTRA := @RPATH_EXTRA@ SO := @so@ IMPORTLIB := @importlib@ @@ -53,15 +53,19 @@ enable_prof := @enable_prof@ enable_valgrind := @enable_valgrind@ enable_zone_allocator := @enable_zone_allocator@ MALLOC_CONF := @JEMALLOC_CPREFIX@MALLOC_CONF +link_whole_archive := @link_whole_archive@ DSO_LDFLAGS = @DSO_LDFLAGS@ SOREV = @SOREV@ PIC_CFLAGS = @PIC_CFLAGS@ CTARGET = @CTARGET@ LDTARGET = @LDTARGET@ +TEST_LD_MODE = @TEST_LD_MODE@ MKLIB = @MKLIB@ AR = @AR@ ARFLAGS = @ARFLAGS@ CC_MM = @CC_MM@ +LM := @LM@ +INSTALL = @INSTALL@ ifeq (macho, $(ABI)) TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH="$(objroot)lib" @@ -78,15 +82,34 @@ LIBJEMALLOC := $(LIBPREFIX)jemalloc$(install_suffix) # Lists of files. BINS := $(objroot)bin/jemalloc-config $(objroot)bin/jemalloc.sh $(objroot)bin/jeprof C_HDRS := $(objroot)include/jemalloc/jemalloc$(install_suffix).h -C_SRCS := $(srcroot)src/jemalloc.c $(srcroot)src/arena.c \ - $(srcroot)src/atomic.c $(srcroot)src/base.c $(srcroot)src/bitmap.c \ - $(srcroot)src/chunk.c $(srcroot)src/chunk_dss.c \ - $(srcroot)src/chunk_mmap.c $(srcroot)src/ckh.c $(srcroot)src/ctl.c \ - $(srcroot)src/extent.c $(srcroot)src/hash.c $(srcroot)src/huge.c \ - $(srcroot)src/mb.c $(srcroot)src/mutex.c $(srcroot)src/pages.c \ - $(srcroot)src/prof.c $(srcroot)src/quarantine.c $(srcroot)src/rtree.c \ - $(srcroot)src/stats.c $(srcroot)src/tcache.c $(srcroot)src/util.c \ - $(srcroot)src/tsd.c +C_SRCS := $(srcroot)src/jemalloc.c \ + $(srcroot)src/arena.c \ + $(srcroot)src/atomic.c \ + $(srcroot)src/base.c \ + $(srcroot)src/bitmap.c \ + $(srcroot)src/chunk.c \ + $(srcroot)src/chunk_dss.c \ + $(srcroot)src/chunk_mmap.c \ + $(srcroot)src/ckh.c \ + $(srcroot)src/ctl.c \ + $(srcroot)src/extent.c \ + $(srcroot)src/hash.c \ + 
$(srcroot)src/huge.c \ + $(srcroot)src/mb.c \ + $(srcroot)src/mutex.c \ + $(srcroot)src/nstime.c \ + $(srcroot)src/pages.c \ + $(srcroot)src/prng.c \ + $(srcroot)src/prof.c \ + $(srcroot)src/quarantine.c \ + $(srcroot)src/rtree.c \ + $(srcroot)src/stats.c \ + $(srcroot)src/spin.c \ + $(srcroot)src/tcache.c \ + $(srcroot)src/ticker.c \ + $(srcroot)src/tsd.c \ + $(srcroot)src/util.c \ + $(srcroot)src/witness.c ifeq ($(enable_valgrind), 1) C_SRCS += $(srcroot)src/valgrind.c endif @@ -105,6 +128,11 @@ DSOS := $(objroot)lib/$(LIBJEMALLOC).$(SOREV) ifneq ($(SOREV),$(SO)) DSOS += $(objroot)lib/$(LIBJEMALLOC).$(SO) endif +ifeq (1, $(link_whole_archive)) +LJEMALLOC := -Wl,--whole-archive -L$(objroot)lib -l$(LIBJEMALLOC) -Wl,--no-whole-archive +else +LJEMALLOC := $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) +endif PC := $(objroot)jemalloc.pc MAN3 := $(objroot)doc/jemalloc$(install_suffix).3 DOCS_XML := $(objroot)doc/jemalloc$(install_suffix).xml @@ -116,10 +144,19 @@ C_TESTLIB_SRCS := $(srcroot)test/src/btalloc.c $(srcroot)test/src/btalloc_0.c \ $(srcroot)test/src/mtx.c $(srcroot)test/src/mq.c \ $(srcroot)test/src/SFMT.c $(srcroot)test/src/test.c \ $(srcroot)test/src/thd.c $(srcroot)test/src/timer.c -C_UTIL_INTEGRATION_SRCS := $(srcroot)src/util.c -TESTS_UNIT := $(srcroot)test/unit/atomic.c \ +ifeq (1, $(link_whole_archive)) +C_UTIL_INTEGRATION_SRCS := +else +C_UTIL_INTEGRATION_SRCS := $(srcroot)src/nstime.c $(srcroot)src/util.c +endif +TESTS_UNIT := \ + $(srcroot)test/unit/a0.c \ + $(srcroot)test/unit/arena_reset.c \ + $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/bitmap.c \ $(srcroot)test/unit/ckh.c \ + $(srcroot)test/unit/decay.c \ + $(srcroot)test/unit/fork.c \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/junk.c \ $(srcroot)test/unit/junk_alloc.c \ @@ -129,6 +166,10 @@ TESTS_UNIT := $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/math.c \ $(srcroot)test/unit/mq.c \ $(srcroot)test/unit/mtx.c \ + $(srcroot)test/unit/pack.c \ + $(srcroot)test/unit/pages.c \ + 
$(srcroot)test/unit/ph.c \ + $(srcroot)test/unit/prng.c \ $(srcroot)test/unit/prof_accum.c \ $(srcroot)test/unit/prof_active.c \ $(srcroot)test/unit/prof_gdump.c \ @@ -140,11 +181,16 @@ TESTS_UNIT := $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/quarantine.c \ $(srcroot)test/unit/rb.c \ $(srcroot)test/unit/rtree.c \ + $(srcroot)test/unit/run_quantize.c \ $(srcroot)test/unit/SFMT.c \ $(srcroot)test/unit/size_classes.c \ + $(srcroot)test/unit/smoothstep.c \ $(srcroot)test/unit/stats.c \ + $(srcroot)test/unit/ticker.c \ + $(srcroot)test/unit/nstime.c \ $(srcroot)test/unit/tsd.c \ $(srcroot)test/unit/util.c \ + $(srcroot)test/unit/witness.c \ $(srcroot)test/unit/zero.c TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \ $(srcroot)test/integration/allocated.c \ @@ -266,69 +312,69 @@ $(STATIC_LIBS): $(objroot)test/unit/%$(EXE): $(objroot)test/unit/%.$(O) $(TESTS_UNIT_LINK_OBJS) $(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS) @mkdir -p $(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(TESTLIBS) $(EXTRA_LDFLAGS) + $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) $(LM) $(EXTRA_LDFLAGS) $(objroot)test/integration/%$(EXE): $(objroot)test/integration/%.$(O) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(filter -lpthread,$(LIBS))) -lm $(TESTLIBS) $(EXTRA_LDFLAGS) + $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LJEMALLOC) $(LDFLAGS) $(filter-out -lm,$(filter -lrt -lpthread,$(LIBS))) $(LM) $(EXTRA_LDFLAGS) $(objroot)test/stress/%$(EXE): $(objroot)test/stress/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_STRESS_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call 
RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(TESTLIBS) $(EXTRA_LDFLAGS) + $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) $(LM) $(EXTRA_LDFLAGS) build_lib_shared: $(DSOS) build_lib_static: $(STATIC_LIBS) build_lib: build_lib_shared build_lib_static install_bin: - install -d $(BINDIR) + $(INSTALL) -d $(BINDIR) @for b in $(BINS); do \ - echo "install -m 755 $$b $(BINDIR)"; \ - install -m 755 $$b $(BINDIR); \ + echo "$(INSTALL) -m 755 $$b $(BINDIR)"; \ + $(INSTALL) -m 755 $$b $(BINDIR); \ done install_include: - install -d $(INCLUDEDIR)/jemalloc + $(INSTALL) -d $(INCLUDEDIR)/jemalloc @for h in $(C_HDRS); do \ - echo "install -m 644 $$h $(INCLUDEDIR)/jemalloc"; \ - install -m 644 $$h $(INCLUDEDIR)/jemalloc; \ + echo "$(INSTALL) -m 644 $$h $(INCLUDEDIR)/jemalloc"; \ + $(INSTALL) -m 644 $$h $(INCLUDEDIR)/jemalloc; \ done install_lib_shared: $(DSOS) - install -d $(LIBDIR) - install -m 755 $(objroot)lib/$(LIBJEMALLOC).$(SOREV) $(LIBDIR) + $(INSTALL) -d $(LIBDIR) + $(INSTALL) -m 755 $(objroot)lib/$(LIBJEMALLOC).$(SOREV) $(LIBDIR) ifneq ($(SOREV),$(SO)) ln -sf $(LIBJEMALLOC).$(SOREV) $(LIBDIR)/$(LIBJEMALLOC).$(SO) endif install_lib_static: $(STATIC_LIBS) - install -d $(LIBDIR) + $(INSTALL) -d $(LIBDIR) @for l in $(STATIC_LIBS); do \ - echo "install -m 755 $$l $(LIBDIR)"; \ - install -m 755 $$l $(LIBDIR); \ + echo "$(INSTALL) -m 755 $$l $(LIBDIR)"; \ + $(INSTALL) -m 755 $$l $(LIBDIR); \ done install_lib_pc: $(PC) - install -d $(LIBDIR)/pkgconfig + $(INSTALL) -d $(LIBDIR)/pkgconfig @for l in $(PC); do \ - echo "install -m 644 $$l $(LIBDIR)/pkgconfig"; \ - install -m 644 $$l $(LIBDIR)/pkgconfig; \ + echo "$(INSTALL) -m 644 $$l $(LIBDIR)/pkgconfig"; \ + $(INSTALL) -m 644 $$l $(LIBDIR)/pkgconfig; \ done install_lib: install_lib_shared install_lib_static install_lib_pc install_doc_html: - install -d 
$(DATADIR)/doc/jemalloc$(install_suffix) + $(INSTALL) -d $(DATADIR)/doc/jemalloc$(install_suffix) @for d in $(DOCS_HTML); do \ - echo "install -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix)"; \ - install -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix); \ + echo "$(INSTALL) -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix)"; \ + $(INSTALL) -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix); \ done install_doc_man: - install -d $(MANDIR)/man3 + $(INSTALL) -d $(MANDIR)/man3 @for d in $(DOCS_MAN3); do \ - echo "install -m 644 $$d $(MANDIR)/man3"; \ - install -m 644 $$d $(MANDIR)/man3; \ + echo "$(INSTALL) -m 644 $$d $(MANDIR)/man3"; \ + $(INSTALL) -m 644 $$d $(MANDIR)/man3; \ done install_doc: install_doc_html install_doc_man @@ -349,18 +395,22 @@ stress_dir: check_dir: check_unit_dir check_integration_dir check_unit: tests_unit check_unit_dir - $(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%) + $(MALLOC_CONF)="purge:ratio" $(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%) + $(MALLOC_CONF)="purge:decay" $(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%) check_integration_prof: tests_integration check_integration_dir ifeq ($(enable_prof), 1) $(MALLOC_CONF)="prof:true" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) $(MALLOC_CONF)="prof:true,prof_active:false" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) endif +check_integration_decay: tests_integration check_integration_dir + $(MALLOC_CONF)="purge:decay,decay_time:-1" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) + $(MALLOC_CONF)="purge:decay,decay_time:0" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) + $(MALLOC_CONF)="purge:decay" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) check_integration: tests_integration check_integration_dir $(SHELL) $(objroot)test/test.sh 
$(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) stress: tests_stress stress_dir $(SHELL) $(objroot)test/test.sh $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%) -check: tests check_dir check_integration_prof - $(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%) $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) +check: check_unit check_integration check_integration_decay check_integration_prof ifeq ($(enable_code_coverage), 1) coverage_unit: check_unit diff --git a/deps/jemalloc/README b/deps/jemalloc/README index 9b268f422..5ff24a9ef 100644 --- a/deps/jemalloc/README +++ b/deps/jemalloc/README @@ -17,4 +17,4 @@ jemalloc. The ChangeLog file contains a brief summary of changes for each release. -URL: http://www.canonware.com/jemalloc/ +URL: http://jemalloc.net/ diff --git a/deps/jemalloc/VERSION b/deps/jemalloc/VERSION index f1f9f1c61..810bd6d4c 100644 --- a/deps/jemalloc/VERSION +++ b/deps/jemalloc/VERSION @@ -1 +1 @@ -4.0.3-0-ge9192eacf8935e29fc62fddc2701f7942b1cc02c +4.4.0-0-gf1f76357313e7dcad7262f17a48ff0a2e005fcdc diff --git a/deps/jemalloc/bin/jeprof.in b/deps/jemalloc/bin/jeprof.in index e7178078a..42087fcec 100644 --- a/deps/jemalloc/bin/jeprof.in +++ b/deps/jemalloc/bin/jeprof.in @@ -95,7 +95,7 @@ my @EVINCE = ("evince"); # could also be xpdf or perhaps acroread my @KCACHEGRIND = ("kcachegrind"); my @PS2PDF = ("ps2pdf"); # These are used for dynamic profiles -my @URL_FETCHER = ("curl", "-s"); +my @URL_FETCHER = ("curl", "-s", "--fail"); # These are the web pages that servers need to support for dynamic profiles my $HEAP_PAGE = "/pprof/heap"; @@ -223,12 +223,14 @@ Call-graph Options: --nodefraction= Hide nodes below *total [default=.005] --edgefraction= Hide edges below *total [default=.001] --maxdegree= Max incoming/outgoing edges per node [default=8] - --focus= Focus on nodes matching + --focus= Focus on backtraces with nodes matching --thread= Show profile for thread - --ignore= Ignore nodes matching + --ignore= Ignore backtraces with nodes 
matching --scale= Set GV scaling [default=0] --heapcheck Make nodes with non-0 object counts (i.e. direct leak generators) more visible + --retain= Retain only nodes that match + --exclude= Exclude all nodes that match Miscellaneous: --tools=[,...] \$PATH for object tool pathnames @@ -339,6 +341,8 @@ sub Init() { $main::opt_ignore = ''; $main::opt_scale = 0; $main::opt_heapcheck = 0; + $main::opt_retain = ''; + $main::opt_exclude = ''; $main::opt_seconds = 30; $main::opt_lib = ""; @@ -410,6 +414,8 @@ sub Init() { "ignore=s" => \$main::opt_ignore, "scale=i" => \$main::opt_scale, "heapcheck" => \$main::opt_heapcheck, + "retain=s" => \$main::opt_retain, + "exclude=s" => \$main::opt_exclude, "inuse_space!" => \$main::opt_inuse_space, "inuse_objects!" => \$main::opt_inuse_objects, "alloc_space!" => \$main::opt_alloc_space, @@ -1160,8 +1166,21 @@ sub PrintSymbolizedProfile { } print '---', "\n"; - $PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash - my $profile_marker = $&; + my $profile_marker; + if ($main::profile_type eq 'heap') { + $HEAP_PAGE =~ m,[^/]+$,; # matches everything after the last slash + $profile_marker = $&; + } elsif ($main::profile_type eq 'growth') { + $GROWTH_PAGE =~ m,[^/]+$,; # matches everything after the last slash + $profile_marker = $&; + } elsif ($main::profile_type eq 'contention') { + $CONTENTION_PAGE =~ m,[^/]+$,; # matches everything after the last slash + $profile_marker = $&; + } else { # elsif ($main::profile_type eq 'cpu') + $PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash + $profile_marker = $&; + } + print '--- ', $profile_marker, "\n"; if (defined($main::collected_profile)) { # if used with remote fetch, simply dump the collected profile to output. 
@@ -1171,6 +1190,12 @@ sub PrintSymbolizedProfile { } close(SRC); } else { + # --raw/http: For everything to work correctly for non-remote profiles, we + # would need to extend PrintProfileData() to handle all possible profile + # types, re-enable the code that is currently disabled in ReadCPUProfile() + # and FixCallerAddresses(), and remove the remote profile dumping code in + # the block above. + die "--raw/http: jeprof can only dump remote profiles for --raw\n"; # dump a cpu-format profile to standard out PrintProfileData($profile); } @@ -2821,6 +2846,43 @@ sub ExtractCalls { return $calls; } +sub FilterFrames { + my $symbols = shift; + my $profile = shift; + + if ($main::opt_retain eq '' && $main::opt_exclude eq '') { + return $profile; + } + + my $result = {}; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + my @path = (); + foreach my $a (@addrs) { + my $sym; + if (exists($symbols->{$a})) { + $sym = $symbols->{$a}->[0]; + } else { + $sym = $a; + } + if ($main::opt_retain ne '' && $sym !~ m/$main::opt_retain/) { + next; + } + if ($main::opt_exclude ne '' && $sym =~ m/$main::opt_exclude/) { + next; + } + push(@path, $a); + } + if (scalar(@path) > 0) { + my $reduced_path = join("\n", @path); + AddEntry($result, $reduced_path, $count); + } + } + + return $result; +} + sub RemoveUninterestingFrames { my $symbols = shift; my $profile = shift; @@ -2965,6 +3027,9 @@ sub RemoveUninterestingFrames { my $reduced_path = join("\n", @path); AddEntry($result, $reduced_path, $count); } + + $result = FilterFrames($symbols, $result); + return $result; } @@ -3274,7 +3339,7 @@ sub ResolveRedirectionForCurl { # Add a timeout flat to URL_FETCHER. Returns a new list. 
sub AddFetchTimeout { my $timeout = shift; - my @fetcher = shift; + my @fetcher = @_; if (defined($timeout)) { if (join(" ", @fetcher) =~ m/\bcurl -s/) { push(@fetcher, "--max-time", sprintf("%d", $timeout)); @@ -3320,6 +3385,27 @@ sub ReadSymbols { return $map; } +sub URLEncode { + my $str = shift; + $str =~ s/([^A-Za-z0-9\-_.!~*'()])/ sprintf "%%%02x", ord $1 /eg; + return $str; +} + +sub AppendSymbolFilterParams { + my $url = shift; + my @params = (); + if ($main::opt_retain ne '') { + push(@params, sprintf("retain=%s", URLEncode($main::opt_retain))); + } + if ($main::opt_exclude ne '') { + push(@params, sprintf("exclude=%s", URLEncode($main::opt_exclude))); + } + if (scalar @params > 0) { + $url = sprintf("%s?%s", $url, join("&", @params)); + } + return $url; +} + # Fetches and processes symbols to prepare them for use in the profile output # code. If the optional 'symbol_map' arg is not given, fetches symbols from # $SYMBOL_PAGE for all PC values found in profile. Otherwise, the raw symbols @@ -3344,9 +3430,11 @@ sub FetchSymbols { my $command_line; if (join(" ", @URL_FETCHER) =~ m/\bcurl -s/) { $url = ResolveRedirectionForCurl($url); + $url = AppendSymbolFilterParams($url); $command_line = ShellEscape(@URL_FETCHER, "-d", "\@$main::tmpfile_sym", $url); } else { + $url = AppendSymbolFilterParams($url); $command_line = (ShellEscape(@URL_FETCHER, "--post", $url) . " < " . ShellEscape($main::tmpfile_sym)); } @@ -3427,12 +3515,22 @@ sub FetchDynamicProfile { } $url .= sprintf("seconds=%d", $main::opt_seconds); $fetch_timeout = $main::opt_seconds * 1.01 + 60; + # Set $profile_type for consumption by PrintSymbolizedProfile. + $main::profile_type = 'cpu'; } else { # For non-CPU profiles, we add a type-extension to # the target profile file name. my $suffix = $path; $suffix =~ s,/,.,g; $profile_file .= $suffix; + # Set $profile_type for consumption by PrintSymbolizedProfile. 
+ if ($path =~ m/$HEAP_PAGE/) { + $main::profile_type = 'heap'; + } elsif ($path =~ m/$GROWTH_PAGE/) { + $main::profile_type = 'growth'; + } elsif ($path =~ m/$CONTENTION_PAGE/) { + $main::profile_type = 'contention'; + } } my $profile_dir = $ENV{"JEPROF_TMPDIR"} || ($ENV{HOME} . "/jeprof"); @@ -3730,6 +3828,8 @@ sub ReadProfile { my $symbol_marker = $&; $PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash my $profile_marker = $&; + $HEAP_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $heap_marker = $&; # Look at first line to see if it is a heap or a CPU profile. # CPU profile may start with no header at all, and just binary data @@ -3756,7 +3856,13 @@ sub ReadProfile { $header = ReadProfileHeader(*PROFILE) || ""; } + if ($header =~ m/^--- *($heap_marker|$growth_marker)/o) { + # Skip "--- ..." line for profile types that have their own headers. + $header = ReadProfileHeader(*PROFILE) || ""; + } + $main::profile_type = ''; + if ($header =~ m/^heap profile:.*$growth_marker/o) { $main::profile_type = 'growth'; $result = ReadHeapProfile($prog, *PROFILE, $header); @@ -3808,9 +3914,9 @@ sub ReadProfile { # independent implementation. sub FixCallerAddresses { my $stack = shift; - if ($main::use_symbolized_profile) { - return $stack; - } else { + # --raw/http: Always subtract one from pc's, because PrintSymbolizedProfile() + # dumps unadjusted profiles. + { $stack =~ /(\s)/; my $delimiter = $1; my @addrs = split(' ', $stack); @@ -3878,12 +3984,7 @@ sub ReadCPUProfile { for (my $j = 0; $j < $d; $j++) { my $pc = $slots->get($i+$j); # Subtract one from caller pc so we map back to call instr. - # However, don't do this if we're reading a symbolized profile - # file, in which case the subtract-one was done when the file - # was written. 
- if ($j > 0 && !$main::use_symbolized_profile) { - $pc--; - } + $pc--; $pc = sprintf("%0*x", $address_length, $pc); $pcs->{$pc} = 1; push @k, $pc; diff --git a/deps/jemalloc/config.guess b/deps/jemalloc/build-aux/config.guess similarity index 90% rename from deps/jemalloc/config.guess rename to deps/jemalloc/build-aux/config.guess index 1f5c50c0d..2e9ad7fe8 100755 --- a/deps/jemalloc/config.guess +++ b/deps/jemalloc/build-aux/config.guess @@ -1,8 +1,8 @@ #! /bin/sh # Attempt to guess a canonical system name. -# Copyright 1992-2014 Free Software Foundation, Inc. +# Copyright 1992-2016 Free Software Foundation, Inc. -timestamp='2014-03-23' +timestamp='2016-10-02' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by @@ -24,12 +24,12 @@ timestamp='2014-03-23' # program. This Exception is an additional permission under section 7 # of the GNU General Public License, version 3 ("GPLv3"). # -# Originally written by Per Bothner. +# Originally written by Per Bothner; maintained since 2000 by Ben Elliston. # # You can get the latest version of this script from: -# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD +# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess # -# Please send patches with a ChangeLog entry to config-patches@gnu.org. +# Please send patches to . me=`echo "$0" | sed -e 's,.*/,,'` @@ -50,7 +50,7 @@ version="\ GNU config.guess ($timestamp) Originally written by Per Bothner. -Copyright 1992-2014 Free Software Foundation, Inc. +Copyright 1992-2016 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." @@ -168,19 +168,29 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in # Note: NetBSD doesn't particularly care about the vendor # portion of the name. 
We always set it to "unknown". sysctl="sysctl -n hw.machine_arch" - UNAME_MACHINE_ARCH=`(/sbin/$sysctl 2>/dev/null || \ - /usr/sbin/$sysctl 2>/dev/null || echo unknown)` + UNAME_MACHINE_ARCH=`(uname -p 2>/dev/null || \ + /sbin/$sysctl 2>/dev/null || \ + /usr/sbin/$sysctl 2>/dev/null || \ + echo unknown)` case "${UNAME_MACHINE_ARCH}" in armeb) machine=armeb-unknown ;; arm*) machine=arm-unknown ;; sh3el) machine=shl-unknown ;; sh3eb) machine=sh-unknown ;; sh5el) machine=sh5le-unknown ;; + earmv*) + arch=`echo ${UNAME_MACHINE_ARCH} | sed -e 's,^e\(armv[0-9]\).*$,\1,'` + endian=`echo ${UNAME_MACHINE_ARCH} | sed -ne 's,^.*\(eb\)$,\1,p'` + machine=${arch}${endian}-unknown + ;; *) machine=${UNAME_MACHINE_ARCH}-unknown ;; esac # The Operating System including object format, if it has switched - # to ELF recently, or will in the future. + # to ELF recently (or will in the future) and ABI. case "${UNAME_MACHINE_ARCH}" in + earm*) + os=netbsdelf + ;; arm*|i386|m68k|ns32k|sh3*|sparc|vax) eval $set_cc_for_build if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ @@ -197,6 +207,13 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in os=netbsd ;; esac + # Determine ABI tags. + case "${UNAME_MACHINE_ARCH}" in + earm*) + expr='s/^earmv[0-9]/-eabi/;s/eb$//' + abi=`echo ${UNAME_MACHINE_ARCH} | sed -e "$expr"` + ;; + esac # The OS release # Debian GNU/NetBSD machines have a different userland, and # thus, need a distinct triplet. However, they do not need @@ -207,13 +224,13 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in release='-gnu' ;; *) - release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'` + release=`echo ${UNAME_RELEASE} | sed -e 's/[-_].*//' | cut -d. -f1,2` ;; esac # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: # contains redundant information, the shorter form: # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. 
- echo "${machine}-${os}${release}" + echo "${machine}-${os}${release}${abi}" exit ;; *:Bitrig:*:*) UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'` @@ -223,6 +240,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE} exit ;; + *:LibertyBSD:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/^.*BSD\.//'` + echo ${UNAME_MACHINE_ARCH}-unknown-libertybsd${UNAME_RELEASE} + exit ;; *:ekkoBSD:*:*) echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE} exit ;; @@ -235,6 +256,9 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in *:MirBSD:*:*) echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE} exit ;; + *:Sortix:*:*) + echo ${UNAME_MACHINE}-unknown-sortix + exit ;; alpha:OSF1:*:*) case $UNAME_RELEASE in *4.0) @@ -251,42 +275,42 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` case "$ALPHA_CPU_TYPE" in "EV4 (21064)") - UNAME_MACHINE="alpha" ;; + UNAME_MACHINE=alpha ;; "EV4.5 (21064)") - UNAME_MACHINE="alpha" ;; + UNAME_MACHINE=alpha ;; "LCA4 (21066/21068)") - UNAME_MACHINE="alpha" ;; + UNAME_MACHINE=alpha ;; "EV5 (21164)") - UNAME_MACHINE="alphaev5" ;; + UNAME_MACHINE=alphaev5 ;; "EV5.6 (21164A)") - UNAME_MACHINE="alphaev56" ;; + UNAME_MACHINE=alphaev56 ;; "EV5.6 (21164PC)") - UNAME_MACHINE="alphapca56" ;; + UNAME_MACHINE=alphapca56 ;; "EV5.7 (21164PC)") - UNAME_MACHINE="alphapca57" ;; + UNAME_MACHINE=alphapca57 ;; "EV6 (21264)") - UNAME_MACHINE="alphaev6" ;; + UNAME_MACHINE=alphaev6 ;; "EV6.7 (21264A)") - UNAME_MACHINE="alphaev67" ;; + UNAME_MACHINE=alphaev67 ;; "EV6.8CB (21264C)") - UNAME_MACHINE="alphaev68" ;; + UNAME_MACHINE=alphaev68 ;; "EV6.8AL (21264B)") - UNAME_MACHINE="alphaev68" ;; + UNAME_MACHINE=alphaev68 ;; "EV6.8CX (21264D)") - UNAME_MACHINE="alphaev68" ;; + UNAME_MACHINE=alphaev68 
;; "EV6.9A (21264/EV69A)") - UNAME_MACHINE="alphaev69" ;; + UNAME_MACHINE=alphaev69 ;; "EV7 (21364)") - UNAME_MACHINE="alphaev7" ;; + UNAME_MACHINE=alphaev7 ;; "EV7.9 (21364A)") - UNAME_MACHINE="alphaev79" ;; + UNAME_MACHINE=alphaev79 ;; esac # A Pn.n version is a patched version. # A Vn.n version is a released version. # A Tn.n version is a released field test version. # A Xn.n version is an unreleased experimental baselevel. # 1.2 uses "1.2" for uname -r. - echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` + echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` # Reset EXIT trap before exiting to avoid spurious non-zero exit code. exitcode=$? trap '' 0 @@ -359,16 +383,16 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in exit ;; i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) eval $set_cc_for_build - SUN_ARCH="i386" + SUN_ARCH=i386 # If there is a compiler, see if it is configured for 64-bit objects. # Note that the Sun cc does not turn __LP64__ into 1 like gcc does. # This test works for both compilers. 
- if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then + if [ "$CC_FOR_BUILD" != no_compiler_found ]; then if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \ - (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ grep IS_64BIT_ARCH >/dev/null then - SUN_ARCH="x86_64" + SUN_ARCH=x86_64 fi fi echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` @@ -393,7 +417,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in exit ;; sun*:*:4.2BSD:*) UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` - test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3 + test "x${UNAME_RELEASE}" = x && UNAME_RELEASE=3 case "`/bin/arch`" in sun3) echo m68k-sun-sunos${UNAME_RELEASE} @@ -579,8 +603,9 @@ EOF else IBM_ARCH=powerpc fi - if [ -x /usr/bin/oslevel ] ; then - IBM_REV=`/usr/bin/oslevel` + if [ -x /usr/bin/lslpp ] ; then + IBM_REV=`/usr/bin/lslpp -Lqc bos.rte.libc | + awk -F: '{ print $3 }' | sed s/[0-9]*$/0/` else IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} fi @@ -617,13 +642,13 @@ EOF sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` case "${sc_cpu_version}" in - 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 - 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 + 523) HP_ARCH=hppa1.0 ;; # CPU_PA_RISC1_0 + 528) HP_ARCH=hppa1.1 ;; # CPU_PA_RISC1_1 532) # CPU_PA_RISC2_0 case "${sc_kernel_bits}" in - 32) HP_ARCH="hppa2.0n" ;; - 64) HP_ARCH="hppa2.0w" ;; - '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20 + 32) HP_ARCH=hppa2.0n ;; + 64) HP_ARCH=hppa2.0w ;; + '') HP_ARCH=hppa2.0 ;; # HP-UX 10.20 esac ;; esac fi @@ -662,11 +687,11 @@ EOF exit (0); } EOF - (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` + (CCOPTS="" $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` test -z "$HP_ARCH" && HP_ARCH=hppa fi ;; esac - if [ ${HP_ARCH} = "hppa2.0w" ] + if [ ${HP_ARCH} = hppa2.0w ] then eval 
$set_cc_for_build @@ -679,12 +704,12 @@ EOF # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess # => hppa64-hp-hpux11.23 - if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | + if echo __LP64__ | (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | grep -q __LP64__ then - HP_ARCH="hppa2.0w" + HP_ARCH=hppa2.0w else - HP_ARCH="hppa64" + HP_ARCH=hppa64 fi fi echo ${HP_ARCH}-hp-hpux${HPUX_REV} @@ -789,14 +814,14 @@ EOF echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' exit ;; F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) - FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` - FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_PROC=`uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` + FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" exit ;; 5000:UNIX_System_V:4.*:*) - FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` - FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` + FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/'` echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" exit ;; i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) @@ -878,7 +903,7 @@ EOF exit ;; *:GNU/*:*:*) # other systems with GNU libc and userland - echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC} + echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]"``echo 
${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC} exit ;; i*86:Minix:*:*) echo ${UNAME_MACHINE}-pc-minix @@ -901,7 +926,7 @@ EOF EV68*) UNAME_MACHINE=alphaev68 ;; esac objdump --private-headers /bin/sh | grep -q ld.so.1 - if test "$?" = 0 ; then LIBC="gnulibc1" ; fi + if test "$?" = 0 ; then LIBC=gnulibc1 ; fi echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; arc:Linux:*:* | arceb:Linux:*:*) @@ -932,6 +957,9 @@ EOF crisv32:Linux:*:*) echo ${UNAME_MACHINE}-axis-linux-${LIBC} exit ;; + e2k:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; frv:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; @@ -944,6 +972,9 @@ EOF ia64:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; + k1om:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; m32r*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; @@ -969,6 +1000,9 @@ EOF eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'` test x"${CPU}" != x && { echo "${CPU}-unknown-linux-${LIBC}"; exit; } ;; + mips64el:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; openrisc*:Linux:*:*) echo or1k-unknown-linux-${LIBC} exit ;; @@ -1001,6 +1035,9 @@ EOF ppcle:Linux:*:*) echo powerpcle-unknown-linux-${LIBC} exit ;; + riscv32:Linux:*:* | riscv64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; s390:Linux:*:* | s390x:Linux:*:*) echo ${UNAME_MACHINE}-ibm-linux-${LIBC} exit ;; @@ -1020,7 +1057,7 @@ EOF echo ${UNAME_MACHINE}-dec-linux-${LIBC} exit ;; x86_64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo ${UNAME_MACHINE}-pc-linux-${LIBC} exit ;; xtensa*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-${LIBC} @@ -1099,7 +1136,7 @@ EOF # uname -m prints for DJGPP always 'pc', but it prints nothing about # the processor, so we play safe by assuming i586. 
# Note: whatever this is, it MUST be the same as what config.sub - # prints for the "djgpp" host, or else GDB configury will decide that + # prints for the "djgpp" host, or else GDB configure will decide that # this is a cross-build. echo i586-pc-msdosdjgpp exit ;; @@ -1248,6 +1285,9 @@ EOF SX-8R:SUPER-UX:*:*) echo sx8r-nec-superux${UNAME_RELEASE} exit ;; + SX-ACE:SUPER-UX:*:*) + echo sxace-nec-superux${UNAME_RELEASE} + exit ;; Power*:Rhapsody:*:*) echo powerpc-apple-rhapsody${UNAME_RELEASE} exit ;; @@ -1261,9 +1301,9 @@ EOF UNAME_PROCESSOR=powerpc fi if test `echo "$UNAME_RELEASE" | sed -e 's/\..*//'` -le 10 ; then - if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then + if [ "$CC_FOR_BUILD" != no_compiler_found ]; then if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ - (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ grep IS_64BIT_ARCH >/dev/null then case $UNAME_PROCESSOR in @@ -1285,7 +1325,7 @@ EOF exit ;; *:procnto*:*:* | *:QNX:[0123456789]*:*) UNAME_PROCESSOR=`uname -p` - if test "$UNAME_PROCESSOR" = "x86"; then + if test "$UNAME_PROCESSOR" = x86; then UNAME_PROCESSOR=i386 UNAME_MACHINE=pc fi @@ -1316,7 +1356,7 @@ EOF # "uname -m" is not consistent, so use $cputype instead. 386 # is converted to i386 for consistency with other x86 # operating systems. - if test "$cputype" = "386"; then + if test "$cputype" = 386; then UNAME_MACHINE=i386 else UNAME_MACHINE="$cputype" @@ -1358,7 +1398,7 @@ EOF echo i386-pc-xenix exit ;; i*86:skyos:*:*) - echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//' + echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE} | sed -e 's/ .*$//'` exit ;; i*86:rdos:*:*) echo ${UNAME_MACHINE}-pc-rdos @@ -1369,23 +1409,25 @@ EOF x86_64:VMkernel:*:*) echo ${UNAME_MACHINE}-unknown-esx exit ;; + amd64:Isilon\ OneFS:*:*) + echo x86_64-unknown-onefs + exit ;; esac cat >&2 < in order to provide the needed -information to handle your system. 
+If $0 has already been updated, send the following data and any +information you think might be pertinent to config-patches@gnu.org to +provide the necessary information to handle your system. config.guess timestamp = $timestamp diff --git a/deps/jemalloc/config.sub b/deps/jemalloc/build-aux/config.sub similarity index 95% rename from deps/jemalloc/config.sub rename to deps/jemalloc/build-aux/config.sub index 0ccff7706..dd2ca93c6 100755 --- a/deps/jemalloc/config.sub +++ b/deps/jemalloc/build-aux/config.sub @@ -1,8 +1,8 @@ #! /bin/sh # Configuration validation subroutine script. -# Copyright 1992-2014 Free Software Foundation, Inc. +# Copyright 1992-2016 Free Software Foundation, Inc. -timestamp='2014-05-01' +timestamp='2016-11-04' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by @@ -25,7 +25,7 @@ timestamp='2014-05-01' # of the GNU General Public License, version 3 ("GPLv3"). -# Please send patches with a ChangeLog entry to config-patches@gnu.org. +# Please send patches to . # # Configuration subroutine to validate and canonicalize a configuration type. # Supply the specified configuration type as an argument. @@ -33,7 +33,7 @@ timestamp='2014-05-01' # Otherwise, we print the canonical config type on stdout and succeed. # You can get the latest version of this script from: -# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD +# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub # This file is supposed to be the same for all GNU packages # and recognize all the CPU types, system types and aliases @@ -53,8 +53,7 @@ timestamp='2014-05-01' me=`echo "$0" | sed -e 's,.*/,,'` usage="\ -Usage: $0 [OPTION] CPU-MFR-OPSYS - $0 [OPTION] ALIAS +Usage: $0 [OPTION] CPU-MFR-OPSYS or ALIAS Canonicalize a configuration name. @@ -68,7 +67,7 @@ Report bugs and patches to ." 
version="\ GNU config.sub ($timestamp) -Copyright 1992-2014 Free Software Foundation, Inc. +Copyright 1992-2016 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." @@ -117,8 +116,8 @@ maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` case $maybe_os in nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \ linux-musl* | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \ - knetbsd*-gnu* | netbsd*-gnu* | \ - kopensolaris*-gnu* | \ + knetbsd*-gnu* | netbsd*-gnu* | netbsd*-eabi* | \ + kopensolaris*-gnu* | cloudabi*-eabi* | \ storm-chaos* | os2-emx* | rtmk-nova*) os=-$maybe_os basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` @@ -255,12 +254,13 @@ case $basic_machine in | arc | arceb \ | arm | arm[bl]e | arme[lb] | armv[2-8] | armv[3-8][lb] | armv7[arm] \ | avr | avr32 \ + | ba \ | be32 | be64 \ | bfin \ | c4x | c8051 | clipper \ | d10v | d30v | dlx | dsp16xx \ - | epiphany \ - | fido | fr30 | frv \ + | e2k | epiphany \ + | fido | fr30 | frv | ft32 \ | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ | hexagon \ | i370 | i860 | i960 | ia64 \ @@ -301,10 +301,12 @@ case $basic_machine in | open8 | or1k | or1knd | or32 \ | pdp10 | pdp11 | pj | pjl \ | powerpc | powerpc64 | powerpc64le | powerpcle \ + | pru \ | pyramid \ + | riscv32 | riscv64 \ | rl78 | rx \ | score \ - | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ + | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[234]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ | sh64 | sh64le \ | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \ | sparcv8 | sparcv9 | sparcv9b | sparcv9v \ @@ -312,6 +314,7 @@ case $basic_machine in | tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \ | ubicom32 \ | v850 | v850e | v850e1 | v850e2 | v850es | 
v850e2v3 \ + | visium \ | we32k \ | x86 | xc16x | xstormy16 | xtensa \ | z8k | z80) @@ -326,6 +329,9 @@ case $basic_machine in c6x) basic_machine=tic6x-unknown ;; + leon|leon[3-9]) + basic_machine=sparc-$basic_machine + ;; m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | nvptx | picochip) basic_machine=$basic_machine-unknown os=-none @@ -371,12 +377,13 @@ case $basic_machine in | alphapca5[67]-* | alpha64pca5[67]-* | arc-* | arceb-* \ | arm-* | armbe-* | armle-* | armeb-* | armv*-* \ | avr-* | avr32-* \ + | ba-* \ | be32-* | be64-* \ | bfin-* | bs2000-* \ | c[123]* | c30-* | [cjt]90-* | c4x-* \ | c8051-* | clipper-* | craynv-* | cydra-* \ | d10v-* | d30v-* | dlx-* \ - | elxsi-* \ + | e2k-* | elxsi-* \ | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \ | h8300-* | h8500-* \ | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \ @@ -422,13 +429,15 @@ case $basic_machine in | orion-* \ | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \ | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \ + | pru-* \ | pyramid-* \ + | riscv32-* | riscv64-* \ | rl78-* | romp-* | rs6000-* | rx-* \ | sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \ | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \ | sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \ | sparclite-* \ - | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx?-* \ + | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx*-* \ | tahoe-* \ | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \ | tile*-* \ @@ -436,6 +445,7 @@ case $basic_machine in | ubicom32-* \ | v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \ | vax-* \ + | visium-* \ | we32k-* \ | x86-* | x86_64-* | xc16x-* | xps100-* \ | xstormy16-* | xtensa*-* \ @@ -512,6 +522,9 @@ case $basic_machine in basic_machine=i386-pc os=-aros ;; + asmjs) + basic_machine=asmjs-unknown + ;; aux) basic_machine=m68k-apple os=-aux @@ -632,6 +645,14 @@ 
case $basic_machine in basic_machine=m68k-bull os=-sysv3 ;; + e500v[12]) + basic_machine=powerpc-unknown + os=$os"spe" + ;; + e500v[12]-*) + basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` + os=$os"spe" + ;; ebmon29k) basic_machine=a29k-amd os=-ebmon @@ -773,6 +794,9 @@ case $basic_machine in basic_machine=m68k-isi os=-sysv ;; + leon-*|leon[3-9]-*) + basic_machine=sparc-`echo $basic_machine | sed 's/-.*//'` + ;; m68knommu) basic_machine=m68k-unknown os=-linux @@ -828,6 +852,10 @@ case $basic_machine in basic_machine=powerpc-unknown os=-morphos ;; + moxiebox) + basic_machine=moxie-unknown + os=-moxiebox + ;; msdos) basic_machine=i386-pc os=-msdos @@ -1004,7 +1032,7 @@ case $basic_machine in ppc-* | ppcbe-*) basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` ;; - ppcle | powerpclittle | ppc-le | powerpc-little) + ppcle | powerpclittle) basic_machine=powerpcle-unknown ;; ppcle-* | powerpclittle-*) @@ -1014,7 +1042,7 @@ case $basic_machine in ;; ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'` ;; - ppc64le | powerpc64little | ppc64-le | powerpc64-little) + ppc64le | powerpc64little) basic_machine=powerpc64le-unknown ;; ppc64le-* | powerpc64little-*) @@ -1360,27 +1388,28 @@ case $os in | -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \ | -sym* | -kopensolaris* | -plan9* \ | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \ - | -aos* | -aros* \ + | -aos* | -aros* | -cloudabi* | -sortix* \ | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \ - | -bitrig* | -openbsd* | -solidbsd* \ + | -bitrig* | -openbsd* | -solidbsd* | -libertybsd* \ | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \ | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* 
\ | -chorusos* | -chorusrdb* | -cegcc* \ | -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ - | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \ + | -midipix* | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \ | -linux-newlib* | -linux-musl* | -linux-uclibc* \ - | -uxpv* | -beos* | -mpeix* | -udk* \ + | -uxpv* | -beos* | -mpeix* | -udk* | -moxiebox* \ | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \ | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \ | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \ | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \ | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \ | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \ - | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es* | -tirtos*) + | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es* \ + | -onefs* | -tirtos* | -phoenix* | -fuchsia*) # Remember, each alternative MUST END IN *, to match a version number. ;; -qnx*) @@ -1404,9 +1433,6 @@ case $os in -mac*) os=`echo $os | sed -e 's|mac|macos|'` ;; - # Apple iOS - -ios*) - ;; -linux-dietlibc) os=-linux-dietlibc ;; @@ -1515,6 +1541,8 @@ case $os in ;; -nacl*) ;; + -ios) + ;; -none) ;; *) diff --git a/deps/jemalloc/install-sh b/deps/jemalloc/build-aux/install-sh similarity index 100% rename from deps/jemalloc/install-sh rename to deps/jemalloc/build-aux/install-sh diff --git a/deps/jemalloc/configure b/deps/jemalloc/configure index 8c56c92a1..d7391524d 100755 --- a/deps/jemalloc/configure +++ b/deps/jemalloc/configure @@ -628,7 +628,6 @@ cfghdrs_in enable_zone_allocator enable_tls enable_lazy_lock -TESTLIBS jemalloc_version_gid jemalloc_version_nrev jemalloc_version_bugfix @@ -658,16 +657,19 @@ INSTALL_SCRIPT INSTALL_PROGRAM enable_autogen RPATH_EXTRA +LM CC_MM AROUT ARFLAGS MKLIB +TEST_LD_MODE LDTARGET CTARGET PIC_CFLAGS SOREV EXTRA_LDFLAGS DSO_LDFLAGS +link_whole_archive libprefix exe a @@ -689,6 +691,7 @@ build EGREP GREP CPP +EXTRA_CFLAGS 
OBJEXT EXEEXT ac_ct_CC @@ -729,6 +732,7 @@ infodir docdir oldincludedir includedir +runstatedir localstatedir sharedstatedir sysconfdir @@ -760,6 +764,7 @@ with_jemalloc_prefix with_export with_private_namespace with_install_suffix +with_malloc_conf enable_cc_silence enable_debug enable_ivsalloc @@ -781,6 +786,8 @@ with_lg_quantum with_lg_page with_lg_page_sizes with_lg_size_class_group +with_version +enable_syscall enable_lazy_lock enable_tls enable_zone_allocator @@ -832,6 +839,7 @@ datadir='${datarootdir}' sysconfdir='${prefix}/etc' sharedstatedir='${prefix}/com' localstatedir='${prefix}/var' +runstatedir='${localstatedir}/run' includedir='${prefix}/include' oldincludedir='/usr/include' docdir='${datarootdir}/doc/${PACKAGE}' @@ -1084,6 +1092,15 @@ do | -silent | --silent | --silen | --sile | --sil) silent=yes ;; + -runstatedir | --runstatedir | --runstatedi | --runstated \ + | --runstate | --runstat | --runsta | --runst | --runs \ + | --run | --ru | --r) + ac_prev=runstatedir ;; + -runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \ + | --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \ + | --run=* | --ru=* | --r=*) + runstatedir=$ac_optarg ;; + -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) ac_prev=sbindir ;; -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ @@ -1221,7 +1238,7 @@ fi for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ datadir sysconfdir sharedstatedir localstatedir includedir \ oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ - libdir localedir mandir + libdir localedir mandir runstatedir do eval ac_val=\$$ac_var # Remove trailing slashes. 
@@ -1374,6 +1391,7 @@ Fine tuning of the installation directories: --sysconfdir=DIR read-only single-machine data [PREFIX/etc] --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] --localstatedir=DIR modifiable single-machine data [PREFIX/var] + --runstatedir=DIR modifiable per-process data [LOCALSTATEDIR/run] --libdir=DIR object code libraries [EPREFIX/lib] --includedir=DIR C header files [PREFIX/include] --oldincludedir=DIR C header files for non-gcc [/usr/include] @@ -1425,6 +1443,7 @@ Optional Features: --disable-cache-oblivious Disable support for cache-oblivious allocation alignment + --disable-syscall Disable use of syscall(2) --enable-lazy-lock Enable lazy locking (only lock when multi-threaded) --disable-tls Disable thread-local storage (__thread keyword) --disable-zone-allocator @@ -1443,6 +1462,8 @@ Optional Packages: Prefix to prepend to all library-private APIs --with-install-suffix= Suffix to append to all installed files + --with-malloc-conf= + config.malloc_conf options string --with-static-libunwind= Path to static libunwind library; use rather than dynamically linking @@ -1456,6 +1477,8 @@ Optional Packages: Base 2 logs of system page sizes to support --with-lg-size-class-group= Base 2 log of size classes per doubling + --with-version=..--g + Version string Some influential environment variables: CC C compiler command @@ -2484,6 +2507,36 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu +ac_aux_dir= +for ac_dir in build-aux "$srcdir"/build-aux; do + if test -f "$ac_dir/install-sh"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/install-sh -c" + break + elif test -f "$ac_dir/install.sh"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/install.sh -c" + break + elif test -f "$ac_dir/shtool"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/shtool install -c" + break + fi +done +if test -z "$ac_aux_dir"; then + as_fn_error $? 
"cannot find install-sh, install.sh, or shtool in build-aux \"$srcdir\"/build-aux" "$LINENO" 5 +fi + +# These three variables are undocumented and unsupported, +# and are intended to be withdrawn in a future Autoconf release. +# They can cause serious problems if a builder's source tree is in a directory +# whose full name contains unusual characters. +ac_config_guess="$SHELL $ac_aux_dir/config.guess" # Please don't use this var. +ac_config_sub="$SHELL $ac_aux_dir/config.sub" # Please don't use this var. +ac_configure="$SHELL $ac_aux_dir/configure" # Please don't use this var. + + + @@ -3390,6 +3443,7 @@ ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu + if test "x$GCC" != "xyes" ; then { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler is MSVC" >&5 @@ -3423,10 +3477,125 @@ fi $as_echo "$je_cv_msvc" >&6; } fi +je_cv_cray_prgenv_wrapper="" +if test "x${PE_ENV}" != "x" ; then + case "${CC}" in + CC|cc) + je_cv_cray_prgenv_wrapper="yes" + ;; + *) + ;; + esac +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler is cray" >&5 +$as_echo_n "checking whether compiler is cray... " >&6; } +if ${je_cv_cray+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + +#ifndef _CRAYC + int fail-1; +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + je_cv_cray=yes +else + je_cv_cray=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_cray" >&5 +$as_echo "$je_cv_cray" >&6; } + +if test "x${je_cv_cray}" = "xyes" ; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether cray compiler version is 8.4" >&5 +$as_echo_n "checking whether cray compiler version is 8.4... 
" >&6; } +if ${je_cv_cray_84+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + +#if !(_RELEASE_MAJOR == 8 && _RELEASE_MINOR == 4) + int fail-1; +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + je_cv_cray_84=yes +else + je_cv_cray_84=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_cray_84" >&5 +$as_echo "$je_cv_cray_84" >&6; } +fi + if test "x$CFLAGS" = "x" ; then no_CFLAGS="yes" if test "x$GCC" = "xyes" ; then +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -std=gnu11" >&5 +$as_echo_n "checking whether compiler supports -std=gnu11... " >&6; } +TCFLAGS="${CFLAGS}" +if test "x${CFLAGS}" = "x" ; then + CFLAGS="-std=gnu11" +else + CFLAGS="${CFLAGS} -std=gnu11" +fi +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + +int +main () +{ + + return 0; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + je_cv_cflags_appended=-std=gnu11 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +else + je_cv_cflags_appended= + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + CFLAGS="${TCFLAGS}" + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + + if test "x$je_cv_cflags_appended" = "x-std=gnu11" ; then + cat >>confdefs.h <<_ACEOF +#define JEMALLOC_HAS_RESTRICT 1 +_ACEOF + + else + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -std=gnu99" >&5 $as_echo_n "checking whether compiler supports -std=gnu99... 
" >&6; } TCFLAGS="${CFLAGS}" @@ -3462,11 +3631,12 @@ $as_echo "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - if test "x$je_cv_cflags_appended" = "x-std=gnu99" ; then - cat >>confdefs.h <<_ACEOF + if test "x$je_cv_cflags_appended" = "x-std=gnu99" ; then + cat >>confdefs.h <<_ACEOF #define JEMALLOC_HAS_RESTRICT 1 _ACEOF + fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -Wall" >&5 @@ -3541,6 +3711,78 @@ fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -Wshorten-64-to-32" >&5 +$as_echo_n "checking whether compiler supports -Wshorten-64-to-32... " >&6; } +TCFLAGS="${CFLAGS}" +if test "x${CFLAGS}" = "x" ; then + CFLAGS="-Wshorten-64-to-32" +else + CFLAGS="${CFLAGS} -Wshorten-64-to-32" +fi +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + +int +main () +{ + + return 0; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + je_cv_cflags_appended=-Wshorten-64-to-32 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +else + je_cv_cflags_appended= + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + CFLAGS="${TCFLAGS}" + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -Wsign-compare" >&5 +$as_echo_n "checking whether compiler supports -Wsign-compare... " >&6; } +TCFLAGS="${CFLAGS}" +if test "x${CFLAGS}" = "x" ; then + CFLAGS="-Wsign-compare" +else + CFLAGS="${CFLAGS} -Wsign-compare" +fi +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + +int +main () +{ + + return 0; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + je_cv_cflags_appended=-Wsign-compare + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +else + je_cv_cflags_appended= + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + CFLAGS="${TCFLAGS}" + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -pipe" >&5 $as_echo_n "checking whether compiler supports -pipe... " >&6; } TCFLAGS="${CFLAGS}" @@ -3760,16 +4002,16 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext CPPFLAGS="$CPPFLAGS -I${srcdir}/include/msvc_compat" fi -fi -if test "x$EXTRA_CFLAGS" != "x" ; then + if test "x$je_cv_cray" = "xyes" ; then + if test "x$je_cv_cray_84" = "xyes" ; then -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports $EXTRA_CFLAGS" >&5 -$as_echo_n "checking whether compiler supports $EXTRA_CFLAGS... " >&6; } +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -hipa2" >&5 +$as_echo_n "checking whether compiler supports -hipa2... " >&6; } TCFLAGS="${CFLAGS}" if test "x${CFLAGS}" = "x" ; then - CFLAGS="$EXTRA_CFLAGS" + CFLAGS="-hipa2" else - CFLAGS="${CFLAGS} $EXTRA_CFLAGS" + CFLAGS="${CFLAGS} -hipa2" fi cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ @@ -3786,7 +4028,7 @@ main () } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : - je_cv_cflags_appended=$EXTRA_CFLAGS + je_cv_cflags_appended=-hipa2 { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } else @@ -3798,7 +4040,120 @@ $as_echo "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -hnognu" >&5 +$as_echo_n "checking whether compiler supports -hnognu... 
" >&6; } +TCFLAGS="${CFLAGS}" +if test "x${CFLAGS}" = "x" ; then + CFLAGS="-hnognu" +else + CFLAGS="${CFLAGS} -hnognu" fi +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + +int +main () +{ + + return 0; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + je_cv_cflags_appended=-hnognu + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +else + je_cv_cflags_appended= + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + CFLAGS="${TCFLAGS}" + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + + fi + if test "x$enable_cc_silence" != "xno" ; then + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -hnomessage=128" >&5 +$as_echo_n "checking whether compiler supports -hnomessage=128... " >&6; } +TCFLAGS="${CFLAGS}" +if test "x${CFLAGS}" = "x" ; then + CFLAGS="-hnomessage=128" +else + CFLAGS="${CFLAGS} -hnomessage=128" +fi +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + +int +main () +{ + + return 0; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + je_cv_cflags_appended=-hnomessage=128 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +else + je_cv_cflags_appended= + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + CFLAGS="${TCFLAGS}" + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -hnomessage=1357" >&5 +$as_echo_n "checking whether compiler supports -hnomessage=1357... " >&6; } +TCFLAGS="${CFLAGS}" +if test "x${CFLAGS}" = "x" ; then + CFLAGS="-hnomessage=1357" +else + CFLAGS="${CFLAGS} -hnomessage=1357" +fi +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + +int +main () +{ + + return 0; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + je_cv_cflags_appended=-hnomessage=1357 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +else + je_cv_cflags_appended= + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + CFLAGS="${TCFLAGS}" + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + + fi + fi +fi + ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' @@ -4431,7 +4786,12 @@ if test "x${je_cv_msvc}" = "xyes" -a "x${ac_cv_header_inttypes_h}" = "xno"; then CPPFLAGS="$CPPFLAGS -I${srcdir}/include/msvc_compat/C99" fi -# The cast to long int works around a bug in the HP C Compiler +if test "x${je_cv_msvc}" = "xyes" ; then + LG_SIZEOF_PTR=LG_SIZEOF_PTR_WIN + { $as_echo "$as_me:${as_lineno-$LINENO}: result: Using a predefined value for sizeof(void *): 4 for 32-bit, 8 for 64-bit" >&5 +$as_echo "Using a predefined value for sizeof(void *): 4 for 32-bit, 8 for 64-bit" >&6; } +else + # The cast to long int works around a bug in the HP C Compiler # version HP92453-01 B.11.11.23709.GP, which incorrectly rejects # declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. # This bug is HP SR number 8606223364. @@ -4464,12 +4824,13 @@ cat >>confdefs.h <<_ACEOF _ACEOF -if test "x${ac_cv_sizeof_void_p}" = "x8" ; then - LG_SIZEOF_PTR=3 -elif test "x${ac_cv_sizeof_void_p}" = "x4" ; then - LG_SIZEOF_PTR=2 -else - as_fn_error $? "Unsupported pointer size: ${ac_cv_sizeof_void_p}" "$LINENO" 5 + if test "x${ac_cv_sizeof_void_p}" = "x8" ; then + LG_SIZEOF_PTR=3 + elif test "x${ac_cv_sizeof_void_p}" = "x4" ; then + LG_SIZEOF_PTR=2 + else + as_fn_error $? 
"Unsupported pointer size: ${ac_cv_sizeof_void_p}" "$LINENO" 5 + fi fi cat >>confdefs.h <<_ACEOF #define LG_SIZEOF_PTR $LG_SIZEOF_PTR @@ -4566,6 +4927,51 @@ cat >>confdefs.h <<_ACEOF _ACEOF +# The cast to long int works around a bug in the HP C Compiler +# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects +# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. +# This bug is HP SR number 8606223364. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of long long" >&5 +$as_echo_n "checking size of long long... " >&6; } +if ${ac_cv_sizeof_long_long+:} false; then : + $as_echo_n "(cached) " >&6 +else + if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (long long))" "ac_cv_sizeof_long_long" "$ac_includes_default"; then : + +else + if test "$ac_cv_type_long_long" = yes; then + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "cannot compute sizeof (long long) +See \`config.log' for more details" "$LINENO" 5; } + else + ac_cv_sizeof_long_long=0 + fi +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_long_long" >&5 +$as_echo "$ac_cv_sizeof_long_long" >&6; } + + + +cat >>confdefs.h <<_ACEOF +#define SIZEOF_LONG_LONG $ac_cv_sizeof_long_long +_ACEOF + + +if test "x${ac_cv_sizeof_long_long}" = "x8" ; then + LG_SIZEOF_LONG_LONG=3 +elif test "x${ac_cv_sizeof_long_long}" = "x4" ; then + LG_SIZEOF_LONG_LONG=2 +else + as_fn_error $? "Unsupported long long size: ${ac_cv_sizeof_long_long}" "$LINENO" 5 +fi +cat >>confdefs.h <<_ACEOF +#define LG_SIZEOF_LONG_LONG $LG_SIZEOF_LONG_LONG +_ACEOF + + # The cast to long int works around a bug in the HP C Compiler # version HP92453-01 B.11.11.23709.GP, which incorrectly rejects # declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. @@ -4613,35 +5019,6 @@ cat >>confdefs.h <<_ACEOF _ACEOF -ac_aux_dir= -for ac_dir in "$srcdir" "$srcdir/.." 
"$srcdir/../.."; do - if test -f "$ac_dir/install-sh"; then - ac_aux_dir=$ac_dir - ac_install_sh="$ac_aux_dir/install-sh -c" - break - elif test -f "$ac_dir/install.sh"; then - ac_aux_dir=$ac_dir - ac_install_sh="$ac_aux_dir/install.sh -c" - break - elif test -f "$ac_dir/shtool"; then - ac_aux_dir=$ac_dir - ac_install_sh="$ac_aux_dir/shtool install -c" - break - fi -done -if test -z "$ac_aux_dir"; then - as_fn_error $? "cannot find install-sh, install.sh, or shtool in \"$srcdir\" \"$srcdir/..\" \"$srcdir/../..\"" "$LINENO" 5 -fi - -# These three variables are undocumented and unsupported, -# and are intended to be withdrawn in a future Autoconf release. -# They can cause serious problems if a builder's source tree is in a directory -# whose full name contains unusual characters. -ac_config_guess="$SHELL $ac_aux_dir/config.guess" # Please don't use this var. -ac_config_sub="$SHELL $ac_aux_dir/config.sub" # Please don't use this var. -ac_configure="$SHELL $ac_aux_dir/configure" # Please don't use this var. - - # Make sure we can run config.sub. $SHELL "$ac_aux_dir/config.sub" sun4 >/dev/null 2>&1 || as_fn_error $? "cannot run $SHELL $ac_aux_dir/config.sub" "$LINENO" 5 @@ -4716,7 +5093,45 @@ case $host_os in *\ *) host_os=`echo "$host_os" | sed 's/ /-/g'`;; esac CPU_SPINWAIT="" case "${host_cpu}" in i686|x86_64) - if ${je_cv_pause+:} false; then : + if test "x${je_cv_msvc}" = "xyes" ; then + if ${je_cv_pause_msvc+:} false; then : + $as_echo_n "(cached) " >&6 +else + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether pause instruction MSVC is compilable" >&5 +$as_echo_n "checking whether pause instruction MSVC is compilable... " >&6; } +if ${je_cv_pause_msvc+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ +_mm_pause(); return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + je_cv_pause_msvc=yes +else + je_cv_pause_msvc=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_pause_msvc" >&5 +$as_echo "$je_cv_pause_msvc" >&6; } + +fi + + if test "x${je_cv_pause_msvc}" = "xyes" ; then + CPU_SPINWAIT='_mm_pause()' + fi + else + if ${je_cv_pause+:} false; then : $as_echo_n "(cached) " >&6 else @@ -4749,8 +5164,9 @@ $as_echo "$je_cv_pause" >&6; } fi - if test "x${je_cv_pause}" = "xyes" ; then - CPU_SPINWAIT='__asm__ volatile("pause")' + if test "x${je_cv_pause}" = "xyes" ; then + CPU_SPINWAIT='__asm__ volatile("pause")' + fi fi ;; powerpc) @@ -4774,17 +5190,27 @@ o="$ac_objext" a="a" exe="$ac_exeext" libprefix="lib" +link_whole_archive="0" DSO_LDFLAGS='-shared -Wl,-soname,$(@F)' RPATH='-Wl,-rpath,$(1)' SOREV="${so}.${rev}" PIC_CFLAGS='-fPIC -DPIC' CTARGET='-o $@' LDTARGET='-o $@' +TEST_LD_MODE= EXTRA_LDFLAGS= ARFLAGS='crus' AROUT=' $@' CC_MM=1 +if test "x$je_cv_cray_prgenv_wrapper" = "xyes" ; then + TEST_LD_MODE='-dynamic' +fi + +if test "x${je_cv_cray}" = "xyes" ; then + CC_MM= +fi + @@ -4881,14 +5307,12 @@ else fi +CFLAGS="$CFLAGS" default_munmap="1" maps_coalesce="1" case "${host}" in *-*-darwin* | *-*-ios*) - CFLAGS="$CFLAGS" abi="macho" - $as_echo "#define JEMALLOC_PURGE_MADVISE_FREE " >>confdefs.h - RPATH="" LD_PRELOAD_VAR="DYLD_INSERT_LIBRARIES" so="dylib" @@ -4899,38 +5323,41 @@ case "${host}" in sbrk_deprecated="1" ;; *-*-freebsd*) - CFLAGS="$CFLAGS" abi="elf" - $as_echo "#define JEMALLOC_PURGE_MADVISE_FREE " >>confdefs.h + $as_echo "#define JEMALLOC_SYSCTL_VM_OVERCOMMIT " >>confdefs.h force_lazy_lock="1" ;; *-*-dragonfly*) - CFLAGS="$CFLAGS" abi="elf" - $as_echo "#define JEMALLOC_PURGE_MADVISE_FREE " >>confdefs.h - ;; *-*-openbsd*) - CFLAGS="$CFLAGS" abi="elf" - $as_echo "#define JEMALLOC_PURGE_MADVISE_FREE " >>confdefs.h - 
force_tls="0" ;; *-*-bitrig*) - CFLAGS="$CFLAGS" abi="elf" - $as_echo "#define JEMALLOC_PURGE_MADVISE_FREE " >>confdefs.h - ;; - *-*-linux*) - CFLAGS="$CFLAGS" - CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE" + *-*-linux-android) + CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE" abi="elf" $as_echo "#define JEMALLOC_HAS_ALLOCA_H 1" >>confdefs.h - $as_echo "#define JEMALLOC_PURGE_MADVISE_DONTNEED " >>confdefs.h + $as_echo "#define JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY " >>confdefs.h + + $as_echo "#define JEMALLOC_THREADED_INIT " >>confdefs.h + + $as_echo "#define JEMALLOC_C11ATOMICS 1" >>confdefs.h + + force_tls="0" + default_munmap="0" + ;; + *-*-linux* | *-*-kfreebsd*) + CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE" + abi="elf" + $as_echo "#define JEMALLOC_HAS_ALLOCA_H 1" >>confdefs.h + + $as_echo "#define JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY " >>confdefs.h $as_echo "#define JEMALLOC_THREADED_INIT " >>confdefs.h @@ -4958,21 +5385,16 @@ main () } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : - CFLAGS="$CFLAGS"; abi="elf" + abi="elf" else abi="aout" fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext { $as_echo "$as_me:${as_lineno-$LINENO}: result: $abi" >&5 $as_echo "$abi" >&6; } - $as_echo "#define JEMALLOC_PURGE_MADVISE_FREE " >>confdefs.h - ;; *-*-solaris2*) - CFLAGS="$CFLAGS" abi="elf" - $as_echo "#define JEMALLOC_PURGE_MADVISE_FREE " >>confdefs.h - RPATH='-Wl,-R,$(1)' CPPFLAGS="$CPPFLAGS -D_POSIX_PTHREAD_SEMANTICS" LIBS="$LIBS -lposix4 -lsocket -lnsl" @@ -4988,7 +5410,6 @@ $as_echo "$abi" >&6; } *-*-mingw* | *-*-cygwin*) abi="pecoff" force_tls="0" - force_lazy_lock="1" maps_coalesce="0" RPATH="" so="dll" @@ -5005,6 +5426,7 @@ $as_echo "$abi" >&6; } else importlib="${so}" DSO_LDFLAGS="-shared" + link_whole_archive="1" fi a="lib" libprefix="" @@ -5084,6 +5506,73 @@ _ACEOF + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing log" >&5 +$as_echo_n "checking for library containing log... 
" >&6; } +if ${ac_cv_search_log+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_func_search_save_LIBS=$LIBS +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char log (); +int +main () +{ +return log (); + ; + return 0; +} +_ACEOF +for ac_lib in '' m; do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + if ac_fn_c_try_link "$LINENO"; then : + ac_cv_search_log=$ac_res +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext + if ${ac_cv_search_log+:} false; then : + break +fi +done +if ${ac_cv_search_log+:} false; then : + +else + ac_cv_search_log=no +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_log" >&5 +$as_echo "$ac_cv_search_log" >&6; } +ac_res=$ac_cv_search_log +if test "$ac_res" != no; then : + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + +else + as_fn_error $? "Missing math functions" "$LINENO" 5 +fi + +if test "x$ac_cv_search_log" != "xnone required" ; then + LM="$ac_cv_search_log" +else + LM= +fi + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether __attribute__ syntax is compilable" >&5 @@ -5193,6 +5682,42 @@ fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -herror_on_warning" >&5 +$as_echo_n "checking whether compiler supports -herror_on_warning... " >&6; } +TCFLAGS="${CFLAGS}" +if test "x${CFLAGS}" = "x" ; then + CFLAGS="-herror_on_warning" +else + CFLAGS="${CFLAGS} -herror_on_warning" +fi +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + +int +main () +{ + + return 0; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + je_cv_cflags_appended=-herror_on_warning + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +else + je_cv_cflags_appended= + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + CFLAGS="${TCFLAGS}" + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether tls_model attribute is compilable" >&5 $as_echo_n "checking whether tls_model attribute is compilable... " >&6; } if ${je_cv_tls_model+:} false; then : @@ -5268,6 +5793,42 @@ fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -herror_on_warning" >&5 +$as_echo_n "checking whether compiler supports -herror_on_warning... " >&6; } +TCFLAGS="${CFLAGS}" +if test "x${CFLAGS}" = "x" ; then + CFLAGS="-herror_on_warning" +else + CFLAGS="${CFLAGS} -herror_on_warning" +fi +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + +int +main () +{ + + return 0; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + je_cv_cflags_appended=-herror_on_warning + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +else + je_cv_cflags_appended= + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + CFLAGS="${TCFLAGS}" + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether alloc_size attribute is compilable" >&5 $as_echo_n "checking whether alloc_size attribute is compilable... 
" >&6; } if ${je_cv_alloc_size+:} false; then : @@ -5338,6 +5899,42 @@ fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -herror_on_warning" >&5 +$as_echo_n "checking whether compiler supports -herror_on_warning... " >&6; } +TCFLAGS="${CFLAGS}" +if test "x${CFLAGS}" = "x" ; then + CFLAGS="-herror_on_warning" +else + CFLAGS="${CFLAGS} -herror_on_warning" +fi +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + +int +main () +{ + + return 0; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + je_cv_cflags_appended=-herror_on_warning + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +else + je_cv_cflags_appended= + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + CFLAGS="${TCFLAGS}" + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether format(gnu_printf, ...) attribute is compilable" >&5 $as_echo_n "checking whether format(gnu_printf, ...) attribute is compilable... " >&6; } if ${je_cv_format_gnu_printf+:} false; then : @@ -5408,6 +6005,42 @@ fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -herror_on_warning" >&5 +$as_echo_n "checking whether compiler supports -herror_on_warning... " >&6; } +TCFLAGS="${CFLAGS}" +if test "x${CFLAGS}" = "x" ; then + CFLAGS="-herror_on_warning" +else + CFLAGS="${CFLAGS} -herror_on_warning" +fi +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + +int +main () +{ + + return 0; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + je_cv_cflags_appended=-herror_on_warning + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +else + je_cv_cflags_appended= + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + CFLAGS="${TCFLAGS}" + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether format(printf, ...) attribute is compilable" >&5 $as_echo_n "checking whether format(printf, ...) attribute is compilable... " >&6; } if ${je_cv_format_printf+:} false; then : @@ -5927,6 +6560,21 @@ fi install_suffix="$INSTALL_SUFFIX" + +# Check whether --with-malloc_conf was given. +if test "${with_malloc_conf+set}" = set; then : + withval=$with_malloc_conf; JEMALLOC_CONFIG_MALLOC_CONF="$with_malloc_conf" +else + JEMALLOC_CONFIG_MALLOC_CONF="" + +fi + +config_malloc_conf="$JEMALLOC_CONFIG_MALLOC_CONF" +cat >>confdefs.h <<_ACEOF +#define JEMALLOC_CONFIG_MALLOC_CONF "$config_malloc_conf" +_ACEOF + + je_="je_" @@ -6495,8 +7143,8 @@ $as_echo_n "checking configured backtracing method... " >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: result: $backtrace_method" >&5 $as_echo "$backtrace_method" >&6; } if test "x$enable_prof" = "x1" ; then - if test "x$abi" != "xpecoff"; then - LIBS="$LIBS -lm" + if test "x$LM" != "x" ; then + LIBS="$LIBS $LM" fi $as_echo "#define JEMALLOC_PROF " >>confdefs.h @@ -6745,6 +7393,52 @@ fi + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether a program using __builtin_unreachable is compilable" >&5 +$as_echo_n "checking whether a program using __builtin_unreachable is compilable... " >&6; } +if ${je_cv_gcc_builtin_unreachable+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +void foo (void) { + __builtin_unreachable(); +} + +int +main () +{ + + { + foo(); + } + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + je_cv_gcc_builtin_unreachable=yes +else + je_cv_gcc_builtin_unreachable=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_gcc_builtin_unreachable" >&5 +$as_echo "$je_cv_gcc_builtin_unreachable" >&6; } + +if test "x${je_cv_gcc_builtin_unreachable}" = "xyes" ; then + $as_echo "#define JEMALLOC_INTERNAL_UNREACHABLE __builtin_unreachable" >>confdefs.h + +else + $as_echo "#define JEMALLOC_INTERNAL_UNREACHABLE abort" >>confdefs.h + +fi + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether a program using __builtin_ffsl is compilable" >&5 $as_echo_n "checking whether a program using __builtin_ffsl is compilable... " >&6; } if ${je_cv_gcc_builtin_ffsl+:} false; then : @@ -6782,6 +7476,8 @@ fi $as_echo "$je_cv_gcc_builtin_ffsl" >&6; } if test "x${je_cv_gcc_builtin_ffsl}" = "xyes" ; then + $as_echo "#define JEMALLOC_INTERNAL_FFSLL __builtin_ffsll" >>confdefs.h + $as_echo "#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl" >>confdefs.h $as_echo "#define JEMALLOC_INTERNAL_FFS __builtin_ffs" >>confdefs.h @@ -6825,6 +7521,8 @@ fi $as_echo "$je_cv_function_ffsl" >&6; } if test "x${je_cv_function_ffsl}" = "xyes" ; then + $as_echo "#define JEMALLOC_INTERNAL_FFSLL ffsll" >>confdefs.h + $as_echo "#define JEMALLOC_INTERNAL_FFSL ffsl" >>confdefs.h $as_echo "#define JEMALLOC_INTERNAL_FFS ffs" >>confdefs.h @@ -6913,7 +7611,7 @@ main () if (f == NULL) { return 1; } - fprintf(f, "%d\n", result); + fprintf(f, "%d", result); fclose(f); return 0; @@ -6964,7 +7662,6 @@ else LG_SIZE_CLASS_GROUP="2" fi - if test ! -e "${objroot}VERSION" ; then if test ! 
-e "${srcroot}VERSION" ; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: Missing VERSION file, and unable to generate it; creating bogus VERSION" >&5 @@ -7102,12 +7799,46 @@ fi fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether pthread_atfork(3) is compilable" >&5 +$as_echo_n "checking whether pthread_atfork(3) is compilable... " >&6; } +if ${je_cv_pthread_atfork+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include + +int +main () +{ + + pthread_atfork((void *)0, (void *)0, (void *)0); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + je_cv_pthread_atfork=yes +else + je_cv_pthread_atfork=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_pthread_atfork" >&5 +$as_echo "$je_cv_pthread_atfork" >&6; } + + if test "x${je_cv_pthread_atfork}" = "xyes" ; then + $as_echo "#define JEMALLOC_HAVE_PTHREAD_ATFORK " >>confdefs.h + + fi fi CPPFLAGS="$CPPFLAGS -D_REENTRANT" -SAVED_LIBS="${LIBS}" -LIBS= { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing clock_gettime" >&5 $as_echo_n "checking for library containing clock_gettime... " >&6; } if ${ac_cv_search_clock_gettime+:} false; then : @@ -7161,11 +7892,321 @@ $as_echo "$ac_cv_search_clock_gettime" >&6; } ac_res=$ac_cv_search_clock_gettime if test "$ac_res" != no; then : test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" - TESTLIBS="${LIBS}" + fi -LIBS="${SAVED_LIBS}" +if test "x$je_cv_cray_prgenv_wrapper" = "xyes" ; then + if test "$ac_cv_search_clock_gettime" != "-lrt"; then + SAVED_CFLAGS="${CFLAGS}" + + unset ac_cv_search_clock_gettime + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -dynamic" >&5 +$as_echo_n "checking whether compiler supports -dynamic... 
" >&6; } +TCFLAGS="${CFLAGS}" +if test "x${CFLAGS}" = "x" ; then + CFLAGS="-dynamic" +else + CFLAGS="${CFLAGS} -dynamic" +fi +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + +int +main () +{ + + return 0; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + je_cv_cflags_appended=-dynamic + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +else + je_cv_cflags_appended= + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + CFLAGS="${TCFLAGS}" + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing clock_gettime" >&5 +$as_echo_n "checking for library containing clock_gettime... " >&6; } +if ${ac_cv_search_clock_gettime+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_func_search_save_LIBS=$LIBS +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char clock_gettime (); +int +main () +{ +return clock_gettime (); + ; + return 0; +} +_ACEOF +for ac_lib in '' rt; do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + if ac_fn_c_try_link "$LINENO"; then : + ac_cv_search_clock_gettime=$ac_res +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext + if ${ac_cv_search_clock_gettime+:} false; then : + break +fi +done +if ${ac_cv_search_clock_gettime+:} false; then : + +else + ac_cv_search_clock_gettime=no +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_clock_gettime" >&5 +$as_echo "$ac_cv_search_clock_gettime" >&6; } +ac_res=$ac_cv_search_clock_gettime +if test "$ac_res" != no; then : + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + +fi + + + CFLAGS="${SAVED_CFLAGS}" + fi +fi + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is compilable" >&5 +$as_echo_n "checking whether clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is compilable... " >&6; } +if ${je_cv_clock_monotonic_coarse+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +#include + +int +main () +{ + + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC_COARSE, &ts); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + je_cv_clock_monotonic_coarse=yes +else + je_cv_clock_monotonic_coarse=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_clock_monotonic_coarse" >&5 +$as_echo "$je_cv_clock_monotonic_coarse" >&6; } + +if test "x${je_cv_clock_monotonic_coarse}" = "xyes" ; then + $as_echo "#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE 1" >>confdefs.h + +fi + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether clock_gettime(CLOCK_MONOTONIC, ...) is compilable" >&5 +$as_echo_n "checking whether clock_gettime(CLOCK_MONOTONIC, ...) is compilable... " >&6; } +if ${je_cv_clock_monotonic+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include + +int +main () +{ + + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC, &ts); +#if !defined(_POSIX_MONOTONIC_CLOCK) || _POSIX_MONOTONIC_CLOCK < 0 +# error _POSIX_MONOTONIC_CLOCK missing/invalid +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + je_cv_clock_monotonic=yes +else + je_cv_clock_monotonic=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_clock_monotonic" >&5 +$as_echo "$je_cv_clock_monotonic" >&6; } + +if test "x${je_cv_clock_monotonic}" = "xyes" ; then + $as_echo "#define JEMALLOC_HAVE_CLOCK_MONOTONIC 1" >>confdefs.h + +fi + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether mach_absolute_time() is compilable" >&5 +$as_echo_n "checking whether mach_absolute_time() is compilable... 
" >&6; } +if ${je_cv_mach_absolute_time+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include + +int +main () +{ + + mach_absolute_time(); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + je_cv_mach_absolute_time=yes +else + je_cv_mach_absolute_time=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_mach_absolute_time" >&5 +$as_echo "$je_cv_mach_absolute_time" >&6; } + +if test "x${je_cv_mach_absolute_time}" = "xyes" ; then + $as_echo "#define JEMALLOC_HAVE_MACH_ABSOLUTE_TIME 1" >>confdefs.h + +fi + +# Check whether --enable-syscall was given. +if test "${enable_syscall+set}" = set; then : + enableval=$enable_syscall; if test "x$enable_syscall" = "xno" ; then + enable_syscall="0" +else + enable_syscall="1" +fi + +else + enable_syscall="1" + +fi + +if test "x$enable_syscall" = "x1" ; then + SAVED_CFLAGS="${CFLAGS}" + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -Werror" >&5 +$as_echo_n "checking whether compiler supports -Werror... " >&6; } +TCFLAGS="${CFLAGS}" +if test "x${CFLAGS}" = "x" ; then + CFLAGS="-Werror" +else + CFLAGS="${CFLAGS} -Werror" +fi +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + +int +main () +{ + + return 0; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + je_cv_cflags_appended=-Werror + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +else + je_cv_cflags_appended= + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + CFLAGS="${TCFLAGS}" + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether syscall(2) is compilable" >&5 +$as_echo_n "checking whether syscall(2) is compilable... 
" >&6; } +if ${je_cv_syscall+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include + +int +main () +{ + + syscall(SYS_write, 2, "hello", 5); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + je_cv_syscall=yes +else + je_cv_syscall=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_syscall" >&5 +$as_echo "$je_cv_syscall" >&6; } + + CFLAGS="${SAVED_CFLAGS}" + if test "x$je_cv_syscall" = "xyes" ; then + $as_echo "#define JEMALLOC_USE_SYSCALL " >>confdefs.h + + fi +fi ac_fn_c_check_func "$LINENO" "secure_getenv" "ac_cv_func_secure_getenv" if test "x$ac_cv_func_secure_getenv" = xyes; then : @@ -7233,10 +8274,19 @@ else fi -if test "x$enable_lazy_lock" = "x" -a "x${force_lazy_lock}" = "x1" ; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: Forcing lazy-lock to avoid allocator/threading bootstrap issues" >&5 +if test "x${enable_lazy_lock}" = "x" ; then + if test "x${force_lazy_lock}" = "x1" ; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: Forcing lazy-lock to avoid allocator/threading bootstrap issues" >&5 $as_echo "Forcing lazy-lock to avoid allocator/threading bootstrap issues" >&6; } - enable_lazy_lock="1" + enable_lazy_lock="1" + else + enable_lazy_lock="0" + fi +fi +if test "x${enable_lazy_lock}" = "x1" -a "x${abi}" = "xpecoff" ; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: Forcing no lazy-lock because thread creation monitoring is unimplemented" >&5 +$as_echo "Forcing no lazy-lock because thread creation monitoring is unimplemented" >&6; } + enable_lazy_lock="0" fi if test "x$enable_lazy_lock" = "x1" ; then if test "x$abi" != "xpecoff" ; then @@ -7306,8 +8356,6 @@ fi fi $as_echo "#define JEMALLOC_LAZY_LOCK " >>confdefs.h -else - enable_lazy_lock="0" fi @@ -7544,9 +8592,7 @@ int main () { - { - madvise((void *)0, 0, 0); - } + 
madvise((void *)0, 0, 0); ; return 0; @@ -7566,6 +8612,118 @@ $as_echo "$je_cv_madvise" >&6; } if test "x${je_cv_madvise}" = "xyes" ; then $as_echo "#define JEMALLOC_HAVE_MADVISE " >>confdefs.h + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether madvise(..., MADV_FREE) is compilable" >&5 +$as_echo_n "checking whether madvise(..., MADV_FREE) is compilable... " >&6; } +if ${je_cv_madv_free+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include + +int +main () +{ + + madvise((void *)0, 0, MADV_FREE); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + je_cv_madv_free=yes +else + je_cv_madv_free=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_madv_free" >&5 +$as_echo "$je_cv_madv_free" >&6; } + + if test "x${je_cv_madv_free}" = "xyes" ; then + $as_echo "#define JEMALLOC_PURGE_MADVISE_FREE " >>confdefs.h + + fi + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether madvise(..., MADV_DONTNEED) is compilable" >&5 +$as_echo_n "checking whether madvise(..., MADV_DONTNEED) is compilable... " >&6; } +if ${je_cv_madv_dontneed+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +#include + +int +main () +{ + + madvise((void *)0, 0, MADV_DONTNEED); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + je_cv_madv_dontneed=yes +else + je_cv_madv_dontneed=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_madv_dontneed" >&5 +$as_echo "$je_cv_madv_dontneed" >&6; } + + if test "x${je_cv_madv_dontneed}" = "xyes" ; then + $as_echo "#define JEMALLOC_PURGE_MADVISE_DONTNEED " >>confdefs.h + + fi + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether madvise(..., MADV_[NO]HUGEPAGE) is compilable" >&5 +$as_echo_n "checking whether madvise(..., MADV_[NO]HUGEPAGE) is compilable... " >&6; } +if ${je_cv_thp+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include + +int +main () +{ + + madvise((void *)0, 0, MADV_HUGEPAGE); + madvise((void *)0, 0, MADV_NOHUGEPAGE); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + je_cv_thp=yes +else + je_cv_thp=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_thp" >&5 +$as_echo "$je_cv_thp" >&6; } + + if test "x${je_cv_thp}" = "xyes" ; then + $as_echo "#define JEMALLOC_THP " >>confdefs.h + + fi fi @@ -7708,6 +8866,51 @@ fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether Darwin os_unfair_lock_*() is compilable" >&5 +$as_echo_n "checking whether Darwin os_unfair_lock_*() is compilable... " >&6; } +if ${je_cv_os_unfair_lock+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +#include +#include + +int +main () +{ + + #if MAC_OS_X_VERSION_MIN_REQUIRED < 101200 + #error "os_unfair_lock is not supported" + #else + os_unfair_lock lock = OS_UNFAIR_LOCK_INIT; + os_unfair_lock_lock(&lock); + os_unfair_lock_unlock(&lock); + #endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + je_cv_os_unfair_lock=yes +else + je_cv_os_unfair_lock=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_os_unfair_lock" >&5 +$as_echo "$je_cv_os_unfair_lock" >&6; } + +if test "x${je_cv_os_unfair_lock}" = "xyes" ; then + $as_echo "#define JEMALLOC_OS_UNFAIR_LOCK " >>confdefs.h + +fi + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether Darwin OSSpin*() is compilable" >&5 $as_echo_n "checking whether Darwin OSSpin*() is compilable... " >&6; } if ${je_cv_osspin+:} false; then : @@ -9610,6 +10813,8 @@ $as_echo "CONFIG : ${CONFIG}" >&6; } $as_echo "CC : ${CC}" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: result: CFLAGS : ${CFLAGS}" >&5 $as_echo "CFLAGS : ${CFLAGS}" >&6; } +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: EXTRA_CFLAGS : ${EXTRA_CFLAGS}" >&5 +$as_echo "EXTRA_CFLAGS : ${EXTRA_CFLAGS}" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: result: CPPFLAGS : ${CPPFLAGS}" >&5 $as_echo "CPPFLAGS : ${CPPFLAGS}" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: result: LDFLAGS : ${LDFLAGS}" >&5 @@ -9618,8 +10823,6 @@ $as_echo "LDFLAGS : ${LDFLAGS}" >&6; } $as_echo "EXTRA_LDFLAGS : ${EXTRA_LDFLAGS}" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: result: LIBS : ${LIBS}" >&5 $as_echo "LIBS : ${LIBS}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: TESTLIBS : ${TESTLIBS}" >&5 -$as_echo "TESTLIBS : ${TESTLIBS}" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: result: RPATH_EXTRA : ${RPATH_EXTRA}" >&5 $as_echo "RPATH_EXTRA : ${RPATH_EXTRA}" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: result: " >&5 @@ -9662,6 +10865,8 @@ 
$as_echo "JEMALLOC_PRIVATE_NAMESPACE" >&6; } $as_echo " : ${JEMALLOC_PRIVATE_NAMESPACE}" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: result: install_suffix : ${install_suffix}" >&5 $as_echo "install_suffix : ${install_suffix}" >&6; } +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: malloc_conf : ${config_malloc_conf}" >&5 +$as_echo "malloc_conf : ${config_malloc_conf}" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: result: autogen : ${enable_autogen}" >&5 $as_echo "autogen : ${enable_autogen}" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: result: cc-silence : ${enable_cc_silence}" >&5 diff --git a/deps/jemalloc/configure.ac b/deps/jemalloc/configure.ac index 7a1290e0d..9573c3020 100644 --- a/deps/jemalloc/configure.ac +++ b/deps/jemalloc/configure.ac @@ -1,6 +1,8 @@ dnl Process this file with autoconf to produce a configure script. AC_INIT([Makefile.in]) +AC_CONFIG_AUX_DIR([build-aux]) + dnl ============================================================================ dnl Custom macro definitions. @@ -116,6 +118,7 @@ dnl If CFLAGS isn't defined, set CFLAGS to something reasonable. Otherwise, dnl just prevent autoconf from molesting CFLAGS. 
CFLAGS=$CFLAGS AC_PROG_CC + if test "x$GCC" != "xyes" ; then AC_CACHE_CHECK([whether compiler is MSVC], [je_cv_msvc], @@ -129,15 +132,58 @@ if test "x$GCC" != "xyes" ; then [je_cv_msvc=no])]) fi +dnl check if a cray prgenv wrapper compiler is being used +je_cv_cray_prgenv_wrapper="" +if test "x${PE_ENV}" != "x" ; then + case "${CC}" in + CC|cc) + je_cv_cray_prgenv_wrapper="yes" + ;; + *) + ;; + esac +fi + +AC_CACHE_CHECK([whether compiler is cray], + [je_cv_cray], + [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], + [ +#ifndef _CRAYC + int fail[-1]; +#endif +])], + [je_cv_cray=yes], + [je_cv_cray=no])]) + +if test "x${je_cv_cray}" = "xyes" ; then + AC_CACHE_CHECK([whether cray compiler version is 8.4], + [je_cv_cray_84], + [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], + [ +#if !(_RELEASE_MAJOR == 8 && _RELEASE_MINOR == 4) + int fail[-1]; +#endif +])], + [je_cv_cray_84=yes], + [je_cv_cray_84=no])]) +fi + if test "x$CFLAGS" = "x" ; then no_CFLAGS="yes" if test "x$GCC" = "xyes" ; then - JE_CFLAGS_APPEND([-std=gnu99]) - if test "x$je_cv_cflags_appended" = "x-std=gnu99" ; then + JE_CFLAGS_APPEND([-std=gnu11]) + if test "x$je_cv_cflags_appended" = "x-std=gnu11" ; then AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT]) + else + JE_CFLAGS_APPEND([-std=gnu99]) + if test "x$je_cv_cflags_appended" = "x-std=gnu99" ; then + AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT]) + fi fi JE_CFLAGS_APPEND([-Wall]) JE_CFLAGS_APPEND([-Werror=declaration-after-statement]) + JE_CFLAGS_APPEND([-Wshorten-64-to-32]) + JE_CFLAGS_APPEND([-Wsign-compare]) JE_CFLAGS_APPEND([-pipe]) JE_CFLAGS_APPEND([-g3]) elif test "x$je_cv_msvc" = "xyes" ; then @@ -148,11 +194,21 @@ if test "x$CFLAGS" = "x" ; then JE_CFLAGS_APPEND([-FS]) CPPFLAGS="$CPPFLAGS -I${srcdir}/include/msvc_compat" fi + if test "x$je_cv_cray" = "xyes" ; then + dnl cray compiler 8.4 has an inlining bug + if test "x$je_cv_cray_84" = "xyes" ; then + JE_CFLAGS_APPEND([-hipa2]) + JE_CFLAGS_APPEND([-hnognu]) + fi + if test "x$enable_cc_silence" != "xno" ; then + dnl 
ignore unreachable code warning + JE_CFLAGS_APPEND([-hnomessage=128]) + dnl ignore redefinition of "malloc", "free", etc warning + JE_CFLAGS_APPEND([-hnomessage=1357]) + fi + fi fi -dnl Append EXTRA_CFLAGS to CFLAGS, if defined. -if test "x$EXTRA_CFLAGS" != "x" ; then - JE_CFLAGS_APPEND([$EXTRA_CFLAGS]) -fi +AC_SUBST([EXTRA_CFLAGS]) AC_PROG_CPP AC_C_BIGENDIAN([ac_cv_big_endian=1], [ac_cv_big_endian=0]) @@ -164,13 +220,18 @@ if test "x${je_cv_msvc}" = "xyes" -a "x${ac_cv_header_inttypes_h}" = "xno"; then CPPFLAGS="$CPPFLAGS -I${srcdir}/include/msvc_compat/C99" fi -AC_CHECK_SIZEOF([void *]) -if test "x${ac_cv_sizeof_void_p}" = "x8" ; then - LG_SIZEOF_PTR=3 -elif test "x${ac_cv_sizeof_void_p}" = "x4" ; then - LG_SIZEOF_PTR=2 +if test "x${je_cv_msvc}" = "xyes" ; then + LG_SIZEOF_PTR=LG_SIZEOF_PTR_WIN + AC_MSG_RESULT([Using a predefined value for sizeof(void *): 4 for 32-bit, 8 for 64-bit]) else - AC_MSG_ERROR([Unsupported pointer size: ${ac_cv_sizeof_void_p}]) + AC_CHECK_SIZEOF([void *]) + if test "x${ac_cv_sizeof_void_p}" = "x8" ; then + LG_SIZEOF_PTR=3 + elif test "x${ac_cv_sizeof_void_p}" = "x4" ; then + LG_SIZEOF_PTR=2 + else + AC_MSG_ERROR([Unsupported pointer size: ${ac_cv_sizeof_void_p}]) + fi fi AC_DEFINE_UNQUOTED([LG_SIZEOF_PTR], [$LG_SIZEOF_PTR]) @@ -194,6 +255,16 @@ else fi AC_DEFINE_UNQUOTED([LG_SIZEOF_LONG], [$LG_SIZEOF_LONG]) +AC_CHECK_SIZEOF([long long]) +if test "x${ac_cv_sizeof_long_long}" = "x8" ; then + LG_SIZEOF_LONG_LONG=3 +elif test "x${ac_cv_sizeof_long_long}" = "x4" ; then + LG_SIZEOF_LONG_LONG=2 +else + AC_MSG_ERROR([Unsupported long long size: ${ac_cv_sizeof_long_long}]) +fi +AC_DEFINE_UNQUOTED([LG_SIZEOF_LONG_LONG], [$LG_SIZEOF_LONG_LONG]) + AC_CHECK_SIZEOF([intmax_t]) if test "x${ac_cv_sizeof_intmax_t}" = "x16" ; then LG_SIZEOF_INTMAX_T=4 @@ -211,12 +282,22 @@ dnl CPU-specific settings. 
CPU_SPINWAIT="" case "${host_cpu}" in i686|x86_64) - AC_CACHE_VAL([je_cv_pause], - [JE_COMPILABLE([pause instruction], [], - [[__asm__ volatile("pause"); return 0;]], - [je_cv_pause])]) - if test "x${je_cv_pause}" = "xyes" ; then - CPU_SPINWAIT='__asm__ volatile("pause")' + if test "x${je_cv_msvc}" = "xyes" ; then + AC_CACHE_VAL([je_cv_pause_msvc], + [JE_COMPILABLE([pause instruction MSVC], [], + [[_mm_pause(); return 0;]], + [je_cv_pause_msvc])]) + if test "x${je_cv_pause_msvc}" = "xyes" ; then + CPU_SPINWAIT='_mm_pause()' + fi + else + AC_CACHE_VAL([je_cv_pause], + [JE_COMPILABLE([pause instruction], [], + [[__asm__ volatile("pause"); return 0;]], + [je_cv_pause])]) + if test "x${je_cv_pause}" = "xyes" ; then + CPU_SPINWAIT='__asm__ volatile("pause")' + fi fi ;; powerpc) @@ -234,17 +315,27 @@ o="$ac_objext" a="a" exe="$ac_exeext" libprefix="lib" +link_whole_archive="0" DSO_LDFLAGS='-shared -Wl,-soname,$(@F)' RPATH='-Wl,-rpath,$(1)' SOREV="${so}.${rev}" PIC_CFLAGS='-fPIC -DPIC' CTARGET='-o $@' LDTARGET='-o $@' +TEST_LD_MODE= EXTRA_LDFLAGS= ARFLAGS='crus' AROUT=' $@' CC_MM=1 +if test "x$je_cv_cray_prgenv_wrapper" = "xyes" ; then + TEST_LD_MODE='-dynamic' +fi + +if test "x${je_cv_cray}" = "xyes" ; then + CC_MM= +fi + AN_MAKEVAR([AR], [AC_PROG_AR]) AN_PROGRAM([ar], [AC_PROG_AR]) AC_DEFUN([AC_PROG_AR], [AC_CHECK_TOOL(AR, ar, :)]) @@ -257,13 +348,12 @@ dnl dnl Define cpp macros in CPPFLAGS, rather than doing AC_DEFINE(macro), since the dnl definitions need to be seen before any headers are included, which is a pain dnl to make happen otherwise. 
+CFLAGS="$CFLAGS" default_munmap="1" maps_coalesce="1" case "${host}" in *-*-darwin* | *-*-ios*) - CFLAGS="$CFLAGS" abi="macho" - AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) RPATH="" LD_PRELOAD_VAR="DYLD_INSERT_LIBRARIES" so="dylib" @@ -274,33 +364,37 @@ case "${host}" in sbrk_deprecated="1" ;; *-*-freebsd*) - CFLAGS="$CFLAGS" abi="elf" - AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) + AC_DEFINE([JEMALLOC_SYSCTL_VM_OVERCOMMIT], [ ]) force_lazy_lock="1" ;; *-*-dragonfly*) - CFLAGS="$CFLAGS" abi="elf" - AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) ;; *-*-openbsd*) - CFLAGS="$CFLAGS" abi="elf" - AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) force_tls="0" ;; *-*-bitrig*) - CFLAGS="$CFLAGS" abi="elf" - AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) ;; - *-*-linux*) - CFLAGS="$CFLAGS" + *-*-linux-android) + dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE" abi="elf" AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) - AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ]) + AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ]) + AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) + AC_DEFINE([JEMALLOC_C11ATOMICS]) + force_tls="0" + default_munmap="0" + ;; + *-*-linux* | *-*-kfreebsd*) + dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. + CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE" + abi="elf" + AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) + AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ]) AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) AC_DEFINE([JEMALLOC_USE_CXX_THROW], [ ]) default_munmap="0" @@ -314,15 +408,12 @@ case "${host}" in #error aout #endif ]])], - [CFLAGS="$CFLAGS"; abi="elf"], + [abi="elf"], [abi="aout"]) AC_MSG_RESULT([$abi]) - AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) ;; *-*-solaris2*) - CFLAGS="$CFLAGS" abi="elf" - AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) RPATH='-Wl,-R,$(1)' dnl Solaris needs this for sigwait(). 
CPPFLAGS="$CPPFLAGS -D_POSIX_PTHREAD_SEMANTICS" @@ -341,7 +432,6 @@ case "${host}" in *-*-mingw* | *-*-cygwin*) abi="pecoff" force_tls="0" - force_lazy_lock="1" maps_coalesce="0" RPATH="" so="dll" @@ -358,6 +448,7 @@ case "${host}" in else importlib="${so}" DSO_LDFLAGS="-shared" + link_whole_archive="1" fi a="lib" libprefix="" @@ -395,17 +486,28 @@ AC_SUBST([o]) AC_SUBST([a]) AC_SUBST([exe]) AC_SUBST([libprefix]) +AC_SUBST([link_whole_archive]) AC_SUBST([DSO_LDFLAGS]) AC_SUBST([EXTRA_LDFLAGS]) AC_SUBST([SOREV]) AC_SUBST([PIC_CFLAGS]) AC_SUBST([CTARGET]) AC_SUBST([LDTARGET]) +AC_SUBST([TEST_LD_MODE]) AC_SUBST([MKLIB]) AC_SUBST([ARFLAGS]) AC_SUBST([AROUT]) AC_SUBST([CC_MM]) +dnl Determine whether libm must be linked to use e.g. log(3). +AC_SEARCH_LIBS([log], [m], , [AC_MSG_ERROR([Missing math functions])]) +if test "x$ac_cv_search_log" != "xnone required" ; then + LM="$ac_cv_search_log" +else + LM= +fi +AC_SUBST(LM) + JE_COMPILABLE([__attribute__ syntax], [static __attribute__((unused)) void foo(void){}], [], @@ -419,6 +521,7 @@ fi dnl Check for tls_model attribute support (clang 3.0 still lacks support). SAVED_CFLAGS="${CFLAGS}" JE_CFLAGS_APPEND([-Werror]) +JE_CFLAGS_APPEND([-herror_on_warning]) JE_COMPILABLE([tls_model attribute], [], [static __thread int __attribute__((tls_model("initial-exec"), unused)) foo; @@ -434,6 +537,7 @@ fi dnl Check for alloc_size attribute support. SAVED_CFLAGS="${CFLAGS}" JE_CFLAGS_APPEND([-Werror]) +JE_CFLAGS_APPEND([-herror_on_warning]) JE_COMPILABLE([alloc_size attribute], [#include ], [void *foo(size_t size) __attribute__((alloc_size(1)));], [je_cv_alloc_size]) @@ -444,6 +548,7 @@ fi dnl Check for format(gnu_printf, ...) attribute support. SAVED_CFLAGS="${CFLAGS}" JE_CFLAGS_APPEND([-Werror]) +JE_CFLAGS_APPEND([-herror_on_warning]) JE_COMPILABLE([format(gnu_printf, ...) attribute], [#include ], [void *foo(const char *format, ...) 
__attribute__((format(gnu_printf, 1, 2)));], [je_cv_format_gnu_printf]) @@ -454,6 +559,7 @@ fi dnl Check for format(printf, ...) attribute support. SAVED_CFLAGS="${CFLAGS}" JE_CFLAGS_APPEND([-Werror]) +JE_CFLAGS_APPEND([-herror_on_warning]) JE_COMPILABLE([format(printf, ...) attribute], [#include ], [void *foo(const char *format, ...) __attribute__((format(printf, 1, 2)));], [je_cv_format_printf]) @@ -575,6 +681,15 @@ AC_ARG_WITH([install_suffix], install_suffix="$INSTALL_SUFFIX" AC_SUBST([install_suffix]) +dnl Specify default malloc_conf. +AC_ARG_WITH([malloc_conf], + [AS_HELP_STRING([--with-malloc-conf=], [config.malloc_conf options string])], + [JEMALLOC_CONFIG_MALLOC_CONF="$with_malloc_conf"], + [JEMALLOC_CONFIG_MALLOC_CONF=""] +) +config_malloc_conf="$JEMALLOC_CONFIG_MALLOC_CONF" +AC_DEFINE_UNQUOTED([JEMALLOC_CONFIG_MALLOC_CONF], ["$config_malloc_conf"]) + dnl Substitute @je_@ in jemalloc_protos.h.in, primarily to make generation of dnl jemalloc_protos_jet.h easy. je_="je_" @@ -839,9 +954,9 @@ fi AC_MSG_CHECKING([configured backtracing method]) AC_MSG_RESULT([$backtrace_method]) if test "x$enable_prof" = "x1" ; then - if test "x$abi" != "xpecoff"; then - dnl Heap profiling uses the log(3) function. - LIBS="$LIBS -lm" + dnl Heap profiling uses the log(3) function. 
+ if test "x$LM" != "x" ; then + LIBS="$LIBS $LM" fi AC_DEFINE([JEMALLOC_PROF], [ ]) @@ -1010,11 +1125,28 @@ if test "x$enable_cache_oblivious" = "x1" ; then fi AC_SUBST([enable_cache_oblivious]) + + +JE_COMPILABLE([a program using __builtin_unreachable], [ +void foo (void) { + __builtin_unreachable(); +} +], [ + { + foo(); + } +], [je_cv_gcc_builtin_unreachable]) +if test "x${je_cv_gcc_builtin_unreachable}" = "xyes" ; then + AC_DEFINE([JEMALLOC_INTERNAL_UNREACHABLE], [__builtin_unreachable]) +else + AC_DEFINE([JEMALLOC_INTERNAL_UNREACHABLE], [abort]) +fi + dnl ============================================================================ dnl Check for __builtin_ffsl(), then ffsl(3), and fail if neither are found. dnl One of those two functions should (theoretically) exist on all platforms dnl that jemalloc currently has a chance of functioning on without modification. -dnl We additionally assume ffs() or __builtin_ffs() are defined if +dnl We additionally assume ffs[ll]() or __builtin_ffs[ll]() are defined if dnl ffsl() or __builtin_ffsl() are defined, respectively. 
JE_COMPILABLE([a program using __builtin_ffsl], [ #include @@ -1027,6 +1159,7 @@ JE_COMPILABLE([a program using __builtin_ffsl], [ } ], [je_cv_gcc_builtin_ffsl]) if test "x${je_cv_gcc_builtin_ffsl}" = "xyes" ; then + AC_DEFINE([JEMALLOC_INTERNAL_FFSLL], [__builtin_ffsll]) AC_DEFINE([JEMALLOC_INTERNAL_FFSL], [__builtin_ffsl]) AC_DEFINE([JEMALLOC_INTERNAL_FFS], [__builtin_ffs]) else @@ -1041,6 +1174,7 @@ else } ], [je_cv_function_ffsl]) if test "x${je_cv_function_ffsl}" = "xyes" ; then + AC_DEFINE([JEMALLOC_INTERNAL_FFSLL], [ffsll]) AC_DEFINE([JEMALLOC_INTERNAL_FFSL], [ffsl]) AC_DEFINE([JEMALLOC_INTERNAL_FFS], [ffs]) else @@ -1100,7 +1234,7 @@ if test "x$LG_PAGE" = "xdetect"; then if (f == NULL) { return 1; } - fprintf(f, "%d\n", result); + fprintf(f, "%d", result); fclose(f); return 0; @@ -1133,27 +1267,36 @@ dnl ============================================================================ dnl jemalloc configuration. dnl -dnl Set VERSION if source directory is inside a git repository. -if test "x`test ! \"${srcroot}\" && cd \"${srcroot}\"; git rev-parse --is-inside-work-tree 2>/dev/null`" = "xtrue" ; then - dnl Pattern globs aren't powerful enough to match both single- and - dnl double-digit version numbers, so iterate over patterns to support up to - dnl version 99.99.99 without any accidental matches. - rm -f "${objroot}VERSION" - for pattern in ['[0-9].[0-9].[0-9]' '[0-9].[0-9].[0-9][0-9]' \ - '[0-9].[0-9][0-9].[0-9]' '[0-9].[0-9][0-9].[0-9][0-9]' \ - '[0-9][0-9].[0-9].[0-9]' '[0-9][0-9].[0-9].[0-9][0-9]' \ - '[0-9][0-9].[0-9][0-9].[0-9]' \ - '[0-9][0-9].[0-9][0-9].[0-9][0-9]']; do - if test ! -e "${objroot}VERSION" ; then - (test ! "${srcroot}" && cd "${srcroot}"; git describe --long --abbrev=40 --match="${pattern}") > "${objroot}VERSION.tmp" 2>/dev/null - if test $? 
-eq 0 ; then - mv "${objroot}VERSION.tmp" "${objroot}VERSION" - break - fi +AC_ARG_WITH([version], + [AS_HELP_STRING([--with-version=..--g], + [Version string])], + [ + echo "${with_version}" | grep ['^[0-9]\+\.[0-9]\+\.[0-9]\+-[0-9]\+-g[0-9a-f]\+$'] 2>&1 1>/dev/null + if test $? -ne 0 ; then + AC_MSG_ERROR([${with_version} does not match ..--g]) fi - done -fi -rm -f "${objroot}VERSION.tmp" + echo "$with_version" > "${objroot}VERSION" + ], [ + dnl Set VERSION if source directory is inside a git repository. + if test "x`test ! \"${srcroot}\" && cd \"${srcroot}\"; git rev-parse --is-inside-work-tree 2>/dev/null`" = "xtrue" ; then + dnl Pattern globs aren't powerful enough to match both single- and + dnl double-digit version numbers, so iterate over patterns to support up + dnl to version 99.99.99 without any accidental matches. + for pattern in ['[0-9].[0-9].[0-9]' '[0-9].[0-9].[0-9][0-9]' \ + '[0-9].[0-9][0-9].[0-9]' '[0-9].[0-9][0-9].[0-9][0-9]' \ + '[0-9][0-9].[0-9].[0-9]' '[0-9][0-9].[0-9].[0-9][0-9]' \ + '[0-9][0-9].[0-9][0-9].[0-9]' \ + '[0-9][0-9].[0-9][0-9].[0-9][0-9]']; do + (test ! "${srcroot}" && cd "${srcroot}"; git describe --long --abbrev=40 --match="${pattern}") > "${objroot}VERSION.tmp" 2>/dev/null + if test $? -eq 0 ; then + mv "${objroot}VERSION.tmp" "${objroot}VERSION" + break + fi + done + fi + rm -f "${objroot}VERSION.tmp" + ]) + if test ! -e "${objroot}VERSION" ; then if test ! 
-e "${srcroot}VERSION" ; then AC_MSG_RESULT( @@ -1186,17 +1329,101 @@ if test "x$abi" != "xpecoff" ; then AC_CHECK_LIB([pthread], [pthread_create], [LIBS="$LIBS -lpthread"], [AC_SEARCH_LIBS([pthread_create], , , AC_MSG_ERROR([libpthread is missing]))]) + JE_COMPILABLE([pthread_atfork(3)], [ +#include +], [ + pthread_atfork((void *)0, (void *)0, (void *)0); +], [je_cv_pthread_atfork]) + if test "x${je_cv_pthread_atfork}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_PTHREAD_ATFORK], [ ]) + fi fi CPPFLAGS="$CPPFLAGS -D_REENTRANT" -dnl Check whether clock_gettime(2) is in libc or librt. This function is only -dnl used in test code, so save the result to TESTLIBS to avoid poluting LIBS. -SAVED_LIBS="${LIBS}" -LIBS= -AC_SEARCH_LIBS([clock_gettime], [rt], [TESTLIBS="${LIBS}"]) -AC_SUBST([TESTLIBS]) -LIBS="${SAVED_LIBS}" +dnl Check whether clock_gettime(2) is in libc or librt. +AC_SEARCH_LIBS([clock_gettime], [rt]) + +dnl Cray wrapper compiler often adds `-lrt` when using `-static`. Check with +dnl `-dynamic` as well in case a user tries to dynamically link in jemalloc +if test "x$je_cv_cray_prgenv_wrapper" = "xyes" ; then + if test "$ac_cv_search_clock_gettime" != "-lrt"; then + SAVED_CFLAGS="${CFLAGS}" + + unset ac_cv_search_clock_gettime + JE_CFLAGS_APPEND([-dynamic]) + AC_SEARCH_LIBS([clock_gettime], [rt]) + + CFLAGS="${SAVED_CFLAGS}" + fi +fi + +dnl check for CLOCK_MONOTONIC_COARSE (Linux-specific). +JE_COMPILABLE([clock_gettime(CLOCK_MONOTONIC_COARSE, ...)], [ +#include +], [ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC_COARSE, &ts); +], [je_cv_clock_monotonic_coarse]) +if test "x${je_cv_clock_monotonic_coarse}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE]) +fi + +dnl check for CLOCK_MONOTONIC. 
+JE_COMPILABLE([clock_gettime(CLOCK_MONOTONIC, ...)], [ +#include +#include +], [ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC, &ts); +#if !defined(_POSIX_MONOTONIC_CLOCK) || _POSIX_MONOTONIC_CLOCK < 0 +# error _POSIX_MONOTONIC_CLOCK missing/invalid +#endif +], [je_cv_clock_monotonic]) +if test "x${je_cv_clock_monotonic}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_CLOCK_MONOTONIC]) +fi + +dnl Check for mach_absolute_time(). +JE_COMPILABLE([mach_absolute_time()], [ +#include +], [ + mach_absolute_time(); +], [je_cv_mach_absolute_time]) +if test "x${je_cv_mach_absolute_time}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_MACH_ABSOLUTE_TIME]) +fi + +dnl Use syscall(2) (if available) by default. +AC_ARG_ENABLE([syscall], + [AS_HELP_STRING([--disable-syscall], [Disable use of syscall(2)])], +[if test "x$enable_syscall" = "xno" ; then + enable_syscall="0" +else + enable_syscall="1" +fi +], +[enable_syscall="1"] +) +if test "x$enable_syscall" = "x1" ; then + dnl Check if syscall(2) is usable. Treat warnings as errors, so that e.g. OS + dnl X 10.12's deprecation warning prevents use. + SAVED_CFLAGS="${CFLAGS}" + JE_CFLAGS_APPEND([-Werror]) + JE_COMPILABLE([syscall(2)], [ +#include +#include +], [ + syscall(SYS_write, 2, "hello", 5); +], + [je_cv_syscall]) + CFLAGS="${SAVED_CFLAGS}" + if test "x$je_cv_syscall" = "xyes" ; then + AC_DEFINE([JEMALLOC_USE_SYSCALL], [ ]) + fi +fi dnl Check if the GNU-specific secure_getenv function exists. 
AC_CHECK_FUNC([secure_getenv], @@ -1252,9 +1479,17 @@ fi ], [enable_lazy_lock=""] ) -if test "x$enable_lazy_lock" = "x" -a "x${force_lazy_lock}" = "x1" ; then - AC_MSG_RESULT([Forcing lazy-lock to avoid allocator/threading bootstrap issues]) - enable_lazy_lock="1" +if test "x${enable_lazy_lock}" = "x" ; then + if test "x${force_lazy_lock}" = "x1" ; then + AC_MSG_RESULT([Forcing lazy-lock to avoid allocator/threading bootstrap issues]) + enable_lazy_lock="1" + else + enable_lazy_lock="0" + fi +fi +if test "x${enable_lazy_lock}" = "x1" -a "x${abi}" = "xpecoff" ; then + AC_MSG_RESULT([Forcing no lazy-lock because thread creation monitoring is unimplemented]) + enable_lazy_lock="0" fi if test "x$enable_lazy_lock" = "x1" ; then if test "x$abi" != "xpecoff" ; then @@ -1265,8 +1500,6 @@ if test "x$enable_lazy_lock" = "x1" ; then ]) fi AC_DEFINE([JEMALLOC_LAZY_LOCK], [ ]) -else - enable_lazy_lock="0" fi AC_SUBST([enable_lazy_lock]) @@ -1389,12 +1622,41 @@ dnl Check for madvise(2). JE_COMPILABLE([madvise(2)], [ #include ], [ - { - madvise((void *)0, 0, 0); - } + madvise((void *)0, 0, 0); ], [je_cv_madvise]) if test "x${je_cv_madvise}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_MADVISE], [ ]) + + dnl Check for madvise(..., MADV_FREE). + JE_COMPILABLE([madvise(..., MADV_FREE)], [ +#include +], [ + madvise((void *)0, 0, MADV_FREE); +], [je_cv_madv_free]) + if test "x${je_cv_madv_free}" = "xyes" ; then + AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) + fi + + dnl Check for madvise(..., MADV_DONTNEED). + JE_COMPILABLE([madvise(..., MADV_DONTNEED)], [ +#include +], [ + madvise((void *)0, 0, MADV_DONTNEED); +], [je_cv_madv_dontneed]) + if test "x${je_cv_madv_dontneed}" = "xyes" ; then + AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ]) + fi + + dnl Check for madvise(..., MADV_[NO]HUGEPAGE). 
+ JE_COMPILABLE([madvise(..., MADV_[[NO]]HUGEPAGE)], [ +#include +], [ + madvise((void *)0, 0, MADV_HUGEPAGE); + madvise((void *)0, 0, MADV_NOHUGEPAGE); +], [je_cv_thp]) + if test "x${je_cv_thp}" = "xyes" ; then + AC_DEFINE([JEMALLOC_THP], [ ]) + fi fi dnl ============================================================================ @@ -1454,6 +1716,25 @@ if test "x${je_cv_builtin_clz}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_BUILTIN_CLZ], [ ]) fi +dnl ============================================================================ +dnl Check for os_unfair_lock operations as provided on Darwin. + +JE_COMPILABLE([Darwin os_unfair_lock_*()], [ +#include +#include +], [ + #if MAC_OS_X_VERSION_MIN_REQUIRED < 101200 + #error "os_unfair_lock is not supported" + #else + os_unfair_lock lock = OS_UNFAIR_LOCK_INIT; + os_unfair_lock_lock(&lock); + os_unfair_lock_unlock(&lock); + #endif +], [je_cv_os_unfair_lock]) +if test "x${je_cv_os_unfair_lock}" = "xyes" ; then + AC_DEFINE([JEMALLOC_OS_UNFAIR_LOCK], [ ]) +fi + dnl ============================================================================ dnl Check for spinlock(3) operations as provided on Darwin. 
@@ -1698,11 +1979,11 @@ AC_MSG_RESULT([]) AC_MSG_RESULT([CONFIG : ${CONFIG}]) AC_MSG_RESULT([CC : ${CC}]) AC_MSG_RESULT([CFLAGS : ${CFLAGS}]) +AC_MSG_RESULT([EXTRA_CFLAGS : ${EXTRA_CFLAGS}]) AC_MSG_RESULT([CPPFLAGS : ${CPPFLAGS}]) AC_MSG_RESULT([LDFLAGS : ${LDFLAGS}]) AC_MSG_RESULT([EXTRA_LDFLAGS : ${EXTRA_LDFLAGS}]) AC_MSG_RESULT([LIBS : ${LIBS}]) -AC_MSG_RESULT([TESTLIBS : ${TESTLIBS}]) AC_MSG_RESULT([RPATH_EXTRA : ${RPATH_EXTRA}]) AC_MSG_RESULT([]) AC_MSG_RESULT([XSLTPROC : ${XSLTPROC}]) @@ -1724,6 +2005,7 @@ AC_MSG_RESULT([JEMALLOC_PREFIX : ${JEMALLOC_PREFIX}]) AC_MSG_RESULT([JEMALLOC_PRIVATE_NAMESPACE]) AC_MSG_RESULT([ : ${JEMALLOC_PRIVATE_NAMESPACE}]) AC_MSG_RESULT([install_suffix : ${install_suffix}]) +AC_MSG_RESULT([malloc_conf : ${config_malloc_conf}]) AC_MSG_RESULT([autogen : ${enable_autogen}]) AC_MSG_RESULT([cc-silence : ${enable_cc_silence}]) AC_MSG_RESULT([debug : ${enable_debug}]) diff --git a/deps/jemalloc/doc/html.xsl.in b/deps/jemalloc/doc/html.xsl.in index a91d9746f..ec4fa6552 100644 --- a/deps/jemalloc/doc/html.xsl.in +++ b/deps/jemalloc/doc/html.xsl.in @@ -1,4 +1,5 @@ + diff --git a/deps/jemalloc/doc/jemalloc.3 b/deps/jemalloc/doc/jemalloc.3 index 2e6b2c0e8..3709f6692 100644 --- a/deps/jemalloc/doc/jemalloc.3 +++ b/deps/jemalloc/doc/jemalloc.3 @@ -1,13 +1,13 @@ '\" t .\" Title: JEMALLOC .\" Author: Jason Evans -.\" Generator: DocBook XSL Stylesheets v1.78.1 -.\" Date: 09/24/2015 +.\" Generator: DocBook XSL Stylesheets v1.79.1 +.\" Date: 12/03/2016 .\" Manual: User Manual -.\" Source: jemalloc 4.0.3-0-ge9192eacf8935e29fc62fddc2701f7942b1cc02c +.\" Source: jemalloc 4.4.0-0-gf1f76357313e7dcad7262f17a48ff0a2e005fcdc .\" Language: English .\" -.TH "JEMALLOC" "3" "09/24/2015" "jemalloc 4.0.3-0-ge9192eacf893" "User Manual" +.TH "JEMALLOC" "3" "12/03/2016" "jemalloc 4.4.0-0-gf1f76357313e" "User Manual" .\" ----------------------------------------------------------------- .\" * Define some portability stuff .\" 
----------------------------------------------------------------- @@ -31,7 +31,7 @@ jemalloc \- general purpose memory allocation functions .SH "LIBRARY" .PP -This manual describes jemalloc 4\&.0\&.3\-0\-ge9192eacf8935e29fc62fddc2701f7942b1cc02c\&. More information can be found at the +This manual describes jemalloc 4\&.4\&.0\-0\-gf1f76357313e7dcad7262f17a48ff0a2e005fcdc\&. More information can be found at the \m[blue]\fBjemalloc website\fR\m[]\&\s-2\u[1]\d\s+2\&. .SH "SYNOPSIS" .sp @@ -86,26 +86,26 @@ const char *\fImalloc_conf\fR; .SS "Standard API" .PP The -\fBmalloc\fR\fB\fR +malloc() function allocates \fIsize\fR bytes of uninitialized memory\&. The allocated space is suitably aligned (after possible pointer coercion) for storage of any type of object\&. .PP The -\fBcalloc\fR\fB\fR +calloc() function allocates space for \fInumber\fR objects, each \fIsize\fR bytes in length\&. The result is identical to calling -\fBmalloc\fR\fB\fR +malloc() with an argument of \fInumber\fR * \fIsize\fR, with the exception that the allocated memory is explicitly initialized to zero bytes\&. .PP The -\fBposix_memalign\fR\fB\fR +posix_memalign() function allocates \fIsize\fR bytes of memory such that the allocation\*(Aqs base address is a multiple of @@ -116,7 +116,7 @@ must be a power of 2 at least as large as sizeof(\fBvoid *\fR)\&. .PP The -\fBaligned_alloc\fR\fB\fR +aligned_alloc() function allocates \fIsize\fR bytes of memory such that the allocation\*(Aqs base address is a multiple of @@ -128,7 +128,7 @@ is not an integral multiple of \fIalignment\fR\&. .PP The -\fBrealloc\fR\fB\fR +realloc() function changes the size of the previously allocated memory referenced by \fIptr\fR to @@ -136,19 +136,19 @@ to bytes\&. The contents of the memory are unchanged up to the lesser of the new and old sizes\&. If the new size is larger, the contents of the newly allocated portion of the memory are undefined\&. 
Upon success, the memory referenced by \fIptr\fR is freed and a pointer to the newly allocated memory is returned\&. Note that -\fBrealloc\fR\fB\fR +realloc() may move the memory allocation, resulting in a different return value than \fIptr\fR\&. If \fIptr\fR is \fBNULL\fR, the -\fBrealloc\fR\fB\fR +realloc() function behaves identically to -\fBmalloc\fR\fB\fR +malloc() for the specified size\&. .PP The -\fBfree\fR\fB\fR +free() function causes the allocated memory referenced by \fIptr\fR to be made available for future allocations\&. If @@ -158,13 +158,13 @@ is .SS "Non\-standard API" .PP The -\fBmallocx\fR\fB\fR, -\fBrallocx\fR\fB\fR, -\fBxallocx\fR\fB\fR, -\fBsallocx\fR\fB\fR, -\fBdallocx\fR\fB\fR, -\fBsdallocx\fR\fB\fR, and -\fBnallocx\fR\fB\fR +mallocx(), +rallocx(), +xallocx(), +sallocx(), +dallocx(), +sdallocx(), and +nallocx() functions all have a \fIflags\fR argument that can be used to specify options\&. The functions only check the options that are contextually relevant\&. Use bitwise or (|) operations to specify one or more of the following: @@ -196,7 +196,7 @@ Initialize newly allocated memory to contain zero bytes\&. In the growing reallo .RS 4 Use the thread\-specific cache (tcache) specified by the identifier \fItc\fR, which must have been acquired via the -"tcache\&.create" +tcache\&.create mallctl\&. This macro does not validate that \fItc\fR specifies a valid identifier\&. @@ -223,16 +223,16 @@ specifies an arena index in the valid range\&. .RE .PP The -\fBmallocx\fR\fB\fR +mallocx() function allocates at least \fIsize\fR bytes of memory, and returns a pointer to the base address of the allocation\&. Behavior is undefined if \fIsize\fR is -\fB0\fR, or if request size overflows due to size class and/or alignment constraints\&. +\fB0\fR\&. 
.PP The -\fBrallocx\fR\fB\fR +rallocx() function resizes the allocation at \fIptr\fR to be at least @@ -240,10 +240,10 @@ to be at least bytes, and returns a pointer to the base address of the resulting allocation, which may or may not have moved from its original location\&. Behavior is undefined if \fIsize\fR is -\fB0\fR, or if request size overflows due to size class and/or alignment constraints\&. +\fB0\fR\&. .PP The -\fBxallocx\fR\fB\fR +xallocx() function resizes the allocation at \fIptr\fR in place to be at least @@ -259,40 +259,42 @@ is (\fIsize\fR + \fIextra\fR > \fBSIZE_T_MAX\fR)\&. .PP The -\fBsallocx\fR\fB\fR +sallocx() function returns the real size of the allocation at \fIptr\fR\&. .PP The -\fBdallocx\fR\fB\fR +dallocx() function causes the memory referenced by \fIptr\fR to be made available for future allocations\&. .PP The -\fBsdallocx\fR\fB\fR +sdallocx() function is an extension of -\fBdallocx\fR\fB\fR +dallocx() with a \fIsize\fR parameter to allow the caller to pass in the allocation size as an optimization\&. The minimum valid input size is the original requested size of the allocation, and the maximum valid input size is the corresponding value returned by -\fBnallocx\fR\fB\fR +nallocx() or -\fBsallocx\fR\fB\fR\&. +sallocx()\&. .PP The -\fBnallocx\fR\fB\fR +nallocx() function allocates no memory, but it performs the same size computation as the -\fBmallocx\fR\fB\fR +mallocx() function, and returns the real size of the allocation that would result from the equivalent -\fBmallocx\fR\fB\fR -function call\&. Behavior is undefined if +mallocx() +function call, or +\fB0\fR +if the inputs exceed the maximum supported size class and/or alignment\&. Behavior is undefined if \fIsize\fR is -\fB0\fR, or if request size overflows due to size class and/or alignment constraints\&. +\fB0\fR\&. 
.PP The -\fBmallctl\fR\fB\fR +mallctl() function provides a general interface for introspecting the memory allocator, as well as setting modifiable parameters and triggering actions\&. The period\-separated \fIname\fR argument specifies a location in a tree\-structured namespace; see the @@ -311,10 +313,12 @@ and \fB0\fR\&. .PP The -\fBmallctlnametomib\fR\fB\fR -function provides a way to avoid repeated name lookups for applications that repeatedly query the same portion of the namespace, by translating a name to a \(lqManagement Information Base\(rq (MIB) that can be passed repeatedly to -\fBmallctlbymib\fR\fB\fR\&. Upon successful return from -\fBmallctlnametomib\fR\fB\fR, +mallctlnametomib() +function provides a way to avoid repeated name lookups for applications that repeatedly query the same portion of the namespace, by translating a name to a +\(lqManagement Information Base\(rq +(MIB) that can be passed repeatedly to +mallctlbymib()\&. Upon successful return from +mallctlnametomib(), \fImibp\fR contains an array of \fI*miblenp\fR @@ -326,7 +330,7 @@ and the input value of \fI*miblenp\fR\&. Thus it is possible to pass a \fI*miblenp\fR that is smaller than the number of period\-separated name components, which results in a partial MIB that can be used as the basis for constructing a complete MIB\&. For name components that are integers (e\&.g\&. the 2 in -"arenas\&.bin\&.2\&.size"), the corresponding MIB component will always be that integer\&. Therefore, it is legitimate to construct code like the following: +arenas\&.bin\&.2\&.size), the corresponding MIB component will always be that integer\&. Therefore, it is legitimate to construct code like the following: .sp .if n \{\ .RS 4 @@ -346,7 +350,7 @@ for (i = 0; i < nbins; i++) { mib[2] = i; len = sizeof(bin_size); - mallctlbymib(mib, miblen, &bin_size, &len, NULL, 0); + mallctlbymib(mib, miblen, (void *)&bin_size, &len, NULL, 0); /* Do something with bin_size\&.\&.\&. 
*/ } .fi @@ -355,67 +359,87 @@ for (i = 0; i < nbins; i++) { .\} .PP The -\fBmalloc_stats_print\fR\fB\fR -function writes human\-readable summary statistics via the +malloc_stats_print() +function writes summary statistics via the \fIwrite_cb\fR callback function pointer and \fIcbopaque\fR data passed to \fIwrite_cb\fR, or -\fBmalloc_message\fR\fB\fR +malloc_message() if \fIwrite_cb\fR is -\fBNULL\fR\&. This function can be called repeatedly\&. General information that never changes during execution can be omitted by specifying "g" as a character within the +\fBNULL\fR\&. The statistics are presented in human\-readable form unless +\(lqJ\(rq +is specified as a character within the +\fIopts\fR +string, in which case the statistics are presented in +\m[blue]\fBJSON format\fR\m[]\&\s-2\u[2]\d\s+2\&. This function can be called repeatedly\&. General information that never changes during execution can be omitted by specifying +\(lqg\(rq +as a character within the \fIopts\fR string\&. Note that -\fBmalloc_message\fR\fB\fR +malloc_message() uses the -\fBmallctl*\fR\fB\fR +mallctl*() functions internally, so inconsistent statistics can be reported if multiple threads use these functions simultaneously\&. If \fB\-\-enable\-stats\fR -is specified during configuration, \(lqm\(rq and \(lqa\(rq can be specified to omit merged arena and per arena statistics, respectively; \(lqb\(rq, \(lql\(rq, and \(lqh\(rq can be specified to omit per size class statistics for bins, large objects, and huge objects, respectively\&. Unrecognized characters are silently ignored\&. Note that thread caching may prevent some statistics from being completely up to date, since extra locking would be required to merge counters that track thread cache operations\&. 
+is specified during configuration, +\(lqm\(rq +and +\(lqa\(rq +can be specified to omit merged arena and per arena statistics, respectively; +\(lqb\(rq, +\(lql\(rq, and +\(lqh\(rq +can be specified to omit per size class statistics for bins, large objects, and huge objects, respectively\&. Unrecognized characters are silently ignored\&. Note that thread caching may prevent some statistics from being completely up to date, since extra locking would be required to merge counters that track thread cache operations\&. .PP The -\fBmalloc_usable_size\fR\fB\fR +malloc_usable_size() function returns the usable size of the allocation pointed to by \fIptr\fR\&. The return value may be larger than the size that was requested during allocation\&. The -\fBmalloc_usable_size\fR\fB\fR +malloc_usable_size() function is not a mechanism for in\-place -\fBrealloc\fR\fB\fR; rather it is provided solely as a tool for introspection purposes\&. Any discrepancy between the requested allocation size and the size reported by -\fBmalloc_usable_size\fR\fB\fR +realloc(); rather it is provided solely as a tool for introspection purposes\&. Any discrepancy between the requested allocation size and the size reported by +malloc_usable_size() should not be depended on, since such behavior is entirely implementation\-dependent\&. .SH "TUNING" .PP Once, when the first call is made to one of the memory allocation routines, the allocator initializes its internals based in part on various options that can be specified at compile\- or run\-time\&. 
.PP -The string pointed to by the global variable -\fImalloc_conf\fR, the \(lqname\(rq of the file referenced by the symbolic link named +The string specified via +\fB\-\-with\-malloc\-conf\fR, the string pointed to by the global variable +\fImalloc_conf\fR, the +\(lqname\(rq +of the file referenced by the symbolic link named /etc/malloc\&.conf, and the value of the environment variable \fBMALLOC_CONF\fR, will be interpreted, in that order, from left to right as options\&. Note that \fImalloc_conf\fR may be read before -\fBmain\fR\fB\fR +main() is entered, so the declaration of \fImalloc_conf\fR should specify an initializer that contains the final value to be read by jemalloc\&. +\fB\-\-with\-malloc\-conf\fR +and \fImalloc_conf\fR -is a compile\-time setting, whereas +are compile\-time mechanisms, whereas /etc/malloc\&.conf and \fBMALLOC_CONF\fR can be safely set any time prior to program invocation\&. .PP An options string is a comma\-separated list of option:value pairs\&. There is one key corresponding to each -"opt\&.*" +opt\&.* mallctl (see the MALLCTL NAMESPACE section for options documentation)\&. For example, abort:true,narenas:1 sets the -"opt\&.abort" +opt\&.abort and -"opt\&.narenas" +opt\&.narenas options\&. Some options have boolean values (true/false), others have integer values (base 8, 10, or 16, depending on prefix), and yet others have raw string values\&. .SH "IMPLEMENTATION NOTES" .PP @@ -436,29 +460,26 @@ In addition to multiple arenas, unless \fB\-\-disable\-tcache\fR is specified during configuration, this allocator supports thread\-specific caching for small and large objects, in order to make it possible to completely avoid synchronization for most allocation requests\&. Such caching allows very fast allocation in the common case, but it increases memory usage and fragmentation, since a bounded number of objects can remain allocated in each thread cache\&. 
.PP -Memory is conceptually broken into equal\-sized chunks, where the chunk size is a power of two that is greater than the page size\&. Chunks are always aligned to multiples of the chunk size\&. This alignment makes it possible to find metadata for user objects very quickly\&. -.PP -User objects are broken into three categories according to size: small, large, and huge\&. Small and large objects are managed entirely by arenas; huge objects are additionally aggregated in a single data structure that is shared by all threads\&. Huge objects are typically used by applications infrequently enough that this single data structure is not a scalability issue\&. -.PP -Each chunk that is managed by an arena tracks its contents as runs of contiguous pages (unused, backing a set of small objects, or backing one large object)\&. The combination of chunk alignment and chunk page maps makes it possible to determine all metadata regarding small and large allocations in constant time\&. +Memory is conceptually broken into equal\-sized chunks, where the chunk size is a power of two that is greater than the page size\&. Chunks are always aligned to multiples of the chunk size\&. This alignment makes it possible to find metadata for user objects very quickly\&. User objects are broken into three categories according to size: small, large, and huge\&. Multiple small and large objects can reside within a single chunk, whereas huge objects each have one or more chunks backing them\&. Each chunk that contains small and/or large objects tracks its contents as runs of contiguous pages (unused, backing a set of small objects, or backing one large object)\&. The combination of chunk alignment and chunk page maps makes it possible to determine all metadata regarding small and large allocations in constant time\&. .PP Small objects are managed in groups by page runs\&. Each run maintains a bitmap to track which regions are in use\&. 
Allocation requests that are no more than half the quantum (8 or 16, depending on architecture) are rounded up to the nearest power of two that is at least sizeof(\fBdouble\fR)\&. All other object size classes are multiples of the quantum, spaced such that there are four size classes for each doubling in size, which limits internal fragmentation to approximately 20% for all but the smallest size classes\&. Small size classes are smaller than four times the page size, large size classes are smaller than the chunk size (see the -"opt\&.lg_chunk" -option), and huge size classes extend from the chunk size up to one size class less than the full address space size\&. +opt\&.lg_chunk +option), and huge size classes extend from the chunk size up to the largest size class that does not exceed +\fBPTRDIFF_MAX\fR\&. .PP Allocations are packed tightly together, which can be an issue for multi\-threaded applications\&. If you need to assure that allocations do not suffer from cacheline sharing, round your allocation requests up to the nearest multiple of the cacheline size, or specify cacheline alignment when allocating\&. .PP The -\fBrealloc\fR\fB\fR, -\fBrallocx\fR\fB\fR, and -\fBxallocx\fR\fB\fR +realloc(), +rallocx(), and +xallocx() functions may resize allocations without moving them under limited circumstances\&. Unlike the -\fB*allocx\fR\fB\fR +*allocx() API, the standard API does not officially round up the usable size of an allocation to the nearest size class, so technically it is necessary to call -\fBrealloc\fR\fB\fR +realloc() to grow e\&.g\&. a 9\-byte allocation to 16 bytes, or shrink a 16\-byte allocation to 9 bytes\&. Growth and shrinkage trivially succeeds in place as long as the pre\-size and post\-size both round up to the same size class\&. 
No other API guarantees are made regarding in\-place resizing, but the current implementation also tries to resize large and huge allocations in place, as long as the pre\-size and post\-size are both large or both huge\&. In such cases shrinkage always succeeds for large size classes, but for huge size classes the chunk allocator must support splitting (see -"arena\&.\&.chunk_hooks")\&. Growth only succeeds if the trailing memory is currently available, and additionally for huge size classes the chunk allocator must support merging\&. +arena\&.\&.chunk_hooks)\&. Growth only succeeds if the trailing memory is currently available, and additionally for huge size classes the chunk allocator must support merging\&. .PP Assuming 2 MiB chunks, 4 KiB pages, and a 16\-byte quantum on a 64\-bit system, the size classes in each category are as shown in Table 1\&. @@ -502,6 +523,8 @@ l r l ^ r l ^ r l ^ r l +^ r l +^ r l ^ r l. T{ Small @@ -629,12 +652,22 @@ T} T}:T{ \&.\&.\&. T} +:T{ +512 PiB +T}:T{ +[2560 PiB, 3 EiB, 3584 PiB, 4 EiB] +T} +:T{ +1 EiB +T}:T{ +[5 EiB, 6 EiB, 7 EiB] +T} .TE .sp 1 .SH "MALLCTL NAMESPACE" .PP The following names are defined in the namespace accessible via the -\fBmallctl*\fR\fB\fR +mallctl*() functions\&. Value types are specified in parentheses, their readable/writable statuses are encoded as rw, r\-, @@ -644,111 +677,118 @@ r\-, or indicates an integer component, where the integer varies from 0 to some upper value that must be determined via introspection\&. In the case of -"stats\&.arenas\&.\&.*", +stats\&.arenas\&.\&.*, equal to -"arenas\&.narenas" +arenas\&.narenas can be used to access the summation of statistics from all arenas\&. Take special note of the -"epoch" +epoch mallctl, which controls refreshing of cached dynamic statistics\&. .PP -"version" (\fBconst char *\fR) r\- +version (\fBconst char *\fR) r\- .RS 4 Return the jemalloc version string\&. 
.RE .PP -"epoch" (\fBuint64_t\fR) rw +epoch (\fBuint64_t\fR) rw .RS 4 If a value is passed in, refresh the data from which the -\fBmallctl*\fR\fB\fR +mallctl*() functions report values, and increment the epoch\&. Return the current epoch\&. This is useful for detecting whether another thread caused a refresh\&. .RE .PP -"config\&.cache_oblivious" (\fBbool\fR) r\- +config\&.cache_oblivious (\fBbool\fR) r\- .RS 4 \fB\-\-enable\-cache\-oblivious\fR was specified during build configuration\&. .RE .PP -"config\&.debug" (\fBbool\fR) r\- +config\&.debug (\fBbool\fR) r\- .RS 4 \fB\-\-enable\-debug\fR was specified during build configuration\&. .RE .PP -"config\&.fill" (\fBbool\fR) r\- +config\&.fill (\fBbool\fR) r\- .RS 4 \fB\-\-enable\-fill\fR was specified during build configuration\&. .RE .PP -"config\&.lazy_lock" (\fBbool\fR) r\- +config\&.lazy_lock (\fBbool\fR) r\- .RS 4 \fB\-\-enable\-lazy\-lock\fR was specified during build configuration\&. .RE .PP -"config\&.munmap" (\fBbool\fR) r\- +config\&.malloc_conf (\fBconst char *\fR) r\- +.RS 4 +Embedded configure\-time\-specified run\-time options string, empty unless +\fB\-\-with\-malloc\-conf\fR +was specified during build configuration\&. +.RE +.PP +config\&.munmap (\fBbool\fR) r\- .RS 4 \fB\-\-enable\-munmap\fR was specified during build configuration\&. .RE .PP -"config\&.prof" (\fBbool\fR) r\- +config\&.prof (\fBbool\fR) r\- .RS 4 \fB\-\-enable\-prof\fR was specified during build configuration\&. .RE .PP -"config\&.prof_libgcc" (\fBbool\fR) r\- +config\&.prof_libgcc (\fBbool\fR) r\- .RS 4 \fB\-\-disable\-prof\-libgcc\fR was not specified during build configuration\&. .RE .PP -"config\&.prof_libunwind" (\fBbool\fR) r\- +config\&.prof_libunwind (\fBbool\fR) r\- .RS 4 \fB\-\-enable\-prof\-libunwind\fR was specified during build configuration\&. .RE .PP -"config\&.stats" (\fBbool\fR) r\- +config\&.stats (\fBbool\fR) r\- .RS 4 \fB\-\-enable\-stats\fR was specified during build configuration\&. 
.RE .PP -"config\&.tcache" (\fBbool\fR) r\- +config\&.tcache (\fBbool\fR) r\- .RS 4 \fB\-\-disable\-tcache\fR was not specified during build configuration\&. .RE .PP -"config\&.tls" (\fBbool\fR) r\- +config\&.tls (\fBbool\fR) r\- .RS 4 \fB\-\-disable\-tls\fR was not specified during build configuration\&. .RE .PP -"config\&.utrace" (\fBbool\fR) r\- +config\&.utrace (\fBbool\fR) r\- .RS 4 \fB\-\-enable\-utrace\fR was specified during build configuration\&. .RE .PP -"config\&.valgrind" (\fBbool\fR) r\- +config\&.valgrind (\fBbool\fR) r\- .RS 4 \fB\-\-enable\-valgrind\fR was specified during build configuration\&. .RE .PP -"config\&.xmalloc" (\fBbool\fR) r\- +config\&.xmalloc (\fBbool\fR) r\- .RS 4 \fB\-\-enable\-xmalloc\fR was specified during build configuration\&. .RE .PP -"opt\&.abort" (\fBbool\fR) r\- +opt\&.abort (\fBbool\fR) r\- .RS 4 Abort\-on\-warning enabled/disabled\&. If true, most warnings are fatal\&. The process will call \fBabort\fR(3) @@ -757,97 +797,132 @@ in these cases\&. This option is disabled by default unless is specified during configuration, in which case it is enabled by default\&. .RE .PP -"opt\&.dss" (\fBconst char *\fR) r\- +opt\&.dss (\fBconst char *\fR) r\- .RS 4 dss (\fBsbrk\fR(2)) allocation precedence as related to \fBmmap\fR(2) allocation\&. The following settings are supported if \fBsbrk\fR(2) -is supported by the operating system: \(lqdisabled\(rq, \(lqprimary\(rq, and \(lqsecondary\(rq; otherwise only \(lqdisabled\(rq is supported\&. The default is \(lqsecondary\(rq if +is supported by the operating system: +\(lqdisabled\(rq, +\(lqprimary\(rq, and +\(lqsecondary\(rq; otherwise only +\(lqdisabled\(rq +is supported\&. The default is +\(lqsecondary\(rq +if \fBsbrk\fR(2) -is supported by the operating system; \(lqdisabled\(rq otherwise\&. +is supported by the operating system; +\(lqdisabled\(rq +otherwise\&. .RE .PP -"opt\&.lg_chunk" (\fBsize_t\fR) r\- +opt\&.lg_chunk (\fBsize_t\fR) r\- .RS 4 Virtual memory chunk size (log base 2)\&. 
If a chunk size outside the supported size range is specified, the size is silently clipped to the minimum/maximum supported size\&. The default chunk size is 2 MiB (2^21)\&. .RE .PP -"opt\&.narenas" (\fBsize_t\fR) r\- +opt\&.narenas (\fBunsigned\fR) r\- .RS 4 Maximum number of arenas to use for automatic multiplexing of threads and arenas\&. The default is four times the number of CPUs, or one if there is a single CPU\&. .RE .PP -"opt\&.lg_dirty_mult" (\fBssize_t\fR) r\- +opt\&.purge (\fBconst char *\fR) r\- +.RS 4 +Purge mode is \(lqratio\(rq (default) or \(lqdecay\(rq\&. See +opt\&.lg_dirty_mult +for details of the ratio mode\&. See +opt\&.decay_time +for details of the decay mode\&. +.RE +.PP +opt\&.lg_dirty_mult (\fBssize_t\fR) r\- .RS 4 Per\-arena minimum ratio (log base 2) of active to dirty pages\&. Some dirty unused pages may be allowed to accumulate, within the limit set by the ratio (or one chunk worth of dirty pages, whichever is greater), before informing the kernel about some of those pages via \fBmadvise\fR(2) or a similar system call\&. This provides the kernel with sufficient information to recycle dirty pages if physical memory becomes scarce and the pages remain unused\&. The default minimum ratio is 8:1 (2^3:1); an option value of \-1 will disable dirty page purging\&. See -"arenas\&.lg_dirty_mult" +arenas\&.lg_dirty_mult and -"arena\&.\&.lg_dirty_mult" +arena\&.\&.lg_dirty_mult for related dynamic control options\&. .RE .PP -"opt\&.stats_print" (\fBbool\fR) r\- +opt\&.decay_time (\fBssize_t\fR) r\- +.RS 4 +Approximate time in seconds from the creation of a set of unused dirty pages until an equivalent set of unused dirty pages is purged and/or reused\&. The pages are incrementally purged according to a sigmoidal decay curve that starts and ends with zero purge rate\&. A decay time of 0 causes all unused dirty pages to be purged immediately upon creation\&. A decay time of \-1 disables purging\&. The default decay time is 10 seconds\&. 
See +arenas\&.decay_time +and +arena\&.\&.decay_time +for related dynamic control options\&. +.RE +.PP +opt\&.stats_print (\fBbool\fR) r\- .RS 4 Enable/disable statistics printing at exit\&. If enabled, the -\fBmalloc_stats_print\fR\fB\fR +malloc_stats_print() function is called at program exit via an \fBatexit\fR(3) function\&. If \fB\-\-enable\-stats\fR is specified during configuration, this has the potential to cause deadlock for a multi\-threaded process that exits while one or more threads are executing in the memory allocation functions\&. Furthermore, -\fBatexit\fR\fB\fR +atexit() may allocate memory during application initialization and then deadlock internally when jemalloc in turn calls -\fBatexit\fR\fB\fR, so this option is not univerally usable (though the application can register its own -\fBatexit\fR\fB\fR +atexit(), so this option is not universally usable (though the application can register its own +atexit() function with equivalent functionality)\&. Therefore, this option should only be used with care; it is primarily intended as a performance tuning aid during application development\&. This option is disabled by default\&. .RE .PP -"opt\&.junk" (\fBconst char *\fR) r\- [\fB\-\-enable\-fill\fR] +opt\&.junk (\fBconst char *\fR) r\- [\fB\-\-enable\-fill\fR] .RS 4 -Junk filling\&. If set to "alloc", each byte of uninitialized allocated memory will be initialized to -0xa5\&. If set to "free", all deallocated memory will be initialized to -0x5a\&. If set to "true", both allocated and deallocated memory will be initialized, and if set to "false", junk filling be disabled entirely\&. This is intended for debugging and will impact performance negatively\&. This option is "false" by default unless +Junk filling\&. If set to +\(lqalloc\(rq, each byte of uninitialized allocated memory will be initialized to +0xa5\&. If set to +\(lqfree\(rq, all deallocated memory will be initialized to +0x5a\&. 
If set to +\(lqtrue\(rq, both allocated and deallocated memory will be initialized, and if set to +\(lqfalse\(rq, junk filling be disabled entirely\&. This is intended for debugging and will impact performance negatively\&. This option is +\(lqfalse\(rq +by default unless \fB\-\-enable\-debug\fR -is specified during configuration, in which case it is "true" by default unless running inside -\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2\&. +is specified during configuration, in which case it is +\(lqtrue\(rq +by default unless running inside +\m[blue]\fBValgrind\fR\m[]\&\s-2\u[3]\d\s+2\&. .RE .PP -"opt\&.quarantine" (\fBsize_t\fR) r\- [\fB\-\-enable\-fill\fR] +opt\&.quarantine (\fBsize_t\fR) r\- [\fB\-\-enable\-fill\fR] .RS 4 Per thread quarantine size in bytes\&. If non\-zero, each thread maintains a FIFO object quarantine that stores up to the specified number of bytes of memory\&. The quarantined memory is not freed until it is released from quarantine, though it is immediately junk\-filled if the -"opt\&.junk" +opt\&.junk option is enabled\&. This feature is of particular use in combination with -\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2, which can detect attempts to access quarantined objects\&. This is intended for debugging and will impact performance negatively\&. The default quarantine size is 0 unless running inside Valgrind, in which case the default is 16 MiB\&. +\m[blue]\fBValgrind\fR\m[]\&\s-2\u[3]\d\s+2, which can detect attempts to access quarantined objects\&. This is intended for debugging and will impact performance negatively\&. The default quarantine size is 0 unless running inside Valgrind, in which case the default is 16 MiB\&. .RE .PP -"opt\&.redzone" (\fBbool\fR) r\- [\fB\-\-enable\-fill\fR] +opt\&.redzone (\fBbool\fR) r\- [\fB\-\-enable\-fill\fR] .RS 4 Redzones enabled/disabled\&. If enabled, small allocations have redzones before and after them\&. 
Furthermore, if the -"opt\&.junk" +opt\&.junk option is enabled, the redzones are checked for corruption during deallocation\&. However, the primary intended purpose of this feature is to be used in combination with -\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2, which needs redzones in order to do effective buffer overflow/underflow detection\&. This option is intended for debugging and will impact performance negatively\&. This option is disabled by default unless running inside Valgrind\&. +\m[blue]\fBValgrind\fR\m[]\&\s-2\u[3]\d\s+2, which needs redzones in order to do effective buffer overflow/underflow detection\&. This option is intended for debugging and will impact performance negatively\&. This option is disabled by default unless running inside Valgrind\&. .RE .PP -"opt\&.zero" (\fBbool\fR) r\- [\fB\-\-enable\-fill\fR] +opt\&.zero (\fBbool\fR) r\- [\fB\-\-enable\-fill\fR] .RS 4 Zero filling enabled/disabled\&. If enabled, each byte of uninitialized allocated memory will be initialized to 0\&. Note that this initialization only happens once for each byte, so -\fBrealloc\fR\fB\fR +realloc() and -\fBrallocx\fR\fB\fR +rallocx() calls do not zero memory that was previously allocated\&. This is intended for debugging and will impact performance negatively\&. This option is disabled by default\&. .RE .PP -"opt\&.utrace" (\fBbool\fR) r\- [\fB\-\-enable\-utrace\fR] +opt\&.utrace (\fBbool\fR) r\- [\fB\-\-enable\-utrace\fR] .RS 4 Allocation tracing based on \fButrace\fR(2) enabled/disabled\&. This option is disabled by default\&. .RE .PP -"opt\&.xmalloc" (\fBbool\fR) r\- [\fB\-\-enable\-xmalloc\fR] +opt\&.xmalloc (\fBbool\fR) r\- [\fB\-\-enable\-xmalloc\fR] .RS 4 Abort\-on\-out\-of\-memory enabled/disabled\&. If enabled, rather than returning failure for any allocation function, display a diagnostic message on \fBSTDERR_FILENO\fR @@ -867,92 +942,94 @@ malloc_conf = "xmalloc:true"; This option is disabled by default\&. 
.RE .PP -"opt\&.tcache" (\fBbool\fR) r\- [\fB\-\-enable\-tcache\fR] +opt\&.tcache (\fBbool\fR) r\- [\fB\-\-enable\-tcache\fR] .RS 4 Thread\-specific caching (tcache) enabled/disabled\&. When there are multiple threads, each thread uses a tcache for objects up to a certain size\&. Thread\-specific caching allows many allocations to be satisfied without performing any thread synchronization, at the cost of increased memory use\&. See the -"opt\&.lg_tcache_max" +opt\&.lg_tcache_max option for related tuning information\&. This option is enabled by default unless running inside -\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2, in which case it is forcefully disabled\&. +\m[blue]\fBValgrind\fR\m[]\&\s-2\u[3]\d\s+2, in which case it is forcefully disabled\&. .RE .PP -"opt\&.lg_tcache_max" (\fBsize_t\fR) r\- [\fB\-\-enable\-tcache\fR] +opt\&.lg_tcache_max (\fBsize_t\fR) r\- [\fB\-\-enable\-tcache\fR] .RS 4 Maximum size class (log base 2) to cache in the thread\-specific cache (tcache)\&. At a minimum, all small size classes are cached, and at a maximum all large size classes are cached\&. The default maximum is 32 KiB (2^15)\&. .RE .PP -"opt\&.prof" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] +opt\&.prof (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] .RS 4 Memory profiling enabled/disabled\&. If enabled, profile memory allocation activity\&. See the -"opt\&.prof_active" +opt\&.prof_active option for on\-the\-fly activation/deactivation\&. See the -"opt\&.lg_prof_sample" +opt\&.lg_prof_sample option for probabilistic sampling control\&. See the -"opt\&.prof_accum" +opt\&.prof_accum option for control of cumulative sample reporting\&. See the -"opt\&.lg_prof_interval" +opt\&.lg_prof_interval option for information on interval\-triggered profile dumping, the -"opt\&.prof_gdump" +opt\&.prof_gdump option for information on high\-water\-triggered profile dumping, and the -"opt\&.prof_final" +opt\&.prof_final option for final profile dumping\&. 
Profile output is compatible with the \fBjeprof\fR command, which is based on the \fBpprof\fR that is developed as part of the -\m[blue]\fBgperftools package\fR\m[]\&\s-2\u[3]\d\s+2\&. +\m[blue]\fBgperftools package\fR\m[]\&\s-2\u[4]\d\s+2\&. See +HEAP PROFILE FORMAT +for heap profile format documentation\&. .RE .PP -"opt\&.prof_prefix" (\fBconst char *\fR) r\- [\fB\-\-enable\-prof\fR] +opt\&.prof_prefix (\fBconst char *\fR) r\- [\fB\-\-enable\-prof\fR] .RS 4 Filename prefix for profile dumps\&. If the prefix is set to the empty string, no automatic dumps will occur; this is primarily useful for disabling the automatic final heap dump (which also disables leak reporting, if enabled)\&. The default prefix is jeprof\&. .RE .PP -"opt\&.prof_active" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] +opt\&.prof_active (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] .RS 4 Profiling activated/deactivated\&. This is a secondary control mechanism that makes it possible to start the application with profiling enabled (see the -"opt\&.prof" +opt\&.prof option) but inactive, then toggle profiling at any time during program execution with the -"prof\&.active" +prof\&.active mallctl\&. This option is enabled by default\&. .RE .PP -"opt\&.prof_thread_active_init" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] +opt\&.prof_thread_active_init (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] .RS 4 Initial setting for -"thread\&.prof\&.active" +thread\&.prof\&.active in newly created threads\&. The initial setting for newly created threads can also be changed during execution via the -"prof\&.thread_active_init" +prof\&.thread_active_init mallctl\&. This option is enabled by default\&. .RE .PP -"opt\&.lg_prof_sample" (\fBsize_t\fR) r\- [\fB\-\-enable\-prof\fR] +opt\&.lg_prof_sample (\fBsize_t\fR) r\- [\fB\-\-enable\-prof\fR] .RS 4 Average interval (log base 2) between allocation samples, as measured in bytes of allocation activity\&. 
Increasing the sampling interval decreases profile fidelity, but also decreases the computational overhead\&. The default sample interval is 512 KiB (2^19 B)\&. .RE .PP -"opt\&.prof_accum" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] +opt\&.prof_accum (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] .RS 4 Reporting of cumulative object/byte counts in profile dumps enabled/disabled\&. If this option is enabled, every unique backtrace must be stored for the duration of execution\&. Depending on the application, this can impose a large memory overhead, and the cumulative counts are not always of interest\&. This option is disabled by default\&. .RE .PP -"opt\&.lg_prof_interval" (\fBssize_t\fR) r\- [\fB\-\-enable\-prof\fR] +opt\&.lg_prof_interval (\fBssize_t\fR) r\- [\fB\-\-enable\-prof\fR] .RS 4 Average interval (log base 2) between memory profile dumps, as measured in bytes of allocation activity\&. The actual interval between dumps may be sporadic because decentralized allocation counters are used to avoid synchronization bottlenecks\&. Profiles are dumped to files named according to the pattern \&.\&.\&.i\&.heap, where is controlled by the -"opt\&.prof_prefix" +opt\&.prof_prefix option\&. By default, interval\-triggered profile dumping is disabled (encoded as \-1)\&. .RE .PP -"opt\&.prof_gdump" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] +opt\&.prof_gdump (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] .RS 4 Set the initial state of -"prof\&.gdump", which when enabled triggers a memory profile dump every time the total virtual memory exceeds the previous maximum\&. This option is disabled by default\&. +prof\&.gdump, which when enabled triggers a memory profile dump every time the total virtual memory exceeds the previous maximum\&. This option is disabled by default\&. 
.RE .PP -"opt\&.prof_final" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] +opt\&.prof_final (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] .RS 4 Use an \fBatexit\fR(3) @@ -960,127 +1037,150 @@ function to dump final memory usage to a file named according to the pattern \&.\&.\&.f\&.heap, where is controlled by the -"opt\&.prof_prefix" +opt\&.prof_prefix option\&. Note that -\fBatexit\fR\fB\fR +atexit() may allocate memory during application initialization and then deadlock internally when jemalloc in turn calls -\fBatexit\fR\fB\fR, so this option is not univerally usable (though the application can register its own -\fBatexit\fR\fB\fR +atexit(), so this option is not universally usable (though the application can register its own +atexit() function with equivalent functionality)\&. This option is disabled by default\&. .RE .PP -"opt\&.prof_leak" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] +opt\&.prof_leak (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] .RS 4 Leak reporting enabled/disabled\&. If enabled, use an \fBatexit\fR(3) function to report memory leaks detected by allocation sampling\&. See the -"opt\&.prof" +opt\&.prof option for information on analyzing heap profile output\&. This option is disabled by default\&. .RE .PP -"thread\&.arena" (\fBunsigned\fR) rw +thread\&.arena (\fBunsigned\fR) rw .RS 4 Get or set the arena associated with the calling thread\&. If the specified arena was not initialized beforehand (see the -"arenas\&.initialized" +arenas\&.initialized mallctl), it will be automatically initialized as a side effect of calling this interface\&. .RE .PP -"thread\&.allocated" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +thread\&.allocated (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Get the total number of bytes ever allocated by the calling thread\&. This counter has the potential to wrap around; it is up to the application to appropriately interpret the counter in such cases\&. 
.RE .PP -"thread\&.allocatedp" (\fBuint64_t *\fR) r\- [\fB\-\-enable\-stats\fR] +thread\&.allocatedp (\fBuint64_t *\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Get a pointer to the the value that is returned by the -"thread\&.allocated" +thread\&.allocated mallctl\&. This is useful for avoiding the overhead of repeated -\fBmallctl*\fR\fB\fR +mallctl*() calls\&. .RE .PP -"thread\&.deallocated" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +thread\&.deallocated (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Get the total number of bytes ever deallocated by the calling thread\&. This counter has the potential to wrap around; it is up to the application to appropriately interpret the counter in such cases\&. .RE .PP -"thread\&.deallocatedp" (\fBuint64_t *\fR) r\- [\fB\-\-enable\-stats\fR] +thread\&.deallocatedp (\fBuint64_t *\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Get a pointer to the the value that is returned by the -"thread\&.deallocated" +thread\&.deallocated mallctl\&. This is useful for avoiding the overhead of repeated -\fBmallctl*\fR\fB\fR +mallctl*() calls\&. .RE .PP -"thread\&.tcache\&.enabled" (\fBbool\fR) rw [\fB\-\-enable\-tcache\fR] +thread\&.tcache\&.enabled (\fBbool\fR) rw [\fB\-\-enable\-tcache\fR] .RS 4 Enable/disable calling thread\*(Aqs tcache\&. The tcache is implicitly flushed as a side effect of becoming disabled (see -"thread\&.tcache\&.flush")\&. +thread\&.tcache\&.flush)\&. .RE .PP -"thread\&.tcache\&.flush" (\fBvoid\fR) \-\- [\fB\-\-enable\-tcache\fR] +thread\&.tcache\&.flush (\fBvoid\fR) \-\- [\fB\-\-enable\-tcache\fR] .RS 4 Flush calling thread\*(Aqs thread\-specific cache (tcache)\&. This interface releases all cached objects and internal data structures associated with the calling thread\*(Aqs tcache\&. Ordinarily, this interface need not be called, since automatic periodic incremental garbage collection occurs, and the thread cache is automatically discarded when a thread exits\&. 
However, garbage collection is triggered by allocation activity, so it is possible for a thread that stops allocating/deallocating to retain its cache indefinitely, in which case the developer may find manual flushing useful\&. .RE .PP -"thread\&.prof\&.name" (\fBconst char *\fR) r\- or \-w [\fB\-\-enable\-prof\fR] +thread\&.prof\&.name (\fBconst char *\fR) r\- or \-w [\fB\-\-enable\-prof\fR] .RS 4 -Get/set the descriptive name associated with the calling thread in memory profile dumps\&. An internal copy of the name string is created, so the input string need not be maintained after this interface completes execution\&. The output string of this interface should be copied for non\-ephemeral uses, because multiple implementation details can cause asynchronous string deallocation\&. Furthermore, each invocation of this interface can only read or write; simultaneous read/write is not supported due to string lifetime limitations\&. The name string must nil\-terminated and comprised only of characters in the sets recognized by +Get/set the descriptive name associated with the calling thread in memory profile dumps\&. An internal copy of the name string is created, so the input string need not be maintained after this interface completes execution\&. The output string of this interface should be copied for non\-ephemeral uses, because multiple implementation details can cause asynchronous string deallocation\&. Furthermore, each invocation of this interface can only read or write; simultaneous read/write is not supported due to string lifetime limitations\&. The name string must be nil\-terminated and comprised only of characters in the sets recognized by \fBisgraph\fR(3) and \fBisblank\fR(3)\&. .RE .PP -"thread\&.prof\&.active" (\fBbool\fR) rw [\fB\-\-enable\-prof\fR] +thread\&.prof\&.active (\fBbool\fR) rw [\fB\-\-enable\-prof\fR] .RS 4 Control whether sampling is currently active for the calling thread\&. 
This is an activation mechanism in addition to -"prof\&.active"; both must be active for the calling thread to sample\&. This flag is enabled by default\&. +prof\&.active; both must be active for the calling thread to sample\&. This flag is enabled by default\&. .RE .PP -"tcache\&.create" (\fBunsigned\fR) r\- [\fB\-\-enable\-tcache\fR] +tcache\&.create (\fBunsigned\fR) r\- [\fB\-\-enable\-tcache\fR] .RS 4 Create an explicit thread\-specific cache (tcache) and return an identifier that can be passed to the \fBMALLOCX_TCACHE(\fR\fB\fItc\fR\fR\fB)\fR macro to explicitly use the specified cache rather than the automatically managed one that is used by default\&. Each explicit cache can be used by only one thread at a time; the application must assure that this constraint holds\&. .RE .PP -"tcache\&.flush" (\fBunsigned\fR) \-w [\fB\-\-enable\-tcache\fR] +tcache\&.flush (\fBunsigned\fR) \-w [\fB\-\-enable\-tcache\fR] .RS 4 Flush the specified thread\-specific cache (tcache)\&. The same considerations apply to this interface as to -"thread\&.tcache\&.flush", except that the tcache will never be automatically be discarded\&. +thread\&.tcache\&.flush, except that the tcache will never be automatically discarded\&. .RE .PP -"tcache\&.destroy" (\fBunsigned\fR) \-w [\fB\-\-enable\-tcache\fR] +tcache\&.destroy (\fBunsigned\fR) \-w [\fB\-\-enable\-tcache\fR] .RS 4 Flush the specified thread\-specific cache (tcache) and make the identifier available for use during a future tcache creation\&. .RE .PP -"arena\&.\&.purge" (\fBvoid\fR) \-\- +arena\&.\&.purge (\fBvoid\fR) \-\- .RS 4 -Purge unused dirty pages for arena , or for all arenas if equals -"arenas\&.narenas"\&. +Purge all unused dirty pages for arena , or for all arenas if equals +arenas\&.narenas\&. .RE .PP -"arena\&.\&.dss" (\fBconst char *\fR) rw +arena\&.\&.decay (\fBvoid\fR) \-\- +.RS 4 +Trigger decay\-based purging of unused dirty pages for arena , or for all arenas if equals +arenas\&.narenas\&. 
The proportion of unused dirty pages to be purged depends on the current time; see
+opt\&.decay_time
+for details\&.
+.RE
+.PP
+arena\&.\&.reset (\fBvoid\fR) \-\-
+.RS 4
+Discard all of the arena\*(Aqs extant allocations\&. This interface can only be used with arenas created via
+arenas\&.extend\&. None of the arena\*(Aqs discarded/cached allocations may be accessed afterward\&. As part of this requirement, all thread caches which were used to allocate/deallocate in conjunction with the arena must be flushed beforehand\&. This interface cannot be used if running inside Valgrind, nor if the
+quarantine
+size is non\-zero\&.
+.RE
+.PP
+arena\&.\&.dss (\fBconst char *\fR) rw
 .RS 4
 Set the precedence of dss allocation as related to mmap allocation for arena , or for all arenas if equals
-"arenas\&.narenas"\&. See
-"opt\&.dss"
+arenas\&.narenas\&. See
+opt\&.dss
 for supported settings\&.
 .RE
 .PP
-"arena\&.\&.lg_dirty_mult" (\fBssize_t\fR) rw
+arena\&.\&.lg_dirty_mult (\fBssize_t\fR) rw
 .RS 4
 Current per\-arena minimum ratio (log base 2) of active to dirty pages for arena \&. Each time this interface is set and the ratio is increased, pages are synchronously purged as necessary to impose the new ratio\&. See
-"opt\&.lg_dirty_mult"
+opt\&.lg_dirty_mult
 for additional information\&.
 .RE
 .PP
-"arena\&.\&.chunk_hooks" (\fBchunk_hooks_t\fR) rw
+arena\&.\&.decay_time (\fBssize_t\fR) rw
+.RS 4
+Current per\-arena approximate time in seconds from the creation of a set of unused dirty pages until an equivalent set of unused dirty pages is purged and/or reused\&. Each time this interface is set, all currently unused dirty pages are considered to have fully decayed, which causes immediate purging of all unused dirty pages unless the decay time is set to \-1 (i\&.e\&. purging disabled)\&. See
+opt\&.decay_time
+for additional information\&.
+.RE
+.PP
+arena\&.\&.chunk_hooks (\fBchunk_hooks_t\fR) rw
 .RS 4
 Get or set the chunk management hook functions for arena \&. 
The functions must be capable of operating on all extant chunks associated with arena , usually by passing unknown chunks to the replaced functions\&. In practice, it is feasible to control allocation for arenas created via -"arenas\&.extend" +arenas\&.extend such that all chunks originate from an application\-supplied chunk allocator (by setting custom chunk hook functions just after arena creation), but the automatically created arenas may have already created chunks prior to the application having an opportunity to take over chunk allocation\&. .sp .if n \{\ @@ -1149,7 +1249,7 @@ is not on success or \fBNULL\fR on error\&. Committed memory may be committed in absolute terms as on a system that does not overcommit, or in implicit terms as on a system that overcommits and satisfies physical memory needs on demand via soft page faults\&. Note that replacing the default chunk allocation function makes the arena\*(Aqs -"arena\&.\&.dss" +arena\&.\&.dss setting irrelevant\&. .HP \w'typedef\ bool\ (chunk_dalloc_t)('u .BI "typedef bool (chunk_dalloc_t)(void\ *" "chunk" ", size_t\ " "size" ", bool\ " "committed" ", unsigned\ " "arena_ind" ");" @@ -1296,407 +1396,504 @@ into one contiguous chunk, operating on \fIarena_ind\fR, returning false upon success\&. If the function returns true, this indicates that the chunks remain distinct mappings and therefore should continue to be operated on independently\&. .RE .PP -"arenas\&.narenas" (\fBunsigned\fR) r\- +arenas\&.narenas (\fBunsigned\fR) r\- .RS 4 Current limit on number of arenas\&. .RE .PP -"arenas\&.initialized" (\fBbool *\fR) r\- +arenas\&.initialized (\fBbool *\fR) r\- .RS 4 An array of -"arenas\&.narenas" +arenas\&.narenas booleans\&. Each boolean indicates whether the corresponding arena is initialized\&. 
.RE .PP -"arenas\&.lg_dirty_mult" (\fBssize_t\fR) rw +arenas\&.lg_dirty_mult (\fBssize_t\fR) rw .RS 4 Current default per\-arena minimum ratio (log base 2) of active to dirty pages, used to initialize -"arena\&.\&.lg_dirty_mult" +arena\&.\&.lg_dirty_mult during arena creation\&. See -"opt\&.lg_dirty_mult" +opt\&.lg_dirty_mult for additional information\&. .RE .PP -"arenas\&.quantum" (\fBsize_t\fR) r\- +arenas\&.decay_time (\fBssize_t\fR) rw +.RS 4 +Current default per\-arena approximate time in seconds from the creation of a set of unused dirty pages until an equivalent set of unused dirty pages is purged and/or reused, used to initialize +arena\&.\&.decay_time +during arena creation\&. See +opt\&.decay_time +for additional information\&. +.RE +.PP +arenas\&.quantum (\fBsize_t\fR) r\- .RS 4 Quantum size\&. .RE .PP -"arenas\&.page" (\fBsize_t\fR) r\- +arenas\&.page (\fBsize_t\fR) r\- .RS 4 Page size\&. .RE .PP -"arenas\&.tcache_max" (\fBsize_t\fR) r\- [\fB\-\-enable\-tcache\fR] +arenas\&.tcache_max (\fBsize_t\fR) r\- [\fB\-\-enable\-tcache\fR] .RS 4 Maximum thread\-cached size class\&. .RE .PP -"arenas\&.nbins" (\fBunsigned\fR) r\- +arenas\&.nbins (\fBunsigned\fR) r\- .RS 4 Number of bin size classes\&. .RE .PP -"arenas\&.nhbins" (\fBunsigned\fR) r\- [\fB\-\-enable\-tcache\fR] +arenas\&.nhbins (\fBunsigned\fR) r\- [\fB\-\-enable\-tcache\fR] .RS 4 Total number of thread cache bin size classes\&. .RE .PP -"arenas\&.bin\&.\&.size" (\fBsize_t\fR) r\- +arenas\&.bin\&.\&.size (\fBsize_t\fR) r\- .RS 4 Maximum size supported by size class\&. .RE .PP -"arenas\&.bin\&.\&.nregs" (\fBuint32_t\fR) r\- +arenas\&.bin\&.\&.nregs (\fBuint32_t\fR) r\- .RS 4 Number of regions per page run\&. .RE .PP -"arenas\&.bin\&.\&.run_size" (\fBsize_t\fR) r\- +arenas\&.bin\&.\&.run_size (\fBsize_t\fR) r\- .RS 4 Number of bytes per page run\&. .RE .PP -"arenas\&.nlruns" (\fBunsigned\fR) r\- +arenas\&.nlruns (\fBunsigned\fR) r\- .RS 4 Total number of large size classes\&. 
.RE .PP -"arenas\&.lrun\&.\&.size" (\fBsize_t\fR) r\- +arenas\&.lrun\&.\&.size (\fBsize_t\fR) r\- .RS 4 Maximum size supported by this large size class\&. .RE .PP -"arenas\&.nhchunks" (\fBunsigned\fR) r\- +arenas\&.nhchunks (\fBunsigned\fR) r\- .RS 4 Total number of huge size classes\&. .RE .PP -"arenas\&.hchunk\&.\&.size" (\fBsize_t\fR) r\- +arenas\&.hchunk\&.\&.size (\fBsize_t\fR) r\- .RS 4 Maximum size supported by this huge size class\&. .RE .PP -"arenas\&.extend" (\fBunsigned\fR) r\- +arenas\&.extend (\fBunsigned\fR) r\- .RS 4 Extend the array of arenas by appending a new arena, and returning the new arena index\&. .RE .PP -"prof\&.thread_active_init" (\fBbool\fR) rw [\fB\-\-enable\-prof\fR] +prof\&.thread_active_init (\fBbool\fR) rw [\fB\-\-enable\-prof\fR] .RS 4 Control the initial setting for -"thread\&.prof\&.active" +thread\&.prof\&.active in newly created threads\&. See the -"opt\&.prof_thread_active_init" +opt\&.prof_thread_active_init option for additional information\&. .RE .PP -"prof\&.active" (\fBbool\fR) rw [\fB\-\-enable\-prof\fR] +prof\&.active (\fBbool\fR) rw [\fB\-\-enable\-prof\fR] .RS 4 Control whether sampling is currently active\&. See the -"opt\&.prof_active" +opt\&.prof_active option for additional information, as well as the interrelated -"thread\&.prof\&.active" +thread\&.prof\&.active mallctl\&. .RE .PP -"prof\&.dump" (\fBconst char *\fR) \-w [\fB\-\-enable\-prof\fR] +prof\&.dump (\fBconst char *\fR) \-w [\fB\-\-enable\-prof\fR] .RS 4 Dump a memory profile to the specified file, or if NULL is specified, to a file according to the pattern \&.\&.\&.m\&.heap, where is controlled by the -"opt\&.prof_prefix" +opt\&.prof_prefix option\&. .RE .PP -"prof\&.gdump" (\fBbool\fR) rw [\fB\-\-enable\-prof\fR] +prof\&.gdump (\fBbool\fR) rw [\fB\-\-enable\-prof\fR] .RS 4 When enabled, trigger a memory profile dump every time the total virtual memory exceeds the previous maximum\&. 
Profiles are dumped to files named according to the pattern \&.\&.\&.u\&.heap, where is controlled by the -"opt\&.prof_prefix" +opt\&.prof_prefix option\&. .RE .PP -"prof\&.reset" (\fBsize_t\fR) \-w [\fB\-\-enable\-prof\fR] +prof\&.reset (\fBsize_t\fR) \-w [\fB\-\-enable\-prof\fR] .RS 4 Reset all memory profile statistics, and optionally update the sample rate (see -"opt\&.lg_prof_sample" +opt\&.lg_prof_sample and -"prof\&.lg_sample")\&. +prof\&.lg_sample)\&. .RE .PP -"prof\&.lg_sample" (\fBsize_t\fR) r\- [\fB\-\-enable\-prof\fR] +prof\&.lg_sample (\fBsize_t\fR) r\- [\fB\-\-enable\-prof\fR] .RS 4 Get the current sample rate (see -"opt\&.lg_prof_sample")\&. +opt\&.lg_prof_sample)\&. .RE .PP -"prof\&.interval" (\fBuint64_t\fR) r\- [\fB\-\-enable\-prof\fR] +prof\&.interval (\fBuint64_t\fR) r\- [\fB\-\-enable\-prof\fR] .RS 4 -Average number of bytes allocated between inverval\-based profile dumps\&. See the -"opt\&.lg_prof_interval" +Average number of bytes allocated between interval\-based profile dumps\&. See the +opt\&.lg_prof_interval option for additional information\&. .RE .PP -"stats\&.cactive" (\fBsize_t *\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.cactive (\fBsize_t *\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Pointer to a counter that contains an approximate count of the current number of bytes in active pages\&. The estimate may be high, but never low, because each arena rounds up when computing its contribution to the counter\&. Note that the -"epoch" +epoch mallctl has no bearing on this counter\&. Furthermore, counter consistency is maintained via atomic operations, so it is necessary to use an atomic operation in order to guarantee a consistent read when dereferencing the pointer\&. .RE .PP -"stats\&.allocated" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.allocated (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Total number of bytes allocated by the application\&. 
.RE .PP -"stats\&.active" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.active (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Total number of bytes in active pages allocated by the application\&. This is a multiple of the page size, and greater than or equal to -"stats\&.allocated"\&. This does not include -"stats\&.arenas\&.\&.pdirty", nor pages entirely devoted to allocator metadata\&. +stats\&.allocated\&. This does not include +stats\&.arenas\&.\&.pdirty, nor pages entirely devoted to allocator metadata\&. .RE .PP -"stats\&.metadata" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.metadata (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Total number of bytes dedicated to metadata, which comprise base allocations used for bootstrap\-sensitive internal allocator data structures, arena chunk headers (see -"stats\&.arenas\&.\&.metadata\&.mapped"), and internal allocations (see -"stats\&.arenas\&.\&.metadata\&.allocated")\&. +stats\&.arenas\&.\&.metadata\&.mapped), and internal allocations (see +stats\&.arenas\&.\&.metadata\&.allocated)\&. .RE .PP -"stats\&.resident" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.resident (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Maximum number of bytes in physically resident data pages mapped by the allocator, comprising all pages dedicated to allocator metadata, pages backing active allocations, and unused dirty pages\&. This is a maximum rather than precise because pages may not actually be physically resident if they correspond to demand\-zeroed virtual memory that has not yet been touched\&. This is a multiple of the page size, and is larger than -"stats\&.active"\&. +stats\&.active\&. .RE .PP -"stats\&.mapped" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.mapped (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Total number of bytes in active chunks mapped by the allocator\&. This is a multiple of the chunk size, and is larger than -"stats\&.active"\&. 
This does not include inactive chunks, even those that contain unused dirty pages, which means that there is no strict ordering between this and -"stats\&.resident"\&. +stats\&.active\&. This does not include inactive chunks, even those that contain unused dirty pages, which means that there is no strict ordering between this and +stats\&.resident\&. .RE .PP -"stats\&.arenas\&.\&.dss" (\fBconst char *\fR) r\- +stats\&.retained (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +.RS 4 +Total number of bytes in virtual memory mappings that were retained rather than being returned to the operating system via e\&.g\&. +\fBmunmap\fR(2)\&. Retained virtual memory is typically untouched, decommitted, or purged, so it has no strongly associated physical memory (see +chunk hooks +for details)\&. Retained memory is excluded from mapped memory statistics, e\&.g\&. +stats\&.mapped\&. +.RE +.PP +stats\&.arenas\&.\&.dss (\fBconst char *\fR) r\- .RS 4 dss (\fBsbrk\fR(2)) allocation precedence as related to \fBmmap\fR(2) allocation\&. See -"opt\&.dss" +opt\&.dss for details\&. .RE .PP -"stats\&.arenas\&.\&.lg_dirty_mult" (\fBssize_t\fR) r\- +stats\&.arenas\&.\&.lg_dirty_mult (\fBssize_t\fR) r\- .RS 4 Minimum ratio (log base 2) of active to dirty pages\&. See -"opt\&.lg_dirty_mult" +opt\&.lg_dirty_mult for details\&. .RE .PP -"stats\&.arenas\&.\&.nthreads" (\fBunsigned\fR) r\- +stats\&.arenas\&.\&.decay_time (\fBssize_t\fR) r\- +.RS 4 +Approximate time in seconds from the creation of a set of unused dirty pages until an equivalent set of unused dirty pages is purged and/or reused\&. See +opt\&.decay_time +for details\&. +.RE +.PP +stats\&.arenas\&.\&.nthreads (\fBunsigned\fR) r\- .RS 4 Number of threads currently assigned to arena\&. .RE .PP -"stats\&.arenas\&.\&.pactive" (\fBsize_t\fR) r\- +stats\&.arenas\&.\&.pactive (\fBsize_t\fR) r\- .RS 4 Number of pages in active runs\&. 
.RE .PP -"stats\&.arenas\&.\&.pdirty" (\fBsize_t\fR) r\- +stats\&.arenas\&.\&.pdirty (\fBsize_t\fR) r\- .RS 4 Number of pages within unused runs that are potentially dirty, and for which -\fBmadvise\fR\fB\fI\&.\&.\&.\fR\fR\fB \fR\fB\fI\fBMADV_DONTNEED\fR\fR\fR +madvise\fI\&.\&.\&.\fR \fI\fBMADV_DONTNEED\fR\fR or similar has not been called\&. .RE .PP -"stats\&.arenas\&.\&.mapped" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.mapped (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Number of mapped bytes\&. .RE .PP -"stats\&.arenas\&.\&.metadata\&.mapped" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.retained (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +.RS 4 +Number of retained bytes\&. See +stats\&.retained +for details\&. +.RE +.PP +stats\&.arenas\&.\&.metadata\&.mapped (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Number of mapped bytes in arena chunk headers, which track the states of the non\-metadata pages\&. .RE .PP -"stats\&.arenas\&.\&.metadata\&.allocated" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.metadata\&.allocated (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Number of bytes dedicated to internal allocations\&. Internal allocations differ from application\-originated allocations in that they are for internal use, and that they are omitted from heap profiles\&. This statistic is reported separately from -"stats\&.metadata" +stats\&.metadata and -"stats\&.arenas\&.\&.metadata\&.mapped" +stats\&.arenas\&.\&.metadata\&.mapped because it overlaps with e\&.g\&. the -"stats\&.allocated" +stats\&.allocated and -"stats\&.active" +stats\&.active statistics, whereas the other metadata statistics do not\&. .RE .PP -"stats\&.arenas\&.\&.npurge" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.npurge (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Number of dirty page purge sweeps performed\&. 
.RE .PP -"stats\&.arenas\&.\&.nmadvise" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.nmadvise (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Number of -\fBmadvise\fR\fB\fI\&.\&.\&.\fR\fR\fB \fR\fB\fI\fBMADV_DONTNEED\fR\fR\fR +madvise\fI\&.\&.\&.\fR \fI\fBMADV_DONTNEED\fR\fR or similar calls made to purge dirty pages\&. .RE .PP -"stats\&.arenas\&.\&.purged" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.purged (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Number of pages purged\&. .RE .PP -"stats\&.arenas\&.\&.small\&.allocated" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.small\&.allocated (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Number of bytes currently allocated by small objects\&. .RE .PP -"stats\&.arenas\&.\&.small\&.nmalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.small\&.nmalloc (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of allocation requests served by small bins\&. .RE .PP -"stats\&.arenas\&.\&.small\&.ndalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.small\&.ndalloc (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of small objects returned to bins\&. .RE .PP -"stats\&.arenas\&.\&.small\&.nrequests" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.small\&.nrequests (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of small allocation requests\&. .RE .PP -"stats\&.arenas\&.\&.large\&.allocated" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.large\&.allocated (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Number of bytes currently allocated by large objects\&. 
.RE .PP -"stats\&.arenas\&.\&.large\&.nmalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.large\&.nmalloc (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of large allocation requests served directly by the arena\&. .RE .PP -"stats\&.arenas\&.\&.large\&.ndalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.large\&.ndalloc (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of large deallocation requests served directly by the arena\&. .RE .PP -"stats\&.arenas\&.\&.large\&.nrequests" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.large\&.nrequests (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of large allocation requests\&. .RE .PP -"stats\&.arenas\&.\&.huge\&.allocated" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.huge\&.allocated (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Number of bytes currently allocated by huge objects\&. .RE .PP -"stats\&.arenas\&.\&.huge\&.nmalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.huge\&.nmalloc (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of huge allocation requests served directly by the arena\&. .RE .PP -"stats\&.arenas\&.\&.huge\&.ndalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.huge\&.ndalloc (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of huge deallocation requests served directly by the arena\&. .RE .PP -"stats\&.arenas\&.\&.huge\&.nrequests" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.huge\&.nrequests (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of huge allocation requests\&. .RE .PP -"stats\&.arenas\&.\&.bins\&.\&.nmalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.bins\&.\&.nmalloc (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of allocations served by bin\&. 
.RE .PP -"stats\&.arenas\&.\&.bins\&.\&.ndalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.bins\&.\&.ndalloc (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of allocations returned to bin\&. .RE .PP -"stats\&.arenas\&.\&.bins\&.\&.nrequests" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.bins\&.\&.nrequests (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of allocation requests\&. .RE .PP -"stats\&.arenas\&.\&.bins\&.\&.curregs" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.bins\&.\&.curregs (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Current number of regions for this size class\&. .RE .PP -"stats\&.arenas\&.\&.bins\&.\&.nfills" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR \fB\-\-enable\-tcache\fR] +stats\&.arenas\&.\&.bins\&.\&.nfills (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR \fB\-\-enable\-tcache\fR] .RS 4 Cumulative number of tcache fills\&. .RE .PP -"stats\&.arenas\&.\&.bins\&.\&.nflushes" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR \fB\-\-enable\-tcache\fR] +stats\&.arenas\&.\&.bins\&.\&.nflushes (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR \fB\-\-enable\-tcache\fR] .RS 4 Cumulative number of tcache flushes\&. .RE .PP -"stats\&.arenas\&.\&.bins\&.\&.nruns" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.bins\&.\&.nruns (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of runs created\&. .RE .PP -"stats\&.arenas\&.\&.bins\&.\&.nreruns" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.bins\&.\&.nreruns (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of times the current run from which to allocate changed\&. .RE .PP -"stats\&.arenas\&.\&.bins\&.\&.curruns" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.bins\&.\&.curruns (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Current number of runs\&. 
.RE .PP -"stats\&.arenas\&.\&.lruns\&.\&.nmalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.lruns\&.\&.nmalloc (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of allocation requests for this size class served directly by the arena\&. .RE .PP -"stats\&.arenas\&.\&.lruns\&.\&.ndalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.lruns\&.\&.ndalloc (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of deallocation requests for this size class served directly by the arena\&. .RE .PP -"stats\&.arenas\&.\&.lruns\&.\&.nrequests" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.lruns\&.\&.nrequests (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of allocation requests for this size class\&. .RE .PP -"stats\&.arenas\&.\&.lruns\&.\&.curruns" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.lruns\&.\&.curruns (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Current number of runs for this size class\&. .RE .PP -"stats\&.arenas\&.\&.hchunks\&.\&.nmalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.hchunks\&.\&.nmalloc (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of allocation requests for this size class served directly by the arena\&. .RE .PP -"stats\&.arenas\&.\&.hchunks\&.\&.ndalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.hchunks\&.\&.ndalloc (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of deallocation requests for this size class served directly by the arena\&. .RE .PP -"stats\&.arenas\&.\&.hchunks\&.\&.nrequests" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.hchunks\&.\&.nrequests (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of allocation requests for this size class\&. 
.RE .PP -"stats\&.arenas\&.\&.hchunks\&.\&.curhchunks" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.hchunks\&.\&.curhchunks (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Current number of huge allocations for this size class\&. .RE +.SH "HEAP PROFILE FORMAT" +.PP +Although the heap profiling functionality was originally designed to be compatible with the +\fBpprof\fR +command that is developed as part of the +\m[blue]\fBgperftools package\fR\m[]\&\s-2\u[4]\d\s+2, the addition of per thread heap profiling functionality required a different heap profile format\&. The +\fBjeprof\fR +command is derived from +\fBpprof\fR, with enhancements to support the heap profile format described here\&. +.PP +In the following hypothetical heap profile, +\fB[\&.\&.\&.]\fR +indicates elision for the sake of compactness\&. +.sp +.if n \{\ +.RS 4 +.\} +.nf +heap_v2/524288 + t*: 28106: 56637512 [0: 0] + [\&.\&.\&.] + t3: 352: 16777344 [0: 0] + [\&.\&.\&.] + t99: 17754: 29341640 [0: 0] + [\&.\&.\&.] +@ 0x5f86da8 0x5f5a1dc [\&.\&.\&.] 0x29e4d4e 0xa200316 0xabb2988 [\&.\&.\&.] + t*: 13: 6688 [0: 0] + t3: 12: 6496 [0: ] + t99: 1: 192 [0: 0] +[\&.\&.\&.] + +MAPPED_LIBRARIES: +[\&.\&.\&.] +.fi +.if n \{\ +.RE +.\} +.sp +The following matches the above heap profile, but most tokens are replaced with +\fB\fR +to indicate descriptions of the corresponding fields\&. +.sp +.if n \{\ +.RS 4 +.\} +.nf +/ + : : [: ] + [\&.\&.\&.] + : : [: ] + [\&.\&.\&.] + : : [: ] + [\&.\&.\&.] +@ [\&.\&.\&.] [\&.\&.\&.] + : : [: ] + : : [: ] + : : [: ] +[\&.\&.\&.] + +MAPPED_LIBRARIES: +/maps> +.fi +.if n \{\ +.RE +.\} .SH "DEBUGGING MALLOC PROBLEMS" .PP When debugging, it is a good idea to configure/build jemalloc with the @@ -1705,14 +1902,16 @@ and \fB\-\-enable\-fill\fR options, and recompile the program with suitable options and symbols for debugger support\&. 
When so configured, jemalloc incorporates a wide variety of run\-time assertions that catch application errors such as double\-free, write\-after\-free, etc\&. .PP -Programs often accidentally depend on \(lquninitialized\(rq memory actually being filled with zero bytes\&. Junk filling (see the -"opt\&.junk" +Programs often accidentally depend on +\(lquninitialized\(rq +memory actually being filled with zero bytes\&. Junk filling (see the +opt\&.junk option) tends to expose such bugs in the form of obviously incorrect results and/or coredumps\&. Conversely, zero filling (see the -"opt\&.zero" +opt\&.zero option) eliminates the symptoms of such bugs\&. Between these two options, it is usually possible to quickly detect, diagnose, and eliminate such bugs\&. .PP This implementation does not provide much detail about the problems it detects, because the performance impact for storing such information would be prohibitive\&. However, jemalloc does integrate with the most excellent -\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2 +\m[blue]\fBValgrind\fR\m[]\&\s-2\u[3]\d\s+2 tool if the \fB\-\-enable\-valgrind\fR configuration option is enabled\&. @@ -1720,7 +1919,7 @@ configuration option is enabled\&. .PP If any of the memory allocation/deallocation functions detect an error or warning condition, a message will be printed to file descriptor \fBSTDERR_FILENO\fR\&. Errors will result in the process dumping core\&. If the -"opt\&.abort" +opt\&.abort option is set, most warnings are treated as errors\&. .PP The @@ -1728,22 +1927,23 @@ The variable allows the programmer to override the function which emits the text strings forming the errors and warnings if for some reason the \fBSTDERR_FILENO\fR file descriptor is not suitable for this\&. -\fBmalloc_message\fR\fB\fR +malloc_message() takes the \fIcbopaque\fR pointer argument that is \fBNULL\fR unless overridden by the arguments in a call to -\fBmalloc_stats_print\fR\fB\fR, followed by a string pointer\&. 
Please note that doing anything which tries to allocate memory in this function is likely to result in a crash or deadlock\&. +malloc_stats_print(), followed by a string pointer\&. Please note that doing anything which tries to allocate memory in this function is likely to result in a crash or deadlock\&. .PP -All messages are prefixed by \(lq:\(rq\&. +All messages are prefixed by +\(lq: \(rq\&. .SH "RETURN VALUES" .SS "Standard API" .PP The -\fBmalloc\fR\fB\fR +malloc() and -\fBcalloc\fR\fB\fR +calloc() functions return a pointer to the allocated memory if successful; otherwise a \fBNULL\fR pointer is returned and @@ -1752,9 +1952,9 @@ is set to ENOMEM\&. .PP The -\fBposix_memalign\fR\fB\fR +posix_memalign() function returns the value 0 if successful; otherwise it returns an error value\&. The -\fBposix_memalign\fR\fB\fR +posix_memalign() function will fail if: .PP EINVAL @@ -1771,13 +1971,13 @@ Memory allocation error\&. .RE .PP The -\fBaligned_alloc\fR\fB\fR +aligned_alloc() function returns a pointer to the allocated memory if successful; otherwise a \fBNULL\fR pointer is returned and \fIerrno\fR is set\&. The -\fBaligned_alloc\fR\fB\fR +aligned_alloc() function will fail if: .PP EINVAL @@ -1793,7 +1993,7 @@ Memory allocation error\&. .RE .PP The -\fBrealloc\fR\fB\fR +realloc() function returns a pointer, possibly identical to \fIptr\fR, to the allocated memory if successful; otherwise a \fBNULL\fR @@ -1802,44 +2002,44 @@ pointer is returned, and is set to ENOMEM if the error was the result of an allocation failure\&. The -\fBrealloc\fR\fB\fR +realloc() function always leaves the original buffer intact when an error occurs\&. .PP The -\fBfree\fR\fB\fR +free() function returns no value\&. 
.SS "Non\-standard API" .PP The -\fBmallocx\fR\fB\fR +mallocx() and -\fBrallocx\fR\fB\fR +rallocx() functions return a pointer to the allocated memory if successful; otherwise a \fBNULL\fR pointer is returned to indicate insufficient contiguous memory was available to service the allocation request\&. .PP The -\fBxallocx\fR\fB\fR +xallocx() function returns the real size of the resulting resized allocation pointed to by \fIptr\fR, which is a value less than \fIsize\fR if the allocation could not be adequately grown in place\&. .PP The -\fBsallocx\fR\fB\fR +sallocx() function returns the real size of the allocation pointed to by \fIptr\fR\&. .PP The -\fBnallocx\fR\fB\fR +nallocx() returns the real size that would result from a successful equivalent -\fBmallocx\fR\fB\fR +mallocx() function call, or zero if insufficient memory is available to perform the size computation\&. .PP The -\fBmallctl\fR\fB\fR, -\fBmallctlnametomib\fR\fB\fR, and -\fBmallctlbymib\fR\fB\fR +mallctl(), +mallctlnametomib(), and +mallctlbymib() functions return 0 on success; otherwise they return an error value\&. The functions will fail if: .PP EINVAL @@ -1874,12 +2074,12 @@ A memory allocation failure occurred\&. EFAULT .RS 4 An interface with side effects failed in some way not directly related to -\fBmallctl*\fR\fB\fR +mallctl*() read/write processing\&. .RE .PP The -\fBmalloc_usable_size\fR\fB\fR +malloc_usable_size() function returns the usable size of the allocation pointed to by \fIptr\fR\&. .SH "ENVIRONMENT" @@ -1929,14 +2129,14 @@ malloc_conf = "lg_chunk:24"; .SH "STANDARDS" .PP The -\fBmalloc\fR\fB\fR, -\fBcalloc\fR\fB\fR, -\fBrealloc\fR\fB\fR, and -\fBfree\fR\fB\fR +malloc(), +calloc(), +realloc(), and +free() functions conform to ISO/IEC 9899:1990 (\(lqISO C90\(rq)\&. .PP The -\fBposix_memalign\fR\fB\fR +posix_memalign() function conforms to IEEE Std 1003\&.1\-2001 (\(lqPOSIX\&.1\(rq)\&. 
.SH "AUTHOR" .PP @@ -1947,14 +2147,19 @@ function conforms to IEEE Std 1003\&.1\-2001 (\(lqPOSIX\&.1\(rq)\&. .IP " 1." 4 jemalloc website .RS 4 -\%http://www.canonware.com/jemalloc/ +\%http://jemalloc.net/ .RE .IP " 2." 4 +JSON format +.RS 4 +\%http://www.json.org/ +.RE +.IP " 3." 4 Valgrind .RS 4 \%http://valgrind.org/ .RE -.IP " 3." 4 +.IP " 4." 4 gperftools package .RS 4 \%http://code.google.com/p/gperftools/ diff --git a/deps/jemalloc/doc/jemalloc.html b/deps/jemalloc/doc/jemalloc.html index 7b8e2be8c..db2504f6e 100644 --- a/deps/jemalloc/doc/jemalloc.html +++ b/deps/jemalloc/doc/jemalloc.html @@ -1,28 +1,29 @@ -JEMALLOC

Name

jemalloc — general purpose memory allocation functions

LIBRARY

This manual describes jemalloc 4.0.3-0-ge9192eacf8935e29fc62fddc2701f7942b1cc02c. More information - can be found at the jemalloc website.

SYNOPSIS

#include <jemalloc/jemalloc.h>

Standard API

void *malloc(size_t size);
 
void *calloc(size_t number,
 size_t size);
 
int posix_memalign(void **ptr,
 size_t alignment,
 size_t size);
 
void *aligned_alloc(size_t alignment,
 size_t size);
 
void *realloc(void *ptr,
 size_t size);
 
void free(void *ptr);
 

Non-standard API

void *mallocx(size_t size,
 int flags);
 
void *rallocx(void *ptr,
 size_t size,
 int flags);
 
size_t xallocx(void *ptr,
 size_t size,
 size_t extra,
 int flags);
 
size_t sallocx(void *ptr,
 int flags);
 
void dallocx(void *ptr,
 int flags);
 
void sdallocx(void *ptr,
 size_t size,
 int flags);
 
size_t nallocx(size_t size,
 int flags);
 
int mallctl(const char *name,
 void *oldp,
 size_t *oldlenp,
 void *newp,
 size_t newlen);
 
int mallctlnametomib(const char *name,
 size_t *mibp,
 size_t *miblenp);
 
int mallctlbymib(const size_t *mib,
 size_t miblen,
 void *oldp,
 size_t *oldlenp,
 void *newp,
 size_t newlen);
 
void malloc_stats_print(void (*write_cb) + +JEMALLOC

Name

jemalloc — general purpose memory allocation functions

DESCRIPTION

Standard API

The malloc() function allocates + ,

 void *cbopaque,
 const char *opts);
 
size_t malloc_usable_size(const void *ptr);
 
void (*malloc_message)(void *cbopaque,
 const char *s);
 

const char *malloc_conf;

DESCRIPTION

Standard API

The malloc() function allocates size bytes of uninitialized memory. The allocated space is suitably aligned (after possible pointer coercion) for storage - of any type of object.

The calloc() function allocates + of any type of object.

The calloc() function allocates space for number objects, each size bytes in length. The result is identical to - calling malloc() with an argument of + calling malloc() with an argument of number * size, with the exception that the allocated memory is explicitly initialized to zero - bytes.

The posix_memalign() function + bytes.

The posix_memalign() function allocates size bytes of memory such that the allocation's base address is a multiple of alignment, and returns the allocation in the value pointed to by ptr. The requested alignment must be a power of 2 at least as large as - sizeof(void *).

The aligned_alloc() function + sizeof(void *).

The aligned_alloc() function allocates size bytes of memory such that the allocation's base address is a multiple of alignment. The requested alignment must be a power of 2. Behavior is undefined if size is not an integral multiple of - alignment.

The realloc() function changes the + alignment.

The realloc() function changes the size of the previously allocated memory referenced by ptr to size bytes. The contents of the memory are unchanged up to the lesser of the new and old @@ -30,69 +31,65 @@ portion of the memory are undefined. Upon success, the memory referenced by ptr is freed and a pointer to the newly allocated memory is returned. Note that - realloc() may move the memory allocation, + realloc() may move the memory allocation, resulting in a different return value than ptr. If ptr is NULL, the - realloc() function behaves identically to - malloc() for the specified size.

The free() function causes the + realloc() function behaves identically to + malloc() for the specified size.

The free() function causes the allocated memory referenced by ptr to be made available for future allocations. If ptr is - NULL, no action occurs.

Non-standard API

The mallocx(), - rallocx(), - xallocx(), - sallocx(), - dallocx(), - sdallocx(), and - nallocx() functions all have a + NULL, no action occurs.

Non-standard API

The mallocx(), + rallocx(), + xallocx(), + sallocx(), + dallocx(), + sdallocx(), and + nallocx() functions all have a flags argument that can be used to specify options. The functions only check the options that are contextually relevant. Use bitwise or (|) operations to specify one or more of the following: -

MALLOCX_LG_ALIGN(la) +

MALLOCX_LG_ALIGN(la)

Align the memory allocation to start at an address that is a multiple of (1 << la). This macro does not validate that la is within the valid - range.

MALLOCX_ALIGN(a) + range.

MALLOCX_ALIGN(a)

Align the memory allocation to start at an address that is a multiple of a, where a is a power of two. This macro does not validate that a is a power of 2. -

MALLOCX_ZERO

Initialize newly allocated memory to contain zero +

MALLOCX_ZERO

Initialize newly allocated memory to contain zero bytes. In the growing reallocation case, the real size prior to reallocation defines the boundary between untouched bytes and those that are initialized to contain zero bytes. If this macro is - absent, newly allocated memory is uninitialized.

MALLOCX_TCACHE(tc) + absent, newly allocated memory is uninitialized.

MALLOCX_TCACHE(tc)

Use the thread-specific cache (tcache) specified by the identifier tc, which must have been - acquired via the - "tcache.create" - + acquired via the tcache.create mallctl. This macro does not validate that tc specifies a valid - identifier.

MALLOCX_TCACHE_NONE

Do not use a thread-specific cache (tcache). Unless + identifier.

MALLOCX_TCACHE_NONE

Do not use a thread-specific cache (tcache). Unless MALLOCX_TCACHE(tc) or MALLOCX_TCACHE_NONE is specified, an automatically managed tcache will be used under many circumstances. This macro cannot be used in the same flags argument as - MALLOCX_TCACHE(tc).

MALLOCX_ARENA(a) + MALLOCX_TCACHE(tc).

MALLOCX_ARENA(a)

Use the arena specified by the index a. This macro has no effect for regions that were allocated via an arena other than the one specified. This macro does not validate that a specifies an arena index in the valid range.

-

The mallocx() function allocates at +

The mallocx() function allocates at least size bytes of memory, and returns a pointer to the base address of the allocation. Behavior is undefined if - size is 0, or if request size - overflows due to size class and/or alignment constraints.

The rallocx() function resizes the + size is 0.

The rallocx() function resizes the allocation at ptr to be at least size bytes, and returns a pointer to the base address of the resulting allocation, which may or may not have moved from its original location. Behavior is undefined if - size is 0, or if request size - overflows due to size class and/or alignment constraints.

The xallocx() function resizes the + size is 0.

The xallocx() function resizes the allocation at ptr in place to be at least size bytes, and returns the real size of the allocation. If extra is non-zero, an attempt is @@ -101,24 +98,24 @@ the extra byte(s) will not by itself result in failure to resize. Behavior is undefined if size is 0, or if (size + extra - > SIZE_T_MAX).

The sallocx() function returns the - real size of the allocation at ptr.

The dallocx() function causes the + > SIZE_T_MAX).

The sallocx() function returns the + real size of the allocation at ptr.

The dallocx() function causes the memory referenced by ptr to be made available for - future allocations.

The sdallocx() function is an - extension of dallocx() with a + future allocations.

The sdallocx() function is an + extension of dallocx() with a size parameter to allow the caller to pass in the allocation size as an optimization. The minimum valid input size is the original requested size of the allocation, and the maximum valid input size is the corresponding value returned by - nallocx() or - sallocx().

The nallocx() function allocates no + nallocx() or + sallocx().

The nallocx() function allocates no memory, but it performs the same size computation as the - mallocx() function, and returns the real + mallocx() function, and returns the real size of the allocation that would result from the equivalent - mallocx() function call. Behavior is - undefined if size is 0, or if - request size overflows due to size class and/or alignment - constraints.

The mallctl() function provides a + mallocx() function call, or + 0 if the inputs exceed the maximum supported size + class and/or alignment. Behavior is undefined if + size is 0.

The mallctl() function provides a general interface for introspecting the memory allocator, as well as setting modifiable parameters and triggering actions. The period-separated name argument specifies a @@ -130,12 +127,12 @@ write a value, pass a pointer to the value via newp, and its length via newlen; otherwise pass NULL - and 0.

The mallctlnametomib() function + and 0.

The mallctlnametomib() function provides a way to avoid repeated name lookups for applications that repeatedly query the same portion of the namespace, by translating a name - to a “Management Information Base” (MIB) that can be passed - repeatedly to mallctlbymib(). Upon - successful return from mallctlnametomib(), + to a “Management Information Base†(MIB) that can be passed + repeatedly to mallctlbymib(). Upon + successful return from mallctlnametomib(), mibp contains an array of *miblenp integers, where *miblenp is the lesser of the number of components @@ -145,9 +142,7 @@ period-separated name components, which results in a partial MIB that can be used as the basis for constructing a complete MIB. For name components that are integers (e.g. the 2 in - - "arenas.bin.2.size" - ), + arenas.bin.2.size), the corresponding MIB component will always be that integer. Therefore, it is legitimate to construct code like the following:

 unsigned nbins, i;
@@ -164,65 +159,62 @@ for (i = 0; i < nbins; i++) {
 
 	mib[2] = i;
 	len = sizeof(bin_size);
-	mallctlbymib(mib, miblen, &bin_size, &len, NULL, 0);
+	mallctlbymib(mib, miblen, (void *)&bin_size, &len, NULL, 0);
 	/* Do something with bin_size... */
-}

The malloc_stats_print() function - writes human-readable summary statistics via the - write_cb callback function pointer and - cbopaque data passed to - write_cb, or - malloc_message() if - write_cb is NULL. This - function can be called repeatedly. General information that never - changes during execution can be omitted by specifying "g" as a character +}

The malloc_stats_print() function writes + summary statistics via the write_cb callback + function pointer and cbopaque data passed to + write_cb, or malloc_message() + if write_cb is NULL. The + statistics are presented in human-readable form unless “J†is + specified as a character within the opts string, in + which case the statistics are presented in JSON format. This function can be + called repeatedly. General information that never changes during + execution can be omitted by specifying “g†as a character within the opts string. Note that - malloc_message() uses the - mallctl*() functions internally, so - inconsistent statistics can be reported if multiple threads use these - functions simultaneously. If --enable-stats is - specified during configuration, “m” and “a” can - be specified to omit merged arena and per arena statistics, respectively; - “b”, “l”, and “h” can be specified to - omit per size class statistics for bins, large objects, and huge objects, - respectively. Unrecognized characters are silently ignored. Note that - thread caching may prevent some statistics from being completely up to - date, since extra locking would be required to merge counters that track - thread cache operations. -

The malloc_usable_size() function + malloc_message() uses the + mallctl*() functions internally, so inconsistent + statistics can be reported if multiple threads use these functions + simultaneously. If --enable-stats is specified during + configuration, “m†and “a†can be specified to + omit merged arena and per arena statistics, respectively; + “bâ€, “lâ€, and “h†can be specified + to omit per size class statistics for bins, large objects, and huge + objects, respectively. Unrecognized characters are silently ignored. + Note that thread caching may prevent some statistics from being completely + up to date, since extra locking would be required to merge counters that + track thread cache operations.

The malloc_usable_size() function returns the usable size of the allocation pointed to by ptr. The return value may be larger than the size that was requested during allocation. The - malloc_usable_size() function is not a - mechanism for in-place realloc(); rather + malloc_usable_size() function is not a + mechanism for in-place realloc(); rather it is provided solely as a tool for introspection purposes. Any discrepancy between the requested allocation size and the size reported - by malloc_usable_size() should not be + by malloc_usable_size() should not be depended on, since such behavior is entirely implementation-dependent. -

TUNING

Once, when the first call is made to one of the memory allocation +

TUNING

Once, when the first call is made to one of the memory allocation routines, the allocator initializes its internals based in part on various - options that can be specified at compile- or run-time.

The string pointed to by the global variable - malloc_conf, the “name” of the file - referenced by the symbolic link named /etc/malloc.conf, and the value of the + options that can be specified at compile- or run-time.

The string specified via --with-malloc-conf, the + string pointed to by the global variable malloc_conf, the + “name†of the file referenced by the symbolic link named + /etc/malloc.conf, and the value of the environment variable MALLOC_CONF, will be interpreted, in that order, from left to right as options. Note that malloc_conf may be read before - main() is entered, so the declaration of + main() is entered, so the declaration of malloc_conf should specify an initializer that contains - the final value to be read by jemalloc. malloc_conf is - a compile-time setting, whereas /etc/malloc.conf and MALLOC_CONF - can be safely set any time prior to program invocation.

An options string is a comma-separated list of option:value pairs. - There is one key corresponding to each - "opt.*" - mallctl (see the MALLCTL NAMESPACE section for options + the final value to be read by jemalloc. --with-malloc-conf + and malloc_conf are compile-time mechanisms, whereas + /etc/malloc.conf and + MALLOC_CONF can be safely set any time prior to program + invocation.

An options string is a comma-separated list of option:value pairs. + There is one key corresponding to each opt.* mallctl (see the MALLCTL NAMESPACE section for options documentation). For example, abort:true,narenas:1 sets - the - "opt.abort" - and - "opt.narenas" - options. Some + the opt.abort and opt.narenas options. Some options have boolean values (true/false), others have integer values (base 8, 10, or 16, depending on prefix), and yet others have raw string - values.

IMPLEMENTATION NOTES

Traditionally, allocators have used + values.

IMPLEMENTATION NOTES

Traditionally, allocators have used sbrk(2) to obtain memory, which is suboptimal for several reasons, including race conditions, increased fragmentation, and artificial limitations on maximum usable memory. If @@ -246,19 +238,18 @@ for (i = 0; i < nbins; i++) { order to make it possible to completely avoid synchronization for most allocation requests. Such caching allows very fast allocation in the common case, but it increases memory usage and fragmentation, since a - bounded number of objects can remain allocated in each thread cache.

Memory is conceptually broken into equal-sized chunks, where the - chunk size is a power of two that is greater than the page size. Chunks - are always aligned to multiples of the chunk size. This alignment makes it - possible to find metadata for user objects very quickly.

User objects are broken into three categories according to size: - small, large, and huge. Small and large objects are managed entirely by - arenas; huge objects are additionally aggregated in a single data structure - that is shared by all threads. Huge objects are typically used by - applications infrequently enough that this single data structure is not a - scalability issue.

Each chunk that is managed by an arena tracks its contents as runs of + bounded number of objects can remain allocated in each thread cache.

Memory is conceptually broken into equal-sized chunks, where the chunk + size is a power of two that is greater than the page size. Chunks are + always aligned to multiples of the chunk size. This alignment makes it + possible to find metadata for user objects very quickly. User objects are + broken into three categories according to size: small, large, and huge. + Multiple small and large objects can reside within a single chunk, whereas + huge objects each have one or more chunks backing them. Each chunk that + contains small and/or large objects tracks its contents as runs of contiguous pages (unused, backing a set of small objects, or backing one - large object). The combination of chunk alignment and chunk page maps - makes it possible to determine all metadata regarding small and large - allocations in constant time.

Small objects are managed in groups by page runs. Each run maintains + large object). The combination of chunk alignment and chunk page maps makes + it possible to determine all metadata regarding small and large allocations + in constant time.

Small objects are managed in groups by page runs. Each run maintains a bitmap to track which regions are in use. Allocation requests that are no more than half the quantum (8 or 16, depending on architecture) are rounded up to the nearest power of two that is at least sizeof(double). All other object size @@ -266,22 +257,20 @@ for (i = 0; i < nbins; i++) { classes for each doubling in size, which limits internal fragmentation to approximately 20% for all but the smallest size classes. Small size classes are smaller than four times the page size, large size classes are smaller - than the chunk size (see the - "opt.lg_chunk" - option), and - huge size classes extend from the chunk size up to one size class less than - the full address space size.

Allocations are packed tightly together, which can be an issue for + than the chunk size (see the opt.lg_chunk option), and + huge size classes extend from the chunk size up to the largest size class + that does not exceed PTRDIFF_MAX.

Allocations are packed tightly together, which can be an issue for multi-threaded applications. If you need to assure that allocations do not suffer from cacheline sharing, round your allocation requests up to the nearest multiple of the cacheline size, or specify cacheline alignment when - allocating.

The realloc(), - rallocx(), and - xallocx() functions may resize allocations + allocating.

The realloc(), + rallocx(), and + xallocx() functions may resize allocations without moving them under limited circumstances. Unlike the - *allocx() API, the standard API does not + *allocx() API, the standard API does not officially round up the usable size of an allocation to the nearest size class, so technically it is necessary to call - realloc() to grow e.g. a 9-byte allocation to + realloc() to grow e.g. a 9-byte allocation to 16 bytes, or shrink a 16-byte allocation to 9 bytes. Growth and shrinkage trivially succeeds in place as long as the pre-size and post-size both round up to the same size class. No other API guarantees are made regarding @@ -289,147 +278,111 @@ for (i = 0; i < nbins; i++) { and huge allocations in place, as long as the pre-size and post-size are both large or both huge. In such cases shrinkage always succeeds for large size classes, but for huge size classes the chunk allocator must support - splitting (see - "arena.<i>.chunk_hooks" - ). + splitting (see arena.<i>.chunk_hooks). Growth only succeeds if the trailing memory is currently available, and additionally for huge size classes the chunk allocator must support merging.

Assuming 2 MiB chunks, 4 KiB pages, and a 16-byte quantum on a - 64-bit system, the size classes in each category are as shown in Table 1.

Table 1. Size classes

CategorySpacingSize
Smalllg[8]
16[16, 32, 48, 64, 80, 96, 112, 128]
32[160, 192, 224, 256]
64[320, 384, 448, 512]
128[640, 768, 896, 1024]
256[1280, 1536, 1792, 2048]
512[2560, 3072, 3584, 4096]
1 KiB[5 KiB, 6 KiB, 7 KiB, 8 KiB]
2 KiB[10 KiB, 12 KiB, 14 KiB]
Large2 KiB[16 KiB]
4 KiB[20 KiB, 24 KiB, 28 KiB, 32 KiB]
8 KiB[40 KiB, 48 KiB, 56 KiB, 64 KiB]<br>
16 KiB[80 KiB, 96 KiB, 112 KiB, 128 KiB]
32 KiB[160 KiB, 192 KiB, 224 KiB, 256 KiB]
64 KiB[320 KiB, 384 KiB, 448 KiB, 512 KiB]
128 KiB[640 KiB, 768 KiB, 896 KiB, 1 MiB]
256 KiB[1280 KiB, 1536 KiB, 1792 KiB]
Huge256 KiB[2 MiB]
512 KiB[2560 KiB, 3 MiB, 3584 KiB, 4 MiB]
1 MiB[5 MiB, 6 MiB, 7 MiB, 8 MiB]
2 MiB[10 MiB, 12 MiB, 14 MiB, 16 MiB]
4 MiB[20 MiB, 24 MiB, 28 MiB, 32 MiB]
8 MiB[40 MiB, 48 MiB, 56 MiB, 64 MiB]
......

MALLCTL NAMESPACE

The following names are defined in the namespace accessible via the + mallctl*() functions. Value types are specified in parentheses, their readable/writable statuses are encoded as rw, r-, -w, or --, and required build configuration flags follow, if any. A name element encoded as <i> or <j> indicates an integer component, where the integer varies from 0 to some upper value that must be determined via - introspection. In the case of - "stats.arenas.<i>.*" - , - <i> equal to - "arenas.narenas" - can be + introspection. In the case of stats.arenas.<i>.*, + <i> equal to arenas.narenas can be used to access the summation of statistics from all arenas. Take special - note of the - "epoch" - mallctl, - which controls refreshing of cached dynamic statistics.

- - "version" - + note of the epoch mallctl, + which controls refreshing of cached dynamic statistics.

+ version (const char *) r- -

Return the jemalloc version string.

- - "epoch" - +

Return the jemalloc version string.

+ epoch (uint64_t) rw

If a value is passed in, refresh the data from which - the mallctl*() functions report values, + the mallctl*() functions report values, and increment the epoch. Return the current epoch. This is useful for - detecting whether another thread caused a refresh.

- - "config.cache_oblivious" - + detecting whether another thread caused a refresh.

+ config.cache_oblivious (bool) r-

--enable-cache-oblivious was specified - during build configuration.

- - "config.debug" - + during build configuration.

+ config.debug (bool) r-

--enable-debug was specified during - build configuration.

- - "config.fill" - + build configuration.

+ config.fill (bool) r-

--enable-fill was specified during - build configuration.

- - "config.lazy_lock" - + build configuration.

+ config.lazy_lock (bool) r-

--enable-lazy-lock was specified - during build configuration.

- - "config.munmap" - + during build configuration.

+ config.malloc_conf + (const char *) + r- +

Embedded configure-time-specified run-time options + string, empty unless --with-malloc-conf was specified + during build configuration.

+ config.munmap (bool) r-

--enable-munmap was specified during - build configuration.

- - "config.prof" - + build configuration.

+ config.prof (bool) r-

--enable-prof was specified during - build configuration.

- - "config.prof_libgcc" - + build configuration.

+ config.prof_libgcc (bool) r-

--disable-prof-libgcc was not - specified during build configuration.

- - "config.prof_libunwind" - + specified during build configuration.

+ config.prof_libunwind (bool) r-

--enable-prof-libunwind was specified - during build configuration.

- - "config.stats" - + during build configuration.

+ config.stats (bool) r-

--enable-stats was specified during - build configuration.

- - "config.tcache" - + build configuration.

+ config.tcache (bool) r-

--disable-tcache was not specified - during build configuration.

- - "config.tls" - + during build configuration.

+ config.tls (bool) r-

--disable-tls was not specified during - build configuration.

- - "config.utrace" - + build configuration.

+ config.utrace (bool) r-

--enable-utrace was specified during - build configuration.

- - "config.valgrind" - + build configuration.

+ config.valgrind (bool) r-

--enable-valgrind was specified during - build configuration.

- - "config.xmalloc" - + build configuration.

+ config.xmalloc (bool) r-

--enable-xmalloc was specified during - build configuration.

- - "opt.abort" - + build configuration.

+ opt.abort (bool) r-

Abort-on-warning enabled/disabled. If true, most @@ -437,43 +390,42 @@ for (i = 0; i < nbins; i++) { abort(3) in these cases. This option is disabled by default unless --enable-debug is specified during configuration, in which case it is enabled by default. -

- - "opt.dss" - +

+ opt.dss (const char *) r-

dss (sbrk(2)) allocation precedence as related to mmap(2) allocation. The following settings are supported if sbrk(2) is supported by the operating - system: “disabled”, “primary”, and - “secondary”; otherwise only “disabled” is - supported. The default is “secondary” if + system: “disabledâ€, “primaryâ€, and + “secondaryâ€; otherwise only “disabled†is + supported. The default is “secondary†if sbrk(2) is supported by the operating - system; “disabled” otherwise. -

- - "opt.lg_chunk" - + system; “disabled†otherwise. +

+ opt.lg_chunk (size_t) r-

Virtual memory chunk size (log base 2). If a chunk size outside the supported size range is specified, the size is silently clipped to the minimum/maximum supported size. The default chunk size is 2 MiB (2^21). -

- - "opt.narenas" - - (size_t) +

+ opt.narenas + (unsigned) r-

Maximum number of arenas to use for automatic multiplexing of threads and arenas. The default is four times the - number of CPUs, or one if there is a single CPU.

- - "opt.lg_dirty_mult" - + number of CPUs, or one if there is a single CPU.

+ opt.purge + (const char *) + r- +

Purge mode is &ldquo;ratio&rdquo; (default) or +	&ldquo;decay&rdquo;.  See opt.lg_dirty_mult +	for details of the ratio mode.  See opt.decay_time for +	details of the decay mode.<br>

+ opt.lg_dirty_mult (ssize_t) r-

Per-arena minimum ratio (log base 2) of active to dirty @@ -484,53 +436,57 @@ for (i = 0; i < nbins; i++) { provides the kernel with sufficient information to recycle dirty pages if physical memory becomes scarce and the pages remain unused. The default minimum ratio is 8:1 (2^3:1); an option value of -1 will - disable dirty page purging. See - "arenas.lg_dirty_mult" - - and - "arena.<i>.lg_dirty_mult" - - for related dynamic control options.

- - "opt.stats_print" - + disable dirty page purging. See arenas.lg_dirty_mult + and arena.<i>.lg_dirty_mult + for related dynamic control options.

+ opt.decay_time + (ssize_t) + r- +

Approximate time in seconds from the creation of a set + of unused dirty pages until an equivalent set of unused dirty pages is + purged and/or reused. The pages are incrementally purged according to a + sigmoidal decay curve that starts and ends with zero purge rate. A + decay time of 0 causes all unused dirty pages to be purged immediately + upon creation. A decay time of -1 disables purging. The default decay + time is 10 seconds. See arenas.decay_time + and arena.<i>.decay_time + for related dynamic control options. +

+ opt.stats_print (bool) r-

Enable/disable statistics printing at exit. If - enabled, the malloc_stats_print() + enabled, the malloc_stats_print() function is called at program exit via an atexit(3) function. If --enable-stats is specified during configuration, this has the potential to cause deadlock for a multi-threaded process that exits while one or more threads are executing in the memory allocation - functions. Furthermore, atexit() may + functions. Furthermore, atexit() may allocate memory during application initialization and then deadlock internally when jemalloc in turn calls - atexit(), so this option is not - univerally usable (though the application can register its own - atexit() function with equivalent + atexit(), so this option is not + universally usable (though the application can register its own + atexit() function with equivalent functionality). Therefore, this option should only be used with care; it is primarily intended as a performance tuning aid during application - development. This option is disabled by default.

- - "opt.junk" - + development. This option is disabled by default.

+ opt.junk (const char *) r- [--enable-fill] -

Junk filling. If set to "alloc", each byte of - uninitialized allocated memory will be initialized to - 0xa5. If set to "free", all deallocated memory will - be initialized to 0x5a. If set to "true", both - allocated and deallocated memory will be initialized, and if set to - "false", junk filling be disabled entirely. This is intended for - debugging and will impact performance negatively. This option is - "false" by default unless --enable-debug is specified - during configuration, in which case it is "true" by default unless - running inside Valgrind.

- - "opt.quarantine" - +

Junk filling.  If set to &ldquo;alloc&rdquo;, each byte +	of uninitialized allocated memory will be initialized to +	0xa5.  If set to &ldquo;free&rdquo;, all deallocated +	memory will be initialized to 0x5a.  If set to +	&ldquo;true&rdquo;, both allocated and deallocated memory will be +	initialized, and if set to &ldquo;false&rdquo;, junk filling will be +	disabled entirely.  This is intended for debugging and will impact +	performance negatively.  This option is &ldquo;false&rdquo; by default +	unless --enable-debug is specified during +	configuration, in which case it is &ldquo;true&rdquo; by default unless +	running inside Valgrind.<br>

+ opt.quarantine (size_t) r- [--enable-fill] @@ -538,58 +494,46 @@ for (i = 0; i < nbins; i++) { thread maintains a FIFO object quarantine that stores up to the specified number of bytes of memory. The quarantined memory is not freed until it is released from quarantine, though it is immediately - junk-filled if the - "opt.junk" - option is + junk-filled if the opt.junk option is enabled. This feature is of particular use in combination with Valgrind, which can detect attempts to access quarantined objects. This is intended for debugging and will impact performance negatively. The default quarantine size is 0 unless running inside Valgrind, in which case the default is 16 - MiB.

- - "opt.redzone" - + MiB.

+ opt.redzone (bool) r- [--enable-fill]

Redzones enabled/disabled. If enabled, small allocations have redzones before and after them. Furthermore, if the - - "opt.junk" - option is + opt.junk option is enabled, the redzones are checked for corruption during deallocation. However, the primary intended purpose of this feature is to be used in combination with Valgrind, which needs redzones in order to do effective buffer overflow/underflow detection. This option is intended for debugging and will impact performance negatively. This option is disabled by - default unless running inside Valgrind.

- - "opt.zero" - + default unless running inside Valgrind.

+ opt.zero (bool) r- [--enable-fill]

Zero filling enabled/disabled. If enabled, each byte of uninitialized allocated memory will be initialized to 0. Note that this initialization only happens once for each byte, so - realloc() and - rallocx() calls do not zero memory that + realloc() and + rallocx() calls do not zero memory that was previously allocated. This is intended for debugging and will impact performance negatively. This option is disabled by default. -

- - "opt.utrace" - +

+ opt.utrace (bool) r- [--enable-utrace]

Allocation tracing based on utrace(2) enabled/disabled. This option - is disabled by default.

- - "opt.xmalloc" - + is disabled by default.

+ opt.xmalloc (bool) r- [--enable-xmalloc] @@ -602,10 +546,8 @@ for (i = 0; i < nbins; i++) { including the following in the source code:

 malloc_conf = "xmalloc:true";

- This option is disabled by default.

- - "opt.tcache" - + This option is disabled by default.

+ opt.tcache (bool) r- [--enable-tcache] @@ -613,55 +555,36 @@ malloc_conf = "xmalloc:true";

there are multiple threads, each thread uses a tcache for objects up to a certain size. Thread-specific caching allows many allocations to be satisfied without performing any thread synchronization, at the cost of - increased memory use. See the - "opt.lg_tcache_max" - + increased memory use. See the opt.lg_tcache_max option for related tuning information. This option is enabled by default unless running inside Valgrind, in which case it is - forcefully disabled.

- - "opt.lg_tcache_max" - + forcefully disabled.

+ opt.lg_tcache_max (size_t) r- [--enable-tcache]

Maximum size class (log base 2) to cache in the thread-specific cache (tcache). At a minimum, all small size classes are cached, and at a maximum all large size classes are cached. The - default maximum is 32 KiB (2^15).

- - "opt.prof" - + default maximum is 32 KiB (2^15).

+ opt.prof (bool) r- [--enable-prof]

Memory profiling enabled/disabled. If enabled, profile - memory allocation activity. See the - "opt.prof_active" - - option for on-the-fly activation/deactivation. See the - "opt.lg_prof_sample" - - option for probabilistic sampling control. See the - "opt.prof_accum" - - option for control of cumulative sample reporting. See the - "opt.lg_prof_interval" - - option for information on interval-triggered profile dumping, the - "opt.prof_gdump" - + memory allocation activity. See the opt.prof_active + option for on-the-fly activation/deactivation. See the opt.lg_prof_sample + option for probabilistic sampling control. See the opt.prof_accum + option for control of cumulative sample reporting. See the opt.lg_prof_interval + option for information on interval-triggered profile dumping, the opt.prof_gdump option for information on high-water-triggered profile dumping, and the - - "opt.prof_final" - + opt.prof_final option for final profile dumping. Profile output is compatible with the jeprof command, which is based on the pprof that is developed as part of the gperftools - package.

- - "opt.prof_prefix" - + package. See HEAP PROFILE + FORMAT for heap profile format documentation.

+ opt.prof_prefix (const char *) r- [--enable-prof] @@ -669,40 +592,26 @@ malloc_conf = "xmalloc:true";

set to the empty string, no automatic dumps will occur; this is primarily useful for disabling the automatic final heap dump (which also disables leak reporting, if enabled). The default prefix is - jeprof.

- - "opt.prof_active" - + jeprof.

+ opt.prof_active (bool) r- [--enable-prof]

Profiling activated/deactivated. This is a secondary control mechanism that makes it possible to start the application with - profiling enabled (see the - "opt.prof" - option) but + profiling enabled (see the opt.prof option) but inactive, then toggle profiling at any time during program execution - with the - "prof.active" - mallctl. - This option is enabled by default.

- - "opt.prof_thread_active_init" - + with the prof.active mallctl. + This option is enabled by default.

+ opt.prof_thread_active_init (bool) r- [--enable-prof] -

Initial setting for - "thread.prof.active" - +

Initial setting for thread.prof.active in newly created threads. The initial setting for newly created threads - can also be changed during execution via the - "prof.thread_active_init" - - mallctl. This option is enabled by default.

- - "opt.lg_prof_sample" - + can also be changed during execution via the prof.thread_active_init + mallctl. This option is enabled by default.

+ opt.lg_prof_sample (size_t) r- [--enable-prof] @@ -710,10 +619,8 @@ malloc_conf = "xmalloc:true";

samples, as measured in bytes of allocation activity. Increasing the sampling interval decreases profile fidelity, but also decreases the computational overhead. The default sample interval is 512 KiB (2^19 - B).

- - "opt.prof_accum" - + B).

+ opt.prof_accum (bool) r- [--enable-prof] @@ -722,10 +629,8 @@ malloc_conf = "xmalloc:true";

backtrace must be stored for the duration of execution. Depending on the application, this can impose a large memory overhead, and the cumulative counts are not always of interest. This option is disabled - by default.

- - "opt.lg_prof_interval" - + by default.

+ opt.lg_prof_interval (ssize_t) r- [--enable-prof] @@ -736,27 +641,19 @@ malloc_conf = "xmalloc:true";

dumped to files named according to the pattern <prefix>.<pid>.<seq>.i<iseq>.heap, where <prefix> is controlled by the - - "opt.prof_prefix" - + opt.prof_prefix option. By default, interval-triggered profile dumping is disabled (encoded as -1). -

- - "opt.prof_gdump" - +

+ opt.prof_gdump (bool) r- [--enable-prof] -

Set the initial state of - "prof.gdump" - , which when +

Set the initial state of prof.gdump, which when enabled triggers a memory profile dump every time the total virtual memory exceeds the previous maximum. This option is disabled by - default.

- - "opt.prof_final" - + default.

+ opt.prof_final (bool) r- [--enable-prof] @@ -764,101 +661,73 @@ malloc_conf = "xmalloc:true";

atexit(3) function to dump final memory usage to a file named according to the pattern <prefix>.<pid>.<seq>.f.heap, - where <prefix> is controlled by the - "opt.prof_prefix" - - option. Note that atexit() may allocate + where <prefix> is controlled by the opt.prof_prefix + option. Note that atexit() may allocate memory during application initialization and then deadlock internally - when jemalloc in turn calls atexit(), so - this option is not univerally usable (though the application can - register its own atexit() function with + when jemalloc in turn calls atexit(), so + this option is not universally usable (though the application can + register its own atexit() function with equivalent functionality). This option is disabled by - default.

- - "opt.prof_leak" - + default.

+ opt.prof_leak (bool) r- [--enable-prof]

Leak reporting enabled/disabled. If enabled, use an atexit(3) function to report memory leaks detected by allocation sampling. See the - - "opt.prof" - option for + opt.prof option for information on analyzing heap profile output. This option is disabled - by default.

- - "thread.arena" - + by default.

+ thread.arena (unsigned) rw

Get or set the arena associated with the calling thread. If the specified arena was not initialized beforehand (see the - - "arenas.initialized" - + arenas.initialized mallctl), it will be automatically initialized as a side effect of - calling this interface.

- - "thread.allocated" - + calling this interface.

+ thread.allocated (uint64_t) r- [--enable-stats]

Get the total number of bytes ever allocated by the calling thread. This counter has the potential to wrap around; it is up to the application to appropriately interpret the counter in such - cases.

- - "thread.allocatedp" - + cases.

+ thread.allocatedp (uint64_t *) r- [--enable-stats]

Get a pointer to the the value that is returned by the - - "thread.allocated" - + thread.allocated mallctl. This is useful for avoiding the overhead of repeated - mallctl*() calls.

- - "thread.deallocated" - + mallctl*() calls.

+ thread.deallocated (uint64_t) r- [--enable-stats]

Get the total number of bytes ever deallocated by the calling thread. This counter has the potential to wrap around; it is up to the application to appropriately interpret the counter in such - cases.

- - "thread.deallocatedp" - + cases.

+ thread.deallocatedp (uint64_t *) r- [--enable-stats]

Get a pointer to the the value that is returned by the - - "thread.deallocated" - + thread.deallocated mallctl. This is useful for avoiding the overhead of repeated - mallctl*() calls.

- - "thread.tcache.enabled" - + mallctl*() calls.

+ thread.tcache.enabled (bool) rw [--enable-tcache]

Enable/disable calling thread's tcache. The tcache is implicitly flushed as a side effect of becoming - disabled (see - "thread.tcache.flush" - ). -

- - "thread.tcache.flush" - + disabled (see thread.tcache.flush). +

+ thread.tcache.flush (void) -- [--enable-tcache] @@ -870,10 +739,8 @@ malloc_conf = "xmalloc:true";

a thread exits. However, garbage collection is triggered by allocation activity, so it is possible for a thread that stops allocating/deallocating to retain its cache indefinitely, in which case - the developer may find manual flushing useful.

- - "thread.prof.name" - + the developer may find manual flushing useful.

+ thread.prof.name (const char *) r- or -w @@ -886,25 +753,19 @@ malloc_conf = "xmalloc:true";

can cause asynchronous string deallocation. Furthermore, each invocation of this interface can only read or write; simultaneous read/write is not supported due to string lifetime limitations. The - name string must nil-terminated and comprised only of characters in the - sets recognized + name string must be nil-terminated and comprised only of characters in + the sets recognized by isgraph(3) and - isblank(3).

- - "thread.prof.active" - + isblank(3).

+ thread.prof.active (bool) rw [--enable-prof]

Control whether sampling is currently active for the - calling thread. This is an activation mechanism in addition to - "prof.active" - ; both must + calling thread. This is an activation mechanism in addition to prof.active; both must be active for the calling thread to sample. This flag is enabled by - default.

- - "tcache.create" - + default.

+ tcache.create (unsigned) r- [--enable-tcache] @@ -914,76 +775,81 @@ malloc_conf = "xmalloc:true";

automatically managed one that is used by default. Each explicit cache can be used by only one thread at a time; the application must assure that this constraint holds. -

- - "tcache.flush" - +

+ tcache.flush (unsigned) -w [--enable-tcache]

Flush the specified thread-specific cache (tcache). The - same considerations apply to this interface as to - "thread.tcache.flush" - , - except that the tcache will never be automatically be discarded. -

- - "tcache.destroy" - + same considerations apply to this interface as to thread.tcache.flush, + except that the tcache will never be automatically discarded. +

+ tcache.destroy (unsigned) -w [--enable-tcache]

Flush the specified thread-specific cache (tcache) and make the identifier available for use during a future tcache creation. -

- - "arena.<i>.purge" - +

+ arena.<i>.purge (void) -- -

Purge unused dirty pages for arena <i>, or for - all arenas if <i> equals - "arenas.narenas" - . -

- - "arena.<i>.dss" - +

Purge all unused dirty pages for arena <i>, or for + all arenas if <i> equals arenas.narenas. +

+ arena.<i>.decay + (void) + -- +

Trigger decay-based purging of unused dirty pages for + arena <i>, or for all arenas if <i> equals arenas.narenas. + The proportion of unused dirty pages to be purged depends on the current + time; see opt.decay_time for + details.

+ arena.<i>.reset + (void) + -- +

Discard all of the arena's extant allocations.  This +	interface can only be used with arenas created via arenas.extend.  None +	of the arena's discarded/cached allocations may be accessed afterward.  As +	part of this requirement, all thread caches which were used to +	allocate/deallocate in conjunction with the arena must be flushed +	beforehand.  This interface cannot be used if running inside Valgrind, +	nor if the quarantine size is +	non-zero.<br>

+ arena.<i>.dss (const char *) rw

Set the precedence of dss allocation as related to mmap allocation for arena <i>, or for all arenas if <i> equals - - "arenas.narenas" - . See - - "opt.dss" - for supported - settings.

- - "arena.<i>.lg_dirty_mult" - + arenas.narenas. See + opt.dss for supported + settings.

+ arena.<i>.lg_dirty_mult (ssize_t) rw

Current per-arena minimum ratio (log base 2) of active to dirty pages for arena <i>. Each time this interface is set and the ratio is increased, pages are synchronously purged as necessary to - impose the new ratio. See - "opt.lg_dirty_mult" - - for additional information.

- - "arena.<i>.chunk_hooks" - + impose the new ratio. See opt.lg_dirty_mult + for additional information.

+ arena.<i>.decay_time + (ssize_t) + rw +

Current per-arena approximate time in seconds from the + creation of a set of unused dirty pages until an equivalent set of + unused dirty pages is purged and/or reused. Each time this interface is + set, all currently unused dirty pages are considered to have fully + decayed, which causes immediate purging of all unused dirty pages unless + the decay time is set to -1 (i.e. purging disabled). See opt.decay_time for + additional information.

+ arena.<i>.chunk_hooks (chunk_hooks_t) rw

Get or set the chunk management hook functions for arena <i>. The functions must be capable of operating on all extant chunks associated with arena <i>, usually by passing unknown chunks to the replaced functions. In practice, it is feasible to - control allocation for arenas created via - "arenas.extend" - such + control allocation for arenas created via arenas.extend such that all chunks originate from an application-supplied chunk allocator (by setting custom chunk hook functions just after arena creation), but the automatically created arenas may have already created chunks prior @@ -1009,7 +875,7 @@ typedef struct { operations can also be opted out of, but this is mainly intended to support platforms on which virtual memory mappings provided by the operating system kernel do not automatically coalesce and split, e.g. - Windows.

typedef void *(chunk_alloc_t)(void *chunk,
 size_t size,
 size_t alignment,
 bool *zero,
 bool *commit,
 unsigned arena_ind);
 

A chunk allocation function conforms to the + Windows.

typedef void *(chunk_alloc_t)(void *chunk,
 size_t size,
 size_t alignment,
 bool *zero,
 bool *commit,
 unsigned arena_ind);
 

A chunk allocation function conforms to the chunk_alloc_t type and upon success returns a pointer to size bytes of mapped memory on behalf of arena arena_ind such that the chunk's base address is a @@ -1030,10 +896,8 @@ typedef struct { in absolute terms as on a system that does not overcommit, or in implicit terms as on a system that overcommits and satisfies physical memory needs on demand via soft page faults. Note that replacing the - default chunk allocation function makes the arena's - "arena.<i>.dss" - - setting irrelevant.

typedef bool (chunk_dalloc_t)(void *chunk,
 size_t size,
 bool committed,
 unsigned arena_ind);
 

+ default chunk allocation function makes the arena's arena.<i>.dss + setting irrelevant.

typedef bool (chunk_dalloc_t)(void *chunk,
 size_t size,
 bool committed,
 unsigned arena_ind);
 

A chunk deallocation function conforms to the chunk_dalloc_t type and deallocates a chunk of given size with @@ -1042,7 +906,7 @@ typedef struct { success. If the function returns true, this indicates opt-out from deallocation; the virtual memory mapping associated with the chunk remains mapped, in the same commit state, and available for future use, - in which case it will be automatically retained for later reuse.

typedef bool (chunk_commit_t)(void *chunk,
 size_t size,
 size_t offset,
 size_t length,
 unsigned arena_ind);
 

A chunk commit function conforms to the + in which case it will be automatically retained for later reuse.

typedef bool (chunk_commit_t)(void *chunk,
 size_t size,
 size_t offset,
 size_t length,
 unsigned arena_ind);
 

A chunk commit function conforms to the chunk_commit_t type and commits zeroed physical memory to back pages within a chunk of given size at offset bytes, @@ -1052,7 +916,7 @@ typedef struct { does not overcommit, or in implicit terms as on a system that overcommits and satisfies physical memory needs on demand via soft page faults. If the function returns true, this indicates insufficient - physical memory to satisfy the request.

typedef bool (chunk_decommit_t)(void *chunk,
 size_t size,
 size_t offset,
 size_t length,
 unsigned arena_ind);
 

A chunk decommit function conforms to the + physical memory to satisfy the request.

typedef bool (chunk_decommit_t)(void *chunk,
 size_t size,
 size_t offset,
 size_t length,
 unsigned arena_ind);
 

A chunk decommit function conforms to the chunk_decommit_t type and decommits any physical memory that is backing pages within a chunk of given size at offset bytes, @@ -1061,14 +925,14 @@ typedef struct { case the pages will be committed via the chunk commit function before being reused. If the function returns true, this indicates opt-out from decommit; the memory remains committed and available for future use, in - which case it will be automatically retained for later reuse.

typedef bool (chunk_purge_t)(void *chunk,
 size_t size,<br>
 size_t offset,
 size_t length,
 unsigned arena_ind);
 

A chunk purge function conforms to the chunk_purge_t + which case it will be automatically retained for later reuse.

typedef bool (chunk_purge_t)(void *chunk,
 size_t size,<br>
 size_t offset,
 size_t length,
 unsigned arena_ind);
 

A chunk purge function conforms to the chunk_purge_t type and optionally discards physical pages within the virtual memory mapping associated with chunk of given size at offset bytes, extending for length on behalf of arena arena_ind, returning false if pages within the purged virtual memory range will be zero-filled the next time they are - accessed.

typedef bool (chunk_split_t)(void *chunk,
 size_t size,
 size_t size_a,
 size_t size_b,
 bool committed,
 unsigned arena_ind);
 

A chunk split function conforms to the chunk_split_t + accessed.

typedef bool (chunk_split_t)(void *chunk,
 size_t size,
 size_t size_a,
 size_t size_b,
 bool committed,
 unsigned arena_ind);
 

A chunk split function conforms to the chunk_split_t type and optionally splits chunk of given size into two adjacent chunks, the first of size_a bytes, and the second of @@ -1077,7 +941,7 @@ typedef struct { behalf of arena arena_ind, returning false upon success. If the function returns true, this indicates that the chunk remains unsplit and therefore should continue to be operated on as a - whole.

typedef bool (chunk_merge_t)(void *chunk_a,
 size_t size_a,
 void *chunk_b,
 size_t size_b,
 bool committed,
 unsigned arena_ind);
 

A chunk merge function conforms to the chunk_merge_t + whole.

typedef bool (chunk_merge_t)(void *chunk_a,
 size_t size_a,
 void *chunk_b,
 size_t size_b,
 bool committed,
 unsigned arena_ind);
 

A chunk merge function conforms to the chunk_merge_t type and optionally merges adjacent chunks, chunk_a of given size_a and chunk_b of given @@ -1086,150 +950,106 @@ typedef struct { behalf of arena arena_ind, returning false upon success. If the function returns true, this indicates that the chunks remain distinct mappings and therefore should continue to be operated on - independently.

- - "arenas.narenas" - + independently.

+ arenas.narenas (unsigned) r- -

Current limit on number of arenas.

- - "arenas.initialized" - +

Current limit on number of arenas.

+ arenas.initialized (bool *) r- -

An array of - "arenas.narenas" - +

An array of arenas.narenas booleans. Each boolean indicates whether the corresponding arena is - initialized.

- - "arenas.lg_dirty_mult" - + initialized.

+ arenas.lg_dirty_mult (ssize_t) rw

Current default per-arena minimum ratio (log base 2) of - active to dirty pages, used to initialize - "arena.<i>.lg_dirty_mult" - - during arena creation. See - "opt.lg_dirty_mult" - - for additional information.

- - "arenas.quantum" - + active to dirty pages, used to initialize arena.<i>.lg_dirty_mult + during arena creation. See opt.lg_dirty_mult + for additional information.

+ arenas.decay_time + (ssize_t) + rw +

Current default per-arena approximate time in seconds + from the creation of a set of unused dirty pages until an equivalent set + of unused dirty pages is purged and/or reused, used to initialize arena.<i>.decay_time + during arena creation. See opt.decay_time for + additional information.

+ arenas.quantum (size_t) r- -

Quantum size.

- - "arenas.page" - +

Quantum size.

+ arenas.page (size_t) r- -

Page size.

- - "arenas.tcache_max" - +

Page size.

+ arenas.tcache_max (size_t) r- [--enable-tcache] -

Maximum thread-cached size class.

- - "arenas.nbins" - +

Maximum thread-cached size class.

+ arenas.nbins (unsigned) r- -

Number of bin size classes.

- - "arenas.nhbins" - +

Number of bin size classes.

+ arenas.nhbins (unsigned) r- [--enable-tcache]

Total number of thread cache bin size - classes.

- - "arenas.bin.<i>.size" - + classes.

+ arenas.bin.<i>.size (size_t) r- -

Maximum size supported by size class.

- - "arenas.bin.<i>.nregs" - +

Maximum size supported by size class.

+ arenas.bin.<i>.nregs (uint32_t) r- -

Number of regions per page run.

- - "arenas.bin.<i>.run_size" - +

Number of regions per page run.

+ arenas.bin.<i>.run_size (size_t) r- -

Number of bytes per page run.

- - "arenas.nlruns" - +

Number of bytes per page run.

+ arenas.nlruns (unsigned) r- -

Total number of large size classes.

- - "arenas.lrun.<i>.size" - +

Total number of large size classes.

+ arenas.lrun.<i>.size (size_t) r-

Maximum size supported by this large size - class.

- - "arenas.nhchunks" - + class.

+ arenas.nhchunks (unsigned) r- -

Total number of huge size classes.

- - "arenas.hchunk.<i>.size" - +

Total number of huge size classes.

+ arenas.hchunk.<i>.size (size_t) r-

Maximum size supported by this huge size - class.

- - "arenas.extend" - + class.

+ arenas.extend (unsigned) r-

Extend the array of arenas by appending a new arena, - and returning the new arena index.

- - "prof.thread_active_init" - + and returning the new arena index.

+ prof.thread_active_init (bool) rw [--enable-prof] -

Control the initial setting for - "thread.prof.active" - - in newly created threads. See the - "opt.prof_thread_active_init" - - option for additional information.

- - "prof.active" - +

Control the initial setting for thread.prof.active + in newly created threads. See the opt.prof_thread_active_init + option for additional information.

+ prof.active (bool) rw [--enable-prof]

Control whether sampling is currently active. See the - - "opt.prof_active" - - option for additional information, as well as the interrelated - "thread.prof.active" - - mallctl.

- - "prof.dump" - + opt.prof_active + option for additional information, as well as the interrelated thread.prof.active + mallctl.

+ prof.dump (const char *) -w [--enable-prof] @@ -1237,13 +1057,9 @@ typedef struct { is specified, to a file according to the pattern <prefix>.<pid>.<seq>.m<mseq>.heap, where <prefix> is controlled by the - - "opt.prof_prefix" - - option.

- - "prof.gdump" - + opt.prof_prefix + option.

+ prof.gdump (bool) rw [--enable-prof] @@ -1251,103 +1067,67 @@ typedef struct { the total virtual memory exceeds the previous maximum. Profiles are dumped to files named according to the pattern <prefix>.<pid>.<seq>.u<useq>.heap, - where <prefix> is controlled by the - "opt.prof_prefix" - - option.

- - "prof.reset" - + where <prefix> is controlled by the opt.prof_prefix + option.

+ prof.reset (size_t) -w [--enable-prof]

Reset all memory profile statistics, and optionally - update the sample rate (see - "opt.lg_prof_sample" - - and - "prof.lg_sample" - ). -

- - "prof.lg_sample" - + update the sample rate (see opt.lg_prof_sample + and prof.lg_sample). +

+ prof.lg_sample (size_t) r- [--enable-prof] -

Get the current sample rate (see - "opt.lg_prof_sample" - ). -

- - "prof.interval" - +

Get the current sample rate (see opt.lg_prof_sample). +

+ prof.interval (uint64_t) r- [--enable-prof]

Average number of bytes allocated between - inverval-based profile dumps. See the - - "opt.lg_prof_interval" - - option for additional information.

- - "stats.cactive" - + interval-based profile dumps. See the + opt.lg_prof_interval + option for additional information.

+ stats.cactive (size_t *) r- [--enable-stats]

Pointer to a counter that contains an approximate count of the current number of bytes in active pages. The estimate may be high, but never low, because each arena rounds up when computing its - contribution to the counter. Note that the - "epoch" - mallctl has no bearing + contribution to the counter. Note that the epoch mallctl has no bearing on this counter. Furthermore, counter consistency is maintained via atomic operations, so it is necessary to use an atomic operation in order to guarantee a consistent read when dereferencing the pointer. -

- - "stats.allocated" - +

+ stats.allocated (size_t) r- [--enable-stats]

Total number of bytes allocated by the - application.

- - "stats.active" - + application.

+ stats.active (size_t) r- [--enable-stats]

Total number of bytes in active pages allocated by the application. This is a multiple of the page size, and greater than or - equal to - "stats.allocated" - . + equal to stats.allocated. This does not include - - "stats.arenas.<i>.pdirty" - , nor pages - entirely devoted to allocator metadata.

- - "stats.metadata" - + stats.arenas.<i>.pdirty, nor pages + entirely devoted to allocator metadata.

+ stats.metadata (size_t) r- [--enable-stats]

Total number of bytes dedicated to metadata, which comprise base allocations used for bootstrap-sensitive internal - allocator data structures, arena chunk headers (see - "stats.arenas.<i>.metadata.mapped" - ), - and internal allocations (see - "stats.arenas.<i>.metadata.allocated" - ).

- - "stats.resident" - + allocator data structures, arena chunk headers (see stats.arenas.<i>.metadata.mapped), + and internal allocations (see stats.arenas.<i>.metadata.allocated).

+ stats.resident (size_t) r- [--enable-stats] @@ -1357,436 +1137,393 @@ typedef struct { pages. This is a maximum rather than precise because pages may not actually be physically resident if they correspond to demand-zeroed virtual memory that has not yet been touched. This is a multiple of the - page size, and is larger than - "stats.active" - .

- - "stats.mapped" - + page size, and is larger than stats.active.

+ stats.mapped (size_t) r- [--enable-stats]

Total number of bytes in active chunks mapped by the allocator. This is a multiple of the chunk size, and is larger than - - "stats.active" - . + stats.active. This does not include inactive chunks, even those that contain unused dirty pages, which means that there is no strict ordering between this - and - "stats.resident" - .

- - "stats.arenas.<i>.dss" - - (const char *) - r- -

dss (sbrk(2)) allocation precedence as - related to mmap(2) allocation. See - "opt.dss" - for details. -

- - "stats.arenas.<i>.lg_dirty_mult" - - (ssize_t) - r- -

Minimum ratio (log base 2) of active to dirty pages. - See - "opt.lg_dirty_mult" - - for details.

- - "stats.arenas.<i>.nthreads" - - (unsigned) - r- -

Number of threads currently assigned to - arena.

- - "stats.arenas.<i>.pactive" - - (size_t) - r- -

Number of pages in active runs.

- - "stats.arenas.<i>.pdirty" - - (size_t) - r- -

Number of pages within unused runs that are potentially - dirty, and for which madvise(..., - MADV_DONTNEED) or - similar has not been called.

- - "stats.arenas.<i>.mapped" - + and stats.resident.

+ stats.retained (size_t) r- [--enable-stats] -

Number of mapped bytes.

- - "stats.arenas.<i>.metadata.mapped" - +

Total number of bytes in virtual memory mappings that + were retained rather than being returned to the operating system via + e.g. munmap(2). Retained virtual memory is + typically untouched, decommitted, or purged, so it has no strongly + associated physical memory (see chunk hooks for details). Retained + memory is excluded from mapped memory statistics, e.g. stats.mapped. +

+ stats.arenas.<i>.dss + (const char *) + r- +

dss (sbrk(2)) allocation precedence as + related to mmap(2) allocation. See opt.dss for details. +

+ stats.arenas.<i>.lg_dirty_mult + (ssize_t) + r- +

Minimum ratio (log base 2) of active to dirty pages. + See opt.lg_dirty_mult + for details.

+ stats.arenas.<i>.decay_time + (ssize_t) + r- +

Approximate time in seconds from the creation of a set + of unused dirty pages until an equivalent set of unused dirty pages is + purged and/or reused. See opt.decay_time + for details.

+ stats.arenas.<i>.nthreads + (unsigned) + r- +

Number of threads currently assigned to + arena.

+ stats.arenas.<i>.pactive + (size_t) + r- +

Number of pages in active runs.

+ stats.arenas.<i>.pdirty + (size_t) + r- +

Number of pages within unused runs that are potentially + dirty, and for which madvise... + MADV_DONTNEED or + similar has not been called.

+ stats.arenas.<i>.mapped + (size_t) + r- + [--enable-stats] +

Number of mapped bytes.

+ stats.arenas.<i>.retained + (size_t) + r- + [--enable-stats] +

Number of retained bytes. See stats.retained for + details.

+ stats.arenas.<i>.metadata.mapped (size_t) r- [--enable-stats]

Number of mapped bytes in arena chunk headers, which - track the states of the non-metadata pages.

- - "stats.arenas.<i>.metadata.allocated" - + track the states of the non-metadata pages.

+ stats.arenas.<i>.metadata.allocated (size_t) r- [--enable-stats]

Number of bytes dedicated to internal allocations. Internal allocations differ from application-originated allocations in that they are for internal use, and that they are omitted from heap - profiles. This statistic is reported separately from - "stats.metadata" - and - - "stats.arenas.<i>.metadata.mapped" - - because it overlaps with e.g. the - "stats.allocated" - and - - "stats.active" - + profiles. This statistic is reported separately from stats.metadata and + stats.arenas.<i>.metadata.mapped + because it overlaps with e.g. the stats.allocated and + stats.active statistics, whereas the other metadata statistics do - not.

- - "stats.arenas.<i>.npurge" - + not.

+ stats.arenas.<i>.npurge (uint64_t) r- [--enable-stats]

Number of dirty page purge sweeps performed. -

- - "stats.arenas.<i>.nmadvise" - +

+ stats.arenas.<i>.nmadvise (uint64_t) r- [--enable-stats] -

Number of madvise(..., - MADV_DONTNEED) or - similar calls made to purge dirty pages.

- - "stats.arenas.<i>.purged" - +

Number of madvise... + MADV_DONTNEED or + similar calls made to purge dirty pages.

+ stats.arenas.<i>.purged (uint64_t) r- [--enable-stats] -

Number of pages purged.

- - "stats.arenas.<i>.small.allocated" - +

Number of pages purged.

+ stats.arenas.<i>.small.allocated (size_t) r- [--enable-stats]

Number of bytes currently allocated by small objects. -

- - "stats.arenas.<i>.small.nmalloc" - +

+ stats.arenas.<i>.small.nmalloc (uint64_t) r- [--enable-stats]

Cumulative number of allocation requests served by - small bins.

- - "stats.arenas.<i>.small.ndalloc" - + small bins.

+ stats.arenas.<i>.small.ndalloc (uint64_t) r- [--enable-stats]

Cumulative number of small objects returned to bins. -

- - "stats.arenas.<i>.small.nrequests" - +

+ stats.arenas.<i>.small.nrequests (uint64_t) r- [--enable-stats]

Cumulative number of small allocation requests. -

- - "stats.arenas.<i>.large.allocated" - +

+ stats.arenas.<i>.large.allocated (size_t) r- [--enable-stats]

Number of bytes currently allocated by large objects. -

- - "stats.arenas.<i>.large.nmalloc" - +

+ stats.arenas.<i>.large.nmalloc (uint64_t) r- [--enable-stats]

Cumulative number of large allocation requests served - directly by the arena.

- - "stats.arenas.<i>.large.ndalloc" - + directly by the arena.

+ stats.arenas.<i>.large.ndalloc (uint64_t) r- [--enable-stats]

Cumulative number of large deallocation requests served - directly by the arena.

- - "stats.arenas.<i>.large.nrequests" - + directly by the arena.

+ stats.arenas.<i>.large.nrequests (uint64_t) r- [--enable-stats]

Cumulative number of large allocation requests. -

- - "stats.arenas.<i>.huge.allocated" - +

+ stats.arenas.<i>.huge.allocated (size_t) r- [--enable-stats]

Number of bytes currently allocated by huge objects. -

- - "stats.arenas.<i>.huge.nmalloc" - +

+ stats.arenas.<i>.huge.nmalloc (uint64_t) r- [--enable-stats]

Cumulative number of huge allocation requests served - directly by the arena.

- - "stats.arenas.<i>.huge.ndalloc" - + directly by the arena.

+ stats.arenas.<i>.huge.ndalloc (uint64_t) r- [--enable-stats]

Cumulative number of huge deallocation requests served - directly by the arena.

- - "stats.arenas.<i>.huge.nrequests" - + directly by the arena.

+ stats.arenas.<i>.huge.nrequests (uint64_t) r- [--enable-stats]

Cumulative number of huge allocation requests. -

- - "stats.arenas.<i>.bins.<j>.nmalloc" - +

+ stats.arenas.<i>.bins.<j>.nmalloc (uint64_t) r- [--enable-stats]

Cumulative number of allocations served by bin. -

- - "stats.arenas.<i>.bins.<j>.ndalloc" - +

+ stats.arenas.<i>.bins.<j>.ndalloc (uint64_t) r- [--enable-stats]

Cumulative number of allocations returned to bin. -

- - "stats.arenas.<i>.bins.<j>.nrequests" - +

+ stats.arenas.<i>.bins.<j>.nrequests (uint64_t) r- [--enable-stats]

Cumulative number of allocation - requests.

- - "stats.arenas.<i>.bins.<j>.curregs" - + requests.

+ stats.arenas.<i>.bins.<j>.curregs (size_t) r- [--enable-stats]

Current number of regions for this size - class.

- - "stats.arenas.<i>.bins.<j>.nfills" - + class.

+ stats.arenas.<i>.bins.<j>.nfills (uint64_t) r- [--enable-stats --enable-tcache] -

Cumulative number of tcache fills.

- - "stats.arenas.<i>.bins.<j>.nflushes" - +

Cumulative number of tcache fills.

+ stats.arenas.<i>.bins.<j>.nflushes (uint64_t) r- [--enable-stats --enable-tcache] -

Cumulative number of tcache flushes.

- - "stats.arenas.<i>.bins.<j>.nruns" - +

Cumulative number of tcache flushes.

+ stats.arenas.<i>.bins.<j>.nruns (uint64_t) r- [--enable-stats] -

Cumulative number of runs created.

- - "stats.arenas.<i>.bins.<j>.nreruns" - +

Cumulative number of runs created.

+ stats.arenas.<i>.bins.<j>.nreruns (uint64_t) r- [--enable-stats]

Cumulative number of times the current run from which - to allocate changed.

- - "stats.arenas.<i>.bins.<j>.curruns" - + to allocate changed.

+ stats.arenas.<i>.bins.<j>.curruns (size_t) r- [--enable-stats] -

Current number of runs.

- - "stats.arenas.<i>.lruns.<j>.nmalloc" - +

Current number of runs.

+ stats.arenas.<i>.lruns.<j>.nmalloc (uint64_t) r- [--enable-stats]

Cumulative number of allocation requests for this size - class served directly by the arena.

- - "stats.arenas.<i>.lruns.<j>.ndalloc" - + class served directly by the arena.

+ stats.arenas.<i>.lruns.<j>.ndalloc (uint64_t) r- [--enable-stats]

Cumulative number of deallocation requests for this - size class served directly by the arena.

- - "stats.arenas.<i>.lruns.<j>.nrequests" - + size class served directly by the arena.

+ stats.arenas.<i>.lruns.<j>.nrequests (uint64_t) r- [--enable-stats]

Cumulative number of allocation requests for this size - class.

- - "stats.arenas.<i>.lruns.<j>.curruns" - + class.

+ stats.arenas.<i>.lruns.<j>.curruns (size_t) r- [--enable-stats]

Current number of runs for this size class. -

- - "stats.arenas.<i>.hchunks.<j>.nmalloc" - +

+ stats.arenas.<i>.hchunks.<j>.nmalloc (uint64_t) r- [--enable-stats]

Cumulative number of allocation requests for this size - class served directly by the arena.

- - "stats.arenas.<i>.hchunks.<j>.ndalloc" - + class served directly by the arena.

+ stats.arenas.<i>.hchunks.<j>.ndalloc (uint64_t) r- [--enable-stats]

Cumulative number of deallocation requests for this - size class served directly by the arena.

- - "stats.arenas.<i>.hchunks.<j>.nrequests" - + size class served directly by the arena.

+ stats.arenas.<i>.hchunks.<j>.nrequests (uint64_t) r- [--enable-stats]

Cumulative number of allocation requests for this size - class.

- - "stats.arenas.<i>.hchunks.<j>.curhchunks" - + class.

+ stats.arenas.<i>.hchunks.<j>.curhchunks (size_t) r- [--enable-stats]

Current number of huge allocations for this size class. -

DEBUGGING MALLOC PROBLEMS

When debugging, it is a good idea to configure/build jemalloc with +

HEAP PROFILE FORMAT

Although the heap profiling functionality was originally designed to + be compatible with the + pprof command that is developed as part of the gperftools + package, the addition of per thread heap profiling functionality + required a different heap profile format. The jeprof + command is derived from pprof, with enhancements to + support the heap profile format described here.

In the following hypothetical heap profile, [...] + indicates elision for the sake of compactness.

+heap_v2/524288
+  t*: 28106: 56637512 [0: 0]
+  [...]
+  t3: 352: 16777344 [0: 0]
+  [...]
+  t99: 17754: 29341640 [0: 0]
+  [...]
+@ 0x5f86da8 0x5f5a1dc [...] 0x29e4d4e 0xa200316 0xabb2988 [...]
+  t*: 13: 6688 [0: 0]
+  t3: 12: 6496 [0: 0]
+  t99: 1: 192 [0: 0]
+[...]
+
+MAPPED_LIBRARIES:
+[...]

The following matches the above heap profile, but most +tokens are replaced with <description> to indicate +descriptions of the corresponding fields.

+<heap_profile_format_version>/<mean_sample_interval>
+  <aggregate>: <curobjs>: <curbytes> [<cumobjs>: <cumbytes>]
+  [...]
+  <thread_3_aggregate>: <curobjs>: <curbytes> [<cumobjs>: <cumbytes>]
+  [...]
+  <thread_99_aggregate>: <curobjs>: <curbytes> [<cumobjs>: <cumbytes>]
+  [...]
+@ <top_frame> <frame> [...] <frame> <frame> <frame> [...]
+  <backtrace_aggregate>: <curobjs>: <curbytes> [<cumobjs>: <cumbytes>]
+  <backtrace_thread_3>: <curobjs>: <curbytes> [<cumobjs>: <cumbytes>]
+  <backtrace_thread_99>: <curobjs>: <curbytes> [<cumobjs>: <cumbytes>]
+[...]
+
+MAPPED_LIBRARIES:
+</proc/<pid>/maps>

DEBUGGING MALLOC PROBLEMS

When debugging, it is a good idea to configure/build jemalloc with the --enable-debug and --enable-fill options, and recompile the program with suitable options and symbols for debugger support. When so configured, jemalloc incorporates a wide variety of run-time assertions that catch application errors such as double-free, - write-after-free, etc.

Programs often accidentally depend on “uninitialized” + write-after-free, etc.

Programs often accidentally depend on “uninitialized†memory actually being filled with zero bytes. Junk filling - (see the - "opt.junk" - + (see the opt.junk option) tends to expose such bugs in the form of obviously incorrect results and/or coredumps. Conversely, zero - filling (see the - "opt.zero" - option) eliminates + filling (see the opt.zero option) eliminates the symptoms of such bugs. Between these two options, it is usually possible to quickly detect, diagnose, and eliminate such bugs.

This implementation does not provide much detail about the problems it detects, because the performance impact for storing such information would be prohibitive. However, jemalloc does integrate with the most excellent Valgrind tool if the - --enable-valgrind configuration option is enabled.

DIAGNOSTIC MESSAGES

If any of the memory allocation/deallocation functions detect an + --enable-valgrind configuration option is enabled.

DIAGNOSTIC MESSAGES

If any of the memory allocation/deallocation functions detect an error or warning condition, a message will be printed to file descriptor STDERR_FILENO. Errors will result in the process - dumping core. If the - "opt.abort" - option is set, most + dumping core. If the opt.abort option is set, most warnings are treated as errors.

The malloc_message variable allows the programmer to override the function which emits the text strings forming the errors and warnings if for some reason the STDERR_FILENO file descriptor is not suitable for this. - malloc_message() takes the + malloc_message() takes the cbopaque pointer argument that is NULL unless overridden by the arguments in a call to - malloc_stats_print(), followed by a string + malloc_stats_print(), followed by a string pointer. Please note that doing anything which tries to allocate memory in this function is likely to result in a crash or deadlock.

All messages are prefixed by - “<jemalloc>: ”.

RETURN VALUES

Standard API

The malloc() and - calloc() functions return a pointer to the + “<jemalloc>: ”.

RETURN VALUES

Non-standard API

The mallocx() and - rallocx() functions return a pointer to +

The free() function returns no + value.

Non-standard API

The mallocx() and + rallocx() functions return a pointer to the allocated memory if successful; otherwise a NULL pointer is returned to indicate insufficient contiguous memory was - available to service the allocation request.

The xallocx() function returns the + available to service the allocation request.

The xallocx() function returns the real size of the resulting resized allocation pointed to by ptr, which is a value less than size if the allocation could not be adequately - grown in place.

The sallocx() function returns the + grown in place.

The sallocx() function returns the real size of the allocation pointed to by ptr. -

The nallocx() returns the real size +

The nallocx() returns the real size that would result from a successful equivalent - mallocx() function call, or zero if - insufficient memory is available to perform the size computation.

The mallctl(), - mallctlnametomib(), and - mallctlbymib() functions return 0 on + mallocx() function call, or zero if + insufficient memory is available to perform the size computation.

The mallctl(), + mallctlnametomib(), and + mallctlbymib() functions return 0 on success; otherwise they return an error value. The functions will fail if:

EINVAL

newp is not @@ -1798,28 +1535,28 @@ typedef struct { value.

EPERM

Attempt to read or write void value, or attempt to write read-only value.

EAGAIN

A memory allocation failure occurred.

EFAULT

An interface with side effects failed in some way - not directly related to mallctl*() + not directly related to mallctl*() read/write processing.

-

The malloc_usable_size() function +

The malloc_usable_size() function returns the usable size of the allocation pointed to by - ptr.

ENVIRONMENT

The following environment variable affects the execution of the + ptr.

EXAMPLES

To dump core whenever a problem occurs: +

SEE ALSO

madvise(2), +malloc_conf = "lg_chunk:24";

STANDARDS

The malloc(), - calloc(), - realloc(), and - free() functions conform to ISO/IEC - 9899:1990 (“ISO C90”).

The posix_memalign() function conforms - to IEEE Std 1003.1-2001 (“POSIX.1”).

+ getpagesize(3)

\ No newline at end of file diff --git a/deps/jemalloc/doc/jemalloc.xml.in b/deps/jemalloc/doc/jemalloc.xml.in index 8fc774b18..d9c83452d 100644 --- a/deps/jemalloc/doc/jemalloc.xml.in +++ b/deps/jemalloc/doc/jemalloc.xml.in @@ -52,7 +52,7 @@ LIBRARY This manual describes jemalloc @jemalloc_version@. More information can be found at the jemalloc website. + url="http://jemalloc.net/">jemalloc website. SYNOPSIS @@ -180,20 +180,20 @@ Standard API - The malloc function allocates + The malloc() function allocates size bytes of uninitialized memory. The allocated space is suitably aligned (after possible pointer coercion) for storage of any type of object. - The calloc function allocates + The calloc() function allocates space for number objects, each size bytes in length. The result is identical to - calling malloc with an argument of + calling malloc() with an argument of number * size, with the exception that the allocated memory is explicitly initialized to zero bytes. - The posix_memalign function + The posix_memalign() function allocates size bytes of memory such that the allocation's base address is a multiple of alignment, and returns the allocation in the value @@ -201,7 +201,7 @@ alignment must be a power of 2 at least as large as sizeof(void *). - The aligned_alloc function + The aligned_alloc() function allocates size bytes of memory such that the allocation's base address is a multiple of alignment. The requested @@ -209,7 +209,7 @@ undefined if size is not an integral multiple of alignment. - The realloc function changes the + The realloc() function changes the size of the previously allocated memory referenced by ptr to size bytes. The contents of the memory are unchanged up to the lesser of the new and old @@ -217,26 +217,26 @@ portion of the memory are undefined. Upon success, the memory referenced by ptr is freed and a pointer to the newly allocated memory is returned. 
Note that - realloc may move the memory allocation, + realloc() may move the memory allocation, resulting in a different return value than ptr. If ptr is NULL, the - realloc function behaves identically to - malloc for the specified size. + realloc() function behaves identically to + malloc() for the specified size. - The free function causes the + The free() function causes the allocated memory referenced by ptr to be made available for future allocations. If ptr is NULL, no action occurs. Non-standard API - The mallocx, - rallocx, - xallocx, - sallocx, - dallocx, - sdallocx, and - nallocx functions all have a + The mallocx(), + rallocx(), + xallocx(), + sallocx(), + dallocx(), + sdallocx(), and + nallocx() functions all have a flags argument that can be used to specify options. The functions only check the options that are contextually relevant. Use bitwise or (|) operations to @@ -307,21 +307,19 @@ - The mallocx function allocates at + The mallocx() function allocates at least size bytes of memory, and returns a pointer to the base address of the allocation. Behavior is undefined if - size is 0, or if request size - overflows due to size class and/or alignment constraints. + size is 0. - The rallocx function resizes the + The rallocx() function resizes the allocation at ptr to be at least size bytes, and returns a pointer to the base address of the resulting allocation, which may or may not have moved from its original location. Behavior is undefined if - size is 0, or if request size - overflows due to size class and/or alignment constraints. + size is 0. - The xallocx function resizes the + The xallocx() function resizes the allocation at ptr in place to be at least size bytes, and returns the real size of the allocation. If extra is non-zero, an attempt is @@ -334,32 +332,32 @@ language="C">(size + extra > SIZE_T_MAX)
. - The sallocx function returns the + The sallocx() function returns the real size of the allocation at ptr. - The dallocx function causes the + The dallocx() function causes the memory referenced by ptr to be made available for future allocations. - The sdallocx function is an - extension of dallocx with a + The sdallocx() function is an + extension of dallocx() with a size parameter to allow the caller to pass in the allocation size as an optimization. The minimum valid input size is the original requested size of the allocation, and the maximum valid input size is the corresponding value returned by - nallocx or - sallocx. + nallocx() or + sallocx(). - The nallocx function allocates no + The nallocx() function allocates no memory, but it performs the same size computation as the - mallocx function, and returns the real + mallocx() function, and returns the real size of the allocation that would result from the equivalent - mallocx function call. Behavior is - undefined if size is 0, or if - request size overflows due to size class and/or alignment - constraints. + mallocx() function call, or + 0 if the inputs exceed the maximum supported size + class and/or alignment. Behavior is undefined if + size is 0. - The mallctl function provides a + The mallctl() function provides a general interface for introspecting the memory allocator, as well as setting modifiable parameters and triggering actions. The period-separated name argument specifies a @@ -374,12 +372,12 @@ newlen; otherwise pass NULL and 0. - The mallctlnametomib function + The mallctlnametomib() function provides a way to avoid repeated name lookups for applications that repeatedly query the same portion of the namespace, by translating a name - to a “Management Information Base” (MIB) that can be passed - repeatedly to mallctlbymib. Upon - successful return from mallctlnametomib, + to a Management Information Base (MIB) that can be passed + repeatedly to mallctlbymib(). 
Upon + successful return from mallctlnametomib(), mibp contains an array of *miblenp integers, where *miblenp is the lesser of the number of components @@ -408,43 +406,44 @@ for (i = 0; i < nbins; i++) { mib[2] = i; len = sizeof(bin_size); - mallctlbymib(mib, miblen, &bin_size, &len, NULL, 0); + mallctlbymib(mib, miblen, (void *)&bin_size, &len, NULL, 0); /* Do something with bin_size... */ }]]> - The malloc_stats_print function - writes human-readable summary statistics via the - write_cb callback function pointer and - cbopaque data passed to - write_cb, or - malloc_message if - write_cb is NULL. This - function can be called repeatedly. General information that never - changes during execution can be omitted by specifying "g" as a character + The malloc_stats_print() function writes + summary statistics via the write_cb callback + function pointer and cbopaque data passed to + write_cb, or malloc_message() + if write_cb is NULL. The + statistics are presented in human-readable form unless J is + specified as a character within the opts string, in + which case the statistics are presented in JSON format. This function can be + called repeatedly. General information that never changes during + execution can be omitted by specifying g as a character within the opts string. Note that - malloc_message uses the - mallctl* functions internally, so - inconsistent statistics can be reported if multiple threads use these - functions simultaneously. If is - specified during configuration, “m” and “a” can - be specified to omit merged arena and per arena statistics, respectively; - “b”, “l”, and “h” can be specified to - omit per size class statistics for bins, large objects, and huge objects, - respectively. Unrecognized characters are silently ignored. Note that - thread caching may prevent some statistics from being completely up to - date, since extra locking would be required to merge counters that track - thread cache operations. 
- + malloc_message() uses the + mallctl*() functions internally, so inconsistent + statistics can be reported if multiple threads use these functions + simultaneously. If is specified during + configuration, m and a can be specified to + omit merged arena and per arena statistics, respectively; + b, l, and h can be specified + to omit per size class statistics for bins, large objects, and huge + objects, respectively. Unrecognized characters are silently ignored. + Note that thread caching may prevent some statistics from being completely + up to date, since extra locking would be required to merge counters that + track thread cache operations. - The malloc_usable_size function + The malloc_usable_size() function returns the usable size of the allocation pointed to by ptr. The return value may be larger than the size that was requested during allocation. The - malloc_usable_size function is not a - mechanism for in-place realloc; rather + malloc_usable_size() function is not a + mechanism for in-place realloc(); rather it is provided solely as a tool for introspection purposes. Any discrepancy between the requested allocation size and the size reported - by malloc_usable_size should not be + by malloc_usable_size() should not be depended on, since such behavior is entirely implementation-dependent. @@ -455,19 +454,20 @@ for (i = 0; i < nbins; i++) { routines, the allocator initializes its internals based in part on various options that can be specified at compile- or run-time. - The string pointed to by the global variable - malloc_conf, the “name” of the file - referenced by the symbolic link named /etc/malloc.conf, and the value of the + The string specified via , the + string pointed to by the global variable malloc_conf, the + name of the file referenced by the symbolic link named + /etc/malloc.conf, and the value of the environment variable MALLOC_CONF, will be interpreted, in that order, from left to right as options. 
Note that malloc_conf may be read before - main is entered, so the declaration of + main() is entered, so the declaration of malloc_conf should specify an initializer that contains - the final value to be read by jemalloc. malloc_conf is - a compile-time setting, whereas /etc/malloc.conf and MALLOC_CONF - can be safely set any time prior to program invocation. + the final value to be read by jemalloc. + and malloc_conf are compile-time mechanisms, whereas + /etc/malloc.conf and + MALLOC_CONF can be safely set any time prior to program + invocation. An options string is a comma-separated list of option:value pairs. There is one key corresponding to each - Memory is conceptually broken into equal-sized chunks, where the - chunk size is a power of two that is greater than the page size. Chunks - are always aligned to multiples of the chunk size. This alignment makes it - possible to find metadata for user objects very quickly. - - User objects are broken into three categories according to size: - small, large, and huge. Small and large objects are managed entirely by - arenas; huge objects are additionally aggregated in a single data structure - that is shared by all threads. Huge objects are typically used by - applications infrequently enough that this single data structure is not a - scalability issue. - - Each chunk that is managed by an arena tracks its contents as runs of + Memory is conceptually broken into equal-sized chunks, where the chunk + size is a power of two that is greater than the page size. Chunks are + always aligned to multiples of the chunk size. This alignment makes it + possible to find metadata for user objects very quickly. User objects are + broken into three categories according to size: small, large, and huge. + Multiple small and large objects can reside within a single chunk, whereas + huge objects each have one or more chunks backing them. 
Each chunk that + contains small and/or large objects tracks its contents as runs of contiguous pages (unused, backing a set of small objects, or backing one - large object). The combination of chunk alignment and chunk page maps - makes it possible to determine all metadata regarding small and large - allocations in constant time. + large object). The combination of chunk alignment and chunk page maps makes + it possible to determine all metadata regarding small and large allocations + in constant time. Small objects are managed in groups by page runs. Each run maintains a bitmap to track which regions are in use. Allocation requests that are no @@ -546,8 +541,8 @@ for (i = 0; i < nbins; i++) { are smaller than four times the page size, large size classes are smaller than the chunk size (see the opt.lg_chunk option), and - huge size classes extend from the chunk size up to one size class less than - the full address space size. + huge size classes extend from the chunk size up to the largest size class + that does not exceed PTRDIFF_MAX. Allocations are packed tightly together, which can be an issue for multi-threaded applications. If you need to assure that allocations do not @@ -555,14 +550,14 @@ for (i = 0; i < nbins; i++) { nearest multiple of the cacheline size, or specify cacheline alignment when allocating. - The realloc, - rallocx, and - xallocx functions may resize allocations + The realloc(), + rallocx(), and + xallocx() functions may resize allocations without moving them under limited circumstances. Unlike the - *allocx API, the standard API does not + *allocx() API, the standard API does not officially round up the usable size of an allocation to the nearest size class, so technically it is necessary to call - realloc to grow e.g. a 9-byte allocation to + realloc() to grow e.g. a 9-byte allocation to 16 bytes, or shrink a 16-byte allocation to 9 bytes. 
Growth and shrinkage trivially succeeds in place as long as the pre-size and post-size both round up to the same size class. No other API guarantees are made regarding @@ -665,7 +660,7 @@ for (i = 0; i < nbins; i++) { [1280 KiB, 1536 KiB, 1792 KiB] - Huge + Huge 256 KiB [2 MiB] @@ -693,6 +688,14 @@ for (i = 0; i < nbins; i++) { ... ... + + 512 PiB + [2560 PiB, 3 EiB, 3584 PiB, 4 EiB] + + + 1 EiB + [5 EiB, 6 EiB, 7 EiB] + @@ -700,7 +703,7 @@ for (i = 0; i < nbins; i++) { MALLCTL NAMESPACE The following names are defined in the namespace accessible via the - mallctl* functions. Value types are + mallctl*() functions. Value types are specified in parentheses, their readable/writable statuses are encoded as rw, r-, -w, or --, and required build configuration flags follow, if @@ -731,7 +734,7 @@ for (i = 0; i < nbins; i++) { rw If a value is passed in, refresh the data from which - the mallctl* functions report values, + the mallctl*() functions report values, and increment the epoch. Return the current epoch. This is useful for detecting whether another thread caused a refresh. @@ -776,6 +779,17 @@ for (i = 0; i < nbins; i++) { during build configuration. + + + config.malloc_conf + (const char *) + r- + + Embedded configure-time-specified run-time options + string, empty unless was specified + during build configuration. + + config.munmap @@ -904,12 +918,12 @@ for (i = 0; i < nbins; i++) { settings are supported if sbrk 2 is supported by the operating - system: “disabled”, “primary”, and - “secondary”; otherwise only “disabled” is - supported. The default is “secondary” if + system: disabled, primary, and + secondary; otherwise only disabled is + supported. The default is secondary if sbrk 2 is supported by the operating - system; “disabled” otherwise. + system; disabled otherwise. 
@@ -929,7 +943,7 @@ for (i = 0; i < nbins; i++) { opt.narenas - (size_t) + (unsigned) r- Maximum number of arenas to use for automatic @@ -937,6 +951,20 @@ for (i = 0; i < nbins; i++) { number of CPUs, or one if there is a single CPU. + + + opt.purge + (const char *) + r- + + Purge mode is “ratio” (default) or + “decay”. See opt.lg_dirty_mult + for details of the ratio mode. See opt.decay_time for + details of the decay mode. + + opt.lg_dirty_mult @@ -959,6 +987,26 @@ for (i = 0; i < nbins; i++) { for related dynamic control options. + + + opt.decay_time + (ssize_t) + r- + + Approximate time in seconds from the creation of a set + of unused dirty pages until an equivalent set of unused dirty pages is + purged and/or reused. The pages are incrementally purged according to a + sigmoidal decay curve that starts and ends with zero purge rate. A + decay time of 0 causes all unused dirty pages to be purged immediately + upon creation. A decay time of -1 disables purging. The default decay + time is 10 seconds. See arenas.decay_time + and arena.<i>.decay_time + for related dynamic control options. + + + opt.stats_print @@ -966,19 +1014,19 @@ for (i = 0; i < nbins; i++) { r- Enable/disable statistics printing at exit. If - enabled, the malloc_stats_print + enabled, the malloc_stats_print() function is called at program exit via an atexit 3 function. If is specified during configuration, this has the potential to cause deadlock for a multi-threaded process that exits while one or more threads are executing in the memory allocation - functions. Furthermore, atexit may + functions. 
Furthermore, atexit() may allocate memory during application initialization and then deadlock internally when jemalloc in turn calls - atexit, so this option is not - univerally usable (though the application can register its own - atexit function with equivalent + atexit(), so this option is not + universally usable (though the application can register its own + atexit() function with equivalent functionality). Therefore, this option should only be used with care; it is primarily intended as a performance tuning aid during application development. This option is disabled by default. @@ -991,15 +1039,16 @@ for (i = 0; i < nbins; i++) { r- [] - Junk filling. If set to "alloc", each byte of - uninitialized allocated memory will be initialized to - 0xa5. If set to "free", all deallocated memory will - be initialized to 0x5a. If set to "true", both - allocated and deallocated memory will be initialized, and if set to - "false", junk filling be disabled entirely. This is intended for - debugging and will impact performance negatively. This option is - "false" by default unless is specified - during configuration, in which case it is "true" by default unless + Junk filling. If set to alloc, each byte + of uninitialized allocated memory will be initialized to + 0xa5. If set to free, all deallocated + memory will be initialized to 0x5a. If set to + true, both allocated and deallocated memory will be + initialized, and if set to false, junk filling be + disabled entirely. This is intended for debugging and will impact + performance negatively. This option is false by default + unless is specified during + configuration, in which case it is true by default unless running inside Valgrind. @@ -1054,8 +1103,8 @@ for (i = 0; i < nbins; i++) { Zero filling enabled/disabled. If enabled, each byte of uninitialized allocated memory will be initialized to 0. 
Note that this initialization only happens once for each byte, so - realloc and - rallocx calls do not zero memory that + realloc() and + rallocx() calls do not zero memory that was previously allocated. This is intended for debugging and will impact performance negatively. This option is disabled by default. @@ -1150,7 +1199,8 @@ malloc_conf = "xmalloc:true";]]> the jeprof command, which is based on the pprof that is developed as part of the gperftools - package. + package. See HEAP PROFILE + FORMAT for heap profile format documentation. @@ -1277,11 +1327,11 @@ malloc_conf = "xmalloc:true";]]> <prefix>.<pid>.<seq>.f.heap, where <prefix> is controlled by the opt.prof_prefix - option. Note that atexit may allocate + option. Note that atexit() may allocate memory during application initialization and then deadlock internally - when jemalloc in turn calls atexit, so - this option is not univerally usable (though the application can - register its own atexit function with + when jemalloc in turn calls atexit(), so + this option is not universally usable (though the application can + register its own atexit() function with equivalent functionality). This option is disabled by default. @@ -1340,7 +1390,7 @@ malloc_conf = "xmalloc:true";]]> thread.allocated mallctl. This is useful for avoiding the overhead of repeated - mallctl* calls. + mallctl*() calls. @@ -1367,7 +1417,7 @@ malloc_conf = "xmalloc:true";]]> thread.deallocated mallctl. This is useful for avoiding the overhead of repeated - mallctl* calls. + mallctl*() calls. @@ -1418,8 +1468,8 @@ malloc_conf = "xmalloc:true";]]> can cause asynchronous string deallocation. Furthermore, each invocation of this interface can only read or write; simultaneous read/write is not supported due to string lifetime limitations. 
The - name string must nil-terminated and comprised only of characters in the - sets recognized + name string must be nil-terminated and comprised only of characters in + the sets recognized by isgraph 3 and isblank @@ -1467,7 +1517,7 @@ malloc_conf = "xmalloc:true";]]> Flush the specified thread-specific cache (tcache). The same considerations apply to this interface as to thread.tcache.flush, - except that the tcache will never be automatically be discarded. + except that the tcache will never be automatically discarded. @@ -1489,12 +1539,44 @@ malloc_conf = "xmalloc:true";]]> (void) -- - Purge unused dirty pages for arena <i>, or for + Purge all unused dirty pages for arena <i>, or for all arenas if <i> equals arenas.narenas. + + + arena.<i>.decay + (void) + -- + + Trigger decay-based purging of unused dirty pages for + arena <i>, or for all arenas if <i> equals arenas.narenas. + The proportion of unused dirty pages to be purged depends on the current + time; see opt.decay_time for + details. + + + + + arena.<i>.reset + (void) + -- + + Discard all of the arena's extant allocations. This + interface can only be used with arenas created via arenas.extend. None + of the arena's discarded/cached allocations may accessed afterward. As + part of this requirement, all thread caches which were used to + allocate/deallocate in conjunction with the arena must be flushed + beforehand. This interface cannot be used if running inside Valgrind, + nor if the quarantine size is + non-zero. + + arena.<i>.dss @@ -1523,6 +1605,22 @@ malloc_conf = "xmalloc:true";]]> for additional information. + + + arena.<i>.decay_time + (ssize_t) + rw + + Current per-arena approximate time in seconds from the + creation of a set of unused dirty pages until an equivalent set of + unused dirty pages is purged and/or reused. 
Each time this interface is + set, all currently unused dirty pages are considered to have fully + decayed, which causes immediate purging of all unused dirty pages unless + the decay time is set to -1 (i.e. purging disabled). See opt.decay_time for + additional information. + + arena.<i>.chunk_hooks @@ -1757,6 +1855,21 @@ typedef struct { for additional information. + + + arenas.decay_time + (ssize_t) + rw + + Current default per-arena approximate time in seconds + from the creation of a set of unused dirty pages until an equivalent set + of unused dirty pages is purged and/or reused, used to initialize arena.<i>.decay_time + during arena creation. See opt.decay_time for + additional information. + + arenas.quantum @@ -1976,7 +2089,7 @@ typedef struct { [] Average number of bytes allocated between - inverval-based profile dumps. See the + interval-based profile dumps. See the opt.lg_prof_interval option for additional information. @@ -2075,6 +2188,25 @@ typedef struct { linkend="stats.resident">stats.resident. + + + stats.retained + (size_t) + r- + [] + + Total number of bytes in virtual memory mappings that + were retained rather than being returned to the operating system via + e.g. munmap + 2. Retained virtual memory is + typically untouched, decommitted, or purged, so it has no strongly + associated physical memory (see chunk hooks for details). Retained + memory is excluded from mapped memory statistics, e.g. stats.mapped. + + + stats.arenas.<i>.dss @@ -2101,6 +2233,19 @@ typedef struct { for details. + + + stats.arenas.<i>.decay_time + (ssize_t) + r- + + Approximate time in seconds from the creation of a set + of unused dirty pages until an equivalent set of unused dirty pages is + purged and/or reused. See opt.decay_time + for details. + + stats.arenas.<i>.nthreads @@ -2142,6 +2287,18 @@ typedef struct { Number of mapped bytes. + + + stats.arenas.<i>.retained + (size_t) + r- + [] + + Number of retained bytes. See stats.retained for + details. 
+ + stats.arenas.<i>.metadata.mapped @@ -2523,6 +2680,53 @@ typedef struct { + + HEAP PROFILE FORMAT + Although the heap profiling functionality was originally designed to + be compatible with the + pprof command that is developed as part of the gperftools + package, the addition of per thread heap profiling functionality + required a different heap profile format. The jeprof + command is derived from pprof, with enhancements to + support the heap profile format described here. + + In the following hypothetical heap profile, [...] + indicates elision for the sake of compactness. The following matches the above heap profile, but most +tokens are replaced with <description> to indicate +descriptions of the corresponding fields. / + : : [: ] + [...] + : : [: ] + [...] + : : [: ] + [...] +@ [...] [...] + : : [: ] + : : [: ] + : : [: ] +[...] + +MAPPED_LIBRARIES: +/maps>]]> + + DEBUGGING MALLOC PROBLEMS When debugging, it is a good idea to configure/build jemalloc with @@ -2532,7 +2736,7 @@ typedef struct { of run-time assertions that catch application errors such as double-free, write-after-free, etc. - Programs often accidentally depend on “uninitialized” + Programs often accidentally depend on uninitialized memory actually being filled with zero bytes. Junk filling (see the opt.junk option) tends to expose such bugs in the form of obviously incorrect @@ -2561,29 +2765,29 @@ typedef struct { to override the function which emits the text strings forming the errors and warnings if for some reason the STDERR_FILENO file descriptor is not suitable for this. - malloc_message takes the + malloc_message() takes the cbopaque pointer argument that is NULL unless overridden by the arguments in a call to - malloc_stats_print, followed by a string + malloc_stats_print(), followed by a string pointer. Please note that doing anything which tries to allocate memory in this function is likely to result in a crash or deadlock. All messages are prefixed by - “<jemalloc>: ”. 
+ <jemalloc>: . RETURN VALUES Standard API - The malloc and - calloc functions return a pointer to the + The malloc() and + calloc() functions return a pointer to the allocated memory if successful; otherwise a NULL pointer is returned and errno is set to ENOMEM. - The posix_memalign function + The posix_memalign() function returns the value 0 if successful; otherwise it returns an error value. - The posix_memalign function will fail + The posix_memalign() function will fail if: @@ -2602,11 +2806,11 @@ typedef struct { - The aligned_alloc function returns + The aligned_alloc() function returns a pointer to the allocated memory if successful; otherwise a NULL pointer is returned and errno is set. The - aligned_alloc function will fail if: + aligned_alloc() function will fail if: EINVAL @@ -2623,44 +2827,44 @@ typedef struct { - The realloc function returns a + The realloc() function returns a pointer, possibly identical to ptr, to the allocated memory if successful; otherwise a NULL pointer is returned, and errno is set to ENOMEM if the error was the result of an - allocation failure. The realloc + allocation failure. The realloc() function always leaves the original buffer intact when an error occurs. - The free function returns no + The free() function returns no value. Non-standard API - The mallocx and - rallocx functions return a pointer to + The mallocx() and + rallocx() functions return a pointer to the allocated memory if successful; otherwise a NULL pointer is returned to indicate insufficient contiguous memory was available to service the allocation request. - The xallocx function returns the + The xallocx() function returns the real size of the resulting resized allocation pointed to by ptr, which is a value less than size if the allocation could not be adequately grown in place. - The sallocx function returns the + The sallocx() function returns the real size of the allocation pointed to by ptr. 
- The nallocx returns the real size + The nallocx() returns the real size that would result from a successful equivalent - mallocx function call, or zero if + mallocx() function call, or zero if insufficient memory is available to perform the size computation. - The mallctl, - mallctlnametomib, and - mallctlbymib functions return 0 on + The mallctl(), + mallctlnametomib(), and + mallctlbymib() functions return 0 on success; otherwise they return an error value. The functions will fail if: @@ -2696,13 +2900,13 @@ typedef struct { EFAULT An interface with side effects failed in some way - not directly related to mallctl* + not directly related to mallctl*() read/write processing. - The malloc_usable_size function + The malloc_usable_size() function returns the usable size of the allocation pointed to by ptr. @@ -2750,13 +2954,13 @@ malloc_conf = "lg_chunk:24";]]> STANDARDS - The malloc, - calloc, - realloc, and - free functions conform to ISO/IEC - 9899:1990 (“ISO C90”). + The malloc(), + calloc(), + realloc(), and + free() functions conform to ISO/IEC + 9899:1990 (ISO C90). - The posix_memalign function conforms - to IEEE Std 1003.1-2001 (“POSIX.1”). + The posix_memalign() function conforms + to IEEE Std 1003.1-2001 (POSIX.1). diff --git a/deps/jemalloc/doc/stylesheet.xsl b/deps/jemalloc/doc/stylesheet.xsl index 4e334a86f..619365d82 100644 --- a/deps/jemalloc/doc/stylesheet.xsl +++ b/deps/jemalloc/doc/stylesheet.xsl @@ -1,7 +1,10 @@ ansi - + + + + - "" + diff --git a/deps/jemalloc/include/jemalloc/internal/arena.h b/deps/jemalloc/include/jemalloc/internal/arena.h index 12c617979..ce4e6029e 100644 --- a/deps/jemalloc/include/jemalloc/internal/arena.h +++ b/deps/jemalloc/include/jemalloc/internal/arena.h @@ -23,14 +23,29 @@ */ #define LG_DIRTY_MULT_DEFAULT 3 +typedef enum { + purge_mode_ratio = 0, + purge_mode_decay = 1, + + purge_mode_limit = 2 +} purge_mode_t; +#define PURGE_DEFAULT purge_mode_ratio +/* Default decay time in seconds. 
*/ +#define DECAY_TIME_DEFAULT 10 +/* Number of event ticks between time checks. */ +#define DECAY_NTICKS_PER_UPDATE 1000 + typedef struct arena_runs_dirty_link_s arena_runs_dirty_link_t; +typedef struct arena_avail_links_s arena_avail_links_t; typedef struct arena_run_s arena_run_t; typedef struct arena_chunk_map_bits_s arena_chunk_map_bits_t; typedef struct arena_chunk_map_misc_s arena_chunk_map_misc_t; typedef struct arena_chunk_s arena_chunk_t; typedef struct arena_bin_info_s arena_bin_info_t; +typedef struct arena_decay_s arena_decay_t; typedef struct arena_bin_s arena_bin_t; typedef struct arena_s arena_t; +typedef struct arena_tdata_s arena_tdata_t; #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ @@ -140,13 +155,13 @@ struct arena_runs_dirty_link_s { */ struct arena_chunk_map_misc_s { /* - * Linkage for run trees. There are two disjoint uses: + * Linkage for run heaps. There are two disjoint uses: * - * 1) arena_t's runs_avail tree. + * 1) arena_t's runs_avail heaps. * 2) arena_run_t conceptually uses this linkage for in-use non-full * runs, rather than directly embedding linkage. */ - rb_node(arena_chunk_map_misc_t) rb_link; + phn(arena_chunk_map_misc_t) ph_link; union { /* Linkage for list of dirty runs. */ @@ -154,16 +169,15 @@ struct arena_chunk_map_misc_s { /* Profile counters, used for large object runs. */ union { - void *prof_tctx_pun; - prof_tctx_t *prof_tctx; + void *prof_tctx_pun; + prof_tctx_t *prof_tctx; }; /* Small region run metadata. */ arena_run_t run; }; }; -typedef rb_tree(arena_chunk_map_misc_t) arena_avail_tree_t; -typedef rb_tree(arena_chunk_map_misc_t) arena_run_tree_t; +typedef ph(arena_chunk_map_misc_t) arena_run_heap_t; #endif /* JEMALLOC_ARENA_STRUCTS_A */ #ifdef JEMALLOC_ARENA_STRUCTS_B @@ -176,6 +190,14 @@ struct arena_chunk_s { */ extent_node_t node; + /* + * True if memory could be backed by transparent huge pages. 
This is + * only directly relevant to Linux, since it is the only supported + * platform on which jemalloc interacts with explicit transparent huge + * page controls. + */ + bool hugepage; + /* * Map of pages within chunk that keeps track of free/large/small. The * first map_bias entries are omitted, since the chunk header does not @@ -220,28 +242,71 @@ struct arena_chunk_s { */ struct arena_bin_info_s { /* Size of regions in a run for this bin's size class. */ - size_t reg_size; + size_t reg_size; /* Redzone size. */ - size_t redzone_size; + size_t redzone_size; /* Interval between regions (reg_size + (redzone_size << 1)). */ - size_t reg_interval; + size_t reg_interval; /* Total size of a run for this bin's size class. */ - size_t run_size; + size_t run_size; /* Total number of regions in a run for this bin's size class. */ - uint32_t nregs; + uint32_t nregs; /* * Metadata used to manipulate bitmaps for runs associated with this * bin. */ - bitmap_info_t bitmap_info; + bitmap_info_t bitmap_info; /* Offset of first region in a run for this bin's size class. */ - uint32_t reg0_offset; + uint32_t reg0_offset; +}; + +struct arena_decay_s { + /* + * Approximate time in seconds from the creation of a set of unused + * dirty pages until an equivalent set of unused dirty pages is purged + * and/or reused. + */ + ssize_t time; + /* time / SMOOTHSTEP_NSTEPS. */ + nstime_t interval; + /* + * Time at which the current decay interval logically started. We do + * not actually advance to a new epoch until sometime after it starts + * because of scheduling and computation delays, and it is even possible + * to completely skip epochs. In all cases, during epoch advancement we + * merge all relevant activity into the most recently recorded epoch. + */ + nstime_t epoch; + /* Deadline randomness generator. */ + uint64_t jitter_state; + /* + * Deadline for current epoch. This is the sum of interval and per + * epoch jitter which is a uniform random variable in [0..interval). 
+ * Epochs always advance by precise multiples of interval, but we + * randomize the deadline to reduce the likelihood of arenas purging in + * lockstep. + */ + nstime_t deadline; + /* + * Number of dirty pages at beginning of current epoch. During epoch + * advancement we use the delta between arena->decay.ndirty and + * arena->ndirty to determine how many dirty pages, if any, were + * generated. + */ + size_t ndirty; + /* + * Trailing log of how many unused dirty pages were generated during + * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last + * element is the most recent epoch. Corresponding epoch times are + * relative to epoch. + */ + size_t backlog[SMOOTHSTEP_NSTEPS]; }; struct arena_bin_s { @@ -251,25 +316,25 @@ struct arena_bin_s { * which may be acquired while holding one or more bin locks, but not * vise versa. */ - malloc_mutex_t lock; + malloc_mutex_t lock; /* * Current run being used to service allocations of this bin's size * class. */ - arena_run_t *runcur; + arena_run_t *runcur; /* - * Tree of non-full runs. This tree is used when looking for an + * Heap of non-full runs. This heap is used when looking for an * existing run when runcur is no longer usable. We choose the * non-full run that is lowest in memory; this policy tends to keep * objects packed well, and it can also help reduce the number of * almost-empty chunks. */ - arena_run_tree_t runs; + arena_run_heap_t runs; /* Bin statistics. */ - malloc_bin_stats_t stats; + malloc_bin_stats_t stats; }; struct arena_s { @@ -277,15 +342,23 @@ struct arena_s { unsigned ind; /* - * Number of threads currently assigned to this arena. This field is - * protected by arenas_lock. + * Number of threads currently assigned to this arena, synchronized via + * atomic operations. Each thread has two distinct assignments, one for + * application-serving allocation, and the other for internal metadata + * allocation. 
Internal metadata must not be allocated from arenas + * created via the arenas.extend mallctl, because the arena..reset + * mallctl indiscriminately discards all allocations for the affected + * arena. + * + * 0: Application allocation. + * 1: Internal metadata allocation. */ - unsigned nthreads; + unsigned nthreads[2]; /* * There are three classes of arena operations from a locking * perspective: - * 1) Thread assignment (modifies nthreads) is protected by arenas_lock. + * 1) Thread assignment (modifies nthreads) is synchronized via atomics. * 2) Bin-related operations are protected by bin locks. * 3) Chunk- and run-related operations are protected by this mutex. */ @@ -305,10 +378,16 @@ struct arena_s { * PRNG state for cache index randomization of large allocation base * pointers. */ - uint64_t offset_state; + size_t offset_state; dss_prec_t dss_prec; + /* Extant arena chunks. */ + ql_head(extent_node_t) achunks; + + /* Extent serial number generator state. */ + size_t extent_sn_next; + /* * In order to avoid rapid chunk allocation/deallocation when an arena * oscillates right on the cusp of needing a new chunk, cache the most @@ -324,7 +403,7 @@ struct arena_s { /* Minimum ratio (log base 2) of nactive:ndirty. */ ssize_t lg_dirty_mult; - /* True if a thread is currently executing arena_purge(). */ + /* True if a thread is currently executing arena_purge_to_limit(). */ bool purging; /* Number of pages in active runs and huge regions. */ @@ -338,12 +417,6 @@ struct arena_s { */ size_t ndirty; - /* - * Size/address-ordered tree of this arena's available runs. The tree - * is used for first-best-fit run allocation. - */ - arena_avail_tree_t runs_avail; - /* * Unused dirty memory this arena manages. Dirty memory is conceptually * tracked as an arbitrarily interleaved LRU of dirty runs and cached @@ -375,6 +448,9 @@ struct arena_s { arena_runs_dirty_link_t runs_dirty; extent_node_t chunks_cache; + /* Decay-based purging state. 
*/ + arena_decay_t decay; + /* Extant huge allocations. */ ql_head(extent_node_t) huge; /* Synchronizes all huge allocation/update/deallocation. */ @@ -387,9 +463,9 @@ struct arena_s { * orderings are needed, which is why there are two trees with the same * contents. */ - extent_tree_t chunks_szad_cached; + extent_tree_t chunks_szsnad_cached; extent_tree_t chunks_ad_cached; - extent_tree_t chunks_szad_retained; + extent_tree_t chunks_szsnad_retained; extent_tree_t chunks_ad_retained; malloc_mutex_t chunks_mtx; @@ -402,6 +478,19 @@ struct arena_s { /* bins is used to store trees of free regions. */ arena_bin_t bins[NBINS]; + + /* + * Size-segregated address-ordered heaps of this arena's available runs, + * used for first-best-fit run allocation. Runs are quantized, i.e. + * they reside in the last heap which corresponds to a size class less + * than or equal to the run size. + */ + arena_run_heap_t runs_avail[NPSIZES]; +}; + +/* Used in conjunction with tsd for fast arena-related context lookup. */ +struct arena_tdata_s { + ticker_t decay_ticker; }; #endif /* JEMALLOC_ARENA_STRUCTS_B */ @@ -417,7 +506,10 @@ static const size_t large_pad = #endif ; +extern purge_mode_t opt_purge; +extern const char *purge_mode_names[]; extern ssize_t opt_lg_dirty_mult; +extern ssize_t opt_decay_time; extern arena_bin_info_t arena_bin_info[NBINS]; @@ -428,27 +520,37 @@ extern size_t large_maxclass; /* Max large size class. */ extern unsigned nlclasses; /* Number of large size classes. */ extern unsigned nhclasses; /* Number of huge size classes. 
*/ +#ifdef JEMALLOC_JET +typedef size_t (run_quantize_t)(size_t); +extern run_quantize_t *run_quantize_floor; +extern run_quantize_t *run_quantize_ceil; +#endif void arena_chunk_cache_maybe_insert(arena_t *arena, extent_node_t *node, bool cache); void arena_chunk_cache_maybe_remove(arena_t *arena, extent_node_t *node, bool cache); -extent_node_t *arena_node_alloc(arena_t *arena); -void arena_node_dalloc(arena_t *arena, extent_node_t *node); -void *arena_chunk_alloc_huge(arena_t *arena, size_t usize, size_t alignment, - bool *zero); -void arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t usize); -void arena_chunk_ralloc_huge_similar(arena_t *arena, void *chunk, - size_t oldsize, size_t usize); -void arena_chunk_ralloc_huge_shrink(arena_t *arena, void *chunk, - size_t oldsize, size_t usize); -bool arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, - size_t oldsize, size_t usize, bool *zero); -ssize_t arena_lg_dirty_mult_get(arena_t *arena); -bool arena_lg_dirty_mult_set(arena_t *arena, ssize_t lg_dirty_mult); -void arena_maybe_purge(arena_t *arena); -void arena_purge_all(arena_t *arena); -void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, - szind_t binind, uint64_t prof_accumbytes); +extent_node_t *arena_node_alloc(tsdn_t *tsdn, arena_t *arena); +void arena_node_dalloc(tsdn_t *tsdn, arena_t *arena, extent_node_t *node); +void *arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize, + size_t alignment, size_t *sn, bool *zero); +void arena_chunk_dalloc_huge(tsdn_t *tsdn, arena_t *arena, void *chunk, + size_t usize, size_t sn); +void arena_chunk_ralloc_huge_similar(tsdn_t *tsdn, arena_t *arena, + void *chunk, size_t oldsize, size_t usize); +void arena_chunk_ralloc_huge_shrink(tsdn_t *tsdn, arena_t *arena, + void *chunk, size_t oldsize, size_t usize, size_t sn); +bool arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, + void *chunk, size_t oldsize, size_t usize, bool *zero); +ssize_t arena_lg_dirty_mult_get(tsdn_t 
*tsdn, arena_t *arena); +bool arena_lg_dirty_mult_set(tsdn_t *tsdn, arena_t *arena, + ssize_t lg_dirty_mult); +ssize_t arena_decay_time_get(tsdn_t *tsdn, arena_t *arena); +bool arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time); +void arena_purge(tsdn_t *tsdn, arena_t *arena, bool all); +void arena_maybe_purge(tsdn_t *tsdn, arena_t *arena); +void arena_reset(tsd_t *tsd, arena_t *arena); +void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, + tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes); void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, bool zero); #ifdef JEMALLOC_JET @@ -461,75 +563,100 @@ extern arena_dalloc_junk_small_t *arena_dalloc_junk_small; void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info); #endif void arena_quarantine_junk_small(void *ptr, size_t usize); -void *arena_malloc_small(arena_t *arena, size_t size, bool zero); -void *arena_malloc_large(arena_t *arena, size_t size, bool zero); -void *arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, +void *arena_malloc_large(tsdn_t *tsdn, arena_t *arena, szind_t ind, + bool zero); +void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, + szind_t ind, bool zero); +void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache); -void arena_prof_promoted(const void *ptr, size_t size); -void arena_dalloc_bin_junked_locked(arena_t *arena, arena_chunk_t *chunk, - void *ptr, arena_chunk_map_bits_t *bitselm); -void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, - size_t pageind, arena_chunk_map_bits_t *bitselm); -void arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, - size_t pageind); +void arena_prof_promoted(tsdn_t *tsdn, const void *ptr, size_t size); +void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, + arena_chunk_t *chunk, void *ptr, arena_chunk_map_bits_t *bitselm); +void arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, 
arena_chunk_t *chunk, + void *ptr, size_t pageind, arena_chunk_map_bits_t *bitselm); +void arena_dalloc_small(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, + void *ptr, size_t pageind); #ifdef JEMALLOC_JET typedef void (arena_dalloc_junk_large_t)(void *, size_t); extern arena_dalloc_junk_large_t *arena_dalloc_junk_large; #else void arena_dalloc_junk_large(void *ptr, size_t usize); #endif -void arena_dalloc_large_junked_locked(arena_t *arena, arena_chunk_t *chunk, +void arena_dalloc_large_junked_locked(tsdn_t *tsdn, arena_t *arena, + arena_chunk_t *chunk, void *ptr); +void arena_dalloc_large(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, void *ptr); -void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr); #ifdef JEMALLOC_JET typedef void (arena_ralloc_junk_large_t)(void *, size_t, size_t); extern arena_ralloc_junk_large_t *arena_ralloc_junk_large; #endif -bool arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, - size_t extra, bool zero); +bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, + size_t size, size_t extra, bool zero); void *arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache); -dss_prec_t arena_dss_prec_get(arena_t *arena); -bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); +dss_prec_t arena_dss_prec_get(tsdn_t *tsdn, arena_t *arena); +bool arena_dss_prec_set(tsdn_t *tsdn, arena_t *arena, dss_prec_t dss_prec); ssize_t arena_lg_dirty_mult_default_get(void); bool arena_lg_dirty_mult_default_set(ssize_t lg_dirty_mult); -void arena_stats_merge(arena_t *arena, const char **dss, - ssize_t *lg_dirty_mult, size_t *nactive, size_t *ndirty, - arena_stats_t *astats, malloc_bin_stats_t *bstats, - malloc_large_stats_t *lstats, malloc_huge_stats_t *hstats); -arena_t *arena_new(unsigned ind); -bool arena_boot(void); -void arena_prefork(arena_t *arena); -void arena_postfork_parent(arena_t *arena); -void arena_postfork_child(arena_t 
*arena); +ssize_t arena_decay_time_default_get(void); +bool arena_decay_time_default_set(ssize_t decay_time); +void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, + unsigned *nthreads, const char **dss, ssize_t *lg_dirty_mult, + ssize_t *decay_time, size_t *nactive, size_t *ndirty); +void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, + const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time, + size_t *nactive, size_t *ndirty, arena_stats_t *astats, + malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats, + malloc_huge_stats_t *hstats); +unsigned arena_nthreads_get(arena_t *arena, bool internal); +void arena_nthreads_inc(arena_t *arena, bool internal); +void arena_nthreads_dec(arena_t *arena, bool internal); +size_t arena_extent_sn_next(arena_t *arena); +arena_t *arena_new(tsdn_t *tsdn, unsigned ind); +void arena_boot(void); +void arena_prefork0(tsdn_t *tsdn, arena_t *arena); +void arena_prefork1(tsdn_t *tsdn, arena_t *arena); +void arena_prefork2(tsdn_t *tsdn, arena_t *arena); +void arena_prefork3(tsdn_t *tsdn, arena_t *arena); +void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena); +void arena_postfork_child(tsdn_t *tsdn, arena_t *arena); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -arena_chunk_map_bits_t *arena_bitselm_get(arena_chunk_t *chunk, +arena_chunk_map_bits_t *arena_bitselm_get_mutable(arena_chunk_t *chunk, size_t pageind); -arena_chunk_map_misc_t *arena_miscelm_get(arena_chunk_t *chunk, +const arena_chunk_map_bits_t *arena_bitselm_get_const( + const arena_chunk_t *chunk, size_t pageind); +arena_chunk_map_misc_t *arena_miscelm_get_mutable(arena_chunk_t *chunk, size_t pageind); -size_t arena_miscelm_to_pageind(arena_chunk_map_misc_t *miscelm); -void *arena_miscelm_to_rpages(arena_chunk_map_misc_t *miscelm); +const arena_chunk_map_misc_t *arena_miscelm_get_const( + const arena_chunk_t *chunk, 
size_t pageind); +size_t arena_miscelm_to_pageind(const arena_chunk_map_misc_t *miscelm); +void *arena_miscelm_to_rpages(const arena_chunk_map_misc_t *miscelm); arena_chunk_map_misc_t *arena_rd_to_miscelm(arena_runs_dirty_link_t *rd); arena_chunk_map_misc_t *arena_run_to_miscelm(arena_run_t *run); -size_t *arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbitsp_read(size_t *mapbitsp); -size_t arena_mapbits_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_size_decode(size_t mapbits); -size_t arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, +size_t *arena_mapbitsp_get_mutable(arena_chunk_t *chunk, size_t pageind); +const size_t *arena_mapbitsp_get_const(const arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind); -szind_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_decommitted_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbitsp_read(const size_t *mapbitsp); +size_t arena_mapbits_get(const arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_size_decode(size_t mapbits); +size_t arena_mapbits_unallocated_size_get(const arena_chunk_t *chunk, + size_t pageind); +size_t arena_mapbits_large_size_get(const arena_chunk_t *chunk, + size_t pageind); +size_t arena_mapbits_small_runind_get(const arena_chunk_t *chunk, + size_t pageind); +szind_t arena_mapbits_binind_get(const arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_dirty_get(const arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_unzeroed_get(const arena_chunk_t 
*chunk, size_t pageind); +size_t arena_mapbits_decommitted_get(const arena_chunk_t *chunk, + size_t pageind); +size_t arena_mapbits_large_get(const arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_allocated_get(const arena_chunk_t *chunk, size_t pageind); void arena_mapbitsp_write(size_t *mapbitsp, size_t mapbits); size_t arena_mapbits_size_encode(size_t size); void arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, @@ -549,27 +676,31 @@ void arena_metadata_allocated_sub(arena_t *arena, size_t size); size_t arena_metadata_allocated_get(arena_t *arena); bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes); bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes); -bool arena_prof_accum(arena_t *arena, uint64_t accumbytes); +bool arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes); szind_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits); szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin); -unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, +size_t arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr); -prof_tctx_t *arena_prof_tctx_get(const void *ptr); -void arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx); -void arena_prof_tctx_reset(const void *ptr, size_t usize, +prof_tctx_t *arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr); +void arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, + prof_tctx_t *tctx); +void arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize, const void *old_ptr, prof_tctx_t *old_tctx); -void *arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, - tcache_t *tcache); +void arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks); +void arena_decay_tick(tsdn_t *tsdn, arena_t *arena); +void *arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, + bool zero, tcache_t *tcache, bool slow_path); arena_t *arena_aalloc(const void *ptr); 
-size_t arena_salloc(const void *ptr, bool demote); -void arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache); -void arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache); +size_t arena_salloc(tsdn_t *tsdn, const void *ptr, bool demote); +void arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path); +void arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, + bool slow_path); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) # ifdef JEMALLOC_ARENA_INLINE_A JEMALLOC_ALWAYS_INLINE arena_chunk_map_bits_t * -arena_bitselm_get(arena_chunk_t *chunk, size_t pageind) +arena_bitselm_get_mutable(arena_chunk_t *chunk, size_t pageind) { assert(pageind >= map_bias); @@ -578,8 +709,15 @@ arena_bitselm_get(arena_chunk_t *chunk, size_t pageind) return (&chunk->map_bits[pageind-map_bias]); } +JEMALLOC_ALWAYS_INLINE const arena_chunk_map_bits_t * +arena_bitselm_get_const(const arena_chunk_t *chunk, size_t pageind) +{ + + return (arena_bitselm_get_mutable((arena_chunk_t *)chunk, pageind)); +} + JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t * -arena_miscelm_get(arena_chunk_t *chunk, size_t pageind) +arena_miscelm_get_mutable(arena_chunk_t *chunk, size_t pageind) { assert(pageind >= map_bias); @@ -589,8 +727,15 @@ arena_miscelm_get(arena_chunk_t *chunk, size_t pageind) (uintptr_t)map_misc_offset) + pageind-map_bias); } +JEMALLOC_ALWAYS_INLINE const arena_chunk_map_misc_t * +arena_miscelm_get_const(const arena_chunk_t *chunk, size_t pageind) +{ + + return (arena_miscelm_get_mutable((arena_chunk_t *)chunk, pageind)); +} + JEMALLOC_ALWAYS_INLINE size_t -arena_miscelm_to_pageind(arena_chunk_map_misc_t *miscelm) +arena_miscelm_to_pageind(const arena_chunk_map_misc_t *miscelm) { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); size_t pageind = ((uintptr_t)miscelm - ((uintptr_t)chunk + @@ -603,7 +748,7 @@ arena_miscelm_to_pageind(arena_chunk_map_misc_t *miscelm) } JEMALLOC_ALWAYS_INLINE void * 
-arena_miscelm_to_rpages(arena_chunk_map_misc_t *miscelm) +arena_miscelm_to_rpages(const arena_chunk_map_misc_t *miscelm) { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); size_t pageind = arena_miscelm_to_pageind(miscelm); @@ -636,24 +781,31 @@ arena_run_to_miscelm(arena_run_t *run) } JEMALLOC_ALWAYS_INLINE size_t * -arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind) +arena_mapbitsp_get_mutable(arena_chunk_t *chunk, size_t pageind) { - return (&arena_bitselm_get(chunk, pageind)->bits); + return (&arena_bitselm_get_mutable(chunk, pageind)->bits); +} + +JEMALLOC_ALWAYS_INLINE const size_t * +arena_mapbitsp_get_const(const arena_chunk_t *chunk, size_t pageind) +{ + + return (arena_mapbitsp_get_mutable((arena_chunk_t *)chunk, pageind)); } JEMALLOC_ALWAYS_INLINE size_t -arena_mapbitsp_read(size_t *mapbitsp) +arena_mapbitsp_read(const size_t *mapbitsp) { return (*mapbitsp); } JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_get(arena_chunk_t *chunk, size_t pageind) +arena_mapbits_get(const arena_chunk_t *chunk, size_t pageind) { - return (arena_mapbitsp_read(arena_mapbitsp_get(chunk, pageind))); + return (arena_mapbitsp_read(arena_mapbitsp_get_const(chunk, pageind))); } JEMALLOC_ALWAYS_INLINE size_t @@ -673,7 +825,7 @@ arena_mapbits_size_decode(size_t mapbits) } JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind) +arena_mapbits_unallocated_size_get(const arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -683,7 +835,7 @@ arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind) } JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind) +arena_mapbits_large_size_get(const arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -694,7 +846,7 @@ arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind) } JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind) 
+arena_mapbits_small_runind_get(const arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -705,7 +857,7 @@ arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind) } JEMALLOC_ALWAYS_INLINE szind_t -arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind) +arena_mapbits_binind_get(const arena_chunk_t *chunk, size_t pageind) { size_t mapbits; szind_t binind; @@ -717,7 +869,7 @@ arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind) } JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind) +arena_mapbits_dirty_get(const arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -728,7 +880,7 @@ arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind) } JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind) +arena_mapbits_unzeroed_get(const arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -739,7 +891,7 @@ arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind) } JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_decommitted_get(arena_chunk_t *chunk, size_t pageind) +arena_mapbits_decommitted_get(const arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -750,7 +902,7 @@ arena_mapbits_decommitted_get(arena_chunk_t *chunk, size_t pageind) } JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind) +arena_mapbits_large_get(const arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -759,7 +911,7 @@ arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind) } JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind) +arena_mapbits_allocated_get(const arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -795,7 +947,7 @@ JEMALLOC_ALWAYS_INLINE void arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size, size_t flags) { - size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); + size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, 
pageind); assert((size & PAGE_MASK) == 0); assert((flags & CHUNK_MAP_FLAGS_MASK) == flags); @@ -809,7 +961,7 @@ JEMALLOC_ALWAYS_INLINE void arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, size_t size) { - size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); + size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind); size_t mapbits = arena_mapbitsp_read(mapbitsp); assert((size & PAGE_MASK) == 0); @@ -821,7 +973,7 @@ arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, JEMALLOC_ALWAYS_INLINE void arena_mapbits_internal_set(arena_chunk_t *chunk, size_t pageind, size_t flags) { - size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); + size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind); assert((flags & CHUNK_MAP_UNZEROED) == flags); arena_mapbitsp_write(mapbitsp, flags); @@ -831,7 +983,7 @@ JEMALLOC_ALWAYS_INLINE void arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size, size_t flags) { - size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); + size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind); assert((size & PAGE_MASK) == 0); assert((flags & CHUNK_MAP_FLAGS_MASK) == flags); @@ -846,7 +998,7 @@ JEMALLOC_ALWAYS_INLINE void arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, szind_t binind) { - size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); + size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind); size_t mapbits = arena_mapbitsp_read(mapbitsp); assert(binind <= BININD_INVALID); @@ -860,7 +1012,7 @@ JEMALLOC_ALWAYS_INLINE void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind, szind_t binind, size_t flags) { - size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); + size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind); assert(binind < BININD_INVALID); assert(pageind - runind >= map_bias); @@ -917,7 +1069,7 @@ arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes) } JEMALLOC_INLINE bool 
-arena_prof_accum(arena_t *arena, uint64_t accumbytes) +arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) { cassert(config_prof); @@ -928,9 +1080,9 @@ arena_prof_accum(arena_t *arena, uint64_t accumbytes) { bool ret; - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); ret = arena_prof_accum_impl(arena, accumbytes); - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); return (ret); } } @@ -948,12 +1100,12 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) size_t pageind; size_t actual_mapbits; size_t rpages_ind; - arena_run_t *run; + const arena_run_t *run; arena_bin_t *bin; szind_t run_binind, actual_binind; arena_bin_info_t *bin_info; - arena_chunk_map_misc_t *miscelm; - void *rpages; + const arena_chunk_map_misc_t *miscelm; + const void *rpages; assert(binind != BININD_INVALID); assert(binind < NBINS); @@ -966,11 +1118,11 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) assert(arena_mapbits_allocated_get(chunk, pageind) != 0); rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, pageind); - miscelm = arena_miscelm_get(chunk, rpages_ind); + miscelm = arena_miscelm_get_const(chunk, rpages_ind); run = &miscelm->run; run_binind = run->binind; bin = &arena->bins[run_binind]; - actual_binind = bin - arena->bins; + actual_binind = (szind_t)(bin - arena->bins); assert(run_binind == actual_binind); bin_info = &arena_bin_info[actual_binind]; rpages = arena_miscelm_to_rpages(miscelm); @@ -987,16 +1139,15 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) JEMALLOC_INLINE szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin) { - szind_t binind = bin - arena->bins; + szind_t binind = (szind_t)(bin - arena->bins); assert(binind < NBINS); return (binind); } -JEMALLOC_INLINE unsigned +JEMALLOC_INLINE size_t arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) { - unsigned shift, diff, regind; - size_t interval; + size_t diff, interval, 
shift, regind; arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); void *rpages = arena_miscelm_to_rpages(miscelm); @@ -1011,12 +1162,12 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) * Avoid doing division with a variable divisor if possible. Using * actual division here can reduce allocator throughput by over 20%! */ - diff = (unsigned)((uintptr_t)ptr - (uintptr_t)rpages - + diff = (size_t)((uintptr_t)ptr - (uintptr_t)rpages - bin_info->reg0_offset); /* Rescale (factor powers of 2 out of the numerator and denominator). */ interval = bin_info->reg_interval; - shift = jemalloc_ffs(interval) - 1; + shift = ffs_zu(interval) - 1; diff >>= shift; interval >>= shift; @@ -1038,9 +1189,9 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) * divide by 0, and 1 and 2 are both powers of two, which are * handled above. */ -#define SIZE_INV_SHIFT ((sizeof(unsigned) << 3) - LG_RUN_MAXREGS) -#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1) - static const unsigned interval_invs[] = { +#define SIZE_INV_SHIFT ((sizeof(size_t) << 3) - LG_RUN_MAXREGS) +#define SIZE_INV(s) (((ZU(1) << SIZE_INV_SHIFT) / (s)) + 1) + static const size_t interval_invs[] = { SIZE_INV(3), SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7), SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11), @@ -1051,8 +1202,8 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31) }; - if (likely(interval <= ((sizeof(interval_invs) / - sizeof(unsigned)) + 2))) { + if (likely(interval <= ((sizeof(interval_invs) / sizeof(size_t)) + + 2))) { regind = (diff * interval_invs[interval - 3]) >> SIZE_INV_SHIFT; } else @@ -1067,7 +1218,7 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) } JEMALLOC_INLINE prof_tctx_t * -arena_prof_tctx_get(const void *ptr) +arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr) { prof_tctx_t *ret; 
arena_chunk_t *chunk; @@ -1083,18 +1234,19 @@ arena_prof_tctx_get(const void *ptr) if (likely((mapbits & CHUNK_MAP_LARGE) == 0)) ret = (prof_tctx_t *)(uintptr_t)1U; else { - arena_chunk_map_misc_t *elm = arena_miscelm_get(chunk, - pageind); + arena_chunk_map_misc_t *elm = + arena_miscelm_get_mutable(chunk, pageind); ret = atomic_read_p(&elm->prof_tctx_pun); } } else - ret = huge_prof_tctx_get(ptr); + ret = huge_prof_tctx_get(tsdn, ptr); return (ret); } JEMALLOC_INLINE void -arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx) +arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, + prof_tctx_t *tctx) { arena_chunk_t *chunk; @@ -1113,7 +1265,7 @@ arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx) assert(arena_mapbits_large_get(chunk, pageind) != 0); - elm = arena_miscelm_get(chunk, pageind); + elm = arena_miscelm_get_mutable(chunk, pageind); atomic_write_p(&elm->prof_tctx_pun, tctx); } else { /* @@ -1125,12 +1277,12 @@ arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx) assert(arena_mapbits_large_get(chunk, pageind) == 0); } } else - huge_prof_tctx_set(ptr, tctx); + huge_prof_tctx_set(tsdn, ptr, tctx); } JEMALLOC_INLINE void -arena_prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, - prof_tctx_t *old_tctx) +arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize, + const void *old_ptr, prof_tctx_t *old_tctx) { cassert(config_prof); @@ -1149,43 +1301,59 @@ arena_prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, 0); assert(arena_mapbits_large_get(chunk, pageind) != 0); - elm = arena_miscelm_get(chunk, pageind); + elm = arena_miscelm_get_mutable(chunk, pageind); atomic_write_p(&elm->prof_tctx_pun, (prof_tctx_t *)(uintptr_t)1U); } else - huge_prof_tctx_reset(ptr); + huge_prof_tctx_reset(tsdn, ptr); } } -JEMALLOC_ALWAYS_INLINE void * -arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, - tcache_t *tcache) +JEMALLOC_ALWAYS_INLINE void 
+arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) +{ + tsd_t *tsd; + ticker_t *decay_ticker; + + if (unlikely(tsdn_null(tsdn))) + return; + tsd = tsdn_tsd(tsdn); + decay_ticker = decay_ticker_get(tsd, arena->ind); + if (unlikely(decay_ticker == NULL)) + return; + if (unlikely(ticker_ticks(decay_ticker, nticks))) + arena_purge(tsdn, arena, false); +} + +JEMALLOC_ALWAYS_INLINE void +arena_decay_tick(tsdn_t *tsdn, arena_t *arena) { + arena_decay_ticks(tsdn, arena, 1); +} + +JEMALLOC_ALWAYS_INLINE void * +arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, + tcache_t *tcache, bool slow_path) +{ + + assert(!tsdn_null(tsdn) || tcache == NULL); assert(size != 0); - arena = arena_choose(tsd, arena); - if (unlikely(arena == NULL)) - return (NULL); + if (likely(tcache != NULL)) { + if (likely(size <= SMALL_MAXCLASS)) { + return (tcache_alloc_small(tsdn_tsd(tsdn), arena, + tcache, size, ind, zero, slow_path)); + } + if (likely(size <= tcache_maxclass)) { + return (tcache_alloc_large(tsdn_tsd(tsdn), arena, + tcache, size, ind, zero, slow_path)); + } + /* (size > tcache_maxclass) case falls through. */ + assert(size > tcache_maxclass); + } - if (likely(size <= SMALL_MAXCLASS)) { - if (likely(tcache != NULL)) { - return (tcache_alloc_small(tsd, arena, tcache, size, - zero)); - } else - return (arena_malloc_small(arena, size, zero)); - } else if (likely(size <= large_maxclass)) { - /* - * Initialize tcache after checking size in order to avoid - * infinite recursion during tcache initialization. 
- */ - if (likely(tcache != NULL) && size <= tcache_maxclass) { - return (tcache_alloc_large(tsd, arena, tcache, size, - zero)); - } else - return (arena_malloc_large(arena, size, zero)); - } else - return (huge_malloc(tsd, arena, size, zero, tcache)); + return (arena_malloc_hard(tsdn, arena, size, ind, zero)); } JEMALLOC_ALWAYS_INLINE arena_t * @@ -1202,7 +1370,7 @@ arena_aalloc(const void *ptr) /* Return the size of the allocation pointed to by ptr. */ JEMALLOC_ALWAYS_INLINE size_t -arena_salloc(const void *ptr, bool demote) +arena_salloc(tsdn_t *tsdn, const void *ptr, bool demote) { size_t ret; arena_chunk_t *chunk; @@ -1245,17 +1413,18 @@ arena_salloc(const void *ptr, bool demote) ret = index2size(binind); } } else - ret = huge_salloc(ptr); + ret = huge_salloc(tsdn, ptr); return (ret); } JEMALLOC_ALWAYS_INLINE void -arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) +arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path) { arena_chunk_t *chunk; size_t pageind, mapbits; + assert(!tsdn_null(tsdn) || tcache == NULL); assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); @@ -1268,10 +1437,12 @@ arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) if (likely(tcache != NULL)) { szind_t binind = arena_ptr_small_binind_get(ptr, mapbits); - tcache_dalloc_small(tsd, tcache, ptr, binind); + tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, + binind, slow_path); } else { - arena_dalloc_small(extent_node_arena_get( - &chunk->node), chunk, ptr, pageind); + arena_dalloc_small(tsdn, + extent_node_arena_get(&chunk->node), chunk, + ptr, pageind); } } else { size_t size = arena_mapbits_large_size_get(chunk, @@ -1282,28 +1453,33 @@ arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) if (likely(tcache != NULL) && size - large_pad <= tcache_maxclass) { - tcache_dalloc_large(tsd, tcache, ptr, size - - large_pad); + tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, + size - large_pad, slow_path); } else { - arena_dalloc_large(extent_node_arena_get( - 
&chunk->node), chunk, ptr); + arena_dalloc_large(tsdn, + extent_node_arena_get(&chunk->node), chunk, + ptr); } } } else - huge_dalloc(tsd, ptr, tcache); + huge_dalloc(tsdn, ptr); } JEMALLOC_ALWAYS_INLINE void -arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) +arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, + bool slow_path) { arena_chunk_t *chunk; + assert(!tsdn_null(tsdn) || tcache == NULL); + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (likely(chunk != ptr)) { if (config_prof && opt_prof) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - assert(arena_mapbits_allocated_get(chunk, pageind) != 0); + assert(arena_mapbits_allocated_get(chunk, pageind) != + 0); if (arena_mapbits_large_get(chunk, pageind) != 0) { /* * Make sure to use promoted size, not request @@ -1313,32 +1489,36 @@ arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) pageind) - large_pad; } } - assert(s2u(size) == s2u(arena_salloc(ptr, false))); + assert(s2u(size) == s2u(arena_salloc(tsdn, ptr, false))); if (likely(size <= SMALL_MAXCLASS)) { /* Small allocation. 
*/ if (likely(tcache != NULL)) { szind_t binind = size2index(size); - tcache_dalloc_small(tsd, tcache, ptr, binind); + tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, + binind, slow_path); } else { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - arena_dalloc_small(extent_node_arena_get( - &chunk->node), chunk, ptr, pageind); + arena_dalloc_small(tsdn, + extent_node_arena_get(&chunk->node), chunk, + ptr, pageind); } } else { assert(config_cache_oblivious || ((uintptr_t)ptr & PAGE_MASK) == 0); - if (likely(tcache != NULL) && size <= tcache_maxclass) - tcache_dalloc_large(tsd, tcache, ptr, size); - else { - arena_dalloc_large(extent_node_arena_get( - &chunk->node), chunk, ptr); + if (likely(tcache != NULL) && size <= tcache_maxclass) { + tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, + size, slow_path); + } else { + arena_dalloc_large(tsdn, + extent_node_arena_get(&chunk->node), chunk, + ptr); } } } else - huge_dalloc(tsd, ptr, tcache); + huge_dalloc(tsdn, ptr); } # endif /* JEMALLOC_ARENA_INLINE_B */ #endif diff --git a/deps/jemalloc/include/jemalloc/internal/assert.h b/deps/jemalloc/include/jemalloc/internal/assert.h new file mode 100644 index 000000000..6f8f7eb93 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/assert.h @@ -0,0 +1,45 @@ +/* + * Define a custom assert() in order to reduce the chances of deadlock during + * assertion failure. 
+ */ +#ifndef assert +#define assert(e) do { \ + if (unlikely(config_debug && !(e))) { \ + malloc_printf( \ + ": %s:%d: Failed assertion: \"%s\"\n", \ + __FILE__, __LINE__, #e); \ + abort(); \ + } \ +} while (0) +#endif + +#ifndef not_reached +#define not_reached() do { \ + if (config_debug) { \ + malloc_printf( \ + ": %s:%d: Unreachable code reached\n", \ + __FILE__, __LINE__); \ + abort(); \ + } \ + unreachable(); \ +} while (0) +#endif + +#ifndef not_implemented +#define not_implemented() do { \ + if (config_debug) { \ + malloc_printf(": %s:%d: Not implemented\n", \ + __FILE__, __LINE__); \ + abort(); \ + } \ +} while (0) +#endif + +#ifndef assert_not_implemented +#define assert_not_implemented(e) do { \ + if (unlikely(config_debug && !(e))) \ + not_implemented(); \ +} while (0) +#endif + + diff --git a/deps/jemalloc/include/jemalloc/internal/atomic.h b/deps/jemalloc/include/jemalloc/internal/atomic.h index a9aad35d1..3f15ea149 100644 --- a/deps/jemalloc/include/jemalloc/internal/atomic.h +++ b/deps/jemalloc/include/jemalloc/internal/atomic.h @@ -28,8 +28,8 @@ * callers. 
* * atomic_read_( *p) { return (*p); } - * atomic_add_( *p, x) { return (*p + x); } - * atomic_sub_( *p, x) { return (*p - x); } + * atomic_add_( *p, x) { return (*p += x); } + * atomic_sub_( *p, x) { return (*p -= x); } * bool atomic_cas_( *p, c, s) * { * if (*p != c) diff --git a/deps/jemalloc/include/jemalloc/internal/base.h b/deps/jemalloc/include/jemalloc/internal/base.h index 39e46ee44..d6b81e162 100644 --- a/deps/jemalloc/include/jemalloc/internal/base.h +++ b/deps/jemalloc/include/jemalloc/internal/base.h @@ -9,12 +9,13 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -void *base_alloc(size_t size); -void base_stats_get(size_t *allocated, size_t *resident, size_t *mapped); +void *base_alloc(tsdn_t *tsdn, size_t size); +void base_stats_get(tsdn_t *tsdn, size_t *allocated, size_t *resident, + size_t *mapped); bool base_boot(void); -void base_prefork(void); -void base_postfork_parent(void); -void base_postfork_child(void); +void base_prefork(tsdn_t *tsdn); +void base_postfork_parent(tsdn_t *tsdn); +void base_postfork_child(tsdn_t *tsdn); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/bitmap.h b/deps/jemalloc/include/jemalloc/internal/bitmap.h index fcc6005c7..36f38b59c 100644 --- a/deps/jemalloc/include/jemalloc/internal/bitmap.h +++ b/deps/jemalloc/include/jemalloc/internal/bitmap.h @@ -15,6 +15,15 @@ typedef unsigned long bitmap_t; #define BITMAP_GROUP_NBITS (ZU(1) << LG_BITMAP_GROUP_NBITS) #define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1) +/* + * Do some analysis on how big the bitmap is before we use a tree. For a brute + * force linear search, if we would have to call ffs_lu() more than 2^3 times, + * use a tree instead. + */ +#if LG_BITMAP_MAXBITS - LG_BITMAP_GROUP_NBITS > 3 +# define USE_TREE +#endif + /* Number of groups required to store a given number of bits. 
*/ #define BITMAP_BITS2GROUPS(nbits) \ ((nbits + BITMAP_GROUP_NBITS_MASK) >> LG_BITMAP_GROUP_NBITS) @@ -48,6 +57,8 @@ typedef unsigned long bitmap_t; /* * Maximum number of groups required to support LG_BITMAP_MAXBITS. */ +#ifdef USE_TREE + #if LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS # define BITMAP_GROUPS_MAX BITMAP_GROUPS_1_LEVEL(BITMAP_MAXBITS) #elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 2 @@ -65,6 +76,12 @@ typedef unsigned long bitmap_t; (LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \ + !!(LG_BITMAP_MAXBITS % LG_SIZEOF_BITMAP) +#else /* USE_TREE */ + +#define BITMAP_GROUPS_MAX BITMAP_BITS2GROUPS(BITMAP_MAXBITS) + +#endif /* USE_TREE */ + #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS @@ -78,6 +95,7 @@ struct bitmap_info_s { /* Logical number of bits in bitmap (stored at bottom level). */ size_t nbits; +#ifdef USE_TREE /* Number of levels necessary for nbits. */ unsigned nlevels; @@ -86,6 +104,10 @@ struct bitmap_info_s { * bottom to top (e.g. the bottom level is stored in levels[0]). */ bitmap_level_t levels[BITMAP_MAX_LEVELS+1]; +#else /* USE_TREE */ + /* Number of groups necessary for nbits. 
*/ + size_t ngroups; +#endif /* USE_TREE */ }; #endif /* JEMALLOC_H_STRUCTS */ @@ -93,9 +115,8 @@ struct bitmap_info_s { #ifdef JEMALLOC_H_EXTERNS void bitmap_info_init(bitmap_info_t *binfo, size_t nbits); -size_t bitmap_info_ngroups(const bitmap_info_t *binfo); -size_t bitmap_size(size_t nbits); void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo); +size_t bitmap_size(const bitmap_info_t *binfo); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ @@ -113,10 +134,20 @@ void bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); JEMALLOC_INLINE bool bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) { - unsigned rgoff = binfo->levels[binfo->nlevels].group_offset - 1; +#ifdef USE_TREE + size_t rgoff = binfo->levels[binfo->nlevels].group_offset - 1; bitmap_t rg = bitmap[rgoff]; /* The bitmap is full iff the root group is 0. */ return (rg == 0); +#else + size_t i; + + for (i = 0; i < binfo->ngroups; i++) { + if (bitmap[i] != 0) + return (false); + } + return (true); +#endif } JEMALLOC_INLINE bool @@ -128,7 +159,7 @@ bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) assert(bit < binfo->nbits); goff = bit >> LG_BITMAP_GROUP_NBITS; g = bitmap[goff]; - return (!(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK)))); + return (!(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK)))); } JEMALLOC_INLINE void @@ -143,10 +174,11 @@ bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) goff = bit >> LG_BITMAP_GROUP_NBITS; gp = &bitmap[goff]; g = *gp; - assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))); - g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); + assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); + g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; assert(bitmap_get(bitmap, binfo, bit)); +#ifdef USE_TREE /* Propagate group state transitions up the tree. 
*/ if (g == 0) { unsigned i; @@ -155,13 +187,14 @@ bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) goff = bit >> LG_BITMAP_GROUP_NBITS; gp = &bitmap[binfo->levels[i].group_offset + goff]; g = *gp; - assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))); - g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); + assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); + g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; if (g != 0) break; } } +#endif } /* sfu: set first unset. */ @@ -174,15 +207,24 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) assert(!bitmap_full(bitmap, binfo)); +#ifdef USE_TREE i = binfo->nlevels - 1; g = bitmap[binfo->levels[i].group_offset]; - bit = jemalloc_ffsl(g) - 1; + bit = ffs_lu(g) - 1; while (i > 0) { i--; g = bitmap[binfo->levels[i].group_offset + bit]; - bit = (bit << LG_BITMAP_GROUP_NBITS) + (jemalloc_ffsl(g) - 1); + bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffs_lu(g) - 1); } - +#else + i = 0; + g = bitmap[0]; + while ((bit = ffs_lu(g)) == 0) { + i++; + g = bitmap[i]; + } + bit = (i << LG_BITMAP_GROUP_NBITS) + (bit - 1); +#endif bitmap_set(bitmap, binfo, bit); return (bit); } @@ -193,7 +235,7 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) size_t goff; bitmap_t *gp; bitmap_t g; - bool propagate; + UNUSED bool propagate; assert(bit < binfo->nbits); assert(bitmap_get(bitmap, binfo, bit)); @@ -201,10 +243,11 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) gp = &bitmap[goff]; g = *gp; propagate = (g == 0); - assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))) == 0); - g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); + assert((g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))) == 0); + g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; assert(!bitmap_get(bitmap, binfo, bit)); +#ifdef USE_TREE /* Propagate group state transitions up the tree. 
*/ if (propagate) { unsigned i; @@ -214,14 +257,15 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) gp = &bitmap[binfo->levels[i].group_offset + goff]; g = *gp; propagate = (g == 0); - assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))) + assert((g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))) == 0); - g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); + g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; if (!propagate) break; } } +#endif /* USE_TREE */ } #endif diff --git a/deps/jemalloc/include/jemalloc/internal/chunk.h b/deps/jemalloc/include/jemalloc/internal/chunk.h index 5d1938353..50b9904b0 100644 --- a/deps/jemalloc/include/jemalloc/internal/chunk.h +++ b/deps/jemalloc/include/jemalloc/internal/chunk.h @@ -48,32 +48,30 @@ extern size_t chunk_npages; extern const chunk_hooks_t chunk_hooks_default; -chunk_hooks_t chunk_hooks_get(arena_t *arena); -chunk_hooks_t chunk_hooks_set(arena_t *arena, +chunk_hooks_t chunk_hooks_get(tsdn_t *tsdn, arena_t *arena); +chunk_hooks_t chunk_hooks_set(tsdn_t *tsdn, arena_t *arena, const chunk_hooks_t *chunk_hooks); -bool chunk_register(const void *chunk, const extent_node_t *node); +bool chunk_register(tsdn_t *tsdn, const void *chunk, + const extent_node_t *node); void chunk_deregister(const void *chunk, const extent_node_t *node); void *chunk_alloc_base(size_t size); -void *chunk_alloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks, - void *new_addr, size_t size, size_t alignment, bool *zero, - bool dalloc_node); -void *chunk_alloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, - void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit); -void chunk_dalloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks, - void *chunk, size_t size, bool committed); -void chunk_dalloc_arena(arena_t *arena, chunk_hooks_t *chunk_hooks, - void *chunk, size_t size, bool zeroed, bool committed); -void chunk_dalloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, - void *chunk, size_t size, bool 
committed); -bool chunk_purge_arena(arena_t *arena, void *chunk, size_t offset, +void *chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, + chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, + size_t *sn, bool *zero, bool *commit, bool dalloc_node); +void *chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, + chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, + size_t *sn, bool *zero, bool *commit); +void chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, + chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t sn, + bool committed); +void chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, + chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t sn, + bool zeroed, bool committed); +bool chunk_purge_wrapper(tsdn_t *tsdn, arena_t *arena, + chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t offset, size_t length); -bool chunk_purge_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, - void *chunk, size_t size, size_t offset, size_t length); bool chunk_boot(void); -void chunk_prefork(void); -void chunk_postfork_parent(void); -void chunk_postfork_child(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/chunk_dss.h b/deps/jemalloc/include/jemalloc/internal/chunk_dss.h index 388f46be0..da8511ba0 100644 --- a/deps/jemalloc/include/jemalloc/internal/chunk_dss.h +++ b/deps/jemalloc/include/jemalloc/internal/chunk_dss.h @@ -23,13 +23,11 @@ extern const char *dss_prec_names[]; dss_prec_t chunk_dss_prec_get(void); bool chunk_dss_prec_set(dss_prec_t dss_prec); -void *chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size, - size_t alignment, bool *zero, bool *commit); +void *chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, + size_t size, size_t alignment, bool *zero, bool *commit); bool chunk_in_dss(void *chunk); -bool chunk_dss_boot(void); -void chunk_dss_prefork(void); -void 
chunk_dss_postfork_parent(void); -void chunk_dss_postfork_child(void); +bool chunk_dss_mergeable(void *chunk_a, void *chunk_b); +void chunk_dss_boot(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/chunk_mmap.h b/deps/jemalloc/include/jemalloc/internal/chunk_mmap.h index 7d8014c58..6f2d0ac2e 100644 --- a/deps/jemalloc/include/jemalloc/internal/chunk_mmap.h +++ b/deps/jemalloc/include/jemalloc/internal/chunk_mmap.h @@ -9,8 +9,8 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -void *chunk_alloc_mmap(size_t size, size_t alignment, bool *zero, - bool *commit); +void *chunk_alloc_mmap(void *new_addr, size_t size, size_t alignment, + bool *zero, bool *commit); bool chunk_dalloc_mmap(void *chunk, size_t size); #endif /* JEMALLOC_H_EXTERNS */ diff --git a/deps/jemalloc/include/jemalloc/internal/ckh.h b/deps/jemalloc/include/jemalloc/internal/ckh.h index 75c1c979f..f75ad90b7 100644 --- a/deps/jemalloc/include/jemalloc/internal/ckh.h +++ b/deps/jemalloc/include/jemalloc/internal/ckh.h @@ -40,9 +40,7 @@ struct ckh_s { #endif /* Used for pseudo-random number generation. */ -#define CKH_A 1103515241 -#define CKH_C 12347 - uint32_t prng_state; + uint64_t prng_state; /* Total number of items. 
*/ size_t count; @@ -74,7 +72,7 @@ bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data); bool ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data); bool ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, void **data); -bool ckh_search(ckh_t *ckh, const void *seachkey, void **key, void **data); +bool ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data); void ckh_string_hash(const void *key, size_t r_hash[2]); bool ckh_string_keycomp(const void *k1, const void *k2); void ckh_pointer_hash(const void *key, size_t r_hash[2]); diff --git a/deps/jemalloc/include/jemalloc/internal/ctl.h b/deps/jemalloc/include/jemalloc/internal/ctl.h index 751c14b5b..af0f6d7c5 100644 --- a/deps/jemalloc/include/jemalloc/internal/ctl.h +++ b/deps/jemalloc/include/jemalloc/internal/ctl.h @@ -21,13 +21,14 @@ struct ctl_named_node_s { /* If (nchildren == 0), this is a terminal node. */ unsigned nchildren; const ctl_node_t *children; - int (*ctl)(const size_t *, size_t, void *, size_t *, - void *, size_t); + int (*ctl)(tsd_t *, const size_t *, size_t, void *, + size_t *, void *, size_t); }; struct ctl_indexed_node_s { struct ctl_node_s node; - const ctl_named_node_t *(*index)(const size_t *, size_t, size_t); + const ctl_named_node_t *(*index)(tsdn_t *, const size_t *, size_t, + size_t); }; struct ctl_arena_stats_s { @@ -35,8 +36,12 @@ struct ctl_arena_stats_s { unsigned nthreads; const char *dss; ssize_t lg_dirty_mult; + ssize_t decay_time; size_t pactive; size_t pdirty; + + /* The remainder are only populated if config_stats is true. */ + arena_stats_t astats; /* Aggregate stats for small size classes, based on bin stats. */ @@ -56,6 +61,7 @@ struct ctl_stats_s { size_t metadata; size_t resident; size_t mapped; + size_t retained; unsigned narenas; ctl_arena_stats_t *arenas; /* (narenas + 1) elements. 
*/ }; @@ -64,16 +70,17 @@ struct ctl_stats_s { /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -int ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp, - size_t newlen); -int ctl_nametomib(const char *name, size_t *mibp, size_t *miblenp); - -int ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, +int ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen); +int ctl_nametomib(tsdn_t *tsdn, const char *name, size_t *mibp, + size_t *miblenp); + +int ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen); bool ctl_boot(void); -void ctl_prefork(void); -void ctl_postfork_parent(void); -void ctl_postfork_child(void); +void ctl_prefork(tsdn_t *tsdn); +void ctl_postfork_parent(tsdn_t *tsdn); +void ctl_postfork_child(tsdn_t *tsdn); #define xmallctl(name, oldp, oldlenp, newp, newlen) do { \ if (je_mallctl(name, oldp, oldlenp, newp, newlen) \ diff --git a/deps/jemalloc/include/jemalloc/internal/extent.h b/deps/jemalloc/include/jemalloc/internal/extent.h index 386d50ef4..168ffe643 100644 --- a/deps/jemalloc/include/jemalloc/internal/extent.h +++ b/deps/jemalloc/include/jemalloc/internal/extent.h @@ -18,6 +18,20 @@ struct extent_node_s { /* Total region size. */ size_t en_size; + /* + * Serial number (potentially non-unique). + * + * In principle serial numbers can wrap around on 32-bit systems if + * JEMALLOC_MUNMAP is defined, but as long as comparison functions fall + * back on address comparison for equal serial numbers, stable (if + * imperfect) ordering is maintained. + * + * Serial numbers may not be unique even in the absence of wrap-around, + * e.g. when splitting an extent and assigning the same serial number to + * both resulting adjacent extents. + */ + size_t en_sn; + /* * The zeroed flag is used by chunk recycling code to track whether * memory is zero-filled. 
@@ -45,10 +59,10 @@ struct extent_node_s { qr(extent_node_t) cc_link; union { - /* Linkage for the size/address-ordered tree. */ - rb_node(extent_node_t) szad_link; + /* Linkage for the size/sn/address-ordered tree. */ + rb_node(extent_node_t) szsnad_link; - /* Linkage for arena's huge and node_cache lists. */ + /* Linkage for arena's achunks, huge, and node_cache lists. */ ql_elm(extent_node_t) ql_link; }; @@ -61,7 +75,7 @@ typedef rb_tree(extent_node_t) extent_tree_t; /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -rb_proto(, extent_tree_szad_, extent_tree_t, extent_node_t) +rb_proto(, extent_tree_szsnad_, extent_tree_t, extent_node_t) rb_proto(, extent_tree_ad_, extent_tree_t, extent_node_t) @@ -73,6 +87,7 @@ rb_proto(, extent_tree_ad_, extent_tree_t, extent_node_t) arena_t *extent_node_arena_get(const extent_node_t *node); void *extent_node_addr_get(const extent_node_t *node); size_t extent_node_size_get(const extent_node_t *node); +size_t extent_node_sn_get(const extent_node_t *node); bool extent_node_zeroed_get(const extent_node_t *node); bool extent_node_committed_get(const extent_node_t *node); bool extent_node_achunk_get(const extent_node_t *node); @@ -80,12 +95,13 @@ prof_tctx_t *extent_node_prof_tctx_get(const extent_node_t *node); void extent_node_arena_set(extent_node_t *node, arena_t *arena); void extent_node_addr_set(extent_node_t *node, void *addr); void extent_node_size_set(extent_node_t *node, size_t size); +void extent_node_sn_set(extent_node_t *node, size_t sn); void extent_node_zeroed_set(extent_node_t *node, bool zeroed); void extent_node_committed_set(extent_node_t *node, bool committed); void extent_node_achunk_set(extent_node_t *node, bool achunk); void extent_node_prof_tctx_set(extent_node_t *node, prof_tctx_t *tctx); void extent_node_init(extent_node_t *node, arena_t *arena, void *addr, - size_t size, bool zeroed, bool committed); + size_t size, size_t sn, bool zeroed, bool 
committed); void extent_node_dirty_linkage_init(extent_node_t *node); void extent_node_dirty_insert(extent_node_t *node, arena_runs_dirty_link_t *runs_dirty, extent_node_t *chunks_dirty); @@ -114,6 +130,13 @@ extent_node_size_get(const extent_node_t *node) return (node->en_size); } +JEMALLOC_INLINE size_t +extent_node_sn_get(const extent_node_t *node) +{ + + return (node->en_sn); +} + JEMALLOC_INLINE bool extent_node_zeroed_get(const extent_node_t *node) { @@ -164,6 +187,13 @@ extent_node_size_set(extent_node_t *node, size_t size) node->en_size = size; } +JEMALLOC_INLINE void +extent_node_sn_set(extent_node_t *node, size_t sn) +{ + + node->en_sn = sn; +} + JEMALLOC_INLINE void extent_node_zeroed_set(extent_node_t *node, bool zeroed) { @@ -194,12 +224,13 @@ extent_node_prof_tctx_set(extent_node_t *node, prof_tctx_t *tctx) JEMALLOC_INLINE void extent_node_init(extent_node_t *node, arena_t *arena, void *addr, size_t size, - bool zeroed, bool committed) + size_t sn, bool zeroed, bool committed) { extent_node_arena_set(node, arena); extent_node_addr_set(node, addr); extent_node_size_set(node, size); + extent_node_sn_set(node, sn); extent_node_zeroed_set(node, zeroed); extent_node_committed_set(node, committed); extent_node_achunk_set(node, false); diff --git a/deps/jemalloc/include/jemalloc/internal/hash.h b/deps/jemalloc/include/jemalloc/internal/hash.h index bcead337a..1ff2d9a05 100644 --- a/deps/jemalloc/include/jemalloc/internal/hash.h +++ b/deps/jemalloc/include/jemalloc/internal/hash.h @@ -1,6 +1,6 @@ /* * The following hash function is based on MurmurHash3, placed into the public - * domain by Austin Appleby. See http://code.google.com/p/smhasher/ for + * domain by Austin Appleby. See https://github.com/aappleby/smhasher for * details. */ /******************************************************************************/ @@ -49,6 +49,14 @@ JEMALLOC_INLINE uint32_t hash_get_block_32(const uint32_t *p, int i) { + /* Handle unaligned read. 
*/ + if (unlikely((uintptr_t)p & (sizeof(uint32_t)-1)) != 0) { + uint32_t ret; + + memcpy(&ret, (uint8_t *)(p + i), sizeof(uint32_t)); + return (ret); + } + return (p[i]); } @@ -56,6 +64,14 @@ JEMALLOC_INLINE uint64_t hash_get_block_64(const uint64_t *p, int i) { + /* Handle unaligned read. */ + if (unlikely((uintptr_t)p & (sizeof(uint64_t)-1)) != 0) { + uint64_t ret; + + memcpy(&ret, (uint8_t *)(p + i), sizeof(uint64_t)); + return (ret); + } + return (p[i]); } @@ -321,13 +337,18 @@ hash_x64_128(const void *key, const int len, const uint32_t seed, JEMALLOC_INLINE void hash(const void *key, size_t len, const uint32_t seed, size_t r_hash[2]) { + + assert(len <= INT_MAX); /* Unfortunate implementation limitation. */ + #if (LG_SIZEOF_PTR == 3 && !defined(JEMALLOC_BIG_ENDIAN)) - hash_x64_128(key, len, seed, (uint64_t *)r_hash); + hash_x64_128(key, (int)len, seed, (uint64_t *)r_hash); #else - uint64_t hashes[2]; - hash_x86_128(key, len, seed, hashes); - r_hash[0] = (size_t)hashes[0]; - r_hash[1] = (size_t)hashes[1]; + { + uint64_t hashes[2]; + hash_x86_128(key, (int)len, seed, hashes); + r_hash[0] = (size_t)hashes[0]; + r_hash[1] = (size_t)hashes[1]; + } #endif } #endif diff --git a/deps/jemalloc/include/jemalloc/internal/huge.h b/deps/jemalloc/include/jemalloc/internal/huge.h index ece7af980..22184d9bb 100644 --- a/deps/jemalloc/include/jemalloc/internal/huge.h +++ b/deps/jemalloc/include/jemalloc/internal/huge.h @@ -9,24 +9,23 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -void *huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, - tcache_t *tcache); -void *huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, - bool zero, tcache_t *tcache); -bool huge_ralloc_no_move(void *ptr, size_t oldsize, size_t usize_min, - size_t usize_max, bool zero); +void *huge_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero); +void *huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, 
+ size_t alignment, bool zero); +bool huge_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, + size_t usize_min, size_t usize_max, bool zero); void *huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize, size_t alignment, bool zero, tcache_t *tcache); #ifdef JEMALLOC_JET typedef void (huge_dalloc_junk_t)(void *, size_t); extern huge_dalloc_junk_t *huge_dalloc_junk; #endif -void huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache); +void huge_dalloc(tsdn_t *tsdn, void *ptr); arena_t *huge_aalloc(const void *ptr); -size_t huge_salloc(const void *ptr); -prof_tctx_t *huge_prof_tctx_get(const void *ptr); -void huge_prof_tctx_set(const void *ptr, prof_tctx_t *tctx); -void huge_prof_tctx_reset(const void *ptr); +size_t huge_salloc(tsdn_t *tsdn, const void *ptr); +prof_tctx_t *huge_prof_tctx_get(tsdn_t *tsdn, const void *ptr); +void huge_prof_tctx_set(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx); +void huge_prof_tctx_reset(tsdn_t *tsdn, const void *ptr); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in index 8536a3eda..e7ace7d8c 100644 --- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in +++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in @@ -49,6 +49,7 @@ static const bool config_lazy_lock = false #endif ; +static const char * const config_malloc_conf = JEMALLOC_CONFIG_MALLOC_CONF; static const bool config_prof = #ifdef JEMALLOC_PROF true @@ -160,7 +161,10 @@ static const bool config_cache_oblivious = #include #endif +#include "jemalloc/internal/ph.h" +#ifndef __PGI #define RB_COMPACT +#endif #include "jemalloc/internal/rb.h" #include "jemalloc/internal/qr.h" #include "jemalloc/internal/ql.h" @@ -183,6 +187,9 @@ static const bool config_cache_oblivious = #include 
"jemalloc/internal/jemalloc_internal_macros.h" +/* Page size index type. */ +typedef unsigned pszind_t; + /* Size class index type. */ typedef unsigned szind_t; @@ -232,7 +239,7 @@ typedef unsigned szind_t; # ifdef __alpha__ # define LG_QUANTUM 4 # endif -# if (defined(__sparc64__) || defined(__sparcv9)) +# if (defined(__sparc64__) || defined(__sparcv9) || defined(__sparc_v9__)) # define LG_QUANTUM 4 # endif # if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64)) @@ -256,6 +263,9 @@ typedef unsigned szind_t; # ifdef __powerpc__ # define LG_QUANTUM 4 # endif +# ifdef __riscv__ +# define LG_QUANTUM 4 +# endif # ifdef __s390__ # define LG_QUANTUM 4 # endif @@ -317,13 +327,17 @@ typedef unsigned szind_t; #define PAGE ((size_t)(1U << LG_PAGE)) #define PAGE_MASK ((size_t)(PAGE - 1)) +/* Return the page base address for the page containing address a. */ +#define PAGE_ADDR2BASE(a) \ + ((void *)((uintptr_t)(a) & ~PAGE_MASK)) + /* Return the smallest pagesize multiple that is >= s. */ #define PAGE_CEILING(s) \ (((s) + PAGE_MASK) & ~PAGE_MASK) /* Return the nearest aligned address at or below a. */ #define ALIGNMENT_ADDR2BASE(a, alignment) \ - ((void *)((uintptr_t)(a) & (-(alignment)))) + ((void *)((uintptr_t)(a) & ((~(alignment)) + 1))) /* Return the offset between a and the nearest aligned address at or below a. */ #define ALIGNMENT_ADDR2OFFSET(a, alignment) \ @@ -331,7 +345,7 @@ typedef unsigned szind_t; /* Return the smallest alignment multiple that is >= s. */ #define ALIGNMENT_CEILING(s, alignment) \ - (((s) + (alignment - 1)) & (-(alignment))) + (((s) + (alignment - 1)) & ((~(alignment)) + 1)) /* Declare a variable-length array. 
*/ #if __STDC_VERSION__ < 199901L @@ -351,14 +365,19 @@ typedef unsigned szind_t; # define VARIABLE_ARRAY(type, name, count) type name[(count)] #endif +#include "jemalloc/internal/nstime.h" #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/spin.h" #include "jemalloc/internal/prng.h" +#include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/smoothstep.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/witness.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/tsd.h" #include "jemalloc/internal/mb.h" @@ -379,14 +398,19 @@ typedef unsigned szind_t; /******************************************************************************/ #define JEMALLOC_H_STRUCTS +#include "jemalloc/internal/nstime.h" #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/spin.h" #include "jemalloc/internal/prng.h" +#include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/smoothstep.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/witness.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/bitmap.h" @@ -422,13 +446,27 @@ extern bool opt_redzone; extern bool opt_utrace; extern bool opt_xmalloc; extern bool opt_zero; -extern size_t opt_narenas; +extern unsigned opt_narenas; extern bool in_valgrind; /* Number of CPUs. */ -extern unsigned ncpus; +extern unsigned ncpus; +/* Number of arenas used for automatic multiplexing of threads and arenas. */ +extern unsigned narenas_auto; + +/* + * Arenas that are used to service external requests. 
Not all elements of the + * arenas array are necessarily used; arenas are created lazily as needed. + */ +extern arena_t **arenas; + +/* + * pind2sz_tab encodes the same information as could be computed by + * pind2sz_compute(). + */ +extern size_t const pind2sz_tab[NPSIZES]; /* * index2size_tab encodes the same information as could be computed (at * unacceptable cost in some code paths) by index2size_compute(). @@ -447,31 +485,35 @@ void a0dalloc(void *ptr); void *bootstrap_malloc(size_t size); void *bootstrap_calloc(size_t num, size_t size); void bootstrap_free(void *ptr); -arena_t *arenas_extend(unsigned ind); -arena_t *arena_init(unsigned ind); unsigned narenas_total_get(void); -arena_t *arena_get_hard(tsd_t *tsd, unsigned ind, bool init_if_missing); -arena_t *arena_choose_hard(tsd_t *tsd); +arena_t *arena_init(tsdn_t *tsdn, unsigned ind); +arena_tdata_t *arena_tdata_get_hard(tsd_t *tsd, unsigned ind); +arena_t *arena_choose_hard(tsd_t *tsd, bool internal); void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind); -unsigned arena_nbound(unsigned ind); void thread_allocated_cleanup(tsd_t *tsd); void thread_deallocated_cleanup(tsd_t *tsd); +void iarena_cleanup(tsd_t *tsd); void arena_cleanup(tsd_t *tsd); -void arenas_cache_cleanup(tsd_t *tsd); -void narenas_cache_cleanup(tsd_t *tsd); -void arenas_cache_bypass_cleanup(tsd_t *tsd); +void arenas_tdata_cleanup(tsd_t *tsd); +void narenas_tdata_cleanup(tsd_t *tsd); +void arenas_tdata_bypass_cleanup(tsd_t *tsd); void jemalloc_prefork(void); void jemalloc_postfork_parent(void); void jemalloc_postfork_child(void); +#include "jemalloc/internal/nstime.h" #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/spin.h" #include "jemalloc/internal/prng.h" +#include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/smoothstep.h" #include 
"jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/witness.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/bitmap.h" @@ -492,16 +534,21 @@ void jemalloc_postfork_child(void); /******************************************************************************/ #define JEMALLOC_H_INLINES +#include "jemalloc/internal/nstime.h" #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/spin.h" #include "jemalloc/internal/prng.h" +#include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/smoothstep.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" -#include "jemalloc/internal/mutex.h" #include "jemalloc/internal/tsd.h" +#include "jemalloc/internal/witness.h" +#include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/extent.h" #include "jemalloc/internal/base.h" @@ -511,6 +558,11 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/huge.h" #ifndef JEMALLOC_ENABLE_INLINE +pszind_t psz2ind(size_t psz); +size_t pind2sz_compute(pszind_t pind); +size_t pind2sz_lookup(pszind_t pind); +size_t pind2sz(pszind_t pind); +size_t psz2u(size_t psz); szind_t size2index_compute(size_t size); szind_t size2index_lookup(size_t size); szind_t size2index(size_t size); @@ -521,39 +573,121 @@ size_t s2u_compute(size_t size); size_t s2u_lookup(size_t size); size_t s2u(size_t size); size_t sa2u(size_t size, size_t alignment); +arena_t *arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal); arena_t *arena_choose(tsd_t *tsd, arena_t *arena); -arena_t *arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing, +arena_t *arena_ichoose(tsd_t *tsd, arena_t *arena); +arena_tdata_t *arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing); +arena_t 
*arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing); +ticker_t *decay_ticker_get(tsd_t *tsd, unsigned ind); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) +JEMALLOC_INLINE pszind_t +psz2ind(size_t psz) +{ + + if (unlikely(psz > HUGE_MAXCLASS)) + return (NPSIZES); + { + pszind_t x = lg_floor((psz<<1)-1); + pszind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_PAGE) ? 0 : x - + (LG_SIZE_CLASS_GROUP + LG_PAGE); + pszind_t grp = shift << LG_SIZE_CLASS_GROUP; + + pszind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ? + LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1; + + size_t delta_inverse_mask = ZI(-1) << lg_delta; + pszind_t mod = ((((psz-1) & delta_inverse_mask) >> lg_delta)) & + ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); + + pszind_t ind = grp + mod; + return (ind); + } +} + +JEMALLOC_INLINE size_t +pind2sz_compute(pszind_t pind) +{ + + { + size_t grp = pind >> LG_SIZE_CLASS_GROUP; + size_t mod = pind & ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); + + size_t grp_size_mask = ~((!!grp)-1); + size_t grp_size = ((ZU(1) << (LG_PAGE + + (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask; + + size_t shift = (grp == 0) ? 1 : grp; + size_t lg_delta = shift + (LG_PAGE-1); + size_t mod_size = (mod+1) << lg_delta; + + size_t sz = grp_size + mod_size; + return (sz); + } +} + +JEMALLOC_INLINE size_t +pind2sz_lookup(pszind_t pind) +{ + size_t ret = (size_t)pind2sz_tab[pind]; + assert(ret == pind2sz_compute(pind)); + return (ret); +} + +JEMALLOC_INLINE size_t +pind2sz(pszind_t pind) +{ + + assert(pind < NPSIZES); + return (pind2sz_lookup(pind)); +} + +JEMALLOC_INLINE size_t +psz2u(size_t psz) +{ + + if (unlikely(psz > HUGE_MAXCLASS)) + return (0); + { + size_t x = lg_floor((psz<<1)-1); + size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ? 
+ LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1; + size_t delta = ZU(1) << lg_delta; + size_t delta_mask = delta - 1; + size_t usize = (psz + delta_mask) & ~delta_mask; + return (usize); + } +} + JEMALLOC_INLINE szind_t size2index_compute(size_t size) { + if (unlikely(size > HUGE_MAXCLASS)) + return (NSIZES); #if (NTBINS != 0) if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { - size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; - size_t lg_ceil = lg_floor(pow2_ceil(size)); + szind_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; + szind_t lg_ceil = lg_floor(pow2_ceil_zu(size)); return (lg_ceil < lg_tmin ? 0 : lg_ceil - lg_tmin); } #endif { - size_t x = unlikely(ZI(size) < 0) ? ((size<<1) ? - (ZU(1)<<(LG_SIZEOF_PTR+3)) : ((ZU(1)<<(LG_SIZEOF_PTR+3))-1)) - : lg_floor((size<<1)-1); - size_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 : + szind_t x = lg_floor((size<<1)-1); + szind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 : x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM); - size_t grp = shift << LG_SIZE_CLASS_GROUP; + szind_t grp = shift << LG_SIZE_CLASS_GROUP; - size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) + szind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) ? 
LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; size_t delta_inverse_mask = ZI(-1) << lg_delta; - size_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) & + szind_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) & ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); - size_t index = NTBINS + grp + mod; + szind_t index = NTBINS + grp + mod; return (index); } } @@ -564,8 +698,7 @@ size2index_lookup(size_t size) assert(size <= LOOKUP_MAXCLASS); { - size_t ret = ((size_t)(size2index_tab[(size-1) >> - LG_TINY_MIN])); + szind_t ret = (size2index_tab[(size-1) >> LG_TINY_MIN]); assert(ret == size2index_compute(size)); return (ret); } @@ -628,18 +761,18 @@ JEMALLOC_ALWAYS_INLINE size_t s2u_compute(size_t size) { + if (unlikely(size > HUGE_MAXCLASS)) + return (0); #if (NTBINS > 0) if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; - size_t lg_ceil = lg_floor(pow2_ceil(size)); + size_t lg_ceil = lg_floor(pow2_ceil_zu(size)); return (lg_ceil < lg_tmin ? (ZU(1) << lg_tmin) : (ZU(1) << lg_ceil)); } #endif { - size_t x = unlikely(ZI(size) < 0) ? ((size<<1) ? - (ZU(1)<<(LG_SIZEOF_PTR+3)) : ((ZU(1)<<(LG_SIZEOF_PTR+3))-1)) - : lg_floor((size<<1)-1); + size_t x = lg_floor((size<<1)-1); size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; size_t delta = ZU(1) << lg_delta; @@ -723,17 +856,16 @@ sa2u(size_t size, size_t alignment) return (usize); } - /* Huge size class. Beware of size_t overflow. */ + /* Huge size class. Beware of overflow. */ + + if (unlikely(alignment > HUGE_MAXCLASS)) + return (0); /* * We can't achieve subchunk alignment, so round up alignment to the * minimum that can actually be supported. */ alignment = CHUNK_CEILING(alignment); - if (alignment == 0) { - /* size_t overflow. */ - return (0); - } /* Make sure result is a huge size class. */ if (size <= chunksize) @@ -759,45 +891,84 @@ sa2u(size_t size, size_t alignment) /* Choose an arena based on a per-thread value. 
*/ JEMALLOC_INLINE arena_t * -arena_choose(tsd_t *tsd, arena_t *arena) +arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) { arena_t *ret; if (arena != NULL) return (arena); - if (unlikely((ret = tsd_arena_get(tsd)) == NULL)) - ret = arena_choose_hard(tsd); + ret = internal ? tsd_iarena_get(tsd) : tsd_arena_get(tsd); + if (unlikely(ret == NULL)) + ret = arena_choose_hard(tsd, internal); return (ret); } JEMALLOC_INLINE arena_t * -arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing, - bool refresh_if_missing) +arena_choose(tsd_t *tsd, arena_t *arena) { - arena_t *arena; - arena_t **arenas_cache = tsd_arenas_cache_get(tsd); - /* init_if_missing requires refresh_if_missing. */ - assert(!init_if_missing || refresh_if_missing); + return (arena_choose_impl(tsd, arena, false)); +} - if (unlikely(arenas_cache == NULL)) { - /* arenas_cache hasn't been initialized yet. */ - return (arena_get_hard(tsd, ind, init_if_missing)); +JEMALLOC_INLINE arena_t * +arena_ichoose(tsd_t *tsd, arena_t *arena) +{ + + return (arena_choose_impl(tsd, arena, true)); +} + +JEMALLOC_INLINE arena_tdata_t * +arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing) +{ + arena_tdata_t *tdata; + arena_tdata_t *arenas_tdata = tsd_arenas_tdata_get(tsd); + + if (unlikely(arenas_tdata == NULL)) { + /* arenas_tdata hasn't been initialized yet. */ + return (arena_tdata_get_hard(tsd, ind)); } - if (unlikely(ind >= tsd_narenas_cache_get(tsd))) { + if (unlikely(ind >= tsd_narenas_tdata_get(tsd))) { /* - * ind is invalid, cache is old (too small), or arena to be + * ind is invalid, cache is old (too small), or tdata to be * initialized. */ - return (refresh_if_missing ? arena_get_hard(tsd, ind, - init_if_missing) : NULL); + return (refresh_if_missing ? 
arena_tdata_get_hard(tsd, ind) : + NULL); } - arena = arenas_cache[ind]; - if (likely(arena != NULL) || !refresh_if_missing) - return (arena); - return (arena_get_hard(tsd, ind, init_if_missing)); + + tdata = &arenas_tdata[ind]; + if (likely(tdata != NULL) || !refresh_if_missing) + return (tdata); + return (arena_tdata_get_hard(tsd, ind)); +} + +JEMALLOC_INLINE arena_t * +arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing) +{ + arena_t *ret; + + assert(ind <= MALLOCX_ARENA_MAX); + + ret = arenas[ind]; + if (unlikely(ret == NULL)) { + ret = atomic_read_p((void *)&arenas[ind]); + if (init_if_missing && unlikely(ret == NULL)) + ret = arena_init(tsdn, ind); + } + return (ret); +} + +JEMALLOC_INLINE ticker_t * +decay_ticker_get(tsd_t *tsd, unsigned ind) +{ + arena_tdata_t *tdata; + + tdata = arena_tdata_get(tsd, ind, true); + if (unlikely(tdata == NULL)) + return (NULL); + return (&tdata->decay_ticker); } #endif @@ -818,27 +989,27 @@ arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing, #ifndef JEMALLOC_ENABLE_INLINE arena_t *iaalloc(const void *ptr); -size_t isalloc(const void *ptr, bool demote); -void *iallocztm(tsd_t *tsd, size_t size, bool zero, tcache_t *tcache, - bool is_metadata, arena_t *arena); -void *imalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena); -void *imalloc(tsd_t *tsd, size_t size); -void *icalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena); -void *icalloc(tsd_t *tsd, size_t size); -void *ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, +size_t isalloc(tsdn_t *tsdn, const void *ptr, bool demote); +void *iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, + tcache_t *tcache, bool is_metadata, arena_t *arena, bool slow_path); +void *ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, + bool slow_path); +void *ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, tcache_t *tcache, bool is_metadata, arena_t *arena); -void *ipalloct(tsd_t *tsd, size_t usize, size_t 
alignment, bool zero, +void *ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); void *ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero); -size_t ivsalloc(const void *ptr, bool demote); +size_t ivsalloc(tsdn_t *tsdn, const void *ptr, bool demote); size_t u2rz(size_t usize); -size_t p2rz(const void *ptr); -void idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata); -void idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache); +size_t p2rz(tsdn_t *tsdn, const void *ptr); +void idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool is_metadata, + bool slow_path); void idalloc(tsd_t *tsd, void *ptr); -void iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache); -void isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache); -void isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache); +void iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path); +void isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, + bool slow_path); +void isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, + bool slow_path); void *iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); @@ -846,8 +1017,8 @@ void *iralloct(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); void *iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero); -bool ixalloc(void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero); +bool ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, + size_t extra, size_t alignment, bool zero); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) @@ -862,100 +1033,85 @@ iaalloc(const void *ptr) /* * Typical usage: + * tsdn_t *tsdn = [...] * void *ptr = [...] 
- * size_t sz = isalloc(ptr, config_prof); + * size_t sz = isalloc(tsdn, ptr, config_prof); */ JEMALLOC_ALWAYS_INLINE size_t -isalloc(const void *ptr, bool demote) +isalloc(tsdn_t *tsdn, const void *ptr, bool demote) { assert(ptr != NULL); /* Demotion only makes sense if config_prof is true. */ assert(config_prof || !demote); - return (arena_salloc(ptr, demote)); + return (arena_salloc(tsdn, ptr, demote)); } JEMALLOC_ALWAYS_INLINE void * -iallocztm(tsd_t *tsd, size_t size, bool zero, tcache_t *tcache, bool is_metadata, - arena_t *arena) +iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, + bool is_metadata, arena_t *arena, bool slow_path) { void *ret; assert(size != 0); + assert(!is_metadata || tcache == NULL); + assert(!is_metadata || arena == NULL || arena->ind < narenas_auto); - ret = arena_malloc(tsd, arena, size, zero, tcache); + ret = arena_malloc(tsdn, arena, size, ind, zero, tcache, slow_path); if (config_stats && is_metadata && likely(ret != NULL)) { - arena_metadata_allocated_add(iaalloc(ret), isalloc(ret, - config_prof)); + arena_metadata_allocated_add(iaalloc(ret), + isalloc(tsdn, ret, config_prof)); } return (ret); } JEMALLOC_ALWAYS_INLINE void * -imalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena) +ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, bool slow_path) { - return (iallocztm(tsd, size, false, tcache, false, arena)); + return (iallocztm(tsd_tsdn(tsd), size, ind, zero, tcache_get(tsd, true), + false, NULL, slow_path)); } JEMALLOC_ALWAYS_INLINE void * -imalloc(tsd_t *tsd, size_t size) -{ - - return (iallocztm(tsd, size, false, tcache_get(tsd, true), false, NULL)); -} - -JEMALLOC_ALWAYS_INLINE void * -icalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena) -{ - - return (iallocztm(tsd, size, true, tcache, false, arena)); -} - -JEMALLOC_ALWAYS_INLINE void * -icalloc(tsd_t *tsd, size_t size) -{ - - return (iallocztm(tsd, size, true, tcache_get(tsd, true), false, NULL)); -} - 
-JEMALLOC_ALWAYS_INLINE void * -ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, +ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, tcache_t *tcache, bool is_metadata, arena_t *arena) { void *ret; assert(usize != 0); assert(usize == sa2u(usize, alignment)); + assert(!is_metadata || tcache == NULL); + assert(!is_metadata || arena == NULL || arena->ind < narenas_auto); - ret = arena_palloc(tsd, arena, usize, alignment, zero, tcache); + ret = arena_palloc(tsdn, arena, usize, alignment, zero, tcache); assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); if (config_stats && is_metadata && likely(ret != NULL)) { - arena_metadata_allocated_add(iaalloc(ret), isalloc(ret, + arena_metadata_allocated_add(iaalloc(ret), isalloc(tsdn, ret, config_prof)); } return (ret); } JEMALLOC_ALWAYS_INLINE void * -ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero, +ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) { - return (ipallocztm(tsd, usize, alignment, zero, tcache, false, arena)); + return (ipallocztm(tsdn, usize, alignment, zero, tcache, false, arena)); } JEMALLOC_ALWAYS_INLINE void * ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero) { - return (ipallocztm(tsd, usize, alignment, zero, tcache_get(tsd, - NULL), false, NULL)); + return (ipallocztm(tsd_tsdn(tsd), usize, alignment, zero, + tcache_get(tsd, true), false, NULL)); } JEMALLOC_ALWAYS_INLINE size_t -ivsalloc(const void *ptr, bool demote) +ivsalloc(tsdn_t *tsdn, const void *ptr, bool demote) { extent_node_t *node; @@ -967,7 +1123,7 @@ ivsalloc(const void *ptr, bool demote) assert(extent_node_addr_get(node) == ptr || extent_node_achunk_get(node)); - return (isalloc(ptr, demote)); + return (isalloc(tsdn, ptr, demote)); } JEMALLOC_INLINE size_t @@ -985,65 +1141,62 @@ u2rz(size_t usize) } JEMALLOC_INLINE size_t -p2rz(const void *ptr) +p2rz(tsdn_t *tsdn, const void *ptr) { - size_t usize = isalloc(ptr, false); + size_t usize 
= isalloc(tsdn, ptr, false); return (u2rz(usize)); } JEMALLOC_ALWAYS_INLINE void -idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata) +idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool is_metadata, + bool slow_path) { assert(ptr != NULL); + assert(!is_metadata || tcache == NULL); + assert(!is_metadata || iaalloc(ptr)->ind < narenas_auto); if (config_stats && is_metadata) { - arena_metadata_allocated_sub(iaalloc(ptr), isalloc(ptr, + arena_metadata_allocated_sub(iaalloc(ptr), isalloc(tsdn, ptr, config_prof)); } - arena_dalloc(tsd, ptr, tcache); -} - -JEMALLOC_ALWAYS_INLINE void -idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache) -{ - - idalloctm(tsd, ptr, tcache, false); + arena_dalloc(tsdn, ptr, tcache, slow_path); } JEMALLOC_ALWAYS_INLINE void idalloc(tsd_t *tsd, void *ptr) { - idalloctm(tsd, ptr, tcache_get(tsd, false), false); + idalloctm(tsd_tsdn(tsd), ptr, tcache_get(tsd, false), false, true); } JEMALLOC_ALWAYS_INLINE void -iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) +iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { - if (config_fill && unlikely(opt_quarantine)) + if (slow_path && config_fill && unlikely(opt_quarantine)) quarantine(tsd, ptr); else - idalloctm(tsd, ptr, tcache, false); + idalloctm(tsd_tsdn(tsd), ptr, tcache, false, slow_path); } JEMALLOC_ALWAYS_INLINE void -isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) +isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, + bool slow_path) { - arena_sdalloc(tsd, ptr, size, tcache); + arena_sdalloc(tsdn, ptr, size, tcache, slow_path); } JEMALLOC_ALWAYS_INLINE void -isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) +isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, bool slow_path) { - if (config_fill && unlikely(opt_quarantine)) + if (slow_path && config_fill && unlikely(opt_quarantine)) quarantine(tsd, ptr); else - isdalloct(tsd, ptr, size, tcache); + isdalloct(tsd_tsdn(tsd), ptr, size, tcache, slow_path); } 
JEMALLOC_ALWAYS_INLINE void * @@ -1054,17 +1207,18 @@ iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t usize, copysize; usize = sa2u(size + extra, alignment); - if (usize == 0) + if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) return (NULL); - p = ipalloct(tsd, usize, alignment, zero, tcache, arena); + p = ipalloct(tsd_tsdn(tsd), usize, alignment, zero, tcache, arena); if (p == NULL) { if (extra == 0) return (NULL); /* Try again, without extra this time. */ usize = sa2u(size, alignment); - if (usize == 0) + if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) return (NULL); - p = ipalloct(tsd, usize, alignment, zero, tcache, arena); + p = ipalloct(tsd_tsdn(tsd), usize, alignment, zero, tcache, + arena); if (p == NULL) return (NULL); } @@ -1074,7 +1228,7 @@ iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, */ copysize = (size < oldsize) ? size : oldsize; memcpy(p, ptr, copysize); - isqalloc(tsd, ptr, oldsize, tcache); + isqalloc(tsd, ptr, oldsize, tcache, true); return (p); } @@ -1110,8 +1264,8 @@ iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, } JEMALLOC_ALWAYS_INLINE bool -ixalloc(void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, - bool zero) +ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, + size_t alignment, bool zero) { assert(ptr != NULL); @@ -1123,7 +1277,7 @@ ixalloc(void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, return (true); } - return (arena_ralloc_no_move(ptr, oldsize, size, extra, zero)); + return (arena_ralloc_no_move(tsdn, ptr, oldsize, size, extra, zero)); } #endif diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h index a601d6ebb..c907d9109 100644 --- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h @@ -17,7 +17,18 
@@ # include # endif # include +# ifdef JEMALLOC_OS_UNFAIR_LOCK +# include +# endif +# ifdef JEMALLOC_GLIBC_MALLOC_HOOK +# include +# endif # include +# include +# include +# ifdef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME +# include +# endif #endif #include diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h.in b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h.in index b0f8caaf8..def4ba550 100644 --- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -56,9 +56,9 @@ #undef JEMALLOC_HAVE_BUILTIN_CLZ /* - * Defined if madvise(2) is available. + * Defined if os_unfair_lock_*() functions are available, as provided by Darwin. */ -#undef JEMALLOC_HAVE_MADVISE +#undef JEMALLOC_OS_UNFAIR_LOCK /* * Defined if OSSpin*() functions are available, as provided by Darwin, and @@ -66,6 +66,9 @@ */ #undef JEMALLOC_OSSPIN +/* Defined if syscall(2) is usable. */ +#undef JEMALLOC_USE_SYSCALL + /* * Defined if secure_getenv(3) is available. */ @@ -76,6 +79,24 @@ */ #undef JEMALLOC_HAVE_ISSETUGID +/* Defined if pthread_atfork(3) is available. */ +#undef JEMALLOC_HAVE_PTHREAD_ATFORK + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. + */ +#undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. + */ +#undef JEMALLOC_HAVE_CLOCK_MONOTONIC + +/* + * Defined if mach_absolute_time() is available. + */ +#undef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME + /* * Defined if _malloc_thread_cleanup() exists. At least in the case of * FreeBSD, pthread_key_create() allocates, which if used during malloc @@ -189,9 +210,16 @@ #undef JEMALLOC_TLS /* - * ffs()/ffsl() functions to use for bitmapping. Don't use these directly; - * instead, use jemalloc_ffs() or jemalloc_ffsl() from util.h. + * Used to mark unreachable code to quiet "end of non-void" compiler warnings. 
+ * Don't use this directly; instead use unreachable() from util.h */ +#undef JEMALLOC_INTERNAL_UNREACHABLE + +/* + * ffs*() functions to use for bitmapping. Don't use these directly; instead, + * use ffs_*() from util.h. + */ +#undef JEMALLOC_INTERNAL_FFSLL #undef JEMALLOC_INTERNAL_FFSL #undef JEMALLOC_INTERNAL_FFS @@ -213,18 +241,35 @@ #undef JEMALLOC_ZONE #undef JEMALLOC_ZONE_VERSION +/* + * Methods for determining whether the OS overcommits. + * JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY: Linux's + * /proc/sys/vm.overcommit_memory file. + * JEMALLOC_SYSCTL_VM_OVERCOMMIT: FreeBSD's vm.overcommit sysctl. + */ +#undef JEMALLOC_SYSCTL_VM_OVERCOMMIT +#undef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY + +/* Defined if madvise(2) is available. */ +#undef JEMALLOC_HAVE_MADVISE + /* * Methods for purging unused pages differ between operating systems. * - * madvise(..., MADV_DONTNEED) : On Linux, this immediately discards pages, - * such that new pages will be demand-zeroed if - * the address region is later touched. - * madvise(..., MADV_FREE) : On FreeBSD and Darwin, this marks pages as being - * unused, such that they will be discarded rather - * than swapped out. + * madvise(..., MADV_FREE) : This marks pages as being unused, such that they + * will be discarded rather than swapped out. + * madvise(..., MADV_DONTNEED) : This immediately discards pages, such that + * new pages will be demand-zeroed if the + * address region is later touched. */ -#undef JEMALLOC_PURGE_MADVISE_DONTNEED #undef JEMALLOC_PURGE_MADVISE_FREE +#undef JEMALLOC_PURGE_MADVISE_DONTNEED + +/* + * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE + * arguments to madvise(2). + */ +#undef JEMALLOC_THP /* Define if operating system has alloca.h header. */ #undef JEMALLOC_HAS_ALLOCA_H @@ -241,6 +286,9 @@ /* sizeof(long) == 2^LG_SIZEOF_LONG. */ #undef LG_SIZEOF_LONG +/* sizeof(long long) == 2^LG_SIZEOF_LONG_LONG. 
*/ +#undef LG_SIZEOF_LONG_LONG + /* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */ #undef LG_SIZEOF_INTMAX_T @@ -259,4 +307,7 @@ */ #undef JEMALLOC_EXPORT +/* config.malloc_conf options string. */ +#undef JEMALLOC_CONFIG_MALLOC_CONF + #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/deps/jemalloc/include/jemalloc/internal/mb.h b/deps/jemalloc/include/jemalloc/internal/mb.h index 3cfa78729..5384728fd 100644 --- a/deps/jemalloc/include/jemalloc/internal/mb.h +++ b/deps/jemalloc/include/jemalloc/internal/mb.h @@ -42,7 +42,7 @@ mb_write(void) : /* Inputs. */ : "memory" /* Clobbers. */ ); -#else +# else /* * This is hopefully enough to keep the compiler from reordering * instructions around this one. @@ -52,7 +52,7 @@ mb_write(void) : /* Inputs. */ : "memory" /* Clobbers. */ ); -#endif +# endif } #elif (defined(__amd64__) || defined(__x86_64__)) JEMALLOC_INLINE void @@ -104,9 +104,9 @@ mb_write(void) { malloc_mutex_t mtx; - malloc_mutex_init(&mtx); - malloc_mutex_lock(&mtx); - malloc_mutex_unlock(&mtx); + malloc_mutex_init(&mtx, "mb", WITNESS_RANK_OMIT); + malloc_mutex_lock(TSDN_NULL, &mtx); + malloc_mutex_unlock(TSDN_NULL, &mtx); } #endif #endif diff --git a/deps/jemalloc/include/jemalloc/internal/mutex.h b/deps/jemalloc/include/jemalloc/internal/mutex.h index f051f2917..b442d2d4e 100644 --- a/deps/jemalloc/include/jemalloc/internal/mutex.h +++ b/deps/jemalloc/include/jemalloc/internal/mutex.h @@ -5,18 +5,25 @@ typedef struct malloc_mutex_s malloc_mutex_t; #ifdef _WIN32 # define MALLOC_MUTEX_INITIALIZER +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) +# define MALLOC_MUTEX_INITIALIZER \ + {OS_UNFAIR_LOCK_INIT, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)} #elif (defined(JEMALLOC_OSSPIN)) -# define MALLOC_MUTEX_INITIALIZER {0} +# define MALLOC_MUTEX_INITIALIZER {0, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)} #elif (defined(JEMALLOC_MUTEX_INIT_CB)) -# define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER, NULL} +# define MALLOC_MUTEX_INITIALIZER \ + {PTHREAD_MUTEX_INITIALIZER, 
NULL, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)} #else # if (defined(JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP) && \ defined(PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP)) # define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_ADAPTIVE_NP -# define MALLOC_MUTEX_INITIALIZER {PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP} +# define MALLOC_MUTEX_INITIALIZER \ + {PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP, \ + WITNESS_INITIALIZER(WITNESS_RANK_OMIT)} # else # define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT -# define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER} +# define MALLOC_MUTEX_INITIALIZER \ + {PTHREAD_MUTEX_INITIALIZER, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)} # endif #endif @@ -31,6 +38,8 @@ struct malloc_mutex_s { # else CRITICAL_SECTION lock; # endif +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock lock; #elif (defined(JEMALLOC_OSSPIN)) OSSpinLock lock; #elif (defined(JEMALLOC_MUTEX_INIT_CB)) @@ -39,6 +48,7 @@ struct malloc_mutex_s { #else pthread_mutex_t lock; #endif + witness_t witness; }; #endif /* JEMALLOC_H_STRUCTS */ @@ -52,52 +62,62 @@ extern bool isthreaded; # define isthreaded true #endif -bool malloc_mutex_init(malloc_mutex_t *mutex); -void malloc_mutex_prefork(malloc_mutex_t *mutex); -void malloc_mutex_postfork_parent(malloc_mutex_t *mutex); -void malloc_mutex_postfork_child(malloc_mutex_t *mutex); -bool mutex_boot(void); +bool malloc_mutex_init(malloc_mutex_t *mutex, const char *name, + witness_rank_t rank); +void malloc_mutex_prefork(tsdn_t *tsdn, malloc_mutex_t *mutex); +void malloc_mutex_postfork_parent(tsdn_t *tsdn, malloc_mutex_t *mutex); +void malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex); +bool malloc_mutex_boot(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -void malloc_mutex_lock(malloc_mutex_t *mutex); -void malloc_mutex_unlock(malloc_mutex_t *mutex); +void malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex); 
+void malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex); +void malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex); +void malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) JEMALLOC_INLINE void -malloc_mutex_lock(malloc_mutex_t *mutex) +malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) { if (isthreaded) { + witness_assert_not_owner(tsdn, &mutex->witness); #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 AcquireSRWLockExclusive(&mutex->lock); # else EnterCriticalSection(&mutex->lock); # endif +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock_lock(&mutex->lock); #elif (defined(JEMALLOC_OSSPIN)) OSSpinLockLock(&mutex->lock); #else pthread_mutex_lock(&mutex->lock); #endif + witness_lock(tsdn, &mutex->witness); } } JEMALLOC_INLINE void -malloc_mutex_unlock(malloc_mutex_t *mutex) +malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) { if (isthreaded) { + witness_unlock(tsdn, &mutex->witness); #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 ReleaseSRWLockExclusive(&mutex->lock); # else LeaveCriticalSection(&mutex->lock); # endif +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock_unlock(&mutex->lock); #elif (defined(JEMALLOC_OSSPIN)) OSSpinLockUnlock(&mutex->lock); #else @@ -105,6 +125,22 @@ malloc_mutex_unlock(malloc_mutex_t *mutex) #endif } } + +JEMALLOC_INLINE void +malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) +{ + + if (isthreaded) + witness_assert_owner(tsdn, &mutex->witness); +} + +JEMALLOC_INLINE void +malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) +{ + + if (isthreaded) + witness_assert_not_owner(tsdn, &mutex->witness); +} #endif #endif /* JEMALLOC_H_INLINES */ diff --git a/deps/jemalloc/include/jemalloc/internal/nstime.h b/deps/jemalloc/include/jemalloc/internal/nstime.h new file mode 100644 index 000000000..93b27dc80 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/nstime.h @@ -0,0 +1,48 @@ 
+/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct nstime_s nstime_t; + +/* Maximum supported number of seconds (~584 years). */ +#define NSTIME_SEC_MAX KQU(18446744072) + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct nstime_s { + uint64_t ns; +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +void nstime_init(nstime_t *time, uint64_t ns); +void nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec); +uint64_t nstime_ns(const nstime_t *time); +uint64_t nstime_sec(const nstime_t *time); +uint64_t nstime_nsec(const nstime_t *time); +void nstime_copy(nstime_t *time, const nstime_t *source); +int nstime_compare(const nstime_t *a, const nstime_t *b); +void nstime_add(nstime_t *time, const nstime_t *addend); +void nstime_subtract(nstime_t *time, const nstime_t *subtrahend); +void nstime_imultiply(nstime_t *time, uint64_t multiplier); +void nstime_idivide(nstime_t *time, uint64_t divisor); +uint64_t nstime_divide(const nstime_t *time, const nstime_t *divisor); +#ifdef JEMALLOC_JET +typedef bool (nstime_monotonic_t)(void); +extern nstime_monotonic_t *nstime_monotonic; +typedef bool (nstime_update_t)(nstime_t *); +extern nstime_update_t *nstime_update; +#else +bool nstime_monotonic(void); +bool nstime_update(nstime_t *time); +#endif + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/pages.h b/deps/jemalloc/include/jemalloc/internal/pages.h index da7eb9686..4ae9f156a 100644 --- a/deps/jemalloc/include/jemalloc/internal/pages.h +++ 
b/deps/jemalloc/include/jemalloc/internal/pages.h @@ -9,13 +9,16 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -void *pages_map(void *addr, size_t size); +void *pages_map(void *addr, size_t size, bool *commit); void pages_unmap(void *addr, size_t size); void *pages_trim(void *addr, size_t alloc_size, size_t leadsize, - size_t size); + size_t size, bool *commit); bool pages_commit(void *addr, size_t size); bool pages_decommit(void *addr, size_t size); bool pages_purge(void *addr, size_t size); +bool pages_huge(void *addr, size_t size); +bool pages_nohuge(void *addr, size_t size); +void pages_boot(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/ph.h b/deps/jemalloc/include/jemalloc/internal/ph.h new file mode 100644 index 000000000..4f91c333f --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/ph.h @@ -0,0 +1,345 @@ +/* + * A Pairing Heap implementation. + * + * "The Pairing Heap: A New Form of Self-Adjusting Heap" + * https://www.cs.cmu.edu/~sleator/papers/pairing-heaps.pdf + * + * With auxiliary twopass list, described in a follow on paper. + * + * "Pairing Heaps: Experiments and Analysis" + * http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.106.2988&rep=rep1&type=pdf + * + ******************************************************************************* + */ + +#ifndef PH_H_ +#define PH_H_ + +/* Node structure. */ +#define phn(a_type) \ +struct { \ + a_type *phn_prev; \ + a_type *phn_next; \ + a_type *phn_lchild; \ +} + +/* Root structure. */ +#define ph(a_type) \ +struct { \ + a_type *ph_root; \ +} + +/* Internal utility macros. 
*/ +#define phn_lchild_get(a_type, a_field, a_phn) \ + (a_phn->a_field.phn_lchild) +#define phn_lchild_set(a_type, a_field, a_phn, a_lchild) do { \ + a_phn->a_field.phn_lchild = a_lchild; \ +} while (0) + +#define phn_next_get(a_type, a_field, a_phn) \ + (a_phn->a_field.phn_next) +#define phn_prev_set(a_type, a_field, a_phn, a_prev) do { \ + a_phn->a_field.phn_prev = a_prev; \ +} while (0) + +#define phn_prev_get(a_type, a_field, a_phn) \ + (a_phn->a_field.phn_prev) +#define phn_next_set(a_type, a_field, a_phn, a_next) do { \ + a_phn->a_field.phn_next = a_next; \ +} while (0) + +#define phn_merge_ordered(a_type, a_field, a_phn0, a_phn1, a_cmp) do { \ + a_type *phn0child; \ + \ + assert(a_phn0 != NULL); \ + assert(a_phn1 != NULL); \ + assert(a_cmp(a_phn0, a_phn1) <= 0); \ + \ + phn_prev_set(a_type, a_field, a_phn1, a_phn0); \ + phn0child = phn_lchild_get(a_type, a_field, a_phn0); \ + phn_next_set(a_type, a_field, a_phn1, phn0child); \ + if (phn0child != NULL) \ + phn_prev_set(a_type, a_field, phn0child, a_phn1); \ + phn_lchild_set(a_type, a_field, a_phn0, a_phn1); \ +} while (0) + +#define phn_merge(a_type, a_field, a_phn0, a_phn1, a_cmp, r_phn) do { \ + if (a_phn0 == NULL) \ + r_phn = a_phn1; \ + else if (a_phn1 == NULL) \ + r_phn = a_phn0; \ + else if (a_cmp(a_phn0, a_phn1) < 0) { \ + phn_merge_ordered(a_type, a_field, a_phn0, a_phn1, \ + a_cmp); \ + r_phn = a_phn0; \ + } else { \ + phn_merge_ordered(a_type, a_field, a_phn1, a_phn0, \ + a_cmp); \ + r_phn = a_phn1; \ + } \ +} while (0) + +#define ph_merge_siblings(a_type, a_field, a_phn, a_cmp, r_phn) do { \ + a_type *head = NULL; \ + a_type *tail = NULL; \ + a_type *phn0 = a_phn; \ + a_type *phn1 = phn_next_get(a_type, a_field, phn0); \ + \ + /* \ + * Multipass merge, wherein the first two elements of a FIFO \ + * are repeatedly merged, and each result is appended to the \ + * singly linked FIFO, until the FIFO contains only a single \ + * element. 
We start with a sibling list but no reference to \ + * its tail, so we do a single pass over the sibling list to \ + * populate the FIFO. \ + */ \ + if (phn1 != NULL) { \ + a_type *phnrest = phn_next_get(a_type, a_field, phn1); \ + if (phnrest != NULL) \ + phn_prev_set(a_type, a_field, phnrest, NULL); \ + phn_prev_set(a_type, a_field, phn0, NULL); \ + phn_next_set(a_type, a_field, phn0, NULL); \ + phn_prev_set(a_type, a_field, phn1, NULL); \ + phn_next_set(a_type, a_field, phn1, NULL); \ + phn_merge(a_type, a_field, phn0, phn1, a_cmp, phn0); \ + head = tail = phn0; \ + phn0 = phnrest; \ + while (phn0 != NULL) { \ + phn1 = phn_next_get(a_type, a_field, phn0); \ + if (phn1 != NULL) { \ + phnrest = phn_next_get(a_type, a_field, \ + phn1); \ + if (phnrest != NULL) { \ + phn_prev_set(a_type, a_field, \ + phnrest, NULL); \ + } \ + phn_prev_set(a_type, a_field, phn0, \ + NULL); \ + phn_next_set(a_type, a_field, phn0, \ + NULL); \ + phn_prev_set(a_type, a_field, phn1, \ + NULL); \ + phn_next_set(a_type, a_field, phn1, \ + NULL); \ + phn_merge(a_type, a_field, phn0, phn1, \ + a_cmp, phn0); \ + phn_next_set(a_type, a_field, tail, \ + phn0); \ + tail = phn0; \ + phn0 = phnrest; \ + } else { \ + phn_next_set(a_type, a_field, tail, \ + phn0); \ + tail = phn0; \ + phn0 = NULL; \ + } \ + } \ + phn0 = head; \ + phn1 = phn_next_get(a_type, a_field, phn0); \ + if (phn1 != NULL) { \ + while (true) { \ + head = phn_next_get(a_type, a_field, \ + phn1); \ + assert(phn_prev_get(a_type, a_field, \ + phn0) == NULL); \ + phn_next_set(a_type, a_field, phn0, \ + NULL); \ + assert(phn_prev_get(a_type, a_field, \ + phn1) == NULL); \ + phn_next_set(a_type, a_field, phn1, \ + NULL); \ + phn_merge(a_type, a_field, phn0, phn1, \ + a_cmp, phn0); \ + if (head == NULL) \ + break; \ + phn_next_set(a_type, a_field, tail, \ + phn0); \ + tail = phn0; \ + phn0 = head; \ + phn1 = phn_next_get(a_type, a_field, \ + phn0); \ + } \ + } \ + } \ + r_phn = phn0; \ +} while (0) + +#define ph_merge_aux(a_type, 
a_field, a_ph, a_cmp) do { \ + a_type *phn = phn_next_get(a_type, a_field, a_ph->ph_root); \ + if (phn != NULL) { \ + phn_prev_set(a_type, a_field, a_ph->ph_root, NULL); \ + phn_next_set(a_type, a_field, a_ph->ph_root, NULL); \ + phn_prev_set(a_type, a_field, phn, NULL); \ + ph_merge_siblings(a_type, a_field, phn, a_cmp, phn); \ + assert(phn_next_get(a_type, a_field, phn) == NULL); \ + phn_merge(a_type, a_field, a_ph->ph_root, phn, a_cmp, \ + a_ph->ph_root); \ + } \ +} while (0) + +#define ph_merge_children(a_type, a_field, a_phn, a_cmp, r_phn) do { \ + a_type *lchild = phn_lchild_get(a_type, a_field, a_phn); \ + if (lchild == NULL) \ + r_phn = NULL; \ + else { \ + ph_merge_siblings(a_type, a_field, lchild, a_cmp, \ + r_phn); \ + } \ +} while (0) + +/* + * The ph_proto() macro generates function prototypes that correspond to the + * functions generated by an equivalently parameterized call to ph_gen(). + */ +#define ph_proto(a_attr, a_prefix, a_ph_type, a_type) \ +a_attr void a_prefix##new(a_ph_type *ph); \ +a_attr bool a_prefix##empty(a_ph_type *ph); \ +a_attr a_type *a_prefix##first(a_ph_type *ph); \ +a_attr void a_prefix##insert(a_ph_type *ph, a_type *phn); \ +a_attr a_type *a_prefix##remove_first(a_ph_type *ph); \ +a_attr void a_prefix##remove(a_ph_type *ph, a_type *phn); + +/* + * The ph_gen() macro generates a type-specific pairing heap implementation, + * based on the above cpp macros. 
+ */ +#define ph_gen(a_attr, a_prefix, a_ph_type, a_type, a_field, a_cmp) \ +a_attr void \ +a_prefix##new(a_ph_type *ph) \ +{ \ + \ + memset(ph, 0, sizeof(ph(a_type))); \ +} \ +a_attr bool \ +a_prefix##empty(a_ph_type *ph) \ +{ \ + \ + return (ph->ph_root == NULL); \ +} \ +a_attr a_type * \ +a_prefix##first(a_ph_type *ph) \ +{ \ + \ + if (ph->ph_root == NULL) \ + return (NULL); \ + ph_merge_aux(a_type, a_field, ph, a_cmp); \ + return (ph->ph_root); \ +} \ +a_attr void \ +a_prefix##insert(a_ph_type *ph, a_type *phn) \ +{ \ + \ + memset(&phn->a_field, 0, sizeof(phn(a_type))); \ + \ + /* \ + * Treat the root as an aux list during insertion, and lazily \ + * merge during a_prefix##remove_first(). For elements that \ + * are inserted, then removed via a_prefix##remove() before the \ + * aux list is ever processed, this makes insert/remove \ + * constant-time, whereas eager merging would make insert \ + * O(log n). \ + */ \ + if (ph->ph_root == NULL) \ + ph->ph_root = phn; \ + else { \ + phn_next_set(a_type, a_field, phn, phn_next_get(a_type, \ + a_field, ph->ph_root)); \ + if (phn_next_get(a_type, a_field, ph->ph_root) != \ + NULL) { \ + phn_prev_set(a_type, a_field, \ + phn_next_get(a_type, a_field, ph->ph_root), \ + phn); \ + } \ + phn_prev_set(a_type, a_field, phn, ph->ph_root); \ + phn_next_set(a_type, a_field, ph->ph_root, phn); \ + } \ +} \ +a_attr a_type * \ +a_prefix##remove_first(a_ph_type *ph) \ +{ \ + a_type *ret; \ + \ + if (ph->ph_root == NULL) \ + return (NULL); \ + ph_merge_aux(a_type, a_field, ph, a_cmp); \ + \ + ret = ph->ph_root; \ + \ + ph_merge_children(a_type, a_field, ph->ph_root, a_cmp, \ + ph->ph_root); \ + \ + return (ret); \ +} \ +a_attr void \ +a_prefix##remove(a_ph_type *ph, a_type *phn) \ +{ \ + a_type *replace, *parent; \ + \ + /* \ + * We can delete from aux list without merging it, but we need \ + * to merge if we are dealing with the root node. 
\ + */ \ + if (ph->ph_root == phn) { \ + ph_merge_aux(a_type, a_field, ph, a_cmp); \ + if (ph->ph_root == phn) { \ + ph_merge_children(a_type, a_field, ph->ph_root, \ + a_cmp, ph->ph_root); \ + return; \ + } \ + } \ + \ + /* Get parent (if phn is leftmost child) before mutating. */ \ + if ((parent = phn_prev_get(a_type, a_field, phn)) != NULL) { \ + if (phn_lchild_get(a_type, a_field, parent) != phn) \ + parent = NULL; \ + } \ + /* Find a possible replacement node, and link to parent. */ \ + ph_merge_children(a_type, a_field, phn, a_cmp, replace); \ + /* Set next/prev for sibling linked list. */ \ + if (replace != NULL) { \ + if (parent != NULL) { \ + phn_prev_set(a_type, a_field, replace, parent); \ + phn_lchild_set(a_type, a_field, parent, \ + replace); \ + } else { \ + phn_prev_set(a_type, a_field, replace, \ + phn_prev_get(a_type, a_field, phn)); \ + if (phn_prev_get(a_type, a_field, phn) != \ + NULL) { \ + phn_next_set(a_type, a_field, \ + phn_prev_get(a_type, a_field, phn), \ + replace); \ + } \ + } \ + phn_next_set(a_type, a_field, replace, \ + phn_next_get(a_type, a_field, phn)); \ + if (phn_next_get(a_type, a_field, phn) != NULL) { \ + phn_prev_set(a_type, a_field, \ + phn_next_get(a_type, a_field, phn), \ + replace); \ + } \ + } else { \ + if (parent != NULL) { \ + a_type *next = phn_next_get(a_type, a_field, \ + phn); \ + phn_lchild_set(a_type, a_field, parent, next); \ + if (next != NULL) { \ + phn_prev_set(a_type, a_field, next, \ + parent); \ + } \ + } else { \ + assert(phn_prev_get(a_type, a_field, phn) != \ + NULL); \ + phn_next_set(a_type, a_field, \ + phn_prev_get(a_type, a_field, phn), \ + phn_next_get(a_type, a_field, phn)); \ + } \ + if (phn_next_get(a_type, a_field, phn) != NULL) { \ + phn_prev_set(a_type, a_field, \ + phn_next_get(a_type, a_field, phn), \ + phn_prev_get(a_type, a_field, phn)); \ + } \ + } \ +} + +#endif /* PH_H_ */ diff --git a/deps/jemalloc/include/jemalloc/internal/private_symbols.txt 
b/deps/jemalloc/include/jemalloc/internal/private_symbols.txt index a90021aa6..c1c6c4090 100644 --- a/deps/jemalloc/include/jemalloc/internal/private_symbols.txt +++ b/deps/jemalloc/include/jemalloc/internal/private_symbols.txt @@ -3,12 +3,15 @@ a0get a0malloc arena_aalloc arena_alloc_junk_small +arena_basic_stats_merge arena_bin_index arena_bin_info -arena_bitselm_get +arena_bitselm_get_const +arena_bitselm_get_mutable arena_boot arena_choose arena_choose_hard +arena_choose_impl arena_chunk_alloc_huge arena_chunk_cache_maybe_insert arena_chunk_cache_maybe_remove @@ -25,18 +28,25 @@ arena_dalloc_junk_small arena_dalloc_large arena_dalloc_large_junked_locked arena_dalloc_small +arena_decay_tick +arena_decay_ticks +arena_decay_time_default_get +arena_decay_time_default_set +arena_decay_time_get +arena_decay_time_set arena_dss_prec_get arena_dss_prec_set +arena_extent_sn_next arena_get -arena_get_hard +arena_ichoose arena_init arena_lg_dirty_mult_default_get arena_lg_dirty_mult_default_set arena_lg_dirty_mult_get arena_lg_dirty_mult_set arena_malloc +arena_malloc_hard arena_malloc_large -arena_malloc_small arena_mapbits_allocated_get arena_mapbits_binind_get arena_mapbits_decommitted_get @@ -47,9 +57,6 @@ arena_mapbits_large_binind_set arena_mapbits_large_get arena_mapbits_large_set arena_mapbits_large_size_get -arena_mapbitsp_get -arena_mapbitsp_read -arena_mapbitsp_write arena_mapbits_size_decode arena_mapbits_size_encode arena_mapbits_small_runind_get @@ -58,23 +65,33 @@ arena_mapbits_unallocated_set arena_mapbits_unallocated_size_get arena_mapbits_unallocated_size_set arena_mapbits_unzeroed_get +arena_mapbitsp_get_const +arena_mapbitsp_get_mutable +arena_mapbitsp_read +arena_mapbitsp_write arena_maxrun arena_maybe_purge arena_metadata_allocated_add arena_metadata_allocated_get arena_metadata_allocated_sub arena_migrate -arena_miscelm_get +arena_miscelm_get_const +arena_miscelm_get_mutable arena_miscelm_to_pageind arena_miscelm_to_rpages -arena_nbound arena_new 
arena_node_alloc arena_node_dalloc +arena_nthreads_dec +arena_nthreads_get +arena_nthreads_inc arena_palloc arena_postfork_child arena_postfork_parent -arena_prefork +arena_prefork0 +arena_prefork1 +arena_prefork2 +arena_prefork3 arena_prof_accum arena_prof_accum_impl arena_prof_accum_locked @@ -83,21 +100,25 @@ arena_prof_tctx_get arena_prof_tctx_reset arena_prof_tctx_set arena_ptr_small_binind_get -arena_purge_all +arena_purge arena_quarantine_junk_small arena_ralloc arena_ralloc_junk_large arena_ralloc_no_move arena_rd_to_miscelm arena_redzone_corruption +arena_reset arena_run_regind arena_run_to_miscelm arena_salloc -arenas_cache_bypass_cleanup -arenas_cache_cleanup arena_sdalloc arena_stats_merge arena_tcache_fill_small +arena_tdata_get +arena_tdata_get_hard +arenas +arenas_tdata_bypass_cleanup +arenas_tdata_cleanup atomic_add_p atomic_add_u atomic_add_uint32 @@ -113,6 +134,11 @@ atomic_sub_u atomic_sub_uint32 atomic_sub_uint64 atomic_sub_z +atomic_write_p +atomic_write_u +atomic_write_uint32 +atomic_write_uint64 +atomic_write_z base_alloc base_boot base_postfork_child @@ -122,7 +148,6 @@ base_stats_get bitmap_full bitmap_get bitmap_info_init -bitmap_info_ngroups bitmap_init bitmap_set bitmap_sfu @@ -139,32 +164,25 @@ chunk_alloc_dss chunk_alloc_mmap chunk_alloc_wrapper chunk_boot -chunk_dalloc_arena chunk_dalloc_cache chunk_dalloc_mmap chunk_dalloc_wrapper chunk_deregister chunk_dss_boot -chunk_dss_postfork_child -chunk_dss_postfork_parent +chunk_dss_mergeable chunk_dss_prec_get chunk_dss_prec_set -chunk_dss_prefork chunk_hooks_default chunk_hooks_get chunk_hooks_set chunk_in_dss chunk_lookup chunk_npages -chunk_postfork_child -chunk_postfork_parent -chunk_prefork -chunk_purge_arena chunk_purge_wrapper chunk_register +chunks_rtree chunksize chunksize_mask -chunks_rtree ckh_count ckh_delete ckh_insert @@ -183,6 +201,7 @@ ctl_nametomib ctl_postfork_child ctl_postfork_parent ctl_prefork +decay_ticker_get dss_prec_names extent_node_achunk_get 
extent_node_achunk_set @@ -190,6 +209,8 @@ extent_node_addr_get extent_node_addr_set extent_node_arena_get extent_node_arena_set +extent_node_committed_get +extent_node_committed_set extent_node_dirty_insert extent_node_dirty_linkage_init extent_node_dirty_remove @@ -198,8 +219,12 @@ extent_node_prof_tctx_get extent_node_prof_tctx_set extent_node_size_get extent_node_size_set +extent_node_sn_get +extent_node_sn_set extent_node_zeroed_get extent_node_zeroed_set +extent_tree_ad_destroy +extent_tree_ad_destroy_recurse extent_tree_ad_empty extent_tree_ad_first extent_tree_ad_insert @@ -217,23 +242,31 @@ extent_tree_ad_reverse_iter extent_tree_ad_reverse_iter_recurse extent_tree_ad_reverse_iter_start extent_tree_ad_search -extent_tree_szad_empty -extent_tree_szad_first -extent_tree_szad_insert -extent_tree_szad_iter -extent_tree_szad_iter_recurse -extent_tree_szad_iter_start -extent_tree_szad_last -extent_tree_szad_new -extent_tree_szad_next -extent_tree_szad_nsearch -extent_tree_szad_prev -extent_tree_szad_psearch -extent_tree_szad_remove -extent_tree_szad_reverse_iter -extent_tree_szad_reverse_iter_recurse -extent_tree_szad_reverse_iter_start -extent_tree_szad_search +extent_tree_szsnad_destroy +extent_tree_szsnad_destroy_recurse +extent_tree_szsnad_empty +extent_tree_szsnad_first +extent_tree_szsnad_insert +extent_tree_szsnad_iter +extent_tree_szsnad_iter_recurse +extent_tree_szsnad_iter_start +extent_tree_szsnad_last +extent_tree_szsnad_new +extent_tree_szsnad_next +extent_tree_szsnad_nsearch +extent_tree_szsnad_prev +extent_tree_szsnad_psearch +extent_tree_szsnad_remove +extent_tree_szsnad_reverse_iter +extent_tree_szsnad_reverse_iter_recurse +extent_tree_szsnad_reverse_iter_start +extent_tree_szsnad_search +ffs_llu +ffs_lu +ffs_u +ffs_u32 +ffs_u64 +ffs_zu get_errno hash hash_fmix_32 @@ -257,19 +290,16 @@ huge_ralloc huge_ralloc_no_move huge_salloc iaalloc +ialloc iallocztm -icalloc -icalloct +iarena_cleanup idalloc -idalloct idalloctm -imalloc -imalloct 
+in_valgrind index2size index2size_compute index2size_lookup index2size_tab -in_valgrind ipalloc ipalloct ipallocztm @@ -288,7 +318,11 @@ jemalloc_postfork_parent jemalloc_prefork large_maxclass lg_floor +lg_prof_sample malloc_cprintf +malloc_mutex_assert_not_owner +malloc_mutex_assert_owner +malloc_mutex_boot malloc_mutex_init malloc_mutex_lock malloc_mutex_postfork_child @@ -310,12 +344,29 @@ malloc_write map_bias map_misc_offset mb_write -mutex_boot -narenas_cache_cleanup +narenas_auto +narenas_tdata_cleanup narenas_total_get ncpus nhbins +nhclasses +nlclasses +nstime_add +nstime_compare +nstime_copy +nstime_divide +nstime_idivide +nstime_imultiply +nstime_init +nstime_init2 +nstime_monotonic +nstime_ns +nstime_nsec +nstime_sec +nstime_subtract +nstime_update opt_abort +opt_decay_time opt_dss opt_junk opt_junk_alloc @@ -334,6 +385,7 @@ opt_prof_gdump opt_prof_leak opt_prof_prefix opt_prof_thread_active_init +opt_purge opt_quarantine opt_redzone opt_stats_print @@ -342,13 +394,32 @@ opt_utrace opt_xmalloc opt_zero p2rz +pages_boot pages_commit pages_decommit +pages_huge pages_map +pages_nohuge pages_purge pages_trim pages_unmap -pow2_ceil +pind2sz +pind2sz_compute +pind2sz_lookup +pind2sz_tab +pow2_ceil_u32 +pow2_ceil_u64 +pow2_ceil_zu +prng_lg_range_u32 +prng_lg_range_u64 +prng_lg_range_zu +prng_range_u32 +prng_range_u64 +prng_range_zu +prng_state_next_u32 +prng_state_next_u64 +prng_state_next_zu +prof_active prof_active_get prof_active_get_unlocked prof_active_set @@ -358,6 +429,7 @@ prof_backtrace prof_boot0 prof_boot1 prof_boot2 +prof_bt_count prof_dump_header prof_dump_open prof_free @@ -375,7 +447,8 @@ prof_malloc_sample_object prof_mdump prof_postfork_child prof_postfork_parent -prof_prefork +prof_prefork0 +prof_prefork1 prof_realloc prof_reset prof_sample_accum_update @@ -384,6 +457,7 @@ prof_tctx_get prof_tctx_reset prof_tctx_set prof_tdata_cleanup +prof_tdata_count prof_tdata_get prof_tdata_init prof_tdata_reinit @@ -393,11 +467,13 @@ 
prof_thread_active_init_set prof_thread_active_set prof_thread_name_get prof_thread_name_set +psz2ind +psz2u +purge_mode_names quarantine quarantine_alloc_hook quarantine_alloc_hook_work quarantine_cleanup -register_zone rtree_child_read rtree_child_read_hard rtree_child_tryread @@ -413,6 +489,8 @@ rtree_subtree_read_hard rtree_subtree_tryread rtree_val_read rtree_val_write +run_quantize_ceil +run_quantize_floor s2u s2u_compute s2u_lookup @@ -422,6 +500,8 @@ size2index size2index_compute size2index_lookup size2index_tab +spin_adaptive +spin_init stats_cactive stats_cactive_add stats_cactive_get @@ -431,8 +511,6 @@ tcache_alloc_easy tcache_alloc_large tcache_alloc_small tcache_alloc_small_hard -tcache_arena_associate -tcache_arena_dissociate tcache_arena_reassociate tcache_bin_flush_large tcache_bin_flush_small @@ -451,49 +529,103 @@ tcache_flush tcache_get tcache_get_hard tcache_maxclass -tcaches tcache_salloc +tcache_stats_merge +tcaches tcaches_create tcaches_destroy tcaches_flush tcaches_get -tcache_stats_merge thread_allocated_cleanup thread_deallocated_cleanup +ticker_copy +ticker_init +ticker_read +ticker_tick +ticker_ticks tsd_arena_get tsd_arena_set +tsd_arenap_get +tsd_arenas_tdata_bypass_get +tsd_arenas_tdata_bypass_set +tsd_arenas_tdata_bypassp_get +tsd_arenas_tdata_get +tsd_arenas_tdata_set +tsd_arenas_tdatap_get tsd_boot tsd_boot0 tsd_boot1 tsd_booted +tsd_booted_get tsd_cleanup tsd_cleanup_wrapper tsd_fetch +tsd_fetch_impl tsd_get -tsd_wrapper_get -tsd_wrapper_set +tsd_get_allocates +tsd_iarena_get +tsd_iarena_set +tsd_iarenap_get tsd_initialized tsd_init_check_recursion tsd_init_finish tsd_init_head +tsd_narenas_tdata_get +tsd_narenas_tdata_set +tsd_narenas_tdatap_get +tsd_wrapper_get +tsd_wrapper_set tsd_nominal +tsd_prof_tdata_get +tsd_prof_tdata_set +tsd_prof_tdatap_get tsd_quarantine_get tsd_quarantine_set +tsd_quarantinep_get tsd_set tsd_tcache_enabled_get tsd_tcache_enabled_set +tsd_tcache_enabledp_get tsd_tcache_get tsd_tcache_set -tsd_tls 
-tsd_tsd -tsd_prof_tdata_get -tsd_prof_tdata_set +tsd_tcachep_get tsd_thread_allocated_get tsd_thread_allocated_set +tsd_thread_allocatedp_get tsd_thread_deallocated_get tsd_thread_deallocated_set +tsd_thread_deallocatedp_get +tsd_tls +tsd_tsd +tsd_tsdn +tsd_witness_fork_get +tsd_witness_fork_set +tsd_witness_forkp_get +tsd_witnesses_get +tsd_witnesses_set +tsd_witnessesp_get +tsdn_fetch +tsdn_null +tsdn_tsd u2rz valgrind_freelike_block valgrind_make_mem_defined valgrind_make_mem_noaccess valgrind_make_mem_undefined +witness_assert_lockless +witness_assert_not_owner +witness_assert_owner +witness_fork_cleanup +witness_init +witness_lock +witness_lock_error +witness_lockless_error +witness_not_owner_error +witness_owner +witness_owner_error +witness_postfork_child +witness_postfork_parent +witness_prefork +witness_unlock +witnesses_cleanup +zone_register diff --git a/deps/jemalloc/include/jemalloc/internal/prng.h b/deps/jemalloc/include/jemalloc/internal/prng.h index 216d0ef47..c2bda19c6 100644 --- a/deps/jemalloc/include/jemalloc/internal/prng.h +++ b/deps/jemalloc/include/jemalloc/internal/prng.h @@ -18,31 +18,13 @@ * proportional to bit position. For example, the lowest bit has a cycle of 2, * the next has a cycle of 4, etc. For this reason, we prefer to use the upper * bits. - * - * Macro parameters: - * uint32_t r : Result. - * unsigned lg_range : (0..32], number of least significant bits to return. - * uint32_t state : Seed value. - * const uint32_t a, c : See above discussion. */ -#define prng32(r, lg_range, state, a, c) do { \ - assert((lg_range) > 0); \ - assert((lg_range) <= 32); \ - \ - r = (state * (a)) + (c); \ - state = r; \ - r >>= (32 - (lg_range)); \ -} while (false) -/* Same as prng32(), but 64 bits of pseudo-randomness, using uint64_t. 
*/ -#define prng64(r, lg_range, state, a, c) do { \ - assert((lg_range) > 0); \ - assert((lg_range) <= 64); \ - \ - r = (state * (a)) + (c); \ - state = r; \ - r >>= (64 - (lg_range)); \ -} while (false) +#define PRNG_A_32 UINT32_C(1103515241) +#define PRNG_C_32 UINT32_C(12347) + +#define PRNG_A_64 UINT64_C(6364136223846793005) +#define PRNG_C_64 UINT64_C(1442695040888963407) #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ @@ -56,5 +38,170 @@ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES +#ifndef JEMALLOC_ENABLE_INLINE +uint32_t prng_state_next_u32(uint32_t state); +uint64_t prng_state_next_u64(uint64_t state); +size_t prng_state_next_zu(size_t state); + +uint32_t prng_lg_range_u32(uint32_t *state, unsigned lg_range, + bool atomic); +uint64_t prng_lg_range_u64(uint64_t *state, unsigned lg_range); +size_t prng_lg_range_zu(size_t *state, unsigned lg_range, bool atomic); + +uint32_t prng_range_u32(uint32_t *state, uint32_t range, bool atomic); +uint64_t prng_range_u64(uint64_t *state, uint64_t range); +size_t prng_range_zu(size_t *state, size_t range, bool atomic); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PRNG_C_)) +JEMALLOC_ALWAYS_INLINE uint32_t +prng_state_next_u32(uint32_t state) +{ + + return ((state * PRNG_A_32) + PRNG_C_32); +} + +JEMALLOC_ALWAYS_INLINE uint64_t +prng_state_next_u64(uint64_t state) +{ + + return ((state * PRNG_A_64) + PRNG_C_64); +} + +JEMALLOC_ALWAYS_INLINE size_t +prng_state_next_zu(size_t state) +{ + +#if LG_SIZEOF_PTR == 2 + return ((state * PRNG_A_32) + PRNG_C_32); +#elif LG_SIZEOF_PTR == 3 + return ((state * PRNG_A_64) + PRNG_C_64); +#else +#error Unsupported pointer size +#endif +} + +JEMALLOC_ALWAYS_INLINE uint32_t +prng_lg_range_u32(uint32_t *state, unsigned lg_range, bool atomic) +{ + uint32_t ret, state1; + + assert(lg_range > 0); + assert(lg_range <= 32); + + if (atomic) { + uint32_t 
state0; + + do { + state0 = atomic_read_uint32(state); + state1 = prng_state_next_u32(state0); + } while (atomic_cas_uint32(state, state0, state1)); + } else { + state1 = prng_state_next_u32(*state); + *state = state1; + } + ret = state1 >> (32 - lg_range); + + return (ret); +} + +/* 64-bit atomic operations cannot be supported on all relevant platforms. */ +JEMALLOC_ALWAYS_INLINE uint64_t +prng_lg_range_u64(uint64_t *state, unsigned lg_range) +{ + uint64_t ret, state1; + + assert(lg_range > 0); + assert(lg_range <= 64); + + state1 = prng_state_next_u64(*state); + *state = state1; + ret = state1 >> (64 - lg_range); + + return (ret); +} + +JEMALLOC_ALWAYS_INLINE size_t +prng_lg_range_zu(size_t *state, unsigned lg_range, bool atomic) +{ + size_t ret, state1; + + assert(lg_range > 0); + assert(lg_range <= ZU(1) << (3 + LG_SIZEOF_PTR)); + + if (atomic) { + size_t state0; + + do { + state0 = atomic_read_z(state); + state1 = prng_state_next_zu(state0); + } while (atomic_cas_z(state, state0, state1)); + } else { + state1 = prng_state_next_zu(*state); + *state = state1; + } + ret = state1 >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - lg_range); + + return (ret); +} + +JEMALLOC_ALWAYS_INLINE uint32_t +prng_range_u32(uint32_t *state, uint32_t range, bool atomic) +{ + uint32_t ret; + unsigned lg_range; + + assert(range > 1); + + /* Compute the ceiling of lg(range). */ + lg_range = ffs_u32(pow2_ceil_u32(range)) - 1; + + /* Generate a result in [0..range) via repeated trial. */ + do { + ret = prng_lg_range_u32(state, lg_range, atomic); + } while (ret >= range); + + return (ret); +} + +JEMALLOC_ALWAYS_INLINE uint64_t +prng_range_u64(uint64_t *state, uint64_t range) +{ + uint64_t ret; + unsigned lg_range; + + assert(range > 1); + + /* Compute the ceiling of lg(range). */ + lg_range = ffs_u64(pow2_ceil_u64(range)) - 1; + + /* Generate a result in [0..range) via repeated trial. 
*/ + do { + ret = prng_lg_range_u64(state, lg_range); + } while (ret >= range); + + return (ret); +} + +JEMALLOC_ALWAYS_INLINE size_t +prng_range_zu(size_t *state, size_t range, bool atomic) +{ + size_t ret; + unsigned lg_range; + + assert(range > 1); + + /* Compute the ceiling of lg(range). */ + lg_range = ffs_u64(pow2_ceil_u64(range)) - 1; + + /* Generate a result in [0..range) via repeated trial. */ + do { + ret = prng_lg_range_zu(state, lg_range, atomic); + } while (ret >= range); + + return (ret); +} +#endif + #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/prof.h b/deps/jemalloc/include/jemalloc/internal/prof.h index e5198c3e8..8293b71ed 100644 --- a/deps/jemalloc/include/jemalloc/internal/prof.h +++ b/deps/jemalloc/include/jemalloc/internal/prof.h @@ -281,7 +281,7 @@ extern uint64_t prof_interval; extern size_t lg_prof_sample; void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); -void prof_malloc_sample_object(const void *ptr, size_t usize, +void prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx); void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx); void bt_init(prof_bt_t *bt, void **vec); @@ -293,32 +293,33 @@ size_t prof_bt_count(void); const prof_cnt_t *prof_cnt_all(void); typedef int (prof_dump_open_t)(bool, const char *); extern prof_dump_open_t *prof_dump_open; -typedef bool (prof_dump_header_t)(bool, const prof_cnt_t *); +typedef bool (prof_dump_header_t)(tsdn_t *, bool, const prof_cnt_t *); extern prof_dump_header_t *prof_dump_header; #endif -void prof_idump(void); -bool prof_mdump(const char *filename); -void prof_gdump(void); +void prof_idump(tsdn_t *tsdn); +bool prof_mdump(tsd_t *tsd, const char *filename); +void prof_gdump(tsdn_t *tsdn); prof_tdata_t *prof_tdata_init(tsd_t *tsd); prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); void 
prof_reset(tsd_t *tsd, size_t lg_sample); void prof_tdata_cleanup(tsd_t *tsd); -const char *prof_thread_name_get(void); -bool prof_active_get(void); -bool prof_active_set(bool active); +bool prof_active_get(tsdn_t *tsdn); +bool prof_active_set(tsdn_t *tsdn, bool active); +const char *prof_thread_name_get(tsd_t *tsd); int prof_thread_name_set(tsd_t *tsd, const char *thread_name); -bool prof_thread_active_get(void); -bool prof_thread_active_set(bool active); -bool prof_thread_active_init_get(void); -bool prof_thread_active_init_set(bool active_init); -bool prof_gdump_get(void); -bool prof_gdump_set(bool active); +bool prof_thread_active_get(tsd_t *tsd); +bool prof_thread_active_set(tsd_t *tsd, bool active); +bool prof_thread_active_init_get(tsdn_t *tsdn); +bool prof_thread_active_init_set(tsdn_t *tsdn, bool active_init); +bool prof_gdump_get(tsdn_t *tsdn); +bool prof_gdump_set(tsdn_t *tsdn, bool active); void prof_boot0(void); void prof_boot1(void); -bool prof_boot2(void); -void prof_prefork(void); -void prof_postfork_parent(void); -void prof_postfork_child(void); +bool prof_boot2(tsd_t *tsd); +void prof_prefork0(tsdn_t *tsdn); +void prof_prefork1(tsdn_t *tsdn); +void prof_postfork_parent(tsdn_t *tsdn); +void prof_postfork_child(tsdn_t *tsdn); void prof_sample_threshold_update(prof_tdata_t *tdata); #endif /* JEMALLOC_H_EXTERNS */ @@ -329,17 +330,17 @@ void prof_sample_threshold_update(prof_tdata_t *tdata); bool prof_active_get_unlocked(void); bool prof_gdump_get_unlocked(void); prof_tdata_t *prof_tdata_get(tsd_t *tsd, bool create); +prof_tctx_t *prof_tctx_get(tsdn_t *tsdn, const void *ptr); +void prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, + prof_tctx_t *tctx); +void prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize, + const void *old_ptr, prof_tctx_t *tctx); bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool commit, prof_tdata_t **tdata_out); prof_tctx_t *prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update); 
-prof_tctx_t *prof_tctx_get(const void *ptr); -void prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx); -void prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, +void prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx); -void prof_malloc_sample_object(const void *ptr, size_t usize, - prof_tctx_t *tctx); -void prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx); void prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, bool prof_active, bool updated, const void *old_ptr, size_t old_usize, prof_tctx_t *old_tctx); @@ -397,34 +398,34 @@ prof_tdata_get(tsd_t *tsd, bool create) } JEMALLOC_ALWAYS_INLINE prof_tctx_t * -prof_tctx_get(const void *ptr) +prof_tctx_get(tsdn_t *tsdn, const void *ptr) { cassert(config_prof); assert(ptr != NULL); - return (arena_prof_tctx_get(ptr)); + return (arena_prof_tctx_get(tsdn, ptr)); } JEMALLOC_ALWAYS_INLINE void -prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx) +prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); - arena_prof_tctx_set(ptr, usize, tctx); + arena_prof_tctx_set(tsdn, ptr, usize, tctx); } JEMALLOC_ALWAYS_INLINE void -prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, +prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize, const void *old_ptr, prof_tctx_t *old_tctx) { cassert(config_prof); assert(ptr != NULL); - arena_prof_tctx_reset(ptr, usize, old_ptr, old_tctx); + arena_prof_tctx_reset(tsdn, ptr, usize, old_ptr, old_tctx); } JEMALLOC_ALWAYS_INLINE bool @@ -436,16 +437,16 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, cassert(config_prof); tdata = prof_tdata_get(tsd, true); - if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)) tdata = NULL; if (tdata_out != NULL) *tdata_out = tdata; - if (tdata == NULL) + if (unlikely(tdata == NULL)) return 
(true); - if (tdata->bytes_until_sample >= usize) { + if (likely(tdata->bytes_until_sample >= usize)) { if (update) tdata->bytes_until_sample -= usize; return (true); @@ -479,17 +480,17 @@ prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) } JEMALLOC_ALWAYS_INLINE void -prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx) +prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); - assert(usize == isalloc(ptr, true)); + assert(usize == isalloc(tsdn, ptr, true)); if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) - prof_malloc_sample_object(ptr, usize, tctx); + prof_malloc_sample_object(tsdn, ptr, usize, tctx); else - prof_tctx_set(ptr, usize, (prof_tctx_t *)(uintptr_t)1U); + prof_tctx_set(tsdn, ptr, usize, (prof_tctx_t *)(uintptr_t)1U); } JEMALLOC_ALWAYS_INLINE void @@ -503,7 +504,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U); if (prof_active && !updated && ptr != NULL) { - assert(usize == isalloc(ptr, true)); + assert(usize == isalloc(tsd_tsdn(tsd), ptr, true)); if (prof_sample_accum_update(tsd, usize, true, NULL)) { /* * Don't sample. The usize passed to prof_alloc_prep() @@ -512,6 +513,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, * though its actual usize was insufficient to cross the * sample threshold. 
*/ + prof_alloc_rollback(tsd, tctx, true); tctx = (prof_tctx_t *)(uintptr_t)1U; } } @@ -520,9 +522,9 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U); if (unlikely(sampled)) - prof_malloc_sample_object(ptr, usize, tctx); + prof_malloc_sample_object(tsd_tsdn(tsd), ptr, usize, tctx); else - prof_tctx_reset(ptr, usize, old_ptr, old_tctx); + prof_tctx_reset(tsd_tsdn(tsd), ptr, usize, old_ptr, old_tctx); if (unlikely(old_sampled)) prof_free_sampled_object(tsd, old_usize, old_tctx); @@ -531,10 +533,10 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, JEMALLOC_ALWAYS_INLINE void prof_free(tsd_t *tsd, const void *ptr, size_t usize) { - prof_tctx_t *tctx = prof_tctx_get(ptr); + prof_tctx_t *tctx = prof_tctx_get(tsd_tsdn(tsd), ptr); cassert(config_prof); - assert(usize == isalloc(ptr, true)); + assert(usize == isalloc(tsd_tsdn(tsd), ptr, true)); if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) prof_free_sampled_object(tsd, usize, tctx); diff --git a/deps/jemalloc/include/jemalloc/internal/rb.h b/deps/jemalloc/include/jemalloc/internal/rb.h index 2ca8e5933..3770342f8 100644 --- a/deps/jemalloc/include/jemalloc/internal/rb.h +++ b/deps/jemalloc/include/jemalloc/internal/rb.h @@ -42,7 +42,6 @@ struct { \ #define rb_tree(a_type) \ struct { \ a_type *rbt_root; \ - a_type rbt_nil; \ } /* Left accessors. */ @@ -79,6 +78,15 @@ struct { \ (a_node)->a_field.rbn_right_red = (a_type *) (((intptr_t) \ (a_node)->a_field.rbn_right_red) & ((ssize_t)-2)); \ } while (0) + +/* Node initializer. */ +#define rbt_node_new(a_type, a_field, a_rbt, a_node) do { \ + /* Bookkeeping bit cannot be used by node pointer. */ \ + assert(((uintptr_t)(a_node) & 0x1) == 0); \ + rbtn_left_set(a_type, a_field, (a_node), NULL); \ + rbtn_right_set(a_type, a_field, (a_node), NULL); \ + rbtn_red_set(a_type, a_field, (a_node)); \ +} while (0) #else /* Right accessors. 
*/ #define rbtn_right_get(a_type, a_field, a_node) \ @@ -99,28 +107,26 @@ struct { \ #define rbtn_black_set(a_type, a_field, a_node) do { \ (a_node)->a_field.rbn_red = false; \ } while (0) -#endif /* Node initializer. */ #define rbt_node_new(a_type, a_field, a_rbt, a_node) do { \ - rbtn_left_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \ - rbtn_right_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \ + rbtn_left_set(a_type, a_field, (a_node), NULL); \ + rbtn_right_set(a_type, a_field, (a_node), NULL); \ rbtn_red_set(a_type, a_field, (a_node)); \ } while (0) +#endif /* Tree initializer. */ #define rb_new(a_type, a_field, a_rbt) do { \ - (a_rbt)->rbt_root = &(a_rbt)->rbt_nil; \ - rbt_node_new(a_type, a_field, a_rbt, &(a_rbt)->rbt_nil); \ - rbtn_black_set(a_type, a_field, &(a_rbt)->rbt_nil); \ + (a_rbt)->rbt_root = NULL; \ } while (0) /* Internal utility macros. */ #define rbtn_first(a_type, a_field, a_rbt, a_root, r_node) do { \ (r_node) = (a_root); \ - if ((r_node) != &(a_rbt)->rbt_nil) { \ + if ((r_node) != NULL) { \ for (; \ - rbtn_left_get(a_type, a_field, (r_node)) != &(a_rbt)->rbt_nil;\ + rbtn_left_get(a_type, a_field, (r_node)) != NULL; \ (r_node) = rbtn_left_get(a_type, a_field, (r_node))) { \ } \ } \ @@ -128,10 +134,9 @@ struct { \ #define rbtn_last(a_type, a_field, a_rbt, a_root, r_node) do { \ (r_node) = (a_root); \ - if ((r_node) != &(a_rbt)->rbt_nil) { \ - for (; rbtn_right_get(a_type, a_field, (r_node)) != \ - &(a_rbt)->rbt_nil; (r_node) = rbtn_right_get(a_type, a_field, \ - (r_node))) { \ + if ((r_node) != NULL) { \ + for (; rbtn_right_get(a_type, a_field, (r_node)) != NULL; \ + (r_node) = rbtn_right_get(a_type, a_field, (r_node))) { \ } \ } \ } while (0) @@ -169,11 +174,11 @@ a_prefix##next(a_rbt_type *rbtree, a_type *node); \ a_attr a_type * \ a_prefix##prev(a_rbt_type *rbtree, a_type *node); \ a_attr a_type * \ -a_prefix##search(a_rbt_type *rbtree, a_type *key); \ +a_prefix##search(a_rbt_type *rbtree, const a_type *key); \ a_attr a_type * \ 
-a_prefix##nsearch(a_rbt_type *rbtree, a_type *key); \ +a_prefix##nsearch(a_rbt_type *rbtree, const a_type *key); \ a_attr a_type * \ -a_prefix##psearch(a_rbt_type *rbtree, a_type *key); \ +a_prefix##psearch(a_rbt_type *rbtree, const a_type *key); \ a_attr void \ a_prefix##insert(a_rbt_type *rbtree, a_type *node); \ a_attr void \ @@ -183,7 +188,10 @@ a_prefix##iter(a_rbt_type *rbtree, a_type *start, a_type *(*cb)( \ a_rbt_type *, a_type *, void *), void *arg); \ a_attr a_type * \ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ - a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg); + a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg); \ +a_attr void \ +a_prefix##destroy(a_rbt_type *rbtree, void (*cb)(a_type *, void *), \ + void *arg); /* * The rb_gen() macro generates a type-specific red-black tree implementation, @@ -254,7 +262,7 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ * last/first. * * static ex_node_t * - * ex_search(ex_t *tree, ex_node_t *key); + * ex_search(ex_t *tree, const ex_node_t *key); * Description: Search for node that matches key. * Args: * tree: Pointer to an initialized red-black tree object. @@ -262,9 +270,9 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ * Ret: Node in tree that matches key, or NULL if no match. * * static ex_node_t * - * ex_nsearch(ex_t *tree, ex_node_t *key); + * ex_nsearch(ex_t *tree, const ex_node_t *key); * static ex_node_t * - * ex_psearch(ex_t *tree, ex_node_t *key); + * ex_psearch(ex_t *tree, const ex_node_t *key); * Description: Search for node that matches key. If no match is found, * return what would be key's successor/predecessor, were * key in tree. @@ -312,6 +320,20 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ * arg : Opaque pointer passed to cb(). * Ret: NULL if iteration completed, or the non-NULL callback return value * that caused termination of the iteration. 
+ * + * static void + * ex_destroy(ex_t *tree, void (*cb)(ex_node_t *, void *), void *arg); + * Description: Iterate over the tree with post-order traversal, remove + * each node, and run the callback if non-null. This is + * used for destroying a tree without paying the cost to + * rebalance it. The tree must not be otherwise altered + * during traversal. + * Args: + * tree: Pointer to an initialized red-black tree object. + * cb : Callback function, which, if non-null, is called for each node + * during iteration. There is no way to stop iteration once it + * has begun. + * arg : Opaque pointer passed to cb(). */ #define rb_gen(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp) \ a_attr void \ @@ -320,36 +342,30 @@ a_prefix##new(a_rbt_type *rbtree) { \ } \ a_attr bool \ a_prefix##empty(a_rbt_type *rbtree) { \ - return (rbtree->rbt_root == &rbtree->rbt_nil); \ + return (rbtree->rbt_root == NULL); \ } \ a_attr a_type * \ a_prefix##first(a_rbt_type *rbtree) { \ a_type *ret; \ rbtn_first(a_type, a_field, rbtree, rbtree->rbt_root, ret); \ - if (ret == &rbtree->rbt_nil) { \ - ret = NULL; \ - } \ return (ret); \ } \ a_attr a_type * \ a_prefix##last(a_rbt_type *rbtree) { \ a_type *ret; \ rbtn_last(a_type, a_field, rbtree, rbtree->rbt_root, ret); \ - if (ret == &rbtree->rbt_nil) { \ - ret = NULL; \ - } \ return (ret); \ } \ a_attr a_type * \ a_prefix##next(a_rbt_type *rbtree, a_type *node) { \ a_type *ret; \ - if (rbtn_right_get(a_type, a_field, node) != &rbtree->rbt_nil) { \ + if (rbtn_right_get(a_type, a_field, node) != NULL) { \ rbtn_first(a_type, a_field, rbtree, rbtn_right_get(a_type, \ a_field, node), ret); \ } else { \ a_type *tnode = rbtree->rbt_root; \ - assert(tnode != &rbtree->rbt_nil); \ - ret = &rbtree->rbt_nil; \ + assert(tnode != NULL); \ + ret = NULL; \ while (true) { \ int cmp = (a_cmp)(node, tnode); \ if (cmp < 0) { \ @@ -360,24 +376,21 @@ a_prefix##next(a_rbt_type *rbtree, a_type *node) { \ } else { \ break; \ } \ - assert(tnode != &rbtree->rbt_nil); 
\ + assert(tnode != NULL); \ } \ } \ - if (ret == &rbtree->rbt_nil) { \ - ret = (NULL); \ - } \ return (ret); \ } \ a_attr a_type * \ a_prefix##prev(a_rbt_type *rbtree, a_type *node) { \ a_type *ret; \ - if (rbtn_left_get(a_type, a_field, node) != &rbtree->rbt_nil) { \ + if (rbtn_left_get(a_type, a_field, node) != NULL) { \ rbtn_last(a_type, a_field, rbtree, rbtn_left_get(a_type, \ a_field, node), ret); \ } else { \ a_type *tnode = rbtree->rbt_root; \ - assert(tnode != &rbtree->rbt_nil); \ - ret = &rbtree->rbt_nil; \ + assert(tnode != NULL); \ + ret = NULL; \ while (true) { \ int cmp = (a_cmp)(node, tnode); \ if (cmp < 0) { \ @@ -388,20 +401,17 @@ a_prefix##prev(a_rbt_type *rbtree, a_type *node) { \ } else { \ break; \ } \ - assert(tnode != &rbtree->rbt_nil); \ + assert(tnode != NULL); \ } \ } \ - if (ret == &rbtree->rbt_nil) { \ - ret = (NULL); \ - } \ return (ret); \ } \ a_attr a_type * \ -a_prefix##search(a_rbt_type *rbtree, a_type *key) { \ +a_prefix##search(a_rbt_type *rbtree, const a_type *key) { \ a_type *ret; \ int cmp; \ ret = rbtree->rbt_root; \ - while (ret != &rbtree->rbt_nil \ + while (ret != NULL \ && (cmp = (a_cmp)(key, ret)) != 0) { \ if (cmp < 0) { \ ret = rbtn_left_get(a_type, a_field, ret); \ @@ -409,17 +419,14 @@ a_prefix##search(a_rbt_type *rbtree, a_type *key) { \ ret = rbtn_right_get(a_type, a_field, ret); \ } \ } \ - if (ret == &rbtree->rbt_nil) { \ - ret = (NULL); \ - } \ return (ret); \ } \ a_attr a_type * \ -a_prefix##nsearch(a_rbt_type *rbtree, a_type *key) { \ +a_prefix##nsearch(a_rbt_type *rbtree, const a_type *key) { \ a_type *ret; \ a_type *tnode = rbtree->rbt_root; \ - ret = &rbtree->rbt_nil; \ - while (tnode != &rbtree->rbt_nil) { \ + ret = NULL; \ + while (tnode != NULL) { \ int cmp = (a_cmp)(key, tnode); \ if (cmp < 0) { \ ret = tnode; \ @@ -431,17 +438,14 @@ a_prefix##nsearch(a_rbt_type *rbtree, a_type *key) { \ break; \ } \ } \ - if (ret == &rbtree->rbt_nil) { \ - ret = (NULL); \ - } \ return (ret); \ } \ a_attr a_type * \ 
-a_prefix##psearch(a_rbt_type *rbtree, a_type *key) { \ +a_prefix##psearch(a_rbt_type *rbtree, const a_type *key) { \ a_type *ret; \ a_type *tnode = rbtree->rbt_root; \ - ret = &rbtree->rbt_nil; \ - while (tnode != &rbtree->rbt_nil) { \ + ret = NULL; \ + while (tnode != NULL) { \ int cmp = (a_cmp)(key, tnode); \ if (cmp < 0) { \ tnode = rbtn_left_get(a_type, a_field, tnode); \ @@ -453,9 +457,6 @@ a_prefix##psearch(a_rbt_type *rbtree, a_type *key) { \ break; \ } \ } \ - if (ret == &rbtree->rbt_nil) { \ - ret = (NULL); \ - } \ return (ret); \ } \ a_attr void \ @@ -467,7 +468,7 @@ a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \ rbt_node_new(a_type, a_field, rbtree, node); \ /* Wind. */ \ path->node = rbtree->rbt_root; \ - for (pathp = path; pathp->node != &rbtree->rbt_nil; pathp++) { \ + for (pathp = path; pathp->node != NULL; pathp++) { \ int cmp = pathp->cmp = a_cmp(node, pathp->node); \ assert(cmp != 0); \ if (cmp < 0) { \ @@ -487,7 +488,8 @@ a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \ rbtn_left_set(a_type, a_field, cnode, left); \ if (rbtn_red_get(a_type, a_field, left)) { \ a_type *leftleft = rbtn_left_get(a_type, a_field, left);\ - if (rbtn_red_get(a_type, a_field, leftleft)) { \ + if (leftleft != NULL && rbtn_red_get(a_type, a_field, \ + leftleft)) { \ /* Fix up 4-node. */ \ a_type *tnode; \ rbtn_black_set(a_type, a_field, leftleft); \ @@ -502,7 +504,8 @@ a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \ rbtn_right_set(a_type, a_field, cnode, right); \ if (rbtn_red_get(a_type, a_field, right)) { \ a_type *left = rbtn_left_get(a_type, a_field, cnode); \ - if (rbtn_red_get(a_type, a_field, left)) { \ + if (left != NULL && rbtn_red_get(a_type, a_field, \ + left)) { \ /* Split 4-node. */ \ rbtn_black_set(a_type, a_field, left); \ rbtn_black_set(a_type, a_field, right); \ @@ -535,7 +538,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ /* Wind. */ \ nodep = NULL; /* Silence compiler warning. 
*/ \ path->node = rbtree->rbt_root; \ - for (pathp = path; pathp->node != &rbtree->rbt_nil; pathp++) { \ + for (pathp = path; pathp->node != NULL; pathp++) { \ int cmp = pathp->cmp = a_cmp(node, pathp->node); \ if (cmp < 0) { \ pathp[1].node = rbtn_left_get(a_type, a_field, \ @@ -547,7 +550,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ /* Find node's successor, in preparation for swap. */ \ pathp->cmp = 1; \ nodep = pathp; \ - for (pathp++; pathp->node != &rbtree->rbt_nil; \ + for (pathp++; pathp->node != NULL; \ pathp++) { \ pathp->cmp = -1; \ pathp[1].node = rbtn_left_get(a_type, a_field, \ @@ -590,7 +593,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ } \ } else { \ a_type *left = rbtn_left_get(a_type, a_field, node); \ - if (left != &rbtree->rbt_nil) { \ + if (left != NULL) { \ /* node has no successor, but it has a left child. */\ /* Splice node out, without losing the left child. */\ assert(!rbtn_red_get(a_type, a_field, node)); \ @@ -610,33 +613,32 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ return; \ } else if (pathp == path) { \ /* The tree only contained one node. */ \ - rbtree->rbt_root = &rbtree->rbt_nil; \ + rbtree->rbt_root = NULL; \ return; \ } \ } \ if (rbtn_red_get(a_type, a_field, pathp->node)) { \ /* Prune red node, which requires no fixup. */ \ assert(pathp[-1].cmp < 0); \ - rbtn_left_set(a_type, a_field, pathp[-1].node, \ - &rbtree->rbt_nil); \ + rbtn_left_set(a_type, a_field, pathp[-1].node, NULL); \ return; \ } \ /* The node to be pruned is black, so unwind until balance is */\ /* restored. 
*/\ - pathp->node = &rbtree->rbt_nil; \ + pathp->node = NULL; \ for (pathp--; (uintptr_t)pathp >= (uintptr_t)path; pathp--) { \ assert(pathp->cmp != 0); \ if (pathp->cmp < 0) { \ rbtn_left_set(a_type, a_field, pathp->node, \ pathp[1].node); \ - assert(!rbtn_red_get(a_type, a_field, pathp[1].node)); \ if (rbtn_red_get(a_type, a_field, pathp->node)) { \ a_type *right = rbtn_right_get(a_type, a_field, \ pathp->node); \ a_type *rightleft = rbtn_left_get(a_type, a_field, \ right); \ a_type *tnode; \ - if (rbtn_red_get(a_type, a_field, rightleft)) { \ + if (rightleft != NULL && rbtn_red_get(a_type, a_field, \ + rightleft)) { \ /* In the following diagrams, ||, //, and \\ */\ /* indicate the path to the removed node. */\ /* */\ @@ -679,7 +681,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ pathp->node); \ a_type *rightleft = rbtn_left_get(a_type, a_field, \ right); \ - if (rbtn_red_get(a_type, a_field, rightleft)) { \ + if (rightleft != NULL && rbtn_red_get(a_type, a_field, \ + rightleft)) { \ /* || */\ /* pathp(b) */\ /* // \ */\ @@ -733,7 +736,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ left); \ a_type *leftrightleft = rbtn_left_get(a_type, a_field, \ leftright); \ - if (rbtn_red_get(a_type, a_field, leftrightleft)) { \ + if (leftrightleft != NULL && rbtn_red_get(a_type, \ + a_field, leftrightleft)) { \ /* || */\ /* pathp(b) */\ /* / \\ */\ @@ -759,7 +763,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ /* (b) */\ /* / */\ /* (b) */\ - assert(leftright != &rbtree->rbt_nil); \ + assert(leftright != NULL); \ rbtn_red_set(a_type, a_field, leftright); \ rbtn_rotate_right(a_type, a_field, pathp->node, \ tnode); \ @@ -782,7 +786,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ return; \ } else if (rbtn_red_get(a_type, a_field, pathp->node)) { \ a_type *leftleft = rbtn_left_get(a_type, a_field, left);\ - if (rbtn_red_get(a_type, a_field, leftleft)) { \ + if (leftleft != NULL && rbtn_red_get(a_type, a_field, \ + leftleft)) { \ 
/* || */\ /* pathp(r) */\ /* / \\ */\ @@ -820,7 +825,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ } \ } else { \ a_type *leftleft = rbtn_left_get(a_type, a_field, left);\ - if (rbtn_red_get(a_type, a_field, leftleft)) { \ + if (leftleft != NULL && rbtn_red_get(a_type, a_field, \ + leftleft)) { \ /* || */\ /* pathp(b) */\ /* / \\ */\ @@ -866,13 +872,13 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ a_attr a_type * \ a_prefix##iter_recurse(a_rbt_type *rbtree, a_type *node, \ a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ - if (node == &rbtree->rbt_nil) { \ - return (&rbtree->rbt_nil); \ + if (node == NULL) { \ + return (NULL); \ } else { \ a_type *ret; \ if ((ret = a_prefix##iter_recurse(rbtree, rbtn_left_get(a_type, \ - a_field, node), cb, arg)) != &rbtree->rbt_nil \ - || (ret = cb(rbtree, node, arg)) != NULL) { \ + a_field, node), cb, arg)) != NULL || (ret = cb(rbtree, node, \ + arg)) != NULL) { \ return (ret); \ } \ return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ @@ -886,8 +892,8 @@ a_prefix##iter_start(a_rbt_type *rbtree, a_type *start, a_type *node, \ if (cmp < 0) { \ a_type *ret; \ if ((ret = a_prefix##iter_start(rbtree, start, \ - rbtn_left_get(a_type, a_field, node), cb, arg)) != \ - &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \ + rbtn_left_get(a_type, a_field, node), cb, arg)) != NULL || \ + (ret = cb(rbtree, node, arg)) != NULL) { \ return (ret); \ } \ return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ @@ -914,21 +920,18 @@ a_prefix##iter(a_rbt_type *rbtree, a_type *start, a_type *(*cb)( \ } else { \ ret = a_prefix##iter_recurse(rbtree, rbtree->rbt_root, cb, arg);\ } \ - if (ret == &rbtree->rbt_nil) { \ - ret = NULL; \ - } \ return (ret); \ } \ a_attr a_type * \ a_prefix##reverse_iter_recurse(a_rbt_type *rbtree, a_type *node, \ a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ - if (node == &rbtree->rbt_nil) { \ - return (&rbtree->rbt_nil); \ + if (node == 
NULL) { \ + return (NULL); \ } else { \ a_type *ret; \ if ((ret = a_prefix##reverse_iter_recurse(rbtree, \ - rbtn_right_get(a_type, a_field, node), cb, arg)) != \ - &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \ + rbtn_right_get(a_type, a_field, node), cb, arg)) != NULL || \ + (ret = cb(rbtree, node, arg)) != NULL) { \ return (ret); \ } \ return (a_prefix##reverse_iter_recurse(rbtree, \ @@ -943,8 +946,8 @@ a_prefix##reverse_iter_start(a_rbt_type *rbtree, a_type *start, \ if (cmp > 0) { \ a_type *ret; \ if ((ret = a_prefix##reverse_iter_start(rbtree, start, \ - rbtn_right_get(a_type, a_field, node), cb, arg)) != \ - &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \ + rbtn_right_get(a_type, a_field, node), cb, arg)) != NULL || \ + (ret = cb(rbtree, node, arg)) != NULL) { \ return (ret); \ } \ return (a_prefix##reverse_iter_recurse(rbtree, \ @@ -972,10 +975,29 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ ret = a_prefix##reverse_iter_recurse(rbtree, rbtree->rbt_root, \ cb, arg); \ } \ - if (ret == &rbtree->rbt_nil) { \ - ret = NULL; \ - } \ return (ret); \ +} \ +a_attr void \ +a_prefix##destroy_recurse(a_rbt_type *rbtree, a_type *node, void (*cb)( \ + a_type *, void *), void *arg) { \ + if (node == NULL) { \ + return; \ + } \ + a_prefix##destroy_recurse(rbtree, rbtn_left_get(a_type, a_field, \ + node), cb, arg); \ + rbtn_left_set(a_type, a_field, (node), NULL); \ + a_prefix##destroy_recurse(rbtree, rbtn_right_get(a_type, a_field, \ + node), cb, arg); \ + rbtn_right_set(a_type, a_field, (node), NULL); \ + if (cb) { \ + cb(node, arg); \ + } \ +} \ +a_attr void \ +a_prefix##destroy(a_rbt_type *rbtree, void (*cb)(a_type *, void *), \ + void *arg) { \ + a_prefix##destroy_recurse(rbtree, rbtree->rbt_root, cb, arg); \ + rbtree->rbt_root = NULL; \ } #endif /* RB_H_ */ diff --git a/deps/jemalloc/include/jemalloc/internal/rtree.h b/deps/jemalloc/include/jemalloc/internal/rtree.h index 28ae9d1dd..8d0c584da 100644 --- 
a/deps/jemalloc/include/jemalloc/internal/rtree.h +++ b/deps/jemalloc/include/jemalloc/internal/rtree.h @@ -15,9 +15,10 @@ typedef struct rtree_s rtree_t; * machine address width. */ #define LG_RTREE_BITS_PER_LEVEL 4 -#define RTREE_BITS_PER_LEVEL (ZU(1) << LG_RTREE_BITS_PER_LEVEL) +#define RTREE_BITS_PER_LEVEL (1U << LG_RTREE_BITS_PER_LEVEL) +/* Maximum rtree height. */ #define RTREE_HEIGHT_MAX \ - ((ZU(1) << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL) + ((1U << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL) /* Used for two-stage lock-free node initialization. */ #define RTREE_NODE_INITIALIZING ((rtree_node_elm_t *)0x1) @@ -111,22 +112,25 @@ unsigned rtree_start_level(rtree_t *rtree, uintptr_t key); uintptr_t rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level); bool rtree_node_valid(rtree_node_elm_t *node); -rtree_node_elm_t *rtree_child_tryread(rtree_node_elm_t *elm); +rtree_node_elm_t *rtree_child_tryread(rtree_node_elm_t *elm, + bool dependent); rtree_node_elm_t *rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, - unsigned level); + unsigned level, bool dependent); extent_node_t *rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm, bool dependent); void rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, const extent_node_t *val); -rtree_node_elm_t *rtree_subtree_tryread(rtree_t *rtree, unsigned level); -rtree_node_elm_t *rtree_subtree_read(rtree_t *rtree, unsigned level); +rtree_node_elm_t *rtree_subtree_tryread(rtree_t *rtree, unsigned level, + bool dependent); +rtree_node_elm_t *rtree_subtree_read(rtree_t *rtree, unsigned level, + bool dependent); extent_node_t *rtree_get(rtree_t *rtree, uintptr_t key, bool dependent); bool rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_)) -JEMALLOC_INLINE unsigned +JEMALLOC_ALWAYS_INLINE unsigned rtree_start_level(rtree_t *rtree, uintptr_t key) { unsigned start_level; @@ -140,7 +144,7 @@ rtree_start_level(rtree_t *rtree, 
uintptr_t key) return (start_level); } -JEMALLOC_INLINE uintptr_t +JEMALLOC_ALWAYS_INLINE uintptr_t rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level) { @@ -149,37 +153,40 @@ rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level) rtree->levels[level].bits) - 1)); } -JEMALLOC_INLINE bool +JEMALLOC_ALWAYS_INLINE bool rtree_node_valid(rtree_node_elm_t *node) { return ((uintptr_t)node > (uintptr_t)RTREE_NODE_INITIALIZING); } -JEMALLOC_INLINE rtree_node_elm_t * -rtree_child_tryread(rtree_node_elm_t *elm) +JEMALLOC_ALWAYS_INLINE rtree_node_elm_t * +rtree_child_tryread(rtree_node_elm_t *elm, bool dependent) { rtree_node_elm_t *child; /* Double-checked read (first read may be stale. */ child = elm->child; - if (!rtree_node_valid(child)) + if (!dependent && !rtree_node_valid(child)) child = atomic_read_p(&elm->pun); + assert(!dependent || child != NULL); return (child); } -JEMALLOC_INLINE rtree_node_elm_t * -rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level) +JEMALLOC_ALWAYS_INLINE rtree_node_elm_t * +rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level, + bool dependent) { rtree_node_elm_t *child; - child = rtree_child_tryread(elm); - if (unlikely(!rtree_node_valid(child))) + child = rtree_child_tryread(elm, dependent); + if (!dependent && unlikely(!rtree_node_valid(child))) child = rtree_child_read_hard(rtree, elm, level); + assert(!dependent || child != NULL); return (child); } -JEMALLOC_INLINE extent_node_t * +JEMALLOC_ALWAYS_INLINE extent_node_t * rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm, bool dependent) { @@ -208,54 +215,119 @@ rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, const extent_node_t *val) atomic_write_p(&elm->pun, val); } -JEMALLOC_INLINE rtree_node_elm_t * -rtree_subtree_tryread(rtree_t *rtree, unsigned level) +JEMALLOC_ALWAYS_INLINE rtree_node_elm_t * +rtree_subtree_tryread(rtree_t *rtree, unsigned level, bool dependent) { rtree_node_elm_t *subtree; /* Double-checked read (first read 
may be stale. */ subtree = rtree->levels[level].subtree; - if (!rtree_node_valid(subtree)) + if (!dependent && unlikely(!rtree_node_valid(subtree))) subtree = atomic_read_p(&rtree->levels[level].subtree_pun); + assert(!dependent || subtree != NULL); return (subtree); } -JEMALLOC_INLINE rtree_node_elm_t * -rtree_subtree_read(rtree_t *rtree, unsigned level) +JEMALLOC_ALWAYS_INLINE rtree_node_elm_t * +rtree_subtree_read(rtree_t *rtree, unsigned level, bool dependent) { rtree_node_elm_t *subtree; - subtree = rtree_subtree_tryread(rtree, level); - if (unlikely(!rtree_node_valid(subtree))) + subtree = rtree_subtree_tryread(rtree, level, dependent); + if (!dependent && unlikely(!rtree_node_valid(subtree))) subtree = rtree_subtree_read_hard(rtree, level); + assert(!dependent || subtree != NULL); return (subtree); } -JEMALLOC_INLINE extent_node_t * +JEMALLOC_ALWAYS_INLINE extent_node_t * rtree_get(rtree_t *rtree, uintptr_t key, bool dependent) { uintptr_t subkey; - unsigned i, start_level; - rtree_node_elm_t *node, *child; + unsigned start_level; + rtree_node_elm_t *node; start_level = rtree_start_level(rtree, key); - for (i = start_level, node = rtree_subtree_tryread(rtree, start_level); - /**/; i++, node = child) { - if (!dependent && unlikely(!rtree_node_valid(node))) - return (NULL); - subkey = rtree_subkey(rtree, key, i); - if (i == rtree->height - 1) { - /* - * node is a leaf, so it contains values rather than - * child pointers. 
- */ - return (rtree_val_read(rtree, &node[subkey], - dependent)); - } - assert(i < rtree->height - 1); - child = rtree_child_tryread(&node[subkey]); + node = rtree_subtree_tryread(rtree, start_level, dependent); +#define RTREE_GET_BIAS (RTREE_HEIGHT_MAX - rtree->height) + switch (start_level + RTREE_GET_BIAS) { +#define RTREE_GET_SUBTREE(level) \ + case level: \ + assert(level < (RTREE_HEIGHT_MAX-1)); \ + if (!dependent && unlikely(!rtree_node_valid(node))) \ + return (NULL); \ + subkey = rtree_subkey(rtree, key, level - \ + RTREE_GET_BIAS); \ + node = rtree_child_tryread(&node[subkey], dependent); \ + /* Fall through. */ +#define RTREE_GET_LEAF(level) \ + case level: \ + assert(level == (RTREE_HEIGHT_MAX-1)); \ + if (!dependent && unlikely(!rtree_node_valid(node))) \ + return (NULL); \ + subkey = rtree_subkey(rtree, key, level - \ + RTREE_GET_BIAS); \ + /* \ + * node is a leaf, so it contains values rather than \ + * child pointers. \ + */ \ + return (rtree_val_read(rtree, &node[subkey], \ + dependent)); +#if RTREE_HEIGHT_MAX > 1 + RTREE_GET_SUBTREE(0) +#endif +#if RTREE_HEIGHT_MAX > 2 + RTREE_GET_SUBTREE(1) +#endif +#if RTREE_HEIGHT_MAX > 3 + RTREE_GET_SUBTREE(2) +#endif +#if RTREE_HEIGHT_MAX > 4 + RTREE_GET_SUBTREE(3) +#endif +#if RTREE_HEIGHT_MAX > 5 + RTREE_GET_SUBTREE(4) +#endif +#if RTREE_HEIGHT_MAX > 6 + RTREE_GET_SUBTREE(5) +#endif +#if RTREE_HEIGHT_MAX > 7 + RTREE_GET_SUBTREE(6) +#endif +#if RTREE_HEIGHT_MAX > 8 + RTREE_GET_SUBTREE(7) +#endif +#if RTREE_HEIGHT_MAX > 9 + RTREE_GET_SUBTREE(8) +#endif +#if RTREE_HEIGHT_MAX > 10 + RTREE_GET_SUBTREE(9) +#endif +#if RTREE_HEIGHT_MAX > 11 + RTREE_GET_SUBTREE(10) +#endif +#if RTREE_HEIGHT_MAX > 12 + RTREE_GET_SUBTREE(11) +#endif +#if RTREE_HEIGHT_MAX > 13 + RTREE_GET_SUBTREE(12) +#endif +#if RTREE_HEIGHT_MAX > 14 + RTREE_GET_SUBTREE(13) +#endif +#if RTREE_HEIGHT_MAX > 15 + RTREE_GET_SUBTREE(14) +#endif +#if RTREE_HEIGHT_MAX > 16 +# error Unsupported RTREE_HEIGHT_MAX +#endif + RTREE_GET_LEAF(RTREE_HEIGHT_MAX-1) 
+#undef RTREE_GET_SUBTREE +#undef RTREE_GET_LEAF + default: not_reached(); } +#undef RTREE_GET_BIAS not_reached(); } @@ -268,7 +340,7 @@ rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val) start_level = rtree_start_level(rtree, key); - node = rtree_subtree_read(rtree, start_level); + node = rtree_subtree_read(rtree, start_level, false); if (node == NULL) return (true); for (i = start_level; /**/; i++, node = child) { @@ -282,7 +354,7 @@ rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val) return (false); } assert(i + 1 < rtree->height); - child = rtree_child_read(rtree, &node[subkey], i); + child = rtree_child_read(rtree, &node[subkey], i, false); if (child == NULL) return (true); } diff --git a/deps/jemalloc/include/jemalloc/internal/size_classes.sh b/deps/jemalloc/include/jemalloc/internal/size_classes.sh index fc82036d3..f6fbce4ef 100755 --- a/deps/jemalloc/include/jemalloc/internal/size_classes.sh +++ b/deps/jemalloc/include/jemalloc/internal/size_classes.sh @@ -48,6 +48,21 @@ size_class() { lg_p=$5 lg_kmax=$6 + if [ ${lg_delta} -ge ${lg_p} ] ; then + psz="yes" + else + pow2 ${lg_p}; p=${pow2_result} + pow2 ${lg_grp}; grp=${pow2_result} + pow2 ${lg_delta}; delta=${pow2_result} + sz=$((${grp} + ${delta} * ${ndelta})) + npgs=$((${sz} / ${p})) + if [ ${sz} -eq $((${npgs} * ${p})) ] ; then + psz="yes" + else + psz="no" + fi + fi + lg ${ndelta}; lg_ndelta=${lg_result}; pow2 ${lg_ndelta} if [ ${pow2_result} -lt ${ndelta} ] ; then rem="yes" @@ -74,14 +89,15 @@ size_class() { else lg_delta_lookup="no" fi - printf ' SC(%3d, %6d, %8d, %6d, %3s, %2s) \\\n' ${index} ${lg_grp} ${lg_delta} ${ndelta} ${bin} ${lg_delta_lookup} + printf ' SC(%3d, %6d, %8d, %6d, %3s, %3s, %2s) \\\n' ${index} ${lg_grp} ${lg_delta} ${ndelta} ${psz} ${bin} ${lg_delta_lookup} # Defined upon return: - # - lg_delta_lookup (${lg_delta} or "no") + # - psz ("yes" or "no") # - bin ("yes" or "no") + # - lg_delta_lookup (${lg_delta} or "no") } sep_line() { - echo " \\" + echo " 
\\" } size_classes() { @@ -95,12 +111,13 @@ size_classes() { pow2 ${lg_g}; g=${pow2_result} echo "#define SIZE_CLASSES \\" - echo " /* index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup */ \\" + echo " /* index, lg_grp, lg_delta, ndelta, psz, bin, lg_delta_lookup */ \\" ntbins=0 nlbins=0 lg_tiny_maxclass='"NA"' nbins=0 + npsizes=0 # Tiny size classes. ndelta=0 @@ -112,6 +129,9 @@ size_classes() { if [ ${lg_delta_lookup} != "no" ] ; then nlbins=$((${index} + 1)) fi + if [ ${psz} = "yes" ] ; then + npsizes=$((${npsizes} + 1)) + fi if [ ${bin} != "no" ] ; then nbins=$((${index} + 1)) fi @@ -133,19 +153,25 @@ size_classes() { index=$((${index} + 1)) lg_grp=$((${lg_grp} + 1)) lg_delta=$((${lg_delta} + 1)) + if [ ${psz} = "yes" ] ; then + npsizes=$((${npsizes} + 1)) + fi fi while [ ${ndelta} -lt ${g} ] ; do size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax} index=$((${index} + 1)) ndelta=$((${ndelta} + 1)) + if [ ${psz} = "yes" ] ; then + npsizes=$((${npsizes} + 1)) + fi done # All remaining groups. lg_grp=$((${lg_grp} + ${lg_g})) - while [ ${lg_grp} -lt ${ptr_bits} ] ; do + while [ ${lg_grp} -lt $((${ptr_bits} - 1)) ] ; do sep_line ndelta=1 - if [ ${lg_grp} -eq $((${ptr_bits} - 1)) ] ; then + if [ ${lg_grp} -eq $((${ptr_bits} - 2)) ] ; then ndelta_limit=$((${g} - 1)) else ndelta_limit=${g} @@ -157,6 +183,9 @@ size_classes() { # Final written value is correct: lookup_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))" fi + if [ ${psz} = "yes" ] ; then + npsizes=$((${npsizes} + 1)) + fi if [ ${bin} != "no" ] ; then nbins=$((${index} + 1)) # Final written value is correct: @@ -183,6 +212,7 @@ size_classes() { # - nlbins # - nbins # - nsizes + # - npsizes # - lg_tiny_maxclass # - lookup_maxclass # - small_maxclass @@ -200,13 +230,13 @@ cat < +# +# is in {smooth, smoother, smoothest}. +# must be greater than zero. +# must be in [0..62]; reasonable values are roughly [10..30]. +# is x decimal precision. 
+# is y decimal precision. + +#set -x + +cmd="sh smoothstep.sh $*" +variant=$1 +nsteps=$2 +bfp=$3 +xprec=$4 +yprec=$5 + +case "${variant}" in + smooth) + ;; + smoother) + ;; + smoothest) + ;; + *) + echo "Unsupported variant" + exit 1 + ;; +esac + +smooth() { + step=$1 + y=`echo ${yprec} k ${step} ${nsteps} / sx _2 lx 3 ^ '*' 3 lx 2 ^ '*' + p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g'` + h=`echo ${yprec} k 2 ${bfp} ^ ${y} '*' p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g' | tr '.' ' ' | awk '{print $1}' ` +} + +smoother() { + step=$1 + y=`echo ${yprec} k ${step} ${nsteps} / sx 6 lx 5 ^ '*' _15 lx 4 ^ '*' + 10 lx 3 ^ '*' + p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g'` + h=`echo ${yprec} k 2 ${bfp} ^ ${y} '*' p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g' | tr '.' ' ' | awk '{print $1}' ` +} + +smoothest() { + step=$1 + y=`echo ${yprec} k ${step} ${nsteps} / sx _20 lx 7 ^ '*' 70 lx 6 ^ '*' + _84 lx 5 ^ '*' + 35 lx 4 ^ '*' + p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g'` + h=`echo ${yprec} k 2 ${bfp} ^ ${y} '*' p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g' | tr '.' ' ' | awk '{print $1}' ` +} + +cat <iteration = 0; +} + +JEMALLOC_INLINE void +spin_adaptive(spin_t *spin) +{ + volatile uint64_t i; + + for (i = 0; i < (KQU(1) << spin->iteration); i++) + CPU_SPINWAIT; + + if (spin->iteration < 63) + spin->iteration++; +} + +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ + diff --git a/deps/jemalloc/include/jemalloc/internal/stats.h b/deps/jemalloc/include/jemalloc/internal/stats.h index c91dba99d..04e7dae14 100644 --- a/deps/jemalloc/include/jemalloc/internal/stats.h +++ b/deps/jemalloc/include/jemalloc/internal/stats.h @@ -102,6 +102,14 @@ struct arena_stats_s { /* Number of bytes currently mapped. */ size_t mapped; + /* + * Number of bytes currently retained as a side effect of munmap() being + * disabled/bypassed. 
Retained bytes are technically mapped (though + * always decommitted or purged), but they are excluded from the mapped + * statistic (above). + */ + size_t retained; + /* * Total number of purge sweeps, total number of madvise calls made, * and total pages purged in order to keep dirty unused memory under @@ -168,6 +176,9 @@ JEMALLOC_INLINE void stats_cactive_add(size_t size) { + assert(size > 0); + assert((size & chunksize_mask) == 0); + atomic_add_z(&stats_cactive, size); } @@ -175,6 +186,9 @@ JEMALLOC_INLINE void stats_cactive_sub(size_t size) { + assert(size > 0); + assert((size & chunksize_mask) == 0); + atomic_sub_z(&stats_cactive, size); } #endif diff --git a/deps/jemalloc/include/jemalloc/internal/tcache.h b/deps/jemalloc/include/jemalloc/internal/tcache.h index 5079cd266..01ba062de 100644 --- a/deps/jemalloc/include/jemalloc/internal/tcache.h +++ b/deps/jemalloc/include/jemalloc/internal/tcache.h @@ -70,13 +70,20 @@ struct tcache_bin_s { int low_water; /* Min # cached since last GC. */ unsigned lg_fill_div; /* Fill (ncached_max >> lg_fill_div). */ unsigned ncached; /* # of cached objects. */ + /* + * To make use of adjacent cacheline prefetch, the items in the avail + * stack goes to higher address for newer allocations. avail points + * just above the available space, which means that + * avail[-ncached, ... -1] are available items and the lowest item will + * be allocated first. + */ void **avail; /* Stack of available objects. */ }; struct tcache_s { ql_elm(tcache_t) link; /* Used for aggregating stats. */ uint64_t prof_accumbytes;/* Cleared after arena_prof_accum(). */ - unsigned ev_cnt; /* Event count since incremental GC. */ + ticker_t gc_ticker; /* Drives incremental GC. */ szind_t next_gc_bin; /* Next bin to GC. */ tcache_bin_t tbins[1]; /* Dynamically sized. */ /* @@ -108,7 +115,7 @@ extern tcache_bin_info_t *tcache_bin_info; * Number of tcache bins. There are NBINS small-object bins, plus 0 or more * large-object bins. 
*/ -extern size_t nhbins; +extern unsigned nhbins; /* Maximum cached size class. */ extern size_t tcache_maxclass; @@ -123,27 +130,25 @@ extern size_t tcache_maxclass; */ extern tcaches_t *tcaches; -size_t tcache_salloc(const void *ptr); +size_t tcache_salloc(tsdn_t *tsdn, const void *ptr); void tcache_event_hard(tsd_t *tsd, tcache_t *tcache); -void *tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache, - tcache_bin_t *tbin, szind_t binind); +void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, + tcache_bin_t *tbin, szind_t binind, bool *tcache_success); void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, szind_t binind, unsigned rem); void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, unsigned rem, tcache_t *tcache); -void tcache_arena_associate(tcache_t *tcache, arena_t *arena); -void tcache_arena_reassociate(tcache_t *tcache, arena_t *oldarena, - arena_t *newarena); -void tcache_arena_dissociate(tcache_t *tcache, arena_t *arena); +void tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, + arena_t *oldarena, arena_t *newarena); tcache_t *tcache_get_hard(tsd_t *tsd); -tcache_t *tcache_create(tsd_t *tsd, arena_t *arena); +tcache_t *tcache_create(tsdn_t *tsdn, arena_t *arena); void tcache_cleanup(tsd_t *tsd); void tcache_enabled_cleanup(tsd_t *tsd); -void tcache_stats_merge(tcache_t *tcache, arena_t *arena); +void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); bool tcaches_create(tsd_t *tsd, unsigned *r_ind); void tcaches_flush(tsd_t *tsd, unsigned ind); void tcaches_destroy(tsd_t *tsd, unsigned ind); -bool tcache_boot(void); +bool tcache_boot(tsdn_t *tsdn); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ @@ -155,15 +160,15 @@ void tcache_flush(void); bool tcache_enabled_get(void); tcache_t *tcache_get(tsd_t *tsd, bool create); void tcache_enabled_set(bool enabled); -void 
*tcache_alloc_easy(tcache_bin_t *tbin); +void *tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success); void *tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, - size_t size, bool zero); + size_t size, szind_t ind, bool zero, bool slow_path); void *tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, - size_t size, bool zero); + size_t size, szind_t ind, bool zero, bool slow_path); void tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, - szind_t binind); + szind_t binind, bool slow_path); void tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, - size_t size); + size_t size, bool slow_path); tcache_t *tcaches_get(tsd_t *tsd, unsigned ind); #endif @@ -240,51 +245,74 @@ tcache_event(tsd_t *tsd, tcache_t *tcache) if (TCACHE_GC_INCR == 0) return; - tcache->ev_cnt++; - assert(tcache->ev_cnt <= TCACHE_GC_INCR); - if (unlikely(tcache->ev_cnt == TCACHE_GC_INCR)) + if (unlikely(ticker_tick(&tcache->gc_ticker))) tcache_event_hard(tsd, tcache); } JEMALLOC_ALWAYS_INLINE void * -tcache_alloc_easy(tcache_bin_t *tbin) +tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success) { void *ret; if (unlikely(tbin->ncached == 0)) { tbin->low_water = -1; + *tcache_success = false; return (NULL); } + /* + * tcache_success (instead of ret) should be checked upon the return of + * this function. We avoid checking (ret == NULL) because there is + * never a null stored on the avail stack (which is unknown to the + * compiler), and eagerly checking ret would cause pipeline stall + * (waiting for the cacheline). 
+ */ + *tcache_success = true; + ret = *(tbin->avail - tbin->ncached); tbin->ncached--; + if (unlikely((int)tbin->ncached < tbin->low_water)) tbin->low_water = tbin->ncached; - ret = tbin->avail[tbin->ncached]; + return (ret); } JEMALLOC_ALWAYS_INLINE void * tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, - bool zero) + szind_t binind, bool zero, bool slow_path) { void *ret; - szind_t binind; - size_t usize; tcache_bin_t *tbin; + bool tcache_success; + size_t usize JEMALLOC_CC_SILENCE_INIT(0); - binind = size2index(size); assert(binind < NBINS); tbin = &tcache->tbins[binind]; - usize = index2size(binind); - ret = tcache_alloc_easy(tbin); - if (unlikely(ret == NULL)) { - ret = tcache_alloc_small_hard(tsd, arena, tcache, tbin, binind); - if (ret == NULL) + ret = tcache_alloc_easy(tbin, &tcache_success); + assert(tcache_success == (ret != NULL)); + if (unlikely(!tcache_success)) { + bool tcache_hard_success; + arena = arena_choose(tsd, arena); + if (unlikely(arena == NULL)) + return (NULL); + + ret = tcache_alloc_small_hard(tsd_tsdn(tsd), arena, tcache, + tbin, binind, &tcache_hard_success); + if (tcache_hard_success == false) return (NULL); } - assert(tcache_salloc(ret) == usize); + + assert(ret); + /* + * Only compute usize if required. The checks in the following if + * statement are all static. 
+ */ + if (config_prof || (slow_path && config_fill) || unlikely(zero)) { + usize = index2size(binind); + assert(tcache_salloc(tsd_tsdn(tsd), ret) == usize); + } if (likely(!zero)) { - if (config_fill) { + if (slow_path && config_fill) { if (unlikely(opt_junk_alloc)) { arena_alloc_junk_small(ret, &arena_bin_info[binind], false); @@ -292,7 +320,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, memset(ret, 0, usize); } } else { - if (config_fill && unlikely(opt_junk_alloc)) { + if (slow_path && config_fill && unlikely(opt_junk_alloc)) { arena_alloc_junk_small(ret, &arena_bin_info[binind], true); } @@ -309,28 +337,38 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, JEMALLOC_ALWAYS_INLINE void * tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, - bool zero) + szind_t binind, bool zero, bool slow_path) { void *ret; - szind_t binind; - size_t usize; tcache_bin_t *tbin; + bool tcache_success; - binind = size2index(size); - usize = index2size(binind); - assert(usize <= tcache_maxclass); assert(binind < nhbins); tbin = &tcache->tbins[binind]; - ret = tcache_alloc_easy(tbin); - if (unlikely(ret == NULL)) { + ret = tcache_alloc_easy(tbin, &tcache_success); + assert(tcache_success == (ret != NULL)); + if (unlikely(!tcache_success)) { /* * Only allocate one large object at a time, because it's quite * expensive to create one and not use it. 
*/ - ret = arena_malloc_large(arena, usize, zero); + arena = arena_choose(tsd, arena); + if (unlikely(arena == NULL)) + return (NULL); + + ret = arena_malloc_large(tsd_tsdn(tsd), arena, binind, zero); if (ret == NULL) return (NULL); } else { + size_t usize JEMALLOC_CC_SILENCE_INIT(0); + + /* Only compute usize on demand */ + if (config_prof || (slow_path && config_fill) || + unlikely(zero)) { + usize = index2size(binind); + assert(usize <= tcache_maxclass); + } + if (config_prof && usize == LARGE_MINCLASS) { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret); @@ -340,10 +378,11 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, BININD_INVALID); } if (likely(!zero)) { - if (config_fill) { - if (unlikely(opt_junk_alloc)) - memset(ret, 0xa5, usize); - else if (unlikely(opt_zero)) + if (slow_path && config_fill) { + if (unlikely(opt_junk_alloc)) { + memset(ret, JEMALLOC_ALLOC_JUNK, + usize); + } else if (unlikely(opt_zero)) memset(ret, 0, usize); } } else @@ -360,14 +399,15 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, } JEMALLOC_ALWAYS_INLINE void -tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind) +tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, + bool slow_path) { tcache_bin_t *tbin; tcache_bin_info_t *tbin_info; - assert(tcache_salloc(ptr) <= SMALL_MAXCLASS); + assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SMALL_MAXCLASS); - if (config_fill && unlikely(opt_junk_free)) + if (slow_path && config_fill && unlikely(opt_junk_free)) arena_dalloc_junk_small(ptr, &arena_bin_info[binind]); tbin = &tcache->tbins[binind]; @@ -377,26 +417,27 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind) (tbin_info->ncached_max >> 1)); } assert(tbin->ncached < tbin_info->ncached_max); - tbin->avail[tbin->ncached] = ptr; tbin->ncached++; + *(tbin->avail - tbin->ncached) = ptr; tcache_event(tsd, tcache); } JEMALLOC_ALWAYS_INLINE void 
-tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size) +tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size, + bool slow_path) { szind_t binind; tcache_bin_t *tbin; tcache_bin_info_t *tbin_info; assert((size & PAGE_MASK) == 0); - assert(tcache_salloc(ptr) > SMALL_MAXCLASS); - assert(tcache_salloc(ptr) <= tcache_maxclass); + assert(tcache_salloc(tsd_tsdn(tsd), ptr) > SMALL_MAXCLASS); + assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_maxclass); binind = size2index(size); - if (config_fill && unlikely(opt_junk_free)) + if (slow_path && config_fill && unlikely(opt_junk_free)) arena_dalloc_junk_large(ptr, size); tbin = &tcache->tbins[binind]; @@ -406,8 +447,8 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size) (tbin_info->ncached_max >> 1), tcache); } assert(tbin->ncached < tbin_info->ncached_max); - tbin->avail[tbin->ncached] = ptr; tbin->ncached++; + *(tbin->avail - tbin->ncached) = ptr; tcache_event(tsd, tcache); } @@ -416,8 +457,10 @@ JEMALLOC_ALWAYS_INLINE tcache_t * tcaches_get(tsd_t *tsd, unsigned ind) { tcaches_t *elm = &tcaches[ind]; - if (unlikely(elm->tcache == NULL)) - elm->tcache = tcache_create(tsd, arena_choose(tsd, NULL)); + if (unlikely(elm->tcache == NULL)) { + elm->tcache = tcache_create(tsd_tsdn(tsd), arena_choose(tsd, + NULL)); + } return (elm->tcache); } #endif diff --git a/deps/jemalloc/include/jemalloc/internal/ticker.h b/deps/jemalloc/include/jemalloc/internal/ticker.h new file mode 100644 index 000000000..4696e56d2 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/ticker.h @@ -0,0 +1,75 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct ticker_s ticker_t; + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct ticker_s { + int32_t tick; + int32_t nticks; +}; + +#endif /* JEMALLOC_H_STRUCTS */ 
+/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +void ticker_init(ticker_t *ticker, int32_t nticks); +void ticker_copy(ticker_t *ticker, const ticker_t *other); +int32_t ticker_read(const ticker_t *ticker); +bool ticker_ticks(ticker_t *ticker, int32_t nticks); +bool ticker_tick(ticker_t *ticker); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TICKER_C_)) +JEMALLOC_INLINE void +ticker_init(ticker_t *ticker, int32_t nticks) +{ + + ticker->tick = nticks; + ticker->nticks = nticks; +} + +JEMALLOC_INLINE void +ticker_copy(ticker_t *ticker, const ticker_t *other) +{ + + *ticker = *other; +} + +JEMALLOC_INLINE int32_t +ticker_read(const ticker_t *ticker) +{ + + return (ticker->tick); +} + +JEMALLOC_INLINE bool +ticker_ticks(ticker_t *ticker, int32_t nticks) +{ + + if (unlikely(ticker->tick < nticks)) { + ticker->tick = ticker->nticks; + return (true); + } + ticker->tick -= nticks; + return(false); +} + +JEMALLOC_INLINE bool +ticker_tick(ticker_t *ticker) +{ + + return (ticker_ticks(ticker, 1)); +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/tsd.h b/deps/jemalloc/include/jemalloc/internal/tsd.h index eed7aa013..9055acafd 100644 --- a/deps/jemalloc/include/jemalloc/internal/tsd.h +++ b/deps/jemalloc/include/jemalloc/internal/tsd.h @@ -13,6 +13,9 @@ typedef struct tsd_init_head_s tsd_init_head_t; #endif typedef struct tsd_s tsd_t; +typedef struct tsdn_s tsdn_t; + +#define TSDN_NULL ((tsdn_t *)0) typedef enum { tsd_state_uninitialized, @@ -44,7 +47,8 @@ typedef enum { * The result is a set of generated functions, e.g.: * * bool example_tsd_boot(void) {...} - * example_t *example_tsd_get() 
{...} + * bool example_tsd_booted_get(void) {...} + * example_t *example_tsd_get(bool init) {...} * void example_tsd_set(example_t *val) {...} * * Note that all of the functions deal in terms of (a_type *) rather than @@ -98,8 +102,10 @@ a_attr void \ a_name##tsd_boot1(void); \ a_attr bool \ a_name##tsd_boot(void); \ +a_attr bool \ +a_name##tsd_booted_get(void); \ a_attr a_type * \ -a_name##tsd_get(void); \ +a_name##tsd_get(bool init); \ a_attr void \ a_name##tsd_set(a_type *val); @@ -201,9 +207,21 @@ a_name##tsd_boot(void) \ \ return (a_name##tsd_boot0()); \ } \ +a_attr bool \ +a_name##tsd_booted_get(void) \ +{ \ + \ + return (a_name##tsd_booted); \ +} \ +a_attr bool \ +a_name##tsd_get_allocates(void) \ +{ \ + \ + return (false); \ +} \ /* Get/set. */ \ a_attr a_type * \ -a_name##tsd_get(void) \ +a_name##tsd_get(bool init) \ { \ \ assert(a_name##tsd_booted); \ @@ -246,9 +264,21 @@ a_name##tsd_boot(void) \ \ return (a_name##tsd_boot0()); \ } \ +a_attr bool \ +a_name##tsd_booted_get(void) \ +{ \ + \ + return (a_name##tsd_booted); \ +} \ +a_attr bool \ +a_name##tsd_get_allocates(void) \ +{ \ + \ + return (false); \ +} \ /* Get/set. 
*/ \ a_attr a_type * \ -a_name##tsd_get(void) \ +a_name##tsd_get(bool init) \ { \ \ assert(a_name##tsd_booted); \ @@ -307,14 +337,14 @@ a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \ } \ } \ a_attr a_name##tsd_wrapper_t * \ -a_name##tsd_wrapper_get(void) \ +a_name##tsd_wrapper_get(bool init) \ { \ DWORD error = GetLastError(); \ a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ TlsGetValue(a_name##tsd_tsd); \ SetLastError(error); \ \ - if (unlikely(wrapper == NULL)) { \ + if (init && unlikely(wrapper == NULL)) { \ wrapper = (a_name##tsd_wrapper_t *) \ malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ if (wrapper == NULL) { \ @@ -368,14 +398,28 @@ a_name##tsd_boot(void) \ a_name##tsd_boot1(); \ return (false); \ } \ +a_attr bool \ +a_name##tsd_booted_get(void) \ +{ \ + \ + return (a_name##tsd_booted); \ +} \ +a_attr bool \ +a_name##tsd_get_allocates(void) \ +{ \ + \ + return (true); \ +} \ /* Get/set. */ \ a_attr a_type * \ -a_name##tsd_get(void) \ +a_name##tsd_get(bool init) \ { \ a_name##tsd_wrapper_t *wrapper; \ \ assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(); \ + wrapper = a_name##tsd_wrapper_get(init); \ + if (a_name##tsd_get_allocates() && !init && wrapper == NULL) \ + return (NULL); \ return (&wrapper->val); \ } \ a_attr void \ @@ -384,7 +428,7 @@ a_name##tsd_set(a_type *val) \ a_name##tsd_wrapper_t *wrapper; \ \ assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(); \ + wrapper = a_name##tsd_wrapper_get(true); \ wrapper->val = *(val); \ if (a_cleanup != malloc_tsd_no_cleanup) \ wrapper->initialized = true; \ @@ -428,12 +472,12 @@ a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \ } \ } \ a_attr a_name##tsd_wrapper_t * \ -a_name##tsd_wrapper_get(void) \ +a_name##tsd_wrapper_get(bool init) \ { \ a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ pthread_getspecific(a_name##tsd_tsd); \ \ - if (unlikely(wrapper == NULL)) { \ + if (init && unlikely(wrapper == NULL)) { \ tsd_init_block_t 
block; \ wrapper = tsd_init_check_recursion( \ &a_name##tsd_init_head, &block); \ @@ -490,14 +534,28 @@ a_name##tsd_boot(void) \ a_name##tsd_boot1(); \ return (false); \ } \ +a_attr bool \ +a_name##tsd_booted_get(void) \ +{ \ + \ + return (a_name##tsd_booted); \ +} \ +a_attr bool \ +a_name##tsd_get_allocates(void) \ +{ \ + \ + return (true); \ +} \ /* Get/set. */ \ a_attr a_type * \ -a_name##tsd_get(void) \ +a_name##tsd_get(bool init) \ { \ a_name##tsd_wrapper_t *wrapper; \ \ assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(); \ + wrapper = a_name##tsd_wrapper_get(init); \ + if (a_name##tsd_get_allocates() && !init && wrapper == NULL) \ + return (NULL); \ return (&wrapper->val); \ } \ a_attr void \ @@ -506,7 +564,7 @@ a_name##tsd_set(a_type *val) \ a_name##tsd_wrapper_t *wrapper; \ \ assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(); \ + wrapper = a_name##tsd_wrapper_get(true); \ wrapper->val = *(val); \ if (a_cleanup != malloc_tsd_no_cleanup) \ wrapper->initialized = true; \ @@ -536,12 +594,15 @@ struct tsd_init_head_s { O(thread_allocated, uint64_t) \ O(thread_deallocated, uint64_t) \ O(prof_tdata, prof_tdata_t *) \ + O(iarena, arena_t *) \ O(arena, arena_t *) \ - O(arenas_cache, arena_t **) \ - O(narenas_cache, unsigned) \ - O(arenas_cache_bypass, bool) \ + O(arenas_tdata, arena_tdata_t *) \ + O(narenas_tdata, unsigned) \ + O(arenas_tdata_bypass, bool) \ O(tcache_enabled, tcache_enabled_t) \ O(quarantine, quarantine_t *) \ + O(witnesses, witness_list_t) \ + O(witness_fork, bool) \ #define TSD_INITIALIZER { \ tsd_state_uninitialized, \ @@ -551,10 +612,13 @@ struct tsd_init_head_s { NULL, \ NULL, \ NULL, \ + NULL, \ 0, \ false, \ tcache_enabled_default, \ - NULL \ + NULL, \ + ql_head_initializer(witnesses), \ + false \ } struct tsd_s { @@ -565,6 +629,15 @@ MALLOC_TSD #undef O }; +/* + * Wrapper around tsd_t that makes it possible to avoid implicit conversion + * between tsd_t and tsdn_t, where tsdn_t is "nullable" and has to be + 
* explicitly converted to tsd_t, which is non-nullable. + */ +struct tsdn_s { + tsd_t tsd; +}; + static const tsd_t tsd_initializer = TSD_INITIALIZER; malloc_tsd_types(, tsd_t) @@ -577,7 +650,7 @@ void *malloc_tsd_malloc(size_t size); void malloc_tsd_dalloc(void *wrapper); void malloc_tsd_no_cleanup(void *arg); void malloc_tsd_cleanup_register(bool (*f)(void)); -bool malloc_tsd_boot0(void); +tsd_t *malloc_tsd_boot0(void); void malloc_tsd_boot1(void); #if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ !defined(_WIN32)) @@ -594,7 +667,9 @@ void tsd_cleanup(void *arg); #ifndef JEMALLOC_ENABLE_INLINE malloc_tsd_protos(JEMALLOC_ATTR(unused), , tsd_t) +tsd_t *tsd_fetch_impl(bool init); tsd_t *tsd_fetch(void); +tsdn_t *tsd_tsdn(tsd_t *tsd); bool tsd_nominal(tsd_t *tsd); #define O(n, t) \ t *tsd_##n##p_get(tsd_t *tsd); \ @@ -602,6 +677,9 @@ t tsd_##n##_get(tsd_t *tsd); \ void tsd_##n##_set(tsd_t *tsd, t n); MALLOC_TSD #undef O +tsdn_t *tsdn_fetch(void); +bool tsdn_null(const tsdn_t *tsdn); +tsd_t *tsdn_tsd(tsdn_t *tsdn); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TSD_C_)) @@ -609,9 +687,13 @@ malloc_tsd_externs(, tsd_t) malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, , tsd_t, tsd_initializer, tsd_cleanup) JEMALLOC_ALWAYS_INLINE tsd_t * -tsd_fetch(void) +tsd_fetch_impl(bool init) { - tsd_t *tsd = tsd_get(); + tsd_t *tsd = tsd_get(init); + + if (!init && tsd_get_allocates() && tsd == NULL) + return (NULL); + assert(tsd != NULL); if (unlikely(tsd->state != tsd_state_nominal)) { if (tsd->state == tsd_state_uninitialized) { @@ -628,6 +710,20 @@ tsd_fetch(void) return (tsd); } +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_fetch(void) +{ + + return (tsd_fetch_impl(true)); +} + +JEMALLOC_ALWAYS_INLINE tsdn_t * +tsd_tsdn(tsd_t *tsd) +{ + + return ((tsdn_t *)tsd); +} + JEMALLOC_INLINE bool tsd_nominal(tsd_t *tsd) { @@ -659,6 +755,32 @@ tsd_##n##_set(tsd_t *tsd, t n) \ } MALLOC_TSD #undef O + +JEMALLOC_ALWAYS_INLINE tsdn_t * +tsdn_fetch(void) +{ + + if 
(!tsd_booted_get()) + return (NULL); + + return (tsd_tsdn(tsd_fetch_impl(false))); +} + +JEMALLOC_ALWAYS_INLINE bool +tsdn_null(const tsdn_t *tsdn) +{ + + return (tsdn == NULL); +} + +JEMALLOC_ALWAYS_INLINE tsd_t * +tsdn_tsd(tsdn_t *tsdn) +{ + + assert(!tsdn_null(tsdn)); + + return (&tsdn->tsd); +} #endif #endif /* JEMALLOC_H_INLINES */ diff --git a/deps/jemalloc/include/jemalloc/internal/util.h b/deps/jemalloc/include/jemalloc/internal/util.h index b2ea740fd..4b56d652e 100644 --- a/deps/jemalloc/include/jemalloc/internal/util.h +++ b/deps/jemalloc/include/jemalloc/internal/util.h @@ -40,6 +40,14 @@ */ #define MALLOC_PRINTF_BUFSIZE 4096 +/* Junk fill patterns. */ +#ifndef JEMALLOC_ALLOC_JUNK +# define JEMALLOC_ALLOC_JUNK ((uint8_t)0xa5) +#endif +#ifndef JEMALLOC_FREE_JUNK +# define JEMALLOC_FREE_JUNK ((uint8_t)0x5a) +#endif + /* * Wrap a cpp argument that contains commas such that it isn't broken up into * multiple arguments. @@ -57,73 +65,21 @@ # define JEMALLOC_CC_SILENCE_INIT(v) #endif -#define JEMALLOC_GNUC_PREREQ(major, minor) \ - (!defined(__clang__) && \ - (__GNUC__ > (major) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))) -#ifndef __has_builtin -# define __has_builtin(builtin) (0) -#endif -#define JEMALLOC_CLANG_HAS_BUILTIN(builtin) \ - (defined(__clang__) && __has_builtin(builtin)) - #ifdef __GNUC__ # define likely(x) __builtin_expect(!!(x), 1) # define unlikely(x) __builtin_expect(!!(x), 0) -# if JEMALLOC_GNUC_PREREQ(4, 6) || \ - JEMALLOC_CLANG_HAS_BUILTIN(__builtin_unreachable) -# define unreachable() __builtin_unreachable() -# else -# define unreachable() -# endif #else # define likely(x) !!(x) # define unlikely(x) !!(x) -# define unreachable() #endif -/* - * Define a custom assert() in order to reduce the chances of deadlock during - * assertion failure. 
- */ -#ifndef assert -#define assert(e) do { \ - if (unlikely(config_debug && !(e))) { \ - malloc_printf( \ - ": %s:%d: Failed assertion: \"%s\"\n", \ - __FILE__, __LINE__, #e); \ - abort(); \ - } \ -} while (0) +#if !defined(JEMALLOC_INTERNAL_UNREACHABLE) +# error JEMALLOC_INTERNAL_UNREACHABLE should have been defined by configure #endif -#ifndef not_reached -#define not_reached() do { \ - if (config_debug) { \ - malloc_printf( \ - ": %s:%d: Unreachable code reached\n", \ - __FILE__, __LINE__); \ - abort(); \ - } \ - unreachable(); \ -} while (0) -#endif +#define unreachable() JEMALLOC_INTERNAL_UNREACHABLE() -#ifndef not_implemented -#define not_implemented() do { \ - if (config_debug) { \ - malloc_printf(": %s:%d: Not implemented\n", \ - __FILE__, __LINE__); \ - abort(); \ - } \ -} while (0) -#endif - -#ifndef assert_not_implemented -#define assert_not_implemented(e) do { \ - if (unlikely(config_debug && !(e))) \ - not_implemented(); \ -} while (0) -#endif +#include "jemalloc/internal/assert.h" /* Use to assert a particular configuration, e.g., cassert(config_debug). */ #define cassert(c) do { \ @@ -148,9 +104,9 @@ void malloc_write(const char *s); * malloc_vsnprintf() supports a subset of snprintf(3) that avoids floating * point math. */ -int malloc_vsnprintf(char *str, size_t size, const char *format, +size_t malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap); -int malloc_snprintf(char *str, size_t size, const char *format, ...) +size_t malloc_snprintf(char *str, size_t size, const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4); void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, const char *format, va_list ap); @@ -163,10 +119,16 @@ void malloc_printf(const char *format, ...) 
JEMALLOC_FORMAT_PRINTF(1, 2); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -int jemalloc_ffsl(long bitmap); -int jemalloc_ffs(int bitmap); -size_t pow2_ceil(size_t x); -size_t lg_floor(size_t x); +unsigned ffs_llu(unsigned long long bitmap); +unsigned ffs_lu(unsigned long bitmap); +unsigned ffs_u(unsigned bitmap); +unsigned ffs_zu(size_t bitmap); +unsigned ffs_u64(uint64_t bitmap); +unsigned ffs_u32(uint32_t bitmap); +uint64_t pow2_ceil_u64(uint64_t x); +uint32_t pow2_ceil_u32(uint32_t x); +size_t pow2_ceil_zu(size_t x); +unsigned lg_floor(size_t x); void set_errno(int errnum); int get_errno(void); #endif @@ -174,27 +136,74 @@ int get_errno(void); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_UTIL_C_)) /* Sanity check. */ -#if !defined(JEMALLOC_INTERNAL_FFSL) || !defined(JEMALLOC_INTERNAL_FFS) -# error Both JEMALLOC_INTERNAL_FFSL && JEMALLOC_INTERNAL_FFS should have been defined by configure +#if !defined(JEMALLOC_INTERNAL_FFSLL) || !defined(JEMALLOC_INTERNAL_FFSL) \ + || !defined(JEMALLOC_INTERNAL_FFS) +# error JEMALLOC_INTERNAL_FFS{,L,LL} should have been defined by configure #endif -JEMALLOC_ALWAYS_INLINE int -jemalloc_ffsl(long bitmap) +JEMALLOC_ALWAYS_INLINE unsigned +ffs_llu(unsigned long long bitmap) +{ + + return (JEMALLOC_INTERNAL_FFSLL(bitmap)); +} + +JEMALLOC_ALWAYS_INLINE unsigned +ffs_lu(unsigned long bitmap) { return (JEMALLOC_INTERNAL_FFSL(bitmap)); } -JEMALLOC_ALWAYS_INLINE int -jemalloc_ffs(int bitmap) +JEMALLOC_ALWAYS_INLINE unsigned +ffs_u(unsigned bitmap) { return (JEMALLOC_INTERNAL_FFS(bitmap)); } -/* Compute the smallest power of 2 that is >= x. 
*/ -JEMALLOC_INLINE size_t -pow2_ceil(size_t x) +JEMALLOC_ALWAYS_INLINE unsigned +ffs_zu(size_t bitmap) +{ + +#if LG_SIZEOF_PTR == LG_SIZEOF_INT + return (ffs_u(bitmap)); +#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG + return (ffs_lu(bitmap)); +#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG_LONG + return (ffs_llu(bitmap)); +#else +#error No implementation for size_t ffs() +#endif +} + +JEMALLOC_ALWAYS_INLINE unsigned +ffs_u64(uint64_t bitmap) +{ + +#if LG_SIZEOF_LONG == 3 + return (ffs_lu(bitmap)); +#elif LG_SIZEOF_LONG_LONG == 3 + return (ffs_llu(bitmap)); +#else +#error No implementation for 64-bit ffs() +#endif +} + +JEMALLOC_ALWAYS_INLINE unsigned +ffs_u32(uint32_t bitmap) +{ + +#if LG_SIZEOF_INT == 2 + return (ffs_u(bitmap)); +#else +#error No implementation for 32-bit ffs() +#endif + return (ffs_u(bitmap)); +} + +JEMALLOC_INLINE uint64_t +pow2_ceil_u64(uint64_t x) { x--; @@ -203,15 +212,39 @@ pow2_ceil(size_t x) x |= x >> 4; x |= x >> 8; x |= x >> 16; -#if (LG_SIZEOF_PTR == 3) x |= x >> 32; -#endif x++; return (x); } -#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) +JEMALLOC_INLINE uint32_t +pow2_ceil_u32(uint32_t x) +{ + + x--; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + x++; + return (x); +} + +/* Compute the smallest power of 2 that is >= x. */ JEMALLOC_INLINE size_t +pow2_ceil_zu(size_t x) +{ + +#if (LG_SIZEOF_PTR == 3) + return (pow2_ceil_u64(x)); +#else + return (pow2_ceil_u32(x)); +#endif +} + +#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) +JEMALLOC_INLINE unsigned lg_floor(size_t x) { size_t ret; @@ -222,10 +255,11 @@ lg_floor(size_t x) : "=r"(ret) // Outputs. : "r"(x) // Inputs. 
); - return (ret); + assert(ret < UINT_MAX); + return ((unsigned)ret); } #elif (defined(_MSC_VER)) -JEMALLOC_INLINE size_t +JEMALLOC_INLINE unsigned lg_floor(size_t x) { unsigned long ret; @@ -237,12 +271,13 @@ lg_floor(size_t x) #elif (LG_SIZEOF_PTR == 2) _BitScanReverse(&ret, x); #else -# error "Unsupported type sizes for lg_floor()" +# error "Unsupported type size for lg_floor()" #endif - return (ret); + assert(ret < UINT_MAX); + return ((unsigned)ret); } #elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ)) -JEMALLOC_INLINE size_t +JEMALLOC_INLINE unsigned lg_floor(size_t x) { @@ -253,11 +288,11 @@ lg_floor(size_t x) #elif (LG_SIZEOF_PTR == LG_SIZEOF_LONG) return (((8 << LG_SIZEOF_PTR) - 1) - __builtin_clzl(x)); #else -# error "Unsupported type sizes for lg_floor()" +# error "Unsupported type size for lg_floor()" #endif } #else -JEMALLOC_INLINE size_t +JEMALLOC_INLINE unsigned lg_floor(size_t x) { @@ -268,20 +303,13 @@ lg_floor(size_t x) x |= (x >> 4); x |= (x >> 8); x |= (x >> 16); -#if (LG_SIZEOF_PTR == 3 && LG_SIZEOF_PTR == LG_SIZEOF_LONG) +#if (LG_SIZEOF_PTR == 3) x |= (x >> 32); - if (x == KZU(0xffffffffffffffff)) - return (63); - x++; - return (jemalloc_ffsl(x) - 2); -#elif (LG_SIZEOF_PTR == 2) - if (x == KZU(0xffffffff)) - return (31); - x++; - return (jemalloc_ffs(x) - 2); -#else -# error "Unsupported type sizes for lg_floor()" #endif + if (x == SIZE_T_MAX) + return ((8 << LG_SIZEOF_PTR) - 1); + x++; + return (ffs_zu(x) - 2); } #endif diff --git a/deps/jemalloc/include/jemalloc/internal/valgrind.h b/deps/jemalloc/include/jemalloc/internal/valgrind.h index a3380df92..877a142b6 100644 --- a/deps/jemalloc/include/jemalloc/internal/valgrind.h +++ b/deps/jemalloc/include/jemalloc/internal/valgrind.h @@ -30,17 +30,31 @@ * calls must be embedded in macros rather than in functions so that when * Valgrind reports errors, there are no extra stack frames in the backtraces. 
*/ -#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do { \ - if (unlikely(in_valgrind && cond)) \ - VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(ptr), zero); \ +#define JEMALLOC_VALGRIND_MALLOC(cond, tsdn, ptr, usize, zero) do { \ + if (unlikely(in_valgrind && cond)) { \ + VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(tsdn, ptr), \ + zero); \ + } \ } while (0) -#define JEMALLOC_VALGRIND_REALLOC(maybe_moved, ptr, usize, \ - ptr_maybe_null, old_ptr, old_usize, old_rzsize, old_ptr_maybe_null, \ - zero) do { \ +#define JEMALLOC_VALGRIND_REALLOC_MOVED_no(ptr, old_ptr) \ + (false) +#define JEMALLOC_VALGRIND_REALLOC_MOVED_maybe(ptr, old_ptr) \ + ((ptr) != (old_ptr)) +#define JEMALLOC_VALGRIND_REALLOC_PTR_NULL_no(ptr) \ + (false) +#define JEMALLOC_VALGRIND_REALLOC_PTR_NULL_maybe(ptr) \ + (ptr == NULL) +#define JEMALLOC_VALGRIND_REALLOC_OLD_PTR_NULL_no(old_ptr) \ + (false) +#define JEMALLOC_VALGRIND_REALLOC_OLD_PTR_NULL_maybe(old_ptr) \ + (old_ptr == NULL) +#define JEMALLOC_VALGRIND_REALLOC(moved, tsdn, ptr, usize, ptr_null, \ + old_ptr, old_usize, old_rzsize, old_ptr_null, zero) do { \ if (unlikely(in_valgrind)) { \ - size_t rzsize = p2rz(ptr); \ + size_t rzsize = p2rz(tsdn, ptr); \ \ - if (!maybe_moved || ptr == old_ptr) { \ + if (!JEMALLOC_VALGRIND_REALLOC_MOVED_##moved(ptr, \ + old_ptr)) { \ VALGRIND_RESIZEINPLACE_BLOCK(ptr, old_usize, \ usize, rzsize); \ if (zero && old_usize < usize) { \ @@ -49,11 +63,13 @@ old_usize), usize - old_usize); \ } \ } else { \ - if (!old_ptr_maybe_null || old_ptr != NULL) { \ + if (!JEMALLOC_VALGRIND_REALLOC_OLD_PTR_NULL_## \ + old_ptr_null(old_ptr)) { \ valgrind_freelike_block(old_ptr, \ old_rzsize); \ } \ - if (!ptr_maybe_null || ptr != NULL) { \ + if (!JEMALLOC_VALGRIND_REALLOC_PTR_NULL_## \ + ptr_null(ptr)) { \ size_t copy_size = (old_usize < usize) \ ? 
old_usize : usize; \ size_t tail_size = usize - copy_size; \ @@ -81,8 +97,8 @@ #define JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(ptr, usize) do {} while (0) #define JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ptr, usize) do {} while (0) #define JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ptr, usize) do {} while (0) -#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do {} while (0) -#define JEMALLOC_VALGRIND_REALLOC(maybe_moved, ptr, usize, \ +#define JEMALLOC_VALGRIND_MALLOC(cond, tsdn, ptr, usize, zero) do {} while (0) +#define JEMALLOC_VALGRIND_REALLOC(maybe_moved, tsdn, ptr, usize, \ ptr_maybe_null, old_ptr, old_usize, old_rzsize, old_ptr_maybe_null, \ zero) do {} while (0) #define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do {} while (0) diff --git a/deps/jemalloc/include/jemalloc/internal/witness.h b/deps/jemalloc/include/jemalloc/internal/witness.h new file mode 100644 index 000000000..cdf15d797 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/witness.h @@ -0,0 +1,266 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct witness_s witness_t; +typedef unsigned witness_rank_t; +typedef ql_head(witness_t) witness_list_t; +typedef int witness_comp_t (const witness_t *, const witness_t *); + +/* + * Lock ranks. Witnesses with rank WITNESS_RANK_OMIT are completely ignored by + * the witness machinery. 
+ */ +#define WITNESS_RANK_OMIT 0U + +#define WITNESS_RANK_INIT 1U +#define WITNESS_RANK_CTL 1U +#define WITNESS_RANK_ARENAS 2U + +#define WITNESS_RANK_PROF_DUMP 3U +#define WITNESS_RANK_PROF_BT2GCTX 4U +#define WITNESS_RANK_PROF_TDATAS 5U +#define WITNESS_RANK_PROF_TDATA 6U +#define WITNESS_RANK_PROF_GCTX 7U + +#define WITNESS_RANK_ARENA 8U +#define WITNESS_RANK_ARENA_CHUNKS 9U +#define WITNESS_RANK_ARENA_NODE_CACHE 10 + +#define WITNESS_RANK_BASE 11U + +#define WITNESS_RANK_LEAF 0xffffffffU +#define WITNESS_RANK_ARENA_BIN WITNESS_RANK_LEAF +#define WITNESS_RANK_ARENA_HUGE WITNESS_RANK_LEAF +#define WITNESS_RANK_DSS WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_ACTIVE WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_DUMP_SEQ WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_GDUMP WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_NEXT_THR_UID WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_THREAD_ACTIVE_INIT WITNESS_RANK_LEAF + +#define WITNESS_INITIALIZER(rank) {"initializer", rank, NULL, {NULL, NULL}} + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct witness_s { + /* Name, used for printing lock order reversal messages. */ + const char *name; + + /* + * Witness rank, where 0 is lowest and UINT_MAX is highest. Witnesses + * must be acquired in order of increasing rank. + */ + witness_rank_t rank; + + /* + * If two witnesses are of equal rank and they have the samp comp + * function pointer, it is called as a last attempt to differentiate + * between witnesses of equal rank. + */ + witness_comp_t *comp; + + /* Linkage for thread's currently owned locks. 
*/ + ql_elm(witness_t) link; +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +void witness_init(witness_t *witness, const char *name, witness_rank_t rank, + witness_comp_t *comp); +#ifdef JEMALLOC_JET +typedef void (witness_lock_error_t)(const witness_list_t *, const witness_t *); +extern witness_lock_error_t *witness_lock_error; +#else +void witness_lock_error(const witness_list_t *witnesses, + const witness_t *witness); +#endif +#ifdef JEMALLOC_JET +typedef void (witness_owner_error_t)(const witness_t *); +extern witness_owner_error_t *witness_owner_error; +#else +void witness_owner_error(const witness_t *witness); +#endif +#ifdef JEMALLOC_JET +typedef void (witness_not_owner_error_t)(const witness_t *); +extern witness_not_owner_error_t *witness_not_owner_error; +#else +void witness_not_owner_error(const witness_t *witness); +#endif +#ifdef JEMALLOC_JET +typedef void (witness_lockless_error_t)(const witness_list_t *); +extern witness_lockless_error_t *witness_lockless_error; +#else +void witness_lockless_error(const witness_list_t *witnesses); +#endif + +void witnesses_cleanup(tsd_t *tsd); +void witness_fork_cleanup(tsd_t *tsd); +void witness_prefork(tsd_t *tsd); +void witness_postfork_parent(tsd_t *tsd); +void witness_postfork_child(tsd_t *tsd); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +bool witness_owner(tsd_t *tsd, const witness_t *witness); +void witness_assert_owner(tsdn_t *tsdn, const witness_t *witness); +void witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness); +void witness_assert_lockless(tsdn_t *tsdn); +void witness_lock(tsdn_t *tsdn, witness_t *witness); +void witness_unlock(tsdn_t *tsdn, witness_t *witness); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) +JEMALLOC_INLINE 
bool +witness_owner(tsd_t *tsd, const witness_t *witness) +{ + witness_list_t *witnesses; + witness_t *w; + + witnesses = tsd_witnessesp_get(tsd); + ql_foreach(w, witnesses, link) { + if (w == witness) + return (true); + } + + return (false); +} + +JEMALLOC_INLINE void +witness_assert_owner(tsdn_t *tsdn, const witness_t *witness) +{ + tsd_t *tsd; + + if (!config_debug) + return; + + if (tsdn_null(tsdn)) + return; + tsd = tsdn_tsd(tsdn); + if (witness->rank == WITNESS_RANK_OMIT) + return; + + if (witness_owner(tsd, witness)) + return; + witness_owner_error(witness); +} + +JEMALLOC_INLINE void +witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness) +{ + tsd_t *tsd; + witness_list_t *witnesses; + witness_t *w; + + if (!config_debug) + return; + + if (tsdn_null(tsdn)) + return; + tsd = tsdn_tsd(tsdn); + if (witness->rank == WITNESS_RANK_OMIT) + return; + + witnesses = tsd_witnessesp_get(tsd); + ql_foreach(w, witnesses, link) { + if (w == witness) + witness_not_owner_error(witness); + } +} + +JEMALLOC_INLINE void +witness_assert_lockless(tsdn_t *tsdn) +{ + tsd_t *tsd; + witness_list_t *witnesses; + witness_t *w; + + if (!config_debug) + return; + + if (tsdn_null(tsdn)) + return; + tsd = tsdn_tsd(tsdn); + + witnesses = tsd_witnessesp_get(tsd); + w = ql_last(witnesses, link); + if (w != NULL) + witness_lockless_error(witnesses); +} + +JEMALLOC_INLINE void +witness_lock(tsdn_t *tsdn, witness_t *witness) +{ + tsd_t *tsd; + witness_list_t *witnesses; + witness_t *w; + + if (!config_debug) + return; + + if (tsdn_null(tsdn)) + return; + tsd = tsdn_tsd(tsdn); + if (witness->rank == WITNESS_RANK_OMIT) + return; + + witness_assert_not_owner(tsdn, witness); + + witnesses = tsd_witnessesp_get(tsd); + w = ql_last(witnesses, link); + if (w == NULL) { + /* No other locks; do nothing. */ + } else if (tsd_witness_fork_get(tsd) && w->rank <= witness->rank) { + /* Forking, and relaxed ranking satisfied. 
*/ + } else if (w->rank > witness->rank) { + /* Not forking, rank order reversal. */ + witness_lock_error(witnesses, witness); + } else if (w->rank == witness->rank && (w->comp == NULL || w->comp != + witness->comp || w->comp(w, witness) > 0)) { + /* + * Missing/incompatible comparison function, or comparison + * function indicates rank order reversal. + */ + witness_lock_error(witnesses, witness); + } + + ql_elm_new(witness, link); + ql_tail_insert(witnesses, witness, link); +} + +JEMALLOC_INLINE void +witness_unlock(tsdn_t *tsdn, witness_t *witness) +{ + tsd_t *tsd; + witness_list_t *witnesses; + + if (!config_debug) + return; + + if (tsdn_null(tsdn)) + return; + tsd = tsdn_tsd(tsdn); + if (witness->rank == WITNESS_RANK_OMIT) + return; + + /* + * Check whether owner before removal, rather than relying on + * witness_assert_owner() to abort, so that unit tests can test this + * function's failure mode without causing undefined behavior. + */ + if (witness_owner(tsd, witness)) { + witnesses = tsd_witnessesp_get(tsd); + ql_remove(witnesses, witness, link); + } else + witness_assert_owner(tsdn, witness); +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/jemalloc_defs.h.in b/deps/jemalloc/include/jemalloc/jemalloc_defs.h.in index ab13c3758..6d89435c2 100644 --- a/deps/jemalloc/include/jemalloc/jemalloc_defs.h.in +++ b/deps/jemalloc/include/jemalloc/jemalloc_defs.h.in @@ -33,5 +33,13 @@ */ #undef JEMALLOC_USE_CXX_THROW +#ifdef _MSC_VER +# ifdef _WIN64 +# define LG_SIZEOF_PTR_WIN 3 +# else +# define LG_SIZEOF_PTR_WIN 2 +# endif +#endif + /* sizeof(void *) == 2^LG_SIZEOF_PTR. 
*/ #undef LG_SIZEOF_PTR diff --git a/deps/jemalloc/include/jemalloc/jemalloc_macros.h.in b/deps/jemalloc/include/jemalloc/jemalloc_macros.h.in index a7028db34..2956c7bb4 100644 --- a/deps/jemalloc/include/jemalloc/jemalloc_macros.h.in +++ b/deps/jemalloc/include/jemalloc/jemalloc_macros.h.in @@ -11,12 +11,13 @@ #define JEMALLOC_VERSION_NREV @jemalloc_version_nrev@ #define JEMALLOC_VERSION_GID "@jemalloc_version_gid@" -# define MALLOCX_LG_ALIGN(la) (la) +# define MALLOCX_LG_ALIGN(la) ((int)(la)) # if LG_SIZEOF_PTR == 2 -# define MALLOCX_ALIGN(a) (ffs(a)-1) +# define MALLOCX_ALIGN(a) ((int)(ffs((int)(a))-1)) # else # define MALLOCX_ALIGN(a) \ - ((a < (size_t)INT_MAX) ? ffs(a)-1 : ffs(a>>32)+31) + ((int)(((size_t)(a) < (size_t)INT_MAX) ? ffs((int)(a))-1 : \ + ffs((int)(((size_t)(a))>>32))+31)) # endif # define MALLOCX_ZERO ((int)0x40) /* @@ -28,7 +29,7 @@ /* * Bias arena index bits so that 0 encodes "use an automatically chosen arena". */ -# define MALLOCX_ARENA(a) ((int)(((a)+1) << 20)) +# define MALLOCX_ARENA(a) ((((int)(a))+1) << 20) #if defined(__cplusplus) && defined(JEMALLOC_USE_CXX_THROW) # define JEMALLOC_CXX_THROW throw() @@ -36,32 +37,7 @@ # define JEMALLOC_CXX_THROW #endif -#ifdef JEMALLOC_HAVE_ATTR -# define JEMALLOC_ATTR(s) __attribute__((s)) -# define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s)) -# ifdef JEMALLOC_HAVE_ATTR_ALLOC_SIZE -# define JEMALLOC_ALLOC_SIZE(s) JEMALLOC_ATTR(alloc_size(s)) -# define JEMALLOC_ALLOC_SIZE2(s1, s2) JEMALLOC_ATTR(alloc_size(s1, s2)) -# else -# define JEMALLOC_ALLOC_SIZE(s) -# define JEMALLOC_ALLOC_SIZE2(s1, s2) -# endif -# ifndef JEMALLOC_EXPORT -# define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) -# endif -# ifdef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF -# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(gnu_printf, s, i)) -# elif defined(JEMALLOC_HAVE_ATTR_FORMAT_PRINTF) -# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(printf, s, i)) -# else -# define JEMALLOC_FORMAT_PRINTF(s, i) -# endif -# 
define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline) -# define JEMALLOC_NOTHROW JEMALLOC_ATTR(nothrow) -# define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s)) -# define JEMALLOC_RESTRICT_RETURN -# define JEMALLOC_ALLOCATOR -#elif _MSC_VER +#if _MSC_VER # define JEMALLOC_ATTR(s) # define JEMALLOC_ALIGNED(s) __declspec(align(s)) # define JEMALLOC_ALLOC_SIZE(s) @@ -87,6 +63,31 @@ # else # define JEMALLOC_ALLOCATOR # endif +#elif defined(JEMALLOC_HAVE_ATTR) +# define JEMALLOC_ATTR(s) __attribute__((s)) +# define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s)) +# ifdef JEMALLOC_HAVE_ATTR_ALLOC_SIZE +# define JEMALLOC_ALLOC_SIZE(s) JEMALLOC_ATTR(alloc_size(s)) +# define JEMALLOC_ALLOC_SIZE2(s1, s2) JEMALLOC_ATTR(alloc_size(s1, s2)) +# else +# define JEMALLOC_ALLOC_SIZE(s) +# define JEMALLOC_ALLOC_SIZE2(s1, s2) +# endif +# ifndef JEMALLOC_EXPORT +# define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) +# endif +# ifdef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF +# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(gnu_printf, s, i)) +# elif defined(JEMALLOC_HAVE_ATTR_FORMAT_PRINTF) +# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(printf, s, i)) +# else +# define JEMALLOC_FORMAT_PRINTF(s, i) +# endif +# define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline) +# define JEMALLOC_NOTHROW JEMALLOC_ATTR(nothrow) +# define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s)) +# define JEMALLOC_RESTRICT_RETURN +# define JEMALLOC_ALLOCATOR #else # define JEMALLOC_ATTR(s) # define JEMALLOC_ALIGNED(s) diff --git a/deps/jemalloc/include/msvc_compat/strings.h b/deps/jemalloc/include/msvc_compat/strings.h index f01ffdd18..a3ee25063 100644 --- a/deps/jemalloc/include/msvc_compat/strings.h +++ b/deps/jemalloc/include/msvc_compat/strings.h @@ -21,7 +21,37 @@ static __forceinline int ffs(int x) return (ffsl(x)); } +# ifdef _M_X64 +# pragma intrinsic(_BitScanForward64) +# endif + +static __forceinline int ffsll(unsigned __int64 x) +{ + unsigned long i; +#ifdef _M_X64 + if 
(_BitScanForward64(&i, x)) + return (i + 1); + return (0); #else +// Fallback for 32-bit build where 64-bit version not available +// assuming little endian + union { + unsigned __int64 ll; + unsigned long l[2]; + } s; + + s.ll = x; + + if (_BitScanForward(&i, s.l[0])) + return (i + 1); + else if(_BitScanForward(&i, s.l[1])) + return (i + 33); + return (0); +#endif +} + +#else +# define ffsll(x) __builtin_ffsll(x) # define ffsl(x) __builtin_ffsl(x) # define ffs(x) __builtin_ffs(x) #endif diff --git a/deps/jemalloc/include/msvc_compat/windows_extra.h b/deps/jemalloc/include/msvc_compat/windows_extra.h index 0c5e323ff..3008faa37 100644 --- a/deps/jemalloc/include/msvc_compat/windows_extra.h +++ b/deps/jemalloc/include/msvc_compat/windows_extra.h @@ -1,26 +1,6 @@ #ifndef MSVC_COMPAT_WINDOWS_EXTRA_H #define MSVC_COMPAT_WINDOWS_EXTRA_H -#ifndef ENOENT -# define ENOENT ERROR_PATH_NOT_FOUND -#endif -#ifndef EINVAL -# define EINVAL ERROR_BAD_ARGUMENTS -#endif -#ifndef EAGAIN -# define EAGAIN ERROR_OUTOFMEMORY -#endif -#ifndef EPERM -# define EPERM ERROR_WRITE_FAULT -#endif -#ifndef EFAULT -# define EFAULT ERROR_INVALID_ADDRESS -#endif -#ifndef ENOMEM -# define ENOMEM ERROR_NOT_ENOUGH_MEMORY -#endif -#ifndef ERANGE -# define ERANGE ERROR_INVALID_DATA -#endif +#include #endif /* MSVC_COMPAT_WINDOWS_EXTRA_H */ diff --git a/deps/jemalloc/jemalloc.pc.in b/deps/jemalloc/jemalloc.pc.in index 1a3ad9b34..a318e8dd3 100644 --- a/deps/jemalloc/jemalloc.pc.in +++ b/deps/jemalloc/jemalloc.pc.in @@ -6,7 +6,7 @@ install_suffix=@install_suffix@ Name: jemalloc Description: A general purpose malloc(3) implementation that emphasizes fragmentation avoidance and scalable concurrency support. 
-URL: http://www.canonware.com/jemalloc +URL: http://jemalloc.net/ Version: @jemalloc_version@ Cflags: -I${includedir} Libs: -L${libdir} -ljemalloc${install_suffix} diff --git a/deps/jemalloc/msvc/ReadMe.txt b/deps/jemalloc/msvc/ReadMe.txt new file mode 100644 index 000000000..77d567da0 --- /dev/null +++ b/deps/jemalloc/msvc/ReadMe.txt @@ -0,0 +1,24 @@ + +How to build jemalloc for Windows +================================= + +1. Install Cygwin with at least the following packages: + * autoconf + * autogen + * gawk + * grep + * sed + +2. Install Visual Studio 2015 with Visual C++ + +3. Add Cygwin\bin to the PATH environment variable + +4. Open "VS2015 x86 Native Tools Command Prompt" + (note: x86/x64 doesn't matter at this point) + +5. Generate header files: + sh -c "CC=cl ./autogen.sh" + +6. Now the project can be opened and built in Visual Studio: + msvc\jemalloc_vc2015.sln + diff --git a/deps/jemalloc/msvc/jemalloc_vc2015.sln b/deps/jemalloc/msvc/jemalloc_vc2015.sln new file mode 100644 index 000000000..aedd5e5ea --- /dev/null +++ b/deps/jemalloc/msvc/jemalloc_vc2015.sln @@ -0,0 +1,63 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 14 +VisualStudioVersion = 14.0.24720.0 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{70A99006-6DE9-472B-8F83-4CEE6C616DF3}" + ProjectSection(SolutionItems) = preProject + ReadMe.txt = ReadMe.txt + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "jemalloc", "projects\vc2015\jemalloc\jemalloc.vcxproj", "{8D6BB292-9E1C-413D-9F98-4864BDC1514A}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test_threads", "projects\vc2015\test_threads\test_threads.vcxproj", "{09028CFD-4EB7-491D-869C-0708DB97ED44}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Debug-static|x64 = Debug-static|x64 + 
Debug-static|x86 = Debug-static|x86 + Release|x64 = Release|x64 + Release|x86 = Release|x86 + Release-static|x64 = Release-static|x64 + Release-static|x86 = Release-static|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x64.ActiveCfg = Debug|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x64.Build.0 = Debug|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x86.ActiveCfg = Debug|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x86.Build.0 = Debug|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x64.ActiveCfg = Debug-static|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x64.Build.0 = Debug-static|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x86.ActiveCfg = Debug-static|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x86.Build.0 = Debug-static|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x64.ActiveCfg = Release|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x64.Build.0 = Release|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x86.ActiveCfg = Release|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x86.Build.0 = Release|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x64.ActiveCfg = Release-static|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x64.Build.0 = Release-static|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x86.ActiveCfg = Release-static|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x86.Build.0 = Release-static|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x64.ActiveCfg = Debug|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x64.Build.0 = Debug|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x86.ActiveCfg = Debug|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x86.Build.0 = Debug|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x64.ActiveCfg = Debug-static|x64 + 
{09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x64.Build.0 = Debug-static|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x86.ActiveCfg = Debug-static|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x86.Build.0 = Debug-static|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x64.ActiveCfg = Release|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x64.Build.0 = Release|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x86.ActiveCfg = Release|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x86.Build.0 = Release|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x64.ActiveCfg = Release-static|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x64.Build.0 = Release-static|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x86.ActiveCfg = Release-static|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x86.Build.0 = Release-static|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/deps/jemalloc/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/deps/jemalloc/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj new file mode 100644 index 000000000..8342ab3ab --- /dev/null +++ b/deps/jemalloc/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -0,0 +1,402 @@ + + + + + Debug-static + Win32 + + + Debug-static + x64 + + + Debug + Win32 + + + Release-static + Win32 + + + Release-static + x64 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {8D6BB292-9E1C-413D-9F98-4864BDC1514A} + Win32Proj + jemalloc + 8.1 + + + + DynamicLibrary + true + v140 + MultiByte + + + StaticLibrary + true + v140 + MultiByte + + + DynamicLibrary + false + v140 + true + MultiByte + + + StaticLibrary + false + v140 + true + MultiByte + + + 
DynamicLibrary + true + v140 + MultiByte + + + StaticLibrary + true + v140 + MultiByte + + + DynamicLibrary + false + v140 + true + MultiByte + + + StaticLibrary + false + v140 + true + MultiByte + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)d + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)-$(PlatformToolset)-$(Configuration) + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)-$(PlatformToolset)-$(Configuration) + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)d + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)-vc$(PlatformToolsetVersion)-$(Configuration) + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)-vc$(PlatformToolsetVersion)-$(Configuration) + + + + + + Level3 + Disabled + _REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + 4090;4146;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + + + + + + + Level3 + Disabled + JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreadedDebug + 4090;4146;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + + + + + + + Level3 + Disabled + _REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + 4090;4146;4267;4334 + 
$(OutputPath)$(TargetName).pdb + + + Windows + true + + + + + + + Level3 + Disabled + JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreadedDebug + 4090;4146;4267;4334 + OldStyle + false + + + Windows + true + + + + + Level3 + + + MaxSpeed + true + true + _REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + 4090;4146;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + _REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreaded + 4090;4146;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + _REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions) + 4090;4146;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + _REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreaded + 4090;4146;4267;4334 + OldStyle + + + Windows + true + true + true + + + + + + \ No newline at end of file diff --git a/deps/jemalloc/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/deps/jemalloc/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters new file mode 100644 index 000000000..37f0f02ae --- /dev/null +++ b/deps/jemalloc/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -0,0 +1,272 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + 
h;hh;hpp;hxx;hm;inl;inc;xsd + + + {5697dfa3-16cf-4932-b428-6e0ec6e9f98e} + + + {0cbd2ca6-42a7-4f82-8517-d7e7a14fd986} + + + {0abe6f30-49b5-46dd-8aca-6e33363fa52c} + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\msvc_compat + + + Header Files\msvc_compat + + + Header Files\msvc_compat\C99 + + + Header Files\msvc_compat\C99 + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + 
Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + diff --git a/deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads.cpp b/deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads.cpp new file mode 100755 index 000000000..a3d1a792a --- /dev/null +++ b/deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads.cpp @@ -0,0 +1,89 @@ +// jemalloc C++ threaded test +// Author: Rustam Abdullaev +// Public Domain + +#include +#include +#include +#include +#include +#include +#include +#include + +using std::vector; +using std::thread; +using std::uniform_int_distribution; +using std::minstd_rand; + +int test_threads() +{ + je_malloc_conf = "narenas:3"; + int narenas = 0; + size_t sz = sizeof(narenas); + je_mallctl("opt.narenas", (void *)&narenas, &sz, NULL, 0); + if (narenas != 3) { + printf("Error: unexpected number of arenas: %d\n", narenas); + return 1; + } + static const int sizes[] = { 7, 16, 32, 60, 91, 100, 120, 144, 169, 199, 255, 400, 670, 900, 917, 1025, 3333, 5190, 13131, 49192, 99999, 123123, 255265, 2333111 }; + static const int numSizes = (int)(sizeof(sizes) / sizeof(sizes[0])); + vector workers; + static const int numThreads = narenas + 1, numAllocsMax = 25, numIter1 = 50, numIter2 = 50; + je_malloc_stats_print(NULL, NULL, NULL); + size_t allocated1; + size_t sz1 = sizeof(allocated1); + je_mallctl("stats.active", (void *)&allocated1, &sz1, NULL, 0); + printf("\nPress Enter to start threads...\n"); + getchar(); + printf("Starting %d threads x %d x %d iterations...\n", numThreads, numIter1, numIter2); + for (int i = 0; i < numThreads; i++) { + workers.emplace_back([tid=i]() { + uniform_int_distribution sizeDist(0, numSizes - 1); + minstd_rand rnd(tid * 17); + uint8_t* ptrs[numAllocsMax]; + int ptrsz[numAllocsMax]; + for (int i = 0; i < numIter1; ++i) { + thread t([&]() { + for (int i = 0; i < numIter2; ++i) { + const int 
numAllocs = numAllocsMax - sizeDist(rnd); + for (int j = 0; j < numAllocs; j += 64) { + const int x = sizeDist(rnd); + const int sz = sizes[x]; + ptrsz[j] = sz; + ptrs[j] = (uint8_t*)je_malloc(sz); + if (!ptrs[j]) { + printf("Unable to allocate %d bytes in thread %d, iter %d, alloc %d. %d\n", sz, tid, i, j, x); + exit(1); + } + for (int k = 0; k < sz; k++) + ptrs[j][k] = tid + k; + } + for (int j = 0; j < numAllocs; j += 64) { + for (int k = 0, sz = ptrsz[j]; k < sz; k++) + if (ptrs[j][k] != (uint8_t)(tid + k)) { + printf("Memory error in thread %d, iter %d, alloc %d @ %d : %02X!=%02X\n", tid, i, j, k, ptrs[j][k], (uint8_t)(tid + k)); + exit(1); + } + je_free(ptrs[j]); + } + } + }); + t.join(); + } + }); + } + for (thread& t : workers) { + t.join(); + } + je_malloc_stats_print(NULL, NULL, NULL); + size_t allocated2; + je_mallctl("stats.active", (void *)&allocated2, &sz1, NULL, 0); + size_t leaked = allocated2 - allocated1; + printf("\nDone. Leaked: %zd bytes\n", leaked); + bool failed = leaked > 65536; // in case C++ runtime allocated something (e.g. iostream locale or facet) + printf("\nTest %s!\n", (failed ? "FAILED" : "successful")); + printf("\nPress Enter to continue...\n"); + getchar(); + return failed ? 
1 : 0; +} diff --git a/deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads.h b/deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads.h new file mode 100644 index 000000000..64d0cdb33 --- /dev/null +++ b/deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads.h @@ -0,0 +1,3 @@ +#pragma once + +int test_threads(); diff --git a/deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads.vcxproj b/deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads.vcxproj new file mode 100644 index 000000000..f5e9898f2 --- /dev/null +++ b/deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads.vcxproj @@ -0,0 +1,327 @@ + + + + + Debug-static + Win32 + + + Debug-static + x64 + + + Debug + Win32 + + + Release-static + Win32 + + + Release-static + x64 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + {09028CFD-4EB7-491D-869C-0708DB97ED44} + Win32Proj + test_threads + 8.1 + + + + Application + true + v140 + MultiByte + + + Application + true + v140 + MultiByte + + + Application + false + v140 + true + MultiByte + + + Application + false + v140 + true + MultiByte + + + Application + true + v140 + MultiByte + + + Application + true + v140 + MultiByte + + + Application + false + v140 + true + MultiByte + + + Application + false + v140 + true + MultiByte + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + true + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + true + + + true + $(SolutionDir)$(Platform)\$(Configuration)\ + + + true + $(SolutionDir)$(Platform)\$(Configuration)\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + + + $(SolutionDir)$(Platform)\$(Configuration)\ + 
$(Platform)\$(Configuration)\ + false + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + + + Console + true + $(SolutionDir)$(Platform)\$(Configuration) + jemallocd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + Level3 + Disabled + JEMALLOC_EXPORT=;JEMALLOC_STATIC;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreadedDebug + + + Console + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + Level3 + Disabled + _DEBUG;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + + + Console + true + jemallocd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + $(SolutionDir)$(Platform)\$(Configuration) + + + + + + + Level3 + Disabled + JEMALLOC_EXPORT=;JEMALLOC_STATIC;_DEBUG;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreadedDebug + + + Console + true + jemalloc-vc$(PlatformToolsetVersion)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + $(SolutionDir)$(Platform)\$(Configuration) + + + + + Level3 + + + MaxSpeed + true + 
true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + + + Console + true + true + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + JEMALLOC_EXPORT=;JEMALLOC_STATIC;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreaded + + + Console + true + true + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + + + Console + true + true + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + JEMALLOC_EXPORT=;JEMALLOC_STATIC;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreaded + + + Console + true + true + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc-vc$(PlatformToolsetVersion)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) 
+ + + + + + + + + {8d6bb292-9e1c-413d-9f98-4864bdc1514a} + + + + + + + + + \ No newline at end of file diff --git a/deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads.vcxproj.filters b/deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads.vcxproj.filters new file mode 100644 index 000000000..4c2334073 --- /dev/null +++ b/deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads.vcxproj.filters @@ -0,0 +1,26 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + + + Source Files + + + Source Files + + + + + Header Files + + + \ No newline at end of file diff --git a/deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads_main.cpp b/deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads_main.cpp new file mode 100644 index 000000000..ffd96e6ab --- /dev/null +++ b/deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads_main.cpp @@ -0,0 +1,12 @@ +#include "test_threads.h" +#include +#include +#include + +using namespace std::chrono_literals; + +int main(int argc, char** argv) +{ + int rc = test_threads(); + return rc; +} diff --git a/deps/jemalloc/src/arena.c b/deps/jemalloc/src/arena.c index 3081519cc..648a8da3a 100644 --- a/deps/jemalloc/src/arena.c +++ b/deps/jemalloc/src/arena.c @@ -4,16 +4,23 @@ /******************************************************************************/ /* Data. */ +purge_mode_t opt_purge = PURGE_DEFAULT; +const char *purge_mode_names[] = { + "ratio", + "decay", + "N/A" +}; ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT; static ssize_t lg_dirty_mult_default; +ssize_t opt_decay_time = DECAY_TIME_DEFAULT; +static ssize_t decay_time_default; + arena_bin_info_t arena_bin_info[NBINS]; size_t map_bias; size_t map_misc_offset; size_t arena_maxrun; /* Max run size for arenas. */ size_t large_maxclass; /* Max large size class. 
*/ -static size_t small_maxrun; /* Max run size used for small size classes. */ -static bool *small_run_tab; /* Valid small run page multiples. */ unsigned nlclasses; /* Number of large size classes. */ unsigned nhclasses; /* Number of huge size classes. */ @@ -23,60 +30,57 @@ unsigned nhclasses; /* Number of huge size classes. */ * definition. */ -static void arena_purge(arena_t *arena, bool all); -static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, - bool cleaned, bool decommitted); -static void arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, - arena_run_t *run, arena_bin_t *bin); -static void arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, - arena_run_t *run, arena_bin_t *bin); +static void arena_chunk_dalloc(tsdn_t *tsdn, arena_t *arena, + arena_chunk_t *chunk); +static void arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, + size_t ndirty_limit); +static void arena_run_dalloc(tsdn_t *tsdn, arena_t *arena, arena_run_t *run, + bool dirty, bool cleaned, bool decommitted); +static void arena_dalloc_bin_run(tsdn_t *tsdn, arena_t *arena, + arena_chunk_t *chunk, arena_run_t *run, arena_bin_t *bin); +static void arena_bin_lower_run(arena_t *arena, arena_run_t *run, + arena_bin_t *bin); /******************************************************************************/ -#define CHUNK_MAP_KEY ((uintptr_t)0x1U) - -JEMALLOC_INLINE_C arena_chunk_map_misc_t * -arena_miscelm_key_create(size_t size) -{ - - return ((arena_chunk_map_misc_t *)(arena_mapbits_size_encode(size) | - CHUNK_MAP_KEY)); -} - -JEMALLOC_INLINE_C bool -arena_miscelm_is_key(const arena_chunk_map_misc_t *miscelm) -{ - - return (((uintptr_t)miscelm & CHUNK_MAP_KEY) != 0); -} - -#undef CHUNK_MAP_KEY - JEMALLOC_INLINE_C size_t -arena_miscelm_key_size_get(const arena_chunk_map_misc_t *miscelm) -{ - - assert(arena_miscelm_is_key(miscelm)); - - return (arena_mapbits_size_decode((uintptr_t)miscelm)); -} - -JEMALLOC_INLINE_C size_t 
-arena_miscelm_size_get(arena_chunk_map_misc_t *miscelm) +arena_miscelm_size_get(const arena_chunk_map_misc_t *miscelm) { arena_chunk_t *chunk; size_t pageind, mapbits; - assert(!arena_miscelm_is_key(miscelm)); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); pageind = arena_miscelm_to_pageind(miscelm); mapbits = arena_mapbits_get(chunk, pageind); return (arena_mapbits_size_decode(mapbits)); } +JEMALLOC_INLINE_C const extent_node_t * +arena_miscelm_extent_get(const arena_chunk_map_misc_t *miscelm) +{ + arena_chunk_t *chunk; + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); + return (&chunk->node); +} + JEMALLOC_INLINE_C int -arena_run_comp(arena_chunk_map_misc_t *a, arena_chunk_map_misc_t *b) +arena_sn_comp(const arena_chunk_map_misc_t *a, const arena_chunk_map_misc_t *b) +{ + size_t a_sn, b_sn; + + assert(a != NULL); + assert(b != NULL); + + a_sn = extent_node_sn_get(arena_miscelm_extent_get(a)); + b_sn = extent_node_sn_get(arena_miscelm_extent_get(b)); + + return ((a_sn > b_sn) - (a_sn < b_sn)); +} + +JEMALLOC_INLINE_C int +arena_ad_comp(const arena_chunk_map_misc_t *a, + const arena_chunk_map_misc_t *b) { uintptr_t a_miscelm = (uintptr_t)a; uintptr_t b_miscelm = (uintptr_t)b; @@ -87,74 +91,79 @@ arena_run_comp(arena_chunk_map_misc_t *a, arena_chunk_map_misc_t *b) return ((a_miscelm > b_miscelm) - (a_miscelm < b_miscelm)); } -/* Generate red-black tree functions. */ -rb_gen(static UNUSED, arena_run_tree_, arena_run_tree_t, arena_chunk_map_misc_t, - rb_link, arena_run_comp) - -static size_t -run_quantize(size_t size) +JEMALLOC_INLINE_C int +arena_snad_comp(const arena_chunk_map_misc_t *a, + const arena_chunk_map_misc_t *b) { - size_t qsize; + int ret; + + assert(a != NULL); + assert(b != NULL); + + ret = arena_sn_comp(a, b); + if (ret != 0) + return (ret); + + ret = arena_ad_comp(a, b); + return (ret); +} + +/* Generate pairing heap functions. 
*/ +ph_gen(static UNUSED, arena_run_heap_, arena_run_heap_t, arena_chunk_map_misc_t, + ph_link, arena_snad_comp) + +#ifdef JEMALLOC_JET +#undef run_quantize_floor +#define run_quantize_floor JEMALLOC_N(n_run_quantize_floor) +#endif +static size_t +run_quantize_floor(size_t size) +{ + size_t ret; + pszind_t pind; + + assert(size > 0); + assert(size <= HUGE_MAXCLASS); + assert((size & PAGE_MASK) == 0); assert(size != 0); assert(size == PAGE_CEILING(size)); - /* Don't change sizes that are valid small run sizes. */ - if (size <= small_maxrun && small_run_tab[size >> LG_PAGE]) + pind = psz2ind(size - large_pad + 1); + if (pind == 0) { + /* + * Avoid underflow. This short-circuit would also do the right + * thing for all sizes in the range for which there are + * PAGE-spaced size classes, but it's simplest to just handle + * the one case that would cause erroneous results. + */ return (size); - - /* - * Round down to the nearest run size that can actually be requested - * during normal large allocation. Add large_pad so that cache index - * randomization can offset the allocation from the page boundary. - */ - qsize = index2size(size2index(size - large_pad + 1) - 1) + large_pad; - if (qsize <= SMALL_MAXCLASS + large_pad) - return (run_quantize(size - large_pad)); - assert(qsize <= size); - return (qsize); -} - -static size_t -run_quantize_next(size_t size) -{ - size_t large_run_size_next; - - assert(size != 0); - assert(size == PAGE_CEILING(size)); - - /* - * Return the next quantized size greater than the input size. - * Quantized sizes comprise the union of run sizes that back small - * region runs, and run sizes that back large regions with no explicit - * alignment constraints. 
- */ - - if (size > SMALL_MAXCLASS) { - large_run_size_next = PAGE_CEILING(index2size(size2index(size - - large_pad) + 1) + large_pad); - } else - large_run_size_next = SIZE_T_MAX; - if (size >= small_maxrun) - return (large_run_size_next); - - while (true) { - size += PAGE; - assert(size <= small_maxrun); - if (small_run_tab[size >> LG_PAGE]) { - if (large_run_size_next < size) - return (large_run_size_next); - return (size); - } } + ret = pind2sz(pind - 1) + large_pad; + assert(ret <= size); + return (ret); } +#ifdef JEMALLOC_JET +#undef run_quantize_floor +#define run_quantize_floor JEMALLOC_N(run_quantize_floor) +run_quantize_t *run_quantize_floor = JEMALLOC_N(n_run_quantize_floor); +#endif +#ifdef JEMALLOC_JET +#undef run_quantize_ceil +#define run_quantize_ceil JEMALLOC_N(n_run_quantize_ceil) +#endif static size_t -run_quantize_first(size_t size) +run_quantize_ceil(size_t size) { - size_t qsize = run_quantize(size); + size_t ret; - if (qsize < size) { + assert(size > 0); + assert(size <= HUGE_MAXCLASS); + assert((size & PAGE_MASK) == 0); + + ret = run_quantize_floor(size); + if (ret < size) { /* * Skip a quantization that may have an adequately large run, * because under-sized runs may be mixed in. This only happens @@ -163,72 +172,50 @@ run_quantize_first(size_t size) * search would potentially find sufficiently aligned available * memory somewhere lower. */ - qsize = run_quantize_next(size); + ret = pind2sz(psz2ind(ret - large_pad + 1)) + large_pad; } - return (qsize); -} - -JEMALLOC_INLINE_C int -arena_avail_comp(arena_chunk_map_misc_t *a, arena_chunk_map_misc_t *b) -{ - int ret; - uintptr_t a_miscelm = (uintptr_t)a; - size_t a_qsize = run_quantize(arena_miscelm_is_key(a) ? - arena_miscelm_key_size_get(a) : arena_miscelm_size_get(a)); - size_t b_qsize = run_quantize(arena_miscelm_size_get(b)); - - /* - * Compare based on quantized size rather than size, in order to sort - * equally useful runs only by address. 
- */ - ret = (a_qsize > b_qsize) - (a_qsize < b_qsize); - if (ret == 0) { - if (!arena_miscelm_is_key(a)) { - uintptr_t b_miscelm = (uintptr_t)b; - - ret = (a_miscelm > b_miscelm) - (a_miscelm < b_miscelm); - } else { - /* - * Treat keys as if they are lower than anything else. - */ - ret = -1; - } - } - return (ret); } - -/* Generate red-black tree functions. */ -rb_gen(static UNUSED, arena_avail_tree_, arena_avail_tree_t, - arena_chunk_map_misc_t, rb_link, arena_avail_comp) +#ifdef JEMALLOC_JET +#undef run_quantize_ceil +#define run_quantize_ceil JEMALLOC_N(run_quantize_ceil) +run_quantize_t *run_quantize_ceil = JEMALLOC_N(n_run_quantize_ceil); +#endif static void arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, size_t npages) { - + pszind_t pind = psz2ind(run_quantize_floor(arena_miscelm_size_get( + arena_miscelm_get_const(chunk, pageind)))); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); - arena_avail_tree_insert(&arena->runs_avail, arena_miscelm_get(chunk, - pageind)); + assert((npages << LG_PAGE) < chunksize); + assert(pind2sz(pind) <= chunksize); + arena_run_heap_insert(&arena->runs_avail[pind], + arena_miscelm_get_mutable(chunk, pageind)); } static void arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, size_t npages) { - + pszind_t pind = psz2ind(run_quantize_floor(arena_miscelm_size_get( + arena_miscelm_get_const(chunk, pageind)))); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); - arena_avail_tree_remove(&arena->runs_avail, arena_miscelm_get(chunk, - pageind)); + assert((npages << LG_PAGE) < chunksize); + assert(pind2sz(pind) <= chunksize); + arena_run_heap_remove(&arena->runs_avail[pind], + arena_miscelm_get_mutable(chunk, pageind)); } static void arena_run_dirty_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, size_t npages) { - arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind); + arena_chunk_map_misc_t 
*miscelm = arena_miscelm_get_mutable(chunk, + pageind); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); @@ -245,7 +232,8 @@ static void arena_run_dirty_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, size_t npages) { - arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind); + arena_chunk_map_misc_t *miscelm = arena_miscelm_get_mutable(chunk, + pageind); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); @@ -292,14 +280,14 @@ JEMALLOC_INLINE_C void * arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info) { void *ret; - unsigned regind; + size_t regind; arena_chunk_map_misc_t *miscelm; void *rpages; assert(run->nfree > 0); assert(!bitmap_full(run->bitmap, &bin_info->bitmap_info)); - regind = bitmap_sfu(run->bitmap, &bin_info->bitmap_info); + regind = (unsigned)bitmap_sfu(run->bitmap, &bin_info->bitmap_info); miscelm = arena_run_to_miscelm(run); rpages = arena_miscelm_to_rpages(miscelm); ret = (void *)((uintptr_t)rpages + (uintptr_t)bin_info->reg0_offset + @@ -316,7 +304,7 @@ arena_run_reg_dalloc(arena_run_t *run, void *ptr) size_t mapbits = arena_mapbits_get(chunk, pageind); szind_t binind = arena_ptr_small_binind_get(ptr, mapbits); arena_bin_info_t *bin_info = &arena_bin_info[binind]; - unsigned regind = arena_run_regind(run, bin_info, ptr); + size_t regind = arena_run_regind(run, bin_info, ptr); assert(run->nfree < bin_info->nregs); /* Freeing an interior pointer can cause assertion failure. 
*/ @@ -364,16 +352,30 @@ arena_run_page_validate_zeroed(arena_chunk_t *chunk, size_t run_ind) } static void -arena_cactive_update(arena_t *arena, size_t add_pages, size_t sub_pages) +arena_nactive_add(arena_t *arena, size_t add_pages) { if (config_stats) { - ssize_t cactive_diff = CHUNK_CEILING((arena->nactive + add_pages - - sub_pages) << LG_PAGE) - CHUNK_CEILING(arena->nactive << + size_t cactive_add = CHUNK_CEILING((arena->nactive + + add_pages) << LG_PAGE) - CHUNK_CEILING(arena->nactive << LG_PAGE); - if (cactive_diff != 0) - stats_cactive_add(cactive_diff); + if (cactive_add != 0) + stats_cactive_add(cactive_add); } + arena->nactive += add_pages; +} + +static void +arena_nactive_sub(arena_t *arena, size_t sub_pages) +{ + + if (config_stats) { + size_t cactive_sub = CHUNK_CEILING(arena->nactive << LG_PAGE) - + CHUNK_CEILING((arena->nactive - sub_pages) << LG_PAGE); + if (cactive_sub != 0) + stats_cactive_sub(cactive_sub); + } + arena->nactive -= sub_pages; } static void @@ -394,8 +396,7 @@ arena_run_split_remove(arena_t *arena, arena_chunk_t *chunk, size_t run_ind, arena_avail_remove(arena, chunk, run_ind, total_pages); if (flag_dirty != 0) arena_run_dirty_remove(arena, chunk, run_ind, total_pages); - arena_cactive_update(arena, need_pages, 0); - arena->nactive += need_pages; + arena_nactive_add(arena, need_pages); /* Keep track of trailing unused pages for later use. */ if (rem_pages > 0) { @@ -567,7 +568,8 @@ arena_chunk_init_spare(arena_t *arena) } static bool -arena_chunk_register(arena_t *arena, arena_chunk_t *chunk, bool zero) +arena_chunk_register(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, + size_t sn, bool zero) { /* @@ -576,64 +578,67 @@ arena_chunk_register(arena_t *arena, arena_chunk_t *chunk, bool zero) * of runs is tracked individually, and upon chunk deallocation the * entire chunk is in a consistent commit state. 
*/ - extent_node_init(&chunk->node, arena, chunk, chunksize, zero, true); + extent_node_init(&chunk->node, arena, chunk, chunksize, sn, zero, true); extent_node_achunk_set(&chunk->node, true); - return (chunk_register(chunk, &chunk->node)); + return (chunk_register(tsdn, chunk, &chunk->node)); } static arena_chunk_t * -arena_chunk_alloc_internal_hard(arena_t *arena, chunk_hooks_t *chunk_hooks, - bool *zero, bool *commit) +arena_chunk_alloc_internal_hard(tsdn_t *tsdn, arena_t *arena, + chunk_hooks_t *chunk_hooks, bool *zero, bool *commit) { arena_chunk_t *chunk; + size_t sn; - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); - chunk = (arena_chunk_t *)chunk_alloc_wrapper(arena, chunk_hooks, NULL, - chunksize, chunksize, zero, commit); + chunk = (arena_chunk_t *)chunk_alloc_wrapper(tsdn, arena, chunk_hooks, + NULL, chunksize, chunksize, &sn, zero, commit); if (chunk != NULL && !*commit) { /* Commit header. */ if (chunk_hooks->commit(chunk, chunksize, 0, map_bias << LG_PAGE, arena->ind)) { - chunk_dalloc_wrapper(arena, chunk_hooks, - (void *)chunk, chunksize, *commit); + chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, + (void *)chunk, chunksize, sn, *zero, *commit); chunk = NULL; } } - if (chunk != NULL && arena_chunk_register(arena, chunk, *zero)) { + if (chunk != NULL && arena_chunk_register(tsdn, arena, chunk, sn, + *zero)) { if (!*commit) { /* Undo commit of header. 
*/ chunk_hooks->decommit(chunk, chunksize, 0, map_bias << LG_PAGE, arena->ind); } - chunk_dalloc_wrapper(arena, chunk_hooks, (void *)chunk, - chunksize, *commit); + chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, (void *)chunk, + chunksize, sn, *zero, *commit); chunk = NULL; } - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); return (chunk); } static arena_chunk_t * -arena_chunk_alloc_internal(arena_t *arena, bool *zero, bool *commit) +arena_chunk_alloc_internal(tsdn_t *tsdn, arena_t *arena, bool *zero, + bool *commit) { arena_chunk_t *chunk; chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; + size_t sn; - chunk = chunk_alloc_cache(arena, &chunk_hooks, NULL, chunksize, - chunksize, zero, true); + chunk = chunk_alloc_cache(tsdn, arena, &chunk_hooks, NULL, chunksize, + chunksize, &sn, zero, commit, true); if (chunk != NULL) { - if (arena_chunk_register(arena, chunk, *zero)) { - chunk_dalloc_cache(arena, &chunk_hooks, chunk, - chunksize, true); + if (arena_chunk_register(tsdn, arena, chunk, sn, *zero)) { + chunk_dalloc_cache(tsdn, arena, &chunk_hooks, chunk, + chunksize, sn, true); return (NULL); } - *commit = true; } if (chunk == NULL) { - chunk = arena_chunk_alloc_internal_hard(arena, &chunk_hooks, - zero, commit); + chunk = arena_chunk_alloc_internal_hard(tsdn, arena, + &chunk_hooks, zero, commit); } if (config_stats && chunk != NULL) { @@ -645,7 +650,7 @@ arena_chunk_alloc_internal(arena_t *arena, bool *zero, bool *commit) } static arena_chunk_t * -arena_chunk_init_hard(arena_t *arena) +arena_chunk_init_hard(tsdn_t *tsdn, arena_t *arena) { arena_chunk_t *chunk; bool zero, commit; @@ -655,14 +660,16 @@ arena_chunk_init_hard(arena_t *arena) zero = false; commit = false; - chunk = arena_chunk_alloc_internal(arena, &zero, &commit); + chunk = arena_chunk_alloc_internal(tsdn, arena, &zero, &commit); if (chunk == NULL) return (NULL); + chunk->hugepage = true; + /* * Initialize the map to contain one maximal free untouched run. 
Mark - * the pages as zeroed if chunk_alloc() returned a zeroed or decommitted - * chunk. + * the pages as zeroed if arena_chunk_alloc_internal() returned a zeroed + * or decommitted chunk. */ flag_unzeroed = (zero || !commit) ? 0 : CHUNK_MAP_UNZEROED; flag_decommitted = commit ? 0 : CHUNK_MAP_DECOMMITTED; @@ -674,17 +681,18 @@ arena_chunk_init_hard(arena_t *arena) */ if (!zero) { JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED( - (void *)arena_bitselm_get(chunk, map_bias+1), - (size_t)((uintptr_t) arena_bitselm_get(chunk, - chunk_npages-1) - (uintptr_t)arena_bitselm_get(chunk, - map_bias+1))); + (void *)arena_bitselm_get_const(chunk, map_bias+1), + (size_t)((uintptr_t)arena_bitselm_get_const(chunk, + chunk_npages-1) - + (uintptr_t)arena_bitselm_get_const(chunk, map_bias+1))); for (i = map_bias+1; i < chunk_npages-1; i++) arena_mapbits_internal_set(chunk, i, flag_unzeroed); } else { JEMALLOC_VALGRIND_MAKE_MEM_DEFINED((void - *)arena_bitselm_get(chunk, map_bias+1), (size_t)((uintptr_t) - arena_bitselm_get(chunk, chunk_npages-1) - - (uintptr_t)arena_bitselm_get(chunk, map_bias+1))); + *)arena_bitselm_get_const(chunk, map_bias+1), + (size_t)((uintptr_t)arena_bitselm_get_const(chunk, + chunk_npages-1) - + (uintptr_t)arena_bitselm_get_const(chunk, map_bias+1))); if (config_debug) { for (i = map_bias+1; i < chunk_npages-1; i++) { assert(arena_mapbits_unzeroed_get(chunk, i) == @@ -699,27 +707,84 @@ arena_chunk_init_hard(arena_t *arena) } static arena_chunk_t * -arena_chunk_alloc(arena_t *arena) +arena_chunk_alloc(tsdn_t *tsdn, arena_t *arena) { arena_chunk_t *chunk; if (arena->spare != NULL) chunk = arena_chunk_init_spare(arena); else { - chunk = arena_chunk_init_hard(arena); + chunk = arena_chunk_init_hard(tsdn, arena); if (chunk == NULL) return (NULL); } - /* Insert the run into the runs_avail tree. 
*/ + ql_elm_new(&chunk->node, ql_link); + ql_tail_insert(&arena->achunks, &chunk->node, ql_link); arena_avail_insert(arena, chunk, map_bias, chunk_npages-map_bias); return (chunk); } static void -arena_chunk_dalloc(arena_t *arena, arena_chunk_t *chunk) +arena_chunk_discard(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk) { + size_t sn, hugepage; + bool committed; + chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; + + chunk_deregister(chunk, &chunk->node); + + sn = extent_node_sn_get(&chunk->node); + hugepage = chunk->hugepage; + committed = (arena_mapbits_decommitted_get(chunk, map_bias) == 0); + if (!committed) { + /* + * Decommit the header. Mark the chunk as decommitted even if + * header decommit fails, since treating a partially committed + * chunk as committed has a high potential for causing later + * access of decommitted memory. + */ + chunk_hooks = chunk_hooks_get(tsdn, arena); + chunk_hooks.decommit(chunk, chunksize, 0, map_bias << LG_PAGE, + arena->ind); + } + if (!hugepage) { + /* + * Convert chunk back to the default state, so that all + * subsequent chunk allocations start out with chunks that can + * be backed by transparent huge pages. 
+ */ + pages_huge(chunk, chunksize); + } + + chunk_dalloc_cache(tsdn, arena, &chunk_hooks, (void *)chunk, chunksize, + sn, committed); + + if (config_stats) { + arena->stats.mapped -= chunksize; + arena->stats.metadata_mapped -= (map_bias << LG_PAGE); + } +} + +static void +arena_spare_discard(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *spare) +{ + + assert(arena->spare != spare); + + if (arena_mapbits_dirty_get(spare, map_bias) != 0) { + arena_run_dirty_remove(arena, spare, map_bias, + chunk_npages-map_bias); + } + + arena_chunk_discard(tsdn, arena, spare); +} + +static void +arena_chunk_dalloc(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk) +{ + arena_chunk_t *spare; assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0); @@ -732,49 +797,14 @@ arena_chunk_dalloc(arena_t *arena, arena_chunk_t *chunk) assert(arena_mapbits_decommitted_get(chunk, map_bias) == arena_mapbits_decommitted_get(chunk, chunk_npages-1)); - /* - * Remove run from the runs_avail tree, so that the arena does not use - * it. - */ + /* Remove run from runs_avail, so that the arena does not use it. */ arena_avail_remove(arena, chunk, map_bias, chunk_npages-map_bias); - if (arena->spare != NULL) { - arena_chunk_t *spare = arena->spare; - chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; - bool committed; - - arena->spare = chunk; - if (arena_mapbits_dirty_get(spare, map_bias) != 0) { - arena_run_dirty_remove(arena, spare, map_bias, - chunk_npages-map_bias); - } - - chunk_deregister(spare, &spare->node); - - committed = (arena_mapbits_decommitted_get(spare, map_bias) == - 0); - if (!committed) { - /* - * Decommit the header. Mark the chunk as decommitted - * even if header decommit fails, since treating a - * partially committed chunk as committed has a high - * potential for causing later access of decommitted - * memory. 
- */ - chunk_hooks = chunk_hooks_get(arena); - chunk_hooks.decommit(spare, chunksize, 0, map_bias << - LG_PAGE, arena->ind); - } - - chunk_dalloc_cache(arena, &chunk_hooks, (void *)spare, - chunksize, committed); - - if (config_stats) { - arena->stats.mapped -= chunksize; - arena->stats.metadata_mapped -= (map_bias << LG_PAGE); - } - } else - arena->spare = chunk; + ql_remove(&arena->achunks, &chunk->node, ql_link); + spare = arena->spare; + arena->spare = chunk; + if (spare != NULL) + arena_spare_discard(tsdn, arena, spare); } static void @@ -816,6 +846,17 @@ arena_huge_dalloc_stats_update(arena_t *arena, size_t usize) arena->stats.hstats[index].curhchunks--; } +static void +arena_huge_reset_stats_cancel(arena_t *arena, size_t usize) +{ + szind_t index = size2index(usize) - nlclasses - NBINS; + + cassert(config_stats); + + arena->stats.ndalloc_huge++; + arena->stats.hstats[index].ndalloc--; +} + static void arena_huge_dalloc_stats_update_undo(arena_t *arena, size_t usize) { @@ -847,243 +888,240 @@ arena_huge_ralloc_stats_update_undo(arena_t *arena, size_t oldsize, } extent_node_t * -arena_node_alloc(arena_t *arena) +arena_node_alloc(tsdn_t *tsdn, arena_t *arena) { extent_node_t *node; - malloc_mutex_lock(&arena->node_cache_mtx); + malloc_mutex_lock(tsdn, &arena->node_cache_mtx); node = ql_last(&arena->node_cache, ql_link); if (node == NULL) { - malloc_mutex_unlock(&arena->node_cache_mtx); - return (base_alloc(sizeof(extent_node_t))); + malloc_mutex_unlock(tsdn, &arena->node_cache_mtx); + return (base_alloc(tsdn, sizeof(extent_node_t))); } ql_tail_remove(&arena->node_cache, extent_node_t, ql_link); - malloc_mutex_unlock(&arena->node_cache_mtx); + malloc_mutex_unlock(tsdn, &arena->node_cache_mtx); return (node); } void -arena_node_dalloc(arena_t *arena, extent_node_t *node) +arena_node_dalloc(tsdn_t *tsdn, arena_t *arena, extent_node_t *node) { - malloc_mutex_lock(&arena->node_cache_mtx); + malloc_mutex_lock(tsdn, &arena->node_cache_mtx); ql_elm_new(node, ql_link); 
ql_tail_insert(&arena->node_cache, node, ql_link); - malloc_mutex_unlock(&arena->node_cache_mtx); + malloc_mutex_unlock(tsdn, &arena->node_cache_mtx); } static void * -arena_chunk_alloc_huge_hard(arena_t *arena, chunk_hooks_t *chunk_hooks, - size_t usize, size_t alignment, bool *zero, size_t csize) +arena_chunk_alloc_huge_hard(tsdn_t *tsdn, arena_t *arena, + chunk_hooks_t *chunk_hooks, size_t usize, size_t alignment, size_t *sn, + bool *zero, size_t csize) { void *ret; bool commit = true; - ret = chunk_alloc_wrapper(arena, chunk_hooks, NULL, csize, alignment, - zero, &commit); + ret = chunk_alloc_wrapper(tsdn, arena, chunk_hooks, NULL, csize, + alignment, sn, zero, &commit); if (ret == NULL) { /* Revert optimistic stats updates. */ - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); if (config_stats) { arena_huge_malloc_stats_update_undo(arena, usize); arena->stats.mapped -= usize; } - arena->nactive -= (usize >> LG_PAGE); - malloc_mutex_unlock(&arena->lock); + arena_nactive_sub(arena, usize >> LG_PAGE); + malloc_mutex_unlock(tsdn, &arena->lock); } return (ret); } void * -arena_chunk_alloc_huge(arena_t *arena, size_t usize, size_t alignment, - bool *zero) +arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize, + size_t alignment, size_t *sn, bool *zero) { void *ret; chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; size_t csize = CHUNK_CEILING(usize); + bool commit = true; - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); /* Optimistically update stats. 
*/ if (config_stats) { arena_huge_malloc_stats_update(arena, usize); arena->stats.mapped += usize; } - arena->nactive += (usize >> LG_PAGE); + arena_nactive_add(arena, usize >> LG_PAGE); - ret = chunk_alloc_cache(arena, &chunk_hooks, NULL, csize, alignment, - zero, true); - malloc_mutex_unlock(&arena->lock); + ret = chunk_alloc_cache(tsdn, arena, &chunk_hooks, NULL, csize, + alignment, sn, zero, &commit, true); + malloc_mutex_unlock(tsdn, &arena->lock); if (ret == NULL) { - ret = arena_chunk_alloc_huge_hard(arena, &chunk_hooks, usize, - alignment, zero, csize); + ret = arena_chunk_alloc_huge_hard(tsdn, arena, &chunk_hooks, + usize, alignment, sn, zero, csize); } - if (config_stats && ret != NULL) - stats_cactive_add(usize); return (ret); } void -arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t usize) +arena_chunk_dalloc_huge(tsdn_t *tsdn, arena_t *arena, void *chunk, size_t usize, + size_t sn) { chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; size_t csize; csize = CHUNK_CEILING(usize); - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); if (config_stats) { arena_huge_dalloc_stats_update(arena, usize); arena->stats.mapped -= usize; - stats_cactive_sub(usize); } - arena->nactive -= (usize >> LG_PAGE); + arena_nactive_sub(arena, usize >> LG_PAGE); - chunk_dalloc_cache(arena, &chunk_hooks, chunk, csize, true); - malloc_mutex_unlock(&arena->lock); + chunk_dalloc_cache(tsdn, arena, &chunk_hooks, chunk, csize, sn, true); + malloc_mutex_unlock(tsdn, &arena->lock); } void -arena_chunk_ralloc_huge_similar(arena_t *arena, void *chunk, size_t oldsize, - size_t usize) +arena_chunk_ralloc_huge_similar(tsdn_t *tsdn, arena_t *arena, void *chunk, + size_t oldsize, size_t usize) { assert(CHUNK_CEILING(oldsize) == CHUNK_CEILING(usize)); assert(oldsize != usize); - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); if (config_stats) arena_huge_ralloc_stats_update(arena, oldsize, usize); - if (oldsize < usize) { - size_t 
udiff = usize - oldsize; - arena->nactive += udiff >> LG_PAGE; - if (config_stats) - stats_cactive_add(udiff); - } else { - size_t udiff = oldsize - usize; - arena->nactive -= udiff >> LG_PAGE; - if (config_stats) - stats_cactive_sub(udiff); - } - malloc_mutex_unlock(&arena->lock); + if (oldsize < usize) + arena_nactive_add(arena, (usize - oldsize) >> LG_PAGE); + else + arena_nactive_sub(arena, (oldsize - usize) >> LG_PAGE); + malloc_mutex_unlock(tsdn, &arena->lock); } void -arena_chunk_ralloc_huge_shrink(arena_t *arena, void *chunk, size_t oldsize, - size_t usize) +arena_chunk_ralloc_huge_shrink(tsdn_t *tsdn, arena_t *arena, void *chunk, + size_t oldsize, size_t usize, size_t sn) { size_t udiff = oldsize - usize; size_t cdiff = CHUNK_CEILING(oldsize) - CHUNK_CEILING(usize); - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); if (config_stats) { arena_huge_ralloc_stats_update(arena, oldsize, usize); - if (cdiff != 0) { + if (cdiff != 0) arena->stats.mapped -= cdiff; - stats_cactive_sub(udiff); - } } - arena->nactive -= udiff >> LG_PAGE; + arena_nactive_sub(arena, udiff >> LG_PAGE); if (cdiff != 0) { chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; void *nchunk = (void *)((uintptr_t)chunk + CHUNK_CEILING(usize)); - chunk_dalloc_cache(arena, &chunk_hooks, nchunk, cdiff, true); + chunk_dalloc_cache(tsdn, arena, &chunk_hooks, nchunk, cdiff, + sn, true); } - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); } static bool -arena_chunk_ralloc_huge_expand_hard(arena_t *arena, chunk_hooks_t *chunk_hooks, - void *chunk, size_t oldsize, size_t usize, bool *zero, void *nchunk, - size_t udiff, size_t cdiff) +arena_chunk_ralloc_huge_expand_hard(tsdn_t *tsdn, arena_t *arena, + chunk_hooks_t *chunk_hooks, void *chunk, size_t oldsize, size_t usize, + size_t *sn, bool *zero, void *nchunk, size_t udiff, size_t cdiff) { bool err; bool commit = true; - err = (chunk_alloc_wrapper(arena, chunk_hooks, nchunk, cdiff, chunksize, - 
zero, &commit) == NULL); + err = (chunk_alloc_wrapper(tsdn, arena, chunk_hooks, nchunk, cdiff, + chunksize, sn, zero, &commit) == NULL); if (err) { /* Revert optimistic stats updates. */ - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); if (config_stats) { arena_huge_ralloc_stats_update_undo(arena, oldsize, usize); arena->stats.mapped -= cdiff; } - arena->nactive -= (udiff >> LG_PAGE); - malloc_mutex_unlock(&arena->lock); + arena_nactive_sub(arena, udiff >> LG_PAGE); + malloc_mutex_unlock(tsdn, &arena->lock); } else if (chunk_hooks->merge(chunk, CHUNK_CEILING(oldsize), nchunk, cdiff, true, arena->ind)) { - chunk_dalloc_arena(arena, chunk_hooks, nchunk, cdiff, *zero, - true); + chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, nchunk, cdiff, + *sn, *zero, true); err = true; } return (err); } bool -arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, size_t oldsize, - size_t usize, bool *zero) +arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, void *chunk, + size_t oldsize, size_t usize, bool *zero) { bool err; - chunk_hooks_t chunk_hooks = chunk_hooks_get(arena); + chunk_hooks_t chunk_hooks = chunk_hooks_get(tsdn, arena); void *nchunk = (void *)((uintptr_t)chunk + CHUNK_CEILING(oldsize)); size_t udiff = usize - oldsize; size_t cdiff = CHUNK_CEILING(usize) - CHUNK_CEILING(oldsize); + size_t sn; + bool commit = true; - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); /* Optimistically update stats. 
*/ if (config_stats) { arena_huge_ralloc_stats_update(arena, oldsize, usize); arena->stats.mapped += cdiff; } - arena->nactive += (udiff >> LG_PAGE); + arena_nactive_add(arena, udiff >> LG_PAGE); - err = (chunk_alloc_cache(arena, &arena->chunk_hooks, nchunk, cdiff, - chunksize, zero, true) == NULL); - malloc_mutex_unlock(&arena->lock); + err = (chunk_alloc_cache(tsdn, arena, &chunk_hooks, nchunk, cdiff, + chunksize, &sn, zero, &commit, true) == NULL); + malloc_mutex_unlock(tsdn, &arena->lock); if (err) { - err = arena_chunk_ralloc_huge_expand_hard(arena, &chunk_hooks, - chunk, oldsize, usize, zero, nchunk, udiff, - cdiff); + err = arena_chunk_ralloc_huge_expand_hard(tsdn, arena, + &chunk_hooks, chunk, oldsize, usize, &sn, zero, nchunk, + udiff, cdiff); } else if (chunk_hooks.merge(chunk, CHUNK_CEILING(oldsize), nchunk, cdiff, true, arena->ind)) { - chunk_dalloc_arena(arena, &chunk_hooks, nchunk, cdiff, *zero, - true); + chunk_dalloc_wrapper(tsdn, arena, &chunk_hooks, nchunk, cdiff, + sn, *zero, true); err = true; } - if (config_stats && !err) - stats_cactive_add(udiff); return (err); } /* * Do first-best-fit run selection, i.e. select the lowest run that best fits. - * Run sizes are quantized, so not all candidate runs are necessarily exactly - * the same size. + * Run sizes are indexed, so not all candidate runs are necessarily exactly the + * same size. 
*/ static arena_run_t * arena_run_first_best_fit(arena_t *arena, size_t size) { - size_t search_size = run_quantize_first(size); - arena_chunk_map_misc_t *key = arena_miscelm_key_create(search_size); - arena_chunk_map_misc_t *miscelm = - arena_avail_tree_nsearch(&arena->runs_avail, key); - if (miscelm == NULL) - return (NULL); - return (&miscelm->run); + pszind_t pind, i; + + pind = psz2ind(run_quantize_ceil(size)); + + for (i = pind; pind2sz(i) <= chunksize; i++) { + arena_chunk_map_misc_t *miscelm = arena_run_heap_first( + &arena->runs_avail[i]); + if (miscelm != NULL) + return (&miscelm->run); + } + + return (NULL); } static arena_run_t * arena_run_alloc_large_helper(arena_t *arena, size_t size, bool zero) { - arena_run_t *run = arena_run_first_best_fit(arena, s2u(size)); + arena_run_t *run = arena_run_first_best_fit(arena, size); if (run != NULL) { if (arena_run_split_large(arena, run, size, zero)) run = NULL; @@ -1092,7 +1130,7 @@ arena_run_alloc_large_helper(arena_t *arena, size_t size, bool zero) } static arena_run_t * -arena_run_alloc_large(arena_t *arena, size_t size, bool zero) +arena_run_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t size, bool zero) { arena_chunk_t *chunk; arena_run_t *run; @@ -1108,9 +1146,9 @@ arena_run_alloc_large(arena_t *arena, size_t size, bool zero) /* * No usable runs. Create a new chunk from which to allocate the run. 
*/ - chunk = arena_chunk_alloc(arena); + chunk = arena_chunk_alloc(tsdn, arena); if (chunk != NULL) { - run = &arena_miscelm_get(chunk, map_bias)->run; + run = &arena_miscelm_get_mutable(chunk, map_bias)->run; if (arena_run_split_large(arena, run, size, zero)) run = NULL; return (run); @@ -1136,7 +1174,7 @@ arena_run_alloc_small_helper(arena_t *arena, size_t size, szind_t binind) } static arena_run_t * -arena_run_alloc_small(arena_t *arena, size_t size, szind_t binind) +arena_run_alloc_small(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t binind) { arena_chunk_t *chunk; arena_run_t *run; @@ -1153,9 +1191,9 @@ arena_run_alloc_small(arena_t *arena, size_t size, szind_t binind) /* * No usable runs. Create a new chunk from which to allocate the run. */ - chunk = arena_chunk_alloc(arena); + chunk = arena_chunk_alloc(tsdn, arena); if (chunk != NULL) { - run = &arena_miscelm_get(chunk, map_bias)->run; + run = &arena_miscelm_get_mutable(chunk, map_bias)->run; if (arena_run_split_small(arena, run, size, binind)) run = NULL; return (run); @@ -1178,42 +1216,239 @@ arena_lg_dirty_mult_valid(ssize_t lg_dirty_mult) } ssize_t -arena_lg_dirty_mult_get(arena_t *arena) +arena_lg_dirty_mult_get(tsdn_t *tsdn, arena_t *arena) { ssize_t lg_dirty_mult; - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); lg_dirty_mult = arena->lg_dirty_mult; - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); return (lg_dirty_mult); } bool -arena_lg_dirty_mult_set(arena_t *arena, ssize_t lg_dirty_mult) +arena_lg_dirty_mult_set(tsdn_t *tsdn, arena_t *arena, ssize_t lg_dirty_mult) { if (!arena_lg_dirty_mult_valid(lg_dirty_mult)) return (true); - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); arena->lg_dirty_mult = lg_dirty_mult; - arena_maybe_purge(arena); - malloc_mutex_unlock(&arena->lock); + arena_maybe_purge(tsdn, arena); + malloc_mutex_unlock(tsdn, &arena->lock); return (false); } -void -arena_maybe_purge(arena_t *arena) 
+static void +arena_decay_deadline_init(arena_t *arena) { + assert(opt_purge == purge_mode_decay); + + /* + * Generate a new deadline that is uniformly random within the next + * epoch after the current one. + */ + nstime_copy(&arena->decay.deadline, &arena->decay.epoch); + nstime_add(&arena->decay.deadline, &arena->decay.interval); + if (arena->decay.time > 0) { + nstime_t jitter; + + nstime_init(&jitter, prng_range_u64(&arena->decay.jitter_state, + nstime_ns(&arena->decay.interval))); + nstime_add(&arena->decay.deadline, &jitter); + } +} + +static bool +arena_decay_deadline_reached(const arena_t *arena, const nstime_t *time) +{ + + assert(opt_purge == purge_mode_decay); + + return (nstime_compare(&arena->decay.deadline, time) <= 0); +} + +static size_t +arena_decay_backlog_npages_limit(const arena_t *arena) +{ + static const uint64_t h_steps[] = { +#define STEP(step, h, x, y) \ + h, + SMOOTHSTEP +#undef STEP + }; + uint64_t sum; + size_t npages_limit_backlog; + unsigned i; + + assert(opt_purge == purge_mode_decay); + + /* + * For each element of decay_backlog, multiply by the corresponding + * fixed-point smoothstep decay factor. Sum the products, then divide + * to round down to the nearest whole number of pages. + */ + sum = 0; + for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) + sum += arena->decay.backlog[i] * h_steps[i]; + npages_limit_backlog = (size_t)(sum >> SMOOTHSTEP_BFP); + + return (npages_limit_backlog); +} + +static void +arena_decay_backlog_update_last(arena_t *arena) +{ + size_t ndirty_delta = (arena->ndirty > arena->decay.ndirty) ? 
+ arena->ndirty - arena->decay.ndirty : 0; + arena->decay.backlog[SMOOTHSTEP_NSTEPS-1] = ndirty_delta; +} + +static void +arena_decay_backlog_update(arena_t *arena, uint64_t nadvance_u64) +{ + + if (nadvance_u64 >= SMOOTHSTEP_NSTEPS) { + memset(arena->decay.backlog, 0, (SMOOTHSTEP_NSTEPS-1) * + sizeof(size_t)); + } else { + size_t nadvance_z = (size_t)nadvance_u64; + + assert((uint64_t)nadvance_z == nadvance_u64); + + memmove(arena->decay.backlog, &arena->decay.backlog[nadvance_z], + (SMOOTHSTEP_NSTEPS - nadvance_z) * sizeof(size_t)); + if (nadvance_z > 1) { + memset(&arena->decay.backlog[SMOOTHSTEP_NSTEPS - + nadvance_z], 0, (nadvance_z-1) * sizeof(size_t)); + } + } + + arena_decay_backlog_update_last(arena); +} + +static void +arena_decay_epoch_advance_helper(arena_t *arena, const nstime_t *time) +{ + uint64_t nadvance_u64; + nstime_t delta; + + assert(opt_purge == purge_mode_decay); + assert(arena_decay_deadline_reached(arena, time)); + + nstime_copy(&delta, time); + nstime_subtract(&delta, &arena->decay.epoch); + nadvance_u64 = nstime_divide(&delta, &arena->decay.interval); + assert(nadvance_u64 > 0); + + /* Add nadvance_u64 decay intervals to epoch. */ + nstime_copy(&delta, &arena->decay.interval); + nstime_imultiply(&delta, nadvance_u64); + nstime_add(&arena->decay.epoch, &delta); + + /* Set a new deadline. */ + arena_decay_deadline_init(arena); + + /* Update the backlog. 
*/ + arena_decay_backlog_update(arena, nadvance_u64); +} + +static void +arena_decay_epoch_advance_purge(tsdn_t *tsdn, arena_t *arena) +{ + size_t ndirty_limit = arena_decay_backlog_npages_limit(arena); + + if (arena->ndirty > ndirty_limit) + arena_purge_to_limit(tsdn, arena, ndirty_limit); + arena->decay.ndirty = arena->ndirty; +} + +static void +arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, const nstime_t *time) +{ + + arena_decay_epoch_advance_helper(arena, time); + arena_decay_epoch_advance_purge(tsdn, arena); +} + +static void +arena_decay_init(arena_t *arena, ssize_t decay_time) +{ + + arena->decay.time = decay_time; + if (decay_time > 0) { + nstime_init2(&arena->decay.interval, decay_time, 0); + nstime_idivide(&arena->decay.interval, SMOOTHSTEP_NSTEPS); + } + + nstime_init(&arena->decay.epoch, 0); + nstime_update(&arena->decay.epoch); + arena->decay.jitter_state = (uint64_t)(uintptr_t)arena; + arena_decay_deadline_init(arena); + arena->decay.ndirty = arena->ndirty; + memset(arena->decay.backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t)); +} + +static bool +arena_decay_time_valid(ssize_t decay_time) +{ + + if (decay_time < -1) + return (false); + if (decay_time == -1 || (uint64_t)decay_time <= NSTIME_SEC_MAX) + return (true); + return (false); +} + +ssize_t +arena_decay_time_get(tsdn_t *tsdn, arena_t *arena) +{ + ssize_t decay_time; + + malloc_mutex_lock(tsdn, &arena->lock); + decay_time = arena->decay.time; + malloc_mutex_unlock(tsdn, &arena->lock); + + return (decay_time); +} + +bool +arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time) +{ + + if (!arena_decay_time_valid(decay_time)) + return (true); + + malloc_mutex_lock(tsdn, &arena->lock); + /* + * Restart decay backlog from scratch, which may cause many dirty pages + * to be immediately purged. 
It would conceptually be possible to map + * the old backlog onto the new backlog, but there is no justification + * for such complexity since decay_time changes are intended to be + * infrequent, either between the {-1, 0, >0} states, or a one-time + * arbitrary change during initial arena configuration. + */ + arena_decay_init(arena, decay_time); + arena_maybe_purge(tsdn, arena); + malloc_mutex_unlock(tsdn, &arena->lock); + + return (false); +} + +static void +arena_maybe_purge_ratio(tsdn_t *tsdn, arena_t *arena) +{ + + assert(opt_purge == purge_mode_ratio); + /* Don't purge if the option is disabled. */ if (arena->lg_dirty_mult < 0) return; - /* Don't recursively purge. */ - if (arena->purging) - return; + /* * Iterate, since preventing recursive purging could otherwise leave too * many dirty pages. @@ -1228,10 +1463,68 @@ arena_maybe_purge(arena_t *arena) */ if (arena->ndirty <= threshold) return; - arena_purge(arena, false); + arena_purge_to_limit(tsdn, arena, threshold); } } +static void +arena_maybe_purge_decay(tsdn_t *tsdn, arena_t *arena) +{ + nstime_t time; + + assert(opt_purge == purge_mode_decay); + + /* Purge all or nothing if the option is disabled. */ + if (arena->decay.time <= 0) { + if (arena->decay.time == 0) + arena_purge_to_limit(tsdn, arena, 0); + return; + } + + nstime_init(&time, 0); + nstime_update(&time); + if (unlikely(!nstime_monotonic() && nstime_compare(&arena->decay.epoch, + &time) > 0)) { + /* + * Time went backwards. Move the epoch back in time and + * generate a new deadline, with the expectation that time + * typically flows forward for long enough periods of time that + * epochs complete. Unfortunately, this strategy is susceptible + * to clock jitter triggering premature epoch advances, but + * clock jitter estimation and compensation isn't feasible here + * because calls into this code are event-driven. 
+ */ + nstime_copy(&arena->decay.epoch, &time); + arena_decay_deadline_init(arena); + } else { + /* Verify that time does not go backwards. */ + assert(nstime_compare(&arena->decay.epoch, &time) <= 0); + } + + /* + * If the deadline has been reached, advance to the current epoch and + * purge to the new limit if necessary. Note that dirty pages created + * during the current epoch are not subject to purge until a future + * epoch, so as a result purging only happens during epoch advances. + */ + if (arena_decay_deadline_reached(arena, &time)) + arena_decay_epoch_advance(tsdn, arena, &time); +} + +void +arena_maybe_purge(tsdn_t *tsdn, arena_t *arena) +{ + + /* Don't recursively purge. */ + if (arena->purging) + return; + + if (opt_purge == purge_mode_ratio) + arena_maybe_purge_ratio(tsdn, arena); + else + arena_maybe_purge_decay(tsdn, arena); +} + static size_t arena_dirty_count(arena_t *arena) { @@ -1267,35 +1560,15 @@ arena_dirty_count(arena_t *arena) } static size_t -arena_compute_npurge(arena_t *arena, bool all) -{ - size_t npurge; - - /* - * Compute the minimum number of pages that this thread should try to - * purge. - */ - if (!all) { - size_t threshold = (arena->nactive >> arena->lg_dirty_mult); - threshold = threshold < chunk_npages ? chunk_npages : threshold; - - npurge = arena->ndirty - threshold; - } else - npurge = arena->ndirty; - - return (npurge); -} - -static size_t -arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks, bool all, - size_t npurge, arena_runs_dirty_link_t *purge_runs_sentinel, +arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, + size_t ndirty_limit, arena_runs_dirty_link_t *purge_runs_sentinel, extent_node_t *purge_chunks_sentinel) { arena_runs_dirty_link_t *rdelm, *rdelm_next; extent_node_t *chunkselm; size_t nstashed = 0; - /* Stash at least npurge pages. */ + /* Stash runs/chunks according to ndirty_limit. 
*/ for (rdelm = qr_next(&arena->runs_dirty, rd_link), chunkselm = qr_next(&arena->chunks_cache, cc_link); rdelm != &arena->runs_dirty; rdelm = rdelm_next) { @@ -1304,24 +1577,32 @@ arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks, bool all, if (rdelm == &chunkselm->rd) { extent_node_t *chunkselm_next; - bool zero; + size_t sn; + bool zero, commit; UNUSED void *chunk; + npages = extent_node_size_get(chunkselm) >> LG_PAGE; + if (opt_purge == purge_mode_decay && arena->ndirty - + (nstashed + npages) < ndirty_limit) + break; + chunkselm_next = qr_next(chunkselm, cc_link); /* * Allocate. chunkselm remains valid due to the * dalloc_node=false argument to chunk_alloc_cache(). */ zero = false; - chunk = chunk_alloc_cache(arena, chunk_hooks, + commit = false; + chunk = chunk_alloc_cache(tsdn, arena, chunk_hooks, extent_node_addr_get(chunkselm), - extent_node_size_get(chunkselm), chunksize, &zero, - false); + extent_node_size_get(chunkselm), chunksize, &sn, + &zero, &commit, false); assert(chunk == extent_node_addr_get(chunkselm)); assert(zero == extent_node_zeroed_get(chunkselm)); extent_node_dirty_insert(chunkselm, purge_runs_sentinel, purge_chunks_sentinel); - npages = extent_node_size_get(chunkselm) >> LG_PAGE; + assert(npages == (extent_node_size_get(chunkselm) >> + LG_PAGE)); chunkselm = chunkselm_next; } else { arena_chunk_t *chunk = @@ -1334,6 +1615,9 @@ arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks, bool all, arena_mapbits_unallocated_size_get(chunk, pageind); npages = run_size >> LG_PAGE; + if (opt_purge == purge_mode_decay && arena->ndirty - + (nstashed + npages) < ndirty_limit) + break; assert(pageind + npages <= chunk_npages); assert(arena_mapbits_dirty_get(chunk, pageind) == @@ -1344,7 +1628,7 @@ arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks, bool all, * prior to allocation. */ if (chunk == arena->spare) - arena_chunk_alloc(arena); + arena_chunk_alloc(tsdn, arena); /* Temporarily allocate the free dirty run. 
*/ arena_run_split_large(arena, run, run_size, false); @@ -1359,7 +1643,8 @@ arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks, bool all, } nstashed += npages; - if (!all && nstashed >= npurge) + if (opt_purge == purge_mode_ratio && arena->ndirty - nstashed <= + ndirty_limit) break; } @@ -1367,7 +1652,7 @@ arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks, bool all, } static size_t -arena_purge_stashed(arena_t *arena, chunk_hooks_t *chunk_hooks, +arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, arena_runs_dirty_link_t *purge_runs_sentinel, extent_node_t *purge_chunks_sentinel) { @@ -1379,7 +1664,7 @@ arena_purge_stashed(arena_t *arena, chunk_hooks_t *chunk_hooks, nmadvise = 0; npurged = 0; - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); for (rdelm = qr_next(purge_runs_sentinel, rd_link), chunkselm = qr_next(purge_chunks_sentinel, cc_link); rdelm != purge_runs_sentinel; rdelm = qr_next(rdelm, rd_link)) { @@ -1408,6 +1693,17 @@ arena_purge_stashed(arena_t *arena, chunk_hooks_t *chunk_hooks, run_size = arena_mapbits_large_size_get(chunk, pageind); npages = run_size >> LG_PAGE; + /* + * If this is the first run purged within chunk, mark + * the chunk as non-huge. This will prevent all use of + * transparent huge pages for this chunk until the chunk + * as a whole is deallocated. + */ + if (chunk->hugepage) { + pages_nohuge(chunk, chunksize); + chunk->hugepage = false; + } + assert(pageind + npages <= chunk_npages); assert(!arena_mapbits_decommitted_get(chunk, pageind)); assert(!arena_mapbits_decommitted_get(chunk, @@ -1418,7 +1714,7 @@ arena_purge_stashed(arena_t *arena, chunk_hooks_t *chunk_hooks, flag_unzeroed = 0; flags = CHUNK_MAP_DECOMMITTED; } else { - flag_unzeroed = chunk_purge_wrapper(arena, + flag_unzeroed = chunk_purge_wrapper(tsdn, arena, chunk_hooks, chunk, chunksize, pageind << LG_PAGE, run_size) ? 
CHUNK_MAP_UNZEROED : 0; flags = flag_unzeroed; @@ -1449,7 +1745,7 @@ arena_purge_stashed(arena_t *arena, chunk_hooks_t *chunk_hooks, if (config_stats) nmadvise++; } - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); if (config_stats) { arena->stats.nmadvise += nmadvise; @@ -1460,7 +1756,7 @@ arena_purge_stashed(arena_t *arena, chunk_hooks_t *chunk_hooks, } static void -arena_unstash_purged(arena_t *arena, chunk_hooks_t *chunk_hooks, +arena_unstash_purged(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, arena_runs_dirty_link_t *purge_runs_sentinel, extent_node_t *purge_chunks_sentinel) { @@ -1477,13 +1773,14 @@ arena_unstash_purged(arena_t *arena, chunk_hooks_t *chunk_hooks, cc_link); void *addr = extent_node_addr_get(chunkselm); size_t size = extent_node_size_get(chunkselm); + size_t sn = extent_node_sn_get(chunkselm); bool zeroed = extent_node_zeroed_get(chunkselm); bool committed = extent_node_committed_get(chunkselm); extent_node_dirty_remove(chunkselm); - arena_node_dalloc(arena, chunkselm); + arena_node_dalloc(tsdn, arena, chunkselm); chunkselm = chunkselm_next; - chunk_dalloc_arena(arena, chunk_hooks, addr, size, - zeroed, committed); + chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, addr, + size, sn, zeroed, committed); } else { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(rdelm); @@ -1494,16 +1791,26 @@ arena_unstash_purged(arena_t *arena, chunk_hooks_t *chunk_hooks, pageind) != 0); arena_run_t *run = &miscelm->run; qr_remove(rdelm, rd_link); - arena_run_dalloc(arena, run, false, true, decommitted); + arena_run_dalloc(tsdn, arena, run, false, true, + decommitted); } } } +/* + * NB: ndirty_limit is interpreted differently depending on opt_purge: + * - purge_mode_ratio: Purge as few dirty run/chunks as possible to reach the + * desired state: + * (arena->ndirty <= ndirty_limit) + * - purge_mode_decay: Purge as many dirty runs/chunks as possible without + * violating the invariant: + * (arena->ndirty >= ndirty_limit) 
+ */ static void -arena_purge(arena_t *arena, bool all) +arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, size_t ndirty_limit) { - chunk_hooks_t chunk_hooks = chunk_hooks_get(arena); - size_t npurge, npurgeable, npurged; + chunk_hooks_t chunk_hooks = chunk_hooks_get(tsdn, arena); + size_t npurge, npurged; arena_runs_dirty_link_t purge_runs_sentinel; extent_node_t purge_chunks_sentinel; @@ -1517,34 +1824,183 @@ arena_purge(arena_t *arena, bool all) size_t ndirty = arena_dirty_count(arena); assert(ndirty == arena->ndirty); } - assert((arena->nactive >> arena->lg_dirty_mult) < arena->ndirty || all); + assert(opt_purge != purge_mode_ratio || (arena->nactive >> + arena->lg_dirty_mult) < arena->ndirty || ndirty_limit == 0); + + qr_new(&purge_runs_sentinel, rd_link); + extent_node_dirty_linkage_init(&purge_chunks_sentinel); + + npurge = arena_stash_dirty(tsdn, arena, &chunk_hooks, ndirty_limit, + &purge_runs_sentinel, &purge_chunks_sentinel); + if (npurge == 0) + goto label_return; + npurged = arena_purge_stashed(tsdn, arena, &chunk_hooks, + &purge_runs_sentinel, &purge_chunks_sentinel); + assert(npurged == npurge); + arena_unstash_purged(tsdn, arena, &chunk_hooks, &purge_runs_sentinel, + &purge_chunks_sentinel); if (config_stats) arena->stats.npurge++; - npurge = arena_compute_npurge(arena, all); - qr_new(&purge_runs_sentinel, rd_link); - extent_node_dirty_linkage_init(&purge_chunks_sentinel); - - npurgeable = arena_stash_dirty(arena, &chunk_hooks, all, npurge, - &purge_runs_sentinel, &purge_chunks_sentinel); - assert(npurgeable >= npurge); - npurged = arena_purge_stashed(arena, &chunk_hooks, &purge_runs_sentinel, - &purge_chunks_sentinel); - assert(npurged == npurgeable); - arena_unstash_purged(arena, &chunk_hooks, &purge_runs_sentinel, - &purge_chunks_sentinel); - +label_return: arena->purging = false; } void -arena_purge_all(arena_t *arena) +arena_purge(tsdn_t *tsdn, arena_t *arena, bool all) { - malloc_mutex_lock(&arena->lock); - arena_purge(arena, true); - 
malloc_mutex_unlock(&arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); + if (all) + arena_purge_to_limit(tsdn, arena, 0); + else + arena_maybe_purge(tsdn, arena); + malloc_mutex_unlock(tsdn, &arena->lock); +} + +static void +arena_achunk_prof_reset(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk) +{ + size_t pageind, npages; + + cassert(config_prof); + assert(opt_prof); + + /* + * Iterate over the allocated runs and remove profiled allocations from + * the sample set. + */ + for (pageind = map_bias; pageind < chunk_npages; pageind += npages) { + if (arena_mapbits_allocated_get(chunk, pageind) != 0) { + if (arena_mapbits_large_get(chunk, pageind) != 0) { + void *ptr = (void *)((uintptr_t)chunk + (pageind + << LG_PAGE)); + size_t usize = isalloc(tsd_tsdn(tsd), ptr, + config_prof); + + prof_free(tsd, ptr, usize); + npages = arena_mapbits_large_size_get(chunk, + pageind) >> LG_PAGE; + } else { + /* Skip small run. */ + size_t binind = arena_mapbits_binind_get(chunk, + pageind); + arena_bin_info_t *bin_info = + &arena_bin_info[binind]; + npages = bin_info->run_size >> LG_PAGE; + } + } else { + /* Skip unallocated run. */ + npages = arena_mapbits_unallocated_size_get(chunk, + pageind) >> LG_PAGE; + } + assert(pageind + npages <= chunk_npages); + } +} + +void +arena_reset(tsd_t *tsd, arena_t *arena) +{ + unsigned i; + extent_node_t *node; + + /* + * Locking in this function is unintuitive. The caller guarantees that + * no concurrent operations are happening in this arena, but there are + * still reasons that some locking is necessary: + * + * - Some of the functions in the transitive closure of calls assume + * appropriate locks are held, and in some cases these locks are + * temporarily dropped to avoid lock order reversal or deadlock due to + * reentry. + * - mallctl("epoch", ...) may concurrently refresh stats. While + * strictly speaking this is a "concurrent operation", disallowing + * stats refreshes would impose an inconvenient burden. 
+ */ + + /* Remove large allocations from prof sample set. */ + if (config_prof && opt_prof) { + ql_foreach(node, &arena->achunks, ql_link) { + arena_achunk_prof_reset(tsd, arena, + extent_node_addr_get(node)); + } + } + + /* Reset curruns for large size classes. */ + if (config_stats) { + for (i = 0; i < nlclasses; i++) + arena->stats.lstats[i].curruns = 0; + } + + /* Huge allocations. */ + malloc_mutex_lock(tsd_tsdn(tsd), &arena->huge_mtx); + for (node = ql_last(&arena->huge, ql_link); node != NULL; node = + ql_last(&arena->huge, ql_link)) { + void *ptr = extent_node_addr_get(node); + size_t usize; + + malloc_mutex_unlock(tsd_tsdn(tsd), &arena->huge_mtx); + if (config_stats || (config_prof && opt_prof)) + usize = isalloc(tsd_tsdn(tsd), ptr, config_prof); + /* Remove huge allocation from prof sample set. */ + if (config_prof && opt_prof) + prof_free(tsd, ptr, usize); + huge_dalloc(tsd_tsdn(tsd), ptr); + malloc_mutex_lock(tsd_tsdn(tsd), &arena->huge_mtx); + /* Cancel out unwanted effects on stats. */ + if (config_stats) + arena_huge_reset_stats_cancel(arena, usize); + } + malloc_mutex_unlock(tsd_tsdn(tsd), &arena->huge_mtx); + + malloc_mutex_lock(tsd_tsdn(tsd), &arena->lock); + + /* Bins. */ + for (i = 0; i < NBINS; i++) { + arena_bin_t *bin = &arena->bins[i]; + malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); + bin->runcur = NULL; + arena_run_heap_new(&bin->runs); + if (config_stats) { + bin->stats.curregs = 0; + bin->stats.curruns = 0; + } + malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); + } + + /* + * Re-initialize runs_dirty such that the chunks_cache and runs_dirty + * chains directly correspond. + */ + qr_new(&arena->runs_dirty, rd_link); + for (node = qr_next(&arena->chunks_cache, cc_link); + node != &arena->chunks_cache; node = qr_next(node, cc_link)) { + qr_new(&node->rd, rd_link); + qr_meld(&arena->runs_dirty, &node->rd, rd_link); + } + + /* Arena chunks. 
*/ + for (node = ql_last(&arena->achunks, ql_link); node != NULL; node = + ql_last(&arena->achunks, ql_link)) { + ql_remove(&arena->achunks, node, ql_link); + arena_chunk_discard(tsd_tsdn(tsd), arena, + extent_node_addr_get(node)); + } + + /* Spare. */ + if (arena->spare != NULL) { + arena_chunk_discard(tsd_tsdn(tsd), arena, arena->spare); + arena->spare = NULL; + } + + assert(!arena->purging); + arena->nactive = 0; + + for (i = 0; i < NPSIZES; i++) + arena_run_heap_new(&arena->runs_avail[i]); + + malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock); } static void @@ -1660,21 +2116,9 @@ arena_run_size_get(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, return (size); } -static bool -arena_run_decommit(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run) -{ - arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); - size_t run_ind = arena_miscelm_to_pageind(miscelm); - size_t offset = run_ind << LG_PAGE; - size_t length = arena_run_size_get(arena, chunk, run, run_ind); - - return (arena->chunk_hooks.decommit(chunk, chunksize, offset, length, - arena->ind)); -} - static void -arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned, - bool decommitted) +arena_run_dalloc(tsdn_t *tsdn, arena_t *arena, arena_run_t *run, bool dirty, + bool cleaned, bool decommitted) { arena_chunk_t *chunk; arena_chunk_map_misc_t *miscelm; @@ -1687,8 +2131,7 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned, assert(run_ind < chunk_npages); size = arena_run_size_get(arena, chunk, run, run_ind); run_pages = (size >> LG_PAGE); - arena_cactive_update(arena, 0, run_pages); - arena->nactive -= run_pages; + arena_nactive_sub(arena, run_pages); /* * The run is dirty if the caller claims to have dirtied it, as well as @@ -1735,7 +2178,7 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned, if (size == arena_maxrun) { assert(run_ind == map_bias); assert(run_pages == (arena_maxrun >> LG_PAGE)); - 
arena_chunk_dalloc(arena, chunk); + arena_chunk_dalloc(tsdn, arena, chunk); } /* @@ -1746,21 +2189,12 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned, * chances of spuriously crossing the dirty page purging threshold. */ if (dirty) - arena_maybe_purge(arena); + arena_maybe_purge(tsdn, arena); } static void -arena_run_dalloc_decommit(arena_t *arena, arena_chunk_t *chunk, - arena_run_t *run) -{ - bool committed = arena_run_decommit(arena, chunk, run); - - arena_run_dalloc(arena, run, committed, false, !committed); -} - -static void -arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, - size_t oldsize, size_t newsize) +arena_run_trim_head(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, + arena_run_t *run, size_t oldsize, size_t newsize) { arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); size_t pageind = arena_miscelm_to_pageind(miscelm); @@ -1795,12 +2229,13 @@ arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, flag_dirty | (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk, pageind+head_npages))); - arena_run_dalloc(arena, run, false, false, (flag_decommitted != 0)); + arena_run_dalloc(tsdn, arena, run, false, false, (flag_decommitted != + 0)); } static void -arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, - size_t oldsize, size_t newsize, bool dirty) +arena_run_trim_tail(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, + arena_run_t *run, size_t oldsize, size_t newsize, bool dirty) { arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); size_t pageind = arena_miscelm_to_pageind(miscelm); @@ -1837,20 +2272,10 @@ arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, flag_dirty | (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk, pageind+head_npages))); - tail_miscelm = arena_miscelm_get(chunk, pageind + head_npages); + tail_miscelm = arena_miscelm_get_mutable(chunk, pageind + head_npages); tail_run = 
&tail_miscelm->run; - arena_run_dalloc(arena, tail_run, dirty, false, (flag_decommitted != - 0)); -} - -static arena_run_t * -arena_bin_runs_first(arena_bin_t *bin) -{ - arena_chunk_map_misc_t *miscelm = arena_run_tree_first(&bin->runs); - if (miscelm != NULL) - return (&miscelm->run); - - return (NULL); + arena_run_dalloc(tsdn, arena, tail_run, dirty, false, (flag_decommitted + != 0)); } static void @@ -1858,35 +2283,25 @@ arena_bin_runs_insert(arena_bin_t *bin, arena_run_t *run) { arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); - assert(arena_run_tree_search(&bin->runs, miscelm) == NULL); - - arena_run_tree_insert(&bin->runs, miscelm); -} - -static void -arena_bin_runs_remove(arena_bin_t *bin, arena_run_t *run) -{ - arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); - - assert(arena_run_tree_search(&bin->runs, miscelm) != NULL); - - arena_run_tree_remove(&bin->runs, miscelm); + arena_run_heap_insert(&bin->runs, miscelm); } static arena_run_t * arena_bin_nonfull_run_tryget(arena_bin_t *bin) { - arena_run_t *run = arena_bin_runs_first(bin); - if (run != NULL) { - arena_bin_runs_remove(bin, run); - if (config_stats) - bin->stats.reruns++; - } - return (run); + arena_chunk_map_misc_t *miscelm; + + miscelm = arena_run_heap_remove_first(&bin->runs); + if (miscelm == NULL) + return (NULL); + if (config_stats) + bin->stats.reruns++; + + return (&miscelm->run); } static arena_run_t * -arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) +arena_bin_nonfull_run_get(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin) { arena_run_t *run; szind_t binind; @@ -1902,19 +2317,19 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) bin_info = &arena_bin_info[binind]; /* Allocate a new run. 
*/ - malloc_mutex_unlock(&bin->lock); + malloc_mutex_unlock(tsdn, &bin->lock); /******************************/ - malloc_mutex_lock(&arena->lock); - run = arena_run_alloc_small(arena, bin_info->run_size, binind); + malloc_mutex_lock(tsdn, &arena->lock); + run = arena_run_alloc_small(tsdn, arena, bin_info->run_size, binind); if (run != NULL) { /* Initialize run internals. */ run->binind = binind; run->nfree = bin_info->nregs; bitmap_init(run->bitmap, &bin_info->bitmap_info); } - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); /********************************/ - malloc_mutex_lock(&bin->lock); + malloc_mutex_lock(tsdn, &bin->lock); if (run != NULL) { if (config_stats) { bin->stats.nruns++; @@ -1937,7 +2352,7 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) /* Re-fill bin->runcur, then call arena_run_reg_alloc(). */ static void * -arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) +arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin) { szind_t binind; arena_bin_info_t *bin_info; @@ -1946,7 +2361,7 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) binind = arena_bin_index(arena, bin); bin_info = &arena_bin_info[binind]; bin->runcur = NULL; - run = arena_bin_nonfull_run_get(arena, bin); + run = arena_bin_nonfull_run_get(tsdn, arena, bin); if (bin->runcur != NULL && bin->runcur->nfree > 0) { /* * Another thread updated runcur while this one ran without the @@ -1967,10 +2382,11 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) * were just deallocated from the run. 
*/ chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - if (run->nfree == bin_info->nregs) - arena_dalloc_bin_run(arena, chunk, run, bin); - else - arena_bin_lower_run(arena, chunk, run, bin); + if (run->nfree == bin_info->nregs) { + arena_dalloc_bin_run(tsdn, arena, chunk, run, + bin); + } else + arena_bin_lower_run(arena, run, bin); } return (ret); } @@ -1986,18 +2402,18 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) } void -arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, szind_t binind, - uint64_t prof_accumbytes) +arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_bin_t *tbin, + szind_t binind, uint64_t prof_accumbytes) { unsigned i, nfill; arena_bin_t *bin; assert(tbin->ncached == 0); - if (config_prof && arena_prof_accum(arena, prof_accumbytes)) - prof_idump(); + if (config_prof && arena_prof_accum(tsdn, arena, prof_accumbytes)) + prof_idump(tsdn); bin = &arena->bins[binind]; - malloc_mutex_lock(&bin->lock); + malloc_mutex_lock(tsdn, &bin->lock); for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >> tbin->lg_fill_div); i < nfill; i++) { arena_run_t *run; @@ -2005,16 +2421,15 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, szind_t binind, if ((run = bin->runcur) != NULL && run->nfree > 0) ptr = arena_run_reg_alloc(run, &arena_bin_info[binind]); else - ptr = arena_bin_malloc_hard(arena, bin); + ptr = arena_bin_malloc_hard(tsdn, arena, bin); if (ptr == NULL) { /* * OOM. tbin->avail isn't yet filled down to its first * element, so the successful allocations (if any) must - * be moved to the base of tbin->avail before bailing - * out. + * be moved just before tbin->avail before bailing out. */ if (i > 0) { - memmove(tbin->avail, &tbin->avail[nfill - i], + memmove(tbin->avail - i, tbin->avail - nfill, i * sizeof(void *)); } break; @@ -2024,7 +2439,7 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, szind_t binind, true); } /* Insert such that low regions get used first. 
*/ - tbin->avail[nfill - 1 - i] = ptr; + *(tbin->avail - nfill + i) = ptr; } if (config_stats) { bin->stats.nmalloc += i; @@ -2033,29 +2448,31 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, szind_t binind, bin->stats.nfills++; tbin->tstats.nrequests = 0; } - malloc_mutex_unlock(&bin->lock); + malloc_mutex_unlock(tsdn, &bin->lock); tbin->ncached = i; + arena_decay_tick(tsdn, arena); } void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, bool zero) { + size_t redzone_size = bin_info->redzone_size; + if (zero) { - size_t redzone_size = bin_info->redzone_size; - memset((void *)((uintptr_t)ptr - redzone_size), 0xa5, - redzone_size); - memset((void *)((uintptr_t)ptr + bin_info->reg_size), 0xa5, - redzone_size); + memset((void *)((uintptr_t)ptr - redzone_size), + JEMALLOC_ALLOC_JUNK, redzone_size); + memset((void *)((uintptr_t)ptr + bin_info->reg_size), + JEMALLOC_ALLOC_JUNK, redzone_size); } else { - memset((void *)((uintptr_t)ptr - bin_info->redzone_size), 0xa5, - bin_info->reg_interval); + memset((void *)((uintptr_t)ptr - redzone_size), + JEMALLOC_ALLOC_JUNK, bin_info->reg_interval); } } #ifdef JEMALLOC_JET #undef arena_redzone_corruption -#define arena_redzone_corruption JEMALLOC_N(arena_redzone_corruption_impl) +#define arena_redzone_corruption JEMALLOC_N(n_arena_redzone_corruption) #endif static void arena_redzone_corruption(void *ptr, size_t usize, bool after, @@ -2070,7 +2487,7 @@ arena_redzone_corruption(void *ptr, size_t usize, bool after, #undef arena_redzone_corruption #define arena_redzone_corruption JEMALLOC_N(arena_redzone_corruption) arena_redzone_corruption_t *arena_redzone_corruption = - JEMALLOC_N(arena_redzone_corruption_impl); + JEMALLOC_N(n_arena_redzone_corruption); #endif static void @@ -2085,22 +2502,22 @@ arena_redzones_validate(void *ptr, arena_bin_info_t *bin_info, bool reset) for (i = 1; i <= redzone_size; i++) { uint8_t *byte = (uint8_t *)((uintptr_t)ptr - i); - if (*byte != 0xa5) { + if (*byte != 
JEMALLOC_ALLOC_JUNK) { error = true; arena_redzone_corruption(ptr, size, false, i, *byte); if (reset) - *byte = 0xa5; + *byte = JEMALLOC_ALLOC_JUNK; } } for (i = 0; i < redzone_size; i++) { uint8_t *byte = (uint8_t *)((uintptr_t)ptr + size + i); - if (*byte != 0xa5) { + if (*byte != JEMALLOC_ALLOC_JUNK) { error = true; arena_redzone_corruption(ptr, size, true, i, *byte); if (reset) - *byte = 0xa5; + *byte = JEMALLOC_ALLOC_JUNK; } } } @@ -2111,7 +2528,7 @@ arena_redzones_validate(void *ptr, arena_bin_info_t *bin_info, bool reset) #ifdef JEMALLOC_JET #undef arena_dalloc_junk_small -#define arena_dalloc_junk_small JEMALLOC_N(arena_dalloc_junk_small_impl) +#define arena_dalloc_junk_small JEMALLOC_N(n_arena_dalloc_junk_small) #endif void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info) @@ -2119,14 +2536,14 @@ arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info) size_t redzone_size = bin_info->redzone_size; arena_redzones_validate(ptr, bin_info, false); - memset((void *)((uintptr_t)ptr - redzone_size), 0x5a, + memset((void *)((uintptr_t)ptr - redzone_size), JEMALLOC_FREE_JUNK, bin_info->reg_interval); } #ifdef JEMALLOC_JET #undef arena_dalloc_junk_small #define arena_dalloc_junk_small JEMALLOC_N(arena_dalloc_junk_small) arena_dalloc_junk_small_t *arena_dalloc_junk_small = - JEMALLOC_N(arena_dalloc_junk_small_impl); + JEMALLOC_N(n_arena_dalloc_junk_small); #endif void @@ -2144,27 +2561,26 @@ arena_quarantine_junk_small(void *ptr, size_t usize) arena_redzones_validate(ptr, bin_info, true); } -void * -arena_malloc_small(arena_t *arena, size_t size, bool zero) +static void * +arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { void *ret; arena_bin_t *bin; + size_t usize; arena_run_t *run; - szind_t binind; - binind = size2index(size); assert(binind < NBINS); bin = &arena->bins[binind]; - size = index2size(binind); + usize = index2size(binind); - malloc_mutex_lock(&bin->lock); + malloc_mutex_lock(tsdn, &bin->lock); if ((run = 
bin->runcur) != NULL && run->nfree > 0) ret = arena_run_reg_alloc(run, &arena_bin_info[binind]); else - ret = arena_bin_malloc_hard(arena, bin); + ret = arena_bin_malloc_hard(tsdn, arena, bin); if (ret == NULL) { - malloc_mutex_unlock(&bin->lock); + malloc_mutex_unlock(tsdn, &bin->lock); return (NULL); } @@ -2173,9 +2589,9 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) bin->stats.nrequests++; bin->stats.curregs++; } - malloc_mutex_unlock(&bin->lock); - if (config_prof && !isthreaded && arena_prof_accum(arena, size)) - prof_idump(); + malloc_mutex_unlock(tsdn, &bin->lock); + if (config_prof && !isthreaded && arena_prof_accum(tsdn, arena, usize)) + prof_idump(tsdn); if (!zero) { if (config_fill) { @@ -2183,34 +2599,35 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) arena_alloc_junk_small(ret, &arena_bin_info[binind], false); } else if (unlikely(opt_zero)) - memset(ret, 0, size); + memset(ret, 0, usize); } - JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size); + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, usize); } else { if (config_fill && unlikely(opt_junk_alloc)) { arena_alloc_junk_small(ret, &arena_bin_info[binind], true); } - JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size); - memset(ret, 0, size); + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, usize); + memset(ret, 0, usize); } + arena_decay_tick(tsdn, arena); return (ret); } void * -arena_malloc_large(arena_t *arena, size_t size, bool zero) +arena_malloc_large(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { void *ret; size_t usize; uintptr_t random_offset; arena_run_t *run; arena_chunk_map_misc_t *miscelm; - UNUSED bool idump; + UNUSED bool idump JEMALLOC_CC_SILENCE_INIT(false); /* Large allocation. 
*/ - usize = s2u(size); - malloc_mutex_lock(&arena->lock); + usize = index2size(binind); + malloc_mutex_lock(tsdn, &arena->lock); if (config_cache_oblivious) { uint64_t r; @@ -2219,22 +2636,21 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) * that is a multiple of the cacheline size, e.g. [0 .. 63) * 64 * for 4 KiB pages and 64-byte cachelines. */ - prng64(r, LG_PAGE - LG_CACHELINE, arena->offset_state, - UINT64_C(6364136223846793009), - UINT64_C(1442695040888963409)); + r = prng_lg_range_zu(&arena->offset_state, LG_PAGE - + LG_CACHELINE, false); random_offset = ((uintptr_t)r) << LG_CACHELINE; } else random_offset = 0; - run = arena_run_alloc_large(arena, usize + large_pad, zero); + run = arena_run_alloc_large(tsdn, arena, usize + large_pad, zero); if (run == NULL) { - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); return (NULL); } miscelm = arena_run_to_miscelm(run); ret = (void *)((uintptr_t)arena_miscelm_to_rpages(miscelm) + random_offset); if (config_stats) { - szind_t index = size2index(usize) - NBINS; + szind_t index = binind - NBINS; arena->stats.nmalloc_large++; arena->stats.nrequests_large++; @@ -2245,25 +2661,45 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) } if (config_prof) idump = arena_prof_accum_locked(arena, usize); - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); if (config_prof && idump) - prof_idump(); + prof_idump(tsdn); if (!zero) { if (config_fill) { if (unlikely(opt_junk_alloc)) - memset(ret, 0xa5, usize); + memset(ret, JEMALLOC_ALLOC_JUNK, usize); else if (unlikely(opt_zero)) memset(ret, 0, usize); } } + arena_decay_tick(tsdn, arena); return (ret); } +void * +arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, + bool zero) +{ + + assert(!tsdn_null(tsdn) || arena != NULL); + + if (likely(!tsdn_null(tsdn))) + arena = arena_choose(tsdn_tsd(tsdn), arena); + if (unlikely(arena == NULL)) + return (NULL); + + if (likely(size <= 
SMALL_MAXCLASS)) + return (arena_malloc_small(tsdn, arena, ind, zero)); + if (likely(size <= large_maxclass)) + return (arena_malloc_large(tsdn, arena, ind, zero)); + return (huge_malloc(tsdn, arena, index2size(ind), zero)); +} + /* Only handles large allocations that require more than page alignment. */ static void * -arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, +arena_palloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero) { void *ret; @@ -2273,19 +2709,21 @@ arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, arena_chunk_map_misc_t *miscelm; void *rpages; + assert(!tsdn_null(tsdn) || arena != NULL); assert(usize == PAGE_CEILING(usize)); - arena = arena_choose(tsd, arena); + if (likely(!tsdn_null(tsdn))) + arena = arena_choose(tsdn_tsd(tsdn), arena); if (unlikely(arena == NULL)) return (NULL); alignment = PAGE_CEILING(alignment); alloc_size = usize + large_pad + alignment - PAGE; - malloc_mutex_lock(&arena->lock); - run = arena_run_alloc_large(arena, alloc_size, false); + malloc_mutex_lock(tsdn, &arena->lock); + run = arena_run_alloc_large(tsdn, arena, alloc_size, false); if (run == NULL) { - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); return (NULL); } chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); @@ -2300,16 +2738,16 @@ arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, arena_chunk_map_misc_t *head_miscelm = miscelm; arena_run_t *head_run = run; - miscelm = arena_miscelm_get(chunk, + miscelm = arena_miscelm_get_mutable(chunk, arena_miscelm_to_pageind(head_miscelm) + (leadsize >> LG_PAGE)); run = &miscelm->run; - arena_run_trim_head(arena, chunk, head_run, alloc_size, + arena_run_trim_head(tsdn, arena, chunk, head_run, alloc_size, alloc_size - leadsize); } if (trailsize != 0) { - arena_run_trim_tail(arena, chunk, run, usize + large_pad + + arena_run_trim_tail(tsdn, arena, chunk, run, usize + large_pad + 
trailsize, usize + large_pad, false); } if (arena_run_init_large(arena, run, usize + large_pad, zero)) { @@ -2320,8 +2758,8 @@ arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, run_ind) != 0); assert(decommitted); /* Cause of OOM. */ - arena_run_dalloc(arena, run, dirty, false, decommitted); - malloc_mutex_unlock(&arena->lock); + arena_run_dalloc(tsdn, arena, run, dirty, false, decommitted); + malloc_mutex_unlock(tsdn, &arena->lock); return (NULL); } ret = arena_miscelm_to_rpages(miscelm); @@ -2336,19 +2774,20 @@ arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, arena->stats.lstats[index].nrequests++; arena->stats.lstats[index].curruns++; } - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); if (config_fill && !zero) { if (unlikely(opt_junk_alloc)) - memset(ret, 0xa5, usize); + memset(ret, JEMALLOC_ALLOC_JUNK, usize); else if (unlikely(opt_zero)) memset(ret, 0, usize); } + arena_decay_tick(tsdn, arena); return (ret); } void * -arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, +arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache) { void *ret; @@ -2356,7 +2795,8 @@ arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, if (usize <= SMALL_MAXCLASS && (alignment < PAGE || (alignment == PAGE && (usize & PAGE_MASK) == 0))) { /* Small; alignment doesn't require special run placement. */ - ret = arena_malloc(tsd, arena, usize, zero, tcache); + ret = arena_malloc(tsdn, arena, usize, size2index(usize), zero, + tcache, true); } else if (usize <= large_maxclass && alignment <= PAGE) { /* * Large; alignment doesn't require special run placement. @@ -2364,25 +2804,25 @@ arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, * the base of the run, so do some bit manipulation to retrieve * the base. 
*/ - ret = arena_malloc(tsd, arena, usize, zero, tcache); + ret = arena_malloc(tsdn, arena, usize, size2index(usize), zero, + tcache, true); if (config_cache_oblivious) ret = (void *)((uintptr_t)ret & ~PAGE_MASK); } else { if (likely(usize <= large_maxclass)) { - ret = arena_palloc_large(tsd, arena, usize, alignment, + ret = arena_palloc_large(tsdn, arena, usize, alignment, zero); } else if (likely(alignment <= chunksize)) - ret = huge_malloc(tsd, arena, usize, zero, tcache); + ret = huge_malloc(tsdn, arena, usize, zero); else { - ret = huge_palloc(tsd, arena, usize, alignment, zero, - tcache); + ret = huge_palloc(tsdn, arena, usize, alignment, zero); } } return (ret); } void -arena_prof_promoted(const void *ptr, size_t size) +arena_prof_promoted(tsdn_t *tsdn, const void *ptr, size_t size) { arena_chunk_t *chunk; size_t pageind; @@ -2391,8 +2831,8 @@ arena_prof_promoted(const void *ptr, size_t size) cassert(config_prof); assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); - assert(isalloc(ptr, false) == LARGE_MINCLASS); - assert(isalloc(ptr, true) == LARGE_MINCLASS); + assert(isalloc(tsdn, ptr, false) == LARGE_MINCLASS); + assert(isalloc(tsdn, ptr, true) == LARGE_MINCLASS); assert(size <= SMALL_MAXCLASS); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); @@ -2401,8 +2841,8 @@ arena_prof_promoted(const void *ptr, size_t size) assert(binind < NBINS); arena_mapbits_large_binind_set(chunk, pageind, binind); - assert(isalloc(ptr, false) == LARGE_MINCLASS); - assert(isalloc(ptr, true) == size); + assert(isalloc(tsdn, ptr, false) == LARGE_MINCLASS); + assert(isalloc(tsdn, ptr, true) == size); } static void @@ -2418,48 +2858,51 @@ arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, &chunk->node), bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; + /* + * The following block's conditional is necessary because if the + * run only contains one region, then it never gets inserted + * into the non-full runs tree. 
+ */ if (bin_info->nregs != 1) { - /* - * This block's conditional is necessary because if the - * run only contains one region, then it never gets - * inserted into the non-full runs tree. - */ - arena_bin_runs_remove(bin, run); + arena_chunk_map_misc_t *miscelm = + arena_run_to_miscelm(run); + + arena_run_heap_remove(&bin->runs, miscelm); } } } static void -arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, - arena_bin_t *bin) +arena_dalloc_bin_run(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, + arena_run_t *run, arena_bin_t *bin) { assert(run != bin->runcur); - assert(arena_run_tree_search(&bin->runs, arena_run_to_miscelm(run)) == - NULL); - malloc_mutex_unlock(&bin->lock); + malloc_mutex_unlock(tsdn, &bin->lock); /******************************/ - malloc_mutex_lock(&arena->lock); - arena_run_dalloc_decommit(arena, chunk, run); - malloc_mutex_unlock(&arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); + arena_run_dalloc(tsdn, arena, run, true, false, false); + malloc_mutex_unlock(tsdn, &arena->lock); /****************************/ - malloc_mutex_lock(&bin->lock); + malloc_mutex_lock(tsdn, &bin->lock); if (config_stats) bin->stats.curruns--; } static void -arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, - arena_bin_t *bin) +arena_bin_lower_run(arena_t *arena, arena_run_t *run, arena_bin_t *bin) { /* - * Make sure that if bin->runcur is non-NULL, it refers to the lowest - * non-full run. It is okay to NULL runcur out rather than proactively - * keeping it pointing at the lowest non-full run. + * Make sure that if bin->runcur is non-NULL, it refers to the + * oldest/lowest non-full run. It is okay to NULL runcur out rather + * than proactively keeping it pointing at the oldest/lowest non-full + * run. */ - if ((uintptr_t)run < (uintptr_t)bin->runcur) { + if (bin->runcur != NULL && + arena_snad_comp(arena_run_to_miscelm(bin->runcur), + arena_run_to_miscelm(run)) > 0) { /* Switch runcur. 
*/ if (bin->runcur->nfree > 0) arena_bin_runs_insert(bin, bin->runcur); @@ -2471,8 +2914,8 @@ arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, } static void -arena_dalloc_bin_locked_impl(arena_t *arena, arena_chunk_t *chunk, void *ptr, - arena_chunk_map_bits_t *bitselm, bool junked) +arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, + void *ptr, arena_chunk_map_bits_t *bitselm, bool junked) { size_t pageind, rpages_ind; arena_run_t *run; @@ -2482,7 +2925,7 @@ arena_dalloc_bin_locked_impl(arena_t *arena, arena_chunk_t *chunk, void *ptr, pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, pageind); - run = &arena_miscelm_get(chunk, rpages_ind)->run; + run = &arena_miscelm_get_mutable(chunk, rpages_ind)->run; binind = run->binind; bin = &arena->bins[binind]; bin_info = &arena_bin_info[binind]; @@ -2493,9 +2936,9 @@ arena_dalloc_bin_locked_impl(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_run_reg_dalloc(run, ptr); if (run->nfree == bin_info->nregs) { arena_dissociate_bin_run(chunk, run, bin); - arena_dalloc_bin_run(arena, chunk, run, bin); + arena_dalloc_bin_run(tsdn, arena, chunk, run, bin); } else if (run->nfree == 1 && run != bin->runcur) - arena_bin_lower_run(arena, chunk, run, bin); + arena_bin_lower_run(arena, run, bin); if (config_stats) { bin->stats.ndalloc++; @@ -2504,15 +2947,15 @@ arena_dalloc_bin_locked_impl(arena_t *arena, arena_chunk_t *chunk, void *ptr, } void -arena_dalloc_bin_junked_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, - arena_chunk_map_bits_t *bitselm) +arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, + arena_chunk_t *chunk, void *ptr, arena_chunk_map_bits_t *bitselm) { - arena_dalloc_bin_locked_impl(arena, chunk, ptr, bitselm, true); + arena_dalloc_bin_locked_impl(tsdn, arena, chunk, ptr, bitselm, true); } void -arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, 
+arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t pageind, arena_chunk_map_bits_t *bitselm) { arena_run_t *run; @@ -2520,16 +2963,16 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t rpages_ind; rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, pageind); - run = &arena_miscelm_get(chunk, rpages_ind)->run; + run = &arena_miscelm_get_mutable(chunk, rpages_ind)->run; bin = &arena->bins[run->binind]; - malloc_mutex_lock(&bin->lock); - arena_dalloc_bin_locked_impl(arena, chunk, ptr, bitselm, false); - malloc_mutex_unlock(&bin->lock); + malloc_mutex_lock(tsdn, &bin->lock); + arena_dalloc_bin_locked_impl(tsdn, arena, chunk, ptr, bitselm, false); + malloc_mutex_unlock(tsdn, &bin->lock); } void -arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, - size_t pageind) +arena_dalloc_small(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, + void *ptr, size_t pageind) { arena_chunk_map_bits_t *bitselm; @@ -2538,34 +2981,36 @@ arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, assert(arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk, pageind)) != BININD_INVALID); } - bitselm = arena_bitselm_get(chunk, pageind); - arena_dalloc_bin(arena, chunk, ptr, pageind, bitselm); + bitselm = arena_bitselm_get_mutable(chunk, pageind); + arena_dalloc_bin(tsdn, arena, chunk, ptr, pageind, bitselm); + arena_decay_tick(tsdn, arena); } #ifdef JEMALLOC_JET #undef arena_dalloc_junk_large -#define arena_dalloc_junk_large JEMALLOC_N(arena_dalloc_junk_large_impl) +#define arena_dalloc_junk_large JEMALLOC_N(n_arena_dalloc_junk_large) #endif void arena_dalloc_junk_large(void *ptr, size_t usize) { if (config_fill && unlikely(opt_junk_free)) - memset(ptr, 0x5a, usize); + memset(ptr, JEMALLOC_FREE_JUNK, usize); } #ifdef JEMALLOC_JET #undef arena_dalloc_junk_large #define arena_dalloc_junk_large JEMALLOC_N(arena_dalloc_junk_large) arena_dalloc_junk_large_t *arena_dalloc_junk_large = - 
JEMALLOC_N(arena_dalloc_junk_large_impl); + JEMALLOC_N(n_arena_dalloc_junk_large); #endif static void -arena_dalloc_large_locked_impl(arena_t *arena, arena_chunk_t *chunk, - void *ptr, bool junked) +arena_dalloc_large_locked_impl(tsdn_t *tsdn, arena_t *arena, + arena_chunk_t *chunk, void *ptr, bool junked) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind); + arena_chunk_map_misc_t *miscelm = arena_miscelm_get_mutable(chunk, + pageind); arena_run_t *run = &miscelm->run; if (config_fill || config_stats) { @@ -2584,32 +3029,35 @@ arena_dalloc_large_locked_impl(arena_t *arena, arena_chunk_t *chunk, } } - arena_run_dalloc_decommit(arena, chunk, run); + arena_run_dalloc(tsdn, arena, run, true, false, false); } void -arena_dalloc_large_junked_locked(arena_t *arena, arena_chunk_t *chunk, +arena_dalloc_large_junked_locked(tsdn_t *tsdn, arena_t *arena, + arena_chunk_t *chunk, void *ptr) +{ + + arena_dalloc_large_locked_impl(tsdn, arena, chunk, ptr, true); +} + +void +arena_dalloc_large(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, void *ptr) { - arena_dalloc_large_locked_impl(arena, chunk, ptr, true); -} - -void -arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr) -{ - - malloc_mutex_lock(&arena->lock); - arena_dalloc_large_locked_impl(arena, chunk, ptr, false); - malloc_mutex_unlock(&arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); + arena_dalloc_large_locked_impl(tsdn, arena, chunk, ptr, false); + malloc_mutex_unlock(tsdn, &arena->lock); + arena_decay_tick(tsdn, arena); } static void -arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr, - size_t oldsize, size_t size) +arena_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, + void *ptr, size_t oldsize, size_t size) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind); + 
arena_chunk_map_misc_t *miscelm = arena_miscelm_get_mutable(chunk, + pageind); arena_run_t *run = &miscelm->run; assert(size < oldsize); @@ -2618,8 +3066,8 @@ arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr, * Shrink the run, and make trailing pages available for other * allocations. */ - malloc_mutex_lock(&arena->lock); - arena_run_trim_tail(arena, chunk, run, oldsize + large_pad, size + + malloc_mutex_lock(tsdn, &arena->lock); + arena_run_trim_tail(tsdn, arena, chunk, run, oldsize + large_pad, size + large_pad, true); if (config_stats) { szind_t oldindex = size2index(oldsize) - NBINS; @@ -2637,12 +3085,12 @@ arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena->stats.lstats[index].nrequests++; arena->stats.lstats[index].curruns++; } - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); } static bool -arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, - size_t oldsize, size_t usize_min, size_t usize_max, bool zero) +arena_ralloc_large_grow(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, + void *ptr, size_t oldsize, size_t usize_min, size_t usize_max, bool zero) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; size_t npages = (oldsize + large_pad) >> LG_PAGE; @@ -2652,7 +3100,7 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, large_pad); /* Try to extend the run. 
*/ - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); if (pageind+npages >= chunk_npages || arena_mapbits_allocated_get(chunk, pageind+npages) != 0) goto label_fail; @@ -2675,7 +3123,7 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, if (splitsize == 0) goto label_fail; - run = &arena_miscelm_get(chunk, pageind+npages)->run; + run = &arena_miscelm_get_mutable(chunk, pageind+npages)->run; if (arena_run_split_large(arena, run, splitsize, zero)) goto label_fail; @@ -2683,10 +3131,16 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, /* * Zero the trailing bytes of the original allocation's * last page, since they are in an indeterminate state. + * There will always be trailing bytes, because ptr's + * offset from the beginning of the run is a multiple of + * CACHELINE in [0 .. PAGE). */ - assert(PAGE_CEILING(oldsize) == oldsize); - memset((void *)((uintptr_t)ptr + oldsize), 0, - PAGE_CEILING((uintptr_t)ptr) - (uintptr_t)ptr); + void *zbase = (void *)((uintptr_t)ptr + oldsize); + void *zpast = PAGE_ADDR2BASE((void *)((uintptr_t)zbase + + PAGE)); + size_t nzero = (uintptr_t)zpast - (uintptr_t)zbase; + assert(nzero > 0); + memset(zbase, 0, nzero); } size = oldsize + splitsize; @@ -2726,24 +3180,24 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena->stats.lstats[index].nrequests++; arena->stats.lstats[index].curruns++; } - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); return (false); } label_fail: - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); return (true); } #ifdef JEMALLOC_JET #undef arena_ralloc_junk_large -#define arena_ralloc_junk_large JEMALLOC_N(arena_ralloc_junk_large_impl) +#define arena_ralloc_junk_large JEMALLOC_N(n_arena_ralloc_junk_large) #endif static void arena_ralloc_junk_large(void *ptr, size_t old_usize, size_t usize) { if (config_fill && unlikely(opt_junk_free)) { - memset((void 
*)((uintptr_t)ptr + usize), 0x5a, + memset((void *)((uintptr_t)ptr + usize), JEMALLOC_FREE_JUNK, old_usize - usize); } } @@ -2751,7 +3205,7 @@ arena_ralloc_junk_large(void *ptr, size_t old_usize, size_t usize) #undef arena_ralloc_junk_large #define arena_ralloc_junk_large JEMALLOC_N(arena_ralloc_junk_large) arena_ralloc_junk_large_t *arena_ralloc_junk_large = - JEMALLOC_N(arena_ralloc_junk_large_impl); + JEMALLOC_N(n_arena_ralloc_junk_large); #endif /* @@ -2759,7 +3213,7 @@ arena_ralloc_junk_large_t *arena_ralloc_junk_large = * always fail if growing an object, and the following run is already in use. */ static bool -arena_ralloc_large(void *ptr, size_t oldsize, size_t usize_min, +arena_ralloc_large(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t usize_min, size_t usize_max, bool zero) { arena_chunk_t *chunk; @@ -2774,15 +3228,16 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t usize_min, arena = extent_node_arena_get(&chunk->node); if (oldsize < usize_max) { - bool ret = arena_ralloc_large_grow(arena, chunk, ptr, oldsize, - usize_min, usize_max, zero); + bool ret = arena_ralloc_large_grow(tsdn, arena, chunk, ptr, + oldsize, usize_min, usize_max, zero); if (config_fill && !ret && !zero) { if (unlikely(opt_junk_alloc)) { - memset((void *)((uintptr_t)ptr + oldsize), 0xa5, - isalloc(ptr, config_prof) - oldsize); + memset((void *)((uintptr_t)ptr + oldsize), + JEMALLOC_ALLOC_JUNK, + isalloc(tsdn, ptr, config_prof) - oldsize); } else if (unlikely(opt_zero)) { memset((void *)((uintptr_t)ptr + oldsize), 0, - isalloc(ptr, config_prof) - oldsize); + isalloc(tsdn, ptr, config_prof) - oldsize); } } return (ret); @@ -2791,19 +3246,27 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t usize_min, assert(oldsize > usize_max); /* Fill before shrinking in order avoid a race. 
*/ arena_ralloc_junk_large(ptr, oldsize, usize_max); - arena_ralloc_large_shrink(arena, chunk, ptr, oldsize, usize_max); + arena_ralloc_large_shrink(tsdn, arena, chunk, ptr, oldsize, usize_max); return (false); } bool -arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, - bool zero) +arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, + size_t extra, bool zero) { size_t usize_min, usize_max; + /* Calls with non-zero extra had to clamp extra. */ + assert(extra == 0 || size + extra <= HUGE_MAXCLASS); + + if (unlikely(size > HUGE_MAXCLASS)) + return (true); + usize_min = s2u(size); usize_max = s2u(size + extra); if (likely(oldsize <= large_maxclass && usize_min <= large_maxclass)) { + arena_chunk_t *chunk; + /* * Avoid moving the allocation if the size class can be left the * same. @@ -2811,37 +3274,39 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, if (oldsize <= SMALL_MAXCLASS) { assert(arena_bin_info[size2index(oldsize)].reg_size == oldsize); - if ((usize_max <= SMALL_MAXCLASS && - size2index(usize_max) == size2index(oldsize)) || - (size <= oldsize && usize_max >= oldsize)) - return (false); + if ((usize_max > SMALL_MAXCLASS || + size2index(usize_max) != size2index(oldsize)) && + (size > oldsize || usize_max < oldsize)) + return (true); } else { - if (usize_max > SMALL_MAXCLASS) { - if (!arena_ralloc_large(ptr, oldsize, usize_min, - usize_max, zero)) - return (false); - } + if (usize_max <= SMALL_MAXCLASS) + return (true); + if (arena_ralloc_large(tsdn, ptr, oldsize, usize_min, + usize_max, zero)) + return (true); } - /* Reallocation would require a move. 
*/ - return (true); + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + arena_decay_tick(tsdn, extent_node_arena_get(&chunk->node)); + return (false); } else { - return (huge_ralloc_no_move(ptr, oldsize, usize_min, usize_max, - zero)); + return (huge_ralloc_no_move(tsdn, ptr, oldsize, usize_min, + usize_max, zero)); } } static void * -arena_ralloc_move_helper(tsd_t *tsd, arena_t *arena, size_t usize, +arena_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache) { if (alignment == 0) - return (arena_malloc(tsd, arena, usize, zero, tcache)); + return (arena_malloc(tsdn, arena, usize, size2index(usize), + zero, tcache, true)); usize = sa2u(usize, alignment); - if (usize == 0) + if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) return (NULL); - return (ipalloct(tsd, usize, alignment, zero, tcache, arena)); + return (ipalloct(tsdn, usize, alignment, zero, tcache, arena)); } void * @@ -2852,14 +3317,15 @@ arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t usize; usize = s2u(size); - if (usize == 0) + if (unlikely(usize == 0 || size > HUGE_MAXCLASS)) return (NULL); if (likely(usize <= large_maxclass)) { size_t copysize; /* Try to avoid moving the allocation. */ - if (!arena_ralloc_no_move(ptr, oldsize, usize, 0, zero)) + if (!arena_ralloc_no_move(tsd_tsdn(tsd), ptr, oldsize, usize, 0, + zero)) return (ptr); /* @@ -2867,8 +3333,8 @@ arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, * the object. In that case, fall back to allocating new space * and copying. */ - ret = arena_ralloc_move_helper(tsd, arena, usize, alignment, - zero, tcache); + ret = arena_ralloc_move_helper(tsd_tsdn(tsd), arena, usize, + alignment, zero, tcache); if (ret == NULL) return (NULL); @@ -2880,7 +3346,7 @@ arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, copysize = (usize < oldsize) ? 
usize : oldsize; JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, copysize); memcpy(ret, ptr, copysize); - isqalloc(tsd, ptr, oldsize, tcache); + isqalloc(tsd, ptr, oldsize, tcache, true); } else { ret = huge_ralloc(tsd, arena, ptr, oldsize, usize, alignment, zero, tcache); @@ -2889,25 +3355,25 @@ arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, } dss_prec_t -arena_dss_prec_get(arena_t *arena) +arena_dss_prec_get(tsdn_t *tsdn, arena_t *arena) { dss_prec_t ret; - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); ret = arena->dss_prec; - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); return (ret); } bool -arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec) +arena_dss_prec_set(tsdn_t *tsdn, arena_t *arena, dss_prec_t dss_prec) { if (!have_dss) return (dss_prec != dss_prec_disabled); - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); arena->dss_prec = dss_prec; - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); return (false); } @@ -2922,27 +3388,76 @@ bool arena_lg_dirty_mult_default_set(ssize_t lg_dirty_mult) { + if (opt_purge != purge_mode_ratio) + return (true); if (!arena_lg_dirty_mult_valid(lg_dirty_mult)) return (true); atomic_write_z((size_t *)&lg_dirty_mult_default, (size_t)lg_dirty_mult); return (false); } +ssize_t +arena_decay_time_default_get(void) +{ + + return ((ssize_t)atomic_read_z((size_t *)&decay_time_default)); +} + +bool +arena_decay_time_default_set(ssize_t decay_time) +{ + + if (opt_purge != purge_mode_decay) + return (true); + if (!arena_decay_time_valid(decay_time)) + return (true); + atomic_write_z((size_t *)&decay_time_default, (size_t)decay_time); + return (false); +} + +static void +arena_basic_stats_merge_locked(arena_t *arena, unsigned *nthreads, + const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time, + size_t *nactive, size_t *ndirty) +{ + + *nthreads += arena_nthreads_get(arena, false); + *dss 
= dss_prec_names[arena->dss_prec]; + *lg_dirty_mult = arena->lg_dirty_mult; + *decay_time = arena->decay.time; + *nactive += arena->nactive; + *ndirty += arena->ndirty; +} + void -arena_stats_merge(arena_t *arena, const char **dss, ssize_t *lg_dirty_mult, +arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, + const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time, + size_t *nactive, size_t *ndirty) +{ + + malloc_mutex_lock(tsdn, &arena->lock); + arena_basic_stats_merge_locked(arena, nthreads, dss, lg_dirty_mult, + decay_time, nactive, ndirty); + malloc_mutex_unlock(tsdn, &arena->lock); +} + +void +arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, + const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t *nactive, size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats, malloc_huge_stats_t *hstats) { unsigned i; - malloc_mutex_lock(&arena->lock); - *dss = dss_prec_names[arena->dss_prec]; - *lg_dirty_mult = arena->lg_dirty_mult; - *nactive += arena->nactive; - *ndirty += arena->ndirty; + cassert(config_stats); + + malloc_mutex_lock(tsdn, &arena->lock); + arena_basic_stats_merge_locked(arena, nthreads, dss, lg_dirty_mult, + decay_time, nactive, ndirty); astats->mapped += arena->stats.mapped; + astats->retained += arena->stats.retained; astats->npurge += arena->stats.npurge; astats->nmadvise += arena->stats.nmadvise; astats->purged += arena->stats.purged; @@ -2968,12 +3483,12 @@ arena_stats_merge(arena_t *arena, const char **dss, ssize_t *lg_dirty_mult, hstats[i].ndalloc += arena->stats.hstats[i].ndalloc; hstats[i].curhchunks += arena->stats.hstats[i].curhchunks; } - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); for (i = 0; i < NBINS; i++) { arena_bin_t *bin = &arena->bins[i]; - malloc_mutex_lock(&bin->lock); + malloc_mutex_lock(tsdn, &bin->lock); bstats[i].nmalloc += bin->stats.nmalloc; bstats[i].ndalloc += bin->stats.ndalloc; 
bstats[i].nrequests += bin->stats.nrequests; @@ -2985,33 +3500,61 @@ arena_stats_merge(arena_t *arena, const char **dss, ssize_t *lg_dirty_mult, bstats[i].nruns += bin->stats.nruns; bstats[i].reruns += bin->stats.reruns; bstats[i].curruns += bin->stats.curruns; - malloc_mutex_unlock(&bin->lock); + malloc_mutex_unlock(tsdn, &bin->lock); } } +unsigned +arena_nthreads_get(arena_t *arena, bool internal) +{ + + return (atomic_read_u(&arena->nthreads[internal])); +} + +void +arena_nthreads_inc(arena_t *arena, bool internal) +{ + + atomic_add_u(&arena->nthreads[internal], 1); +} + +void +arena_nthreads_dec(arena_t *arena, bool internal) +{ + + atomic_sub_u(&arena->nthreads[internal], 1); +} + +size_t +arena_extent_sn_next(arena_t *arena) +{ + + return (atomic_add_z(&arena->extent_sn_next, 1) - 1); +} + arena_t * -arena_new(unsigned ind) +arena_new(tsdn_t *tsdn, unsigned ind) { arena_t *arena; unsigned i; - arena_bin_t *bin; /* * Allocate arena, arena->lstats, and arena->hstats contiguously, mainly * because there is no way to clean up if base_alloc() OOMs. */ if (config_stats) { - arena = (arena_t *)base_alloc(CACHELINE_CEILING(sizeof(arena_t)) - + QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t) + - nhclasses) * sizeof(malloc_huge_stats_t)); + arena = (arena_t *)base_alloc(tsdn, + CACHELINE_CEILING(sizeof(arena_t)) + + QUANTUM_CEILING((nlclasses * sizeof(malloc_large_stats_t))) + + (nhclasses * sizeof(malloc_huge_stats_t))); } else - arena = (arena_t *)base_alloc(sizeof(arena_t)); + arena = (arena_t *)base_alloc(tsdn, sizeof(arena_t)); if (arena == NULL) return (NULL); arena->ind = ind; - arena->nthreads = 0; - if (malloc_mutex_init(&arena->lock)) + arena->nthreads[0] = arena->nthreads[1] = 0; + if (malloc_mutex_init(&arena->lock, "arena", WITNESS_RANK_ARENA)) return (NULL); if (config_stats) { @@ -3041,11 +3584,15 @@ arena_new(unsigned ind) * deterministic seed. */ arena->offset_state = config_debug ? 
ind : - (uint64_t)(uintptr_t)arena; + (size_t)(uintptr_t)arena; } arena->dss_prec = chunk_dss_prec_get(); + ql_new(&arena->achunks); + + arena->extent_sn_next = 0; + arena->spare = NULL; arena->lg_dirty_mult = arena_lg_dirty_mult_default_get(); @@ -3053,33 +3600,42 @@ arena_new(unsigned ind) arena->nactive = 0; arena->ndirty = 0; - arena_avail_tree_new(&arena->runs_avail); + for (i = 0; i < NPSIZES; i++) + arena_run_heap_new(&arena->runs_avail[i]); + qr_new(&arena->runs_dirty, rd_link); qr_new(&arena->chunks_cache, cc_link); + if (opt_purge == purge_mode_decay) + arena_decay_init(arena, arena_decay_time_default_get()); + ql_new(&arena->huge); - if (malloc_mutex_init(&arena->huge_mtx)) + if (malloc_mutex_init(&arena->huge_mtx, "arena_huge", + WITNESS_RANK_ARENA_HUGE)) return (NULL); - extent_tree_szad_new(&arena->chunks_szad_cached); + extent_tree_szsnad_new(&arena->chunks_szsnad_cached); extent_tree_ad_new(&arena->chunks_ad_cached); - extent_tree_szad_new(&arena->chunks_szad_retained); + extent_tree_szsnad_new(&arena->chunks_szsnad_retained); extent_tree_ad_new(&arena->chunks_ad_retained); - if (malloc_mutex_init(&arena->chunks_mtx)) + if (malloc_mutex_init(&arena->chunks_mtx, "arena_chunks", + WITNESS_RANK_ARENA_CHUNKS)) return (NULL); ql_new(&arena->node_cache); - if (malloc_mutex_init(&arena->node_cache_mtx)) + if (malloc_mutex_init(&arena->node_cache_mtx, "arena_node_cache", + WITNESS_RANK_ARENA_NODE_CACHE)) return (NULL); arena->chunk_hooks = chunk_hooks_default; /* Initialize bins. 
*/ for (i = 0; i < NBINS; i++) { - bin = &arena->bins[i]; - if (malloc_mutex_init(&bin->lock)) + arena_bin_t *bin = &arena->bins[i]; + if (malloc_mutex_init(&bin->lock, "arena_bin", + WITNESS_RANK_ARENA_BIN)) return (NULL); bin->runcur = NULL; - arena_run_tree_new(&bin->runs); + arena_run_heap_new(&bin->runs); if (config_stats) memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); } @@ -3111,8 +3667,7 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info) * be twice as large in order to maintain alignment. */ if (config_fill && unlikely(opt_redzone)) { - size_t align_min = ZU(1) << (jemalloc_ffs(bin_info->reg_size) - - 1); + size_t align_min = ZU(1) << (ffs_zu(bin_info->reg_size) - 1); if (align_min <= REDZONE_MINSIZE) { bin_info->redzone_size = REDZONE_MINSIZE; pad_size = 0; @@ -3132,18 +3687,19 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info) * size). */ try_run_size = PAGE; - try_nregs = try_run_size / bin_info->reg_size; + try_nregs = (uint32_t)(try_run_size / bin_info->reg_size); do { perfect_run_size = try_run_size; perfect_nregs = try_nregs; try_run_size += PAGE; - try_nregs = try_run_size / bin_info->reg_size; + try_nregs = (uint32_t)(try_run_size / bin_info->reg_size); } while (perfect_run_size != perfect_nregs * bin_info->reg_size); assert(perfect_nregs <= RUN_MAXREGS); actual_run_size = perfect_run_size; - actual_nregs = (actual_run_size - pad_size) / bin_info->reg_interval; + actual_nregs = (uint32_t)((actual_run_size - pad_size) / + bin_info->reg_interval); /* * Redzones can require enough padding that not even a single region can @@ -3155,8 +3711,8 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info) assert(config_fill && unlikely(opt_redzone)); actual_run_size += PAGE; - actual_nregs = (actual_run_size - pad_size) / - bin_info->reg_interval; + actual_nregs = (uint32_t)((actual_run_size - pad_size) / + bin_info->reg_interval); } /* @@ -3164,8 +3720,8 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info) */ while (actual_run_size > arena_maxrun) { 
actual_run_size -= PAGE; - actual_nregs = (actual_run_size - pad_size) / - bin_info->reg_interval; + actual_nregs = (uint32_t)((actual_run_size - pad_size) / + bin_info->reg_interval); } assert(actual_nregs > 0); assert(actual_run_size == s2u(actual_run_size)); @@ -3173,11 +3729,8 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info) /* Copy final settings. */ bin_info->run_size = actual_run_size; bin_info->nregs = actual_nregs; - bin_info->reg0_offset = actual_run_size - (actual_nregs * - bin_info->reg_interval) - pad_size + bin_info->redzone_size; - - if (actual_run_size > small_maxrun) - small_maxrun = actual_run_size; + bin_info->reg0_offset = (uint32_t)(actual_run_size - (actual_nregs * + bin_info->reg_interval) - pad_size + bin_info->redzone_size); assert(bin_info->reg0_offset - bin_info->redzone_size + (bin_info->nregs * bin_info->reg_interval) + pad_size == bin_info->run_size); @@ -3194,7 +3747,7 @@ bin_info_init(void) bin_info_run_size_calc(bin_info); \ bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); #define BIN_INFO_INIT_bin_no(index, size) -#define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \ +#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, lg_delta_lookup) \ BIN_INFO_INIT_bin_##bin(index, (ZU(1)<> - LG_PAGE)); - if (small_run_tab == NULL) - return (true); - -#define TAB_INIT_bin_yes(index, size) { \ - arena_bin_info_t *bin_info = &arena_bin_info[index]; \ - small_run_tab[bin_info->run_size >> LG_PAGE] = true; \ - } -#define TAB_INIT_bin_no(index, size) -#define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \ - TAB_INIT_bin_##bin(index, (ZU(1)<lock); - malloc_mutex_prefork(&arena->huge_mtx); - malloc_mutex_prefork(&arena->chunks_mtx); - malloc_mutex_prefork(&arena->node_cache_mtx); - for (i = 0; i < NBINS; i++) - malloc_mutex_prefork(&arena->bins[i].lock); + malloc_mutex_prefork(tsdn, &arena->lock); } void -arena_postfork_parent(arena_t *arena) +arena_prefork1(tsdn_t *tsdn, arena_t *arena) +{ + + 
malloc_mutex_prefork(tsdn, &arena->chunks_mtx); +} + +void +arena_prefork2(tsdn_t *tsdn, arena_t *arena) +{ + + malloc_mutex_prefork(tsdn, &arena->node_cache_mtx); +} + +void +arena_prefork3(tsdn_t *tsdn, arena_t *arena) { unsigned i; for (i = 0; i < NBINS; i++) - malloc_mutex_postfork_parent(&arena->bins[i].lock); - malloc_mutex_postfork_parent(&arena->node_cache_mtx); - malloc_mutex_postfork_parent(&arena->chunks_mtx); - malloc_mutex_postfork_parent(&arena->huge_mtx); - malloc_mutex_postfork_parent(&arena->lock); + malloc_mutex_prefork(tsdn, &arena->bins[i].lock); + malloc_mutex_prefork(tsdn, &arena->huge_mtx); } void -arena_postfork_child(arena_t *arena) +arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { unsigned i; + malloc_mutex_postfork_parent(tsdn, &arena->huge_mtx); for (i = 0; i < NBINS; i++) - malloc_mutex_postfork_child(&arena->bins[i].lock); - malloc_mutex_postfork_child(&arena->node_cache_mtx); - malloc_mutex_postfork_child(&arena->chunks_mtx); - malloc_mutex_postfork_child(&arena->huge_mtx); - malloc_mutex_postfork_child(&arena->lock); + malloc_mutex_postfork_parent(tsdn, &arena->bins[i].lock); + malloc_mutex_postfork_parent(tsdn, &arena->node_cache_mtx); + malloc_mutex_postfork_parent(tsdn, &arena->chunks_mtx); + malloc_mutex_postfork_parent(tsdn, &arena->lock); +} + +void +arena_postfork_child(tsdn_t *tsdn, arena_t *arena) +{ + unsigned i; + + malloc_mutex_postfork_child(tsdn, &arena->huge_mtx); + for (i = 0; i < NBINS; i++) + malloc_mutex_postfork_child(tsdn, &arena->bins[i].lock); + malloc_mutex_postfork_child(tsdn, &arena->node_cache_mtx); + malloc_mutex_postfork_child(tsdn, &arena->chunks_mtx); + malloc_mutex_postfork_child(tsdn, &arena->lock); } diff --git a/deps/jemalloc/src/base.c b/deps/jemalloc/src/base.c index 7cdcfed86..5681a3f36 100644 --- a/deps/jemalloc/src/base.c +++ b/deps/jemalloc/src/base.c @@ -5,7 +5,8 @@ /* Data. 
*/ static malloc_mutex_t base_mtx; -static extent_tree_t base_avail_szad; +static size_t base_extent_sn_next; +static extent_tree_t base_avail_szsnad; static extent_node_t *base_nodes; static size_t base_allocated; static size_t base_resident; @@ -13,12 +14,13 @@ static size_t base_mapped; /******************************************************************************/ -/* base_mtx must be held. */ static extent_node_t * -base_node_try_alloc(void) +base_node_try_alloc(tsdn_t *tsdn) { extent_node_t *node; + malloc_mutex_assert_owner(tsdn, &base_mtx); + if (base_nodes == NULL) return (NULL); node = base_nodes; @@ -27,33 +29,42 @@ base_node_try_alloc(void) return (node); } -/* base_mtx must be held. */ static void -base_node_dalloc(extent_node_t *node) +base_node_dalloc(tsdn_t *tsdn, extent_node_t *node) { + malloc_mutex_assert_owner(tsdn, &base_mtx); + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(node, sizeof(extent_node_t)); *(extent_node_t **)node = base_nodes; base_nodes = node; } -/* base_mtx must be held. */ +static void +base_extent_node_init(extent_node_t *node, void *addr, size_t size) +{ + size_t sn = atomic_add_z(&base_extent_sn_next, 1) - 1; + + extent_node_init(node, NULL, addr, size, sn, true, true); +} + static extent_node_t * -base_chunk_alloc(size_t minsize) +base_chunk_alloc(tsdn_t *tsdn, size_t minsize) { extent_node_t *node; size_t csize, nsize; void *addr; + malloc_mutex_assert_owner(tsdn, &base_mtx); assert(minsize != 0); - node = base_node_try_alloc(); + node = base_node_try_alloc(tsdn); /* Allocate enough space to also carve a node out if necessary. */ nsize = (node == NULL) ? 
CACHELINE_CEILING(sizeof(extent_node_t)) : 0; csize = CHUNK_CEILING(minsize + nsize); addr = chunk_alloc_base(csize); if (addr == NULL) { if (node != NULL) - base_node_dalloc(node); + base_node_dalloc(tsdn, node); return (NULL); } base_mapped += csize; @@ -66,7 +77,7 @@ base_chunk_alloc(size_t minsize) base_resident += PAGE_CEILING(nsize); } } - extent_node_init(node, NULL, addr, csize, true, true); + base_extent_node_init(node, addr, csize); return (node); } @@ -76,7 +87,7 @@ base_chunk_alloc(size_t minsize) * physical memory usage. */ void * -base_alloc(size_t size) +base_alloc(tsdn_t *tsdn, size_t size) { void *ret; size_t csize, usize; @@ -90,15 +101,15 @@ base_alloc(size_t size) csize = CACHELINE_CEILING(size); usize = s2u(csize); - extent_node_init(&key, NULL, NULL, usize, false, false); - malloc_mutex_lock(&base_mtx); - node = extent_tree_szad_nsearch(&base_avail_szad, &key); + extent_node_init(&key, NULL, NULL, usize, 0, false, false); + malloc_mutex_lock(tsdn, &base_mtx); + node = extent_tree_szsnad_nsearch(&base_avail_szsnad, &key); if (node != NULL) { /* Use existing space. */ - extent_tree_szad_remove(&base_avail_szad, node); + extent_tree_szsnad_remove(&base_avail_szsnad, node); } else { /* Try to allocate more space. 
*/ - node = base_chunk_alloc(csize); + node = base_chunk_alloc(tsdn, csize); } if (node == NULL) { ret = NULL; @@ -109,9 +120,9 @@ base_alloc(size_t size) if (extent_node_size_get(node) > csize) { extent_node_addr_set(node, (void *)((uintptr_t)ret + csize)); extent_node_size_set(node, extent_node_size_get(node) - csize); - extent_tree_szad_insert(&base_avail_szad, node); + extent_tree_szsnad_insert(&base_avail_szsnad, node); } else - base_node_dalloc(node); + base_node_dalloc(tsdn, node); if (config_stats) { base_allocated += csize; /* @@ -123,52 +134,54 @@ base_alloc(size_t size) } JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ret, csize); label_return: - malloc_mutex_unlock(&base_mtx); + malloc_mutex_unlock(tsdn, &base_mtx); return (ret); } void -base_stats_get(size_t *allocated, size_t *resident, size_t *mapped) +base_stats_get(tsdn_t *tsdn, size_t *allocated, size_t *resident, + size_t *mapped) { - malloc_mutex_lock(&base_mtx); + malloc_mutex_lock(tsdn, &base_mtx); assert(base_allocated <= base_resident); assert(base_resident <= base_mapped); *allocated = base_allocated; *resident = base_resident; *mapped = base_mapped; - malloc_mutex_unlock(&base_mtx); + malloc_mutex_unlock(tsdn, &base_mtx); } bool base_boot(void) { - if (malloc_mutex_init(&base_mtx)) + if (malloc_mutex_init(&base_mtx, "base", WITNESS_RANK_BASE)) return (true); - extent_tree_szad_new(&base_avail_szad); + base_extent_sn_next = 0; + extent_tree_szsnad_new(&base_avail_szsnad); base_nodes = NULL; return (false); } void -base_prefork(void) +base_prefork(tsdn_t *tsdn) { - malloc_mutex_prefork(&base_mtx); + malloc_mutex_prefork(tsdn, &base_mtx); } void -base_postfork_parent(void) +base_postfork_parent(tsdn_t *tsdn) { - malloc_mutex_postfork_parent(&base_mtx); + malloc_mutex_postfork_parent(tsdn, &base_mtx); } void -base_postfork_child(void) +base_postfork_child(tsdn_t *tsdn) { - malloc_mutex_postfork_child(&base_mtx); + malloc_mutex_postfork_child(tsdn, &base_mtx); } diff --git a/deps/jemalloc/src/bitmap.c 
b/deps/jemalloc/src/bitmap.c index c733372b4..ac0f3b381 100644 --- a/deps/jemalloc/src/bitmap.c +++ b/deps/jemalloc/src/bitmap.c @@ -3,6 +3,8 @@ /******************************************************************************/ +#ifdef USE_TREE + void bitmap_info_init(bitmap_info_t *binfo, size_t nbits) { @@ -32,20 +34,11 @@ bitmap_info_init(bitmap_info_t *binfo, size_t nbits) binfo->nbits = nbits; } -size_t +static size_t bitmap_info_ngroups(const bitmap_info_t *binfo) { - return (binfo->levels[binfo->nlevels].group_offset << LG_SIZEOF_BITMAP); -} - -size_t -bitmap_size(size_t nbits) -{ - bitmap_info_t binfo; - - bitmap_info_init(&binfo, nbits); - return (bitmap_info_ngroups(&binfo)); + return (binfo->levels[binfo->nlevels].group_offset); } void @@ -61,8 +54,7 @@ bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) * correspond to the first logical bit in the group, so extra bits * are the most significant bits of the last group. */ - memset(bitmap, 0xffU, binfo->levels[binfo->nlevels].group_offset << - LG_SIZEOF_BITMAP); + memset(bitmap, 0xffU, bitmap_size(binfo)); extra = (BITMAP_GROUP_NBITS - (binfo->nbits & BITMAP_GROUP_NBITS_MASK)) & BITMAP_GROUP_NBITS_MASK; if (extra != 0) @@ -76,3 +68,44 @@ bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) bitmap[binfo->levels[i+1].group_offset - 1] >>= extra; } } + +#else /* USE_TREE */ + +void +bitmap_info_init(bitmap_info_t *binfo, size_t nbits) +{ + + assert(nbits > 0); + assert(nbits <= (ZU(1) << LG_BITMAP_MAXBITS)); + + binfo->ngroups = BITMAP_BITS2GROUPS(nbits); + binfo->nbits = nbits; +} + +static size_t +bitmap_info_ngroups(const bitmap_info_t *binfo) +{ + + return (binfo->ngroups); +} + +void +bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) +{ + size_t extra; + + memset(bitmap, 0xffU, bitmap_size(binfo)); + extra = (BITMAP_GROUP_NBITS - (binfo->nbits & BITMAP_GROUP_NBITS_MASK)) + & BITMAP_GROUP_NBITS_MASK; + if (extra != 0) + bitmap[binfo->ngroups - 1] >>= extra; +} + +#endif /* USE_TREE */ + 
+size_t +bitmap_size(const bitmap_info_t *binfo) +{ + + return (bitmap_info_ngroups(binfo) << LG_SIZEOF_BITMAP); +} diff --git a/deps/jemalloc/src/chunk.c b/deps/jemalloc/src/chunk.c index 6ba1ca7a5..c1c514a86 100644 --- a/deps/jemalloc/src/chunk.c +++ b/deps/jemalloc/src/chunk.c @@ -49,9 +49,10 @@ const chunk_hooks_t chunk_hooks_default = { * definition. */ -static void chunk_record(arena_t *arena, chunk_hooks_t *chunk_hooks, - extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, bool cache, - void *chunk, size_t size, bool zeroed, bool committed); +static void chunk_record(tsdn_t *tsdn, arena_t *arena, + chunk_hooks_t *chunk_hooks, extent_tree_t *chunks_szsnad, + extent_tree_t *chunks_ad, bool cache, void *chunk, size_t size, size_t sn, + bool zeroed, bool committed); /******************************************************************************/ @@ -63,23 +64,23 @@ chunk_hooks_get_locked(arena_t *arena) } chunk_hooks_t -chunk_hooks_get(arena_t *arena) +chunk_hooks_get(tsdn_t *tsdn, arena_t *arena) { chunk_hooks_t chunk_hooks; - malloc_mutex_lock(&arena->chunks_mtx); + malloc_mutex_lock(tsdn, &arena->chunks_mtx); chunk_hooks = chunk_hooks_get_locked(arena); - malloc_mutex_unlock(&arena->chunks_mtx); + malloc_mutex_unlock(tsdn, &arena->chunks_mtx); return (chunk_hooks); } chunk_hooks_t -chunk_hooks_set(arena_t *arena, const chunk_hooks_t *chunk_hooks) +chunk_hooks_set(tsdn_t *tsdn, arena_t *arena, const chunk_hooks_t *chunk_hooks) { chunk_hooks_t old_chunk_hooks; - malloc_mutex_lock(&arena->chunks_mtx); + malloc_mutex_lock(tsdn, &arena->chunks_mtx); old_chunk_hooks = arena->chunk_hooks; /* * Copy each field atomically so that it is impossible for readers to @@ -104,14 +105,14 @@ chunk_hooks_set(arena_t *arena, const chunk_hooks_t *chunk_hooks) ATOMIC_COPY_HOOK(split); ATOMIC_COPY_HOOK(merge); #undef ATOMIC_COPY_HOOK - malloc_mutex_unlock(&arena->chunks_mtx); + malloc_mutex_unlock(tsdn, &arena->chunks_mtx); return (old_chunk_hooks); } static void 
-chunk_hooks_assure_initialized_impl(arena_t *arena, chunk_hooks_t *chunk_hooks, - bool locked) +chunk_hooks_assure_initialized_impl(tsdn_t *tsdn, arena_t *arena, + chunk_hooks_t *chunk_hooks, bool locked) { static const chunk_hooks_t uninitialized_hooks = CHUNK_HOOKS_INITIALIZER; @@ -119,27 +120,28 @@ chunk_hooks_assure_initialized_impl(arena_t *arena, chunk_hooks_t *chunk_hooks, if (memcmp(chunk_hooks, &uninitialized_hooks, sizeof(chunk_hooks_t)) == 0) { *chunk_hooks = locked ? chunk_hooks_get_locked(arena) : - chunk_hooks_get(arena); + chunk_hooks_get(tsdn, arena); } } static void -chunk_hooks_assure_initialized_locked(arena_t *arena, +chunk_hooks_assure_initialized_locked(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks) { - chunk_hooks_assure_initialized_impl(arena, chunk_hooks, true); + chunk_hooks_assure_initialized_impl(tsdn, arena, chunk_hooks, true); } static void -chunk_hooks_assure_initialized(arena_t *arena, chunk_hooks_t *chunk_hooks) +chunk_hooks_assure_initialized(tsdn_t *tsdn, arena_t *arena, + chunk_hooks_t *chunk_hooks) { - chunk_hooks_assure_initialized_impl(arena, chunk_hooks, false); + chunk_hooks_assure_initialized_impl(tsdn, arena, chunk_hooks, false); } bool -chunk_register(const void *chunk, const extent_node_t *node) +chunk_register(tsdn_t *tsdn, const void *chunk, const extent_node_t *node) { assert(extent_node_addr_get(node) == chunk); @@ -159,7 +161,7 @@ chunk_register(const void *chunk, const extent_node_t *node) high = atomic_read_z(&highchunks); } if (cur > high && prof_gdump_get_unlocked()) - prof_gdump(); + prof_gdump(tsdn); } return (false); @@ -181,33 +183,35 @@ chunk_deregister(const void *chunk, const extent_node_t *node) } /* - * Do first-best-fit chunk selection, i.e. select the lowest chunk that best - * fits. + * Do first-best-fit chunk selection, i.e. select the oldest/lowest chunk that + * best fits. 
*/ static extent_node_t * -chunk_first_best_fit(arena_t *arena, extent_tree_t *chunks_szad, - extent_tree_t *chunks_ad, size_t size) +chunk_first_best_fit(arena_t *arena, extent_tree_t *chunks_szsnad, size_t size) { extent_node_t key; assert(size == CHUNK_CEILING(size)); - extent_node_init(&key, arena, NULL, size, false, false); - return (extent_tree_szad_nsearch(chunks_szad, &key)); + extent_node_init(&key, arena, NULL, size, 0, false, false); + return (extent_tree_szsnad_nsearch(chunks_szsnad, &key)); } static void * -chunk_recycle(arena_t *arena, chunk_hooks_t *chunk_hooks, - extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, bool cache, - void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, - bool dalloc_node) +chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, + extent_tree_t *chunks_szsnad, extent_tree_t *chunks_ad, bool cache, + void *new_addr, size_t size, size_t alignment, size_t *sn, bool *zero, + bool *commit, bool dalloc_node) { void *ret; extent_node_t *node; size_t alloc_size, leadsize, trailsize; bool zeroed, committed; + assert(CHUNK_CEILING(size) == size); + assert(alignment > 0); assert(new_addr == NULL || alignment == chunksize); + assert(CHUNK_ADDR2BASE(new_addr) == new_addr); /* * Cached chunks use the node linkage embedded in their headers, in * which case dalloc_node is true, and new_addr is non-NULL because @@ -215,24 +219,23 @@ chunk_recycle(arena_t *arena, chunk_hooks_t *chunk_hooks, */ assert(dalloc_node || new_addr != NULL); - alloc_size = CHUNK_CEILING(s2u(size + alignment - chunksize)); + alloc_size = size + CHUNK_CEILING(alignment) - chunksize; /* Beware size_t wrap-around. 
*/ if (alloc_size < size) return (NULL); - malloc_mutex_lock(&arena->chunks_mtx); - chunk_hooks_assure_initialized_locked(arena, chunk_hooks); + malloc_mutex_lock(tsdn, &arena->chunks_mtx); + chunk_hooks_assure_initialized_locked(tsdn, arena, chunk_hooks); if (new_addr != NULL) { extent_node_t key; - extent_node_init(&key, arena, new_addr, alloc_size, false, + extent_node_init(&key, arena, new_addr, alloc_size, 0, false, false); node = extent_tree_ad_search(chunks_ad, &key); } else { - node = chunk_first_best_fit(arena, chunks_szad, chunks_ad, - alloc_size); + node = chunk_first_best_fit(arena, chunks_szsnad, alloc_size); } if (node == NULL || (new_addr != NULL && extent_node_size_get(node) < size)) { - malloc_mutex_unlock(&arena->chunks_mtx); + malloc_mutex_unlock(tsdn, &arena->chunks_mtx); return (NULL); } leadsize = ALIGNMENT_CEILING((uintptr_t)extent_node_addr_get(node), @@ -241,6 +244,7 @@ chunk_recycle(arena_t *arena, chunk_hooks_t *chunk_hooks, assert(extent_node_size_get(node) >= leadsize + size); trailsize = extent_node_size_get(node) - leadsize - size; ret = (void *)((uintptr_t)extent_node_addr_get(node) + leadsize); + *sn = extent_node_sn_get(node); zeroed = extent_node_zeroed_get(node); if (zeroed) *zero = true; @@ -251,17 +255,17 @@ chunk_recycle(arena_t *arena, chunk_hooks_t *chunk_hooks, if (leadsize != 0 && chunk_hooks->split(extent_node_addr_get(node), extent_node_size_get(node), leadsize, size, false, arena->ind)) { - malloc_mutex_unlock(&arena->chunks_mtx); + malloc_mutex_unlock(tsdn, &arena->chunks_mtx); return (NULL); } /* Remove node from the tree. */ - extent_tree_szad_remove(chunks_szad, node); + extent_tree_szsnad_remove(chunks_szsnad, node); extent_tree_ad_remove(chunks_ad, node); arena_chunk_cache_maybe_remove(arena, node, cache); if (leadsize != 0) { /* Insert the leading space as a smaller chunk. 
*/ extent_node_size_set(node, leadsize); - extent_tree_szad_insert(chunks_szad, node); + extent_tree_szsnad_insert(chunks_szsnad, node); extent_tree_ad_insert(chunks_ad, node); arena_chunk_cache_maybe_insert(arena, node, cache); node = NULL; @@ -271,41 +275,42 @@ chunk_recycle(arena_t *arena, chunk_hooks_t *chunk_hooks, if (chunk_hooks->split(ret, size + trailsize, size, trailsize, false, arena->ind)) { if (dalloc_node && node != NULL) - arena_node_dalloc(arena, node); - malloc_mutex_unlock(&arena->chunks_mtx); - chunk_record(arena, chunk_hooks, chunks_szad, chunks_ad, - cache, ret, size + trailsize, zeroed, committed); + arena_node_dalloc(tsdn, arena, node); + malloc_mutex_unlock(tsdn, &arena->chunks_mtx); + chunk_record(tsdn, arena, chunk_hooks, chunks_szsnad, + chunks_ad, cache, ret, size + trailsize, *sn, + zeroed, committed); return (NULL); } /* Insert the trailing space as a smaller chunk. */ if (node == NULL) { - node = arena_node_alloc(arena); + node = arena_node_alloc(tsdn, arena); if (node == NULL) { - malloc_mutex_unlock(&arena->chunks_mtx); - chunk_record(arena, chunk_hooks, chunks_szad, - chunks_ad, cache, ret, size + trailsize, - zeroed, committed); + malloc_mutex_unlock(tsdn, &arena->chunks_mtx); + chunk_record(tsdn, arena, chunk_hooks, + chunks_szsnad, chunks_ad, cache, ret, size + + trailsize, *sn, zeroed, committed); return (NULL); } } extent_node_init(node, arena, (void *)((uintptr_t)(ret) + size), - trailsize, zeroed, committed); - extent_tree_szad_insert(chunks_szad, node); + trailsize, *sn, zeroed, committed); + extent_tree_szsnad_insert(chunks_szsnad, node); extent_tree_ad_insert(chunks_ad, node); arena_chunk_cache_maybe_insert(arena, node, cache); node = NULL; } if (!committed && chunk_hooks->commit(ret, size, 0, size, arena->ind)) { - malloc_mutex_unlock(&arena->chunks_mtx); - chunk_record(arena, chunk_hooks, chunks_szad, chunks_ad, cache, - ret, size, zeroed, committed); + malloc_mutex_unlock(tsdn, &arena->chunks_mtx); + chunk_record(tsdn, 
arena, chunk_hooks, chunks_szsnad, chunks_ad, + cache, ret, size, *sn, zeroed, committed); return (NULL); } - malloc_mutex_unlock(&arena->chunks_mtx); + malloc_mutex_unlock(tsdn, &arena->chunks_mtx); assert(dalloc_node || node != NULL); if (dalloc_node && node != NULL) - arena_node_dalloc(arena, node); + arena_node_dalloc(tsdn, arena, node); if (*zero) { if (!zeroed) memset(ret, 0, size); @@ -313,10 +318,11 @@ chunk_recycle(arena_t *arena, chunk_hooks_t *chunk_hooks, size_t i; size_t *p = (size_t *)(uintptr_t)ret; - JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ret, size); for (i = 0; i < size / sizeof(size_t); i++) assert(p[i] == 0); } + if (config_valgrind) + JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ret, size); } return (ret); } @@ -328,39 +334,29 @@ chunk_recycle(arena_t *arena, chunk_hooks_t *chunk_hooks, * them if they are returned. */ static void * -chunk_alloc_core(arena_t *arena, void *new_addr, size_t size, size_t alignment, - bool *zero, bool *commit, dss_prec_t dss_prec) +chunk_alloc_core(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, + size_t alignment, bool *zero, bool *commit, dss_prec_t dss_prec) { void *ret; - chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; assert(size != 0); assert((size & chunksize_mask) == 0); assert(alignment != 0); assert((alignment & chunksize_mask) == 0); - /* Retained. */ - if ((ret = chunk_recycle(arena, &chunk_hooks, - &arena->chunks_szad_retained, &arena->chunks_ad_retained, false, - new_addr, size, alignment, zero, commit, true)) != NULL) - return (ret); - /* "primary" dss. */ if (have_dss && dss_prec == dss_prec_primary && (ret = - chunk_alloc_dss(arena, new_addr, size, alignment, zero, commit)) != - NULL) - return (ret); - /* - * mmap. Requesting an address is not implemented for - * chunk_alloc_mmap(), so only call it if (new_addr == NULL). 
- */ - if (new_addr == NULL && (ret = chunk_alloc_mmap(size, alignment, zero, + chunk_alloc_dss(tsdn, arena, new_addr, size, alignment, zero, commit)) != NULL) return (ret); + /* mmap. */ + if ((ret = chunk_alloc_mmap(new_addr, size, alignment, zero, commit)) != + NULL) + return (ret); /* "secondary" dss. */ if (have_dss && dss_prec == dss_prec_secondary && (ret = - chunk_alloc_dss(arena, new_addr, size, alignment, zero, commit)) != - NULL) + chunk_alloc_dss(tsdn, arena, new_addr, size, alignment, zero, + commit)) != NULL) return (ret); /* All strategies for allocation failed. */ @@ -380,7 +376,7 @@ chunk_alloc_base(size_t size) */ zero = true; commit = true; - ret = chunk_alloc_mmap(size, chunksize, &zero, &commit); + ret = chunk_alloc_mmap(NULL, size, chunksize, &zero, &commit); if (ret == NULL) return (NULL); if (config_valgrind) @@ -390,37 +386,33 @@ chunk_alloc_base(size_t size) } void * -chunk_alloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, - size_t size, size_t alignment, bool *zero, bool dalloc_node) +chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, + void *new_addr, size_t size, size_t alignment, size_t *sn, bool *zero, + bool *commit, bool dalloc_node) { void *ret; - bool commit; assert(size != 0); assert((size & chunksize_mask) == 0); assert(alignment != 0); assert((alignment & chunksize_mask) == 0); - commit = true; - ret = chunk_recycle(arena, chunk_hooks, &arena->chunks_szad_cached, - &arena->chunks_ad_cached, true, new_addr, size, alignment, zero, - &commit, dalloc_node); + ret = chunk_recycle(tsdn, arena, chunk_hooks, + &arena->chunks_szsnad_cached, &arena->chunks_ad_cached, true, + new_addr, size, alignment, sn, zero, commit, dalloc_node); if (ret == NULL) return (NULL); - assert(commit); if (config_valgrind) JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size); return (ret); } static arena_t * -chunk_arena_get(unsigned arena_ind) +chunk_arena_get(tsdn_t *tsdn, unsigned arena_ind) { arena_t *arena; - /* 
Dodge tsd for a0 in order to avoid bootstrapping issues. */ - arena = (arena_ind == 0) ? a0get() : arena_get(tsd_fetch(), arena_ind, - false, true); + arena = arena_get(tsdn, arena_ind, false); /* * The arena we're allocating on behalf of must have been initialized * already. @@ -430,14 +422,12 @@ chunk_arena_get(unsigned arena_ind) } static void * -chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, - bool *commit, unsigned arena_ind) +chunk_alloc_default_impl(tsdn_t *tsdn, arena_t *arena, void *new_addr, + size_t size, size_t alignment, bool *zero, bool *commit) { void *ret; - arena_t *arena; - arena = chunk_arena_get(arena_ind); - ret = chunk_alloc_core(arena, new_addr, size, alignment, zero, + ret = chunk_alloc_core(tsdn, arena, new_addr, size, alignment, zero, commit, arena->dss_prec); if (ret == NULL) return (NULL); @@ -447,26 +437,80 @@ chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, return (ret); } -void * -chunk_alloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, - size_t size, size_t alignment, bool *zero, bool *commit) +static void * +chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, + bool *commit, unsigned arena_ind) +{ + tsdn_t *tsdn; + arena_t *arena; + + tsdn = tsdn_fetch(); + arena = chunk_arena_get(tsdn, arena_ind); + + return (chunk_alloc_default_impl(tsdn, arena, new_addr, size, alignment, + zero, commit)); +} + +static void * +chunk_alloc_retained(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, + void *new_addr, size_t size, size_t alignment, size_t *sn, bool *zero, + bool *commit) { void *ret; - chunk_hooks_assure_initialized(arena, chunk_hooks); - ret = chunk_hooks->alloc(new_addr, size, alignment, zero, commit, - arena->ind); - if (ret == NULL) - return (NULL); - if (config_valgrind && chunk_hooks->alloc != chunk_alloc_default) - JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, chunksize); + assert(size != 0); + assert((size & 
chunksize_mask) == 0); + assert(alignment != 0); + assert((alignment & chunksize_mask) == 0); + + ret = chunk_recycle(tsdn, arena, chunk_hooks, + &arena->chunks_szsnad_retained, &arena->chunks_ad_retained, false, + new_addr, size, alignment, sn, zero, commit, true); + + if (config_stats && ret != NULL) + arena->stats.retained -= size; + + return (ret); +} + +void * +chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, + void *new_addr, size_t size, size_t alignment, size_t *sn, bool *zero, + bool *commit) +{ + void *ret; + + chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); + + ret = chunk_alloc_retained(tsdn, arena, chunk_hooks, new_addr, size, + alignment, sn, zero, commit); + if (ret == NULL) { + if (chunk_hooks->alloc == chunk_alloc_default) { + /* Call directly to propagate tsdn. */ + ret = chunk_alloc_default_impl(tsdn, arena, new_addr, + size, alignment, zero, commit); + } else { + ret = chunk_hooks->alloc(new_addr, size, alignment, + zero, commit, arena->ind); + } + + if (ret == NULL) + return (NULL); + + *sn = arena_extent_sn_next(arena); + + if (config_valgrind && chunk_hooks->alloc != + chunk_alloc_default) + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, chunksize); + } + return (ret); } static void -chunk_record(arena_t *arena, chunk_hooks_t *chunk_hooks, - extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, bool cache, - void *chunk, size_t size, bool zeroed, bool committed) +chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, + extent_tree_t *chunks_szsnad, extent_tree_t *chunks_ad, bool cache, + void *chunk, size_t size, size_t sn, bool zeroed, bool committed) { bool unzeroed; extent_node_t *node, *prev; @@ -476,9 +520,9 @@ chunk_record(arena_t *arena, chunk_hooks_t *chunk_hooks, unzeroed = cache || !zeroed; JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(chunk, size); - malloc_mutex_lock(&arena->chunks_mtx); - chunk_hooks_assure_initialized_locked(arena, chunk_hooks); - extent_node_init(&key, arena, (void 
*)((uintptr_t)chunk + size), 0, + malloc_mutex_lock(tsdn, &arena->chunks_mtx); + chunk_hooks_assure_initialized_locked(tsdn, arena, chunk_hooks); + extent_node_init(&key, arena, (void *)((uintptr_t)chunk + size), 0, 0, false, false); node = extent_tree_ad_nsearch(chunks_ad, &key); /* Try to coalesce forward. */ @@ -490,19 +534,21 @@ chunk_record(arena_t *arena, chunk_hooks_t *chunk_hooks, /* * Coalesce chunk with the following address range. This does * not change the position within chunks_ad, so only - * remove/insert from/into chunks_szad. + * remove/insert from/into chunks_szsnad. */ - extent_tree_szad_remove(chunks_szad, node); + extent_tree_szsnad_remove(chunks_szsnad, node); arena_chunk_cache_maybe_remove(arena, node, cache); extent_node_addr_set(node, chunk); extent_node_size_set(node, size + extent_node_size_get(node)); + if (sn < extent_node_sn_get(node)) + extent_node_sn_set(node, sn); extent_node_zeroed_set(node, extent_node_zeroed_get(node) && !unzeroed); - extent_tree_szad_insert(chunks_szad, node); + extent_tree_szsnad_insert(chunks_szsnad, node); arena_chunk_cache_maybe_insert(arena, node, cache); } else { /* Coalescing forward failed, so insert a new node. */ - node = arena_node_alloc(arena); + node = arena_node_alloc(tsdn, arena); if (node == NULL) { /* * Node allocation failed, which is an exceedingly @@ -511,15 +557,15 @@ chunk_record(arena_t *arena, chunk_hooks_t *chunk_hooks, * a virtual memory leak. 
*/ if (cache) { - chunk_purge_wrapper(arena, chunk_hooks, chunk, - size, 0, size); + chunk_purge_wrapper(tsdn, arena, chunk_hooks, + chunk, size, 0, size); } goto label_return; } - extent_node_init(node, arena, chunk, size, !unzeroed, + extent_node_init(node, arena, chunk, size, sn, !unzeroed, committed); extent_tree_ad_insert(chunks_ad, node); - extent_tree_szad_insert(chunks_szad, node); + extent_tree_szsnad_insert(chunks_szsnad, node); arena_chunk_cache_maybe_insert(arena, node, cache); } @@ -533,31 +579,33 @@ chunk_record(arena_t *arena, chunk_hooks_t *chunk_hooks, /* * Coalesce chunk with the previous address range. This does * not change the position within chunks_ad, so only - * remove/insert node from/into chunks_szad. + * remove/insert node from/into chunks_szsnad. */ - extent_tree_szad_remove(chunks_szad, prev); + extent_tree_szsnad_remove(chunks_szsnad, prev); extent_tree_ad_remove(chunks_ad, prev); arena_chunk_cache_maybe_remove(arena, prev, cache); - extent_tree_szad_remove(chunks_szad, node); + extent_tree_szsnad_remove(chunks_szsnad, node); arena_chunk_cache_maybe_remove(arena, node, cache); extent_node_addr_set(node, extent_node_addr_get(prev)); extent_node_size_set(node, extent_node_size_get(prev) + extent_node_size_get(node)); + if (extent_node_sn_get(prev) < extent_node_sn_get(node)) + extent_node_sn_set(node, extent_node_sn_get(prev)); extent_node_zeroed_set(node, extent_node_zeroed_get(prev) && extent_node_zeroed_get(node)); - extent_tree_szad_insert(chunks_szad, node); + extent_tree_szsnad_insert(chunks_szsnad, node); arena_chunk_cache_maybe_insert(arena, node, cache); - arena_node_dalloc(arena, prev); + arena_node_dalloc(tsdn, arena, prev); } label_return: - malloc_mutex_unlock(&arena->chunks_mtx); + malloc_mutex_unlock(tsdn, &arena->chunks_mtx); } void -chunk_dalloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, - size_t size, bool committed) +chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, + void 
*chunk, size_t size, size_t sn, bool committed) { assert(chunk != NULL); @@ -565,24 +613,49 @@ chunk_dalloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, assert(size != 0); assert((size & chunksize_mask) == 0); - chunk_record(arena, chunk_hooks, &arena->chunks_szad_cached, - &arena->chunks_ad_cached, true, chunk, size, false, committed); - arena_maybe_purge(arena); + chunk_record(tsdn, arena, chunk_hooks, &arena->chunks_szsnad_cached, + &arena->chunks_ad_cached, true, chunk, size, sn, false, + committed); + arena_maybe_purge(tsdn, arena); +} + +static bool +chunk_dalloc_default_impl(void *chunk, size_t size) +{ + + if (!have_dss || !chunk_in_dss(chunk)) + return (chunk_dalloc_mmap(chunk, size)); + return (true); +} + +static bool +chunk_dalloc_default(void *chunk, size_t size, bool committed, + unsigned arena_ind) +{ + + return (chunk_dalloc_default_impl(chunk, size)); } void -chunk_dalloc_arena(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, - size_t size, bool zeroed, bool committed) +chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, + void *chunk, size_t size, size_t sn, bool zeroed, bool committed) { + bool err; assert(chunk != NULL); assert(CHUNK_ADDR2BASE(chunk) == chunk); assert(size != 0); assert((size & chunksize_mask) == 0); - chunk_hooks_assure_initialized(arena, chunk_hooks); + chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); /* Try to deallocate. */ - if (!chunk_hooks->dalloc(chunk, size, committed, arena->ind)) + if (chunk_hooks->dalloc == chunk_dalloc_default) { + /* Call directly to propagate tsdn. */ + err = chunk_dalloc_default_impl(chunk, size); + } else + err = chunk_hooks->dalloc(chunk, size, committed, arena->ind); + + if (!err) return; /* Try to decommit; purge if that fails. 
*/ if (committed) { @@ -591,29 +664,12 @@ chunk_dalloc_arena(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, } zeroed = !committed || !chunk_hooks->purge(chunk, size, 0, size, arena->ind); - chunk_record(arena, chunk_hooks, &arena->chunks_szad_retained, - &arena->chunks_ad_retained, false, chunk, size, zeroed, committed); -} + chunk_record(tsdn, arena, chunk_hooks, &arena->chunks_szsnad_retained, + &arena->chunks_ad_retained, false, chunk, size, sn, zeroed, + committed); -static bool -chunk_dalloc_default(void *chunk, size_t size, bool committed, - unsigned arena_ind) -{ - - if (!have_dss || !chunk_in_dss(chunk)) - return (chunk_dalloc_mmap(chunk, size)); - return (true); -} - -void -chunk_dalloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, - size_t size, bool committed) -{ - - chunk_hooks_assure_initialized(arena, chunk_hooks); - chunk_hooks->dalloc(chunk, size, committed, arena->ind); - if (config_valgrind && chunk_hooks->dalloc != chunk_dalloc_default) - JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(chunk, size); + if (config_stats) + arena->stats.retained += size; } static bool @@ -634,8 +690,9 @@ chunk_decommit_default(void *chunk, size_t size, size_t offset, size_t length, length)); } -bool -chunk_purge_arena(arena_t *arena, void *chunk, size_t offset, size_t length) +static bool +chunk_purge_default(void *chunk, size_t size, size_t offset, size_t length, + unsigned arena_ind) { assert(chunk != NULL); @@ -648,21 +705,12 @@ chunk_purge_arena(arena_t *arena, void *chunk, size_t offset, size_t length) length)); } -static bool -chunk_purge_default(void *chunk, size_t size, size_t offset, size_t length, - unsigned arena_ind) -{ - - return (chunk_purge_arena(chunk_arena_get(arena_ind), chunk, offset, - length)); -} - bool -chunk_purge_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, - size_t size, size_t offset, size_t length) +chunk_purge_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, + void *chunk, size_t 
size, size_t offset, size_t length) { - chunk_hooks_assure_initialized(arena, chunk_hooks); + chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); return (chunk_hooks->purge(chunk, size, offset, length, arena->ind)); } @@ -677,23 +725,30 @@ chunk_split_default(void *chunk, size_t size, size_t size_a, size_t size_b, } static bool -chunk_merge_default(void *chunk_a, size_t size_a, void *chunk_b, size_t size_b, - bool committed, unsigned arena_ind) +chunk_merge_default_impl(void *chunk_a, void *chunk_b) { if (!maps_coalesce) return (true); - if (have_dss && chunk_in_dss(chunk_a) != chunk_in_dss(chunk_b)) + if (have_dss && !chunk_dss_mergeable(chunk_a, chunk_b)) return (true); return (false); } +static bool +chunk_merge_default(void *chunk_a, size_t size_a, void *chunk_b, size_t size_b, + bool committed, unsigned arena_ind) +{ + + return (chunk_merge_default_impl(chunk_a, chunk_b)); +} + static rtree_node_elm_t * chunks_rtree_node_alloc(size_t nelms) { - return ((rtree_node_elm_t *)base_alloc(nelms * + return ((rtree_node_elm_t *)base_alloc(TSDN_NULL, nelms * sizeof(rtree_node_elm_t))); } @@ -716,7 +771,7 @@ chunk_boot(void) * so pages_map will always take fast path. 
*/ if (!opt_lg_chunk) { - opt_lg_chunk = jemalloc_ffs((int)info.dwAllocationGranularity) + opt_lg_chunk = ffs_u((unsigned)info.dwAllocationGranularity) - 1; } #else @@ -730,32 +785,11 @@ chunk_boot(void) chunksize_mask = chunksize - 1; chunk_npages = (chunksize >> LG_PAGE); - if (have_dss && chunk_dss_boot()) - return (true); - if (rtree_new(&chunks_rtree, (ZU(1) << (LG_SIZEOF_PTR+3)) - - opt_lg_chunk, chunks_rtree_node_alloc, NULL)) + if (have_dss) + chunk_dss_boot(); + if (rtree_new(&chunks_rtree, (unsigned)((ZU(1) << (LG_SIZEOF_PTR+3)) - + opt_lg_chunk), chunks_rtree_node_alloc, NULL)) return (true); return (false); } - -void -chunk_prefork(void) -{ - - chunk_dss_prefork(); -} - -void -chunk_postfork_parent(void) -{ - - chunk_dss_postfork_parent(); -} - -void -chunk_postfork_child(void) -{ - - chunk_dss_postfork_child(); -} diff --git a/deps/jemalloc/src/chunk_dss.c b/deps/jemalloc/src/chunk_dss.c index 61fc91696..ee3f83888 100644 --- a/deps/jemalloc/src/chunk_dss.c +++ b/deps/jemalloc/src/chunk_dss.c @@ -10,20 +10,19 @@ const char *dss_prec_names[] = { "N/A" }; -/* Current dss precedence default, used when creating new arenas. */ -static dss_prec_t dss_prec_default = DSS_PREC_DEFAULT; - /* - * Protects sbrk() calls. This avoids malloc races among threads, though it - * does not protect against races with threads that call sbrk() directly. + * Current dss precedence default, used when creating new arenas. NB: This is + * stored as unsigned rather than dss_prec_t because in principle there's no + * guarantee that sizeof(dss_prec_t) is the same as sizeof(unsigned), and we use + * atomic operations to synchronize the setting. */ -static malloc_mutex_t dss_mtx; +static unsigned dss_prec_default = (unsigned)DSS_PREC_DEFAULT; /* Base address of the DSS. */ static void *dss_base; -/* Current end of the DSS, or ((void *)-1) if the DSS is exhausted. */ -static void *dss_prev; -/* Current upper limit on DSS addresses. 
*/ +/* Atomic boolean indicating whether the DSS is exhausted. */ +static unsigned dss_exhausted; +/* Atomic current upper limit on DSS addresses. */ static void *dss_max; /******************************************************************************/ @@ -47,9 +46,7 @@ chunk_dss_prec_get(void) if (!have_dss) return (dss_prec_disabled); - malloc_mutex_lock(&dss_mtx); - ret = dss_prec_default; - malloc_mutex_unlock(&dss_mtx); + ret = (dss_prec_t)atomic_read_u(&dss_prec_default); return (ret); } @@ -59,15 +56,46 @@ chunk_dss_prec_set(dss_prec_t dss_prec) if (!have_dss) return (dss_prec != dss_prec_disabled); - malloc_mutex_lock(&dss_mtx); - dss_prec_default = dss_prec; - malloc_mutex_unlock(&dss_mtx); + atomic_write_u(&dss_prec_default, (unsigned)dss_prec); return (false); } +static void * +chunk_dss_max_update(void *new_addr) +{ + void *max_cur; + spin_t spinner; + + /* + * Get the current end of the DSS as max_cur and assure that dss_max is + * up to date. + */ + spin_init(&spinner); + while (true) { + void *max_prev = atomic_read_p(&dss_max); + + max_cur = chunk_dss_sbrk(0); + if ((uintptr_t)max_prev > (uintptr_t)max_cur) { + /* + * Another thread optimistically updated dss_max. Wait + * for it to finish. + */ + spin_adaptive(&spinner); + continue; + } + if (!atomic_cas_p(&dss_max, max_prev, max_cur)) + break; + } + /* Fixed new_addr can only be supported if it is at the edge of DSS. 
*/ + if (new_addr != NULL && max_cur != new_addr) + return (NULL); + + return (max_cur); +} + void * -chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size, size_t alignment, - bool *zero, bool *commit) +chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, + size_t alignment, bool *zero, bool *commit) { cassert(have_dss); assert(size > 0 && (size & chunksize_mask) == 0); @@ -80,28 +108,20 @@ chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size, size_t alignment, if ((intptr_t)size < 0) return (NULL); - malloc_mutex_lock(&dss_mtx); - if (dss_prev != (void *)-1) { - + if (!atomic_read_u(&dss_exhausted)) { /* * The loop is necessary to recover from races with other * threads that are using the DSS for something other than * malloc. */ - do { - void *ret, *cpad, *dss_next; + while (true) { + void *ret, *cpad, *max_cur, *dss_next, *dss_prev; size_t gap_size, cpad_size; intptr_t incr; - /* Avoid an unnecessary system call. */ - if (new_addr != NULL && dss_max != new_addr) - break; - /* Get the current end of the DSS. */ - dss_max = chunk_dss_sbrk(0); - - /* Make sure the earlier condition still holds. */ - if (new_addr != NULL && dss_max != new_addr) - break; + max_cur = chunk_dss_max_update(new_addr); + if (max_cur == NULL) + goto label_oom; /* * Calculate how much padding is necessary to @@ -120,22 +140,29 @@ chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size, size_t alignment, cpad_size = (uintptr_t)ret - (uintptr_t)cpad; dss_next = (void *)((uintptr_t)ret + size); if ((uintptr_t)ret < (uintptr_t)dss_max || - (uintptr_t)dss_next < (uintptr_t)dss_max) { - /* Wrap-around. */ - malloc_mutex_unlock(&dss_mtx); - return (NULL); - } + (uintptr_t)dss_next < (uintptr_t)dss_max) + goto label_oom; /* Wrap-around. */ incr = gap_size + cpad_size + size; + + /* + * Optimistically update dss_max, and roll back below if + * sbrk() fails. 
No other thread will try to extend the + * DSS while dss_max is greater than the current DSS + * max reported by sbrk(0). + */ + if (atomic_cas_p(&dss_max, max_cur, dss_next)) + continue; + + /* Try to allocate. */ dss_prev = chunk_dss_sbrk(incr); - if (dss_prev == dss_max) { + if (dss_prev == max_cur) { /* Success. */ - dss_max = dss_next; - malloc_mutex_unlock(&dss_mtx); if (cpad_size != 0) { chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; - chunk_dalloc_wrapper(arena, + chunk_dalloc_wrapper(tsdn, arena, &chunk_hooks, cpad, cpad_size, + arena_extent_sn_next(arena), false, true); } if (*zero) { @@ -147,68 +174,65 @@ chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size, size_t alignment, *commit = pages_decommit(ret, size); return (ret); } - } while (dss_prev != (void *)-1); - } - malloc_mutex_unlock(&dss_mtx); + /* + * Failure, whether due to OOM or a race with a raw + * sbrk() call from outside the allocator. Try to roll + * back optimistic dss_max update; if rollback fails, + * it's due to another caller of this function having + * succeeded since this invocation started, in which + * case rollback is not necessary. + */ + atomic_cas_p(&dss_max, dss_next, max_cur); + if (dss_prev == (void *)-1) { + /* OOM. 
*/ + atomic_write_u(&dss_exhausted, (unsigned)true); + goto label_oom; + } + } + } +label_oom: return (NULL); } +static bool +chunk_in_dss_helper(void *chunk, void *max) +{ + + return ((uintptr_t)chunk >= (uintptr_t)dss_base && (uintptr_t)chunk < + (uintptr_t)max); +} + bool chunk_in_dss(void *chunk) { - bool ret; cassert(have_dss); - malloc_mutex_lock(&dss_mtx); - if ((uintptr_t)chunk >= (uintptr_t)dss_base - && (uintptr_t)chunk < (uintptr_t)dss_max) - ret = true; - else - ret = false; - malloc_mutex_unlock(&dss_mtx); - - return (ret); + return (chunk_in_dss_helper(chunk, atomic_read_p(&dss_max))); } bool +chunk_dss_mergeable(void *chunk_a, void *chunk_b) +{ + void *max; + + cassert(have_dss); + + max = atomic_read_p(&dss_max); + return (chunk_in_dss_helper(chunk_a, max) == + chunk_in_dss_helper(chunk_b, max)); +} + +void chunk_dss_boot(void) { cassert(have_dss); - if (malloc_mutex_init(&dss_mtx)) - return (true); dss_base = chunk_dss_sbrk(0); - dss_prev = dss_base; + dss_exhausted = (unsigned)(dss_base == (void *)-1); dss_max = dss_base; - - return (false); -} - -void -chunk_dss_prefork(void) -{ - - if (have_dss) - malloc_mutex_prefork(&dss_mtx); -} - -void -chunk_dss_postfork_parent(void) -{ - - if (have_dss) - malloc_mutex_postfork_parent(&dss_mtx); -} - -void -chunk_dss_postfork_child(void) -{ - - if (have_dss) - malloc_mutex_postfork_child(&dss_mtx); } /******************************************************************************/ diff --git a/deps/jemalloc/src/chunk_mmap.c b/deps/jemalloc/src/chunk_mmap.c index b9ba74191..73fc497af 100644 --- a/deps/jemalloc/src/chunk_mmap.c +++ b/deps/jemalloc/src/chunk_mmap.c @@ -16,23 +16,22 @@ chunk_alloc_mmap_slow(size_t size, size_t alignment, bool *zero, bool *commit) do { void *pages; size_t leadsize; - pages = pages_map(NULL, alloc_size); + pages = pages_map(NULL, alloc_size, commit); if (pages == NULL) return (NULL); leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment) - (uintptr_t)pages; - ret = 
pages_trim(pages, alloc_size, leadsize, size); + ret = pages_trim(pages, alloc_size, leadsize, size, commit); } while (ret == NULL); assert(ret != NULL); *zero = true; - if (!*commit) - *commit = pages_decommit(ret, size); return (ret); } void * -chunk_alloc_mmap(size_t size, size_t alignment, bool *zero, bool *commit) +chunk_alloc_mmap(void *new_addr, size_t size, size_t alignment, bool *zero, + bool *commit) { void *ret; size_t offset; @@ -53,9 +52,10 @@ chunk_alloc_mmap(size_t size, size_t alignment, bool *zero, bool *commit) assert(alignment != 0); assert((alignment & chunksize_mask) == 0); - ret = pages_map(NULL, size); - if (ret == NULL) - return (NULL); + ret = pages_map(new_addr, size, commit); + if (ret == NULL || ret == new_addr) + return (ret); + assert(new_addr == NULL); offset = ALIGNMENT_ADDR2OFFSET(ret, alignment); if (offset != 0) { pages_unmap(ret, size); @@ -64,8 +64,6 @@ chunk_alloc_mmap(size_t size, size_t alignment, bool *zero, bool *commit) assert(ret != NULL); *zero = true; - if (!*commit) - *commit = pages_decommit(ret, size); return (ret); } diff --git a/deps/jemalloc/src/ckh.c b/deps/jemalloc/src/ckh.c index 53a1c1ef1..159bd8ae1 100644 --- a/deps/jemalloc/src/ckh.c +++ b/deps/jemalloc/src/ckh.c @@ -99,7 +99,8 @@ ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key, * Cycle through the cells in the bucket, starting at a random position. * The randomness avoids worst-case search overhead as buckets fill up. */ - prng32(offset, LG_CKH_BUCKET_CELLS, ckh->prng_state, CKH_A, CKH_C); + offset = (unsigned)prng_lg_range_u64(&ckh->prng_state, + LG_CKH_BUCKET_CELLS); for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) { cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + ((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))]; @@ -141,7 +142,8 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey, * were an item for which both hashes indicated the same * bucket. 
*/ - prng32(i, LG_CKH_BUCKET_CELLS, ckh->prng_state, CKH_A, CKH_C); + i = (unsigned)prng_lg_range_u64(&ckh->prng_state, + LG_CKH_BUCKET_CELLS); cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i]; assert(cell->key != NULL); @@ -247,8 +249,7 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) { bool ret; ckhc_t *tab, *ttab; - size_t lg_curcells; - unsigned lg_prevbuckets; + unsigned lg_prevbuckets, lg_curcells; #ifdef CKH_COUNT ckh->ngrows++; @@ -266,12 +267,12 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) lg_curcells++; usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); - if (usize == 0) { + if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) { ret = true; goto label_return; } - tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, - true, NULL); + tab = (ckhc_t *)ipallocztm(tsd_tsdn(tsd), usize, CACHELINE, + true, NULL, true, arena_ichoose(tsd, NULL)); if (tab == NULL) { ret = true; goto label_return; @@ -283,12 +284,12 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; if (!ckh_rebuild(ckh, tab)) { - idalloctm(tsd, tab, tcache_get(tsd, false), true); + idalloctm(tsd_tsdn(tsd), tab, NULL, true, true); break; } /* Rebuilding failed, so back out partially rebuilt table. 
*/ - idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true); + idalloctm(tsd_tsdn(tsd), ckh->tab, NULL, true, true); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; } @@ -302,8 +303,8 @@ static void ckh_shrink(tsd_t *tsd, ckh_t *ckh) { ckhc_t *tab, *ttab; - size_t lg_curcells, usize; - unsigned lg_prevbuckets; + size_t usize; + unsigned lg_prevbuckets, lg_curcells; /* * It is possible (though unlikely, given well behaved hashes) that the @@ -312,10 +313,10 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) lg_prevbuckets = ckh->lg_curbuckets; lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1; usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); - if (usize == 0) + if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) return; - tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, true, - NULL); + tab = (ckhc_t *)ipallocztm(tsd_tsdn(tsd), usize, CACHELINE, true, NULL, + true, arena_ichoose(tsd, NULL)); if (tab == NULL) { /* * An OOM error isn't worth propagating, since it doesn't @@ -330,7 +331,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; if (!ckh_rebuild(ckh, tab)) { - idalloctm(tsd, tab, tcache_get(tsd, false), true); + idalloctm(tsd_tsdn(tsd), tab, NULL, true, true); #ifdef CKH_COUNT ckh->nshrinks++; #endif @@ -338,7 +339,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) } /* Rebuilding failed, so back out partially rebuilt table. 
*/ - idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true); + idalloctm(tsd_tsdn(tsd), ckh->tab, NULL, true, true); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; #ifdef CKH_COUNT @@ -387,12 +388,12 @@ ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh->keycomp = keycomp; usize = sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE); - if (usize == 0) { + if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) { ret = true; goto label_return; } - ckh->tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, true, - NULL); + ckh->tab = (ckhc_t *)ipallocztm(tsd_tsdn(tsd), usize, CACHELINE, true, + NULL, true, arena_ichoose(tsd, NULL)); if (ckh->tab == NULL) { ret = true; goto label_return; @@ -421,9 +422,9 @@ ckh_delete(tsd_t *tsd, ckh_t *ckh) (unsigned long long)ckh->nrelocs); #endif - idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true); + idalloctm(tsd_tsdn(tsd), ckh->tab, NULL, true, true); if (config_debug) - memset(ckh, 0x5a, sizeof(ckh_t)); + memset(ckh, JEMALLOC_FREE_JUNK, sizeof(ckh_t)); } size_t diff --git a/deps/jemalloc/src/ctl.c b/deps/jemalloc/src/ctl.c index 3de8e602d..bc78b2055 100644 --- a/deps/jemalloc/src/ctl.c +++ b/deps/jemalloc/src/ctl.c @@ -24,7 +24,7 @@ ctl_named_node(const ctl_node_t *node) } JEMALLOC_INLINE_C const ctl_named_node_t * -ctl_named_children(const ctl_named_node_t *node, int index) +ctl_named_children(const ctl_named_node_t *node, size_t index) { const ctl_named_node_t *children = ctl_named_node(node->children); @@ -42,25 +42,25 @@ ctl_indexed_node(const ctl_node_t *node) /* Function prototypes for non-inline static functions. 
*/ #define CTL_PROTO(n) \ -static int n##_ctl(const size_t *mib, size_t miblen, void *oldp, \ - size_t *oldlenp, void *newp, size_t newlen); +static int n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ + void *oldp, size_t *oldlenp, void *newp, size_t newlen); #define INDEX_PROTO(n) \ -static const ctl_named_node_t *n##_index(const size_t *mib, \ - size_t miblen, size_t i); +static const ctl_named_node_t *n##_index(tsdn_t *tsdn, \ + const size_t *mib, size_t miblen, size_t i); static bool ctl_arena_init(ctl_arena_stats_t *astats); static void ctl_arena_clear(ctl_arena_stats_t *astats); -static void ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, +static void ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_stats_t *cstats, arena_t *arena); static void ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats); -static void ctl_arena_refresh(arena_t *arena, unsigned i); -static bool ctl_grow(void); -static void ctl_refresh(void); -static bool ctl_init(void); -static int ctl_lookup(const char *name, ctl_node_t const **nodesp, - size_t *mibp, size_t *depthp); +static void ctl_arena_refresh(tsdn_t *tsdn, arena_t *arena, unsigned i); +static bool ctl_grow(tsdn_t *tsdn); +static void ctl_refresh(tsdn_t *tsdn); +static bool ctl_init(tsdn_t *tsdn); +static int ctl_lookup(tsdn_t *tsdn, const char *name, + ctl_node_t const **nodesp, size_t *mibp, size_t *depthp); CTL_PROTO(version) CTL_PROTO(epoch) @@ -77,6 +77,7 @@ CTL_PROTO(config_cache_oblivious) CTL_PROTO(config_debug) CTL_PROTO(config_fill) CTL_PROTO(config_lazy_lock) +CTL_PROTO(config_malloc_conf) CTL_PROTO(config_munmap) CTL_PROTO(config_prof) CTL_PROTO(config_prof_libgcc) @@ -91,7 +92,9 @@ CTL_PROTO(opt_abort) CTL_PROTO(opt_dss) CTL_PROTO(opt_lg_chunk) CTL_PROTO(opt_narenas) +CTL_PROTO(opt_purge) CTL_PROTO(opt_lg_dirty_mult) +CTL_PROTO(opt_decay_time) CTL_PROTO(opt_stats_print) CTL_PROTO(opt_junk) CTL_PROTO(opt_zero) @@ -114,10 +117,13 @@ CTL_PROTO(opt_prof_accum) CTL_PROTO(tcache_create) 
CTL_PROTO(tcache_flush) CTL_PROTO(tcache_destroy) +static void arena_i_purge(tsdn_t *tsdn, unsigned arena_ind, bool all); CTL_PROTO(arena_i_purge) -static void arena_purge(unsigned arena_ind); +CTL_PROTO(arena_i_decay) +CTL_PROTO(arena_i_reset) CTL_PROTO(arena_i_dss) CTL_PROTO(arena_i_lg_dirty_mult) +CTL_PROTO(arena_i_decay_time) CTL_PROTO(arena_i_chunk_hooks) INDEX_PROTO(arena_i) CTL_PROTO(arenas_bin_i_size) @@ -131,6 +137,7 @@ INDEX_PROTO(arenas_hchunk_i) CTL_PROTO(arenas_narenas) CTL_PROTO(arenas_initialized) CTL_PROTO(arenas_lg_dirty_mult) +CTL_PROTO(arenas_decay_time) CTL_PROTO(arenas_quantum) CTL_PROTO(arenas_page) CTL_PROTO(arenas_tcache_max) @@ -181,9 +188,11 @@ INDEX_PROTO(stats_arenas_i_hchunks_j) CTL_PROTO(stats_arenas_i_nthreads) CTL_PROTO(stats_arenas_i_dss) CTL_PROTO(stats_arenas_i_lg_dirty_mult) +CTL_PROTO(stats_arenas_i_decay_time) CTL_PROTO(stats_arenas_i_pactive) CTL_PROTO(stats_arenas_i_pdirty) CTL_PROTO(stats_arenas_i_mapped) +CTL_PROTO(stats_arenas_i_retained) CTL_PROTO(stats_arenas_i_npurge) CTL_PROTO(stats_arenas_i_nmadvise) CTL_PROTO(stats_arenas_i_purged) @@ -196,6 +205,7 @@ CTL_PROTO(stats_active) CTL_PROTO(stats_metadata) CTL_PROTO(stats_resident) CTL_PROTO(stats_mapped) +CTL_PROTO(stats_retained) /******************************************************************************/ /* mallctl tree. 
*/ @@ -241,6 +251,7 @@ static const ctl_named_node_t config_node[] = { {NAME("debug"), CTL(config_debug)}, {NAME("fill"), CTL(config_fill)}, {NAME("lazy_lock"), CTL(config_lazy_lock)}, + {NAME("malloc_conf"), CTL(config_malloc_conf)}, {NAME("munmap"), CTL(config_munmap)}, {NAME("prof"), CTL(config_prof)}, {NAME("prof_libgcc"), CTL(config_prof_libgcc)}, @@ -258,7 +269,9 @@ static const ctl_named_node_t opt_node[] = { {NAME("dss"), CTL(opt_dss)}, {NAME("lg_chunk"), CTL(opt_lg_chunk)}, {NAME("narenas"), CTL(opt_narenas)}, + {NAME("purge"), CTL(opt_purge)}, {NAME("lg_dirty_mult"), CTL(opt_lg_dirty_mult)}, + {NAME("decay_time"), CTL(opt_decay_time)}, {NAME("stats_print"), CTL(opt_stats_print)}, {NAME("junk"), CTL(opt_junk)}, {NAME("zero"), CTL(opt_zero)}, @@ -288,8 +301,11 @@ static const ctl_named_node_t tcache_node[] = { static const ctl_named_node_t arena_i_node[] = { {NAME("purge"), CTL(arena_i_purge)}, + {NAME("decay"), CTL(arena_i_decay)}, + {NAME("reset"), CTL(arena_i_reset)}, {NAME("dss"), CTL(arena_i_dss)}, {NAME("lg_dirty_mult"), CTL(arena_i_lg_dirty_mult)}, + {NAME("decay_time"), CTL(arena_i_decay_time)}, {NAME("chunk_hooks"), CTL(arena_i_chunk_hooks)} }; static const ctl_named_node_t super_arena_i_node[] = { @@ -339,6 +355,7 @@ static const ctl_named_node_t arenas_node[] = { {NAME("narenas"), CTL(arenas_narenas)}, {NAME("initialized"), CTL(arenas_initialized)}, {NAME("lg_dirty_mult"), CTL(arenas_lg_dirty_mult)}, + {NAME("decay_time"), CTL(arenas_decay_time)}, {NAME("quantum"), CTL(arenas_quantum)}, {NAME("page"), CTL(arenas_page)}, {NAME("tcache_max"), CTL(arenas_tcache_max)}, @@ -439,9 +456,11 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("nthreads"), CTL(stats_arenas_i_nthreads)}, {NAME("dss"), CTL(stats_arenas_i_dss)}, {NAME("lg_dirty_mult"), CTL(stats_arenas_i_lg_dirty_mult)}, + {NAME("decay_time"), CTL(stats_arenas_i_decay_time)}, {NAME("pactive"), CTL(stats_arenas_i_pactive)}, {NAME("pdirty"), CTL(stats_arenas_i_pdirty)}, 
{NAME("mapped"), CTL(stats_arenas_i_mapped)}, + {NAME("retained"), CTL(stats_arenas_i_retained)}, {NAME("npurge"), CTL(stats_arenas_i_npurge)}, {NAME("nmadvise"), CTL(stats_arenas_i_nmadvise)}, {NAME("purged"), CTL(stats_arenas_i_purged)}, @@ -468,6 +487,7 @@ static const ctl_named_node_t stats_node[] = { {NAME("metadata"), CTL(stats_metadata)}, {NAME("resident"), CTL(stats_resident)}, {NAME("mapped"), CTL(stats_mapped)}, + {NAME("retained"), CTL(stats_retained)}, {NAME("arenas"), CHILD(indexed, stats_arenas)} }; @@ -519,8 +539,10 @@ static void ctl_arena_clear(ctl_arena_stats_t *astats) { + astats->nthreads = 0; astats->dss = dss_prec_names[dss_prec_limit]; astats->lg_dirty_mult = -1; + astats->decay_time = -1; astats->pactive = 0; astats->pdirty = 0; if (config_stats) { @@ -538,20 +560,27 @@ ctl_arena_clear(ctl_arena_stats_t *astats) } static void -ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, arena_t *arena) +ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_stats_t *cstats, arena_t *arena) { unsigned i; - arena_stats_merge(arena, &cstats->dss, &cstats->lg_dirty_mult, - &cstats->pactive, &cstats->pdirty, &cstats->astats, cstats->bstats, - cstats->lstats, cstats->hstats); + if (config_stats) { + arena_stats_merge(tsdn, arena, &cstats->nthreads, &cstats->dss, + &cstats->lg_dirty_mult, &cstats->decay_time, + &cstats->pactive, &cstats->pdirty, &cstats->astats, + cstats->bstats, cstats->lstats, cstats->hstats); - for (i = 0; i < NBINS; i++) { - cstats->allocated_small += cstats->bstats[i].curregs * - index2size(i); - cstats->nmalloc_small += cstats->bstats[i].nmalloc; - cstats->ndalloc_small += cstats->bstats[i].ndalloc; - cstats->nrequests_small += cstats->bstats[i].nrequests; + for (i = 0; i < NBINS; i++) { + cstats->allocated_small += cstats->bstats[i].curregs * + index2size(i); + cstats->nmalloc_small += cstats->bstats[i].nmalloc; + cstats->ndalloc_small += cstats->bstats[i].ndalloc; + cstats->nrequests_small += cstats->bstats[i].nrequests; + } + } else { + 
arena_basic_stats_merge(tsdn, arena, &cstats->nthreads, + &cstats->dss, &cstats->lg_dirty_mult, &cstats->decay_time, + &cstats->pactive, &cstats->pdirty); } } @@ -560,89 +589,91 @@ ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats) { unsigned i; + sstats->nthreads += astats->nthreads; sstats->pactive += astats->pactive; sstats->pdirty += astats->pdirty; - sstats->astats.mapped += astats->astats.mapped; - sstats->astats.npurge += astats->astats.npurge; - sstats->astats.nmadvise += astats->astats.nmadvise; - sstats->astats.purged += astats->astats.purged; + if (config_stats) { + sstats->astats.mapped += astats->astats.mapped; + sstats->astats.retained += astats->astats.retained; + sstats->astats.npurge += astats->astats.npurge; + sstats->astats.nmadvise += astats->astats.nmadvise; + sstats->astats.purged += astats->astats.purged; - sstats->astats.metadata_mapped += astats->astats.metadata_mapped; - sstats->astats.metadata_allocated += astats->astats.metadata_allocated; + sstats->astats.metadata_mapped += + astats->astats.metadata_mapped; + sstats->astats.metadata_allocated += + astats->astats.metadata_allocated; - sstats->allocated_small += astats->allocated_small; - sstats->nmalloc_small += astats->nmalloc_small; - sstats->ndalloc_small += astats->ndalloc_small; - sstats->nrequests_small += astats->nrequests_small; + sstats->allocated_small += astats->allocated_small; + sstats->nmalloc_small += astats->nmalloc_small; + sstats->ndalloc_small += astats->ndalloc_small; + sstats->nrequests_small += astats->nrequests_small; - sstats->astats.allocated_large += astats->astats.allocated_large; - sstats->astats.nmalloc_large += astats->astats.nmalloc_large; - sstats->astats.ndalloc_large += astats->astats.ndalloc_large; - sstats->astats.nrequests_large += astats->astats.nrequests_large; + sstats->astats.allocated_large += + astats->astats.allocated_large; + sstats->astats.nmalloc_large += astats->astats.nmalloc_large; + sstats->astats.ndalloc_large 
+= astats->astats.ndalloc_large; + sstats->astats.nrequests_large += + astats->astats.nrequests_large; - sstats->astats.allocated_huge += astats->astats.allocated_huge; - sstats->astats.nmalloc_huge += astats->astats.nmalloc_huge; - sstats->astats.ndalloc_huge += astats->astats.ndalloc_huge; + sstats->astats.allocated_huge += astats->astats.allocated_huge; + sstats->astats.nmalloc_huge += astats->astats.nmalloc_huge; + sstats->astats.ndalloc_huge += astats->astats.ndalloc_huge; - for (i = 0; i < NBINS; i++) { - sstats->bstats[i].nmalloc += astats->bstats[i].nmalloc; - sstats->bstats[i].ndalloc += astats->bstats[i].ndalloc; - sstats->bstats[i].nrequests += astats->bstats[i].nrequests; - sstats->bstats[i].curregs += astats->bstats[i].curregs; - if (config_tcache) { - sstats->bstats[i].nfills += astats->bstats[i].nfills; - sstats->bstats[i].nflushes += - astats->bstats[i].nflushes; + for (i = 0; i < NBINS; i++) { + sstats->bstats[i].nmalloc += astats->bstats[i].nmalloc; + sstats->bstats[i].ndalloc += astats->bstats[i].ndalloc; + sstats->bstats[i].nrequests += + astats->bstats[i].nrequests; + sstats->bstats[i].curregs += astats->bstats[i].curregs; + if (config_tcache) { + sstats->bstats[i].nfills += + astats->bstats[i].nfills; + sstats->bstats[i].nflushes += + astats->bstats[i].nflushes; + } + sstats->bstats[i].nruns += astats->bstats[i].nruns; + sstats->bstats[i].reruns += astats->bstats[i].reruns; + sstats->bstats[i].curruns += astats->bstats[i].curruns; } - sstats->bstats[i].nruns += astats->bstats[i].nruns; - sstats->bstats[i].reruns += astats->bstats[i].reruns; - sstats->bstats[i].curruns += astats->bstats[i].curruns; - } - for (i = 0; i < nlclasses; i++) { - sstats->lstats[i].nmalloc += astats->lstats[i].nmalloc; - sstats->lstats[i].ndalloc += astats->lstats[i].ndalloc; - sstats->lstats[i].nrequests += astats->lstats[i].nrequests; - sstats->lstats[i].curruns += astats->lstats[i].curruns; - } + for (i = 0; i < nlclasses; i++) { + sstats->lstats[i].nmalloc += 
astats->lstats[i].nmalloc; + sstats->lstats[i].ndalloc += astats->lstats[i].ndalloc; + sstats->lstats[i].nrequests += + astats->lstats[i].nrequests; + sstats->lstats[i].curruns += astats->lstats[i].curruns; + } - for (i = 0; i < nhclasses; i++) { - sstats->hstats[i].nmalloc += astats->hstats[i].nmalloc; - sstats->hstats[i].ndalloc += astats->hstats[i].ndalloc; - sstats->hstats[i].curhchunks += astats->hstats[i].curhchunks; + for (i = 0; i < nhclasses; i++) { + sstats->hstats[i].nmalloc += astats->hstats[i].nmalloc; + sstats->hstats[i].ndalloc += astats->hstats[i].ndalloc; + sstats->hstats[i].curhchunks += + astats->hstats[i].curhchunks; + } } } static void -ctl_arena_refresh(arena_t *arena, unsigned i) +ctl_arena_refresh(tsdn_t *tsdn, arena_t *arena, unsigned i) { ctl_arena_stats_t *astats = &ctl_stats.arenas[i]; ctl_arena_stats_t *sstats = &ctl_stats.arenas[ctl_stats.narenas]; ctl_arena_clear(astats); - - sstats->nthreads += astats->nthreads; - if (config_stats) { - ctl_arena_stats_amerge(astats, arena); - /* Merge into sum stats as well. */ - ctl_arena_stats_smerge(sstats, astats); - } else { - astats->pactive += arena->nactive; - astats->pdirty += arena->ndirty; - /* Merge into sum stats as well. */ - sstats->pactive += arena->nactive; - sstats->pdirty += arena->ndirty; - } + ctl_arena_stats_amerge(tsdn, astats, arena); + /* Merge into sum stats as well. */ + ctl_arena_stats_smerge(sstats, astats); } static bool -ctl_grow(void) +ctl_grow(tsdn_t *tsdn) { ctl_arena_stats_t *astats; /* Initialize new arena. */ - if (arena_init(ctl_stats.narenas) == NULL) + if (arena_init(tsdn, ctl_stats.narenas) == NULL) return (true); /* Allocate extended arena stats. */ @@ -677,47 +708,32 @@ ctl_grow(void) } static void -ctl_refresh(void) +ctl_refresh(tsdn_t *tsdn) { - tsd_t *tsd; unsigned i; - bool refreshed; VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas); /* * Clear sum stats, since they will be merged into by * ctl_arena_refresh(). 
*/ - ctl_stats.arenas[ctl_stats.narenas].nthreads = 0; ctl_arena_clear(&ctl_stats.arenas[ctl_stats.narenas]); - tsd = tsd_fetch(); - for (i = 0, refreshed = false; i < ctl_stats.narenas; i++) { - tarenas[i] = arena_get(tsd, i, false, false); - if (tarenas[i] == NULL && !refreshed) { - tarenas[i] = arena_get(tsd, i, false, true); - refreshed = true; - } - } - - for (i = 0; i < ctl_stats.narenas; i++) { - if (tarenas[i] != NULL) - ctl_stats.arenas[i].nthreads = arena_nbound(i); - else - ctl_stats.arenas[i].nthreads = 0; - } + for (i = 0; i < ctl_stats.narenas; i++) + tarenas[i] = arena_get(tsdn, i, false); for (i = 0; i < ctl_stats.narenas; i++) { bool initialized = (tarenas[i] != NULL); ctl_stats.arenas[i].initialized = initialized; if (initialized) - ctl_arena_refresh(tarenas[i], i); + ctl_arena_refresh(tsdn, tarenas[i], i); } if (config_stats) { size_t base_allocated, base_resident, base_mapped; - base_stats_get(&base_allocated, &base_resident, &base_mapped); + base_stats_get(tsdn, &base_allocated, &base_resident, + &base_mapped); ctl_stats.allocated = ctl_stats.arenas[ctl_stats.narenas].allocated_small + ctl_stats.arenas[ctl_stats.narenas].astats.allocated_large + @@ -734,17 +750,19 @@ ctl_refresh(void) ctl_stats.arenas[ctl_stats.narenas].pdirty) << LG_PAGE); ctl_stats.mapped = base_mapped + ctl_stats.arenas[ctl_stats.narenas].astats.mapped; + ctl_stats.retained = + ctl_stats.arenas[ctl_stats.narenas].astats.retained; } ctl_epoch++; } static bool -ctl_init(void) +ctl_init(tsdn_t *tsdn) { bool ret; - malloc_mutex_lock(&ctl_mtx); + malloc_mutex_lock(tsdn, &ctl_mtx); if (!ctl_initialized) { /* * Allocate space for one extra arena stats element, which @@ -786,19 +804,19 @@ ctl_init(void) ctl_stats.arenas[ctl_stats.narenas].initialized = true; ctl_epoch = 0; - ctl_refresh(); + ctl_refresh(tsdn); ctl_initialized = true; } ret = false; label_return: - malloc_mutex_unlock(&ctl_mtx); + malloc_mutex_unlock(tsdn, &ctl_mtx); return (ret); } static int -ctl_lookup(const char 
*name, ctl_node_t const **nodesp, size_t *mibp, - size_t *depthp) +ctl_lookup(tsdn_t *tsdn, const char *name, ctl_node_t const **nodesp, + size_t *mibp, size_t *depthp) { int ret; const char *elm, *tdot, *dot; @@ -850,7 +868,7 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, } inode = ctl_indexed_node(node->children); - node = inode->index(mibp, *depthp, (size_t)index); + node = inode->index(tsdn, mibp, *depthp, (size_t)index); if (node == NULL) { ret = ENOENT; goto label_return; @@ -894,8 +912,8 @@ label_return: } int -ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp, - size_t newlen) +ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) { int ret; size_t depth; @@ -903,19 +921,19 @@ ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t mib[CTL_MAX_DEPTH]; const ctl_named_node_t *node; - if (!ctl_initialized && ctl_init()) { + if (!ctl_initialized && ctl_init(tsd_tsdn(tsd))) { ret = EAGAIN; goto label_return; } depth = CTL_MAX_DEPTH; - ret = ctl_lookup(name, nodes, mib, &depth); + ret = ctl_lookup(tsd_tsdn(tsd), name, nodes, mib, &depth); if (ret != 0) goto label_return; node = ctl_named_node(nodes[depth-1]); if (node != NULL && node->ctl) - ret = node->ctl(mib, depth, oldp, oldlenp, newp, newlen); + ret = node->ctl(tsd, mib, depth, oldp, oldlenp, newp, newlen); else { /* The name refers to a partial path through the ctl tree. 
*/ ret = ENOENT; @@ -926,29 +944,29 @@ label_return: } int -ctl_nametomib(const char *name, size_t *mibp, size_t *miblenp) +ctl_nametomib(tsdn_t *tsdn, const char *name, size_t *mibp, size_t *miblenp) { int ret; - if (!ctl_initialized && ctl_init()) { + if (!ctl_initialized && ctl_init(tsdn)) { ret = EAGAIN; goto label_return; } - ret = ctl_lookup(name, NULL, mibp, miblenp); + ret = ctl_lookup(tsdn, name, NULL, mibp, miblenp); label_return: return(ret); } int -ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) +ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; const ctl_named_node_t *node; size_t i; - if (!ctl_initialized && ctl_init()) { + if (!ctl_initialized && ctl_init(tsd_tsdn(tsd))) { ret = EAGAIN; goto label_return; } @@ -960,7 +978,7 @@ ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, assert(node->nchildren > 0); if (ctl_named_node(node->children) != NULL) { /* Children are named. */ - if (node->nchildren <= mib[i]) { + if (node->nchildren <= (unsigned)mib[i]) { ret = ENOENT; goto label_return; } @@ -970,7 +988,7 @@ ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, /* Indexed element. */ inode = ctl_indexed_node(node->children); - node = inode->index(mib, miblen, mib[i]); + node = inode->index(tsd_tsdn(tsd), mib, miblen, mib[i]); if (node == NULL) { ret = ENOENT; goto label_return; @@ -980,7 +998,7 @@ ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, /* Call the ctl function. */ if (node && node->ctl) - ret = node->ctl(mib, miblen, oldp, oldlenp, newp, newlen); + ret = node->ctl(tsd, mib, miblen, oldp, oldlenp, newp, newlen); else { /* Partial MIB. 
*/ ret = ENOENT; @@ -994,7 +1012,7 @@ bool ctl_boot(void) { - if (malloc_mutex_init(&ctl_mtx)) + if (malloc_mutex_init(&ctl_mtx, "ctl", WITNESS_RANK_CTL)) return (true); ctl_initialized = false; @@ -1003,24 +1021,24 @@ ctl_boot(void) } void -ctl_prefork(void) +ctl_prefork(tsdn_t *tsdn) { - malloc_mutex_prefork(&ctl_mtx); + malloc_mutex_prefork(tsdn, &ctl_mtx); } void -ctl_postfork_parent(void) +ctl_postfork_parent(tsdn_t *tsdn) { - malloc_mutex_postfork_parent(&ctl_mtx); + malloc_mutex_postfork_parent(tsdn, &ctl_mtx); } void -ctl_postfork_child(void) +ctl_postfork_child(tsdn_t *tsdn) { - malloc_mutex_postfork_child(&ctl_mtx); + malloc_mutex_postfork_child(tsdn, &ctl_mtx); } /******************************************************************************/ @@ -1077,8 +1095,8 @@ ctl_postfork_child(void) */ #define CTL_RO_CLGEN(c, l, n, v, t) \ static int \ -n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ - void *newp, size_t newlen) \ +n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ + size_t *oldlenp, void *newp, size_t newlen) \ { \ int ret; \ t oldval; \ @@ -1086,7 +1104,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ if (!(c)) \ return (ENOENT); \ if (l) \ - malloc_mutex_lock(&ctl_mtx); \ + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); \ READONLY(); \ oldval = (v); \ READ(oldval, t); \ @@ -1094,47 +1112,47 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ ret = 0; \ label_return: \ if (l) \ - malloc_mutex_unlock(&ctl_mtx); \ + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); \ return (ret); \ } #define CTL_RO_CGEN(c, n, v, t) \ static int \ -n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ - void *newp, size_t newlen) \ +n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ + size_t *oldlenp, void *newp, size_t newlen) \ { \ int ret; \ t oldval; \ \ if (!(c)) \ return (ENOENT); \ - malloc_mutex_lock(&ctl_mtx); \ + 
malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); \ READONLY(); \ oldval = (v); \ READ(oldval, t); \ \ ret = 0; \ label_return: \ - malloc_mutex_unlock(&ctl_mtx); \ + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); \ return (ret); \ } #define CTL_RO_GEN(n, v, t) \ static int \ -n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ - void *newp, size_t newlen) \ +n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ + size_t *oldlenp, void *newp, size_t newlen) \ { \ int ret; \ t oldval; \ \ - malloc_mutex_lock(&ctl_mtx); \ + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); \ READONLY(); \ oldval = (v); \ READ(oldval, t); \ \ ret = 0; \ label_return: \ - malloc_mutex_unlock(&ctl_mtx); \ + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); \ return (ret); \ } @@ -1144,8 +1162,8 @@ label_return: \ */ #define CTL_RO_NL_CGEN(c, n, v, t) \ static int \ -n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ - void *newp, size_t newlen) \ +n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ + size_t *oldlenp, void *newp, size_t newlen) \ { \ int ret; \ t oldval; \ @@ -1163,8 +1181,8 @@ label_return: \ #define CTL_RO_NL_GEN(n, v, t) \ static int \ -n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ - void *newp, size_t newlen) \ +n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ + size_t *oldlenp, void *newp, size_t newlen) \ { \ int ret; \ t oldval; \ @@ -1180,17 +1198,15 @@ label_return: \ #define CTL_TSD_RO_NL_CGEN(c, n, m, t) \ static int \ -n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ - void *newp, size_t newlen) \ +n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ + size_t *oldlenp, void *newp, size_t newlen) \ { \ int ret; \ t oldval; \ - tsd_t *tsd; \ \ if (!(c)) \ return (ENOENT); \ READONLY(); \ - tsd = tsd_fetch(); \ oldval = (m(tsd)); \ READ(oldval, t); \ \ @@ -1199,17 +1215,17 @@ label_return: \ return (ret); \ } -#define 
CTL_RO_BOOL_CONFIG_GEN(n) \ +#define CTL_RO_CONFIG_GEN(n, t) \ static int \ -n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ - void *newp, size_t newlen) \ +n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ + size_t *oldlenp, void *newp, size_t newlen) \ { \ int ret; \ - bool oldval; \ + t oldval; \ \ READONLY(); \ oldval = n; \ - READ(oldval, bool); \ + READ(oldval, t); \ \ ret = 0; \ label_return: \ @@ -1221,48 +1237,51 @@ label_return: \ CTL_RO_NL_GEN(version, JEMALLOC_VERSION, const char *) static int -epoch_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) +epoch_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; UNUSED uint64_t newval; - malloc_mutex_lock(&ctl_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); WRITE(newval, uint64_t); if (newp != NULL) - ctl_refresh(); + ctl_refresh(tsd_tsdn(tsd)); READ(ctl_epoch, uint64_t); ret = 0; label_return: - malloc_mutex_unlock(&ctl_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); return (ret); } /******************************************************************************/ -CTL_RO_BOOL_CONFIG_GEN(config_cache_oblivious) -CTL_RO_BOOL_CONFIG_GEN(config_debug) -CTL_RO_BOOL_CONFIG_GEN(config_fill) -CTL_RO_BOOL_CONFIG_GEN(config_lazy_lock) -CTL_RO_BOOL_CONFIG_GEN(config_munmap) -CTL_RO_BOOL_CONFIG_GEN(config_prof) -CTL_RO_BOOL_CONFIG_GEN(config_prof_libgcc) -CTL_RO_BOOL_CONFIG_GEN(config_prof_libunwind) -CTL_RO_BOOL_CONFIG_GEN(config_stats) -CTL_RO_BOOL_CONFIG_GEN(config_tcache) -CTL_RO_BOOL_CONFIG_GEN(config_tls) -CTL_RO_BOOL_CONFIG_GEN(config_utrace) -CTL_RO_BOOL_CONFIG_GEN(config_valgrind) -CTL_RO_BOOL_CONFIG_GEN(config_xmalloc) +CTL_RO_CONFIG_GEN(config_cache_oblivious, bool) +CTL_RO_CONFIG_GEN(config_debug, bool) +CTL_RO_CONFIG_GEN(config_fill, bool) +CTL_RO_CONFIG_GEN(config_lazy_lock, bool) +CTL_RO_CONFIG_GEN(config_malloc_conf, const char *) 
+CTL_RO_CONFIG_GEN(config_munmap, bool) +CTL_RO_CONFIG_GEN(config_prof, bool) +CTL_RO_CONFIG_GEN(config_prof_libgcc, bool) +CTL_RO_CONFIG_GEN(config_prof_libunwind, bool) +CTL_RO_CONFIG_GEN(config_stats, bool) +CTL_RO_CONFIG_GEN(config_tcache, bool) +CTL_RO_CONFIG_GEN(config_tls, bool) +CTL_RO_CONFIG_GEN(config_utrace, bool) +CTL_RO_CONFIG_GEN(config_valgrind, bool) +CTL_RO_CONFIG_GEN(config_xmalloc, bool) /******************************************************************************/ CTL_RO_NL_GEN(opt_abort, opt_abort, bool) CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) CTL_RO_NL_GEN(opt_lg_chunk, opt_lg_chunk, size_t) -CTL_RO_NL_GEN(opt_narenas, opt_narenas, size_t) +CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned) +CTL_RO_NL_GEN(opt_purge, purge_mode_names[opt_purge], const char *) CTL_RO_NL_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t) +CTL_RO_NL_GEN(opt_decay_time, opt_decay_time, ssize_t) CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, const char *) CTL_RO_NL_CGEN(config_fill, opt_quarantine, opt_quarantine, size_t) @@ -1287,20 +1306,18 @@ CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool) /******************************************************************************/ static int -thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) +thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; - tsd_t *tsd; arena_t *oldarena; unsigned newind, oldind; - tsd = tsd_fetch(); oldarena = arena_choose(tsd, NULL); if (oldarena == NULL) return (EAGAIN); - malloc_mutex_lock(&ctl_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); newind = oldind = oldarena->ind; WRITE(newind, unsigned); READ(oldind, unsigned); @@ -1314,7 +1331,7 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, } /* Initialize arena if necessary. 
*/ - newarena = arena_get(tsd, newind, true, true); + newarena = arena_get(tsd_tsdn(tsd), newind, true); if (newarena == NULL) { ret = EAGAIN; goto label_return; @@ -1324,15 +1341,15 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, if (config_tcache) { tcache_t *tcache = tsd_tcache_get(tsd); if (tcache != NULL) { - tcache_arena_reassociate(tcache, oldarena, - newarena); + tcache_arena_reassociate(tsd_tsdn(tsd), tcache, + oldarena, newarena); } } } ret = 0; label_return: - malloc_mutex_unlock(&ctl_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); return (ret); } @@ -1346,8 +1363,8 @@ CTL_TSD_RO_NL_CGEN(config_stats, thread_deallocatedp, tsd_thread_deallocatedp_get, uint64_t *) static int -thread_tcache_enabled_ctl(const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) +thread_tcache_enabled_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; bool oldval; @@ -1371,8 +1388,8 @@ label_return: } static int -thread_tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) +thread_tcache_flush_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; @@ -1390,7 +1407,7 @@ label_return: } static int -thread_prof_name_ctl(const size_t *mib, size_t miblen, void *oldp, +thread_prof_name_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; @@ -1401,20 +1418,16 @@ thread_prof_name_ctl(const size_t *mib, size_t miblen, void *oldp, READ_XOR_WRITE(); if (newp != NULL) { - tsd_t *tsd; - if (newlen != sizeof(const char *)) { ret = EINVAL; goto label_return; } - tsd = tsd_fetch(); - if ((ret = prof_thread_name_set(tsd, *(const char **)newp)) != 0) goto label_return; } else { - const char *oldname = prof_thread_name_get(); + const char *oldname = prof_thread_name_get(tsd); 
READ(oldname, const char *); } @@ -1424,7 +1437,7 @@ label_return: } static int -thread_prof_active_ctl(const size_t *mib, size_t miblen, void *oldp, +thread_prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; @@ -1433,13 +1446,13 @@ thread_prof_active_ctl(const size_t *mib, size_t miblen, void *oldp, if (!config_prof) return (ENOENT); - oldval = prof_thread_active_get(); + oldval = prof_thread_active_get(tsd); if (newp != NULL) { if (newlen != sizeof(bool)) { ret = EINVAL; goto label_return; } - if (prof_thread_active_set(*(bool *)newp)) { + if (prof_thread_active_set(tsd, *(bool *)newp)) { ret = EAGAIN; goto label_return; } @@ -1454,19 +1467,16 @@ label_return: /******************************************************************************/ static int -tcache_create_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) +tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; - tsd_t *tsd; unsigned tcache_ind; if (!config_tcache) return (ENOENT); - tsd = tsd_fetch(); - - malloc_mutex_lock(&ctl_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); READONLY(); if (tcaches_create(tsd, &tcache_ind)) { ret = EFAULT; @@ -1476,23 +1486,20 @@ tcache_create_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, ret = 0; label_return: - malloc_mutex_unlock(&ctl_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); return (ret); } static int -tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) +tcache_flush_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; - tsd_t *tsd; unsigned tcache_ind; if (!config_tcache) return (ENOENT); - tsd = tsd_fetch(); - WRITEONLY(); tcache_ind = UINT_MAX; WRITE(tcache_ind, unsigned); @@ -1508,18 +1515,15 @@ 
label_return: } static int -tcache_destroy_ctl(const size_t *mib, size_t miblen, void *oldp, +tcache_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; - tsd_t *tsd; unsigned tcache_ind; if (!config_tcache) return (ENOENT); - tsd = tsd_fetch(); - WRITEONLY(); tcache_ind = UINT_MAX; WRITE(tcache_ind, unsigned); @@ -1536,48 +1540,56 @@ label_return: /******************************************************************************/ -/* ctl_mutex must be held during execution of this function. */ static void -arena_purge(unsigned arena_ind) +arena_i_purge(tsdn_t *tsdn, unsigned arena_ind, bool all) { - tsd_t *tsd; - unsigned i; - bool refreshed; - VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas); - tsd = tsd_fetch(); - for (i = 0, refreshed = false; i < ctl_stats.narenas; i++) { - tarenas[i] = arena_get(tsd, i, false, false); - if (tarenas[i] == NULL && !refreshed) { - tarenas[i] = arena_get(tsd, i, false, true); - refreshed = true; - } - } + malloc_mutex_lock(tsdn, &ctl_mtx); + { + unsigned narenas = ctl_stats.narenas; - if (arena_ind == ctl_stats.narenas) { - unsigned i; - for (i = 0; i < ctl_stats.narenas; i++) { - if (tarenas[i] != NULL) - arena_purge_all(tarenas[i]); + if (arena_ind == narenas) { + unsigned i; + VARIABLE_ARRAY(arena_t *, tarenas, narenas); + + for (i = 0; i < narenas; i++) + tarenas[i] = arena_get(tsdn, i, false); + + /* + * No further need to hold ctl_mtx, since narenas and + * tarenas contain everything needed below. + */ + malloc_mutex_unlock(tsdn, &ctl_mtx); + + for (i = 0; i < narenas; i++) { + if (tarenas[i] != NULL) + arena_purge(tsdn, tarenas[i], all); + } + } else { + arena_t *tarena; + + assert(arena_ind < narenas); + + tarena = arena_get(tsdn, arena_ind, false); + + /* No further need to hold ctl_mtx. 
*/ + malloc_mutex_unlock(tsdn, &ctl_mtx); + + if (tarena != NULL) + arena_purge(tsdn, tarena, all); } - } else { - assert(arena_ind < ctl_stats.narenas); - if (tarenas[arena_ind] != NULL) - arena_purge_all(tarenas[arena_ind]); } } static int -arena_i_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) +arena_i_purge_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; READONLY(); WRITEONLY(); - malloc_mutex_lock(&ctl_mtx); - arena_purge(mib[1]); - malloc_mutex_unlock(&ctl_mtx); + arena_i_purge(tsd_tsdn(tsd), (unsigned)mib[1], true); ret = 0; label_return: @@ -1585,16 +1597,65 @@ label_return: } static int -arena_i_dss_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) +arena_i_decay_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + + READONLY(); + WRITEONLY(); + arena_i_purge(tsd_tsdn(tsd), (unsigned)mib[1], false); + + ret = 0; +label_return: + return (ret); +} + +static int +arena_i_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + unsigned arena_ind; + arena_t *arena; + + READONLY(); + WRITEONLY(); + + if ((config_valgrind && unlikely(in_valgrind)) || (config_fill && + unlikely(opt_quarantine))) { + ret = EFAULT; + goto label_return; + } + + arena_ind = (unsigned)mib[1]; + if (config_debug) { + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); + assert(arena_ind < ctl_stats.narenas); + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); + } + assert(arena_ind >= opt_narenas); + + arena = arena_get(tsd_tsdn(tsd), arena_ind, false); + + arena_reset(tsd, arena); + + ret = 0; +label_return: + return (ret); +} + +static int +arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; const char *dss 
= NULL; - unsigned arena_ind = mib[1]; + unsigned arena_ind = (unsigned)mib[1]; dss_prec_t dss_prec_old = dss_prec_limit; dss_prec_t dss_prec = dss_prec_limit; - malloc_mutex_lock(&ctl_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); WRITE(dss, const char *); if (dss != NULL) { int i; @@ -1615,13 +1676,13 @@ arena_i_dss_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, } if (arena_ind < ctl_stats.narenas) { - arena_t *arena = arena_get(tsd_fetch(), arena_ind, false, true); + arena_t *arena = arena_get(tsd_tsdn(tsd), arena_ind, false); if (arena == NULL || (dss_prec != dss_prec_limit && - arena_dss_prec_set(arena, dss_prec))) { + arena_dss_prec_set(tsd_tsdn(tsd), arena, dss_prec))) { ret = EFAULT; goto label_return; } - dss_prec_old = arena_dss_prec_get(arena); + dss_prec_old = arena_dss_prec_get(tsd_tsdn(tsd), arena); } else { if (dss_prec != dss_prec_limit && chunk_dss_prec_set(dss_prec)) { @@ -1636,26 +1697,26 @@ arena_i_dss_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, ret = 0; label_return: - malloc_mutex_unlock(&ctl_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); return (ret); } static int -arena_i_lg_dirty_mult_ctl(const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) +arena_i_lg_dirty_mult_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; - unsigned arena_ind = mib[1]; + unsigned arena_ind = (unsigned)mib[1]; arena_t *arena; - arena = arena_get(tsd_fetch(), arena_ind, false, true); + arena = arena_get(tsd_tsdn(tsd), arena_ind, false); if (arena == NULL) { ret = EFAULT; goto label_return; } if (oldp != NULL && oldlenp != NULL) { - size_t oldval = arena_lg_dirty_mult_get(arena); + size_t oldval = arena_lg_dirty_mult_get(tsd_tsdn(tsd), arena); READ(oldval, ssize_t); } if (newp != NULL) { @@ -1663,7 +1724,8 @@ arena_i_lg_dirty_mult_ctl(const size_t *mib, size_t miblen, void *oldp, ret = EINVAL; goto 
label_return; } - if (arena_lg_dirty_mult_set(arena, *(ssize_t *)newp)) { + if (arena_lg_dirty_mult_set(tsd_tsdn(tsd), arena, + *(ssize_t *)newp)) { ret = EFAULT; goto label_return; } @@ -1675,24 +1737,60 @@ label_return: } static int -arena_i_chunk_hooks_ctl(const size_t *mib, size_t miblen, void *oldp, +arena_i_decay_time_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; - unsigned arena_ind = mib[1]; + unsigned arena_ind = (unsigned)mib[1]; arena_t *arena; - malloc_mutex_lock(&ctl_mtx); + arena = arena_get(tsd_tsdn(tsd), arena_ind, false); + if (arena == NULL) { + ret = EFAULT; + goto label_return; + } + + if (oldp != NULL && oldlenp != NULL) { + size_t oldval = arena_decay_time_get(tsd_tsdn(tsd), arena); + READ(oldval, ssize_t); + } + if (newp != NULL) { + if (newlen != sizeof(ssize_t)) { + ret = EINVAL; + goto label_return; + } + if (arena_decay_time_set(tsd_tsdn(tsd), arena, + *(ssize_t *)newp)) { + ret = EFAULT; + goto label_return; + } + } + + ret = 0; +label_return: + return (ret); +} + +static int +arena_i_chunk_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + unsigned arena_ind = (unsigned)mib[1]; + arena_t *arena; + + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); if (arena_ind < narenas_total_get() && (arena = - arena_get(tsd_fetch(), arena_ind, false, true)) != NULL) { + arena_get(tsd_tsdn(tsd), arena_ind, false)) != NULL) { if (newp != NULL) { chunk_hooks_t old_chunk_hooks, new_chunk_hooks; WRITE(new_chunk_hooks, chunk_hooks_t); - old_chunk_hooks = chunk_hooks_set(arena, + old_chunk_hooks = chunk_hooks_set(tsd_tsdn(tsd), arena, &new_chunk_hooks); READ(old_chunk_hooks, chunk_hooks_t); } else { - chunk_hooks_t old_chunk_hooks = chunk_hooks_get(arena); + chunk_hooks_t old_chunk_hooks = + chunk_hooks_get(tsd_tsdn(tsd), arena); READ(old_chunk_hooks, chunk_hooks_t); } } else { @@ -1701,16 +1799,16 @@ 
arena_i_chunk_hooks_ctl(const size_t *mib, size_t miblen, void *oldp, } ret = 0; label_return: - malloc_mutex_unlock(&ctl_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); return (ret); } static const ctl_named_node_t * -arena_i_index(const size_t *mib, size_t miblen, size_t i) +arena_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { - const ctl_named_node_t * ret; + const ctl_named_node_t *ret; - malloc_mutex_lock(&ctl_mtx); + malloc_mutex_lock(tsdn, &ctl_mtx); if (i > ctl_stats.narenas) { ret = NULL; goto label_return; @@ -1718,20 +1816,20 @@ arena_i_index(const size_t *mib, size_t miblen, size_t i) ret = super_arena_i_node; label_return: - malloc_mutex_unlock(&ctl_mtx); + malloc_mutex_unlock(tsdn, &ctl_mtx); return (ret); } /******************************************************************************/ static int -arenas_narenas_ctl(const size_t *mib, size_t miblen, void *oldp, +arenas_narenas_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; unsigned narenas; - malloc_mutex_lock(&ctl_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); READONLY(); if (*oldlenp != sizeof(unsigned)) { ret = EINVAL; @@ -1742,23 +1840,23 @@ arenas_narenas_ctl(const size_t *mib, size_t miblen, void *oldp, ret = 0; label_return: - malloc_mutex_unlock(&ctl_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); return (ret); } static int -arenas_initialized_ctl(const size_t *mib, size_t miblen, void *oldp, +arenas_initialized_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; unsigned nread, i; - malloc_mutex_lock(&ctl_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); READONLY(); if (*oldlenp != ctl_stats.narenas * sizeof(bool)) { ret = EINVAL; nread = (*oldlenp < ctl_stats.narenas * sizeof(bool)) - ? (*oldlenp / sizeof(bool)) : ctl_stats.narenas; + ? 
(unsigned)(*oldlenp / sizeof(bool)) : ctl_stats.narenas; } else { ret = 0; nread = ctl_stats.narenas; @@ -1768,13 +1866,13 @@ arenas_initialized_ctl(const size_t *mib, size_t miblen, void *oldp, ((bool *)oldp)[i] = ctl_stats.arenas[i].initialized; label_return: - malloc_mutex_unlock(&ctl_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); return (ret); } static int -arenas_lg_dirty_mult_ctl(const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) +arenas_lg_dirty_mult_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; @@ -1798,6 +1896,32 @@ label_return: return (ret); } +static int +arenas_decay_time_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + + if (oldp != NULL && oldlenp != NULL) { + size_t oldval = arena_decay_time_default_get(); + READ(oldval, ssize_t); + } + if (newp != NULL) { + if (newlen != sizeof(ssize_t)) { + ret = EINVAL; + goto label_return; + } + if (arena_decay_time_default_set(*(ssize_t *)newp)) { + ret = EFAULT; + goto label_return; + } + } + + ret = 0; +label_return: + return (ret); +} + CTL_RO_NL_GEN(arenas_quantum, QUANTUM, size_t) CTL_RO_NL_GEN(arenas_page, PAGE, size_t) CTL_RO_NL_CGEN(config_tcache, arenas_tcache_max, tcache_maxclass, size_t) @@ -1807,7 +1931,7 @@ CTL_RO_NL_GEN(arenas_bin_i_size, arena_bin_info[mib[2]].reg_size, size_t) CTL_RO_NL_GEN(arenas_bin_i_nregs, arena_bin_info[mib[2]].nregs, uint32_t) CTL_RO_NL_GEN(arenas_bin_i_run_size, arena_bin_info[mib[2]].run_size, size_t) static const ctl_named_node_t * -arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) +arenas_bin_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { if (i > NBINS) @@ -1816,9 +1940,9 @@ arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) } CTL_RO_NL_GEN(arenas_nlruns, nlclasses, unsigned) -CTL_RO_NL_GEN(arenas_lrun_i_size, 
index2size(NBINS+mib[2]), size_t) +CTL_RO_NL_GEN(arenas_lrun_i_size, index2size(NBINS+(szind_t)mib[2]), size_t) static const ctl_named_node_t * -arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i) +arenas_lrun_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { if (i > nlclasses) @@ -1827,9 +1951,10 @@ arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i) } CTL_RO_NL_GEN(arenas_nhchunks, nhclasses, unsigned) -CTL_RO_NL_GEN(arenas_hchunk_i_size, index2size(NBINS+nlclasses+mib[2]), size_t) +CTL_RO_NL_GEN(arenas_hchunk_i_size, index2size(NBINS+nlclasses+(szind_t)mib[2]), + size_t) static const ctl_named_node_t * -arenas_hchunk_i_index(const size_t *mib, size_t miblen, size_t i) +arenas_hchunk_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { if (i > nhclasses) @@ -1838,15 +1963,15 @@ arenas_hchunk_i_index(const size_t *mib, size_t miblen, size_t i) } static int -arenas_extend_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) +arenas_extend_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; unsigned narenas; - malloc_mutex_lock(&ctl_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); READONLY(); - if (ctl_grow()) { + if (ctl_grow(tsd_tsdn(tsd))) { ret = EAGAIN; goto label_return; } @@ -1855,14 +1980,40 @@ arenas_extend_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, ret = 0; label_return: - malloc_mutex_unlock(&ctl_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); return (ret); } /******************************************************************************/ static int -prof_thread_active_init_ctl(const size_t *mib, size_t miblen, void *oldp, +prof_thread_active_init_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + bool oldval; + + if (!config_prof) + return (ENOENT); + + if (newp != NULL) { + if (newlen 
!= sizeof(bool)) { + ret = EINVAL; + goto label_return; + } + oldval = prof_thread_active_init_set(tsd_tsdn(tsd), + *(bool *)newp); + } else + oldval = prof_thread_active_init_get(tsd_tsdn(tsd)); + READ(oldval, bool); + + ret = 0; +label_return: + return (ret); +} + +static int +prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; @@ -1876,9 +2027,9 @@ prof_thread_active_init_ctl(const size_t *mib, size_t miblen, void *oldp, ret = EINVAL; goto label_return; } - oldval = prof_thread_active_init_set(*(bool *)newp); + oldval = prof_active_set(tsd_tsdn(tsd), *(bool *)newp); } else - oldval = prof_thread_active_init_get(); + oldval = prof_active_get(tsd_tsdn(tsd)); READ(oldval, bool); ret = 0; @@ -1887,33 +2038,8 @@ label_return: } static int -prof_active_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) -{ - int ret; - bool oldval; - - if (!config_prof) - return (ENOENT); - - if (newp != NULL) { - if (newlen != sizeof(bool)) { - ret = EINVAL; - goto label_return; - } - oldval = prof_active_set(*(bool *)newp); - } else - oldval = prof_active_get(); - READ(oldval, bool); - - ret = 0; -label_return: - return (ret); -} - -static int -prof_dump_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) +prof_dump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; const char *filename = NULL; @@ -1924,7 +2050,7 @@ prof_dump_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, WRITEONLY(); WRITE(filename, const char *); - if (prof_mdump(filename)) { + if (prof_mdump(tsd, filename)) { ret = EFAULT; goto label_return; } @@ -1935,8 +2061,8 @@ label_return: } static int -prof_gdump_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) +prof_gdump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void 
*oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; bool oldval; @@ -1949,9 +2075,9 @@ prof_gdump_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, ret = EINVAL; goto label_return; } - oldval = prof_gdump_set(*(bool *)newp); + oldval = prof_gdump_set(tsd_tsdn(tsd), *(bool *)newp); } else - oldval = prof_gdump_get(); + oldval = prof_gdump_get(tsd_tsdn(tsd)); READ(oldval, bool); ret = 0; @@ -1960,12 +2086,11 @@ label_return: } static int -prof_reset_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) +prof_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; size_t lg_sample = lg_prof_sample; - tsd_t *tsd; if (!config_prof) return (ENOENT); @@ -1975,8 +2100,6 @@ prof_reset_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, if (lg_sample >= (sizeof(uint64_t) << 3)) lg_sample = (sizeof(uint64_t) << 3) - 1; - tsd = tsd_fetch(); - prof_reset(tsd, lg_sample); ret = 0; @@ -1995,15 +2118,20 @@ CTL_RO_CGEN(config_stats, stats_active, ctl_stats.active, size_t) CTL_RO_CGEN(config_stats, stats_metadata, ctl_stats.metadata, size_t) CTL_RO_CGEN(config_stats, stats_resident, ctl_stats.resident, size_t) CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats.mapped, size_t) +CTL_RO_CGEN(config_stats, stats_retained, ctl_stats.retained, size_t) CTL_RO_GEN(stats_arenas_i_dss, ctl_stats.arenas[mib[2]].dss, const char *) CTL_RO_GEN(stats_arenas_i_lg_dirty_mult, ctl_stats.arenas[mib[2]].lg_dirty_mult, ssize_t) +CTL_RO_GEN(stats_arenas_i_decay_time, ctl_stats.arenas[mib[2]].decay_time, + ssize_t) CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned) CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t) CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_mapped, ctl_stats.arenas[mib[2]].astats.mapped, size_t) 
+CTL_RO_CGEN(config_stats, stats_arenas_i_retained, + ctl_stats.arenas[mib[2]].astats.retained, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_npurge, ctl_stats.arenas[mib[2]].astats.npurge, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_nmadvise, @@ -2060,7 +2188,8 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curruns, ctl_stats.arenas[mib[2]].bstats[mib[4]].curruns, size_t) static const ctl_named_node_t * -stats_arenas_i_bins_j_index(const size_t *mib, size_t miblen, size_t j) +stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, + size_t j) { if (j > NBINS) @@ -2078,7 +2207,8 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_curruns, ctl_stats.arenas[mib[2]].lstats[mib[4]].curruns, size_t) static const ctl_named_node_t * -stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j) +stats_arenas_i_lruns_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, + size_t j) { if (j > nlclasses) @@ -2097,7 +2227,8 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_hchunks_j_curhchunks, ctl_stats.arenas[mib[2]].hstats[mib[4]].curhchunks, size_t) static const ctl_named_node_t * -stats_arenas_i_hchunks_j_index(const size_t *mib, size_t miblen, size_t j) +stats_arenas_i_hchunks_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, + size_t j) { if (j > nhclasses) @@ -2106,11 +2237,11 @@ stats_arenas_i_hchunks_j_index(const size_t *mib, size_t miblen, size_t j) } static const ctl_named_node_t * -stats_arenas_i_index(const size_t *mib, size_t miblen, size_t i) +stats_arenas_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { const ctl_named_node_t * ret; - malloc_mutex_lock(&ctl_mtx); + malloc_mutex_lock(tsdn, &ctl_mtx); if (i > ctl_stats.narenas || !ctl_stats.arenas[i].initialized) { ret = NULL; goto label_return; @@ -2118,6 +2249,6 @@ stats_arenas_i_index(const size_t *mib, size_t miblen, size_t i) ret = super_stats_arenas_i_node; label_return: - malloc_mutex_unlock(&ctl_mtx); + malloc_mutex_unlock(tsdn, 
&ctl_mtx); return (ret); } diff --git a/deps/jemalloc/src/extent.c b/deps/jemalloc/src/extent.c index 13f94411c..218156c60 100644 --- a/deps/jemalloc/src/extent.c +++ b/deps/jemalloc/src/extent.c @@ -3,45 +3,48 @@ /******************************************************************************/ +/* + * Round down to the nearest chunk size that can actually be requested during + * normal huge allocation. + */ JEMALLOC_INLINE_C size_t extent_quantize(size_t size) { + size_t ret; + szind_t ind; - /* - * Round down to the nearest chunk size that can actually be requested - * during normal huge allocation. - */ - return (index2size(size2index(size + 1) - 1)); -} + assert(size > 0); -JEMALLOC_INLINE_C int -extent_szad_comp(extent_node_t *a, extent_node_t *b) -{ - int ret; - size_t a_qsize = extent_quantize(extent_node_size_get(a)); - size_t b_qsize = extent_quantize(extent_node_size_get(b)); - - /* - * Compare based on quantized size rather than size, in order to sort - * equally useful extents only by address. - */ - ret = (a_qsize > b_qsize) - (a_qsize < b_qsize); - if (ret == 0) { - uintptr_t a_addr = (uintptr_t)extent_node_addr_get(a); - uintptr_t b_addr = (uintptr_t)extent_node_addr_get(b); - - ret = (a_addr > b_addr) - (a_addr < b_addr); + ind = size2index(size + 1); + if (ind == 0) { + /* Avoid underflow. */ + return (index2size(0)); } - + ret = index2size(ind - 1); + assert(ret <= size); return (ret); } -/* Generate red-black tree functions. 
*/ -rb_gen(, extent_tree_szad_, extent_tree_t, extent_node_t, szad_link, - extent_szad_comp) +JEMALLOC_INLINE_C int +extent_sz_comp(const extent_node_t *a, const extent_node_t *b) +{ + size_t a_qsize = extent_quantize(extent_node_size_get(a)); + size_t b_qsize = extent_quantize(extent_node_size_get(b)); + + return ((a_qsize > b_qsize) - (a_qsize < b_qsize)); +} JEMALLOC_INLINE_C int -extent_ad_comp(extent_node_t *a, extent_node_t *b) +extent_sn_comp(const extent_node_t *a, const extent_node_t *b) +{ + size_t a_sn = extent_node_sn_get(a); + size_t b_sn = extent_node_sn_get(b); + + return ((a_sn > b_sn) - (a_sn < b_sn)); +} + +JEMALLOC_INLINE_C int +extent_ad_comp(const extent_node_t *a, const extent_node_t *b) { uintptr_t a_addr = (uintptr_t)extent_node_addr_get(a); uintptr_t b_addr = (uintptr_t)extent_node_addr_get(b); @@ -49,5 +52,26 @@ extent_ad_comp(extent_node_t *a, extent_node_t *b) return ((a_addr > b_addr) - (a_addr < b_addr)); } +JEMALLOC_INLINE_C int +extent_szsnad_comp(const extent_node_t *a, const extent_node_t *b) +{ + int ret; + + ret = extent_sz_comp(a, b); + if (ret != 0) + return (ret); + + ret = extent_sn_comp(a, b); + if (ret != 0) + return (ret); + + ret = extent_ad_comp(a, b); + return (ret); +} + +/* Generate red-black tree functions. */ +rb_gen(, extent_tree_szsnad_, extent_tree_t, extent_node_t, szsnad_link, + extent_szsnad_comp) + /* Generate red-black tree functions. 
*/ rb_gen(, extent_tree_ad_, extent_tree_t, extent_node_t, ad_link, extent_ad_comp) diff --git a/deps/jemalloc/src/huge.c b/deps/jemalloc/src/huge.c index 1e9a66512..8abd8c00c 100644 --- a/deps/jemalloc/src/huge.c +++ b/deps/jemalloc/src/huge.c @@ -15,12 +15,21 @@ huge_node_get(const void *ptr) } static bool -huge_node_set(const void *ptr, extent_node_t *node) +huge_node_set(tsdn_t *tsdn, const void *ptr, extent_node_t *node) { assert(extent_node_addr_get(node) == ptr); assert(!extent_node_achunk_get(node)); - return (chunk_register(ptr, node)); + return (chunk_register(tsdn, ptr, node)); +} + +static void +huge_node_reset(tsdn_t *tsdn, const void *ptr, extent_node_t *node) +{ + bool err; + + err = huge_node_set(tsdn, ptr, node); + assert(!err); } static void @@ -31,39 +40,39 @@ huge_node_unset(const void *ptr, const extent_node_t *node) } void * -huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, - tcache_t *tcache) +huge_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero) { - size_t usize; - usize = s2u(size); - if (usize == 0) { - /* size_t overflow. */ - return (NULL); - } + assert(usize == s2u(usize)); - return (huge_palloc(tsd, arena, usize, chunksize, zero, tcache)); + return (huge_palloc(tsdn, arena, usize, chunksize, zero)); } void * -huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, - bool zero, tcache_t *tcache) +huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, + bool zero) { void *ret; - size_t usize; + size_t ausize; + arena_t *iarena; extent_node_t *node; + size_t sn; bool is_zeroed; /* Allocate one or more contiguous chunks for this request. */ - usize = sa2u(size, alignment); - if (unlikely(usize == 0)) + assert(!tsdn_null(tsdn) || arena != NULL); + + ausize = sa2u(usize, alignment); + if (unlikely(ausize == 0 || ausize > HUGE_MAXCLASS)) return (NULL); - assert(usize >= chunksize); + assert(ausize >= chunksize); /* Allocate an extent node with which to track the chunk. 
*/ - node = ipallocztm(tsd, CACHELINE_CEILING(sizeof(extent_node_t)), - CACHELINE, false, tcache, true, arena); + iarena = (!tsdn_null(tsdn)) ? arena_ichoose(tsdn_tsd(tsdn), NULL) : + a0get(); + node = ipallocztm(tsdn, CACHELINE_CEILING(sizeof(extent_node_t)), + CACHELINE, false, NULL, true, iarena); if (node == NULL) return (NULL); @@ -72,33 +81,35 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, * it is possible to make correct junk/zero fill decisions below. */ is_zeroed = zero; - arena = arena_choose(tsd, arena); - if (unlikely(arena == NULL) || (ret = arena_chunk_alloc_huge(arena, - size, alignment, &is_zeroed)) == NULL) { - idalloctm(tsd, node, tcache, true); + if (likely(!tsdn_null(tsdn))) + arena = arena_choose(tsdn_tsd(tsdn), arena); + if (unlikely(arena == NULL) || (ret = arena_chunk_alloc_huge(tsdn, + arena, usize, alignment, &sn, &is_zeroed)) == NULL) { + idalloctm(tsdn, node, NULL, true, true); return (NULL); } - extent_node_init(node, arena, ret, size, is_zeroed, true); + extent_node_init(node, arena, ret, usize, sn, is_zeroed, true); - if (huge_node_set(ret, node)) { - arena_chunk_dalloc_huge(arena, ret, size); - idalloctm(tsd, node, tcache, true); + if (huge_node_set(tsdn, ret, node)) { + arena_chunk_dalloc_huge(tsdn, arena, ret, usize, sn); + idalloctm(tsdn, node, NULL, true, true); return (NULL); } /* Insert node into huge. */ - malloc_mutex_lock(&arena->huge_mtx); + malloc_mutex_lock(tsdn, &arena->huge_mtx); ql_elm_new(node, ql_link); ql_tail_insert(&arena->huge, node, ql_link); - malloc_mutex_unlock(&arena->huge_mtx); + malloc_mutex_unlock(tsdn, &arena->huge_mtx); if (zero || (config_fill && unlikely(opt_zero))) { if (!is_zeroed) - memset(ret, 0, size); + memset(ret, 0, usize); } else if (config_fill && unlikely(opt_junk_alloc)) - memset(ret, 0xa5, size); + memset(ret, JEMALLOC_ALLOC_JUNK, usize); + arena_decay_tick(tsdn, arena); return (ret); } @@ -116,7 +127,7 @@ huge_dalloc_junk(void *ptr, size_t usize) * unmapped. 
*/ if (!config_munmap || (have_dss && chunk_in_dss(ptr))) - memset(ptr, 0x5a, usize); + memset(ptr, JEMALLOC_FREE_JUNK, usize); } } #ifdef JEMALLOC_JET @@ -126,8 +137,8 @@ huge_dalloc_junk_t *huge_dalloc_junk = JEMALLOC_N(huge_dalloc_junk_impl); #endif static void -huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize_min, - size_t usize_max, bool zero) +huge_ralloc_no_move_similar(tsdn_t *tsdn, void *ptr, size_t oldsize, + size_t usize_min, size_t usize_max, bool zero) { size_t usize, usize_next; extent_node_t *node; @@ -151,24 +162,28 @@ huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize_min, if (oldsize > usize) { size_t sdiff = oldsize - usize; if (config_fill && unlikely(opt_junk_free)) { - memset((void *)((uintptr_t)ptr + usize), 0x5a, sdiff); + memset((void *)((uintptr_t)ptr + usize), + JEMALLOC_FREE_JUNK, sdiff); post_zeroed = false; } else { - post_zeroed = !chunk_purge_wrapper(arena, &chunk_hooks, - ptr, CHUNK_CEILING(oldsize), usize, sdiff); + post_zeroed = !chunk_purge_wrapper(tsdn, arena, + &chunk_hooks, ptr, CHUNK_CEILING(oldsize), usize, + sdiff); } } else post_zeroed = pre_zeroed; - malloc_mutex_lock(&arena->huge_mtx); + malloc_mutex_lock(tsdn, &arena->huge_mtx); /* Update the size of the huge allocation. */ + huge_node_unset(ptr, node); assert(extent_node_size_get(node) != usize); extent_node_size_set(node, usize); + huge_node_reset(tsdn, ptr, node); /* Update zeroed. */ extent_node_zeroed_set(node, post_zeroed); - malloc_mutex_unlock(&arena->huge_mtx); + malloc_mutex_unlock(tsdn, &arena->huge_mtx); - arena_chunk_ralloc_huge_similar(arena, ptr, oldsize, usize); + arena_chunk_ralloc_huge_similar(tsdn, arena, ptr, oldsize, usize); /* Fill if necessary (growing). 
*/ if (oldsize < usize) { @@ -178,14 +193,15 @@ huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize_min, usize - oldsize); } } else if (config_fill && unlikely(opt_junk_alloc)) { - memset((void *)((uintptr_t)ptr + oldsize), 0xa5, usize - - oldsize); + memset((void *)((uintptr_t)ptr + oldsize), + JEMALLOC_ALLOC_JUNK, usize - oldsize); } } } static bool -huge_ralloc_no_move_shrink(void *ptr, size_t oldsize, size_t usize) +huge_ralloc_no_move_shrink(tsdn_t *tsdn, void *ptr, size_t oldsize, + size_t usize) { extent_node_t *node; arena_t *arena; @@ -196,7 +212,7 @@ huge_ralloc_no_move_shrink(void *ptr, size_t oldsize, size_t usize) node = huge_node_get(ptr); arena = extent_node_arena_get(node); pre_zeroed = extent_node_zeroed_get(node); - chunk_hooks = chunk_hooks_get(arena); + chunk_hooks = chunk_hooks_get(tsdn, arena); assert(oldsize > usize); @@ -213,53 +229,59 @@ huge_ralloc_no_move_shrink(void *ptr, size_t oldsize, size_t usize) sdiff); post_zeroed = false; } else { - post_zeroed = !chunk_purge_wrapper(arena, &chunk_hooks, - CHUNK_ADDR2BASE((uintptr_t)ptr + usize), - CHUNK_CEILING(oldsize), + post_zeroed = !chunk_purge_wrapper(tsdn, arena, + &chunk_hooks, CHUNK_ADDR2BASE((uintptr_t)ptr + + usize), CHUNK_CEILING(oldsize), CHUNK_ADDR2OFFSET((uintptr_t)ptr + usize), sdiff); } } else post_zeroed = pre_zeroed; - malloc_mutex_lock(&arena->huge_mtx); + malloc_mutex_lock(tsdn, &arena->huge_mtx); /* Update the size of the huge allocation. */ + huge_node_unset(ptr, node); extent_node_size_set(node, usize); + huge_node_reset(tsdn, ptr, node); /* Update zeroed. */ extent_node_zeroed_set(node, post_zeroed); - malloc_mutex_unlock(&arena->huge_mtx); + malloc_mutex_unlock(tsdn, &arena->huge_mtx); /* Zap the excess chunks. 
*/ - arena_chunk_ralloc_huge_shrink(arena, ptr, oldsize, usize); + arena_chunk_ralloc_huge_shrink(tsdn, arena, ptr, oldsize, usize, + extent_node_sn_get(node)); return (false); } static bool -huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t usize, bool zero) { +huge_ralloc_no_move_expand(tsdn_t *tsdn, void *ptr, size_t oldsize, + size_t usize, bool zero) { extent_node_t *node; arena_t *arena; bool is_zeroed_subchunk, is_zeroed_chunk; node = huge_node_get(ptr); arena = extent_node_arena_get(node); - malloc_mutex_lock(&arena->huge_mtx); + malloc_mutex_lock(tsdn, &arena->huge_mtx); is_zeroed_subchunk = extent_node_zeroed_get(node); - malloc_mutex_unlock(&arena->huge_mtx); + malloc_mutex_unlock(tsdn, &arena->huge_mtx); /* - * Copy zero into is_zeroed_chunk and pass the copy to chunk_alloc(), so - * that it is possible to make correct junk/zero fill decisions below. + * Use is_zeroed_chunk to detect whether the trailing memory is zeroed, + * update extent's zeroed field, and zero as necessary. */ - is_zeroed_chunk = zero; - - if (arena_chunk_ralloc_huge_expand(arena, ptr, oldsize, usize, + is_zeroed_chunk = false; + if (arena_chunk_ralloc_huge_expand(tsdn, arena, ptr, oldsize, usize, &is_zeroed_chunk)) return (true); - malloc_mutex_lock(&arena->huge_mtx); - /* Update the size of the huge allocation. 
*/ + malloc_mutex_lock(tsdn, &arena->huge_mtx); + huge_node_unset(ptr, node); extent_node_size_set(node, usize); - malloc_mutex_unlock(&arena->huge_mtx); + extent_node_zeroed_set(node, extent_node_zeroed_get(node) && + is_zeroed_chunk); + huge_node_reset(tsdn, ptr, node); + malloc_mutex_unlock(tsdn, &arena->huge_mtx); if (zero || (config_fill && unlikely(opt_zero))) { if (!is_zeroed_subchunk) { @@ -272,19 +294,21 @@ huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t usize, bool zero) { CHUNK_CEILING(oldsize)); } } else if (config_fill && unlikely(opt_junk_alloc)) { - memset((void *)((uintptr_t)ptr + oldsize), 0xa5, usize - - oldsize); + memset((void *)((uintptr_t)ptr + oldsize), JEMALLOC_ALLOC_JUNK, + usize - oldsize); } return (false); } bool -huge_ralloc_no_move(void *ptr, size_t oldsize, size_t usize_min, +huge_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t usize_min, size_t usize_max, bool zero) { assert(s2u(oldsize) == oldsize); + /* The following should have been caught by callers. */ + assert(usize_min > 0 && usize_max <= HUGE_MAXCLASS); /* Both allocations must be huge to avoid a move. */ if (oldsize < chunksize || usize_max < chunksize) @@ -292,13 +316,18 @@ huge_ralloc_no_move(void *ptr, size_t oldsize, size_t usize_min, if (CHUNK_CEILING(usize_max) > CHUNK_CEILING(oldsize)) { /* Attempt to expand the allocation in-place. */ - if (!huge_ralloc_no_move_expand(ptr, oldsize, usize_max, zero)) + if (!huge_ralloc_no_move_expand(tsdn, ptr, oldsize, usize_max, + zero)) { + arena_decay_tick(tsdn, huge_aalloc(ptr)); return (false); + } /* Try again, this time with usize_min. 
*/ if (usize_min < usize_max && CHUNK_CEILING(usize_min) > - CHUNK_CEILING(oldsize) && huge_ralloc_no_move_expand(ptr, - oldsize, usize_min, zero)) + CHUNK_CEILING(oldsize) && huge_ralloc_no_move_expand(tsdn, + ptr, oldsize, usize_min, zero)) { + arena_decay_tick(tsdn, huge_aalloc(ptr)); return (false); + } } /* @@ -307,36 +336,46 @@ huge_ralloc_no_move(void *ptr, size_t oldsize, size_t usize_min, */ if (CHUNK_CEILING(oldsize) >= CHUNK_CEILING(usize_min) && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(usize_max)) { - huge_ralloc_no_move_similar(ptr, oldsize, usize_min, usize_max, - zero); + huge_ralloc_no_move_similar(tsdn, ptr, oldsize, usize_min, + usize_max, zero); + arena_decay_tick(tsdn, huge_aalloc(ptr)); return (false); } /* Attempt to shrink the allocation in-place. */ - if (CHUNK_CEILING(oldsize) > CHUNK_CEILING(usize_max)) - return (huge_ralloc_no_move_shrink(ptr, oldsize, usize_max)); + if (CHUNK_CEILING(oldsize) > CHUNK_CEILING(usize_max)) { + if (!huge_ralloc_no_move_shrink(tsdn, ptr, oldsize, + usize_max)) { + arena_decay_tick(tsdn, huge_aalloc(ptr)); + return (false); + } + } return (true); } static void * -huge_ralloc_move_helper(tsd_t *tsd, arena_t *arena, size_t usize, - size_t alignment, bool zero, tcache_t *tcache) +huge_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, + size_t alignment, bool zero) { if (alignment <= chunksize) - return (huge_malloc(tsd, arena, usize, zero, tcache)); - return (huge_palloc(tsd, arena, usize, alignment, zero, tcache)); + return (huge_malloc(tsdn, arena, usize, zero)); + return (huge_palloc(tsdn, arena, usize, alignment, zero)); } void * -huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize, - size_t alignment, bool zero, tcache_t *tcache) +huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, + size_t usize, size_t alignment, bool zero, tcache_t *tcache) { void *ret; size_t copysize; + /* The following should have been caught by callers. 
*/ + assert(usize > 0 && usize <= HUGE_MAXCLASS); + /* Try to avoid moving the allocation. */ - if (!huge_ralloc_no_move(ptr, oldsize, usize, usize, zero)) + if (!huge_ralloc_no_move(tsd_tsdn(tsd), ptr, oldsize, usize, usize, + zero)) return (ptr); /* @@ -344,19 +383,19 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize, * different size class. In that case, fall back to allocating new * space and copying. */ - ret = huge_ralloc_move_helper(tsd, arena, usize, alignment, zero, - tcache); + ret = huge_ralloc_move_helper(tsd_tsdn(tsd), arena, usize, alignment, + zero); if (ret == NULL) return (NULL); copysize = (usize < oldsize) ? usize : oldsize; memcpy(ret, ptr, copysize); - isqalloc(tsd, ptr, oldsize, tcache); + isqalloc(tsd, ptr, oldsize, tcache, true); return (ret); } void -huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) +huge_dalloc(tsdn_t *tsdn, void *ptr) { extent_node_t *node; arena_t *arena; @@ -364,15 +403,18 @@ huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) node = huge_node_get(ptr); arena = extent_node_arena_get(node); huge_node_unset(ptr, node); - malloc_mutex_lock(&arena->huge_mtx); + malloc_mutex_lock(tsdn, &arena->huge_mtx); ql_remove(&arena->huge, node, ql_link); - malloc_mutex_unlock(&arena->huge_mtx); + malloc_mutex_unlock(tsdn, &arena->huge_mtx); huge_dalloc_junk(extent_node_addr_get(node), extent_node_size_get(node)); - arena_chunk_dalloc_huge(extent_node_arena_get(node), - extent_node_addr_get(node), extent_node_size_get(node)); - idalloctm(tsd, node, tcache, true); + arena_chunk_dalloc_huge(tsdn, extent_node_arena_get(node), + extent_node_addr_get(node), extent_node_size_get(node), + extent_node_sn_get(node)); + idalloctm(tsdn, node, NULL, true, true); + + arena_decay_tick(tsdn, arena); } arena_t * @@ -383,7 +425,7 @@ huge_aalloc(const void *ptr) } size_t -huge_salloc(const void *ptr) +huge_salloc(tsdn_t *tsdn, const void *ptr) { size_t size; extent_node_t *node; @@ -391,15 +433,15 @@ huge_salloc(const 
void *ptr) node = huge_node_get(ptr); arena = extent_node_arena_get(node); - malloc_mutex_lock(&arena->huge_mtx); + malloc_mutex_lock(tsdn, &arena->huge_mtx); size = extent_node_size_get(node); - malloc_mutex_unlock(&arena->huge_mtx); + malloc_mutex_unlock(tsdn, &arena->huge_mtx); return (size); } prof_tctx_t * -huge_prof_tctx_get(const void *ptr) +huge_prof_tctx_get(tsdn_t *tsdn, const void *ptr) { prof_tctx_t *tctx; extent_node_t *node; @@ -407,29 +449,29 @@ huge_prof_tctx_get(const void *ptr) node = huge_node_get(ptr); arena = extent_node_arena_get(node); - malloc_mutex_lock(&arena->huge_mtx); + malloc_mutex_lock(tsdn, &arena->huge_mtx); tctx = extent_node_prof_tctx_get(node); - malloc_mutex_unlock(&arena->huge_mtx); + malloc_mutex_unlock(tsdn, &arena->huge_mtx); return (tctx); } void -huge_prof_tctx_set(const void *ptr, prof_tctx_t *tctx) +huge_prof_tctx_set(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) { extent_node_t *node; arena_t *arena; node = huge_node_get(ptr); arena = extent_node_arena_get(node); - malloc_mutex_lock(&arena->huge_mtx); + malloc_mutex_lock(tsdn, &arena->huge_mtx); extent_node_prof_tctx_set(node, tctx); - malloc_mutex_unlock(&arena->huge_mtx); + malloc_mutex_unlock(tsdn, &arena->huge_mtx); } void -huge_prof_tctx_reset(const void *ptr) +huge_prof_tctx_reset(tsdn_t *tsdn, const void *ptr) { - huge_prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U); + huge_prof_tctx_set(tsdn, ptr, (prof_tctx_t *)(uintptr_t)1U); } diff --git a/deps/jemalloc/src/jemalloc.c b/deps/jemalloc/src/jemalloc.c index fe77c2475..07389ca2f 100644 --- a/deps/jemalloc/src/jemalloc.c +++ b/deps/jemalloc/src/jemalloc.c @@ -5,7 +5,11 @@ /* Data. */ /* Runtime configuration options. 
*/ -const char *je_malloc_conf JEMALLOC_ATTR(weak); +const char *je_malloc_conf +#ifndef _WIN32 + JEMALLOC_ATTR(weak) +#endif + ; bool opt_abort = #ifdef JEMALLOC_DEBUG true @@ -40,14 +44,14 @@ bool opt_redzone = false; bool opt_utrace = false; bool opt_xmalloc = false; bool opt_zero = false; -size_t opt_narenas = 0; +unsigned opt_narenas = 0; /* Initialized to true if the process is running inside Valgrind. */ bool in_valgrind; unsigned ncpus; -/* Protects arenas initialization (arenas, narenas_total). */ +/* Protects arenas initialization. */ static malloc_mutex_t arenas_lock; /* * Arenas that are used to service external requests. Not all elements of the @@ -57,10 +61,10 @@ static malloc_mutex_t arenas_lock; * arenas. arenas[narenas_auto..narenas_total) are only used if the application * takes some action to create them and allocate from them. */ -static arena_t **arenas; -static unsigned narenas_total; +arena_t **arenas; +static unsigned narenas_total; /* Use narenas_total_*(). */ static arena_t *a0; /* arenas[0]; read-only after initialization. */ -static unsigned narenas_auto; /* Read-only after initialization. */ +unsigned narenas_auto; /* Read-only after initialization. */ typedef enum { malloc_init_uninitialized = 3, @@ -70,9 +74,37 @@ typedef enum { } malloc_init_t; static malloc_init_t malloc_init_state = malloc_init_uninitialized; +/* False should be the common case. Set to true to trigger initialization. */ +static bool malloc_slow = true; + +/* When malloc_slow is true, set the corresponding bits for sanity check. 
*/ +enum { + flag_opt_junk_alloc = (1U), + flag_opt_junk_free = (1U << 1), + flag_opt_quarantine = (1U << 2), + flag_opt_zero = (1U << 3), + flag_opt_utrace = (1U << 4), + flag_in_valgrind = (1U << 5), + flag_opt_xmalloc = (1U << 6) +}; +static uint8_t malloc_slow_flags; + +JEMALLOC_ALIGNED(CACHELINE) +const size_t pind2sz_tab[NPSIZES] = { +#define PSZ_yes(lg_grp, ndelta, lg_delta) \ + (((ZU(1)< MALLOCX_ARENA_MAX) return (NULL); - if (ind == narenas_total) { - unsigned narenas_new = narenas_total + 1; - arena_t **arenas_new = - (arena_t **)a0malloc(CACHELINE_CEILING(narenas_new * - sizeof(arena_t *))); - if (arenas_new == NULL) - return (NULL); - memcpy(arenas_new, arenas, narenas_total * sizeof(arena_t *)); - arenas_new[ind] = NULL; - /* - * Deallocate only if arenas came from a0malloc() (not - * base_alloc()). - */ - if (narenas_total != narenas_auto) - a0dalloc(arenas); - arenas = arenas_new; - narenas_total = narenas_new; - } + if (ind == narenas_total_get()) + narenas_total_inc(); /* * Another thread may have already initialized arenas[ind] if it's an * auto arena. */ - arena = arenas[ind]; + arena = arena_get(tsdn, ind, false); if (arena != NULL) { assert(ind < narenas_auto); return (arena); } /* Actually initialize the arena. 
*/ - arena = arenas[ind] = arena_new(ind); + arena = arena_new(tsdn, ind); + arena_set(ind, arena); return (arena); } arena_t * -arena_init(unsigned ind) +arena_init(tsdn_t *tsdn, unsigned ind) { arena_t *arena; - malloc_mutex_lock(&arenas_lock); - arena = arena_init_locked(ind); - malloc_mutex_unlock(&arenas_lock); + malloc_mutex_lock(tsdn, &arenas_lock); + arena = arena_init_locked(tsdn, ind); + malloc_mutex_unlock(tsdn, &arenas_lock); return (arena); } -unsigned -narenas_total_get(void) -{ - unsigned narenas; - - malloc_mutex_lock(&arenas_lock); - narenas = narenas_total; - malloc_mutex_unlock(&arenas_lock); - - return (narenas); -} - static void -arena_bind_locked(tsd_t *tsd, unsigned ind) +arena_bind(tsd_t *tsd, unsigned ind, bool internal) { arena_t *arena; - arena = arenas[ind]; - arena->nthreads++; + if (!tsd_nominal(tsd)) + return; - if (tsd_nominal(tsd)) + arena = arena_get(tsd_tsdn(tsd), ind, false); + arena_nthreads_inc(arena, internal); + + if (internal) + tsd_iarena_set(tsd, arena); + else tsd_arena_set(tsd, arena); } -static void -arena_bind(tsd_t *tsd, unsigned ind) -{ - - malloc_mutex_lock(&arenas_lock); - arena_bind_locked(tsd, ind); - malloc_mutex_unlock(&arenas_lock); -} - void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind) { arena_t *oldarena, *newarena; - malloc_mutex_lock(&arenas_lock); - oldarena = arenas[oldind]; - newarena = arenas[newind]; - oldarena->nthreads--; - newarena->nthreads++; - malloc_mutex_unlock(&arenas_lock); + oldarena = arena_get(tsd_tsdn(tsd), oldind, false); + newarena = arena_get(tsd_tsdn(tsd), newind, false); + arena_nthreads_dec(oldarena, false); + arena_nthreads_inc(newarena, false); tsd_arena_set(tsd, newarena); } -unsigned -arena_nbound(unsigned ind) -{ - unsigned nthreads; - - malloc_mutex_lock(&arenas_lock); - nthreads = arenas[ind]->nthreads; - malloc_mutex_unlock(&arenas_lock); - return (nthreads); -} - static void -arena_unbind(tsd_t *tsd, unsigned ind) +arena_unbind(tsd_t *tsd, unsigned ind, bool 
internal) { arena_t *arena; - malloc_mutex_lock(&arenas_lock); - arena = arenas[ind]; - arena->nthreads--; - malloc_mutex_unlock(&arenas_lock); - tsd_arena_set(tsd, NULL); + arena = arena_get(tsd_tsdn(tsd), ind, false); + arena_nthreads_dec(arena, internal); + if (internal) + tsd_iarena_set(tsd, NULL); + else + tsd_arena_set(tsd, NULL); } -arena_t * -arena_get_hard(tsd_t *tsd, unsigned ind, bool init_if_missing) +arena_tdata_t * +arena_tdata_get_hard(tsd_t *tsd, unsigned ind) { - arena_t *arena; - arena_t **arenas_cache = tsd_arenas_cache_get(tsd); - unsigned narenas_cache = tsd_narenas_cache_get(tsd); + arena_tdata_t *tdata, *arenas_tdata_old; + arena_tdata_t *arenas_tdata = tsd_arenas_tdata_get(tsd); + unsigned narenas_tdata_old, i; + unsigned narenas_tdata = tsd_narenas_tdata_get(tsd); unsigned narenas_actual = narenas_total_get(); - /* Deallocate old cache if it's too small. */ - if (arenas_cache != NULL && narenas_cache < narenas_actual) { - a0dalloc(arenas_cache); - arenas_cache = NULL; - narenas_cache = 0; - tsd_arenas_cache_set(tsd, arenas_cache); - tsd_narenas_cache_set(tsd, narenas_cache); + /* + * Dissociate old tdata array (and set up for deallocation upon return) + * if it's too small. + */ + if (arenas_tdata != NULL && narenas_tdata < narenas_actual) { + arenas_tdata_old = arenas_tdata; + narenas_tdata_old = narenas_tdata; + arenas_tdata = NULL; + narenas_tdata = 0; + tsd_arenas_tdata_set(tsd, arenas_tdata); + tsd_narenas_tdata_set(tsd, narenas_tdata); + } else { + arenas_tdata_old = NULL; + narenas_tdata_old = 0; } - /* Allocate cache if it's missing. */ - if (arenas_cache == NULL) { - bool *arenas_cache_bypassp = tsd_arenas_cache_bypassp_get(tsd); - assert(ind < narenas_actual || !init_if_missing); - narenas_cache = (ind < narenas_actual) ? narenas_actual : ind+1; + /* Allocate tdata array if it's missing. */ + if (arenas_tdata == NULL) { + bool *arenas_tdata_bypassp = tsd_arenas_tdata_bypassp_get(tsd); + narenas_tdata = (ind < narenas_actual) ? 
narenas_actual : ind+1; - if (tsd_nominal(tsd) && !*arenas_cache_bypassp) { - *arenas_cache_bypassp = true; - arenas_cache = (arena_t **)a0malloc(sizeof(arena_t *) * - narenas_cache); - *arenas_cache_bypassp = false; + if (tsd_nominal(tsd) && !*arenas_tdata_bypassp) { + *arenas_tdata_bypassp = true; + arenas_tdata = (arena_tdata_t *)a0malloc( + sizeof(arena_tdata_t) * narenas_tdata); + *arenas_tdata_bypassp = false; } - if (arenas_cache == NULL) { - /* - * This function must always tell the truth, even if - * it's slow, so don't let OOM, thread cleanup (note - * tsd_nominal check), nor recursive allocation - * avoidance (note arenas_cache_bypass check) get in the - * way. - */ - if (ind >= narenas_actual) - return (NULL); - malloc_mutex_lock(&arenas_lock); - arena = arenas[ind]; - malloc_mutex_unlock(&arenas_lock); - return (arena); + if (arenas_tdata == NULL) { + tdata = NULL; + goto label_return; } - assert(tsd_nominal(tsd) && !*arenas_cache_bypassp); - tsd_arenas_cache_set(tsd, arenas_cache); - tsd_narenas_cache_set(tsd, narenas_cache); + assert(tsd_nominal(tsd) && !*arenas_tdata_bypassp); + tsd_arenas_tdata_set(tsd, arenas_tdata); + tsd_narenas_tdata_set(tsd, narenas_tdata); } /* - * Copy to cache. It's possible that the actual number of arenas has - * increased since narenas_total_get() was called above, but that causes - * no correctness issues unless two threads concurrently execute the - * arenas.extend mallctl, which we trust mallctl synchronization to + * Copy to tdata array. It's possible that the actual number of arenas + * has increased since narenas_total_get() was called above, but that + * causes no correctness issues unless two threads concurrently execute + * the arenas.extend mallctl, which we trust mallctl synchronization to * prevent. 
*/ - malloc_mutex_lock(&arenas_lock); - memcpy(arenas_cache, arenas, sizeof(arena_t *) * narenas_actual); - malloc_mutex_unlock(&arenas_lock); - if (narenas_cache > narenas_actual) { - memset(&arenas_cache[narenas_actual], 0, sizeof(arena_t *) * - (narenas_cache - narenas_actual)); + + /* Copy/initialize tickers. */ + for (i = 0; i < narenas_actual; i++) { + if (i < narenas_tdata_old) { + ticker_copy(&arenas_tdata[i].decay_ticker, + &arenas_tdata_old[i].decay_ticker); + } else { + ticker_init(&arenas_tdata[i].decay_ticker, + DECAY_NTICKS_PER_UPDATE); + } + } + if (narenas_tdata > narenas_actual) { + memset(&arenas_tdata[narenas_actual], 0, sizeof(arena_tdata_t) + * (narenas_tdata - narenas_actual)); } - /* Read the refreshed cache, and init the arena if necessary. */ - arena = arenas_cache[ind]; - if (init_if_missing && arena == NULL) - arena = arenas_cache[ind] = arena_init(ind); - return (arena); + /* Read the refreshed tdata array. */ + tdata = &arenas_tdata[ind]; +label_return: + if (arenas_tdata_old != NULL) + a0dalloc(arenas_tdata_old); + return (tdata); } /* Slow path, called only by arena_choose(). */ arena_t * -arena_choose_hard(tsd_t *tsd) +arena_choose_hard(tsd_t *tsd, bool internal) { - arena_t *ret; + arena_t *ret JEMALLOC_CC_SILENCE_INIT(NULL); if (narenas_auto > 1) { - unsigned i, choose, first_null; + unsigned i, j, choose[2], first_null; + + /* + * Determine binding for both non-internal and internal + * allocation. + * + * choose[0]: For application allocation. + * choose[1]: For internal metadata allocation. 
+ */ + + for (j = 0; j < 2; j++) + choose[j] = 0; - choose = 0; first_null = narenas_auto; - malloc_mutex_lock(&arenas_lock); - assert(a0get() != NULL); + malloc_mutex_lock(tsd_tsdn(tsd), &arenas_lock); + assert(arena_get(tsd_tsdn(tsd), 0, false) != NULL); for (i = 1; i < narenas_auto; i++) { - if (arenas[i] != NULL) { + if (arena_get(tsd_tsdn(tsd), i, false) != NULL) { /* * Choose the first arena that has the lowest * number of threads assigned to it. */ - if (arenas[i]->nthreads < - arenas[choose]->nthreads) - choose = i; + for (j = 0; j < 2; j++) { + if (arena_nthreads_get(arena_get( + tsd_tsdn(tsd), i, false), !!j) < + arena_nthreads_get(arena_get( + tsd_tsdn(tsd), choose[j], false), + !!j)) + choose[j] = i; + } } else if (first_null == narenas_auto) { /* * Record the index of the first uninitialized @@ -605,27 +641,40 @@ arena_choose_hard(tsd_t *tsd) } } - if (arenas[choose]->nthreads == 0 - || first_null == narenas_auto) { - /* - * Use an unloaded arena, or the least loaded arena if - * all arenas are already initialized. - */ - ret = arenas[choose]; - } else { - /* Initialize a new arena. */ - choose = first_null; - ret = arena_init_locked(choose); - if (ret == NULL) { - malloc_mutex_unlock(&arenas_lock); - return (NULL); + for (j = 0; j < 2; j++) { + if (arena_nthreads_get(arena_get(tsd_tsdn(tsd), + choose[j], false), !!j) == 0 || first_null == + narenas_auto) { + /* + * Use an unloaded arena, or the least loaded + * arena if all arenas are already initialized. + */ + if (!!j == internal) { + ret = arena_get(tsd_tsdn(tsd), + choose[j], false); + } + } else { + arena_t *arena; + + /* Initialize a new arena. 
*/ + choose[j] = first_null; + arena = arena_init_locked(tsd_tsdn(tsd), + choose[j]); + if (arena == NULL) { + malloc_mutex_unlock(tsd_tsdn(tsd), + &arenas_lock); + return (NULL); + } + if (!!j == internal) + ret = arena; } + arena_bind(tsd, choose[j], !!j); } - arena_bind_locked(tsd, choose); - malloc_mutex_unlock(&arenas_lock); + malloc_mutex_unlock(tsd_tsdn(tsd), &arenas_lock); } else { - ret = a0get(); - arena_bind(tsd, 0); + ret = arena_get(tsd_tsdn(tsd), 0, false); + arena_bind(tsd, 0, false); + arena_bind(tsd, 0, true); } return (ret); @@ -645,6 +694,16 @@ thread_deallocated_cleanup(tsd_t *tsd) /* Do nothing. */ } +void +iarena_cleanup(tsd_t *tsd) +{ + arena_t *iarena; + + iarena = tsd_iarena_get(tsd); + if (iarena != NULL) + arena_unbind(tsd, iarena->ind, true); +} + void arena_cleanup(tsd_t *tsd) { @@ -652,30 +711,33 @@ arena_cleanup(tsd_t *tsd) arena = tsd_arena_get(tsd); if (arena != NULL) - arena_unbind(tsd, arena->ind); + arena_unbind(tsd, arena->ind, false); } void -arenas_cache_cleanup(tsd_t *tsd) +arenas_tdata_cleanup(tsd_t *tsd) { - arena_t **arenas_cache; + arena_tdata_t *arenas_tdata; - arenas_cache = tsd_arenas_cache_get(tsd); - if (arenas_cache != NULL) { - tsd_arenas_cache_set(tsd, NULL); - a0dalloc(arenas_cache); + /* Prevent tsd->arenas_tdata from being (re)created. */ + *tsd_arenas_tdata_bypassp_get(tsd) = true; + + arenas_tdata = tsd_arenas_tdata_get(tsd); + if (arenas_tdata != NULL) { + tsd_arenas_tdata_set(tsd, NULL); + a0dalloc(arenas_tdata); } } void -narenas_cache_cleanup(tsd_t *tsd) +narenas_tdata_cleanup(tsd_t *tsd) { /* Do nothing. */ } void -arenas_cache_bypass_cleanup(tsd_t *tsd) +arenas_tdata_bypass_cleanup(tsd_t *tsd) { /* Do nothing. */ @@ -686,8 +748,11 @@ stats_print_atexit(void) { if (config_tcache && config_stats) { + tsdn_t *tsdn; unsigned narenas, i; + tsdn = tsdn_fetch(); + /* * Merge stats from extant threads. 
This is racy, since * individual threads do not lock when recording tcache stats @@ -696,7 +761,7 @@ stats_print_atexit(void) * continue to allocate. */ for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { - arena_t *arena = arenas[i]; + arena_t *arena = arena_get(tsdn, i, false); if (arena != NULL) { tcache_t *tcache; @@ -706,11 +771,11 @@ stats_print_atexit(void) * and bin locks in the opposite order, * deadlocks may result. */ - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); ql_foreach(tcache, &arena->tcache_ql, link) { - tcache_stats_merge(tcache, arena); + tcache_stats_merge(tsdn, tcache, arena); } - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); } } } @@ -747,6 +812,20 @@ malloc_ncpus(void) SYSTEM_INFO si; GetSystemInfo(&si); result = si.dwNumberOfProcessors; +#elif defined(JEMALLOC_GLIBC_MALLOC_HOOK) && defined(CPU_COUNT) + /* + * glibc >= 2.6 has the CPU_COUNT macro. + * + * glibc's sysconf() uses isspace(). glibc allocates for the first time + * *before* setting up the isspace tables. Therefore we need a + * different method to get the number of CPUs. + */ + { + cpu_set_t set; + + pthread_getaffinity_np(pthread_self(), sizeof(set), &set); + result = CPU_COUNT(&set); + } #else result = sysconf(_SC_NPROCESSORS_ONLN); #endif @@ -838,6 +917,26 @@ malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v, (int)vlen, v); } +static void +malloc_slow_flag_init(void) +{ + /* + * Combine the runtime options into malloc_slow for fast path. Called + * after processing all the options. + */ + malloc_slow_flags |= (opt_junk_alloc ? flag_opt_junk_alloc : 0) + | (opt_junk_free ? flag_opt_junk_free : 0) + | (opt_quarantine ? flag_opt_quarantine : 0) + | (opt_zero ? flag_opt_zero : 0) + | (opt_utrace ? flag_opt_utrace : 0) + | (opt_xmalloc ? flag_opt_xmalloc : 0); + + if (config_valgrind) + malloc_slow_flags |= (in_valgrind ? 
flag_in_valgrind : 0); + + malloc_slow = (malloc_slow_flags != 0); +} + static void malloc_conf_init(void) { @@ -864,10 +963,13 @@ malloc_conf_init(void) opt_tcache = false; } - for (i = 0; i < 3; i++) { + for (i = 0; i < 4; i++) { /* Get runtime configuration. */ switch (i) { case 0: + opts = config_malloc_conf; + break; + case 1: if (je_malloc_conf != NULL) { /* * Use options that were compiled into the @@ -880,8 +982,8 @@ malloc_conf_init(void) opts = buf; } break; - case 1: { - int linklen = 0; + case 2: { + ssize_t linklen = 0; #ifndef _WIN32 int saved_errno = errno; const char *linkname = @@ -907,7 +1009,7 @@ malloc_conf_init(void) buf[linklen] = '\0'; opts = buf; break; - } case 2: { + } case 3: { const char *envname = #ifdef JEMALLOC_PREFIX JEMALLOC_CPREFIX"MALLOC_CONF" @@ -954,7 +1056,11 @@ malloc_conf_init(void) if (cont) \ continue; \ } -#define CONF_HANDLE_SIZE_T(o, n, min, max, clip) \ +#define CONF_MIN_no(um, min) false +#define CONF_MIN_yes(um, min) ((um) < (min)) +#define CONF_MAX_no(um, max) false +#define CONF_MAX_yes(um, max) ((um) > (max)) +#define CONF_HANDLE_T_U(t, o, n, min, max, check_min, check_max, clip) \ if (CONF_MATCH(n)) { \ uintmax_t um; \ char *end; \ @@ -967,24 +1073,35 @@ malloc_conf_init(void) "Invalid conf value", \ k, klen, v, vlen); \ } else if (clip) { \ - if ((min) != 0 && um < (min)) \ - o = (min); \ - else if (um > (max)) \ - o = (max); \ + if (CONF_MIN_##check_min(um, \ + (min))) \ + o = (t)(min); \ + else if (CONF_MAX_##check_max( \ + um, (max))) \ + o = (t)(max); \ else \ - o = um; \ + o = (t)um; \ } else { \ - if (((min) != 0 && um < (min)) \ - || um > (max)) { \ + if (CONF_MIN_##check_min(um, \ + (min)) || \ + CONF_MAX_##check_max(um, \ + (max))) { \ malloc_conf_error( \ "Out-of-range " \ "conf value", \ k, klen, v, vlen); \ } else \ - o = um; \ + o = (t)um; \ } \ continue; \ } +#define CONF_HANDLE_UNSIGNED(o, n, min, max, check_min, check_max, \ + clip) \ + CONF_HANDLE_T_U(unsigned, o, n, min, max, \ + check_min, 
check_max, clip) +#define CONF_HANDLE_SIZE_T(o, n, min, max, check_min, check_max, clip) \ + CONF_HANDLE_T_U(size_t, o, n, min, max, \ + check_min, check_max, clip) #define CONF_HANDLE_SSIZE_T(o, n, min, max) \ if (CONF_MATCH(n)) { \ long l; \ @@ -1027,7 +1144,7 @@ malloc_conf_init(void) */ CONF_HANDLE_SIZE_T(opt_lg_chunk, "lg_chunk", LG_PAGE + LG_SIZE_CLASS_GROUP + (config_fill ? 2 : 1), - (sizeof(size_t) << 3) - 1, true) + (sizeof(size_t) << 3) - 1, yes, yes, true) if (strncmp("dss", k, klen) == 0) { int i; bool match = false; @@ -1052,17 +1169,47 @@ malloc_conf_init(void) } continue; } - CONF_HANDLE_SIZE_T(opt_narenas, "narenas", 1, - SIZE_T_MAX, false) + CONF_HANDLE_UNSIGNED(opt_narenas, "narenas", 1, + UINT_MAX, yes, no, false) + if (strncmp("purge", k, klen) == 0) { + int i; + bool match = false; + for (i = 0; i < purge_mode_limit; i++) { + if (strncmp(purge_mode_names[i], v, + vlen) == 0) { + opt_purge = (purge_mode_t)i; + match = true; + break; + } + } + if (!match) { + malloc_conf_error("Invalid conf value", + k, klen, v, vlen); + } + continue; + } CONF_HANDLE_SSIZE_T(opt_lg_dirty_mult, "lg_dirty_mult", -1, (sizeof(size_t) << 3) - 1) + CONF_HANDLE_SSIZE_T(opt_decay_time, "decay_time", -1, + NSTIME_SEC_MAX); CONF_HANDLE_BOOL(opt_stats_print, "stats_print", true) if (config_fill) { if (CONF_MATCH("junk")) { if (CONF_MATCH_VALUE("true")) { - opt_junk = "true"; - opt_junk_alloc = opt_junk_free = - true; + if (config_valgrind && + unlikely(in_valgrind)) { + malloc_conf_error( + "Deallocation-time " + "junk filling cannot " + "be enabled while " + "running inside " + "Valgrind", k, klen, v, + vlen); + } else { + opt_junk = "true"; + opt_junk_alloc = true; + opt_junk_free = true; + } } else if (CONF_MATCH_VALUE("false")) { opt_junk = "false"; opt_junk_alloc = opt_junk_free = @@ -1072,9 +1219,20 @@ malloc_conf_init(void) opt_junk_alloc = true; opt_junk_free = false; } else if (CONF_MATCH_VALUE("free")) { - opt_junk = "free"; - opt_junk_alloc = false; - 
opt_junk_free = true; + if (config_valgrind && + unlikely(in_valgrind)) { + malloc_conf_error( + "Deallocation-time " + "junk filling cannot " + "be enabled while " + "running inside " + "Valgrind", k, klen, v, + vlen); + } else { + opt_junk = "free"; + opt_junk_alloc = false; + opt_junk_free = true; + } } else { malloc_conf_error( "Invalid conf value", k, @@ -1083,7 +1241,7 @@ malloc_conf_init(void) continue; } CONF_HANDLE_SIZE_T(opt_quarantine, "quarantine", - 0, SIZE_T_MAX, false) + 0, SIZE_T_MAX, no, no, false) CONF_HANDLE_BOOL(opt_redzone, "redzone", true) CONF_HANDLE_BOOL(opt_zero, "zero", true) } @@ -1120,8 +1278,8 @@ malloc_conf_init(void) CONF_HANDLE_BOOL(opt_prof_thread_active_init, "prof_thread_active_init", true) CONF_HANDLE_SIZE_T(opt_lg_prof_sample, - "lg_prof_sample", 0, - (sizeof(uint64_t) << 3) - 1, true) + "lg_prof_sample", 0, (sizeof(uint64_t) << 3) + - 1, no, yes, true) CONF_HANDLE_BOOL(opt_prof_accum, "prof_accum", true) CONF_HANDLE_SSIZE_T(opt_lg_prof_interval, @@ -1137,7 +1295,14 @@ malloc_conf_init(void) malloc_conf_error("Invalid conf pair", k, klen, v, vlen); #undef CONF_MATCH +#undef CONF_MATCH_VALUE #undef CONF_HANDLE_BOOL +#undef CONF_MIN_no +#undef CONF_MIN_yes +#undef CONF_MAX_no +#undef CONF_MAX_yes +#undef CONF_HANDLE_T_U +#undef CONF_HANDLE_UNSIGNED #undef CONF_HANDLE_SIZE_T #undef CONF_HANDLE_SSIZE_T #undef CONF_HANDLE_CHAR_P @@ -1145,7 +1310,6 @@ malloc_conf_init(void) } } -/* init_lock must be held. */ static bool malloc_init_hard_needed(void) { @@ -1161,11 +1325,14 @@ malloc_init_hard_needed(void) } #ifdef JEMALLOC_THREADED_INIT if (malloc_initializer != NO_INITIALIZER && !IS_INITIALIZER) { + spin_t spinner; + /* Busy-wait until the initializing thread completes. 
*/ + spin_init(&spinner); do { - malloc_mutex_unlock(&init_lock); - CPU_SPINWAIT; - malloc_mutex_lock(&init_lock); + malloc_mutex_unlock(TSDN_NULL, &init_lock); + spin_adaptive(&spinner); + malloc_mutex_lock(TSDN_NULL, &init_lock); } while (!malloc_initialized()); return (false); } @@ -1173,9 +1340,8 @@ malloc_init_hard_needed(void) return (true); } -/* init_lock must be held. */ static bool -malloc_init_hard_a0_locked(void) +malloc_init_hard_a0_locked() { malloc_initializer = INITIALIZER; @@ -1191,6 +1357,7 @@ malloc_init_hard_a0_locked(void) abort(); } } + pages_boot(); if (base_boot()) return (true); if (chunk_boot()) @@ -1199,26 +1366,28 @@ malloc_init_hard_a0_locked(void) return (true); if (config_prof) prof_boot1(); - if (arena_boot()) + arena_boot(); + if (config_tcache && tcache_boot(TSDN_NULL)) return (true); - if (config_tcache && tcache_boot()) - return (true); - if (malloc_mutex_init(&arenas_lock)) + if (malloc_mutex_init(&arenas_lock, "arenas", WITNESS_RANK_ARENAS)) return (true); /* * Create enough scaffolding to allow recursive allocation in * malloc_ncpus(). */ - narenas_total = narenas_auto = 1; + narenas_auto = 1; + narenas_total_set(narenas_auto); arenas = &a0; memset(arenas, 0, sizeof(arena_t *) * narenas_auto); /* * Initialize one arena here. The rest are lazily created in * arena_choose_hard(). */ - if (arena_init(0) == NULL) + if (arena_init(TSDN_NULL, 0) == NULL) return (true); + malloc_init_state = malloc_init_a0_initialized; + return (false); } @@ -1227,45 +1396,42 @@ malloc_init_hard_a0(void) { bool ret; - malloc_mutex_lock(&init_lock); + malloc_mutex_lock(TSDN_NULL, &init_lock); ret = malloc_init_hard_a0_locked(); - malloc_mutex_unlock(&init_lock); + malloc_mutex_unlock(TSDN_NULL, &init_lock); return (ret); } -/* - * Initialize data structures which may trigger recursive allocation. - * - * init_lock must be held. - */ -static void +/* Initialize data structures which may trigger recursive allocation. 
*/ +static bool malloc_init_hard_recursible(void) { malloc_init_state = malloc_init_recursible; - malloc_mutex_unlock(&init_lock); ncpus = malloc_ncpus(); -#if (!defined(JEMALLOC_MUTEX_INIT_CB) && !defined(JEMALLOC_ZONE) \ - && !defined(_WIN32) && !defined(__native_client__)) - /* LinuxThreads's pthread_atfork() allocates. */ +#if (defined(JEMALLOC_HAVE_PTHREAD_ATFORK) && !defined(JEMALLOC_MUTEX_INIT_CB) \ + && !defined(JEMALLOC_ZONE) && !defined(_WIN32) && \ + !defined(__native_client__)) + /* LinuxThreads' pthread_atfork() allocates. */ if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent, jemalloc_postfork_child) != 0) { malloc_write(": Error in pthread_atfork()\n"); if (opt_abort) abort(); + return (true); } #endif - malloc_mutex_lock(&init_lock); + + return (false); } -/* init_lock must be held. */ static bool -malloc_init_hard_finish(void) +malloc_init_hard_finish(tsdn_t *tsdn) { - if (mutex_boot()) + if (malloc_mutex_boot()) return (true); if (opt_narenas == 0) { @@ -1280,68 +1446,69 @@ malloc_init_hard_finish(void) } narenas_auto = opt_narenas; /* - * Make sure that the arenas array can be allocated. In practice, this - * limit is enough to allow the allocator to function, but the ctl - * machinery will fail to allocate memory at far lower limits. + * Limit the number of arenas to the indexing range of MALLOCX_ARENA(). */ - if (narenas_auto > chunksize / sizeof(arena_t *)) { - narenas_auto = chunksize / sizeof(arena_t *); + if (narenas_auto > MALLOCX_ARENA_MAX) { + narenas_auto = MALLOCX_ARENA_MAX; malloc_printf(": Reducing narenas to limit (%d)\n", narenas_auto); } - narenas_total = narenas_auto; + narenas_total_set(narenas_auto); /* Allocate and initialize arenas. */ - arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas_total); + arenas = (arena_t **)base_alloc(tsdn, sizeof(arena_t *) * + (MALLOCX_ARENA_MAX+1)); if (arenas == NULL) return (true); - /* - * Zero the array. 
In practice, this should always be pre-zeroed, - * since it was just mmap()ed, but let's be sure. - */ - memset(arenas, 0, sizeof(arena_t *) * narenas_total); /* Copy the pointer to the one arena that was already initialized. */ - arenas[0] = a0; + arena_set(0, a0); malloc_init_state = malloc_init_initialized; + malloc_slow_flag_init(); + return (false); } static bool malloc_init_hard(void) { + tsd_t *tsd; #if defined(_WIN32) && _WIN32_WINNT < 0x0600 _init_init_lock(); #endif - malloc_mutex_lock(&init_lock); + malloc_mutex_lock(TSDN_NULL, &init_lock); if (!malloc_init_hard_needed()) { - malloc_mutex_unlock(&init_lock); + malloc_mutex_unlock(TSDN_NULL, &init_lock); return (false); } if (malloc_init_state != malloc_init_a0_initialized && malloc_init_hard_a0_locked()) { - malloc_mutex_unlock(&init_lock); - return (true); - } - if (malloc_tsd_boot0()) { - malloc_mutex_unlock(&init_lock); - return (true); - } - if (config_prof && prof_boot2()) { - malloc_mutex_unlock(&init_lock); + malloc_mutex_unlock(TSDN_NULL, &init_lock); return (true); } - malloc_init_hard_recursible(); + malloc_mutex_unlock(TSDN_NULL, &init_lock); + /* Recursive allocation relies on functional tsd. 
*/ + tsd = malloc_tsd_boot0(); + if (tsd == NULL) + return (true); + if (malloc_init_hard_recursible()) + return (true); + malloc_mutex_lock(tsd_tsdn(tsd), &init_lock); - if (malloc_init_hard_finish()) { - malloc_mutex_unlock(&init_lock); + if (config_prof && prof_boot2(tsd)) { + malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); return (true); } - malloc_mutex_unlock(&init_lock); + if (malloc_init_hard_finish(tsd_tsdn(tsd))) { + malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); + return (true); + } + + malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); malloc_tsd_boot1(); return (false); } @@ -1355,61 +1522,104 @@ malloc_init_hard(void) */ static void * -imalloc_prof_sample(tsd_t *tsd, size_t usize, prof_tctx_t *tctx) +ialloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind, bool zero, + prof_tctx_t *tctx, bool slow_path) { void *p; if (tctx == NULL) return (NULL); if (usize <= SMALL_MAXCLASS) { - p = imalloc(tsd, LARGE_MINCLASS); + szind_t ind_large = size2index(LARGE_MINCLASS); + p = ialloc(tsd, LARGE_MINCLASS, ind_large, zero, slow_path); if (p == NULL) return (NULL); - arena_prof_promoted(p, usize); + arena_prof_promoted(tsd_tsdn(tsd), p, usize); } else - p = imalloc(tsd, usize); + p = ialloc(tsd, usize, ind, zero, slow_path); return (p); } JEMALLOC_ALWAYS_INLINE_C void * -imalloc_prof(tsd_t *tsd, size_t usize) +ialloc_prof(tsd_t *tsd, size_t usize, szind_t ind, bool zero, bool slow_path) { void *p; prof_tctx_t *tctx; tctx = prof_alloc_prep(tsd, usize, prof_active_get_unlocked(), true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) - p = imalloc_prof_sample(tsd, usize, tctx); + p = ialloc_prof_sample(tsd, usize, ind, zero, tctx, slow_path); else - p = imalloc(tsd, usize); + p = ialloc(tsd, usize, ind, zero, slow_path); if (unlikely(p == NULL)) { prof_alloc_rollback(tsd, tctx, true); return (NULL); } - prof_malloc(p, usize, tctx); + prof_malloc(tsd_tsdn(tsd), p, usize, tctx); return (p); } +/* + * ialloc_body() is inlined so that fast and slow paths are generated 
separately + * with statically known slow_path. + * + * This function guarantees that *tsdn is non-NULL on success. + */ JEMALLOC_ALWAYS_INLINE_C void * -imalloc_body(size_t size, tsd_t **tsd, size_t *usize) +ialloc_body(size_t size, bool zero, tsdn_t **tsdn, size_t *usize, + bool slow_path) { + tsd_t *tsd; + szind_t ind; - if (unlikely(malloc_init())) + if (slow_path && unlikely(malloc_init())) { + *tsdn = NULL; return (NULL); - *tsd = tsd_fetch(); - - if (config_prof && opt_prof) { - *usize = s2u(size); - if (unlikely(*usize == 0)) - return (NULL); - return (imalloc_prof(*tsd, *usize)); } - if (config_stats || (config_valgrind && unlikely(in_valgrind))) - *usize = s2u(size); - return (imalloc(*tsd, size)); + tsd = tsd_fetch(); + *tsdn = tsd_tsdn(tsd); + witness_assert_lockless(tsd_tsdn(tsd)); + + ind = size2index(size); + if (unlikely(ind >= NSIZES)) + return (NULL); + + if (config_stats || (config_prof && opt_prof) || (slow_path && + config_valgrind && unlikely(in_valgrind))) { + *usize = index2size(ind); + assert(*usize > 0 && *usize <= HUGE_MAXCLASS); + } + + if (config_prof && opt_prof) + return (ialloc_prof(tsd, *usize, ind, zero, slow_path)); + + return (ialloc(tsd, size, ind, zero, slow_path)); +} + +JEMALLOC_ALWAYS_INLINE_C void +ialloc_post_check(void *ret, tsdn_t *tsdn, size_t usize, const char *func, + bool update_errno, bool slow_path) +{ + + assert(!tsdn_null(tsdn) || ret == NULL); + + if (unlikely(ret == NULL)) { + if (slow_path && config_xmalloc && unlikely(opt_xmalloc)) { + malloc_printf(": Error in %s(): out of " + "memory\n", func); + abort(); + } + if (update_errno) + set_errno(ENOMEM); + } + if (config_stats && likely(ret != NULL)) { + assert(usize == isalloc(tsdn, ret, config_prof)); + *tsd_thread_allocatedp_get(tsdn_tsd(tsdn)) += usize; + } + witness_assert_lockless(tsdn); } JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN @@ -1418,27 +1628,22 @@ JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) je_malloc(size_t size) { void *ret; - 
tsd_t *tsd; + tsdn_t *tsdn; size_t usize JEMALLOC_CC_SILENCE_INIT(0); if (size == 0) size = 1; - ret = imalloc_body(size, &tsd, &usize); - if (unlikely(ret == NULL)) { - if (config_xmalloc && unlikely(opt_xmalloc)) { - malloc_write(": Error in malloc(): " - "out of memory\n"); - abort(); - } - set_errno(ENOMEM); + if (likely(!malloc_slow)) { + ret = ialloc_body(size, false, &tsdn, &usize, false); + ialloc_post_check(ret, tsdn, usize, "malloc", true, false); + } else { + ret = ialloc_body(size, false, &tsdn, &usize, true); + ialloc_post_check(ret, tsdn, usize, "malloc", true, true); + UTRACE(0, size, ret); + JEMALLOC_VALGRIND_MALLOC(ret != NULL, tsdn, ret, usize, false); } - if (config_stats && likely(ret != NULL)) { - assert(usize == isalloc(ret, config_prof)); - *tsd_thread_allocatedp_get(tsd) += usize; - } - UTRACE(0, size, ret); - JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, false); + return (ret); } @@ -1455,7 +1660,7 @@ imemalign_prof_sample(tsd_t *tsd, size_t alignment, size_t usize, p = ipalloc(tsd, LARGE_MINCLASS, alignment, false); if (p == NULL) return (NULL); - arena_prof_promoted(p, usize); + arena_prof_promoted(tsd_tsdn(tsd), p, usize); } else p = ipalloc(tsd, usize, alignment, false); @@ -1477,7 +1682,7 @@ imemalign_prof(tsd_t *tsd, size_t alignment, size_t usize) prof_alloc_rollback(tsd, tctx, true); return (NULL); } - prof_malloc(p, usize, tctx); + prof_malloc(tsd_tsdn(tsd), p, usize, tctx); return (p); } @@ -1494,10 +1699,12 @@ imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) assert(min_alignment != 0); if (unlikely(malloc_init())) { + tsd = NULL; result = NULL; goto label_oom; } tsd = tsd_fetch(); + witness_assert_lockless(tsd_tsdn(tsd)); if (size == 0) size = 1; @@ -1515,7 +1722,7 @@ imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) } usize = sa2u(size, alignment); - if (unlikely(usize == 0)) { + if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) { result = NULL; goto label_oom; } @@ 
-1532,10 +1739,13 @@ imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) ret = 0; label_return: if (config_stats && likely(result != NULL)) { - assert(usize == isalloc(result, config_prof)); + assert(usize == isalloc(tsd_tsdn(tsd), result, config_prof)); *tsd_thread_allocatedp_get(tsd) += usize; } UTRACE(0, size, result); + JEMALLOC_VALGRIND_MALLOC(result != NULL, tsd_tsdn(tsd), result, usize, + false); + witness_assert_lockless(tsd_tsdn(tsd)); return (ret); label_oom: assert(result == NULL); @@ -1545,6 +1755,7 @@ label_oom: abort(); } ret = ENOMEM; + witness_assert_lockless(tsd_tsdn(tsd)); goto label_return; } @@ -1552,9 +1763,10 @@ JEMALLOC_EXPORT int JEMALLOC_NOTHROW JEMALLOC_ATTR(nonnull(1)) je_posix_memalign(void **memptr, size_t alignment, size_t size) { - int ret = imemalign(memptr, alignment, size, sizeof(void *)); - JEMALLOC_VALGRIND_MALLOC(ret == 0, *memptr, isalloc(*memptr, - config_prof), false); + int ret; + + ret = imemalign(memptr, alignment, size, sizeof(void *)); + return (ret); } @@ -1570,114 +1782,45 @@ je_aligned_alloc(size_t alignment, size_t size) ret = NULL; set_errno(err); } - JEMALLOC_VALGRIND_MALLOC(err == 0, ret, isalloc(ret, config_prof), - false); + return (ret); } -static void * -icalloc_prof_sample(tsd_t *tsd, size_t usize, prof_tctx_t *tctx) -{ - void *p; - - if (tctx == NULL) - return (NULL); - if (usize <= SMALL_MAXCLASS) { - p = icalloc(tsd, LARGE_MINCLASS); - if (p == NULL) - return (NULL); - arena_prof_promoted(p, usize); - } else - p = icalloc(tsd, usize); - - return (p); -} - -JEMALLOC_ALWAYS_INLINE_C void * -icalloc_prof(tsd_t *tsd, size_t usize) -{ - void *p; - prof_tctx_t *tctx; - - tctx = prof_alloc_prep(tsd, usize, prof_active_get_unlocked(), true); - if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) - p = icalloc_prof_sample(tsd, usize, tctx); - else - p = icalloc(tsd, usize); - if (unlikely(p == NULL)) { - prof_alloc_rollback(tsd, tctx, true); - return (NULL); - } - prof_malloc(p, usize, tctx); - 
- return (p); -} - JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE2(1, 2) je_calloc(size_t num, size_t size) { void *ret; - tsd_t *tsd; + tsdn_t *tsdn; size_t num_size; size_t usize JEMALLOC_CC_SILENCE_INIT(0); - if (unlikely(malloc_init())) { - num_size = 0; - ret = NULL; - goto label_return; - } - tsd = tsd_fetch(); - num_size = num * size; if (unlikely(num_size == 0)) { if (num == 0 || size == 0) num_size = 1; - else { - ret = NULL; - goto label_return; - } + else + num_size = HUGE_MAXCLASS + 1; /* Trigger OOM. */ /* * Try to avoid division here. We know that it isn't possible to * overflow during multiplication if neither operand uses any of the * most significant half of the bits in a size_t. */ } else if (unlikely(((num | size) & (SIZE_T_MAX << (sizeof(size_t) << - 2))) && (num_size / size != num))) { - /* size_t overflow. */ - ret = NULL; - goto label_return; - } + 2))) && (num_size / size != num))) + num_size = HUGE_MAXCLASS + 1; /* size_t overflow. 
*/ - if (config_prof && opt_prof) { - usize = s2u(num_size); - if (unlikely(usize == 0)) { - ret = NULL; - goto label_return; - } - ret = icalloc_prof(tsd, usize); + if (likely(!malloc_slow)) { + ret = ialloc_body(num_size, true, &tsdn, &usize, false); + ialloc_post_check(ret, tsdn, usize, "calloc", true, false); } else { - if (config_stats || (config_valgrind && unlikely(in_valgrind))) - usize = s2u(num_size); - ret = icalloc(tsd, num_size); + ret = ialloc_body(num_size, true, &tsdn, &usize, true); + ialloc_post_check(ret, tsdn, usize, "calloc", true, true); + UTRACE(0, num_size, ret); + JEMALLOC_VALGRIND_MALLOC(ret != NULL, tsdn, ret, usize, true); } -label_return: - if (unlikely(ret == NULL)) { - if (config_xmalloc && unlikely(opt_xmalloc)) { - malloc_write(": Error in calloc(): out of " - "memory\n"); - abort(); - } - set_errno(ENOMEM); - } - if (config_stats && likely(ret != NULL)) { - assert(usize == isalloc(ret, config_prof)); - *tsd_thread_allocatedp_get(tsd) += usize; - } - UTRACE(0, num_size, ret); - JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, true); return (ret); } @@ -1693,7 +1836,7 @@ irealloc_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize, p = iralloc(tsd, old_ptr, old_usize, LARGE_MINCLASS, 0, false); if (p == NULL) return (NULL); - arena_prof_promoted(p, usize); + arena_prof_promoted(tsd_tsdn(tsd), p, usize); } else p = iralloc(tsd, old_ptr, old_usize, usize, 0, false); @@ -1708,7 +1851,7 @@ irealloc_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize) prof_tctx_t *old_tctx, *tctx; prof_active = prof_active_get_unlocked(); - old_tctx = prof_tctx_get(old_ptr); + old_tctx = prof_tctx_get(tsd_tsdn(tsd), old_ptr); tctx = prof_alloc_prep(tsd, usize, prof_active, true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) p = irealloc_prof_sample(tsd, old_ptr, old_usize, usize, tctx); @@ -1725,32 +1868,41 @@ irealloc_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize) } JEMALLOC_INLINE_C void -ifree(tsd_t 
*tsd, void *ptr, tcache_t *tcache) +ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { size_t usize; UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); + witness_assert_lockless(tsd_tsdn(tsd)); + assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); if (config_prof && opt_prof) { - usize = isalloc(ptr, config_prof); + usize = isalloc(tsd_tsdn(tsd), ptr, config_prof); prof_free(tsd, ptr, usize); } else if (config_stats || config_valgrind) - usize = isalloc(ptr, config_prof); + usize = isalloc(tsd_tsdn(tsd), ptr, config_prof); if (config_stats) *tsd_thread_deallocatedp_get(tsd) += usize; - if (config_valgrind && unlikely(in_valgrind)) - rzsize = p2rz(ptr); - iqalloc(tsd, ptr, tcache); - JEMALLOC_VALGRIND_FREE(ptr, rzsize); + + if (likely(!slow_path)) + iqalloc(tsd, ptr, tcache, false); + else { + if (config_valgrind && unlikely(in_valgrind)) + rzsize = p2rz(tsd_tsdn(tsd), ptr); + iqalloc(tsd, ptr, tcache, true); + JEMALLOC_VALGRIND_FREE(ptr, rzsize); + } } JEMALLOC_INLINE_C void -isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache) +isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); + witness_assert_lockless(tsd_tsdn(tsd)); + assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); @@ -1759,8 +1911,8 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache) if (config_stats) *tsd_thread_deallocatedp_get(tsd) += usize; if (config_valgrind && unlikely(in_valgrind)) - rzsize = p2rz(ptr); - isqalloc(tsd, ptr, usize, tcache); + rzsize = p2rz(tsd_tsdn(tsd), ptr); + isqalloc(tsd, ptr, usize, tcache, slow_path); JEMALLOC_VALGRIND_FREE(ptr, rzsize); } @@ -1770,44 +1922,57 @@ JEMALLOC_ALLOC_SIZE(2) je_realloc(void *ptr, size_t size) { void *ret; - tsd_t *tsd JEMALLOC_CC_SILENCE_INIT(NULL); + tsdn_t *tsdn JEMALLOC_CC_SILENCE_INIT(NULL); size_t usize JEMALLOC_CC_SILENCE_INIT(0); size_t old_usize = 0; UNUSED size_t old_rzsize 
JEMALLOC_CC_SILENCE_INIT(0); if (unlikely(size == 0)) { if (ptr != NULL) { + tsd_t *tsd; + /* realloc(ptr, 0) is equivalent to free(ptr). */ UTRACE(ptr, 0, 0); tsd = tsd_fetch(); - ifree(tsd, ptr, tcache_get(tsd, false)); + ifree(tsd, ptr, tcache_get(tsd, false), true); return (NULL); } size = 1; } if (likely(ptr != NULL)) { + tsd_t *tsd; + assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); tsd = tsd_fetch(); - old_usize = isalloc(ptr, config_prof); - if (config_valgrind && unlikely(in_valgrind)) - old_rzsize = config_prof ? p2rz(ptr) : u2rz(old_usize); + witness_assert_lockless(tsd_tsdn(tsd)); + + old_usize = isalloc(tsd_tsdn(tsd), ptr, config_prof); + if (config_valgrind && unlikely(in_valgrind)) { + old_rzsize = config_prof ? p2rz(tsd_tsdn(tsd), ptr) : + u2rz(old_usize); + } if (config_prof && opt_prof) { usize = s2u(size); - ret = unlikely(usize == 0) ? NULL : irealloc_prof(tsd, - ptr, old_usize, usize); + ret = unlikely(usize == 0 || usize > HUGE_MAXCLASS) ? + NULL : irealloc_prof(tsd, ptr, old_usize, usize); } else { if (config_stats || (config_valgrind && unlikely(in_valgrind))) usize = s2u(size); ret = iralloc(tsd, ptr, old_usize, size, 0, false); } + tsdn = tsd_tsdn(tsd); } else { /* realloc(NULL, size) is equivalent to malloc(size). 
*/ - ret = imalloc_body(size, &tsd, &usize); + if (likely(!malloc_slow)) + ret = ialloc_body(size, false, &tsdn, &usize, false); + else + ret = ialloc_body(size, false, &tsdn, &usize, true); + assert(!tsdn_null(tsdn) || ret == NULL); } if (unlikely(ret == NULL)) { @@ -1819,13 +1984,17 @@ je_realloc(void *ptr, size_t size) set_errno(ENOMEM); } if (config_stats && likely(ret != NULL)) { - assert(usize == isalloc(ret, config_prof)); + tsd_t *tsd; + + assert(usize == isalloc(tsdn, ret, config_prof)); + tsd = tsdn_tsd(tsdn); *tsd_thread_allocatedp_get(tsd) += usize; *tsd_thread_deallocatedp_get(tsd) += old_usize; } UTRACE(ptr, size, ret); - JEMALLOC_VALGRIND_REALLOC(true, ret, usize, true, ptr, old_usize, - old_rzsize, true, false); + JEMALLOC_VALGRIND_REALLOC(maybe, tsdn, ret, usize, maybe, ptr, + old_usize, old_rzsize, maybe, false); + witness_assert_lockless(tsdn); return (ret); } @@ -1836,7 +2005,12 @@ je_free(void *ptr) UTRACE(ptr, 0, 0); if (likely(ptr != NULL)) { tsd_t *tsd = tsd_fetch(); - ifree(tsd, ptr, tcache_get(tsd, false)); + witness_assert_lockless(tsd_tsdn(tsd)); + if (likely(!malloc_slow)) + ifree(tsd, ptr, tcache_get(tsd, false), false); + else + ifree(tsd, ptr, tcache_get(tsd, false), true); + witness_assert_lockless(tsd_tsdn(tsd)); } } @@ -1857,7 +2031,6 @@ je_memalign(size_t alignment, size_t size) void *ret JEMALLOC_CC_SILENCE_INIT(NULL); if (unlikely(imemalign(&ret, alignment, size, 1) != 0)) ret = NULL; - JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, size, false); return (ret); } #endif @@ -1871,7 +2044,6 @@ je_valloc(size_t size) void *ret JEMALLOC_CC_SILENCE_INIT(NULL); if (unlikely(imemalign(&ret, PAGE, size, 1) != 0)) ret = NULL; - JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, size, false); return (ret); } #endif @@ -1901,6 +2073,29 @@ JEMALLOC_EXPORT void *(*__realloc_hook)(void *ptr, size_t size) = je_realloc; JEMALLOC_EXPORT void *(*__memalign_hook)(size_t alignment, size_t size) = je_memalign; # endif + +#ifdef CPU_COUNT +/* + * To enable 
static linking with glibc, the libc specific malloc interface must + * be implemented also, so none of glibc's malloc.o functions are added to the + * link. + */ +#define ALIAS(je_fn) __attribute__((alias (#je_fn), used)) +/* To force macro expansion of je_ prefix before stringification. */ +#define PREALIAS(je_fn) ALIAS(je_fn) +void *__libc_malloc(size_t size) PREALIAS(je_malloc); +void __libc_free(void* ptr) PREALIAS(je_free); +void *__libc_realloc(void* ptr, size_t size) PREALIAS(je_realloc); +void *__libc_calloc(size_t n, size_t size) PREALIAS(je_calloc); +void *__libc_memalign(size_t align, size_t s) PREALIAS(je_memalign); +void *__libc_valloc(size_t size) PREALIAS(je_valloc); +int __posix_memalign(void** r, size_t a, size_t s) + PREALIAS(je_posix_memalign); +#undef PREALIAS +#undef ALIAS + +#endif + #endif /* @@ -1912,7 +2107,7 @@ JEMALLOC_EXPORT void *(*__memalign_hook)(size_t alignment, size_t size) = */ JEMALLOC_ALWAYS_INLINE_C bool -imallocx_flags_decode_hard(tsd_t *tsd, size_t size, int flags, size_t *usize, +imallocx_flags_decode(tsd_t *tsd, size_t size, int flags, size_t *usize, size_t *alignment, bool *zero, tcache_t **tcache, arena_t **arena) { @@ -1923,7 +2118,8 @@ imallocx_flags_decode_hard(tsd_t *tsd, size_t size, int flags, size_t *usize, *alignment = MALLOCX_ALIGN_GET_SPECIFIED(flags); *usize = sa2u(size, *alignment); } - assert(*usize != 0); + if (unlikely(*usize == 0 || *usize > HUGE_MAXCLASS)) + return (true); *zero = MALLOCX_ZERO_GET(flags); if ((flags & MALLOCX_TCACHE_MASK) != 0) { if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) @@ -1934,7 +2130,7 @@ imallocx_flags_decode_hard(tsd_t *tsd, size_t size, int flags, size_t *usize, *tcache = tcache_get(tsd, true); if ((flags & MALLOCX_ARENA_MASK) != 0) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); - *arena = arena_get(tsd, arena_ind, true, true); + *arena = arena_get(tsd_tsdn(tsd), arena_ind, true); if (unlikely(*arena == NULL)) return (true); } else @@ -1942,59 +2138,44 @@ 
imallocx_flags_decode_hard(tsd_t *tsd, size_t size, int flags, size_t *usize, return (false); } -JEMALLOC_ALWAYS_INLINE_C bool -imallocx_flags_decode(tsd_t *tsd, size_t size, int flags, size_t *usize, - size_t *alignment, bool *zero, tcache_t **tcache, arena_t **arena) -{ - - if (likely(flags == 0)) { - *usize = s2u(size); - assert(*usize != 0); - *alignment = 0; - *zero = false; - *tcache = tcache_get(tsd, true); - *arena = NULL; - return (false); - } else { - return (imallocx_flags_decode_hard(tsd, size, flags, usize, - alignment, zero, tcache, arena)); - } -} - JEMALLOC_ALWAYS_INLINE_C void * -imallocx_flags(tsd_t *tsd, size_t usize, size_t alignment, bool zero, - tcache_t *tcache, arena_t *arena) +imallocx_flags(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, + tcache_t *tcache, arena_t *arena, bool slow_path) { + szind_t ind; if (unlikely(alignment != 0)) - return (ipalloct(tsd, usize, alignment, zero, tcache, arena)); - if (unlikely(zero)) - return (icalloct(tsd, usize, tcache, arena)); - return (imalloct(tsd, usize, tcache, arena)); + return (ipalloct(tsdn, usize, alignment, zero, tcache, arena)); + ind = size2index(usize); + assert(ind < NSIZES); + return (iallocztm(tsdn, usize, ind, zero, tcache, false, arena, + slow_path)); } static void * -imallocx_prof_sample(tsd_t *tsd, size_t usize, size_t alignment, bool zero, - tcache_t *tcache, arena_t *arena) +imallocx_prof_sample(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, + tcache_t *tcache, arena_t *arena, bool slow_path) { void *p; if (usize <= SMALL_MAXCLASS) { assert(((alignment == 0) ? 
s2u(LARGE_MINCLASS) : sa2u(LARGE_MINCLASS, alignment)) == LARGE_MINCLASS); - p = imallocx_flags(tsd, LARGE_MINCLASS, alignment, zero, tcache, - arena); + p = imallocx_flags(tsdn, LARGE_MINCLASS, alignment, zero, + tcache, arena, slow_path); if (p == NULL) return (NULL); - arena_prof_promoted(p, usize); - } else - p = imallocx_flags(tsd, usize, alignment, zero, tcache, arena); + arena_prof_promoted(tsdn, p, usize); + } else { + p = imallocx_flags(tsdn, usize, alignment, zero, tcache, arena, + slow_path); + } return (p); } JEMALLOC_ALWAYS_INLINE_C void * -imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) +imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize, bool slow_path) { void *p; size_t alignment; @@ -2007,25 +2188,27 @@ imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) &zero, &tcache, &arena))) return (NULL); tctx = prof_alloc_prep(tsd, *usize, prof_active_get_unlocked(), true); - if (likely((uintptr_t)tctx == (uintptr_t)1U)) - p = imallocx_flags(tsd, *usize, alignment, zero, tcache, arena); - else if ((uintptr_t)tctx > (uintptr_t)1U) { - p = imallocx_prof_sample(tsd, *usize, alignment, zero, tcache, - arena); + if (likely((uintptr_t)tctx == (uintptr_t)1U)) { + p = imallocx_flags(tsd_tsdn(tsd), *usize, alignment, zero, + tcache, arena, slow_path); + } else if ((uintptr_t)tctx > (uintptr_t)1U) { + p = imallocx_prof_sample(tsd_tsdn(tsd), *usize, alignment, zero, + tcache, arena, slow_path); } else p = NULL; if (unlikely(p == NULL)) { prof_alloc_rollback(tsd, tctx, true); return (NULL); } - prof_malloc(p, *usize, tctx); + prof_malloc(tsd_tsdn(tsd), p, *usize, tctx); assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); return (p); } JEMALLOC_ALWAYS_INLINE_C void * -imallocx_no_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) +imallocx_no_prof(tsd_t *tsd, size_t size, int flags, size_t *usize, + bool slow_path) { void *p; size_t alignment; @@ -2033,56 +2216,78 @@ imallocx_no_prof(tsd_t *tsd, size_t 
size, int flags, size_t *usize) tcache_t *tcache; arena_t *arena; - if (likely(flags == 0)) { - if (config_stats || (config_valgrind && unlikely(in_valgrind))) - *usize = s2u(size); - return (imalloc(tsd, size)); - } - - if (unlikely(imallocx_flags_decode_hard(tsd, size, flags, usize, - &alignment, &zero, &tcache, &arena))) + if (unlikely(imallocx_flags_decode(tsd, size, flags, usize, &alignment, + &zero, &tcache, &arena))) return (NULL); - p = imallocx_flags(tsd, *usize, alignment, zero, tcache, arena); + p = imallocx_flags(tsd_tsdn(tsd), *usize, alignment, zero, tcache, + arena, slow_path); assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); return (p); } +/* This function guarantees that *tsdn is non-NULL on success. */ +JEMALLOC_ALWAYS_INLINE_C void * +imallocx_body(size_t size, int flags, tsdn_t **tsdn, size_t *usize, + bool slow_path) +{ + tsd_t *tsd; + + if (slow_path && unlikely(malloc_init())) { + *tsdn = NULL; + return (NULL); + } + + tsd = tsd_fetch(); + *tsdn = tsd_tsdn(tsd); + witness_assert_lockless(tsd_tsdn(tsd)); + + if (likely(flags == 0)) { + szind_t ind = size2index(size); + if (unlikely(ind >= NSIZES)) + return (NULL); + if (config_stats || (config_prof && opt_prof) || (slow_path && + config_valgrind && unlikely(in_valgrind))) { + *usize = index2size(ind); + assert(*usize > 0 && *usize <= HUGE_MAXCLASS); + } + + if (config_prof && opt_prof) { + return (ialloc_prof(tsd, *usize, ind, false, + slow_path)); + } + + return (ialloc(tsd, size, ind, false, slow_path)); + } + + if (config_prof && opt_prof) + return (imallocx_prof(tsd, size, flags, usize, slow_path)); + + return (imallocx_no_prof(tsd, size, flags, usize, slow_path)); +} + JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) je_mallocx(size_t size, int flags) { - tsd_t *tsd; + tsdn_t *tsdn; void *p; size_t usize; assert(size != 0); - if (unlikely(malloc_init())) - goto label_oom; - tsd = 
tsd_fetch(); - - if (config_prof && opt_prof) - p = imallocx_prof(tsd, size, flags, &usize); - else - p = imallocx_no_prof(tsd, size, flags, &usize); - if (unlikely(p == NULL)) - goto label_oom; - - if (config_stats) { - assert(usize == isalloc(p, config_prof)); - *tsd_thread_allocatedp_get(tsd) += usize; + if (likely(!malloc_slow)) { + p = imallocx_body(size, flags, &tsdn, &usize, false); + ialloc_post_check(p, tsdn, usize, "mallocx", false, false); + } else { + p = imallocx_body(size, flags, &tsdn, &usize, true); + ialloc_post_check(p, tsdn, usize, "mallocx", false, true); + UTRACE(0, size, p); + JEMALLOC_VALGRIND_MALLOC(p != NULL, tsdn, p, usize, + MALLOCX_ZERO_GET(flags)); } - UTRACE(0, size, p); - JEMALLOC_VALGRIND_MALLOC(true, p, usize, MALLOCX_ZERO_GET(flags)); + return (p); -label_oom: - if (config_xmalloc && unlikely(opt_xmalloc)) { - malloc_write(": Error in mallocx(): out of memory\n"); - abort(); - } - UTRACE(0, size, 0); - return (NULL); } static void * @@ -2099,7 +2304,7 @@ irallocx_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize, zero, tcache, arena); if (p == NULL) return (NULL); - arena_prof_promoted(p, usize); + arena_prof_promoted(tsd_tsdn(tsd), p, usize); } else { p = iralloct(tsd, old_ptr, old_usize, usize, alignment, zero, tcache, arena); @@ -2118,8 +2323,8 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, prof_tctx_t *old_tctx, *tctx; prof_active = prof_active_get_unlocked(); - old_tctx = prof_tctx_get(old_ptr); - tctx = prof_alloc_prep(tsd, *usize, prof_active, true); + old_tctx = prof_tctx_get(tsd_tsdn(tsd), old_ptr); + tctx = prof_alloc_prep(tsd, *usize, prof_active, false); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { p = irallocx_prof_sample(tsd, old_ptr, old_usize, *usize, alignment, zero, tcache, arena, tctx); @@ -2128,7 +2333,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, tcache, arena); } if (unlikely(p == NULL)) { - prof_alloc_rollback(tsd, tctx, true); + 
prof_alloc_rollback(tsd, tctx, false); return (NULL); } @@ -2141,9 +2346,9 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, * be the same as the current usize because of in-place large * reallocation. Therefore, query the actual value of usize. */ - *usize = isalloc(p, config_prof); + *usize = isalloc(tsd_tsdn(tsd), p, config_prof); } - prof_realloc(tsd, p, *usize, tctx, prof_active, true, old_ptr, + prof_realloc(tsd, p, *usize, tctx, prof_active, false, old_ptr, old_usize, old_tctx); return (p); @@ -2169,10 +2374,11 @@ je_rallocx(void *ptr, size_t size, int flags) assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); tsd = tsd_fetch(); + witness_assert_lockless(tsd_tsdn(tsd)); if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); - arena = arena_get(tsd, arena_ind, true, true); + arena = arena_get(tsd_tsdn(tsd), arena_ind, true); if (unlikely(arena == NULL)) goto label_oom; } else @@ -2186,13 +2392,14 @@ je_rallocx(void *ptr, size_t size, int flags) } else tcache = tcache_get(tsd, true); - old_usize = isalloc(ptr, config_prof); + old_usize = isalloc(tsd_tsdn(tsd), ptr, config_prof); if (config_valgrind && unlikely(in_valgrind)) old_rzsize = u2rz(old_usize); if (config_prof && opt_prof) { usize = (alignment == 0) ? 
s2u(size) : sa2u(size, alignment); - assert(usize != 0); + if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) + goto label_oom; p = irallocx_prof(tsd, ptr, old_usize, size, alignment, &usize, zero, tcache, arena); if (unlikely(p == NULL)) @@ -2203,7 +2410,7 @@ je_rallocx(void *ptr, size_t size, int flags) if (unlikely(p == NULL)) goto label_oom; if (config_stats || (config_valgrind && unlikely(in_valgrind))) - usize = isalloc(p, config_prof); + usize = isalloc(tsd_tsdn(tsd), p, config_prof); } assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); @@ -2212,8 +2419,9 @@ je_rallocx(void *ptr, size_t size, int flags) *tsd_thread_deallocatedp_get(tsd) += old_usize; } UTRACE(ptr, size, p); - JEMALLOC_VALGRIND_REALLOC(true, p, usize, false, ptr, old_usize, - old_rzsize, false, zero); + JEMALLOC_VALGRIND_REALLOC(maybe, tsd_tsdn(tsd), p, usize, no, ptr, + old_usize, old_rzsize, no, zero); + witness_assert_lockless(tsd_tsdn(tsd)); return (p); label_oom: if (config_xmalloc && unlikely(opt_xmalloc)) { @@ -2221,31 +2429,33 @@ label_oom: abort(); } UTRACE(ptr, size, 0); + witness_assert_lockless(tsd_tsdn(tsd)); return (NULL); } JEMALLOC_ALWAYS_INLINE_C size_t -ixallocx_helper(void *ptr, size_t old_usize, size_t size, size_t extra, - size_t alignment, bool zero) +ixallocx_helper(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size, + size_t extra, size_t alignment, bool zero) { size_t usize; - if (ixalloc(ptr, old_usize, size, extra, alignment, zero)) + if (ixalloc(tsdn, ptr, old_usize, size, extra, alignment, zero)) return (old_usize); - usize = isalloc(ptr, config_prof); + usize = isalloc(tsdn, ptr, config_prof); return (usize); } static size_t -ixallocx_prof_sample(void *ptr, size_t old_usize, size_t size, size_t extra, - size_t alignment, bool zero, prof_tctx_t *tctx) +ixallocx_prof_sample(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size, + size_t extra, size_t alignment, bool zero, prof_tctx_t *tctx) { size_t usize; if (tctx == NULL) return 
(old_usize); - usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, zero); + usize = ixallocx_helper(tsdn, ptr, old_usize, size, extra, alignment, + zero); return (usize); } @@ -2259,23 +2469,36 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, prof_tctx_t *old_tctx, *tctx; prof_active = prof_active_get_unlocked(); - old_tctx = prof_tctx_get(ptr); + old_tctx = prof_tctx_get(tsd_tsdn(tsd), ptr); /* * usize isn't knowable before ixalloc() returns when extra is non-zero. * Therefore, compute its maximum possible value and use that in * prof_alloc_prep() to decide whether to capture a backtrace. * prof_realloc() will use the actual usize to decide whether to sample. */ - usize_max = (alignment == 0) ? s2u(size+extra) : sa2u(size+extra, - alignment); - assert(usize_max != 0); - tctx = prof_alloc_prep(tsd, usize_max, prof_active, false); - if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { - usize = ixallocx_prof_sample(ptr, old_usize, size, extra, - alignment, zero, tctx); + if (alignment == 0) { + usize_max = s2u(size+extra); + assert(usize_max > 0 && usize_max <= HUGE_MAXCLASS); } else { - usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, - zero); + usize_max = sa2u(size+extra, alignment); + if (unlikely(usize_max == 0 || usize_max > HUGE_MAXCLASS)) { + /* + * usize_max is out of range, and chances are that + * allocation will fail, but use the maximum possible + * value and carry on with prof_alloc_prep(), just in + * case allocation succeeds. 
+ */ + usize_max = HUGE_MAXCLASS; + } + } + tctx = prof_alloc_prep(tsd, usize_max, prof_active, false); + + if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { + usize = ixallocx_prof_sample(tsd_tsdn(tsd), ptr, old_usize, + size, extra, alignment, zero, tctx); + } else { + usize = ixallocx_helper(tsd_tsdn(tsd), ptr, old_usize, size, + extra, alignment, zero); } if (usize == old_usize) { prof_alloc_rollback(tsd, tctx, false); @@ -2302,18 +2525,25 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); tsd = tsd_fetch(); + witness_assert_lockless(tsd_tsdn(tsd)); - old_usize = isalloc(ptr, config_prof); + old_usize = isalloc(tsd_tsdn(tsd), ptr, config_prof); - /* Clamp extra if necessary to avoid (size + extra) overflow. */ - if (unlikely(size + extra > HUGE_MAXCLASS)) { - /* Check for size overflow. */ - if (unlikely(size > HUGE_MAXCLASS)) { - usize = old_usize; - goto label_not_resized; - } - extra = HUGE_MAXCLASS - size; + /* + * The API explicitly absolves itself of protecting against (size + + * extra) numerical overflow, but we may need to clamp extra to avoid + * exceeding HUGE_MAXCLASS. + * + * Ordinarily, size limit checking is handled deeper down, but here we + * have to check as part of (size + extra) clamping, since we need the + * clamped value in the above helper functions. 
+ */ + if (unlikely(size > HUGE_MAXCLASS)) { + usize = old_usize; + goto label_not_resized; } + if (unlikely(HUGE_MAXCLASS - size < extra)) + extra = HUGE_MAXCLASS - size; if (config_valgrind && unlikely(in_valgrind)) old_rzsize = u2rz(old_usize); @@ -2322,8 +2552,8 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) usize = ixallocx_prof(tsd, ptr, old_usize, size, extra, alignment, zero); } else { - usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, - zero); + usize = ixallocx_helper(tsd_tsdn(tsd), ptr, old_usize, size, + extra, alignment, zero); } if (unlikely(usize == old_usize)) goto label_not_resized; @@ -2332,10 +2562,11 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) *tsd_thread_allocatedp_get(tsd) += usize; *tsd_thread_deallocatedp_get(tsd) += old_usize; } - JEMALLOC_VALGRIND_REALLOC(false, ptr, usize, false, ptr, old_usize, - old_rzsize, false, zero); + JEMALLOC_VALGRIND_REALLOC(no, tsd_tsdn(tsd), ptr, usize, no, ptr, + old_usize, old_rzsize, no, zero); label_not_resized: UTRACE(ptr, size, ptr); + witness_assert_lockless(tsd_tsdn(tsd)); return (usize); } @@ -2344,15 +2575,20 @@ JEMALLOC_ATTR(pure) je_sallocx(const void *ptr, int flags) { size_t usize; + tsdn_t *tsdn; assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); - if (config_ivsalloc) - usize = ivsalloc(ptr, config_prof); - else - usize = isalloc(ptr, config_prof); + tsdn = tsdn_fetch(); + witness_assert_lockless(tsdn); + if (config_ivsalloc) + usize = ivsalloc(tsdn, ptr, config_prof); + else + usize = isalloc(tsdn, ptr, config_prof); + + witness_assert_lockless(tsdn); return (usize); } @@ -2366,6 +2602,7 @@ je_dallocx(void *ptr, int flags) assert(malloc_initialized() || IS_INITIALIZER); tsd = tsd_fetch(); + witness_assert_lockless(tsd_tsdn(tsd)); if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) tcache = NULL; @@ -2375,19 +2612,25 @@ je_dallocx(void *ptr, int flags) tcache = 
tcache_get(tsd, false); UTRACE(ptr, 0, 0); - ifree(tsd_fetch(), ptr, tcache); + if (likely(!malloc_slow)) + ifree(tsd, ptr, tcache, false); + else + ifree(tsd, ptr, tcache, true); + witness_assert_lockless(tsd_tsdn(tsd)); } JEMALLOC_ALWAYS_INLINE_C size_t -inallocx(size_t size, int flags) +inallocx(tsdn_t *tsdn, size_t size, int flags) { size_t usize; + witness_assert_lockless(tsdn); + if (likely((flags & MALLOCX_LG_ALIGN_MASK) == 0)) usize = s2u(size); else usize = sa2u(size, MALLOCX_ALIGN_GET_SPECIFIED(flags)); - assert(usize != 0); + witness_assert_lockless(tsdn); return (usize); } @@ -2400,10 +2643,11 @@ je_sdallocx(void *ptr, size_t size, int flags) assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); - usize = inallocx(size, flags); - assert(usize == isalloc(ptr, config_prof)); - tsd = tsd_fetch(); + usize = inallocx(tsd_tsdn(tsd), size, flags); + assert(usize == isalloc(tsd_tsdn(tsd), ptr, config_prof)); + + witness_assert_lockless(tsd_tsdn(tsd)); if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) tcache = NULL; @@ -2413,75 +2657,116 @@ je_sdallocx(void *ptr, size_t size, int flags) tcache = tcache_get(tsd, false); UTRACE(ptr, 0, 0); - isfree(tsd, ptr, usize, tcache); + if (likely(!malloc_slow)) + isfree(tsd, ptr, usize, tcache, false); + else + isfree(tsd, ptr, usize, tcache, true); + witness_assert_lockless(tsd_tsdn(tsd)); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW JEMALLOC_ATTR(pure) je_nallocx(size_t size, int flags) { + size_t usize; + tsdn_t *tsdn; assert(size != 0); if (unlikely(malloc_init())) return (0); - return (inallocx(size, flags)); + tsdn = tsdn_fetch(); + witness_assert_lockless(tsdn); + + usize = inallocx(tsdn, size, flags); + if (unlikely(usize > HUGE_MAXCLASS)) + return (0); + + witness_assert_lockless(tsdn); + return (usize); } JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + 
tsd_t *tsd; if (unlikely(malloc_init())) return (EAGAIN); - return (ctl_byname(name, oldp, oldlenp, newp, newlen)); + tsd = tsd_fetch(); + witness_assert_lockless(tsd_tsdn(tsd)); + ret = ctl_byname(tsd, name, oldp, oldlenp, newp, newlen); + witness_assert_lockless(tsd_tsdn(tsd)); + return (ret); } JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) { + int ret; + tsdn_t *tsdn; if (unlikely(malloc_init())) return (EAGAIN); - return (ctl_nametomib(name, mibp, miblenp)); + tsdn = tsdn_fetch(); + witness_assert_lockless(tsdn); + ret = ctl_nametomib(tsdn, name, mibp, miblenp); + witness_assert_lockless(tsdn); + return (ret); } JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + tsd_t *tsd; if (unlikely(malloc_init())) return (EAGAIN); - return (ctl_bymib(mib, miblen, oldp, oldlenp, newp, newlen)); + tsd = tsd_fetch(); + witness_assert_lockless(tsd_tsdn(tsd)); + ret = ctl_bymib(tsd, mib, miblen, oldp, oldlenp, newp, newlen); + witness_assert_lockless(tsd_tsdn(tsd)); + return (ret); } JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, const char *opts) { + tsdn_t *tsdn; + tsdn = tsdn_fetch(); + witness_assert_lockless(tsdn); stats_print(write_cb, cbopaque, opts); + witness_assert_lockless(tsdn); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { size_t ret; + tsdn_t *tsdn; assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); - if (config_ivsalloc) - ret = ivsalloc(ptr, config_prof); - else - ret = (ptr == NULL) ? 0 : isalloc(ptr, config_prof); + tsdn = tsdn_fetch(); + witness_assert_lockless(tsdn); + if (config_ivsalloc) + ret = ivsalloc(tsdn, ptr, config_prof); + else + ret = (ptr == NULL) ? 
0 : isalloc(tsdn, ptr, config_prof); + + witness_assert_lockless(tsdn); return (ret); } @@ -2507,6 +2792,7 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) * to trigger the deadlock described above, but doing so would involve forking * via a library constructor that runs before jemalloc's runs. */ +#ifndef JEMALLOC_JET JEMALLOC_ATTR(constructor) static void jemalloc_constructor(void) @@ -2514,6 +2800,7 @@ jemalloc_constructor(void) malloc_init(); } +#endif #ifndef JEMALLOC_MUTEX_INIT_CB void @@ -2523,7 +2810,9 @@ JEMALLOC_EXPORT void _malloc_prefork(void) #endif { - unsigned i; + tsd_t *tsd; + unsigned i, j, narenas; + arena_t *arena; #ifdef JEMALLOC_MUTEX_INIT_CB if (!malloc_initialized()) @@ -2531,16 +2820,40 @@ _malloc_prefork(void) #endif assert(malloc_initialized()); + tsd = tsd_fetch(); + + narenas = narenas_total_get(); + + witness_prefork(tsd); /* Acquire all mutexes in a safe order. */ - ctl_prefork(); - prof_prefork(); - malloc_mutex_prefork(&arenas_lock); - for (i = 0; i < narenas_total; i++) { - if (arenas[i] != NULL) - arena_prefork(arenas[i]); + ctl_prefork(tsd_tsdn(tsd)); + malloc_mutex_prefork(tsd_tsdn(tsd), &arenas_lock); + prof_prefork0(tsd_tsdn(tsd)); + for (i = 0; i < 3; i++) { + for (j = 0; j < narenas; j++) { + if ((arena = arena_get(tsd_tsdn(tsd), j, false)) != + NULL) { + switch (i) { + case 0: + arena_prefork0(tsd_tsdn(tsd), arena); + break; + case 1: + arena_prefork1(tsd_tsdn(tsd), arena); + break; + case 2: + arena_prefork2(tsd_tsdn(tsd), arena); + break; + default: not_reached(); + } + } + } } - chunk_prefork(); - base_prefork(); + base_prefork(tsd_tsdn(tsd)); + for (i = 0; i < narenas; i++) { + if ((arena = arena_get(tsd_tsdn(tsd), i, false)) != NULL) + arena_prefork3(tsd_tsdn(tsd), arena); + } + prof_prefork1(tsd_tsdn(tsd)); } #ifndef JEMALLOC_MUTEX_INIT_CB @@ -2551,7 +2864,8 @@ JEMALLOC_EXPORT void _malloc_postfork(void) #endif { - unsigned i; + tsd_t *tsd; + unsigned i, narenas; #ifdef JEMALLOC_MUTEX_INIT_CB if 
(!malloc_initialized()) @@ -2559,35 +2873,44 @@ _malloc_postfork(void) #endif assert(malloc_initialized()); + tsd = tsd_fetch(); + + witness_postfork_parent(tsd); /* Release all mutexes, now that fork() has completed. */ - base_postfork_parent(); - chunk_postfork_parent(); - for (i = 0; i < narenas_total; i++) { - if (arenas[i] != NULL) - arena_postfork_parent(arenas[i]); + base_postfork_parent(tsd_tsdn(tsd)); + for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { + arena_t *arena; + + if ((arena = arena_get(tsd_tsdn(tsd), i, false)) != NULL) + arena_postfork_parent(tsd_tsdn(tsd), arena); } - malloc_mutex_postfork_parent(&arenas_lock); - prof_postfork_parent(); - ctl_postfork_parent(); + prof_postfork_parent(tsd_tsdn(tsd)); + malloc_mutex_postfork_parent(tsd_tsdn(tsd), &arenas_lock); + ctl_postfork_parent(tsd_tsdn(tsd)); } void jemalloc_postfork_child(void) { - unsigned i; + tsd_t *tsd; + unsigned i, narenas; assert(malloc_initialized()); + tsd = tsd_fetch(); + + witness_postfork_child(tsd); /* Release all mutexes, now that fork() has completed. 
*/ - base_postfork_child(); - chunk_postfork_child(); - for (i = 0; i < narenas_total; i++) { - if (arenas[i] != NULL) - arena_postfork_child(arenas[i]); + base_postfork_child(tsd_tsdn(tsd)); + for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { + arena_t *arena; + + if ((arena = arena_get(tsd_tsdn(tsd), i, false)) != NULL) + arena_postfork_child(tsd_tsdn(tsd), arena); } - malloc_mutex_postfork_child(&arenas_lock); - prof_postfork_child(); - ctl_postfork_child(); + prof_postfork_child(tsd_tsdn(tsd)); + malloc_mutex_postfork_child(tsd_tsdn(tsd), &arenas_lock); + ctl_postfork_child(tsd_tsdn(tsd)); } /******************************************************************************/ @@ -2607,9 +2930,10 @@ je_get_defrag_hint(void* ptr, int *bin_util, int *run_util) { if (likely((mapbits & CHUNK_MAP_LARGE) == 0)) { /* indication that this is not a LARGE alloc */ arena_t *arena = extent_node_arena_get(&chunk->node); size_t rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, pageind); - arena_run_t *run = &arena_miscelm_get(chunk, rpages_ind)->run; + arena_run_t *run = &arena_miscelm_get_mutable(chunk, rpages_ind)->run; arena_bin_t *bin = &arena->bins[run->binind]; - malloc_mutex_lock(&bin->lock); + tsd_t *tsd = tsd_fetch(); + malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); /* runs that are in the same chunk in as the current chunk, are likely to be the next currun */ if (chunk != (arena_chunk_t *)CHUNK_ADDR2BASE(bin->runcur)) { arena_bin_info_t *bin_info = &arena_bin_info[run->binind]; @@ -2618,7 +2942,7 @@ je_get_defrag_hint(void* ptr, int *bin_util, int *run_util) { *run_util = ((bin_info->nregs - run->nfree)<<16) / bin_info->nregs; defrag = 1; } - malloc_mutex_unlock(&bin->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); } } return defrag; diff --git a/deps/jemalloc/src/mutex.c b/deps/jemalloc/src/mutex.c index 2d47af976..6333e73d6 100644 --- a/deps/jemalloc/src/mutex.c +++ b/deps/jemalloc/src/mutex.c @@ -69,7 +69,7 @@ JEMALLOC_EXPORT int 
_pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex, #endif bool -malloc_mutex_init(malloc_mutex_t *mutex) +malloc_mutex_init(malloc_mutex_t *mutex, const char *name, witness_rank_t rank) { #ifdef _WIN32 @@ -80,6 +80,8 @@ malloc_mutex_init(malloc_mutex_t *mutex) _CRT_SPINCOUNT)) return (true); # endif +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + mutex->lock = OS_UNFAIR_LOCK_INIT; #elif (defined(JEMALLOC_OSSPIN)) mutex->lock = 0; #elif (defined(JEMALLOC_MUTEX_INIT_CB)) @@ -103,31 +105,34 @@ malloc_mutex_init(malloc_mutex_t *mutex) } pthread_mutexattr_destroy(&attr); #endif + if (config_debug) + witness_init(&mutex->witness, name, rank, NULL); return (false); } void -malloc_mutex_prefork(malloc_mutex_t *mutex) +malloc_mutex_prefork(tsdn_t *tsdn, malloc_mutex_t *mutex) { - malloc_mutex_lock(mutex); + malloc_mutex_lock(tsdn, mutex); } void -malloc_mutex_postfork_parent(malloc_mutex_t *mutex) +malloc_mutex_postfork_parent(tsdn_t *tsdn, malloc_mutex_t *mutex) { - malloc_mutex_unlock(mutex); + malloc_mutex_unlock(tsdn, mutex); } void -malloc_mutex_postfork_child(malloc_mutex_t *mutex) +malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex) { #ifdef JEMALLOC_MUTEX_INIT_CB - malloc_mutex_unlock(mutex); + malloc_mutex_unlock(tsdn, mutex); #else - if (malloc_mutex_init(mutex)) { + if (malloc_mutex_init(mutex, mutex->witness.name, + mutex->witness.rank)) { malloc_printf(": Error re-initializing mutex in " "child\n"); if (opt_abort) @@ -137,7 +142,7 @@ malloc_mutex_postfork_child(malloc_mutex_t *mutex) } bool -mutex_boot(void) +malloc_mutex_boot(void) { #ifdef JEMALLOC_MUTEX_INIT_CB diff --git a/deps/jemalloc/src/nstime.c b/deps/jemalloc/src/nstime.c new file mode 100644 index 000000000..0948e29fa --- /dev/null +++ b/deps/jemalloc/src/nstime.c @@ -0,0 +1,194 @@ +#include "jemalloc/internal/jemalloc_internal.h" + +#define BILLION UINT64_C(1000000000) + +void +nstime_init(nstime_t *time, uint64_t ns) +{ + + time->ns = ns; +} + +void +nstime_init2(nstime_t *time, 
uint64_t sec, uint64_t nsec) +{ + + time->ns = sec * BILLION + nsec; +} + +uint64_t +nstime_ns(const nstime_t *time) +{ + + return (time->ns); +} + +uint64_t +nstime_sec(const nstime_t *time) +{ + + return (time->ns / BILLION); +} + +uint64_t +nstime_nsec(const nstime_t *time) +{ + + return (time->ns % BILLION); +} + +void +nstime_copy(nstime_t *time, const nstime_t *source) +{ + + *time = *source; +} + +int +nstime_compare(const nstime_t *a, const nstime_t *b) +{ + + return ((a->ns > b->ns) - (a->ns < b->ns)); +} + +void +nstime_add(nstime_t *time, const nstime_t *addend) +{ + + assert(UINT64_MAX - time->ns >= addend->ns); + + time->ns += addend->ns; +} + +void +nstime_subtract(nstime_t *time, const nstime_t *subtrahend) +{ + + assert(nstime_compare(time, subtrahend) >= 0); + + time->ns -= subtrahend->ns; +} + +void +nstime_imultiply(nstime_t *time, uint64_t multiplier) +{ + + assert((((time->ns | multiplier) & (UINT64_MAX << (sizeof(uint64_t) << + 2))) == 0) || ((time->ns * multiplier) / multiplier == time->ns)); + + time->ns *= multiplier; +} + +void +nstime_idivide(nstime_t *time, uint64_t divisor) +{ + + assert(divisor != 0); + + time->ns /= divisor; +} + +uint64_t +nstime_divide(const nstime_t *time, const nstime_t *divisor) +{ + + assert(divisor->ns != 0); + + return (time->ns / divisor->ns); +} + +#ifdef _WIN32 +# define NSTIME_MONOTONIC true +static void +nstime_get(nstime_t *time) +{ + FILETIME ft; + uint64_t ticks_100ns; + + GetSystemTimeAsFileTime(&ft); + ticks_100ns = (((uint64_t)ft.dwHighDateTime) << 32) | ft.dwLowDateTime; + + nstime_init(time, ticks_100ns * 100); +} +#elif JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE +# define NSTIME_MONOTONIC true +static void +nstime_get(nstime_t *time) +{ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC_COARSE, &ts); + nstime_init2(time, ts.tv_sec, ts.tv_nsec); +} +#elif JEMALLOC_HAVE_CLOCK_MONOTONIC +# define NSTIME_MONOTONIC true +static void +nstime_get(nstime_t *time) +{ + struct timespec ts; + + 
clock_gettime(CLOCK_MONOTONIC, &ts); + nstime_init2(time, ts.tv_sec, ts.tv_nsec); +} +#elif JEMALLOC_HAVE_MACH_ABSOLUTE_TIME +# define NSTIME_MONOTONIC true +static void +nstime_get(nstime_t *time) +{ + + nstime_init(time, mach_absolute_time()); +} +#else +# define NSTIME_MONOTONIC false +static void +nstime_get(nstime_t *time) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + nstime_init2(time, tv.tv_sec, tv.tv_usec * 1000); +} +#endif + +#ifdef JEMALLOC_JET +#undef nstime_monotonic +#define nstime_monotonic JEMALLOC_N(n_nstime_monotonic) +#endif +bool +nstime_monotonic(void) +{ + + return (NSTIME_MONOTONIC); +#undef NSTIME_MONOTONIC +} +#ifdef JEMALLOC_JET +#undef nstime_monotonic +#define nstime_monotonic JEMALLOC_N(nstime_monotonic) +nstime_monotonic_t *nstime_monotonic = JEMALLOC_N(n_nstime_monotonic); +#endif + +#ifdef JEMALLOC_JET +#undef nstime_update +#define nstime_update JEMALLOC_N(n_nstime_update) +#endif +bool +nstime_update(nstime_t *time) +{ + nstime_t old_time; + + nstime_copy(&old_time, time); + nstime_get(time); + + /* Handle non-monotonic clocks. */ + if (unlikely(nstime_compare(&old_time, time) > 0)) { + nstime_copy(time, &old_time); + return (true); + } + + return (false); +} +#ifdef JEMALLOC_JET +#undef nstime_update +#define nstime_update JEMALLOC_N(nstime_update) +nstime_update_t *nstime_update = JEMALLOC_N(n_nstime_update); +#endif diff --git a/deps/jemalloc/src/pages.c b/deps/jemalloc/src/pages.c index 83a167f67..5f0c9669d 100644 --- a/deps/jemalloc/src/pages.c +++ b/deps/jemalloc/src/pages.c @@ -1,29 +1,49 @@ #define JEMALLOC_PAGES_C_ #include "jemalloc/internal/jemalloc_internal.h" +#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT +#include +#endif + +/******************************************************************************/ +/* Data. 
*/ + +#ifndef _WIN32 +# define PAGES_PROT_COMMIT (PROT_READ | PROT_WRITE) +# define PAGES_PROT_DECOMMIT (PROT_NONE) +static int mmap_flags; +#endif +static bool os_overcommits; + /******************************************************************************/ void * -pages_map(void *addr, size_t size) +pages_map(void *addr, size_t size, bool *commit) { void *ret; assert(size != 0); + if (os_overcommits) + *commit = true; + #ifdef _WIN32 /* * If VirtualAlloc can't allocate at the given address when one is * given, it fails and returns NULL. */ - ret = VirtualAlloc(addr, size, MEM_COMMIT | MEM_RESERVE, + ret = VirtualAlloc(addr, size, MEM_RESERVE | (*commit ? MEM_COMMIT : 0), PAGE_READWRITE); #else /* * We don't use MAP_FIXED here, because it can cause the *replacement* * of existing mappings, and we only want to create new mappings. */ - ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, - -1, 0); + { + int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT; + + ret = mmap(addr, size, prot, mmap_flags, -1, 0); + } assert(ret != NULL); if (ret == MAP_FAILED) @@ -67,7 +87,8 @@ pages_unmap(void *addr, size_t size) } void * -pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size) +pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size, + bool *commit) { void *ret = (void *)((uintptr_t)addr + leadsize); @@ -77,7 +98,7 @@ pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size) void *new_addr; pages_unmap(addr, alloc_size); - new_addr = pages_map(ret, size); + new_addr = pages_map(ret, size, commit); if (new_addr == ret) return (ret); if (new_addr) @@ -101,17 +122,17 @@ static bool pages_commit_impl(void *addr, size_t size, bool commit) { -#ifndef _WIN32 - /* - * The following decommit/commit implementation is functional, but - * always disabled because it doesn't add value beyong improved - * debugging (at the cost of extra system calls) on systems that - * overcommit. 
- */ - if (false) { - int prot = commit ? (PROT_READ | PROT_WRITE) : PROT_NONE; - void *result = mmap(addr, size, prot, MAP_PRIVATE | MAP_ANON | - MAP_FIXED, -1, 0); + if (os_overcommits) + return (true); + +#ifdef _WIN32 + return (commit ? (addr != VirtualAlloc(addr, size, MEM_COMMIT, + PAGE_READWRITE)) : (!VirtualFree(addr, size, MEM_DECOMMIT))); +#else + { + int prot = commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT; + void *result = mmap(addr, size, prot, mmap_flags | MAP_FIXED, + -1, 0); if (result == MAP_FAILED) return (true); if (result != addr) { @@ -125,7 +146,6 @@ pages_commit_impl(void *addr, size_t size, bool commit) return (false); } #endif - return (true); } bool @@ -150,15 +170,16 @@ pages_purge(void *addr, size_t size) #ifdef _WIN32 VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE); unzeroed = true; -#elif defined(JEMALLOC_HAVE_MADVISE) -# ifdef JEMALLOC_PURGE_MADVISE_DONTNEED -# define JEMALLOC_MADV_PURGE MADV_DONTNEED -# define JEMALLOC_MADV_ZEROS true -# elif defined(JEMALLOC_PURGE_MADVISE_FREE) +#elif (defined(JEMALLOC_PURGE_MADVISE_FREE) || \ + defined(JEMALLOC_PURGE_MADVISE_DONTNEED)) +# if defined(JEMALLOC_PURGE_MADVISE_FREE) # define JEMALLOC_MADV_PURGE MADV_FREE # define JEMALLOC_MADV_ZEROS false +# elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED) +# define JEMALLOC_MADV_PURGE MADV_DONTNEED +# define JEMALLOC_MADV_ZEROS true # else -# error "No madvise(2) flag defined for purging unused dirty pages." 
+# error No madvise(2) flag defined for purging unused dirty pages # endif int err = madvise(addr, size, JEMALLOC_MADV_PURGE); unzeroed = (!JEMALLOC_MADV_ZEROS || err != 0); @@ -171,3 +192,111 @@ pages_purge(void *addr, size_t size) return (unzeroed); } +bool +pages_huge(void *addr, size_t size) +{ + + assert(PAGE_ADDR2BASE(addr) == addr); + assert(PAGE_CEILING(size) == size); + +#ifdef JEMALLOC_THP + return (madvise(addr, size, MADV_HUGEPAGE) != 0); +#else + return (false); +#endif +} + +bool +pages_nohuge(void *addr, size_t size) +{ + + assert(PAGE_ADDR2BASE(addr) == addr); + assert(PAGE_CEILING(size) == size); + +#ifdef JEMALLOC_THP + return (madvise(addr, size, MADV_NOHUGEPAGE) != 0); +#else + return (false); +#endif +} + +#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT +static bool +os_overcommits_sysctl(void) +{ + int vm_overcommit; + size_t sz; + + sz = sizeof(vm_overcommit); + if (sysctlbyname("vm.overcommit", &vm_overcommit, &sz, NULL, 0) != 0) + return (false); /* Error. */ + + return ((vm_overcommit & 0x3) == 0); +} +#endif + +#ifdef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY +/* + * Use syscall(2) rather than {open,read,close}(2) when possible to avoid + * reentry during bootstrapping if another library has interposed system call + * wrappers. + */ +static bool +os_overcommits_proc(void) +{ + int fd; + char buf[1]; + ssize_t nread; + +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open) + fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY); +#else + fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY); +#endif + if (fd == -1) + return (false); /* Error. */ + +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_read) + nread = (ssize_t)syscall(SYS_read, fd, &buf, sizeof(buf)); +#else + nread = read(fd, &buf, sizeof(buf)); +#endif + +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close) + syscall(SYS_close, fd); +#else + close(fd); +#endif + + if (nread < 1) + return (false); /* Error. 
*/ + /* + * /proc/sys/vm/overcommit_memory meanings: + * 0: Heuristic overcommit. + * 1: Always overcommit. + * 2: Never overcommit. + */ + return (buf[0] == '0' || buf[0] == '1'); +} +#endif + +void +pages_boot(void) +{ + +#ifndef _WIN32 + mmap_flags = MAP_PRIVATE | MAP_ANON; +#endif + +#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT + os_overcommits = os_overcommits_sysctl(); +#elif defined(JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY) + os_overcommits = os_overcommits_proc(); +# ifdef MAP_NORESERVE + if (os_overcommits) + mmap_flags |= MAP_NORESERVE; +# endif +#else + os_overcommits = false; +#endif +} diff --git a/deps/jemalloc/src/prng.c b/deps/jemalloc/src/prng.c new file mode 100644 index 000000000..76646a2a4 --- /dev/null +++ b/deps/jemalloc/src/prng.c @@ -0,0 +1,2 @@ +#define JEMALLOC_PRNG_C_ +#include "jemalloc/internal/jemalloc_internal.h" diff --git a/deps/jemalloc/src/prof.c b/deps/jemalloc/src/prof.c index 5d2b9598f..c89dade1f 100644 --- a/deps/jemalloc/src/prof.c +++ b/deps/jemalloc/src/prof.c @@ -109,7 +109,7 @@ static char prof_dump_buf[ 1 #endif ]; -static unsigned prof_dump_buf_end; +static size_t prof_dump_buf_end; static int prof_dump_fd; /* Do not dump any profiles until bootstrapping is complete. */ @@ -121,13 +121,13 @@ static bool prof_booted = false; * definition. */ -static bool prof_tctx_should_destroy(prof_tctx_t *tctx); +static bool prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx); static void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx); -static bool prof_tdata_should_destroy(prof_tdata_t *tdata, +static bool prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, bool even_if_attached); static void prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached); -static char *prof_thread_name_alloc(tsd_t *tsd, const char *thread_name); +static char *prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name); /******************************************************************************/ /* Red-black trees. 
*/ @@ -213,22 +213,23 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) } if ((uintptr_t)tctx > (uintptr_t)1U) { - malloc_mutex_lock(tctx->tdata->lock); + malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); tctx->prepared = false; - if (prof_tctx_should_destroy(tctx)) + if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) prof_tctx_destroy(tsd, tctx); else - malloc_mutex_unlock(tctx->tdata->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock); } } void -prof_malloc_sample_object(const void *ptr, size_t usize, prof_tctx_t *tctx) +prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, + prof_tctx_t *tctx) { - prof_tctx_set(ptr, usize, tctx); + prof_tctx_set(tsdn, ptr, usize, tctx); - malloc_mutex_lock(tctx->tdata->lock); + malloc_mutex_lock(tsdn, tctx->tdata->lock); tctx->cnts.curobjs++; tctx->cnts.curbytes += usize; if (opt_prof_accum) { @@ -236,23 +237,23 @@ prof_malloc_sample_object(const void *ptr, size_t usize, prof_tctx_t *tctx) tctx->cnts.accumbytes += usize; } tctx->prepared = false; - malloc_mutex_unlock(tctx->tdata->lock); + malloc_mutex_unlock(tsdn, tctx->tdata->lock); } void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx) { - malloc_mutex_lock(tctx->tdata->lock); + malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); assert(tctx->cnts.curobjs > 0); assert(tctx->cnts.curbytes >= usize); tctx->cnts.curobjs--; tctx->cnts.curbytes -= usize; - if (prof_tctx_should_destroy(tctx)) + if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) prof_tctx_destroy(tsd, tctx); else - malloc_mutex_unlock(tctx->tdata->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock); } void @@ -277,7 +278,7 @@ prof_enter(tsd_t *tsd, prof_tdata_t *tdata) tdata->enq = true; } - malloc_mutex_lock(&bt2gctx_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx); } JEMALLOC_INLINE_C void @@ -287,7 +288,7 @@ prof_leave(tsd_t *tsd, prof_tdata_t *tdata) cassert(config_prof); assert(tdata == prof_tdata_get(tsd, false)); - 
malloc_mutex_unlock(&bt2gctx_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx); if (tdata != NULL) { bool idump, gdump; @@ -300,9 +301,9 @@ prof_leave(tsd_t *tsd, prof_tdata_t *tdata) tdata->enq_gdump = false; if (idump) - prof_idump(); + prof_idump(tsd_tsdn(tsd)); if (gdump) - prof_gdump(); + prof_gdump(tsd_tsdn(tsd)); } } @@ -546,14 +547,15 @@ prof_tdata_mutex_choose(uint64_t thr_uid) } static prof_gctx_t * -prof_gctx_create(tsd_t *tsd, prof_bt_t *bt) +prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt) { /* * Create a single allocation that has space for vec of length bt->len. */ - prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsd, offsetof(prof_gctx_t, - vec) + (bt->len * sizeof(void *)), false, tcache_get(tsd, true), - true, NULL); + size_t size = offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *)); + prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsdn, size, + size2index(size), false, NULL, true, arena_get(TSDN_NULL, 0, true), + true); if (gctx == NULL) return (NULL); gctx->lock = prof_gctx_mutex_choose(); @@ -585,7 +587,7 @@ prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, * into this function. */ prof_enter(tsd, tdata_self); - malloc_mutex_lock(gctx->lock); + malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); assert(gctx->nlimbo != 0); if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) { /* Remove gctx from bt2gctx. */ @@ -593,24 +595,25 @@ prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, not_reached(); prof_leave(tsd, tdata_self); /* Destroy gctx. */ - malloc_mutex_unlock(gctx->lock); - idalloctm(tsd, gctx, tcache_get(tsd, false), true); + malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); + idalloctm(tsd_tsdn(tsd), gctx, NULL, true, true); } else { /* * Compensate for increment in prof_tctx_destroy() or * prof_lookup(). */ gctx->nlimbo--; - malloc_mutex_unlock(gctx->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); prof_leave(tsd, tdata_self); } } -/* tctx->tdata->lock must be held. 
*/ static bool -prof_tctx_should_destroy(prof_tctx_t *tctx) +prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx) { + malloc_mutex_assert_owner(tsdn, tctx->tdata->lock); + if (opt_prof_accum) return (false); if (tctx->cnts.curobjs != 0) @@ -633,7 +636,6 @@ prof_gctx_should_destroy(prof_gctx_t *gctx) return (true); } -/* tctx->tdata->lock is held upon entry, and released before return. */ static void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) { @@ -641,6 +643,8 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) prof_gctx_t *gctx = tctx->gctx; bool destroy_tdata, destroy_tctx, destroy_gctx; + malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); + assert(tctx->cnts.curobjs == 0); assert(tctx->cnts.curbytes == 0); assert(!opt_prof_accum); @@ -648,10 +652,10 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) assert(tctx->cnts.accumbytes == 0); ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL); - destroy_tdata = prof_tdata_should_destroy(tdata, false); - malloc_mutex_unlock(tdata->lock); + destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, false); + malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); - malloc_mutex_lock(gctx->lock); + malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); switch (tctx->state) { case prof_tctx_state_nominal: tctx_tree_remove(&gctx->tctxs, tctx); @@ -691,17 +695,19 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) destroy_tctx = false; destroy_gctx = false; } - malloc_mutex_unlock(gctx->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); if (destroy_gctx) { prof_gctx_try_destroy(tsd, prof_tdata_get(tsd, false), gctx, tdata); } + malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock); + if (destroy_tdata) prof_tdata_destroy(tsd, tdata, false); if (destroy_tctx) - idalloctm(tsd, tctx, tcache_get(tsd, false), true); + idalloctm(tsd_tsdn(tsd), tctx, NULL, true, true); } static bool @@ -721,7 +727,7 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, prof_enter(tsd, tdata); if 
(ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) { /* bt has never been seen before. Insert it. */ - gctx.p = prof_gctx_create(tsd, bt); + gctx.p = prof_gctx_create(tsd_tsdn(tsd), bt); if (gctx.v == NULL) { prof_leave(tsd, tdata); return (true); @@ -730,7 +736,7 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) { /* OOM. */ prof_leave(tsd, tdata); - idalloctm(tsd, gctx.v, tcache_get(tsd, false), true); + idalloctm(tsd_tsdn(tsd), gctx.v, NULL, true, true); return (true); } new_gctx = true; @@ -739,9 +745,9 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, * Increment nlimbo, in order to avoid a race condition with * prof_tctx_destroy()/prof_gctx_try_destroy(). */ - malloc_mutex_lock(gctx.p->lock); + malloc_mutex_lock(tsd_tsdn(tsd), gctx.p->lock); gctx.p->nlimbo++; - malloc_mutex_unlock(gctx.p->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), gctx.p->lock); new_gctx = false; } prof_leave(tsd, tdata); @@ -768,13 +774,12 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) if (tdata == NULL) return (NULL); - malloc_mutex_lock(tdata->lock); + malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); not_found = ckh_search(&tdata->bt2tctx, bt, NULL, &ret.v); if (!not_found) /* Note double negative! */ ret.p->prepared = true; - malloc_mutex_unlock(tdata->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); if (not_found) { - tcache_t *tcache; void *btkey; prof_gctx_t *gctx; bool new_gctx, error; @@ -788,9 +793,9 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) return (NULL); /* Link a prof_tctx_t into gctx for this thread. 
*/ - tcache = tcache_get(tsd, true); - ret.v = iallocztm(tsd, sizeof(prof_tctx_t), false, tcache, true, - NULL); + ret.v = iallocztm(tsd_tsdn(tsd), sizeof(prof_tctx_t), + size2index(sizeof(prof_tctx_t)), false, NULL, true, + arena_ichoose(tsd, NULL), true); if (ret.p == NULL) { if (new_gctx) prof_gctx_try_destroy(tsd, tdata, gctx, tdata); @@ -804,41 +809,41 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) ret.p->tctx_uid = tdata->tctx_uid_next++; ret.p->prepared = true; ret.p->state = prof_tctx_state_initializing; - malloc_mutex_lock(tdata->lock); + malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); error = ckh_insert(tsd, &tdata->bt2tctx, btkey, ret.v); - malloc_mutex_unlock(tdata->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); if (error) { if (new_gctx) prof_gctx_try_destroy(tsd, tdata, gctx, tdata); - idalloctm(tsd, ret.v, tcache, true); + idalloctm(tsd_tsdn(tsd), ret.v, NULL, true, true); return (NULL); } - malloc_mutex_lock(gctx->lock); + malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); ret.p->state = prof_tctx_state_nominal; tctx_tree_insert(&gctx->tctxs, ret.p); gctx->nlimbo--; - malloc_mutex_unlock(gctx->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); } return (ret.p); } +/* + * The bodies of this function and prof_leakcheck() are compiled out unless heap + * profiling is enabled, so that it is possible to compile jemalloc with + * floating point support completely disabled. Avoiding floating point code is + * important on memory-constrained systems, but it also enables a workaround for + * versions of glibc that don't properly save/restore floating point registers + * during dynamic lazy symbol loading (which internally calls into whatever + * malloc implementation happens to be integrated into the application). Note + * that some compilers (e.g. gcc 4.8) may use floating point registers for fast + * memory moves, so jemalloc must be compiled with such optimizations disabled + * (e.g. + * -mno-sse) in order for the workaround to be complete. 
+ */ void prof_sample_threshold_update(prof_tdata_t *tdata) { - /* - * The body of this function is compiled out unless heap profiling is - * enabled, so that it is possible to compile jemalloc with floating - * point support completely disabled. Avoiding floating point code is - * important on memory-constrained systems, but it also enables a - * workaround for versions of glibc that don't properly save/restore - * floating point registers during dynamic lazy symbol loading (which - * internally calls into whatever malloc implementation happens to be - * integrated into the application). Note that some compilers (e.g. - * gcc 4.8) may use floating point registers for fast memory moves, so - * jemalloc must be compiled with such optimizations disabled (e.g. - * -mno-sse) in order for the workaround to be complete. - */ #ifdef JEMALLOC_PROF uint64_t r; double u; @@ -869,8 +874,7 @@ prof_sample_threshold_update(prof_tdata_t *tdata) * pp 500 * (http://luc.devroye.org/rnbookindex.html) */ - prng64(r, 53, tdata->prng_state, UINT64_C(6364136223846793005), - UINT64_C(1442695040888963407)); + r = prng_lg_range_u64(&tdata->prng_state, 53); u = (double)r * (1.0/9007199254740992.0L); tdata->bytes_until_sample = (uint64_t)(log(u) / log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample)))) @@ -893,11 +897,13 @@ size_t prof_tdata_count(void) { size_t tdata_count = 0; + tsdn_t *tsdn; - malloc_mutex_lock(&tdatas_mtx); + tsdn = tsdn_fetch(); + malloc_mutex_lock(tsdn, &tdatas_mtx); tdata_tree_iter(&tdatas, NULL, prof_tdata_count_iter, (void *)&tdata_count); - malloc_mutex_unlock(&tdatas_mtx); + malloc_mutex_unlock(tsdn, &tdatas_mtx); return (tdata_count); } @@ -916,9 +922,9 @@ prof_bt_count(void) if (tdata == NULL) return (0); - malloc_mutex_lock(&bt2gctx_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx); bt_count = ckh_count(&bt2gctx); - malloc_mutex_unlock(&bt2gctx_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx); return (bt_count); } @@ -988,7 +994,7 @@ 
prof_dump_close(bool propagate_err) static bool prof_dump_write(bool propagate_err, const char *s) { - unsigned i, slen, n; + size_t i, slen, n; cassert(config_prof); @@ -1031,20 +1037,21 @@ prof_dump_printf(bool propagate_err, const char *format, ...) return (ret); } -/* tctx->tdata->lock is held. */ static void -prof_tctx_merge_tdata(prof_tctx_t *tctx, prof_tdata_t *tdata) +prof_tctx_merge_tdata(tsdn_t *tsdn, prof_tctx_t *tctx, prof_tdata_t *tdata) { - malloc_mutex_lock(tctx->gctx->lock); + malloc_mutex_assert_owner(tsdn, tctx->tdata->lock); + + malloc_mutex_lock(tsdn, tctx->gctx->lock); switch (tctx->state) { case prof_tctx_state_initializing: - malloc_mutex_unlock(tctx->gctx->lock); + malloc_mutex_unlock(tsdn, tctx->gctx->lock); return; case prof_tctx_state_nominal: tctx->state = prof_tctx_state_dumping; - malloc_mutex_unlock(tctx->gctx->lock); + malloc_mutex_unlock(tsdn, tctx->gctx->lock); memcpy(&tctx->dump_cnts, &tctx->cnts, sizeof(prof_cnt_t)); @@ -1063,11 +1070,12 @@ prof_tctx_merge_tdata(prof_tctx_t *tctx, prof_tdata_t *tdata) } } -/* gctx->lock is held. */ static void -prof_tctx_merge_gctx(prof_tctx_t *tctx, prof_gctx_t *gctx) +prof_tctx_merge_gctx(tsdn_t *tsdn, prof_tctx_t *tctx, prof_gctx_t *gctx) { + malloc_mutex_assert_owner(tsdn, gctx->lock); + gctx->cnt_summed.curobjs += tctx->dump_cnts.curobjs; gctx->cnt_summed.curbytes += tctx->dump_cnts.curbytes; if (opt_prof_accum) { @@ -1076,10 +1084,12 @@ prof_tctx_merge_gctx(prof_tctx_t *tctx, prof_gctx_t *gctx) } } -/* tctx->gctx is held. 
*/ static prof_tctx_t * prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { + tsdn_t *tsdn = (tsdn_t *)arg; + + malloc_mutex_assert_owner(tsdn, tctx->gctx->lock); switch (tctx->state) { case prof_tctx_state_nominal: @@ -1087,7 +1097,7 @@ prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) break; case prof_tctx_state_dumping: case prof_tctx_state_purgatory: - prof_tctx_merge_gctx(tctx, tctx->gctx); + prof_tctx_merge_gctx(tsdn, tctx, tctx->gctx); break; default: not_reached(); @@ -1096,11 +1106,18 @@ prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) return (NULL); } -/* gctx->lock is held. */ +struct prof_tctx_dump_iter_arg_s { + tsdn_t *tsdn; + bool propagate_err; +}; + static prof_tctx_t * -prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) +prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) { - bool propagate_err = *(bool *)arg; + struct prof_tctx_dump_iter_arg_s *arg = + (struct prof_tctx_dump_iter_arg_s *)opaque; + + malloc_mutex_assert_owner(arg->tsdn, tctx->gctx->lock); switch (tctx->state) { case prof_tctx_state_initializing: @@ -1109,7 +1126,7 @@ prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) break; case prof_tctx_state_dumping: case prof_tctx_state_purgatory: - if (prof_dump_printf(propagate_err, + if (prof_dump_printf(arg->propagate_err, " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": " "%"FMTu64"]\n", tctx->thr_uid, tctx->dump_cnts.curobjs, tctx->dump_cnts.curbytes, tctx->dump_cnts.accumobjs, @@ -1122,12 +1139,14 @@ prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) return (NULL); } -/* tctx->gctx is held. 
*/ static prof_tctx_t * prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { + tsdn_t *tsdn = (tsdn_t *)arg; prof_tctx_t *ret; + malloc_mutex_assert_owner(tsdn, tctx->gctx->lock); + switch (tctx->state) { case prof_tctx_state_nominal: /* New since dumping started; ignore. */ @@ -1148,12 +1167,12 @@ label_return: } static void -prof_dump_gctx_prep(prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) +prof_dump_gctx_prep(tsdn_t *tsdn, prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) { cassert(config_prof); - malloc_mutex_lock(gctx->lock); + malloc_mutex_lock(tsdn, gctx->lock); /* * Increment nlimbo so that gctx won't go away before dump. @@ -1165,19 +1184,26 @@ prof_dump_gctx_prep(prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) memset(&gctx->cnt_summed, 0, sizeof(prof_cnt_t)); - malloc_mutex_unlock(gctx->lock); + malloc_mutex_unlock(tsdn, gctx->lock); } -static prof_gctx_t * -prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *arg) -{ - size_t *leak_ngctx = (size_t *)arg; +struct prof_gctx_merge_iter_arg_s { + tsdn_t *tsdn; + size_t leak_ngctx; +}; - malloc_mutex_lock(gctx->lock); - tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter, NULL); +static prof_gctx_t * +prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) +{ + struct prof_gctx_merge_iter_arg_s *arg = + (struct prof_gctx_merge_iter_arg_s *)opaque; + + malloc_mutex_lock(arg->tsdn, gctx->lock); + tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter, + (void *)arg->tsdn); if (gctx->cnt_summed.curobjs != 0) - (*leak_ngctx)++; - malloc_mutex_unlock(gctx->lock); + arg->leak_ngctx++; + malloc_mutex_unlock(arg->tsdn, gctx->lock); return (NULL); } @@ -1196,7 +1222,7 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) */ while ((gctx = gctx_tree_first(gctxs)) != NULL) { gctx_tree_remove(gctxs, gctx); - malloc_mutex_lock(gctx->lock); + malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); { prof_tctx_t *next; @@ -1204,14 +1230,15 @@ prof_gctx_finish(tsd_t *tsd, 
prof_gctx_tree_t *gctxs) do { prof_tctx_t *to_destroy = tctx_tree_iter(&gctx->tctxs, next, - prof_tctx_finish_iter, NULL); + prof_tctx_finish_iter, + (void *)tsd_tsdn(tsd)); if (to_destroy != NULL) { next = tctx_tree_next(&gctx->tctxs, to_destroy); tctx_tree_remove(&gctx->tctxs, to_destroy); - idalloctm(tsd, to_destroy, - tcache_get(tsd, false), true); + idalloctm(tsd_tsdn(tsd), to_destroy, + NULL, true, true); } else next = NULL; } while (next != NULL); @@ -1219,19 +1246,26 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) gctx->nlimbo--; if (prof_gctx_should_destroy(gctx)) { gctx->nlimbo++; - malloc_mutex_unlock(gctx->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); prof_gctx_try_destroy(tsd, tdata, gctx, tdata); } else - malloc_mutex_unlock(gctx->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); } } -static prof_tdata_t * -prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) -{ - prof_cnt_t *cnt_all = (prof_cnt_t *)arg; +struct prof_tdata_merge_iter_arg_s { + tsdn_t *tsdn; + prof_cnt_t cnt_all; +}; - malloc_mutex_lock(tdata->lock); +static prof_tdata_t * +prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, + void *opaque) +{ + struct prof_tdata_merge_iter_arg_s *arg = + (struct prof_tdata_merge_iter_arg_s *)opaque; + + malloc_mutex_lock(arg->tsdn, tdata->lock); if (!tdata->expired) { size_t tabind; union { @@ -1243,17 +1277,17 @@ prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) memset(&tdata->cnt_summed, 0, sizeof(prof_cnt_t)); for (tabind = 0; !ckh_iter(&tdata->bt2tctx, &tabind, NULL, &tctx.v);) - prof_tctx_merge_tdata(tctx.p, tdata); + prof_tctx_merge_tdata(arg->tsdn, tctx.p, tdata); - cnt_all->curobjs += tdata->cnt_summed.curobjs; - cnt_all->curbytes += tdata->cnt_summed.curbytes; + arg->cnt_all.curobjs += tdata->cnt_summed.curobjs; + arg->cnt_all.curbytes += tdata->cnt_summed.curbytes; if (opt_prof_accum) { - cnt_all->accumobjs += tdata->cnt_summed.accumobjs; 
- cnt_all->accumbytes += tdata->cnt_summed.accumbytes; + arg->cnt_all.accumobjs += tdata->cnt_summed.accumobjs; + arg->cnt_all.accumbytes += tdata->cnt_summed.accumbytes; } } else tdata->dumping = false; - malloc_mutex_unlock(tdata->lock); + malloc_mutex_unlock(arg->tsdn, tdata->lock); return (NULL); } @@ -1282,7 +1316,7 @@ prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) #define prof_dump_header JEMALLOC_N(prof_dump_header_impl) #endif static bool -prof_dump_header(bool propagate_err, const prof_cnt_t *cnt_all) +prof_dump_header(tsdn_t *tsdn, bool propagate_err, const prof_cnt_t *cnt_all) { bool ret; @@ -1293,10 +1327,10 @@ prof_dump_header(bool propagate_err, const prof_cnt_t *cnt_all) cnt_all->curbytes, cnt_all->accumobjs, cnt_all->accumbytes)) return (true); - malloc_mutex_lock(&tdatas_mtx); + malloc_mutex_lock(tsdn, &tdatas_mtx); ret = (tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter, (void *)&propagate_err) != NULL); - malloc_mutex_unlock(&tdatas_mtx); + malloc_mutex_unlock(tsdn, &tdatas_mtx); return (ret); } #ifdef JEMALLOC_JET @@ -1305,15 +1339,16 @@ prof_dump_header(bool propagate_err, const prof_cnt_t *cnt_all) prof_dump_header_t *prof_dump_header = JEMALLOC_N(prof_dump_header_impl); #endif -/* gctx->lock is held. */ static bool -prof_dump_gctx(bool propagate_err, prof_gctx_t *gctx, const prof_bt_t *bt, - prof_gctx_tree_t *gctxs) +prof_dump_gctx(tsdn_t *tsdn, bool propagate_err, prof_gctx_t *gctx, + const prof_bt_t *bt, prof_gctx_tree_t *gctxs) { bool ret; unsigned i; + struct prof_tctx_dump_iter_arg_s prof_tctx_dump_iter_arg; cassert(config_prof); + malloc_mutex_assert_owner(tsdn, gctx->lock); /* Avoid dumping such gctx's that have no useful data. 
*/ if ((!opt_prof_accum && gctx->cnt_summed.curobjs == 0) || @@ -1347,8 +1382,10 @@ prof_dump_gctx(bool propagate_err, prof_gctx_t *gctx, const prof_bt_t *bt, goto label_return; } + prof_tctx_dump_iter_arg.tsdn = tsdn; + prof_tctx_dump_iter_arg.propagate_err = propagate_err; if (tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter, - (void *)&propagate_err) != NULL) { + (void *)&prof_tctx_dump_iter_arg) != NULL) { ret = true; goto label_return; } @@ -1358,6 +1395,7 @@ label_return: return (ret); } +#ifndef _WIN32 JEMALLOC_FORMAT_PRINTF(1, 2) static int prof_open_maps(const char *format, ...) @@ -1373,6 +1411,18 @@ prof_open_maps(const char *format, ...) return (mfd); } +#endif + +static int +prof_getpid(void) +{ + +#ifdef _WIN32 + return (GetCurrentProcessId()); +#else + return (getpid()); +#endif +} static bool prof_dump_maps(bool propagate_err) @@ -1383,9 +1433,11 @@ prof_dump_maps(bool propagate_err) cassert(config_prof); #ifdef __FreeBSD__ mfd = prof_open_maps("/proc/curproc/map"); +#elif defined(_WIN32) + mfd = -1; // Not implemented #else { - int pid = getpid(); + int pid = prof_getpid(); mfd = prof_open_maps("/proc/%d/task/%d/maps", pid, pid); if (mfd == -1) @@ -1426,39 +1478,66 @@ label_return: return (ret); } +/* + * See prof_sample_threshold_update() comment for why the body of this function + * is conditionally compiled. + */ static void prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx, const char *filename) { +#ifdef JEMALLOC_PROF + /* + * Scaling is equivalent AdjustSamples() in jeprof, but the result may + * differ slightly from what jeprof reports, because here we scale the + * summary values, whereas jeprof scales each context individually and + * reports the sums of the scaled values. + */ if (cnt_all->curbytes != 0) { - malloc_printf(": Leak summary: %"FMTu64" byte%s, %" - FMTu64" object%s, %zu context%s\n", - cnt_all->curbytes, (cnt_all->curbytes != 1) ? "s" : "", - cnt_all->curobjs, (cnt_all->curobjs != 1) ? 
"s" : "", - leak_ngctx, (leak_ngctx != 1) ? "s" : ""); + double sample_period = (double)((uint64_t)1 << lg_prof_sample); + double ratio = (((double)cnt_all->curbytes) / + (double)cnt_all->curobjs) / sample_period; + double scale_factor = 1.0 / (1.0 - exp(-ratio)); + uint64_t curbytes = (uint64_t)round(((double)cnt_all->curbytes) + * scale_factor); + uint64_t curobjs = (uint64_t)round(((double)cnt_all->curobjs) * + scale_factor); + + malloc_printf(": Leak approximation summary: ~%"FMTu64 + " byte%s, ~%"FMTu64" object%s, >= %zu context%s\n", + curbytes, (curbytes != 1) ? "s" : "", curobjs, (curobjs != + 1) ? "s" : "", leak_ngctx, (leak_ngctx != 1) ? "s" : ""); malloc_printf( ": Run jeprof on \"%s\" for leak detail\n", filename); } +#endif } +struct prof_gctx_dump_iter_arg_s { + tsdn_t *tsdn; + bool propagate_err; +}; + static prof_gctx_t * -prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *arg) +prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { prof_gctx_t *ret; - bool propagate_err = *(bool *)arg; + struct prof_gctx_dump_iter_arg_s *arg = + (struct prof_gctx_dump_iter_arg_s *)opaque; - malloc_mutex_lock(gctx->lock); + malloc_mutex_lock(arg->tsdn, gctx->lock); - if (prof_dump_gctx(propagate_err, gctx, &gctx->bt, gctxs)) { + if (prof_dump_gctx(arg->tsdn, arg->propagate_err, gctx, &gctx->bt, + gctxs)) { ret = gctx; goto label_return; } ret = NULL; label_return: - malloc_mutex_unlock(gctx->lock); + malloc_mutex_unlock(arg->tsdn, gctx->lock); return (ret); } @@ -1466,13 +1545,14 @@ static bool prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) { prof_tdata_t *tdata; - prof_cnt_t cnt_all; + struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg; size_t tabind; union { prof_gctx_t *p; void *v; } gctx; - size_t leak_ngctx; + struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg; + struct prof_gctx_dump_iter_arg_s prof_gctx_dump_iter_arg; prof_gctx_tree_t gctxs; cassert(config_prof); @@ 
-1481,7 +1561,7 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) if (tdata == NULL) return (true); - malloc_mutex_lock(&prof_dump_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); prof_enter(tsd, tdata); /* @@ -1490,20 +1570,24 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) */ gctx_tree_new(&gctxs); for (tabind = 0; !ckh_iter(&bt2gctx, &tabind, NULL, &gctx.v);) - prof_dump_gctx_prep(gctx.p, &gctxs); + prof_dump_gctx_prep(tsd_tsdn(tsd), gctx.p, &gctxs); /* * Iterate over tdatas, and for the non-expired ones snapshot their tctx * stats and merge them into the associated gctx's. */ - memset(&cnt_all, 0, sizeof(prof_cnt_t)); - malloc_mutex_lock(&tdatas_mtx); - tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter, (void *)&cnt_all); - malloc_mutex_unlock(&tdatas_mtx); + prof_tdata_merge_iter_arg.tsdn = tsd_tsdn(tsd); + memset(&prof_tdata_merge_iter_arg.cnt_all, 0, sizeof(prof_cnt_t)); + malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); + tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter, + (void *)&prof_tdata_merge_iter_arg); + malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); /* Merge tctx stats into gctx's. */ - leak_ngctx = 0; - gctx_tree_iter(&gctxs, NULL, prof_gctx_merge_iter, (void *)&leak_ngctx); + prof_gctx_merge_iter_arg.tsdn = tsd_tsdn(tsd); + prof_gctx_merge_iter_arg.leak_ngctx = 0; + gctx_tree_iter(&gctxs, NULL, prof_gctx_merge_iter, + (void *)&prof_gctx_merge_iter_arg); prof_leave(tsd, tdata); @@ -1512,12 +1596,15 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) goto label_open_close_error; /* Dump profile header. */ - if (prof_dump_header(propagate_err, &cnt_all)) + if (prof_dump_header(tsd_tsdn(tsd), propagate_err, + &prof_tdata_merge_iter_arg.cnt_all)) goto label_write_error; /* Dump per gctx profile stats. 
*/ + prof_gctx_dump_iter_arg.tsdn = tsd_tsdn(tsd); + prof_gctx_dump_iter_arg.propagate_err = propagate_err; if (gctx_tree_iter(&gctxs, NULL, prof_gctx_dump_iter, - (void *)&propagate_err) != NULL) + (void *)&prof_gctx_dump_iter_arg) != NULL) goto label_write_error; /* Dump /proc//maps if possible. */ @@ -1528,17 +1615,18 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) goto label_open_close_error; prof_gctx_finish(tsd, &gctxs); - malloc_mutex_unlock(&prof_dump_mtx); - - if (leakcheck) - prof_leakcheck(&cnt_all, leak_ngctx, filename); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); + if (leakcheck) { + prof_leakcheck(&prof_tdata_merge_iter_arg.cnt_all, + prof_gctx_merge_iter_arg.leak_ngctx, filename); + } return (false); label_write_error: prof_dump_close(propagate_err); label_open_close_error: prof_gctx_finish(tsd, &gctxs); - malloc_mutex_unlock(&prof_dump_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); return (true); } @@ -1554,12 +1642,12 @@ prof_dump_filename(char *filename, char v, uint64_t vseq) /* "...v.heap" */ malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, "%s.%d.%"FMTu64".%c%"FMTu64".heap", - opt_prof_prefix, (int)getpid(), prof_dump_seq, v, vseq); + opt_prof_prefix, prof_getpid(), prof_dump_seq, v, vseq); } else { /* "....heap" */ malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, "%s.%d.%"FMTu64".%c.heap", - opt_prof_prefix, (int)getpid(), prof_dump_seq, v); + opt_prof_prefix, prof_getpid(), prof_dump_seq, v); } prof_dump_seq++; } @@ -1578,23 +1666,23 @@ prof_fdump(void) return; tsd = tsd_fetch(); - malloc_mutex_lock(&prof_dump_seq_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx); prof_dump_filename(filename, 'f', VSEQ_INVALID); - malloc_mutex_unlock(&prof_dump_seq_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx); prof_dump(tsd, false, filename, opt_prof_leak); } void -prof_idump(void) +prof_idump(tsdn_t *tsdn) { tsd_t *tsd; prof_tdata_t *tdata; cassert(config_prof); - if 
(!prof_booted) + if (!prof_booted || tsdn_null(tsdn)) return; - tsd = tsd_fetch(); + tsd = tsdn_tsd(tsdn); tdata = prof_tdata_get(tsd, false); if (tdata == NULL) return; @@ -1605,50 +1693,48 @@ prof_idump(void) if (opt_prof_prefix[0] != '\0') { char filename[PATH_MAX + 1]; - malloc_mutex_lock(&prof_dump_seq_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx); prof_dump_filename(filename, 'i', prof_dump_iseq); prof_dump_iseq++; - malloc_mutex_unlock(&prof_dump_seq_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx); prof_dump(tsd, false, filename, false); } } bool -prof_mdump(const char *filename) +prof_mdump(tsd_t *tsd, const char *filename) { - tsd_t *tsd; char filename_buf[DUMP_FILENAME_BUFSIZE]; cassert(config_prof); if (!opt_prof || !prof_booted) return (true); - tsd = tsd_fetch(); if (filename == NULL) { /* No filename specified, so automatically generate one. */ if (opt_prof_prefix[0] == '\0') return (true); - malloc_mutex_lock(&prof_dump_seq_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx); prof_dump_filename(filename_buf, 'm', prof_dump_mseq); prof_dump_mseq++; - malloc_mutex_unlock(&prof_dump_seq_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx); filename = filename_buf; } return (prof_dump(tsd, true, filename, false)); } void -prof_gdump(void) +prof_gdump(tsdn_t *tsdn) { tsd_t *tsd; prof_tdata_t *tdata; cassert(config_prof); - if (!prof_booted) + if (!prof_booted || tsdn_null(tsdn)) return; - tsd = tsd_fetch(); + tsd = tsdn_tsd(tsdn); tdata = prof_tdata_get(tsd, false); if (tdata == NULL) return; @@ -1659,10 +1745,10 @@ prof_gdump(void) if (opt_prof_prefix[0] != '\0') { char filename[DUMP_FILENAME_BUFSIZE]; - malloc_mutex_lock(&prof_dump_seq_mtx); + malloc_mutex_lock(tsdn, &prof_dump_seq_mtx); prof_dump_filename(filename, 'u', prof_dump_useq); prof_dump_useq++; - malloc_mutex_unlock(&prof_dump_seq_mtx); + malloc_mutex_unlock(tsdn, &prof_dump_seq_mtx); prof_dump(tsd, false, filename, false); } } @@ -1691,14 
+1777,14 @@ prof_bt_keycomp(const void *k1, const void *k2) } JEMALLOC_INLINE_C uint64_t -prof_thr_uid_alloc(void) +prof_thr_uid_alloc(tsdn_t *tsdn) { uint64_t thr_uid; - malloc_mutex_lock(&next_thr_uid_mtx); + malloc_mutex_lock(tsdn, &next_thr_uid_mtx); thr_uid = next_thr_uid; next_thr_uid++; - malloc_mutex_unlock(&next_thr_uid_mtx); + malloc_mutex_unlock(tsdn, &next_thr_uid_mtx); return (thr_uid); } @@ -1708,14 +1794,13 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, char *thread_name, bool active) { prof_tdata_t *tdata; - tcache_t *tcache; cassert(config_prof); /* Initialize an empty cache for this thread. */ - tcache = tcache_get(tsd, true); - tdata = (prof_tdata_t *)iallocztm(tsd, sizeof(prof_tdata_t), false, - tcache, true, NULL); + tdata = (prof_tdata_t *)iallocztm(tsd_tsdn(tsd), sizeof(prof_tdata_t), + size2index(sizeof(prof_tdata_t)), false, NULL, true, + arena_get(TSDN_NULL, 0, true), true); if (tdata == NULL) return (NULL); @@ -1727,9 +1812,9 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, tdata->expired = false; tdata->tctx_uid_next = 0; - if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, - prof_bt_hash, prof_bt_keycomp)) { - idalloctm(tsd, tdata, tcache, true); + if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash, + prof_bt_keycomp)) { + idalloctm(tsd_tsdn(tsd), tdata, NULL, true, true); return (NULL); } @@ -1743,9 +1828,9 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, tdata->dumping = false; tdata->active = active; - malloc_mutex_lock(&tdatas_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); tdata_tree_insert(&tdatas, tdata); - malloc_mutex_unlock(&tdatas_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); return (tdata); } @@ -1754,13 +1839,12 @@ prof_tdata_t * prof_tdata_init(tsd_t *tsd) { - return (prof_tdata_init_impl(tsd, prof_thr_uid_alloc(), 0, NULL, - prof_thread_active_init_get())); + return (prof_tdata_init_impl(tsd, 
prof_thr_uid_alloc(tsd_tsdn(tsd)), 0, + NULL, prof_thread_active_init_get(tsd_tsdn(tsd)))); } -/* tdata->lock must be held. */ static bool -prof_tdata_should_destroy(prof_tdata_t *tdata, bool even_if_attached) +prof_tdata_should_destroy_unlocked(prof_tdata_t *tdata, bool even_if_attached) { if (tdata->attached && !even_if_attached) @@ -1770,32 +1854,40 @@ prof_tdata_should_destroy(prof_tdata_t *tdata, bool even_if_attached) return (true); } -/* tdatas_mtx must be held. */ +static bool +prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, + bool even_if_attached) +{ + + malloc_mutex_assert_owner(tsdn, tdata->lock); + + return (prof_tdata_should_destroy_unlocked(tdata, even_if_attached)); +} + static void prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) { - tcache_t *tcache; - assert(prof_tdata_should_destroy(tdata, even_if_attached)); - assert(tsd_prof_tdata_get(tsd) != tdata); + malloc_mutex_assert_owner(tsd_tsdn(tsd), &tdatas_mtx); tdata_tree_remove(&tdatas, tdata); - tcache = tcache_get(tsd, false); + assert(prof_tdata_should_destroy_unlocked(tdata, even_if_attached)); + if (tdata->thread_name != NULL) - idalloctm(tsd, tdata->thread_name, tcache, true); + idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, true, true); ckh_delete(tsd, &tdata->bt2tctx); - idalloctm(tsd, tdata, tcache, true); + idalloctm(tsd_tsdn(tsd), tdata, NULL, true, true); } static void prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) { - malloc_mutex_lock(&tdatas_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); prof_tdata_destroy_locked(tsd, tdata, even_if_attached); - malloc_mutex_unlock(&tdatas_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); } static void @@ -1803,9 +1895,10 @@ prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) { bool destroy_tdata; - malloc_mutex_lock(tdata->lock); + malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); if (tdata->attached) { - destroy_tdata = prof_tdata_should_destroy(tdata, true); 
+ destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, + true); /* * Only detach if !destroy_tdata, because detaching would allow * another thread to win the race to destroy tdata. @@ -1815,7 +1908,7 @@ prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) tsd_prof_tdata_set(tsd, NULL); } else destroy_tdata = false; - malloc_mutex_unlock(tdata->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); if (destroy_tdata) prof_tdata_destroy(tsd, tdata, true); } @@ -1826,7 +1919,7 @@ prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) uint64_t thr_uid = tdata->thr_uid; uint64_t thr_discrim = tdata->thr_discrim + 1; char *thread_name = (tdata->thread_name != NULL) ? - prof_thread_name_alloc(tsd, tdata->thread_name) : NULL; + prof_thread_name_alloc(tsd_tsdn(tsd), tdata->thread_name) : NULL; bool active = tdata->active; prof_tdata_detach(tsd, tdata); @@ -1835,18 +1928,18 @@ prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) } static bool -prof_tdata_expire(prof_tdata_t *tdata) +prof_tdata_expire(tsdn_t *tsdn, prof_tdata_t *tdata) { bool destroy_tdata; - malloc_mutex_lock(tdata->lock); + malloc_mutex_lock(tsdn, tdata->lock); if (!tdata->expired) { tdata->expired = true; destroy_tdata = tdata->attached ? false : - prof_tdata_should_destroy(tdata, false); + prof_tdata_should_destroy(tsdn, tdata, false); } else destroy_tdata = false; - malloc_mutex_unlock(tdata->lock); + malloc_mutex_unlock(tsdn, tdata->lock); return (destroy_tdata); } @@ -1854,8 +1947,9 @@ prof_tdata_expire(prof_tdata_t *tdata) static prof_tdata_t * prof_tdata_reset_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) { + tsdn_t *tsdn = (tsdn_t *)arg; - return (prof_tdata_expire(tdata) ? tdata : NULL); + return (prof_tdata_expire(tsdn, tdata) ? 
tdata : NULL); } void @@ -1865,15 +1959,15 @@ prof_reset(tsd_t *tsd, size_t lg_sample) assert(lg_sample < (sizeof(uint64_t) << 3)); - malloc_mutex_lock(&prof_dump_mtx); - malloc_mutex_lock(&tdatas_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); lg_prof_sample = lg_sample; next = NULL; do { prof_tdata_t *to_destroy = tdata_tree_iter(&tdatas, next, - prof_tdata_reset_iter, NULL); + prof_tdata_reset_iter, (void *)tsd); if (to_destroy != NULL) { next = tdata_tree_next(&tdatas, to_destroy); prof_tdata_destroy_locked(tsd, to_destroy, false); @@ -1881,8 +1975,8 @@ prof_reset(tsd_t *tsd, size_t lg_sample) next = NULL; } while (next != NULL); - malloc_mutex_unlock(&tdatas_mtx); - malloc_mutex_unlock(&prof_dump_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); } void @@ -1899,35 +1993,33 @@ prof_tdata_cleanup(tsd_t *tsd) } bool -prof_active_get(void) +prof_active_get(tsdn_t *tsdn) { bool prof_active_current; - malloc_mutex_lock(&prof_active_mtx); + malloc_mutex_lock(tsdn, &prof_active_mtx); prof_active_current = prof_active; - malloc_mutex_unlock(&prof_active_mtx); + malloc_mutex_unlock(tsdn, &prof_active_mtx); return (prof_active_current); } bool -prof_active_set(bool active) +prof_active_set(tsdn_t *tsdn, bool active) { bool prof_active_old; - malloc_mutex_lock(&prof_active_mtx); + malloc_mutex_lock(tsdn, &prof_active_mtx); prof_active_old = prof_active; prof_active = active; - malloc_mutex_unlock(&prof_active_mtx); + malloc_mutex_unlock(tsdn, &prof_active_mtx); return (prof_active_old); } const char * -prof_thread_name_get(void) +prof_thread_name_get(tsd_t *tsd) { - tsd_t *tsd; prof_tdata_t *tdata; - tsd = tsd_fetch(); tdata = prof_tdata_get(tsd, true); if (tdata == NULL) return (""); @@ -1935,7 +2027,7 @@ prof_thread_name_get(void) } static char * -prof_thread_name_alloc(tsd_t *tsd, const char *thread_name) +prof_thread_name_alloc(tsdn_t *tsdn, const 
char *thread_name) { char *ret; size_t size; @@ -1947,7 +2039,8 @@ prof_thread_name_alloc(tsd_t *tsd, const char *thread_name) if (size == 1) return (""); - ret = iallocztm(tsd, size, false, tcache_get(tsd, true), true, NULL); + ret = iallocztm(tsdn, size, size2index(size), false, NULL, true, + arena_get(TSDN_NULL, 0, true), true); if (ret == NULL) return (NULL); memcpy(ret, thread_name, size); @@ -1974,13 +2067,12 @@ prof_thread_name_set(tsd_t *tsd, const char *thread_name) return (EFAULT); } - s = prof_thread_name_alloc(tsd, thread_name); + s = prof_thread_name_alloc(tsd_tsdn(tsd), thread_name); if (s == NULL) return (EAGAIN); if (tdata->thread_name != NULL) { - idalloctm(tsd, tdata->thread_name, tcache_get(tsd, false), - true); + idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, true, true); tdata->thread_name = NULL; } if (strlen(s) > 0) @@ -1989,12 +2081,10 @@ prof_thread_name_set(tsd_t *tsd, const char *thread_name) } bool -prof_thread_active_get(void) +prof_thread_active_get(tsd_t *tsd) { - tsd_t *tsd; prof_tdata_t *tdata; - tsd = tsd_fetch(); tdata = prof_tdata_get(tsd, true); if (tdata == NULL) return (false); @@ -2002,12 +2092,10 @@ prof_thread_active_get(void) } bool -prof_thread_active_set(bool active) +prof_thread_active_set(tsd_t *tsd, bool active) { - tsd_t *tsd; prof_tdata_t *tdata; - tsd = tsd_fetch(); tdata = prof_tdata_get(tsd, true); if (tdata == NULL) return (true); @@ -2016,48 +2104,48 @@ prof_thread_active_set(bool active) } bool -prof_thread_active_init_get(void) +prof_thread_active_init_get(tsdn_t *tsdn) { bool active_init; - malloc_mutex_lock(&prof_thread_active_init_mtx); + malloc_mutex_lock(tsdn, &prof_thread_active_init_mtx); active_init = prof_thread_active_init; - malloc_mutex_unlock(&prof_thread_active_init_mtx); + malloc_mutex_unlock(tsdn, &prof_thread_active_init_mtx); return (active_init); } bool -prof_thread_active_init_set(bool active_init) +prof_thread_active_init_set(tsdn_t *tsdn, bool active_init) { bool active_init_old; - 
malloc_mutex_lock(&prof_thread_active_init_mtx); + malloc_mutex_lock(tsdn, &prof_thread_active_init_mtx); active_init_old = prof_thread_active_init; prof_thread_active_init = active_init; - malloc_mutex_unlock(&prof_thread_active_init_mtx); + malloc_mutex_unlock(tsdn, &prof_thread_active_init_mtx); return (active_init_old); } bool -prof_gdump_get(void) +prof_gdump_get(tsdn_t *tsdn) { bool prof_gdump_current; - malloc_mutex_lock(&prof_gdump_mtx); + malloc_mutex_lock(tsdn, &prof_gdump_mtx); prof_gdump_current = prof_gdump_val; - malloc_mutex_unlock(&prof_gdump_mtx); + malloc_mutex_unlock(tsdn, &prof_gdump_mtx); return (prof_gdump_current); } bool -prof_gdump_set(bool gdump) +prof_gdump_set(tsdn_t *tsdn, bool gdump) { bool prof_gdump_old; - malloc_mutex_lock(&prof_gdump_mtx); + malloc_mutex_lock(tsdn, &prof_gdump_mtx); prof_gdump_old = prof_gdump_val; prof_gdump_val = gdump; - malloc_mutex_unlock(&prof_gdump_mtx); + malloc_mutex_unlock(tsdn, &prof_gdump_mtx); return (prof_gdump_old); } @@ -2098,47 +2186,54 @@ prof_boot1(void) } bool -prof_boot2(void) +prof_boot2(tsd_t *tsd) { cassert(config_prof); if (opt_prof) { - tsd_t *tsd; unsigned i; lg_prof_sample = opt_lg_prof_sample; prof_active = opt_prof_active; - if (malloc_mutex_init(&prof_active_mtx)) + if (malloc_mutex_init(&prof_active_mtx, "prof_active", + WITNESS_RANK_PROF_ACTIVE)) return (true); prof_gdump_val = opt_prof_gdump; - if (malloc_mutex_init(&prof_gdump_mtx)) + if (malloc_mutex_init(&prof_gdump_mtx, "prof_gdump", + WITNESS_RANK_PROF_GDUMP)) return (true); prof_thread_active_init = opt_prof_thread_active_init; - if (malloc_mutex_init(&prof_thread_active_init_mtx)) + if (malloc_mutex_init(&prof_thread_active_init_mtx, + "prof_thread_active_init", + WITNESS_RANK_PROF_THREAD_ACTIVE_INIT)) return (true); - tsd = tsd_fetch(); if (ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) return (true); - if (malloc_mutex_init(&bt2gctx_mtx)) + if (malloc_mutex_init(&bt2gctx_mtx, "prof_bt2gctx", + 
WITNESS_RANK_PROF_BT2GCTX)) return (true); tdata_tree_new(&tdatas); - if (malloc_mutex_init(&tdatas_mtx)) + if (malloc_mutex_init(&tdatas_mtx, "prof_tdatas", + WITNESS_RANK_PROF_TDATAS)) return (true); next_thr_uid = 0; - if (malloc_mutex_init(&next_thr_uid_mtx)) + if (malloc_mutex_init(&next_thr_uid_mtx, "prof_next_thr_uid", + WITNESS_RANK_PROF_NEXT_THR_UID)) return (true); - if (malloc_mutex_init(&prof_dump_seq_mtx)) + if (malloc_mutex_init(&prof_dump_seq_mtx, "prof_dump_seq", + WITNESS_RANK_PROF_DUMP_SEQ)) return (true); - if (malloc_mutex_init(&prof_dump_mtx)) + if (malloc_mutex_init(&prof_dump_mtx, "prof_dump", + WITNESS_RANK_PROF_DUMP)) return (true); if (opt_prof_final && opt_prof_prefix[0] != '\0' && @@ -2148,21 +2243,23 @@ prof_boot2(void) abort(); } - gctx_locks = (malloc_mutex_t *)base_alloc(PROF_NCTX_LOCKS * - sizeof(malloc_mutex_t)); + gctx_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd), + PROF_NCTX_LOCKS * sizeof(malloc_mutex_t)); if (gctx_locks == NULL) return (true); for (i = 0; i < PROF_NCTX_LOCKS; i++) { - if (malloc_mutex_init(&gctx_locks[i])) + if (malloc_mutex_init(&gctx_locks[i], "prof_gctx", + WITNESS_RANK_PROF_GCTX)) return (true); } - tdata_locks = (malloc_mutex_t *)base_alloc(PROF_NTDATA_LOCKS * - sizeof(malloc_mutex_t)); + tdata_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd), + PROF_NTDATA_LOCKS * sizeof(malloc_mutex_t)); if (tdata_locks == NULL) return (true); for (i = 0; i < PROF_NTDATA_LOCKS; i++) { - if (malloc_mutex_init(&tdata_locks[i])) + if (malloc_mutex_init(&tdata_locks[i], "prof_tdata", + WITNESS_RANK_PROF_TDATA)) return (true); } } @@ -2181,56 +2278,77 @@ prof_boot2(void) } void -prof_prefork(void) +prof_prefork0(tsdn_t *tsdn) { if (opt_prof) { unsigned i; - malloc_mutex_prefork(&tdatas_mtx); - malloc_mutex_prefork(&bt2gctx_mtx); - malloc_mutex_prefork(&next_thr_uid_mtx); - malloc_mutex_prefork(&prof_dump_seq_mtx); - for (i = 0; i < PROF_NCTX_LOCKS; i++) - malloc_mutex_prefork(&gctx_locks[i]); + 
malloc_mutex_prefork(tsdn, &prof_dump_mtx); + malloc_mutex_prefork(tsdn, &bt2gctx_mtx); + malloc_mutex_prefork(tsdn, &tdatas_mtx); for (i = 0; i < PROF_NTDATA_LOCKS; i++) - malloc_mutex_prefork(&tdata_locks[i]); + malloc_mutex_prefork(tsdn, &tdata_locks[i]); + for (i = 0; i < PROF_NCTX_LOCKS; i++) + malloc_mutex_prefork(tsdn, &gctx_locks[i]); } } void -prof_postfork_parent(void) +prof_prefork1(tsdn_t *tsdn) { if (opt_prof) { - unsigned i; - - for (i = 0; i < PROF_NTDATA_LOCKS; i++) - malloc_mutex_postfork_parent(&tdata_locks[i]); - for (i = 0; i < PROF_NCTX_LOCKS; i++) - malloc_mutex_postfork_parent(&gctx_locks[i]); - malloc_mutex_postfork_parent(&prof_dump_seq_mtx); - malloc_mutex_postfork_parent(&next_thr_uid_mtx); - malloc_mutex_postfork_parent(&bt2gctx_mtx); - malloc_mutex_postfork_parent(&tdatas_mtx); + malloc_mutex_prefork(tsdn, &prof_active_mtx); + malloc_mutex_prefork(tsdn, &prof_dump_seq_mtx); + malloc_mutex_prefork(tsdn, &prof_gdump_mtx); + malloc_mutex_prefork(tsdn, &next_thr_uid_mtx); + malloc_mutex_prefork(tsdn, &prof_thread_active_init_mtx); } } void -prof_postfork_child(void) +prof_postfork_parent(tsdn_t *tsdn) { if (opt_prof) { unsigned i; - for (i = 0; i < PROF_NTDATA_LOCKS; i++) - malloc_mutex_postfork_child(&tdata_locks[i]); + malloc_mutex_postfork_parent(tsdn, + &prof_thread_active_init_mtx); + malloc_mutex_postfork_parent(tsdn, &next_thr_uid_mtx); + malloc_mutex_postfork_parent(tsdn, &prof_gdump_mtx); + malloc_mutex_postfork_parent(tsdn, &prof_dump_seq_mtx); + malloc_mutex_postfork_parent(tsdn, &prof_active_mtx); for (i = 0; i < PROF_NCTX_LOCKS; i++) - malloc_mutex_postfork_child(&gctx_locks[i]); - malloc_mutex_postfork_child(&prof_dump_seq_mtx); - malloc_mutex_postfork_child(&next_thr_uid_mtx); - malloc_mutex_postfork_child(&bt2gctx_mtx); - malloc_mutex_postfork_child(&tdatas_mtx); + malloc_mutex_postfork_parent(tsdn, &gctx_locks[i]); + for (i = 0; i < PROF_NTDATA_LOCKS; i++) + malloc_mutex_postfork_parent(tsdn, &tdata_locks[i]); + 
malloc_mutex_postfork_parent(tsdn, &tdatas_mtx); + malloc_mutex_postfork_parent(tsdn, &bt2gctx_mtx); + malloc_mutex_postfork_parent(tsdn, &prof_dump_mtx); + } +} + +void +prof_postfork_child(tsdn_t *tsdn) +{ + + if (opt_prof) { + unsigned i; + + malloc_mutex_postfork_child(tsdn, &prof_thread_active_init_mtx); + malloc_mutex_postfork_child(tsdn, &next_thr_uid_mtx); + malloc_mutex_postfork_child(tsdn, &prof_gdump_mtx); + malloc_mutex_postfork_child(tsdn, &prof_dump_seq_mtx); + malloc_mutex_postfork_child(tsdn, &prof_active_mtx); + for (i = 0; i < PROF_NCTX_LOCKS; i++) + malloc_mutex_postfork_child(tsdn, &gctx_locks[i]); + for (i = 0; i < PROF_NTDATA_LOCKS; i++) + malloc_mutex_postfork_child(tsdn, &tdata_locks[i]); + malloc_mutex_postfork_child(tsdn, &tdatas_mtx); + malloc_mutex_postfork_child(tsdn, &bt2gctx_mtx); + malloc_mutex_postfork_child(tsdn, &prof_dump_mtx); } } diff --git a/deps/jemalloc/src/quarantine.c b/deps/jemalloc/src/quarantine.c index 6c43dfcaa..18903fb5c 100644 --- a/deps/jemalloc/src/quarantine.c +++ b/deps/jemalloc/src/quarantine.c @@ -13,22 +13,22 @@ /* Function prototypes for non-inline static functions. 
*/ static quarantine_t *quarantine_grow(tsd_t *tsd, quarantine_t *quarantine); -static void quarantine_drain_one(tsd_t *tsd, quarantine_t *quarantine); -static void quarantine_drain(tsd_t *tsd, quarantine_t *quarantine, +static void quarantine_drain_one(tsdn_t *tsdn, quarantine_t *quarantine); +static void quarantine_drain(tsdn_t *tsdn, quarantine_t *quarantine, size_t upper_bound); /******************************************************************************/ static quarantine_t * -quarantine_init(tsd_t *tsd, size_t lg_maxobjs) +quarantine_init(tsdn_t *tsdn, size_t lg_maxobjs) { quarantine_t *quarantine; + size_t size; - assert(tsd_nominal(tsd)); - - quarantine = (quarantine_t *)iallocztm(tsd, offsetof(quarantine_t, objs) - + ((ZU(1) << lg_maxobjs) * sizeof(quarantine_obj_t)), false, - tcache_get(tsd, true), true, NULL); + size = offsetof(quarantine_t, objs) + ((ZU(1) << lg_maxobjs) * + sizeof(quarantine_obj_t)); + quarantine = (quarantine_t *)iallocztm(tsdn, size, size2index(size), + false, NULL, true, arena_get(TSDN_NULL, 0, true), true); if (quarantine == NULL) return (NULL); quarantine->curbytes = 0; @@ -47,7 +47,7 @@ quarantine_alloc_hook_work(tsd_t *tsd) if (!tsd_nominal(tsd)) return; - quarantine = quarantine_init(tsd, LG_MAXOBJS_INIT); + quarantine = quarantine_init(tsd_tsdn(tsd), LG_MAXOBJS_INIT); /* * Check again whether quarantine has been initialized, because * quarantine_init() may have triggered recursive initialization. 
@@ -55,7 +55,7 @@ quarantine_alloc_hook_work(tsd_t *tsd) if (tsd_quarantine_get(tsd) == NULL) tsd_quarantine_set(tsd, quarantine); else - idalloctm(tsd, quarantine, tcache_get(tsd, false), true); + idalloctm(tsd_tsdn(tsd), quarantine, NULL, true, true); } static quarantine_t * @@ -63,9 +63,9 @@ quarantine_grow(tsd_t *tsd, quarantine_t *quarantine) { quarantine_t *ret; - ret = quarantine_init(tsd, quarantine->lg_maxobjs + 1); + ret = quarantine_init(tsd_tsdn(tsd), quarantine->lg_maxobjs + 1); if (ret == NULL) { - quarantine_drain_one(tsd, quarantine); + quarantine_drain_one(tsd_tsdn(tsd), quarantine); return (quarantine); } @@ -87,18 +87,18 @@ quarantine_grow(tsd_t *tsd, quarantine_t *quarantine) memcpy(&ret->objs[ncopy_a], quarantine->objs, ncopy_b * sizeof(quarantine_obj_t)); } - idalloctm(tsd, quarantine, tcache_get(tsd, false), true); + idalloctm(tsd_tsdn(tsd), quarantine, NULL, true, true); tsd_quarantine_set(tsd, ret); return (ret); } static void -quarantine_drain_one(tsd_t *tsd, quarantine_t *quarantine) +quarantine_drain_one(tsdn_t *tsdn, quarantine_t *quarantine) { quarantine_obj_t *obj = &quarantine->objs[quarantine->first]; - assert(obj->usize == isalloc(obj->ptr, config_prof)); - idalloctm(tsd, obj->ptr, NULL, false); + assert(obj->usize == isalloc(tsdn, obj->ptr, config_prof)); + idalloctm(tsdn, obj->ptr, NULL, false, true); quarantine->curbytes -= obj->usize; quarantine->curobjs--; quarantine->first = (quarantine->first + 1) & ((ZU(1) << @@ -106,24 +106,24 @@ quarantine_drain_one(tsd_t *tsd, quarantine_t *quarantine) } static void -quarantine_drain(tsd_t *tsd, quarantine_t *quarantine, size_t upper_bound) +quarantine_drain(tsdn_t *tsdn, quarantine_t *quarantine, size_t upper_bound) { while (quarantine->curbytes > upper_bound && quarantine->curobjs > 0) - quarantine_drain_one(tsd, quarantine); + quarantine_drain_one(tsdn, quarantine); } void quarantine(tsd_t *tsd, void *ptr) { quarantine_t *quarantine; - size_t usize = isalloc(ptr, config_prof); + 
size_t usize = isalloc(tsd_tsdn(tsd), ptr, config_prof); cassert(config_fill); assert(opt_quarantine); if ((quarantine = tsd_quarantine_get(tsd)) == NULL) { - idalloctm(tsd, ptr, NULL, false); + idalloctm(tsd_tsdn(tsd), ptr, NULL, false, true); return; } /* @@ -133,7 +133,7 @@ quarantine(tsd_t *tsd, void *ptr) if (quarantine->curbytes + usize > opt_quarantine) { size_t upper_bound = (opt_quarantine >= usize) ? opt_quarantine - usize : 0; - quarantine_drain(tsd, quarantine, upper_bound); + quarantine_drain(tsd_tsdn(tsd), quarantine, upper_bound); } /* Grow the quarantine ring buffer if it's full. */ if (quarantine->curobjs == (ZU(1) << quarantine->lg_maxobjs)) @@ -158,11 +158,11 @@ quarantine(tsd_t *tsd, void *ptr) && usize <= SMALL_MAXCLASS) arena_quarantine_junk_small(ptr, usize); else - memset(ptr, 0x5a, usize); + memset(ptr, JEMALLOC_FREE_JUNK, usize); } } else { assert(quarantine->curbytes == 0); - idalloctm(tsd, ptr, NULL, false); + idalloctm(tsd_tsdn(tsd), ptr, NULL, false, true); } } @@ -176,8 +176,8 @@ quarantine_cleanup(tsd_t *tsd) quarantine = tsd_quarantine_get(tsd); if (quarantine != NULL) { - quarantine_drain(tsd, quarantine, 0); - idalloctm(tsd, quarantine, tcache_get(tsd, false), true); + quarantine_drain(tsd_tsdn(tsd), quarantine, 0); + idalloctm(tsd_tsdn(tsd), quarantine, NULL, true, true); tsd_quarantine_set(tsd, NULL); } } diff --git a/deps/jemalloc/src/rtree.c b/deps/jemalloc/src/rtree.c index af0d97e75..f2e2997d5 100644 --- a/deps/jemalloc/src/rtree.c +++ b/deps/jemalloc/src/rtree.c @@ -15,6 +15,8 @@ rtree_new(rtree_t *rtree, unsigned bits, rtree_node_alloc_t *alloc, { unsigned bits_in_leaf, height, i; + assert(RTREE_HEIGHT_MAX == ((ZU(1) << (LG_SIZEOF_PTR+3)) / + RTREE_BITS_PER_LEVEL)); assert(bits > 0 && bits <= (sizeof(uintptr_t) << 3)); bits_in_leaf = (bits % RTREE_BITS_PER_LEVEL) == 0 ? 
RTREE_BITS_PER_LEVEL @@ -94,12 +96,15 @@ rtree_node_init(rtree_t *rtree, unsigned level, rtree_node_elm_t **elmp) rtree_node_elm_t *node; if (atomic_cas_p((void **)elmp, NULL, RTREE_NODE_INITIALIZING)) { + spin_t spinner; + /* * Another thread is already in the process of initializing. * Spin-wait until initialization is complete. */ + spin_init(&spinner); do { - CPU_SPINWAIT; + spin_adaptive(&spinner); node = atomic_read_p((void **)elmp); } while (node == RTREE_NODE_INITIALIZING); } else { @@ -123,5 +128,5 @@ rtree_node_elm_t * rtree_child_read_hard(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level) { - return (rtree_node_init(rtree, level, &elm->child)); + return (rtree_node_init(rtree, level+1, &elm->child)); } diff --git a/deps/jemalloc/src/spin.c b/deps/jemalloc/src/spin.c new file mode 100644 index 000000000..5242d95aa --- /dev/null +++ b/deps/jemalloc/src/spin.c @@ -0,0 +1,2 @@ +#define JEMALLOC_SPIN_C_ +#include "jemalloc/internal/jemalloc_internal.h" diff --git a/deps/jemalloc/src/stats.c b/deps/jemalloc/src/stats.c old mode 100644 new mode 100755 index 154c3e74c..1360f3bd0 --- a/deps/jemalloc/src/stats.c +++ b/deps/jemalloc/src/stats.c @@ -3,7 +3,7 @@ #define CTL_GET(n, v, t) do { \ size_t sz = sizeof(t); \ - xmallctl(n, v, &sz, NULL, 0); \ + xmallctl(n, (void *)v, &sz, NULL, 0); \ } while (0) #define CTL_M2_GET(n, i, v, t) do { \ @@ -12,7 +12,7 @@ size_t sz = sizeof(t); \ xmallctlnametomib(n, mib, &miblen); \ mib[2] = (i); \ - xmallctlbymib(mib, miblen, v, &sz, NULL, 0); \ + xmallctlbymib(mib, miblen, (void *)v, &sz, NULL, 0); \ } while (0) #define CTL_M2_M4_GET(n, i, j, v, t) do { \ @@ -22,7 +22,7 @@ xmallctlnametomib(n, mib, &miblen); \ mib[2] = (i); \ mib[4] = (j); \ - xmallctlbymib(mib, miblen, v, &sz, NULL, 0); \ + xmallctlbymib(mib, miblen, (void *)v, &sz, NULL, 0); \ } while (0) /******************************************************************************/ @@ -32,86 +32,107 @@ bool opt_stats_print = false; size_t stats_cactive = 0; 
-/******************************************************************************/ -/* Function prototypes for non-inline static functions. */ - -static void stats_arena_bins_print(void (*write_cb)(void *, const char *), - void *cbopaque, unsigned i); -static void stats_arena_lruns_print(void (*write_cb)(void *, const char *), - void *cbopaque, unsigned i); -static void stats_arena_hchunks_print( - void (*write_cb)(void *, const char *), void *cbopaque, unsigned i); -static void stats_arena_print(void (*write_cb)(void *, const char *), - void *cbopaque, unsigned i, bool bins, bool large, bool huge); - /******************************************************************************/ static void stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, - unsigned i) + bool json, bool large, bool huge, unsigned i) { size_t page; - bool config_tcache, in_gap; + bool config_tcache, in_gap, in_gap_prev; unsigned nbins, j; CTL_GET("arenas.page", &page, size_t); - CTL_GET("config.tcache", &config_tcache, bool); - if (config_tcache) { - malloc_cprintf(write_cb, cbopaque, - "bins: size ind allocated nmalloc" - " ndalloc nrequests curregs curruns regs" - " pgs util nfills nflushes newruns" - " reruns\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "bins: size ind allocated nmalloc" - " ndalloc nrequests curregs curruns regs" - " pgs util newruns reruns\n"); - } CTL_GET("arenas.nbins", &nbins, unsigned); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"bins\": [\n"); + } else { + CTL_GET("config.tcache", &config_tcache, bool); + if (config_tcache) { + malloc_cprintf(write_cb, cbopaque, + "bins: size ind allocated nmalloc" + " ndalloc nrequests curregs" + " curruns regs pgs util nfills" + " nflushes newruns reruns\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "bins: size ind allocated nmalloc" + " ndalloc nrequests curregs" + " curruns regs pgs util newruns" + " reruns\n"); + } + } for (j = 0, in_gap = false; j < nbins; j++) { 
uint64_t nruns; + size_t reg_size, run_size, curregs; + size_t curruns; + uint32_t nregs; + uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; + uint64_t nreruns; CTL_M2_M4_GET("stats.arenas.0.bins.0.nruns", i, j, &nruns, uint64_t); - if (nruns == 0) - in_gap = true; - else { - size_t reg_size, run_size, curregs, availregs, milli; - size_t curruns; - uint32_t nregs; - uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; - uint64_t reruns; - char util[6]; /* "x.yyy". */ + in_gap_prev = in_gap; + in_gap = (nruns == 0); - if (in_gap) { - malloc_cprintf(write_cb, cbopaque, - " ---\n"); - in_gap = false; - } - CTL_M2_GET("arenas.bin.0.size", j, ®_size, size_t); - CTL_M2_GET("arenas.bin.0.nregs", j, &nregs, uint32_t); - CTL_M2_GET("arenas.bin.0.run_size", j, &run_size, - size_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.nmalloc", i, j, - &nmalloc, uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.ndalloc", i, j, - &ndalloc, uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.curregs", i, j, - &curregs, size_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.nrequests", i, j, - &nrequests, uint64_t); + if (!json && in_gap_prev && !in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + } + + CTL_M2_GET("arenas.bin.0.size", j, ®_size, size_t); + CTL_M2_GET("arenas.bin.0.nregs", j, &nregs, uint32_t); + CTL_M2_GET("arenas.bin.0.run_size", j, &run_size, size_t); + + CTL_M2_M4_GET("stats.arenas.0.bins.0.nmalloc", i, j, &nmalloc, + uint64_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.ndalloc", i, j, &ndalloc, + uint64_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.curregs", i, j, &curregs, + size_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.nrequests", i, j, + &nrequests, uint64_t); + if (config_tcache) { + CTL_M2_M4_GET("stats.arenas.0.bins.0.nfills", i, j, + &nfills, uint64_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.nflushes", i, j, + &nflushes, uint64_t); + } + CTL_M2_M4_GET("stats.arenas.0.bins.0.nreruns", i, j, &nreruns, + uint64_t); + 
CTL_M2_M4_GET("stats.arenas.0.bins.0.curruns", i, j, &curruns, + size_t); + + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t{\n" + "\t\t\t\t\t\t\"nmalloc\": %"FMTu64",\n" + "\t\t\t\t\t\t\"ndalloc\": %"FMTu64",\n" + "\t\t\t\t\t\t\"curregs\": %zu,\n" + "\t\t\t\t\t\t\"nrequests\": %"FMTu64",\n", + nmalloc, + ndalloc, + curregs, + nrequests); if (config_tcache) { - CTL_M2_M4_GET("stats.arenas.0.bins.0.nfills", i, - j, &nfills, uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.nflushes", - i, j, &nflushes, uint64_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\t\"nfills\": %"FMTu64",\n" + "\t\t\t\t\t\t\"nflushes\": %"FMTu64",\n", + nfills, + nflushes); } - CTL_M2_M4_GET("stats.arenas.0.bins.0.nreruns", i, j, - &reruns, uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.curruns", i, j, - &curruns, size_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\t\"nreruns\": %"FMTu64",\n" + "\t\t\t\t\t\t\"curruns\": %zu\n" + "\t\t\t\t\t}%s\n", + nreruns, + curruns, + (j + 1 < nbins) ? "," : ""); + } else if (!in_gap) { + size_t availregs, milli; + char util[6]; /* "x.yyy". */ availregs = nregs * curruns; milli = (availregs != 0) ? (1000 * curregs) / availregs @@ -138,7 +159,7 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, reg_size, j, curregs * reg_size, nmalloc, ndalloc, nrequests, curregs, curruns, nregs, run_size / page, util, nfills, nflushes, - nruns, reruns); + nruns, nreruns); } else { malloc_cprintf(write_cb, cbopaque, "%20zu %3u %12zu %12"FMTu64 @@ -147,28 +168,38 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, " %12"FMTu64"\n", reg_size, j, curregs * reg_size, nmalloc, ndalloc, nrequests, curregs, curruns, nregs, - run_size / page, util, nruns, reruns); + run_size / page, util, nruns, nreruns); } } } - if (in_gap) { + if (json) { malloc_cprintf(write_cb, cbopaque, - " ---\n"); + "\t\t\t\t]%s\n", (large || huge) ? 
"," : ""); + } else { + if (in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + } } } static void stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, - unsigned i) + bool json, bool huge, unsigned i) { unsigned nbins, nlruns, j; - bool in_gap; + bool in_gap, in_gap_prev; - malloc_cprintf(write_cb, cbopaque, - "large: size ind allocated nmalloc ndalloc" - " nrequests curruns\n"); CTL_GET("arenas.nbins", &nbins, unsigned); CTL_GET("arenas.nlruns", &nlruns, unsigned); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"lruns\": [\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "large: size ind allocated nmalloc" + " ndalloc nrequests curruns\n"); + } for (j = 0, in_gap = false; j < nlruns; j++) { uint64_t nmalloc, ndalloc, nrequests; size_t run_size, curruns; @@ -179,17 +210,25 @@ stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, uint64_t); CTL_M2_M4_GET("stats.arenas.0.lruns.0.nrequests", i, j, &nrequests, uint64_t); - if (nrequests == 0) - in_gap = true; - else { - CTL_M2_GET("arenas.lrun.0.size", j, &run_size, size_t); - CTL_M2_M4_GET("stats.arenas.0.lruns.0.curruns", i, j, - &curruns, size_t); - if (in_gap) { - malloc_cprintf(write_cb, cbopaque, - " ---\n"); - in_gap = false; - } + in_gap_prev = in_gap; + in_gap = (nrequests == 0); + + if (!json && in_gap_prev && !in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + } + + CTL_M2_GET("arenas.lrun.0.size", j, &run_size, size_t); + CTL_M2_M4_GET("stats.arenas.0.lruns.0.curruns", i, j, &curruns, + size_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t{\n" + "\t\t\t\t\t\t\"curruns\": %zu\n" + "\t\t\t\t\t}%s\n", + curruns, + (j + 1 < nlruns) ? 
"," : ""); + } else if (!in_gap) { malloc_cprintf(write_cb, cbopaque, "%20zu %3u %12zu %12"FMTu64" %12"FMTu64 " %12"FMTu64" %12zu\n", @@ -197,25 +236,35 @@ stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, ndalloc, nrequests, curruns); } } - if (in_gap) { + if (json) { malloc_cprintf(write_cb, cbopaque, - " ---\n"); + "\t\t\t\t]%s\n", huge ? "," : ""); + } else { + if (in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + } } } static void stats_arena_hchunks_print(void (*write_cb)(void *, const char *), - void *cbopaque, unsigned i) + void *cbopaque, bool json, unsigned i) { unsigned nbins, nlruns, nhchunks, j; - bool in_gap; + bool in_gap, in_gap_prev; - malloc_cprintf(write_cb, cbopaque, - "huge: size ind allocated nmalloc ndalloc" - " nrequests curhchunks\n"); CTL_GET("arenas.nbins", &nbins, unsigned); CTL_GET("arenas.nlruns", &nlruns, unsigned); CTL_GET("arenas.nhchunks", &nhchunks, unsigned); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"hchunks\": [\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "huge: size ind allocated nmalloc" + " ndalloc nrequests curhchunks\n"); + } for (j = 0, in_gap = false; j < nhchunks; j++) { uint64_t nmalloc, ndalloc, nrequests; size_t hchunk_size, curhchunks; @@ -226,18 +275,25 @@ stats_arena_hchunks_print(void (*write_cb)(void *, const char *), &ndalloc, uint64_t); CTL_M2_M4_GET("stats.arenas.0.hchunks.0.nrequests", i, j, &nrequests, uint64_t); - if (nrequests == 0) - in_gap = true; - else { - CTL_M2_GET("arenas.hchunk.0.size", j, &hchunk_size, - size_t); - CTL_M2_M4_GET("stats.arenas.0.hchunks.0.curhchunks", i, - j, &curhchunks, size_t); - if (in_gap) { - malloc_cprintf(write_cb, cbopaque, - " ---\n"); - in_gap = false; - } + in_gap_prev = in_gap; + in_gap = (nrequests == 0); + + if (!json && in_gap_prev && !in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + } + + CTL_M2_GET("arenas.hchunk.0.size", j, &hchunk_size, size_t); + 
CTL_M2_M4_GET("stats.arenas.0.hchunks.0.curhchunks", i, j, + &curhchunks, size_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t{\n" + "\t\t\t\t\t\t\"curhchunks\": %zu\n" + "\t\t\t\t\t}%s\n", + curhchunks, + (j + 1 < nhchunks) ? "," : ""); + } else if (!in_gap) { malloc_cprintf(write_cb, cbopaque, "%20zu %3u %12zu %12"FMTu64" %12"FMTu64 " %12"FMTu64" %12zu\n", @@ -246,20 +302,25 @@ stats_arena_hchunks_print(void (*write_cb)(void *, const char *), nrequests, curhchunks); } } - if (in_gap) { + if (json) { malloc_cprintf(write_cb, cbopaque, - " ---\n"); + "\t\t\t\t]\n"); + } else { + if (in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + } } } static void stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, - unsigned i, bool bins, bool large, bool huge) + bool json, unsigned i, bool bins, bool large, bool huge) { unsigned nthreads; const char *dss; - ssize_t lg_dirty_mult; - size_t page, pactive, pdirty, mapped; + ssize_t lg_dirty_mult, decay_time; + size_t page, pactive, pdirty, mapped, retained; size_t metadata_mapped, metadata_allocated; uint64_t npurge, nmadvise, purged; size_t small_allocated; @@ -272,88 +333,731 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("arenas.page", &page, size_t); CTL_M2_GET("stats.arenas.0.nthreads", i, &nthreads, unsigned); - malloc_cprintf(write_cb, cbopaque, - "assigned threads: %u\n", nthreads); - CTL_M2_GET("stats.arenas.0.dss", i, &dss, const char *); - malloc_cprintf(write_cb, cbopaque, "dss allocation precedence: %s\n", - dss); - CTL_M2_GET("stats.arenas.0.lg_dirty_mult", i, &lg_dirty_mult, ssize_t); - if (lg_dirty_mult >= 0) { + if (json) { malloc_cprintf(write_cb, cbopaque, - "min active:dirty page ratio: %u:1\n", - (1U << lg_dirty_mult)); + "\t\t\t\t\"nthreads\": %u,\n", nthreads); } else { malloc_cprintf(write_cb, cbopaque, - "min active:dirty page ratio: N/A\n"); + "assigned threads: %u\n", nthreads); } + + 
CTL_M2_GET("stats.arenas.0.dss", i, &dss, const char *); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"dss\": \"%s\",\n", dss); + } else { + malloc_cprintf(write_cb, cbopaque, + "dss allocation precedence: %s\n", dss); + } + + CTL_M2_GET("stats.arenas.0.lg_dirty_mult", i, &lg_dirty_mult, ssize_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"lg_dirty_mult\": %zd,\n", lg_dirty_mult); + } else { + if (opt_purge == purge_mode_ratio) { + if (lg_dirty_mult >= 0) { + malloc_cprintf(write_cb, cbopaque, + "min active:dirty page ratio: %u:1\n", + (1U << lg_dirty_mult)); + } else { + malloc_cprintf(write_cb, cbopaque, + "min active:dirty page ratio: N/A\n"); + } + } + } + + CTL_M2_GET("stats.arenas.0.decay_time", i, &decay_time, ssize_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"decay_time\": %zd,\n", decay_time); + } else { + if (opt_purge == purge_mode_decay) { + if (decay_time >= 0) { + malloc_cprintf(write_cb, cbopaque, + "decay time: %zd\n", decay_time); + } else { + malloc_cprintf(write_cb, cbopaque, + "decay time: N/A\n"); + } + } + } + CTL_M2_GET("stats.arenas.0.pactive", i, &pactive, size_t); CTL_M2_GET("stats.arenas.0.pdirty", i, &pdirty, size_t); CTL_M2_GET("stats.arenas.0.npurge", i, &npurge, uint64_t); CTL_M2_GET("stats.arenas.0.nmadvise", i, &nmadvise, uint64_t); CTL_M2_GET("stats.arenas.0.purged", i, &purged, uint64_t); - malloc_cprintf(write_cb, cbopaque, - "dirty pages: %zu:%zu active:dirty, %"FMTu64" sweep%s, %"FMTu64 - " madvise%s, %"FMTu64" purged\n", pactive, pdirty, npurge, npurge == - 1 ? "" : "s", nmadvise, nmadvise == 1 ? 
"" : "s", purged); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"pactive\": %zu,\n", pactive); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"pdirty\": %zu,\n", pdirty); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"npurge\": %"FMTu64",\n", npurge); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"nmadvise\": %"FMTu64",\n", nmadvise); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"purged\": %"FMTu64",\n", purged); + } else { + malloc_cprintf(write_cb, cbopaque, + "purging: dirty: %zu, sweeps: %"FMTu64", madvises: %"FMTu64 + ", purged: %"FMTu64"\n", pdirty, npurge, nmadvise, purged); + } - malloc_cprintf(write_cb, cbopaque, - " allocated nmalloc ndalloc" - " nrequests\n"); CTL_M2_GET("stats.arenas.0.small.allocated", i, &small_allocated, size_t); CTL_M2_GET("stats.arenas.0.small.nmalloc", i, &small_nmalloc, uint64_t); CTL_M2_GET("stats.arenas.0.small.ndalloc", i, &small_ndalloc, uint64_t); CTL_M2_GET("stats.arenas.0.small.nrequests", i, &small_nrequests, uint64_t); - malloc_cprintf(write_cb, cbopaque, - "small: %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64"\n", - small_allocated, small_nmalloc, small_ndalloc, small_nrequests); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"small\": {\n"); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"allocated\": %zu,\n", small_allocated); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"nmalloc\": %"FMTu64",\n", small_nmalloc); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"ndalloc\": %"FMTu64",\n", small_ndalloc); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"nrequests\": %"FMTu64"\n", small_nrequests); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t},\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + " allocated nmalloc" + " ndalloc nrequests\n"); + malloc_cprintf(write_cb, cbopaque, + "small: %12zu %12"FMTu64" %12"FMTu64 + " %12"FMTu64"\n", + small_allocated, small_nmalloc, small_ndalloc, + small_nrequests); + } + 
CTL_M2_GET("stats.arenas.0.large.allocated", i, &large_allocated, size_t); CTL_M2_GET("stats.arenas.0.large.nmalloc", i, &large_nmalloc, uint64_t); CTL_M2_GET("stats.arenas.0.large.ndalloc", i, &large_ndalloc, uint64_t); CTL_M2_GET("stats.arenas.0.large.nrequests", i, &large_nrequests, uint64_t); - malloc_cprintf(write_cb, cbopaque, - "large: %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64"\n", - large_allocated, large_nmalloc, large_ndalloc, large_nrequests); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"large\": {\n"); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"allocated\": %zu,\n", large_allocated); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"nmalloc\": %"FMTu64",\n", large_nmalloc); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"ndalloc\": %"FMTu64",\n", large_ndalloc); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"nrequests\": %"FMTu64"\n", large_nrequests); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t},\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "large: %12zu %12"FMTu64" %12"FMTu64 + " %12"FMTu64"\n", + large_allocated, large_nmalloc, large_ndalloc, + large_nrequests); + } + CTL_M2_GET("stats.arenas.0.huge.allocated", i, &huge_allocated, size_t); CTL_M2_GET("stats.arenas.0.huge.nmalloc", i, &huge_nmalloc, uint64_t); CTL_M2_GET("stats.arenas.0.huge.ndalloc", i, &huge_ndalloc, uint64_t); CTL_M2_GET("stats.arenas.0.huge.nrequests", i, &huge_nrequests, uint64_t); - malloc_cprintf(write_cb, cbopaque, - "huge: %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64"\n", - huge_allocated, huge_nmalloc, huge_ndalloc, huge_nrequests); - malloc_cprintf(write_cb, cbopaque, - "total: %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64"\n", - small_allocated + large_allocated + huge_allocated, - small_nmalloc + large_nmalloc + huge_nmalloc, - small_ndalloc + large_ndalloc + huge_ndalloc, - small_nrequests + large_nrequests + huge_nrequests); - malloc_cprintf(write_cb, cbopaque, - "active: %12zu\n", pactive * page); + if (json) { + 
malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"huge\": {\n"); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"allocated\": %zu,\n", huge_allocated); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"nmalloc\": %"FMTu64",\n", huge_nmalloc); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"ndalloc\": %"FMTu64",\n", huge_ndalloc); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"nrequests\": %"FMTu64"\n", huge_nrequests); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t},\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "huge: %12zu %12"FMTu64" %12"FMTu64 + " %12"FMTu64"\n", + huge_allocated, huge_nmalloc, huge_ndalloc, huge_nrequests); + malloc_cprintf(write_cb, cbopaque, + "total: %12zu %12"FMTu64" %12"FMTu64 + " %12"FMTu64"\n", + small_allocated + large_allocated + huge_allocated, + small_nmalloc + large_nmalloc + huge_nmalloc, + small_ndalloc + large_ndalloc + huge_ndalloc, + small_nrequests + large_nrequests + huge_nrequests); + } + if (!json) { + malloc_cprintf(write_cb, cbopaque, + "active: %12zu\n", pactive * page); + } + CTL_M2_GET("stats.arenas.0.mapped", i, &mapped, size_t); - malloc_cprintf(write_cb, cbopaque, - "mapped: %12zu\n", mapped); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"mapped\": %zu,\n", mapped); + } else { + malloc_cprintf(write_cb, cbopaque, + "mapped: %12zu\n", mapped); + } + + CTL_M2_GET("stats.arenas.0.retained", i, &retained, size_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"retained\": %zu,\n", retained); + } else { + malloc_cprintf(write_cb, cbopaque, + "retained: %12zu\n", retained); + } + CTL_M2_GET("stats.arenas.0.metadata.mapped", i, &metadata_mapped, size_t); CTL_M2_GET("stats.arenas.0.metadata.allocated", i, &metadata_allocated, size_t); - malloc_cprintf(write_cb, cbopaque, - "metadata: mapped: %zu, allocated: %zu\n", - metadata_mapped, metadata_allocated); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"metadata\": {\n"); - if (bins) - 
stats_arena_bins_print(write_cb, cbopaque, i); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"mapped\": %zu,\n", metadata_mapped); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"allocated\": %zu\n", metadata_allocated); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t},\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "metadata: mapped: %zu, allocated: %zu\n", + metadata_mapped, metadata_allocated); + } + + if (bins) { + stats_arena_bins_print(write_cb, cbopaque, json, large, huge, + i); + } if (large) - stats_arena_lruns_print(write_cb, cbopaque, i); + stats_arena_lruns_print(write_cb, cbopaque, json, huge, i); if (huge) - stats_arena_hchunks_print(write_cb, cbopaque, i); + stats_arena_hchunks_print(write_cb, cbopaque, json, i); +} + +static void +stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, + bool json, bool merged, bool unmerged) +{ + const char *cpv; + bool bv; + unsigned uv; + uint32_t u32v; + uint64_t u64v; + ssize_t ssv; + size_t sv, bsz, usz, ssz, sssz, cpsz; + + bsz = sizeof(bool); + usz = sizeof(unsigned); + ssz = sizeof(size_t); + sssz = sizeof(ssize_t); + cpsz = sizeof(const char *); + + CTL_GET("version", &cpv, const char *); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\"version\": \"%s\",\n", cpv); + } else + malloc_cprintf(write_cb, cbopaque, "Version: %s\n", cpv); + + /* config. */ +#define CONFIG_WRITE_BOOL_JSON(n, c) \ + if (json) { \ + CTL_GET("config."#n, &bv, bool); \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": %s%s\n", bv ? "true" : "false", \ + (c)); \ + } + + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\"config\": {\n"); + } + + CONFIG_WRITE_BOOL_JSON(cache_oblivious, ",") + + CTL_GET("config.debug", &bv, bool); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"debug\": %s,\n", bv ? "true" : "false"); + } else { + malloc_cprintf(write_cb, cbopaque, "Assertions %s\n", + bv ? 
"enabled" : "disabled"); + } + + CONFIG_WRITE_BOOL_JSON(fill, ",") + CONFIG_WRITE_BOOL_JSON(lazy_lock, ",") + + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"malloc_conf\": \"%s\",\n", + config_malloc_conf); + } else { + malloc_cprintf(write_cb, cbopaque, + "config.malloc_conf: \"%s\"\n", config_malloc_conf); + } + + CONFIG_WRITE_BOOL_JSON(munmap, ",") + CONFIG_WRITE_BOOL_JSON(prof, ",") + CONFIG_WRITE_BOOL_JSON(prof_libgcc, ",") + CONFIG_WRITE_BOOL_JSON(prof_libunwind, ",") + CONFIG_WRITE_BOOL_JSON(stats, ",") + CONFIG_WRITE_BOOL_JSON(tcache, ",") + CONFIG_WRITE_BOOL_JSON(tls, ",") + CONFIG_WRITE_BOOL_JSON(utrace, ",") + CONFIG_WRITE_BOOL_JSON(valgrind, ",") + CONFIG_WRITE_BOOL_JSON(xmalloc, "") + + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t},\n"); + } +#undef CONFIG_WRITE_BOOL_JSON + + /* opt. */ +#define OPT_WRITE_BOOL(n, c) \ + if (je_mallctl("opt."#n, (void *)&bv, &bsz, NULL, 0) == 0) { \ + if (json) { \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": %s%s\n", bv ? "true" : \ + "false", (c)); \ + } else { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %s\n", bv ? "true" : "false"); \ + } \ + } +#define OPT_WRITE_BOOL_MUTABLE(n, m, c) { \ + bool bv2; \ + if (je_mallctl("opt."#n, (void *)&bv, &bsz, NULL, 0) == 0 && \ + je_mallctl(#m, &bv2, (void *)&bsz, NULL, 0) == 0) { \ + if (json) { \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": %s%s\n", bv ? "true" : \ + "false", (c)); \ + } else { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %s ("#m": %s)\n", bv ? "true" \ + : "false", bv2 ? 
"true" : "false"); \ + } \ + } \ +} +#define OPT_WRITE_UNSIGNED(n, c) \ + if (je_mallctl("opt."#n, (void *)&uv, &usz, NULL, 0) == 0) { \ + if (json) { \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": %u%s\n", uv, (c)); \ + } else { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %u\n", uv); \ + } \ + } +#define OPT_WRITE_SIZE_T(n, c) \ + if (je_mallctl("opt."#n, (void *)&sv, &ssz, NULL, 0) == 0) { \ + if (json) { \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": %zu%s\n", sv, (c)); \ + } else { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %zu\n", sv); \ + } \ + } +#define OPT_WRITE_SSIZE_T(n, c) \ + if (je_mallctl("opt."#n, (void *)&ssv, &sssz, NULL, 0) == 0) { \ + if (json) { \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": %zd%s\n", ssv, (c)); \ + } else { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %zd\n", ssv); \ + } \ + } +#define OPT_WRITE_SSIZE_T_MUTABLE(n, m, c) { \ + ssize_t ssv2; \ + if (je_mallctl("opt."#n, (void *)&ssv, &sssz, NULL, 0) == 0 && \ + je_mallctl(#m, (void *)&ssv2, &sssz, NULL, 0) == 0) { \ + if (json) { \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": %zd%s\n", ssv, (c)); \ + } else { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %zd ("#m": %zd)\n", \ + ssv, ssv2); \ + } \ + } \ +} +#define OPT_WRITE_CHAR_P(n, c) \ + if (je_mallctl("opt."#n, (void *)&cpv, &cpsz, NULL, 0) == 0) { \ + if (json) { \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": \"%s\"%s\n", cpv, (c)); \ + } else { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": \"%s\"\n", cpv); \ + } \ + } + + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\"opt\": {\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "Run-time option settings:\n"); + } + OPT_WRITE_BOOL(abort, ",") + OPT_WRITE_SIZE_T(lg_chunk, ",") + OPT_WRITE_CHAR_P(dss, ",") + OPT_WRITE_UNSIGNED(narenas, ",") + OPT_WRITE_CHAR_P(purge, ",") + if (json || opt_purge == purge_mode_ratio) { + 
OPT_WRITE_SSIZE_T_MUTABLE(lg_dirty_mult, + arenas.lg_dirty_mult, ",") + } + if (json || opt_purge == purge_mode_decay) { + OPT_WRITE_SSIZE_T_MUTABLE(decay_time, arenas.decay_time, ",") + } + OPT_WRITE_CHAR_P(junk, ",") + OPT_WRITE_SIZE_T(quarantine, ",") + OPT_WRITE_BOOL(redzone, ",") + OPT_WRITE_BOOL(zero, ",") + OPT_WRITE_BOOL(utrace, ",") + OPT_WRITE_BOOL(xmalloc, ",") + OPT_WRITE_BOOL(tcache, ",") + OPT_WRITE_SSIZE_T(lg_tcache_max, ",") + OPT_WRITE_BOOL(prof, ",") + OPT_WRITE_CHAR_P(prof_prefix, ",") + OPT_WRITE_BOOL_MUTABLE(prof_active, prof.active, ",") + OPT_WRITE_BOOL_MUTABLE(prof_thread_active_init, prof.thread_active_init, + ",") + OPT_WRITE_SSIZE_T_MUTABLE(lg_prof_sample, prof.lg_sample, ",") + OPT_WRITE_BOOL(prof_accum, ",") + OPT_WRITE_SSIZE_T(lg_prof_interval, ",") + OPT_WRITE_BOOL(prof_gdump, ",") + OPT_WRITE_BOOL(prof_final, ",") + OPT_WRITE_BOOL(prof_leak, ",") + /* + * stats_print is always emitted, so as long as stats_print comes last + * it's safe to unconditionally omit the comma here (rather than having + * to conditionally omit it elsewhere depending on configuration). + */ + OPT_WRITE_BOOL(stats_print, "") + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t},\n"); + } + +#undef OPT_WRITE_BOOL +#undef OPT_WRITE_BOOL_MUTABLE +#undef OPT_WRITE_SIZE_T +#undef OPT_WRITE_SSIZE_T +#undef OPT_WRITE_CHAR_P + + /* arenas. 
*/ + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\"arenas\": {\n"); + } + + CTL_GET("arenas.narenas", &uv, unsigned); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"narenas\": %u,\n", uv); + } else + malloc_cprintf(write_cb, cbopaque, "Arenas: %u\n", uv); + + CTL_GET("arenas.lg_dirty_mult", &ssv, ssize_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"lg_dirty_mult\": %zd,\n", ssv); + } else if (opt_purge == purge_mode_ratio) { + if (ssv >= 0) { + malloc_cprintf(write_cb, cbopaque, + "Min active:dirty page ratio per arena: " + "%u:1\n", (1U << ssv)); + } else { + malloc_cprintf(write_cb, cbopaque, + "Min active:dirty page ratio per arena: " + "N/A\n"); + } + } + CTL_GET("arenas.decay_time", &ssv, ssize_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"decay_time\": %zd,\n", ssv); + } else if (opt_purge == purge_mode_decay) { + malloc_cprintf(write_cb, cbopaque, + "Unused dirty page decay time: %zd%s\n", + ssv, (ssv < 0) ? " (no decay)" : ""); + } + + CTL_GET("arenas.quantum", &sv, size_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"quantum\": %zu,\n", sv); + } else + malloc_cprintf(write_cb, cbopaque, "Quantum size: %zu\n", sv); + + CTL_GET("arenas.page", &sv, size_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"page\": %zu,\n", sv); + } else + malloc_cprintf(write_cb, cbopaque, "Page size: %zu\n", sv); + + if (je_mallctl("arenas.tcache_max", (void *)&sv, &ssz, NULL, 0) == 0) { + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"tcache_max\": %zu,\n", sv); + } else { + malloc_cprintf(write_cb, cbopaque, + "Maximum thread-cached size class: %zu\n", sv); + } + } + + if (json) { + unsigned nbins, nlruns, nhchunks, i; + + CTL_GET("arenas.nbins", &nbins, unsigned); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"nbins\": %u,\n", nbins); + + CTL_GET("arenas.nhbins", &uv, unsigned); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"nhbins\": %u,\n", uv); + + 
malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"bin\": [\n"); + for (i = 0; i < nbins; i++) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t{\n"); + + CTL_M2_GET("arenas.bin.0.size", i, &sv, size_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"size\": %zu,\n", sv); + + CTL_M2_GET("arenas.bin.0.nregs", i, &u32v, uint32_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"nregs\": %"FMTu32",\n", u32v); + + CTL_M2_GET("arenas.bin.0.run_size", i, &sv, size_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"run_size\": %zu\n", sv); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t}%s\n", (i + 1 < nbins) ? "," : ""); + } + malloc_cprintf(write_cb, cbopaque, + "\t\t\t],\n"); + + CTL_GET("arenas.nlruns", &nlruns, unsigned); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"nlruns\": %u,\n", nlruns); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"lrun\": [\n"); + for (i = 0; i < nlruns; i++) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t{\n"); + + CTL_M2_GET("arenas.lrun.0.size", i, &sv, size_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"size\": %zu\n", sv); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t}%s\n", (i + 1 < nlruns) ? "," : ""); + } + malloc_cprintf(write_cb, cbopaque, + "\t\t\t],\n"); + + CTL_GET("arenas.nhchunks", &nhchunks, unsigned); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"nhchunks\": %u,\n", nhchunks); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"hchunk\": [\n"); + for (i = 0; i < nhchunks; i++) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t{\n"); + + CTL_M2_GET("arenas.hchunk.0.size", i, &sv, size_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"size\": %zu\n", sv); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t}%s\n", (i + 1 < nhchunks) ? "," : ""); + } + malloc_cprintf(write_cb, cbopaque, + "\t\t\t]\n"); + + malloc_cprintf(write_cb, cbopaque, + "\t\t},\n"); + } + + /* prof. 
*/ + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\"prof\": {\n"); + + CTL_GET("prof.thread_active_init", &bv, bool); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"thread_active_init\": %s,\n", bv ? "true" : + "false"); + + CTL_GET("prof.active", &bv, bool); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"active\": %s,\n", bv ? "true" : "false"); + + CTL_GET("prof.gdump", &bv, bool); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"gdump\": %s,\n", bv ? "true" : "false"); + + CTL_GET("prof.interval", &u64v, uint64_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"interval\": %"FMTu64",\n", u64v); + + CTL_GET("prof.lg_sample", &ssv, ssize_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"lg_sample\": %zd\n", ssv); + + malloc_cprintf(write_cb, cbopaque, + "\t\t}%s\n", (config_stats || merged || unmerged) ? "," : + ""); + } +} + +static void +stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, + bool json, bool merged, bool unmerged, bool bins, bool large, bool huge) +{ + size_t *cactive; + size_t allocated, active, metadata, resident, mapped, retained; + + CTL_GET("stats.cactive", &cactive, size_t *); + CTL_GET("stats.allocated", &allocated, size_t); + CTL_GET("stats.active", &active, size_t); + CTL_GET("stats.metadata", &metadata, size_t); + CTL_GET("stats.resident", &resident, size_t); + CTL_GET("stats.mapped", &mapped, size_t); + CTL_GET("stats.retained", &retained, size_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\"stats\": {\n"); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"cactive\": %zu,\n", atomic_read_z(cactive)); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"allocated\": %zu,\n", allocated); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"active\": %zu,\n", active); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"metadata\": %zu,\n", metadata); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"resident\": %zu,\n", resident); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"mapped\": %zu,\n", 
mapped); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"retained\": %zu\n", retained); + + malloc_cprintf(write_cb, cbopaque, + "\t\t}%s\n", (merged || unmerged) ? "," : ""); + } else { + malloc_cprintf(write_cb, cbopaque, + "Allocated: %zu, active: %zu, metadata: %zu," + " resident: %zu, mapped: %zu, retained: %zu\n", + allocated, active, metadata, resident, mapped, retained); + malloc_cprintf(write_cb, cbopaque, + "Current active ceiling: %zu\n", + atomic_read_z(cactive)); + } + + if (merged || unmerged) { + unsigned narenas; + + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\"stats.arenas\": {\n"); + } + + CTL_GET("arenas.narenas", &narenas, unsigned); + { + VARIABLE_ARRAY(bool, initialized, narenas); + size_t isz; + unsigned i, j, ninitialized; + + isz = sizeof(bool) * narenas; + xmallctl("arenas.initialized", (void *)initialized, + &isz, NULL, 0); + for (i = ninitialized = 0; i < narenas; i++) { + if (initialized[i]) + ninitialized++; + } + + /* Merged stats. */ + if (merged && (ninitialized > 1 || !unmerged)) { + /* Print merged arena stats. */ + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"merged\": {\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "\nMerged arenas stats:\n"); + } + stats_arena_print(write_cb, cbopaque, json, + narenas, bins, large, huge); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t}%s\n", (ninitialized > 1) ? + "," : ""); + } + } + + /* Unmerged stats. */ + for (i = j = 0; i < narenas; i++) { + if (initialized[i]) { + if (json) { + j++; + malloc_cprintf(write_cb, + cbopaque, + "\t\t\t\"%u\": {\n", i); + } else { + malloc_cprintf(write_cb, + cbopaque, "\narenas[%u]:\n", + i); + } + stats_arena_print(write_cb, cbopaque, + json, i, bins, large, huge); + if (json) { + malloc_cprintf(write_cb, + cbopaque, + "\t\t\t}%s\n", (j < + ninitialized) ? 
"," : ""); + } + } + } + } + + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t}\n"); + } + } } void @@ -363,6 +1067,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, int err; uint64_t epoch; size_t u64sz; + bool json = false; bool general = true; bool merged = true; bool unmerged = true; @@ -379,7 +1084,8 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, * */ epoch = 1; u64sz = sizeof(uint64_t); - err = je_mallctl("epoch", &epoch, &u64sz, &epoch, sizeof(uint64_t)); + err = je_mallctl("epoch", (void *)&epoch, &u64sz, (void *)&epoch, + sizeof(uint64_t)); if (err != 0) { if (err == EAGAIN) { malloc_write(": Memory allocation failure in " @@ -396,6 +1102,9 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, for (i = 0; opts[i] != '\0'; i++) { switch (opts[i]) { + case 'J': + json = true; + break; case 'g': general = false; break; @@ -419,222 +1128,27 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, } } - malloc_cprintf(write_cb, cbopaque, - "___ Begin jemalloc statistics ___\n"); - if (general) { - const char *cpv; - bool bv; - unsigned uv; - ssize_t ssv; - size_t sv, bsz, ssz, sssz, cpsz; - - bsz = sizeof(bool); - ssz = sizeof(size_t); - sssz = sizeof(ssize_t); - cpsz = sizeof(const char *); - - CTL_GET("version", &cpv, const char *); - malloc_cprintf(write_cb, cbopaque, "Version: %s\n", cpv); - CTL_GET("config.debug", &bv, bool); - malloc_cprintf(write_cb, cbopaque, "Assertions %s\n", - bv ? "enabled" : "disabled"); - -#define OPT_WRITE_BOOL(n) \ - if (je_mallctl("opt."#n, &bv, &bsz, NULL, 0) == 0) { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %s\n", bv ? "true" : "false"); \ - } -#define OPT_WRITE_BOOL_MUTABLE(n, m) { \ - bool bv2; \ - if (je_mallctl("opt."#n, &bv, &bsz, NULL, 0) == 0 && \ - je_mallctl(#m, &bv2, &bsz, NULL, 0) == 0) { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %s ("#m": %s)\n", bv ? "true" \ - : "false", bv2 ? 
"true" : "false"); \ - } \ -} -#define OPT_WRITE_SIZE_T(n) \ - if (je_mallctl("opt."#n, &sv, &ssz, NULL, 0) == 0) { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %zu\n", sv); \ - } -#define OPT_WRITE_SSIZE_T(n) \ - if (je_mallctl("opt."#n, &ssv, &sssz, NULL, 0) == 0) { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %zd\n", ssv); \ - } -#define OPT_WRITE_SSIZE_T_MUTABLE(n, m) { \ - ssize_t ssv2; \ - if (je_mallctl("opt."#n, &ssv, &sssz, NULL, 0) == 0 && \ - je_mallctl(#m, &ssv2, &sssz, NULL, 0) == 0) { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %zd ("#m": %zd)\n", \ - ssv, ssv2); \ - } \ -} -#define OPT_WRITE_CHAR_P(n) \ - if (je_mallctl("opt."#n, &cpv, &cpsz, NULL, 0) == 0) { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": \"%s\"\n", cpv); \ - } - + if (json) { malloc_cprintf(write_cb, cbopaque, - "Run-time option settings:\n"); - OPT_WRITE_BOOL(abort) - OPT_WRITE_SIZE_T(lg_chunk) - OPT_WRITE_CHAR_P(dss) - OPT_WRITE_SIZE_T(narenas) - OPT_WRITE_SSIZE_T_MUTABLE(lg_dirty_mult, arenas.lg_dirty_mult) - OPT_WRITE_BOOL(stats_print) - OPT_WRITE_CHAR_P(junk) - OPT_WRITE_SIZE_T(quarantine) - OPT_WRITE_BOOL(redzone) - OPT_WRITE_BOOL(zero) - OPT_WRITE_BOOL(utrace) - OPT_WRITE_BOOL(valgrind) - OPT_WRITE_BOOL(xmalloc) - OPT_WRITE_BOOL(tcache) - OPT_WRITE_SSIZE_T(lg_tcache_max) - OPT_WRITE_BOOL(prof) - OPT_WRITE_CHAR_P(prof_prefix) - OPT_WRITE_BOOL_MUTABLE(prof_active, prof.active) - OPT_WRITE_BOOL_MUTABLE(prof_thread_active_init, - prof.thread_active_init) - OPT_WRITE_SSIZE_T(lg_prof_sample) - OPT_WRITE_BOOL(prof_accum) - OPT_WRITE_SSIZE_T(lg_prof_interval) - OPT_WRITE_BOOL(prof_gdump) - OPT_WRITE_BOOL(prof_final) - OPT_WRITE_BOOL(prof_leak) - -#undef OPT_WRITE_BOOL -#undef OPT_WRITE_BOOL_MUTABLE -#undef OPT_WRITE_SIZE_T -#undef OPT_WRITE_SSIZE_T -#undef OPT_WRITE_CHAR_P - - malloc_cprintf(write_cb, cbopaque, "CPUs: %u\n", ncpus); - - CTL_GET("arenas.narenas", &uv, unsigned); - malloc_cprintf(write_cb, cbopaque, "Arenas: %u\n", uv); - - 
malloc_cprintf(write_cb, cbopaque, "Pointer size: %zu\n", - sizeof(void *)); - - CTL_GET("arenas.quantum", &sv, size_t); - malloc_cprintf(write_cb, cbopaque, "Quantum size: %zu\n", - sv); - - CTL_GET("arenas.page", &sv, size_t); - malloc_cprintf(write_cb, cbopaque, "Page size: %zu\n", sv); - - CTL_GET("arenas.lg_dirty_mult", &ssv, ssize_t); - if (ssv >= 0) { - malloc_cprintf(write_cb, cbopaque, - "Min active:dirty page ratio per arena: %u:1\n", - (1U << ssv)); - } else { - malloc_cprintf(write_cb, cbopaque, - "Min active:dirty page ratio per arena: N/A\n"); - } - if (je_mallctl("arenas.tcache_max", &sv, &ssz, NULL, 0) == 0) { - malloc_cprintf(write_cb, cbopaque, - "Maximum thread-cached size class: %zu\n", sv); - } - if (je_mallctl("opt.prof", &bv, &bsz, NULL, 0) == 0 && bv) { - CTL_GET("prof.lg_sample", &sv, size_t); - malloc_cprintf(write_cb, cbopaque, - "Average profile sample interval: %"FMTu64 - " (2^%zu)\n", (((uint64_t)1U) << sv), sv); - - CTL_GET("opt.lg_prof_interval", &ssv, ssize_t); - if (ssv >= 0) { - malloc_cprintf(write_cb, cbopaque, - "Average profile dump interval: %"FMTu64 - " (2^%zd)\n", - (((uint64_t)1U) << ssv), ssv); - } else { - malloc_cprintf(write_cb, cbopaque, - "Average profile dump interval: N/A\n"); - } - } - CTL_GET("opt.lg_chunk", &sv, size_t); + "{\n" + "\t\"jemalloc\": {\n"); + } else { malloc_cprintf(write_cb, cbopaque, - "Chunk size: %zu (2^%zu)\n", (ZU(1) << sv), sv); + "___ Begin jemalloc statistics ___\n"); } + if (general) + stats_general_print(write_cb, cbopaque, json, merged, unmerged); if (config_stats) { - size_t *cactive; - size_t allocated, active, metadata, resident, mapped; - - CTL_GET("stats.cactive", &cactive, size_t *); - CTL_GET("stats.allocated", &allocated, size_t); - CTL_GET("stats.active", &active, size_t); - CTL_GET("stats.metadata", &metadata, size_t); - CTL_GET("stats.resident", &resident, size_t); - CTL_GET("stats.mapped", &mapped, size_t); - malloc_cprintf(write_cb, cbopaque, - "Allocated: %zu, active: %zu, 
metadata: %zu," - " resident: %zu, mapped: %zu\n", - allocated, active, metadata, resident, mapped); - malloc_cprintf(write_cb, cbopaque, - "Current active ceiling: %zu\n", - atomic_read_z(cactive)); - - if (merged) { - unsigned narenas; - - CTL_GET("arenas.narenas", &narenas, unsigned); - { - VARIABLE_ARRAY(bool, initialized, narenas); - size_t isz; - unsigned i, ninitialized; - - isz = sizeof(bool) * narenas; - xmallctl("arenas.initialized", initialized, - &isz, NULL, 0); - for (i = ninitialized = 0; i < narenas; i++) { - if (initialized[i]) - ninitialized++; - } - - if (ninitialized > 1 || !unmerged) { - /* Print merged arena stats. */ - malloc_cprintf(write_cb, cbopaque, - "\nMerged arenas stats:\n"); - stats_arena_print(write_cb, cbopaque, - narenas, bins, large, huge); - } - } - } - - if (unmerged) { - unsigned narenas; - - /* Print stats for each arena. */ - - CTL_GET("arenas.narenas", &narenas, unsigned); - { - VARIABLE_ARRAY(bool, initialized, narenas); - size_t isz; - unsigned i; - - isz = sizeof(bool) * narenas; - xmallctl("arenas.initialized", initialized, - &isz, NULL, 0); - - for (i = 0; i < narenas; i++) { - if (initialized[i]) { - malloc_cprintf(write_cb, - cbopaque, - "\narenas[%u]:\n", i); - stats_arena_print(write_cb, - cbopaque, i, bins, large, - huge); - } - } - } - } + stats_print_helper(write_cb, cbopaque, json, merged, unmerged, + bins, large, huge); + } + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t}\n" + "}\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "--- End jemalloc statistics ---\n"); } - malloc_cprintf(write_cb, cbopaque, "--- End jemalloc statistics ---\n"); } diff --git a/deps/jemalloc/src/tcache.c b/deps/jemalloc/src/tcache.c old mode 100644 new mode 100755 index fdafd0c62..21540ff46 --- a/deps/jemalloc/src/tcache.c +++ b/deps/jemalloc/src/tcache.c @@ -10,7 +10,7 @@ ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; tcache_bin_info_t *tcache_bin_info; static unsigned stack_nelms; /* Total stack elms per 
tcache. */ -size_t nhbins; +unsigned nhbins; size_t tcache_maxclass; tcaches_t *tcaches; @@ -23,10 +23,11 @@ static tcaches_t *tcaches_avail; /******************************************************************************/ -size_t tcache_salloc(const void *ptr) +size_t +tcache_salloc(tsdn_t *tsdn, const void *ptr) { - return (arena_salloc(ptr, false)); + return (arena_salloc(tsdn, ptr, false)); } void @@ -67,20 +68,19 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) tcache->next_gc_bin++; if (tcache->next_gc_bin == nhbins) tcache->next_gc_bin = 0; - tcache->ev_cnt = 0; } void * -tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache, - tcache_bin_t *tbin, szind_t binind) +tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, + tcache_bin_t *tbin, szind_t binind, bool *tcache_success) { void *ret; - arena_tcache_fill_small(arena, tbin, binind, config_prof ? + arena_tcache_fill_small(tsdn, arena, tbin, binind, config_prof ? tcache->prof_accumbytes : 0); if (config_prof) tcache->prof_accumbytes = 0; - ret = tcache_alloc_easy(tbin); + ret = tcache_alloc_easy(tbin, tcache_success); return (ret); } @@ -102,17 +102,18 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { /* Lock the arena bin associated with the first object. 
*/ arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( - tbin->avail[0]); + *(tbin->avail - 1)); arena_t *bin_arena = extent_node_arena_get(&chunk->node); arena_bin_t *bin = &bin_arena->bins[binind]; if (config_prof && bin_arena == arena) { - if (arena_prof_accum(arena, tcache->prof_accumbytes)) - prof_idump(); + if (arena_prof_accum(tsd_tsdn(tsd), arena, + tcache->prof_accumbytes)) + prof_idump(tsd_tsdn(tsd)); tcache->prof_accumbytes = 0; } - malloc_mutex_lock(&bin->lock); + malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); if (config_stats && bin_arena == arena) { assert(!merged_stats); merged_stats = true; @@ -122,16 +123,16 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, } ndeferred = 0; for (i = 0; i < nflush; i++) { - ptr = tbin->avail[i]; + ptr = *(tbin->avail - 1 - i); assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (extent_node_arena_get(&chunk->node) == bin_arena) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; arena_chunk_map_bits_t *bitselm = - arena_bitselm_get(chunk, pageind); - arena_dalloc_bin_junked_locked(bin_arena, chunk, - ptr, bitselm); + arena_bitselm_get_mutable(chunk, pageind); + arena_dalloc_bin_junked_locked(tsd_tsdn(tsd), + bin_arena, chunk, ptr, bitselm); } else { /* * This object was allocated via a different @@ -139,11 +140,12 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, * locked. Stash the object, so that it can be * handled in a future pass. */ - tbin->avail[ndeferred] = ptr; + *(tbin->avail - 1 - ndeferred) = ptr; ndeferred++; } } - malloc_mutex_unlock(&bin->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); + arena_decay_ticks(tsd_tsdn(tsd), bin_arena, nflush - ndeferred); } if (config_stats && !merged_stats) { /* @@ -151,15 +153,15 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, * arena, so the stats didn't get merged. Manually do so now. 
*/ arena_bin_t *bin = &arena->bins[binind]; - malloc_mutex_lock(&bin->lock); + malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); bin->stats.nflushes++; bin->stats.nrequests += tbin->tstats.nrequests; tbin->tstats.nrequests = 0; - malloc_mutex_unlock(&bin->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); } - memmove(tbin->avail, &tbin->avail[tbin->ncached - rem], - rem * sizeof(void *)); + memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem * + sizeof(void *)); tbin->ncached = rem; if ((int)tbin->ncached < tbin->low_water) tbin->low_water = tbin->ncached; @@ -182,13 +184,13 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { /* Lock the arena associated with the first object. */ arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( - tbin->avail[0]); + *(tbin->avail - 1)); arena_t *locked_arena = extent_node_arena_get(&chunk->node); UNUSED bool idump; if (config_prof) idump = false; - malloc_mutex_lock(&locked_arena->lock); + malloc_mutex_lock(tsd_tsdn(tsd), &locked_arena->lock); if ((config_prof || config_stats) && locked_arena == arena) { if (config_prof) { idump = arena_prof_accum_locked(arena, @@ -206,13 +208,13 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, } ndeferred = 0; for (i = 0; i < nflush; i++) { - ptr = tbin->avail[i]; + ptr = *(tbin->avail - 1 - i); assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (extent_node_arena_get(&chunk->node) == locked_arena) { - arena_dalloc_large_junked_locked(locked_arena, - chunk, ptr); + arena_dalloc_large_junked_locked(tsd_tsdn(tsd), + locked_arena, chunk, ptr); } else { /* * This object was allocated via a different @@ -220,62 +222,56 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, * Stash the object, so that it can be handled * in a future pass. 
*/ - tbin->avail[ndeferred] = ptr; + *(tbin->avail - 1 - ndeferred) = ptr; ndeferred++; } } - malloc_mutex_unlock(&locked_arena->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), &locked_arena->lock); if (config_prof && idump) - prof_idump(); + prof_idump(tsd_tsdn(tsd)); + arena_decay_ticks(tsd_tsdn(tsd), locked_arena, nflush - + ndeferred); } if (config_stats && !merged_stats) { /* * The flush loop didn't happen to flush to this thread's * arena, so the stats didn't get merged. Manually do so now. */ - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd_tsdn(tsd), &arena->lock); arena->stats.nrequests_large += tbin->tstats.nrequests; arena->stats.lstats[binind - NBINS].nrequests += tbin->tstats.nrequests; tbin->tstats.nrequests = 0; - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock); } - memmove(tbin->avail, &tbin->avail[tbin->ncached - rem], - rem * sizeof(void *)); + memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem * + sizeof(void *)); tbin->ncached = rem; if ((int)tbin->ncached < tbin->low_water) tbin->low_water = tbin->ncached; } -void -tcache_arena_associate(tcache_t *tcache, arena_t *arena) +static void +tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { if (config_stats) { /* Link into list of extant tcaches. */ - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); ql_elm_new(tcache, link); ql_tail_insert(&arena->tcache_ql, tcache, link); - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); } } -void -tcache_arena_reassociate(tcache_t *tcache, arena_t *oldarena, arena_t *newarena) -{ - - tcache_arena_dissociate(tcache, oldarena); - tcache_arena_associate(tcache, newarena); -} - -void -tcache_arena_dissociate(tcache_t *tcache, arena_t *arena) +static void +tcache_arena_dissociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { if (config_stats) { /* Unlink from list of extant tcaches. 
*/ - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); if (config_debug) { bool in_ql = false; tcache_t *iter; @@ -288,11 +284,20 @@ tcache_arena_dissociate(tcache_t *tcache, arena_t *arena) assert(in_ql); } ql_remove(&arena->tcache_ql, tcache, link); - tcache_stats_merge(tcache, arena); - malloc_mutex_unlock(&arena->lock); + tcache_stats_merge(tsdn, tcache, arena); + malloc_mutex_unlock(tsdn, &arena->lock); } } +void +tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *oldarena, + arena_t *newarena) +{ + + tcache_arena_dissociate(tsdn, tcache, oldarena); + tcache_arena_associate(tsdn, tcache, newarena); +} + tcache_t * tcache_get_hard(tsd_t *tsd) { @@ -306,11 +311,11 @@ tcache_get_hard(tsd_t *tsd) arena = arena_choose(tsd, NULL); if (unlikely(arena == NULL)) return (NULL); - return (tcache_create(tsd, arena)); + return (tcache_create(tsd_tsdn(tsd), arena)); } tcache_t * -tcache_create(tsd_t *tsd, arena_t *arena) +tcache_create(tsdn_t *tsdn, arena_t *arena) { tcache_t *tcache; size_t size, stack_offset; @@ -324,18 +329,26 @@ tcache_create(tsd_t *tsd, arena_t *arena) /* Avoid false cacheline sharing. */ size = sa2u(size, CACHELINE); - tcache = ipallocztm(tsd, size, CACHELINE, true, false, true, a0get()); + tcache = ipallocztm(tsdn, size, CACHELINE, true, NULL, true, + arena_get(TSDN_NULL, 0, true)); if (tcache == NULL) return (NULL); - tcache_arena_associate(tcache, arena); + tcache_arena_associate(tsdn, tcache, arena); + + ticker_init(&tcache->gc_ticker, TCACHE_GC_INCR); assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0); for (i = 0; i < nhbins; i++) { tcache->tbins[i].lg_fill_div = 1; + stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *); + /* + * avail points past the available space. Allocations will + * access the slots toward higher addresses (for the benefit of + * prefetch). 
+ */ tcache->tbins[i].avail = (void **)((uintptr_t)tcache + (uintptr_t)stack_offset); - stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *); } return (tcache); @@ -348,7 +361,7 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) unsigned i; arena = arena_choose(tsd, NULL); - tcache_arena_dissociate(tcache, arena); + tcache_arena_dissociate(tsd_tsdn(tsd), tcache, arena); for (i = 0; i < NBINS; i++) { tcache_bin_t *tbin = &tcache->tbins[i]; @@ -356,9 +369,9 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) if (config_stats && tbin->tstats.nrequests != 0) { arena_bin_t *bin = &arena->bins[i]; - malloc_mutex_lock(&bin->lock); + malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); bin->stats.nrequests += tbin->tstats.nrequests; - malloc_mutex_unlock(&bin->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); } } @@ -367,19 +380,19 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) tcache_bin_flush_large(tsd, tbin, i, 0, tcache); if (config_stats && tbin->tstats.nrequests != 0) { - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd_tsdn(tsd), &arena->lock); arena->stats.nrequests_large += tbin->tstats.nrequests; arena->stats.lstats[i - NBINS].nrequests += tbin->tstats.nrequests; - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock); } } if (config_prof && tcache->prof_accumbytes > 0 && - arena_prof_accum(arena, tcache->prof_accumbytes)) - prof_idump(); + arena_prof_accum(tsd_tsdn(tsd), arena, tcache->prof_accumbytes)) + prof_idump(tsd_tsdn(tsd)); - idalloctm(tsd, tcache, false, true); + idalloctm(tsd_tsdn(tsd), tcache, NULL, true, true); } void @@ -403,21 +416,22 @@ tcache_enabled_cleanup(tsd_t *tsd) /* Do nothing. */ } -/* Caller must own arena->lock. */ void -tcache_stats_merge(tcache_t *tcache, arena_t *arena) +tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { unsigned i; cassert(config_stats); + malloc_mutex_assert_owner(tsdn, &arena->lock); + /* Merge and reset tcache stats. 
*/ for (i = 0; i < NBINS; i++) { arena_bin_t *bin = &arena->bins[i]; tcache_bin_t *tbin = &tcache->tbins[i]; - malloc_mutex_lock(&bin->lock); + malloc_mutex_lock(tsdn, &bin->lock); bin->stats.nrequests += tbin->tstats.nrequests; - malloc_mutex_unlock(&bin->lock); + malloc_mutex_unlock(tsdn, &bin->lock); tbin->tstats.nrequests = 0; } @@ -433,11 +447,12 @@ tcache_stats_merge(tcache_t *tcache, arena_t *arena) bool tcaches_create(tsd_t *tsd, unsigned *r_ind) { + arena_t *arena; tcache_t *tcache; tcaches_t *elm; if (tcaches == NULL) { - tcaches = base_alloc(sizeof(tcache_t *) * + tcaches = base_alloc(tsd_tsdn(tsd), sizeof(tcache_t *) * (MALLOCX_TCACHE_MAX+1)); if (tcaches == NULL) return (true); @@ -445,7 +460,10 @@ tcaches_create(tsd_t *tsd, unsigned *r_ind) if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX) return (true); - tcache = tcache_create(tsd, a0get()); + arena = arena_ichoose(tsd, NULL); + if (unlikely(arena == NULL)) + return (true); + tcache = tcache_create(tsd_tsdn(tsd), arena); if (tcache == NULL) return (true); @@ -453,7 +471,7 @@ tcaches_create(tsd_t *tsd, unsigned *r_ind) elm = tcaches_avail; tcaches_avail = tcaches_avail->next; elm->tcache = tcache; - *r_ind = elm - tcaches; + *r_ind = (unsigned)(elm - tcaches); } else { elm = &tcaches[tcaches_past]; elm->tcache = tcache; @@ -491,7 +509,7 @@ tcaches_destroy(tsd_t *tsd, unsigned ind) } bool -tcache_boot(void) +tcache_boot(tsdn_t *tsdn) { unsigned i; @@ -499,17 +517,17 @@ tcache_boot(void) * If necessary, clamp opt_lg_tcache_max, now that large_maxclass is * known. 
*/ - if (opt_lg_tcache_max < 0 || (1U << opt_lg_tcache_max) < SMALL_MAXCLASS) + if (opt_lg_tcache_max < 0 || (ZU(1) << opt_lg_tcache_max) < SMALL_MAXCLASS) tcache_maxclass = SMALL_MAXCLASS; - else if ((1U << opt_lg_tcache_max) > large_maxclass) + else if ((ZU(1) << opt_lg_tcache_max) > large_maxclass) tcache_maxclass = large_maxclass; else - tcache_maxclass = (1U << opt_lg_tcache_max); + tcache_maxclass = (ZU(1) << opt_lg_tcache_max); nhbins = size2index(tcache_maxclass) + 1; /* Initialize tcache_bin_info. */ - tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins * + tcache_bin_info = (tcache_bin_info_t *)base_alloc(tsdn, nhbins * sizeof(tcache_bin_info_t)); if (tcache_bin_info == NULL) return (true); diff --git a/deps/jemalloc/src/ticker.c b/deps/jemalloc/src/ticker.c new file mode 100644 index 000000000..db0902404 --- /dev/null +++ b/deps/jemalloc/src/ticker.c @@ -0,0 +1,2 @@ +#define JEMALLOC_TICKER_C_ +#include "jemalloc/internal/jemalloc_internal.h" diff --git a/deps/jemalloc/src/tsd.c b/deps/jemalloc/src/tsd.c index 9ffe9afef..ec69a51c3 100644 --- a/deps/jemalloc/src/tsd.c +++ b/deps/jemalloc/src/tsd.c @@ -77,7 +77,7 @@ tsd_cleanup(void *arg) /* Do nothing. 
*/ break; case tsd_state_nominal: -#define O(n, t) \ +#define O(n, t) \ n##_cleanup(tsd); MALLOC_TSD #undef O @@ -106,15 +106,17 @@ MALLOC_TSD } } -bool +tsd_t * malloc_tsd_boot0(void) { + tsd_t *tsd; ncleanups = 0; if (tsd_boot0()) - return (true); - *tsd_arenas_cache_bypassp_get(tsd_fetch()) = true; - return (false); + return (NULL); + tsd = tsd_fetch(); + *tsd_arenas_tdata_bypassp_get(tsd) = true; + return (tsd); } void @@ -122,7 +124,7 @@ malloc_tsd_boot1(void) { tsd_boot1(); - *tsd_arenas_cache_bypassp_get(tsd_fetch()) = false; + *tsd_arenas_tdata_bypassp_get(tsd_fetch()) = false; } #ifdef _WIN32 @@ -148,13 +150,15 @@ _tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) #ifdef _MSC_VER # ifdef _M_IX86 # pragma comment(linker, "/INCLUDE:__tls_used") +# pragma comment(linker, "/INCLUDE:_tls_callback") # else # pragma comment(linker, "/INCLUDE:_tls_used") +# pragma comment(linker, "/INCLUDE:tls_callback") # endif # pragma section(".CRT$XLY",long,read) #endif JEMALLOC_SECTION(".CRT$XLY") JEMALLOC_ATTR(used) -static BOOL (WINAPI *const tls_callback)(HINSTANCE hinstDLL, +BOOL (WINAPI *const tls_callback)(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) = _tls_callback; #endif @@ -167,10 +171,10 @@ tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) tsd_init_block_t *iter; /* Check whether this thread has already inserted into the list. 
*/ - malloc_mutex_lock(&head->lock); + malloc_mutex_lock(TSDN_NULL, &head->lock); ql_foreach(iter, &head->blocks, link) { if (iter->thread == self) { - malloc_mutex_unlock(&head->lock); + malloc_mutex_unlock(TSDN_NULL, &head->lock); return (iter->data); } } @@ -178,7 +182,7 @@ tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) ql_elm_new(block, link); block->thread = self; ql_tail_insert(&head->blocks, block, link); - malloc_mutex_unlock(&head->lock); + malloc_mutex_unlock(TSDN_NULL, &head->lock); return (NULL); } @@ -186,8 +190,8 @@ void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block) { - malloc_mutex_lock(&head->lock); + malloc_mutex_lock(TSDN_NULL, &head->lock); ql_remove(&head->blocks, block, link); - malloc_mutex_unlock(&head->lock); + malloc_mutex_unlock(TSDN_NULL, &head->lock); } #endif diff --git a/deps/jemalloc/src/util.c b/deps/jemalloc/src/util.c old mode 100644 new mode 100755 index 4cb0d6c1e..dd8c23630 --- a/deps/jemalloc/src/util.c +++ b/deps/jemalloc/src/util.c @@ -1,3 +1,7 @@ +/* + * Define simple versions of assertion macros that won't recurse in case + * of assertion failures in malloc_*printf(). + */ #define assert(e) do { \ if (config_debug && !(e)) { \ malloc_write(": Failed assertion\n"); \ @@ -10,6 +14,7 @@ malloc_write(": Unreachable code reached\n"); \ abort(); \ } \ + unreachable(); \ } while (0) #define not_implemented() do { \ @@ -44,15 +49,19 @@ static void wrtmessage(void *cbopaque, const char *s) { -#ifdef SYS_write +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_write) /* * Use syscall(2) rather than write(2) when possible in order to avoid * the possibility of memory allocation within libc. This is necessary * on FreeBSD; most operating systems do not have this problem though. + * + * syscall() returns long or int, depending on platform, so capture the + * unused result in the widest plausible type to avoid compiler + * warnings. 
*/ - UNUSED int result = syscall(SYS_write, STDERR_FILENO, s, strlen(s)); + UNUSED long result = syscall(SYS_write, STDERR_FILENO, s, strlen(s)); #else - UNUSED int result = write(STDERR_FILENO, s, strlen(s)); + UNUSED ssize_t result = write(STDERR_FILENO, s, strlen(s)); #endif } @@ -82,7 +91,7 @@ buferror(int err, char *buf, size_t buflen) #ifdef _WIN32 FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, NULL, err, 0, - (LPSTR)buf, buflen, NULL); + (LPSTR)buf, (DWORD)buflen, NULL); return (0); #elif defined(__GLIBC__) && defined(_GNU_SOURCE) char *b = strerror_r(err, buf, buflen); @@ -191,7 +200,7 @@ malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base) p++; } if (neg) - ret = -ret; + ret = (uintmax_t)(-((intmax_t)ret)); if (p == ns) { /* No conversion performed. */ @@ -306,10 +315,9 @@ x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, size_t *slen_p) return (s); } -int +size_t malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { - int ret; size_t i; const char *f; @@ -400,6 +408,8 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) int prec = -1; int width = -1; unsigned char len = '?'; + char *s; + size_t slen; f++; /* Flags. */ @@ -490,8 +500,6 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) } /* Conversion specifier. */ switch (*f) { - char *s; - size_t slen; case '%': /* %% */ APPEND_C(*f); @@ -577,20 +585,19 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) str[i] = '\0'; else str[size - 1] = '\0'; - ret = i; #undef APPEND_C #undef APPEND_S #undef APPEND_PADDED_S #undef GET_ARG_NUMERIC - return (ret); + return (i); } JEMALLOC_FORMAT_PRINTF(3, 4) -int +size_t malloc_snprintf(char *str, size_t size, const char *format, ...) { - int ret; + size_t ret; va_list ap; va_start(ap, format); @@ -648,3 +655,12 @@ malloc_printf(const char *format, ...) 
malloc_vcprintf(NULL, NULL, format, ap); va_end(ap); } + +/* + * Restore normal assertion macros, in order to make it possible to compile all + * C files as a single concatenation. + */ +#undef assert +#undef not_reached +#undef not_implemented +#include "jemalloc/internal/assert.h" diff --git a/deps/jemalloc/src/witness.c b/deps/jemalloc/src/witness.c new file mode 100644 index 000000000..23753f246 --- /dev/null +++ b/deps/jemalloc/src/witness.c @@ -0,0 +1,136 @@ +#define JEMALLOC_WITNESS_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +void +witness_init(witness_t *witness, const char *name, witness_rank_t rank, + witness_comp_t *comp) +{ + + witness->name = name; + witness->rank = rank; + witness->comp = comp; +} + +#ifdef JEMALLOC_JET +#undef witness_lock_error +#define witness_lock_error JEMALLOC_N(n_witness_lock_error) +#endif +void +witness_lock_error(const witness_list_t *witnesses, const witness_t *witness) +{ + witness_t *w; + + malloc_printf(": Lock rank order reversal:"); + ql_foreach(w, witnesses, link) { + malloc_printf(" %s(%u)", w->name, w->rank); + } + malloc_printf(" %s(%u)\n", witness->name, witness->rank); + abort(); +} +#ifdef JEMALLOC_JET +#undef witness_lock_error +#define witness_lock_error JEMALLOC_N(witness_lock_error) +witness_lock_error_t *witness_lock_error = JEMALLOC_N(n_witness_lock_error); +#endif + +#ifdef JEMALLOC_JET +#undef witness_owner_error +#define witness_owner_error JEMALLOC_N(n_witness_owner_error) +#endif +void +witness_owner_error(const witness_t *witness) +{ + + malloc_printf(": Should own %s(%u)\n", witness->name, + witness->rank); + abort(); +} +#ifdef JEMALLOC_JET +#undef witness_owner_error +#define witness_owner_error JEMALLOC_N(witness_owner_error) +witness_owner_error_t *witness_owner_error = JEMALLOC_N(n_witness_owner_error); +#endif + +#ifdef JEMALLOC_JET +#undef witness_not_owner_error +#define witness_not_owner_error JEMALLOC_N(n_witness_not_owner_error) +#endif +void +witness_not_owner_error(const 
witness_t *witness) +{ + + malloc_printf(": Should not own %s(%u)\n", witness->name, + witness->rank); + abort(); +} +#ifdef JEMALLOC_JET +#undef witness_not_owner_error +#define witness_not_owner_error JEMALLOC_N(witness_not_owner_error) +witness_not_owner_error_t *witness_not_owner_error = + JEMALLOC_N(n_witness_not_owner_error); +#endif + +#ifdef JEMALLOC_JET +#undef witness_lockless_error +#define witness_lockless_error JEMALLOC_N(n_witness_lockless_error) +#endif +void +witness_lockless_error(const witness_list_t *witnesses) +{ + witness_t *w; + + malloc_printf(": Should not own any locks:"); + ql_foreach(w, witnesses, link) { + malloc_printf(" %s(%u)", w->name, w->rank); + } + malloc_printf("\n"); + abort(); +} +#ifdef JEMALLOC_JET +#undef witness_lockless_error +#define witness_lockless_error JEMALLOC_N(witness_lockless_error) +witness_lockless_error_t *witness_lockless_error = + JEMALLOC_N(n_witness_lockless_error); +#endif + +void +witnesses_cleanup(tsd_t *tsd) +{ + + witness_assert_lockless(tsd_tsdn(tsd)); + + /* Do nothing. */ +} + +void +witness_fork_cleanup(tsd_t *tsd) +{ + + /* Do nothing. */ +} + +void +witness_prefork(tsd_t *tsd) +{ + + tsd_witness_fork_set(tsd, true); +} + +void +witness_postfork_parent(tsd_t *tsd) +{ + + tsd_witness_fork_set(tsd, false); +} + +void +witness_postfork_child(tsd_t *tsd) +{ +#ifndef JEMALLOC_MUTEX_INIT_CB + witness_list_t *witnesses; + + witnesses = tsd_witnessesp_get(tsd); + ql_new(witnesses); +#endif + tsd_witness_fork_set(tsd, false); +} diff --git a/deps/jemalloc/src/zone.c b/deps/jemalloc/src/zone.c index 12e1734a9..0571920e4 100644 --- a/deps/jemalloc/src/zone.c +++ b/deps/jemalloc/src/zone.c @@ -4,7 +4,7 @@ #endif /* - * The malloc_default_purgeable_zone function is only available on >= 10.6. + * The malloc_default_purgeable_zone() function is only available on >= 10.6. * We need to check whether it is present at runtime, thus the weak_import. 
*/ extern malloc_zone_t *malloc_default_purgeable_zone(void) @@ -13,8 +13,9 @@ JEMALLOC_ATTR(weak_import); /******************************************************************************/ /* Data. */ -static malloc_zone_t zone; -static struct malloc_introspection_t zone_introspect; +static malloc_zone_t *default_zone, *purgeable_zone; +static malloc_zone_t jemalloc_zone; +static struct malloc_introspection_t jemalloc_zone_introspect; /******************************************************************************/ /* Function prototypes for non-inline static functions. */ @@ -56,7 +57,7 @@ zone_size(malloc_zone_t *zone, void *ptr) * not work in practice, we must check all pointers to assure that they * reside within a mapped chunk before determining size. */ - return (ivsalloc(ptr, config_prof)); + return (ivsalloc(tsdn_fetch(), ptr, config_prof)); } static void * @@ -87,7 +88,7 @@ static void zone_free(malloc_zone_t *zone, void *ptr) { - if (ivsalloc(ptr, config_prof) != 0) { + if (ivsalloc(tsdn_fetch(), ptr, config_prof) != 0) { je_free(ptr); return; } @@ -99,7 +100,7 @@ static void * zone_realloc(malloc_zone_t *zone, void *ptr, size_t size) { - if (ivsalloc(ptr, config_prof) != 0) + if (ivsalloc(tsdn_fetch(), ptr, config_prof) != 0) return (je_realloc(ptr, size)); return (realloc(ptr, size)); @@ -121,9 +122,11 @@ zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size) static void zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size) { + size_t alloc_size; - if (ivsalloc(ptr, config_prof) != 0) { - assert(ivsalloc(ptr, config_prof) == size); + alloc_size = ivsalloc(tsdn_fetch(), ptr, config_prof); + if (alloc_size != 0) { + assert(alloc_size == size); je_free(ptr); return; } @@ -162,89 +165,103 @@ static void zone_force_unlock(malloc_zone_t *zone) { + /* + * Call jemalloc_postfork_child() rather than + * jemalloc_postfork_parent(), because this function is executed by both + * parent and child. 
The parent can tolerate having state + * reinitialized, but the child cannot unlock mutexes that were locked + * by the parent. + */ if (isthreaded) - jemalloc_postfork_parent(); + jemalloc_postfork_child(); } -JEMALLOC_ATTR(constructor) -void -register_zone(void) +static void +zone_init(void) { - /* - * If something else replaced the system default zone allocator, don't - * register jemalloc's. - */ - malloc_zone_t *default_zone = malloc_default_zone(); - malloc_zone_t *purgeable_zone = NULL; - if (!default_zone->zone_name || - strcmp(default_zone->zone_name, "DefaultMallocZone") != 0) { - return; - } - - zone.size = (void *)zone_size; - zone.malloc = (void *)zone_malloc; - zone.calloc = (void *)zone_calloc; - zone.valloc = (void *)zone_valloc; - zone.free = (void *)zone_free; - zone.realloc = (void *)zone_realloc; - zone.destroy = (void *)zone_destroy; - zone.zone_name = "jemalloc_zone"; - zone.batch_malloc = NULL; - zone.batch_free = NULL; - zone.introspect = &zone_introspect; - zone.version = JEMALLOC_ZONE_VERSION; + jemalloc_zone.size = (void *)zone_size; + jemalloc_zone.malloc = (void *)zone_malloc; + jemalloc_zone.calloc = (void *)zone_calloc; + jemalloc_zone.valloc = (void *)zone_valloc; + jemalloc_zone.free = (void *)zone_free; + jemalloc_zone.realloc = (void *)zone_realloc; + jemalloc_zone.destroy = (void *)zone_destroy; + jemalloc_zone.zone_name = "jemalloc_zone"; + jemalloc_zone.batch_malloc = NULL; + jemalloc_zone.batch_free = NULL; + jemalloc_zone.introspect = &jemalloc_zone_introspect; + jemalloc_zone.version = JEMALLOC_ZONE_VERSION; #if (JEMALLOC_ZONE_VERSION >= 5) - zone.memalign = zone_memalign; + jemalloc_zone.memalign = zone_memalign; #endif #if (JEMALLOC_ZONE_VERSION >= 6) - zone.free_definite_size = zone_free_definite_size; + jemalloc_zone.free_definite_size = zone_free_definite_size; #endif #if (JEMALLOC_ZONE_VERSION >= 8) - zone.pressure_relief = NULL; + jemalloc_zone.pressure_relief = NULL; #endif - zone_introspect.enumerator = NULL; - 
zone_introspect.good_size = (void *)zone_good_size; - zone_introspect.check = NULL; - zone_introspect.print = NULL; - zone_introspect.log = NULL; - zone_introspect.force_lock = (void *)zone_force_lock; - zone_introspect.force_unlock = (void *)zone_force_unlock; - zone_introspect.statistics = NULL; + jemalloc_zone_introspect.enumerator = NULL; + jemalloc_zone_introspect.good_size = (void *)zone_good_size; + jemalloc_zone_introspect.check = NULL; + jemalloc_zone_introspect.print = NULL; + jemalloc_zone_introspect.log = NULL; + jemalloc_zone_introspect.force_lock = (void *)zone_force_lock; + jemalloc_zone_introspect.force_unlock = (void *)zone_force_unlock; + jemalloc_zone_introspect.statistics = NULL; #if (JEMALLOC_ZONE_VERSION >= 6) - zone_introspect.zone_locked = NULL; + jemalloc_zone_introspect.zone_locked = NULL; #endif #if (JEMALLOC_ZONE_VERSION >= 7) - zone_introspect.enable_discharge_checking = NULL; - zone_introspect.disable_discharge_checking = NULL; - zone_introspect.discharge = NULL; -#ifdef __BLOCKS__ - zone_introspect.enumerate_discharged_pointers = NULL; -#else - zone_introspect.enumerate_unavailable_without_blocks = NULL; -#endif + jemalloc_zone_introspect.enable_discharge_checking = NULL; + jemalloc_zone_introspect.disable_discharge_checking = NULL; + jemalloc_zone_introspect.discharge = NULL; +# ifdef __BLOCKS__ + jemalloc_zone_introspect.enumerate_discharged_pointers = NULL; +# else + jemalloc_zone_introspect.enumerate_unavailable_without_blocks = NULL; +# endif #endif +} + +static malloc_zone_t * +zone_default_get(void) +{ + malloc_zone_t **zones = NULL; + unsigned int num_zones = 0; /* - * The default purgeable zone is created lazily by OSX's libc. It uses - * the default zone when it is created for "small" allocations - * (< 15 KiB), but assumes the default zone is a scalable_zone. 
This - * obviously fails when the default zone is the jemalloc zone, so - * malloc_default_purgeable_zone is called beforehand so that the - * default purgeable zone is created when the default zone is still - * a scalable_zone. As purgeable zones only exist on >= 10.6, we need - * to check for the existence of malloc_default_purgeable_zone() at - * run time. + * On OSX 10.12, malloc_default_zone returns a special zone that is not + * present in the list of registered zones. That zone uses a "lite zone" + * if one is present (apparently enabled when malloc stack logging is + * enabled), or the first registered zone otherwise. In practice this + * means unless malloc stack logging is enabled, the first registered + * zone is the default. So get the list of zones to get the first one, + * instead of relying on malloc_default_zone. */ - if (malloc_default_purgeable_zone != NULL) - purgeable_zone = malloc_default_purgeable_zone(); + if (KERN_SUCCESS != malloc_get_all_zones(0, NULL, + (vm_address_t**)&zones, &num_zones)) { + /* + * Reset the value in case the failure happened after it was + * set. + */ + num_zones = 0; + } - /* Register the custom zone. At this point it won't be the default. */ - malloc_zone_register(&zone); + if (num_zones) + return (zones[0]); + + return (malloc_default_zone()); +} + +/* As written, this function can only promote jemalloc_zone. */ +static void +zone_promote(void) +{ + malloc_zone_t *zone; do { - default_zone = malloc_default_zone(); /* * Unregister and reregister the default zone. On OSX >= 10.6, * unregistering takes the last registered zone and places it @@ -255,6 +272,7 @@ register_zone(void) */ malloc_zone_unregister(default_zone); malloc_zone_register(default_zone); + /* * On OSX 10.6, having the default purgeable zone appear before * the default zone makes some things crash because it thinks it @@ -266,9 +284,47 @@ register_zone(void) * above, i.e. the default zone. 
Registering it again then puts * it at the end, obviously after the default zone. */ - if (purgeable_zone) { + if (purgeable_zone != NULL) { malloc_zone_unregister(purgeable_zone); malloc_zone_register(purgeable_zone); } - } while (malloc_default_zone() != &zone); + + zone = zone_default_get(); + } while (zone != &jemalloc_zone); +} + +JEMALLOC_ATTR(constructor) +void +zone_register(void) +{ + + /* + * If something else replaced the system default zone allocator, don't + * register jemalloc's. + */ + default_zone = zone_default_get(); + if (!default_zone->zone_name || strcmp(default_zone->zone_name, + "DefaultMallocZone") != 0) + return; + + /* + * The default purgeable zone is created lazily by OSX's libc. It uses + * the default zone when it is created for "small" allocations + * (< 15 KiB), but assumes the default zone is a scalable_zone. This + * obviously fails when the default zone is the jemalloc zone, so + * malloc_default_purgeable_zone() is called beforehand so that the + * default purgeable zone is created when the default zone is still + * a scalable_zone. As purgeable zones only exist on >= 10.6, we need + * to check for the existence of malloc_default_purgeable_zone() at + * run time. + */ + purgeable_zone = (malloc_default_purgeable_zone == NULL) ? NULL : + malloc_default_purgeable_zone(); + + /* Register the custom zone. At this point it won't be the default. */ + zone_init(); + malloc_zone_register(&jemalloc_zone); + + /* Promote the custom zone to be default. 
*/ + zone_promote(); } diff --git a/deps/jemalloc/test/include/test/jemalloc_test.h.in b/deps/jemalloc/test/include/test/jemalloc_test.h.in index 455569da4..1f36e4695 100644 --- a/deps/jemalloc/test/include/test/jemalloc_test.h.in +++ b/deps/jemalloc/test/include/test/jemalloc_test.h.in @@ -11,7 +11,6 @@ #ifdef _WIN32 # include "msvc_compat/strings.h" #endif -#include #ifdef _WIN32 # include @@ -20,39 +19,6 @@ # include #endif -/******************************************************************************/ -/* - * Define always-enabled assertion macros, so that test assertions execute even - * if assertions are disabled in the library code. These definitions must - * exist prior to including "jemalloc/internal/util.h". - */ -#define assert(e) do { \ - if (!(e)) { \ - malloc_printf( \ - ": %s:%d: Failed assertion: \"%s\"\n", \ - __FILE__, __LINE__, #e); \ - abort(); \ - } \ -} while (0) - -#define not_reached() do { \ - malloc_printf( \ - ": %s:%d: Unreachable code reached\n", \ - __FILE__, __LINE__); \ - abort(); \ -} while (0) - -#define not_implemented() do { \ - malloc_printf(": %s:%d: Not implemented\n", \ - __FILE__, __LINE__); \ - abort(); \ -} while (0) - -#define assert_not_implemented(e) do { \ - if (!(e)) \ - not_implemented(); \ -} while (0) - #include "test/jemalloc_test_defs.h" #ifdef JEMALLOC_OSSPIN @@ -87,6 +53,14 @@ # include "jemalloc/internal/jemalloc_internal_defs.h" # include "jemalloc/internal/jemalloc_internal_macros.h" +static const bool config_debug = +#ifdef JEMALLOC_DEBUG + true +#else + false +#endif + ; + # define JEMALLOC_N(n) @private_namespace@##n # include "jemalloc/internal/private_namespace.h" @@ -94,6 +68,7 @@ # define JEMALLOC_H_STRUCTS # define JEMALLOC_H_EXTERNS # define JEMALLOC_H_INLINES +# include "jemalloc/internal/nstime.h" # include "jemalloc/internal/util.h" # include "jemalloc/internal/qr.h" # include "jemalloc/internal/ql.h" @@ -149,3 +124,40 @@ #include "test/thd.h" #define MEXP 19937 #include "test/SFMT.h" + 
+/******************************************************************************/ +/* + * Define always-enabled assertion macros, so that test assertions execute even + * if assertions are disabled in the library code. + */ +#undef assert +#undef not_reached +#undef not_implemented +#undef assert_not_implemented + +#define assert(e) do { \ + if (!(e)) { \ + malloc_printf( \ + ": %s:%d: Failed assertion: \"%s\"\n", \ + __FILE__, __LINE__, #e); \ + abort(); \ + } \ +} while (0) + +#define not_reached() do { \ + malloc_printf( \ + ": %s:%d: Unreachable code reached\n", \ + __FILE__, __LINE__); \ + abort(); \ +} while (0) + +#define not_implemented() do { \ + malloc_printf(": %s:%d: Not implemented\n", \ + __FILE__, __LINE__); \ + abort(); \ +} while (0) + +#define assert_not_implemented(e) do { \ + if (!(e)) \ + not_implemented(); \ +} while (0) diff --git a/deps/jemalloc/test/include/test/mtx.h b/deps/jemalloc/test/include/test/mtx.h index bbe822f54..58afbc3d1 100644 --- a/deps/jemalloc/test/include/test/mtx.h +++ b/deps/jemalloc/test/include/test/mtx.h @@ -8,6 +8,8 @@ typedef struct { #ifdef _WIN32 CRITICAL_SECTION lock; +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock lock; #elif (defined(JEMALLOC_OSSPIN)) OSSpinLock lock; #else diff --git a/deps/jemalloc/test/include/test/test.h b/deps/jemalloc/test/include/test/test.h index 3cf901fc4..c8112eb8b 100644 --- a/deps/jemalloc/test/include/test/test.h +++ b/deps/jemalloc/test/include/test/test.h @@ -311,6 +311,9 @@ label_test_end: \ #define test(...) \ p_test(__VA_ARGS__, NULL) +#define test_no_malloc_init(...) \ + p_test_no_malloc_init(__VA_ARGS__, NULL) + #define test_skip_if(e) do { \ if (e) { \ test_skip("%s:%s:%d: Test skipped: (%s)", \ @@ -324,6 +327,7 @@ void test_fail(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2); /* For private use by macros. 
*/ test_status_t p_test(test_t *t, ...); +test_status_t p_test_no_malloc_init(test_t *t, ...); void p_test_init(const char *name); void p_test_fini(void); void p_test_fail(const char *prefix, const char *message); diff --git a/deps/jemalloc/test/include/test/timer.h b/deps/jemalloc/test/include/test/timer.h index a7fefdfd1..ace6191b8 100644 --- a/deps/jemalloc/test/include/test/timer.h +++ b/deps/jemalloc/test/include/test/timer.h @@ -1,23 +1,8 @@ /* Simple timer, for use in benchmark reporting. */ -#include -#include - -#define JEMALLOC_CLOCK_GETTIME defined(_POSIX_MONOTONIC_CLOCK) \ - && _POSIX_MONOTONIC_CLOCK >= 0 - typedef struct { -#ifdef _WIN32 - FILETIME ft0; - FILETIME ft1; -#elif JEMALLOC_CLOCK_GETTIME - struct timespec ts0; - struct timespec ts1; - int clock_id; -#else - struct timeval tv0; - struct timeval tv1; -#endif + nstime_t t0; + nstime_t t1; } timedelta_t; void timer_start(timedelta_t *timer); diff --git a/deps/jemalloc/test/integration/MALLOCX_ARENA.c b/deps/jemalloc/test/integration/MALLOCX_ARENA.c old mode 100644 new mode 100755 index 30c203ae6..910a096fd --- a/deps/jemalloc/test/integration/MALLOCX_ARENA.c +++ b/deps/jemalloc/test/integration/MALLOCX_ARENA.c @@ -19,8 +19,8 @@ thd_start(void *arg) size_t sz; sz = sizeof(arena_ind); - assert_d_eq(mallctl("arenas.extend", &arena_ind, &sz, NULL, 0), 0, - "Error in arenas.extend"); + assert_d_eq(mallctl("arenas.extend", (void *)&arena_ind, &sz, NULL, 0), + 0, "Error in arenas.extend"); if (thread_ind % 4 != 3) { size_t mib[3]; diff --git a/deps/jemalloc/test/integration/aligned_alloc.c b/deps/jemalloc/test/integration/aligned_alloc.c index 609001487..58438421d 100644 --- a/deps/jemalloc/test/integration/aligned_alloc.c +++ b/deps/jemalloc/test/integration/aligned_alloc.c @@ -1,9 +1,20 @@ #include "test/jemalloc_test.h" #define CHUNK 0x400000 -/* #define MAXALIGN ((size_t)UINT64_C(0x80000000000)) */ -#define MAXALIGN ((size_t)0x2000000LU) -#define NITER 4 +#define MAXALIGN (((size_t)1) << 23) + +/* 
+ * On systems which can't merge extents, tests that call this function generate + * a lot of dirty memory very quickly. Purging between cycles mitigates + * potential OOM on e.g. 32-bit Windows. + */ +static void +purge(void) +{ + + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl error"); +} TEST_BEGIN(test_alignment_errors) { @@ -74,6 +85,7 @@ TEST_END TEST_BEGIN(test_alignment_and_size) { +#define NITER 4 size_t alignment, size, total; unsigned i; void *ps[NITER]; @@ -110,7 +122,9 @@ TEST_BEGIN(test_alignment_and_size) } } } + purge(); } +#undef NITER } TEST_END diff --git a/deps/jemalloc/test/integration/allocated.c b/deps/jemalloc/test/integration/allocated.c old mode 100644 new mode 100755 index 3630e80ce..6ce145b3e --- a/deps/jemalloc/test/integration/allocated.c +++ b/deps/jemalloc/test/integration/allocated.c @@ -18,14 +18,14 @@ thd_start(void *arg) size_t sz, usize; sz = sizeof(a0); - if ((err = mallctl("thread.allocated", &a0, &sz, NULL, 0))) { + if ((err = mallctl("thread.allocated", (void *)&a0, &sz, NULL, 0))) { if (err == ENOENT) goto label_ENOENT; test_fail("%s(): Error in mallctl(): %s", __func__, strerror(err)); } sz = sizeof(ap0); - if ((err = mallctl("thread.allocatedp", &ap0, &sz, NULL, 0))) { + if ((err = mallctl("thread.allocatedp", (void *)&ap0, &sz, NULL, 0))) { if (err == ENOENT) goto label_ENOENT; test_fail("%s(): Error in mallctl(): %s", __func__, @@ -36,14 +36,15 @@ thd_start(void *arg) "storage"); sz = sizeof(d0); - if ((err = mallctl("thread.deallocated", &d0, &sz, NULL, 0))) { + if ((err = mallctl("thread.deallocated", (void *)&d0, &sz, NULL, 0))) { if (err == ENOENT) goto label_ENOENT; test_fail("%s(): Error in mallctl(): %s", __func__, strerror(err)); } sz = sizeof(dp0); - if ((err = mallctl("thread.deallocatedp", &dp0, &sz, NULL, 0))) { + if ((err = mallctl("thread.deallocatedp", (void *)&dp0, &sz, NULL, + 0))) { if (err == ENOENT) goto label_ENOENT; test_fail("%s(): Error in mallctl(): %s", 
__func__, @@ -57,9 +58,9 @@ thd_start(void *arg) assert_ptr_not_null(p, "Unexpected malloc() error"); sz = sizeof(a1); - mallctl("thread.allocated", &a1, &sz, NULL, 0); + mallctl("thread.allocated", (void *)&a1, &sz, NULL, 0); sz = sizeof(ap1); - mallctl("thread.allocatedp", &ap1, &sz, NULL, 0); + mallctl("thread.allocatedp", (void *)&ap1, &sz, NULL, 0); assert_u64_eq(*ap1, a1, "Dereferenced \"thread.allocatedp\" value should equal " "\"thread.allocated\" value"); @@ -74,9 +75,9 @@ thd_start(void *arg) free(p); sz = sizeof(d1); - mallctl("thread.deallocated", &d1, &sz, NULL, 0); + mallctl("thread.deallocated", (void *)&d1, &sz, NULL, 0); sz = sizeof(dp1); - mallctl("thread.deallocatedp", &dp1, &sz, NULL, 0); + mallctl("thread.deallocatedp", (void *)&dp1, &sz, NULL, 0); assert_u64_eq(*dp1, d1, "Dereferenced \"thread.deallocatedp\" value should equal " "\"thread.deallocated\" value"); diff --git a/deps/jemalloc/test/integration/chunk.c b/deps/jemalloc/test/integration/chunk.c index af1c9a53e..94cf0025a 100644 --- a/deps/jemalloc/test/integration/chunk.c +++ b/deps/jemalloc/test/integration/chunk.c @@ -121,6 +121,10 @@ TEST_BEGIN(test_chunk) { void *p; size_t old_size, new_size, large0, large1, huge0, huge1, huge2, sz; + unsigned arena_ind; + int flags; + size_t hooks_mib[3], purge_mib[3]; + size_t hooks_miblen, purge_miblen; chunk_hooks_t new_hooks = { chunk_alloc, chunk_dalloc, @@ -132,11 +136,21 @@ TEST_BEGIN(test_chunk) }; bool xallocx_success_a, xallocx_success_b, xallocx_success_c; + sz = sizeof(unsigned); + assert_d_eq(mallctl("arenas.extend", (void *)&arena_ind, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); + flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; + /* Install custom chunk hooks. 
*/ + hooks_miblen = sizeof(hooks_mib)/sizeof(size_t); + assert_d_eq(mallctlnametomib("arena.0.chunk_hooks", hooks_mib, + &hooks_miblen), 0, "Unexpected mallctlnametomib() failure"); + hooks_mib[1] = (size_t)arena_ind; old_size = sizeof(chunk_hooks_t); new_size = sizeof(chunk_hooks_t); - assert_d_eq(mallctl("arena.0.chunk_hooks", &old_hooks, &old_size, - &new_hooks, new_size), 0, "Unexpected chunk_hooks error"); + assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, (void *)&old_hooks, + &old_size, (void *)&new_hooks, new_size), 0, + "Unexpected chunk_hooks error"); orig_hooks = old_hooks; assert_ptr_ne(old_hooks.alloc, chunk_alloc, "Unexpected alloc error"); assert_ptr_ne(old_hooks.dalloc, chunk_dalloc, @@ -151,59 +165,63 @@ TEST_BEGIN(test_chunk) /* Get large size classes. */ sz = sizeof(size_t); - assert_d_eq(mallctl("arenas.lrun.0.size", &large0, &sz, NULL, 0), 0, - "Unexpected arenas.lrun.0.size failure"); - assert_d_eq(mallctl("arenas.lrun.1.size", &large1, &sz, NULL, 0), 0, - "Unexpected arenas.lrun.1.size failure"); + assert_d_eq(mallctl("arenas.lrun.0.size", (void *)&large0, &sz, NULL, + 0), 0, "Unexpected arenas.lrun.0.size failure"); + assert_d_eq(mallctl("arenas.lrun.1.size", (void *)&large1, &sz, NULL, + 0), 0, "Unexpected arenas.lrun.1.size failure"); /* Get huge size classes. 
*/ - assert_d_eq(mallctl("arenas.hchunk.0.size", &huge0, &sz, NULL, 0), 0, - "Unexpected arenas.hchunk.0.size failure"); - assert_d_eq(mallctl("arenas.hchunk.1.size", &huge1, &sz, NULL, 0), 0, - "Unexpected arenas.hchunk.1.size failure"); - assert_d_eq(mallctl("arenas.hchunk.2.size", &huge2, &sz, NULL, 0), 0, - "Unexpected arenas.hchunk.2.size failure"); + assert_d_eq(mallctl("arenas.hchunk.0.size", (void *)&huge0, &sz, NULL, + 0), 0, "Unexpected arenas.hchunk.0.size failure"); + assert_d_eq(mallctl("arenas.hchunk.1.size", (void *)&huge1, &sz, NULL, + 0), 0, "Unexpected arenas.hchunk.1.size failure"); + assert_d_eq(mallctl("arenas.hchunk.2.size", (void *)&huge2, &sz, NULL, + 0), 0, "Unexpected arenas.hchunk.2.size failure"); /* Test dalloc/decommit/purge cascade. */ + purge_miblen = sizeof(purge_mib)/sizeof(size_t); + assert_d_eq(mallctlnametomib("arena.0.purge", purge_mib, &purge_miblen), + 0, "Unexpected mallctlnametomib() failure"); + purge_mib[1] = (size_t)arena_ind; do_dalloc = false; do_decommit = false; - p = mallocx(huge0 * 2, 0); + p = mallocx(huge0 * 2, flags); assert_ptr_not_null(p, "Unexpected mallocx() error"); did_dalloc = false; did_decommit = false; did_purge = false; did_split = false; - xallocx_success_a = (xallocx(p, huge0, 0, 0) == huge0); - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, - "Unexpected arena.0.purge error"); + xallocx_success_a = (xallocx(p, huge0, 0, flags) == huge0); + assert_d_eq(mallctlbymib(purge_mib, purge_miblen, NULL, NULL, NULL, 0), + 0, "Unexpected arena.%u.purge error", arena_ind); if (xallocx_success_a) { assert_true(did_dalloc, "Expected dalloc"); assert_false(did_decommit, "Unexpected decommit"); assert_true(did_purge, "Expected purge"); } assert_true(did_split, "Expected split"); - dallocx(p, 0); + dallocx(p, flags); do_dalloc = true; /* Test decommit/commit and observe split/merge. 
*/ do_dalloc = false; do_decommit = true; - p = mallocx(huge0 * 2, 0); + p = mallocx(huge0 * 2, flags); assert_ptr_not_null(p, "Unexpected mallocx() error"); did_decommit = false; did_commit = false; did_split = false; did_merge = false; - xallocx_success_b = (xallocx(p, huge0, 0, 0) == huge0); - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, - "Unexpected arena.0.purge error"); + xallocx_success_b = (xallocx(p, huge0, 0, flags) == huge0); + assert_d_eq(mallctlbymib(purge_mib, purge_miblen, NULL, NULL, NULL, 0), + 0, "Unexpected arena.%u.purge error", arena_ind); if (xallocx_success_b) assert_true(did_split, "Expected split"); - xallocx_success_c = (xallocx(p, huge0 * 2, 0, 0) == huge0 * 2); + xallocx_success_c = (xallocx(p, huge0 * 2, 0, flags) == huge0 * 2); assert_b_eq(did_decommit, did_commit, "Expected decommit/commit match"); if (xallocx_success_b && xallocx_success_c) assert_true(did_merge, "Expected merge"); - dallocx(p, 0); + dallocx(p, flags); do_dalloc = true; do_decommit = false; @@ -214,43 +232,43 @@ TEST_BEGIN(test_chunk) * successful xallocx() from size=huge2 to size=huge1 is * guaranteed to leave trailing purgeable memory. */ - p = mallocx(huge2, 0); + p = mallocx(huge2, flags); assert_ptr_not_null(p, "Unexpected mallocx() error"); did_purge = false; - assert_zu_eq(xallocx(p, huge1, 0, 0), huge1, + assert_zu_eq(xallocx(p, huge1, 0, flags), huge1, "Unexpected xallocx() failure"); assert_true(did_purge, "Expected purge"); - dallocx(p, 0); + dallocx(p, flags); } /* Test decommit for large allocations. 
*/ do_decommit = true; - p = mallocx(large1, 0); + p = mallocx(large1, flags); assert_ptr_not_null(p, "Unexpected mallocx() error"); - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, - "Unexpected arena.0.purge error"); + assert_d_eq(mallctlbymib(purge_mib, purge_miblen, NULL, NULL, NULL, 0), + 0, "Unexpected arena.%u.purge error", arena_ind); did_decommit = false; - assert_zu_eq(xallocx(p, large0, 0, 0), large0, + assert_zu_eq(xallocx(p, large0, 0, flags), large0, "Unexpected xallocx() failure"); - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, - "Unexpected arena.0.purge error"); + assert_d_eq(mallctlbymib(purge_mib, purge_miblen, NULL, NULL, NULL, 0), + 0, "Unexpected arena.%u.purge error", arena_ind); did_commit = false; - assert_zu_eq(xallocx(p, large1, 0, 0), large1, + assert_zu_eq(xallocx(p, large1, 0, flags), large1, "Unexpected xallocx() failure"); assert_b_eq(did_decommit, did_commit, "Expected decommit/commit match"); - dallocx(p, 0); + dallocx(p, flags); do_decommit = false; /* Make sure non-huge allocation succeeds. */ - p = mallocx(42, 0); + p = mallocx(42, flags); assert_ptr_not_null(p, "Unexpected mallocx() error"); - dallocx(p, 0); + dallocx(p, flags); /* Restore chunk hooks. 
*/ - assert_d_eq(mallctl("arena.0.chunk_hooks", NULL, NULL, &old_hooks, - new_size), 0, "Unexpected chunk_hooks error"); - assert_d_eq(mallctl("arena.0.chunk_hooks", &old_hooks, &old_size, - NULL, 0), 0, "Unexpected chunk_hooks error"); + assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, NULL, NULL, + (void *)&old_hooks, new_size), 0, "Unexpected chunk_hooks error"); + assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, (void *)&old_hooks, + &old_size, NULL, 0), 0, "Unexpected chunk_hooks error"); assert_ptr_eq(old_hooks.alloc, orig_hooks.alloc, "Unexpected alloc error"); assert_ptr_eq(old_hooks.dalloc, orig_hooks.dalloc, diff --git a/deps/jemalloc/test/integration/mallocx.c b/deps/jemalloc/test/integration/mallocx.c old mode 100644 new mode 100755 index 6253175d6..d709eb301 --- a/deps/jemalloc/test/integration/mallocx.c +++ b/deps/jemalloc/test/integration/mallocx.c @@ -1,5 +1,9 @@ #include "test/jemalloc_test.h" +#ifdef JEMALLOC_FILL +const char *malloc_conf = "junk:false"; +#endif + static unsigned get_nsizes_impl(const char *cmd) { @@ -7,7 +11,7 @@ get_nsizes_impl(const char *cmd) size_t z; z = sizeof(unsigned); - assert_d_eq(mallctl(cmd, &ret, &z, NULL, 0), 0, + assert_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctl(\"%s\", ...) failure", cmd); return (ret); @@ -33,7 +37,7 @@ get_size_impl(const char *cmd, size_t ind) 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); mib[2] = ind; z = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, &ret, &z, NULL, 0), + assert_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind); return (ret); @@ -46,43 +50,84 @@ get_huge_size(size_t ind) return (get_size_impl("arenas.hchunk.0.size", ind)); } -TEST_BEGIN(test_oom) +/* + * On systems which can't merge extents, tests that call this function generate + * a lot of dirty memory very quickly. Purging between cycles mitigates + * potential OOM on e.g. 32-bit Windows. 
+ */ +static void +purge(void) { - size_t hugemax, size, alignment; + + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl error"); +} + +TEST_BEGIN(test_overflow) +{ + size_t hugemax; hugemax = get_huge_size(get_nhuge()-1); - /* - * It should be impossible to allocate two objects that each consume - * more than half the virtual address space. - */ - { - void *p; + assert_ptr_null(mallocx(hugemax+1, 0), + "Expected OOM for mallocx(size=%#zx, 0)", hugemax+1); - p = mallocx(hugemax, 0); - if (p != NULL) { - assert_ptr_null(mallocx(hugemax, 0), - "Expected OOM for mallocx(size=%#zx, 0)", hugemax); - dallocx(p, 0); - } + assert_ptr_null(mallocx(ZU(PTRDIFF_MAX)+1, 0), + "Expected OOM for mallocx(size=%#zx, 0)", ZU(PTRDIFF_MAX)+1); + + assert_ptr_null(mallocx(SIZE_T_MAX, 0), + "Expected OOM for mallocx(size=%#zx, 0)", SIZE_T_MAX); + + assert_ptr_null(mallocx(1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX)+1)), + "Expected OOM for mallocx(size=1, MALLOCX_ALIGN(%#zx))", + ZU(PTRDIFF_MAX)+1); +} +TEST_END + +TEST_BEGIN(test_oom) +{ + size_t hugemax; + bool oom; + void *ptrs[3]; + unsigned i; + + /* + * It should be impossible to allocate three objects that each consume + * nearly half the virtual address space. 
+ */ + hugemax = get_huge_size(get_nhuge()-1); + oom = false; + for (i = 0; i < sizeof(ptrs) / sizeof(void *); i++) { + ptrs[i] = mallocx(hugemax, 0); + if (ptrs[i] == NULL) + oom = true; } + assert_true(oom, + "Expected OOM during series of calls to mallocx(size=%zu, 0)", + hugemax); + for (i = 0; i < sizeof(ptrs) / sizeof(void *); i++) { + if (ptrs[i] != NULL) + dallocx(ptrs[i], 0); + } + purge(); #if LG_SIZEOF_PTR == 3 - size = ZU(0x8000000000000000); - alignment = ZU(0x8000000000000000); + assert_ptr_null(mallocx(0x8000000000000000ULL, + MALLOCX_ALIGN(0x8000000000000000ULL)), + "Expected OOM for mallocx()"); + assert_ptr_null(mallocx(0x8000000000000000ULL, + MALLOCX_ALIGN(0x80000000)), + "Expected OOM for mallocx()"); #else - size = ZU(0x80000000); - alignment = ZU(0x80000000); + assert_ptr_null(mallocx(0x80000000UL, MALLOCX_ALIGN(0x80000000UL)), + "Expected OOM for mallocx()"); #endif - assert_ptr_null(mallocx(size, MALLOCX_ALIGN(alignment)), - "Expected OOM for mallocx(size=%#zx, MALLOCX_ALIGN(%#zx)", size, - alignment); } TEST_END TEST_BEGIN(test_basic) { -#define MAXSZ (((size_t)1) << 26) +#define MAXSZ (((size_t)1) << 23) size_t sz; for (sz = 1; sz < MAXSZ; sz = nallocx(sz, 0) + 1) { @@ -91,23 +136,28 @@ TEST_BEGIN(test_basic) nsz = nallocx(sz, 0); assert_zu_ne(nsz, 0, "Unexpected nallocx() error"); p = mallocx(sz, 0); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + assert_ptr_not_null(p, + "Unexpected mallocx(size=%zx, flags=0) error", sz); rsz = sallocx(p, 0); assert_zu_ge(rsz, sz, "Real size smaller than expected"); assert_zu_eq(nsz, rsz, "nallocx()/sallocx() size mismatch"); dallocx(p, 0); p = mallocx(sz, 0); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + assert_ptr_not_null(p, + "Unexpected mallocx(size=%zx, flags=0) error", sz); dallocx(p, 0); nsz = nallocx(sz, MALLOCX_ZERO); assert_zu_ne(nsz, 0, "Unexpected nallocx() error"); p = mallocx(sz, MALLOCX_ZERO); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + 
assert_ptr_not_null(p, + "Unexpected mallocx(size=%zx, flags=MALLOCX_ZERO) error", + nsz); rsz = sallocx(p, 0); assert_zu_eq(nsz, rsz, "nallocx()/sallocx() rsize mismatch"); dallocx(p, 0); + purge(); } #undef MAXSZ } @@ -115,7 +165,7 @@ TEST_END TEST_BEGIN(test_alignment_and_size) { -#define MAXALIGN (((size_t)1) << 25) +#define MAXALIGN (((size_t)1) << 23) #define NITER 4 size_t nsz, rsz, sz, alignment, total; unsigned i; @@ -165,6 +215,7 @@ TEST_BEGIN(test_alignment_and_size) } } } + purge(); } #undef MAXALIGN #undef NITER @@ -176,6 +227,7 @@ main(void) { return (test( + test_overflow, test_oom, test_basic, test_alignment_and_size)); diff --git a/deps/jemalloc/test/integration/overflow.c b/deps/jemalloc/test/integration/overflow.c old mode 100644 new mode 100755 index 303d9b2d3..84a35652c --- a/deps/jemalloc/test/integration/overflow.c +++ b/deps/jemalloc/test/integration/overflow.c @@ -8,8 +8,8 @@ TEST_BEGIN(test_overflow) void *p; sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.nhchunks", &nhchunks, &sz, NULL, 0), 0, - "Unexpected mallctl() error"); + assert_d_eq(mallctl("arenas.nhchunks", (void *)&nhchunks, &sz, NULL, 0), + 0, "Unexpected mallctl() error"); miblen = sizeof(mib) / sizeof(size_t); assert_d_eq(mallctlnametomib("arenas.hchunk.0.size", mib, &miblen), 0, @@ -17,8 +17,8 @@ TEST_BEGIN(test_overflow) mib[2] = nhchunks - 1; sz = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, &max_size_class, &sz, NULL, 0), 0, - "Unexpected mallctlbymib() error"); + assert_d_eq(mallctlbymib(mib, miblen, (void *)&max_size_class, &sz, + NULL, 0), 0, "Unexpected mallctlbymib() error"); assert_ptr_null(malloc(max_size_class + 1), "Expected OOM due to over-sized allocation request"); diff --git a/deps/jemalloc/test/integration/posix_memalign.c b/deps/jemalloc/test/integration/posix_memalign.c index 19741c6cb..e22e10200 100644 --- a/deps/jemalloc/test/integration/posix_memalign.c +++ b/deps/jemalloc/test/integration/posix_memalign.c @@ -1,9 +1,20 @@ #include 
"test/jemalloc_test.h" #define CHUNK 0x400000 -/* #define MAXALIGN ((size_t)UINT64_C(0x80000000000)) */ -#define MAXALIGN ((size_t)0x2000000LU) -#define NITER 4 +#define MAXALIGN (((size_t)1) << 23) + +/* + * On systems which can't merge extents, tests that call this function generate + * a lot of dirty memory very quickly. Purging between cycles mitigates + * potential OOM on e.g. 32-bit Windows. + */ +static void +purge(void) +{ + + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl error"); +} TEST_BEGIN(test_alignment_errors) { @@ -66,6 +77,7 @@ TEST_END TEST_BEGIN(test_alignment_and_size) { +#define NITER 4 size_t alignment, size, total; unsigned i; int err; @@ -104,7 +116,9 @@ TEST_BEGIN(test_alignment_and_size) } } } + purge(); } +#undef NITER } TEST_END diff --git a/deps/jemalloc/test/integration/rallocx.c b/deps/jemalloc/test/integration/rallocx.c old mode 100644 new mode 100755 index be1b27b73..506bf1c90 --- a/deps/jemalloc/test/integration/rallocx.c +++ b/deps/jemalloc/test/integration/rallocx.c @@ -1,5 +1,51 @@ #include "test/jemalloc_test.h" +static unsigned +get_nsizes_impl(const char *cmd) +{ + unsigned ret; + size_t z; + + z = sizeof(unsigned); + assert_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, + "Unexpected mallctl(\"%s\", ...) failure", cmd); + + return (ret); +} + +static unsigned +get_nhuge(void) +{ + + return (get_nsizes_impl("arenas.nhchunks")); +} + +static size_t +get_size_impl(const char *cmd, size_t ind) +{ + size_t ret; + size_t z; + size_t mib[4]; + size_t miblen = 4; + + z = sizeof(size_t); + assert_d_eq(mallctlnametomib(cmd, mib, &miblen), + 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); + mib[2] = ind; + z = sizeof(size_t); + assert_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), + 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) 
failure", cmd, ind); + + return (ret); +} + +static size_t +get_huge_size(size_t ind) +{ + + return (get_size_impl("arenas.hchunk.0.size", ind)); +} + TEST_BEGIN(test_grow_and_shrink) { void *p, *q; @@ -138,22 +184,22 @@ TEST_END TEST_BEGIN(test_lg_align_and_zero) { void *p, *q; - size_t lg_align, sz; + unsigned lg_align; + size_t sz; #define MAX_LG_ALIGN 25 #define MAX_VALIDATE (ZU(1) << 22) - lg_align = ZU(0); + lg_align = 0; p = mallocx(1, MALLOCX_LG_ALIGN(lg_align)|MALLOCX_ZERO); assert_ptr_not_null(p, "Unexpected mallocx() error"); for (lg_align++; lg_align <= MAX_LG_ALIGN; lg_align++) { q = rallocx(p, 1, MALLOCX_LG_ALIGN(lg_align)|MALLOCX_ZERO); assert_ptr_not_null(q, - "Unexpected rallocx() error for lg_align=%zu", lg_align); + "Unexpected rallocx() error for lg_align=%u", lg_align); assert_ptr_null( (void *)((uintptr_t)q & ((ZU(1) << lg_align)-1)), - "%p inadequately aligned for lg_align=%zu", - q, lg_align); + "%p inadequately aligned for lg_align=%u", q, lg_align); sz = sallocx(q, 0); if ((sz << 1) <= MAX_VALIDATE) { assert_false(validate_fill(q, 0, 0, sz), @@ -173,6 +219,33 @@ TEST_BEGIN(test_lg_align_and_zero) } TEST_END +TEST_BEGIN(test_overflow) +{ + size_t hugemax; + void *p; + + hugemax = get_huge_size(get_nhuge()-1); + + p = mallocx(1, 0); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + + assert_ptr_null(rallocx(p, hugemax+1, 0), + "Expected OOM for rallocx(p, size=%#zx, 0)", hugemax+1); + + assert_ptr_null(rallocx(p, ZU(PTRDIFF_MAX)+1, 0), + "Expected OOM for rallocx(p, size=%#zx, 0)", ZU(PTRDIFF_MAX)+1); + + assert_ptr_null(rallocx(p, SIZE_T_MAX, 0), + "Expected OOM for rallocx(p, size=%#zx, 0)", SIZE_T_MAX); + + assert_ptr_null(rallocx(p, 1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX)+1)), + "Expected OOM for rallocx(p, size=1, MALLOCX_ALIGN(%#zx))", + ZU(PTRDIFF_MAX)+1); + + dallocx(p, 0); +} +TEST_END + int main(void) { @@ -181,5 +254,6 @@ main(void) test_grow_and_shrink, test_zero, test_align, - test_lg_align_and_zero)); + 
test_lg_align_and_zero, + test_overflow)); } diff --git a/deps/jemalloc/test/integration/sdallocx.c b/deps/jemalloc/test/integration/sdallocx.c index b84817d76..f92e0589c 100644 --- a/deps/jemalloc/test/integration/sdallocx.c +++ b/deps/jemalloc/test/integration/sdallocx.c @@ -1,7 +1,7 @@ #include "test/jemalloc_test.h" -#define MAXALIGN (((size_t)1) << 25) -#define NITER 4 +#define MAXALIGN (((size_t)1) << 22) +#define NITER 3 TEST_BEGIN(test_basic) { diff --git a/deps/jemalloc/test/integration/thread_arena.c b/deps/jemalloc/test/integration/thread_arena.c old mode 100644 new mode 100755 index 67be53513..7a35a6351 --- a/deps/jemalloc/test/integration/thread_arena.c +++ b/deps/jemalloc/test/integration/thread_arena.c @@ -16,8 +16,8 @@ thd_start(void *arg) free(p); size = sizeof(arena_ind); - if ((err = mallctl("thread.arena", &arena_ind, &size, &main_arena_ind, - sizeof(main_arena_ind)))) { + if ((err = mallctl("thread.arena", (void *)&arena_ind, &size, + (void *)&main_arena_ind, sizeof(main_arena_ind)))) { char buf[BUFERROR_BUF]; buferror(err, buf, sizeof(buf)); @@ -25,7 +25,8 @@ thd_start(void *arg) } size = sizeof(arena_ind); - if ((err = mallctl("thread.arena", &arena_ind, &size, NULL, 0))) { + if ((err = mallctl("thread.arena", (void *)&arena_ind, &size, NULL, + 0))) { char buf[BUFERROR_BUF]; buferror(err, buf, sizeof(buf)); @@ -50,7 +51,8 @@ TEST_BEGIN(test_thread_arena) assert_ptr_not_null(p, "Error in malloc()"); size = sizeof(arena_ind); - if ((err = mallctl("thread.arena", &arena_ind, &size, NULL, 0))) { + if ((err = mallctl("thread.arena", (void *)&arena_ind, &size, NULL, + 0))) { char buf[BUFERROR_BUF]; buferror(err, buf, sizeof(buf)); diff --git a/deps/jemalloc/test/integration/thread_tcache_enabled.c b/deps/jemalloc/test/integration/thread_tcache_enabled.c old mode 100644 new mode 100755 index f4e89c682..2c2825e19 --- a/deps/jemalloc/test/integration/thread_tcache_enabled.c +++ b/deps/jemalloc/test/integration/thread_tcache_enabled.c @@ -16,7 +16,8 @@ 
thd_start(void *arg) bool e0, e1; sz = sizeof(bool); - if ((err = mallctl("thread.tcache.enabled", &e0, &sz, NULL, 0))) { + if ((err = mallctl("thread.tcache.enabled", (void *)&e0, &sz, NULL, + 0))) { if (err == ENOENT) { assert_false(config_tcache, "ENOENT should only be returned if tcache is " @@ -27,53 +28,53 @@ thd_start(void *arg) if (e0) { e1 = false; - assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), - 0, "Unexpected mallctl() error"); + assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + (void *)&e1, sz), 0, "Unexpected mallctl() error"); assert_true(e0, "tcache should be enabled"); } e1 = true; - assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0, - "Unexpected mallctl() error"); + assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + (void *)&e1, sz), 0, "Unexpected mallctl() error"); assert_false(e0, "tcache should be disabled"); e1 = true; - assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0, - "Unexpected mallctl() error"); + assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + (void *)&e1, sz), 0, "Unexpected mallctl() error"); assert_true(e0, "tcache should be enabled"); e1 = false; - assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0, - "Unexpected mallctl() error"); + assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + (void *)&e1, sz), 0, "Unexpected mallctl() error"); assert_true(e0, "tcache should be enabled"); e1 = false; - assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0, - "Unexpected mallctl() error"); + assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + (void *)&e1, sz), 0, "Unexpected mallctl() error"); assert_false(e0, "tcache should be disabled"); free(malloc(1)); e1 = true; - assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0, - "Unexpected mallctl() error"); + assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + (void *)&e1, sz), 0, "Unexpected mallctl() error"); 
assert_false(e0, "tcache should be disabled"); free(malloc(1)); e1 = true; - assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0, - "Unexpected mallctl() error"); + assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + (void *)&e1, sz), 0, "Unexpected mallctl() error"); assert_true(e0, "tcache should be enabled"); free(malloc(1)); e1 = false; - assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0, - "Unexpected mallctl() error"); + assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + (void *)&e1, sz), 0, "Unexpected mallctl() error"); assert_true(e0, "tcache should be enabled"); free(malloc(1)); e1 = false; - assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0, - "Unexpected mallctl() error"); + assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + (void *)&e1, sz), 0, "Unexpected mallctl() error"); assert_false(e0, "tcache should be disabled"); free(malloc(1)); diff --git a/deps/jemalloc/test/integration/xallocx.c b/deps/jemalloc/test/integration/xallocx.c old mode 100644 new mode 100755 index 373625219..67e0a0e71 --- a/deps/jemalloc/test/integration/xallocx.c +++ b/deps/jemalloc/test/integration/xallocx.c @@ -1,5 +1,28 @@ #include "test/jemalloc_test.h" +#ifdef JEMALLOC_FILL +const char *malloc_conf = "junk:false"; +#endif + +/* + * Use a separate arena for xallocx() extension/contraction tests so that + * internal allocation e.g. by heap profiling can't interpose allocations where + * xallocx() would ordinarily be able to extend. 
+ */ +static unsigned +arena_ind(void) +{ + static unsigned ind = 0; + + if (ind == 0) { + size_t sz = sizeof(ind); + assert_d_eq(mallctl("arenas.extend", (void *)&ind, &sz, NULL, + 0), 0, "Unexpected mallctl failure creating arena"); + } + + return (ind); +} + TEST_BEGIN(test_same_size) { void *p; @@ -55,7 +78,7 @@ get_nsizes_impl(const char *cmd) size_t z; z = sizeof(unsigned); - assert_d_eq(mallctl(cmd, &ret, &z, NULL, 0), 0, + assert_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctl(\"%s\", ...) failure", cmd); return (ret); @@ -95,7 +118,7 @@ get_size_impl(const char *cmd, size_t ind) 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); mib[2] = ind; z = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, &ret, &z, NULL, 0), + assert_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind); return (ret); @@ -218,6 +241,7 @@ TEST_END TEST_BEGIN(test_extra_large) { + int flags = MALLOCX_ARENA(arena_ind()); size_t smallmax, large0, large1, large2, huge0, hugemax; void *p; @@ -229,121 +253,122 @@ TEST_BEGIN(test_extra_large) huge0 = get_huge_size(0); hugemax = get_huge_size(get_nhuge()-1); - p = mallocx(large2, 0); + p = mallocx(large2, flags); assert_ptr_not_null(p, "Unexpected mallocx() error"); - assert_zu_eq(xallocx(p, large2, 0, 0), large2, + assert_zu_eq(xallocx(p, large2, 0, flags), large2, "Unexpected xallocx() behavior"); /* Test size decrease with zero extra. */ - assert_zu_eq(xallocx(p, large0, 0, 0), large0, + assert_zu_eq(xallocx(p, large0, 0, flags), large0, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, smallmax, 0, 0), large0, + assert_zu_eq(xallocx(p, smallmax, 0, flags), large0, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, large2, 0, 0), large2, + assert_zu_eq(xallocx(p, large2, 0, flags), large2, "Unexpected xallocx() behavior"); /* Test size decrease with non-zero extra. 
*/ - assert_zu_eq(xallocx(p, large0, large2 - large0, 0), large2, + assert_zu_eq(xallocx(p, large0, large2 - large0, flags), large2, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, large1, large2 - large1, 0), large2, + assert_zu_eq(xallocx(p, large1, large2 - large1, flags), large2, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, large0, large1 - large0, 0), large1, + assert_zu_eq(xallocx(p, large0, large1 - large0, flags), large1, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, smallmax, large0 - smallmax, 0), large0, + assert_zu_eq(xallocx(p, smallmax, large0 - smallmax, flags), large0, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, large0, 0, 0), large0, + assert_zu_eq(xallocx(p, large0, 0, flags), large0, "Unexpected xallocx() behavior"); /* Test size increase with zero extra. */ - assert_zu_eq(xallocx(p, large2, 0, 0), large2, + assert_zu_eq(xallocx(p, large2, 0, flags), large2, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, huge0, 0, 0), large2, + assert_zu_eq(xallocx(p, huge0, 0, flags), large2, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, large0, 0, 0), large0, + assert_zu_eq(xallocx(p, large0, 0, flags), large0, "Unexpected xallocx() behavior"); /* Test size increase with non-zero extra. */ - assert_zu_lt(xallocx(p, large0, huge0 - large0, 0), huge0, + assert_zu_lt(xallocx(p, large0, huge0 - large0, flags), huge0, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, large0, 0, 0), large0, + assert_zu_eq(xallocx(p, large0, 0, flags), large0, "Unexpected xallocx() behavior"); /* Test size increase with non-zero extra. */ - assert_zu_eq(xallocx(p, large0, large2 - large0, 0), large2, + assert_zu_eq(xallocx(p, large0, large2 - large0, flags), large2, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, large2, 0, 0), large2, + assert_zu_eq(xallocx(p, large2, 0, flags), large2, "Unexpected xallocx() behavior"); /* Test size+extra overflow. 
*/ - assert_zu_lt(xallocx(p, large2, hugemax - large2 + 1, 0), huge0, + assert_zu_lt(xallocx(p, large2, hugemax - large2 + 1, flags), huge0, "Unexpected xallocx() behavior"); - dallocx(p, 0); + dallocx(p, flags); } TEST_END TEST_BEGIN(test_extra_huge) { - size_t largemax, huge0, huge1, huge2, hugemax; + int flags = MALLOCX_ARENA(arena_ind()); + size_t largemax, huge1, huge2, huge3, hugemax; void *p; /* Get size classes. */ largemax = get_large_size(get_nlarge()-1); - huge0 = get_huge_size(0); huge1 = get_huge_size(1); huge2 = get_huge_size(2); + huge3 = get_huge_size(3); hugemax = get_huge_size(get_nhuge()-1); - p = mallocx(huge2, 0); + p = mallocx(huge3, flags); assert_ptr_not_null(p, "Unexpected mallocx() error"); - assert_zu_eq(xallocx(p, huge2, 0, 0), huge2, + assert_zu_eq(xallocx(p, huge3, 0, flags), huge3, "Unexpected xallocx() behavior"); /* Test size decrease with zero extra. */ - assert_zu_ge(xallocx(p, huge0, 0, 0), huge0, + assert_zu_ge(xallocx(p, huge1, 0, flags), huge1, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, largemax, 0, 0), huge0, + assert_zu_ge(xallocx(p, largemax, 0, flags), huge1, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, huge2, 0, 0), huge2, + assert_zu_eq(xallocx(p, huge3, 0, flags), huge3, "Unexpected xallocx() behavior"); /* Test size decrease with non-zero extra. 
*/ - assert_zu_eq(xallocx(p, huge0, huge2 - huge0, 0), huge2, + assert_zu_eq(xallocx(p, huge1, huge3 - huge1, flags), huge3, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, huge1, huge2 - huge1, 0), huge2, + assert_zu_eq(xallocx(p, huge2, huge3 - huge2, flags), huge3, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, huge0, huge1 - huge0, 0), huge1, + assert_zu_eq(xallocx(p, huge1, huge2 - huge1, flags), huge2, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, largemax, huge0 - largemax, 0), huge0, + assert_zu_ge(xallocx(p, largemax, huge1 - largemax, flags), huge1, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, huge0, 0, 0), huge0, + assert_zu_ge(xallocx(p, huge1, 0, flags), huge1, "Unexpected xallocx() behavior"); /* Test size increase with zero extra. */ - assert_zu_le(xallocx(p, huge2, 0, 0), huge2, + assert_zu_le(xallocx(p, huge3, 0, flags), huge3, "Unexpected xallocx() behavior"); - assert_zu_le(xallocx(p, hugemax+1, 0, 0), huge2, + assert_zu_le(xallocx(p, hugemax+1, 0, flags), huge3, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, huge0, 0, 0), huge0, + assert_zu_ge(xallocx(p, huge1, 0, flags), huge1, "Unexpected xallocx() behavior"); /* Test size increase with non-zero extra. */ - assert_zu_le(xallocx(p, huge0, SIZE_T_MAX - huge0, 0), hugemax, + assert_zu_le(xallocx(p, huge1, SIZE_T_MAX - huge1, flags), hugemax, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, huge0, 0, 0), huge0, + assert_zu_ge(xallocx(p, huge1, 0, flags), huge1, "Unexpected xallocx() behavior"); /* Test size increase with non-zero extra. */ - assert_zu_le(xallocx(p, huge0, huge2 - huge0, 0), huge2, + assert_zu_le(xallocx(p, huge1, huge3 - huge1, flags), huge3, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, huge2, 0, 0), huge2, + assert_zu_eq(xallocx(p, huge3, 0, flags), huge3, "Unexpected xallocx() behavior"); /* Test size+extra overflow. 
*/ - assert_zu_le(xallocx(p, huge2, hugemax - huge2 + 1, 0), hugemax, + assert_zu_le(xallocx(p, huge3, hugemax - huge3 + 1, flags), hugemax, "Unexpected xallocx() behavior"); - dallocx(p, 0); + dallocx(p, flags); } TEST_END @@ -388,12 +413,13 @@ validate_fill(const void *p, uint8_t c, size_t offset, size_t len) static void test_zero(size_t szmin, size_t szmax) { + int flags = MALLOCX_ARENA(arena_ind()) | MALLOCX_ZERO; size_t sz, nsz; void *p; #define FILL_BYTE 0x7aU sz = szmax; - p = mallocx(sz, MALLOCX_ZERO); + p = mallocx(sz, flags); assert_ptr_not_null(p, "Unexpected mallocx() error"); assert_false(validate_fill(p, 0x00, 0, sz), "Memory not filled: sz=%zu", sz); @@ -408,14 +434,14 @@ test_zero(size_t szmin, size_t szmax) /* Shrink in place so that we can expect growing in place to succeed. */ sz = szmin; - assert_zu_eq(xallocx(p, sz, 0, MALLOCX_ZERO), sz, + assert_zu_eq(xallocx(p, sz, 0, flags), sz, "Unexpected xallocx() error"); assert_false(validate_fill(p, FILL_BYTE, 0, sz), "Memory not filled: sz=%zu", sz); for (sz = szmin; sz < szmax; sz = nsz) { - nsz = nallocx(sz+1, MALLOCX_ZERO); - assert_zu_eq(xallocx(p, sz+1, 0, MALLOCX_ZERO), nsz, + nsz = nallocx(sz+1, flags); + assert_zu_eq(xallocx(p, sz+1, 0, flags), nsz, "Unexpected xallocx() failure"); assert_false(validate_fill(p, FILL_BYTE, 0, sz), "Memory not filled: sz=%zu", sz); @@ -426,7 +452,7 @@ test_zero(size_t szmin, size_t szmax) "Memory not filled: nsz=%zu", nsz); } - dallocx(p, 0); + dallocx(p, flags); } TEST_BEGIN(test_zero_large) diff --git a/deps/jemalloc/test/src/mtx.c b/deps/jemalloc/test/src/mtx.c index 73bd02f6d..8a5dfdd99 100644 --- a/deps/jemalloc/test/src/mtx.c +++ b/deps/jemalloc/test/src/mtx.c @@ -11,6 +11,8 @@ mtx_init(mtx_t *mtx) #ifdef _WIN32 if (!InitializeCriticalSectionAndSpinCount(&mtx->lock, _CRT_SPINCOUNT)) return (true); +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + mtx->lock = OS_UNFAIR_LOCK_INIT; #elif (defined(JEMALLOC_OSSPIN)) mtx->lock = 0; #else @@ -33,6 +35,7 @@ 
mtx_fini(mtx_t *mtx) { #ifdef _WIN32 +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) #elif (defined(JEMALLOC_OSSPIN)) #else pthread_mutex_destroy(&mtx->lock); @@ -45,6 +48,8 @@ mtx_lock(mtx_t *mtx) #ifdef _WIN32 EnterCriticalSection(&mtx->lock); +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock_lock(&mtx->lock); #elif (defined(JEMALLOC_OSSPIN)) OSSpinLockLock(&mtx->lock); #else @@ -58,6 +63,8 @@ mtx_unlock(mtx_t *mtx) #ifdef _WIN32 LeaveCriticalSection(&mtx->lock); +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock_unlock(&mtx->lock); #elif (defined(JEMALLOC_OSSPIN)) OSSpinLockUnlock(&mtx->lock); #else diff --git a/deps/jemalloc/test/src/test.c b/deps/jemalloc/test/src/test.c index 8173614cf..d70cc7501 100644 --- a/deps/jemalloc/test/src/test.c +++ b/deps/jemalloc/test/src/test.c @@ -60,32 +60,30 @@ p_test_fini(void) malloc_printf("%s: %s\n", test_name, test_status_string(test_status)); } -test_status_t -p_test(test_t *t, ...) +static test_status_t +p_test_impl(bool do_malloc_init, test_t *t, va_list ap) { test_status_t ret; - va_list ap; - /* - * Make sure initialization occurs prior to running tests. Tests are - * special because they may use internal facilities prior to triggering - * initialization as a side effect of calling into the public API. This - * is a final safety that works even if jemalloc_constructor() doesn't - * run, as for MSVC builds. - */ - if (nallocx(1, 0) == 0) { - malloc_printf("Initialization error"); - return (test_status_fail); + if (do_malloc_init) { + /* + * Make sure initialization occurs prior to running tests. + * Tests are special because they may use internal facilities + * prior to triggering initialization as a side effect of + * calling into the public API. 
+ */ + if (nallocx(1, 0) == 0) { + malloc_printf("Initialization error"); + return (test_status_fail); + } } ret = test_status_pass; - va_start(ap, t); for (; t != NULL; t = va_arg(ap, test_t *)) { t(); if (test_status > ret) ret = test_status; } - va_end(ap); malloc_printf("--- %s: %u/%u, %s: %u/%u, %s: %u/%u ---\n", test_status_string(test_status_pass), @@ -98,6 +96,34 @@ p_test(test_t *t, ...) return (ret); } +test_status_t +p_test(test_t *t, ...) +{ + test_status_t ret; + va_list ap; + + ret = test_status_pass; + va_start(ap, t); + ret = p_test_impl(true, t, ap); + va_end(ap); + + return (ret); +} + +test_status_t +p_test_no_malloc_init(test_t *t, ...) +{ + test_status_t ret; + va_list ap; + + ret = test_status_pass; + va_start(ap, t); + ret = p_test_impl(false, t, ap); + va_end(ap); + + return (ret); +} + void p_test_fail(const char *prefix, const char *message) { diff --git a/deps/jemalloc/test/src/timer.c b/deps/jemalloc/test/src/timer.c index 0c93abaf9..3c7e63a26 100644 --- a/deps/jemalloc/test/src/timer.c +++ b/deps/jemalloc/test/src/timer.c @@ -4,50 +4,26 @@ void timer_start(timedelta_t *timer) { -#ifdef _WIN32 - GetSystemTimeAsFileTime(&timer->ft0); -#elif JEMALLOC_CLOCK_GETTIME - if (sysconf(_SC_MONOTONIC_CLOCK) <= 0) - timer->clock_id = CLOCK_REALTIME; - else - timer->clock_id = CLOCK_MONOTONIC; - clock_gettime(timer->clock_id, &timer->ts0); -#else - gettimeofday(&timer->tv0, NULL); -#endif + nstime_init(&timer->t0, 0); + nstime_update(&timer->t0); } void timer_stop(timedelta_t *timer) { -#ifdef _WIN32 - GetSystemTimeAsFileTime(&timer->ft0); -#elif JEMALLOC_CLOCK_GETTIME - clock_gettime(timer->clock_id, &timer->ts1); -#else - gettimeofday(&timer->tv1, NULL); -#endif + nstime_copy(&timer->t1, &timer->t0); + nstime_update(&timer->t1); } uint64_t timer_usec(const timedelta_t *timer) { + nstime_t delta; -#ifdef _WIN32 - uint64_t t0, t1; - t0 = (((uint64_t)timer->ft0.dwHighDateTime) << 32) | - timer->ft0.dwLowDateTime; - t1 = 
(((uint64_t)timer->ft1.dwHighDateTime) << 32) | - timer->ft1.dwLowDateTime; - return ((t1 - t0) / 10); -#elif JEMALLOC_CLOCK_GETTIME - return (((timer->ts1.tv_sec - timer->ts0.tv_sec) * 1000000) + - (timer->ts1.tv_nsec - timer->ts0.tv_nsec) / 1000); -#else - return (((timer->tv1.tv_sec - timer->tv0.tv_sec) * 1000000) + - timer->tv1.tv_usec - timer->tv0.tv_usec); -#endif + nstime_copy(&delta, &timer->t1); + nstime_subtract(&delta, &timer->t0); + return (nstime_ns(&delta) / 1000); } void @@ -56,9 +32,8 @@ timer_ratio(timedelta_t *a, timedelta_t *b, char *buf, size_t buflen) uint64_t t0 = timer_usec(a); uint64_t t1 = timer_usec(b); uint64_t mult; - unsigned i = 0; - unsigned j; - int n; + size_t i = 0; + size_t j, n; /* Whole. */ n = malloc_snprintf(&buf[i], buflen-i, "%"FMTu64, t0 / t1); diff --git a/deps/jemalloc/test/stress/microbench.c b/deps/jemalloc/test/stress/microbench.c index ee39fea7f..7dc45f89c 100644 --- a/deps/jemalloc/test/stress/microbench.c +++ b/deps/jemalloc/test/stress/microbench.c @@ -1,7 +1,8 @@ #include "test/jemalloc_test.h" JEMALLOC_INLINE_C void -time_func(timedelta_t *timer, uint64_t nwarmup, uint64_t niter, void (*func)(void)) +time_func(timedelta_t *timer, uint64_t nwarmup, uint64_t niter, + void (*func)(void)) { uint64_t i; diff --git a/deps/jemalloc/test/unit/a0.c b/deps/jemalloc/test/unit/a0.c new file mode 100644 index 000000000..b9ba45a3d --- /dev/null +++ b/deps/jemalloc/test/unit/a0.c @@ -0,0 +1,19 @@ +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_a0) +{ + void *p; + + p = a0malloc(1); + assert_ptr_not_null(p, "Unexpected a0malloc() error"); + a0dalloc(p); +} +TEST_END + +int +main(void) +{ + + return (test_no_malloc_init( + test_a0)); +} diff --git a/deps/jemalloc/test/unit/arena_reset.c b/deps/jemalloc/test/unit/arena_reset.c new file mode 100755 index 000000000..adf9baa5d --- /dev/null +++ b/deps/jemalloc/test/unit/arena_reset.c @@ -0,0 +1,159 @@ +#include "test/jemalloc_test.h" + +#ifdef JEMALLOC_PROF +const char 
*malloc_conf = "prof:true,lg_prof_sample:0"; +#endif + +static unsigned +get_nsizes_impl(const char *cmd) +{ + unsigned ret; + size_t z; + + z = sizeof(unsigned); + assert_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, + "Unexpected mallctl(\"%s\", ...) failure", cmd); + + return (ret); +} + +static unsigned +get_nsmall(void) +{ + + return (get_nsizes_impl("arenas.nbins")); +} + +static unsigned +get_nlarge(void) +{ + + return (get_nsizes_impl("arenas.nlruns")); +} + +static unsigned +get_nhuge(void) +{ + + return (get_nsizes_impl("arenas.nhchunks")); +} + +static size_t +get_size_impl(const char *cmd, size_t ind) +{ + size_t ret; + size_t z; + size_t mib[4]; + size_t miblen = 4; + + z = sizeof(size_t); + assert_d_eq(mallctlnametomib(cmd, mib, &miblen), + 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); + mib[2] = ind; + z = sizeof(size_t); + assert_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), + 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind); + + return (ret); +} + +static size_t +get_small_size(size_t ind) +{ + + return (get_size_impl("arenas.bin.0.size", ind)); +} + +static size_t +get_large_size(size_t ind) +{ + + return (get_size_impl("arenas.lrun.0.size", ind)); +} + +static size_t +get_huge_size(size_t ind) +{ + + return (get_size_impl("arenas.hchunk.0.size", ind)); +} + +TEST_BEGIN(test_arena_reset) +{ +#define NHUGE 4 + unsigned arena_ind, nsmall, nlarge, nhuge, nptrs, i; + size_t sz, miblen; + void **ptrs; + int flags; + size_t mib[3]; + tsdn_t *tsdn; + + test_skip_if((config_valgrind && unlikely(in_valgrind)) || (config_fill + && unlikely(opt_quarantine))); + + sz = sizeof(unsigned); + assert_d_eq(mallctl("arenas.extend", (void *)&arena_ind, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); + + flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; + + nsmall = get_nsmall(); + nlarge = get_nlarge(); + nhuge = get_nhuge() > NHUGE ? 
NHUGE : get_nhuge(); + nptrs = nsmall + nlarge + nhuge; + ptrs = (void **)malloc(nptrs * sizeof(void *)); + assert_ptr_not_null(ptrs, "Unexpected malloc() failure"); + + /* Allocate objects with a wide range of sizes. */ + for (i = 0; i < nsmall; i++) { + sz = get_small_size(i); + ptrs[i] = mallocx(sz, flags); + assert_ptr_not_null(ptrs[i], + "Unexpected mallocx(%zu, %#x) failure", sz, flags); + } + for (i = 0; i < nlarge; i++) { + sz = get_large_size(i); + ptrs[nsmall + i] = mallocx(sz, flags); + assert_ptr_not_null(ptrs[i], + "Unexpected mallocx(%zu, %#x) failure", sz, flags); + } + for (i = 0; i < nhuge; i++) { + sz = get_huge_size(i); + ptrs[nsmall + nlarge + i] = mallocx(sz, flags); + assert_ptr_not_null(ptrs[i], + "Unexpected mallocx(%zu, %#x) failure", sz, flags); + } + + tsdn = tsdn_fetch(); + + /* Verify allocations. */ + for (i = 0; i < nptrs; i++) { + assert_zu_gt(ivsalloc(tsdn, ptrs[i], false), 0, + "Allocation should have queryable size"); + } + + /* Reset. */ + miblen = sizeof(mib)/sizeof(size_t); + assert_d_eq(mallctlnametomib("arena.0.reset", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[1] = (size_t)arena_ind; + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); + + /* Verify allocations no longer exist. 
*/ + for (i = 0; i < nptrs; i++) { + assert_zu_eq(ivsalloc(tsdn, ptrs[i], false), 0, + "Allocation should no longer exist"); + } + + free(ptrs); +} +TEST_END + +int +main(void) +{ + + return (test( + test_arena_reset)); +} diff --git a/deps/jemalloc/test/unit/bitmap.c b/deps/jemalloc/test/unit/bitmap.c index 7da583d85..a2dd54630 100644 --- a/deps/jemalloc/test/unit/bitmap.c +++ b/deps/jemalloc/test/unit/bitmap.c @@ -6,7 +6,11 @@ TEST_BEGIN(test_bitmap_size) prev_size = 0; for (i = 1; i <= BITMAP_MAXBITS; i++) { - size_t size = bitmap_size(i); + bitmap_info_t binfo; + size_t size; + + bitmap_info_init(&binfo, i); + size = bitmap_size(&binfo); assert_true(size >= prev_size, "Bitmap size is smaller than expected"); prev_size = size; @@ -23,8 +27,8 @@ TEST_BEGIN(test_bitmap_init) bitmap_info_init(&binfo, i); { size_t j; - bitmap_t *bitmap = (bitmap_t *)malloc(sizeof(bitmap_t) * - bitmap_info_ngroups(&binfo)); + bitmap_t *bitmap = (bitmap_t *)malloc( + bitmap_size(&binfo)); bitmap_init(bitmap, &binfo); for (j = 0; j < i; j++) { @@ -46,8 +50,8 @@ TEST_BEGIN(test_bitmap_set) bitmap_info_init(&binfo, i); { size_t j; - bitmap_t *bitmap = (bitmap_t *)malloc(sizeof(bitmap_t) * - bitmap_info_ngroups(&binfo)); + bitmap_t *bitmap = (bitmap_t *)malloc( + bitmap_size(&binfo)); bitmap_init(bitmap, &binfo); for (j = 0; j < i; j++) @@ -69,8 +73,8 @@ TEST_BEGIN(test_bitmap_unset) bitmap_info_init(&binfo, i); { size_t j; - bitmap_t *bitmap = (bitmap_t *)malloc(sizeof(bitmap_t) * - bitmap_info_ngroups(&binfo)); + bitmap_t *bitmap = (bitmap_t *)malloc( + bitmap_size(&binfo)); bitmap_init(bitmap, &binfo); for (j = 0; j < i; j++) @@ -97,9 +101,9 @@ TEST_BEGIN(test_bitmap_sfu) bitmap_info_t binfo; bitmap_info_init(&binfo, i); { - ssize_t j; - bitmap_t *bitmap = (bitmap_t *)malloc(sizeof(bitmap_t) * - bitmap_info_ngroups(&binfo)); + size_t j; + bitmap_t *bitmap = (bitmap_t *)malloc( + bitmap_size(&binfo)); bitmap_init(bitmap, &binfo); /* Iteratively set bits starting at the beginning. 
*/ @@ -115,7 +119,7 @@ TEST_BEGIN(test_bitmap_sfu) * Iteratively unset bits starting at the end, and * verify that bitmap_sfu() reaches the unset bits. */ - for (j = i - 1; j >= 0; j--) { + for (j = i - 1; j < i; j--) { /* (i..0] */ bitmap_unset(bitmap, &binfo, j); assert_zd_eq(bitmap_sfu(bitmap, &binfo), j, "First unset bit should the bit previously " diff --git a/deps/jemalloc/test/unit/ckh.c b/deps/jemalloc/test/unit/ckh.c index b11759599..2cbc22688 100644 --- a/deps/jemalloc/test/unit/ckh.c +++ b/deps/jemalloc/test/unit/ckh.c @@ -7,8 +7,8 @@ TEST_BEGIN(test_new_delete) tsd = tsd_fetch(); - assert_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, ckh_string_keycomp), - "Unexpected ckh_new() error"); + assert_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, + ckh_string_keycomp), "Unexpected ckh_new() error"); ckh_delete(tsd, &ckh); assert_false(ckh_new(tsd, &ckh, 3, ckh_pointer_hash, @@ -32,8 +32,8 @@ TEST_BEGIN(test_count_insert_search_remove) tsd = tsd_fetch(); - assert_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, ckh_string_keycomp), - "Unexpected ckh_new() error"); + assert_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, + ckh_string_keycomp), "Unexpected ckh_new() error"); assert_zu_eq(ckh_count(&ckh), 0, "ckh_count() should return %zu, but it returned %zu", ZU(0), ckh_count(&ckh)); diff --git a/deps/jemalloc/test/unit/decay.c b/deps/jemalloc/test/unit/decay.c new file mode 100755 index 000000000..5af8f8074 --- /dev/null +++ b/deps/jemalloc/test/unit/decay.c @@ -0,0 +1,374 @@ +#include "test/jemalloc_test.h" + +const char *malloc_conf = "purge:decay,decay_time:1"; + +static nstime_monotonic_t *nstime_monotonic_orig; +static nstime_update_t *nstime_update_orig; + +static unsigned nupdates_mock; +static nstime_t time_mock; +static bool monotonic_mock; + +static bool +nstime_monotonic_mock(void) +{ + + return (monotonic_mock); +} + +static bool +nstime_update_mock(nstime_t *time) +{ + + nupdates_mock++; + if (monotonic_mock) + nstime_copy(time, &time_mock); + return 
(!monotonic_mock); +} + +TEST_BEGIN(test_decay_ticks) +{ + ticker_t *decay_ticker; + unsigned tick0, tick1; + size_t sz, huge0, large0; + void *p; + + test_skip_if(opt_purge != purge_mode_decay); + + decay_ticker = decay_ticker_get(tsd_fetch(), 0); + assert_ptr_not_null(decay_ticker, + "Unexpected failure getting decay ticker"); + + sz = sizeof(size_t); + assert_d_eq(mallctl("arenas.hchunk.0.size", (void *)&huge0, &sz, NULL, + 0), 0, "Unexpected mallctl failure"); + assert_d_eq(mallctl("arenas.lrun.0.size", (void *)&large0, &sz, NULL, + 0), 0, "Unexpected mallctl failure"); + + /* + * Test the standard APIs using a huge size class, since we can't + * control tcache interactions (except by completely disabling tcache + * for the entire test program). + */ + + /* malloc(). */ + tick0 = ticker_read(decay_ticker); + p = malloc(huge0); + assert_ptr_not_null(p, "Unexpected malloc() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, "Expected ticker to tick during malloc()"); + /* free(). */ + tick0 = ticker_read(decay_ticker); + free(p); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, "Expected ticker to tick during free()"); + + /* calloc(). */ + tick0 = ticker_read(decay_ticker); + p = calloc(1, huge0); + assert_ptr_not_null(p, "Unexpected calloc() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, "Expected ticker to tick during calloc()"); + free(p); + + /* posix_memalign(). */ + tick0 = ticker_read(decay_ticker); + assert_d_eq(posix_memalign(&p, sizeof(size_t), huge0), 0, + "Unexpected posix_memalign() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during posix_memalign()"); + free(p); + + /* aligned_alloc(). 
*/ + tick0 = ticker_read(decay_ticker); + p = aligned_alloc(sizeof(size_t), huge0); + assert_ptr_not_null(p, "Unexpected aligned_alloc() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during aligned_alloc()"); + free(p); + + /* realloc(). */ + /* Allocate. */ + tick0 = ticker_read(decay_ticker); + p = realloc(NULL, huge0); + assert_ptr_not_null(p, "Unexpected realloc() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); + /* Reallocate. */ + tick0 = ticker_read(decay_ticker); + p = realloc(p, huge0); + assert_ptr_not_null(p, "Unexpected realloc() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); + /* Deallocate. */ + tick0 = ticker_read(decay_ticker); + realloc(p, 0); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); + + /* + * Test the *allocx() APIs using huge, large, and small size classes, + * with tcache explicitly disabled. + */ + { + unsigned i; + size_t allocx_sizes[3]; + allocx_sizes[0] = huge0; + allocx_sizes[1] = large0; + allocx_sizes[2] = 1; + + for (i = 0; i < sizeof(allocx_sizes) / sizeof(size_t); i++) { + sz = allocx_sizes[i]; + + /* mallocx(). */ + tick0 = ticker_read(decay_ticker); + p = mallocx(sz, MALLOCX_TCACHE_NONE); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during mallocx() (sz=%zu)", + sz); + /* rallocx(). */ + tick0 = ticker_read(decay_ticker); + p = rallocx(p, sz, MALLOCX_TCACHE_NONE); + assert_ptr_not_null(p, "Unexpected rallocx() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during rallocx() (sz=%zu)", + sz); + /* xallocx(). 
*/ + tick0 = ticker_read(decay_ticker); + xallocx(p, sz, 0, MALLOCX_TCACHE_NONE); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during xallocx() (sz=%zu)", + sz); + /* dallocx(). */ + tick0 = ticker_read(decay_ticker); + dallocx(p, MALLOCX_TCACHE_NONE); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during dallocx() (sz=%zu)", + sz); + /* sdallocx(). */ + p = mallocx(sz, MALLOCX_TCACHE_NONE); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + tick0 = ticker_read(decay_ticker); + sdallocx(p, sz, MALLOCX_TCACHE_NONE); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during sdallocx() " + "(sz=%zu)", sz); + } + } + + /* + * Test tcache fill/flush interactions for large and small size classes, + * using an explicit tcache. + */ + if (config_tcache) { + unsigned tcache_ind, i; + size_t tcache_sizes[2]; + tcache_sizes[0] = large0; + tcache_sizes[1] = 1; + + sz = sizeof(unsigned); + assert_d_eq(mallctl("tcache.create", (void *)&tcache_ind, &sz, + NULL, 0), 0, "Unexpected mallctl failure"); + + for (i = 0; i < sizeof(tcache_sizes) / sizeof(size_t); i++) { + sz = tcache_sizes[i]; + + /* tcache fill. */ + tick0 = ticker_read(decay_ticker); + p = mallocx(sz, MALLOCX_TCACHE(tcache_ind)); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during tcache fill " + "(sz=%zu)", sz); + /* tcache flush. 
*/ + dallocx(p, MALLOCX_TCACHE(tcache_ind)); + tick0 = ticker_read(decay_ticker); + assert_d_eq(mallctl("tcache.flush", NULL, NULL, + (void *)&tcache_ind, sizeof(unsigned)), 0, + "Unexpected mallctl failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during tcache flush " + "(sz=%zu)", sz); + } + } +} +TEST_END + +TEST_BEGIN(test_decay_ticker) +{ +#define NPS 1024 + int flags = (MALLOCX_ARENA(0) | MALLOCX_TCACHE_NONE); + void *ps[NPS]; + uint64_t epoch; + uint64_t npurge0 = 0; + uint64_t npurge1 = 0; + size_t sz, large; + unsigned i, nupdates0; + nstime_t time, decay_time, deadline; + + test_skip_if(opt_purge != purge_mode_decay); + + /* + * Allocate a bunch of large objects, pause the clock, deallocate the + * objects, restore the clock, then [md]allocx() in a tight loop to + * verify the ticker triggers purging. + */ + + if (config_tcache) { + size_t tcache_max; + + sz = sizeof(size_t); + assert_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max, + &sz, NULL, 0), 0, "Unexpected mallctl failure"); + large = nallocx(tcache_max + 1, flags); + } else { + sz = sizeof(size_t); + assert_d_eq(mallctl("arenas.lrun.0.size", (void *)&large, &sz, + NULL, 0), 0, "Unexpected mallctl failure"); + } + + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, + sizeof(uint64_t)), 0, "Unexpected mallctl failure"); + sz = sizeof(uint64_t); + assert_d_eq(mallctl("stats.arenas.0.npurge", (void *)&npurge0, &sz, + NULL, 0), config_stats ? 
0 : ENOENT, "Unexpected mallctl result"); + + for (i = 0; i < NPS; i++) { + ps[i] = mallocx(large, flags); + assert_ptr_not_null(ps[i], "Unexpected mallocx() failure"); + } + + nupdates_mock = 0; + nstime_init(&time_mock, 0); + nstime_update(&time_mock); + monotonic_mock = true; + + nstime_monotonic_orig = nstime_monotonic; + nstime_update_orig = nstime_update; + nstime_monotonic = nstime_monotonic_mock; + nstime_update = nstime_update_mock; + + for (i = 0; i < NPS; i++) { + dallocx(ps[i], flags); + nupdates0 = nupdates_mock; + assert_d_eq(mallctl("arena.0.decay", NULL, NULL, NULL, 0), 0, + "Unexpected arena.0.decay failure"); + assert_u_gt(nupdates_mock, nupdates0, + "Expected nstime_update() to be called"); + } + + nstime_monotonic = nstime_monotonic_orig; + nstime_update = nstime_update_orig; + + nstime_init(&time, 0); + nstime_update(&time); + nstime_init2(&decay_time, opt_decay_time, 0); + nstime_copy(&deadline, &time); + nstime_add(&deadline, &decay_time); + do { + for (i = 0; i < DECAY_NTICKS_PER_UPDATE / 2; i++) { + void *p = mallocx(1, flags); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + dallocx(p, flags); + } + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, + sizeof(uint64_t)), 0, "Unexpected mallctl failure"); + sz = sizeof(uint64_t); + assert_d_eq(mallctl("stats.arenas.0.npurge", (void *)&npurge1, + &sz, NULL, 0), config_stats ? 
0 : ENOENT, + "Unexpected mallctl result"); + + nstime_update(&time); + } while (nstime_compare(&time, &deadline) <= 0 && npurge1 == npurge0); + + if (config_stats) + assert_u64_gt(npurge1, npurge0, "Expected purging to occur"); +#undef NPS +} +TEST_END + +TEST_BEGIN(test_decay_nonmonotonic) +{ +#define NPS (SMOOTHSTEP_NSTEPS + 1) + int flags = (MALLOCX_ARENA(0) | MALLOCX_TCACHE_NONE); + void *ps[NPS]; + uint64_t epoch; + uint64_t npurge0 = 0; + uint64_t npurge1 = 0; + size_t sz, large0; + unsigned i, nupdates0; + + test_skip_if(opt_purge != purge_mode_decay); + + sz = sizeof(size_t); + assert_d_eq(mallctl("arenas.lrun.0.size", (void *)&large0, &sz, NULL, + 0), 0, "Unexpected mallctl failure"); + + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, + sizeof(uint64_t)), 0, "Unexpected mallctl failure"); + sz = sizeof(uint64_t); + assert_d_eq(mallctl("stats.arenas.0.npurge", (void *)&npurge0, &sz, + NULL, 0), config_stats ? 
0 : ENOENT, "Unexpected mallctl result"); + + nupdates_mock = 0; + nstime_init(&time_mock, 0); + nstime_update(&time_mock); + monotonic_mock = false; + + nstime_monotonic_orig = nstime_monotonic; + nstime_update_orig = nstime_update; + nstime_monotonic = nstime_monotonic_mock; + nstime_update = nstime_update_mock; + + for (i = 0; i < NPS; i++) { + ps[i] = mallocx(large0, flags); + assert_ptr_not_null(ps[i], "Unexpected mallocx() failure"); + } + + for (i = 0; i < NPS; i++) { + dallocx(ps[i], flags); + nupdates0 = nupdates_mock; + assert_d_eq(mallctl("arena.0.decay", NULL, NULL, NULL, 0), 0, + "Unexpected arena.0.decay failure"); + assert_u_gt(nupdates_mock, nupdates0, + "Expected nstime_update() to be called"); + } + + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, + sizeof(uint64_t)), 0, "Unexpected mallctl failure"); + sz = sizeof(uint64_t); + assert_d_eq(mallctl("stats.arenas.0.npurge", (void *)&npurge1, &sz, + NULL, 0), config_stats ? 0 : ENOENT, "Unexpected mallctl result"); + + if (config_stats) + assert_u64_eq(npurge0, npurge1, "Unexpected purging occurred"); + + nstime_monotonic = nstime_monotonic_orig; + nstime_update = nstime_update_orig; +#undef NPS +} +TEST_END + +int +main(void) +{ + + return (test( + test_decay_ticks, + test_decay_ticker, + test_decay_nonmonotonic)); +} diff --git a/deps/jemalloc/test/unit/fork.c b/deps/jemalloc/test/unit/fork.c new file mode 100644 index 000000000..c530797c4 --- /dev/null +++ b/deps/jemalloc/test/unit/fork.c @@ -0,0 +1,64 @@ +#include "test/jemalloc_test.h" + +#ifndef _WIN32 +#include +#endif + +TEST_BEGIN(test_fork) +{ +#ifndef _WIN32 + void *p; + pid_t pid; + + p = malloc(1); + assert_ptr_not_null(p, "Unexpected malloc() failure"); + + pid = fork(); + + free(p); + + p = malloc(64); + assert_ptr_not_null(p, "Unexpected malloc() failure"); + free(p); + + if (pid == -1) { + /* Error. */ + test_fail("Unexpected fork() failure"); + } else if (pid == 0) { + /* Child. 
*/ + _exit(0); + } else { + int status; + + /* Parent. */ + while (true) { + if (waitpid(pid, &status, 0) == -1) + test_fail("Unexpected waitpid() failure"); + if (WIFSIGNALED(status)) { + test_fail("Unexpected child termination due to " + "signal %d", WTERMSIG(status)); + break; + } + if (WIFEXITED(status)) { + if (WEXITSTATUS(status) != 0) { + test_fail( + "Unexpected child exit value %d", + WEXITSTATUS(status)); + } + break; + } + } + } +#else + test_skip("fork(2) is irrelevant to Windows"); +#endif +} +TEST_END + +int +main(void) +{ + + return (test( + test_fork)); +} diff --git a/deps/jemalloc/test/unit/hash.c b/deps/jemalloc/test/unit/hash.c index 77a8cede9..010c9d76f 100644 --- a/deps/jemalloc/test/unit/hash.c +++ b/deps/jemalloc/test/unit/hash.c @@ -35,7 +35,7 @@ typedef enum { hash_variant_x64_128 } hash_variant_t; -static size_t +static int hash_variant_bits(hash_variant_t variant) { @@ -59,19 +59,20 @@ hash_variant_string(hash_variant_t variant) } } +#define KEY_SIZE 256 static void -hash_variant_verify(hash_variant_t variant) +hash_variant_verify_key(hash_variant_t variant, uint8_t *key) { - const size_t hashbytes = hash_variant_bits(variant) / 8; - uint8_t key[256]; - VARIABLE_ARRAY(uint8_t, hashes, hashbytes * 256); + const int hashbytes = hash_variant_bits(variant) / 8; + const int hashes_size = hashbytes * 256; + VARIABLE_ARRAY(uint8_t, hashes, hashes_size); VARIABLE_ARRAY(uint8_t, final, hashbytes); unsigned i; uint32_t computed, expected; - memset(key, 0, sizeof(key)); - memset(hashes, 0, sizeof(hashes)); - memset(final, 0, sizeof(final)); + memset(key, 0, KEY_SIZE); + memset(hashes, 0, hashes_size); + memset(final, 0, hashbytes); /* * Hash keys of the form {0}, {0,1}, {0,1,2}, ..., {0,1,...,255} as the @@ -102,17 +103,17 @@ hash_variant_verify(hash_variant_t variant) /* Hash the result array. 
*/ switch (variant) { case hash_variant_x86_32: { - uint32_t out = hash_x86_32(hashes, hashbytes*256, 0); + uint32_t out = hash_x86_32(hashes, hashes_size, 0); memcpy(final, &out, sizeof(out)); break; } case hash_variant_x86_128: { uint64_t out[2]; - hash_x86_128(hashes, hashbytes*256, 0, out); + hash_x86_128(hashes, hashes_size, 0, out); memcpy(final, out, sizeof(out)); break; } case hash_variant_x64_128: { uint64_t out[2]; - hash_x64_128(hashes, hashbytes*256, 0, out); + hash_x64_128(hashes, hashes_size, 0, out); memcpy(final, out, sizeof(out)); break; } default: not_reached(); @@ -139,6 +140,19 @@ hash_variant_verify(hash_variant_t variant) hash_variant_string(variant), expected, computed); } +static void +hash_variant_verify(hash_variant_t variant) +{ +#define MAX_ALIGN 16 + uint8_t key[KEY_SIZE + (MAX_ALIGN - 1)]; + unsigned i; + + for (i = 0; i < MAX_ALIGN; i++) + hash_variant_verify_key(variant, &key[i]); +#undef MAX_ALIGN +} +#undef KEY_SIZE + TEST_BEGIN(test_hash_x86_32) { diff --git a/deps/jemalloc/test/unit/junk.c b/deps/jemalloc/test/unit/junk.c index b23dd1e95..460bd524d 100644 --- a/deps/jemalloc/test/unit/junk.c +++ b/deps/jemalloc/test/unit/junk.c @@ -29,7 +29,7 @@ arena_dalloc_junk_small_intercept(void *ptr, arena_bin_info_t *bin_info) arena_dalloc_junk_small_orig(ptr, bin_info); for (i = 0; i < bin_info->reg_size; i++) { - assert_c_eq(((char *)ptr)[i], 0x5a, + assert_u_eq(((uint8_t *)ptr)[i], JEMALLOC_FREE_JUNK, "Missing junk fill for byte %zu/%zu of deallocated region", i, bin_info->reg_size); } @@ -44,7 +44,7 @@ arena_dalloc_junk_large_intercept(void *ptr, size_t usize) arena_dalloc_junk_large_orig(ptr, usize); for (i = 0; i < usize; i++) { - assert_c_eq(((char *)ptr)[i], 0x5a, + assert_u_eq(((uint8_t *)ptr)[i], JEMALLOC_FREE_JUNK, "Missing junk fill for byte %zu/%zu of deallocated region", i, usize); } @@ -69,7 +69,7 @@ huge_dalloc_junk_intercept(void *ptr, size_t usize) static void test_junk(size_t sz_min, size_t sz_max) { - char *s; + uint8_t 
*s; size_t sz_prev, sz, i; if (opt_junk_free) { @@ -82,23 +82,23 @@ test_junk(size_t sz_min, size_t sz_max) } sz_prev = 0; - s = (char *)mallocx(sz_min, 0); + s = (uint8_t *)mallocx(sz_min, 0); assert_ptr_not_null((void *)s, "Unexpected mallocx() failure"); for (sz = sallocx(s, 0); sz <= sz_max; sz_prev = sz, sz = sallocx(s, 0)) { if (sz_prev > 0) { - assert_c_eq(s[0], 'a', + assert_u_eq(s[0], 'a', "Previously allocated byte %zu/%zu is corrupted", ZU(0), sz_prev); - assert_c_eq(s[sz_prev-1], 'a', + assert_u_eq(s[sz_prev-1], 'a', "Previously allocated byte %zu/%zu is corrupted", sz_prev-1, sz_prev); } for (i = sz_prev; i < sz; i++) { if (opt_junk_alloc) { - assert_c_eq(s[i], 0xa5, + assert_u_eq(s[i], JEMALLOC_ALLOC_JUNK, "Newly allocated byte %zu/%zu isn't " "junk-filled", i, sz); } @@ -107,7 +107,7 @@ test_junk(size_t sz_min, size_t sz_max) if (xallocx(s, sz+1, 0, 0) == sz) { watch_junking(s); - s = (char *)rallocx(s, sz+1, 0); + s = (uint8_t *)rallocx(s, sz+1, 0); assert_ptr_not_null((void *)s, "Unexpected rallocx() failure"); assert_true(!opt_junk_free || saw_junking, @@ -244,7 +244,6 @@ int main(void) { - assert(!config_fill || opt_junk_alloc || opt_junk_free); return (test( test_junk_small, test_junk_large, diff --git a/deps/jemalloc/test/unit/junk_alloc.c b/deps/jemalloc/test/unit/junk_alloc.c index 8db3331d2..a5895b5c0 100644 --- a/deps/jemalloc/test/unit/junk_alloc.c +++ b/deps/jemalloc/test/unit/junk_alloc.c @@ -1,3 +1,3 @@ -#define JEMALLOC_TEST_JUNK_OPT "junk:alloc" +#define JEMALLOC_TEST_JUNK_OPT "junk:alloc" #include "junk.c" #undef JEMALLOC_TEST_JUNK_OPT diff --git a/deps/jemalloc/test/unit/junk_free.c b/deps/jemalloc/test/unit/junk_free.c index 482a61d07..bb5183c90 100644 --- a/deps/jemalloc/test/unit/junk_free.c +++ b/deps/jemalloc/test/unit/junk_free.c @@ -1,3 +1,3 @@ -#define JEMALLOC_TEST_JUNK_OPT "junk:free" +#define JEMALLOC_TEST_JUNK_OPT "junk:free" #include "junk.c" #undef JEMALLOC_TEST_JUNK_OPT diff --git a/deps/jemalloc/test/unit/mallctl.c 
b/deps/jemalloc/test/unit/mallctl.c old mode 100644 new mode 100755 index 31e354ca7..2353c92c1 --- a/deps/jemalloc/test/unit/mallctl.c +++ b/deps/jemalloc/test/unit/mallctl.c @@ -12,16 +12,18 @@ TEST_BEGIN(test_mallctl_errors) EPERM, "mallctl() should return EPERM on attempt to write " "read-only value"); - assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)-1), - EINVAL, "mallctl() should return EINVAL for input size mismatch"); - assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)+1), - EINVAL, "mallctl() should return EINVAL for input size mismatch"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, + sizeof(epoch)-1), EINVAL, + "mallctl() should return EINVAL for input size mismatch"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, + sizeof(epoch)+1), EINVAL, + "mallctl() should return EINVAL for input size mismatch"); sz = sizeof(epoch)-1; - assert_d_eq(mallctl("epoch", &epoch, &sz, NULL, 0), EINVAL, + assert_d_eq(mallctl("epoch", (void *)&epoch, &sz, NULL, 0), EINVAL, "mallctl() should return EINVAL for output size mismatch"); sz = sizeof(epoch)+1; - assert_d_eq(mallctl("epoch", &epoch, &sz, NULL, 0), EINVAL, + assert_d_eq(mallctl("epoch", (void *)&epoch, &sz, NULL, 0), EINVAL, "mallctl() should return EINVAL for output size mismatch"); } TEST_END @@ -56,18 +58,20 @@ TEST_BEGIN(test_mallctlbymib_errors) assert_d_eq(mallctlnametomib("epoch", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &epoch, + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&epoch, sizeof(epoch)-1), EINVAL, "mallctlbymib() should return EINVAL for input size mismatch"); - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &epoch, + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&epoch, sizeof(epoch)+1), EINVAL, "mallctlbymib() should return EINVAL for input size mismatch"); sz = sizeof(epoch)-1; - assert_d_eq(mallctlbymib(mib, miblen, &epoch, &sz, NULL, 0), 
EINVAL, + assert_d_eq(mallctlbymib(mib, miblen, (void *)&epoch, &sz, NULL, 0), + EINVAL, "mallctlbymib() should return EINVAL for output size mismatch"); sz = sizeof(epoch)+1; - assert_d_eq(mallctlbymib(mib, miblen, &epoch, &sz, NULL, 0), EINVAL, + assert_d_eq(mallctlbymib(mib, miblen, (void *)&epoch, &sz, NULL, 0), + EINVAL, "mallctlbymib() should return EINVAL for output size mismatch"); } TEST_END @@ -83,18 +87,19 @@ TEST_BEGIN(test_mallctl_read_write) assert_zu_eq(sz, sizeof(old_epoch), "Unexpected output size"); /* Read. */ - assert_d_eq(mallctl("epoch", &old_epoch, &sz, NULL, 0), 0, + assert_d_eq(mallctl("epoch", (void *)&old_epoch, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); assert_zu_eq(sz, sizeof(old_epoch), "Unexpected output size"); /* Write. */ - assert_d_eq(mallctl("epoch", NULL, NULL, &new_epoch, sizeof(new_epoch)), - 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&new_epoch, + sizeof(new_epoch)), 0, "Unexpected mallctl() failure"); assert_zu_eq(sz, sizeof(old_epoch), "Unexpected output size"); /* Read+write. 
*/ - assert_d_eq(mallctl("epoch", &old_epoch, &sz, &new_epoch, - sizeof(new_epoch)), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("epoch", (void *)&old_epoch, &sz, + (void *)&new_epoch, sizeof(new_epoch)), 0, + "Unexpected mallctl() failure"); assert_zu_eq(sz, sizeof(old_epoch), "Unexpected output size"); } TEST_END @@ -117,29 +122,30 @@ TEST_END TEST_BEGIN(test_mallctl_config) { -#define TEST_MALLCTL_CONFIG(config) do { \ - bool oldval; \ +#define TEST_MALLCTL_CONFIG(config, t) do { \ + t oldval; \ size_t sz = sizeof(oldval); \ - assert_d_eq(mallctl("config."#config, &oldval, &sz, NULL, 0), \ - 0, "Unexpected mallctl() failure"); \ + assert_d_eq(mallctl("config."#config, (void *)&oldval, &sz, \ + NULL, 0), 0, "Unexpected mallctl() failure"); \ assert_b_eq(oldval, config_##config, "Incorrect config value"); \ assert_zu_eq(sz, sizeof(oldval), "Unexpected output size"); \ } while (0) - TEST_MALLCTL_CONFIG(cache_oblivious); - TEST_MALLCTL_CONFIG(debug); - TEST_MALLCTL_CONFIG(fill); - TEST_MALLCTL_CONFIG(lazy_lock); - TEST_MALLCTL_CONFIG(munmap); - TEST_MALLCTL_CONFIG(prof); - TEST_MALLCTL_CONFIG(prof_libgcc); - TEST_MALLCTL_CONFIG(prof_libunwind); - TEST_MALLCTL_CONFIG(stats); - TEST_MALLCTL_CONFIG(tcache); - TEST_MALLCTL_CONFIG(tls); - TEST_MALLCTL_CONFIG(utrace); - TEST_MALLCTL_CONFIG(valgrind); - TEST_MALLCTL_CONFIG(xmalloc); + TEST_MALLCTL_CONFIG(cache_oblivious, bool); + TEST_MALLCTL_CONFIG(debug, bool); + TEST_MALLCTL_CONFIG(fill, bool); + TEST_MALLCTL_CONFIG(lazy_lock, bool); + TEST_MALLCTL_CONFIG(malloc_conf, const char *); + TEST_MALLCTL_CONFIG(munmap, bool); + TEST_MALLCTL_CONFIG(prof, bool); + TEST_MALLCTL_CONFIG(prof_libgcc, bool); + TEST_MALLCTL_CONFIG(prof_libunwind, bool); + TEST_MALLCTL_CONFIG(stats, bool); + TEST_MALLCTL_CONFIG(tcache, bool); + TEST_MALLCTL_CONFIG(tls, bool); + TEST_MALLCTL_CONFIG(utrace, bool); + TEST_MALLCTL_CONFIG(valgrind, bool); + TEST_MALLCTL_CONFIG(xmalloc, bool); #undef TEST_MALLCTL_CONFIG } @@ -153,7 +159,8 @@ 
TEST_BEGIN(test_mallctl_opt) t oldval; \ size_t sz = sizeof(oldval); \ int expected = config_##config ? 0 : ENOENT; \ - int result = mallctl("opt."#opt, &oldval, &sz, NULL, 0); \ + int result = mallctl("opt."#opt, (void *)&oldval, &sz, NULL, \ + 0); \ assert_d_eq(result, expected, \ "Unexpected mallctl() result for opt."#opt); \ assert_zu_eq(sz, sizeof(oldval), "Unexpected output size"); \ @@ -162,8 +169,10 @@ TEST_BEGIN(test_mallctl_opt) TEST_MALLCTL_OPT(bool, abort, always); TEST_MALLCTL_OPT(size_t, lg_chunk, always); TEST_MALLCTL_OPT(const char *, dss, always); - TEST_MALLCTL_OPT(size_t, narenas, always); + TEST_MALLCTL_OPT(unsigned, narenas, always); + TEST_MALLCTL_OPT(const char *, purge, always); TEST_MALLCTL_OPT(ssize_t, lg_dirty_mult, always); + TEST_MALLCTL_OPT(ssize_t, decay_time, always); TEST_MALLCTL_OPT(bool, stats_print, always); TEST_MALLCTL_OPT(const char *, junk, fill); TEST_MALLCTL_OPT(size_t, quarantine, fill); @@ -194,7 +203,7 @@ TEST_BEGIN(test_manpage_example) size_t len, miblen; len = sizeof(nbins); - assert_d_eq(mallctl("arenas.nbins", &nbins, &len, NULL, 0), 0, + assert_d_eq(mallctl("arenas.nbins", (void *)&nbins, &len, NULL, 0), 0, "Unexpected mallctl() failure"); miblen = 4; @@ -205,8 +214,8 @@ TEST_BEGIN(test_manpage_example) mib[2] = i; len = sizeof(bin_size); - assert_d_eq(mallctlbymib(mib, miblen, &bin_size, &len, NULL, 0), - 0, "Unexpected mallctlbymib() failure"); + assert_d_eq(mallctlbymib(mib, miblen, (void *)&bin_size, &len, + NULL, 0), 0, "Unexpected mallctlbymib() failure"); /* Do something with bin_size... */ } } @@ -255,25 +264,25 @@ TEST_BEGIN(test_tcache) /* Create tcaches. */ for (i = 0; i < NTCACHES; i++) { sz = sizeof(unsigned); - assert_d_eq(mallctl("tcache.create", &tis[i], &sz, NULL, 0), 0, - "Unexpected mallctl() failure, i=%u", i); + assert_d_eq(mallctl("tcache.create", (void *)&tis[i], &sz, NULL, + 0), 0, "Unexpected mallctl() failure, i=%u", i); } /* Exercise tcache ID recycling. 
*/ for (i = 0; i < NTCACHES; i++) { - assert_d_eq(mallctl("tcache.destroy", NULL, NULL, &tis[i], - sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u", - i); + assert_d_eq(mallctl("tcache.destroy", NULL, NULL, + (void *)&tis[i], sizeof(unsigned)), 0, + "Unexpected mallctl() failure, i=%u", i); } for (i = 0; i < NTCACHES; i++) { sz = sizeof(unsigned); - assert_d_eq(mallctl("tcache.create", &tis[i], &sz, NULL, 0), 0, - "Unexpected mallctl() failure, i=%u", i); + assert_d_eq(mallctl("tcache.create", (void *)&tis[i], &sz, NULL, + 0), 0, "Unexpected mallctl() failure, i=%u", i); } /* Flush empty tcaches. */ for (i = 0; i < NTCACHES; i++) { - assert_d_eq(mallctl("tcache.flush", NULL, NULL, &tis[i], + assert_d_eq(mallctl("tcache.flush", NULL, NULL, (void *)&tis[i], sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u", i); } @@ -318,16 +327,16 @@ TEST_BEGIN(test_tcache) /* Flush some non-empty tcaches. */ for (i = 0; i < NTCACHES/2; i++) { - assert_d_eq(mallctl("tcache.flush", NULL, NULL, &tis[i], + assert_d_eq(mallctl("tcache.flush", NULL, NULL, (void *)&tis[i], sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u", i); } /* Destroy tcaches. 
*/ for (i = 0; i < NTCACHES; i++) { - assert_d_eq(mallctl("tcache.destroy", NULL, NULL, &tis[i], - sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u", - i); + assert_d_eq(mallctl("tcache.destroy", NULL, NULL, + (void *)&tis[i], sizeof(unsigned)), 0, + "Unexpected mallctl() failure, i=%u", i); } } TEST_END @@ -337,15 +346,17 @@ TEST_BEGIN(test_thread_arena) unsigned arena_old, arena_new, narenas; size_t sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.narenas", &narenas, &sz, NULL, 0), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); assert_u_eq(narenas, opt_narenas, "Number of arenas incorrect"); arena_new = narenas - 1; - assert_d_eq(mallctl("thread.arena", &arena_old, &sz, &arena_new, - sizeof(unsigned)), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("thread.arena", (void *)&arena_old, &sz, + (void *)&arena_new, sizeof(unsigned)), 0, + "Unexpected mallctl() failure"); arena_new = 0; - assert_d_eq(mallctl("thread.arena", &arena_old, &sz, &arena_new, - sizeof(unsigned)), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("thread.arena", (void *)&arena_old, &sz, + (void *)&arena_new, sizeof(unsigned)), 0, + "Unexpected mallctl() failure"); } TEST_END @@ -354,17 +365,20 @@ TEST_BEGIN(test_arena_i_lg_dirty_mult) ssize_t lg_dirty_mult, orig_lg_dirty_mult, prev_lg_dirty_mult; size_t sz = sizeof(ssize_t); - assert_d_eq(mallctl("arena.0.lg_dirty_mult", &orig_lg_dirty_mult, &sz, - NULL, 0), 0, "Unexpected mallctl() failure"); + test_skip_if(opt_purge != purge_mode_ratio); + + assert_d_eq(mallctl("arena.0.lg_dirty_mult", + (void *)&orig_lg_dirty_mult, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); lg_dirty_mult = -2; assert_d_eq(mallctl("arena.0.lg_dirty_mult", NULL, NULL, - &lg_dirty_mult, sizeof(ssize_t)), EFAULT, + (void *)&lg_dirty_mult, sizeof(ssize_t)), EFAULT, "Unexpected mallctl() success"); lg_dirty_mult = (sizeof(size_t) << 3); 
assert_d_eq(mallctl("arena.0.lg_dirty_mult", NULL, NULL, - &lg_dirty_mult, sizeof(ssize_t)), EFAULT, + (void *)&lg_dirty_mult, sizeof(ssize_t)), EFAULT, "Unexpected mallctl() success"); for (prev_lg_dirty_mult = orig_lg_dirty_mult, lg_dirty_mult = -1; @@ -372,15 +386,48 @@ TEST_BEGIN(test_arena_i_lg_dirty_mult) = lg_dirty_mult, lg_dirty_mult++) { ssize_t old_lg_dirty_mult; - assert_d_eq(mallctl("arena.0.lg_dirty_mult", &old_lg_dirty_mult, - &sz, &lg_dirty_mult, sizeof(ssize_t)), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arena.0.lg_dirty_mult", + (void *)&old_lg_dirty_mult, &sz, (void *)&lg_dirty_mult, + sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); assert_zd_eq(old_lg_dirty_mult, prev_lg_dirty_mult, "Unexpected old arena.0.lg_dirty_mult"); } } TEST_END +TEST_BEGIN(test_arena_i_decay_time) +{ + ssize_t decay_time, orig_decay_time, prev_decay_time; + size_t sz = sizeof(ssize_t); + + test_skip_if(opt_purge != purge_mode_decay); + + assert_d_eq(mallctl("arena.0.decay_time", (void *)&orig_decay_time, &sz, + NULL, 0), 0, "Unexpected mallctl() failure"); + + decay_time = -2; + assert_d_eq(mallctl("arena.0.decay_time", NULL, NULL, + (void *)&decay_time, sizeof(ssize_t)), EFAULT, + "Unexpected mallctl() success"); + + decay_time = 0x7fffffff; + assert_d_eq(mallctl("arena.0.decay_time", NULL, NULL, + (void *)&decay_time, sizeof(ssize_t)), 0, + "Unexpected mallctl() failure"); + + for (prev_decay_time = decay_time, decay_time = -1; + decay_time < 20; prev_decay_time = decay_time, decay_time++) { + ssize_t old_decay_time; + + assert_d_eq(mallctl("arena.0.decay_time", (void *)&old_decay_time, + &sz, (void *)&decay_time, sizeof(ssize_t)), 0, + "Unexpected mallctl() failure"); + assert_zd_eq(old_decay_time, prev_decay_time, + "Unexpected old arena.0.decay_time"); + } +} +TEST_END + TEST_BEGIN(test_arena_i_purge) { unsigned narenas; @@ -391,8 +438,8 @@ TEST_BEGIN(test_arena_i_purge) assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, 
"Unexpected mallctl() failure"); - assert_d_eq(mallctl("arenas.narenas", &narenas, &sz, NULL, 0), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); assert_d_eq(mallctlnametomib("arena.0.purge", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[1] = narenas; @@ -401,6 +448,26 @@ TEST_BEGIN(test_arena_i_purge) } TEST_END +TEST_BEGIN(test_arena_i_decay) +{ + unsigned narenas; + size_t sz = sizeof(unsigned); + size_t mib[3]; + size_t miblen = 3; + + assert_d_eq(mallctl("arena.0.decay", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl() failure"); + + assert_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctlnametomib("arena.0.decay", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[1] = narenas; + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); +} +TEST_END + TEST_BEGIN(test_arena_i_dss) { const char *dss_prec_old, *dss_prec_new; @@ -413,31 +480,35 @@ TEST_BEGIN(test_arena_i_dss) "Unexpected mallctlnametomib() error"); dss_prec_new = "disabled"; - assert_d_eq(mallctlbymib(mib, miblen, &dss_prec_old, &sz, &dss_prec_new, - sizeof(dss_prec_new)), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, + (void *)&dss_prec_new, sizeof(dss_prec_new)), 0, + "Unexpected mallctl() failure"); assert_str_ne(dss_prec_old, "primary", "Unexpected default for dss precedence"); - assert_d_eq(mallctlbymib(mib, miblen, &dss_prec_new, &sz, &dss_prec_old, - sizeof(dss_prec_old)), 0, "Unexpected mallctl() failure"); - - assert_d_eq(mallctlbymib(mib, miblen, &dss_prec_old, &sz, NULL, 0), 0, + assert_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_new, &sz, + (void *)&dss_prec_old, sizeof(dss_prec_old)), 0, "Unexpected mallctl() failure"); + + assert_d_eq(mallctlbymib(mib, miblen, (void 
*)&dss_prec_old, &sz, NULL, + 0), 0, "Unexpected mallctl() failure"); assert_str_ne(dss_prec_old, "primary", "Unexpected value for dss precedence"); mib[1] = narenas_total_get(); dss_prec_new = "disabled"; - assert_d_eq(mallctlbymib(mib, miblen, &dss_prec_old, &sz, &dss_prec_new, - sizeof(dss_prec_new)), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, + (void *)&dss_prec_new, sizeof(dss_prec_new)), 0, + "Unexpected mallctl() failure"); assert_str_ne(dss_prec_old, "primary", "Unexpected default for dss precedence"); - assert_d_eq(mallctlbymib(mib, miblen, &dss_prec_new, &sz, &dss_prec_old, - sizeof(dss_prec_new)), 0, "Unexpected mallctl() failure"); - - assert_d_eq(mallctlbymib(mib, miblen, &dss_prec_old, &sz, NULL, 0), 0, + assert_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_new, &sz, + (void *)&dss_prec_old, sizeof(dss_prec_new)), 0, "Unexpected mallctl() failure"); + + assert_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, NULL, + 0), 0, "Unexpected mallctl() failure"); assert_str_ne(dss_prec_old, "primary", "Unexpected value for dss precedence"); } @@ -448,14 +519,14 @@ TEST_BEGIN(test_arenas_initialized) unsigned narenas; size_t sz = sizeof(narenas); - assert_d_eq(mallctl("arenas.narenas", &narenas, &sz, NULL, 0), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); { VARIABLE_ARRAY(bool, initialized, narenas); sz = narenas * sizeof(bool); - assert_d_eq(mallctl("arenas.initialized", initialized, &sz, - NULL, 0), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arenas.initialized", (void *)initialized, + &sz, NULL, 0), 0, "Unexpected mallctl() failure"); } } TEST_END @@ -465,17 +536,19 @@ TEST_BEGIN(test_arenas_lg_dirty_mult) ssize_t lg_dirty_mult, orig_lg_dirty_mult, prev_lg_dirty_mult; size_t sz = sizeof(ssize_t); - assert_d_eq(mallctl("arenas.lg_dirty_mult", &orig_lg_dirty_mult, &sz, - 
NULL, 0), 0, "Unexpected mallctl() failure"); + test_skip_if(opt_purge != purge_mode_ratio); + + assert_d_eq(mallctl("arenas.lg_dirty_mult", (void *)&orig_lg_dirty_mult, + &sz, NULL, 0), 0, "Unexpected mallctl() failure"); lg_dirty_mult = -2; assert_d_eq(mallctl("arenas.lg_dirty_mult", NULL, NULL, - &lg_dirty_mult, sizeof(ssize_t)), EFAULT, + (void *)&lg_dirty_mult, sizeof(ssize_t)), EFAULT, "Unexpected mallctl() success"); lg_dirty_mult = (sizeof(size_t) << 3); assert_d_eq(mallctl("arenas.lg_dirty_mult", NULL, NULL, - &lg_dirty_mult, sizeof(ssize_t)), EFAULT, + (void *)&lg_dirty_mult, sizeof(ssize_t)), EFAULT, "Unexpected mallctl() success"); for (prev_lg_dirty_mult = orig_lg_dirty_mult, lg_dirty_mult = -1; @@ -483,23 +556,56 @@ TEST_BEGIN(test_arenas_lg_dirty_mult) lg_dirty_mult, lg_dirty_mult++) { ssize_t old_lg_dirty_mult; - assert_d_eq(mallctl("arenas.lg_dirty_mult", &old_lg_dirty_mult, - &sz, &lg_dirty_mult, sizeof(ssize_t)), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arenas.lg_dirty_mult", + (void *)&old_lg_dirty_mult, &sz, (void *)&lg_dirty_mult, + sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); assert_zd_eq(old_lg_dirty_mult, prev_lg_dirty_mult, "Unexpected old arenas.lg_dirty_mult"); } } TEST_END +TEST_BEGIN(test_arenas_decay_time) +{ + ssize_t decay_time, orig_decay_time, prev_decay_time; + size_t sz = sizeof(ssize_t); + + test_skip_if(opt_purge != purge_mode_decay); + + assert_d_eq(mallctl("arenas.decay_time", (void *)&orig_decay_time, &sz, + NULL, 0), 0, "Unexpected mallctl() failure"); + + decay_time = -2; + assert_d_eq(mallctl("arenas.decay_time", NULL, NULL, + (void *)&decay_time, sizeof(ssize_t)), EFAULT, + "Unexpected mallctl() success"); + + decay_time = 0x7fffffff; + assert_d_eq(mallctl("arenas.decay_time", NULL, NULL, + (void *)&decay_time, sizeof(ssize_t)), 0, + "Expected mallctl() failure"); + + for (prev_decay_time = decay_time, decay_time = -1; + decay_time < 20; prev_decay_time = decay_time, decay_time++) { + 
ssize_t old_decay_time; + + assert_d_eq(mallctl("arenas.decay_time", + (void *)&old_decay_time, &sz, (void *)&decay_time, + sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); + assert_zd_eq(old_decay_time, prev_decay_time, + "Unexpected old arenas.decay_time"); + } +} +TEST_END + TEST_BEGIN(test_arenas_constants) { #define TEST_ARENAS_CONSTANT(t, name, expected) do { \ t name; \ size_t sz = sizeof(t); \ - assert_d_eq(mallctl("arenas."#name, &name, &sz, NULL, 0), 0, \ - "Unexpected mallctl() failure"); \ + assert_d_eq(mallctl("arenas."#name, (void *)&name, &sz, NULL, \ + 0), 0, "Unexpected mallctl() failure"); \ assert_zu_eq(name, expected, "Incorrect "#name" size"); \ } while (0) @@ -519,8 +625,8 @@ TEST_BEGIN(test_arenas_bin_constants) #define TEST_ARENAS_BIN_CONSTANT(t, name, expected) do { \ t name; \ size_t sz = sizeof(t); \ - assert_d_eq(mallctl("arenas.bin.0."#name, &name, &sz, NULL, 0), \ - 0, "Unexpected mallctl() failure"); \ + assert_d_eq(mallctl("arenas.bin.0."#name, (void *)&name, &sz, \ + NULL, 0), 0, "Unexpected mallctl() failure"); \ assert_zu_eq(name, expected, "Incorrect "#name" size"); \ } while (0) @@ -538,8 +644,8 @@ TEST_BEGIN(test_arenas_lrun_constants) #define TEST_ARENAS_LRUN_CONSTANT(t, name, expected) do { \ t name; \ size_t sz = sizeof(t); \ - assert_d_eq(mallctl("arenas.lrun.0."#name, &name, &sz, NULL, \ - 0), 0, "Unexpected mallctl() failure"); \ + assert_d_eq(mallctl("arenas.lrun.0."#name, (void *)&name, &sz, \ + NULL, 0), 0, "Unexpected mallctl() failure"); \ assert_zu_eq(name, expected, "Incorrect "#name" size"); \ } while (0) @@ -555,8 +661,8 @@ TEST_BEGIN(test_arenas_hchunk_constants) #define TEST_ARENAS_HCHUNK_CONSTANT(t, name, expected) do { \ t name; \ size_t sz = sizeof(t); \ - assert_d_eq(mallctl("arenas.hchunk.0."#name, &name, &sz, NULL, \ - 0), 0, "Unexpected mallctl() failure"); \ + assert_d_eq(mallctl("arenas.hchunk.0."#name, (void *)&name, \ + &sz, NULL, 0), 0, "Unexpected mallctl() failure"); \ assert_zu_eq(name, 
expected, "Incorrect "#name" size"); \ } while (0) @@ -571,12 +677,12 @@ TEST_BEGIN(test_arenas_extend) unsigned narenas_before, arena, narenas_after; size_t sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.narenas", &narenas_before, &sz, NULL, 0), 0, - "Unexpected mallctl() failure"); - assert_d_eq(mallctl("arenas.extend", &arena, &sz, NULL, 0), 0, - "Unexpected mallctl() failure"); - assert_d_eq(mallctl("arenas.narenas", &narenas_after, &sz, NULL, 0), 0, + assert_d_eq(mallctl("arenas.narenas", (void *)&narenas_before, &sz, + NULL, 0), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arenas.extend", (void *)&arena, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arenas.narenas", (void *)&narenas_after, &sz, NULL, + 0), 0, "Unexpected mallctl() failure"); assert_u_eq(narenas_before+1, narenas_after, "Unexpected number of arenas before versus after extension"); @@ -590,12 +696,14 @@ TEST_BEGIN(test_stats_arenas) #define TEST_STATS_ARENAS(t, name) do { \ t name; \ size_t sz = sizeof(t); \ - assert_d_eq(mallctl("stats.arenas.0."#name, &name, &sz, NULL, \ - 0), 0, "Unexpected mallctl() failure"); \ + assert_d_eq(mallctl("stats.arenas.0."#name, (void *)&name, &sz, \ + NULL, 0), 0, "Unexpected mallctl() failure"); \ } while (0) - TEST_STATS_ARENAS(const char *, dss); TEST_STATS_ARENAS(unsigned, nthreads); + TEST_STATS_ARENAS(const char *, dss); + TEST_STATS_ARENAS(ssize_t, lg_dirty_mult); + TEST_STATS_ARENAS(ssize_t, decay_time); TEST_STATS_ARENAS(size_t, pactive); TEST_STATS_ARENAS(size_t, pdirty); @@ -620,10 +728,13 @@ main(void) test_tcache, test_thread_arena, test_arena_i_lg_dirty_mult, + test_arena_i_decay_time, test_arena_i_purge, + test_arena_i_decay, test_arena_i_dss, test_arenas_initialized, test_arenas_lg_dirty_mult, + test_arenas_decay_time, test_arenas_constants, test_arenas_bin_constants, test_arenas_lrun_constants, diff --git a/deps/jemalloc/test/unit/math.c b/deps/jemalloc/test/unit/math.c index ebec77a62..adb72bed9 
100644 --- a/deps/jemalloc/test/unit/math.c +++ b/deps/jemalloc/test/unit/math.c @@ -5,6 +5,10 @@ #include +#ifdef __PGI +#undef INFINITY +#endif + #ifndef INFINITY #define INFINITY (DBL_MAX + DBL_MAX) #endif diff --git a/deps/jemalloc/test/unit/nstime.c b/deps/jemalloc/test/unit/nstime.c new file mode 100644 index 000000000..0368bc26e --- /dev/null +++ b/deps/jemalloc/test/unit/nstime.c @@ -0,0 +1,227 @@ +#include "test/jemalloc_test.h" + +#define BILLION UINT64_C(1000000000) + +TEST_BEGIN(test_nstime_init) +{ + nstime_t nst; + + nstime_init(&nst, 42000000043); + assert_u64_eq(nstime_ns(&nst), 42000000043, "ns incorrectly read"); + assert_u64_eq(nstime_sec(&nst), 42, "sec incorrectly read"); + assert_u64_eq(nstime_nsec(&nst), 43, "nsec incorrectly read"); +} +TEST_END + +TEST_BEGIN(test_nstime_init2) +{ + nstime_t nst; + + nstime_init2(&nst, 42, 43); + assert_u64_eq(nstime_sec(&nst), 42, "sec incorrectly read"); + assert_u64_eq(nstime_nsec(&nst), 43, "nsec incorrectly read"); +} +TEST_END + +TEST_BEGIN(test_nstime_copy) +{ + nstime_t nsta, nstb; + + nstime_init2(&nsta, 42, 43); + nstime_init(&nstb, 0); + nstime_copy(&nstb, &nsta); + assert_u64_eq(nstime_sec(&nstb), 42, "sec incorrectly copied"); + assert_u64_eq(nstime_nsec(&nstb), 43, "nsec incorrectly copied"); +} +TEST_END + +TEST_BEGIN(test_nstime_compare) +{ + nstime_t nsta, nstb; + + nstime_init2(&nsta, 42, 43); + nstime_copy(&nstb, &nsta); + assert_d_eq(nstime_compare(&nsta, &nstb), 0, "Times should be equal"); + assert_d_eq(nstime_compare(&nstb, &nsta), 0, "Times should be equal"); + + nstime_init2(&nstb, 42, 42); + assert_d_eq(nstime_compare(&nsta, &nstb), 1, + "nsta should be greater than nstb"); + assert_d_eq(nstime_compare(&nstb, &nsta), -1, + "nstb should be less than nsta"); + + nstime_init2(&nstb, 42, 44); + assert_d_eq(nstime_compare(&nsta, &nstb), -1, + "nsta should be less than nstb"); + assert_d_eq(nstime_compare(&nstb, &nsta), 1, + "nstb should be greater than nsta"); + + nstime_init2(&nstb, 41, 
BILLION - 1); + assert_d_eq(nstime_compare(&nsta, &nstb), 1, + "nsta should be greater than nstb"); + assert_d_eq(nstime_compare(&nstb, &nsta), -1, + "nstb should be less than nsta"); + + nstime_init2(&nstb, 43, 0); + assert_d_eq(nstime_compare(&nsta, &nstb), -1, + "nsta should be less than nstb"); + assert_d_eq(nstime_compare(&nstb, &nsta), 1, + "nstb should be greater than nsta"); +} +TEST_END + +TEST_BEGIN(test_nstime_add) +{ + nstime_t nsta, nstb; + + nstime_init2(&nsta, 42, 43); + nstime_copy(&nstb, &nsta); + nstime_add(&nsta, &nstb); + nstime_init2(&nstb, 84, 86); + assert_d_eq(nstime_compare(&nsta, &nstb), 0, + "Incorrect addition result"); + + nstime_init2(&nsta, 42, BILLION - 1); + nstime_copy(&nstb, &nsta); + nstime_add(&nsta, &nstb); + nstime_init2(&nstb, 85, BILLION - 2); + assert_d_eq(nstime_compare(&nsta, &nstb), 0, + "Incorrect addition result"); +} +TEST_END + +TEST_BEGIN(test_nstime_subtract) +{ + nstime_t nsta, nstb; + + nstime_init2(&nsta, 42, 43); + nstime_copy(&nstb, &nsta); + nstime_subtract(&nsta, &nstb); + nstime_init(&nstb, 0); + assert_d_eq(nstime_compare(&nsta, &nstb), 0, + "Incorrect subtraction result"); + + nstime_init2(&nsta, 42, 43); + nstime_init2(&nstb, 41, 44); + nstime_subtract(&nsta, &nstb); + nstime_init2(&nstb, 0, BILLION - 1); + assert_d_eq(nstime_compare(&nsta, &nstb), 0, + "Incorrect subtraction result"); +} +TEST_END + +TEST_BEGIN(test_nstime_imultiply) +{ + nstime_t nsta, nstb; + + nstime_init2(&nsta, 42, 43); + nstime_imultiply(&nsta, 10); + nstime_init2(&nstb, 420, 430); + assert_d_eq(nstime_compare(&nsta, &nstb), 0, + "Incorrect multiplication result"); + + nstime_init2(&nsta, 42, 666666666); + nstime_imultiply(&nsta, 3); + nstime_init2(&nstb, 127, 999999998); + assert_d_eq(nstime_compare(&nsta, &nstb), 0, + "Incorrect multiplication result"); +} +TEST_END + +TEST_BEGIN(test_nstime_idivide) +{ + nstime_t nsta, nstb; + + nstime_init2(&nsta, 42, 43); + nstime_copy(&nstb, &nsta); + nstime_imultiply(&nsta, 10); + 
nstime_idivide(&nsta, 10); + assert_d_eq(nstime_compare(&nsta, &nstb), 0, + "Incorrect division result"); + + nstime_init2(&nsta, 42, 666666666); + nstime_copy(&nstb, &nsta); + nstime_imultiply(&nsta, 3); + nstime_idivide(&nsta, 3); + assert_d_eq(nstime_compare(&nsta, &nstb), 0, + "Incorrect division result"); +} +TEST_END + +TEST_BEGIN(test_nstime_divide) +{ + nstime_t nsta, nstb, nstc; + + nstime_init2(&nsta, 42, 43); + nstime_copy(&nstb, &nsta); + nstime_imultiply(&nsta, 10); + assert_u64_eq(nstime_divide(&nsta, &nstb), 10, + "Incorrect division result"); + + nstime_init2(&nsta, 42, 43); + nstime_copy(&nstb, &nsta); + nstime_imultiply(&nsta, 10); + nstime_init(&nstc, 1); + nstime_add(&nsta, &nstc); + assert_u64_eq(nstime_divide(&nsta, &nstb), 10, + "Incorrect division result"); + + nstime_init2(&nsta, 42, 43); + nstime_copy(&nstb, &nsta); + nstime_imultiply(&nsta, 10); + nstime_init(&nstc, 1); + nstime_subtract(&nsta, &nstc); + assert_u64_eq(nstime_divide(&nsta, &nstb), 9, + "Incorrect division result"); +} +TEST_END + +TEST_BEGIN(test_nstime_monotonic) +{ + + nstime_monotonic(); +} +TEST_END + +TEST_BEGIN(test_nstime_update) +{ + nstime_t nst; + + nstime_init(&nst, 0); + + assert_false(nstime_update(&nst), "Basic time update failed."); + + /* Only Rip Van Winkle sleeps this long. 
*/ + { + nstime_t addend; + nstime_init2(&addend, 631152000, 0); + nstime_add(&nst, &addend); + } + { + nstime_t nst0; + nstime_copy(&nst0, &nst); + assert_true(nstime_update(&nst), + "Update should detect time roll-back."); + assert_d_eq(nstime_compare(&nst, &nst0), 0, + "Time should not have been modified"); + } +} +TEST_END + +int +main(void) +{ + + return (test( + test_nstime_init, + test_nstime_init2, + test_nstime_copy, + test_nstime_compare, + test_nstime_add, + test_nstime_subtract, + test_nstime_imultiply, + test_nstime_idivide, + test_nstime_divide, + test_nstime_monotonic, + test_nstime_update)); +} diff --git a/deps/jemalloc/test/unit/pack.c b/deps/jemalloc/test/unit/pack.c new file mode 100644 index 000000000..0b6ffcd21 --- /dev/null +++ b/deps/jemalloc/test/unit/pack.c @@ -0,0 +1,206 @@ +#include "test/jemalloc_test.h" + +const char *malloc_conf = + /* Use smallest possible chunk size. */ + "lg_chunk:0" + /* Immediately purge to minimize fragmentation. */ + ",lg_dirty_mult:-1" + ",decay_time:-1" + ; + +/* + * Size class that is a divisor of the page size, ideally 4+ regions per run. + */ +#if LG_PAGE <= 14 +#define SZ (ZU(1) << (LG_PAGE - 2)) +#else +#define SZ 4096 +#endif + +/* + * Number of chunks to consume at high water mark. Should be at least 2 so that + * if mmap()ed memory grows downward, downward growth of mmap()ed memory is + * tested. 
+ */ +#define NCHUNKS 8 + +static unsigned +binind_compute(void) +{ + size_t sz; + unsigned nbins, i; + + sz = sizeof(nbins); + assert_d_eq(mallctl("arenas.nbins", (void *)&nbins, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + + for (i = 0; i < nbins; i++) { + size_t mib[4]; + size_t miblen = sizeof(mib)/sizeof(size_t); + size_t size; + + assert_d_eq(mallctlnametomib("arenas.bin.0.size", mib, + &miblen), 0, "Unexpected mallctlnametomb failure"); + mib[2] = (size_t)i; + + sz = sizeof(size); + assert_d_eq(mallctlbymib(mib, miblen, (void *)&size, &sz, NULL, + 0), 0, "Unexpected mallctlbymib failure"); + if (size == SZ) + return (i); + } + + test_fail("Unable to compute nregs_per_run"); + return (0); +} + +static size_t +nregs_per_run_compute(void) +{ + uint32_t nregs; + size_t sz; + unsigned binind = binind_compute(); + size_t mib[4]; + size_t miblen = sizeof(mib)/sizeof(size_t); + + assert_d_eq(mallctlnametomib("arenas.bin.0.nregs", mib, &miblen), 0, + "Unexpected mallctlnametomb failure"); + mib[2] = (size_t)binind; + sz = sizeof(nregs); + assert_d_eq(mallctlbymib(mib, miblen, (void *)&nregs, &sz, NULL, + 0), 0, "Unexpected mallctlbymib failure"); + return (nregs); +} + +static size_t +npages_per_run_compute(void) +{ + size_t sz; + unsigned binind = binind_compute(); + size_t mib[4]; + size_t miblen = sizeof(mib)/sizeof(size_t); + size_t run_size; + + assert_d_eq(mallctlnametomib("arenas.bin.0.run_size", mib, &miblen), 0, + "Unexpected mallctlnametomb failure"); + mib[2] = (size_t)binind; + sz = sizeof(run_size); + assert_d_eq(mallctlbymib(mib, miblen, (void *)&run_size, &sz, NULL, + 0), 0, "Unexpected mallctlbymib failure"); + return (run_size >> LG_PAGE); +} + +static size_t +npages_per_chunk_compute(void) +{ + + return ((chunksize >> LG_PAGE) - map_bias); +} + +static size_t +nruns_per_chunk_compute(void) +{ + + return (npages_per_chunk_compute() / npages_per_run_compute()); +} + +static unsigned +arenas_extend_mallctl(void) +{ + unsigned arena_ind; + 
size_t sz; + + sz = sizeof(arena_ind); + assert_d_eq(mallctl("arenas.extend", (void *)&arena_ind, &sz, NULL, 0), + 0, "Error in arenas.extend"); + + return (arena_ind); +} + +static void +arena_reset_mallctl(unsigned arena_ind) +{ + size_t mib[3]; + size_t miblen = sizeof(mib)/sizeof(size_t); + + assert_d_eq(mallctlnametomib("arena.0.reset", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[1] = (size_t)arena_ind; + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); +} + +TEST_BEGIN(test_pack) +{ + unsigned arena_ind = arenas_extend_mallctl(); + size_t nregs_per_run = nregs_per_run_compute(); + size_t nruns_per_chunk = nruns_per_chunk_compute(); + size_t nruns = nruns_per_chunk * NCHUNKS; + size_t nregs = nregs_per_run * nruns; + VARIABLE_ARRAY(void *, ptrs, nregs); + size_t i, j, offset; + + /* Fill matrix. */ + for (i = offset = 0; i < nruns; i++) { + for (j = 0; j < nregs_per_run; j++) { + void *p = mallocx(SZ, MALLOCX_ARENA(arena_ind) | + MALLOCX_TCACHE_NONE); + assert_ptr_not_null(p, + "Unexpected mallocx(%zu, MALLOCX_ARENA(%u) |" + " MALLOCX_TCACHE_NONE) failure, run=%zu, reg=%zu", + SZ, arena_ind, i, j); + ptrs[(i * nregs_per_run) + j] = p; + } + } + + /* + * Free all but one region of each run, but rotate which region is + * preserved, so that subsequent allocations exercise the within-run + * layout policy. + */ + offset = 0; + for (i = offset = 0; + i < nruns; + i++, offset = (offset + 1) % nregs_per_run) { + for (j = 0; j < nregs_per_run; j++) { + void *p = ptrs[(i * nregs_per_run) + j]; + if (offset == j) + continue; + dallocx(p, MALLOCX_ARENA(arena_ind) | + MALLOCX_TCACHE_NONE); + } + } + + /* + * Logically refill matrix, skipping preserved regions and verifying + * that the matrix is unmodified. 
+ */ + offset = 0; + for (i = offset = 0; + i < nruns; + i++, offset = (offset + 1) % nregs_per_run) { + for (j = 0; j < nregs_per_run; j++) { + void *p; + + if (offset == j) + continue; + p = mallocx(SZ, MALLOCX_ARENA(arena_ind) | + MALLOCX_TCACHE_NONE); + assert_ptr_eq(p, ptrs[(i * nregs_per_run) + j], + "Unexpected refill discrepancy, run=%zu, reg=%zu\n", + i, j); + } + } + + /* Clean up. */ + arena_reset_mallctl(arena_ind); +} +TEST_END + +int +main(void) +{ + + return (test( + test_pack)); +} diff --git a/deps/jemalloc/test/unit/pages.c b/deps/jemalloc/test/unit/pages.c new file mode 100644 index 000000000..d31a35e68 --- /dev/null +++ b/deps/jemalloc/test/unit/pages.c @@ -0,0 +1,27 @@ +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_pages_huge) +{ + bool commit; + void *pages; + + commit = true; + pages = pages_map(NULL, PAGE, &commit); + assert_ptr_not_null(pages, "Unexpected pages_map() error"); + + assert_false(pages_huge(pages, PAGE), + "Unexpected pages_huge() result"); + assert_false(pages_nohuge(pages, PAGE), + "Unexpected pages_nohuge() result"); + + pages_unmap(pages, PAGE); +} +TEST_END + +int +main(void) +{ + + return (test( + test_pages_huge)); +} diff --git a/deps/jemalloc/test/unit/ph.c b/deps/jemalloc/test/unit/ph.c new file mode 100644 index 000000000..da442f07e --- /dev/null +++ b/deps/jemalloc/test/unit/ph.c @@ -0,0 +1,290 @@ +#include "test/jemalloc_test.h" + +typedef struct node_s node_t; + +struct node_s { +#define NODE_MAGIC 0x9823af7e + uint32_t magic; + phn(node_t) link; + uint64_t key; +}; + +static int +node_cmp(const node_t *a, const node_t *b) +{ + int ret; + + ret = (a->key > b->key) - (a->key < b->key); + if (ret == 0) { + /* + * Duplicates are not allowed in the heap, so force an + * arbitrary ordering for non-identical items with equal keys. 
+ */ + ret = (((uintptr_t)a) > ((uintptr_t)b)) + - (((uintptr_t)a) < ((uintptr_t)b)); + } + return (ret); +} + +static int +node_cmp_magic(const node_t *a, const node_t *b) { + + assert_u32_eq(a->magic, NODE_MAGIC, "Bad magic"); + assert_u32_eq(b->magic, NODE_MAGIC, "Bad magic"); + + return (node_cmp(a, b)); +} + +typedef ph(node_t) heap_t; +ph_gen(static, heap_, heap_t, node_t, link, node_cmp_magic); + +static void +node_print(const node_t *node, unsigned depth) +{ + unsigned i; + node_t *leftmost_child, *sibling; + + for (i = 0; i < depth; i++) + malloc_printf("\t"); + malloc_printf("%2"FMTu64"\n", node->key); + + leftmost_child = phn_lchild_get(node_t, link, node); + if (leftmost_child == NULL) + return; + node_print(leftmost_child, depth + 1); + + for (sibling = phn_next_get(node_t, link, leftmost_child); sibling != + NULL; sibling = phn_next_get(node_t, link, sibling)) { + node_print(sibling, depth + 1); + } +} + +static void +heap_print(const heap_t *heap) +{ + node_t *auxelm; + + malloc_printf("vvv heap %p vvv\n", heap); + if (heap->ph_root == NULL) + goto label_return; + + node_print(heap->ph_root, 0); + + for (auxelm = phn_next_get(node_t, link, heap->ph_root); auxelm != NULL; + auxelm = phn_next_get(node_t, link, auxelm)) { + assert_ptr_eq(phn_next_get(node_t, link, phn_prev_get(node_t, + link, auxelm)), auxelm, + "auxelm's prev doesn't link to auxelm"); + node_print(auxelm, 0); + } + +label_return: + malloc_printf("^^^ heap %p ^^^\n", heap); +} + +static unsigned +node_validate(const node_t *node, const node_t *parent) +{ + unsigned nnodes = 1; + node_t *leftmost_child, *sibling; + + if (parent != NULL) { + assert_d_ge(node_cmp_magic(node, parent), 0, + "Child is less than parent"); + } + + leftmost_child = phn_lchild_get(node_t, link, node); + if (leftmost_child == NULL) + return (nnodes); + assert_ptr_eq((void *)phn_prev_get(node_t, link, leftmost_child), + (void *)node, "Leftmost child does not link to node"); + nnodes += node_validate(leftmost_child, 
node); + + for (sibling = phn_next_get(node_t, link, leftmost_child); sibling != + NULL; sibling = phn_next_get(node_t, link, sibling)) { + assert_ptr_eq(phn_next_get(node_t, link, phn_prev_get(node_t, + link, sibling)), sibling, + "sibling's prev doesn't link to sibling"); + nnodes += node_validate(sibling, node); + } + return (nnodes); +} + +static unsigned +heap_validate(const heap_t *heap) +{ + unsigned nnodes = 0; + node_t *auxelm; + + if (heap->ph_root == NULL) + goto label_return; + + nnodes += node_validate(heap->ph_root, NULL); + + for (auxelm = phn_next_get(node_t, link, heap->ph_root); auxelm != NULL; + auxelm = phn_next_get(node_t, link, auxelm)) { + assert_ptr_eq(phn_next_get(node_t, link, phn_prev_get(node_t, + link, auxelm)), auxelm, + "auxelm's prev doesn't link to auxelm"); + nnodes += node_validate(auxelm, NULL); + } + +label_return: + if (false) + heap_print(heap); + return (nnodes); +} + +TEST_BEGIN(test_ph_empty) +{ + heap_t heap; + + heap_new(&heap); + assert_true(heap_empty(&heap), "Heap should be empty"); + assert_ptr_null(heap_first(&heap), "Unexpected node"); +} +TEST_END + +static void +node_remove(heap_t *heap, node_t *node) +{ + + heap_remove(heap, node); + + node->magic = 0; +} + +static node_t * +node_remove_first(heap_t *heap) +{ + node_t *node = heap_remove_first(heap); + node->magic = 0; + return (node); +} + +TEST_BEGIN(test_ph_random) +{ +#define NNODES 25 +#define NBAGS 250 +#define SEED 42 + sfmt_t *sfmt; + uint64_t bag[NNODES]; + heap_t heap; + node_t nodes[NNODES]; + unsigned i, j, k; + + sfmt = init_gen_rand(SEED); + for (i = 0; i < NBAGS; i++) { + switch (i) { + case 0: + /* Insert in order. */ + for (j = 0; j < NNODES; j++) + bag[j] = j; + break; + case 1: + /* Insert in reverse order. */ + for (j = 0; j < NNODES; j++) + bag[j] = NNODES - j - 1; + break; + default: + for (j = 0; j < NNODES; j++) + bag[j] = gen_rand64_range(sfmt, NNODES); + } + + for (j = 1; j <= NNODES; j++) { + /* Initialize heap and nodes. 
*/ + heap_new(&heap); + assert_u_eq(heap_validate(&heap), 0, + "Incorrect node count"); + for (k = 0; k < j; k++) { + nodes[k].magic = NODE_MAGIC; + nodes[k].key = bag[k]; + } + + /* Insert nodes. */ + for (k = 0; k < j; k++) { + heap_insert(&heap, &nodes[k]); + if (i % 13 == 12) { + /* Trigger merging. */ + assert_ptr_not_null(heap_first(&heap), + "Heap should not be empty"); + } + assert_u_eq(heap_validate(&heap), k + 1, + "Incorrect node count"); + } + + assert_false(heap_empty(&heap), + "Heap should not be empty"); + + /* Remove nodes. */ + switch (i % 4) { + case 0: + for (k = 0; k < j; k++) { + assert_u_eq(heap_validate(&heap), j - k, + "Incorrect node count"); + node_remove(&heap, &nodes[k]); + assert_u_eq(heap_validate(&heap), j - k + - 1, "Incorrect node count"); + } + break; + case 1: + for (k = j; k > 0; k--) { + node_remove(&heap, &nodes[k-1]); + assert_u_eq(heap_validate(&heap), k - 1, + "Incorrect node count"); + } + break; + case 2: { + node_t *prev = NULL; + for (k = 0; k < j; k++) { + node_t *node = node_remove_first(&heap); + assert_u_eq(heap_validate(&heap), j - k + - 1, "Incorrect node count"); + if (prev != NULL) { + assert_d_ge(node_cmp(node, + prev), 0, + "Bad removal order"); + } + prev = node; + } + break; + } case 3: { + node_t *prev = NULL; + for (k = 0; k < j; k++) { + node_t *node = heap_first(&heap); + assert_u_eq(heap_validate(&heap), j - k, + "Incorrect node count"); + if (prev != NULL) { + assert_d_ge(node_cmp(node, + prev), 0, + "Bad removal order"); + } + node_remove(&heap, node); + assert_u_eq(heap_validate(&heap), j - k + - 1, "Incorrect node count"); + prev = node; + } + break; + } default: + not_reached(); + } + + assert_ptr_null(heap_first(&heap), + "Heap should be empty"); + assert_true(heap_empty(&heap), "Heap should be empty"); + } + } + fini_gen_rand(sfmt); +#undef NNODES +#undef SEED +} +TEST_END + +int +main(void) +{ + + return (test( + test_ph_empty, + test_ph_random)); +} diff --git a/deps/jemalloc/test/unit/prng.c 
b/deps/jemalloc/test/unit/prng.c new file mode 100644 index 000000000..80c9d733f --- /dev/null +++ b/deps/jemalloc/test/unit/prng.c @@ -0,0 +1,263 @@ +#include "test/jemalloc_test.h" + +static void +test_prng_lg_range_u32(bool atomic) +{ + uint32_t sa, sb, ra, rb; + unsigned lg_range; + + sa = 42; + ra = prng_lg_range_u32(&sa, 32, atomic); + sa = 42; + rb = prng_lg_range_u32(&sa, 32, atomic); + assert_u32_eq(ra, rb, + "Repeated generation should produce repeated results"); + + sb = 42; + rb = prng_lg_range_u32(&sb, 32, atomic); + assert_u32_eq(ra, rb, + "Equivalent generation should produce equivalent results"); + + sa = 42; + ra = prng_lg_range_u32(&sa, 32, atomic); + rb = prng_lg_range_u32(&sa, 32, atomic); + assert_u32_ne(ra, rb, + "Full-width results must not immediately repeat"); + + sa = 42; + ra = prng_lg_range_u32(&sa, 32, atomic); + for (lg_range = 31; lg_range > 0; lg_range--) { + sb = 42; + rb = prng_lg_range_u32(&sb, lg_range, atomic); + assert_u32_eq((rb & (UINT32_C(0xffffffff) << lg_range)), + 0, "High order bits should be 0, lg_range=%u", lg_range); + assert_u32_eq(rb, (ra >> (32 - lg_range)), + "Expected high order bits of full-width result, " + "lg_range=%u", lg_range); + } +} + +static void +test_prng_lg_range_u64(void) +{ + uint64_t sa, sb, ra, rb; + unsigned lg_range; + + sa = 42; + ra = prng_lg_range_u64(&sa, 64); + sa = 42; + rb = prng_lg_range_u64(&sa, 64); + assert_u64_eq(ra, rb, + "Repeated generation should produce repeated results"); + + sb = 42; + rb = prng_lg_range_u64(&sb, 64); + assert_u64_eq(ra, rb, + "Equivalent generation should produce equivalent results"); + + sa = 42; + ra = prng_lg_range_u64(&sa, 64); + rb = prng_lg_range_u64(&sa, 64); + assert_u64_ne(ra, rb, + "Full-width results must not immediately repeat"); + + sa = 42; + ra = prng_lg_range_u64(&sa, 64); + for (lg_range = 63; lg_range > 0; lg_range--) { + sb = 42; + rb = prng_lg_range_u64(&sb, lg_range); + assert_u64_eq((rb & (UINT64_C(0xffffffffffffffff) << lg_range)), + 
0, "High order bits should be 0, lg_range=%u", lg_range); + assert_u64_eq(rb, (ra >> (64 - lg_range)), + "Expected high order bits of full-width result, " + "lg_range=%u", lg_range); + } +} + +static void +test_prng_lg_range_zu(bool atomic) +{ + size_t sa, sb, ra, rb; + unsigned lg_range; + + sa = 42; + ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); + sa = 42; + rb = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); + assert_zu_eq(ra, rb, + "Repeated generation should produce repeated results"); + + sb = 42; + rb = prng_lg_range_zu(&sb, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); + assert_zu_eq(ra, rb, + "Equivalent generation should produce equivalent results"); + + sa = 42; + ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); + rb = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); + assert_zu_ne(ra, rb, + "Full-width results must not immediately repeat"); + + sa = 42; + ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); + for (lg_range = (ZU(1) << (3 + LG_SIZEOF_PTR)) - 1; lg_range > 0; + lg_range--) { + sb = 42; + rb = prng_lg_range_zu(&sb, lg_range, atomic); + assert_zu_eq((rb & (SIZE_T_MAX << lg_range)), + 0, "High order bits should be 0, lg_range=%u", lg_range); + assert_zu_eq(rb, (ra >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - + lg_range)), "Expected high order bits of full-width " + "result, lg_range=%u", lg_range); + } +} + +TEST_BEGIN(test_prng_lg_range_u32_nonatomic) +{ + + test_prng_lg_range_u32(false); +} +TEST_END + +TEST_BEGIN(test_prng_lg_range_u32_atomic) +{ + + test_prng_lg_range_u32(true); +} +TEST_END + +TEST_BEGIN(test_prng_lg_range_u64_nonatomic) +{ + + test_prng_lg_range_u64(); +} +TEST_END + +TEST_BEGIN(test_prng_lg_range_zu_nonatomic) +{ + + test_prng_lg_range_zu(false); +} +TEST_END + +TEST_BEGIN(test_prng_lg_range_zu_atomic) +{ + + test_prng_lg_range_zu(true); +} +TEST_END + +static void +test_prng_range_u32(bool atomic) +{ + uint32_t range; +#define MAX_RANGE 10000000 +#define 
RANGE_STEP 97 +#define NREPS 10 + + for (range = 2; range < MAX_RANGE; range += RANGE_STEP) { + uint32_t s; + unsigned rep; + + s = range; + for (rep = 0; rep < NREPS; rep++) { + uint32_t r = prng_range_u32(&s, range, atomic); + + assert_u32_lt(r, range, "Out of range"); + } + } +} + +static void +test_prng_range_u64(void) +{ + uint64_t range; +#define MAX_RANGE 10000000 +#define RANGE_STEP 97 +#define NREPS 10 + + for (range = 2; range < MAX_RANGE; range += RANGE_STEP) { + uint64_t s; + unsigned rep; + + s = range; + for (rep = 0; rep < NREPS; rep++) { + uint64_t r = prng_range_u64(&s, range); + + assert_u64_lt(r, range, "Out of range"); + } + } +} + +static void +test_prng_range_zu(bool atomic) +{ + size_t range; +#define MAX_RANGE 10000000 +#define RANGE_STEP 97 +#define NREPS 10 + + for (range = 2; range < MAX_RANGE; range += RANGE_STEP) { + size_t s; + unsigned rep; + + s = range; + for (rep = 0; rep < NREPS; rep++) { + size_t r = prng_range_zu(&s, range, atomic); + + assert_zu_lt(r, range, "Out of range"); + } + } +} + +TEST_BEGIN(test_prng_range_u32_nonatomic) +{ + + test_prng_range_u32(false); +} +TEST_END + +TEST_BEGIN(test_prng_range_u32_atomic) +{ + + test_prng_range_u32(true); +} +TEST_END + +TEST_BEGIN(test_prng_range_u64_nonatomic) +{ + + test_prng_range_u64(); +} +TEST_END + +TEST_BEGIN(test_prng_range_zu_nonatomic) +{ + + test_prng_range_zu(false); +} +TEST_END + +TEST_BEGIN(test_prng_range_zu_atomic) +{ + + test_prng_range_zu(true); +} +TEST_END + +int +main(void) +{ + + return (test( + test_prng_lg_range_u32_nonatomic, + test_prng_lg_range_u32_atomic, + test_prng_lg_range_u64_nonatomic, + test_prng_lg_range_zu_nonatomic, + test_prng_lg_range_zu_atomic, + test_prng_range_u32_nonatomic, + test_prng_range_u32_atomic, + test_prng_range_u64_nonatomic, + test_prng_range_zu_nonatomic, + test_prng_range_zu_atomic)); +} diff --git a/deps/jemalloc/test/unit/prof_accum.c b/deps/jemalloc/test/unit/prof_accum.c old mode 100644 new mode 100755 index 
fd229e0fd..d941b5bc6 --- a/deps/jemalloc/test/unit/prof_accum.c +++ b/deps/jemalloc/test/unit/prof_accum.c @@ -68,8 +68,9 @@ TEST_BEGIN(test_idump) test_skip_if(!config_prof); active = true; - assert_d_eq(mallctl("prof.active", NULL, NULL, &active, sizeof(active)), - 0, "Unexpected mallctl failure while activating profiling"); + assert_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, + sizeof(active)), 0, + "Unexpected mallctl failure while activating profiling"); prof_dump_open = prof_dump_open_intercept; diff --git a/deps/jemalloc/test/unit/prof_active.c b/deps/jemalloc/test/unit/prof_active.c old mode 100644 new mode 100755 index 814909572..d00943a4c --- a/deps/jemalloc/test/unit/prof_active.c +++ b/deps/jemalloc/test/unit/prof_active.c @@ -12,7 +12,7 @@ mallctl_bool_get(const char *name, bool expected, const char *func, int line) size_t sz; sz = sizeof(old); - assert_d_eq(mallctl(name, &old, &sz, NULL, 0), 0, + assert_d_eq(mallctl(name, (void *)&old, &sz, NULL, 0), 0, "%s():%d: Unexpected mallctl failure reading %s", func, line, name); assert_b_eq(old, expected, "%s():%d: Unexpected %s value", func, line, name); @@ -26,7 +26,8 @@ mallctl_bool_set(const char *name, bool old_expected, bool val_new, size_t sz; sz = sizeof(old); - assert_d_eq(mallctl(name, &old, &sz, &val_new, sizeof(val_new)), 0, + assert_d_eq(mallctl(name, (void *)&old, &sz, (void *)&val_new, + sizeof(val_new)), 0, "%s():%d: Unexpected mallctl failure reading/writing %s", func, line, name); assert_b_eq(old, old_expected, "%s():%d: Unexpected %s value", func, diff --git a/deps/jemalloc/test/unit/prof_gdump.c b/deps/jemalloc/test/unit/prof_gdump.c old mode 100644 new mode 100755 index a0e6ee921..996cb6704 --- a/deps/jemalloc/test/unit/prof_gdump.c +++ b/deps/jemalloc/test/unit/prof_gdump.c @@ -28,8 +28,9 @@ TEST_BEGIN(test_gdump) test_skip_if(!config_prof); active = true; - assert_d_eq(mallctl("prof.active", NULL, NULL, &active, sizeof(active)), - 0, "Unexpected mallctl failure while 
activating profiling"); + assert_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, + sizeof(active)), 0, + "Unexpected mallctl failure while activating profiling"); prof_dump_open = prof_dump_open_intercept; @@ -45,8 +46,8 @@ TEST_BEGIN(test_gdump) gdump = false; sz = sizeof(gdump_old); - assert_d_eq(mallctl("prof.gdump", &gdump_old, &sz, &gdump, - sizeof(gdump)), 0, + assert_d_eq(mallctl("prof.gdump", (void *)&gdump_old, &sz, + (void *)&gdump, sizeof(gdump)), 0, "Unexpected mallctl failure while disabling prof.gdump"); assert(gdump_old); did_prof_dump_open = false; @@ -56,8 +57,8 @@ TEST_BEGIN(test_gdump) gdump = true; sz = sizeof(gdump_old); - assert_d_eq(mallctl("prof.gdump", &gdump_old, &sz, &gdump, - sizeof(gdump)), 0, + assert_d_eq(mallctl("prof.gdump", (void *)&gdump_old, &sz, + (void *)&gdump, sizeof(gdump)), 0, "Unexpected mallctl failure while enabling prof.gdump"); assert(!gdump_old); did_prof_dump_open = false; diff --git a/deps/jemalloc/test/unit/prof_idump.c b/deps/jemalloc/test/unit/prof_idump.c old mode 100644 new mode 100755 index bdea53ecd..16c6462de --- a/deps/jemalloc/test/unit/prof_idump.c +++ b/deps/jemalloc/test/unit/prof_idump.c @@ -29,8 +29,9 @@ TEST_BEGIN(test_idump) test_skip_if(!config_prof); active = true; - assert_d_eq(mallctl("prof.active", NULL, NULL, &active, sizeof(active)), - 0, "Unexpected mallctl failure while activating profiling"); + assert_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, + sizeof(active)), 0, + "Unexpected mallctl failure while activating profiling"); prof_dump_open = prof_dump_open_intercept; diff --git a/deps/jemalloc/test/unit/prof_reset.c b/deps/jemalloc/test/unit/prof_reset.c old mode 100644 new mode 100755 index 69983e5e5..59d70796a --- a/deps/jemalloc/test/unit/prof_reset.c +++ b/deps/jemalloc/test/unit/prof_reset.c @@ -20,8 +20,8 @@ static void set_prof_active(bool active) { - assert_d_eq(mallctl("prof.active", NULL, NULL, &active, sizeof(active)), - 0, "Unexpected mallctl failure"); + 
assert_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, + sizeof(active)), 0, "Unexpected mallctl failure"); } static size_t @@ -30,7 +30,8 @@ get_lg_prof_sample(void) size_t lg_prof_sample; size_t sz = sizeof(size_t); - assert_d_eq(mallctl("prof.lg_sample", &lg_prof_sample, &sz, NULL, 0), 0, + assert_d_eq(mallctl("prof.lg_sample", (void *)&lg_prof_sample, &sz, + NULL, 0), 0, "Unexpected mallctl failure while reading profiling sample rate"); return (lg_prof_sample); } @@ -39,7 +40,7 @@ static void do_prof_reset(size_t lg_prof_sample) { assert_d_eq(mallctl("prof.reset", NULL, NULL, - &lg_prof_sample, sizeof(size_t)), 0, + (void *)&lg_prof_sample, sizeof(size_t)), 0, "Unexpected mallctl failure while resetting profile data"); assert_zu_eq(lg_prof_sample, get_lg_prof_sample(), "Expected profile sample rate change"); @@ -54,8 +55,8 @@ TEST_BEGIN(test_prof_reset_basic) test_skip_if(!config_prof); sz = sizeof(size_t); - assert_d_eq(mallctl("opt.lg_prof_sample", &lg_prof_sample_orig, &sz, - NULL, 0), 0, + assert_d_eq(mallctl("opt.lg_prof_sample", (void *)&lg_prof_sample_orig, + &sz, NULL, 0), 0, "Unexpected mallctl failure while reading profiling sample rate"); assert_zu_eq(lg_prof_sample_orig, 0, "Unexpected profiling sample rate"); @@ -94,7 +95,8 @@ TEST_END bool prof_dump_header_intercepted = false; prof_cnt_t cnt_all_copy = {0, 0, 0, 0}; static bool -prof_dump_header_intercept(bool propagate_err, const prof_cnt_t *cnt_all) +prof_dump_header_intercept(tsdn_t *tsdn, bool propagate_err, + const prof_cnt_t *cnt_all) { prof_dump_header_intercepted = true; diff --git a/deps/jemalloc/test/unit/prof_thread_name.c b/deps/jemalloc/test/unit/prof_thread_name.c old mode 100644 new mode 100755 index f501158d7..9ec549776 --- a/deps/jemalloc/test/unit/prof_thread_name.c +++ b/deps/jemalloc/test/unit/prof_thread_name.c @@ -12,8 +12,9 @@ mallctl_thread_name_get_impl(const char *thread_name_expected, const char *func, size_t sz; sz = sizeof(thread_name_old); - 
assert_d_eq(mallctl("thread.prof.name", &thread_name_old, &sz, NULL, 0), - 0, "%s():%d: Unexpected mallctl failure reading thread.prof.name", + assert_d_eq(mallctl("thread.prof.name", (void *)&thread_name_old, &sz, + NULL, 0), 0, + "%s():%d: Unexpected mallctl failure reading thread.prof.name", func, line); assert_str_eq(thread_name_old, thread_name_expected, "%s():%d: Unexpected thread.prof.name value", func, line); @@ -26,8 +27,8 @@ mallctl_thread_name_set_impl(const char *thread_name, const char *func, int line) { - assert_d_eq(mallctl("thread.prof.name", NULL, NULL, &thread_name, - sizeof(thread_name)), 0, + assert_d_eq(mallctl("thread.prof.name", NULL, NULL, + (void *)&thread_name, sizeof(thread_name)), 0, "%s():%d: Unexpected mallctl failure reading thread.prof.name", func, line); mallctl_thread_name_get_impl(thread_name, func, line); @@ -46,15 +47,15 @@ TEST_BEGIN(test_prof_thread_name_validation) /* NULL input shouldn't be allowed. */ thread_name = NULL; - assert_d_eq(mallctl("thread.prof.name", NULL, NULL, &thread_name, - sizeof(thread_name)), EFAULT, + assert_d_eq(mallctl("thread.prof.name", NULL, NULL, + (void *)&thread_name, sizeof(thread_name)), EFAULT, "Unexpected mallctl result writing \"%s\" to thread.prof.name", thread_name); /* '\n' shouldn't be allowed. 
*/ thread_name = "hi\nthere"; - assert_d_eq(mallctl("thread.prof.name", NULL, NULL, &thread_name, - sizeof(thread_name)), EFAULT, + assert_d_eq(mallctl("thread.prof.name", NULL, NULL, + (void *)&thread_name, sizeof(thread_name)), EFAULT, "Unexpected mallctl result writing \"%s\" to thread.prof.name", thread_name); @@ -64,8 +65,9 @@ TEST_BEGIN(test_prof_thread_name_validation) size_t sz; sz = sizeof(thread_name_old); - assert_d_eq(mallctl("thread.prof.name", &thread_name_old, &sz, - &thread_name, sizeof(thread_name)), EPERM, + assert_d_eq(mallctl("thread.prof.name", + (void *)&thread_name_old, &sz, (void *)&thread_name, + sizeof(thread_name)), EPERM, "Unexpected mallctl result writing \"%s\" to " "thread.prof.name", thread_name); } diff --git a/deps/jemalloc/test/unit/rb.c b/deps/jemalloc/test/unit/rb.c index b38eb0e33..cf3d3a783 100644 --- a/deps/jemalloc/test/unit/rb.c +++ b/deps/jemalloc/test/unit/rb.c @@ -3,7 +3,7 @@ #define rbtn_black_height(a_type, a_field, a_rbt, r_height) do { \ a_type *rbp_bh_t; \ for (rbp_bh_t = (a_rbt)->rbt_root, (r_height) = 0; \ - rbp_bh_t != &(a_rbt)->rbt_nil; \ + rbp_bh_t != NULL; \ rbp_bh_t = rbtn_left_get(a_type, a_field, rbp_bh_t)) { \ if (!rbtn_red_get(a_type, a_field, rbp_bh_t)) { \ (r_height)++; \ @@ -21,7 +21,7 @@ struct node_s { }; static int -node_cmp(node_t *a, node_t *b) { +node_cmp(const node_t *a, const node_t *b) { int ret; assert_u32_eq(a->magic, NODE_MAGIC, "Bad magic"); @@ -68,38 +68,43 @@ TEST_BEGIN(test_rb_empty) TEST_END static unsigned -tree_recurse(node_t *node, unsigned black_height, unsigned black_depth, - node_t *nil) +tree_recurse(node_t *node, unsigned black_height, unsigned black_depth) { unsigned ret = 0; - node_t *left_node = rbtn_left_get(node_t, link, node); - node_t *right_node = rbtn_right_get(node_t, link, node); + node_t *left_node; + node_t *right_node; + + if (node == NULL) + return (ret); + + left_node = rbtn_left_get(node_t, link, node); + right_node = rbtn_right_get(node_t, link, node); if 
(!rbtn_red_get(node_t, link, node)) black_depth++; /* Red nodes must be interleaved with black nodes. */ if (rbtn_red_get(node_t, link, node)) { - assert_false(rbtn_red_get(node_t, link, left_node), - "Node should be black"); - assert_false(rbtn_red_get(node_t, link, right_node), - "Node should be black"); + if (left_node != NULL) + assert_false(rbtn_red_get(node_t, link, left_node), + "Node should be black"); + if (right_node != NULL) + assert_false(rbtn_red_get(node_t, link, right_node), + "Node should be black"); } - if (node == nil) - return (ret); /* Self. */ assert_u32_eq(node->magic, NODE_MAGIC, "Bad magic"); /* Left subtree. */ - if (left_node != nil) - ret += tree_recurse(left_node, black_height, black_depth, nil); + if (left_node != NULL) + ret += tree_recurse(left_node, black_height, black_depth); else ret += (black_depth != black_height); /* Right subtree. */ - if (right_node != nil) - ret += tree_recurse(right_node, black_height, black_depth, nil); + if (right_node != NULL) + ret += tree_recurse(right_node, black_height, black_depth); else ret += (black_depth != black_height); @@ -181,8 +186,7 @@ node_remove(tree_t *tree, node_t *node, unsigned nnodes) node->magic = 0; rbtn_black_height(node_t, link, tree, black_height); - imbalances = tree_recurse(tree->rbt_root, black_height, 0, - &(tree->rbt_nil)); + imbalances = tree_recurse(tree->rbt_root, black_height, 0); assert_u_eq(imbalances, 0, "Tree is unbalanced"); assert_u_eq(tree_iterate(tree), nnodes-1, "Unexpected node iteration count"); @@ -212,6 +216,15 @@ remove_reverse_iterate_cb(tree_t *tree, node_t *node, void *data) return (ret); } +static void +destroy_cb(node_t *node, void *data) +{ + unsigned *nnodes = (unsigned *)data; + + assert_u_gt(*nnodes, 0, "Destruction removed too many nodes"); + (*nnodes)--; +} + TEST_BEGIN(test_rb_random) { #define NNODES 25 @@ -244,7 +257,6 @@ TEST_BEGIN(test_rb_random) for (j = 1; j <= NNODES; j++) { /* Initialize tree and nodes. 
*/ tree_new(&tree); - tree.rbt_nil.magic = 0; for (k = 0; k < j; k++) { nodes[k].magic = NODE_MAGIC; nodes[k].key = bag[k]; @@ -257,7 +269,7 @@ TEST_BEGIN(test_rb_random) rbtn_black_height(node_t, link, &tree, black_height); imbalances = tree_recurse(tree.rbt_root, - black_height, 0, &(tree.rbt_nil)); + black_height, 0); assert_u_eq(imbalances, 0, "Tree is unbalanced"); @@ -278,7 +290,7 @@ TEST_BEGIN(test_rb_random) } /* Remove nodes. */ - switch (i % 4) { + switch (i % 5) { case 0: for (k = 0; k < j; k++) node_remove(&tree, &nodes[k], j - k); @@ -314,6 +326,12 @@ TEST_BEGIN(test_rb_random) assert_u_eq(nnodes, 0, "Removal terminated early"); break; + } case 4: { + unsigned nnodes = j; + tree_destroy(&tree, destroy_cb, &nnodes); + assert_u_eq(nnodes, 0, + "Destruction terminated early"); + break; } default: not_reached(); } diff --git a/deps/jemalloc/test/unit/run_quantize.c b/deps/jemalloc/test/unit/run_quantize.c new file mode 100644 index 000000000..089176f39 --- /dev/null +++ b/deps/jemalloc/test/unit/run_quantize.c @@ -0,0 +1,149 @@ +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_small_run_size) +{ + unsigned nbins, i; + size_t sz, run_size; + size_t mib[4]; + size_t miblen = sizeof(mib) / sizeof(size_t); + + /* + * Iterate over all small size classes, get their run sizes, and verify + * that the quantized size is the same as the run size. 
+ */ + + sz = sizeof(unsigned); + assert_d_eq(mallctl("arenas.nbins", (void *)&nbins, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + + assert_d_eq(mallctlnametomib("arenas.bin.0.run_size", mib, &miblen), 0, + "Unexpected mallctlnametomib failure"); + for (i = 0; i < nbins; i++) { + mib[2] = i; + sz = sizeof(size_t); + assert_d_eq(mallctlbymib(mib, miblen, (void *)&run_size, &sz, + NULL, 0), 0, "Unexpected mallctlbymib failure"); + assert_zu_eq(run_size, run_quantize_floor(run_size), + "Small run quantization should be a no-op (run_size=%zu)", + run_size); + assert_zu_eq(run_size, run_quantize_ceil(run_size), + "Small run quantization should be a no-op (run_size=%zu)", + run_size); + } +} +TEST_END + +TEST_BEGIN(test_large_run_size) +{ + bool cache_oblivious; + unsigned nlruns, i; + size_t sz, run_size_prev, ceil_prev; + size_t mib[4]; + size_t miblen = sizeof(mib) / sizeof(size_t); + + /* + * Iterate over all large size classes, get their run sizes, and verify + * that the quantized size is the same as the run size. + */ + + sz = sizeof(bool); + assert_d_eq(mallctl("config.cache_oblivious", (void *)&cache_oblivious, + &sz, NULL, 0), 0, "Unexpected mallctl failure"); + + sz = sizeof(unsigned); + assert_d_eq(mallctl("arenas.nlruns", (void *)&nlruns, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + + assert_d_eq(mallctlnametomib("arenas.lrun.0.size", mib, &miblen), 0, + "Unexpected mallctlnametomib failure"); + for (i = 0; i < nlruns; i++) { + size_t lrun_size, run_size, floor, ceil; + + mib[2] = i; + sz = sizeof(size_t); + assert_d_eq(mallctlbymib(mib, miblen, (void *)&lrun_size, &sz, + NULL, 0), 0, "Unexpected mallctlbymib failure"); + run_size = cache_oblivious ? 
lrun_size + PAGE : lrun_size; + floor = run_quantize_floor(run_size); + ceil = run_quantize_ceil(run_size); + + assert_zu_eq(run_size, floor, + "Large run quantization should be a no-op for precise " + "size (lrun_size=%zu, run_size=%zu)", lrun_size, run_size); + assert_zu_eq(run_size, ceil, + "Large run quantization should be a no-op for precise " + "size (lrun_size=%zu, run_size=%zu)", lrun_size, run_size); + + if (i > 0) { + assert_zu_eq(run_size_prev, run_quantize_floor(run_size + - PAGE), "Floor should be a precise size"); + if (run_size_prev < ceil_prev) { + assert_zu_eq(ceil_prev, run_size, + "Ceiling should be a precise size " + "(run_size_prev=%zu, ceil_prev=%zu, " + "run_size=%zu)", run_size_prev, ceil_prev, + run_size); + } + } + run_size_prev = floor; + ceil_prev = run_quantize_ceil(run_size + PAGE); + } +} +TEST_END + +TEST_BEGIN(test_monotonic) +{ + unsigned nbins, nlruns, i; + size_t sz, floor_prev, ceil_prev; + + /* + * Iterate over all run sizes and verify that + * run_quantize_{floor,ceil}() are monotonic. 
+ */ + + sz = sizeof(unsigned); + assert_d_eq(mallctl("arenas.nbins", (void *)&nbins, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + + sz = sizeof(unsigned); + assert_d_eq(mallctl("arenas.nlruns", (void *)&nlruns, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + + floor_prev = 0; + ceil_prev = 0; + for (i = 1; i <= chunksize >> LG_PAGE; i++) { + size_t run_size, floor, ceil; + + run_size = i << LG_PAGE; + floor = run_quantize_floor(run_size); + ceil = run_quantize_ceil(run_size); + + assert_zu_le(floor, run_size, + "Floor should be <= (floor=%zu, run_size=%zu, ceil=%zu)", + floor, run_size, ceil); + assert_zu_ge(ceil, run_size, + "Ceiling should be >= (floor=%zu, run_size=%zu, ceil=%zu)", + floor, run_size, ceil); + + assert_zu_le(floor_prev, floor, "Floor should be monotonic " + "(floor_prev=%zu, floor=%zu, run_size=%zu, ceil=%zu)", + floor_prev, floor, run_size, ceil); + assert_zu_le(ceil_prev, ceil, "Ceiling should be monotonic " + "(floor=%zu, run_size=%zu, ceil_prev=%zu, ceil=%zu)", + floor, run_size, ceil_prev, ceil); + + floor_prev = floor; + ceil_prev = ceil; + } +} +TEST_END + +int +main(void) +{ + + return (test( + test_small_run_size, + test_large_run_size, + test_monotonic)); +} diff --git a/deps/jemalloc/test/unit/size_classes.c b/deps/jemalloc/test/unit/size_classes.c old mode 100644 new mode 100755 index d3aaebd77..81cc60617 --- a/deps/jemalloc/test/unit/size_classes.c +++ b/deps/jemalloc/test/unit/size_classes.c @@ -8,8 +8,8 @@ get_max_size_class(void) size_t sz, miblen, max_size_class; sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.nhchunks", &nhchunks, &sz, NULL, 0), 0, - "Unexpected mallctl() error"); + assert_d_eq(mallctl("arenas.nhchunks", (void *)&nhchunks, &sz, NULL, 0), + 0, "Unexpected mallctl() error"); miblen = sizeof(mib) / sizeof(size_t); assert_d_eq(mallctlnametomib("arenas.hchunk.0.size", mib, &miblen), 0, @@ -17,8 +17,8 @@ get_max_size_class(void) mib[2] = nhchunks - 1; sz = sizeof(size_t); - 
assert_d_eq(mallctlbymib(mib, miblen, &max_size_class, &sz, NULL, 0), 0, - "Unexpected mallctlbymib() error"); + assert_d_eq(mallctlbymib(mib, miblen, (void *)&max_size_class, &sz, + NULL, 0), 0, "Unexpected mallctlbymib() error"); return (max_size_class); } @@ -80,10 +80,105 @@ TEST_BEGIN(test_size_classes) } TEST_END +TEST_BEGIN(test_psize_classes) +{ + size_t size_class, max_size_class; + pszind_t pind, max_pind; + + max_size_class = get_max_size_class(); + max_pind = psz2ind(max_size_class); + + for (pind = 0, size_class = pind2sz(pind); pind < max_pind || + size_class < max_size_class; pind++, size_class = + pind2sz(pind)) { + assert_true(pind < max_pind, + "Loop conditionals should be equivalent; pind=%u, " + "size_class=%zu (%#zx)", pind, size_class, size_class); + assert_true(size_class < max_size_class, + "Loop conditionals should be equivalent; pind=%u, " + "size_class=%zu (%#zx)", pind, size_class, size_class); + + assert_u_eq(pind, psz2ind(size_class), + "psz2ind() does not reverse pind2sz(): pind=%u -->" + " size_class=%zu --> pind=%u --> size_class=%zu", pind, + size_class, psz2ind(size_class), + pind2sz(psz2ind(size_class))); + assert_zu_eq(size_class, pind2sz(psz2ind(size_class)), + "pind2sz() does not reverse psz2ind(): pind=%u -->" + " size_class=%zu --> pind=%u --> size_class=%zu", pind, + size_class, psz2ind(size_class), + pind2sz(psz2ind(size_class))); + + assert_u_eq(pind+1, psz2ind(size_class+1), + "Next size_class does not round up properly"); + + assert_zu_eq(size_class, (pind > 0) ? 
+ psz2u(pind2sz(pind-1)+1) : psz2u(1), + "psz2u() does not round up to size class"); + assert_zu_eq(size_class, psz2u(size_class-1), + "psz2u() does not round up to size class"); + assert_zu_eq(size_class, psz2u(size_class), + "psz2u() does not compute same size class"); + assert_zu_eq(psz2u(size_class+1), pind2sz(pind+1), + "psz2u() does not round up to next size class"); + } + + assert_u_eq(pind, psz2ind(pind2sz(pind)), + "psz2ind() does not reverse pind2sz()"); + assert_zu_eq(max_size_class, pind2sz(psz2ind(max_size_class)), + "pind2sz() does not reverse psz2ind()"); + + assert_zu_eq(size_class, psz2u(pind2sz(pind-1)+1), + "psz2u() does not round up to size class"); + assert_zu_eq(size_class, psz2u(size_class-1), + "psz2u() does not round up to size class"); + assert_zu_eq(size_class, psz2u(size_class), + "psz2u() does not compute same size class"); +} +TEST_END + +TEST_BEGIN(test_overflow) +{ + size_t max_size_class; + + max_size_class = get_max_size_class(); + + assert_u_eq(size2index(max_size_class+1), NSIZES, + "size2index() should return NSIZES on overflow"); + assert_u_eq(size2index(ZU(PTRDIFF_MAX)+1), NSIZES, + "size2index() should return NSIZES on overflow"); + assert_u_eq(size2index(SIZE_T_MAX), NSIZES, + "size2index() should return NSIZES on overflow"); + + assert_zu_eq(s2u(max_size_class+1), 0, + "s2u() should return 0 for unsupported size"); + assert_zu_eq(s2u(ZU(PTRDIFF_MAX)+1), 0, + "s2u() should return 0 for unsupported size"); + assert_zu_eq(s2u(SIZE_T_MAX), 0, + "s2u() should return 0 on overflow"); + + assert_u_eq(psz2ind(max_size_class+1), NPSIZES, + "psz2ind() should return NPSIZES on overflow"); + assert_u_eq(psz2ind(ZU(PTRDIFF_MAX)+1), NPSIZES, + "psz2ind() should return NPSIZES on overflow"); + assert_u_eq(psz2ind(SIZE_T_MAX), NPSIZES, + "psz2ind() should return NPSIZES on overflow"); + + assert_zu_eq(psz2u(max_size_class+1), 0, + "psz2u() should return 0 for unsupported size"); + assert_zu_eq(psz2u(ZU(PTRDIFF_MAX)+1), 0, + "psz2u() should 
return 0 for unsupported size"); + assert_zu_eq(psz2u(SIZE_T_MAX), 0, + "psz2u() should return 0 on overflow"); +} +TEST_END + int main(void) { return (test( - test_size_classes)); + test_size_classes, + test_psize_classes, + test_overflow)); } diff --git a/deps/jemalloc/test/unit/smoothstep.c b/deps/jemalloc/test/unit/smoothstep.c new file mode 100644 index 000000000..4cfb21343 --- /dev/null +++ b/deps/jemalloc/test/unit/smoothstep.c @@ -0,0 +1,106 @@ +#include "test/jemalloc_test.h" + +static const uint64_t smoothstep_tab[] = { +#define STEP(step, h, x, y) \ + h, + SMOOTHSTEP +#undef STEP +}; + +TEST_BEGIN(test_smoothstep_integral) +{ + uint64_t sum, min, max; + unsigned i; + + /* + * The integral of smoothstep in the [0..1] range equals 1/2. Verify + * that the fixed point representation's integral is no more than + * rounding error distant from 1/2. Regarding rounding, each table + * element is rounded down to the nearest fixed point value, so the + * integral may be off by as much as SMOOTHSTEP_NSTEPS ulps. + */ + sum = 0; + for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) + sum += smoothstep_tab[i]; + + max = (KQU(1) << (SMOOTHSTEP_BFP-1)) * (SMOOTHSTEP_NSTEPS+1); + min = max - SMOOTHSTEP_NSTEPS; + + assert_u64_ge(sum, min, + "Integral too small, even accounting for truncation"); + assert_u64_le(sum, max, "Integral exceeds 1/2"); + if (false) { + malloc_printf("%"FMTu64" ulps under 1/2 (limit %d)\n", + max - sum, SMOOTHSTEP_NSTEPS); + } +} +TEST_END + +TEST_BEGIN(test_smoothstep_monotonic) +{ + uint64_t prev_h; + unsigned i; + + /* + * The smoothstep function is monotonic in [0..1], i.e. its slope is + * non-negative. In practice we want to parametrize table generation + * such that piecewise slope is greater than zero, but do not require + * that here. 
+ */ + prev_h = 0; + for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) { + uint64_t h = smoothstep_tab[i]; + assert_u64_ge(h, prev_h, "Piecewise non-monotonic, i=%u", i); + prev_h = h; + } + assert_u64_eq(smoothstep_tab[SMOOTHSTEP_NSTEPS-1], + (KQU(1) << SMOOTHSTEP_BFP), "Last step must equal 1"); +} +TEST_END + +TEST_BEGIN(test_smoothstep_slope) +{ + uint64_t prev_h, prev_delta; + unsigned i; + + /* + * The smoothstep slope strictly increases until x=0.5, and then + * strictly decreases until x=1.0. Verify the slightly weaker + * requirement of monotonicity, so that inadequate table precision does + * not cause false test failures. + */ + prev_h = 0; + prev_delta = 0; + for (i = 0; i < SMOOTHSTEP_NSTEPS / 2 + SMOOTHSTEP_NSTEPS % 2; i++) { + uint64_t h = smoothstep_tab[i]; + uint64_t delta = h - prev_h; + assert_u64_ge(delta, prev_delta, + "Slope must monotonically increase in 0.0 <= x <= 0.5, " + "i=%u", i); + prev_h = h; + prev_delta = delta; + } + + prev_h = KQU(1) << SMOOTHSTEP_BFP; + prev_delta = 0; + for (i = SMOOTHSTEP_NSTEPS-1; i >= SMOOTHSTEP_NSTEPS / 2; i--) { + uint64_t h = smoothstep_tab[i]; + uint64_t delta = prev_h - h; + assert_u64_ge(delta, prev_delta, + "Slope must monotonically decrease in 0.5 <= x <= 1.0, " + "i=%u", i); + prev_h = h; + prev_delta = delta; + } +} +TEST_END + +int +main(void) +{ + + return (test( + test_smoothstep_integral, + test_smoothstep_monotonic, + test_smoothstep_slope)); +} diff --git a/deps/jemalloc/test/unit/stats.c b/deps/jemalloc/test/unit/stats.c old mode 100644 new mode 100755 index 8e4bc631e..315717dfb --- a/deps/jemalloc/test/unit/stats.c +++ b/deps/jemalloc/test/unit/stats.c @@ -7,18 +7,18 @@ TEST_BEGIN(test_stats_summary) int expected = config_stats ? 
0 : ENOENT; sz = sizeof(cactive); - assert_d_eq(mallctl("stats.cactive", &cactive, &sz, NULL, 0), expected, - "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.cactive", (void *)&cactive, &sz, NULL, 0), + expected, "Unexpected mallctl() result"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.allocated", &allocated, &sz, NULL, 0), + assert_d_eq(mallctl("stats.allocated", (void *)&allocated, &sz, NULL, + 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.active", (void *)&active, &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.active", &active, &sz, NULL, 0), expected, - "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.resident", &resident, &sz, NULL, 0), + assert_d_eq(mallctl("stats.resident", (void *)&resident, &sz, NULL, 0), + expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.mapped", (void *)&mapped, &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.mapped", &mapped, &sz, NULL, 0), expected, - "Unexpected mallctl() result"); if (config_stats) { assert_zu_le(active, *cactive, @@ -45,19 +45,19 @@ TEST_BEGIN(test_stats_huge) p = mallocx(large_maxclass+1, 0); assert_ptr_not_null(p, "Unexpected mallocx() failure"); - assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + 0, "Unexpected mallctl() failure"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.huge.allocated", &allocated, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.huge.allocated", (void *)&allocated, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.huge.nmalloc", &nmalloc, &sz, NULL, - 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.huge.ndalloc", &ndalloc, &sz, NULL, - 0), 
expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.huge.nrequests", &nrequests, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.huge.nmalloc", (void *)&nmalloc, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.huge.ndalloc", (void *)&ndalloc, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.huge.nrequests", (void *)&nrequests, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); if (config_stats) { assert_zu_gt(allocated, 0, @@ -83,8 +83,8 @@ TEST_BEGIN(test_stats_arenas_summary) uint64_t npurge, nmadvise, purged; arena = 0; - assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)), - 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("thread.arena", NULL, NULL, (void *)&arena, + sizeof(arena)), 0, "Unexpected mallctl() failure"); little = mallocx(SMALL_MAXCLASS, 0); assert_ptr_not_null(little, "Unexpected mallocx() failure"); @@ -93,22 +93,26 @@ TEST_BEGIN(test_stats_arenas_summary) huge = mallocx(chunksize, 0); assert_ptr_not_null(huge, "Unexpected mallocx() failure"); + dallocx(little, 0); + dallocx(large, 0); + dallocx(huge, 0); + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + 0, "Unexpected mallctl() failure"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.mapped", &mapped, &sz, NULL, 0), - expected, "Unexepected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.mapped", (void *)&mapped, &sz, NULL, + 0), expected, "Unexepected mallctl() result"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge, &sz, NULL, 0), - expected, "Unexepected mallctl() result"); - 
assert_d_eq(mallctl("stats.arenas.0.nmadvise", &nmadvise, &sz, NULL, 0), - expected, "Unexepected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.purged", &purged, &sz, NULL, 0), - expected, "Unexepected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.npurge", (void *)&npurge, &sz, NULL, + 0), expected, "Unexepected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.nmadvise", (void *)&nmadvise, &sz, + NULL, 0), expected, "Unexepected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.purged", (void *)&purged, &sz, NULL, + 0), expected, "Unexepected mallctl() result"); if (config_stats) { assert_u64_gt(npurge, 0, @@ -116,10 +120,6 @@ TEST_BEGIN(test_stats_arenas_summary) assert_u64_le(nmadvise, purged, "nmadvise should be no greater than purged"); } - - dallocx(little, 0); - dallocx(large, 0); - dallocx(huge, 0); } TEST_END @@ -150,8 +150,8 @@ TEST_BEGIN(test_stats_arenas_small) no_lazy_lock(); /* Lazy locking would dodge tcache testing. */ arena = 0; - assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)), - 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("thread.arena", NULL, NULL, (void *)&arena, + sizeof(arena)), 0, "Unexpected mallctl() failure"); p = mallocx(SMALL_MAXCLASS, 0); assert_ptr_not_null(p, "Unexpected mallocx() failure"); @@ -159,19 +159,21 @@ TEST_BEGIN(test_stats_arenas_small) assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), config_tcache ? 
0 : ENOENT, "Unexpected mallctl() result"); - assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + 0, "Unexpected mallctl() failure"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.small.allocated", &allocated, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.small.allocated", + (void *)&allocated, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.small.nmalloc", &nmalloc, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.small.ndalloc", &ndalloc, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.small.nrequests", &nrequests, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.small.nmalloc", (void *)&nmalloc, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.small.ndalloc", (void *)&ndalloc, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.small.nrequests", + (void *)&nrequests, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); if (config_stats) { assert_zu_gt(allocated, 0, @@ -197,34 +199,36 @@ TEST_BEGIN(test_stats_arenas_large) int expected = config_stats ? 
0 : ENOENT; arena = 0; - assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)), - 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("thread.arena", NULL, NULL, (void *)&arena, + sizeof(arena)), 0, "Unexpected mallctl() failure"); p = mallocx(large_maxclass, 0); assert_ptr_not_null(p, "Unexpected mallocx() failure"); - assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + 0, "Unexpected mallctl() failure"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.large.allocated", &allocated, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.large.allocated", + (void *)&allocated, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.large.nmalloc", &nmalloc, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.large.ndalloc", &ndalloc, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.large.nrequests", &nrequests, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.large.nmalloc", (void *)&nmalloc, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.large.ndalloc", (void *)&ndalloc, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.large.nrequests", + (void *)&nrequests, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); if (config_stats) { assert_zu_gt(allocated, 0, "allocated should be greater than zero"); - assert_zu_gt(nmalloc, 0, + assert_u64_gt(nmalloc, 0, "nmalloc should be greater than zero"); - assert_zu_ge(nmalloc, ndalloc, + assert_u64_ge(nmalloc, ndalloc, "nmalloc should be at least as large as ndalloc"); - assert_zu_gt(nrequests, 0, + assert_u64_gt(nrequests, 
0, "nrequests should be greater than zero"); } @@ -241,30 +245,30 @@ TEST_BEGIN(test_stats_arenas_huge) int expected = config_stats ? 0 : ENOENT; arena = 0; - assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)), - 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("thread.arena", NULL, NULL, (void *)&arena, + sizeof(arena)), 0, "Unexpected mallctl() failure"); p = mallocx(chunksize, 0); assert_ptr_not_null(p, "Unexpected mallocx() failure"); - assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + 0, "Unexpected mallctl() failure"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.huge.allocated", &allocated, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.huge.allocated", (void *)&allocated, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.huge.nmalloc", &nmalloc, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.huge.ndalloc", &ndalloc, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.huge.nmalloc", (void *)&nmalloc, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.huge.ndalloc", (void *)&ndalloc, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); if (config_stats) { assert_zu_gt(allocated, 0, "allocated should be greater than zero"); - assert_zu_gt(nmalloc, 0, + assert_u64_gt(nmalloc, 0, "nmalloc should be greater than zero"); - assert_zu_ge(nmalloc, ndalloc, + assert_u64_ge(nmalloc, ndalloc, "nmalloc should be at least as large as ndalloc"); } @@ -282,8 +286,8 @@ TEST_BEGIN(test_stats_arenas_bins) int expected = config_stats ? 
0 : ENOENT; arena = 0; - assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)), - 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("thread.arena", NULL, NULL, (void *)&arena, + sizeof(arena)), 0, "Unexpected mallctl() failure"); p = mallocx(arena_bin_info[0].reg_size, 0); assert_ptr_not_null(p, "Unexpected mallocx() failure"); @@ -291,35 +295,36 @@ TEST_BEGIN(test_stats_arenas_bins) assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), config_tcache ? 0 : ENOENT, "Unexpected mallctl() result"); - assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + 0, "Unexpected mallctl() failure"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.bins.0.nmalloc", &nmalloc, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.bins.0.ndalloc", &ndalloc, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.bins.0.nrequests", &nrequests, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.bins.0.nmalloc", (void *)&nmalloc, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.bins.0.ndalloc", (void *)&ndalloc, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.bins.0.nrequests", + (void *)&nrequests, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.bins.0.curregs", &curregs, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.bins.0.curregs", (void *)&curregs, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.bins.0.nfills", &nfills, &sz, - NULL, 0), config_tcache ? 
expected : ENOENT, + assert_d_eq(mallctl("stats.arenas.0.bins.0.nfills", (void *)&nfills, + &sz, NULL, 0), config_tcache ? expected : ENOENT, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.bins.0.nflushes", &nflushes, &sz, - NULL, 0), config_tcache ? expected : ENOENT, + assert_d_eq(mallctl("stats.arenas.0.bins.0.nflushes", (void *)&nflushes, + &sz, NULL, 0), config_tcache ? expected : ENOENT, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.bins.0.nruns", &nruns, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.bins.0.nreruns", &nreruns, &sz, + assert_d_eq(mallctl("stats.arenas.0.bins.0.nruns", (void *)&nruns, &sz, NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.bins.0.nreruns", (void *)&nreruns, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.bins.0.curruns", &curruns, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.bins.0.curruns", (void *)&curruns, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); if (config_stats) { assert_u64_gt(nmalloc, 0, @@ -355,25 +360,26 @@ TEST_BEGIN(test_stats_arenas_lruns) int expected = config_stats ? 
0 : ENOENT; arena = 0; - assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)), - 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("thread.arena", NULL, NULL, (void *)&arena, + sizeof(arena)), 0, "Unexpected mallctl() failure"); p = mallocx(LARGE_MINCLASS, 0); assert_ptr_not_null(p, "Unexpected mallocx() failure"); - assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + 0, "Unexpected mallctl() failure"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.lruns.0.nmalloc", &nmalloc, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.lruns.0.ndalloc", &ndalloc, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.lruns.0.nrequests", &nrequests, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.lruns.0.nmalloc", (void *)&nmalloc, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.lruns.0.ndalloc", (void *)&ndalloc, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.lruns.0.nrequests", + (void *)&nrequests, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.lruns.0.curruns", &curruns, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.lruns.0.curruns", (void *)&curruns, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); if (config_stats) { assert_u64_gt(nmalloc, 0, @@ -399,23 +405,26 @@ TEST_BEGIN(test_stats_arenas_hchunks) int expected = config_stats ? 
0 : ENOENT; arena = 0; - assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)), - 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("thread.arena", NULL, NULL, (void *)&arena, + sizeof(arena)), 0, "Unexpected mallctl() failure"); p = mallocx(chunksize, 0); assert_ptr_not_null(p, "Unexpected mallocx() failure"); - assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + 0, "Unexpected mallctl() failure"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.hchunks.0.nmalloc", &nmalloc, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.hchunks.0.ndalloc", &ndalloc, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.hchunks.0.nmalloc", + (void *)&nmalloc, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.hchunks.0.ndalloc", + (void *)&ndalloc, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.hchunks.0.curhchunks", &curhchunks, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.hchunks.0.curhchunks", + (void *)&curhchunks, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); if (config_stats) { assert_u64_gt(nmalloc, 0, diff --git a/deps/jemalloc/test/unit/ticker.c b/deps/jemalloc/test/unit/ticker.c new file mode 100644 index 000000000..e737020ab --- /dev/null +++ b/deps/jemalloc/test/unit/ticker.c @@ -0,0 +1,76 @@ +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_ticker_tick) +{ +#define NREPS 2 +#define NTICKS 3 + ticker_t ticker; + int32_t i, j; + + ticker_init(&ticker, NTICKS); + for (i = 0; i < NREPS; i++) { + for (j = 0; j < NTICKS; j++) { + assert_u_eq(ticker_read(&ticker), NTICKS - j, + "Unexpected ticker value (i=%d, j=%d)", i, j); + 
assert_false(ticker_tick(&ticker), + "Unexpected ticker fire (i=%d, j=%d)", i, j); + } + assert_u32_eq(ticker_read(&ticker), 0, + "Expected ticker depletion"); + assert_true(ticker_tick(&ticker), + "Expected ticker fire (i=%d)", i); + assert_u32_eq(ticker_read(&ticker), NTICKS, + "Expected ticker reset"); + } +#undef NTICKS +} +TEST_END + +TEST_BEGIN(test_ticker_ticks) +{ +#define NTICKS 3 + ticker_t ticker; + + ticker_init(&ticker, NTICKS); + + assert_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value"); + assert_false(ticker_ticks(&ticker, NTICKS), "Unexpected ticker fire"); + assert_u_eq(ticker_read(&ticker), 0, "Unexpected ticker value"); + assert_true(ticker_ticks(&ticker, NTICKS), "Expected ticker fire"); + assert_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value"); + + assert_true(ticker_ticks(&ticker, NTICKS + 1), "Expected ticker fire"); + assert_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value"); +#undef NTICKS +} +TEST_END + +TEST_BEGIN(test_ticker_copy) +{ +#define NTICKS 3 + ticker_t ta, tb; + + ticker_init(&ta, NTICKS); + ticker_copy(&tb, &ta); + assert_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value"); + assert_true(ticker_ticks(&tb, NTICKS + 1), "Expected ticker fire"); + assert_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value"); + + ticker_tick(&ta); + ticker_copy(&tb, &ta); + assert_u_eq(ticker_read(&tb), NTICKS - 1, "Unexpected ticker value"); + assert_true(ticker_ticks(&tb, NTICKS), "Expected ticker fire"); + assert_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value"); +#undef NTICKS +} +TEST_END + +int +main(void) +{ + + return (test( + test_ticker_tick, + test_ticker_ticks, + test_ticker_copy)); +} diff --git a/deps/jemalloc/test/unit/tsd.c b/deps/jemalloc/test/unit/tsd.c index 8be787fda..d5f96ac36 100644 --- a/deps/jemalloc/test/unit/tsd.c +++ b/deps/jemalloc/test/unit/tsd.c @@ -58,18 +58,18 @@ thd_start(void *arg) data_t d = (data_t)(uintptr_t)arg; void *p; - assert_x_eq(*data_tsd_get(), 
DATA_INIT, + assert_x_eq(*data_tsd_get(true), DATA_INIT, "Initial tsd get should return initialization value"); p = malloc(1); assert_ptr_not_null(p, "Unexpected malloc() failure"); data_tsd_set(&d); - assert_x_eq(*data_tsd_get(), d, + assert_x_eq(*data_tsd_get(true), d, "After tsd set, tsd get should return value that was set"); d = 0; - assert_x_eq(*data_tsd_get(), (data_t)(uintptr_t)arg, + assert_x_eq(*data_tsd_get(true), (data_t)(uintptr_t)arg, "Resetting local data should have no effect on tsd"); free(p); @@ -79,7 +79,7 @@ thd_start(void *arg) TEST_BEGIN(test_tsd_main_thread) { - thd_start((void *) 0xa5f3e329); + thd_start((void *)(uintptr_t)0xa5f3e329); } TEST_END @@ -99,6 +99,11 @@ int main(void) { + /* Core tsd bootstrapping must happen prior to data_tsd_boot(). */ + if (nallocx(1, 0) == 0) { + malloc_printf("Initialization error"); + return (test_status_fail); + } data_tsd_boot(); return (test( diff --git a/deps/jemalloc/test/unit/util.c b/deps/jemalloc/test/unit/util.c index 8ab39a458..b1f9abd9b 100644 --- a/deps/jemalloc/test/unit/util.c +++ b/deps/jemalloc/test/unit/util.c @@ -1,33 +1,54 @@ #include "test/jemalloc_test.h" -TEST_BEGIN(test_pow2_ceil) +#define TEST_POW2_CEIL(t, suf, pri) do { \ + unsigned i, pow2; \ + t x; \ + \ + assert_##suf##_eq(pow2_ceil_##suf(0), 0, "Unexpected result"); \ + \ + for (i = 0; i < sizeof(t) * 8; i++) { \ + assert_##suf##_eq(pow2_ceil_##suf(((t)1) << i), ((t)1) \ + << i, "Unexpected result"); \ + } \ + \ + for (i = 2; i < sizeof(t) * 8; i++) { \ + assert_##suf##_eq(pow2_ceil_##suf((((t)1) << i) - 1), \ + ((t)1) << i, "Unexpected result"); \ + } \ + \ + for (i = 0; i < sizeof(t) * 8 - 1; i++) { \ + assert_##suf##_eq(pow2_ceil_##suf((((t)1) << i) + 1), \ + ((t)1) << (i+1), "Unexpected result"); \ + } \ + \ + for (pow2 = 1; pow2 < 25; pow2++) { \ + for (x = (((t)1) << (pow2-1)) + 1; x <= ((t)1) << pow2; \ + x++) { \ + assert_##suf##_eq(pow2_ceil_##suf(x), \ + ((t)1) << pow2, \ + "Unexpected result, x=%"pri, x); \ + } \ + } 
\ +} while (0) + +TEST_BEGIN(test_pow2_ceil_u64) { - unsigned i, pow2; - size_t x; - assert_zu_eq(pow2_ceil(0), 0, "Unexpected result"); + TEST_POW2_CEIL(uint64_t, u64, FMTu64); +} +TEST_END - for (i = 0; i < sizeof(size_t) * 8; i++) { - assert_zu_eq(pow2_ceil(ZU(1) << i), ZU(1) << i, - "Unexpected result"); - } +TEST_BEGIN(test_pow2_ceil_u32) +{ - for (i = 2; i < sizeof(size_t) * 8; i++) { - assert_zu_eq(pow2_ceil((ZU(1) << i) - 1), ZU(1) << i, - "Unexpected result"); - } + TEST_POW2_CEIL(uint32_t, u32, FMTu32); +} +TEST_END - for (i = 0; i < sizeof(size_t) * 8 - 1; i++) { - assert_zu_eq(pow2_ceil((ZU(1) << i) + 1), ZU(1) << (i+1), - "Unexpected result"); - } +TEST_BEGIN(test_pow2_ceil_zu) +{ - for (pow2 = 1; pow2 < 25; pow2++) { - for (x = (ZU(1) << (pow2-1)) + 1; x <= ZU(1) << pow2; x++) { - assert_zu_eq(pow2_ceil(x), ZU(1) << pow2, - "Unexpected result, x=%zu", x); - } - } + TEST_POW2_CEIL(size_t, zu, "zu"); } TEST_END @@ -54,6 +75,7 @@ TEST_BEGIN(test_malloc_strtoumax) }; #define ERR(e) e, #e #define KUMAX(x) ((uintmax_t)x##ULL) +#define KSMAX(x) ((uintmax_t)(intmax_t)x##LL) struct test_s tests[] = { {"0", "0", -1, ERR(EINVAL), UINTMAX_MAX}, {"0", "0", 1, ERR(EINVAL), UINTMAX_MAX}, @@ -66,13 +88,13 @@ TEST_BEGIN(test_malloc_strtoumax) {"42", "", 0, ERR(0), KUMAX(42)}, {"+42", "", 0, ERR(0), KUMAX(42)}, - {"-42", "", 0, ERR(0), KUMAX(-42)}, + {"-42", "", 0, ERR(0), KSMAX(-42)}, {"042", "", 0, ERR(0), KUMAX(042)}, {"+042", "", 0, ERR(0), KUMAX(042)}, - {"-042", "", 0, ERR(0), KUMAX(-042)}, + {"-042", "", 0, ERR(0), KSMAX(-042)}, {"0x42", "", 0, ERR(0), KUMAX(0x42)}, {"+0x42", "", 0, ERR(0), KUMAX(0x42)}, - {"-0x42", "", 0, ERR(0), KUMAX(-0x42)}, + {"-0x42", "", 0, ERR(0), KSMAX(-0x42)}, {"0", "", 0, ERR(0), KUMAX(0)}, {"1", "", 0, ERR(0), KUMAX(1)}, @@ -109,6 +131,7 @@ TEST_BEGIN(test_malloc_strtoumax) }; #undef ERR #undef KUMAX +#undef KSMAX unsigned i; for (i = 0; i < sizeof(tests)/sizeof(struct test_s); i++) { @@ -139,14 +162,14 @@ 
TEST_BEGIN(test_malloc_snprintf_truncated) { #define BUFLEN 15 char buf[BUFLEN]; - int result; + size_t result; size_t len; -#define TEST(expected_str_untruncated, ...) do { \ +#define TEST(expected_str_untruncated, ...) do { \ result = malloc_snprintf(buf, len, __VA_ARGS__); \ assert_d_eq(strncmp(buf, expected_str_untruncated, len-1), 0, \ "Unexpected string inequality (\"%s\" vs \"%s\")", \ - buf, expected_str_untruncated); \ - assert_d_eq(result, strlen(expected_str_untruncated), \ + buf, expected_str_untruncated); \ + assert_zu_eq(result, strlen(expected_str_untruncated), \ "Unexpected result"); \ } while (0) @@ -172,11 +195,11 @@ TEST_BEGIN(test_malloc_snprintf) { #define BUFLEN 128 char buf[BUFLEN]; - int result; + size_t result; #define TEST(expected_str, ...) do { \ result = malloc_snprintf(buf, sizeof(buf), __VA_ARGS__); \ assert_str_eq(buf, expected_str, "Unexpected output"); \ - assert_d_eq(result, strlen(expected_str), "Unexpected result"); \ + assert_zu_eq(result, strlen(expected_str), "Unexpected result");\ } while (0) TEST("hello", "hello"); @@ -286,7 +309,9 @@ main(void) { return (test( - test_pow2_ceil, + test_pow2_ceil_u64, + test_pow2_ceil_u32, + test_pow2_ceil_zu, test_malloc_strtoumax_no_endptr, test_malloc_strtoumax, test_malloc_snprintf_truncated, diff --git a/deps/jemalloc/test/unit/witness.c b/deps/jemalloc/test/unit/witness.c new file mode 100644 index 000000000..ed172753c --- /dev/null +++ b/deps/jemalloc/test/unit/witness.c @@ -0,0 +1,278 @@ +#include "test/jemalloc_test.h" + +static witness_lock_error_t *witness_lock_error_orig; +static witness_owner_error_t *witness_owner_error_orig; +static witness_not_owner_error_t *witness_not_owner_error_orig; +static witness_lockless_error_t *witness_lockless_error_orig; + +static bool saw_lock_error; +static bool saw_owner_error; +static bool saw_not_owner_error; +static bool saw_lockless_error; + +static void +witness_lock_error_intercept(const witness_list_t *witnesses, + const witness_t 
*witness) +{ + + saw_lock_error = true; +} + +static void +witness_owner_error_intercept(const witness_t *witness) +{ + + saw_owner_error = true; +} + +static void +witness_not_owner_error_intercept(const witness_t *witness) +{ + + saw_not_owner_error = true; +} + +static void +witness_lockless_error_intercept(const witness_list_t *witnesses) +{ + + saw_lockless_error = true; +} + +static int +witness_comp(const witness_t *a, const witness_t *b) +{ + + assert_u_eq(a->rank, b->rank, "Witnesses should have equal rank"); + + return (strcmp(a->name, b->name)); +} + +static int +witness_comp_reverse(const witness_t *a, const witness_t *b) +{ + + assert_u_eq(a->rank, b->rank, "Witnesses should have equal rank"); + + return (-strcmp(a->name, b->name)); +} + +TEST_BEGIN(test_witness) +{ + witness_t a, b; + tsdn_t *tsdn; + + test_skip_if(!config_debug); + + tsdn = tsdn_fetch(); + + witness_assert_lockless(tsdn); + + witness_init(&a, "a", 1, NULL); + witness_assert_not_owner(tsdn, &a); + witness_lock(tsdn, &a); + witness_assert_owner(tsdn, &a); + + witness_init(&b, "b", 2, NULL); + witness_assert_not_owner(tsdn, &b); + witness_lock(tsdn, &b); + witness_assert_owner(tsdn, &b); + + witness_unlock(tsdn, &a); + witness_unlock(tsdn, &b); + + witness_assert_lockless(tsdn); +} +TEST_END + +TEST_BEGIN(test_witness_comp) +{ + witness_t a, b, c, d; + tsdn_t *tsdn; + + test_skip_if(!config_debug); + + tsdn = tsdn_fetch(); + + witness_assert_lockless(tsdn); + + witness_init(&a, "a", 1, witness_comp); + witness_assert_not_owner(tsdn, &a); + witness_lock(tsdn, &a); + witness_assert_owner(tsdn, &a); + + witness_init(&b, "b", 1, witness_comp); + witness_assert_not_owner(tsdn, &b); + witness_lock(tsdn, &b); + witness_assert_owner(tsdn, &b); + witness_unlock(tsdn, &b); + + witness_lock_error_orig = witness_lock_error; + witness_lock_error = witness_lock_error_intercept; + saw_lock_error = false; + + witness_init(&c, "c", 1, witness_comp_reverse); + witness_assert_not_owner(tsdn, &c); + 
assert_false(saw_lock_error, "Unexpected witness lock error"); + witness_lock(tsdn, &c); + assert_true(saw_lock_error, "Expected witness lock error"); + witness_unlock(tsdn, &c); + + saw_lock_error = false; + + witness_init(&d, "d", 1, NULL); + witness_assert_not_owner(tsdn, &d); + assert_false(saw_lock_error, "Unexpected witness lock error"); + witness_lock(tsdn, &d); + assert_true(saw_lock_error, "Expected witness lock error"); + witness_unlock(tsdn, &d); + + witness_unlock(tsdn, &a); + + witness_assert_lockless(tsdn); + + witness_lock_error = witness_lock_error_orig; +} +TEST_END + +TEST_BEGIN(test_witness_reversal) +{ + witness_t a, b; + tsdn_t *tsdn; + + test_skip_if(!config_debug); + + witness_lock_error_orig = witness_lock_error; + witness_lock_error = witness_lock_error_intercept; + saw_lock_error = false; + + tsdn = tsdn_fetch(); + + witness_assert_lockless(tsdn); + + witness_init(&a, "a", 1, NULL); + witness_init(&b, "b", 2, NULL); + + witness_lock(tsdn, &b); + assert_false(saw_lock_error, "Unexpected witness lock error"); + witness_lock(tsdn, &a); + assert_true(saw_lock_error, "Expected witness lock error"); + + witness_unlock(tsdn, &a); + witness_unlock(tsdn, &b); + + witness_assert_lockless(tsdn); + + witness_lock_error = witness_lock_error_orig; +} +TEST_END + +TEST_BEGIN(test_witness_recursive) +{ + witness_t a; + tsdn_t *tsdn; + + test_skip_if(!config_debug); + + witness_not_owner_error_orig = witness_not_owner_error; + witness_not_owner_error = witness_not_owner_error_intercept; + saw_not_owner_error = false; + + witness_lock_error_orig = witness_lock_error; + witness_lock_error = witness_lock_error_intercept; + saw_lock_error = false; + + tsdn = tsdn_fetch(); + + witness_assert_lockless(tsdn); + + witness_init(&a, "a", 1, NULL); + + witness_lock(tsdn, &a); + assert_false(saw_lock_error, "Unexpected witness lock error"); + assert_false(saw_not_owner_error, "Unexpected witness not owner error"); + witness_lock(tsdn, &a); + 
assert_true(saw_lock_error, "Expected witness lock error"); + assert_true(saw_not_owner_error, "Expected witness not owner error"); + + witness_unlock(tsdn, &a); + + witness_assert_lockless(tsdn); + + witness_owner_error = witness_owner_error_orig; + witness_lock_error = witness_lock_error_orig; + +} +TEST_END + +TEST_BEGIN(test_witness_unlock_not_owned) +{ + witness_t a; + tsdn_t *tsdn; + + test_skip_if(!config_debug); + + witness_owner_error_orig = witness_owner_error; + witness_owner_error = witness_owner_error_intercept; + saw_owner_error = false; + + tsdn = tsdn_fetch(); + + witness_assert_lockless(tsdn); + + witness_init(&a, "a", 1, NULL); + + assert_false(saw_owner_error, "Unexpected owner error"); + witness_unlock(tsdn, &a); + assert_true(saw_owner_error, "Expected owner error"); + + witness_assert_lockless(tsdn); + + witness_owner_error = witness_owner_error_orig; +} +TEST_END + +TEST_BEGIN(test_witness_lockful) +{ + witness_t a; + tsdn_t *tsdn; + + test_skip_if(!config_debug); + + witness_lockless_error_orig = witness_lockless_error; + witness_lockless_error = witness_lockless_error_intercept; + saw_lockless_error = false; + + tsdn = tsdn_fetch(); + + witness_assert_lockless(tsdn); + + witness_init(&a, "a", 1, NULL); + + assert_false(saw_lockless_error, "Unexpected lockless error"); + witness_assert_lockless(tsdn); + + witness_lock(tsdn, &a); + witness_assert_lockless(tsdn); + assert_true(saw_lockless_error, "Expected lockless error"); + + witness_unlock(tsdn, &a); + + witness_assert_lockless(tsdn); + + witness_lockless_error = witness_lockless_error_orig; +} +TEST_END + +int +main(void) +{ + + return (test( + test_witness, + test_witness_comp, + test_witness_reversal, + test_witness_recursive, + test_witness_unlock_not_owned, + test_witness_lockful)); +} diff --git a/deps/jemalloc/test/unit/zero.c b/deps/jemalloc/test/unit/zero.c index 93afc2b87..30ebe37a4 100644 --- a/deps/jemalloc/test/unit/zero.c +++ b/deps/jemalloc/test/unit/zero.c @@ -8,39 +8,41 @@ 
const char *malloc_conf = static void test_zero(size_t sz_min, size_t sz_max) { - char *s; + uint8_t *s; size_t sz_prev, sz, i; +#define MAGIC ((uint8_t)0x61) sz_prev = 0; - s = (char *)mallocx(sz_min, 0); + s = (uint8_t *)mallocx(sz_min, 0); assert_ptr_not_null((void *)s, "Unexpected mallocx() failure"); for (sz = sallocx(s, 0); sz <= sz_max; sz_prev = sz, sz = sallocx(s, 0)) { if (sz_prev > 0) { - assert_c_eq(s[0], 'a', + assert_u_eq(s[0], MAGIC, "Previously allocated byte %zu/%zu is corrupted", ZU(0), sz_prev); - assert_c_eq(s[sz_prev-1], 'a', + assert_u_eq(s[sz_prev-1], MAGIC, "Previously allocated byte %zu/%zu is corrupted", sz_prev-1, sz_prev); } for (i = sz_prev; i < sz; i++) { - assert_c_eq(s[i], 0x0, + assert_u_eq(s[i], 0x0, "Newly allocated byte %zu/%zu isn't zero-filled", i, sz); - s[i] = 'a'; + s[i] = MAGIC; } if (xallocx(s, sz+1, 0, 0) == sz) { - s = (char *)rallocx(s, sz+1, 0); + s = (uint8_t *)rallocx(s, sz+1, 0); assert_ptr_not_null((void *)s, "Unexpected rallocx() failure"); } } dallocx(s, 0); +#undef MAGIC } TEST_BEGIN(test_zero_small) From 5bd957844538906ccb489ec3208e484362071439 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 27 Jan 2017 16:54:20 +0100 Subject: [PATCH 0263/1722] ziplist: better comments, some refactoring. --- src/ziplist.c | 350 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 250 insertions(+), 100 deletions(-) diff --git a/src/ziplist.c b/src/ziplist.c index f270cdbf9..81d23ca38 100644 --- a/src/ziplist.c +++ b/src/ziplist.c @@ -9,78 +9,149 @@ * ---------------------------------------------------------------------------- * * ZIPLIST OVERALL LAYOUT + * ====================== * * The general layout of the ziplist is as follows: * * ... * - * All fields are stored in little endian. + * NOTE: all fields are stored in little endian, if not specified otherwise. * * is an unsigned integer to hold the number of bytes that - * the ziplist occupies. 
This value needs to be stored to be able to resize the - * entire structure without the need to traverse it first. + * the ziplist occupies, including the four bytes of the zlbytes field itself. + * This value needs to be stored to be able to resize the entire structure + * without the need to traverse it first. * * is the offset to the last entry in the list. This allows * a pop operation on the far side of the list without the need for full * traversal. * - * is the number of entries. When this value is larger - * than 2^16-2, we need to traverse the entire list to know how many items it - * holds. + * is the number of entries. When there are more than + * 2^16-2 entires, this value is set to 2^16-1 and we need to traverse the + * entire list to know how many items it holds. * - * is a single byte special value, equal to 255, which - * indicates the end of the list. + * is a special entry representing the end of the ziplist. + * Is encoded as a single byte equal to 255. No other normal entry starts + * with a byte set to the value of 255. * * ZIPLIST ENTRIES + * =============== * - * Every entry in the ziplist is prefixed by a header that contains two pieces + * Every entry in the ziplist is prefixed by metadata that contains two pieces * of information. First, the length of the previous entry is stored to be - * able to traverse the list from back to front. Second, the encoding with an - * optional string length of the entry itself is stored. + * able to traverse the list from back to front. Second, the entry encoding is + * provided. It represents the entry type, integer or string, and in the case + * of strings it also represents the length of the string payload. + * So a complete entry is stored like this: * - * The length of the previous entry is encoded in the following way: - * If this length is smaller than 254 bytes, it will only consume a single - * byte that takes the length as value. 
When the length is greater than or - * equal to 254, it will consume 5 bytes. The first byte is set to 254 to - * indicate a larger value is following. The remaining 4 bytes take the - * length of the previous entry as value. + * * - * The other header field of the entry itself depends on the contents of the - * entry. When the entry is a string, the first 2 bits of this header will hold - * the type of encoding used to store the length of the string, followed by the - * actual length of the string. When the entry is an integer the first 2 bits - * are both set to 1. The following 2 bits are used to specify what kind of - * integer will be stored after this header. An overview of the different - * types and encodings is as follows: + * Sometimes the encoding represents the entry itself, like for small integers + * as we'll see later. In such a case the part is missing, and we + * could have just: + * + * + * + * The length of the previous entry, , is encoded in the following way: + * If this length is smaller than 255 bytes, it will only consume a single + * byte representing the length as an unsinged 8 bit integer. When the length + * is greater than or equal to 255, it will consume 5 bytes. The first byte is + * set to 255 (FF) to indicate a larger value is following. The remaining 4 + * bytes take the length of the previous entry as value. + * + * So practically an entry is encoded in the following way: + * + * + * + * Or alternatively if the previous entry length is greater than 254 bytes + * the following encoding is used: + * + * 0xFF <4 bytes unsigned little endian prevlen> + * + * The encoding field of the entry depends on the content of the + * entry. When the entry is a string, the first 2 bits of the encoding first + * byte will hold the type of encoding used to store the length of the string, + * followed by the actual length of the string. When the entry is an integer + * the first 2 bits are both set to 1. 
The following 2 bits are used to specify + * what kind of integer will be stored after this header. An overview of the + * different types and encodings is as follows. The first byte is always enough + * to determine the kind of entry. * * |00pppppp| - 1 byte * String value with length less than or equal to 63 bytes (6 bits). + * "pppppp" represents the unsigned 6 bit length. * |01pppppp|qqqqqqqq| - 2 bytes * String value with length less than or equal to 16383 bytes (14 bits). - * |10______|qqqqqqqq|rrrrrrrr|ssssssss|tttttttt| - 5 bytes + * IMPORTANT: The 14 bit number is stored in big endian. + * |10000000|qqqqqqqq|rrrrrrrr|ssssssss|tttttttt| - 5 bytes * String value with length greater than or equal to 16384 bytes. - * |11000000| - 1 byte + * Only the 4 bytes following the first byte represents the length + * up to 32^2-1. The 6 lower bits of the first byte are not used and + * are set to zero. + * IMPORTANT: The 32 bit number is stored in big endian. + * |11000000| - 3 bytes * Integer encoded as int16_t (2 bytes). - * |11010000| - 1 byte + * |11010000| - 5 bytes * Integer encoded as int32_t (4 bytes). - * |11100000| - 1 byte + * |11100000| - 9 bytes * Integer encoded as int64_t (8 bytes). - * |11110000| - 1 byte + * |11110000| - 4 bytes * Integer encoded as 24 bit signed (3 bytes). - * |11111110| - 1 byte + * |11111110| - 2 bytes * Integer encoded as 8 bit signed (1 byte). * |1111xxxx| - (with xxxx between 0000 and 1101) immediate 4 bit integer. * Unsigned integer from 0 to 12. The encoded value is actually from * 1 to 13 because 0000 and 1111 can not be used, so 1 should be * subtracted from the encoded 4 bit value to obtain the right value. - * |11111111| - End of ziplist. + * |11111111| - End of ziplist special entry. * - * All the integers are represented in little endian byte order. + * Like for the ziplist header, all the integers are represented in little + * endian byte order, even when this code is compiled in big endian systems. 
+ * + * EXAMPLES OF ACTUAL ZIPLISTS + * =========================== + * + * The following is a ziplist containing the two elements representing + * the strings "2" and "5". It is composed of 15 bytes, that we visually + * split into sections: + * + * [0f 00 00 00] [0c 00 00 00] [02 00] [00 f3] [02 f6] [ff] + * | | | | | | + * zlbytes zltail entries "2" "5" end + * + * The first 4 bytes represent the number 15, that is the number of bytes + * the whole ziplist is composed of. The second 4 bytes are the offset + * at which the last ziplist entry is found, that is 12, in fact the + * last entry, that is "5", is at offset 12 inside the ziplist. + * The next 16 bit integer represents the number of elements inside the + * ziplist, its value is 2 since there are just two elements inside. + * Finally "00 f3" is the first entry representing the number 2. It is + * composed of the previous entry length, which is zero because this is + * our first entry, and the byte F3 which corresponds to the encoding + * |1111xxxx| with xxxx between 0001 and 1101. We need to remove the "F" + * higher order bits 1111, and subtract 1 from the "3", so the entry value + * is "2". The next entry has a prevlen of 02, since the first entry is + * composed of exactly two bytes. The entry itself, F6, is encoded exactly + * like the first entry, and 6-1 = 5, so the value of the entry is 5. + * Finally the special entry FF signals the end of the ziplist. + * + * Adding another element to the above string with the value "Hello World" + * allows us to show how the ziplist encodes small strings. We'll just show + * the hex dump of the entry itself. Imagine the bytes as following the + * entry that stores "5" in the ziplist above: + * + * [02] [0b] [48 65 6c 6c 6f 20 57 6f 72 6c 64] + * + * The first byte, 02, is the length of the previous entry. 
The next + * byte represents the encoding in the pattern |00pppppp| that means + * that the entry is a string of length , so 0B means that + * an 11 bytes string follows. From the third byte (48) to the last (64) + * there are just the ASCII characters for "Hello World". * * ---------------------------------------------------------------------------- * * Copyright (c) 2009-2012, Pieter Noordhuis - * Copyright (c) 2009-2012, Salvatore Sanfilippo + * Copyright (c) 2009-2017, Salvatore Sanfilippo * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -119,8 +190,13 @@ #include "endianconv.h" #include "redisassert.h" -#define ZIP_END 255 -#define ZIP_BIGLEN 254 +#define ZIP_END 255 /* Special "end of ziplist" entry. */ +#define ZIP_BIG_PREVLEN 254 /* Max number of bytes of the previous entry, for + the "prevlen" field prefixing each entry, to be + represented with just a single byte. Otherwise + it is represented as FF AA BB CC DD, where + AA BB CC DD are a 4 bytes unsigned integer + representing the previous entry len. */ /* Different encoding/length possibilities */ #define ZIP_STR_MASK 0xc0 @@ -133,41 +209,83 @@ #define ZIP_INT_64B (0xc0 | 2<<4) #define ZIP_INT_24B (0xc0 | 3<<4) #define ZIP_INT_8B 0xfe -/* 4 bit integer immediate encoding */ -#define ZIP_INT_IMM_MASK 0x0f + +/* 4 bit integer immediate encoding |1111xxxx| with xxxx between + * 0001 and 1101. */ +#define ZIP_INT_IMM_MASK 0x0f /* Mask to extract the 4 bits value. To add + one is needed to reconstruct the value. */ #define ZIP_INT_IMM_MIN 0xf1 /* 11110001 */ #define ZIP_INT_IMM_MAX 0xfd /* 11111101 */ -#define ZIP_INT_IMM_VAL(v) (v & ZIP_INT_IMM_MASK) #define INT24_MAX 0x7fffff #define INT24_MIN (-INT24_MAX - 1) -/* Macro to determine type */ +/* Macro to determine if the entry is a string. String entries never start + * with "11" as most significant bits of the first byte. 
*/ #define ZIP_IS_STR(enc) (((enc) & ZIP_STR_MASK) < ZIP_STR_MASK) -/* Utility macros */ +/* Utility macros.*/ + +/* Return total bytes a ziplist is composed of. */ #define ZIPLIST_BYTES(zl) (*((uint32_t*)(zl))) + +/* Return the offset of the last item inside the ziplist. */ #define ZIPLIST_TAIL_OFFSET(zl) (*((uint32_t*)((zl)+sizeof(uint32_t)))) + +/* Return the length of a ziplist, or UINT16_MAX if the length cannot be + * determined without scanning the whole ziplist. */ #define ZIPLIST_LENGTH(zl) (*((uint16_t*)((zl)+sizeof(uint32_t)*2))) + +/* The size of a ziplist header: two 32 bit integers for the total + * bytes count and last item offset. One 16 bit integer for the number + * of items field. */ #define ZIPLIST_HEADER_SIZE (sizeof(uint32_t)*2+sizeof(uint16_t)) + +/* Size of the "end of ziplist" entry. Just one byte. */ #define ZIPLIST_END_SIZE (sizeof(uint8_t)) + +/* Return the pointer to the first entry of a ziplist. */ #define ZIPLIST_ENTRY_HEAD(zl) ((zl)+ZIPLIST_HEADER_SIZE) + +/* Return the pointer to the last entry of a ziplist, using the + * last entry offset inside the ziplist header. */ #define ZIPLIST_ENTRY_TAIL(zl) ((zl)+intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))) + +/* Return the pointer to the last byte of a ziplist, which is, the + * end of ziplist FF entry. */ #define ZIPLIST_ENTRY_END(zl) ((zl)+intrev32ifbe(ZIPLIST_BYTES(zl))-1) -/* We know a positive increment can only be 1 because entries can only be - * pushed one at a time. */ +/* Increment the number of items field in the ziplist header. Note that this + * macro should never overflow the unsigned 16 bit integer, since entires are + * always pushed one at a time. When UINT16_MAX is reached we want the count + * to stay there to signal that a full scan is needed to get the number of + * items inside the ziplist. 
*/ #define ZIPLIST_INCR_LENGTH(zl,incr) { \ if (ZIPLIST_LENGTH(zl) < UINT16_MAX) \ ZIPLIST_LENGTH(zl) = intrev16ifbe(intrev16ifbe(ZIPLIST_LENGTH(zl))+incr); \ } +/* We use this function to receive information about a ziplist entry. + * Note that this is not how the data is actually encoded, is just what we + * get filled by a function in order to operate more easily. */ typedef struct zlentry { - unsigned int prevrawlensize, prevrawlen; - unsigned int lensize, len; - unsigned int headersize; - unsigned char encoding; - unsigned char *p; + unsigned int prevrawlensize; /* Bytes used to encode the previos entry len*/ + unsigned int prevrawlen; /* Previous entry len. */ + unsigned int lensize; /* Bytes used to encode this entry type/len. + For example strings have a 1, 2 or 5 bytes + header. Integers always use a single byte.*/ + unsigned int len; /* Bytes used to represent the actual entry. + For strings this is just the string length + while for integers it is 1, 2, 3, 4, 8 or + 0 (for 4 bit immediate) depending on the + number range. */ + unsigned int headersize; /* prevrawlensize + lensize. */ + unsigned char encoding; /* Set to ZIP_STR_* or ZIP_INT_* depending on + the entry encoding. However for 4 bits + immediate integers this can assume a range + of values and must be range-checked. */ + unsigned char *p; /* Pointer to the very start of the entry, that + is, this points to prev-entry-len field. */ } zlentry; #define ZIPLIST_ENTRY_ZERO(zle) { \ @@ -178,15 +296,13 @@ typedef struct zlentry { } /* Extract the encoding from the byte pointed by 'ptr' and set it into - * 'encoding'. */ + * 'encoding' field of the zlentry structure. */ #define ZIP_ENTRY_ENCODING(ptr, encoding) do { \ (encoding) = (ptr[0]); \ if ((encoding) < ZIP_STR_MASK) (encoding) &= ZIP_STR_MASK; \ } while(0) -void ziplistRepr(unsigned char *zl); - -/* Return bytes needed to store integer encoded by 'encoding' */ +/* Return bytes needed to store integer encoded by 'encoding'. 
*/ unsigned int zipIntSize(unsigned char encoding) { switch(encoding) { case ZIP_INT_8B: return 1; @@ -194,15 +310,26 @@ unsigned int zipIntSize(unsigned char encoding) { case ZIP_INT_24B: return 3; case ZIP_INT_32B: return 4; case ZIP_INT_64B: return 8; - default: return 0; /* 4 bit immediate */ } - assert(NULL); + if (encoding >= ZIP_INT_IMM_MIN && encoding <= ZIP_INT_IMM_MAX) + return 0; /* 4 bit immediate */ + panic("Invalid integer encoding 0x%02X", encoding); return 0; } -/* Encode the length 'rawlen' writing it in 'p'. If p is NULL it just returns - * the amount of bytes required to encode such a length. */ -unsigned int zipEncodeLength(unsigned char *p, unsigned char encoding, unsigned int rawlen) { +/* Write the encoidng header of the entry in 'p'. If p is NULL it just returns + * the amount of bytes required to encode such a length. Arguments: + * + * 'encoding' is the encoding we are using for the entry. It could be + * ZIP_INT_* or ZIP_STR_* or between ZIP_INT_IMM_MIN and ZIP_INT_IMM_MAX + * for single-byte small immediate integers. + * + * 'rawlen' is only used for ZIP_STR_* encodings and is the length of the + * srting that this entry represents. + * + * The function returns the number of bytes used by the encoding/length + * header stored in 'p'. */ +unsigned int zipStoreEntryEncoding(unsigned char *p, unsigned char encoding, unsigned int rawlen) { unsigned char len = 1, buf[5]; if (ZIP_IS_STR(encoding)) { @@ -231,15 +358,16 @@ unsigned int zipEncodeLength(unsigned char *p, unsigned char encoding, unsigned buf[0] = encoding; } - /* Store this length at p */ + /* Store this length at p. */ memcpy(p,buf,len); return len; } -/* Decode the length encoded in 'ptr'. The 'encoding' variable will hold the - * entries encoding, the 'lensize' variable will hold the number of bytes - * required to encode the entries length, and the 'len' variable will hold the - * entries length. 
*/ +/* Decode the entry encoding type and data length (string length for strings, + * number of bytes used for the integer for integer entries) encoded in 'ptr'. + * The 'encoding' variable will hold the entry encoding, the 'lensize' + * variable will hold the number of bytes required to encode the entry + * length, and the 'len' variable will hold the entry length. */ #define ZIP_DECODE_LENGTH(ptr, encoding, lensize, len) do { \ ZIP_ENTRY_ENCODING((ptr), (encoding)); \ if ((encoding) < ZIP_STR_MASK) { \ @@ -256,7 +384,7 @@ unsigned int zipEncodeLength(unsigned char *p, unsigned char encoding, unsigned ((ptr)[3] << 8) | \ ((ptr)[4]); \ } else { \ - assert(NULL); \ + panic("Invalid string encoding 0x%02X", (encoding)); \ } \ } else { \ (lensize) = 1; \ @@ -264,45 +392,49 @@ unsigned int zipEncodeLength(unsigned char *p, unsigned char encoding, unsigned } \ } while(0); +/* Encode the length of the previous entry and write it to "p". This only + * uses the larger encoding (required in __ziplistCascadeUpdate). */ +int zipStorePrevEntryLengthLarge(unsigned char *p, unsigned int len) { + if (p != NULL) { + p[0] = ZIP_BIG_PREVLEN; + memcpy(p+1,&len,sizeof(len)); + memrev32ifbe(p+1); + } + return 1+sizeof(len); +} + /* Encode the length of the previous entry and write it to "p". Return the * number of bytes needed to encode this length if "p" is NULL. */ -unsigned int zipPrevEncodeLength(unsigned char *p, unsigned int len) { +unsigned int zipStorePrevEntryLength(unsigned char *p, unsigned int len) { if (p == NULL) { - return (len < ZIP_BIGLEN) ? 1 : sizeof(len)+1; + return (len < ZIP_BIG_PREVLEN) ? 1 : sizeof(len)+1; } else { - if (len < ZIP_BIGLEN) { + if (len < ZIP_BIG_PREVLEN) { p[0] = len; return 1; } else { - p[0] = ZIP_BIGLEN; - memcpy(p+1,&len,sizeof(len)); - memrev32ifbe(p+1); - return 1+sizeof(len); + return zipStorePrevEntryLengthLarge(p,len); } } } -/* Encode the length of the previous entry and write it to "p". 
This only - * uses the larger encoding (required in __ziplistCascadeUpdate). */ -void zipPrevEncodeLengthForceLarge(unsigned char *p, unsigned int len) { - if (p == NULL) return; - p[0] = ZIP_BIGLEN; - memcpy(p+1,&len,sizeof(len)); - memrev32ifbe(p+1); -} - -/* Decode the number of bytes required to store the length of the previous - * element, from the perspective of the entry pointed to by 'ptr'. */ +/* Return the number of bytes used to encode the length of the previous + * entry. The length is returned by setting the var 'prevlensize'. */ #define ZIP_DECODE_PREVLENSIZE(ptr, prevlensize) do { \ - if ((ptr)[0] < ZIP_BIGLEN) { \ + if ((ptr)[0] < ZIP_BIG_PREVLEN) { \ (prevlensize) = 1; \ } else { \ (prevlensize) = 5; \ } \ } while(0); -/* Decode the length of the previous element, from the perspective of the entry - * pointed to by 'ptr'. */ +/* Return the length of the previous element, and the number of bytes that + * are used in order to encode the previous element length. + * 'ptr' must point to the prevlen prefix of an entry (that encodes the + * length of the previos entry in order to navigate the elements backward). + * The length of the previous entry is stored in 'prevlen', the number of + * bytes needed to encode the previous entry length are stored in + * 'prevlensize'. */ #define ZIP_DECODE_PREVLEN(ptr, prevlensize, prevlen) do { \ ZIP_DECODE_PREVLENSIZE(ptr, prevlensize); \ if ((prevlensize) == 1) { \ @@ -314,12 +446,25 @@ void zipPrevEncodeLengthForceLarge(unsigned char *p, unsigned int len) { } \ } while(0); -/* Return the difference in number of bytes needed to store the length of the - * previous element 'len', in the entry pointed to by 'p'. */ +/* Given a pointer 'p' to the prevlen info that prefixes an entry, this + * function returns the difference in number of bytes needed to encode + * the prevlen if the previous entry changes of size. + * + * So if A is the number of bytes used right now to encode the 'prevlen' + * field. 
+ * + * And B is the number of bytes that are needed in order to encode the + * 'prevlen' if the previous element will be updated to one of size 'len'. + * + * Then the function returns B - A + * + * So the function returns a positive number if more space is needed, + * a negative number if less space is needed, or zero if the same space + * is needed. */ int zipPrevLenByteDiff(unsigned char *p, unsigned int len) { unsigned int prevlensize; ZIP_DECODE_PREVLENSIZE(p, prevlensize); - return zipPrevEncodeLength(NULL, len) - prevlensize; + return zipStorePrevEntryLength(NULL, len) - prevlensize; } /* Return the total number of bytes used by the entry pointed to by 'p'. */ @@ -456,8 +601,8 @@ unsigned char *ziplistResize(unsigned char *zl, unsigned int len) { * causes a realloc and memmove). However, encoding the prevlen may require * that this entry is grown as well. This effect may cascade throughout * the ziplist when there are consecutive entries with a size close to - * ZIP_BIGLEN, so we need to check that the prevlen can be encoded in every - * consecutive entry. + * ZIP_BIG_PREVLEN, so we need to check that the prevlen can be encoded in + * every consecutive entry. * * Note that this effect can also happen in reverse, where the bytes required * to encode the prevlen field can shrink. This effect is deliberately ignored, @@ -477,7 +622,7 @@ unsigned char *__ziplistCascadeUpdate(unsigned char *zl, unsigned char *p) { while (p[0] != ZIP_END) { zipEntry(p, &cur); rawlen = cur.headersize + cur.len; - rawlensize = zipPrevEncodeLength(NULL,rawlen); + rawlensize = zipStorePrevEntryLength(NULL,rawlen); /* Abort if there is no next entry. 
*/ if (p[rawlen] == ZIP_END) break; @@ -508,7 +653,7 @@ unsigned char *__ziplistCascadeUpdate(unsigned char *zl, unsigned char *p) { memmove(np+rawlensize, np+next.prevrawlensize, curlen-noffset-next.prevrawlensize-1); - zipPrevEncodeLength(np,rawlen); + zipStorePrevEntryLength(np,rawlen); /* Advance the cursor */ p += rawlen; @@ -517,9 +662,9 @@ unsigned char *__ziplistCascadeUpdate(unsigned char *zl, unsigned char *p) { if (next.prevrawlensize > rawlensize) { /* This would result in shrinking, which we want to avoid. * So, set "rawlen" in the available bytes. */ - zipPrevEncodeLengthForceLarge(p+rawlen,rawlen); + zipStorePrevEntryLengthLarge(p+rawlen,rawlen); } else { - zipPrevEncodeLength(p+rawlen,rawlen); + zipStorePrevEntryLength(p+rawlen,rawlen); } /* Stop here, as the raw length of "next" has not changed. */ @@ -542,7 +687,7 @@ unsigned char *__ziplistDelete(unsigned char *zl, unsigned char *p, unsigned int deleted++; } - totlen = p-first.p; + totlen = p-first.p; /* Bytes taken by the element(s) to delete. */ if (totlen > 0) { if (p[0] != ZIP_END) { /* Storing `prevrawlen` in this entry may increase or decrease the @@ -550,8 +695,13 @@ unsigned char *__ziplistDelete(unsigned char *zl, unsigned char *p, unsigned int * There always is room to store this, because it was previously * stored by an entry that is now being deleted. */ nextdiff = zipPrevLenByteDiff(p,first.prevrawlen); + + /* Note that there is always space when p jumps backward: if + * the new previous entry is large, one of the deleted elements + * had a 5 bytes prevlen header, so there is for sure at least + * 5 bytes free and we need just 4. 
*/ p -= nextdiff; - zipPrevEncodeLength(p,first.prevrawlen); + zipStorePrevEntryLength(p,first.prevrawlen); /* Update offset for tail */ ZIPLIST_TAIL_OFFSET(zl) = @@ -616,14 +766,14 @@ unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsigned cha /* 'encoding' is set to the appropriate integer encoding */ reqlen = zipIntSize(encoding); } else { - /* 'encoding' is untouched, however zipEncodeLength will use the + /* 'encoding' is untouched, however zipStoreEntryEncoding will use the * string length to figure out how to encode it. */ reqlen = slen; } /* We need space for both the length of the previous entry and * the length of the payload. */ - reqlen += zipPrevEncodeLength(NULL,prevlen); - reqlen += zipEncodeLength(NULL,encoding,slen); + reqlen += zipStorePrevEntryLength(NULL,prevlen); + reqlen += zipStoreEntryEncoding(NULL,encoding,slen); /* When the insert position is not equal to the tail, we need to * make sure that the next entry can hold this entry's length in @@ -641,7 +791,7 @@ unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsigned cha memmove(p+reqlen,p-nextdiff,curlen-offset-1+nextdiff); /* Encode this entry's raw length in the next entry. */ - zipPrevEncodeLength(p+reqlen,reqlen); + zipStorePrevEntryLength(p+reqlen,reqlen); /* Update offset for tail */ ZIPLIST_TAIL_OFFSET(zl) = @@ -669,8 +819,8 @@ unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsigned cha } /* Write the entry */ - p += zipPrevEncodeLength(p,prevlen); - p += zipEncodeLength(p,encoding,slen); + p += zipStorePrevEntryLength(p,prevlen); + p += zipStoreEntryEncoding(p,encoding,slen); if (ZIP_IS_STR(encoding)) { memcpy(p,s,slen); } else { From c750d3215ea3a707d792a934dbd29cc1f9c916a8 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 30 Jan 2017 16:34:08 +0100 Subject: [PATCH 0264/1722] Ziplist: insertion bug under particular conditions fixed. 
Ziplists had a bug that was discovered while investigating a different
issue, resulting in a corrupted ziplist representation, and a likely
segmentation fault and/or data corruption of the last element of the
ziplist, once the ziplist is accessed again.

The bug happens when a specific set of insertions / deletions is
performed so that an entry is encoded to have a "prevlen" field (the
length of the previous entry) of 5 bytes, but with a count that could
be encoded in a "prevlen" field of a single byte. This could happen
when the "cascading update" process called by
ziplistInsert()/ziplistDelete() in certain conditions forces the
prevlen to be bigger than necessary in order to avoid too much data
moving around.

Once such an entry is generated, inserting a very small entry
immediately before it will result in a resizing of the ziplist for a
count smaller than the current ziplist length (which is a violation,
inserting code expects the ziplist to get bigger actually). So an FF
byte is inserted in a misplaced position. Moreover a realloc() is
performed with a count smaller than the ziplist current length so the
final bytes could be trashed as well.

SECURITY IMPLICATIONS:

Currently it looks like an attacker can only crash a Redis server by
providing specifically chosen commands. However a FF byte is written
and there are other memory operations that depend on a wrong count, so
even if it is not immediately apparent how to mount an attack in order
to execute code remotely, it is not impossible at all that this could
be done. Attacks always get better... and we did not spend enough time
in order to think how to exploit this issue, but security researchers
or malicious attackers could.
--- src/ziplist.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/ziplist.c b/src/ziplist.c index 81d23ca38..0da1b0ca6 100644 --- a/src/ziplist.c +++ b/src/ziplist.c @@ -778,7 +778,12 @@ unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsigned cha /* When the insert position is not equal to the tail, we need to * make sure that the next entry can hold this entry's length in * its prevlen field. */ + int forcelarge = 0; nextdiff = (p[0] != ZIP_END) ? zipPrevLenByteDiff(p,reqlen) : 0; + if (nextdiff == -4 && reqlen < 4) { + nextdiff = 0; + forcelarge = 1; + } /* Store offset because a realloc may change the address of zl. */ offset = p-zl; @@ -791,7 +796,10 @@ unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsigned cha memmove(p+reqlen,p-nextdiff,curlen-offset-1+nextdiff); /* Encode this entry's raw length in the next entry. */ - zipStorePrevEntryLength(p+reqlen,reqlen); + if (forcelarge) + zipStorePrevEntryLength(p+reqlen,reqlen); + else + zipStorePrevEntryLengthLarge(p+reqlen,reqlen); /* Update offset for tail */ ZIPLIST_TAIL_OFFSET(zl) = From 34b3b012617b276260f1f7a0c09cab982d064e94 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 1 Feb 2017 17:01:31 +0100 Subject: [PATCH 0265/1722] Fix ziplist fix... --- src/ziplist.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ziplist.c b/src/ziplist.c index 0da1b0ca6..e407937ff 100644 --- a/src/ziplist.c +++ b/src/ziplist.c @@ -797,9 +797,9 @@ unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsigned cha /* Encode this entry's raw length in the next entry. 
*/ if (forcelarge) - zipStorePrevEntryLength(p+reqlen,reqlen); - else zipStorePrevEntryLengthLarge(p+reqlen,reqlen); + else + zipStorePrevEntryLength(p+reqlen,reqlen); /* Update offset for tail */ ZIPLIST_TAIL_OFFSET(zl) = From 38894de7f5b19e4c60cd06712992870bca4c059b Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 9 Feb 2017 09:58:38 +0100 Subject: [PATCH 0266/1722] Fix MIGRATE closing of cached socket on error. After investigating issue #3796, it was discovered that MIGRATE could call migrateCloseSocket() after the original MIGRATE c->argv was already rewritten as a DEL operation. As a result the host/port passed to migrateCloseSocket() could be anything, often a NULL pointer that gets deferenced crashing the server. Now the socket is closed at an earlier time when there is a socket error in a later stage where no retry will be performed, before we rewrite the argument vector. Moreover a check was added so that later, in the socket_err label, there is no further attempt at closing the socket if the argument was rewritten. This fix should resolve the bug reported in #3796. --- src/cluster.c | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/src/cluster.c b/src/cluster.c index 6e3fc8b00..4d7b0502d 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -4756,6 +4756,7 @@ void migrateCommand(client *c) { rio cmd, payload; int may_retry = 1; int write_error = 0; + int argv_rewritten = 0; /* To support the KEYS option we need the following additional state. */ int first_key = 3; /* Argument index of the first key. */ @@ -4939,12 +4940,20 @@ try_again: goto socket_err; /* A retry is guaranteed because of tested conditions.*/ } + /* On socket errors, close the migration socket now that we still have + * the original host/port in the ARGV. Later the original command may be + * rewritten to DEL and will be too later. 
*/ + if (socket_error) migrateCloseSocket(c->argv[1],c->argv[2]); + if (!copy) { - /* Translate MIGRATE as DEL for replication/AOF. */ + /* Translate MIGRATE as DEL for replication/AOF. Note that we do + * this only for the keys for which we received an acknowledgement + * from the receiving Redis server, by using the del_idx index. */ if (del_idx > 1) { newargv[0] = createStringObject("DEL",3); /* Note that the following call takes ownership of newargv. */ replaceClientCommandVector(c,del_idx,newargv); + argv_rewritten = 1; } else { /* No key transfer acknowledged, no need to rewrite as DEL. */ zfree(newargv); @@ -4953,8 +4962,8 @@ try_again: } /* If we are here and a socket error happened, we don't want to retry. - * Just signal the problem to the client, but only do it if we don't - * already queued a different error reported by the destination server. */ + * Just signal the problem to the client, but only do it if we did not + * already queue a different error reported by the destination server. */ if (!error_from_target && socket_error) { may_retry = 0; goto socket_err; @@ -4962,7 +4971,11 @@ try_again: if (!error_from_target) { /* Success! Update the last_dbid in migrateCachedSocket, so that we can - * avoid SELECT the next time if the target DB is the same. Reply +OK. */ + * avoid SELECT the next time if the target DB is the same. Reply +OK. + * + * Note: If we reached this point, even if socket_error is true + * still the SELECT command succeeded (otherwise the code jumps to + * socket_err label. */ cs->last_dbid = dbid; addReply(c,shared.ok); } else { @@ -4972,7 +4985,6 @@ try_again: sdsfree(cmd.io.buffer.ptr); zfree(ov); zfree(kv); zfree(newargv); - if (socket_error) migrateCloseSocket(c->argv[1],c->argv[2]); return; /* On socket errors we try to close the cached socket and try again. @@ -4982,7 +4994,12 @@ socket_err: /* Cleanup we want to perform in both the retry and no retry case. * Note: Closing the migrate socket will also force SELECT next time. 
*/ sdsfree(cmd.io.buffer.ptr); - migrateCloseSocket(c->argv[1],c->argv[2]); + + /* If the command was rewritten as DEL and there was a socket error, + * we already closed the socket earlier. While migrateCloseSocket() + * is idempotent, the host/port arguments are now gone, so don't do it + * again. */ + if (!argv_rewritten) migrateCloseSocket(c->argv[1],c->argv[2]); zfree(newargv); newargv = NULL; /* This will get reallocated on retry. */ From a20c8bba4ad4ac1c7adcb827e6a76df9f5ddb4ba Mon Sep 17 00:00:00 2001 From: "minghang.zmh" Date: Fri, 10 Feb 2017 20:13:01 +0800 Subject: [PATCH 0267/1722] fix server.stat_net_output_bytes calc bug --- src/networking.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/networking.c b/src/networking.c index 343a910e2..bb5d74730 100644 --- a/src/networking.c +++ b/src/networking.c @@ -931,11 +931,11 @@ int writeToClient(int fd, client *c, int handler_installed) { * * However if we are over the maxmemory limit we ignore that and * just deliver as much data as it is possible to deliver. */ - server.stat_net_output_bytes += totwritten; if (totwritten > NET_MAX_WRITES_PER_EVENT && (server.maxmemory == 0 || zmalloc_used_memory() < server.maxmemory)) break; } + server.stat_net_output_bytes += totwritten; if (nwritten == -1) { if (errno == EAGAIN) { nwritten = 0; From 18b4a41899b6c3bfdfbebe552a5bc1e72a16924b Mon Sep 17 00:00:00 2001 From: Salvatore Sanfilippo Date: Sun, 19 Feb 2017 14:01:58 +0000 Subject: [PATCH 0268/1722] ARM: Fix 64 bit unaligned access in MurmurHash64A(). --- src/config.h | 6 ++++++ src/hyperloglog.c | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/src/config.h b/src/config.h index 9fd53626e..354f8f5e5 100644 --- a/src/config.h +++ b/src/config.h @@ -206,4 +206,10 @@ void setproctitle(const char *fmt, ...); #endif #endif +/* Make sure we can test for ARM just checking for __arm__, since sometimes + * __arm is defined but __arm__ is not. 
*/ +#if defined(__arm) && !defined(__arm__) +#define __arm__ +#endif + #endif diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 0800bf59d..7de5786f9 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -401,7 +401,11 @@ uint64_t MurmurHash64A (const void * key, int len, unsigned int seed) { uint64_t k; #if (BYTE_ORDER == LITTLE_ENDIAN) + #ifdef __arm__ + memcpy(&k,data,sizeof(uint64_t)); + #else k = *((uint64_t*)data); + #endif #else k = (uint64_t) data[0]; k |= (uint64_t) data[1] << 8; From d17abbf4e2fefae8bc9bc94e35dd67a96b807dd8 Mon Sep 17 00:00:00 2001 From: Salvatore Sanfilippo Date: Sun, 19 Feb 2017 14:59:39 +0000 Subject: [PATCH 0269/1722] ARM: Avoid memcpy() in MurmurHash64A() if we are using 64 bit ARM. However note that in architectures supporting 64 bit unaligned accesses memcpy(...,...,8) is likely translated to a simple word memory movement anyway. --- src/Makefile | 1 + src/config.h | 3 +++ src/hyperloglog.c | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index 3f445f40f..f211eb84b 100644 --- a/src/Makefile +++ b/src/Makefile @@ -14,6 +14,7 @@ release_hdr := $(shell sh -c './mkreleasehdr.sh') uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not') +uname_M := $(shell sh -c 'uname -m 2>/dev/null || echo not') OPTIMIZATION?=-O2 DEPENDENCY_TARGETS=hiredis linenoise lua NODEPS:=clean distclean diff --git a/src/config.h b/src/config.h index 354f8f5e5..1005dcc2a 100644 --- a/src/config.h +++ b/src/config.h @@ -211,5 +211,8 @@ void setproctitle(const char *fmt, ...); #if defined(__arm) && !defined(__arm__) #define __arm__ #endif +#if defined (__aarch64__) && !defined(__arm64__) +#define __arm64__ +#endif #endif diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 7de5786f9..b8a63a73f 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -401,7 +401,7 @@ uint64_t MurmurHash64A (const void * key, int len, unsigned int seed) { uint64_t k; #if (BYTE_ORDER == LITTLE_ENDIAN) - #ifdef __arm__ 
+ #if defined(__arm__) && !defined(__arm64__) memcpy(&k,data,sizeof(uint64_t)); #else k = *((uint64_t*)data); From bd6b031738e58afea9b33c7550187d26ea79625b Mon Sep 17 00:00:00 2001 From: Salvatore Sanfilippo Date: Sun, 19 Feb 2017 15:02:37 +0000 Subject: [PATCH 0270/1722] ARM: Use libc malloc by default. I'm not sure how much test Jemalloc gets on ARM, moreover compiling Redis with Jemalloc support in not very powerful devices, like most ARMs people will build Redis on, is extremely slow. It is possible to enable Jemalloc build anyway if needed by using "make MALLOC=jemalloc". --- src/Makefile | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/Makefile b/src/Makefile index f211eb84b..83f5c6d79 100644 --- a/src/Makefile +++ b/src/Makefile @@ -28,11 +28,14 @@ PREFIX?=/usr/local INSTALL_BIN=$(PREFIX)/bin INSTALL=install -# Default allocator +# Default allocator defaults to Jemalloc if it's not an ARM +MALLOC=libc +ifneq ($(uname_M),armv6l) +ifneq ($(uname_M),armv7l) ifeq ($(uname_S),Linux) MALLOC=jemalloc -else - MALLOC=libc +endif +endif endif # Backwards compatibility for selecting an allocator From d921cbea3c63c28e98007d72eb4b2d519d5c9fed Mon Sep 17 00:00:00 2001 From: Salvatore Sanfilippo Date: Sun, 19 Feb 2017 15:07:08 +0000 Subject: [PATCH 0271/1722] ARM: Avoid fast path for BITOP. GCC will produce certain unaligned multi load-store instructions that will be trapped by the Linux kernel since ARM v6 cannot handle them with unaligned addresses. Better to use the slower but safer implementation instead of generating the exception which should be anyway very slow. 
--- src/bitops.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/bitops.c b/src/bitops.c index 46eee22c3..7ab72633c 100644 --- a/src/bitops.c +++ b/src/bitops.c @@ -654,8 +654,11 @@ void bitopCommand(client *c) { /* Fast path: as far as we have data for all the input bitmaps we * can take a fast path that performs much better than the - * vanilla algorithm. */ + * vanilla algorithm. On ARM we skip the fast path since it will + * result in GCC compiling the code using multiple-words load/store + * operations that are not supported even in ARM >= v6. */ j = 0; + #ifndef __arm__ if (minlen >= sizeof(unsigned long)*4 && numkeys <= 16) { unsigned long *lp[16]; unsigned long *lres = (unsigned long*) res; @@ -716,6 +719,7 @@ void bitopCommand(client *c) { } } } + #endif /* j is set to the next byte to process by the previous loop. */ for (; j < maxlen; j++) { From c9f6868c512f1fe6fbf85b54ad4890d18f981cf9 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 20 Feb 2017 10:18:41 +0100 Subject: [PATCH 0272/1722] Don't leak file descriptor on syncWithMaster(). Close #3804. --- src/replication.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/replication.c b/src/replication.c index df2e23f3a..bde37bc43 100644 --- a/src/replication.c +++ b/src/replication.c @@ -1568,7 +1568,7 @@ int slaveTryPartialResynchronization(int fd, int read_reply) { * establish a connection with the master. 
*/ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) { char tmpfile[256], *err = NULL; - int dfd, maxtries = 5; + int dfd = -1, maxtries = 5; int sockerr = 0, psync_result; socklen_t errlen = sizeof(sockerr); UNUSED(el); @@ -1832,6 +1832,7 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) { error: aeDeleteFileEvent(server.el,fd,AE_READABLE|AE_WRITABLE); + if (dfd != -1) close(dfd); close(fd); server.repl_transfer_s = -1; server.repl_state = REPL_STATE_CONNECT; From 9383cda5ff52aab44836ea8d959cacc7dd3344fd Mon Sep 17 00:00:00 2001 From: "John.Koepi" Date: Sat, 23 Jul 2016 16:31:56 +0200 Subject: [PATCH 0273/1722] fix #2883, #2857 pipe fds leak when fork() failed on bg aof rw --- src/aof.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/aof.c b/src/aof.c index 07d8561da..f73da1e25 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1357,6 +1357,7 @@ int rewriteAppendOnlyFileBackground(void) { serverLog(LL_WARNING, "Can't rewrite append only file in background: fork: %s", strerror(errno)); + aofClosePipes(); return C_ERR; } serverLog(LL_NOTICE, From b49721d57d4bdebec207815e1b7494e7bc26f2e5 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 20 Feb 2017 16:09:54 +0100 Subject: [PATCH 0274/1722] Use SipHash hash function to mitigate HashDos attempts. This change attempts to switch to an hash function which mitigates the effects of the HashDoS attack (denial of service attack trying to force data structures to worst case behavior) while at the same time providing Redis with an hash function that does not expect the input data to be word aligned, a condition no longer true now that sds.c strings have a varialbe length header. 
Note that it is possible sometimes that even using an hash function for which collisions cannot be generated without knowing the seed, special implementation details or the exposure of the seed in an indirect way (for example the ability to add elements to a Set and check the return in which Redis returns them with SMEMBERS) may make the attacker's life simpler in the process of trying to guess the correct seed, however the next step would be to switch to a log(N) data structure when too many items in a single bucket are detected: this seems like an overkill in the case of Redis. SPEED REGRESION TESTS: In order to verify that switching from MurmurHash to SipHash had no impact on speed, a set of benchmarks involving fast insertion of 5 million of keys were performed. The result shows Redis with SipHash in high pipelining conditions to be about 4% slower compared to using the previous hash function. However this could partially be related to the fact that the current implementation does not attempt to hash whole words at a time but reads single bytes, in order to have an output which is endian-netural and at the same time working on systems where unaligned memory accesses are a problem. Further X86 specific optimizations should be tested, the function may easily get at the same level of MurMurHash2 if a few optimizations are performed. 
--- src/Makefile | 2 +- src/config.c | 2 +- src/debug.c | 2 - src/dict.c | 75 ++--------- src/dict.h | 10 +- src/latency.c | 2 +- src/module.c | 2 +- src/sentinel.c | 2 +- src/server.c | 14 ++- src/server.h | 2 +- src/siphash.c | 328 +++++++++++++++++++++++++++++++++++++++++++++++++ src/t_zset.c | 2 +- 12 files changed, 361 insertions(+), 82 deletions(-) create mode 100644 src/siphash.c diff --git a/src/Makefile b/src/Makefile index 3f445f40f..5f7536e83 100644 --- a/src/Makefile +++ b/src/Makefile @@ -128,7 +128,7 @@ endif REDIS_SERVER_NAME=redis-server REDIS_SENTINEL_NAME=redis-sentinel -REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o +REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o REDIS_CLI_NAME=redis-cli REDIS_CLI_OBJ=anet.o adlist.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o REDIS_BENCHMARK_NAME=redis-benchmark diff --git a/src/config.c b/src/config.c index 
83651877c..900274f65 100644 --- a/src/config.c +++ b/src/config.c @@ -1423,7 +1423,7 @@ void configGetCommand(client *c) { /* We use the following dictionary type to store where a configuration * option is mentioned in the old configuration file, so it's * like "maxmemory" -> list of line numbers (first line is zero). */ -unsigned int dictSdsCaseHash(const void *key); +uint64_t dictSdsCaseHash(const void *key); int dictSdsKeyCaseCompare(void *privdata, const void *key1, const void *key2); void dictSdsDestructor(void *privdata, void *val); void dictListDestructor(void *privdata, void *val); diff --git a/src/debug.c b/src/debug.c index a6bc62dc8..a4caa49f2 100644 --- a/src/debug.c +++ b/src/debug.c @@ -1029,8 +1029,6 @@ void sigsegvHandler(int sig, siginfo_t *info, void *secret) { /* Log INFO and CLIENT LIST */ serverLogRaw(LL_WARNING|LL_RAW, "\n------ INFO OUTPUT ------\n"); infostring = genRedisInfoString("all"); - infostring = sdscatprintf(infostring, "hash_init_value: %u\n", - dictGetHashFunctionSeed()); serverLogRaw(LL_WARNING|LL_RAW, infostring); serverLogRaw(LL_WARNING|LL_RAW, "\n------ CLIENT LIST OUTPUT ------\n"); clients = getAllClientsInfoString(); diff --git a/src/dict.c b/src/dict.c index 59aef7724..8ce735961 100644 --- a/src/dict.c +++ b/src/dict.c @@ -37,11 +37,11 @@ #include #include +#include #include #include #include #include -#include #include "dict.h" #include "zmalloc.h" @@ -71,77 +71,28 @@ static int _dictInit(dict *ht, dictType *type, void *privDataPtr); /* -------------------------- hash functions -------------------------------- */ -static uint32_t dict_hash_function_seed = 5381; +static uint8_t dict_hash_function_seed[16]; -void dictSetHashFunctionSeed(uint32_t seed) { - dict_hash_function_seed = seed; +void dictSetHashFunctionSeed(uint8_t *seed) { + memcpy(dict_hash_function_seed,seed,sizeof(dict_hash_function_seed)); } -uint32_t dictGetHashFunctionSeed(void) { +uint8_t *dictGetHashFunctionSeed(void) { return dict_hash_function_seed; } 
-/* MurmurHash2, by Austin Appleby - * Note - This code makes a few assumptions about how your machine behaves - - * 1. We can read a 4-byte value from any address without crashing - * 2. sizeof(int) == 4 - * - * And it has a few limitations - - * - * 1. It will not work incrementally. - * 2. It will not produce the same results on little-endian and big-endian - * machines. - */ -unsigned int dictGenHashFunction(const void *key, int len) { - /* 'm' and 'r' are mixing constants generated offline. - They're not really 'magic', they just happen to work well. */ - uint32_t seed = dict_hash_function_seed; - const uint32_t m = 0x5bd1e995; - const int r = 24; +/* The default hashing function uses SipHash implementation + * in siphash.c. */ - /* Initialize the hash to a 'random' value */ - uint32_t h = seed ^ len; +uint64_t siphash(const uint8_t *in, const size_t inlen, const uint8_t *k); +uint64_t siphash_nocase(const uint8_t *in, const size_t inlen, const uint8_t *k); - /* Mix 4 bytes at a time into the hash */ - const unsigned char *data = (const unsigned char *)key; - - while(len >= 4) { - uint32_t k = *(uint32_t*)data; - - k *= m; - k ^= k >> r; - k *= m; - - h *= m; - h ^= k; - - data += 4; - len -= 4; - } - - /* Handle the last few bytes of the input array */ - switch(len) { - case 3: h ^= data[2] << 16; - case 2: h ^= data[1] << 8; - case 1: h ^= data[0]; h *= m; - }; - - /* Do a few final mixes of the hash to ensure the last few - * bytes are well-incorporated. 
*/ - h ^= h >> 13; - h *= m; - h ^= h >> 15; - - return (unsigned int)h; +uint64_t dictGenHashFunction(const void *key, int len) { + return siphash(key,len,dict_hash_function_seed); } -/* And a case insensitive hash function (based on djb hash) */ -unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len) { - unsigned int hash = (unsigned int)dict_hash_function_seed; - - while (len--) - hash = ((hash << 5) + hash) + (tolower(*buf++)); /* hash * 33 + c */ - return hash; +uint64_t dictGenCaseHashFunction(const unsigned char *buf, int len) { + return siphash_nocase(buf,len,dict_hash_function_seed); } /* ----------------------------- API implementation ------------------------- */ diff --git a/src/dict.h b/src/dict.h index 60a423a2c..bf316a00f 100644 --- a/src/dict.h +++ b/src/dict.h @@ -56,7 +56,7 @@ typedef struct dictEntry { } dictEntry; typedef struct dictType { - unsigned int (*hashFunction)(const void *key); + uint64_t (*hashFunction)(const void *key); void *(*keyDup)(void *privdata, const void *key); void *(*valDup)(void *privdata, const void *obj); int (*keyCompare)(void *privdata, const void *key1, const void *key2); @@ -168,15 +168,15 @@ void dictReleaseIterator(dictIterator *iter); dictEntry *dictGetRandomKey(dict *d); unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count); void dictGetStats(char *buf, size_t bufsize, dict *d); -unsigned int dictGenHashFunction(const void *key, int len); -unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len); +uint64_t dictGenHashFunction(const void *key, int len); +uint64_t dictGenCaseHashFunction(const unsigned char *buf, int len); void dictEmpty(dict *d, void(callback)(void*)); void dictEnableResize(void); void dictDisableResize(void); int dictRehash(dict *d, int n); int dictRehashMilliseconds(dict *d, int ms); -void dictSetHashFunctionSeed(unsigned int initval); -unsigned int dictGetHashFunctionSeed(void); +void dictSetHashFunctionSeed(uint8_t *seed); +uint8_t 
*dictGetHashFunctionSeed(void); unsigned long dictScan(dict *d, unsigned long v, dictScanFunction *fn, dictScanBucketFunction *bucketfn, void *privdata); unsigned int dictGetHash(dict *d, const void *key); dictEntry **dictFindEntryRefByPtrAndHash(dict *d, const void *oldptr, unsigned int hash); diff --git a/src/latency.c b/src/latency.c index 53e0ec7be..9e9f1f13a 100644 --- a/src/latency.c +++ b/src/latency.c @@ -41,7 +41,7 @@ int dictStringKeyCompare(void *privdata, const void *key1, const void *key2) { return strcmp(key1,key2) == 0; } -unsigned int dictStringHash(const void *key) { +uint64_t dictStringHash(const void *key) { return dictGenHashFunction(key, strlen(key)); } diff --git a/src/module.c b/src/module.c index 1fbc5094f..3b90eae4a 100644 --- a/src/module.c +++ b/src/module.c @@ -3264,7 +3264,7 @@ void *RM_GetBlockedClientPrivateData(RedisModuleCtx *ctx) { /* server.moduleapi dictionary type. Only uses plain C strings since * this gets queries from modules. */ -unsigned int dictCStringKeyHash(const void *key) { +uint64_t dictCStringKeyHash(const void *key) { return dictGenHashFunction((unsigned char*)key, strlen((char*)key)); } diff --git a/src/sentinel.c b/src/sentinel.c index 1f47dd337..6c6a3a0cd 100644 --- a/src/sentinel.c +++ b/src/sentinel.c @@ -379,7 +379,7 @@ void sentinelSimFailureCrash(void); /* ========================= Dictionary types =============================== */ -unsigned int dictSdsHash(const void *key); +uint64_t dictSdsHash(const void *key); int dictSdsKeyCompare(void *privdata, const void *key1, const void *key2); void releaseSentinelRedisInstance(sentinelRedisInstance *ri); diff --git a/src/server.c b/src/server.c index 8bf6510de..0494a4e75 100644 --- a/src/server.c +++ b/src/server.c @@ -482,16 +482,16 @@ int dictObjKeyCompare(void *privdata, const void *key1, return dictSdsKeyCompare(privdata,o1->ptr,o2->ptr); } -unsigned int dictObjHash(const void *key) { +uint64_t dictObjHash(const void *key) { const robj *o = key; return 
dictGenHashFunction(o->ptr, sdslen((sds)o->ptr)); } -unsigned int dictSdsHash(const void *key) { +uint64_t dictSdsHash(const void *key) { return dictGenHashFunction((unsigned char*)key, sdslen((char*)key)); } -unsigned int dictSdsCaseHash(const void *key) { +uint64_t dictSdsCaseHash(const void *key) { return dictGenCaseHashFunction((unsigned char*)key, sdslen((char*)key)); } @@ -513,7 +513,7 @@ int dictEncObjKeyCompare(void *privdata, const void *key1, return cmp; } -unsigned int dictEncObjHash(const void *key) { +uint64_t dictEncObjHash(const void *key) { robj *o = (robj*) key; if (sdsEncodedObject(o)) { @@ -526,7 +526,7 @@ unsigned int dictEncObjHash(const void *key) { len = ll2string(buf,32,(long)o->ptr); return dictGenHashFunction((unsigned char*)buf, len); } else { - unsigned int hash; + uint64_t hash; o = getDecodedObject(o); hash = dictGenHashFunction(o->ptr, sdslen((sds)o->ptr)); @@ -3639,7 +3639,9 @@ int main(int argc, char **argv) { zmalloc_set_oom_handler(redisOutOfMemoryHandler); srand(time(NULL)^getpid()); gettimeofday(&tv,NULL); - dictSetHashFunctionSeed(tv.tv_sec^tv.tv_usec^getpid()); + char hashseed[16]; + getRandomHexChars(hashseed,sizeof(hashseed)); + dictSetHashFunctionSeed((uint8_t*)hashseed); server.sentinel_mode = checkForSentinelMode(argc,argv); initServerConfig(); moduleInitModulesSystem(); diff --git a/src/server.h b/src/server.h index 30d8be849..75ff384cd 100644 --- a/src/server.h +++ b/src/server.h @@ -1765,7 +1765,7 @@ unsigned long LFUGetTimeInMinutes(void); uint8_t LFULogIncr(uint8_t value); /* Keys hashing / comparison functions for dict.c hash tables. 
*/ -unsigned int dictSdsHash(const void *key); +uint64_t dictSdsHash(const void *key); int dictSdsKeyCompare(void *privdata, const void *key1, const void *key2); void dictSdsDestructor(void *privdata, void *val); diff --git a/src/siphash.c b/src/siphash.c new file mode 100644 index 000000000..04e571fd7 --- /dev/null +++ b/src/siphash.c @@ -0,0 +1,328 @@ +/* + SipHash reference C implementation + + Copyright (c) 2012-2016 Jean-Philippe Aumasson + + Copyright (c) 2012-2014 Daniel J. Bernstein + Copyright (c) 2017 Salvatore Sanfilippo + + To the extent possible under law, the author(s) have dedicated all copyright + and related and neighboring rights to this software to the public domain + worldwide. This software is distributed without any warranty. + + You should have received a copy of the CC0 Public Domain Dedication along + with this software. If not, see + . + + ---------------------------------------------------------------------------- + + This version was modified by Salvatore Sanfilippo + in the following ways: + + 1. Hard-code 2-4 rounds in the hope the compiler can optimize it more + in this raw from. Anyway we always want the standard 2-4 variant. + 2. Modify the prototype and implementation so that the function directly + returns an uint64_t value, the hash itself, instead of receiving an + output buffer. This also means that the output size is set to 8 bytes + and the 16 bytes output code handling was removed. + 3. Provide a case insensitive variant to be used when hashing strings that + must be considered identical by the hash table regardless of the case. + If we don't have directly a case insensitive hash function, we need to + perform a text transformation in some temporary buffer, which is costly. + 4. Remove debugging code. + 5. Modified the original test.c file to be a stand-alone function testing + the function in the new form (returing an uint64_t) using just the + relevant test vector. 
+ */ +#include +#include +#include +#include +#include + +#define ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b)))) + +#define U32TO8_LE(p, v) \ + (p)[0] = (uint8_t)((v)); \ + (p)[1] = (uint8_t)((v) >> 8); \ + (p)[2] = (uint8_t)((v) >> 16); \ + (p)[3] = (uint8_t)((v) >> 24); + +#define U64TO8_LE(p, v) \ + U32TO8_LE((p), (uint32_t)((v))); \ + U32TO8_LE((p) + 4, (uint32_t)((v) >> 32)); + +#define U8TO64_LE(p) \ + (((uint64_t)((p)[0])) | ((uint64_t)((p)[1]) << 8) | \ + ((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) | \ + ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) | \ + ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56)) + +#define U8TO64_LE_NOCASE(p) \ + (((uint64_t)(tolower((p)[0]))) | \ + ((uint64_t)(tolower((p)[1])) << 8) | \ + ((uint64_t)(tolower((p)[2])) << 16) | \ + ((uint64_t)(tolower((p)[3])) << 24) | \ + ((uint64_t)(tolower((p)[4])) << 32) | \ + ((uint64_t)(tolower((p)[5])) << 40) | \ + ((uint64_t)(tolower((p)[6])) << 48) | \ + ((uint64_t)(tolower((p)[7])) << 56)) + +#define SIPROUND \ + do { \ + v0 += v1; \ + v1 = ROTL(v1, 13); \ + v1 ^= v0; \ + v0 = ROTL(v0, 32); \ + v2 += v3; \ + v3 = ROTL(v3, 16); \ + v3 ^= v2; \ + v0 += v3; \ + v3 = ROTL(v3, 21); \ + v3 ^= v0; \ + v2 += v1; \ + v1 = ROTL(v1, 17); \ + v1 ^= v2; \ + v2 = ROTL(v2, 32); \ + } while (0) + +uint64_t siphash(const uint8_t *in, const size_t inlen, const uint8_t *k) { + uint64_t hash; + uint8_t *out = (uint8_t*) &hash; + uint64_t v0 = 0x736f6d6570736575ULL; + uint64_t v1 = 0x646f72616e646f6dULL; + uint64_t v2 = 0x6c7967656e657261ULL; + uint64_t v3 = 0x7465646279746573ULL; + uint64_t k0 = U8TO64_LE(k); + uint64_t k1 = U8TO64_LE(k + 8); + uint64_t m; + const uint8_t *end = in + inlen - (inlen % sizeof(uint64_t)); + const int left = inlen & 7; + uint64_t b = ((uint64_t)inlen) << 56; + v3 ^= k1; + v2 ^= k0; + v1 ^= k1; + v0 ^= k0; + + for (; in != end; in += 8) { + m = U8TO64_LE(in); + v3 ^= m; + + SIPROUND; + SIPROUND; + + v0 ^= m; + } + + switch (left) { + case 7: 
b |= ((uint64_t)in[6]) << 48; + case 6: b |= ((uint64_t)in[5]) << 40; + case 5: b |= ((uint64_t)in[4]) << 32; + case 4: b |= ((uint64_t)in[3]) << 24; + case 3: b |= ((uint64_t)in[2]) << 16; + case 2: b |= ((uint64_t)in[1]) << 8; + case 1: b |= ((uint64_t)in[0]); break; + case 0: break; + } + + v3 ^= b; + + SIPROUND; + SIPROUND; + + v0 ^= b; + v2 ^= 0xff; + + SIPROUND; + SIPROUND; + SIPROUND; + SIPROUND; + + b = v0 ^ v1 ^ v2 ^ v3; + U64TO8_LE(out, b); + + return hash; +} + +uint64_t siphash_nocase(const uint8_t *in, const size_t inlen, const uint8_t *k) +{ + uint64_t hash; + uint8_t *out = (uint8_t*) &hash; + uint64_t v0 = 0x736f6d6570736575ULL; + uint64_t v1 = 0x646f72616e646f6dULL; + uint64_t v2 = 0x6c7967656e657261ULL; + uint64_t v3 = 0x7465646279746573ULL; + uint64_t k0 = U8TO64_LE(k); + uint64_t k1 = U8TO64_LE(k + 8); + uint64_t m; + const uint8_t *end = in + inlen - (inlen % sizeof(uint64_t)); + const int left = inlen & 7; + uint64_t b = ((uint64_t)inlen) << 56; + v3 ^= k1; + v2 ^= k0; + v1 ^= k1; + v0 ^= k0; + + for (; in != end; in += 8) { + m = U8TO64_LE_NOCASE(in); + v3 ^= m; + + SIPROUND; + SIPROUND; + + v0 ^= m; + } + + switch (left) { + case 7: b |= ((uint64_t)tolower(in[6])) << 48; + case 6: b |= ((uint64_t)tolower(in[5])) << 40; + case 5: b |= ((uint64_t)tolower(in[4])) << 32; + case 4: b |= ((uint64_t)tolower(in[3])) << 24; + case 3: b |= ((uint64_t)tolower(in[2])) << 16; + case 2: b |= ((uint64_t)tolower(in[1])) << 8; + case 1: b |= ((uint64_t)tolower(in[0])); break; + case 0: break; + } + + v3 ^= b; + + SIPROUND; + SIPROUND; + + v0 ^= b; + v2 ^= 0xff; + + SIPROUND; + SIPROUND; + SIPROUND; + SIPROUND; + + b = v0 ^ v1 ^ v2 ^ v3; + U64TO8_LE(out, b); + + return hash; +} + + +/* --------------------------------- TEST ------------------------------------ */ + +#ifdef SIPHASH_TEST + +const uint8_t vectors_sip64[64][8] = { + { 0x31, 0x0e, 0x0e, 0xdd, 0x47, 0xdb, 0x6f, 0x72, }, + { 0xfd, 0x67, 0xdc, 0x93, 0xc5, 0x39, 0xf8, 0x74, }, + { 0x5a, 0x4f, 0xa9, 
0xd9, 0x09, 0x80, 0x6c, 0x0d, }, + { 0x2d, 0x7e, 0xfb, 0xd7, 0x96, 0x66, 0x67, 0x85, }, + { 0xb7, 0x87, 0x71, 0x27, 0xe0, 0x94, 0x27, 0xcf, }, + { 0x8d, 0xa6, 0x99, 0xcd, 0x64, 0x55, 0x76, 0x18, }, + { 0xce, 0xe3, 0xfe, 0x58, 0x6e, 0x46, 0xc9, 0xcb, }, + { 0x37, 0xd1, 0x01, 0x8b, 0xf5, 0x00, 0x02, 0xab, }, + { 0x62, 0x24, 0x93, 0x9a, 0x79, 0xf5, 0xf5, 0x93, }, + { 0xb0, 0xe4, 0xa9, 0x0b, 0xdf, 0x82, 0x00, 0x9e, }, + { 0xf3, 0xb9, 0xdd, 0x94, 0xc5, 0xbb, 0x5d, 0x7a, }, + { 0xa7, 0xad, 0x6b, 0x22, 0x46, 0x2f, 0xb3, 0xf4, }, + { 0xfb, 0xe5, 0x0e, 0x86, 0xbc, 0x8f, 0x1e, 0x75, }, + { 0x90, 0x3d, 0x84, 0xc0, 0x27, 0x56, 0xea, 0x14, }, + { 0xee, 0xf2, 0x7a, 0x8e, 0x90, 0xca, 0x23, 0xf7, }, + { 0xe5, 0x45, 0xbe, 0x49, 0x61, 0xca, 0x29, 0xa1, }, + { 0xdb, 0x9b, 0xc2, 0x57, 0x7f, 0xcc, 0x2a, 0x3f, }, + { 0x94, 0x47, 0xbe, 0x2c, 0xf5, 0xe9, 0x9a, 0x69, }, + { 0x9c, 0xd3, 0x8d, 0x96, 0xf0, 0xb3, 0xc1, 0x4b, }, + { 0xbd, 0x61, 0x79, 0xa7, 0x1d, 0xc9, 0x6d, 0xbb, }, + { 0x98, 0xee, 0xa2, 0x1a, 0xf2, 0x5c, 0xd6, 0xbe, }, + { 0xc7, 0x67, 0x3b, 0x2e, 0xb0, 0xcb, 0xf2, 0xd0, }, + { 0x88, 0x3e, 0xa3, 0xe3, 0x95, 0x67, 0x53, 0x93, }, + { 0xc8, 0xce, 0x5c, 0xcd, 0x8c, 0x03, 0x0c, 0xa8, }, + { 0x94, 0xaf, 0x49, 0xf6, 0xc6, 0x50, 0xad, 0xb8, }, + { 0xea, 0xb8, 0x85, 0x8a, 0xde, 0x92, 0xe1, 0xbc, }, + { 0xf3, 0x15, 0xbb, 0x5b, 0xb8, 0x35, 0xd8, 0x17, }, + { 0xad, 0xcf, 0x6b, 0x07, 0x63, 0x61, 0x2e, 0x2f, }, + { 0xa5, 0xc9, 0x1d, 0xa7, 0xac, 0xaa, 0x4d, 0xde, }, + { 0x71, 0x65, 0x95, 0x87, 0x66, 0x50, 0xa2, 0xa6, }, + { 0x28, 0xef, 0x49, 0x5c, 0x53, 0xa3, 0x87, 0xad, }, + { 0x42, 0xc3, 0x41, 0xd8, 0xfa, 0x92, 0xd8, 0x32, }, + { 0xce, 0x7c, 0xf2, 0x72, 0x2f, 0x51, 0x27, 0x71, }, + { 0xe3, 0x78, 0x59, 0xf9, 0x46, 0x23, 0xf3, 0xa7, }, + { 0x38, 0x12, 0x05, 0xbb, 0x1a, 0xb0, 0xe0, 0x12, }, + { 0xae, 0x97, 0xa1, 0x0f, 0xd4, 0x34, 0xe0, 0x15, }, + { 0xb4, 0xa3, 0x15, 0x08, 0xbe, 0xff, 0x4d, 0x31, }, + { 0x81, 0x39, 0x62, 0x29, 0xf0, 0x90, 0x79, 0x02, }, + { 0x4d, 0x0c, 0xf4, 0x9e, 0xe5, 0xd4, 
0xdc, 0xca, }, + { 0x5c, 0x73, 0x33, 0x6a, 0x76, 0xd8, 0xbf, 0x9a, }, + { 0xd0, 0xa7, 0x04, 0x53, 0x6b, 0xa9, 0x3e, 0x0e, }, + { 0x92, 0x59, 0x58, 0xfc, 0xd6, 0x42, 0x0c, 0xad, }, + { 0xa9, 0x15, 0xc2, 0x9b, 0xc8, 0x06, 0x73, 0x18, }, + { 0x95, 0x2b, 0x79, 0xf3, 0xbc, 0x0a, 0xa6, 0xd4, }, + { 0xf2, 0x1d, 0xf2, 0xe4, 0x1d, 0x45, 0x35, 0xf9, }, + { 0x87, 0x57, 0x75, 0x19, 0x04, 0x8f, 0x53, 0xa9, }, + { 0x10, 0xa5, 0x6c, 0xf5, 0xdf, 0xcd, 0x9a, 0xdb, }, + { 0xeb, 0x75, 0x09, 0x5c, 0xcd, 0x98, 0x6c, 0xd0, }, + { 0x51, 0xa9, 0xcb, 0x9e, 0xcb, 0xa3, 0x12, 0xe6, }, + { 0x96, 0xaf, 0xad, 0xfc, 0x2c, 0xe6, 0x66, 0xc7, }, + { 0x72, 0xfe, 0x52, 0x97, 0x5a, 0x43, 0x64, 0xee, }, + { 0x5a, 0x16, 0x45, 0xb2, 0x76, 0xd5, 0x92, 0xa1, }, + { 0xb2, 0x74, 0xcb, 0x8e, 0xbf, 0x87, 0x87, 0x0a, }, + { 0x6f, 0x9b, 0xb4, 0x20, 0x3d, 0xe7, 0xb3, 0x81, }, + { 0xea, 0xec, 0xb2, 0xa3, 0x0b, 0x22, 0xa8, 0x7f, }, + { 0x99, 0x24, 0xa4, 0x3c, 0xc1, 0x31, 0x57, 0x24, }, + { 0xbd, 0x83, 0x8d, 0x3a, 0xaf, 0xbf, 0x8d, 0xb7, }, + { 0x0b, 0x1a, 0x2a, 0x32, 0x65, 0xd5, 0x1a, 0xea, }, + { 0x13, 0x50, 0x79, 0xa3, 0x23, 0x1c, 0xe6, 0x60, }, + { 0x93, 0x2b, 0x28, 0x46, 0xe4, 0xd7, 0x06, 0x66, }, + { 0xe1, 0x91, 0x5f, 0x5c, 0xb1, 0xec, 0xa4, 0x6c, }, + { 0xf3, 0x25, 0x96, 0x5c, 0xa1, 0x6d, 0x62, 0x9f, }, + { 0x57, 0x5f, 0xf2, 0x8e, 0x60, 0x38, 0x1b, 0xe5, }, + { 0x72, 0x45, 0x06, 0xeb, 0x4c, 0x32, 0x8a, 0x95, }, +}; + + +/* Test siphash using a test vector. Returns 0 if the function passed + * all the tests, otherwise 1 is returned. */ +int siphash_test(void) { + uint8_t in[64], k[16]; + int i; + int fails = 0; + + for (i = 0; i < 16; ++i) + k[i] = i; + + for (i = 0; i < 64; ++i) { + in[i] = i; + uint64_t hash = siphash(in, i, k); + const uint8_t *v = NULL; + v = (uint8_t *)vectors_sip64; + if (memcmp(&hash, v + (i * 8), 8)) { + /* printf("fail for %d bytes\n", i); */ + fails++; + } + } + + /* Run a few basic tests with the case insensitive version. 
*/ + uint64_t h1, h2; + h1 = siphash((uint8_t*)"hello world",11,(uint8_t*)"1234567812345678"); + h2 = siphash_nocase((uint8_t*)"hello world",11,(uint8_t*)"1234567812345678"); + if (h1 != h2) fails++; + + h1 = siphash((uint8_t*)"hello world",11,(uint8_t*)"1234567812345678"); + h2 = siphash_nocase((uint8_t*)"HELLO world",11,(uint8_t*)"1234567812345678"); + if (h1 != h2) fails++; + + h1 = siphash((uint8_t*)"HELLO world",11,(uint8_t*)"1234567812345678"); + h2 = siphash_nocase((uint8_t*)"HELLO world",11,(uint8_t*)"1234567812345678"); + if (h1 == h2) fails++; + + if (!fails) return 0; + return 1; +} + +int main(void) { + if (siphash_test() == 0) { + printf("SipHash test: OK\n"); + return 0; + } else { + printf("SipHash test: FAILED\n"); + return 1; + } +} + +#endif diff --git a/src/t_zset.c b/src/t_zset.c index d36fa30ae..f7f4c6eb2 100644 --- a/src/t_zset.c +++ b/src/t_zset.c @@ -2110,7 +2110,7 @@ inline static void zunionInterAggregate(double *target, double val, int aggregat } } -unsigned int dictSdsHash(const void *key); +uint64_t dictSdsHash(const void *key); int dictSdsKeyCompare(void *privdata, const void *key1, const void *key2); dictType setAccumulatorDictType = { From fc201008619292bab2c020e37f2acfd981c82cdd Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 20 Feb 2017 17:32:46 +0100 Subject: [PATCH 0275/1722] SipHash x86 optimizations. --- src/siphash.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/src/siphash.c b/src/siphash.c index 04e571fd7..3bcada728 100644 --- a/src/siphash.c +++ b/src/siphash.c @@ -40,6 +40,13 @@ #include #include +/* Test of the CPU is Little Endian and supports not aligned accesses. + * Two interesting conditions to speedup the function that happen to be + * in most of x86 servers. 
*/ +#if defined(__X86_64__) || defined(__x86_64__) || defined (__i386__) +#define UNALIGNED_LE_CPU +#endif + #define ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b)))) #define U32TO8_LE(p, v) \ @@ -52,11 +59,15 @@ U32TO8_LE((p), (uint32_t)((v))); \ U32TO8_LE((p) + 4, (uint32_t)((v) >> 32)); +#ifdef UNALIGNED_LE_CPU +#define U8TO64_LE(p) (*((uint64_t*)(p))) +#else #define U8TO64_LE(p) \ (((uint64_t)((p)[0])) | ((uint64_t)((p)[1]) << 8) | \ ((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) | \ ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) | \ ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56)) +#endif #define U8TO64_LE_NOCASE(p) \ (((uint64_t)(tolower((p)[0]))) | \ @@ -87,8 +98,10 @@ } while (0) uint64_t siphash(const uint8_t *in, const size_t inlen, const uint8_t *k) { +#ifndef UNALIGNED_LE_CPU uint64_t hash; uint8_t *out = (uint8_t*) &hash; +#endif uint64_t v0 = 0x736f6d6570736575ULL; uint64_t v1 = 0x646f72616e646f6dULL; uint64_t v2 = 0x6c7967656e657261ULL; @@ -139,15 +152,20 @@ uint64_t siphash(const uint8_t *in, const size_t inlen, const uint8_t *k) { SIPROUND; b = v0 ^ v1 ^ v2 ^ v3; +#ifndef UNALIGNED_LE_CPU U64TO8_LE(out, b); - return hash; +#else + return b; +#endif } uint64_t siphash_nocase(const uint8_t *in, const size_t inlen, const uint8_t *k) { +#ifndef UNALIGNED_LE_CPU uint64_t hash; uint8_t *out = (uint8_t*) &hash; +#endif uint64_t v0 = 0x736f6d6570736575ULL; uint64_t v1 = 0x646f72616e646f6dULL; uint64_t v2 = 0x6c7967656e657261ULL; @@ -198,9 +216,12 @@ uint64_t siphash_nocase(const uint8_t *in, const size_t inlen, const uint8_t *k) SIPROUND; b = v0 ^ v1 ^ v2 ^ v3; +#ifndef UNALIGNED_LE_CPU U64TO8_LE(out, b); - return hash; +#else + return b; +#endif } From 49cb8be3fd375e81fc39029a3ef373661227a3cc Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 20 Feb 2017 17:39:44 +0100 Subject: [PATCH 0276/1722] Use locale agnostic tolower() in dict.c hash function. 
--- src/Makefile | 2 +- src/dict.c | 2 +- src/siphash.c | 40 +++++++++++++++++++++++++--------------- 3 files changed, 27 insertions(+), 17 deletions(-) diff --git a/src/Makefile b/src/Makefile index 5f7536e83..cb7afa023 100644 --- a/src/Makefile +++ b/src/Makefile @@ -204,7 +204,7 @@ $(REDIS_CHECK_AOF_NAME): $(REDIS_CHECK_AOF_OBJ) $(REDIS_LD) -o $@ $^ $(FINAL_LIBS) dict-benchmark: dict.c zmalloc.c sds.c - $(REDIS_CC) $(FINAL_CFLAGS) dict.c zmalloc.c sds.c -D DICT_BENCHMARK_MAIN -o dict-benchmark + $(REDIS_CC) $(FINAL_CFLAGS) dict.c zmalloc.c sds.c siphash.c -D DICT_BENCHMARK_MAIN -o dict-benchmark # Because the jemalloc.h header is generated as a part of the jemalloc build, # building it should complete before building any other object. Instead of diff --git a/src/dict.c b/src/dict.c index 8ce735961..69fb3b8f8 100644 --- a/src/dict.c +++ b/src/dict.c @@ -1109,7 +1109,7 @@ void dictGetStats(char *buf, size_t bufsize, dict *d) { #include "sds.h" -unsigned int hashCallback(const void *key) { +uint64_t hashCallback(const void *key) { return dictGenHashFunction((unsigned char*)key, sdslen((char*)key)); } diff --git a/src/siphash.c b/src/siphash.c index 3bcada728..7219d4b88 100644 --- a/src/siphash.c +++ b/src/siphash.c @@ -40,6 +40,16 @@ #include #include +/* Fast tolower() alike function that does not care about locale + * but just returns a-z insetad of A-Z. */ +int siptlw(int c) { + if (c >= 'A' && c <= 'Z') { + return c+('a'-'A'); + } else { + return c; + } +} + /* Test of the CPU is Little Endian and supports not aligned accesses. * Two interesting conditions to speedup the function that happen to be * in most of x86 servers. 
*/ @@ -70,14 +80,14 @@ #endif #define U8TO64_LE_NOCASE(p) \ - (((uint64_t)(tolower((p)[0]))) | \ - ((uint64_t)(tolower((p)[1])) << 8) | \ - ((uint64_t)(tolower((p)[2])) << 16) | \ - ((uint64_t)(tolower((p)[3])) << 24) | \ - ((uint64_t)(tolower((p)[4])) << 32) | \ - ((uint64_t)(tolower((p)[5])) << 40) | \ - ((uint64_t)(tolower((p)[6])) << 48) | \ - ((uint64_t)(tolower((p)[7])) << 56)) + (((uint64_t)(siptlw((p)[0]))) | \ + ((uint64_t)(siptlw((p)[1])) << 8) | \ + ((uint64_t)(siptlw((p)[2])) << 16) | \ + ((uint64_t)(siptlw((p)[3])) << 24) | \ + ((uint64_t)(siptlw((p)[4])) << 32) | \ + ((uint64_t)(siptlw((p)[5])) << 40) | \ + ((uint64_t)(siptlw((p)[6])) << 48) | \ + ((uint64_t)(siptlw((p)[7])) << 56)) #define SIPROUND \ do { \ @@ -192,13 +202,13 @@ uint64_t siphash_nocase(const uint8_t *in, const size_t inlen, const uint8_t *k) } switch (left) { - case 7: b |= ((uint64_t)tolower(in[6])) << 48; - case 6: b |= ((uint64_t)tolower(in[5])) << 40; - case 5: b |= ((uint64_t)tolower(in[4])) << 32; - case 4: b |= ((uint64_t)tolower(in[3])) << 24; - case 3: b |= ((uint64_t)tolower(in[2])) << 16; - case 2: b |= ((uint64_t)tolower(in[1])) << 8; - case 1: b |= ((uint64_t)tolower(in[0])); break; + case 7: b |= ((uint64_t)siptlw(in[6])) << 48; + case 6: b |= ((uint64_t)siptlw(in[5])) << 40; + case 5: b |= ((uint64_t)siptlw(in[4])) << 32; + case 4: b |= ((uint64_t)siptlw(in[3])) << 24; + case 3: b |= ((uint64_t)siptlw(in[2])) << 16; + case 2: b |= ((uint64_t)siptlw(in[1])) << 8; + case 1: b |= ((uint64_t)siptlw(in[0])); break; case 0: break; } From 5ed1b45aadcfd7bfc499ee4001fc39ce873cfe07 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 21 Feb 2017 12:25:29 +0100 Subject: [PATCH 0277/1722] freeMemoryIfNeeded(): improve code and lazyfree handling. 1. Refactor memory overhead computation into a function. 2. Every 10 keys evicted, check if memory usage already reached the target value directly, since we otherwise don't count all the memory reclaimed by the background thread right now. 
--- src/evict.c | 61 ++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 42 insertions(+), 19 deletions(-) diff --git a/src/evict.c b/src/evict.c index 802997ce8..62753c5a7 100644 --- a/src/evict.c +++ b/src/evict.c @@ -336,11 +336,34 @@ unsigned long LFUDecrAndReturn(robj *o) { * server when there is data to add in order to make space if needed. * --------------------------------------------------------------------------*/ +/* We don't want to count AOF buffers and slaves output buffers as + * used memory: the eviction should use mostly data size. This function + * returns the sum of AOF and slaves buffer. */ +size_t freeMemoryGetNotCountedMemory(void) { + size_t overhead = 0; + int slaves = listLength(server.slaves); + + if (slaves) { + listIter li; + listNode *ln; + + listRewind(server.slaves,&li); + while((ln = listNext(&li))) { + client *slave = listNodeValue(ln); + overhead += getClientOutputBufferMemoryUsage(slave); + } + } + if (server.aof_state != AOF_OFF) { + overhead += sdslen(server.aof_buf)+aofRewriteBufferSize(); + } + return overhead; +} + int freeMemoryIfNeeded(void) { size_t mem_reported, mem_used, mem_tofree, mem_freed; - int slaves = listLength(server.slaves); mstime_t latency, eviction_latency; long long delta; + int slaves = listLength(server.slaves); /* Check if we are over the memory usage limit. If we are not, no need * to subtract the slaves output buffers. We can just return ASAP. */ @@ -350,24 +373,8 @@ int freeMemoryIfNeeded(void) { /* Remove the size of slaves output buffers and AOF buffer from the * count of used memory. 
*/ mem_used = mem_reported; - if (slaves) { - listIter li; - listNode *ln; - - listRewind(server.slaves,&li); - while((ln = listNext(&li))) { - client *slave = listNodeValue(ln); - unsigned long obuf_bytes = getClientOutputBufferMemoryUsage(slave); - if (obuf_bytes > mem_used) - mem_used = 0; - else - mem_used -= obuf_bytes; - } - } - if (server.aof_state != AOF_OFF) { - mem_used -= sdslen(server.aof_buf); - mem_used -= aofRewriteBufferSize(); - } + size_t overhead = freeMemoryGetNotCountedMemory(); + mem_used = (mem_used > overhead) ? mem_used-overhead : 0; /* Check if we are still over the memory limit. */ if (mem_used <= server.maxmemory) return C_OK; @@ -498,6 +505,22 @@ int freeMemoryIfNeeded(void) { * deliver data to the slaves fast enough, so we force the * transmission here inside the loop. */ if (slaves) flushSlavesOutputBuffers(); + + /* Normally our stop condition is the ability to release + * a fixed, pre-computed amount of memory. However when we + * are deleting objects in another thread, it's better to + * check, from time to time, if we already reached our target + * memory, since the "mem_freed" amount is computed only + * across the dbAsyncDelete() call, while the thread can + * release the memory all the time. */ + if (server.lazyfree_lazy_eviction && !(keys_freed % 16)) { + overhead = freeMemoryGetNotCountedMemory(); + mem_used = zmalloc_used_memory(); + mem_used = (mem_used > overhead) ? mem_used-overhead : 0; + if (mem_used <= server.maxmemory) { + mem_freed = mem_tofree; + } + } } if (!keys_freed) { From afd60f3f741a8b551ead4841383ed13a870688b8 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 21 Feb 2017 17:07:28 +0100 Subject: [PATCH 0278/1722] SipHash 2-4 -> SipHash 1-2. For performance reasons we use a reduced rounds variant of SipHash. This should still provide enough protection and the effects in the hash table distribution are non existing. 
If some real world attack on SipHash 1-2 will be found we can trivially switch to something more secure. Anyway it is a big step forward from Murmurhash, for which it is trivial to generate *seed independent* colliding keys... The speed penatly introduced by SipHash 2-4, around 4%, was a too big price to pay compared to the effectiveness of the HashDoS attack against SipHash 1-2, and considering so far in the Redis history, no such an incident ever happened even while using trivially to collide hash functions. --- src/siphash.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/src/siphash.c b/src/siphash.c index 7219d4b88..6c41fe6b6 100644 --- a/src/siphash.c +++ b/src/siphash.c @@ -19,18 +19,23 @@ This version was modified by Salvatore Sanfilippo in the following ways: - 1. Hard-code 2-4 rounds in the hope the compiler can optimize it more + 1. We use SipHash 1-2. This is not believed to be as strong as the + suggested 2-4 variant, but AFAIK there are not trivial attacks + against this reduced-rounds version, and it runs at the same speed + as Murmurhash2 that we used previously, why the 2-4 variant slowed + down Redis by a 4% figure more or less. + 2. Hard-code rounds in the hope the compiler can optimize it more in this raw from. Anyway we always want the standard 2-4 variant. - 2. Modify the prototype and implementation so that the function directly + 3. Modify the prototype and implementation so that the function directly returns an uint64_t value, the hash itself, instead of receiving an output buffer. This also means that the output size is set to 8 bytes and the 16 bytes output code handling was removed. - 3. Provide a case insensitive variant to be used when hashing strings that + 4. Provide a case insensitive variant to be used when hashing strings that must be considered identical by the hash table regardless of the case. 
If we don't have directly a case insensitive hash function, we need to perform a text transformation in some temporary buffer, which is costly. - 4. Remove debugging code. - 5. Modified the original test.c file to be a stand-alone function testing + 5. Remove debugging code. + 6. Modified the original test.c file to be a stand-alone function testing the function in the new form (returing an uint64_t) using just the relevant test vector. */ @@ -131,7 +136,6 @@ uint64_t siphash(const uint8_t *in, const size_t inlen, const uint8_t *k) { m = U8TO64_LE(in); v3 ^= m; - SIPROUND; SIPROUND; v0 ^= m; @@ -150,14 +154,11 @@ uint64_t siphash(const uint8_t *in, const size_t inlen, const uint8_t *k) { v3 ^= b; - SIPROUND; SIPROUND; v0 ^= b; v2 ^= 0xff; - SIPROUND; - SIPROUND; SIPROUND; SIPROUND; @@ -195,7 +196,6 @@ uint64_t siphash_nocase(const uint8_t *in, const size_t inlen, const uint8_t *k) m = U8TO64_LE_NOCASE(in); v3 ^= m; - SIPROUND; SIPROUND; v0 ^= m; @@ -214,14 +214,11 @@ uint64_t siphash_nocase(const uint8_t *in, const size_t inlen, const uint8_t *k) v3 ^= b; - SIPROUND; SIPROUND; v0 ^= b; v2 ^= 0xff; - SIPROUND; - SIPROUND; SIPROUND; SIPROUND; @@ -308,7 +305,11 @@ const uint8_t vectors_sip64[64][8] = { /* Test siphash using a test vector. Returns 0 if the function passed - * all the tests, otherwise 1 is returned. */ + * all the tests, otherwise 1 is returned. + * + * IMPORTANT: The test vector is for SipHash 2-4. Before running + * the test revert back the siphash() function to 2-4 rounds since + * now it uses 1-2 rounds. */ int siphash_test(void) { uint8_t in[64], k[16]; int i; From d0e1affa70722344a54c8566b96023a1169926f0 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 22 Feb 2017 12:00:09 +0100 Subject: [PATCH 0279/1722] Test: fix conditional execution of HINCRBYFLOAT representation test. 
--- tests/unit/type/hash.tcl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/type/hash.tcl b/tests/unit/type/hash.tcl index 02560ede2..d2c679d32 100644 --- a/tests/unit/type/hash.tcl +++ b/tests/unit/type/hash.tcl @@ -525,7 +525,7 @@ start_server {tags {"hash"}} { # 1.23 cannot be represented correctly with 64 bit doubles, so we skip # the test, since we are only testing pretty printing here and is not # a bug if the program outputs things like 1.299999... - if {!$::valgrind || ![string match *x86_64* [exec uname -a]]} { + if {!$::valgrind && [string match *x86_64* [exec uname -a]]} { test {Test HINCRBYFLOAT for correct float representation (issue #2846)} { r del myhash assert {[r hincrbyfloat myhash float 1.23] eq {1.23}} From 384ae4032df6ff36ef39fa76216c4f86c7e29eec Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 22 Feb 2017 12:27:01 +0100 Subject: [PATCH 0280/1722] Test: replication-psync, wait more to detect write load. Slow systems like the original Raspberry PI need more time than 5 seconds to start the script and detect writes. After fixing the Raspberry PI can pass the unit without issues. --- tests/integration/replication-psync.tcl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/replication-psync.tcl b/tests/integration/replication-psync.tcl index 3a41ceb21..da1e9cf5b 100644 --- a/tests/integration/replication-psync.tcl +++ b/tests/integration/replication-psync.tcl @@ -47,7 +47,7 @@ proc test_psync {descr duration backlog_size backlog_ttl delay cond diskless rec # Check that the background clients are actually writing. test {Detect write load to master} { - wait_for_condition 50 100 { + wait_for_condition 50 1000 { [$master dbsize] > 100 } else { fail "Can't detect write load from background clients." 
From 9df5191d0af53addb6e6679f870e3ab8313d0454 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 22 Feb 2017 13:08:21 +0100 Subject: [PATCH 0281/1722] Solaris fixes about tail usage and atomic vars. Testing with Solaris C compiler (SunOS 5.11 11.2 sun4v sparc sun4v) there were issues compiling due to atomicvar.h and running the tests also failed because of "tail" usage not conform with Solaris tail implementation. This commit fixes both the issues. --- src/atomicvar.h | 2 +- tests/integration/aof.tcl | 6 +++--- tests/integration/rdb.tcl | 4 ++-- tests/unit/aofrw.tcl | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/atomicvar.h b/src/atomicvar.h index 4aa8fa173..c522db3e9 100644 --- a/src/atomicvar.h +++ b/src/atomicvar.h @@ -51,7 +51,7 @@ #ifndef __ATOMIC_VAR_H #define __ATOMIC_VAR_H -#if defined(__ATOMIC_RELAXED) && (!defined(__clang__) || !defined(__APPLE__) || __apple_build_version__ > 4210057) +#if defined(__ATOMIC_RELAXED) && !defined(__sun) && (!defined(__clang__) || !defined(__APPLE__) || __apple_build_version__ > 4210057) /* Implementation using __atomic macros. 
*/ #define atomicIncr(var,count,mutex) __atomic_add_fetch(&var,(count),__ATOMIC_RELAXED) diff --git a/tests/integration/aof.tcl b/tests/integration/aof.tcl index 832f996e1..e397faeeb 100644 --- a/tests/integration/aof.tcl +++ b/tests/integration/aof.tcl @@ -88,7 +88,7 @@ tags {"aof"} { set pattern "*Bad file format reading the append only file*" set retry 10 while {$retry} { - set result [exec tail -n1 < [dict get $srv stdout]] + set result [exec tail -1 < [dict get $srv stdout]] if {[string match $pattern $result]} { break } @@ -113,7 +113,7 @@ tags {"aof"} { set pattern "*Unexpected end of file reading the append only file*" set retry 10 while {$retry} { - set result [exec tail -n1 < [dict get $srv stdout]] + set result [exec tail -1 < [dict get $srv stdout]] if {[string match $pattern $result]} { break } @@ -137,7 +137,7 @@ tags {"aof"} { set pattern "*Unexpected end of file reading the append only file*" set retry 10 while {$retry} { - set result [exec tail -n1 < [dict get $srv stdout]] + set result [exec tail -1 < [dict get $srv stdout]] if {[string match $pattern $result]} { break } diff --git a/tests/integration/rdb.tcl b/tests/integration/rdb.tcl index 2ed47cc58..66aad4cc7 100644 --- a/tests/integration/rdb.tcl +++ b/tests/integration/rdb.tcl @@ -66,7 +66,7 @@ if {!$isroot} { test {Server should not start if RDB file can't be open} { wait_for_condition 50 100 { [string match {*Fatal error loading*} \ - [exec tail -n1 < [dict get $srv stdout]]] + [exec tail -1 < [dict get $srv stdout]]] } else { fail "Server started even if RDB was unreadable!" } @@ -90,7 +90,7 @@ start_server_and_kill_it [list "dir" $server_path] { test {Server should not start if RDB is corrupted} { wait_for_condition 50 100 { [string match {*CRC error*} \ - [exec tail -n10 < [dict get $srv stdout]]] + [exec tail -10 < [dict get $srv stdout]]] } else { fail "Server started even if RDB was corrupted!" 
} diff --git a/tests/unit/aofrw.tcl b/tests/unit/aofrw.tcl index c5430eedc..dff7588ff 100644 --- a/tests/unit/aofrw.tcl +++ b/tests/unit/aofrw.tcl @@ -73,7 +73,7 @@ start_server {tags {"aofrw"}} { r config set appendonly no r exec wait_for_condition 50 100 { - [string match {*Killing*AOF*child*} [exec tail -n5 < [srv 0 stdout]]] + [string match {*Killing*AOF*child*} [exec tail -5 < [srv 0 stdout]]] } else { fail "Can't find 'Killing AOF child' into recent logs" } From ecda721a3e647ed4df77fd3cead83ce95754d674 Mon Sep 17 00:00:00 2001 From: oranagra Date: Thu, 23 Feb 2017 03:04:08 -0800 Subject: [PATCH 0282/1722] add SDS_NOINIT option to sdsnewlen to avoid unnecessary memsets. this commit also contains small bugfix in rdbLoadLzfStringObject a bug that currently has no implications. --- src/aof.c | 2 +- src/networking.c | 4 ++-- src/object.c | 4 +++- src/rdb.c | 12 ++++++------ src/sds.c | 7 ++++++- src/sds.h | 1 + 6 files changed, 19 insertions(+), 11 deletions(-) diff --git a/src/aof.c b/src/aof.c index f73da1e25..ebe3e3a47 100644 --- a/src/aof.c +++ b/src/aof.c @@ -699,7 +699,7 @@ int loadAppendOnlyFile(char *filename) { } if (buf[0] != '$') goto fmterr; len = strtol(buf+1,NULL,10); - argsds = sdsnewlen(NULL,len); + argsds = sdsnewlen(SDS_NOINIT,len); if (len && fread(argsds,len,1,fp) == 0) { sdsfree(argsds); fakeClient->argc = j; /* Free up to j-1. */ diff --git a/src/networking.c b/src/networking.c index 343a910e2..eba0d3a62 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1234,7 +1234,7 @@ int processMultibulkBuffer(client *c) { sdsIncrLen(c->querybuf,-2); /* remove CRLF */ /* Assume that if we saw a fat argument we'll see another one * likely... 
*/ - c->querybuf = sdsnewlen(NULL,c->bulklen+2); + c->querybuf = sdsnewlen(SDS_NOINIT,c->bulklen+2); sdsclear(c->querybuf); pos = 0; } else { @@ -1477,7 +1477,7 @@ sds getAllClientsInfoString(void) { listNode *ln; listIter li; client *client; - sds o = sdsnewlen(NULL,200*listLength(server.clients)); + sds o = sdsnewlen(SDS_NOINIT,200*listLength(server.clients)); sdsclear(o); listRewind(server.clients,&li); while ((ln = listNext(&li)) != NULL) { diff --git a/src/object.c b/src/object.c index 08c9ad956..741466dd3 100644 --- a/src/object.c +++ b/src/object.c @@ -98,7 +98,9 @@ robj *createEmbeddedStringObject(const char *ptr, size_t len) { sh->len = len; sh->alloc = len; sh->flags = SDS_TYPE_8; - if (ptr) { + if (ptr == SDS_NOINIT) + sh->buf[len] = '\0'; + else if (ptr) { memcpy(sh->buf,ptr,len); sh->buf[len] = '\0'; } else { diff --git a/src/rdb.c b/src/rdb.c index 2689b172d..eb37a82f6 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -254,7 +254,7 @@ void *rdbLoadIntegerObject(rio *rdb, int enctype, int flags, size_t *lenptr) { char buf[LONG_STR_SIZE], *p; int len = ll2string(buf,sizeof(buf),val); if (lenptr) *lenptr = len; - p = plain ? zmalloc(len) : sdsnewlen(NULL,len); + p = plain ? zmalloc(len) : sdsnewlen(SDS_NOINIT,len); memcpy(p,buf,len); return p; } else if (encode) { @@ -343,10 +343,10 @@ void *rdbLoadLzfStringObject(rio *rdb, int flags, size_t *lenptr) { /* Allocate our target according to the uncompressed size. */ if (plain) { val = zmalloc(len); - if (lenptr) *lenptr = len; } else { - val = sdsnewlen(NULL,len); + val = sdsnewlen(SDS_NOINIT,len); } + if (lenptr) *lenptr = len; /* Load the compressed representation and uncompress it to target. */ if (rioRead(rdb,c,clen) == 0) goto err; @@ -471,7 +471,7 @@ void *rdbGenericLoadStringObject(rio *rdb, int flags, size_t *lenptr) { if (len == RDB_LENERR) return NULL; if (plain || sds) { - void *buf = plain ? zmalloc(len) : sdsnewlen(NULL,len); + void *buf = plain ? 
zmalloc(len) : sdsnewlen(SDS_NOINIT,len); if (lenptr) *lenptr = len; if (len && rioRead(rdb,buf,len) == 0) { if (plain) @@ -482,8 +482,8 @@ void *rdbGenericLoadStringObject(rio *rdb, int flags, size_t *lenptr) { } return buf; } else { - robj *o = encode ? createStringObject(NULL,len) : - createRawStringObject(NULL,len); + robj *o = encode ? createStringObject(SDS_NOINIT,len) : + createRawStringObject(SDS_NOINIT,len); if (len && rioRead(rdb,o->ptr,len) == 0) { decrRefCount(o); return NULL; diff --git a/src/sds.c b/src/sds.c index eafa13c29..c25fd55b5 100644 --- a/src/sds.c +++ b/src/sds.c @@ -39,6 +39,8 @@ #include "sds.h" #include "sdsalloc.h" +const char *SDS_NOINIT = "SDS_NOINIT"; + static inline int sdsHdrSize(char type) { switch(type&SDS_TYPE_MASK) { case SDS_TYPE_5: @@ -72,6 +74,7 @@ static inline char sdsReqType(size_t string_size) { /* Create a new sds string with the content specified by the 'init' pointer * and 'initlen'. * If NULL is used for 'init' the string is initialized with zero bytes. + * If SDS_NOINIT is used, the buffer is left uninitialized; * * The string is always null-termined (all the sds strings are, always) so * even if you create an sds string with: @@ -92,7 +95,9 @@ sds sdsnewlen(const void *init, size_t initlen) { unsigned char *fp; /* flags pointer. 
*/ sh = s_malloc(hdrlen+initlen+1); - if (!init) + if (init==SDS_NOINIT) + init = NULL; + else if (!init) memset(sh, 0, hdrlen+initlen+1); if (sh == NULL) return NULL; s = (char*)sh+hdrlen; diff --git a/src/sds.h b/src/sds.h index 394f8b52e..16e85ce04 100644 --- a/src/sds.h +++ b/src/sds.h @@ -34,6 +34,7 @@ #define __SDS_H #define SDS_MAX_PREALLOC (1024*1024) +const char *SDS_NOINIT; #include #include From 8f9e82e868ddf45e9e84a499334008f34da5767c Mon Sep 17 00:00:00 2001 From: oranagra Date: Thu, 23 Feb 2017 03:44:42 -0800 Subject: [PATCH 0283/1722] when a slave experiances an error on commands that come from master, print to the log since slave isn't replying to it's master, these errors go unnoticed. since we don't expect the master to send garbadge to the slave, this should be safe. (as long as we don't log OOM errors there) --- src/networking.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/networking.c b/src/networking.c index 343a910e2..1ef27815a 100644 --- a/src/networking.c +++ b/src/networking.c @@ -371,6 +371,8 @@ void addReplyErrorLength(client *c, const char *s, size_t len) { addReplyString(c,"-ERR ",5); addReplyString(c,s,len); addReplyString(c,"\r\n",2); + if (c->flags & CLIENT_MASTER) + serverLog(LL_WARNING,"Error sent to master: %s", s); } void addReplyError(client *c, const char *err) { From 08007ed4422cfc1ed5375646fcbccc405c1b7c2c Mon Sep 17 00:00:00 2001 From: Salvatore Sanfilippo Date: Thu, 23 Feb 2017 22:38:44 +0800 Subject: [PATCH 0284/1722] Fix BITPOS unaligned memory access. --- src/bitops.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/bitops.c b/src/bitops.c index 7ab72633c..d5b96ce1e 100644 --- a/src/bitops.c +++ b/src/bitops.c @@ -104,6 +104,7 @@ long redisBitpos(void *s, unsigned long count, int bit) { unsigned long skipval, word = 0, one; long pos = 0; /* Position of bit, to return to the caller. 
*/ unsigned long j; + int found; /* Process whole words first, seeking for first word that is not * all ones or all zeros respectively if we are lookig for zeros @@ -117,21 +118,27 @@ long redisBitpos(void *s, unsigned long count, int bit) { /* Skip initial bits not aligned to sizeof(unsigned long) byte by byte. */ skipval = bit ? 0 : UCHAR_MAX; c = (unsigned char*) s; + found = 0; while((unsigned long)c & (sizeof(*l)-1) && count) { - if (*c != skipval) break; + if (*c != skipval) { + found = 1; + break; + } c++; count--; pos += 8; } /* Skip bits with full word step. */ - skipval = bit ? 0 : ULONG_MAX; l = (unsigned long*) c; - while (count >= sizeof(*l)) { - if (*l != skipval) break; - l++; - count -= sizeof(*l); - pos += sizeof(*l)*8; + if (!found) { + skipval = bit ? 0 : ULONG_MAX; + while (count >= sizeof(*l)) { + if (*l != skipval) break; + l++; + count -= sizeof(*l); + pos += sizeof(*l)*8; + } } /* Load bytes into "word" considering the first byte as the most significant From 7a4c9474f858d8c1a10302a89c46befe1390e7af Mon Sep 17 00:00:00 2001 From: Salvatore Sanfilippo Date: Thu, 23 Feb 2017 22:39:44 +0800 Subject: [PATCH 0285/1722] Use ARM unaligned accesses ifdefs for SPARC as well. --- src/bitops.c | 2 +- src/config.h | 9 +++++++++ src/hyperloglog.c | 2 +- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/bitops.c b/src/bitops.c index d5b96ce1e..43450fca3 100644 --- a/src/bitops.c +++ b/src/bitops.c @@ -665,7 +665,7 @@ void bitopCommand(client *c) { * result in GCC compiling the code using multiple-words load/store * operations that are not supported even in ARM >= v6. 
*/ j = 0; - #ifndef __arm__ + #ifndef USE_ALIGNED_ACCESS if (minlen >= sizeof(unsigned long)*4 && numkeys <= 16) { unsigned long *lp[16]; unsigned long *lres = (unsigned long*) res; diff --git a/src/config.h b/src/config.h index 1005dcc2a..c23f1c789 100644 --- a/src/config.h +++ b/src/config.h @@ -215,4 +215,13 @@ void setproctitle(const char *fmt, ...); #define __arm64__ #endif +/* Make sure we can test for SPARC just checking for __sparc__. */ +#if defined(__sparc) && !defined(__sparc__) +#define __sparc__ +#endif + +#if defined(__sparc__) || defined(__arm__) +#define USE_ALIGNED_ACCESS +#endif + #endif diff --git a/src/hyperloglog.c b/src/hyperloglog.c index b8a63a73f..49516f824 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -401,7 +401,7 @@ uint64_t MurmurHash64A (const void * key, int len, unsigned int seed) { uint64_t k; #if (BYTE_ORDER == LITTLE_ENDIAN) - #if defined(__arm__) && !defined(__arm64__) + #ifdef USE_ALIGNED_ACCESS memcpy(&k,data,sizeof(uint64_t)); #else k = *((uint64_t*)data); From 1f0dae3c7fcffe6cd244e4802bd912f749f173e2 Mon Sep 17 00:00:00 2001 From: Salvatore Sanfilippo Date: Fri, 24 Feb 2017 00:00:13 +0800 Subject: [PATCH 0286/1722] Makefile: fix building with Solaris C compiler, 64 bit. 
--- src/Makefile | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/Makefile b/src/Makefile index 8cf6ba6f8..5d2b2b395 100644 --- a/src/Makefile +++ b/src/Makefile @@ -65,6 +65,13 @@ DEBUG=-g -ggdb ifeq ($(uname_S),SunOS) # SunOS + ifneq ($(@@),32bit) + CFLAGS+= -m64 + LDFLAGS+= -m64 + endif + DEBUG=-g + DEBUG_FLAGS=-g + export CFLAGS LDFLAGS DEBUG DEBUG_FLAGS INSTALL=cp -pf FINAL_CFLAGS+= -D__EXTENSIONS__ -D_XPG6 FINAL_LIBS+= -ldl -lnsl -lsocket -lresolv -lpthread -lrt From c40322945a91575c708e6ffc1777daa133c11363 Mon Sep 17 00:00:00 2001 From: Dvir Volk Date: Wed, 1 Mar 2017 16:51:01 +0200 Subject: [PATCH 0287/1722] fixed free of blocked client before refering to it --- src/module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/module.c b/src/module.c index 3b90eae4a..db3c8dcef 100644 --- a/src/module.c +++ b/src/module.c @@ -3217,8 +3217,8 @@ void moduleHandleBlockedClients(void) { } if (bc->privdata && bc->free_privdata) bc->free_privdata(bc->privdata); - zfree(bc); if (c != NULL) unblockClient(c); + zfree(bc); /* Lock again before to iterate the loop. */ pthread_mutex_lock(&moduleUnblockedClientsMutex); From a04ba58d9ad52e66b70519519141a173ab161931 Mon Sep 17 00:00:00 2001 From: itamar Date: Mon, 6 Mar 2017 14:37:10 +0200 Subject: [PATCH 0288/1722] Sets up fake client to select current db in RM_Call() --- src/module.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/module.c b/src/module.c index 3b90eae4a..0ae514be9 100644 --- a/src/module.c +++ b/src/module.c @@ -2460,6 +2460,7 @@ RedisModuleCallReply *RM_Call(RedisModuleCtx *ctx, const char *cmdname, const ch /* Setup our fake client for command execution. */ c->flags |= CLIENT_MODULE; + c->db = ctx->client->db; c->argv = argv; c->argc = argc; c->cmd = c->lastcmd = cmd; From bcb1240ccf05e5bfd813dcc29c38a20125f3c005 Mon Sep 17 00:00:00 2001 From: vienna Date: Tue, 7 Mar 2017 16:14:05 +0000 Subject: [PATCH 0289/1722] fix #3847: add close socket before return ANET_ERR. 
--- src/anet.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/anet.c b/src/anet.c index ef1711d06..993615601 100644 --- a/src/anet.c +++ b/src/anet.c @@ -380,8 +380,10 @@ int anetUnixGenericConnect(char *err, char *path, int flags) sa.sun_family = AF_LOCAL; strncpy(sa.sun_path,path,sizeof(sa.sun_path)-1); if (flags & ANET_CONNECT_NONBLOCK) { - if (anetNonBlock(err,s) != ANET_OK) + if (anetNonBlock(err,s) != ANET_OK) { + close(s); return ANET_ERR; + } } if (connect(s,(struct sockaddr*)&sa,sizeof(sa)) == -1) { if (errno == EINPROGRESS && From 01f56d44dc602482bbd5a2dcfd9698c16b530b0d Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 9 Mar 2017 13:49:36 +0100 Subject: [PATCH 0290/1722] Use sha256 instead of sha1 to generate tarball hashes. --- utils/releasetools/04_release_hash.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/releasetools/04_release_hash.sh b/utils/releasetools/04_release_hash.sh index df082149a..9d5c6ad4b 100755 --- a/utils/releasetools/04_release_hash.sh +++ b/utils/releasetools/04_release_hash.sh @@ -1,6 +1,6 @@ #!/bin/bash -SHA=$(curl -s http://download.redis.io/releases/redis-${1}.tar.gz | shasum | cut -f 1 -d' ') -ENTRY="hash redis-${1}.tar.gz sha1 $SHA http://download.redis.io/releases/redis-${1}.tar.gz" +SHA=$(curl -s http://download.redis.io/releases/redis-${1}.tar.gz | shasum -a 256 | cut -f 1 -d' ') +ENTRY="hash redis-${1}.tar.gz sha256 $SHA http://download.redis.io/releases/redis-${1}.tar.gz" echo $ENTRY >> ~/hack/redis-hashes/README vi ~/hack/redis-hashes/README echo "Press any key to commit, Ctrl-C to abort)." 
From 499595f510e77ed147e50cc5acc3eac5e98d7048 Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Wed, 15 Mar 2017 01:05:15 -0700 Subject: [PATCH 0291/1722] add LFU policies to the test suite, just for coverage --- tests/unit/maxmemory.tcl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/maxmemory.tcl b/tests/unit/maxmemory.tcl index e6bf7860c..0c3f6b32c 100644 --- a/tests/unit/maxmemory.tcl +++ b/tests/unit/maxmemory.tcl @@ -24,7 +24,7 @@ start_server {tags {"maxmemory"}} { } foreach policy { - allkeys-random allkeys-lru volatile-lru volatile-random volatile-ttl + allkeys-random allkeys-lru allkeys-lfu volatile-lru volatile-lfu volatile-random volatile-ttl } { test "maxmemory - is the memory limit honoured? (policy $policy)" { # make sure to start with a blank instance @@ -98,7 +98,7 @@ start_server {tags {"maxmemory"}} { } foreach policy { - volatile-lru volatile-random volatile-ttl + volatile-lru volatile-lfu volatile-random volatile-ttl } { test "maxmemory - policy $policy should only remove volatile keys." { # make sure to start with a blank instance From de52b6375bb2bad39645abd3e4095f0fe4f40f41 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 27 Mar 2017 15:26:56 +0200 Subject: [PATCH 0292/1722] Cluster: hash slots tracking using a radix tree. 
--- src/Makefile | 2 +- src/cluster.c | 6 +- src/cluster.h | 3 +- src/db.c | 99 ++- src/lazyfree.c | 17 +- src/rax.c | 1845 ++++++++++++++++++++++++++++++++++++++++++++++ src/rax.h | 158 ++++ src/rax_malloc.h | 44 ++ src/server.h | 4 +- 9 files changed, 2114 insertions(+), 64 deletions(-) create mode 100644 src/rax.c create mode 100644 src/rax.h create mode 100644 src/rax_malloc.h diff --git a/src/Makefile b/src/Makefile index 5d2b2b395..fec6573c2 100644 --- a/src/Makefile +++ b/src/Makefile @@ -139,7 +139,7 @@ endif REDIS_SERVER_NAME=redis-server REDIS_SENTINEL_NAME=redis-sentinel -REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o +REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o REDIS_CLI_NAME=redis-cli REDIS_CLI_OBJ=anet.o adlist.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o REDIS_BENCHMARK_NAME=redis-benchmark diff --git a/src/cluster.c b/src/cluster.c 
index 4d7b0502d..1a38a8e47 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -476,8 +476,10 @@ void clusterInit(void) { } } - /* The slots -> keys map is a sorted set. Init it. */ - server.cluster->slots_to_keys = zslCreate(); + /* The slots -> keys map is a radix tree. Initialize it here. */ + server.cluster->slots_to_keys = raxNew(); + memset(server.cluster->slots_keys_count,0, + sizeof(server.cluster->slots_keys_count)); /* Set myself->port / cport to my listening ports, we'll just need to * discover the IP address via MEET messages. */ diff --git a/src/cluster.h b/src/cluster.h index 6dd69a01b..be6fe0bdc 100644 --- a/src/cluster.h +++ b/src/cluster.h @@ -116,7 +116,8 @@ typedef struct clusterState { clusterNode *migrating_slots_to[CLUSTER_SLOTS]; clusterNode *importing_slots_from[CLUSTER_SLOTS]; clusterNode *slots[CLUSTER_SLOTS]; - zskiplist *slots_to_keys; + uint64_t slots_keys_count[CLUSTER_SLOTS]; + rax *slots_to_keys; /* The following fields are used to take the slave state on elections. */ mstime_t failover_auth_time; /* Time of previous or next election. */ int failover_auth_count; /* Number of votes received so far. */ diff --git a/src/db.c b/src/db.c index a21437c76..ee7398abb 100644 --- a/src/db.c +++ b/src/db.c @@ -1301,90 +1301,85 @@ int *migrateGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkey /* Slot to Key API. This is used by Redis Cluster in order to obtain in * a fast way a key that belongs to a specified hash slot. This is useful - * while rehashing the cluster. */ -void slotToKeyAdd(robj *key) { + * while rehashing the cluster and in other conditions when we need to + * understand if we have keys for a given hash slot. 
*/ +void slotToKeyUpdateKey(robj *key, int add) { unsigned int hashslot = keyHashSlot(key->ptr,sdslen(key->ptr)); + unsigned char buf[64]; + unsigned char *indexed = buf; + size_t keylen = sdslen(key->ptr); - sds sdskey = sdsdup(key->ptr); - zslInsert(server.cluster->slots_to_keys,hashslot,sdskey); + server.cluster->slots_keys_count[hashslot] += add ? 1 : -1; + if (keylen+2 > 64) indexed = zmalloc(keylen+2); + indexed[0] = (hashslot >> 8) & 0xff; + indexed[1] = hashslot & 0xff; + memcpy(indexed+2,key->ptr,keylen); + if (add) { + raxInsert(server.cluster->slots_to_keys,indexed,keylen+2,NULL); + } else { + raxRemove(server.cluster->slots_to_keys,indexed,keylen+2); + } + if (indexed != buf) zfree(indexed); +} + +void slotToKeyAdd(robj *key) { + slotToKeyUpdateKey(key,1); } void slotToKeyDel(robj *key) { - unsigned int hashslot = keyHashSlot(key->ptr,sdslen(key->ptr)); - zslDelete(server.cluster->slots_to_keys,hashslot,key->ptr,NULL); + slotToKeyUpdateKey(key,0); } void slotToKeyFlush(void) { - zslFree(server.cluster->slots_to_keys); - server.cluster->slots_to_keys = zslCreate(); + raxFree(server.cluster->slots_to_keys); + server.cluster->slots_to_keys = raxNew(); + memset(server.cluster->slots_keys_count,0, + sizeof(server.cluster->slots_keys_count)); } /* Pupulate the specified array of objects with keys in the specified slot. * New objects are returned to represent keys, it's up to the caller to * decrement the reference count to release the keys names. 
*/ unsigned int getKeysInSlot(unsigned int hashslot, robj **keys, unsigned int count) { - zskiplistNode *n; - zrangespec range; + raxIterator iter; int j = 0; + unsigned char indexed[2]; - range.min = range.max = hashslot; - range.minex = range.maxex = 0; - - n = zslFirstInRange(server.cluster->slots_to_keys, &range); - while(n && n->score == hashslot && count--) { - keys[j++] = createStringObject(n->ele,sdslen(n->ele)); - n = n->level[0].forward; + indexed[0] = (hashslot >> 8) & 0xff; + indexed[1] = hashslot & 0xff; + raxStart(&iter,server.cluster->slots_to_keys); + raxSeek(&iter,indexed,2,">="); + while(count-- && raxNext(&iter,NULL,0,NULL)) { + if (iter.key[0] != indexed[0] || iter.key[1] != indexed[1]) break; + keys[j++] = createStringObject((char*)iter.key+2,iter.key_len-2); } + raxStop(&iter); return j; } /* Remove all the keys in the specified hash slot. * The number of removed items is returned. */ unsigned int delKeysInSlot(unsigned int hashslot) { - zskiplistNode *n; - zrangespec range; + raxIterator iter; int j = 0; + unsigned char indexed[2]; - range.min = range.max = hashslot; - range.minex = range.maxex = 0; + indexed[0] = (hashslot >> 8) & 0xff; + indexed[1] = hashslot & 0xff; + raxStart(&iter,server.cluster->slots_to_keys); + while(server.cluster->slots_keys_count[hashslot]) { + raxSeek(&iter,indexed,2,">="); + raxNext(&iter,NULL,0,NULL); - n = zslFirstInRange(server.cluster->slots_to_keys, &range); - while(n && n->score == hashslot) { - sds sdskey = n->ele; - robj *key = createStringObject(sdskey,sdslen(sdskey)); - n = n->level[0].forward; /* Go to the next item before freeing it. 
*/ + robj *key = createStringObject((char*)iter.key+2,iter.key_len-2); dbDelete(&server.db[0],key); decrRefCount(key); j++; } + raxStop(&iter); return j; } unsigned int countKeysInSlot(unsigned int hashslot) { - zskiplist *zsl = server.cluster->slots_to_keys; - zskiplistNode *zn; - zrangespec range; - int rank, count = 0; - - range.min = range.max = hashslot; - range.minex = range.maxex = 0; - - /* Find first element in range */ - zn = zslFirstInRange(zsl, &range); - - /* Use rank of first element, if any, to determine preliminary count */ - if (zn != NULL) { - rank = zslGetRank(zsl, zn->score, zn->ele); - count = (zsl->length - (rank - 1)); - - /* Find last element in range */ - zn = zslLastInRange(zsl, &range); - - /* Use rank of last element, if any, to determine the actual count */ - if (zn != NULL) { - rank = zslGetRank(zsl, zn->score, zn->ele); - count -= (zsl->length - rank); - } - } - return count; + return server.cluster->slots_keys_count[hashslot]; } diff --git a/src/lazyfree.c b/src/lazyfree.c index c05252159..8d56e1031 100644 --- a/src/lazyfree.c +++ b/src/lazyfree.c @@ -97,11 +97,14 @@ void emptyDbAsync(redisDb *db) { /* Empty the slots-keys map of Redis CLuster by creating a new empty one * and scheduiling the old for lazy freeing. */ void slotToKeyFlushAsync(void) { - zskiplist *oldsl = server.cluster->slots_to_keys; - server.cluster->slots_to_keys = zslCreate(); - atomicIncr(lazyfree_objects,oldsl->length, + rax *old = server.cluster->slots_to_keys; + + server.cluster->slots_to_keys = raxNew(); + memset(server.cluster->slots_keys_count,0, + sizeof(server.cluster->slots_keys_count)); + atomicIncr(lazyfree_objects,old->numele, lazyfree_objects_mutex); - bioCreateBackgroundJob(BIO_LAZY_FREE,NULL,NULL,oldsl); + bioCreateBackgroundJob(BIO_LAZY_FREE,NULL,NULL,old); } /* Release objects from the lazyfree thread. 
It's just decrRefCount() @@ -125,8 +128,8 @@ void lazyfreeFreeDatabaseFromBioThread(dict *ht1, dict *ht2) { /* Release the skiplist mapping Redis Cluster keys to slots in the * lazyfree thread. */ -void lazyfreeFreeSlotsMapFromBioThread(zskiplist *sl) { - size_t len = sl->length; - zslFree(sl); +void lazyfreeFreeSlotsMapFromBioThread(rax *rt) { + size_t len = rt->numele; + raxFree(rt); atomicDecr(lazyfree_objects,len,lazyfree_objects_mutex); } diff --git a/src/rax.c b/src/rax.c new file mode 100644 index 000000000..ca49787ed --- /dev/null +++ b/src/rax.c @@ -0,0 +1,1845 @@ +/* Rax -- A radix tree implementation. + * + * Copyright (c) 2017, Salvatore Sanfilippo + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Redis nor the names of its contributors may be used + * to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include "rax.h" +#include "rax_malloc.h" + +/* This is a special pointer that is guaranteed to never have the same value + * of a radix tree node. It's used in order to report "not found" error without + * requiring the function to have multiple return values. */ +void *raxNotFound = (void*)"rax-not-found-pointer"; + +/* -------------------------------- Debugging ------------------------------ */ + +void raxDebugShowNode(const char *msg, raxNode *n); + +/* Turn debugging messages on/off. */ +#if 0 +#define debugf(...) \ + do { \ + printf("%s:%s:%d:\t", __FILE__, __FUNCTION__, __LINE__); \ + printf(__VA_ARGS__); \ + fflush(stdout); \ + } while (0); + +#define debugnode(msg,n) raxDebugShowNode(msg,n) +#else +#define debugf(...) +#define debugnode(msg,n) +#endif + +/* ------------------------- raxStack functions -------------------------- + * The raxStack is a simple stack of pointers that is capable of switching + * from using a stack-allocated array to dynamic heap once a given number of + * items are reached. It is used in order to retain the list of parent nodes + * while walking the radix tree in order to implement certain operations that + * need to navigate the tree upward. + * ------------------------------------------------------------------------- */ + +/* Initialize the stack. 
*/ +static inline void raxStackInit(raxStack *ts) { + ts->stack = ts->static_items; + ts->items = 0; + ts->maxitems = RAX_STACK_STATIC_ITEMS; + ts->oom = 0; +} + +/* Push an item into the stack, returns 1 on success, 0 on out of memory. */ +static inline int raxStackPush(raxStack *ts, void *ptr) { + if (ts->items == ts->maxitems) { + if (ts->stack == ts->static_items) { + ts->stack = rax_malloc(sizeof(void*)*ts->maxitems*2); + if (ts->stack == NULL) { + ts->stack = ts->static_items; + ts->oom = 1; + return 0; + } + memcpy(ts->stack,ts->static_items,sizeof(void*)*ts->maxitems); + } else { + void **newalloc = rax_realloc(ts->stack,sizeof(void*)*ts->maxitems*2); + if (newalloc == NULL) { + ts->oom = 1; + return 0; + } + ts->stack = newalloc; + } + ts->maxitems *= 2; + } + ts->stack[ts->items] = ptr; + ts->items++; + return 1; +} + +/* Pop an item from the stack, the function returns NULL if there are no + * items to pop. */ +static inline void *raxStackPop(raxStack *ts) { + if (ts->items == 0) return NULL; + ts->items--; + return ts->stack[ts->items]; +} + +/* Return the stack item at the top of the stack without actually consuming + * it. */ +static inline void *raxStackPeek(raxStack *ts) { + if (ts->items == 0) return NULL; + return ts->stack[ts->items-1]; +} + +/* Free the stack in case we used heap allocation. */ +static inline void raxStackFree(raxStack *ts) { + if (ts->stack != ts->static_items) rax_free(ts->stack); +} + +/* ---------------------------------------------------------------------------- + * Radis tree implementation + * --------------------------------------------------------------------------*/ + +/* Allocate a new non compressed node with the specified number of children. + * If datafiled is true, the allocation is made large enough to hold the + * associated data pointer. + * Returns the new node pointer. On out of memory NULL is returned. 
*/ +raxNode *raxNewNode(size_t children, int datafield) { + size_t nodesize = sizeof(raxNode)+children+ + sizeof(raxNode*)*children; + if (datafield) nodesize += sizeof(void*); + raxNode *node = rax_malloc(nodesize); + if (node == NULL) return NULL; + node->iskey = 0; + node->isnull = 0; + node->iscompr = 0; + node->size = children; + return node; +} + +/* Allocate a new rax and return its pointer. On out of memory the function + * returns NULL. */ +rax *raxNew(void) { + rax *rax = rax_malloc(sizeof(*rax)); + if (rax == NULL) return NULL; + rax->numele = 0; + rax->numnodes = 1; + rax->head = raxNewNode(0,0); + if (rax->head == NULL) { + rax_free(rax); + return NULL; + } else { + return rax; + } +} + +/* Return the current total size of the node. */ +#define raxNodeCurrentLength(n) ( \ + sizeof(raxNode)+(n)->size+ \ + ((n)->iscompr ? sizeof(raxNode*) : sizeof(raxNode*)*(n)->size)+ \ + (((n)->iskey && !(n)->isnull)*sizeof(void*)) \ +) + +/* realloc the node to make room for auxiliary data in order + * to store an item in that node. On out of memory NULL is returned. */ +raxNode *raxReallocForData(raxNode *n, void *data) { + if (data == NULL) return n; /* No reallocation needed, setting isnull=1 */ + size_t curlen = raxNodeCurrentLength(n); + return rax_realloc(n,curlen+sizeof(void*)); +} + +/* Set the node auxiliary data to the specified pointer. */ +void raxSetData(raxNode *n, void *data) { + n->iskey = 1; + if (data != NULL) { + void **ndata = (void**) + ((char*)n+raxNodeCurrentLength(n)-sizeof(void*)); + memcpy(ndata,&data,sizeof(data)); + n->isnull = 0; + } else { + n->isnull = 1; + } +} + +/* Get the node auxiliary data. 
*/ +void *raxGetData(raxNode *n) { + if (n->isnull) return NULL; + void **ndata =(void**)((char*)n+raxNodeCurrentLength(n)-sizeof(void*)); + void *data; + memcpy(&data,ndata,sizeof(data)); + return data; +} + +/* Add a new child to the node 'n' representing the character 'c' and return + * its new pointer, as well as the child pointer by reference. Additionally + * '***parentlink' is populated with the raxNode pointer-to-pointer of where + * the new child was stored, which is useful for the caller to replace the + * child pointer if it gets reallocated. + * + * On success the new parent node pointer is returned (it may change because + * of the realloc, so the caller should discard 'n' and use the new value). + * On out of memory NULL is returned, and the old node is still valid. */ +raxNode *raxAddChild(raxNode *n, char c, raxNode **childptr, raxNode ***parentlink) { + assert(n->iscompr == 0); + + size_t curlen = sizeof(raxNode)+ + n->size+ + sizeof(raxNode*)*n->size; + size_t newlen; + + /* Alloc the new child we will link to 'n'. */ + raxNode *child = raxNewNode(0,0); + if (child == NULL) return NULL; + + /* Make space in the original node. */ + if (n->iskey) curlen += sizeof(void*); + newlen = curlen+sizeof(raxNode*)+1; /* Add 1 char and 1 pointer. */ + raxNode *newn = rax_realloc(n,newlen); + if (newn == NULL) { + rax_free(child); + return NULL; + } + n = newn; + + /* After the reallocation, we have 5/9 (depending on the system + * pointer size) bytes at the end, that is, the additional char + * in the 'data' section, plus one pointer to the new child: + * + * [numc][abx][ap][bp][xp]|auxp|..... + * + * Let's find where to insert the new child in order to make sure + * it is inserted in-place lexicographically. */ + int pos; + for (pos = 0; pos < n->size; pos++) { + if (n->data[pos] > c) break; + } + + /* Now, if present, move auxiliary data pointer at the end + * so that we can mess with the other data without overwriting it. 
+ * We will obtain something like that: + * + * [numc][abx][ap][bp][xp].....|auxp| */ + unsigned char *src; + if (n->iskey && !n->isnull) { + src = n->data+n->size+sizeof(raxNode*)*n->size; + memmove(src+1+sizeof(raxNode*),src,sizeof(void*)); + } + + /* Now imagine we are adding a node with edge 'c'. The insertion + * point is between 'b' and 'x', so the 'pos' variable value is + * To start, move all the child pointers after the insertion point + * of 1+sizeof(pointer) bytes on the right, to obtain: + * + * [numc][abx][ap][bp].....[xp]|auxp| */ + src = n->data+n->size+sizeof(raxNode*)*pos; + memmove(src+1+sizeof(raxNode*),src,sizeof(raxNode*)*(n->size-pos)); + + /* Now make the space for the additional char in the data section, + * but also move the pointers before the insertion point in the right + * by 1 byte, in order to obtain the following: + * + * [numc][ab.x][ap][bp]....[xp]|auxp| */ + src = n->data+pos; + memmove(src+1,src,n->size-pos+sizeof(raxNode*)*pos); + + /* We can now set the character and its child node pointer to get: + * + * [numc][abcx][ap][bp][cp]....|auxp| + * [numc][abcx][ap][bp][cp][xp]|auxp| */ + n->data[pos] = c; + n->size++; + raxNode **childfield = (raxNode**)(n->data+n->size+sizeof(raxNode*)*pos); + memcpy(childfield,&child,sizeof(child)); + *childptr = child; + *parentlink = childfield; + return n; +} + +/* Return the pointer to the last child pointer in a node. For the compressed + * nodes this is the only child pointer. */ +#define raxNodeLastChildPtr(n) ((raxNode**) ( \ + ((char*)(n)) + \ + raxNodeCurrentLength(n) - \ + sizeof(raxNode*) - \ + (((n)->iskey && !(n)->isnull) ? sizeof(void*) : 0) \ +)) + +/* Return the pointer to the first child pointer. */ +#define raxNodeFirstChildPtr(n) ((raxNode**)((n)->data+(n)->size)) + +/* Turn the node 'n', that must be a node without any children, into a + * compressed node representing a set of nodes linked one after the other + * and having exactly one child each. 
The node can be a key or not: this + * property and the associated value if any will be preserved. + * + * The function also returns a child node, since the last node of the + * compressed chain cannot be part of the chain: it has zero children while + * we can only compress inner nodes with exactly one child each. */ +raxNode *raxCompressNode(raxNode *n, unsigned char *s, size_t len, raxNode **child) { + assert(n->size == 0 && n->iscompr == 0); + void *data = NULL; /* Initialized only to avoid warnings. */ + size_t newsize; + + debugf("Compress node: %.*s\n", (int)len,s); + + /* Allocate the child to link to this node. */ + *child = raxNewNode(0,0); + if (*child == NULL) return NULL; + + /* Make space in the parent node. */ + newsize = sizeof(raxNode)+len+sizeof(raxNode*); + if (n->iskey) { + data = raxGetData(n); /* To restore it later. */ + if (!n->isnull) newsize += sizeof(void*); + } + raxNode *newn = rax_realloc(n,newsize); + if (newn == NULL) { + rax_free(*child); + return NULL; + } + n = newn; + + n->iscompr = 1; + n->size = len; + memcpy(n->data,s,len); + if (n->iskey) raxSetData(n,data); + raxNode **childfield = raxNodeLastChildPtr(n); + memcpy(childfield,&child,sizeof(child)); + return n; +} + +/* Low level function that walks the tree looking for the string + * 's' of 'len' bytes. The function returns the number of characters + * of the key that was possible to process: if the returned integer + * is the same as 'len', then it means that the node corresponding to the + * string was found (however it may not be a key in case the node->iskey is + * zero or if simply we stopped in the middle of a compressed node, so that + * 'splitpos' is non zero). + * + * Otherwise if the returned integer is not the same as 'len', there was an + * early stop during the tree walk because of a character mismatch. 
+ * + * The node where the search ended (because the full string was processed + * or because there was an early stop) is returned by reference as + * '*stopnode' if the passed pointer is not NULL. This node link in the + * parent's node is returned as '*plink' if not NULL. Finally, if the + * search stopped in a compressed node, '*splitpos' returns the index + * inside the compressed node where the search ended. This is useful to + * know where to split the node for insertion. */ +static inline size_t raxLowWalk(rax *rax, unsigned char *s, size_t len, raxNode **stopnode, raxNode ***plink, int *splitpos, raxStack *ts) { + raxNode *h = rax->head; + raxNode **parentlink = &rax->head; + + size_t i = 0; /* Position in the string. */ + size_t j = 0; /* Position in the node children (or bytes if compressed).*/ + while(h->size && i < len) { + debugnode("Lookup current node",h); + unsigned char *v = h->data; + + if (h->iscompr) { + for (j = 0; j < h->size && i < len; j++, i++) { + if (v[j] != s[i]) break; + } + if (j != h->size) break; + } else { + /* Even when h->size is large, linear scan provides good + * performances compared to other approaches that are in theory + * more sounding, like performing a binary search. */ + for (j = 0; j < h->size; j++) { + if (v[j] == s[i]) break; + } + if (j == h->size) break; + i++; + } + + if (ts) raxStackPush(ts,h); /* Save stack of parent nodes. */ + raxNode **children = raxNodeFirstChildPtr(h); + if (h->iscompr) j = 0; /* Compressed node only child is at index 0. */ + memcpy(&h,children+j,sizeof(h)); + parentlink = children+j; + j = 0; /* If the new node is compressed and we do not + iterate again (since i == l) set the split + position to 0 to signal this node represents + the searched key. */ + } + if (stopnode) *stopnode = h; + if (plink) *plink = parentlink; + if (splitpos && h->iscompr) *splitpos = j; + return i; +} + +/* Insert the element 's' of size 'len', setting as auxiliary data + * the pointer 'data'. 
If the element is already present, the associated + * data is updated, and 0 is returned, otherwise the element is inserted + * and 1 is returned. On out of memory the function returns 0 as well but + * sets errno to ENOMEM, otherwise errno will be set to 0. */ +int raxInsert(rax *rax, unsigned char *s, size_t len, void *data) { + size_t i; + int j = 0; /* Split position. If raxLowWalk() stops in a compressed + node, the index 'j' represents the char we stopped within the + compressed node, that is, the position where to split the + node for insertion. */ + raxNode *h, **parentlink; + + debugf("### Insert %.*s with value %p\n", (int)len, s, data); + i = raxLowWalk(rax,s,len,&h,&parentlink,&j,NULL); + + /* If i == len we walked following the whole string. If we are not + * in the middle of a compressed node, the string is either already + * inserted or this middle node is currently not a key, but can represent + * our key. We have just to reallocate the node and make space for the + * data pointer. */ + if (i == len && (!h->iscompr || j == 0 /* not in the middle if j is 0 */)) { + if (h->iskey) { + raxSetData(h,data); + errno = 0; + return 0; /* Element already exists. */ + } + h = raxReallocForData(h,data); + if (h == NULL) { + errno = ENOMEM; + return 0; + } + memcpy(parentlink,&h,sizeof(h)); + raxSetData(h,data); + rax->numele++; + return 1; /* Element inserted. */ + } + + /* If the node we stopped at is a compressed node, we need to + * split it before to continue. + * + * Splitting a compressed node have a few possibile cases. + * Imagine that the node 'h' we are currently at is a compressed + * node contaning the string "ANNIBALE" (it means that it represents + * nodes A -> N -> N -> I -> B -> A -> L -> E with the only child + * pointer of this node pointing at the 'E' node, because remember that + * we have characters at the edges of the graph, not inside the nodes + * themselves. 
+ * + * In order to show a real case imagine our node to also point to + * another compressed node, that finally points at the node without + * children, representing 'O': + * + * "ANNIBALE" -> "SCO" -> [] + * + * When inserting we may face the following cases. Note that all the cases + * require the insertion of a non compressed node with exactly two + * children, except for the last case which just requires splitting a + * compressed node. + * + * 1) Inserting "ANNIENTARE" + * + * |B| -> "ALE" -> "SCO" -> [] + * "ANNI" -> |-| + * |E| -> (... continue algo ...) "NTARE" -> [] + * + * 2) Inserting "ANNIBALI" + * + * |E| -> "SCO" -> [] + * "ANNIBAL" -> |-| + * |I| -> (... continue algo ...) [] + * + * 3) Inserting "AGO" (Like case 1, but set iscompr = 0 into original node) + * + * |N| -> "NIBALE" -> "SCO" -> [] + * |A| -> |-| + * |G| -> (... continue algo ...) |O| -> [] + * + * 4) Inserting "CIAO" + * + * |A| -> "NNIBALE" -> "SCO" -> [] + * |-| + * |C| -> (... continue algo ...) "IAO" -> [] + * + * 5) Inserting "ANNI" + * + * "ANNI" -> "BALE" -> "SCO" -> [] + * + * The final algorithm for insertion covering all the above cases is as + * follows. + * + * ============================= ALGO 1 ============================= + * + * For the above cases 1 to 4, that is, all cases where we stopped in + * the middle of a compressed node for a character mismatch, do: + * + * Let $SPLITPOS be the zero-based index at which, in the + * compressed node array of characters, we found the mismatching + * character. For example if the node contains "ANNIBALE" and we add + * "ANNIENTARE" the $SPLITPOS is 4, that is, the index at which the + * mismatching character is found. + * + * 1. Save the current compressed node $NEXT pointer (the pointer to the + * child element, that is always present in compressed nodes). + * + * 2. Create "split node" having as child the non common letter + * at the compressed node. 
The other non common letter (at the key) + * will be added later as we continue the normal insertion algorithm + * at step "6". + * + * 3a. IF $SPLITPOS == 0: + * Replace the old node with the split node, by copying the auxiliary + * data if any. Fix parent's reference. Free old node eventually + * (we still need its data for the next steps of the algorithm). + * + * 3b. IF $SPLITPOS != 0: + * Trim the compressed node (reallocating it as well) in order to + * contain $splitpos characters. Change chilid pointer in order to link + * to the split node. If new compressed node len is just 1, set + * iscompr to 0 (layout is the same). Fix parent's reference. + * + * 4a. IF the postfix len (the length of the remaining string of the + * original compressed node after the split character) is non zero, + * create a "postfix node". If the postfix node has just one character + * set iscompr to 0, otherwise iscompr to 1. Set the postfix node + * child pointer to $NEXT. + * + * 4b. IF the postfix len is zero, just use $NEXT as postfix pointer. + * + * 5. Set child[0] of split node to postfix node. + * + * 6. Set the split node as the current node, set current index at child[1] + * and continue insertion algorithm as usually. + * + * ============================= ALGO 2 ============================= + * + * For case 5, that is, if we stopped in the middle of a compressed + * node but no mismatch was found, do: + * + * Let $SPLITPOS be the zero-based index at which, in the + * compressed node array of characters, we stopped iterating because + * there were no more keys character to match. So in the example of + * the node "ANNIBALE", addig the string "ANNI", the $SPLITPOS is 4. + * + * 1. Save the current compressed node $NEXT pointer (the pointer to the + * child element, that is always present in compressed nodes). + * + * 2. Create a "postfix node" containing all the characters from $SPLITPOS + * to the end. Use $NEXT as the postfix node child pointer. 
+ * If the postfix node length is 1, set iscompr to 0. + * Set the node as a key with the associated value of the new + * inserted key. + * + * 3. Trim the current node to contain the first $SPLITPOS characters. + * As usually if the new node length is just 1, set iscompr to 0. + * Take the iskey / associated value as it was in the orignal node. + * Fix the parent's reference. + * + * 4. Set the postfix node as the only child pointer of the trimmed + * node created at step 1. + */ + + /* ------------------------- ALGORITHM 1 --------------------------- */ + if (h->iscompr && i != len) { + debugf("ALGO 1: Stopped at compressed node %.*s (%p)\n", + h->size, h->data, (void*)h); + debugf("Still to insert: %.*s\n", (int)(len-i), s+i); + debugf("Splitting at %d: '%c'\n", j, ((char*)h->data)[j]); + debugf("Other (key) letter is '%c'\n", s[i]); + + /* 1: Save next pointer. */ + raxNode **childfield = raxNodeLastChildPtr(h); + raxNode *next; + memcpy(&next,childfield,sizeof(next)); + debugf("Next is %p\n", (void*)next); + debugf("iskey %d\n", h->iskey); + if (h->iskey) { + debugf("key value is %p\n", raxGetData(h)); + } + + /* Set the length of the additional nodes we will need. */ + size_t trimmedlen = j; + size_t postfixlen = h->size - j - 1; + int split_node_is_key = !trimmedlen && h->iskey && !h->isnull; + size_t nodesize; + + /* 2: Create the split node. Also allocate the other nodes we'll need + * ASAP, so that it will be simpler to handle OOM. */ + raxNode *splitnode = raxNewNode(1, split_node_is_key); + raxNode *trimmed = NULL; + raxNode *postfix = NULL; + + if (trimmedlen) { + nodesize = sizeof(raxNode)+trimmedlen+sizeof(raxNode*); + if (h->iskey && !h->isnull) nodesize += sizeof(void*); + trimmed = rax_malloc(nodesize); + } + + if (postfixlen) { + nodesize = sizeof(raxNode)+postfixlen+ + sizeof(raxNode*); + postfix = rax_malloc(nodesize); + } + + /* OOM? Abort now that the tree is untouched. 
*/ + if (splitnode == NULL || + (trimmedlen && trimmed == NULL) || + (postfixlen && postfix == NULL)) + { + rax_free(splitnode); + rax_free(trimmed); + rax_free(postfix); + errno = ENOMEM; + return 0; + } + splitnode->data[0] = h->data[j]; + + if (j == 0) { + /* 3a: Replace the old node with the split node. */ + if (h->iskey) { + void *ndata = raxGetData(h); + raxSetData(splitnode,ndata); + } + memcpy(parentlink,&splitnode,sizeof(splitnode)); + } else { + /* 3b: Trim the compressed node. */ + trimmed->size = j; + memcpy(trimmed->data,h->data,j); + trimmed->iscompr = j > 1 ? 1 : 0; + trimmed->iskey = h->iskey; + trimmed->isnull = h->isnull; + if (h->iskey && !h->isnull) { + void *ndata = raxGetData(h); + raxSetData(trimmed,ndata); + } + raxNode **cp = raxNodeLastChildPtr(trimmed); + memcpy(cp,&splitnode,sizeof(splitnode)); + memcpy(parentlink,&trimmed,sizeof(trimmed)); + parentlink = cp; /* Set parentlink to splitnode parent. */ + rax->numnodes++; + } + + /* 4: Create the postfix node: what remains of the original + * compressed node after the split. */ + if (postfixlen) { + /* 4a: create a postfix node. */ + postfix->iskey = 0; + postfix->isnull = 0; + postfix->size = postfixlen; + postfix->iscompr = postfixlen > 1; + memcpy(postfix->data,h->data+j+1,postfixlen); + raxNode **cp = raxNodeLastChildPtr(postfix); + memcpy(cp,&next,sizeof(next)); + rax->numnodes++; + } else { + /* 4b: just use next as postfix node. */ + postfix = next; + } + + /* 5: Set splitnode first child as the postfix node. */ + raxNode **splitchild = raxNodeLastChildPtr(splitnode); + memcpy(splitchild,&postfix,sizeof(postfix)); + + /* 6. Continue insertion: this will cause the splitnode to + * get a new child (the non common character at the currently + * inserted key). 
*/ + rax_free(h); + h = splitnode; + } else if (h->iscompr && i == len) { + /* ------------------------- ALGORITHM 2 --------------------------- */ + debugf("ALGO 2: Stopped at compressed node %.*s (%p) j = %d\n", + h->size, h->data, (void*)h, j); + + /* Allocate postfix & trimmed nodes ASAP to fail for OOM gracefully. */ + size_t postfixlen = h->size - j; + size_t nodesize = sizeof(raxNode)+postfixlen+sizeof(raxNode*); + if (data != NULL) nodesize += sizeof(void*); + raxNode *postfix = rax_malloc(nodesize); + + nodesize = sizeof(raxNode)+j+sizeof(raxNode*); + if (h->iskey && !h->isnull) nodesize += sizeof(void*); + raxNode *trimmed = rax_malloc(nodesize); + + if (postfix == NULL || trimmed == NULL) { + rax_free(postfix); + rax_free(trimmed); + errno = ENOMEM; + return 0; + } + + /* 1: Save next pointer. */ + raxNode **childfield = raxNodeLastChildPtr(h); + raxNode *next; + memcpy(&next,childfield,sizeof(next)); + + /* 2: Create the postfix node. */ + postfix->size = postfixlen; + postfix->iscompr = postfixlen > 1; + postfix->iskey = 1; + postfix->isnull = 0; + memcpy(postfix->data,h->data+j,postfixlen); + raxSetData(postfix,data); + raxNode **cp = raxNodeLastChildPtr(postfix); + memcpy(cp,&next,sizeof(next)); + rax->numnodes++; + + /* 3: Trim the compressed node. */ + trimmed->size = j; + trimmed->iscompr = j > 1; + trimmed->iskey = 0; + trimmed->isnull = 0; + memcpy(trimmed->data,h->data,j); + memcpy(parentlink,&trimmed,sizeof(trimmed)); + if (h->iskey) { + void *aux = raxGetData(h); + raxSetData(trimmed,aux); + } + + /* Fix the trimmed node child pointer to point to + * the postfix node. */ + cp = raxNodeLastChildPtr(trimmed); + memcpy(cp,&postfix,sizeof(postfix)); + + /* Finish! We don't need to contine with the insertion + * algorithm for ALGO 2. The key is already inserted. */ + rax->numele++; + return 1; /* Key inserted. */ + } + + /* We walked the radix tree as far as we could, but still there are left + * chars in our string. 
We need to insert the missing nodes. + * Note: while loop never entered if the node was split by ALGO2, + * since i == len. */ + while(i < len) { + raxNode *child; + rax->numnodes++; + + /* If this node is going to have a single child, and there + * are other characters, so that that would result in a chain + * of single-childed nodes, turn it into a compressed node. */ + if (h->size == 0 && len-i > 1) { + debugf("Inserting compressed node\n"); + size_t comprsize = len-i; + if (comprsize > RAX_NODE_MAX_SIZE) + comprsize = RAX_NODE_MAX_SIZE; + raxNode *newh = raxCompressNode(h,s+i,comprsize,&child); + if (newh == NULL) goto oom; + h = newh; + memcpy(parentlink,&h,sizeof(h)); + parentlink = raxNodeLastChildPtr(h); + i += comprsize; + } else { + debugf("Inserting normal node\n"); + raxNode **new_parentlink; + raxNode *newh = raxAddChild(h,s[i],&child,&new_parentlink); + if (newh == NULL) goto oom; + h = newh; + memcpy(parentlink,&h,sizeof(h)); + parentlink = new_parentlink; + i++; + } + h = child; + } + raxNode *newh = raxReallocForData(h,data); + if (newh == NULL) goto oom; + h = newh; + if (!h->iskey) rax->numele++; + raxSetData(h,data); + memcpy(parentlink,&h,sizeof(h)); + return 1; /* Element inserted. */ + +oom: + /* This code path handles out of memory after part of the sub-tree was + * already added. Set the node as a key, and then remove it. */ + h->isnull = 1; + h->iskey = 1; + raxRemove(rax,s,i); + errno = ENOMEM; + return 0; +} + +/* Find a key in the rax, returns raxNotFound special void pointer value + * if the item was not found, otherwise the value associated with the + * item is returned. 
*/ +void *raxFind(rax *rax, unsigned char *s, size_t len) { + raxNode *h; + + debugf("### Lookup: %.*s\n", (int)len, s); + int splitpos = 0; + size_t i = raxLowWalk(rax,s,len,&h,NULL,&splitpos,NULL); + if (i != len || (h->iscompr && splitpos != 0) || !h->iskey) + return raxNotFound; + return raxGetData(h); +} + +/* Return the memory address where the 'parent' node stores the specified + * 'child' pointer, so that the caller can update the pointer with another + * one if needed. The function assumes it will find a match, otherwise the + * operation is an undefined behavior (it will continue scanning the + * memory without any bound checking). */ +raxNode **raxFindParentLink(raxNode *parent, raxNode *child) { + raxNode **cp = raxNodeFirstChildPtr(parent); + raxNode *c; + while(1) { + memcpy(&c,cp,sizeof(c)); + if (c == child) break; + cp++; + } + return cp; +} + +/* Low level child removal from node. The new node pointer (after the child + * removal) is returned. Note that this function does not fix the pointer + * of the parent node in its parent, so this task is up to the caller. + * The function never fails for out of memory. */ +raxNode *raxRemoveChild(raxNode *parent, raxNode *child) { + debugnode("raxRemoveChild before", parent); + /* If parent is a compressed node (having a single child, as for definition + * of the data structure), the removal of the child consists into turning + * it into a normal node without children. */ + if (parent->iscompr) { + void *data = NULL; + if (parent->iskey) data = raxGetData(parent); + parent->isnull = 0; + parent->iscompr = 0; + parent->size = 0; + if (parent->iskey) raxSetData(parent,data); + debugnode("raxRemoveChild after", parent); + return parent; + } + + /* Otherwise we need to scan for the children pointer and memmove() + * accordingly. + * + * 1. To start we seek the first element in both the children + * pointers and edge bytes in the node. 
*/ + raxNode **cp = raxNodeLastChildPtr(parent) - (parent->size-1); + raxNode **c = cp; + unsigned char *e = parent->data; + + /* 2. Search the child pointer to remove inside the array of children + * pointers. */ + while(1) { + raxNode *aux; + memcpy(&aux,c,sizeof(aux)); + if (aux == child) break; + c++; + e++; + } + + /* 3. Remove the edge and the pointer by memmoving the remaining children + * pointer and edge bytes one position before. */ + int taillen = parent->size - (e - parent->data) - 1; + debugf("raxRemoveChild tail len: %d\n", taillen); + memmove(e,e+1,taillen); + + /* Since we have one data byte less, also child pointers start one byte + * before now. */ + memmove(((char*)cp)-1,cp,(parent->size-taillen-1)*sizeof(raxNode**)); + + /* Move the remaining "tail" pointer at the right position as well. */ + memmove(((char*)c)-1,c+1,taillen*sizeof(raxNode**)); + + /* 4. Update size. */ + parent->size--; + + /* realloc the node according to the theoretical memory usage, to free + * data if we are over-allocating right now. */ + raxNode *newnode = rax_realloc(parent,raxNodeCurrentLength(parent)); + debugnode("raxRemoveChild after", newnode); + /* Note: if rax_realloc() fails we just return the old address, which + * is valid. */ + return newnode ? newnode : parent; +} + +/* Remove the specified item. Returns 1 if the item was found and + * deleted, 0 otherwise. */ +int raxRemove(rax *rax, unsigned char *s, size_t len) { + raxNode *h; + raxStack ts; + + debugf("### Delete: %.*s\n", (int)len, s); + raxStackInit(&ts); + int splitpos = 0; + size_t i = raxLowWalk(rax,s,len,&h,NULL,NULL,&ts); + if (i != len || (h->iscompr && splitpos != 0) || !h->iskey) { + raxStackFree(&ts); + return 0; + } + h->iskey = 0; + rax->numele--; + + /* If this node has no children, the deletion needs to reclaim the + * no longer used nodes. 
This is an iterative process that needs to + * walk the three upward, deleting all the nodes with just one child + * that are not keys, until the head of the rax is reached or the first + * node with more than one child is found. */ + + int trycompress = 0; /* Will be set to 1 if we should try to optimize the + tree resulting from the deletion. */ + + if (h->size == 0) { + debugf("Key deleted in node without children. Cleanup needed.\n"); + raxNode *child = NULL; + while(h != rax->head) { + child = h; + debugf("Freeing child %p [%.*s] key:%d\n", (void*)child, + (int)child->size, (char*)child->data, child->iskey); + rax_free(child); + rax->numnodes--; + h = raxStackPop(&ts); + /* If this node has more then one child, or actually holds + * a key, stop here. */ + if (h->iskey || (!h->iscompr && h->size != 1)) break; + } + if (child) { + debugf("Unlinking child %p from parent %p\n", + (void*)child, (void*)h); + raxNode *new = raxRemoveChild(h,child); + if (new != h) { + raxNode *parent = raxStackPeek(&ts); + raxNode **parentlink; + if (parent == NULL) { + parentlink = &rax->head; + } else { + parentlink = raxFindParentLink(parent,h); + } + memcpy(parentlink,&new,sizeof(new)); + } + + /* If after the removal the node has just a single child + * and is not a key, we need to try to compress it. */ + if (new->size == 1 && new->iskey == 0) { + trycompress = 1; + h = new; + } + } + } else if (h->size == 1) { + /* If the node had just one child, after the removal of the key + * further compression with adjacent nodes is pontentially possible. */ + trycompress = 1; + } + + /* Don't try node compression if our nodes pointers stack is not + * complete because of OOM while executing raxLowWalk() */ + if (trycompress && ts.oom) trycompress = 0; + + /* Recompression: if trycompress is true, 'h' points to a radix tree node + * that changed in a way that could allow to compress nodes in this + * sub-branch. 
Compressed nodes represent chains of nodes that are not + * keys and have a single child, so there are two deletion events that + * may alter the tree so that further compression is needed: + * + * 1) A node with a single child was a key and now no longer is a key. + * 2) A node with two children now has just one child. + * + * We try to navigate upward till there are other nodes that can be + * compressed, when we reach the upper node which is not a key and has + * a single child, we scan the chain of children to collect the + * compressable part of the tree, and replace the current node with the + * new one, fixing the child pointer to reference the first non + * compressable node. + * + * Example of case "1". A tree stores the keys "FOO" = 1 and + * "FOOBAR" = 2: + * + * + * "FOO" -> "BAR" -> [] (2) + * (1) + * + * After the removal of "FOO" the tree can be compressed as: + * + * "FOOBAR" -> [] (2) + * + * + * Example of case "2". A tree stores the keys "FOOBAR" = 1 and + * "FOOTER" = 2: + * + * |B| -> "AR" -> [] (1) + * "FOO" -> |-| + * |T| -> "ER" -> [] (2) + * + * After the removal of "FOOTER" the resulting tree is: + * + * "FOO" -> |B| -> "AR" -> [] (1) + * + * That can be compressed into: + * + * "FOOBAR" -> [] (1) + */ + if (trycompress) { + debugf("After removing %.*s:\n", (int)len, s); + debugnode("Compression may be needed",h); + debugf("Seek start node\n"); + + /* Try to reach the upper node that is compressible. + * At the end of the loop 'h' will point to the first node we + * can try to compress and 'parent' to its parent. */ + raxNode *parent; + while(1) { + parent = raxStackPop(&ts); + if (!parent || parent->iskey || + (!parent->iscompr && parent->size != 1)) break; + h = parent; + debugnode("Going up to",h); + } + raxNode *start = h; /* Compression starting node. */ + + /* Scan chain of nodes we can compress. 
*/ + size_t comprsize = h->size; + int nodes = 1; + while(h->size != 0) { + raxNode **cp = raxNodeLastChildPtr(h); + memcpy(&h,cp,sizeof(h)); + if (h->iskey || (!h->iscompr && h->size != 1)) break; + nodes++; + comprsize += h->size; + } + if (nodes > 1) { + /* If we can compress, create the new node and populate it. */ + size_t nodesize = + sizeof(raxNode)+comprsize+sizeof(raxNode*); + raxNode *new = rax_malloc(nodesize); + /* An out of memory here just means we cannot optimize this + * node, but the tree is left in a consistent state. */ + if (new == NULL) { + raxStackFree(&ts); + return 1; + } + new->iskey = 0; + new->isnull = 0; + new->iscompr = 1; + new->size = comprsize; + rax->numnodes++; + + /* Scan again, this time to populate the new node content and + * to fix the new node child pointer. At the same time we free + * all the nodes that we'll no longer use. */ + comprsize = 0; + h = start; + while(h->size != 0) { + memcpy(new->data+comprsize,h->data,h->size); + comprsize += h->size; + raxNode **cp = raxNodeLastChildPtr(h); + raxNode *tofree = h; + memcpy(&h,cp,sizeof(h)); + rax_free(tofree); rax->numnodes--; + if (h->iskey || (!h->iscompr && h->size != 1)) break; + } + debugnode("New node",new); + + /* Now 'h' points to the first node that we still need to use, + * so our new node child pointer will point to it. */ + raxNode **cp = raxNodeLastChildPtr(new); + memcpy(cp,&h,sizeof(h)); + + /* Fix parent link. */ + if (parent) { + raxNode **parentlink = raxFindParentLink(parent,start); + memcpy(parentlink,&new,sizeof(new)); + } else { + rax->head = new; + } + + debugf("Compressed %d nodes, %d total bytes\n", + nodes, (int)comprsize); + } + } + raxStackFree(&ts); + return 1; +} + +/* This is the core of raxFree(): performs a depth-first scan of the + * tree and releases all the nodes found. */ +void raxRecursiveFree(rax *rax, raxNode *n) { + int numchildren = n->iscompr ? 
1 : n->size; + raxNode **cp = raxNodeLastChildPtr(n); + while(numchildren--) { + raxNode *child; + memcpy(&child,cp,sizeof(child)); + raxRecursiveFree(rax,child); + cp--; + } + debugnode("free depth-first",n); + rax_free(n); + rax->numnodes--; +} + +/* Free a whole radix tree. */ +void raxFree(rax *rax) { + raxRecursiveFree(rax,rax->head); + assert(rax->numnodes == 0); + rax_free(rax); +} + +/* ------------------------------- Iterator --------------------------------- */ + +/* Initialize a Rax iterator. This call should be performed a single time + * to initialize the iterator, and must be followed by a raxSeek() call, + * otherwise the raxPrev()/raxNext() functions will just return EOF. */ +void raxStart(raxIterator *it, rax *rt) { + it->flags = RAX_ITER_EOF; /* No crash if the iterator is not seeked. */ + it->rt = rt; + it->key_len = 0; + it->key = it->key_static_string; + it->key_max = RAX_ITER_STATIC_LEN; + it->data = NULL; + raxStackInit(&it->stack); +} + +/* Append characters at the current key string of the iterator 'it'. This + * is a low level function used to implement the iterator, not callable by + * the user. Returns 0 on out of memory, otherwise 1 is returned. */ +int raxIteratorAddChars(raxIterator *it, unsigned char *s, size_t len) { + if (it->key_max < it->key_len+len) { + unsigned char *old = (it->key == it->key_static_string) ? NULL : + it->key; + size_t new_max = (it->key_len+len)*2; + it->key = rax_realloc(old,new_max); + if (it->key == NULL) { + it->key = (!old) ? it->key_static_string : old; + return 0; + } + if (old == NULL) memcpy(it->key,it->key_static_string,it->key_len); + it->key_max = new_max; + } + /* Use memmove since there could be an overlap between 's' and + * it->key when we use the current key in order to re-seek. */ + memmove(it->key+it->key_len,s,len); + it->key_len += len; + return 1; +} + +/* Remove the specified number of chars from the right of the current + * iterator key. 
*/ +void raxIteratorDelChars(raxIterator *it, size_t count) { + it->key_len -= count; +} + +/* Do an iteration step towards the next element. At the end of the step the + * iterator key will represent the (new) current key. If it is not possible + * to step in the specified direction since there are no longer elements, the + * iterator is flagged with RAX_ITER_EOF. + * + * If 'noup' is true the function starts directly scanning for the next + * lexicographically smaller children, and the current node is already assumed + * to be the parent of the last key node, so the first operation to go back to + * the parent will be skipped. This option is used by raxSeek() when + * implementing seeking a non existing element with the ">" or "<" options: + * the starting node is not a key in that particular case, so we start the scan + * from a node that does not represent the key set. + * + * The function returns 1 on success or 0 on out of memory. */ +int raxIteratorNextStep(raxIterator *it, int noup) { + if (it->flags & RAX_ITER_EOF) { + return 0; + } else if (it->flags & RAX_ITER_JUST_SEEKED) { + it->flags &= ~RAX_ITER_JUST_SEEKED; + return 1; + } + + /* Save key len, stack items and the node where we are currently + * so that on iterator EOF we can restore the current key and state. */ + size_t orig_key_len = it->key_len; + size_t orig_stack_items = it->stack.items; + raxNode *orig_node = it->node; + + /* Clear the EOF flag: it will be set again if the EOF condition + * is still valid. */ + it->flags &= ~RAX_ITER_EOF; + + while(1) { + int children = it->node->iscompr ? 1 : it->node->size; + if (!noup && children) { + debugf("GO DEEPER\n"); + /* Seek the lexicographically smaller key in this subtree, which + * is the first one found always going torwards the first child + * of every successive node. */ + if (!raxStackPush(&it->stack,it->node)) return 0; + raxNode **cp = raxNodeFirstChildPtr(it->node); + if (!raxIteratorAddChars(it,it->node->data, + it->node->iscompr ? 
it->node->size : 1)) return 0; + memcpy(&it->node,cp,sizeof(it->node)); + /* For "next" step, stop every time we find a key along the + * way, since the key is lexicograhically smaller compared to + * what follows in the sub-children. */ + if (it->node->iskey) { + it->data = raxGetData(it->node); + return 1; + } + } else { + /* If we finished exporing the previous sub-tree, switch to the + * new one: go upper until a node is found where there are + * children representing keys lexicographically greater than the + * current key. */ + while(1) { + /* Already on head? Can't go up, iteration finished. */ + if (!noup && it->node == it->rt->head) { + it->flags |= RAX_ITER_EOF; + it->stack.items = orig_stack_items; + it->key_len = orig_key_len; + it->node = orig_node; + return 1; + } + /* If there are no children at the current node, try parent's + * next child. */ + unsigned char prevchild = it->key[it->key_len-1]; + if (!noup) { + it->node = raxStackPop(&it->stack); + } else { + noup = 0; + } + /* Adjust the current key to represent the node we are + * at. */ + int todel = it->node->iscompr ? it->node->size : 1; + raxIteratorDelChars(it,todel); + + /* Try visitng the next child if there was at least one + * additional child. */ + if (!it->node->iscompr && it->node->size > 1) { + raxNode **cp = raxNodeFirstChildPtr(it->node); + int i = 0; + while (i < it->node->size) { + debugf("SCAN NEXT %c\n", it->node->data[i]); + if (it->node->data[i] > prevchild) break; + i++; + cp++; + } + if (i != it->node->size) { + debugf("SCAN found a new node\n"); + raxIteratorAddChars(it,it->node->data+i,1); + if (!raxStackPush(&it->stack,it->node)) return 0; + memcpy(&it->node,cp,sizeof(it->node)); + if (it->node->iskey) { + it->data = raxGetData(it->node); + return 1; + } + break; + } + } + } + } + } +} + +/* Seek the grestest key in the subtree at the current node. Return 0 on + * out of memory, otherwise 1. This is an helper function for different + * iteration functions below. 
*/ +int raxSeekGreatest(raxIterator *it) { + while(it->node->size) { + if (it->node->iscompr) { + if (!raxIteratorAddChars(it,it->node->data, + it->node->size)) return 0; + } else { + if (!raxIteratorAddChars(it,it->node->data+it->node->size-1,1)) + return 0; + } + raxNode **cp = raxNodeLastChildPtr(it->node); + if (!raxStackPush(&it->stack,it->node)) return 0; + memcpy(&it->node,cp,sizeof(it->node)); + } + return 1; +} + +/* Like raxIteratorNextStep() but implements an iteration step moving + * to the lexicographically previous element. The 'noup' option has a similar + * effect to the one of raxIteratorPrevSte(). */ +int raxIteratorPrevStep(raxIterator *it, int noup) { + if (it->flags & RAX_ITER_EOF) { + return 0; + } else if (it->flags & RAX_ITER_JUST_SEEKED) { + it->flags &= ~RAX_ITER_JUST_SEEKED; + return 1; + } + + /* Save key len, stack items and the node where we are currently + * so that on iterator EOF we can restore the current key and state. */ + size_t orig_key_len = it->key_len; + size_t orig_stack_items = it->stack.items; + raxNode *orig_node = it->node; + + while(1) { + /* Already on head? Can't go up, iteration finished. */ + if (!noup && it->node == it->rt->head) { + it->flags |= RAX_ITER_EOF; + it->stack.items = orig_stack_items; + it->key_len = orig_key_len; + it->node = orig_node; + return 1; + } + + unsigned char prevchild = it->key[it->key_len-1]; + if (!noup) { + it->node = raxStackPop(&it->stack); + } else { + noup = 0; + } + + /* Adjust the current key to represent the node we are + * at. */ + int todel = it->node->iscompr ? it->node->size : 1; + raxIteratorDelChars(it,todel); + + /* Try visiting the prev child if there was at least one + * additional child. 
*/ + if (!it->node->iscompr && it->node->size > 1) { + raxNode **cp = raxNodeLastChildPtr(it->node); + int i = it->node->size-1; + while (i >= 0) { + debugf("SCAN PREV %c\n", it->node->data[i]); + if (it->node->data[i] < prevchild) break; + i--; + cp--; + } + /* If we found a new subtree to explore in this node, + * go deeper following all the last children in order to + * find the key lexicographically greater. */ + if (i != -1) { + debugf("SCAN found a new node\n"); + /* Enter the node we just found. */ + if (!raxIteratorAddChars(it,it->node->data+i,1)) return 0; + if (!raxStackPush(&it->stack,it->node)) return 0; + memcpy(&it->node,cp,sizeof(it->node)); + /* Seek sub-tree max. */ + if (!raxSeekGreatest(it)) return 0; + } + } + + /* Return the key: this could be the key we found scanning a new + * subtree, or if we did not find a new subtree to explore here, + * before giving up with this node, check if it's a key itself. */ + if (it->node->iskey) { + it->data = raxGetData(it->node); + return 1; + } + } +} + +/* Seek an iterator at the specified element. + * Return 0 if the seek failed for syntax error or out of memory. Otherwise + * 1 is returned. */ +int raxSeek(raxIterator *it, unsigned char *ele, size_t len, const char *op) { + int eq = 0, lt = 0, gt = 0, first = 0, last = 0; + + it->stack.items = 0; /* Just resetting. Intialized by raxStart(). */ + it->flags |= RAX_ITER_JUST_SEEKED; + it->flags &= ~RAX_ITER_EOF; + it->key_len = 0; + it->node = NULL; + + /* Set flags according to the operator used to perform the seek. */ + if (op[0] == '>') { + gt = 1; + if (op[1] == '=') eq = 1; + } else if (op[0] == '<') { + lt = 1; + if (op[1] == '=') eq = 1; + } else if (op[0] == '=') { + eq = 1; + } else if (op[0] == '^') { + first = 1; + } else if (op[0] == '$') { + last = 1; + } else { + return 0; /* Error. */ + } + + /* If there are no elements, set the EOF condition immediately and + * return. 
*/ + if (it->rt->numele == 0) { + it->flags |= RAX_ITER_EOF; + return 1; + } + + if (first) { + /* Seeking the first key greater or equal to the empty string + * is equivalent to seeking the smaller key available. */ + return raxSeek(it,NULL,0,">="); + } + + if (last) { + /* Find the greatest key taking always the last child till a + * final node is found. */ + it->node = it->rt->head; + if (!raxSeekGreatest(it)) return 0; + assert(it->node->iskey); + return 1; + } + + /* We need to seek the specified key. What we do here is to actually + * perform a lookup, and later invoke the prev/next key code that + * we already use for iteration. */ + int splitpos = 0; + size_t i = raxLowWalk(it->rt,ele,len,&it->node,NULL,&splitpos,&it->stack); + + /* Return OOM on incomplete stack info. */ + if (it->stack.oom) return 0; + + if (eq && i == len && (!it->node->iscompr || splitpos == 0) && + it->node->iskey) + { + /* We found our node, since the key matches and we have an + * "equal" condition. */ + if (!raxIteratorAddChars(it,ele,len)) return 0; /* OOM. */ + } else { + /* Exact key not found or eq flag not set. We have to set as current + * key the one represented by the node we stopped at, and perform + * a next/prev operation to seek. To reconstruct the key at this node + * we start from the parent and go to the current node, accumulating + * the characters found along the way. 
*/ + if (!raxStackPush(&it->stack,it->node)) return 0; + for (size_t j = 1; j < it->stack.items; j++) { + raxNode *parent = it->stack.stack[j-1]; + raxNode *child = it->stack.stack[j]; + if (parent->iscompr) { + if (!raxIteratorAddChars(it,parent->data,parent->size)) + return 0; + } else { + raxNode **cp = raxNodeFirstChildPtr(parent); + unsigned char *p = parent->data; + while(1) { + raxNode *aux; + memcpy(&aux,cp,sizeof(aux)); + if (aux == child) break; + cp++; + p++; + } + if (!raxIteratorAddChars(it,p,1)) return 0; + } + } + raxStackPop(&it->stack); + + /* We need to set the iterator in the correct state to call next/prev + * step in order to seek the desired element. */ + debugf("After initial seek: i=%d len=%d key=%.*s\n", + (int)i, (int)len, (int)it->key_len, it->key); + if (i != len && !it->node->iscompr) { + /* If we stopped in the middle of a normal node because of a + * mismatch, add the mismatching character to the current key + * and call the iterator with the 'noup' flag so that it will try + * to seek the next/prev child in the current node directly based + * on the mismatching character. */ + if (!raxIteratorAddChars(it,ele+i,1)) return 0; + debugf("Seek normal node on mismatch: %.*s\n", + (int)it->key_len, (char*)it->key); + + it->flags &= ~RAX_ITER_JUST_SEEKED; + if (lt && !raxIteratorPrevStep(it,1)) return 0; + if (gt && !raxIteratorNextStep(it,1)) return 0; + it->flags |= RAX_ITER_JUST_SEEKED; /* Ignore next call. */ + } else if (i != len && it->node->iscompr) { + debugf("Compressed mismatch: %.*s\n", + (int)it->key_len, (char*)it->key); + /* In case of a mismatch within a compressed node. */ + int nodechar = it->node->data[splitpos]; + int keychar = ele[i]; + it->flags &= ~RAX_ITER_JUST_SEEKED; + if (gt) { + /* If the key the compressed node represents is greater + * than our seek element, continue forward, otherwise set the + * state in order to go back to the next sub-tree. 
*/ + if (nodechar > keychar) { + if (!raxIteratorNextStep(it,0)) return 0; + } else { + if (!raxIteratorAddChars(it,it->node->data,it->node->size)) + return 0; + if (!raxIteratorNextStep(it,1)) return 0; + } + } + if (lt) { + /* If the key the compressed node represents is smaller + * than our seek element, seek the greater key in this + * subtree, otherwise set the state in order to go back to + * the previous sub-tree. */ + if (nodechar < keychar) { + if (!raxSeekGreatest(it)) return 0; + } else { + if (!raxIteratorAddChars(it,it->node->data,it->node->size)) + return 0; + if (!raxIteratorPrevStep(it,1)) return 0; + } + } + it->flags |= RAX_ITER_JUST_SEEKED; /* Ignore next call. */ + } else { + debugf("No mismatch: %.*s\n", + (int)it->key_len, (char*)it->key); + /* If there was no mismatch we are into a node representing the + * key, (but which is not a key or the seek operator does not + * include 'eq'), or we stopped in the middle of a compressed node + * after processing all the key. Cotinue iterating as this was + * a legitimate key we stopped at. */ + it->flags &= ~RAX_ITER_JUST_SEEKED; + if (gt && !raxIteratorNextStep(it,0)) return 0; + if (lt && !raxIteratorPrevStep(it,0)) return 0; + it->flags |= RAX_ITER_JUST_SEEKED; /* Ignore next call. */ + } + } + return 1; +} + +/* Go to the next element in the scope of the iterator 'it'. + * If EOF (or out of memory) is reached, 0 is returned, otherwise 1 is + * returned. In case 0 is returned because of OOM, errno is set to ENOMEM. */ +int raxNext(raxIterator *it, unsigned char *stop, size_t stoplen, char *op) { + if (!raxIteratorNextStep(it,0)) { + errno = ENOMEM; + return 0; + } + if (it->flags & RAX_ITER_EOF) { + errno = 0; + return 0; + } + return 1; +} + +/* Go to the previous element in the scope of the iterator 'it'. + * If EOF (or out of memory) is reached, 0 is returned, otherwise 1 is + * returned. In case 0 is returned because of OOM, errno is set to ENOMEM. 
*/ +int raxPrev(raxIterator *it, unsigned char *stop, size_t stoplen, char *op) { + if (!raxIteratorPrevStep(it,0)) { + errno = ENOMEM; + return 0; + } + if (it->flags & RAX_ITER_EOF) { + errno = 0; + return 0; + } + return 1; +} + +/* Free the iterator. */ +void raxStop(raxIterator *it) { + if (it->key != it->key_static_string) rax_free(it->key); + raxStackFree(&it->stack); +} + +/* ----------------------------- Introspection ------------------------------ */ + +/* This function is mostly used for debugging and learning purposes. + * It shows an ASCII representation of a tree on standard output, outling + * all the nodes and the contained keys. + * + * The representation is as follow: + * + * "foobar" (compressed node) + * [abc] (normal node with three children) + * [abc]=0x12345678 (node is a key, pointing to value 0x12345678) + * [] (a normal empty node) + * + * Children are represented in new idented lines, each children prefixed by + * the "`-(x)" string, where "x" is the edge byte. + * + * [abc] + * `-(a) "ladin" + * `-(b) [kj] + * `-(c) [] + * + * However when a node has a single child the following representation + * is used instead: + * + * [abc] -> "ladin" -> [] + */ + +/* The actual implementation of raxShow(). */ +void raxRecursiveShow(int level, int lpad, raxNode *n) { + char s = n->iscompr ? '"' : '['; + char e = n->iscompr ? '"' : ']'; + + int numchars = printf("%c%.*s%c", s, n->size, n->data, e); + if (n->iskey) { + numchars += printf("=%p",raxGetData(n)); + } + + int numchildren = n->iscompr ? 1 : n->size; + /* Note that 7 and 4 magic constants are the string length + * of " `-(x) " and " -> " respectively. */ + if (level) { + lpad += (numchildren > 1) ? 
7 : 4; + if (numchildren == 1) lpad += numchars; + } + raxNode **cp = raxNodeFirstChildPtr(n); + for (int i = 0; i < numchildren; i++) { + char *branch = " `-(%c) "; + if (numchildren > 1) { + printf("\n"); + for (int j = 0; j < lpad; j++) putchar(' '); + printf(branch,n->data[i]); + } else { + printf(" -> "); + } + raxNode *child; + memcpy(&child,cp,sizeof(child)); + raxRecursiveShow(level+1,lpad,child); + cp++; + } +} + +/* Show a tree, as outlined in the comment above. */ +void raxShow(rax *rax) { + raxRecursiveShow(0,0,rax->head); + putchar('\n'); +} + +/* Used by debugnode() macro to show info about a given node. */ +void raxDebugShowNode(const char *msg, raxNode *n) { + printf("%s: %p [%.*s] key:%d size:%d children:", + msg, (void*)n, (int)n->size, (char*)n->data, n->iskey, n->size); + int numcld = n->iscompr ? 1 : n->size; + raxNode **cldptr = raxNodeLastChildPtr(n) - (numcld-1); + while(numcld--) { + raxNode *child; + memcpy(&child,cldptr,sizeof(child)); + cldptr++; + printf("%p ", (void*)child); + } + printf("\n"); + fflush(stdout); +} + +#ifdef BENCHMARK_MAIN +#include +#include + +/* This is a simple Feistel network in order to turn every possible + * uint32_t input into another "randomly" looking uint32_t. It is a + * one to one map so there are no repetitions. */ +static uint32_t int2int(uint32_t input) { + uint16_t l = input & 0xffff; + uint16_t r = input >> 16; + for (int i = 0; i < 8; i++) { + uint16_t nl = r; + uint16_t F = (((r * 31) + (r >> 5) + 7 * 371) ^ r) & 0xffff; + r = l ^ F; + l = nl; + } + return (r<<16)|l; +} + +/* Turn an uint32_t integer into an alphanumerical key and return its + * length. This function is used in order to generate keys that have + * a large charset, so that the radix tree can be testsed with many + * children per node. 
*/ +static size_t int2alphakey(char *s, size_t maxlen, uint32_t i) { + const char *set = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789"; + const size_t setlen = 62; + + if (maxlen == 0) return 0; + maxlen--; /* Space for null term char. */ + size_t len = 0; + while(len < maxlen) { + s[len++] = set[i%setlen]; + i /= setlen; + if (i == 0) break; + } + s[len] = '\0'; + return len; +} + +/* Return the UNIX time in microseconds */ +static long long ustime(void) { + struct timeval tv; + long long ust; + + gettimeofday(&tv, NULL); + ust = ((long long)tv.tv_sec)*1000000; + ust += tv.tv_usec; + return ust; +} + +/* Turn the integer 'i' into a key according to 'mode'. + * mode = 0, just represents the integer as a string. + * mode = 1, turn it into a random-looking alphanumerical string + * according to the int2alphakey() function. */ +static size_t int2key(char *s, size_t maxlen, uint32_t i, int mode) { + if (mode == 0) { + return snprintf(s,maxlen,"%lu",(unsigned long)i); + } else { + i = int2int(i); + return int2alphakey(s,maxlen,i); + } +} + +int main(void) { + for (int mode = 0; mode < 2; mode++) { + printf("Benchmark with %s keys:\n", + (mode == 0) ? 
"integer" : "alphanumerical"); + rax *t = raxNew(); + long long start = ustime(); + for (int i = 0; i < 5000000; i++) { + char buf[64]; + int len = int2key(buf,sizeof(buf),i,mode); + raxInsert(t,(unsigned char*)buf,len,(void*)(long)i); + } + printf("Insert: %f\n", (double)(ustime()-start)/1000000); + printf("%llu total nodes\n", (unsigned long long)t->numnodes); + printf("%llu total elements\n", (unsigned long long)t->numele); + + start = ustime(); + for (int i = 0; i < 5000000; i++) { + char buf[64]; + int len = int2key(buf,sizeof(buf),i,mode); + void *data = raxFind(t,(unsigned char*)buf,len); + if (data != (void*)(long)i) { + printf("Issue with %s: %p instead of %p\n", buf, + data, (void*)(long)i); + } + } + printf("Lookup: %f\n", (double)(ustime()-start)/1000000); + + start = ustime(); + for (int i = 0; i < 5000000; i++) { + char buf[64]; + int r = rand() % 5000000; + int len = int2key(buf,sizeof(buf),r,mode); + void *data = raxFind(t,(unsigned char*)buf,len); + if (data != (void*)(long)r) { + printf("Issue with %s: %p instead of %p\n", buf, + data, (void*)(long)r); + } + } + printf("Random lookup: %f\n", (double)(ustime()-start)/1000000); + + start = ustime(); + int count = 0; + for (int i = 0; i < 5000000; i++) { + char buf[64]; + int len = int2key(buf,sizeof(buf),i,mode); + buf[i%len] = '!'; /* "!" is never set into keys. 
*/ + void *data = raxFind(t,(unsigned char*) buf,len); + if (data != (void*)(long)i) count++; + } + printf("Failed lookup: %f\n", (double)(ustime()-start)/1000000); + + start = ustime(); + for (int i = 0; i < 5000000; i++) { + char buf[64]; + int len = int2key(buf,sizeof(buf),i,mode); + int retval = raxRemove(t,(unsigned char*)buf,len); + assert(retval == 1); + } + printf("Deletion: %f\n", (double)(ustime()-start)/1000000); + + printf("%llu total nodes\n", (unsigned long long)t->numnodes); + printf("%llu total elements\n", (unsigned long long)t->numele); + raxFree(t); + } + return 0; +} +#endif + +#ifdef TEST_MAIN +#include +#include + +int main(void) { + printf("notfound = %p\n", raxNotFound); + rax *t = raxNew(); + char *toadd[] = {"alligator","alien","baloon","chromodynamic","romane","romanus","romulus","rubens","ruber","rubicon","rubicundus","all","rub","ba",NULL}; + + srand(time(NULL)); + for (int x = 0; x < 10000; x++) rand(); + + long items = 0; + while(toadd[items] != NULL) items++; + + for (long i = 0; i < items; i++) { + raxInsert(t,(unsigned char*)toadd[i],strlen(toadd[i]),(void*)i); + printf("Added %s\n", toadd[i]); + } + raxShow(t); + + raxIterator iter; + raxStart(&iter,t); + + // OK: all this tests will need to go in the Rax unit test. 
+ // raxSeek(&iter,(unsigned char*)"rpxxx",5,"<="); + // raxSeek(&iter,(unsigned char*)"rom",3,">="); + // raxSeek(&iter,(unsigned char*)"rub",3,">="); + // raxSeek(&iter,(unsigned char*)"rub",3,">"); + // raxSeek(&iter,(unsigned char*)"rub",3,"<"); + // raxSeek(&iter,(unsigned char*)"rom",3,">"); + // raxSeek(&iter,(unsigned char*)"chro",4,">"); + // raxSeek(&iter,(unsigned char*)"chro",4,"<"); + // raxSeek(&iter,(unsigned char*)"chromz",6,"<"); + // raxSeek(&iter,NULL,0,"^"); + // raxSeek(&iter,"zorro",5,"<="); + // raxSeek(&iter,"zorro",5,"<"); + // raxSeek(&iter,NULL,0,"$"); + + // STILL TO TEST + raxSeek(&iter,(unsigned char*)"ro",2,">="); + printf("EOF: %d\n", (iter.flags & RAX_ITER_EOF) != 0); + + printf("SEEKED: %.*s, val %p\n", (int)iter.key_len, + (char*)iter.key, + iter.data); + + printf("NEXT\n"); + while(raxNext(&iter,NULL,0,NULL)) { + printf("--- key: %.*s, val %p\n", (int)iter.key_len, + (char*)iter.key, + iter.data); + } + + printf("After EOF element is: %.*s\n", (int)iter.key_len, + (char*)iter.key); + printf("~~~~~~~~~~~~~~\n"); + + printf("PREV\n"); + raxSeek(&iter,iter.key,iter.key_len,"=="); + while(raxPrev(&iter,NULL,0,NULL)) { + printf("--- key: %.*s, val %p\n", (int)iter.key_len, + (char*)iter.key, + iter.data); + } + + printf("After EOF element is: %.*s\n", (int)iter.key_len, + (char*)iter.key); + raxStop(&iter); + +#if 0 + raxStop(&iter); +#endif + exit(0); + + int rnum = rand(); + int survivor = rnum % items; + +#if 1 + printf("Removing everything but %s in random order\n", toadd[survivor]); + for (long i = 0; i < 1000; i++) { + int r = rand() % items; + if (r == survivor) continue; + raxRemove(t,(unsigned char*)toadd[r],strlen(toadd[r])); + } +#else + printf("Removing rubicon\n"); + raxRemove(t,(unsigned char*)"rubicon",7); +#endif + + printf("%llu total nodes\n", (unsigned long long)t->numnodes); + printf("%llu total elements\n", (unsigned long long)t->numele); + + raxShow(t); + raxFree(t); +} +#endif diff --git a/src/rax.h b/src/rax.h 
new file mode 100644 index 000000000..76330e0de --- /dev/null +++ b/src/rax.h @@ -0,0 +1,158 @@ +#ifndef RAX_H +#define RAX_H + +#include + +/* Representation of a radix tree as implemented in this file, that contains + * the strings "foo", "foobar" and "footer" after the insertion of each + * word. When the node represents a key inside the radix tree, we write it + * between [], otherwise it is written between (). + * + * This is the vanilla representation: + * + * (f) "" + * \ + * (o) "f" + * \ + * (o) "fo" + * \ + * [t b] "foo" + * / \ + * "foot" (e) (a) "foob" + * / \ + * "foote" (r) (r) "fooba" + * / \ + * "footer" [] [] "foobar" + * + * However, this implementation implements a very common optimization where + * successive nodes having a single child are "compressed" into the node + * itself as a string of characters, each representing a next-level child, + * and only the link to the node representing the last character node is + * provided inside the representation. So the above representation is turend + * into: + * + * ["foo"] "" + * | + * [t b] "foo" + * / \ + * "foot" ("er") ("ar") "foob" + * / \ + * "footer" [] [] "foobar" + * + * However this optimization makes the implementation a bit more complex. + * For instance if a key "first" is added in the above radix tree, a + * "node splitting" operation is needed, since the "foo" prefix is no longer + * composed of nodes having a single child one after the other. This is the + * above tree and the resulting node splitting after this event happens: + * + * + * (f) "" + * / + * (i o) "f" + * / \ + * "firs" ("rst") (o) "fo" + * / \ + * "first" [] [t b] "foo" + * / \ + * "foot" ("er") ("ar") "foob" + * / \ + * "footer" [] [] "foobar" + * + * Similarly after deletion, if a new chain of nodes having a single child + * is created (the chain must also not include nodes that represent keys), + * it must be compressed back into a single node. 
+ * + */ + +#define RAX_NODE_MAX_SIZE ((1<<29)-1) +typedef struct raxNode { + uint32_t iskey:1; /* Does this node contain a key? */ + uint32_t isnull:1; /* Associated value is NULL (don't store it). */ + uint32_t iscompr:1; /* Node is compressed. */ + uint32_t size:29; /* Number of children, or compressed string len. */ + /* Data layout is as follows: + * + * If node is not compressed we have 'size' bytes, one for each children + * character, and 'size' raxNode pointers, point to each child node. + * Note how the character is not stored in the children but in the + * edge of the parents: + * + * [header strlen=0][abc][a-ptr][b-ptr][c-ptr](value-ptr?) + * + * if node is compressed (strlen != 0) the node has 1 children. + * In that case the 'size' bytes of the string stored immediately at + * the start of the data section, represent a sequence of successive + * nodes linked one after the other, for which only the last one in + * the sequence is actually represented as a node, and pointed to by + * the current compressed node. + * + * [header strlen=3][xyz][z-ptr](value-ptr?) + * + * Both compressed and not compressed nodes can represent a key + * with associated data in the radix tree at any level (not just terminal + * nodes). + * + * If the node has an associated key (iskey=1) and is not NULL + * (isnull=0), then after the raxNode pointers poiting to the + * childen, an additional value pointer is present (as you can see + * in the representation above as "value-ptr" field). + */ + unsigned char data[]; +} raxNode; + +typedef struct rax { + raxNode *head; + uint64_t numele; + uint64_t numnodes; +} rax; + +/* Stack data structure used by raxLowWalk() in order to, optionally, return + * a list of parent nodes to the caller. The nodes do not have a "parent" + * field for space concerns, so we use the auxiliary stack when needed. */ +#define RAX_STACK_STATIC_ITEMS 32 +typedef struct raxStack { + void **stack; /* Points to static_items or an heap allocated array. 
*/ + size_t items, maxitems; /* Number of items contained and total space. */ + /* Up to RAXSTACK_STACK_ITEMS items we avoid to allocate on the heap + * and use this static array of pointers instead. */ + void *static_items[RAX_STACK_STATIC_ITEMS]; + int oom; /* True if pushing into this stack failed for OOM at some point. */ +} raxStack; + +/* Radix tree iterator state is encapsulated into this data structure. */ +#define RAX_ITER_STATIC_LEN 128 +#define RAX_ITER_JUST_SEEKED (1<<0) /* Iterator was just seeked. Return current + element for the first iteration and + clear the flag. */ +#define RAX_ITER_EOF (1<<1) /* End of iteration reached. */ +#define RAX_ITER_SAFE (1<<2) /* Safe iterator, allows operations while + iterating. But it is slower. */ +typedef struct raxIterator { + int flags; + rax *rt; /* Radix tree we are iterating. */ + unsigned char *key; /* The current string. */ + void *data; /* Data associated to this key. */ + size_t key_len; /* Current key length. */ + size_t key_max; /* Max key len the current key buffer can hold. */ + unsigned char key_static_string[RAX_ITER_STATIC_LEN]; + raxNode *node; /* Current node. Only for unsafe iteration. */ + raxStack stack; /* Stack used for unsafe iteration. */ +} raxIterator; + +/* A special pointer returned for not found items. */ +extern void *raxNotFound; + +/* Exported API. 
*/ +rax *raxNew(void); +int raxInsert(rax *rax, unsigned char *s, size_t len, void *data); +int raxRemove(rax *rax, unsigned char *s, size_t len); +void *raxFind(rax *rax, unsigned char *s, size_t len); +void raxFree(rax *rax); +void raxStart(raxIterator *it, rax *rt); +int raxSeek(raxIterator *it, unsigned char *ele, size_t len, const char *op); +int raxNext(raxIterator *it, unsigned char *stop, size_t stoplen, char *op); +int raxPrev(raxIterator *it, unsigned char *stop, size_t stoplen, char *op); +void raxStop(raxIterator *it); +void raxShow(rax *rax); + +#endif diff --git a/src/rax_malloc.h b/src/rax_malloc.h new file mode 100644 index 000000000..9295985c6 --- /dev/null +++ b/src/rax_malloc.h @@ -0,0 +1,44 @@ +/* Rax -- A radix tree implementation. + * + * Copyright (c) 2017, Salvatore Sanfilippo + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Redis nor the names of its contributors may be used + * to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* Allocator selection. + * + * This file is used in order to change the Rax allocator at compile time. + * Just define the following defines to what you want to use. Also add + * the include of your alternate allocator if needed (not needed in order + * to use the default libc allocator). */ + +#ifndef RAX_ALLOC_H +#define RAX_ALLOC_H +#include "zmalloc.h" +#define rax_malloc zmalloc +#define rax_realloc zrealloc +#define rax_free zfree +#endif diff --git a/src/server.h b/src/server.h index 75ff384cd..3b28fa44d 100644 --- a/src/server.h +++ b/src/server.h @@ -63,7 +63,9 @@ typedef long long mstime_t; /* millisecond time type. */ #include "util.h" /* Misc functions useful in many places */ #include "latency.h" /* Latency monitor API */ #include "sparkline.h" /* ASCII graphs API */ -#include "quicklist.h" +#include "quicklist.h" /* Lists are encoded as linked lists of + N-elements flat arrays */ +#include "rax.h" /* Radix tree */ /* Following includes allow test functions to be called from Redis main() */ #include "zipmap.h" From fcad87788a517ed73b1ff9990da92adac37f5bd4 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 7 Apr 2017 08:46:39 +0200 Subject: [PATCH 0293/1722] Rax library updated. 
--- src/db.c | 12 +- src/rax.c | 393 +++++++++++++++++++----------------------------------- src/rax.h | 12 +- 3 files changed, 152 insertions(+), 265 deletions(-) diff --git a/src/db.c b/src/db.c index ee7398abb..760843120 100644 --- a/src/db.c +++ b/src/db.c @@ -1315,9 +1315,9 @@ void slotToKeyUpdateKey(robj *key, int add) { indexed[1] = hashslot & 0xff; memcpy(indexed+2,key->ptr,keylen); if (add) { - raxInsert(server.cluster->slots_to_keys,indexed,keylen+2,NULL); + raxInsert(server.cluster->slots_to_keys,indexed,keylen+2,NULL,NULL); } else { - raxRemove(server.cluster->slots_to_keys,indexed,keylen+2); + raxRemove(server.cluster->slots_to_keys,indexed,keylen+2,NULL); } if (indexed != buf) zfree(indexed); } @@ -1348,8 +1348,8 @@ unsigned int getKeysInSlot(unsigned int hashslot, robj **keys, unsigned int coun indexed[0] = (hashslot >> 8) & 0xff; indexed[1] = hashslot & 0xff; raxStart(&iter,server.cluster->slots_to_keys); - raxSeek(&iter,indexed,2,">="); - while(count-- && raxNext(&iter,NULL,0,NULL)) { + raxSeek(&iter,">=",indexed,2); + while(count-- && raxNext(&iter)) { if (iter.key[0] != indexed[0] || iter.key[1] != indexed[1]) break; keys[j++] = createStringObject((char*)iter.key+2,iter.key_len-2); } @@ -1368,8 +1368,8 @@ unsigned int delKeysInSlot(unsigned int hashslot) { indexed[1] = hashslot & 0xff; raxStart(&iter,server.cluster->slots_to_keys); while(server.cluster->slots_keys_count[hashslot]) { - raxSeek(&iter,indexed,2,">="); - raxNext(&iter,NULL,0,NULL); + raxSeek(&iter,">=",indexed,2); + raxNext(&iter); robj *key = createStringObject((char*)iter.key+2,iter.key_len-2); dbDelete(&server.db[0],key); diff --git a/src/rax.c b/src/rax.c index ca49787ed..78ead7a27 100644 --- a/src/rax.c +++ b/src/rax.c @@ -33,8 +33,14 @@ #include #include #include +#include #include "rax.h" -#include "rax_malloc.h" + +#ifndef RAX_MALLOC_INCLUDE +#define RAX_MALLOC_INCLUDE "rax_malloc.h" +#endif + +#include RAX_MALLOC_INCLUDE /* This is a special pointer that is guaranteed to 
never have the same value * of a radix tree node. It's used in order to report "not found" error without @@ -84,6 +90,7 @@ static inline int raxStackPush(raxStack *ts, void *ptr) { if (ts->stack == NULL) { ts->stack = ts->static_items; ts->oom = 1; + errno = ENOMEM; return 0; } memcpy(ts->stack,ts->static_items,sizeof(void*)*ts->maxitems); @@ -91,6 +98,7 @@ static inline int raxStackPush(raxStack *ts, void *ptr) { void **newalloc = rax_realloc(ts->stack,sizeof(void*)*ts->maxitems*2); if (newalloc == NULL) { ts->oom = 1; + errno = ENOMEM; return 0; } ts->stack = newalloc; @@ -205,7 +213,7 @@ void *raxGetData(raxNode *n) { * On success the new parent node pointer is returned (it may change because * of the realloc, so the caller should discard 'n' and use the new value). * On out of memory NULL is returned, and the old node is still valid. */ -raxNode *raxAddChild(raxNode *n, char c, raxNode **childptr, raxNode ***parentlink) { +raxNode *raxAddChild(raxNode *n, unsigned char c, raxNode **childptr, raxNode ***parentlink) { assert(n->iscompr == 0); size_t curlen = sizeof(raxNode)+ @@ -330,7 +338,7 @@ raxNode *raxCompressNode(raxNode *n, unsigned char *s, size_t len, raxNode **chi memcpy(n->data,s,len); if (n->iskey) raxSetData(n,data); raxNode **childfield = raxNodeLastChildPtr(n); - memcpy(childfield,&child,sizeof(child)); + memcpy(childfield,child,sizeof(*child)); return n; } @@ -399,7 +407,7 @@ static inline size_t raxLowWalk(rax *rax, unsigned char *s, size_t len, raxNode * data is updated, and 0 is returned, otherwise the element is inserted * and 1 is returned. On out of memory the function returns 0 as well but * sets errno to ENOMEM, otherwise errno will be set to 0. */ -int raxInsert(rax *rax, unsigned char *s, size_t len, void *data) { +int raxInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old) { size_t i; int j = 0; /* Split position. 
If raxLowWalk() stops in a compressed node, the index 'j' represents the char we stopped within the @@ -417,6 +425,7 @@ int raxInsert(rax *rax, unsigned char *s, size_t len, void *data) { * data pointer. */ if (i == len && (!h->iscompr || j == 0 /* not in the middle if j is 0 */)) { if (h->iskey) { + if (old) *old = raxGetData(h); raxSetData(h,data); errno = 0; return 0; /* Element already exists. */ @@ -720,6 +729,7 @@ int raxInsert(rax *rax, unsigned char *s, size_t len, void *data) { /* Finish! We don't need to contine with the insertion * algorithm for ALGO 2. The key is already inserted. */ rax->numele++; + rax_free(h); return 1; /* Key inserted. */ } @@ -729,7 +739,6 @@ int raxInsert(rax *rax, unsigned char *s, size_t len, void *data) { * since i == len. */ while(i < len) { raxNode *child; - rax->numnodes++; /* If this node is going to have a single child, and there * are other characters, so that that would result in a chain @@ -755,6 +764,7 @@ int raxInsert(rax *rax, unsigned char *s, size_t len, void *data) { parentlink = new_parentlink; i++; } + rax->numnodes++; h = child; } raxNode *newh = raxReallocForData(h,data); @@ -767,10 +777,16 @@ int raxInsert(rax *rax, unsigned char *s, size_t len, void *data) { oom: /* This code path handles out of memory after part of the sub-tree was - * already added. Set the node as a key, and then remove it. */ - h->isnull = 1; - h->iskey = 1; - raxRemove(rax,s,i); + * already modified. Set the node as a key, and then remove it. However we + * do that only if the node is a terminal node, otherwise if the OOM + * happened reallocating a node in the middle, we don't need to free + * anything. */ + if (h->size == 0) { + h->isnull = 1; + h->iskey = 1; + rax->numele++; /* Compensate the next remove. */ + assert(raxRemove(rax,s,i,NULL) != 0); + } errno = ENOMEM; return 0; } @@ -830,7 +846,7 @@ raxNode *raxRemoveChild(raxNode *parent, raxNode *child) { * * 1. 
To start we seek the first element in both the children * pointers and edge bytes in the node. */ - raxNode **cp = raxNodeLastChildPtr(parent) - (parent->size-1); + raxNode **cp = raxNodeFirstChildPtr(parent); raxNode **c = cp; unsigned char *e = parent->data; @@ -855,7 +871,7 @@ raxNode *raxRemoveChild(raxNode *parent, raxNode *child) { memmove(((char*)cp)-1,cp,(parent->size-taillen-1)*sizeof(raxNode**)); /* Move the remaining "tail" pointer at the right position as well. */ - memmove(((char*)c)-1,c+1,taillen*sizeof(raxNode**)); + memmove(((char*)c)-1,c+1,taillen*sizeof(raxNode**)+parent->iskey*sizeof(void*)); /* 4. Update size. */ parent->size--; @@ -863,7 +879,9 @@ raxNode *raxRemoveChild(raxNode *parent, raxNode *child) { /* realloc the node according to the theoretical memory usage, to free * data if we are over-allocating right now. */ raxNode *newnode = rax_realloc(parent,raxNodeCurrentLength(parent)); - debugnode("raxRemoveChild after", newnode); + if (newnode) { + debugnode("raxRemoveChild after", newnode); + } /* Note: if rax_realloc() fails we just return the old address, which * is valid. */ return newnode ? newnode : parent; @@ -871,18 +889,19 @@ raxNode *raxRemoveChild(raxNode *parent, raxNode *child) { /* Remove the specified item. Returns 1 if the item was found and * deleted, 0 otherwise. 
*/ -int raxRemove(rax *rax, unsigned char *s, size_t len) { +int raxRemove(rax *rax, unsigned char *s, size_t len, void **old) { raxNode *h; raxStack ts; debugf("### Delete: %.*s\n", (int)len, s); raxStackInit(&ts); int splitpos = 0; - size_t i = raxLowWalk(rax,s,len,&h,NULL,NULL,&ts); + size_t i = raxLowWalk(rax,s,len,&h,NULL,&splitpos,&ts); if (i != len || (h->iscompr && splitpos != 0) || !h->iskey) { raxStackFree(&ts); return 0; } + if (old) *old = raxGetData(h); h->iskey = 0; rax->numele--; @@ -1009,6 +1028,9 @@ int raxRemove(rax *rax, unsigned char *s, size_t len) { raxNode **cp = raxNodeLastChildPtr(h); memcpy(&h,cp,sizeof(h)); if (h->iskey || (!h->iscompr && h->size != 1)) break; + /* Stop here if going to the next node would result into + * a compressed node larger than h->size can hold. */ + if (comprsize + h->size > RAX_NODE_MAX_SIZE) break; nodes++; comprsize += h->size; } @@ -1115,6 +1137,7 @@ int raxIteratorAddChars(raxIterator *it, unsigned char *s, size_t len) { it->key = rax_realloc(old,new_max); if (it->key == NULL) { it->key = (!old) ? it->key_static_string : old; + errno = ENOMEM; return 0; } if (old == NULL) memcpy(it->key,it->key_static_string,it->key_len); @@ -1190,6 +1213,8 @@ int raxIteratorNextStep(raxIterator *it, int noup) { * children representing keys lexicographically greater than the * current key. */ while(1) { + int old_noup = noup; + /* Already on head? Can't go up, iteration finished. */ if (!noup && it->node == it->rt->head) { it->flags |= RAX_ITER_EOF; @@ -1211,9 +1236,9 @@ int raxIteratorNextStep(raxIterator *it, int noup) { int todel = it->node->iscompr ? it->node->size : 1; raxIteratorDelChars(it,todel); - /* Try visitng the next child if there was at least one + /* Try visiting the next child if there was at least one * additional child. */ - if (!it->node->iscompr && it->node->size > 1) { + if (!it->node->iscompr && it->node->size > (old_noup ? 
0 : 1)) { raxNode **cp = raxNodeFirstChildPtr(it->node); int i = 0; while (i < it->node->size) { @@ -1276,6 +1301,8 @@ int raxIteratorPrevStep(raxIterator *it, int noup) { raxNode *orig_node = it->node; while(1) { + int old_noup = noup; + /* Already on head? Can't go up, iteration finished. */ if (!noup && it->node == it->rt->head) { it->flags |= RAX_ITER_EOF; @@ -1297,9 +1324,9 @@ int raxIteratorPrevStep(raxIterator *it, int noup) { int todel = it->node->iscompr ? it->node->size : 1; raxIteratorDelChars(it,todel); - /* Try visiting the prev child if there was at least one - * additional child. */ - if (!it->node->iscompr && it->node->size > 1) { + /* Try visiting the prev child if there is at least one + * child. */ + if (!it->node->iscompr && it->node->size > (old_noup ? 0 : 1)) { raxNode **cp = raxNodeLastChildPtr(it->node); int i = it->node->size-1; while (i >= 0) { @@ -1334,8 +1361,9 @@ int raxIteratorPrevStep(raxIterator *it, int noup) { /* Seek an iterator at the specified element. * Return 0 if the seek failed for syntax error or out of memory. Otherwise - * 1 is returned. */ -int raxSeek(raxIterator *it, unsigned char *ele, size_t len, const char *op) { + * 1 is returned. When 0 is returned for out of memory, errno is set to + * the ENOMEM value. */ +int raxSeek(raxIterator *it, const char *op, unsigned char *ele, size_t len) { int eq = 0, lt = 0, gt = 0, first = 0, last = 0; it->stack.items = 0; /* Just resetting. Intialized by raxStart(). */ @@ -1358,6 +1386,7 @@ int raxSeek(raxIterator *it, unsigned char *ele, size_t len, const char *op) { } else if (op[0] == '$') { last = 1; } else { + errno = 0; return 0; /* Error. */ } @@ -1371,7 +1400,7 @@ int raxSeek(raxIterator *it, unsigned char *ele, size_t len, const char *op) { if (first) { /* Seeking the first key greater or equal to the empty string * is equivalent to seeking the smaller key available. 
*/ - return raxSeek(it,NULL,0,">="); + return raxSeek(it,">=",NULL,0); } if (last) { @@ -1398,7 +1427,7 @@ int raxSeek(raxIterator *it, unsigned char *ele, size_t len, const char *op) { /* We found our node, since the key matches and we have an * "equal" condition. */ if (!raxIteratorAddChars(it,ele,len)) return 0; /* OOM. */ - } else { + } else if (lt || gt) { /* Exact key not found or eq flag not set. We have to set as current * key the one represented by the node we stopped at, and perform * a next/prev operation to seek. To reconstruct the key at this node @@ -1490,6 +1519,10 @@ int raxSeek(raxIterator *it, unsigned char *ele, size_t len, const char *op) { if (lt && !raxIteratorPrevStep(it,0)) return 0; it->flags |= RAX_ITER_JUST_SEEKED; /* Ignore next call. */ } + } else { + /* If we are here just eq was set but no match was found. */ + it->flags |= RAX_ITER_EOF; + return 1; } return 1; } @@ -1497,7 +1530,7 @@ int raxSeek(raxIterator *it, unsigned char *ele, size_t len, const char *op) { /* Go to the next element in the scope of the iterator 'it'. * If EOF (or out of memory) is reached, 0 is returned, otherwise 1 is * returned. In case 0 is returned because of OOM, errno is set to ENOMEM. */ -int raxNext(raxIterator *it, unsigned char *stop, size_t stoplen, char *op) { +int raxNext(raxIterator *it) { if (!raxIteratorNextStep(it,0)) { errno = ENOMEM; return 0; @@ -1512,7 +1545,7 @@ int raxNext(raxIterator *it, unsigned char *stop, size_t stoplen, char *op) { /* Go to the previous element in the scope of the iterator 'it'. * If EOF (or out of memory) is reached, 0 is returned, otherwise 1 is * returned. In case 0 is returned because of OOM, errno is set to ENOMEM. 
*/ -int raxPrev(raxIterator *it, unsigned char *stop, size_t stoplen, char *op) { +int raxPrev(raxIterator *it) { if (!raxIteratorPrevStep(it,0)) { errno = ENOMEM; return 0; @@ -1524,6 +1557,87 @@ int raxPrev(raxIterator *it, unsigned char *stop, size_t stoplen, char *op) { return 1; } +/* Perform a random walk starting in the current position of the iterator. + * Return 0 if the tree is empty or on out of memory. Otherwise 1 is returned + * and the iterator is set to the node reached after doing a random walk + * of 'steps' steps. If the 'steps' argument is 0, the random walk is performed + * using a random number of steps between 1 and two times the logarithm of + * the number of elements. + * + * NOTE: if you use this function to generate random elements from the radix + * tree, expect a disappointing distribution. A random walk produces good + * random elements if the tree is not sparse, however in the case of a radix + * tree certain keys will be reported much more often than others. At least + * this function should be able to expore every possible element eventually. */ +int raxRandomWalk(raxIterator *it, size_t steps) { + if (it->rt->numele == 0) { + it->flags |= RAX_ITER_EOF; + return 0; + } + + if (steps == 0) { + size_t fle = floor(log(it->rt->numele)); + fle *= 2; + steps = 1 + rand() % fle; + } + + raxNode *n = it->node; + while(steps > 0 || !n->iskey) { + int numchildren = n->iscompr ? 1 : n->size; + int r = rand() % (numchildren+(n != it->rt->head)); + + if (r == numchildren) { + /* Go up to parent. */ + n = raxStackPop(&it->stack); + int todel = n->iscompr ? n->size : 1; + raxIteratorDelChars(it,todel); + } else { + /* Select a random child. 
*/ + if (n->iscompr) { + if (!raxIteratorAddChars(it,n->data,n->size)) return 0; + } else { + if (!raxIteratorAddChars(it,n->data+r,1)) return 0; + } + raxNode **cp = raxNodeFirstChildPtr(n)+r; + if (!raxStackPush(&it->stack,n)) return 0; + memcpy(&n,cp,sizeof(n)); + } + if (n->iskey) steps--; + } + it->node = n; + return 1; +} + +/* Compare the key currently pointed by the iterator to the specified + * key according to the specified operator. Returns 1 if the comparison is + * true, otherwise 0 is returned. */ +int raxCompare(raxIterator *iter, const char *op, unsigned char *key, size_t key_len) { + int eq = 0, lt = 0, gt = 0; + + if (op[0] == '=' || op[1] == '=') eq = 1; + if (op[1] == '>') gt = 1; + else if (op[1] == '<') lt = 1; + else if (op[1] != '=') return 0; /* Syntax error. */ + + size_t minlen = key_len < iter->key_len ? key_len : iter->key_len; + int cmp = memcmp(iter->key,key,minlen); + + /* Handle == */ + if (lt == 0 && gt == 0) return cmp == 0 && key_len == iter->key_len; + + /* Handle >, >=, <, <= */ + if (cmp == 0) { + /* Same prefix: longer wins. */ + if (eq && key_len == iter->key_len) return 1; + else if (lt) return iter->key_len < key_len; + else if (gt) return iter->key_len > key_len; + } if (cmp > 0) { + return gt ? 1 : 0; + } else /* (cmp < 0) */ { + return lt ? 1 : 0; + } +} + /* Free the iterator. */ void raxStop(raxIterator *it) { if (it->key != it->key_static_string) rax_free(it->key); @@ -1613,233 +1727,4 @@ void raxDebugShowNode(const char *msg, raxNode *n) { fflush(stdout); } -#ifdef BENCHMARK_MAIN -#include -#include -/* This is a simple Feistel network in order to turn every possible - * uint32_t input into another "randomly" looking uint32_t. It is a - * one to one map so there are no repetitions. 
*/ -static uint32_t int2int(uint32_t input) { - uint16_t l = input & 0xffff; - uint16_t r = input >> 16; - for (int i = 0; i < 8; i++) { - uint16_t nl = r; - uint16_t F = (((r * 31) + (r >> 5) + 7 * 371) ^ r) & 0xffff; - r = l ^ F; - l = nl; - } - return (r<<16)|l; -} - -/* Turn an uint32_t integer into an alphanumerical key and return its - * length. This function is used in order to generate keys that have - * a large charset, so that the radix tree can be testsed with many - * children per node. */ -static size_t int2alphakey(char *s, size_t maxlen, uint32_t i) { - const char *set = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" - "abcdefghijklmnopqrstuvwxyz" - "0123456789"; - const size_t setlen = 62; - - if (maxlen == 0) return 0; - maxlen--; /* Space for null term char. */ - size_t len = 0; - while(len < maxlen) { - s[len++] = set[i%setlen]; - i /= setlen; - if (i == 0) break; - } - s[len] = '\0'; - return len; -} - -/* Return the UNIX time in microseconds */ -static long long ustime(void) { - struct timeval tv; - long long ust; - - gettimeofday(&tv, NULL); - ust = ((long long)tv.tv_sec)*1000000; - ust += tv.tv_usec; - return ust; -} - -/* Turn the integer 'i' into a key according to 'mode'. - * mode = 0, just represents the integer as a string. - * mode = 1, turn it into a random-looking alphanumerical string - * according to the int2alphakey() function. */ -static size_t int2key(char *s, size_t maxlen, uint32_t i, int mode) { - if (mode == 0) { - return snprintf(s,maxlen,"%lu",(unsigned long)i); - } else { - i = int2int(i); - return int2alphakey(s,maxlen,i); - } -} - -int main(void) { - for (int mode = 0; mode < 2; mode++) { - printf("Benchmark with %s keys:\n", - (mode == 0) ? 
"integer" : "alphanumerical"); - rax *t = raxNew(); - long long start = ustime(); - for (int i = 0; i < 5000000; i++) { - char buf[64]; - int len = int2key(buf,sizeof(buf),i,mode); - raxInsert(t,(unsigned char*)buf,len,(void*)(long)i); - } - printf("Insert: %f\n", (double)(ustime()-start)/1000000); - printf("%llu total nodes\n", (unsigned long long)t->numnodes); - printf("%llu total elements\n", (unsigned long long)t->numele); - - start = ustime(); - for (int i = 0; i < 5000000; i++) { - char buf[64]; - int len = int2key(buf,sizeof(buf),i,mode); - void *data = raxFind(t,(unsigned char*)buf,len); - if (data != (void*)(long)i) { - printf("Issue with %s: %p instead of %p\n", buf, - data, (void*)(long)i); - } - } - printf("Lookup: %f\n", (double)(ustime()-start)/1000000); - - start = ustime(); - for (int i = 0; i < 5000000; i++) { - char buf[64]; - int r = rand() % 5000000; - int len = int2key(buf,sizeof(buf),r,mode); - void *data = raxFind(t,(unsigned char*)buf,len); - if (data != (void*)(long)r) { - printf("Issue with %s: %p instead of %p\n", buf, - data, (void*)(long)r); - } - } - printf("Random lookup: %f\n", (double)(ustime()-start)/1000000); - - start = ustime(); - int count = 0; - for (int i = 0; i < 5000000; i++) { - char buf[64]; - int len = int2key(buf,sizeof(buf),i,mode); - buf[i%len] = '!'; /* "!" is never set into keys. 
*/ - void *data = raxFind(t,(unsigned char*) buf,len); - if (data != (void*)(long)i) count++; - } - printf("Failed lookup: %f\n", (double)(ustime()-start)/1000000); - - start = ustime(); - for (int i = 0; i < 5000000; i++) { - char buf[64]; - int len = int2key(buf,sizeof(buf),i,mode); - int retval = raxRemove(t,(unsigned char*)buf,len); - assert(retval == 1); - } - printf("Deletion: %f\n", (double)(ustime()-start)/1000000); - - printf("%llu total nodes\n", (unsigned long long)t->numnodes); - printf("%llu total elements\n", (unsigned long long)t->numele); - raxFree(t); - } - return 0; -} -#endif - -#ifdef TEST_MAIN -#include -#include - -int main(void) { - printf("notfound = %p\n", raxNotFound); - rax *t = raxNew(); - char *toadd[] = {"alligator","alien","baloon","chromodynamic","romane","romanus","romulus","rubens","ruber","rubicon","rubicundus","all","rub","ba",NULL}; - - srand(time(NULL)); - for (int x = 0; x < 10000; x++) rand(); - - long items = 0; - while(toadd[items] != NULL) items++; - - for (long i = 0; i < items; i++) { - raxInsert(t,(unsigned char*)toadd[i],strlen(toadd[i]),(void*)i); - printf("Added %s\n", toadd[i]); - } - raxShow(t); - - raxIterator iter; - raxStart(&iter,t); - - // OK: all this tests will need to go in the Rax unit test. 
- // raxSeek(&iter,(unsigned char*)"rpxxx",5,"<="); - // raxSeek(&iter,(unsigned char*)"rom",3,">="); - // raxSeek(&iter,(unsigned char*)"rub",3,">="); - // raxSeek(&iter,(unsigned char*)"rub",3,">"); - // raxSeek(&iter,(unsigned char*)"rub",3,"<"); - // raxSeek(&iter,(unsigned char*)"rom",3,">"); - // raxSeek(&iter,(unsigned char*)"chro",4,">"); - // raxSeek(&iter,(unsigned char*)"chro",4,"<"); - // raxSeek(&iter,(unsigned char*)"chromz",6,"<"); - // raxSeek(&iter,NULL,0,"^"); - // raxSeek(&iter,"zorro",5,"<="); - // raxSeek(&iter,"zorro",5,"<"); - // raxSeek(&iter,NULL,0,"$"); - - // STILL TO TEST - raxSeek(&iter,(unsigned char*)"ro",2,">="); - printf("EOF: %d\n", (iter.flags & RAX_ITER_EOF) != 0); - - printf("SEEKED: %.*s, val %p\n", (int)iter.key_len, - (char*)iter.key, - iter.data); - - printf("NEXT\n"); - while(raxNext(&iter,NULL,0,NULL)) { - printf("--- key: %.*s, val %p\n", (int)iter.key_len, - (char*)iter.key, - iter.data); - } - - printf("After EOF element is: %.*s\n", (int)iter.key_len, - (char*)iter.key); - printf("~~~~~~~~~~~~~~\n"); - - printf("PREV\n"); - raxSeek(&iter,iter.key,iter.key_len,"=="); - while(raxPrev(&iter,NULL,0,NULL)) { - printf("--- key: %.*s, val %p\n", (int)iter.key_len, - (char*)iter.key, - iter.data); - } - - printf("After EOF element is: %.*s\n", (int)iter.key_len, - (char*)iter.key); - raxStop(&iter); - -#if 0 - raxStop(&iter); -#endif - exit(0); - - int rnum = rand(); - int survivor = rnum % items; - -#if 1 - printf("Removing everything but %s in random order\n", toadd[survivor]); - for (long i = 0; i < 1000; i++) { - int r = rand() % items; - if (r == survivor) continue; - raxRemove(t,(unsigned char*)toadd[r],strlen(toadd[r])); - } -#else - printf("Removing rubicon\n"); - raxRemove(t,(unsigned char*)"rubicon",7); -#endif - - printf("%llu total nodes\n", (unsigned long long)t->numnodes); - printf("%llu total elements\n", (unsigned long long)t->numele); - - raxShow(t); - raxFree(t); -} -#endif diff --git a/src/rax.h b/src/rax.h 
index 76330e0de..6f91f4c1b 100644 --- a/src/rax.h +++ b/src/rax.h @@ -144,14 +144,16 @@ extern void *raxNotFound; /* Exported API. */ rax *raxNew(void); -int raxInsert(rax *rax, unsigned char *s, size_t len, void *data); -int raxRemove(rax *rax, unsigned char *s, size_t len); +int raxInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old); +int raxRemove(rax *rax, unsigned char *s, size_t len, void **old); void *raxFind(rax *rax, unsigned char *s, size_t len); void raxFree(rax *rax); void raxStart(raxIterator *it, rax *rt); -int raxSeek(raxIterator *it, unsigned char *ele, size_t len, const char *op); -int raxNext(raxIterator *it, unsigned char *stop, size_t stoplen, char *op); -int raxPrev(raxIterator *it, unsigned char *stop, size_t stoplen, char *op); +int raxSeek(raxIterator *it, const char *op, unsigned char *ele, size_t len); +int raxNext(raxIterator *it); +int raxPrev(raxIterator *it); +int raxRandomWalk(raxIterator *it, size_t steps); +int raxCompare(raxIterator *iter, const char *op, unsigned char *key, size_t key_len); void raxStop(raxIterator *it); void raxShow(rax *rax); From e8b44eb33ab60d47aecc75daeebbab00cceea2d2 Mon Sep 17 00:00:00 2001 From: lorneli Date: Sat, 8 Apr 2017 15:15:24 +0800 Subject: [PATCH 0294/1722] Expire: Update comment of activeExpireCycle function The macro REDIS_EXPIRELOOKUPS_TIME_PERC has been replaced by ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC in commit 6500fabfb881a7ffaadfbff74ab801c55d4591fc. --- src/expire.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/expire.c b/src/expire.c index 637139f63..22b1f1da9 100644 --- a/src/expire.c +++ b/src/expire.c @@ -92,7 +92,7 @@ int activeExpireCycleTryExpire(redisDb *db, dictEntry *de, long long now) { * * If type is ACTIVE_EXPIRE_CYCLE_SLOW, that normal expire cycle is * executed, where the time limit is a percentage of the REDIS_HZ period - * as specified by the REDIS_EXPIRELOOKUPS_TIME_PERC define. 
*/ + * as specified by the ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC define. */ void activeExpireCycle(int type) { /* This function has some global state in order to continue the work From f9db3144d6c5b86ec2e4f1c799e2181a4b3e6730 Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 8 Apr 2017 17:31:09 +0200 Subject: [PATCH 0295/1722] Rax library updated. Important bugs fixed. --- src/rax.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/src/rax.c b/src/rax.c index 78ead7a27..dda008dff 100644 --- a/src/rax.c +++ b/src/rax.c @@ -186,10 +186,10 @@ raxNode *raxReallocForData(raxNode *n, void *data) { void raxSetData(raxNode *n, void *data) { n->iskey = 1; if (data != NULL) { + n->isnull = 0; void **ndata = (void**) ((char*)n+raxNodeCurrentLength(n)-sizeof(void*)); memcpy(ndata,&data,sizeof(data)); - n->isnull = 0; } else { n->isnull = 1; } @@ -396,6 +396,7 @@ static inline size_t raxLowWalk(rax *rax, unsigned char *s, size_t len, raxNode position to 0 to signal this node represents the searched key. */ } + debugnode("Lookup stop node is",h); if (stopnode) *stopnode = h; if (plink) *plink = parentlink; if (splitpos && h->iscompr) *splitpos = j; @@ -424,18 +425,21 @@ int raxInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old) { * our key. We have just to reallocate the node and make space for the * data pointer. */ if (i == len && (!h->iscompr || j == 0 /* not in the middle if j is 0 */)) { + debugf("### Insert: node representing key exists\n"); + if (!h->iskey || h->isnull) { + h = raxReallocForData(h,data); + if (h) memcpy(parentlink,&h,sizeof(h)); + } + if (h == NULL) { + errno = ENOMEM; + return 0; + } if (h->iskey) { if (old) *old = raxGetData(h); raxSetData(h,data); errno = 0; return 0; /* Element already exists. */ } - h = raxReallocForData(h,data); - if (h == NULL) { - errno = ENOMEM; - return 0; - } - memcpy(parentlink,&h,sizeof(h)); raxSetData(h,data); rax->numele++; return 1; /* Element inserted. 
*/ @@ -734,9 +738,7 @@ int raxInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old) { } /* We walked the radix tree as far as we could, but still there are left - * chars in our string. We need to insert the missing nodes. - * Note: while loop never entered if the node was split by ALGO2, - * since i == len. */ + * chars in our string. We need to insert the missing nodes. */ while(i < len) { raxNode *child; @@ -1091,6 +1093,7 @@ int raxRemove(rax *rax, unsigned char *s, size_t len, void **old) { /* This is the core of raxFree(): performs a depth-first scan of the * tree and releases all the nodes found. */ void raxRecursiveFree(rax *rax, raxNode *n) { + debugnode("free traversing",n); int numchildren = n->iscompr ? 1 : n->size; raxNode **cp = raxNodeLastChildPtr(n); while(numchildren--) { From 60ffbb72b4db08d146b0fca28090072d2f1b829c Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 10 Apr 2017 09:33:21 +0200 Subject: [PATCH 0296/1722] Fix modules blocking commands awake delay. If a thread unblocks a client blocked in a module command, by using the RedisMdoule_UnblockClient() API, the event loop may not be awaken until the next timeout of the multiplexing API or the next unrelated I/O operation on other clients. We actually want the client to be served ASAP, so a mechanism is needed in order for the unblocking API to inform Redis that there is a client to serve ASAP. This commit fixes the issue using the old trick of the pipe: when a client needs to be unblocked, a byte is written in a pipe. When we run the list of clients blocked in modules, we consume all the bytes written in the pipe. Writes and reads are performed inside the context of the mutex, so no race is possible in which we consume the bytes that are actually related to an awake request for a client that should still be put into the list of clients to unblock. It was verified that after the fix the server handles the blocked clients with the expected short delay. 
Thanks to @dvirsky for understanding there was such a problem and reporting it. --- src/module.c | 28 ++++++++++++++++++++++++++++ src/server.c | 10 ++++++++++ src/server.h | 4 ++++ 3 files changed, 42 insertions(+) diff --git a/src/module.c b/src/module.c index 3b90eae4a..4255c4953 100644 --- a/src/module.c +++ b/src/module.c @@ -3108,6 +3108,17 @@ void RM_LogIOError(RedisModuleIO *io, const char *levelstr, const char *fmt, ... * Blocking clients from modules * -------------------------------------------------------------------------- */ +/* Readable handler for the awake pipe. We do nothing here, the awake bytes + * will be actually read in a more appropriate place in the + * moduleHandleBlockedClients() function that is where clients are actually + * served. */ +void moduleBlockedClientPipeReadable(aeEventLoop *el, int fd, void *privdata, int mask) { + UNUSED(el); + UNUSED(fd); + UNUSED(mask); + UNUSED(privdata); +} + /* This is called from blocked.c in order to unblock a client: may be called * for multiple reasons while the client is in the middle of being blocked * because the client is terminated, but is also called for cleanup when a @@ -3171,6 +3182,9 @@ int RM_UnblockClient(RedisModuleBlockedClient *bc, void *privdata) { pthread_mutex_lock(&moduleUnblockedClientsMutex); bc->privdata = privdata; listAddNodeTail(moduleUnblockedClients,bc); + if (write(server.module_blocked_pipe[1],"A",1) != 1) { + /* Ignore the error, this is best-effort. */ + } pthread_mutex_unlock(&moduleUnblockedClientsMutex); return REDISMODULE_OK; } @@ -3195,6 +3209,10 @@ void moduleHandleBlockedClients(void) { RedisModuleBlockedClient *bc; pthread_mutex_lock(&moduleUnblockedClientsMutex); + /* Here we unblock all the pending clients blocked in modules operations + * so we can read every pending "awake byte" in the pipe. 
*/ + char buf[1]; + while (read(server.module_blocked_pipe[0],buf,1) == 1); while (listLength(moduleUnblockedClients)) { ln = listFirst(moduleUnblockedClients); bc = ln->value; @@ -3298,6 +3316,16 @@ void moduleInitModulesSystem(void) { server.loadmodule_queue = listCreate(); modules = dictCreate(&modulesDictType,NULL); moduleRegisterCoreAPI(); + if (pipe(server.module_blocked_pipe) == -1) { + serverLog(LL_WARNING, + "Can't create the pipe for module blocking commands: %s", + strerror(errno)); + exit(1); + } + /* Make the pipe non blocking. This is just a best effort aware mechanism + * and we do not want to block not in the read nor in the write half. */ + anetNonBlock(NULL,server.module_blocked_pipe[0]); + anetNonBlock(NULL,server.module_blocked_pipe[1]); } /* Load all the modules in the server.loadmodule_queue list, which is diff --git a/src/server.c b/src/server.c index 0494a4e75..8f5a0911b 100644 --- a/src/server.c +++ b/src/server.c @@ -1870,6 +1870,16 @@ void initServer(void) { if (server.sofd > 0 && aeCreateFileEvent(server.el,server.sofd,AE_READABLE, acceptUnixHandler,NULL) == AE_ERR) serverPanic("Unrecoverable error creating server.sofd file event."); + + /* Register a readable event for the pipe used to awake the event loop + * when a blocked client in a module needs attention. */ + if (aeCreateFileEvent(server.el, server.module_blocked_pipe[0], AE_READABLE, + moduleBlockedClientPipeReadable,NULL) == AE_ERR) { + serverPanic( + "Error registering the readable event for the module " + "blocked clients subsystem."); + } + /* Open the AOF file if needed. */ if (server.aof_state == AOF_ON) { server.aof_fd = open(server.aof_filename, diff --git a/src/server.h b/src/server.h index 3b28fa44d..19be92ba2 100644 --- a/src/server.h +++ b/src/server.h @@ -877,6 +877,9 @@ struct redisServer { /* Modules */ dict *moduleapi; /* Exported APIs dictionary for modules. */ list *loadmodule_queue; /* List of modules to load at startup. 
*/ + int module_blocked_pipe[2]; /* Pipe used to awake the event loop if a + client blocked on a module command needs + to be processed. */ /* Networking */ int port; /* TCP listening port */ int tcp_backlog; /* TCP listen() backlog */ @@ -1286,6 +1289,7 @@ void moduleFreeContext(struct RedisModuleCtx *ctx); void unblockClientFromModule(client *c); void moduleHandleBlockedClients(void); void moduleBlockedClientTimedOut(client *c); +void moduleBlockedClientPipeReadable(aeEventLoop *el, int fd, void *privdata, int mask); /* Utils */ long long ustime(void); From 71c350f73bd1486603ba2c188b90b1e372c4204d Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 10 Apr 2017 13:17:05 +0200 Subject: [PATCH 0297/1722] Make more obvious why there was issue #3843. --- src/module.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/module.c b/src/module.c index b4da7f634..c81330b63 100644 --- a/src/module.c +++ b/src/module.c @@ -3236,6 +3236,9 @@ void moduleHandleBlockedClients(void) { if (bc->privdata && bc->free_privdata) bc->free_privdata(bc->privdata); if (c != NULL) unblockClient(c); + /* Free 'bc' only after unblocking the client, since it is + * referenced in the client blocking context, and must be valid + * when calling unblockClient(). */ zfree(bc); /* Lock again before to iterate the loop. */ From 7e0c3177d6482fc84334d8e3855215586154f64f Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 11 Apr 2017 16:45:08 +0200 Subject: [PATCH 0298/1722] Fix zmalloc_get_memory_size() ifdefs to actually use the else branch. Close #3927. --- src/zmalloc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/zmalloc.c b/src/zmalloc.c index f71ce2c9e..0d3607793 100644 --- a/src/zmalloc.c +++ b/src/zmalloc.c @@ -418,8 +418,6 @@ size_t zmalloc_get_memory_size(void) { if (sysctl(mib, 2, &size, &len, NULL, 0) == 0) return (size_t)size; return 0L; /* Failed? */ -#endif /* sysctl and sysconf variants */ - #else return 0L; /* Unknown OS. 
*/ #endif From ffce9ebf9bb2ba787937c8b0179a24fa76608fff Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 11 Apr 2017 16:54:27 +0200 Subject: [PATCH 0299/1722] Fix preprocessor if/else chain broken in order to fix #3927. --- src/zmalloc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/zmalloc.c b/src/zmalloc.c index 0d3607793..f653760a7 100644 --- a/src/zmalloc.c +++ b/src/zmalloc.c @@ -418,6 +418,9 @@ size_t zmalloc_get_memory_size(void) { if (sysctl(mib, 2, &size, &len, NULL, 0) == 0) return (size_t)size; return 0L; /* Failed? */ +#else + return 0L; /* Unknown method to get the data. */ +#endif #else return 0L; /* Unknown OS. */ #endif From 7c415014b00af05b36f1ebcb8b7def98b4e2e4f7 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 11 Apr 2017 16:56:00 +0200 Subject: [PATCH 0300/1722] Set lua-time-limit default value at safe place. Otherwise, as it was, it will overwrite whatever the user set. Close #3703. --- src/scripting.c | 1 - src/server.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scripting.c b/src/scripting.c index 11adcf282..1da6f763f 100644 --- a/src/scripting.c +++ b/src/scripting.c @@ -900,7 +900,6 @@ void scriptingInit(int setup) { server.lua_caller = NULL; server.lua_timedout = 0; server.lua_always_replicate_commands = 0; /* Only DEBUG can change it.*/ - server.lua_time_limit = LUA_SCRIPT_TIME_LIMIT; ldbInit(); } diff --git a/src/server.c b/src/server.c index 8f5a0911b..db853b836 100644 --- a/src/server.c +++ b/src/server.c @@ -1412,6 +1412,7 @@ void initServerConfig(void) { server.lazyfree_lazy_expire = CONFIG_DEFAULT_LAZYFREE_LAZY_EXPIRE; server.lazyfree_lazy_server_del = CONFIG_DEFAULT_LAZYFREE_LAZY_SERVER_DEL; server.always_show_logo = CONFIG_DEFAULT_ALWAYS_SHOW_LOGO; + server.lua_time_limit = LUA_SCRIPT_TIME_LIMIT; server.lruclock = getLRUClock(); resetServerSaveParams(); From 6081a5873c5b05ff0be3edd0f9e3ef1f5db0aa23 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 12 Apr 2017 10:12:27 +0200 Subject: [PATCH 
0301/1722] Add a top comment in crucial functions inside networking.c. --- src/networking.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/src/networking.c b/src/networking.c index 343a910e2..fbab9970f 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1027,6 +1027,13 @@ void resetClient(client *c) { } } +/* Like processMultibulkBuffer(), but for the inline protocol instead of RESP, + * this function consumes the client query buffer and creates a command ready + * to be executed inside the client structure. Returns C_OK if the command + * is ready to be executed, or C_ERR if there is still protocol to read to + * have a well formed command. The function also returns C_ERR when there is + * a protocol error: in such a case the client structure is setup to reply + * with the error and close the connection. */ int processInlineBuffer(client *c) { char *newline; int argc, j; @@ -1119,6 +1126,17 @@ static void setProtocolError(const char *errstr, client *c, int pos) { sdsrange(c->querybuf,pos,-1); } +/* Process the query buffer for client 'c', setting up the client argument + * vector for command execution. Returns C_OK if after running the function + * the client has a well-formed ready to be processed command, otherwise + * C_ERR if there is still to read more buffer to get the full command. + * The function also returns C_ERR when there is a protocol error: in such a + * case the client structure is setup to reply with the error and close + * the connection. + * + * This function is called if processInputBuffer() detects that the next + * command is in RESP format, so the first byte in the command is found + * to be '*'. Otherwise for inline commands processInlineBuffer() is called. 
*/ int processMultibulkBuffer(client *c) { char *newline = NULL; int pos = 0, ok; @@ -1253,10 +1271,14 @@ int processMultibulkBuffer(client *c) { /* We're done when c->multibulk == 0 */ if (c->multibulklen == 0) return C_OK; - /* Still not read to process the command */ + /* Still not ready to process the command */ return C_ERR; } +/* This function is called every time, in the client structure 'c', there is + * more query buffer to process, because we read more data from the socket + * or because a client was blocked and later reactivated, so there could be + * pending query buffer, already representing a full command, to process. */ void processInputBuffer(client *c) { server.current_client = c; /* Keep processing while there is something in the input buffer */ From 341adb516bfe81cea90666a8c290827cf8dd5ad3 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 12 Apr 2017 12:28:05 +0200 Subject: [PATCH 0302/1722] Fix typo in feedReplicationBacklog() top comment. --- src/replication.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/replication.c b/src/replication.c index bde37bc43..c7a703b85 100644 --- a/src/replication.c +++ b/src/replication.c @@ -122,7 +122,7 @@ void freeReplicationBacklog(void) { /* Add data to the replication backlog. * This function also increments the global replication offset stored at * server.master_repl_offset, because there is no case where we want to feed - * the backlog without incrementing the buffer. */ + * the backlog without incrementing the offset. */ void feedReplicationBacklog(void *ptr, size_t len) { unsigned char *p = ptr; From cac5a8b65de4136dc975b74d7a589e0e078532ce Mon Sep 17 00:00:00 2001 From: Itamar Haber Date: Thu, 13 Apr 2017 17:03:46 +0300 Subject: [PATCH 0303/1722] Changes command stats iteration to being dict-based With the addition of modules, looping over the redisCommandTable misses any added commands. By moving to dictionary iteration this is resolved. 
--- src/server.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/src/server.c b/src/server.c index db853b836..e0a96c7ca 100644 --- a/src/server.c +++ b/src/server.c @@ -1951,15 +1951,18 @@ void populateCommandTable(void) { } void resetCommandTableStats(void) { - int numcommands = sizeof(redisCommandTable)/sizeof(struct redisCommand); - int j; - - for (j = 0; j < numcommands; j++) { - struct redisCommand *c = redisCommandTable+j; + struct redisCommand *c; + dictEntry *de; + dictIterator *di; + di = dictGetSafeIterator(server.commands); + while((de = dictNext(di)) != NULL) { + c = (struct redisCommand *) dictGetVal(de); c->microseconds = 0; c->calls = 0; } + dictReleaseIterator(di); + } /* ========================== Redis OP Array API ============================ */ @@ -2758,7 +2761,7 @@ void bytesToHuman(char *s, unsigned long long n) { sds genRedisInfoString(char *section) { sds info = sdsempty(); time_t uptime = server.unixtime-server.stat_starttime; - int j, numcommands; + int j; struct rusage self_ru, c_ru; unsigned long lol, bib; int allsections = 0, defsections = 0; @@ -3215,20 +3218,24 @@ sds genRedisInfoString(char *section) { (float)c_ru.ru_utime.tv_sec+(float)c_ru.ru_utime.tv_usec/1000000); } - /* cmdtime */ + /* Command statistics */ if (allsections || !strcasecmp(section,"commandstats")) { if (sections++) info = sdscat(info,"\r\n"); info = sdscatprintf(info, "# Commandstats\r\n"); - numcommands = sizeof(redisCommandTable)/sizeof(struct redisCommand); - for (j = 0; j < numcommands; j++) { - struct redisCommand *c = redisCommandTable+j; + struct redisCommand *c; + dictEntry *de; + dictIterator *di; + di = dictGetSafeIterator(server.commands); + while((de = dictNext(di)) != NULL) { + c = (struct redisCommand *) dictGetVal(de); if (!c->calls) continue; info = sdscatprintf(info, "cmdstat_%s:calls=%lld,usec=%lld,usec_per_call=%.2f\r\n", c->name, c->calls, c->microseconds, (c->calls == 0) ? 
0 : ((float)c->microseconds/c->calls)); } + dictReleaseIterator(di); } /* Cluster */ From c628ae114bd10eef352488a6a0530f4cd4e71734 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 13 Apr 2017 19:22:35 +0200 Subject: [PATCH 0304/1722] Cluster: collect more specific bus messages stats. First step in order to change Cluster in order to use less messages. Related to issue #3929. --- src/cluster.c | 65 ++++++++++++++++++++++++++++++++++++++++++++------- src/cluster.h | 48 ++++++++++++++++++++----------------- 2 files changed, 82 insertions(+), 31 deletions(-) diff --git a/src/cluster.c b/src/cluster.c index 1a38a8e47..bb8580288 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -423,8 +423,10 @@ void clusterInit(void) { server.cluster->failover_auth_epoch = 0; server.cluster->cant_failover_reason = CLUSTER_CANT_FAILOVER_NONE; server.cluster->lastVoteEpoch = 0; - server.cluster->stats_bus_messages_sent = 0; - server.cluster->stats_bus_messages_received = 0; + for (int i = 0; i < CLUSTERMSG_TYPE_COUNT; i++) { + server.cluster->stats_bus_messages_sent[i] = 0; + server.cluster->stats_bus_messages_received[i] = 0; + } memset(server.cluster->slots,0, sizeof(server.cluster->slots)); clusterCloseAllSlots(); @@ -1583,7 +1585,8 @@ int clusterProcessPacket(clusterLink *link) { uint32_t totlen = ntohl(hdr->totlen); uint16_t type = ntohs(hdr->type); - server.cluster->stats_bus_messages_received++; + if (type < CLUSTERMSG_TYPE_COUNT) + server.cluster->stats_bus_messages_received[type]++; serverLog(LL_DEBUG,"--- Processing packet of type %d, %lu bytes", type, (unsigned long) totlen); @@ -2130,7 +2133,12 @@ void clusterSendMessage(clusterLink *link, unsigned char *msg, size_t msglen) { clusterWriteHandler,link); link->sndbuf = sdscatlen(link->sndbuf, msg, msglen); - server.cluster->stats_bus_messages_sent++; + + /* Populate sent messages stats. 
*/ + clusterMsg *hdr = (clusterMsg*) msg; + uint16_t type = ntohs(hdr->type); + if (type < CLUSTERMSG_TYPE_COUNT) + server.cluster->stats_bus_messages_sent[type]++; } /* Send a message to all the nodes that are part of the cluster having @@ -3877,6 +3885,21 @@ sds clusterGenNodesDescription(int filter) { * CLUSTER command * -------------------------------------------------------------------------- */ +const char *clusterGetMessageTypeString(int type) { + switch(type) { + case CLUSTERMSG_TYPE_PING: return "ping"; + case CLUSTERMSG_TYPE_PONG: return "pong"; + case CLUSTERMSG_TYPE_MEET: return "meet"; + case CLUSTERMSG_TYPE_FAIL: return "fail"; + case CLUSTERMSG_TYPE_PUBLISH: return "publish"; + case CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST: return "auth-req"; + case CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK: return "auth-ack"; + case CLUSTERMSG_TYPE_UPDATE: return "update"; + case CLUSTERMSG_TYPE_MFSTART: return "mfstart"; + } + return "unknown"; +} + int getSlotOrReply(client *c, robj *o) { long long slot; @@ -4208,8 +4231,6 @@ void clusterCommand(client *c) { "cluster_size:%d\r\n" "cluster_current_epoch:%llu\r\n" "cluster_my_epoch:%llu\r\n" - "cluster_stats_messages_sent:%lld\r\n" - "cluster_stats_messages_received:%lld\r\n" , statestr[server.cluster->state], slots_assigned, slots_ok, @@ -4218,10 +4239,36 @@ void clusterCommand(client *c) { dictSize(server.cluster->nodes), server.cluster->size, (unsigned long long) server.cluster->currentEpoch, - (unsigned long long) myepoch, - server.cluster->stats_bus_messages_sent, - server.cluster->stats_bus_messages_received + (unsigned long long) myepoch ); + + /* Show stats about messages sent and received. 
*/ + long long tot_msg_sent = 0; + long long tot_msg_received = 0; + + for (int i = 0; i < CLUSTERMSG_TYPE_COUNT; i++) { + if (server.cluster->stats_bus_messages_sent[i] == 0) continue; + tot_msg_sent += server.cluster->stats_bus_messages_sent[i]; + info = sdscatprintf(info, + "cluster_stats_messages_%s_sent:%lld\r\n", + clusterGetMessageTypeString(i), + server.cluster->stats_bus_messages_sent[i]); + } + info = sdscatprintf(info, + "cluster_stats_messages_sent:%lld\r\n", tot_msg_sent); + + for (int i = 0; i < CLUSTERMSG_TYPE_COUNT; i++) { + if (server.cluster->stats_bus_messages_received[i] == 0) continue; + tot_msg_received += server.cluster->stats_bus_messages_received[i]; + info = sdscatprintf(info, + "cluster_stats_messages_%s_received:%lld\r\n", + clusterGetMessageTypeString(i), + server.cluster->stats_bus_messages_received[i]); + } + info = sdscatprintf(info, + "cluster_stats_messages_received:%lld\r\n", tot_msg_received); + + /* Produce the reply protocol. */ addReplySds(c,sdscatprintf(sdsempty(),"$%lu\r\n", (unsigned long)sdslen(info))); addReplySds(c,info); diff --git a/src/cluster.h b/src/cluster.h index be6fe0bdc..e7c088569 100644 --- a/src/cluster.h +++ b/src/cluster.h @@ -73,6 +73,29 @@ typedef struct clusterLink { #define CLUSTER_CANT_FAILOVER_WAITING_VOTES 4 #define CLUSTER_CANT_FAILOVER_RELOG_PERIOD (60*5) /* seconds. */ +/* clusterState todo_before_sleep flags. */ +#define CLUSTER_TODO_HANDLE_FAILOVER (1<<0) +#define CLUSTER_TODO_UPDATE_STATE (1<<1) +#define CLUSTER_TODO_SAVE_CONFIG (1<<2) +#define CLUSTER_TODO_FSYNC_CONFIG (1<<3) + +/* Message types. + * + * Note that the PING, PONG and MEET messages are actually the same exact + * kind of packet. PONG is the reply to ping, in the exact format as a PING, + * while MEET is a special PING that forces the receiver to add the sender + * as a node (if it is not already in the list). 
*/ +#define CLUSTERMSG_TYPE_PING 0 /* Ping */ +#define CLUSTERMSG_TYPE_PONG 1 /* Pong (reply to Ping) */ +#define CLUSTERMSG_TYPE_MEET 2 /* Meet "let's join" message */ +#define CLUSTERMSG_TYPE_FAIL 3 /* Mark node xxx as failing */ +#define CLUSTERMSG_TYPE_PUBLISH 4 /* Pub/Sub Publish propagation */ +#define CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST 5 /* May I failover? */ +#define CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK 6 /* Yes, you have my vote */ +#define CLUSTERMSG_TYPE_UPDATE 7 /* Another node slots configuration */ +#define CLUSTERMSG_TYPE_MFSTART 8 /* Pause clients for manual failover */ +#define CLUSTERMSG_TYPE_COUNT 9 /* Total number of message types. */ + /* This structure represent elements of node->fail_reports. */ typedef struct clusterNodeFailReport { struct clusterNode *node; /* Node reporting the failure condition. */ @@ -139,32 +162,13 @@ typedef struct clusterState { /* The followign fields are used by masters to take state on elections. */ uint64_t lastVoteEpoch; /* Epoch of the last vote granted. */ int todo_before_sleep; /* Things to do in clusterBeforeSleep(). */ - long long stats_bus_messages_sent; /* Num of msg sent via cluster bus. */ - long long stats_bus_messages_received; /* Num of msg rcvd via cluster bus.*/ + /* Messages received and sent by type. */ + long long stats_bus_messages_sent[CLUSTERMSG_TYPE_COUNT]; + long long stats_bus_messages_received[CLUSTERMSG_TYPE_COUNT]; } clusterState; -/* clusterState todo_before_sleep flags. */ -#define CLUSTER_TODO_HANDLE_FAILOVER (1<<0) -#define CLUSTER_TODO_UPDATE_STATE (1<<1) -#define CLUSTER_TODO_SAVE_CONFIG (1<<2) -#define CLUSTER_TODO_FSYNC_CONFIG (1<<3) - /* Redis cluster messages header */ -/* Note that the PING, PONG and MEET messages are actually the same exact - * kind of packet. PONG is the reply to ping, in the exact format as a PING, - * while MEET is a special PING that forces the receiver to add the sender - * as a node (if it is not already in the list). 
*/ -#define CLUSTERMSG_TYPE_PING 0 /* Ping */ -#define CLUSTERMSG_TYPE_PONG 1 /* Pong (reply to Ping) */ -#define CLUSTERMSG_TYPE_MEET 2 /* Meet "let's join" message */ -#define CLUSTERMSG_TYPE_FAIL 3 /* Mark node xxx as failing */ -#define CLUSTERMSG_TYPE_PUBLISH 4 /* Pub/Sub Publish propagation */ -#define CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST 5 /* May I failover? */ -#define CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK 6 /* Yes, you have my vote */ -#define CLUSTERMSG_TYPE_UPDATE 7 /* Another node slots configuration */ -#define CLUSTERMSG_TYPE_MFSTART 8 /* Pause clients for manual failover */ - /* Initially we don't know our "name", but we'll find it once we connect * to the first node, using the getsockname() function. Then we'll use this * address for all the next messages. */ From de2eed4838f1f703b834944dc67383dc0c1b0890 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 14 Apr 2017 10:14:17 +0200 Subject: [PATCH 0305/1722] Cluster: decrease ping/pong traffic by trusting other nodes reports. Cluster of bigger sizes tend to have a lot of traffic in the cluster bus just for failure detection: a node will try to get a ping reply from another node no longer than when the half the node timeout would elapsed, in order to avoid a false positive. However this means that if we have N nodes and the node timeout is set to, for instance M seconds, we'll have to ping N nodes every M/2 seconds. This N*M/2 pings will receive the same number of pongs, so a total of N*M packets per node. However given that we have a total of N nodes doing this, the total number of messages will be N*N*M. In a 100 nodes cluster with a timeout of 60 seconds, this translates to a total of 100*100*30 packets per second, summing all the packets exchanged by all the nodes. This is, as you can guess, a lot... 
So this patch changes the implementation in a very simple way in order to trust the reports of other nodes: if a node A reports a node B as alive at least up to a given time, we update our view accordingly. The problem with this approach is that it could result in a subset of nodes being able to reach a given node X, and preventing others from detecting that is actually not reachable from the majority of nodes. So the above algorithm is refined by trusting other nodes only if we do not have currently a ping pending for the node X, and if there are no failure reports for that node. Since each node, anyway, pings 10 other nodes every second (one node every 100 milliseconds), anyway eventually even trusting the other nodes reports, we will detect if a given node is down from our POV. Now to understand the number of packets that the cluster would exchange for failure detection with the patch, we can start considering the random PINGs that the cluster sent anyway as base line: Each node sends 10 packets per second, so the total traffic if no additional packets would be sent, including PONG packets, would be: Total messages per second = N*10*2 However by trusting other nodes gossip sections will not ALWAYS prevent pinging nodes for the "half timeout reached" rule all the times. The math involved in computing the actual rate as N and M change is quite complex and depends also on another parameter, which is the number of entries in the gossip section of PING and PONG packets. However it is possible to compare what happens in cluster of different sizes experimentally. After applying this patch a very important reduction in the number of packets exchanged is trivial to observe, without apparent impacts on the failure detection performances. Actual numbers with different cluster sizes should be published in the Redis Cluster documentation in the future. Related to #3929. 
--- src/cluster.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/cluster.c b/src/cluster.c index bb8580288..f2c086909 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -1354,6 +1354,19 @@ void clusterProcessGossipSection(clusterMsg *hdr, clusterLink *link) { } } + /* If from our POV the node is up (no failure flags are set), + * we have no pending ping for the node, nor we have failure + * reports for this node, update the last pong time with the + * one we see from the other nodes. */ + if (!(flags & (CLUSTER_NODE_FAIL|CLUSTER_NODE_PFAIL)) && + node->ping_sent == 0 && + clusterNodeFailureReportsCount(node) == 0) + { + uint32_t pongtime = ntohl(g->pong_received); + if (pongtime > node->pong_received) + node->pong_received = pongtime; + } + /* If we already know this node, but it is not reachable, and * we see a different address in the gossip section of a node that * can talk with this other node, update the address, disconnect From 81a36f41eccab29a5307e8bf1cefec004ac82175 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 14 Apr 2017 10:35:55 +0200 Subject: [PATCH 0306/1722] Cluster: add clean-logs command to create-cluster script. 
--- src/cluster.c | 3 ++- utils/create-cluster/create-cluster | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/cluster.c b/src/cluster.c index f2c086909..d1d839cba 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -1363,8 +1363,9 @@ void clusterProcessGossipSection(clusterMsg *hdr, clusterLink *link) { clusterNodeFailureReportsCount(node) == 0) { uint32_t pongtime = ntohl(g->pong_received); - if (pongtime > node->pong_received) + if (pongtime > node->pong_received) { node->pong_received = pongtime; + } } /* If we already know this node, but it is not reachable, and diff --git a/utils/create-cluster/create-cluster b/utils/create-cluster/create-cluster index 98941496f..d821683f6 100755 --- a/utils/create-cluster/create-cluster +++ b/utils/create-cluster/create-cluster @@ -86,6 +86,12 @@ then exit 0 fi +if [ "$1" == "clean-logs" ] +then + rm -rf *.log + exit 0 +fi + echo "Usage: $0 [start|create|stop|watch|tail|clean]" echo "start -- Launch Redis Cluster instances." echo "create -- Create a cluster using redis-trib create." @@ -93,3 +99,4 @@ echo "stop -- Stop Redis Cluster instances." echo "watch -- Show CLUSTER NODES output (first 30 lines) of first node." echo "tail -- Run tail -f of instance at base port + ID." echo "clean -- Remove all instances data, logs, configs." +echo "clean-logs -- Remove just instances logs." From 06de4577d65f3f77aa1065ab2f15071b79771801 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 14 Apr 2017 10:40:22 +0200 Subject: [PATCH 0307/1722] Cluster: fix gossip section ping/pong times encoding. The gossip section times are 32 bit, so cannot store the milliseconds time but just the seconds approximation, which is good enough for our uses. At the same time however, when comparing the gossip section times of other nodes with our node's view, we need to convert back to milliseconds. Related to #3929. Without this change the patch to reduce the traffic in the bus message does not work. 
--- src/cluster.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/cluster.c b/src/cluster.c index d1d839cba..cae63e924 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -1362,7 +1362,8 @@ void clusterProcessGossipSection(clusterMsg *hdr, clusterLink *link) { node->ping_sent == 0 && clusterNodeFailureReportsCount(node) == 0) { - uint32_t pongtime = ntohl(g->pong_received); + mstime_t pongtime = ntohl(g->pong_received); + pongtime *= 1000; /* Convert back to milliseconds. */ if (pongtime > node->pong_received) { node->pong_received = pongtime; } @@ -2353,8 +2354,8 @@ void clusterSendPing(clusterLink *link, int type) { freshnodes--; gossip = &(hdr->data.ping.gossip[gossipcount]); memcpy(gossip->nodename,this->name,CLUSTER_NAMELEN); - gossip->ping_sent = htonl(this->ping_sent); - gossip->pong_received = htonl(this->pong_received); + gossip->ping_sent = htonl(this->ping_sent/1000); + gossip->pong_received = htonl(this->pong_received/1000); memcpy(gossip->ip,this->ip,sizeof(this->ip)); gossip->port = htons(this->port); gossip->cport = htons(this->cport); From 0530c7e5640ef859a94ea6ab9090368b15a31f92 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 14 Apr 2017 13:39:49 +0200 Subject: [PATCH 0308/1722] Cluster: always add PFAIL nodes at end of gossip section. To rely on the fact that nodes in PFAIL state will be shared around by randomly adding them in the gossip section is a weak assumption, especially after changes related to sending less ping/pong packets. We want to always include gossip entries for all the nodes that are in PFAIL state, so that the PFAIL -> FAIL state promotion can happen much faster and reliably. Related to #3929. 
--- src/cluster.c | 92 ++++++++++++++++++++++++++++++++++++++------------- src/cluster.h | 2 ++ 2 files changed, 71 insertions(+), 23 deletions(-) diff --git a/src/cluster.c b/src/cluster.c index cae63e924..b23160b90 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -427,6 +427,7 @@ void clusterInit(void) { server.cluster->stats_bus_messages_sent[i] = 0; server.cluster->stats_bus_messages_received[i] = 0; } + server.cluster->stats_pfail_nodes = 0; memset(server.cluster->slots,0, sizeof(server.cluster->slots)); clusterCloseAllSlots(); @@ -2254,6 +2255,33 @@ void clusterBuildMessageHdr(clusterMsg *hdr, int type) { /* For PING, PONG, and MEET, fixing the totlen field is up to the caller. */ } +/* Return non zero if the node is already present in the gossip section of the + * message pointed by 'hdr' and having 'count' gossip entries. Otherwise + * zero is returned. Helper for clusterSendPing(). */ +int clusterNodeIsInGossipSection(clusterMsg *hdr, int count, clusterNode *n) { + int j; + for (j = 0; j < count; j++) { + if (memcmp(hdr->data.ping.gossip[j].nodename,n->name, + CLUSTER_NAMELEN) == 0) break; + } + return j != count; +} + +/* Set the i-th entry of the gossip section in the message pointed by 'hdr' + * to the info of the specified node 'n'. */ +void clusterSetGossipEntry(clusterMsg *hdr, int i, clusterNode *n) { + clusterMsgDataGossip *gossip; + gossip = &(hdr->data.ping.gossip[i]); + memcpy(gossip->nodename,n->name,CLUSTER_NAMELEN); + gossip->ping_sent = htonl(n->ping_sent/1000); + gossip->pong_received = htonl(n->pong_received/1000); + memcpy(gossip->ip,n->ip,sizeof(n->ip)); + gossip->port = htons(n->port); + gossip->cport = htons(n->cport); + gossip->flags = htons(n->flags); + gossip->notused1 = 0; +} + /* Send a PING or PONG packet to the specified node, making sure to add enough * gossip informations. 
*/ void clusterSendPing(clusterLink *link, int type) { @@ -2298,11 +2326,15 @@ void clusterSendPing(clusterLink *link, int type) { if (wanted < 3) wanted = 3; if (wanted > freshnodes) wanted = freshnodes; + /* Include all the nodes in PFAIL state, so that failure reports are + * faster to propagate to go from PFAIL to FAIL state. */ + int pfail_wanted = server.cluster->stats_pfail_nodes; + /* Compute the maxium totlen to allocate our buffer. We'll fix the totlen * later according to the number of gossip sections we really were able * to put inside the packet. */ totlen = sizeof(clusterMsg)-sizeof(union clusterMsgData); - totlen += (sizeof(clusterMsgDataGossip)*wanted); + totlen += (sizeof(clusterMsgDataGossip)*(wanted+pfail_wanted)); /* Note: clusterBuildMessageHdr() expects the buffer to be always at least * sizeof(clusterMsg) or more. */ if (totlen < (int)sizeof(clusterMsg)) totlen = sizeof(clusterMsg); @@ -2319,17 +2351,13 @@ void clusterSendPing(clusterLink *link, int type) { while(freshnodes > 0 && gossipcount < wanted && maxiterations--) { dictEntry *de = dictGetRandomKey(server.cluster->nodes); clusterNode *this = dictGetVal(de); - clusterMsgDataGossip *gossip; - int j; /* Don't include this node: the whole packet header is about us * already, so we just gossip about other nodes. */ if (this == myself) continue; - /* Give a bias to FAIL/PFAIL nodes. */ - if (maxiterations > wanted*2 && - !(this->flags & (CLUSTER_NODE_PFAIL|CLUSTER_NODE_FAIL))) - continue; + /* PFAIL nodes will be added later. */ + if (this->flags & CLUSTER_NODE_PFAIL) continue; /* In the gossip section don't include: * 1) Nodes in HANDSHAKE state. @@ -2343,27 +2371,37 @@ void clusterSendPing(clusterLink *link, int type) { continue; } - /* Check if we already added this node */ - for (j = 0; j < gossipcount; j++) { - if (memcmp(hdr->data.ping.gossip[j].nodename,this->name, - CLUSTER_NAMELEN) == 0) break; - } - if (j != gossipcount) continue; + /* Do not add a node we already have. 
*/ + if (clusterNodeIsInGossipSection(hdr,gossipcount,this)) continue; /* Add it */ + clusterSetGossipEntry(hdr,gossipcount,this); freshnodes--; - gossip = &(hdr->data.ping.gossip[gossipcount]); - memcpy(gossip->nodename,this->name,CLUSTER_NAMELEN); - gossip->ping_sent = htonl(this->ping_sent/1000); - gossip->pong_received = htonl(this->pong_received/1000); - memcpy(gossip->ip,this->ip,sizeof(this->ip)); - gossip->port = htons(this->port); - gossip->cport = htons(this->cport); - gossip->flags = htons(this->flags); - gossip->notused1 = 0; gossipcount++; } + /* If there are PFAIL nodes, add them at the end. */ + if (pfail_wanted) { + dictIterator *di; + dictEntry *de; + + di = dictGetSafeIterator(server.cluster->nodes); + while((de = dictNext(di)) != NULL && pfail_wanted > 0) { + clusterNode *node = dictGetVal(de); + if (node->flags & CLUSTER_NODE_HANDSHAKE) continue; + if (node->flags & CLUSTER_NODE_NOADDR) continue; + if (!(node->flags & CLUSTER_NODE_PFAIL)) continue; + clusterSetGossipEntry(hdr,gossipcount,node); + freshnodes--; + gossipcount++; + /* We take the count of the slots we allocated, since the + * PFAIL stats may not match perfectly with the current number + * of PFAIL nodes. */ + pfail_wanted--; + } + dictReleaseIterator(di); + } + /* Ready to send... fix the totlen fiend and queue the message in the * output buffer. */ totlen = sizeof(clusterMsg)-sizeof(union clusterMsgData); @@ -3189,13 +3227,21 @@ void clusterCron(void) { handshake_timeout = server.cluster_node_timeout; if (handshake_timeout < 1000) handshake_timeout = 1000; - /* Check if we have disconnected nodes and re-establish the connection. */ + /* Check if we have disconnected nodes and re-establish the connection. + * Also update a few stats while we are here, that can be used to make + * better decisions in other part of the code. 
*/ di = dictGetSafeIterator(server.cluster->nodes); + server.cluster->stats_pfail_nodes = 0; while((de = dictNext(di)) != NULL) { clusterNode *node = dictGetVal(de); + /* Not interested in reconnecting the link with myself or nodes + * for which we have no address. */ if (node->flags & (CLUSTER_NODE_MYSELF|CLUSTER_NODE_NOADDR)) continue; + if (node->flags & CLUSTER_NODE_PFAIL) + server.cluster->stats_pfail_nodes++; + /* A Node in HANDSHAKE state has a limited lifespan equal to the * configured node timeout. */ if (nodeInHandshake(node) && now - node->ctime > handshake_timeout) { diff --git a/src/cluster.h b/src/cluster.h index e7c088569..5e228c0f9 100644 --- a/src/cluster.h +++ b/src/cluster.h @@ -165,6 +165,8 @@ typedef struct clusterState { /* Messages received and sent by type. */ long long stats_bus_messages_sent[CLUSTERMSG_TYPE_COUNT]; long long stats_bus_messages_received[CLUSTERMSG_TYPE_COUNT]; + long long stats_pfail_nodes; /* Number of nodes in PFAIL status, + excluding nodes without address. */ } clusterState; /* Redis cluster messages header */ From a2e3dc0a1d48ed348f9d03979c2a9ac0fe089533 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 14 Apr 2017 17:53:11 +0200 Subject: [PATCH 0309/1722] Test: fix, hopefully, false PSYNC failure like in issue #2715. And many other related Github issues... all reporting the same problem. There was probably just not enough backlog in certain unlucky runs. I'll ask people that can reporduce if they see now this as fixed as well. 
--- tests/integration/replication-psync.tcl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/replication-psync.tcl b/tests/integration/replication-psync.tcl index da1e9cf5b..2b9e13f50 100644 --- a/tests/integration/replication-psync.tcl +++ b/tests/integration/replication-psync.tcl @@ -110,7 +110,7 @@ foreach diskless {no yes} { test_psync {no reconnection, just sync} 6 1000000 3600 0 { } $diskless 0 - test_psync {ok psync} 6 1000000 3600 0 { + test_psync {ok psync} 6 100000000 3600 0 { assert {[s -1 sync_partial_ok] > 0} } $diskless 1 From 4d4f22843ce32fcc8bb8bc4d7d57b1f00fa61690 Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 15 Apr 2017 10:08:39 +0200 Subject: [PATCH 0310/1722] Cluster: discard pong times in the future. However we allow for 500 milliseconds of tolerance, in order to avoid often discarding semantically valid info (the node is up) because of natural few milliseconds desync among servers even when NTP is used. Note that anyway we should ping the node from time to time regardless and discover if it's actually down from our point of view, since no update is accepted while we have an active ping on the node. Related to #3929. --- src/cluster.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/cluster.c b/src/cluster.c index b23160b90..d5ad85fe7 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -1365,7 +1365,14 @@ void clusterProcessGossipSection(clusterMsg *hdr, clusterLink *link) { { mstime_t pongtime = ntohl(g->pong_received); pongtime *= 1000; /* Convert back to milliseconds. */ - if (pongtime > node->pong_received) { + + /* Replace the pong time with the received one only if + * it's greater than our view but is not in the future + * (with 500 milliseconds tolerance) from the POV of our + * clock. 
*/ + if (pongtime <= (server.mstime+500) && + pongtime > node->pong_received) + { node->pong_received = pongtime; } } From ed5d5d66337456ef2981bb876e879b8a5a26de68 Mon Sep 17 00:00:00 2001 From: spinlock Date: Fri, 31 Mar 2017 21:45:00 +0800 Subject: [PATCH 0311/1722] rdb: saving skiplist in reversed order to accelerate the deserialisation process --- src/rdb.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/rdb.c b/src/rdb.c index 2689b172d..c6a88081b 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -704,23 +704,24 @@ ssize_t rdbSaveObject(rio *rdb, robj *o) { nwritten += n; } else if (o->encoding == OBJ_ENCODING_SKIPLIST) { zset *zs = o->ptr; - dictIterator *di = dictGetIterator(zs->dict); - dictEntry *de; + zskiplist *zsl = zs->zsl; - if ((n = rdbSaveLen(rdb,dictSize(zs->dict))) == -1) return -1; + if ((n = rdbSaveLen(rdb,zsl->length)) == -1) return -1; nwritten += n; - while((de = dictNext(di)) != NULL) { - sds ele = dictGetKey(de); - double *score = dictGetVal(de); + zskiplistNode *zn = zsl->tail; + while (zn != NULL) { + sds ele = zn->ele; + double *score = &zn->score; if ((n = rdbSaveRawString(rdb,(unsigned char*)ele,sdslen(ele))) == -1) return -1; nwritten += n; if ((n = rdbSaveBinaryDoubleValue(rdb,*score)) == -1) return -1; nwritten += n; + + zn = zn->backward; } - dictReleaseIterator(di); } else { serverPanic("Unknown sorted set encoding"); } From 606c74baf6da9f0bff10648d9262f0f5216449fb Mon Sep 17 00:00:00 2001 From: Jan-Erik Rediger Date: Mon, 17 Apr 2017 13:37:59 +0200 Subject: [PATCH 0312/1722] Reorder to make dict-benchmark compile on Linux Fixes #3944 --- src/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Makefile b/src/Makefile index fec6573c2..8f429431b 100644 --- a/src/Makefile +++ b/src/Makefile @@ -214,8 +214,8 @@ $(REDIS_BENCHMARK_NAME): $(REDIS_BENCHMARK_OBJ) $(REDIS_CHECK_AOF_NAME): $(REDIS_CHECK_AOF_OBJ) $(REDIS_LD) -o $@ $^ $(FINAL_LIBS) -dict-benchmark: dict.c zmalloc.c 
sds.c - $(REDIS_CC) $(FINAL_CFLAGS) dict.c zmalloc.c sds.c siphash.c -D DICT_BENCHMARK_MAIN -o dict-benchmark +dict-benchmark: dict.c zmalloc.c sds.c siphash.c + $(REDIS_CC) $(FINAL_CFLAGS) $^ -D DICT_BENCHMARK_MAIN -o $@ $(FINAL_LIBS) # Because the jemalloc.h header is generated as a part of the jemalloc build, # building it should complete before building any other object. Instead of From 083636b801fe5fcedc8167e8e60b1316e992628f Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 18 Apr 2017 11:01:47 +0200 Subject: [PATCH 0313/1722] Clarify why we save ziplist elements in revserse order. Also get rid of variables that are now kinda redundant, since the dictionary iterator was removed. This is related to PR #3949. --- src/rdb.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/rdb.c b/src/rdb.c index c6a88081b..1a5a7b2c5 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -709,17 +709,23 @@ ssize_t rdbSaveObject(rio *rdb, robj *o) { if ((n = rdbSaveLen(rdb,zsl->length)) == -1) return -1; nwritten += n; + /* We save the skiplist elements from the greatest to the smallest + * (that's trivial since the elements are already ordered in the + * skiplist): this improves the load process, since the next loaded + * element will always be the smaller, so adding to the skiplist + * will always immediately stop at the head, making the insertion + * O(1) instead of O(log(N)). 
*/ zskiplistNode *zn = zsl->tail; while (zn != NULL) { - sds ele = zn->ele; - double *score = &zn->score; - - if ((n = rdbSaveRawString(rdb,(unsigned char*)ele,sdslen(ele))) - == -1) return -1; + if ((n = rdbSaveRawString(rdb, + (unsigned char*)zn->ele,sdslen(zn->ele))) == -1) + { + return -1; + } nwritten += n; - if ((n = rdbSaveBinaryDoubleValue(rdb,*score)) == -1) return -1; + if ((n = rdbSaveBinaryDoubleValue(rdb,zn->score)) == -1) + return -1; nwritten += n; - zn = zn->backward; } } else { From f34c984f2ff4642d851851db38c20fcd2d94b87b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=96=87=E5=BA=B7?= Date: Tue, 18 Apr 2017 20:10:08 +0800 Subject: [PATCH 0314/1722] update block->free after some diff data are written to the child process --- src/aof.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/aof.c b/src/aof.c index f73da1e25..9b15ad1d0 100644 --- a/src/aof.c +++ b/src/aof.c @@ -115,6 +115,7 @@ void aofChildWriteDiffData(aeEventLoop *el, int fd, void *privdata, int mask) { if (nwritten <= 0) return; memmove(block->buf,block->buf+nwritten,block->used-nwritten); block->used -= nwritten; + block->free += nwritten; } if (block->used == 0) listDelNode(server.aof_rewrite_buf_blocks,ln); } From 199b0b0c36f8ce6c3f197e81577d12ff75435963 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 18 Apr 2017 16:14:33 +0200 Subject: [PATCH 0315/1722] Fix descriptor leak. Close #3848. 
--- src/anet.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/anet.c b/src/anet.c index ef1711d06..993615601 100644 --- a/src/anet.c +++ b/src/anet.c @@ -380,8 +380,10 @@ int anetUnixGenericConnect(char *err, char *path, int flags) sa.sun_family = AF_LOCAL; strncpy(sa.sun_path,path,sizeof(sa.sun_path)-1); if (flags & ANET_CONNECT_NONBLOCK) { - if (anetNonBlock(err,s) != ANET_OK) + if (anetNonBlock(err,s) != ANET_OK) { + close(s); return ANET_ERR; + } } if (connect(s,(struct sockaddr*)&sa,sizeof(sa)) == -1) { if (errno == EINPROGRESS && From f52c65edf2efdbb5c3bbb9522d2a5ffd001f8729 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 18 Apr 2017 16:24:06 +0200 Subject: [PATCH 0316/1722] Fix #3848 by closing the descriptor on error. --- src/anet.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/anet.c b/src/anet.c index 993615601..53a56b0d2 100644 --- a/src/anet.c +++ b/src/anet.c @@ -464,7 +464,7 @@ static int anetV6Only(char *err, int s) { static int _anetTcpServer(char *err, int port, char *bindaddr, int af, int backlog) { - int s, rv; + int s = -1, rv; char _port[6]; /* strlen("65535") */ struct addrinfo hints, *servinfo, *p; @@ -493,6 +493,7 @@ static int _anetTcpServer(char *err, int port, char *bindaddr, int af, int backl } error: + if (s != -1) close(s); s = ANET_ERR; end: freeaddrinfo(servinfo); From ad56461850bdea93a9919fda33e506bef8cc0670 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 19 Apr 2017 10:25:45 +0200 Subject: [PATCH 0317/1722] Fix PSYNC2 incomplete command bug as described in #3899. This bug was discovered by @kevinmcgehee and constituted a major hidden bug in the PSYNC2 implementation, caused by the propagation from the master of incomplete commands to slaves. The bug had several results: 1. 
Borrowing from Kevin text in the issue: "Given that slaves blindly copy over their master's input into their own replication backlog over successive read syscalls, it's possible that with large commands or small TCP buffers, partial commands are present in this buffer. If the master were to fail before successfully propagating the entire command to a slave, the slaves will never execute the partial command (since the client is invalidated) but will copy it to replication backlog which may relay those invalid bytes to its slaves on PSYNC2, corrupting the backlog and possibly other valid commands that follow the failover. Simple command boundaries aren't sufficient to capture this, either, because in the case of a MULTI/EXEC block, if the master successfully propagates a subset of the commands but not the EXEC, then the transaction in the backlog becomes corrupt and could corrupt other slaves that consume this data." 2. As identified by @yangsiran later, there is another effect of the bug. For the same mechanism of the first problem, a slave having another slave, could receive a full resynchronization request with an already half-applied command in the backlog. Once the RDB is ready, it will be sent to the slave, and the replication will continue sending to the sub-slave the other half of the command, which is not valid. The fix, designed by @yangsiran and @antirez, and implemented by @antirez, uses a secondary buffer in order to feed the sub-masters and update the replication backlog and offsets, only when a given part of the query buffer is actually *applied* to the state of the instance, that is, when the command gets processed and the command is not pending in the Redis transaction buffer because of CLIENT_MULTI state. Given that now the backlog and offsets representation are in agreement with the actual processed commands, both issue 1 and 2 should no longer be possible. 
Thanks to @kevinmcgehee, @yangsiran and @oranagra for their work in identifying and designing a fix for this problem. --- src/networking.c | 42 +++++++++++++++++++++++++++++++++++------- src/replication.c | 7 +++++++ src/server.h | 6 +++++- 3 files changed, 47 insertions(+), 8 deletions(-) diff --git a/src/networking.c b/src/networking.c index fbab9970f..fae8e52bd 100644 --- a/src/networking.c +++ b/src/networking.c @@ -93,6 +93,7 @@ client *createClient(int fd) { c->name = NULL; c->bufpos = 0; c->querybuf = sdsempty(); + c->pending_querybuf = sdsempty(); c->querybuf_peak = 0; c->reqtype = 0; c->argc = 0; @@ -107,6 +108,7 @@ client *createClient(int fd) { c->replstate = REPL_STATE_NONE; c->repl_put_online_on_ack = 0; c->reploff = 0; + c->read_reploff = 0; c->repl_ack_off = 0; c->repl_ack_time = 0; c->slave_listening_port = 0; @@ -796,6 +798,7 @@ void freeClient(client *c) { /* Free the query buffer */ sdsfree(c->querybuf); + sdsfree(c->pending_querybuf); c->querybuf = NULL; /* Deallocate structures used to block on blocking ops. */ @@ -1318,8 +1321,13 @@ void processInputBuffer(client *c) { resetClient(c); } else { /* Only reset the client when the command was executed. */ - if (processCommand(c) == C_OK) + if (processCommand(c) == C_OK) { + if (c->flags & CLIENT_MASTER && !(c->flags & CLIENT_MULTI)) { + /* Update the applied replication offset of our master. */ + c->reploff = c->read_reploff - sdslen(c->querybuf); + } resetClient(c); + } /* freeMemoryIfNeeded may flush slave output buffers. This may result * into a slave, that may be the active client, to be freed. */ if (server.current_client == NULL) break; @@ -1366,15 +1374,17 @@ void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) { serverLog(LL_VERBOSE, "Client closed connection"); freeClient(c); return; + } else if (c->flags & CLIENT_MASTER) { + /* Append the query buffer to the pending (not applied) buffer + * of the master. 
We'll use this buffer later in order to have a + * copy of the string applied by the last command executed. */ + c->pending_querybuf = sdscatlen(c->pending_querybuf, + c->querybuf+qblen,nread); } sdsIncrLen(c->querybuf,nread); c->lastinteraction = server.unixtime; - if (c->flags & CLIENT_MASTER) { - c->reploff += nread; - replicationFeedSlavesFromMasterStream(server.slaves, - c->querybuf+qblen,nread); - } + if (c->flags & CLIENT_MASTER) c->read_reploff += nread; server.stat_net_input_bytes += nread; if (sdslen(c->querybuf) > server.client_max_querybuf_len) { sds ci = catClientInfoString(sdsempty(),c), bytes = sdsempty(); @@ -1386,7 +1396,25 @@ void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) { freeClient(c); return; } - processInputBuffer(c); + + /* Time to process the buffer. If the client is a master we need to + * compute the difference between the applied offset before and after + * processing the buffer, to understand how much of the replication stream + * was actually applied to the master state: this quantity, and its + * corresponding part of the replication stream, will be propagated to + * the sub-slaves and to the replication backlog. 
*/ + if (!(c->flags & CLIENT_MASTER)) { + processInputBuffer(c); + } else { + size_t prev_offset = c->reploff; + processInputBuffer(c); + size_t applied = c->reploff - prev_offset; + if (applied) { + replicationFeedSlavesFromMasterStream(server.slaves, + c->pending_querybuf, applied); + sdsrange(c->pending_querybuf,applied,-1); + } + } } void getClientsMaxBuffers(unsigned long *longest_output_list, diff --git a/src/replication.c b/src/replication.c index c7a703b85..91ede828d 100644 --- a/src/replication.c +++ b/src/replication.c @@ -1078,6 +1078,7 @@ void replicationCreateMasterClient(int fd, int dbid) { server.master->flags |= CLIENT_MASTER; server.master->authenticated = 1; server.master->reploff = server.master_initial_offset; + server.master->read_reploff = server.master->reploff; memcpy(server.master->replid, server.master_replid, sizeof(server.master_replid)); /* If master offset is set to -1, this master is old and is not @@ -2118,6 +2119,12 @@ void replicationCacheMaster(client *c) { /* Unlink the client from the server structures. */ unlinkClient(c); + /* Fix the master specific fields: we want to discard to non processed + * query buffers and non processed offsets. */ + sdsclear(server.master->querybuf); + sdsclear(server.master->pending_querybuf); + server.master->read_reploff = server.master->reploff; + /* Save the master. Server.master will be set to null later by * replicationHandleMasterDisconnection(). */ server.cached_master = server.master; diff --git a/src/server.h b/src/server.h index 19be92ba2..8cc172149 100644 --- a/src/server.h +++ b/src/server.h @@ -663,6 +663,9 @@ typedef struct client { redisDb *db; /* Pointer to currently SELECTed DB. */ robj *name; /* As set by CLIENT SETNAME. */ sds querybuf; /* Buffer we use to accumulate client queries. */ + sds pending_querybuf; /* If this is a master, this buffer represents the + yet not applied replication stream that we + are receiving from the master. 
*/ size_t querybuf_peak; /* Recent (100ms or more) peak of querybuf size. */ int argc; /* Num of arguments of current command. */ robj **argv; /* Arguments of current command. */ @@ -685,7 +688,8 @@ typedef struct client { off_t repldboff; /* Replication DB file offset. */ off_t repldbsize; /* Replication DB file size. */ sds replpreamble; /* Replication DB preamble. */ - long long reploff; /* Replication offset if this is our master. */ + long long read_reploff; /* Read replication offset if this is a master. */ + long long reploff; /* Applied replication offset if this is a master. */ long long repl_ack_off; /* Replication ack offset, if this is a slave. */ long long repl_ack_time;/* Replication ack time, if this is a slave. */ long long psync_initial_offset; /* FULLRESYNC reply offset other slaves From 62ef6778c9680e1fc4ab5befbd6f77c74fc0cd5e Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 19 Apr 2017 14:02:52 +0200 Subject: [PATCH 0318/1722] PSYNC2: discard pending transactions from cached master. During the review of the fix for #3899, @yangsiran identified an implementation bug: given that the offset is now relative to the applied part of the replication log, when we cache a master, the successive PSYNC2 request will be made in order to *include* the transaction that was not completely processed. This means that we need to discard any pending transaction from our replication buffer: it will be re-executed. --- src/replication.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/replication.c b/src/replication.c index 91ede828d..1828eb8bf 100644 --- a/src/replication.c +++ b/src/replication.c @@ -2120,10 +2120,12 @@ void replicationCacheMaster(client *c) { unlinkClient(c); /* Fix the master specific fields: we want to discard to non processed - * query buffers and non processed offsets. */ + * query buffers and non processed offsets, including pending + * transactions. 
*/ sdsclear(server.master->querybuf); sdsclear(server.master->pending_querybuf); server.master->read_reploff = server.master->reploff; + if (c->flags & CLIENT_MULTI) discardTransaction(c); /* Save the master. Server.master will be set to null later by * replicationHandleMasterDisconnection(). */ From 29bc56e589e91a38a5d0270aee700af758bfe7ce Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 19 Apr 2017 16:17:08 +0200 Subject: [PATCH 0319/1722] Fix getKeysUsingCommandTable() in cluster mode. Close #3940. --- src/db.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/db.c b/src/db.c index 760843120..86dabac8f 100644 --- a/src/db.c +++ b/src/db.c @@ -1133,11 +1133,24 @@ int *getKeysUsingCommandTable(struct redisCommand *cmd,robj **argv, int argc, in *numkeys = 0; return NULL; } + last = cmd->lastkey; if (last < 0) last = argc+last; keys = zmalloc(sizeof(int)*((last - cmd->firstkey)+1)); for (j = cmd->firstkey; j <= last; j += cmd->keystep) { - serverAssert(j < argc); + if (j >= argc) { + /* Modules command do not have dispatch time arity checks, so + * we need to handle the case where the user passed an invalid + * number of arguments here. In this case we return no keys + * and expect the module command to report an arity error. */ + if (cmd->flags & CMD_MODULE) { + zfree(keys); + *numkeys = 0; + return NULL; + } else { + serverPanic("Redis built-in command declared keys positions not matching the arity requirements."); + } + } keys[i++] = j; } *numkeys = i; From cb86d8916a4635213bc637d373b9b06b98c0d31b Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 21 Apr 2017 16:27:38 +0200 Subject: [PATCH 0320/1722] Check event loop creation return value. Fix #3951. Normally we never check for OOM conditions inside Redis since the allocator will always return a pointer or abort the program on OOM conditons. 
However we cannot have control on epool_create(), that may fail for kernel OOM (according to the manual page) even if all the parameters are correct, so the function aeCreateEventLoop() may indeed return NULL and this condition must be checked. --- src/server.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/server.c b/src/server.c index db853b836..72914c53b 100644 --- a/src/server.c +++ b/src/server.c @@ -1782,6 +1782,12 @@ void initServer(void) { createSharedObjects(); adjustOpenFilesLimit(); server.el = aeCreateEventLoop(server.maxclients+CONFIG_FDSET_INCR); + if (server.el == NULL) { + serverLog(LL_WARNING, + "Failed creating the event loop. Error message: '%s'", + strerror(errno)); + exit(1); + } server.db = zmalloc(sizeof(redisDb)*server.dbnum); /* Open the TCP listening socket for the user commands. */ From d2ed9829883e9846b04d3e1449acf0f2547e19c3 Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 22 Apr 2017 13:12:42 +0200 Subject: [PATCH 0321/1722] Revert "Jemalloc updated to 4.4.0." This reverts commit 36c1acc222d29e6e2dc9fc25362e4faa471111bd. 
--- deps/jemalloc/.appveyor.yml | 28 - deps/jemalloc/.gitignore | 16 - deps/jemalloc/.travis.yml | 29 - deps/jemalloc/COPYING | 4 +- deps/jemalloc/ChangeLog | 220 -- deps/jemalloc/INSTALL | 26 - deps/jemalloc/Makefile.in | 128 +- deps/jemalloc/README | 2 +- deps/jemalloc/VERSION | 2 +- deps/jemalloc/bin/jeprof.in | 131 +- deps/jemalloc/{build-aux => }/config.guess | 174 +- deps/jemalloc/{build-aux => }/config.sub | 76 +- deps/jemalloc/configure | 1391 +---------- deps/jemalloc/configure.ac | 436 +--- deps/jemalloc/doc/html.xsl.in | 1 - deps/jemalloc/doc/jemalloc.3 | 859 +++---- deps/jemalloc/doc/jemalloc.html | 1507 +++++++----- deps/jemalloc/doc/jemalloc.xml.in | 540 ++--- deps/jemalloc/doc/stylesheet.xsl | 7 +- .../include/jemalloc/internal/arena.h | 582 ++--- .../include/jemalloc/internal/assert.h | 45 - .../include/jemalloc/internal/atomic.h | 4 +- .../jemalloc/include/jemalloc/internal/base.h | 11 +- .../include/jemalloc/internal/bitmap.h | 76 +- .../include/jemalloc/internal/chunk.h | 38 +- .../include/jemalloc/internal/chunk_dss.h | 10 +- .../include/jemalloc/internal/chunk_mmap.h | 4 +- deps/jemalloc/include/jemalloc/internal/ckh.h | 6 +- deps/jemalloc/include/jemalloc/internal/ctl.h | 29 +- .../include/jemalloc/internal/extent.h | 43 +- .../jemalloc/include/jemalloc/internal/hash.h | 33 +- .../jemalloc/include/jemalloc/internal/huge.h | 21 +- .../jemalloc/internal/jemalloc_internal.h.in | 478 ++-- .../internal/jemalloc_internal_decls.h | 11 - .../internal/jemalloc_internal_defs.h.in | 73 +- deps/jemalloc/include/jemalloc/internal/mb.h | 10 +- .../include/jemalloc/internal/mutex.h | 62 +- .../include/jemalloc/internal/nstime.h | 48 - .../include/jemalloc/internal/pages.h | 7 +- deps/jemalloc/include/jemalloc/internal/ph.h | 345 --- .../jemalloc/internal/private_symbols.txt | 256 +- .../jemalloc/include/jemalloc/internal/prng.h | 193 +- .../jemalloc/include/jemalloc/internal/prof.h | 86 +- deps/jemalloc/include/jemalloc/internal/rb.h | 208 +- 
.../include/jemalloc/internal/rtree.h | 160 +- .../include/jemalloc/internal/size_classes.sh | 50 +- .../include/jemalloc/internal/smoothstep.h | 246 -- .../include/jemalloc/internal/smoothstep.sh | 115 - .../jemalloc/include/jemalloc/internal/spin.h | 51 - .../include/jemalloc/internal/stats.h | 14 - .../include/jemalloc/internal/tcache.h | 159 +- .../include/jemalloc/internal/ticker.h | 75 - deps/jemalloc/include/jemalloc/internal/tsd.h | 164 +- .../jemalloc/include/jemalloc/internal/util.h | 224 +- .../include/jemalloc/internal/valgrind.h | 40 +- .../include/jemalloc/internal/witness.h | 266 --- .../include/jemalloc/jemalloc_defs.h.in | 8 - .../include/jemalloc/jemalloc_macros.h.in | 61 +- deps/jemalloc/include/msvc_compat/strings.h | 30 - .../include/msvc_compat/windows_extra.h | 22 +- deps/jemalloc/{build-aux => }/install-sh | 0 deps/jemalloc/jemalloc.pc.in | 2 +- deps/jemalloc/msvc/ReadMe.txt | 24 - deps/jemalloc/msvc/jemalloc_vc2015.sln | 63 - .../projects/vc2015/jemalloc/jemalloc.vcxproj | 402 ---- .../vc2015/jemalloc/jemalloc.vcxproj.filters | 272 --- .../vc2015/test_threads/test_threads.cpp | 89 - .../vc2015/test_threads/test_threads.h | 3 - .../vc2015/test_threads/test_threads.vcxproj | 327 --- .../test_threads/test_threads.vcxproj.filters | 26 - .../vc2015/test_threads/test_threads_main.cpp | 12 - deps/jemalloc/src/arena.c | 2053 ++++++----------- deps/jemalloc/src/base.c | 73 +- deps/jemalloc/src/bitmap.c | 59 +- deps/jemalloc/src/chunk.c | 434 ++-- deps/jemalloc/src/chunk_dss.c | 188 +- deps/jemalloc/src/chunk_mmap.c | 18 +- deps/jemalloc/src/ckh.c | 43 +- deps/jemalloc/src/ctl.c | 797 +++---- deps/jemalloc/src/extent.c | 72 +- deps/jemalloc/src/huge.c | 238 +- deps/jemalloc/src/jemalloc.c | 1496 +++++------- deps/jemalloc/src/mutex.c | 23 +- deps/jemalloc/src/nstime.c | 194 -- deps/jemalloc/src/pages.c | 177 +- deps/jemalloc/src/prng.c | 2 - deps/jemalloc/src/prof.c | 664 +++--- deps/jemalloc/src/quarantine.c | 50 +- deps/jemalloc/src/rtree.c | 9 +- 
deps/jemalloc/src/spin.c | 2 - deps/jemalloc/src/stats.c | 1242 +++------- deps/jemalloc/src/tcache.c | 170 +- deps/jemalloc/src/ticker.c | 2 - deps/jemalloc/src/tsd.c | 28 +- deps/jemalloc/src/util.c | 42 +- deps/jemalloc/src/witness.c | 136 -- deps/jemalloc/src/zone.c | 216 +- .../test/include/test/jemalloc_test.h.in | 80 +- deps/jemalloc/test/include/test/mtx.h | 2 - deps/jemalloc/test/include/test/test.h | 4 - deps/jemalloc/test/include/test/timer.h | 19 +- .../jemalloc/test/integration/MALLOCX_ARENA.c | 4 +- .../jemalloc/test/integration/aligned_alloc.c | 20 +- deps/jemalloc/test/integration/allocated.c | 17 +- deps/jemalloc/test/integration/chunk.c | 98 +- deps/jemalloc/test/integration/mallocx.c | 108 +- deps/jemalloc/test/integration/overflow.c | 8 +- .../test/integration/posix_memalign.c | 20 +- deps/jemalloc/test/integration/rallocx.c | 86 +- deps/jemalloc/test/integration/sdallocx.c | 4 +- deps/jemalloc/test/integration/thread_arena.c | 10 +- .../test/integration/thread_tcache_enabled.c | 39 +- deps/jemalloc/test/integration/xallocx.c | 120 +- deps/jemalloc/test/src/mtx.c | 7 - deps/jemalloc/test/src/test.c | 56 +- deps/jemalloc/test/src/timer.c | 45 +- deps/jemalloc/test/stress/microbench.c | 3 +- deps/jemalloc/test/unit/a0.c | 19 - deps/jemalloc/test/unit/arena_reset.c | 159 -- deps/jemalloc/test/unit/bitmap.c | 26 +- deps/jemalloc/test/unit/ckh.c | 8 +- deps/jemalloc/test/unit/decay.c | 374 --- deps/jemalloc/test/unit/fork.c | 64 - deps/jemalloc/test/unit/hash.c | 36 +- deps/jemalloc/test/unit/junk.c | 17 +- deps/jemalloc/test/unit/junk_alloc.c | 2 +- deps/jemalloc/test/unit/junk_free.c | 2 +- deps/jemalloc/test/unit/mallctl.c | 319 +-- deps/jemalloc/test/unit/math.c | 4 - deps/jemalloc/test/unit/nstime.c | 227 -- deps/jemalloc/test/unit/pack.c | 206 -- deps/jemalloc/test/unit/pages.c | 27 - deps/jemalloc/test/unit/ph.c | 290 --- deps/jemalloc/test/unit/prng.c | 263 --- deps/jemalloc/test/unit/prof_accum.c | 5 +- deps/jemalloc/test/unit/prof_active.c 
| 5 +- deps/jemalloc/test/unit/prof_gdump.c | 13 +- deps/jemalloc/test/unit/prof_idump.c | 5 +- deps/jemalloc/test/unit/prof_reset.c | 16 +- deps/jemalloc/test/unit/prof_thread_name.c | 22 +- deps/jemalloc/test/unit/rb.c | 60 +- deps/jemalloc/test/unit/run_quantize.c | 149 -- deps/jemalloc/test/unit/size_classes.c | 105 +- deps/jemalloc/test/unit/smoothstep.c | 106 - deps/jemalloc/test/unit/stats.c | 253 +- deps/jemalloc/test/unit/ticker.c | 76 - deps/jemalloc/test/unit/tsd.c | 13 +- deps/jemalloc/test/unit/util.c | 89 +- deps/jemalloc/test/unit/witness.c | 278 --- deps/jemalloc/test/unit/zero.c | 16 +- 150 files changed, 6355 insertions(+), 17238 deletions(-) delete mode 100644 deps/jemalloc/.appveyor.yml delete mode 100644 deps/jemalloc/.travis.yml rename deps/jemalloc/{build-aux => }/config.guess (90%) rename deps/jemalloc/{build-aux => }/config.sub (95%) delete mode 100644 deps/jemalloc/include/jemalloc/internal/assert.h delete mode 100644 deps/jemalloc/include/jemalloc/internal/nstime.h delete mode 100644 deps/jemalloc/include/jemalloc/internal/ph.h delete mode 100644 deps/jemalloc/include/jemalloc/internal/smoothstep.h delete mode 100755 deps/jemalloc/include/jemalloc/internal/smoothstep.sh delete mode 100644 deps/jemalloc/include/jemalloc/internal/spin.h delete mode 100644 deps/jemalloc/include/jemalloc/internal/ticker.h delete mode 100644 deps/jemalloc/include/jemalloc/internal/witness.h rename deps/jemalloc/{build-aux => }/install-sh (100%) delete mode 100644 deps/jemalloc/msvc/ReadMe.txt delete mode 100644 deps/jemalloc/msvc/jemalloc_vc2015.sln delete mode 100644 deps/jemalloc/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj delete mode 100644 deps/jemalloc/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters delete mode 100755 deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads.cpp delete mode 100644 deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads.h delete mode 100644 
deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads.vcxproj delete mode 100644 deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads.vcxproj.filters delete mode 100644 deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads_main.cpp delete mode 100644 deps/jemalloc/src/nstime.c delete mode 100644 deps/jemalloc/src/prng.c delete mode 100644 deps/jemalloc/src/spin.c mode change 100755 => 100644 deps/jemalloc/src/stats.c mode change 100755 => 100644 deps/jemalloc/src/tcache.c delete mode 100644 deps/jemalloc/src/ticker.c mode change 100755 => 100644 deps/jemalloc/src/util.c delete mode 100644 deps/jemalloc/src/witness.c mode change 100755 => 100644 deps/jemalloc/test/integration/MALLOCX_ARENA.c mode change 100755 => 100644 deps/jemalloc/test/integration/allocated.c mode change 100755 => 100644 deps/jemalloc/test/integration/mallocx.c mode change 100755 => 100644 deps/jemalloc/test/integration/overflow.c mode change 100755 => 100644 deps/jemalloc/test/integration/rallocx.c mode change 100755 => 100644 deps/jemalloc/test/integration/thread_arena.c mode change 100755 => 100644 deps/jemalloc/test/integration/thread_tcache_enabled.c mode change 100755 => 100644 deps/jemalloc/test/integration/xallocx.c delete mode 100644 deps/jemalloc/test/unit/a0.c delete mode 100755 deps/jemalloc/test/unit/arena_reset.c delete mode 100755 deps/jemalloc/test/unit/decay.c delete mode 100644 deps/jemalloc/test/unit/fork.c mode change 100755 => 100644 deps/jemalloc/test/unit/mallctl.c delete mode 100644 deps/jemalloc/test/unit/nstime.c delete mode 100644 deps/jemalloc/test/unit/pack.c delete mode 100644 deps/jemalloc/test/unit/pages.c delete mode 100644 deps/jemalloc/test/unit/ph.c delete mode 100644 deps/jemalloc/test/unit/prng.c mode change 100755 => 100644 deps/jemalloc/test/unit/prof_accum.c mode change 100755 => 100644 deps/jemalloc/test/unit/prof_active.c mode change 100755 => 100644 deps/jemalloc/test/unit/prof_gdump.c mode change 100755 => 100644 
deps/jemalloc/test/unit/prof_idump.c mode change 100755 => 100644 deps/jemalloc/test/unit/prof_reset.c mode change 100755 => 100644 deps/jemalloc/test/unit/prof_thread_name.c delete mode 100644 deps/jemalloc/test/unit/run_quantize.c mode change 100755 => 100644 deps/jemalloc/test/unit/size_classes.c delete mode 100644 deps/jemalloc/test/unit/smoothstep.c mode change 100755 => 100644 deps/jemalloc/test/unit/stats.c delete mode 100644 deps/jemalloc/test/unit/ticker.c delete mode 100644 deps/jemalloc/test/unit/witness.c diff --git a/deps/jemalloc/.appveyor.yml b/deps/jemalloc/.appveyor.yml deleted file mode 100644 index ddd5c5711..000000000 --- a/deps/jemalloc/.appveyor.yml +++ /dev/null @@ -1,28 +0,0 @@ -version: '{build}' - -environment: - matrix: - - MSYSTEM: MINGW64 - CPU: x86_64 - MSVC: amd64 - - MSYSTEM: MINGW32 - CPU: i686 - MSVC: x86 - - MSYSTEM: MINGW64 - CPU: x86_64 - - MSYSTEM: MINGW32 - CPU: i686 - -install: - - set PATH=c:\msys64\%MSYSTEM%\bin;c:\msys64\usr\bin;%PATH% - - if defined MSVC call "c:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" %MSVC% - - if defined MSVC pacman --noconfirm -Rsc mingw-w64-%CPU%-gcc gcc - - pacman --noconfirm -Suy mingw-w64-%CPU%-make - -build_script: - - bash -c "autoconf" - - bash -c "./configure" - - mingw32-make -j3 - - file lib/jemalloc.dll - - mingw32-make -j3 tests - - mingw32-make -k check diff --git a/deps/jemalloc/.gitignore b/deps/jemalloc/.gitignore index 08278d087..d0e393619 100644 --- a/deps/jemalloc/.gitignore +++ b/deps/jemalloc/.gitignore @@ -73,19 +73,3 @@ test/include/test/jemalloc_test_defs.h /test/unit/*.out /VERSION - -*.pdb -*.sdf -*.opendb -*.opensdf -*.cachefile -*.suo -*.user -*.sln.docstates -*.tmp -/msvc/Win32/ -/msvc/x64/ -/msvc/projects/*/*/Debug*/ -/msvc/projects/*/*/Release*/ -/msvc/projects/*/*/Win32/ -/msvc/projects/*/*/x64/ diff --git a/deps/jemalloc/.travis.yml b/deps/jemalloc/.travis.yml deleted file mode 100644 index 1fed4f8e6..000000000 --- a/deps/jemalloc/.travis.yml 
+++ /dev/null @@ -1,29 +0,0 @@ -language: c - -matrix: - include: - - os: linux - compiler: gcc - - os: linux - compiler: gcc - env: - - EXTRA_FLAGS=-m32 - addons: - apt: - packages: - - gcc-multilib - - os: osx - compiler: clang - - os: osx - compiler: clang - env: - - EXTRA_FLAGS=-m32 - -before_script: - - autoconf - - ./configure${EXTRA_FLAGS:+ CC="$CC $EXTRA_FLAGS"} - - make -j3 - - make -j3 tests - -script: - - make check diff --git a/deps/jemalloc/COPYING b/deps/jemalloc/COPYING index 104b1f8b0..611968cda 100644 --- a/deps/jemalloc/COPYING +++ b/deps/jemalloc/COPYING @@ -1,10 +1,10 @@ Unless otherwise specified, files in the jemalloc source distribution are subject to the following license: -------------------------------------------------------------------------------- -Copyright (C) 2002-2016 Jason Evans . +Copyright (C) 2002-2015 Jason Evans . All rights reserved. Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved. -Copyright (C) 2009-2016 Facebook, Inc. All rights reserved. +Copyright (C) 2009-2015 Facebook, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: diff --git a/deps/jemalloc/ChangeLog b/deps/jemalloc/ChangeLog index f75edd933..e3b0a5190 100644 --- a/deps/jemalloc/ChangeLog +++ b/deps/jemalloc/ChangeLog @@ -4,226 +4,6 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc -* 4.4.0 (December 3, 2016) - - New features: - - Add configure support for *-*-linux-android. (@cferris1000, @jasone) - - Add the --disable-syscall configure option, for use on systems that place - security-motivated limitations on syscall(2). (@jasone) - - Add support for Debian GNU/kFreeBSD. (@thesam) - - Optimizations: - - Add extent serial numbers and use them where appropriate as a sort key that - is higher priority than address, so that the allocation policy prefers older - extents. 
This tends to improve locality (decrease fragmentation) when - memory grows downward. (@jasone) - - Refactor madvise(2) configuration so that MADV_FREE is detected and utilized - on Linux 4.5 and newer. (@jasone) - - Mark partially purged arena chunks as non-huge-page. This improves - interaction with Linux's transparent huge page functionality. (@jasone) - - Bug fixes: - - Fix size class computations for edge conditions involving extremely large - allocations. This regression was first released in 4.0.0. (@jasone, - @ingvarha) - - Remove overly restrictive assertions related to the cactive statistic. This - regression was first released in 4.1.0. (@jasone) - - Implement a more reliable detection scheme for os_unfair_lock on macOS. - (@jszakmeister) - -* 4.3.1 (November 7, 2016) - - Bug fixes: - - Fix a severe virtual memory leak. This regression was first released in - 4.3.0. (@interwq, @jasone) - - Refactor atomic and prng APIs to restore support for 32-bit platforms that - use pre-C11 toolchains, e.g. FreeBSD's mips. (@jasone) - -* 4.3.0 (November 4, 2016) - - This is the first release that passes the test suite for multiple Windows - configurations, thanks in large part to @glandium setting up continuous - integration via AppVeyor (and Travis CI for Linux and OS X). - - New features: - - Add "J" (JSON) support to malloc_stats_print(). (@jasone) - - Add Cray compiler support. (@ronawho) - - Optimizations: - - Add/use adaptive spinning for bootstrapping and radix tree node - initialization. (@jasone) - - Bug fixes: - - Fix large allocation to search starting in the optimal size class heap, - which can substantially reduce virtual memory churn and fragmentation. This - regression was first released in 4.0.0. (@mjp41, @jasone) - - Fix stats.arenas..nthreads accounting. (@interwq) - - Fix and simplify decay-based purging. (@jasone) - - Make DSS (sbrk(2)-related) operations lockless, which resolves potential - deadlocks during thread exit. 
(@jasone) - - Fix over-sized allocation of radix tree leaf nodes. (@mjp41, @ogaun, - @jasone) - - Fix over-sized allocation of arena_t (plus associated stats) data - structures. (@jasone, @interwq) - - Fix EXTRA_CFLAGS to not affect configuration. (@jasone) - - Fix a Valgrind integration bug. (@ronawho) - - Disallow 0x5a junk filling when running in Valgrind. (@jasone) - - Fix a file descriptor leak on Linux. This regression was first released in - 4.2.0. (@vsarunas, @jasone) - - Fix static linking of jemalloc with glibc. (@djwatson) - - Use syscall(2) rather than {open,read,close}(2) during boot on Linux. This - works around other libraries' system call wrappers performing reentrant - allocation. (@kspinka, @Whissi, @jasone) - - Fix OS X default zone replacement to work with OS X 10.12. (@glandium, - @jasone) - - Fix cached memory management to avoid needless commit/decommit operations - during purging, which resolves permanent virtual memory map fragmentation - issues on Windows. (@mjp41, @jasone) - - Fix TSD fetches to avoid (recursive) allocation. This is relevant to - non-TLS and Windows configurations. (@jasone) - - Fix malloc_conf overriding to work on Windows. (@jasone) - - Forcibly disable lazy-lock on Windows (was forcibly *enabled*). (@jasone) - -* 4.2.1 (June 8, 2016) - - Bug fixes: - - Fix bootstrapping issues for configurations that require allocation during - tsd initialization (e.g. --disable-tls). (@cferris1000, @jasone) - - Fix gettimeofday() version of nstime_update(). (@ronawho) - - Fix Valgrind regressions in calloc() and chunk_alloc_wrapper(). (@ronawho) - - Fix potential VM map fragmentation regression. (@jasone) - - Fix opt_zero-triggered in-place huge reallocation zeroing. (@jasone) - - Fix heap profiling context leaks in reallocation edge cases. (@jasone) - -* 4.2.0 (May 12, 2016) - - New features: - - Add the arena..reset mallctl, which makes it possible to discard all of - an arena's allocations in a single operation. 
(@jasone) - - Add the stats.retained and stats.arenas..retained statistics. (@jasone) - - Add the --with-version configure option. (@jasone) - - Support --with-lg-page values larger than actual page size. (@jasone) - - Optimizations: - - Use pairing heaps rather than red-black trees for various hot data - structures. (@djwatson, @jasone) - - Streamline fast paths of rtree operations. (@jasone) - - Optimize the fast paths of calloc() and [m,d,sd]allocx(). (@jasone) - - Decommit unused virtual memory if the OS does not overcommit. (@jasone) - - Specify MAP_NORESERVE on Linux if [heuristic] overcommit is active, in order - to avoid unfortunate interactions during fork(2). (@jasone) - - Bug fixes: - - Fix chunk accounting related to triggering gdump profiles. (@jasone) - - Link against librt for clock_gettime(2) if glibc < 2.17. (@jasone) - - Scale leak report summary according to sampling probability. (@jasone) - -* 4.1.1 (May 3, 2016) - - This bugfix release resolves a variety of mostly minor issues, though the - bitmap fix is critical for 64-bit Windows. - - Bug fixes: - - Fix the linear scan version of bitmap_sfu() to shift by the proper amount - even when sizeof(long) is not the same as sizeof(void *), as on 64-bit - Windows. (@jasone) - - Fix hashing functions to avoid unaligned memory accesses (and resulting - crashes). This is relevant at least to some ARM-based platforms. - (@rkmisra) - - Fix fork()-related lock rank ordering reversals. These reversals were - unlikely to cause deadlocks in practice except when heap profiling was - enabled and active. (@jasone) - - Fix various chunk leaks in OOM code paths. (@jasone) - - Fix malloc_stats_print() to print opt.narenas correctly. (@jasone) - - Fix MSVC-specific build/test issues. (@rustyx, @yuslepukhin) - - Fix a variety of test failures that were due to test fragility rather than - core bugs. 
(@jasone) - -* 4.1.0 (February 28, 2016) - - This release is primarily about optimizations, but it also incorporates a lot - of portability-motivated refactoring and enhancements. Many people worked on - this release, to an extent that even with the omission here of minor changes - (see git revision history), and of the people who reported and diagnosed - issues, so much of the work was contributed that starting with this release, - changes are annotated with author credits to help reflect the collaborative - effort involved. - - New features: - - Implement decay-based unused dirty page purging, a major optimization with - mallctl API impact. This is an alternative to the existing ratio-based - unused dirty page purging, and is intended to eventually become the sole - purging mechanism. New mallctls: - + opt.purge - + opt.decay_time - + arena..decay - + arena..decay_time - + arenas.decay_time - + stats.arenas..decay_time - (@jasone, @cevans87) - - Add --with-malloc-conf, which makes it possible to embed a default - options string during configuration. This was motivated by the desire to - specify --with-malloc-conf=purge:decay , since the default must remain - purge:ratio until the 5.0.0 release. (@jasone) - - Add MS Visual Studio 2015 support. (@rustyx, @yuslepukhin) - - Make *allocx() size class overflow behavior defined. The maximum - size class is now less than PTRDIFF_MAX to protect applications against - numerical overflow, and all allocation functions are guaranteed to indicate - errors rather than potentially crashing if the request size exceeds the - maximum size class. (@jasone) - - jeprof: - + Add raw heap profile support. (@jasone) - + Add --retain and --exclude for backtrace symbol filtering. (@jasone) - - Optimizations: - - Optimize the fast path to combine various bootstrapping and configuration - checks and execute more streamlined code in the common case. (@interwq) - - Use linear scan for small bitmaps (used for small object tracking). 
In - addition to speeding up bitmap operations on 64-bit systems, this reduces - allocator metadata overhead by approximately 0.2%. (@djwatson) - - Separate arena_avail trees, which substantially speeds up run tree - operations. (@djwatson) - - Use memoization (boot-time-computed table) for run quantization. Separate - arena_avail trees reduced the importance of this optimization. (@jasone) - - Attempt mmap-based in-place huge reallocation. This can dramatically speed - up incremental huge reallocation. (@jasone) - - Incompatible changes: - - Make opt.narenas unsigned rather than size_t. (@jasone) - - Bug fixes: - - Fix stats.cactive accounting regression. (@rustyx, @jasone) - - Handle unaligned keys in hash(). This caused problems for some ARM systems. - (@jasone, @cferris1000) - - Refactor arenas array. In addition to fixing a fork-related deadlock, this - makes arena lookups faster and simpler. (@jasone) - - Move retained memory allocation out of the default chunk allocation - function, to a location that gets executed even if the application installs - a custom chunk allocation function. This resolves a virtual memory leak. - (@buchgr) - - Fix a potential tsd cleanup leak. (@cferris1000, @jasone) - - Fix run quantization. In practice this bug had no impact unless - applications requested memory with alignment exceeding one page. - (@jasone, @djwatson) - - Fix LinuxThreads-specific bootstrapping deadlock. (Cosmin Paraschiv) - - jeprof: - + Don't discard curl options if timeout is not defined. (@djwatson) - + Detect failed profile fetches. (@djwatson) - - Fix stats.arenas..{dss,lg_dirty_mult,decay_time,pactive,pdirty} for - --disable-stats case. (@jasone) - -* 4.0.4 (October 24, 2015) - - This bugfix release fixes another xallocx() regression. No other regressions - have come to light in over a month, so this is likely a good starting point - for people who prefer to wait for "dot one" releases with all the major issues - shaken out. 
- - Bug fixes: - - Fix xallocx(..., MALLOCX_ZERO to zero the last full trailing page of large - allocations that have been randomly assigned an offset of 0 when - --enable-cache-oblivious configure option is enabled. - * 4.0.3 (September 24, 2015) This bugfix release continues the trend of xallocx() and heap profiling fixes. diff --git a/deps/jemalloc/INSTALL b/deps/jemalloc/INSTALL index cce3ed711..8d3968745 100644 --- a/deps/jemalloc/INSTALL +++ b/deps/jemalloc/INSTALL @@ -35,10 +35,6 @@ any of the following arguments (not a definitive list) to 'configure': will cause files to be installed into /usr/local/include, /usr/local/lib, and /usr/local/man. ---with-version=..--g - Use the specified version string rather than trying to generate one (if in - a git repository) or use existing the VERSION file (if present). - --with-rpath= Embed one or more library paths, so that libjemalloc can find the libraries it is linked to. This works only on ELF-based systems. @@ -88,14 +84,6 @@ any of the following arguments (not a definitive list) to 'configure': versions of jemalloc can coexist in the same installation directory. For example, libjemalloc.so.0 becomes libjemalloc.so.0. ---with-malloc-conf= - Embed as a run-time options string that is processed prior to - the malloc_conf global variable, the /etc/malloc.conf symlink, and the - MALLOC_CONF environment variable. For example, to change the default chunk - size to 256 KiB: - - --with-malloc-conf=lg_chunk:18 - --disable-cc-silence Disable code that silences non-useful compiler warnings. This is mainly useful during development when auditing the set of warnings that are being @@ -206,11 +194,6 @@ any of the following arguments (not a definitive list) to 'configure': most extreme case increases physical memory usage for the 16 KiB size class to 20 KiB. ---disable-syscall - Disable use of syscall(2) rather than {open,read,write,close}(2). 
This is - intended as a workaround for systems that place security limitations on - syscall(2). - --with-xslroot= Specify where to find DocBook XSL stylesheets when building the documentation. @@ -332,15 +315,6 @@ LDFLAGS="?" PATH="?" 'configure' uses this to find programs. -In some cases it may be necessary to work around configuration results that do -not match reality. For example, Linux 4.5 added support for the MADV_FREE flag -to madvise(2), which can cause problems if building on a host with MADV_FREE -support and deploying to a target without. To work around this, use a cache -file to override the relevant configuration variable defined in configure.ac, -e.g.: - - echo "je_cv_madv_free=no" > config.cache && ./configure -C - === Advanced compilation ======================================================= To build only parts of jemalloc, use the following targets: diff --git a/deps/jemalloc/Makefile.in b/deps/jemalloc/Makefile.in index c70536391..1ac6f2926 100644 --- a/deps/jemalloc/Makefile.in +++ b/deps/jemalloc/Makefile.in @@ -24,11 +24,11 @@ abs_objroot := @abs_objroot@ # Build parameters. 
CPPFLAGS := @CPPFLAGS@ -I$(srcroot)include -I$(objroot)include -EXTRA_CFLAGS := @EXTRA_CFLAGS@ -CFLAGS := @CFLAGS@ $(EXTRA_CFLAGS) +CFLAGS := @CFLAGS@ LDFLAGS := @LDFLAGS@ EXTRA_LDFLAGS := @EXTRA_LDFLAGS@ LIBS := @LIBS@ +TESTLIBS := @TESTLIBS@ RPATH_EXTRA := @RPATH_EXTRA@ SO := @so@ IMPORTLIB := @importlib@ @@ -53,19 +53,15 @@ enable_prof := @enable_prof@ enable_valgrind := @enable_valgrind@ enable_zone_allocator := @enable_zone_allocator@ MALLOC_CONF := @JEMALLOC_CPREFIX@MALLOC_CONF -link_whole_archive := @link_whole_archive@ DSO_LDFLAGS = @DSO_LDFLAGS@ SOREV = @SOREV@ PIC_CFLAGS = @PIC_CFLAGS@ CTARGET = @CTARGET@ LDTARGET = @LDTARGET@ -TEST_LD_MODE = @TEST_LD_MODE@ MKLIB = @MKLIB@ AR = @AR@ ARFLAGS = @ARFLAGS@ CC_MM = @CC_MM@ -LM := @LM@ -INSTALL = @INSTALL@ ifeq (macho, $(ABI)) TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH="$(objroot)lib" @@ -82,34 +78,15 @@ LIBJEMALLOC := $(LIBPREFIX)jemalloc$(install_suffix) # Lists of files. BINS := $(objroot)bin/jemalloc-config $(objroot)bin/jemalloc.sh $(objroot)bin/jeprof C_HDRS := $(objroot)include/jemalloc/jemalloc$(install_suffix).h -C_SRCS := $(srcroot)src/jemalloc.c \ - $(srcroot)src/arena.c \ - $(srcroot)src/atomic.c \ - $(srcroot)src/base.c \ - $(srcroot)src/bitmap.c \ - $(srcroot)src/chunk.c \ - $(srcroot)src/chunk_dss.c \ - $(srcroot)src/chunk_mmap.c \ - $(srcroot)src/ckh.c \ - $(srcroot)src/ctl.c \ - $(srcroot)src/extent.c \ - $(srcroot)src/hash.c \ - $(srcroot)src/huge.c \ - $(srcroot)src/mb.c \ - $(srcroot)src/mutex.c \ - $(srcroot)src/nstime.c \ - $(srcroot)src/pages.c \ - $(srcroot)src/prng.c \ - $(srcroot)src/prof.c \ - $(srcroot)src/quarantine.c \ - $(srcroot)src/rtree.c \ - $(srcroot)src/stats.c \ - $(srcroot)src/spin.c \ - $(srcroot)src/tcache.c \ - $(srcroot)src/ticker.c \ - $(srcroot)src/tsd.c \ - $(srcroot)src/util.c \ - $(srcroot)src/witness.c +C_SRCS := $(srcroot)src/jemalloc.c $(srcroot)src/arena.c \ + $(srcroot)src/atomic.c $(srcroot)src/base.c $(srcroot)src/bitmap.c \ + $(srcroot)src/chunk.c 
$(srcroot)src/chunk_dss.c \ + $(srcroot)src/chunk_mmap.c $(srcroot)src/ckh.c $(srcroot)src/ctl.c \ + $(srcroot)src/extent.c $(srcroot)src/hash.c $(srcroot)src/huge.c \ + $(srcroot)src/mb.c $(srcroot)src/mutex.c $(srcroot)src/pages.c \ + $(srcroot)src/prof.c $(srcroot)src/quarantine.c $(srcroot)src/rtree.c \ + $(srcroot)src/stats.c $(srcroot)src/tcache.c $(srcroot)src/util.c \ + $(srcroot)src/tsd.c ifeq ($(enable_valgrind), 1) C_SRCS += $(srcroot)src/valgrind.c endif @@ -128,11 +105,6 @@ DSOS := $(objroot)lib/$(LIBJEMALLOC).$(SOREV) ifneq ($(SOREV),$(SO)) DSOS += $(objroot)lib/$(LIBJEMALLOC).$(SO) endif -ifeq (1, $(link_whole_archive)) -LJEMALLOC := -Wl,--whole-archive -L$(objroot)lib -l$(LIBJEMALLOC) -Wl,--no-whole-archive -else -LJEMALLOC := $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) -endif PC := $(objroot)jemalloc.pc MAN3 := $(objroot)doc/jemalloc$(install_suffix).3 DOCS_XML := $(objroot)doc/jemalloc$(install_suffix).xml @@ -144,19 +116,10 @@ C_TESTLIB_SRCS := $(srcroot)test/src/btalloc.c $(srcroot)test/src/btalloc_0.c \ $(srcroot)test/src/mtx.c $(srcroot)test/src/mq.c \ $(srcroot)test/src/SFMT.c $(srcroot)test/src/test.c \ $(srcroot)test/src/thd.c $(srcroot)test/src/timer.c -ifeq (1, $(link_whole_archive)) -C_UTIL_INTEGRATION_SRCS := -else -C_UTIL_INTEGRATION_SRCS := $(srcroot)src/nstime.c $(srcroot)src/util.c -endif -TESTS_UNIT := \ - $(srcroot)test/unit/a0.c \ - $(srcroot)test/unit/arena_reset.c \ - $(srcroot)test/unit/atomic.c \ +C_UTIL_INTEGRATION_SRCS := $(srcroot)src/util.c +TESTS_UNIT := $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/bitmap.c \ $(srcroot)test/unit/ckh.c \ - $(srcroot)test/unit/decay.c \ - $(srcroot)test/unit/fork.c \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/junk.c \ $(srcroot)test/unit/junk_alloc.c \ @@ -166,10 +129,6 @@ TESTS_UNIT := \ $(srcroot)test/unit/math.c \ $(srcroot)test/unit/mq.c \ $(srcroot)test/unit/mtx.c \ - $(srcroot)test/unit/pack.c \ - $(srcroot)test/unit/pages.c \ - $(srcroot)test/unit/ph.c \ - 
$(srcroot)test/unit/prng.c \ $(srcroot)test/unit/prof_accum.c \ $(srcroot)test/unit/prof_active.c \ $(srcroot)test/unit/prof_gdump.c \ @@ -181,16 +140,11 @@ TESTS_UNIT := \ $(srcroot)test/unit/quarantine.c \ $(srcroot)test/unit/rb.c \ $(srcroot)test/unit/rtree.c \ - $(srcroot)test/unit/run_quantize.c \ $(srcroot)test/unit/SFMT.c \ $(srcroot)test/unit/size_classes.c \ - $(srcroot)test/unit/smoothstep.c \ $(srcroot)test/unit/stats.c \ - $(srcroot)test/unit/ticker.c \ - $(srcroot)test/unit/nstime.c \ $(srcroot)test/unit/tsd.c \ $(srcroot)test/unit/util.c \ - $(srcroot)test/unit/witness.c \ $(srcroot)test/unit/zero.c TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \ $(srcroot)test/integration/allocated.c \ @@ -312,69 +266,69 @@ $(STATIC_LIBS): $(objroot)test/unit/%$(EXE): $(objroot)test/unit/%.$(O) $(TESTS_UNIT_LINK_OBJS) $(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS) @mkdir -p $(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) $(LM) $(EXTRA_LDFLAGS) + $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(TESTLIBS) $(EXTRA_LDFLAGS) $(objroot)test/integration/%$(EXE): $(objroot)test/integration/%.$(O) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) - $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LJEMALLOC) $(LDFLAGS) $(filter-out -lm,$(filter -lrt -lpthread,$(LIBS))) $(LM) $(EXTRA_LDFLAGS) + $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(filter -lpthread,$(LIBS))) -lm $(TESTLIBS) $(EXTRA_LDFLAGS) $(objroot)test/stress/%$(EXE): $(objroot)test/stress/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_STRESS_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) - $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) 
$(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) $(LM) $(EXTRA_LDFLAGS) + $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(TESTLIBS) $(EXTRA_LDFLAGS) build_lib_shared: $(DSOS) build_lib_static: $(STATIC_LIBS) build_lib: build_lib_shared build_lib_static install_bin: - $(INSTALL) -d $(BINDIR) + install -d $(BINDIR) @for b in $(BINS); do \ - echo "$(INSTALL) -m 755 $$b $(BINDIR)"; \ - $(INSTALL) -m 755 $$b $(BINDIR); \ + echo "install -m 755 $$b $(BINDIR)"; \ + install -m 755 $$b $(BINDIR); \ done install_include: - $(INSTALL) -d $(INCLUDEDIR)/jemalloc + install -d $(INCLUDEDIR)/jemalloc @for h in $(C_HDRS); do \ - echo "$(INSTALL) -m 644 $$h $(INCLUDEDIR)/jemalloc"; \ - $(INSTALL) -m 644 $$h $(INCLUDEDIR)/jemalloc; \ + echo "install -m 644 $$h $(INCLUDEDIR)/jemalloc"; \ + install -m 644 $$h $(INCLUDEDIR)/jemalloc; \ done install_lib_shared: $(DSOS) - $(INSTALL) -d $(LIBDIR) - $(INSTALL) -m 755 $(objroot)lib/$(LIBJEMALLOC).$(SOREV) $(LIBDIR) + install -d $(LIBDIR) + install -m 755 $(objroot)lib/$(LIBJEMALLOC).$(SOREV) $(LIBDIR) ifneq ($(SOREV),$(SO)) ln -sf $(LIBJEMALLOC).$(SOREV) $(LIBDIR)/$(LIBJEMALLOC).$(SO) endif install_lib_static: $(STATIC_LIBS) - $(INSTALL) -d $(LIBDIR) + install -d $(LIBDIR) @for l in $(STATIC_LIBS); do \ - echo "$(INSTALL) -m 755 $$l $(LIBDIR)"; \ - $(INSTALL) -m 755 $$l $(LIBDIR); \ + echo "install -m 755 $$l $(LIBDIR)"; \ + install -m 755 $$l $(LIBDIR); \ done install_lib_pc: $(PC) - $(INSTALL) -d $(LIBDIR)/pkgconfig + install -d $(LIBDIR)/pkgconfig @for l in $(PC); do \ - echo "$(INSTALL) -m 644 $$l $(LIBDIR)/pkgconfig"; \ - $(INSTALL) -m 644 $$l $(LIBDIR)/pkgconfig; \ + echo "install -m 644 $$l $(LIBDIR)/pkgconfig"; \ + install -m 644 $$l $(LIBDIR)/pkgconfig; \ done install_lib: install_lib_shared install_lib_static install_lib_pc install_doc_html: - $(INSTALL) -d $(DATADIR)/doc/jemalloc$(install_suffix) + 
install -d $(DATADIR)/doc/jemalloc$(install_suffix) @for d in $(DOCS_HTML); do \ - echo "$(INSTALL) -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix)"; \ - $(INSTALL) -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix); \ + echo "install -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix)"; \ + install -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix); \ done install_doc_man: - $(INSTALL) -d $(MANDIR)/man3 + install -d $(MANDIR)/man3 @for d in $(DOCS_MAN3); do \ - echo "$(INSTALL) -m 644 $$d $(MANDIR)/man3"; \ - $(INSTALL) -m 644 $$d $(MANDIR)/man3; \ + echo "install -m 644 $$d $(MANDIR)/man3"; \ + install -m 644 $$d $(MANDIR)/man3; \ done install_doc: install_doc_html install_doc_man @@ -395,22 +349,18 @@ stress_dir: check_dir: check_unit_dir check_integration_dir check_unit: tests_unit check_unit_dir - $(MALLOC_CONF)="purge:ratio" $(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%) - $(MALLOC_CONF)="purge:decay" $(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%) + $(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%) check_integration_prof: tests_integration check_integration_dir ifeq ($(enable_prof), 1) $(MALLOC_CONF)="prof:true" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) $(MALLOC_CONF)="prof:true,prof_active:false" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) endif -check_integration_decay: tests_integration check_integration_dir - $(MALLOC_CONF)="purge:decay,decay_time:-1" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) - $(MALLOC_CONF)="purge:decay,decay_time:0" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) - $(MALLOC_CONF)="purge:decay" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) check_integration: tests_integration check_integration_dir $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) stress: 
tests_stress stress_dir $(SHELL) $(objroot)test/test.sh $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%) -check: check_unit check_integration check_integration_decay check_integration_prof +check: tests check_dir check_integration_prof + $(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%) $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) ifeq ($(enable_code_coverage), 1) coverage_unit: check_unit diff --git a/deps/jemalloc/README b/deps/jemalloc/README index 5ff24a9ef..9b268f422 100644 --- a/deps/jemalloc/README +++ b/deps/jemalloc/README @@ -17,4 +17,4 @@ jemalloc. The ChangeLog file contains a brief summary of changes for each release. -URL: http://jemalloc.net/ +URL: http://www.canonware.com/jemalloc/ diff --git a/deps/jemalloc/VERSION b/deps/jemalloc/VERSION index 810bd6d4c..f1f9f1c61 100644 --- a/deps/jemalloc/VERSION +++ b/deps/jemalloc/VERSION @@ -1 +1 @@ -4.4.0-0-gf1f76357313e7dcad7262f17a48ff0a2e005fcdc +4.0.3-0-ge9192eacf8935e29fc62fddc2701f7942b1cc02c diff --git a/deps/jemalloc/bin/jeprof.in b/deps/jemalloc/bin/jeprof.in index 42087fcec..e7178078a 100644 --- a/deps/jemalloc/bin/jeprof.in +++ b/deps/jemalloc/bin/jeprof.in @@ -95,7 +95,7 @@ my @EVINCE = ("evince"); # could also be xpdf or perhaps acroread my @KCACHEGRIND = ("kcachegrind"); my @PS2PDF = ("ps2pdf"); # These are used for dynamic profiles -my @URL_FETCHER = ("curl", "-s", "--fail"); +my @URL_FETCHER = ("curl", "-s"); # These are the web pages that servers need to support for dynamic profiles my $HEAP_PAGE = "/pprof/heap"; @@ -223,14 +223,12 @@ Call-graph Options: --nodefraction= Hide nodes below *total [default=.005] --edgefraction= Hide edges below *total [default=.001] --maxdegree= Max incoming/outgoing edges per node [default=8] - --focus= Focus on backtraces with nodes matching + --focus= Focus on nodes matching --thread= Show profile for thread - --ignore= Ignore backtraces with nodes matching + --ignore= Ignore nodes matching --scale= Set GV scaling [default=0] --heapcheck Make 
nodes with non-0 object counts (i.e. direct leak generators) more visible - --retain= Retain only nodes that match - --exclude= Exclude all nodes that match Miscellaneous: --tools=[,...] \$PATH for object tool pathnames @@ -341,8 +339,6 @@ sub Init() { $main::opt_ignore = ''; $main::opt_scale = 0; $main::opt_heapcheck = 0; - $main::opt_retain = ''; - $main::opt_exclude = ''; $main::opt_seconds = 30; $main::opt_lib = ""; @@ -414,8 +410,6 @@ sub Init() { "ignore=s" => \$main::opt_ignore, "scale=i" => \$main::opt_scale, "heapcheck" => \$main::opt_heapcheck, - "retain=s" => \$main::opt_retain, - "exclude=s" => \$main::opt_exclude, "inuse_space!" => \$main::opt_inuse_space, "inuse_objects!" => \$main::opt_inuse_objects, "alloc_space!" => \$main::opt_alloc_space, @@ -1166,21 +1160,8 @@ sub PrintSymbolizedProfile { } print '---', "\n"; - my $profile_marker; - if ($main::profile_type eq 'heap') { - $HEAP_PAGE =~ m,[^/]+$,; # matches everything after the last slash - $profile_marker = $&; - } elsif ($main::profile_type eq 'growth') { - $GROWTH_PAGE =~ m,[^/]+$,; # matches everything after the last slash - $profile_marker = $&; - } elsif ($main::profile_type eq 'contention') { - $CONTENTION_PAGE =~ m,[^/]+$,; # matches everything after the last slash - $profile_marker = $&; - } else { # elsif ($main::profile_type eq 'cpu') - $PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash - $profile_marker = $&; - } - + $PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $profile_marker = $&; print '--- ', $profile_marker, "\n"; if (defined($main::collected_profile)) { # if used with remote fetch, simply dump the collected profile to output. 
@@ -1190,12 +1171,6 @@ sub PrintSymbolizedProfile { } close(SRC); } else { - # --raw/http: For everything to work correctly for non-remote profiles, we - # would need to extend PrintProfileData() to handle all possible profile - # types, re-enable the code that is currently disabled in ReadCPUProfile() - # and FixCallerAddresses(), and remove the remote profile dumping code in - # the block above. - die "--raw/http: jeprof can only dump remote profiles for --raw\n"; # dump a cpu-format profile to standard out PrintProfileData($profile); } @@ -2846,43 +2821,6 @@ sub ExtractCalls { return $calls; } -sub FilterFrames { - my $symbols = shift; - my $profile = shift; - - if ($main::opt_retain eq '' && $main::opt_exclude eq '') { - return $profile; - } - - my $result = {}; - foreach my $k (keys(%{$profile})) { - my $count = $profile->{$k}; - my @addrs = split(/\n/, $k); - my @path = (); - foreach my $a (@addrs) { - my $sym; - if (exists($symbols->{$a})) { - $sym = $symbols->{$a}->[0]; - } else { - $sym = $a; - } - if ($main::opt_retain ne '' && $sym !~ m/$main::opt_retain/) { - next; - } - if ($main::opt_exclude ne '' && $sym =~ m/$main::opt_exclude/) { - next; - } - push(@path, $a); - } - if (scalar(@path) > 0) { - my $reduced_path = join("\n", @path); - AddEntry($result, $reduced_path, $count); - } - } - - return $result; -} - sub RemoveUninterestingFrames { my $symbols = shift; my $profile = shift; @@ -3027,9 +2965,6 @@ sub RemoveUninterestingFrames { my $reduced_path = join("\n", @path); AddEntry($result, $reduced_path, $count); } - - $result = FilterFrames($symbols, $result); - return $result; } @@ -3339,7 +3274,7 @@ sub ResolveRedirectionForCurl { # Add a timeout flat to URL_FETCHER. Returns a new list. 
sub AddFetchTimeout { my $timeout = shift; - my @fetcher = @_; + my @fetcher = shift; if (defined($timeout)) { if (join(" ", @fetcher) =~ m/\bcurl -s/) { push(@fetcher, "--max-time", sprintf("%d", $timeout)); @@ -3385,27 +3320,6 @@ sub ReadSymbols { return $map; } -sub URLEncode { - my $str = shift; - $str =~ s/([^A-Za-z0-9\-_.!~*'()])/ sprintf "%%%02x", ord $1 /eg; - return $str; -} - -sub AppendSymbolFilterParams { - my $url = shift; - my @params = (); - if ($main::opt_retain ne '') { - push(@params, sprintf("retain=%s", URLEncode($main::opt_retain))); - } - if ($main::opt_exclude ne '') { - push(@params, sprintf("exclude=%s", URLEncode($main::opt_exclude))); - } - if (scalar @params > 0) { - $url = sprintf("%s?%s", $url, join("&", @params)); - } - return $url; -} - # Fetches and processes symbols to prepare them for use in the profile output # code. If the optional 'symbol_map' arg is not given, fetches symbols from # $SYMBOL_PAGE for all PC values found in profile. Otherwise, the raw symbols @@ -3430,11 +3344,9 @@ sub FetchSymbols { my $command_line; if (join(" ", @URL_FETCHER) =~ m/\bcurl -s/) { $url = ResolveRedirectionForCurl($url); - $url = AppendSymbolFilterParams($url); $command_line = ShellEscape(@URL_FETCHER, "-d", "\@$main::tmpfile_sym", $url); } else { - $url = AppendSymbolFilterParams($url); $command_line = (ShellEscape(@URL_FETCHER, "--post", $url) . " < " . ShellEscape($main::tmpfile_sym)); } @@ -3515,22 +3427,12 @@ sub FetchDynamicProfile { } $url .= sprintf("seconds=%d", $main::opt_seconds); $fetch_timeout = $main::opt_seconds * 1.01 + 60; - # Set $profile_type for consumption by PrintSymbolizedProfile. - $main::profile_type = 'cpu'; } else { # For non-CPU profiles, we add a type-extension to # the target profile file name. my $suffix = $path; $suffix =~ s,/,.,g; $profile_file .= $suffix; - # Set $profile_type for consumption by PrintSymbolizedProfile. 
- if ($path =~ m/$HEAP_PAGE/) { - $main::profile_type = 'heap'; - } elsif ($path =~ m/$GROWTH_PAGE/) { - $main::profile_type = 'growth'; - } elsif ($path =~ m/$CONTENTION_PAGE/) { - $main::profile_type = 'contention'; - } } my $profile_dir = $ENV{"JEPROF_TMPDIR"} || ($ENV{HOME} . "/jeprof"); @@ -3828,8 +3730,6 @@ sub ReadProfile { my $symbol_marker = $&; $PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash my $profile_marker = $&; - $HEAP_PAGE =~ m,[^/]+$,; # matches everything after the last slash - my $heap_marker = $&; # Look at first line to see if it is a heap or a CPU profile. # CPU profile may start with no header at all, and just binary data @@ -3856,13 +3756,7 @@ sub ReadProfile { $header = ReadProfileHeader(*PROFILE) || ""; } - if ($header =~ m/^--- *($heap_marker|$growth_marker)/o) { - # Skip "--- ..." line for profile types that have their own headers. - $header = ReadProfileHeader(*PROFILE) || ""; - } - $main::profile_type = ''; - if ($header =~ m/^heap profile:.*$growth_marker/o) { $main::profile_type = 'growth'; $result = ReadHeapProfile($prog, *PROFILE, $header); @@ -3914,9 +3808,9 @@ sub ReadProfile { # independent implementation. sub FixCallerAddresses { my $stack = shift; - # --raw/http: Always subtract one from pc's, because PrintSymbolizedProfile() - # dumps unadjusted profiles. - { + if ($main::use_symbolized_profile) { + return $stack; + } else { $stack =~ /(\s)/; my $delimiter = $1; my @addrs = split(' ', $stack); @@ -3984,7 +3878,12 @@ sub ReadCPUProfile { for (my $j = 0; $j < $d; $j++) { my $pc = $slots->get($i+$j); # Subtract one from caller pc so we map back to call instr. - $pc--; + # However, don't do this if we're reading a symbolized profile + # file, in which case the subtract-one was done when the file + # was written. 
+ if ($j > 0 && !$main::use_symbolized_profile) { + $pc--; + } $pc = sprintf("%0*x", $address_length, $pc); $pcs->{$pc} = 1; push @k, $pc; diff --git a/deps/jemalloc/build-aux/config.guess b/deps/jemalloc/config.guess similarity index 90% rename from deps/jemalloc/build-aux/config.guess rename to deps/jemalloc/config.guess index 2e9ad7fe8..1f5c50c0d 100755 --- a/deps/jemalloc/build-aux/config.guess +++ b/deps/jemalloc/config.guess @@ -1,8 +1,8 @@ #! /bin/sh # Attempt to guess a canonical system name. -# Copyright 1992-2016 Free Software Foundation, Inc. +# Copyright 1992-2014 Free Software Foundation, Inc. -timestamp='2016-10-02' +timestamp='2014-03-23' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by @@ -24,12 +24,12 @@ timestamp='2016-10-02' # program. This Exception is an additional permission under section 7 # of the GNU General Public License, version 3 ("GPLv3"). # -# Originally written by Per Bothner; maintained since 2000 by Ben Elliston. +# Originally written by Per Bothner. # # You can get the latest version of this script from: -# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess +# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD # -# Please send patches to . +# Please send patches with a ChangeLog entry to config-patches@gnu.org. me=`echo "$0" | sed -e 's,.*/,,'` @@ -50,7 +50,7 @@ version="\ GNU config.guess ($timestamp) Originally written by Per Bothner. -Copyright 1992-2016 Free Software Foundation, Inc. +Copyright 1992-2014 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." @@ -168,29 +168,19 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in # Note: NetBSD doesn't particularly care about the vendor # portion of the name. 
We always set it to "unknown". sysctl="sysctl -n hw.machine_arch" - UNAME_MACHINE_ARCH=`(uname -p 2>/dev/null || \ - /sbin/$sysctl 2>/dev/null || \ - /usr/sbin/$sysctl 2>/dev/null || \ - echo unknown)` + UNAME_MACHINE_ARCH=`(/sbin/$sysctl 2>/dev/null || \ + /usr/sbin/$sysctl 2>/dev/null || echo unknown)` case "${UNAME_MACHINE_ARCH}" in armeb) machine=armeb-unknown ;; arm*) machine=arm-unknown ;; sh3el) machine=shl-unknown ;; sh3eb) machine=sh-unknown ;; sh5el) machine=sh5le-unknown ;; - earmv*) - arch=`echo ${UNAME_MACHINE_ARCH} | sed -e 's,^e\(armv[0-9]\).*$,\1,'` - endian=`echo ${UNAME_MACHINE_ARCH} | sed -ne 's,^.*\(eb\)$,\1,p'` - machine=${arch}${endian}-unknown - ;; *) machine=${UNAME_MACHINE_ARCH}-unknown ;; esac # The Operating System including object format, if it has switched - # to ELF recently (or will in the future) and ABI. + # to ELF recently, or will in the future. case "${UNAME_MACHINE_ARCH}" in - earm*) - os=netbsdelf - ;; arm*|i386|m68k|ns32k|sh3*|sparc|vax) eval $set_cc_for_build if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ @@ -207,13 +197,6 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in os=netbsd ;; esac - # Determine ABI tags. - case "${UNAME_MACHINE_ARCH}" in - earm*) - expr='s/^earmv[0-9]/-eabi/;s/eb$//' - abi=`echo ${UNAME_MACHINE_ARCH} | sed -e "$expr"` - ;; - esac # The OS release # Debian GNU/NetBSD machines have a different userland, and # thus, need a distinct triplet. However, they do not need @@ -224,13 +207,13 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in release='-gnu' ;; *) - release=`echo ${UNAME_RELEASE} | sed -e 's/[-_].*//' | cut -d. -f1,2` + release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'` ;; esac # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: # contains redundant information, the shorter form: # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. 
- echo "${machine}-${os}${release}${abi}" + echo "${machine}-${os}${release}" exit ;; *:Bitrig:*:*) UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'` @@ -240,10 +223,6 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE} exit ;; - *:LibertyBSD:*:*) - UNAME_MACHINE_ARCH=`arch | sed 's/^.*BSD\.//'` - echo ${UNAME_MACHINE_ARCH}-unknown-libertybsd${UNAME_RELEASE} - exit ;; *:ekkoBSD:*:*) echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE} exit ;; @@ -256,9 +235,6 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in *:MirBSD:*:*) echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE} exit ;; - *:Sortix:*:*) - echo ${UNAME_MACHINE}-unknown-sortix - exit ;; alpha:OSF1:*:*) case $UNAME_RELEASE in *4.0) @@ -275,42 +251,42 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` case "$ALPHA_CPU_TYPE" in "EV4 (21064)") - UNAME_MACHINE=alpha ;; + UNAME_MACHINE="alpha" ;; "EV4.5 (21064)") - UNAME_MACHINE=alpha ;; + UNAME_MACHINE="alpha" ;; "LCA4 (21066/21068)") - UNAME_MACHINE=alpha ;; + UNAME_MACHINE="alpha" ;; "EV5 (21164)") - UNAME_MACHINE=alphaev5 ;; + UNAME_MACHINE="alphaev5" ;; "EV5.6 (21164A)") - UNAME_MACHINE=alphaev56 ;; + UNAME_MACHINE="alphaev56" ;; "EV5.6 (21164PC)") - UNAME_MACHINE=alphapca56 ;; + UNAME_MACHINE="alphapca56" ;; "EV5.7 (21164PC)") - UNAME_MACHINE=alphapca57 ;; + UNAME_MACHINE="alphapca57" ;; "EV6 (21264)") - UNAME_MACHINE=alphaev6 ;; + UNAME_MACHINE="alphaev6" ;; "EV6.7 (21264A)") - UNAME_MACHINE=alphaev67 ;; + UNAME_MACHINE="alphaev67" ;; "EV6.8CB (21264C)") - UNAME_MACHINE=alphaev68 ;; + UNAME_MACHINE="alphaev68" ;; "EV6.8AL (21264B)") - UNAME_MACHINE=alphaev68 ;; + UNAME_MACHINE="alphaev68" ;; "EV6.8CX (21264D)") - UNAME_MACHINE=alphaev68 ;; + UNAME_MACHINE="alphaev68" 
;; "EV6.9A (21264/EV69A)") - UNAME_MACHINE=alphaev69 ;; + UNAME_MACHINE="alphaev69" ;; "EV7 (21364)") - UNAME_MACHINE=alphaev7 ;; + UNAME_MACHINE="alphaev7" ;; "EV7.9 (21364A)") - UNAME_MACHINE=alphaev79 ;; + UNAME_MACHINE="alphaev79" ;; esac # A Pn.n version is a patched version. # A Vn.n version is a released version. # A Tn.n version is a released field test version. # A Xn.n version is an unreleased experimental baselevel. # 1.2 uses "1.2" for uname -r. - echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` + echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` # Reset EXIT trap before exiting to avoid spurious non-zero exit code. exitcode=$? trap '' 0 @@ -383,16 +359,16 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in exit ;; i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) eval $set_cc_for_build - SUN_ARCH=i386 + SUN_ARCH="i386" # If there is a compiler, see if it is configured for 64-bit objects. # Note that the Sun cc does not turn __LP64__ into 1 like gcc does. # This test works for both compilers. 
- if [ "$CC_FOR_BUILD" != no_compiler_found ]; then + if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \ - (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ grep IS_64BIT_ARCH >/dev/null then - SUN_ARCH=x86_64 + SUN_ARCH="x86_64" fi fi echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` @@ -417,7 +393,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in exit ;; sun*:*:4.2BSD:*) UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` - test "x${UNAME_RELEASE}" = x && UNAME_RELEASE=3 + test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3 case "`/bin/arch`" in sun3) echo m68k-sun-sunos${UNAME_RELEASE} @@ -603,9 +579,8 @@ EOF else IBM_ARCH=powerpc fi - if [ -x /usr/bin/lslpp ] ; then - IBM_REV=`/usr/bin/lslpp -Lqc bos.rte.libc | - awk -F: '{ print $3 }' | sed s/[0-9]*$/0/` + if [ -x /usr/bin/oslevel ] ; then + IBM_REV=`/usr/bin/oslevel` else IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} fi @@ -642,13 +617,13 @@ EOF sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` case "${sc_cpu_version}" in - 523) HP_ARCH=hppa1.0 ;; # CPU_PA_RISC1_0 - 528) HP_ARCH=hppa1.1 ;; # CPU_PA_RISC1_1 + 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 + 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 532) # CPU_PA_RISC2_0 case "${sc_kernel_bits}" in - 32) HP_ARCH=hppa2.0n ;; - 64) HP_ARCH=hppa2.0w ;; - '') HP_ARCH=hppa2.0 ;; # HP-UX 10.20 + 32) HP_ARCH="hppa2.0n" ;; + 64) HP_ARCH="hppa2.0w" ;; + '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20 esac ;; esac fi @@ -687,11 +662,11 @@ EOF exit (0); } EOF - (CCOPTS="" $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` + (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` test -z "$HP_ARCH" && HP_ARCH=hppa fi ;; esac - if [ ${HP_ARCH} = hppa2.0w ] + if [ ${HP_ARCH} = "hppa2.0w" ] then eval 
$set_cc_for_build @@ -704,12 +679,12 @@ EOF # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess # => hppa64-hp-hpux11.23 - if echo __LP64__ | (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | + if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | grep -q __LP64__ then - HP_ARCH=hppa2.0w + HP_ARCH="hppa2.0w" else - HP_ARCH=hppa64 + HP_ARCH="hppa64" fi fi echo ${HP_ARCH}-hp-hpux${HPUX_REV} @@ -814,14 +789,14 @@ EOF echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' exit ;; F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) - FUJITSU_PROC=`uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` - FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` + FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` + FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" exit ;; 5000:UNIX_System_V:4.*:*) - FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` - FUJITSU_REL=`echo ${UNAME_RELEASE} | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/'` + FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" exit ;; i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) @@ -903,7 +878,7 @@ EOF exit ;; *:GNU/*:*:*) # other systems with GNU libc and userland - echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]"``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC} + echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo 
${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC} exit ;; i*86:Minix:*:*) echo ${UNAME_MACHINE}-pc-minix @@ -926,7 +901,7 @@ EOF EV68*) UNAME_MACHINE=alphaev68 ;; esac objdump --private-headers /bin/sh | grep -q ld.so.1 - if test "$?" = 0 ; then LIBC=gnulibc1 ; fi + if test "$?" = 0 ; then LIBC="gnulibc1" ; fi echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; arc:Linux:*:* | arceb:Linux:*:*) @@ -957,9 +932,6 @@ EOF crisv32:Linux:*:*) echo ${UNAME_MACHINE}-axis-linux-${LIBC} exit ;; - e2k:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} - exit ;; frv:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; @@ -972,9 +944,6 @@ EOF ia64:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; - k1om:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} - exit ;; m32r*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; @@ -1000,9 +969,6 @@ EOF eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'` test x"${CPU}" != x && { echo "${CPU}-unknown-linux-${LIBC}"; exit; } ;; - mips64el:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} - exit ;; openrisc*:Linux:*:*) echo or1k-unknown-linux-${LIBC} exit ;; @@ -1035,9 +1001,6 @@ EOF ppcle:Linux:*:*) echo powerpcle-unknown-linux-${LIBC} exit ;; - riscv32:Linux:*:* | riscv64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} - exit ;; s390:Linux:*:* | s390x:Linux:*:*) echo ${UNAME_MACHINE}-ibm-linux-${LIBC} exit ;; @@ -1057,7 +1020,7 @@ EOF echo ${UNAME_MACHINE}-dec-linux-${LIBC} exit ;; x86_64:Linux:*:*) - echo ${UNAME_MACHINE}-pc-linux-${LIBC} + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; xtensa*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-${LIBC} @@ -1136,7 +1099,7 @@ EOF # uname -m prints for DJGPP always 'pc', but it prints nothing about # the processor, so we play safe by assuming i586. 
# Note: whatever this is, it MUST be the same as what config.sub - # prints for the "djgpp" host, or else GDB configure will decide that + # prints for the "djgpp" host, or else GDB configury will decide that # this is a cross-build. echo i586-pc-msdosdjgpp exit ;; @@ -1285,9 +1248,6 @@ EOF SX-8R:SUPER-UX:*:*) echo sx8r-nec-superux${UNAME_RELEASE} exit ;; - SX-ACE:SUPER-UX:*:*) - echo sxace-nec-superux${UNAME_RELEASE} - exit ;; Power*:Rhapsody:*:*) echo powerpc-apple-rhapsody${UNAME_RELEASE} exit ;; @@ -1301,9 +1261,9 @@ EOF UNAME_PROCESSOR=powerpc fi if test `echo "$UNAME_RELEASE" | sed -e 's/\..*//'` -le 10 ; then - if [ "$CC_FOR_BUILD" != no_compiler_found ]; then + if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ - (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ grep IS_64BIT_ARCH >/dev/null then case $UNAME_PROCESSOR in @@ -1325,7 +1285,7 @@ EOF exit ;; *:procnto*:*:* | *:QNX:[0123456789]*:*) UNAME_PROCESSOR=`uname -p` - if test "$UNAME_PROCESSOR" = x86; then + if test "$UNAME_PROCESSOR" = "x86"; then UNAME_PROCESSOR=i386 UNAME_MACHINE=pc fi @@ -1356,7 +1316,7 @@ EOF # "uname -m" is not consistent, so use $cputype instead. 386 # is converted to i386 for consistency with other x86 # operating systems. - if test "$cputype" = 386; then + if test "$cputype" = "386"; then UNAME_MACHINE=i386 else UNAME_MACHINE="$cputype" @@ -1398,7 +1358,7 @@ EOF echo i386-pc-xenix exit ;; i*86:skyos:*:*) - echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE} | sed -e 's/ .*$//'` + echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//' exit ;; i*86:rdos:*:*) echo ${UNAME_MACHINE}-pc-rdos @@ -1409,25 +1369,23 @@ EOF x86_64:VMkernel:*:*) echo ${UNAME_MACHINE}-unknown-esx exit ;; - amd64:Isilon\ OneFS:*:*) - echo x86_64-unknown-onefs - exit ;; esac cat >&2 < in order to provide the needed +information to handle your system. 
config.guess timestamp = $timestamp diff --git a/deps/jemalloc/build-aux/config.sub b/deps/jemalloc/config.sub similarity index 95% rename from deps/jemalloc/build-aux/config.sub rename to deps/jemalloc/config.sub index dd2ca93c6..0ccff7706 100755 --- a/deps/jemalloc/build-aux/config.sub +++ b/deps/jemalloc/config.sub @@ -1,8 +1,8 @@ #! /bin/sh # Configuration validation subroutine script. -# Copyright 1992-2016 Free Software Foundation, Inc. +# Copyright 1992-2014 Free Software Foundation, Inc. -timestamp='2016-11-04' +timestamp='2014-05-01' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by @@ -25,7 +25,7 @@ timestamp='2016-11-04' # of the GNU General Public License, version 3 ("GPLv3"). -# Please send patches to . +# Please send patches with a ChangeLog entry to config-patches@gnu.org. # # Configuration subroutine to validate and canonicalize a configuration type. # Supply the specified configuration type as an argument. @@ -33,7 +33,7 @@ timestamp='2016-11-04' # Otherwise, we print the canonical config type on stdout and succeed. # You can get the latest version of this script from: -# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub +# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD # This file is supposed to be the same for all GNU packages # and recognize all the CPU types, system types and aliases @@ -53,7 +53,8 @@ timestamp='2016-11-04' me=`echo "$0" | sed -e 's,.*/,,'` usage="\ -Usage: $0 [OPTION] CPU-MFR-OPSYS or ALIAS +Usage: $0 [OPTION] CPU-MFR-OPSYS + $0 [OPTION] ALIAS Canonicalize a configuration name. @@ -67,7 +68,7 @@ Report bugs and patches to ." version="\ GNU config.sub ($timestamp) -Copyright 1992-2016 Free Software Foundation, Inc. +Copyright 1992-2014 Free Software Foundation, Inc. This is free software; see the source for copying conditions. 
There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." @@ -116,8 +117,8 @@ maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` case $maybe_os in nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \ linux-musl* | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \ - knetbsd*-gnu* | netbsd*-gnu* | netbsd*-eabi* | \ - kopensolaris*-gnu* | cloudabi*-eabi* | \ + knetbsd*-gnu* | netbsd*-gnu* | \ + kopensolaris*-gnu* | \ storm-chaos* | os2-emx* | rtmk-nova*) os=-$maybe_os basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` @@ -254,13 +255,12 @@ case $basic_machine in | arc | arceb \ | arm | arm[bl]e | arme[lb] | armv[2-8] | armv[3-8][lb] | armv7[arm] \ | avr | avr32 \ - | ba \ | be32 | be64 \ | bfin \ | c4x | c8051 | clipper \ | d10v | d30v | dlx | dsp16xx \ - | e2k | epiphany \ - | fido | fr30 | frv | ft32 \ + | epiphany \ + | fido | fr30 | frv \ | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ | hexagon \ | i370 | i860 | i960 | ia64 \ @@ -301,12 +301,10 @@ case $basic_machine in | open8 | or1k | or1knd | or32 \ | pdp10 | pdp11 | pj | pjl \ | powerpc | powerpc64 | powerpc64le | powerpcle \ - | pru \ | pyramid \ - | riscv32 | riscv64 \ | rl78 | rx \ | score \ - | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[234]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ + | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ | sh64 | sh64le \ | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \ | sparcv8 | sparcv9 | sparcv9b | sparcv9v \ @@ -314,7 +312,6 @@ case $basic_machine in | tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \ | ubicom32 \ | v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \ - | visium \ | we32k \ | x86 | xc16x | xstormy16 | xtensa \ | z8k | z80) @@ -329,9 +326,6 @@ case $basic_machine in c6x) basic_machine=tic6x-unknown ;; - leon|leon[3-9]) - 
basic_machine=sparc-$basic_machine - ;; m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | nvptx | picochip) basic_machine=$basic_machine-unknown os=-none @@ -377,13 +371,12 @@ case $basic_machine in | alphapca5[67]-* | alpha64pca5[67]-* | arc-* | arceb-* \ | arm-* | armbe-* | armle-* | armeb-* | armv*-* \ | avr-* | avr32-* \ - | ba-* \ | be32-* | be64-* \ | bfin-* | bs2000-* \ | c[123]* | c30-* | [cjt]90-* | c4x-* \ | c8051-* | clipper-* | craynv-* | cydra-* \ | d10v-* | d30v-* | dlx-* \ - | e2k-* | elxsi-* \ + | elxsi-* \ | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \ | h8300-* | h8500-* \ | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \ @@ -429,15 +422,13 @@ case $basic_machine in | orion-* \ | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \ | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \ - | pru-* \ | pyramid-* \ - | riscv32-* | riscv64-* \ | rl78-* | romp-* | rs6000-* | rx-* \ | sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \ | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \ | sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \ | sparclite-* \ - | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx*-* \ + | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx?-* \ | tahoe-* \ | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \ | tile*-* \ @@ -445,7 +436,6 @@ case $basic_machine in | ubicom32-* \ | v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \ | vax-* \ - | visium-* \ | we32k-* \ | x86-* | x86_64-* | xc16x-* | xps100-* \ | xstormy16-* | xtensa*-* \ @@ -522,9 +512,6 @@ case $basic_machine in basic_machine=i386-pc os=-aros ;; - asmjs) - basic_machine=asmjs-unknown - ;; aux) basic_machine=m68k-apple os=-aux @@ -645,14 +632,6 @@ case $basic_machine in basic_machine=m68k-bull os=-sysv3 ;; - e500v[12]) - basic_machine=powerpc-unknown - os=$os"spe" - ;; - e500v[12]-*) - basic_machine=powerpc-`echo $basic_machine | 
sed 's/^[^-]*-//'` - os=$os"spe" - ;; ebmon29k) basic_machine=a29k-amd os=-ebmon @@ -794,9 +773,6 @@ case $basic_machine in basic_machine=m68k-isi os=-sysv ;; - leon-*|leon[3-9]-*) - basic_machine=sparc-`echo $basic_machine | sed 's/-.*//'` - ;; m68knommu) basic_machine=m68k-unknown os=-linux @@ -852,10 +828,6 @@ case $basic_machine in basic_machine=powerpc-unknown os=-morphos ;; - moxiebox) - basic_machine=moxie-unknown - os=-moxiebox - ;; msdos) basic_machine=i386-pc os=-msdos @@ -1032,7 +1004,7 @@ case $basic_machine in ppc-* | ppcbe-*) basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` ;; - ppcle | powerpclittle) + ppcle | powerpclittle | ppc-le | powerpc-little) basic_machine=powerpcle-unknown ;; ppcle-* | powerpclittle-*) @@ -1042,7 +1014,7 @@ case $basic_machine in ;; ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'` ;; - ppc64le | powerpc64little) + ppc64le | powerpc64little | ppc64-le | powerpc64-little) basic_machine=powerpc64le-unknown ;; ppc64le-* | powerpc64little-*) @@ -1388,28 +1360,27 @@ case $os in | -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \ | -sym* | -kopensolaris* | -plan9* \ | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \ - | -aos* | -aros* | -cloudabi* | -sortix* \ + | -aos* | -aros* \ | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \ - | -bitrig* | -openbsd* | -solidbsd* | -libertybsd* \ + | -bitrig* | -openbsd* | -solidbsd* \ | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \ | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \ | -chorusos* | -chorusrdb* | -cegcc* \ | -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ - | -midipix* | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \ + | 
-mingw32* | -mingw64* | -linux-gnu* | -linux-android* \ | -linux-newlib* | -linux-musl* | -linux-uclibc* \ - | -uxpv* | -beos* | -mpeix* | -udk* | -moxiebox* \ + | -uxpv* | -beos* | -mpeix* | -udk* \ | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \ | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \ | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \ | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \ | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \ | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \ - | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es* \ - | -onefs* | -tirtos* | -phoenix* | -fuchsia*) + | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es* | -tirtos*) # Remember, each alternative MUST END IN *, to match a version number. ;; -qnx*) @@ -1433,6 +1404,9 @@ case $os in -mac*) os=`echo $os | sed -e 's|mac|macos|'` ;; + # Apple iOS + -ios*) + ;; -linux-dietlibc) os=-linux-dietlibc ;; @@ -1541,8 +1515,6 @@ case $os in ;; -nacl*) ;; - -ios) - ;; -none) ;; *) diff --git a/deps/jemalloc/configure b/deps/jemalloc/configure index d7391524d..8c56c92a1 100755 --- a/deps/jemalloc/configure +++ b/deps/jemalloc/configure @@ -628,6 +628,7 @@ cfghdrs_in enable_zone_allocator enable_tls enable_lazy_lock +TESTLIBS jemalloc_version_gid jemalloc_version_nrev jemalloc_version_bugfix @@ -657,19 +658,16 @@ INSTALL_SCRIPT INSTALL_PROGRAM enable_autogen RPATH_EXTRA -LM CC_MM AROUT ARFLAGS MKLIB -TEST_LD_MODE LDTARGET CTARGET PIC_CFLAGS SOREV EXTRA_LDFLAGS DSO_LDFLAGS -link_whole_archive libprefix exe a @@ -691,7 +689,6 @@ build EGREP GREP CPP -EXTRA_CFLAGS OBJEXT EXEEXT ac_ct_CC @@ -732,7 +729,6 @@ infodir docdir oldincludedir includedir -runstatedir localstatedir sharedstatedir sysconfdir @@ -764,7 +760,6 @@ with_jemalloc_prefix with_export with_private_namespace with_install_suffix -with_malloc_conf enable_cc_silence enable_debug enable_ivsalloc @@ -786,8 +781,6 @@ with_lg_quantum with_lg_page with_lg_page_sizes 
with_lg_size_class_group -with_version -enable_syscall enable_lazy_lock enable_tls enable_zone_allocator @@ -839,7 +832,6 @@ datadir='${datarootdir}' sysconfdir='${prefix}/etc' sharedstatedir='${prefix}/com' localstatedir='${prefix}/var' -runstatedir='${localstatedir}/run' includedir='${prefix}/include' oldincludedir='/usr/include' docdir='${datarootdir}/doc/${PACKAGE}' @@ -1092,15 +1084,6 @@ do | -silent | --silent | --silen | --sile | --sil) silent=yes ;; - -runstatedir | --runstatedir | --runstatedi | --runstated \ - | --runstate | --runstat | --runsta | --runst | --runs \ - | --run | --ru | --r) - ac_prev=runstatedir ;; - -runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \ - | --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \ - | --run=* | --ru=* | --r=*) - runstatedir=$ac_optarg ;; - -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) ac_prev=sbindir ;; -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ @@ -1238,7 +1221,7 @@ fi for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ datadir sysconfdir sharedstatedir localstatedir includedir \ oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ - libdir localedir mandir runstatedir + libdir localedir mandir do eval ac_val=\$$ac_var # Remove trailing slashes. 
@@ -1391,7 +1374,6 @@ Fine tuning of the installation directories: --sysconfdir=DIR read-only single-machine data [PREFIX/etc] --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] --localstatedir=DIR modifiable single-machine data [PREFIX/var] - --runstatedir=DIR modifiable per-process data [LOCALSTATEDIR/run] --libdir=DIR object code libraries [EPREFIX/lib] --includedir=DIR C header files [PREFIX/include] --oldincludedir=DIR C header files for non-gcc [/usr/include] @@ -1443,7 +1425,6 @@ Optional Features: --disable-cache-oblivious Disable support for cache-oblivious allocation alignment - --disable-syscall Disable use of syscall(2) --enable-lazy-lock Enable lazy locking (only lock when multi-threaded) --disable-tls Disable thread-local storage (__thread keyword) --disable-zone-allocator @@ -1462,8 +1443,6 @@ Optional Packages: Prefix to prepend to all library-private APIs --with-install-suffix= Suffix to append to all installed files - --with-malloc-conf= - config.malloc_conf options string --with-static-libunwind= Path to static libunwind library; use rather than dynamically linking @@ -1477,8 +1456,6 @@ Optional Packages: Base 2 logs of system page sizes to support --with-lg-size-class-group= Base 2 log of size classes per doubling - --with-version=..--g - Version string Some influential environment variables: CC C compiler command @@ -2507,36 +2484,6 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu -ac_aux_dir= -for ac_dir in build-aux "$srcdir"/build-aux; do - if test -f "$ac_dir/install-sh"; then - ac_aux_dir=$ac_dir - ac_install_sh="$ac_aux_dir/install-sh -c" - break - elif test -f "$ac_dir/install.sh"; then - ac_aux_dir=$ac_dir - ac_install_sh="$ac_aux_dir/install.sh -c" - break - elif test -f "$ac_dir/shtool"; then - ac_aux_dir=$ac_dir - ac_install_sh="$ac_aux_dir/shtool install -c" - break - fi -done -if test -z "$ac_aux_dir"; then - as_fn_error $? 
"cannot find install-sh, install.sh, or shtool in build-aux \"$srcdir\"/build-aux" "$LINENO" 5 -fi - -# These three variables are undocumented and unsupported, -# and are intended to be withdrawn in a future Autoconf release. -# They can cause serious problems if a builder's source tree is in a directory -# whose full name contains unusual characters. -ac_config_guess="$SHELL $ac_aux_dir/config.guess" # Please don't use this var. -ac_config_sub="$SHELL $ac_aux_dir/config.sub" # Please don't use this var. -ac_configure="$SHELL $ac_aux_dir/configure" # Please don't use this var. - - - @@ -3443,7 +3390,6 @@ ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu - if test "x$GCC" != "xyes" ; then { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler is MSVC" >&5 @@ -3477,125 +3423,10 @@ fi $as_echo "$je_cv_msvc" >&6; } fi -je_cv_cray_prgenv_wrapper="" -if test "x${PE_ENV}" != "x" ; then - case "${CC}" in - CC|cc) - je_cv_cray_prgenv_wrapper="yes" - ;; - *) - ;; - esac -fi - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler is cray" >&5 -$as_echo_n "checking whether compiler is cray... " >&6; } -if ${je_cv_cray+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -int -main () -{ - -#ifndef _CRAYC - int fail-1; -#endif - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - je_cv_cray=yes -else - je_cv_cray=no -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_cray" >&5 -$as_echo "$je_cv_cray" >&6; } - -if test "x${je_cv_cray}" = "xyes" ; then - { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether cray compiler version is 8.4" >&5 -$as_echo_n "checking whether cray compiler version is 8.4... 
" >&6; } -if ${je_cv_cray_84+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -int -main () -{ - -#if !(_RELEASE_MAJOR == 8 && _RELEASE_MINOR == 4) - int fail-1; -#endif - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - je_cv_cray_84=yes -else - je_cv_cray_84=no -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_cray_84" >&5 -$as_echo "$je_cv_cray_84" >&6; } -fi - if test "x$CFLAGS" = "x" ; then no_CFLAGS="yes" if test "x$GCC" = "xyes" ; then -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -std=gnu11" >&5 -$as_echo_n "checking whether compiler supports -std=gnu11... " >&6; } -TCFLAGS="${CFLAGS}" -if test "x${CFLAGS}" = "x" ; then - CFLAGS="-std=gnu11" -else - CFLAGS="${CFLAGS} -std=gnu11" -fi -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - - -int -main () -{ - - return 0; - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - je_cv_cflags_appended=-std=gnu11 - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } -else - je_cv_cflags_appended= - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } - CFLAGS="${TCFLAGS}" - -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - if test "x$je_cv_cflags_appended" = "x-std=gnu11" ; then - cat >>confdefs.h <<_ACEOF -#define JEMALLOC_HAS_RESTRICT 1 -_ACEOF - - else - { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -std=gnu99" >&5 $as_echo_n "checking whether compiler supports -std=gnu99... 
" >&6; } TCFLAGS="${CFLAGS}" @@ -3631,12 +3462,11 @@ $as_echo "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - if test "x$je_cv_cflags_appended" = "x-std=gnu99" ; then - cat >>confdefs.h <<_ACEOF + if test "x$je_cv_cflags_appended" = "x-std=gnu99" ; then + cat >>confdefs.h <<_ACEOF #define JEMALLOC_HAS_RESTRICT 1 _ACEOF - fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -Wall" >&5 @@ -3711,78 +3541,6 @@ fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -Wshorten-64-to-32" >&5 -$as_echo_n "checking whether compiler supports -Wshorten-64-to-32... " >&6; } -TCFLAGS="${CFLAGS}" -if test "x${CFLAGS}" = "x" ; then - CFLAGS="-Wshorten-64-to-32" -else - CFLAGS="${CFLAGS} -Wshorten-64-to-32" -fi -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - - -int -main () -{ - - return 0; - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - je_cv_cflags_appended=-Wshorten-64-to-32 - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } -else - je_cv_cflags_appended= - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } - CFLAGS="${TCFLAGS}" - -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -Wsign-compare" >&5 -$as_echo_n "checking whether compiler supports -Wsign-compare... " >&6; } -TCFLAGS="${CFLAGS}" -if test "x${CFLAGS}" = "x" ; then - CFLAGS="-Wsign-compare" -else - CFLAGS="${CFLAGS} -Wsign-compare" -fi -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - - -int -main () -{ - - return 0; - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - je_cv_cflags_appended=-Wsign-compare - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } -else - je_cv_cflags_appended= - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } - CFLAGS="${TCFLAGS}" - -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -pipe" >&5 $as_echo_n "checking whether compiler supports -pipe... " >&6; } TCFLAGS="${CFLAGS}" @@ -4002,16 +3760,16 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext CPPFLAGS="$CPPFLAGS -I${srcdir}/include/msvc_compat" fi - if test "x$je_cv_cray" = "xyes" ; then - if test "x$je_cv_cray_84" = "xyes" ; then +fi +if test "x$EXTRA_CFLAGS" != "x" ; then -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -hipa2" >&5 -$as_echo_n "checking whether compiler supports -hipa2... " >&6; } +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports $EXTRA_CFLAGS" >&5 +$as_echo_n "checking whether compiler supports $EXTRA_CFLAGS... " >&6; } TCFLAGS="${CFLAGS}" if test "x${CFLAGS}" = "x" ; then - CFLAGS="-hipa2" + CFLAGS="$EXTRA_CFLAGS" else - CFLAGS="${CFLAGS} -hipa2" + CFLAGS="${CFLAGS} $EXTRA_CFLAGS" fi cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ @@ -4028,7 +3786,7 @@ main () } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : - je_cv_cflags_appended=-hipa2 + je_cv_cflags_appended=$EXTRA_CFLAGS { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } else @@ -4040,120 +3798,7 @@ $as_echo "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -hnognu" >&5 -$as_echo_n "checking whether compiler supports -hnognu... 
" >&6; } -TCFLAGS="${CFLAGS}" -if test "x${CFLAGS}" = "x" ; then - CFLAGS="-hnognu" -else - CFLAGS="${CFLAGS} -hnognu" fi -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - - -int -main () -{ - - return 0; - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - je_cv_cflags_appended=-hnognu - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } -else - je_cv_cflags_appended= - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } - CFLAGS="${TCFLAGS}" - -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - fi - if test "x$enable_cc_silence" != "xno" ; then - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -hnomessage=128" >&5 -$as_echo_n "checking whether compiler supports -hnomessage=128... " >&6; } -TCFLAGS="${CFLAGS}" -if test "x${CFLAGS}" = "x" ; then - CFLAGS="-hnomessage=128" -else - CFLAGS="${CFLAGS} -hnomessage=128" -fi -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - - -int -main () -{ - - return 0; - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - je_cv_cflags_appended=-hnomessage=128 - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } -else - je_cv_cflags_appended= - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } - CFLAGS="${TCFLAGS}" - -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -hnomessage=1357" >&5 -$as_echo_n "checking whether compiler supports -hnomessage=1357... " >&6; } -TCFLAGS="${CFLAGS}" -if test "x${CFLAGS}" = "x" ; then - CFLAGS="-hnomessage=1357" -else - CFLAGS="${CFLAGS} -hnomessage=1357" -fi -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - - -int -main () -{ - - return 0; - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - je_cv_cflags_appended=-hnomessage=1357 - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } -else - je_cv_cflags_appended= - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } - CFLAGS="${TCFLAGS}" - -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - fi - fi -fi - ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' @@ -4786,12 +4431,7 @@ if test "x${je_cv_msvc}" = "xyes" -a "x${ac_cv_header_inttypes_h}" = "xno"; then CPPFLAGS="$CPPFLAGS -I${srcdir}/include/msvc_compat/C99" fi -if test "x${je_cv_msvc}" = "xyes" ; then - LG_SIZEOF_PTR=LG_SIZEOF_PTR_WIN - { $as_echo "$as_me:${as_lineno-$LINENO}: result: Using a predefined value for sizeof(void *): 4 for 32-bit, 8 for 64-bit" >&5 -$as_echo "Using a predefined value for sizeof(void *): 4 for 32-bit, 8 for 64-bit" >&6; } -else - # The cast to long int works around a bug in the HP C Compiler +# The cast to long int works around a bug in the HP C Compiler # version HP92453-01 B.11.11.23709.GP, which incorrectly rejects # declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. # This bug is HP SR number 8606223364. @@ -4824,13 +4464,12 @@ cat >>confdefs.h <<_ACEOF _ACEOF - if test "x${ac_cv_sizeof_void_p}" = "x8" ; then - LG_SIZEOF_PTR=3 - elif test "x${ac_cv_sizeof_void_p}" = "x4" ; then - LG_SIZEOF_PTR=2 - else - as_fn_error $? "Unsupported pointer size: ${ac_cv_sizeof_void_p}" "$LINENO" 5 - fi +if test "x${ac_cv_sizeof_void_p}" = "x8" ; then + LG_SIZEOF_PTR=3 +elif test "x${ac_cv_sizeof_void_p}" = "x4" ; then + LG_SIZEOF_PTR=2 +else + as_fn_error $? 
"Unsupported pointer size: ${ac_cv_sizeof_void_p}" "$LINENO" 5 fi cat >>confdefs.h <<_ACEOF #define LG_SIZEOF_PTR $LG_SIZEOF_PTR @@ -4927,51 +4566,6 @@ cat >>confdefs.h <<_ACEOF _ACEOF -# The cast to long int works around a bug in the HP C Compiler -# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects -# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. -# This bug is HP SR number 8606223364. -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of long long" >&5 -$as_echo_n "checking size of long long... " >&6; } -if ${ac_cv_sizeof_long_long+:} false; then : - $as_echo_n "(cached) " >&6 -else - if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (long long))" "ac_cv_sizeof_long_long" "$ac_includes_default"; then : - -else - if test "$ac_cv_type_long_long" = yes; then - { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} -as_fn_error 77 "cannot compute sizeof (long long) -See \`config.log' for more details" "$LINENO" 5; } - else - ac_cv_sizeof_long_long=0 - fi -fi - -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_long_long" >&5 -$as_echo "$ac_cv_sizeof_long_long" >&6; } - - - -cat >>confdefs.h <<_ACEOF -#define SIZEOF_LONG_LONG $ac_cv_sizeof_long_long -_ACEOF - - -if test "x${ac_cv_sizeof_long_long}" = "x8" ; then - LG_SIZEOF_LONG_LONG=3 -elif test "x${ac_cv_sizeof_long_long}" = "x4" ; then - LG_SIZEOF_LONG_LONG=2 -else - as_fn_error $? "Unsupported long long size: ${ac_cv_sizeof_long_long}" "$LINENO" 5 -fi -cat >>confdefs.h <<_ACEOF -#define LG_SIZEOF_LONG_LONG $LG_SIZEOF_LONG_LONG -_ACEOF - - # The cast to long int works around a bug in the HP C Compiler # version HP92453-01 B.11.11.23709.GP, which incorrectly rejects # declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. @@ -5019,6 +4613,35 @@ cat >>confdefs.h <<_ACEOF _ACEOF +ac_aux_dir= +for ac_dir in "$srcdir" "$srcdir/.." 
"$srcdir/../.."; do + if test -f "$ac_dir/install-sh"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/install-sh -c" + break + elif test -f "$ac_dir/install.sh"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/install.sh -c" + break + elif test -f "$ac_dir/shtool"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/shtool install -c" + break + fi +done +if test -z "$ac_aux_dir"; then + as_fn_error $? "cannot find install-sh, install.sh, or shtool in \"$srcdir\" \"$srcdir/..\" \"$srcdir/../..\"" "$LINENO" 5 +fi + +# These three variables are undocumented and unsupported, +# and are intended to be withdrawn in a future Autoconf release. +# They can cause serious problems if a builder's source tree is in a directory +# whose full name contains unusual characters. +ac_config_guess="$SHELL $ac_aux_dir/config.guess" # Please don't use this var. +ac_config_sub="$SHELL $ac_aux_dir/config.sub" # Please don't use this var. +ac_configure="$SHELL $ac_aux_dir/configure" # Please don't use this var. + + # Make sure we can run config.sub. $SHELL "$ac_aux_dir/config.sub" sun4 >/dev/null 2>&1 || as_fn_error $? "cannot run $SHELL $ac_aux_dir/config.sub" "$LINENO" 5 @@ -5093,45 +4716,7 @@ case $host_os in *\ *) host_os=`echo "$host_os" | sed 's/ /-/g'`;; esac CPU_SPINWAIT="" case "${host_cpu}" in i686|x86_64) - if test "x${je_cv_msvc}" = "xyes" ; then - if ${je_cv_pause_msvc+:} false; then : - $as_echo_n "(cached) " >&6 -else - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether pause instruction MSVC is compilable" >&5 -$as_echo_n "checking whether pause instruction MSVC is compilable... " >&6; } -if ${je_cv_pause_msvc+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - -int -main () -{ -_mm_pause(); return 0; - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - je_cv_pause_msvc=yes -else - je_cv_pause_msvc=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_pause_msvc" >&5 -$as_echo "$je_cv_pause_msvc" >&6; } - -fi - - if test "x${je_cv_pause_msvc}" = "xyes" ; then - CPU_SPINWAIT='_mm_pause()' - fi - else - if ${je_cv_pause+:} false; then : + if ${je_cv_pause+:} false; then : $as_echo_n "(cached) " >&6 else @@ -5164,9 +4749,8 @@ $as_echo "$je_cv_pause" >&6; } fi - if test "x${je_cv_pause}" = "xyes" ; then - CPU_SPINWAIT='__asm__ volatile("pause")' - fi + if test "x${je_cv_pause}" = "xyes" ; then + CPU_SPINWAIT='__asm__ volatile("pause")' fi ;; powerpc) @@ -5190,27 +4774,17 @@ o="$ac_objext" a="a" exe="$ac_exeext" libprefix="lib" -link_whole_archive="0" DSO_LDFLAGS='-shared -Wl,-soname,$(@F)' RPATH='-Wl,-rpath,$(1)' SOREV="${so}.${rev}" PIC_CFLAGS='-fPIC -DPIC' CTARGET='-o $@' LDTARGET='-o $@' -TEST_LD_MODE= EXTRA_LDFLAGS= ARFLAGS='crus' AROUT=' $@' CC_MM=1 -if test "x$je_cv_cray_prgenv_wrapper" = "xyes" ; then - TEST_LD_MODE='-dynamic' -fi - -if test "x${je_cv_cray}" = "xyes" ; then - CC_MM= -fi - @@ -5307,12 +4881,14 @@ else fi -CFLAGS="$CFLAGS" default_munmap="1" maps_coalesce="1" case "${host}" in *-*-darwin* | *-*-ios*) + CFLAGS="$CFLAGS" abi="macho" + $as_echo "#define JEMALLOC_PURGE_MADVISE_FREE " >>confdefs.h + RPATH="" LD_PRELOAD_VAR="DYLD_INSERT_LIBRARIES" so="dylib" @@ -5323,41 +4899,38 @@ case "${host}" in sbrk_deprecated="1" ;; *-*-freebsd*) + CFLAGS="$CFLAGS" abi="elf" - $as_echo "#define JEMALLOC_SYSCTL_VM_OVERCOMMIT " >>confdefs.h + $as_echo "#define JEMALLOC_PURGE_MADVISE_FREE " >>confdefs.h force_lazy_lock="1" ;; *-*-dragonfly*) + CFLAGS="$CFLAGS" abi="elf" + $as_echo "#define JEMALLOC_PURGE_MADVISE_FREE " >>confdefs.h + ;; *-*-openbsd*) + CFLAGS="$CFLAGS" abi="elf" + $as_echo "#define 
JEMALLOC_PURGE_MADVISE_FREE " >>confdefs.h + force_tls="0" ;; *-*-bitrig*) + CFLAGS="$CFLAGS" abi="elf" + $as_echo "#define JEMALLOC_PURGE_MADVISE_FREE " >>confdefs.h + ;; - *-*-linux-android) - CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE" + *-*-linux*) + CFLAGS="$CFLAGS" + CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE" abi="elf" $as_echo "#define JEMALLOC_HAS_ALLOCA_H 1" >>confdefs.h - $as_echo "#define JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY " >>confdefs.h - - $as_echo "#define JEMALLOC_THREADED_INIT " >>confdefs.h - - $as_echo "#define JEMALLOC_C11ATOMICS 1" >>confdefs.h - - force_tls="0" - default_munmap="0" - ;; - *-*-linux* | *-*-kfreebsd*) - CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE" - abi="elf" - $as_echo "#define JEMALLOC_HAS_ALLOCA_H 1" >>confdefs.h - - $as_echo "#define JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY " >>confdefs.h + $as_echo "#define JEMALLOC_PURGE_MADVISE_DONTNEED " >>confdefs.h $as_echo "#define JEMALLOC_THREADED_INIT " >>confdefs.h @@ -5385,16 +4958,21 @@ main () } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : - abi="elf" + CFLAGS="$CFLAGS"; abi="elf" else abi="aout" fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext { $as_echo "$as_me:${as_lineno-$LINENO}: result: $abi" >&5 $as_echo "$abi" >&6; } + $as_echo "#define JEMALLOC_PURGE_MADVISE_FREE " >>confdefs.h + ;; *-*-solaris2*) + CFLAGS="$CFLAGS" abi="elf" + $as_echo "#define JEMALLOC_PURGE_MADVISE_FREE " >>confdefs.h + RPATH='-Wl,-R,$(1)' CPPFLAGS="$CPPFLAGS -D_POSIX_PTHREAD_SEMANTICS" LIBS="$LIBS -lposix4 -lsocket -lnsl" @@ -5410,6 +4988,7 @@ $as_echo "$abi" >&6; } *-*-mingw* | *-*-cygwin*) abi="pecoff" force_tls="0" + force_lazy_lock="1" maps_coalesce="0" RPATH="" so="dll" @@ -5426,7 +5005,6 @@ $as_echo "$abi" >&6; } else importlib="${so}" DSO_LDFLAGS="-shared" - link_whole_archive="1" fi a="lib" libprefix="" @@ -5506,73 +5084,6 @@ _ACEOF - - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing log" >&5 -$as_echo_n "checking for library containing log... 
" >&6; } -if ${ac_cv_search_log+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_func_search_save_LIBS=$LIBS -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ -#ifdef __cplusplus -extern "C" -#endif -char log (); -int -main () -{ -return log (); - ; - return 0; -} -_ACEOF -for ac_lib in '' m; do - if test -z "$ac_lib"; then - ac_res="none required" - else - ac_res=-l$ac_lib - LIBS="-l$ac_lib $ac_func_search_save_LIBS" - fi - if ac_fn_c_try_link "$LINENO"; then : - ac_cv_search_log=$ac_res -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext - if ${ac_cv_search_log+:} false; then : - break -fi -done -if ${ac_cv_search_log+:} false; then : - -else - ac_cv_search_log=no -fi -rm conftest.$ac_ext -LIBS=$ac_func_search_save_LIBS -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_log" >&5 -$as_echo "$ac_cv_search_log" >&6; } -ac_res=$ac_cv_search_log -if test "$ac_res" != no; then : - test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" - -else - as_fn_error $? "Missing math functions" "$LINENO" 5 -fi - -if test "x$ac_cv_search_log" != "xnone required" ; then - LM="$ac_cv_search_log" -else - LM= -fi - { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether __attribute__ syntax is compilable" >&5 @@ -5682,42 +5193,6 @@ fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -herror_on_warning" >&5 -$as_echo_n "checking whether compiler supports -herror_on_warning... " >&6; } -TCFLAGS="${CFLAGS}" -if test "x${CFLAGS}" = "x" ; then - CFLAGS="-herror_on_warning" -else - CFLAGS="${CFLAGS} -herror_on_warning" -fi -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - - -int -main () -{ - - return 0; - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - je_cv_cflags_appended=-herror_on_warning - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } -else - je_cv_cflags_appended= - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } - CFLAGS="${TCFLAGS}" - -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether tls_model attribute is compilable" >&5 $as_echo_n "checking whether tls_model attribute is compilable... " >&6; } if ${je_cv_tls_model+:} false; then : @@ -5793,42 +5268,6 @@ fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -herror_on_warning" >&5 -$as_echo_n "checking whether compiler supports -herror_on_warning... " >&6; } -TCFLAGS="${CFLAGS}" -if test "x${CFLAGS}" = "x" ; then - CFLAGS="-herror_on_warning" -else - CFLAGS="${CFLAGS} -herror_on_warning" -fi -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - - -int -main () -{ - - return 0; - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - je_cv_cflags_appended=-herror_on_warning - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } -else - je_cv_cflags_appended= - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } - CFLAGS="${TCFLAGS}" - -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether alloc_size attribute is compilable" >&5 $as_echo_n "checking whether alloc_size attribute is compilable... 
" >&6; } if ${je_cv_alloc_size+:} false; then : @@ -5899,42 +5338,6 @@ fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -herror_on_warning" >&5 -$as_echo_n "checking whether compiler supports -herror_on_warning... " >&6; } -TCFLAGS="${CFLAGS}" -if test "x${CFLAGS}" = "x" ; then - CFLAGS="-herror_on_warning" -else - CFLAGS="${CFLAGS} -herror_on_warning" -fi -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - - -int -main () -{ - - return 0; - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - je_cv_cflags_appended=-herror_on_warning - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } -else - je_cv_cflags_appended= - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } - CFLAGS="${TCFLAGS}" - -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether format(gnu_printf, ...) attribute is compilable" >&5 $as_echo_n "checking whether format(gnu_printf, ...) attribute is compilable... " >&6; } if ${je_cv_format_gnu_printf+:} false; then : @@ -6005,42 +5408,6 @@ fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -herror_on_warning" >&5 -$as_echo_n "checking whether compiler supports -herror_on_warning... " >&6; } -TCFLAGS="${CFLAGS}" -if test "x${CFLAGS}" = "x" ; then - CFLAGS="-herror_on_warning" -else - CFLAGS="${CFLAGS} -herror_on_warning" -fi -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - - -int -main () -{ - - return 0; - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - je_cv_cflags_appended=-herror_on_warning - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } -else - je_cv_cflags_appended= - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } - CFLAGS="${TCFLAGS}" - -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether format(printf, ...) attribute is compilable" >&5 $as_echo_n "checking whether format(printf, ...) attribute is compilable... " >&6; } if ${je_cv_format_printf+:} false; then : @@ -6560,21 +5927,6 @@ fi install_suffix="$INSTALL_SUFFIX" - -# Check whether --with-malloc_conf was given. -if test "${with_malloc_conf+set}" = set; then : - withval=$with_malloc_conf; JEMALLOC_CONFIG_MALLOC_CONF="$with_malloc_conf" -else - JEMALLOC_CONFIG_MALLOC_CONF="" - -fi - -config_malloc_conf="$JEMALLOC_CONFIG_MALLOC_CONF" -cat >>confdefs.h <<_ACEOF -#define JEMALLOC_CONFIG_MALLOC_CONF "$config_malloc_conf" -_ACEOF - - je_="je_" @@ -7143,8 +6495,8 @@ $as_echo_n "checking configured backtracing method... " >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: result: $backtrace_method" >&5 $as_echo "$backtrace_method" >&6; } if test "x$enable_prof" = "x1" ; then - if test "x$LM" != "x" ; then - LIBS="$LIBS $LM" + if test "x$abi" != "xpecoff"; then + LIBS="$LIBS -lm" fi $as_echo "#define JEMALLOC_PROF " >>confdefs.h @@ -7393,52 +6745,6 @@ fi - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether a program using __builtin_unreachable is compilable" >&5 -$as_echo_n "checking whether a program using __builtin_unreachable is compilable... " >&6; } -if ${je_cv_gcc_builtin_unreachable+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - -void foo (void) { - __builtin_unreachable(); -} - -int -main () -{ - - { - foo(); - } - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - je_cv_gcc_builtin_unreachable=yes -else - je_cv_gcc_builtin_unreachable=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_gcc_builtin_unreachable" >&5 -$as_echo "$je_cv_gcc_builtin_unreachable" >&6; } - -if test "x${je_cv_gcc_builtin_unreachable}" = "xyes" ; then - $as_echo "#define JEMALLOC_INTERNAL_UNREACHABLE __builtin_unreachable" >>confdefs.h - -else - $as_echo "#define JEMALLOC_INTERNAL_UNREACHABLE abort" >>confdefs.h - -fi - - { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether a program using __builtin_ffsl is compilable" >&5 $as_echo_n "checking whether a program using __builtin_ffsl is compilable... " >&6; } if ${je_cv_gcc_builtin_ffsl+:} false; then : @@ -7476,8 +6782,6 @@ fi $as_echo "$je_cv_gcc_builtin_ffsl" >&6; } if test "x${je_cv_gcc_builtin_ffsl}" = "xyes" ; then - $as_echo "#define JEMALLOC_INTERNAL_FFSLL __builtin_ffsll" >>confdefs.h - $as_echo "#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl" >>confdefs.h $as_echo "#define JEMALLOC_INTERNAL_FFS __builtin_ffs" >>confdefs.h @@ -7521,8 +6825,6 @@ fi $as_echo "$je_cv_function_ffsl" >&6; } if test "x${je_cv_function_ffsl}" = "xyes" ; then - $as_echo "#define JEMALLOC_INTERNAL_FFSLL ffsll" >>confdefs.h - $as_echo "#define JEMALLOC_INTERNAL_FFSL ffsl" >>confdefs.h $as_echo "#define JEMALLOC_INTERNAL_FFS ffs" >>confdefs.h @@ -7611,7 +6913,7 @@ main () if (f == NULL) { return 1; } - fprintf(f, "%d", result); + fprintf(f, "%d\n", result); fclose(f); return 0; @@ -7662,6 +6964,7 @@ else LG_SIZE_CLASS_GROUP="2" fi + if test ! -e "${objroot}VERSION" ; then if test ! 
-e "${srcroot}VERSION" ; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: Missing VERSION file, and unable to generate it; creating bogus VERSION" >&5 @@ -7799,46 +7102,12 @@ fi fi - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether pthread_atfork(3) is compilable" >&5 -$as_echo_n "checking whether pthread_atfork(3) is compilable... " >&6; } -if ${je_cv_pthread_atfork+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -#include - -int -main () -{ - - pthread_atfork((void *)0, (void *)0, (void *)0); - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - je_cv_pthread_atfork=yes -else - je_cv_pthread_atfork=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_pthread_atfork" >&5 -$as_echo "$je_cv_pthread_atfork" >&6; } - - if test "x${je_cv_pthread_atfork}" = "xyes" ; then - $as_echo "#define JEMALLOC_HAVE_PTHREAD_ATFORK " >>confdefs.h - - fi fi CPPFLAGS="$CPPFLAGS -D_REENTRANT" +SAVED_LIBS="${LIBS}" +LIBS= { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing clock_gettime" >&5 $as_echo_n "checking for library containing clock_gettime... " >&6; } if ${ac_cv_search_clock_gettime+:} false; then : @@ -7892,321 +7161,11 @@ $as_echo "$ac_cv_search_clock_gettime" >&6; } ac_res=$ac_cv_search_clock_gettime if test "$ac_res" != no; then : test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" - + TESTLIBS="${LIBS}" fi -if test "x$je_cv_cray_prgenv_wrapper" = "xyes" ; then - if test "$ac_cv_search_clock_gettime" != "-lrt"; then - SAVED_CFLAGS="${CFLAGS}" - - unset ac_cv_search_clock_gettime - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -dynamic" >&5 -$as_echo_n "checking whether compiler supports -dynamic... 
" >&6; } -TCFLAGS="${CFLAGS}" -if test "x${CFLAGS}" = "x" ; then - CFLAGS="-dynamic" -else - CFLAGS="${CFLAGS} -dynamic" -fi -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - - -int -main () -{ - - return 0; - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - je_cv_cflags_appended=-dynamic - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } -else - je_cv_cflags_appended= - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } - CFLAGS="${TCFLAGS}" - -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing clock_gettime" >&5 -$as_echo_n "checking for library containing clock_gettime... " >&6; } -if ${ac_cv_search_clock_gettime+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_func_search_save_LIBS=$LIBS -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. 
*/ -#ifdef __cplusplus -extern "C" -#endif -char clock_gettime (); -int -main () -{ -return clock_gettime (); - ; - return 0; -} -_ACEOF -for ac_lib in '' rt; do - if test -z "$ac_lib"; then - ac_res="none required" - else - ac_res=-l$ac_lib - LIBS="-l$ac_lib $ac_func_search_save_LIBS" - fi - if ac_fn_c_try_link "$LINENO"; then : - ac_cv_search_clock_gettime=$ac_res -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext - if ${ac_cv_search_clock_gettime+:} false; then : - break -fi -done -if ${ac_cv_search_clock_gettime+:} false; then : - -else - ac_cv_search_clock_gettime=no -fi -rm conftest.$ac_ext -LIBS=$ac_func_search_save_LIBS -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_clock_gettime" >&5 -$as_echo "$ac_cv_search_clock_gettime" >&6; } -ac_res=$ac_cv_search_clock_gettime -if test "$ac_res" != no; then : - test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" - -fi - - - CFLAGS="${SAVED_CFLAGS}" - fi -fi - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is compilable" >&5 -$as_echo_n "checking whether clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is compilable... " >&6; } -if ${je_cv_clock_monotonic_coarse+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - -#include - -int -main () -{ - - struct timespec ts; - - clock_gettime(CLOCK_MONOTONIC_COARSE, &ts); - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - je_cv_clock_monotonic_coarse=yes -else - je_cv_clock_monotonic_coarse=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_clock_monotonic_coarse" >&5 -$as_echo "$je_cv_clock_monotonic_coarse" >&6; } - -if test "x${je_cv_clock_monotonic_coarse}" = "xyes" ; then - $as_echo "#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE 1" >>confdefs.h - -fi - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether clock_gettime(CLOCK_MONOTONIC, ...) is compilable" >&5 -$as_echo_n "checking whether clock_gettime(CLOCK_MONOTONIC, ...) is compilable... " >&6; } -if ${je_cv_clock_monotonic+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -#include -#include - -int -main () -{ - - struct timespec ts; - - clock_gettime(CLOCK_MONOTONIC, &ts); -#if !defined(_POSIX_MONOTONIC_CLOCK) || _POSIX_MONOTONIC_CLOCK < 0 -# error _POSIX_MONOTONIC_CLOCK missing/invalid -#endif - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - je_cv_clock_monotonic=yes -else - je_cv_clock_monotonic=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_clock_monotonic" >&5 -$as_echo "$je_cv_clock_monotonic" >&6; } - -if test "x${je_cv_clock_monotonic}" = "xyes" ; then - $as_echo "#define JEMALLOC_HAVE_CLOCK_MONOTONIC 1" >>confdefs.h - -fi - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether mach_absolute_time() is compilable" >&5 -$as_echo_n "checking whether mach_absolute_time() is compilable... 
" >&6; } -if ${je_cv_mach_absolute_time+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -#include - -int -main () -{ - - mach_absolute_time(); - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - je_cv_mach_absolute_time=yes -else - je_cv_mach_absolute_time=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_mach_absolute_time" >&5 -$as_echo "$je_cv_mach_absolute_time" >&6; } - -if test "x${je_cv_mach_absolute_time}" = "xyes" ; then - $as_echo "#define JEMALLOC_HAVE_MACH_ABSOLUTE_TIME 1" >>confdefs.h - -fi - -# Check whether --enable-syscall was given. -if test "${enable_syscall+set}" = set; then : - enableval=$enable_syscall; if test "x$enable_syscall" = "xno" ; then - enable_syscall="0" -else - enable_syscall="1" -fi - -else - enable_syscall="1" - -fi - -if test "x$enable_syscall" = "x1" ; then - SAVED_CFLAGS="${CFLAGS}" - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -Werror" >&5 -$as_echo_n "checking whether compiler supports -Werror... " >&6; } -TCFLAGS="${CFLAGS}" -if test "x${CFLAGS}" = "x" ; then - CFLAGS="-Werror" -else - CFLAGS="${CFLAGS} -Werror" -fi -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - - -int -main () -{ - - return 0; - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - je_cv_cflags_appended=-Werror - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } -else - je_cv_cflags_appended= - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } - CFLAGS="${TCFLAGS}" - -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether syscall(2) is compilable" >&5 -$as_echo_n "checking whether syscall(2) is compilable... 
" >&6; } -if ${je_cv_syscall+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -#include -#include - -int -main () -{ - - syscall(SYS_write, 2, "hello", 5); - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - je_cv_syscall=yes -else - je_cv_syscall=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_syscall" >&5 -$as_echo "$je_cv_syscall" >&6; } - - CFLAGS="${SAVED_CFLAGS}" - if test "x$je_cv_syscall" = "xyes" ; then - $as_echo "#define JEMALLOC_USE_SYSCALL " >>confdefs.h - - fi -fi +LIBS="${SAVED_LIBS}" ac_fn_c_check_func "$LINENO" "secure_getenv" "ac_cv_func_secure_getenv" if test "x$ac_cv_func_secure_getenv" = xyes; then : @@ -8274,19 +7233,10 @@ else fi -if test "x${enable_lazy_lock}" = "x" ; then - if test "x${force_lazy_lock}" = "x1" ; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: Forcing lazy-lock to avoid allocator/threading bootstrap issues" >&5 +if test "x$enable_lazy_lock" = "x" -a "x${force_lazy_lock}" = "x1" ; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: Forcing lazy-lock to avoid allocator/threading bootstrap issues" >&5 $as_echo "Forcing lazy-lock to avoid allocator/threading bootstrap issues" >&6; } - enable_lazy_lock="1" - else - enable_lazy_lock="0" - fi -fi -if test "x${enable_lazy_lock}" = "x1" -a "x${abi}" = "xpecoff" ; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: Forcing no lazy-lock because thread creation monitoring is unimplemented" >&5 -$as_echo "Forcing no lazy-lock because thread creation monitoring is unimplemented" >&6; } - enable_lazy_lock="0" + enable_lazy_lock="1" fi if test "x$enable_lazy_lock" = "x1" ; then if test "x$abi" != "xpecoff" ; then @@ -8356,6 +7306,8 @@ fi fi $as_echo "#define JEMALLOC_LAZY_LOCK " >>confdefs.h +else + enable_lazy_lock="0" fi @@ -8592,7 +7544,9 @@ int main () { - madvise((void *)0, 0, 
0); + { + madvise((void *)0, 0, 0); + } ; return 0; @@ -8612,118 +7566,6 @@ $as_echo "$je_cv_madvise" >&6; } if test "x${je_cv_madvise}" = "xyes" ; then $as_echo "#define JEMALLOC_HAVE_MADVISE " >>confdefs.h - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether madvise(..., MADV_FREE) is compilable" >&5 -$as_echo_n "checking whether madvise(..., MADV_FREE) is compilable... " >&6; } -if ${je_cv_madv_free+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -#include - -int -main () -{ - - madvise((void *)0, 0, MADV_FREE); - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - je_cv_madv_free=yes -else - je_cv_madv_free=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_madv_free" >&5 -$as_echo "$je_cv_madv_free" >&6; } - - if test "x${je_cv_madv_free}" = "xyes" ; then - $as_echo "#define JEMALLOC_PURGE_MADVISE_FREE " >>confdefs.h - - fi - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether madvise(..., MADV_DONTNEED) is compilable" >&5 -$as_echo_n "checking whether madvise(..., MADV_DONTNEED) is compilable... " >&6; } -if ${je_cv_madv_dontneed+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - -#include - -int -main () -{ - - madvise((void *)0, 0, MADV_DONTNEED); - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - je_cv_madv_dontneed=yes -else - je_cv_madv_dontneed=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_madv_dontneed" >&5 -$as_echo "$je_cv_madv_dontneed" >&6; } - - if test "x${je_cv_madv_dontneed}" = "xyes" ; then - $as_echo "#define JEMALLOC_PURGE_MADVISE_DONTNEED " >>confdefs.h - - fi - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether madvise(..., MADV_[NO]HUGEPAGE) is compilable" >&5 -$as_echo_n "checking whether madvise(..., MADV_[NO]HUGEPAGE) is compilable... " >&6; } -if ${je_cv_thp+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -#include - -int -main () -{ - - madvise((void *)0, 0, MADV_HUGEPAGE); - madvise((void *)0, 0, MADV_NOHUGEPAGE); - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - je_cv_thp=yes -else - je_cv_thp=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_thp" >&5 -$as_echo "$je_cv_thp" >&6; } - - if test "x${je_cv_thp}" = "xyes" ; then - $as_echo "#define JEMALLOC_THP " >>confdefs.h - - fi fi @@ -8866,51 +7708,6 @@ fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether Darwin os_unfair_lock_*() is compilable" >&5 -$as_echo_n "checking whether Darwin os_unfair_lock_*() is compilable... " >&6; } -if ${je_cv_os_unfair_lock+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - -#include -#include - -int -main () -{ - - #if MAC_OS_X_VERSION_MIN_REQUIRED < 101200 - #error "os_unfair_lock is not supported" - #else - os_unfair_lock lock = OS_UNFAIR_LOCK_INIT; - os_unfair_lock_lock(&lock); - os_unfair_lock_unlock(&lock); - #endif - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - je_cv_os_unfair_lock=yes -else - je_cv_os_unfair_lock=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_os_unfair_lock" >&5 -$as_echo "$je_cv_os_unfair_lock" >&6; } - -if test "x${je_cv_os_unfair_lock}" = "xyes" ; then - $as_echo "#define JEMALLOC_OS_UNFAIR_LOCK " >>confdefs.h - -fi - - - { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether Darwin OSSpin*() is compilable" >&5 $as_echo_n "checking whether Darwin OSSpin*() is compilable... " >&6; } if ${je_cv_osspin+:} false; then : @@ -10813,8 +9610,6 @@ $as_echo "CONFIG : ${CONFIG}" >&6; } $as_echo "CC : ${CC}" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: result: CFLAGS : ${CFLAGS}" >&5 $as_echo "CFLAGS : ${CFLAGS}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: EXTRA_CFLAGS : ${EXTRA_CFLAGS}" >&5 -$as_echo "EXTRA_CFLAGS : ${EXTRA_CFLAGS}" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: result: CPPFLAGS : ${CPPFLAGS}" >&5 $as_echo "CPPFLAGS : ${CPPFLAGS}" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: result: LDFLAGS : ${LDFLAGS}" >&5 @@ -10823,6 +9618,8 @@ $as_echo "LDFLAGS : ${LDFLAGS}" >&6; } $as_echo "EXTRA_LDFLAGS : ${EXTRA_LDFLAGS}" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: result: LIBS : ${LIBS}" >&5 $as_echo "LIBS : ${LIBS}" >&6; } +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: TESTLIBS : ${TESTLIBS}" >&5 +$as_echo "TESTLIBS : ${TESTLIBS}" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: result: RPATH_EXTRA : ${RPATH_EXTRA}" >&5 $as_echo "RPATH_EXTRA : ${RPATH_EXTRA}" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: result: " >&5 @@ -10865,8 +9662,6 @@ 
$as_echo "JEMALLOC_PRIVATE_NAMESPACE" >&6; } $as_echo " : ${JEMALLOC_PRIVATE_NAMESPACE}" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: result: install_suffix : ${install_suffix}" >&5 $as_echo "install_suffix : ${install_suffix}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: malloc_conf : ${config_malloc_conf}" >&5 -$as_echo "malloc_conf : ${config_malloc_conf}" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: result: autogen : ${enable_autogen}" >&5 $as_echo "autogen : ${enable_autogen}" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: result: cc-silence : ${enable_cc_silence}" >&5 diff --git a/deps/jemalloc/configure.ac b/deps/jemalloc/configure.ac index 9573c3020..7a1290e0d 100644 --- a/deps/jemalloc/configure.ac +++ b/deps/jemalloc/configure.ac @@ -1,8 +1,6 @@ dnl Process this file with autoconf to produce a configure script. AC_INIT([Makefile.in]) -AC_CONFIG_AUX_DIR([build-aux]) - dnl ============================================================================ dnl Custom macro definitions. @@ -118,7 +116,6 @@ dnl If CFLAGS isn't defined, set CFLAGS to something reasonable. Otherwise, dnl just prevent autoconf from molesting CFLAGS. 
CFLAGS=$CFLAGS AC_PROG_CC - if test "x$GCC" != "xyes" ; then AC_CACHE_CHECK([whether compiler is MSVC], [je_cv_msvc], @@ -132,58 +129,15 @@ if test "x$GCC" != "xyes" ; then [je_cv_msvc=no])]) fi -dnl check if a cray prgenv wrapper compiler is being used -je_cv_cray_prgenv_wrapper="" -if test "x${PE_ENV}" != "x" ; then - case "${CC}" in - CC|cc) - je_cv_cray_prgenv_wrapper="yes" - ;; - *) - ;; - esac -fi - -AC_CACHE_CHECK([whether compiler is cray], - [je_cv_cray], - [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], - [ -#ifndef _CRAYC - int fail[-1]; -#endif -])], - [je_cv_cray=yes], - [je_cv_cray=no])]) - -if test "x${je_cv_cray}" = "xyes" ; then - AC_CACHE_CHECK([whether cray compiler version is 8.4], - [je_cv_cray_84], - [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], - [ -#if !(_RELEASE_MAJOR == 8 && _RELEASE_MINOR == 4) - int fail[-1]; -#endif -])], - [je_cv_cray_84=yes], - [je_cv_cray_84=no])]) -fi - if test "x$CFLAGS" = "x" ; then no_CFLAGS="yes" if test "x$GCC" = "xyes" ; then - JE_CFLAGS_APPEND([-std=gnu11]) - if test "x$je_cv_cflags_appended" = "x-std=gnu11" ; then + JE_CFLAGS_APPEND([-std=gnu99]) + if test "x$je_cv_cflags_appended" = "x-std=gnu99" ; then AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT]) - else - JE_CFLAGS_APPEND([-std=gnu99]) - if test "x$je_cv_cflags_appended" = "x-std=gnu99" ; then - AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT]) - fi fi JE_CFLAGS_APPEND([-Wall]) JE_CFLAGS_APPEND([-Werror=declaration-after-statement]) - JE_CFLAGS_APPEND([-Wshorten-64-to-32]) - JE_CFLAGS_APPEND([-Wsign-compare]) JE_CFLAGS_APPEND([-pipe]) JE_CFLAGS_APPEND([-g3]) elif test "x$je_cv_msvc" = "xyes" ; then @@ -194,21 +148,11 @@ if test "x$CFLAGS" = "x" ; then JE_CFLAGS_APPEND([-FS]) CPPFLAGS="$CPPFLAGS -I${srcdir}/include/msvc_compat" fi - if test "x$je_cv_cray" = "xyes" ; then - dnl cray compiler 8.4 has an inlining bug - if test "x$je_cv_cray_84" = "xyes" ; then - JE_CFLAGS_APPEND([-hipa2]) - JE_CFLAGS_APPEND([-hnognu]) - fi - if test "x$enable_cc_silence" != "xno" ; then - dnl 
ignore unreachable code warning - JE_CFLAGS_APPEND([-hnomessage=128]) - dnl ignore redefinition of "malloc", "free", etc warning - JE_CFLAGS_APPEND([-hnomessage=1357]) - fi - fi fi -AC_SUBST([EXTRA_CFLAGS]) +dnl Append EXTRA_CFLAGS to CFLAGS, if defined. +if test "x$EXTRA_CFLAGS" != "x" ; then + JE_CFLAGS_APPEND([$EXTRA_CFLAGS]) +fi AC_PROG_CPP AC_C_BIGENDIAN([ac_cv_big_endian=1], [ac_cv_big_endian=0]) @@ -220,18 +164,13 @@ if test "x${je_cv_msvc}" = "xyes" -a "x${ac_cv_header_inttypes_h}" = "xno"; then CPPFLAGS="$CPPFLAGS -I${srcdir}/include/msvc_compat/C99" fi -if test "x${je_cv_msvc}" = "xyes" ; then - LG_SIZEOF_PTR=LG_SIZEOF_PTR_WIN - AC_MSG_RESULT([Using a predefined value for sizeof(void *): 4 for 32-bit, 8 for 64-bit]) +AC_CHECK_SIZEOF([void *]) +if test "x${ac_cv_sizeof_void_p}" = "x8" ; then + LG_SIZEOF_PTR=3 +elif test "x${ac_cv_sizeof_void_p}" = "x4" ; then + LG_SIZEOF_PTR=2 else - AC_CHECK_SIZEOF([void *]) - if test "x${ac_cv_sizeof_void_p}" = "x8" ; then - LG_SIZEOF_PTR=3 - elif test "x${ac_cv_sizeof_void_p}" = "x4" ; then - LG_SIZEOF_PTR=2 - else - AC_MSG_ERROR([Unsupported pointer size: ${ac_cv_sizeof_void_p}]) - fi + AC_MSG_ERROR([Unsupported pointer size: ${ac_cv_sizeof_void_p}]) fi AC_DEFINE_UNQUOTED([LG_SIZEOF_PTR], [$LG_SIZEOF_PTR]) @@ -255,16 +194,6 @@ else fi AC_DEFINE_UNQUOTED([LG_SIZEOF_LONG], [$LG_SIZEOF_LONG]) -AC_CHECK_SIZEOF([long long]) -if test "x${ac_cv_sizeof_long_long}" = "x8" ; then - LG_SIZEOF_LONG_LONG=3 -elif test "x${ac_cv_sizeof_long_long}" = "x4" ; then - LG_SIZEOF_LONG_LONG=2 -else - AC_MSG_ERROR([Unsupported long long size: ${ac_cv_sizeof_long_long}]) -fi -AC_DEFINE_UNQUOTED([LG_SIZEOF_LONG_LONG], [$LG_SIZEOF_LONG_LONG]) - AC_CHECK_SIZEOF([intmax_t]) if test "x${ac_cv_sizeof_intmax_t}" = "x16" ; then LG_SIZEOF_INTMAX_T=4 @@ -282,22 +211,12 @@ dnl CPU-specific settings. 
CPU_SPINWAIT="" case "${host_cpu}" in i686|x86_64) - if test "x${je_cv_msvc}" = "xyes" ; then - AC_CACHE_VAL([je_cv_pause_msvc], - [JE_COMPILABLE([pause instruction MSVC], [], - [[_mm_pause(); return 0;]], - [je_cv_pause_msvc])]) - if test "x${je_cv_pause_msvc}" = "xyes" ; then - CPU_SPINWAIT='_mm_pause()' - fi - else - AC_CACHE_VAL([je_cv_pause], - [JE_COMPILABLE([pause instruction], [], - [[__asm__ volatile("pause"); return 0;]], - [je_cv_pause])]) - if test "x${je_cv_pause}" = "xyes" ; then - CPU_SPINWAIT='__asm__ volatile("pause")' - fi + AC_CACHE_VAL([je_cv_pause], + [JE_COMPILABLE([pause instruction], [], + [[__asm__ volatile("pause"); return 0;]], + [je_cv_pause])]) + if test "x${je_cv_pause}" = "xyes" ; then + CPU_SPINWAIT='__asm__ volatile("pause")' fi ;; powerpc) @@ -315,27 +234,17 @@ o="$ac_objext" a="a" exe="$ac_exeext" libprefix="lib" -link_whole_archive="0" DSO_LDFLAGS='-shared -Wl,-soname,$(@F)' RPATH='-Wl,-rpath,$(1)' SOREV="${so}.${rev}" PIC_CFLAGS='-fPIC -DPIC' CTARGET='-o $@' LDTARGET='-o $@' -TEST_LD_MODE= EXTRA_LDFLAGS= ARFLAGS='crus' AROUT=' $@' CC_MM=1 -if test "x$je_cv_cray_prgenv_wrapper" = "xyes" ; then - TEST_LD_MODE='-dynamic' -fi - -if test "x${je_cv_cray}" = "xyes" ; then - CC_MM= -fi - AN_MAKEVAR([AR], [AC_PROG_AR]) AN_PROGRAM([ar], [AC_PROG_AR]) AC_DEFUN([AC_PROG_AR], [AC_CHECK_TOOL(AR, ar, :)]) @@ -348,12 +257,13 @@ dnl dnl Define cpp macros in CPPFLAGS, rather than doing AC_DEFINE(macro), since the dnl definitions need to be seen before any headers are included, which is a pain dnl to make happen otherwise. 
-CFLAGS="$CFLAGS" default_munmap="1" maps_coalesce="1" case "${host}" in *-*-darwin* | *-*-ios*) + CFLAGS="$CFLAGS" abi="macho" + AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) RPATH="" LD_PRELOAD_VAR="DYLD_INSERT_LIBRARIES" so="dylib" @@ -364,37 +274,33 @@ case "${host}" in sbrk_deprecated="1" ;; *-*-freebsd*) + CFLAGS="$CFLAGS" abi="elf" - AC_DEFINE([JEMALLOC_SYSCTL_VM_OVERCOMMIT], [ ]) + AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) force_lazy_lock="1" ;; *-*-dragonfly*) + CFLAGS="$CFLAGS" abi="elf" + AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) ;; *-*-openbsd*) + CFLAGS="$CFLAGS" abi="elf" + AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) force_tls="0" ;; *-*-bitrig*) + CFLAGS="$CFLAGS" abi="elf" + AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) ;; - *-*-linux-android) - dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. + *-*-linux*) + CFLAGS="$CFLAGS" CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE" abi="elf" AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) - AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ]) - AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) - AC_DEFINE([JEMALLOC_C11ATOMICS]) - force_tls="0" - default_munmap="0" - ;; - *-*-linux* | *-*-kfreebsd*) - dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. - CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE" - abi="elf" - AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) - AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ]) + AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ]) AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) AC_DEFINE([JEMALLOC_USE_CXX_THROW], [ ]) default_munmap="0" @@ -408,12 +314,15 @@ case "${host}" in #error aout #endif ]])], - [abi="elf"], + [CFLAGS="$CFLAGS"; abi="elf"], [abi="aout"]) AC_MSG_RESULT([$abi]) + AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) ;; *-*-solaris2*) + CFLAGS="$CFLAGS" abi="elf" + AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) RPATH='-Wl,-R,$(1)' dnl Solaris needs this for sigwait(). 
CPPFLAGS="$CPPFLAGS -D_POSIX_PTHREAD_SEMANTICS" @@ -432,6 +341,7 @@ case "${host}" in *-*-mingw* | *-*-cygwin*) abi="pecoff" force_tls="0" + force_lazy_lock="1" maps_coalesce="0" RPATH="" so="dll" @@ -448,7 +358,6 @@ case "${host}" in else importlib="${so}" DSO_LDFLAGS="-shared" - link_whole_archive="1" fi a="lib" libprefix="" @@ -486,28 +395,17 @@ AC_SUBST([o]) AC_SUBST([a]) AC_SUBST([exe]) AC_SUBST([libprefix]) -AC_SUBST([link_whole_archive]) AC_SUBST([DSO_LDFLAGS]) AC_SUBST([EXTRA_LDFLAGS]) AC_SUBST([SOREV]) AC_SUBST([PIC_CFLAGS]) AC_SUBST([CTARGET]) AC_SUBST([LDTARGET]) -AC_SUBST([TEST_LD_MODE]) AC_SUBST([MKLIB]) AC_SUBST([ARFLAGS]) AC_SUBST([AROUT]) AC_SUBST([CC_MM]) -dnl Determine whether libm must be linked to use e.g. log(3). -AC_SEARCH_LIBS([log], [m], , [AC_MSG_ERROR([Missing math functions])]) -if test "x$ac_cv_search_log" != "xnone required" ; then - LM="$ac_cv_search_log" -else - LM= -fi -AC_SUBST(LM) - JE_COMPILABLE([__attribute__ syntax], [static __attribute__((unused)) void foo(void){}], [], @@ -521,7 +419,6 @@ fi dnl Check for tls_model attribute support (clang 3.0 still lacks support). SAVED_CFLAGS="${CFLAGS}" JE_CFLAGS_APPEND([-Werror]) -JE_CFLAGS_APPEND([-herror_on_warning]) JE_COMPILABLE([tls_model attribute], [], [static __thread int __attribute__((tls_model("initial-exec"), unused)) foo; @@ -537,7 +434,6 @@ fi dnl Check for alloc_size attribute support. SAVED_CFLAGS="${CFLAGS}" JE_CFLAGS_APPEND([-Werror]) -JE_CFLAGS_APPEND([-herror_on_warning]) JE_COMPILABLE([alloc_size attribute], [#include ], [void *foo(size_t size) __attribute__((alloc_size(1)));], [je_cv_alloc_size]) @@ -548,7 +444,6 @@ fi dnl Check for format(gnu_printf, ...) attribute support. SAVED_CFLAGS="${CFLAGS}" JE_CFLAGS_APPEND([-Werror]) -JE_CFLAGS_APPEND([-herror_on_warning]) JE_COMPILABLE([format(gnu_printf, ...) attribute], [#include ], [void *foo(const char *format, ...) 
__attribute__((format(gnu_printf, 1, 2)));], [je_cv_format_gnu_printf]) @@ -559,7 +454,6 @@ fi dnl Check for format(printf, ...) attribute support. SAVED_CFLAGS="${CFLAGS}" JE_CFLAGS_APPEND([-Werror]) -JE_CFLAGS_APPEND([-herror_on_warning]) JE_COMPILABLE([format(printf, ...) attribute], [#include ], [void *foo(const char *format, ...) __attribute__((format(printf, 1, 2)));], [je_cv_format_printf]) @@ -681,15 +575,6 @@ AC_ARG_WITH([install_suffix], install_suffix="$INSTALL_SUFFIX" AC_SUBST([install_suffix]) -dnl Specify default malloc_conf. -AC_ARG_WITH([malloc_conf], - [AS_HELP_STRING([--with-malloc-conf=], [config.malloc_conf options string])], - [JEMALLOC_CONFIG_MALLOC_CONF="$with_malloc_conf"], - [JEMALLOC_CONFIG_MALLOC_CONF=""] -) -config_malloc_conf="$JEMALLOC_CONFIG_MALLOC_CONF" -AC_DEFINE_UNQUOTED([JEMALLOC_CONFIG_MALLOC_CONF], ["$config_malloc_conf"]) - dnl Substitute @je_@ in jemalloc_protos.h.in, primarily to make generation of dnl jemalloc_protos_jet.h easy. je_="je_" @@ -954,9 +839,9 @@ fi AC_MSG_CHECKING([configured backtracing method]) AC_MSG_RESULT([$backtrace_method]) if test "x$enable_prof" = "x1" ; then - dnl Heap profiling uses the log(3) function. - if test "x$LM" != "x" ; then - LIBS="$LIBS $LM" + if test "x$abi" != "xpecoff"; then + dnl Heap profiling uses the log(3) function. 
+ LIBS="$LIBS -lm" fi AC_DEFINE([JEMALLOC_PROF], [ ]) @@ -1125,28 +1010,11 @@ if test "x$enable_cache_oblivious" = "x1" ; then fi AC_SUBST([enable_cache_oblivious]) - - -JE_COMPILABLE([a program using __builtin_unreachable], [ -void foo (void) { - __builtin_unreachable(); -} -], [ - { - foo(); - } -], [je_cv_gcc_builtin_unreachable]) -if test "x${je_cv_gcc_builtin_unreachable}" = "xyes" ; then - AC_DEFINE([JEMALLOC_INTERNAL_UNREACHABLE], [__builtin_unreachable]) -else - AC_DEFINE([JEMALLOC_INTERNAL_UNREACHABLE], [abort]) -fi - dnl ============================================================================ dnl Check for __builtin_ffsl(), then ffsl(3), and fail if neither are found. dnl One of those two functions should (theoretically) exist on all platforms dnl that jemalloc currently has a chance of functioning on without modification. -dnl We additionally assume ffs[ll]() or __builtin_ffs[ll]() are defined if +dnl We additionally assume ffs() or __builtin_ffs() are defined if dnl ffsl() or __builtin_ffsl() are defined, respectively. JE_COMPILABLE([a program using __builtin_ffsl], [ #include @@ -1159,7 +1027,6 @@ JE_COMPILABLE([a program using __builtin_ffsl], [ } ], [je_cv_gcc_builtin_ffsl]) if test "x${je_cv_gcc_builtin_ffsl}" = "xyes" ; then - AC_DEFINE([JEMALLOC_INTERNAL_FFSLL], [__builtin_ffsll]) AC_DEFINE([JEMALLOC_INTERNAL_FFSL], [__builtin_ffsl]) AC_DEFINE([JEMALLOC_INTERNAL_FFS], [__builtin_ffs]) else @@ -1174,7 +1041,6 @@ else } ], [je_cv_function_ffsl]) if test "x${je_cv_function_ffsl}" = "xyes" ; then - AC_DEFINE([JEMALLOC_INTERNAL_FFSLL], [ffsll]) AC_DEFINE([JEMALLOC_INTERNAL_FFSL], [ffsl]) AC_DEFINE([JEMALLOC_INTERNAL_FFS], [ffs]) else @@ -1234,7 +1100,7 @@ if test "x$LG_PAGE" = "xdetect"; then if (f == NULL) { return 1; } - fprintf(f, "%d", result); + fprintf(f, "%d\n", result); fclose(f); return 0; @@ -1267,36 +1133,27 @@ dnl ============================================================================ dnl jemalloc configuration. 
dnl -AC_ARG_WITH([version], - [AS_HELP_STRING([--with-version=..--g], - [Version string])], - [ - echo "${with_version}" | grep ['^[0-9]\+\.[0-9]\+\.[0-9]\+-[0-9]\+-g[0-9a-f]\+$'] 2>&1 1>/dev/null - if test $? -ne 0 ; then - AC_MSG_ERROR([${with_version} does not match ..--g]) +dnl Set VERSION if source directory is inside a git repository. +if test "x`test ! \"${srcroot}\" && cd \"${srcroot}\"; git rev-parse --is-inside-work-tree 2>/dev/null`" = "xtrue" ; then + dnl Pattern globs aren't powerful enough to match both single- and + dnl double-digit version numbers, so iterate over patterns to support up to + dnl version 99.99.99 without any accidental matches. + rm -f "${objroot}VERSION" + for pattern in ['[0-9].[0-9].[0-9]' '[0-9].[0-9].[0-9][0-9]' \ + '[0-9].[0-9][0-9].[0-9]' '[0-9].[0-9][0-9].[0-9][0-9]' \ + '[0-9][0-9].[0-9].[0-9]' '[0-9][0-9].[0-9].[0-9][0-9]' \ + '[0-9][0-9].[0-9][0-9].[0-9]' \ + '[0-9][0-9].[0-9][0-9].[0-9][0-9]']; do + if test ! -e "${objroot}VERSION" ; then + (test ! "${srcroot}" && cd "${srcroot}"; git describe --long --abbrev=40 --match="${pattern}") > "${objroot}VERSION.tmp" 2>/dev/null + if test $? -eq 0 ; then + mv "${objroot}VERSION.tmp" "${objroot}VERSION" + break + fi fi - echo "$with_version" > "${objroot}VERSION" - ], [ - dnl Set VERSION if source directory is inside a git repository. - if test "x`test ! \"${srcroot}\" && cd \"${srcroot}\"; git rev-parse --is-inside-work-tree 2>/dev/null`" = "xtrue" ; then - dnl Pattern globs aren't powerful enough to match both single- and - dnl double-digit version numbers, so iterate over patterns to support up - dnl to version 99.99.99 without any accidental matches. - for pattern in ['[0-9].[0-9].[0-9]' '[0-9].[0-9].[0-9][0-9]' \ - '[0-9].[0-9][0-9].[0-9]' '[0-9].[0-9][0-9].[0-9][0-9]' \ - '[0-9][0-9].[0-9].[0-9]' '[0-9][0-9].[0-9].[0-9][0-9]' \ - '[0-9][0-9].[0-9][0-9].[0-9]' \ - '[0-9][0-9].[0-9][0-9].[0-9][0-9]']; do - (test ! 
"${srcroot}" && cd "${srcroot}"; git describe --long --abbrev=40 --match="${pattern}") > "${objroot}VERSION.tmp" 2>/dev/null - if test $? -eq 0 ; then - mv "${objroot}VERSION.tmp" "${objroot}VERSION" - break - fi - done - fi - rm -f "${objroot}VERSION.tmp" - ]) - + done +fi +rm -f "${objroot}VERSION.tmp" if test ! -e "${objroot}VERSION" ; then if test ! -e "${srcroot}VERSION" ; then AC_MSG_RESULT( @@ -1329,101 +1186,17 @@ if test "x$abi" != "xpecoff" ; then AC_CHECK_LIB([pthread], [pthread_create], [LIBS="$LIBS -lpthread"], [AC_SEARCH_LIBS([pthread_create], , , AC_MSG_ERROR([libpthread is missing]))]) - JE_COMPILABLE([pthread_atfork(3)], [ -#include -], [ - pthread_atfork((void *)0, (void *)0, (void *)0); -], [je_cv_pthread_atfork]) - if test "x${je_cv_pthread_atfork}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_PTHREAD_ATFORK], [ ]) - fi fi CPPFLAGS="$CPPFLAGS -D_REENTRANT" -dnl Check whether clock_gettime(2) is in libc or librt. -AC_SEARCH_LIBS([clock_gettime], [rt]) - -dnl Cray wrapper compiler often adds `-lrt` when using `-static`. Check with -dnl `-dynamic` as well in case a user tries to dynamically link in jemalloc -if test "x$je_cv_cray_prgenv_wrapper" = "xyes" ; then - if test "$ac_cv_search_clock_gettime" != "-lrt"; then - SAVED_CFLAGS="${CFLAGS}" - - unset ac_cv_search_clock_gettime - JE_CFLAGS_APPEND([-dynamic]) - AC_SEARCH_LIBS([clock_gettime], [rt]) - - CFLAGS="${SAVED_CFLAGS}" - fi -fi - -dnl check for CLOCK_MONOTONIC_COARSE (Linux-specific). -JE_COMPILABLE([clock_gettime(CLOCK_MONOTONIC_COARSE, ...)], [ -#include -], [ - struct timespec ts; - - clock_gettime(CLOCK_MONOTONIC_COARSE, &ts); -], [je_cv_clock_monotonic_coarse]) -if test "x${je_cv_clock_monotonic_coarse}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE]) -fi - -dnl check for CLOCK_MONOTONIC. 
-JE_COMPILABLE([clock_gettime(CLOCK_MONOTONIC, ...)], [ -#include -#include -], [ - struct timespec ts; - - clock_gettime(CLOCK_MONOTONIC, &ts); -#if !defined(_POSIX_MONOTONIC_CLOCK) || _POSIX_MONOTONIC_CLOCK < 0 -# error _POSIX_MONOTONIC_CLOCK missing/invalid -#endif -], [je_cv_clock_monotonic]) -if test "x${je_cv_clock_monotonic}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_CLOCK_MONOTONIC]) -fi - -dnl Check for mach_absolute_time(). -JE_COMPILABLE([mach_absolute_time()], [ -#include -], [ - mach_absolute_time(); -], [je_cv_mach_absolute_time]) -if test "x${je_cv_mach_absolute_time}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_MACH_ABSOLUTE_TIME]) -fi - -dnl Use syscall(2) (if available) by default. -AC_ARG_ENABLE([syscall], - [AS_HELP_STRING([--disable-syscall], [Disable use of syscall(2)])], -[if test "x$enable_syscall" = "xno" ; then - enable_syscall="0" -else - enable_syscall="1" -fi -], -[enable_syscall="1"] -) -if test "x$enable_syscall" = "x1" ; then - dnl Check if syscall(2) is usable. Treat warnings as errors, so that e.g. OS - dnl X 10.12's deprecation warning prevents use. - SAVED_CFLAGS="${CFLAGS}" - JE_CFLAGS_APPEND([-Werror]) - JE_COMPILABLE([syscall(2)], [ -#include -#include -], [ - syscall(SYS_write, 2, "hello", 5); -], - [je_cv_syscall]) - CFLAGS="${SAVED_CFLAGS}" - if test "x$je_cv_syscall" = "xyes" ; then - AC_DEFINE([JEMALLOC_USE_SYSCALL], [ ]) - fi -fi +dnl Check whether clock_gettime(2) is in libc or librt. This function is only +dnl used in test code, so save the result to TESTLIBS to avoid poluting LIBS. +SAVED_LIBS="${LIBS}" +LIBS= +AC_SEARCH_LIBS([clock_gettime], [rt], [TESTLIBS="${LIBS}"]) +AC_SUBST([TESTLIBS]) +LIBS="${SAVED_LIBS}" dnl Check if the GNU-specific secure_getenv function exists. 
AC_CHECK_FUNC([secure_getenv], @@ -1479,17 +1252,9 @@ fi ], [enable_lazy_lock=""] ) -if test "x${enable_lazy_lock}" = "x" ; then - if test "x${force_lazy_lock}" = "x1" ; then - AC_MSG_RESULT([Forcing lazy-lock to avoid allocator/threading bootstrap issues]) - enable_lazy_lock="1" - else - enable_lazy_lock="0" - fi -fi -if test "x${enable_lazy_lock}" = "x1" -a "x${abi}" = "xpecoff" ; then - AC_MSG_RESULT([Forcing no lazy-lock because thread creation monitoring is unimplemented]) - enable_lazy_lock="0" +if test "x$enable_lazy_lock" = "x" -a "x${force_lazy_lock}" = "x1" ; then + AC_MSG_RESULT([Forcing lazy-lock to avoid allocator/threading bootstrap issues]) + enable_lazy_lock="1" fi if test "x$enable_lazy_lock" = "x1" ; then if test "x$abi" != "xpecoff" ; then @@ -1500,6 +1265,8 @@ if test "x$enable_lazy_lock" = "x1" ; then ]) fi AC_DEFINE([JEMALLOC_LAZY_LOCK], [ ]) +else + enable_lazy_lock="0" fi AC_SUBST([enable_lazy_lock]) @@ -1622,41 +1389,12 @@ dnl Check for madvise(2). JE_COMPILABLE([madvise(2)], [ #include ], [ - madvise((void *)0, 0, 0); + { + madvise((void *)0, 0, 0); + } ], [je_cv_madvise]) if test "x${je_cv_madvise}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_MADVISE], [ ]) - - dnl Check for madvise(..., MADV_FREE). - JE_COMPILABLE([madvise(..., MADV_FREE)], [ -#include -], [ - madvise((void *)0, 0, MADV_FREE); -], [je_cv_madv_free]) - if test "x${je_cv_madv_free}" = "xyes" ; then - AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) - fi - - dnl Check for madvise(..., MADV_DONTNEED). - JE_COMPILABLE([madvise(..., MADV_DONTNEED)], [ -#include -], [ - madvise((void *)0, 0, MADV_DONTNEED); -], [je_cv_madv_dontneed]) - if test "x${je_cv_madv_dontneed}" = "xyes" ; then - AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ]) - fi - - dnl Check for madvise(..., MADV_[NO]HUGEPAGE). 
- JE_COMPILABLE([madvise(..., MADV_[[NO]]HUGEPAGE)], [ -#include -], [ - madvise((void *)0, 0, MADV_HUGEPAGE); - madvise((void *)0, 0, MADV_NOHUGEPAGE); -], [je_cv_thp]) - if test "x${je_cv_thp}" = "xyes" ; then - AC_DEFINE([JEMALLOC_THP], [ ]) - fi fi dnl ============================================================================ @@ -1716,25 +1454,6 @@ if test "x${je_cv_builtin_clz}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_BUILTIN_CLZ], [ ]) fi -dnl ============================================================================ -dnl Check for os_unfair_lock operations as provided on Darwin. - -JE_COMPILABLE([Darwin os_unfair_lock_*()], [ -#include -#include -], [ - #if MAC_OS_X_VERSION_MIN_REQUIRED < 101200 - #error "os_unfair_lock is not supported" - #else - os_unfair_lock lock = OS_UNFAIR_LOCK_INIT; - os_unfair_lock_lock(&lock); - os_unfair_lock_unlock(&lock); - #endif -], [je_cv_os_unfair_lock]) -if test "x${je_cv_os_unfair_lock}" = "xyes" ; then - AC_DEFINE([JEMALLOC_OS_UNFAIR_LOCK], [ ]) -fi - dnl ============================================================================ dnl Check for spinlock(3) operations as provided on Darwin. 
@@ -1979,11 +1698,11 @@ AC_MSG_RESULT([]) AC_MSG_RESULT([CONFIG : ${CONFIG}]) AC_MSG_RESULT([CC : ${CC}]) AC_MSG_RESULT([CFLAGS : ${CFLAGS}]) -AC_MSG_RESULT([EXTRA_CFLAGS : ${EXTRA_CFLAGS}]) AC_MSG_RESULT([CPPFLAGS : ${CPPFLAGS}]) AC_MSG_RESULT([LDFLAGS : ${LDFLAGS}]) AC_MSG_RESULT([EXTRA_LDFLAGS : ${EXTRA_LDFLAGS}]) AC_MSG_RESULT([LIBS : ${LIBS}]) +AC_MSG_RESULT([TESTLIBS : ${TESTLIBS}]) AC_MSG_RESULT([RPATH_EXTRA : ${RPATH_EXTRA}]) AC_MSG_RESULT([]) AC_MSG_RESULT([XSLTPROC : ${XSLTPROC}]) @@ -2005,7 +1724,6 @@ AC_MSG_RESULT([JEMALLOC_PREFIX : ${JEMALLOC_PREFIX}]) AC_MSG_RESULT([JEMALLOC_PRIVATE_NAMESPACE]) AC_MSG_RESULT([ : ${JEMALLOC_PRIVATE_NAMESPACE}]) AC_MSG_RESULT([install_suffix : ${install_suffix}]) -AC_MSG_RESULT([malloc_conf : ${config_malloc_conf}]) AC_MSG_RESULT([autogen : ${enable_autogen}]) AC_MSG_RESULT([cc-silence : ${enable_cc_silence}]) AC_MSG_RESULT([debug : ${enable_debug}]) diff --git a/deps/jemalloc/doc/html.xsl.in b/deps/jemalloc/doc/html.xsl.in index ec4fa6552..a91d9746f 100644 --- a/deps/jemalloc/doc/html.xsl.in +++ b/deps/jemalloc/doc/html.xsl.in @@ -1,5 +1,4 @@ - diff --git a/deps/jemalloc/doc/jemalloc.3 b/deps/jemalloc/doc/jemalloc.3 index 3709f6692..2e6b2c0e8 100644 --- a/deps/jemalloc/doc/jemalloc.3 +++ b/deps/jemalloc/doc/jemalloc.3 @@ -1,13 +1,13 @@ '\" t .\" Title: JEMALLOC .\" Author: Jason Evans -.\" Generator: DocBook XSL Stylesheets v1.79.1 -.\" Date: 12/03/2016 +.\" Generator: DocBook XSL Stylesheets v1.78.1 +.\" Date: 09/24/2015 .\" Manual: User Manual -.\" Source: jemalloc 4.4.0-0-gf1f76357313e7dcad7262f17a48ff0a2e005fcdc +.\" Source: jemalloc 4.0.3-0-ge9192eacf8935e29fc62fddc2701f7942b1cc02c .\" Language: English .\" -.TH "JEMALLOC" "3" "12/03/2016" "jemalloc 4.4.0-0-gf1f76357313e" "User Manual" +.TH "JEMALLOC" "3" "09/24/2015" "jemalloc 4.0.3-0-ge9192eacf893" "User Manual" .\" ----------------------------------------------------------------- .\" * Define some portability stuff .\" 
----------------------------------------------------------------- @@ -31,7 +31,7 @@ jemalloc \- general purpose memory allocation functions .SH "LIBRARY" .PP -This manual describes jemalloc 4\&.4\&.0\-0\-gf1f76357313e7dcad7262f17a48ff0a2e005fcdc\&. More information can be found at the +This manual describes jemalloc 4\&.0\&.3\-0\-ge9192eacf8935e29fc62fddc2701f7942b1cc02c\&. More information can be found at the \m[blue]\fBjemalloc website\fR\m[]\&\s-2\u[1]\d\s+2\&. .SH "SYNOPSIS" .sp @@ -86,26 +86,26 @@ const char *\fImalloc_conf\fR; .SS "Standard API" .PP The -malloc() +\fBmalloc\fR\fB\fR function allocates \fIsize\fR bytes of uninitialized memory\&. The allocated space is suitably aligned (after possible pointer coercion) for storage of any type of object\&. .PP The -calloc() +\fBcalloc\fR\fB\fR function allocates space for \fInumber\fR objects, each \fIsize\fR bytes in length\&. The result is identical to calling -malloc() +\fBmalloc\fR\fB\fR with an argument of \fInumber\fR * \fIsize\fR, with the exception that the allocated memory is explicitly initialized to zero bytes\&. .PP The -posix_memalign() +\fBposix_memalign\fR\fB\fR function allocates \fIsize\fR bytes of memory such that the allocation\*(Aqs base address is a multiple of @@ -116,7 +116,7 @@ must be a power of 2 at least as large as sizeof(\fBvoid *\fR)\&. .PP The -aligned_alloc() +\fBaligned_alloc\fR\fB\fR function allocates \fIsize\fR bytes of memory such that the allocation\*(Aqs base address is a multiple of @@ -128,7 +128,7 @@ is not an integral multiple of \fIalignment\fR\&. .PP The -realloc() +\fBrealloc\fR\fB\fR function changes the size of the previously allocated memory referenced by \fIptr\fR to @@ -136,19 +136,19 @@ to bytes\&. The contents of the memory are unchanged up to the lesser of the new and old sizes\&. If the new size is larger, the contents of the newly allocated portion of the memory are undefined\&. 
Upon success, the memory referenced by \fIptr\fR is freed and a pointer to the newly allocated memory is returned\&. Note that -realloc() +\fBrealloc\fR\fB\fR may move the memory allocation, resulting in a different return value than \fIptr\fR\&. If \fIptr\fR is \fBNULL\fR, the -realloc() +\fBrealloc\fR\fB\fR function behaves identically to -malloc() +\fBmalloc\fR\fB\fR for the specified size\&. .PP The -free() +\fBfree\fR\fB\fR function causes the allocated memory referenced by \fIptr\fR to be made available for future allocations\&. If @@ -158,13 +158,13 @@ is .SS "Non\-standard API" .PP The -mallocx(), -rallocx(), -xallocx(), -sallocx(), -dallocx(), -sdallocx(), and -nallocx() +\fBmallocx\fR\fB\fR, +\fBrallocx\fR\fB\fR, +\fBxallocx\fR\fB\fR, +\fBsallocx\fR\fB\fR, +\fBdallocx\fR\fB\fR, +\fBsdallocx\fR\fB\fR, and +\fBnallocx\fR\fB\fR functions all have a \fIflags\fR argument that can be used to specify options\&. The functions only check the options that are contextually relevant\&. Use bitwise or (|) operations to specify one or more of the following: @@ -196,7 +196,7 @@ Initialize newly allocated memory to contain zero bytes\&. In the growing reallo .RS 4 Use the thread\-specific cache (tcache) specified by the identifier \fItc\fR, which must have been acquired via the -tcache\&.create +"tcache\&.create" mallctl\&. This macro does not validate that \fItc\fR specifies a valid identifier\&. @@ -223,16 +223,16 @@ specifies an arena index in the valid range\&. .RE .PP The -mallocx() +\fBmallocx\fR\fB\fR function allocates at least \fIsize\fR bytes of memory, and returns a pointer to the base address of the allocation\&. Behavior is undefined if \fIsize\fR is -\fB0\fR\&. +\fB0\fR, or if request size overflows due to size class and/or alignment constraints\&. 
.PP The -rallocx() +\fBrallocx\fR\fB\fR function resizes the allocation at \fIptr\fR to be at least @@ -240,10 +240,10 @@ to be at least bytes, and returns a pointer to the base address of the resulting allocation, which may or may not have moved from its original location\&. Behavior is undefined if \fIsize\fR is -\fB0\fR\&. +\fB0\fR, or if request size overflows due to size class and/or alignment constraints\&. .PP The -xallocx() +\fBxallocx\fR\fB\fR function resizes the allocation at \fIptr\fR in place to be at least @@ -259,42 +259,40 @@ is (\fIsize\fR + \fIextra\fR > \fBSIZE_T_MAX\fR)\&. .PP The -sallocx() +\fBsallocx\fR\fB\fR function returns the real size of the allocation at \fIptr\fR\&. .PP The -dallocx() +\fBdallocx\fR\fB\fR function causes the memory referenced by \fIptr\fR to be made available for future allocations\&. .PP The -sdallocx() +\fBsdallocx\fR\fB\fR function is an extension of -dallocx() +\fBdallocx\fR\fB\fR with a \fIsize\fR parameter to allow the caller to pass in the allocation size as an optimization\&. The minimum valid input size is the original requested size of the allocation, and the maximum valid input size is the corresponding value returned by -nallocx() +\fBnallocx\fR\fB\fR or -sallocx()\&. +\fBsallocx\fR\fB\fR\&. .PP The -nallocx() +\fBnallocx\fR\fB\fR function allocates no memory, but it performs the same size computation as the -mallocx() +\fBmallocx\fR\fB\fR function, and returns the real size of the allocation that would result from the equivalent -mallocx() -function call, or -\fB0\fR -if the inputs exceed the maximum supported size class and/or alignment\&. Behavior is undefined if +\fBmallocx\fR\fB\fR +function call\&. Behavior is undefined if \fIsize\fR is -\fB0\fR\&. +\fB0\fR, or if request size overflows due to size class and/or alignment constraints\&. 
.PP The -mallctl() +\fBmallctl\fR\fB\fR function provides a general interface for introspecting the memory allocator, as well as setting modifiable parameters and triggering actions\&. The period\-separated \fIname\fR argument specifies a location in a tree\-structured namespace; see the @@ -313,12 +311,10 @@ and \fB0\fR\&. .PP The -mallctlnametomib() -function provides a way to avoid repeated name lookups for applications that repeatedly query the same portion of the namespace, by translating a name to a -\(lqManagement Information Base\(rq -(MIB) that can be passed repeatedly to -mallctlbymib()\&. Upon successful return from -mallctlnametomib(), +\fBmallctlnametomib\fR\fB\fR +function provides a way to avoid repeated name lookups for applications that repeatedly query the same portion of the namespace, by translating a name to a \(lqManagement Information Base\(rq (MIB) that can be passed repeatedly to +\fBmallctlbymib\fR\fB\fR\&. Upon successful return from +\fBmallctlnametomib\fR\fB\fR, \fImibp\fR contains an array of \fI*miblenp\fR @@ -330,7 +326,7 @@ and the input value of \fI*miblenp\fR\&. Thus it is possible to pass a \fI*miblenp\fR that is smaller than the number of period\-separated name components, which results in a partial MIB that can be used as the basis for constructing a complete MIB\&. For name components that are integers (e\&.g\&. the 2 in -arenas\&.bin\&.2\&.size), the corresponding MIB component will always be that integer\&. Therefore, it is legitimate to construct code like the following: +"arenas\&.bin\&.2\&.size"), the corresponding MIB component will always be that integer\&. Therefore, it is legitimate to construct code like the following: .sp .if n \{\ .RS 4 @@ -350,7 +346,7 @@ for (i = 0; i < nbins; i++) { mib[2] = i; len = sizeof(bin_size); - mallctlbymib(mib, miblen, (void *)&bin_size, &len, NULL, 0); + mallctlbymib(mib, miblen, &bin_size, &len, NULL, 0); /* Do something with bin_size\&.\&.\&. 
*/ } .fi @@ -359,87 +355,67 @@ for (i = 0; i < nbins; i++) { .\} .PP The -malloc_stats_print() -function writes summary statistics via the +\fBmalloc_stats_print\fR\fB\fR +function writes human\-readable summary statistics via the \fIwrite_cb\fR callback function pointer and \fIcbopaque\fR data passed to \fIwrite_cb\fR, or -malloc_message() +\fBmalloc_message\fR\fB\fR if \fIwrite_cb\fR is -\fBNULL\fR\&. The statistics are presented in human\-readable form unless -\(lqJ\(rq -is specified as a character within the -\fIopts\fR -string, in which case the statistics are presented in -\m[blue]\fBJSON format\fR\m[]\&\s-2\u[2]\d\s+2\&. This function can be called repeatedly\&. General information that never changes during execution can be omitted by specifying -\(lqg\(rq -as a character within the +\fBNULL\fR\&. This function can be called repeatedly\&. General information that never changes during execution can be omitted by specifying "g" as a character within the \fIopts\fR string\&. Note that -malloc_message() +\fBmalloc_message\fR\fB\fR uses the -mallctl*() +\fBmallctl*\fR\fB\fR functions internally, so inconsistent statistics can be reported if multiple threads use these functions simultaneously\&. If \fB\-\-enable\-stats\fR -is specified during configuration, -\(lqm\(rq -and -\(lqa\(rq -can be specified to omit merged arena and per arena statistics, respectively; -\(lqb\(rq, -\(lql\(rq, and -\(lqh\(rq -can be specified to omit per size class statistics for bins, large objects, and huge objects, respectively\&. Unrecognized characters are silently ignored\&. Note that thread caching may prevent some statistics from being completely up to date, since extra locking would be required to merge counters that track thread cache operations\&. 
+is specified during configuration, \(lqm\(rq and \(lqa\(rq can be specified to omit merged arena and per arena statistics, respectively; \(lqb\(rq, \(lql\(rq, and \(lqh\(rq can be specified to omit per size class statistics for bins, large objects, and huge objects, respectively\&. Unrecognized characters are silently ignored\&. Note that thread caching may prevent some statistics from being completely up to date, since extra locking would be required to merge counters that track thread cache operations\&. .PP The -malloc_usable_size() +\fBmalloc_usable_size\fR\fB\fR function returns the usable size of the allocation pointed to by \fIptr\fR\&. The return value may be larger than the size that was requested during allocation\&. The -malloc_usable_size() +\fBmalloc_usable_size\fR\fB\fR function is not a mechanism for in\-place -realloc(); rather it is provided solely as a tool for introspection purposes\&. Any discrepancy between the requested allocation size and the size reported by -malloc_usable_size() +\fBrealloc\fR\fB\fR; rather it is provided solely as a tool for introspection purposes\&. Any discrepancy between the requested allocation size and the size reported by +\fBmalloc_usable_size\fR\fB\fR should not be depended on, since such behavior is entirely implementation\-dependent\&. .SH "TUNING" .PP Once, when the first call is made to one of the memory allocation routines, the allocator initializes its internals based in part on various options that can be specified at compile\- or run\-time\&. 
.PP -The string specified via -\fB\-\-with\-malloc\-conf\fR, the string pointed to by the global variable -\fImalloc_conf\fR, the -\(lqname\(rq -of the file referenced by the symbolic link named +The string pointed to by the global variable +\fImalloc_conf\fR, the \(lqname\(rq of the file referenced by the symbolic link named /etc/malloc\&.conf, and the value of the environment variable \fBMALLOC_CONF\fR, will be interpreted, in that order, from left to right as options\&. Note that \fImalloc_conf\fR may be read before -main() +\fBmain\fR\fB\fR is entered, so the declaration of \fImalloc_conf\fR should specify an initializer that contains the final value to be read by jemalloc\&. -\fB\-\-with\-malloc\-conf\fR -and \fImalloc_conf\fR -are compile\-time mechanisms, whereas +is a compile\-time setting, whereas /etc/malloc\&.conf and \fBMALLOC_CONF\fR can be safely set any time prior to program invocation\&. .PP An options string is a comma\-separated list of option:value pairs\&. There is one key corresponding to each -opt\&.* +"opt\&.*" mallctl (see the MALLCTL NAMESPACE section for options documentation)\&. For example, abort:true,narenas:1 sets the -opt\&.abort +"opt\&.abort" and -opt\&.narenas +"opt\&.narenas" options\&. Some options have boolean values (true/false), others have integer values (base 8, 10, or 16, depending on prefix), and yet others have raw string values\&. .SH "IMPLEMENTATION NOTES" .PP @@ -460,26 +436,29 @@ In addition to multiple arenas, unless \fB\-\-disable\-tcache\fR is specified during configuration, this allocator supports thread\-specific caching for small and large objects, in order to make it possible to completely avoid synchronization for most allocation requests\&. Such caching allows very fast allocation in the common case, but it increases memory usage and fragmentation, since a bounded number of objects can remain allocated in each thread cache\&. 
.PP -Memory is conceptually broken into equal\-sized chunks, where the chunk size is a power of two that is greater than the page size\&. Chunks are always aligned to multiples of the chunk size\&. This alignment makes it possible to find metadata for user objects very quickly\&. User objects are broken into three categories according to size: small, large, and huge\&. Multiple small and large objects can reside within a single chunk, whereas huge objects each have one or more chunks backing them\&. Each chunk that contains small and/or large objects tracks its contents as runs of contiguous pages (unused, backing a set of small objects, or backing one large object)\&. The combination of chunk alignment and chunk page maps makes it possible to determine all metadata regarding small and large allocations in constant time\&. +Memory is conceptually broken into equal\-sized chunks, where the chunk size is a power of two that is greater than the page size\&. Chunks are always aligned to multiples of the chunk size\&. This alignment makes it possible to find metadata for user objects very quickly\&. +.PP +User objects are broken into three categories according to size: small, large, and huge\&. Small and large objects are managed entirely by arenas; huge objects are additionally aggregated in a single data structure that is shared by all threads\&. Huge objects are typically used by applications infrequently enough that this single data structure is not a scalability issue\&. +.PP +Each chunk that is managed by an arena tracks its contents as runs of contiguous pages (unused, backing a set of small objects, or backing one large object)\&. The combination of chunk alignment and chunk page maps makes it possible to determine all metadata regarding small and large allocations in constant time\&. .PP Small objects are managed in groups by page runs\&. Each run maintains a bitmap to track which regions are in use\&. 
Allocation requests that are no more than half the quantum (8 or 16, depending on architecture) are rounded up to the nearest power of two that is at least sizeof(\fBdouble\fR)\&. All other object size classes are multiples of the quantum, spaced such that there are four size classes for each doubling in size, which limits internal fragmentation to approximately 20% for all but the smallest size classes\&. Small size classes are smaller than four times the page size, large size classes are smaller than the chunk size (see the -opt\&.lg_chunk -option), and huge size classes extend from the chunk size up to the largest size class that does not exceed -\fBPTRDIFF_MAX\fR\&. +"opt\&.lg_chunk" +option), and huge size classes extend from the chunk size up to one size class less than the full address space size\&. .PP Allocations are packed tightly together, which can be an issue for multi\-threaded applications\&. If you need to assure that allocations do not suffer from cacheline sharing, round your allocation requests up to the nearest multiple of the cacheline size, or specify cacheline alignment when allocating\&. .PP The -realloc(), -rallocx(), and -xallocx() +\fBrealloc\fR\fB\fR, +\fBrallocx\fR\fB\fR, and +\fBxallocx\fR\fB\fR functions may resize allocations without moving them under limited circumstances\&. Unlike the -*allocx() +\fB*allocx\fR\fB\fR API, the standard API does not officially round up the usable size of an allocation to the nearest size class, so technically it is necessary to call -realloc() +\fBrealloc\fR\fB\fR to grow e\&.g\&. a 9\-byte allocation to 16 bytes, or shrink a 16\-byte allocation to 9 bytes\&. Growth and shrinkage trivially succeeds in place as long as the pre\-size and post\-size both round up to the same size class\&. 
No other API guarantees are made regarding in\-place resizing, but the current implementation also tries to resize large and huge allocations in place, as long as the pre\-size and post\-size are both large or both huge\&. In such cases shrinkage always succeeds for large size classes, but for huge size classes the chunk allocator must support splitting (see -arena\&.\&.chunk_hooks)\&. Growth only succeeds if the trailing memory is currently available, and additionally for huge size classes the chunk allocator must support merging\&. +"arena\&.\&.chunk_hooks")\&. Growth only succeeds if the trailing memory is currently available, and additionally for huge size classes the chunk allocator must support merging\&. .PP Assuming 2 MiB chunks, 4 KiB pages, and a 16\-byte quantum on a 64\-bit system, the size classes in each category are as shown in Table 1\&. @@ -523,8 +502,6 @@ l r l ^ r l ^ r l ^ r l -^ r l -^ r l ^ r l. T{ Small @@ -652,22 +629,12 @@ T} T}:T{ \&.\&.\&. T} -:T{ -512 PiB -T}:T{ -[2560 PiB, 3 EiB, 3584 PiB, 4 EiB] -T} -:T{ -1 EiB -T}:T{ -[5 EiB, 6 EiB, 7 EiB] -T} .TE .sp 1 .SH "MALLCTL NAMESPACE" .PP The following names are defined in the namespace accessible via the -mallctl*() +\fBmallctl*\fR\fB\fR functions\&. Value types are specified in parentheses, their readable/writable statuses are encoded as rw, r\-, @@ -677,118 +644,111 @@ r\-, or indicates an integer component, where the integer varies from 0 to some upper value that must be determined via introspection\&. In the case of -stats\&.arenas\&.\&.*, +"stats\&.arenas\&.\&.*", equal to -arenas\&.narenas +"arenas\&.narenas" can be used to access the summation of statistics from all arenas\&. Take special note of the -epoch +"epoch" mallctl, which controls refreshing of cached dynamic statistics\&. .PP -version (\fBconst char *\fR) r\- +"version" (\fBconst char *\fR) r\- .RS 4 Return the jemalloc version string\&. 
.RE .PP -epoch (\fBuint64_t\fR) rw +"epoch" (\fBuint64_t\fR) rw .RS 4 If a value is passed in, refresh the data from which the -mallctl*() +\fBmallctl*\fR\fB\fR functions report values, and increment the epoch\&. Return the current epoch\&. This is useful for detecting whether another thread caused a refresh\&. .RE .PP -config\&.cache_oblivious (\fBbool\fR) r\- +"config\&.cache_oblivious" (\fBbool\fR) r\- .RS 4 \fB\-\-enable\-cache\-oblivious\fR was specified during build configuration\&. .RE .PP -config\&.debug (\fBbool\fR) r\- +"config\&.debug" (\fBbool\fR) r\- .RS 4 \fB\-\-enable\-debug\fR was specified during build configuration\&. .RE .PP -config\&.fill (\fBbool\fR) r\- +"config\&.fill" (\fBbool\fR) r\- .RS 4 \fB\-\-enable\-fill\fR was specified during build configuration\&. .RE .PP -config\&.lazy_lock (\fBbool\fR) r\- +"config\&.lazy_lock" (\fBbool\fR) r\- .RS 4 \fB\-\-enable\-lazy\-lock\fR was specified during build configuration\&. .RE .PP -config\&.malloc_conf (\fBconst char *\fR) r\- -.RS 4 -Embedded configure\-time\-specified run\-time options string, empty unless -\fB\-\-with\-malloc\-conf\fR -was specified during build configuration\&. -.RE -.PP -config\&.munmap (\fBbool\fR) r\- +"config\&.munmap" (\fBbool\fR) r\- .RS 4 \fB\-\-enable\-munmap\fR was specified during build configuration\&. .RE .PP -config\&.prof (\fBbool\fR) r\- +"config\&.prof" (\fBbool\fR) r\- .RS 4 \fB\-\-enable\-prof\fR was specified during build configuration\&. .RE .PP -config\&.prof_libgcc (\fBbool\fR) r\- +"config\&.prof_libgcc" (\fBbool\fR) r\- .RS 4 \fB\-\-disable\-prof\-libgcc\fR was not specified during build configuration\&. .RE .PP -config\&.prof_libunwind (\fBbool\fR) r\- +"config\&.prof_libunwind" (\fBbool\fR) r\- .RS 4 \fB\-\-enable\-prof\-libunwind\fR was specified during build configuration\&. .RE .PP -config\&.stats (\fBbool\fR) r\- +"config\&.stats" (\fBbool\fR) r\- .RS 4 \fB\-\-enable\-stats\fR was specified during build configuration\&. 
.RE .PP -config\&.tcache (\fBbool\fR) r\- +"config\&.tcache" (\fBbool\fR) r\- .RS 4 \fB\-\-disable\-tcache\fR was not specified during build configuration\&. .RE .PP -config\&.tls (\fBbool\fR) r\- +"config\&.tls" (\fBbool\fR) r\- .RS 4 \fB\-\-disable\-tls\fR was not specified during build configuration\&. .RE .PP -config\&.utrace (\fBbool\fR) r\- +"config\&.utrace" (\fBbool\fR) r\- .RS 4 \fB\-\-enable\-utrace\fR was specified during build configuration\&. .RE .PP -config\&.valgrind (\fBbool\fR) r\- +"config\&.valgrind" (\fBbool\fR) r\- .RS 4 \fB\-\-enable\-valgrind\fR was specified during build configuration\&. .RE .PP -config\&.xmalloc (\fBbool\fR) r\- +"config\&.xmalloc" (\fBbool\fR) r\- .RS 4 \fB\-\-enable\-xmalloc\fR was specified during build configuration\&. .RE .PP -opt\&.abort (\fBbool\fR) r\- +"opt\&.abort" (\fBbool\fR) r\- .RS 4 Abort\-on\-warning enabled/disabled\&. If true, most warnings are fatal\&. The process will call \fBabort\fR(3) @@ -797,132 +757,97 @@ in these cases\&. This option is disabled by default unless is specified during configuration, in which case it is enabled by default\&. .RE .PP -opt\&.dss (\fBconst char *\fR) r\- +"opt\&.dss" (\fBconst char *\fR) r\- .RS 4 dss (\fBsbrk\fR(2)) allocation precedence as related to \fBmmap\fR(2) allocation\&. The following settings are supported if \fBsbrk\fR(2) -is supported by the operating system: -\(lqdisabled\(rq, -\(lqprimary\(rq, and -\(lqsecondary\(rq; otherwise only -\(lqdisabled\(rq -is supported\&. The default is -\(lqsecondary\(rq -if +is supported by the operating system: \(lqdisabled\(rq, \(lqprimary\(rq, and \(lqsecondary\(rq; otherwise only \(lqdisabled\(rq is supported\&. The default is \(lqsecondary\(rq if \fBsbrk\fR(2) -is supported by the operating system; -\(lqdisabled\(rq -otherwise\&. +is supported by the operating system; \(lqdisabled\(rq otherwise\&. .RE .PP -opt\&.lg_chunk (\fBsize_t\fR) r\- +"opt\&.lg_chunk" (\fBsize_t\fR) r\- .RS 4 Virtual memory chunk size (log base 2)\&. 
If a chunk size outside the supported size range is specified, the size is silently clipped to the minimum/maximum supported size\&. The default chunk size is 2 MiB (2^21)\&. .RE .PP -opt\&.narenas (\fBunsigned\fR) r\- +"opt\&.narenas" (\fBsize_t\fR) r\- .RS 4 Maximum number of arenas to use for automatic multiplexing of threads and arenas\&. The default is four times the number of CPUs, or one if there is a single CPU\&. .RE .PP -opt\&.purge (\fBconst char *\fR) r\- -.RS 4 -Purge mode is \(lqratio\(rq (default) or \(lqdecay\(rq\&. See -opt\&.lg_dirty_mult -for details of the ratio mode\&. See -opt\&.decay_time -for details of the decay mode\&. -.RE -.PP -opt\&.lg_dirty_mult (\fBssize_t\fR) r\- +"opt\&.lg_dirty_mult" (\fBssize_t\fR) r\- .RS 4 Per\-arena minimum ratio (log base 2) of active to dirty pages\&. Some dirty unused pages may be allowed to accumulate, within the limit set by the ratio (or one chunk worth of dirty pages, whichever is greater), before informing the kernel about some of those pages via \fBmadvise\fR(2) or a similar system call\&. This provides the kernel with sufficient information to recycle dirty pages if physical memory becomes scarce and the pages remain unused\&. The default minimum ratio is 8:1 (2^3:1); an option value of \-1 will disable dirty page purging\&. See -arenas\&.lg_dirty_mult +"arenas\&.lg_dirty_mult" and -arena\&.\&.lg_dirty_mult +"arena\&.\&.lg_dirty_mult" for related dynamic control options\&. .RE .PP -opt\&.decay_time (\fBssize_t\fR) r\- -.RS 4 -Approximate time in seconds from the creation of a set of unused dirty pages until an equivalent set of unused dirty pages is purged and/or reused\&. The pages are incrementally purged according to a sigmoidal decay curve that starts and ends with zero purge rate\&. A decay time of 0 causes all unused dirty pages to be purged immediately upon creation\&. A decay time of \-1 disables purging\&. The default decay time is 10 seconds\&. 
See -arenas\&.decay_time -and -arena\&.\&.decay_time -for related dynamic control options\&. -.RE -.PP -opt\&.stats_print (\fBbool\fR) r\- +"opt\&.stats_print" (\fBbool\fR) r\- .RS 4 Enable/disable statistics printing at exit\&. If enabled, the -malloc_stats_print() +\fBmalloc_stats_print\fR\fB\fR function is called at program exit via an \fBatexit\fR(3) function\&. If \fB\-\-enable\-stats\fR is specified during configuration, this has the potential to cause deadlock for a multi\-threaded process that exits while one or more threads are executing in the memory allocation functions\&. Furthermore, -atexit() +\fBatexit\fR\fB\fR may allocate memory during application initialization and then deadlock internally when jemalloc in turn calls -atexit(), so this option is not universally usable (though the application can register its own -atexit() +\fBatexit\fR\fB\fR, so this option is not universally usable (though the application can register its own +\fBatexit\fR\fB\fR function with equivalent functionality)\&. Therefore, this option should only be used with care; it is primarily intended as a performance tuning aid during application development\&. This option is disabled by default\&. .RE .PP -opt\&.junk (\fBconst char *\fR) r\- [\fB\-\-enable\-fill\fR] +"opt\&.junk" (\fBconst char *\fR) r\- [\fB\-\-enable\-fill\fR] .RS 4 -Junk filling\&. If set to -\(lqalloc\(rq, each byte of uninitialized allocated memory will be initialized to -0xa5\&. If set to -\(lqfree\(rq, all deallocated memory will be initialized to -0x5a\&. If set to -\(lqtrue\(rq, both allocated and deallocated memory will be initialized, and if set to -\(lqfalse\(rq, junk filling be disabled entirely\&. This is intended for debugging and will impact performance negatively\&. This option is -\(lqfalse\(rq -by default unless +Junk filling\&. If set to "alloc", each byte of uninitialized allocated memory will be initialized to +0xa5\&. If set to "free", all deallocated memory will be initialized to +0x5a\&. 
If set to "true", both allocated and deallocated memory will be initialized, and if set to "false", junk filling will be disabled entirely\&. This is intended for debugging and will impact performance negatively\&. This option is "false" by default unless \fB\-\-enable\-debug\fR -is specified during configuration, in which case it is -\(lqtrue\(rq -by default unless running inside -\m[blue]\fBValgrind\fR\m[]\&\s-2\u[3]\d\s+2\&. +is specified during configuration, in which case it is "true" by default unless running inside +\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2\&. .RE .PP -opt\&.quarantine (\fBsize_t\fR) r\- [\fB\-\-enable\-fill\fR] +"opt\&.quarantine" (\fBsize_t\fR) r\- [\fB\-\-enable\-fill\fR] .RS 4 Per thread quarantine size in bytes\&. If non\-zero, each thread maintains a FIFO object quarantine that stores up to the specified number of bytes of memory\&. The quarantined memory is not freed until it is released from quarantine, though it is immediately junk\-filled if the -opt\&.junk +"opt\&.junk" option is enabled\&. This feature is of particular use in combination with -\m[blue]\fBValgrind\fR\m[]\&\s-2\u[3]\d\s+2, which can detect attempts to access quarantined objects\&. This is intended for debugging and will impact performance negatively\&. The default quarantine size is 0 unless running inside Valgrind, in which case the default is 16 MiB\&. +\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2, which can detect attempts to access quarantined objects\&. This is intended for debugging and will impact performance negatively\&. The default quarantine size is 0 unless running inside Valgrind, in which case the default is 16 MiB\&. .RE .PP -opt\&.redzone (\fBbool\fR) r\- [\fB\-\-enable\-fill\fR] +"opt\&.redzone" (\fBbool\fR) r\- [\fB\-\-enable\-fill\fR] .RS 4 Redzones enabled/disabled\&. If enabled, small allocations have redzones before and after them\&. 
Furthermore, if the -opt\&.junk +"opt\&.junk" option is enabled, the redzones are checked for corruption during deallocation\&. However, the primary intended purpose of this feature is to be used in combination with -\m[blue]\fBValgrind\fR\m[]\&\s-2\u[3]\d\s+2, which needs redzones in order to do effective buffer overflow/underflow detection\&. This option is intended for debugging and will impact performance negatively\&. This option is disabled by default unless running inside Valgrind\&. +\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2, which needs redzones in order to do effective buffer overflow/underflow detection\&. This option is intended for debugging and will impact performance negatively\&. This option is disabled by default unless running inside Valgrind\&. .RE .PP -opt\&.zero (\fBbool\fR) r\- [\fB\-\-enable\-fill\fR] +"opt\&.zero" (\fBbool\fR) r\- [\fB\-\-enable\-fill\fR] .RS 4 Zero filling enabled/disabled\&. If enabled, each byte of uninitialized allocated memory will be initialized to 0\&. Note that this initialization only happens once for each byte, so -realloc() +\fBrealloc\fR\fB\fR and -rallocx() +\fBrallocx\fR\fB\fR calls do not zero memory that was previously allocated\&. This is intended for debugging and will impact performance negatively\&. This option is disabled by default\&. .RE .PP -opt\&.utrace (\fBbool\fR) r\- [\fB\-\-enable\-utrace\fR] +"opt\&.utrace" (\fBbool\fR) r\- [\fB\-\-enable\-utrace\fR] .RS 4 Allocation tracing based on \fButrace\fR(2) enabled/disabled\&. This option is disabled by default\&. .RE .PP -opt\&.xmalloc (\fBbool\fR) r\- [\fB\-\-enable\-xmalloc\fR] +"opt\&.xmalloc" (\fBbool\fR) r\- [\fB\-\-enable\-xmalloc\fR] .RS 4 Abort\-on\-out\-of\-memory enabled/disabled\&. If enabled, rather than returning failure for any allocation function, display a diagnostic message on \fBSTDERR_FILENO\fR @@ -942,94 +867,92 @@ malloc_conf = "xmalloc:true"; This option is disabled by default\&. 
.RE .PP -opt\&.tcache (\fBbool\fR) r\- [\fB\-\-enable\-tcache\fR] +"opt\&.tcache" (\fBbool\fR) r\- [\fB\-\-enable\-tcache\fR] .RS 4 Thread\-specific caching (tcache) enabled/disabled\&. When there are multiple threads, each thread uses a tcache for objects up to a certain size\&. Thread\-specific caching allows many allocations to be satisfied without performing any thread synchronization, at the cost of increased memory use\&. See the -opt\&.lg_tcache_max +"opt\&.lg_tcache_max" option for related tuning information\&. This option is enabled by default unless running inside -\m[blue]\fBValgrind\fR\m[]\&\s-2\u[3]\d\s+2, in which case it is forcefully disabled\&. +\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2, in which case it is forcefully disabled\&. .RE .PP -opt\&.lg_tcache_max (\fBsize_t\fR) r\- [\fB\-\-enable\-tcache\fR] +"opt\&.lg_tcache_max" (\fBsize_t\fR) r\- [\fB\-\-enable\-tcache\fR] .RS 4 Maximum size class (log base 2) to cache in the thread\-specific cache (tcache)\&. At a minimum, all small size classes are cached, and at a maximum all large size classes are cached\&. The default maximum is 32 KiB (2^15)\&. .RE .PP -opt\&.prof (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] +"opt\&.prof" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] .RS 4 Memory profiling enabled/disabled\&. If enabled, profile memory allocation activity\&. See the -opt\&.prof_active +"opt\&.prof_active" option for on\-the\-fly activation/deactivation\&. See the -opt\&.lg_prof_sample +"opt\&.lg_prof_sample" option for probabilistic sampling control\&. See the -opt\&.prof_accum +"opt\&.prof_accum" option for control of cumulative sample reporting\&. See the -opt\&.lg_prof_interval +"opt\&.lg_prof_interval" option for information on interval\-triggered profile dumping, the -opt\&.prof_gdump +"opt\&.prof_gdump" option for information on high\-water\-triggered profile dumping, and the -opt\&.prof_final +"opt\&.prof_final" option for final profile dumping\&. 
Profile output is compatible with the \fBjeprof\fR command, which is based on the \fBpprof\fR that is developed as part of the -\m[blue]\fBgperftools package\fR\m[]\&\s-2\u[4]\d\s+2\&. See -HEAP PROFILE FORMAT -for heap profile format documentation\&. +\m[blue]\fBgperftools package\fR\m[]\&\s-2\u[3]\d\s+2\&. .RE .PP -opt\&.prof_prefix (\fBconst char *\fR) r\- [\fB\-\-enable\-prof\fR] +"opt\&.prof_prefix" (\fBconst char *\fR) r\- [\fB\-\-enable\-prof\fR] .RS 4 Filename prefix for profile dumps\&. If the prefix is set to the empty string, no automatic dumps will occur; this is primarily useful for disabling the automatic final heap dump (which also disables leak reporting, if enabled)\&. The default prefix is jeprof\&. .RE .PP -opt\&.prof_active (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] +"opt\&.prof_active" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] .RS 4 Profiling activated/deactivated\&. This is a secondary control mechanism that makes it possible to start the application with profiling enabled (see the -opt\&.prof +"opt\&.prof" option) but inactive, then toggle profiling at any time during program execution with the -prof\&.active +"prof\&.active" mallctl\&. This option is enabled by default\&. .RE .PP -opt\&.prof_thread_active_init (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] +"opt\&.prof_thread_active_init" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] .RS 4 Initial setting for -thread\&.prof\&.active +"thread\&.prof\&.active" in newly created threads\&. The initial setting for newly created threads can also be changed during execution via the -prof\&.thread_active_init +"prof\&.thread_active_init" mallctl\&. This option is enabled by default\&. .RE .PP -opt\&.lg_prof_sample (\fBsize_t\fR) r\- [\fB\-\-enable\-prof\fR] +"opt\&.lg_prof_sample" (\fBsize_t\fR) r\- [\fB\-\-enable\-prof\fR] .RS 4 Average interval (log base 2) between allocation samples, as measured in bytes of allocation activity\&. 
Increasing the sampling interval decreases profile fidelity, but also decreases the computational overhead\&. The default sample interval is 512 KiB (2^19 B)\&. .RE .PP -opt\&.prof_accum (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] +"opt\&.prof_accum" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] .RS 4 Reporting of cumulative object/byte counts in profile dumps enabled/disabled\&. If this option is enabled, every unique backtrace must be stored for the duration of execution\&. Depending on the application, this can impose a large memory overhead, and the cumulative counts are not always of interest\&. This option is disabled by default\&. .RE .PP -opt\&.lg_prof_interval (\fBssize_t\fR) r\- [\fB\-\-enable\-prof\fR] +"opt\&.lg_prof_interval" (\fBssize_t\fR) r\- [\fB\-\-enable\-prof\fR] .RS 4 Average interval (log base 2) between memory profile dumps, as measured in bytes of allocation activity\&. The actual interval between dumps may be sporadic because decentralized allocation counters are used to avoid synchronization bottlenecks\&. Profiles are dumped to files named according to the pattern \&.\&.\&.i\&.heap, where is controlled by the -opt\&.prof_prefix +"opt\&.prof_prefix" option\&. By default, interval\-triggered profile dumping is disabled (encoded as \-1)\&. .RE .PP -opt\&.prof_gdump (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] +"opt\&.prof_gdump" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] .RS 4 Set the initial state of -prof\&.gdump, which when enabled triggers a memory profile dump every time the total virtual memory exceeds the previous maximum\&. This option is disabled by default\&. +"prof\&.gdump", which when enabled triggers a memory profile dump every time the total virtual memory exceeds the previous maximum\&. This option is disabled by default\&. 
.RE .PP -opt\&.prof_final (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] +"opt\&.prof_final" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] .RS 4 Use an \fBatexit\fR(3) @@ -1037,150 +960,127 @@ function to dump final memory usage to a file named according to the pattern \&.\&.\&.f\&.heap, where is controlled by the -opt\&.prof_prefix +"opt\&.prof_prefix" option\&. Note that -atexit() +\fBatexit\fR\fB\fR may allocate memory during application initialization and then deadlock internally when jemalloc in turn calls -atexit(), so this option is not universally usable (though the application can register its own -atexit() +\fBatexit\fR\fB\fR, so this option is not universally usable (though the application can register its own +\fBatexit\fR\fB\fR function with equivalent functionality)\&. This option is disabled by default\&. .RE .PP -opt\&.prof_leak (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] +"opt\&.prof_leak" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] .RS 4 Leak reporting enabled/disabled\&. If enabled, use an \fBatexit\fR(3) function to report memory leaks detected by allocation sampling\&. See the -opt\&.prof +"opt\&.prof" option for information on analyzing heap profile output\&. This option is disabled by default\&. .RE .PP -thread\&.arena (\fBunsigned\fR) rw +"thread\&.arena" (\fBunsigned\fR) rw .RS 4 Get or set the arena associated with the calling thread\&. If the specified arena was not initialized beforehand (see the -arenas\&.initialized +"arenas\&.initialized" mallctl), it will be automatically initialized as a side effect of calling this interface\&. .RE .PP -thread\&.allocated (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +"thread\&.allocated" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Get the total number of bytes ever allocated by the calling thread\&. This counter has the potential to wrap around; it is up to the application to appropriately interpret the counter in such cases\&. 
.RE .PP -thread\&.allocatedp (\fBuint64_t *\fR) r\- [\fB\-\-enable\-stats\fR] +"thread\&.allocatedp" (\fBuint64_t *\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Get a pointer to the the value that is returned by the -thread\&.allocated +"thread\&.allocated" mallctl\&. This is useful for avoiding the overhead of repeated -mallctl*() +\fBmallctl*\fR\fB\fR calls\&. .RE .PP -thread\&.deallocated (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +"thread\&.deallocated" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Get the total number of bytes ever deallocated by the calling thread\&. This counter has the potential to wrap around; it is up to the application to appropriately interpret the counter in such cases\&. .RE .PP -thread\&.deallocatedp (\fBuint64_t *\fR) r\- [\fB\-\-enable\-stats\fR] +"thread\&.deallocatedp" (\fBuint64_t *\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Get a pointer to the the value that is returned by the -thread\&.deallocated +"thread\&.deallocated" mallctl\&. This is useful for avoiding the overhead of repeated -mallctl*() +\fBmallctl*\fR\fB\fR calls\&. .RE .PP -thread\&.tcache\&.enabled (\fBbool\fR) rw [\fB\-\-enable\-tcache\fR] +"thread\&.tcache\&.enabled" (\fBbool\fR) rw [\fB\-\-enable\-tcache\fR] .RS 4 Enable/disable calling thread\*(Aqs tcache\&. The tcache is implicitly flushed as a side effect of becoming disabled (see -thread\&.tcache\&.flush)\&. +"thread\&.tcache\&.flush")\&. .RE .PP -thread\&.tcache\&.flush (\fBvoid\fR) \-\- [\fB\-\-enable\-tcache\fR] +"thread\&.tcache\&.flush" (\fBvoid\fR) \-\- [\fB\-\-enable\-tcache\fR] .RS 4 Flush calling thread\*(Aqs thread\-specific cache (tcache)\&. This interface releases all cached objects and internal data structures associated with the calling thread\*(Aqs tcache\&. Ordinarily, this interface need not be called, since automatic periodic incremental garbage collection occurs, and the thread cache is automatically discarded when a thread exits\&. 
However, garbage collection is triggered by allocation activity, so it is possible for a thread that stops allocating/deallocating to retain its cache indefinitely, in which case the developer may find manual flushing useful\&. .RE .PP -thread\&.prof\&.name (\fBconst char *\fR) r\- or \-w [\fB\-\-enable\-prof\fR] +"thread\&.prof\&.name" (\fBconst char *\fR) r\- or \-w [\fB\-\-enable\-prof\fR] .RS 4 -Get/set the descriptive name associated with the calling thread in memory profile dumps\&. An internal copy of the name string is created, so the input string need not be maintained after this interface completes execution\&. The output string of this interface should be copied for non\-ephemeral uses, because multiple implementation details can cause asynchronous string deallocation\&. Furthermore, each invocation of this interface can only read or write; simultaneous read/write is not supported due to string lifetime limitations\&. The name string must be nil\-terminated and comprised only of characters in the sets recognized by +Get/set the descriptive name associated with the calling thread in memory profile dumps\&. An internal copy of the name string is created, so the input string need not be maintained after this interface completes execution\&. The output string of this interface should be copied for non\-ephemeral uses, because multiple implementation details can cause asynchronous string deallocation\&. Furthermore, each invocation of this interface can only read or write; simultaneous read/write is not supported due to string lifetime limitations\&. The name string must be nil\-terminated and comprised only of characters in the sets recognized by \fBisgraph\fR(3) and \fBisblank\fR(3)\&. .RE .PP -thread\&.prof\&.active (\fBbool\fR) rw [\fB\-\-enable\-prof\fR] +"thread\&.prof\&.active" (\fBbool\fR) rw [\fB\-\-enable\-prof\fR] .RS 4 Control whether sampling is currently active for the calling thread\&. 
This is an activation mechanism in addition to -prof\&.active; both must be active for the calling thread to sample\&. This flag is enabled by default\&. +"prof\&.active"; both must be active for the calling thread to sample\&. This flag is enabled by default\&. .RE .PP -tcache\&.create (\fBunsigned\fR) r\- [\fB\-\-enable\-tcache\fR] +"tcache\&.create" (\fBunsigned\fR) r\- [\fB\-\-enable\-tcache\fR] .RS 4 Create an explicit thread\-specific cache (tcache) and return an identifier that can be passed to the \fBMALLOCX_TCACHE(\fR\fB\fItc\fR\fR\fB)\fR macro to explicitly use the specified cache rather than the automatically managed one that is used by default\&. Each explicit cache can be used by only one thread at a time; the application must assure that this constraint holds\&. .RE .PP -tcache\&.flush (\fBunsigned\fR) \-w [\fB\-\-enable\-tcache\fR] +"tcache\&.flush" (\fBunsigned\fR) \-w [\fB\-\-enable\-tcache\fR] .RS 4 Flush the specified thread\-specific cache (tcache)\&. The same considerations apply to this interface as to -thread\&.tcache\&.flush, except that the tcache will never be automatically discarded\&. +"thread\&.tcache\&.flush", except that the tcache will never be automatically discarded\&. .RE .PP -tcache\&.destroy (\fBunsigned\fR) \-w [\fB\-\-enable\-tcache\fR] +"tcache\&.destroy" (\fBunsigned\fR) \-w [\fB\-\-enable\-tcache\fR] .RS 4 Flush the specified thread\-specific cache (tcache) and make the identifier available for use during a future tcache creation\&. .RE .PP -arena\&.\&.purge (\fBvoid\fR) \-\- +"arena\&.\&.purge" (\fBvoid\fR) \-\- .RS 4 -Purge all unused dirty pages for arena , or for all arenas if equals -arenas\&.narenas\&. +Purge unused dirty pages for arena , or for all arenas if equals +"arenas\&.narenas"\&. .RE .PP -arena\&.\&.decay (\fBvoid\fR) \-\- -.RS 4 -Trigger decay\-based purging of unused dirty pages for arena , or for all arenas if equals -arenas\&.narenas\&. 
The proportion of unused dirty pages to be purged depends on the current time; see -opt\&.decay_time -for details\&. -.RE -.PP -arena\&.\&.reset (\fBvoid\fR) \-\- -.RS 4 -Discard all of the arena\*(Aqs extant allocations\&. This interface can only be used with arenas created via -arenas\&.extend\&. None of the arena\*(Aqs discarded/cached allocations may accessed afterward\&. As part of this requirement, all thread caches which were used to allocate/deallocate in conjunction with the arena must be flushed beforehand\&. This interface cannot be used if running inside Valgrind, nor if the -quarantine -size is non\-zero\&. -.RE -.PP -arena\&.\&.dss (\fBconst char *\fR) rw +"arena\&.\&.dss" (\fBconst char *\fR) rw .RS 4 Set the precedence of dss allocation as related to mmap allocation for arena , or for all arenas if equals -arenas\&.narenas\&. See -opt\&.dss +"arenas\&.narenas"\&. See +"opt\&.dss" for supported settings\&. .RE .PP -arena\&.\&.lg_dirty_mult (\fBssize_t\fR) rw +"arena\&.\&.lg_dirty_mult" (\fBssize_t\fR) rw .RS 4 Current per\-arena minimum ratio (log base 2) of active to dirty pages for arena \&. Each time this interface is set and the ratio is increased, pages are synchronously purged as necessary to impose the new ratio\&. See -opt\&.lg_dirty_mult +"opt\&.lg_dirty_mult" for additional information\&. .RE .PP -arena\&.\&.decay_time (\fBssize_t\fR) rw -.RS 4 -Current per\-arena approximate time in seconds from the creation of a set of unused dirty pages until an equivalent set of unused dirty pages is purged and/or reused\&. Each time this interface is set, all currently unused dirty pages are considered to have fully decayed, which causes immediate purging of all unused dirty pages unless the decay time is set to \-1 (i\&.e\&. purging disabled)\&. See -opt\&.decay_time -for additional information\&. 
-.RE -.PP -arena\&.\&.chunk_hooks (\fBchunk_hooks_t\fR) rw +"arena\&.\&.chunk_hooks" (\fBchunk_hooks_t\fR) rw .RS 4 Get or set the chunk management hook functions for arena \&. The functions must be capable of operating on all extant chunks associated with arena , usually by passing unknown chunks to the replaced functions\&. In practice, it is feasible to control allocation for arenas created via -arenas\&.extend +"arenas\&.extend" such that all chunks originate from an application\-supplied chunk allocator (by setting custom chunk hook functions just after arena creation), but the automatically created arenas may have already created chunks prior to the application having an opportunity to take over chunk allocation\&. .sp .if n \{\ @@ -1249,7 +1149,7 @@ is not on success or \fBNULL\fR on error\&. Committed memory may be committed in absolute terms as on a system that does not overcommit, or in implicit terms as on a system that overcommits and satisfies physical memory needs on demand via soft page faults\&. Note that replacing the default chunk allocation function makes the arena\*(Aqs -arena\&.\&.dss +"arena\&.\&.dss" setting irrelevant\&. .HP \w'typedef\ bool\ (chunk_dalloc_t)('u .BI "typedef bool (chunk_dalloc_t)(void\ *" "chunk" ", size_t\ " "size" ", bool\ " "committed" ", unsigned\ " "arena_ind" ");" @@ -1396,504 +1296,407 @@ into one contiguous chunk, operating on \fIarena_ind\fR, returning false upon success\&. If the function returns true, this indicates that the chunks remain distinct mappings and therefore should continue to be operated on independently\&. .RE .PP -arenas\&.narenas (\fBunsigned\fR) r\- +"arenas\&.narenas" (\fBunsigned\fR) r\- .RS 4 Current limit on number of arenas\&. .RE .PP -arenas\&.initialized (\fBbool *\fR) r\- +"arenas\&.initialized" (\fBbool *\fR) r\- .RS 4 An array of -arenas\&.narenas +"arenas\&.narenas" booleans\&. Each boolean indicates whether the corresponding arena is initialized\&. 
.RE .PP -arenas\&.lg_dirty_mult (\fBssize_t\fR) rw +"arenas\&.lg_dirty_mult" (\fBssize_t\fR) rw .RS 4 Current default per\-arena minimum ratio (log base 2) of active to dirty pages, used to initialize -arena\&.\&.lg_dirty_mult +"arena\&.\&.lg_dirty_mult" during arena creation\&. See -opt\&.lg_dirty_mult +"opt\&.lg_dirty_mult" for additional information\&. .RE .PP -arenas\&.decay_time (\fBssize_t\fR) rw -.RS 4 -Current default per\-arena approximate time in seconds from the creation of a set of unused dirty pages until an equivalent set of unused dirty pages is purged and/or reused, used to initialize -arena\&.\&.decay_time -during arena creation\&. See -opt\&.decay_time -for additional information\&. -.RE -.PP -arenas\&.quantum (\fBsize_t\fR) r\- +"arenas\&.quantum" (\fBsize_t\fR) r\- .RS 4 Quantum size\&. .RE .PP -arenas\&.page (\fBsize_t\fR) r\- +"arenas\&.page" (\fBsize_t\fR) r\- .RS 4 Page size\&. .RE .PP -arenas\&.tcache_max (\fBsize_t\fR) r\- [\fB\-\-enable\-tcache\fR] +"arenas\&.tcache_max" (\fBsize_t\fR) r\- [\fB\-\-enable\-tcache\fR] .RS 4 Maximum thread\-cached size class\&. .RE .PP -arenas\&.nbins (\fBunsigned\fR) r\- +"arenas\&.nbins" (\fBunsigned\fR) r\- .RS 4 Number of bin size classes\&. .RE .PP -arenas\&.nhbins (\fBunsigned\fR) r\- [\fB\-\-enable\-tcache\fR] +"arenas\&.nhbins" (\fBunsigned\fR) r\- [\fB\-\-enable\-tcache\fR] .RS 4 Total number of thread cache bin size classes\&. .RE .PP -arenas\&.bin\&.\&.size (\fBsize_t\fR) r\- +"arenas\&.bin\&.\&.size" (\fBsize_t\fR) r\- .RS 4 Maximum size supported by size class\&. .RE .PP -arenas\&.bin\&.\&.nregs (\fBuint32_t\fR) r\- +"arenas\&.bin\&.\&.nregs" (\fBuint32_t\fR) r\- .RS 4 Number of regions per page run\&. .RE .PP -arenas\&.bin\&.\&.run_size (\fBsize_t\fR) r\- +"arenas\&.bin\&.\&.run_size" (\fBsize_t\fR) r\- .RS 4 Number of bytes per page run\&. .RE .PP -arenas\&.nlruns (\fBunsigned\fR) r\- +"arenas\&.nlruns" (\fBunsigned\fR) r\- .RS 4 Total number of large size classes\&. 
.RE .PP -arenas\&.lrun\&.\&.size (\fBsize_t\fR) r\- +"arenas\&.lrun\&.\&.size" (\fBsize_t\fR) r\- .RS 4 Maximum size supported by this large size class\&. .RE .PP -arenas\&.nhchunks (\fBunsigned\fR) r\- +"arenas\&.nhchunks" (\fBunsigned\fR) r\- .RS 4 Total number of huge size classes\&. .RE .PP -arenas\&.hchunk\&.\&.size (\fBsize_t\fR) r\- +"arenas\&.hchunk\&.\&.size" (\fBsize_t\fR) r\- .RS 4 Maximum size supported by this huge size class\&. .RE .PP -arenas\&.extend (\fBunsigned\fR) r\- +"arenas\&.extend" (\fBunsigned\fR) r\- .RS 4 Extend the array of arenas by appending a new arena, and returning the new arena index\&. .RE .PP -prof\&.thread_active_init (\fBbool\fR) rw [\fB\-\-enable\-prof\fR] +"prof\&.thread_active_init" (\fBbool\fR) rw [\fB\-\-enable\-prof\fR] .RS 4 Control the initial setting for -thread\&.prof\&.active +"thread\&.prof\&.active" in newly created threads\&. See the -opt\&.prof_thread_active_init +"opt\&.prof_thread_active_init" option for additional information\&. .RE .PP -prof\&.active (\fBbool\fR) rw [\fB\-\-enable\-prof\fR] +"prof\&.active" (\fBbool\fR) rw [\fB\-\-enable\-prof\fR] .RS 4 Control whether sampling is currently active\&. See the -opt\&.prof_active +"opt\&.prof_active" option for additional information, as well as the interrelated -thread\&.prof\&.active +"thread\&.prof\&.active" mallctl\&. .RE .PP -prof\&.dump (\fBconst char *\fR) \-w [\fB\-\-enable\-prof\fR] +"prof\&.dump" (\fBconst char *\fR) \-w [\fB\-\-enable\-prof\fR] .RS 4 Dump a memory profile to the specified file, or if NULL is specified, to a file according to the pattern \&.\&.\&.m\&.heap, where is controlled by the -opt\&.prof_prefix +"opt\&.prof_prefix" option\&. .RE .PP -prof\&.gdump (\fBbool\fR) rw [\fB\-\-enable\-prof\fR] +"prof\&.gdump" (\fBbool\fR) rw [\fB\-\-enable\-prof\fR] .RS 4 When enabled, trigger a memory profile dump every time the total virtual memory exceeds the previous maximum\&. 
Profiles are dumped to files named according to the pattern \&.\&.\&.u\&.heap, where is controlled by the -opt\&.prof_prefix +"opt\&.prof_prefix" option\&. .RE .PP -prof\&.reset (\fBsize_t\fR) \-w [\fB\-\-enable\-prof\fR] +"prof\&.reset" (\fBsize_t\fR) \-w [\fB\-\-enable\-prof\fR] .RS 4 Reset all memory profile statistics, and optionally update the sample rate (see -opt\&.lg_prof_sample +"opt\&.lg_prof_sample" and -prof\&.lg_sample)\&. +"prof\&.lg_sample")\&. .RE .PP -prof\&.lg_sample (\fBsize_t\fR) r\- [\fB\-\-enable\-prof\fR] +"prof\&.lg_sample" (\fBsize_t\fR) r\- [\fB\-\-enable\-prof\fR] .RS 4 Get the current sample rate (see -opt\&.lg_prof_sample)\&. +"opt\&.lg_prof_sample")\&. .RE .PP -prof\&.interval (\fBuint64_t\fR) r\- [\fB\-\-enable\-prof\fR] +"prof\&.interval" (\fBuint64_t\fR) r\- [\fB\-\-enable\-prof\fR] .RS 4 -Average number of bytes allocated between interval\-based profile dumps\&. See the -opt\&.lg_prof_interval +Average number of bytes allocated between interval\-based profile dumps\&. See the +"opt\&.lg_prof_interval" option for additional information\&. .RE .PP -stats\&.cactive (\fBsize_t *\fR) r\- [\fB\-\-enable\-stats\fR] +"stats\&.cactive" (\fBsize_t *\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Pointer to a counter that contains an approximate count of the current number of bytes in active pages\&. The estimate may be high, but never low, because each arena rounds up when computing its contribution to the counter\&. Note that the -epoch +"epoch" mallctl has no bearing on this counter\&. Furthermore, counter consistency is maintained via atomic operations, so it is necessary to use an atomic operation in order to guarantee a consistent read when dereferencing the pointer\&. .RE .PP -stats\&.allocated (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +"stats\&.allocated" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Total number of bytes allocated by the application\&. 
.RE .PP -stats\&.active (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +"stats\&.active" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Total number of bytes in active pages allocated by the application\&. This is a multiple of the page size, and greater than or equal to -stats\&.allocated\&. This does not include -stats\&.arenas\&.\&.pdirty, nor pages entirely devoted to allocator metadata\&. +"stats\&.allocated"\&. This does not include +"stats\&.arenas\&.\&.pdirty", nor pages entirely devoted to allocator metadata\&. .RE .PP -stats\&.metadata (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +"stats\&.metadata" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Total number of bytes dedicated to metadata, which comprise base allocations used for bootstrap\-sensitive internal allocator data structures, arena chunk headers (see -stats\&.arenas\&.\&.metadata\&.mapped), and internal allocations (see -stats\&.arenas\&.\&.metadata\&.allocated)\&. +"stats\&.arenas\&.\&.metadata\&.mapped"), and internal allocations (see +"stats\&.arenas\&.\&.metadata\&.allocated")\&. .RE .PP -stats\&.resident (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +"stats\&.resident" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Maximum number of bytes in physically resident data pages mapped by the allocator, comprising all pages dedicated to allocator metadata, pages backing active allocations, and unused dirty pages\&. This is a maximum rather than precise because pages may not actually be physically resident if they correspond to demand\-zeroed virtual memory that has not yet been touched\&. This is a multiple of the page size, and is larger than -stats\&.active\&. +"stats\&.active"\&. .RE .PP -stats\&.mapped (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +"stats\&.mapped" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Total number of bytes in active chunks mapped by the allocator\&. This is a multiple of the chunk size, and is larger than -stats\&.active\&. 
This does not include inactive chunks, even those that contain unused dirty pages, which means that there is no strict ordering between this and -stats\&.resident\&. +"stats\&.active"\&. This does not include inactive chunks, even those that contain unused dirty pages, which means that there is no strict ordering between this and +"stats\&.resident"\&. .RE .PP -stats\&.retained (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] -.RS 4 -Total number of bytes in virtual memory mappings that were retained rather than being returned to the operating system via e\&.g\&. -\fBmunmap\fR(2)\&. Retained virtual memory is typically untouched, decommitted, or purged, so it has no strongly associated physical memory (see -chunk hooks -for details)\&. Retained memory is excluded from mapped memory statistics, e\&.g\&. -stats\&.mapped\&. -.RE -.PP -stats\&.arenas\&.\&.dss (\fBconst char *\fR) r\- +"stats\&.arenas\&.\&.dss" (\fBconst char *\fR) r\- .RS 4 dss (\fBsbrk\fR(2)) allocation precedence as related to \fBmmap\fR(2) allocation\&. See -opt\&.dss +"opt\&.dss" for details\&. .RE .PP -stats\&.arenas\&.\&.lg_dirty_mult (\fBssize_t\fR) r\- +"stats\&.arenas\&.\&.lg_dirty_mult" (\fBssize_t\fR) r\- .RS 4 Minimum ratio (log base 2) of active to dirty pages\&. See -opt\&.lg_dirty_mult +"opt\&.lg_dirty_mult" for details\&. .RE .PP -stats\&.arenas\&.\&.decay_time (\fBssize_t\fR) r\- -.RS 4 -Approximate time in seconds from the creation of a set of unused dirty pages until an equivalent set of unused dirty pages is purged and/or reused\&. See -opt\&.decay_time -for details\&. -.RE -.PP -stats\&.arenas\&.\&.nthreads (\fBunsigned\fR) r\- +"stats\&.arenas\&.\&.nthreads" (\fBunsigned\fR) r\- .RS 4 Number of threads currently assigned to arena\&. .RE .PP -stats\&.arenas\&.\&.pactive (\fBsize_t\fR) r\- +"stats\&.arenas\&.\&.pactive" (\fBsize_t\fR) r\- .RS 4 Number of pages in active runs\&. 
.RE .PP -stats\&.arenas\&.\&.pdirty (\fBsize_t\fR) r\- +"stats\&.arenas\&.\&.pdirty" (\fBsize_t\fR) r\- .RS 4 Number of pages within unused runs that are potentially dirty, and for which -madvise\fI\&.\&.\&.\fR \fI\fBMADV_DONTNEED\fR\fR +\fBmadvise\fR\fB\fI\&.\&.\&.\fR\fR\fB \fR\fB\fI\fBMADV_DONTNEED\fR\fR\fR or similar has not been called\&. .RE .PP -stats\&.arenas\&.\&.mapped (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +"stats\&.arenas\&.\&.mapped" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Number of mapped bytes\&. .RE .PP -stats\&.arenas\&.\&.retained (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] -.RS 4 -Number of retained bytes\&. See -stats\&.retained -for details\&. -.RE -.PP -stats\&.arenas\&.\&.metadata\&.mapped (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +"stats\&.arenas\&.\&.metadata\&.mapped" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Number of mapped bytes in arena chunk headers, which track the states of the non\-metadata pages\&. .RE .PP -stats\&.arenas\&.\&.metadata\&.allocated (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +"stats\&.arenas\&.\&.metadata\&.allocated" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Number of bytes dedicated to internal allocations\&. Internal allocations differ from application\-originated allocations in that they are for internal use, and that they are omitted from heap profiles\&. This statistic is reported separately from -stats\&.metadata +"stats\&.metadata" and -stats\&.arenas\&.\&.metadata\&.mapped +"stats\&.arenas\&.\&.metadata\&.mapped" because it overlaps with e\&.g\&. the -stats\&.allocated +"stats\&.allocated" and -stats\&.active +"stats\&.active" statistics, whereas the other metadata statistics do not\&. .RE .PP -stats\&.arenas\&.\&.npurge (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +"stats\&.arenas\&.\&.npurge" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Number of dirty page purge sweeps performed\&. 
.RE .PP -stats\&.arenas\&.\&.nmadvise (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +"stats\&.arenas\&.\&.nmadvise" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Number of -madvise\fI\&.\&.\&.\fR \fI\fBMADV_DONTNEED\fR\fR +\fBmadvise\fR\fB\fI\&.\&.\&.\fR\fR\fB \fR\fB\fI\fBMADV_DONTNEED\fR\fR\fR or similar calls made to purge dirty pages\&. .RE .PP -stats\&.arenas\&.\&.purged (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +"stats\&.arenas\&.\&.purged" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Number of pages purged\&. .RE .PP -stats\&.arenas\&.\&.small\&.allocated (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +"stats\&.arenas\&.\&.small\&.allocated" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Number of bytes currently allocated by small objects\&. .RE .PP -stats\&.arenas\&.\&.small\&.nmalloc (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +"stats\&.arenas\&.\&.small\&.nmalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of allocation requests served by small bins\&. .RE .PP -stats\&.arenas\&.\&.small\&.ndalloc (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +"stats\&.arenas\&.\&.small\&.ndalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of small objects returned to bins\&. .RE .PP -stats\&.arenas\&.\&.small\&.nrequests (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +"stats\&.arenas\&.\&.small\&.nrequests" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of small allocation requests\&. .RE .PP -stats\&.arenas\&.\&.large\&.allocated (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +"stats\&.arenas\&.\&.large\&.allocated" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Number of bytes currently allocated by large objects\&. 
.RE .PP -stats\&.arenas\&.\&.large\&.nmalloc (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +"stats\&.arenas\&.\&.large\&.nmalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of large allocation requests served directly by the arena\&. .RE .PP -stats\&.arenas\&.\&.large\&.ndalloc (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +"stats\&.arenas\&.\&.large\&.ndalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of large deallocation requests served directly by the arena\&. .RE .PP -stats\&.arenas\&.\&.large\&.nrequests (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +"stats\&.arenas\&.\&.large\&.nrequests" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of large allocation requests\&. .RE .PP -stats\&.arenas\&.\&.huge\&.allocated (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +"stats\&.arenas\&.\&.huge\&.allocated" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Number of bytes currently allocated by huge objects\&. .RE .PP -stats\&.arenas\&.\&.huge\&.nmalloc (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +"stats\&.arenas\&.\&.huge\&.nmalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of huge allocation requests served directly by the arena\&. .RE .PP -stats\&.arenas\&.\&.huge\&.ndalloc (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +"stats\&.arenas\&.\&.huge\&.ndalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of huge deallocation requests served directly by the arena\&. .RE .PP -stats\&.arenas\&.\&.huge\&.nrequests (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +"stats\&.arenas\&.\&.huge\&.nrequests" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of huge allocation requests\&. .RE .PP -stats\&.arenas\&.\&.bins\&.\&.nmalloc (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +"stats\&.arenas\&.\&.bins\&.\&.nmalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of allocations served by bin\&. 
.RE .PP -stats\&.arenas\&.\&.bins\&.\&.ndalloc (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +"stats\&.arenas\&.\&.bins\&.\&.ndalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of allocations returned to bin\&. .RE .PP -stats\&.arenas\&.\&.bins\&.\&.nrequests (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +"stats\&.arenas\&.\&.bins\&.\&.nrequests" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of allocation requests\&. .RE .PP -stats\&.arenas\&.\&.bins\&.\&.curregs (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +"stats\&.arenas\&.\&.bins\&.\&.curregs" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Current number of regions for this size class\&. .RE .PP -stats\&.arenas\&.\&.bins\&.\&.nfills (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR \fB\-\-enable\-tcache\fR] +"stats\&.arenas\&.\&.bins\&.\&.nfills" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR \fB\-\-enable\-tcache\fR] .RS 4 Cumulative number of tcache fills\&. .RE .PP -stats\&.arenas\&.\&.bins\&.\&.nflushes (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR \fB\-\-enable\-tcache\fR] +"stats\&.arenas\&.\&.bins\&.\&.nflushes" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR \fB\-\-enable\-tcache\fR] .RS 4 Cumulative number of tcache flushes\&. .RE .PP -stats\&.arenas\&.\&.bins\&.\&.nruns (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +"stats\&.arenas\&.\&.bins\&.\&.nruns" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of runs created\&. .RE .PP -stats\&.arenas\&.\&.bins\&.\&.nreruns (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +"stats\&.arenas\&.\&.bins\&.\&.nreruns" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of times the current run from which to allocate changed\&. .RE .PP -stats\&.arenas\&.\&.bins\&.\&.curruns (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +"stats\&.arenas\&.\&.bins\&.\&.curruns" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Current number of runs\&. 
.RE .PP -stats\&.arenas\&.\&.lruns\&.\&.nmalloc (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +"stats\&.arenas\&.\&.lruns\&.\&.nmalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of allocation requests for this size class served directly by the arena\&. .RE .PP -stats\&.arenas\&.\&.lruns\&.\&.ndalloc (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +"stats\&.arenas\&.\&.lruns\&.\&.ndalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of deallocation requests for this size class served directly by the arena\&. .RE .PP -stats\&.arenas\&.\&.lruns\&.\&.nrequests (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +"stats\&.arenas\&.\&.lruns\&.\&.nrequests" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of allocation requests for this size class\&. .RE .PP -stats\&.arenas\&.\&.lruns\&.\&.curruns (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +"stats\&.arenas\&.\&.lruns\&.\&.curruns" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Current number of runs for this size class\&. .RE .PP -stats\&.arenas\&.\&.hchunks\&.\&.nmalloc (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +"stats\&.arenas\&.\&.hchunks\&.\&.nmalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of allocation requests for this size class served directly by the arena\&. .RE .PP -stats\&.arenas\&.\&.hchunks\&.\&.ndalloc (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +"stats\&.arenas\&.\&.hchunks\&.\&.ndalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of deallocation requests for this size class served directly by the arena\&. .RE .PP -stats\&.arenas\&.\&.hchunks\&.\&.nrequests (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +"stats\&.arenas\&.\&.hchunks\&.\&.nrequests" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of allocation requests for this size class\&. 
.RE .PP -stats\&.arenas\&.\&.hchunks\&.\&.curhchunks (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +"stats\&.arenas\&.\&.hchunks\&.\&.curhchunks" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Current number of huge allocations for this size class\&. .RE -.SH "HEAP PROFILE FORMAT" -.PP -Although the heap profiling functionality was originally designed to be compatible with the -\fBpprof\fR -command that is developed as part of the -\m[blue]\fBgperftools package\fR\m[]\&\s-2\u[4]\d\s+2, the addition of per thread heap profiling functionality required a different heap profile format\&. The -\fBjeprof\fR -command is derived from -\fBpprof\fR, with enhancements to support the heap profile format described here\&. -.PP -In the following hypothetical heap profile, -\fB[\&.\&.\&.]\fR -indicates elision for the sake of compactness\&. -.sp -.if n \{\ -.RS 4 -.\} -.nf -heap_v2/524288 - t*: 28106: 56637512 [0: 0] - [\&.\&.\&.] - t3: 352: 16777344 [0: 0] - [\&.\&.\&.] - t99: 17754: 29341640 [0: 0] - [\&.\&.\&.] -@ 0x5f86da8 0x5f5a1dc [\&.\&.\&.] 0x29e4d4e 0xa200316 0xabb2988 [\&.\&.\&.] - t*: 13: 6688 [0: 0] - t3: 12: 6496 [0: ] - t99: 1: 192 [0: 0] -[\&.\&.\&.] - -MAPPED_LIBRARIES: -[\&.\&.\&.] -.fi -.if n \{\ -.RE -.\} -.sp -The following matches the above heap profile, but most tokens are replaced with -\fB\fR -to indicate descriptions of the corresponding fields\&. -.sp -.if n \{\ -.RS 4 -.\} -.nf -/ - : : [: ] - [\&.\&.\&.] - : : [: ] - [\&.\&.\&.] - : : [: ] - [\&.\&.\&.] -@ [\&.\&.\&.] [\&.\&.\&.] - : : [: ] - : : [: ] - : : [: ] -[\&.\&.\&.] - -MAPPED_LIBRARIES: -/maps> -.fi -.if n \{\ -.RE -.\} .SH "DEBUGGING MALLOC PROBLEMS" .PP When debugging, it is a good idea to configure/build jemalloc with the @@ -1902,16 +1705,14 @@ and \fB\-\-enable\-fill\fR options, and recompile the program with suitable options and symbols for debugger support\&. 
When so configured, jemalloc incorporates a wide variety of run\-time assertions that catch application errors such as double\-free, write\-after\-free, etc\&. .PP -Programs often accidentally depend on -\(lquninitialized\(rq -memory actually being filled with zero bytes\&. Junk filling (see the -opt\&.junk +Programs often accidentally depend on \(lquninitialized\(rq memory actually being filled with zero bytes\&. Junk filling (see the +"opt\&.junk" option) tends to expose such bugs in the form of obviously incorrect results and/or coredumps\&. Conversely, zero filling (see the -opt\&.zero +"opt\&.zero" option) eliminates the symptoms of such bugs\&. Between these two options, it is usually possible to quickly detect, diagnose, and eliminate such bugs\&. .PP This implementation does not provide much detail about the problems it detects, because the performance impact for storing such information would be prohibitive\&. However, jemalloc does integrate with the most excellent -\m[blue]\fBValgrind\fR\m[]\&\s-2\u[3]\d\s+2 +\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2 tool if the \fB\-\-enable\-valgrind\fR configuration option is enabled\&. @@ -1919,7 +1720,7 @@ configuration option is enabled\&. .PP If any of the memory allocation/deallocation functions detect an error or warning condition, a message will be printed to file descriptor \fBSTDERR_FILENO\fR\&. Errors will result in the process dumping core\&. If the -opt\&.abort +"opt\&.abort" option is set, most warnings are treated as errors\&. .PP The @@ -1927,23 +1728,22 @@ The variable allows the programmer to override the function which emits the text strings forming the errors and warnings if for some reason the \fBSTDERR_FILENO\fR file descriptor is not suitable for this\&. -malloc_message() +\fBmalloc_message\fR\fB\fR takes the \fIcbopaque\fR pointer argument that is \fBNULL\fR unless overridden by the arguments in a call to -malloc_stats_print(), followed by a string pointer\&. 
Please note that doing anything which tries to allocate memory in this function is likely to result in a crash or deadlock\&. +\fBmalloc_stats_print\fR\fB\fR, followed by a string pointer\&. Please note that doing anything which tries to allocate memory in this function is likely to result in a crash or deadlock\&. .PP -All messages are prefixed by -\(lq: \(rq\&. +All messages are prefixed by \(lq:\(rq\&. .SH "RETURN VALUES" .SS "Standard API" .PP The -malloc() +\fBmalloc\fR\fB\fR and -calloc() +\fBcalloc\fR\fB\fR functions return a pointer to the allocated memory if successful; otherwise a \fBNULL\fR pointer is returned and @@ -1952,9 +1752,9 @@ is set to ENOMEM\&. .PP The -posix_memalign() +\fBposix_memalign\fR\fB\fR function returns the value 0 if successful; otherwise it returns an error value\&. The -posix_memalign() +\fBposix_memalign\fR\fB\fR function will fail if: .PP EINVAL @@ -1971,13 +1771,13 @@ Memory allocation error\&. .RE .PP The -aligned_alloc() +\fBaligned_alloc\fR\fB\fR function returns a pointer to the allocated memory if successful; otherwise a \fBNULL\fR pointer is returned and \fIerrno\fR is set\&. The -aligned_alloc() +\fBaligned_alloc\fR\fB\fR function will fail if: .PP EINVAL @@ -1993,7 +1793,7 @@ Memory allocation error\&. .RE .PP The -realloc() +\fBrealloc\fR\fB\fR function returns a pointer, possibly identical to \fIptr\fR, to the allocated memory if successful; otherwise a \fBNULL\fR @@ -2002,44 +1802,44 @@ pointer is returned, and is set to ENOMEM if the error was the result of an allocation failure\&. The -realloc() +\fBrealloc\fR\fB\fR function always leaves the original buffer intact when an error occurs\&. .PP The -free() +\fBfree\fR\fB\fR function returns no value\&. 
.SS "Non\-standard API" .PP The -mallocx() +\fBmallocx\fR\fB\fR and -rallocx() +\fBrallocx\fR\fB\fR functions return a pointer to the allocated memory if successful; otherwise a \fBNULL\fR pointer is returned to indicate insufficient contiguous memory was available to service the allocation request\&. .PP The -xallocx() +\fBxallocx\fR\fB\fR function returns the real size of the resulting resized allocation pointed to by \fIptr\fR, which is a value less than \fIsize\fR if the allocation could not be adequately grown in place\&. .PP The -sallocx() +\fBsallocx\fR\fB\fR function returns the real size of the allocation pointed to by \fIptr\fR\&. .PP The -nallocx() +\fBnallocx\fR\fB\fR returns the real size that would result from a successful equivalent -mallocx() +\fBmallocx\fR\fB\fR function call, or zero if insufficient memory is available to perform the size computation\&. .PP The -mallctl(), -mallctlnametomib(), and -mallctlbymib() +\fBmallctl\fR\fB\fR, +\fBmallctlnametomib\fR\fB\fR, and +\fBmallctlbymib\fR\fB\fR functions return 0 on success; otherwise they return an error value\&. The functions will fail if: .PP EINVAL @@ -2074,12 +1874,12 @@ A memory allocation failure occurred\&. EFAULT .RS 4 An interface with side effects failed in some way not directly related to -mallctl*() +\fBmallctl*\fR\fB\fR read/write processing\&. .RE .PP The -malloc_usable_size() +\fBmalloc_usable_size\fR\fB\fR function returns the usable size of the allocation pointed to by \fIptr\fR\&. .SH "ENVIRONMENT" @@ -2129,14 +1929,14 @@ malloc_conf = "lg_chunk:24"; .SH "STANDARDS" .PP The -malloc(), -calloc(), -realloc(), and -free() +\fBmalloc\fR\fB\fR, +\fBcalloc\fR\fB\fR, +\fBrealloc\fR\fB\fR, and +\fBfree\fR\fB\fR functions conform to ISO/IEC 9899:1990 (\(lqISO C90\(rq)\&. .PP The -posix_memalign() +\fBposix_memalign\fR\fB\fR function conforms to IEEE Std 1003\&.1\-2001 (\(lqPOSIX\&.1\(rq)\&. 
.SH "AUTHOR" .PP @@ -2147,19 +1947,14 @@ function conforms to IEEE Std 1003\&.1\-2001 (\(lqPOSIX\&.1\(rq)\&. .IP " 1." 4 jemalloc website .RS 4 -\%http://jemalloc.net/ +\%http://www.canonware.com/jemalloc/ .RE .IP " 2." 4 -JSON format -.RS 4 -\%http://www.json.org/ -.RE -.IP " 3." 4 Valgrind .RS 4 \%http://valgrind.org/ .RE -.IP " 4." 4 +.IP " 3." 4 gperftools package .RS 4 \%http://code.google.com/p/gperftools/ diff --git a/deps/jemalloc/doc/jemalloc.html b/deps/jemalloc/doc/jemalloc.html index db2504f6e..7b8e2be8c 100644 --- a/deps/jemalloc/doc/jemalloc.html +++ b/deps/jemalloc/doc/jemalloc.html @@ -1,29 +1,28 @@ - -JEMALLOC \ No newline at end of file + getpagesize(3)

STANDARDS

The malloc(), + calloc(), + realloc(), and + free() functions conform to ISO/IEC + 9899:1990 (“ISO C90”).

The posix_memalign() function conforms + to IEEE Std 1003.1-2001 (“POSIX.1”).

diff --git a/deps/jemalloc/doc/jemalloc.xml.in b/deps/jemalloc/doc/jemalloc.xml.in index d9c83452d..8fc774b18 100644 --- a/deps/jemalloc/doc/jemalloc.xml.in +++ b/deps/jemalloc/doc/jemalloc.xml.in @@ -52,7 +52,7 @@ LIBRARY This manual describes jemalloc @jemalloc_version@. More information can be found at the jemalloc website. + url="http://www.canonware.com/jemalloc/">jemalloc website. SYNOPSIS @@ -180,20 +180,20 @@ Standard API - The malloc() function allocates + The malloc function allocates size bytes of uninitialized memory. The allocated space is suitably aligned (after possible pointer coercion) for storage of any type of object. - The calloc() function allocates + The calloc function allocates space for number objects, each size bytes in length. The result is identical to - calling malloc() with an argument of + calling malloc with an argument of number * size, with the exception that the allocated memory is explicitly initialized to zero bytes. - The posix_memalign() function + The posix_memalign function allocates size bytes of memory such that the allocation's base address is a multiple of alignment, and returns the allocation in the value @@ -201,7 +201,7 @@ alignment must be a power of 2 at least as large as sizeof(void *). - The aligned_alloc() function + The aligned_alloc function allocates size bytes of memory such that the allocation's base address is a multiple of alignment. The requested @@ -209,7 +209,7 @@ undefined if size is not an integral multiple of alignment. - The realloc() function changes the + The realloc function changes the size of the previously allocated memory referenced by ptr to size bytes. The contents of the memory are unchanged up to the lesser of the new and old @@ -217,26 +217,26 @@ portion of the memory are undefined. Upon success, the memory referenced by ptr is freed and a pointer to the newly allocated memory is returned. 
Note that - realloc() may move the memory allocation, + realloc may move the memory allocation, resulting in a different return value than ptr. If ptr is NULL, the - realloc() function behaves identically to - malloc() for the specified size. + realloc function behaves identically to + malloc for the specified size. - The free() function causes the + The free function causes the allocated memory referenced by ptr to be made available for future allocations. If ptr is NULL, no action occurs. Non-standard API - The mallocx(), - rallocx(), - xallocx(), - sallocx(), - dallocx(), - sdallocx(), and - nallocx() functions all have a + The mallocx, + rallocx, + xallocx, + sallocx, + dallocx, + sdallocx, and + nallocx functions all have a flags argument that can be used to specify options. The functions only check the options that are contextually relevant. Use bitwise or (|) operations to @@ -307,19 +307,21 @@ - The mallocx() function allocates at + The mallocx function allocates at least size bytes of memory, and returns a pointer to the base address of the allocation. Behavior is undefined if - size is 0. + size is 0, or if request size + overflows due to size class and/or alignment constraints. - The rallocx() function resizes the + The rallocx function resizes the allocation at ptr to be at least size bytes, and returns a pointer to the base address of the resulting allocation, which may or may not have moved from its original location. Behavior is undefined if - size is 0. + size is 0, or if request size + overflows due to size class and/or alignment constraints. - The xallocx() function resizes the + The xallocx function resizes the allocation at ptr in place to be at least size bytes, and returns the real size of the allocation. If extra is non-zero, an attempt is @@ -332,32 +334,32 @@ language="C">(size + extra > SIZE_T_MAX)
. - The sallocx() function returns the + The sallocx function returns the real size of the allocation at ptr. - The dallocx() function causes the + The dallocx function causes the memory referenced by ptr to be made available for future allocations. - The sdallocx() function is an - extension of dallocx() with a + The sdallocx function is an + extension of dallocx with a size parameter to allow the caller to pass in the allocation size as an optimization. The minimum valid input size is the original requested size of the allocation, and the maximum valid input size is the corresponding value returned by - nallocx() or - sallocx(). + nallocx or + sallocx. - The nallocx() function allocates no + The nallocx function allocates no memory, but it performs the same size computation as the - mallocx() function, and returns the real + mallocx function, and returns the real size of the allocation that would result from the equivalent - mallocx() function call, or - 0 if the inputs exceed the maximum supported size - class and/or alignment. Behavior is undefined if - size is 0. + mallocx function call. Behavior is + undefined if size is 0, or if + request size overflows due to size class and/or alignment + constraints. - The mallctl() function provides a + The mallctl function provides a general interface for introspecting the memory allocator, as well as setting modifiable parameters and triggering actions. The period-separated name argument specifies a @@ -372,12 +374,12 @@ newlen; otherwise pass NULL and 0. - The mallctlnametomib() function + The mallctlnametomib function provides a way to avoid repeated name lookups for applications that repeatedly query the same portion of the namespace, by translating a name - to a Management Information Base (MIB) that can be passed - repeatedly to mallctlbymib(). Upon - successful return from mallctlnametomib(), + to a “Management Information Base” (MIB) that can be passed + repeatedly to mallctlbymib. 
Upon + successful return from mallctlnametomib, mibp contains an array of *miblenp integers, where *miblenp is the lesser of the number of components @@ -406,44 +408,43 @@ for (i = 0; i < nbins; i++) { mib[2] = i; len = sizeof(bin_size); - mallctlbymib(mib, miblen, (void *)&bin_size, &len, NULL, 0); + mallctlbymib(mib, miblen, &bin_size, &len, NULL, 0); /* Do something with bin_size... */ }]]> - The malloc_stats_print() function writes - summary statistics via the write_cb callback - function pointer and cbopaque data passed to - write_cb, or malloc_message() - if write_cb is NULL. The - statistics are presented in human-readable form unless J is - specified as a character within the opts string, in - which case the statistics are presented in JSON format. This function can be - called repeatedly. General information that never changes during - execution can be omitted by specifying g as a character + The malloc_stats_print function + writes human-readable summary statistics via the + write_cb callback function pointer and + cbopaque data passed to + write_cb, or + malloc_message if + write_cb is NULL. This + function can be called repeatedly. General information that never + changes during execution can be omitted by specifying "g" as a character within the opts string. Note that - malloc_message() uses the - mallctl*() functions internally, so inconsistent - statistics can be reported if multiple threads use these functions - simultaneously. If is specified during - configuration, m and a can be specified to - omit merged arena and per arena statistics, respectively; - b, l, and h can be specified - to omit per size class statistics for bins, large objects, and huge - objects, respectively. Unrecognized characters are silently ignored. - Note that thread caching may prevent some statistics from being completely - up to date, since extra locking would be required to merge counters that - track thread cache operations. 
+ malloc_message uses the + mallctl* functions internally, so + inconsistent statistics can be reported if multiple threads use these + functions simultaneously. If is + specified during configuration, “m” and “a” can + be specified to omit merged arena and per arena statistics, respectively; + “b”, “l”, and “h” can be specified to + omit per size class statistics for bins, large objects, and huge objects, + respectively. Unrecognized characters are silently ignored. Note that + thread caching may prevent some statistics from being completely up to + date, since extra locking would be required to merge counters that track + thread cache operations. + - The malloc_usable_size() function + The malloc_usable_size function returns the usable size of the allocation pointed to by ptr. The return value may be larger than the size that was requested during allocation. The - malloc_usable_size() function is not a - mechanism for in-place realloc(); rather + malloc_usable_size function is not a + mechanism for in-place realloc; rather it is provided solely as a tool for introspection purposes. Any discrepancy between the requested allocation size and the size reported - by malloc_usable_size() should not be + by malloc_usable_size should not be depended on, since such behavior is entirely implementation-dependent. @@ -454,20 +455,19 @@ for (i = 0; i < nbins; i++) { routines, the allocator initializes its internals based in part on various options that can be specified at compile- or run-time. - The string specified via , the - string pointed to by the global variable malloc_conf, the - name of the file referenced by the symbolic link named - /etc/malloc.conf, and the value of the + The string pointed to by the global variable + malloc_conf, the “name” of the file + referenced by the symbolic link named /etc/malloc.conf, and the value of the environment variable MALLOC_CONF, will be interpreted, in that order, from left to right as options. 
Note that malloc_conf may be read before - main() is entered, so the declaration of + main is entered, so the declaration of malloc_conf should specify an initializer that contains - the final value to be read by jemalloc. - and malloc_conf are compile-time mechanisms, whereas - /etc/malloc.conf and - MALLOC_CONF can be safely set any time prior to program - invocation. + the final value to be read by jemalloc. malloc_conf is + a compile-time setting, whereas /etc/malloc.conf and MALLOC_CONF + can be safely set any time prior to program invocation. An options string is a comma-separated list of option:value pairs. There is one key corresponding to each - Memory is conceptually broken into equal-sized chunks, where the chunk - size is a power of two that is greater than the page size. Chunks are - always aligned to multiples of the chunk size. This alignment makes it - possible to find metadata for user objects very quickly. User objects are - broken into three categories according to size: small, large, and huge. - Multiple small and large objects can reside within a single chunk, whereas - huge objects each have one or more chunks backing them. Each chunk that - contains small and/or large objects tracks its contents as runs of + Memory is conceptually broken into equal-sized chunks, where the + chunk size is a power of two that is greater than the page size. Chunks + are always aligned to multiples of the chunk size. This alignment makes it + possible to find metadata for user objects very quickly. + + User objects are broken into three categories according to size: + small, large, and huge. Small and large objects are managed entirely by + arenas; huge objects are additionally aggregated in a single data structure + that is shared by all threads. Huge objects are typically used by + applications infrequently enough that this single data structure is not a + scalability issue. 
+ + Each chunk that is managed by an arena tracks its contents as runs of contiguous pages (unused, backing a set of small objects, or backing one - large object). The combination of chunk alignment and chunk page maps makes - it possible to determine all metadata regarding small and large allocations - in constant time. + large object). The combination of chunk alignment and chunk page maps + makes it possible to determine all metadata regarding small and large + allocations in constant time. Small objects are managed in groups by page runs. Each run maintains a bitmap to track which regions are in use. Allocation requests that are no @@ -541,8 +546,8 @@ for (i = 0; i < nbins; i++) { are smaller than four times the page size, large size classes are smaller than the chunk size (see the opt.lg_chunk option), and - huge size classes extend from the chunk size up to the largest size class - that does not exceed PTRDIFF_MAX. + huge size classes extend from the chunk size up to one size class less than + the full address space size. Allocations are packed tightly together, which can be an issue for multi-threaded applications. If you need to assure that allocations do not @@ -550,14 +555,14 @@ for (i = 0; i < nbins; i++) { nearest multiple of the cacheline size, or specify cacheline alignment when allocating. - The realloc(), - rallocx(), and - xallocx() functions may resize allocations + The realloc, + rallocx, and + xallocx functions may resize allocations without moving them under limited circumstances. Unlike the - *allocx() API, the standard API does not + *allocx API, the standard API does not officially round up the usable size of an allocation to the nearest size class, so technically it is necessary to call - realloc() to grow e.g. a 9-byte allocation to + realloc to grow e.g. a 9-byte allocation to 16 bytes, or shrink a 16-byte allocation to 9 bytes. 
Growth and shrinkage trivially succeeds in place as long as the pre-size and post-size both round up to the same size class. No other API guarantees are made regarding @@ -660,7 +665,7 @@ for (i = 0; i < nbins; i++) { [1280 KiB, 1536 KiB, 1792 KiB] - Huge + Huge 256 KiB [2 MiB] @@ -688,14 +693,6 @@ for (i = 0; i < nbins; i++) { ... ... - - 512 PiB - [2560 PiB, 3 EiB, 3584 PiB, 4 EiB] - - - 1 EiB - [5 EiB, 6 EiB, 7 EiB] - @@ -703,7 +700,7 @@ for (i = 0; i < nbins; i++) { MALLCTL NAMESPACE The following names are defined in the namespace accessible via the - mallctl*() functions. Value types are + mallctl* functions. Value types are specified in parentheses, their readable/writable statuses are encoded as rw, r-, -w, or --, and required build configuration flags follow, if @@ -734,7 +731,7 @@ for (i = 0; i < nbins; i++) { rw If a value is passed in, refresh the data from which - the mallctl*() functions report values, + the mallctl* functions report values, and increment the epoch. Return the current epoch. This is useful for detecting whether another thread caused a refresh. @@ -779,17 +776,6 @@ for (i = 0; i < nbins; i++) { during build configuration. - - - config.malloc_conf - (const char *) - r- - - Embedded configure-time-specified run-time options - string, empty unless was specified - during build configuration. - - config.munmap @@ -918,12 +904,12 @@ for (i = 0; i < nbins; i++) { settings are supported if sbrk 2 is supported by the operating - system: disabled, primary, and - secondary; otherwise only disabled is - supported. The default is secondary if + system: “disabled”, “primary”, and + “secondary”; otherwise only “disabled” is + supported. The default is “secondary” if sbrk 2 is supported by the operating - system; disabled otherwise. + system; “disabled” otherwise. 
@@ -943,7 +929,7 @@ for (i = 0; i < nbins; i++) { opt.narenas - (unsigned) + (size_t) r- Maximum number of arenas to use for automatic @@ -951,20 +937,6 @@ for (i = 0; i < nbins; i++) { number of CPUs, or one if there is a single CPU. - - - opt.purge - (const char *) - r- - - Purge mode is “ratio” (default) or - “decay”. See opt.lg_dirty_mult - for details of the ratio mode. See opt.decay_time for - details of the decay mode. - - opt.lg_dirty_mult @@ -987,26 +959,6 @@ for (i = 0; i < nbins; i++) { for related dynamic control options. - - - opt.decay_time - (ssize_t) - r- - - Approximate time in seconds from the creation of a set - of unused dirty pages until an equivalent set of unused dirty pages is - purged and/or reused. The pages are incrementally purged according to a - sigmoidal decay curve that starts and ends with zero purge rate. A - decay time of 0 causes all unused dirty pages to be purged immediately - upon creation. A decay time of -1 disables purging. The default decay - time is 10 seconds. See arenas.decay_time - and arena.<i>.decay_time - for related dynamic control options. - - - opt.stats_print @@ -1014,19 +966,19 @@ for (i = 0; i < nbins; i++) { r- Enable/disable statistics printing at exit. If - enabled, the malloc_stats_print() + enabled, the malloc_stats_print function is called at program exit via an atexit 3 function. If is specified during configuration, this has the potential to cause deadlock for a multi-threaded process that exits while one or more threads are executing in the memory allocation - functions. Furthermore, atexit() may + functions. 
Furthermore, atexit may allocate memory during application initialization and then deadlock internally when jemalloc in turn calls - atexit(), so this option is not - universally usable (though the application can register its own - atexit() function with equivalent + atexit, so this option is not + univerally usable (though the application can register its own + atexit function with equivalent functionality). Therefore, this option should only be used with care; it is primarily intended as a performance tuning aid during application development. This option is disabled by default. @@ -1039,16 +991,15 @@ for (i = 0; i < nbins; i++) { r- [] - Junk filling. If set to alloc, each byte - of uninitialized allocated memory will be initialized to - 0xa5. If set to free, all deallocated - memory will be initialized to 0x5a. If set to - true, both allocated and deallocated memory will be - initialized, and if set to false, junk filling be - disabled entirely. This is intended for debugging and will impact - performance negatively. This option is false by default - unless is specified during - configuration, in which case it is true by default unless + Junk filling. If set to "alloc", each byte of + uninitialized allocated memory will be initialized to + 0xa5. If set to "free", all deallocated memory will + be initialized to 0x5a. If set to "true", both + allocated and deallocated memory will be initialized, and if set to + "false", junk filling be disabled entirely. This is intended for + debugging and will impact performance negatively. This option is + "false" by default unless is specified + during configuration, in which case it is "true" by default unless running inside Valgrind. @@ -1103,8 +1054,8 @@ for (i = 0; i < nbins; i++) { Zero filling enabled/disabled. If enabled, each byte of uninitialized allocated memory will be initialized to 0. 
Note that this initialization only happens once for each byte, so - realloc() and - rallocx() calls do not zero memory that + realloc and + rallocx calls do not zero memory that was previously allocated. This is intended for debugging and will impact performance negatively. This option is disabled by default. @@ -1199,8 +1150,7 @@ malloc_conf = "xmalloc:true";]]> the jeprof command, which is based on the pprof that is developed as part of the gperftools - package. See HEAP PROFILE - FORMAT for heap profile format documentation. + package. @@ -1327,11 +1277,11 @@ malloc_conf = "xmalloc:true";]]> <prefix>.<pid>.<seq>.f.heap, where <prefix> is controlled by the opt.prof_prefix - option. Note that atexit() may allocate + option. Note that atexit may allocate memory during application initialization and then deadlock internally - when jemalloc in turn calls atexit(), so - this option is not universally usable (though the application can - register its own atexit() function with + when jemalloc in turn calls atexit, so + this option is not univerally usable (though the application can + register its own atexit function with equivalent functionality). This option is disabled by default. @@ -1390,7 +1340,7 @@ malloc_conf = "xmalloc:true";]]> thread.allocated mallctl. This is useful for avoiding the overhead of repeated - mallctl*() calls. + mallctl* calls. @@ -1417,7 +1367,7 @@ malloc_conf = "xmalloc:true";]]> thread.deallocated mallctl. This is useful for avoiding the overhead of repeated - mallctl*() calls. + mallctl* calls. @@ -1468,8 +1418,8 @@ malloc_conf = "xmalloc:true";]]> can cause asynchronous string deallocation. Furthermore, each invocation of this interface can only read or write; simultaneous read/write is not supported due to string lifetime limitations. 
The - name string must be nil-terminated and comprised only of characters in - the sets recognized + name string must nil-terminated and comprised only of characters in the + sets recognized by isgraph 3 and isblank @@ -1517,7 +1467,7 @@ malloc_conf = "xmalloc:true";]]> Flush the specified thread-specific cache (tcache). The same considerations apply to this interface as to thread.tcache.flush, - except that the tcache will never be automatically discarded. + except that the tcache will never be automatically be discarded. @@ -1539,44 +1489,12 @@ malloc_conf = "xmalloc:true";]]> (void) -- - Purge all unused dirty pages for arena <i>, or for + Purge unused dirty pages for arena <i>, or for all arenas if <i> equals arenas.narenas. - - - arena.<i>.decay - (void) - -- - - Trigger decay-based purging of unused dirty pages for - arena <i>, or for all arenas if <i> equals arenas.narenas. - The proportion of unused dirty pages to be purged depends on the current - time; see opt.decay_time for - details. - - - - - arena.<i>.reset - (void) - -- - - Discard all of the arena's extant allocations. This - interface can only be used with arenas created via arenas.extend. None - of the arena's discarded/cached allocations may accessed afterward. As - part of this requirement, all thread caches which were used to - allocate/deallocate in conjunction with the arena must be flushed - beforehand. This interface cannot be used if running inside Valgrind, - nor if the quarantine size is - non-zero. - - arena.<i>.dss @@ -1605,22 +1523,6 @@ malloc_conf = "xmalloc:true";]]> for additional information. - - - arena.<i>.decay_time - (ssize_t) - rw - - Current per-arena approximate time in seconds from the - creation of a set of unused dirty pages until an equivalent set of - unused dirty pages is purged and/or reused. 
Each time this interface is - set, all currently unused dirty pages are considered to have fully - decayed, which causes immediate purging of all unused dirty pages unless - the decay time is set to -1 (i.e. purging disabled). See opt.decay_time for - additional information. - - arena.<i>.chunk_hooks @@ -1855,21 +1757,6 @@ typedef struct { for additional information. - - - arenas.decay_time - (ssize_t) - rw - - Current default per-arena approximate time in seconds - from the creation of a set of unused dirty pages until an equivalent set - of unused dirty pages is purged and/or reused, used to initialize arena.<i>.decay_time - during arena creation. See opt.decay_time for - additional information. - - arenas.quantum @@ -2089,7 +1976,7 @@ typedef struct { [] Average number of bytes allocated between - interval-based profile dumps. See the + inverval-based profile dumps. See the opt.lg_prof_interval option for additional information. @@ -2188,25 +2075,6 @@ typedef struct { linkend="stats.resident">stats.resident. - - - stats.retained - (size_t) - r- - [] - - Total number of bytes in virtual memory mappings that - were retained rather than being returned to the operating system via - e.g. munmap - 2. Retained virtual memory is - typically untouched, decommitted, or purged, so it has no strongly - associated physical memory (see chunk hooks for details). Retained - memory is excluded from mapped memory statistics, e.g. stats.mapped. - - - stats.arenas.<i>.dss @@ -2233,19 +2101,6 @@ typedef struct { for details. - - - stats.arenas.<i>.decay_time - (ssize_t) - r- - - Approximate time in seconds from the creation of a set - of unused dirty pages until an equivalent set of unused dirty pages is - purged and/or reused. See opt.decay_time - for details. - - stats.arenas.<i>.nthreads @@ -2287,18 +2142,6 @@ typedef struct { Number of mapped bytes. - - - stats.arenas.<i>.retained - (size_t) - r- - [] - - Number of retained bytes. See stats.retained for - details. 
- - stats.arenas.<i>.metadata.mapped @@ -2680,53 +2523,6 @@ typedef struct { - - HEAP PROFILE FORMAT - Although the heap profiling functionality was originally designed to - be compatible with the - pprof command that is developed as part of the gperftools - package, the addition of per thread heap profiling functionality - required a different heap profile format. The jeprof - command is derived from pprof, with enhancements to - support the heap profile format described here. - - In the following hypothetical heap profile, [...] - indicates elision for the sake of compactness. The following matches the above heap profile, but most -tokens are replaced with <description> to indicate -descriptions of the corresponding fields. / - : : [: ] - [...] - : : [: ] - [...] - : : [: ] - [...] -@ [...] [...] - : : [: ] - : : [: ] - : : [: ] -[...] - -MAPPED_LIBRARIES: -/maps>]]> - - DEBUGGING MALLOC PROBLEMS When debugging, it is a good idea to configure/build jemalloc with @@ -2736,7 +2532,7 @@ MAPPED_LIBRARIES: of run-time assertions that catch application errors such as double-free, write-after-free, etc. - Programs often accidentally depend on uninitialized + Programs often accidentally depend on “uninitialized” memory actually being filled with zero bytes. Junk filling (see the opt.junk option) tends to expose such bugs in the form of obviously incorrect @@ -2765,29 +2561,29 @@ MAPPED_LIBRARIES: to override the function which emits the text strings forming the errors and warnings if for some reason the STDERR_FILENO file descriptor is not suitable for this. - malloc_message() takes the + malloc_message takes the cbopaque pointer argument that is NULL unless overridden by the arguments in a call to - malloc_stats_print(), followed by a string + malloc_stats_print, followed by a string pointer. Please note that doing anything which tries to allocate memory in this function is likely to result in a crash or deadlock. All messages are prefixed by - <jemalloc>: . 
+ “<jemalloc>: ”. RETURN VALUES Standard API - The malloc() and - calloc() functions return a pointer to the + The malloc and + calloc functions return a pointer to the allocated memory if successful; otherwise a NULL pointer is returned and errno is set to ENOMEM. - The posix_memalign() function + The posix_memalign function returns the value 0 if successful; otherwise it returns an error value. - The posix_memalign() function will fail + The posix_memalign function will fail if: @@ -2806,11 +2602,11 @@ MAPPED_LIBRARIES: - The aligned_alloc() function returns + The aligned_alloc function returns a pointer to the allocated memory if successful; otherwise a NULL pointer is returned and errno is set. The - aligned_alloc() function will fail if: + aligned_alloc function will fail if: EINVAL @@ -2827,44 +2623,44 @@ MAPPED_LIBRARIES: - The realloc() function returns a + The realloc function returns a pointer, possibly identical to ptr, to the allocated memory if successful; otherwise a NULL pointer is returned, and errno is set to ENOMEM if the error was the result of an - allocation failure. The realloc() + allocation failure. The realloc function always leaves the original buffer intact when an error occurs. - The free() function returns no + The free function returns no value. Non-standard API - The mallocx() and - rallocx() functions return a pointer to + The mallocx and + rallocx functions return a pointer to the allocated memory if successful; otherwise a NULL pointer is returned to indicate insufficient contiguous memory was available to service the allocation request. - The xallocx() function returns the + The xallocx function returns the real size of the resulting resized allocation pointed to by ptr, which is a value less than size if the allocation could not be adequately grown in place. - The sallocx() function returns the + The sallocx function returns the real size of the allocation pointed to by ptr. 
- The nallocx() returns the real size + The nallocx returns the real size that would result from a successful equivalent - mallocx() function call, or zero if + mallocx function call, or zero if insufficient memory is available to perform the size computation. - The mallctl(), - mallctlnametomib(), and - mallctlbymib() functions return 0 on + The mallctl, + mallctlnametomib, and + mallctlbymib functions return 0 on success; otherwise they return an error value. The functions will fail if: @@ -2900,13 +2696,13 @@ MAPPED_LIBRARIES: EFAULT An interface with side effects failed in some way - not directly related to mallctl*() + not directly related to mallctl* read/write processing. - The malloc_usable_size() function + The malloc_usable_size function returns the usable size of the allocation pointed to by ptr. @@ -2954,13 +2750,13 @@ malloc_conf = "lg_chunk:24";]]> STANDARDS - The malloc(), - calloc(), - realloc(), and - free() functions conform to ISO/IEC - 9899:1990 (ISO C90). + The malloc, + calloc, + realloc, and + free functions conform to ISO/IEC + 9899:1990 (“ISO C90”). - The posix_memalign() function conforms - to IEEE Std 1003.1-2001 (POSIX.1). + The posix_memalign function conforms + to IEEE Std 1003.1-2001 (“POSIX.1”). diff --git a/deps/jemalloc/doc/stylesheet.xsl b/deps/jemalloc/doc/stylesheet.xsl index 619365d82..4e334a86f 100644 --- a/deps/jemalloc/doc/stylesheet.xsl +++ b/deps/jemalloc/doc/stylesheet.xsl @@ -1,10 +1,7 @@ ansi - - - - + - + "" diff --git a/deps/jemalloc/include/jemalloc/internal/arena.h b/deps/jemalloc/include/jemalloc/internal/arena.h index ce4e6029e..12c617979 100644 --- a/deps/jemalloc/include/jemalloc/internal/arena.h +++ b/deps/jemalloc/include/jemalloc/internal/arena.h @@ -23,29 +23,14 @@ */ #define LG_DIRTY_MULT_DEFAULT 3 -typedef enum { - purge_mode_ratio = 0, - purge_mode_decay = 1, - - purge_mode_limit = 2 -} purge_mode_t; -#define PURGE_DEFAULT purge_mode_ratio -/* Default decay time in seconds. 
*/ -#define DECAY_TIME_DEFAULT 10 -/* Number of event ticks between time checks. */ -#define DECAY_NTICKS_PER_UPDATE 1000 - typedef struct arena_runs_dirty_link_s arena_runs_dirty_link_t; -typedef struct arena_avail_links_s arena_avail_links_t; typedef struct arena_run_s arena_run_t; typedef struct arena_chunk_map_bits_s arena_chunk_map_bits_t; typedef struct arena_chunk_map_misc_s arena_chunk_map_misc_t; typedef struct arena_chunk_s arena_chunk_t; typedef struct arena_bin_info_s arena_bin_info_t; -typedef struct arena_decay_s arena_decay_t; typedef struct arena_bin_s arena_bin_t; typedef struct arena_s arena_t; -typedef struct arena_tdata_s arena_tdata_t; #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ @@ -155,13 +140,13 @@ struct arena_runs_dirty_link_s { */ struct arena_chunk_map_misc_s { /* - * Linkage for run heaps. There are two disjoint uses: + * Linkage for run trees. There are two disjoint uses: * - * 1) arena_t's runs_avail heaps. + * 1) arena_t's runs_avail tree. * 2) arena_run_t conceptually uses this linkage for in-use non-full * runs, rather than directly embedding linkage. */ - phn(arena_chunk_map_misc_t) ph_link; + rb_node(arena_chunk_map_misc_t) rb_link; union { /* Linkage for list of dirty runs. */ @@ -169,15 +154,16 @@ struct arena_chunk_map_misc_s { /* Profile counters, used for large object runs. */ union { - void *prof_tctx_pun; - prof_tctx_t *prof_tctx; + void *prof_tctx_pun; + prof_tctx_t *prof_tctx; }; /* Small region run metadata. */ arena_run_t run; }; }; -typedef ph(arena_chunk_map_misc_t) arena_run_heap_t; +typedef rb_tree(arena_chunk_map_misc_t) arena_avail_tree_t; +typedef rb_tree(arena_chunk_map_misc_t) arena_run_tree_t; #endif /* JEMALLOC_ARENA_STRUCTS_A */ #ifdef JEMALLOC_ARENA_STRUCTS_B @@ -190,14 +176,6 @@ struct arena_chunk_s { */ extent_node_t node; - /* - * True if memory could be backed by transparent huge pages. 
This is - * only directly relevant to Linux, since it is the only supported - * platform on which jemalloc interacts with explicit transparent huge - * page controls. - */ - bool hugepage; - /* * Map of pages within chunk that keeps track of free/large/small. The * first map_bias entries are omitted, since the chunk header does not @@ -242,71 +220,28 @@ struct arena_chunk_s { */ struct arena_bin_info_s { /* Size of regions in a run for this bin's size class. */ - size_t reg_size; + size_t reg_size; /* Redzone size. */ - size_t redzone_size; + size_t redzone_size; /* Interval between regions (reg_size + (redzone_size << 1)). */ - size_t reg_interval; + size_t reg_interval; /* Total size of a run for this bin's size class. */ - size_t run_size; + size_t run_size; /* Total number of regions in a run for this bin's size class. */ - uint32_t nregs; + uint32_t nregs; /* * Metadata used to manipulate bitmaps for runs associated with this * bin. */ - bitmap_info_t bitmap_info; + bitmap_info_t bitmap_info; /* Offset of first region in a run for this bin's size class. */ - uint32_t reg0_offset; -}; - -struct arena_decay_s { - /* - * Approximate time in seconds from the creation of a set of unused - * dirty pages until an equivalent set of unused dirty pages is purged - * and/or reused. - */ - ssize_t time; - /* time / SMOOTHSTEP_NSTEPS. */ - nstime_t interval; - /* - * Time at which the current decay interval logically started. We do - * not actually advance to a new epoch until sometime after it starts - * because of scheduling and computation delays, and it is even possible - * to completely skip epochs. In all cases, during epoch advancement we - * merge all relevant activity into the most recently recorded epoch. - */ - nstime_t epoch; - /* Deadline randomness generator. */ - uint64_t jitter_state; - /* - * Deadline for current epoch. This is the sum of interval and per - * epoch jitter which is a uniform random variable in [0..interval). 
- * Epochs always advance by precise multiples of interval, but we - * randomize the deadline to reduce the likelihood of arenas purging in - * lockstep. - */ - nstime_t deadline; - /* - * Number of dirty pages at beginning of current epoch. During epoch - * advancement we use the delta between arena->decay.ndirty and - * arena->ndirty to determine how many dirty pages, if any, were - * generated. - */ - size_t ndirty; - /* - * Trailing log of how many unused dirty pages were generated during - * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last - * element is the most recent epoch. Corresponding epoch times are - * relative to epoch. - */ - size_t backlog[SMOOTHSTEP_NSTEPS]; + uint32_t reg0_offset; }; struct arena_bin_s { @@ -316,25 +251,25 @@ struct arena_bin_s { * which may be acquired while holding one or more bin locks, but not * vise versa. */ - malloc_mutex_t lock; + malloc_mutex_t lock; /* * Current run being used to service allocations of this bin's size * class. */ - arena_run_t *runcur; + arena_run_t *runcur; /* - * Heap of non-full runs. This heap is used when looking for an + * Tree of non-full runs. This tree is used when looking for an * existing run when runcur is no longer usable. We choose the * non-full run that is lowest in memory; this policy tends to keep * objects packed well, and it can also help reduce the number of * almost-empty chunks. */ - arena_run_heap_t runs; + arena_run_tree_t runs; /* Bin statistics. */ - malloc_bin_stats_t stats; + malloc_bin_stats_t stats; }; struct arena_s { @@ -342,23 +277,15 @@ struct arena_s { unsigned ind; /* - * Number of threads currently assigned to this arena, synchronized via - * atomic operations. Each thread has two distinct assignments, one for - * application-serving allocation, and the other for internal metadata - * allocation. 
Internal metadata must not be allocated from arenas - * created via the arenas.extend mallctl, because the arena..reset - * mallctl indiscriminately discards all allocations for the affected - * arena. - * - * 0: Application allocation. - * 1: Internal metadata allocation. + * Number of threads currently assigned to this arena. This field is + * protected by arenas_lock. */ - unsigned nthreads[2]; + unsigned nthreads; /* * There are three classes of arena operations from a locking * perspective: - * 1) Thread assignment (modifies nthreads) is synchronized via atomics. + * 1) Thread assignment (modifies nthreads) is protected by arenas_lock. * 2) Bin-related operations are protected by bin locks. * 3) Chunk- and run-related operations are protected by this mutex. */ @@ -378,16 +305,10 @@ struct arena_s { * PRNG state for cache index randomization of large allocation base * pointers. */ - size_t offset_state; + uint64_t offset_state; dss_prec_t dss_prec; - /* Extant arena chunks. */ - ql_head(extent_node_t) achunks; - - /* Extent serial number generator state. */ - size_t extent_sn_next; - /* * In order to avoid rapid chunk allocation/deallocation when an arena * oscillates right on the cusp of needing a new chunk, cache the most @@ -403,7 +324,7 @@ struct arena_s { /* Minimum ratio (log base 2) of nactive:ndirty. */ ssize_t lg_dirty_mult; - /* True if a thread is currently executing arena_purge_to_limit(). */ + /* True if a thread is currently executing arena_purge(). */ bool purging; /* Number of pages in active runs and huge regions. */ @@ -417,6 +338,12 @@ struct arena_s { */ size_t ndirty; + /* + * Size/address-ordered tree of this arena's available runs. The tree + * is used for first-best-fit run allocation. + */ + arena_avail_tree_t runs_avail; + /* * Unused dirty memory this arena manages. 
Dirty memory is conceptually * tracked as an arbitrarily interleaved LRU of dirty runs and cached @@ -448,9 +375,6 @@ struct arena_s { arena_runs_dirty_link_t runs_dirty; extent_node_t chunks_cache; - /* Decay-based purging state. */ - arena_decay_t decay; - /* Extant huge allocations. */ ql_head(extent_node_t) huge; /* Synchronizes all huge allocation/update/deallocation. */ @@ -463,9 +387,9 @@ struct arena_s { * orderings are needed, which is why there are two trees with the same * contents. */ - extent_tree_t chunks_szsnad_cached; + extent_tree_t chunks_szad_cached; extent_tree_t chunks_ad_cached; - extent_tree_t chunks_szsnad_retained; + extent_tree_t chunks_szad_retained; extent_tree_t chunks_ad_retained; malloc_mutex_t chunks_mtx; @@ -478,19 +402,6 @@ struct arena_s { /* bins is used to store trees of free regions. */ arena_bin_t bins[NBINS]; - - /* - * Size-segregated address-ordered heaps of this arena's available runs, - * used for first-best-fit run allocation. Runs are quantized, i.e. - * they reside in the last heap which corresponds to a size class less - * than or equal to the run size. - */ - arena_run_heap_t runs_avail[NPSIZES]; -}; - -/* Used in conjunction with tsd for fast arena-related context lookup. */ -struct arena_tdata_s { - ticker_t decay_ticker; }; #endif /* JEMALLOC_ARENA_STRUCTS_B */ @@ -506,10 +417,7 @@ static const size_t large_pad = #endif ; -extern purge_mode_t opt_purge; -extern const char *purge_mode_names[]; extern ssize_t opt_lg_dirty_mult; -extern ssize_t opt_decay_time; extern arena_bin_info_t arena_bin_info[NBINS]; @@ -520,37 +428,27 @@ extern size_t large_maxclass; /* Max large size class. */ extern unsigned nlclasses; /* Number of large size classes. */ extern unsigned nhclasses; /* Number of huge size classes. 
*/ -#ifdef JEMALLOC_JET -typedef size_t (run_quantize_t)(size_t); -extern run_quantize_t *run_quantize_floor; -extern run_quantize_t *run_quantize_ceil; -#endif void arena_chunk_cache_maybe_insert(arena_t *arena, extent_node_t *node, bool cache); void arena_chunk_cache_maybe_remove(arena_t *arena, extent_node_t *node, bool cache); -extent_node_t *arena_node_alloc(tsdn_t *tsdn, arena_t *arena); -void arena_node_dalloc(tsdn_t *tsdn, arena_t *arena, extent_node_t *node); -void *arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize, - size_t alignment, size_t *sn, bool *zero); -void arena_chunk_dalloc_huge(tsdn_t *tsdn, arena_t *arena, void *chunk, - size_t usize, size_t sn); -void arena_chunk_ralloc_huge_similar(tsdn_t *tsdn, arena_t *arena, - void *chunk, size_t oldsize, size_t usize); -void arena_chunk_ralloc_huge_shrink(tsdn_t *tsdn, arena_t *arena, - void *chunk, size_t oldsize, size_t usize, size_t sn); -bool arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, - void *chunk, size_t oldsize, size_t usize, bool *zero); -ssize_t arena_lg_dirty_mult_get(tsdn_t *tsdn, arena_t *arena); -bool arena_lg_dirty_mult_set(tsdn_t *tsdn, arena_t *arena, - ssize_t lg_dirty_mult); -ssize_t arena_decay_time_get(tsdn_t *tsdn, arena_t *arena); -bool arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time); -void arena_purge(tsdn_t *tsdn, arena_t *arena, bool all); -void arena_maybe_purge(tsdn_t *tsdn, arena_t *arena); -void arena_reset(tsd_t *tsd, arena_t *arena); -void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, - tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes); +extent_node_t *arena_node_alloc(arena_t *arena); +void arena_node_dalloc(arena_t *arena, extent_node_t *node); +void *arena_chunk_alloc_huge(arena_t *arena, size_t usize, size_t alignment, + bool *zero); +void arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t usize); +void arena_chunk_ralloc_huge_similar(arena_t *arena, void *chunk, + size_t oldsize, 
size_t usize); +void arena_chunk_ralloc_huge_shrink(arena_t *arena, void *chunk, + size_t oldsize, size_t usize); +bool arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, + size_t oldsize, size_t usize, bool *zero); +ssize_t arena_lg_dirty_mult_get(arena_t *arena); +bool arena_lg_dirty_mult_set(arena_t *arena, ssize_t lg_dirty_mult); +void arena_maybe_purge(arena_t *arena); +void arena_purge_all(arena_t *arena); +void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, + szind_t binind, uint64_t prof_accumbytes); void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, bool zero); #ifdef JEMALLOC_JET @@ -563,100 +461,75 @@ extern arena_dalloc_junk_small_t *arena_dalloc_junk_small; void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info); #endif void arena_quarantine_junk_small(void *ptr, size_t usize); -void *arena_malloc_large(tsdn_t *tsdn, arena_t *arena, szind_t ind, - bool zero); -void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, - szind_t ind, bool zero); -void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, +void *arena_malloc_small(arena_t *arena, size_t size, bool zero); +void *arena_malloc_large(arena_t *arena, size_t size, bool zero); +void *arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache); -void arena_prof_promoted(tsdn_t *tsdn, const void *ptr, size_t size); -void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, - arena_chunk_t *chunk, void *ptr, arena_chunk_map_bits_t *bitselm); -void arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, - void *ptr, size_t pageind, arena_chunk_map_bits_t *bitselm); -void arena_dalloc_small(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, - void *ptr, size_t pageind); +void arena_prof_promoted(const void *ptr, size_t size); +void arena_dalloc_bin_junked_locked(arena_t *arena, arena_chunk_t *chunk, + void *ptr, arena_chunk_map_bits_t *bitselm); +void arena_dalloc_bin(arena_t 
*arena, arena_chunk_t *chunk, void *ptr, + size_t pageind, arena_chunk_map_bits_t *bitselm); +void arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, + size_t pageind); #ifdef JEMALLOC_JET typedef void (arena_dalloc_junk_large_t)(void *, size_t); extern arena_dalloc_junk_large_t *arena_dalloc_junk_large; #else void arena_dalloc_junk_large(void *ptr, size_t usize); #endif -void arena_dalloc_large_junked_locked(tsdn_t *tsdn, arena_t *arena, - arena_chunk_t *chunk, void *ptr); -void arena_dalloc_large(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, +void arena_dalloc_large_junked_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr); +void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr); #ifdef JEMALLOC_JET typedef void (arena_ralloc_junk_large_t)(void *, size_t, size_t); extern arena_ralloc_junk_large_t *arena_ralloc_junk_large; #endif -bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, - size_t size, size_t extra, bool zero); +bool arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, + size_t extra, bool zero); void *arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache); -dss_prec_t arena_dss_prec_get(tsdn_t *tsdn, arena_t *arena); -bool arena_dss_prec_set(tsdn_t *tsdn, arena_t *arena, dss_prec_t dss_prec); +dss_prec_t arena_dss_prec_get(arena_t *arena); +bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); ssize_t arena_lg_dirty_mult_default_get(void); bool arena_lg_dirty_mult_default_set(ssize_t lg_dirty_mult); -ssize_t arena_decay_time_default_get(void); -bool arena_decay_time_default_set(ssize_t decay_time); -void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, - unsigned *nthreads, const char **dss, ssize_t *lg_dirty_mult, - ssize_t *decay_time, size_t *nactive, size_t *ndirty); -void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, - const char **dss, ssize_t *lg_dirty_mult, ssize_t 
*decay_time, - size_t *nactive, size_t *ndirty, arena_stats_t *astats, - malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats, - malloc_huge_stats_t *hstats); -unsigned arena_nthreads_get(arena_t *arena, bool internal); -void arena_nthreads_inc(arena_t *arena, bool internal); -void arena_nthreads_dec(arena_t *arena, bool internal); -size_t arena_extent_sn_next(arena_t *arena); -arena_t *arena_new(tsdn_t *tsdn, unsigned ind); -void arena_boot(void); -void arena_prefork0(tsdn_t *tsdn, arena_t *arena); -void arena_prefork1(tsdn_t *tsdn, arena_t *arena); -void arena_prefork2(tsdn_t *tsdn, arena_t *arena); -void arena_prefork3(tsdn_t *tsdn, arena_t *arena); -void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena); -void arena_postfork_child(tsdn_t *tsdn, arena_t *arena); +void arena_stats_merge(arena_t *arena, const char **dss, + ssize_t *lg_dirty_mult, size_t *nactive, size_t *ndirty, + arena_stats_t *astats, malloc_bin_stats_t *bstats, + malloc_large_stats_t *lstats, malloc_huge_stats_t *hstats); +arena_t *arena_new(unsigned ind); +bool arena_boot(void); +void arena_prefork(arena_t *arena); +void arena_postfork_parent(arena_t *arena); +void arena_postfork_child(arena_t *arena); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -arena_chunk_map_bits_t *arena_bitselm_get_mutable(arena_chunk_t *chunk, +arena_chunk_map_bits_t *arena_bitselm_get(arena_chunk_t *chunk, size_t pageind); -const arena_chunk_map_bits_t *arena_bitselm_get_const( - const arena_chunk_t *chunk, size_t pageind); -arena_chunk_map_misc_t *arena_miscelm_get_mutable(arena_chunk_t *chunk, +arena_chunk_map_misc_t *arena_miscelm_get(arena_chunk_t *chunk, size_t pageind); -const arena_chunk_map_misc_t *arena_miscelm_get_const( - const arena_chunk_t *chunk, size_t pageind); -size_t arena_miscelm_to_pageind(const arena_chunk_map_misc_t *miscelm); -void *arena_miscelm_to_rpages(const 
arena_chunk_map_misc_t *miscelm); +size_t arena_miscelm_to_pageind(arena_chunk_map_misc_t *miscelm); +void *arena_miscelm_to_rpages(arena_chunk_map_misc_t *miscelm); arena_chunk_map_misc_t *arena_rd_to_miscelm(arena_runs_dirty_link_t *rd); arena_chunk_map_misc_t *arena_run_to_miscelm(arena_run_t *run); -size_t *arena_mapbitsp_get_mutable(arena_chunk_t *chunk, size_t pageind); -const size_t *arena_mapbitsp_get_const(const arena_chunk_t *chunk, - size_t pageind); -size_t arena_mapbitsp_read(const size_t *mapbitsp); -size_t arena_mapbits_get(const arena_chunk_t *chunk, size_t pageind); +size_t *arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbitsp_read(size_t *mapbitsp); +size_t arena_mapbits_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_size_decode(size_t mapbits); -size_t arena_mapbits_unallocated_size_get(const arena_chunk_t *chunk, +size_t arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_large_size_get(const arena_chunk_t *chunk, - size_t pageind); -size_t arena_mapbits_small_runind_get(const arena_chunk_t *chunk, - size_t pageind); -szind_t arena_mapbits_binind_get(const arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_dirty_get(const arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_unzeroed_get(const arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_decommitted_get(const arena_chunk_t *chunk, - size_t pageind); -size_t arena_mapbits_large_get(const arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_allocated_get(const arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind); +szind_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind); 
+size_t arena_mapbits_decommitted_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind); void arena_mapbitsp_write(size_t *mapbitsp, size_t mapbits); size_t arena_mapbits_size_encode(size_t size); void arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, @@ -676,31 +549,27 @@ void arena_metadata_allocated_sub(arena_t *arena, size_t size); size_t arena_metadata_allocated_get(arena_t *arena); bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes); bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes); -bool arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes); +bool arena_prof_accum(arena_t *arena, uint64_t accumbytes); szind_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits); szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin); -size_t arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, +unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr); -prof_tctx_t *arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr); -void arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, - prof_tctx_t *tctx); -void arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize, +prof_tctx_t *arena_prof_tctx_get(const void *ptr); +void arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx); +void arena_prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, prof_tctx_t *old_tctx); -void arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks); -void arena_decay_tick(tsdn_t *tsdn, arena_t *arena); -void *arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, - bool zero, tcache_t *tcache, bool slow_path); +void *arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, + tcache_t *tcache); arena_t *arena_aalloc(const void *ptr); -size_t arena_salloc(tsdn_t *tsdn, const void 
*ptr, bool demote); -void arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path); -void arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, - bool slow_path); +size_t arena_salloc(const void *ptr, bool demote); +void arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache); +void arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) # ifdef JEMALLOC_ARENA_INLINE_A JEMALLOC_ALWAYS_INLINE arena_chunk_map_bits_t * -arena_bitselm_get_mutable(arena_chunk_t *chunk, size_t pageind) +arena_bitselm_get(arena_chunk_t *chunk, size_t pageind) { assert(pageind >= map_bias); @@ -709,15 +578,8 @@ arena_bitselm_get_mutable(arena_chunk_t *chunk, size_t pageind) return (&chunk->map_bits[pageind-map_bias]); } -JEMALLOC_ALWAYS_INLINE const arena_chunk_map_bits_t * -arena_bitselm_get_const(const arena_chunk_t *chunk, size_t pageind) -{ - - return (arena_bitselm_get_mutable((arena_chunk_t *)chunk, pageind)); -} - JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t * -arena_miscelm_get_mutable(arena_chunk_t *chunk, size_t pageind) +arena_miscelm_get(arena_chunk_t *chunk, size_t pageind) { assert(pageind >= map_bias); @@ -727,15 +589,8 @@ arena_miscelm_get_mutable(arena_chunk_t *chunk, size_t pageind) (uintptr_t)map_misc_offset) + pageind-map_bias); } -JEMALLOC_ALWAYS_INLINE const arena_chunk_map_misc_t * -arena_miscelm_get_const(const arena_chunk_t *chunk, size_t pageind) -{ - - return (arena_miscelm_get_mutable((arena_chunk_t *)chunk, pageind)); -} - JEMALLOC_ALWAYS_INLINE size_t -arena_miscelm_to_pageind(const arena_chunk_map_misc_t *miscelm) +arena_miscelm_to_pageind(arena_chunk_map_misc_t *miscelm) { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); size_t pageind = ((uintptr_t)miscelm - ((uintptr_t)chunk + @@ -748,7 +603,7 @@ arena_miscelm_to_pageind(const arena_chunk_map_misc_t *miscelm) } JEMALLOC_ALWAYS_INLINE void * -arena_miscelm_to_rpages(const 
arena_chunk_map_misc_t *miscelm) +arena_miscelm_to_rpages(arena_chunk_map_misc_t *miscelm) { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); size_t pageind = arena_miscelm_to_pageind(miscelm); @@ -781,31 +636,24 @@ arena_run_to_miscelm(arena_run_t *run) } JEMALLOC_ALWAYS_INLINE size_t * -arena_mapbitsp_get_mutable(arena_chunk_t *chunk, size_t pageind) +arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind) { - return (&arena_bitselm_get_mutable(chunk, pageind)->bits); -} - -JEMALLOC_ALWAYS_INLINE const size_t * -arena_mapbitsp_get_const(const arena_chunk_t *chunk, size_t pageind) -{ - - return (arena_mapbitsp_get_mutable((arena_chunk_t *)chunk, pageind)); + return (&arena_bitselm_get(chunk, pageind)->bits); } JEMALLOC_ALWAYS_INLINE size_t -arena_mapbitsp_read(const size_t *mapbitsp) +arena_mapbitsp_read(size_t *mapbitsp) { return (*mapbitsp); } JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_get(const arena_chunk_t *chunk, size_t pageind) +arena_mapbits_get(arena_chunk_t *chunk, size_t pageind) { - return (arena_mapbitsp_read(arena_mapbitsp_get_const(chunk, pageind))); + return (arena_mapbitsp_read(arena_mapbitsp_get(chunk, pageind))); } JEMALLOC_ALWAYS_INLINE size_t @@ -825,7 +673,7 @@ arena_mapbits_size_decode(size_t mapbits) } JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_unallocated_size_get(const arena_chunk_t *chunk, size_t pageind) +arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -835,7 +683,7 @@ arena_mapbits_unallocated_size_get(const arena_chunk_t *chunk, size_t pageind) } JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_large_size_get(const arena_chunk_t *chunk, size_t pageind) +arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -846,7 +694,7 @@ arena_mapbits_large_size_get(const arena_chunk_t *chunk, size_t pageind) } JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_small_runind_get(const arena_chunk_t *chunk, size_t pageind) 
+arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -857,7 +705,7 @@ arena_mapbits_small_runind_get(const arena_chunk_t *chunk, size_t pageind) } JEMALLOC_ALWAYS_INLINE szind_t -arena_mapbits_binind_get(const arena_chunk_t *chunk, size_t pageind) +arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind) { size_t mapbits; szind_t binind; @@ -869,7 +717,7 @@ arena_mapbits_binind_get(const arena_chunk_t *chunk, size_t pageind) } JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_dirty_get(const arena_chunk_t *chunk, size_t pageind) +arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -880,7 +728,7 @@ arena_mapbits_dirty_get(const arena_chunk_t *chunk, size_t pageind) } JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_unzeroed_get(const arena_chunk_t *chunk, size_t pageind) +arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -891,7 +739,7 @@ arena_mapbits_unzeroed_get(const arena_chunk_t *chunk, size_t pageind) } JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_decommitted_get(const arena_chunk_t *chunk, size_t pageind) +arena_mapbits_decommitted_get(arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -902,7 +750,7 @@ arena_mapbits_decommitted_get(const arena_chunk_t *chunk, size_t pageind) } JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_large_get(const arena_chunk_t *chunk, size_t pageind) +arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -911,7 +759,7 @@ arena_mapbits_large_get(const arena_chunk_t *chunk, size_t pageind) } JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_allocated_get(const arena_chunk_t *chunk, size_t pageind) +arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -947,7 +795,7 @@ JEMALLOC_ALWAYS_INLINE void arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size, size_t flags) { - size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind); + size_t *mapbitsp 
= arena_mapbitsp_get(chunk, pageind); assert((size & PAGE_MASK) == 0); assert((flags & CHUNK_MAP_FLAGS_MASK) == flags); @@ -961,7 +809,7 @@ JEMALLOC_ALWAYS_INLINE void arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, size_t size) { - size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind); + size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); size_t mapbits = arena_mapbitsp_read(mapbitsp); assert((size & PAGE_MASK) == 0); @@ -973,7 +821,7 @@ arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, JEMALLOC_ALWAYS_INLINE void arena_mapbits_internal_set(arena_chunk_t *chunk, size_t pageind, size_t flags) { - size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind); + size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); assert((flags & CHUNK_MAP_UNZEROED) == flags); arena_mapbitsp_write(mapbitsp, flags); @@ -983,7 +831,7 @@ JEMALLOC_ALWAYS_INLINE void arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size, size_t flags) { - size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind); + size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); assert((size & PAGE_MASK) == 0); assert((flags & CHUNK_MAP_FLAGS_MASK) == flags); @@ -998,7 +846,7 @@ JEMALLOC_ALWAYS_INLINE void arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, szind_t binind) { - size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind); + size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); size_t mapbits = arena_mapbitsp_read(mapbitsp); assert(binind <= BININD_INVALID); @@ -1012,7 +860,7 @@ JEMALLOC_ALWAYS_INLINE void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind, szind_t binind, size_t flags) { - size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind); + size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); assert(binind < BININD_INVALID); assert(pageind - runind >= map_bias); @@ -1069,7 +917,7 @@ arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes) } 
JEMALLOC_INLINE bool -arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) +arena_prof_accum(arena_t *arena, uint64_t accumbytes) { cassert(config_prof); @@ -1080,9 +928,9 @@ arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) { bool ret; - malloc_mutex_lock(tsdn, &arena->lock); + malloc_mutex_lock(&arena->lock); ret = arena_prof_accum_impl(arena, accumbytes); - malloc_mutex_unlock(tsdn, &arena->lock); + malloc_mutex_unlock(&arena->lock); return (ret); } } @@ -1100,12 +948,12 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) size_t pageind; size_t actual_mapbits; size_t rpages_ind; - const arena_run_t *run; + arena_run_t *run; arena_bin_t *bin; szind_t run_binind, actual_binind; arena_bin_info_t *bin_info; - const arena_chunk_map_misc_t *miscelm; - const void *rpages; + arena_chunk_map_misc_t *miscelm; + void *rpages; assert(binind != BININD_INVALID); assert(binind < NBINS); @@ -1118,11 +966,11 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) assert(arena_mapbits_allocated_get(chunk, pageind) != 0); rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, pageind); - miscelm = arena_miscelm_get_const(chunk, rpages_ind); + miscelm = arena_miscelm_get(chunk, rpages_ind); run = &miscelm->run; run_binind = run->binind; bin = &arena->bins[run_binind]; - actual_binind = (szind_t)(bin - arena->bins); + actual_binind = bin - arena->bins; assert(run_binind == actual_binind); bin_info = &arena_bin_info[actual_binind]; rpages = arena_miscelm_to_rpages(miscelm); @@ -1139,15 +987,16 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) JEMALLOC_INLINE szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin) { - szind_t binind = (szind_t)(bin - arena->bins); + szind_t binind = bin - arena->bins; assert(binind < NBINS); return (binind); } -JEMALLOC_INLINE size_t +JEMALLOC_INLINE unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) { - size_t diff, interval, shift, regind; + 
unsigned shift, diff, regind; + size_t interval; arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); void *rpages = arena_miscelm_to_rpages(miscelm); @@ -1162,12 +1011,12 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) * Avoid doing division with a variable divisor if possible. Using * actual division here can reduce allocator throughput by over 20%! */ - diff = (size_t)((uintptr_t)ptr - (uintptr_t)rpages - + diff = (unsigned)((uintptr_t)ptr - (uintptr_t)rpages - bin_info->reg0_offset); /* Rescale (factor powers of 2 out of the numerator and denominator). */ interval = bin_info->reg_interval; - shift = ffs_zu(interval) - 1; + shift = jemalloc_ffs(interval) - 1; diff >>= shift; interval >>= shift; @@ -1189,9 +1038,9 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) * divide by 0, and 1 and 2 are both powers of two, which are * handled above. */ -#define SIZE_INV_SHIFT ((sizeof(size_t) << 3) - LG_RUN_MAXREGS) -#define SIZE_INV(s) (((ZU(1) << SIZE_INV_SHIFT) / (s)) + 1) - static const size_t interval_invs[] = { +#define SIZE_INV_SHIFT ((sizeof(unsigned) << 3) - LG_RUN_MAXREGS) +#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1) + static const unsigned interval_invs[] = { SIZE_INV(3), SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7), SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11), @@ -1202,8 +1051,8 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31) }; - if (likely(interval <= ((sizeof(interval_invs) / sizeof(size_t)) - + 2))) { + if (likely(interval <= ((sizeof(interval_invs) / + sizeof(unsigned)) + 2))) { regind = (diff * interval_invs[interval - 3]) >> SIZE_INV_SHIFT; } else @@ -1218,7 +1067,7 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) } JEMALLOC_INLINE prof_tctx_t * -arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr) +arena_prof_tctx_get(const void 
*ptr) { prof_tctx_t *ret; arena_chunk_t *chunk; @@ -1234,19 +1083,18 @@ arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr) if (likely((mapbits & CHUNK_MAP_LARGE) == 0)) ret = (prof_tctx_t *)(uintptr_t)1U; else { - arena_chunk_map_misc_t *elm = - arena_miscelm_get_mutable(chunk, pageind); + arena_chunk_map_misc_t *elm = arena_miscelm_get(chunk, + pageind); ret = atomic_read_p(&elm->prof_tctx_pun); } } else - ret = huge_prof_tctx_get(tsdn, ptr); + ret = huge_prof_tctx_get(ptr); return (ret); } JEMALLOC_INLINE void -arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, - prof_tctx_t *tctx) +arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx) { arena_chunk_t *chunk; @@ -1265,7 +1113,7 @@ arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, assert(arena_mapbits_large_get(chunk, pageind) != 0); - elm = arena_miscelm_get_mutable(chunk, pageind); + elm = arena_miscelm_get(chunk, pageind); atomic_write_p(&elm->prof_tctx_pun, tctx); } else { /* @@ -1277,12 +1125,12 @@ arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, assert(arena_mapbits_large_get(chunk, pageind) == 0); } } else - huge_prof_tctx_set(tsdn, ptr, tctx); + huge_prof_tctx_set(ptr, tctx); } JEMALLOC_INLINE void -arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize, - const void *old_ptr, prof_tctx_t *old_tctx) +arena_prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, + prof_tctx_t *old_tctx) { cassert(config_prof); @@ -1301,59 +1149,43 @@ arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize, 0); assert(arena_mapbits_large_get(chunk, pageind) != 0); - elm = arena_miscelm_get_mutable(chunk, pageind); + elm = arena_miscelm_get(chunk, pageind); atomic_write_p(&elm->prof_tctx_pun, (prof_tctx_t *)(uintptr_t)1U); } else - huge_prof_tctx_reset(tsdn, ptr); + huge_prof_tctx_reset(ptr); } } -JEMALLOC_ALWAYS_INLINE void -arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) -{ - tsd_t *tsd; - ticker_t *decay_ticker; - 
- if (unlikely(tsdn_null(tsdn))) - return; - tsd = tsdn_tsd(tsdn); - decay_ticker = decay_ticker_get(tsd, arena->ind); - if (unlikely(decay_ticker == NULL)) - return; - if (unlikely(ticker_ticks(decay_ticker, nticks))) - arena_purge(tsdn, arena, false); -} - -JEMALLOC_ALWAYS_INLINE void -arena_decay_tick(tsdn_t *tsdn, arena_t *arena) -{ - - arena_decay_ticks(tsdn, arena, 1); -} - JEMALLOC_ALWAYS_INLINE void * -arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, - tcache_t *tcache, bool slow_path) +arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, + tcache_t *tcache) { - assert(!tsdn_null(tsdn) || tcache == NULL); assert(size != 0); - if (likely(tcache != NULL)) { - if (likely(size <= SMALL_MAXCLASS)) { - return (tcache_alloc_small(tsdn_tsd(tsdn), arena, - tcache, size, ind, zero, slow_path)); - } - if (likely(size <= tcache_maxclass)) { - return (tcache_alloc_large(tsdn_tsd(tsdn), arena, - tcache, size, ind, zero, slow_path)); - } - /* (size > tcache_maxclass) case falls through. */ - assert(size > tcache_maxclass); - } + arena = arena_choose(tsd, arena); + if (unlikely(arena == NULL)) + return (NULL); - return (arena_malloc_hard(tsdn, arena, size, ind, zero)); + if (likely(size <= SMALL_MAXCLASS)) { + if (likely(tcache != NULL)) { + return (tcache_alloc_small(tsd, arena, tcache, size, + zero)); + } else + return (arena_malloc_small(arena, size, zero)); + } else if (likely(size <= large_maxclass)) { + /* + * Initialize tcache after checking size in order to avoid + * infinite recursion during tcache initialization. + */ + if (likely(tcache != NULL) && size <= tcache_maxclass) { + return (tcache_alloc_large(tsd, arena, tcache, size, + zero)); + } else + return (arena_malloc_large(arena, size, zero)); + } else + return (huge_malloc(tsd, arena, size, zero, tcache)); } JEMALLOC_ALWAYS_INLINE arena_t * @@ -1370,7 +1202,7 @@ arena_aalloc(const void *ptr) /* Return the size of the allocation pointed to by ptr. 
*/ JEMALLOC_ALWAYS_INLINE size_t -arena_salloc(tsdn_t *tsdn, const void *ptr, bool demote) +arena_salloc(const void *ptr, bool demote) { size_t ret; arena_chunk_t *chunk; @@ -1413,18 +1245,17 @@ arena_salloc(tsdn_t *tsdn, const void *ptr, bool demote) ret = index2size(binind); } } else - ret = huge_salloc(tsdn, ptr); + ret = huge_salloc(ptr); return (ret); } JEMALLOC_ALWAYS_INLINE void -arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path) +arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) { arena_chunk_t *chunk; size_t pageind, mapbits; - assert(!tsdn_null(tsdn) || tcache == NULL); assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); @@ -1437,12 +1268,10 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path) if (likely(tcache != NULL)) { szind_t binind = arena_ptr_small_binind_get(ptr, mapbits); - tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, - binind, slow_path); + tcache_dalloc_small(tsd, tcache, ptr, binind); } else { - arena_dalloc_small(tsdn, - extent_node_arena_get(&chunk->node), chunk, - ptr, pageind); + arena_dalloc_small(extent_node_arena_get( + &chunk->node), chunk, ptr, pageind); } } else { size_t size = arena_mapbits_large_size_get(chunk, @@ -1453,33 +1282,28 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path) if (likely(tcache != NULL) && size - large_pad <= tcache_maxclass) { - tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, - size - large_pad, slow_path); + tcache_dalloc_large(tsd, tcache, ptr, size - + large_pad); } else { - arena_dalloc_large(tsdn, - extent_node_arena_get(&chunk->node), chunk, - ptr); + arena_dalloc_large(extent_node_arena_get( + &chunk->node), chunk, ptr); } } } else - huge_dalloc(tsdn, ptr); + huge_dalloc(tsd, ptr, tcache); } JEMALLOC_ALWAYS_INLINE void -arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, - bool slow_path) +arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) { arena_chunk_t *chunk; - 
assert(!tsdn_null(tsdn) || tcache == NULL); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (likely(chunk != ptr)) { if (config_prof && opt_prof) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - assert(arena_mapbits_allocated_get(chunk, pageind) != - 0); + assert(arena_mapbits_allocated_get(chunk, pageind) != 0); if (arena_mapbits_large_get(chunk, pageind) != 0) { /* * Make sure to use promoted size, not request @@ -1489,36 +1313,32 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, pageind) - large_pad; } } - assert(s2u(size) == s2u(arena_salloc(tsdn, ptr, false))); + assert(s2u(size) == s2u(arena_salloc(ptr, false))); if (likely(size <= SMALL_MAXCLASS)) { /* Small allocation. */ if (likely(tcache != NULL)) { szind_t binind = size2index(size); - tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, - binind, slow_path); + tcache_dalloc_small(tsd, tcache, ptr, binind); } else { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - arena_dalloc_small(tsdn, - extent_node_arena_get(&chunk->node), chunk, - ptr, pageind); + arena_dalloc_small(extent_node_arena_get( + &chunk->node), chunk, ptr, pageind); } } else { assert(config_cache_oblivious || ((uintptr_t)ptr & PAGE_MASK) == 0); - if (likely(tcache != NULL) && size <= tcache_maxclass) { - tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, - size, slow_path); - } else { - arena_dalloc_large(tsdn, - extent_node_arena_get(&chunk->node), chunk, - ptr); + if (likely(tcache != NULL) && size <= tcache_maxclass) + tcache_dalloc_large(tsd, tcache, ptr, size); + else { + arena_dalloc_large(extent_node_arena_get( + &chunk->node), chunk, ptr); } } } else - huge_dalloc(tsdn, ptr); + huge_dalloc(tsd, ptr, tcache); } # endif /* JEMALLOC_ARENA_INLINE_B */ #endif diff --git a/deps/jemalloc/include/jemalloc/internal/assert.h b/deps/jemalloc/include/jemalloc/internal/assert.h deleted file mode 100644 index 6f8f7eb93..000000000 --- a/deps/jemalloc/include/jemalloc/internal/assert.h +++ 
/dev/null @@ -1,45 +0,0 @@ -/* - * Define a custom assert() in order to reduce the chances of deadlock during - * assertion failure. - */ -#ifndef assert -#define assert(e) do { \ - if (unlikely(config_debug && !(e))) { \ - malloc_printf( \ - ": %s:%d: Failed assertion: \"%s\"\n", \ - __FILE__, __LINE__, #e); \ - abort(); \ - } \ -} while (0) -#endif - -#ifndef not_reached -#define not_reached() do { \ - if (config_debug) { \ - malloc_printf( \ - ": %s:%d: Unreachable code reached\n", \ - __FILE__, __LINE__); \ - abort(); \ - } \ - unreachable(); \ -} while (0) -#endif - -#ifndef not_implemented -#define not_implemented() do { \ - if (config_debug) { \ - malloc_printf(": %s:%d: Not implemented\n", \ - __FILE__, __LINE__); \ - abort(); \ - } \ -} while (0) -#endif - -#ifndef assert_not_implemented -#define assert_not_implemented(e) do { \ - if (unlikely(config_debug && !(e))) \ - not_implemented(); \ -} while (0) -#endif - - diff --git a/deps/jemalloc/include/jemalloc/internal/atomic.h b/deps/jemalloc/include/jemalloc/internal/atomic.h index 3f15ea149..a9aad35d1 100644 --- a/deps/jemalloc/include/jemalloc/internal/atomic.h +++ b/deps/jemalloc/include/jemalloc/internal/atomic.h @@ -28,8 +28,8 @@ * callers. 
* * atomic_read_( *p) { return (*p); } - * atomic_add_( *p, x) { return (*p += x); } - * atomic_sub_( *p, x) { return (*p -= x); } + * atomic_add_( *p, x) { return (*p + x); } + * atomic_sub_( *p, x) { return (*p - x); } * bool atomic_cas_( *p, c, s) * { * if (*p != c) diff --git a/deps/jemalloc/include/jemalloc/internal/base.h b/deps/jemalloc/include/jemalloc/internal/base.h index d6b81e162..39e46ee44 100644 --- a/deps/jemalloc/include/jemalloc/internal/base.h +++ b/deps/jemalloc/include/jemalloc/internal/base.h @@ -9,13 +9,12 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -void *base_alloc(tsdn_t *tsdn, size_t size); -void base_stats_get(tsdn_t *tsdn, size_t *allocated, size_t *resident, - size_t *mapped); +void *base_alloc(size_t size); +void base_stats_get(size_t *allocated, size_t *resident, size_t *mapped); bool base_boot(void); -void base_prefork(tsdn_t *tsdn); -void base_postfork_parent(tsdn_t *tsdn); -void base_postfork_child(tsdn_t *tsdn); +void base_prefork(void); +void base_postfork_parent(void); +void base_postfork_child(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/bitmap.h b/deps/jemalloc/include/jemalloc/internal/bitmap.h index 36f38b59c..fcc6005c7 100644 --- a/deps/jemalloc/include/jemalloc/internal/bitmap.h +++ b/deps/jemalloc/include/jemalloc/internal/bitmap.h @@ -15,15 +15,6 @@ typedef unsigned long bitmap_t; #define BITMAP_GROUP_NBITS (ZU(1) << LG_BITMAP_GROUP_NBITS) #define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1) -/* - * Do some analysis on how big the bitmap is before we use a tree. For a brute - * force linear search, if we would have to call ffs_lu() more than 2^3 times, - * use a tree instead. - */ -#if LG_BITMAP_MAXBITS - LG_BITMAP_GROUP_NBITS > 3 -# define USE_TREE -#endif - /* Number of groups required to store a given number of bits. 
*/ #define BITMAP_BITS2GROUPS(nbits) \ ((nbits + BITMAP_GROUP_NBITS_MASK) >> LG_BITMAP_GROUP_NBITS) @@ -57,8 +48,6 @@ typedef unsigned long bitmap_t; /* * Maximum number of groups required to support LG_BITMAP_MAXBITS. */ -#ifdef USE_TREE - #if LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS # define BITMAP_GROUPS_MAX BITMAP_GROUPS_1_LEVEL(BITMAP_MAXBITS) #elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 2 @@ -76,12 +65,6 @@ typedef unsigned long bitmap_t; (LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \ + !!(LG_BITMAP_MAXBITS % LG_SIZEOF_BITMAP) -#else /* USE_TREE */ - -#define BITMAP_GROUPS_MAX BITMAP_BITS2GROUPS(BITMAP_MAXBITS) - -#endif /* USE_TREE */ - #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS @@ -95,7 +78,6 @@ struct bitmap_info_s { /* Logical number of bits in bitmap (stored at bottom level). */ size_t nbits; -#ifdef USE_TREE /* Number of levels necessary for nbits. */ unsigned nlevels; @@ -104,10 +86,6 @@ struct bitmap_info_s { * bottom to top (e.g. the bottom level is stored in levels[0]). */ bitmap_level_t levels[BITMAP_MAX_LEVELS+1]; -#else /* USE_TREE */ - /* Number of groups necessary for nbits. 
*/ - size_t ngroups; -#endif /* USE_TREE */ }; #endif /* JEMALLOC_H_STRUCTS */ @@ -115,8 +93,9 @@ struct bitmap_info_s { #ifdef JEMALLOC_H_EXTERNS void bitmap_info_init(bitmap_info_t *binfo, size_t nbits); +size_t bitmap_info_ngroups(const bitmap_info_t *binfo); +size_t bitmap_size(size_t nbits); void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo); -size_t bitmap_size(const bitmap_info_t *binfo); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ @@ -134,20 +113,10 @@ void bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); JEMALLOC_INLINE bool bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) { -#ifdef USE_TREE - size_t rgoff = binfo->levels[binfo->nlevels].group_offset - 1; + unsigned rgoff = binfo->levels[binfo->nlevels].group_offset - 1; bitmap_t rg = bitmap[rgoff]; /* The bitmap is full iff the root group is 0. */ return (rg == 0); -#else - size_t i; - - for (i = 0; i < binfo->ngroups; i++) { - if (bitmap[i] != 0) - return (false); - } - return (true); -#endif } JEMALLOC_INLINE bool @@ -159,7 +128,7 @@ bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) assert(bit < binfo->nbits); goff = bit >> LG_BITMAP_GROUP_NBITS; g = bitmap[goff]; - return (!(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK)))); + return (!(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK)))); } JEMALLOC_INLINE void @@ -174,11 +143,10 @@ bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) goff = bit >> LG_BITMAP_GROUP_NBITS; gp = &bitmap[goff]; g = *gp; - assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); - g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); + assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))); + g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; assert(bitmap_get(bitmap, binfo, bit)); -#ifdef USE_TREE /* Propagate group state transitions up the tree. 
*/ if (g == 0) { unsigned i; @@ -187,14 +155,13 @@ bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) goff = bit >> LG_BITMAP_GROUP_NBITS; gp = &bitmap[binfo->levels[i].group_offset + goff]; g = *gp; - assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); - g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); + assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))); + g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; if (g != 0) break; } } -#endif } /* sfu: set first unset. */ @@ -207,24 +174,15 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) assert(!bitmap_full(bitmap, binfo)); -#ifdef USE_TREE i = binfo->nlevels - 1; g = bitmap[binfo->levels[i].group_offset]; - bit = ffs_lu(g) - 1; + bit = jemalloc_ffsl(g) - 1; while (i > 0) { i--; g = bitmap[binfo->levels[i].group_offset + bit]; - bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffs_lu(g) - 1); + bit = (bit << LG_BITMAP_GROUP_NBITS) + (jemalloc_ffsl(g) - 1); } -#else - i = 0; - g = bitmap[0]; - while ((bit = ffs_lu(g)) == 0) { - i++; - g = bitmap[i]; - } - bit = (i << LG_BITMAP_GROUP_NBITS) + (bit - 1); -#endif + bitmap_set(bitmap, binfo, bit); return (bit); } @@ -235,7 +193,7 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) size_t goff; bitmap_t *gp; bitmap_t g; - UNUSED bool propagate; + bool propagate; assert(bit < binfo->nbits); assert(bitmap_get(bitmap, binfo, bit)); @@ -243,11 +201,10 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) gp = &bitmap[goff]; g = *gp; propagate = (g == 0); - assert((g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))) == 0); - g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); + assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))) == 0); + g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; assert(!bitmap_get(bitmap, binfo, bit)); -#ifdef USE_TREE /* Propagate group state transitions up the tree. 
*/ if (propagate) { unsigned i; @@ -257,15 +214,14 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) gp = &bitmap[binfo->levels[i].group_offset + goff]; g = *gp; propagate = (g == 0); - assert((g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))) + assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))) == 0); - g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); + g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; if (!propagate) break; } } -#endif /* USE_TREE */ } #endif diff --git a/deps/jemalloc/include/jemalloc/internal/chunk.h b/deps/jemalloc/include/jemalloc/internal/chunk.h index 50b9904b0..5d1938353 100644 --- a/deps/jemalloc/include/jemalloc/internal/chunk.h +++ b/deps/jemalloc/include/jemalloc/internal/chunk.h @@ -48,30 +48,32 @@ extern size_t chunk_npages; extern const chunk_hooks_t chunk_hooks_default; -chunk_hooks_t chunk_hooks_get(tsdn_t *tsdn, arena_t *arena); -chunk_hooks_t chunk_hooks_set(tsdn_t *tsdn, arena_t *arena, +chunk_hooks_t chunk_hooks_get(arena_t *arena); +chunk_hooks_t chunk_hooks_set(arena_t *arena, const chunk_hooks_t *chunk_hooks); -bool chunk_register(tsdn_t *tsdn, const void *chunk, - const extent_node_t *node); +bool chunk_register(const void *chunk, const extent_node_t *node); void chunk_deregister(const void *chunk, const extent_node_t *node); void *chunk_alloc_base(size_t size); -void *chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, - size_t *sn, bool *zero, bool *commit, bool dalloc_node); -void *chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, - size_t *sn, bool *zero, bool *commit); -void chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t sn, - bool committed); -void chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t sn, - bool zeroed, 
bool committed); -bool chunk_purge_wrapper(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t offset, +void *chunk_alloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks, + void *new_addr, size_t size, size_t alignment, bool *zero, + bool dalloc_node); +void *chunk_alloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, + void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit); +void chunk_dalloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks, + void *chunk, size_t size, bool committed); +void chunk_dalloc_arena(arena_t *arena, chunk_hooks_t *chunk_hooks, + void *chunk, size_t size, bool zeroed, bool committed); +void chunk_dalloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, + void *chunk, size_t size, bool committed); +bool chunk_purge_arena(arena_t *arena, void *chunk, size_t offset, size_t length); +bool chunk_purge_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, + void *chunk, size_t size, size_t offset, size_t length); bool chunk_boot(void); +void chunk_prefork(void); +void chunk_postfork_parent(void); +void chunk_postfork_child(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/chunk_dss.h b/deps/jemalloc/include/jemalloc/internal/chunk_dss.h index da8511ba0..388f46be0 100644 --- a/deps/jemalloc/include/jemalloc/internal/chunk_dss.h +++ b/deps/jemalloc/include/jemalloc/internal/chunk_dss.h @@ -23,11 +23,13 @@ extern const char *dss_prec_names[]; dss_prec_t chunk_dss_prec_get(void); bool chunk_dss_prec_set(dss_prec_t dss_prec); -void *chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, - size_t size, size_t alignment, bool *zero, bool *commit); +void *chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size, + size_t alignment, bool *zero, bool *commit); bool chunk_in_dss(void *chunk); -bool chunk_dss_mergeable(void *chunk_a, void *chunk_b); -void 
chunk_dss_boot(void); +bool chunk_dss_boot(void); +void chunk_dss_prefork(void); +void chunk_dss_postfork_parent(void); +void chunk_dss_postfork_child(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/chunk_mmap.h b/deps/jemalloc/include/jemalloc/internal/chunk_mmap.h index 6f2d0ac2e..7d8014c58 100644 --- a/deps/jemalloc/include/jemalloc/internal/chunk_mmap.h +++ b/deps/jemalloc/include/jemalloc/internal/chunk_mmap.h @@ -9,8 +9,8 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -void *chunk_alloc_mmap(void *new_addr, size_t size, size_t alignment, - bool *zero, bool *commit); +void *chunk_alloc_mmap(size_t size, size_t alignment, bool *zero, + bool *commit); bool chunk_dalloc_mmap(void *chunk, size_t size); #endif /* JEMALLOC_H_EXTERNS */ diff --git a/deps/jemalloc/include/jemalloc/internal/ckh.h b/deps/jemalloc/include/jemalloc/internal/ckh.h index f75ad90b7..75c1c979f 100644 --- a/deps/jemalloc/include/jemalloc/internal/ckh.h +++ b/deps/jemalloc/include/jemalloc/internal/ckh.h @@ -40,7 +40,9 @@ struct ckh_s { #endif /* Used for pseudo-random number generation. */ - uint64_t prng_state; +#define CKH_A 1103515241 +#define CKH_C 12347 + uint32_t prng_state; /* Total number of items. 
*/ size_t count; @@ -72,7 +74,7 @@ bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data); bool ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data); bool ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, void **data); -bool ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data); +bool ckh_search(ckh_t *ckh, const void *seachkey, void **key, void **data); void ckh_string_hash(const void *key, size_t r_hash[2]); bool ckh_string_keycomp(const void *k1, const void *k2); void ckh_pointer_hash(const void *key, size_t r_hash[2]); diff --git a/deps/jemalloc/include/jemalloc/internal/ctl.h b/deps/jemalloc/include/jemalloc/internal/ctl.h index af0f6d7c5..751c14b5b 100644 --- a/deps/jemalloc/include/jemalloc/internal/ctl.h +++ b/deps/jemalloc/include/jemalloc/internal/ctl.h @@ -21,14 +21,13 @@ struct ctl_named_node_s { /* If (nchildren == 0), this is a terminal node. */ unsigned nchildren; const ctl_node_t *children; - int (*ctl)(tsd_t *, const size_t *, size_t, void *, - size_t *, void *, size_t); + int (*ctl)(const size_t *, size_t, void *, size_t *, + void *, size_t); }; struct ctl_indexed_node_s { struct ctl_node_s node; - const ctl_named_node_t *(*index)(tsdn_t *, const size_t *, size_t, - size_t); + const ctl_named_node_t *(*index)(const size_t *, size_t, size_t); }; struct ctl_arena_stats_s { @@ -36,12 +35,8 @@ struct ctl_arena_stats_s { unsigned nthreads; const char *dss; ssize_t lg_dirty_mult; - ssize_t decay_time; size_t pactive; size_t pdirty; - - /* The remainder are only populated if config_stats is true. */ - arena_stats_t astats; /* Aggregate stats for small size classes, based on bin stats. */ @@ -61,7 +56,6 @@ struct ctl_stats_s { size_t metadata; size_t resident; size_t mapped; - size_t retained; unsigned narenas; ctl_arena_stats_t *arenas; /* (narenas + 1) elements. 
*/ }; @@ -70,17 +64,16 @@ struct ctl_stats_s { /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -int ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, - void *newp, size_t newlen); -int ctl_nametomib(tsdn_t *tsdn, const char *name, size_t *mibp, - size_t *miblenp); +int ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp, + size_t newlen); +int ctl_nametomib(const char *name, size_t *mibp, size_t *miblenp); -int ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen); +int ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen); bool ctl_boot(void); -void ctl_prefork(tsdn_t *tsdn); -void ctl_postfork_parent(tsdn_t *tsdn); -void ctl_postfork_child(tsdn_t *tsdn); +void ctl_prefork(void); +void ctl_postfork_parent(void); +void ctl_postfork_child(void); #define xmallctl(name, oldp, oldlenp, newp, newlen) do { \ if (je_mallctl(name, oldp, oldlenp, newp, newlen) \ diff --git a/deps/jemalloc/include/jemalloc/internal/extent.h b/deps/jemalloc/include/jemalloc/internal/extent.h index 168ffe643..386d50ef4 100644 --- a/deps/jemalloc/include/jemalloc/internal/extent.h +++ b/deps/jemalloc/include/jemalloc/internal/extent.h @@ -18,20 +18,6 @@ struct extent_node_s { /* Total region size. */ size_t en_size; - /* - * Serial number (potentially non-unique). - * - * In principle serial numbers can wrap around on 32-bit systems if - * JEMALLOC_MUNMAP is defined, but as long as comparison functions fall - * back on address comparison for equal serial numbers, stable (if - * imperfect) ordering is maintained. - * - * Serial numbers may not be unique even in the absence of wrap-around, - * e.g. when splitting an extent and assigning the same serial number to - * both resulting adjacent extents. 
- */ - size_t en_sn; - /* * The zeroed flag is used by chunk recycling code to track whether * memory is zero-filled. @@ -59,10 +45,10 @@ struct extent_node_s { qr(extent_node_t) cc_link; union { - /* Linkage for the size/sn/address-ordered tree. */ - rb_node(extent_node_t) szsnad_link; + /* Linkage for the size/address-ordered tree. */ + rb_node(extent_node_t) szad_link; - /* Linkage for arena's achunks, huge, and node_cache lists. */ + /* Linkage for arena's huge and node_cache lists. */ ql_elm(extent_node_t) ql_link; }; @@ -75,7 +61,7 @@ typedef rb_tree(extent_node_t) extent_tree_t; /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -rb_proto(, extent_tree_szsnad_, extent_tree_t, extent_node_t) +rb_proto(, extent_tree_szad_, extent_tree_t, extent_node_t) rb_proto(, extent_tree_ad_, extent_tree_t, extent_node_t) @@ -87,7 +73,6 @@ rb_proto(, extent_tree_ad_, extent_tree_t, extent_node_t) arena_t *extent_node_arena_get(const extent_node_t *node); void *extent_node_addr_get(const extent_node_t *node); size_t extent_node_size_get(const extent_node_t *node); -size_t extent_node_sn_get(const extent_node_t *node); bool extent_node_zeroed_get(const extent_node_t *node); bool extent_node_committed_get(const extent_node_t *node); bool extent_node_achunk_get(const extent_node_t *node); @@ -95,13 +80,12 @@ prof_tctx_t *extent_node_prof_tctx_get(const extent_node_t *node); void extent_node_arena_set(extent_node_t *node, arena_t *arena); void extent_node_addr_set(extent_node_t *node, void *addr); void extent_node_size_set(extent_node_t *node, size_t size); -void extent_node_sn_set(extent_node_t *node, size_t sn); void extent_node_zeroed_set(extent_node_t *node, bool zeroed); void extent_node_committed_set(extent_node_t *node, bool committed); void extent_node_achunk_set(extent_node_t *node, bool achunk); void extent_node_prof_tctx_set(extent_node_t *node, prof_tctx_t *tctx); void extent_node_init(extent_node_t *node, 
arena_t *arena, void *addr, - size_t size, size_t sn, bool zeroed, bool committed); + size_t size, bool zeroed, bool committed); void extent_node_dirty_linkage_init(extent_node_t *node); void extent_node_dirty_insert(extent_node_t *node, arena_runs_dirty_link_t *runs_dirty, extent_node_t *chunks_dirty); @@ -130,13 +114,6 @@ extent_node_size_get(const extent_node_t *node) return (node->en_size); } -JEMALLOC_INLINE size_t -extent_node_sn_get(const extent_node_t *node) -{ - - return (node->en_sn); -} - JEMALLOC_INLINE bool extent_node_zeroed_get(const extent_node_t *node) { @@ -187,13 +164,6 @@ extent_node_size_set(extent_node_t *node, size_t size) node->en_size = size; } -JEMALLOC_INLINE void -extent_node_sn_set(extent_node_t *node, size_t sn) -{ - - node->en_sn = sn; -} - JEMALLOC_INLINE void extent_node_zeroed_set(extent_node_t *node, bool zeroed) { @@ -224,13 +194,12 @@ extent_node_prof_tctx_set(extent_node_t *node, prof_tctx_t *tctx) JEMALLOC_INLINE void extent_node_init(extent_node_t *node, arena_t *arena, void *addr, size_t size, - size_t sn, bool zeroed, bool committed) + bool zeroed, bool committed) { extent_node_arena_set(node, arena); extent_node_addr_set(node, addr); extent_node_size_set(node, size); - extent_node_sn_set(node, sn); extent_node_zeroed_set(node, zeroed); extent_node_committed_set(node, committed); extent_node_achunk_set(node, false); diff --git a/deps/jemalloc/include/jemalloc/internal/hash.h b/deps/jemalloc/include/jemalloc/internal/hash.h index 1ff2d9a05..bcead337a 100644 --- a/deps/jemalloc/include/jemalloc/internal/hash.h +++ b/deps/jemalloc/include/jemalloc/internal/hash.h @@ -1,6 +1,6 @@ /* * The following hash function is based on MurmurHash3, placed into the public - * domain by Austin Appleby. See https://github.com/aappleby/smhasher for + * domain by Austin Appleby. See http://code.google.com/p/smhasher/ for * details. 
*/ /******************************************************************************/ @@ -49,14 +49,6 @@ JEMALLOC_INLINE uint32_t hash_get_block_32(const uint32_t *p, int i) { - /* Handle unaligned read. */ - if (unlikely((uintptr_t)p & (sizeof(uint32_t)-1)) != 0) { - uint32_t ret; - - memcpy(&ret, (uint8_t *)(p + i), sizeof(uint32_t)); - return (ret); - } - return (p[i]); } @@ -64,14 +56,6 @@ JEMALLOC_INLINE uint64_t hash_get_block_64(const uint64_t *p, int i) { - /* Handle unaligned read. */ - if (unlikely((uintptr_t)p & (sizeof(uint64_t)-1)) != 0) { - uint64_t ret; - - memcpy(&ret, (uint8_t *)(p + i), sizeof(uint64_t)); - return (ret); - } - return (p[i]); } @@ -337,18 +321,13 @@ hash_x64_128(const void *key, const int len, const uint32_t seed, JEMALLOC_INLINE void hash(const void *key, size_t len, const uint32_t seed, size_t r_hash[2]) { - - assert(len <= INT_MAX); /* Unfortunate implementation limitation. */ - #if (LG_SIZEOF_PTR == 3 && !defined(JEMALLOC_BIG_ENDIAN)) - hash_x64_128(key, (int)len, seed, (uint64_t *)r_hash); + hash_x64_128(key, len, seed, (uint64_t *)r_hash); #else - { - uint64_t hashes[2]; - hash_x86_128(key, (int)len, seed, hashes); - r_hash[0] = (size_t)hashes[0]; - r_hash[1] = (size_t)hashes[1]; - } + uint64_t hashes[2]; + hash_x86_128(key, len, seed, hashes); + r_hash[0] = (size_t)hashes[0]; + r_hash[1] = (size_t)hashes[1]; #endif } #endif diff --git a/deps/jemalloc/include/jemalloc/internal/huge.h b/deps/jemalloc/include/jemalloc/internal/huge.h index 22184d9bb..ece7af980 100644 --- a/deps/jemalloc/include/jemalloc/internal/huge.h +++ b/deps/jemalloc/include/jemalloc/internal/huge.h @@ -9,23 +9,24 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -void *huge_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero); -void *huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, - size_t alignment, bool zero); -bool huge_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, - 
size_t usize_min, size_t usize_max, bool zero); +void *huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, + tcache_t *tcache); +void *huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, + bool zero, tcache_t *tcache); +bool huge_ralloc_no_move(void *ptr, size_t oldsize, size_t usize_min, + size_t usize_max, bool zero); void *huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize, size_t alignment, bool zero, tcache_t *tcache); #ifdef JEMALLOC_JET typedef void (huge_dalloc_junk_t)(void *, size_t); extern huge_dalloc_junk_t *huge_dalloc_junk; #endif -void huge_dalloc(tsdn_t *tsdn, void *ptr); +void huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache); arena_t *huge_aalloc(const void *ptr); -size_t huge_salloc(tsdn_t *tsdn, const void *ptr); -prof_tctx_t *huge_prof_tctx_get(tsdn_t *tsdn, const void *ptr); -void huge_prof_tctx_set(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx); -void huge_prof_tctx_reset(tsdn_t *tsdn, const void *ptr); +size_t huge_salloc(const void *ptr); +prof_tctx_t *huge_prof_tctx_get(const void *ptr); +void huge_prof_tctx_set(const void *ptr, prof_tctx_t *tctx); +void huge_prof_tctx_reset(const void *ptr); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in index e7ace7d8c..8536a3eda 100644 --- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in +++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in @@ -49,7 +49,6 @@ static const bool config_lazy_lock = false #endif ; -static const char * const config_malloc_conf = JEMALLOC_CONFIG_MALLOC_CONF; static const bool config_prof = #ifdef JEMALLOC_PROF true @@ -161,10 +160,7 @@ static const bool config_cache_oblivious = #include #endif -#include "jemalloc/internal/ph.h" -#ifndef __PGI #define RB_COMPACT -#endif #include 
"jemalloc/internal/rb.h" #include "jemalloc/internal/qr.h" #include "jemalloc/internal/ql.h" @@ -187,9 +183,6 @@ static const bool config_cache_oblivious = #include "jemalloc/internal/jemalloc_internal_macros.h" -/* Page size index type. */ -typedef unsigned pszind_t; - /* Size class index type. */ typedef unsigned szind_t; @@ -239,7 +232,7 @@ typedef unsigned szind_t; # ifdef __alpha__ # define LG_QUANTUM 4 # endif -# if (defined(__sparc64__) || defined(__sparcv9) || defined(__sparc_v9__)) +# if (defined(__sparc64__) || defined(__sparcv9)) # define LG_QUANTUM 4 # endif # if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64)) @@ -263,9 +256,6 @@ typedef unsigned szind_t; # ifdef __powerpc__ # define LG_QUANTUM 4 # endif -# ifdef __riscv__ -# define LG_QUANTUM 4 -# endif # ifdef __s390__ # define LG_QUANTUM 4 # endif @@ -327,17 +317,13 @@ typedef unsigned szind_t; #define PAGE ((size_t)(1U << LG_PAGE)) #define PAGE_MASK ((size_t)(PAGE - 1)) -/* Return the page base address for the page containing address a. */ -#define PAGE_ADDR2BASE(a) \ - ((void *)((uintptr_t)(a) & ~PAGE_MASK)) - /* Return the smallest pagesize multiple that is >= s. */ #define PAGE_CEILING(s) \ (((s) + PAGE_MASK) & ~PAGE_MASK) /* Return the nearest aligned address at or below a. */ #define ALIGNMENT_ADDR2BASE(a, alignment) \ - ((void *)((uintptr_t)(a) & ((~(alignment)) + 1))) + ((void *)((uintptr_t)(a) & (-(alignment)))) /* Return the offset between a and the nearest aligned address at or below a. */ #define ALIGNMENT_ADDR2OFFSET(a, alignment) \ @@ -345,7 +331,7 @@ typedef unsigned szind_t; /* Return the smallest alignment multiple that is >= s. */ #define ALIGNMENT_CEILING(s, alignment) \ - (((s) + (alignment - 1)) & ((~(alignment)) + 1)) + (((s) + (alignment - 1)) & (-(alignment))) /* Declare a variable-length array. 
*/ #if __STDC_VERSION__ < 199901L @@ -365,19 +351,14 @@ typedef unsigned szind_t; # define VARIABLE_ARRAY(type, name, count) type name[(count)] #endif -#include "jemalloc/internal/nstime.h" #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/spin.h" #include "jemalloc/internal/prng.h" -#include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/size_classes.h" -#include "jemalloc/internal/smoothstep.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" -#include "jemalloc/internal/witness.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/tsd.h" #include "jemalloc/internal/mb.h" @@ -398,19 +379,14 @@ typedef unsigned szind_t; /******************************************************************************/ #define JEMALLOC_H_STRUCTS -#include "jemalloc/internal/nstime.h" #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/spin.h" #include "jemalloc/internal/prng.h" -#include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/size_classes.h" -#include "jemalloc/internal/smoothstep.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" -#include "jemalloc/internal/witness.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/bitmap.h" @@ -446,27 +422,13 @@ extern bool opt_redzone; extern bool opt_utrace; extern bool opt_xmalloc; extern bool opt_zero; -extern unsigned opt_narenas; +extern size_t opt_narenas; extern bool in_valgrind; /* Number of CPUs. */ -extern unsigned ncpus; +extern unsigned ncpus; -/* Number of arenas used for automatic multiplexing of threads and arenas. */ -extern unsigned narenas_auto; - -/* - * Arenas that are used to service external requests. 
Not all elements of the - * arenas array are necessarily used; arenas are created lazily as needed. - */ -extern arena_t **arenas; - -/* - * pind2sz_tab encodes the same information as could be computed by - * pind2sz_compute(). - */ -extern size_t const pind2sz_tab[NPSIZES]; /* * index2size_tab encodes the same information as could be computed (at * unacceptable cost in some code paths) by index2size_compute(). @@ -485,35 +447,31 @@ void a0dalloc(void *ptr); void *bootstrap_malloc(size_t size); void *bootstrap_calloc(size_t num, size_t size); void bootstrap_free(void *ptr); +arena_t *arenas_extend(unsigned ind); +arena_t *arena_init(unsigned ind); unsigned narenas_total_get(void); -arena_t *arena_init(tsdn_t *tsdn, unsigned ind); -arena_tdata_t *arena_tdata_get_hard(tsd_t *tsd, unsigned ind); -arena_t *arena_choose_hard(tsd_t *tsd, bool internal); +arena_t *arena_get_hard(tsd_t *tsd, unsigned ind, bool init_if_missing); +arena_t *arena_choose_hard(tsd_t *tsd); void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind); +unsigned arena_nbound(unsigned ind); void thread_allocated_cleanup(tsd_t *tsd); void thread_deallocated_cleanup(tsd_t *tsd); -void iarena_cleanup(tsd_t *tsd); void arena_cleanup(tsd_t *tsd); -void arenas_tdata_cleanup(tsd_t *tsd); -void narenas_tdata_cleanup(tsd_t *tsd); -void arenas_tdata_bypass_cleanup(tsd_t *tsd); +void arenas_cache_cleanup(tsd_t *tsd); +void narenas_cache_cleanup(tsd_t *tsd); +void arenas_cache_bypass_cleanup(tsd_t *tsd); void jemalloc_prefork(void); void jemalloc_postfork_parent(void); void jemalloc_postfork_child(void); -#include "jemalloc/internal/nstime.h" #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/spin.h" #include "jemalloc/internal/prng.h" -#include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/size_classes.h" -#include "jemalloc/internal/smoothstep.h" #include 
"jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" -#include "jemalloc/internal/witness.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/bitmap.h" @@ -534,21 +492,16 @@ void jemalloc_postfork_child(void); /******************************************************************************/ #define JEMALLOC_H_INLINES -#include "jemalloc/internal/nstime.h" #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/spin.h" #include "jemalloc/internal/prng.h" -#include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/size_classes.h" -#include "jemalloc/internal/smoothstep.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" -#include "jemalloc/internal/tsd.h" -#include "jemalloc/internal/witness.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/tsd.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/extent.h" #include "jemalloc/internal/base.h" @@ -558,11 +511,6 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/huge.h" #ifndef JEMALLOC_ENABLE_INLINE -pszind_t psz2ind(size_t psz); -size_t pind2sz_compute(pszind_t pind); -size_t pind2sz_lookup(pszind_t pind); -size_t pind2sz(pszind_t pind); -size_t psz2u(size_t psz); szind_t size2index_compute(size_t size); szind_t size2index_lookup(size_t size); szind_t size2index(size_t size); @@ -573,121 +521,39 @@ size_t s2u_compute(size_t size); size_t s2u_lookup(size_t size); size_t s2u(size_t size); size_t sa2u(size_t size, size_t alignment); -arena_t *arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal); arena_t *arena_choose(tsd_t *tsd, arena_t *arena); -arena_t *arena_ichoose(tsd_t *tsd, arena_t *arena); -arena_tdata_t *arena_tdata_get(tsd_t *tsd, unsigned ind, +arena_t *arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing, bool refresh_if_missing); -arena_t 
*arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing); -ticker_t *decay_ticker_get(tsd_t *tsd, unsigned ind); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) -JEMALLOC_INLINE pszind_t -psz2ind(size_t psz) -{ - - if (unlikely(psz > HUGE_MAXCLASS)) - return (NPSIZES); - { - pszind_t x = lg_floor((psz<<1)-1); - pszind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_PAGE) ? 0 : x - - (LG_SIZE_CLASS_GROUP + LG_PAGE); - pszind_t grp = shift << LG_SIZE_CLASS_GROUP; - - pszind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ? - LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1; - - size_t delta_inverse_mask = ZI(-1) << lg_delta; - pszind_t mod = ((((psz-1) & delta_inverse_mask) >> lg_delta)) & - ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); - - pszind_t ind = grp + mod; - return (ind); - } -} - -JEMALLOC_INLINE size_t -pind2sz_compute(pszind_t pind) -{ - - { - size_t grp = pind >> LG_SIZE_CLASS_GROUP; - size_t mod = pind & ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); - - size_t grp_size_mask = ~((!!grp)-1); - size_t grp_size = ((ZU(1) << (LG_PAGE + - (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask; - - size_t shift = (grp == 0) ? 1 : grp; - size_t lg_delta = shift + (LG_PAGE-1); - size_t mod_size = (mod+1) << lg_delta; - - size_t sz = grp_size + mod_size; - return (sz); - } -} - -JEMALLOC_INLINE size_t -pind2sz_lookup(pszind_t pind) -{ - size_t ret = (size_t)pind2sz_tab[pind]; - assert(ret == pind2sz_compute(pind)); - return (ret); -} - -JEMALLOC_INLINE size_t -pind2sz(pszind_t pind) -{ - - assert(pind < NPSIZES); - return (pind2sz_lookup(pind)); -} - -JEMALLOC_INLINE size_t -psz2u(size_t psz) -{ - - if (unlikely(psz > HUGE_MAXCLASS)) - return (0); - { - size_t x = lg_floor((psz<<1)-1); - size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ? 
- LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1; - size_t delta = ZU(1) << lg_delta; - size_t delta_mask = delta - 1; - size_t usize = (psz + delta_mask) & ~delta_mask; - return (usize); - } -} - JEMALLOC_INLINE szind_t size2index_compute(size_t size) { - if (unlikely(size > HUGE_MAXCLASS)) - return (NSIZES); #if (NTBINS != 0) if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { - szind_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; - szind_t lg_ceil = lg_floor(pow2_ceil_zu(size)); + size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; + size_t lg_ceil = lg_floor(pow2_ceil(size)); return (lg_ceil < lg_tmin ? 0 : lg_ceil - lg_tmin); } #endif { - szind_t x = lg_floor((size<<1)-1); - szind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 : + size_t x = unlikely(ZI(size) < 0) ? ((size<<1) ? + (ZU(1)<<(LG_SIZEOF_PTR+3)) : ((ZU(1)<<(LG_SIZEOF_PTR+3))-1)) + : lg_floor((size<<1)-1); + size_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 : x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM); - szind_t grp = shift << LG_SIZE_CLASS_GROUP; + size_t grp = shift << LG_SIZE_CLASS_GROUP; - szind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) + size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) ? 
LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; size_t delta_inverse_mask = ZI(-1) << lg_delta; - szind_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) & + size_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) & ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); - szind_t index = NTBINS + grp + mod; + size_t index = NTBINS + grp + mod; return (index); } } @@ -698,7 +564,8 @@ size2index_lookup(size_t size) assert(size <= LOOKUP_MAXCLASS); { - szind_t ret = (size2index_tab[(size-1) >> LG_TINY_MIN]); + size_t ret = ((size_t)(size2index_tab[(size-1) >> + LG_TINY_MIN])); assert(ret == size2index_compute(size)); return (ret); } @@ -761,18 +628,18 @@ JEMALLOC_ALWAYS_INLINE size_t s2u_compute(size_t size) { - if (unlikely(size > HUGE_MAXCLASS)) - return (0); #if (NTBINS > 0) if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; - size_t lg_ceil = lg_floor(pow2_ceil_zu(size)); + size_t lg_ceil = lg_floor(pow2_ceil(size)); return (lg_ceil < lg_tmin ? (ZU(1) << lg_tmin) : (ZU(1) << lg_ceil)); } #endif { - size_t x = lg_floor((size<<1)-1); + size_t x = unlikely(ZI(size) < 0) ? ((size<<1) ? + (ZU(1)<<(LG_SIZEOF_PTR+3)) : ((ZU(1)<<(LG_SIZEOF_PTR+3))-1)) + : lg_floor((size<<1)-1); size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; size_t delta = ZU(1) << lg_delta; @@ -856,16 +723,17 @@ sa2u(size_t size, size_t alignment) return (usize); } - /* Huge size class. Beware of overflow. */ - - if (unlikely(alignment > HUGE_MAXCLASS)) - return (0); + /* Huge size class. Beware of size_t overflow. */ /* * We can't achieve subchunk alignment, so round up alignment to the * minimum that can actually be supported. */ alignment = CHUNK_CEILING(alignment); + if (alignment == 0) { + /* size_t overflow. */ + return (0); + } /* Make sure result is a huge size class. */ if (size <= chunksize) @@ -891,84 +759,45 @@ sa2u(size_t size, size_t alignment) /* Choose an arena based on a per-thread value. 
*/ JEMALLOC_INLINE arena_t * -arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) +arena_choose(tsd_t *tsd, arena_t *arena) { arena_t *ret; if (arena != NULL) return (arena); - ret = internal ? tsd_iarena_get(tsd) : tsd_arena_get(tsd); - if (unlikely(ret == NULL)) - ret = arena_choose_hard(tsd, internal); + if (unlikely((ret = tsd_arena_get(tsd)) == NULL)) + ret = arena_choose_hard(tsd); return (ret); } JEMALLOC_INLINE arena_t * -arena_choose(tsd_t *tsd, arena_t *arena) +arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing, + bool refresh_if_missing) { + arena_t *arena; + arena_t **arenas_cache = tsd_arenas_cache_get(tsd); - return (arena_choose_impl(tsd, arena, false)); -} + /* init_if_missing requires refresh_if_missing. */ + assert(!init_if_missing || refresh_if_missing); -JEMALLOC_INLINE arena_t * -arena_ichoose(tsd_t *tsd, arena_t *arena) -{ - - return (arena_choose_impl(tsd, arena, true)); -} - -JEMALLOC_INLINE arena_tdata_t * -arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing) -{ - arena_tdata_t *tdata; - arena_tdata_t *arenas_tdata = tsd_arenas_tdata_get(tsd); - - if (unlikely(arenas_tdata == NULL)) { - /* arenas_tdata hasn't been initialized yet. */ - return (arena_tdata_get_hard(tsd, ind)); + if (unlikely(arenas_cache == NULL)) { + /* arenas_cache hasn't been initialized yet. */ + return (arena_get_hard(tsd, ind, init_if_missing)); } - if (unlikely(ind >= tsd_narenas_tdata_get(tsd))) { + if (unlikely(ind >= tsd_narenas_cache_get(tsd))) { /* - * ind is invalid, cache is old (too small), or tdata to be + * ind is invalid, cache is old (too small), or arena to be * initialized. */ - return (refresh_if_missing ? arena_tdata_get_hard(tsd, ind) : - NULL); + return (refresh_if_missing ? 
arena_get_hard(tsd, ind, + init_if_missing) : NULL); } - - tdata = &arenas_tdata[ind]; - if (likely(tdata != NULL) || !refresh_if_missing) - return (tdata); - return (arena_tdata_get_hard(tsd, ind)); -} - -JEMALLOC_INLINE arena_t * -arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing) -{ - arena_t *ret; - - assert(ind <= MALLOCX_ARENA_MAX); - - ret = arenas[ind]; - if (unlikely(ret == NULL)) { - ret = atomic_read_p((void *)&arenas[ind]); - if (init_if_missing && unlikely(ret == NULL)) - ret = arena_init(tsdn, ind); - } - return (ret); -} - -JEMALLOC_INLINE ticker_t * -decay_ticker_get(tsd_t *tsd, unsigned ind) -{ - arena_tdata_t *tdata; - - tdata = arena_tdata_get(tsd, ind, true); - if (unlikely(tdata == NULL)) - return (NULL); - return (&tdata->decay_ticker); + arena = arenas_cache[ind]; + if (likely(arena != NULL) || !refresh_if_missing) + return (arena); + return (arena_get_hard(tsd, ind, init_if_missing)); } #endif @@ -989,27 +818,27 @@ decay_ticker_get(tsd_t *tsd, unsigned ind) #ifndef JEMALLOC_ENABLE_INLINE arena_t *iaalloc(const void *ptr); -size_t isalloc(tsdn_t *tsdn, const void *ptr, bool demote); -void *iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, - tcache_t *tcache, bool is_metadata, arena_t *arena, bool slow_path); -void *ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, - bool slow_path); -void *ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, +size_t isalloc(const void *ptr, bool demote); +void *iallocztm(tsd_t *tsd, size_t size, bool zero, tcache_t *tcache, + bool is_metadata, arena_t *arena); +void *imalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena); +void *imalloc(tsd_t *tsd, size_t size); +void *icalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena); +void *icalloc(tsd_t *tsd, size_t size); +void *ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, tcache_t *tcache, bool is_metadata, arena_t *arena); -void *ipalloct(tsdn_t *tsdn, size_t usize, size_t 
alignment, bool zero, +void *ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); void *ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero); -size_t ivsalloc(tsdn_t *tsdn, const void *ptr, bool demote); +size_t ivsalloc(const void *ptr, bool demote); size_t u2rz(size_t usize); -size_t p2rz(tsdn_t *tsdn, const void *ptr); -void idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool is_metadata, - bool slow_path); +size_t p2rz(const void *ptr); +void idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata); +void idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache); void idalloc(tsd_t *tsd, void *ptr); -void iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path); -void isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, - bool slow_path); -void isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, - bool slow_path); +void iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache); +void isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache); +void isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache); void *iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); @@ -1017,8 +846,8 @@ void *iralloct(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); void *iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero); -bool ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, - size_t extra, size_t alignment, bool zero); +bool ixalloc(void *ptr, size_t oldsize, size_t size, size_t extra, + size_t alignment, bool zero); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) @@ -1033,85 +862,100 @@ iaalloc(const void *ptr) /* * Typical usage: - * tsdn_t *tsdn = [...] * void *ptr = [...] 
- * size_t sz = isalloc(tsdn, ptr, config_prof); + * size_t sz = isalloc(ptr, config_prof); */ JEMALLOC_ALWAYS_INLINE size_t -isalloc(tsdn_t *tsdn, const void *ptr, bool demote) +isalloc(const void *ptr, bool demote) { assert(ptr != NULL); /* Demotion only makes sense if config_prof is true. */ assert(config_prof || !demote); - return (arena_salloc(tsdn, ptr, demote)); + return (arena_salloc(ptr, demote)); } JEMALLOC_ALWAYS_INLINE void * -iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, - bool is_metadata, arena_t *arena, bool slow_path) +iallocztm(tsd_t *tsd, size_t size, bool zero, tcache_t *tcache, bool is_metadata, + arena_t *arena) { void *ret; assert(size != 0); - assert(!is_metadata || tcache == NULL); - assert(!is_metadata || arena == NULL || arena->ind < narenas_auto); - ret = arena_malloc(tsdn, arena, size, ind, zero, tcache, slow_path); + ret = arena_malloc(tsd, arena, size, zero, tcache); if (config_stats && is_metadata && likely(ret != NULL)) { - arena_metadata_allocated_add(iaalloc(ret), - isalloc(tsdn, ret, config_prof)); - } - return (ret); -} - -JEMALLOC_ALWAYS_INLINE void * -ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, bool slow_path) -{ - - return (iallocztm(tsd_tsdn(tsd), size, ind, zero, tcache_get(tsd, true), - false, NULL, slow_path)); -} - -JEMALLOC_ALWAYS_INLINE void * -ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, - tcache_t *tcache, bool is_metadata, arena_t *arena) -{ - void *ret; - - assert(usize != 0); - assert(usize == sa2u(usize, alignment)); - assert(!is_metadata || tcache == NULL); - assert(!is_metadata || arena == NULL || arena->ind < narenas_auto); - - ret = arena_palloc(tsdn, arena, usize, alignment, zero, tcache); - assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); - if (config_stats && is_metadata && likely(ret != NULL)) { - arena_metadata_allocated_add(iaalloc(ret), isalloc(tsdn, ret, + arena_metadata_allocated_add(iaalloc(ret), isalloc(ret, config_prof)); } 
return (ret); } JEMALLOC_ALWAYS_INLINE void * -ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, +imalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena) +{ + + return (iallocztm(tsd, size, false, tcache, false, arena)); +} + +JEMALLOC_ALWAYS_INLINE void * +imalloc(tsd_t *tsd, size_t size) +{ + + return (iallocztm(tsd, size, false, tcache_get(tsd, true), false, NULL)); +} + +JEMALLOC_ALWAYS_INLINE void * +icalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena) +{ + + return (iallocztm(tsd, size, true, tcache, false, arena)); +} + +JEMALLOC_ALWAYS_INLINE void * +icalloc(tsd_t *tsd, size_t size) +{ + + return (iallocztm(tsd, size, true, tcache_get(tsd, true), false, NULL)); +} + +JEMALLOC_ALWAYS_INLINE void * +ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, + tcache_t *tcache, bool is_metadata, arena_t *arena) +{ + void *ret; + + assert(usize != 0); + assert(usize == sa2u(usize, alignment)); + + ret = arena_palloc(tsd, arena, usize, alignment, zero, tcache); + assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); + if (config_stats && is_metadata && likely(ret != NULL)) { + arena_metadata_allocated_add(iaalloc(ret), isalloc(ret, + config_prof)); + } + return (ret); +} + +JEMALLOC_ALWAYS_INLINE void * +ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) { - return (ipallocztm(tsdn, usize, alignment, zero, tcache, false, arena)); + return (ipallocztm(tsd, usize, alignment, zero, tcache, false, arena)); } JEMALLOC_ALWAYS_INLINE void * ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero) { - return (ipallocztm(tsd_tsdn(tsd), usize, alignment, zero, - tcache_get(tsd, true), false, NULL)); + return (ipallocztm(tsd, usize, alignment, zero, tcache_get(tsd, + NULL), false, NULL)); } JEMALLOC_ALWAYS_INLINE size_t -ivsalloc(tsdn_t *tsdn, const void *ptr, bool demote) +ivsalloc(const void *ptr, bool demote) { extent_node_t *node; @@ -1123,7 +967,7 @@ ivsalloc(tsdn_t 
*tsdn, const void *ptr, bool demote) assert(extent_node_addr_get(node) == ptr || extent_node_achunk_get(node)); - return (isalloc(tsdn, ptr, demote)); + return (isalloc(ptr, demote)); } JEMALLOC_INLINE size_t @@ -1141,62 +985,65 @@ u2rz(size_t usize) } JEMALLOC_INLINE size_t -p2rz(tsdn_t *tsdn, const void *ptr) +p2rz(const void *ptr) { - size_t usize = isalloc(tsdn, ptr, false); + size_t usize = isalloc(ptr, false); return (u2rz(usize)); } JEMALLOC_ALWAYS_INLINE void -idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool is_metadata, - bool slow_path) +idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata) { assert(ptr != NULL); - assert(!is_metadata || tcache == NULL); - assert(!is_metadata || iaalloc(ptr)->ind < narenas_auto); if (config_stats && is_metadata) { - arena_metadata_allocated_sub(iaalloc(ptr), isalloc(tsdn, ptr, + arena_metadata_allocated_sub(iaalloc(ptr), isalloc(ptr, config_prof)); } - arena_dalloc(tsdn, ptr, tcache, slow_path); + arena_dalloc(tsd, ptr, tcache); +} + +JEMALLOC_ALWAYS_INLINE void +idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache) +{ + + idalloctm(tsd, ptr, tcache, false); } JEMALLOC_ALWAYS_INLINE void idalloc(tsd_t *tsd, void *ptr) { - idalloctm(tsd_tsdn(tsd), ptr, tcache_get(tsd, false), false, true); + idalloctm(tsd, ptr, tcache_get(tsd, false), false); } JEMALLOC_ALWAYS_INLINE void -iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) +iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) { - if (slow_path && config_fill && unlikely(opt_quarantine)) + if (config_fill && unlikely(opt_quarantine)) quarantine(tsd, ptr); else - idalloctm(tsd_tsdn(tsd), ptr, tcache, false, slow_path); + idalloctm(tsd, ptr, tcache, false); } JEMALLOC_ALWAYS_INLINE void -isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, - bool slow_path) +isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) { - arena_sdalloc(tsdn, ptr, size, tcache, slow_path); + arena_sdalloc(tsd, ptr, size, tcache); } 
JEMALLOC_ALWAYS_INLINE void -isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, bool slow_path) +isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) { - if (slow_path && config_fill && unlikely(opt_quarantine)) + if (config_fill && unlikely(opt_quarantine)) quarantine(tsd, ptr); else - isdalloct(tsd_tsdn(tsd), ptr, size, tcache, slow_path); + isdalloct(tsd, ptr, size, tcache); } JEMALLOC_ALWAYS_INLINE void * @@ -1207,18 +1054,17 @@ iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t usize, copysize; usize = sa2u(size + extra, alignment); - if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) + if (usize == 0) return (NULL); - p = ipalloct(tsd_tsdn(tsd), usize, alignment, zero, tcache, arena); + p = ipalloct(tsd, usize, alignment, zero, tcache, arena); if (p == NULL) { if (extra == 0) return (NULL); /* Try again, without extra this time. */ usize = sa2u(size, alignment); - if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) + if (usize == 0) return (NULL); - p = ipalloct(tsd_tsdn(tsd), usize, alignment, zero, tcache, - arena); + p = ipalloct(tsd, usize, alignment, zero, tcache, arena); if (p == NULL) return (NULL); } @@ -1228,7 +1074,7 @@ iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, */ copysize = (size < oldsize) ? 
size : oldsize; memcpy(p, ptr, copysize); - isqalloc(tsd, ptr, oldsize, tcache, true); + isqalloc(tsd, ptr, oldsize, tcache); return (p); } @@ -1264,8 +1110,8 @@ iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, } JEMALLOC_ALWAYS_INLINE bool -ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero) +ixalloc(void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, + bool zero) { assert(ptr != NULL); @@ -1277,7 +1123,7 @@ ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, return (true); } - return (arena_ralloc_no_move(tsdn, ptr, oldsize, size, extra, zero)); + return (arena_ralloc_no_move(ptr, oldsize, size, extra, zero)); } #endif diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h index c907d9109..a601d6ebb 100644 --- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h @@ -17,18 +17,7 @@ # include # endif # include -# ifdef JEMALLOC_OS_UNFAIR_LOCK -# include -# endif -# ifdef JEMALLOC_GLIBC_MALLOC_HOOK -# include -# endif # include -# include -# include -# ifdef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME -# include -# endif #endif #include diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h.in b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h.in index def4ba550..b0f8caaf8 100644 --- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -56,9 +56,9 @@ #undef JEMALLOC_HAVE_BUILTIN_CLZ /* - * Defined if os_unfair_lock_*() functions are available, as provided by Darwin. + * Defined if madvise(2) is available. 
*/ -#undef JEMALLOC_OS_UNFAIR_LOCK +#undef JEMALLOC_HAVE_MADVISE /* * Defined if OSSpin*() functions are available, as provided by Darwin, and @@ -66,9 +66,6 @@ */ #undef JEMALLOC_OSSPIN -/* Defined if syscall(2) is usable. */ -#undef JEMALLOC_USE_SYSCALL - /* * Defined if secure_getenv(3) is available. */ @@ -79,24 +76,6 @@ */ #undef JEMALLOC_HAVE_ISSETUGID -/* Defined if pthread_atfork(3) is available. */ -#undef JEMALLOC_HAVE_PTHREAD_ATFORK - -/* - * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. - */ -#undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE - -/* - * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. - */ -#undef JEMALLOC_HAVE_CLOCK_MONOTONIC - -/* - * Defined if mach_absolute_time() is available. - */ -#undef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME - /* * Defined if _malloc_thread_cleanup() exists. At least in the case of * FreeBSD, pthread_key_create() allocates, which if used during malloc @@ -210,16 +189,9 @@ #undef JEMALLOC_TLS /* - * Used to mark unreachable code to quiet "end of non-void" compiler warnings. - * Don't use this directly; instead use unreachable() from util.h + * ffs()/ffsl() functions to use for bitmapping. Don't use these directly; + * instead, use jemalloc_ffs() or jemalloc_ffsl() from util.h. */ -#undef JEMALLOC_INTERNAL_UNREACHABLE - -/* - * ffs*() functions to use for bitmapping. Don't use these directly; instead, - * use ffs_*() from util.h. - */ -#undef JEMALLOC_INTERNAL_FFSLL #undef JEMALLOC_INTERNAL_FFSL #undef JEMALLOC_INTERNAL_FFS @@ -241,35 +213,18 @@ #undef JEMALLOC_ZONE #undef JEMALLOC_ZONE_VERSION -/* - * Methods for determining whether the OS overcommits. - * JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY: Linux's - * /proc/sys/vm.overcommit_memory file. - * JEMALLOC_SYSCTL_VM_OVERCOMMIT: FreeBSD's vm.overcommit sysctl. - */ -#undef JEMALLOC_SYSCTL_VM_OVERCOMMIT -#undef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY - -/* Defined if madvise(2) is available. 
*/ -#undef JEMALLOC_HAVE_MADVISE - /* * Methods for purging unused pages differ between operating systems. * - * madvise(..., MADV_FREE) : This marks pages as being unused, such that they - * will be discarded rather than swapped out. - * madvise(..., MADV_DONTNEED) : This immediately discards pages, such that - * new pages will be demand-zeroed if the - * address region is later touched. + * madvise(..., MADV_DONTNEED) : On Linux, this immediately discards pages, + * such that new pages will be demand-zeroed if + * the address region is later touched. + * madvise(..., MADV_FREE) : On FreeBSD and Darwin, this marks pages as being + * unused, such that they will be discarded rather + * than swapped out. */ -#undef JEMALLOC_PURGE_MADVISE_FREE #undef JEMALLOC_PURGE_MADVISE_DONTNEED - -/* - * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE - * arguments to madvise(2). - */ -#undef JEMALLOC_THP +#undef JEMALLOC_PURGE_MADVISE_FREE /* Define if operating system has alloca.h header. */ #undef JEMALLOC_HAS_ALLOCA_H @@ -286,9 +241,6 @@ /* sizeof(long) == 2^LG_SIZEOF_LONG. */ #undef LG_SIZEOF_LONG -/* sizeof(long long) == 2^LG_SIZEOF_LONG_LONG. */ -#undef LG_SIZEOF_LONG_LONG - /* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */ #undef LG_SIZEOF_INTMAX_T @@ -307,7 +259,4 @@ */ #undef JEMALLOC_EXPORT -/* config.malloc_conf options string. */ -#undef JEMALLOC_CONFIG_MALLOC_CONF - #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/deps/jemalloc/include/jemalloc/internal/mb.h b/deps/jemalloc/include/jemalloc/internal/mb.h index 5384728fd..3cfa78729 100644 --- a/deps/jemalloc/include/jemalloc/internal/mb.h +++ b/deps/jemalloc/include/jemalloc/internal/mb.h @@ -42,7 +42,7 @@ mb_write(void) : /* Inputs. */ : "memory" /* Clobbers. */ ); -# else +#else /* * This is hopefully enough to keep the compiler from reordering * instructions around this one. @@ -52,7 +52,7 @@ mb_write(void) : /* Inputs. */ : "memory" /* Clobbers. 
*/ ); -# endif +#endif } #elif (defined(__amd64__) || defined(__x86_64__)) JEMALLOC_INLINE void @@ -104,9 +104,9 @@ mb_write(void) { malloc_mutex_t mtx; - malloc_mutex_init(&mtx, "mb", WITNESS_RANK_OMIT); - malloc_mutex_lock(TSDN_NULL, &mtx); - malloc_mutex_unlock(TSDN_NULL, &mtx); + malloc_mutex_init(&mtx); + malloc_mutex_lock(&mtx); + malloc_mutex_unlock(&mtx); } #endif #endif diff --git a/deps/jemalloc/include/jemalloc/internal/mutex.h b/deps/jemalloc/include/jemalloc/internal/mutex.h index b442d2d4e..f051f2917 100644 --- a/deps/jemalloc/include/jemalloc/internal/mutex.h +++ b/deps/jemalloc/include/jemalloc/internal/mutex.h @@ -5,25 +5,18 @@ typedef struct malloc_mutex_s malloc_mutex_t; #ifdef _WIN32 # define MALLOC_MUTEX_INITIALIZER -#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) -# define MALLOC_MUTEX_INITIALIZER \ - {OS_UNFAIR_LOCK_INIT, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)} #elif (defined(JEMALLOC_OSSPIN)) -# define MALLOC_MUTEX_INITIALIZER {0, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)} +# define MALLOC_MUTEX_INITIALIZER {0} #elif (defined(JEMALLOC_MUTEX_INIT_CB)) -# define MALLOC_MUTEX_INITIALIZER \ - {PTHREAD_MUTEX_INITIALIZER, NULL, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)} +# define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER, NULL} #else # if (defined(JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP) && \ defined(PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP)) # define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_ADAPTIVE_NP -# define MALLOC_MUTEX_INITIALIZER \ - {PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP, \ - WITNESS_INITIALIZER(WITNESS_RANK_OMIT)} +# define MALLOC_MUTEX_INITIALIZER {PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP} # else # define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT -# define MALLOC_MUTEX_INITIALIZER \ - {PTHREAD_MUTEX_INITIALIZER, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)} +# define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER} # endif #endif @@ -38,8 +31,6 @@ struct malloc_mutex_s { # else CRITICAL_SECTION lock; # endif -#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) - 
os_unfair_lock lock; #elif (defined(JEMALLOC_OSSPIN)) OSSpinLock lock; #elif (defined(JEMALLOC_MUTEX_INIT_CB)) @@ -48,7 +39,6 @@ struct malloc_mutex_s { #else pthread_mutex_t lock; #endif - witness_t witness; }; #endif /* JEMALLOC_H_STRUCTS */ @@ -62,62 +52,52 @@ extern bool isthreaded; # define isthreaded true #endif -bool malloc_mutex_init(malloc_mutex_t *mutex, const char *name, - witness_rank_t rank); -void malloc_mutex_prefork(tsdn_t *tsdn, malloc_mutex_t *mutex); -void malloc_mutex_postfork_parent(tsdn_t *tsdn, malloc_mutex_t *mutex); -void malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex); -bool malloc_mutex_boot(void); +bool malloc_mutex_init(malloc_mutex_t *mutex); +void malloc_mutex_prefork(malloc_mutex_t *mutex); +void malloc_mutex_postfork_parent(malloc_mutex_t *mutex); +void malloc_mutex_postfork_child(malloc_mutex_t *mutex); +bool mutex_boot(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -void malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex); -void malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex); -void malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex); -void malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex); +void malloc_mutex_lock(malloc_mutex_t *mutex); +void malloc_mutex_unlock(malloc_mutex_t *mutex); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) JEMALLOC_INLINE void -malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) +malloc_mutex_lock(malloc_mutex_t *mutex) { if (isthreaded) { - witness_assert_not_owner(tsdn, &mutex->witness); #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 AcquireSRWLockExclusive(&mutex->lock); # else EnterCriticalSection(&mutex->lock); # endif -#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) - os_unfair_lock_lock(&mutex->lock); #elif (defined(JEMALLOC_OSSPIN)) OSSpinLockLock(&mutex->lock); #else 
pthread_mutex_lock(&mutex->lock); #endif - witness_lock(tsdn, &mutex->witness); } } JEMALLOC_INLINE void -malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) +malloc_mutex_unlock(malloc_mutex_t *mutex) { if (isthreaded) { - witness_unlock(tsdn, &mutex->witness); #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 ReleaseSRWLockExclusive(&mutex->lock); # else LeaveCriticalSection(&mutex->lock); # endif -#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) - os_unfair_lock_unlock(&mutex->lock); #elif (defined(JEMALLOC_OSSPIN)) OSSpinLockUnlock(&mutex->lock); #else @@ -125,22 +105,6 @@ malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) #endif } } - -JEMALLOC_INLINE void -malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) -{ - - if (isthreaded) - witness_assert_owner(tsdn, &mutex->witness); -} - -JEMALLOC_INLINE void -malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) -{ - - if (isthreaded) - witness_assert_not_owner(tsdn, &mutex->witness); -} #endif #endif /* JEMALLOC_H_INLINES */ diff --git a/deps/jemalloc/include/jemalloc/internal/nstime.h b/deps/jemalloc/include/jemalloc/internal/nstime.h deleted file mode 100644 index 93b27dc80..000000000 --- a/deps/jemalloc/include/jemalloc/internal/nstime.h +++ /dev/null @@ -1,48 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct nstime_s nstime_t; - -/* Maximum supported number of seconds (~584 years). 
*/ -#define NSTIME_SEC_MAX KQU(18446744072) - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct nstime_s { - uint64_t ns; -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -void nstime_init(nstime_t *time, uint64_t ns); -void nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec); -uint64_t nstime_ns(const nstime_t *time); -uint64_t nstime_sec(const nstime_t *time); -uint64_t nstime_nsec(const nstime_t *time); -void nstime_copy(nstime_t *time, const nstime_t *source); -int nstime_compare(const nstime_t *a, const nstime_t *b); -void nstime_add(nstime_t *time, const nstime_t *addend); -void nstime_subtract(nstime_t *time, const nstime_t *subtrahend); -void nstime_imultiply(nstime_t *time, uint64_t multiplier); -void nstime_idivide(nstime_t *time, uint64_t divisor); -uint64_t nstime_divide(const nstime_t *time, const nstime_t *divisor); -#ifdef JEMALLOC_JET -typedef bool (nstime_monotonic_t)(void); -extern nstime_monotonic_t *nstime_monotonic; -typedef bool (nstime_update_t)(nstime_t *); -extern nstime_update_t *nstime_update; -#else -bool nstime_monotonic(void); -bool nstime_update(nstime_t *time); -#endif - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/pages.h b/deps/jemalloc/include/jemalloc/internal/pages.h index 4ae9f156a..da7eb9686 100644 --- a/deps/jemalloc/include/jemalloc/internal/pages.h +++ b/deps/jemalloc/include/jemalloc/internal/pages.h @@ -9,16 +9,13 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -void *pages_map(void *addr, size_t 
size, bool *commit); +void *pages_map(void *addr, size_t size); void pages_unmap(void *addr, size_t size); void *pages_trim(void *addr, size_t alloc_size, size_t leadsize, - size_t size, bool *commit); + size_t size); bool pages_commit(void *addr, size_t size); bool pages_decommit(void *addr, size_t size); bool pages_purge(void *addr, size_t size); -bool pages_huge(void *addr, size_t size); -bool pages_nohuge(void *addr, size_t size); -void pages_boot(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/ph.h b/deps/jemalloc/include/jemalloc/internal/ph.h deleted file mode 100644 index 4f91c333f..000000000 --- a/deps/jemalloc/include/jemalloc/internal/ph.h +++ /dev/null @@ -1,345 +0,0 @@ -/* - * A Pairing Heap implementation. - * - * "The Pairing Heap: A New Form of Self-Adjusting Heap" - * https://www.cs.cmu.edu/~sleator/papers/pairing-heaps.pdf - * - * With auxiliary twopass list, described in a follow on paper. - * - * "Pairing Heaps: Experiments and Analysis" - * http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.106.2988&rep=rep1&type=pdf - * - ******************************************************************************* - */ - -#ifndef PH_H_ -#define PH_H_ - -/* Node structure. */ -#define phn(a_type) \ -struct { \ - a_type *phn_prev; \ - a_type *phn_next; \ - a_type *phn_lchild; \ -} - -/* Root structure. */ -#define ph(a_type) \ -struct { \ - a_type *ph_root; \ -} - -/* Internal utility macros. 
*/ -#define phn_lchild_get(a_type, a_field, a_phn) \ - (a_phn->a_field.phn_lchild) -#define phn_lchild_set(a_type, a_field, a_phn, a_lchild) do { \ - a_phn->a_field.phn_lchild = a_lchild; \ -} while (0) - -#define phn_next_get(a_type, a_field, a_phn) \ - (a_phn->a_field.phn_next) -#define phn_prev_set(a_type, a_field, a_phn, a_prev) do { \ - a_phn->a_field.phn_prev = a_prev; \ -} while (0) - -#define phn_prev_get(a_type, a_field, a_phn) \ - (a_phn->a_field.phn_prev) -#define phn_next_set(a_type, a_field, a_phn, a_next) do { \ - a_phn->a_field.phn_next = a_next; \ -} while (0) - -#define phn_merge_ordered(a_type, a_field, a_phn0, a_phn1, a_cmp) do { \ - a_type *phn0child; \ - \ - assert(a_phn0 != NULL); \ - assert(a_phn1 != NULL); \ - assert(a_cmp(a_phn0, a_phn1) <= 0); \ - \ - phn_prev_set(a_type, a_field, a_phn1, a_phn0); \ - phn0child = phn_lchild_get(a_type, a_field, a_phn0); \ - phn_next_set(a_type, a_field, a_phn1, phn0child); \ - if (phn0child != NULL) \ - phn_prev_set(a_type, a_field, phn0child, a_phn1); \ - phn_lchild_set(a_type, a_field, a_phn0, a_phn1); \ -} while (0) - -#define phn_merge(a_type, a_field, a_phn0, a_phn1, a_cmp, r_phn) do { \ - if (a_phn0 == NULL) \ - r_phn = a_phn1; \ - else if (a_phn1 == NULL) \ - r_phn = a_phn0; \ - else if (a_cmp(a_phn0, a_phn1) < 0) { \ - phn_merge_ordered(a_type, a_field, a_phn0, a_phn1, \ - a_cmp); \ - r_phn = a_phn0; \ - } else { \ - phn_merge_ordered(a_type, a_field, a_phn1, a_phn0, \ - a_cmp); \ - r_phn = a_phn1; \ - } \ -} while (0) - -#define ph_merge_siblings(a_type, a_field, a_phn, a_cmp, r_phn) do { \ - a_type *head = NULL; \ - a_type *tail = NULL; \ - a_type *phn0 = a_phn; \ - a_type *phn1 = phn_next_get(a_type, a_field, phn0); \ - \ - /* \ - * Multipass merge, wherein the first two elements of a FIFO \ - * are repeatedly merged, and each result is appended to the \ - * singly linked FIFO, until the FIFO contains only a single \ - * element. 
We start with a sibling list but no reference to \ - * its tail, so we do a single pass over the sibling list to \ - * populate the FIFO. \ - */ \ - if (phn1 != NULL) { \ - a_type *phnrest = phn_next_get(a_type, a_field, phn1); \ - if (phnrest != NULL) \ - phn_prev_set(a_type, a_field, phnrest, NULL); \ - phn_prev_set(a_type, a_field, phn0, NULL); \ - phn_next_set(a_type, a_field, phn0, NULL); \ - phn_prev_set(a_type, a_field, phn1, NULL); \ - phn_next_set(a_type, a_field, phn1, NULL); \ - phn_merge(a_type, a_field, phn0, phn1, a_cmp, phn0); \ - head = tail = phn0; \ - phn0 = phnrest; \ - while (phn0 != NULL) { \ - phn1 = phn_next_get(a_type, a_field, phn0); \ - if (phn1 != NULL) { \ - phnrest = phn_next_get(a_type, a_field, \ - phn1); \ - if (phnrest != NULL) { \ - phn_prev_set(a_type, a_field, \ - phnrest, NULL); \ - } \ - phn_prev_set(a_type, a_field, phn0, \ - NULL); \ - phn_next_set(a_type, a_field, phn0, \ - NULL); \ - phn_prev_set(a_type, a_field, phn1, \ - NULL); \ - phn_next_set(a_type, a_field, phn1, \ - NULL); \ - phn_merge(a_type, a_field, phn0, phn1, \ - a_cmp, phn0); \ - phn_next_set(a_type, a_field, tail, \ - phn0); \ - tail = phn0; \ - phn0 = phnrest; \ - } else { \ - phn_next_set(a_type, a_field, tail, \ - phn0); \ - tail = phn0; \ - phn0 = NULL; \ - } \ - } \ - phn0 = head; \ - phn1 = phn_next_get(a_type, a_field, phn0); \ - if (phn1 != NULL) { \ - while (true) { \ - head = phn_next_get(a_type, a_field, \ - phn1); \ - assert(phn_prev_get(a_type, a_field, \ - phn0) == NULL); \ - phn_next_set(a_type, a_field, phn0, \ - NULL); \ - assert(phn_prev_get(a_type, a_field, \ - phn1) == NULL); \ - phn_next_set(a_type, a_field, phn1, \ - NULL); \ - phn_merge(a_type, a_field, phn0, phn1, \ - a_cmp, phn0); \ - if (head == NULL) \ - break; \ - phn_next_set(a_type, a_field, tail, \ - phn0); \ - tail = phn0; \ - phn0 = head; \ - phn1 = phn_next_get(a_type, a_field, \ - phn0); \ - } \ - } \ - } \ - r_phn = phn0; \ -} while (0) - -#define ph_merge_aux(a_type, 
a_field, a_ph, a_cmp) do { \ - a_type *phn = phn_next_get(a_type, a_field, a_ph->ph_root); \ - if (phn != NULL) { \ - phn_prev_set(a_type, a_field, a_ph->ph_root, NULL); \ - phn_next_set(a_type, a_field, a_ph->ph_root, NULL); \ - phn_prev_set(a_type, a_field, phn, NULL); \ - ph_merge_siblings(a_type, a_field, phn, a_cmp, phn); \ - assert(phn_next_get(a_type, a_field, phn) == NULL); \ - phn_merge(a_type, a_field, a_ph->ph_root, phn, a_cmp, \ - a_ph->ph_root); \ - } \ -} while (0) - -#define ph_merge_children(a_type, a_field, a_phn, a_cmp, r_phn) do { \ - a_type *lchild = phn_lchild_get(a_type, a_field, a_phn); \ - if (lchild == NULL) \ - r_phn = NULL; \ - else { \ - ph_merge_siblings(a_type, a_field, lchild, a_cmp, \ - r_phn); \ - } \ -} while (0) - -/* - * The ph_proto() macro generates function prototypes that correspond to the - * functions generated by an equivalently parameterized call to ph_gen(). - */ -#define ph_proto(a_attr, a_prefix, a_ph_type, a_type) \ -a_attr void a_prefix##new(a_ph_type *ph); \ -a_attr bool a_prefix##empty(a_ph_type *ph); \ -a_attr a_type *a_prefix##first(a_ph_type *ph); \ -a_attr void a_prefix##insert(a_ph_type *ph, a_type *phn); \ -a_attr a_type *a_prefix##remove_first(a_ph_type *ph); \ -a_attr void a_prefix##remove(a_ph_type *ph, a_type *phn); - -/* - * The ph_gen() macro generates a type-specific pairing heap implementation, - * based on the above cpp macros. 
- */ -#define ph_gen(a_attr, a_prefix, a_ph_type, a_type, a_field, a_cmp) \ -a_attr void \ -a_prefix##new(a_ph_type *ph) \ -{ \ - \ - memset(ph, 0, sizeof(ph(a_type))); \ -} \ -a_attr bool \ -a_prefix##empty(a_ph_type *ph) \ -{ \ - \ - return (ph->ph_root == NULL); \ -} \ -a_attr a_type * \ -a_prefix##first(a_ph_type *ph) \ -{ \ - \ - if (ph->ph_root == NULL) \ - return (NULL); \ - ph_merge_aux(a_type, a_field, ph, a_cmp); \ - return (ph->ph_root); \ -} \ -a_attr void \ -a_prefix##insert(a_ph_type *ph, a_type *phn) \ -{ \ - \ - memset(&phn->a_field, 0, sizeof(phn(a_type))); \ - \ - /* \ - * Treat the root as an aux list during insertion, and lazily \ - * merge during a_prefix##remove_first(). For elements that \ - * are inserted, then removed via a_prefix##remove() before the \ - * aux list is ever processed, this makes insert/remove \ - * constant-time, whereas eager merging would make insert \ - * O(log n). \ - */ \ - if (ph->ph_root == NULL) \ - ph->ph_root = phn; \ - else { \ - phn_next_set(a_type, a_field, phn, phn_next_get(a_type, \ - a_field, ph->ph_root)); \ - if (phn_next_get(a_type, a_field, ph->ph_root) != \ - NULL) { \ - phn_prev_set(a_type, a_field, \ - phn_next_get(a_type, a_field, ph->ph_root), \ - phn); \ - } \ - phn_prev_set(a_type, a_field, phn, ph->ph_root); \ - phn_next_set(a_type, a_field, ph->ph_root, phn); \ - } \ -} \ -a_attr a_type * \ -a_prefix##remove_first(a_ph_type *ph) \ -{ \ - a_type *ret; \ - \ - if (ph->ph_root == NULL) \ - return (NULL); \ - ph_merge_aux(a_type, a_field, ph, a_cmp); \ - \ - ret = ph->ph_root; \ - \ - ph_merge_children(a_type, a_field, ph->ph_root, a_cmp, \ - ph->ph_root); \ - \ - return (ret); \ -} \ -a_attr void \ -a_prefix##remove(a_ph_type *ph, a_type *phn) \ -{ \ - a_type *replace, *parent; \ - \ - /* \ - * We can delete from aux list without merging it, but we need \ - * to merge if we are dealing with the root node. 
\ - */ \ - if (ph->ph_root == phn) { \ - ph_merge_aux(a_type, a_field, ph, a_cmp); \ - if (ph->ph_root == phn) { \ - ph_merge_children(a_type, a_field, ph->ph_root, \ - a_cmp, ph->ph_root); \ - return; \ - } \ - } \ - \ - /* Get parent (if phn is leftmost child) before mutating. */ \ - if ((parent = phn_prev_get(a_type, a_field, phn)) != NULL) { \ - if (phn_lchild_get(a_type, a_field, parent) != phn) \ - parent = NULL; \ - } \ - /* Find a possible replacement node, and link to parent. */ \ - ph_merge_children(a_type, a_field, phn, a_cmp, replace); \ - /* Set next/prev for sibling linked list. */ \ - if (replace != NULL) { \ - if (parent != NULL) { \ - phn_prev_set(a_type, a_field, replace, parent); \ - phn_lchild_set(a_type, a_field, parent, \ - replace); \ - } else { \ - phn_prev_set(a_type, a_field, replace, \ - phn_prev_get(a_type, a_field, phn)); \ - if (phn_prev_get(a_type, a_field, phn) != \ - NULL) { \ - phn_next_set(a_type, a_field, \ - phn_prev_get(a_type, a_field, phn), \ - replace); \ - } \ - } \ - phn_next_set(a_type, a_field, replace, \ - phn_next_get(a_type, a_field, phn)); \ - if (phn_next_get(a_type, a_field, phn) != NULL) { \ - phn_prev_set(a_type, a_field, \ - phn_next_get(a_type, a_field, phn), \ - replace); \ - } \ - } else { \ - if (parent != NULL) { \ - a_type *next = phn_next_get(a_type, a_field, \ - phn); \ - phn_lchild_set(a_type, a_field, parent, next); \ - if (next != NULL) { \ - phn_prev_set(a_type, a_field, next, \ - parent); \ - } \ - } else { \ - assert(phn_prev_get(a_type, a_field, phn) != \ - NULL); \ - phn_next_set(a_type, a_field, \ - phn_prev_get(a_type, a_field, phn), \ - phn_next_get(a_type, a_field, phn)); \ - } \ - if (phn_next_get(a_type, a_field, phn) != NULL) { \ - phn_prev_set(a_type, a_field, \ - phn_next_get(a_type, a_field, phn), \ - phn_prev_get(a_type, a_field, phn)); \ - } \ - } \ -} - -#endif /* PH_H_ */ diff --git a/deps/jemalloc/include/jemalloc/internal/private_symbols.txt 
b/deps/jemalloc/include/jemalloc/internal/private_symbols.txt index c1c6c4090..a90021aa6 100644 --- a/deps/jemalloc/include/jemalloc/internal/private_symbols.txt +++ b/deps/jemalloc/include/jemalloc/internal/private_symbols.txt @@ -3,15 +3,12 @@ a0get a0malloc arena_aalloc arena_alloc_junk_small -arena_basic_stats_merge arena_bin_index arena_bin_info -arena_bitselm_get_const -arena_bitselm_get_mutable +arena_bitselm_get arena_boot arena_choose arena_choose_hard -arena_choose_impl arena_chunk_alloc_huge arena_chunk_cache_maybe_insert arena_chunk_cache_maybe_remove @@ -28,25 +25,18 @@ arena_dalloc_junk_small arena_dalloc_large arena_dalloc_large_junked_locked arena_dalloc_small -arena_decay_tick -arena_decay_ticks -arena_decay_time_default_get -arena_decay_time_default_set -arena_decay_time_get -arena_decay_time_set arena_dss_prec_get arena_dss_prec_set -arena_extent_sn_next arena_get -arena_ichoose +arena_get_hard arena_init arena_lg_dirty_mult_default_get arena_lg_dirty_mult_default_set arena_lg_dirty_mult_get arena_lg_dirty_mult_set arena_malloc -arena_malloc_hard arena_malloc_large +arena_malloc_small arena_mapbits_allocated_get arena_mapbits_binind_get arena_mapbits_decommitted_get @@ -57,6 +47,9 @@ arena_mapbits_large_binind_set arena_mapbits_large_get arena_mapbits_large_set arena_mapbits_large_size_get +arena_mapbitsp_get +arena_mapbitsp_read +arena_mapbitsp_write arena_mapbits_size_decode arena_mapbits_size_encode arena_mapbits_small_runind_get @@ -65,33 +58,23 @@ arena_mapbits_unallocated_set arena_mapbits_unallocated_size_get arena_mapbits_unallocated_size_set arena_mapbits_unzeroed_get -arena_mapbitsp_get_const -arena_mapbitsp_get_mutable -arena_mapbitsp_read -arena_mapbitsp_write arena_maxrun arena_maybe_purge arena_metadata_allocated_add arena_metadata_allocated_get arena_metadata_allocated_sub arena_migrate -arena_miscelm_get_const -arena_miscelm_get_mutable +arena_miscelm_get arena_miscelm_to_pageind arena_miscelm_to_rpages +arena_nbound arena_new 
arena_node_alloc arena_node_dalloc -arena_nthreads_dec -arena_nthreads_get -arena_nthreads_inc arena_palloc arena_postfork_child arena_postfork_parent -arena_prefork0 -arena_prefork1 -arena_prefork2 -arena_prefork3 +arena_prefork arena_prof_accum arena_prof_accum_impl arena_prof_accum_locked @@ -100,25 +83,21 @@ arena_prof_tctx_get arena_prof_tctx_reset arena_prof_tctx_set arena_ptr_small_binind_get -arena_purge +arena_purge_all arena_quarantine_junk_small arena_ralloc arena_ralloc_junk_large arena_ralloc_no_move arena_rd_to_miscelm arena_redzone_corruption -arena_reset arena_run_regind arena_run_to_miscelm arena_salloc +arenas_cache_bypass_cleanup +arenas_cache_cleanup arena_sdalloc arena_stats_merge arena_tcache_fill_small -arena_tdata_get -arena_tdata_get_hard -arenas -arenas_tdata_bypass_cleanup -arenas_tdata_cleanup atomic_add_p atomic_add_u atomic_add_uint32 @@ -134,11 +113,6 @@ atomic_sub_u atomic_sub_uint32 atomic_sub_uint64 atomic_sub_z -atomic_write_p -atomic_write_u -atomic_write_uint32 -atomic_write_uint64 -atomic_write_z base_alloc base_boot base_postfork_child @@ -148,6 +122,7 @@ base_stats_get bitmap_full bitmap_get bitmap_info_init +bitmap_info_ngroups bitmap_init bitmap_set bitmap_sfu @@ -164,25 +139,32 @@ chunk_alloc_dss chunk_alloc_mmap chunk_alloc_wrapper chunk_boot +chunk_dalloc_arena chunk_dalloc_cache chunk_dalloc_mmap chunk_dalloc_wrapper chunk_deregister chunk_dss_boot -chunk_dss_mergeable +chunk_dss_postfork_child +chunk_dss_postfork_parent chunk_dss_prec_get chunk_dss_prec_set +chunk_dss_prefork chunk_hooks_default chunk_hooks_get chunk_hooks_set chunk_in_dss chunk_lookup chunk_npages +chunk_postfork_child +chunk_postfork_parent +chunk_prefork +chunk_purge_arena chunk_purge_wrapper chunk_register -chunks_rtree chunksize chunksize_mask +chunks_rtree ckh_count ckh_delete ckh_insert @@ -201,7 +183,6 @@ ctl_nametomib ctl_postfork_child ctl_postfork_parent ctl_prefork -decay_ticker_get dss_prec_names extent_node_achunk_get 
extent_node_achunk_set @@ -209,8 +190,6 @@ extent_node_addr_get extent_node_addr_set extent_node_arena_get extent_node_arena_set -extent_node_committed_get -extent_node_committed_set extent_node_dirty_insert extent_node_dirty_linkage_init extent_node_dirty_remove @@ -219,12 +198,8 @@ extent_node_prof_tctx_get extent_node_prof_tctx_set extent_node_size_get extent_node_size_set -extent_node_sn_get -extent_node_sn_set extent_node_zeroed_get extent_node_zeroed_set -extent_tree_ad_destroy -extent_tree_ad_destroy_recurse extent_tree_ad_empty extent_tree_ad_first extent_tree_ad_insert @@ -242,31 +217,23 @@ extent_tree_ad_reverse_iter extent_tree_ad_reverse_iter_recurse extent_tree_ad_reverse_iter_start extent_tree_ad_search -extent_tree_szsnad_destroy -extent_tree_szsnad_destroy_recurse -extent_tree_szsnad_empty -extent_tree_szsnad_first -extent_tree_szsnad_insert -extent_tree_szsnad_iter -extent_tree_szsnad_iter_recurse -extent_tree_szsnad_iter_start -extent_tree_szsnad_last -extent_tree_szsnad_new -extent_tree_szsnad_next -extent_tree_szsnad_nsearch -extent_tree_szsnad_prev -extent_tree_szsnad_psearch -extent_tree_szsnad_remove -extent_tree_szsnad_reverse_iter -extent_tree_szsnad_reverse_iter_recurse -extent_tree_szsnad_reverse_iter_start -extent_tree_szsnad_search -ffs_llu -ffs_lu -ffs_u -ffs_u32 -ffs_u64 -ffs_zu +extent_tree_szad_empty +extent_tree_szad_first +extent_tree_szad_insert +extent_tree_szad_iter +extent_tree_szad_iter_recurse +extent_tree_szad_iter_start +extent_tree_szad_last +extent_tree_szad_new +extent_tree_szad_next +extent_tree_szad_nsearch +extent_tree_szad_prev +extent_tree_szad_psearch +extent_tree_szad_remove +extent_tree_szad_reverse_iter +extent_tree_szad_reverse_iter_recurse +extent_tree_szad_reverse_iter_start +extent_tree_szad_search get_errno hash hash_fmix_32 @@ -290,16 +257,19 @@ huge_ralloc huge_ralloc_no_move huge_salloc iaalloc -ialloc iallocztm -iarena_cleanup +icalloc +icalloct idalloc +idalloct idalloctm -in_valgrind +imalloc 
+imalloct index2size index2size_compute index2size_lookup index2size_tab +in_valgrind ipalloc ipalloct ipallocztm @@ -318,11 +288,7 @@ jemalloc_postfork_parent jemalloc_prefork large_maxclass lg_floor -lg_prof_sample malloc_cprintf -malloc_mutex_assert_not_owner -malloc_mutex_assert_owner -malloc_mutex_boot malloc_mutex_init malloc_mutex_lock malloc_mutex_postfork_child @@ -344,29 +310,12 @@ malloc_write map_bias map_misc_offset mb_write -narenas_auto -narenas_tdata_cleanup +mutex_boot +narenas_cache_cleanup narenas_total_get ncpus nhbins -nhclasses -nlclasses -nstime_add -nstime_compare -nstime_copy -nstime_divide -nstime_idivide -nstime_imultiply -nstime_init -nstime_init2 -nstime_monotonic -nstime_ns -nstime_nsec -nstime_sec -nstime_subtract -nstime_update opt_abort -opt_decay_time opt_dss opt_junk opt_junk_alloc @@ -385,7 +334,6 @@ opt_prof_gdump opt_prof_leak opt_prof_prefix opt_prof_thread_active_init -opt_purge opt_quarantine opt_redzone opt_stats_print @@ -394,32 +342,13 @@ opt_utrace opt_xmalloc opt_zero p2rz -pages_boot pages_commit pages_decommit -pages_huge pages_map -pages_nohuge pages_purge pages_trim pages_unmap -pind2sz -pind2sz_compute -pind2sz_lookup -pind2sz_tab -pow2_ceil_u32 -pow2_ceil_u64 -pow2_ceil_zu -prng_lg_range_u32 -prng_lg_range_u64 -prng_lg_range_zu -prng_range_u32 -prng_range_u64 -prng_range_zu -prng_state_next_u32 -prng_state_next_u64 -prng_state_next_zu -prof_active +pow2_ceil prof_active_get prof_active_get_unlocked prof_active_set @@ -429,7 +358,6 @@ prof_backtrace prof_boot0 prof_boot1 prof_boot2 -prof_bt_count prof_dump_header prof_dump_open prof_free @@ -447,8 +375,7 @@ prof_malloc_sample_object prof_mdump prof_postfork_child prof_postfork_parent -prof_prefork0 -prof_prefork1 +prof_prefork prof_realloc prof_reset prof_sample_accum_update @@ -457,7 +384,6 @@ prof_tctx_get prof_tctx_reset prof_tctx_set prof_tdata_cleanup -prof_tdata_count prof_tdata_get prof_tdata_init prof_tdata_reinit @@ -467,13 +393,11 @@ 
prof_thread_active_init_set prof_thread_active_set prof_thread_name_get prof_thread_name_set -psz2ind -psz2u -purge_mode_names quarantine quarantine_alloc_hook quarantine_alloc_hook_work quarantine_cleanup +register_zone rtree_child_read rtree_child_read_hard rtree_child_tryread @@ -489,8 +413,6 @@ rtree_subtree_read_hard rtree_subtree_tryread rtree_val_read rtree_val_write -run_quantize_ceil -run_quantize_floor s2u s2u_compute s2u_lookup @@ -500,8 +422,6 @@ size2index size2index_compute size2index_lookup size2index_tab -spin_adaptive -spin_init stats_cactive stats_cactive_add stats_cactive_get @@ -511,6 +431,8 @@ tcache_alloc_easy tcache_alloc_large tcache_alloc_small tcache_alloc_small_hard +tcache_arena_associate +tcache_arena_dissociate tcache_arena_reassociate tcache_bin_flush_large tcache_bin_flush_small @@ -529,103 +451,49 @@ tcache_flush tcache_get tcache_get_hard tcache_maxclass -tcache_salloc -tcache_stats_merge tcaches +tcache_salloc tcaches_create tcaches_destroy tcaches_flush tcaches_get +tcache_stats_merge thread_allocated_cleanup thread_deallocated_cleanup -ticker_copy -ticker_init -ticker_read -ticker_tick -ticker_ticks tsd_arena_get tsd_arena_set -tsd_arenap_get -tsd_arenas_tdata_bypass_get -tsd_arenas_tdata_bypass_set -tsd_arenas_tdata_bypassp_get -tsd_arenas_tdata_get -tsd_arenas_tdata_set -tsd_arenas_tdatap_get tsd_boot tsd_boot0 tsd_boot1 tsd_booted -tsd_booted_get tsd_cleanup tsd_cleanup_wrapper tsd_fetch -tsd_fetch_impl tsd_get -tsd_get_allocates -tsd_iarena_get -tsd_iarena_set -tsd_iarenap_get +tsd_wrapper_get +tsd_wrapper_set tsd_initialized tsd_init_check_recursion tsd_init_finish tsd_init_head -tsd_narenas_tdata_get -tsd_narenas_tdata_set -tsd_narenas_tdatap_get -tsd_wrapper_get -tsd_wrapper_set tsd_nominal -tsd_prof_tdata_get -tsd_prof_tdata_set -tsd_prof_tdatap_get tsd_quarantine_get tsd_quarantine_set -tsd_quarantinep_get tsd_set tsd_tcache_enabled_get tsd_tcache_enabled_set -tsd_tcache_enabledp_get tsd_tcache_get tsd_tcache_set 
-tsd_tcachep_get -tsd_thread_allocated_get -tsd_thread_allocated_set -tsd_thread_allocatedp_get -tsd_thread_deallocated_get -tsd_thread_deallocated_set -tsd_thread_deallocatedp_get tsd_tls tsd_tsd -tsd_tsdn -tsd_witness_fork_get -tsd_witness_fork_set -tsd_witness_forkp_get -tsd_witnesses_get -tsd_witnesses_set -tsd_witnessesp_get -tsdn_fetch -tsdn_null -tsdn_tsd +tsd_prof_tdata_get +tsd_prof_tdata_set +tsd_thread_allocated_get +tsd_thread_allocated_set +tsd_thread_deallocated_get +tsd_thread_deallocated_set u2rz valgrind_freelike_block valgrind_make_mem_defined valgrind_make_mem_noaccess valgrind_make_mem_undefined -witness_assert_lockless -witness_assert_not_owner -witness_assert_owner -witness_fork_cleanup -witness_init -witness_lock -witness_lock_error -witness_lockless_error -witness_not_owner_error -witness_owner -witness_owner_error -witness_postfork_child -witness_postfork_parent -witness_prefork -witness_unlock -witnesses_cleanup -zone_register diff --git a/deps/jemalloc/include/jemalloc/internal/prng.h b/deps/jemalloc/include/jemalloc/internal/prng.h index c2bda19c6..216d0ef47 100644 --- a/deps/jemalloc/include/jemalloc/internal/prng.h +++ b/deps/jemalloc/include/jemalloc/internal/prng.h @@ -18,13 +18,31 @@ * proportional to bit position. For example, the lowest bit has a cycle of 2, * the next has a cycle of 4, etc. For this reason, we prefer to use the upper * bits. + * + * Macro parameters: + * uint32_t r : Result. + * unsigned lg_range : (0..32], number of least significant bits to return. + * uint32_t state : Seed value. + * const uint32_t a, c : See above discussion. 
*/ +#define prng32(r, lg_range, state, a, c) do { \ + assert((lg_range) > 0); \ + assert((lg_range) <= 32); \ + \ + r = (state * (a)) + (c); \ + state = r; \ + r >>= (32 - (lg_range)); \ +} while (false) -#define PRNG_A_32 UINT32_C(1103515241) -#define PRNG_C_32 UINT32_C(12347) - -#define PRNG_A_64 UINT64_C(6364136223846793005) -#define PRNG_C_64 UINT64_C(1442695040888963407) +/* Same as prng32(), but 64 bits of pseudo-randomness, using uint64_t. */ +#define prng64(r, lg_range, state, a, c) do { \ + assert((lg_range) > 0); \ + assert((lg_range) <= 64); \ + \ + r = (state * (a)) + (c); \ + state = r; \ + r >>= (64 - (lg_range)); \ +} while (false) #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ @@ -38,170 +56,5 @@ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES -#ifndef JEMALLOC_ENABLE_INLINE -uint32_t prng_state_next_u32(uint32_t state); -uint64_t prng_state_next_u64(uint64_t state); -size_t prng_state_next_zu(size_t state); - -uint32_t prng_lg_range_u32(uint32_t *state, unsigned lg_range, - bool atomic); -uint64_t prng_lg_range_u64(uint64_t *state, unsigned lg_range); -size_t prng_lg_range_zu(size_t *state, unsigned lg_range, bool atomic); - -uint32_t prng_range_u32(uint32_t *state, uint32_t range, bool atomic); -uint64_t prng_range_u64(uint64_t *state, uint64_t range); -size_t prng_range_zu(size_t *state, size_t range, bool atomic); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PRNG_C_)) -JEMALLOC_ALWAYS_INLINE uint32_t -prng_state_next_u32(uint32_t state) -{ - - return ((state * PRNG_A_32) + PRNG_C_32); -} - -JEMALLOC_ALWAYS_INLINE uint64_t -prng_state_next_u64(uint64_t state) -{ - - return ((state * PRNG_A_64) + PRNG_C_64); -} - -JEMALLOC_ALWAYS_INLINE size_t -prng_state_next_zu(size_t state) -{ - -#if LG_SIZEOF_PTR == 2 - return ((state * PRNG_A_32) + PRNG_C_32); -#elif LG_SIZEOF_PTR == 3 - return ((state * PRNG_A_64) 
+ PRNG_C_64); -#else -#error Unsupported pointer size -#endif -} - -JEMALLOC_ALWAYS_INLINE uint32_t -prng_lg_range_u32(uint32_t *state, unsigned lg_range, bool atomic) -{ - uint32_t ret, state1; - - assert(lg_range > 0); - assert(lg_range <= 32); - - if (atomic) { - uint32_t state0; - - do { - state0 = atomic_read_uint32(state); - state1 = prng_state_next_u32(state0); - } while (atomic_cas_uint32(state, state0, state1)); - } else { - state1 = prng_state_next_u32(*state); - *state = state1; - } - ret = state1 >> (32 - lg_range); - - return (ret); -} - -/* 64-bit atomic operations cannot be supported on all relevant platforms. */ -JEMALLOC_ALWAYS_INLINE uint64_t -prng_lg_range_u64(uint64_t *state, unsigned lg_range) -{ - uint64_t ret, state1; - - assert(lg_range > 0); - assert(lg_range <= 64); - - state1 = prng_state_next_u64(*state); - *state = state1; - ret = state1 >> (64 - lg_range); - - return (ret); -} - -JEMALLOC_ALWAYS_INLINE size_t -prng_lg_range_zu(size_t *state, unsigned lg_range, bool atomic) -{ - size_t ret, state1; - - assert(lg_range > 0); - assert(lg_range <= ZU(1) << (3 + LG_SIZEOF_PTR)); - - if (atomic) { - size_t state0; - - do { - state0 = atomic_read_z(state); - state1 = prng_state_next_zu(state0); - } while (atomic_cas_z(state, state0, state1)); - } else { - state1 = prng_state_next_zu(*state); - *state = state1; - } - ret = state1 >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - lg_range); - - return (ret); -} - -JEMALLOC_ALWAYS_INLINE uint32_t -prng_range_u32(uint32_t *state, uint32_t range, bool atomic) -{ - uint32_t ret; - unsigned lg_range; - - assert(range > 1); - - /* Compute the ceiling of lg(range). */ - lg_range = ffs_u32(pow2_ceil_u32(range)) - 1; - - /* Generate a result in [0..range) via repeated trial. 
*/ - do { - ret = prng_lg_range_u32(state, lg_range, atomic); - } while (ret >= range); - - return (ret); -} - -JEMALLOC_ALWAYS_INLINE uint64_t -prng_range_u64(uint64_t *state, uint64_t range) -{ - uint64_t ret; - unsigned lg_range; - - assert(range > 1); - - /* Compute the ceiling of lg(range). */ - lg_range = ffs_u64(pow2_ceil_u64(range)) - 1; - - /* Generate a result in [0..range) via repeated trial. */ - do { - ret = prng_lg_range_u64(state, lg_range); - } while (ret >= range); - - return (ret); -} - -JEMALLOC_ALWAYS_INLINE size_t -prng_range_zu(size_t *state, size_t range, bool atomic) -{ - size_t ret; - unsigned lg_range; - - assert(range > 1); - - /* Compute the ceiling of lg(range). */ - lg_range = ffs_u64(pow2_ceil_u64(range)) - 1; - - /* Generate a result in [0..range) via repeated trial. */ - do { - ret = prng_lg_range_zu(state, lg_range, atomic); - } while (ret >= range); - - return (ret); -} -#endif - #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/prof.h b/deps/jemalloc/include/jemalloc/internal/prof.h index 8293b71ed..e5198c3e8 100644 --- a/deps/jemalloc/include/jemalloc/internal/prof.h +++ b/deps/jemalloc/include/jemalloc/internal/prof.h @@ -281,7 +281,7 @@ extern uint64_t prof_interval; extern size_t lg_prof_sample; void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); -void prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, +void prof_malloc_sample_object(const void *ptr, size_t usize, prof_tctx_t *tctx); void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx); void bt_init(prof_bt_t *bt, void **vec); @@ -293,33 +293,32 @@ size_t prof_bt_count(void); const prof_cnt_t *prof_cnt_all(void); typedef int (prof_dump_open_t)(bool, const char *); extern prof_dump_open_t *prof_dump_open; -typedef bool (prof_dump_header_t)(tsdn_t *, bool, const prof_cnt_t *); +typedef bool 
(prof_dump_header_t)(bool, const prof_cnt_t *); extern prof_dump_header_t *prof_dump_header; #endif -void prof_idump(tsdn_t *tsdn); -bool prof_mdump(tsd_t *tsd, const char *filename); -void prof_gdump(tsdn_t *tsdn); +void prof_idump(void); +bool prof_mdump(const char *filename); +void prof_gdump(void); prof_tdata_t *prof_tdata_init(tsd_t *tsd); prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); void prof_reset(tsd_t *tsd, size_t lg_sample); void prof_tdata_cleanup(tsd_t *tsd); -bool prof_active_get(tsdn_t *tsdn); -bool prof_active_set(tsdn_t *tsdn, bool active); -const char *prof_thread_name_get(tsd_t *tsd); +const char *prof_thread_name_get(void); +bool prof_active_get(void); +bool prof_active_set(bool active); int prof_thread_name_set(tsd_t *tsd, const char *thread_name); -bool prof_thread_active_get(tsd_t *tsd); -bool prof_thread_active_set(tsd_t *tsd, bool active); -bool prof_thread_active_init_get(tsdn_t *tsdn); -bool prof_thread_active_init_set(tsdn_t *tsdn, bool active_init); -bool prof_gdump_get(tsdn_t *tsdn); -bool prof_gdump_set(tsdn_t *tsdn, bool active); +bool prof_thread_active_get(void); +bool prof_thread_active_set(bool active); +bool prof_thread_active_init_get(void); +bool prof_thread_active_init_set(bool active_init); +bool prof_gdump_get(void); +bool prof_gdump_set(bool active); void prof_boot0(void); void prof_boot1(void); -bool prof_boot2(tsd_t *tsd); -void prof_prefork0(tsdn_t *tsdn); -void prof_prefork1(tsdn_t *tsdn); -void prof_postfork_parent(tsdn_t *tsdn); -void prof_postfork_child(tsdn_t *tsdn); +bool prof_boot2(void); +void prof_prefork(void); +void prof_postfork_parent(void); +void prof_postfork_child(void); void prof_sample_threshold_update(prof_tdata_t *tdata); #endif /* JEMALLOC_H_EXTERNS */ @@ -330,17 +329,17 @@ void prof_sample_threshold_update(prof_tdata_t *tdata); bool prof_active_get_unlocked(void); bool prof_gdump_get_unlocked(void); prof_tdata_t *prof_tdata_get(tsd_t *tsd, bool create); -prof_tctx_t 
*prof_tctx_get(tsdn_t *tsdn, const void *ptr); -void prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, - prof_tctx_t *tctx); -void prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize, - const void *old_ptr, prof_tctx_t *tctx); bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool commit, prof_tdata_t **tdata_out); prof_tctx_t *prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update); -void prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize, +prof_tctx_t *prof_tctx_get(const void *ptr); +void prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx); +void prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, prof_tctx_t *tctx); +void prof_malloc_sample_object(const void *ptr, size_t usize, + prof_tctx_t *tctx); +void prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx); void prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, bool prof_active, bool updated, const void *old_ptr, size_t old_usize, prof_tctx_t *old_tctx); @@ -398,34 +397,34 @@ prof_tdata_get(tsd_t *tsd, bool create) } JEMALLOC_ALWAYS_INLINE prof_tctx_t * -prof_tctx_get(tsdn_t *tsdn, const void *ptr) +prof_tctx_get(const void *ptr) { cassert(config_prof); assert(ptr != NULL); - return (arena_prof_tctx_get(tsdn, ptr)); + return (arena_prof_tctx_get(ptr)); } JEMALLOC_ALWAYS_INLINE void -prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx) +prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); - arena_prof_tctx_set(tsdn, ptr, usize, tctx); + arena_prof_tctx_set(ptr, usize, tctx); } JEMALLOC_ALWAYS_INLINE void -prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize, const void *old_ptr, +prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, prof_tctx_t *old_tctx) { cassert(config_prof); assert(ptr != NULL); - arena_prof_tctx_reset(tsdn, ptr, usize, old_ptr, old_tctx); + arena_prof_tctx_reset(ptr, usize, old_ptr, 
old_tctx); } JEMALLOC_ALWAYS_INLINE bool @@ -437,16 +436,16 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, cassert(config_prof); tdata = prof_tdata_get(tsd, true); - if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)) + if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) tdata = NULL; if (tdata_out != NULL) *tdata_out = tdata; - if (unlikely(tdata == NULL)) + if (tdata == NULL) return (true); - if (likely(tdata->bytes_until_sample >= usize)) { + if (tdata->bytes_until_sample >= usize) { if (update) tdata->bytes_until_sample -= usize; return (true); @@ -480,17 +479,17 @@ prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) } JEMALLOC_ALWAYS_INLINE void -prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx) +prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); - assert(usize == isalloc(tsdn, ptr, true)); + assert(usize == isalloc(ptr, true)); if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) - prof_malloc_sample_object(tsdn, ptr, usize, tctx); + prof_malloc_sample_object(ptr, usize, tctx); else - prof_tctx_set(tsdn, ptr, usize, (prof_tctx_t *)(uintptr_t)1U); + prof_tctx_set(ptr, usize, (prof_tctx_t *)(uintptr_t)1U); } JEMALLOC_ALWAYS_INLINE void @@ -504,7 +503,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U); if (prof_active && !updated && ptr != NULL) { - assert(usize == isalloc(tsd_tsdn(tsd), ptr, true)); + assert(usize == isalloc(ptr, true)); if (prof_sample_accum_update(tsd, usize, true, NULL)) { /* * Don't sample. The usize passed to prof_alloc_prep() @@ -513,7 +512,6 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, * though its actual usize was insufficient to cross the * sample threshold. 
*/ - prof_alloc_rollback(tsd, tctx, true); tctx = (prof_tctx_t *)(uintptr_t)1U; } } @@ -522,9 +520,9 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U); if (unlikely(sampled)) - prof_malloc_sample_object(tsd_tsdn(tsd), ptr, usize, tctx); + prof_malloc_sample_object(ptr, usize, tctx); else - prof_tctx_reset(tsd_tsdn(tsd), ptr, usize, old_ptr, old_tctx); + prof_tctx_reset(ptr, usize, old_ptr, old_tctx); if (unlikely(old_sampled)) prof_free_sampled_object(tsd, old_usize, old_tctx); @@ -533,10 +531,10 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, JEMALLOC_ALWAYS_INLINE void prof_free(tsd_t *tsd, const void *ptr, size_t usize) { - prof_tctx_t *tctx = prof_tctx_get(tsd_tsdn(tsd), ptr); + prof_tctx_t *tctx = prof_tctx_get(ptr); cassert(config_prof); - assert(usize == isalloc(tsd_tsdn(tsd), ptr, true)); + assert(usize == isalloc(ptr, true)); if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) prof_free_sampled_object(tsd, usize, tctx); diff --git a/deps/jemalloc/include/jemalloc/internal/rb.h b/deps/jemalloc/include/jemalloc/internal/rb.h index 3770342f8..2ca8e5933 100644 --- a/deps/jemalloc/include/jemalloc/internal/rb.h +++ b/deps/jemalloc/include/jemalloc/internal/rb.h @@ -42,6 +42,7 @@ struct { \ #define rb_tree(a_type) \ struct { \ a_type *rbt_root; \ + a_type rbt_nil; \ } /* Left accessors. */ @@ -78,15 +79,6 @@ struct { \ (a_node)->a_field.rbn_right_red = (a_type *) (((intptr_t) \ (a_node)->a_field.rbn_right_red) & ((ssize_t)-2)); \ } while (0) - -/* Node initializer. */ -#define rbt_node_new(a_type, a_field, a_rbt, a_node) do { \ - /* Bookkeeping bit cannot be used by node pointer. */ \ - assert(((uintptr_t)(a_node) & 0x1) == 0); \ - rbtn_left_set(a_type, a_field, (a_node), NULL); \ - rbtn_right_set(a_type, a_field, (a_node), NULL); \ - rbtn_red_set(a_type, a_field, (a_node)); \ -} while (0) #else /* Right accessors. 
*/ #define rbtn_right_get(a_type, a_field, a_node) \ @@ -107,26 +99,28 @@ struct { \ #define rbtn_black_set(a_type, a_field, a_node) do { \ (a_node)->a_field.rbn_red = false; \ } while (0) +#endif /* Node initializer. */ #define rbt_node_new(a_type, a_field, a_rbt, a_node) do { \ - rbtn_left_set(a_type, a_field, (a_node), NULL); \ - rbtn_right_set(a_type, a_field, (a_node), NULL); \ + rbtn_left_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \ + rbtn_right_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \ rbtn_red_set(a_type, a_field, (a_node)); \ } while (0) -#endif /* Tree initializer. */ #define rb_new(a_type, a_field, a_rbt) do { \ - (a_rbt)->rbt_root = NULL; \ + (a_rbt)->rbt_root = &(a_rbt)->rbt_nil; \ + rbt_node_new(a_type, a_field, a_rbt, &(a_rbt)->rbt_nil); \ + rbtn_black_set(a_type, a_field, &(a_rbt)->rbt_nil); \ } while (0) /* Internal utility macros. */ #define rbtn_first(a_type, a_field, a_rbt, a_root, r_node) do { \ (r_node) = (a_root); \ - if ((r_node) != NULL) { \ + if ((r_node) != &(a_rbt)->rbt_nil) { \ for (; \ - rbtn_left_get(a_type, a_field, (r_node)) != NULL; \ + rbtn_left_get(a_type, a_field, (r_node)) != &(a_rbt)->rbt_nil;\ (r_node) = rbtn_left_get(a_type, a_field, (r_node))) { \ } \ } \ @@ -134,9 +128,10 @@ struct { \ #define rbtn_last(a_type, a_field, a_rbt, a_root, r_node) do { \ (r_node) = (a_root); \ - if ((r_node) != NULL) { \ - for (; rbtn_right_get(a_type, a_field, (r_node)) != NULL; \ - (r_node) = rbtn_right_get(a_type, a_field, (r_node))) { \ + if ((r_node) != &(a_rbt)->rbt_nil) { \ + for (; rbtn_right_get(a_type, a_field, (r_node)) != \ + &(a_rbt)->rbt_nil; (r_node) = rbtn_right_get(a_type, a_field, \ + (r_node))) { \ } \ } \ } while (0) @@ -174,11 +169,11 @@ a_prefix##next(a_rbt_type *rbtree, a_type *node); \ a_attr a_type * \ a_prefix##prev(a_rbt_type *rbtree, a_type *node); \ a_attr a_type * \ -a_prefix##search(a_rbt_type *rbtree, const a_type *key); \ +a_prefix##search(a_rbt_type *rbtree, a_type *key); \ a_attr a_type * \ 
-a_prefix##nsearch(a_rbt_type *rbtree, const a_type *key); \ +a_prefix##nsearch(a_rbt_type *rbtree, a_type *key); \ a_attr a_type * \ -a_prefix##psearch(a_rbt_type *rbtree, const a_type *key); \ +a_prefix##psearch(a_rbt_type *rbtree, a_type *key); \ a_attr void \ a_prefix##insert(a_rbt_type *rbtree, a_type *node); \ a_attr void \ @@ -188,10 +183,7 @@ a_prefix##iter(a_rbt_type *rbtree, a_type *start, a_type *(*cb)( \ a_rbt_type *, a_type *, void *), void *arg); \ a_attr a_type * \ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ - a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg); \ -a_attr void \ -a_prefix##destroy(a_rbt_type *rbtree, void (*cb)(a_type *, void *), \ - void *arg); + a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg); /* * The rb_gen() macro generates a type-specific red-black tree implementation, @@ -262,7 +254,7 @@ a_prefix##destroy(a_rbt_type *rbtree, void (*cb)(a_type *, void *), \ * last/first. * * static ex_node_t * - * ex_search(ex_t *tree, const ex_node_t *key); + * ex_search(ex_t *tree, ex_node_t *key); * Description: Search for node that matches key. * Args: * tree: Pointer to an initialized red-black tree object. @@ -270,9 +262,9 @@ a_prefix##destroy(a_rbt_type *rbtree, void (*cb)(a_type *, void *), \ * Ret: Node in tree that matches key, or NULL if no match. * * static ex_node_t * - * ex_nsearch(ex_t *tree, const ex_node_t *key); + * ex_nsearch(ex_t *tree, ex_node_t *key); * static ex_node_t * - * ex_psearch(ex_t *tree, const ex_node_t *key); + * ex_psearch(ex_t *tree, ex_node_t *key); * Description: Search for node that matches key. If no match is found, * return what would be key's successor/predecessor, were * key in tree. @@ -320,20 +312,6 @@ a_prefix##destroy(a_rbt_type *rbtree, void (*cb)(a_type *, void *), \ * arg : Opaque pointer passed to cb(). * Ret: NULL if iteration completed, or the non-NULL callback return value * that caused termination of the iteration. 
- * - * static void - * ex_destroy(ex_t *tree, void (*cb)(ex_node_t *, void *), void *arg); - * Description: Iterate over the tree with post-order traversal, remove - * each node, and run the callback if non-null. This is - * used for destroying a tree without paying the cost to - * rebalance it. The tree must not be otherwise altered - * during traversal. - * Args: - * tree: Pointer to an initialized red-black tree object. - * cb : Callback function, which, if non-null, is called for each node - * during iteration. There is no way to stop iteration once it - * has begun. - * arg : Opaque pointer passed to cb(). */ #define rb_gen(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp) \ a_attr void \ @@ -342,30 +320,36 @@ a_prefix##new(a_rbt_type *rbtree) { \ } \ a_attr bool \ a_prefix##empty(a_rbt_type *rbtree) { \ - return (rbtree->rbt_root == NULL); \ + return (rbtree->rbt_root == &rbtree->rbt_nil); \ } \ a_attr a_type * \ a_prefix##first(a_rbt_type *rbtree) { \ a_type *ret; \ rbtn_first(a_type, a_field, rbtree, rbtree->rbt_root, ret); \ + if (ret == &rbtree->rbt_nil) { \ + ret = NULL; \ + } \ return (ret); \ } \ a_attr a_type * \ a_prefix##last(a_rbt_type *rbtree) { \ a_type *ret; \ rbtn_last(a_type, a_field, rbtree, rbtree->rbt_root, ret); \ + if (ret == &rbtree->rbt_nil) { \ + ret = NULL; \ + } \ return (ret); \ } \ a_attr a_type * \ a_prefix##next(a_rbt_type *rbtree, a_type *node) { \ a_type *ret; \ - if (rbtn_right_get(a_type, a_field, node) != NULL) { \ + if (rbtn_right_get(a_type, a_field, node) != &rbtree->rbt_nil) { \ rbtn_first(a_type, a_field, rbtree, rbtn_right_get(a_type, \ a_field, node), ret); \ } else { \ a_type *tnode = rbtree->rbt_root; \ - assert(tnode != NULL); \ - ret = NULL; \ + assert(tnode != &rbtree->rbt_nil); \ + ret = &rbtree->rbt_nil; \ while (true) { \ int cmp = (a_cmp)(node, tnode); \ if (cmp < 0) { \ @@ -376,21 +360,24 @@ a_prefix##next(a_rbt_type *rbtree, a_type *node) { \ } else { \ break; \ } \ - assert(tnode != NULL); \ + 
assert(tnode != &rbtree->rbt_nil); \ } \ } \ + if (ret == &rbtree->rbt_nil) { \ + ret = (NULL); \ + } \ return (ret); \ } \ a_attr a_type * \ a_prefix##prev(a_rbt_type *rbtree, a_type *node) { \ a_type *ret; \ - if (rbtn_left_get(a_type, a_field, node) != NULL) { \ + if (rbtn_left_get(a_type, a_field, node) != &rbtree->rbt_nil) { \ rbtn_last(a_type, a_field, rbtree, rbtn_left_get(a_type, \ a_field, node), ret); \ } else { \ a_type *tnode = rbtree->rbt_root; \ - assert(tnode != NULL); \ - ret = NULL; \ + assert(tnode != &rbtree->rbt_nil); \ + ret = &rbtree->rbt_nil; \ while (true) { \ int cmp = (a_cmp)(node, tnode); \ if (cmp < 0) { \ @@ -401,17 +388,20 @@ a_prefix##prev(a_rbt_type *rbtree, a_type *node) { \ } else { \ break; \ } \ - assert(tnode != NULL); \ + assert(tnode != &rbtree->rbt_nil); \ } \ } \ + if (ret == &rbtree->rbt_nil) { \ + ret = (NULL); \ + } \ return (ret); \ } \ a_attr a_type * \ -a_prefix##search(a_rbt_type *rbtree, const a_type *key) { \ +a_prefix##search(a_rbt_type *rbtree, a_type *key) { \ a_type *ret; \ int cmp; \ ret = rbtree->rbt_root; \ - while (ret != NULL \ + while (ret != &rbtree->rbt_nil \ && (cmp = (a_cmp)(key, ret)) != 0) { \ if (cmp < 0) { \ ret = rbtn_left_get(a_type, a_field, ret); \ @@ -419,14 +409,17 @@ a_prefix##search(a_rbt_type *rbtree, const a_type *key) { \ ret = rbtn_right_get(a_type, a_field, ret); \ } \ } \ + if (ret == &rbtree->rbt_nil) { \ + ret = (NULL); \ + } \ return (ret); \ } \ a_attr a_type * \ -a_prefix##nsearch(a_rbt_type *rbtree, const a_type *key) { \ +a_prefix##nsearch(a_rbt_type *rbtree, a_type *key) { \ a_type *ret; \ a_type *tnode = rbtree->rbt_root; \ - ret = NULL; \ - while (tnode != NULL) { \ + ret = &rbtree->rbt_nil; \ + while (tnode != &rbtree->rbt_nil) { \ int cmp = (a_cmp)(key, tnode); \ if (cmp < 0) { \ ret = tnode; \ @@ -438,14 +431,17 @@ a_prefix##nsearch(a_rbt_type *rbtree, const a_type *key) { \ break; \ } \ } \ + if (ret == &rbtree->rbt_nil) { \ + ret = (NULL); \ + } \ return (ret); \ } \ 
a_attr a_type * \ -a_prefix##psearch(a_rbt_type *rbtree, const a_type *key) { \ +a_prefix##psearch(a_rbt_type *rbtree, a_type *key) { \ a_type *ret; \ a_type *tnode = rbtree->rbt_root; \ - ret = NULL; \ - while (tnode != NULL) { \ + ret = &rbtree->rbt_nil; \ + while (tnode != &rbtree->rbt_nil) { \ int cmp = (a_cmp)(key, tnode); \ if (cmp < 0) { \ tnode = rbtn_left_get(a_type, a_field, tnode); \ @@ -457,6 +453,9 @@ a_prefix##psearch(a_rbt_type *rbtree, const a_type *key) { \ break; \ } \ } \ + if (ret == &rbtree->rbt_nil) { \ + ret = (NULL); \ + } \ return (ret); \ } \ a_attr void \ @@ -468,7 +467,7 @@ a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \ rbt_node_new(a_type, a_field, rbtree, node); \ /* Wind. */ \ path->node = rbtree->rbt_root; \ - for (pathp = path; pathp->node != NULL; pathp++) { \ + for (pathp = path; pathp->node != &rbtree->rbt_nil; pathp++) { \ int cmp = pathp->cmp = a_cmp(node, pathp->node); \ assert(cmp != 0); \ if (cmp < 0) { \ @@ -488,8 +487,7 @@ a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \ rbtn_left_set(a_type, a_field, cnode, left); \ if (rbtn_red_get(a_type, a_field, left)) { \ a_type *leftleft = rbtn_left_get(a_type, a_field, left);\ - if (leftleft != NULL && rbtn_red_get(a_type, a_field, \ - leftleft)) { \ + if (rbtn_red_get(a_type, a_field, leftleft)) { \ /* Fix up 4-node. */ \ a_type *tnode; \ rbtn_black_set(a_type, a_field, leftleft); \ @@ -504,8 +502,7 @@ a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \ rbtn_right_set(a_type, a_field, cnode, right); \ if (rbtn_red_get(a_type, a_field, right)) { \ a_type *left = rbtn_left_get(a_type, a_field, cnode); \ - if (left != NULL && rbtn_red_get(a_type, a_field, \ - left)) { \ + if (rbtn_red_get(a_type, a_field, left)) { \ /* Split 4-node. */ \ rbtn_black_set(a_type, a_field, left); \ rbtn_black_set(a_type, a_field, right); \ @@ -538,7 +535,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ /* Wind. */ \ nodep = NULL; /* Silence compiler warning. 
*/ \ path->node = rbtree->rbt_root; \ - for (pathp = path; pathp->node != NULL; pathp++) { \ + for (pathp = path; pathp->node != &rbtree->rbt_nil; pathp++) { \ int cmp = pathp->cmp = a_cmp(node, pathp->node); \ if (cmp < 0) { \ pathp[1].node = rbtn_left_get(a_type, a_field, \ @@ -550,7 +547,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ /* Find node's successor, in preparation for swap. */ \ pathp->cmp = 1; \ nodep = pathp; \ - for (pathp++; pathp->node != NULL; \ + for (pathp++; pathp->node != &rbtree->rbt_nil; \ pathp++) { \ pathp->cmp = -1; \ pathp[1].node = rbtn_left_get(a_type, a_field, \ @@ -593,7 +590,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ } \ } else { \ a_type *left = rbtn_left_get(a_type, a_field, node); \ - if (left != NULL) { \ + if (left != &rbtree->rbt_nil) { \ /* node has no successor, but it has a left child. */\ /* Splice node out, without losing the left child. */\ assert(!rbtn_red_get(a_type, a_field, node)); \ @@ -613,32 +610,33 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ return; \ } else if (pathp == path) { \ /* The tree only contained one node. */ \ - rbtree->rbt_root = NULL; \ + rbtree->rbt_root = &rbtree->rbt_nil; \ return; \ } \ } \ if (rbtn_red_get(a_type, a_field, pathp->node)) { \ /* Prune red node, which requires no fixup. */ \ assert(pathp[-1].cmp < 0); \ - rbtn_left_set(a_type, a_field, pathp[-1].node, NULL); \ + rbtn_left_set(a_type, a_field, pathp[-1].node, \ + &rbtree->rbt_nil); \ return; \ } \ /* The node to be pruned is black, so unwind until balance is */\ /* restored. 
*/\ - pathp->node = NULL; \ + pathp->node = &rbtree->rbt_nil; \ for (pathp--; (uintptr_t)pathp >= (uintptr_t)path; pathp--) { \ assert(pathp->cmp != 0); \ if (pathp->cmp < 0) { \ rbtn_left_set(a_type, a_field, pathp->node, \ pathp[1].node); \ + assert(!rbtn_red_get(a_type, a_field, pathp[1].node)); \ if (rbtn_red_get(a_type, a_field, pathp->node)) { \ a_type *right = rbtn_right_get(a_type, a_field, \ pathp->node); \ a_type *rightleft = rbtn_left_get(a_type, a_field, \ right); \ a_type *tnode; \ - if (rightleft != NULL && rbtn_red_get(a_type, a_field, \ - rightleft)) { \ + if (rbtn_red_get(a_type, a_field, rightleft)) { \ /* In the following diagrams, ||, //, and \\ */\ /* indicate the path to the removed node. */\ /* */\ @@ -681,8 +679,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ pathp->node); \ a_type *rightleft = rbtn_left_get(a_type, a_field, \ right); \ - if (rightleft != NULL && rbtn_red_get(a_type, a_field, \ - rightleft)) { \ + if (rbtn_red_get(a_type, a_field, rightleft)) { \ /* || */\ /* pathp(b) */\ /* // \ */\ @@ -736,8 +733,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ left); \ a_type *leftrightleft = rbtn_left_get(a_type, a_field, \ leftright); \ - if (leftrightleft != NULL && rbtn_red_get(a_type, \ - a_field, leftrightleft)) { \ + if (rbtn_red_get(a_type, a_field, leftrightleft)) { \ /* || */\ /* pathp(b) */\ /* / \\ */\ @@ -763,7 +759,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ /* (b) */\ /* / */\ /* (b) */\ - assert(leftright != NULL); \ + assert(leftright != &rbtree->rbt_nil); \ rbtn_red_set(a_type, a_field, leftright); \ rbtn_rotate_right(a_type, a_field, pathp->node, \ tnode); \ @@ -786,8 +782,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ return; \ } else if (rbtn_red_get(a_type, a_field, pathp->node)) { \ a_type *leftleft = rbtn_left_get(a_type, a_field, left);\ - if (leftleft != NULL && rbtn_red_get(a_type, a_field, \ - leftleft)) { \ + if (rbtn_red_get(a_type, a_field, leftleft)) { \ 
/* || */\ /* pathp(r) */\ /* / \\ */\ @@ -825,8 +820,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ } \ } else { \ a_type *leftleft = rbtn_left_get(a_type, a_field, left);\ - if (leftleft != NULL && rbtn_red_get(a_type, a_field, \ - leftleft)) { \ + if (rbtn_red_get(a_type, a_field, leftleft)) { \ /* || */\ /* pathp(b) */\ /* / \\ */\ @@ -872,13 +866,13 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ a_attr a_type * \ a_prefix##iter_recurse(a_rbt_type *rbtree, a_type *node, \ a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ - if (node == NULL) { \ - return (NULL); \ + if (node == &rbtree->rbt_nil) { \ + return (&rbtree->rbt_nil); \ } else { \ a_type *ret; \ if ((ret = a_prefix##iter_recurse(rbtree, rbtn_left_get(a_type, \ - a_field, node), cb, arg)) != NULL || (ret = cb(rbtree, node, \ - arg)) != NULL) { \ + a_field, node), cb, arg)) != &rbtree->rbt_nil \ + || (ret = cb(rbtree, node, arg)) != NULL) { \ return (ret); \ } \ return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ @@ -892,8 +886,8 @@ a_prefix##iter_start(a_rbt_type *rbtree, a_type *start, a_type *node, \ if (cmp < 0) { \ a_type *ret; \ if ((ret = a_prefix##iter_start(rbtree, start, \ - rbtn_left_get(a_type, a_field, node), cb, arg)) != NULL || \ - (ret = cb(rbtree, node, arg)) != NULL) { \ + rbtn_left_get(a_type, a_field, node), cb, arg)) != \ + &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \ return (ret); \ } \ return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ @@ -920,18 +914,21 @@ a_prefix##iter(a_rbt_type *rbtree, a_type *start, a_type *(*cb)( \ } else { \ ret = a_prefix##iter_recurse(rbtree, rbtree->rbt_root, cb, arg);\ } \ + if (ret == &rbtree->rbt_nil) { \ + ret = NULL; \ + } \ return (ret); \ } \ a_attr a_type * \ a_prefix##reverse_iter_recurse(a_rbt_type *rbtree, a_type *node, \ a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ - if (node == NULL) { \ - return (NULL); \ + if (node == &rbtree->rbt_nil) { \ + 
return (&rbtree->rbt_nil); \ } else { \ a_type *ret; \ if ((ret = a_prefix##reverse_iter_recurse(rbtree, \ - rbtn_right_get(a_type, a_field, node), cb, arg)) != NULL || \ - (ret = cb(rbtree, node, arg)) != NULL) { \ + rbtn_right_get(a_type, a_field, node), cb, arg)) != \ + &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \ return (ret); \ } \ return (a_prefix##reverse_iter_recurse(rbtree, \ @@ -946,8 +943,8 @@ a_prefix##reverse_iter_start(a_rbt_type *rbtree, a_type *start, \ if (cmp > 0) { \ a_type *ret; \ if ((ret = a_prefix##reverse_iter_start(rbtree, start, \ - rbtn_right_get(a_type, a_field, node), cb, arg)) != NULL || \ - (ret = cb(rbtree, node, arg)) != NULL) { \ + rbtn_right_get(a_type, a_field, node), cb, arg)) != \ + &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \ return (ret); \ } \ return (a_prefix##reverse_iter_recurse(rbtree, \ @@ -975,29 +972,10 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ ret = a_prefix##reverse_iter_recurse(rbtree, rbtree->rbt_root, \ cb, arg); \ } \ + if (ret == &rbtree->rbt_nil) { \ + ret = NULL; \ + } \ return (ret); \ -} \ -a_attr void \ -a_prefix##destroy_recurse(a_rbt_type *rbtree, a_type *node, void (*cb)( \ - a_type *, void *), void *arg) { \ - if (node == NULL) { \ - return; \ - } \ - a_prefix##destroy_recurse(rbtree, rbtn_left_get(a_type, a_field, \ - node), cb, arg); \ - rbtn_left_set(a_type, a_field, (node), NULL); \ - a_prefix##destroy_recurse(rbtree, rbtn_right_get(a_type, a_field, \ - node), cb, arg); \ - rbtn_right_set(a_type, a_field, (node), NULL); \ - if (cb) { \ - cb(node, arg); \ - } \ -} \ -a_attr void \ -a_prefix##destroy(a_rbt_type *rbtree, void (*cb)(a_type *, void *), \ - void *arg) { \ - a_prefix##destroy_recurse(rbtree, rbtree->rbt_root, cb, arg); \ - rbtree->rbt_root = NULL; \ } #endif /* RB_H_ */ diff --git a/deps/jemalloc/include/jemalloc/internal/rtree.h b/deps/jemalloc/include/jemalloc/internal/rtree.h index 8d0c584da..28ae9d1dd 100644 --- 
a/deps/jemalloc/include/jemalloc/internal/rtree.h +++ b/deps/jemalloc/include/jemalloc/internal/rtree.h @@ -15,10 +15,9 @@ typedef struct rtree_s rtree_t; * machine address width. */ #define LG_RTREE_BITS_PER_LEVEL 4 -#define RTREE_BITS_PER_LEVEL (1U << LG_RTREE_BITS_PER_LEVEL) -/* Maximum rtree height. */ +#define RTREE_BITS_PER_LEVEL (ZU(1) << LG_RTREE_BITS_PER_LEVEL) #define RTREE_HEIGHT_MAX \ - ((1U << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL) + ((ZU(1) << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL) /* Used for two-stage lock-free node initialization. */ #define RTREE_NODE_INITIALIZING ((rtree_node_elm_t *)0x1) @@ -112,25 +111,22 @@ unsigned rtree_start_level(rtree_t *rtree, uintptr_t key); uintptr_t rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level); bool rtree_node_valid(rtree_node_elm_t *node); -rtree_node_elm_t *rtree_child_tryread(rtree_node_elm_t *elm, - bool dependent); +rtree_node_elm_t *rtree_child_tryread(rtree_node_elm_t *elm); rtree_node_elm_t *rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, - unsigned level, bool dependent); + unsigned level); extent_node_t *rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm, bool dependent); void rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, const extent_node_t *val); -rtree_node_elm_t *rtree_subtree_tryread(rtree_t *rtree, unsigned level, - bool dependent); -rtree_node_elm_t *rtree_subtree_read(rtree_t *rtree, unsigned level, - bool dependent); +rtree_node_elm_t *rtree_subtree_tryread(rtree_t *rtree, unsigned level); +rtree_node_elm_t *rtree_subtree_read(rtree_t *rtree, unsigned level); extent_node_t *rtree_get(rtree_t *rtree, uintptr_t key, bool dependent); bool rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_)) -JEMALLOC_ALWAYS_INLINE unsigned +JEMALLOC_INLINE unsigned rtree_start_level(rtree_t *rtree, uintptr_t key) { unsigned start_level; @@ -144,7 +140,7 @@ rtree_start_level(rtree_t *rtree, 
uintptr_t key) return (start_level); } -JEMALLOC_ALWAYS_INLINE uintptr_t +JEMALLOC_INLINE uintptr_t rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level) { @@ -153,40 +149,37 @@ rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level) rtree->levels[level].bits) - 1)); } -JEMALLOC_ALWAYS_INLINE bool +JEMALLOC_INLINE bool rtree_node_valid(rtree_node_elm_t *node) { return ((uintptr_t)node > (uintptr_t)RTREE_NODE_INITIALIZING); } -JEMALLOC_ALWAYS_INLINE rtree_node_elm_t * -rtree_child_tryread(rtree_node_elm_t *elm, bool dependent) +JEMALLOC_INLINE rtree_node_elm_t * +rtree_child_tryread(rtree_node_elm_t *elm) { rtree_node_elm_t *child; /* Double-checked read (first read may be stale. */ child = elm->child; - if (!dependent && !rtree_node_valid(child)) + if (!rtree_node_valid(child)) child = atomic_read_p(&elm->pun); - assert(!dependent || child != NULL); return (child); } -JEMALLOC_ALWAYS_INLINE rtree_node_elm_t * -rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level, - bool dependent) +JEMALLOC_INLINE rtree_node_elm_t * +rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level) { rtree_node_elm_t *child; - child = rtree_child_tryread(elm, dependent); - if (!dependent && unlikely(!rtree_node_valid(child))) + child = rtree_child_tryread(elm); + if (unlikely(!rtree_node_valid(child))) child = rtree_child_read_hard(rtree, elm, level); - assert(!dependent || child != NULL); return (child); } -JEMALLOC_ALWAYS_INLINE extent_node_t * +JEMALLOC_INLINE extent_node_t * rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm, bool dependent) { @@ -215,119 +208,54 @@ rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, const extent_node_t *val) atomic_write_p(&elm->pun, val); } -JEMALLOC_ALWAYS_INLINE rtree_node_elm_t * -rtree_subtree_tryread(rtree_t *rtree, unsigned level, bool dependent) +JEMALLOC_INLINE rtree_node_elm_t * +rtree_subtree_tryread(rtree_t *rtree, unsigned level) { rtree_node_elm_t *subtree; /* Double-checked read (first read 
may be stale. */ subtree = rtree->levels[level].subtree; - if (!dependent && unlikely(!rtree_node_valid(subtree))) + if (!rtree_node_valid(subtree)) subtree = atomic_read_p(&rtree->levels[level].subtree_pun); - assert(!dependent || subtree != NULL); return (subtree); } -JEMALLOC_ALWAYS_INLINE rtree_node_elm_t * -rtree_subtree_read(rtree_t *rtree, unsigned level, bool dependent) +JEMALLOC_INLINE rtree_node_elm_t * +rtree_subtree_read(rtree_t *rtree, unsigned level) { rtree_node_elm_t *subtree; - subtree = rtree_subtree_tryread(rtree, level, dependent); - if (!dependent && unlikely(!rtree_node_valid(subtree))) + subtree = rtree_subtree_tryread(rtree, level); + if (unlikely(!rtree_node_valid(subtree))) subtree = rtree_subtree_read_hard(rtree, level); - assert(!dependent || subtree != NULL); return (subtree); } -JEMALLOC_ALWAYS_INLINE extent_node_t * +JEMALLOC_INLINE extent_node_t * rtree_get(rtree_t *rtree, uintptr_t key, bool dependent) { uintptr_t subkey; - unsigned start_level; - rtree_node_elm_t *node; + unsigned i, start_level; + rtree_node_elm_t *node, *child; start_level = rtree_start_level(rtree, key); - node = rtree_subtree_tryread(rtree, start_level, dependent); -#define RTREE_GET_BIAS (RTREE_HEIGHT_MAX - rtree->height) - switch (start_level + RTREE_GET_BIAS) { -#define RTREE_GET_SUBTREE(level) \ - case level: \ - assert(level < (RTREE_HEIGHT_MAX-1)); \ - if (!dependent && unlikely(!rtree_node_valid(node))) \ - return (NULL); \ - subkey = rtree_subkey(rtree, key, level - \ - RTREE_GET_BIAS); \ - node = rtree_child_tryread(&node[subkey], dependent); \ - /* Fall through. */ -#define RTREE_GET_LEAF(level) \ - case level: \ - assert(level == (RTREE_HEIGHT_MAX-1)); \ - if (!dependent && unlikely(!rtree_node_valid(node))) \ - return (NULL); \ - subkey = rtree_subkey(rtree, key, level - \ - RTREE_GET_BIAS); \ - /* \ - * node is a leaf, so it contains values rather than \ - * child pointers. 
\ - */ \ - return (rtree_val_read(rtree, &node[subkey], \ - dependent)); -#if RTREE_HEIGHT_MAX > 1 - RTREE_GET_SUBTREE(0) -#endif -#if RTREE_HEIGHT_MAX > 2 - RTREE_GET_SUBTREE(1) -#endif -#if RTREE_HEIGHT_MAX > 3 - RTREE_GET_SUBTREE(2) -#endif -#if RTREE_HEIGHT_MAX > 4 - RTREE_GET_SUBTREE(3) -#endif -#if RTREE_HEIGHT_MAX > 5 - RTREE_GET_SUBTREE(4) -#endif -#if RTREE_HEIGHT_MAX > 6 - RTREE_GET_SUBTREE(5) -#endif -#if RTREE_HEIGHT_MAX > 7 - RTREE_GET_SUBTREE(6) -#endif -#if RTREE_HEIGHT_MAX > 8 - RTREE_GET_SUBTREE(7) -#endif -#if RTREE_HEIGHT_MAX > 9 - RTREE_GET_SUBTREE(8) -#endif -#if RTREE_HEIGHT_MAX > 10 - RTREE_GET_SUBTREE(9) -#endif -#if RTREE_HEIGHT_MAX > 11 - RTREE_GET_SUBTREE(10) -#endif -#if RTREE_HEIGHT_MAX > 12 - RTREE_GET_SUBTREE(11) -#endif -#if RTREE_HEIGHT_MAX > 13 - RTREE_GET_SUBTREE(12) -#endif -#if RTREE_HEIGHT_MAX > 14 - RTREE_GET_SUBTREE(13) -#endif -#if RTREE_HEIGHT_MAX > 15 - RTREE_GET_SUBTREE(14) -#endif -#if RTREE_HEIGHT_MAX > 16 -# error Unsupported RTREE_HEIGHT_MAX -#endif - RTREE_GET_LEAF(RTREE_HEIGHT_MAX-1) -#undef RTREE_GET_SUBTREE -#undef RTREE_GET_LEAF - default: not_reached(); + for (i = start_level, node = rtree_subtree_tryread(rtree, start_level); + /**/; i++, node = child) { + if (!dependent && unlikely(!rtree_node_valid(node))) + return (NULL); + subkey = rtree_subkey(rtree, key, i); + if (i == rtree->height - 1) { + /* + * node is a leaf, so it contains values rather than + * child pointers. 
+ */ + return (rtree_val_read(rtree, &node[subkey], + dependent)); + } + assert(i < rtree->height - 1); + child = rtree_child_tryread(&node[subkey]); } -#undef RTREE_GET_BIAS not_reached(); } @@ -340,7 +268,7 @@ rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val) start_level = rtree_start_level(rtree, key); - node = rtree_subtree_read(rtree, start_level, false); + node = rtree_subtree_read(rtree, start_level); if (node == NULL) return (true); for (i = start_level; /**/; i++, node = child) { @@ -354,7 +282,7 @@ rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val) return (false); } assert(i + 1 < rtree->height); - child = rtree_child_read(rtree, &node[subkey], i, false); + child = rtree_child_read(rtree, &node[subkey], i); if (child == NULL) return (true); } diff --git a/deps/jemalloc/include/jemalloc/internal/size_classes.sh b/deps/jemalloc/include/jemalloc/internal/size_classes.sh index f6fbce4ef..fc82036d3 100755 --- a/deps/jemalloc/include/jemalloc/internal/size_classes.sh +++ b/deps/jemalloc/include/jemalloc/internal/size_classes.sh @@ -48,21 +48,6 @@ size_class() { lg_p=$5 lg_kmax=$6 - if [ ${lg_delta} -ge ${lg_p} ] ; then - psz="yes" - else - pow2 ${lg_p}; p=${pow2_result} - pow2 ${lg_grp}; grp=${pow2_result} - pow2 ${lg_delta}; delta=${pow2_result} - sz=$((${grp} + ${delta} * ${ndelta})) - npgs=$((${sz} / ${p})) - if [ ${sz} -eq $((${npgs} * ${p})) ] ; then - psz="yes" - else - psz="no" - fi - fi - lg ${ndelta}; lg_ndelta=${lg_result}; pow2 ${lg_ndelta} if [ ${pow2_result} -lt ${ndelta} ] ; then rem="yes" @@ -89,15 +74,14 @@ size_class() { else lg_delta_lookup="no" fi - printf ' SC(%3d, %6d, %8d, %6d, %3s, %3s, %2s) \\\n' ${index} ${lg_grp} ${lg_delta} ${ndelta} ${psz} ${bin} ${lg_delta_lookup} + printf ' SC(%3d, %6d, %8d, %6d, %3s, %2s) \\\n' ${index} ${lg_grp} ${lg_delta} ${ndelta} ${bin} ${lg_delta_lookup} # Defined upon return: - # - psz ("yes" or "no") - # - bin ("yes" or "no") # - lg_delta_lookup (${lg_delta} or "no") + # - 
bin ("yes" or "no") } sep_line() { - echo " \\" + echo " \\" } size_classes() { @@ -111,13 +95,12 @@ size_classes() { pow2 ${lg_g}; g=${pow2_result} echo "#define SIZE_CLASSES \\" - echo " /* index, lg_grp, lg_delta, ndelta, psz, bin, lg_delta_lookup */ \\" + echo " /* index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup */ \\" ntbins=0 nlbins=0 lg_tiny_maxclass='"NA"' nbins=0 - npsizes=0 # Tiny size classes. ndelta=0 @@ -129,9 +112,6 @@ size_classes() { if [ ${lg_delta_lookup} != "no" ] ; then nlbins=$((${index} + 1)) fi - if [ ${psz} = "yes" ] ; then - npsizes=$((${npsizes} + 1)) - fi if [ ${bin} != "no" ] ; then nbins=$((${index} + 1)) fi @@ -153,25 +133,19 @@ size_classes() { index=$((${index} + 1)) lg_grp=$((${lg_grp} + 1)) lg_delta=$((${lg_delta} + 1)) - if [ ${psz} = "yes" ] ; then - npsizes=$((${npsizes} + 1)) - fi fi while [ ${ndelta} -lt ${g} ] ; do size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax} index=$((${index} + 1)) ndelta=$((${ndelta} + 1)) - if [ ${psz} = "yes" ] ; then - npsizes=$((${npsizes} + 1)) - fi done # All remaining groups. lg_grp=$((${lg_grp} + ${lg_g})) - while [ ${lg_grp} -lt $((${ptr_bits} - 1)) ] ; do + while [ ${lg_grp} -lt ${ptr_bits} ] ; do sep_line ndelta=1 - if [ ${lg_grp} -eq $((${ptr_bits} - 2)) ] ; then + if [ ${lg_grp} -eq $((${ptr_bits} - 1)) ] ; then ndelta_limit=$((${g} - 1)) else ndelta_limit=${g} @@ -183,9 +157,6 @@ size_classes() { # Final written value is correct: lookup_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))" fi - if [ ${psz} = "yes" ] ; then - npsizes=$((${npsizes} + 1)) - fi if [ ${bin} != "no" ] ; then nbins=$((${index} + 1)) # Final written value is correct: @@ -212,7 +183,6 @@ size_classes() { # - nlbins # - nbins # - nsizes - # - npsizes # - lg_tiny_maxclass # - lookup_maxclass # - small_maxclass @@ -230,13 +200,13 @@ cat < -# -# is in {smooth, smoother, smoothest}. -# must be greater than zero. 
-# must be in [0..62]; reasonable values are roughly [10..30]. -# is x decimal precision. -# is y decimal precision. - -#set -x - -cmd="sh smoothstep.sh $*" -variant=$1 -nsteps=$2 -bfp=$3 -xprec=$4 -yprec=$5 - -case "${variant}" in - smooth) - ;; - smoother) - ;; - smoothest) - ;; - *) - echo "Unsupported variant" - exit 1 - ;; -esac - -smooth() { - step=$1 - y=`echo ${yprec} k ${step} ${nsteps} / sx _2 lx 3 ^ '*' 3 lx 2 ^ '*' + p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g'` - h=`echo ${yprec} k 2 ${bfp} ^ ${y} '*' p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g' | tr '.' ' ' | awk '{print $1}' ` -} - -smoother() { - step=$1 - y=`echo ${yprec} k ${step} ${nsteps} / sx 6 lx 5 ^ '*' _15 lx 4 ^ '*' + 10 lx 3 ^ '*' + p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g'` - h=`echo ${yprec} k 2 ${bfp} ^ ${y} '*' p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g' | tr '.' ' ' | awk '{print $1}' ` -} - -smoothest() { - step=$1 - y=`echo ${yprec} k ${step} ${nsteps} / sx _20 lx 7 ^ '*' 70 lx 6 ^ '*' + _84 lx 5 ^ '*' + 35 lx 4 ^ '*' + p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g'` - h=`echo ${yprec} k 2 ${bfp} ^ ${y} '*' p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g' | tr '.' ' ' | awk '{print $1}' ` -} - -cat <iteration = 0; -} - -JEMALLOC_INLINE void -spin_adaptive(spin_t *spin) -{ - volatile uint64_t i; - - for (i = 0; i < (KQU(1) << spin->iteration); i++) - CPU_SPINWAIT; - - if (spin->iteration < 63) - spin->iteration++; -} - -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ - diff --git a/deps/jemalloc/include/jemalloc/internal/stats.h b/deps/jemalloc/include/jemalloc/internal/stats.h index 04e7dae14..c91dba99d 100644 --- a/deps/jemalloc/include/jemalloc/internal/stats.h +++ b/deps/jemalloc/include/jemalloc/internal/stats.h @@ -102,14 +102,6 @@ struct arena_stats_s { /* Number of bytes currently mapped. 
*/ size_t mapped; - /* - * Number of bytes currently retained as a side effect of munmap() being - * disabled/bypassed. Retained bytes are technically mapped (though - * always decommitted or purged), but they are excluded from the mapped - * statistic (above). - */ - size_t retained; - /* * Total number of purge sweeps, total number of madvise calls made, * and total pages purged in order to keep dirty unused memory under @@ -176,9 +168,6 @@ JEMALLOC_INLINE void stats_cactive_add(size_t size) { - assert(size > 0); - assert((size & chunksize_mask) == 0); - atomic_add_z(&stats_cactive, size); } @@ -186,9 +175,6 @@ JEMALLOC_INLINE void stats_cactive_sub(size_t size) { - assert(size > 0); - assert((size & chunksize_mask) == 0); - atomic_sub_z(&stats_cactive, size); } #endif diff --git a/deps/jemalloc/include/jemalloc/internal/tcache.h b/deps/jemalloc/include/jemalloc/internal/tcache.h index 01ba062de..5079cd266 100644 --- a/deps/jemalloc/include/jemalloc/internal/tcache.h +++ b/deps/jemalloc/include/jemalloc/internal/tcache.h @@ -70,20 +70,13 @@ struct tcache_bin_s { int low_water; /* Min # cached since last GC. */ unsigned lg_fill_div; /* Fill (ncached_max >> lg_fill_div). */ unsigned ncached; /* # of cached objects. */ - /* - * To make use of adjacent cacheline prefetch, the items in the avail - * stack goes to higher address for newer allocations. avail points - * just above the available space, which means that - * avail[-ncached, ... -1] are available items and the lowest item will - * be allocated first. - */ void **avail; /* Stack of available objects. */ }; struct tcache_s { ql_elm(tcache_t) link; /* Used for aggregating stats. */ uint64_t prof_accumbytes;/* Cleared after arena_prof_accum(). */ - ticker_t gc_ticker; /* Drives incremental GC. */ + unsigned ev_cnt; /* Event count since incremental GC. */ szind_t next_gc_bin; /* Next bin to GC. */ tcache_bin_t tbins[1]; /* Dynamically sized. 
*/ /* @@ -115,7 +108,7 @@ extern tcache_bin_info_t *tcache_bin_info; * Number of tcache bins. There are NBINS small-object bins, plus 0 or more * large-object bins. */ -extern unsigned nhbins; +extern size_t nhbins; /* Maximum cached size class. */ extern size_t tcache_maxclass; @@ -130,25 +123,27 @@ extern size_t tcache_maxclass; */ extern tcaches_t *tcaches; -size_t tcache_salloc(tsdn_t *tsdn, const void *ptr); +size_t tcache_salloc(const void *ptr); void tcache_event_hard(tsd_t *tsd, tcache_t *tcache); -void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - tcache_bin_t *tbin, szind_t binind, bool *tcache_success); +void *tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache, + tcache_bin_t *tbin, szind_t binind); void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, szind_t binind, unsigned rem); void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, unsigned rem, tcache_t *tcache); -void tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, - arena_t *oldarena, arena_t *newarena); +void tcache_arena_associate(tcache_t *tcache, arena_t *arena); +void tcache_arena_reassociate(tcache_t *tcache, arena_t *oldarena, + arena_t *newarena); +void tcache_arena_dissociate(tcache_t *tcache, arena_t *arena); tcache_t *tcache_get_hard(tsd_t *tsd); -tcache_t *tcache_create(tsdn_t *tsdn, arena_t *arena); +tcache_t *tcache_create(tsd_t *tsd, arena_t *arena); void tcache_cleanup(tsd_t *tsd); void tcache_enabled_cleanup(tsd_t *tsd); -void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); +void tcache_stats_merge(tcache_t *tcache, arena_t *arena); bool tcaches_create(tsd_t *tsd, unsigned *r_ind); void tcaches_flush(tsd_t *tsd, unsigned ind); void tcaches_destroy(tsd_t *tsd, unsigned ind); -bool tcache_boot(tsdn_t *tsdn); +bool tcache_boot(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ @@ -160,15 +155,15 
@@ void tcache_flush(void); bool tcache_enabled_get(void); tcache_t *tcache_get(tsd_t *tsd, bool create); void tcache_enabled_set(bool enabled); -void *tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success); +void *tcache_alloc_easy(tcache_bin_t *tbin); void *tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, - size_t size, szind_t ind, bool zero, bool slow_path); + size_t size, bool zero); void *tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, - size_t size, szind_t ind, bool zero, bool slow_path); + size_t size, bool zero); void tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, - szind_t binind, bool slow_path); + szind_t binind); void tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, - size_t size, bool slow_path); + size_t size); tcache_t *tcaches_get(tsd_t *tsd, unsigned ind); #endif @@ -245,74 +240,51 @@ tcache_event(tsd_t *tsd, tcache_t *tcache) if (TCACHE_GC_INCR == 0) return; - if (unlikely(ticker_tick(&tcache->gc_ticker))) + tcache->ev_cnt++; + assert(tcache->ev_cnt <= TCACHE_GC_INCR); + if (unlikely(tcache->ev_cnt == TCACHE_GC_INCR)) tcache_event_hard(tsd, tcache); } JEMALLOC_ALWAYS_INLINE void * -tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success) +tcache_alloc_easy(tcache_bin_t *tbin) { void *ret; if (unlikely(tbin->ncached == 0)) { tbin->low_water = -1; - *tcache_success = false; return (NULL); } - /* - * tcache_success (instead of ret) should be checked upon the return of - * this function. We avoid checking (ret == NULL) because there is - * never a null stored on the avail stack (which is unknown to the - * compiler), and eagerly checking ret would cause pipeline stall - * (waiting for the cacheline). 
- */ - *tcache_success = true; - ret = *(tbin->avail - tbin->ncached); tbin->ncached--; - if (unlikely((int)tbin->ncached < tbin->low_water)) tbin->low_water = tbin->ncached; - + ret = tbin->avail[tbin->ncached]; return (ret); } JEMALLOC_ALWAYS_INLINE void * tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, - szind_t binind, bool zero, bool slow_path) + bool zero) { void *ret; + szind_t binind; + size_t usize; tcache_bin_t *tbin; - bool tcache_success; - size_t usize JEMALLOC_CC_SILENCE_INIT(0); + binind = size2index(size); assert(binind < NBINS); tbin = &tcache->tbins[binind]; - ret = tcache_alloc_easy(tbin, &tcache_success); - assert(tcache_success == (ret != NULL)); - if (unlikely(!tcache_success)) { - bool tcache_hard_success; - arena = arena_choose(tsd, arena); - if (unlikely(arena == NULL)) - return (NULL); - - ret = tcache_alloc_small_hard(tsd_tsdn(tsd), arena, tcache, - tbin, binind, &tcache_hard_success); - if (tcache_hard_success == false) + usize = index2size(binind); + ret = tcache_alloc_easy(tbin); + if (unlikely(ret == NULL)) { + ret = tcache_alloc_small_hard(tsd, arena, tcache, tbin, binind); + if (ret == NULL) return (NULL); } - - assert(ret); - /* - * Only compute usize if required. The checks in the following if - * statement are all static. 
- */ - if (config_prof || (slow_path && config_fill) || unlikely(zero)) { - usize = index2size(binind); - assert(tcache_salloc(tsd_tsdn(tsd), ret) == usize); - } + assert(tcache_salloc(ret) == usize); if (likely(!zero)) { - if (slow_path && config_fill) { + if (config_fill) { if (unlikely(opt_junk_alloc)) { arena_alloc_junk_small(ret, &arena_bin_info[binind], false); @@ -320,7 +292,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, memset(ret, 0, usize); } } else { - if (slow_path && config_fill && unlikely(opt_junk_alloc)) { + if (config_fill && unlikely(opt_junk_alloc)) { arena_alloc_junk_small(ret, &arena_bin_info[binind], true); } @@ -337,38 +309,28 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, JEMALLOC_ALWAYS_INLINE void * tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, - szind_t binind, bool zero, bool slow_path) + bool zero) { void *ret; + szind_t binind; + size_t usize; tcache_bin_t *tbin; - bool tcache_success; + binind = size2index(size); + usize = index2size(binind); + assert(usize <= tcache_maxclass); assert(binind < nhbins); tbin = &tcache->tbins[binind]; - ret = tcache_alloc_easy(tbin, &tcache_success); - assert(tcache_success == (ret != NULL)); - if (unlikely(!tcache_success)) { + ret = tcache_alloc_easy(tbin); + if (unlikely(ret == NULL)) { /* * Only allocate one large object at a time, because it's quite * expensive to create one and not use it. 
*/ - arena = arena_choose(tsd, arena); - if (unlikely(arena == NULL)) - return (NULL); - - ret = arena_malloc_large(tsd_tsdn(tsd), arena, binind, zero); + ret = arena_malloc_large(arena, usize, zero); if (ret == NULL) return (NULL); } else { - size_t usize JEMALLOC_CC_SILENCE_INIT(0); - - /* Only compute usize on demand */ - if (config_prof || (slow_path && config_fill) || - unlikely(zero)) { - usize = index2size(binind); - assert(usize <= tcache_maxclass); - } - if (config_prof && usize == LARGE_MINCLASS) { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret); @@ -378,11 +340,10 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, BININD_INVALID); } if (likely(!zero)) { - if (slow_path && config_fill) { - if (unlikely(opt_junk_alloc)) { - memset(ret, JEMALLOC_ALLOC_JUNK, - usize); - } else if (unlikely(opt_zero)) + if (config_fill) { + if (unlikely(opt_junk_alloc)) + memset(ret, 0xa5, usize); + else if (unlikely(opt_zero)) memset(ret, 0, usize); } } else @@ -399,15 +360,14 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, } JEMALLOC_ALWAYS_INLINE void -tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, - bool slow_path) +tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind) { tcache_bin_t *tbin; tcache_bin_info_t *tbin_info; - assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SMALL_MAXCLASS); + assert(tcache_salloc(ptr) <= SMALL_MAXCLASS); - if (slow_path && config_fill && unlikely(opt_junk_free)) + if (config_fill && unlikely(opt_junk_free)) arena_dalloc_junk_small(ptr, &arena_bin_info[binind]); tbin = &tcache->tbins[binind]; @@ -417,27 +377,26 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, (tbin_info->ncached_max >> 1)); } assert(tbin->ncached < tbin_info->ncached_max); + tbin->avail[tbin->ncached] = ptr; tbin->ncached++; - *(tbin->avail - tbin->ncached) = ptr; tcache_event(tsd, tcache); } JEMALLOC_ALWAYS_INLINE void 
-tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size, - bool slow_path) +tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size) { szind_t binind; tcache_bin_t *tbin; tcache_bin_info_t *tbin_info; assert((size & PAGE_MASK) == 0); - assert(tcache_salloc(tsd_tsdn(tsd), ptr) > SMALL_MAXCLASS); - assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_maxclass); + assert(tcache_salloc(ptr) > SMALL_MAXCLASS); + assert(tcache_salloc(ptr) <= tcache_maxclass); binind = size2index(size); - if (slow_path && config_fill && unlikely(opt_junk_free)) + if (config_fill && unlikely(opt_junk_free)) arena_dalloc_junk_large(ptr, size); tbin = &tcache->tbins[binind]; @@ -447,8 +406,8 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size, (tbin_info->ncached_max >> 1), tcache); } assert(tbin->ncached < tbin_info->ncached_max); + tbin->avail[tbin->ncached] = ptr; tbin->ncached++; - *(tbin->avail - tbin->ncached) = ptr; tcache_event(tsd, tcache); } @@ -457,10 +416,8 @@ JEMALLOC_ALWAYS_INLINE tcache_t * tcaches_get(tsd_t *tsd, unsigned ind) { tcaches_t *elm = &tcaches[ind]; - if (unlikely(elm->tcache == NULL)) { - elm->tcache = tcache_create(tsd_tsdn(tsd), arena_choose(tsd, - NULL)); - } + if (unlikely(elm->tcache == NULL)) + elm->tcache = tcache_create(tsd, arena_choose(tsd, NULL)); return (elm->tcache); } #endif diff --git a/deps/jemalloc/include/jemalloc/internal/ticker.h b/deps/jemalloc/include/jemalloc/internal/ticker.h deleted file mode 100644 index 4696e56d2..000000000 --- a/deps/jemalloc/include/jemalloc/internal/ticker.h +++ /dev/null @@ -1,75 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct ticker_s ticker_t; - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct ticker_s { - int32_t tick; - int32_t nticks; -}; - -#endif /* JEMALLOC_H_STRUCTS */ 
-/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -void ticker_init(ticker_t *ticker, int32_t nticks); -void ticker_copy(ticker_t *ticker, const ticker_t *other); -int32_t ticker_read(const ticker_t *ticker); -bool ticker_ticks(ticker_t *ticker, int32_t nticks); -bool ticker_tick(ticker_t *ticker); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TICKER_C_)) -JEMALLOC_INLINE void -ticker_init(ticker_t *ticker, int32_t nticks) -{ - - ticker->tick = nticks; - ticker->nticks = nticks; -} - -JEMALLOC_INLINE void -ticker_copy(ticker_t *ticker, const ticker_t *other) -{ - - *ticker = *other; -} - -JEMALLOC_INLINE int32_t -ticker_read(const ticker_t *ticker) -{ - - return (ticker->tick); -} - -JEMALLOC_INLINE bool -ticker_ticks(ticker_t *ticker, int32_t nticks) -{ - - if (unlikely(ticker->tick < nticks)) { - ticker->tick = ticker->nticks; - return (true); - } - ticker->tick -= nticks; - return(false); -} - -JEMALLOC_INLINE bool -ticker_tick(ticker_t *ticker) -{ - - return (ticker_ticks(ticker, 1)); -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/tsd.h b/deps/jemalloc/include/jemalloc/internal/tsd.h index 9055acafd..eed7aa013 100644 --- a/deps/jemalloc/include/jemalloc/internal/tsd.h +++ b/deps/jemalloc/include/jemalloc/internal/tsd.h @@ -13,9 +13,6 @@ typedef struct tsd_init_head_s tsd_init_head_t; #endif typedef struct tsd_s tsd_t; -typedef struct tsdn_s tsdn_t; - -#define TSDN_NULL ((tsdn_t *)0) typedef enum { tsd_state_uninitialized, @@ -47,8 +44,7 @@ typedef enum { * The result is a set of generated functions, e.g.: * * bool example_tsd_boot(void) {...} - * bool 
example_tsd_booted_get(void) {...} - * example_t *example_tsd_get(bool init) {...} + * example_t *example_tsd_get() {...} * void example_tsd_set(example_t *val) {...} * * Note that all of the functions deal in terms of (a_type *) rather than @@ -102,10 +98,8 @@ a_attr void \ a_name##tsd_boot1(void); \ a_attr bool \ a_name##tsd_boot(void); \ -a_attr bool \ -a_name##tsd_booted_get(void); \ a_attr a_type * \ -a_name##tsd_get(bool init); \ +a_name##tsd_get(void); \ a_attr void \ a_name##tsd_set(a_type *val); @@ -207,21 +201,9 @@ a_name##tsd_boot(void) \ \ return (a_name##tsd_boot0()); \ } \ -a_attr bool \ -a_name##tsd_booted_get(void) \ -{ \ - \ - return (a_name##tsd_booted); \ -} \ -a_attr bool \ -a_name##tsd_get_allocates(void) \ -{ \ - \ - return (false); \ -} \ /* Get/set. */ \ a_attr a_type * \ -a_name##tsd_get(bool init) \ +a_name##tsd_get(void) \ { \ \ assert(a_name##tsd_booted); \ @@ -264,21 +246,9 @@ a_name##tsd_boot(void) \ \ return (a_name##tsd_boot0()); \ } \ -a_attr bool \ -a_name##tsd_booted_get(void) \ -{ \ - \ - return (a_name##tsd_booted); \ -} \ -a_attr bool \ -a_name##tsd_get_allocates(void) \ -{ \ - \ - return (false); \ -} \ /* Get/set. 
*/ \ a_attr a_type * \ -a_name##tsd_get(bool init) \ +a_name##tsd_get(void) \ { \ \ assert(a_name##tsd_booted); \ @@ -337,14 +307,14 @@ a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \ } \ } \ a_attr a_name##tsd_wrapper_t * \ -a_name##tsd_wrapper_get(bool init) \ +a_name##tsd_wrapper_get(void) \ { \ DWORD error = GetLastError(); \ a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ TlsGetValue(a_name##tsd_tsd); \ SetLastError(error); \ \ - if (init && unlikely(wrapper == NULL)) { \ + if (unlikely(wrapper == NULL)) { \ wrapper = (a_name##tsd_wrapper_t *) \ malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ if (wrapper == NULL) { \ @@ -398,28 +368,14 @@ a_name##tsd_boot(void) \ a_name##tsd_boot1(); \ return (false); \ } \ -a_attr bool \ -a_name##tsd_booted_get(void) \ -{ \ - \ - return (a_name##tsd_booted); \ -} \ -a_attr bool \ -a_name##tsd_get_allocates(void) \ -{ \ - \ - return (true); \ -} \ /* Get/set. */ \ a_attr a_type * \ -a_name##tsd_get(bool init) \ +a_name##tsd_get(void) \ { \ a_name##tsd_wrapper_t *wrapper; \ \ assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(init); \ - if (a_name##tsd_get_allocates() && !init && wrapper == NULL) \ - return (NULL); \ + wrapper = a_name##tsd_wrapper_get(); \ return (&wrapper->val); \ } \ a_attr void \ @@ -428,7 +384,7 @@ a_name##tsd_set(a_type *val) \ a_name##tsd_wrapper_t *wrapper; \ \ assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(true); \ + wrapper = a_name##tsd_wrapper_get(); \ wrapper->val = *(val); \ if (a_cleanup != malloc_tsd_no_cleanup) \ wrapper->initialized = true; \ @@ -472,12 +428,12 @@ a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \ } \ } \ a_attr a_name##tsd_wrapper_t * \ -a_name##tsd_wrapper_get(bool init) \ +a_name##tsd_wrapper_get(void) \ { \ a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ pthread_getspecific(a_name##tsd_tsd); \ \ - if (init && unlikely(wrapper == NULL)) { \ + if (unlikely(wrapper == NULL)) { \ tsd_init_block_t 
block; \ wrapper = tsd_init_check_recursion( \ &a_name##tsd_init_head, &block); \ @@ -534,28 +490,14 @@ a_name##tsd_boot(void) \ a_name##tsd_boot1(); \ return (false); \ } \ -a_attr bool \ -a_name##tsd_booted_get(void) \ -{ \ - \ - return (a_name##tsd_booted); \ -} \ -a_attr bool \ -a_name##tsd_get_allocates(void) \ -{ \ - \ - return (true); \ -} \ /* Get/set. */ \ a_attr a_type * \ -a_name##tsd_get(bool init) \ +a_name##tsd_get(void) \ { \ a_name##tsd_wrapper_t *wrapper; \ \ assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(init); \ - if (a_name##tsd_get_allocates() && !init && wrapper == NULL) \ - return (NULL); \ + wrapper = a_name##tsd_wrapper_get(); \ return (&wrapper->val); \ } \ a_attr void \ @@ -564,7 +506,7 @@ a_name##tsd_set(a_type *val) \ a_name##tsd_wrapper_t *wrapper; \ \ assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(true); \ + wrapper = a_name##tsd_wrapper_get(); \ wrapper->val = *(val); \ if (a_cleanup != malloc_tsd_no_cleanup) \ wrapper->initialized = true; \ @@ -594,15 +536,12 @@ struct tsd_init_head_s { O(thread_allocated, uint64_t) \ O(thread_deallocated, uint64_t) \ O(prof_tdata, prof_tdata_t *) \ - O(iarena, arena_t *) \ O(arena, arena_t *) \ - O(arenas_tdata, arena_tdata_t *) \ - O(narenas_tdata, unsigned) \ - O(arenas_tdata_bypass, bool) \ + O(arenas_cache, arena_t **) \ + O(narenas_cache, unsigned) \ + O(arenas_cache_bypass, bool) \ O(tcache_enabled, tcache_enabled_t) \ O(quarantine, quarantine_t *) \ - O(witnesses, witness_list_t) \ - O(witness_fork, bool) \ #define TSD_INITIALIZER { \ tsd_state_uninitialized, \ @@ -612,13 +551,10 @@ struct tsd_init_head_s { NULL, \ NULL, \ NULL, \ - NULL, \ 0, \ false, \ tcache_enabled_default, \ - NULL, \ - ql_head_initializer(witnesses), \ - false \ + NULL \ } struct tsd_s { @@ -629,15 +565,6 @@ MALLOC_TSD #undef O }; -/* - * Wrapper around tsd_t that makes it possible to avoid implicit conversion - * between tsd_t and tsdn_t, where tsdn_t is "nullable" and has to be - 
* explicitly converted to tsd_t, which is non-nullable. - */ -struct tsdn_s { - tsd_t tsd; -}; - static const tsd_t tsd_initializer = TSD_INITIALIZER; malloc_tsd_types(, tsd_t) @@ -650,7 +577,7 @@ void *malloc_tsd_malloc(size_t size); void malloc_tsd_dalloc(void *wrapper); void malloc_tsd_no_cleanup(void *arg); void malloc_tsd_cleanup_register(bool (*f)(void)); -tsd_t *malloc_tsd_boot0(void); +bool malloc_tsd_boot0(void); void malloc_tsd_boot1(void); #if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ !defined(_WIN32)) @@ -667,9 +594,7 @@ void tsd_cleanup(void *arg); #ifndef JEMALLOC_ENABLE_INLINE malloc_tsd_protos(JEMALLOC_ATTR(unused), , tsd_t) -tsd_t *tsd_fetch_impl(bool init); tsd_t *tsd_fetch(void); -tsdn_t *tsd_tsdn(tsd_t *tsd); bool tsd_nominal(tsd_t *tsd); #define O(n, t) \ t *tsd_##n##p_get(tsd_t *tsd); \ @@ -677,9 +602,6 @@ t tsd_##n##_get(tsd_t *tsd); \ void tsd_##n##_set(tsd_t *tsd, t n); MALLOC_TSD #undef O -tsdn_t *tsdn_fetch(void); -bool tsdn_null(const tsdn_t *tsdn); -tsd_t *tsdn_tsd(tsdn_t *tsdn); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TSD_C_)) @@ -687,13 +609,9 @@ malloc_tsd_externs(, tsd_t) malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, , tsd_t, tsd_initializer, tsd_cleanup) JEMALLOC_ALWAYS_INLINE tsd_t * -tsd_fetch_impl(bool init) +tsd_fetch(void) { - tsd_t *tsd = tsd_get(init); - - if (!init && tsd_get_allocates() && tsd == NULL) - return (NULL); - assert(tsd != NULL); + tsd_t *tsd = tsd_get(); if (unlikely(tsd->state != tsd_state_nominal)) { if (tsd->state == tsd_state_uninitialized) { @@ -710,20 +628,6 @@ tsd_fetch_impl(bool init) return (tsd); } -JEMALLOC_ALWAYS_INLINE tsd_t * -tsd_fetch(void) -{ - - return (tsd_fetch_impl(true)); -} - -JEMALLOC_ALWAYS_INLINE tsdn_t * -tsd_tsdn(tsd_t *tsd) -{ - - return ((tsdn_t *)tsd); -} - JEMALLOC_INLINE bool tsd_nominal(tsd_t *tsd) { @@ -755,32 +659,6 @@ tsd_##n##_set(tsd_t *tsd, t n) \ } MALLOC_TSD #undef O - -JEMALLOC_ALWAYS_INLINE tsdn_t * -tsdn_fetch(void) 
-{ - - if (!tsd_booted_get()) - return (NULL); - - return (tsd_tsdn(tsd_fetch_impl(false))); -} - -JEMALLOC_ALWAYS_INLINE bool -tsdn_null(const tsdn_t *tsdn) -{ - - return (tsdn == NULL); -} - -JEMALLOC_ALWAYS_INLINE tsd_t * -tsdn_tsd(tsdn_t *tsdn) -{ - - assert(!tsdn_null(tsdn)); - - return (&tsdn->tsd); -} #endif #endif /* JEMALLOC_H_INLINES */ diff --git a/deps/jemalloc/include/jemalloc/internal/util.h b/deps/jemalloc/include/jemalloc/internal/util.h index 4b56d652e..b2ea740fd 100644 --- a/deps/jemalloc/include/jemalloc/internal/util.h +++ b/deps/jemalloc/include/jemalloc/internal/util.h @@ -40,14 +40,6 @@ */ #define MALLOC_PRINTF_BUFSIZE 4096 -/* Junk fill patterns. */ -#ifndef JEMALLOC_ALLOC_JUNK -# define JEMALLOC_ALLOC_JUNK ((uint8_t)0xa5) -#endif -#ifndef JEMALLOC_FREE_JUNK -# define JEMALLOC_FREE_JUNK ((uint8_t)0x5a) -#endif - /* * Wrap a cpp argument that contains commas such that it isn't broken up into * multiple arguments. @@ -65,21 +57,73 @@ # define JEMALLOC_CC_SILENCE_INIT(v) #endif +#define JEMALLOC_GNUC_PREREQ(major, minor) \ + (!defined(__clang__) && \ + (__GNUC__ > (major) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))) +#ifndef __has_builtin +# define __has_builtin(builtin) (0) +#endif +#define JEMALLOC_CLANG_HAS_BUILTIN(builtin) \ + (defined(__clang__) && __has_builtin(builtin)) + #ifdef __GNUC__ # define likely(x) __builtin_expect(!!(x), 1) # define unlikely(x) __builtin_expect(!!(x), 0) +# if JEMALLOC_GNUC_PREREQ(4, 6) || \ + JEMALLOC_CLANG_HAS_BUILTIN(__builtin_unreachable) +# define unreachable() __builtin_unreachable() +# else +# define unreachable() +# endif #else # define likely(x) !!(x) # define unlikely(x) !!(x) +# define unreachable() #endif -#if !defined(JEMALLOC_INTERNAL_UNREACHABLE) -# error JEMALLOC_INTERNAL_UNREACHABLE should have been defined by configure +/* + * Define a custom assert() in order to reduce the chances of deadlock during + * assertion failure. 
+ */ +#ifndef assert +#define assert(e) do { \ + if (unlikely(config_debug && !(e))) { \ + malloc_printf( \ + ": %s:%d: Failed assertion: \"%s\"\n", \ + __FILE__, __LINE__, #e); \ + abort(); \ + } \ +} while (0) #endif -#define unreachable() JEMALLOC_INTERNAL_UNREACHABLE() +#ifndef not_reached +#define not_reached() do { \ + if (config_debug) { \ + malloc_printf( \ + ": %s:%d: Unreachable code reached\n", \ + __FILE__, __LINE__); \ + abort(); \ + } \ + unreachable(); \ +} while (0) +#endif -#include "jemalloc/internal/assert.h" +#ifndef not_implemented +#define not_implemented() do { \ + if (config_debug) { \ + malloc_printf(": %s:%d: Not implemented\n", \ + __FILE__, __LINE__); \ + abort(); \ + } \ +} while (0) +#endif + +#ifndef assert_not_implemented +#define assert_not_implemented(e) do { \ + if (unlikely(config_debug && !(e))) \ + not_implemented(); \ +} while (0) +#endif /* Use to assert a particular configuration, e.g., cassert(config_debug). */ #define cassert(c) do { \ @@ -104,9 +148,9 @@ void malloc_write(const char *s); * malloc_vsnprintf() supports a subset of snprintf(3) that avoids floating * point math. */ -size_t malloc_vsnprintf(char *str, size_t size, const char *format, +int malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap); -size_t malloc_snprintf(char *str, size_t size, const char *format, ...) +int malloc_snprintf(char *str, size_t size, const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4); void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, const char *format, va_list ap); @@ -119,16 +163,10 @@ void malloc_printf(const char *format, ...) 
JEMALLOC_FORMAT_PRINTF(1, 2); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -unsigned ffs_llu(unsigned long long bitmap); -unsigned ffs_lu(unsigned long bitmap); -unsigned ffs_u(unsigned bitmap); -unsigned ffs_zu(size_t bitmap); -unsigned ffs_u64(uint64_t bitmap); -unsigned ffs_u32(uint32_t bitmap); -uint64_t pow2_ceil_u64(uint64_t x); -uint32_t pow2_ceil_u32(uint32_t x); -size_t pow2_ceil_zu(size_t x); -unsigned lg_floor(size_t x); +int jemalloc_ffsl(long bitmap); +int jemalloc_ffs(int bitmap); +size_t pow2_ceil(size_t x); +size_t lg_floor(size_t x); void set_errno(int errnum); int get_errno(void); #endif @@ -136,115 +174,44 @@ int get_errno(void); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_UTIL_C_)) /* Sanity check. */ -#if !defined(JEMALLOC_INTERNAL_FFSLL) || !defined(JEMALLOC_INTERNAL_FFSL) \ - || !defined(JEMALLOC_INTERNAL_FFS) -# error JEMALLOC_INTERNAL_FFS{,L,LL} should have been defined by configure +#if !defined(JEMALLOC_INTERNAL_FFSL) || !defined(JEMALLOC_INTERNAL_FFS) +# error Both JEMALLOC_INTERNAL_FFSL && JEMALLOC_INTERNAL_FFS should have been defined by configure #endif -JEMALLOC_ALWAYS_INLINE unsigned -ffs_llu(unsigned long long bitmap) -{ - - return (JEMALLOC_INTERNAL_FFSLL(bitmap)); -} - -JEMALLOC_ALWAYS_INLINE unsigned -ffs_lu(unsigned long bitmap) +JEMALLOC_ALWAYS_INLINE int +jemalloc_ffsl(long bitmap) { return (JEMALLOC_INTERNAL_FFSL(bitmap)); } -JEMALLOC_ALWAYS_INLINE unsigned -ffs_u(unsigned bitmap) +JEMALLOC_ALWAYS_INLINE int +jemalloc_ffs(int bitmap) { return (JEMALLOC_INTERNAL_FFS(bitmap)); } -JEMALLOC_ALWAYS_INLINE unsigned -ffs_zu(size_t bitmap) -{ - -#if LG_SIZEOF_PTR == LG_SIZEOF_INT - return (ffs_u(bitmap)); -#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG - return (ffs_lu(bitmap)); -#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG_LONG - return (ffs_llu(bitmap)); -#else -#error No implementation for size_t ffs() -#endif -} - -JEMALLOC_ALWAYS_INLINE unsigned -ffs_u64(uint64_t bitmap) -{ - -#if LG_SIZEOF_LONG == 3 - return 
(ffs_lu(bitmap)); -#elif LG_SIZEOF_LONG_LONG == 3 - return (ffs_llu(bitmap)); -#else -#error No implementation for 64-bit ffs() -#endif -} - -JEMALLOC_ALWAYS_INLINE unsigned -ffs_u32(uint32_t bitmap) -{ - -#if LG_SIZEOF_INT == 2 - return (ffs_u(bitmap)); -#else -#error No implementation for 32-bit ffs() -#endif - return (ffs_u(bitmap)); -} - -JEMALLOC_INLINE uint64_t -pow2_ceil_u64(uint64_t x) -{ - - x--; - x |= x >> 1; - x |= x >> 2; - x |= x >> 4; - x |= x >> 8; - x |= x >> 16; - x |= x >> 32; - x++; - return (x); -} - -JEMALLOC_INLINE uint32_t -pow2_ceil_u32(uint32_t x) -{ - - x--; - x |= x >> 1; - x |= x >> 2; - x |= x >> 4; - x |= x >> 8; - x |= x >> 16; - x++; - return (x); -} - /* Compute the smallest power of 2 that is >= x. */ JEMALLOC_INLINE size_t -pow2_ceil_zu(size_t x) +pow2_ceil(size_t x) { + x--; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; #if (LG_SIZEOF_PTR == 3) - return (pow2_ceil_u64(x)); -#else - return (pow2_ceil_u32(x)); + x |= x >> 32; #endif + x++; + return (x); } #if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) -JEMALLOC_INLINE unsigned +JEMALLOC_INLINE size_t lg_floor(size_t x) { size_t ret; @@ -255,11 +222,10 @@ lg_floor(size_t x) : "=r"(ret) // Outputs. : "r"(x) // Inputs. 
); - assert(ret < UINT_MAX); - return ((unsigned)ret); + return (ret); } #elif (defined(_MSC_VER)) -JEMALLOC_INLINE unsigned +JEMALLOC_INLINE size_t lg_floor(size_t x) { unsigned long ret; @@ -271,13 +237,12 @@ lg_floor(size_t x) #elif (LG_SIZEOF_PTR == 2) _BitScanReverse(&ret, x); #else -# error "Unsupported type size for lg_floor()" +# error "Unsupported type sizes for lg_floor()" #endif - assert(ret < UINT_MAX); - return ((unsigned)ret); + return (ret); } #elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ)) -JEMALLOC_INLINE unsigned +JEMALLOC_INLINE size_t lg_floor(size_t x) { @@ -288,11 +253,11 @@ lg_floor(size_t x) #elif (LG_SIZEOF_PTR == LG_SIZEOF_LONG) return (((8 << LG_SIZEOF_PTR) - 1) - __builtin_clzl(x)); #else -# error "Unsupported type size for lg_floor()" +# error "Unsupported type sizes for lg_floor()" #endif } #else -JEMALLOC_INLINE unsigned +JEMALLOC_INLINE size_t lg_floor(size_t x) { @@ -303,13 +268,20 @@ lg_floor(size_t x) x |= (x >> 4); x |= (x >> 8); x |= (x >> 16); -#if (LG_SIZEOF_PTR == 3) +#if (LG_SIZEOF_PTR == 3 && LG_SIZEOF_PTR == LG_SIZEOF_LONG) x |= (x >> 32); -#endif - if (x == SIZE_T_MAX) - return ((8 << LG_SIZEOF_PTR) - 1); + if (x == KZU(0xffffffffffffffff)) + return (63); x++; - return (ffs_zu(x) - 2); + return (jemalloc_ffsl(x) - 2); +#elif (LG_SIZEOF_PTR == 2) + if (x == KZU(0xffffffff)) + return (31); + x++; + return (jemalloc_ffs(x) - 2); +#else +# error "Unsupported type sizes for lg_floor()" +#endif } #endif diff --git a/deps/jemalloc/include/jemalloc/internal/valgrind.h b/deps/jemalloc/include/jemalloc/internal/valgrind.h index 877a142b6..a3380df92 100644 --- a/deps/jemalloc/include/jemalloc/internal/valgrind.h +++ b/deps/jemalloc/include/jemalloc/internal/valgrind.h @@ -30,31 +30,17 @@ * calls must be embedded in macros rather than in functions so that when * Valgrind reports errors, there are no extra stack frames in the backtraces. 
*/ -#define JEMALLOC_VALGRIND_MALLOC(cond, tsdn, ptr, usize, zero) do { \ - if (unlikely(in_valgrind && cond)) { \ - VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(tsdn, ptr), \ - zero); \ - } \ +#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do { \ + if (unlikely(in_valgrind && cond)) \ + VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(ptr), zero); \ } while (0) -#define JEMALLOC_VALGRIND_REALLOC_MOVED_no(ptr, old_ptr) \ - (false) -#define JEMALLOC_VALGRIND_REALLOC_MOVED_maybe(ptr, old_ptr) \ - ((ptr) != (old_ptr)) -#define JEMALLOC_VALGRIND_REALLOC_PTR_NULL_no(ptr) \ - (false) -#define JEMALLOC_VALGRIND_REALLOC_PTR_NULL_maybe(ptr) \ - (ptr == NULL) -#define JEMALLOC_VALGRIND_REALLOC_OLD_PTR_NULL_no(old_ptr) \ - (false) -#define JEMALLOC_VALGRIND_REALLOC_OLD_PTR_NULL_maybe(old_ptr) \ - (old_ptr == NULL) -#define JEMALLOC_VALGRIND_REALLOC(moved, tsdn, ptr, usize, ptr_null, \ - old_ptr, old_usize, old_rzsize, old_ptr_null, zero) do { \ +#define JEMALLOC_VALGRIND_REALLOC(maybe_moved, ptr, usize, \ + ptr_maybe_null, old_ptr, old_usize, old_rzsize, old_ptr_maybe_null, \ + zero) do { \ if (unlikely(in_valgrind)) { \ - size_t rzsize = p2rz(tsdn, ptr); \ + size_t rzsize = p2rz(ptr); \ \ - if (!JEMALLOC_VALGRIND_REALLOC_MOVED_##moved(ptr, \ - old_ptr)) { \ + if (!maybe_moved || ptr == old_ptr) { \ VALGRIND_RESIZEINPLACE_BLOCK(ptr, old_usize, \ usize, rzsize); \ if (zero && old_usize < usize) { \ @@ -63,13 +49,11 @@ old_usize), usize - old_usize); \ } \ } else { \ - if (!JEMALLOC_VALGRIND_REALLOC_OLD_PTR_NULL_## \ - old_ptr_null(old_ptr)) { \ + if (!old_ptr_maybe_null || old_ptr != NULL) { \ valgrind_freelike_block(old_ptr, \ old_rzsize); \ } \ - if (!JEMALLOC_VALGRIND_REALLOC_PTR_NULL_## \ - ptr_null(ptr)) { \ + if (!ptr_maybe_null || ptr != NULL) { \ size_t copy_size = (old_usize < usize) \ ? 
old_usize : usize; \ size_t tail_size = usize - copy_size; \ @@ -97,8 +81,8 @@ #define JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(ptr, usize) do {} while (0) #define JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ptr, usize) do {} while (0) #define JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ptr, usize) do {} while (0) -#define JEMALLOC_VALGRIND_MALLOC(cond, tsdn, ptr, usize, zero) do {} while (0) -#define JEMALLOC_VALGRIND_REALLOC(maybe_moved, tsdn, ptr, usize, \ +#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do {} while (0) +#define JEMALLOC_VALGRIND_REALLOC(maybe_moved, ptr, usize, \ ptr_maybe_null, old_ptr, old_usize, old_rzsize, old_ptr_maybe_null, \ zero) do {} while (0) #define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do {} while (0) diff --git a/deps/jemalloc/include/jemalloc/internal/witness.h b/deps/jemalloc/include/jemalloc/internal/witness.h deleted file mode 100644 index cdf15d797..000000000 --- a/deps/jemalloc/include/jemalloc/internal/witness.h +++ /dev/null @@ -1,266 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct witness_s witness_t; -typedef unsigned witness_rank_t; -typedef ql_head(witness_t) witness_list_t; -typedef int witness_comp_t (const witness_t *, const witness_t *); - -/* - * Lock ranks. Witnesses with rank WITNESS_RANK_OMIT are completely ignored by - * the witness machinery. 
- */ -#define WITNESS_RANK_OMIT 0U - -#define WITNESS_RANK_INIT 1U -#define WITNESS_RANK_CTL 1U -#define WITNESS_RANK_ARENAS 2U - -#define WITNESS_RANK_PROF_DUMP 3U -#define WITNESS_RANK_PROF_BT2GCTX 4U -#define WITNESS_RANK_PROF_TDATAS 5U -#define WITNESS_RANK_PROF_TDATA 6U -#define WITNESS_RANK_PROF_GCTX 7U - -#define WITNESS_RANK_ARENA 8U -#define WITNESS_RANK_ARENA_CHUNKS 9U -#define WITNESS_RANK_ARENA_NODE_CACHE 10 - -#define WITNESS_RANK_BASE 11U - -#define WITNESS_RANK_LEAF 0xffffffffU -#define WITNESS_RANK_ARENA_BIN WITNESS_RANK_LEAF -#define WITNESS_RANK_ARENA_HUGE WITNESS_RANK_LEAF -#define WITNESS_RANK_DSS WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_ACTIVE WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_DUMP_SEQ WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_GDUMP WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_NEXT_THR_UID WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_THREAD_ACTIVE_INIT WITNESS_RANK_LEAF - -#define WITNESS_INITIALIZER(rank) {"initializer", rank, NULL, {NULL, NULL}} - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct witness_s { - /* Name, used for printing lock order reversal messages. */ - const char *name; - - /* - * Witness rank, where 0 is lowest and UINT_MAX is highest. Witnesses - * must be acquired in order of increasing rank. - */ - witness_rank_t rank; - - /* - * If two witnesses are of equal rank and they have the samp comp - * function pointer, it is called as a last attempt to differentiate - * between witnesses of equal rank. - */ - witness_comp_t *comp; - - /* Linkage for thread's currently owned locks. 
*/ - ql_elm(witness_t) link; -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -void witness_init(witness_t *witness, const char *name, witness_rank_t rank, - witness_comp_t *comp); -#ifdef JEMALLOC_JET -typedef void (witness_lock_error_t)(const witness_list_t *, const witness_t *); -extern witness_lock_error_t *witness_lock_error; -#else -void witness_lock_error(const witness_list_t *witnesses, - const witness_t *witness); -#endif -#ifdef JEMALLOC_JET -typedef void (witness_owner_error_t)(const witness_t *); -extern witness_owner_error_t *witness_owner_error; -#else -void witness_owner_error(const witness_t *witness); -#endif -#ifdef JEMALLOC_JET -typedef void (witness_not_owner_error_t)(const witness_t *); -extern witness_not_owner_error_t *witness_not_owner_error; -#else -void witness_not_owner_error(const witness_t *witness); -#endif -#ifdef JEMALLOC_JET -typedef void (witness_lockless_error_t)(const witness_list_t *); -extern witness_lockless_error_t *witness_lockless_error; -#else -void witness_lockless_error(const witness_list_t *witnesses); -#endif - -void witnesses_cleanup(tsd_t *tsd); -void witness_fork_cleanup(tsd_t *tsd); -void witness_prefork(tsd_t *tsd); -void witness_postfork_parent(tsd_t *tsd); -void witness_postfork_child(tsd_t *tsd); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -bool witness_owner(tsd_t *tsd, const witness_t *witness); -void witness_assert_owner(tsdn_t *tsdn, const witness_t *witness); -void witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness); -void witness_assert_lockless(tsdn_t *tsdn); -void witness_lock(tsdn_t *tsdn, witness_t *witness); -void witness_unlock(tsdn_t *tsdn, witness_t *witness); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) -JEMALLOC_INLINE 
bool -witness_owner(tsd_t *tsd, const witness_t *witness) -{ - witness_list_t *witnesses; - witness_t *w; - - witnesses = tsd_witnessesp_get(tsd); - ql_foreach(w, witnesses, link) { - if (w == witness) - return (true); - } - - return (false); -} - -JEMALLOC_INLINE void -witness_assert_owner(tsdn_t *tsdn, const witness_t *witness) -{ - tsd_t *tsd; - - if (!config_debug) - return; - - if (tsdn_null(tsdn)) - return; - tsd = tsdn_tsd(tsdn); - if (witness->rank == WITNESS_RANK_OMIT) - return; - - if (witness_owner(tsd, witness)) - return; - witness_owner_error(witness); -} - -JEMALLOC_INLINE void -witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness) -{ - tsd_t *tsd; - witness_list_t *witnesses; - witness_t *w; - - if (!config_debug) - return; - - if (tsdn_null(tsdn)) - return; - tsd = tsdn_tsd(tsdn); - if (witness->rank == WITNESS_RANK_OMIT) - return; - - witnesses = tsd_witnessesp_get(tsd); - ql_foreach(w, witnesses, link) { - if (w == witness) - witness_not_owner_error(witness); - } -} - -JEMALLOC_INLINE void -witness_assert_lockless(tsdn_t *tsdn) -{ - tsd_t *tsd; - witness_list_t *witnesses; - witness_t *w; - - if (!config_debug) - return; - - if (tsdn_null(tsdn)) - return; - tsd = tsdn_tsd(tsdn); - - witnesses = tsd_witnessesp_get(tsd); - w = ql_last(witnesses, link); - if (w != NULL) - witness_lockless_error(witnesses); -} - -JEMALLOC_INLINE void -witness_lock(tsdn_t *tsdn, witness_t *witness) -{ - tsd_t *tsd; - witness_list_t *witnesses; - witness_t *w; - - if (!config_debug) - return; - - if (tsdn_null(tsdn)) - return; - tsd = tsdn_tsd(tsdn); - if (witness->rank == WITNESS_RANK_OMIT) - return; - - witness_assert_not_owner(tsdn, witness); - - witnesses = tsd_witnessesp_get(tsd); - w = ql_last(witnesses, link); - if (w == NULL) { - /* No other locks; do nothing. */ - } else if (tsd_witness_fork_get(tsd) && w->rank <= witness->rank) { - /* Forking, and relaxed ranking satisfied. 
*/ - } else if (w->rank > witness->rank) { - /* Not forking, rank order reversal. */ - witness_lock_error(witnesses, witness); - } else if (w->rank == witness->rank && (w->comp == NULL || w->comp != - witness->comp || w->comp(w, witness) > 0)) { - /* - * Missing/incompatible comparison function, or comparison - * function indicates rank order reversal. - */ - witness_lock_error(witnesses, witness); - } - - ql_elm_new(witness, link); - ql_tail_insert(witnesses, witness, link); -} - -JEMALLOC_INLINE void -witness_unlock(tsdn_t *tsdn, witness_t *witness) -{ - tsd_t *tsd; - witness_list_t *witnesses; - - if (!config_debug) - return; - - if (tsdn_null(tsdn)) - return; - tsd = tsdn_tsd(tsdn); - if (witness->rank == WITNESS_RANK_OMIT) - return; - - /* - * Check whether owner before removal, rather than relying on - * witness_assert_owner() to abort, so that unit tests can test this - * function's failure mode without causing undefined behavior. - */ - if (witness_owner(tsd, witness)) { - witnesses = tsd_witnessesp_get(tsd); - ql_remove(witnesses, witness, link); - } else - witness_assert_owner(tsdn, witness); -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/jemalloc_defs.h.in b/deps/jemalloc/include/jemalloc/jemalloc_defs.h.in index 6d89435c2..ab13c3758 100644 --- a/deps/jemalloc/include/jemalloc/jemalloc_defs.h.in +++ b/deps/jemalloc/include/jemalloc/jemalloc_defs.h.in @@ -33,13 +33,5 @@ */ #undef JEMALLOC_USE_CXX_THROW -#ifdef _MSC_VER -# ifdef _WIN64 -# define LG_SIZEOF_PTR_WIN 3 -# else -# define LG_SIZEOF_PTR_WIN 2 -# endif -#endif - /* sizeof(void *) == 2^LG_SIZEOF_PTR. 
*/ #undef LG_SIZEOF_PTR diff --git a/deps/jemalloc/include/jemalloc/jemalloc_macros.h.in b/deps/jemalloc/include/jemalloc/jemalloc_macros.h.in index 2956c7bb4..a7028db34 100644 --- a/deps/jemalloc/include/jemalloc/jemalloc_macros.h.in +++ b/deps/jemalloc/include/jemalloc/jemalloc_macros.h.in @@ -11,13 +11,12 @@ #define JEMALLOC_VERSION_NREV @jemalloc_version_nrev@ #define JEMALLOC_VERSION_GID "@jemalloc_version_gid@" -# define MALLOCX_LG_ALIGN(la) ((int)(la)) +# define MALLOCX_LG_ALIGN(la) (la) # if LG_SIZEOF_PTR == 2 -# define MALLOCX_ALIGN(a) ((int)(ffs((int)(a))-1)) +# define MALLOCX_ALIGN(a) (ffs(a)-1) # else # define MALLOCX_ALIGN(a) \ - ((int)(((size_t)(a) < (size_t)INT_MAX) ? ffs((int)(a))-1 : \ - ffs((int)(((size_t)(a))>>32))+31)) + ((a < (size_t)INT_MAX) ? ffs(a)-1 : ffs(a>>32)+31) # endif # define MALLOCX_ZERO ((int)0x40) /* @@ -29,7 +28,7 @@ /* * Bias arena index bits so that 0 encodes "use an automatically chosen arena". */ -# define MALLOCX_ARENA(a) ((((int)(a))+1) << 20) +# define MALLOCX_ARENA(a) ((int)(((a)+1) << 20)) #if defined(__cplusplus) && defined(JEMALLOC_USE_CXX_THROW) # define JEMALLOC_CXX_THROW throw() @@ -37,7 +36,32 @@ # define JEMALLOC_CXX_THROW #endif -#if _MSC_VER +#ifdef JEMALLOC_HAVE_ATTR +# define JEMALLOC_ATTR(s) __attribute__((s)) +# define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s)) +# ifdef JEMALLOC_HAVE_ATTR_ALLOC_SIZE +# define JEMALLOC_ALLOC_SIZE(s) JEMALLOC_ATTR(alloc_size(s)) +# define JEMALLOC_ALLOC_SIZE2(s1, s2) JEMALLOC_ATTR(alloc_size(s1, s2)) +# else +# define JEMALLOC_ALLOC_SIZE(s) +# define JEMALLOC_ALLOC_SIZE2(s1, s2) +# endif +# ifndef JEMALLOC_EXPORT +# define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) +# endif +# ifdef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF +# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(gnu_printf, s, i)) +# elif defined(JEMALLOC_HAVE_ATTR_FORMAT_PRINTF) +# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(printf, s, i)) +# else +# define JEMALLOC_FORMAT_PRINTF(s, 
i) +# endif +# define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline) +# define JEMALLOC_NOTHROW JEMALLOC_ATTR(nothrow) +# define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s)) +# define JEMALLOC_RESTRICT_RETURN +# define JEMALLOC_ALLOCATOR +#elif _MSC_VER # define JEMALLOC_ATTR(s) # define JEMALLOC_ALIGNED(s) __declspec(align(s)) # define JEMALLOC_ALLOC_SIZE(s) @@ -63,31 +87,6 @@ # else # define JEMALLOC_ALLOCATOR # endif -#elif defined(JEMALLOC_HAVE_ATTR) -# define JEMALLOC_ATTR(s) __attribute__((s)) -# define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s)) -# ifdef JEMALLOC_HAVE_ATTR_ALLOC_SIZE -# define JEMALLOC_ALLOC_SIZE(s) JEMALLOC_ATTR(alloc_size(s)) -# define JEMALLOC_ALLOC_SIZE2(s1, s2) JEMALLOC_ATTR(alloc_size(s1, s2)) -# else -# define JEMALLOC_ALLOC_SIZE(s) -# define JEMALLOC_ALLOC_SIZE2(s1, s2) -# endif -# ifndef JEMALLOC_EXPORT -# define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) -# endif -# ifdef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF -# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(gnu_printf, s, i)) -# elif defined(JEMALLOC_HAVE_ATTR_FORMAT_PRINTF) -# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(printf, s, i)) -# else -# define JEMALLOC_FORMAT_PRINTF(s, i) -# endif -# define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline) -# define JEMALLOC_NOTHROW JEMALLOC_ATTR(nothrow) -# define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s)) -# define JEMALLOC_RESTRICT_RETURN -# define JEMALLOC_ALLOCATOR #else # define JEMALLOC_ATTR(s) # define JEMALLOC_ALIGNED(s) diff --git a/deps/jemalloc/include/msvc_compat/strings.h b/deps/jemalloc/include/msvc_compat/strings.h index a3ee25063..f01ffdd18 100644 --- a/deps/jemalloc/include/msvc_compat/strings.h +++ b/deps/jemalloc/include/msvc_compat/strings.h @@ -21,37 +21,7 @@ static __forceinline int ffs(int x) return (ffsl(x)); } -# ifdef _M_X64 -# pragma intrinsic(_BitScanForward64) -# endif - -static __forceinline int ffsll(unsigned __int64 x) -{ - unsigned long i; -#ifdef _M_X64 - if 
(_BitScanForward64(&i, x)) - return (i + 1); - return (0); #else -// Fallback for 32-bit build where 64-bit version not available -// assuming little endian - union { - unsigned __int64 ll; - unsigned long l[2]; - } s; - - s.ll = x; - - if (_BitScanForward(&i, s.l[0])) - return (i + 1); - else if(_BitScanForward(&i, s.l[1])) - return (i + 33); - return (0); -#endif -} - -#else -# define ffsll(x) __builtin_ffsll(x) # define ffsl(x) __builtin_ffsl(x) # define ffs(x) __builtin_ffs(x) #endif diff --git a/deps/jemalloc/include/msvc_compat/windows_extra.h b/deps/jemalloc/include/msvc_compat/windows_extra.h index 3008faa37..0c5e323ff 100644 --- a/deps/jemalloc/include/msvc_compat/windows_extra.h +++ b/deps/jemalloc/include/msvc_compat/windows_extra.h @@ -1,6 +1,26 @@ #ifndef MSVC_COMPAT_WINDOWS_EXTRA_H #define MSVC_COMPAT_WINDOWS_EXTRA_H -#include +#ifndef ENOENT +# define ENOENT ERROR_PATH_NOT_FOUND +#endif +#ifndef EINVAL +# define EINVAL ERROR_BAD_ARGUMENTS +#endif +#ifndef EAGAIN +# define EAGAIN ERROR_OUTOFMEMORY +#endif +#ifndef EPERM +# define EPERM ERROR_WRITE_FAULT +#endif +#ifndef EFAULT +# define EFAULT ERROR_INVALID_ADDRESS +#endif +#ifndef ENOMEM +# define ENOMEM ERROR_NOT_ENOUGH_MEMORY +#endif +#ifndef ERANGE +# define ERANGE ERROR_INVALID_DATA +#endif #endif /* MSVC_COMPAT_WINDOWS_EXTRA_H */ diff --git a/deps/jemalloc/build-aux/install-sh b/deps/jemalloc/install-sh similarity index 100% rename from deps/jemalloc/build-aux/install-sh rename to deps/jemalloc/install-sh diff --git a/deps/jemalloc/jemalloc.pc.in b/deps/jemalloc/jemalloc.pc.in index a318e8dd3..1a3ad9b34 100644 --- a/deps/jemalloc/jemalloc.pc.in +++ b/deps/jemalloc/jemalloc.pc.in @@ -6,7 +6,7 @@ install_suffix=@install_suffix@ Name: jemalloc Description: A general purpose malloc(3) implementation that emphasizes fragmentation avoidance and scalable concurrency support. 
-URL: http://jemalloc.net/ +URL: http://www.canonware.com/jemalloc Version: @jemalloc_version@ Cflags: -I${includedir} Libs: -L${libdir} -ljemalloc${install_suffix} diff --git a/deps/jemalloc/msvc/ReadMe.txt b/deps/jemalloc/msvc/ReadMe.txt deleted file mode 100644 index 77d567da0..000000000 --- a/deps/jemalloc/msvc/ReadMe.txt +++ /dev/null @@ -1,24 +0,0 @@ - -How to build jemalloc for Windows -================================= - -1. Install Cygwin with at least the following packages: - * autoconf - * autogen - * gawk - * grep - * sed - -2. Install Visual Studio 2015 with Visual C++ - -3. Add Cygwin\bin to the PATH environment variable - -4. Open "VS2015 x86 Native Tools Command Prompt" - (note: x86/x64 doesn't matter at this point) - -5. Generate header files: - sh -c "CC=cl ./autogen.sh" - -6. Now the project can be opened and built in Visual Studio: - msvc\jemalloc_vc2015.sln - diff --git a/deps/jemalloc/msvc/jemalloc_vc2015.sln b/deps/jemalloc/msvc/jemalloc_vc2015.sln deleted file mode 100644 index aedd5e5ea..000000000 --- a/deps/jemalloc/msvc/jemalloc_vc2015.sln +++ /dev/null @@ -1,63 +0,0 @@ - -Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio 14 -VisualStudioVersion = 14.0.24720.0 -MinimumVisualStudioVersion = 10.0.40219.1 -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{70A99006-6DE9-472B-8F83-4CEE6C616DF3}" - ProjectSection(SolutionItems) = preProject - ReadMe.txt = ReadMe.txt - EndProjectSection -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "jemalloc", "projects\vc2015\jemalloc\jemalloc.vcxproj", "{8D6BB292-9E1C-413D-9F98-4864BDC1514A}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test_threads", "projects\vc2015\test_threads\test_threads.vcxproj", "{09028CFD-4EB7-491D-869C-0708DB97ED44}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|x64 = Debug|x64 - Debug|x86 = Debug|x86 - Debug-static|x64 = 
Debug-static|x64 - Debug-static|x86 = Debug-static|x86 - Release|x64 = Release|x64 - Release|x86 = Release|x86 - Release-static|x64 = Release-static|x64 - Release-static|x86 = Release-static|x86 - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x64.ActiveCfg = Debug|x64 - {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x64.Build.0 = Debug|x64 - {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x86.ActiveCfg = Debug|Win32 - {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x86.Build.0 = Debug|Win32 - {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x64.ActiveCfg = Debug-static|x64 - {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x64.Build.0 = Debug-static|x64 - {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x86.ActiveCfg = Debug-static|Win32 - {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x86.Build.0 = Debug-static|Win32 - {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x64.ActiveCfg = Release|x64 - {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x64.Build.0 = Release|x64 - {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x86.ActiveCfg = Release|Win32 - {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x86.Build.0 = Release|Win32 - {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x64.ActiveCfg = Release-static|x64 - {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x64.Build.0 = Release-static|x64 - {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x86.ActiveCfg = Release-static|Win32 - {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x86.Build.0 = Release-static|Win32 - {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x64.ActiveCfg = Debug|x64 - {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x64.Build.0 = Debug|x64 - {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x86.ActiveCfg = Debug|Win32 - {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x86.Build.0 = Debug|Win32 - {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x64.ActiveCfg = Debug-static|x64 - 
{09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x64.Build.0 = Debug-static|x64 - {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x86.ActiveCfg = Debug-static|Win32 - {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x86.Build.0 = Debug-static|Win32 - {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x64.ActiveCfg = Release|x64 - {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x64.Build.0 = Release|x64 - {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x86.ActiveCfg = Release|Win32 - {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x86.Build.0 = Release|Win32 - {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x64.ActiveCfg = Release-static|x64 - {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x64.Build.0 = Release-static|x64 - {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x86.ActiveCfg = Release-static|Win32 - {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x86.Build.0 = Release-static|Win32 - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection -EndGlobal diff --git a/deps/jemalloc/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/deps/jemalloc/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj deleted file mode 100644 index 8342ab3ab..000000000 --- a/deps/jemalloc/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ /dev/null @@ -1,402 +0,0 @@ - - - - - Debug-static - Win32 - - - Debug-static - x64 - - - Debug - Win32 - - - Release-static - Win32 - - - Release-static - x64 - - - Release - Win32 - - - Debug - x64 - - - Release - x64 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - {8D6BB292-9E1C-413D-9F98-4864BDC1514A} - Win32Proj - jemalloc - 8.1 - - - - DynamicLibrary - true - v140 - MultiByte - - - StaticLibrary - true - v140 - MultiByte - - - DynamicLibrary - false - v140 - true - MultiByte - - - StaticLibrary - false - v140 - true - MultiByte - - - 
DynamicLibrary - true - v140 - MultiByte - - - StaticLibrary - true - v140 - MultiByte - - - DynamicLibrary - false - v140 - true - MultiByte - - - StaticLibrary - false - v140 - true - MultiByte - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - $(SolutionDir)$(Platform)\$(Configuration)\ - $(Platform)\$(Configuration)\ - $(ProjectName)d - - - $(SolutionDir)$(Platform)\$(Configuration)\ - $(Platform)\$(Configuration)\ - $(ProjectName)-$(PlatformToolset)-$(Configuration) - - - $(SolutionDir)$(Platform)\$(Configuration)\ - $(Platform)\$(Configuration)\ - - - $(SolutionDir)$(Platform)\$(Configuration)\ - $(Platform)\$(Configuration)\ - $(ProjectName)-$(PlatformToolset)-$(Configuration) - - - $(SolutionDir)$(Platform)\$(Configuration)\ - $(Platform)\$(Configuration)\ - $(ProjectName)d - - - $(SolutionDir)$(Platform)\$(Configuration)\ - $(Platform)\$(Configuration)\ - $(ProjectName)-vc$(PlatformToolsetVersion)-$(Configuration) - - - $(SolutionDir)$(Platform)\$(Configuration)\ - $(Platform)\$(Configuration)\ - - - $(SolutionDir)$(Platform)\$(Configuration)\ - $(Platform)\$(Configuration)\ - $(ProjectName)-vc$(PlatformToolsetVersion)-$(Configuration) - - - - - - Level3 - Disabled - _REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions) - ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) - 4090;4146;4267;4334 - $(OutputPath)$(TargetName).pdb - - - Windows - true - - - - - - - Level3 - Disabled - JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions) - ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) - MultiThreadedDebug - 4090;4146;4267;4334 - $(OutputPath)$(TargetName).pdb - - - Windows - true - - - - - - - Level3 - Disabled - _REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions) - ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) - 4090;4146;4267;4334 - 
$(OutputPath)$(TargetName).pdb - - - Windows - true - - - - - - - Level3 - Disabled - JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions) - ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) - MultiThreadedDebug - 4090;4146;4267;4334 - OldStyle - false - - - Windows - true - - - - - Level3 - - - MaxSpeed - true - true - _REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions) - ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) - 4090;4146;4267;4334 - $(OutputPath)$(TargetName).pdb - - - Windows - true - true - true - - - - - Level3 - - - MaxSpeed - true - true - _REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions) - ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) - MultiThreaded - 4090;4146;4267;4334 - $(OutputPath)$(TargetName).pdb - - - Windows - true - true - true - - - - - Level3 - - - MaxSpeed - true - true - ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) - _REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions) - 4090;4146;4267;4334 - $(OutputPath)$(TargetName).pdb - - - Windows - true - true - true - - - - - Level3 - - - MaxSpeed - true - true - _REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions) - ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) - MultiThreaded - 4090;4146;4267;4334 - OldStyle - - - Windows - true - true - true - - - - - - \ No newline at end of file diff --git a/deps/jemalloc/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/deps/jemalloc/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters deleted file mode 100644 index 37f0f02ae..000000000 --- a/deps/jemalloc/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ /dev/null @@ -1,272 +0,0 @@ - - - - - {4FC737F1-C7A5-4376-A066-2A32D752A2FF} - cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx - - - {93995380-89BD-4b04-88EB-625FBE52EBFB} - 
h;hh;hpp;hxx;hm;inl;inc;xsd - - - {5697dfa3-16cf-4932-b428-6e0ec6e9f98e} - - - {0cbd2ca6-42a7-4f82-8517-d7e7a14fd986} - - - {0abe6f30-49b5-46dd-8aca-6e33363fa52c} - - - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\msvc_compat - - - Header Files\msvc_compat - - - Header Files\msvc_compat\C99 - - - Header Files\msvc_compat\C99 - - - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - 
Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - diff --git a/deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads.cpp b/deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads.cpp deleted file mode 100755 index a3d1a792a..000000000 --- a/deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads.cpp +++ /dev/null @@ -1,89 +0,0 @@ -// jemalloc C++ threaded test -// Author: Rustam Abdullaev -// Public Domain - -#include -#include -#include -#include -#include -#include -#include -#include - -using std::vector; -using std::thread; -using std::uniform_int_distribution; -using std::minstd_rand; - -int test_threads() -{ - je_malloc_conf = "narenas:3"; - int narenas = 0; - size_t sz = sizeof(narenas); - je_mallctl("opt.narenas", (void *)&narenas, &sz, NULL, 0); - if (narenas != 3) { - printf("Error: unexpected number of arenas: %d\n", narenas); - return 1; - } - static const int sizes[] = { 7, 16, 32, 60, 91, 100, 120, 144, 169, 199, 255, 400, 670, 900, 917, 1025, 3333, 5190, 13131, 49192, 99999, 123123, 255265, 2333111 }; - static const int numSizes = (int)(sizeof(sizes) / sizeof(sizes[0])); - vector workers; - static const int numThreads = narenas + 1, numAllocsMax = 25, numIter1 = 50, numIter2 = 50; - je_malloc_stats_print(NULL, NULL, NULL); - size_t allocated1; - size_t sz1 = sizeof(allocated1); - je_mallctl("stats.active", (void *)&allocated1, &sz1, NULL, 0); - printf("\nPress Enter to start threads...\n"); - getchar(); - printf("Starting %d threads x %d x %d iterations...\n", numThreads, numIter1, numIter2); - for (int i = 0; i < numThreads; i++) { - workers.emplace_back([tid=i]() { - uniform_int_distribution sizeDist(0, numSizes - 1); - minstd_rand rnd(tid * 17); - uint8_t* ptrs[numAllocsMax]; - int ptrsz[numAllocsMax]; - for (int i = 0; i < numIter1; ++i) { - thread t([&]() { - for (int i = 0; i < numIter2; ++i) { - const int 
numAllocs = numAllocsMax - sizeDist(rnd); - for (int j = 0; j < numAllocs; j += 64) { - const int x = sizeDist(rnd); - const int sz = sizes[x]; - ptrsz[j] = sz; - ptrs[j] = (uint8_t*)je_malloc(sz); - if (!ptrs[j]) { - printf("Unable to allocate %d bytes in thread %d, iter %d, alloc %d. %d\n", sz, tid, i, j, x); - exit(1); - } - for (int k = 0; k < sz; k++) - ptrs[j][k] = tid + k; - } - for (int j = 0; j < numAllocs; j += 64) { - for (int k = 0, sz = ptrsz[j]; k < sz; k++) - if (ptrs[j][k] != (uint8_t)(tid + k)) { - printf("Memory error in thread %d, iter %d, alloc %d @ %d : %02X!=%02X\n", tid, i, j, k, ptrs[j][k], (uint8_t)(tid + k)); - exit(1); - } - je_free(ptrs[j]); - } - } - }); - t.join(); - } - }); - } - for (thread& t : workers) { - t.join(); - } - je_malloc_stats_print(NULL, NULL, NULL); - size_t allocated2; - je_mallctl("stats.active", (void *)&allocated2, &sz1, NULL, 0); - size_t leaked = allocated2 - allocated1; - printf("\nDone. Leaked: %zd bytes\n", leaked); - bool failed = leaked > 65536; // in case C++ runtime allocated something (e.g. iostream locale or facet) - printf("\nTest %s!\n", (failed ? "FAILED" : "successful")); - printf("\nPress Enter to continue...\n"); - getchar(); - return failed ? 
1 : 0; -} diff --git a/deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads.h b/deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads.h deleted file mode 100644 index 64d0cdb33..000000000 --- a/deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads.h +++ /dev/null @@ -1,3 +0,0 @@ -#pragma once - -int test_threads(); diff --git a/deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads.vcxproj b/deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads.vcxproj deleted file mode 100644 index f5e9898f2..000000000 --- a/deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads.vcxproj +++ /dev/null @@ -1,327 +0,0 @@ - - - - - Debug-static - Win32 - - - Debug-static - x64 - - - Debug - Win32 - - - Release-static - Win32 - - - Release-static - x64 - - - Release - Win32 - - - Debug - x64 - - - Release - x64 - - - - {09028CFD-4EB7-491D-869C-0708DB97ED44} - Win32Proj - test_threads - 8.1 - - - - Application - true - v140 - MultiByte - - - Application - true - v140 - MultiByte - - - Application - false - v140 - true - MultiByte - - - Application - false - v140 - true - MultiByte - - - Application - true - v140 - MultiByte - - - Application - true - v140 - MultiByte - - - Application - false - v140 - true - MultiByte - - - Application - false - v140 - true - MultiByte - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - $(SolutionDir)$(Platform)\$(Configuration)\ - $(Platform)\$(Configuration)\ - true - - - $(SolutionDir)$(Platform)\$(Configuration)\ - $(Platform)\$(Configuration)\ - true - - - true - $(SolutionDir)$(Platform)\$(Configuration)\ - - - true - $(SolutionDir)$(Platform)\$(Configuration)\ - - - $(SolutionDir)$(Platform)\$(Configuration)\ - $(Platform)\$(Configuration)\ - false - - - $(SolutionDir)$(Platform)\$(Configuration)\ - $(Platform)\$(Configuration)\ - false - - - $(SolutionDir)$(Platform)\$(Configuration)\ - $(Platform)\$(Configuration)\ - false - - - $(SolutionDir)$(Platform)\$(Configuration)\ - 
$(Platform)\$(Configuration)\ - false - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) - - - Console - true - $(SolutionDir)$(Platform)\$(Configuration) - jemallocd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) - - - - - - - Level3 - Disabled - JEMALLOC_EXPORT=;JEMALLOC_STATIC;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) - MultiThreadedDebug - - - Console - true - $(SolutionDir)$(Platform)\$(Configuration) - jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) - - - - - - - Level3 - Disabled - _DEBUG;%(PreprocessorDefinitions) - ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) - - - Console - true - jemallocd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) - $(SolutionDir)$(Platform)\$(Configuration) - - - - - - - Level3 - Disabled - JEMALLOC_EXPORT=;JEMALLOC_STATIC;_DEBUG;%(PreprocessorDefinitions) - ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) - MultiThreadedDebug - - - Console - true - jemalloc-vc$(PlatformToolsetVersion)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) - $(SolutionDir)$(Platform)\$(Configuration) - - - - - Level3 - - - MaxSpeed - true - 
true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) - - - Console - true - true - true - $(SolutionDir)$(Platform)\$(Configuration) - jemalloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) - - - - - Level3 - - - MaxSpeed - true - true - JEMALLOC_EXPORT=;JEMALLOC_STATIC;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) - MultiThreaded - - - Console - true - true - true - $(SolutionDir)$(Platform)\$(Configuration) - jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) - - - - - Level3 - - - MaxSpeed - true - true - NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) - - - Console - true - true - true - $(SolutionDir)$(Platform)\$(Configuration) - jemalloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) - - - - - Level3 - - - MaxSpeed - true - true - JEMALLOC_EXPORT=;JEMALLOC_STATIC;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) - MultiThreaded - - - Console - true - true - true - $(SolutionDir)$(Platform)\$(Configuration) - jemalloc-vc$(PlatformToolsetVersion)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) 
- - - - - - - - - {8d6bb292-9e1c-413d-9f98-4864bdc1514a} - - - - - - - - - \ No newline at end of file diff --git a/deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads.vcxproj.filters b/deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads.vcxproj.filters deleted file mode 100644 index 4c2334073..000000000 --- a/deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads.vcxproj.filters +++ /dev/null @@ -1,26 +0,0 @@ - - - - - {4FC737F1-C7A5-4376-A066-2A32D752A2FF} - cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx - - - {93995380-89BD-4b04-88EB-625FBE52EBFB} - h;hh;hpp;hxx;hm;inl;inc;xsd - - - - - Source Files - - - Source Files - - - - - Header Files - - - \ No newline at end of file diff --git a/deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads_main.cpp b/deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads_main.cpp deleted file mode 100644 index ffd96e6ab..000000000 --- a/deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads_main.cpp +++ /dev/null @@ -1,12 +0,0 @@ -#include "test_threads.h" -#include -#include -#include - -using namespace std::chrono_literals; - -int main(int argc, char** argv) -{ - int rc = test_threads(); - return rc; -} diff --git a/deps/jemalloc/src/arena.c b/deps/jemalloc/src/arena.c index 648a8da3a..3081519cc 100644 --- a/deps/jemalloc/src/arena.c +++ b/deps/jemalloc/src/arena.c @@ -4,23 +4,16 @@ /******************************************************************************/ /* Data. */ -purge_mode_t opt_purge = PURGE_DEFAULT; -const char *purge_mode_names[] = { - "ratio", - "decay", - "N/A" -}; ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT; static ssize_t lg_dirty_mult_default; -ssize_t opt_decay_time = DECAY_TIME_DEFAULT; -static ssize_t decay_time_default; - arena_bin_info_t arena_bin_info[NBINS]; size_t map_bias; size_t map_misc_offset; size_t arena_maxrun; /* Max run size for arenas. */ size_t large_maxclass; /* Max large size class. 
*/ +static size_t small_maxrun; /* Max run size used for small size classes. */ +static bool *small_run_tab; /* Valid small run page multiples. */ unsigned nlclasses; /* Number of large size classes. */ unsigned nhclasses; /* Number of huge size classes. */ @@ -30,57 +23,60 @@ unsigned nhclasses; /* Number of huge size classes. */ * definition. */ -static void arena_chunk_dalloc(tsdn_t *tsdn, arena_t *arena, - arena_chunk_t *chunk); -static void arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, - size_t ndirty_limit); -static void arena_run_dalloc(tsdn_t *tsdn, arena_t *arena, arena_run_t *run, - bool dirty, bool cleaned, bool decommitted); -static void arena_dalloc_bin_run(tsdn_t *tsdn, arena_t *arena, - arena_chunk_t *chunk, arena_run_t *run, arena_bin_t *bin); -static void arena_bin_lower_run(arena_t *arena, arena_run_t *run, - arena_bin_t *bin); +static void arena_purge(arena_t *arena, bool all); +static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, + bool cleaned, bool decommitted); +static void arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, + arena_run_t *run, arena_bin_t *bin); +static void arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, + arena_run_t *run, arena_bin_t *bin); /******************************************************************************/ +#define CHUNK_MAP_KEY ((uintptr_t)0x1U) + +JEMALLOC_INLINE_C arena_chunk_map_misc_t * +arena_miscelm_key_create(size_t size) +{ + + return ((arena_chunk_map_misc_t *)(arena_mapbits_size_encode(size) | + CHUNK_MAP_KEY)); +} + +JEMALLOC_INLINE_C bool +arena_miscelm_is_key(const arena_chunk_map_misc_t *miscelm) +{ + + return (((uintptr_t)miscelm & CHUNK_MAP_KEY) != 0); +} + +#undef CHUNK_MAP_KEY + JEMALLOC_INLINE_C size_t -arena_miscelm_size_get(const arena_chunk_map_misc_t *miscelm) +arena_miscelm_key_size_get(const arena_chunk_map_misc_t *miscelm) +{ + + assert(arena_miscelm_is_key(miscelm)); + + return (arena_mapbits_size_decode((uintptr_t)miscelm)); +} + 
+JEMALLOC_INLINE_C size_t +arena_miscelm_size_get(arena_chunk_map_misc_t *miscelm) { arena_chunk_t *chunk; size_t pageind, mapbits; + assert(!arena_miscelm_is_key(miscelm)); + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); pageind = arena_miscelm_to_pageind(miscelm); mapbits = arena_mapbits_get(chunk, pageind); return (arena_mapbits_size_decode(mapbits)); } -JEMALLOC_INLINE_C const extent_node_t * -arena_miscelm_extent_get(const arena_chunk_map_misc_t *miscelm) -{ - arena_chunk_t *chunk; - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); - return (&chunk->node); -} - JEMALLOC_INLINE_C int -arena_sn_comp(const arena_chunk_map_misc_t *a, const arena_chunk_map_misc_t *b) -{ - size_t a_sn, b_sn; - - assert(a != NULL); - assert(b != NULL); - - a_sn = extent_node_sn_get(arena_miscelm_extent_get(a)); - b_sn = extent_node_sn_get(arena_miscelm_extent_get(b)); - - return ((a_sn > b_sn) - (a_sn < b_sn)); -} - -JEMALLOC_INLINE_C int -arena_ad_comp(const arena_chunk_map_misc_t *a, - const arena_chunk_map_misc_t *b) +arena_run_comp(arena_chunk_map_misc_t *a, arena_chunk_map_misc_t *b) { uintptr_t a_miscelm = (uintptr_t)a; uintptr_t b_miscelm = (uintptr_t)b; @@ -91,79 +87,74 @@ arena_ad_comp(const arena_chunk_map_misc_t *a, return ((a_miscelm > b_miscelm) - (a_miscelm < b_miscelm)); } -JEMALLOC_INLINE_C int -arena_snad_comp(const arena_chunk_map_misc_t *a, - const arena_chunk_map_misc_t *b) -{ - int ret; +/* Generate red-black tree functions. */ +rb_gen(static UNUSED, arena_run_tree_, arena_run_tree_t, arena_chunk_map_misc_t, + rb_link, arena_run_comp) - assert(a != NULL); - assert(b != NULL); - - ret = arena_sn_comp(a, b); - if (ret != 0) - return (ret); - - ret = arena_ad_comp(a, b); - return (ret); -} - -/* Generate pairing heap functions. 
*/ -ph_gen(static UNUSED, arena_run_heap_, arena_run_heap_t, arena_chunk_map_misc_t, - ph_link, arena_snad_comp) - -#ifdef JEMALLOC_JET -#undef run_quantize_floor -#define run_quantize_floor JEMALLOC_N(n_run_quantize_floor) -#endif static size_t -run_quantize_floor(size_t size) +run_quantize(size_t size) { - size_t ret; - pszind_t pind; - - assert(size > 0); - assert(size <= HUGE_MAXCLASS); - assert((size & PAGE_MASK) == 0); + size_t qsize; assert(size != 0); assert(size == PAGE_CEILING(size)); - pind = psz2ind(size - large_pad + 1); - if (pind == 0) { - /* - * Avoid underflow. This short-circuit would also do the right - * thing for all sizes in the range for which there are - * PAGE-spaced size classes, but it's simplest to just handle - * the one case that would cause erroneous results. - */ + /* Don't change sizes that are valid small run sizes. */ + if (size <= small_maxrun && small_run_tab[size >> LG_PAGE]) return (size); - } - ret = pind2sz(pind - 1) + large_pad; - assert(ret <= size); - return (ret); + + /* + * Round down to the nearest run size that can actually be requested + * during normal large allocation. Add large_pad so that cache index + * randomization can offset the allocation from the page boundary. 
+ */ + qsize = index2size(size2index(size - large_pad + 1) - 1) + large_pad; + if (qsize <= SMALL_MAXCLASS + large_pad) + return (run_quantize(size - large_pad)); + assert(qsize <= size); + return (qsize); } -#ifdef JEMALLOC_JET -#undef run_quantize_floor -#define run_quantize_floor JEMALLOC_N(run_quantize_floor) -run_quantize_t *run_quantize_floor = JEMALLOC_N(n_run_quantize_floor); -#endif -#ifdef JEMALLOC_JET -#undef run_quantize_ceil -#define run_quantize_ceil JEMALLOC_N(n_run_quantize_ceil) -#endif static size_t -run_quantize_ceil(size_t size) +run_quantize_next(size_t size) { - size_t ret; + size_t large_run_size_next; - assert(size > 0); - assert(size <= HUGE_MAXCLASS); - assert((size & PAGE_MASK) == 0); + assert(size != 0); + assert(size == PAGE_CEILING(size)); - ret = run_quantize_floor(size); - if (ret < size) { + /* + * Return the next quantized size greater than the input size. + * Quantized sizes comprise the union of run sizes that back small + * region runs, and run sizes that back large regions with no explicit + * alignment constraints. + */ + + if (size > SMALL_MAXCLASS) { + large_run_size_next = PAGE_CEILING(index2size(size2index(size - + large_pad) + 1) + large_pad); + } else + large_run_size_next = SIZE_T_MAX; + if (size >= small_maxrun) + return (large_run_size_next); + + while (true) { + size += PAGE; + assert(size <= small_maxrun); + if (small_run_tab[size >> LG_PAGE]) { + if (large_run_size_next < size) + return (large_run_size_next); + return (size); + } + } +} + +static size_t +run_quantize_first(size_t size) +{ + size_t qsize = run_quantize(size); + + if (qsize < size) { /* * Skip a quantization that may have an adequately large run, * because under-sized runs may be mixed in. This only happens @@ -172,50 +163,72 @@ run_quantize_ceil(size_t size) * search would potentially find sufficiently aligned available * memory somewhere lower. 
*/ - ret = pind2sz(psz2ind(ret - large_pad + 1)) + large_pad; + qsize = run_quantize_next(size); } + return (qsize); +} + +JEMALLOC_INLINE_C int +arena_avail_comp(arena_chunk_map_misc_t *a, arena_chunk_map_misc_t *b) +{ + int ret; + uintptr_t a_miscelm = (uintptr_t)a; + size_t a_qsize = run_quantize(arena_miscelm_is_key(a) ? + arena_miscelm_key_size_get(a) : arena_miscelm_size_get(a)); + size_t b_qsize = run_quantize(arena_miscelm_size_get(b)); + + /* + * Compare based on quantized size rather than size, in order to sort + * equally useful runs only by address. + */ + ret = (a_qsize > b_qsize) - (a_qsize < b_qsize); + if (ret == 0) { + if (!arena_miscelm_is_key(a)) { + uintptr_t b_miscelm = (uintptr_t)b; + + ret = (a_miscelm > b_miscelm) - (a_miscelm < b_miscelm); + } else { + /* + * Treat keys as if they are lower than anything else. + */ + ret = -1; + } + } + return (ret); } -#ifdef JEMALLOC_JET -#undef run_quantize_ceil -#define run_quantize_ceil JEMALLOC_N(run_quantize_ceil) -run_quantize_t *run_quantize_ceil = JEMALLOC_N(n_run_quantize_ceil); -#endif + +/* Generate red-black tree functions. 
*/ +rb_gen(static UNUSED, arena_avail_tree_, arena_avail_tree_t, + arena_chunk_map_misc_t, rb_link, arena_avail_comp) static void arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, size_t npages) { - pszind_t pind = psz2ind(run_quantize_floor(arena_miscelm_size_get( - arena_miscelm_get_const(chunk, pageind)))); + assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); - assert((npages << LG_PAGE) < chunksize); - assert(pind2sz(pind) <= chunksize); - arena_run_heap_insert(&arena->runs_avail[pind], - arena_miscelm_get_mutable(chunk, pageind)); + arena_avail_tree_insert(&arena->runs_avail, arena_miscelm_get(chunk, + pageind)); } static void arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, size_t npages) { - pszind_t pind = psz2ind(run_quantize_floor(arena_miscelm_size_get( - arena_miscelm_get_const(chunk, pageind)))); + assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); - assert((npages << LG_PAGE) < chunksize); - assert(pind2sz(pind) <= chunksize); - arena_run_heap_remove(&arena->runs_avail[pind], - arena_miscelm_get_mutable(chunk, pageind)); + arena_avail_tree_remove(&arena->runs_avail, arena_miscelm_get(chunk, + pageind)); } static void arena_run_dirty_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, size_t npages) { - arena_chunk_map_misc_t *miscelm = arena_miscelm_get_mutable(chunk, - pageind); + arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); @@ -232,8 +245,7 @@ static void arena_run_dirty_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, size_t npages) { - arena_chunk_map_misc_t *miscelm = arena_miscelm_get_mutable(chunk, - pageind); + arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); @@ -280,14 +292,14 @@ JEMALLOC_INLINE_C void * 
arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info) { void *ret; - size_t regind; + unsigned regind; arena_chunk_map_misc_t *miscelm; void *rpages; assert(run->nfree > 0); assert(!bitmap_full(run->bitmap, &bin_info->bitmap_info)); - regind = (unsigned)bitmap_sfu(run->bitmap, &bin_info->bitmap_info); + regind = bitmap_sfu(run->bitmap, &bin_info->bitmap_info); miscelm = arena_run_to_miscelm(run); rpages = arena_miscelm_to_rpages(miscelm); ret = (void *)((uintptr_t)rpages + (uintptr_t)bin_info->reg0_offset + @@ -304,7 +316,7 @@ arena_run_reg_dalloc(arena_run_t *run, void *ptr) size_t mapbits = arena_mapbits_get(chunk, pageind); szind_t binind = arena_ptr_small_binind_get(ptr, mapbits); arena_bin_info_t *bin_info = &arena_bin_info[binind]; - size_t regind = arena_run_regind(run, bin_info, ptr); + unsigned regind = arena_run_regind(run, bin_info, ptr); assert(run->nfree < bin_info->nregs); /* Freeing an interior pointer can cause assertion failure. */ @@ -352,30 +364,16 @@ arena_run_page_validate_zeroed(arena_chunk_t *chunk, size_t run_ind) } static void -arena_nactive_add(arena_t *arena, size_t add_pages) +arena_cactive_update(arena_t *arena, size_t add_pages, size_t sub_pages) { if (config_stats) { - size_t cactive_add = CHUNK_CEILING((arena->nactive + - add_pages) << LG_PAGE) - CHUNK_CEILING(arena->nactive << + ssize_t cactive_diff = CHUNK_CEILING((arena->nactive + add_pages + - sub_pages) << LG_PAGE) - CHUNK_CEILING(arena->nactive << LG_PAGE); - if (cactive_add != 0) - stats_cactive_add(cactive_add); + if (cactive_diff != 0) + stats_cactive_add(cactive_diff); } - arena->nactive += add_pages; -} - -static void -arena_nactive_sub(arena_t *arena, size_t sub_pages) -{ - - if (config_stats) { - size_t cactive_sub = CHUNK_CEILING(arena->nactive << LG_PAGE) - - CHUNK_CEILING((arena->nactive - sub_pages) << LG_PAGE); - if (cactive_sub != 0) - stats_cactive_sub(cactive_sub); - } - arena->nactive -= sub_pages; } static void @@ -396,7 +394,8 @@ 
arena_run_split_remove(arena_t *arena, arena_chunk_t *chunk, size_t run_ind, arena_avail_remove(arena, chunk, run_ind, total_pages); if (flag_dirty != 0) arena_run_dirty_remove(arena, chunk, run_ind, total_pages); - arena_nactive_add(arena, need_pages); + arena_cactive_update(arena, need_pages, 0); + arena->nactive += need_pages; /* Keep track of trailing unused pages for later use. */ if (rem_pages > 0) { @@ -568,8 +567,7 @@ arena_chunk_init_spare(arena_t *arena) } static bool -arena_chunk_register(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, - size_t sn, bool zero) +arena_chunk_register(arena_t *arena, arena_chunk_t *chunk, bool zero) { /* @@ -578,67 +576,64 @@ arena_chunk_register(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, * of runs is tracked individually, and upon chunk deallocation the * entire chunk is in a consistent commit state. */ - extent_node_init(&chunk->node, arena, chunk, chunksize, sn, zero, true); + extent_node_init(&chunk->node, arena, chunk, chunksize, zero, true); extent_node_achunk_set(&chunk->node, true); - return (chunk_register(tsdn, chunk, &chunk->node)); + return (chunk_register(chunk, &chunk->node)); } static arena_chunk_t * -arena_chunk_alloc_internal_hard(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, bool *zero, bool *commit) +arena_chunk_alloc_internal_hard(arena_t *arena, chunk_hooks_t *chunk_hooks, + bool *zero, bool *commit) { arena_chunk_t *chunk; - size_t sn; - malloc_mutex_unlock(tsdn, &arena->lock); + malloc_mutex_unlock(&arena->lock); - chunk = (arena_chunk_t *)chunk_alloc_wrapper(tsdn, arena, chunk_hooks, - NULL, chunksize, chunksize, &sn, zero, commit); + chunk = (arena_chunk_t *)chunk_alloc_wrapper(arena, chunk_hooks, NULL, + chunksize, chunksize, zero, commit); if (chunk != NULL && !*commit) { /* Commit header. 
*/ if (chunk_hooks->commit(chunk, chunksize, 0, map_bias << LG_PAGE, arena->ind)) { - chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, - (void *)chunk, chunksize, sn, *zero, *commit); + chunk_dalloc_wrapper(arena, chunk_hooks, + (void *)chunk, chunksize, *commit); chunk = NULL; } } - if (chunk != NULL && arena_chunk_register(tsdn, arena, chunk, sn, - *zero)) { + if (chunk != NULL && arena_chunk_register(arena, chunk, *zero)) { if (!*commit) { /* Undo commit of header. */ chunk_hooks->decommit(chunk, chunksize, 0, map_bias << LG_PAGE, arena->ind); } - chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, (void *)chunk, - chunksize, sn, *zero, *commit); + chunk_dalloc_wrapper(arena, chunk_hooks, (void *)chunk, + chunksize, *commit); chunk = NULL; } - malloc_mutex_lock(tsdn, &arena->lock); + malloc_mutex_lock(&arena->lock); return (chunk); } static arena_chunk_t * -arena_chunk_alloc_internal(tsdn_t *tsdn, arena_t *arena, bool *zero, - bool *commit) +arena_chunk_alloc_internal(arena_t *arena, bool *zero, bool *commit) { arena_chunk_t *chunk; chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; - size_t sn; - chunk = chunk_alloc_cache(tsdn, arena, &chunk_hooks, NULL, chunksize, - chunksize, &sn, zero, commit, true); + chunk = chunk_alloc_cache(arena, &chunk_hooks, NULL, chunksize, + chunksize, zero, true); if (chunk != NULL) { - if (arena_chunk_register(tsdn, arena, chunk, sn, *zero)) { - chunk_dalloc_cache(tsdn, arena, &chunk_hooks, chunk, - chunksize, sn, true); + if (arena_chunk_register(arena, chunk, *zero)) { + chunk_dalloc_cache(arena, &chunk_hooks, chunk, + chunksize, true); return (NULL); } + *commit = true; } if (chunk == NULL) { - chunk = arena_chunk_alloc_internal_hard(tsdn, arena, - &chunk_hooks, zero, commit); + chunk = arena_chunk_alloc_internal_hard(arena, &chunk_hooks, + zero, commit); } if (config_stats && chunk != NULL) { @@ -650,7 +645,7 @@ arena_chunk_alloc_internal(tsdn_t *tsdn, arena_t *arena, bool *zero, } static arena_chunk_t * 
-arena_chunk_init_hard(tsdn_t *tsdn, arena_t *arena) +arena_chunk_init_hard(arena_t *arena) { arena_chunk_t *chunk; bool zero, commit; @@ -660,16 +655,14 @@ arena_chunk_init_hard(tsdn_t *tsdn, arena_t *arena) zero = false; commit = false; - chunk = arena_chunk_alloc_internal(tsdn, arena, &zero, &commit); + chunk = arena_chunk_alloc_internal(arena, &zero, &commit); if (chunk == NULL) return (NULL); - chunk->hugepage = true; - /* * Initialize the map to contain one maximal free untouched run. Mark - * the pages as zeroed if arena_chunk_alloc_internal() returned a zeroed - * or decommitted chunk. + * the pages as zeroed if chunk_alloc() returned a zeroed or decommitted + * chunk. */ flag_unzeroed = (zero || !commit) ? 0 : CHUNK_MAP_UNZEROED; flag_decommitted = commit ? 0 : CHUNK_MAP_DECOMMITTED; @@ -681,18 +674,17 @@ arena_chunk_init_hard(tsdn_t *tsdn, arena_t *arena) */ if (!zero) { JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED( - (void *)arena_bitselm_get_const(chunk, map_bias+1), - (size_t)((uintptr_t)arena_bitselm_get_const(chunk, - chunk_npages-1) - - (uintptr_t)arena_bitselm_get_const(chunk, map_bias+1))); + (void *)arena_bitselm_get(chunk, map_bias+1), + (size_t)((uintptr_t) arena_bitselm_get(chunk, + chunk_npages-1) - (uintptr_t)arena_bitselm_get(chunk, + map_bias+1))); for (i = map_bias+1; i < chunk_npages-1; i++) arena_mapbits_internal_set(chunk, i, flag_unzeroed); } else { JEMALLOC_VALGRIND_MAKE_MEM_DEFINED((void - *)arena_bitselm_get_const(chunk, map_bias+1), - (size_t)((uintptr_t)arena_bitselm_get_const(chunk, - chunk_npages-1) - - (uintptr_t)arena_bitselm_get_const(chunk, map_bias+1))); + *)arena_bitselm_get(chunk, map_bias+1), (size_t)((uintptr_t) + arena_bitselm_get(chunk, chunk_npages-1) - + (uintptr_t)arena_bitselm_get(chunk, map_bias+1))); if (config_debug) { for (i = map_bias+1; i < chunk_npages-1; i++) { assert(arena_mapbits_unzeroed_get(chunk, i) == @@ -707,84 +699,27 @@ arena_chunk_init_hard(tsdn_t *tsdn, arena_t *arena) } static arena_chunk_t * 
-arena_chunk_alloc(tsdn_t *tsdn, arena_t *arena) +arena_chunk_alloc(arena_t *arena) { arena_chunk_t *chunk; if (arena->spare != NULL) chunk = arena_chunk_init_spare(arena); else { - chunk = arena_chunk_init_hard(tsdn, arena); + chunk = arena_chunk_init_hard(arena); if (chunk == NULL) return (NULL); } - ql_elm_new(&chunk->node, ql_link); - ql_tail_insert(&arena->achunks, &chunk->node, ql_link); + /* Insert the run into the runs_avail tree. */ arena_avail_insert(arena, chunk, map_bias, chunk_npages-map_bias); return (chunk); } static void -arena_chunk_discard(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk) +arena_chunk_dalloc(arena_t *arena, arena_chunk_t *chunk) { - size_t sn, hugepage; - bool committed; - chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; - - chunk_deregister(chunk, &chunk->node); - - sn = extent_node_sn_get(&chunk->node); - hugepage = chunk->hugepage; - committed = (arena_mapbits_decommitted_get(chunk, map_bias) == 0); - if (!committed) { - /* - * Decommit the header. Mark the chunk as decommitted even if - * header decommit fails, since treating a partially committed - * chunk as committed has a high potential for causing later - * access of decommitted memory. - */ - chunk_hooks = chunk_hooks_get(tsdn, arena); - chunk_hooks.decommit(chunk, chunksize, 0, map_bias << LG_PAGE, - arena->ind); - } - if (!hugepage) { - /* - * Convert chunk back to the default state, so that all - * subsequent chunk allocations start out with chunks that can - * be backed by transparent huge pages. 
- */ - pages_huge(chunk, chunksize); - } - - chunk_dalloc_cache(tsdn, arena, &chunk_hooks, (void *)chunk, chunksize, - sn, committed); - - if (config_stats) { - arena->stats.mapped -= chunksize; - arena->stats.metadata_mapped -= (map_bias << LG_PAGE); - } -} - -static void -arena_spare_discard(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *spare) -{ - - assert(arena->spare != spare); - - if (arena_mapbits_dirty_get(spare, map_bias) != 0) { - arena_run_dirty_remove(arena, spare, map_bias, - chunk_npages-map_bias); - } - - arena_chunk_discard(tsdn, arena, spare); -} - -static void -arena_chunk_dalloc(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk) -{ - arena_chunk_t *spare; assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0); @@ -797,14 +732,49 @@ arena_chunk_dalloc(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk) assert(arena_mapbits_decommitted_get(chunk, map_bias) == arena_mapbits_decommitted_get(chunk, chunk_npages-1)); - /* Remove run from runs_avail, so that the arena does not use it. */ + /* + * Remove run from the runs_avail tree, so that the arena does not use + * it. + */ arena_avail_remove(arena, chunk, map_bias, chunk_npages-map_bias); - ql_remove(&arena->achunks, &chunk->node, ql_link); - spare = arena->spare; - arena->spare = chunk; - if (spare != NULL) - arena_spare_discard(tsdn, arena, spare); + if (arena->spare != NULL) { + arena_chunk_t *spare = arena->spare; + chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; + bool committed; + + arena->spare = chunk; + if (arena_mapbits_dirty_get(spare, map_bias) != 0) { + arena_run_dirty_remove(arena, spare, map_bias, + chunk_npages-map_bias); + } + + chunk_deregister(spare, &spare->node); + + committed = (arena_mapbits_decommitted_get(spare, map_bias) == + 0); + if (!committed) { + /* + * Decommit the header. 
Mark the chunk as decommitted + * even if header decommit fails, since treating a + * partially committed chunk as committed has a high + * potential for causing later access of decommitted + * memory. + */ + chunk_hooks = chunk_hooks_get(arena); + chunk_hooks.decommit(spare, chunksize, 0, map_bias << + LG_PAGE, arena->ind); + } + + chunk_dalloc_cache(arena, &chunk_hooks, (void *)spare, + chunksize, committed); + + if (config_stats) { + arena->stats.mapped -= chunksize; + arena->stats.metadata_mapped -= (map_bias << LG_PAGE); + } + } else + arena->spare = chunk; } static void @@ -846,17 +816,6 @@ arena_huge_dalloc_stats_update(arena_t *arena, size_t usize) arena->stats.hstats[index].curhchunks--; } -static void -arena_huge_reset_stats_cancel(arena_t *arena, size_t usize) -{ - szind_t index = size2index(usize) - nlclasses - NBINS; - - cassert(config_stats); - - arena->stats.ndalloc_huge++; - arena->stats.hstats[index].ndalloc--; -} - static void arena_huge_dalloc_stats_update_undo(arena_t *arena, size_t usize) { @@ -888,240 +847,243 @@ arena_huge_ralloc_stats_update_undo(arena_t *arena, size_t oldsize, } extent_node_t * -arena_node_alloc(tsdn_t *tsdn, arena_t *arena) +arena_node_alloc(arena_t *arena) { extent_node_t *node; - malloc_mutex_lock(tsdn, &arena->node_cache_mtx); + malloc_mutex_lock(&arena->node_cache_mtx); node = ql_last(&arena->node_cache, ql_link); if (node == NULL) { - malloc_mutex_unlock(tsdn, &arena->node_cache_mtx); - return (base_alloc(tsdn, sizeof(extent_node_t))); + malloc_mutex_unlock(&arena->node_cache_mtx); + return (base_alloc(sizeof(extent_node_t))); } ql_tail_remove(&arena->node_cache, extent_node_t, ql_link); - malloc_mutex_unlock(tsdn, &arena->node_cache_mtx); + malloc_mutex_unlock(&arena->node_cache_mtx); return (node); } void -arena_node_dalloc(tsdn_t *tsdn, arena_t *arena, extent_node_t *node) +arena_node_dalloc(arena_t *arena, extent_node_t *node) { - malloc_mutex_lock(tsdn, &arena->node_cache_mtx); + 
malloc_mutex_lock(&arena->node_cache_mtx); ql_elm_new(node, ql_link); ql_tail_insert(&arena->node_cache, node, ql_link); - malloc_mutex_unlock(tsdn, &arena->node_cache_mtx); + malloc_mutex_unlock(&arena->node_cache_mtx); } static void * -arena_chunk_alloc_huge_hard(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, size_t usize, size_t alignment, size_t *sn, - bool *zero, size_t csize) +arena_chunk_alloc_huge_hard(arena_t *arena, chunk_hooks_t *chunk_hooks, + size_t usize, size_t alignment, bool *zero, size_t csize) { void *ret; bool commit = true; - ret = chunk_alloc_wrapper(tsdn, arena, chunk_hooks, NULL, csize, - alignment, sn, zero, &commit); + ret = chunk_alloc_wrapper(arena, chunk_hooks, NULL, csize, alignment, + zero, &commit); if (ret == NULL) { /* Revert optimistic stats updates. */ - malloc_mutex_lock(tsdn, &arena->lock); + malloc_mutex_lock(&arena->lock); if (config_stats) { arena_huge_malloc_stats_update_undo(arena, usize); arena->stats.mapped -= usize; } - arena_nactive_sub(arena, usize >> LG_PAGE); - malloc_mutex_unlock(tsdn, &arena->lock); + arena->nactive -= (usize >> LG_PAGE); + malloc_mutex_unlock(&arena->lock); } return (ret); } void * -arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize, - size_t alignment, size_t *sn, bool *zero) +arena_chunk_alloc_huge(arena_t *arena, size_t usize, size_t alignment, + bool *zero) { void *ret; chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; size_t csize = CHUNK_CEILING(usize); - bool commit = true; - malloc_mutex_lock(tsdn, &arena->lock); + malloc_mutex_lock(&arena->lock); /* Optimistically update stats. 
*/ if (config_stats) { arena_huge_malloc_stats_update(arena, usize); arena->stats.mapped += usize; } - arena_nactive_add(arena, usize >> LG_PAGE); + arena->nactive += (usize >> LG_PAGE); - ret = chunk_alloc_cache(tsdn, arena, &chunk_hooks, NULL, csize, - alignment, sn, zero, &commit, true); - malloc_mutex_unlock(tsdn, &arena->lock); + ret = chunk_alloc_cache(arena, &chunk_hooks, NULL, csize, alignment, + zero, true); + malloc_mutex_unlock(&arena->lock); if (ret == NULL) { - ret = arena_chunk_alloc_huge_hard(tsdn, arena, &chunk_hooks, - usize, alignment, sn, zero, csize); + ret = arena_chunk_alloc_huge_hard(arena, &chunk_hooks, usize, + alignment, zero, csize); } + if (config_stats && ret != NULL) + stats_cactive_add(usize); return (ret); } void -arena_chunk_dalloc_huge(tsdn_t *tsdn, arena_t *arena, void *chunk, size_t usize, - size_t sn) +arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t usize) { chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; size_t csize; csize = CHUNK_CEILING(usize); - malloc_mutex_lock(tsdn, &arena->lock); + malloc_mutex_lock(&arena->lock); if (config_stats) { arena_huge_dalloc_stats_update(arena, usize); arena->stats.mapped -= usize; + stats_cactive_sub(usize); } - arena_nactive_sub(arena, usize >> LG_PAGE); + arena->nactive -= (usize >> LG_PAGE); - chunk_dalloc_cache(tsdn, arena, &chunk_hooks, chunk, csize, sn, true); - malloc_mutex_unlock(tsdn, &arena->lock); + chunk_dalloc_cache(arena, &chunk_hooks, chunk, csize, true); + malloc_mutex_unlock(&arena->lock); } void -arena_chunk_ralloc_huge_similar(tsdn_t *tsdn, arena_t *arena, void *chunk, - size_t oldsize, size_t usize) +arena_chunk_ralloc_huge_similar(arena_t *arena, void *chunk, size_t oldsize, + size_t usize) { assert(CHUNK_CEILING(oldsize) == CHUNK_CEILING(usize)); assert(oldsize != usize); - malloc_mutex_lock(tsdn, &arena->lock); + malloc_mutex_lock(&arena->lock); if (config_stats) arena_huge_ralloc_stats_update(arena, oldsize, usize); - if (oldsize < usize) - 
arena_nactive_add(arena, (usize - oldsize) >> LG_PAGE); - else - arena_nactive_sub(arena, (oldsize - usize) >> LG_PAGE); - malloc_mutex_unlock(tsdn, &arena->lock); + if (oldsize < usize) { + size_t udiff = usize - oldsize; + arena->nactive += udiff >> LG_PAGE; + if (config_stats) + stats_cactive_add(udiff); + } else { + size_t udiff = oldsize - usize; + arena->nactive -= udiff >> LG_PAGE; + if (config_stats) + stats_cactive_sub(udiff); + } + malloc_mutex_unlock(&arena->lock); } void -arena_chunk_ralloc_huge_shrink(tsdn_t *tsdn, arena_t *arena, void *chunk, - size_t oldsize, size_t usize, size_t sn) +arena_chunk_ralloc_huge_shrink(arena_t *arena, void *chunk, size_t oldsize, + size_t usize) { size_t udiff = oldsize - usize; size_t cdiff = CHUNK_CEILING(oldsize) - CHUNK_CEILING(usize); - malloc_mutex_lock(tsdn, &arena->lock); + malloc_mutex_lock(&arena->lock); if (config_stats) { arena_huge_ralloc_stats_update(arena, oldsize, usize); - if (cdiff != 0) + if (cdiff != 0) { arena->stats.mapped -= cdiff; + stats_cactive_sub(udiff); + } } - arena_nactive_sub(arena, udiff >> LG_PAGE); + arena->nactive -= udiff >> LG_PAGE; if (cdiff != 0) { chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; void *nchunk = (void *)((uintptr_t)chunk + CHUNK_CEILING(usize)); - chunk_dalloc_cache(tsdn, arena, &chunk_hooks, nchunk, cdiff, - sn, true); + chunk_dalloc_cache(arena, &chunk_hooks, nchunk, cdiff, true); } - malloc_mutex_unlock(tsdn, &arena->lock); + malloc_mutex_unlock(&arena->lock); } static bool -arena_chunk_ralloc_huge_expand_hard(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, void *chunk, size_t oldsize, size_t usize, - size_t *sn, bool *zero, void *nchunk, size_t udiff, size_t cdiff) +arena_chunk_ralloc_huge_expand_hard(arena_t *arena, chunk_hooks_t *chunk_hooks, + void *chunk, size_t oldsize, size_t usize, bool *zero, void *nchunk, + size_t udiff, size_t cdiff) { bool err; bool commit = true; - err = (chunk_alloc_wrapper(tsdn, arena, chunk_hooks, nchunk, cdiff, - 
chunksize, sn, zero, &commit) == NULL); + err = (chunk_alloc_wrapper(arena, chunk_hooks, nchunk, cdiff, chunksize, + zero, &commit) == NULL); if (err) { /* Revert optimistic stats updates. */ - malloc_mutex_lock(tsdn, &arena->lock); + malloc_mutex_lock(&arena->lock); if (config_stats) { arena_huge_ralloc_stats_update_undo(arena, oldsize, usize); arena->stats.mapped -= cdiff; } - arena_nactive_sub(arena, udiff >> LG_PAGE); - malloc_mutex_unlock(tsdn, &arena->lock); + arena->nactive -= (udiff >> LG_PAGE); + malloc_mutex_unlock(&arena->lock); } else if (chunk_hooks->merge(chunk, CHUNK_CEILING(oldsize), nchunk, cdiff, true, arena->ind)) { - chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, nchunk, cdiff, - *sn, *zero, true); + chunk_dalloc_arena(arena, chunk_hooks, nchunk, cdiff, *zero, + true); err = true; } return (err); } bool -arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, void *chunk, - size_t oldsize, size_t usize, bool *zero) +arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, size_t oldsize, + size_t usize, bool *zero) { bool err; - chunk_hooks_t chunk_hooks = chunk_hooks_get(tsdn, arena); + chunk_hooks_t chunk_hooks = chunk_hooks_get(arena); void *nchunk = (void *)((uintptr_t)chunk + CHUNK_CEILING(oldsize)); size_t udiff = usize - oldsize; size_t cdiff = CHUNK_CEILING(usize) - CHUNK_CEILING(oldsize); - size_t sn; - bool commit = true; - malloc_mutex_lock(tsdn, &arena->lock); + malloc_mutex_lock(&arena->lock); /* Optimistically update stats. 
*/ if (config_stats) { arena_huge_ralloc_stats_update(arena, oldsize, usize); arena->stats.mapped += cdiff; } - arena_nactive_add(arena, udiff >> LG_PAGE); + arena->nactive += (udiff >> LG_PAGE); - err = (chunk_alloc_cache(tsdn, arena, &chunk_hooks, nchunk, cdiff, - chunksize, &sn, zero, &commit, true) == NULL); - malloc_mutex_unlock(tsdn, &arena->lock); + err = (chunk_alloc_cache(arena, &arena->chunk_hooks, nchunk, cdiff, + chunksize, zero, true) == NULL); + malloc_mutex_unlock(&arena->lock); if (err) { - err = arena_chunk_ralloc_huge_expand_hard(tsdn, arena, - &chunk_hooks, chunk, oldsize, usize, &sn, zero, nchunk, - udiff, cdiff); + err = arena_chunk_ralloc_huge_expand_hard(arena, &chunk_hooks, + chunk, oldsize, usize, zero, nchunk, udiff, + cdiff); } else if (chunk_hooks.merge(chunk, CHUNK_CEILING(oldsize), nchunk, cdiff, true, arena->ind)) { - chunk_dalloc_wrapper(tsdn, arena, &chunk_hooks, nchunk, cdiff, - sn, *zero, true); + chunk_dalloc_arena(arena, &chunk_hooks, nchunk, cdiff, *zero, + true); err = true; } + if (config_stats && !err) + stats_cactive_add(udiff); return (err); } /* * Do first-best-fit run selection, i.e. select the lowest run that best fits. - * Run sizes are indexed, so not all candidate runs are necessarily exactly the - * same size. + * Run sizes are quantized, so not all candidate runs are necessarily exactly + * the same size. 
*/ static arena_run_t * arena_run_first_best_fit(arena_t *arena, size_t size) { - pszind_t pind, i; - - pind = psz2ind(run_quantize_ceil(size)); - - for (i = pind; pind2sz(i) <= chunksize; i++) { - arena_chunk_map_misc_t *miscelm = arena_run_heap_first( - &arena->runs_avail[i]); - if (miscelm != NULL) - return (&miscelm->run); - } - - return (NULL); + size_t search_size = run_quantize_first(size); + arena_chunk_map_misc_t *key = arena_miscelm_key_create(search_size); + arena_chunk_map_misc_t *miscelm = + arena_avail_tree_nsearch(&arena->runs_avail, key); + if (miscelm == NULL) + return (NULL); + return (&miscelm->run); } static arena_run_t * arena_run_alloc_large_helper(arena_t *arena, size_t size, bool zero) { - arena_run_t *run = arena_run_first_best_fit(arena, size); + arena_run_t *run = arena_run_first_best_fit(arena, s2u(size)); if (run != NULL) { if (arena_run_split_large(arena, run, size, zero)) run = NULL; @@ -1130,7 +1092,7 @@ arena_run_alloc_large_helper(arena_t *arena, size_t size, bool zero) } static arena_run_t * -arena_run_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t size, bool zero) +arena_run_alloc_large(arena_t *arena, size_t size, bool zero) { arena_chunk_t *chunk; arena_run_t *run; @@ -1146,9 +1108,9 @@ arena_run_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t size, bool zero) /* * No usable runs. Create a new chunk from which to allocate the run. 
*/ - chunk = arena_chunk_alloc(tsdn, arena); + chunk = arena_chunk_alloc(arena); if (chunk != NULL) { - run = &arena_miscelm_get_mutable(chunk, map_bias)->run; + run = &arena_miscelm_get(chunk, map_bias)->run; if (arena_run_split_large(arena, run, size, zero)) run = NULL; return (run); @@ -1174,7 +1136,7 @@ arena_run_alloc_small_helper(arena_t *arena, size_t size, szind_t binind) } static arena_run_t * -arena_run_alloc_small(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t binind) +arena_run_alloc_small(arena_t *arena, size_t size, szind_t binind) { arena_chunk_t *chunk; arena_run_t *run; @@ -1191,9 +1153,9 @@ arena_run_alloc_small(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t binind) /* * No usable runs. Create a new chunk from which to allocate the run. */ - chunk = arena_chunk_alloc(tsdn, arena); + chunk = arena_chunk_alloc(arena); if (chunk != NULL) { - run = &arena_miscelm_get_mutable(chunk, map_bias)->run; + run = &arena_miscelm_get(chunk, map_bias)->run; if (arena_run_split_small(arena, run, size, binind)) run = NULL; return (run); @@ -1216,239 +1178,42 @@ arena_lg_dirty_mult_valid(ssize_t lg_dirty_mult) } ssize_t -arena_lg_dirty_mult_get(tsdn_t *tsdn, arena_t *arena) +arena_lg_dirty_mult_get(arena_t *arena) { ssize_t lg_dirty_mult; - malloc_mutex_lock(tsdn, &arena->lock); + malloc_mutex_lock(&arena->lock); lg_dirty_mult = arena->lg_dirty_mult; - malloc_mutex_unlock(tsdn, &arena->lock); + malloc_mutex_unlock(&arena->lock); return (lg_dirty_mult); } bool -arena_lg_dirty_mult_set(tsdn_t *tsdn, arena_t *arena, ssize_t lg_dirty_mult) +arena_lg_dirty_mult_set(arena_t *arena, ssize_t lg_dirty_mult) { if (!arena_lg_dirty_mult_valid(lg_dirty_mult)) return (true); - malloc_mutex_lock(tsdn, &arena->lock); + malloc_mutex_lock(&arena->lock); arena->lg_dirty_mult = lg_dirty_mult; - arena_maybe_purge(tsdn, arena); - malloc_mutex_unlock(tsdn, &arena->lock); + arena_maybe_purge(arena); + malloc_mutex_unlock(&arena->lock); return (false); } -static void 
-arena_decay_deadline_init(arena_t *arena) +void +arena_maybe_purge(arena_t *arena) { - assert(opt_purge == purge_mode_decay); - - /* - * Generate a new deadline that is uniformly random within the next - * epoch after the current one. - */ - nstime_copy(&arena->decay.deadline, &arena->decay.epoch); - nstime_add(&arena->decay.deadline, &arena->decay.interval); - if (arena->decay.time > 0) { - nstime_t jitter; - - nstime_init(&jitter, prng_range_u64(&arena->decay.jitter_state, - nstime_ns(&arena->decay.interval))); - nstime_add(&arena->decay.deadline, &jitter); - } -} - -static bool -arena_decay_deadline_reached(const arena_t *arena, const nstime_t *time) -{ - - assert(opt_purge == purge_mode_decay); - - return (nstime_compare(&arena->decay.deadline, time) <= 0); -} - -static size_t -arena_decay_backlog_npages_limit(const arena_t *arena) -{ - static const uint64_t h_steps[] = { -#define STEP(step, h, x, y) \ - h, - SMOOTHSTEP -#undef STEP - }; - uint64_t sum; - size_t npages_limit_backlog; - unsigned i; - - assert(opt_purge == purge_mode_decay); - - /* - * For each element of decay_backlog, multiply by the corresponding - * fixed-point smoothstep decay factor. Sum the products, then divide - * to round down to the nearest whole number of pages. - */ - sum = 0; - for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) - sum += arena->decay.backlog[i] * h_steps[i]; - npages_limit_backlog = (size_t)(sum >> SMOOTHSTEP_BFP); - - return (npages_limit_backlog); -} - -static void -arena_decay_backlog_update_last(arena_t *arena) -{ - size_t ndirty_delta = (arena->ndirty > arena->decay.ndirty) ? 
- arena->ndirty - arena->decay.ndirty : 0; - arena->decay.backlog[SMOOTHSTEP_NSTEPS-1] = ndirty_delta; -} - -static void -arena_decay_backlog_update(arena_t *arena, uint64_t nadvance_u64) -{ - - if (nadvance_u64 >= SMOOTHSTEP_NSTEPS) { - memset(arena->decay.backlog, 0, (SMOOTHSTEP_NSTEPS-1) * - sizeof(size_t)); - } else { - size_t nadvance_z = (size_t)nadvance_u64; - - assert((uint64_t)nadvance_z == nadvance_u64); - - memmove(arena->decay.backlog, &arena->decay.backlog[nadvance_z], - (SMOOTHSTEP_NSTEPS - nadvance_z) * sizeof(size_t)); - if (nadvance_z > 1) { - memset(&arena->decay.backlog[SMOOTHSTEP_NSTEPS - - nadvance_z], 0, (nadvance_z-1) * sizeof(size_t)); - } - } - - arena_decay_backlog_update_last(arena); -} - -static void -arena_decay_epoch_advance_helper(arena_t *arena, const nstime_t *time) -{ - uint64_t nadvance_u64; - nstime_t delta; - - assert(opt_purge == purge_mode_decay); - assert(arena_decay_deadline_reached(arena, time)); - - nstime_copy(&delta, time); - nstime_subtract(&delta, &arena->decay.epoch); - nadvance_u64 = nstime_divide(&delta, &arena->decay.interval); - assert(nadvance_u64 > 0); - - /* Add nadvance_u64 decay intervals to epoch. */ - nstime_copy(&delta, &arena->decay.interval); - nstime_imultiply(&delta, nadvance_u64); - nstime_add(&arena->decay.epoch, &delta); - - /* Set a new deadline. */ - arena_decay_deadline_init(arena); - - /* Update the backlog. 
*/ - arena_decay_backlog_update(arena, nadvance_u64); -} - -static void -arena_decay_epoch_advance_purge(tsdn_t *tsdn, arena_t *arena) -{ - size_t ndirty_limit = arena_decay_backlog_npages_limit(arena); - - if (arena->ndirty > ndirty_limit) - arena_purge_to_limit(tsdn, arena, ndirty_limit); - arena->decay.ndirty = arena->ndirty; -} - -static void -arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, const nstime_t *time) -{ - - arena_decay_epoch_advance_helper(arena, time); - arena_decay_epoch_advance_purge(tsdn, arena); -} - -static void -arena_decay_init(arena_t *arena, ssize_t decay_time) -{ - - arena->decay.time = decay_time; - if (decay_time > 0) { - nstime_init2(&arena->decay.interval, decay_time, 0); - nstime_idivide(&arena->decay.interval, SMOOTHSTEP_NSTEPS); - } - - nstime_init(&arena->decay.epoch, 0); - nstime_update(&arena->decay.epoch); - arena->decay.jitter_state = (uint64_t)(uintptr_t)arena; - arena_decay_deadline_init(arena); - arena->decay.ndirty = arena->ndirty; - memset(arena->decay.backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t)); -} - -static bool -arena_decay_time_valid(ssize_t decay_time) -{ - - if (decay_time < -1) - return (false); - if (decay_time == -1 || (uint64_t)decay_time <= NSTIME_SEC_MAX) - return (true); - return (false); -} - -ssize_t -arena_decay_time_get(tsdn_t *tsdn, arena_t *arena) -{ - ssize_t decay_time; - - malloc_mutex_lock(tsdn, &arena->lock); - decay_time = arena->decay.time; - malloc_mutex_unlock(tsdn, &arena->lock); - - return (decay_time); -} - -bool -arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time) -{ - - if (!arena_decay_time_valid(decay_time)) - return (true); - - malloc_mutex_lock(tsdn, &arena->lock); - /* - * Restart decay backlog from scratch, which may cause many dirty pages - * to be immediately purged. 
It would conceptually be possible to map - * the old backlog onto the new backlog, but there is no justification - * for such complexity since decay_time changes are intended to be - * infrequent, either between the {-1, 0, >0} states, or a one-time - * arbitrary change during initial arena configuration. - */ - arena_decay_init(arena, decay_time); - arena_maybe_purge(tsdn, arena); - malloc_mutex_unlock(tsdn, &arena->lock); - - return (false); -} - -static void -arena_maybe_purge_ratio(tsdn_t *tsdn, arena_t *arena) -{ - - assert(opt_purge == purge_mode_ratio); - /* Don't purge if the option is disabled. */ if (arena->lg_dirty_mult < 0) return; - + /* Don't recursively purge. */ + if (arena->purging) + return; /* * Iterate, since preventing recursive purging could otherwise leave too * many dirty pages. @@ -1463,68 +1228,10 @@ arena_maybe_purge_ratio(tsdn_t *tsdn, arena_t *arena) */ if (arena->ndirty <= threshold) return; - arena_purge_to_limit(tsdn, arena, threshold); + arena_purge(arena, false); } } -static void -arena_maybe_purge_decay(tsdn_t *tsdn, arena_t *arena) -{ - nstime_t time; - - assert(opt_purge == purge_mode_decay); - - /* Purge all or nothing if the option is disabled. */ - if (arena->decay.time <= 0) { - if (arena->decay.time == 0) - arena_purge_to_limit(tsdn, arena, 0); - return; - } - - nstime_init(&time, 0); - nstime_update(&time); - if (unlikely(!nstime_monotonic() && nstime_compare(&arena->decay.epoch, - &time) > 0)) { - /* - * Time went backwards. Move the epoch back in time and - * generate a new deadline, with the expectation that time - * typically flows forward for long enough periods of time that - * epochs complete. Unfortunately, this strategy is susceptible - * to clock jitter triggering premature epoch advances, but - * clock jitter estimation and compensation isn't feasible here - * because calls into this code are event-driven. 
- */ - nstime_copy(&arena->decay.epoch, &time); - arena_decay_deadline_init(arena); - } else { - /* Verify that time does not go backwards. */ - assert(nstime_compare(&arena->decay.epoch, &time) <= 0); - } - - /* - * If the deadline has been reached, advance to the current epoch and - * purge to the new limit if necessary. Note that dirty pages created - * during the current epoch are not subject to purge until a future - * epoch, so as a result purging only happens during epoch advances. - */ - if (arena_decay_deadline_reached(arena, &time)) - arena_decay_epoch_advance(tsdn, arena, &time); -} - -void -arena_maybe_purge(tsdn_t *tsdn, arena_t *arena) -{ - - /* Don't recursively purge. */ - if (arena->purging) - return; - - if (opt_purge == purge_mode_ratio) - arena_maybe_purge_ratio(tsdn, arena); - else - arena_maybe_purge_decay(tsdn, arena); -} - static size_t arena_dirty_count(arena_t *arena) { @@ -1560,15 +1267,35 @@ arena_dirty_count(arena_t *arena) } static size_t -arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - size_t ndirty_limit, arena_runs_dirty_link_t *purge_runs_sentinel, +arena_compute_npurge(arena_t *arena, bool all) +{ + size_t npurge; + + /* + * Compute the minimum number of pages that this thread should try to + * purge. + */ + if (!all) { + size_t threshold = (arena->nactive >> arena->lg_dirty_mult); + threshold = threshold < chunk_npages ? chunk_npages : threshold; + + npurge = arena->ndirty - threshold; + } else + npurge = arena->ndirty; + + return (npurge); +} + +static size_t +arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks, bool all, + size_t npurge, arena_runs_dirty_link_t *purge_runs_sentinel, extent_node_t *purge_chunks_sentinel) { arena_runs_dirty_link_t *rdelm, *rdelm_next; extent_node_t *chunkselm; size_t nstashed = 0; - /* Stash runs/chunks according to ndirty_limit. */ + /* Stash at least npurge pages. 
*/ for (rdelm = qr_next(&arena->runs_dirty, rd_link), chunkselm = qr_next(&arena->chunks_cache, cc_link); rdelm != &arena->runs_dirty; rdelm = rdelm_next) { @@ -1577,32 +1304,24 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, if (rdelm == &chunkselm->rd) { extent_node_t *chunkselm_next; - size_t sn; - bool zero, commit; + bool zero; UNUSED void *chunk; - npages = extent_node_size_get(chunkselm) >> LG_PAGE; - if (opt_purge == purge_mode_decay && arena->ndirty - - (nstashed + npages) < ndirty_limit) - break; - chunkselm_next = qr_next(chunkselm, cc_link); /* * Allocate. chunkselm remains valid due to the * dalloc_node=false argument to chunk_alloc_cache(). */ zero = false; - commit = false; - chunk = chunk_alloc_cache(tsdn, arena, chunk_hooks, + chunk = chunk_alloc_cache(arena, chunk_hooks, extent_node_addr_get(chunkselm), - extent_node_size_get(chunkselm), chunksize, &sn, - &zero, &commit, false); + extent_node_size_get(chunkselm), chunksize, &zero, + false); assert(chunk == extent_node_addr_get(chunkselm)); assert(zero == extent_node_zeroed_get(chunkselm)); extent_node_dirty_insert(chunkselm, purge_runs_sentinel, purge_chunks_sentinel); - assert(npages == (extent_node_size_get(chunkselm) >> - LG_PAGE)); + npages = extent_node_size_get(chunkselm) >> LG_PAGE; chunkselm = chunkselm_next; } else { arena_chunk_t *chunk = @@ -1615,9 +1334,6 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, arena_mapbits_unallocated_size_get(chunk, pageind); npages = run_size >> LG_PAGE; - if (opt_purge == purge_mode_decay && arena->ndirty - - (nstashed + npages) < ndirty_limit) - break; assert(pageind + npages <= chunk_npages); assert(arena_mapbits_dirty_get(chunk, pageind) == @@ -1628,7 +1344,7 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, * prior to allocation. */ if (chunk == arena->spare) - arena_chunk_alloc(tsdn, arena); + arena_chunk_alloc(arena); /* Temporarily allocate the free dirty run. 
*/ arena_run_split_large(arena, run, run_size, false); @@ -1643,8 +1359,7 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, } nstashed += npages; - if (opt_purge == purge_mode_ratio && arena->ndirty - nstashed <= - ndirty_limit) + if (!all && nstashed >= npurge) break; } @@ -1652,7 +1367,7 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, } static size_t -arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, +arena_purge_stashed(arena_t *arena, chunk_hooks_t *chunk_hooks, arena_runs_dirty_link_t *purge_runs_sentinel, extent_node_t *purge_chunks_sentinel) { @@ -1664,7 +1379,7 @@ arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, nmadvise = 0; npurged = 0; - malloc_mutex_unlock(tsdn, &arena->lock); + malloc_mutex_unlock(&arena->lock); for (rdelm = qr_next(purge_runs_sentinel, rd_link), chunkselm = qr_next(purge_chunks_sentinel, cc_link); rdelm != purge_runs_sentinel; rdelm = qr_next(rdelm, rd_link)) { @@ -1693,17 +1408,6 @@ arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, run_size = arena_mapbits_large_size_get(chunk, pageind); npages = run_size >> LG_PAGE; - /* - * If this is the first run purged within chunk, mark - * the chunk as non-huge. This will prevent all use of - * transparent huge pages for this chunk until the chunk - * as a whole is deallocated. - */ - if (chunk->hugepage) { - pages_nohuge(chunk, chunksize); - chunk->hugepage = false; - } - assert(pageind + npages <= chunk_npages); assert(!arena_mapbits_decommitted_get(chunk, pageind)); assert(!arena_mapbits_decommitted_get(chunk, @@ -1714,7 +1418,7 @@ arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, flag_unzeroed = 0; flags = CHUNK_MAP_DECOMMITTED; } else { - flag_unzeroed = chunk_purge_wrapper(tsdn, arena, + flag_unzeroed = chunk_purge_wrapper(arena, chunk_hooks, chunk, chunksize, pageind << LG_PAGE, run_size) ? 
CHUNK_MAP_UNZEROED : 0; flags = flag_unzeroed; @@ -1745,7 +1449,7 @@ arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, if (config_stats) nmadvise++; } - malloc_mutex_lock(tsdn, &arena->lock); + malloc_mutex_lock(&arena->lock); if (config_stats) { arena->stats.nmadvise += nmadvise; @@ -1756,7 +1460,7 @@ arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, } static void -arena_unstash_purged(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, +arena_unstash_purged(arena_t *arena, chunk_hooks_t *chunk_hooks, arena_runs_dirty_link_t *purge_runs_sentinel, extent_node_t *purge_chunks_sentinel) { @@ -1773,14 +1477,13 @@ arena_unstash_purged(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, cc_link); void *addr = extent_node_addr_get(chunkselm); size_t size = extent_node_size_get(chunkselm); - size_t sn = extent_node_sn_get(chunkselm); bool zeroed = extent_node_zeroed_get(chunkselm); bool committed = extent_node_committed_get(chunkselm); extent_node_dirty_remove(chunkselm); - arena_node_dalloc(tsdn, arena, chunkselm); + arena_node_dalloc(arena, chunkselm); chunkselm = chunkselm_next; - chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, addr, - size, sn, zeroed, committed); + chunk_dalloc_arena(arena, chunk_hooks, addr, size, + zeroed, committed); } else { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(rdelm); @@ -1791,26 +1494,16 @@ arena_unstash_purged(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, pageind) != 0); arena_run_t *run = &miscelm->run; qr_remove(rdelm, rd_link); - arena_run_dalloc(tsdn, arena, run, false, true, - decommitted); + arena_run_dalloc(arena, run, false, true, decommitted); } } } -/* - * NB: ndirty_limit is interpreted differently depending on opt_purge: - * - purge_mode_ratio: Purge as few dirty run/chunks as possible to reach the - * desired state: - * (arena->ndirty <= ndirty_limit) - * - purge_mode_decay: Purge as many dirty runs/chunks as possible without - * 
violating the invariant: - * (arena->ndirty >= ndirty_limit) - */ static void -arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, size_t ndirty_limit) +arena_purge(arena_t *arena, bool all) { - chunk_hooks_t chunk_hooks = chunk_hooks_get(tsdn, arena); - size_t npurge, npurged; + chunk_hooks_t chunk_hooks = chunk_hooks_get(arena); + size_t npurge, npurgeable, npurged; arena_runs_dirty_link_t purge_runs_sentinel; extent_node_t purge_chunks_sentinel; @@ -1824,183 +1517,34 @@ arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, size_t ndirty_limit) size_t ndirty = arena_dirty_count(arena); assert(ndirty == arena->ndirty); } - assert(opt_purge != purge_mode_ratio || (arena->nactive >> - arena->lg_dirty_mult) < arena->ndirty || ndirty_limit == 0); - - qr_new(&purge_runs_sentinel, rd_link); - extent_node_dirty_linkage_init(&purge_chunks_sentinel); - - npurge = arena_stash_dirty(tsdn, arena, &chunk_hooks, ndirty_limit, - &purge_runs_sentinel, &purge_chunks_sentinel); - if (npurge == 0) - goto label_return; - npurged = arena_purge_stashed(tsdn, arena, &chunk_hooks, - &purge_runs_sentinel, &purge_chunks_sentinel); - assert(npurged == npurge); - arena_unstash_purged(tsdn, arena, &chunk_hooks, &purge_runs_sentinel, - &purge_chunks_sentinel); + assert((arena->nactive >> arena->lg_dirty_mult) < arena->ndirty || all); if (config_stats) arena->stats.npurge++; -label_return: + npurge = arena_compute_npurge(arena, all); + qr_new(&purge_runs_sentinel, rd_link); + extent_node_dirty_linkage_init(&purge_chunks_sentinel); + + npurgeable = arena_stash_dirty(arena, &chunk_hooks, all, npurge, + &purge_runs_sentinel, &purge_chunks_sentinel); + assert(npurgeable >= npurge); + npurged = arena_purge_stashed(arena, &chunk_hooks, &purge_runs_sentinel, + &purge_chunks_sentinel); + assert(npurged == npurgeable); + arena_unstash_purged(arena, &chunk_hooks, &purge_runs_sentinel, + &purge_chunks_sentinel); + arena->purging = false; } void -arena_purge(tsdn_t *tsdn, arena_t *arena, bool all) 
+arena_purge_all(arena_t *arena) { - malloc_mutex_lock(tsdn, &arena->lock); - if (all) - arena_purge_to_limit(tsdn, arena, 0); - else - arena_maybe_purge(tsdn, arena); - malloc_mutex_unlock(tsdn, &arena->lock); -} - -static void -arena_achunk_prof_reset(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk) -{ - size_t pageind, npages; - - cassert(config_prof); - assert(opt_prof); - - /* - * Iterate over the allocated runs and remove profiled allocations from - * the sample set. - */ - for (pageind = map_bias; pageind < chunk_npages; pageind += npages) { - if (arena_mapbits_allocated_get(chunk, pageind) != 0) { - if (arena_mapbits_large_get(chunk, pageind) != 0) { - void *ptr = (void *)((uintptr_t)chunk + (pageind - << LG_PAGE)); - size_t usize = isalloc(tsd_tsdn(tsd), ptr, - config_prof); - - prof_free(tsd, ptr, usize); - npages = arena_mapbits_large_size_get(chunk, - pageind) >> LG_PAGE; - } else { - /* Skip small run. */ - size_t binind = arena_mapbits_binind_get(chunk, - pageind); - arena_bin_info_t *bin_info = - &arena_bin_info[binind]; - npages = bin_info->run_size >> LG_PAGE; - } - } else { - /* Skip unallocated run. */ - npages = arena_mapbits_unallocated_size_get(chunk, - pageind) >> LG_PAGE; - } - assert(pageind + npages <= chunk_npages); - } -} - -void -arena_reset(tsd_t *tsd, arena_t *arena) -{ - unsigned i; - extent_node_t *node; - - /* - * Locking in this function is unintuitive. The caller guarantees that - * no concurrent operations are happening in this arena, but there are - * still reasons that some locking is necessary: - * - * - Some of the functions in the transitive closure of calls assume - * appropriate locks are held, and in some cases these locks are - * temporarily dropped to avoid lock order reversal or deadlock due to - * reentry. - * - mallctl("epoch", ...) may concurrently refresh stats. While - * strictly speaking this is a "concurrent operation", disallowing - * stats refreshes would impose an inconvenient burden. 
- */ - - /* Remove large allocations from prof sample set. */ - if (config_prof && opt_prof) { - ql_foreach(node, &arena->achunks, ql_link) { - arena_achunk_prof_reset(tsd, arena, - extent_node_addr_get(node)); - } - } - - /* Reset curruns for large size classes. */ - if (config_stats) { - for (i = 0; i < nlclasses; i++) - arena->stats.lstats[i].curruns = 0; - } - - /* Huge allocations. */ - malloc_mutex_lock(tsd_tsdn(tsd), &arena->huge_mtx); - for (node = ql_last(&arena->huge, ql_link); node != NULL; node = - ql_last(&arena->huge, ql_link)) { - void *ptr = extent_node_addr_get(node); - size_t usize; - - malloc_mutex_unlock(tsd_tsdn(tsd), &arena->huge_mtx); - if (config_stats || (config_prof && opt_prof)) - usize = isalloc(tsd_tsdn(tsd), ptr, config_prof); - /* Remove huge allocation from prof sample set. */ - if (config_prof && opt_prof) - prof_free(tsd, ptr, usize); - huge_dalloc(tsd_tsdn(tsd), ptr); - malloc_mutex_lock(tsd_tsdn(tsd), &arena->huge_mtx); - /* Cancel out unwanted effects on stats. */ - if (config_stats) - arena_huge_reset_stats_cancel(arena, usize); - } - malloc_mutex_unlock(tsd_tsdn(tsd), &arena->huge_mtx); - - malloc_mutex_lock(tsd_tsdn(tsd), &arena->lock); - - /* Bins. */ - for (i = 0; i < NBINS; i++) { - arena_bin_t *bin = &arena->bins[i]; - malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); - bin->runcur = NULL; - arena_run_heap_new(&bin->runs); - if (config_stats) { - bin->stats.curregs = 0; - bin->stats.curruns = 0; - } - malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); - } - - /* - * Re-initialize runs_dirty such that the chunks_cache and runs_dirty - * chains directly correspond. - */ - qr_new(&arena->runs_dirty, rd_link); - for (node = qr_next(&arena->chunks_cache, cc_link); - node != &arena->chunks_cache; node = qr_next(node, cc_link)) { - qr_new(&node->rd, rd_link); - qr_meld(&arena->runs_dirty, &node->rd, rd_link); - } - - /* Arena chunks. 
*/ - for (node = ql_last(&arena->achunks, ql_link); node != NULL; node = - ql_last(&arena->achunks, ql_link)) { - ql_remove(&arena->achunks, node, ql_link); - arena_chunk_discard(tsd_tsdn(tsd), arena, - extent_node_addr_get(node)); - } - - /* Spare. */ - if (arena->spare != NULL) { - arena_chunk_discard(tsd_tsdn(tsd), arena, arena->spare); - arena->spare = NULL; - } - - assert(!arena->purging); - arena->nactive = 0; - - for (i = 0; i < NPSIZES; i++) - arena_run_heap_new(&arena->runs_avail[i]); - - malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock); + malloc_mutex_lock(&arena->lock); + arena_purge(arena, true); + malloc_mutex_unlock(&arena->lock); } static void @@ -2116,9 +1660,21 @@ arena_run_size_get(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, return (size); } +static bool +arena_run_decommit(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run) +{ + arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); + size_t run_ind = arena_miscelm_to_pageind(miscelm); + size_t offset = run_ind << LG_PAGE; + size_t length = arena_run_size_get(arena, chunk, run, run_ind); + + return (arena->chunk_hooks.decommit(chunk, chunksize, offset, length, + arena->ind)); +} + static void -arena_run_dalloc(tsdn_t *tsdn, arena_t *arena, arena_run_t *run, bool dirty, - bool cleaned, bool decommitted) +arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned, + bool decommitted) { arena_chunk_t *chunk; arena_chunk_map_misc_t *miscelm; @@ -2131,7 +1687,8 @@ arena_run_dalloc(tsdn_t *tsdn, arena_t *arena, arena_run_t *run, bool dirty, assert(run_ind < chunk_npages); size = arena_run_size_get(arena, chunk, run, run_ind); run_pages = (size >> LG_PAGE); - arena_nactive_sub(arena, run_pages); + arena_cactive_update(arena, 0, run_pages); + arena->nactive -= run_pages; /* * The run is dirty if the caller claims to have dirtied it, as well as @@ -2178,7 +1735,7 @@ arena_run_dalloc(tsdn_t *tsdn, arena_t *arena, arena_run_t *run, bool dirty, if (size == arena_maxrun) 
{ assert(run_ind == map_bias); assert(run_pages == (arena_maxrun >> LG_PAGE)); - arena_chunk_dalloc(tsdn, arena, chunk); + arena_chunk_dalloc(arena, chunk); } /* @@ -2189,12 +1746,21 @@ arena_run_dalloc(tsdn_t *tsdn, arena_t *arena, arena_run_t *run, bool dirty, * chances of spuriously crossing the dirty page purging threshold. */ if (dirty) - arena_maybe_purge(tsdn, arena); + arena_maybe_purge(arena); } static void -arena_run_trim_head(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, - arena_run_t *run, size_t oldsize, size_t newsize) +arena_run_dalloc_decommit(arena_t *arena, arena_chunk_t *chunk, + arena_run_t *run) +{ + bool committed = arena_run_decommit(arena, chunk, run); + + arena_run_dalloc(arena, run, committed, false, !committed); +} + +static void +arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, + size_t oldsize, size_t newsize) { arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); size_t pageind = arena_miscelm_to_pageind(miscelm); @@ -2229,13 +1795,12 @@ arena_run_trim_head(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, flag_dirty | (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk, pageind+head_npages))); - arena_run_dalloc(tsdn, arena, run, false, false, (flag_decommitted != - 0)); + arena_run_dalloc(arena, run, false, false, (flag_decommitted != 0)); } static void -arena_run_trim_tail(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, - arena_run_t *run, size_t oldsize, size_t newsize, bool dirty) +arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, + size_t oldsize, size_t newsize, bool dirty) { arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); size_t pageind = arena_miscelm_to_pageind(miscelm); @@ -2272,10 +1837,20 @@ arena_run_trim_tail(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, flag_dirty | (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk, pageind+head_npages))); - tail_miscelm = arena_miscelm_get_mutable(chunk, pageind + head_npages); + 
tail_miscelm = arena_miscelm_get(chunk, pageind + head_npages); tail_run = &tail_miscelm->run; - arena_run_dalloc(tsdn, arena, tail_run, dirty, false, (flag_decommitted - != 0)); + arena_run_dalloc(arena, tail_run, dirty, false, (flag_decommitted != + 0)); +} + +static arena_run_t * +arena_bin_runs_first(arena_bin_t *bin) +{ + arena_chunk_map_misc_t *miscelm = arena_run_tree_first(&bin->runs); + if (miscelm != NULL) + return (&miscelm->run); + + return (NULL); } static void @@ -2283,25 +1858,35 @@ arena_bin_runs_insert(arena_bin_t *bin, arena_run_t *run) { arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); - arena_run_heap_insert(&bin->runs, miscelm); + assert(arena_run_tree_search(&bin->runs, miscelm) == NULL); + + arena_run_tree_insert(&bin->runs, miscelm); +} + +static void +arena_bin_runs_remove(arena_bin_t *bin, arena_run_t *run) +{ + arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); + + assert(arena_run_tree_search(&bin->runs, miscelm) != NULL); + + arena_run_tree_remove(&bin->runs, miscelm); } static arena_run_t * arena_bin_nonfull_run_tryget(arena_bin_t *bin) { - arena_chunk_map_misc_t *miscelm; - - miscelm = arena_run_heap_remove_first(&bin->runs); - if (miscelm == NULL) - return (NULL); - if (config_stats) - bin->stats.reruns++; - - return (&miscelm->run); + arena_run_t *run = arena_bin_runs_first(bin); + if (run != NULL) { + arena_bin_runs_remove(bin, run); + if (config_stats) + bin->stats.reruns++; + } + return (run); } static arena_run_t * -arena_bin_nonfull_run_get(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin) +arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) { arena_run_t *run; szind_t binind; @@ -2317,19 +1902,19 @@ arena_bin_nonfull_run_get(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin) bin_info = &arena_bin_info[binind]; /* Allocate a new run. 
*/ - malloc_mutex_unlock(tsdn, &bin->lock); + malloc_mutex_unlock(&bin->lock); /******************************/ - malloc_mutex_lock(tsdn, &arena->lock); - run = arena_run_alloc_small(tsdn, arena, bin_info->run_size, binind); + malloc_mutex_lock(&arena->lock); + run = arena_run_alloc_small(arena, bin_info->run_size, binind); if (run != NULL) { /* Initialize run internals. */ run->binind = binind; run->nfree = bin_info->nregs; bitmap_init(run->bitmap, &bin_info->bitmap_info); } - malloc_mutex_unlock(tsdn, &arena->lock); + malloc_mutex_unlock(&arena->lock); /********************************/ - malloc_mutex_lock(tsdn, &bin->lock); + malloc_mutex_lock(&bin->lock); if (run != NULL) { if (config_stats) { bin->stats.nruns++; @@ -2352,7 +1937,7 @@ arena_bin_nonfull_run_get(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin) /* Re-fill bin->runcur, then call arena_run_reg_alloc(). */ static void * -arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin) +arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) { szind_t binind; arena_bin_info_t *bin_info; @@ -2361,7 +1946,7 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin) binind = arena_bin_index(arena, bin); bin_info = &arena_bin_info[binind]; bin->runcur = NULL; - run = arena_bin_nonfull_run_get(tsdn, arena, bin); + run = arena_bin_nonfull_run_get(arena, bin); if (bin->runcur != NULL && bin->runcur->nfree > 0) { /* * Another thread updated runcur while this one ran without the @@ -2382,11 +1967,10 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin) * were just deallocated from the run. 
*/ chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - if (run->nfree == bin_info->nregs) { - arena_dalloc_bin_run(tsdn, arena, chunk, run, - bin); - } else - arena_bin_lower_run(arena, run, bin); + if (run->nfree == bin_info->nregs) + arena_dalloc_bin_run(arena, chunk, run, bin); + else + arena_bin_lower_run(arena, chunk, run, bin); } return (ret); } @@ -2402,18 +1986,18 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin) } void -arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_bin_t *tbin, - szind_t binind, uint64_t prof_accumbytes) +arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, szind_t binind, + uint64_t prof_accumbytes) { unsigned i, nfill; arena_bin_t *bin; assert(tbin->ncached == 0); - if (config_prof && arena_prof_accum(tsdn, arena, prof_accumbytes)) - prof_idump(tsdn); + if (config_prof && arena_prof_accum(arena, prof_accumbytes)) + prof_idump(); bin = &arena->bins[binind]; - malloc_mutex_lock(tsdn, &bin->lock); + malloc_mutex_lock(&bin->lock); for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >> tbin->lg_fill_div); i < nfill; i++) { arena_run_t *run; @@ -2421,15 +2005,16 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_bin_t *tbin, if ((run = bin->runcur) != NULL && run->nfree > 0) ptr = arena_run_reg_alloc(run, &arena_bin_info[binind]); else - ptr = arena_bin_malloc_hard(tsdn, arena, bin); + ptr = arena_bin_malloc_hard(arena, bin); if (ptr == NULL) { /* * OOM. tbin->avail isn't yet filled down to its first * element, so the successful allocations (if any) must - * be moved just before tbin->avail before bailing out. + * be moved to the base of tbin->avail before bailing + * out. */ if (i > 0) { - memmove(tbin->avail - i, tbin->avail - nfill, + memmove(tbin->avail, &tbin->avail[nfill - i], i * sizeof(void *)); } break; @@ -2439,7 +2024,7 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_bin_t *tbin, true); } /* Insert such that low regions get used first. 
*/ - *(tbin->avail - nfill + i) = ptr; + tbin->avail[nfill - 1 - i] = ptr; } if (config_stats) { bin->stats.nmalloc += i; @@ -2448,31 +2033,29 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_bin_t *tbin, bin->stats.nfills++; tbin->tstats.nrequests = 0; } - malloc_mutex_unlock(tsdn, &bin->lock); + malloc_mutex_unlock(&bin->lock); tbin->ncached = i; - arena_decay_tick(tsdn, arena); } void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, bool zero) { - size_t redzone_size = bin_info->redzone_size; - if (zero) { - memset((void *)((uintptr_t)ptr - redzone_size), - JEMALLOC_ALLOC_JUNK, redzone_size); - memset((void *)((uintptr_t)ptr + bin_info->reg_size), - JEMALLOC_ALLOC_JUNK, redzone_size); + size_t redzone_size = bin_info->redzone_size; + memset((void *)((uintptr_t)ptr - redzone_size), 0xa5, + redzone_size); + memset((void *)((uintptr_t)ptr + bin_info->reg_size), 0xa5, + redzone_size); } else { - memset((void *)((uintptr_t)ptr - redzone_size), - JEMALLOC_ALLOC_JUNK, bin_info->reg_interval); + memset((void *)((uintptr_t)ptr - bin_info->redzone_size), 0xa5, + bin_info->reg_interval); } } #ifdef JEMALLOC_JET #undef arena_redzone_corruption -#define arena_redzone_corruption JEMALLOC_N(n_arena_redzone_corruption) +#define arena_redzone_corruption JEMALLOC_N(arena_redzone_corruption_impl) #endif static void arena_redzone_corruption(void *ptr, size_t usize, bool after, @@ -2487,7 +2070,7 @@ arena_redzone_corruption(void *ptr, size_t usize, bool after, #undef arena_redzone_corruption #define arena_redzone_corruption JEMALLOC_N(arena_redzone_corruption) arena_redzone_corruption_t *arena_redzone_corruption = - JEMALLOC_N(n_arena_redzone_corruption); + JEMALLOC_N(arena_redzone_corruption_impl); #endif static void @@ -2502,22 +2085,22 @@ arena_redzones_validate(void *ptr, arena_bin_info_t *bin_info, bool reset) for (i = 1; i <= redzone_size; i++) { uint8_t *byte = (uint8_t *)((uintptr_t)ptr - i); - if (*byte != JEMALLOC_ALLOC_JUNK) { + if (*byte != 
0xa5) { error = true; arena_redzone_corruption(ptr, size, false, i, *byte); if (reset) - *byte = JEMALLOC_ALLOC_JUNK; + *byte = 0xa5; } } for (i = 0; i < redzone_size; i++) { uint8_t *byte = (uint8_t *)((uintptr_t)ptr + size + i); - if (*byte != JEMALLOC_ALLOC_JUNK) { + if (*byte != 0xa5) { error = true; arena_redzone_corruption(ptr, size, true, i, *byte); if (reset) - *byte = JEMALLOC_ALLOC_JUNK; + *byte = 0xa5; } } } @@ -2528,7 +2111,7 @@ arena_redzones_validate(void *ptr, arena_bin_info_t *bin_info, bool reset) #ifdef JEMALLOC_JET #undef arena_dalloc_junk_small -#define arena_dalloc_junk_small JEMALLOC_N(n_arena_dalloc_junk_small) +#define arena_dalloc_junk_small JEMALLOC_N(arena_dalloc_junk_small_impl) #endif void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info) @@ -2536,14 +2119,14 @@ arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info) size_t redzone_size = bin_info->redzone_size; arena_redzones_validate(ptr, bin_info, false); - memset((void *)((uintptr_t)ptr - redzone_size), JEMALLOC_FREE_JUNK, + memset((void *)((uintptr_t)ptr - redzone_size), 0x5a, bin_info->reg_interval); } #ifdef JEMALLOC_JET #undef arena_dalloc_junk_small #define arena_dalloc_junk_small JEMALLOC_N(arena_dalloc_junk_small) arena_dalloc_junk_small_t *arena_dalloc_junk_small = - JEMALLOC_N(n_arena_dalloc_junk_small); + JEMALLOC_N(arena_dalloc_junk_small_impl); #endif void @@ -2561,26 +2144,27 @@ arena_quarantine_junk_small(void *ptr, size_t usize) arena_redzones_validate(ptr, bin_info, true); } -static void * -arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) +void * +arena_malloc_small(arena_t *arena, size_t size, bool zero) { void *ret; arena_bin_t *bin; - size_t usize; arena_run_t *run; + szind_t binind; + binind = size2index(size); assert(binind < NBINS); bin = &arena->bins[binind]; - usize = index2size(binind); + size = index2size(binind); - malloc_mutex_lock(tsdn, &bin->lock); + malloc_mutex_lock(&bin->lock); if ((run = bin->runcur) != 
NULL && run->nfree > 0) ret = arena_run_reg_alloc(run, &arena_bin_info[binind]); else - ret = arena_bin_malloc_hard(tsdn, arena, bin); + ret = arena_bin_malloc_hard(arena, bin); if (ret == NULL) { - malloc_mutex_unlock(tsdn, &bin->lock); + malloc_mutex_unlock(&bin->lock); return (NULL); } @@ -2589,9 +2173,9 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) bin->stats.nrequests++; bin->stats.curregs++; } - malloc_mutex_unlock(tsdn, &bin->lock); - if (config_prof && !isthreaded && arena_prof_accum(tsdn, arena, usize)) - prof_idump(tsdn); + malloc_mutex_unlock(&bin->lock); + if (config_prof && !isthreaded && arena_prof_accum(arena, size)) + prof_idump(); if (!zero) { if (config_fill) { @@ -2599,35 +2183,34 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) arena_alloc_junk_small(ret, &arena_bin_info[binind], false); } else if (unlikely(opt_zero)) - memset(ret, 0, usize); + memset(ret, 0, size); } - JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, usize); + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size); } else { if (config_fill && unlikely(opt_junk_alloc)) { arena_alloc_junk_small(ret, &arena_bin_info[binind], true); } - JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, usize); - memset(ret, 0, usize); + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size); + memset(ret, 0, size); } - arena_decay_tick(tsdn, arena); return (ret); } void * -arena_malloc_large(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) +arena_malloc_large(arena_t *arena, size_t size, bool zero) { void *ret; size_t usize; uintptr_t random_offset; arena_run_t *run; arena_chunk_map_misc_t *miscelm; - UNUSED bool idump JEMALLOC_CC_SILENCE_INIT(false); + UNUSED bool idump; /* Large allocation. 
*/ - usize = index2size(binind); - malloc_mutex_lock(tsdn, &arena->lock); + usize = s2u(size); + malloc_mutex_lock(&arena->lock); if (config_cache_oblivious) { uint64_t r; @@ -2636,21 +2219,22 @@ arena_malloc_large(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) * that is a multiple of the cacheline size, e.g. [0 .. 63) * 64 * for 4 KiB pages and 64-byte cachelines. */ - r = prng_lg_range_zu(&arena->offset_state, LG_PAGE - - LG_CACHELINE, false); + prng64(r, LG_PAGE - LG_CACHELINE, arena->offset_state, + UINT64_C(6364136223846793009), + UINT64_C(1442695040888963409)); random_offset = ((uintptr_t)r) << LG_CACHELINE; } else random_offset = 0; - run = arena_run_alloc_large(tsdn, arena, usize + large_pad, zero); + run = arena_run_alloc_large(arena, usize + large_pad, zero); if (run == NULL) { - malloc_mutex_unlock(tsdn, &arena->lock); + malloc_mutex_unlock(&arena->lock); return (NULL); } miscelm = arena_run_to_miscelm(run); ret = (void *)((uintptr_t)arena_miscelm_to_rpages(miscelm) + random_offset); if (config_stats) { - szind_t index = binind - NBINS; + szind_t index = size2index(usize) - NBINS; arena->stats.nmalloc_large++; arena->stats.nrequests_large++; @@ -2661,45 +2245,25 @@ arena_malloc_large(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) } if (config_prof) idump = arena_prof_accum_locked(arena, usize); - malloc_mutex_unlock(tsdn, &arena->lock); + malloc_mutex_unlock(&arena->lock); if (config_prof && idump) - prof_idump(tsdn); + prof_idump(); if (!zero) { if (config_fill) { if (unlikely(opt_junk_alloc)) - memset(ret, JEMALLOC_ALLOC_JUNK, usize); + memset(ret, 0xa5, usize); else if (unlikely(opt_zero)) memset(ret, 0, usize); } } - arena_decay_tick(tsdn, arena); return (ret); } -void * -arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, - bool zero) -{ - - assert(!tsdn_null(tsdn) || arena != NULL); - - if (likely(!tsdn_null(tsdn))) - arena = arena_choose(tsdn_tsd(tsdn), arena); - if (unlikely(arena == NULL)) - return 
(NULL); - - if (likely(size <= SMALL_MAXCLASS)) - return (arena_malloc_small(tsdn, arena, ind, zero)); - if (likely(size <= large_maxclass)) - return (arena_malloc_large(tsdn, arena, ind, zero)); - return (huge_malloc(tsdn, arena, index2size(ind), zero)); -} - /* Only handles large allocations that require more than page alignment. */ static void * -arena_palloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, +arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, bool zero) { void *ret; @@ -2709,21 +2273,19 @@ arena_palloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, arena_chunk_map_misc_t *miscelm; void *rpages; - assert(!tsdn_null(tsdn) || arena != NULL); assert(usize == PAGE_CEILING(usize)); - if (likely(!tsdn_null(tsdn))) - arena = arena_choose(tsdn_tsd(tsdn), arena); + arena = arena_choose(tsd, arena); if (unlikely(arena == NULL)) return (NULL); alignment = PAGE_CEILING(alignment); alloc_size = usize + large_pad + alignment - PAGE; - malloc_mutex_lock(tsdn, &arena->lock); - run = arena_run_alloc_large(tsdn, arena, alloc_size, false); + malloc_mutex_lock(&arena->lock); + run = arena_run_alloc_large(arena, alloc_size, false); if (run == NULL) { - malloc_mutex_unlock(tsdn, &arena->lock); + malloc_mutex_unlock(&arena->lock); return (NULL); } chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); @@ -2738,16 +2300,16 @@ arena_palloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, arena_chunk_map_misc_t *head_miscelm = miscelm; arena_run_t *head_run = run; - miscelm = arena_miscelm_get_mutable(chunk, + miscelm = arena_miscelm_get(chunk, arena_miscelm_to_pageind(head_miscelm) + (leadsize >> LG_PAGE)); run = &miscelm->run; - arena_run_trim_head(tsdn, arena, chunk, head_run, alloc_size, + arena_run_trim_head(arena, chunk, head_run, alloc_size, alloc_size - leadsize); } if (trailsize != 0) { - arena_run_trim_tail(tsdn, arena, chunk, run, usize + large_pad + + arena_run_trim_tail(arena, 
chunk, run, usize + large_pad + trailsize, usize + large_pad, false); } if (arena_run_init_large(arena, run, usize + large_pad, zero)) { @@ -2758,8 +2320,8 @@ arena_palloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, run_ind) != 0); assert(decommitted); /* Cause of OOM. */ - arena_run_dalloc(tsdn, arena, run, dirty, false, decommitted); - malloc_mutex_unlock(tsdn, &arena->lock); + arena_run_dalloc(arena, run, dirty, false, decommitted); + malloc_mutex_unlock(&arena->lock); return (NULL); } ret = arena_miscelm_to_rpages(miscelm); @@ -2774,20 +2336,19 @@ arena_palloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, arena->stats.lstats[index].nrequests++; arena->stats.lstats[index].curruns++; } - malloc_mutex_unlock(tsdn, &arena->lock); + malloc_mutex_unlock(&arena->lock); if (config_fill && !zero) { if (unlikely(opt_junk_alloc)) - memset(ret, JEMALLOC_ALLOC_JUNK, usize); + memset(ret, 0xa5, usize); else if (unlikely(opt_zero)) memset(ret, 0, usize); } - arena_decay_tick(tsdn, arena); return (ret); } void * -arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, +arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache) { void *ret; @@ -2795,8 +2356,7 @@ arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, if (usize <= SMALL_MAXCLASS && (alignment < PAGE || (alignment == PAGE && (usize & PAGE_MASK) == 0))) { /* Small; alignment doesn't require special run placement. */ - ret = arena_malloc(tsdn, arena, usize, size2index(usize), zero, - tcache, true); + ret = arena_malloc(tsd, arena, usize, zero, tcache); } else if (usize <= large_maxclass && alignment <= PAGE) { /* * Large; alignment doesn't require special run placement. @@ -2804,25 +2364,25 @@ arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, * the base of the run, so do some bit manipulation to retrieve * the base. 
*/ - ret = arena_malloc(tsdn, arena, usize, size2index(usize), zero, - tcache, true); + ret = arena_malloc(tsd, arena, usize, zero, tcache); if (config_cache_oblivious) ret = (void *)((uintptr_t)ret & ~PAGE_MASK); } else { if (likely(usize <= large_maxclass)) { - ret = arena_palloc_large(tsdn, arena, usize, alignment, + ret = arena_palloc_large(tsd, arena, usize, alignment, zero); } else if (likely(alignment <= chunksize)) - ret = huge_malloc(tsdn, arena, usize, zero); + ret = huge_malloc(tsd, arena, usize, zero, tcache); else { - ret = huge_palloc(tsdn, arena, usize, alignment, zero); + ret = huge_palloc(tsd, arena, usize, alignment, zero, + tcache); } } return (ret); } void -arena_prof_promoted(tsdn_t *tsdn, const void *ptr, size_t size) +arena_prof_promoted(const void *ptr, size_t size) { arena_chunk_t *chunk; size_t pageind; @@ -2831,8 +2391,8 @@ arena_prof_promoted(tsdn_t *tsdn, const void *ptr, size_t size) cassert(config_prof); assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); - assert(isalloc(tsdn, ptr, false) == LARGE_MINCLASS); - assert(isalloc(tsdn, ptr, true) == LARGE_MINCLASS); + assert(isalloc(ptr, false) == LARGE_MINCLASS); + assert(isalloc(ptr, true) == LARGE_MINCLASS); assert(size <= SMALL_MAXCLASS); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); @@ -2841,8 +2401,8 @@ arena_prof_promoted(tsdn_t *tsdn, const void *ptr, size_t size) assert(binind < NBINS); arena_mapbits_large_binind_set(chunk, pageind, binind); - assert(isalloc(tsdn, ptr, false) == LARGE_MINCLASS); - assert(isalloc(tsdn, ptr, true) == size); + assert(isalloc(ptr, false) == LARGE_MINCLASS); + assert(isalloc(ptr, true) == size); } static void @@ -2858,51 +2418,48 @@ arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, &chunk->node), bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; - /* - * The following block's conditional is necessary because if the - * run only contains one region, then it never gets inserted - * into the non-full runs tree. 
- */ if (bin_info->nregs != 1) { - arena_chunk_map_misc_t *miscelm = - arena_run_to_miscelm(run); - - arena_run_heap_remove(&bin->runs, miscelm); + /* + * This block's conditional is necessary because if the + * run only contains one region, then it never gets + * inserted into the non-full runs tree. + */ + arena_bin_runs_remove(bin, run); } } } static void -arena_dalloc_bin_run(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, - arena_run_t *run, arena_bin_t *bin) +arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, + arena_bin_t *bin) { assert(run != bin->runcur); + assert(arena_run_tree_search(&bin->runs, arena_run_to_miscelm(run)) == + NULL); - malloc_mutex_unlock(tsdn, &bin->lock); + malloc_mutex_unlock(&bin->lock); /******************************/ - malloc_mutex_lock(tsdn, &arena->lock); - arena_run_dalloc(tsdn, arena, run, true, false, false); - malloc_mutex_unlock(tsdn, &arena->lock); + malloc_mutex_lock(&arena->lock); + arena_run_dalloc_decommit(arena, chunk, run); + malloc_mutex_unlock(&arena->lock); /****************************/ - malloc_mutex_lock(tsdn, &bin->lock); + malloc_mutex_lock(&bin->lock); if (config_stats) bin->stats.curruns--; } static void -arena_bin_lower_run(arena_t *arena, arena_run_t *run, arena_bin_t *bin) +arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, + arena_bin_t *bin) { /* - * Make sure that if bin->runcur is non-NULL, it refers to the - * oldest/lowest non-full run. It is okay to NULL runcur out rather - * than proactively keeping it pointing at the oldest/lowest non-full - * run. + * Make sure that if bin->runcur is non-NULL, it refers to the lowest + * non-full run. It is okay to NULL runcur out rather than proactively + * keeping it pointing at the lowest non-full run. */ - if (bin->runcur != NULL && - arena_snad_comp(arena_run_to_miscelm(bin->runcur), - arena_run_to_miscelm(run)) > 0) { + if ((uintptr_t)run < (uintptr_t)bin->runcur) { /* Switch runcur. 
*/ if (bin->runcur->nfree > 0) arena_bin_runs_insert(bin, bin->runcur); @@ -2914,8 +2471,8 @@ arena_bin_lower_run(arena_t *arena, arena_run_t *run, arena_bin_t *bin) } static void -arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, - void *ptr, arena_chunk_map_bits_t *bitselm, bool junked) +arena_dalloc_bin_locked_impl(arena_t *arena, arena_chunk_t *chunk, void *ptr, + arena_chunk_map_bits_t *bitselm, bool junked) { size_t pageind, rpages_ind; arena_run_t *run; @@ -2925,7 +2482,7 @@ arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, pageind); - run = &arena_miscelm_get_mutable(chunk, rpages_ind)->run; + run = &arena_miscelm_get(chunk, rpages_ind)->run; binind = run->binind; bin = &arena->bins[binind]; bin_info = &arena_bin_info[binind]; @@ -2936,9 +2493,9 @@ arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, arena_run_reg_dalloc(run, ptr); if (run->nfree == bin_info->nregs) { arena_dissociate_bin_run(chunk, run, bin); - arena_dalloc_bin_run(tsdn, arena, chunk, run, bin); + arena_dalloc_bin_run(arena, chunk, run, bin); } else if (run->nfree == 1 && run != bin->runcur) - arena_bin_lower_run(arena, run, bin); + arena_bin_lower_run(arena, chunk, run, bin); if (config_stats) { bin->stats.ndalloc++; @@ -2947,15 +2504,15 @@ arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, } void -arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, - arena_chunk_t *chunk, void *ptr, arena_chunk_map_bits_t *bitselm) +arena_dalloc_bin_junked_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, + arena_chunk_map_bits_t *bitselm) { - arena_dalloc_bin_locked_impl(tsdn, arena, chunk, ptr, bitselm, true); + arena_dalloc_bin_locked_impl(arena, chunk, ptr, bitselm, true); } void -arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, 
void *ptr, +arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t pageind, arena_chunk_map_bits_t *bitselm) { arena_run_t *run; @@ -2963,16 +2520,16 @@ arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t rpages_ind; rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, pageind); - run = &arena_miscelm_get_mutable(chunk, rpages_ind)->run; + run = &arena_miscelm_get(chunk, rpages_ind)->run; bin = &arena->bins[run->binind]; - malloc_mutex_lock(tsdn, &bin->lock); - arena_dalloc_bin_locked_impl(tsdn, arena, chunk, ptr, bitselm, false); - malloc_mutex_unlock(tsdn, &bin->lock); + malloc_mutex_lock(&bin->lock); + arena_dalloc_bin_locked_impl(arena, chunk, ptr, bitselm, false); + malloc_mutex_unlock(&bin->lock); } void -arena_dalloc_small(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, - void *ptr, size_t pageind) +arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, + size_t pageind) { arena_chunk_map_bits_t *bitselm; @@ -2981,36 +2538,34 @@ arena_dalloc_small(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, assert(arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk, pageind)) != BININD_INVALID); } - bitselm = arena_bitselm_get_mutable(chunk, pageind); - arena_dalloc_bin(tsdn, arena, chunk, ptr, pageind, bitselm); - arena_decay_tick(tsdn, arena); + bitselm = arena_bitselm_get(chunk, pageind); + arena_dalloc_bin(arena, chunk, ptr, pageind, bitselm); } #ifdef JEMALLOC_JET #undef arena_dalloc_junk_large -#define arena_dalloc_junk_large JEMALLOC_N(n_arena_dalloc_junk_large) +#define arena_dalloc_junk_large JEMALLOC_N(arena_dalloc_junk_large_impl) #endif void arena_dalloc_junk_large(void *ptr, size_t usize) { if (config_fill && unlikely(opt_junk_free)) - memset(ptr, JEMALLOC_FREE_JUNK, usize); + memset(ptr, 0x5a, usize); } #ifdef JEMALLOC_JET #undef arena_dalloc_junk_large #define arena_dalloc_junk_large JEMALLOC_N(arena_dalloc_junk_large) arena_dalloc_junk_large_t *arena_dalloc_junk_large = 
- JEMALLOC_N(n_arena_dalloc_junk_large); + JEMALLOC_N(arena_dalloc_junk_large_impl); #endif static void -arena_dalloc_large_locked_impl(tsdn_t *tsdn, arena_t *arena, - arena_chunk_t *chunk, void *ptr, bool junked) +arena_dalloc_large_locked_impl(arena_t *arena, arena_chunk_t *chunk, + void *ptr, bool junked) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - arena_chunk_map_misc_t *miscelm = arena_miscelm_get_mutable(chunk, - pageind); + arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind); arena_run_t *run = &miscelm->run; if (config_fill || config_stats) { @@ -3029,35 +2584,32 @@ arena_dalloc_large_locked_impl(tsdn_t *tsdn, arena_t *arena, } } - arena_run_dalloc(tsdn, arena, run, true, false, false); + arena_run_dalloc_decommit(arena, chunk, run); } void -arena_dalloc_large_junked_locked(tsdn_t *tsdn, arena_t *arena, - arena_chunk_t *chunk, void *ptr) -{ - - arena_dalloc_large_locked_impl(tsdn, arena, chunk, ptr, true); -} - -void -arena_dalloc_large(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, +arena_dalloc_large_junked_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr) { - malloc_mutex_lock(tsdn, &arena->lock); - arena_dalloc_large_locked_impl(tsdn, arena, chunk, ptr, false); - malloc_mutex_unlock(tsdn, &arena->lock); - arena_decay_tick(tsdn, arena); + arena_dalloc_large_locked_impl(arena, chunk, ptr, true); +} + +void +arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr) +{ + + malloc_mutex_lock(&arena->lock); + arena_dalloc_large_locked_impl(arena, chunk, ptr, false); + malloc_mutex_unlock(&arena->lock); } static void -arena_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, - void *ptr, size_t oldsize, size_t size) +arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr, + size_t oldsize, size_t size) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - arena_chunk_map_misc_t *miscelm = arena_miscelm_get_mutable(chunk, - pageind); + 
arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind); arena_run_t *run = &miscelm->run; assert(size < oldsize); @@ -3066,8 +2618,8 @@ arena_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, * Shrink the run, and make trailing pages available for other * allocations. */ - malloc_mutex_lock(tsdn, &arena->lock); - arena_run_trim_tail(tsdn, arena, chunk, run, oldsize + large_pad, size + + malloc_mutex_lock(&arena->lock); + arena_run_trim_tail(arena, chunk, run, oldsize + large_pad, size + large_pad, true); if (config_stats) { szind_t oldindex = size2index(oldsize) - NBINS; @@ -3085,12 +2637,12 @@ arena_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, arena->stats.lstats[index].nrequests++; arena->stats.lstats[index].curruns++; } - malloc_mutex_unlock(tsdn, &arena->lock); + malloc_mutex_unlock(&arena->lock); } static bool -arena_ralloc_large_grow(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, - void *ptr, size_t oldsize, size_t usize_min, size_t usize_max, bool zero) +arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, + size_t oldsize, size_t usize_min, size_t usize_max, bool zero) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; size_t npages = (oldsize + large_pad) >> LG_PAGE; @@ -3100,7 +2652,7 @@ arena_ralloc_large_grow(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, large_pad); /* Try to extend the run. 
*/ - malloc_mutex_lock(tsdn, &arena->lock); + malloc_mutex_lock(&arena->lock); if (pageind+npages >= chunk_npages || arena_mapbits_allocated_get(chunk, pageind+npages) != 0) goto label_fail; @@ -3123,7 +2675,7 @@ arena_ralloc_large_grow(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, if (splitsize == 0) goto label_fail; - run = &arena_miscelm_get_mutable(chunk, pageind+npages)->run; + run = &arena_miscelm_get(chunk, pageind+npages)->run; if (arena_run_split_large(arena, run, splitsize, zero)) goto label_fail; @@ -3131,16 +2683,10 @@ arena_ralloc_large_grow(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, /* * Zero the trailing bytes of the original allocation's * last page, since they are in an indeterminate state. - * There will always be trailing bytes, because ptr's - * offset from the beginning of the run is a multiple of - * CACHELINE in [0 .. PAGE). */ - void *zbase = (void *)((uintptr_t)ptr + oldsize); - void *zpast = PAGE_ADDR2BASE((void *)((uintptr_t)zbase + - PAGE)); - size_t nzero = (uintptr_t)zpast - (uintptr_t)zbase; - assert(nzero > 0); - memset(zbase, 0, nzero); + assert(PAGE_CEILING(oldsize) == oldsize); + memset((void *)((uintptr_t)ptr + oldsize), 0, + PAGE_CEILING((uintptr_t)ptr) - (uintptr_t)ptr); } size = oldsize + splitsize; @@ -3180,24 +2726,24 @@ arena_ralloc_large_grow(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, arena->stats.lstats[index].nrequests++; arena->stats.lstats[index].curruns++; } - malloc_mutex_unlock(tsdn, &arena->lock); + malloc_mutex_unlock(&arena->lock); return (false); } label_fail: - malloc_mutex_unlock(tsdn, &arena->lock); + malloc_mutex_unlock(&arena->lock); return (true); } #ifdef JEMALLOC_JET #undef arena_ralloc_junk_large -#define arena_ralloc_junk_large JEMALLOC_N(n_arena_ralloc_junk_large) +#define arena_ralloc_junk_large JEMALLOC_N(arena_ralloc_junk_large_impl) #endif static void arena_ralloc_junk_large(void *ptr, size_t old_usize, size_t usize) { if (config_fill && unlikely(opt_junk_free)) { - 
memset((void *)((uintptr_t)ptr + usize), JEMALLOC_FREE_JUNK, + memset((void *)((uintptr_t)ptr + usize), 0x5a, old_usize - usize); } } @@ -3205,7 +2751,7 @@ arena_ralloc_junk_large(void *ptr, size_t old_usize, size_t usize) #undef arena_ralloc_junk_large #define arena_ralloc_junk_large JEMALLOC_N(arena_ralloc_junk_large) arena_ralloc_junk_large_t *arena_ralloc_junk_large = - JEMALLOC_N(n_arena_ralloc_junk_large); + JEMALLOC_N(arena_ralloc_junk_large_impl); #endif /* @@ -3213,7 +2759,7 @@ arena_ralloc_junk_large_t *arena_ralloc_junk_large = * always fail if growing an object, and the following run is already in use. */ static bool -arena_ralloc_large(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t usize_min, +arena_ralloc_large(void *ptr, size_t oldsize, size_t usize_min, size_t usize_max, bool zero) { arena_chunk_t *chunk; @@ -3228,16 +2774,15 @@ arena_ralloc_large(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t usize_min, arena = extent_node_arena_get(&chunk->node); if (oldsize < usize_max) { - bool ret = arena_ralloc_large_grow(tsdn, arena, chunk, ptr, - oldsize, usize_min, usize_max, zero); + bool ret = arena_ralloc_large_grow(arena, chunk, ptr, oldsize, + usize_min, usize_max, zero); if (config_fill && !ret && !zero) { if (unlikely(opt_junk_alloc)) { - memset((void *)((uintptr_t)ptr + oldsize), - JEMALLOC_ALLOC_JUNK, - isalloc(tsdn, ptr, config_prof) - oldsize); + memset((void *)((uintptr_t)ptr + oldsize), 0xa5, + isalloc(ptr, config_prof) - oldsize); } else if (unlikely(opt_zero)) { memset((void *)((uintptr_t)ptr + oldsize), 0, - isalloc(tsdn, ptr, config_prof) - oldsize); + isalloc(ptr, config_prof) - oldsize); } } return (ret); @@ -3246,27 +2791,19 @@ arena_ralloc_large(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t usize_min, assert(oldsize > usize_max); /* Fill before shrinking in order avoid a race. 
*/ arena_ralloc_junk_large(ptr, oldsize, usize_max); - arena_ralloc_large_shrink(tsdn, arena, chunk, ptr, oldsize, usize_max); + arena_ralloc_large_shrink(arena, chunk, ptr, oldsize, usize_max); return (false); } bool -arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, - size_t extra, bool zero) +arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, + bool zero) { size_t usize_min, usize_max; - /* Calls with non-zero extra had to clamp extra. */ - assert(extra == 0 || size + extra <= HUGE_MAXCLASS); - - if (unlikely(size > HUGE_MAXCLASS)) - return (true); - usize_min = s2u(size); usize_max = s2u(size + extra); if (likely(oldsize <= large_maxclass && usize_min <= large_maxclass)) { - arena_chunk_t *chunk; - /* * Avoid moving the allocation if the size class can be left the * same. @@ -3274,39 +2811,37 @@ arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, if (oldsize <= SMALL_MAXCLASS) { assert(arena_bin_info[size2index(oldsize)].reg_size == oldsize); - if ((usize_max > SMALL_MAXCLASS || - size2index(usize_max) != size2index(oldsize)) && - (size > oldsize || usize_max < oldsize)) - return (true); + if ((usize_max <= SMALL_MAXCLASS && + size2index(usize_max) == size2index(oldsize)) || + (size <= oldsize && usize_max >= oldsize)) + return (false); } else { - if (usize_max <= SMALL_MAXCLASS) - return (true); - if (arena_ralloc_large(tsdn, ptr, oldsize, usize_min, - usize_max, zero)) - return (true); + if (usize_max > SMALL_MAXCLASS) { + if (!arena_ralloc_large(ptr, oldsize, usize_min, + usize_max, zero)) + return (false); + } } - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - arena_decay_tick(tsdn, extent_node_arena_get(&chunk->node)); - return (false); + /* Reallocation would require a move. 
*/ + return (true); } else { - return (huge_ralloc_no_move(tsdn, ptr, oldsize, usize_min, - usize_max, zero)); + return (huge_ralloc_no_move(ptr, oldsize, usize_min, usize_max, + zero)); } } static void * -arena_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, +arena_ralloc_move_helper(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache) { if (alignment == 0) - return (arena_malloc(tsdn, arena, usize, size2index(usize), - zero, tcache, true)); + return (arena_malloc(tsd, arena, usize, zero, tcache)); usize = sa2u(usize, alignment); - if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) + if (usize == 0) return (NULL); - return (ipalloct(tsdn, usize, alignment, zero, tcache, arena)); + return (ipalloct(tsd, usize, alignment, zero, tcache, arena)); } void * @@ -3317,15 +2852,14 @@ arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t usize; usize = s2u(size); - if (unlikely(usize == 0 || size > HUGE_MAXCLASS)) + if (usize == 0) return (NULL); if (likely(usize <= large_maxclass)) { size_t copysize; /* Try to avoid moving the allocation. */ - if (!arena_ralloc_no_move(tsd_tsdn(tsd), ptr, oldsize, usize, 0, - zero)) + if (!arena_ralloc_no_move(ptr, oldsize, usize, 0, zero)) return (ptr); /* @@ -3333,8 +2867,8 @@ arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, * the object. In that case, fall back to allocating new space * and copying. */ - ret = arena_ralloc_move_helper(tsd_tsdn(tsd), arena, usize, - alignment, zero, tcache); + ret = arena_ralloc_move_helper(tsd, arena, usize, alignment, + zero, tcache); if (ret == NULL) return (NULL); @@ -3346,7 +2880,7 @@ arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, copysize = (usize < oldsize) ? 
usize : oldsize; JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, copysize); memcpy(ret, ptr, copysize); - isqalloc(tsd, ptr, oldsize, tcache, true); + isqalloc(tsd, ptr, oldsize, tcache); } else { ret = huge_ralloc(tsd, arena, ptr, oldsize, usize, alignment, zero, tcache); @@ -3355,25 +2889,25 @@ arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, } dss_prec_t -arena_dss_prec_get(tsdn_t *tsdn, arena_t *arena) +arena_dss_prec_get(arena_t *arena) { dss_prec_t ret; - malloc_mutex_lock(tsdn, &arena->lock); + malloc_mutex_lock(&arena->lock); ret = arena->dss_prec; - malloc_mutex_unlock(tsdn, &arena->lock); + malloc_mutex_unlock(&arena->lock); return (ret); } bool -arena_dss_prec_set(tsdn_t *tsdn, arena_t *arena, dss_prec_t dss_prec) +arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec) { if (!have_dss) return (dss_prec != dss_prec_disabled); - malloc_mutex_lock(tsdn, &arena->lock); + malloc_mutex_lock(&arena->lock); arena->dss_prec = dss_prec; - malloc_mutex_unlock(tsdn, &arena->lock); + malloc_mutex_unlock(&arena->lock); return (false); } @@ -3388,76 +2922,27 @@ bool arena_lg_dirty_mult_default_set(ssize_t lg_dirty_mult) { - if (opt_purge != purge_mode_ratio) - return (true); if (!arena_lg_dirty_mult_valid(lg_dirty_mult)) return (true); atomic_write_z((size_t *)&lg_dirty_mult_default, (size_t)lg_dirty_mult); return (false); } -ssize_t -arena_decay_time_default_get(void) -{ - - return ((ssize_t)atomic_read_z((size_t *)&decay_time_default)); -} - -bool -arena_decay_time_default_set(ssize_t decay_time) -{ - - if (opt_purge != purge_mode_decay) - return (true); - if (!arena_decay_time_valid(decay_time)) - return (true); - atomic_write_z((size_t *)&decay_time_default, (size_t)decay_time); - return (false); -} - -static void -arena_basic_stats_merge_locked(arena_t *arena, unsigned *nthreads, - const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time, - size_t *nactive, size_t *ndirty) -{ - - *nthreads += arena_nthreads_get(arena, false); - *dss 
= dss_prec_names[arena->dss_prec]; - *lg_dirty_mult = arena->lg_dirty_mult; - *decay_time = arena->decay.time; - *nactive += arena->nactive; - *ndirty += arena->ndirty; -} - void -arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, - const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time, - size_t *nactive, size_t *ndirty) -{ - - malloc_mutex_lock(tsdn, &arena->lock); - arena_basic_stats_merge_locked(arena, nthreads, dss, lg_dirty_mult, - decay_time, nactive, ndirty); - malloc_mutex_unlock(tsdn, &arena->lock); -} - -void -arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, - const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time, +arena_stats_merge(arena_t *arena, const char **dss, ssize_t *lg_dirty_mult, size_t *nactive, size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats, malloc_huge_stats_t *hstats) { unsigned i; - cassert(config_stats); - - malloc_mutex_lock(tsdn, &arena->lock); - arena_basic_stats_merge_locked(arena, nthreads, dss, lg_dirty_mult, - decay_time, nactive, ndirty); + malloc_mutex_lock(&arena->lock); + *dss = dss_prec_names[arena->dss_prec]; + *lg_dirty_mult = arena->lg_dirty_mult; + *nactive += arena->nactive; + *ndirty += arena->ndirty; astats->mapped += arena->stats.mapped; - astats->retained += arena->stats.retained; astats->npurge += arena->stats.npurge; astats->nmadvise += arena->stats.nmadvise; astats->purged += arena->stats.purged; @@ -3483,12 +2968,12 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, hstats[i].ndalloc += arena->stats.hstats[i].ndalloc; hstats[i].curhchunks += arena->stats.hstats[i].curhchunks; } - malloc_mutex_unlock(tsdn, &arena->lock); + malloc_mutex_unlock(&arena->lock); for (i = 0; i < NBINS; i++) { arena_bin_t *bin = &arena->bins[i]; - malloc_mutex_lock(tsdn, &bin->lock); + malloc_mutex_lock(&bin->lock); bstats[i].nmalloc += bin->stats.nmalloc; bstats[i].ndalloc += bin->stats.ndalloc; 
bstats[i].nrequests += bin->stats.nrequests; @@ -3500,61 +2985,33 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, bstats[i].nruns += bin->stats.nruns; bstats[i].reruns += bin->stats.reruns; bstats[i].curruns += bin->stats.curruns; - malloc_mutex_unlock(tsdn, &bin->lock); + malloc_mutex_unlock(&bin->lock); } } -unsigned -arena_nthreads_get(arena_t *arena, bool internal) -{ - - return (atomic_read_u(&arena->nthreads[internal])); -} - -void -arena_nthreads_inc(arena_t *arena, bool internal) -{ - - atomic_add_u(&arena->nthreads[internal], 1); -} - -void -arena_nthreads_dec(arena_t *arena, bool internal) -{ - - atomic_sub_u(&arena->nthreads[internal], 1); -} - -size_t -arena_extent_sn_next(arena_t *arena) -{ - - return (atomic_add_z(&arena->extent_sn_next, 1) - 1); -} - arena_t * -arena_new(tsdn_t *tsdn, unsigned ind) +arena_new(unsigned ind) { arena_t *arena; unsigned i; + arena_bin_t *bin; /* * Allocate arena, arena->lstats, and arena->hstats contiguously, mainly * because there is no way to clean up if base_alloc() OOMs. */ if (config_stats) { - arena = (arena_t *)base_alloc(tsdn, - CACHELINE_CEILING(sizeof(arena_t)) + - QUANTUM_CEILING((nlclasses * sizeof(malloc_large_stats_t))) - + (nhclasses * sizeof(malloc_huge_stats_t))); + arena = (arena_t *)base_alloc(CACHELINE_CEILING(sizeof(arena_t)) + + QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t) + + nhclasses) * sizeof(malloc_huge_stats_t)); } else - arena = (arena_t *)base_alloc(tsdn, sizeof(arena_t)); + arena = (arena_t *)base_alloc(sizeof(arena_t)); if (arena == NULL) return (NULL); arena->ind = ind; - arena->nthreads[0] = arena->nthreads[1] = 0; - if (malloc_mutex_init(&arena->lock, "arena", WITNESS_RANK_ARENA)) + arena->nthreads = 0; + if (malloc_mutex_init(&arena->lock)) return (NULL); if (config_stats) { @@ -3584,15 +3041,11 @@ arena_new(tsdn_t *tsdn, unsigned ind) * deterministic seed. */ arena->offset_state = config_debug ? 
ind : - (size_t)(uintptr_t)arena; + (uint64_t)(uintptr_t)arena; } arena->dss_prec = chunk_dss_prec_get(); - ql_new(&arena->achunks); - - arena->extent_sn_next = 0; - arena->spare = NULL; arena->lg_dirty_mult = arena_lg_dirty_mult_default_get(); @@ -3600,42 +3053,33 @@ arena_new(tsdn_t *tsdn, unsigned ind) arena->nactive = 0; arena->ndirty = 0; - for (i = 0; i < NPSIZES; i++) - arena_run_heap_new(&arena->runs_avail[i]); - + arena_avail_tree_new(&arena->runs_avail); qr_new(&arena->runs_dirty, rd_link); qr_new(&arena->chunks_cache, cc_link); - if (opt_purge == purge_mode_decay) - arena_decay_init(arena, arena_decay_time_default_get()); - ql_new(&arena->huge); - if (malloc_mutex_init(&arena->huge_mtx, "arena_huge", - WITNESS_RANK_ARENA_HUGE)) + if (malloc_mutex_init(&arena->huge_mtx)) return (NULL); - extent_tree_szsnad_new(&arena->chunks_szsnad_cached); + extent_tree_szad_new(&arena->chunks_szad_cached); extent_tree_ad_new(&arena->chunks_ad_cached); - extent_tree_szsnad_new(&arena->chunks_szsnad_retained); + extent_tree_szad_new(&arena->chunks_szad_retained); extent_tree_ad_new(&arena->chunks_ad_retained); - if (malloc_mutex_init(&arena->chunks_mtx, "arena_chunks", - WITNESS_RANK_ARENA_CHUNKS)) + if (malloc_mutex_init(&arena->chunks_mtx)) return (NULL); ql_new(&arena->node_cache); - if (malloc_mutex_init(&arena->node_cache_mtx, "arena_node_cache", - WITNESS_RANK_ARENA_NODE_CACHE)) + if (malloc_mutex_init(&arena->node_cache_mtx)) return (NULL); arena->chunk_hooks = chunk_hooks_default; /* Initialize bins. 
*/ for (i = 0; i < NBINS; i++) { - arena_bin_t *bin = &arena->bins[i]; - if (malloc_mutex_init(&bin->lock, "arena_bin", - WITNESS_RANK_ARENA_BIN)) + bin = &arena->bins[i]; + if (malloc_mutex_init(&bin->lock)) return (NULL); bin->runcur = NULL; - arena_run_heap_new(&bin->runs); + arena_run_tree_new(&bin->runs); if (config_stats) memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); } @@ -3667,7 +3111,8 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info) * be twice as large in order to maintain alignment. */ if (config_fill && unlikely(opt_redzone)) { - size_t align_min = ZU(1) << (ffs_zu(bin_info->reg_size) - 1); + size_t align_min = ZU(1) << (jemalloc_ffs(bin_info->reg_size) - + 1); if (align_min <= REDZONE_MINSIZE) { bin_info->redzone_size = REDZONE_MINSIZE; pad_size = 0; @@ -3687,19 +3132,18 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info) * size). */ try_run_size = PAGE; - try_nregs = (uint32_t)(try_run_size / bin_info->reg_size); + try_nregs = try_run_size / bin_info->reg_size; do { perfect_run_size = try_run_size; perfect_nregs = try_nregs; try_run_size += PAGE; - try_nregs = (uint32_t)(try_run_size / bin_info->reg_size); + try_nregs = try_run_size / bin_info->reg_size; } while (perfect_run_size != perfect_nregs * bin_info->reg_size); assert(perfect_nregs <= RUN_MAXREGS); actual_run_size = perfect_run_size; - actual_nregs = (uint32_t)((actual_run_size - pad_size) / - bin_info->reg_interval); + actual_nregs = (actual_run_size - pad_size) / bin_info->reg_interval; /* * Redzones can require enough padding that not even a single region can @@ -3711,8 +3155,8 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info) assert(config_fill && unlikely(opt_redzone)); actual_run_size += PAGE; - actual_nregs = (uint32_t)((actual_run_size - pad_size) / - bin_info->reg_interval); + actual_nregs = (actual_run_size - pad_size) / + bin_info->reg_interval; } /* @@ -3720,8 +3164,8 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info) */ while (actual_run_size > arena_maxrun) { 
actual_run_size -= PAGE; - actual_nregs = (uint32_t)((actual_run_size - pad_size) / - bin_info->reg_interval); + actual_nregs = (actual_run_size - pad_size) / + bin_info->reg_interval; } assert(actual_nregs > 0); assert(actual_run_size == s2u(actual_run_size)); @@ -3729,8 +3173,11 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info) /* Copy final settings. */ bin_info->run_size = actual_run_size; bin_info->nregs = actual_nregs; - bin_info->reg0_offset = (uint32_t)(actual_run_size - (actual_nregs * - bin_info->reg_interval) - pad_size + bin_info->redzone_size); + bin_info->reg0_offset = actual_run_size - (actual_nregs * + bin_info->reg_interval) - pad_size + bin_info->redzone_size; + + if (actual_run_size > small_maxrun) + small_maxrun = actual_run_size; assert(bin_info->reg0_offset - bin_info->redzone_size + (bin_info->nregs * bin_info->reg_interval) + pad_size == bin_info->run_size); @@ -3747,7 +3194,7 @@ bin_info_init(void) bin_info_run_size_calc(bin_info); \ bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); #define BIN_INFO_INIT_bin_no(index, size) -#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, lg_delta_lookup) \ +#define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \ BIN_INFO_INIT_bin_##bin(index, (ZU(1)<> + LG_PAGE)); + if (small_run_tab == NULL) + return (true); + +#define TAB_INIT_bin_yes(index, size) { \ + arena_bin_info_t *bin_info = &arena_bin_info[index]; \ + small_run_tab[bin_info->run_size >> LG_PAGE] = true; \ + } +#define TAB_INIT_bin_no(index, size) +#define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \ + TAB_INIT_bin_##bin(index, (ZU(1)<lock); + malloc_mutex_prefork(&arena->lock); + malloc_mutex_prefork(&arena->huge_mtx); + malloc_mutex_prefork(&arena->chunks_mtx); + malloc_mutex_prefork(&arena->node_cache_mtx); + for (i = 0; i < NBINS; i++) + malloc_mutex_prefork(&arena->bins[i].lock); } void -arena_prefork1(tsdn_t *tsdn, arena_t *arena) -{ - - malloc_mutex_prefork(tsdn, &arena->chunks_mtx); -} - -void 
-arena_prefork2(tsdn_t *tsdn, arena_t *arena) -{ - - malloc_mutex_prefork(tsdn, &arena->node_cache_mtx); -} - -void -arena_prefork3(tsdn_t *tsdn, arena_t *arena) +arena_postfork_parent(arena_t *arena) { unsigned i; for (i = 0; i < NBINS; i++) - malloc_mutex_prefork(tsdn, &arena->bins[i].lock); - malloc_mutex_prefork(tsdn, &arena->huge_mtx); + malloc_mutex_postfork_parent(&arena->bins[i].lock); + malloc_mutex_postfork_parent(&arena->node_cache_mtx); + malloc_mutex_postfork_parent(&arena->chunks_mtx); + malloc_mutex_postfork_parent(&arena->huge_mtx); + malloc_mutex_postfork_parent(&arena->lock); } void -arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) +arena_postfork_child(arena_t *arena) { unsigned i; - malloc_mutex_postfork_parent(tsdn, &arena->huge_mtx); for (i = 0; i < NBINS; i++) - malloc_mutex_postfork_parent(tsdn, &arena->bins[i].lock); - malloc_mutex_postfork_parent(tsdn, &arena->node_cache_mtx); - malloc_mutex_postfork_parent(tsdn, &arena->chunks_mtx); - malloc_mutex_postfork_parent(tsdn, &arena->lock); -} - -void -arena_postfork_child(tsdn_t *tsdn, arena_t *arena) -{ - unsigned i; - - malloc_mutex_postfork_child(tsdn, &arena->huge_mtx); - for (i = 0; i < NBINS; i++) - malloc_mutex_postfork_child(tsdn, &arena->bins[i].lock); - malloc_mutex_postfork_child(tsdn, &arena->node_cache_mtx); - malloc_mutex_postfork_child(tsdn, &arena->chunks_mtx); - malloc_mutex_postfork_child(tsdn, &arena->lock); + malloc_mutex_postfork_child(&arena->bins[i].lock); + malloc_mutex_postfork_child(&arena->node_cache_mtx); + malloc_mutex_postfork_child(&arena->chunks_mtx); + malloc_mutex_postfork_child(&arena->huge_mtx); + malloc_mutex_postfork_child(&arena->lock); } diff --git a/deps/jemalloc/src/base.c b/deps/jemalloc/src/base.c index 5681a3f36..7cdcfed86 100644 --- a/deps/jemalloc/src/base.c +++ b/deps/jemalloc/src/base.c @@ -5,8 +5,7 @@ /* Data. 
*/ static malloc_mutex_t base_mtx; -static size_t base_extent_sn_next; -static extent_tree_t base_avail_szsnad; +static extent_tree_t base_avail_szad; static extent_node_t *base_nodes; static size_t base_allocated; static size_t base_resident; @@ -14,13 +13,12 @@ static size_t base_mapped; /******************************************************************************/ +/* base_mtx must be held. */ static extent_node_t * -base_node_try_alloc(tsdn_t *tsdn) +base_node_try_alloc(void) { extent_node_t *node; - malloc_mutex_assert_owner(tsdn, &base_mtx); - if (base_nodes == NULL) return (NULL); node = base_nodes; @@ -29,42 +27,33 @@ base_node_try_alloc(tsdn_t *tsdn) return (node); } +/* base_mtx must be held. */ static void -base_node_dalloc(tsdn_t *tsdn, extent_node_t *node) +base_node_dalloc(extent_node_t *node) { - malloc_mutex_assert_owner(tsdn, &base_mtx); - JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(node, sizeof(extent_node_t)); *(extent_node_t **)node = base_nodes; base_nodes = node; } -static void -base_extent_node_init(extent_node_t *node, void *addr, size_t size) -{ - size_t sn = atomic_add_z(&base_extent_sn_next, 1) - 1; - - extent_node_init(node, NULL, addr, size, sn, true, true); -} - +/* base_mtx must be held. */ static extent_node_t * -base_chunk_alloc(tsdn_t *tsdn, size_t minsize) +base_chunk_alloc(size_t minsize) { extent_node_t *node; size_t csize, nsize; void *addr; - malloc_mutex_assert_owner(tsdn, &base_mtx); assert(minsize != 0); - node = base_node_try_alloc(tsdn); + node = base_node_try_alloc(); /* Allocate enough space to also carve a node out if necessary. */ nsize = (node == NULL) ? 
CACHELINE_CEILING(sizeof(extent_node_t)) : 0; csize = CHUNK_CEILING(minsize + nsize); addr = chunk_alloc_base(csize); if (addr == NULL) { if (node != NULL) - base_node_dalloc(tsdn, node); + base_node_dalloc(node); return (NULL); } base_mapped += csize; @@ -77,7 +66,7 @@ base_chunk_alloc(tsdn_t *tsdn, size_t minsize) base_resident += PAGE_CEILING(nsize); } } - base_extent_node_init(node, addr, csize); + extent_node_init(node, NULL, addr, csize, true, true); return (node); } @@ -87,7 +76,7 @@ base_chunk_alloc(tsdn_t *tsdn, size_t minsize) * physical memory usage. */ void * -base_alloc(tsdn_t *tsdn, size_t size) +base_alloc(size_t size) { void *ret; size_t csize, usize; @@ -101,15 +90,15 @@ base_alloc(tsdn_t *tsdn, size_t size) csize = CACHELINE_CEILING(size); usize = s2u(csize); - extent_node_init(&key, NULL, NULL, usize, 0, false, false); - malloc_mutex_lock(tsdn, &base_mtx); - node = extent_tree_szsnad_nsearch(&base_avail_szsnad, &key); + extent_node_init(&key, NULL, NULL, usize, false, false); + malloc_mutex_lock(&base_mtx); + node = extent_tree_szad_nsearch(&base_avail_szad, &key); if (node != NULL) { /* Use existing space. */ - extent_tree_szsnad_remove(&base_avail_szsnad, node); + extent_tree_szad_remove(&base_avail_szad, node); } else { /* Try to allocate more space. 
*/ - node = base_chunk_alloc(tsdn, csize); + node = base_chunk_alloc(csize); } if (node == NULL) { ret = NULL; @@ -120,9 +109,9 @@ base_alloc(tsdn_t *tsdn, size_t size) if (extent_node_size_get(node) > csize) { extent_node_addr_set(node, (void *)((uintptr_t)ret + csize)); extent_node_size_set(node, extent_node_size_get(node) - csize); - extent_tree_szsnad_insert(&base_avail_szsnad, node); + extent_tree_szad_insert(&base_avail_szad, node); } else - base_node_dalloc(tsdn, node); + base_node_dalloc(node); if (config_stats) { base_allocated += csize; /* @@ -134,54 +123,52 @@ base_alloc(tsdn_t *tsdn, size_t size) } JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ret, csize); label_return: - malloc_mutex_unlock(tsdn, &base_mtx); + malloc_mutex_unlock(&base_mtx); return (ret); } void -base_stats_get(tsdn_t *tsdn, size_t *allocated, size_t *resident, - size_t *mapped) +base_stats_get(size_t *allocated, size_t *resident, size_t *mapped) { - malloc_mutex_lock(tsdn, &base_mtx); + malloc_mutex_lock(&base_mtx); assert(base_allocated <= base_resident); assert(base_resident <= base_mapped); *allocated = base_allocated; *resident = base_resident; *mapped = base_mapped; - malloc_mutex_unlock(tsdn, &base_mtx); + malloc_mutex_unlock(&base_mtx); } bool base_boot(void) { - if (malloc_mutex_init(&base_mtx, "base", WITNESS_RANK_BASE)) + if (malloc_mutex_init(&base_mtx)) return (true); - base_extent_sn_next = 0; - extent_tree_szsnad_new(&base_avail_szsnad); + extent_tree_szad_new(&base_avail_szad); base_nodes = NULL; return (false); } void -base_prefork(tsdn_t *tsdn) +base_prefork(void) { - malloc_mutex_prefork(tsdn, &base_mtx); + malloc_mutex_prefork(&base_mtx); } void -base_postfork_parent(tsdn_t *tsdn) +base_postfork_parent(void) { - malloc_mutex_postfork_parent(tsdn, &base_mtx); + malloc_mutex_postfork_parent(&base_mtx); } void -base_postfork_child(tsdn_t *tsdn) +base_postfork_child(void) { - malloc_mutex_postfork_child(tsdn, &base_mtx); + malloc_mutex_postfork_child(&base_mtx); } diff --git 
a/deps/jemalloc/src/bitmap.c b/deps/jemalloc/src/bitmap.c index ac0f3b381..c733372b4 100644 --- a/deps/jemalloc/src/bitmap.c +++ b/deps/jemalloc/src/bitmap.c @@ -3,8 +3,6 @@ /******************************************************************************/ -#ifdef USE_TREE - void bitmap_info_init(bitmap_info_t *binfo, size_t nbits) { @@ -34,11 +32,20 @@ bitmap_info_init(bitmap_info_t *binfo, size_t nbits) binfo->nbits = nbits; } -static size_t +size_t bitmap_info_ngroups(const bitmap_info_t *binfo) { - return (binfo->levels[binfo->nlevels].group_offset); + return (binfo->levels[binfo->nlevels].group_offset << LG_SIZEOF_BITMAP); +} + +size_t +bitmap_size(size_t nbits) +{ + bitmap_info_t binfo; + + bitmap_info_init(&binfo, nbits); + return (bitmap_info_ngroups(&binfo)); } void @@ -54,7 +61,8 @@ bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) * correspond to the first logical bit in the group, so extra bits * are the most significant bits of the last group. */ - memset(bitmap, 0xffU, bitmap_size(binfo)); + memset(bitmap, 0xffU, binfo->levels[binfo->nlevels].group_offset << + LG_SIZEOF_BITMAP); extra = (BITMAP_GROUP_NBITS - (binfo->nbits & BITMAP_GROUP_NBITS_MASK)) & BITMAP_GROUP_NBITS_MASK; if (extra != 0) @@ -68,44 +76,3 @@ bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) bitmap[binfo->levels[i+1].group_offset - 1] >>= extra; } } - -#else /* USE_TREE */ - -void -bitmap_info_init(bitmap_info_t *binfo, size_t nbits) -{ - - assert(nbits > 0); - assert(nbits <= (ZU(1) << LG_BITMAP_MAXBITS)); - - binfo->ngroups = BITMAP_BITS2GROUPS(nbits); - binfo->nbits = nbits; -} - -static size_t -bitmap_info_ngroups(const bitmap_info_t *binfo) -{ - - return (binfo->ngroups); -} - -void -bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) -{ - size_t extra; - - memset(bitmap, 0xffU, bitmap_size(binfo)); - extra = (BITMAP_GROUP_NBITS - (binfo->nbits & BITMAP_GROUP_NBITS_MASK)) - & BITMAP_GROUP_NBITS_MASK; - if (extra != 0) - bitmap[binfo->ngroups - 1] >>= extra; 
-} - -#endif /* USE_TREE */ - -size_t -bitmap_size(const bitmap_info_t *binfo) -{ - - return (bitmap_info_ngroups(binfo) << LG_SIZEOF_BITMAP); -} diff --git a/deps/jemalloc/src/chunk.c b/deps/jemalloc/src/chunk.c index c1c514a86..6ba1ca7a5 100644 --- a/deps/jemalloc/src/chunk.c +++ b/deps/jemalloc/src/chunk.c @@ -49,10 +49,9 @@ const chunk_hooks_t chunk_hooks_default = { * definition. */ -static void chunk_record(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, extent_tree_t *chunks_szsnad, - extent_tree_t *chunks_ad, bool cache, void *chunk, size_t size, size_t sn, - bool zeroed, bool committed); +static void chunk_record(arena_t *arena, chunk_hooks_t *chunk_hooks, + extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, bool cache, + void *chunk, size_t size, bool zeroed, bool committed); /******************************************************************************/ @@ -64,23 +63,23 @@ chunk_hooks_get_locked(arena_t *arena) } chunk_hooks_t -chunk_hooks_get(tsdn_t *tsdn, arena_t *arena) +chunk_hooks_get(arena_t *arena) { chunk_hooks_t chunk_hooks; - malloc_mutex_lock(tsdn, &arena->chunks_mtx); + malloc_mutex_lock(&arena->chunks_mtx); chunk_hooks = chunk_hooks_get_locked(arena); - malloc_mutex_unlock(tsdn, &arena->chunks_mtx); + malloc_mutex_unlock(&arena->chunks_mtx); return (chunk_hooks); } chunk_hooks_t -chunk_hooks_set(tsdn_t *tsdn, arena_t *arena, const chunk_hooks_t *chunk_hooks) +chunk_hooks_set(arena_t *arena, const chunk_hooks_t *chunk_hooks) { chunk_hooks_t old_chunk_hooks; - malloc_mutex_lock(tsdn, &arena->chunks_mtx); + malloc_mutex_lock(&arena->chunks_mtx); old_chunk_hooks = arena->chunk_hooks; /* * Copy each field atomically so that it is impossible for readers to @@ -105,14 +104,14 @@ chunk_hooks_set(tsdn_t *tsdn, arena_t *arena, const chunk_hooks_t *chunk_hooks) ATOMIC_COPY_HOOK(split); ATOMIC_COPY_HOOK(merge); #undef ATOMIC_COPY_HOOK - malloc_mutex_unlock(tsdn, &arena->chunks_mtx); + malloc_mutex_unlock(&arena->chunks_mtx); return 
(old_chunk_hooks); } static void -chunk_hooks_assure_initialized_impl(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, bool locked) +chunk_hooks_assure_initialized_impl(arena_t *arena, chunk_hooks_t *chunk_hooks, + bool locked) { static const chunk_hooks_t uninitialized_hooks = CHUNK_HOOKS_INITIALIZER; @@ -120,28 +119,27 @@ chunk_hooks_assure_initialized_impl(tsdn_t *tsdn, arena_t *arena, if (memcmp(chunk_hooks, &uninitialized_hooks, sizeof(chunk_hooks_t)) == 0) { *chunk_hooks = locked ? chunk_hooks_get_locked(arena) : - chunk_hooks_get(tsdn, arena); + chunk_hooks_get(arena); } } static void -chunk_hooks_assure_initialized_locked(tsdn_t *tsdn, arena_t *arena, +chunk_hooks_assure_initialized_locked(arena_t *arena, chunk_hooks_t *chunk_hooks) { - chunk_hooks_assure_initialized_impl(tsdn, arena, chunk_hooks, true); + chunk_hooks_assure_initialized_impl(arena, chunk_hooks, true); } static void -chunk_hooks_assure_initialized(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks) +chunk_hooks_assure_initialized(arena_t *arena, chunk_hooks_t *chunk_hooks) { - chunk_hooks_assure_initialized_impl(tsdn, arena, chunk_hooks, false); + chunk_hooks_assure_initialized_impl(arena, chunk_hooks, false); } bool -chunk_register(tsdn_t *tsdn, const void *chunk, const extent_node_t *node) +chunk_register(const void *chunk, const extent_node_t *node) { assert(extent_node_addr_get(node) == chunk); @@ -161,7 +159,7 @@ chunk_register(tsdn_t *tsdn, const void *chunk, const extent_node_t *node) high = atomic_read_z(&highchunks); } if (cur > high && prof_gdump_get_unlocked()) - prof_gdump(tsdn); + prof_gdump(); } return (false); @@ -183,35 +181,33 @@ chunk_deregister(const void *chunk, const extent_node_t *node) } /* - * Do first-best-fit chunk selection, i.e. select the oldest/lowest chunk that - * best fits. + * Do first-best-fit chunk selection, i.e. select the lowest chunk that best + * fits. 
*/ static extent_node_t * -chunk_first_best_fit(arena_t *arena, extent_tree_t *chunks_szsnad, size_t size) +chunk_first_best_fit(arena_t *arena, extent_tree_t *chunks_szad, + extent_tree_t *chunks_ad, size_t size) { extent_node_t key; assert(size == CHUNK_CEILING(size)); - extent_node_init(&key, arena, NULL, size, 0, false, false); - return (extent_tree_szsnad_nsearch(chunks_szsnad, &key)); + extent_node_init(&key, arena, NULL, size, false, false); + return (extent_tree_szad_nsearch(chunks_szad, &key)); } static void * -chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - extent_tree_t *chunks_szsnad, extent_tree_t *chunks_ad, bool cache, - void *new_addr, size_t size, size_t alignment, size_t *sn, bool *zero, - bool *commit, bool dalloc_node) +chunk_recycle(arena_t *arena, chunk_hooks_t *chunk_hooks, + extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, bool cache, + void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, + bool dalloc_node) { void *ret; extent_node_t *node; size_t alloc_size, leadsize, trailsize; bool zeroed, committed; - assert(CHUNK_CEILING(size) == size); - assert(alignment > 0); assert(new_addr == NULL || alignment == chunksize); - assert(CHUNK_ADDR2BASE(new_addr) == new_addr); /* * Cached chunks use the node linkage embedded in their headers, in * which case dalloc_node is true, and new_addr is non-NULL because @@ -219,23 +215,24 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, */ assert(dalloc_node || new_addr != NULL); - alloc_size = size + CHUNK_CEILING(alignment) - chunksize; + alloc_size = CHUNK_CEILING(s2u(size + alignment - chunksize)); /* Beware size_t wrap-around. 
*/ if (alloc_size < size) return (NULL); - malloc_mutex_lock(tsdn, &arena->chunks_mtx); - chunk_hooks_assure_initialized_locked(tsdn, arena, chunk_hooks); + malloc_mutex_lock(&arena->chunks_mtx); + chunk_hooks_assure_initialized_locked(arena, chunk_hooks); if (new_addr != NULL) { extent_node_t key; - extent_node_init(&key, arena, new_addr, alloc_size, 0, false, + extent_node_init(&key, arena, new_addr, alloc_size, false, false); node = extent_tree_ad_search(chunks_ad, &key); } else { - node = chunk_first_best_fit(arena, chunks_szsnad, alloc_size); + node = chunk_first_best_fit(arena, chunks_szad, chunks_ad, + alloc_size); } if (node == NULL || (new_addr != NULL && extent_node_size_get(node) < size)) { - malloc_mutex_unlock(tsdn, &arena->chunks_mtx); + malloc_mutex_unlock(&arena->chunks_mtx); return (NULL); } leadsize = ALIGNMENT_CEILING((uintptr_t)extent_node_addr_get(node), @@ -244,7 +241,6 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, assert(extent_node_size_get(node) >= leadsize + size); trailsize = extent_node_size_get(node) - leadsize - size; ret = (void *)((uintptr_t)extent_node_addr_get(node) + leadsize); - *sn = extent_node_sn_get(node); zeroed = extent_node_zeroed_get(node); if (zeroed) *zero = true; @@ -255,17 +251,17 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, if (leadsize != 0 && chunk_hooks->split(extent_node_addr_get(node), extent_node_size_get(node), leadsize, size, false, arena->ind)) { - malloc_mutex_unlock(tsdn, &arena->chunks_mtx); + malloc_mutex_unlock(&arena->chunks_mtx); return (NULL); } /* Remove node from the tree. */ - extent_tree_szsnad_remove(chunks_szsnad, node); + extent_tree_szad_remove(chunks_szad, node); extent_tree_ad_remove(chunks_ad, node); arena_chunk_cache_maybe_remove(arena, node, cache); if (leadsize != 0) { /* Insert the leading space as a smaller chunk. 
*/ extent_node_size_set(node, leadsize); - extent_tree_szsnad_insert(chunks_szsnad, node); + extent_tree_szad_insert(chunks_szad, node); extent_tree_ad_insert(chunks_ad, node); arena_chunk_cache_maybe_insert(arena, node, cache); node = NULL; @@ -275,42 +271,41 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, if (chunk_hooks->split(ret, size + trailsize, size, trailsize, false, arena->ind)) { if (dalloc_node && node != NULL) - arena_node_dalloc(tsdn, arena, node); - malloc_mutex_unlock(tsdn, &arena->chunks_mtx); - chunk_record(tsdn, arena, chunk_hooks, chunks_szsnad, - chunks_ad, cache, ret, size + trailsize, *sn, - zeroed, committed); + arena_node_dalloc(arena, node); + malloc_mutex_unlock(&arena->chunks_mtx); + chunk_record(arena, chunk_hooks, chunks_szad, chunks_ad, + cache, ret, size + trailsize, zeroed, committed); return (NULL); } /* Insert the trailing space as a smaller chunk. */ if (node == NULL) { - node = arena_node_alloc(tsdn, arena); + node = arena_node_alloc(arena); if (node == NULL) { - malloc_mutex_unlock(tsdn, &arena->chunks_mtx); - chunk_record(tsdn, arena, chunk_hooks, - chunks_szsnad, chunks_ad, cache, ret, size - + trailsize, *sn, zeroed, committed); + malloc_mutex_unlock(&arena->chunks_mtx); + chunk_record(arena, chunk_hooks, chunks_szad, + chunks_ad, cache, ret, size + trailsize, + zeroed, committed); return (NULL); } } extent_node_init(node, arena, (void *)((uintptr_t)(ret) + size), - trailsize, *sn, zeroed, committed); - extent_tree_szsnad_insert(chunks_szsnad, node); + trailsize, zeroed, committed); + extent_tree_szad_insert(chunks_szad, node); extent_tree_ad_insert(chunks_ad, node); arena_chunk_cache_maybe_insert(arena, node, cache); node = NULL; } if (!committed && chunk_hooks->commit(ret, size, 0, size, arena->ind)) { - malloc_mutex_unlock(tsdn, &arena->chunks_mtx); - chunk_record(tsdn, arena, chunk_hooks, chunks_szsnad, chunks_ad, - cache, ret, size, *sn, zeroed, committed); + 
malloc_mutex_unlock(&arena->chunks_mtx); + chunk_record(arena, chunk_hooks, chunks_szad, chunks_ad, cache, + ret, size, zeroed, committed); return (NULL); } - malloc_mutex_unlock(tsdn, &arena->chunks_mtx); + malloc_mutex_unlock(&arena->chunks_mtx); assert(dalloc_node || node != NULL); if (dalloc_node && node != NULL) - arena_node_dalloc(tsdn, arena, node); + arena_node_dalloc(arena, node); if (*zero) { if (!zeroed) memset(ret, 0, size); @@ -318,11 +313,10 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, size_t i; size_t *p = (size_t *)(uintptr_t)ret; + JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ret, size); for (i = 0; i < size / sizeof(size_t); i++) assert(p[i] == 0); } - if (config_valgrind) - JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ret, size); } return (ret); } @@ -334,29 +328,39 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, * them if they are returned. */ static void * -chunk_alloc_core(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, - size_t alignment, bool *zero, bool *commit, dss_prec_t dss_prec) +chunk_alloc_core(arena_t *arena, void *new_addr, size_t size, size_t alignment, + bool *zero, bool *commit, dss_prec_t dss_prec) { void *ret; + chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; assert(size != 0); assert((size & chunksize_mask) == 0); assert(alignment != 0); assert((alignment & chunksize_mask) == 0); + /* Retained. */ + if ((ret = chunk_recycle(arena, &chunk_hooks, + &arena->chunks_szad_retained, &arena->chunks_ad_retained, false, + new_addr, size, alignment, zero, commit, true)) != NULL) + return (ret); + /* "primary" dss. */ if (have_dss && dss_prec == dss_prec_primary && (ret = - chunk_alloc_dss(tsdn, arena, new_addr, size, alignment, zero, - commit)) != NULL) - return (ret); - /* mmap. */ - if ((ret = chunk_alloc_mmap(new_addr, size, alignment, zero, commit)) != + chunk_alloc_dss(arena, new_addr, size, alignment, zero, commit)) != NULL) return (ret); + /* + * mmap. 
Requesting an address is not implemented for + * chunk_alloc_mmap(), so only call it if (new_addr == NULL). + */ + if (new_addr == NULL && (ret = chunk_alloc_mmap(size, alignment, zero, + commit)) != NULL) + return (ret); /* "secondary" dss. */ if (have_dss && dss_prec == dss_prec_secondary && (ret = - chunk_alloc_dss(tsdn, arena, new_addr, size, alignment, zero, - commit)) != NULL) + chunk_alloc_dss(arena, new_addr, size, alignment, zero, commit)) != + NULL) return (ret); /* All strategies for allocation failed. */ @@ -376,7 +380,7 @@ chunk_alloc_base(size_t size) */ zero = true; commit = true; - ret = chunk_alloc_mmap(NULL, size, chunksize, &zero, &commit); + ret = chunk_alloc_mmap(size, chunksize, &zero, &commit); if (ret == NULL) return (NULL); if (config_valgrind) @@ -386,33 +390,37 @@ chunk_alloc_base(size_t size) } void * -chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - void *new_addr, size_t size, size_t alignment, size_t *sn, bool *zero, - bool *commit, bool dalloc_node) +chunk_alloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, + size_t size, size_t alignment, bool *zero, bool dalloc_node) { void *ret; + bool commit; assert(size != 0); assert((size & chunksize_mask) == 0); assert(alignment != 0); assert((alignment & chunksize_mask) == 0); - ret = chunk_recycle(tsdn, arena, chunk_hooks, - &arena->chunks_szsnad_cached, &arena->chunks_ad_cached, true, - new_addr, size, alignment, sn, zero, commit, dalloc_node); + commit = true; + ret = chunk_recycle(arena, chunk_hooks, &arena->chunks_szad_cached, + &arena->chunks_ad_cached, true, new_addr, size, alignment, zero, + &commit, dalloc_node); if (ret == NULL) return (NULL); + assert(commit); if (config_valgrind) JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size); return (ret); } static arena_t * -chunk_arena_get(tsdn_t *tsdn, unsigned arena_ind) +chunk_arena_get(unsigned arena_ind) { arena_t *arena; - arena = arena_get(tsdn, arena_ind, false); + /* Dodge tsd for a0 in 
order to avoid bootstrapping issues. */ + arena = (arena_ind == 0) ? a0get() : arena_get(tsd_fetch(), arena_ind, + false, true); /* * The arena we're allocating on behalf of must have been initialized * already. @@ -422,12 +430,14 @@ chunk_arena_get(tsdn_t *tsdn, unsigned arena_ind) } static void * -chunk_alloc_default_impl(tsdn_t *tsdn, arena_t *arena, void *new_addr, - size_t size, size_t alignment, bool *zero, bool *commit) +chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, + bool *commit, unsigned arena_ind) { void *ret; + arena_t *arena; - ret = chunk_alloc_core(tsdn, arena, new_addr, size, alignment, zero, + arena = chunk_arena_get(arena_ind); + ret = chunk_alloc_core(arena, new_addr, size, alignment, zero, commit, arena->dss_prec); if (ret == NULL) return (NULL); @@ -437,80 +447,26 @@ chunk_alloc_default_impl(tsdn_t *tsdn, arena_t *arena, void *new_addr, return (ret); } -static void * -chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, - bool *commit, unsigned arena_ind) -{ - tsdn_t *tsdn; - arena_t *arena; - - tsdn = tsdn_fetch(); - arena = chunk_arena_get(tsdn, arena_ind); - - return (chunk_alloc_default_impl(tsdn, arena, new_addr, size, alignment, - zero, commit)); -} - -static void * -chunk_alloc_retained(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - void *new_addr, size_t size, size_t alignment, size_t *sn, bool *zero, - bool *commit) -{ - void *ret; - - assert(size != 0); - assert((size & chunksize_mask) == 0); - assert(alignment != 0); - assert((alignment & chunksize_mask) == 0); - - ret = chunk_recycle(tsdn, arena, chunk_hooks, - &arena->chunks_szsnad_retained, &arena->chunks_ad_retained, false, - new_addr, size, alignment, sn, zero, commit, true); - - if (config_stats && ret != NULL) - arena->stats.retained -= size; - - return (ret); -} - void * -chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - void *new_addr, size_t size, size_t alignment, size_t 
*sn, bool *zero, - bool *commit) +chunk_alloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, + size_t size, size_t alignment, bool *zero, bool *commit) { void *ret; - chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); - - ret = chunk_alloc_retained(tsdn, arena, chunk_hooks, new_addr, size, - alignment, sn, zero, commit); - if (ret == NULL) { - if (chunk_hooks->alloc == chunk_alloc_default) { - /* Call directly to propagate tsdn. */ - ret = chunk_alloc_default_impl(tsdn, arena, new_addr, - size, alignment, zero, commit); - } else { - ret = chunk_hooks->alloc(new_addr, size, alignment, - zero, commit, arena->ind); - } - - if (ret == NULL) - return (NULL); - - *sn = arena_extent_sn_next(arena); - - if (config_valgrind && chunk_hooks->alloc != - chunk_alloc_default) - JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, chunksize); - } - + chunk_hooks_assure_initialized(arena, chunk_hooks); + ret = chunk_hooks->alloc(new_addr, size, alignment, zero, commit, + arena->ind); + if (ret == NULL) + return (NULL); + if (config_valgrind && chunk_hooks->alloc != chunk_alloc_default) + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, chunksize); return (ret); } static void -chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - extent_tree_t *chunks_szsnad, extent_tree_t *chunks_ad, bool cache, - void *chunk, size_t size, size_t sn, bool zeroed, bool committed) +chunk_record(arena_t *arena, chunk_hooks_t *chunk_hooks, + extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, bool cache, + void *chunk, size_t size, bool zeroed, bool committed) { bool unzeroed; extent_node_t *node, *prev; @@ -520,9 +476,9 @@ chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, unzeroed = cache || !zeroed; JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(chunk, size); - malloc_mutex_lock(tsdn, &arena->chunks_mtx); - chunk_hooks_assure_initialized_locked(tsdn, arena, chunk_hooks); - extent_node_init(&key, arena, (void *)((uintptr_t)chunk + size), 0, 0, + 
malloc_mutex_lock(&arena->chunks_mtx); + chunk_hooks_assure_initialized_locked(arena, chunk_hooks); + extent_node_init(&key, arena, (void *)((uintptr_t)chunk + size), 0, false, false); node = extent_tree_ad_nsearch(chunks_ad, &key); /* Try to coalesce forward. */ @@ -534,21 +490,19 @@ chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, /* * Coalesce chunk with the following address range. This does * not change the position within chunks_ad, so only - * remove/insert from/into chunks_szsnad. + * remove/insert from/into chunks_szad. */ - extent_tree_szsnad_remove(chunks_szsnad, node); + extent_tree_szad_remove(chunks_szad, node); arena_chunk_cache_maybe_remove(arena, node, cache); extent_node_addr_set(node, chunk); extent_node_size_set(node, size + extent_node_size_get(node)); - if (sn < extent_node_sn_get(node)) - extent_node_sn_set(node, sn); extent_node_zeroed_set(node, extent_node_zeroed_get(node) && !unzeroed); - extent_tree_szsnad_insert(chunks_szsnad, node); + extent_tree_szad_insert(chunks_szad, node); arena_chunk_cache_maybe_insert(arena, node, cache); } else { /* Coalescing forward failed, so insert a new node. */ - node = arena_node_alloc(tsdn, arena); + node = arena_node_alloc(arena); if (node == NULL) { /* * Node allocation failed, which is an exceedingly @@ -557,15 +511,15 @@ chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, * a virtual memory leak. 
*/ if (cache) { - chunk_purge_wrapper(tsdn, arena, chunk_hooks, - chunk, size, 0, size); + chunk_purge_wrapper(arena, chunk_hooks, chunk, + size, 0, size); } goto label_return; } - extent_node_init(node, arena, chunk, size, sn, !unzeroed, + extent_node_init(node, arena, chunk, size, !unzeroed, committed); extent_tree_ad_insert(chunks_ad, node); - extent_tree_szsnad_insert(chunks_szsnad, node); + extent_tree_szad_insert(chunks_szad, node); arena_chunk_cache_maybe_insert(arena, node, cache); } @@ -579,33 +533,31 @@ chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, /* * Coalesce chunk with the previous address range. This does * not change the position within chunks_ad, so only - * remove/insert node from/into chunks_szsnad. + * remove/insert node from/into chunks_szad. */ - extent_tree_szsnad_remove(chunks_szsnad, prev); + extent_tree_szad_remove(chunks_szad, prev); extent_tree_ad_remove(chunks_ad, prev); arena_chunk_cache_maybe_remove(arena, prev, cache); - extent_tree_szsnad_remove(chunks_szsnad, node); + extent_tree_szad_remove(chunks_szad, node); arena_chunk_cache_maybe_remove(arena, node, cache); extent_node_addr_set(node, extent_node_addr_get(prev)); extent_node_size_set(node, extent_node_size_get(prev) + extent_node_size_get(node)); - if (extent_node_sn_get(prev) < extent_node_sn_get(node)) - extent_node_sn_set(node, extent_node_sn_get(prev)); extent_node_zeroed_set(node, extent_node_zeroed_get(prev) && extent_node_zeroed_get(node)); - extent_tree_szsnad_insert(chunks_szsnad, node); + extent_tree_szad_insert(chunks_szad, node); arena_chunk_cache_maybe_insert(arena, node, cache); - arena_node_dalloc(tsdn, arena, prev); + arena_node_dalloc(arena, prev); } label_return: - malloc_mutex_unlock(tsdn, &arena->chunks_mtx); + malloc_mutex_unlock(&arena->chunks_mtx); } void -chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - void *chunk, size_t size, size_t sn, bool committed) +chunk_dalloc_cache(arena_t *arena, 
chunk_hooks_t *chunk_hooks, void *chunk, + size_t size, bool committed) { assert(chunk != NULL); @@ -613,49 +565,24 @@ chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, assert(size != 0); assert((size & chunksize_mask) == 0); - chunk_record(tsdn, arena, chunk_hooks, &arena->chunks_szsnad_cached, - &arena->chunks_ad_cached, true, chunk, size, sn, false, - committed); - arena_maybe_purge(tsdn, arena); -} - -static bool -chunk_dalloc_default_impl(void *chunk, size_t size) -{ - - if (!have_dss || !chunk_in_dss(chunk)) - return (chunk_dalloc_mmap(chunk, size)); - return (true); -} - -static bool -chunk_dalloc_default(void *chunk, size_t size, bool committed, - unsigned arena_ind) -{ - - return (chunk_dalloc_default_impl(chunk, size)); + chunk_record(arena, chunk_hooks, &arena->chunks_szad_cached, + &arena->chunks_ad_cached, true, chunk, size, false, committed); + arena_maybe_purge(arena); } void -chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - void *chunk, size_t size, size_t sn, bool zeroed, bool committed) +chunk_dalloc_arena(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, + size_t size, bool zeroed, bool committed) { - bool err; assert(chunk != NULL); assert(CHUNK_ADDR2BASE(chunk) == chunk); assert(size != 0); assert((size & chunksize_mask) == 0); - chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); + chunk_hooks_assure_initialized(arena, chunk_hooks); /* Try to deallocate. */ - if (chunk_hooks->dalloc == chunk_dalloc_default) { - /* Call directly to propagate tsdn. */ - err = chunk_dalloc_default_impl(chunk, size); - } else - err = chunk_hooks->dalloc(chunk, size, committed, arena->ind); - - if (!err) + if (!chunk_hooks->dalloc(chunk, size, committed, arena->ind)) return; /* Try to decommit; purge if that fails. 
*/ if (committed) { @@ -664,12 +591,29 @@ chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, } zeroed = !committed || !chunk_hooks->purge(chunk, size, 0, size, arena->ind); - chunk_record(tsdn, arena, chunk_hooks, &arena->chunks_szsnad_retained, - &arena->chunks_ad_retained, false, chunk, size, sn, zeroed, - committed); + chunk_record(arena, chunk_hooks, &arena->chunks_szad_retained, + &arena->chunks_ad_retained, false, chunk, size, zeroed, committed); +} - if (config_stats) - arena->stats.retained += size; +static bool +chunk_dalloc_default(void *chunk, size_t size, bool committed, + unsigned arena_ind) +{ + + if (!have_dss || !chunk_in_dss(chunk)) + return (chunk_dalloc_mmap(chunk, size)); + return (true); +} + +void +chunk_dalloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, + size_t size, bool committed) +{ + + chunk_hooks_assure_initialized(arena, chunk_hooks); + chunk_hooks->dalloc(chunk, size, committed, arena->ind); + if (config_valgrind && chunk_hooks->dalloc != chunk_dalloc_default) + JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(chunk, size); } static bool @@ -690,9 +634,8 @@ chunk_decommit_default(void *chunk, size_t size, size_t offset, size_t length, length)); } -static bool -chunk_purge_default(void *chunk, size_t size, size_t offset, size_t length, - unsigned arena_ind) +bool +chunk_purge_arena(arena_t *arena, void *chunk, size_t offset, size_t length) { assert(chunk != NULL); @@ -705,12 +648,21 @@ chunk_purge_default(void *chunk, size_t size, size_t offset, size_t length, length)); } -bool -chunk_purge_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - void *chunk, size_t size, size_t offset, size_t length) +static bool +chunk_purge_default(void *chunk, size_t size, size_t offset, size_t length, + unsigned arena_ind) { - chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); + return (chunk_purge_arena(chunk_arena_get(arena_ind), chunk, offset, + length)); +} + +bool 
+chunk_purge_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, + size_t size, size_t offset, size_t length) +{ + + chunk_hooks_assure_initialized(arena, chunk_hooks); return (chunk_hooks->purge(chunk, size, offset, length, arena->ind)); } @@ -724,31 +676,24 @@ chunk_split_default(void *chunk, size_t size, size_t size_a, size_t size_b, return (false); } -static bool -chunk_merge_default_impl(void *chunk_a, void *chunk_b) -{ - - if (!maps_coalesce) - return (true); - if (have_dss && !chunk_dss_mergeable(chunk_a, chunk_b)) - return (true); - - return (false); -} - static bool chunk_merge_default(void *chunk_a, size_t size_a, void *chunk_b, size_t size_b, bool committed, unsigned arena_ind) { - return (chunk_merge_default_impl(chunk_a, chunk_b)); + if (!maps_coalesce) + return (true); + if (have_dss && chunk_in_dss(chunk_a) != chunk_in_dss(chunk_b)) + return (true); + + return (false); } static rtree_node_elm_t * chunks_rtree_node_alloc(size_t nelms) { - return ((rtree_node_elm_t *)base_alloc(TSDN_NULL, nelms * + return ((rtree_node_elm_t *)base_alloc(nelms * sizeof(rtree_node_elm_t))); } @@ -771,7 +716,7 @@ chunk_boot(void) * so pages_map will always take fast path. 
*/ if (!opt_lg_chunk) { - opt_lg_chunk = ffs_u((unsigned)info.dwAllocationGranularity) + opt_lg_chunk = jemalloc_ffs((int)info.dwAllocationGranularity) - 1; } #else @@ -785,11 +730,32 @@ chunk_boot(void) chunksize_mask = chunksize - 1; chunk_npages = (chunksize >> LG_PAGE); - if (have_dss) - chunk_dss_boot(); - if (rtree_new(&chunks_rtree, (unsigned)((ZU(1) << (LG_SIZEOF_PTR+3)) - - opt_lg_chunk), chunks_rtree_node_alloc, NULL)) + if (have_dss && chunk_dss_boot()) + return (true); + if (rtree_new(&chunks_rtree, (ZU(1) << (LG_SIZEOF_PTR+3)) - + opt_lg_chunk, chunks_rtree_node_alloc, NULL)) return (true); return (false); } + +void +chunk_prefork(void) +{ + + chunk_dss_prefork(); +} + +void +chunk_postfork_parent(void) +{ + + chunk_dss_postfork_parent(); +} + +void +chunk_postfork_child(void) +{ + + chunk_dss_postfork_child(); +} diff --git a/deps/jemalloc/src/chunk_dss.c b/deps/jemalloc/src/chunk_dss.c index ee3f83888..61fc91696 100644 --- a/deps/jemalloc/src/chunk_dss.c +++ b/deps/jemalloc/src/chunk_dss.c @@ -10,19 +10,20 @@ const char *dss_prec_names[] = { "N/A" }; +/* Current dss precedence default, used when creating new arenas. */ +static dss_prec_t dss_prec_default = DSS_PREC_DEFAULT; + /* - * Current dss precedence default, used when creating new arenas. NB: This is - * stored as unsigned rather than dss_prec_t because in principle there's no - * guarantee that sizeof(dss_prec_t) is the same as sizeof(unsigned), and we use - * atomic operations to synchronize the setting. + * Protects sbrk() calls. This avoids malloc races among threads, though it + * does not protect against races with threads that call sbrk() directly. */ -static unsigned dss_prec_default = (unsigned)DSS_PREC_DEFAULT; +static malloc_mutex_t dss_mtx; /* Base address of the DSS. */ static void *dss_base; -/* Atomic boolean indicating whether the DSS is exhausted. */ -static unsigned dss_exhausted; -/* Atomic current upper limit on DSS addresses. 
*/ +/* Current end of the DSS, or ((void *)-1) if the DSS is exhausted. */ +static void *dss_prev; +/* Current upper limit on DSS addresses. */ static void *dss_max; /******************************************************************************/ @@ -46,7 +47,9 @@ chunk_dss_prec_get(void) if (!have_dss) return (dss_prec_disabled); - ret = (dss_prec_t)atomic_read_u(&dss_prec_default); + malloc_mutex_lock(&dss_mtx); + ret = dss_prec_default; + malloc_mutex_unlock(&dss_mtx); return (ret); } @@ -56,46 +59,15 @@ chunk_dss_prec_set(dss_prec_t dss_prec) if (!have_dss) return (dss_prec != dss_prec_disabled); - atomic_write_u(&dss_prec_default, (unsigned)dss_prec); + malloc_mutex_lock(&dss_mtx); + dss_prec_default = dss_prec; + malloc_mutex_unlock(&dss_mtx); return (false); } -static void * -chunk_dss_max_update(void *new_addr) -{ - void *max_cur; - spin_t spinner; - - /* - * Get the current end of the DSS as max_cur and assure that dss_max is - * up to date. - */ - spin_init(&spinner); - while (true) { - void *max_prev = atomic_read_p(&dss_max); - - max_cur = chunk_dss_sbrk(0); - if ((uintptr_t)max_prev > (uintptr_t)max_cur) { - /* - * Another thread optimistically updated dss_max. Wait - * for it to finish. - */ - spin_adaptive(&spinner); - continue; - } - if (!atomic_cas_p(&dss_max, max_prev, max_cur)) - break; - } - /* Fixed new_addr can only be supported if it is at the edge of DSS. 
*/ - if (new_addr != NULL && max_cur != new_addr) - return (NULL); - - return (max_cur); -} - void * -chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, - size_t alignment, bool *zero, bool *commit) +chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size, size_t alignment, + bool *zero, bool *commit) { cassert(have_dss); assert(size > 0 && (size & chunksize_mask) == 0); @@ -108,20 +80,28 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, if ((intptr_t)size < 0) return (NULL); - if (!atomic_read_u(&dss_exhausted)) { + malloc_mutex_lock(&dss_mtx); + if (dss_prev != (void *)-1) { + /* * The loop is necessary to recover from races with other * threads that are using the DSS for something other than * malloc. */ - while (true) { - void *ret, *cpad, *max_cur, *dss_next, *dss_prev; + do { + void *ret, *cpad, *dss_next; size_t gap_size, cpad_size; intptr_t incr; + /* Avoid an unnecessary system call. */ + if (new_addr != NULL && dss_max != new_addr) + break; - max_cur = chunk_dss_max_update(new_addr); - if (max_cur == NULL) - goto label_oom; + /* Get the current end of the DSS. */ + dss_max = chunk_dss_sbrk(0); + + /* Make sure the earlier condition still holds. */ + if (new_addr != NULL && dss_max != new_addr) + break; /* * Calculate how much padding is necessary to @@ -140,29 +120,22 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, cpad_size = (uintptr_t)ret - (uintptr_t)cpad; dss_next = (void *)((uintptr_t)ret + size); if ((uintptr_t)ret < (uintptr_t)dss_max || - (uintptr_t)dss_next < (uintptr_t)dss_max) - goto label_oom; /* Wrap-around. */ + (uintptr_t)dss_next < (uintptr_t)dss_max) { + /* Wrap-around. */ + malloc_mutex_unlock(&dss_mtx); + return (NULL); + } incr = gap_size + cpad_size + size; - - /* - * Optimistically update dss_max, and roll back below if - * sbrk() fails. 
No other thread will try to extend the - * DSS while dss_max is greater than the current DSS - * max reported by sbrk(0). - */ - if (atomic_cas_p(&dss_max, max_cur, dss_next)) - continue; - - /* Try to allocate. */ dss_prev = chunk_dss_sbrk(incr); - if (dss_prev == max_cur) { + if (dss_prev == dss_max) { /* Success. */ + dss_max = dss_next; + malloc_mutex_unlock(&dss_mtx); if (cpad_size != 0) { chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; - chunk_dalloc_wrapper(tsdn, arena, + chunk_dalloc_wrapper(arena, &chunk_hooks, cpad, cpad_size, - arena_extent_sn_next(arena), false, true); } if (*zero) { @@ -174,65 +147,68 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, *commit = pages_decommit(ret, size); return (ret); } - - /* - * Failure, whether due to OOM or a race with a raw - * sbrk() call from outside the allocator. Try to roll - * back optimistic dss_max update; if rollback fails, - * it's due to another caller of this function having - * succeeded since this invocation started, in which - * case rollback is not necessary. - */ - atomic_cas_p(&dss_max, dss_next, max_cur); - if (dss_prev == (void *)-1) { - /* OOM. 
*/ - atomic_write_u(&dss_exhausted, (unsigned)true); - goto label_oom; - } - } + } while (dss_prev != (void *)-1); } -label_oom: + malloc_mutex_unlock(&dss_mtx); + return (NULL); } -static bool -chunk_in_dss_helper(void *chunk, void *max) -{ - - return ((uintptr_t)chunk >= (uintptr_t)dss_base && (uintptr_t)chunk < - (uintptr_t)max); -} - bool chunk_in_dss(void *chunk) { + bool ret; cassert(have_dss); - return (chunk_in_dss_helper(chunk, atomic_read_p(&dss_max))); + malloc_mutex_lock(&dss_mtx); + if ((uintptr_t)chunk >= (uintptr_t)dss_base + && (uintptr_t)chunk < (uintptr_t)dss_max) + ret = true; + else + ret = false; + malloc_mutex_unlock(&dss_mtx); + + return (ret); } bool -chunk_dss_mergeable(void *chunk_a, void *chunk_b) -{ - void *max; - - cassert(have_dss); - - max = atomic_read_p(&dss_max); - return (chunk_in_dss_helper(chunk_a, max) == - chunk_in_dss_helper(chunk_b, max)); -} - -void chunk_dss_boot(void) { cassert(have_dss); + if (malloc_mutex_init(&dss_mtx)) + return (true); dss_base = chunk_dss_sbrk(0); - dss_exhausted = (unsigned)(dss_base == (void *)-1); + dss_prev = dss_base; dss_max = dss_base; + + return (false); +} + +void +chunk_dss_prefork(void) +{ + + if (have_dss) + malloc_mutex_prefork(&dss_mtx); +} + +void +chunk_dss_postfork_parent(void) +{ + + if (have_dss) + malloc_mutex_postfork_parent(&dss_mtx); +} + +void +chunk_dss_postfork_child(void) +{ + + if (have_dss) + malloc_mutex_postfork_child(&dss_mtx); } /******************************************************************************/ diff --git a/deps/jemalloc/src/chunk_mmap.c b/deps/jemalloc/src/chunk_mmap.c index 73fc497af..b9ba74191 100644 --- a/deps/jemalloc/src/chunk_mmap.c +++ b/deps/jemalloc/src/chunk_mmap.c @@ -16,22 +16,23 @@ chunk_alloc_mmap_slow(size_t size, size_t alignment, bool *zero, bool *commit) do { void *pages; size_t leadsize; - pages = pages_map(NULL, alloc_size, commit); + pages = pages_map(NULL, alloc_size); if (pages == NULL) return (NULL); leadsize = 
ALIGNMENT_CEILING((uintptr_t)pages, alignment) - (uintptr_t)pages; - ret = pages_trim(pages, alloc_size, leadsize, size, commit); + ret = pages_trim(pages, alloc_size, leadsize, size); } while (ret == NULL); assert(ret != NULL); *zero = true; + if (!*commit) + *commit = pages_decommit(ret, size); return (ret); } void * -chunk_alloc_mmap(void *new_addr, size_t size, size_t alignment, bool *zero, - bool *commit) +chunk_alloc_mmap(size_t size, size_t alignment, bool *zero, bool *commit) { void *ret; size_t offset; @@ -52,10 +53,9 @@ chunk_alloc_mmap(void *new_addr, size_t size, size_t alignment, bool *zero, assert(alignment != 0); assert((alignment & chunksize_mask) == 0); - ret = pages_map(new_addr, size, commit); - if (ret == NULL || ret == new_addr) - return (ret); - assert(new_addr == NULL); + ret = pages_map(NULL, size); + if (ret == NULL) + return (NULL); offset = ALIGNMENT_ADDR2OFFSET(ret, alignment); if (offset != 0) { pages_unmap(ret, size); @@ -64,6 +64,8 @@ chunk_alloc_mmap(void *new_addr, size_t size, size_t alignment, bool *zero, assert(ret != NULL); *zero = true; + if (!*commit) + *commit = pages_decommit(ret, size); return (ret); } diff --git a/deps/jemalloc/src/ckh.c b/deps/jemalloc/src/ckh.c index 159bd8ae1..53a1c1ef1 100644 --- a/deps/jemalloc/src/ckh.c +++ b/deps/jemalloc/src/ckh.c @@ -99,8 +99,7 @@ ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key, * Cycle through the cells in the bucket, starting at a random position. * The randomness avoids worst-case search overhead as buckets fill up. 
*/ - offset = (unsigned)prng_lg_range_u64(&ckh->prng_state, - LG_CKH_BUCKET_CELLS); + prng32(offset, LG_CKH_BUCKET_CELLS, ckh->prng_state, CKH_A, CKH_C); for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) { cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + ((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))]; @@ -142,8 +141,7 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey, * were an item for which both hashes indicated the same * bucket. */ - i = (unsigned)prng_lg_range_u64(&ckh->prng_state, - LG_CKH_BUCKET_CELLS); + prng32(i, LG_CKH_BUCKET_CELLS, ckh->prng_state, CKH_A, CKH_C); cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i]; assert(cell->key != NULL); @@ -249,7 +247,8 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) { bool ret; ckhc_t *tab, *ttab; - unsigned lg_prevbuckets, lg_curcells; + size_t lg_curcells; + unsigned lg_prevbuckets; #ifdef CKH_COUNT ckh->ngrows++; @@ -267,12 +266,12 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) lg_curcells++; usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); - if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) { + if (usize == 0) { ret = true; goto label_return; } - tab = (ckhc_t *)ipallocztm(tsd_tsdn(tsd), usize, CACHELINE, - true, NULL, true, arena_ichoose(tsd, NULL)); + tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, + true, NULL); if (tab == NULL) { ret = true; goto label_return; @@ -284,12 +283,12 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; if (!ckh_rebuild(ckh, tab)) { - idalloctm(tsd_tsdn(tsd), tab, NULL, true, true); + idalloctm(tsd, tab, tcache_get(tsd, false), true); break; } /* Rebuilding failed, so back out partially rebuilt table. 
*/ - idalloctm(tsd_tsdn(tsd), ckh->tab, NULL, true, true); + idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; } @@ -303,8 +302,8 @@ static void ckh_shrink(tsd_t *tsd, ckh_t *ckh) { ckhc_t *tab, *ttab; - size_t usize; - unsigned lg_prevbuckets, lg_curcells; + size_t lg_curcells, usize; + unsigned lg_prevbuckets; /* * It is possible (though unlikely, given well behaved hashes) that the @@ -313,10 +312,10 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) lg_prevbuckets = ckh->lg_curbuckets; lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1; usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); - if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) + if (usize == 0) return; - tab = (ckhc_t *)ipallocztm(tsd_tsdn(tsd), usize, CACHELINE, true, NULL, - true, arena_ichoose(tsd, NULL)); + tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, true, + NULL); if (tab == NULL) { /* * An OOM error isn't worth propagating, since it doesn't @@ -331,7 +330,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; if (!ckh_rebuild(ckh, tab)) { - idalloctm(tsd_tsdn(tsd), tab, NULL, true, true); + idalloctm(tsd, tab, tcache_get(tsd, false), true); #ifdef CKH_COUNT ckh->nshrinks++; #endif @@ -339,7 +338,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) } /* Rebuilding failed, so back out partially rebuilt table. 
*/ - idalloctm(tsd_tsdn(tsd), ckh->tab, NULL, true, true); + idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; #ifdef CKH_COUNT @@ -388,12 +387,12 @@ ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh->keycomp = keycomp; usize = sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE); - if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) { + if (usize == 0) { ret = true; goto label_return; } - ckh->tab = (ckhc_t *)ipallocztm(tsd_tsdn(tsd), usize, CACHELINE, true, - NULL, true, arena_ichoose(tsd, NULL)); + ckh->tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, true, + NULL); if (ckh->tab == NULL) { ret = true; goto label_return; @@ -422,9 +421,9 @@ ckh_delete(tsd_t *tsd, ckh_t *ckh) (unsigned long long)ckh->nrelocs); #endif - idalloctm(tsd_tsdn(tsd), ckh->tab, NULL, true, true); + idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true); if (config_debug) - memset(ckh, JEMALLOC_FREE_JUNK, sizeof(ckh_t)); + memset(ckh, 0x5a, sizeof(ckh_t)); } size_t diff --git a/deps/jemalloc/src/ctl.c b/deps/jemalloc/src/ctl.c index bc78b2055..3de8e602d 100644 --- a/deps/jemalloc/src/ctl.c +++ b/deps/jemalloc/src/ctl.c @@ -24,7 +24,7 @@ ctl_named_node(const ctl_node_t *node) } JEMALLOC_INLINE_C const ctl_named_node_t * -ctl_named_children(const ctl_named_node_t *node, size_t index) +ctl_named_children(const ctl_named_node_t *node, int index) { const ctl_named_node_t *children = ctl_named_node(node->children); @@ -42,25 +42,25 @@ ctl_indexed_node(const ctl_node_t *node) /* Function prototypes for non-inline static functions. 
*/ #define CTL_PROTO(n) \ -static int n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ - void *oldp, size_t *oldlenp, void *newp, size_t newlen); +static int n##_ctl(const size_t *mib, size_t miblen, void *oldp, \ + size_t *oldlenp, void *newp, size_t newlen); #define INDEX_PROTO(n) \ -static const ctl_named_node_t *n##_index(tsdn_t *tsdn, \ - const size_t *mib, size_t miblen, size_t i); +static const ctl_named_node_t *n##_index(const size_t *mib, \ + size_t miblen, size_t i); static bool ctl_arena_init(ctl_arena_stats_t *astats); static void ctl_arena_clear(ctl_arena_stats_t *astats); -static void ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_stats_t *cstats, +static void ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, arena_t *arena); static void ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats); -static void ctl_arena_refresh(tsdn_t *tsdn, arena_t *arena, unsigned i); -static bool ctl_grow(tsdn_t *tsdn); -static void ctl_refresh(tsdn_t *tsdn); -static bool ctl_init(tsdn_t *tsdn); -static int ctl_lookup(tsdn_t *tsdn, const char *name, - ctl_node_t const **nodesp, size_t *mibp, size_t *depthp); +static void ctl_arena_refresh(arena_t *arena, unsigned i); +static bool ctl_grow(void); +static void ctl_refresh(void); +static bool ctl_init(void); +static int ctl_lookup(const char *name, ctl_node_t const **nodesp, + size_t *mibp, size_t *depthp); CTL_PROTO(version) CTL_PROTO(epoch) @@ -77,7 +77,6 @@ CTL_PROTO(config_cache_oblivious) CTL_PROTO(config_debug) CTL_PROTO(config_fill) CTL_PROTO(config_lazy_lock) -CTL_PROTO(config_malloc_conf) CTL_PROTO(config_munmap) CTL_PROTO(config_prof) CTL_PROTO(config_prof_libgcc) @@ -92,9 +91,7 @@ CTL_PROTO(opt_abort) CTL_PROTO(opt_dss) CTL_PROTO(opt_lg_chunk) CTL_PROTO(opt_narenas) -CTL_PROTO(opt_purge) CTL_PROTO(opt_lg_dirty_mult) -CTL_PROTO(opt_decay_time) CTL_PROTO(opt_stats_print) CTL_PROTO(opt_junk) CTL_PROTO(opt_zero) @@ -117,13 +114,10 @@ CTL_PROTO(opt_prof_accum) CTL_PROTO(tcache_create) 
CTL_PROTO(tcache_flush) CTL_PROTO(tcache_destroy) -static void arena_i_purge(tsdn_t *tsdn, unsigned arena_ind, bool all); CTL_PROTO(arena_i_purge) -CTL_PROTO(arena_i_decay) -CTL_PROTO(arena_i_reset) +static void arena_purge(unsigned arena_ind); CTL_PROTO(arena_i_dss) CTL_PROTO(arena_i_lg_dirty_mult) -CTL_PROTO(arena_i_decay_time) CTL_PROTO(arena_i_chunk_hooks) INDEX_PROTO(arena_i) CTL_PROTO(arenas_bin_i_size) @@ -137,7 +131,6 @@ INDEX_PROTO(arenas_hchunk_i) CTL_PROTO(arenas_narenas) CTL_PROTO(arenas_initialized) CTL_PROTO(arenas_lg_dirty_mult) -CTL_PROTO(arenas_decay_time) CTL_PROTO(arenas_quantum) CTL_PROTO(arenas_page) CTL_PROTO(arenas_tcache_max) @@ -188,11 +181,9 @@ INDEX_PROTO(stats_arenas_i_hchunks_j) CTL_PROTO(stats_arenas_i_nthreads) CTL_PROTO(stats_arenas_i_dss) CTL_PROTO(stats_arenas_i_lg_dirty_mult) -CTL_PROTO(stats_arenas_i_decay_time) CTL_PROTO(stats_arenas_i_pactive) CTL_PROTO(stats_arenas_i_pdirty) CTL_PROTO(stats_arenas_i_mapped) -CTL_PROTO(stats_arenas_i_retained) CTL_PROTO(stats_arenas_i_npurge) CTL_PROTO(stats_arenas_i_nmadvise) CTL_PROTO(stats_arenas_i_purged) @@ -205,7 +196,6 @@ CTL_PROTO(stats_active) CTL_PROTO(stats_metadata) CTL_PROTO(stats_resident) CTL_PROTO(stats_mapped) -CTL_PROTO(stats_retained) /******************************************************************************/ /* mallctl tree. 
*/ @@ -251,7 +241,6 @@ static const ctl_named_node_t config_node[] = { {NAME("debug"), CTL(config_debug)}, {NAME("fill"), CTL(config_fill)}, {NAME("lazy_lock"), CTL(config_lazy_lock)}, - {NAME("malloc_conf"), CTL(config_malloc_conf)}, {NAME("munmap"), CTL(config_munmap)}, {NAME("prof"), CTL(config_prof)}, {NAME("prof_libgcc"), CTL(config_prof_libgcc)}, @@ -269,9 +258,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("dss"), CTL(opt_dss)}, {NAME("lg_chunk"), CTL(opt_lg_chunk)}, {NAME("narenas"), CTL(opt_narenas)}, - {NAME("purge"), CTL(opt_purge)}, {NAME("lg_dirty_mult"), CTL(opt_lg_dirty_mult)}, - {NAME("decay_time"), CTL(opt_decay_time)}, {NAME("stats_print"), CTL(opt_stats_print)}, {NAME("junk"), CTL(opt_junk)}, {NAME("zero"), CTL(opt_zero)}, @@ -301,11 +288,8 @@ static const ctl_named_node_t tcache_node[] = { static const ctl_named_node_t arena_i_node[] = { {NAME("purge"), CTL(arena_i_purge)}, - {NAME("decay"), CTL(arena_i_decay)}, - {NAME("reset"), CTL(arena_i_reset)}, {NAME("dss"), CTL(arena_i_dss)}, {NAME("lg_dirty_mult"), CTL(arena_i_lg_dirty_mult)}, - {NAME("decay_time"), CTL(arena_i_decay_time)}, {NAME("chunk_hooks"), CTL(arena_i_chunk_hooks)} }; static const ctl_named_node_t super_arena_i_node[] = { @@ -355,7 +339,6 @@ static const ctl_named_node_t arenas_node[] = { {NAME("narenas"), CTL(arenas_narenas)}, {NAME("initialized"), CTL(arenas_initialized)}, {NAME("lg_dirty_mult"), CTL(arenas_lg_dirty_mult)}, - {NAME("decay_time"), CTL(arenas_decay_time)}, {NAME("quantum"), CTL(arenas_quantum)}, {NAME("page"), CTL(arenas_page)}, {NAME("tcache_max"), CTL(arenas_tcache_max)}, @@ -456,11 +439,9 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("nthreads"), CTL(stats_arenas_i_nthreads)}, {NAME("dss"), CTL(stats_arenas_i_dss)}, {NAME("lg_dirty_mult"), CTL(stats_arenas_i_lg_dirty_mult)}, - {NAME("decay_time"), CTL(stats_arenas_i_decay_time)}, {NAME("pactive"), CTL(stats_arenas_i_pactive)}, {NAME("pdirty"), CTL(stats_arenas_i_pdirty)}, 
{NAME("mapped"), CTL(stats_arenas_i_mapped)}, - {NAME("retained"), CTL(stats_arenas_i_retained)}, {NAME("npurge"), CTL(stats_arenas_i_npurge)}, {NAME("nmadvise"), CTL(stats_arenas_i_nmadvise)}, {NAME("purged"), CTL(stats_arenas_i_purged)}, @@ -487,7 +468,6 @@ static const ctl_named_node_t stats_node[] = { {NAME("metadata"), CTL(stats_metadata)}, {NAME("resident"), CTL(stats_resident)}, {NAME("mapped"), CTL(stats_mapped)}, - {NAME("retained"), CTL(stats_retained)}, {NAME("arenas"), CHILD(indexed, stats_arenas)} }; @@ -539,10 +519,8 @@ static void ctl_arena_clear(ctl_arena_stats_t *astats) { - astats->nthreads = 0; astats->dss = dss_prec_names[dss_prec_limit]; astats->lg_dirty_mult = -1; - astats->decay_time = -1; astats->pactive = 0; astats->pdirty = 0; if (config_stats) { @@ -560,27 +538,20 @@ ctl_arena_clear(ctl_arena_stats_t *astats) } static void -ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_stats_t *cstats, arena_t *arena) +ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, arena_t *arena) { unsigned i; - if (config_stats) { - arena_stats_merge(tsdn, arena, &cstats->nthreads, &cstats->dss, - &cstats->lg_dirty_mult, &cstats->decay_time, - &cstats->pactive, &cstats->pdirty, &cstats->astats, - cstats->bstats, cstats->lstats, cstats->hstats); + arena_stats_merge(arena, &cstats->dss, &cstats->lg_dirty_mult, + &cstats->pactive, &cstats->pdirty, &cstats->astats, cstats->bstats, + cstats->lstats, cstats->hstats); - for (i = 0; i < NBINS; i++) { - cstats->allocated_small += cstats->bstats[i].curregs * - index2size(i); - cstats->nmalloc_small += cstats->bstats[i].nmalloc; - cstats->ndalloc_small += cstats->bstats[i].ndalloc; - cstats->nrequests_small += cstats->bstats[i].nrequests; - } - } else { - arena_basic_stats_merge(tsdn, arena, &cstats->nthreads, - &cstats->dss, &cstats->lg_dirty_mult, &cstats->decay_time, - &cstats->pactive, &cstats->pdirty); + for (i = 0; i < NBINS; i++) { + cstats->allocated_small += cstats->bstats[i].curregs * + index2size(i); + 
cstats->nmalloc_small += cstats->bstats[i].nmalloc; + cstats->ndalloc_small += cstats->bstats[i].ndalloc; + cstats->nrequests_small += cstats->bstats[i].nrequests; } } @@ -589,91 +560,89 @@ ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats) { unsigned i; - sstats->nthreads += astats->nthreads; sstats->pactive += astats->pactive; sstats->pdirty += astats->pdirty; - if (config_stats) { - sstats->astats.mapped += astats->astats.mapped; - sstats->astats.retained += astats->astats.retained; - sstats->astats.npurge += astats->astats.npurge; - sstats->astats.nmadvise += astats->astats.nmadvise; - sstats->astats.purged += astats->astats.purged; + sstats->astats.mapped += astats->astats.mapped; + sstats->astats.npurge += astats->astats.npurge; + sstats->astats.nmadvise += astats->astats.nmadvise; + sstats->astats.purged += astats->astats.purged; - sstats->astats.metadata_mapped += - astats->astats.metadata_mapped; - sstats->astats.metadata_allocated += - astats->astats.metadata_allocated; + sstats->astats.metadata_mapped += astats->astats.metadata_mapped; + sstats->astats.metadata_allocated += astats->astats.metadata_allocated; - sstats->allocated_small += astats->allocated_small; - sstats->nmalloc_small += astats->nmalloc_small; - sstats->ndalloc_small += astats->ndalloc_small; - sstats->nrequests_small += astats->nrequests_small; + sstats->allocated_small += astats->allocated_small; + sstats->nmalloc_small += astats->nmalloc_small; + sstats->ndalloc_small += astats->ndalloc_small; + sstats->nrequests_small += astats->nrequests_small; - sstats->astats.allocated_large += - astats->astats.allocated_large; - sstats->astats.nmalloc_large += astats->astats.nmalloc_large; - sstats->astats.ndalloc_large += astats->astats.ndalloc_large; - sstats->astats.nrequests_large += - astats->astats.nrequests_large; + sstats->astats.allocated_large += astats->astats.allocated_large; + sstats->astats.nmalloc_large += astats->astats.nmalloc_large; + 
sstats->astats.ndalloc_large += astats->astats.ndalloc_large; + sstats->astats.nrequests_large += astats->astats.nrequests_large; - sstats->astats.allocated_huge += astats->astats.allocated_huge; - sstats->astats.nmalloc_huge += astats->astats.nmalloc_huge; - sstats->astats.ndalloc_huge += astats->astats.ndalloc_huge; + sstats->astats.allocated_huge += astats->astats.allocated_huge; + sstats->astats.nmalloc_huge += astats->astats.nmalloc_huge; + sstats->astats.ndalloc_huge += astats->astats.ndalloc_huge; - for (i = 0; i < NBINS; i++) { - sstats->bstats[i].nmalloc += astats->bstats[i].nmalloc; - sstats->bstats[i].ndalloc += astats->bstats[i].ndalloc; - sstats->bstats[i].nrequests += - astats->bstats[i].nrequests; - sstats->bstats[i].curregs += astats->bstats[i].curregs; - if (config_tcache) { - sstats->bstats[i].nfills += - astats->bstats[i].nfills; - sstats->bstats[i].nflushes += - astats->bstats[i].nflushes; - } - sstats->bstats[i].nruns += astats->bstats[i].nruns; - sstats->bstats[i].reruns += astats->bstats[i].reruns; - sstats->bstats[i].curruns += astats->bstats[i].curruns; + for (i = 0; i < NBINS; i++) { + sstats->bstats[i].nmalloc += astats->bstats[i].nmalloc; + sstats->bstats[i].ndalloc += astats->bstats[i].ndalloc; + sstats->bstats[i].nrequests += astats->bstats[i].nrequests; + sstats->bstats[i].curregs += astats->bstats[i].curregs; + if (config_tcache) { + sstats->bstats[i].nfills += astats->bstats[i].nfills; + sstats->bstats[i].nflushes += + astats->bstats[i].nflushes; } + sstats->bstats[i].nruns += astats->bstats[i].nruns; + sstats->bstats[i].reruns += astats->bstats[i].reruns; + sstats->bstats[i].curruns += astats->bstats[i].curruns; + } - for (i = 0; i < nlclasses; i++) { - sstats->lstats[i].nmalloc += astats->lstats[i].nmalloc; - sstats->lstats[i].ndalloc += astats->lstats[i].ndalloc; - sstats->lstats[i].nrequests += - astats->lstats[i].nrequests; - sstats->lstats[i].curruns += astats->lstats[i].curruns; - } + for (i = 0; i < nlclasses; i++) { + 
sstats->lstats[i].nmalloc += astats->lstats[i].nmalloc; + sstats->lstats[i].ndalloc += astats->lstats[i].ndalloc; + sstats->lstats[i].nrequests += astats->lstats[i].nrequests; + sstats->lstats[i].curruns += astats->lstats[i].curruns; + } - for (i = 0; i < nhclasses; i++) { - sstats->hstats[i].nmalloc += astats->hstats[i].nmalloc; - sstats->hstats[i].ndalloc += astats->hstats[i].ndalloc; - sstats->hstats[i].curhchunks += - astats->hstats[i].curhchunks; - } + for (i = 0; i < nhclasses; i++) { + sstats->hstats[i].nmalloc += astats->hstats[i].nmalloc; + sstats->hstats[i].ndalloc += astats->hstats[i].ndalloc; + sstats->hstats[i].curhchunks += astats->hstats[i].curhchunks; } } static void -ctl_arena_refresh(tsdn_t *tsdn, arena_t *arena, unsigned i) +ctl_arena_refresh(arena_t *arena, unsigned i) { ctl_arena_stats_t *astats = &ctl_stats.arenas[i]; ctl_arena_stats_t *sstats = &ctl_stats.arenas[ctl_stats.narenas]; ctl_arena_clear(astats); - ctl_arena_stats_amerge(tsdn, astats, arena); - /* Merge into sum stats as well. */ - ctl_arena_stats_smerge(sstats, astats); + + sstats->nthreads += astats->nthreads; + if (config_stats) { + ctl_arena_stats_amerge(astats, arena); + /* Merge into sum stats as well. */ + ctl_arena_stats_smerge(sstats, astats); + } else { + astats->pactive += arena->nactive; + astats->pdirty += arena->ndirty; + /* Merge into sum stats as well. */ + sstats->pactive += arena->nactive; + sstats->pdirty += arena->ndirty; + } } static bool -ctl_grow(tsdn_t *tsdn) +ctl_grow(void) { ctl_arena_stats_t *astats; /* Initialize new arena. */ - if (arena_init(tsdn, ctl_stats.narenas) == NULL) + if (arena_init(ctl_stats.narenas) == NULL) return (true); /* Allocate extended arena stats. */ @@ -708,32 +677,47 @@ ctl_grow(tsdn_t *tsdn) } static void -ctl_refresh(tsdn_t *tsdn) +ctl_refresh(void) { + tsd_t *tsd; unsigned i; + bool refreshed; VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas); /* * Clear sum stats, since they will be merged into by * ctl_arena_refresh(). 
*/ + ctl_stats.arenas[ctl_stats.narenas].nthreads = 0; ctl_arena_clear(&ctl_stats.arenas[ctl_stats.narenas]); - for (i = 0; i < ctl_stats.narenas; i++) - tarenas[i] = arena_get(tsdn, i, false); + tsd = tsd_fetch(); + for (i = 0, refreshed = false; i < ctl_stats.narenas; i++) { + tarenas[i] = arena_get(tsd, i, false, false); + if (tarenas[i] == NULL && !refreshed) { + tarenas[i] = arena_get(tsd, i, false, true); + refreshed = true; + } + } + + for (i = 0; i < ctl_stats.narenas; i++) { + if (tarenas[i] != NULL) + ctl_stats.arenas[i].nthreads = arena_nbound(i); + else + ctl_stats.arenas[i].nthreads = 0; + } for (i = 0; i < ctl_stats.narenas; i++) { bool initialized = (tarenas[i] != NULL); ctl_stats.arenas[i].initialized = initialized; if (initialized) - ctl_arena_refresh(tsdn, tarenas[i], i); + ctl_arena_refresh(tarenas[i], i); } if (config_stats) { size_t base_allocated, base_resident, base_mapped; - base_stats_get(tsdn, &base_allocated, &base_resident, - &base_mapped); + base_stats_get(&base_allocated, &base_resident, &base_mapped); ctl_stats.allocated = ctl_stats.arenas[ctl_stats.narenas].allocated_small + ctl_stats.arenas[ctl_stats.narenas].astats.allocated_large + @@ -750,19 +734,17 @@ ctl_refresh(tsdn_t *tsdn) ctl_stats.arenas[ctl_stats.narenas].pdirty) << LG_PAGE); ctl_stats.mapped = base_mapped + ctl_stats.arenas[ctl_stats.narenas].astats.mapped; - ctl_stats.retained = - ctl_stats.arenas[ctl_stats.narenas].astats.retained; } ctl_epoch++; } static bool -ctl_init(tsdn_t *tsdn) +ctl_init(void) { bool ret; - malloc_mutex_lock(tsdn, &ctl_mtx); + malloc_mutex_lock(&ctl_mtx); if (!ctl_initialized) { /* * Allocate space for one extra arena stats element, which @@ -804,19 +786,19 @@ ctl_init(tsdn_t *tsdn) ctl_stats.arenas[ctl_stats.narenas].initialized = true; ctl_epoch = 0; - ctl_refresh(tsdn); + ctl_refresh(); ctl_initialized = true; } ret = false; label_return: - malloc_mutex_unlock(tsdn, &ctl_mtx); + malloc_mutex_unlock(&ctl_mtx); return (ret); } static int 
-ctl_lookup(tsdn_t *tsdn, const char *name, ctl_node_t const **nodesp, - size_t *mibp, size_t *depthp) +ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, + size_t *depthp) { int ret; const char *elm, *tdot, *dot; @@ -868,7 +850,7 @@ ctl_lookup(tsdn_t *tsdn, const char *name, ctl_node_t const **nodesp, } inode = ctl_indexed_node(node->children); - node = inode->index(tsdn, mibp, *depthp, (size_t)index); + node = inode->index(mibp, *depthp, (size_t)index); if (node == NULL) { ret = ENOENT; goto label_return; @@ -912,8 +894,8 @@ label_return: } int -ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) +ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp, + size_t newlen) { int ret; size_t depth; @@ -921,19 +903,19 @@ ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, size_t mib[CTL_MAX_DEPTH]; const ctl_named_node_t *node; - if (!ctl_initialized && ctl_init(tsd_tsdn(tsd))) { + if (!ctl_initialized && ctl_init()) { ret = EAGAIN; goto label_return; } depth = CTL_MAX_DEPTH; - ret = ctl_lookup(tsd_tsdn(tsd), name, nodes, mib, &depth); + ret = ctl_lookup(name, nodes, mib, &depth); if (ret != 0) goto label_return; node = ctl_named_node(nodes[depth-1]); if (node != NULL && node->ctl) - ret = node->ctl(tsd, mib, depth, oldp, oldlenp, newp, newlen); + ret = node->ctl(mib, depth, oldp, oldlenp, newp, newlen); else { /* The name refers to a partial path through the ctl tree. 
*/ ret = ENOENT; @@ -944,29 +926,29 @@ label_return: } int -ctl_nametomib(tsdn_t *tsdn, const char *name, size_t *mibp, size_t *miblenp) +ctl_nametomib(const char *name, size_t *mibp, size_t *miblenp) { int ret; - if (!ctl_initialized && ctl_init(tsdn)) { + if (!ctl_initialized && ctl_init()) { ret = EAGAIN; goto label_return; } - ret = ctl_lookup(tsdn, name, NULL, mibp, miblenp); + ret = ctl_lookup(name, NULL, mibp, miblenp); label_return: return(ret); } int -ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) +ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) { int ret; const ctl_named_node_t *node; size_t i; - if (!ctl_initialized && ctl_init(tsd_tsdn(tsd))) { + if (!ctl_initialized && ctl_init()) { ret = EAGAIN; goto label_return; } @@ -978,7 +960,7 @@ ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, assert(node->nchildren > 0); if (ctl_named_node(node->children) != NULL) { /* Children are named. */ - if (node->nchildren <= (unsigned)mib[i]) { + if (node->nchildren <= mib[i]) { ret = ENOENT; goto label_return; } @@ -988,7 +970,7 @@ ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, /* Indexed element. */ inode = ctl_indexed_node(node->children); - node = inode->index(tsd_tsdn(tsd), mib, miblen, mib[i]); + node = inode->index(mib, miblen, mib[i]); if (node == NULL) { ret = ENOENT; goto label_return; @@ -998,7 +980,7 @@ ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, /* Call the ctl function. */ if (node && node->ctl) - ret = node->ctl(tsd, mib, miblen, oldp, oldlenp, newp, newlen); + ret = node->ctl(mib, miblen, oldp, oldlenp, newp, newlen); else { /* Partial MIB. 
*/ ret = ENOENT; @@ -1012,7 +994,7 @@ bool ctl_boot(void) { - if (malloc_mutex_init(&ctl_mtx, "ctl", WITNESS_RANK_CTL)) + if (malloc_mutex_init(&ctl_mtx)) return (true); ctl_initialized = false; @@ -1021,24 +1003,24 @@ ctl_boot(void) } void -ctl_prefork(tsdn_t *tsdn) +ctl_prefork(void) { - malloc_mutex_prefork(tsdn, &ctl_mtx); + malloc_mutex_prefork(&ctl_mtx); } void -ctl_postfork_parent(tsdn_t *tsdn) +ctl_postfork_parent(void) { - malloc_mutex_postfork_parent(tsdn, &ctl_mtx); + malloc_mutex_postfork_parent(&ctl_mtx); } void -ctl_postfork_child(tsdn_t *tsdn) +ctl_postfork_child(void) { - malloc_mutex_postfork_child(tsdn, &ctl_mtx); + malloc_mutex_postfork_child(&ctl_mtx); } /******************************************************************************/ @@ -1095,8 +1077,8 @@ ctl_postfork_child(tsdn_t *tsdn) */ #define CTL_RO_CLGEN(c, l, n, v, t) \ static int \ -n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ - size_t *oldlenp, void *newp, size_t newlen) \ +n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ + void *newp, size_t newlen) \ { \ int ret; \ t oldval; \ @@ -1104,7 +1086,7 @@ n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ if (!(c)) \ return (ENOENT); \ if (l) \ - malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); \ + malloc_mutex_lock(&ctl_mtx); \ READONLY(); \ oldval = (v); \ READ(oldval, t); \ @@ -1112,47 +1094,47 @@ n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ ret = 0; \ label_return: \ if (l) \ - malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); \ + malloc_mutex_unlock(&ctl_mtx); \ return (ret); \ } #define CTL_RO_CGEN(c, n, v, t) \ static int \ -n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ - size_t *oldlenp, void *newp, size_t newlen) \ +n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ + void *newp, size_t newlen) \ { \ int ret; \ t oldval; \ \ if (!(c)) \ return (ENOENT); \ - malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); \ + 
malloc_mutex_lock(&ctl_mtx); \ READONLY(); \ oldval = (v); \ READ(oldval, t); \ \ ret = 0; \ label_return: \ - malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); \ + malloc_mutex_unlock(&ctl_mtx); \ return (ret); \ } #define CTL_RO_GEN(n, v, t) \ static int \ -n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ - size_t *oldlenp, void *newp, size_t newlen) \ +n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ + void *newp, size_t newlen) \ { \ int ret; \ t oldval; \ \ - malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); \ + malloc_mutex_lock(&ctl_mtx); \ READONLY(); \ oldval = (v); \ READ(oldval, t); \ \ ret = 0; \ label_return: \ - malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); \ + malloc_mutex_unlock(&ctl_mtx); \ return (ret); \ } @@ -1162,8 +1144,8 @@ label_return: \ */ #define CTL_RO_NL_CGEN(c, n, v, t) \ static int \ -n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ - size_t *oldlenp, void *newp, size_t newlen) \ +n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ + void *newp, size_t newlen) \ { \ int ret; \ t oldval; \ @@ -1181,8 +1163,8 @@ label_return: \ #define CTL_RO_NL_GEN(n, v, t) \ static int \ -n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ - size_t *oldlenp, void *newp, size_t newlen) \ +n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ + void *newp, size_t newlen) \ { \ int ret; \ t oldval; \ @@ -1198,15 +1180,17 @@ label_return: \ #define CTL_TSD_RO_NL_CGEN(c, n, m, t) \ static int \ -n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ - size_t *oldlenp, void *newp, size_t newlen) \ +n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ + void *newp, size_t newlen) \ { \ int ret; \ t oldval; \ + tsd_t *tsd; \ \ if (!(c)) \ return (ENOENT); \ READONLY(); \ + tsd = tsd_fetch(); \ oldval = (m(tsd)); \ READ(oldval, t); \ \ @@ -1215,17 +1199,17 @@ label_return: \ return (ret); \ } -#define CTL_RO_CONFIG_GEN(n, 
t) \ +#define CTL_RO_BOOL_CONFIG_GEN(n) \ static int \ -n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ - size_t *oldlenp, void *newp, size_t newlen) \ +n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ + void *newp, size_t newlen) \ { \ int ret; \ - t oldval; \ + bool oldval; \ \ READONLY(); \ oldval = n; \ - READ(oldval, t); \ + READ(oldval, bool); \ \ ret = 0; \ label_return: \ @@ -1237,51 +1221,48 @@ label_return: \ CTL_RO_NL_GEN(version, JEMALLOC_VERSION, const char *) static int -epoch_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) +epoch_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) { int ret; UNUSED uint64_t newval; - malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); + malloc_mutex_lock(&ctl_mtx); WRITE(newval, uint64_t); if (newp != NULL) - ctl_refresh(tsd_tsdn(tsd)); + ctl_refresh(); READ(ctl_epoch, uint64_t); ret = 0; label_return: - malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); + malloc_mutex_unlock(&ctl_mtx); return (ret); } /******************************************************************************/ -CTL_RO_CONFIG_GEN(config_cache_oblivious, bool) -CTL_RO_CONFIG_GEN(config_debug, bool) -CTL_RO_CONFIG_GEN(config_fill, bool) -CTL_RO_CONFIG_GEN(config_lazy_lock, bool) -CTL_RO_CONFIG_GEN(config_malloc_conf, const char *) -CTL_RO_CONFIG_GEN(config_munmap, bool) -CTL_RO_CONFIG_GEN(config_prof, bool) -CTL_RO_CONFIG_GEN(config_prof_libgcc, bool) -CTL_RO_CONFIG_GEN(config_prof_libunwind, bool) -CTL_RO_CONFIG_GEN(config_stats, bool) -CTL_RO_CONFIG_GEN(config_tcache, bool) -CTL_RO_CONFIG_GEN(config_tls, bool) -CTL_RO_CONFIG_GEN(config_utrace, bool) -CTL_RO_CONFIG_GEN(config_valgrind, bool) -CTL_RO_CONFIG_GEN(config_xmalloc, bool) +CTL_RO_BOOL_CONFIG_GEN(config_cache_oblivious) +CTL_RO_BOOL_CONFIG_GEN(config_debug) +CTL_RO_BOOL_CONFIG_GEN(config_fill) +CTL_RO_BOOL_CONFIG_GEN(config_lazy_lock) 
+CTL_RO_BOOL_CONFIG_GEN(config_munmap) +CTL_RO_BOOL_CONFIG_GEN(config_prof) +CTL_RO_BOOL_CONFIG_GEN(config_prof_libgcc) +CTL_RO_BOOL_CONFIG_GEN(config_prof_libunwind) +CTL_RO_BOOL_CONFIG_GEN(config_stats) +CTL_RO_BOOL_CONFIG_GEN(config_tcache) +CTL_RO_BOOL_CONFIG_GEN(config_tls) +CTL_RO_BOOL_CONFIG_GEN(config_utrace) +CTL_RO_BOOL_CONFIG_GEN(config_valgrind) +CTL_RO_BOOL_CONFIG_GEN(config_xmalloc) /******************************************************************************/ CTL_RO_NL_GEN(opt_abort, opt_abort, bool) CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) CTL_RO_NL_GEN(opt_lg_chunk, opt_lg_chunk, size_t) -CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned) -CTL_RO_NL_GEN(opt_purge, purge_mode_names[opt_purge], const char *) +CTL_RO_NL_GEN(opt_narenas, opt_narenas, size_t) CTL_RO_NL_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t) -CTL_RO_NL_GEN(opt_decay_time, opt_decay_time, ssize_t) CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, const char *) CTL_RO_NL_CGEN(config_fill, opt_quarantine, opt_quarantine, size_t) @@ -1306,18 +1287,20 @@ CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool) /******************************************************************************/ static int -thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) +thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) { int ret; + tsd_t *tsd; arena_t *oldarena; unsigned newind, oldind; + tsd = tsd_fetch(); oldarena = arena_choose(tsd, NULL); if (oldarena == NULL) return (EAGAIN); - malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); + malloc_mutex_lock(&ctl_mtx); newind = oldind = oldarena->ind; WRITE(newind, unsigned); READ(oldind, unsigned); @@ -1331,7 +1314,7 @@ thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, } /* Initialize arena if necessary. 
*/ - newarena = arena_get(tsd_tsdn(tsd), newind, true); + newarena = arena_get(tsd, newind, true, true); if (newarena == NULL) { ret = EAGAIN; goto label_return; @@ -1341,15 +1324,15 @@ thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, if (config_tcache) { tcache_t *tcache = tsd_tcache_get(tsd); if (tcache != NULL) { - tcache_arena_reassociate(tsd_tsdn(tsd), tcache, - oldarena, newarena); + tcache_arena_reassociate(tcache, oldarena, + newarena); } } } ret = 0; label_return: - malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); + malloc_mutex_unlock(&ctl_mtx); return (ret); } @@ -1363,8 +1346,8 @@ CTL_TSD_RO_NL_CGEN(config_stats, thread_deallocatedp, tsd_thread_deallocatedp_get, uint64_t *) static int -thread_tcache_enabled_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) +thread_tcache_enabled_ctl(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; bool oldval; @@ -1388,8 +1371,8 @@ label_return: } static int -thread_tcache_flush_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) +thread_tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; @@ -1407,7 +1390,7 @@ label_return: } static int -thread_prof_name_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, +thread_prof_name_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; @@ -1418,16 +1401,20 @@ thread_prof_name_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, READ_XOR_WRITE(); if (newp != NULL) { + tsd_t *tsd; + if (newlen != sizeof(const char *)) { ret = EINVAL; goto label_return; } + tsd = tsd_fetch(); + if ((ret = prof_thread_name_set(tsd, *(const char **)newp)) != 0) goto label_return; } else { - const char *oldname = prof_thread_name_get(tsd); + const char *oldname = 
prof_thread_name_get(); READ(oldname, const char *); } @@ -1437,7 +1424,7 @@ label_return: } static int -thread_prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, +thread_prof_active_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; @@ -1446,13 +1433,13 @@ thread_prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, if (!config_prof) return (ENOENT); - oldval = prof_thread_active_get(tsd); + oldval = prof_thread_active_get(); if (newp != NULL) { if (newlen != sizeof(bool)) { ret = EINVAL; goto label_return; } - if (prof_thread_active_set(tsd, *(bool *)newp)) { + if (prof_thread_active_set(*(bool *)newp)) { ret = EAGAIN; goto label_return; } @@ -1467,16 +1454,19 @@ label_return: /******************************************************************************/ static int -tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) +tcache_create_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) { int ret; + tsd_t *tsd; unsigned tcache_ind; if (!config_tcache) return (ENOENT); - malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); + tsd = tsd_fetch(); + + malloc_mutex_lock(&ctl_mtx); READONLY(); if (tcaches_create(tsd, &tcache_ind)) { ret = EFAULT; @@ -1486,20 +1476,23 @@ tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = 0; label_return: - malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); + malloc_mutex_unlock(&ctl_mtx); return (ret); } static int -tcache_flush_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) +tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) { int ret; + tsd_t *tsd; unsigned tcache_ind; if (!config_tcache) return (ENOENT); + tsd = tsd_fetch(); + WRITEONLY(); tcache_ind = UINT_MAX; WRITE(tcache_ind, unsigned); @@ 
-1515,15 +1508,18 @@ label_return: } static int -tcache_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, +tcache_destroy_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; + tsd_t *tsd; unsigned tcache_ind; if (!config_tcache) return (ENOENT); + tsd = tsd_fetch(); + WRITEONLY(); tcache_ind = UINT_MAX; WRITE(tcache_ind, unsigned); @@ -1540,105 +1536,48 @@ label_return: /******************************************************************************/ +/* ctl_mutex must be held during execution of this function. */ static void -arena_i_purge(tsdn_t *tsdn, unsigned arena_ind, bool all) +arena_purge(unsigned arena_ind) { + tsd_t *tsd; + unsigned i; + bool refreshed; + VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas); - malloc_mutex_lock(tsdn, &ctl_mtx); - { - unsigned narenas = ctl_stats.narenas; - - if (arena_ind == narenas) { - unsigned i; - VARIABLE_ARRAY(arena_t *, tarenas, narenas); - - for (i = 0; i < narenas; i++) - tarenas[i] = arena_get(tsdn, i, false); - - /* - * No further need to hold ctl_mtx, since narenas and - * tarenas contain everything needed below. - */ - malloc_mutex_unlock(tsdn, &ctl_mtx); - - for (i = 0; i < narenas; i++) { - if (tarenas[i] != NULL) - arena_purge(tsdn, tarenas[i], all); - } - } else { - arena_t *tarena; - - assert(arena_ind < narenas); - - tarena = arena_get(tsdn, arena_ind, false); - - /* No further need to hold ctl_mtx. 
*/ - malloc_mutex_unlock(tsdn, &ctl_mtx); - - if (tarena != NULL) - arena_purge(tsdn, tarena, all); + tsd = tsd_fetch(); + for (i = 0, refreshed = false; i < ctl_stats.narenas; i++) { + tarenas[i] = arena_get(tsd, i, false, false); + if (tarenas[i] == NULL && !refreshed) { + tarenas[i] = arena_get(tsd, i, false, true); + refreshed = true; } } -} -static int -arena_i_purge_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) -{ - int ret; - - READONLY(); - WRITEONLY(); - arena_i_purge(tsd_tsdn(tsd), (unsigned)mib[1], true); - - ret = 0; -label_return: - return (ret); -} - -static int -arena_i_decay_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) -{ - int ret; - - READONLY(); - WRITEONLY(); - arena_i_purge(tsd_tsdn(tsd), (unsigned)mib[1], false); - - ret = 0; -label_return: - return (ret); -} - -static int -arena_i_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) -{ - int ret; - unsigned arena_ind; - arena_t *arena; - - READONLY(); - WRITEONLY(); - - if ((config_valgrind && unlikely(in_valgrind)) || (config_fill && - unlikely(opt_quarantine))) { - ret = EFAULT; - goto label_return; - } - - arena_ind = (unsigned)mib[1]; - if (config_debug) { - malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); + if (arena_ind == ctl_stats.narenas) { + unsigned i; + for (i = 0; i < ctl_stats.narenas; i++) { + if (tarenas[i] != NULL) + arena_purge_all(tarenas[i]); + } + } else { assert(arena_ind < ctl_stats.narenas); - malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); + if (tarenas[arena_ind] != NULL) + arena_purge_all(tarenas[arena_ind]); } - assert(arena_ind >= opt_narenas); +} - arena = arena_get(tsd_tsdn(tsd), arena_ind, false); +static int +arena_i_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret; - arena_reset(tsd, arena); + READONLY(); + 
WRITEONLY(); + malloc_mutex_lock(&ctl_mtx); + arena_purge(mib[1]); + malloc_mutex_unlock(&ctl_mtx); ret = 0; label_return: @@ -1646,16 +1585,16 @@ label_return: } static int -arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) +arena_i_dss_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) { int ret; const char *dss = NULL; - unsigned arena_ind = (unsigned)mib[1]; + unsigned arena_ind = mib[1]; dss_prec_t dss_prec_old = dss_prec_limit; dss_prec_t dss_prec = dss_prec_limit; - malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); + malloc_mutex_lock(&ctl_mtx); WRITE(dss, const char *); if (dss != NULL) { int i; @@ -1676,13 +1615,13 @@ arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, } if (arena_ind < ctl_stats.narenas) { - arena_t *arena = arena_get(tsd_tsdn(tsd), arena_ind, false); + arena_t *arena = arena_get(tsd_fetch(), arena_ind, false, true); if (arena == NULL || (dss_prec != dss_prec_limit && - arena_dss_prec_set(tsd_tsdn(tsd), arena, dss_prec))) { + arena_dss_prec_set(arena, dss_prec))) { ret = EFAULT; goto label_return; } - dss_prec_old = arena_dss_prec_get(tsd_tsdn(tsd), arena); + dss_prec_old = arena_dss_prec_get(arena); } else { if (dss_prec != dss_prec_limit && chunk_dss_prec_set(dss_prec)) { @@ -1697,61 +1636,26 @@ arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = 0; label_return: - malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); + malloc_mutex_unlock(&ctl_mtx); return (ret); } static int -arena_i_lg_dirty_mult_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) -{ - int ret; - unsigned arena_ind = (unsigned)mib[1]; - arena_t *arena; - - arena = arena_get(tsd_tsdn(tsd), arena_ind, false); - if (arena == NULL) { - ret = EFAULT; - goto label_return; - } - - if (oldp != NULL && oldlenp != NULL) { - size_t oldval = 
arena_lg_dirty_mult_get(tsd_tsdn(tsd), arena); - READ(oldval, ssize_t); - } - if (newp != NULL) { - if (newlen != sizeof(ssize_t)) { - ret = EINVAL; - goto label_return; - } - if (arena_lg_dirty_mult_set(tsd_tsdn(tsd), arena, - *(ssize_t *)newp)) { - ret = EFAULT; - goto label_return; - } - } - - ret = 0; -label_return: - return (ret); -} - -static int -arena_i_decay_time_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, +arena_i_lg_dirty_mult_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; - unsigned arena_ind = (unsigned)mib[1]; + unsigned arena_ind = mib[1]; arena_t *arena; - arena = arena_get(tsd_tsdn(tsd), arena_ind, false); + arena = arena_get(tsd_fetch(), arena_ind, false, true); if (arena == NULL) { ret = EFAULT; goto label_return; } if (oldp != NULL && oldlenp != NULL) { - size_t oldval = arena_decay_time_get(tsd_tsdn(tsd), arena); + size_t oldval = arena_lg_dirty_mult_get(arena); READ(oldval, ssize_t); } if (newp != NULL) { @@ -1759,8 +1663,7 @@ arena_i_decay_time_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = EINVAL; goto label_return; } - if (arena_decay_time_set(tsd_tsdn(tsd), arena, - *(ssize_t *)newp)) { + if (arena_lg_dirty_mult_set(arena, *(ssize_t *)newp)) { ret = EFAULT; goto label_return; } @@ -1772,25 +1675,24 @@ label_return: } static int -arena_i_chunk_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) +arena_i_chunk_hooks_ctl(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; - unsigned arena_ind = (unsigned)mib[1]; + unsigned arena_ind = mib[1]; arena_t *arena; - malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); + malloc_mutex_lock(&ctl_mtx); if (arena_ind < narenas_total_get() && (arena = - arena_get(tsd_tsdn(tsd), arena_ind, false)) != NULL) { + arena_get(tsd_fetch(), arena_ind, false, true)) != NULL) { if (newp != NULL) { chunk_hooks_t 
old_chunk_hooks, new_chunk_hooks; WRITE(new_chunk_hooks, chunk_hooks_t); - old_chunk_hooks = chunk_hooks_set(tsd_tsdn(tsd), arena, + old_chunk_hooks = chunk_hooks_set(arena, &new_chunk_hooks); READ(old_chunk_hooks, chunk_hooks_t); } else { - chunk_hooks_t old_chunk_hooks = - chunk_hooks_get(tsd_tsdn(tsd), arena); + chunk_hooks_t old_chunk_hooks = chunk_hooks_get(arena); READ(old_chunk_hooks, chunk_hooks_t); } } else { @@ -1799,16 +1701,16 @@ arena_i_chunk_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, } ret = 0; label_return: - malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); + malloc_mutex_unlock(&ctl_mtx); return (ret); } static const ctl_named_node_t * -arena_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) +arena_i_index(const size_t *mib, size_t miblen, size_t i) { - const ctl_named_node_t *ret; + const ctl_named_node_t * ret; - malloc_mutex_lock(tsdn, &ctl_mtx); + malloc_mutex_lock(&ctl_mtx); if (i > ctl_stats.narenas) { ret = NULL; goto label_return; @@ -1816,20 +1718,20 @@ arena_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) ret = super_arena_i_node; label_return: - malloc_mutex_unlock(tsdn, &ctl_mtx); + malloc_mutex_unlock(&ctl_mtx); return (ret); } /******************************************************************************/ static int -arenas_narenas_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, +arenas_narenas_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; unsigned narenas; - malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); + malloc_mutex_lock(&ctl_mtx); READONLY(); if (*oldlenp != sizeof(unsigned)) { ret = EINVAL; @@ -1840,23 +1742,23 @@ arenas_narenas_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = 0; label_return: - malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); + malloc_mutex_unlock(&ctl_mtx); return (ret); } static int -arenas_initialized_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, 
+arenas_initialized_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; unsigned nread, i; - malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); + malloc_mutex_lock(&ctl_mtx); READONLY(); if (*oldlenp != ctl_stats.narenas * sizeof(bool)) { ret = EINVAL; nread = (*oldlenp < ctl_stats.narenas * sizeof(bool)) - ? (unsigned)(*oldlenp / sizeof(bool)) : ctl_stats.narenas; + ? (*oldlenp / sizeof(bool)) : ctl_stats.narenas; } else { ret = 0; nread = ctl_stats.narenas; @@ -1866,13 +1768,13 @@ arenas_initialized_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ((bool *)oldp)[i] = ctl_stats.arenas[i].initialized; label_return: - malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); + malloc_mutex_unlock(&ctl_mtx); return (ret); } static int -arenas_lg_dirty_mult_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) +arenas_lg_dirty_mult_ctl(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; @@ -1896,32 +1798,6 @@ label_return: return (ret); } -static int -arenas_decay_time_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) -{ - int ret; - - if (oldp != NULL && oldlenp != NULL) { - size_t oldval = arena_decay_time_default_get(); - READ(oldval, ssize_t); - } - if (newp != NULL) { - if (newlen != sizeof(ssize_t)) { - ret = EINVAL; - goto label_return; - } - if (arena_decay_time_default_set(*(ssize_t *)newp)) { - ret = EFAULT; - goto label_return; - } - } - - ret = 0; -label_return: - return (ret); -} - CTL_RO_NL_GEN(arenas_quantum, QUANTUM, size_t) CTL_RO_NL_GEN(arenas_page, PAGE, size_t) CTL_RO_NL_CGEN(config_tcache, arenas_tcache_max, tcache_maxclass, size_t) @@ -1931,7 +1807,7 @@ CTL_RO_NL_GEN(arenas_bin_i_size, arena_bin_info[mib[2]].reg_size, size_t) CTL_RO_NL_GEN(arenas_bin_i_nregs, arena_bin_info[mib[2]].nregs, uint32_t) CTL_RO_NL_GEN(arenas_bin_i_run_size, 
arena_bin_info[mib[2]].run_size, size_t) static const ctl_named_node_t * -arenas_bin_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) +arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) { if (i > NBINS) @@ -1940,9 +1816,9 @@ arenas_bin_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) } CTL_RO_NL_GEN(arenas_nlruns, nlclasses, unsigned) -CTL_RO_NL_GEN(arenas_lrun_i_size, index2size(NBINS+(szind_t)mib[2]), size_t) +CTL_RO_NL_GEN(arenas_lrun_i_size, index2size(NBINS+mib[2]), size_t) static const ctl_named_node_t * -arenas_lrun_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) +arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i) { if (i > nlclasses) @@ -1951,10 +1827,9 @@ arenas_lrun_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) } CTL_RO_NL_GEN(arenas_nhchunks, nhclasses, unsigned) -CTL_RO_NL_GEN(arenas_hchunk_i_size, index2size(NBINS+nlclasses+(szind_t)mib[2]), - size_t) +CTL_RO_NL_GEN(arenas_hchunk_i_size, index2size(NBINS+nlclasses+mib[2]), size_t) static const ctl_named_node_t * -arenas_hchunk_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) +arenas_hchunk_i_index(const size_t *mib, size_t miblen, size_t i) { if (i > nhclasses) @@ -1963,15 +1838,15 @@ arenas_hchunk_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) } static int -arenas_extend_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) +arenas_extend_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) { int ret; unsigned narenas; - malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); + malloc_mutex_lock(&ctl_mtx); READONLY(); - if (ctl_grow(tsd_tsdn(tsd))) { + if (ctl_grow()) { ret = EAGAIN; goto label_return; } @@ -1980,40 +1855,14 @@ arenas_extend_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = 0; label_return: - malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); + 
malloc_mutex_unlock(&ctl_mtx); return (ret); } /******************************************************************************/ static int -prof_thread_active_init_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) -{ - int ret; - bool oldval; - - if (!config_prof) - return (ENOENT); - - if (newp != NULL) { - if (newlen != sizeof(bool)) { - ret = EINVAL; - goto label_return; - } - oldval = prof_thread_active_init_set(tsd_tsdn(tsd), - *(bool *)newp); - } else - oldval = prof_thread_active_init_get(tsd_tsdn(tsd)); - READ(oldval, bool); - - ret = 0; -label_return: - return (ret); -} - -static int -prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, +prof_thread_active_init_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; @@ -2027,9 +1876,9 @@ prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = EINVAL; goto label_return; } - oldval = prof_active_set(tsd_tsdn(tsd), *(bool *)newp); + oldval = prof_thread_active_init_set(*(bool *)newp); } else - oldval = prof_active_get(tsd_tsdn(tsd)); + oldval = prof_thread_active_init_get(); READ(oldval, bool); ret = 0; @@ -2038,8 +1887,33 @@ label_return: } static int -prof_dump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) +prof_active_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret; + bool oldval; + + if (!config_prof) + return (ENOENT); + + if (newp != NULL) { + if (newlen != sizeof(bool)) { + ret = EINVAL; + goto label_return; + } + oldval = prof_active_set(*(bool *)newp); + } else + oldval = prof_active_get(); + READ(oldval, bool); + + ret = 0; +label_return: + return (ret); +} + +static int +prof_dump_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) { int ret; const char *filename = NULL; @@ -2050,7 
+1924,7 @@ prof_dump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, WRITEONLY(); WRITE(filename, const char *); - if (prof_mdump(tsd, filename)) { + if (prof_mdump(filename)) { ret = EFAULT; goto label_return; } @@ -2061,8 +1935,8 @@ label_return: } static int -prof_gdump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) +prof_gdump_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) { int ret; bool oldval; @@ -2075,9 +1949,9 @@ prof_gdump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = EINVAL; goto label_return; } - oldval = prof_gdump_set(tsd_tsdn(tsd), *(bool *)newp); + oldval = prof_gdump_set(*(bool *)newp); } else - oldval = prof_gdump_get(tsd_tsdn(tsd)); + oldval = prof_gdump_get(); READ(oldval, bool); ret = 0; @@ -2086,11 +1960,12 @@ label_return: } static int -prof_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) +prof_reset_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) { int ret; size_t lg_sample = lg_prof_sample; + tsd_t *tsd; if (!config_prof) return (ENOENT); @@ -2100,6 +1975,8 @@ prof_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, if (lg_sample >= (sizeof(uint64_t) << 3)) lg_sample = (sizeof(uint64_t) << 3) - 1; + tsd = tsd_fetch(); + prof_reset(tsd, lg_sample); ret = 0; @@ -2118,20 +1995,15 @@ CTL_RO_CGEN(config_stats, stats_active, ctl_stats.active, size_t) CTL_RO_CGEN(config_stats, stats_metadata, ctl_stats.metadata, size_t) CTL_RO_CGEN(config_stats, stats_resident, ctl_stats.resident, size_t) CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats.mapped, size_t) -CTL_RO_CGEN(config_stats, stats_retained, ctl_stats.retained, size_t) CTL_RO_GEN(stats_arenas_i_dss, ctl_stats.arenas[mib[2]].dss, const char *) CTL_RO_GEN(stats_arenas_i_lg_dirty_mult, ctl_stats.arenas[mib[2]].lg_dirty_mult, 
ssize_t) -CTL_RO_GEN(stats_arenas_i_decay_time, ctl_stats.arenas[mib[2]].decay_time, - ssize_t) CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned) CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t) CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_mapped, ctl_stats.arenas[mib[2]].astats.mapped, size_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_retained, - ctl_stats.arenas[mib[2]].astats.retained, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_npurge, ctl_stats.arenas[mib[2]].astats.npurge, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_nmadvise, @@ -2188,8 +2060,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curruns, ctl_stats.arenas[mib[2]].bstats[mib[4]].curruns, size_t) static const ctl_named_node_t * -stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, - size_t j) +stats_arenas_i_bins_j_index(const size_t *mib, size_t miblen, size_t j) { if (j > NBINS) @@ -2207,8 +2078,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_curruns, ctl_stats.arenas[mib[2]].lstats[mib[4]].curruns, size_t) static const ctl_named_node_t * -stats_arenas_i_lruns_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, - size_t j) +stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j) { if (j > nlclasses) @@ -2227,8 +2097,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_hchunks_j_curhchunks, ctl_stats.arenas[mib[2]].hstats[mib[4]].curhchunks, size_t) static const ctl_named_node_t * -stats_arenas_i_hchunks_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, - size_t j) +stats_arenas_i_hchunks_j_index(const size_t *mib, size_t miblen, size_t j) { if (j > nhclasses) @@ -2237,11 +2106,11 @@ stats_arenas_i_hchunks_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, } static const ctl_named_node_t * -stats_arenas_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) +stats_arenas_i_index(const size_t 
*mib, size_t miblen, size_t i) { const ctl_named_node_t * ret; - malloc_mutex_lock(tsdn, &ctl_mtx); + malloc_mutex_lock(&ctl_mtx); if (i > ctl_stats.narenas || !ctl_stats.arenas[i].initialized) { ret = NULL; goto label_return; @@ -2249,6 +2118,6 @@ stats_arenas_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) ret = super_stats_arenas_i_node; label_return: - malloc_mutex_unlock(tsdn, &ctl_mtx); + malloc_mutex_unlock(&ctl_mtx); return (ret); } diff --git a/deps/jemalloc/src/extent.c b/deps/jemalloc/src/extent.c index 218156c60..13f94411c 100644 --- a/deps/jemalloc/src/extent.c +++ b/deps/jemalloc/src/extent.c @@ -3,48 +3,45 @@ /******************************************************************************/ -/* - * Round down to the nearest chunk size that can actually be requested during - * normal huge allocation. - */ JEMALLOC_INLINE_C size_t extent_quantize(size_t size) { - size_t ret; - szind_t ind; - assert(size > 0); - - ind = size2index(size + 1); - if (ind == 0) { - /* Avoid underflow. */ - return (index2size(0)); - } - ret = index2size(ind - 1); - assert(ret <= size); - return (ret); + /* + * Round down to the nearest chunk size that can actually be requested + * during normal huge allocation. + */ + return (index2size(size2index(size + 1) - 1)); } JEMALLOC_INLINE_C int -extent_sz_comp(const extent_node_t *a, const extent_node_t *b) +extent_szad_comp(extent_node_t *a, extent_node_t *b) { + int ret; size_t a_qsize = extent_quantize(extent_node_size_get(a)); size_t b_qsize = extent_quantize(extent_node_size_get(b)); - return ((a_qsize > b_qsize) - (a_qsize < b_qsize)); + /* + * Compare based on quantized size rather than size, in order to sort + * equally useful extents only by address. 
+ */ + ret = (a_qsize > b_qsize) - (a_qsize < b_qsize); + if (ret == 0) { + uintptr_t a_addr = (uintptr_t)extent_node_addr_get(a); + uintptr_t b_addr = (uintptr_t)extent_node_addr_get(b); + + ret = (a_addr > b_addr) - (a_addr < b_addr); + } + + return (ret); } -JEMALLOC_INLINE_C int -extent_sn_comp(const extent_node_t *a, const extent_node_t *b) -{ - size_t a_sn = extent_node_sn_get(a); - size_t b_sn = extent_node_sn_get(b); - - return ((a_sn > b_sn) - (a_sn < b_sn)); -} +/* Generate red-black tree functions. */ +rb_gen(, extent_tree_szad_, extent_tree_t, extent_node_t, szad_link, + extent_szad_comp) JEMALLOC_INLINE_C int -extent_ad_comp(const extent_node_t *a, const extent_node_t *b) +extent_ad_comp(extent_node_t *a, extent_node_t *b) { uintptr_t a_addr = (uintptr_t)extent_node_addr_get(a); uintptr_t b_addr = (uintptr_t)extent_node_addr_get(b); @@ -52,26 +49,5 @@ extent_ad_comp(const extent_node_t *a, const extent_node_t *b) return ((a_addr > b_addr) - (a_addr < b_addr)); } -JEMALLOC_INLINE_C int -extent_szsnad_comp(const extent_node_t *a, const extent_node_t *b) -{ - int ret; - - ret = extent_sz_comp(a, b); - if (ret != 0) - return (ret); - - ret = extent_sn_comp(a, b); - if (ret != 0) - return (ret); - - ret = extent_ad_comp(a, b); - return (ret); -} - -/* Generate red-black tree functions. */ -rb_gen(, extent_tree_szsnad_, extent_tree_t, extent_node_t, szsnad_link, - extent_szsnad_comp) - /* Generate red-black tree functions. 
*/ rb_gen(, extent_tree_ad_, extent_tree_t, extent_node_t, ad_link, extent_ad_comp) diff --git a/deps/jemalloc/src/huge.c b/deps/jemalloc/src/huge.c index 8abd8c00c..1e9a66512 100644 --- a/deps/jemalloc/src/huge.c +++ b/deps/jemalloc/src/huge.c @@ -15,21 +15,12 @@ huge_node_get(const void *ptr) } static bool -huge_node_set(tsdn_t *tsdn, const void *ptr, extent_node_t *node) +huge_node_set(const void *ptr, extent_node_t *node) { assert(extent_node_addr_get(node) == ptr); assert(!extent_node_achunk_get(node)); - return (chunk_register(tsdn, ptr, node)); -} - -static void -huge_node_reset(tsdn_t *tsdn, const void *ptr, extent_node_t *node) -{ - bool err; - - err = huge_node_set(tsdn, ptr, node); - assert(!err); + return (chunk_register(ptr, node)); } static void @@ -40,39 +31,39 @@ huge_node_unset(const void *ptr, const extent_node_t *node) } void * -huge_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero) +huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, + tcache_t *tcache) { + size_t usize; - assert(usize == s2u(usize)); + usize = s2u(size); + if (usize == 0) { + /* size_t overflow. */ + return (NULL); + } - return (huge_palloc(tsdn, arena, usize, chunksize, zero)); + return (huge_palloc(tsd, arena, usize, chunksize, zero, tcache)); } void * -huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, - bool zero) +huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, + bool zero, tcache_t *tcache) { void *ret; - size_t ausize; - arena_t *iarena; + size_t usize; extent_node_t *node; - size_t sn; bool is_zeroed; /* Allocate one or more contiguous chunks for this request. */ - assert(!tsdn_null(tsdn) || arena != NULL); - - ausize = sa2u(usize, alignment); - if (unlikely(ausize == 0 || ausize > HUGE_MAXCLASS)) + usize = sa2u(size, alignment); + if (unlikely(usize == 0)) return (NULL); - assert(ausize >= chunksize); + assert(usize >= chunksize); /* Allocate an extent node with which to track the chunk. 
*/ - iarena = (!tsdn_null(tsdn)) ? arena_ichoose(tsdn_tsd(tsdn), NULL) : - a0get(); - node = ipallocztm(tsdn, CACHELINE_CEILING(sizeof(extent_node_t)), - CACHELINE, false, NULL, true, iarena); + node = ipallocztm(tsd, CACHELINE_CEILING(sizeof(extent_node_t)), + CACHELINE, false, tcache, true, arena); if (node == NULL) return (NULL); @@ -81,35 +72,33 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, * it is possible to make correct junk/zero fill decisions below. */ is_zeroed = zero; - if (likely(!tsdn_null(tsdn))) - arena = arena_choose(tsdn_tsd(tsdn), arena); - if (unlikely(arena == NULL) || (ret = arena_chunk_alloc_huge(tsdn, - arena, usize, alignment, &sn, &is_zeroed)) == NULL) { - idalloctm(tsdn, node, NULL, true, true); + arena = arena_choose(tsd, arena); + if (unlikely(arena == NULL) || (ret = arena_chunk_alloc_huge(arena, + size, alignment, &is_zeroed)) == NULL) { + idalloctm(tsd, node, tcache, true); return (NULL); } - extent_node_init(node, arena, ret, usize, sn, is_zeroed, true); + extent_node_init(node, arena, ret, size, is_zeroed, true); - if (huge_node_set(tsdn, ret, node)) { - arena_chunk_dalloc_huge(tsdn, arena, ret, usize, sn); - idalloctm(tsdn, node, NULL, true, true); + if (huge_node_set(ret, node)) { + arena_chunk_dalloc_huge(arena, ret, size); + idalloctm(tsd, node, tcache, true); return (NULL); } /* Insert node into huge. */ - malloc_mutex_lock(tsdn, &arena->huge_mtx); + malloc_mutex_lock(&arena->huge_mtx); ql_elm_new(node, ql_link); ql_tail_insert(&arena->huge, node, ql_link); - malloc_mutex_unlock(tsdn, &arena->huge_mtx); + malloc_mutex_unlock(&arena->huge_mtx); if (zero || (config_fill && unlikely(opt_zero))) { if (!is_zeroed) - memset(ret, 0, usize); + memset(ret, 0, size); } else if (config_fill && unlikely(opt_junk_alloc)) - memset(ret, JEMALLOC_ALLOC_JUNK, usize); + memset(ret, 0xa5, size); - arena_decay_tick(tsdn, arena); return (ret); } @@ -127,7 +116,7 @@ huge_dalloc_junk(void *ptr, size_t usize) * unmapped. 
*/ if (!config_munmap || (have_dss && chunk_in_dss(ptr))) - memset(ptr, JEMALLOC_FREE_JUNK, usize); + memset(ptr, 0x5a, usize); } } #ifdef JEMALLOC_JET @@ -137,8 +126,8 @@ huge_dalloc_junk_t *huge_dalloc_junk = JEMALLOC_N(huge_dalloc_junk_impl); #endif static void -huge_ralloc_no_move_similar(tsdn_t *tsdn, void *ptr, size_t oldsize, - size_t usize_min, size_t usize_max, bool zero) +huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize_min, + size_t usize_max, bool zero) { size_t usize, usize_next; extent_node_t *node; @@ -162,28 +151,24 @@ huge_ralloc_no_move_similar(tsdn_t *tsdn, void *ptr, size_t oldsize, if (oldsize > usize) { size_t sdiff = oldsize - usize; if (config_fill && unlikely(opt_junk_free)) { - memset((void *)((uintptr_t)ptr + usize), - JEMALLOC_FREE_JUNK, sdiff); + memset((void *)((uintptr_t)ptr + usize), 0x5a, sdiff); post_zeroed = false; } else { - post_zeroed = !chunk_purge_wrapper(tsdn, arena, - &chunk_hooks, ptr, CHUNK_CEILING(oldsize), usize, - sdiff); + post_zeroed = !chunk_purge_wrapper(arena, &chunk_hooks, + ptr, CHUNK_CEILING(oldsize), usize, sdiff); } } else post_zeroed = pre_zeroed; - malloc_mutex_lock(tsdn, &arena->huge_mtx); + malloc_mutex_lock(&arena->huge_mtx); /* Update the size of the huge allocation. */ - huge_node_unset(ptr, node); assert(extent_node_size_get(node) != usize); extent_node_size_set(node, usize); - huge_node_reset(tsdn, ptr, node); /* Update zeroed. */ extent_node_zeroed_set(node, post_zeroed); - malloc_mutex_unlock(tsdn, &arena->huge_mtx); + malloc_mutex_unlock(&arena->huge_mtx); - arena_chunk_ralloc_huge_similar(tsdn, arena, ptr, oldsize, usize); + arena_chunk_ralloc_huge_similar(arena, ptr, oldsize, usize); /* Fill if necessary (growing). 
*/ if (oldsize < usize) { @@ -193,15 +178,14 @@ huge_ralloc_no_move_similar(tsdn_t *tsdn, void *ptr, size_t oldsize, usize - oldsize); } } else if (config_fill && unlikely(opt_junk_alloc)) { - memset((void *)((uintptr_t)ptr + oldsize), - JEMALLOC_ALLOC_JUNK, usize - oldsize); + memset((void *)((uintptr_t)ptr + oldsize), 0xa5, usize - + oldsize); } } } static bool -huge_ralloc_no_move_shrink(tsdn_t *tsdn, void *ptr, size_t oldsize, - size_t usize) +huge_ralloc_no_move_shrink(void *ptr, size_t oldsize, size_t usize) { extent_node_t *node; arena_t *arena; @@ -212,7 +196,7 @@ huge_ralloc_no_move_shrink(tsdn_t *tsdn, void *ptr, size_t oldsize, node = huge_node_get(ptr); arena = extent_node_arena_get(node); pre_zeroed = extent_node_zeroed_get(node); - chunk_hooks = chunk_hooks_get(tsdn, arena); + chunk_hooks = chunk_hooks_get(arena); assert(oldsize > usize); @@ -229,59 +213,53 @@ huge_ralloc_no_move_shrink(tsdn_t *tsdn, void *ptr, size_t oldsize, sdiff); post_zeroed = false; } else { - post_zeroed = !chunk_purge_wrapper(tsdn, arena, - &chunk_hooks, CHUNK_ADDR2BASE((uintptr_t)ptr + - usize), CHUNK_CEILING(oldsize), + post_zeroed = !chunk_purge_wrapper(arena, &chunk_hooks, + CHUNK_ADDR2BASE((uintptr_t)ptr + usize), + CHUNK_CEILING(oldsize), CHUNK_ADDR2OFFSET((uintptr_t)ptr + usize), sdiff); } } else post_zeroed = pre_zeroed; - malloc_mutex_lock(tsdn, &arena->huge_mtx); + malloc_mutex_lock(&arena->huge_mtx); /* Update the size of the huge allocation. */ - huge_node_unset(ptr, node); extent_node_size_set(node, usize); - huge_node_reset(tsdn, ptr, node); /* Update zeroed. */ extent_node_zeroed_set(node, post_zeroed); - malloc_mutex_unlock(tsdn, &arena->huge_mtx); + malloc_mutex_unlock(&arena->huge_mtx); /* Zap the excess chunks. 
*/ - arena_chunk_ralloc_huge_shrink(tsdn, arena, ptr, oldsize, usize, - extent_node_sn_get(node)); + arena_chunk_ralloc_huge_shrink(arena, ptr, oldsize, usize); return (false); } static bool -huge_ralloc_no_move_expand(tsdn_t *tsdn, void *ptr, size_t oldsize, - size_t usize, bool zero) { +huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t usize, bool zero) { extent_node_t *node; arena_t *arena; bool is_zeroed_subchunk, is_zeroed_chunk; node = huge_node_get(ptr); arena = extent_node_arena_get(node); - malloc_mutex_lock(tsdn, &arena->huge_mtx); + malloc_mutex_lock(&arena->huge_mtx); is_zeroed_subchunk = extent_node_zeroed_get(node); - malloc_mutex_unlock(tsdn, &arena->huge_mtx); + malloc_mutex_unlock(&arena->huge_mtx); /* - * Use is_zeroed_chunk to detect whether the trailing memory is zeroed, - * update extent's zeroed field, and zero as necessary. + * Copy zero into is_zeroed_chunk and pass the copy to chunk_alloc(), so + * that it is possible to make correct junk/zero fill decisions below. */ - is_zeroed_chunk = false; - if (arena_chunk_ralloc_huge_expand(tsdn, arena, ptr, oldsize, usize, + is_zeroed_chunk = zero; + + if (arena_chunk_ralloc_huge_expand(arena, ptr, oldsize, usize, &is_zeroed_chunk)) return (true); - malloc_mutex_lock(tsdn, &arena->huge_mtx); - huge_node_unset(ptr, node); + malloc_mutex_lock(&arena->huge_mtx); + /* Update the size of the huge allocation. 
*/ extent_node_size_set(node, usize); - extent_node_zeroed_set(node, extent_node_zeroed_get(node) && - is_zeroed_chunk); - huge_node_reset(tsdn, ptr, node); - malloc_mutex_unlock(tsdn, &arena->huge_mtx); + malloc_mutex_unlock(&arena->huge_mtx); if (zero || (config_fill && unlikely(opt_zero))) { if (!is_zeroed_subchunk) { @@ -294,21 +272,19 @@ huge_ralloc_no_move_expand(tsdn_t *tsdn, void *ptr, size_t oldsize, CHUNK_CEILING(oldsize)); } } else if (config_fill && unlikely(opt_junk_alloc)) { - memset((void *)((uintptr_t)ptr + oldsize), JEMALLOC_ALLOC_JUNK, - usize - oldsize); + memset((void *)((uintptr_t)ptr + oldsize), 0xa5, usize - + oldsize); } return (false); } bool -huge_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t usize_min, +huge_ralloc_no_move(void *ptr, size_t oldsize, size_t usize_min, size_t usize_max, bool zero) { assert(s2u(oldsize) == oldsize); - /* The following should have been caught by callers. */ - assert(usize_min > 0 && usize_max <= HUGE_MAXCLASS); /* Both allocations must be huge to avoid a move. */ if (oldsize < chunksize || usize_max < chunksize) @@ -316,18 +292,13 @@ huge_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t usize_min, if (CHUNK_CEILING(usize_max) > CHUNK_CEILING(oldsize)) { /* Attempt to expand the allocation in-place. */ - if (!huge_ralloc_no_move_expand(tsdn, ptr, oldsize, usize_max, - zero)) { - arena_decay_tick(tsdn, huge_aalloc(ptr)); + if (!huge_ralloc_no_move_expand(ptr, oldsize, usize_max, zero)) return (false); - } /* Try again, this time with usize_min. 
*/ if (usize_min < usize_max && CHUNK_CEILING(usize_min) > - CHUNK_CEILING(oldsize) && huge_ralloc_no_move_expand(tsdn, - ptr, oldsize, usize_min, zero)) { - arena_decay_tick(tsdn, huge_aalloc(ptr)); + CHUNK_CEILING(oldsize) && huge_ralloc_no_move_expand(ptr, + oldsize, usize_min, zero)) return (false); - } } /* @@ -336,46 +307,36 @@ huge_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t usize_min, */ if (CHUNK_CEILING(oldsize) >= CHUNK_CEILING(usize_min) && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(usize_max)) { - huge_ralloc_no_move_similar(tsdn, ptr, oldsize, usize_min, - usize_max, zero); - arena_decay_tick(tsdn, huge_aalloc(ptr)); + huge_ralloc_no_move_similar(ptr, oldsize, usize_min, usize_max, + zero); return (false); } /* Attempt to shrink the allocation in-place. */ - if (CHUNK_CEILING(oldsize) > CHUNK_CEILING(usize_max)) { - if (!huge_ralloc_no_move_shrink(tsdn, ptr, oldsize, - usize_max)) { - arena_decay_tick(tsdn, huge_aalloc(ptr)); - return (false); - } - } + if (CHUNK_CEILING(oldsize) > CHUNK_CEILING(usize_max)) + return (huge_ralloc_no_move_shrink(ptr, oldsize, usize_max)); return (true); } static void * -huge_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, - size_t alignment, bool zero) +huge_ralloc_move_helper(tsd_t *tsd, arena_t *arena, size_t usize, + size_t alignment, bool zero, tcache_t *tcache) { if (alignment <= chunksize) - return (huge_malloc(tsdn, arena, usize, zero)); - return (huge_palloc(tsdn, arena, usize, alignment, zero)); + return (huge_malloc(tsd, arena, usize, zero, tcache)); + return (huge_palloc(tsd, arena, usize, alignment, zero, tcache)); } void * -huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, - size_t usize, size_t alignment, bool zero, tcache_t *tcache) +huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize, + size_t alignment, bool zero, tcache_t *tcache) { void *ret; size_t copysize; - /* The following should have been caught by callers. 
*/ - assert(usize > 0 && usize <= HUGE_MAXCLASS); - /* Try to avoid moving the allocation. */ - if (!huge_ralloc_no_move(tsd_tsdn(tsd), ptr, oldsize, usize, usize, - zero)) + if (!huge_ralloc_no_move(ptr, oldsize, usize, usize, zero)) return (ptr); /* @@ -383,19 +344,19 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, * different size class. In that case, fall back to allocating new * space and copying. */ - ret = huge_ralloc_move_helper(tsd_tsdn(tsd), arena, usize, alignment, - zero); + ret = huge_ralloc_move_helper(tsd, arena, usize, alignment, zero, + tcache); if (ret == NULL) return (NULL); copysize = (usize < oldsize) ? usize : oldsize; memcpy(ret, ptr, copysize); - isqalloc(tsd, ptr, oldsize, tcache, true); + isqalloc(tsd, ptr, oldsize, tcache); return (ret); } void -huge_dalloc(tsdn_t *tsdn, void *ptr) +huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) { extent_node_t *node; arena_t *arena; @@ -403,18 +364,15 @@ huge_dalloc(tsdn_t *tsdn, void *ptr) node = huge_node_get(ptr); arena = extent_node_arena_get(node); huge_node_unset(ptr, node); - malloc_mutex_lock(tsdn, &arena->huge_mtx); + malloc_mutex_lock(&arena->huge_mtx); ql_remove(&arena->huge, node, ql_link); - malloc_mutex_unlock(tsdn, &arena->huge_mtx); + malloc_mutex_unlock(&arena->huge_mtx); huge_dalloc_junk(extent_node_addr_get(node), extent_node_size_get(node)); - arena_chunk_dalloc_huge(tsdn, extent_node_arena_get(node), - extent_node_addr_get(node), extent_node_size_get(node), - extent_node_sn_get(node)); - idalloctm(tsdn, node, NULL, true, true); - - arena_decay_tick(tsdn, arena); + arena_chunk_dalloc_huge(extent_node_arena_get(node), + extent_node_addr_get(node), extent_node_size_get(node)); + idalloctm(tsd, node, tcache, true); } arena_t * @@ -425,7 +383,7 @@ huge_aalloc(const void *ptr) } size_t -huge_salloc(tsdn_t *tsdn, const void *ptr) +huge_salloc(const void *ptr) { size_t size; extent_node_t *node; @@ -433,15 +391,15 @@ huge_salloc(tsdn_t *tsdn, const void *ptr) node 
= huge_node_get(ptr); arena = extent_node_arena_get(node); - malloc_mutex_lock(tsdn, &arena->huge_mtx); + malloc_mutex_lock(&arena->huge_mtx); size = extent_node_size_get(node); - malloc_mutex_unlock(tsdn, &arena->huge_mtx); + malloc_mutex_unlock(&arena->huge_mtx); return (size); } prof_tctx_t * -huge_prof_tctx_get(tsdn_t *tsdn, const void *ptr) +huge_prof_tctx_get(const void *ptr) { prof_tctx_t *tctx; extent_node_t *node; @@ -449,29 +407,29 @@ huge_prof_tctx_get(tsdn_t *tsdn, const void *ptr) node = huge_node_get(ptr); arena = extent_node_arena_get(node); - malloc_mutex_lock(tsdn, &arena->huge_mtx); + malloc_mutex_lock(&arena->huge_mtx); tctx = extent_node_prof_tctx_get(node); - malloc_mutex_unlock(tsdn, &arena->huge_mtx); + malloc_mutex_unlock(&arena->huge_mtx); return (tctx); } void -huge_prof_tctx_set(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) +huge_prof_tctx_set(const void *ptr, prof_tctx_t *tctx) { extent_node_t *node; arena_t *arena; node = huge_node_get(ptr); arena = extent_node_arena_get(node); - malloc_mutex_lock(tsdn, &arena->huge_mtx); + malloc_mutex_lock(&arena->huge_mtx); extent_node_prof_tctx_set(node, tctx); - malloc_mutex_unlock(tsdn, &arena->huge_mtx); + malloc_mutex_unlock(&arena->huge_mtx); } void -huge_prof_tctx_reset(tsdn_t *tsdn, const void *ptr) +huge_prof_tctx_reset(const void *ptr) { - huge_prof_tctx_set(tsdn, ptr, (prof_tctx_t *)(uintptr_t)1U); + huge_prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U); } diff --git a/deps/jemalloc/src/jemalloc.c b/deps/jemalloc/src/jemalloc.c index 07389ca2f..fe77c2475 100644 --- a/deps/jemalloc/src/jemalloc.c +++ b/deps/jemalloc/src/jemalloc.c @@ -5,11 +5,7 @@ /* Data. */ /* Runtime configuration options. 
*/ -const char *je_malloc_conf -#ifndef _WIN32 - JEMALLOC_ATTR(weak) -#endif - ; +const char *je_malloc_conf JEMALLOC_ATTR(weak); bool opt_abort = #ifdef JEMALLOC_DEBUG true @@ -44,14 +40,14 @@ bool opt_redzone = false; bool opt_utrace = false; bool opt_xmalloc = false; bool opt_zero = false; -unsigned opt_narenas = 0; +size_t opt_narenas = 0; /* Initialized to true if the process is running inside Valgrind. */ bool in_valgrind; unsigned ncpus; -/* Protects arenas initialization. */ +/* Protects arenas initialization (arenas, narenas_total). */ static malloc_mutex_t arenas_lock; /* * Arenas that are used to service external requests. Not all elements of the @@ -61,10 +57,10 @@ static malloc_mutex_t arenas_lock; * arenas. arenas[narenas_auto..narenas_total) are only used if the application * takes some action to create them and allocate from them. */ -arena_t **arenas; -static unsigned narenas_total; /* Use narenas_total_*(). */ +static arena_t **arenas; +static unsigned narenas_total; static arena_t *a0; /* arenas[0]; read-only after initialization. */ -unsigned narenas_auto; /* Read-only after initialization. */ +static unsigned narenas_auto; /* Read-only after initialization. */ typedef enum { malloc_init_uninitialized = 3, @@ -74,37 +70,9 @@ typedef enum { } malloc_init_t; static malloc_init_t malloc_init_state = malloc_init_uninitialized; -/* False should be the common case. Set to true to trigger initialization. */ -static bool malloc_slow = true; - -/* When malloc_slow is true, set the corresponding bits for sanity check. 
*/ -enum { - flag_opt_junk_alloc = (1U), - flag_opt_junk_free = (1U << 1), - flag_opt_quarantine = (1U << 2), - flag_opt_zero = (1U << 3), - flag_opt_utrace = (1U << 4), - flag_in_valgrind = (1U << 5), - flag_opt_xmalloc = (1U << 6) -}; -static uint8_t malloc_slow_flags; - -JEMALLOC_ALIGNED(CACHELINE) -const size_t pind2sz_tab[NPSIZES] = { -#define PSZ_yes(lg_grp, ndelta, lg_delta) \ - (((ZU(1)< MALLOCX_ARENA_MAX) return (NULL); - if (ind == narenas_total_get()) - narenas_total_inc(); + if (ind == narenas_total) { + unsigned narenas_new = narenas_total + 1; + arena_t **arenas_new = + (arena_t **)a0malloc(CACHELINE_CEILING(narenas_new * + sizeof(arena_t *))); + if (arenas_new == NULL) + return (NULL); + memcpy(arenas_new, arenas, narenas_total * sizeof(arena_t *)); + arenas_new[ind] = NULL; + /* + * Deallocate only if arenas came from a0malloc() (not + * base_alloc()). + */ + if (narenas_total != narenas_auto) + a0dalloc(arenas); + arenas = arenas_new; + narenas_total = narenas_new; + } /* * Another thread may have already initialized arenas[ind] if it's an * auto arena. */ - arena = arena_get(tsdn, ind, false); + arena = arenas[ind]; if (arena != NULL) { assert(ind < narenas_auto); return (arena); } /* Actually initialize the arena. 
*/ - arena = arena_new(tsdn, ind); - arena_set(ind, arena); + arena = arenas[ind] = arena_new(ind); return (arena); } arena_t * -arena_init(tsdn_t *tsdn, unsigned ind) +arena_init(unsigned ind) { arena_t *arena; - malloc_mutex_lock(tsdn, &arenas_lock); - arena = arena_init_locked(tsdn, ind); - malloc_mutex_unlock(tsdn, &arenas_lock); + malloc_mutex_lock(&arenas_lock); + arena = arena_init_locked(ind); + malloc_mutex_unlock(&arenas_lock); return (arena); } +unsigned +narenas_total_get(void) +{ + unsigned narenas; + + malloc_mutex_lock(&arenas_lock); + narenas = narenas_total; + malloc_mutex_unlock(&arenas_lock); + + return (narenas); +} + static void -arena_bind(tsd_t *tsd, unsigned ind, bool internal) +arena_bind_locked(tsd_t *tsd, unsigned ind) { arena_t *arena; - if (!tsd_nominal(tsd)) - return; + arena = arenas[ind]; + arena->nthreads++; - arena = arena_get(tsd_tsdn(tsd), ind, false); - arena_nthreads_inc(arena, internal); - - if (internal) - tsd_iarena_set(tsd, arena); - else + if (tsd_nominal(tsd)) tsd_arena_set(tsd, arena); } +static void +arena_bind(tsd_t *tsd, unsigned ind) +{ + + malloc_mutex_lock(&arenas_lock); + arena_bind_locked(tsd, ind); + malloc_mutex_unlock(&arenas_lock); +} + void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind) { arena_t *oldarena, *newarena; - oldarena = arena_get(tsd_tsdn(tsd), oldind, false); - newarena = arena_get(tsd_tsdn(tsd), newind, false); - arena_nthreads_dec(oldarena, false); - arena_nthreads_inc(newarena, false); + malloc_mutex_lock(&arenas_lock); + oldarena = arenas[oldind]; + newarena = arenas[newind]; + oldarena->nthreads--; + newarena->nthreads++; + malloc_mutex_unlock(&arenas_lock); tsd_arena_set(tsd, newarena); } +unsigned +arena_nbound(unsigned ind) +{ + unsigned nthreads; + + malloc_mutex_lock(&arenas_lock); + nthreads = arenas[ind]->nthreads; + malloc_mutex_unlock(&arenas_lock); + return (nthreads); +} + static void -arena_unbind(tsd_t *tsd, unsigned ind, bool internal) +arena_unbind(tsd_t *tsd, 
unsigned ind) { arena_t *arena; - arena = arena_get(tsd_tsdn(tsd), ind, false); - arena_nthreads_dec(arena, internal); - if (internal) - tsd_iarena_set(tsd, NULL); - else - tsd_arena_set(tsd, NULL); + malloc_mutex_lock(&arenas_lock); + arena = arenas[ind]; + arena->nthreads--; + malloc_mutex_unlock(&arenas_lock); + tsd_arena_set(tsd, NULL); } -arena_tdata_t * -arena_tdata_get_hard(tsd_t *tsd, unsigned ind) +arena_t * +arena_get_hard(tsd_t *tsd, unsigned ind, bool init_if_missing) { - arena_tdata_t *tdata, *arenas_tdata_old; - arena_tdata_t *arenas_tdata = tsd_arenas_tdata_get(tsd); - unsigned narenas_tdata_old, i; - unsigned narenas_tdata = tsd_narenas_tdata_get(tsd); + arena_t *arena; + arena_t **arenas_cache = tsd_arenas_cache_get(tsd); + unsigned narenas_cache = tsd_narenas_cache_get(tsd); unsigned narenas_actual = narenas_total_get(); - /* - * Dissociate old tdata array (and set up for deallocation upon return) - * if it's too small. - */ - if (arenas_tdata != NULL && narenas_tdata < narenas_actual) { - arenas_tdata_old = arenas_tdata; - narenas_tdata_old = narenas_tdata; - arenas_tdata = NULL; - narenas_tdata = 0; - tsd_arenas_tdata_set(tsd, arenas_tdata); - tsd_narenas_tdata_set(tsd, narenas_tdata); - } else { - arenas_tdata_old = NULL; - narenas_tdata_old = 0; + /* Deallocate old cache if it's too small. */ + if (arenas_cache != NULL && narenas_cache < narenas_actual) { + a0dalloc(arenas_cache); + arenas_cache = NULL; + narenas_cache = 0; + tsd_arenas_cache_set(tsd, arenas_cache); + tsd_narenas_cache_set(tsd, narenas_cache); } - /* Allocate tdata array if it's missing. */ - if (arenas_tdata == NULL) { - bool *arenas_tdata_bypassp = tsd_arenas_tdata_bypassp_get(tsd); - narenas_tdata = (ind < narenas_actual) ? narenas_actual : ind+1; + /* Allocate cache if it's missing. */ + if (arenas_cache == NULL) { + bool *arenas_cache_bypassp = tsd_arenas_cache_bypassp_get(tsd); + assert(ind < narenas_actual || !init_if_missing); + narenas_cache = (ind < narenas_actual) ? 
narenas_actual : ind+1; - if (tsd_nominal(tsd) && !*arenas_tdata_bypassp) { - *arenas_tdata_bypassp = true; - arenas_tdata = (arena_tdata_t *)a0malloc( - sizeof(arena_tdata_t) * narenas_tdata); - *arenas_tdata_bypassp = false; + if (tsd_nominal(tsd) && !*arenas_cache_bypassp) { + *arenas_cache_bypassp = true; + arenas_cache = (arena_t **)a0malloc(sizeof(arena_t *) * + narenas_cache); + *arenas_cache_bypassp = false; } - if (arenas_tdata == NULL) { - tdata = NULL; - goto label_return; + if (arenas_cache == NULL) { + /* + * This function must always tell the truth, even if + * it's slow, so don't let OOM, thread cleanup (note + * tsd_nominal check), nor recursive allocation + * avoidance (note arenas_cache_bypass check) get in the + * way. + */ + if (ind >= narenas_actual) + return (NULL); + malloc_mutex_lock(&arenas_lock); + arena = arenas[ind]; + malloc_mutex_unlock(&arenas_lock); + return (arena); } - assert(tsd_nominal(tsd) && !*arenas_tdata_bypassp); - tsd_arenas_tdata_set(tsd, arenas_tdata); - tsd_narenas_tdata_set(tsd, narenas_tdata); + assert(tsd_nominal(tsd) && !*arenas_cache_bypassp); + tsd_arenas_cache_set(tsd, arenas_cache); + tsd_narenas_cache_set(tsd, narenas_cache); } /* - * Copy to tdata array. It's possible that the actual number of arenas - * has increased since narenas_total_get() was called above, but that - * causes no correctness issues unless two threads concurrently execute - * the arenas.extend mallctl, which we trust mallctl synchronization to + * Copy to cache. It's possible that the actual number of arenas has + * increased since narenas_total_get() was called above, but that causes + * no correctness issues unless two threads concurrently execute the + * arenas.extend mallctl, which we trust mallctl synchronization to * prevent. */ - - /* Copy/initialize tickers. 
*/ - for (i = 0; i < narenas_actual; i++) { - if (i < narenas_tdata_old) { - ticker_copy(&arenas_tdata[i].decay_ticker, - &arenas_tdata_old[i].decay_ticker); - } else { - ticker_init(&arenas_tdata[i].decay_ticker, - DECAY_NTICKS_PER_UPDATE); - } - } - if (narenas_tdata > narenas_actual) { - memset(&arenas_tdata[narenas_actual], 0, sizeof(arena_tdata_t) - * (narenas_tdata - narenas_actual)); + malloc_mutex_lock(&arenas_lock); + memcpy(arenas_cache, arenas, sizeof(arena_t *) * narenas_actual); + malloc_mutex_unlock(&arenas_lock); + if (narenas_cache > narenas_actual) { + memset(&arenas_cache[narenas_actual], 0, sizeof(arena_t *) * + (narenas_cache - narenas_actual)); } - /* Read the refreshed tdata array. */ - tdata = &arenas_tdata[ind]; -label_return: - if (arenas_tdata_old != NULL) - a0dalloc(arenas_tdata_old); - return (tdata); + /* Read the refreshed cache, and init the arena if necessary. */ + arena = arenas_cache[ind]; + if (init_if_missing && arena == NULL) + arena = arenas_cache[ind] = arena_init(ind); + return (arena); } /* Slow path, called only by arena_choose(). */ arena_t * -arena_choose_hard(tsd_t *tsd, bool internal) +arena_choose_hard(tsd_t *tsd) { - arena_t *ret JEMALLOC_CC_SILENCE_INIT(NULL); + arena_t *ret; if (narenas_auto > 1) { - unsigned i, j, choose[2], first_null; - - /* - * Determine binding for both non-internal and internal - * allocation. - * - * choose[0]: For application allocation. - * choose[1]: For internal metadata allocation. - */ - - for (j = 0; j < 2; j++) - choose[j] = 0; + unsigned i, choose, first_null; + choose = 0; first_null = narenas_auto; - malloc_mutex_lock(tsd_tsdn(tsd), &arenas_lock); - assert(arena_get(tsd_tsdn(tsd), 0, false) != NULL); + malloc_mutex_lock(&arenas_lock); + assert(a0get() != NULL); for (i = 1; i < narenas_auto; i++) { - if (arena_get(tsd_tsdn(tsd), i, false) != NULL) { + if (arenas[i] != NULL) { /* * Choose the first arena that has the lowest * number of threads assigned to it. 
*/ - for (j = 0; j < 2; j++) { - if (arena_nthreads_get(arena_get( - tsd_tsdn(tsd), i, false), !!j) < - arena_nthreads_get(arena_get( - tsd_tsdn(tsd), choose[j], false), - !!j)) - choose[j] = i; - } + if (arenas[i]->nthreads < + arenas[choose]->nthreads) + choose = i; } else if (first_null == narenas_auto) { /* * Record the index of the first uninitialized @@ -641,40 +605,27 @@ arena_choose_hard(tsd_t *tsd, bool internal) } } - for (j = 0; j < 2; j++) { - if (arena_nthreads_get(arena_get(tsd_tsdn(tsd), - choose[j], false), !!j) == 0 || first_null == - narenas_auto) { - /* - * Use an unloaded arena, or the least loaded - * arena if all arenas are already initialized. - */ - if (!!j == internal) { - ret = arena_get(tsd_tsdn(tsd), - choose[j], false); - } - } else { - arena_t *arena; - - /* Initialize a new arena. */ - choose[j] = first_null; - arena = arena_init_locked(tsd_tsdn(tsd), - choose[j]); - if (arena == NULL) { - malloc_mutex_unlock(tsd_tsdn(tsd), - &arenas_lock); - return (NULL); - } - if (!!j == internal) - ret = arena; + if (arenas[choose]->nthreads == 0 + || first_null == narenas_auto) { + /* + * Use an unloaded arena, or the least loaded arena if + * all arenas are already initialized. + */ + ret = arenas[choose]; + } else { + /* Initialize a new arena. */ + choose = first_null; + ret = arena_init_locked(choose); + if (ret == NULL) { + malloc_mutex_unlock(&arenas_lock); + return (NULL); } - arena_bind(tsd, choose[j], !!j); } - malloc_mutex_unlock(tsd_tsdn(tsd), &arenas_lock); + arena_bind_locked(tsd, choose); + malloc_mutex_unlock(&arenas_lock); } else { - ret = arena_get(tsd_tsdn(tsd), 0, false); - arena_bind(tsd, 0, false); - arena_bind(tsd, 0, true); + ret = a0get(); + arena_bind(tsd, 0); } return (ret); @@ -694,16 +645,6 @@ thread_deallocated_cleanup(tsd_t *tsd) /* Do nothing. 
*/ } -void -iarena_cleanup(tsd_t *tsd) -{ - arena_t *iarena; - - iarena = tsd_iarena_get(tsd); - if (iarena != NULL) - arena_unbind(tsd, iarena->ind, true); -} - void arena_cleanup(tsd_t *tsd) { @@ -711,33 +652,30 @@ arena_cleanup(tsd_t *tsd) arena = tsd_arena_get(tsd); if (arena != NULL) - arena_unbind(tsd, arena->ind, false); + arena_unbind(tsd, arena->ind); } void -arenas_tdata_cleanup(tsd_t *tsd) +arenas_cache_cleanup(tsd_t *tsd) { - arena_tdata_t *arenas_tdata; + arena_t **arenas_cache; - /* Prevent tsd->arenas_tdata from being (re)created. */ - *tsd_arenas_tdata_bypassp_get(tsd) = true; - - arenas_tdata = tsd_arenas_tdata_get(tsd); - if (arenas_tdata != NULL) { - tsd_arenas_tdata_set(tsd, NULL); - a0dalloc(arenas_tdata); + arenas_cache = tsd_arenas_cache_get(tsd); + if (arenas_cache != NULL) { + tsd_arenas_cache_set(tsd, NULL); + a0dalloc(arenas_cache); } } void -narenas_tdata_cleanup(tsd_t *tsd) +narenas_cache_cleanup(tsd_t *tsd) { /* Do nothing. */ } void -arenas_tdata_bypass_cleanup(tsd_t *tsd) +arenas_cache_bypass_cleanup(tsd_t *tsd) { /* Do nothing. */ @@ -748,11 +686,8 @@ stats_print_atexit(void) { if (config_tcache && config_stats) { - tsdn_t *tsdn; unsigned narenas, i; - tsdn = tsdn_fetch(); - /* * Merge stats from extant threads. This is racy, since * individual threads do not lock when recording tcache stats @@ -761,7 +696,7 @@ stats_print_atexit(void) * continue to allocate. */ for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { - arena_t *arena = arena_get(tsdn, i, false); + arena_t *arena = arenas[i]; if (arena != NULL) { tcache_t *tcache; @@ -771,11 +706,11 @@ stats_print_atexit(void) * and bin locks in the opposite order, * deadlocks may result. 
*/ - malloc_mutex_lock(tsdn, &arena->lock); + malloc_mutex_lock(&arena->lock); ql_foreach(tcache, &arena->tcache_ql, link) { - tcache_stats_merge(tsdn, tcache, arena); + tcache_stats_merge(tcache, arena); } - malloc_mutex_unlock(tsdn, &arena->lock); + malloc_mutex_unlock(&arena->lock); } } } @@ -812,20 +747,6 @@ malloc_ncpus(void) SYSTEM_INFO si; GetSystemInfo(&si); result = si.dwNumberOfProcessors; -#elif defined(JEMALLOC_GLIBC_MALLOC_HOOK) && defined(CPU_COUNT) - /* - * glibc >= 2.6 has the CPU_COUNT macro. - * - * glibc's sysconf() uses isspace(). glibc allocates for the first time - * *before* setting up the isspace tables. Therefore we need a - * different method to get the number of CPUs. - */ - { - cpu_set_t set; - - pthread_getaffinity_np(pthread_self(), sizeof(set), &set); - result = CPU_COUNT(&set); - } #else result = sysconf(_SC_NPROCESSORS_ONLN); #endif @@ -917,26 +838,6 @@ malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v, (int)vlen, v); } -static void -malloc_slow_flag_init(void) -{ - /* - * Combine the runtime options into malloc_slow for fast path. Called - * after processing all the options. - */ - malloc_slow_flags |= (opt_junk_alloc ? flag_opt_junk_alloc : 0) - | (opt_junk_free ? flag_opt_junk_free : 0) - | (opt_quarantine ? flag_opt_quarantine : 0) - | (opt_zero ? flag_opt_zero : 0) - | (opt_utrace ? flag_opt_utrace : 0) - | (opt_xmalloc ? flag_opt_xmalloc : 0); - - if (config_valgrind) - malloc_slow_flags |= (in_valgrind ? flag_in_valgrind : 0); - - malloc_slow = (malloc_slow_flags != 0); -} - static void malloc_conf_init(void) { @@ -963,13 +864,10 @@ malloc_conf_init(void) opt_tcache = false; } - for (i = 0; i < 4; i++) { + for (i = 0; i < 3; i++) { /* Get runtime configuration. 
*/ switch (i) { case 0: - opts = config_malloc_conf; - break; - case 1: if (je_malloc_conf != NULL) { /* * Use options that were compiled into the @@ -982,8 +880,8 @@ malloc_conf_init(void) opts = buf; } break; - case 2: { - ssize_t linklen = 0; + case 1: { + int linklen = 0; #ifndef _WIN32 int saved_errno = errno; const char *linkname = @@ -1009,7 +907,7 @@ malloc_conf_init(void) buf[linklen] = '\0'; opts = buf; break; - } case 3: { + } case 2: { const char *envname = #ifdef JEMALLOC_PREFIX JEMALLOC_CPREFIX"MALLOC_CONF" @@ -1056,11 +954,7 @@ malloc_conf_init(void) if (cont) \ continue; \ } -#define CONF_MIN_no(um, min) false -#define CONF_MIN_yes(um, min) ((um) < (min)) -#define CONF_MAX_no(um, max) false -#define CONF_MAX_yes(um, max) ((um) > (max)) -#define CONF_HANDLE_T_U(t, o, n, min, max, check_min, check_max, clip) \ +#define CONF_HANDLE_SIZE_T(o, n, min, max, clip) \ if (CONF_MATCH(n)) { \ uintmax_t um; \ char *end; \ @@ -1073,35 +967,24 @@ malloc_conf_init(void) "Invalid conf value", \ k, klen, v, vlen); \ } else if (clip) { \ - if (CONF_MIN_##check_min(um, \ - (min))) \ - o = (t)(min); \ - else if (CONF_MAX_##check_max( \ - um, (max))) \ - o = (t)(max); \ + if ((min) != 0 && um < (min)) \ + o = (min); \ + else if (um > (max)) \ + o = (max); \ else \ - o = (t)um; \ + o = um; \ } else { \ - if (CONF_MIN_##check_min(um, \ - (min)) || \ - CONF_MAX_##check_max(um, \ - (max))) { \ + if (((min) != 0 && um < (min)) \ + || um > (max)) { \ malloc_conf_error( \ "Out-of-range " \ "conf value", \ k, klen, v, vlen); \ } else \ - o = (t)um; \ + o = um; \ } \ continue; \ } -#define CONF_HANDLE_UNSIGNED(o, n, min, max, check_min, check_max, \ - clip) \ - CONF_HANDLE_T_U(unsigned, o, n, min, max, \ - check_min, check_max, clip) -#define CONF_HANDLE_SIZE_T(o, n, min, max, check_min, check_max, clip) \ - CONF_HANDLE_T_U(size_t, o, n, min, max, \ - check_min, check_max, clip) #define CONF_HANDLE_SSIZE_T(o, n, min, max) \ if (CONF_MATCH(n)) { \ long l; \ @@ -1144,7 +1027,7 @@ 
malloc_conf_init(void) */ CONF_HANDLE_SIZE_T(opt_lg_chunk, "lg_chunk", LG_PAGE + LG_SIZE_CLASS_GROUP + (config_fill ? 2 : 1), - (sizeof(size_t) << 3) - 1, yes, yes, true) + (sizeof(size_t) << 3) - 1, true) if (strncmp("dss", k, klen) == 0) { int i; bool match = false; @@ -1169,47 +1052,17 @@ malloc_conf_init(void) } continue; } - CONF_HANDLE_UNSIGNED(opt_narenas, "narenas", 1, - UINT_MAX, yes, no, false) - if (strncmp("purge", k, klen) == 0) { - int i; - bool match = false; - for (i = 0; i < purge_mode_limit; i++) { - if (strncmp(purge_mode_names[i], v, - vlen) == 0) { - opt_purge = (purge_mode_t)i; - match = true; - break; - } - } - if (!match) { - malloc_conf_error("Invalid conf value", - k, klen, v, vlen); - } - continue; - } + CONF_HANDLE_SIZE_T(opt_narenas, "narenas", 1, + SIZE_T_MAX, false) CONF_HANDLE_SSIZE_T(opt_lg_dirty_mult, "lg_dirty_mult", -1, (sizeof(size_t) << 3) - 1) - CONF_HANDLE_SSIZE_T(opt_decay_time, "decay_time", -1, - NSTIME_SEC_MAX); CONF_HANDLE_BOOL(opt_stats_print, "stats_print", true) if (config_fill) { if (CONF_MATCH("junk")) { if (CONF_MATCH_VALUE("true")) { - if (config_valgrind && - unlikely(in_valgrind)) { - malloc_conf_error( - "Deallocation-time " - "junk filling cannot " - "be enabled while " - "running inside " - "Valgrind", k, klen, v, - vlen); - } else { - opt_junk = "true"; - opt_junk_alloc = true; - opt_junk_free = true; - } + opt_junk = "true"; + opt_junk_alloc = opt_junk_free = + true; } else if (CONF_MATCH_VALUE("false")) { opt_junk = "false"; opt_junk_alloc = opt_junk_free = @@ -1219,20 +1072,9 @@ malloc_conf_init(void) opt_junk_alloc = true; opt_junk_free = false; } else if (CONF_MATCH_VALUE("free")) { - if (config_valgrind && - unlikely(in_valgrind)) { - malloc_conf_error( - "Deallocation-time " - "junk filling cannot " - "be enabled while " - "running inside " - "Valgrind", k, klen, v, - vlen); - } else { - opt_junk = "free"; - opt_junk_alloc = false; - opt_junk_free = true; - } + opt_junk = "free"; + opt_junk_alloc = 
false; + opt_junk_free = true; } else { malloc_conf_error( "Invalid conf value", k, @@ -1241,7 +1083,7 @@ malloc_conf_init(void) continue; } CONF_HANDLE_SIZE_T(opt_quarantine, "quarantine", - 0, SIZE_T_MAX, no, no, false) + 0, SIZE_T_MAX, false) CONF_HANDLE_BOOL(opt_redzone, "redzone", true) CONF_HANDLE_BOOL(opt_zero, "zero", true) } @@ -1278,8 +1120,8 @@ malloc_conf_init(void) CONF_HANDLE_BOOL(opt_prof_thread_active_init, "prof_thread_active_init", true) CONF_HANDLE_SIZE_T(opt_lg_prof_sample, - "lg_prof_sample", 0, (sizeof(uint64_t) << 3) - - 1, no, yes, true) + "lg_prof_sample", 0, + (sizeof(uint64_t) << 3) - 1, true) CONF_HANDLE_BOOL(opt_prof_accum, "prof_accum", true) CONF_HANDLE_SSIZE_T(opt_lg_prof_interval, @@ -1295,14 +1137,7 @@ malloc_conf_init(void) malloc_conf_error("Invalid conf pair", k, klen, v, vlen); #undef CONF_MATCH -#undef CONF_MATCH_VALUE #undef CONF_HANDLE_BOOL -#undef CONF_MIN_no -#undef CONF_MIN_yes -#undef CONF_MAX_no -#undef CONF_MAX_yes -#undef CONF_HANDLE_T_U -#undef CONF_HANDLE_UNSIGNED #undef CONF_HANDLE_SIZE_T #undef CONF_HANDLE_SSIZE_T #undef CONF_HANDLE_CHAR_P @@ -1310,6 +1145,7 @@ malloc_conf_init(void) } } +/* init_lock must be held. */ static bool malloc_init_hard_needed(void) { @@ -1325,14 +1161,11 @@ malloc_init_hard_needed(void) } #ifdef JEMALLOC_THREADED_INIT if (malloc_initializer != NO_INITIALIZER && !IS_INITIALIZER) { - spin_t spinner; - /* Busy-wait until the initializing thread completes. */ - spin_init(&spinner); do { - malloc_mutex_unlock(TSDN_NULL, &init_lock); - spin_adaptive(&spinner); - malloc_mutex_lock(TSDN_NULL, &init_lock); + malloc_mutex_unlock(&init_lock); + CPU_SPINWAIT; + malloc_mutex_lock(&init_lock); } while (!malloc_initialized()); return (false); } @@ -1340,8 +1173,9 @@ malloc_init_hard_needed(void) return (true); } +/* init_lock must be held. 
*/ static bool -malloc_init_hard_a0_locked() +malloc_init_hard_a0_locked(void) { malloc_initializer = INITIALIZER; @@ -1357,7 +1191,6 @@ malloc_init_hard_a0_locked() abort(); } } - pages_boot(); if (base_boot()) return (true); if (chunk_boot()) @@ -1366,28 +1199,26 @@ malloc_init_hard_a0_locked() return (true); if (config_prof) prof_boot1(); - arena_boot(); - if (config_tcache && tcache_boot(TSDN_NULL)) + if (arena_boot()) return (true); - if (malloc_mutex_init(&arenas_lock, "arenas", WITNESS_RANK_ARENAS)) + if (config_tcache && tcache_boot()) + return (true); + if (malloc_mutex_init(&arenas_lock)) return (true); /* * Create enough scaffolding to allow recursive allocation in * malloc_ncpus(). */ - narenas_auto = 1; - narenas_total_set(narenas_auto); + narenas_total = narenas_auto = 1; arenas = &a0; memset(arenas, 0, sizeof(arena_t *) * narenas_auto); /* * Initialize one arena here. The rest are lazily created in * arena_choose_hard(). */ - if (arena_init(TSDN_NULL, 0) == NULL) + if (arena_init(0) == NULL) return (true); - malloc_init_state = malloc_init_a0_initialized; - return (false); } @@ -1396,42 +1227,45 @@ malloc_init_hard_a0(void) { bool ret; - malloc_mutex_lock(TSDN_NULL, &init_lock); + malloc_mutex_lock(&init_lock); ret = malloc_init_hard_a0_locked(); - malloc_mutex_unlock(TSDN_NULL, &init_lock); + malloc_mutex_unlock(&init_lock); return (ret); } -/* Initialize data structures which may trigger recursive allocation. */ -static bool +/* + * Initialize data structures which may trigger recursive allocation. + * + * init_lock must be held. + */ +static void malloc_init_hard_recursible(void) { malloc_init_state = malloc_init_recursible; + malloc_mutex_unlock(&init_lock); ncpus = malloc_ncpus(); -#if (defined(JEMALLOC_HAVE_PTHREAD_ATFORK) && !defined(JEMALLOC_MUTEX_INIT_CB) \ - && !defined(JEMALLOC_ZONE) && !defined(_WIN32) && \ - !defined(__native_client__)) - /* LinuxThreads' pthread_atfork() allocates. 
*/ +#if (!defined(JEMALLOC_MUTEX_INIT_CB) && !defined(JEMALLOC_ZONE) \ + && !defined(_WIN32) && !defined(__native_client__)) + /* LinuxThreads's pthread_atfork() allocates. */ if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent, jemalloc_postfork_child) != 0) { malloc_write(": Error in pthread_atfork()\n"); if (opt_abort) abort(); - return (true); } #endif - - return (false); + malloc_mutex_lock(&init_lock); } +/* init_lock must be held. */ static bool -malloc_init_hard_finish(tsdn_t *tsdn) +malloc_init_hard_finish(void) { - if (malloc_mutex_boot()) + if (mutex_boot()) return (true); if (opt_narenas == 0) { @@ -1446,69 +1280,68 @@ malloc_init_hard_finish(tsdn_t *tsdn) } narenas_auto = opt_narenas; /* - * Limit the number of arenas to the indexing range of MALLOCX_ARENA(). + * Make sure that the arenas array can be allocated. In practice, this + * limit is enough to allow the allocator to function, but the ctl + * machinery will fail to allocate memory at far lower limits. */ - if (narenas_auto > MALLOCX_ARENA_MAX) { - narenas_auto = MALLOCX_ARENA_MAX; + if (narenas_auto > chunksize / sizeof(arena_t *)) { + narenas_auto = chunksize / sizeof(arena_t *); malloc_printf(": Reducing narenas to limit (%d)\n", narenas_auto); } - narenas_total_set(narenas_auto); + narenas_total = narenas_auto; /* Allocate and initialize arenas. */ - arenas = (arena_t **)base_alloc(tsdn, sizeof(arena_t *) * - (MALLOCX_ARENA_MAX+1)); + arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas_total); if (arenas == NULL) return (true); + /* + * Zero the array. In practice, this should always be pre-zeroed, + * since it was just mmap()ed, but let's be sure. + */ + memset(arenas, 0, sizeof(arena_t *) * narenas_total); /* Copy the pointer to the one arena that was already initialized. 
*/ - arena_set(0, a0); + arenas[0] = a0; malloc_init_state = malloc_init_initialized; - malloc_slow_flag_init(); - return (false); } static bool malloc_init_hard(void) { - tsd_t *tsd; #if defined(_WIN32) && _WIN32_WINNT < 0x0600 _init_init_lock(); #endif - malloc_mutex_lock(TSDN_NULL, &init_lock); + malloc_mutex_lock(&init_lock); if (!malloc_init_hard_needed()) { - malloc_mutex_unlock(TSDN_NULL, &init_lock); + malloc_mutex_unlock(&init_lock); return (false); } if (malloc_init_state != malloc_init_a0_initialized && malloc_init_hard_a0_locked()) { - malloc_mutex_unlock(TSDN_NULL, &init_lock); + malloc_mutex_unlock(&init_lock); + return (true); + } + if (malloc_tsd_boot0()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + if (config_prof && prof_boot2()) { + malloc_mutex_unlock(&init_lock); return (true); } - malloc_mutex_unlock(TSDN_NULL, &init_lock); - /* Recursive allocation relies on functional tsd. */ - tsd = malloc_tsd_boot0(); - if (tsd == NULL) - return (true); - if (malloc_init_hard_recursible()) - return (true); - malloc_mutex_lock(tsd_tsdn(tsd), &init_lock); + malloc_init_hard_recursible(); - if (config_prof && prof_boot2(tsd)) { - malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); + if (malloc_init_hard_finish()) { + malloc_mutex_unlock(&init_lock); return (true); } - if (malloc_init_hard_finish(tsd_tsdn(tsd))) { - malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); - return (true); - } - - malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); + malloc_mutex_unlock(&init_lock); malloc_tsd_boot1(); return (false); } @@ -1522,104 +1355,61 @@ malloc_init_hard(void) */ static void * -ialloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind, bool zero, - prof_tctx_t *tctx, bool slow_path) +imalloc_prof_sample(tsd_t *tsd, size_t usize, prof_tctx_t *tctx) { void *p; if (tctx == NULL) return (NULL); if (usize <= SMALL_MAXCLASS) { - szind_t ind_large = size2index(LARGE_MINCLASS); - p = ialloc(tsd, LARGE_MINCLASS, ind_large, zero, slow_path); + p = imalloc(tsd, 
LARGE_MINCLASS); if (p == NULL) return (NULL); - arena_prof_promoted(tsd_tsdn(tsd), p, usize); + arena_prof_promoted(p, usize); } else - p = ialloc(tsd, usize, ind, zero, slow_path); + p = imalloc(tsd, usize); return (p); } JEMALLOC_ALWAYS_INLINE_C void * -ialloc_prof(tsd_t *tsd, size_t usize, szind_t ind, bool zero, bool slow_path) +imalloc_prof(tsd_t *tsd, size_t usize) { void *p; prof_tctx_t *tctx; tctx = prof_alloc_prep(tsd, usize, prof_active_get_unlocked(), true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) - p = ialloc_prof_sample(tsd, usize, ind, zero, tctx, slow_path); + p = imalloc_prof_sample(tsd, usize, tctx); else - p = ialloc(tsd, usize, ind, zero, slow_path); + p = imalloc(tsd, usize); if (unlikely(p == NULL)) { prof_alloc_rollback(tsd, tctx, true); return (NULL); } - prof_malloc(tsd_tsdn(tsd), p, usize, tctx); + prof_malloc(p, usize, tctx); return (p); } -/* - * ialloc_body() is inlined so that fast and slow paths are generated separately - * with statically known slow_path. - * - * This function guarantees that *tsdn is non-NULL on success. 
- */ JEMALLOC_ALWAYS_INLINE_C void * -ialloc_body(size_t size, bool zero, tsdn_t **tsdn, size_t *usize, - bool slow_path) -{ - tsd_t *tsd; - szind_t ind; - - if (slow_path && unlikely(malloc_init())) { - *tsdn = NULL; - return (NULL); - } - - tsd = tsd_fetch(); - *tsdn = tsd_tsdn(tsd); - witness_assert_lockless(tsd_tsdn(tsd)); - - ind = size2index(size); - if (unlikely(ind >= NSIZES)) - return (NULL); - - if (config_stats || (config_prof && opt_prof) || (slow_path && - config_valgrind && unlikely(in_valgrind))) { - *usize = index2size(ind); - assert(*usize > 0 && *usize <= HUGE_MAXCLASS); - } - - if (config_prof && opt_prof) - return (ialloc_prof(tsd, *usize, ind, zero, slow_path)); - - return (ialloc(tsd, size, ind, zero, slow_path)); -} - -JEMALLOC_ALWAYS_INLINE_C void -ialloc_post_check(void *ret, tsdn_t *tsdn, size_t usize, const char *func, - bool update_errno, bool slow_path) +imalloc_body(size_t size, tsd_t **tsd, size_t *usize) { - assert(!tsdn_null(tsdn) || ret == NULL); + if (unlikely(malloc_init())) + return (NULL); + *tsd = tsd_fetch(); - if (unlikely(ret == NULL)) { - if (slow_path && config_xmalloc && unlikely(opt_xmalloc)) { - malloc_printf(": Error in %s(): out of " - "memory\n", func); - abort(); - } - if (update_errno) - set_errno(ENOMEM); + if (config_prof && opt_prof) { + *usize = s2u(size); + if (unlikely(*usize == 0)) + return (NULL); + return (imalloc_prof(*tsd, *usize)); } - if (config_stats && likely(ret != NULL)) { - assert(usize == isalloc(tsdn, ret, config_prof)); - *tsd_thread_allocatedp_get(tsdn_tsd(tsdn)) += usize; - } - witness_assert_lockless(tsdn); + + if (config_stats || (config_valgrind && unlikely(in_valgrind))) + *usize = s2u(size); + return (imalloc(*tsd, size)); } JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN @@ -1628,22 +1418,27 @@ JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) je_malloc(size_t size) { void *ret; - tsdn_t *tsdn; + tsd_t *tsd; size_t usize JEMALLOC_CC_SILENCE_INIT(0); if (size == 0) size = 1; - if 
(likely(!malloc_slow)) { - ret = ialloc_body(size, false, &tsdn, &usize, false); - ialloc_post_check(ret, tsdn, usize, "malloc", true, false); - } else { - ret = ialloc_body(size, false, &tsdn, &usize, true); - ialloc_post_check(ret, tsdn, usize, "malloc", true, true); - UTRACE(0, size, ret); - JEMALLOC_VALGRIND_MALLOC(ret != NULL, tsdn, ret, usize, false); + ret = imalloc_body(size, &tsd, &usize); + if (unlikely(ret == NULL)) { + if (config_xmalloc && unlikely(opt_xmalloc)) { + malloc_write(": Error in malloc(): " + "out of memory\n"); + abort(); + } + set_errno(ENOMEM); } - + if (config_stats && likely(ret != NULL)) { + assert(usize == isalloc(ret, config_prof)); + *tsd_thread_allocatedp_get(tsd) += usize; + } + UTRACE(0, size, ret); + JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, false); return (ret); } @@ -1660,7 +1455,7 @@ imemalign_prof_sample(tsd_t *tsd, size_t alignment, size_t usize, p = ipalloc(tsd, LARGE_MINCLASS, alignment, false); if (p == NULL) return (NULL); - arena_prof_promoted(tsd_tsdn(tsd), p, usize); + arena_prof_promoted(p, usize); } else p = ipalloc(tsd, usize, alignment, false); @@ -1682,7 +1477,7 @@ imemalign_prof(tsd_t *tsd, size_t alignment, size_t usize) prof_alloc_rollback(tsd, tctx, true); return (NULL); } - prof_malloc(tsd_tsdn(tsd), p, usize, tctx); + prof_malloc(p, usize, tctx); return (p); } @@ -1699,12 +1494,10 @@ imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) assert(min_alignment != 0); if (unlikely(malloc_init())) { - tsd = NULL; result = NULL; goto label_oom; } tsd = tsd_fetch(); - witness_assert_lockless(tsd_tsdn(tsd)); if (size == 0) size = 1; @@ -1722,7 +1515,7 @@ imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) } usize = sa2u(size, alignment); - if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) { + if (unlikely(usize == 0)) { result = NULL; goto label_oom; } @@ -1739,13 +1532,10 @@ imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) 
ret = 0; label_return: if (config_stats && likely(result != NULL)) { - assert(usize == isalloc(tsd_tsdn(tsd), result, config_prof)); + assert(usize == isalloc(result, config_prof)); *tsd_thread_allocatedp_get(tsd) += usize; } UTRACE(0, size, result); - JEMALLOC_VALGRIND_MALLOC(result != NULL, tsd_tsdn(tsd), result, usize, - false); - witness_assert_lockless(tsd_tsdn(tsd)); return (ret); label_oom: assert(result == NULL); @@ -1755,7 +1545,6 @@ label_oom: abort(); } ret = ENOMEM; - witness_assert_lockless(tsd_tsdn(tsd)); goto label_return; } @@ -1763,10 +1552,9 @@ JEMALLOC_EXPORT int JEMALLOC_NOTHROW JEMALLOC_ATTR(nonnull(1)) je_posix_memalign(void **memptr, size_t alignment, size_t size) { - int ret; - - ret = imemalign(memptr, alignment, size, sizeof(void *)); - + int ret = imemalign(memptr, alignment, size, sizeof(void *)); + JEMALLOC_VALGRIND_MALLOC(ret == 0, *memptr, isalloc(*memptr, + config_prof), false); return (ret); } @@ -1782,45 +1570,114 @@ je_aligned_alloc(size_t alignment, size_t size) ret = NULL; set_errno(err); } - + JEMALLOC_VALGRIND_MALLOC(err == 0, ret, isalloc(ret, config_prof), + false); return (ret); } +static void * +icalloc_prof_sample(tsd_t *tsd, size_t usize, prof_tctx_t *tctx) +{ + void *p; + + if (tctx == NULL) + return (NULL); + if (usize <= SMALL_MAXCLASS) { + p = icalloc(tsd, LARGE_MINCLASS); + if (p == NULL) + return (NULL); + arena_prof_promoted(p, usize); + } else + p = icalloc(tsd, usize); + + return (p); +} + +JEMALLOC_ALWAYS_INLINE_C void * +icalloc_prof(tsd_t *tsd, size_t usize) +{ + void *p; + prof_tctx_t *tctx; + + tctx = prof_alloc_prep(tsd, usize, prof_active_get_unlocked(), true); + if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) + p = icalloc_prof_sample(tsd, usize, tctx); + else + p = icalloc(tsd, usize); + if (unlikely(p == NULL)) { + prof_alloc_rollback(tsd, tctx, true); + return (NULL); + } + prof_malloc(p, usize, tctx); + + return (p); +} + JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void 
JEMALLOC_NOTHROW * JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE2(1, 2) je_calloc(size_t num, size_t size) { void *ret; - tsdn_t *tsdn; + tsd_t *tsd; size_t num_size; size_t usize JEMALLOC_CC_SILENCE_INIT(0); + if (unlikely(malloc_init())) { + num_size = 0; + ret = NULL; + goto label_return; + } + tsd = tsd_fetch(); + num_size = num * size; if (unlikely(num_size == 0)) { if (num == 0 || size == 0) num_size = 1; - else - num_size = HUGE_MAXCLASS + 1; /* Trigger OOM. */ + else { + ret = NULL; + goto label_return; + } /* * Try to avoid division here. We know that it isn't possible to * overflow during multiplication if neither operand uses any of the * most significant half of the bits in a size_t. */ } else if (unlikely(((num | size) & (SIZE_T_MAX << (sizeof(size_t) << - 2))) && (num_size / size != num))) - num_size = HUGE_MAXCLASS + 1; /* size_t overflow. */ - - if (likely(!malloc_slow)) { - ret = ialloc_body(num_size, true, &tsdn, &usize, false); - ialloc_post_check(ret, tsdn, usize, "calloc", true, false); - } else { - ret = ialloc_body(num_size, true, &tsdn, &usize, true); - ialloc_post_check(ret, tsdn, usize, "calloc", true, true); - UTRACE(0, num_size, ret); - JEMALLOC_VALGRIND_MALLOC(ret != NULL, tsdn, ret, usize, true); + 2))) && (num_size / size != num))) { + /* size_t overflow. 
*/ + ret = NULL; + goto label_return; } + if (config_prof && opt_prof) { + usize = s2u(num_size); + if (unlikely(usize == 0)) { + ret = NULL; + goto label_return; + } + ret = icalloc_prof(tsd, usize); + } else { + if (config_stats || (config_valgrind && unlikely(in_valgrind))) + usize = s2u(num_size); + ret = icalloc(tsd, num_size); + } + +label_return: + if (unlikely(ret == NULL)) { + if (config_xmalloc && unlikely(opt_xmalloc)) { + malloc_write(": Error in calloc(): out of " + "memory\n"); + abort(); + } + set_errno(ENOMEM); + } + if (config_stats && likely(ret != NULL)) { + assert(usize == isalloc(ret, config_prof)); + *tsd_thread_allocatedp_get(tsd) += usize; + } + UTRACE(0, num_size, ret); + JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, true); return (ret); } @@ -1836,7 +1693,7 @@ irealloc_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize, p = iralloc(tsd, old_ptr, old_usize, LARGE_MINCLASS, 0, false); if (p == NULL) return (NULL); - arena_prof_promoted(tsd_tsdn(tsd), p, usize); + arena_prof_promoted(p, usize); } else p = iralloc(tsd, old_ptr, old_usize, usize, 0, false); @@ -1851,7 +1708,7 @@ irealloc_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize) prof_tctx_t *old_tctx, *tctx; prof_active = prof_active_get_unlocked(); - old_tctx = prof_tctx_get(tsd_tsdn(tsd), old_ptr); + old_tctx = prof_tctx_get(old_ptr); tctx = prof_alloc_prep(tsd, usize, prof_active, true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) p = irealloc_prof_sample(tsd, old_ptr, old_usize, usize, tctx); @@ -1868,41 +1725,32 @@ irealloc_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize) } JEMALLOC_INLINE_C void -ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) +ifree(tsd_t *tsd, void *ptr, tcache_t *tcache) { size_t usize; UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); - witness_assert_lockless(tsd_tsdn(tsd)); - assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); if (config_prof && opt_prof) { - usize = 
isalloc(tsd_tsdn(tsd), ptr, config_prof); + usize = isalloc(ptr, config_prof); prof_free(tsd, ptr, usize); } else if (config_stats || config_valgrind) - usize = isalloc(tsd_tsdn(tsd), ptr, config_prof); + usize = isalloc(ptr, config_prof); if (config_stats) *tsd_thread_deallocatedp_get(tsd) += usize; - - if (likely(!slow_path)) - iqalloc(tsd, ptr, tcache, false); - else { - if (config_valgrind && unlikely(in_valgrind)) - rzsize = p2rz(tsd_tsdn(tsd), ptr); - iqalloc(tsd, ptr, tcache, true); - JEMALLOC_VALGRIND_FREE(ptr, rzsize); - } + if (config_valgrind && unlikely(in_valgrind)) + rzsize = p2rz(ptr); + iqalloc(tsd, ptr, tcache); + JEMALLOC_VALGRIND_FREE(ptr, rzsize); } JEMALLOC_INLINE_C void -isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) +isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache) { UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); - witness_assert_lockless(tsd_tsdn(tsd)); - assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); @@ -1911,8 +1759,8 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) if (config_stats) *tsd_thread_deallocatedp_get(tsd) += usize; if (config_valgrind && unlikely(in_valgrind)) - rzsize = p2rz(tsd_tsdn(tsd), ptr); - isqalloc(tsd, ptr, usize, tcache, slow_path); + rzsize = p2rz(ptr); + isqalloc(tsd, ptr, usize, tcache); JEMALLOC_VALGRIND_FREE(ptr, rzsize); } @@ -1922,57 +1770,44 @@ JEMALLOC_ALLOC_SIZE(2) je_realloc(void *ptr, size_t size) { void *ret; - tsdn_t *tsdn JEMALLOC_CC_SILENCE_INIT(NULL); + tsd_t *tsd JEMALLOC_CC_SILENCE_INIT(NULL); size_t usize JEMALLOC_CC_SILENCE_INIT(0); size_t old_usize = 0; UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); if (unlikely(size == 0)) { if (ptr != NULL) { - tsd_t *tsd; - /* realloc(ptr, 0) is equivalent to free(ptr). 
*/ UTRACE(ptr, 0, 0); tsd = tsd_fetch(); - ifree(tsd, ptr, tcache_get(tsd, false), true); + ifree(tsd, ptr, tcache_get(tsd, false)); return (NULL); } size = 1; } if (likely(ptr != NULL)) { - tsd_t *tsd; - assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); tsd = tsd_fetch(); - witness_assert_lockless(tsd_tsdn(tsd)); - - old_usize = isalloc(tsd_tsdn(tsd), ptr, config_prof); - if (config_valgrind && unlikely(in_valgrind)) { - old_rzsize = config_prof ? p2rz(tsd_tsdn(tsd), ptr) : - u2rz(old_usize); - } + old_usize = isalloc(ptr, config_prof); + if (config_valgrind && unlikely(in_valgrind)) + old_rzsize = config_prof ? p2rz(ptr) : u2rz(old_usize); if (config_prof && opt_prof) { usize = s2u(size); - ret = unlikely(usize == 0 || usize > HUGE_MAXCLASS) ? - NULL : irealloc_prof(tsd, ptr, old_usize, usize); + ret = unlikely(usize == 0) ? NULL : irealloc_prof(tsd, + ptr, old_usize, usize); } else { if (config_stats || (config_valgrind && unlikely(in_valgrind))) usize = s2u(size); ret = iralloc(tsd, ptr, old_usize, size, 0, false); } - tsdn = tsd_tsdn(tsd); } else { /* realloc(NULL, size) is equivalent to malloc(size). 
*/ - if (likely(!malloc_slow)) - ret = ialloc_body(size, false, &tsdn, &usize, false); - else - ret = ialloc_body(size, false, &tsdn, &usize, true); - assert(!tsdn_null(tsdn) || ret == NULL); + ret = imalloc_body(size, &tsd, &usize); } if (unlikely(ret == NULL)) { @@ -1984,17 +1819,13 @@ je_realloc(void *ptr, size_t size) set_errno(ENOMEM); } if (config_stats && likely(ret != NULL)) { - tsd_t *tsd; - - assert(usize == isalloc(tsdn, ret, config_prof)); - tsd = tsdn_tsd(tsdn); + assert(usize == isalloc(ret, config_prof)); *tsd_thread_allocatedp_get(tsd) += usize; *tsd_thread_deallocatedp_get(tsd) += old_usize; } UTRACE(ptr, size, ret); - JEMALLOC_VALGRIND_REALLOC(maybe, tsdn, ret, usize, maybe, ptr, - old_usize, old_rzsize, maybe, false); - witness_assert_lockless(tsdn); + JEMALLOC_VALGRIND_REALLOC(true, ret, usize, true, ptr, old_usize, + old_rzsize, true, false); return (ret); } @@ -2005,12 +1836,7 @@ je_free(void *ptr) UTRACE(ptr, 0, 0); if (likely(ptr != NULL)) { tsd_t *tsd = tsd_fetch(); - witness_assert_lockless(tsd_tsdn(tsd)); - if (likely(!malloc_slow)) - ifree(tsd, ptr, tcache_get(tsd, false), false); - else - ifree(tsd, ptr, tcache_get(tsd, false), true); - witness_assert_lockless(tsd_tsdn(tsd)); + ifree(tsd, ptr, tcache_get(tsd, false)); } } @@ -2031,6 +1857,7 @@ je_memalign(size_t alignment, size_t size) void *ret JEMALLOC_CC_SILENCE_INIT(NULL); if (unlikely(imemalign(&ret, alignment, size, 1) != 0)) ret = NULL; + JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, size, false); return (ret); } #endif @@ -2044,6 +1871,7 @@ je_valloc(size_t size) void *ret JEMALLOC_CC_SILENCE_INIT(NULL); if (unlikely(imemalign(&ret, PAGE, size, 1) != 0)) ret = NULL; + JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, size, false); return (ret); } #endif @@ -2073,29 +1901,6 @@ JEMALLOC_EXPORT void *(*__realloc_hook)(void *ptr, size_t size) = je_realloc; JEMALLOC_EXPORT void *(*__memalign_hook)(size_t alignment, size_t size) = je_memalign; # endif - -#ifdef CPU_COUNT -/* - * To enable 
static linking with glibc, the libc specific malloc interface must - * be implemented also, so none of glibc's malloc.o functions are added to the - * link. - */ -#define ALIAS(je_fn) __attribute__((alias (#je_fn), used)) -/* To force macro expansion of je_ prefix before stringification. */ -#define PREALIAS(je_fn) ALIAS(je_fn) -void *__libc_malloc(size_t size) PREALIAS(je_malloc); -void __libc_free(void* ptr) PREALIAS(je_free); -void *__libc_realloc(void* ptr, size_t size) PREALIAS(je_realloc); -void *__libc_calloc(size_t n, size_t size) PREALIAS(je_calloc); -void *__libc_memalign(size_t align, size_t s) PREALIAS(je_memalign); -void *__libc_valloc(size_t size) PREALIAS(je_valloc); -int __posix_memalign(void** r, size_t a, size_t s) - PREALIAS(je_posix_memalign); -#undef PREALIAS -#undef ALIAS - -#endif - #endif /* @@ -2107,7 +1912,7 @@ int __posix_memalign(void** r, size_t a, size_t s) */ JEMALLOC_ALWAYS_INLINE_C bool -imallocx_flags_decode(tsd_t *tsd, size_t size, int flags, size_t *usize, +imallocx_flags_decode_hard(tsd_t *tsd, size_t size, int flags, size_t *usize, size_t *alignment, bool *zero, tcache_t **tcache, arena_t **arena) { @@ -2118,8 +1923,7 @@ imallocx_flags_decode(tsd_t *tsd, size_t size, int flags, size_t *usize, *alignment = MALLOCX_ALIGN_GET_SPECIFIED(flags); *usize = sa2u(size, *alignment); } - if (unlikely(*usize == 0 || *usize > HUGE_MAXCLASS)) - return (true); + assert(*usize != 0); *zero = MALLOCX_ZERO_GET(flags); if ((flags & MALLOCX_TCACHE_MASK) != 0) { if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) @@ -2130,7 +1934,7 @@ imallocx_flags_decode(tsd_t *tsd, size_t size, int flags, size_t *usize, *tcache = tcache_get(tsd, true); if ((flags & MALLOCX_ARENA_MASK) != 0) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); - *arena = arena_get(tsd_tsdn(tsd), arena_ind, true); + *arena = arena_get(tsd, arena_ind, true, true); if (unlikely(*arena == NULL)) return (true); } else @@ -2138,44 +1942,59 @@ imallocx_flags_decode(tsd_t *tsd, size_t 
size, int flags, size_t *usize, return (false); } -JEMALLOC_ALWAYS_INLINE_C void * -imallocx_flags(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, - tcache_t *tcache, arena_t *arena, bool slow_path) +JEMALLOC_ALWAYS_INLINE_C bool +imallocx_flags_decode(tsd_t *tsd, size_t size, int flags, size_t *usize, + size_t *alignment, bool *zero, tcache_t **tcache, arena_t **arena) +{ + + if (likely(flags == 0)) { + *usize = s2u(size); + assert(*usize != 0); + *alignment = 0; + *zero = false; + *tcache = tcache_get(tsd, true); + *arena = NULL; + return (false); + } else { + return (imallocx_flags_decode_hard(tsd, size, flags, usize, + alignment, zero, tcache, arena)); + } +} + +JEMALLOC_ALWAYS_INLINE_C void * +imallocx_flags(tsd_t *tsd, size_t usize, size_t alignment, bool zero, + tcache_t *tcache, arena_t *arena) { - szind_t ind; if (unlikely(alignment != 0)) - return (ipalloct(tsdn, usize, alignment, zero, tcache, arena)); - ind = size2index(usize); - assert(ind < NSIZES); - return (iallocztm(tsdn, usize, ind, zero, tcache, false, arena, - slow_path)); + return (ipalloct(tsd, usize, alignment, zero, tcache, arena)); + if (unlikely(zero)) + return (icalloct(tsd, usize, tcache, arena)); + return (imalloct(tsd, usize, tcache, arena)); } static void * -imallocx_prof_sample(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, - tcache_t *tcache, arena_t *arena, bool slow_path) +imallocx_prof_sample(tsd_t *tsd, size_t usize, size_t alignment, bool zero, + tcache_t *tcache, arena_t *arena) { void *p; if (usize <= SMALL_MAXCLASS) { assert(((alignment == 0) ? 
s2u(LARGE_MINCLASS) : sa2u(LARGE_MINCLASS, alignment)) == LARGE_MINCLASS); - p = imallocx_flags(tsdn, LARGE_MINCLASS, alignment, zero, - tcache, arena, slow_path); + p = imallocx_flags(tsd, LARGE_MINCLASS, alignment, zero, tcache, + arena); if (p == NULL) return (NULL); - arena_prof_promoted(tsdn, p, usize); - } else { - p = imallocx_flags(tsdn, usize, alignment, zero, tcache, arena, - slow_path); - } + arena_prof_promoted(p, usize); + } else + p = imallocx_flags(tsd, usize, alignment, zero, tcache, arena); return (p); } JEMALLOC_ALWAYS_INLINE_C void * -imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize, bool slow_path) +imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) { void *p; size_t alignment; @@ -2188,27 +2007,25 @@ imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize, bool slow_path) &zero, &tcache, &arena))) return (NULL); tctx = prof_alloc_prep(tsd, *usize, prof_active_get_unlocked(), true); - if (likely((uintptr_t)tctx == (uintptr_t)1U)) { - p = imallocx_flags(tsd_tsdn(tsd), *usize, alignment, zero, - tcache, arena, slow_path); - } else if ((uintptr_t)tctx > (uintptr_t)1U) { - p = imallocx_prof_sample(tsd_tsdn(tsd), *usize, alignment, zero, - tcache, arena, slow_path); + if (likely((uintptr_t)tctx == (uintptr_t)1U)) + p = imallocx_flags(tsd, *usize, alignment, zero, tcache, arena); + else if ((uintptr_t)tctx > (uintptr_t)1U) { + p = imallocx_prof_sample(tsd, *usize, alignment, zero, tcache, + arena); } else p = NULL; if (unlikely(p == NULL)) { prof_alloc_rollback(tsd, tctx, true); return (NULL); } - prof_malloc(tsd_tsdn(tsd), p, *usize, tctx); + prof_malloc(p, *usize, tctx); assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); return (p); } JEMALLOC_ALWAYS_INLINE_C void * -imallocx_no_prof(tsd_t *tsd, size_t size, int flags, size_t *usize, - bool slow_path) +imallocx_no_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) { void *p; size_t alignment; @@ -2216,78 +2033,56 @@ imallocx_no_prof(tsd_t 
*tsd, size_t size, int flags, size_t *usize, tcache_t *tcache; arena_t *arena; - if (unlikely(imallocx_flags_decode(tsd, size, flags, usize, &alignment, - &zero, &tcache, &arena))) + if (likely(flags == 0)) { + if (config_stats || (config_valgrind && unlikely(in_valgrind))) + *usize = s2u(size); + return (imalloc(tsd, size)); + } + + if (unlikely(imallocx_flags_decode_hard(tsd, size, flags, usize, + &alignment, &zero, &tcache, &arena))) return (NULL); - p = imallocx_flags(tsd_tsdn(tsd), *usize, alignment, zero, tcache, - arena, slow_path); + p = imallocx_flags(tsd, *usize, alignment, zero, tcache, arena); assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); return (p); } -/* This function guarantees that *tsdn is non-NULL on success. */ -JEMALLOC_ALWAYS_INLINE_C void * -imallocx_body(size_t size, int flags, tsdn_t **tsdn, size_t *usize, - bool slow_path) -{ - tsd_t *tsd; - - if (slow_path && unlikely(malloc_init())) { - *tsdn = NULL; - return (NULL); - } - - tsd = tsd_fetch(); - *tsdn = tsd_tsdn(tsd); - witness_assert_lockless(tsd_tsdn(tsd)); - - if (likely(flags == 0)) { - szind_t ind = size2index(size); - if (unlikely(ind >= NSIZES)) - return (NULL); - if (config_stats || (config_prof && opt_prof) || (slow_path && - config_valgrind && unlikely(in_valgrind))) { - *usize = index2size(ind); - assert(*usize > 0 && *usize <= HUGE_MAXCLASS); - } - - if (config_prof && opt_prof) { - return (ialloc_prof(tsd, *usize, ind, false, - slow_path)); - } - - return (ialloc(tsd, size, ind, false, slow_path)); - } - - if (config_prof && opt_prof) - return (imallocx_prof(tsd, size, flags, usize, slow_path)); - - return (imallocx_no_prof(tsd, size, flags, usize, slow_path)); -} - JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) je_mallocx(size_t size, int flags) { - tsdn_t *tsdn; + tsd_t *tsd; void *p; size_t usize; assert(size != 0); - if (likely(!malloc_slow)) { - p = 
imallocx_body(size, flags, &tsdn, &usize, false); - ialloc_post_check(p, tsdn, usize, "mallocx", false, false); - } else { - p = imallocx_body(size, flags, &tsdn, &usize, true); - ialloc_post_check(p, tsdn, usize, "mallocx", false, true); - UTRACE(0, size, p); - JEMALLOC_VALGRIND_MALLOC(p != NULL, tsdn, p, usize, - MALLOCX_ZERO_GET(flags)); - } + if (unlikely(malloc_init())) + goto label_oom; + tsd = tsd_fetch(); + if (config_prof && opt_prof) + p = imallocx_prof(tsd, size, flags, &usize); + else + p = imallocx_no_prof(tsd, size, flags, &usize); + if (unlikely(p == NULL)) + goto label_oom; + + if (config_stats) { + assert(usize == isalloc(p, config_prof)); + *tsd_thread_allocatedp_get(tsd) += usize; + } + UTRACE(0, size, p); + JEMALLOC_VALGRIND_MALLOC(true, p, usize, MALLOCX_ZERO_GET(flags)); return (p); +label_oom: + if (config_xmalloc && unlikely(opt_xmalloc)) { + malloc_write(": Error in mallocx(): out of memory\n"); + abort(); + } + UTRACE(0, size, 0); + return (NULL); } static void * @@ -2304,7 +2099,7 @@ irallocx_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize, zero, tcache, arena); if (p == NULL) return (NULL); - arena_prof_promoted(tsd_tsdn(tsd), p, usize); + arena_prof_promoted(p, usize); } else { p = iralloct(tsd, old_ptr, old_usize, usize, alignment, zero, tcache, arena); @@ -2323,8 +2118,8 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, prof_tctx_t *old_tctx, *tctx; prof_active = prof_active_get_unlocked(); - old_tctx = prof_tctx_get(tsd_tsdn(tsd), old_ptr); - tctx = prof_alloc_prep(tsd, *usize, prof_active, false); + old_tctx = prof_tctx_get(old_ptr); + tctx = prof_alloc_prep(tsd, *usize, prof_active, true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { p = irallocx_prof_sample(tsd, old_ptr, old_usize, *usize, alignment, zero, tcache, arena, tctx); @@ -2333,7 +2128,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, tcache, arena); } if (unlikely(p == NULL)) { - prof_alloc_rollback(tsd, 
tctx, false); + prof_alloc_rollback(tsd, tctx, true); return (NULL); } @@ -2346,9 +2141,9 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, * be the same as the current usize because of in-place large * reallocation. Therefore, query the actual value of usize. */ - *usize = isalloc(tsd_tsdn(tsd), p, config_prof); + *usize = isalloc(p, config_prof); } - prof_realloc(tsd, p, *usize, tctx, prof_active, false, old_ptr, + prof_realloc(tsd, p, *usize, tctx, prof_active, true, old_ptr, old_usize, old_tctx); return (p); @@ -2374,11 +2169,10 @@ je_rallocx(void *ptr, size_t size, int flags) assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); tsd = tsd_fetch(); - witness_assert_lockless(tsd_tsdn(tsd)); if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); - arena = arena_get(tsd_tsdn(tsd), arena_ind, true); + arena = arena_get(tsd, arena_ind, true, true); if (unlikely(arena == NULL)) goto label_oom; } else @@ -2392,14 +2186,13 @@ je_rallocx(void *ptr, size_t size, int flags) } else tcache = tcache_get(tsd, true); - old_usize = isalloc(tsd_tsdn(tsd), ptr, config_prof); + old_usize = isalloc(ptr, config_prof); if (config_valgrind && unlikely(in_valgrind)) old_rzsize = u2rz(old_usize); if (config_prof && opt_prof) { usize = (alignment == 0) ? 
s2u(size) : sa2u(size, alignment); - if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) - goto label_oom; + assert(usize != 0); p = irallocx_prof(tsd, ptr, old_usize, size, alignment, &usize, zero, tcache, arena); if (unlikely(p == NULL)) @@ -2410,7 +2203,7 @@ je_rallocx(void *ptr, size_t size, int flags) if (unlikely(p == NULL)) goto label_oom; if (config_stats || (config_valgrind && unlikely(in_valgrind))) - usize = isalloc(tsd_tsdn(tsd), p, config_prof); + usize = isalloc(p, config_prof); } assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); @@ -2419,9 +2212,8 @@ je_rallocx(void *ptr, size_t size, int flags) *tsd_thread_deallocatedp_get(tsd) += old_usize; } UTRACE(ptr, size, p); - JEMALLOC_VALGRIND_REALLOC(maybe, tsd_tsdn(tsd), p, usize, no, ptr, - old_usize, old_rzsize, no, zero); - witness_assert_lockless(tsd_tsdn(tsd)); + JEMALLOC_VALGRIND_REALLOC(true, p, usize, false, ptr, old_usize, + old_rzsize, false, zero); return (p); label_oom: if (config_xmalloc && unlikely(opt_xmalloc)) { @@ -2429,33 +2221,31 @@ label_oom: abort(); } UTRACE(ptr, size, 0); - witness_assert_lockless(tsd_tsdn(tsd)); return (NULL); } JEMALLOC_ALWAYS_INLINE_C size_t -ixallocx_helper(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size, - size_t extra, size_t alignment, bool zero) +ixallocx_helper(void *ptr, size_t old_usize, size_t size, size_t extra, + size_t alignment, bool zero) { size_t usize; - if (ixalloc(tsdn, ptr, old_usize, size, extra, alignment, zero)) + if (ixalloc(ptr, old_usize, size, extra, alignment, zero)) return (old_usize); - usize = isalloc(tsdn, ptr, config_prof); + usize = isalloc(ptr, config_prof); return (usize); } static size_t -ixallocx_prof_sample(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size, - size_t extra, size_t alignment, bool zero, prof_tctx_t *tctx) +ixallocx_prof_sample(void *ptr, size_t old_usize, size_t size, size_t extra, + size_t alignment, bool zero, prof_tctx_t *tctx) { size_t usize; if (tctx == NULL) return 
(old_usize); - usize = ixallocx_helper(tsdn, ptr, old_usize, size, extra, alignment, - zero); + usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, zero); return (usize); } @@ -2469,36 +2259,23 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, prof_tctx_t *old_tctx, *tctx; prof_active = prof_active_get_unlocked(); - old_tctx = prof_tctx_get(tsd_tsdn(tsd), ptr); + old_tctx = prof_tctx_get(ptr); /* * usize isn't knowable before ixalloc() returns when extra is non-zero. * Therefore, compute its maximum possible value and use that in * prof_alloc_prep() to decide whether to capture a backtrace. * prof_realloc() will use the actual usize to decide whether to sample. */ - if (alignment == 0) { - usize_max = s2u(size+extra); - assert(usize_max > 0 && usize_max <= HUGE_MAXCLASS); - } else { - usize_max = sa2u(size+extra, alignment); - if (unlikely(usize_max == 0 || usize_max > HUGE_MAXCLASS)) { - /* - * usize_max is out of range, and chances are that - * allocation will fail, but use the maximum possible - * value and carry on with prof_alloc_prep(), just in - * case allocation succeeds. - */ - usize_max = HUGE_MAXCLASS; - } - } + usize_max = (alignment == 0) ? 
s2u(size+extra) : sa2u(size+extra, + alignment); + assert(usize_max != 0); tctx = prof_alloc_prep(tsd, usize_max, prof_active, false); - if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { - usize = ixallocx_prof_sample(tsd_tsdn(tsd), ptr, old_usize, - size, extra, alignment, zero, tctx); + usize = ixallocx_prof_sample(ptr, old_usize, size, extra, + alignment, zero, tctx); } else { - usize = ixallocx_helper(tsd_tsdn(tsd), ptr, old_usize, size, - extra, alignment, zero); + usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, + zero); } if (usize == old_usize) { prof_alloc_rollback(tsd, tctx, false); @@ -2525,25 +2302,18 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); tsd = tsd_fetch(); - witness_assert_lockless(tsd_tsdn(tsd)); - old_usize = isalloc(tsd_tsdn(tsd), ptr, config_prof); + old_usize = isalloc(ptr, config_prof); - /* - * The API explicitly absolves itself of protecting against (size + - * extra) numerical overflow, but we may need to clamp extra to avoid - * exceeding HUGE_MAXCLASS. - * - * Ordinarily, size limit checking is handled deeper down, but here we - * have to check as part of (size + extra) clamping, since we need the - * clamped value in the above helper functions. - */ - if (unlikely(size > HUGE_MAXCLASS)) { - usize = old_usize; - goto label_not_resized; - } - if (unlikely(HUGE_MAXCLASS - size < extra)) + /* Clamp extra if necessary to avoid (size + extra) overflow. */ + if (unlikely(size + extra > HUGE_MAXCLASS)) { + /* Check for size overflow. 
*/ + if (unlikely(size > HUGE_MAXCLASS)) { + usize = old_usize; + goto label_not_resized; + } extra = HUGE_MAXCLASS - size; + } if (config_valgrind && unlikely(in_valgrind)) old_rzsize = u2rz(old_usize); @@ -2552,8 +2322,8 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) usize = ixallocx_prof(tsd, ptr, old_usize, size, extra, alignment, zero); } else { - usize = ixallocx_helper(tsd_tsdn(tsd), ptr, old_usize, size, - extra, alignment, zero); + usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, + zero); } if (unlikely(usize == old_usize)) goto label_not_resized; @@ -2562,11 +2332,10 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) *tsd_thread_allocatedp_get(tsd) += usize; *tsd_thread_deallocatedp_get(tsd) += old_usize; } - JEMALLOC_VALGRIND_REALLOC(no, tsd_tsdn(tsd), ptr, usize, no, ptr, - old_usize, old_rzsize, no, zero); + JEMALLOC_VALGRIND_REALLOC(false, ptr, usize, false, ptr, old_usize, + old_rzsize, false, zero); label_not_resized: UTRACE(ptr, size, ptr); - witness_assert_lockless(tsd_tsdn(tsd)); return (usize); } @@ -2575,20 +2344,15 @@ JEMALLOC_ATTR(pure) je_sallocx(const void *ptr, int flags) { size_t usize; - tsdn_t *tsdn; assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); - tsdn = tsdn_fetch(); - witness_assert_lockless(tsdn); - if (config_ivsalloc) - usize = ivsalloc(tsdn, ptr, config_prof); + usize = ivsalloc(ptr, config_prof); else - usize = isalloc(tsdn, ptr, config_prof); + usize = isalloc(ptr, config_prof); - witness_assert_lockless(tsdn); return (usize); } @@ -2602,7 +2366,6 @@ je_dallocx(void *ptr, int flags) assert(malloc_initialized() || IS_INITIALIZER); tsd = tsd_fetch(); - witness_assert_lockless(tsd_tsdn(tsd)); if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) tcache = NULL; @@ -2612,25 +2375,19 @@ je_dallocx(void *ptr, int flags) tcache = tcache_get(tsd, false); UTRACE(ptr, 0, 0); - if (likely(!malloc_slow)) - ifree(tsd, 
ptr, tcache, false); - else - ifree(tsd, ptr, tcache, true); - witness_assert_lockless(tsd_tsdn(tsd)); + ifree(tsd_fetch(), ptr, tcache); } JEMALLOC_ALWAYS_INLINE_C size_t -inallocx(tsdn_t *tsdn, size_t size, int flags) +inallocx(size_t size, int flags) { size_t usize; - witness_assert_lockless(tsdn); - if (likely((flags & MALLOCX_LG_ALIGN_MASK) == 0)) usize = s2u(size); else usize = sa2u(size, MALLOCX_ALIGN_GET_SPECIFIED(flags)); - witness_assert_lockless(tsdn); + assert(usize != 0); return (usize); } @@ -2643,11 +2400,10 @@ je_sdallocx(void *ptr, size_t size, int flags) assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); - tsd = tsd_fetch(); - usize = inallocx(tsd_tsdn(tsd), size, flags); - assert(usize == isalloc(tsd_tsdn(tsd), ptr, config_prof)); + usize = inallocx(size, flags); + assert(usize == isalloc(ptr, config_prof)); - witness_assert_lockless(tsd_tsdn(tsd)); + tsd = tsd_fetch(); if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) tcache = NULL; @@ -2657,116 +2413,75 @@ je_sdallocx(void *ptr, size_t size, int flags) tcache = tcache_get(tsd, false); UTRACE(ptr, 0, 0); - if (likely(!malloc_slow)) - isfree(tsd, ptr, usize, tcache, false); - else - isfree(tsd, ptr, usize, tcache, true); - witness_assert_lockless(tsd_tsdn(tsd)); + isfree(tsd, ptr, usize, tcache); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW JEMALLOC_ATTR(pure) je_nallocx(size_t size, int flags) { - size_t usize; - tsdn_t *tsdn; assert(size != 0); if (unlikely(malloc_init())) return (0); - tsdn = tsdn_fetch(); - witness_assert_lockless(tsdn); - - usize = inallocx(tsdn, size, flags); - if (unlikely(usize > HUGE_MAXCLASS)) - return (0); - - witness_assert_lockless(tsdn); - return (usize); + return (inallocx(size, flags)); } JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; - tsd_t *tsd; if (unlikely(malloc_init())) return (EAGAIN); - tsd = 
tsd_fetch(); - witness_assert_lockless(tsd_tsdn(tsd)); - ret = ctl_byname(tsd, name, oldp, oldlenp, newp, newlen); - witness_assert_lockless(tsd_tsdn(tsd)); - return (ret); + return (ctl_byname(name, oldp, oldlenp, newp, newlen)); } JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) { - int ret; - tsdn_t *tsdn; if (unlikely(malloc_init())) return (EAGAIN); - tsdn = tsdn_fetch(); - witness_assert_lockless(tsdn); - ret = ctl_nametomib(tsdn, name, mibp, miblenp); - witness_assert_lockless(tsdn); - return (ret); + return (ctl_nametomib(name, mibp, miblenp)); } JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; - tsd_t *tsd; if (unlikely(malloc_init())) return (EAGAIN); - tsd = tsd_fetch(); - witness_assert_lockless(tsd_tsdn(tsd)); - ret = ctl_bymib(tsd, mib, miblen, oldp, oldlenp, newp, newlen); - witness_assert_lockless(tsd_tsdn(tsd)); - return (ret); + return (ctl_bymib(mib, miblen, oldp, oldlenp, newp, newlen)); } JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, const char *opts) { - tsdn_t *tsdn; - tsdn = tsdn_fetch(); - witness_assert_lockless(tsdn); stats_print(write_cb, cbopaque, opts); - witness_assert_lockless(tsdn); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { size_t ret; - tsdn_t *tsdn; assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); - tsdn = tsdn_fetch(); - witness_assert_lockless(tsdn); - if (config_ivsalloc) - ret = ivsalloc(tsdn, ptr, config_prof); + ret = ivsalloc(ptr, config_prof); else - ret = (ptr == NULL) ? 0 : isalloc(tsdn, ptr, config_prof); + ret = (ptr == NULL) ? 
0 : isalloc(ptr, config_prof); - witness_assert_lockless(tsdn); return (ret); } @@ -2792,7 +2507,6 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) * to trigger the deadlock described above, but doing so would involve forking * via a library constructor that runs before jemalloc's runs. */ -#ifndef JEMALLOC_JET JEMALLOC_ATTR(constructor) static void jemalloc_constructor(void) @@ -2800,7 +2514,6 @@ jemalloc_constructor(void) malloc_init(); } -#endif #ifndef JEMALLOC_MUTEX_INIT_CB void @@ -2810,9 +2523,7 @@ JEMALLOC_EXPORT void _malloc_prefork(void) #endif { - tsd_t *tsd; - unsigned i, j, narenas; - arena_t *arena; + unsigned i; #ifdef JEMALLOC_MUTEX_INIT_CB if (!malloc_initialized()) @@ -2820,40 +2531,16 @@ _malloc_prefork(void) #endif assert(malloc_initialized()); - tsd = tsd_fetch(); - - narenas = narenas_total_get(); - - witness_prefork(tsd); /* Acquire all mutexes in a safe order. */ - ctl_prefork(tsd_tsdn(tsd)); - malloc_mutex_prefork(tsd_tsdn(tsd), &arenas_lock); - prof_prefork0(tsd_tsdn(tsd)); - for (i = 0; i < 3; i++) { - for (j = 0; j < narenas; j++) { - if ((arena = arena_get(tsd_tsdn(tsd), j, false)) != - NULL) { - switch (i) { - case 0: - arena_prefork0(tsd_tsdn(tsd), arena); - break; - case 1: - arena_prefork1(tsd_tsdn(tsd), arena); - break; - case 2: - arena_prefork2(tsd_tsdn(tsd), arena); - break; - default: not_reached(); - } - } - } + ctl_prefork(); + prof_prefork(); + malloc_mutex_prefork(&arenas_lock); + for (i = 0; i < narenas_total; i++) { + if (arenas[i] != NULL) + arena_prefork(arenas[i]); } - base_prefork(tsd_tsdn(tsd)); - for (i = 0; i < narenas; i++) { - if ((arena = arena_get(tsd_tsdn(tsd), i, false)) != NULL) - arena_prefork3(tsd_tsdn(tsd), arena); - } - prof_prefork1(tsd_tsdn(tsd)); + chunk_prefork(); + base_prefork(); } #ifndef JEMALLOC_MUTEX_INIT_CB @@ -2864,8 +2551,7 @@ JEMALLOC_EXPORT void _malloc_postfork(void) #endif { - tsd_t *tsd; - unsigned i, narenas; + unsigned i; #ifdef JEMALLOC_MUTEX_INIT_CB if 
(!malloc_initialized()) @@ -2873,44 +2559,35 @@ _malloc_postfork(void) #endif assert(malloc_initialized()); - tsd = tsd_fetch(); - - witness_postfork_parent(tsd); /* Release all mutexes, now that fork() has completed. */ - base_postfork_parent(tsd_tsdn(tsd)); - for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { - arena_t *arena; - - if ((arena = arena_get(tsd_tsdn(tsd), i, false)) != NULL) - arena_postfork_parent(tsd_tsdn(tsd), arena); + base_postfork_parent(); + chunk_postfork_parent(); + for (i = 0; i < narenas_total; i++) { + if (arenas[i] != NULL) + arena_postfork_parent(arenas[i]); } - prof_postfork_parent(tsd_tsdn(tsd)); - malloc_mutex_postfork_parent(tsd_tsdn(tsd), &arenas_lock); - ctl_postfork_parent(tsd_tsdn(tsd)); + malloc_mutex_postfork_parent(&arenas_lock); + prof_postfork_parent(); + ctl_postfork_parent(); } void jemalloc_postfork_child(void) { - tsd_t *tsd; - unsigned i, narenas; + unsigned i; assert(malloc_initialized()); - tsd = tsd_fetch(); - - witness_postfork_child(tsd); /* Release all mutexes, now that fork() has completed. 
*/ - base_postfork_child(tsd_tsdn(tsd)); - for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { - arena_t *arena; - - if ((arena = arena_get(tsd_tsdn(tsd), i, false)) != NULL) - arena_postfork_child(tsd_tsdn(tsd), arena); + base_postfork_child(); + chunk_postfork_child(); + for (i = 0; i < narenas_total; i++) { + if (arenas[i] != NULL) + arena_postfork_child(arenas[i]); } - prof_postfork_child(tsd_tsdn(tsd)); - malloc_mutex_postfork_child(tsd_tsdn(tsd), &arenas_lock); - ctl_postfork_child(tsd_tsdn(tsd)); + malloc_mutex_postfork_child(&arenas_lock); + prof_postfork_child(); + ctl_postfork_child(); } /******************************************************************************/ @@ -2930,10 +2607,9 @@ je_get_defrag_hint(void* ptr, int *bin_util, int *run_util) { if (likely((mapbits & CHUNK_MAP_LARGE) == 0)) { /* indication that this is not a LARGE alloc */ arena_t *arena = extent_node_arena_get(&chunk->node); size_t rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, pageind); - arena_run_t *run = &arena_miscelm_get_mutable(chunk, rpages_ind)->run; + arena_run_t *run = &arena_miscelm_get(chunk, rpages_ind)->run; arena_bin_t *bin = &arena->bins[run->binind]; - tsd_t *tsd = tsd_fetch(); - malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); + malloc_mutex_lock(&bin->lock); /* runs that are in the same chunk in as the current chunk, are likely to be the next currun */ if (chunk != (arena_chunk_t *)CHUNK_ADDR2BASE(bin->runcur)) { arena_bin_info_t *bin_info = &arena_bin_info[run->binind]; @@ -2942,7 +2618,7 @@ je_get_defrag_hint(void* ptr, int *bin_util, int *run_util) { *run_util = ((bin_info->nregs - run->nfree)<<16) / bin_info->nregs; defrag = 1; } - malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); + malloc_mutex_unlock(&bin->lock); } } return defrag; diff --git a/deps/jemalloc/src/mutex.c b/deps/jemalloc/src/mutex.c index 6333e73d6..2d47af976 100644 --- a/deps/jemalloc/src/mutex.c +++ b/deps/jemalloc/src/mutex.c @@ -69,7 +69,7 @@ JEMALLOC_EXPORT int 
_pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex, #endif bool -malloc_mutex_init(malloc_mutex_t *mutex, const char *name, witness_rank_t rank) +malloc_mutex_init(malloc_mutex_t *mutex) { #ifdef _WIN32 @@ -80,8 +80,6 @@ malloc_mutex_init(malloc_mutex_t *mutex, const char *name, witness_rank_t rank) _CRT_SPINCOUNT)) return (true); # endif -#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) - mutex->lock = OS_UNFAIR_LOCK_INIT; #elif (defined(JEMALLOC_OSSPIN)) mutex->lock = 0; #elif (defined(JEMALLOC_MUTEX_INIT_CB)) @@ -105,34 +103,31 @@ malloc_mutex_init(malloc_mutex_t *mutex, const char *name, witness_rank_t rank) } pthread_mutexattr_destroy(&attr); #endif - if (config_debug) - witness_init(&mutex->witness, name, rank, NULL); return (false); } void -malloc_mutex_prefork(tsdn_t *tsdn, malloc_mutex_t *mutex) +malloc_mutex_prefork(malloc_mutex_t *mutex) { - malloc_mutex_lock(tsdn, mutex); + malloc_mutex_lock(mutex); } void -malloc_mutex_postfork_parent(tsdn_t *tsdn, malloc_mutex_t *mutex) +malloc_mutex_postfork_parent(malloc_mutex_t *mutex) { - malloc_mutex_unlock(tsdn, mutex); + malloc_mutex_unlock(mutex); } void -malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex) +malloc_mutex_postfork_child(malloc_mutex_t *mutex) { #ifdef JEMALLOC_MUTEX_INIT_CB - malloc_mutex_unlock(tsdn, mutex); + malloc_mutex_unlock(mutex); #else - if (malloc_mutex_init(mutex, mutex->witness.name, - mutex->witness.rank)) { + if (malloc_mutex_init(mutex)) { malloc_printf(": Error re-initializing mutex in " "child\n"); if (opt_abort) @@ -142,7 +137,7 @@ malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex) } bool -malloc_mutex_boot(void) +mutex_boot(void) { #ifdef JEMALLOC_MUTEX_INIT_CB diff --git a/deps/jemalloc/src/nstime.c b/deps/jemalloc/src/nstime.c deleted file mode 100644 index 0948e29fa..000000000 --- a/deps/jemalloc/src/nstime.c +++ /dev/null @@ -1,194 +0,0 @@ -#include "jemalloc/internal/jemalloc_internal.h" - -#define BILLION UINT64_C(1000000000) - -void 
-nstime_init(nstime_t *time, uint64_t ns) -{ - - time->ns = ns; -} - -void -nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec) -{ - - time->ns = sec * BILLION + nsec; -} - -uint64_t -nstime_ns(const nstime_t *time) -{ - - return (time->ns); -} - -uint64_t -nstime_sec(const nstime_t *time) -{ - - return (time->ns / BILLION); -} - -uint64_t -nstime_nsec(const nstime_t *time) -{ - - return (time->ns % BILLION); -} - -void -nstime_copy(nstime_t *time, const nstime_t *source) -{ - - *time = *source; -} - -int -nstime_compare(const nstime_t *a, const nstime_t *b) -{ - - return ((a->ns > b->ns) - (a->ns < b->ns)); -} - -void -nstime_add(nstime_t *time, const nstime_t *addend) -{ - - assert(UINT64_MAX - time->ns >= addend->ns); - - time->ns += addend->ns; -} - -void -nstime_subtract(nstime_t *time, const nstime_t *subtrahend) -{ - - assert(nstime_compare(time, subtrahend) >= 0); - - time->ns -= subtrahend->ns; -} - -void -nstime_imultiply(nstime_t *time, uint64_t multiplier) -{ - - assert((((time->ns | multiplier) & (UINT64_MAX << (sizeof(uint64_t) << - 2))) == 0) || ((time->ns * multiplier) / multiplier == time->ns)); - - time->ns *= multiplier; -} - -void -nstime_idivide(nstime_t *time, uint64_t divisor) -{ - - assert(divisor != 0); - - time->ns /= divisor; -} - -uint64_t -nstime_divide(const nstime_t *time, const nstime_t *divisor) -{ - - assert(divisor->ns != 0); - - return (time->ns / divisor->ns); -} - -#ifdef _WIN32 -# define NSTIME_MONOTONIC true -static void -nstime_get(nstime_t *time) -{ - FILETIME ft; - uint64_t ticks_100ns; - - GetSystemTimeAsFileTime(&ft); - ticks_100ns = (((uint64_t)ft.dwHighDateTime) << 32) | ft.dwLowDateTime; - - nstime_init(time, ticks_100ns * 100); -} -#elif JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE -# define NSTIME_MONOTONIC true -static void -nstime_get(nstime_t *time) -{ - struct timespec ts; - - clock_gettime(CLOCK_MONOTONIC_COARSE, &ts); - nstime_init2(time, ts.tv_sec, ts.tv_nsec); -} -#elif JEMALLOC_HAVE_CLOCK_MONOTONIC -# define 
NSTIME_MONOTONIC true -static void -nstime_get(nstime_t *time) -{ - struct timespec ts; - - clock_gettime(CLOCK_MONOTONIC, &ts); - nstime_init2(time, ts.tv_sec, ts.tv_nsec); -} -#elif JEMALLOC_HAVE_MACH_ABSOLUTE_TIME -# define NSTIME_MONOTONIC true -static void -nstime_get(nstime_t *time) -{ - - nstime_init(time, mach_absolute_time()); -} -#else -# define NSTIME_MONOTONIC false -static void -nstime_get(nstime_t *time) -{ - struct timeval tv; - - gettimeofday(&tv, NULL); - nstime_init2(time, tv.tv_sec, tv.tv_usec * 1000); -} -#endif - -#ifdef JEMALLOC_JET -#undef nstime_monotonic -#define nstime_monotonic JEMALLOC_N(n_nstime_monotonic) -#endif -bool -nstime_monotonic(void) -{ - - return (NSTIME_MONOTONIC); -#undef NSTIME_MONOTONIC -} -#ifdef JEMALLOC_JET -#undef nstime_monotonic -#define nstime_monotonic JEMALLOC_N(nstime_monotonic) -nstime_monotonic_t *nstime_monotonic = JEMALLOC_N(n_nstime_monotonic); -#endif - -#ifdef JEMALLOC_JET -#undef nstime_update -#define nstime_update JEMALLOC_N(n_nstime_update) -#endif -bool -nstime_update(nstime_t *time) -{ - nstime_t old_time; - - nstime_copy(&old_time, time); - nstime_get(time); - - /* Handle non-monotonic clocks. */ - if (unlikely(nstime_compare(&old_time, time) > 0)) { - nstime_copy(time, &old_time); - return (true); - } - - return (false); -} -#ifdef JEMALLOC_JET -#undef nstime_update -#define nstime_update JEMALLOC_N(nstime_update) -nstime_update_t *nstime_update = JEMALLOC_N(n_nstime_update); -#endif diff --git a/deps/jemalloc/src/pages.c b/deps/jemalloc/src/pages.c index 5f0c9669d..83a167f67 100644 --- a/deps/jemalloc/src/pages.c +++ b/deps/jemalloc/src/pages.c @@ -1,49 +1,29 @@ #define JEMALLOC_PAGES_C_ #include "jemalloc/internal/jemalloc_internal.h" -#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT -#include -#endif - -/******************************************************************************/ -/* Data. 
*/ - -#ifndef _WIN32 -# define PAGES_PROT_COMMIT (PROT_READ | PROT_WRITE) -# define PAGES_PROT_DECOMMIT (PROT_NONE) -static int mmap_flags; -#endif -static bool os_overcommits; - /******************************************************************************/ void * -pages_map(void *addr, size_t size, bool *commit) +pages_map(void *addr, size_t size) { void *ret; assert(size != 0); - if (os_overcommits) - *commit = true; - #ifdef _WIN32 /* * If VirtualAlloc can't allocate at the given address when one is * given, it fails and returns NULL. */ - ret = VirtualAlloc(addr, size, MEM_RESERVE | (*commit ? MEM_COMMIT : 0), + ret = VirtualAlloc(addr, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); #else /* * We don't use MAP_FIXED here, because it can cause the *replacement* * of existing mappings, and we only want to create new mappings. */ - { - int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT; - - ret = mmap(addr, size, prot, mmap_flags, -1, 0); - } + ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, + -1, 0); assert(ret != NULL); if (ret == MAP_FAILED) @@ -87,8 +67,7 @@ pages_unmap(void *addr, size_t size) } void * -pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size, - bool *commit) +pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size) { void *ret = (void *)((uintptr_t)addr + leadsize); @@ -98,7 +77,7 @@ pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size, void *new_addr; pages_unmap(addr, alloc_size); - new_addr = pages_map(ret, size, commit); + new_addr = pages_map(ret, size); if (new_addr == ret) return (ret); if (new_addr) @@ -122,17 +101,17 @@ static bool pages_commit_impl(void *addr, size_t size, bool commit) { - if (os_overcommits) - return (true); - -#ifdef _WIN32 - return (commit ? (addr != VirtualAlloc(addr, size, MEM_COMMIT, - PAGE_READWRITE)) : (!VirtualFree(addr, size, MEM_DECOMMIT))); -#else - { - int prot = commit ? 
PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT; - void *result = mmap(addr, size, prot, mmap_flags | MAP_FIXED, - -1, 0); +#ifndef _WIN32 + /* + * The following decommit/commit implementation is functional, but + * always disabled because it doesn't add value beyong improved + * debugging (at the cost of extra system calls) on systems that + * overcommit. + */ + if (false) { + int prot = commit ? (PROT_READ | PROT_WRITE) : PROT_NONE; + void *result = mmap(addr, size, prot, MAP_PRIVATE | MAP_ANON | + MAP_FIXED, -1, 0); if (result == MAP_FAILED) return (true); if (result != addr) { @@ -146,6 +125,7 @@ pages_commit_impl(void *addr, size_t size, bool commit) return (false); } #endif + return (true); } bool @@ -170,16 +150,15 @@ pages_purge(void *addr, size_t size) #ifdef _WIN32 VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE); unzeroed = true; -#elif (defined(JEMALLOC_PURGE_MADVISE_FREE) || \ - defined(JEMALLOC_PURGE_MADVISE_DONTNEED)) -# if defined(JEMALLOC_PURGE_MADVISE_FREE) -# define JEMALLOC_MADV_PURGE MADV_FREE -# define JEMALLOC_MADV_ZEROS false -# elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED) +#elif defined(JEMALLOC_HAVE_MADVISE) +# ifdef JEMALLOC_PURGE_MADVISE_DONTNEED # define JEMALLOC_MADV_PURGE MADV_DONTNEED # define JEMALLOC_MADV_ZEROS true +# elif defined(JEMALLOC_PURGE_MADVISE_FREE) +# define JEMALLOC_MADV_PURGE MADV_FREE +# define JEMALLOC_MADV_ZEROS false # else -# error No madvise(2) flag defined for purging unused dirty pages +# error "No madvise(2) flag defined for purging unused dirty pages." 
# endif int err = madvise(addr, size, JEMALLOC_MADV_PURGE); unzeroed = (!JEMALLOC_MADV_ZEROS || err != 0); @@ -192,111 +171,3 @@ pages_purge(void *addr, size_t size) return (unzeroed); } -bool -pages_huge(void *addr, size_t size) -{ - - assert(PAGE_ADDR2BASE(addr) == addr); - assert(PAGE_CEILING(size) == size); - -#ifdef JEMALLOC_THP - return (madvise(addr, size, MADV_HUGEPAGE) != 0); -#else - return (false); -#endif -} - -bool -pages_nohuge(void *addr, size_t size) -{ - - assert(PAGE_ADDR2BASE(addr) == addr); - assert(PAGE_CEILING(size) == size); - -#ifdef JEMALLOC_THP - return (madvise(addr, size, MADV_NOHUGEPAGE) != 0); -#else - return (false); -#endif -} - -#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT -static bool -os_overcommits_sysctl(void) -{ - int vm_overcommit; - size_t sz; - - sz = sizeof(vm_overcommit); - if (sysctlbyname("vm.overcommit", &vm_overcommit, &sz, NULL, 0) != 0) - return (false); /* Error. */ - - return ((vm_overcommit & 0x3) == 0); -} -#endif - -#ifdef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY -/* - * Use syscall(2) rather than {open,read,close}(2) when possible to avoid - * reentry during bootstrapping if another library has interposed system call - * wrappers. - */ -static bool -os_overcommits_proc(void) -{ - int fd; - char buf[1]; - ssize_t nread; - -#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open) - fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY); -#else - fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY); -#endif - if (fd == -1) - return (false); /* Error. */ - -#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_read) - nread = (ssize_t)syscall(SYS_read, fd, &buf, sizeof(buf)); -#else - nread = read(fd, &buf, sizeof(buf)); -#endif - -#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close) - syscall(SYS_close, fd); -#else - close(fd); -#endif - - if (nread < 1) - return (false); /* Error. */ - /* - * /proc/sys/vm/overcommit_memory meanings: - * 0: Heuristic overcommit. - * 1: Always overcommit. 
- * 2: Never overcommit. - */ - return (buf[0] == '0' || buf[0] == '1'); -} -#endif - -void -pages_boot(void) -{ - -#ifndef _WIN32 - mmap_flags = MAP_PRIVATE | MAP_ANON; -#endif - -#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT - os_overcommits = os_overcommits_sysctl(); -#elif defined(JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY) - os_overcommits = os_overcommits_proc(); -# ifdef MAP_NORESERVE - if (os_overcommits) - mmap_flags |= MAP_NORESERVE; -# endif -#else - os_overcommits = false; -#endif -} diff --git a/deps/jemalloc/src/prng.c b/deps/jemalloc/src/prng.c deleted file mode 100644 index 76646a2a4..000000000 --- a/deps/jemalloc/src/prng.c +++ /dev/null @@ -1,2 +0,0 @@ -#define JEMALLOC_PRNG_C_ -#include "jemalloc/internal/jemalloc_internal.h" diff --git a/deps/jemalloc/src/prof.c b/deps/jemalloc/src/prof.c index c89dade1f..5d2b9598f 100644 --- a/deps/jemalloc/src/prof.c +++ b/deps/jemalloc/src/prof.c @@ -109,7 +109,7 @@ static char prof_dump_buf[ 1 #endif ]; -static size_t prof_dump_buf_end; +static unsigned prof_dump_buf_end; static int prof_dump_fd; /* Do not dump any profiles until bootstrapping is complete. */ @@ -121,13 +121,13 @@ static bool prof_booted = false; * definition. */ -static bool prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx); +static bool prof_tctx_should_destroy(prof_tctx_t *tctx); static void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx); -static bool prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, +static bool prof_tdata_should_destroy(prof_tdata_t *tdata, bool even_if_attached); static void prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached); -static char *prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name); +static char *prof_thread_name_alloc(tsd_t *tsd, const char *thread_name); /******************************************************************************/ /* Red-black trees. 
*/ @@ -213,23 +213,22 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) } if ((uintptr_t)tctx > (uintptr_t)1U) { - malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); + malloc_mutex_lock(tctx->tdata->lock); tctx->prepared = false; - if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) + if (prof_tctx_should_destroy(tctx)) prof_tctx_destroy(tsd, tctx); else - malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock); + malloc_mutex_unlock(tctx->tdata->lock); } } void -prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, - prof_tctx_t *tctx) +prof_malloc_sample_object(const void *ptr, size_t usize, prof_tctx_t *tctx) { - prof_tctx_set(tsdn, ptr, usize, tctx); + prof_tctx_set(ptr, usize, tctx); - malloc_mutex_lock(tsdn, tctx->tdata->lock); + malloc_mutex_lock(tctx->tdata->lock); tctx->cnts.curobjs++; tctx->cnts.curbytes += usize; if (opt_prof_accum) { @@ -237,23 +236,23 @@ prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, tctx->cnts.accumbytes += usize; } tctx->prepared = false; - malloc_mutex_unlock(tsdn, tctx->tdata->lock); + malloc_mutex_unlock(tctx->tdata->lock); } void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx) { - malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); + malloc_mutex_lock(tctx->tdata->lock); assert(tctx->cnts.curobjs > 0); assert(tctx->cnts.curbytes >= usize); tctx->cnts.curobjs--; tctx->cnts.curbytes -= usize; - if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) + if (prof_tctx_should_destroy(tctx)) prof_tctx_destroy(tsd, tctx); else - malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock); + malloc_mutex_unlock(tctx->tdata->lock); } void @@ -278,7 +277,7 @@ prof_enter(tsd_t *tsd, prof_tdata_t *tdata) tdata->enq = true; } - malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx); + malloc_mutex_lock(&bt2gctx_mtx); } JEMALLOC_INLINE_C void @@ -288,7 +287,7 @@ prof_leave(tsd_t *tsd, prof_tdata_t *tdata) cassert(config_prof); assert(tdata == prof_tdata_get(tsd, false)); - 
malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx); + malloc_mutex_unlock(&bt2gctx_mtx); if (tdata != NULL) { bool idump, gdump; @@ -301,9 +300,9 @@ prof_leave(tsd_t *tsd, prof_tdata_t *tdata) tdata->enq_gdump = false; if (idump) - prof_idump(tsd_tsdn(tsd)); + prof_idump(); if (gdump) - prof_gdump(tsd_tsdn(tsd)); + prof_gdump(); } } @@ -547,15 +546,14 @@ prof_tdata_mutex_choose(uint64_t thr_uid) } static prof_gctx_t * -prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt) +prof_gctx_create(tsd_t *tsd, prof_bt_t *bt) { /* * Create a single allocation that has space for vec of length bt->len. */ - size_t size = offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *)); - prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsdn, size, - size2index(size), false, NULL, true, arena_get(TSDN_NULL, 0, true), - true); + prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsd, offsetof(prof_gctx_t, + vec) + (bt->len * sizeof(void *)), false, tcache_get(tsd, true), + true, NULL); if (gctx == NULL) return (NULL); gctx->lock = prof_gctx_mutex_choose(); @@ -587,7 +585,7 @@ prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, * into this function. */ prof_enter(tsd, tdata_self); - malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); + malloc_mutex_lock(gctx->lock); assert(gctx->nlimbo != 0); if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) { /* Remove gctx from bt2gctx. */ @@ -595,25 +593,24 @@ prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, not_reached(); prof_leave(tsd, tdata_self); /* Destroy gctx. */ - malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); - idalloctm(tsd_tsdn(tsd), gctx, NULL, true, true); + malloc_mutex_unlock(gctx->lock); + idalloctm(tsd, gctx, tcache_get(tsd, false), true); } else { /* * Compensate for increment in prof_tctx_destroy() or * prof_lookup(). */ gctx->nlimbo--; - malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); + malloc_mutex_unlock(gctx->lock); prof_leave(tsd, tdata_self); } } +/* tctx->tdata->lock must be held. 
*/ static bool -prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx) +prof_tctx_should_destroy(prof_tctx_t *tctx) { - malloc_mutex_assert_owner(tsdn, tctx->tdata->lock); - if (opt_prof_accum) return (false); if (tctx->cnts.curobjs != 0) @@ -636,6 +633,7 @@ prof_gctx_should_destroy(prof_gctx_t *gctx) return (true); } +/* tctx->tdata->lock is held upon entry, and released before return. */ static void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) { @@ -643,8 +641,6 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) prof_gctx_t *gctx = tctx->gctx; bool destroy_tdata, destroy_tctx, destroy_gctx; - malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); - assert(tctx->cnts.curobjs == 0); assert(tctx->cnts.curbytes == 0); assert(!opt_prof_accum); @@ -652,10 +648,10 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) assert(tctx->cnts.accumbytes == 0); ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL); - destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, false); - malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); + destroy_tdata = prof_tdata_should_destroy(tdata, false); + malloc_mutex_unlock(tdata->lock); - malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); + malloc_mutex_lock(gctx->lock); switch (tctx->state) { case prof_tctx_state_nominal: tctx_tree_remove(&gctx->tctxs, tctx); @@ -695,19 +691,17 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) destroy_tctx = false; destroy_gctx = false; } - malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); + malloc_mutex_unlock(gctx->lock); if (destroy_gctx) { prof_gctx_try_destroy(tsd, prof_tdata_get(tsd, false), gctx, tdata); } - malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock); - if (destroy_tdata) prof_tdata_destroy(tsd, tdata, false); if (destroy_tctx) - idalloctm(tsd_tsdn(tsd), tctx, NULL, true, true); + idalloctm(tsd, tctx, tcache_get(tsd, false), true); } static bool @@ -727,7 +721,7 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, prof_enter(tsd, tdata); if 
(ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) { /* bt has never been seen before. Insert it. */ - gctx.p = prof_gctx_create(tsd_tsdn(tsd), bt); + gctx.p = prof_gctx_create(tsd, bt); if (gctx.v == NULL) { prof_leave(tsd, tdata); return (true); @@ -736,7 +730,7 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) { /* OOM. */ prof_leave(tsd, tdata); - idalloctm(tsd_tsdn(tsd), gctx.v, NULL, true, true); + idalloctm(tsd, gctx.v, tcache_get(tsd, false), true); return (true); } new_gctx = true; @@ -745,9 +739,9 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, * Increment nlimbo, in order to avoid a race condition with * prof_tctx_destroy()/prof_gctx_try_destroy(). */ - malloc_mutex_lock(tsd_tsdn(tsd), gctx.p->lock); + malloc_mutex_lock(gctx.p->lock); gctx.p->nlimbo++; - malloc_mutex_unlock(tsd_tsdn(tsd), gctx.p->lock); + malloc_mutex_unlock(gctx.p->lock); new_gctx = false; } prof_leave(tsd, tdata); @@ -774,12 +768,13 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) if (tdata == NULL) return (NULL); - malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); + malloc_mutex_lock(tdata->lock); not_found = ckh_search(&tdata->bt2tctx, bt, NULL, &ret.v); if (!not_found) /* Note double negative! */ ret.p->prepared = true; - malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); + malloc_mutex_unlock(tdata->lock); if (not_found) { + tcache_t *tcache; void *btkey; prof_gctx_t *gctx; bool new_gctx, error; @@ -793,9 +788,9 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) return (NULL); /* Link a prof_tctx_t into gctx for this thread. 
*/ - ret.v = iallocztm(tsd_tsdn(tsd), sizeof(prof_tctx_t), - size2index(sizeof(prof_tctx_t)), false, NULL, true, - arena_ichoose(tsd, NULL), true); + tcache = tcache_get(tsd, true); + ret.v = iallocztm(tsd, sizeof(prof_tctx_t), false, tcache, true, + NULL); if (ret.p == NULL) { if (new_gctx) prof_gctx_try_destroy(tsd, tdata, gctx, tdata); @@ -809,41 +804,41 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) ret.p->tctx_uid = tdata->tctx_uid_next++; ret.p->prepared = true; ret.p->state = prof_tctx_state_initializing; - malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); + malloc_mutex_lock(tdata->lock); error = ckh_insert(tsd, &tdata->bt2tctx, btkey, ret.v); - malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); + malloc_mutex_unlock(tdata->lock); if (error) { if (new_gctx) prof_gctx_try_destroy(tsd, tdata, gctx, tdata); - idalloctm(tsd_tsdn(tsd), ret.v, NULL, true, true); + idalloctm(tsd, ret.v, tcache, true); return (NULL); } - malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); + malloc_mutex_lock(gctx->lock); ret.p->state = prof_tctx_state_nominal; tctx_tree_insert(&gctx->tctxs, ret.p); gctx->nlimbo--; - malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); + malloc_mutex_unlock(gctx->lock); } return (ret.p); } -/* - * The bodies of this function and prof_leakcheck() are compiled out unless heap - * profiling is enabled, so that it is possible to compile jemalloc with - * floating point support completely disabled. Avoiding floating point code is - * important on memory-constrained systems, but it also enables a workaround for - * versions of glibc that don't properly save/restore floating point registers - * during dynamic lazy symbol loading (which internally calls into whatever - * malloc implementation happens to be integrated into the application). Note - * that some compilers (e.g. gcc 4.8) may use floating point registers for fast - * memory moves, so jemalloc must be compiled with such optimizations disabled - * (e.g. - * -mno-sse) in order for the workaround to be complete. 
- */ void prof_sample_threshold_update(prof_tdata_t *tdata) { + /* + * The body of this function is compiled out unless heap profiling is + * enabled, so that it is possible to compile jemalloc with floating + * point support completely disabled. Avoiding floating point code is + * important on memory-constrained systems, but it also enables a + * workaround for versions of glibc that don't properly save/restore + * floating point registers during dynamic lazy symbol loading (which + * internally calls into whatever malloc implementation happens to be + * integrated into the application). Note that some compilers (e.g. + * gcc 4.8) may use floating point registers for fast memory moves, so + * jemalloc must be compiled with such optimizations disabled (e.g. + * -mno-sse) in order for the workaround to be complete. + */ #ifdef JEMALLOC_PROF uint64_t r; double u; @@ -874,7 +869,8 @@ prof_sample_threshold_update(prof_tdata_t *tdata) * pp 500 * (http://luc.devroye.org/rnbookindex.html) */ - r = prng_lg_range_u64(&tdata->prng_state, 53); + prng64(r, 53, tdata->prng_state, UINT64_C(6364136223846793005), + UINT64_C(1442695040888963407)); u = (double)r * (1.0/9007199254740992.0L); tdata->bytes_until_sample = (uint64_t)(log(u) / log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample)))) @@ -897,13 +893,11 @@ size_t prof_tdata_count(void) { size_t tdata_count = 0; - tsdn_t *tsdn; - tsdn = tsdn_fetch(); - malloc_mutex_lock(tsdn, &tdatas_mtx); + malloc_mutex_lock(&tdatas_mtx); tdata_tree_iter(&tdatas, NULL, prof_tdata_count_iter, (void *)&tdata_count); - malloc_mutex_unlock(tsdn, &tdatas_mtx); + malloc_mutex_unlock(&tdatas_mtx); return (tdata_count); } @@ -922,9 +916,9 @@ prof_bt_count(void) if (tdata == NULL) return (0); - malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx); + malloc_mutex_lock(&bt2gctx_mtx); bt_count = ckh_count(&bt2gctx); - malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx); + malloc_mutex_unlock(&bt2gctx_mtx); return (bt_count); } @@ -994,7 +988,7 @@ 
prof_dump_close(bool propagate_err) static bool prof_dump_write(bool propagate_err, const char *s) { - size_t i, slen, n; + unsigned i, slen, n; cassert(config_prof); @@ -1037,21 +1031,20 @@ prof_dump_printf(bool propagate_err, const char *format, ...) return (ret); } +/* tctx->tdata->lock is held. */ static void -prof_tctx_merge_tdata(tsdn_t *tsdn, prof_tctx_t *tctx, prof_tdata_t *tdata) +prof_tctx_merge_tdata(prof_tctx_t *tctx, prof_tdata_t *tdata) { - malloc_mutex_assert_owner(tsdn, tctx->tdata->lock); - - malloc_mutex_lock(tsdn, tctx->gctx->lock); + malloc_mutex_lock(tctx->gctx->lock); switch (tctx->state) { case prof_tctx_state_initializing: - malloc_mutex_unlock(tsdn, tctx->gctx->lock); + malloc_mutex_unlock(tctx->gctx->lock); return; case prof_tctx_state_nominal: tctx->state = prof_tctx_state_dumping; - malloc_mutex_unlock(tsdn, tctx->gctx->lock); + malloc_mutex_unlock(tctx->gctx->lock); memcpy(&tctx->dump_cnts, &tctx->cnts, sizeof(prof_cnt_t)); @@ -1070,12 +1063,11 @@ prof_tctx_merge_tdata(tsdn_t *tsdn, prof_tctx_t *tctx, prof_tdata_t *tdata) } } +/* gctx->lock is held. */ static void -prof_tctx_merge_gctx(tsdn_t *tsdn, prof_tctx_t *tctx, prof_gctx_t *gctx) +prof_tctx_merge_gctx(prof_tctx_t *tctx, prof_gctx_t *gctx) { - malloc_mutex_assert_owner(tsdn, gctx->lock); - gctx->cnt_summed.curobjs += tctx->dump_cnts.curobjs; gctx->cnt_summed.curbytes += tctx->dump_cnts.curbytes; if (opt_prof_accum) { @@ -1084,12 +1076,10 @@ prof_tctx_merge_gctx(tsdn_t *tsdn, prof_tctx_t *tctx, prof_gctx_t *gctx) } } +/* tctx->gctx is held. 
*/ static prof_tctx_t * prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { - tsdn_t *tsdn = (tsdn_t *)arg; - - malloc_mutex_assert_owner(tsdn, tctx->gctx->lock); switch (tctx->state) { case prof_tctx_state_nominal: @@ -1097,7 +1087,7 @@ prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) break; case prof_tctx_state_dumping: case prof_tctx_state_purgatory: - prof_tctx_merge_gctx(tsdn, tctx, tctx->gctx); + prof_tctx_merge_gctx(tctx, tctx->gctx); break; default: not_reached(); @@ -1106,18 +1096,11 @@ prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) return (NULL); } -struct prof_tctx_dump_iter_arg_s { - tsdn_t *tsdn; - bool propagate_err; -}; - +/* gctx->lock is held. */ static prof_tctx_t * -prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) +prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { - struct prof_tctx_dump_iter_arg_s *arg = - (struct prof_tctx_dump_iter_arg_s *)opaque; - - malloc_mutex_assert_owner(arg->tsdn, tctx->gctx->lock); + bool propagate_err = *(bool *)arg; switch (tctx->state) { case prof_tctx_state_initializing: @@ -1126,7 +1109,7 @@ prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) break; case prof_tctx_state_dumping: case prof_tctx_state_purgatory: - if (prof_dump_printf(arg->propagate_err, + if (prof_dump_printf(propagate_err, " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": " "%"FMTu64"]\n", tctx->thr_uid, tctx->dump_cnts.curobjs, tctx->dump_cnts.curbytes, tctx->dump_cnts.accumobjs, @@ -1139,14 +1122,12 @@ prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) return (NULL); } +/* tctx->gctx is held. 
*/ static prof_tctx_t * prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { - tsdn_t *tsdn = (tsdn_t *)arg; prof_tctx_t *ret; - malloc_mutex_assert_owner(tsdn, tctx->gctx->lock); - switch (tctx->state) { case prof_tctx_state_nominal: /* New since dumping started; ignore. */ @@ -1167,12 +1148,12 @@ label_return: } static void -prof_dump_gctx_prep(tsdn_t *tsdn, prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) +prof_dump_gctx_prep(prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) { cassert(config_prof); - malloc_mutex_lock(tsdn, gctx->lock); + malloc_mutex_lock(gctx->lock); /* * Increment nlimbo so that gctx won't go away before dump. @@ -1184,26 +1165,19 @@ prof_dump_gctx_prep(tsdn_t *tsdn, prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) memset(&gctx->cnt_summed, 0, sizeof(prof_cnt_t)); - malloc_mutex_unlock(tsdn, gctx->lock); + malloc_mutex_unlock(gctx->lock); } -struct prof_gctx_merge_iter_arg_s { - tsdn_t *tsdn; - size_t leak_ngctx; -}; - static prof_gctx_t * -prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) +prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *arg) { - struct prof_gctx_merge_iter_arg_s *arg = - (struct prof_gctx_merge_iter_arg_s *)opaque; + size_t *leak_ngctx = (size_t *)arg; - malloc_mutex_lock(arg->tsdn, gctx->lock); - tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter, - (void *)arg->tsdn); + malloc_mutex_lock(gctx->lock); + tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter, NULL); if (gctx->cnt_summed.curobjs != 0) - arg->leak_ngctx++; - malloc_mutex_unlock(arg->tsdn, gctx->lock); + (*leak_ngctx)++; + malloc_mutex_unlock(gctx->lock); return (NULL); } @@ -1222,7 +1196,7 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) */ while ((gctx = gctx_tree_first(gctxs)) != NULL) { gctx_tree_remove(gctxs, gctx); - malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); + malloc_mutex_lock(gctx->lock); { prof_tctx_t *next; @@ -1230,15 +1204,14 @@ prof_gctx_finish(tsd_t *tsd, 
prof_gctx_tree_t *gctxs) do { prof_tctx_t *to_destroy = tctx_tree_iter(&gctx->tctxs, next, - prof_tctx_finish_iter, - (void *)tsd_tsdn(tsd)); + prof_tctx_finish_iter, NULL); if (to_destroy != NULL) { next = tctx_tree_next(&gctx->tctxs, to_destroy); tctx_tree_remove(&gctx->tctxs, to_destroy); - idalloctm(tsd_tsdn(tsd), to_destroy, - NULL, true, true); + idalloctm(tsd, to_destroy, + tcache_get(tsd, false), true); } else next = NULL; } while (next != NULL); @@ -1246,26 +1219,19 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) gctx->nlimbo--; if (prof_gctx_should_destroy(gctx)) { gctx->nlimbo++; - malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); + malloc_mutex_unlock(gctx->lock); prof_gctx_try_destroy(tsd, tdata, gctx, tdata); } else - malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); + malloc_mutex_unlock(gctx->lock); } } -struct prof_tdata_merge_iter_arg_s { - tsdn_t *tsdn; - prof_cnt_t cnt_all; -}; - static prof_tdata_t * -prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, - void *opaque) +prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) { - struct prof_tdata_merge_iter_arg_s *arg = - (struct prof_tdata_merge_iter_arg_s *)opaque; + prof_cnt_t *cnt_all = (prof_cnt_t *)arg; - malloc_mutex_lock(arg->tsdn, tdata->lock); + malloc_mutex_lock(tdata->lock); if (!tdata->expired) { size_t tabind; union { @@ -1277,17 +1243,17 @@ prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, memset(&tdata->cnt_summed, 0, sizeof(prof_cnt_t)); for (tabind = 0; !ckh_iter(&tdata->bt2tctx, &tabind, NULL, &tctx.v);) - prof_tctx_merge_tdata(arg->tsdn, tctx.p, tdata); + prof_tctx_merge_tdata(tctx.p, tdata); - arg->cnt_all.curobjs += tdata->cnt_summed.curobjs; - arg->cnt_all.curbytes += tdata->cnt_summed.curbytes; + cnt_all->curobjs += tdata->cnt_summed.curobjs; + cnt_all->curbytes += tdata->cnt_summed.curbytes; if (opt_prof_accum) { - arg->cnt_all.accumobjs += tdata->cnt_summed.accumobjs; - arg->cnt_all.accumbytes += 
tdata->cnt_summed.accumbytes; + cnt_all->accumobjs += tdata->cnt_summed.accumobjs; + cnt_all->accumbytes += tdata->cnt_summed.accumbytes; } } else tdata->dumping = false; - malloc_mutex_unlock(arg->tsdn, tdata->lock); + malloc_mutex_unlock(tdata->lock); return (NULL); } @@ -1316,7 +1282,7 @@ prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) #define prof_dump_header JEMALLOC_N(prof_dump_header_impl) #endif static bool -prof_dump_header(tsdn_t *tsdn, bool propagate_err, const prof_cnt_t *cnt_all) +prof_dump_header(bool propagate_err, const prof_cnt_t *cnt_all) { bool ret; @@ -1327,10 +1293,10 @@ prof_dump_header(tsdn_t *tsdn, bool propagate_err, const prof_cnt_t *cnt_all) cnt_all->curbytes, cnt_all->accumobjs, cnt_all->accumbytes)) return (true); - malloc_mutex_lock(tsdn, &tdatas_mtx); + malloc_mutex_lock(&tdatas_mtx); ret = (tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter, (void *)&propagate_err) != NULL); - malloc_mutex_unlock(tsdn, &tdatas_mtx); + malloc_mutex_unlock(&tdatas_mtx); return (ret); } #ifdef JEMALLOC_JET @@ -1339,16 +1305,15 @@ prof_dump_header(tsdn_t *tsdn, bool propagate_err, const prof_cnt_t *cnt_all) prof_dump_header_t *prof_dump_header = JEMALLOC_N(prof_dump_header_impl); #endif +/* gctx->lock is held. */ static bool -prof_dump_gctx(tsdn_t *tsdn, bool propagate_err, prof_gctx_t *gctx, - const prof_bt_t *bt, prof_gctx_tree_t *gctxs) +prof_dump_gctx(bool propagate_err, prof_gctx_t *gctx, const prof_bt_t *bt, + prof_gctx_tree_t *gctxs) { bool ret; unsigned i; - struct prof_tctx_dump_iter_arg_s prof_tctx_dump_iter_arg; cassert(config_prof); - malloc_mutex_assert_owner(tsdn, gctx->lock); /* Avoid dumping such gctx's that have no useful data. 
*/ if ((!opt_prof_accum && gctx->cnt_summed.curobjs == 0) || @@ -1382,10 +1347,8 @@ prof_dump_gctx(tsdn_t *tsdn, bool propagate_err, prof_gctx_t *gctx, goto label_return; } - prof_tctx_dump_iter_arg.tsdn = tsdn; - prof_tctx_dump_iter_arg.propagate_err = propagate_err; if (tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter, - (void *)&prof_tctx_dump_iter_arg) != NULL) { + (void *)&propagate_err) != NULL) { ret = true; goto label_return; } @@ -1395,7 +1358,6 @@ label_return: return (ret); } -#ifndef _WIN32 JEMALLOC_FORMAT_PRINTF(1, 2) static int prof_open_maps(const char *format, ...) @@ -1411,18 +1373,6 @@ prof_open_maps(const char *format, ...) return (mfd); } -#endif - -static int -prof_getpid(void) -{ - -#ifdef _WIN32 - return (GetCurrentProcessId()); -#else - return (getpid()); -#endif -} static bool prof_dump_maps(bool propagate_err) @@ -1433,11 +1383,9 @@ prof_dump_maps(bool propagate_err) cassert(config_prof); #ifdef __FreeBSD__ mfd = prof_open_maps("/proc/curproc/map"); -#elif defined(_WIN32) - mfd = -1; // Not implemented #else { - int pid = prof_getpid(); + int pid = getpid(); mfd = prof_open_maps("/proc/%d/task/%d/maps", pid, pid); if (mfd == -1) @@ -1478,66 +1426,39 @@ label_return: return (ret); } -/* - * See prof_sample_threshold_update() comment for why the body of this function - * is conditionally compiled. - */ static void prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx, const char *filename) { -#ifdef JEMALLOC_PROF - /* - * Scaling is equivalent AdjustSamples() in jeprof, but the result may - * differ slightly from what jeprof reports, because here we scale the - * summary values, whereas jeprof scales each context individually and - * reports the sums of the scaled values. 
- */ if (cnt_all->curbytes != 0) { - double sample_period = (double)((uint64_t)1 << lg_prof_sample); - double ratio = (((double)cnt_all->curbytes) / - (double)cnt_all->curobjs) / sample_period; - double scale_factor = 1.0 / (1.0 - exp(-ratio)); - uint64_t curbytes = (uint64_t)round(((double)cnt_all->curbytes) - * scale_factor); - uint64_t curobjs = (uint64_t)round(((double)cnt_all->curobjs) * - scale_factor); - - malloc_printf(": Leak approximation summary: ~%"FMTu64 - " byte%s, ~%"FMTu64" object%s, >= %zu context%s\n", - curbytes, (curbytes != 1) ? "s" : "", curobjs, (curobjs != - 1) ? "s" : "", leak_ngctx, (leak_ngctx != 1) ? "s" : ""); + malloc_printf(": Leak summary: %"FMTu64" byte%s, %" + FMTu64" object%s, %zu context%s\n", + cnt_all->curbytes, (cnt_all->curbytes != 1) ? "s" : "", + cnt_all->curobjs, (cnt_all->curobjs != 1) ? "s" : "", + leak_ngctx, (leak_ngctx != 1) ? "s" : ""); malloc_printf( ": Run jeprof on \"%s\" for leak detail\n", filename); } -#endif } -struct prof_gctx_dump_iter_arg_s { - tsdn_t *tsdn; - bool propagate_err; -}; - static prof_gctx_t * -prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) +prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *arg) { prof_gctx_t *ret; - struct prof_gctx_dump_iter_arg_s *arg = - (struct prof_gctx_dump_iter_arg_s *)opaque; + bool propagate_err = *(bool *)arg; - malloc_mutex_lock(arg->tsdn, gctx->lock); + malloc_mutex_lock(gctx->lock); - if (prof_dump_gctx(arg->tsdn, arg->propagate_err, gctx, &gctx->bt, - gctxs)) { + if (prof_dump_gctx(propagate_err, gctx, &gctx->bt, gctxs)) { ret = gctx; goto label_return; } ret = NULL; label_return: - malloc_mutex_unlock(arg->tsdn, gctx->lock); + malloc_mutex_unlock(gctx->lock); return (ret); } @@ -1545,14 +1466,13 @@ static bool prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) { prof_tdata_t *tdata; - struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg; + prof_cnt_t cnt_all; size_t tabind; 
union { prof_gctx_t *p; void *v; } gctx; - struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg; - struct prof_gctx_dump_iter_arg_s prof_gctx_dump_iter_arg; + size_t leak_ngctx; prof_gctx_tree_t gctxs; cassert(config_prof); @@ -1561,7 +1481,7 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) if (tdata == NULL) return (true); - malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); + malloc_mutex_lock(&prof_dump_mtx); prof_enter(tsd, tdata); /* @@ -1570,24 +1490,20 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) */ gctx_tree_new(&gctxs); for (tabind = 0; !ckh_iter(&bt2gctx, &tabind, NULL, &gctx.v);) - prof_dump_gctx_prep(tsd_tsdn(tsd), gctx.p, &gctxs); + prof_dump_gctx_prep(gctx.p, &gctxs); /* * Iterate over tdatas, and for the non-expired ones snapshot their tctx * stats and merge them into the associated gctx's. */ - prof_tdata_merge_iter_arg.tsdn = tsd_tsdn(tsd); - memset(&prof_tdata_merge_iter_arg.cnt_all, 0, sizeof(prof_cnt_t)); - malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); - tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter, - (void *)&prof_tdata_merge_iter_arg); - malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); + memset(&cnt_all, 0, sizeof(prof_cnt_t)); + malloc_mutex_lock(&tdatas_mtx); + tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter, (void *)&cnt_all); + malloc_mutex_unlock(&tdatas_mtx); /* Merge tctx stats into gctx's. */ - prof_gctx_merge_iter_arg.tsdn = tsd_tsdn(tsd); - prof_gctx_merge_iter_arg.leak_ngctx = 0; - gctx_tree_iter(&gctxs, NULL, prof_gctx_merge_iter, - (void *)&prof_gctx_merge_iter_arg); + leak_ngctx = 0; + gctx_tree_iter(&gctxs, NULL, prof_gctx_merge_iter, (void *)&leak_ngctx); prof_leave(tsd, tdata); @@ -1596,15 +1512,12 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) goto label_open_close_error; /* Dump profile header. 
*/ - if (prof_dump_header(tsd_tsdn(tsd), propagate_err, - &prof_tdata_merge_iter_arg.cnt_all)) + if (prof_dump_header(propagate_err, &cnt_all)) goto label_write_error; /* Dump per gctx profile stats. */ - prof_gctx_dump_iter_arg.tsdn = tsd_tsdn(tsd); - prof_gctx_dump_iter_arg.propagate_err = propagate_err; if (gctx_tree_iter(&gctxs, NULL, prof_gctx_dump_iter, - (void *)&prof_gctx_dump_iter_arg) != NULL) + (void *)&propagate_err) != NULL) goto label_write_error; /* Dump /proc//maps if possible. */ @@ -1615,18 +1528,17 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) goto label_open_close_error; prof_gctx_finish(tsd, &gctxs); - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); + malloc_mutex_unlock(&prof_dump_mtx); + + if (leakcheck) + prof_leakcheck(&cnt_all, leak_ngctx, filename); - if (leakcheck) { - prof_leakcheck(&prof_tdata_merge_iter_arg.cnt_all, - prof_gctx_merge_iter_arg.leak_ngctx, filename); - } return (false); label_write_error: prof_dump_close(propagate_err); label_open_close_error: prof_gctx_finish(tsd, &gctxs); - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); + malloc_mutex_unlock(&prof_dump_mtx); return (true); } @@ -1642,12 +1554,12 @@ prof_dump_filename(char *filename, char v, uint64_t vseq) /* "...v.heap" */ malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, "%s.%d.%"FMTu64".%c%"FMTu64".heap", - opt_prof_prefix, prof_getpid(), prof_dump_seq, v, vseq); + opt_prof_prefix, (int)getpid(), prof_dump_seq, v, vseq); } else { /* "....heap" */ malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, "%s.%d.%"FMTu64".%c.heap", - opt_prof_prefix, prof_getpid(), prof_dump_seq, v); + opt_prof_prefix, (int)getpid(), prof_dump_seq, v); } prof_dump_seq++; } @@ -1666,23 +1578,23 @@ prof_fdump(void) return; tsd = tsd_fetch(); - malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx); + malloc_mutex_lock(&prof_dump_seq_mtx); prof_dump_filename(filename, 'f', VSEQ_INVALID); - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx); + 
malloc_mutex_unlock(&prof_dump_seq_mtx); prof_dump(tsd, false, filename, opt_prof_leak); } void -prof_idump(tsdn_t *tsdn) +prof_idump(void) { tsd_t *tsd; prof_tdata_t *tdata; cassert(config_prof); - if (!prof_booted || tsdn_null(tsdn)) + if (!prof_booted) return; - tsd = tsdn_tsd(tsdn); + tsd = tsd_fetch(); tdata = prof_tdata_get(tsd, false); if (tdata == NULL) return; @@ -1693,48 +1605,50 @@ prof_idump(tsdn_t *tsdn) if (opt_prof_prefix[0] != '\0') { char filename[PATH_MAX + 1]; - malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx); + malloc_mutex_lock(&prof_dump_seq_mtx); prof_dump_filename(filename, 'i', prof_dump_iseq); prof_dump_iseq++; - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx); + malloc_mutex_unlock(&prof_dump_seq_mtx); prof_dump(tsd, false, filename, false); } } bool -prof_mdump(tsd_t *tsd, const char *filename) +prof_mdump(const char *filename) { + tsd_t *tsd; char filename_buf[DUMP_FILENAME_BUFSIZE]; cassert(config_prof); if (!opt_prof || !prof_booted) return (true); + tsd = tsd_fetch(); if (filename == NULL) { /* No filename specified, so automatically generate one. 
*/ if (opt_prof_prefix[0] == '\0') return (true); - malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx); + malloc_mutex_lock(&prof_dump_seq_mtx); prof_dump_filename(filename_buf, 'm', prof_dump_mseq); prof_dump_mseq++; - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx); + malloc_mutex_unlock(&prof_dump_seq_mtx); filename = filename_buf; } return (prof_dump(tsd, true, filename, false)); } void -prof_gdump(tsdn_t *tsdn) +prof_gdump(void) { tsd_t *tsd; prof_tdata_t *tdata; cassert(config_prof); - if (!prof_booted || tsdn_null(tsdn)) + if (!prof_booted) return; - tsd = tsdn_tsd(tsdn); + tsd = tsd_fetch(); tdata = prof_tdata_get(tsd, false); if (tdata == NULL) return; @@ -1745,10 +1659,10 @@ prof_gdump(tsdn_t *tsdn) if (opt_prof_prefix[0] != '\0') { char filename[DUMP_FILENAME_BUFSIZE]; - malloc_mutex_lock(tsdn, &prof_dump_seq_mtx); + malloc_mutex_lock(&prof_dump_seq_mtx); prof_dump_filename(filename, 'u', prof_dump_useq); prof_dump_useq++; - malloc_mutex_unlock(tsdn, &prof_dump_seq_mtx); + malloc_mutex_unlock(&prof_dump_seq_mtx); prof_dump(tsd, false, filename, false); } } @@ -1777,14 +1691,14 @@ prof_bt_keycomp(const void *k1, const void *k2) } JEMALLOC_INLINE_C uint64_t -prof_thr_uid_alloc(tsdn_t *tsdn) +prof_thr_uid_alloc(void) { uint64_t thr_uid; - malloc_mutex_lock(tsdn, &next_thr_uid_mtx); + malloc_mutex_lock(&next_thr_uid_mtx); thr_uid = next_thr_uid; next_thr_uid++; - malloc_mutex_unlock(tsdn, &next_thr_uid_mtx); + malloc_mutex_unlock(&next_thr_uid_mtx); return (thr_uid); } @@ -1794,13 +1708,14 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, char *thread_name, bool active) { prof_tdata_t *tdata; + tcache_t *tcache; cassert(config_prof); /* Initialize an empty cache for this thread. 
*/ - tdata = (prof_tdata_t *)iallocztm(tsd_tsdn(tsd), sizeof(prof_tdata_t), - size2index(sizeof(prof_tdata_t)), false, NULL, true, - arena_get(TSDN_NULL, 0, true), true); + tcache = tcache_get(tsd, true); + tdata = (prof_tdata_t *)iallocztm(tsd, sizeof(prof_tdata_t), false, + tcache, true, NULL); if (tdata == NULL) return (NULL); @@ -1812,9 +1727,9 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, tdata->expired = false; tdata->tctx_uid_next = 0; - if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash, - prof_bt_keycomp)) { - idalloctm(tsd_tsdn(tsd), tdata, NULL, true, true); + if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, + prof_bt_hash, prof_bt_keycomp)) { + idalloctm(tsd, tdata, tcache, true); return (NULL); } @@ -1828,9 +1743,9 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, tdata->dumping = false; tdata->active = active; - malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); + malloc_mutex_lock(&tdatas_mtx); tdata_tree_insert(&tdatas, tdata); - malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); + malloc_mutex_unlock(&tdatas_mtx); return (tdata); } @@ -1839,12 +1754,13 @@ prof_tdata_t * prof_tdata_init(tsd_t *tsd) { - return (prof_tdata_init_impl(tsd, prof_thr_uid_alloc(tsd_tsdn(tsd)), 0, - NULL, prof_thread_active_init_get(tsd_tsdn(tsd)))); + return (prof_tdata_init_impl(tsd, prof_thr_uid_alloc(), 0, NULL, + prof_thread_active_init_get())); } +/* tdata->lock must be held. 
*/ static bool -prof_tdata_should_destroy_unlocked(prof_tdata_t *tdata, bool even_if_attached) +prof_tdata_should_destroy(prof_tdata_t *tdata, bool even_if_attached) { if (tdata->attached && !even_if_attached) @@ -1854,40 +1770,32 @@ prof_tdata_should_destroy_unlocked(prof_tdata_t *tdata, bool even_if_attached) return (true); } -static bool -prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, - bool even_if_attached) -{ - - malloc_mutex_assert_owner(tsdn, tdata->lock); - - return (prof_tdata_should_destroy_unlocked(tdata, even_if_attached)); -} - +/* tdatas_mtx must be held. */ static void prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) { + tcache_t *tcache; - malloc_mutex_assert_owner(tsd_tsdn(tsd), &tdatas_mtx); + assert(prof_tdata_should_destroy(tdata, even_if_attached)); + assert(tsd_prof_tdata_get(tsd) != tdata); tdata_tree_remove(&tdatas, tdata); - assert(prof_tdata_should_destroy_unlocked(tdata, even_if_attached)); - + tcache = tcache_get(tsd, false); if (tdata->thread_name != NULL) - idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, true, true); + idalloctm(tsd, tdata->thread_name, tcache, true); ckh_delete(tsd, &tdata->bt2tctx); - idalloctm(tsd_tsdn(tsd), tdata, NULL, true, true); + idalloctm(tsd, tdata, tcache, true); } static void prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) { - malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); + malloc_mutex_lock(&tdatas_mtx); prof_tdata_destroy_locked(tsd, tdata, even_if_attached); - malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); + malloc_mutex_unlock(&tdatas_mtx); } static void @@ -1895,10 +1803,9 @@ prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) { bool destroy_tdata; - malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); + malloc_mutex_lock(tdata->lock); if (tdata->attached) { - destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, - true); + destroy_tdata = prof_tdata_should_destroy(tdata, true); /* * Only detach if !destroy_tdata, 
because detaching would allow * another thread to win the race to destroy tdata. @@ -1908,7 +1815,7 @@ prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) tsd_prof_tdata_set(tsd, NULL); } else destroy_tdata = false; - malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); + malloc_mutex_unlock(tdata->lock); if (destroy_tdata) prof_tdata_destroy(tsd, tdata, true); } @@ -1919,7 +1826,7 @@ prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) uint64_t thr_uid = tdata->thr_uid; uint64_t thr_discrim = tdata->thr_discrim + 1; char *thread_name = (tdata->thread_name != NULL) ? - prof_thread_name_alloc(tsd_tsdn(tsd), tdata->thread_name) : NULL; + prof_thread_name_alloc(tsd, tdata->thread_name) : NULL; bool active = tdata->active; prof_tdata_detach(tsd, tdata); @@ -1928,18 +1835,18 @@ prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) } static bool -prof_tdata_expire(tsdn_t *tsdn, prof_tdata_t *tdata) +prof_tdata_expire(prof_tdata_t *tdata) { bool destroy_tdata; - malloc_mutex_lock(tsdn, tdata->lock); + malloc_mutex_lock(tdata->lock); if (!tdata->expired) { tdata->expired = true; destroy_tdata = tdata->attached ? false : - prof_tdata_should_destroy(tsdn, tdata, false); + prof_tdata_should_destroy(tdata, false); } else destroy_tdata = false; - malloc_mutex_unlock(tsdn, tdata->lock); + malloc_mutex_unlock(tdata->lock); return (destroy_tdata); } @@ -1947,9 +1854,8 @@ prof_tdata_expire(tsdn_t *tsdn, prof_tdata_t *tdata) static prof_tdata_t * prof_tdata_reset_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) { - tsdn_t *tsdn = (tsdn_t *)arg; - return (prof_tdata_expire(tsdn, tdata) ? tdata : NULL); + return (prof_tdata_expire(tdata) ? 
tdata : NULL); } void @@ -1959,15 +1865,15 @@ prof_reset(tsd_t *tsd, size_t lg_sample) assert(lg_sample < (sizeof(uint64_t) << 3)); - malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); - malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); + malloc_mutex_lock(&prof_dump_mtx); + malloc_mutex_lock(&tdatas_mtx); lg_prof_sample = lg_sample; next = NULL; do { prof_tdata_t *to_destroy = tdata_tree_iter(&tdatas, next, - prof_tdata_reset_iter, (void *)tsd); + prof_tdata_reset_iter, NULL); if (to_destroy != NULL) { next = tdata_tree_next(&tdatas, to_destroy); prof_tdata_destroy_locked(tsd, to_destroy, false); @@ -1975,8 +1881,8 @@ prof_reset(tsd_t *tsd, size_t lg_sample) next = NULL; } while (next != NULL); - malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); + malloc_mutex_unlock(&tdatas_mtx); + malloc_mutex_unlock(&prof_dump_mtx); } void @@ -1993,33 +1899,35 @@ prof_tdata_cleanup(tsd_t *tsd) } bool -prof_active_get(tsdn_t *tsdn) +prof_active_get(void) { bool prof_active_current; - malloc_mutex_lock(tsdn, &prof_active_mtx); + malloc_mutex_lock(&prof_active_mtx); prof_active_current = prof_active; - malloc_mutex_unlock(tsdn, &prof_active_mtx); + malloc_mutex_unlock(&prof_active_mtx); return (prof_active_current); } bool -prof_active_set(tsdn_t *tsdn, bool active) +prof_active_set(bool active) { bool prof_active_old; - malloc_mutex_lock(tsdn, &prof_active_mtx); + malloc_mutex_lock(&prof_active_mtx); prof_active_old = prof_active; prof_active = active; - malloc_mutex_unlock(tsdn, &prof_active_mtx); + malloc_mutex_unlock(&prof_active_mtx); return (prof_active_old); } const char * -prof_thread_name_get(tsd_t *tsd) +prof_thread_name_get(void) { + tsd_t *tsd; prof_tdata_t *tdata; + tsd = tsd_fetch(); tdata = prof_tdata_get(tsd, true); if (tdata == NULL) return (""); @@ -2027,7 +1935,7 @@ prof_thread_name_get(tsd_t *tsd) } static char * -prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name) +prof_thread_name_alloc(tsd_t *tsd, 
const char *thread_name) { char *ret; size_t size; @@ -2039,8 +1947,7 @@ prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name) if (size == 1) return (""); - ret = iallocztm(tsdn, size, size2index(size), false, NULL, true, - arena_get(TSDN_NULL, 0, true), true); + ret = iallocztm(tsd, size, false, tcache_get(tsd, true), true, NULL); if (ret == NULL) return (NULL); memcpy(ret, thread_name, size); @@ -2067,12 +1974,13 @@ prof_thread_name_set(tsd_t *tsd, const char *thread_name) return (EFAULT); } - s = prof_thread_name_alloc(tsd_tsdn(tsd), thread_name); + s = prof_thread_name_alloc(tsd, thread_name); if (s == NULL) return (EAGAIN); if (tdata->thread_name != NULL) { - idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, true, true); + idalloctm(tsd, tdata->thread_name, tcache_get(tsd, false), + true); tdata->thread_name = NULL; } if (strlen(s) > 0) @@ -2081,10 +1989,12 @@ prof_thread_name_set(tsd_t *tsd, const char *thread_name) } bool -prof_thread_active_get(tsd_t *tsd) +prof_thread_active_get(void) { + tsd_t *tsd; prof_tdata_t *tdata; + tsd = tsd_fetch(); tdata = prof_tdata_get(tsd, true); if (tdata == NULL) return (false); @@ -2092,10 +2002,12 @@ prof_thread_active_get(tsd_t *tsd) } bool -prof_thread_active_set(tsd_t *tsd, bool active) +prof_thread_active_set(bool active) { + tsd_t *tsd; prof_tdata_t *tdata; + tsd = tsd_fetch(); tdata = prof_tdata_get(tsd, true); if (tdata == NULL) return (true); @@ -2104,48 +2016,48 @@ prof_thread_active_set(tsd_t *tsd, bool active) } bool -prof_thread_active_init_get(tsdn_t *tsdn) +prof_thread_active_init_get(void) { bool active_init; - malloc_mutex_lock(tsdn, &prof_thread_active_init_mtx); + malloc_mutex_lock(&prof_thread_active_init_mtx); active_init = prof_thread_active_init; - malloc_mutex_unlock(tsdn, &prof_thread_active_init_mtx); + malloc_mutex_unlock(&prof_thread_active_init_mtx); return (active_init); } bool -prof_thread_active_init_set(tsdn_t *tsdn, bool active_init) +prof_thread_active_init_set(bool active_init) { 
bool active_init_old; - malloc_mutex_lock(tsdn, &prof_thread_active_init_mtx); + malloc_mutex_lock(&prof_thread_active_init_mtx); active_init_old = prof_thread_active_init; prof_thread_active_init = active_init; - malloc_mutex_unlock(tsdn, &prof_thread_active_init_mtx); + malloc_mutex_unlock(&prof_thread_active_init_mtx); return (active_init_old); } bool -prof_gdump_get(tsdn_t *tsdn) +prof_gdump_get(void) { bool prof_gdump_current; - malloc_mutex_lock(tsdn, &prof_gdump_mtx); + malloc_mutex_lock(&prof_gdump_mtx); prof_gdump_current = prof_gdump_val; - malloc_mutex_unlock(tsdn, &prof_gdump_mtx); + malloc_mutex_unlock(&prof_gdump_mtx); return (prof_gdump_current); } bool -prof_gdump_set(tsdn_t *tsdn, bool gdump) +prof_gdump_set(bool gdump) { bool prof_gdump_old; - malloc_mutex_lock(tsdn, &prof_gdump_mtx); + malloc_mutex_lock(&prof_gdump_mtx); prof_gdump_old = prof_gdump_val; prof_gdump_val = gdump; - malloc_mutex_unlock(tsdn, &prof_gdump_mtx); + malloc_mutex_unlock(&prof_gdump_mtx); return (prof_gdump_old); } @@ -2186,54 +2098,47 @@ prof_boot1(void) } bool -prof_boot2(tsd_t *tsd) +prof_boot2(void) { cassert(config_prof); if (opt_prof) { + tsd_t *tsd; unsigned i; lg_prof_sample = opt_lg_prof_sample; prof_active = opt_prof_active; - if (malloc_mutex_init(&prof_active_mtx, "prof_active", - WITNESS_RANK_PROF_ACTIVE)) + if (malloc_mutex_init(&prof_active_mtx)) return (true); prof_gdump_val = opt_prof_gdump; - if (malloc_mutex_init(&prof_gdump_mtx, "prof_gdump", - WITNESS_RANK_PROF_GDUMP)) + if (malloc_mutex_init(&prof_gdump_mtx)) return (true); prof_thread_active_init = opt_prof_thread_active_init; - if (malloc_mutex_init(&prof_thread_active_init_mtx, - "prof_thread_active_init", - WITNESS_RANK_PROF_THREAD_ACTIVE_INIT)) + if (malloc_mutex_init(&prof_thread_active_init_mtx)) return (true); + tsd = tsd_fetch(); if (ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) return (true); - if (malloc_mutex_init(&bt2gctx_mtx, "prof_bt2gctx", - 
WITNESS_RANK_PROF_BT2GCTX)) + if (malloc_mutex_init(&bt2gctx_mtx)) return (true); tdata_tree_new(&tdatas); - if (malloc_mutex_init(&tdatas_mtx, "prof_tdatas", - WITNESS_RANK_PROF_TDATAS)) + if (malloc_mutex_init(&tdatas_mtx)) return (true); next_thr_uid = 0; - if (malloc_mutex_init(&next_thr_uid_mtx, "prof_next_thr_uid", - WITNESS_RANK_PROF_NEXT_THR_UID)) + if (malloc_mutex_init(&next_thr_uid_mtx)) return (true); - if (malloc_mutex_init(&prof_dump_seq_mtx, "prof_dump_seq", - WITNESS_RANK_PROF_DUMP_SEQ)) + if (malloc_mutex_init(&prof_dump_seq_mtx)) return (true); - if (malloc_mutex_init(&prof_dump_mtx, "prof_dump", - WITNESS_RANK_PROF_DUMP)) + if (malloc_mutex_init(&prof_dump_mtx)) return (true); if (opt_prof_final && opt_prof_prefix[0] != '\0' && @@ -2243,23 +2148,21 @@ prof_boot2(tsd_t *tsd) abort(); } - gctx_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd), - PROF_NCTX_LOCKS * sizeof(malloc_mutex_t)); + gctx_locks = (malloc_mutex_t *)base_alloc(PROF_NCTX_LOCKS * + sizeof(malloc_mutex_t)); if (gctx_locks == NULL) return (true); for (i = 0; i < PROF_NCTX_LOCKS; i++) { - if (malloc_mutex_init(&gctx_locks[i], "prof_gctx", - WITNESS_RANK_PROF_GCTX)) + if (malloc_mutex_init(&gctx_locks[i])) return (true); } - tdata_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd), - PROF_NTDATA_LOCKS * sizeof(malloc_mutex_t)); + tdata_locks = (malloc_mutex_t *)base_alloc(PROF_NTDATA_LOCKS * + sizeof(malloc_mutex_t)); if (tdata_locks == NULL) return (true); for (i = 0; i < PROF_NTDATA_LOCKS; i++) { - if (malloc_mutex_init(&tdata_locks[i], "prof_tdata", - WITNESS_RANK_PROF_TDATA)) + if (malloc_mutex_init(&tdata_locks[i])) return (true); } } @@ -2278,77 +2181,56 @@ prof_boot2(tsd_t *tsd) } void -prof_prefork0(tsdn_t *tsdn) +prof_prefork(void) { if (opt_prof) { unsigned i; - malloc_mutex_prefork(tsdn, &prof_dump_mtx); - malloc_mutex_prefork(tsdn, &bt2gctx_mtx); - malloc_mutex_prefork(tsdn, &tdatas_mtx); - for (i = 0; i < PROF_NTDATA_LOCKS; i++) - malloc_mutex_prefork(tsdn, 
&tdata_locks[i]); + malloc_mutex_prefork(&tdatas_mtx); + malloc_mutex_prefork(&bt2gctx_mtx); + malloc_mutex_prefork(&next_thr_uid_mtx); + malloc_mutex_prefork(&prof_dump_seq_mtx); for (i = 0; i < PROF_NCTX_LOCKS; i++) - malloc_mutex_prefork(tsdn, &gctx_locks[i]); + malloc_mutex_prefork(&gctx_locks[i]); + for (i = 0; i < PROF_NTDATA_LOCKS; i++) + malloc_mutex_prefork(&tdata_locks[i]); } } void -prof_prefork1(tsdn_t *tsdn) -{ - - if (opt_prof) { - malloc_mutex_prefork(tsdn, &prof_active_mtx); - malloc_mutex_prefork(tsdn, &prof_dump_seq_mtx); - malloc_mutex_prefork(tsdn, &prof_gdump_mtx); - malloc_mutex_prefork(tsdn, &next_thr_uid_mtx); - malloc_mutex_prefork(tsdn, &prof_thread_active_init_mtx); - } -} - -void -prof_postfork_parent(tsdn_t *tsdn) +prof_postfork_parent(void) { if (opt_prof) { unsigned i; - malloc_mutex_postfork_parent(tsdn, - &prof_thread_active_init_mtx); - malloc_mutex_postfork_parent(tsdn, &next_thr_uid_mtx); - malloc_mutex_postfork_parent(tsdn, &prof_gdump_mtx); - malloc_mutex_postfork_parent(tsdn, &prof_dump_seq_mtx); - malloc_mutex_postfork_parent(tsdn, &prof_active_mtx); - for (i = 0; i < PROF_NCTX_LOCKS; i++) - malloc_mutex_postfork_parent(tsdn, &gctx_locks[i]); for (i = 0; i < PROF_NTDATA_LOCKS; i++) - malloc_mutex_postfork_parent(tsdn, &tdata_locks[i]); - malloc_mutex_postfork_parent(tsdn, &tdatas_mtx); - malloc_mutex_postfork_parent(tsdn, &bt2gctx_mtx); - malloc_mutex_postfork_parent(tsdn, &prof_dump_mtx); + malloc_mutex_postfork_parent(&tdata_locks[i]); + for (i = 0; i < PROF_NCTX_LOCKS; i++) + malloc_mutex_postfork_parent(&gctx_locks[i]); + malloc_mutex_postfork_parent(&prof_dump_seq_mtx); + malloc_mutex_postfork_parent(&next_thr_uid_mtx); + malloc_mutex_postfork_parent(&bt2gctx_mtx); + malloc_mutex_postfork_parent(&tdatas_mtx); } } void -prof_postfork_child(tsdn_t *tsdn) +prof_postfork_child(void) { if (opt_prof) { unsigned i; - malloc_mutex_postfork_child(tsdn, &prof_thread_active_init_mtx); - malloc_mutex_postfork_child(tsdn, 
&next_thr_uid_mtx); - malloc_mutex_postfork_child(tsdn, &prof_gdump_mtx); - malloc_mutex_postfork_child(tsdn, &prof_dump_seq_mtx); - malloc_mutex_postfork_child(tsdn, &prof_active_mtx); - for (i = 0; i < PROF_NCTX_LOCKS; i++) - malloc_mutex_postfork_child(tsdn, &gctx_locks[i]); for (i = 0; i < PROF_NTDATA_LOCKS; i++) - malloc_mutex_postfork_child(tsdn, &tdata_locks[i]); - malloc_mutex_postfork_child(tsdn, &tdatas_mtx); - malloc_mutex_postfork_child(tsdn, &bt2gctx_mtx); - malloc_mutex_postfork_child(tsdn, &prof_dump_mtx); + malloc_mutex_postfork_child(&tdata_locks[i]); + for (i = 0; i < PROF_NCTX_LOCKS; i++) + malloc_mutex_postfork_child(&gctx_locks[i]); + malloc_mutex_postfork_child(&prof_dump_seq_mtx); + malloc_mutex_postfork_child(&next_thr_uid_mtx); + malloc_mutex_postfork_child(&bt2gctx_mtx); + malloc_mutex_postfork_child(&tdatas_mtx); } } diff --git a/deps/jemalloc/src/quarantine.c b/deps/jemalloc/src/quarantine.c index 18903fb5c..6c43dfcaa 100644 --- a/deps/jemalloc/src/quarantine.c +++ b/deps/jemalloc/src/quarantine.c @@ -13,22 +13,22 @@ /* Function prototypes for non-inline static functions. 
*/ static quarantine_t *quarantine_grow(tsd_t *tsd, quarantine_t *quarantine); -static void quarantine_drain_one(tsdn_t *tsdn, quarantine_t *quarantine); -static void quarantine_drain(tsdn_t *tsdn, quarantine_t *quarantine, +static void quarantine_drain_one(tsd_t *tsd, quarantine_t *quarantine); +static void quarantine_drain(tsd_t *tsd, quarantine_t *quarantine, size_t upper_bound); /******************************************************************************/ static quarantine_t * -quarantine_init(tsdn_t *tsdn, size_t lg_maxobjs) +quarantine_init(tsd_t *tsd, size_t lg_maxobjs) { quarantine_t *quarantine; - size_t size; - size = offsetof(quarantine_t, objs) + ((ZU(1) << lg_maxobjs) * - sizeof(quarantine_obj_t)); - quarantine = (quarantine_t *)iallocztm(tsdn, size, size2index(size), - false, NULL, true, arena_get(TSDN_NULL, 0, true), true); + assert(tsd_nominal(tsd)); + + quarantine = (quarantine_t *)iallocztm(tsd, offsetof(quarantine_t, objs) + + ((ZU(1) << lg_maxobjs) * sizeof(quarantine_obj_t)), false, + tcache_get(tsd, true), true, NULL); if (quarantine == NULL) return (NULL); quarantine->curbytes = 0; @@ -47,7 +47,7 @@ quarantine_alloc_hook_work(tsd_t *tsd) if (!tsd_nominal(tsd)) return; - quarantine = quarantine_init(tsd_tsdn(tsd), LG_MAXOBJS_INIT); + quarantine = quarantine_init(tsd, LG_MAXOBJS_INIT); /* * Check again whether quarantine has been initialized, because * quarantine_init() may have triggered recursive initialization. 
@@ -55,7 +55,7 @@ quarantine_alloc_hook_work(tsd_t *tsd) if (tsd_quarantine_get(tsd) == NULL) tsd_quarantine_set(tsd, quarantine); else - idalloctm(tsd_tsdn(tsd), quarantine, NULL, true, true); + idalloctm(tsd, quarantine, tcache_get(tsd, false), true); } static quarantine_t * @@ -63,9 +63,9 @@ quarantine_grow(tsd_t *tsd, quarantine_t *quarantine) { quarantine_t *ret; - ret = quarantine_init(tsd_tsdn(tsd), quarantine->lg_maxobjs + 1); + ret = quarantine_init(tsd, quarantine->lg_maxobjs + 1); if (ret == NULL) { - quarantine_drain_one(tsd_tsdn(tsd), quarantine); + quarantine_drain_one(tsd, quarantine); return (quarantine); } @@ -87,18 +87,18 @@ quarantine_grow(tsd_t *tsd, quarantine_t *quarantine) memcpy(&ret->objs[ncopy_a], quarantine->objs, ncopy_b * sizeof(quarantine_obj_t)); } - idalloctm(tsd_tsdn(tsd), quarantine, NULL, true, true); + idalloctm(tsd, quarantine, tcache_get(tsd, false), true); tsd_quarantine_set(tsd, ret); return (ret); } static void -quarantine_drain_one(tsdn_t *tsdn, quarantine_t *quarantine) +quarantine_drain_one(tsd_t *tsd, quarantine_t *quarantine) { quarantine_obj_t *obj = &quarantine->objs[quarantine->first]; - assert(obj->usize == isalloc(tsdn, obj->ptr, config_prof)); - idalloctm(tsdn, obj->ptr, NULL, false, true); + assert(obj->usize == isalloc(obj->ptr, config_prof)); + idalloctm(tsd, obj->ptr, NULL, false); quarantine->curbytes -= obj->usize; quarantine->curobjs--; quarantine->first = (quarantine->first + 1) & ((ZU(1) << @@ -106,24 +106,24 @@ quarantine_drain_one(tsdn_t *tsdn, quarantine_t *quarantine) } static void -quarantine_drain(tsdn_t *tsdn, quarantine_t *quarantine, size_t upper_bound) +quarantine_drain(tsd_t *tsd, quarantine_t *quarantine, size_t upper_bound) { while (quarantine->curbytes > upper_bound && quarantine->curobjs > 0) - quarantine_drain_one(tsdn, quarantine); + quarantine_drain_one(tsd, quarantine); } void quarantine(tsd_t *tsd, void *ptr) { quarantine_t *quarantine; - size_t usize = isalloc(tsd_tsdn(tsd), ptr, 
config_prof); + size_t usize = isalloc(ptr, config_prof); cassert(config_fill); assert(opt_quarantine); if ((quarantine = tsd_quarantine_get(tsd)) == NULL) { - idalloctm(tsd_tsdn(tsd), ptr, NULL, false, true); + idalloctm(tsd, ptr, NULL, false); return; } /* @@ -133,7 +133,7 @@ quarantine(tsd_t *tsd, void *ptr) if (quarantine->curbytes + usize > opt_quarantine) { size_t upper_bound = (opt_quarantine >= usize) ? opt_quarantine - usize : 0; - quarantine_drain(tsd_tsdn(tsd), quarantine, upper_bound); + quarantine_drain(tsd, quarantine, upper_bound); } /* Grow the quarantine ring buffer if it's full. */ if (quarantine->curobjs == (ZU(1) << quarantine->lg_maxobjs)) @@ -158,11 +158,11 @@ quarantine(tsd_t *tsd, void *ptr) && usize <= SMALL_MAXCLASS) arena_quarantine_junk_small(ptr, usize); else - memset(ptr, JEMALLOC_FREE_JUNK, usize); + memset(ptr, 0x5a, usize); } } else { assert(quarantine->curbytes == 0); - idalloctm(tsd_tsdn(tsd), ptr, NULL, false, true); + idalloctm(tsd, ptr, NULL, false); } } @@ -176,8 +176,8 @@ quarantine_cleanup(tsd_t *tsd) quarantine = tsd_quarantine_get(tsd); if (quarantine != NULL) { - quarantine_drain(tsd_tsdn(tsd), quarantine, 0); - idalloctm(tsd_tsdn(tsd), quarantine, NULL, true, true); + quarantine_drain(tsd, quarantine, 0); + idalloctm(tsd, quarantine, tcache_get(tsd, false), true); tsd_quarantine_set(tsd, NULL); } } diff --git a/deps/jemalloc/src/rtree.c b/deps/jemalloc/src/rtree.c index f2e2997d5..af0d97e75 100644 --- a/deps/jemalloc/src/rtree.c +++ b/deps/jemalloc/src/rtree.c @@ -15,8 +15,6 @@ rtree_new(rtree_t *rtree, unsigned bits, rtree_node_alloc_t *alloc, { unsigned bits_in_leaf, height, i; - assert(RTREE_HEIGHT_MAX == ((ZU(1) << (LG_SIZEOF_PTR+3)) / - RTREE_BITS_PER_LEVEL)); assert(bits > 0 && bits <= (sizeof(uintptr_t) << 3)); bits_in_leaf = (bits % RTREE_BITS_PER_LEVEL) == 0 ? 
RTREE_BITS_PER_LEVEL @@ -96,15 +94,12 @@ rtree_node_init(rtree_t *rtree, unsigned level, rtree_node_elm_t **elmp) rtree_node_elm_t *node; if (atomic_cas_p((void **)elmp, NULL, RTREE_NODE_INITIALIZING)) { - spin_t spinner; - /* * Another thread is already in the process of initializing. * Spin-wait until initialization is complete. */ - spin_init(&spinner); do { - spin_adaptive(&spinner); + CPU_SPINWAIT; node = atomic_read_p((void **)elmp); } while (node == RTREE_NODE_INITIALIZING); } else { @@ -128,5 +123,5 @@ rtree_node_elm_t * rtree_child_read_hard(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level) { - return (rtree_node_init(rtree, level+1, &elm->child)); + return (rtree_node_init(rtree, level, &elm->child)); } diff --git a/deps/jemalloc/src/spin.c b/deps/jemalloc/src/spin.c deleted file mode 100644 index 5242d95aa..000000000 --- a/deps/jemalloc/src/spin.c +++ /dev/null @@ -1,2 +0,0 @@ -#define JEMALLOC_SPIN_C_ -#include "jemalloc/internal/jemalloc_internal.h" diff --git a/deps/jemalloc/src/stats.c b/deps/jemalloc/src/stats.c old mode 100755 new mode 100644 index 1360f3bd0..154c3e74c --- a/deps/jemalloc/src/stats.c +++ b/deps/jemalloc/src/stats.c @@ -3,7 +3,7 @@ #define CTL_GET(n, v, t) do { \ size_t sz = sizeof(t); \ - xmallctl(n, (void *)v, &sz, NULL, 0); \ + xmallctl(n, v, &sz, NULL, 0); \ } while (0) #define CTL_M2_GET(n, i, v, t) do { \ @@ -12,7 +12,7 @@ size_t sz = sizeof(t); \ xmallctlnametomib(n, mib, &miblen); \ mib[2] = (i); \ - xmallctlbymib(mib, miblen, (void *)v, &sz, NULL, 0); \ + xmallctlbymib(mib, miblen, v, &sz, NULL, 0); \ } while (0) #define CTL_M2_M4_GET(n, i, j, v, t) do { \ @@ -22,7 +22,7 @@ xmallctlnametomib(n, mib, &miblen); \ mib[2] = (i); \ mib[4] = (j); \ - xmallctlbymib(mib, miblen, (void *)v, &sz, NULL, 0); \ + xmallctlbymib(mib, miblen, v, &sz, NULL, 0); \ } while (0) /******************************************************************************/ @@ -32,108 +32,87 @@ bool opt_stats_print = false; size_t stats_cactive = 0; 
+/******************************************************************************/ +/* Function prototypes for non-inline static functions. */ + +static void stats_arena_bins_print(void (*write_cb)(void *, const char *), + void *cbopaque, unsigned i); +static void stats_arena_lruns_print(void (*write_cb)(void *, const char *), + void *cbopaque, unsigned i); +static void stats_arena_hchunks_print( + void (*write_cb)(void *, const char *), void *cbopaque, unsigned i); +static void stats_arena_print(void (*write_cb)(void *, const char *), + void *cbopaque, unsigned i, bool bins, bool large, bool huge); + /******************************************************************************/ static void stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, - bool json, bool large, bool huge, unsigned i) + unsigned i) { size_t page; - bool config_tcache, in_gap, in_gap_prev; + bool config_tcache, in_gap; unsigned nbins, j; CTL_GET("arenas.page", &page, size_t); - CTL_GET("arenas.nbins", &nbins, unsigned); - if (json) { + CTL_GET("config.tcache", &config_tcache, bool); + if (config_tcache) { malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"bins\": [\n"); + "bins: size ind allocated nmalloc" + " ndalloc nrequests curregs curruns regs" + " pgs util nfills nflushes newruns" + " reruns\n"); } else { - CTL_GET("config.tcache", &config_tcache, bool); - if (config_tcache) { - malloc_cprintf(write_cb, cbopaque, - "bins: size ind allocated nmalloc" - " ndalloc nrequests curregs" - " curruns regs pgs util nfills" - " nflushes newruns reruns\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "bins: size ind allocated nmalloc" - " ndalloc nrequests curregs" - " curruns regs pgs util newruns" - " reruns\n"); - } + malloc_cprintf(write_cb, cbopaque, + "bins: size ind allocated nmalloc" + " ndalloc nrequests curregs curruns regs" + " pgs util newruns reruns\n"); } + CTL_GET("arenas.nbins", &nbins, unsigned); for (j = 0, in_gap = false; j < nbins; j++) { uint64_t 
nruns; - size_t reg_size, run_size, curregs; - size_t curruns; - uint32_t nregs; - uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; - uint64_t nreruns; CTL_M2_M4_GET("stats.arenas.0.bins.0.nruns", i, j, &nruns, uint64_t); - in_gap_prev = in_gap; - in_gap = (nruns == 0); - - if (!json && in_gap_prev && !in_gap) { - malloc_cprintf(write_cb, cbopaque, - " ---\n"); - } - - CTL_M2_GET("arenas.bin.0.size", j, ®_size, size_t); - CTL_M2_GET("arenas.bin.0.nregs", j, &nregs, uint32_t); - CTL_M2_GET("arenas.bin.0.run_size", j, &run_size, size_t); - - CTL_M2_M4_GET("stats.arenas.0.bins.0.nmalloc", i, j, &nmalloc, - uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.ndalloc", i, j, &ndalloc, - uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.curregs", i, j, &curregs, - size_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.nrequests", i, j, - &nrequests, uint64_t); - if (config_tcache) { - CTL_M2_M4_GET("stats.arenas.0.bins.0.nfills", i, j, - &nfills, uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.nflushes", i, j, - &nflushes, uint64_t); - } - CTL_M2_M4_GET("stats.arenas.0.bins.0.nreruns", i, j, &nreruns, - uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.curruns", i, j, &curruns, - size_t); - - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t{\n" - "\t\t\t\t\t\t\"nmalloc\": %"FMTu64",\n" - "\t\t\t\t\t\t\"ndalloc\": %"FMTu64",\n" - "\t\t\t\t\t\t\"curregs\": %zu,\n" - "\t\t\t\t\t\t\"nrequests\": %"FMTu64",\n", - nmalloc, - ndalloc, - curregs, - nrequests); - if (config_tcache) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\t\"nfills\": %"FMTu64",\n" - "\t\t\t\t\t\t\"nflushes\": %"FMTu64",\n", - nfills, - nflushes); - } - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\t\"nreruns\": %"FMTu64",\n" - "\t\t\t\t\t\t\"curruns\": %zu\n" - "\t\t\t\t\t}%s\n", - nreruns, - curruns, - (j + 1 < nbins) ? 
"," : ""); - } else if (!in_gap) { - size_t availregs, milli; + if (nruns == 0) + in_gap = true; + else { + size_t reg_size, run_size, curregs, availregs, milli; + size_t curruns; + uint32_t nregs; + uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; + uint64_t reruns; char util[6]; /* "x.yyy". */ + if (in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + in_gap = false; + } + CTL_M2_GET("arenas.bin.0.size", j, ®_size, size_t); + CTL_M2_GET("arenas.bin.0.nregs", j, &nregs, uint32_t); + CTL_M2_GET("arenas.bin.0.run_size", j, &run_size, + size_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.nmalloc", i, j, + &nmalloc, uint64_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.ndalloc", i, j, + &ndalloc, uint64_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.curregs", i, j, + &curregs, size_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.nrequests", i, j, + &nrequests, uint64_t); + if (config_tcache) { + CTL_M2_M4_GET("stats.arenas.0.bins.0.nfills", i, + j, &nfills, uint64_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.nflushes", + i, j, &nflushes, uint64_t); + } + CTL_M2_M4_GET("stats.arenas.0.bins.0.nreruns", i, j, + &reruns, uint64_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.curruns", i, j, + &curruns, size_t); + availregs = nregs * curruns; milli = (availregs != 0) ? 
(1000 * curregs) / availregs : 1000; @@ -159,7 +138,7 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, reg_size, j, curregs * reg_size, nmalloc, ndalloc, nrequests, curregs, curruns, nregs, run_size / page, util, nfills, nflushes, - nruns, nreruns); + nruns, reruns); } else { malloc_cprintf(write_cb, cbopaque, "%20zu %3u %12zu %12"FMTu64 @@ -168,38 +147,28 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, " %12"FMTu64"\n", reg_size, j, curregs * reg_size, nmalloc, ndalloc, nrequests, curregs, curruns, nregs, - run_size / page, util, nruns, nreruns); + run_size / page, util, nruns, reruns); } } } - if (json) { + if (in_gap) { malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t]%s\n", (large || huge) ? "," : ""); - } else { - if (in_gap) { - malloc_cprintf(write_cb, cbopaque, - " ---\n"); - } + " ---\n"); } } static void stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, - bool json, bool huge, unsigned i) + unsigned i) { unsigned nbins, nlruns, j; - bool in_gap, in_gap_prev; + bool in_gap; + malloc_cprintf(write_cb, cbopaque, + "large: size ind allocated nmalloc ndalloc" + " nrequests curruns\n"); CTL_GET("arenas.nbins", &nbins, unsigned); CTL_GET("arenas.nlruns", &nlruns, unsigned); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"lruns\": [\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "large: size ind allocated nmalloc" - " ndalloc nrequests curruns\n"); - } for (j = 0, in_gap = false; j < nlruns; j++) { uint64_t nmalloc, ndalloc, nrequests; size_t run_size, curruns; @@ -210,25 +179,17 @@ stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, uint64_t); CTL_M2_M4_GET("stats.arenas.0.lruns.0.nrequests", i, j, &nrequests, uint64_t); - in_gap_prev = in_gap; - in_gap = (nrequests == 0); - - if (!json && in_gap_prev && !in_gap) { - malloc_cprintf(write_cb, cbopaque, - " ---\n"); - } - - CTL_M2_GET("arenas.lrun.0.size", j, &run_size, 
size_t); - CTL_M2_M4_GET("stats.arenas.0.lruns.0.curruns", i, j, &curruns, - size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t{\n" - "\t\t\t\t\t\t\"curruns\": %zu\n" - "\t\t\t\t\t}%s\n", - curruns, - (j + 1 < nlruns) ? "," : ""); - } else if (!in_gap) { + if (nrequests == 0) + in_gap = true; + else { + CTL_M2_GET("arenas.lrun.0.size", j, &run_size, size_t); + CTL_M2_M4_GET("stats.arenas.0.lruns.0.curruns", i, j, + &curruns, size_t); + if (in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + in_gap = false; + } malloc_cprintf(write_cb, cbopaque, "%20zu %3u %12zu %12"FMTu64" %12"FMTu64 " %12"FMTu64" %12zu\n", @@ -236,35 +197,25 @@ stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, ndalloc, nrequests, curruns); } } - if (json) { + if (in_gap) { malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t]%s\n", huge ? "," : ""); - } else { - if (in_gap) { - malloc_cprintf(write_cb, cbopaque, - " ---\n"); - } + " ---\n"); } } static void stats_arena_hchunks_print(void (*write_cb)(void *, const char *), - void *cbopaque, bool json, unsigned i) + void *cbopaque, unsigned i) { unsigned nbins, nlruns, nhchunks, j; - bool in_gap, in_gap_prev; + bool in_gap; + malloc_cprintf(write_cb, cbopaque, + "huge: size ind allocated nmalloc ndalloc" + " nrequests curhchunks\n"); CTL_GET("arenas.nbins", &nbins, unsigned); CTL_GET("arenas.nlruns", &nlruns, unsigned); CTL_GET("arenas.nhchunks", &nhchunks, unsigned); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"hchunks\": [\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "huge: size ind allocated nmalloc" - " ndalloc nrequests curhchunks\n"); - } for (j = 0, in_gap = false; j < nhchunks; j++) { uint64_t nmalloc, ndalloc, nrequests; size_t hchunk_size, curhchunks; @@ -275,25 +226,18 @@ stats_arena_hchunks_print(void (*write_cb)(void *, const char *), &ndalloc, uint64_t); CTL_M2_M4_GET("stats.arenas.0.hchunks.0.nrequests", i, j, &nrequests, uint64_t); - in_gap_prev = 
in_gap; - in_gap = (nrequests == 0); - - if (!json && in_gap_prev && !in_gap) { - malloc_cprintf(write_cb, cbopaque, - " ---\n"); - } - - CTL_M2_GET("arenas.hchunk.0.size", j, &hchunk_size, size_t); - CTL_M2_M4_GET("stats.arenas.0.hchunks.0.curhchunks", i, j, - &curhchunks, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t{\n" - "\t\t\t\t\t\t\"curhchunks\": %zu\n" - "\t\t\t\t\t}%s\n", - curhchunks, - (j + 1 < nhchunks) ? "," : ""); - } else if (!in_gap) { + if (nrequests == 0) + in_gap = true; + else { + CTL_M2_GET("arenas.hchunk.0.size", j, &hchunk_size, + size_t); + CTL_M2_M4_GET("stats.arenas.0.hchunks.0.curhchunks", i, + j, &curhchunks, size_t); + if (in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + in_gap = false; + } malloc_cprintf(write_cb, cbopaque, "%20zu %3u %12zu %12"FMTu64" %12"FMTu64 " %12"FMTu64" %12zu\n", @@ -302,25 +246,20 @@ stats_arena_hchunks_print(void (*write_cb)(void *, const char *), nrequests, curhchunks); } } - if (json) { + if (in_gap) { malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t]\n"); - } else { - if (in_gap) { - malloc_cprintf(write_cb, cbopaque, - " ---\n"); - } + " ---\n"); } } static void stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, - bool json, unsigned i, bool bins, bool large, bool huge) + unsigned i, bool bins, bool large, bool huge) { unsigned nthreads; const char *dss; - ssize_t lg_dirty_mult, decay_time; - size_t page, pactive, pdirty, mapped, retained; + ssize_t lg_dirty_mult; + size_t page, pactive, pdirty, mapped; size_t metadata_mapped, metadata_allocated; uint64_t npurge, nmadvise, purged; size_t small_allocated; @@ -333,731 +272,88 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("arenas.page", &page, size_t); CTL_M2_GET("stats.arenas.0.nthreads", i, &nthreads, unsigned); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"nthreads\": %u,\n", nthreads); - } else { - malloc_cprintf(write_cb, cbopaque, - 
"assigned threads: %u\n", nthreads); - } - + malloc_cprintf(write_cb, cbopaque, + "assigned threads: %u\n", nthreads); CTL_M2_GET("stats.arenas.0.dss", i, &dss, const char *); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"dss\": \"%s\",\n", dss); - } else { - malloc_cprintf(write_cb, cbopaque, - "dss allocation precedence: %s\n", dss); - } - + malloc_cprintf(write_cb, cbopaque, "dss allocation precedence: %s\n", + dss); CTL_M2_GET("stats.arenas.0.lg_dirty_mult", i, &lg_dirty_mult, ssize_t); - if (json) { + if (lg_dirty_mult >= 0) { malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"lg_dirty_mult\": %zd,\n", lg_dirty_mult); + "min active:dirty page ratio: %u:1\n", + (1U << lg_dirty_mult)); } else { - if (opt_purge == purge_mode_ratio) { - if (lg_dirty_mult >= 0) { - malloc_cprintf(write_cb, cbopaque, - "min active:dirty page ratio: %u:1\n", - (1U << lg_dirty_mult)); - } else { - malloc_cprintf(write_cb, cbopaque, - "min active:dirty page ratio: N/A\n"); - } - } - } - - CTL_M2_GET("stats.arenas.0.decay_time", i, &decay_time, ssize_t); - if (json) { malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"decay_time\": %zd,\n", decay_time); - } else { - if (opt_purge == purge_mode_decay) { - if (decay_time >= 0) { - malloc_cprintf(write_cb, cbopaque, - "decay time: %zd\n", decay_time); - } else { - malloc_cprintf(write_cb, cbopaque, - "decay time: N/A\n"); - } - } + "min active:dirty page ratio: N/A\n"); } - CTL_M2_GET("stats.arenas.0.pactive", i, &pactive, size_t); CTL_M2_GET("stats.arenas.0.pdirty", i, &pdirty, size_t); CTL_M2_GET("stats.arenas.0.npurge", i, &npurge, uint64_t); CTL_M2_GET("stats.arenas.0.nmadvise", i, &nmadvise, uint64_t); CTL_M2_GET("stats.arenas.0.purged", i, &purged, uint64_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"pactive\": %zu,\n", pactive); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"pdirty\": %zu,\n", pdirty); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"npurge\": %"FMTu64",\n", npurge); - 
malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"nmadvise\": %"FMTu64",\n", nmadvise); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"purged\": %"FMTu64",\n", purged); - } else { - malloc_cprintf(write_cb, cbopaque, - "purging: dirty: %zu, sweeps: %"FMTu64", madvises: %"FMTu64 - ", purged: %"FMTu64"\n", pdirty, npurge, nmadvise, purged); - } + malloc_cprintf(write_cb, cbopaque, + "dirty pages: %zu:%zu active:dirty, %"FMTu64" sweep%s, %"FMTu64 + " madvise%s, %"FMTu64" purged\n", pactive, pdirty, npurge, npurge == + 1 ? "" : "s", nmadvise, nmadvise == 1 ? "" : "s", purged); + malloc_cprintf(write_cb, cbopaque, + " allocated nmalloc ndalloc" + " nrequests\n"); CTL_M2_GET("stats.arenas.0.small.allocated", i, &small_allocated, size_t); CTL_M2_GET("stats.arenas.0.small.nmalloc", i, &small_nmalloc, uint64_t); CTL_M2_GET("stats.arenas.0.small.ndalloc", i, &small_ndalloc, uint64_t); CTL_M2_GET("stats.arenas.0.small.nrequests", i, &small_nrequests, uint64_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"small\": {\n"); - - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"allocated\": %zu,\n", small_allocated); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"nmalloc\": %"FMTu64",\n", small_nmalloc); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"ndalloc\": %"FMTu64",\n", small_ndalloc); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"nrequests\": %"FMTu64"\n", small_nrequests); - - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t},\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - " allocated nmalloc" - " ndalloc nrequests\n"); - malloc_cprintf(write_cb, cbopaque, - "small: %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64"\n", - small_allocated, small_nmalloc, small_ndalloc, - small_nrequests); - } - + malloc_cprintf(write_cb, cbopaque, + "small: %12zu %12"FMTu64" %12"FMTu64 + " %12"FMTu64"\n", + small_allocated, small_nmalloc, small_ndalloc, small_nrequests); CTL_M2_GET("stats.arenas.0.large.allocated", i, &large_allocated, size_t); 
CTL_M2_GET("stats.arenas.0.large.nmalloc", i, &large_nmalloc, uint64_t); CTL_M2_GET("stats.arenas.0.large.ndalloc", i, &large_ndalloc, uint64_t); CTL_M2_GET("stats.arenas.0.large.nrequests", i, &large_nrequests, uint64_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"large\": {\n"); - - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"allocated\": %zu,\n", large_allocated); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"nmalloc\": %"FMTu64",\n", large_nmalloc); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"ndalloc\": %"FMTu64",\n", large_ndalloc); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"nrequests\": %"FMTu64"\n", large_nrequests); - - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t},\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "large: %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64"\n", - large_allocated, large_nmalloc, large_ndalloc, - large_nrequests); - } - + malloc_cprintf(write_cb, cbopaque, + "large: %12zu %12"FMTu64" %12"FMTu64 + " %12"FMTu64"\n", + large_allocated, large_nmalloc, large_ndalloc, large_nrequests); CTL_M2_GET("stats.arenas.0.huge.allocated", i, &huge_allocated, size_t); CTL_M2_GET("stats.arenas.0.huge.nmalloc", i, &huge_nmalloc, uint64_t); CTL_M2_GET("stats.arenas.0.huge.ndalloc", i, &huge_ndalloc, uint64_t); CTL_M2_GET("stats.arenas.0.huge.nrequests", i, &huge_nrequests, uint64_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"huge\": {\n"); - - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"allocated\": %zu,\n", huge_allocated); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"nmalloc\": %"FMTu64",\n", huge_nmalloc); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"ndalloc\": %"FMTu64",\n", huge_ndalloc); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"nrequests\": %"FMTu64"\n", huge_nrequests); - - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t},\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "huge: %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64"\n", - 
huge_allocated, huge_nmalloc, huge_ndalloc, huge_nrequests); - malloc_cprintf(write_cb, cbopaque, - "total: %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64"\n", - small_allocated + large_allocated + huge_allocated, - small_nmalloc + large_nmalloc + huge_nmalloc, - small_ndalloc + large_ndalloc + huge_ndalloc, - small_nrequests + large_nrequests + huge_nrequests); - } - if (!json) { - malloc_cprintf(write_cb, cbopaque, - "active: %12zu\n", pactive * page); - } - + malloc_cprintf(write_cb, cbopaque, + "huge: %12zu %12"FMTu64" %12"FMTu64 + " %12"FMTu64"\n", + huge_allocated, huge_nmalloc, huge_ndalloc, huge_nrequests); + malloc_cprintf(write_cb, cbopaque, + "total: %12zu %12"FMTu64" %12"FMTu64 + " %12"FMTu64"\n", + small_allocated + large_allocated + huge_allocated, + small_nmalloc + large_nmalloc + huge_nmalloc, + small_ndalloc + large_ndalloc + huge_ndalloc, + small_nrequests + large_nrequests + huge_nrequests); + malloc_cprintf(write_cb, cbopaque, + "active: %12zu\n", pactive * page); CTL_M2_GET("stats.arenas.0.mapped", i, &mapped, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"mapped\": %zu,\n", mapped); - } else { - malloc_cprintf(write_cb, cbopaque, - "mapped: %12zu\n", mapped); - } - - CTL_M2_GET("stats.arenas.0.retained", i, &retained, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"retained\": %zu,\n", retained); - } else { - malloc_cprintf(write_cb, cbopaque, - "retained: %12zu\n", retained); - } - + malloc_cprintf(write_cb, cbopaque, + "mapped: %12zu\n", mapped); CTL_M2_GET("stats.arenas.0.metadata.mapped", i, &metadata_mapped, size_t); CTL_M2_GET("stats.arenas.0.metadata.allocated", i, &metadata_allocated, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"metadata\": {\n"); + malloc_cprintf(write_cb, cbopaque, + "metadata: mapped: %zu, allocated: %zu\n", + metadata_mapped, metadata_allocated); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"mapped\": %zu,\n", metadata_mapped); - 
malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"allocated\": %zu\n", metadata_allocated); - - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t},\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "metadata: mapped: %zu, allocated: %zu\n", - metadata_mapped, metadata_allocated); - } - - if (bins) { - stats_arena_bins_print(write_cb, cbopaque, json, large, huge, - i); - } + if (bins) + stats_arena_bins_print(write_cb, cbopaque, i); if (large) - stats_arena_lruns_print(write_cb, cbopaque, json, huge, i); + stats_arena_lruns_print(write_cb, cbopaque, i); if (huge) - stats_arena_hchunks_print(write_cb, cbopaque, json, i); -} - -static void -stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, - bool json, bool merged, bool unmerged) -{ - const char *cpv; - bool bv; - unsigned uv; - uint32_t u32v; - uint64_t u64v; - ssize_t ssv; - size_t sv, bsz, usz, ssz, sssz, cpsz; - - bsz = sizeof(bool); - usz = sizeof(unsigned); - ssz = sizeof(size_t); - sssz = sizeof(ssize_t); - cpsz = sizeof(const char *); - - CTL_GET("version", &cpv, const char *); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\"version\": \"%s\",\n", cpv); - } else - malloc_cprintf(write_cb, cbopaque, "Version: %s\n", cpv); - - /* config. */ -#define CONFIG_WRITE_BOOL_JSON(n, c) \ - if (json) { \ - CTL_GET("config."#n, &bv, bool); \ - malloc_cprintf(write_cb, cbopaque, \ - "\t\t\t\""#n"\": %s%s\n", bv ? "true" : "false", \ - (c)); \ - } - - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\"config\": {\n"); - } - - CONFIG_WRITE_BOOL_JSON(cache_oblivious, ",") - - CTL_GET("config.debug", &bv, bool); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"debug\": %s,\n", bv ? "true" : "false"); - } else { - malloc_cprintf(write_cb, cbopaque, "Assertions %s\n", - bv ? 
"enabled" : "disabled"); - } - - CONFIG_WRITE_BOOL_JSON(fill, ",") - CONFIG_WRITE_BOOL_JSON(lazy_lock, ",") - - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"malloc_conf\": \"%s\",\n", - config_malloc_conf); - } else { - malloc_cprintf(write_cb, cbopaque, - "config.malloc_conf: \"%s\"\n", config_malloc_conf); - } - - CONFIG_WRITE_BOOL_JSON(munmap, ",") - CONFIG_WRITE_BOOL_JSON(prof, ",") - CONFIG_WRITE_BOOL_JSON(prof_libgcc, ",") - CONFIG_WRITE_BOOL_JSON(prof_libunwind, ",") - CONFIG_WRITE_BOOL_JSON(stats, ",") - CONFIG_WRITE_BOOL_JSON(tcache, ",") - CONFIG_WRITE_BOOL_JSON(tls, ",") - CONFIG_WRITE_BOOL_JSON(utrace, ",") - CONFIG_WRITE_BOOL_JSON(valgrind, ",") - CONFIG_WRITE_BOOL_JSON(xmalloc, "") - - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t},\n"); - } -#undef CONFIG_WRITE_BOOL_JSON - - /* opt. */ -#define OPT_WRITE_BOOL(n, c) \ - if (je_mallctl("opt."#n, (void *)&bv, &bsz, NULL, 0) == 0) { \ - if (json) { \ - malloc_cprintf(write_cb, cbopaque, \ - "\t\t\t\""#n"\": %s%s\n", bv ? "true" : \ - "false", (c)); \ - } else { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %s\n", bv ? "true" : "false"); \ - } \ - } -#define OPT_WRITE_BOOL_MUTABLE(n, m, c) { \ - bool bv2; \ - if (je_mallctl("opt."#n, (void *)&bv, &bsz, NULL, 0) == 0 && \ - je_mallctl(#m, &bv2, (void *)&bsz, NULL, 0) == 0) { \ - if (json) { \ - malloc_cprintf(write_cb, cbopaque, \ - "\t\t\t\""#n"\": %s%s\n", bv ? "true" : \ - "false", (c)); \ - } else { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %s ("#m": %s)\n", bv ? "true" \ - : "false", bv2 ? 
"true" : "false"); \ - } \ - } \ -} -#define OPT_WRITE_UNSIGNED(n, c) \ - if (je_mallctl("opt."#n, (void *)&uv, &usz, NULL, 0) == 0) { \ - if (json) { \ - malloc_cprintf(write_cb, cbopaque, \ - "\t\t\t\""#n"\": %u%s\n", uv, (c)); \ - } else { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %u\n", uv); \ - } \ - } -#define OPT_WRITE_SIZE_T(n, c) \ - if (je_mallctl("opt."#n, (void *)&sv, &ssz, NULL, 0) == 0) { \ - if (json) { \ - malloc_cprintf(write_cb, cbopaque, \ - "\t\t\t\""#n"\": %zu%s\n", sv, (c)); \ - } else { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %zu\n", sv); \ - } \ - } -#define OPT_WRITE_SSIZE_T(n, c) \ - if (je_mallctl("opt."#n, (void *)&ssv, &sssz, NULL, 0) == 0) { \ - if (json) { \ - malloc_cprintf(write_cb, cbopaque, \ - "\t\t\t\""#n"\": %zd%s\n", ssv, (c)); \ - } else { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %zd\n", ssv); \ - } \ - } -#define OPT_WRITE_SSIZE_T_MUTABLE(n, m, c) { \ - ssize_t ssv2; \ - if (je_mallctl("opt."#n, (void *)&ssv, &sssz, NULL, 0) == 0 && \ - je_mallctl(#m, (void *)&ssv2, &sssz, NULL, 0) == 0) { \ - if (json) { \ - malloc_cprintf(write_cb, cbopaque, \ - "\t\t\t\""#n"\": %zd%s\n", ssv, (c)); \ - } else { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %zd ("#m": %zd)\n", \ - ssv, ssv2); \ - } \ - } \ -} -#define OPT_WRITE_CHAR_P(n, c) \ - if (je_mallctl("opt."#n, (void *)&cpv, &cpsz, NULL, 0) == 0) { \ - if (json) { \ - malloc_cprintf(write_cb, cbopaque, \ - "\t\t\t\""#n"\": \"%s\"%s\n", cpv, (c)); \ - } else { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": \"%s\"\n", cpv); \ - } \ - } - - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\"opt\": {\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "Run-time option settings:\n"); - } - OPT_WRITE_BOOL(abort, ",") - OPT_WRITE_SIZE_T(lg_chunk, ",") - OPT_WRITE_CHAR_P(dss, ",") - OPT_WRITE_UNSIGNED(narenas, ",") - OPT_WRITE_CHAR_P(purge, ",") - if (json || opt_purge == purge_mode_ratio) { - 
OPT_WRITE_SSIZE_T_MUTABLE(lg_dirty_mult, - arenas.lg_dirty_mult, ",") - } - if (json || opt_purge == purge_mode_decay) { - OPT_WRITE_SSIZE_T_MUTABLE(decay_time, arenas.decay_time, ",") - } - OPT_WRITE_CHAR_P(junk, ",") - OPT_WRITE_SIZE_T(quarantine, ",") - OPT_WRITE_BOOL(redzone, ",") - OPT_WRITE_BOOL(zero, ",") - OPT_WRITE_BOOL(utrace, ",") - OPT_WRITE_BOOL(xmalloc, ",") - OPT_WRITE_BOOL(tcache, ",") - OPT_WRITE_SSIZE_T(lg_tcache_max, ",") - OPT_WRITE_BOOL(prof, ",") - OPT_WRITE_CHAR_P(prof_prefix, ",") - OPT_WRITE_BOOL_MUTABLE(prof_active, prof.active, ",") - OPT_WRITE_BOOL_MUTABLE(prof_thread_active_init, prof.thread_active_init, - ",") - OPT_WRITE_SSIZE_T_MUTABLE(lg_prof_sample, prof.lg_sample, ",") - OPT_WRITE_BOOL(prof_accum, ",") - OPT_WRITE_SSIZE_T(lg_prof_interval, ",") - OPT_WRITE_BOOL(prof_gdump, ",") - OPT_WRITE_BOOL(prof_final, ",") - OPT_WRITE_BOOL(prof_leak, ",") - /* - * stats_print is always emitted, so as long as stats_print comes last - * it's safe to unconditionally omit the comma here (rather than having - * to conditionally omit it elsewhere depending on configuration). - */ - OPT_WRITE_BOOL(stats_print, "") - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t},\n"); - } - -#undef OPT_WRITE_BOOL -#undef OPT_WRITE_BOOL_MUTABLE -#undef OPT_WRITE_SIZE_T -#undef OPT_WRITE_SSIZE_T -#undef OPT_WRITE_CHAR_P - - /* arenas. 
*/ - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\"arenas\": {\n"); - } - - CTL_GET("arenas.narenas", &uv, unsigned); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"narenas\": %u,\n", uv); - } else - malloc_cprintf(write_cb, cbopaque, "Arenas: %u\n", uv); - - CTL_GET("arenas.lg_dirty_mult", &ssv, ssize_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"lg_dirty_mult\": %zd,\n", ssv); - } else if (opt_purge == purge_mode_ratio) { - if (ssv >= 0) { - malloc_cprintf(write_cb, cbopaque, - "Min active:dirty page ratio per arena: " - "%u:1\n", (1U << ssv)); - } else { - malloc_cprintf(write_cb, cbopaque, - "Min active:dirty page ratio per arena: " - "N/A\n"); - } - } - CTL_GET("arenas.decay_time", &ssv, ssize_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"decay_time\": %zd,\n", ssv); - } else if (opt_purge == purge_mode_decay) { - malloc_cprintf(write_cb, cbopaque, - "Unused dirty page decay time: %zd%s\n", - ssv, (ssv < 0) ? " (no decay)" : ""); - } - - CTL_GET("arenas.quantum", &sv, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"quantum\": %zu,\n", sv); - } else - malloc_cprintf(write_cb, cbopaque, "Quantum size: %zu\n", sv); - - CTL_GET("arenas.page", &sv, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"page\": %zu,\n", sv); - } else - malloc_cprintf(write_cb, cbopaque, "Page size: %zu\n", sv); - - if (je_mallctl("arenas.tcache_max", (void *)&sv, &ssz, NULL, 0) == 0) { - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"tcache_max\": %zu,\n", sv); - } else { - malloc_cprintf(write_cb, cbopaque, - "Maximum thread-cached size class: %zu\n", sv); - } - } - - if (json) { - unsigned nbins, nlruns, nhchunks, i; - - CTL_GET("arenas.nbins", &nbins, unsigned); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"nbins\": %u,\n", nbins); - - CTL_GET("arenas.nhbins", &uv, unsigned); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"nhbins\": %u,\n", uv); - - 
malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"bin\": [\n"); - for (i = 0; i < nbins; i++) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t{\n"); - - CTL_M2_GET("arenas.bin.0.size", i, &sv, size_t); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"size\": %zu,\n", sv); - - CTL_M2_GET("arenas.bin.0.nregs", i, &u32v, uint32_t); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"nregs\": %"FMTu32",\n", u32v); - - CTL_M2_GET("arenas.bin.0.run_size", i, &sv, size_t); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"run_size\": %zu\n", sv); - - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t}%s\n", (i + 1 < nbins) ? "," : ""); - } - malloc_cprintf(write_cb, cbopaque, - "\t\t\t],\n"); - - CTL_GET("arenas.nlruns", &nlruns, unsigned); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"nlruns\": %u,\n", nlruns); - - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"lrun\": [\n"); - for (i = 0; i < nlruns; i++) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t{\n"); - - CTL_M2_GET("arenas.lrun.0.size", i, &sv, size_t); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"size\": %zu\n", sv); - - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t}%s\n", (i + 1 < nlruns) ? "," : ""); - } - malloc_cprintf(write_cb, cbopaque, - "\t\t\t],\n"); - - CTL_GET("arenas.nhchunks", &nhchunks, unsigned); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"nhchunks\": %u,\n", nhchunks); - - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"hchunk\": [\n"); - for (i = 0; i < nhchunks; i++) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t{\n"); - - CTL_M2_GET("arenas.hchunk.0.size", i, &sv, size_t); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"size\": %zu\n", sv); - - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t}%s\n", (i + 1 < nhchunks) ? "," : ""); - } - malloc_cprintf(write_cb, cbopaque, - "\t\t\t]\n"); - - malloc_cprintf(write_cb, cbopaque, - "\t\t},\n"); - } - - /* prof. 
*/ - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\"prof\": {\n"); - - CTL_GET("prof.thread_active_init", &bv, bool); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"thread_active_init\": %s,\n", bv ? "true" : - "false"); - - CTL_GET("prof.active", &bv, bool); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"active\": %s,\n", bv ? "true" : "false"); - - CTL_GET("prof.gdump", &bv, bool); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"gdump\": %s,\n", bv ? "true" : "false"); - - CTL_GET("prof.interval", &u64v, uint64_t); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"interval\": %"FMTu64",\n", u64v); - - CTL_GET("prof.lg_sample", &ssv, ssize_t); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"lg_sample\": %zd\n", ssv); - - malloc_cprintf(write_cb, cbopaque, - "\t\t}%s\n", (config_stats || merged || unmerged) ? "," : - ""); - } -} - -static void -stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, - bool json, bool merged, bool unmerged, bool bins, bool large, bool huge) -{ - size_t *cactive; - size_t allocated, active, metadata, resident, mapped, retained; - - CTL_GET("stats.cactive", &cactive, size_t *); - CTL_GET("stats.allocated", &allocated, size_t); - CTL_GET("stats.active", &active, size_t); - CTL_GET("stats.metadata", &metadata, size_t); - CTL_GET("stats.resident", &resident, size_t); - CTL_GET("stats.mapped", &mapped, size_t); - CTL_GET("stats.retained", &retained, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\"stats\": {\n"); - - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"cactive\": %zu,\n", atomic_read_z(cactive)); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"allocated\": %zu,\n", allocated); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"active\": %zu,\n", active); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"metadata\": %zu,\n", metadata); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"resident\": %zu,\n", resident); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"mapped\": %zu,\n", 
mapped); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"retained\": %zu\n", retained); - - malloc_cprintf(write_cb, cbopaque, - "\t\t}%s\n", (merged || unmerged) ? "," : ""); - } else { - malloc_cprintf(write_cb, cbopaque, - "Allocated: %zu, active: %zu, metadata: %zu," - " resident: %zu, mapped: %zu, retained: %zu\n", - allocated, active, metadata, resident, mapped, retained); - malloc_cprintf(write_cb, cbopaque, - "Current active ceiling: %zu\n", - atomic_read_z(cactive)); - } - - if (merged || unmerged) { - unsigned narenas; - - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\"stats.arenas\": {\n"); - } - - CTL_GET("arenas.narenas", &narenas, unsigned); - { - VARIABLE_ARRAY(bool, initialized, narenas); - size_t isz; - unsigned i, j, ninitialized; - - isz = sizeof(bool) * narenas; - xmallctl("arenas.initialized", (void *)initialized, - &isz, NULL, 0); - for (i = ninitialized = 0; i < narenas; i++) { - if (initialized[i]) - ninitialized++; - } - - /* Merged stats. */ - if (merged && (ninitialized > 1 || !unmerged)) { - /* Print merged arena stats. */ - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"merged\": {\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "\nMerged arenas stats:\n"); - } - stats_arena_print(write_cb, cbopaque, json, - narenas, bins, large, huge); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t}%s\n", (ninitialized > 1) ? - "," : ""); - } - } - - /* Unmerged stats. */ - for (i = j = 0; i < narenas; i++) { - if (initialized[i]) { - if (json) { - j++; - malloc_cprintf(write_cb, - cbopaque, - "\t\t\t\"%u\": {\n", i); - } else { - malloc_cprintf(write_cb, - cbopaque, "\narenas[%u]:\n", - i); - } - stats_arena_print(write_cb, cbopaque, - json, i, bins, large, huge); - if (json) { - malloc_cprintf(write_cb, - cbopaque, - "\t\t\t}%s\n", (j < - ninitialized) ? 
"," : ""); - } - } - } - } - - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t}\n"); - } - } + stats_arena_hchunks_print(write_cb, cbopaque, i); } void @@ -1067,7 +363,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, int err; uint64_t epoch; size_t u64sz; - bool json = false; bool general = true; bool merged = true; bool unmerged = true; @@ -1084,8 +379,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, * */ epoch = 1; u64sz = sizeof(uint64_t); - err = je_mallctl("epoch", (void *)&epoch, &u64sz, (void *)&epoch, - sizeof(uint64_t)); + err = je_mallctl("epoch", &epoch, &u64sz, &epoch, sizeof(uint64_t)); if (err != 0) { if (err == EAGAIN) { malloc_write(": Memory allocation failure in " @@ -1102,9 +396,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, for (i = 0; opts[i] != '\0'; i++) { switch (opts[i]) { - case 'J': - json = true; - break; case 'g': general = false; break; @@ -1128,27 +419,222 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, } } - if (json) { + malloc_cprintf(write_cb, cbopaque, + "___ Begin jemalloc statistics ___\n"); + if (general) { + const char *cpv; + bool bv; + unsigned uv; + ssize_t ssv; + size_t sv, bsz, ssz, sssz, cpsz; + + bsz = sizeof(bool); + ssz = sizeof(size_t); + sssz = sizeof(ssize_t); + cpsz = sizeof(const char *); + + CTL_GET("version", &cpv, const char *); + malloc_cprintf(write_cb, cbopaque, "Version: %s\n", cpv); + CTL_GET("config.debug", &bv, bool); + malloc_cprintf(write_cb, cbopaque, "Assertions %s\n", + bv ? "enabled" : "disabled"); + +#define OPT_WRITE_BOOL(n) \ + if (je_mallctl("opt."#n, &bv, &bsz, NULL, 0) == 0) { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %s\n", bv ? 
"true" : "false"); \ + } +#define OPT_WRITE_BOOL_MUTABLE(n, m) { \ + bool bv2; \ + if (je_mallctl("opt."#n, &bv, &bsz, NULL, 0) == 0 && \ + je_mallctl(#m, &bv2, &bsz, NULL, 0) == 0) { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %s ("#m": %s)\n", bv ? "true" \ + : "false", bv2 ? "true" : "false"); \ + } \ +} +#define OPT_WRITE_SIZE_T(n) \ + if (je_mallctl("opt."#n, &sv, &ssz, NULL, 0) == 0) { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %zu\n", sv); \ + } +#define OPT_WRITE_SSIZE_T(n) \ + if (je_mallctl("opt."#n, &ssv, &sssz, NULL, 0) == 0) { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %zd\n", ssv); \ + } +#define OPT_WRITE_SSIZE_T_MUTABLE(n, m) { \ + ssize_t ssv2; \ + if (je_mallctl("opt."#n, &ssv, &sssz, NULL, 0) == 0 && \ + je_mallctl(#m, &ssv2, &sssz, NULL, 0) == 0) { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %zd ("#m": %zd)\n", \ + ssv, ssv2); \ + } \ +} +#define OPT_WRITE_CHAR_P(n) \ + if (je_mallctl("opt."#n, &cpv, &cpsz, NULL, 0) == 0) { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": \"%s\"\n", cpv); \ + } + malloc_cprintf(write_cb, cbopaque, - "{\n" - "\t\"jemalloc\": {\n"); - } else { + "Run-time option settings:\n"); + OPT_WRITE_BOOL(abort) + OPT_WRITE_SIZE_T(lg_chunk) + OPT_WRITE_CHAR_P(dss) + OPT_WRITE_SIZE_T(narenas) + OPT_WRITE_SSIZE_T_MUTABLE(lg_dirty_mult, arenas.lg_dirty_mult) + OPT_WRITE_BOOL(stats_print) + OPT_WRITE_CHAR_P(junk) + OPT_WRITE_SIZE_T(quarantine) + OPT_WRITE_BOOL(redzone) + OPT_WRITE_BOOL(zero) + OPT_WRITE_BOOL(utrace) + OPT_WRITE_BOOL(valgrind) + OPT_WRITE_BOOL(xmalloc) + OPT_WRITE_BOOL(tcache) + OPT_WRITE_SSIZE_T(lg_tcache_max) + OPT_WRITE_BOOL(prof) + OPT_WRITE_CHAR_P(prof_prefix) + OPT_WRITE_BOOL_MUTABLE(prof_active, prof.active) + OPT_WRITE_BOOL_MUTABLE(prof_thread_active_init, + prof.thread_active_init) + OPT_WRITE_SSIZE_T(lg_prof_sample) + OPT_WRITE_BOOL(prof_accum) + OPT_WRITE_SSIZE_T(lg_prof_interval) + OPT_WRITE_BOOL(prof_gdump) + OPT_WRITE_BOOL(prof_final) + 
OPT_WRITE_BOOL(prof_leak) + +#undef OPT_WRITE_BOOL +#undef OPT_WRITE_BOOL_MUTABLE +#undef OPT_WRITE_SIZE_T +#undef OPT_WRITE_SSIZE_T +#undef OPT_WRITE_CHAR_P + + malloc_cprintf(write_cb, cbopaque, "CPUs: %u\n", ncpus); + + CTL_GET("arenas.narenas", &uv, unsigned); + malloc_cprintf(write_cb, cbopaque, "Arenas: %u\n", uv); + + malloc_cprintf(write_cb, cbopaque, "Pointer size: %zu\n", + sizeof(void *)); + + CTL_GET("arenas.quantum", &sv, size_t); + malloc_cprintf(write_cb, cbopaque, "Quantum size: %zu\n", + sv); + + CTL_GET("arenas.page", &sv, size_t); + malloc_cprintf(write_cb, cbopaque, "Page size: %zu\n", sv); + + CTL_GET("arenas.lg_dirty_mult", &ssv, ssize_t); + if (ssv >= 0) { + malloc_cprintf(write_cb, cbopaque, + "Min active:dirty page ratio per arena: %u:1\n", + (1U << ssv)); + } else { + malloc_cprintf(write_cb, cbopaque, + "Min active:dirty page ratio per arena: N/A\n"); + } + if (je_mallctl("arenas.tcache_max", &sv, &ssz, NULL, 0) == 0) { + malloc_cprintf(write_cb, cbopaque, + "Maximum thread-cached size class: %zu\n", sv); + } + if (je_mallctl("opt.prof", &bv, &bsz, NULL, 0) == 0 && bv) { + CTL_GET("prof.lg_sample", &sv, size_t); + malloc_cprintf(write_cb, cbopaque, + "Average profile sample interval: %"FMTu64 + " (2^%zu)\n", (((uint64_t)1U) << sv), sv); + + CTL_GET("opt.lg_prof_interval", &ssv, ssize_t); + if (ssv >= 0) { + malloc_cprintf(write_cb, cbopaque, + "Average profile dump interval: %"FMTu64 + " (2^%zd)\n", + (((uint64_t)1U) << ssv), ssv); + } else { + malloc_cprintf(write_cb, cbopaque, + "Average profile dump interval: N/A\n"); + } + } + CTL_GET("opt.lg_chunk", &sv, size_t); malloc_cprintf(write_cb, cbopaque, - "___ Begin jemalloc statistics ___\n"); + "Chunk size: %zu (2^%zu)\n", (ZU(1) << sv), sv); } - if (general) - stats_general_print(write_cb, cbopaque, json, merged, unmerged); if (config_stats) { - stats_print_helper(write_cb, cbopaque, json, merged, unmerged, - bins, large, huge); - } - if (json) { + size_t *cactive; + size_t allocated, 
active, metadata, resident, mapped; + + CTL_GET("stats.cactive", &cactive, size_t *); + CTL_GET("stats.allocated", &allocated, size_t); + CTL_GET("stats.active", &active, size_t); + CTL_GET("stats.metadata", &metadata, size_t); + CTL_GET("stats.resident", &resident, size_t); + CTL_GET("stats.mapped", &mapped, size_t); malloc_cprintf(write_cb, cbopaque, - "\t}\n" - "}\n"); - } else { + "Allocated: %zu, active: %zu, metadata: %zu," + " resident: %zu, mapped: %zu\n", + allocated, active, metadata, resident, mapped); malloc_cprintf(write_cb, cbopaque, - "--- End jemalloc statistics ---\n"); + "Current active ceiling: %zu\n", + atomic_read_z(cactive)); + + if (merged) { + unsigned narenas; + + CTL_GET("arenas.narenas", &narenas, unsigned); + { + VARIABLE_ARRAY(bool, initialized, narenas); + size_t isz; + unsigned i, ninitialized; + + isz = sizeof(bool) * narenas; + xmallctl("arenas.initialized", initialized, + &isz, NULL, 0); + for (i = ninitialized = 0; i < narenas; i++) { + if (initialized[i]) + ninitialized++; + } + + if (ninitialized > 1 || !unmerged) { + /* Print merged arena stats. */ + malloc_cprintf(write_cb, cbopaque, + "\nMerged arenas stats:\n"); + stats_arena_print(write_cb, cbopaque, + narenas, bins, large, huge); + } + } + } + + if (unmerged) { + unsigned narenas; + + /* Print stats for each arena. 
*/ + + CTL_GET("arenas.narenas", &narenas, unsigned); + { + VARIABLE_ARRAY(bool, initialized, narenas); + size_t isz; + unsigned i; + + isz = sizeof(bool) * narenas; + xmallctl("arenas.initialized", initialized, + &isz, NULL, 0); + + for (i = 0; i < narenas; i++) { + if (initialized[i]) { + malloc_cprintf(write_cb, + cbopaque, + "\narenas[%u]:\n", i); + stats_arena_print(write_cb, + cbopaque, i, bins, large, + huge); + } + } + } + } } + malloc_cprintf(write_cb, cbopaque, "--- End jemalloc statistics ---\n"); } diff --git a/deps/jemalloc/src/tcache.c b/deps/jemalloc/src/tcache.c old mode 100755 new mode 100644 index 21540ff46..fdafd0c62 --- a/deps/jemalloc/src/tcache.c +++ b/deps/jemalloc/src/tcache.c @@ -10,7 +10,7 @@ ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; tcache_bin_info_t *tcache_bin_info; static unsigned stack_nelms; /* Total stack elms per tcache. */ -unsigned nhbins; +size_t nhbins; size_t tcache_maxclass; tcaches_t *tcaches; @@ -23,11 +23,10 @@ static tcaches_t *tcaches_avail; /******************************************************************************/ -size_t -tcache_salloc(tsdn_t *tsdn, const void *ptr) +size_t tcache_salloc(const void *ptr) { - return (arena_salloc(tsdn, ptr, false)); + return (arena_salloc(ptr, false)); } void @@ -68,19 +67,20 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) tcache->next_gc_bin++; if (tcache->next_gc_bin == nhbins) tcache->next_gc_bin = 0; + tcache->ev_cnt = 0; } void * -tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - tcache_bin_t *tbin, szind_t binind, bool *tcache_success) +tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache, + tcache_bin_t *tbin, szind_t binind) { void *ret; - arena_tcache_fill_small(tsdn, arena, tbin, binind, config_prof ? + arena_tcache_fill_small(arena, tbin, binind, config_prof ? 
tcache->prof_accumbytes : 0); if (config_prof) tcache->prof_accumbytes = 0; - ret = tcache_alloc_easy(tbin, tcache_success); + ret = tcache_alloc_easy(tbin); return (ret); } @@ -102,18 +102,17 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { /* Lock the arena bin associated with the first object. */ arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( - *(tbin->avail - 1)); + tbin->avail[0]); arena_t *bin_arena = extent_node_arena_get(&chunk->node); arena_bin_t *bin = &bin_arena->bins[binind]; if (config_prof && bin_arena == arena) { - if (arena_prof_accum(tsd_tsdn(tsd), arena, - tcache->prof_accumbytes)) - prof_idump(tsd_tsdn(tsd)); + if (arena_prof_accum(arena, tcache->prof_accumbytes)) + prof_idump(); tcache->prof_accumbytes = 0; } - malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); + malloc_mutex_lock(&bin->lock); if (config_stats && bin_arena == arena) { assert(!merged_stats); merged_stats = true; @@ -123,16 +122,16 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, } ndeferred = 0; for (i = 0; i < nflush; i++) { - ptr = *(tbin->avail - 1 - i); + ptr = tbin->avail[i]; assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (extent_node_arena_get(&chunk->node) == bin_arena) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; arena_chunk_map_bits_t *bitselm = - arena_bitselm_get_mutable(chunk, pageind); - arena_dalloc_bin_junked_locked(tsd_tsdn(tsd), - bin_arena, chunk, ptr, bitselm); + arena_bitselm_get(chunk, pageind); + arena_dalloc_bin_junked_locked(bin_arena, chunk, + ptr, bitselm); } else { /* * This object was allocated via a different @@ -140,12 +139,11 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, * locked. Stash the object, so that it can be * handled in a future pass. 
*/ - *(tbin->avail - 1 - ndeferred) = ptr; + tbin->avail[ndeferred] = ptr; ndeferred++; } } - malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); - arena_decay_ticks(tsd_tsdn(tsd), bin_arena, nflush - ndeferred); + malloc_mutex_unlock(&bin->lock); } if (config_stats && !merged_stats) { /* @@ -153,15 +151,15 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, * arena, so the stats didn't get merged. Manually do so now. */ arena_bin_t *bin = &arena->bins[binind]; - malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); + malloc_mutex_lock(&bin->lock); bin->stats.nflushes++; bin->stats.nrequests += tbin->tstats.nrequests; tbin->tstats.nrequests = 0; - malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); + malloc_mutex_unlock(&bin->lock); } - memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem * - sizeof(void *)); + memmove(tbin->avail, &tbin->avail[tbin->ncached - rem], + rem * sizeof(void *)); tbin->ncached = rem; if ((int)tbin->ncached < tbin->low_water) tbin->low_water = tbin->ncached; @@ -184,13 +182,13 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { /* Lock the arena associated with the first object. 
*/ arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( - *(tbin->avail - 1)); + tbin->avail[0]); arena_t *locked_arena = extent_node_arena_get(&chunk->node); UNUSED bool idump; if (config_prof) idump = false; - malloc_mutex_lock(tsd_tsdn(tsd), &locked_arena->lock); + malloc_mutex_lock(&locked_arena->lock); if ((config_prof || config_stats) && locked_arena == arena) { if (config_prof) { idump = arena_prof_accum_locked(arena, @@ -208,13 +206,13 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, } ndeferred = 0; for (i = 0; i < nflush; i++) { - ptr = *(tbin->avail - 1 - i); + ptr = tbin->avail[i]; assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (extent_node_arena_get(&chunk->node) == locked_arena) { - arena_dalloc_large_junked_locked(tsd_tsdn(tsd), - locked_arena, chunk, ptr); + arena_dalloc_large_junked_locked(locked_arena, + chunk, ptr); } else { /* * This object was allocated via a different @@ -222,56 +220,62 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, * Stash the object, so that it can be handled * in a future pass. */ - *(tbin->avail - 1 - ndeferred) = ptr; + tbin->avail[ndeferred] = ptr; ndeferred++; } } - malloc_mutex_unlock(tsd_tsdn(tsd), &locked_arena->lock); + malloc_mutex_unlock(&locked_arena->lock); if (config_prof && idump) - prof_idump(tsd_tsdn(tsd)); - arena_decay_ticks(tsd_tsdn(tsd), locked_arena, nflush - - ndeferred); + prof_idump(); } if (config_stats && !merged_stats) { /* * The flush loop didn't happen to flush to this thread's * arena, so the stats didn't get merged. Manually do so now. 
*/ - malloc_mutex_lock(tsd_tsdn(tsd), &arena->lock); + malloc_mutex_lock(&arena->lock); arena->stats.nrequests_large += tbin->tstats.nrequests; arena->stats.lstats[binind - NBINS].nrequests += tbin->tstats.nrequests; tbin->tstats.nrequests = 0; - malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock); + malloc_mutex_unlock(&arena->lock); } - memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem * - sizeof(void *)); + memmove(tbin->avail, &tbin->avail[tbin->ncached - rem], + rem * sizeof(void *)); tbin->ncached = rem; if ((int)tbin->ncached < tbin->low_water) tbin->low_water = tbin->ncached; } -static void -tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) +void +tcache_arena_associate(tcache_t *tcache, arena_t *arena) { if (config_stats) { /* Link into list of extant tcaches. */ - malloc_mutex_lock(tsdn, &arena->lock); + malloc_mutex_lock(&arena->lock); ql_elm_new(tcache, link); ql_tail_insert(&arena->tcache_ql, tcache, link); - malloc_mutex_unlock(tsdn, &arena->lock); + malloc_mutex_unlock(&arena->lock); } } -static void -tcache_arena_dissociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) +void +tcache_arena_reassociate(tcache_t *tcache, arena_t *oldarena, arena_t *newarena) +{ + + tcache_arena_dissociate(tcache, oldarena); + tcache_arena_associate(tcache, newarena); +} + +void +tcache_arena_dissociate(tcache_t *tcache, arena_t *arena) { if (config_stats) { /* Unlink from list of extant tcaches. 
*/ - malloc_mutex_lock(tsdn, &arena->lock); + malloc_mutex_lock(&arena->lock); if (config_debug) { bool in_ql = false; tcache_t *iter; @@ -284,20 +288,11 @@ tcache_arena_dissociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) assert(in_ql); } ql_remove(&arena->tcache_ql, tcache, link); - tcache_stats_merge(tsdn, tcache, arena); - malloc_mutex_unlock(tsdn, &arena->lock); + tcache_stats_merge(tcache, arena); + malloc_mutex_unlock(&arena->lock); } } -void -tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *oldarena, - arena_t *newarena) -{ - - tcache_arena_dissociate(tsdn, tcache, oldarena); - tcache_arena_associate(tsdn, tcache, newarena); -} - tcache_t * tcache_get_hard(tsd_t *tsd) { @@ -311,11 +306,11 @@ tcache_get_hard(tsd_t *tsd) arena = arena_choose(tsd, NULL); if (unlikely(arena == NULL)) return (NULL); - return (tcache_create(tsd_tsdn(tsd), arena)); + return (tcache_create(tsd, arena)); } tcache_t * -tcache_create(tsdn_t *tsdn, arena_t *arena) +tcache_create(tsd_t *tsd, arena_t *arena) { tcache_t *tcache; size_t size, stack_offset; @@ -329,26 +324,18 @@ tcache_create(tsdn_t *tsdn, arena_t *arena) /* Avoid false cacheline sharing. */ size = sa2u(size, CACHELINE); - tcache = ipallocztm(tsdn, size, CACHELINE, true, NULL, true, - arena_get(TSDN_NULL, 0, true)); + tcache = ipallocztm(tsd, size, CACHELINE, true, false, true, a0get()); if (tcache == NULL) return (NULL); - tcache_arena_associate(tsdn, tcache, arena); - - ticker_init(&tcache->gc_ticker, TCACHE_GC_INCR); + tcache_arena_associate(tcache, arena); assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0); for (i = 0; i < nhbins; i++) { tcache->tbins[i].lg_fill_div = 1; - stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *); - /* - * avail points past the available space. Allocations will - * access the slots toward higher addresses (for the benefit of - * prefetch). 
- */ tcache->tbins[i].avail = (void **)((uintptr_t)tcache + (uintptr_t)stack_offset); + stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *); } return (tcache); @@ -361,7 +348,7 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) unsigned i; arena = arena_choose(tsd, NULL); - tcache_arena_dissociate(tsd_tsdn(tsd), tcache, arena); + tcache_arena_dissociate(tcache, arena); for (i = 0; i < NBINS; i++) { tcache_bin_t *tbin = &tcache->tbins[i]; @@ -369,9 +356,9 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) if (config_stats && tbin->tstats.nrequests != 0) { arena_bin_t *bin = &arena->bins[i]; - malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); + malloc_mutex_lock(&bin->lock); bin->stats.nrequests += tbin->tstats.nrequests; - malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); + malloc_mutex_unlock(&bin->lock); } } @@ -380,19 +367,19 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) tcache_bin_flush_large(tsd, tbin, i, 0, tcache); if (config_stats && tbin->tstats.nrequests != 0) { - malloc_mutex_lock(tsd_tsdn(tsd), &arena->lock); + malloc_mutex_lock(&arena->lock); arena->stats.nrequests_large += tbin->tstats.nrequests; arena->stats.lstats[i - NBINS].nrequests += tbin->tstats.nrequests; - malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock); + malloc_mutex_unlock(&arena->lock); } } if (config_prof && tcache->prof_accumbytes > 0 && - arena_prof_accum(tsd_tsdn(tsd), arena, tcache->prof_accumbytes)) - prof_idump(tsd_tsdn(tsd)); + arena_prof_accum(arena, tcache->prof_accumbytes)) + prof_idump(); - idalloctm(tsd_tsdn(tsd), tcache, NULL, true, true); + idalloctm(tsd, tcache, false, true); } void @@ -416,22 +403,21 @@ tcache_enabled_cleanup(tsd_t *tsd) /* Do nothing. */ } +/* Caller must own arena->lock. */ void -tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) +tcache_stats_merge(tcache_t *tcache, arena_t *arena) { unsigned i; cassert(config_stats); - malloc_mutex_assert_owner(tsdn, &arena->lock); - /* Merge and reset tcache stats. 
*/ for (i = 0; i < NBINS; i++) { arena_bin_t *bin = &arena->bins[i]; tcache_bin_t *tbin = &tcache->tbins[i]; - malloc_mutex_lock(tsdn, &bin->lock); + malloc_mutex_lock(&bin->lock); bin->stats.nrequests += tbin->tstats.nrequests; - malloc_mutex_unlock(tsdn, &bin->lock); + malloc_mutex_unlock(&bin->lock); tbin->tstats.nrequests = 0; } @@ -447,12 +433,11 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) bool tcaches_create(tsd_t *tsd, unsigned *r_ind) { - arena_t *arena; tcache_t *tcache; tcaches_t *elm; if (tcaches == NULL) { - tcaches = base_alloc(tsd_tsdn(tsd), sizeof(tcache_t *) * + tcaches = base_alloc(sizeof(tcache_t *) * (MALLOCX_TCACHE_MAX+1)); if (tcaches == NULL) return (true); @@ -460,10 +445,7 @@ tcaches_create(tsd_t *tsd, unsigned *r_ind) if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX) return (true); - arena = arena_ichoose(tsd, NULL); - if (unlikely(arena == NULL)) - return (true); - tcache = tcache_create(tsd_tsdn(tsd), arena); + tcache = tcache_create(tsd, a0get()); if (tcache == NULL) return (true); @@ -471,7 +453,7 @@ tcaches_create(tsd_t *tsd, unsigned *r_ind) elm = tcaches_avail; tcaches_avail = tcaches_avail->next; elm->tcache = tcache; - *r_ind = (unsigned)(elm - tcaches); + *r_ind = elm - tcaches; } else { elm = &tcaches[tcaches_past]; elm->tcache = tcache; @@ -509,7 +491,7 @@ tcaches_destroy(tsd_t *tsd, unsigned ind) } bool -tcache_boot(tsdn_t *tsdn) +tcache_boot(void) { unsigned i; @@ -517,17 +499,17 @@ tcache_boot(tsdn_t *tsdn) * If necessary, clamp opt_lg_tcache_max, now that large_maxclass is * known. 
*/ - if (opt_lg_tcache_max < 0 || (ZU(1) << opt_lg_tcache_max) < SMALL_MAXCLASS) + if (opt_lg_tcache_max < 0 || (1U << opt_lg_tcache_max) < SMALL_MAXCLASS) tcache_maxclass = SMALL_MAXCLASS; - else if ((ZU(1) << opt_lg_tcache_max) > large_maxclass) + else if ((1U << opt_lg_tcache_max) > large_maxclass) tcache_maxclass = large_maxclass; else - tcache_maxclass = (ZU(1) << opt_lg_tcache_max); + tcache_maxclass = (1U << opt_lg_tcache_max); nhbins = size2index(tcache_maxclass) + 1; /* Initialize tcache_bin_info. */ - tcache_bin_info = (tcache_bin_info_t *)base_alloc(tsdn, nhbins * + tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins * sizeof(tcache_bin_info_t)); if (tcache_bin_info == NULL) return (true); diff --git a/deps/jemalloc/src/ticker.c b/deps/jemalloc/src/ticker.c deleted file mode 100644 index db0902404..000000000 --- a/deps/jemalloc/src/ticker.c +++ /dev/null @@ -1,2 +0,0 @@ -#define JEMALLOC_TICKER_C_ -#include "jemalloc/internal/jemalloc_internal.h" diff --git a/deps/jemalloc/src/tsd.c b/deps/jemalloc/src/tsd.c index ec69a51c3..9ffe9afef 100644 --- a/deps/jemalloc/src/tsd.c +++ b/deps/jemalloc/src/tsd.c @@ -77,7 +77,7 @@ tsd_cleanup(void *arg) /* Do nothing. 
*/ break; case tsd_state_nominal: -#define O(n, t) \ +#define O(n, t) \ n##_cleanup(tsd); MALLOC_TSD #undef O @@ -106,17 +106,15 @@ MALLOC_TSD } } -tsd_t * +bool malloc_tsd_boot0(void) { - tsd_t *tsd; ncleanups = 0; if (tsd_boot0()) - return (NULL); - tsd = tsd_fetch(); - *tsd_arenas_tdata_bypassp_get(tsd) = true; - return (tsd); + return (true); + *tsd_arenas_cache_bypassp_get(tsd_fetch()) = true; + return (false); } void @@ -124,7 +122,7 @@ malloc_tsd_boot1(void) { tsd_boot1(); - *tsd_arenas_tdata_bypassp_get(tsd_fetch()) = false; + *tsd_arenas_cache_bypassp_get(tsd_fetch()) = false; } #ifdef _WIN32 @@ -150,15 +148,13 @@ _tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) #ifdef _MSC_VER # ifdef _M_IX86 # pragma comment(linker, "/INCLUDE:__tls_used") -# pragma comment(linker, "/INCLUDE:_tls_callback") # else # pragma comment(linker, "/INCLUDE:_tls_used") -# pragma comment(linker, "/INCLUDE:tls_callback") # endif # pragma section(".CRT$XLY",long,read) #endif JEMALLOC_SECTION(".CRT$XLY") JEMALLOC_ATTR(used) -BOOL (WINAPI *const tls_callback)(HINSTANCE hinstDLL, +static BOOL (WINAPI *const tls_callback)(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) = _tls_callback; #endif @@ -171,10 +167,10 @@ tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) tsd_init_block_t *iter; /* Check whether this thread has already inserted into the list. 
*/ - malloc_mutex_lock(TSDN_NULL, &head->lock); + malloc_mutex_lock(&head->lock); ql_foreach(iter, &head->blocks, link) { if (iter->thread == self) { - malloc_mutex_unlock(TSDN_NULL, &head->lock); + malloc_mutex_unlock(&head->lock); return (iter->data); } } @@ -182,7 +178,7 @@ tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) ql_elm_new(block, link); block->thread = self; ql_tail_insert(&head->blocks, block, link); - malloc_mutex_unlock(TSDN_NULL, &head->lock); + malloc_mutex_unlock(&head->lock); return (NULL); } @@ -190,8 +186,8 @@ void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block) { - malloc_mutex_lock(TSDN_NULL, &head->lock); + malloc_mutex_lock(&head->lock); ql_remove(&head->blocks, block, link); - malloc_mutex_unlock(TSDN_NULL, &head->lock); + malloc_mutex_unlock(&head->lock); } #endif diff --git a/deps/jemalloc/src/util.c b/deps/jemalloc/src/util.c old mode 100755 new mode 100644 index dd8c23630..4cb0d6c1e --- a/deps/jemalloc/src/util.c +++ b/deps/jemalloc/src/util.c @@ -1,7 +1,3 @@ -/* - * Define simple versions of assertion macros that won't recurse in case - * of assertion failures in malloc_*printf(). - */ #define assert(e) do { \ if (config_debug && !(e)) { \ malloc_write(": Failed assertion\n"); \ @@ -14,7 +10,6 @@ malloc_write(": Unreachable code reached\n"); \ abort(); \ } \ - unreachable(); \ } while (0) #define not_implemented() do { \ @@ -49,19 +44,15 @@ static void wrtmessage(void *cbopaque, const char *s) { -#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_write) +#ifdef SYS_write /* * Use syscall(2) rather than write(2) when possible in order to avoid * the possibility of memory allocation within libc. This is necessary * on FreeBSD; most operating systems do not have this problem though. - * - * syscall() returns long or int, depending on platform, so capture the - * unused result in the widest plausible type to avoid compiler - * warnings. 
*/ - UNUSED long result = syscall(SYS_write, STDERR_FILENO, s, strlen(s)); + UNUSED int result = syscall(SYS_write, STDERR_FILENO, s, strlen(s)); #else - UNUSED ssize_t result = write(STDERR_FILENO, s, strlen(s)); + UNUSED int result = write(STDERR_FILENO, s, strlen(s)); #endif } @@ -91,7 +82,7 @@ buferror(int err, char *buf, size_t buflen) #ifdef _WIN32 FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, NULL, err, 0, - (LPSTR)buf, (DWORD)buflen, NULL); + (LPSTR)buf, buflen, NULL); return (0); #elif defined(__GLIBC__) && defined(_GNU_SOURCE) char *b = strerror_r(err, buf, buflen); @@ -200,7 +191,7 @@ malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base) p++; } if (neg) - ret = (uintmax_t)(-((intmax_t)ret)); + ret = -ret; if (p == ns) { /* No conversion performed. */ @@ -315,9 +306,10 @@ x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, size_t *slen_p) return (s); } -size_t +int malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { + int ret; size_t i; const char *f; @@ -408,8 +400,6 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) int prec = -1; int width = -1; unsigned char len = '?'; - char *s; - size_t slen; f++; /* Flags. */ @@ -500,6 +490,8 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) } /* Conversion specifier. */ switch (*f) { + char *s; + size_t slen; case '%': /* %% */ APPEND_C(*f); @@ -585,19 +577,20 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) str[i] = '\0'; else str[size - 1] = '\0'; + ret = i; #undef APPEND_C #undef APPEND_S #undef APPEND_PADDED_S #undef GET_ARG_NUMERIC - return (i); + return (ret); } JEMALLOC_FORMAT_PRINTF(3, 4) -size_t +int malloc_snprintf(char *str, size_t size, const char *format, ...) { - size_t ret; + int ret; va_list ap; va_start(ap, format); @@ -655,12 +648,3 @@ malloc_printf(const char *format, ...) 
malloc_vcprintf(NULL, NULL, format, ap); va_end(ap); } - -/* - * Restore normal assertion macros, in order to make it possible to compile all - * C files as a single concatenation. - */ -#undef assert -#undef not_reached -#undef not_implemented -#include "jemalloc/internal/assert.h" diff --git a/deps/jemalloc/src/witness.c b/deps/jemalloc/src/witness.c deleted file mode 100644 index 23753f246..000000000 --- a/deps/jemalloc/src/witness.c +++ /dev/null @@ -1,136 +0,0 @@ -#define JEMALLOC_WITNESS_C_ -#include "jemalloc/internal/jemalloc_internal.h" - -void -witness_init(witness_t *witness, const char *name, witness_rank_t rank, - witness_comp_t *comp) -{ - - witness->name = name; - witness->rank = rank; - witness->comp = comp; -} - -#ifdef JEMALLOC_JET -#undef witness_lock_error -#define witness_lock_error JEMALLOC_N(n_witness_lock_error) -#endif -void -witness_lock_error(const witness_list_t *witnesses, const witness_t *witness) -{ - witness_t *w; - - malloc_printf(": Lock rank order reversal:"); - ql_foreach(w, witnesses, link) { - malloc_printf(" %s(%u)", w->name, w->rank); - } - malloc_printf(" %s(%u)\n", witness->name, witness->rank); - abort(); -} -#ifdef JEMALLOC_JET -#undef witness_lock_error -#define witness_lock_error JEMALLOC_N(witness_lock_error) -witness_lock_error_t *witness_lock_error = JEMALLOC_N(n_witness_lock_error); -#endif - -#ifdef JEMALLOC_JET -#undef witness_owner_error -#define witness_owner_error JEMALLOC_N(n_witness_owner_error) -#endif -void -witness_owner_error(const witness_t *witness) -{ - - malloc_printf(": Should own %s(%u)\n", witness->name, - witness->rank); - abort(); -} -#ifdef JEMALLOC_JET -#undef witness_owner_error -#define witness_owner_error JEMALLOC_N(witness_owner_error) -witness_owner_error_t *witness_owner_error = JEMALLOC_N(n_witness_owner_error); -#endif - -#ifdef JEMALLOC_JET -#undef witness_not_owner_error -#define witness_not_owner_error JEMALLOC_N(n_witness_not_owner_error) -#endif -void -witness_not_owner_error(const 
witness_t *witness) -{ - - malloc_printf(": Should not own %s(%u)\n", witness->name, - witness->rank); - abort(); -} -#ifdef JEMALLOC_JET -#undef witness_not_owner_error -#define witness_not_owner_error JEMALLOC_N(witness_not_owner_error) -witness_not_owner_error_t *witness_not_owner_error = - JEMALLOC_N(n_witness_not_owner_error); -#endif - -#ifdef JEMALLOC_JET -#undef witness_lockless_error -#define witness_lockless_error JEMALLOC_N(n_witness_lockless_error) -#endif -void -witness_lockless_error(const witness_list_t *witnesses) -{ - witness_t *w; - - malloc_printf(": Should not own any locks:"); - ql_foreach(w, witnesses, link) { - malloc_printf(" %s(%u)", w->name, w->rank); - } - malloc_printf("\n"); - abort(); -} -#ifdef JEMALLOC_JET -#undef witness_lockless_error -#define witness_lockless_error JEMALLOC_N(witness_lockless_error) -witness_lockless_error_t *witness_lockless_error = - JEMALLOC_N(n_witness_lockless_error); -#endif - -void -witnesses_cleanup(tsd_t *tsd) -{ - - witness_assert_lockless(tsd_tsdn(tsd)); - - /* Do nothing. */ -} - -void -witness_fork_cleanup(tsd_t *tsd) -{ - - /* Do nothing. */ -} - -void -witness_prefork(tsd_t *tsd) -{ - - tsd_witness_fork_set(tsd, true); -} - -void -witness_postfork_parent(tsd_t *tsd) -{ - - tsd_witness_fork_set(tsd, false); -} - -void -witness_postfork_child(tsd_t *tsd) -{ -#ifndef JEMALLOC_MUTEX_INIT_CB - witness_list_t *witnesses; - - witnesses = tsd_witnessesp_get(tsd); - ql_new(witnesses); -#endif - tsd_witness_fork_set(tsd, false); -} diff --git a/deps/jemalloc/src/zone.c b/deps/jemalloc/src/zone.c index 0571920e4..12e1734a9 100644 --- a/deps/jemalloc/src/zone.c +++ b/deps/jemalloc/src/zone.c @@ -4,7 +4,7 @@ #endif /* - * The malloc_default_purgeable_zone() function is only available on >= 10.6. + * The malloc_default_purgeable_zone function is only available on >= 10.6. * We need to check whether it is present at runtime, thus the weak_import. 
*/ extern malloc_zone_t *malloc_default_purgeable_zone(void) @@ -13,9 +13,8 @@ JEMALLOC_ATTR(weak_import); /******************************************************************************/ /* Data. */ -static malloc_zone_t *default_zone, *purgeable_zone; -static malloc_zone_t jemalloc_zone; -static struct malloc_introspection_t jemalloc_zone_introspect; +static malloc_zone_t zone; +static struct malloc_introspection_t zone_introspect; /******************************************************************************/ /* Function prototypes for non-inline static functions. */ @@ -57,7 +56,7 @@ zone_size(malloc_zone_t *zone, void *ptr) * not work in practice, we must check all pointers to assure that they * reside within a mapped chunk before determining size. */ - return (ivsalloc(tsdn_fetch(), ptr, config_prof)); + return (ivsalloc(ptr, config_prof)); } static void * @@ -88,7 +87,7 @@ static void zone_free(malloc_zone_t *zone, void *ptr) { - if (ivsalloc(tsdn_fetch(), ptr, config_prof) != 0) { + if (ivsalloc(ptr, config_prof) != 0) { je_free(ptr); return; } @@ -100,7 +99,7 @@ static void * zone_realloc(malloc_zone_t *zone, void *ptr, size_t size) { - if (ivsalloc(tsdn_fetch(), ptr, config_prof) != 0) + if (ivsalloc(ptr, config_prof) != 0) return (je_realloc(ptr, size)); return (realloc(ptr, size)); @@ -122,11 +121,9 @@ zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size) static void zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size) { - size_t alloc_size; - alloc_size = ivsalloc(tsdn_fetch(), ptr, config_prof); - if (alloc_size != 0) { - assert(alloc_size == size); + if (ivsalloc(ptr, config_prof) != 0) { + assert(ivsalloc(ptr, config_prof) == size); je_free(ptr); return; } @@ -165,103 +162,89 @@ static void zone_force_unlock(malloc_zone_t *zone) { - /* - * Call jemalloc_postfork_child() rather than - * jemalloc_postfork_parent(), because this function is executed by both - * parent and child. 
The parent can tolerate having state - * reinitialized, but the child cannot unlock mutexes that were locked - * by the parent. - */ if (isthreaded) - jemalloc_postfork_child(); + jemalloc_postfork_parent(); } -static void -zone_init(void) +JEMALLOC_ATTR(constructor) +void +register_zone(void) { - jemalloc_zone.size = (void *)zone_size; - jemalloc_zone.malloc = (void *)zone_malloc; - jemalloc_zone.calloc = (void *)zone_calloc; - jemalloc_zone.valloc = (void *)zone_valloc; - jemalloc_zone.free = (void *)zone_free; - jemalloc_zone.realloc = (void *)zone_realloc; - jemalloc_zone.destroy = (void *)zone_destroy; - jemalloc_zone.zone_name = "jemalloc_zone"; - jemalloc_zone.batch_malloc = NULL; - jemalloc_zone.batch_free = NULL; - jemalloc_zone.introspect = &jemalloc_zone_introspect; - jemalloc_zone.version = JEMALLOC_ZONE_VERSION; -#if (JEMALLOC_ZONE_VERSION >= 5) - jemalloc_zone.memalign = zone_memalign; -#endif -#if (JEMALLOC_ZONE_VERSION >= 6) - jemalloc_zone.free_definite_size = zone_free_definite_size; -#endif -#if (JEMALLOC_ZONE_VERSION >= 8) - jemalloc_zone.pressure_relief = NULL; -#endif - - jemalloc_zone_introspect.enumerator = NULL; - jemalloc_zone_introspect.good_size = (void *)zone_good_size; - jemalloc_zone_introspect.check = NULL; - jemalloc_zone_introspect.print = NULL; - jemalloc_zone_introspect.log = NULL; - jemalloc_zone_introspect.force_lock = (void *)zone_force_lock; - jemalloc_zone_introspect.force_unlock = (void *)zone_force_unlock; - jemalloc_zone_introspect.statistics = NULL; -#if (JEMALLOC_ZONE_VERSION >= 6) - jemalloc_zone_introspect.zone_locked = NULL; -#endif -#if (JEMALLOC_ZONE_VERSION >= 7) - jemalloc_zone_introspect.enable_discharge_checking = NULL; - jemalloc_zone_introspect.disable_discharge_checking = NULL; - jemalloc_zone_introspect.discharge = NULL; -# ifdef __BLOCKS__ - jemalloc_zone_introspect.enumerate_discharged_pointers = NULL; -# else - jemalloc_zone_introspect.enumerate_unavailable_without_blocks = NULL; -# endif -#endif -} - 
-static malloc_zone_t * -zone_default_get(void) -{ - malloc_zone_t **zones = NULL; - unsigned int num_zones = 0; - /* - * On OSX 10.12, malloc_default_zone returns a special zone that is not - * present in the list of registered zones. That zone uses a "lite zone" - * if one is present (apparently enabled when malloc stack logging is - * enabled), or the first registered zone otherwise. In practice this - * means unless malloc stack logging is enabled, the first registered - * zone is the default. So get the list of zones to get the first one, - * instead of relying on malloc_default_zone. + * If something else replaced the system default zone allocator, don't + * register jemalloc's. */ - if (KERN_SUCCESS != malloc_get_all_zones(0, NULL, - (vm_address_t**)&zones, &num_zones)) { - /* - * Reset the value in case the failure happened after it was - * set. - */ - num_zones = 0; + malloc_zone_t *default_zone = malloc_default_zone(); + malloc_zone_t *purgeable_zone = NULL; + if (!default_zone->zone_name || + strcmp(default_zone->zone_name, "DefaultMallocZone") != 0) { + return; } - if (num_zones) - return (zones[0]); + zone.size = (void *)zone_size; + zone.malloc = (void *)zone_malloc; + zone.calloc = (void *)zone_calloc; + zone.valloc = (void *)zone_valloc; + zone.free = (void *)zone_free; + zone.realloc = (void *)zone_realloc; + zone.destroy = (void *)zone_destroy; + zone.zone_name = "jemalloc_zone"; + zone.batch_malloc = NULL; + zone.batch_free = NULL; + zone.introspect = &zone_introspect; + zone.version = JEMALLOC_ZONE_VERSION; +#if (JEMALLOC_ZONE_VERSION >= 5) + zone.memalign = zone_memalign; +#endif +#if (JEMALLOC_ZONE_VERSION >= 6) + zone.free_definite_size = zone_free_definite_size; +#endif +#if (JEMALLOC_ZONE_VERSION >= 8) + zone.pressure_relief = NULL; +#endif - return (malloc_default_zone()); -} + zone_introspect.enumerator = NULL; + zone_introspect.good_size = (void *)zone_good_size; + zone_introspect.check = NULL; + zone_introspect.print = NULL; + 
zone_introspect.log = NULL; + zone_introspect.force_lock = (void *)zone_force_lock; + zone_introspect.force_unlock = (void *)zone_force_unlock; + zone_introspect.statistics = NULL; +#if (JEMALLOC_ZONE_VERSION >= 6) + zone_introspect.zone_locked = NULL; +#endif +#if (JEMALLOC_ZONE_VERSION >= 7) + zone_introspect.enable_discharge_checking = NULL; + zone_introspect.disable_discharge_checking = NULL; + zone_introspect.discharge = NULL; +#ifdef __BLOCKS__ + zone_introspect.enumerate_discharged_pointers = NULL; +#else + zone_introspect.enumerate_unavailable_without_blocks = NULL; +#endif +#endif -/* As written, this function can only promote jemalloc_zone. */ -static void -zone_promote(void) -{ - malloc_zone_t *zone; + /* + * The default purgeable zone is created lazily by OSX's libc. It uses + * the default zone when it is created for "small" allocations + * (< 15 KiB), but assumes the default zone is a scalable_zone. This + * obviously fails when the default zone is the jemalloc zone, so + * malloc_default_purgeable_zone is called beforehand so that the + * default purgeable zone is created when the default zone is still + * a scalable_zone. As purgeable zones only exist on >= 10.6, we need + * to check for the existence of malloc_default_purgeable_zone() at + * run time. + */ + if (malloc_default_purgeable_zone != NULL) + purgeable_zone = malloc_default_purgeable_zone(); + + /* Register the custom zone. At this point it won't be the default. */ + malloc_zone_register(&zone); do { + default_zone = malloc_default_zone(); /* * Unregister and reregister the default zone. On OSX >= 10.6, * unregistering takes the last registered zone and places it @@ -272,7 +255,6 @@ zone_promote(void) */ malloc_zone_unregister(default_zone); malloc_zone_register(default_zone); - /* * On OSX 10.6, having the default purgeable zone appear before * the default zone makes some things crash because it thinks it @@ -284,47 +266,9 @@ zone_promote(void) * above, i.e. the default zone. 
Registering it again then puts * it at the end, obviously after the default zone. */ - if (purgeable_zone != NULL) { + if (purgeable_zone) { malloc_zone_unregister(purgeable_zone); malloc_zone_register(purgeable_zone); } - - zone = zone_default_get(); - } while (zone != &jemalloc_zone); -} - -JEMALLOC_ATTR(constructor) -void -zone_register(void) -{ - - /* - * If something else replaced the system default zone allocator, don't - * register jemalloc's. - */ - default_zone = zone_default_get(); - if (!default_zone->zone_name || strcmp(default_zone->zone_name, - "DefaultMallocZone") != 0) - return; - - /* - * The default purgeable zone is created lazily by OSX's libc. It uses - * the default zone when it is created for "small" allocations - * (< 15 KiB), but assumes the default zone is a scalable_zone. This - * obviously fails when the default zone is the jemalloc zone, so - * malloc_default_purgeable_zone() is called beforehand so that the - * default purgeable zone is created when the default zone is still - * a scalable_zone. As purgeable zones only exist on >= 10.6, we need - * to check for the existence of malloc_default_purgeable_zone() at - * run time. - */ - purgeable_zone = (malloc_default_purgeable_zone == NULL) ? NULL : - malloc_default_purgeable_zone(); - - /* Register the custom zone. At this point it won't be the default. */ - zone_init(); - malloc_zone_register(&jemalloc_zone); - - /* Promote the custom zone to be default. 
*/ - zone_promote(); + } while (malloc_default_zone() != &zone); } diff --git a/deps/jemalloc/test/include/test/jemalloc_test.h.in b/deps/jemalloc/test/include/test/jemalloc_test.h.in index 1f36e4695..455569da4 100644 --- a/deps/jemalloc/test/include/test/jemalloc_test.h.in +++ b/deps/jemalloc/test/include/test/jemalloc_test.h.in @@ -11,6 +11,7 @@ #ifdef _WIN32 # include "msvc_compat/strings.h" #endif +#include #ifdef _WIN32 # include @@ -19,6 +20,39 @@ # include #endif +/******************************************************************************/ +/* + * Define always-enabled assertion macros, so that test assertions execute even + * if assertions are disabled in the library code. These definitions must + * exist prior to including "jemalloc/internal/util.h". + */ +#define assert(e) do { \ + if (!(e)) { \ + malloc_printf( \ + ": %s:%d: Failed assertion: \"%s\"\n", \ + __FILE__, __LINE__, #e); \ + abort(); \ + } \ +} while (0) + +#define not_reached() do { \ + malloc_printf( \ + ": %s:%d: Unreachable code reached\n", \ + __FILE__, __LINE__); \ + abort(); \ +} while (0) + +#define not_implemented() do { \ + malloc_printf(": %s:%d: Not implemented\n", \ + __FILE__, __LINE__); \ + abort(); \ +} while (0) + +#define assert_not_implemented(e) do { \ + if (!(e)) \ + not_implemented(); \ +} while (0) + #include "test/jemalloc_test_defs.h" #ifdef JEMALLOC_OSSPIN @@ -53,14 +87,6 @@ # include "jemalloc/internal/jemalloc_internal_defs.h" # include "jemalloc/internal/jemalloc_internal_macros.h" -static const bool config_debug = -#ifdef JEMALLOC_DEBUG - true -#else - false -#endif - ; - # define JEMALLOC_N(n) @private_namespace@##n # include "jemalloc/internal/private_namespace.h" @@ -68,7 +94,6 @@ static const bool config_debug = # define JEMALLOC_H_STRUCTS # define JEMALLOC_H_EXTERNS # define JEMALLOC_H_INLINES -# include "jemalloc/internal/nstime.h" # include "jemalloc/internal/util.h" # include "jemalloc/internal/qr.h" # include "jemalloc/internal/ql.h" @@ -124,40 +149,3 
@@ static const bool config_debug = #include "test/thd.h" #define MEXP 19937 #include "test/SFMT.h" - -/******************************************************************************/ -/* - * Define always-enabled assertion macros, so that test assertions execute even - * if assertions are disabled in the library code. - */ -#undef assert -#undef not_reached -#undef not_implemented -#undef assert_not_implemented - -#define assert(e) do { \ - if (!(e)) { \ - malloc_printf( \ - ": %s:%d: Failed assertion: \"%s\"\n", \ - __FILE__, __LINE__, #e); \ - abort(); \ - } \ -} while (0) - -#define not_reached() do { \ - malloc_printf( \ - ": %s:%d: Unreachable code reached\n", \ - __FILE__, __LINE__); \ - abort(); \ -} while (0) - -#define not_implemented() do { \ - malloc_printf(": %s:%d: Not implemented\n", \ - __FILE__, __LINE__); \ - abort(); \ -} while (0) - -#define assert_not_implemented(e) do { \ - if (!(e)) \ - not_implemented(); \ -} while (0) diff --git a/deps/jemalloc/test/include/test/mtx.h b/deps/jemalloc/test/include/test/mtx.h index 58afbc3d1..bbe822f54 100644 --- a/deps/jemalloc/test/include/test/mtx.h +++ b/deps/jemalloc/test/include/test/mtx.h @@ -8,8 +8,6 @@ typedef struct { #ifdef _WIN32 CRITICAL_SECTION lock; -#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) - os_unfair_lock lock; #elif (defined(JEMALLOC_OSSPIN)) OSSpinLock lock; #else diff --git a/deps/jemalloc/test/include/test/test.h b/deps/jemalloc/test/include/test/test.h index c8112eb8b..3cf901fc4 100644 --- a/deps/jemalloc/test/include/test/test.h +++ b/deps/jemalloc/test/include/test/test.h @@ -311,9 +311,6 @@ label_test_end: \ #define test(...) \ p_test(__VA_ARGS__, NULL) -#define test_no_malloc_init(...) \ - p_test_no_malloc_init(__VA_ARGS__, NULL) - #define test_skip_if(e) do { \ if (e) { \ test_skip("%s:%s:%d: Test skipped: (%s)", \ @@ -327,7 +324,6 @@ void test_fail(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2); /* For private use by macros. 
*/ test_status_t p_test(test_t *t, ...); -test_status_t p_test_no_malloc_init(test_t *t, ...); void p_test_init(const char *name); void p_test_fini(void); void p_test_fail(const char *prefix, const char *message); diff --git a/deps/jemalloc/test/include/test/timer.h b/deps/jemalloc/test/include/test/timer.h index ace6191b8..a7fefdfd1 100644 --- a/deps/jemalloc/test/include/test/timer.h +++ b/deps/jemalloc/test/include/test/timer.h @@ -1,8 +1,23 @@ /* Simple timer, for use in benchmark reporting. */ +#include +#include + +#define JEMALLOC_CLOCK_GETTIME defined(_POSIX_MONOTONIC_CLOCK) \ + && _POSIX_MONOTONIC_CLOCK >= 0 + typedef struct { - nstime_t t0; - nstime_t t1; +#ifdef _WIN32 + FILETIME ft0; + FILETIME ft1; +#elif JEMALLOC_CLOCK_GETTIME + struct timespec ts0; + struct timespec ts1; + int clock_id; +#else + struct timeval tv0; + struct timeval tv1; +#endif } timedelta_t; void timer_start(timedelta_t *timer); diff --git a/deps/jemalloc/test/integration/MALLOCX_ARENA.c b/deps/jemalloc/test/integration/MALLOCX_ARENA.c old mode 100755 new mode 100644 index 910a096fd..30c203ae6 --- a/deps/jemalloc/test/integration/MALLOCX_ARENA.c +++ b/deps/jemalloc/test/integration/MALLOCX_ARENA.c @@ -19,8 +19,8 @@ thd_start(void *arg) size_t sz; sz = sizeof(arena_ind); - assert_d_eq(mallctl("arenas.extend", (void *)&arena_ind, &sz, NULL, 0), - 0, "Error in arenas.extend"); + assert_d_eq(mallctl("arenas.extend", &arena_ind, &sz, NULL, 0), 0, + "Error in arenas.extend"); if (thread_ind % 4 != 3) { size_t mib[3]; diff --git a/deps/jemalloc/test/integration/aligned_alloc.c b/deps/jemalloc/test/integration/aligned_alloc.c index 58438421d..609001487 100644 --- a/deps/jemalloc/test/integration/aligned_alloc.c +++ b/deps/jemalloc/test/integration/aligned_alloc.c @@ -1,20 +1,9 @@ #include "test/jemalloc_test.h" #define CHUNK 0x400000 -#define MAXALIGN (((size_t)1) << 23) - -/* - * On systems which can't merge extents, tests that call this function generate - * a lot of dirty memory very 
quickly. Purging between cycles mitigates - * potential OOM on e.g. 32-bit Windows. - */ -static void -purge(void) -{ - - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, - "Unexpected mallctl error"); -} +/* #define MAXALIGN ((size_t)UINT64_C(0x80000000000)) */ +#define MAXALIGN ((size_t)0x2000000LU) +#define NITER 4 TEST_BEGIN(test_alignment_errors) { @@ -85,7 +74,6 @@ TEST_END TEST_BEGIN(test_alignment_and_size) { -#define NITER 4 size_t alignment, size, total; unsigned i; void *ps[NITER]; @@ -122,9 +110,7 @@ TEST_BEGIN(test_alignment_and_size) } } } - purge(); } -#undef NITER } TEST_END diff --git a/deps/jemalloc/test/integration/allocated.c b/deps/jemalloc/test/integration/allocated.c old mode 100755 new mode 100644 index 6ce145b3e..3630e80ce --- a/deps/jemalloc/test/integration/allocated.c +++ b/deps/jemalloc/test/integration/allocated.c @@ -18,14 +18,14 @@ thd_start(void *arg) size_t sz, usize; sz = sizeof(a0); - if ((err = mallctl("thread.allocated", (void *)&a0, &sz, NULL, 0))) { + if ((err = mallctl("thread.allocated", &a0, &sz, NULL, 0))) { if (err == ENOENT) goto label_ENOENT; test_fail("%s(): Error in mallctl(): %s", __func__, strerror(err)); } sz = sizeof(ap0); - if ((err = mallctl("thread.allocatedp", (void *)&ap0, &sz, NULL, 0))) { + if ((err = mallctl("thread.allocatedp", &ap0, &sz, NULL, 0))) { if (err == ENOENT) goto label_ENOENT; test_fail("%s(): Error in mallctl(): %s", __func__, @@ -36,15 +36,14 @@ thd_start(void *arg) "storage"); sz = sizeof(d0); - if ((err = mallctl("thread.deallocated", (void *)&d0, &sz, NULL, 0))) { + if ((err = mallctl("thread.deallocated", &d0, &sz, NULL, 0))) { if (err == ENOENT) goto label_ENOENT; test_fail("%s(): Error in mallctl(): %s", __func__, strerror(err)); } sz = sizeof(dp0); - if ((err = mallctl("thread.deallocatedp", (void *)&dp0, &sz, NULL, - 0))) { + if ((err = mallctl("thread.deallocatedp", &dp0, &sz, NULL, 0))) { if (err == ENOENT) goto label_ENOENT; test_fail("%s(): Error in mallctl(): %s", 
__func__, @@ -58,9 +57,9 @@ thd_start(void *arg) assert_ptr_not_null(p, "Unexpected malloc() error"); sz = sizeof(a1); - mallctl("thread.allocated", (void *)&a1, &sz, NULL, 0); + mallctl("thread.allocated", &a1, &sz, NULL, 0); sz = sizeof(ap1); - mallctl("thread.allocatedp", (void *)&ap1, &sz, NULL, 0); + mallctl("thread.allocatedp", &ap1, &sz, NULL, 0); assert_u64_eq(*ap1, a1, "Dereferenced \"thread.allocatedp\" value should equal " "\"thread.allocated\" value"); @@ -75,9 +74,9 @@ thd_start(void *arg) free(p); sz = sizeof(d1); - mallctl("thread.deallocated", (void *)&d1, &sz, NULL, 0); + mallctl("thread.deallocated", &d1, &sz, NULL, 0); sz = sizeof(dp1); - mallctl("thread.deallocatedp", (void *)&dp1, &sz, NULL, 0); + mallctl("thread.deallocatedp", &dp1, &sz, NULL, 0); assert_u64_eq(*dp1, d1, "Dereferenced \"thread.deallocatedp\" value should equal " "\"thread.deallocated\" value"); diff --git a/deps/jemalloc/test/integration/chunk.c b/deps/jemalloc/test/integration/chunk.c index 94cf0025a..af1c9a53e 100644 --- a/deps/jemalloc/test/integration/chunk.c +++ b/deps/jemalloc/test/integration/chunk.c @@ -121,10 +121,6 @@ TEST_BEGIN(test_chunk) { void *p; size_t old_size, new_size, large0, large1, huge0, huge1, huge2, sz; - unsigned arena_ind; - int flags; - size_t hooks_mib[3], purge_mib[3]; - size_t hooks_miblen, purge_miblen; chunk_hooks_t new_hooks = { chunk_alloc, chunk_dalloc, @@ -136,21 +132,11 @@ TEST_BEGIN(test_chunk) }; bool xallocx_success_a, xallocx_success_b, xallocx_success_c; - sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.extend", (void *)&arena_ind, &sz, NULL, 0), - 0, "Unexpected mallctl() failure"); - flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; - /* Install custom chunk hooks. 
*/ - hooks_miblen = sizeof(hooks_mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("arena.0.chunk_hooks", hooks_mib, - &hooks_miblen), 0, "Unexpected mallctlnametomib() failure"); - hooks_mib[1] = (size_t)arena_ind; old_size = sizeof(chunk_hooks_t); new_size = sizeof(chunk_hooks_t); - assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, (void *)&old_hooks, - &old_size, (void *)&new_hooks, new_size), 0, - "Unexpected chunk_hooks error"); + assert_d_eq(mallctl("arena.0.chunk_hooks", &old_hooks, &old_size, + &new_hooks, new_size), 0, "Unexpected chunk_hooks error"); orig_hooks = old_hooks; assert_ptr_ne(old_hooks.alloc, chunk_alloc, "Unexpected alloc error"); assert_ptr_ne(old_hooks.dalloc, chunk_dalloc, @@ -165,63 +151,59 @@ TEST_BEGIN(test_chunk) /* Get large size classes. */ sz = sizeof(size_t); - assert_d_eq(mallctl("arenas.lrun.0.size", (void *)&large0, &sz, NULL, - 0), 0, "Unexpected arenas.lrun.0.size failure"); - assert_d_eq(mallctl("arenas.lrun.1.size", (void *)&large1, &sz, NULL, - 0), 0, "Unexpected arenas.lrun.1.size failure"); + assert_d_eq(mallctl("arenas.lrun.0.size", &large0, &sz, NULL, 0), 0, + "Unexpected arenas.lrun.0.size failure"); + assert_d_eq(mallctl("arenas.lrun.1.size", &large1, &sz, NULL, 0), 0, + "Unexpected arenas.lrun.1.size failure"); /* Get huge size classes. 
*/ - assert_d_eq(mallctl("arenas.hchunk.0.size", (void *)&huge0, &sz, NULL, - 0), 0, "Unexpected arenas.hchunk.0.size failure"); - assert_d_eq(mallctl("arenas.hchunk.1.size", (void *)&huge1, &sz, NULL, - 0), 0, "Unexpected arenas.hchunk.1.size failure"); - assert_d_eq(mallctl("arenas.hchunk.2.size", (void *)&huge2, &sz, NULL, - 0), 0, "Unexpected arenas.hchunk.2.size failure"); + assert_d_eq(mallctl("arenas.hchunk.0.size", &huge0, &sz, NULL, 0), 0, + "Unexpected arenas.hchunk.0.size failure"); + assert_d_eq(mallctl("arenas.hchunk.1.size", &huge1, &sz, NULL, 0), 0, + "Unexpected arenas.hchunk.1.size failure"); + assert_d_eq(mallctl("arenas.hchunk.2.size", &huge2, &sz, NULL, 0), 0, + "Unexpected arenas.hchunk.2.size failure"); /* Test dalloc/decommit/purge cascade. */ - purge_miblen = sizeof(purge_mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("arena.0.purge", purge_mib, &purge_miblen), - 0, "Unexpected mallctlnametomib() failure"); - purge_mib[1] = (size_t)arena_ind; do_dalloc = false; do_decommit = false; - p = mallocx(huge0 * 2, flags); + p = mallocx(huge0 * 2, 0); assert_ptr_not_null(p, "Unexpected mallocx() error"); did_dalloc = false; did_decommit = false; did_purge = false; did_split = false; - xallocx_success_a = (xallocx(p, huge0, 0, flags) == huge0); - assert_d_eq(mallctlbymib(purge_mib, purge_miblen, NULL, NULL, NULL, 0), - 0, "Unexpected arena.%u.purge error", arena_ind); + xallocx_success_a = (xallocx(p, huge0, 0, 0) == huge0); + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected arena.0.purge error"); if (xallocx_success_a) { assert_true(did_dalloc, "Expected dalloc"); assert_false(did_decommit, "Unexpected decommit"); assert_true(did_purge, "Expected purge"); } assert_true(did_split, "Expected split"); - dallocx(p, flags); + dallocx(p, 0); do_dalloc = true; /* Test decommit/commit and observe split/merge. 
*/ do_dalloc = false; do_decommit = true; - p = mallocx(huge0 * 2, flags); + p = mallocx(huge0 * 2, 0); assert_ptr_not_null(p, "Unexpected mallocx() error"); did_decommit = false; did_commit = false; did_split = false; did_merge = false; - xallocx_success_b = (xallocx(p, huge0, 0, flags) == huge0); - assert_d_eq(mallctlbymib(purge_mib, purge_miblen, NULL, NULL, NULL, 0), - 0, "Unexpected arena.%u.purge error", arena_ind); + xallocx_success_b = (xallocx(p, huge0, 0, 0) == huge0); + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected arena.0.purge error"); if (xallocx_success_b) assert_true(did_split, "Expected split"); - xallocx_success_c = (xallocx(p, huge0 * 2, 0, flags) == huge0 * 2); + xallocx_success_c = (xallocx(p, huge0 * 2, 0, 0) == huge0 * 2); assert_b_eq(did_decommit, did_commit, "Expected decommit/commit match"); if (xallocx_success_b && xallocx_success_c) assert_true(did_merge, "Expected merge"); - dallocx(p, flags); + dallocx(p, 0); do_dalloc = true; do_decommit = false; @@ -232,43 +214,43 @@ TEST_BEGIN(test_chunk) * successful xallocx() from size=huge2 to size=huge1 is * guaranteed to leave trailing purgeable memory. */ - p = mallocx(huge2, flags); + p = mallocx(huge2, 0); assert_ptr_not_null(p, "Unexpected mallocx() error"); did_purge = false; - assert_zu_eq(xallocx(p, huge1, 0, flags), huge1, + assert_zu_eq(xallocx(p, huge1, 0, 0), huge1, "Unexpected xallocx() failure"); assert_true(did_purge, "Expected purge"); - dallocx(p, flags); + dallocx(p, 0); } /* Test decommit for large allocations. 
*/ do_decommit = true; - p = mallocx(large1, flags); + p = mallocx(large1, 0); assert_ptr_not_null(p, "Unexpected mallocx() error"); - assert_d_eq(mallctlbymib(purge_mib, purge_miblen, NULL, NULL, NULL, 0), - 0, "Unexpected arena.%u.purge error", arena_ind); + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected arena.0.purge error"); did_decommit = false; - assert_zu_eq(xallocx(p, large0, 0, flags), large0, + assert_zu_eq(xallocx(p, large0, 0, 0), large0, "Unexpected xallocx() failure"); - assert_d_eq(mallctlbymib(purge_mib, purge_miblen, NULL, NULL, NULL, 0), - 0, "Unexpected arena.%u.purge error", arena_ind); + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected arena.0.purge error"); did_commit = false; - assert_zu_eq(xallocx(p, large1, 0, flags), large1, + assert_zu_eq(xallocx(p, large1, 0, 0), large1, "Unexpected xallocx() failure"); assert_b_eq(did_decommit, did_commit, "Expected decommit/commit match"); - dallocx(p, flags); + dallocx(p, 0); do_decommit = false; /* Make sure non-huge allocation succeeds. */ - p = mallocx(42, flags); + p = mallocx(42, 0); assert_ptr_not_null(p, "Unexpected mallocx() error"); - dallocx(p, flags); + dallocx(p, 0); /* Restore chunk hooks. 
*/ - assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, NULL, NULL, - (void *)&old_hooks, new_size), 0, "Unexpected chunk_hooks error"); - assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, (void *)&old_hooks, - &old_size, NULL, 0), 0, "Unexpected chunk_hooks error"); + assert_d_eq(mallctl("arena.0.chunk_hooks", NULL, NULL, &old_hooks, + new_size), 0, "Unexpected chunk_hooks error"); + assert_d_eq(mallctl("arena.0.chunk_hooks", &old_hooks, &old_size, + NULL, 0), 0, "Unexpected chunk_hooks error"); assert_ptr_eq(old_hooks.alloc, orig_hooks.alloc, "Unexpected alloc error"); assert_ptr_eq(old_hooks.dalloc, orig_hooks.dalloc, diff --git a/deps/jemalloc/test/integration/mallocx.c b/deps/jemalloc/test/integration/mallocx.c old mode 100755 new mode 100644 index d709eb301..6253175d6 --- a/deps/jemalloc/test/integration/mallocx.c +++ b/deps/jemalloc/test/integration/mallocx.c @@ -1,9 +1,5 @@ #include "test/jemalloc_test.h" -#ifdef JEMALLOC_FILL -const char *malloc_conf = "junk:false"; -#endif - static unsigned get_nsizes_impl(const char *cmd) { @@ -11,7 +7,7 @@ get_nsizes_impl(const char *cmd) size_t z; z = sizeof(unsigned); - assert_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, + assert_d_eq(mallctl(cmd, &ret, &z, NULL, 0), 0, "Unexpected mallctl(\"%s\", ...) failure", cmd); return (ret); @@ -37,7 +33,7 @@ get_size_impl(const char *cmd, size_t ind) 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); mib[2] = ind; z = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), + assert_d_eq(mallctlbymib(mib, miblen, &ret, &z, NULL, 0), 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind); return (ret); @@ -50,84 +46,43 @@ get_huge_size(size_t ind) return (get_size_impl("arenas.hchunk.0.size", ind)); } -/* - * On systems which can't merge extents, tests that call this function generate - * a lot of dirty memory very quickly. Purging between cycles mitigates - * potential OOM on e.g. 32-bit Windows. 
- */ -static void -purge(void) -{ - - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, - "Unexpected mallctl error"); -} - -TEST_BEGIN(test_overflow) -{ - size_t hugemax; - - hugemax = get_huge_size(get_nhuge()-1); - - assert_ptr_null(mallocx(hugemax+1, 0), - "Expected OOM for mallocx(size=%#zx, 0)", hugemax+1); - - assert_ptr_null(mallocx(ZU(PTRDIFF_MAX)+1, 0), - "Expected OOM for mallocx(size=%#zx, 0)", ZU(PTRDIFF_MAX)+1); - - assert_ptr_null(mallocx(SIZE_T_MAX, 0), - "Expected OOM for mallocx(size=%#zx, 0)", SIZE_T_MAX); - - assert_ptr_null(mallocx(1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX)+1)), - "Expected OOM for mallocx(size=1, MALLOCX_ALIGN(%#zx))", - ZU(PTRDIFF_MAX)+1); -} -TEST_END - TEST_BEGIN(test_oom) { - size_t hugemax; - bool oom; - void *ptrs[3]; - unsigned i; + size_t hugemax, size, alignment; + + hugemax = get_huge_size(get_nhuge()-1); /* - * It should be impossible to allocate three objects that each consume - * nearly half the virtual address space. + * It should be impossible to allocate two objects that each consume + * more than half the virtual address space. 
*/ - hugemax = get_huge_size(get_nhuge()-1); - oom = false; - for (i = 0; i < sizeof(ptrs) / sizeof(void *); i++) { - ptrs[i] = mallocx(hugemax, 0); - if (ptrs[i] == NULL) - oom = true; + { + void *p; + + p = mallocx(hugemax, 0); + if (p != NULL) { + assert_ptr_null(mallocx(hugemax, 0), + "Expected OOM for mallocx(size=%#zx, 0)", hugemax); + dallocx(p, 0); + } } - assert_true(oom, - "Expected OOM during series of calls to mallocx(size=%zu, 0)", - hugemax); - for (i = 0; i < sizeof(ptrs) / sizeof(void *); i++) { - if (ptrs[i] != NULL) - dallocx(ptrs[i], 0); - } - purge(); #if LG_SIZEOF_PTR == 3 - assert_ptr_null(mallocx(0x8000000000000000ULL, - MALLOCX_ALIGN(0x8000000000000000ULL)), - "Expected OOM for mallocx()"); - assert_ptr_null(mallocx(0x8000000000000000ULL, - MALLOCX_ALIGN(0x80000000)), - "Expected OOM for mallocx()"); + size = ZU(0x8000000000000000); + alignment = ZU(0x8000000000000000); #else - assert_ptr_null(mallocx(0x80000000UL, MALLOCX_ALIGN(0x80000000UL)), - "Expected OOM for mallocx()"); + size = ZU(0x80000000); + alignment = ZU(0x80000000); #endif + assert_ptr_null(mallocx(size, MALLOCX_ALIGN(alignment)), + "Expected OOM for mallocx(size=%#zx, MALLOCX_ALIGN(%#zx)", size, + alignment); } TEST_END TEST_BEGIN(test_basic) { -#define MAXSZ (((size_t)1) << 23) +#define MAXSZ (((size_t)1) << 26) size_t sz; for (sz = 1; sz < MAXSZ; sz = nallocx(sz, 0) + 1) { @@ -136,28 +91,23 @@ TEST_BEGIN(test_basic) nsz = nallocx(sz, 0); assert_zu_ne(nsz, 0, "Unexpected nallocx() error"); p = mallocx(sz, 0); - assert_ptr_not_null(p, - "Unexpected mallocx(size=%zx, flags=0) error", sz); + assert_ptr_not_null(p, "Unexpected mallocx() error"); rsz = sallocx(p, 0); assert_zu_ge(rsz, sz, "Real size smaller than expected"); assert_zu_eq(nsz, rsz, "nallocx()/sallocx() size mismatch"); dallocx(p, 0); p = mallocx(sz, 0); - assert_ptr_not_null(p, - "Unexpected mallocx(size=%zx, flags=0) error", sz); + assert_ptr_not_null(p, "Unexpected mallocx() error"); dallocx(p, 0); nsz = 
nallocx(sz, MALLOCX_ZERO); assert_zu_ne(nsz, 0, "Unexpected nallocx() error"); p = mallocx(sz, MALLOCX_ZERO); - assert_ptr_not_null(p, - "Unexpected mallocx(size=%zx, flags=MALLOCX_ZERO) error", - nsz); + assert_ptr_not_null(p, "Unexpected mallocx() error"); rsz = sallocx(p, 0); assert_zu_eq(nsz, rsz, "nallocx()/sallocx() rsize mismatch"); dallocx(p, 0); - purge(); } #undef MAXSZ } @@ -165,7 +115,7 @@ TEST_END TEST_BEGIN(test_alignment_and_size) { -#define MAXALIGN (((size_t)1) << 23) +#define MAXALIGN (((size_t)1) << 25) #define NITER 4 size_t nsz, rsz, sz, alignment, total; unsigned i; @@ -215,7 +165,6 @@ TEST_BEGIN(test_alignment_and_size) } } } - purge(); } #undef MAXALIGN #undef NITER @@ -227,7 +176,6 @@ main(void) { return (test( - test_overflow, test_oom, test_basic, test_alignment_and_size)); diff --git a/deps/jemalloc/test/integration/overflow.c b/deps/jemalloc/test/integration/overflow.c old mode 100755 new mode 100644 index 84a35652c..303d9b2d3 --- a/deps/jemalloc/test/integration/overflow.c +++ b/deps/jemalloc/test/integration/overflow.c @@ -8,8 +8,8 @@ TEST_BEGIN(test_overflow) void *p; sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.nhchunks", (void *)&nhchunks, &sz, NULL, 0), - 0, "Unexpected mallctl() error"); + assert_d_eq(mallctl("arenas.nhchunks", &nhchunks, &sz, NULL, 0), 0, + "Unexpected mallctl() error"); miblen = sizeof(mib) / sizeof(size_t); assert_d_eq(mallctlnametomib("arenas.hchunk.0.size", mib, &miblen), 0, @@ -17,8 +17,8 @@ TEST_BEGIN(test_overflow) mib[2] = nhchunks - 1; sz = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&max_size_class, &sz, - NULL, 0), 0, "Unexpected mallctlbymib() error"); + assert_d_eq(mallctlbymib(mib, miblen, &max_size_class, &sz, NULL, 0), 0, + "Unexpected mallctlbymib() error"); assert_ptr_null(malloc(max_size_class + 1), "Expected OOM due to over-sized allocation request"); diff --git a/deps/jemalloc/test/integration/posix_memalign.c b/deps/jemalloc/test/integration/posix_memalign.c 
index e22e10200..19741c6cb 100644 --- a/deps/jemalloc/test/integration/posix_memalign.c +++ b/deps/jemalloc/test/integration/posix_memalign.c @@ -1,20 +1,9 @@ #include "test/jemalloc_test.h" #define CHUNK 0x400000 -#define MAXALIGN (((size_t)1) << 23) - -/* - * On systems which can't merge extents, tests that call this function generate - * a lot of dirty memory very quickly. Purging between cycles mitigates - * potential OOM on e.g. 32-bit Windows. - */ -static void -purge(void) -{ - - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, - "Unexpected mallctl error"); -} +/* #define MAXALIGN ((size_t)UINT64_C(0x80000000000)) */ +#define MAXALIGN ((size_t)0x2000000LU) +#define NITER 4 TEST_BEGIN(test_alignment_errors) { @@ -77,7 +66,6 @@ TEST_END TEST_BEGIN(test_alignment_and_size) { -#define NITER 4 size_t alignment, size, total; unsigned i; int err; @@ -116,9 +104,7 @@ TEST_BEGIN(test_alignment_and_size) } } } - purge(); } -#undef NITER } TEST_END diff --git a/deps/jemalloc/test/integration/rallocx.c b/deps/jemalloc/test/integration/rallocx.c old mode 100755 new mode 100644 index 506bf1c90..be1b27b73 --- a/deps/jemalloc/test/integration/rallocx.c +++ b/deps/jemalloc/test/integration/rallocx.c @@ -1,51 +1,5 @@ #include "test/jemalloc_test.h" -static unsigned -get_nsizes_impl(const char *cmd) -{ - unsigned ret; - size_t z; - - z = sizeof(unsigned); - assert_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, - "Unexpected mallctl(\"%s\", ...) failure", cmd); - - return (ret); -} - -static unsigned -get_nhuge(void) -{ - - return (get_nsizes_impl("arenas.nhchunks")); -} - -static size_t -get_size_impl(const char *cmd, size_t ind) -{ - size_t ret; - size_t z; - size_t mib[4]; - size_t miblen = 4; - - z = sizeof(size_t); - assert_d_eq(mallctlnametomib(cmd, mib, &miblen), - 0, "Unexpected mallctlnametomib(\"%s\", ...) 
failure", cmd); - mib[2] = ind; - z = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), - 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind); - - return (ret); -} - -static size_t -get_huge_size(size_t ind) -{ - - return (get_size_impl("arenas.hchunk.0.size", ind)); -} - TEST_BEGIN(test_grow_and_shrink) { void *p, *q; @@ -184,22 +138,22 @@ TEST_END TEST_BEGIN(test_lg_align_and_zero) { void *p, *q; - unsigned lg_align; - size_t sz; + size_t lg_align, sz; #define MAX_LG_ALIGN 25 #define MAX_VALIDATE (ZU(1) << 22) - lg_align = 0; + lg_align = ZU(0); p = mallocx(1, MALLOCX_LG_ALIGN(lg_align)|MALLOCX_ZERO); assert_ptr_not_null(p, "Unexpected mallocx() error"); for (lg_align++; lg_align <= MAX_LG_ALIGN; lg_align++) { q = rallocx(p, 1, MALLOCX_LG_ALIGN(lg_align)|MALLOCX_ZERO); assert_ptr_not_null(q, - "Unexpected rallocx() error for lg_align=%u", lg_align); + "Unexpected rallocx() error for lg_align=%zu", lg_align); assert_ptr_null( (void *)((uintptr_t)q & ((ZU(1) << lg_align)-1)), - "%p inadequately aligned for lg_align=%u", q, lg_align); + "%p inadequately aligned for lg_align=%zu", + q, lg_align); sz = sallocx(q, 0); if ((sz << 1) <= MAX_VALIDATE) { assert_false(validate_fill(q, 0, 0, sz), @@ -219,33 +173,6 @@ TEST_BEGIN(test_lg_align_and_zero) } TEST_END -TEST_BEGIN(test_overflow) -{ - size_t hugemax; - void *p; - - hugemax = get_huge_size(get_nhuge()-1); - - p = mallocx(1, 0); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); - - assert_ptr_null(rallocx(p, hugemax+1, 0), - "Expected OOM for rallocx(p, size=%#zx, 0)", hugemax+1); - - assert_ptr_null(rallocx(p, ZU(PTRDIFF_MAX)+1, 0), - "Expected OOM for rallocx(p, size=%#zx, 0)", ZU(PTRDIFF_MAX)+1); - - assert_ptr_null(rallocx(p, SIZE_T_MAX, 0), - "Expected OOM for rallocx(p, size=%#zx, 0)", SIZE_T_MAX); - - assert_ptr_null(rallocx(p, 1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX)+1)), - "Expected OOM for rallocx(p, size=1, MALLOCX_ALIGN(%#zx))", - ZU(PTRDIFF_MAX)+1); - - 
dallocx(p, 0); -} -TEST_END - int main(void) { @@ -254,6 +181,5 @@ main(void) test_grow_and_shrink, test_zero, test_align, - test_lg_align_and_zero, - test_overflow)); + test_lg_align_and_zero)); } diff --git a/deps/jemalloc/test/integration/sdallocx.c b/deps/jemalloc/test/integration/sdallocx.c index f92e0589c..b84817d76 100644 --- a/deps/jemalloc/test/integration/sdallocx.c +++ b/deps/jemalloc/test/integration/sdallocx.c @@ -1,7 +1,7 @@ #include "test/jemalloc_test.h" -#define MAXALIGN (((size_t)1) << 22) -#define NITER 3 +#define MAXALIGN (((size_t)1) << 25) +#define NITER 4 TEST_BEGIN(test_basic) { diff --git a/deps/jemalloc/test/integration/thread_arena.c b/deps/jemalloc/test/integration/thread_arena.c old mode 100755 new mode 100644 index 7a35a6351..67be53513 --- a/deps/jemalloc/test/integration/thread_arena.c +++ b/deps/jemalloc/test/integration/thread_arena.c @@ -16,8 +16,8 @@ thd_start(void *arg) free(p); size = sizeof(arena_ind); - if ((err = mallctl("thread.arena", (void *)&arena_ind, &size, - (void *)&main_arena_ind, sizeof(main_arena_ind)))) { + if ((err = mallctl("thread.arena", &arena_ind, &size, &main_arena_ind, + sizeof(main_arena_ind)))) { char buf[BUFERROR_BUF]; buferror(err, buf, sizeof(buf)); @@ -25,8 +25,7 @@ thd_start(void *arg) } size = sizeof(arena_ind); - if ((err = mallctl("thread.arena", (void *)&arena_ind, &size, NULL, - 0))) { + if ((err = mallctl("thread.arena", &arena_ind, &size, NULL, 0))) { char buf[BUFERROR_BUF]; buferror(err, buf, sizeof(buf)); @@ -51,8 +50,7 @@ TEST_BEGIN(test_thread_arena) assert_ptr_not_null(p, "Error in malloc()"); size = sizeof(arena_ind); - if ((err = mallctl("thread.arena", (void *)&arena_ind, &size, NULL, - 0))) { + if ((err = mallctl("thread.arena", &arena_ind, &size, NULL, 0))) { char buf[BUFERROR_BUF]; buferror(err, buf, sizeof(buf)); diff --git a/deps/jemalloc/test/integration/thread_tcache_enabled.c b/deps/jemalloc/test/integration/thread_tcache_enabled.c old mode 100755 new mode 100644 index 
2c2825e19..f4e89c682 --- a/deps/jemalloc/test/integration/thread_tcache_enabled.c +++ b/deps/jemalloc/test/integration/thread_tcache_enabled.c @@ -16,8 +16,7 @@ thd_start(void *arg) bool e0, e1; sz = sizeof(bool); - if ((err = mallctl("thread.tcache.enabled", (void *)&e0, &sz, NULL, - 0))) { + if ((err = mallctl("thread.tcache.enabled", &e0, &sz, NULL, 0))) { if (err == ENOENT) { assert_false(config_tcache, "ENOENT should only be returned if tcache is " @@ -28,53 +27,53 @@ thd_start(void *arg) if (e0) { e1 = false; - assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, - (void *)&e1, sz), 0, "Unexpected mallctl() error"); + assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), + 0, "Unexpected mallctl() error"); assert_true(e0, "tcache should be enabled"); } e1 = true; - assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, - (void *)&e1, sz), 0, "Unexpected mallctl() error"); + assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0, + "Unexpected mallctl() error"); assert_false(e0, "tcache should be disabled"); e1 = true; - assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, - (void *)&e1, sz), 0, "Unexpected mallctl() error"); + assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0, + "Unexpected mallctl() error"); assert_true(e0, "tcache should be enabled"); e1 = false; - assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, - (void *)&e1, sz), 0, "Unexpected mallctl() error"); + assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0, + "Unexpected mallctl() error"); assert_true(e0, "tcache should be enabled"); e1 = false; - assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, - (void *)&e1, sz), 0, "Unexpected mallctl() error"); + assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0, + "Unexpected mallctl() error"); assert_false(e0, "tcache should be disabled"); free(malloc(1)); e1 = true; - assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, 
&sz, - (void *)&e1, sz), 0, "Unexpected mallctl() error"); + assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0, + "Unexpected mallctl() error"); assert_false(e0, "tcache should be disabled"); free(malloc(1)); e1 = true; - assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, - (void *)&e1, sz), 0, "Unexpected mallctl() error"); + assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0, + "Unexpected mallctl() error"); assert_true(e0, "tcache should be enabled"); free(malloc(1)); e1 = false; - assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, - (void *)&e1, sz), 0, "Unexpected mallctl() error"); + assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0, + "Unexpected mallctl() error"); assert_true(e0, "tcache should be enabled"); free(malloc(1)); e1 = false; - assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, - (void *)&e1, sz), 0, "Unexpected mallctl() error"); + assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0, + "Unexpected mallctl() error"); assert_false(e0, "tcache should be disabled"); free(malloc(1)); diff --git a/deps/jemalloc/test/integration/xallocx.c b/deps/jemalloc/test/integration/xallocx.c old mode 100755 new mode 100644 index 67e0a0e71..373625219 --- a/deps/jemalloc/test/integration/xallocx.c +++ b/deps/jemalloc/test/integration/xallocx.c @@ -1,28 +1,5 @@ #include "test/jemalloc_test.h" -#ifdef JEMALLOC_FILL -const char *malloc_conf = "junk:false"; -#endif - -/* - * Use a separate arena for xallocx() extension/contraction tests so that - * internal allocation e.g. by heap profiling can't interpose allocations where - * xallocx() would ordinarily be able to extend. 
- */ -static unsigned -arena_ind(void) -{ - static unsigned ind = 0; - - if (ind == 0) { - size_t sz = sizeof(ind); - assert_d_eq(mallctl("arenas.extend", (void *)&ind, &sz, NULL, - 0), 0, "Unexpected mallctl failure creating arena"); - } - - return (ind); -} - TEST_BEGIN(test_same_size) { void *p; @@ -78,7 +55,7 @@ get_nsizes_impl(const char *cmd) size_t z; z = sizeof(unsigned); - assert_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, + assert_d_eq(mallctl(cmd, &ret, &z, NULL, 0), 0, "Unexpected mallctl(\"%s\", ...) failure", cmd); return (ret); @@ -118,7 +95,7 @@ get_size_impl(const char *cmd, size_t ind) 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); mib[2] = ind; z = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), + assert_d_eq(mallctlbymib(mib, miblen, &ret, &z, NULL, 0), 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind); return (ret); @@ -241,7 +218,6 @@ TEST_END TEST_BEGIN(test_extra_large) { - int flags = MALLOCX_ARENA(arena_ind()); size_t smallmax, large0, large1, large2, huge0, hugemax; void *p; @@ -253,122 +229,121 @@ TEST_BEGIN(test_extra_large) huge0 = get_huge_size(0); hugemax = get_huge_size(get_nhuge()-1); - p = mallocx(large2, flags); + p = mallocx(large2, 0); assert_ptr_not_null(p, "Unexpected mallocx() error"); - assert_zu_eq(xallocx(p, large2, 0, flags), large2, + assert_zu_eq(xallocx(p, large2, 0, 0), large2, "Unexpected xallocx() behavior"); /* Test size decrease with zero extra. */ - assert_zu_eq(xallocx(p, large0, 0, flags), large0, + assert_zu_eq(xallocx(p, large0, 0, 0), large0, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, smallmax, 0, flags), large0, + assert_zu_eq(xallocx(p, smallmax, 0, 0), large0, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, large2, 0, flags), large2, + assert_zu_eq(xallocx(p, large2, 0, 0), large2, "Unexpected xallocx() behavior"); /* Test size decrease with non-zero extra. 
*/ - assert_zu_eq(xallocx(p, large0, large2 - large0, flags), large2, + assert_zu_eq(xallocx(p, large0, large2 - large0, 0), large2, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, large1, large2 - large1, flags), large2, + assert_zu_eq(xallocx(p, large1, large2 - large1, 0), large2, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, large0, large1 - large0, flags), large1, + assert_zu_eq(xallocx(p, large0, large1 - large0, 0), large1, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, smallmax, large0 - smallmax, flags), large0, + assert_zu_eq(xallocx(p, smallmax, large0 - smallmax, 0), large0, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, large0, 0, flags), large0, + assert_zu_eq(xallocx(p, large0, 0, 0), large0, "Unexpected xallocx() behavior"); /* Test size increase with zero extra. */ - assert_zu_eq(xallocx(p, large2, 0, flags), large2, + assert_zu_eq(xallocx(p, large2, 0, 0), large2, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, huge0, 0, flags), large2, + assert_zu_eq(xallocx(p, huge0, 0, 0), large2, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, large0, 0, flags), large0, + assert_zu_eq(xallocx(p, large0, 0, 0), large0, "Unexpected xallocx() behavior"); /* Test size increase with non-zero extra. */ - assert_zu_lt(xallocx(p, large0, huge0 - large0, flags), huge0, + assert_zu_lt(xallocx(p, large0, huge0 - large0, 0), huge0, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, large0, 0, flags), large0, + assert_zu_eq(xallocx(p, large0, 0, 0), large0, "Unexpected xallocx() behavior"); /* Test size increase with non-zero extra. */ - assert_zu_eq(xallocx(p, large0, large2 - large0, flags), large2, + assert_zu_eq(xallocx(p, large0, large2 - large0, 0), large2, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, large2, 0, flags), large2, + assert_zu_eq(xallocx(p, large2, 0, 0), large2, "Unexpected xallocx() behavior"); /* Test size+extra overflow. 
*/ - assert_zu_lt(xallocx(p, large2, hugemax - large2 + 1, flags), huge0, + assert_zu_lt(xallocx(p, large2, hugemax - large2 + 1, 0), huge0, "Unexpected xallocx() behavior"); - dallocx(p, flags); + dallocx(p, 0); } TEST_END TEST_BEGIN(test_extra_huge) { - int flags = MALLOCX_ARENA(arena_ind()); - size_t largemax, huge1, huge2, huge3, hugemax; + size_t largemax, huge0, huge1, huge2, hugemax; void *p; /* Get size classes. */ largemax = get_large_size(get_nlarge()-1); + huge0 = get_huge_size(0); huge1 = get_huge_size(1); huge2 = get_huge_size(2); - huge3 = get_huge_size(3); hugemax = get_huge_size(get_nhuge()-1); - p = mallocx(huge3, flags); + p = mallocx(huge2, 0); assert_ptr_not_null(p, "Unexpected mallocx() error"); - assert_zu_eq(xallocx(p, huge3, 0, flags), huge3, + assert_zu_eq(xallocx(p, huge2, 0, 0), huge2, "Unexpected xallocx() behavior"); /* Test size decrease with zero extra. */ - assert_zu_ge(xallocx(p, huge1, 0, flags), huge1, + assert_zu_ge(xallocx(p, huge0, 0, 0), huge0, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, largemax, 0, flags), huge1, + assert_zu_ge(xallocx(p, largemax, 0, 0), huge0, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, huge3, 0, flags), huge3, + assert_zu_eq(xallocx(p, huge2, 0, 0), huge2, "Unexpected xallocx() behavior"); /* Test size decrease with non-zero extra. 
*/ - assert_zu_eq(xallocx(p, huge1, huge3 - huge1, flags), huge3, + assert_zu_eq(xallocx(p, huge0, huge2 - huge0, 0), huge2, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, huge2, huge3 - huge2, flags), huge3, + assert_zu_eq(xallocx(p, huge1, huge2 - huge1, 0), huge2, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, huge1, huge2 - huge1, flags), huge2, + assert_zu_eq(xallocx(p, huge0, huge1 - huge0, 0), huge1, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, largemax, huge1 - largemax, flags), huge1, + assert_zu_ge(xallocx(p, largemax, huge0 - largemax, 0), huge0, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, huge1, 0, flags), huge1, + assert_zu_ge(xallocx(p, huge0, 0, 0), huge0, "Unexpected xallocx() behavior"); /* Test size increase with zero extra. */ - assert_zu_le(xallocx(p, huge3, 0, flags), huge3, + assert_zu_le(xallocx(p, huge2, 0, 0), huge2, "Unexpected xallocx() behavior"); - assert_zu_le(xallocx(p, hugemax+1, 0, flags), huge3, + assert_zu_le(xallocx(p, hugemax+1, 0, 0), huge2, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, huge1, 0, flags), huge1, + assert_zu_ge(xallocx(p, huge0, 0, 0), huge0, "Unexpected xallocx() behavior"); /* Test size increase with non-zero extra. */ - assert_zu_le(xallocx(p, huge1, SIZE_T_MAX - huge1, flags), hugemax, + assert_zu_le(xallocx(p, huge0, SIZE_T_MAX - huge0, 0), hugemax, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, huge1, 0, flags), huge1, + assert_zu_ge(xallocx(p, huge0, 0, 0), huge0, "Unexpected xallocx() behavior"); /* Test size increase with non-zero extra. */ - assert_zu_le(xallocx(p, huge1, huge3 - huge1, flags), huge3, + assert_zu_le(xallocx(p, huge0, huge2 - huge0, 0), huge2, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, huge3, 0, flags), huge3, + assert_zu_eq(xallocx(p, huge2, 0, 0), huge2, "Unexpected xallocx() behavior"); /* Test size+extra overflow. 
*/ - assert_zu_le(xallocx(p, huge3, hugemax - huge3 + 1, flags), hugemax, + assert_zu_le(xallocx(p, huge2, hugemax - huge2 + 1, 0), hugemax, "Unexpected xallocx() behavior"); - dallocx(p, flags); + dallocx(p, 0); } TEST_END @@ -413,13 +388,12 @@ validate_fill(const void *p, uint8_t c, size_t offset, size_t len) static void test_zero(size_t szmin, size_t szmax) { - int flags = MALLOCX_ARENA(arena_ind()) | MALLOCX_ZERO; size_t sz, nsz; void *p; #define FILL_BYTE 0x7aU sz = szmax; - p = mallocx(sz, flags); + p = mallocx(sz, MALLOCX_ZERO); assert_ptr_not_null(p, "Unexpected mallocx() error"); assert_false(validate_fill(p, 0x00, 0, sz), "Memory not filled: sz=%zu", sz); @@ -434,14 +408,14 @@ test_zero(size_t szmin, size_t szmax) /* Shrink in place so that we can expect growing in place to succeed. */ sz = szmin; - assert_zu_eq(xallocx(p, sz, 0, flags), sz, + assert_zu_eq(xallocx(p, sz, 0, MALLOCX_ZERO), sz, "Unexpected xallocx() error"); assert_false(validate_fill(p, FILL_BYTE, 0, sz), "Memory not filled: sz=%zu", sz); for (sz = szmin; sz < szmax; sz = nsz) { - nsz = nallocx(sz+1, flags); - assert_zu_eq(xallocx(p, sz+1, 0, flags), nsz, + nsz = nallocx(sz+1, MALLOCX_ZERO); + assert_zu_eq(xallocx(p, sz+1, 0, MALLOCX_ZERO), nsz, "Unexpected xallocx() failure"); assert_false(validate_fill(p, FILL_BYTE, 0, sz), "Memory not filled: sz=%zu", sz); @@ -452,7 +426,7 @@ test_zero(size_t szmin, size_t szmax) "Memory not filled: nsz=%zu", nsz); } - dallocx(p, flags); + dallocx(p, 0); } TEST_BEGIN(test_zero_large) diff --git a/deps/jemalloc/test/src/mtx.c b/deps/jemalloc/test/src/mtx.c index 8a5dfdd99..73bd02f6d 100644 --- a/deps/jemalloc/test/src/mtx.c +++ b/deps/jemalloc/test/src/mtx.c @@ -11,8 +11,6 @@ mtx_init(mtx_t *mtx) #ifdef _WIN32 if (!InitializeCriticalSectionAndSpinCount(&mtx->lock, _CRT_SPINCOUNT)) return (true); -#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) - mtx->lock = OS_UNFAIR_LOCK_INIT; #elif (defined(JEMALLOC_OSSPIN)) mtx->lock = 0; #else @@ -35,7 +33,6 @@ 
mtx_fini(mtx_t *mtx) { #ifdef _WIN32 -#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) #elif (defined(JEMALLOC_OSSPIN)) #else pthread_mutex_destroy(&mtx->lock); @@ -48,8 +45,6 @@ mtx_lock(mtx_t *mtx) #ifdef _WIN32 EnterCriticalSection(&mtx->lock); -#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) - os_unfair_lock_lock(&mtx->lock); #elif (defined(JEMALLOC_OSSPIN)) OSSpinLockLock(&mtx->lock); #else @@ -63,8 +58,6 @@ mtx_unlock(mtx_t *mtx) #ifdef _WIN32 LeaveCriticalSection(&mtx->lock); -#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) - os_unfair_lock_unlock(&mtx->lock); #elif (defined(JEMALLOC_OSSPIN)) OSSpinLockUnlock(&mtx->lock); #else diff --git a/deps/jemalloc/test/src/test.c b/deps/jemalloc/test/src/test.c index d70cc7501..8173614cf 100644 --- a/deps/jemalloc/test/src/test.c +++ b/deps/jemalloc/test/src/test.c @@ -60,30 +60,32 @@ p_test_fini(void) malloc_printf("%s: %s\n", test_name, test_status_string(test_status)); } -static test_status_t -p_test_impl(bool do_malloc_init, test_t *t, va_list ap) +test_status_t +p_test(test_t *t, ...) { test_status_t ret; + va_list ap; - if (do_malloc_init) { - /* - * Make sure initialization occurs prior to running tests. - * Tests are special because they may use internal facilities - * prior to triggering initialization as a side effect of - * calling into the public API. - */ - if (nallocx(1, 0) == 0) { - malloc_printf("Initialization error"); - return (test_status_fail); - } + /* + * Make sure initialization occurs prior to running tests. Tests are + * special because they may use internal facilities prior to triggering + * initialization as a side effect of calling into the public API. This + * is a final safety that works even if jemalloc_constructor() doesn't + * run, as for MSVC builds. 
+ */ + if (nallocx(1, 0) == 0) { + malloc_printf("Initialization error"); + return (test_status_fail); } ret = test_status_pass; + va_start(ap, t); for (; t != NULL; t = va_arg(ap, test_t *)) { t(); if (test_status > ret) ret = test_status; } + va_end(ap); malloc_printf("--- %s: %u/%u, %s: %u/%u, %s: %u/%u ---\n", test_status_string(test_status_pass), @@ -96,34 +98,6 @@ p_test_impl(bool do_malloc_init, test_t *t, va_list ap) return (ret); } -test_status_t -p_test(test_t *t, ...) -{ - test_status_t ret; - va_list ap; - - ret = test_status_pass; - va_start(ap, t); - ret = p_test_impl(true, t, ap); - va_end(ap); - - return (ret); -} - -test_status_t -p_test_no_malloc_init(test_t *t, ...) -{ - test_status_t ret; - va_list ap; - - ret = test_status_pass; - va_start(ap, t); - ret = p_test_impl(false, t, ap); - va_end(ap); - - return (ret); -} - void p_test_fail(const char *prefix, const char *message) { diff --git a/deps/jemalloc/test/src/timer.c b/deps/jemalloc/test/src/timer.c index 3c7e63a26..0c93abaf9 100644 --- a/deps/jemalloc/test/src/timer.c +++ b/deps/jemalloc/test/src/timer.c @@ -4,26 +4,50 @@ void timer_start(timedelta_t *timer) { - nstime_init(&timer->t0, 0); - nstime_update(&timer->t0); +#ifdef _WIN32 + GetSystemTimeAsFileTime(&timer->ft0); +#elif JEMALLOC_CLOCK_GETTIME + if (sysconf(_SC_MONOTONIC_CLOCK) <= 0) + timer->clock_id = CLOCK_REALTIME; + else + timer->clock_id = CLOCK_MONOTONIC; + clock_gettime(timer->clock_id, &timer->ts0); +#else + gettimeofday(&timer->tv0, NULL); +#endif } void timer_stop(timedelta_t *timer) { - nstime_copy(&timer->t1, &timer->t0); - nstime_update(&timer->t1); +#ifdef _WIN32 + GetSystemTimeAsFileTime(&timer->ft0); +#elif JEMALLOC_CLOCK_GETTIME + clock_gettime(timer->clock_id, &timer->ts1); +#else + gettimeofday(&timer->tv1, NULL); +#endif } uint64_t timer_usec(const timedelta_t *timer) { - nstime_t delta; - nstime_copy(&delta, &timer->t1); - nstime_subtract(&delta, &timer->t0); - return (nstime_ns(&delta) / 1000); +#ifdef _WIN32 
+ uint64_t t0, t1; + t0 = (((uint64_t)timer->ft0.dwHighDateTime) << 32) | + timer->ft0.dwLowDateTime; + t1 = (((uint64_t)timer->ft1.dwHighDateTime) << 32) | + timer->ft1.dwLowDateTime; + return ((t1 - t0) / 10); +#elif JEMALLOC_CLOCK_GETTIME + return (((timer->ts1.tv_sec - timer->ts0.tv_sec) * 1000000) + + (timer->ts1.tv_nsec - timer->ts0.tv_nsec) / 1000); +#else + return (((timer->tv1.tv_sec - timer->tv0.tv_sec) * 1000000) + + timer->tv1.tv_usec - timer->tv0.tv_usec); +#endif } void @@ -32,8 +56,9 @@ timer_ratio(timedelta_t *a, timedelta_t *b, char *buf, size_t buflen) uint64_t t0 = timer_usec(a); uint64_t t1 = timer_usec(b); uint64_t mult; - size_t i = 0; - size_t j, n; + unsigned i = 0; + unsigned j; + int n; /* Whole. */ n = malloc_snprintf(&buf[i], buflen-i, "%"FMTu64, t0 / t1); diff --git a/deps/jemalloc/test/stress/microbench.c b/deps/jemalloc/test/stress/microbench.c index 7dc45f89c..ee39fea7f 100644 --- a/deps/jemalloc/test/stress/microbench.c +++ b/deps/jemalloc/test/stress/microbench.c @@ -1,8 +1,7 @@ #include "test/jemalloc_test.h" JEMALLOC_INLINE_C void -time_func(timedelta_t *timer, uint64_t nwarmup, uint64_t niter, - void (*func)(void)) +time_func(timedelta_t *timer, uint64_t nwarmup, uint64_t niter, void (*func)(void)) { uint64_t i; diff --git a/deps/jemalloc/test/unit/a0.c b/deps/jemalloc/test/unit/a0.c deleted file mode 100644 index b9ba45a3d..000000000 --- a/deps/jemalloc/test/unit/a0.c +++ /dev/null @@ -1,19 +0,0 @@ -#include "test/jemalloc_test.h" - -TEST_BEGIN(test_a0) -{ - void *p; - - p = a0malloc(1); - assert_ptr_not_null(p, "Unexpected a0malloc() error"); - a0dalloc(p); -} -TEST_END - -int -main(void) -{ - - return (test_no_malloc_init( - test_a0)); -} diff --git a/deps/jemalloc/test/unit/arena_reset.c b/deps/jemalloc/test/unit/arena_reset.c deleted file mode 100755 index adf9baa5d..000000000 --- a/deps/jemalloc/test/unit/arena_reset.c +++ /dev/null @@ -1,159 +0,0 @@ -#include "test/jemalloc_test.h" - -#ifdef JEMALLOC_PROF -const char 
*malloc_conf = "prof:true,lg_prof_sample:0"; -#endif - -static unsigned -get_nsizes_impl(const char *cmd) -{ - unsigned ret; - size_t z; - - z = sizeof(unsigned); - assert_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, - "Unexpected mallctl(\"%s\", ...) failure", cmd); - - return (ret); -} - -static unsigned -get_nsmall(void) -{ - - return (get_nsizes_impl("arenas.nbins")); -} - -static unsigned -get_nlarge(void) -{ - - return (get_nsizes_impl("arenas.nlruns")); -} - -static unsigned -get_nhuge(void) -{ - - return (get_nsizes_impl("arenas.nhchunks")); -} - -static size_t -get_size_impl(const char *cmd, size_t ind) -{ - size_t ret; - size_t z; - size_t mib[4]; - size_t miblen = 4; - - z = sizeof(size_t); - assert_d_eq(mallctlnametomib(cmd, mib, &miblen), - 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); - mib[2] = ind; - z = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), - 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind); - - return (ret); -} - -static size_t -get_small_size(size_t ind) -{ - - return (get_size_impl("arenas.bin.0.size", ind)); -} - -static size_t -get_large_size(size_t ind) -{ - - return (get_size_impl("arenas.lrun.0.size", ind)); -} - -static size_t -get_huge_size(size_t ind) -{ - - return (get_size_impl("arenas.hchunk.0.size", ind)); -} - -TEST_BEGIN(test_arena_reset) -{ -#define NHUGE 4 - unsigned arena_ind, nsmall, nlarge, nhuge, nptrs, i; - size_t sz, miblen; - void **ptrs; - int flags; - size_t mib[3]; - tsdn_t *tsdn; - - test_skip_if((config_valgrind && unlikely(in_valgrind)) || (config_fill - && unlikely(opt_quarantine))); - - sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.extend", (void *)&arena_ind, &sz, NULL, 0), - 0, "Unexpected mallctl() failure"); - - flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; - - nsmall = get_nsmall(); - nlarge = get_nlarge(); - nhuge = get_nhuge() > NHUGE ? 
NHUGE : get_nhuge(); - nptrs = nsmall + nlarge + nhuge; - ptrs = (void **)malloc(nptrs * sizeof(void *)); - assert_ptr_not_null(ptrs, "Unexpected malloc() failure"); - - /* Allocate objects with a wide range of sizes. */ - for (i = 0; i < nsmall; i++) { - sz = get_small_size(i); - ptrs[i] = mallocx(sz, flags); - assert_ptr_not_null(ptrs[i], - "Unexpected mallocx(%zu, %#x) failure", sz, flags); - } - for (i = 0; i < nlarge; i++) { - sz = get_large_size(i); - ptrs[nsmall + i] = mallocx(sz, flags); - assert_ptr_not_null(ptrs[i], - "Unexpected mallocx(%zu, %#x) failure", sz, flags); - } - for (i = 0; i < nhuge; i++) { - sz = get_huge_size(i); - ptrs[nsmall + nlarge + i] = mallocx(sz, flags); - assert_ptr_not_null(ptrs[i], - "Unexpected mallocx(%zu, %#x) failure", sz, flags); - } - - tsdn = tsdn_fetch(); - - /* Verify allocations. */ - for (i = 0; i < nptrs; i++) { - assert_zu_gt(ivsalloc(tsdn, ptrs[i], false), 0, - "Allocation should have queryable size"); - } - - /* Reset. */ - miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("arena.0.reset", mib, &miblen), 0, - "Unexpected mallctlnametomib() failure"); - mib[1] = (size_t)arena_ind; - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, - "Unexpected mallctlbymib() failure"); - - /* Verify allocations no longer exist. 
*/ - for (i = 0; i < nptrs; i++) { - assert_zu_eq(ivsalloc(tsdn, ptrs[i], false), 0, - "Allocation should no longer exist"); - } - - free(ptrs); -} -TEST_END - -int -main(void) -{ - - return (test( - test_arena_reset)); -} diff --git a/deps/jemalloc/test/unit/bitmap.c b/deps/jemalloc/test/unit/bitmap.c index a2dd54630..7da583d85 100644 --- a/deps/jemalloc/test/unit/bitmap.c +++ b/deps/jemalloc/test/unit/bitmap.c @@ -6,11 +6,7 @@ TEST_BEGIN(test_bitmap_size) prev_size = 0; for (i = 1; i <= BITMAP_MAXBITS; i++) { - bitmap_info_t binfo; - size_t size; - - bitmap_info_init(&binfo, i); - size = bitmap_size(&binfo); + size_t size = bitmap_size(i); assert_true(size >= prev_size, "Bitmap size is smaller than expected"); prev_size = size; @@ -27,8 +23,8 @@ TEST_BEGIN(test_bitmap_init) bitmap_info_init(&binfo, i); { size_t j; - bitmap_t *bitmap = (bitmap_t *)malloc( - bitmap_size(&binfo)); + bitmap_t *bitmap = (bitmap_t *)malloc(sizeof(bitmap_t) * + bitmap_info_ngroups(&binfo)); bitmap_init(bitmap, &binfo); for (j = 0; j < i; j++) { @@ -50,8 +46,8 @@ TEST_BEGIN(test_bitmap_set) bitmap_info_init(&binfo, i); { size_t j; - bitmap_t *bitmap = (bitmap_t *)malloc( - bitmap_size(&binfo)); + bitmap_t *bitmap = (bitmap_t *)malloc(sizeof(bitmap_t) * + bitmap_info_ngroups(&binfo)); bitmap_init(bitmap, &binfo); for (j = 0; j < i; j++) @@ -73,8 +69,8 @@ TEST_BEGIN(test_bitmap_unset) bitmap_info_init(&binfo, i); { size_t j; - bitmap_t *bitmap = (bitmap_t *)malloc( - bitmap_size(&binfo)); + bitmap_t *bitmap = (bitmap_t *)malloc(sizeof(bitmap_t) * + bitmap_info_ngroups(&binfo)); bitmap_init(bitmap, &binfo); for (j = 0; j < i; j++) @@ -101,9 +97,9 @@ TEST_BEGIN(test_bitmap_sfu) bitmap_info_t binfo; bitmap_info_init(&binfo, i); { - size_t j; - bitmap_t *bitmap = (bitmap_t *)malloc( - bitmap_size(&binfo)); + ssize_t j; + bitmap_t *bitmap = (bitmap_t *)malloc(sizeof(bitmap_t) * + bitmap_info_ngroups(&binfo)); bitmap_init(bitmap, &binfo); /* Iteratively set bits starting at the beginning. 
*/ @@ -119,7 +115,7 @@ TEST_BEGIN(test_bitmap_sfu) * Iteratively unset bits starting at the end, and * verify that bitmap_sfu() reaches the unset bits. */ - for (j = i - 1; j < i; j--) { /* (i..0] */ + for (j = i - 1; j >= 0; j--) { bitmap_unset(bitmap, &binfo, j); assert_zd_eq(bitmap_sfu(bitmap, &binfo), j, "First unset bit should the bit previously " diff --git a/deps/jemalloc/test/unit/ckh.c b/deps/jemalloc/test/unit/ckh.c index 2cbc22688..b11759599 100644 --- a/deps/jemalloc/test/unit/ckh.c +++ b/deps/jemalloc/test/unit/ckh.c @@ -7,8 +7,8 @@ TEST_BEGIN(test_new_delete) tsd = tsd_fetch(); - assert_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, - ckh_string_keycomp), "Unexpected ckh_new() error"); + assert_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, ckh_string_keycomp), + "Unexpected ckh_new() error"); ckh_delete(tsd, &ckh); assert_false(ckh_new(tsd, &ckh, 3, ckh_pointer_hash, @@ -32,8 +32,8 @@ TEST_BEGIN(test_count_insert_search_remove) tsd = tsd_fetch(); - assert_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, - ckh_string_keycomp), "Unexpected ckh_new() error"); + assert_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, ckh_string_keycomp), + "Unexpected ckh_new() error"); assert_zu_eq(ckh_count(&ckh), 0, "ckh_count() should return %zu, but it returned %zu", ZU(0), ckh_count(&ckh)); diff --git a/deps/jemalloc/test/unit/decay.c b/deps/jemalloc/test/unit/decay.c deleted file mode 100755 index 5af8f8074..000000000 --- a/deps/jemalloc/test/unit/decay.c +++ /dev/null @@ -1,374 +0,0 @@ -#include "test/jemalloc_test.h" - -const char *malloc_conf = "purge:decay,decay_time:1"; - -static nstime_monotonic_t *nstime_monotonic_orig; -static nstime_update_t *nstime_update_orig; - -static unsigned nupdates_mock; -static nstime_t time_mock; -static bool monotonic_mock; - -static bool -nstime_monotonic_mock(void) -{ - - return (monotonic_mock); -} - -static bool -nstime_update_mock(nstime_t *time) -{ - - nupdates_mock++; - if (monotonic_mock) - nstime_copy(time, &time_mock); - return 
(!monotonic_mock); -} - -TEST_BEGIN(test_decay_ticks) -{ - ticker_t *decay_ticker; - unsigned tick0, tick1; - size_t sz, huge0, large0; - void *p; - - test_skip_if(opt_purge != purge_mode_decay); - - decay_ticker = decay_ticker_get(tsd_fetch(), 0); - assert_ptr_not_null(decay_ticker, - "Unexpected failure getting decay ticker"); - - sz = sizeof(size_t); - assert_d_eq(mallctl("arenas.hchunk.0.size", (void *)&huge0, &sz, NULL, - 0), 0, "Unexpected mallctl failure"); - assert_d_eq(mallctl("arenas.lrun.0.size", (void *)&large0, &sz, NULL, - 0), 0, "Unexpected mallctl failure"); - - /* - * Test the standard APIs using a huge size class, since we can't - * control tcache interactions (except by completely disabling tcache - * for the entire test program). - */ - - /* malloc(). */ - tick0 = ticker_read(decay_ticker); - p = malloc(huge0); - assert_ptr_not_null(p, "Unexpected malloc() failure"); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, "Expected ticker to tick during malloc()"); - /* free(). */ - tick0 = ticker_read(decay_ticker); - free(p); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, "Expected ticker to tick during free()"); - - /* calloc(). */ - tick0 = ticker_read(decay_ticker); - p = calloc(1, huge0); - assert_ptr_not_null(p, "Unexpected calloc() failure"); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, "Expected ticker to tick during calloc()"); - free(p); - - /* posix_memalign(). */ - tick0 = ticker_read(decay_ticker); - assert_d_eq(posix_memalign(&p, sizeof(size_t), huge0), 0, - "Unexpected posix_memalign() failure"); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during posix_memalign()"); - free(p); - - /* aligned_alloc(). 
*/ - tick0 = ticker_read(decay_ticker); - p = aligned_alloc(sizeof(size_t), huge0); - assert_ptr_not_null(p, "Unexpected aligned_alloc() failure"); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during aligned_alloc()"); - free(p); - - /* realloc(). */ - /* Allocate. */ - tick0 = ticker_read(decay_ticker); - p = realloc(NULL, huge0); - assert_ptr_not_null(p, "Unexpected realloc() failure"); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); - /* Reallocate. */ - tick0 = ticker_read(decay_ticker); - p = realloc(p, huge0); - assert_ptr_not_null(p, "Unexpected realloc() failure"); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); - /* Deallocate. */ - tick0 = ticker_read(decay_ticker); - realloc(p, 0); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); - - /* - * Test the *allocx() APIs using huge, large, and small size classes, - * with tcache explicitly disabled. - */ - { - unsigned i; - size_t allocx_sizes[3]; - allocx_sizes[0] = huge0; - allocx_sizes[1] = large0; - allocx_sizes[2] = 1; - - for (i = 0; i < sizeof(allocx_sizes) / sizeof(size_t); i++) { - sz = allocx_sizes[i]; - - /* mallocx(). */ - tick0 = ticker_read(decay_ticker); - p = mallocx(sz, MALLOCX_TCACHE_NONE); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during mallocx() (sz=%zu)", - sz); - /* rallocx(). */ - tick0 = ticker_read(decay_ticker); - p = rallocx(p, sz, MALLOCX_TCACHE_NONE); - assert_ptr_not_null(p, "Unexpected rallocx() failure"); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during rallocx() (sz=%zu)", - sz); - /* xallocx(). 
*/ - tick0 = ticker_read(decay_ticker); - xallocx(p, sz, 0, MALLOCX_TCACHE_NONE); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during xallocx() (sz=%zu)", - sz); - /* dallocx(). */ - tick0 = ticker_read(decay_ticker); - dallocx(p, MALLOCX_TCACHE_NONE); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during dallocx() (sz=%zu)", - sz); - /* sdallocx(). */ - p = mallocx(sz, MALLOCX_TCACHE_NONE); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); - tick0 = ticker_read(decay_ticker); - sdallocx(p, sz, MALLOCX_TCACHE_NONE); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during sdallocx() " - "(sz=%zu)", sz); - } - } - - /* - * Test tcache fill/flush interactions for large and small size classes, - * using an explicit tcache. - */ - if (config_tcache) { - unsigned tcache_ind, i; - size_t tcache_sizes[2]; - tcache_sizes[0] = large0; - tcache_sizes[1] = 1; - - sz = sizeof(unsigned); - assert_d_eq(mallctl("tcache.create", (void *)&tcache_ind, &sz, - NULL, 0), 0, "Unexpected mallctl failure"); - - for (i = 0; i < sizeof(tcache_sizes) / sizeof(size_t); i++) { - sz = tcache_sizes[i]; - - /* tcache fill. */ - tick0 = ticker_read(decay_ticker); - p = mallocx(sz, MALLOCX_TCACHE(tcache_ind)); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during tcache fill " - "(sz=%zu)", sz); - /* tcache flush. 
*/ - dallocx(p, MALLOCX_TCACHE(tcache_ind)); - tick0 = ticker_read(decay_ticker); - assert_d_eq(mallctl("tcache.flush", NULL, NULL, - (void *)&tcache_ind, sizeof(unsigned)), 0, - "Unexpected mallctl failure"); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during tcache flush " - "(sz=%zu)", sz); - } - } -} -TEST_END - -TEST_BEGIN(test_decay_ticker) -{ -#define NPS 1024 - int flags = (MALLOCX_ARENA(0) | MALLOCX_TCACHE_NONE); - void *ps[NPS]; - uint64_t epoch; - uint64_t npurge0 = 0; - uint64_t npurge1 = 0; - size_t sz, large; - unsigned i, nupdates0; - nstime_t time, decay_time, deadline; - - test_skip_if(opt_purge != purge_mode_decay); - - /* - * Allocate a bunch of large objects, pause the clock, deallocate the - * objects, restore the clock, then [md]allocx() in a tight loop to - * verify the ticker triggers purging. - */ - - if (config_tcache) { - size_t tcache_max; - - sz = sizeof(size_t); - assert_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max, - &sz, NULL, 0), 0, "Unexpected mallctl failure"); - large = nallocx(tcache_max + 1, flags); - } else { - sz = sizeof(size_t); - assert_d_eq(mallctl("arenas.lrun.0.size", (void *)&large, &sz, - NULL, 0), 0, "Unexpected mallctl failure"); - } - - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, - "Unexpected mallctl failure"); - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, - sizeof(uint64_t)), 0, "Unexpected mallctl failure"); - sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.npurge", (void *)&npurge0, &sz, - NULL, 0), config_stats ? 
0 : ENOENT, "Unexpected mallctl result"); - - for (i = 0; i < NPS; i++) { - ps[i] = mallocx(large, flags); - assert_ptr_not_null(ps[i], "Unexpected mallocx() failure"); - } - - nupdates_mock = 0; - nstime_init(&time_mock, 0); - nstime_update(&time_mock); - monotonic_mock = true; - - nstime_monotonic_orig = nstime_monotonic; - nstime_update_orig = nstime_update; - nstime_monotonic = nstime_monotonic_mock; - nstime_update = nstime_update_mock; - - for (i = 0; i < NPS; i++) { - dallocx(ps[i], flags); - nupdates0 = nupdates_mock; - assert_d_eq(mallctl("arena.0.decay", NULL, NULL, NULL, 0), 0, - "Unexpected arena.0.decay failure"); - assert_u_gt(nupdates_mock, nupdates0, - "Expected nstime_update() to be called"); - } - - nstime_monotonic = nstime_monotonic_orig; - nstime_update = nstime_update_orig; - - nstime_init(&time, 0); - nstime_update(&time); - nstime_init2(&decay_time, opt_decay_time, 0); - nstime_copy(&deadline, &time); - nstime_add(&deadline, &decay_time); - do { - for (i = 0; i < DECAY_NTICKS_PER_UPDATE / 2; i++) { - void *p = mallocx(1, flags); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); - dallocx(p, flags); - } - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, - sizeof(uint64_t)), 0, "Unexpected mallctl failure"); - sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.npurge", (void *)&npurge1, - &sz, NULL, 0), config_stats ? 
0 : ENOENT, - "Unexpected mallctl result"); - - nstime_update(&time); - } while (nstime_compare(&time, &deadline) <= 0 && npurge1 == npurge0); - - if (config_stats) - assert_u64_gt(npurge1, npurge0, "Expected purging to occur"); -#undef NPS -} -TEST_END - -TEST_BEGIN(test_decay_nonmonotonic) -{ -#define NPS (SMOOTHSTEP_NSTEPS + 1) - int flags = (MALLOCX_ARENA(0) | MALLOCX_TCACHE_NONE); - void *ps[NPS]; - uint64_t epoch; - uint64_t npurge0 = 0; - uint64_t npurge1 = 0; - size_t sz, large0; - unsigned i, nupdates0; - - test_skip_if(opt_purge != purge_mode_decay); - - sz = sizeof(size_t); - assert_d_eq(mallctl("arenas.lrun.0.size", (void *)&large0, &sz, NULL, - 0), 0, "Unexpected mallctl failure"); - - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, - "Unexpected mallctl failure"); - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, - sizeof(uint64_t)), 0, "Unexpected mallctl failure"); - sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.npurge", (void *)&npurge0, &sz, - NULL, 0), config_stats ? 
0 : ENOENT, "Unexpected mallctl result"); - - nupdates_mock = 0; - nstime_init(&time_mock, 0); - nstime_update(&time_mock); - monotonic_mock = false; - - nstime_monotonic_orig = nstime_monotonic; - nstime_update_orig = nstime_update; - nstime_monotonic = nstime_monotonic_mock; - nstime_update = nstime_update_mock; - - for (i = 0; i < NPS; i++) { - ps[i] = mallocx(large0, flags); - assert_ptr_not_null(ps[i], "Unexpected mallocx() failure"); - } - - for (i = 0; i < NPS; i++) { - dallocx(ps[i], flags); - nupdates0 = nupdates_mock; - assert_d_eq(mallctl("arena.0.decay", NULL, NULL, NULL, 0), 0, - "Unexpected arena.0.decay failure"); - assert_u_gt(nupdates_mock, nupdates0, - "Expected nstime_update() to be called"); - } - - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, - sizeof(uint64_t)), 0, "Unexpected mallctl failure"); - sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.npurge", (void *)&npurge1, &sz, - NULL, 0), config_stats ? 0 : ENOENT, "Unexpected mallctl result"); - - if (config_stats) - assert_u64_eq(npurge0, npurge1, "Unexpected purging occurred"); - - nstime_monotonic = nstime_monotonic_orig; - nstime_update = nstime_update_orig; -#undef NPS -} -TEST_END - -int -main(void) -{ - - return (test( - test_decay_ticks, - test_decay_ticker, - test_decay_nonmonotonic)); -} diff --git a/deps/jemalloc/test/unit/fork.c b/deps/jemalloc/test/unit/fork.c deleted file mode 100644 index c530797c4..000000000 --- a/deps/jemalloc/test/unit/fork.c +++ /dev/null @@ -1,64 +0,0 @@ -#include "test/jemalloc_test.h" - -#ifndef _WIN32 -#include -#endif - -TEST_BEGIN(test_fork) -{ -#ifndef _WIN32 - void *p; - pid_t pid; - - p = malloc(1); - assert_ptr_not_null(p, "Unexpected malloc() failure"); - - pid = fork(); - - free(p); - - p = malloc(64); - assert_ptr_not_null(p, "Unexpected malloc() failure"); - free(p); - - if (pid == -1) { - /* Error. */ - test_fail("Unexpected fork() failure"); - } else if (pid == 0) { - /* Child. 
*/ - _exit(0); - } else { - int status; - - /* Parent. */ - while (true) { - if (waitpid(pid, &status, 0) == -1) - test_fail("Unexpected waitpid() failure"); - if (WIFSIGNALED(status)) { - test_fail("Unexpected child termination due to " - "signal %d", WTERMSIG(status)); - break; - } - if (WIFEXITED(status)) { - if (WEXITSTATUS(status) != 0) { - test_fail( - "Unexpected child exit value %d", - WEXITSTATUS(status)); - } - break; - } - } - } -#else - test_skip("fork(2) is irrelevant to Windows"); -#endif -} -TEST_END - -int -main(void) -{ - - return (test( - test_fork)); -} diff --git a/deps/jemalloc/test/unit/hash.c b/deps/jemalloc/test/unit/hash.c index 010c9d76f..77a8cede9 100644 --- a/deps/jemalloc/test/unit/hash.c +++ b/deps/jemalloc/test/unit/hash.c @@ -35,7 +35,7 @@ typedef enum { hash_variant_x64_128 } hash_variant_t; -static int +static size_t hash_variant_bits(hash_variant_t variant) { @@ -59,20 +59,19 @@ hash_variant_string(hash_variant_t variant) } } -#define KEY_SIZE 256 static void -hash_variant_verify_key(hash_variant_t variant, uint8_t *key) +hash_variant_verify(hash_variant_t variant) { - const int hashbytes = hash_variant_bits(variant) / 8; - const int hashes_size = hashbytes * 256; - VARIABLE_ARRAY(uint8_t, hashes, hashes_size); + const size_t hashbytes = hash_variant_bits(variant) / 8; + uint8_t key[256]; + VARIABLE_ARRAY(uint8_t, hashes, hashbytes * 256); VARIABLE_ARRAY(uint8_t, final, hashbytes); unsigned i; uint32_t computed, expected; - memset(key, 0, KEY_SIZE); - memset(hashes, 0, hashes_size); - memset(final, 0, hashbytes); + memset(key, 0, sizeof(key)); + memset(hashes, 0, sizeof(hashes)); + memset(final, 0, sizeof(final)); /* * Hash keys of the form {0}, {0,1}, {0,1,2}, ..., {0,1,...,255} as the @@ -103,17 +102,17 @@ hash_variant_verify_key(hash_variant_t variant, uint8_t *key) /* Hash the result array. 
*/ switch (variant) { case hash_variant_x86_32: { - uint32_t out = hash_x86_32(hashes, hashes_size, 0); + uint32_t out = hash_x86_32(hashes, hashbytes*256, 0); memcpy(final, &out, sizeof(out)); break; } case hash_variant_x86_128: { uint64_t out[2]; - hash_x86_128(hashes, hashes_size, 0, out); + hash_x86_128(hashes, hashbytes*256, 0, out); memcpy(final, out, sizeof(out)); break; } case hash_variant_x64_128: { uint64_t out[2]; - hash_x64_128(hashes, hashes_size, 0, out); + hash_x64_128(hashes, hashbytes*256, 0, out); memcpy(final, out, sizeof(out)); break; } default: not_reached(); @@ -140,19 +139,6 @@ hash_variant_verify_key(hash_variant_t variant, uint8_t *key) hash_variant_string(variant), expected, computed); } -static void -hash_variant_verify(hash_variant_t variant) -{ -#define MAX_ALIGN 16 - uint8_t key[KEY_SIZE + (MAX_ALIGN - 1)]; - unsigned i; - - for (i = 0; i < MAX_ALIGN; i++) - hash_variant_verify_key(variant, &key[i]); -#undef MAX_ALIGN -} -#undef KEY_SIZE - TEST_BEGIN(test_hash_x86_32) { diff --git a/deps/jemalloc/test/unit/junk.c b/deps/jemalloc/test/unit/junk.c index 460bd524d..b23dd1e95 100644 --- a/deps/jemalloc/test/unit/junk.c +++ b/deps/jemalloc/test/unit/junk.c @@ -29,7 +29,7 @@ arena_dalloc_junk_small_intercept(void *ptr, arena_bin_info_t *bin_info) arena_dalloc_junk_small_orig(ptr, bin_info); for (i = 0; i < bin_info->reg_size; i++) { - assert_u_eq(((uint8_t *)ptr)[i], JEMALLOC_FREE_JUNK, + assert_c_eq(((char *)ptr)[i], 0x5a, "Missing junk fill for byte %zu/%zu of deallocated region", i, bin_info->reg_size); } @@ -44,7 +44,7 @@ arena_dalloc_junk_large_intercept(void *ptr, size_t usize) arena_dalloc_junk_large_orig(ptr, usize); for (i = 0; i < usize; i++) { - assert_u_eq(((uint8_t *)ptr)[i], JEMALLOC_FREE_JUNK, + assert_c_eq(((char *)ptr)[i], 0x5a, "Missing junk fill for byte %zu/%zu of deallocated region", i, usize); } @@ -69,7 +69,7 @@ huge_dalloc_junk_intercept(void *ptr, size_t usize) static void test_junk(size_t sz_min, size_t sz_max) { - 
uint8_t *s; + char *s; size_t sz_prev, sz, i; if (opt_junk_free) { @@ -82,23 +82,23 @@ test_junk(size_t sz_min, size_t sz_max) } sz_prev = 0; - s = (uint8_t *)mallocx(sz_min, 0); + s = (char *)mallocx(sz_min, 0); assert_ptr_not_null((void *)s, "Unexpected mallocx() failure"); for (sz = sallocx(s, 0); sz <= sz_max; sz_prev = sz, sz = sallocx(s, 0)) { if (sz_prev > 0) { - assert_u_eq(s[0], 'a', + assert_c_eq(s[0], 'a', "Previously allocated byte %zu/%zu is corrupted", ZU(0), sz_prev); - assert_u_eq(s[sz_prev-1], 'a', + assert_c_eq(s[sz_prev-1], 'a', "Previously allocated byte %zu/%zu is corrupted", sz_prev-1, sz_prev); } for (i = sz_prev; i < sz; i++) { if (opt_junk_alloc) { - assert_u_eq(s[i], JEMALLOC_ALLOC_JUNK, + assert_c_eq(s[i], 0xa5, "Newly allocated byte %zu/%zu isn't " "junk-filled", i, sz); } @@ -107,7 +107,7 @@ test_junk(size_t sz_min, size_t sz_max) if (xallocx(s, sz+1, 0, 0) == sz) { watch_junking(s); - s = (uint8_t *)rallocx(s, sz+1, 0); + s = (char *)rallocx(s, sz+1, 0); assert_ptr_not_null((void *)s, "Unexpected rallocx() failure"); assert_true(!opt_junk_free || saw_junking, @@ -244,6 +244,7 @@ int main(void) { + assert(!config_fill || opt_junk_alloc || opt_junk_free); return (test( test_junk_small, test_junk_large, diff --git a/deps/jemalloc/test/unit/junk_alloc.c b/deps/jemalloc/test/unit/junk_alloc.c index a5895b5c0..8db3331d2 100644 --- a/deps/jemalloc/test/unit/junk_alloc.c +++ b/deps/jemalloc/test/unit/junk_alloc.c @@ -1,3 +1,3 @@ -#define JEMALLOC_TEST_JUNK_OPT "junk:alloc" +#define JEMALLOC_TEST_JUNK_OPT "junk:alloc" #include "junk.c" #undef JEMALLOC_TEST_JUNK_OPT diff --git a/deps/jemalloc/test/unit/junk_free.c b/deps/jemalloc/test/unit/junk_free.c index bb5183c90..482a61d07 100644 --- a/deps/jemalloc/test/unit/junk_free.c +++ b/deps/jemalloc/test/unit/junk_free.c @@ -1,3 +1,3 @@ -#define JEMALLOC_TEST_JUNK_OPT "junk:free" +#define JEMALLOC_TEST_JUNK_OPT "junk:free" #include "junk.c" #undef JEMALLOC_TEST_JUNK_OPT diff --git 
a/deps/jemalloc/test/unit/mallctl.c b/deps/jemalloc/test/unit/mallctl.c old mode 100755 new mode 100644 index 2353c92c1..31e354ca7 --- a/deps/jemalloc/test/unit/mallctl.c +++ b/deps/jemalloc/test/unit/mallctl.c @@ -12,18 +12,16 @@ TEST_BEGIN(test_mallctl_errors) EPERM, "mallctl() should return EPERM on attempt to write " "read-only value"); - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, - sizeof(epoch)-1), EINVAL, - "mallctl() should return EINVAL for input size mismatch"); - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, - sizeof(epoch)+1), EINVAL, - "mallctl() should return EINVAL for input size mismatch"); + assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)-1), + EINVAL, "mallctl() should return EINVAL for input size mismatch"); + assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)+1), + EINVAL, "mallctl() should return EINVAL for input size mismatch"); sz = sizeof(epoch)-1; - assert_d_eq(mallctl("epoch", (void *)&epoch, &sz, NULL, 0), EINVAL, + assert_d_eq(mallctl("epoch", &epoch, &sz, NULL, 0), EINVAL, "mallctl() should return EINVAL for output size mismatch"); sz = sizeof(epoch)+1; - assert_d_eq(mallctl("epoch", (void *)&epoch, &sz, NULL, 0), EINVAL, + assert_d_eq(mallctl("epoch", &epoch, &sz, NULL, 0), EINVAL, "mallctl() should return EINVAL for output size mismatch"); } TEST_END @@ -58,20 +56,18 @@ TEST_BEGIN(test_mallctlbymib_errors) assert_d_eq(mallctlnametomib("epoch", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&epoch, + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &epoch, sizeof(epoch)-1), EINVAL, "mallctlbymib() should return EINVAL for input size mismatch"); - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&epoch, + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &epoch, sizeof(epoch)+1), EINVAL, "mallctlbymib() should return EINVAL for input size mismatch"); sz = sizeof(epoch)-1; - assert_d_eq(mallctlbymib(mib, 
miblen, (void *)&epoch, &sz, NULL, 0), - EINVAL, + assert_d_eq(mallctlbymib(mib, miblen, &epoch, &sz, NULL, 0), EINVAL, "mallctlbymib() should return EINVAL for output size mismatch"); sz = sizeof(epoch)+1; - assert_d_eq(mallctlbymib(mib, miblen, (void *)&epoch, &sz, NULL, 0), - EINVAL, + assert_d_eq(mallctlbymib(mib, miblen, &epoch, &sz, NULL, 0), EINVAL, "mallctlbymib() should return EINVAL for output size mismatch"); } TEST_END @@ -87,19 +83,18 @@ TEST_BEGIN(test_mallctl_read_write) assert_zu_eq(sz, sizeof(old_epoch), "Unexpected output size"); /* Read. */ - assert_d_eq(mallctl("epoch", (void *)&old_epoch, &sz, NULL, 0), 0, + assert_d_eq(mallctl("epoch", &old_epoch, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); assert_zu_eq(sz, sizeof(old_epoch), "Unexpected output size"); /* Write. */ - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&new_epoch, - sizeof(new_epoch)), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, &new_epoch, sizeof(new_epoch)), + 0, "Unexpected mallctl() failure"); assert_zu_eq(sz, sizeof(old_epoch), "Unexpected output size"); /* Read+write. 
*/ - assert_d_eq(mallctl("epoch", (void *)&old_epoch, &sz, - (void *)&new_epoch, sizeof(new_epoch)), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("epoch", &old_epoch, &sz, &new_epoch, + sizeof(new_epoch)), 0, "Unexpected mallctl() failure"); assert_zu_eq(sz, sizeof(old_epoch), "Unexpected output size"); } TEST_END @@ -122,30 +117,29 @@ TEST_END TEST_BEGIN(test_mallctl_config) { -#define TEST_MALLCTL_CONFIG(config, t) do { \ - t oldval; \ +#define TEST_MALLCTL_CONFIG(config) do { \ + bool oldval; \ size_t sz = sizeof(oldval); \ - assert_d_eq(mallctl("config."#config, (void *)&oldval, &sz, \ - NULL, 0), 0, "Unexpected mallctl() failure"); \ + assert_d_eq(mallctl("config."#config, &oldval, &sz, NULL, 0), \ + 0, "Unexpected mallctl() failure"); \ assert_b_eq(oldval, config_##config, "Incorrect config value"); \ assert_zu_eq(sz, sizeof(oldval), "Unexpected output size"); \ } while (0) - TEST_MALLCTL_CONFIG(cache_oblivious, bool); - TEST_MALLCTL_CONFIG(debug, bool); - TEST_MALLCTL_CONFIG(fill, bool); - TEST_MALLCTL_CONFIG(lazy_lock, bool); - TEST_MALLCTL_CONFIG(malloc_conf, const char *); - TEST_MALLCTL_CONFIG(munmap, bool); - TEST_MALLCTL_CONFIG(prof, bool); - TEST_MALLCTL_CONFIG(prof_libgcc, bool); - TEST_MALLCTL_CONFIG(prof_libunwind, bool); - TEST_MALLCTL_CONFIG(stats, bool); - TEST_MALLCTL_CONFIG(tcache, bool); - TEST_MALLCTL_CONFIG(tls, bool); - TEST_MALLCTL_CONFIG(utrace, bool); - TEST_MALLCTL_CONFIG(valgrind, bool); - TEST_MALLCTL_CONFIG(xmalloc, bool); + TEST_MALLCTL_CONFIG(cache_oblivious); + TEST_MALLCTL_CONFIG(debug); + TEST_MALLCTL_CONFIG(fill); + TEST_MALLCTL_CONFIG(lazy_lock); + TEST_MALLCTL_CONFIG(munmap); + TEST_MALLCTL_CONFIG(prof); + TEST_MALLCTL_CONFIG(prof_libgcc); + TEST_MALLCTL_CONFIG(prof_libunwind); + TEST_MALLCTL_CONFIG(stats); + TEST_MALLCTL_CONFIG(tcache); + TEST_MALLCTL_CONFIG(tls); + TEST_MALLCTL_CONFIG(utrace); + TEST_MALLCTL_CONFIG(valgrind); + TEST_MALLCTL_CONFIG(xmalloc); #undef TEST_MALLCTL_CONFIG } @@ -159,8 +153,7 @@ 
TEST_BEGIN(test_mallctl_opt) t oldval; \ size_t sz = sizeof(oldval); \ int expected = config_##config ? 0 : ENOENT; \ - int result = mallctl("opt."#opt, (void *)&oldval, &sz, NULL, \ - 0); \ + int result = mallctl("opt."#opt, &oldval, &sz, NULL, 0); \ assert_d_eq(result, expected, \ "Unexpected mallctl() result for opt."#opt); \ assert_zu_eq(sz, sizeof(oldval), "Unexpected output size"); \ @@ -169,10 +162,8 @@ TEST_BEGIN(test_mallctl_opt) TEST_MALLCTL_OPT(bool, abort, always); TEST_MALLCTL_OPT(size_t, lg_chunk, always); TEST_MALLCTL_OPT(const char *, dss, always); - TEST_MALLCTL_OPT(unsigned, narenas, always); - TEST_MALLCTL_OPT(const char *, purge, always); + TEST_MALLCTL_OPT(size_t, narenas, always); TEST_MALLCTL_OPT(ssize_t, lg_dirty_mult, always); - TEST_MALLCTL_OPT(ssize_t, decay_time, always); TEST_MALLCTL_OPT(bool, stats_print, always); TEST_MALLCTL_OPT(const char *, junk, fill); TEST_MALLCTL_OPT(size_t, quarantine, fill); @@ -203,7 +194,7 @@ TEST_BEGIN(test_manpage_example) size_t len, miblen; len = sizeof(nbins); - assert_d_eq(mallctl("arenas.nbins", (void *)&nbins, &len, NULL, 0), 0, + assert_d_eq(mallctl("arenas.nbins", &nbins, &len, NULL, 0), 0, "Unexpected mallctl() failure"); miblen = 4; @@ -214,8 +205,8 @@ TEST_BEGIN(test_manpage_example) mib[2] = i; len = sizeof(bin_size); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&bin_size, &len, - NULL, 0), 0, "Unexpected mallctlbymib() failure"); + assert_d_eq(mallctlbymib(mib, miblen, &bin_size, &len, NULL, 0), + 0, "Unexpected mallctlbymib() failure"); /* Do something with bin_size... */ } } @@ -264,25 +255,25 @@ TEST_BEGIN(test_tcache) /* Create tcaches. */ for (i = 0; i < NTCACHES; i++) { sz = sizeof(unsigned); - assert_d_eq(mallctl("tcache.create", (void *)&tis[i], &sz, NULL, - 0), 0, "Unexpected mallctl() failure, i=%u", i); + assert_d_eq(mallctl("tcache.create", &tis[i], &sz, NULL, 0), 0, + "Unexpected mallctl() failure, i=%u", i); } /* Exercise tcache ID recycling. 
*/ for (i = 0; i < NTCACHES; i++) { - assert_d_eq(mallctl("tcache.destroy", NULL, NULL, - (void *)&tis[i], sizeof(unsigned)), 0, - "Unexpected mallctl() failure, i=%u", i); + assert_d_eq(mallctl("tcache.destroy", NULL, NULL, &tis[i], + sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u", + i); } for (i = 0; i < NTCACHES; i++) { sz = sizeof(unsigned); - assert_d_eq(mallctl("tcache.create", (void *)&tis[i], &sz, NULL, - 0), 0, "Unexpected mallctl() failure, i=%u", i); + assert_d_eq(mallctl("tcache.create", &tis[i], &sz, NULL, 0), 0, + "Unexpected mallctl() failure, i=%u", i); } /* Flush empty tcaches. */ for (i = 0; i < NTCACHES; i++) { - assert_d_eq(mallctl("tcache.flush", NULL, NULL, (void *)&tis[i], + assert_d_eq(mallctl("tcache.flush", NULL, NULL, &tis[i], sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u", i); } @@ -327,16 +318,16 @@ TEST_BEGIN(test_tcache) /* Flush some non-empty tcaches. */ for (i = 0; i < NTCACHES/2; i++) { - assert_d_eq(mallctl("tcache.flush", NULL, NULL, (void *)&tis[i], + assert_d_eq(mallctl("tcache.flush", NULL, NULL, &tis[i], sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u", i); } /* Destroy tcaches. 
*/ for (i = 0; i < NTCACHES; i++) { - assert_d_eq(mallctl("tcache.destroy", NULL, NULL, - (void *)&tis[i], sizeof(unsigned)), 0, - "Unexpected mallctl() failure, i=%u", i); + assert_d_eq(mallctl("tcache.destroy", NULL, NULL, &tis[i], + sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u", + i); } } TEST_END @@ -346,17 +337,15 @@ TEST_BEGIN(test_thread_arena) unsigned arena_old, arena_new, narenas; size_t sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0), - 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arenas.narenas", &narenas, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); assert_u_eq(narenas, opt_narenas, "Number of arenas incorrect"); arena_new = narenas - 1; - assert_d_eq(mallctl("thread.arena", (void *)&arena_old, &sz, - (void *)&arena_new, sizeof(unsigned)), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("thread.arena", &arena_old, &sz, &arena_new, + sizeof(unsigned)), 0, "Unexpected mallctl() failure"); arena_new = 0; - assert_d_eq(mallctl("thread.arena", (void *)&arena_old, &sz, - (void *)&arena_new, sizeof(unsigned)), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("thread.arena", &arena_old, &sz, &arena_new, + sizeof(unsigned)), 0, "Unexpected mallctl() failure"); } TEST_END @@ -365,20 +354,17 @@ TEST_BEGIN(test_arena_i_lg_dirty_mult) ssize_t lg_dirty_mult, orig_lg_dirty_mult, prev_lg_dirty_mult; size_t sz = sizeof(ssize_t); - test_skip_if(opt_purge != purge_mode_ratio); - - assert_d_eq(mallctl("arena.0.lg_dirty_mult", - (void *)&orig_lg_dirty_mult, &sz, NULL, 0), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arena.0.lg_dirty_mult", &orig_lg_dirty_mult, &sz, + NULL, 0), 0, "Unexpected mallctl() failure"); lg_dirty_mult = -2; assert_d_eq(mallctl("arena.0.lg_dirty_mult", NULL, NULL, - (void *)&lg_dirty_mult, sizeof(ssize_t)), EFAULT, + &lg_dirty_mult, sizeof(ssize_t)), EFAULT, "Unexpected mallctl() success"); lg_dirty_mult = (sizeof(size_t) << 3); 
assert_d_eq(mallctl("arena.0.lg_dirty_mult", NULL, NULL, - (void *)&lg_dirty_mult, sizeof(ssize_t)), EFAULT, + &lg_dirty_mult, sizeof(ssize_t)), EFAULT, "Unexpected mallctl() success"); for (prev_lg_dirty_mult = orig_lg_dirty_mult, lg_dirty_mult = -1; @@ -386,48 +372,15 @@ TEST_BEGIN(test_arena_i_lg_dirty_mult) = lg_dirty_mult, lg_dirty_mult++) { ssize_t old_lg_dirty_mult; - assert_d_eq(mallctl("arena.0.lg_dirty_mult", - (void *)&old_lg_dirty_mult, &sz, (void *)&lg_dirty_mult, - sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arena.0.lg_dirty_mult", &old_lg_dirty_mult, + &sz, &lg_dirty_mult, sizeof(ssize_t)), 0, + "Unexpected mallctl() failure"); assert_zd_eq(old_lg_dirty_mult, prev_lg_dirty_mult, "Unexpected old arena.0.lg_dirty_mult"); } } TEST_END -TEST_BEGIN(test_arena_i_decay_time) -{ - ssize_t decay_time, orig_decay_time, prev_decay_time; - size_t sz = sizeof(ssize_t); - - test_skip_if(opt_purge != purge_mode_decay); - - assert_d_eq(mallctl("arena.0.decay_time", (void *)&orig_decay_time, &sz, - NULL, 0), 0, "Unexpected mallctl() failure"); - - decay_time = -2; - assert_d_eq(mallctl("arena.0.decay_time", NULL, NULL, - (void *)&decay_time, sizeof(ssize_t)), EFAULT, - "Unexpected mallctl() success"); - - decay_time = 0x7fffffff; - assert_d_eq(mallctl("arena.0.decay_time", NULL, NULL, - (void *)&decay_time, sizeof(ssize_t)), 0, - "Unexpected mallctl() failure"); - - for (prev_decay_time = decay_time, decay_time = -1; - decay_time < 20; prev_decay_time = decay_time, decay_time++) { - ssize_t old_decay_time; - - assert_d_eq(mallctl("arena.0.decay_time", (void *)&old_decay_time, - &sz, (void *)&decay_time, sizeof(ssize_t)), 0, - "Unexpected mallctl() failure"); - assert_zd_eq(old_decay_time, prev_decay_time, - "Unexpected old arena.0.decay_time"); - } -} -TEST_END - TEST_BEGIN(test_arena_i_purge) { unsigned narenas; @@ -438,29 +391,9 @@ TEST_BEGIN(test_arena_i_purge) assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, 
"Unexpected mallctl() failure"); - assert_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0), - 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctlnametomib("arena.0.purge", mib, &miblen), 0, - "Unexpected mallctlnametomib() failure"); - mib[1] = narenas; - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, - "Unexpected mallctlbymib() failure"); -} -TEST_END - -TEST_BEGIN(test_arena_i_decay) -{ - unsigned narenas; - size_t sz = sizeof(unsigned); - size_t mib[3]; - size_t miblen = 3; - - assert_d_eq(mallctl("arena.0.decay", NULL, NULL, NULL, 0), 0, + assert_d_eq(mallctl("arenas.narenas", &narenas, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - - assert_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0), - 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctlnametomib("arena.0.decay", mib, &miblen), 0, + assert_d_eq(mallctlnametomib("arena.0.purge", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[1] = narenas; assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, @@ -480,35 +413,31 @@ TEST_BEGIN(test_arena_i_dss) "Unexpected mallctlnametomib() error"); dss_prec_new = "disabled"; - assert_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, - (void *)&dss_prec_new, sizeof(dss_prec_new)), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctlbymib(mib, miblen, &dss_prec_old, &sz, &dss_prec_new, + sizeof(dss_prec_new)), 0, "Unexpected mallctl() failure"); assert_str_ne(dss_prec_old, "primary", "Unexpected default for dss precedence"); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_new, &sz, - (void *)&dss_prec_old, sizeof(dss_prec_old)), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctlbymib(mib, miblen, &dss_prec_new, &sz, &dss_prec_old, + sizeof(dss_prec_old)), 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, NULL, - 0), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctlbymib(mib, miblen, 
&dss_prec_old, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); assert_str_ne(dss_prec_old, "primary", "Unexpected value for dss precedence"); mib[1] = narenas_total_get(); dss_prec_new = "disabled"; - assert_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, - (void *)&dss_prec_new, sizeof(dss_prec_new)), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctlbymib(mib, miblen, &dss_prec_old, &sz, &dss_prec_new, + sizeof(dss_prec_new)), 0, "Unexpected mallctl() failure"); assert_str_ne(dss_prec_old, "primary", "Unexpected default for dss precedence"); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_new, &sz, - (void *)&dss_prec_old, sizeof(dss_prec_new)), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctlbymib(mib, miblen, &dss_prec_new, &sz, &dss_prec_old, + sizeof(dss_prec_new)), 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, NULL, - 0), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctlbymib(mib, miblen, &dss_prec_old, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); assert_str_ne(dss_prec_old, "primary", "Unexpected value for dss precedence"); } @@ -519,14 +448,14 @@ TEST_BEGIN(test_arenas_initialized) unsigned narenas; size_t sz = sizeof(narenas); - assert_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0), - 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arenas.narenas", &narenas, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); { VARIABLE_ARRAY(bool, initialized, narenas); sz = narenas * sizeof(bool); - assert_d_eq(mallctl("arenas.initialized", (void *)initialized, - &sz, NULL, 0), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arenas.initialized", initialized, &sz, + NULL, 0), 0, "Unexpected mallctl() failure"); } } TEST_END @@ -536,19 +465,17 @@ TEST_BEGIN(test_arenas_lg_dirty_mult) ssize_t lg_dirty_mult, orig_lg_dirty_mult, prev_lg_dirty_mult; size_t sz = sizeof(ssize_t); - test_skip_if(opt_purge != 
purge_mode_ratio); - - assert_d_eq(mallctl("arenas.lg_dirty_mult", (void *)&orig_lg_dirty_mult, - &sz, NULL, 0), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arenas.lg_dirty_mult", &orig_lg_dirty_mult, &sz, + NULL, 0), 0, "Unexpected mallctl() failure"); lg_dirty_mult = -2; assert_d_eq(mallctl("arenas.lg_dirty_mult", NULL, NULL, - (void *)&lg_dirty_mult, sizeof(ssize_t)), EFAULT, + &lg_dirty_mult, sizeof(ssize_t)), EFAULT, "Unexpected mallctl() success"); lg_dirty_mult = (sizeof(size_t) << 3); assert_d_eq(mallctl("arenas.lg_dirty_mult", NULL, NULL, - (void *)&lg_dirty_mult, sizeof(ssize_t)), EFAULT, + &lg_dirty_mult, sizeof(ssize_t)), EFAULT, "Unexpected mallctl() success"); for (prev_lg_dirty_mult = orig_lg_dirty_mult, lg_dirty_mult = -1; @@ -556,56 +483,23 @@ TEST_BEGIN(test_arenas_lg_dirty_mult) lg_dirty_mult, lg_dirty_mult++) { ssize_t old_lg_dirty_mult; - assert_d_eq(mallctl("arenas.lg_dirty_mult", - (void *)&old_lg_dirty_mult, &sz, (void *)&lg_dirty_mult, - sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arenas.lg_dirty_mult", &old_lg_dirty_mult, + &sz, &lg_dirty_mult, sizeof(ssize_t)), 0, + "Unexpected mallctl() failure"); assert_zd_eq(old_lg_dirty_mult, prev_lg_dirty_mult, "Unexpected old arenas.lg_dirty_mult"); } } TEST_END -TEST_BEGIN(test_arenas_decay_time) -{ - ssize_t decay_time, orig_decay_time, prev_decay_time; - size_t sz = sizeof(ssize_t); - - test_skip_if(opt_purge != purge_mode_decay); - - assert_d_eq(mallctl("arenas.decay_time", (void *)&orig_decay_time, &sz, - NULL, 0), 0, "Unexpected mallctl() failure"); - - decay_time = -2; - assert_d_eq(mallctl("arenas.decay_time", NULL, NULL, - (void *)&decay_time, sizeof(ssize_t)), EFAULT, - "Unexpected mallctl() success"); - - decay_time = 0x7fffffff; - assert_d_eq(mallctl("arenas.decay_time", NULL, NULL, - (void *)&decay_time, sizeof(ssize_t)), 0, - "Expected mallctl() failure"); - - for (prev_decay_time = decay_time, decay_time = -1; - decay_time < 20; 
prev_decay_time = decay_time, decay_time++) { - ssize_t old_decay_time; - - assert_d_eq(mallctl("arenas.decay_time", - (void *)&old_decay_time, &sz, (void *)&decay_time, - sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); - assert_zd_eq(old_decay_time, prev_decay_time, - "Unexpected old arenas.decay_time"); - } -} -TEST_END - TEST_BEGIN(test_arenas_constants) { #define TEST_ARENAS_CONSTANT(t, name, expected) do { \ t name; \ size_t sz = sizeof(t); \ - assert_d_eq(mallctl("arenas."#name, (void *)&name, &sz, NULL, \ - 0), 0, "Unexpected mallctl() failure"); \ + assert_d_eq(mallctl("arenas."#name, &name, &sz, NULL, 0), 0, \ + "Unexpected mallctl() failure"); \ assert_zu_eq(name, expected, "Incorrect "#name" size"); \ } while (0) @@ -625,8 +519,8 @@ TEST_BEGIN(test_arenas_bin_constants) #define TEST_ARENAS_BIN_CONSTANT(t, name, expected) do { \ t name; \ size_t sz = sizeof(t); \ - assert_d_eq(mallctl("arenas.bin.0."#name, (void *)&name, &sz, \ - NULL, 0), 0, "Unexpected mallctl() failure"); \ + assert_d_eq(mallctl("arenas.bin.0."#name, &name, &sz, NULL, 0), \ + 0, "Unexpected mallctl() failure"); \ assert_zu_eq(name, expected, "Incorrect "#name" size"); \ } while (0) @@ -644,8 +538,8 @@ TEST_BEGIN(test_arenas_lrun_constants) #define TEST_ARENAS_LRUN_CONSTANT(t, name, expected) do { \ t name; \ size_t sz = sizeof(t); \ - assert_d_eq(mallctl("arenas.lrun.0."#name, (void *)&name, &sz, \ - NULL, 0), 0, "Unexpected mallctl() failure"); \ + assert_d_eq(mallctl("arenas.lrun.0."#name, &name, &sz, NULL, \ + 0), 0, "Unexpected mallctl() failure"); \ assert_zu_eq(name, expected, "Incorrect "#name" size"); \ } while (0) @@ -661,8 +555,8 @@ TEST_BEGIN(test_arenas_hchunk_constants) #define TEST_ARENAS_HCHUNK_CONSTANT(t, name, expected) do { \ t name; \ size_t sz = sizeof(t); \ - assert_d_eq(mallctl("arenas.hchunk.0."#name, (void *)&name, \ - &sz, NULL, 0), 0, "Unexpected mallctl() failure"); \ + assert_d_eq(mallctl("arenas.hchunk.0."#name, &name, &sz, NULL, \ + 0), 0, 
"Unexpected mallctl() failure"); \ assert_zu_eq(name, expected, "Incorrect "#name" size"); \ } while (0) @@ -677,12 +571,12 @@ TEST_BEGIN(test_arenas_extend) unsigned narenas_before, arena, narenas_after; size_t sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.narenas", (void *)&narenas_before, &sz, - NULL, 0), 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctl("arenas.extend", (void *)&arena, &sz, NULL, 0), 0, + assert_d_eq(mallctl("arenas.narenas", &narenas_before, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arenas.extend", &arena, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arenas.narenas", &narenas_after, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctl("arenas.narenas", (void *)&narenas_after, &sz, NULL, - 0), 0, "Unexpected mallctl() failure"); assert_u_eq(narenas_before+1, narenas_after, "Unexpected number of arenas before versus after extension"); @@ -696,14 +590,12 @@ TEST_BEGIN(test_stats_arenas) #define TEST_STATS_ARENAS(t, name) do { \ t name; \ size_t sz = sizeof(t); \ - assert_d_eq(mallctl("stats.arenas.0."#name, (void *)&name, &sz, \ - NULL, 0), 0, "Unexpected mallctl() failure"); \ + assert_d_eq(mallctl("stats.arenas.0."#name, &name, &sz, NULL, \ + 0), 0, "Unexpected mallctl() failure"); \ } while (0) - TEST_STATS_ARENAS(unsigned, nthreads); TEST_STATS_ARENAS(const char *, dss); - TEST_STATS_ARENAS(ssize_t, lg_dirty_mult); - TEST_STATS_ARENAS(ssize_t, decay_time); + TEST_STATS_ARENAS(unsigned, nthreads); TEST_STATS_ARENAS(size_t, pactive); TEST_STATS_ARENAS(size_t, pdirty); @@ -728,13 +620,10 @@ main(void) test_tcache, test_thread_arena, test_arena_i_lg_dirty_mult, - test_arena_i_decay_time, test_arena_i_purge, - test_arena_i_decay, test_arena_i_dss, test_arenas_initialized, test_arenas_lg_dirty_mult, - test_arenas_decay_time, test_arenas_constants, test_arenas_bin_constants, test_arenas_lrun_constants, diff --git a/deps/jemalloc/test/unit/math.c 
b/deps/jemalloc/test/unit/math.c index adb72bed9..ebec77a62 100644 --- a/deps/jemalloc/test/unit/math.c +++ b/deps/jemalloc/test/unit/math.c @@ -5,10 +5,6 @@ #include -#ifdef __PGI -#undef INFINITY -#endif - #ifndef INFINITY #define INFINITY (DBL_MAX + DBL_MAX) #endif diff --git a/deps/jemalloc/test/unit/nstime.c b/deps/jemalloc/test/unit/nstime.c deleted file mode 100644 index 0368bc26e..000000000 --- a/deps/jemalloc/test/unit/nstime.c +++ /dev/null @@ -1,227 +0,0 @@ -#include "test/jemalloc_test.h" - -#define BILLION UINT64_C(1000000000) - -TEST_BEGIN(test_nstime_init) -{ - nstime_t nst; - - nstime_init(&nst, 42000000043); - assert_u64_eq(nstime_ns(&nst), 42000000043, "ns incorrectly read"); - assert_u64_eq(nstime_sec(&nst), 42, "sec incorrectly read"); - assert_u64_eq(nstime_nsec(&nst), 43, "nsec incorrectly read"); -} -TEST_END - -TEST_BEGIN(test_nstime_init2) -{ - nstime_t nst; - - nstime_init2(&nst, 42, 43); - assert_u64_eq(nstime_sec(&nst), 42, "sec incorrectly read"); - assert_u64_eq(nstime_nsec(&nst), 43, "nsec incorrectly read"); -} -TEST_END - -TEST_BEGIN(test_nstime_copy) -{ - nstime_t nsta, nstb; - - nstime_init2(&nsta, 42, 43); - nstime_init(&nstb, 0); - nstime_copy(&nstb, &nsta); - assert_u64_eq(nstime_sec(&nstb), 42, "sec incorrectly copied"); - assert_u64_eq(nstime_nsec(&nstb), 43, "nsec incorrectly copied"); -} -TEST_END - -TEST_BEGIN(test_nstime_compare) -{ - nstime_t nsta, nstb; - - nstime_init2(&nsta, 42, 43); - nstime_copy(&nstb, &nsta); - assert_d_eq(nstime_compare(&nsta, &nstb), 0, "Times should be equal"); - assert_d_eq(nstime_compare(&nstb, &nsta), 0, "Times should be equal"); - - nstime_init2(&nstb, 42, 42); - assert_d_eq(nstime_compare(&nsta, &nstb), 1, - "nsta should be greater than nstb"); - assert_d_eq(nstime_compare(&nstb, &nsta), -1, - "nstb should be less than nsta"); - - nstime_init2(&nstb, 42, 44); - assert_d_eq(nstime_compare(&nsta, &nstb), -1, - "nsta should be less than nstb"); - assert_d_eq(nstime_compare(&nstb, &nsta), 1, - 
"nstb should be greater than nsta"); - - nstime_init2(&nstb, 41, BILLION - 1); - assert_d_eq(nstime_compare(&nsta, &nstb), 1, - "nsta should be greater than nstb"); - assert_d_eq(nstime_compare(&nstb, &nsta), -1, - "nstb should be less than nsta"); - - nstime_init2(&nstb, 43, 0); - assert_d_eq(nstime_compare(&nsta, &nstb), -1, - "nsta should be less than nstb"); - assert_d_eq(nstime_compare(&nstb, &nsta), 1, - "nstb should be greater than nsta"); -} -TEST_END - -TEST_BEGIN(test_nstime_add) -{ - nstime_t nsta, nstb; - - nstime_init2(&nsta, 42, 43); - nstime_copy(&nstb, &nsta); - nstime_add(&nsta, &nstb); - nstime_init2(&nstb, 84, 86); - assert_d_eq(nstime_compare(&nsta, &nstb), 0, - "Incorrect addition result"); - - nstime_init2(&nsta, 42, BILLION - 1); - nstime_copy(&nstb, &nsta); - nstime_add(&nsta, &nstb); - nstime_init2(&nstb, 85, BILLION - 2); - assert_d_eq(nstime_compare(&nsta, &nstb), 0, - "Incorrect addition result"); -} -TEST_END - -TEST_BEGIN(test_nstime_subtract) -{ - nstime_t nsta, nstb; - - nstime_init2(&nsta, 42, 43); - nstime_copy(&nstb, &nsta); - nstime_subtract(&nsta, &nstb); - nstime_init(&nstb, 0); - assert_d_eq(nstime_compare(&nsta, &nstb), 0, - "Incorrect subtraction result"); - - nstime_init2(&nsta, 42, 43); - nstime_init2(&nstb, 41, 44); - nstime_subtract(&nsta, &nstb); - nstime_init2(&nstb, 0, BILLION - 1); - assert_d_eq(nstime_compare(&nsta, &nstb), 0, - "Incorrect subtraction result"); -} -TEST_END - -TEST_BEGIN(test_nstime_imultiply) -{ - nstime_t nsta, nstb; - - nstime_init2(&nsta, 42, 43); - nstime_imultiply(&nsta, 10); - nstime_init2(&nstb, 420, 430); - assert_d_eq(nstime_compare(&nsta, &nstb), 0, - "Incorrect multiplication result"); - - nstime_init2(&nsta, 42, 666666666); - nstime_imultiply(&nsta, 3); - nstime_init2(&nstb, 127, 999999998); - assert_d_eq(nstime_compare(&nsta, &nstb), 0, - "Incorrect multiplication result"); -} -TEST_END - -TEST_BEGIN(test_nstime_idivide) -{ - nstime_t nsta, nstb; - - nstime_init2(&nsta, 42, 43); - 
nstime_copy(&nstb, &nsta); - nstime_imultiply(&nsta, 10); - nstime_idivide(&nsta, 10); - assert_d_eq(nstime_compare(&nsta, &nstb), 0, - "Incorrect division result"); - - nstime_init2(&nsta, 42, 666666666); - nstime_copy(&nstb, &nsta); - nstime_imultiply(&nsta, 3); - nstime_idivide(&nsta, 3); - assert_d_eq(nstime_compare(&nsta, &nstb), 0, - "Incorrect division result"); -} -TEST_END - -TEST_BEGIN(test_nstime_divide) -{ - nstime_t nsta, nstb, nstc; - - nstime_init2(&nsta, 42, 43); - nstime_copy(&nstb, &nsta); - nstime_imultiply(&nsta, 10); - assert_u64_eq(nstime_divide(&nsta, &nstb), 10, - "Incorrect division result"); - - nstime_init2(&nsta, 42, 43); - nstime_copy(&nstb, &nsta); - nstime_imultiply(&nsta, 10); - nstime_init(&nstc, 1); - nstime_add(&nsta, &nstc); - assert_u64_eq(nstime_divide(&nsta, &nstb), 10, - "Incorrect division result"); - - nstime_init2(&nsta, 42, 43); - nstime_copy(&nstb, &nsta); - nstime_imultiply(&nsta, 10); - nstime_init(&nstc, 1); - nstime_subtract(&nsta, &nstc); - assert_u64_eq(nstime_divide(&nsta, &nstb), 9, - "Incorrect division result"); -} -TEST_END - -TEST_BEGIN(test_nstime_monotonic) -{ - - nstime_monotonic(); -} -TEST_END - -TEST_BEGIN(test_nstime_update) -{ - nstime_t nst; - - nstime_init(&nst, 0); - - assert_false(nstime_update(&nst), "Basic time update failed."); - - /* Only Rip Van Winkle sleeps this long. 
*/ - { - nstime_t addend; - nstime_init2(&addend, 631152000, 0); - nstime_add(&nst, &addend); - } - { - nstime_t nst0; - nstime_copy(&nst0, &nst); - assert_true(nstime_update(&nst), - "Update should detect time roll-back."); - assert_d_eq(nstime_compare(&nst, &nst0), 0, - "Time should not have been modified"); - } -} -TEST_END - -int -main(void) -{ - - return (test( - test_nstime_init, - test_nstime_init2, - test_nstime_copy, - test_nstime_compare, - test_nstime_add, - test_nstime_subtract, - test_nstime_imultiply, - test_nstime_idivide, - test_nstime_divide, - test_nstime_monotonic, - test_nstime_update)); -} diff --git a/deps/jemalloc/test/unit/pack.c b/deps/jemalloc/test/unit/pack.c deleted file mode 100644 index 0b6ffcd21..000000000 --- a/deps/jemalloc/test/unit/pack.c +++ /dev/null @@ -1,206 +0,0 @@ -#include "test/jemalloc_test.h" - -const char *malloc_conf = - /* Use smallest possible chunk size. */ - "lg_chunk:0" - /* Immediately purge to minimize fragmentation. */ - ",lg_dirty_mult:-1" - ",decay_time:-1" - ; - -/* - * Size class that is a divisor of the page size, ideally 4+ regions per run. - */ -#if LG_PAGE <= 14 -#define SZ (ZU(1) << (LG_PAGE - 2)) -#else -#define SZ 4096 -#endif - -/* - * Number of chunks to consume at high water mark. Should be at least 2 so that - * if mmap()ed memory grows downward, downward growth of mmap()ed memory is - * tested. 
- */ -#define NCHUNKS 8 - -static unsigned -binind_compute(void) -{ - size_t sz; - unsigned nbins, i; - - sz = sizeof(nbins); - assert_d_eq(mallctl("arenas.nbins", (void *)&nbins, &sz, NULL, 0), 0, - "Unexpected mallctl failure"); - - for (i = 0; i < nbins; i++) { - size_t mib[4]; - size_t miblen = sizeof(mib)/sizeof(size_t); - size_t size; - - assert_d_eq(mallctlnametomib("arenas.bin.0.size", mib, - &miblen), 0, "Unexpected mallctlnametomb failure"); - mib[2] = (size_t)i; - - sz = sizeof(size); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&size, &sz, NULL, - 0), 0, "Unexpected mallctlbymib failure"); - if (size == SZ) - return (i); - } - - test_fail("Unable to compute nregs_per_run"); - return (0); -} - -static size_t -nregs_per_run_compute(void) -{ - uint32_t nregs; - size_t sz; - unsigned binind = binind_compute(); - size_t mib[4]; - size_t miblen = sizeof(mib)/sizeof(size_t); - - assert_d_eq(mallctlnametomib("arenas.bin.0.nregs", mib, &miblen), 0, - "Unexpected mallctlnametomb failure"); - mib[2] = (size_t)binind; - sz = sizeof(nregs); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&nregs, &sz, NULL, - 0), 0, "Unexpected mallctlbymib failure"); - return (nregs); -} - -static size_t -npages_per_run_compute(void) -{ - size_t sz; - unsigned binind = binind_compute(); - size_t mib[4]; - size_t miblen = sizeof(mib)/sizeof(size_t); - size_t run_size; - - assert_d_eq(mallctlnametomib("arenas.bin.0.run_size", mib, &miblen), 0, - "Unexpected mallctlnametomb failure"); - mib[2] = (size_t)binind; - sz = sizeof(run_size); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&run_size, &sz, NULL, - 0), 0, "Unexpected mallctlbymib failure"); - return (run_size >> LG_PAGE); -} - -static size_t -npages_per_chunk_compute(void) -{ - - return ((chunksize >> LG_PAGE) - map_bias); -} - -static size_t -nruns_per_chunk_compute(void) -{ - - return (npages_per_chunk_compute() / npages_per_run_compute()); -} - -static unsigned -arenas_extend_mallctl(void) -{ - unsigned arena_ind; - 
size_t sz; - - sz = sizeof(arena_ind); - assert_d_eq(mallctl("arenas.extend", (void *)&arena_ind, &sz, NULL, 0), - 0, "Error in arenas.extend"); - - return (arena_ind); -} - -static void -arena_reset_mallctl(unsigned arena_ind) -{ - size_t mib[3]; - size_t miblen = sizeof(mib)/sizeof(size_t); - - assert_d_eq(mallctlnametomib("arena.0.reset", mib, &miblen), 0, - "Unexpected mallctlnametomib() failure"); - mib[1] = (size_t)arena_ind; - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, - "Unexpected mallctlbymib() failure"); -} - -TEST_BEGIN(test_pack) -{ - unsigned arena_ind = arenas_extend_mallctl(); - size_t nregs_per_run = nregs_per_run_compute(); - size_t nruns_per_chunk = nruns_per_chunk_compute(); - size_t nruns = nruns_per_chunk * NCHUNKS; - size_t nregs = nregs_per_run * nruns; - VARIABLE_ARRAY(void *, ptrs, nregs); - size_t i, j, offset; - - /* Fill matrix. */ - for (i = offset = 0; i < nruns; i++) { - for (j = 0; j < nregs_per_run; j++) { - void *p = mallocx(SZ, MALLOCX_ARENA(arena_ind) | - MALLOCX_TCACHE_NONE); - assert_ptr_not_null(p, - "Unexpected mallocx(%zu, MALLOCX_ARENA(%u) |" - " MALLOCX_TCACHE_NONE) failure, run=%zu, reg=%zu", - SZ, arena_ind, i, j); - ptrs[(i * nregs_per_run) + j] = p; - } - } - - /* - * Free all but one region of each run, but rotate which region is - * preserved, so that subsequent allocations exercise the within-run - * layout policy. - */ - offset = 0; - for (i = offset = 0; - i < nruns; - i++, offset = (offset + 1) % nregs_per_run) { - for (j = 0; j < nregs_per_run; j++) { - void *p = ptrs[(i * nregs_per_run) + j]; - if (offset == j) - continue; - dallocx(p, MALLOCX_ARENA(arena_ind) | - MALLOCX_TCACHE_NONE); - } - } - - /* - * Logically refill matrix, skipping preserved regions and verifying - * that the matrix is unmodified. 
- */ - offset = 0; - for (i = offset = 0; - i < nruns; - i++, offset = (offset + 1) % nregs_per_run) { - for (j = 0; j < nregs_per_run; j++) { - void *p; - - if (offset == j) - continue; - p = mallocx(SZ, MALLOCX_ARENA(arena_ind) | - MALLOCX_TCACHE_NONE); - assert_ptr_eq(p, ptrs[(i * nregs_per_run) + j], - "Unexpected refill discrepancy, run=%zu, reg=%zu\n", - i, j); - } - } - - /* Clean up. */ - arena_reset_mallctl(arena_ind); -} -TEST_END - -int -main(void) -{ - - return (test( - test_pack)); -} diff --git a/deps/jemalloc/test/unit/pages.c b/deps/jemalloc/test/unit/pages.c deleted file mode 100644 index d31a35e68..000000000 --- a/deps/jemalloc/test/unit/pages.c +++ /dev/null @@ -1,27 +0,0 @@ -#include "test/jemalloc_test.h" - -TEST_BEGIN(test_pages_huge) -{ - bool commit; - void *pages; - - commit = true; - pages = pages_map(NULL, PAGE, &commit); - assert_ptr_not_null(pages, "Unexpected pages_map() error"); - - assert_false(pages_huge(pages, PAGE), - "Unexpected pages_huge() result"); - assert_false(pages_nohuge(pages, PAGE), - "Unexpected pages_nohuge() result"); - - pages_unmap(pages, PAGE); -} -TEST_END - -int -main(void) -{ - - return (test( - test_pages_huge)); -} diff --git a/deps/jemalloc/test/unit/ph.c b/deps/jemalloc/test/unit/ph.c deleted file mode 100644 index da442f07e..000000000 --- a/deps/jemalloc/test/unit/ph.c +++ /dev/null @@ -1,290 +0,0 @@ -#include "test/jemalloc_test.h" - -typedef struct node_s node_t; - -struct node_s { -#define NODE_MAGIC 0x9823af7e - uint32_t magic; - phn(node_t) link; - uint64_t key; -}; - -static int -node_cmp(const node_t *a, const node_t *b) -{ - int ret; - - ret = (a->key > b->key) - (a->key < b->key); - if (ret == 0) { - /* - * Duplicates are not allowed in the heap, so force an - * arbitrary ordering for non-identical items with equal keys. 
- */ - ret = (((uintptr_t)a) > ((uintptr_t)b)) - - (((uintptr_t)a) < ((uintptr_t)b)); - } - return (ret); -} - -static int -node_cmp_magic(const node_t *a, const node_t *b) { - - assert_u32_eq(a->magic, NODE_MAGIC, "Bad magic"); - assert_u32_eq(b->magic, NODE_MAGIC, "Bad magic"); - - return (node_cmp(a, b)); -} - -typedef ph(node_t) heap_t; -ph_gen(static, heap_, heap_t, node_t, link, node_cmp_magic); - -static void -node_print(const node_t *node, unsigned depth) -{ - unsigned i; - node_t *leftmost_child, *sibling; - - for (i = 0; i < depth; i++) - malloc_printf("\t"); - malloc_printf("%2"FMTu64"\n", node->key); - - leftmost_child = phn_lchild_get(node_t, link, node); - if (leftmost_child == NULL) - return; - node_print(leftmost_child, depth + 1); - - for (sibling = phn_next_get(node_t, link, leftmost_child); sibling != - NULL; sibling = phn_next_get(node_t, link, sibling)) { - node_print(sibling, depth + 1); - } -} - -static void -heap_print(const heap_t *heap) -{ - node_t *auxelm; - - malloc_printf("vvv heap %p vvv\n", heap); - if (heap->ph_root == NULL) - goto label_return; - - node_print(heap->ph_root, 0); - - for (auxelm = phn_next_get(node_t, link, heap->ph_root); auxelm != NULL; - auxelm = phn_next_get(node_t, link, auxelm)) { - assert_ptr_eq(phn_next_get(node_t, link, phn_prev_get(node_t, - link, auxelm)), auxelm, - "auxelm's prev doesn't link to auxelm"); - node_print(auxelm, 0); - } - -label_return: - malloc_printf("^^^ heap %p ^^^\n", heap); -} - -static unsigned -node_validate(const node_t *node, const node_t *parent) -{ - unsigned nnodes = 1; - node_t *leftmost_child, *sibling; - - if (parent != NULL) { - assert_d_ge(node_cmp_magic(node, parent), 0, - "Child is less than parent"); - } - - leftmost_child = phn_lchild_get(node_t, link, node); - if (leftmost_child == NULL) - return (nnodes); - assert_ptr_eq((void *)phn_prev_get(node_t, link, leftmost_child), - (void *)node, "Leftmost child does not link to node"); - nnodes += node_validate(leftmost_child, 
node); - - for (sibling = phn_next_get(node_t, link, leftmost_child); sibling != - NULL; sibling = phn_next_get(node_t, link, sibling)) { - assert_ptr_eq(phn_next_get(node_t, link, phn_prev_get(node_t, - link, sibling)), sibling, - "sibling's prev doesn't link to sibling"); - nnodes += node_validate(sibling, node); - } - return (nnodes); -} - -static unsigned -heap_validate(const heap_t *heap) -{ - unsigned nnodes = 0; - node_t *auxelm; - - if (heap->ph_root == NULL) - goto label_return; - - nnodes += node_validate(heap->ph_root, NULL); - - for (auxelm = phn_next_get(node_t, link, heap->ph_root); auxelm != NULL; - auxelm = phn_next_get(node_t, link, auxelm)) { - assert_ptr_eq(phn_next_get(node_t, link, phn_prev_get(node_t, - link, auxelm)), auxelm, - "auxelm's prev doesn't link to auxelm"); - nnodes += node_validate(auxelm, NULL); - } - -label_return: - if (false) - heap_print(heap); - return (nnodes); -} - -TEST_BEGIN(test_ph_empty) -{ - heap_t heap; - - heap_new(&heap); - assert_true(heap_empty(&heap), "Heap should be empty"); - assert_ptr_null(heap_first(&heap), "Unexpected node"); -} -TEST_END - -static void -node_remove(heap_t *heap, node_t *node) -{ - - heap_remove(heap, node); - - node->magic = 0; -} - -static node_t * -node_remove_first(heap_t *heap) -{ - node_t *node = heap_remove_first(heap); - node->magic = 0; - return (node); -} - -TEST_BEGIN(test_ph_random) -{ -#define NNODES 25 -#define NBAGS 250 -#define SEED 42 - sfmt_t *sfmt; - uint64_t bag[NNODES]; - heap_t heap; - node_t nodes[NNODES]; - unsigned i, j, k; - - sfmt = init_gen_rand(SEED); - for (i = 0; i < NBAGS; i++) { - switch (i) { - case 0: - /* Insert in order. */ - for (j = 0; j < NNODES; j++) - bag[j] = j; - break; - case 1: - /* Insert in reverse order. */ - for (j = 0; j < NNODES; j++) - bag[j] = NNODES - j - 1; - break; - default: - for (j = 0; j < NNODES; j++) - bag[j] = gen_rand64_range(sfmt, NNODES); - } - - for (j = 1; j <= NNODES; j++) { - /* Initialize heap and nodes. 
*/ - heap_new(&heap); - assert_u_eq(heap_validate(&heap), 0, - "Incorrect node count"); - for (k = 0; k < j; k++) { - nodes[k].magic = NODE_MAGIC; - nodes[k].key = bag[k]; - } - - /* Insert nodes. */ - for (k = 0; k < j; k++) { - heap_insert(&heap, &nodes[k]); - if (i % 13 == 12) { - /* Trigger merging. */ - assert_ptr_not_null(heap_first(&heap), - "Heap should not be empty"); - } - assert_u_eq(heap_validate(&heap), k + 1, - "Incorrect node count"); - } - - assert_false(heap_empty(&heap), - "Heap should not be empty"); - - /* Remove nodes. */ - switch (i % 4) { - case 0: - for (k = 0; k < j; k++) { - assert_u_eq(heap_validate(&heap), j - k, - "Incorrect node count"); - node_remove(&heap, &nodes[k]); - assert_u_eq(heap_validate(&heap), j - k - - 1, "Incorrect node count"); - } - break; - case 1: - for (k = j; k > 0; k--) { - node_remove(&heap, &nodes[k-1]); - assert_u_eq(heap_validate(&heap), k - 1, - "Incorrect node count"); - } - break; - case 2: { - node_t *prev = NULL; - for (k = 0; k < j; k++) { - node_t *node = node_remove_first(&heap); - assert_u_eq(heap_validate(&heap), j - k - - 1, "Incorrect node count"); - if (prev != NULL) { - assert_d_ge(node_cmp(node, - prev), 0, - "Bad removal order"); - } - prev = node; - } - break; - } case 3: { - node_t *prev = NULL; - for (k = 0; k < j; k++) { - node_t *node = heap_first(&heap); - assert_u_eq(heap_validate(&heap), j - k, - "Incorrect node count"); - if (prev != NULL) { - assert_d_ge(node_cmp(node, - prev), 0, - "Bad removal order"); - } - node_remove(&heap, node); - assert_u_eq(heap_validate(&heap), j - k - - 1, "Incorrect node count"); - prev = node; - } - break; - } default: - not_reached(); - } - - assert_ptr_null(heap_first(&heap), - "Heap should be empty"); - assert_true(heap_empty(&heap), "Heap should be empty"); - } - } - fini_gen_rand(sfmt); -#undef NNODES -#undef SEED -} -TEST_END - -int -main(void) -{ - - return (test( - test_ph_empty, - test_ph_random)); -} diff --git a/deps/jemalloc/test/unit/prng.c 
b/deps/jemalloc/test/unit/prng.c deleted file mode 100644 index 80c9d733f..000000000 --- a/deps/jemalloc/test/unit/prng.c +++ /dev/null @@ -1,263 +0,0 @@ -#include "test/jemalloc_test.h" - -static void -test_prng_lg_range_u32(bool atomic) -{ - uint32_t sa, sb, ra, rb; - unsigned lg_range; - - sa = 42; - ra = prng_lg_range_u32(&sa, 32, atomic); - sa = 42; - rb = prng_lg_range_u32(&sa, 32, atomic); - assert_u32_eq(ra, rb, - "Repeated generation should produce repeated results"); - - sb = 42; - rb = prng_lg_range_u32(&sb, 32, atomic); - assert_u32_eq(ra, rb, - "Equivalent generation should produce equivalent results"); - - sa = 42; - ra = prng_lg_range_u32(&sa, 32, atomic); - rb = prng_lg_range_u32(&sa, 32, atomic); - assert_u32_ne(ra, rb, - "Full-width results must not immediately repeat"); - - sa = 42; - ra = prng_lg_range_u32(&sa, 32, atomic); - for (lg_range = 31; lg_range > 0; lg_range--) { - sb = 42; - rb = prng_lg_range_u32(&sb, lg_range, atomic); - assert_u32_eq((rb & (UINT32_C(0xffffffff) << lg_range)), - 0, "High order bits should be 0, lg_range=%u", lg_range); - assert_u32_eq(rb, (ra >> (32 - lg_range)), - "Expected high order bits of full-width result, " - "lg_range=%u", lg_range); - } -} - -static void -test_prng_lg_range_u64(void) -{ - uint64_t sa, sb, ra, rb; - unsigned lg_range; - - sa = 42; - ra = prng_lg_range_u64(&sa, 64); - sa = 42; - rb = prng_lg_range_u64(&sa, 64); - assert_u64_eq(ra, rb, - "Repeated generation should produce repeated results"); - - sb = 42; - rb = prng_lg_range_u64(&sb, 64); - assert_u64_eq(ra, rb, - "Equivalent generation should produce equivalent results"); - - sa = 42; - ra = prng_lg_range_u64(&sa, 64); - rb = prng_lg_range_u64(&sa, 64); - assert_u64_ne(ra, rb, - "Full-width results must not immediately repeat"); - - sa = 42; - ra = prng_lg_range_u64(&sa, 64); - for (lg_range = 63; lg_range > 0; lg_range--) { - sb = 42; - rb = prng_lg_range_u64(&sb, lg_range); - assert_u64_eq((rb & (UINT64_C(0xffffffffffffffff) << lg_range)), 
- 0, "High order bits should be 0, lg_range=%u", lg_range); - assert_u64_eq(rb, (ra >> (64 - lg_range)), - "Expected high order bits of full-width result, " - "lg_range=%u", lg_range); - } -} - -static void -test_prng_lg_range_zu(bool atomic) -{ - size_t sa, sb, ra, rb; - unsigned lg_range; - - sa = 42; - ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); - sa = 42; - rb = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); - assert_zu_eq(ra, rb, - "Repeated generation should produce repeated results"); - - sb = 42; - rb = prng_lg_range_zu(&sb, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); - assert_zu_eq(ra, rb, - "Equivalent generation should produce equivalent results"); - - sa = 42; - ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); - rb = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); - assert_zu_ne(ra, rb, - "Full-width results must not immediately repeat"); - - sa = 42; - ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); - for (lg_range = (ZU(1) << (3 + LG_SIZEOF_PTR)) - 1; lg_range > 0; - lg_range--) { - sb = 42; - rb = prng_lg_range_zu(&sb, lg_range, atomic); - assert_zu_eq((rb & (SIZE_T_MAX << lg_range)), - 0, "High order bits should be 0, lg_range=%u", lg_range); - assert_zu_eq(rb, (ra >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - - lg_range)), "Expected high order bits of full-width " - "result, lg_range=%u", lg_range); - } -} - -TEST_BEGIN(test_prng_lg_range_u32_nonatomic) -{ - - test_prng_lg_range_u32(false); -} -TEST_END - -TEST_BEGIN(test_prng_lg_range_u32_atomic) -{ - - test_prng_lg_range_u32(true); -} -TEST_END - -TEST_BEGIN(test_prng_lg_range_u64_nonatomic) -{ - - test_prng_lg_range_u64(); -} -TEST_END - -TEST_BEGIN(test_prng_lg_range_zu_nonatomic) -{ - - test_prng_lg_range_zu(false); -} -TEST_END - -TEST_BEGIN(test_prng_lg_range_zu_atomic) -{ - - test_prng_lg_range_zu(true); -} -TEST_END - -static void -test_prng_range_u32(bool atomic) -{ - uint32_t range; -#define MAX_RANGE 10000000 -#define 
RANGE_STEP 97 -#define NREPS 10 - - for (range = 2; range < MAX_RANGE; range += RANGE_STEP) { - uint32_t s; - unsigned rep; - - s = range; - for (rep = 0; rep < NREPS; rep++) { - uint32_t r = prng_range_u32(&s, range, atomic); - - assert_u32_lt(r, range, "Out of range"); - } - } -} - -static void -test_prng_range_u64(void) -{ - uint64_t range; -#define MAX_RANGE 10000000 -#define RANGE_STEP 97 -#define NREPS 10 - - for (range = 2; range < MAX_RANGE; range += RANGE_STEP) { - uint64_t s; - unsigned rep; - - s = range; - for (rep = 0; rep < NREPS; rep++) { - uint64_t r = prng_range_u64(&s, range); - - assert_u64_lt(r, range, "Out of range"); - } - } -} - -static void -test_prng_range_zu(bool atomic) -{ - size_t range; -#define MAX_RANGE 10000000 -#define RANGE_STEP 97 -#define NREPS 10 - - for (range = 2; range < MAX_RANGE; range += RANGE_STEP) { - size_t s; - unsigned rep; - - s = range; - for (rep = 0; rep < NREPS; rep++) { - size_t r = prng_range_zu(&s, range, atomic); - - assert_zu_lt(r, range, "Out of range"); - } - } -} - -TEST_BEGIN(test_prng_range_u32_nonatomic) -{ - - test_prng_range_u32(false); -} -TEST_END - -TEST_BEGIN(test_prng_range_u32_atomic) -{ - - test_prng_range_u32(true); -} -TEST_END - -TEST_BEGIN(test_prng_range_u64_nonatomic) -{ - - test_prng_range_u64(); -} -TEST_END - -TEST_BEGIN(test_prng_range_zu_nonatomic) -{ - - test_prng_range_zu(false); -} -TEST_END - -TEST_BEGIN(test_prng_range_zu_atomic) -{ - - test_prng_range_zu(true); -} -TEST_END - -int -main(void) -{ - - return (test( - test_prng_lg_range_u32_nonatomic, - test_prng_lg_range_u32_atomic, - test_prng_lg_range_u64_nonatomic, - test_prng_lg_range_zu_nonatomic, - test_prng_lg_range_zu_atomic, - test_prng_range_u32_nonatomic, - test_prng_range_u32_atomic, - test_prng_range_u64_nonatomic, - test_prng_range_zu_nonatomic, - test_prng_range_zu_atomic)); -} diff --git a/deps/jemalloc/test/unit/prof_accum.c b/deps/jemalloc/test/unit/prof_accum.c old mode 100755 new mode 100644 index 
d941b5bc6..fd229e0fd --- a/deps/jemalloc/test/unit/prof_accum.c +++ b/deps/jemalloc/test/unit/prof_accum.c @@ -68,9 +68,8 @@ TEST_BEGIN(test_idump) test_skip_if(!config_prof); active = true; - assert_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, - sizeof(active)), 0, - "Unexpected mallctl failure while activating profiling"); + assert_d_eq(mallctl("prof.active", NULL, NULL, &active, sizeof(active)), + 0, "Unexpected mallctl failure while activating profiling"); prof_dump_open = prof_dump_open_intercept; diff --git a/deps/jemalloc/test/unit/prof_active.c b/deps/jemalloc/test/unit/prof_active.c old mode 100755 new mode 100644 index d00943a4c..814909572 --- a/deps/jemalloc/test/unit/prof_active.c +++ b/deps/jemalloc/test/unit/prof_active.c @@ -12,7 +12,7 @@ mallctl_bool_get(const char *name, bool expected, const char *func, int line) size_t sz; sz = sizeof(old); - assert_d_eq(mallctl(name, (void *)&old, &sz, NULL, 0), 0, + assert_d_eq(mallctl(name, &old, &sz, NULL, 0), 0, "%s():%d: Unexpected mallctl failure reading %s", func, line, name); assert_b_eq(old, expected, "%s():%d: Unexpected %s value", func, line, name); @@ -26,8 +26,7 @@ mallctl_bool_set(const char *name, bool old_expected, bool val_new, size_t sz; sz = sizeof(old); - assert_d_eq(mallctl(name, (void *)&old, &sz, (void *)&val_new, - sizeof(val_new)), 0, + assert_d_eq(mallctl(name, &old, &sz, &val_new, sizeof(val_new)), 0, "%s():%d: Unexpected mallctl failure reading/writing %s", func, line, name); assert_b_eq(old, old_expected, "%s():%d: Unexpected %s value", func, diff --git a/deps/jemalloc/test/unit/prof_gdump.c b/deps/jemalloc/test/unit/prof_gdump.c old mode 100755 new mode 100644 index 996cb6704..a0e6ee921 --- a/deps/jemalloc/test/unit/prof_gdump.c +++ b/deps/jemalloc/test/unit/prof_gdump.c @@ -28,9 +28,8 @@ TEST_BEGIN(test_gdump) test_skip_if(!config_prof); active = true; - assert_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, - sizeof(active)), 0, - "Unexpected mallctl failure 
while activating profiling"); + assert_d_eq(mallctl("prof.active", NULL, NULL, &active, sizeof(active)), + 0, "Unexpected mallctl failure while activating profiling"); prof_dump_open = prof_dump_open_intercept; @@ -46,8 +45,8 @@ TEST_BEGIN(test_gdump) gdump = false; sz = sizeof(gdump_old); - assert_d_eq(mallctl("prof.gdump", (void *)&gdump_old, &sz, - (void *)&gdump, sizeof(gdump)), 0, + assert_d_eq(mallctl("prof.gdump", &gdump_old, &sz, &gdump, + sizeof(gdump)), 0, "Unexpected mallctl failure while disabling prof.gdump"); assert(gdump_old); did_prof_dump_open = false; @@ -57,8 +56,8 @@ TEST_BEGIN(test_gdump) gdump = true; sz = sizeof(gdump_old); - assert_d_eq(mallctl("prof.gdump", (void *)&gdump_old, &sz, - (void *)&gdump, sizeof(gdump)), 0, + assert_d_eq(mallctl("prof.gdump", &gdump_old, &sz, &gdump, + sizeof(gdump)), 0, "Unexpected mallctl failure while enabling prof.gdump"); assert(!gdump_old); did_prof_dump_open = false; diff --git a/deps/jemalloc/test/unit/prof_idump.c b/deps/jemalloc/test/unit/prof_idump.c old mode 100755 new mode 100644 index 16c6462de..bdea53ecd --- a/deps/jemalloc/test/unit/prof_idump.c +++ b/deps/jemalloc/test/unit/prof_idump.c @@ -29,9 +29,8 @@ TEST_BEGIN(test_idump) test_skip_if(!config_prof); active = true; - assert_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, - sizeof(active)), 0, - "Unexpected mallctl failure while activating profiling"); + assert_d_eq(mallctl("prof.active", NULL, NULL, &active, sizeof(active)), + 0, "Unexpected mallctl failure while activating profiling"); prof_dump_open = prof_dump_open_intercept; diff --git a/deps/jemalloc/test/unit/prof_reset.c b/deps/jemalloc/test/unit/prof_reset.c old mode 100755 new mode 100644 index 59d70796a..69983e5e5 --- a/deps/jemalloc/test/unit/prof_reset.c +++ b/deps/jemalloc/test/unit/prof_reset.c @@ -20,8 +20,8 @@ static void set_prof_active(bool active) { - assert_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, - sizeof(active)), 0, "Unexpected mallctl 
failure"); + assert_d_eq(mallctl("prof.active", NULL, NULL, &active, sizeof(active)), + 0, "Unexpected mallctl failure"); } static size_t @@ -30,8 +30,7 @@ get_lg_prof_sample(void) size_t lg_prof_sample; size_t sz = sizeof(size_t); - assert_d_eq(mallctl("prof.lg_sample", (void *)&lg_prof_sample, &sz, - NULL, 0), 0, + assert_d_eq(mallctl("prof.lg_sample", &lg_prof_sample, &sz, NULL, 0), 0, "Unexpected mallctl failure while reading profiling sample rate"); return (lg_prof_sample); } @@ -40,7 +39,7 @@ static void do_prof_reset(size_t lg_prof_sample) { assert_d_eq(mallctl("prof.reset", NULL, NULL, - (void *)&lg_prof_sample, sizeof(size_t)), 0, + &lg_prof_sample, sizeof(size_t)), 0, "Unexpected mallctl failure while resetting profile data"); assert_zu_eq(lg_prof_sample, get_lg_prof_sample(), "Expected profile sample rate change"); @@ -55,8 +54,8 @@ TEST_BEGIN(test_prof_reset_basic) test_skip_if(!config_prof); sz = sizeof(size_t); - assert_d_eq(mallctl("opt.lg_prof_sample", (void *)&lg_prof_sample_orig, - &sz, NULL, 0), 0, + assert_d_eq(mallctl("opt.lg_prof_sample", &lg_prof_sample_orig, &sz, + NULL, 0), 0, "Unexpected mallctl failure while reading profiling sample rate"); assert_zu_eq(lg_prof_sample_orig, 0, "Unexpected profiling sample rate"); @@ -95,8 +94,7 @@ TEST_END bool prof_dump_header_intercepted = false; prof_cnt_t cnt_all_copy = {0, 0, 0, 0}; static bool -prof_dump_header_intercept(tsdn_t *tsdn, bool propagate_err, - const prof_cnt_t *cnt_all) +prof_dump_header_intercept(bool propagate_err, const prof_cnt_t *cnt_all) { prof_dump_header_intercepted = true; diff --git a/deps/jemalloc/test/unit/prof_thread_name.c b/deps/jemalloc/test/unit/prof_thread_name.c old mode 100755 new mode 100644 index 9ec549776..f501158d7 --- a/deps/jemalloc/test/unit/prof_thread_name.c +++ b/deps/jemalloc/test/unit/prof_thread_name.c @@ -12,9 +12,8 @@ mallctl_thread_name_get_impl(const char *thread_name_expected, const char *func, size_t sz; sz = sizeof(thread_name_old); - 
assert_d_eq(mallctl("thread.prof.name", (void *)&thread_name_old, &sz, - NULL, 0), 0, - "%s():%d: Unexpected mallctl failure reading thread.prof.name", + assert_d_eq(mallctl("thread.prof.name", &thread_name_old, &sz, NULL, 0), + 0, "%s():%d: Unexpected mallctl failure reading thread.prof.name", func, line); assert_str_eq(thread_name_old, thread_name_expected, "%s():%d: Unexpected thread.prof.name value", func, line); @@ -27,8 +26,8 @@ mallctl_thread_name_set_impl(const char *thread_name, const char *func, int line) { - assert_d_eq(mallctl("thread.prof.name", NULL, NULL, - (void *)&thread_name, sizeof(thread_name)), 0, + assert_d_eq(mallctl("thread.prof.name", NULL, NULL, &thread_name, + sizeof(thread_name)), 0, "%s():%d: Unexpected mallctl failure reading thread.prof.name", func, line); mallctl_thread_name_get_impl(thread_name, func, line); @@ -47,15 +46,15 @@ TEST_BEGIN(test_prof_thread_name_validation) /* NULL input shouldn't be allowed. */ thread_name = NULL; - assert_d_eq(mallctl("thread.prof.name", NULL, NULL, - (void *)&thread_name, sizeof(thread_name)), EFAULT, + assert_d_eq(mallctl("thread.prof.name", NULL, NULL, &thread_name, + sizeof(thread_name)), EFAULT, "Unexpected mallctl result writing \"%s\" to thread.prof.name", thread_name); /* '\n' shouldn't be allowed. 
*/ thread_name = "hi\nthere"; - assert_d_eq(mallctl("thread.prof.name", NULL, NULL, - (void *)&thread_name, sizeof(thread_name)), EFAULT, + assert_d_eq(mallctl("thread.prof.name", NULL, NULL, &thread_name, + sizeof(thread_name)), EFAULT, "Unexpected mallctl result writing \"%s\" to thread.prof.name", thread_name); @@ -65,9 +64,8 @@ TEST_BEGIN(test_prof_thread_name_validation) size_t sz; sz = sizeof(thread_name_old); - assert_d_eq(mallctl("thread.prof.name", - (void *)&thread_name_old, &sz, (void *)&thread_name, - sizeof(thread_name)), EPERM, + assert_d_eq(mallctl("thread.prof.name", &thread_name_old, &sz, + &thread_name, sizeof(thread_name)), EPERM, "Unexpected mallctl result writing \"%s\" to " "thread.prof.name", thread_name); } diff --git a/deps/jemalloc/test/unit/rb.c b/deps/jemalloc/test/unit/rb.c index cf3d3a783..b38eb0e33 100644 --- a/deps/jemalloc/test/unit/rb.c +++ b/deps/jemalloc/test/unit/rb.c @@ -3,7 +3,7 @@ #define rbtn_black_height(a_type, a_field, a_rbt, r_height) do { \ a_type *rbp_bh_t; \ for (rbp_bh_t = (a_rbt)->rbt_root, (r_height) = 0; \ - rbp_bh_t != NULL; \ + rbp_bh_t != &(a_rbt)->rbt_nil; \ rbp_bh_t = rbtn_left_get(a_type, a_field, rbp_bh_t)) { \ if (!rbtn_red_get(a_type, a_field, rbp_bh_t)) { \ (r_height)++; \ @@ -21,7 +21,7 @@ struct node_s { }; static int -node_cmp(const node_t *a, const node_t *b) { +node_cmp(node_t *a, node_t *b) { int ret; assert_u32_eq(a->magic, NODE_MAGIC, "Bad magic"); @@ -68,43 +68,38 @@ TEST_BEGIN(test_rb_empty) TEST_END static unsigned -tree_recurse(node_t *node, unsigned black_height, unsigned black_depth) +tree_recurse(node_t *node, unsigned black_height, unsigned black_depth, + node_t *nil) { unsigned ret = 0; - node_t *left_node; - node_t *right_node; - - if (node == NULL) - return (ret); - - left_node = rbtn_left_get(node_t, link, node); - right_node = rbtn_right_get(node_t, link, node); + node_t *left_node = rbtn_left_get(node_t, link, node); + node_t *right_node = rbtn_right_get(node_t, link, node); if 
(!rbtn_red_get(node_t, link, node)) black_depth++; /* Red nodes must be interleaved with black nodes. */ if (rbtn_red_get(node_t, link, node)) { - if (left_node != NULL) - assert_false(rbtn_red_get(node_t, link, left_node), - "Node should be black"); - if (right_node != NULL) - assert_false(rbtn_red_get(node_t, link, right_node), - "Node should be black"); + assert_false(rbtn_red_get(node_t, link, left_node), + "Node should be black"); + assert_false(rbtn_red_get(node_t, link, right_node), + "Node should be black"); } + if (node == nil) + return (ret); /* Self. */ assert_u32_eq(node->magic, NODE_MAGIC, "Bad magic"); /* Left subtree. */ - if (left_node != NULL) - ret += tree_recurse(left_node, black_height, black_depth); + if (left_node != nil) + ret += tree_recurse(left_node, black_height, black_depth, nil); else ret += (black_depth != black_height); /* Right subtree. */ - if (right_node != NULL) - ret += tree_recurse(right_node, black_height, black_depth); + if (right_node != nil) + ret += tree_recurse(right_node, black_height, black_depth, nil); else ret += (black_depth != black_height); @@ -186,7 +181,8 @@ node_remove(tree_t *tree, node_t *node, unsigned nnodes) node->magic = 0; rbtn_black_height(node_t, link, tree, black_height); - imbalances = tree_recurse(tree->rbt_root, black_height, 0); + imbalances = tree_recurse(tree->rbt_root, black_height, 0, + &(tree->rbt_nil)); assert_u_eq(imbalances, 0, "Tree is unbalanced"); assert_u_eq(tree_iterate(tree), nnodes-1, "Unexpected node iteration count"); @@ -216,15 +212,6 @@ remove_reverse_iterate_cb(tree_t *tree, node_t *node, void *data) return (ret); } -static void -destroy_cb(node_t *node, void *data) -{ - unsigned *nnodes = (unsigned *)data; - - assert_u_gt(*nnodes, 0, "Destruction removed too many nodes"); - (*nnodes)--; -} - TEST_BEGIN(test_rb_random) { #define NNODES 25 @@ -257,6 +244,7 @@ TEST_BEGIN(test_rb_random) for (j = 1; j <= NNODES; j++) { /* Initialize tree and nodes. 
*/ tree_new(&tree); + tree.rbt_nil.magic = 0; for (k = 0; k < j; k++) { nodes[k].magic = NODE_MAGIC; nodes[k].key = bag[k]; @@ -269,7 +257,7 @@ TEST_BEGIN(test_rb_random) rbtn_black_height(node_t, link, &tree, black_height); imbalances = tree_recurse(tree.rbt_root, - black_height, 0); + black_height, 0, &(tree.rbt_nil)); assert_u_eq(imbalances, 0, "Tree is unbalanced"); @@ -290,7 +278,7 @@ TEST_BEGIN(test_rb_random) } /* Remove nodes. */ - switch (i % 5) { + switch (i % 4) { case 0: for (k = 0; k < j; k++) node_remove(&tree, &nodes[k], j - k); @@ -326,12 +314,6 @@ TEST_BEGIN(test_rb_random) assert_u_eq(nnodes, 0, "Removal terminated early"); break; - } case 4: { - unsigned nnodes = j; - tree_destroy(&tree, destroy_cb, &nnodes); - assert_u_eq(nnodes, 0, - "Destruction terminated early"); - break; } default: not_reached(); } diff --git a/deps/jemalloc/test/unit/run_quantize.c b/deps/jemalloc/test/unit/run_quantize.c deleted file mode 100644 index 089176f39..000000000 --- a/deps/jemalloc/test/unit/run_quantize.c +++ /dev/null @@ -1,149 +0,0 @@ -#include "test/jemalloc_test.h" - -TEST_BEGIN(test_small_run_size) -{ - unsigned nbins, i; - size_t sz, run_size; - size_t mib[4]; - size_t miblen = sizeof(mib) / sizeof(size_t); - - /* - * Iterate over all small size classes, get their run sizes, and verify - * that the quantized size is the same as the run size. 
- */ - - sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.nbins", (void *)&nbins, &sz, NULL, 0), 0, - "Unexpected mallctl failure"); - - assert_d_eq(mallctlnametomib("arenas.bin.0.run_size", mib, &miblen), 0, - "Unexpected mallctlnametomib failure"); - for (i = 0; i < nbins; i++) { - mib[2] = i; - sz = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&run_size, &sz, - NULL, 0), 0, "Unexpected mallctlbymib failure"); - assert_zu_eq(run_size, run_quantize_floor(run_size), - "Small run quantization should be a no-op (run_size=%zu)", - run_size); - assert_zu_eq(run_size, run_quantize_ceil(run_size), - "Small run quantization should be a no-op (run_size=%zu)", - run_size); - } -} -TEST_END - -TEST_BEGIN(test_large_run_size) -{ - bool cache_oblivious; - unsigned nlruns, i; - size_t sz, run_size_prev, ceil_prev; - size_t mib[4]; - size_t miblen = sizeof(mib) / sizeof(size_t); - - /* - * Iterate over all large size classes, get their run sizes, and verify - * that the quantized size is the same as the run size. - */ - - sz = sizeof(bool); - assert_d_eq(mallctl("config.cache_oblivious", (void *)&cache_oblivious, - &sz, NULL, 0), 0, "Unexpected mallctl failure"); - - sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.nlruns", (void *)&nlruns, &sz, NULL, 0), 0, - "Unexpected mallctl failure"); - - assert_d_eq(mallctlnametomib("arenas.lrun.0.size", mib, &miblen), 0, - "Unexpected mallctlnametomib failure"); - for (i = 0; i < nlruns; i++) { - size_t lrun_size, run_size, floor, ceil; - - mib[2] = i; - sz = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&lrun_size, &sz, - NULL, 0), 0, "Unexpected mallctlbymib failure"); - run_size = cache_oblivious ? 
lrun_size + PAGE : lrun_size; - floor = run_quantize_floor(run_size); - ceil = run_quantize_ceil(run_size); - - assert_zu_eq(run_size, floor, - "Large run quantization should be a no-op for precise " - "size (lrun_size=%zu, run_size=%zu)", lrun_size, run_size); - assert_zu_eq(run_size, ceil, - "Large run quantization should be a no-op for precise " - "size (lrun_size=%zu, run_size=%zu)", lrun_size, run_size); - - if (i > 0) { - assert_zu_eq(run_size_prev, run_quantize_floor(run_size - - PAGE), "Floor should be a precise size"); - if (run_size_prev < ceil_prev) { - assert_zu_eq(ceil_prev, run_size, - "Ceiling should be a precise size " - "(run_size_prev=%zu, ceil_prev=%zu, " - "run_size=%zu)", run_size_prev, ceil_prev, - run_size); - } - } - run_size_prev = floor; - ceil_prev = run_quantize_ceil(run_size + PAGE); - } -} -TEST_END - -TEST_BEGIN(test_monotonic) -{ - unsigned nbins, nlruns, i; - size_t sz, floor_prev, ceil_prev; - - /* - * Iterate over all run sizes and verify that - * run_quantize_{floor,ceil}() are monotonic. 
- */ - - sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.nbins", (void *)&nbins, &sz, NULL, 0), 0, - "Unexpected mallctl failure"); - - sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.nlruns", (void *)&nlruns, &sz, NULL, 0), 0, - "Unexpected mallctl failure"); - - floor_prev = 0; - ceil_prev = 0; - for (i = 1; i <= chunksize >> LG_PAGE; i++) { - size_t run_size, floor, ceil; - - run_size = i << LG_PAGE; - floor = run_quantize_floor(run_size); - ceil = run_quantize_ceil(run_size); - - assert_zu_le(floor, run_size, - "Floor should be <= (floor=%zu, run_size=%zu, ceil=%zu)", - floor, run_size, ceil); - assert_zu_ge(ceil, run_size, - "Ceiling should be >= (floor=%zu, run_size=%zu, ceil=%zu)", - floor, run_size, ceil); - - assert_zu_le(floor_prev, floor, "Floor should be monotonic " - "(floor_prev=%zu, floor=%zu, run_size=%zu, ceil=%zu)", - floor_prev, floor, run_size, ceil); - assert_zu_le(ceil_prev, ceil, "Ceiling should be monotonic " - "(floor=%zu, run_size=%zu, ceil_prev=%zu, ceil=%zu)", - floor, run_size, ceil_prev, ceil); - - floor_prev = floor; - ceil_prev = ceil; - } -} -TEST_END - -int -main(void) -{ - - return (test( - test_small_run_size, - test_large_run_size, - test_monotonic)); -} diff --git a/deps/jemalloc/test/unit/size_classes.c b/deps/jemalloc/test/unit/size_classes.c old mode 100755 new mode 100644 index 81cc60617..d3aaebd77 --- a/deps/jemalloc/test/unit/size_classes.c +++ b/deps/jemalloc/test/unit/size_classes.c @@ -8,8 +8,8 @@ get_max_size_class(void) size_t sz, miblen, max_size_class; sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.nhchunks", (void *)&nhchunks, &sz, NULL, 0), - 0, "Unexpected mallctl() error"); + assert_d_eq(mallctl("arenas.nhchunks", &nhchunks, &sz, NULL, 0), 0, + "Unexpected mallctl() error"); miblen = sizeof(mib) / sizeof(size_t); assert_d_eq(mallctlnametomib("arenas.hchunk.0.size", mib, &miblen), 0, @@ -17,8 +17,8 @@ get_max_size_class(void) mib[2] = nhchunks - 1; sz = sizeof(size_t); - 
assert_d_eq(mallctlbymib(mib, miblen, (void *)&max_size_class, &sz, - NULL, 0), 0, "Unexpected mallctlbymib() error"); + assert_d_eq(mallctlbymib(mib, miblen, &max_size_class, &sz, NULL, 0), 0, + "Unexpected mallctlbymib() error"); return (max_size_class); } @@ -80,105 +80,10 @@ TEST_BEGIN(test_size_classes) } TEST_END -TEST_BEGIN(test_psize_classes) -{ - size_t size_class, max_size_class; - pszind_t pind, max_pind; - - max_size_class = get_max_size_class(); - max_pind = psz2ind(max_size_class); - - for (pind = 0, size_class = pind2sz(pind); pind < max_pind || - size_class < max_size_class; pind++, size_class = - pind2sz(pind)) { - assert_true(pind < max_pind, - "Loop conditionals should be equivalent; pind=%u, " - "size_class=%zu (%#zx)", pind, size_class, size_class); - assert_true(size_class < max_size_class, - "Loop conditionals should be equivalent; pind=%u, " - "size_class=%zu (%#zx)", pind, size_class, size_class); - - assert_u_eq(pind, psz2ind(size_class), - "psz2ind() does not reverse pind2sz(): pind=%u -->" - " size_class=%zu --> pind=%u --> size_class=%zu", pind, - size_class, psz2ind(size_class), - pind2sz(psz2ind(size_class))); - assert_zu_eq(size_class, pind2sz(psz2ind(size_class)), - "pind2sz() does not reverse psz2ind(): pind=%u -->" - " size_class=%zu --> pind=%u --> size_class=%zu", pind, - size_class, psz2ind(size_class), - pind2sz(psz2ind(size_class))); - - assert_u_eq(pind+1, psz2ind(size_class+1), - "Next size_class does not round up properly"); - - assert_zu_eq(size_class, (pind > 0) ? 
- psz2u(pind2sz(pind-1)+1) : psz2u(1), - "psz2u() does not round up to size class"); - assert_zu_eq(size_class, psz2u(size_class-1), - "psz2u() does not round up to size class"); - assert_zu_eq(size_class, psz2u(size_class), - "psz2u() does not compute same size class"); - assert_zu_eq(psz2u(size_class+1), pind2sz(pind+1), - "psz2u() does not round up to next size class"); - } - - assert_u_eq(pind, psz2ind(pind2sz(pind)), - "psz2ind() does not reverse pind2sz()"); - assert_zu_eq(max_size_class, pind2sz(psz2ind(max_size_class)), - "pind2sz() does not reverse psz2ind()"); - - assert_zu_eq(size_class, psz2u(pind2sz(pind-1)+1), - "psz2u() does not round up to size class"); - assert_zu_eq(size_class, psz2u(size_class-1), - "psz2u() does not round up to size class"); - assert_zu_eq(size_class, psz2u(size_class), - "psz2u() does not compute same size class"); -} -TEST_END - -TEST_BEGIN(test_overflow) -{ - size_t max_size_class; - - max_size_class = get_max_size_class(); - - assert_u_eq(size2index(max_size_class+1), NSIZES, - "size2index() should return NSIZES on overflow"); - assert_u_eq(size2index(ZU(PTRDIFF_MAX)+1), NSIZES, - "size2index() should return NSIZES on overflow"); - assert_u_eq(size2index(SIZE_T_MAX), NSIZES, - "size2index() should return NSIZES on overflow"); - - assert_zu_eq(s2u(max_size_class+1), 0, - "s2u() should return 0 for unsupported size"); - assert_zu_eq(s2u(ZU(PTRDIFF_MAX)+1), 0, - "s2u() should return 0 for unsupported size"); - assert_zu_eq(s2u(SIZE_T_MAX), 0, - "s2u() should return 0 on overflow"); - - assert_u_eq(psz2ind(max_size_class+1), NPSIZES, - "psz2ind() should return NPSIZES on overflow"); - assert_u_eq(psz2ind(ZU(PTRDIFF_MAX)+1), NPSIZES, - "psz2ind() should return NPSIZES on overflow"); - assert_u_eq(psz2ind(SIZE_T_MAX), NPSIZES, - "psz2ind() should return NPSIZES on overflow"); - - assert_zu_eq(psz2u(max_size_class+1), 0, - "psz2u() should return 0 for unsupported size"); - assert_zu_eq(psz2u(ZU(PTRDIFF_MAX)+1), 0, - "psz2u() should 
return 0 for unsupported size"); - assert_zu_eq(psz2u(SIZE_T_MAX), 0, - "psz2u() should return 0 on overflow"); -} -TEST_END - int main(void) { return (test( - test_size_classes, - test_psize_classes, - test_overflow)); + test_size_classes)); } diff --git a/deps/jemalloc/test/unit/smoothstep.c b/deps/jemalloc/test/unit/smoothstep.c deleted file mode 100644 index 4cfb21343..000000000 --- a/deps/jemalloc/test/unit/smoothstep.c +++ /dev/null @@ -1,106 +0,0 @@ -#include "test/jemalloc_test.h" - -static const uint64_t smoothstep_tab[] = { -#define STEP(step, h, x, y) \ - h, - SMOOTHSTEP -#undef STEP -}; - -TEST_BEGIN(test_smoothstep_integral) -{ - uint64_t sum, min, max; - unsigned i; - - /* - * The integral of smoothstep in the [0..1] range equals 1/2. Verify - * that the fixed point representation's integral is no more than - * rounding error distant from 1/2. Regarding rounding, each table - * element is rounded down to the nearest fixed point value, so the - * integral may be off by as much as SMOOTHSTEP_NSTEPS ulps. - */ - sum = 0; - for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) - sum += smoothstep_tab[i]; - - max = (KQU(1) << (SMOOTHSTEP_BFP-1)) * (SMOOTHSTEP_NSTEPS+1); - min = max - SMOOTHSTEP_NSTEPS; - - assert_u64_ge(sum, min, - "Integral too small, even accounting for truncation"); - assert_u64_le(sum, max, "Integral exceeds 1/2"); - if (false) { - malloc_printf("%"FMTu64" ulps under 1/2 (limit %d)\n", - max - sum, SMOOTHSTEP_NSTEPS); - } -} -TEST_END - -TEST_BEGIN(test_smoothstep_monotonic) -{ - uint64_t prev_h; - unsigned i; - - /* - * The smoothstep function is monotonic in [0..1], i.e. its slope is - * non-negative. In practice we want to parametrize table generation - * such that piecewise slope is greater than zero, but do not require - * that here. 
- */ - prev_h = 0; - for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) { - uint64_t h = smoothstep_tab[i]; - assert_u64_ge(h, prev_h, "Piecewise non-monotonic, i=%u", i); - prev_h = h; - } - assert_u64_eq(smoothstep_tab[SMOOTHSTEP_NSTEPS-1], - (KQU(1) << SMOOTHSTEP_BFP), "Last step must equal 1"); -} -TEST_END - -TEST_BEGIN(test_smoothstep_slope) -{ - uint64_t prev_h, prev_delta; - unsigned i; - - /* - * The smoothstep slope strictly increases until x=0.5, and then - * strictly decreases until x=1.0. Verify the slightly weaker - * requirement of monotonicity, so that inadequate table precision does - * not cause false test failures. - */ - prev_h = 0; - prev_delta = 0; - for (i = 0; i < SMOOTHSTEP_NSTEPS / 2 + SMOOTHSTEP_NSTEPS % 2; i++) { - uint64_t h = smoothstep_tab[i]; - uint64_t delta = h - prev_h; - assert_u64_ge(delta, prev_delta, - "Slope must monotonically increase in 0.0 <= x <= 0.5, " - "i=%u", i); - prev_h = h; - prev_delta = delta; - } - - prev_h = KQU(1) << SMOOTHSTEP_BFP; - prev_delta = 0; - for (i = SMOOTHSTEP_NSTEPS-1; i >= SMOOTHSTEP_NSTEPS / 2; i--) { - uint64_t h = smoothstep_tab[i]; - uint64_t delta = prev_h - h; - assert_u64_ge(delta, prev_delta, - "Slope must monotonically decrease in 0.5 <= x <= 1.0, " - "i=%u", i); - prev_h = h; - prev_delta = delta; - } -} -TEST_END - -int -main(void) -{ - - return (test( - test_smoothstep_integral, - test_smoothstep_monotonic, - test_smoothstep_slope)); -} diff --git a/deps/jemalloc/test/unit/stats.c b/deps/jemalloc/test/unit/stats.c old mode 100755 new mode 100644 index 315717dfb..8e4bc631e --- a/deps/jemalloc/test/unit/stats.c +++ b/deps/jemalloc/test/unit/stats.c @@ -7,18 +7,18 @@ TEST_BEGIN(test_stats_summary) int expected = config_stats ? 
0 : ENOENT; sz = sizeof(cactive); - assert_d_eq(mallctl("stats.cactive", (void *)&cactive, &sz, NULL, 0), - expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.cactive", &cactive, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.allocated", (void *)&allocated, &sz, NULL, - 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.active", (void *)&active, &sz, NULL, 0), + assert_d_eq(mallctl("stats.allocated", &allocated, &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.resident", (void *)&resident, &sz, NULL, 0), - expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.mapped", (void *)&mapped, &sz, NULL, 0), + assert_d_eq(mallctl("stats.active", &active, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.resident", &resident, &sz, NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.mapped", &mapped, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); if (config_stats) { assert_zu_le(active, *cactive, @@ -45,19 +45,19 @@ TEST_BEGIN(test_stats_huge) p = mallocx(large_maxclass+1, 0); assert_ptr_not_null(p, "Unexpected mallocx() failure"); - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), - 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, + "Unexpected mallctl() failure"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.huge.allocated", (void *)&allocated, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.huge.allocated", &allocated, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.huge.nmalloc", (void *)&nmalloc, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.huge.ndalloc", (void *)&ndalloc, 
- &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.huge.nrequests", (void *)&nrequests, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.huge.nmalloc", &nmalloc, &sz, NULL, + 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.huge.ndalloc", &ndalloc, &sz, NULL, + 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.huge.nrequests", &nrequests, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); if (config_stats) { assert_zu_gt(allocated, 0, @@ -83,8 +83,8 @@ TEST_BEGIN(test_stats_arenas_summary) uint64_t npurge, nmadvise, purged; arena = 0; - assert_d_eq(mallctl("thread.arena", NULL, NULL, (void *)&arena, - sizeof(arena)), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)), + 0, "Unexpected mallctl() failure"); little = mallocx(SMALL_MAXCLASS, 0); assert_ptr_not_null(little, "Unexpected mallocx() failure"); @@ -93,26 +93,22 @@ TEST_BEGIN(test_stats_arenas_summary) huge = mallocx(chunksize, 0); assert_ptr_not_null(huge, "Unexpected mallocx() failure"); - dallocx(little, 0); - dallocx(large, 0); - dallocx(huge, 0); - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), - 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, + "Unexpected mallctl() failure"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.mapped", (void *)&mapped, &sz, NULL, - 0), expected, "Unexepected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.mapped", &mapped, &sz, NULL, 0), + expected, "Unexepected mallctl() result"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.npurge", (void *)&npurge, &sz, NULL, - 0), expected, "Unexepected mallctl() result"); - 
assert_d_eq(mallctl("stats.arenas.0.nmadvise", (void *)&nmadvise, &sz, - NULL, 0), expected, "Unexepected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.purged", (void *)&purged, &sz, NULL, - 0), expected, "Unexepected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge, &sz, NULL, 0), + expected, "Unexepected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.nmadvise", &nmadvise, &sz, NULL, 0), + expected, "Unexepected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.purged", &purged, &sz, NULL, 0), + expected, "Unexepected mallctl() result"); if (config_stats) { assert_u64_gt(npurge, 0, @@ -120,6 +116,10 @@ TEST_BEGIN(test_stats_arenas_summary) assert_u64_le(nmadvise, purged, "nmadvise should be no greater than purged"); } + + dallocx(little, 0); + dallocx(large, 0); + dallocx(huge, 0); } TEST_END @@ -150,8 +150,8 @@ TEST_BEGIN(test_stats_arenas_small) no_lazy_lock(); /* Lazy locking would dodge tcache testing. */ arena = 0; - assert_d_eq(mallctl("thread.arena", NULL, NULL, (void *)&arena, - sizeof(arena)), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)), + 0, "Unexpected mallctl() failure"); p = mallocx(SMALL_MAXCLASS, 0); assert_ptr_not_null(p, "Unexpected mallocx() failure"); @@ -159,21 +159,19 @@ TEST_BEGIN(test_stats_arenas_small) assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), config_tcache ? 
0 : ENOENT, "Unexpected mallctl() result"); - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), - 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, + "Unexpected mallctl() failure"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.small.allocated", - (void *)&allocated, &sz, NULL, 0), expected, - "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.small.allocated", &allocated, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.small.nmalloc", (void *)&nmalloc, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.small.ndalloc", (void *)&ndalloc, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.small.nrequests", - (void *)&nrequests, &sz, NULL, 0), expected, - "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.small.nmalloc", &nmalloc, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.small.ndalloc", &ndalloc, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.small.nrequests", &nrequests, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); if (config_stats) { assert_zu_gt(allocated, 0, @@ -199,36 +197,34 @@ TEST_BEGIN(test_stats_arenas_large) int expected = config_stats ? 
0 : ENOENT; arena = 0; - assert_d_eq(mallctl("thread.arena", NULL, NULL, (void *)&arena, - sizeof(arena)), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)), + 0, "Unexpected mallctl() failure"); p = mallocx(large_maxclass, 0); assert_ptr_not_null(p, "Unexpected mallocx() failure"); - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), - 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, + "Unexpected mallctl() failure"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.large.allocated", - (void *)&allocated, &sz, NULL, 0), expected, - "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.large.allocated", &allocated, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.large.nmalloc", (void *)&nmalloc, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.large.ndalloc", (void *)&ndalloc, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.large.nrequests", - (void *)&nrequests, &sz, NULL, 0), expected, - "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.large.nmalloc", &nmalloc, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.large.ndalloc", &ndalloc, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.large.nrequests", &nrequests, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); if (config_stats) { assert_zu_gt(allocated, 0, "allocated should be greater than zero"); - assert_u64_gt(nmalloc, 0, + assert_zu_gt(nmalloc, 0, "nmalloc should be greater than zero"); - assert_u64_ge(nmalloc, ndalloc, + assert_zu_ge(nmalloc, ndalloc, "nmalloc should be at least as large as ndalloc"); - assert_u64_gt(nrequests, 0, + assert_zu_gt(nrequests, 
0, "nrequests should be greater than zero"); } @@ -245,30 +241,30 @@ TEST_BEGIN(test_stats_arenas_huge) int expected = config_stats ? 0 : ENOENT; arena = 0; - assert_d_eq(mallctl("thread.arena", NULL, NULL, (void *)&arena, - sizeof(arena)), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)), + 0, "Unexpected mallctl() failure"); p = mallocx(chunksize, 0); assert_ptr_not_null(p, "Unexpected mallocx() failure"); - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), - 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, + "Unexpected mallctl() failure"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.huge.allocated", (void *)&allocated, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.huge.allocated", &allocated, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.huge.nmalloc", (void *)&nmalloc, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.huge.ndalloc", (void *)&ndalloc, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.huge.nmalloc", &nmalloc, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.huge.ndalloc", &ndalloc, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); if (config_stats) { assert_zu_gt(allocated, 0, "allocated should be greater than zero"); - assert_u64_gt(nmalloc, 0, + assert_zu_gt(nmalloc, 0, "nmalloc should be greater than zero"); - assert_u64_ge(nmalloc, ndalloc, + assert_zu_ge(nmalloc, ndalloc, "nmalloc should be at least as large as ndalloc"); } @@ -286,8 +282,8 @@ TEST_BEGIN(test_stats_arenas_bins) int expected = config_stats ? 
0 : ENOENT; arena = 0; - assert_d_eq(mallctl("thread.arena", NULL, NULL, (void *)&arena, - sizeof(arena)), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)), + 0, "Unexpected mallctl() failure"); p = mallocx(arena_bin_info[0].reg_size, 0); assert_ptr_not_null(p, "Unexpected mallocx() failure"); @@ -295,36 +291,35 @@ TEST_BEGIN(test_stats_arenas_bins) assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), config_tcache ? 0 : ENOENT, "Unexpected mallctl() result"); - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), - 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, + "Unexpected mallctl() failure"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.bins.0.nmalloc", (void *)&nmalloc, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.bins.0.ndalloc", (void *)&ndalloc, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.bins.0.nrequests", - (void *)&nrequests, &sz, NULL, 0), expected, - "Unexpected mallctl() result"); - sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.bins.0.curregs", (void *)&curregs, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); - - sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.bins.0.nfills", (void *)&nfills, - &sz, NULL, 0), config_tcache ? expected : ENOENT, - "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.bins.0.nflushes", (void *)&nflushes, - &sz, NULL, 0), config_tcache ? 
expected : ENOENT, - "Unexpected mallctl() result"); - - assert_d_eq(mallctl("stats.arenas.0.bins.0.nruns", (void *)&nruns, &sz, + assert_d_eq(mallctl("stats.arenas.0.bins.0.nmalloc", &nmalloc, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.bins.0.ndalloc", &ndalloc, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.bins.0.nrequests", &nrequests, &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.bins.0.nreruns", (void *)&nreruns, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.bins.0.curruns", (void *)&curruns, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.bins.0.curregs", &curregs, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); + + sz = sizeof(uint64_t); + assert_d_eq(mallctl("stats.arenas.0.bins.0.nfills", &nfills, &sz, + NULL, 0), config_tcache ? expected : ENOENT, + "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.bins.0.nflushes", &nflushes, &sz, + NULL, 0), config_tcache ? expected : ENOENT, + "Unexpected mallctl() result"); + + assert_d_eq(mallctl("stats.arenas.0.bins.0.nruns", &nruns, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.bins.0.nreruns", &nreruns, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); + sz = sizeof(size_t); + assert_d_eq(mallctl("stats.arenas.0.bins.0.curruns", &curruns, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); if (config_stats) { assert_u64_gt(nmalloc, 0, @@ -360,26 +355,25 @@ TEST_BEGIN(test_stats_arenas_lruns) int expected = config_stats ? 
0 : ENOENT; arena = 0; - assert_d_eq(mallctl("thread.arena", NULL, NULL, (void *)&arena, - sizeof(arena)), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)), + 0, "Unexpected mallctl() failure"); p = mallocx(LARGE_MINCLASS, 0); assert_ptr_not_null(p, "Unexpected mallocx() failure"); - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), - 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, + "Unexpected mallctl() failure"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.lruns.0.nmalloc", (void *)&nmalloc, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.lruns.0.ndalloc", (void *)&ndalloc, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.lruns.0.nrequests", - (void *)&nrequests, &sz, NULL, 0), expected, - "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.lruns.0.nmalloc", &nmalloc, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.lruns.0.ndalloc", &ndalloc, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.lruns.0.nrequests", &nrequests, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.lruns.0.curruns", (void *)&curruns, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.lruns.0.curruns", &curruns, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); if (config_stats) { assert_u64_gt(nmalloc, 0, @@ -405,26 +399,23 @@ TEST_BEGIN(test_stats_arenas_hchunks) int expected = config_stats ? 
0 : ENOENT; arena = 0; - assert_d_eq(mallctl("thread.arena", NULL, NULL, (void *)&arena, - sizeof(arena)), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)), + 0, "Unexpected mallctl() failure"); p = mallocx(chunksize, 0); assert_ptr_not_null(p, "Unexpected mallocx() failure"); - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), - 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, + "Unexpected mallctl() failure"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.hchunks.0.nmalloc", - (void *)&nmalloc, &sz, NULL, 0), expected, - "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.hchunks.0.ndalloc", - (void *)&ndalloc, &sz, NULL, 0), expected, - "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.hchunks.0.nmalloc", &nmalloc, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.hchunks.0.ndalloc", &ndalloc, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.hchunks.0.curhchunks", - (void *)&curhchunks, &sz, NULL, 0), expected, - "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.hchunks.0.curhchunks", &curhchunks, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); if (config_stats) { assert_u64_gt(nmalloc, 0, diff --git a/deps/jemalloc/test/unit/ticker.c b/deps/jemalloc/test/unit/ticker.c deleted file mode 100644 index e737020ab..000000000 --- a/deps/jemalloc/test/unit/ticker.c +++ /dev/null @@ -1,76 +0,0 @@ -#include "test/jemalloc_test.h" - -TEST_BEGIN(test_ticker_tick) -{ -#define NREPS 2 -#define NTICKS 3 - ticker_t ticker; - int32_t i, j; - - ticker_init(&ticker, NTICKS); - for (i = 0; i < NREPS; i++) { - for (j = 0; j < NTICKS; j++) { - assert_u_eq(ticker_read(&ticker), NTICKS - j, - "Unexpected ticker value (i=%d, j=%d)", i, j); - 
assert_false(ticker_tick(&ticker), - "Unexpected ticker fire (i=%d, j=%d)", i, j); - } - assert_u32_eq(ticker_read(&ticker), 0, - "Expected ticker depletion"); - assert_true(ticker_tick(&ticker), - "Expected ticker fire (i=%d)", i); - assert_u32_eq(ticker_read(&ticker), NTICKS, - "Expected ticker reset"); - } -#undef NTICKS -} -TEST_END - -TEST_BEGIN(test_ticker_ticks) -{ -#define NTICKS 3 - ticker_t ticker; - - ticker_init(&ticker, NTICKS); - - assert_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value"); - assert_false(ticker_ticks(&ticker, NTICKS), "Unexpected ticker fire"); - assert_u_eq(ticker_read(&ticker), 0, "Unexpected ticker value"); - assert_true(ticker_ticks(&ticker, NTICKS), "Expected ticker fire"); - assert_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value"); - - assert_true(ticker_ticks(&ticker, NTICKS + 1), "Expected ticker fire"); - assert_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value"); -#undef NTICKS -} -TEST_END - -TEST_BEGIN(test_ticker_copy) -{ -#define NTICKS 3 - ticker_t ta, tb; - - ticker_init(&ta, NTICKS); - ticker_copy(&tb, &ta); - assert_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value"); - assert_true(ticker_ticks(&tb, NTICKS + 1), "Expected ticker fire"); - assert_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value"); - - ticker_tick(&ta); - ticker_copy(&tb, &ta); - assert_u_eq(ticker_read(&tb), NTICKS - 1, "Unexpected ticker value"); - assert_true(ticker_ticks(&tb, NTICKS), "Expected ticker fire"); - assert_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value"); -#undef NTICKS -} -TEST_END - -int -main(void) -{ - - return (test( - test_ticker_tick, - test_ticker_ticks, - test_ticker_copy)); -} diff --git a/deps/jemalloc/test/unit/tsd.c b/deps/jemalloc/test/unit/tsd.c index d5f96ac36..8be787fda 100644 --- a/deps/jemalloc/test/unit/tsd.c +++ b/deps/jemalloc/test/unit/tsd.c @@ -58,18 +58,18 @@ thd_start(void *arg) data_t d = (data_t)(uintptr_t)arg; void *p; - assert_x_eq(*data_tsd_get(true), 
DATA_INIT, + assert_x_eq(*data_tsd_get(), DATA_INIT, "Initial tsd get should return initialization value"); p = malloc(1); assert_ptr_not_null(p, "Unexpected malloc() failure"); data_tsd_set(&d); - assert_x_eq(*data_tsd_get(true), d, + assert_x_eq(*data_tsd_get(), d, "After tsd set, tsd get should return value that was set"); d = 0; - assert_x_eq(*data_tsd_get(true), (data_t)(uintptr_t)arg, + assert_x_eq(*data_tsd_get(), (data_t)(uintptr_t)arg, "Resetting local data should have no effect on tsd"); free(p); @@ -79,7 +79,7 @@ thd_start(void *arg) TEST_BEGIN(test_tsd_main_thread) { - thd_start((void *)(uintptr_t)0xa5f3e329); + thd_start((void *) 0xa5f3e329); } TEST_END @@ -99,11 +99,6 @@ int main(void) { - /* Core tsd bootstrapping must happen prior to data_tsd_boot(). */ - if (nallocx(1, 0) == 0) { - malloc_printf("Initialization error"); - return (test_status_fail); - } data_tsd_boot(); return (test( diff --git a/deps/jemalloc/test/unit/util.c b/deps/jemalloc/test/unit/util.c index b1f9abd9b..8ab39a458 100644 --- a/deps/jemalloc/test/unit/util.c +++ b/deps/jemalloc/test/unit/util.c @@ -1,54 +1,33 @@ #include "test/jemalloc_test.h" -#define TEST_POW2_CEIL(t, suf, pri) do { \ - unsigned i, pow2; \ - t x; \ - \ - assert_##suf##_eq(pow2_ceil_##suf(0), 0, "Unexpected result"); \ - \ - for (i = 0; i < sizeof(t) * 8; i++) { \ - assert_##suf##_eq(pow2_ceil_##suf(((t)1) << i), ((t)1) \ - << i, "Unexpected result"); \ - } \ - \ - for (i = 2; i < sizeof(t) * 8; i++) { \ - assert_##suf##_eq(pow2_ceil_##suf((((t)1) << i) - 1), \ - ((t)1) << i, "Unexpected result"); \ - } \ - \ - for (i = 0; i < sizeof(t) * 8 - 1; i++) { \ - assert_##suf##_eq(pow2_ceil_##suf((((t)1) << i) + 1), \ - ((t)1) << (i+1), "Unexpected result"); \ - } \ - \ - for (pow2 = 1; pow2 < 25; pow2++) { \ - for (x = (((t)1) << (pow2-1)) + 1; x <= ((t)1) << pow2; \ - x++) { \ - assert_##suf##_eq(pow2_ceil_##suf(x), \ - ((t)1) << pow2, \ - "Unexpected result, x=%"pri, x); \ - } \ - } \ -} while (0) - 
-TEST_BEGIN(test_pow2_ceil_u64) +TEST_BEGIN(test_pow2_ceil) { + unsigned i, pow2; + size_t x; - TEST_POW2_CEIL(uint64_t, u64, FMTu64); -} -TEST_END + assert_zu_eq(pow2_ceil(0), 0, "Unexpected result"); -TEST_BEGIN(test_pow2_ceil_u32) -{ + for (i = 0; i < sizeof(size_t) * 8; i++) { + assert_zu_eq(pow2_ceil(ZU(1) << i), ZU(1) << i, + "Unexpected result"); + } - TEST_POW2_CEIL(uint32_t, u32, FMTu32); -} -TEST_END + for (i = 2; i < sizeof(size_t) * 8; i++) { + assert_zu_eq(pow2_ceil((ZU(1) << i) - 1), ZU(1) << i, + "Unexpected result"); + } -TEST_BEGIN(test_pow2_ceil_zu) -{ + for (i = 0; i < sizeof(size_t) * 8 - 1; i++) { + assert_zu_eq(pow2_ceil((ZU(1) << i) + 1), ZU(1) << (i+1), + "Unexpected result"); + } - TEST_POW2_CEIL(size_t, zu, "zu"); + for (pow2 = 1; pow2 < 25; pow2++) { + for (x = (ZU(1) << (pow2-1)) + 1; x <= ZU(1) << pow2; x++) { + assert_zu_eq(pow2_ceil(x), ZU(1) << pow2, + "Unexpected result, x=%zu", x); + } + } } TEST_END @@ -75,7 +54,6 @@ TEST_BEGIN(test_malloc_strtoumax) }; #define ERR(e) e, #e #define KUMAX(x) ((uintmax_t)x##ULL) -#define KSMAX(x) ((uintmax_t)(intmax_t)x##LL) struct test_s tests[] = { {"0", "0", -1, ERR(EINVAL), UINTMAX_MAX}, {"0", "0", 1, ERR(EINVAL), UINTMAX_MAX}, @@ -88,13 +66,13 @@ TEST_BEGIN(test_malloc_strtoumax) {"42", "", 0, ERR(0), KUMAX(42)}, {"+42", "", 0, ERR(0), KUMAX(42)}, - {"-42", "", 0, ERR(0), KSMAX(-42)}, + {"-42", "", 0, ERR(0), KUMAX(-42)}, {"042", "", 0, ERR(0), KUMAX(042)}, {"+042", "", 0, ERR(0), KUMAX(042)}, - {"-042", "", 0, ERR(0), KSMAX(-042)}, + {"-042", "", 0, ERR(0), KUMAX(-042)}, {"0x42", "", 0, ERR(0), KUMAX(0x42)}, {"+0x42", "", 0, ERR(0), KUMAX(0x42)}, - {"-0x42", "", 0, ERR(0), KSMAX(-0x42)}, + {"-0x42", "", 0, ERR(0), KUMAX(-0x42)}, {"0", "", 0, ERR(0), KUMAX(0)}, {"1", "", 0, ERR(0), KUMAX(1)}, @@ -131,7 +109,6 @@ TEST_BEGIN(test_malloc_strtoumax) }; #undef ERR #undef KUMAX -#undef KSMAX unsigned i; for (i = 0; i < sizeof(tests)/sizeof(struct test_s); i++) { @@ -162,14 +139,14 @@ 
TEST_BEGIN(test_malloc_snprintf_truncated) { #define BUFLEN 15 char buf[BUFLEN]; - size_t result; + int result; size_t len; -#define TEST(expected_str_untruncated, ...) do { \ +#define TEST(expected_str_untruncated, ...) do { \ result = malloc_snprintf(buf, len, __VA_ARGS__); \ assert_d_eq(strncmp(buf, expected_str_untruncated, len-1), 0, \ "Unexpected string inequality (\"%s\" vs \"%s\")", \ - buf, expected_str_untruncated); \ - assert_zu_eq(result, strlen(expected_str_untruncated), \ + buf, expected_str_untruncated); \ + assert_d_eq(result, strlen(expected_str_untruncated), \ "Unexpected result"); \ } while (0) @@ -195,11 +172,11 @@ TEST_BEGIN(test_malloc_snprintf) { #define BUFLEN 128 char buf[BUFLEN]; - size_t result; + int result; #define TEST(expected_str, ...) do { \ result = malloc_snprintf(buf, sizeof(buf), __VA_ARGS__); \ assert_str_eq(buf, expected_str, "Unexpected output"); \ - assert_zu_eq(result, strlen(expected_str), "Unexpected result");\ + assert_d_eq(result, strlen(expected_str), "Unexpected result"); \ } while (0) TEST("hello", "hello"); @@ -309,9 +286,7 @@ main(void) { return (test( - test_pow2_ceil_u64, - test_pow2_ceil_u32, - test_pow2_ceil_zu, + test_pow2_ceil, test_malloc_strtoumax_no_endptr, test_malloc_strtoumax, test_malloc_snprintf_truncated, diff --git a/deps/jemalloc/test/unit/witness.c b/deps/jemalloc/test/unit/witness.c deleted file mode 100644 index ed172753c..000000000 --- a/deps/jemalloc/test/unit/witness.c +++ /dev/null @@ -1,278 +0,0 @@ -#include "test/jemalloc_test.h" - -static witness_lock_error_t *witness_lock_error_orig; -static witness_owner_error_t *witness_owner_error_orig; -static witness_not_owner_error_t *witness_not_owner_error_orig; -static witness_lockless_error_t *witness_lockless_error_orig; - -static bool saw_lock_error; -static bool saw_owner_error; -static bool saw_not_owner_error; -static bool saw_lockless_error; - -static void -witness_lock_error_intercept(const witness_list_t *witnesses, - const witness_t 
*witness) -{ - - saw_lock_error = true; -} - -static void -witness_owner_error_intercept(const witness_t *witness) -{ - - saw_owner_error = true; -} - -static void -witness_not_owner_error_intercept(const witness_t *witness) -{ - - saw_not_owner_error = true; -} - -static void -witness_lockless_error_intercept(const witness_list_t *witnesses) -{ - - saw_lockless_error = true; -} - -static int -witness_comp(const witness_t *a, const witness_t *b) -{ - - assert_u_eq(a->rank, b->rank, "Witnesses should have equal rank"); - - return (strcmp(a->name, b->name)); -} - -static int -witness_comp_reverse(const witness_t *a, const witness_t *b) -{ - - assert_u_eq(a->rank, b->rank, "Witnesses should have equal rank"); - - return (-strcmp(a->name, b->name)); -} - -TEST_BEGIN(test_witness) -{ - witness_t a, b; - tsdn_t *tsdn; - - test_skip_if(!config_debug); - - tsdn = tsdn_fetch(); - - witness_assert_lockless(tsdn); - - witness_init(&a, "a", 1, NULL); - witness_assert_not_owner(tsdn, &a); - witness_lock(tsdn, &a); - witness_assert_owner(tsdn, &a); - - witness_init(&b, "b", 2, NULL); - witness_assert_not_owner(tsdn, &b); - witness_lock(tsdn, &b); - witness_assert_owner(tsdn, &b); - - witness_unlock(tsdn, &a); - witness_unlock(tsdn, &b); - - witness_assert_lockless(tsdn); -} -TEST_END - -TEST_BEGIN(test_witness_comp) -{ - witness_t a, b, c, d; - tsdn_t *tsdn; - - test_skip_if(!config_debug); - - tsdn = tsdn_fetch(); - - witness_assert_lockless(tsdn); - - witness_init(&a, "a", 1, witness_comp); - witness_assert_not_owner(tsdn, &a); - witness_lock(tsdn, &a); - witness_assert_owner(tsdn, &a); - - witness_init(&b, "b", 1, witness_comp); - witness_assert_not_owner(tsdn, &b); - witness_lock(tsdn, &b); - witness_assert_owner(tsdn, &b); - witness_unlock(tsdn, &b); - - witness_lock_error_orig = witness_lock_error; - witness_lock_error = witness_lock_error_intercept; - saw_lock_error = false; - - witness_init(&c, "c", 1, witness_comp_reverse); - witness_assert_not_owner(tsdn, &c); - 
assert_false(saw_lock_error, "Unexpected witness lock error"); - witness_lock(tsdn, &c); - assert_true(saw_lock_error, "Expected witness lock error"); - witness_unlock(tsdn, &c); - - saw_lock_error = false; - - witness_init(&d, "d", 1, NULL); - witness_assert_not_owner(tsdn, &d); - assert_false(saw_lock_error, "Unexpected witness lock error"); - witness_lock(tsdn, &d); - assert_true(saw_lock_error, "Expected witness lock error"); - witness_unlock(tsdn, &d); - - witness_unlock(tsdn, &a); - - witness_assert_lockless(tsdn); - - witness_lock_error = witness_lock_error_orig; -} -TEST_END - -TEST_BEGIN(test_witness_reversal) -{ - witness_t a, b; - tsdn_t *tsdn; - - test_skip_if(!config_debug); - - witness_lock_error_orig = witness_lock_error; - witness_lock_error = witness_lock_error_intercept; - saw_lock_error = false; - - tsdn = tsdn_fetch(); - - witness_assert_lockless(tsdn); - - witness_init(&a, "a", 1, NULL); - witness_init(&b, "b", 2, NULL); - - witness_lock(tsdn, &b); - assert_false(saw_lock_error, "Unexpected witness lock error"); - witness_lock(tsdn, &a); - assert_true(saw_lock_error, "Expected witness lock error"); - - witness_unlock(tsdn, &a); - witness_unlock(tsdn, &b); - - witness_assert_lockless(tsdn); - - witness_lock_error = witness_lock_error_orig; -} -TEST_END - -TEST_BEGIN(test_witness_recursive) -{ - witness_t a; - tsdn_t *tsdn; - - test_skip_if(!config_debug); - - witness_not_owner_error_orig = witness_not_owner_error; - witness_not_owner_error = witness_not_owner_error_intercept; - saw_not_owner_error = false; - - witness_lock_error_orig = witness_lock_error; - witness_lock_error = witness_lock_error_intercept; - saw_lock_error = false; - - tsdn = tsdn_fetch(); - - witness_assert_lockless(tsdn); - - witness_init(&a, "a", 1, NULL); - - witness_lock(tsdn, &a); - assert_false(saw_lock_error, "Unexpected witness lock error"); - assert_false(saw_not_owner_error, "Unexpected witness not owner error"); - witness_lock(tsdn, &a); - 
assert_true(saw_lock_error, "Expected witness lock error"); - assert_true(saw_not_owner_error, "Expected witness not owner error"); - - witness_unlock(tsdn, &a); - - witness_assert_lockless(tsdn); - - witness_owner_error = witness_owner_error_orig; - witness_lock_error = witness_lock_error_orig; - -} -TEST_END - -TEST_BEGIN(test_witness_unlock_not_owned) -{ - witness_t a; - tsdn_t *tsdn; - - test_skip_if(!config_debug); - - witness_owner_error_orig = witness_owner_error; - witness_owner_error = witness_owner_error_intercept; - saw_owner_error = false; - - tsdn = tsdn_fetch(); - - witness_assert_lockless(tsdn); - - witness_init(&a, "a", 1, NULL); - - assert_false(saw_owner_error, "Unexpected owner error"); - witness_unlock(tsdn, &a); - assert_true(saw_owner_error, "Expected owner error"); - - witness_assert_lockless(tsdn); - - witness_owner_error = witness_owner_error_orig; -} -TEST_END - -TEST_BEGIN(test_witness_lockful) -{ - witness_t a; - tsdn_t *tsdn; - - test_skip_if(!config_debug); - - witness_lockless_error_orig = witness_lockless_error; - witness_lockless_error = witness_lockless_error_intercept; - saw_lockless_error = false; - - tsdn = tsdn_fetch(); - - witness_assert_lockless(tsdn); - - witness_init(&a, "a", 1, NULL); - - assert_false(saw_lockless_error, "Unexpected lockless error"); - witness_assert_lockless(tsdn); - - witness_lock(tsdn, &a); - witness_assert_lockless(tsdn); - assert_true(saw_lockless_error, "Expected lockless error"); - - witness_unlock(tsdn, &a); - - witness_assert_lockless(tsdn); - - witness_lockless_error = witness_lockless_error_orig; -} -TEST_END - -int -main(void) -{ - - return (test( - test_witness, - test_witness_comp, - test_witness_reversal, - test_witness_recursive, - test_witness_unlock_not_owned, - test_witness_lockful)); -} diff --git a/deps/jemalloc/test/unit/zero.c b/deps/jemalloc/test/unit/zero.c index 30ebe37a4..93afc2b87 100644 --- a/deps/jemalloc/test/unit/zero.c +++ b/deps/jemalloc/test/unit/zero.c @@ -8,41 +8,39 @@ 
const char *malloc_conf = static void test_zero(size_t sz_min, size_t sz_max) { - uint8_t *s; + char *s; size_t sz_prev, sz, i; -#define MAGIC ((uint8_t)0x61) sz_prev = 0; - s = (uint8_t *)mallocx(sz_min, 0); + s = (char *)mallocx(sz_min, 0); assert_ptr_not_null((void *)s, "Unexpected mallocx() failure"); for (sz = sallocx(s, 0); sz <= sz_max; sz_prev = sz, sz = sallocx(s, 0)) { if (sz_prev > 0) { - assert_u_eq(s[0], MAGIC, + assert_c_eq(s[0], 'a', "Previously allocated byte %zu/%zu is corrupted", ZU(0), sz_prev); - assert_u_eq(s[sz_prev-1], MAGIC, + assert_c_eq(s[sz_prev-1], 'a', "Previously allocated byte %zu/%zu is corrupted", sz_prev-1, sz_prev); } for (i = sz_prev; i < sz; i++) { - assert_u_eq(s[i], 0x0, + assert_c_eq(s[i], 0x0, "Newly allocated byte %zu/%zu isn't zero-filled", i, sz); - s[i] = MAGIC; + s[i] = 'a'; } if (xallocx(s, sz+1, 0, 0) == sz) { - s = (uint8_t *)rallocx(s, sz+1, 0); + s = (char *)rallocx(s, sz+1, 0); assert_ptr_not_null((void *)s, "Unexpected rallocx() failure"); } } dallocx(s, 0); -#undef MAGIC } TEST_BEGIN(test_zero_small) From f07d53f7838666270d9da94663b526dac3ac11f1 Mon Sep 17 00:00:00 2001 From: oranagra Date: Mon, 30 Jan 2017 12:53:13 -0800 Subject: [PATCH 0322/1722] add test for active defrag --- tests/unit/memefficiency.tcl | 45 ++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/tests/unit/memefficiency.tcl b/tests/unit/memefficiency.tcl index 7ca9a705b..12cb84584 100644 --- a/tests/unit/memefficiency.tcl +++ b/tests/unit/memefficiency.tcl @@ -35,3 +35,48 @@ start_server {tags {"memefficiency"}} { } } } + +start_server {tags {"defrag"}} { + if {[string match {*jemalloc*} [s mem_allocator]]} { + test "Actove defrag" { + r config set activedefrag no + r config set active-defrag-threshold-lower 5 + r config set active-defrag-ignore-bytes 2mb + r config set maxmemory 100mb + r config set maxmemory-policy allkeys-lru + r debug populate 700000 asdf 150 + r debug populate 170000 asdf 300 + set frag [s 
mem_fragmentation_ratio] + assert {$frag >= 1.7} + r config set activedefrag yes + after 1500 ;# active defrag tests the status once a second. + set hits [s active_defrag_hits] + + # wait for the active defrag to stop working + set tries 0 + while { True } { + incr tries + after 500 + set prev_hits $hits + set hits [s active_defrag_hits] + if {$hits == $prev_hits} { + break + } + assert {$tries < 100} + } + + # TODO: we need to expose more accurate fragmentation info + # i.e. the allocator used and active pages + # instead we currently look at RSS so we need to ask for purge + r memory purge + + # test the the fragmentation is lower and that the defragger stopped working + set frag [s mem_fragmentation_ratio] + assert {$frag < 1.4} + set misses [s active_defrag_misses] + after 500 + set misses2 [s active_defrag_misses] + assert {$misses2 == $misses} + } + } +} From 72dbb1d09547f1e95c7f2c40bd65c49093ef3704 Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 22 Apr 2017 13:21:41 +0200 Subject: [PATCH 0323/1722] Defrag: fix test false positive. Apparently 1.4 is too low compared to what you get in certain setups (including mine). I raised it to 1.55 that hopefully is still enough to test that the fragmentation went down from 1.7 but without incurring in issues, however the test setup may be still fragile so certain times this may lead to false positives again, it's hard to test for these things in a determinsitic way. Related to #3786. 
--- tests/unit/memefficiency.tcl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/unit/memefficiency.tcl b/tests/unit/memefficiency.tcl index 12cb84584..5f0bed176 100644 --- a/tests/unit/memefficiency.tcl +++ b/tests/unit/memefficiency.tcl @@ -38,7 +38,7 @@ start_server {tags {"memefficiency"}} { start_server {tags {"defrag"}} { if {[string match {*jemalloc*} [s mem_allocator]]} { - test "Actove defrag" { + test "Active defrag" { r config set activedefrag no r config set active-defrag-threshold-lower 5 r config set active-defrag-ignore-bytes 2mb @@ -70,9 +70,10 @@ start_server {tags {"defrag"}} { # instead we currently look at RSS so we need to ask for purge r memory purge - # test the the fragmentation is lower and that the defragger stopped working + # Test the the fragmentation is lower and that the defragger + # stopped working set frag [s mem_fragmentation_ratio] - assert {$frag < 1.4} + assert {$frag < 1.55} set misses [s active_defrag_misses] after 500 set misses2 [s active_defrag_misses] From 86b0650a4d5d71b2a1018d1b2acf5bbf48352fc5 Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 22 Apr 2017 15:59:53 +0200 Subject: [PATCH 0324/1722] Defrag: test currently disabled, too many false positives. Related to #3786. 
--- tests/unit/memefficiency.tcl | 80 ++++++++++++++++++------------------ 1 file changed, 41 insertions(+), 39 deletions(-) diff --git a/tests/unit/memefficiency.tcl b/tests/unit/memefficiency.tcl index 5f0bed176..f452f0224 100644 --- a/tests/unit/memefficiency.tcl +++ b/tests/unit/memefficiency.tcl @@ -36,48 +36,50 @@ start_server {tags {"memefficiency"}} { } } -start_server {tags {"defrag"}} { - if {[string match {*jemalloc*} [s mem_allocator]]} { - test "Active defrag" { - r config set activedefrag no - r config set active-defrag-threshold-lower 5 - r config set active-defrag-ignore-bytes 2mb - r config set maxmemory 100mb - r config set maxmemory-policy allkeys-lru - r debug populate 700000 asdf 150 - r debug populate 170000 asdf 300 - set frag [s mem_fragmentation_ratio] - assert {$frag >= 1.7} - r config set activedefrag yes - after 1500 ;# active defrag tests the status once a second. - set hits [s active_defrag_hits] - - # wait for the active defrag to stop working - set tries 0 - while { True } { - incr tries - after 500 - set prev_hits $hits +if 0 { + start_server {tags {"defrag"}} { + if {[string match {*jemalloc*} [s mem_allocator]]} { + test "Active defrag" { + r config set activedefrag no + r config set active-defrag-threshold-lower 5 + r config set active-defrag-ignore-bytes 2mb + r config set maxmemory 100mb + r config set maxmemory-policy allkeys-lru + r debug populate 700000 asdf 150 + r debug populate 170000 asdf 300 + set frag [s mem_fragmentation_ratio] + assert {$frag >= 1.7} + r config set activedefrag yes + after 1500 ;# active defrag tests the status once a second. 
set hits [s active_defrag_hits] - if {$hits == $prev_hits} { - break + + # wait for the active defrag to stop working + set tries 0 + while { True } { + incr tries + after 500 + set prev_hits $hits + set hits [s active_defrag_hits] + if {$hits == $prev_hits} { + break + } + assert {$tries < 100} } - assert {$tries < 100} + + # TODO: we need to expose more accurate fragmentation info + # i.e. the allocator used and active pages + # instead we currently look at RSS so we need to ask for purge + r memory purge + + # Test the the fragmentation is lower and that the defragger + # stopped working + set frag [s mem_fragmentation_ratio] + assert {$frag < 1.55} + set misses [s active_defrag_misses] + after 500 + set misses2 [s active_defrag_misses] + assert {$misses2 == $misses} } - - # TODO: we need to expose more accurate fragmentation info - # i.e. the allocator used and active pages - # instead we currently look at RSS so we need to ask for purge - r memory purge - - # Test the the fragmentation is lower and that the defragger - # stopped working - set frag [s mem_fragmentation_ratio] - assert {$frag < 1.55} - set misses [s active_defrag_misses] - after 500 - set misses2 [s active_defrag_misses] - assert {$misses2 == $misses} } } } From 0ee4c2743f4b81dde0dd5c13e020c459c3d206af Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 27 Apr 2017 17:04:07 +0200 Subject: [PATCH 0325/1722] PSYNC2: fix master cleanup when caching it. The master client cleanup was incomplete: resetClient() was missing and the output buffer of the client was not reset, so pending commands related to the previous connection could be still sent. The first problem caused the client argument vector to be, at times, half populated, so that when the correct replication stream arrived the protcol got mixed to the arugments creating invalid commands that nobody called. 
Thanks to @yangsiran for also investigating this problem, after already providing important design / implementation hints for the original PSYNC2 issues (see referenced Github issue). Note that this commit adds a new function to the list library of Redis in order to be able to reset a list without destroying it. Related to issue #3899. --- src/adlist.c | 16 ++++++++++++---- src/adlist.h | 1 + src/replication.c | 10 +++++++--- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/src/adlist.c b/src/adlist.c index f171d3ecc..2fb61a6f1 100644 --- a/src/adlist.c +++ b/src/adlist.c @@ -52,10 +52,8 @@ list *listCreate(void) return list; } -/* Free the whole list. - * - * This function can't fail. */ -void listRelease(list *list) +/* Remove all the elements from the list without destroying the list itself. */ +void listEmpty(list *list) { unsigned long len; listNode *current, *next; @@ -68,6 +66,16 @@ void listRelease(list *list) zfree(current); current = next; } + list->head = list->tail = NULL; + list->len = 0; +} + +/* Free the whole list. + * + * This function can't fail. */ +void listRelease(list *list) +{ + listEmpty(list); zfree(list); } diff --git a/src/adlist.h b/src/adlist.h index be322552f..e457a979e 100644 --- a/src/adlist.h +++ b/src/adlist.h @@ -72,6 +72,7 @@ typedef struct list { /* Prototypes */ list *listCreate(void); void listRelease(list *list); +void listEmpty(list *list); list *listAddNodeHead(list *list, void *value); list *listAddNodeTail(list *list, void *value); list *listInsertNode(list *list, listNode *old_node, void *value, int after); diff --git a/src/replication.c b/src/replication.c index 1828eb8bf..6be5d2631 100644 --- a/src/replication.c +++ b/src/replication.c @@ -2119,13 +2119,17 @@ void replicationCacheMaster(client *c) { /* Unlink the client from the server structures. 
*/ unlinkClient(c); - /* Fix the master specific fields: we want to discard to non processed - * query buffers and non processed offsets, including pending - * transactions. */ + /* Reset the master client so that's ready to accept new commands: + * we want to discard te non processed query buffers and non processed + * offsets, including pending transactions, already populated arguments, + * pending outputs to the master. */ sdsclear(server.master->querybuf); sdsclear(server.master->pending_querybuf); server.master->read_reploff = server.master->reploff; if (c->flags & CLIENT_MULTI) discardTransaction(c); + listEmpty(c->reply); + c->bufpos = 0; + resetClient(c); /* Save the master. Server.master will be set to null later by * replicationHandleMasterDisconnection(). */ From 167765ed327452f4df3fd63df6fa0bb635af456a Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 28 Apr 2017 10:37:07 +0200 Subject: [PATCH 0326/1722] Regression test for PSYNC2 issue #3899 added. Experimentally verified that it can trigger the issue reverting the fix. At least on my system... Being the bug time/backlog dependant, it is very hard to tell if this test will be able to trigger the problem consistently, however even if it triggers the problem once in a while, we'll see it in the CI environment at http://ci.redis.io. --- tests/integration/psync2-reg.tcl | 61 ++++++++++++++++++++++++++++++++ tests/test_helper.tcl | 1 + 2 files changed, 62 insertions(+) create mode 100644 tests/integration/psync2-reg.tcl diff --git a/tests/integration/psync2-reg.tcl b/tests/integration/psync2-reg.tcl new file mode 100644 index 000000000..d41be51b8 --- /dev/null +++ b/tests/integration/psync2-reg.tcl @@ -0,0 +1,61 @@ +# Issue 3899 regression test. +# We create a chain of three instances: master -> slave -> slave2 +# and continuously break the link while traffic is generated by +# redis-benchmark. At the end we check that the data is the same +# everywhere. 
+ +start_server {tags {"psync2"}} { +start_server {} { +start_server {} { + set start_time [clock seconds] ; # Test start time + + # Config + set debug_msg 0 ; # Enable additional debug messages + + set duration 20 ; # Total test seconds + + for {set j 0} {$j < 3} {incr j} { + set R($j) [srv [expr 0-$j] client] + set R_host($j) [srv [expr 0-$j] host] + set R_port($j) [srv [expr 0-$j] port] + if {$debug_msg} {puts "Log file: [srv [expr 0-$j] stdout]"} + } + + # Setup the replication and backlog parameters + $R(1) slaveof $R_host(0) $R_port(0) + $R(2) slaveof $R_host(0) $R_port(0) + $R(0) set foo bar + wait_for_condition 50 1000 { + [$R(1) dbsize] == 1 && [$R(2) dbsize] == 1 + } else { + fail "Slaves not replicating from master" + } + + $R(0) config set repl-backlog-size 10mb + $R(1) config set repl-backlog-size 10mb + + set cycle_start_time [clock milliseconds] + set bench_pid [exec src/redis-benchmark -p $R_port(0) -n 10000000 -r 1000 incr __rand_int__ > /dev/null &] + while 1 { + set elapsed [expr {[clock milliseconds]-$cycle_start_time}] + if {$elapsed > $duration*1000} break + if {rand() < .05} { + test "Kill first slave link with the master" { + $R(1) client kill type master + } + } + if {rand() < .05} { + test "Kill chained slave link with the master" { + $R(2) client kill type master + } + } + after 100 + } + + wait_for_condition 50 1000 { + ([$R(0) debug digest] eq [$R(1) debug digest]) && + ([$R(1) debug digest] eq [$R(2) debug digest]) + } else { + fail "The three instances have different data sets" + } +}}} diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl index fdfe6a01b..41c867803 100644 --- a/tests/test_helper.tcl +++ b/tests/test_helper.tcl @@ -42,6 +42,7 @@ set ::all_tests { integration/convert-zipmap-hash-on-load integration/logging integration/psync2 + integration/psync2-reg unit/pubsub unit/slowlog unit/scripting From 78cfb8474b236564c97bf8a4d814111087316aa2 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 28 Apr 2017 11:16:37 +0200 
Subject: [PATCH 0327/1722] Regression test for #3899 fixed. --- tests/integration/psync2-reg.tcl | 55 +++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 19 deletions(-) diff --git a/tests/integration/psync2-reg.tcl b/tests/integration/psync2-reg.tcl index d41be51b8..ba610a3b8 100644 --- a/tests/integration/psync2-reg.tcl +++ b/tests/integration/psync2-reg.tcl @@ -7,11 +7,11 @@ start_server {tags {"psync2"}} { start_server {} { start_server {} { - set start_time [clock seconds] ; # Test start time - # Config set debug_msg 0 ; # Enable additional debug messages + set no_exit 0 ; # Do not exit at end of the test + set duration 20 ; # Total test seconds for {set j 0} {$j < 3} {incr j} { @@ -22,40 +22,57 @@ start_server {} { } # Setup the replication and backlog parameters - $R(1) slaveof $R_host(0) $R_port(0) - $R(2) slaveof $R_host(0) $R_port(0) - $R(0) set foo bar - wait_for_condition 50 1000 { - [$R(1) dbsize] == 1 && [$R(2) dbsize] == 1 - } else { - fail "Slaves not replicating from master" + test "PSYNC2 #3899 regression: setup" { + $R(1) slaveof $R_host(0) $R_port(0) + $R(2) slaveof $R_host(0) $R_port(0) + $R(0) set foo bar + wait_for_condition 50 1000 { + [$R(1) dbsize] == 1 && [$R(2) dbsize] == 1 + } else { + fail "Slaves not replicating from master" + } + $R(0) config set repl-backlog-size 10mb + $R(1) config set repl-backlog-size 10mb } - $R(0) config set repl-backlog-size 10mb - $R(1) config set repl-backlog-size 10mb - set cycle_start_time [clock milliseconds] set bench_pid [exec src/redis-benchmark -p $R_port(0) -n 10000000 -r 1000 incr __rand_int__ > /dev/null &] while 1 { set elapsed [expr {[clock milliseconds]-$cycle_start_time}] if {$elapsed > $duration*1000} break if {rand() < .05} { - test "Kill first slave link with the master" { + test "PSYNC2 #3899 regression: kill first slave" { $R(1) client kill type master } } if {rand() < .05} { - test "Kill chained slave link with the master" { + test "PSYNC2 #3899 regression: kill chained 
slave" { $R(2) client kill type master } } after 100 } + exec kill -9 $bench_pid - wait_for_condition 50 1000 { - ([$R(0) debug digest] eq [$R(1) debug digest]) && - ([$R(1) debug digest] eq [$R(2) debug digest]) - } else { - fail "The three instances have different data sets" + if {$debug_msg} { + for {set j 0} {$j < 100} {incr j} { + if { + [$R(0) debug digest] == [$R(1) debug digest] && + [$R(1) debug digest] == [$R(2) debug digest] + } break + puts [$R(0) debug digest] + puts [$R(1) debug digest] + puts [$R(2) debug digest] + after 1000 + } + } + + test "PSYNC2 #3899 regression: verify consistency" { + wait_for_condition 50 1000 { + ([$R(0) debug digest] eq [$R(1) debug digest]) && + ([$R(1) debug digest] eq [$R(2) debug digest]) + } else { + fail "The three instances have different data sets" + } } }}} From 2d1ae6f06d6a5c5bd3fcf2a25f1386dfcacc4894 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 28 Apr 2017 18:41:10 +0200 Subject: [PATCH 0328/1722] Modules TSC: GIL and cooperative multi tasking setup. --- src/module.c | 28 +++++++++++++++++++++++++++- src/server.c | 3 +++ src/server.h | 1 + 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/src/module.c b/src/module.c index c81330b63..cbc6ff627 100644 --- a/src/module.c +++ b/src/module.c @@ -105,7 +105,7 @@ struct RedisModuleCtx { int flags; /* REDISMODULE_CTX_... flags. */ void **postponed_arrays; /* To set with RM_ReplySetArrayLength(). */ int postponed_arrays_count; /* Number of entries in postponed_arrays. */ - void *blocked_privdata; /* Privdata set when unblocking a clinet. */ + void *blocked_privdata; /* Privdata set when unblocking a client. */ /* Used if there is the REDISMODULE_CTX_KEYS_POS_REQUEST flag set. 
*/ int *keys_pos; @@ -203,6 +203,10 @@ typedef struct RedisModuleBlockedClient { static pthread_mutex_t moduleUnblockedClientsMutex = PTHREAD_MUTEX_INITIALIZER; static list *moduleUnblockedClients; +/* We need a mutex that is unlocked / relocked in beforeSleep() in order to + * allow thread safe contexts to execute commands at a safe moment. */ +static pthread_mutex_t moduleGIL = PTHREAD_MUTEX_INITIALIZER; + /* -------------------------------------------------------------------------- * Prototypes * -------------------------------------------------------------------------- */ @@ -3278,6 +3282,24 @@ void *RM_GetBlockedClientPrivateData(RedisModuleCtx *ctx) { return ctx->blocked_privdata; } +/* -------------------------------------------------------------------------- + * Thread Safe Contexts + * -------------------------------------------------------------------------- */ + +/* Operations executed in thread safe contexts use a global lock in order to + * be ran at a safe time. This function unlocks and re-acquire the locks: + * hopefully with *any* sane implementation of pthreads, this will allow the + * modules to make progresses. + * + * This function is called in beforeSleep(). */ +void moduleCooperativeMultiTaskingCycle(void) { + if (dictSize(modules) == 0) return; /* No modules, no async ops. */ + pthread_mutex_unlock(&moduleGIL); + /* Here hopefully thread modules waiting to be executed at a safe time + * should be able to acquire the lock. */ + pthread_mutex_lock(&moduleGIL); +} + /* -------------------------------------------------------------------------- * Modules API internals * -------------------------------------------------------------------------- */ @@ -3329,6 +3351,10 @@ void moduleInitModulesSystem(void) { * and we do not want to block not in the read nor in the write half. 
*/ anetNonBlock(NULL,server.module_blocked_pipe[0]); anetNonBlock(NULL,server.module_blocked_pipe[1]); + + /* Our thread-safe contexts GIL must start with already locked: + * it is just unlocked when it's safe. */ + pthread_mutex_lock(&moduleGIL); } /* Load all the modules in the server.loadmodule_queue list, which is diff --git a/src/server.c b/src/server.c index 72914c53b..e9013bf60 100644 --- a/src/server.c +++ b/src/server.c @@ -1172,6 +1172,9 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { void beforeSleep(struct aeEventLoop *eventLoop) { UNUSED(eventLoop); + /* Give some run time to modules threads using thread safe contexts. */ + moduleCooperativeMultiTaskingCycle(); + /* Call the Redis Cluster before sleep function. Note that this function * may change the state of Redis Cluster (from ok to fail or vice versa), * so it's a good idea to call it before serving the unblocked clients diff --git a/src/server.h b/src/server.h index 8cc172149..956370296 100644 --- a/src/server.h +++ b/src/server.h @@ -1294,6 +1294,7 @@ void unblockClientFromModule(client *c); void moduleHandleBlockedClients(void); void moduleBlockedClientTimedOut(client *c); void moduleBlockedClientPipeReadable(aeEventLoop *el, int fd, void *privdata, int mask); +void moduleCooperativeMultiTaskingCycle(void); /* Utils */ long long ustime(void); From 161772068137dabccbe66832e0f9bd9dc9d5afc2 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 2 May 2017 12:53:10 +0200 Subject: [PATCH 0329/1722] Modules TSC: Basic TS context creeation and handling. --- src/module.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 60 insertions(+), 1 deletion(-) diff --git a/src/module.c b/src/module.c index cbc6ff627..171887ff3 100644 --- a/src/module.c +++ b/src/module.c @@ -95,10 +95,15 @@ typedef struct RedisModulePoolAllocBlock { * * Note that not all the context structure is always filled with actual values * but only the fields needed in a given context. 
*/ + +struct RedisModuleBlockedClient; + struct RedisModuleCtx { void *getapifuncptr; /* NOTE: Must be the first field. */ struct RedisModule *module; /* Module reference. */ client *client; /* Client calling a command. */ + struct RedisModuleBlockedClient *blocked_client; /* Blocked client for + thread safe context. */ struct AutoMemEntry *amqueue; /* Auto memory queue of objects to free. */ int amqueue_len; /* Number of slots in amqueue. */ int amqueue_used; /* Number of used slots in amqueue. */ @@ -115,12 +120,13 @@ struct RedisModuleCtx { }; typedef struct RedisModuleCtx RedisModuleCtx; -#define REDISMODULE_CTX_INIT {(void*)(unsigned long)&RM_GetApi, NULL, NULL, NULL, 0, 0, 0, NULL, 0, NULL, NULL, 0, NULL} +#define REDISMODULE_CTX_INIT {(void*)(unsigned long)&RM_GetApi, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, 0, NULL, NULL, 0, NULL} #define REDISMODULE_CTX_MULTI_EMITTED (1<<0) #define REDISMODULE_CTX_AUTO_MEMORY (1<<1) #define REDISMODULE_CTX_KEYS_POS_REQUEST (1<<2) #define REDISMODULE_CTX_BLOCKED_REPLY (1<<3) #define REDISMODULE_CTX_BLOCKED_TIMEOUT (1<<4) +#define REDISMODULE_CTX_THREAD_SAFE (1<<5) /* This represents a Redis key opened with RM_OpenKey(). */ struct RedisModuleKey { @@ -198,6 +204,8 @@ typedef struct RedisModuleBlockedClient { void *privdata; /* Module private data that may be used by the reply or timeout callback. It is set via the RedisModule_UnblockClient() API. */ + client *reply_client; /* Fake client used to accumulate replies + in thread safe contexts. */ } RedisModuleBlockedClient; static pthread_mutex_t moduleUnblockedClientsMutex = PTHREAD_MUTEX_INITIALIZER; @@ -3165,6 +3173,7 @@ RedisModuleBlockedClient *RM_BlockClient(RedisModuleCtx *ctx, RedisModuleCmdFunc bc->timeout_callback = timeout_callback; bc->free_privdata = free_privdata; bc->privdata = NULL; + bc->reply_client = NULL; c->bpop.timeout = timeout_ms ? 
(mstime()+timeout_ms) : 0; blockClient(c,BLOCKED_MODULE); @@ -3300,6 +3309,56 @@ void moduleCooperativeMultiTaskingCycle(void) { pthread_mutex_lock(&moduleGIL); } +/* Return a context which can be used inside threads to make Redis context + * calls with certain modules APIs. If 'bc' is not NULL then the module will + * be bound to a blocked client, and it will be possible to use the + * `RedisModule_Reply*` family of functions to accumulate a reply for when the + * client will be unblocked. Otherwise the thread safe context will be + * detached by a specific client. + * + * To call non-reply APIs, the thread safe context must be prepared with: + * + * RedisModule_ThreadSafeCallStart(ctx); + * ... make your call here ... + * RedisModule_ThreadSafeCallStop(ctx); + * + * This is not needed when using `RedisModule_Reply*` functions, assuming + * that a blocked client was used when the context was created, otherwise + * no RedisModule_Reply* call should be made at all. + */ +RedisModuleCtx *RM_GetThreadSafeContext(RedisModuleBlockedClient *bc) { + RedisModuleCtx *ctx = zmalloc(sizeof(*ctx)); + RedisModuleCtx empty = REDISMODULE_CTX_INIT; + memcpy(ctx,&empty,sizeof(empty)); + if (bc) { + ctx->blocked_client = bc; + if (bc->reply_client == NULL) + bc->reply_client = createClient(-1); + } + ctx->flags |= REDISMODULE_CTX_THREAD_SAFE; + return ctx; +} + +/* Release a thread safe context. */ +void RM_FreeThreadSafeContext(RedisModuleCtx *ctx) { + moduleFreeContext(ctx); + zfree(ctx); +} + +/* Acquire the server lock before executing a thread safe API call. + * This is not needed for `RedisModule_Reply*` calls when there is + * a blocked client connected to the thread safe context. */ +void RM_ThreadSafeContextLock(RedisModuleCtx *ctx) { + DICT_NOTUSED(ctx); + pthread_mutex_lock(&moduleGIL); +} + +/* Release the server lock after a thread safe API call was executed. 
*/ +void RM_ThreadSafeContextUnlock(RedisModuleCtx *ctx) { + DICT_NOTUSED(ctx); + pthread_mutex_unlock(&moduleGIL); +} + /* -------------------------------------------------------------------------- * Modules API internals * -------------------------------------------------------------------------- */ From 59375337044aff0fb2706b79ee795f7ec8bbd80d Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 2 May 2017 15:05:39 +0200 Subject: [PATCH 0330/1722] Modules TSC: Handling of RM_Reply* functions. --- src/adlist.c | 12 ++++++++ src/adlist.h | 1 + src/module.c | 83 +++++++++++++++++++++++++++++++++++++++++++--------- 3 files changed, 82 insertions(+), 14 deletions(-) diff --git a/src/adlist.c b/src/adlist.c index 2fb61a6f1..96575c72e 100644 --- a/src/adlist.c +++ b/src/adlist.c @@ -341,3 +341,15 @@ void listRotate(list *list) { tail->next = list->head; list->head = tail; } + +/* Add all the elements of the list 'o' at the end of the + * list 'l'. The list 'other' remains empty but otherwise valid. */ +void listJoin(list *l, list *o) { + l->tail->next = o->head; + o->head->prev = l->tail; + l->tail = o->tail; + + /* Setup other as an empty list. */ + o->head = l->tail = NULL; + o->len = 0; +} diff --git a/src/adlist.h b/src/adlist.h index e457a979e..c954fac87 100644 --- a/src/adlist.h +++ b/src/adlist.h @@ -86,6 +86,7 @@ listNode *listIndex(list *list, long index); void listRewind(list *list, listIter *li); void listRewindTail(list *list, listIter *li); void listRotate(list *list); +void listJoin(list *l, list *o); /* Directions for iterators */ #define AL_START_HEAD 0 diff --git a/src/module.c b/src/module.c index 171887ff3..5a6189ebc 100644 --- a/src/module.c +++ b/src/module.c @@ -953,10 +953,31 @@ int RM_WrongArity(RedisModuleCtx *ctx) { return REDISMODULE_OK; } +/* Return the client object the `RM_Reply*` functions should target. 
+ * Normally this is just `ctx->client`, that is the client that called + * the module command, however in the case of thread safe contexts there + * is no directly associated client (since it would not be safe to access + * the client from a thread), so instead the blocked client object referenced + * in the thread safe context, has a fake client that we just use to accumulate + * the replies. Later, when the client is unblocked, the accumulated replies + * are appended to the actual client. + * + * The function returns the client pointer depending on the context, or + * NULL if there is no potential client. This happens when we are in the + * context of a thread safe context that was not initialized with a blocked + * client object. */ +client *moduleGetReplyClient(RedisModuleCtx *ctx) { + if (ctx->client) return ctx->client; + if (ctx->blocked_client) return ctx->blocked_client->reply_client; + return NULL; +} + /* Send an integer reply to the client, with the specified long long value. * The function always returns REDISMODULE_OK. */ int RM_ReplyWithLongLong(RedisModuleCtx *ctx, long long ll) { - addReplyLongLong(ctx->client,ll); + client *c = moduleGetReplyClient(ctx); + if (c == NULL) return REDISMODULE_OK; + addReplyLongLong(c,ll); return REDISMODULE_OK; } @@ -964,10 +985,12 @@ int RM_ReplyWithLongLong(RedisModuleCtx *ctx, long long ll) { * ReplyWithSimpleString() and ReplyWithError(). * The function always returns REDISMODULE_OK. */ int replyWithStatus(RedisModuleCtx *ctx, const char *msg, char *prefix) { + client *c = moduleGetReplyClient(ctx); + if (c == NULL) return REDISMODULE_OK; sds strmsg = sdsnewlen(prefix,1); strmsg = sdscat(strmsg,msg); strmsg = sdscatlen(strmsg,"\r\n",2); - addReplySds(ctx->client,strmsg); + addReplySds(c,strmsg); return REDISMODULE_OK; } @@ -1010,14 +1033,16 @@ int RM_ReplyWithSimpleString(RedisModuleCtx *ctx, const char *msg) { * * The function always returns REDISMODULE_OK. 
*/ int RM_ReplyWithArray(RedisModuleCtx *ctx, long len) { + client *c = moduleGetReplyClient(ctx); + if (c == NULL) return REDISMODULE_OK; if (len == REDISMODULE_POSTPONED_ARRAY_LEN) { ctx->postponed_arrays = zrealloc(ctx->postponed_arrays,sizeof(void*)* (ctx->postponed_arrays_count+1)); ctx->postponed_arrays[ctx->postponed_arrays_count] = - addDeferredMultiBulkLength(ctx->client); + addDeferredMultiBulkLength(c); ctx->postponed_arrays_count++; } else { - addReplyMultiBulkLen(ctx->client,len); + addReplyMultiBulkLen(c,len); } return REDISMODULE_OK; } @@ -1049,6 +1074,8 @@ int RM_ReplyWithArray(RedisModuleCtx *ctx, long len) { * that is not easy to calculate in advance the number of elements. */ void RM_ReplySetArrayLength(RedisModuleCtx *ctx, long len) { + client *c = moduleGetReplyClient(ctx); + if (c == NULL) return; if (ctx->postponed_arrays_count == 0) { serverLog(LL_WARNING, "API misuse detected in module %s: " @@ -1058,7 +1085,7 @@ void RM_ReplySetArrayLength(RedisModuleCtx *ctx, long len) { return; } ctx->postponed_arrays_count--; - setDeferredMultiBulkLength(ctx->client, + setDeferredMultiBulkLength(c, ctx->postponed_arrays[ctx->postponed_arrays_count], len); if (ctx->postponed_arrays_count == 0) { @@ -1071,7 +1098,9 @@ void RM_ReplySetArrayLength(RedisModuleCtx *ctx, long len) { * * The function always returns REDISMODULE_OK. */ int RM_ReplyWithStringBuffer(RedisModuleCtx *ctx, const char *buf, size_t len) { - addReplyBulkCBuffer(ctx->client,(char*)buf,len); + client *c = moduleGetReplyClient(ctx); + if (c == NULL) return REDISMODULE_OK; + addReplyBulkCBuffer(c,(char*)buf,len); return REDISMODULE_OK; } @@ -1079,7 +1108,9 @@ int RM_ReplyWithStringBuffer(RedisModuleCtx *ctx, const char *buf, size_t len) { * * The function always returns REDISMODULE_OK. 
*/ int RM_ReplyWithString(RedisModuleCtx *ctx, RedisModuleString *str) { - addReplyBulk(ctx->client,str); + client *c = moduleGetReplyClient(ctx); + if (c == NULL) return REDISMODULE_OK; + addReplyBulk(c,str); return REDISMODULE_OK; } @@ -1088,7 +1119,9 @@ int RM_ReplyWithString(RedisModuleCtx *ctx, RedisModuleString *str) { * * The function always returns REDISMODULE_OK. */ int RM_ReplyWithNull(RedisModuleCtx *ctx) { - addReply(ctx->client,shared.nullbulk); + client *c = moduleGetReplyClient(ctx); + if (c == NULL) return REDISMODULE_OK; + addReply(c,shared.nullbulk); return REDISMODULE_OK; } @@ -1099,8 +1132,10 @@ int RM_ReplyWithNull(RedisModuleCtx *ctx) { * * The function always returns REDISMODULE_OK. */ int RM_ReplyWithCallReply(RedisModuleCtx *ctx, RedisModuleCallReply *reply) { + client *c = moduleGetReplyClient(ctx); + if (c == NULL) return REDISMODULE_OK; sds proto = sdsnewlen(reply->proto, reply->protolen); - addReplySds(ctx->client,proto); + addReplySds(c,proto); return REDISMODULE_OK; } @@ -1111,7 +1146,9 @@ int RM_ReplyWithCallReply(RedisModuleCtx *ctx, RedisModuleCallReply *reply) { * * The function always returns REDISMODULE_OK. */ int RM_ReplyWithDouble(RedisModuleCtx *ctx, double d) { - addReplyDouble(ctx->client,d); + client *c = moduleGetReplyClient(ctx); + if (c == NULL) return REDISMODULE_OK; + addReplyDouble(c,d); return REDISMODULE_OK; } @@ -3173,7 +3210,8 @@ RedisModuleBlockedClient *RM_BlockClient(RedisModuleCtx *ctx, RedisModuleCmdFunc bc->timeout_callback = timeout_callback; bc->free_privdata = free_privdata; bc->privdata = NULL; - bc->reply_client = NULL; + bc->reply_client = createClient(-1); + bc->reply_client->flags |= CLIENT_MODULE; c->bpop.timeout = timeout_ms ? (mstime()+timeout_ms) : 0; blockClient(c,BLOCKED_MODULE); @@ -3236,7 +3274,9 @@ void moduleHandleBlockedClients(void) { /* Release the lock during the loop, as long as we don't * touch the shared list. 
*/ - if (c != NULL && bc->reply_callback != NULL) { + /* Call the reply callback if the client is valid and we have + * any callback. */ + if (c && bc->reply_callback) { RedisModuleCtx ctx = REDISMODULE_CTX_INIT; ctx.flags |= REDISMODULE_CTX_BLOCKED_REPLY; ctx.blocked_privdata = bc->privdata; @@ -3246,8 +3286,24 @@ void moduleHandleBlockedClients(void) { moduleHandlePropagationAfterCommandCallback(&ctx); moduleFreeContext(&ctx); } + + /* Free privdata if any. */ if (bc->privdata && bc->free_privdata) bc->free_privdata(bc->privdata); + + /* It is possible that this blocked client object accumulated + * replies to send to the client in a thread safe context. + * We need to glue such replies to the client output buffer and + * free the temporary client we just used for the replies. */ + if (c) { + if (bc->reply_client->bufpos) + addReplyString(c,bc->reply_client->buf, + bc->reply_client->bufpos); + if (listLength(bc->reply_client->reply)) + listJoin(c->reply,bc->reply_client->reply); + } + freeClient(bc->reply_client); + if (c != NULL) unblockClient(c); /* Free 'bc' only after unblocking the client, since it is * referenced in the client blocking context, and must be valid @@ -3332,8 +3388,7 @@ RedisModuleCtx *RM_GetThreadSafeContext(RedisModuleBlockedClient *bc) { memcpy(ctx,&empty,sizeof(empty)); if (bc) { ctx->blocked_client = bc; - if (bc->reply_client == NULL) - bc->reply_client = createClient(-1); + ctx->module = bc->module; } ctx->flags |= REDISMODULE_CTX_THREAD_SAFE; return ctx; From 346677a10c2ab3f6a817622ab1b7eff84a40ce29 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 2 May 2017 15:19:28 +0200 Subject: [PATCH 0331/1722] Modules TSC: Export symbols of the new API. 
--- src/module.c | 4 ++++ src/redismodule.h | 8 ++++++++ 2 files changed, 12 insertions(+) diff --git a/src/module.c b/src/module.c index 5a6189ebc..e9b95f974 100644 --- a/src/module.c +++ b/src/module.c @@ -3760,4 +3760,8 @@ void moduleRegisterCoreAPI(void) { REGISTER_API(GetBlockedClientPrivateData); REGISTER_API(AbortBlock); REGISTER_API(Milliseconds); + REGISTER_API(GetThreadSafeContext); + REGISTER_API(FreeThreadSafeContext); + REGISTER_API(ThreadSafeContextLock); + REGISTER_API(ThreadSafeContextUnlock); } diff --git a/src/redismodule.h b/src/redismodule.h index fcd6aeca3..2f2e3c923 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -214,6 +214,10 @@ int REDISMODULE_API_FUNC(RedisModule_IsBlockedTimeoutRequest)(RedisModuleCtx *ct void *REDISMODULE_API_FUNC(RedisModule_GetBlockedClientPrivateData)(RedisModuleCtx *ctx); int REDISMODULE_API_FUNC(RedisModule_AbortBlock)(RedisModuleBlockedClient *bc); long long REDISMODULE_API_FUNC(RedisModule_Milliseconds)(void); +RedisModuleCtx *REDISMODULE_API_FUNC(RedisModule_GetThreadSafeContext)(RedisModuleBlockedClient *bc); +void REDISMODULE_API_FUNC(RedisModule_FreeThreadSafeContext)(RedisModuleCtx *ctx); +void REDISMODULE_API_FUNC(RedisModule_ThreadSafeContextLock)(RedisModuleCtx *ctx); +void REDISMODULE_API_FUNC(RedisModule_ThreadSafeContextUnlock)(RedisModuleCtx *ctx); /* This is included inline inside each Redis module. 
*/ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int apiver) __attribute__((unused)); @@ -322,6 +326,10 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int REDISMODULE_GET_API(GetBlockedClientPrivateData); REDISMODULE_GET_API(AbortBlock); REDISMODULE_GET_API(Milliseconds); + REDISMODULE_GET_API(GetThreadSafeContext); + REDISMODULE_GET_API(FreeThreadSafeContext); + REDISMODULE_GET_API(ThreadSafeContextLock); + REDISMODULE_GET_API(ThreadSafeContextUnlock); RedisModule_SetModuleAttribs(ctx,name,ver,apiver); return REDISMODULE_OK; From 441c323498e5524c8d92d0f51e3e00a6751418a6 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 3 May 2017 11:26:21 +0200 Subject: [PATCH 0332/1722] Modules TSC: Release the GIL for all the time we are blocked. Instead of giving the module background operations just a small time to run in the beforeSleep() function, we can have the lock released for all the time we are blocked in the multiplexing syscall. --- src/ae.c | 12 +++++++++++ src/ae.h | 2 ++ src/module.c | 45 ++++++++++++++++++++++++---------------- src/modules/helloblock.c | 42 +++++++++++++++++++++++++++++++++++++ src/server.c | 17 ++++++++++++--- src/server.h | 4 +++- 6 files changed, 100 insertions(+), 22 deletions(-) diff --git a/src/ae.c b/src/ae.c index e66808a81..ecbaa94f3 100644 --- a/src/ae.c +++ b/src/ae.c @@ -75,6 +75,7 @@ aeEventLoop *aeCreateEventLoop(int setsize) { eventLoop->stop = 0; eventLoop->maxfd = -1; eventLoop->beforesleep = NULL; + eventLoop->aftersleep = NULL; if (aeApiCreate(eventLoop) == -1) goto err; /* Events with mask == AE_NONE are not set. So let's initialize the * vector with it. */ @@ -397,7 +398,14 @@ int aeProcessEvents(aeEventLoop *eventLoop, int flags) } } + /* Call the multiplexing API, will return only on timeout or when + * some event fires. */ numevents = aeApiPoll(eventLoop, tvp); + + /* After sleep callback. 
*/ + if (eventLoop->aftersleep != NULL) + eventLoop->aftersleep(eventLoop); + for (j = 0; j < numevents; j++) { aeFileEvent *fe = &eventLoop->events[eventLoop->fired[j].fd]; int mask = eventLoop->fired[j].mask; @@ -463,3 +471,7 @@ char *aeGetApiName(void) { void aeSetBeforeSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *beforesleep) { eventLoop->beforesleep = beforesleep; } + +void aeSetAfterSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *aftersleep) { + eventLoop->aftersleep = aftersleep; +} diff --git a/src/ae.h b/src/ae.h index 827c4c9e4..e3617759b 100644 --- a/src/ae.h +++ b/src/ae.h @@ -98,6 +98,7 @@ typedef struct aeEventLoop { int stop; void *apidata; /* This is used for polling API specific data */ aeBeforeSleepProc *beforesleep; + aeBeforeSleepProc *aftersleep; } aeEventLoop; /* Prototypes */ @@ -117,6 +118,7 @@ int aeWait(int fd, int mask, long long milliseconds); void aeMain(aeEventLoop *eventLoop); char *aeGetApiName(void); void aeSetBeforeSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *beforesleep); +void aeSetAfterSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *aftersleep); int aeGetSetSize(aeEventLoop *eventLoop); int aeResizeSetSize(aeEventLoop *eventLoop, int setsize); diff --git a/src/module.c b/src/module.c index e9b95f974..9b78a4a56 100644 --- a/src/module.c +++ b/src/module.c @@ -433,6 +433,7 @@ void moduleFreeContext(RedisModuleCtx *ctx) { "calls.", ctx->module->name); } + if (ctx->flags & REDISMODULE_CTX_THREAD_SAFE) freeClient(ctx->client); } /* Helper function for when a command callback is called, in order to handle @@ -967,8 +968,10 @@ int RM_WrongArity(RedisModuleCtx *ctx) { * context of a thread safe context that was not initialized with a blocked * client object. 
*/ client *moduleGetReplyClient(RedisModuleCtx *ctx) { - if (ctx->client) return ctx->client; - if (ctx->blocked_client) return ctx->blocked_client->reply_client; + if (!(ctx->flags & REDISMODULE_CTX_THREAD_SAFE) && ctx->client) + return ctx->client; + if (ctx->blocked_client) + return ctx->blocked_client->reply_client; return NULL; } @@ -3351,20 +3354,6 @@ void *RM_GetBlockedClientPrivateData(RedisModuleCtx *ctx) { * Thread Safe Contexts * -------------------------------------------------------------------------- */ -/* Operations executed in thread safe contexts use a global lock in order to - * be ran at a safe time. This function unlocks and re-acquire the locks: - * hopefully with *any* sane implementation of pthreads, this will allow the - * modules to make progresses. - * - * This function is called in beforeSleep(). */ -void moduleCooperativeMultiTaskingCycle(void) { - if (dictSize(modules) == 0) return; /* No modules, no async ops. */ - pthread_mutex_unlock(&moduleGIL); - /* Here hopefully thread modules waiting to be executed at a safe time - * should be able to acquire the lock. */ - pthread_mutex_lock(&moduleGIL); -} - /* Return a context which can be used inside threads to make Redis context * calls with certain modules APIs. If 'bc' is not NULL then the module will * be bound to a blocked client, and it will be possible to use the @@ -3381,7 +3370,9 @@ void moduleCooperativeMultiTaskingCycle(void) { * This is not needed when using `RedisModule_Reply*` functions, assuming * that a blocked client was used when the context was created, otherwise * no RedisModule_Reply* call should be made at all. - */ + * + * TODO: thread safe contexts do not inherit the blocked client + * selected database. 
*/ RedisModuleCtx *RM_GetThreadSafeContext(RedisModuleBlockedClient *bc) { RedisModuleCtx *ctx = zmalloc(sizeof(*ctx)); RedisModuleCtx empty = REDISMODULE_CTX_INIT; @@ -3391,6 +3382,11 @@ RedisModuleCtx *RM_GetThreadSafeContext(RedisModuleBlockedClient *bc) { ctx->module = bc->module; } ctx->flags |= REDISMODULE_CTX_THREAD_SAFE; + /* Even when the context is associated with a blocked client, we can't + * access it safely from another thread, so we create a fake client here + * in order to keep things like the currently selected database and similar + * things. */ + ctx->client = createClient(-1); return ctx; } @@ -3405,12 +3401,20 @@ void RM_FreeThreadSafeContext(RedisModuleCtx *ctx) { * a blocked client connected to the thread safe context. */ void RM_ThreadSafeContextLock(RedisModuleCtx *ctx) { DICT_NOTUSED(ctx); - pthread_mutex_lock(&moduleGIL); + moduleAcquireGIL(); } /* Release the server lock after a thread safe API call was executed. */ void RM_ThreadSafeContextUnlock(RedisModuleCtx *ctx) { DICT_NOTUSED(ctx); + moduleReleaseGIL(); +} + +void moduleAcquireGIL(void) { + pthread_mutex_lock(&moduleGIL); +} + +void moduleReleaseGIL(void) { pthread_mutex_unlock(&moduleGIL); } @@ -3655,6 +3659,11 @@ void moduleCommand(client *c) { } } +/* Return the number of registered modules. */ +size_t moduleCount(void) { + return dictSize(modules); +} + /* Register all the APIs we export. Keep this function at the end of the * file so that's easy to seek it to add new entries. */ void moduleRegisterCoreAPI(void) { diff --git a/src/modules/helloblock.c b/src/modules/helloblock.c index 71ec9b121..e760e33fb 100644 --- a/src/modules/helloblock.c +++ b/src/modules/helloblock.c @@ -105,6 +105,45 @@ int HelloBlock_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int a return REDISMODULE_OK; } +/* The thread entry point that actually executes the blocking part + * of the command HELLO.KEYS. 
*/ +void *HelloKeys_ThreadMain(void *arg) { + RedisModuleBlockedClient *bc = arg; + RedisModuleCtx *ctx = RedisModule_GetThreadSafeContext(bc); + + RedisModule_ThreadSafeContextLock(ctx); + RedisModule_ReplyWithLongLong(ctx,1234); + RedisModule_ThreadSafeContextUnlock(ctx); + + RedisModule_UnblockClient(bc,NULL); + return NULL; +} + +/* HELLO.KEYS -- Return all the keys in the current database without blocking + * the server. The keys do not represent a point-in-time state so only the keys + * that were in the database from the start to the end are guaranteed to be + * there. */ +int HelloKeys_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + REDISMODULE_NOT_USED(argv); + if (argc != 1) return RedisModule_WrongArity(ctx); + + pthread_t tid; + + /* Note that when blocking the client we do not set any callback: no + * timeout is possible since we passed '0', nor we need a reply callback + * because we'll use the thread safe context to accumulate a reply. */ + RedisModuleBlockedClient *bc = RedisModule_BlockClient(ctx,NULL,NULL,NULL,0); + + /* Now that we setup a blocking client, we need to pass the control + * to the thread. However we need to pass arguments to the thread: + * the reference to the blocked client handle. */ + if (pthread_create(&tid,NULL,HelloKeys_ThreadMain,bc) != 0) { + RedisModule_AbortBlock(bc); + return RedisModule_ReplyWithError(ctx,"-ERR Can't start thread"); + } + return REDISMODULE_OK; +} + /* This function must be present on each Redis module. It is used in order to * register the commands into the Redis server. 
*/ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { @@ -117,6 +156,9 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) if (RedisModule_CreateCommand(ctx,"hello.block", HelloBlock_RedisCommand,"",0,0,0) == REDISMODULE_ERR) return REDISMODULE_ERR; + if (RedisModule_CreateCommand(ctx,"hello.keys", + HelloKeys_RedisCommand,"",0,0,0) == REDISMODULE_ERR) + return REDISMODULE_ERR; return REDISMODULE_OK; } diff --git a/src/server.c b/src/server.c index e9013bf60..6be12cffe 100644 --- a/src/server.c +++ b/src/server.c @@ -1172,9 +1172,6 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { void beforeSleep(struct aeEventLoop *eventLoop) { UNUSED(eventLoop); - /* Give some run time to modules threads using thread safe contexts. */ - moduleCooperativeMultiTaskingCycle(); - /* Call the Redis Cluster before sleep function. Note that this function * may change the state of Redis Cluster (from ok to fail or vice versa), * so it's a good idea to call it before serving the unblocked clients @@ -1219,6 +1216,19 @@ void beforeSleep(struct aeEventLoop *eventLoop) { /* Handle writes with pending output buffers. */ handleClientsWithPendingWrites(); + + /* Before we are going to sleep, let the threads access the dataset by + * releasing the GIL. Redis main thread will not touch anything at this + * time. */ + if (moduleCount()) moduleReleaseGIL(); +} + +/* This function is called immediately after the event loop multiplexing + * API returned, and the control is going to soon return to Redis by invoking + * the different events callbacks. 
*/ +void afterSleep(struct aeEventLoop *eventLoop) { + UNUSED(eventLoop); + if (moduleCount()) moduleAcquireGIL(); } /* =========================== Server initialization ======================== */ @@ -3808,6 +3818,7 @@ int main(int argc, char **argv) { } aeSetBeforeSleepProc(server.el,beforeSleep); + aeSetAfterSleepProc(server.el,afterSleep); aeMain(server.el); aeDeleteEventLoop(server.el); return 0; diff --git a/src/server.h b/src/server.h index 956370296..2bc49299b 100644 --- a/src/server.h +++ b/src/server.h @@ -1294,7 +1294,9 @@ void unblockClientFromModule(client *c); void moduleHandleBlockedClients(void); void moduleBlockedClientTimedOut(client *c); void moduleBlockedClientPipeReadable(aeEventLoop *el, int fd, void *privdata, int mask); -void moduleCooperativeMultiTaskingCycle(void); +size_t moduleCount(void); +void moduleAcquireGIL(void); +void moduleReleaseGIL(void); /* Utils */ long long ustime(void); From a61003ccffcd430a44738668d36f93319a8b9de5 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 3 May 2017 14:07:10 +0200 Subject: [PATCH 0333/1722] Module: fix RedisModule_Call() "l" specifier to create a raw string. --- src/module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/module.c b/src/module.c index 9b78a4a56..e14a39433 100644 --- a/src/module.c +++ b/src/module.c @@ -2448,7 +2448,7 @@ robj **moduleCreateArgvFromUserFormat(const char *cmdname, const char *fmt, int argv[argc++] = createStringObject(buf,len); } else if (*p == 'l') { long ll = va_arg(ap,long long); - argv[argc++] = createStringObjectFromLongLong(ll); + argv[argc++] = createObject(OBJ_STRING,sdsfromlonglong(ll)); } else if (*p == 'v') { /* A vector of strings */ robj **v = va_arg(ap, void*); From e89b3655b86a5ab1eeb8a3c12e6cbb317197caa4 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 3 May 2017 14:08:12 +0200 Subject: [PATCH 0334/1722] Modules TSC: HELLO.KEYS example draft finished. 
--- src/modules/helloblock.c | 37 +++++++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/src/modules/helloblock.c b/src/modules/helloblock.c index e760e33fb..e834ec786 100644 --- a/src/modules/helloblock.c +++ b/src/modules/helloblock.c @@ -106,15 +106,44 @@ int HelloBlock_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int a } /* The thread entry point that actually executes the blocking part - * of the command HELLO.KEYS. */ + * of the command HELLO.KEYS. + * + * Note: this implementation is very simple on purpose, so no duplicated + * keys (returned by SCAN) are filtered. However adding such a functionality + * would be trivial just using any data structure implementing a dictionary + * in order to filter the duplicated items. */ void *HelloKeys_ThreadMain(void *arg) { RedisModuleBlockedClient *bc = arg; RedisModuleCtx *ctx = RedisModule_GetThreadSafeContext(bc); + long long cursor = 1; + size_t replylen = 0; - RedisModule_ThreadSafeContextLock(ctx); - RedisModule_ReplyWithLongLong(ctx,1234); - RedisModule_ThreadSafeContextUnlock(ctx); + RedisModule_ReplyWithArray(ctx,REDISMODULE_POSTPONED_ARRAY_LEN); + do { + RedisModule_ThreadSafeContextLock(ctx); + RedisModuleCallReply *reply = RedisModule_Call(ctx, + "SCAN","l",(long long)cursor); + RedisModule_ThreadSafeContextUnlock(ctx); + size_t items = RedisModule_CallReplyLength(reply); + size_t j; + for (j = 0; j < items; j++) { + RedisModuleCallReply *ele = + RedisModule_CallReplyArrayElement(reply,j); + if (j == 0) { + RedisModuleString *s = RedisModule_CreateStringFromCallReply(ele); + RedisModule_StringToLongLong(s,&cursor); + RedisModule_FreeString(ctx,s); + } else { + RedisModule_ReplyWithCallReply(ctx,ele); + replylen++; + } + } + RedisModule_FreeCallReply(reply); + } while (cursor != 0); + RedisModule_ReplySetArrayLength(ctx,replylen); + + RedisModule_FreeThreadSafeContext(ctx); RedisModule_UnblockClient(bc,NULL); return NULL; } From 
b6e4ca1402ed76d8dd1678b406a58c8ade5e471b Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 3 May 2017 14:10:21 +0200 Subject: [PATCH 0335/1722] Modules: remove unused var in example module. --- src/modules/helloworld.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/modules/helloworld.c b/src/modules/helloworld.c index 4e30af2a0..3b00dea77 100644 --- a/src/modules/helloworld.c +++ b/src/modules/helloworld.c @@ -241,7 +241,6 @@ int HelloRepl1_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int a { REDISMODULE_NOT_USED(argv); REDISMODULE_NOT_USED(argc); - RedisModuleCallReply *reply; RedisModule_AutoMemory(ctx); /* This will be replicated *after* the two INCR statements, since @@ -258,8 +257,8 @@ int HelloRepl1_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int a /* Using the "!" modifier we replicate the command if it * modified the dataset in some way. */ - reply = RedisModule_Call(ctx,"INCR","c!","foo"); - reply = RedisModule_Call(ctx,"INCR","c!","bar"); + RedisModule_Call(ctx,"INCR","c!","foo"); + RedisModule_Call(ctx,"INCR","c!","bar"); RedisModule_ReplyWithLongLong(ctx,0); From 8871307574930ff7fba5b5375d1443edea020816 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 3 May 2017 14:15:25 +0200 Subject: [PATCH 0336/1722] adlist: fix listJoin() to handle empty lists. --- src/adlist.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/adlist.c b/src/adlist.c index 96575c72e..0f2e4a559 100644 --- a/src/adlist.c +++ b/src/adlist.c @@ -345,8 +345,14 @@ void listRotate(list *list) { /* Add all the elements of the list 'o' at the end of the * list 'l'. The list 'other' remains empty but otherwise valid. */ void listJoin(list *l, list *o) { - l->tail->next = o->head; - o->head->prev = l->tail; + if (o->head) + o->head->prev = l->tail; + + if (l->tail) + l->tail->next = o->head; + else + l->head = o->head; + l->tail = o->tail; /* Setup other as an empty list. 
*/ From 833ebe4ab96630a03d08691c60d566dd46ce5021 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 3 May 2017 14:53:56 +0200 Subject: [PATCH 0337/1722] adlist: fix final list count in listJoin(). --- src/adlist.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/adlist.c b/src/adlist.c index 0f2e4a559..f0a261b61 100644 --- a/src/adlist.c +++ b/src/adlist.c @@ -354,6 +354,7 @@ void listJoin(list *l, list *o) { l->head = o->head; l->tail = o->tail; + l->len += o->len; /* Setup other as an empty list. */ o->head = l->tail = NULL; From 98c48456e802ec4234a083411d061485d0c59d38 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 3 May 2017 14:54:48 +0200 Subject: [PATCH 0338/1722] Modules TSC: put the client in the pending write list. --- src/module.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/module.c b/src/module.c index e14a39433..0ae7ac461 100644 --- a/src/module.c +++ b/src/module.c @@ -3307,7 +3307,19 @@ void moduleHandleBlockedClients(void) { } freeClient(bc->reply_client); - if (c != NULL) unblockClient(c); + if (c != NULL) { + unblockClient(c); + /* Put the client in the list of clients that need to write + * if there are pending replies here. This is needed since + * during a non blocking command the client may receive output. */ + if (clientHasPendingReplies(c) && + !(c->flags & CLIENT_PENDING_WRITE)) + { + c->flags |= CLIENT_PENDING_WRITE; + listAddNodeHead(server.clients_pending_write,c); + } + } + /* Free 'bc' only after unblocking the client, since it is * referenced in the client blocking context, and must be valid * when calling unblockClient(). */ From 86274d062f69ee4af566f60bcbec12cd42ceee69 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 3 May 2017 23:43:49 +0200 Subject: [PATCH 0339/1722] Modules TSC: HELLO.KEYS reply format fixed. 
--- src/modules/helloblock.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/src/modules/helloblock.c b/src/modules/helloblock.c index e834ec786..3ebf10e11 100644 --- a/src/modules/helloblock.c +++ b/src/modules/helloblock.c @@ -115,7 +115,7 @@ int HelloBlock_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int a void *HelloKeys_ThreadMain(void *arg) { RedisModuleBlockedClient *bc = arg; RedisModuleCtx *ctx = RedisModule_GetThreadSafeContext(bc); - long long cursor = 1; + long long cursor = 0; size_t replylen = 0; RedisModule_ReplyWithArray(ctx,REDISMODULE_POSTPONED_ARRAY_LEN); @@ -125,19 +125,21 @@ void *HelloKeys_ThreadMain(void *arg) { "SCAN","l",(long long)cursor); RedisModule_ThreadSafeContextUnlock(ctx); - size_t items = RedisModule_CallReplyLength(reply); - size_t j; - for (j = 0; j < items; j++) { + RedisModuleCallReply *cr_cursor = + RedisModule_CallReplyArrayElement(reply,0); + RedisModuleCallReply *cr_keys = + RedisModule_CallReplyArrayElement(reply,1); + + RedisModuleString *s = RedisModule_CreateStringFromCallReply(cr_cursor); + RedisModule_StringToLongLong(s,&cursor); + RedisModule_FreeString(ctx,s); + + size_t items = RedisModule_CallReplyLength(cr_keys); + for (size_t j = 0; j < items; j++) { RedisModuleCallReply *ele = - RedisModule_CallReplyArrayElement(reply,j); - if (j == 0) { - RedisModuleString *s = RedisModule_CreateStringFromCallReply(ele); - RedisModule_StringToLongLong(s,&cursor); - RedisModule_FreeString(ctx,s); - } else { - RedisModule_ReplyWithCallReply(ctx,ele); - replylen++; - } + RedisModule_CallReplyArrayElement(cr_keys,j); + RedisModule_ReplyWithCallReply(ctx,ele); + replylen++; } RedisModule_FreeCallReply(reply); } while (cursor != 0); From 71e9725dc71e9d4c9bc9ffde472b07f5453fbf18 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 4 May 2017 10:35:32 +0200 Subject: [PATCH 0340/1722] Lazyfree: fix lazyfreeGetPendingObjectsCount() race reading counter. 
--- src/lazyfree.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/lazyfree.c b/src/lazyfree.c index 8d56e1031..64ed68466 100644 --- a/src/lazyfree.c +++ b/src/lazyfree.c @@ -8,7 +8,9 @@ pthread_mutex_t lazyfree_objects_mutex = PTHREAD_MUTEX_INITIALIZER; /* Return the number of currently pending objects to free. */ size_t lazyfreeGetPendingObjectsCount(void) { - return lazyfree_objects; + size_t aux; + atomicGet(lazyfree_objects,aux,lazyfree_objects_mutex); + return aux; } /* Return the amount of work needed in order to free an object. From ac4a9384e9644b6ed34a103eb44f5c6ed0432dbf Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 4 May 2017 17:00:53 +0200 Subject: [PATCH 0341/1722] Simplify atomicvar.h usage by having the mutex name implicit. --- src/atomicvar.h | 30 +++++++++++++++--------------- src/lazyfree.c | 16 +++++++--------- src/zmalloc.c | 6 +++--- 3 files changed, 25 insertions(+), 27 deletions(-) diff --git a/src/atomicvar.h b/src/atomicvar.h index c522db3e9..1efa7bffb 100644 --- a/src/atomicvar.h +++ b/src/atomicvar.h @@ -54,40 +54,40 @@ #if defined(__ATOMIC_RELAXED) && !defined(__sun) && (!defined(__clang__) || !defined(__APPLE__) || __apple_build_version__ > 4210057) /* Implementation using __atomic macros. */ -#define atomicIncr(var,count,mutex) __atomic_add_fetch(&var,(count),__ATOMIC_RELAXED) -#define atomicDecr(var,count,mutex) __atomic_sub_fetch(&var,(count),__ATOMIC_RELAXED) -#define atomicGet(var,dstvar,mutex) do { \ +#define atomicIncr(var,count) __atomic_add_fetch(&var,(count),__ATOMIC_RELAXED) +#define atomicDecr(var,count) __atomic_sub_fetch(&var,(count),__ATOMIC_RELAXED) +#define atomicGet(var,dstvar) do { \ dstvar = __atomic_load_n(&var,__ATOMIC_RELAXED); \ } while(0) #elif defined(HAVE_ATOMIC) /* Implementation using __sync macros. 
*/ -#define atomicIncr(var,count,mutex) __sync_add_and_fetch(&var,(count)) -#define atomicDecr(var,count,mutex) __sync_sub_and_fetch(&var,(count)) -#define atomicGet(var,dstvar,mutex) do { \ +#define atomicIncr(var,count) __sync_add_and_fetch(&var,(count)) +#define atomicDecr(var,count) __sync_sub_and_fetch(&var,(count)) +#define atomicGet(var,dstvar) do { \ dstvar = __sync_sub_and_fetch(&var,0); \ } while(0) #else /* Implementation using pthread mutex. */ -#define atomicIncr(var,count,mutex) do { \ - pthread_mutex_lock(&mutex); \ +#define atomicIncr(var,count) do { \ + pthread_mutex_lock(&var ## _mutex); \ var += (count); \ - pthread_mutex_unlock(&mutex); \ + pthread_mutex_unlock(&var ## _mutex); \ } while(0) -#define atomicDecr(var,count,mutex) do { \ - pthread_mutex_lock(&mutex); \ +#define atomicDecr(var,count) do { \ + pthread_mutex_lock(&var ## _mutex); \ var -= (count); \ - pthread_mutex_unlock(&mutex); \ + pthread_mutex_unlock(&var ## _mutex); \ } while(0) -#define atomicGet(var,dstvar,mutex) do { \ - pthread_mutex_lock(&mutex); \ +#define atomicGet(var,dstvar) do { \ + pthread_mutex_lock(&var ## _mutex); \ dstvar = var; \ - pthread_mutex_unlock(&mutex); \ + pthread_mutex_unlock(&var ## _mutex); \ } while(0) #endif diff --git a/src/lazyfree.c b/src/lazyfree.c index 64ed68466..809ebdb57 100644 --- a/src/lazyfree.c +++ b/src/lazyfree.c @@ -9,7 +9,7 @@ pthread_mutex_t lazyfree_objects_mutex = PTHREAD_MUTEX_INITIALIZER; /* Return the number of currently pending objects to free. */ size_t lazyfreeGetPendingObjectsCount(void) { size_t aux; - atomicGet(lazyfree_objects,aux,lazyfree_objects_mutex); + atomicGet(lazyfree_objects,aux); return aux; } @@ -67,7 +67,7 @@ int dbAsyncDelete(redisDb *db, robj *key) { /* If releasing the object is too much work, let's put it into the * lazy free list. 
*/ if (free_effort > LAZYFREE_THRESHOLD) { - atomicIncr(lazyfree_objects,1,lazyfree_objects_mutex); + atomicIncr(lazyfree_objects,1); bioCreateBackgroundJob(BIO_LAZY_FREE,val,NULL,NULL); dictSetVal(db->dict,de,NULL); } @@ -91,8 +91,7 @@ void emptyDbAsync(redisDb *db) { dict *oldht1 = db->dict, *oldht2 = db->expires; db->dict = dictCreate(&dbDictType,NULL); db->expires = dictCreate(&keyptrDictType,NULL); - atomicIncr(lazyfree_objects,dictSize(oldht1), - lazyfree_objects_mutex); + atomicIncr(lazyfree_objects,dictSize(oldht1)); bioCreateBackgroundJob(BIO_LAZY_FREE,NULL,oldht1,oldht2); } @@ -104,8 +103,7 @@ void slotToKeyFlushAsync(void) { server.cluster->slots_to_keys = raxNew(); memset(server.cluster->slots_keys_count,0, sizeof(server.cluster->slots_keys_count)); - atomicIncr(lazyfree_objects,old->numele, - lazyfree_objects_mutex); + atomicIncr(lazyfree_objects,old->numele); bioCreateBackgroundJob(BIO_LAZY_FREE,NULL,NULL,old); } @@ -113,7 +111,7 @@ void slotToKeyFlushAsync(void) { * updating the count of objects to release. */ void lazyfreeFreeObjectFromBioThread(robj *o) { decrRefCount(o); - atomicDecr(lazyfree_objects,1,lazyfree_objects_mutex); + atomicDecr(lazyfree_objects,1); } /* Release a database from the lazyfree thread. 
The 'db' pointer is the @@ -125,7 +123,7 @@ void lazyfreeFreeDatabaseFromBioThread(dict *ht1, dict *ht2) { size_t numkeys = dictSize(ht1); dictRelease(ht1); dictRelease(ht2); - atomicDecr(lazyfree_objects,numkeys,lazyfree_objects_mutex); + atomicDecr(lazyfree_objects,numkeys); } /* Release the skiplist mapping Redis Cluster keys to slots in the @@ -133,5 +131,5 @@ void lazyfreeFreeDatabaseFromBioThread(dict *ht1, dict *ht2) { void lazyfreeFreeSlotsMapFromBioThread(rax *rt) { size_t len = rt->numele; raxFree(rt); - atomicDecr(lazyfree_objects,len,lazyfree_objects_mutex); + atomicDecr(lazyfree_objects,len); } diff --git a/src/zmalloc.c b/src/zmalloc.c index f653760a7..dfcfc01d4 100644 --- a/src/zmalloc.c +++ b/src/zmalloc.c @@ -74,7 +74,7 @@ void zlibc_free(void *ptr) { size_t _n = (__n); \ if (_n&(sizeof(long)-1)) _n += sizeof(long)-(_n&(sizeof(long)-1)); \ if (zmalloc_thread_safe) { \ - atomicIncr(used_memory,__n,used_memory_mutex); \ + atomicIncr(used_memory,__n); \ } else { \ used_memory += _n; \ } \ @@ -84,7 +84,7 @@ void zlibc_free(void *ptr) { size_t _n = (__n); \ if (_n&(sizeof(long)-1)) _n += sizeof(long)-(_n&(sizeof(long)-1)); \ if (zmalloc_thread_safe) { \ - atomicDecr(used_memory,__n,used_memory_mutex); \ + atomicDecr(used_memory,__n); \ } else { \ used_memory -= _n; \ } \ @@ -222,7 +222,7 @@ size_t zmalloc_used_memory(void) { size_t um; if (zmalloc_thread_safe) { - atomicGet(used_memory,um,used_memory_mutex); + atomicGet(used_memory,um); } else { um = used_memory; } From 42948bc052746e7ecfe3cb58c1cca2b043e0b359 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 9 May 2017 11:57:09 +0200 Subject: [PATCH 0342/1722] Modules TSC: Improve inter-thread synchronization. More work to do with server.unixtime and similar. Need to write Helgrind suppression file in order to suppress the false positives. 
--- src/atomicvar.h | 48 +++++++++++++++++++++++++++++++++++++++++------- src/evict.c | 15 +++++++++++++++ src/networking.c | 5 ++++- src/server.c | 15 +++++++++++---- src/server.h | 12 ++++-------- 5 files changed, 75 insertions(+), 20 deletions(-) diff --git a/src/atomicvar.h b/src/atomicvar.h index 1efa7bffb..9b5628ad6 100644 --- a/src/atomicvar.h +++ b/src/atomicvar.h @@ -3,18 +3,29 @@ * * The exported interaface is composed of three macros: * - * atomicIncr(var,count,mutex) -- Increment the atomic counter - * atomicDecr(var,count,mutex) -- Decrement the atomic counter - * atomicGet(var,dstvar,mutex) -- Fetch the atomic counter value + * atomicIncr(var,count) -- Increment the atomic counter + * atomicGetIncr(var,oldvalue_var,count) -- Get and increment the atomic counter + * atomicDecr(var,count) -- Decrement the atomic counter + * atomicGet(var,dstvar) -- Fetch the atomic counter value + * atomicSet(var,value) -- Set the atomic counter value + * + * The variable 'var' should also have a declared mutex with the same + * name and the "_mutex" postfix, for instance: + * + * long myvar; + * pthread_mutex_t myvar_mutex; + * atomicSet(myvar,12345); * * If atomic primitives are availble (tested in config.h) the mutex * is not used. * - * Never use return value from the macros. To update and get use instead: + * Never use return value from the macros, instead use the atomicGetIncr() + * if you need to get the current value and increment it atomically, like + * in the following example: * - * atomicIncr(mycounter,...); - * atomicGet(mycounter,newvalue); - * doSomethingWith(newvalue); + * long oldvalue; + * atomicGetIncr(myvar,oldvalue,1); + * doSomethingWith(oldvalue); * * ---------------------------------------------------------------------------- * @@ -55,19 +66,29 @@ /* Implementation using __atomic macros. 
*/ #define atomicIncr(var,count) __atomic_add_fetch(&var,(count),__ATOMIC_RELAXED) +#define atomicGetIncr(var,oldvalue_var,count) do { \ + oldvalue_var = __atomic_fetch_add(&var,(count),__ATOMIC_RELAXED); \ +} while(0) #define atomicDecr(var,count) __atomic_sub_fetch(&var,(count),__ATOMIC_RELAXED) #define atomicGet(var,dstvar) do { \ dstvar = __atomic_load_n(&var,__ATOMIC_RELAXED); \ } while(0) +#define atomicSet(var,value) __atomic_store_n(&var,value,__ATOMIC_RELAXED) #elif defined(HAVE_ATOMIC) /* Implementation using __sync macros. */ #define atomicIncr(var,count) __sync_add_and_fetch(&var,(count)) +#define atomicGetIncr(var,oldvalue_var,count) do { \ + oldvalue_var = __sync_fetch_and_add(&var,(count)); \ +} while(0) #define atomicDecr(var,count) __sync_sub_and_fetch(&var,(count)) #define atomicGet(var,dstvar) do { \ dstvar = __sync_sub_and_fetch(&var,0); \ } while(0) +#define atomicSet(var,value) do { \ + while(!__sync_bool_compare_and_swap(&var,var,value)); \ +} while(0) #else /* Implementation using pthread mutex. 
*/ @@ -78,6 +99,13 @@ pthread_mutex_unlock(&var ## _mutex); \ } while(0) +#define atomicGetIncr(var,oldvalue_var,count) do { \ + pthread_mutex_lock(&var ## _mutex); \ + oldvalue_var = var; \ + var += (count); \ + pthread_mutex_unlock(&var ## _mutex); \ +} while(0) + #define atomicDecr(var,count) do { \ pthread_mutex_lock(&var ## _mutex); \ var -= (count); \ @@ -89,6 +117,12 @@ dstvar = var; \ pthread_mutex_unlock(&var ## _mutex); \ } while(0) + +#define atomicSet(var,value) do { \ + pthread_mutex_lock(&var ## _mutex); \ + var = value; \ + pthread_mutex_unlock(&var ## _mutex); \ +} while(0) #endif #endif /* __ATOMIC_VAR_H */ diff --git a/src/evict.c b/src/evict.c index 62753c5a7..bf5bea6b0 100644 --- a/src/evict.c +++ b/src/evict.c @@ -32,6 +32,7 @@ #include "server.h" #include "bio.h" +#include "atomicvar.h" /* ---------------------------------------------------------------------------- * Data structures @@ -72,6 +73,20 @@ unsigned int getLRUClock(void) { return (mstime()/LRU_CLOCK_RESOLUTION) & LRU_CLOCK_MAX; } +/* This function is used to obtain the current LRU clock. + * If the current resolution is lower than the frequency we refresh the + * LRU clock (as it should be in production servers) we return the + * precomputed value, otherwise we need to resort to a system call. */ +unsigned int LRU_CLOCK(void) { + unsigned int lruclock; + if (1000/server.hz <= LRU_CLOCK_RESOLUTION) { + atomicGet(server.lruclock,lruclock); + } else { + lruclock = getLRUClock(); + } + return lruclock; +} + /* Given an object returns the min number of milliseconds the object was never * requested, using an approximated LRU algorithm. 
*/ unsigned long long estimateObjectIdleTime(robj *o) { diff --git a/src/networking.c b/src/networking.c index fae8e52bd..efaca1bc6 100644 --- a/src/networking.c +++ b/src/networking.c @@ -28,6 +28,7 @@ */ #include "server.h" +#include "atomicvar.h" #include #include #include @@ -88,7 +89,9 @@ client *createClient(int fd) { } selectDb(c,0); - c->id = server.next_client_id++; + uint64_t client_id; + atomicGetIncr(server.next_client_id,client_id,1); + c->id = client_id; c->fd = fd; c->name = NULL; c->bufpos = 0; diff --git a/src/server.c b/src/server.c index 6be12cffe..e1858cb53 100644 --- a/src/server.c +++ b/src/server.c @@ -32,6 +32,7 @@ #include "slowlog.h" #include "bio.h" #include "latency.h" +#include "atomicvar.h" #include #include @@ -68,7 +69,8 @@ double R_Zero, R_PosInf, R_NegInf, R_Nan; /*================================= Globals ================================= */ /* Global vars */ -struct redisServer server; /* server global state */ +struct redisServer server; /* Server global state */ +volatile unsigned long lru_clock; /* Server global current LRU time. */ /* Our command table. * @@ -976,7 +978,8 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { * * Note that you can change the resolution altering the * LRU_CLOCK_RESOLUTION define. */ - server.lruclock = getLRUClock(); + unsigned long lruclock = getLRUClock(); + atomicSet(server.lruclock,lruclock); /* Record the max memory used since the server was started. 
*/ if (zmalloc_used_memory() > server.stat_peak_memory) @@ -1420,6 +1423,7 @@ void initServerConfig(void) { server.cluster_announce_bus_port = CONFIG_DEFAULT_CLUSTER_ANNOUNCE_BUS_PORT; server.migrate_cached_sockets = dictCreate(&migrateCacheDictType,NULL); server.next_client_id = 1; /* Client IDs, start from 1 .*/ + pthread_mutex_init(&server.next_client_id_mutex,NULL); server.loading_process_events_interval_bytes = (1024*1024*2); server.lazyfree_lazy_eviction = CONFIG_DEFAULT_LAZYFREE_LAZY_EVICTION; server.lazyfree_lazy_expire = CONFIG_DEFAULT_LAZYFREE_LAZY_EXPIRE; @@ -1427,7 +1431,8 @@ void initServerConfig(void) { server.always_show_logo = CONFIG_DEFAULT_ALWAYS_SHOW_LOGO; server.lua_time_limit = LUA_SCRIPT_TIME_LIMIT; - server.lruclock = getLRUClock(); + unsigned int lruclock = getLRUClock(); + atomicSet(server.lruclock,lruclock); resetServerSaveParams(); appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */ @@ -2809,6 +2814,8 @@ sds genRedisInfoString(char *section) { call_uname = 0; } + unsigned int lruclock; + atomicGet(server.lruclock,lruclock); info = sdscatprintf(info, "# Server\r\n" "redis_version:%s\r\n" @@ -2848,7 +2855,7 @@ sds genRedisInfoString(char *section) { (intmax_t)uptime, (intmax_t)(uptime/(3600*24)), server.hz, - (unsigned long) server.lruclock, + (unsigned long) lruclock, server.executable ? server.executable : "", server.configfile ? server.configfile : ""); } diff --git a/src/server.h b/src/server.h index 2bc49299b..ea46e5e23 100644 --- a/src/server.h +++ b/src/server.h @@ -563,19 +563,13 @@ typedef struct RedisModuleIO { typedef struct redisObject { unsigned type:4; unsigned encoding:4; - unsigned lru:LRU_BITS; /* LRU time (relative to server.lruclock) or + unsigned lru:LRU_BITS; /* LRU time (relative to global lru_clock) or * LFU data (least significant 8 bits frequency * and most significant 16 bits decreas time). */ int refcount; void *ptr; } robj; -/* Macro used to obtain the current LRU clock. 
- * If the current resolution is lower than the frequency we refresh the - * LRU clock (as it should be in production servers) we return the - * precomputed value, otherwise we need to resort to a system call. */ -#define LRU_CLOCK() ((1000/server.hz <= LRU_CLOCK_RESOLUTION) ? server.lruclock : getLRUClock()) - /* Macro used to initialize a Redis object allocated on the stack. * Note that this macro is taken near the structure definition to make sure * we'll update it when the structure is changed, to avoid bugs like @@ -866,7 +860,7 @@ struct redisServer { dict *commands; /* Command table */ dict *orig_commands; /* Command table before command renaming. */ aeEventLoop *el; - unsigned lruclock:LRU_BITS; /* Clock for LRU eviction */ + unsigned int lruclock; /* Clock for LRU eviction */ int shutdown_asap; /* SHUTDOWN needed ASAP */ int activerehashing; /* Incremental rehash in serverCron() */ int active_defrag_running; /* Active defragmentation running (holds current scan aggressiveness) */ @@ -906,6 +900,7 @@ struct redisServer { char neterr[ANET_ERR_LEN]; /* Error buffer for anet.c */ dict *migrate_cached_sockets;/* MIGRATE cached sockets */ uint64_t next_client_id; /* Next client unique ID. Incremental. */ + pthread_mutex_t next_client_id_mutex; int protected_mode; /* Don't accept external connections. */ /* RDB / AOF loading information */ int loading; /* We are loading data from disk if true */ @@ -1608,6 +1603,7 @@ void updateCachedTime(void); void resetServerStats(void); void activeDefragCycle(void); unsigned int getLRUClock(void); +unsigned int LRU_CLOCK(void); const char *evictPolicyToString(void); struct redisMemOverhead *getMemoryOverheadData(void); void freeMemoryOverheadData(struct redisMemOverhead *mh); From 61eb08813b49fb9b1693ed6f05a1a61c1d94fbbb Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 9 May 2017 16:32:49 +0200 Subject: [PATCH 0343/1722] Modules TSC: Add mutex for server.lruclock. Only useful for when no atomic builtins are available. 
--- src/server.c | 1 + src/server.h | 1 + 2 files changed, 2 insertions(+) diff --git a/src/server.c b/src/server.c index e1858cb53..4fd2f9438 100644 --- a/src/server.c +++ b/src/server.c @@ -1432,6 +1432,7 @@ void initServerConfig(void) { server.lua_time_limit = LUA_SCRIPT_TIME_LIMIT; unsigned int lruclock = getLRUClock(); + pthread_mutex_init(&server.lruclock_mutex,NULL); atomicSet(server.lruclock,lruclock); resetServerSaveParams(); diff --git a/src/server.h b/src/server.h index ea46e5e23..12ccd8c0b 100644 --- a/src/server.h +++ b/src/server.h @@ -861,6 +861,7 @@ struct redisServer { dict *orig_commands; /* Command table before command renaming. */ aeEventLoop *el; unsigned int lruclock; /* Clock for LRU eviction */ + pthread_mutex_t lruclock_mutex; int shutdown_asap; /* SHUTDOWN needed ASAP */ int activerehashing; /* Incremental rehash in serverCron() */ int active_defrag_running; /* Active defragmentation running (holds current scan aggressiveness) */ From 6364bb1a79ab7390dbadeee3b1d273224509b684 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 9 May 2017 16:59:51 +0200 Subject: [PATCH 0344/1722] zmalloc.c: remove thread safe mode, it's the default way. 
--- src/server.c | 1 - src/zmalloc.c | 24 +++--------------------- src/zmalloc.h | 1 - 3 files changed, 3 insertions(+), 23 deletions(-) diff --git a/src/server.c b/src/server.c index 4fd2f9438..0ef3168fe 100644 --- a/src/server.c +++ b/src/server.c @@ -3673,7 +3673,6 @@ int main(int argc, char **argv) { spt_init(argc, argv); #endif setlocale(LC_COLLATE,""); - zmalloc_enable_thread_safeness(); zmalloc_set_oom_handler(redisOutOfMemoryHandler); srand(time(NULL)^getpid()); gettimeofday(&tv,NULL); diff --git a/src/zmalloc.c b/src/zmalloc.c index dfcfc01d4..094dd80fa 100644 --- a/src/zmalloc.c +++ b/src/zmalloc.c @@ -73,25 +73,16 @@ void zlibc_free(void *ptr) { #define update_zmalloc_stat_alloc(__n) do { \ size_t _n = (__n); \ if (_n&(sizeof(long)-1)) _n += sizeof(long)-(_n&(sizeof(long)-1)); \ - if (zmalloc_thread_safe) { \ - atomicIncr(used_memory,__n); \ - } else { \ - used_memory += _n; \ - } \ + atomicIncr(used_memory,__n); \ } while(0) #define update_zmalloc_stat_free(__n) do { \ size_t _n = (__n); \ if (_n&(sizeof(long)-1)) _n += sizeof(long)-(_n&(sizeof(long)-1)); \ - if (zmalloc_thread_safe) { \ - atomicDecr(used_memory,__n); \ - } else { \ - used_memory -= _n; \ - } \ + atomicDecr(used_memory,__n); \ } while(0) static size_t used_memory = 0; -static int zmalloc_thread_safe = 0; pthread_mutex_t used_memory_mutex = PTHREAD_MUTEX_INITIALIZER; static void zmalloc_default_oom(size_t size) { @@ -220,19 +211,10 @@ char *zstrdup(const char *s) { size_t zmalloc_used_memory(void) { size_t um; - - if (zmalloc_thread_safe) { - atomicGet(used_memory,um); - } else { - um = used_memory; - } + atomicGet(used_memory,um); return um; } -void zmalloc_enable_thread_safeness(void) { - zmalloc_thread_safe = 1; -} - void zmalloc_set_oom_handler(void (*oom_handler)(size_t)) { zmalloc_oom_handler = oom_handler; } diff --git a/src/zmalloc.h b/src/zmalloc.h index b6d4e1d97..64f2f36aa 100644 --- a/src/zmalloc.h +++ b/src/zmalloc.h @@ -78,7 +78,6 @@ void *zrealloc(void *ptr, size_t size); 
void zfree(void *ptr); char *zstrdup(const char *s); size_t zmalloc_used_memory(void); -void zmalloc_enable_thread_safeness(void); void zmalloc_set_oom_handler(void (*oom_handler)(size_t)); float zmalloc_get_fragmentation_ratio(size_t rss); size_t zmalloc_get_rss(void); From abbeead6fa3784bee0f652e3dce06e650d0dbf63 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 10 May 2017 09:33:49 +0200 Subject: [PATCH 0345/1722] atomicvar.h: show used API in INFO. Add macro to force __sync builtin. The __sync builtin can be correctly detected by Helgrind so to force it is useful for testing. The API in the INFO output can be useful for debugging after problems are reported. --- src/atomicvar.h | 17 +++++++++++------ src/server.c | 2 ++ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/atomicvar.h b/src/atomicvar.h index 9b5628ad6..84a5bbc5c 100644 --- a/src/atomicvar.h +++ b/src/atomicvar.h @@ -62,7 +62,13 @@ #ifndef __ATOMIC_VAR_H #define __ATOMIC_VAR_H -#if defined(__ATOMIC_RELAXED) && !defined(__sun) && (!defined(__clang__) || !defined(__APPLE__) || __apple_build_version__ > 4210057) +/* To test Redis with Helgrind (a Valgrind tool) it is useful to define + * the following macro, so that __sync macros are used: those can be detected + * by Helgrind (even if they are less efficient) so that no false positive + * is reported. */ +// #define __ATOMIC_VAR_FORCE_SYNC_MACROS + +#if !defined(__ATOMIC_VAR_FORCE_SYNC_MACROS) && defined(__ATOMIC_RELAXED) && !defined(__sun) && (!defined(__clang__) || !defined(__APPLE__) || __apple_build_version__ > 4210057) /* Implementation using __atomic macros. */ #define atomicIncr(var,count) __atomic_add_fetch(&var,(count),__ATOMIC_RELAXED) @@ -74,6 +80,7 @@ dstvar = __atomic_load_n(&var,__ATOMIC_RELAXED); \ } while(0) #define atomicSet(var,value) __atomic_store_n(&var,value,__ATOMIC_RELAXED) +#define REDIS_ATOMIC_API "atomic-builtin" #elif defined(HAVE_ATOMIC) /* Implementation using __sync macros. 
*/ @@ -89,6 +96,7 @@ #define atomicSet(var,value) do { \ while(!__sync_bool_compare_and_swap(&var,var,value)); \ } while(0) +#define REDIS_ATOMIC_API "sync-builtin" #else /* Implementation using pthread mutex. */ @@ -98,31 +106,28 @@ var += (count); \ pthread_mutex_unlock(&var ## _mutex); \ } while(0) - #define atomicGetIncr(var,oldvalue_var,count) do { \ pthread_mutex_lock(&var ## _mutex); \ oldvalue_var = var; \ var += (count); \ pthread_mutex_unlock(&var ## _mutex); \ } while(0) - #define atomicDecr(var,count) do { \ pthread_mutex_lock(&var ## _mutex); \ var -= (count); \ pthread_mutex_unlock(&var ## _mutex); \ } while(0) - #define atomicGet(var,dstvar) do { \ pthread_mutex_lock(&var ## _mutex); \ dstvar = var; \ pthread_mutex_unlock(&var ## _mutex); \ } while(0) - #define atomicSet(var,value) do { \ pthread_mutex_lock(&var ## _mutex); \ var = value; \ pthread_mutex_unlock(&var ## _mutex); \ } while(0) -#endif +#define REDIS_ATOMIC_API "pthread-mutex" +#endif #endif /* __ATOMIC_VAR_H */ diff --git a/src/server.c b/src/server.c index 0ef3168fe..75268b8a4 100644 --- a/src/server.c +++ b/src/server.c @@ -2827,6 +2827,7 @@ sds genRedisInfoString(char *section) { "os:%s %s %s\r\n" "arch_bits:%d\r\n" "multiplexing_api:%s\r\n" + "atomicvar_api:%s\r\n" "gcc_version:%d.%d.%d\r\n" "process_id:%ld\r\n" "run_id:%s\r\n" @@ -2845,6 +2846,7 @@ sds genRedisInfoString(char *section) { name.sysname, name.release, name.machine, server.arch_bits, aeGetApiName(), + REDIS_ATOMIC_API, #ifdef __GNUC__ __GNUC__,__GNUC_MINOR__,__GNUC_PATCHLEVEL__, #else From e6ae9c9bab4c8c1dc3a3a315c1985e2cb8b4fa66 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 10 May 2017 10:01:06 +0200 Subject: [PATCH 0346/1722] Modules TSC: use atomic var for server.unixtime. This avoids Helgrind complaining, but we are actually not using atomicGet() to get the unixtime value for now: too many places where it is used and given tha time_t is word-sized it should be safe in all the archs we support as it is. 
On the other hand, Helgrind, when Redis is compiled with "make helgrind" in order to force the __sync macros, will detect the write in updateCachedTime() as a read (because atomic functions are used) and will not complain about races. This commit also includes minor refactoring of mutex initializations and a "helgrind" target in the Makefile. --- src/Makefile | 3 +++ src/server.c | 9 ++++++--- src/server.h | 8 ++++++-- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/Makefile b/src/Makefile index 8f429431b..691b5aaea 100644 --- a/src/Makefile +++ b/src/Makefile @@ -272,6 +272,9 @@ noopt: valgrind: $(MAKE) OPTIMIZATION="-O0" MALLOC="libc" +helgrind: + $(MAKE) OPTIMIZATION="-O0" MALLOC="libc" CFLAGS="-D__ATOMIC_VAR_FORCE_SYNC_MACROS" + src/help.h: @../utils/generate-command-help.rb > help.h diff --git a/src/server.c b/src/server.c index 75268b8a4..e50ec6359 100644 --- a/src/server.c +++ b/src/server.c @@ -923,7 +923,8 @@ void databasesCron(void) { * every object access, and accuracy is not needed. 
To access a global var is * a lot faster than calling time(NULL) */ void updateCachedTime(void) { - server.unixtime = time(NULL); + time_t unixtime = time(NULL); + atomicSet(server.unixtime,unixtime); server.mstime = mstime(); } @@ -1331,6 +1332,10 @@ void createSharedObjects(void) { void initServerConfig(void) { int j; + pthread_mutex_init(&server.next_client_id_mutex,NULL); + pthread_mutex_init(&server.lruclock_mutex,NULL); + pthread_mutex_init(&server.unixtime_mutex,NULL); + getRandomHexChars(server.runid,CONFIG_RUN_ID_SIZE); server.runid[CONFIG_RUN_ID_SIZE] = '\0'; changeReplicationId(); @@ -1423,7 +1428,6 @@ void initServerConfig(void) { server.cluster_announce_bus_port = CONFIG_DEFAULT_CLUSTER_ANNOUNCE_BUS_PORT; server.migrate_cached_sockets = dictCreate(&migrateCacheDictType,NULL); server.next_client_id = 1; /* Client IDs, start from 1 .*/ - pthread_mutex_init(&server.next_client_id_mutex,NULL); server.loading_process_events_interval_bytes = (1024*1024*2); server.lazyfree_lazy_eviction = CONFIG_DEFAULT_LAZYFREE_LAZY_EVICTION; server.lazyfree_lazy_expire = CONFIG_DEFAULT_LAZYFREE_LAZY_EXPIRE; @@ -1432,7 +1436,6 @@ void initServerConfig(void) { server.lua_time_limit = LUA_SCRIPT_TIME_LIMIT; unsigned int lruclock = getLRUClock(); - pthread_mutex_init(&server.lruclock_mutex,NULL); atomicSet(server.lruclock,lruclock); resetServerSaveParams(); diff --git a/src/server.h b/src/server.h index 12ccd8c0b..8403ed5ba 100644 --- a/src/server.h +++ b/src/server.h @@ -861,7 +861,6 @@ struct redisServer { dict *orig_commands; /* Command table before command renaming. 
*/ aeEventLoop *el; unsigned int lruclock; /* Clock for LRU eviction */ - pthread_mutex_t lruclock_mutex; int shutdown_asap; /* SHUTDOWN needed ASAP */ int activerehashing; /* Incremental rehash in serverCron() */ int active_defrag_running; /* Active defragmentation running (holds current scan aggressiveness) */ @@ -901,7 +900,6 @@ struct redisServer { char neterr[ANET_ERR_LEN]; /* Error buffer for anet.c */ dict *migrate_cached_sockets;/* MIGRATE cached sockets */ uint64_t next_client_id; /* Next client unique ID. Incremental. */ - pthread_mutex_t next_client_id_mutex; int protected_mode; /* Don't accept external connections. */ /* RDB / AOF loading information */ int loading; /* We are loading data from disk if true */ @@ -1173,6 +1171,12 @@ struct redisServer { int watchdog_period; /* Software watchdog period in ms. 0 = off */ /* System hardware info */ size_t system_memory_size; /* Total memory in system as reported by OS */ + + /* Mutexes used to protect atomic variables when atomic builtins are + * not available. */ + pthread_mutex_t lruclock_mutex; + pthread_mutex_t next_client_id_mutex; + pthread_mutex_t unixtime_mutex; }; typedef struct pubsubPattern { From 2e9c6cac74db66ce9421e3fbfbe0ddeb75055e52 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 15 May 2017 11:22:28 +0200 Subject: [PATCH 0347/1722] redis-cli --bigkeys: show error when TYPE fails. Close #3993. 
--- src/redis-cli.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 2a9dff712..0cb74bf04 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2024,8 +2024,13 @@ static void getKeyTypes(redisReply *keys, int *types) { keys->element[i]->str, context->err, context->errstr); exit(1); } else if(reply->type != REDIS_REPLY_STATUS) { - fprintf(stderr, "Invalid reply type (%d) for TYPE on key '%s'!\n", - reply->type, keys->element[i]->str); + if(reply->type == REDIS_REPLY_ERROR) { + fprintf(stderr, "TYPE returned an error: %s\n", reply->str); + } else { + fprintf(stderr, + "Invalid reply type (%d) for TYPE on key '%s'!\n", + reply->type, keys->element[i]->str); + } exit(1); } From f506064136d52134d02daf3962f26573ec2cae6f Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 19 May 2017 11:10:36 +0200 Subject: [PATCH 0348/1722] Collect fork() timing info only if fork succeeded. --- src/rdb.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/rdb.c b/src/rdb.c index 1a5a7b2c5..570ffa843 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -1870,9 +1870,6 @@ int rdbSaveToSlavesSockets(rdbSaveInfo *rsi) { exitFromChild((retval == C_OK) ? 0 : 1); } else { /* Parent */ - server.stat_fork_time = ustime()-start; - server.stat_fork_rate = (double) zmalloc_used_memory() * 1000000 / server.stat_fork_time / (1024*1024*1024); /* GB per second. */ - latencyAddSampleIfNeeded("fork",server.stat_fork_time/1000); if (childpid == -1) { serverLog(LL_WARNING,"Can't save in background: fork: %s", strerror(errno)); @@ -1896,6 +1893,10 @@ int rdbSaveToSlavesSockets(rdbSaveInfo *rsi) { close(pipefds[1]); closeChildInfoPipe(); } else { + server.stat_fork_time = ustime()-start; + server.stat_fork_rate = (double) zmalloc_used_memory() * 1000000 / server.stat_fork_time / (1024*1024*1024); /* GB per second. 
*/ + latencyAddSampleIfNeeded("fork",server.stat_fork_time/1000); + serverLog(LL_NOTICE,"Background RDB transfer started by pid %d", childpid); server.rdb_save_time_start = time(NULL); From b47078f14aca29156ed84b87c21634f3257c998c Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 19 May 2017 12:03:30 +0200 Subject: [PATCH 0349/1722] More informative -MISCONF error message. --- src/server.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/server.c b/src/server.c index e50ec6359..9abda682b 100644 --- a/src/server.c +++ b/src/server.c @@ -1272,7 +1272,7 @@ void createSharedObjects(void) { shared.masterdownerr = createObject(OBJ_STRING,sdsnew( "-MASTERDOWN Link with MASTER is down and slave-serve-stale-data is set to 'no'.\r\n")); shared.bgsaveerr = createObject(OBJ_STRING,sdsnew( - "-MISCONF Redis is configured to save RDB snapshots, but is currently not able to persist on disk. Commands that may modify the data set are disabled. Please check Redis logs for details about the error.\r\n")); + "-MISCONF Redis is configured to save RDB snapshots, but it is currently not able to persist on disk. Commands that may modify the data set are disabled, because this instance is configured to report errors during writes if RDB snapshotting fails (stop-writes-on-bgsave-error option). 
Please check the Redis logs for details about the RDB error.\r\n")); shared.roslaveerr = createObject(OBJ_STRING,sdsnew( "-READONLY You can't write against a read only slave.\r\n")); shared.noautherr = createObject(OBJ_STRING,sdsnew( From 541e4de97648e856f15a57314da0825aebd8fc5b Mon Sep 17 00:00:00 2001 From: Zachary Marquez Date: Thu, 1 Jun 2017 16:24:10 -0500 Subject: [PATCH 0350/1722] Prevent expirations and evictions while paused Proposed fix to https://github.com/antirez/redis/issues/4027 --- src/evict.c | 6 ++++++ src/expire.c | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/src/evict.c b/src/evict.c index bf5bea6b0..77f63cba5 100644 --- a/src/evict.c +++ b/src/evict.c @@ -380,6 +380,12 @@ int freeMemoryIfNeeded(void) { long long delta; int slaves = listLength(server.slaves); + /* We cannot free memory while clients are paused as this will require + * evictions which modify the dataset and will break the guarantee that + * data will be static while clients are paused. */ + if (clientsArePaused()) + goto cant_free; + /* Check if we are over the memory usage limit. If we are not, no need * to subtract the slaves output buffers. We can just return ASAP. */ mem_reported = zmalloc_used_memory(); diff --git a/src/expire.c b/src/expire.c index 22b1f1da9..0e258ecbb 100644 --- a/src/expire.c +++ b/src/expire.c @@ -105,6 +105,10 @@ void activeExpireCycle(int type) { int dbs_per_call = CRON_DBS_PER_CALL; long long start = ustime(), timelimit; + /* We cannot expire keys while clients are paused as the dataset is + * supposed to be static. */ + if (clientsArePaused()) return; + if (type == ACTIVE_EXPIRE_CYCLE_FAST) { /* Don't start a fast cycle if the previous cycle did not exited * for time limt. 
Also don't repeat a fast cycle for the same period From eb83bd77be4283027b4a1159bb591c95dcc3fd7b Mon Sep 17 00:00:00 2001 From: Antonio Mallia Date: Sun, 4 Jun 2017 15:09:05 +0100 Subject: [PATCH 0351/1722] Fixed comment in clusterMsg version field --- src/cluster.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cluster.h b/src/cluster.h index 5e228c0f9..af85841c9 100644 --- a/src/cluster.h +++ b/src/cluster.h @@ -232,7 +232,7 @@ union clusterMsgData { typedef struct { char sig[4]; /* Siganture "RCmb" (Redis Cluster message bus). */ uint32_t totlen; /* Total length of this message */ - uint16_t ver; /* Protocol version, currently set to 0. */ + uint16_t ver; /* Protocol version, currently set to 1. */ uint16_t port; /* TCP base port number. */ uint16_t type; /* Message type */ uint16_t count; /* Only used for some kind of messages. */ From 88423d5bfdcd5aa24c5691a6c4cddb8018e49c4f Mon Sep 17 00:00:00 2001 From: Antonio Mallia Date: Sun, 4 Jun 2017 15:26:53 +0100 Subject: [PATCH 0352/1722] Removed duplicate 'sys/socket.h' include --- src/cluster.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/cluster.c b/src/cluster.c index d5ad85fe7..77ec2f1b1 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include From 8606449fe1b7039847ce754850825805829fbb92 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 13 Jun 2017 10:35:51 +0200 Subject: [PATCH 0353/1722] Fix PERSIST expired key resuscitation issue #4048. --- src/db.c | 2 +- src/expire.c | 9 +++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/db.c b/src/db.c index 86dabac8f..6d39bb9b0 100644 --- a/src/db.c +++ b/src/db.c @@ -93,7 +93,7 @@ robj *lookupKeyReadWithFlags(redisDb *db, robj *key, int flags) { if (expireIfNeeded(db,key) == 1) { /* Key expired. 
If we are in the context of a master, expireIfNeeded() - * returns 0 only when the key does not exist at all, so it's save + * returns 0 only when the key does not exist at all, so it's safe * to return NULL ASAP. */ if (server.masterhost == NULL) return NULL; diff --git a/src/expire.c b/src/expire.c index 22b1f1da9..d3a0e3f69 100644 --- a/src/expire.c +++ b/src/expire.c @@ -477,18 +477,15 @@ void pttlCommand(client *c) { /* PERSIST key */ void persistCommand(client *c) { - dictEntry *de; - - de = dictFind(c->db->dict,c->argv[1]->ptr); - if (de == NULL) { - addReply(c,shared.czero); - } else { + if (lookupKeyWrite(c->db,c->argv[1])) { if (removeExpire(c->db,c->argv[1])) { addReply(c,shared.cone); server.dirty++; } else { addReply(c,shared.czero); } + } else { + addReply(c,shared.czero); } } From 44143324d00a7133510962ca4cb1f4f523d5f294 Mon Sep 17 00:00:00 2001 From: xuchengxuan Date: Wed, 14 Jun 2017 16:42:21 +0800 Subject: [PATCH 0354/1722] Fixed comments of slowlog duration --- src/slowlog.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/slowlog.h b/src/slowlog.h index 81df0b054..6b8740e86 100644 --- a/src/slowlog.h +++ b/src/slowlog.h @@ -35,7 +35,7 @@ typedef struct slowlogEntry { robj **argv; int argc; long long id; /* Unique entry identifier. */ - long long duration; /* Time spent by the query, in nanoseconds. */ + long long duration; /* Time spent by the query, in microseconds. */ time_t time; /* Unix time at which the query was executed. */ } slowlogEntry; From 29122cfa05a854a7b863f1d706693bc26ba71915 Mon Sep 17 00:00:00 2001 From: Qu Chen Date: Fri, 7 Apr 2017 22:31:11 +0000 Subject: [PATCH 0355/1722] Implement getKeys procedure for georadius and georadiusbymember commands. 
--- src/db.c | 38 ++++++++++++++++++++++++++++++++++++++ src/server.c | 4 ++-- src/server.h | 1 + 3 files changed, 41 insertions(+), 2 deletions(-) diff --git a/src/db.c b/src/db.c index 6d39bb9b0..7d1504d30 100644 --- a/src/db.c +++ b/src/db.c @@ -1312,6 +1312,44 @@ int *migrateGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkey return keys; } +/* Helper function to extract keys from following commands: + * GEORADIUS key x y radius unit [WITHDIST] [WITHHASH] [WITHCOORD] [ASC|DESC] + * [COUNT count] [STORE key] [STOREDIST key] + * GEORADIUSBYMEMBER key member radius unit ... options ... */ +int *georadiusGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys) { + int i, num, *keys; + UNUSED(cmd); + + /* Check for the presence of the stored key in the command */ + int stored_key = -1; + for (i = 5; i < argc; i++) { + char *arg = argv[i]->ptr; + /* For the case when user specifies both "store" and "storedist" options, the + * second key specified would override the first key. This behavior is kept + * the same as in georadiusCommand method. + */ + if ((!strcasecmp(arg, "store") || !strcasecmp(arg, "storedist")) && ((i+1) < argc)) { + stored_key = i+1; + i++; + } + } + num = 1 + (stored_key == -1 ? 0 : 1); + + /* Keys in the command come from two places: + * argv[1] = key, + * argv[5...n] = stored key if present + */ + keys = zmalloc(sizeof(int) * num); + + /* Add all key positions to keys[] */ + keys[0] = 1; + if(num > 1) { + keys[1] = stored_key; + } + *numkeys = num; + return keys; +} + /* Slot to Key API. This is used by Redis Cluster in order to obtain in * a fast way a key that belongs to a specified hash slot. 
This is useful * while rehashing the cluster and in other conditions when we need to diff --git a/src/server.c b/src/server.c index 9abda682b..c08c095cb 100644 --- a/src/server.c +++ b/src/server.c @@ -290,8 +290,8 @@ struct redisCommand redisCommandTable[] = { {"wait",waitCommand,3,"s",0,NULL,0,0,0,0,0}, {"command",commandCommand,0,"lt",0,NULL,0,0,0,0,0}, {"geoadd",geoaddCommand,-5,"wm",0,NULL,1,1,1,0,0}, - {"georadius",georadiusCommand,-6,"w",0,NULL,1,1,1,0,0}, - {"georadiusbymember",georadiusByMemberCommand,-5,"w",0,NULL,1,1,1,0,0}, + {"georadius",georadiusCommand,-6,"w",0,georadiusGetKeys,1,1,1,0,0}, + {"georadiusbymember",georadiusByMemberCommand,-5,"w",0,georadiusGetKeys,1,1,1,0,0}, {"geohash",geohashCommand,-2,"r",0,NULL,1,1,1,0,0}, {"geopos",geoposCommand,-2,"r",0,NULL,1,1,1,0,0}, {"geodist",geodistCommand,-4,"r",0,NULL,1,1,1,0,0}, diff --git a/src/server.h b/src/server.h index 8403ed5ba..18924090f 100644 --- a/src/server.h +++ b/src/server.h @@ -1730,6 +1730,7 @@ int *zunionInterGetKeys(struct redisCommand *cmd,robj **argv, int argc, int *num int *evalGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys); int *sortGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys); int *migrateGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys); +int *georadiusGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys); /* Cluster */ void clusterInit(void); From 6b1c3f89ab7366c88ea224fa7d7c0847eb5d5f68 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 15 Jun 2017 12:57:54 +0200 Subject: [PATCH 0356/1722] SLOWLOG: log offending client address and name. 
--- src/server.c | 2 +- src/slowlog.c | 15 +++++++++++---- src/slowlog.h | 4 +++- tests/unit/slowlog.tcl | 13 ++++++++++++- 4 files changed, 27 insertions(+), 7 deletions(-) diff --git a/src/server.c b/src/server.c index c08c095cb..ba93eb789 100644 --- a/src/server.c +++ b/src/server.c @@ -2214,7 +2214,7 @@ void call(client *c, int flags) { char *latency_event = (c->cmd->flags & CMD_FAST) ? "fast-command" : "command"; latencyAddSampleIfNeeded(latency_event,duration/1000); - slowlogPushEntryIfNeeded(c->argv,c->argc,duration); + slowlogPushEntryIfNeeded(c,c->argv,c->argc,duration); } if (flags & CMD_CALL_STATS) { c->lastcmd->microseconds += duration; diff --git a/src/slowlog.c b/src/slowlog.c index 484cf06bf..805ee1d77 100644 --- a/src/slowlog.c +++ b/src/slowlog.c @@ -45,7 +45,7 @@ /* Create a new slowlog entry. * Incrementing the ref count of all the objects retained is up to * this function. */ -slowlogEntry *slowlogCreateEntry(robj **argv, int argc, long long duration) { +slowlogEntry *slowlogCreateEntry(client *c, robj **argv, int argc, long long duration) { slowlogEntry *se = zmalloc(sizeof(*se)); int j, slargc = argc; @@ -81,6 +81,8 @@ slowlogEntry *slowlogCreateEntry(robj **argv, int argc, long long duration) { se->time = time(NULL); se->duration = duration; se->id = server.slowlog_entry_id++; + se->peerid = sdsnew(getClientPeerId(c)); + se->cname = c->name ? sdsnew(c->name->ptr) : sdsempty(); return se; } @@ -95,6 +97,8 @@ void slowlogFreeEntry(void *septr) { for (j = 0; j < se->argc; j++) decrRefCount(se->argv[j]); zfree(se->argv); + sdsfree(se->peerid); + sdsfree(se->cname); zfree(se); } @@ -109,10 +113,11 @@ void slowlogInit(void) { /* Push a new entry into the slow log. * This function will make sure to trim the slow log accordingly to the * configured max length. 
*/ -void slowlogPushEntryIfNeeded(robj **argv, int argc, long long duration) { +void slowlogPushEntryIfNeeded(client *c, robj **argv, int argc, long long duration) { if (server.slowlog_log_slower_than < 0) return; /* Slowlog disabled */ if (duration >= server.slowlog_log_slower_than) - listAddNodeHead(server.slowlog,slowlogCreateEntry(argv,argc,duration)); + listAddNodeHead(server.slowlog, + slowlogCreateEntry(c,argv,argc,duration)); /* Remove old entries if needed. */ while (listLength(server.slowlog) > server.slowlog_max_len) @@ -152,13 +157,15 @@ void slowlogCommand(client *c) { int j; se = ln->value; - addReplyMultiBulkLen(c,4); + addReplyMultiBulkLen(c,6); addReplyLongLong(c,se->id); addReplyLongLong(c,se->time); addReplyLongLong(c,se->duration); addReplyMultiBulkLen(c,se->argc); for (j = 0; j < se->argc; j++) addReplyBulk(c,se->argv[j]); + addReplyBulkCBuffer(c,se->peerid,sdslen(se->peerid)); + addReplyBulkCBuffer(c,se->cname,sdslen(se->cname)); sent++; } setDeferredMultiBulkLength(c,totentries,sent); diff --git a/src/slowlog.h b/src/slowlog.h index 81df0b054..e3781c67b 100644 --- a/src/slowlog.h +++ b/src/slowlog.h @@ -37,11 +37,13 @@ typedef struct slowlogEntry { long long id; /* Unique entry identifier. */ long long duration; /* Time spent by the query, in nanoseconds. */ time_t time; /* Unix time at which the query was executed. */ + sds cname; /* Client name. */ + sds peerid; /* Client network address. 
*/ } slowlogEntry; /* Exported API */ void slowlogInit(void); -void slowlogPushEntryIfNeeded(robj **argv, int argc, long long duration); +void slowlogPushEntryIfNeeded(client *c, robj **argv, int argc, long long duration); /* Exported commands */ void slowlogCommand(client *c); diff --git a/tests/unit/slowlog.tcl b/tests/unit/slowlog.tcl index b25b91e2c..fce02498b 100644 --- a/tests/unit/slowlog.tcl +++ b/tests/unit/slowlog.tcl @@ -31,12 +31,14 @@ start_server {tags {"slowlog"} overrides {slowlog-log-slower-than 1000000}} { } {0} test {SLOWLOG - logged entry sanity check} { + r client setname foobar r debug sleep 0.2 set e [lindex [r slowlog get] 0] - assert_equal [llength $e] 4 + assert_equal [llength $e] 6 assert_equal [lindex $e 0] 105 assert_equal [expr {[lindex $e 2] > 100000}] 1 assert_equal [lindex $e 3] {debug sleep 0.2} + assert_equal {foobar} [lindex $e 5] } test {SLOWLOG - commands with too many arguments are trimmed} { @@ -67,4 +69,13 @@ start_server {tags {"slowlog"} overrides {slowlog-log-slower-than 1000000}} { set e [lindex [r slowlog get] 0] assert_equal [lindex $e 3] {debug sleep 0.2} } + + test {SLOWLOG - can clean older entires} { + r client setname lastentry_client + r config set slowlog-max-len 1 + r debug sleep 0.2 + assert {[llength [r slowlog get]] == 1} + set e [lindex [r slowlog get] 0] + assert_equal {lastentry_client} [lindex $e 5] + } } From 809a73be97a97b1c2367a65db0f92efa6c658f4c Mon Sep 17 00:00:00 2001 From: xuzhou Date: Fri, 16 Jun 2017 17:51:38 +0800 Subject: [PATCH 0357/1722] Fix set with ex/px option when propagated to aof --- src/aof.c | 17 +++++++++++++++++ src/server.c | 2 ++ src/server.h | 3 ++- tests/unit/expire.tcl | 15 +++++++++++++++ 4 files changed, 36 insertions(+), 1 deletion(-) diff --git a/src/aof.c b/src/aof.c index 9b15ad1d0..071657dd4 100644 --- a/src/aof.c +++ b/src/aof.c @@ -536,6 +536,23 @@ void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int a buf = 
catAppendOnlyGenericCommand(buf,3,tmpargv); decrRefCount(tmpargv[0]); buf = catAppendOnlyExpireAtCommand(buf,cmd,argv[1],argv[2]); + } else if (cmd->proc == setCommand) { + int i; + robj *exarg = NULL, *pxarg = NULL; + /* Translate SET [EX seconds][PX milliseconds] to SET and PEXPIREAT */ + buf = catAppendOnlyGenericCommand(buf,3,argv); + for (i = 3; i < argc; i ++) { + if (sdsEncodedObject(argv[i]) && !strcasecmp(argv[i]->ptr, "ex")) + exarg = argv[i+1]; + + if (sdsEncodedObject(argv[i]) && !strcasecmp(argv[i]->ptr, "px")) + pxarg = argv[i+1]; + } + serverAssert(!(exarg && pxarg)); + if (exarg) + buf = catAppendOnlyExpireAtCommand(buf,server.expireCommand,argv[1],exarg); + if (pxarg) + buf = catAppendOnlyExpireAtCommand(buf,server.pexpireCommand,argv[1],pxarg); } else { /* All the other commands don't need translation or need the * same translation already operated in the command vector diff --git a/src/server.c b/src/server.c index ba93eb789..a3c132840 100644 --- a/src/server.c +++ b/src/server.c @@ -1500,6 +1500,8 @@ void initServerConfig(void) { server.rpopCommand = lookupCommandByCString("rpop"); server.sremCommand = lookupCommandByCString("srem"); server.execCommand = lookupCommandByCString("exec"); + server.expireCommand = lookupCommandByCString("expire"); + server.pexpireCommand = lookupCommandByCString("pexpire"); /* Slow log */ server.slowlog_log_slower_than = CONFIG_DEFAULT_SLOWLOG_LOG_SLOWER_THAN; diff --git a/src/server.h b/src/server.h index 18924090f..aaad64bdd 100644 --- a/src/server.h +++ b/src/server.h @@ -909,7 +909,8 @@ struct redisServer { off_t loading_process_events_interval_bytes; /* Fast pointers to often looked up command */ struct redisCommand *delCommand, *multiCommand, *lpushCommand, *lpopCommand, - *rpopCommand, *sremCommand, *execCommand; + *rpopCommand, *sremCommand, *execCommand, *expireCommand, + *pexpireCommand; /* Fields used only for stats */ time_t stat_starttime; /* Server start time */ long long stat_numcommands; /* Number of 
processed commands */ diff --git a/tests/unit/expire.tcl b/tests/unit/expire.tcl index 0a50dd31b..eddc7c303 100644 --- a/tests/unit/expire.tcl +++ b/tests/unit/expire.tcl @@ -204,4 +204,19 @@ start_server {tags {"expire"}} { catch {r expire foo ""} e set e } {*not an integer*} + + test {SET - use EX/PX option, TTL should not be reseted after loadaof} { + r config set appendonly yes + r set foo bar EX 100 + after 2000 + r debug loadaof + set ttl [r ttl foo] + assert {$ttl <= 98 && $ttl > 90} + + r set foo bar PX 100000 + after 2000 + r debug loadaof + set ttl [r ttl foo] + assert {$ttl <= 98 && $ttl > 90} + } } From 98d3075f2fac0c7cb22740705c8cefd9f4271fb8 Mon Sep 17 00:00:00 2001 From: Aric Huang Date: Fri, 16 Jun 2017 16:10:00 -0700 Subject: [PATCH 0358/1722] (fix) Update create-cluster README Fix a few typos/adjust wording in `create-cluster` README --- utils/create-cluster/README | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/utils/create-cluster/README b/utils/create-cluster/README index 1f43748ee..f2a89839b 100644 --- a/utils/create-cluster/README +++ b/utils/create-cluster/README @@ -2,7 +2,7 @@ Create-custer is a small script used to easily start a big number of Redis instances configured to run in cluster mode. Its main goal is to allow manual testing in a condition which is not easy to replicate with the Redis cluster unit tests, for example when a lot of instances are needed in order to trigger -a give bug. +a given bug. The tool can also be used just to easily create a number of instances in a Redis Cluster in order to experiment a bit with the system. @@ -10,7 +10,7 @@ Redis Cluster in order to experiment a bit with the system. USAGE --- -To create a cluster, follow this steps: +To create a cluster, follow these steps: 1. Edit create-cluster and change the start / end port, depending on the number of instances you want to create. @@ -21,7 +21,7 @@ an actual Redis cluster will be created. In order to stop a cluster: -1. 
Use "./craete-cluster stop" to stop all the instances. After you stopped the instances you can use "./create-cluster start" to restart them if you change ideas. -2. Use "./create-cluster clean" to remove all the AOF / log files to restat with a clean environment. +1. Use "./create-cluster stop" to stop all the instances. After you stopped the instances you can use "./create-cluster start" to restart them if you change your mind. +2. Use "./create-cluster clean" to remove all the AOF / log files to restart with a clean environment. Use the command "./create-cluster help" to get the full list of features. From d78d6b22c61d9abe5869bec84fad43a90d9234de Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 19 Jun 2017 09:41:11 +0200 Subject: [PATCH 0359/1722] redis-benchmark: add -t hset target. --- src/redis-benchmark.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/redis-benchmark.c b/src/redis-benchmark.c index f382019a4..be15b2417 100644 --- a/src/redis-benchmark.c +++ b/src/redis-benchmark.c @@ -779,6 +779,13 @@ int main(int argc, const char **argv) { free(cmd); } + if (test_is_selected("hset")) { + len = redisFormatCommand(&cmd, + "HSET myset:__rand_int__ element:__rand_int__ %s",data); + benchmark("HSET",cmd,len); + free(cmd); + } + if (test_is_selected("spop")) { len = redisFormatCommand(&cmd,"SPOP myset"); benchmark("SPOP",cmd,len); From efed56d73acc75c36343749afce34c1a15094f30 Mon Sep 17 00:00:00 2001 From: xuzhou Date: Thu, 22 Jun 2017 11:06:40 +0800 Subject: [PATCH 0360/1722] Optimize set command with ex/px when updating aof. 
--- src/aof.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/aof.c b/src/aof.c index 071657dd4..79e2b9b70 100644 --- a/src/aof.c +++ b/src/aof.c @@ -536,16 +536,16 @@ void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int a buf = catAppendOnlyGenericCommand(buf,3,tmpargv); decrRefCount(tmpargv[0]); buf = catAppendOnlyExpireAtCommand(buf,cmd,argv[1],argv[2]); - } else if (cmd->proc == setCommand) { + } else if (cmd->proc == setCommand && argc > 3) { int i; robj *exarg = NULL, *pxarg = NULL; /* Translate SET [EX seconds][PX milliseconds] to SET and PEXPIREAT */ buf = catAppendOnlyGenericCommand(buf,3,argv); for (i = 3; i < argc; i ++) { - if (sdsEncodedObject(argv[i]) && !strcasecmp(argv[i]->ptr, "ex")) + if (!strcasecmp(argv[i]->ptr, "ex")) exarg = argv[i+1]; - if (sdsEncodedObject(argv[i]) && !strcasecmp(argv[i]->ptr, "px")) + if (!strcasecmp(argv[i]->ptr, "px")) pxarg = argv[i+1]; } serverAssert(!(exarg && pxarg)); From 7ef759e0682f0c287e870100ab23ece1593ab685 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 22 Jun 2017 11:00:21 +0200 Subject: [PATCH 0361/1722] Aesthetic changes to #4068 PR to conform to Redis coding standard. 1. Inline if ... statement if short. 2. No lines over 80 columns. 
--- src/aof.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/aof.c b/src/aof.c index 79e2b9b70..0593b2707 100644 --- a/src/aof.c +++ b/src/aof.c @@ -542,17 +542,16 @@ void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int a /* Translate SET [EX seconds][PX milliseconds] to SET and PEXPIREAT */ buf = catAppendOnlyGenericCommand(buf,3,argv); for (i = 3; i < argc; i ++) { - if (!strcasecmp(argv[i]->ptr, "ex")) - exarg = argv[i+1]; - - if (!strcasecmp(argv[i]->ptr, "px")) - pxarg = argv[i+1]; + if (!strcasecmp(argv[i]->ptr, "ex")) exarg = argv[i+1]; + if (!strcasecmp(argv[i]->ptr, "px")) pxarg = argv[i+1]; } serverAssert(!(exarg && pxarg)); if (exarg) - buf = catAppendOnlyExpireAtCommand(buf,server.expireCommand,argv[1],exarg); + buf = catAppendOnlyExpireAtCommand(buf,server.expireCommand,argv[1], + exarg); if (pxarg) - buf = catAppendOnlyExpireAtCommand(buf,server.pexpireCommand,argv[1],pxarg); + buf = catAppendOnlyExpireAtCommand(buf,server.pexpireCommand,argv[1], + pxarg); } else { /* All the other commands don't need translation or need the * same translation already operated in the command vector From a1149c6326de783d0e8ab4fea3465452fb9b705d Mon Sep 17 00:00:00 2001 From: Suraj Narkhede Date: Thu, 22 Jun 2017 23:52:00 -0700 Subject: [PATCH 0362/1722] Fix brpop command table entry and redirect blocked clients. --- src/cluster.c | 3 ++- src/server.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/cluster.c b/src/cluster.c index 77ec2f1b1..56af347b2 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -5418,8 +5418,9 @@ int clusterRedirectBlockedClientIfNeeded(client *c) { return 1; } + /* All keys must belong to the same slot, so check first key only. 
*/ di = dictGetIterator(c->bpop.keys); - while((de = dictNext(di)) != NULL) { + if ((de = dictNext(di)) != NULL) { robj *key = dictGetKey(de); int slot = keyHashSlot((char*)key->ptr, sdslen(key->ptr)); clusterNode *node = server.cluster->slots[slot]; diff --git a/src/server.c b/src/server.c index a3c132840..46e5bb9f7 100644 --- a/src/server.c +++ b/src/server.c @@ -152,7 +152,7 @@ struct redisCommand redisCommandTable[] = { {"linsert",linsertCommand,5,"wm",0,NULL,1,1,1,0,0}, {"rpop",rpopCommand,2,"wF",0,NULL,1,1,1,0,0}, {"lpop",lpopCommand,2,"wF",0,NULL,1,1,1,0,0}, - {"brpop",brpopCommand,-3,"ws",0,NULL,1,1,1,0,0}, + {"brpop",brpopCommand,-3,"ws",0,NULL,1,-2,1,0,0}, {"brpoplpush",brpoplpushCommand,4,"wms",0,NULL,1,2,1,0,0}, {"blpop",blpopCommand,-3,"ws",0,NULL,1,-2,1,0,0}, {"llen",llenCommand,2,"rF",0,NULL,1,1,1,0,0}, From 3e42887310ac6338407db65166e3d9d92543db2a Mon Sep 17 00:00:00 2001 From: Suraj Narkhede Date: Fri, 23 Jun 2017 00:30:21 -0700 Subject: [PATCH 0363/1722] Fix following issues in blocking commands: 1. brpop last key index, thus checking all keys for slots. 2. Memory leak in clusterRedirectBlockedClientIfNeeded. 3. Remove while loop in clusterRedirectBlockedClientIfNeeded. --- src/cluster.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cluster.c b/src/cluster.c index 56af347b2..407ddee82 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -5438,6 +5438,7 @@ int clusterRedirectBlockedClientIfNeeded(client *c) { clusterRedirectClient(c,node,slot, CLUSTER_REDIR_MOVED); } + dictReleaseIterator(di); return 1; } } From a771a41a566a6c07c77cf97a0e29c38a662b5524 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 23 Jun 2017 11:42:25 +0200 Subject: [PATCH 0364/1722] Issue #4027: unify comment and modify return value in freeMemoryIfNeeded(). It looks safer to return C_OK from freeMemoryIfNeeded() when clients are paused because returning C_ERR may prevent success of writes. 
It is possible that there is no difference in practice since clients cannot execute writes while clients are paused, but it looks more correct this way, at least conceptually. Related to PR #4028. --- src/evict.c | 9 ++++----- src/expire.c | 5 +++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/evict.c b/src/evict.c index 77f63cba5..5ce5ca07f 100644 --- a/src/evict.c +++ b/src/evict.c @@ -380,11 +380,10 @@ int freeMemoryIfNeeded(void) { long long delta; int slaves = listLength(server.slaves); - /* We cannot free memory while clients are paused as this will require - * evictions which modify the dataset and will break the guarantee that - * data will be static while clients are paused. */ - if (clientsArePaused()) - goto cant_free; + /* When clients are paused the dataset should be static not just from the + * POV of clients not being able to write, but also from the POV of + * expires and evictions of keys not being performed. */ + if (clientsArePaused()) return C_OK; /* Check if we are over the memory usage limit. If we are not, no need * to subtract the slaves output buffers. We can just return ASAP. */ diff --git a/src/expire.c b/src/expire.c index 14da78ec3..a02fe566a 100644 --- a/src/expire.c +++ b/src/expire.c @@ -105,8 +105,9 @@ void activeExpireCycle(int type) { int dbs_per_call = CRON_DBS_PER_CALL; long long start = ustime(), timelimit; - /* We cannot expire keys while clients are paused as the dataset is - * supposed to be static. */ + /* When clients are paused the dataset should be static not just from the + * POV of clients not being able to write, but also from the POV of + * expires and evictions of keys not being performed. */ if (clientsArePaused()) return; if (type == ACTIVE_EXPIRE_CYCLE_FAST) { From e2a0d20737acbe7fea792ab619ec2a963137477e Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 26 Jun 2017 10:36:12 +0200 Subject: [PATCH 0365/1722] ARM: Fix stack trace generation on crash. 
--- src/Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Makefile b/src/Makefile index 691b5aaea..24e960593 100644 --- a/src/Makefile +++ b/src/Makefile @@ -38,6 +38,11 @@ endif endif endif +# To get ARM stack traces if Redis crashes we need a special C flag. +ifneq (,$(findstring armv,$(uname_M))) + CFLAGS+=-funwind-tables +endif + # Backwards compatibility for selecting an allocator ifeq ($(USE_TCMALLOC),yes) MALLOC=tcmalloc From 627713f6274bba41ad4ae7c29430526249ac537b Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 27 Jun 2017 13:09:33 +0200 Subject: [PATCH 0366/1722] RDB modules values serialization format version 2. The original RDB serialization format was not parsable without the module loaded, becuase the structure was managed only by the module itself. Moreover RDB is a streaming protocol in the sense that it is both produce di an append-only fashion, and is also sometimes directly sent to the socket (in the case of diskless replication). The fact that modules values cannot be parsed without the relevant module loaded is a problem in many ways: RDB checking tools must have loaded modules even for doing things not involving the value at all, like splitting an RDB into N RDBs by key or alike, or just checking the RDB for sanity. In theory module values could be just a blob of data with a prefixed length in order for us to be able to skip it. However prefixing the values with a length would mean one of the following: 1. To be able to write some data at a previous offset. This breaks stremaing. 2. To bufferize values before outputting them. This breaks performances. 3. To have some chunked RDB output format. This breaks simplicity. Moreover, the above solution, still makes module values a totally opaque matter, with the fowllowing problems: 1. The RDB check tool can just skip the value without being able to at least check the general structure. 
For datasets composed mostly of modules values this means to just check the outer level of the RDB not actually doing any checko on most of the data itself. 2. It is not possible to do any recovering or processing of data for which a module no longer exists in the future, or is unknown. So this commit implements a different solution. The modules RDB serialization API is composed if well defined calls to store integers, floats, doubles or strings. After this commit, the parts generated by the module API have a one-byte prefix for each of the above emitted parts, and there is a final EOF byte as well. So even if we don't know exactly how to interpret a module value, we can always parse it at an high level, check the overall structure, understand the types used to store the information, and easily skip the whole value. The change is backward compatible: older RDB files can be still loaded since the new encoding has a new RDB type: MODULE_2 (of value 7). The commit also implements the ability to check RDB files for sanity taking advantage of the new feature. --- src/module.c | 75 +++++++++++++++++++++++++++++++++++----------------- src/rdb.c | 63 ++++++++++++++++++++++++++++++++++++++++--- src/rdb.h | 12 ++++++++- src/server.h | 5 ++++ 4 files changed, 127 insertions(+), 28 deletions(-) diff --git a/src/module.c b/src/module.c index 0ae7ac461..35e479927 100644 --- a/src/module.c +++ b/src/module.c @@ -2705,11 +2705,13 @@ moduleType *moduleTypeLookupModuleByID(uint64_t id) { } /* Turn an (unresolved) module ID into a type name, to show the user an - * error when RDB files contain module data we can't load. */ + * error when RDB files contain module data we can't load. + * The buffer pointed by 'name' must be 10 bytes at least. The function will + * fill it with a null terminated module name. 
*/ void moduleTypeNameByID(char *name, uint64_t moduleid) { const char *cset = ModuleTypeNameCharSet; - name[0] = '\0'; + name[9] = '\0'; char *p = name+8; moduleid >>= 10; for (int j = 0; j < 9; j++) { @@ -2877,7 +2879,8 @@ void moduleRDBLoadError(RedisModuleIO *io) { * data types. */ void RM_SaveUnsigned(RedisModuleIO *io, uint64_t value) { if (io->error) return; - int retval = rdbSaveLen(io->rio, value); + int retval = rdbSaveLen(io->rio, RDB_MODULE_OPCODE_UINT); + if (retval != -1) rdbSaveLen(io->rio, value); if (retval == -1) { io->error = 1; } else { @@ -2889,13 +2892,18 @@ void RM_SaveUnsigned(RedisModuleIO *io, uint64_t value) { * be called in the context of the rdb_load method of modules implementing * new data types. */ uint64_t RM_LoadUnsigned(RedisModuleIO *io) { + if (io->ver == 2) { + uint64_t opcode = rdbLoadLen(io->rio,NULL); + if (opcode != RDB_MODULE_OPCODE_UINT) goto loaderr; + } uint64_t value; int retval = rdbLoadLenByRef(io->rio, NULL, &value); - if (retval == -1) { - moduleRDBLoadError(io); - return 0; /* Never reached. */ - } + if (retval == -1) goto loaderr; return value; + +loaderr: + moduleRDBLoadError(io); + return 0; /* Never reached. */ } /* Like RedisModule_SaveUnsigned() but for signed 64 bit values. */ @@ -2920,7 +2928,8 @@ int64_t RM_LoadSigned(RedisModuleIO *io) { * the RDB file. */ void RM_SaveString(RedisModuleIO *io, RedisModuleString *s) { if (io->error) return; - int retval = rdbSaveStringObject(io->rio,s); + int retval = rdbSaveLen(io->rio, RDB_MODULE_OPCODE_STRING); + if (retval != -1) retval = rdbSaveStringObject(io->rio,s); if (retval == -1) { io->error = 1; } else { @@ -2932,7 +2941,8 @@ void RM_SaveString(RedisModuleIO *io, RedisModuleString *s) { * as input. 
*/ void RM_SaveStringBuffer(RedisModuleIO *io, const char *str, size_t len) { if (io->error) return; - int retval = rdbSaveRawString(io->rio,(unsigned char*)str,len); + int retval = rdbSaveLen(io->rio, RDB_MODULE_OPCODE_STRING); + if (retval != -1) retval = rdbSaveRawString(io->rio,(unsigned char*)str,len); if (retval == -1) { io->error = 1; } else { @@ -2942,13 +2952,18 @@ void RM_SaveStringBuffer(RedisModuleIO *io, const char *str, size_t len) { /* Implements RM_LoadString() and RM_LoadStringBuffer() */ void *moduleLoadString(RedisModuleIO *io, int plain, size_t *lenptr) { + if (io->ver == 2) { + uint64_t opcode = rdbLoadLen(io->rio,NULL); + if (opcode != RDB_MODULE_OPCODE_STRING) goto loaderr; + } void *s = rdbGenericLoadStringObject(io->rio, plain ? RDB_LOAD_PLAIN : RDB_LOAD_NONE, lenptr); - if (s == NULL) { - moduleRDBLoadError(io); - return NULL; /* Never reached. */ - } + if (s == NULL) goto loaderr; return s; + +loaderr: + moduleRDBLoadError(io); + return NULL; /* Never reached. */ } /* In the context of the rdb_load method of a module data type, loads a string @@ -2980,7 +2995,8 @@ char *RM_LoadStringBuffer(RedisModuleIO *io, size_t *lenptr) { * It is possible to load back the value with RedisModule_LoadDouble(). */ void RM_SaveDouble(RedisModuleIO *io, double value) { if (io->error) return; - int retval = rdbSaveBinaryDoubleValue(io->rio, value); + int retval = rdbSaveLen(io->rio, RDB_MODULE_OPCODE_DOUBLE); + if (retval != -1) retval = rdbSaveBinaryDoubleValue(io->rio, value); if (retval == -1) { io->error = 1; } else { @@ -2991,21 +3007,27 @@ void RM_SaveDouble(RedisModuleIO *io, double value) { /* In the context of the rdb_save method of a module data type, loads back the * double value saved by RedisModule_SaveDouble(). 
*/ double RM_LoadDouble(RedisModuleIO *io) { + if (io->ver == 2) { + uint64_t opcode = rdbLoadLen(io->rio,NULL); + if (opcode != RDB_MODULE_OPCODE_DOUBLE) goto loaderr; + } double value; int retval = rdbLoadBinaryDoubleValue(io->rio, &value); - if (retval == -1) { - moduleRDBLoadError(io); - return 0; /* Never reached. */ - } + if (retval == -1) goto loaderr; return value; + +loaderr: + moduleRDBLoadError(io); + return 0; /* Never reached. */ } -/* In the context of the rdb_save method of a module data type, saves a float +/* In the context of the rdb_save method of a module data type, saves a float * value to the RDB file. The float can be a valid number, a NaN or infinity. * It is possible to load back the value with RedisModule_LoadFloat(). */ void RM_SaveFloat(RedisModuleIO *io, float value) { if (io->error) return; - int retval = rdbSaveBinaryFloatValue(io->rio, value); + int retval = rdbSaveLen(io->rio, RDB_MODULE_OPCODE_FLOAT); + if (retval != -1) retval = rdbSaveBinaryFloatValue(io->rio, value); if (retval == -1) { io->error = 1; } else { @@ -3016,13 +3038,18 @@ void RM_SaveFloat(RedisModuleIO *io, float value) { /* In the context of the rdb_save method of a module data type, loads back the * float value saved by RedisModule_SaveFloat(). */ float RM_LoadFloat(RedisModuleIO *io) { + if (io->ver == 2) { + uint64_t opcode = rdbLoadLen(io->rio,NULL); + if (opcode != RDB_MODULE_OPCODE_FLOAT) goto loaderr; + } float value; int retval = rdbLoadBinaryFloatValue(io->rio, &value); - if (retval == -1) { - moduleRDBLoadError(io); - return 0; /* Never reached. */ - } + if (retval == -1) goto loaderr; return value; + +loaderr: + moduleRDBLoadError(io); + return 0; /* Never reached. 
*/ } /* -------------------------------------------------------------------------- diff --git a/src/rdb.c b/src/rdb.c index 570ffa843..18acb4195 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -623,7 +623,7 @@ int rdbSaveObjectType(rio *rdb, robj *o) { else serverPanic("Unknown hash encoding"); case OBJ_MODULE: - return rdbSaveType(rdb,RDB_TYPE_MODULE); + return rdbSaveType(rdb,RDB_TYPE_MODULE_2); default: serverPanic("Unknown object type"); } @@ -775,8 +775,12 @@ ssize_t rdbSaveObject(rio *rdb, robj *o) { if (retval == -1) return -1; io.bytes += retval; - /* Then write the module-specific representation. */ + /* Then write the module-specific representation + EOF marker. */ mt->rdb_save(&io,mv->value); + retval = rdbSaveLen(rdb,RDB_MODULE_OPCODE_EOF); + if (retval == -1) return -1; + io.bytes += retval; + if (io.ctx) { moduleFreeContext(io.ctx); zfree(io.ctx); @@ -1102,6 +1106,45 @@ void rdbRemoveTempFile(pid_t childpid) { unlink(tmpfile); } +/* This function is called by rdbLoadObject() when the code is in RDB-check + * mode and we find a module value of type 2 that can be parsed without + * the need of the actual module. The value is parsed for errors, finally + * a dummy redis object is returned just to conform to the API. 
*/ +robj *rdbLoadCheckModuleValue(rio *rdb, char *modulename) { + uint64_t opcode; + while((opcode = rdbLoadLen(rdb,NULL)) != RDB_MODULE_OPCODE_EOF) { + if (opcode == RDB_MODULE_OPCODE_SINT || + opcode == RDB_MODULE_OPCODE_UINT) + { + uint64_t len; + if (rdbLoadLenByRef(rdb,NULL,&len) == -1) { + rdbExitReportCorruptRDB( + "Error reading integer from module %s value", modulename); + } + } else if (opcode == RDB_MODULE_OPCODE_STRING) { + robj *o = rdbGenericLoadStringObject(rdb,RDB_LOAD_NONE,NULL); + if (o == NULL) { + rdbExitReportCorruptRDB( + "Error reading string from module %s value", modulename); + } + decrRefCount(o); + } else if (opcode == RDB_MODULE_OPCODE_FLOAT) { + float val; + if (rdbLoadBinaryFloatValue(rdb,&val) == -1) { + rdbExitReportCorruptRDB( + "Error reading float from module %s value", modulename); + } + } else if (opcode == RDB_MODULE_OPCODE_DOUBLE) { + double val; + if (rdbLoadBinaryDoubleValue(rdb,&val) == -1) { + rdbExitReportCorruptRDB( + "Error reading double from module %s value", modulename); + } + } + } + return createStringObject("module-dummy-value",18); +} + /* Load a Redis object of the specified type from the specified file. * On success a newly allocated object is returned, otherwise NULL. 
*/ robj *rdbLoadObject(int rdbtype, rio *rdb) { @@ -1353,11 +1396,14 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) { rdbExitReportCorruptRDB("Unknown RDB encoding type %d",rdbtype); break; } - } else if (rdbtype == RDB_TYPE_MODULE) { + } else if (rdbtype == RDB_TYPE_MODULE || rdbtype == RDB_TYPE_MODULE_2) { uint64_t moduleid = rdbLoadLen(rdb,NULL); moduleType *mt = moduleTypeLookupModuleByID(moduleid); char name[10]; + if (rdbCheckMode && rdbtype == RDB_TYPE_MODULE_2) + return rdbLoadCheckModuleValue(rdb,name); + if (mt == NULL) { moduleTypeNameByID(name,moduleid); serverLog(LL_WARNING,"The RDB file contains module data I can't load: no matching module '%s'", name); @@ -1365,9 +1411,20 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) { } RedisModuleIO io; moduleInitIOContext(io,mt,rdb); + io.ver = (rdbtype == RDB_TYPE_MODULE) ? 1 : 2; /* Call the rdb_load method of the module providing the 10 bit * encoding version in the lower 10 bits of the module ID. */ void *ptr = mt->rdb_load(&io,moduleid&1023); + + /* Module v2 serialization has an EOF mark at the end. */ + if (io.ver == 2) { + uint64_t eof = rdbLoadLen(rdb,NULL); + if (eof != RDB_MODULE_OPCODE_EOF) { + serverLog(LL_WARNING,"The RDB file contains module data for the module '%s' that is not terminated by the proper module value EOF marker", name); + exit(1); + } + } + if (ptr == NULL) { moduleTypeNameByID(name,moduleid); serverLog(LL_WARNING,"The RDB file contains module data for the module type '%s', that the responsible module is not able to load. Check for modules log above for additional clues.", name); diff --git a/src/rdb.h b/src/rdb.h index efe932255..a22cb33ce 100644 --- a/src/rdb.h +++ b/src/rdb.h @@ -78,6 +78,8 @@ #define RDB_TYPE_HASH 4 #define RDB_TYPE_ZSET_2 5 /* ZSET version 2 with doubles stored in binary. */ #define RDB_TYPE_MODULE 6 +#define RDB_TYPE_MODULE_2 7 /* Module value with annotations for parsing without + the generating module being loaded. 
*/ /* NOTE: WHEN ADDING NEW RDB TYPE, UPDATE rdbIsObjectType() BELOW */ /* Object types for encoded objects. */ @@ -90,7 +92,7 @@ /* NOTE: WHEN ADDING NEW RDB TYPE, UPDATE rdbIsObjectType() BELOW */ /* Test if a type is an object type. */ -#define rdbIsObjectType(t) ((t >= 0 && t <= 6) || (t >= 9 && t <= 14)) +#define rdbIsObjectType(t) ((t >= 0 && t <= 7) || (t >= 9 && t <= 14)) /* Special RDB opcodes (saved/loaded with rdbSaveType/rdbLoadType). */ #define RDB_OPCODE_AUX 250 @@ -100,6 +102,14 @@ #define RDB_OPCODE_SELECTDB 254 #define RDB_OPCODE_EOF 255 +/* Module serialized values sub opcodes */ +#define RDB_MODULE_OPCODE_EOF 0 /* End of module value. */ +#define RDB_MODULE_OPCODE_SINT 1 /* Signed integer. */ +#define RDB_MODULE_OPCODE_UINT 2 /* Unsigned integer. */ +#define RDB_MODULE_OPCODE_FLOAT 3 /* Float. */ +#define RDB_MODULE_OPCODE_DOUBLE 4 /* Double. */ +#define RDB_MODULE_OPCODE_STRING 5 /* String. */ + /* rdbLoad...() functions flags. */ #define RDB_LOAD_NONE 0 #define RDB_LOAD_ENC (1<<0) diff --git a/src/server.h b/src/server.h index aaad64bdd..a32809d45 100644 --- a/src/server.h +++ b/src/server.h @@ -530,14 +530,19 @@ typedef struct RedisModuleIO { rio *rio; /* Rio stream. */ moduleType *type; /* Module type doing the operation. */ int error; /* True if error condition happened. */ + int ver; /* Module serialization version: 1 (old), + * 2 (current version with opcodes annotation). */ struct RedisModuleCtx *ctx; /* Optional context, see RM_GetContextFromIO()*/ } RedisModuleIO; +/* Macro to initialize an IO context. Note that the 'ver' field is populated + * inside rdb.c according to the version of the value to load. 
*/ #define moduleInitIOContext(iovar,mtype,rioptr) do { \ iovar.rio = rioptr; \ iovar.type = mtype; \ iovar.bytes = 0; \ iovar.error = 0; \ + iovar.ver = 0; \ iovar.ctx = NULL; \ } while(0); From 91a2ea769bbd6bf710556ce1158182a8f7445018 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 29 Jun 2017 17:38:46 +0200 Subject: [PATCH 0367/1722] HMSET and MSET implementations unified. HSET now variadic. This is the first step towards getting rid of HMSET which is a command that does not make much sense once HSET is variadic, and has a saner return value. --- src/server.c | 8 ++++---- src/t_hash.c | 32 ++++++++++++++------------------ 2 files changed, 18 insertions(+), 22 deletions(-) diff --git a/src/server.c b/src/server.c index 46e5bb9f7..2fff8c74f 100644 --- a/src/server.c +++ b/src/server.c @@ -144,7 +144,7 @@ struct redisCommand redisCommandTable[] = { {"substr",getrangeCommand,4,"r",0,NULL,1,1,1,0,0}, {"incr",incrCommand,2,"wmF",0,NULL,1,1,1,0,0}, {"decr",decrCommand,2,"wmF",0,NULL,1,1,1,0,0}, - {"mget",mgetCommand,-2,"r",0,NULL,1,-1,1,0,0}, + {"mget",mgetCommand,-2,"rF",0,NULL,1,-1,1,0,0}, {"rpush",rpushCommand,-3,"wmF",0,NULL,1,1,1,0,0}, {"lpush",lpushCommand,-3,"wmF",0,NULL,1,1,1,0,0}, {"rpushx",rpushxCommand,-3,"wmF",0,NULL,1,1,1,0,0}, @@ -198,11 +198,11 @@ struct redisCommand redisCommandTable[] = { {"zrank",zrankCommand,3,"rF",0,NULL,1,1,1,0,0}, {"zrevrank",zrevrankCommand,3,"rF",0,NULL,1,1,1,0,0}, {"zscan",zscanCommand,-3,"rR",0,NULL,1,1,1,0,0}, - {"hset",hsetCommand,4,"wmF",0,NULL,1,1,1,0,0}, + {"hset",hsetCommand,-4,"wmF",0,NULL,1,1,1,0,0}, {"hsetnx",hsetnxCommand,4,"wmF",0,NULL,1,1,1,0,0}, {"hget",hgetCommand,3,"rF",0,NULL,1,1,1,0,0}, - {"hmset",hmsetCommand,-4,"wm",0,NULL,1,1,1,0,0}, - {"hmget",hmgetCommand,-3,"r",0,NULL,1,1,1,0,0}, + {"hmset",hsetCommand,-4,"wmF",0,NULL,1,1,1,0,0}, + {"hmget",hmgetCommand,-3,"rF",0,NULL,1,1,1,0,0}, {"hincrby",hincrbyCommand,4,"wmF",0,NULL,1,1,1,0,0}, {"hincrbyfloat",hincrbyfloatCommand,4,"wmF",0,NULL,1,1,1,0,0}, 
{"hdel",hdelCommand,-3,"wF",0,NULL,1,1,1,0,0}, diff --git a/src/t_hash.c b/src/t_hash.c index a49559336..700a6233a 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -511,19 +511,6 @@ void hashTypeConvert(robj *o, int enc) { * Hash type commands *----------------------------------------------------------------------------*/ -void hsetCommand(client *c) { - int update; - robj *o; - - if ((o = hashTypeLookupWriteOrCreate(c,c->argv[1])) == NULL) return; - hashTypeTryConversion(o,c->argv,2,3); - update = hashTypeSet(o,c->argv[2]->ptr,c->argv[3]->ptr,HASH_SET_COPY); - addReply(c, update ? shared.czero : shared.cone); - signalModifiedKey(c->db,c->argv[1]); - notifyKeyspaceEvent(NOTIFY_HASH,"hset",c->argv[1],c->db->id); - server.dirty++; -} - void hsetnxCommand(client *c) { robj *o; if ((o = hashTypeLookupWriteOrCreate(c,c->argv[1])) == NULL) return; @@ -540,8 +527,8 @@ void hsetnxCommand(client *c) { } } -void hmsetCommand(client *c) { - int i; +void hsetCommand(client *c) { + int i, created = 0; robj *o; if ((c->argc % 2) == 1) { @@ -551,10 +538,19 @@ void hmsetCommand(client *c) { if ((o = hashTypeLookupWriteOrCreate(c,c->argv[1])) == NULL) return; hashTypeTryConversion(o,c->argv,2,c->argc-1); - for (i = 2; i < c->argc; i += 2) { - hashTypeSet(o,c->argv[i]->ptr,c->argv[i+1]->ptr,HASH_SET_COPY); + + for (i = 2; i < c->argc; i += 2) + created += !hashTypeSet(o,c->argv[i]->ptr,c->argv[i+1]->ptr,HASH_SET_COPY); + + /* HMSET (deprecated) and HSET return value is different. 
The API can be fixed into the unstable branch soon if we'll decide to do so in order to be more consistent, and release Redis 5.0 with this incompatibility in the future.
+#define RADIUS_NOSTORE (1<<2) /* Do not accept STORE/STOREDIST option. */
*/ +void georadiusroCommand(client *c) { + georadiusGeneric(c, RADIUS_COORDS|RADIUS_NOSTORE); +} + +/* GEORADIUSBYMEMBER_RO wrapper function. */ +void georadiusbymemberroCommand(client *c) { + georadiusGeneric(c, RADIUS_MEMBER|RADIUS_NOSTORE); +} + /* GEOHASH key ele1 ele2 ... eleN * * Returns an array with an 11 characters geohash representation of the diff --git a/src/server.c b/src/server.c index 2fff8c74f..a75581b97 100644 --- a/src/server.c +++ b/src/server.c @@ -291,7 +291,9 @@ struct redisCommand redisCommandTable[] = { {"command",commandCommand,0,"lt",0,NULL,0,0,0,0,0}, {"geoadd",geoaddCommand,-5,"wm",0,NULL,1,1,1,0,0}, {"georadius",georadiusCommand,-6,"w",0,georadiusGetKeys,1,1,1,0,0}, - {"georadiusbymember",georadiusByMemberCommand,-5,"w",0,georadiusGetKeys,1,1,1,0,0}, + {"georadius_ro",georadiusroCommand,-6,"r",0,georadiusGetKeys,1,1,1,0,0}, + {"georadiusbymember",georadiusbymemberCommand,-5,"w",0,georadiusGetKeys,1,1,1,0,0}, + {"georadiusbymember_ro",georadiusbymemberroCommand,-5,"r",0,georadiusGetKeys,1,1,1,0,0}, {"geohash",geohashCommand,-2,"r",0,NULL,1,1,1,0,0}, {"geopos",geoposCommand,-2,"r",0,NULL,1,1,1,0,0}, {"geodist",geodistCommand,-4,"r",0,NULL,1,1,1,0,0}, diff --git a/src/server.h b/src/server.h index a32809d45..64451f5c6 100644 --- a/src/server.h +++ b/src/server.h @@ -1956,8 +1956,10 @@ void replconfCommand(client *c); void waitCommand(client *c); void geoencodeCommand(client *c); void geodecodeCommand(client *c); -void georadiusByMemberCommand(client *c); +void georadiusbymemberCommand(client *c); +void georadiusbymemberroCommand(client *c); void georadiusCommand(client *c); +void georadiusroCommand(client *c); void geoaddCommand(client *c); void geohashCommand(client *c); void geoposCommand(client *c); From 501c7a725d8978475d5691354dcc0439515308b3 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 30 Jun 2017 12:12:00 +0200 Subject: [PATCH 0369/1722] Fix abort typo in Lua debugger help screen. 
The idea is basically to allow collecting latency information in scripts, cron jobs or whatever, just running for a limited time and then producing a single output.
Otherwise if --raw or\n" +" --csv is specified, or if you redirect the output to a non\n" +" TTY, it samples the latency for 1 second (you can use\n" +" -i to change the interval), then produces a single output\n" +" and exits.\n" " --latency-history Like --latency but tracking latency changes over time.\n" " Default time interval is 15 sec. Change it using -i.\n" " --latency-dist Shows latency as a spectrum, requires xterm 256 colors.\n" @@ -1472,6 +1478,18 @@ static int evalMode(int argc, char **argv) { * Latency and latency history modes *--------------------------------------------------------------------------- */ +static void latencyModePrint(long long min, long long max, double avg, long long count) { + if (config.output == OUTPUT_STANDARD) { + printf("min: %lld, max: %lld, avg: %.2f (%lld samples)", + min, max, avg, count); + fflush(stdout); + } else if (config.output == OUTPUT_CSV) { + printf("%lld,%lld,%.2f,%lld\n", min, max, avg, count); + } else if (config.output == OUTPUT_RAW) { + printf("%lld %lld %.2f %lld\n", min, max, avg, count); + } +} + #define LATENCY_SAMPLE_RATE 10 /* milliseconds. */ #define LATENCY_HISTORY_DEFAULT_INTERVAL 15000 /* milliseconds. */ static void latencyMode(void) { @@ -1483,6 +1501,14 @@ static void latencyMode(void) { double avg; long long history_start = mstime(); + /* Set a default for the interval in case of --latency option + * with --raw, --csv or when it is redirected to non tty. */ + if (config.interval == 0) { + config.interval = 1000; + } else { + config.interval /= 1000; /* We need to convert to milliseconds. */ + } + if (!context) exit(1); while(1) { start = mstime(); @@ -1503,9 +1529,19 @@ static void latencyMode(void) { tot += latency; avg = (double) tot/count; } - printf("\x1b[0G\x1b[2Kmin: %lld, max: %lld, avg: %.2f (%lld samples)", - min, max, avg, count); - fflush(stdout); + + if (config.output == OUTPUT_STANDARD) { + printf("\x1b[0G\x1b[2K"); /* Clear the line. 
+ifneq (,$(filter aarch64 armv%,$(uname_M)))
both for regression testing and in order to properly fix it at some point in the future.
*/ int geohashBoundingBox(double longitude, double latitude, double radius_meters, double *bounds) { if (!bounds) return 0; @@ -154,25 +168,27 @@ GeoHashRadius geohashGetAreasByRadius(double longitude, double latitude, double } /* Exclude the search areas that are useless. */ - if (area.latitude.min < min_lat) { - GZERO(neighbors.south); - GZERO(neighbors.south_west); - GZERO(neighbors.south_east); - } - if (area.latitude.max > max_lat) { - GZERO(neighbors.north); - GZERO(neighbors.north_east); - GZERO(neighbors.north_west); - } - if (area.longitude.min < min_lon) { - GZERO(neighbors.west); - GZERO(neighbors.south_west); - GZERO(neighbors.north_west); - } - if (area.longitude.max > max_lon) { - GZERO(neighbors.east); - GZERO(neighbors.south_east); - GZERO(neighbors.north_east); + if (steps >= 2) { + if (area.latitude.min < min_lat) { + GZERO(neighbors.south); + GZERO(neighbors.south_west); + GZERO(neighbors.south_east); + } + if (area.latitude.max > max_lat) { + GZERO(neighbors.north); + GZERO(neighbors.north_east); + GZERO(neighbors.north_west); + } + if (area.longitude.min < min_lon) { + GZERO(neighbors.west); + GZERO(neighbors.south_west); + GZERO(neighbors.north_west); + } + if (area.longitude.max > max_lon) { + GZERO(neighbors.east); + GZERO(neighbors.south_east); + GZERO(neighbors.north_east); + } } radius.hash = hash; radius.neighbors = neighbors; diff --git a/tests/unit/geo.tcl b/tests/unit/geo.tcl index fdbfbf139..604697be4 100644 --- a/tests/unit/geo.tcl +++ b/tests/unit/geo.tcl @@ -49,6 +49,9 @@ proc compare_lists {List1 List2} { # # The format is: seed km lon lat set regression_vectors { + {1482225976969 7083 81.634948934258375 30.561509253718668} + {1482340074151 5416 -70.863281847379767 -46.347003465679947} + {1499014685896 6064 -89.818768962202014 -40.463868561416803} {1412 156 149.29737817929004 15.95807862745508} {441574 143 59.235461856813856 66.269555127373678} {160645 187 -101.88575239939883 49.061997951502917} From 
4291a39afeb1fa5f4bf171034849a1d3aedc9cbd Mon Sep 17 00:00:00 2001 From: Dvir Volk Date: Mon, 3 Jul 2017 23:58:50 +0300 Subject: [PATCH 0373/1722] fixed #4100 --- src/module.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/module.c b/src/module.c index 35e479927..ac00867e4 100644 --- a/src/module.c +++ b/src/module.c @@ -3331,6 +3331,7 @@ void moduleHandleBlockedClients(void) { bc->reply_client->bufpos); if (listLength(bc->reply_client->reply)) listJoin(c->reply,bc->reply_client->reply); + c->reply_bytes += bc->reply_client->reply_bytes; } freeClient(bc->reply_client); From 80f2d39f6441fe72026bb8cb79e7bce8381eabee Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 4 Jul 2017 11:55:05 +0200 Subject: [PATCH 0374/1722] Add symmetrical assertion to track c->reply_buffer infinite growth. Redis clients need to have an instantaneous idea of the amount of memory they are consuming (if the number is not exact should at least be proportional to the actual memory usage). We do that adding and subtracting the SDS length when pushing / popping from the client->reply list. However it is quite simple to add bugs in such a setup, by not taking the objects in the list and the count in sync. For such reason, Redis has an assertion to track counts near 2^64: those are always the result of the counter wrapping around because we subtract more than we add. This commit adds the symmetrical assertion: when the list is empty since we sent everything, the reply_bytes count should be zero. Thanks to the new assertion it should be simple to also detect the other problem, where the count slowly increases because of over-counting. The assertion adds a conditional in the code that sends the buffer to the socket but should not create any measurable performance slowdown, listLength() just accesses a structure field, and this code path is totally dominated by write(2). Related to #4100. 
+     * executed into MULTI/EXEC, they are executed as single commands
*/ + if (ctx->flags & REDISMODULE_CTX_THREAD_SAFE) return; execCommandPropagateMulti(ctx->client); ctx->flags |= REDISMODULE_CTX_MULTI_EMITTED; } From ed7cbd5a4b7b23d317f0e5e0e932842a66d30140 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 5 Jul 2017 15:40:25 +0200 Subject: [PATCH 0376/1722] Avoid closing invalid FDs to make Valgrind happier. --- src/server.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/server.c b/src/server.c index a75581b97..57ee43e38 100644 --- a/src/server.c +++ b/src/server.c @@ -1555,7 +1555,11 @@ int restartServer(int flags, mstime_t delay) { /* Close all file descriptors, with the exception of stdin, stdout, strerr * which are useful if we restart a Redis server which is not daemonized. */ - for (j = 3; j < (int)server.maxclients + 1024; j++) close(j); + for (j = 3; j < (int)server.maxclients + 1024; j++) { + /* Test the descriptor validity before closing it, otherwise + * Valgrind issues a warning on close(). */ + if (fcntl(j,F_GETFD) != -1) close(j); + } /* Execute the server with the original command line. 
*/ if (delay) usleep(delay*1000); From b7b3e80a7308fd546857cf87494c832aae9d2b39 Mon Sep 17 00:00:00 2001 From: spinlock Date: Wed, 5 Jul 2017 14:25:05 +0000 Subject: [PATCH 0377/1722] Optimize addReplyBulkSds for better performance --- src/networking.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/networking.c b/src/networking.c index 27006514d..98e779642 100644 --- a/src/networking.c +++ b/src/networking.c @@ -561,8 +561,7 @@ void addReplyBulkCBuffer(client *c, const void *p, size_t len) { /* Add sds to reply (takes ownership of sds and frees it) */ void addReplyBulkSds(client *c, sds s) { - addReplySds(c,sdscatfmt(sdsempty(),"$%u\r\n", - (unsigned long)sdslen(s))); + addReplyLongLongWithPrefix(c,sdslen(s),'$'); addReplySds(c,s); addReply(c,shared.crlf); } From db56f485a8f83cd27b6cbde4f813b21875173d05 Mon Sep 17 00:00:00 2001 From: spinlock Date: Wed, 5 Jul 2017 14:32:07 +0000 Subject: [PATCH 0378/1722] update Makefile for test-sds --- src/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index 24e960593..70574630a 100644 --- a/src/Makefile +++ b/src/Makefile @@ -254,7 +254,7 @@ lcov: @genhtml --legend -o lcov-html redis.info test-sds: sds.c sds.h - $(REDIS_CC) sds.c zmalloc.c -DSDS_TEST_MAIN -o /tmp/sds_test + $(REDIS_CC) sds.c zmalloc.c -DSDS_TEST_MAIN $(FINAL_LIBS) -o /tmp/sds_test /tmp/sds_test .PHONY: lcov From ed93fb8a29c40a934d4b86be921c6ad63d18813c Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 6 Jul 2017 10:29:19 +0200 Subject: [PATCH 0379/1722] Modules: DEBUG DIGEST interface. 
--- src/debug.c | 9 ++++++ src/module.c | 63 +++++++++++++++++++++++++++++++++++++++++ src/modules/hellotype.c | 13 ++++++++- src/redismodule.h | 6 ++++ src/server.h | 18 ++++++++++++ 5 files changed, 108 insertions(+), 1 deletion(-) diff --git a/src/debug.c b/src/debug.c index a4caa49f2..d6e12ec2a 100644 --- a/src/debug.c +++ b/src/debug.c @@ -239,6 +239,15 @@ void computeDatasetDigest(unsigned char *final) { xorDigest(digest,eledigest,20); } hashTypeReleaseIterator(hi); + } else if (o->type == OBJ_MODULE) { + RedisModuleDigest md; + moduleValue *mv = o->ptr; + moduleType *mt = mv->type; + moduleInitDigestContext(md); + if (mt->digest) { + mt->digest(&md,mv->value); + xorDigest(digest,md.x,sizeof(md.x)); + } } else { serverPanic("Unknown object type"); } diff --git a/src/module.c b/src/module.c index e377d2712..a85307ccd 100644 --- a/src/module.c +++ b/src/module.c @@ -3057,6 +3057,66 @@ loaderr: return 0; /* Never reached. */ } +/* -------------------------------------------------------------------------- + * Key digest API (DEBUG DIGEST interface for modules types) + * -------------------------------------------------------------------------- */ + +/* Add a new element to the digest. This function can be called multiple times + * one element after the other, for all the elements that constitute a given + * data structure. The function call must be followed by the call to + * `RedisModule_DigestEndSequence` eventually, when all the elements that are + * always in a given order are added. See the Redis Modules data types + * documentation for more info. However this is a quick example that uses Redis + * data types as an example. + * + * To add a sequence of unordered elements (for example in the case of a Redis + * Set), the pattern to use is: + * + * foreach element { + * AddElement(element); + * EndSequence(); + * } + * + * Because Sets are not ordered, so every element added has a position that + * does not depend from the other. 
However if instead our elements are + * ordered in pairs, like field-value pairs of an Hash, then one should + * use: + * + * foreach key,value { + * AddElement(key); + * AddElement(value); + * EndSquence(); + * } + * + * Because the key and value will be always in the above order, while instead + * the single key-value pairs, can appear in any position into a Redis hash. + * + * A list of ordered elements would be implemented with: + * + * foreach element { + * AddElement(element); + * } + * EndSequence(); + * + */ +void RM_DigestAddStringBuffer(RedisModuleDigest *md, unsigned char *ele, size_t len) { + mixDigest(md->o,ele,len); +} + +/* Like `RedisModule_DigestAddStringBuffer()` but takes a long long as input + * that gets converted into a string before adding it to the digest. */ +void RM_DigestAddLongLong(RedisModuleDigest *md, long long ll) { + char buf[LONG_STR_SIZE]; + size_t len = ll2string(buf,sizeof(buf),ll); + mixDigest(md->o,buf,len); +} + +/* See the doucmnetation for `RedisModule_DigestAddElement()`. 
*/ +void RM_DigestEndSequence(RedisModuleDigest *md) { + xorDigest(md->x,md->o,sizeof(md->o)); + memset(md->o,0,sizeof(md->o)); +} + /* -------------------------------------------------------------------------- * AOF API for modules data types * -------------------------------------------------------------------------- */ @@ -3818,4 +3878,7 @@ void moduleRegisterCoreAPI(void) { REGISTER_API(FreeThreadSafeContext); REGISTER_API(ThreadSafeContextLock); REGISTER_API(ThreadSafeContextUnlock); + REGISTER_API(DigestAddStringBuffer); + REGISTER_API(DigestAddLongLong); + REGISTER_API(DigestEndSequence); } diff --git a/src/modules/hellotype.c b/src/modules/hellotype.c index 027155d45..ba634c4a1 100644 --- a/src/modules/hellotype.c +++ b/src/modules/hellotype.c @@ -238,6 +238,16 @@ void HelloTypeFree(void *value) { HelloTypeReleaseObject(value); } +void HelloTypeDigest(RedisModuleDigest *md, void *value) { + struct HelloTypeObject *hto = value; + struct HelloTypeNode *node = hto->head; + while(node) { + RedisModule_DigestAddLongLong(md,node->value); + node = node->next; + } + RedisModule_DigestEndSequence(md); +} + /* This function must be present on each Redis module. It is used in order to * register the commands into the Redis server. 
*/ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { @@ -253,7 +263,8 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) .rdb_save = HelloTypeRdbSave, .aof_rewrite = HelloTypeAofRewrite, .mem_usage = HelloTypeMemUsage, - .free = HelloTypeFree + .free = HelloTypeFree, + .digest = HelloTypeDigest }; HelloType = RedisModule_CreateDataType(ctx,"hellotype",0,&tm); diff --git a/src/redismodule.h b/src/redismodule.h index 2f2e3c923..dd14c5f4e 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -218,6 +218,9 @@ RedisModuleCtx *REDISMODULE_API_FUNC(RedisModule_GetThreadSafeContext)(RedisModu void REDISMODULE_API_FUNC(RedisModule_FreeThreadSafeContext)(RedisModuleCtx *ctx); void REDISMODULE_API_FUNC(RedisModule_ThreadSafeContextLock)(RedisModuleCtx *ctx); void REDISMODULE_API_FUNC(RedisModule_ThreadSafeContextUnlock)(RedisModuleCtx *ctx); +void REDISMODULE_API_FUNC(RedisModule_DigestAddStringBuffer)(RedisModuleDigest *md, unsigned char *ele, size_t len); +void REDISMODULE_API_FUNC(RedisModule_DigestAddLongLong)(RedisModuleDigest *md, long long ele); +void REDISMODULE_API_FUNC(RedisModule_DigestEndSequence)(RedisModuleDigest *md); /* This is included inline inside each Redis module. 
*/ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int apiver) __attribute__((unused)); @@ -330,6 +333,9 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int REDISMODULE_GET_API(FreeThreadSafeContext); REDISMODULE_GET_API(ThreadSafeContextLock); REDISMODULE_GET_API(ThreadSafeContextUnlock); + REDISMODULE_GET_API(DigestAddStringBuffer); + REDISMODULE_GET_API(DigestAddLongLong); + REDISMODULE_GET_API(DigestEndSequence); RedisModule_SetModuleAttribs(ctx,name,ver,apiver); return REDISMODULE_OK; diff --git a/src/server.h b/src/server.h index 64451f5c6..88690ace4 100644 --- a/src/server.h +++ b/src/server.h @@ -546,6 +546,22 @@ typedef struct RedisModuleIO { iovar.ctx = NULL; \ } while(0); +/* This is a structure used to export DEBUG DIGEST capabilities to Redis + * modules. We want to capture both the ordered and unordered elements of + * a data structure, so that a digest can be created in a way that correctly + * reflects the values. See the DEBUG DIGEST command implementation for more + * background. */ +typedef struct RedisModuleDigest { + unsigned char o[20]; /* Ordered elements. */ + unsigned char x[20]; /* Xored elements. */ +} RedisModuleDigest; + +/* Just start with a digest composed of all zero bytes. */ +#define moduleInitDigestContext(mdvar) do { \ + memset(mdvar.o,0,sizeof(mdvar.o)); \ + memset(mdvar.x,0,sizeof(mdvar.x)); \ +} while(0); + /* Objects encoding. Some kind of objects like Strings and Hashes can be * internally represented in multiple ways. The 'encoding' field of the object * is set to one of this fields for this object. 
*/ @@ -1993,6 +2009,8 @@ void disableWatchdog(void); void watchdogScheduleSignal(int period); void serverLogHexDump(int level, char *descr, void *value, size_t len); int memtest_preserving_test(unsigned long *m, size_t bytes, int passes); +void mixDigest(unsigned char *digest, void *ptr, size_t len); +void xorDigest(unsigned char *digest, void *ptr, size_t len); #define redisDebug(fmt, ...) \ printf("DEBUG %s:%d > " fmt "\n", __FILE__, __LINE__, __VA_ARGS__) From cb3790a20913242aa97c29ec895d8206f9aafa7a Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 6 Jul 2017 11:20:49 +0200 Subject: [PATCH 0380/1722] Free IO context if any in RDB loading code. Thanks to @oranagra for spotting this bug. --- src/rdb.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/rdb.c b/src/rdb.c index 18acb4195..1341942c4 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -1415,6 +1415,10 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) { /* Call the rdb_load method of the module providing the 10 bit * encoding version in the lower 10 bits of the module ID. */ void *ptr = mt->rdb_load(&io,moduleid&1023); + if (io.ctx) { + moduleFreeContext(io.ctx); + zfree(io.ctx); + } /* Module v2 serialization has an EOF mark at the end. */ if (io.ver == 2) { From 16b407a1ffa690adbaaf3f9da535d00d4190b5aa Mon Sep 17 00:00:00 2001 From: sunweinan Date: Thu, 6 Jul 2017 19:47:21 +0800 Subject: [PATCH 0381/1722] minor fix in listJoin(). --- src/adlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/adlist.c b/src/adlist.c index f0a261b61..e87d25cee 100644 --- a/src/adlist.c +++ b/src/adlist.c @@ -357,6 +357,6 @@ void listJoin(list *l, list *o) { l->len += o->len; /* Setup other as an empty list. */ - o->head = l->tail = NULL; + o->head = o->tail = NULL; o->len = 0; } From 63ec3e0170c0a7cc1d24fbae8fe13d1044d49e30 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 10 Jul 2017 13:38:23 +0200 Subject: [PATCH 0382/1722] AOF check utility: ability to check files with RDB preamble. 
--- src/Makefile | 11 +++++------ src/rdb.c | 2 +- src/redis-check-aof.c | 38 ++++++++++++++++++++++++++------------ src/redis-check-rdb.c | 32 +++++++++++++++++++++----------- src/server.c | 6 ++++-- src/server.h | 7 ++++--- 6 files changed, 61 insertions(+), 35 deletions(-) diff --git a/src/Makefile b/src/Makefile index 70574630a..86e0b3fe0 100644 --- a/src/Makefile +++ b/src/Makefile @@ -144,14 +144,13 @@ endif REDIS_SERVER_NAME=redis-server REDIS_SENTINEL_NAME=redis-sentinel -REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o +REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o REDIS_CLI_NAME=redis-cli REDIS_CLI_OBJ=anet.o adlist.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o REDIS_BENCHMARK_NAME=redis-benchmark REDIS_BENCHMARK_OBJ=ae.o anet.o redis-benchmark.o adlist.o zmalloc.o redis-benchmark.o REDIS_CHECK_RDB_NAME=redis-check-rdb 
REDIS_CHECK_AOF_NAME=redis-check-aof -REDIS_CHECK_AOF_OBJ=redis-check-aof.o all: $(REDIS_SERVER_NAME) $(REDIS_SENTINEL_NAME) $(REDIS_CLI_NAME) $(REDIS_BENCHMARK_NAME) $(REDIS_CHECK_RDB_NAME) $(REDIS_CHECK_AOF_NAME) @echo "" @@ -207,6 +206,10 @@ $(REDIS_SENTINEL_NAME): $(REDIS_SERVER_NAME) $(REDIS_CHECK_RDB_NAME): $(REDIS_SERVER_NAME) $(REDIS_INSTALL) $(REDIS_SERVER_NAME) $(REDIS_CHECK_RDB_NAME) +# redis-check-aof +$(REDIS_CHECK_AOF_NAME): $(REDIS_SERVER_NAME) + $(REDIS_INSTALL) $(REDIS_SERVER_NAME) $(REDIS_CHECK_AOF_NAME) + # redis-cli $(REDIS_CLI_NAME): $(REDIS_CLI_OBJ) $(REDIS_LD) -o $@ $^ ../deps/hiredis/libhiredis.a ../deps/linenoise/linenoise.o $(FINAL_LIBS) @@ -215,10 +218,6 @@ $(REDIS_CLI_NAME): $(REDIS_CLI_OBJ) $(REDIS_BENCHMARK_NAME): $(REDIS_BENCHMARK_OBJ) $(REDIS_LD) -o $@ $^ ../deps/hiredis/libhiredis.a $(FINAL_LIBS) -# redis-check-aof -$(REDIS_CHECK_AOF_NAME): $(REDIS_CHECK_AOF_OBJ) - $(REDIS_LD) -o $@ $^ $(FINAL_LIBS) - dict-benchmark: dict.c zmalloc.c sds.c siphash.c $(REDIS_CC) $(FINAL_CFLAGS) $^ -D DICT_BENCHMARK_MAIN -o $@ $(FINAL_LIBS) diff --git a/src/rdb.c b/src/rdb.c index 1341942c4..792c8ff94 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -61,7 +61,7 @@ void rdbCheckThenExit(int linenum, char *reason, ...) { if (!rdbCheckMode) { serverLog(LL_WARNING, "%s", msg); char *argv[2] = {"",server.rdb_filename}; - redis_check_rdb_main(2,argv); + redis_check_rdb_main(2,argv,NULL); } else { rdbCheckError("%s",msg); } diff --git a/src/redis-check-aof.c b/src/redis-check-aof.c index 6c8f55279..09a118eb7 100644 --- a/src/redis-check-aof.c +++ b/src/redis-check-aof.c @@ -28,13 +28,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#include "fmacros.h" -#include -#include -#include -#include +#include "server.h" #include -#include "config.h" #define ERROR(...) 
{ \ char __buf[1024]; \ @@ -60,7 +55,7 @@ int readLong(FILE *fp, char prefix, long *target) { return 0; } if (buf[0] != prefix) { - ERROR("Expected prefix '%c', got: '%c'",buf[0],prefix); + ERROR("Expected prefix '%c', got: '%c'",prefix,buf[0]); return 0; } *target = strtol(buf+1,&eptr,10); @@ -87,7 +82,7 @@ int readString(FILE *fp, char** target) { /* Increase length to also consume \r\n */ len += 2; - *target = (char*)malloc(len); + *target = (char*)zmalloc(len); if (!readBytes(fp,*target,len)) { return 0; } @@ -127,12 +122,12 @@ off_t process(FILE *fp) { } } } - free(str); + zfree(str); } /* Stop if the loop did not finish */ if (i < argc) { - if (str) free(str); + if (str) zfree(str); break; } } @@ -146,7 +141,7 @@ off_t process(FILE *fp) { return pos; } -int main(int argc, char **argv) { +int redis_check_aof_main(int argc, char **argv) { char *filename; int fix = 0; @@ -185,6 +180,25 @@ int main(int argc, char **argv) { exit(1); } + /* This AOF file may have an RDB preamble. Check this to start, and if this + * is the case, start processing the RDB part. */ + if (size >= 8) { /* There must be at least room for the RDB header. 
*/ + char sig[5]; + int has_preamble = fread(sig,sizeof(sig),1,fp) == 1 && + memcmp(sig,"REDIS",sizeof(sig)) == 0; + rewind(fp); + if (has_preamble) { + printf("The AOF appears to start with an RDB preamble.\n" + "Checking the RDB preamble to start:\n"); + if (redis_check_rdb_main(argc,argv,fp) == C_ERR) { + printf("RDB preamble of AOF file is not sane, aborting.\n"); + exit(1); + } else { + printf("RDB preamble is OK, proceding with AOF tail...\n"); + } + } + } + off_t pos = process(fp); off_t diff = size-pos; printf("AOF analyzed: size=%lld, ok_up_to=%lld, diff=%lld\n", @@ -214,5 +228,5 @@ int main(int argc, char **argv) { } fclose(fp); - return 0; + exit(0); } diff --git a/src/redis-check-rdb.c b/src/redis-check-rdb.c index 08be40f6a..4027536e5 100644 --- a/src/redis-check-rdb.c +++ b/src/redis-check-rdb.c @@ -173,16 +173,18 @@ void rdbCheckSetupSignals(void) { } /* Check the specified RDB file. Return 0 if the RDB looks sane, otherwise - * 1 is returned. */ -int redis_check_rdb(char *rdbfilename) { + * 1 is returned. + * The file is specified as a filename in 'rdbfilename' if 'fp' is not NULL, + * otherwise the already open file 'fp' is checked. */ +int redis_check_rdb(char *rdbfilename, FILE *fp) { uint64_t dbid; int type, rdbver; char buf[1024]; long long expiretime, now = mstime(); - FILE *fp; static rio rdb; /* Pointed by global struct riostate. */ - if ((fp = fopen(rdbfilename,"r")) == NULL) return 1; + int closefile = (fp == NULL); + if (fp == NULL && (fp = fopen(rdbfilename,"r")) == NULL) return 1; rioInitWithFile(&rdb,fp); rdbstate.rio = &rdb; @@ -310,7 +312,7 @@ int redis_check_rdb(char *rdbfilename) { } } - fclose(fp); + if (closefile) fclose(fp); return 0; eoferr: /* unexpected end of file is handled here with a fatal exit */ @@ -323,12 +325,19 @@ eoferr: /* unexpected end of file is handled here with a fatal exit */ } /* RDB check main: called form redis.c when Redis is executed with the - * redis-check-rdb alias. 
+ * redis-check-rdb alias, on during RDB loading errors. * - * The function never returns, but exits with the status code according - * to success (RDB is sane) or error (RDB is corrupted). */ -int redis_check_rdb_main(int argc, char **argv) { - if (argc != 2) { + * The function works in two ways: can be called with argc/argv as a + * standalone executable, or called with a non NULL 'fp' argument if we + * already have an open file to check. This happens when the function + * is used to check an RDB preamble inside an AOF file. + * + * When called with fp = NULL, the function never returns, but exits with the + * status code according to success (RDB is sane) or error (RDB is corrupted). + * Otherwise if called with a non NULL fp, the function returns C_OK or + * C_ERR depending on the success or failure. */ +int redis_check_rdb_main(int argc, char **argv, FILE *fp) { + if (argc != 2 && fp == NULL) { fprintf(stderr, "Usage: %s \n", argv[0]); exit(1); } @@ -341,10 +350,11 @@ int redis_check_rdb_main(int argc, char **argv) { rdbCheckMode = 1; rdbCheckInfo("Checking RDB file %s", argv[1]); rdbCheckSetupSignals(); - int retval = redis_check_rdb(argv[1]); + int retval = redis_check_rdb(argv[1],fp); if (retval == 0) { rdbCheckInfo("\\o/ RDB looks OK! \\o/"); rdbShowGenericInfo(); } + if (fp) return (retval == 0) ? C_OK : C_ERR; exit(retval); } diff --git a/src/server.c b/src/server.c index 57ee43e38..2da6fb544 100644 --- a/src/server.c +++ b/src/server.c @@ -3711,11 +3711,13 @@ int main(int argc, char **argv) { initSentinel(); } - /* Check if we need to start in redis-check-rdb mode. We just execute + /* Check if we need to start in redis-check-rdb/aof mode. We just execute * the program main. However the program is part of the Redis executable * so that we can easily execute an RDB check on loading errors. 
*/ if (strstr(argv[0],"redis-check-rdb") != NULL) - redis_check_rdb_main(argc,argv); + redis_check_rdb_main(argc,argv,NULL); + else if (strstr(argv[0],"redis-check-aof") != NULL) + redis_check_aof_main(argc,argv); if (argc >= 2) { j = 1; /* First option to parse in argv[] */ diff --git a/src/server.h b/src/server.h index 88690ace4..2be75f1a1 100644 --- a/src/server.h +++ b/src/server.h @@ -1770,9 +1770,10 @@ void sentinelTimer(void); char *sentinelHandleConfiguration(char **argv, int argc); void sentinelIsRunning(void); -/* redis-check-rdb */ -int redis_check_rdb(char *rdbfilename); -int redis_check_rdb_main(int argc, char **argv); +/* redis-check-rdb & aof */ +int redis_check_rdb(char *rdbfilename, FILE *fp); +int redis_check_rdb_main(int argc, char **argv, FILE *fp); +int redis_check_aof_main(int argc, char **argv); /* Scripting */ void scriptingInit(int setup); From cd3b6c9d5c8261e4c37eacf2935963d40bf3e642 Mon Sep 17 00:00:00 2001 From: Guy Benoish Date: Mon, 10 Jul 2017 14:41:57 +0300 Subject: [PATCH 0383/1722] Modules: Fix io->bytes calculation in RDB save --- src/module.c | 85 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 55 insertions(+), 30 deletions(-) diff --git a/src/module.c b/src/module.c index 69c2f9f15..3def48207 100644 --- a/src/module.c +++ b/src/module.c @@ -2885,13 +2885,18 @@ void moduleRDBLoadError(RedisModuleIO *io) { * data types. */ void RM_SaveUnsigned(RedisModuleIO *io, uint64_t value) { if (io->error) return; + /* Save opcode. */ int retval = rdbSaveLen(io->rio, RDB_MODULE_OPCODE_UINT); - if (retval != -1) rdbSaveLen(io->rio, value); - if (retval == -1) { - io->error = 1; - } else { - io->bytes += retval; - } + if (retval == -1) goto saveerr; + io->bytes += retval; + /* Save value. */ + retval = rdbSaveLen(io->rio, value); + if (retval == -1) goto saveerr; + io->bytes += retval; + return; + +saveerr: + io->error = 1; } /* Load an unsigned 64 bit value from the RDB file. 
This function should only @@ -2934,26 +2939,36 @@ int64_t RM_LoadSigned(RedisModuleIO *io) { * the RDB file. */ void RM_SaveString(RedisModuleIO *io, RedisModuleString *s) { if (io->error) return; + /* Save opcode. */ int retval = rdbSaveLen(io->rio, RDB_MODULE_OPCODE_STRING); - if (retval != -1) retval = rdbSaveStringObject(io->rio,s); - if (retval == -1) { - io->error = 1; - } else { - io->bytes += retval; - } + if (retval == -1) goto saveerr; + io->bytes += retval; + /* Save value. */ + retval = rdbSaveStringObject(io->rio, s); + if (retval == -1) goto saveerr; + io->bytes += retval; + return; + +saveerr: + io->error = 1; } /* Like RedisModule_SaveString() but takes a raw C pointer and length * as input. */ void RM_SaveStringBuffer(RedisModuleIO *io, const char *str, size_t len) { if (io->error) return; + /* Save opcode. */ int retval = rdbSaveLen(io->rio, RDB_MODULE_OPCODE_STRING); - if (retval != -1) retval = rdbSaveRawString(io->rio,(unsigned char*)str,len); - if (retval == -1) { - io->error = 1; - } else { - io->bytes += retval; - } + if (retval == -1) goto saveerr; + io->bytes += retval; + /* Save value. */ + retval = rdbSaveRawString(io->rio, (unsigned char*)str,len); + if (retval == -1) goto saveerr; + io->bytes += retval; + return; + +saveerr: + io->error = 1; } /* Implements RM_LoadString() and RM_LoadStringBuffer() */ @@ -3001,13 +3016,18 @@ char *RM_LoadStringBuffer(RedisModuleIO *io, size_t *lenptr) { * It is possible to load back the value with RedisModule_LoadDouble(). */ void RM_SaveDouble(RedisModuleIO *io, double value) { if (io->error) return; + /* Save opcode. */ int retval = rdbSaveLen(io->rio, RDB_MODULE_OPCODE_DOUBLE); - if (retval != -1) retval = rdbSaveBinaryDoubleValue(io->rio, value); - if (retval == -1) { - io->error = 1; - } else { - io->bytes += retval; - } + if (retval == -1) goto saveerr; + io->bytes += retval; + /* Save value. 
*/ + retval = rdbSaveBinaryDoubleValue(io->rio, value); + if (retval == -1) goto saveerr; + io->bytes += retval; + return; + +saveerr: + io->error = 1; } /* In the context of the rdb_save method of a module data type, loads back the @@ -3032,13 +3052,18 @@ loaderr: * It is possible to load back the value with RedisModule_LoadFloat(). */ void RM_SaveFloat(RedisModuleIO *io, float value) { if (io->error) return; + /* Save opcode. */ int retval = rdbSaveLen(io->rio, RDB_MODULE_OPCODE_FLOAT); - if (retval != -1) retval = rdbSaveBinaryFloatValue(io->rio, value); - if (retval == -1) { - io->error = 1; - } else { - io->bytes += retval; - } + if (retval == -1) goto saveerr; + io->bytes += retval; + /* Save value. */ + retval = rdbSaveBinaryFloatValue(io->rio, value); + if (retval == -1) goto saveerr; + io->bytes += retval; + return; + +saveerr: + io->error = 1; } /* In the context of the rdb_save method of a module data type, loads back the From a5cb21177ad93b4cd0c4f85c871e65a65fb8ecfa Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 10 Jul 2017 16:41:25 +0200 Subject: [PATCH 0384/1722] redis-check-aof: tell users there is a --fix option. --- src/redis-check-aof.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/redis-check-aof.c b/src/redis-check-aof.c index 09a118eb7..33bb445e7 100644 --- a/src/redis-check-aof.c +++ b/src/redis-check-aof.c @@ -220,7 +220,8 @@ int redis_check_aof_main(int argc, char **argv) { printf("Successfully truncated AOF\n"); } } else { - printf("AOF is not valid\n"); + printf("AOF is not valid. " + "Use the --fix option to try fixing it.\n"); exit(1); } } else { From ff1b4ccbcadee63070ad5bdb0f732b396d32d455 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 11 Jul 2017 00:13:52 +0200 Subject: [PATCH 0385/1722] Event loop: call after sleep() only from top level. 
In general we do not want before/after sleep() callbacks to be called when we re-enter the event loop, since those calls are only designed in order to perform operations every main iteration of the event loop, and re-entering is often just a way to incrementally serve clietns with error messages or other auxiliary operations. However, if we call the callbacks, we are then forced to think at before/after sleep callbacks as re-entrant, which is much harder without any good need. However here there was also a clear bug: beforeSleep() was actually never called when re-entering the event loop. But the new afterSleep() callback was. This is broken and in this instance re-entering afterSleep() caused a modules GIL dead lock. --- src/ae.c | 5 +++-- src/ae.h | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/ae.c b/src/ae.c index ecbaa94f3..742388d85 100644 --- a/src/ae.c +++ b/src/ae.c @@ -344,6 +344,7 @@ static int processTimeEvents(aeEventLoop *eventLoop) { * if flags has AE_FILE_EVENTS set, file events are processed. * if flags has AE_TIME_EVENTS set, time events are processed. * if flags has AE_DONT_WAIT set the function returns ASAP until all + * if flags has AE_CALL_AFTER_SLEEP set, the aftersleep callback is called. * the events that's possible to process without to wait are processed. * * The function returns the number of events processed. */ @@ -403,7 +404,7 @@ int aeProcessEvents(aeEventLoop *eventLoop, int flags) numevents = aeApiPoll(eventLoop, tvp); /* After sleep callback. 
*/ - if (eventLoop->aftersleep != NULL) + if (eventLoop->aftersleep != NULL && flags & AE_CALL_AFTER_SLEEP) eventLoop->aftersleep(eventLoop); for (j = 0; j < numevents; j++) { @@ -460,7 +461,7 @@ void aeMain(aeEventLoop *eventLoop) { while (!eventLoop->stop) { if (eventLoop->beforesleep != NULL) eventLoop->beforesleep(eventLoop); - aeProcessEvents(eventLoop, AE_ALL_EVENTS); + aeProcessEvents(eventLoop, AE_ALL_EVENTS|AE_CALL_AFTER_SLEEP); } } diff --git a/src/ae.h b/src/ae.h index e3617759b..c49bfe233 100644 --- a/src/ae.h +++ b/src/ae.h @@ -46,6 +46,7 @@ #define AE_TIME_EVENTS 2 #define AE_ALL_EVENTS (AE_FILE_EVENTS|AE_TIME_EVENTS) #define AE_DONT_WAIT 4 +#define AE_CALL_AFTER_SLEEP 8 #define AE_NOMORE -1 #define AE_DELETED_EVENT_ID -1 From 89508a4fd45cfb96b920eea3b748e8e09e20e229 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 11 Jul 2017 12:33:00 +0200 Subject: [PATCH 0386/1722] Clients blocked in modules: free argv/argc later. See issue #3844 for more information. --- src/module.c | 5 +++++ src/networking.c | 13 ++++++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/module.c b/src/module.c index 3def48207..e0906e255 100644 --- a/src/module.c +++ b/src/module.c @@ -3304,6 +3304,11 @@ void moduleBlockedClientPipeReadable(aeEventLoop *el, int fd, void *privdata, in void unblockClientFromModule(client *c) { RedisModuleBlockedClient *bc = c->bpop.module_blocked_handle; bc->client = NULL; + /* Reset the client for a new query since, for blocking commands implemented + * into modules, we do not it immediately after the command returns (and + * the client blocks) in order to be still able to access the argument + * vector from callbacks. 
*/ + resetClient(c); } /* Block a client in the context of a blocking command, returning an handle diff --git a/src/networking.c b/src/networking.c index 98e779642..aeaeca967 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1332,10 +1332,17 @@ void processInputBuffer(client *c) { /* Update the applied replication offset of our master. */ c->reploff = c->read_reploff - sdslen(c->querybuf); } - resetClient(c); + + /* Don't reset the client structure for clients blocked in a + * module blocking command, so that the reply callback will + * still be able to access the client argv and argc field. + * The client will be reset in unblockClientFromModule(). */ + if (!(c->flags & CLIENT_BLOCKED) || c->btype != BLOCKED_MODULE) + resetClient(c); } - /* freeMemoryIfNeeded may flush slave output buffers. This may result - * into a slave, that may be the active client, to be freed. */ + /* freeMemoryIfNeeded may flush slave output buffers. This may + * result into a slave, that may be the active client, to be + * freed. */ if (server.current_client == NULL) break; } } From 647406c1c1e5e7ca30a5197f29696189cf707faf Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 11 Jul 2017 12:44:56 +0200 Subject: [PATCH 0387/1722] Fix isHLLObjectOrReply() to handle integer encoded strings. Close #3766. --- src/hyperloglog.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 49516f824..f4b5bd1c1 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -1121,6 +1121,7 @@ int isHLLObjectOrReply(client *c, robj *o) { if (checkType(c,o,OBJ_STRING)) return C_ERR; /* Error already sent. */ + if (!sdsEncodedObject(o)) goto invalid; if (stringObjectLen(o) < sizeof(*hdr)) goto invalid; hdr = o->ptr; From e1b9781bdac611ff58297b87ccdaeee87517a2de Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 11 Jul 2017 15:49:09 +0200 Subject: [PATCH 0388/1722] CLUSTER GETKEYSINSLOT: avoid overallocating. Close #3911. 
--- src/cluster.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/cluster.c b/src/cluster.c index 407ddee82..a516e911f 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -4380,6 +4380,11 @@ void clusterCommand(client *c) { return; } + /* Avoid allocating more than needed in case of large COUNT argument + * and smaller actual number of keys. */ + unsigned int keys_in_slot = countKeysInSlot(slot); + if (maxkeys > keys_in_slot) maxkeys = keys_in_slot; + keys = zmalloc(sizeof(robj*)*maxkeys); numkeys = getKeysInSlot(slot, keys, maxkeys); addReplyMultiBulkLen(c,numkeys); From 66c47a4d0613659e622672156834b9260646546b Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 12 Jul 2017 11:07:28 +0200 Subject: [PATCH 0389/1722] Fix replication of SLAVEOF inside transaction. In Redis 4.0 replication, with the introduction of PSYNC2, masters and slaves replicate commands to cascading slaves and to the replication backlog itself in a different way compared to the past. Masters actually replicate the effects of client commands. Slaves just propagate what they receive from masters. This mechanism can cause problems when the configuration of an instance is changed from master to slave inside a transaction. For instance we could send to a master instance the following sequence: MULTI SLAVEOF 127.0.0.1 0 EXEC SLAVEOF NO ONE Before the fixes in this commit, the MULTI command used to be propagated into the replication backlog, however after the SLAVEOF command the instance is a slave, so the EXEC implementation failed to also propagate the EXEC command. When the slaves of the above instance reconnected, they were incrementally synchronized just sending a "MULTI". This put the master client (in the slaves) into MULTI state, breaking the replication. Notably even Redis Sentinel uses the above approach in order to guarantee that configuration changes are always performed together with rewrites of the configuration and with clients disconnection. Sentiel does: MULTI SLAVEOF ... 
CONFIG REWRITE CLIENT KILL TYPE normal EXEC So this was a really problematic issue. However even with the fix in this commit, that will add the final EXEC to the replication stream in case the instance was switched from master to slave during the transaction, the result would be to increment the slave replication offset, so a successive reconnection with the new master, will not permit a successful partial resynchronization: no way the new master can provide us with the backlog needed, we incremented our offset to a value that the new master cannot have. However the EXEC implementation waits to emit the MULTI, so that if the commands inside the transaction actually do not need to be replicated, no commands propagation happens at all. From multi.c: if (!must_propagate && !(c->cmd->flags & (CMD_READONLY|CMD_ADMIN))) { execCommandPropagateMulti(c); must_propagate = 1; } The above code is already modified by this commit you are reading. Now also ADMIN commands do not trigger the emission of MULTI. It is actually not clear why we do not just check for CMD_WRITE... Probably I wrote it this way in order to make the code more reliable: better to over-emit MULTI than not emitting it in time. So this commit should indeed fix issue #3836 (verified), however it looks like some reconsideration of this code path is needed in the long term. BONUS POINT: The reverse bug. Even in a read only slave "B", in a replication setup like: A -> B -> C There are commands without the READONLY nor the ADMIN flag, that are also not flagged as WRITE commands. An example is just the PING command. So if we send B the following sequence: MULTI PING SLAVEOF NO ONE EXEC The result will be the reverse bug, where only EXEC is emitted, but not the previous MULTI. However this apparently does not create problems in practice but it is yet another acknowledge of the fact some work is needed here in order to make this code path less surprising. Note that there are many different approaches we could follow. 
For instance MULTI/EXEC blocks containing administrative commands may be allowed ONLY if all the commands are administrative ones, otherwise they could be denined. When allowed, the commands could simply never be replicated at all. --- src/multi.c | 21 ++++++++++++++++++--- src/server.h | 1 + 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/multi.c b/src/multi.c index d8384c1c5..112ce0605 100644 --- a/src/multi.c +++ b/src/multi.c @@ -117,6 +117,7 @@ void execCommand(client *c) { int orig_argc; struct redisCommand *orig_cmd; int must_propagate = 0; /* Need to propagate MULTI/EXEC to AOF / slaves? */ + int was_master = server.masterhost == NULL; if (!(c->flags & CLIENT_MULTI)) { addReplyError(c,"EXEC without MULTI"); @@ -147,11 +148,12 @@ void execCommand(client *c) { c->argv = c->mstate.commands[j].argv; c->cmd = c->mstate.commands[j].cmd; - /* Propagate a MULTI request once we encounter the first write op. + /* Propagate a MULTI request once we encounter the first command which + * is not readonly nor an administrative one. * This way we'll deliver the MULTI/..../EXEC block as a whole and * both the AOF and the replication link will have the same consistency * and atomicity guarantees. */ - if (!must_propagate && !(c->cmd->flags & CMD_READONLY)) { + if (!must_propagate && !(c->cmd->flags & (CMD_READONLY|CMD_ADMIN))) { execCommandPropagateMulti(c); must_propagate = 1; } @@ -167,9 +169,22 @@ void execCommand(client *c) { c->argc = orig_argc; c->cmd = orig_cmd; discardTransaction(c); + /* Make sure the EXEC command will be propagated as well if MULTI * was already propagated. */ - if (must_propagate) server.dirty++; + if (must_propagate) { + int is_master = server.masterhost == NULL; + server.dirty++; + /* If inside the MULTI/EXEC block this instance was suddenly + * switched from master to slave (using the SLAVEOF command), the + * initial MULTI was propagated into the replication backlog, but the + * rest was not. 
We need to make sure to at least terminate the + * backlog with the final EXEC. */ + if (server.repl_backlog && was_master && !is_master) { + char *execcmd = "*1\r\n$4\r\nEXEC\r\n"; + feedReplicationBacklog(execcmd,strlen(execcmd)); + } + } handle_monitor: /* Send EXEC to clients waiting data from MONITOR. We do it here diff --git a/src/server.h b/src/server.h index 2be75f1a1..e3b56075a 100644 --- a/src/server.h +++ b/src/server.h @@ -1504,6 +1504,7 @@ void changeReplicationId(void); void clearReplicationId2(void); void chopReplicationBacklog(void); void replicationCacheMasterUsingMyself(void); +void feedReplicationBacklog(void *ptr, size_t len); /* Generic persistence functions */ void startLoading(FILE *fp); From f6d871f4c4f9fc222d44c0c31c7d9493b5649c71 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 14 Jul 2017 11:29:28 +0200 Subject: [PATCH 0390/1722] Markdown generation of Redis Modules API reference improved. --- src/module.c | 135 +++++++++++++++++++++--------------------- src/modules/gendoc.rb | 22 ++++--- 2 files changed, 83 insertions(+), 74 deletions(-) diff --git a/src/module.c b/src/module.c index e0906e255..7b8b17479 100644 --- a/src/module.c +++ b/src/module.c @@ -505,10 +505,10 @@ int RM_IsKeysPositionRequest(RedisModuleCtx *ctx) { * RedisModule_IsKeysPositionRequest() API and uses this function in * order to report keys, like in the following example: * - * if (RedisModule_IsKeysPositionRequest(ctx)) { - * RedisModule_KeyAtPos(ctx,1); - * RedisModule_KeyAtPos(ctx,2); - * } + * if (RedisModule_IsKeysPositionRequest(ctx)) { + * RedisModule_KeyAtPos(ctx,1); + * RedisModule_KeyAtPos(ctx,2); + * } * * Note: in the example below the get keys API would not be needed since * keys are at fixed positions. 
This interface is only used for commands @@ -936,8 +936,8 @@ int RM_StringAppendBuffer(RedisModuleCtx *ctx, RedisModuleString *str, const cha * Most functions always return REDISMODULE_OK so you can use it with * 'return' in order to return from the command implementation with: * - * if (... some condition ...) - * return RM_ReplyWithLongLong(ctx,mycount); + * if (... some condition ...) + * return RM_ReplyWithLongLong(ctx,mycount); * -------------------------------------------------------------------------- */ /* Send an error about the number of arguments given to the command, @@ -945,7 +945,7 @@ int RM_StringAppendBuffer(RedisModuleCtx *ctx, RedisModuleString *str, const cha * * Example: * - * if (argc != 3) return RedisModule_WrongArity(ctx); + * if (argc != 3) return RedisModule_WrongArity(ctx); */ int RM_WrongArity(RedisModuleCtx *ctx) { addReplyErrorFormat(ctx->client, @@ -1003,11 +1003,11 @@ int replyWithStatus(RedisModuleCtx *ctx, const char *msg, char *prefix) { * the initial error code. The function only provides the initial "-", so * the usage is, for example: * - * RM_ReplyWithError(ctx,"ERR Wrong Type"); + * RedisModule_ReplyWithError(ctx,"ERR Wrong Type"); * * and not just: * - * RM_ReplyWithError(ctx,"Wrong Type"); + * RedisModule_ReplyWithError(ctx,"Wrong Type"); * * The function always returns REDISMODULE_OK. */ @@ -1062,14 +1062,14 @@ int RM_ReplyWithArray(RedisModuleCtx *ctx, long len) { * For example in order to output an array like [1,[10,20,30]] we * could write: * - * RedisModule_ReplyWithArray(ctx,REDISMODULE_POSTPONED_ARRAY_LEN); - * RedisModule_ReplyWithLongLong(ctx,1); - * RedisModule_ReplyWithArray(ctx,REDISMODULE_POSTPONED_ARRAY_LEN); - * RedisModule_ReplyWithLongLong(ctx,10); - * RedisModule_ReplyWithLongLong(ctx,20); - * RedisModule_ReplyWithLongLong(ctx,30); - * RedisModule_ReplySetArrayLength(ctx,3); // Set len of 10,20,30 array. 
- * RedisModule_ReplySetArrayLength(ctx,2); // Set len of top array + * RedisModule_ReplyWithArray(ctx,REDISMODULE_POSTPONED_ARRAY_LEN); + * RedisModule_ReplyWithLongLong(ctx,1); + * RedisModule_ReplyWithArray(ctx,REDISMODULE_POSTPONED_ARRAY_LEN); + * RedisModule_ReplyWithLongLong(ctx,10); + * RedisModule_ReplyWithLongLong(ctx,20); + * RedisModule_ReplyWithLongLong(ctx,30); + * RedisModule_ReplySetArrayLength(ctx,3); // Set len of 10,20,30 array. + * RedisModule_ReplySetArrayLength(ctx,2); // Set len of top array * * Note that in the above example there is no reason to postpone the array * length, since we produce a fixed number of elements, but in the practice @@ -1428,8 +1428,8 @@ int RM_StringSet(RedisModuleKey *key, RedisModuleString *str) { * * The 'mode' is composed by bitwise OR-ing the following flags: * - * REDISMODULE_READ -- Read access - * REDISMODULE_WRITE -- Write access + * REDISMODULE_READ -- Read access + * REDISMODULE_WRITE -- Write access * * If the DMA is not requested for writing, the pointer returned should * only be accessed in a read-only fashion. @@ -1587,14 +1587,14 @@ int RM_ZsetAddFlagsFromCoreFlags(int flags) { * * The input flags are: * - * REDISMODULE_ZADD_XX: Element must already exist. Do nothing otherwise. - * REDISMODULE_ZADD_NX: Element must not exist. Do nothing otherwise. + * REDISMODULE_ZADD_XX: Element must already exist. Do nothing otherwise. + * REDISMODULE_ZADD_NX: Element must not exist. Do nothing otherwise. * * The output flags are: * - * REDISMODULE_ZADD_ADDED: The new element was added to the sorted set. - * REDISMODULE_ZADD_UPDATED: The score of the element was updated. - * REDISMODULE_ZADD_NOP: No operation was performed because XX or NX flags. + * REDISMODULE_ZADD_ADDED: The new element was added to the sorted set. + * REDISMODULE_ZADD_UPDATED: The score of the element was updated. + * REDISMODULE_ZADD_NOP: No operation was performed because XX or NX flags. * * On success the function returns REDISMODULE_OK. 
On the following errors * REDISMODULE_ERR is returned: @@ -2010,25 +2010,25 @@ int RM_ZsetRangePrev(RedisModuleKey *key) { * * Example to set the hash argv[1] to the value argv[2]: * - * RedisModule_HashSet(key,REDISMODULE_HASH_NONE,argv[1],argv[2],NULL); + * RedisModule_HashSet(key,REDISMODULE_HASH_NONE,argv[1],argv[2],NULL); * * The function can also be used in order to delete fields (if they exist) * by setting them to the specified value of REDISMODULE_HASH_DELETE: * - * RedisModule_HashSet(key,REDISMODULE_HASH_NONE,argv[1], - * REDISMODULE_HASH_DELETE,NULL); + * RedisModule_HashSet(key,REDISMODULE_HASH_NONE,argv[1], + * REDISMODULE_HASH_DELETE,NULL); * * The behavior of the command changes with the specified flags, that can be * set to REDISMODULE_HASH_NONE if no special behavior is needed. * - * REDISMODULE_HASH_NX: The operation is performed only if the field was not - * already existing in the hash. - * REDISMODULE_HASH_XX: The operation is performed only if the field was - * already existing, so that a new value could be - * associated to an existing filed, but no new fields - * are created. - * REDISMODULE_HASH_CFIELDS: The field names passed are null terminated C - * strings instead of RedisModuleString objects. + * REDISMODULE_HASH_NX: The operation is performed only if the field was not + * already existing in the hash. + * REDISMODULE_HASH_XX: The operation is performed only if the field was + * already existing, so that a new value could be + * associated to an existing filed, but no new fields + * are created. + * REDISMODULE_HASH_CFIELDS: The field names passed are null terminated C + * strings instead of RedisModuleString objects. * * Unless NX is specified, the command overwrites the old field value with * the new one. 
@@ -2037,8 +2037,8 @@ int RM_ZsetRangePrev(RedisModuleKey *key) { * normal C strings, so for example to delete the field "foo" the following * code can be used: * - * RedisModule_HashSet(key,REDISMODULE_HASH_CFIELDS,"foo", - * REDISMODULE_HASH_DELETE,NULL); + * RedisModule_HashSet(key,REDISMODULE_HASH_CFIELDS,"foo", + * REDISMODULE_HASH_DELETE,NULL); * * Return value: * @@ -2417,7 +2417,7 @@ RedisModuleString *RM_CreateStringFromCallReply(RedisModuleCallReply *reply) { * The integer pointed by 'flags' is populated with flags according * to special modifiers in "fmt". For now only one exists: * - * "!" -> REDISMODULE_ARGV_REPLICATE + * "!" -> REDISMODULE_ARGV_REPLICATE * * On error (format specifier error) NULL is returned and nothing is * allocated. On success the argument vector is returned. */ @@ -2607,7 +2607,7 @@ const char *RM_CallReplyProto(RedisModuleCallReply *reply, size_t *len) { * * The resulting 64 bit integer is composed as follows: * - * (high order bits) 6|6|6|6|6|6|6|6|6|10 (low order bits) + * (high order bits) 6|6|6|6|6|6|6|6|6|10 (low order bits) * * The first 6 bits value is the first character, name[0], while the last * 6 bits value, immediately before the 10 bits integer, is name[8]. @@ -3098,31 +3098,31 @@ loaderr: * To add a sequence of unordered elements (for example in the case of a Redis * Set), the pattern to use is: * - * foreach element { - * AddElement(element); - * EndSequence(); - * } + * foreach element { + * AddElement(element); + * EndSequence(); + * } * * Because Sets are not ordered, so every element added has a position that * does not depend from the other. 
However if instead our elements are * ordered in pairs, like field-value pairs of an Hash, then one should * use: * - * foreach key,value { - * AddElement(key); - * AddElement(value); - * EndSquence(); - * } + * foreach key,value { + * AddElement(key); + * AddElement(value); + * EndSequence(); + * } * * Because the key and value will be always in the above order, while instead * the single key-value pairs, can appear in any position into a Redis hash. * * A list of ordered elements would be implemented with: * - * foreach element { - * AddElement(element); - * } - * EndSequence(); + * foreach element { + * AddElement(element); + * } + * EndSequence(); * */ void RM_DigestAddStringBuffer(RedisModuleDigest *md, unsigned char *ele, size_t len) { @@ -3218,8 +3218,8 @@ RedisModuleCtx *RM_GetContextFromIO(RedisModuleIO *io) { /* This is the low level function implementing both: * - * RM_Log() - * RM_LogIOError() + * RM_Log() + * RM_LogIOError() * */ void RM_LogRaw(RedisModule *module, const char *levelstr, const char *fmt, va_list ap) { @@ -3238,8 +3238,7 @@ void RM_LogRaw(RedisModule *module, const char *levelstr, const char *fmt, va_li serverLogRaw(level,msg); } -/* - * Produces a log message to the standard Redis log, the format accepts +/* Produces a log message to the standard Redis log, the format accepts * printf-alike specifiers, while level is a string describing the log * level to use when emitting the log, and must be one of the following: * @@ -3318,12 +3317,14 @@ void unblockClientFromModule(client *c) { * * The callbacks are called in the following contexts: * - * reply_callback: called after a successful RedisModule_UnblockClient() call - * in order to reply to the client and unblock it. - * reply_timeout: called when the timeout is reached in order to send an - * error to the client. - * free_privdata: called in order to free the privata data that is passed - * by RedisModule_UnblockClient() call. 
+ * reply_callback: called after a successful RedisModule_UnblockClient() + * call in order to reply to the client and unblock it. + * + * reply_timeout: called when the timeout is reached in order to send an + * error to the client. + * + * free_privdata: called in order to free the private data that is passed + * by RedisModule_UnblockClient() call. */ RedisModuleBlockedClient *RM_BlockClient(RedisModuleCtx *ctx, RedisModuleCmdFunc reply_callback, RedisModuleCmdFunc timeout_callback, void (*free_privdata)(void*), long long timeout_ms) { client *c = ctx->client; @@ -3499,9 +3500,9 @@ void *RM_GetBlockedClientPrivateData(RedisModuleCtx *ctx) { * * To call non-reply APIs, the thread safe context must be prepared with: * - * RedisModule_ThreadSafeCallStart(ctx); - * ... make your call here ... - * RedisModule_ThreadSafeCallStop(ctx); + * RedisModule_ThreadSafeCallStart(ctx); + * ... make your call here ... + * RedisModule_ThreadSafeCallStop(ctx); * * This is not needed when using `RedisModule_Reply*` functions, assuming * that a blocked client was used when the context was created, otherwise * @@ -3683,8 +3684,8 @@ int moduleLoad(const char *path, void **module_argv, int module_argc) { * C_OK is returned, otherwise C_ERR is returned and errno is set * to the following values depending on the type of error: * - * ENONET: No such module having the specified name. - * EBUSY: The module exports a new data type and can only be reloaded. */ + * * ENONET: No such module having the specified name. + * * EBUSY: The module exports a new data type and can only be reloaded. 
*/ int moduleUnload(sds name) { struct RedisModule *module = dictFetchValue(modules,name); diff --git a/src/modules/gendoc.rb b/src/modules/gendoc.rb index b3dbf1ca7..516f5d795 100644 --- a/src/modules/gendoc.rb +++ b/src/modules/gendoc.rb @@ -6,21 +6,29 @@ def markdown(s) s = s.gsub(/\*\/$/,"") s = s.gsub(/^ \* {0,1}/,"") s = s.gsub(/^\/\* /,"") - if s[0] != ' ' - s = s.gsub(/RM_[A-z()]+/){|x| "`#{x}`"} - s = s.gsub(/RedisModule_[A-z()]+/){|x| "`#{x}`"} - s = s.gsub(/REDISMODULE_[A-z]+/){|x| "`#{x}`"} - end s.chop! while s[-1] == "\n" || s[-1] == " " - return s + lines = s.split("\n") + newlines = [] + lines.each{|l| + if l[0] != ' ' + l = l.gsub(/RM_[A-z()]+/){|x| "`#{x}`"} + l = l.gsub(/RedisModule_[A-z()]+/){|x| "`#{x}`"} + l = l.gsub(/REDISMODULE_[A-z]+/){|x| "`#{x}`"} + end + newlines << l + } + return newlines.join("\n") end # Given the source code array and the index at which an exported symbol was # detected, extracts and outputs the documentation. def docufy(src,i) m = /RM_[A-z0-9]+/.match(src[i]) + name = m[0] + name = name.sub("RM_","RedisModule_") proto = src[i].sub("{","").strip+";\n" - puts "## `#{m[0]}`\n\n" + proto = proto.sub("RM_","RedisModule_") + puts "## `#{name}`\n\n" puts " #{proto}\n" comment = "" while true From 6fd40fdbc354e4b12a2d78e745f825dc9d4f3d04 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 14 Jul 2017 11:33:59 +0200 Subject: [PATCH 0391/1722] Modules documentation removed from source. Moving to redis-doc repository to publish via Redis.io. 
--- src/modules/API.md | 1329 ------------------------------------------ src/modules/BLOCK.md | 265 --------- src/modules/INTRO.md | 857 --------------------------- src/modules/TYPES.md | 379 ------------ 4 files changed, 2830 deletions(-) delete mode 100644 src/modules/API.md delete mode 100644 src/modules/BLOCK.md delete mode 100644 src/modules/INTRO.md delete mode 100644 src/modules/TYPES.md diff --git a/src/modules/API.md b/src/modules/API.md deleted file mode 100644 index e90429e3b..000000000 --- a/src/modules/API.md +++ /dev/null @@ -1,1329 +0,0 @@ -# Modules API reference - -## `RM_Alloc` - - void *RM_Alloc(size_t bytes); - -Use like malloc(). Memory allocated with this function is reported in -Redis INFO memory, used for keys eviction according to maxmemory settings -and in general is taken into account as memory allocated by Redis. -You should avoid using malloc(). - -## `RM_Calloc` - - void *RM_Calloc(size_t nmemb, size_t size); - -Use like calloc(). Memory allocated with this function is reported in -Redis INFO memory, used for keys eviction according to maxmemory settings -and in general is taken into account as memory allocated by Redis. -You should avoid using calloc() directly. - -## `RM_Realloc` - - void* RM_Realloc(void *ptr, size_t bytes); - -Use like realloc() for memory obtained with `RedisModule_Alloc()`. - -## `RM_Free` - - void RM_Free(void *ptr); - -Use like free() for memory obtained by `RedisModule_Alloc()` and -`RedisModule_Realloc()`. However you should never try to free with -`RedisModule_Free()` memory allocated with malloc() inside your module. - -## `RM_Strdup` - - char *RM_Strdup(const char *str); - -Like strdup() but returns memory allocated with `RedisModule_Alloc()`. - -## `RM_PoolAlloc` - - void *RM_PoolAlloc(RedisModuleCtx *ctx, size_t bytes); - -Return heap allocated memory that will be freed automatically when the -module callback function returns. 
Mostly suitable for small allocations -that are short living and must be released when the callback returns -anyway. The returned memory is aligned to the architecture word size -if at least word size bytes are requested, otherwise it is just -aligned to the next power of two, so for example a 3 bytes request is -4 bytes aligned while a 2 bytes request is 2 bytes aligned. - -There is no realloc style function since when this is needed to use the -pool allocator is not a good idea. - -The function returns NULL if `bytes` is 0. - -## `RM_GetApi` - - int RM_GetApi(const char *funcname, void **targetPtrPtr); - -Lookup the requested module API and store the function pointer into the -target pointer. The function returns `REDISMODULE_ERR` if there is no such -named API, otherwise `REDISMODULE_OK`. - -This function is not meant to be used by modules developer, it is only -used implicitly by including redismodule.h. - -## `RM_IsKeysPositionRequest` - - int RM_IsKeysPositionRequest(RedisModuleCtx *ctx); - -Return non-zero if a module command, that was declared with the -flag "getkeys-api", is called in a special way to get the keys positions -and not to get executed. Otherwise zero is returned. - -## `RM_KeyAtPos` - - void RM_KeyAtPos(RedisModuleCtx *ctx, int pos); - -When a module command is called in order to obtain the position of -keys, since it was flagged as "getkeys-api" during the registration, -the command implementation checks for this special call using the -`RedisModule_IsKeysPositionRequest()` API and uses this function in -order to report keys, like in the following example: - - if (`RedisModule_IsKeysPositionRequest(ctx))` { - `RedisModule_KeyAtPos(ctx`,1); - `RedisModule_KeyAtPos(ctx`,2); - } - - Note: in the example below the get keys API would not be needed since - keys are at fixed positions. This interface is only used for commands - with a more complex structure. 
- -## `RM_CreateCommand` - - int RM_CreateCommand(RedisModuleCtx *ctx, const char *name, RedisModuleCmdFunc cmdfunc, const char *strflags, int firstkey, int lastkey, int keystep); - -Register a new command in the Redis server, that will be handled by -calling the function pointer 'func' using the RedisModule calling -convention. The function returns `REDISMODULE_ERR` if the specified command -name is already busy or a set of invalid flags were passed, otherwise -`REDISMODULE_OK` is returned and the new command is registered. - -This function must be called during the initialization of the module -inside the `RedisModule_OnLoad()` function. Calling this function outside -of the initialization function is not defined. - -The command function type is the following: - - int MyCommand_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc); - -And is supposed to always return `REDISMODULE_OK`. - -The set of flags 'strflags' specify the behavior of the command, and should -be passed as a C string compoesd of space separated words, like for -example "write deny-oom". The set of flags are: - -* **"write"**: The command may modify the data set (it may also read - from it). -* **"readonly"**: The command returns data from keys but never writes. -* **"admin"**: The command is an administrative command (may change - replication or perform similar tasks). -* **"deny-oom"**: The command may use additional memory and should be - denied during out of memory conditions. -* **"deny-script"**: Don't allow this command in Lua scripts. -* **"allow-loading"**: Allow this command while the server is loading data. - Only commands not interacting with the data set - should be allowed to run in this mode. If not sure - don't use this flag. -* **"pubsub"**: The command publishes things on Pub/Sub channels. -* **"random"**: The command may have different outputs even starting - from the same input arguments and key values. 
-* **"allow-stale"**: The command is allowed to run on slaves that don't - serve stale data. Don't use if you don't know what - this means. -* **"no-monitor"**: Don't propoagate the command on monitor. Use this if - the command has sensible data among the arguments. -* **"fast"**: The command time complexity is not greater - than O(log(N)) where N is the size of the collection or - anything else representing the normal scalability - issue with the command. -* **"getkeys-api"**: The command implements the interface to return - the arguments that are keys. Used when start/stop/step - is not enough because of the command syntax. -* **"no-cluster"**: The command should not register in Redis Cluster - since is not designed to work with it because, for - example, is unable to report the position of the - keys, programmatically creates key names, or any - other reason. - -## `RM_SetModuleAttribs` - - void RM_SetModuleAttribs(RedisModuleCtx *ctx, const char *name, int ver, int apiver); - -Called by `RM_Init()` to setup the `ctx->module` structure. - -This is an internal function, Redis modules developers don't need -to use it. - -## `RM_Milliseconds` - - long long RM_Milliseconds(void); - -Return the current UNIX time in milliseconds. - -## `RM_AutoMemory` - - void RM_AutoMemory(RedisModuleCtx *ctx); - -Enable automatic memory management. See API.md for more information. - -The function must be called as the first function of a command implementation -that wants to use automatic memory. - -## `RM_CreateString` - - RedisModuleString *RM_CreateString(RedisModuleCtx *ctx, const char *ptr, size_t len); - -Create a new module string object. The returned string must be freed -with `RedisModule_FreeString()`, unless automatic memory is enabled. - -The string is created by copying the `len` bytes starting -at `ptr`. No reference is retained to the passed buffer. 
- -## `RM_CreateStringPrintf` - - RedisModuleString *RM_CreateStringPrintf(RedisModuleCtx *ctx, const char *fmt, ...); - -Create a new module string object from a printf format and arguments. -The returned string must be freed with `RedisModule_FreeString()`, unless -automatic memory is enabled. - -The string is created using the sds formatter function sdscatvprintf(). - -## `RM_CreateStringFromLongLong` - - RedisModuleString *RM_CreateStringFromLongLong(RedisModuleCtx *ctx, long long ll); - -Like `RedisModule_CreatString()`, but creates a string starting from a long long -integer instead of taking a buffer and its length. - -The returned string must be released with `RedisModule_FreeString()` or by -enabling automatic memory management. - -## `RM_CreateStringFromString` - - RedisModuleString *RM_CreateStringFromString(RedisModuleCtx *ctx, const RedisModuleString *str); - -Like `RedisModule_CreatString()`, but creates a string starting from another -RedisModuleString. - -The returned string must be released with `RedisModule_FreeString()` or by -enabling automatic memory management. - -## `RM_FreeString` - - void RM_FreeString(RedisModuleCtx *ctx, RedisModuleString *str); - -Free a module string object obtained with one of the Redis modules API calls -that return new string objects. - -It is possible to call this function even when automatic memory management -is enabled. In that case the string will be released ASAP and removed -from the pool of string to release at the end. - -## `RM_RetainString` - - void RM_RetainString(RedisModuleCtx *ctx, RedisModuleString *str); - -Every call to this function, will make the string 'str' requiring -an additional call to `RedisModule_FreeString()` in order to really -free the string. Note that the automatic freeing of the string obtained -enabling modules automatic memory management counts for one -`RedisModule_FreeString()` call (it is just executed automatically). 
- -Normally you want to call this function when, at the same time -the following conditions are true: - -1) You have automatic memory management enabled. -2) You want to create string objects. -3) Those string objects you create need to live *after* the callback - function(for example a command implementation) creating them returns. - -Usually you want this in order to store the created string object -into your own data structure, for example when implementing a new data -type. - -Note that when memory management is turned off, you don't need -any call to RetainString() since creating a string will always result -into a string that lives after the callback function returns, if -no FreeString() call is performed. - -## `RM_StringPtrLen` - - const char *RM_StringPtrLen(const RedisModuleString *str, size_t *len); - -Given a string module object, this function returns the string pointer -and length of the string. The returned pointer and length should only -be used for read only accesses and never modified. - -## `RM_StringToLongLong` - - int RM_StringToLongLong(const RedisModuleString *str, long long *ll); - -Convert the string into a long long integer, storing it at `*ll`. -Returns `REDISMODULE_OK` on success. If the string can't be parsed -as a valid, strict long long (no spaces before/after), `REDISMODULE_ERR` -is returned. - -## `RM_StringToDouble` - - int RM_StringToDouble(const RedisModuleString *str, double *d); - -Convert the string into a double, storing it at `*d`. -Returns `REDISMODULE_OK` on success or `REDISMODULE_ERR` if the string is -not a valid string representation of a double value. - -## `RM_StringCompare` - - int RM_StringCompare(RedisModuleString *a, RedisModuleString *b); - -Compare two string objects, returning -1, 0 or 1 respectively if -a < b, a == b, a > b. Strings are compared byte by byte as two -binary blobs without any encoding care / collation attempt. 
- -## `RM_StringAppendBuffer` - - int RM_StringAppendBuffer(RedisModuleCtx *ctx, RedisModuleString *str, const char *buf, size_t len); - -Append the specified buffere to the string 'str'. The string must be a -string created by the user that is referenced only a single time, otherwise -`REDISMODULE_ERR` is returend and the operation is not performed. - -## `RM_WrongArity` - - int RM_WrongArity(RedisModuleCtx *ctx); - -Send an error about the number of arguments given to the command, -citing the command name in the error message. - -Example: - - if (argc != 3) return `RedisModule_WrongArity(ctx)`; - -## `RM_ReplyWithLongLong` - - int RM_ReplyWithLongLong(RedisModuleCtx *ctx, long long ll); - -Send an integer reply to the client, with the specified long long value. -The function always returns `REDISMODULE_OK`. - -## `RM_ReplyWithError` - - int RM_ReplyWithError(RedisModuleCtx *ctx, const char *err); - -Reply with the error 'err'. - -Note that 'err' must contain all the error, including -the initial error code. The function only provides the initial "-", so -the usage is, for example: - - `RM_ReplyWithError(ctx`,"ERR Wrong Type"); - -and not just: - - `RM_ReplyWithError(ctx`,"Wrong Type"); - -The function always returns `REDISMODULE_OK`. - -## `RM_ReplyWithSimpleString` - - int RM_ReplyWithSimpleString(RedisModuleCtx *ctx, const char *msg); - -Reply with a simple string (+... \r\n in RESP protocol). This replies -are suitable only when sending a small non-binary string with small -overhead, like "OK" or similar replies. - -The function always returns `REDISMODULE_OK`. - -## `RM_ReplyWithArray` - - int RM_ReplyWithArray(RedisModuleCtx *ctx, long len); - -Reply with an array type of 'len' elements. However 'len' other calls -to `ReplyWith*` style functions must follow in order to emit the elements -of the array. 
- -When producing arrays with a number of element that is not known beforehand -the function can be called with the special count -`REDISMODULE_POSTPONED_ARRAY_LEN`, and the actual number of elements can be -later set with `RedisModule_ReplySetArrayLength()` (which will set the -latest "open" count if there are multiple ones). - -The function always returns `REDISMODULE_OK`. - -## `RM_ReplySetArrayLength` - - void RM_ReplySetArrayLength(RedisModuleCtx *ctx, long len); - -When `RedisModule_ReplyWithArray()` is used with the argument -`REDISMODULE_POSTPONED_ARRAY_LEN`, because we don't know beforehand the number -of items we are going to output as elements of the array, this function -will take care to set the array length. - -Since it is possible to have multiple array replies pending with unknown -length, this function guarantees to always set the latest array length -that was created in a postponed way. - -For example in order to output an array like [1,[10,20,30]] we -could write: - - `RedisModule_ReplyWithArray(ctx`,`REDISMODULE_POSTPONED_ARRAY_LEN`); - `RedisModule_ReplyWithLongLong(ctx`,1); - `RedisModule_ReplyWithArray(ctx`,`REDISMODULE_POSTPONED_ARRAY_LEN`); - `RedisModule_ReplyWithLongLong(ctx`,10); - `RedisModule_ReplyWithLongLong(ctx`,20); - `RedisModule_ReplyWithLongLong(ctx`,30); - `RedisModule_ReplySetArrayLength(ctx`,3); // Set len of 10,20,30 array. - `RedisModule_ReplySetArrayLength(ctx`,2); // Set len of top array - -Note that in the above example there is no reason to postpone the array -length, since we produce a fixed number of elements, but in the practice -the code may use an interator or other ways of creating the output so -that is not easy to calculate in advance the number of elements. - -## `RM_ReplyWithStringBuffer` - - int RM_ReplyWithStringBuffer(RedisModuleCtx *ctx, const char *buf, size_t len); - -Reply with a bulk string, taking in input a C buffer pointer and length. - -The function always returns `REDISMODULE_OK`. 
- -## `RM_ReplyWithString` - - int RM_ReplyWithString(RedisModuleCtx *ctx, RedisModuleString *str); - -Reply with a bulk string, taking in input a RedisModuleString object. - -The function always returns `REDISMODULE_OK`. - -## `RM_ReplyWithNull` - - int RM_ReplyWithNull(RedisModuleCtx *ctx); - -Reply to the client with a NULL. In the RESP protocol a NULL is encoded -as the string "$-1\r\n". - -The function always returns `REDISMODULE_OK`. - -## `RM_ReplyWithCallReply` - - int RM_ReplyWithCallReply(RedisModuleCtx *ctx, RedisModuleCallReply *reply); - -Reply exactly what a Redis command returned us with `RedisModule_Call()`. -This function is useful when we use `RedisModule_Call()` in order to -execute some command, as we want to reply to the client exactly the -same reply we obtained by the command. - -The function always returns `REDISMODULE_OK`. - -## `RM_ReplyWithDouble` - - int RM_ReplyWithDouble(RedisModuleCtx *ctx, double d); - -Send a string reply obtained converting the double 'd' into a bulk string. -This function is basically equivalent to converting a double into -a string into a C buffer, and then calling the function -`RedisModule_ReplyWithStringBuffer()` with the buffer and length. - -The function always returns `REDISMODULE_OK`. - -## `RM_Replicate` - - int RM_Replicate(RedisModuleCtx *ctx, const char *cmdname, const char *fmt, ...); - -Replicate the specified command and arguments to slaves and AOF, as effect -of execution of the calling command implementation. - -The replicated commands are always wrapped into the MULTI/EXEC that -contains all the commands replicated in a given module command -execution. However the commands replicated with `RedisModule_Call()` -are the first items, the ones replicated with `RedisModule_Replicate()` -will all follow before the EXEC. - -Modules should try to use one interface or the other. 
- -This command follows exactly the same interface of `RedisModule_Call()`, -so a set of format specifiers must be passed, followed by arguments -matching the provided format specifiers. - -Please refer to `RedisModule_Call()` for more information. - -The command returns `REDISMODULE_ERR` if the format specifiers are invalid -or the command name does not belong to a known command. - -## `RM_ReplicateVerbatim` - - int RM_ReplicateVerbatim(RedisModuleCtx *ctx); - -This function will replicate the command exactly as it was invoked -by the client. Note that this function will not wrap the command into -a MULTI/EXEC stanza, so it should not be mixed with other replication -commands. - -Basically this form of replication is useful when you want to propagate -the command to the slaves and AOF file exactly as it was called, since -the command can just be re-executed to deterministically re-create the -new state starting from the old one. - -The function always returns `REDISMODULE_OK`. - -## `RM_GetClientId` - - unsigned long long RM_GetClientId(RedisModuleCtx *ctx); - -Return the ID of the current client calling the currently active module -command. The returned ID has a few guarantees: - -1. The ID is different for each different client, so if the same client - executes a module command multiple times, it can be recognized as - having the same ID, otherwise the ID will be different. -2. The ID increases monotonically. Clients connecting to the server later - are guaranteed to get IDs greater than any past ID previously seen. - -Valid IDs are from 1 to 2^64-1. If 0 is returned it means there is no way -to fetch the ID in the context the function was currently called. - -## `RM_GetSelectedDb` - - int RM_GetSelectedDb(RedisModuleCtx *ctx); - -Return the currently selected DB. - -## `RM_SelectDb` - - int RM_SelectDb(RedisModuleCtx *ctx, int newid); - -Change the currently selected DB. Returns an error if the id -is out of range. 
- -Note that the client will retain the currently selected DB even after -the Redis command implemented by the module calling this function -returns. - -If the module command wishes to change something in a different DB and -returns back to the original one, it should call `RedisModule_GetSelectedDb()` -before in order to restore the old DB number before returning. - -## `RM_OpenKey` - - void *RM_OpenKey(RedisModuleCtx *ctx, robj *keyname, int mode); - -Return an handle representing a Redis key, so that it is possible -to call other APIs with the key handle as argument to perform -operations on the key. - -The return value is the handle repesenting the key, that must be -closed with `RM_CloseKey()`. - -If the key does not exist and WRITE mode is requested, the handle -is still returned, since it is possible to perform operations on -a yet not existing key (that will be created, for example, after -a list push operation). If the mode is just READ instead, and the -key does not exist, NULL is returned. However it is still safe to -call `RedisModule_CloseKey()` and `RedisModule_KeyType()` on a NULL -value. - -## `RM_CloseKey` - - void RM_CloseKey(RedisModuleKey *key); - -Close a key handle. - -## `RM_KeyType` - - int RM_KeyType(RedisModuleKey *key); - -Return the type of the key. If the key pointer is NULL then -`REDISMODULE_KEYTYPE_EMPTY` is returned. - -## `RM_ValueLength` - - size_t RM_ValueLength(RedisModuleKey *key); - -Return the length of the value associated with the key. -For strings this is the length of the string. For all the other types -is the number of elements (just counting keys for hashes). - -If the key pointer is NULL or the key is empty, zero is returned. - -## `RM_DeleteKey` - - int RM_DeleteKey(RedisModuleKey *key); - -If the key is open for writing, remove it, and setup the key to -accept new writes as an empty key (that will be created on demand). -On success `REDISMODULE_OK` is returned. 
If the key is not open for -writing `REDISMODULE_ERR` is returned. - -## `RM_GetExpire` - - mstime_t RM_GetExpire(RedisModuleKey *key); - -Return the key expire value, as milliseconds of remaining TTL. -If no TTL is associated with the key or if the key is empty, -`REDISMODULE_NO_EXPIRE` is returned. - -## `RM_SetExpire` - - int RM_SetExpire(RedisModuleKey *key, mstime_t expire); - -Set a new expire for the key. If the special expire -`REDISMODULE_NO_EXPIRE` is set, the expire is cancelled if there was -one (the same as the PERSIST command). - -Note that the expire must be provided as a positive integer representing -the number of milliseconds of TTL the key should have. - -The function returns `REDISMODULE_OK` on success or `REDISMODULE_ERR` if -the key was not open for writing or is an empty key. - -## `RM_StringSet` - - int RM_StringSet(RedisModuleKey *key, RedisModuleString *str); - -If the key is open for writing, set the specified string 'str' as the -value of the key, deleting the old value if any. -On success `REDISMODULE_OK` is returned. If the key is not open for -writing or there is an active iterator, `REDISMODULE_ERR` is returned. - -## `RM_StringDMA` - - char *RM_StringDMA(RedisModuleKey *key, size_t *len, int mode); - -Prepare the key associated string value for DMA access, and returns -a pointer and size (by reference), that the user can use to read or -modify the string in-place accessing it directly via pointer. - -The 'mode' is composed by bitwise OR-ing the following flags: - -`REDISMODULE_READ` -- Read access -`REDISMODULE_WRITE` -- Write access - -If the DMA is not requested for writing, the pointer returned should -only be accessed in a read-only fashion. - -On error (wrong type) NULL is returned. - -DMA access rules: - -1. No other key writing function should be called since the moment -the pointer is obtained, for all the time we want to use DMA access -to read or modify the string. - -2. 
Each time `RM_StringTruncate()` is called, to continue with the DMA -access, `RM_StringDMA()` should be called again to re-obtain -a new pointer and length. - -3. If the returned pointer is not NULL, but the length is zero, no -byte can be touched (the string is empty, or the key itself is empty) -so a `RM_StringTruncate()` call should be used if there is to enlarge -the string, and later call StringDMA() again to get the pointer. - -## `RM_StringTruncate` - - int RM_StringTruncate(RedisModuleKey *key, size_t newlen); - -If the string is open for writing and is of string type, resize it, padding -with zero bytes if the new length is greater than the old one. - -After this call, `RM_StringDMA()` must be called again to continue -DMA access with the new pointer. - -The function returns `REDISMODULE_OK` on success, and `REDISMODULE_ERR` on -error, that is, the key is not open for writing, is not a string -or resizing for more than 512 MB is requested. - -If the key is empty, a string key is created with the new string value -unless the new length value requested is zero. - -## `RM_ListPush` - - int RM_ListPush(RedisModuleKey *key, int where, RedisModuleString *ele); - -Push an element into a list, on head or tail depending on 'where' argumnet. -If the key pointer is about an empty key opened for writing, the key -is created. On error (key opened for read-only operations or of the wrong -type) `REDISMODULE_ERR` is returned, otherwise `REDISMODULE_OK` is returned. - -## `RM_ListPop` - - RedisModuleString *RM_ListPop(RedisModuleKey *key, int where); - -Pop an element from the list, and returns it as a module string object -that the user should be free with `RM_FreeString()` or by enabling -automatic memory. 'where' specifies if the element should be popped from -head or tail. The command returns NULL if: -1) The list is empty. -2) The key was not open for writing. -3) The key is not a list. 
- -## `RM_ZsetAddFlagsToCoreFlags` - - int RM_ZsetAddFlagsToCoreFlags(int flags); - -Conversion from/to public flags of the Modules API and our private flags, -so that we have everything decoupled. - -## `RM_ZsetAddFlagsFromCoreFlags` - - int RM_ZsetAddFlagsFromCoreFlags(int flags); - -See previous function comment. - -## `RM_ZsetAdd` - - int RM_ZsetAdd(RedisModuleKey *key, double score, RedisModuleString *ele, int *flagsptr); - -Add a new element into a sorted set, with the specified 'score'. -If the element already exists, the score is updated. - -A new sorted set is created at value if the key is an empty open key -setup for writing. - -Additional flags can be passed to the function via a pointer, the flags -are both used to receive input and to communicate state when the function -returns. 'flagsptr' can be NULL if no special flags are used. - -The input flags are: - -`REDISMODULE_ZADD_XX`: Element must already exist. Do nothing otherwise. -`REDISMODULE_ZADD_NX`: Element must not exist. Do nothing otherwise. - -The output flags are: - -`REDISMODULE_ZADD_ADDED`: The new element was added to the sorted set. -`REDISMODULE_ZADD_UPDATED`: The score of the element was updated. -`REDISMODULE_ZADD_NOP`: No operation was performed because XX or NX flags. - -On success the function returns `REDISMODULE_OK`. On the following errors -`REDISMODULE_ERR` is returned: - -* The key was not opened for writing. -* The key is of the wrong type. -* 'score' double value is not a number (NaN). - -## `RM_ZsetIncrby` - - int RM_ZsetIncrby(RedisModuleKey *key, double score, RedisModuleString *ele, int *flagsptr, double *newscore); - -This function works exactly like `RM_ZsetAdd()`, but instead of setting -a new score, the score of the existing element is incremented, or if the -element does not already exist, it is added assuming the old score was -zero. 
- -The input and output flags, and the return value, have the same exact -meaning, with the only difference that this function will return -`REDISMODULE_ERR` even when 'score' is a valid double number, but adding it -to the existing score resuts into a NaN (not a number) condition. - -This function has an additional field 'newscore', if not NULL is filled -with the new score of the element after the increment, if no error -is returned. - -## `RM_ZsetRem` - - int RM_ZsetRem(RedisModuleKey *key, RedisModuleString *ele, int *deleted); - -Remove the specified element from the sorted set. -The function returns `REDISMODULE_OK` on success, and `REDISMODULE_ERR` -on one of the following conditions: - -* The key was not opened for writing. -* The key is of the wrong type. - -The return value does NOT indicate the fact the element was really -removed (since it existed) or not, just if the function was executed -with success. - -In order to know if the element was removed, the additional argument -'deleted' must be passed, that populates the integer by reference -setting it to 1 or 0 depending on the outcome of the operation. -The 'deleted' argument can be NULL if the caller is not interested -to know if the element was really removed. - -Empty keys will be handled correctly by doing nothing. - -## `RM_ZsetScore` - - int RM_ZsetScore(RedisModuleKey *key, RedisModuleString *ele, double *score); - -On success retrieve the double score associated at the sorted set element -'ele' and returns `REDISMODULE_OK`. Otherwise `REDISMODULE_ERR` is returned -to signal one of the following conditions: - -* There is no such element 'ele' in the sorted set. -* The key is not a sorted set. -* The key is an open empty key. - -## `RM_ZsetRangeStop` - - void RM_ZsetRangeStop(RedisModuleKey *key); - -Stop a sorted set iteration. - -## `RM_ZsetRangeEndReached` - - int RM_ZsetRangeEndReached(RedisModuleKey *key); - -Return the "End of range" flag value to signal the end of the iteration. 
- -## `RM_ZsetFirstInScoreRange` - - int RM_ZsetFirstInScoreRange(RedisModuleKey *key, double min, double max, int minex, int maxex); - -Setup a sorted set iterator seeking the first element in the specified -range. Returns `REDISMODULE_OK` if the iterator was correctly initialized -otherwise `REDISMODULE_ERR` is returned in the following conditions: - -1. The value stored at key is not a sorted set or the key is empty. - -The range is specified according to the two double values 'min' and 'max'. -Both can be infinite using the following two macros: - -`REDISMODULE_POSITIVE_INFINITE` for positive infinite value -`REDISMODULE_NEGATIVE_INFINITE` for negative infinite value - -'minex' and 'maxex' parameters, if true, respectively setup a range -where the min and max value are exclusive (not included) instead of -inclusive. - -## `RM_ZsetLastInScoreRange` - - int RM_ZsetLastInScoreRange(RedisModuleKey *key, double min, double max, int minex, int maxex); - -Exactly like `RedisModule_ZsetFirstInScoreRange()` but the last element of -the range is selected for the start of the iteration instead. - -## `RM_ZsetFirstInLexRange` - - int RM_ZsetFirstInLexRange(RedisModuleKey *key, RedisModuleString *min, RedisModuleString *max); - -Setup a sorted set iterator seeking the first element in the specified -lexicographical range. Returns `REDISMODULE_OK` if the iterator was correctly -initialized otherwise `REDISMODULE_ERR` is returned in the -following conditions: - -1. The value stored at key is not a sorted set or the key is empty. -2. The lexicographical range 'min' and 'max' format is invalid. - -'min' and 'max' should be provided as two RedisModuleString objects -in the same format as the parameters passed to the ZRANGEBYLEX command. -The function does not take ownership of the objects, so they can be released -ASAP after the iterator is setup. 
- -## `RM_ZsetLastInLexRange` - - int RM_ZsetLastInLexRange(RedisModuleKey *key, RedisModuleString *min, RedisModuleString *max); - -Exactly like `RedisModule_ZsetFirstInLexRange()` but the last element of -the range is selected for the start of the iteration instead. - -## `RM_ZsetRangeCurrentElement` - - RedisModuleString *RM_ZsetRangeCurrentElement(RedisModuleKey *key, double *score); - -Return the current sorted set element of an active sorted set iterator -or NULL if the range specified in the iterator does not include any -element. - -## `RM_ZsetRangeNext` - - int RM_ZsetRangeNext(RedisModuleKey *key); - -Go to the next element of the sorted set iterator. Returns 1 if there was -a next element, 0 if we are already at the latest element or the range -does not include any item at all. - -## `RM_ZsetRangePrev` - - int RM_ZsetRangePrev(RedisModuleKey *key); - -Go to the previous element of the sorted set iterator. Returns 1 if there was -a previous element, 0 if we are already at the first element or the range -does not include any item at all. - -## `RM_HashSet` - - int RM_HashSet(RedisModuleKey *key, int flags, ...); - -Set the field of the specified hash field to the specified value. -If the key is an empty key open for writing, it is created with an empty -hash value, in order to set the specified field. - -The function is variadic and the user must specify pairs of field -names and values, both as RedisModuleString pointers (unless the -CFIELD option is set, see later). 
- -Example to set the hash argv[1] to the value argv[2]: - - `RedisModule_HashSet(key`,`REDISMODULE_HASH_NONE`,argv[1],argv[2],NULL); - -The function can also be used in order to delete fields (if they exist) -by setting them to the specified value of `REDISMODULE_HASH_DELETE`: - - `RedisModule_HashSet(key`,`REDISMODULE_HASH_NONE`,argv[1], - `REDISMODULE_HASH_DELETE`,NULL); - -The behavior of the command changes with the specified flags, that can be -set to `REDISMODULE_HASH_NONE` if no special behavior is needed. - -`REDISMODULE_HASH_NX`: The operation is performed only if the field was not - already existing in the hash. -`REDISMODULE_HASH_XX`: The operation is performed only if the field was - already existing, so that a new value could be - associated to an existing filed, but no new fields - are created. -`REDISMODULE_HASH_CFIELDS`: The field names passed are null terminated C - strings instead of RedisModuleString objects. - -Unless NX is specified, the command overwrites the old field value with -the new one. - -When using `REDISMODULE_HASH_CFIELDS`, field names are reported using -normal C strings, so for example to delete the field "foo" the following -code can be used: - - `RedisModule_HashSet(key`,`REDISMODULE_HASH_CFIELDS`,"foo", - `REDISMODULE_HASH_DELETE`,NULL); - -Return value: - -The number of fields updated (that may be less than the number of fields -specified because of the XX or NX options). - -In the following case the return value is always zero: - -* The key was not open for writing. -* The key was associated with a non Hash value. - -## `RM_HashGet` - - int RM_HashGet(RedisModuleKey *key, int flags, ...); - -Get fields from an hash value. This function is called using a variable -number of arguments, alternating a field name (as a StringRedisModule -pointer) with a pointer to a StringRedisModule pointer, that is set to the -value of the field if the field exist, or NULL if the field did not exist. 
-At the end of the field/value-ptr pairs, NULL must be specified as last -argument to signal the end of the arguments in the variadic function. - -This is an example usage: - - RedisModuleString *first, *second; - `RedisModule_HashGet(mykey`,`REDISMODULE_HASH_NONE`,argv[1],&first, - argv[2],&second,NULL); - -As with `RedisModule_HashSet()` the behavior of the command can be specified -passing flags different than `REDISMODULE_HASH_NONE`: - -`REDISMODULE_HASH_CFIELD`: field names as null terminated C strings. - -`REDISMODULE_HASH_EXISTS`: instead of setting the value of the field -expecting a RedisModuleString pointer to pointer, the function just -reports if the field esists or not and expects an integer pointer -as the second element of each pair. - -Example of `REDISMODULE_HASH_CFIELD`: - - RedisModuleString *username, *hashedpass; - `RedisModule_HashGet(mykey`,"username",&username,"hp",&hashedpass, NULL); - -Example of `REDISMODULE_HASH_EXISTS`: - - int exists; - `RedisModule_HashGet(mykey`,argv[1],&exists,NULL); - -The function returns `REDISMODULE_OK` on success and `REDISMODULE_ERR` if -the key is not an hash value. - -Memory management: - -The returned RedisModuleString objects should be released with -`RedisModule_FreeString()`, or by enabling automatic memory management. - -## `RM_FreeCallReply_Rec` - - void RM_FreeCallReply_Rec(RedisModuleCallReply *reply, int freenested); - -Free a Call reply and all the nested replies it contains if it's an -array. - -## `RM_FreeCallReply` - - void RM_FreeCallReply(RedisModuleCallReply *reply); - -Wrapper for the recursive free reply function. This is needed in order -to have the first level function to return on nested replies, but only -if called by the module API. - -## `RM_CallReplyType` - - int RM_CallReplyType(RedisModuleCallReply *reply); - -Return the reply type. - -## `RM_CallReplyLength` - - size_t RM_CallReplyLength(RedisModuleCallReply *reply); - -Return the reply type length, where applicable. 
- -## `RM_CallReplyArrayElement` - - RedisModuleCallReply *RM_CallReplyArrayElement(RedisModuleCallReply *reply, size_t idx); - -Return the 'idx'-th nested call reply element of an array reply, or NULL -if the reply type is wrong or the index is out of range. - -## `RM_CallReplyInteger` - - long long RM_CallReplyInteger(RedisModuleCallReply *reply); - -Return the long long of an integer reply. - -## `RM_CallReplyStringPtr` - - const char *RM_CallReplyStringPtr(RedisModuleCallReply *reply, size_t *len); - -Return the pointer and length of a string or error reply. - -## `RM_CreateStringFromCallReply` - - RedisModuleString *RM_CreateStringFromCallReply(RedisModuleCallReply *reply); - -Return a new string object from a call reply of type string, error or -integer. Otherwise (wrong reply type) return NULL. - -## `RM_Call` - - RedisModuleCallReply *RM_Call(RedisModuleCtx *ctx, const char *cmdname, const char *fmt, ...); - -Exported API to call any Redis command from modules. -On success a RedisModuleCallReply object is returned, otherwise -NULL is returned and errno is set to the following values: - -EINVAL: command non existing, wrong arity, wrong format specifier. -EPERM: operation in Cluster instance with key in non local slot. - -## `RM_CallReplyProto` - - const char *RM_CallReplyProto(RedisModuleCallReply *reply, size_t *len); - -Return a pointer, and a length, to the protocol returned by the command -that returned the reply object. - -## `RM_CreateDataType` - - moduleType *RM_CreateDataType(RedisModuleCtx *ctx, const char *name, int encver, void *typemethods_ptr); - -Register a new data type exported by the module. The parameters are the -following. Please for in depth documentation check the modules API -documentation, especially the TYPES.md file. - -* **name**: A 9 characters data type name that MUST be unique in the Redis - Modules ecosystem. Be creative... and there will be no collisions. Use - the charset A-Z a-z 9-0, plus the two "-_" characters. 
A good - idea is to use, for example `-`. For example - "tree-AntZ" may mean "Tree data structure by @antirez". To use both - lower case and upper case letters helps in order to prevent collisions. -* **encver**: Encoding version, which is, the version of the serialization - that a module used in order to persist data. As long as the "name" - matches, the RDB loading will be dispatched to the type callbacks - whatever 'encver' is used, however the module can understand if - the encoding it must load are of an older version of the module. - For example the module "tree-AntZ" initially used encver=0. Later - after an upgrade, it started to serialize data in a different format - and to register the type with encver=1. However this module may - still load old data produced by an older version if the rdb_load - callback is able to check the encver value and act accordingly. - The encver must be a positive value between 0 and 1023. -* **typemethods_ptr** is a pointer to a RedisModuleTypeMethods structure - that should be populated with the methods callbacks and structure - version, like in the following example: - - RedisModuleTypeMethods tm = { - .version = `REDISMODULE_TYPE_METHOD_VERSION`, - .rdb_load = myType_RDBLoadCallBack, - .rdb_save = myType_RDBSaveCallBack, - .aof_rewrite = myType_AOFRewriteCallBack, - .free = myType_FreeCallBack, - - // Optional fields - .digest = myType_DigestCallBack, - .mem_usage = myType_MemUsageCallBack, - } - -* **rdb_load**: A callback function pointer that loads data from RDB files. -* **rdb_save**: A callback function pointer that saves data to RDB files. -* **aof_rewrite**: A callback function pointer that rewrites data as commands. -* **digest**: A callback function pointer that is used for `DEBUG DIGEST`. -* **mem_usage**: A callback function pointer that is used for `MEMORY`. -* **free**: A callback function pointer that can free a type value. 
The **digest** and **mem_usage** methods
- -## `RM_SaveUnsigned` - - void RM_SaveUnsigned(RedisModuleIO *io, uint64_t value); - -Save an unsigned 64 bit value into the RDB file. This function should only -be called in the context of the rdb_save method of modules implementing new -data types. - -## `RM_LoadUnsigned` - - uint64_t RM_LoadUnsigned(RedisModuleIO *io); - -Load an unsigned 64 bit value from the RDB file. This function should only -be called in the context of the rdb_load method of modules implementing -new data types. - -## `RM_SaveSigned` - - void RM_SaveSigned(RedisModuleIO *io, int64_t value); - -Like `RedisModule_SaveUnsigned()` but for signed 64 bit values. - -## `RM_LoadSigned` - - int64_t RM_LoadSigned(RedisModuleIO *io); - -Like `RedisModule_LoadUnsigned()` but for signed 64 bit values. - -## `RM_SaveString` - - void RM_SaveString(RedisModuleIO *io, RedisModuleString *s); - -In the context of the rdb_save method of a module type, saves a -string into the RDB file taking as input a RedisModuleString. - -The string can be later loaded with `RedisModule_LoadString()` or -other Load family functions expecting a serialized string inside -the RDB file. - -## `RM_SaveStringBuffer` - - void RM_SaveStringBuffer(RedisModuleIO *io, const char *str, size_t len); - -Like `RedisModule_SaveString()` but takes a raw C pointer and length -as input. - -## `RM_LoadString` - - RedisModuleString *RM_LoadString(RedisModuleIO *io); - -In the context of the rdb_load method of a module data type, loads a string -from the RDB file, that was previously saved with `RedisModule_SaveString()` -functions family. - -The returned string is a newly allocated RedisModuleString object, and -the user should at some point free it with a call to `RedisModule_FreeString()`. - -If the data structure does not store strings as RedisModuleString objects, -the similar function `RedisModule_LoadStringBuffer()` could be used instead. 
- -## `RM_LoadStringBuffer` - - char *RM_LoadStringBuffer(RedisModuleIO *io, size_t *lenptr); - -Like `RedisModule_LoadString()` but returns an heap allocated string that -was allocated with `RedisModule_Alloc()`, and can be resized or freed with -`RedisModule_Realloc()` or `RedisModule_Free()`. - -The size of the string is stored at '*lenptr' if not NULL. -The returned string is not automatically NULL termianted, it is loaded -exactly as it was stored inisde the RDB file. - -## `RM_SaveDouble` - - void RM_SaveDouble(RedisModuleIO *io, double value); - -In the context of the rdb_save method of a module data type, saves a double -value to the RDB file. The double can be a valid number, a NaN or infinity. -It is possible to load back the value with `RedisModule_LoadDouble()`. - -## `RM_LoadDouble` - - double RM_LoadDouble(RedisModuleIO *io); - -In the context of the rdb_save method of a module data type, loads back the -double value saved by `RedisModule_SaveDouble()`. - -## `RM_SaveFloat` - - void RM_SaveFloat(RedisModuleIO *io, float value); - -In the context of the rdb_save method of a module data type, saves a float -value to the RDB file. The float can be a valid number, a NaN or infinity. -It is possible to load back the value with `RedisModule_LoadFloat()`. - -## `RM_LoadFloat` - - float RM_LoadFloat(RedisModuleIO *io); - -In the context of the rdb_save method of a module data type, loads back the -float value saved by `RedisModule_SaveFloat()`. - -## `RM_EmitAOF` - - void RM_EmitAOF(RedisModuleIO *io, const char *cmdname, const char *fmt, ...); - -Emits a command into the AOF during the AOF rewriting process. This function -is only called in the context of the aof_rewrite method of data types exported -by a module. The command works exactly like `RedisModule_Call()` in the way -the parameters are passed, but it does not return anything as the error -handling is performed by Redis itself. 
- -## `RM_LogRaw` - - void RM_LogRaw(RedisModule *module, const char *levelstr, const char *fmt, va_list ap); - -This is the low level function implementing both: - - `RM_Log()` - `RM_LogIOError()` - -## `RM_Log` - - void RM_Log(RedisModuleCtx *ctx, const char *levelstr, const char *fmt, ...); - -/* -Produces a log message to the standard Redis log, the format accepts -printf-alike specifiers, while level is a string describing the log -level to use when emitting the log, and must be one of the following: - -* "debug" -* "verbose" -* "notice" -* "warning" - -If the specified log level is invalid, verbose is used by default. -There is a fixed limit to the length of the log line this function is able -to emit, this limti is not specified but is guaranteed to be more than -a few lines of text. - -## `RM_LogIOError` - - void RM_LogIOError(RedisModuleIO *io, const char *levelstr, const char *fmt, ...); - -Log errors from RDB / AOF serialization callbacks. - -This function should be used when a callback is returning a critical -error to the caller since cannot load or save the data for some -critical reason. - -## `RM_BlockClient` - - RedisModuleBlockedClient *RM_BlockClient(RedisModuleCtx *ctx, RedisModuleCmdFunc reply_callback, RedisModuleCmdFunc timeout_callback, void (*free_privdata)(void*), long long timeout_ms); - -Block a client in the context of a blocking command, returning an handle -which will be used, later, in order to block the client with a call to -`RedisModule_UnblockClient()`. The arguments specify callback functions -and a timeout after which the client is unblocked. - -The callbacks are called in the following contexts: - -reply_callback: called after a successful `RedisModule_UnblockClient()` call - in order to reply to the client and unblock it. -reply_timeout: called when the timeout is reached in order to send an - error to the client. -free_privdata: called in order to free the privata data that is passed - by `RedisModule_UnblockClient()` call. 
- -## `RM_UnblockClient` - - int RM_UnblockClient(RedisModuleBlockedClient *bc, void *privdata); - -Unblock a client blocked by ``RedisModule_BlockedClient``. This will trigger -the reply callbacks to be called in order to reply to the client. -The 'privdata' argument will be accessible by the reply callback, so -the caller of this function can pass any value that is needed in order to -actually reply to the client. - -A common usage for 'privdata' is a thread that computes something that -needs to be passed to the client, included but not limited some slow -to compute reply or some reply obtained via networking. - -Note: this function can be called from threads spawned by the module. - -## `RM_AbortBlock` - - int RM_AbortBlock(RedisModuleBlockedClient *bc); - -Abort a blocked client blocking operation: the client will be unblocked -without firing the reply callback. - -## `RM_IsBlockedReplyRequest` - - int RM_IsBlockedReplyRequest(RedisModuleCtx *ctx); - -Return non-zero if a module command was called in order to fill the -reply for a blocked client. - -## `RM_IsBlockedTimeoutRequest` - - int RM_IsBlockedTimeoutRequest(RedisModuleCtx *ctx); - -Return non-zero if a module command was called in order to fill the -reply for a blocked client that timed out. - -## `RM_GetBlockedClientPrivateData` - - void *RM_GetBlockedClientPrivateData(RedisModuleCtx *ctx); - -Get the privata data set by `RedisModule_UnblockClient()` - diff --git a/src/modules/BLOCK.md b/src/modules/BLOCK.md deleted file mode 100644 index d4f3c93bc..000000000 --- a/src/modules/BLOCK.md +++ /dev/null @@ -1,265 +0,0 @@ -Blocking commands in Redis modules -=== - -Redis has a few blocking commands among the built-in set of commands. -One of the most used is `BLPOP` (or the symmetric `BRPOP`) which blocks -waiting for elements arriving in a list. - -The interesting fact about blocking commands is that they do not block -the whole server, but just the client calling them. 
Usually the reason to -block is that we expect some external event to happen: this can be -some change in the Redis data structures like in the `BLPOP` case, a -long computation happening in a thread, to receive some data from the -network, and so forth. - -Redis modules have the ability to implement blocking commands as well, -this documentation shows how the API works and describes a few patterns -that can be used in order to model blocking commands. - -How blocking and resuming works. ---- - -_Note: You may want to check the `helloblock.c` example in the Redis source tree -inside the `src/modules` directory, for a simple to understand example -on how the blocking API is applied._ - -In Redis modules, commands are implemented by callback functions that -are invoked by the Redis core when the specific command is called -by the user. Normally the callback terminates its execution sending -some reply to the client. Using the following function instead, the -function implementing the module command may request that the client -is put into the blocked state: - - RedisModuleBlockedClient *RedisModule_BlockClient(RedisModuleCtx *ctx, RedisModuleCmdFunc reply_callback, RedisModuleCmdFunc timeout_callback, void (*free_privdata)(void*), long long timeout_ms); - -The function returns a `RedisModuleBlockedClient` object, which is later -used in order to unblock the client. The arguments have the following -meaning: - -* `ctx` is the command execution context as usually in the rest of the API. -* `reply_callback` is the callback, having the same prototype of a normal command function, that is called when the client is unblocked in order to return a reply to the client. -* `timeout_callback` is the callback, having the same prototype of a normal command function that is called when the client reached the `ms` timeout. -* `free_privdata` is the callback that is called in order to free the private data. 
Private data is a pointer to some data that is passed between the API used to unblock the client, to the callback that will send the reply to the client. We'll see how this mechanism works later in this document. -* `ms` is the timeout in milliseconds. When the timeout is reached, the timeout callback is called and the client is automatically aborted. - -Once a client is blocked, it can be unblocked with the following API: - - int RedisModule_UnblockClient(RedisModuleBlockedClient *bc, void *privdata); - -The function takes as argument the blocked client object returned by -the previous call to `RedisModule_BlockClient()`, and unblock the client. -Immediately before the client gets unblocked, the `reply_callback` function -specified when the client was blocked is called: this function will -have access to the `privdata` pointer used here. - -IMPORTANT: The above function is thread safe, and can be called from within -a thread doing some work in order to implement the command that blocked -the client. - -The `privdata` data will be freed automatically using the `free_privdata` -callback when the client is unblocked. This is useful **since the reply -callback may never be called** in case the client timeouts or disconnects -from the server, so it's important that it's up to an external function -to have the responsibility to free the data passed if needed. - -To better understand how the API works, we can imagine writing a command -that blocks a client for one second, and then send as reply "Hello!". - -Note: arity checks and other non important things are not implemented -int his command, in order to take the example simple. 
- - int Example_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, - int argc) - { - RedisModuleBlockedClient *bc = - RedisModule_BlockClient(ctx,reply_func,timeout_func,NULL,0); - - pthread_t tid; - pthread_create(&tid,NULL,threadmain,bc); - - return REDISMODULE_OK; - } - - void *threadmain(void *arg) { - RedisModuleBlockedClient *bc = arg; - - sleep(1); /* Wait one second and unblock. */ - RedisModule_UnblockClient(bc,NULL); - } - -The above command blocks the client ASAP, spawining a thread that will -wait a second and will unblock the client. Let's check the reply and -timeout callbacks, which are in our case very similar, since they -just reply the client with a different reply type. - - int reply_func(RedisModuleCtx *ctx, RedisModuleString **argv, - int argc) - { - return RedisModule_ReplyWithSimpleString(ctx,"Hello!"); - } - - int timeout_func(RedisModuleCtx *ctx, RedisModuleString **argv, - int argc) - { - return RedisModule_ReplyWithNull(ctx); - } - -The reply callback just sends the "Hello!" string to the client. -The important bit here is that the reply callback is called when the -client is unblocked from the thread. - -The timeout command returns `NULL`, as it often happens with actual -Redis blocking commands timing out. - -Passing reply data when unblocking ---- - -The above example is simple to understand but lacks an important -real world aspect of an actual blocking command implementation: often -the reply function will need to know what to reply to the client, -and this information is often provided as the client is unblocked. - -We could modify the above example so that the thread generates a -random number after waiting one second. You can think at it as an -actually expansive operation of some kind. Then this random number -can be passed to the reply function so that we return it to the command -caller. 
In order to make this working, we modify the functions as follow: - - void *threadmain(void *arg) { - RedisModuleBlockedClient *bc = arg; - - sleep(1); /* Wait one second and unblock. */ - - long *mynumber = RedisModule_Alloc(sizeof(long)); - *mynumber = rand(); - RedisModule_UnblockClient(bc,mynumber); - } - -As you can see, now the unblocking call is passing some private data, -that is the `mynumber` pointer, to the reply callback. In order to -obtain this private data, the reply callback will use the following -fnuction: - - void *RedisModule_GetBlockedClientPrivateData(RedisModuleCtx *ctx); - -So our reply callback is modified like that: - - int reply_func(RedisModuleCtx *ctx, RedisModuleString **argv, - int argc) - { - long *mynumber = RedisModule_GetBlockedClientPrivateData(ctx); - /* IMPORTANT: don't free mynumber here, but in the - * free privdata callback. */ - return RedisModule_ReplyWithLongLong(ctx,mynumber); - } - -Note that we also need to pass a `free_privdata` function when blocking -the client with `RedisModule_BlockClient()`, since the allocated -long value must be freed. Our callback will look like the following: - - void free_privdata(void *privdata) { - RedisModule_Free(privdata); - } - -NOTE: It is important to stress that the private data is best freed in the -`free_privdata` callback becaues the reply function may not be called -if the client disconnects or timeout. - -Also note that the private data is also accessible from the timeout -callback, always using the `GetBlockedClientPrivateData()` API. - -Aborting the blocking of a client ---- - -One problem that sometimes arises is that we need to allocate resources -in order to implement the non blocking command. So we block the client, -then, for example, try to create a thread, but the thread creation function -returns an error. What to do in such a condition in order to recover? 
We -don't want to take the client blocked, nor we want to call `UnblockClient()` -because this will trigger the reply callback to be called. - -In this case the best thing to do is to use the following function: - - int RedisModule_AbortBlock(RedisModuleBlockedClient *bc); - -Practically this is how to use it: - - int Example_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, - int argc) - { - RedisModuleBlockedClient *bc = - RedisModule_BlockClient(ctx,reply_func,timeout_func,NULL,0); - - pthread_t tid; - if (pthread_create(&tid,NULL,threadmain,bc) != 0) { - RedisModule_AbortBlock(bc); - RedisModule_ReplyWithError(ctx,"Sorry can't create a thread"); - } - - return REDISMODULE_OK; - } - -The client will be unblocked but the reply callback will not be called. - -Implementing the command, reply and timeout callback using a single function ---- - -The following functions can be used in order to implement the reply and -callback with the same function that implements the primary command -function: - - int RedisModule_IsBlockedReplyRequest(RedisModuleCtx *ctx); - int RedisModule_IsBlockedTimeoutRequest(RedisModuleCtx *ctx); - -So I could rewrite the example command without using a separated -reply and timeout callback: - - int Example_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, - int argc) - { - if (RedisModule_IsBlockedReplyRequest(ctx)) { - long *mynumber = RedisModule_GetBlockedClientPrivateData(ctx); - return RedisModule_ReplyWithLongLong(ctx,mynumber); - } else if (RedisModule_IsBlockedTimeoutRequest) { - return RedisModule_ReplyWithNull(ctx); - } - - RedisModuleBlockedClient *bc = - RedisModule_BlockClient(ctx,reply_func,timeout_func,NULL,0); - - pthread_t tid; - if (pthread_create(&tid,NULL,threadmain,bc) != 0) { - RedisModule_AbortBlock(bc); - RedisModule_ReplyWithError(ctx,"Sorry can't create a thread"); - } - - return REDISMODULE_OK; - } - -Functionally is the same but there are people that will prefer the less -verbose implementation 
that concentrates most of the command logic in a
-single function.
-
-Working on copies of data inside a thread
----
-
-An interesting pattern in order to work with threads implementing the
-slow part of a command, is to work with a copy of the data, so that
-while some operation is performed in a key, the user continues to see
-the old version. However when the thread has terminated its work, the
-representations are swapped and the new, processed version, is used.
-
-An example of this approach is the
-[Neural Redis module](https://github.com/antirez/neural-redis)
-where neural networks are trained in different threads while the
-user can still execute and inspect their older versions.
-
-Future work
----
-
-An API is work in progress right now in order to allow Redis modules APIs
-to be called in a safe way from threads, so that the threaded command
-can access the data space and do incremental operations.
-
-There is no ETA for this feature but it may appear in the course of the
-Redis 4.0 release at some point.
diff --git a/src/modules/INTRO.md b/src/modules/INTRO.md
deleted file mode 100644
index 3ac6a4673..000000000
--- a/src/modules/INTRO.md
+++ /dev/null
@@ -1,857 +0,0 @@
-Redis Modules: an introduction to the API
-===
-
-The modules documentation is composed of the following files:
-
-* `INTRO.md` (this file). An overview of the Redis Modules system and API. It's a good idea to start your reading here.
-* `API.md` is generated from module.c top comments of RedisModule functions. It is a good reference in order to understand how each function works.
-* `TYPES.md` covers the implementation of native data types into modules.
-* `BLOCK.md` shows how to write blocking commands that will not reply immediately, but will block the client, without blocking the Redis server, and will provide a reply whenever it will be possible.
- -Redis modules make possible to extend Redis functionality using external -modules, implementing new Redis commands at a speed and with features -similar to what can be done inside the core itself. - -Redis modules are dynamic libraries, that can be loaded into Redis at -startup or using the `MODULE LOAD` command. Redis exports a C API, in the -form of a single C header file called `redismodule.h`. Modules are meant -to be written in C, however it will be possible to use C++ or other languages -that have C binding functionalities. - -Modules are designed in order to be loaded into different versions of Redis, -so a given module does not need to be designed, or recompiled, in order to -run with a specific version of Redis. For this reason, the module will -register to the Redis core using a specific API version. The current API -version is "1". - -This document is about an alpha version of Redis modules. API, functionalities -and other details may change in the future. - -# Loading modules - -In order to test the module you are developing, you can load the module -using the following `redis.conf` configuration directive: - - loadmodule /path/to/mymodule.so - -It is also possible to load a module at runtime using the following command: - - MODULE LOAD /path/to/mymodule.so - -In order to list all loaded modules, use: - - MODULE LIST - -Finally, you can unload (and later reload if you wish) a module using the -following command: - - MODULE UNLOAD mymodule - -Note that `mymodule` above is not the filename without the `.so` suffix, but -instead, the name the module used to register itself into the Redis core. -The name can be obtained using `MODULE LIST`. However it is good practice -that the filename of the dynamic library is the same as the name the module -uses to register itself into the Redis core. 
- -# The simplest module you can write - -In order to show the different parts of a module, here we'll show a very -simple module that implements a command that outputs a random number. - - #include "redismodule.h" - #include - - int HelloworldRand_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { - RedisModule_ReplyWithLongLong(ctx,rand()); - return REDISMODULE_OK; - } - - int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { - if (RedisModule_Init(ctx,"helloworld",1,REDISMODULE_APIVER_1) - == REDISMODULE_ERR) return REDISMODULE_ERR; - - if (RedisModule_CreateCommand(ctx,"helloworld.rand", - HelloworldRand_RedisCommand) == REDISMODULE_ERR) - return REDISMODULE_ERR; - - return REDISMODULE_OK; - } - -The example module has two functions. One implements a command called -HELLOWORLD.RAND. This function is specific of that module. However the -other function called `RedisModule_OnLoad()` must be present in each -Redis module. It is the entry point for the module to be initialized, -register its commands, and potentially other private data structures -it uses. - -Note that it is a good idea for modules to call commands with the -name of the module followed by a dot, and finally the command name, -like in the case of `HELLOWORLD.RAND`. This way it is less likely to -have collisions. - -Note that if different modules have colliding commands, they'll not be -able to work in Redis at the same time, since the function -`RedisModule_CreateCommand` will fail in one of the modules, so the module -loading will abort returning an error condition. - -# Module initialization - -The above example shows the usage of the function `RedisModule_Init()`. -It should be the first function called by the module `OnLoad` function. 
-The following is the function prototype: - - int RedisModule_Init(RedisModuleCtx *ctx, const char *modulename, - int module_version, int api_version); - -The `Init` function announces the Redis core that the module has a given -name, its version (that is reported by `MODULE LIST`), and that is willing -to use a specific version of the API. - -If the API version is wrong, the name is already taken, or there are other -similar errors, the function will return `REDISMODULE_ERR`, and the module -`OnLoad` function should return ASAP with an error. - -Before the `Init` function is called, no other API function can be called, -otherwise the module will segfault and the Redis instance will crash. - -The second function called, `RedisModule_CreateCommand`, is used in order -to register commands into the Redis core. The following is the prototype: - - int RedisModule_CreateCommand(RedisModuleCtx *ctx, const char *cmdname, - RedisModuleCmdFunc cmdfunc); - -As you can see, most Redis modules API calls all take as first argument -the `context` of the module, so that they have a reference to the module -calling it, to the command and client executing a given command, and so forth. - -To create a new command, the above function needs the context, the command -name, and the function pointer of the function implementing the command, -which must have the following prototype: - - - int mycommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc); - -The command function arguments are just the context, that will be passed -to all the other API calls, the command argument vector, and total number -of arguments, as passed by the user. - -As you can see, the arguments are provided as pointers to a specific data -type, the `RedisModuleString`. This is an opaque data type you have API -functions to access and use, direct access to its fields is never needed. 
- -Zooming into the example command implementation, we can find another call: - - int RedisModule_ReplyWithLongLong(RedisModuleCtx *ctx, long long integer); - -This function returns an integer to the client that invoked the command, -exactly like other Redis commands do, like for example `INCR` or `SCARD`. - -# Setup and dependencies of a Redis module - -Redis modules don't depend on Redis or some other library, nor they -need to be compiled with a specific `redismodule.h` file. In order -to create a new module, just copy a recent version of `redismodule.h` -in your source tree, link all the libraries you want, and create -a dynamic library having the `RedisModule_OnLoad()` function symbol -exported. - -The module will be able to load into different versions of Redis. - -# Passing configuration parameters to Redis modules - -When the module is loaded with the `MODULE LOAD` command, or using the -`loadmodule` directive in the `redis.conf` file, the user is able to pass -configuration parameters to the module by adding arguments after the module -file name: - - loadmodule mymodule.so foo bar 1234 - -In the above example the strings `foo`, `bar` and `123` will be passed -to the module `OnLoad()` function in the `argv` argument as an array -of RedisModuleString pointers. The number of arguments passed is into `argc`. - -The way you can access those strings will be explained in the rest of this -document. Normally the module will store the module configuration parameters -in some `static` global variable that can be accessed module wide, so that -the configuration can change the behavior of different commands. - -# Working with RedisModuleString objects - -The command argument vector `argv` passed to module commands, and the -return value of other module APIs functions, are of type `RedisModuleString`. - -Usually you directly pass module strings to other API calls, however sometimes -you may need to directly access the string object. 
- -There are a few functions in order to work with string objects: - - const char *RedisModule_StringPtrLen(RedisModuleString *string, size_t *len); - -The above function accesses a string by returning its pointer and setting its -length in `len`. -You should never write to a string object pointer, as you can see from the -`const` pointer qualifier. - -However, if you want, you can create new string objects using the following -API: - - RedisModuleString *RedisModule_CreateString(RedisModuleCtx *ctx, const char *ptr, size_t len); - -The string returned by the above command must be freed using a corresponding -call to `RedisModule_FreeString()`: - - void RedisModule_FreeString(RedisModuleString *str); - -However if you want to avoid having to free strings, the automatic memory -management, covered later in this document, can be a good alternative, by -doing it for you. - -Note that the strings provided via the argument vector `argv` never need -to be freed. You only need to free new strings you create, or new strings -returned by other APIs, where it is specified that the returned string must -be freed. - -## Creating strings from numbers or parsing strings as numbers - -Creating a new string from an integer is a very common operation, so there -is a function to do this: - - RedisModuleString *mystr = RedisModule_CreateStringFromLongLong(ctx,10); - -Similarly in order to parse a string as a number: - - long long myval; - if (RedisModule_StringToLongLong(ctx,argv[1],&myval) == REDISMODULE_OK) { - /* Do something with 'myval' */ - } - -## Accessing Redis keys from modules - -Most Redis modules, in order to be useful, have to interact with the Redis -data space (this is not always true, for example an ID generator may -never touch Redis keys). Redis modules have two different APIs in order to -access the Redis data space, one is a low level API that provides very -fast access and a set of functions to manipulate Redis data structures. 
-The other API is more high level, and allows to call Redis commands and -fetch the result, similarly to how Lua scripts access Redis. - -The high level API is also useful in order to access Redis functionalities -that are not available as APIs. - -In general modules developers should prefer the low level API, because commands -implemented using the low level API run at a speed comparable to the speed -of native Redis commands. However there are definitely use cases for the -higher level API. For example often the bottleneck could be processing the -data and not accessing it. - -Also note that sometimes using the low level API is not harder compared to -the higher level one. - -# Calling Redis commands - -The high level API to access Redis is the sum of the `RedisModule_Call()` -function, together with the functions needed in order to access the -reply object returned by `Call()`. - -`RedisModule_Call` uses a special calling convention, with a format specifier -that is used to specify what kind of objects you are passing as arguments -to the function. - -Redis commands are invoked just using a command name and a list of arguments. -However when calling commands, the arguments may originate from different -kind of strings: null-terminated C strings, RedisModuleString objects as -received from the `argv` parameter in the command implementation, binary -safe C buffers with a pointer and a length, and so forth. - -For example if I want to call `INCRBY` using a first argument (the key) -a string received in the argument vector `argv`, which is an array -of RedisModuleString object pointers, and a C string representing the -number "10" as second argument (the increment), I'll use the following -function call: - - RedisModuleCallReply *reply; - reply = RedisModule_Call(ctx,"INCR","sc",argv[1],"10"); - -The first argument is the context, and the second is always a null terminated -C string with the command name. 
The third argument is the format specifier -where each character corresponds to the type of the arguments that will follow. -In the above case `"sc"` means a RedisModuleString object, and a null -terminated C string. The other arguments are just the two arguments as -specified. In fact `argv[1]` is a RedisModuleString and `"10"` is a null -terminated C string. - -This is the full list of format specifiers: - -* **c** -- Null terminated C string pointer. -* **b** -- C buffer, two arguments needed: C string pointer and `size_t` length. -* **s** -- RedisModuleString as received in `argv` or by other Redis module APIs returning a RedisModuleString object. -* **l** -- Long long integer. -* **v** -- Array of RedisModuleString objects. -* **!** -- This modifier just tells the function to replicate the command to slaves and AOF. It is ignored from the point of view of arguments parsing. - -The function returns a `RedisModuleCallReply` object on success, on -error NULL is returned. - -NULL is returned when the command name is invalid, the format specifier uses -characters that are not recognized, or when the command is called with the -wrong number of arguments. In the above cases the `errno` var is set to `EINVAL`. NULL is also returned when, in an instance with Cluster enabled, the target -keys are about non local hash slots. In this case `errno` is set to `EPERM`. - -## Working with RedisModuleCallReply objects. - -`RedisModuleCall` returns reply objects that can be accessed using the -`RedisModule_CallReply*` family of functions. - -In order to obtain the type or reply (corresponding to one of the data types -supported by the Redis protocol), the function `RedisModule_CallReplyType()` -is used: - - reply = RedisModule_Call(ctx,"INCR","sc",argv[1],"10"); - if (RedisModule_CallReplyType(reply) == REDISMODULE_REPLY_INTEGER) { - long long myval = RedisModule_CallReplyInteger(reply); - /* Do something with myval. 
*/
-    }
-
-Valid reply types are:
-
-* `REDISMODULE_REPLY_STRING` Bulk string or status replies.
-* `REDISMODULE_REPLY_ERROR` Errors.
-* `REDISMODULE_REPLY_INTEGER` Signed 64 bit integers.
-* `REDISMODULE_REPLY_ARRAY` Array of replies.
-* `REDISMODULE_REPLY_NULL` NULL reply.
-
-Strings, errors and arrays have an associated length. For strings and errors
-the length corresponds to the length of the string. For arrays the length
-is the number of elements. To obtain the reply length the following function
-is used:
-
-    size_t reply_len = RedisModule_CallReplyLength(reply);
-
-In order to obtain the value of an integer reply, the following function is used, as already shown in the example above:
-
-    long long reply_integer_val = RedisModule_CallReplyInteger(reply);
-
-Called with a reply object of the wrong type, the above function always
-returns `LLONG_MIN`.
-
-Sub elements of array replies are accessed this way:
-
-    RedisModuleCallReply *subreply;
-    subreply = RedisModule_CallReplyArrayElement(reply,idx);
-
-The above function returns NULL if you try to access out of range elements.
-
-Strings and errors (which are like strings but with a different type) can
-be accessed in the following way, making sure to never write to
-the resulting pointer (that is returned as a `const` pointer so that
-misusing must be pretty explicit):
-
-    size_t len;
-    char *ptr = RedisModule_CallReplyStringPtr(reply,&len);
-
-If the reply type is not a string or an error, NULL is returned.
-
-RedisCallReply objects are not the same as module string objects
-(RedisModuleString types). However sometimes you may need to pass replies
-of type string or integer, to API functions expecting a module string.
-
-When this is the case, you may want to evaluate if using the low level
-API could be a simpler way to implement your command, or you can use
-the following function in order to create a new string object from a
-call reply of type string, error or integer:
-
-    RedisModuleString *mystr = RedisModule_CreateStringFromCallReply(myreply);
-
-If the reply is not of the right type, NULL is returned.
-The returned string object should be released with `RedisModule_FreeString()`
-as usual, or by enabling automatic memory management (see corresponding
-section).
-
-# Releasing call reply objects
-
-Reply objects must be freed using `RedisModule_FreeCallReply`. For arrays,
-you need to free only the top level reply, not the nested replies.
-Currently the module implementation provides a protection in order to avoid
-crashing if you free a nested reply object by mistake, however this feature
-is not guaranteed to be here forever, so it should not be considered part
-of the API.
-
-If you use automatic memory management (explained later in this document)
-you don't need to free replies (but you still could if you wish to release
-memory ASAP).
-
-## Returning values from Redis commands
-
-Like normal Redis commands, new commands implemented via modules must be
-able to return values to the caller. The API exports a set of functions for
-this goal, in order to return the usual types of the Redis protocol, and
-arrays of such types as elements. Also errors can be returned with any
-error string and code (the error code is the initial uppercase letters in
-the error message, like the "BUSY" string in the "BUSY the server is busy" error
-message).
-
-All the functions to send a reply to the client are called
-`RedisModule_ReplyWith`.
- -To return an error, use: - - RedisModule_ReplyWithError(RedisModuleCtx *ctx, const char *err); - -There is a predefined error string for key of wrong type errors: - - REDISMODULE_ERRORMSG_WRONGTYPE - -Example usage: - - RedisModule_ReplyWithError(ctx,"ERR invalid arguments"); - -We already saw how to reply with a long long in the examples above: - - RedisModule_ReplyWithLongLong(ctx,12345); - -To reply with a simple string, that can't contain binary values or newlines, -(so it's suitable to send small words, like "OK") we use: - - RedisModule_ReplyWithSimpleString(ctx,"OK"); - -It's possible to reply with "bulk strings" that are binary safe, using -two different functions: - - int RedisModule_ReplyWithStringBuffer(RedisModuleCtx *ctx, const char *buf, size_t len); - - int RedisModule_ReplyWithString(RedisModuleCtx *ctx, RedisModuleString *str); - -The first function gets a C pointer and length. The second a RedisMoudleString -object. Use one or the other depending on the source type you have at hand. - -In order to reply with an array, you just need to use a function to emit the -array length, followed by as many calls to the above functions as the number -of elements of the array are: - - RedisModule_ReplyWithArray(ctx,2); - RedisModule_ReplyWithStringBuffer(ctx,"age",3); - RedisModule_ReplyWithLongLong(ctx,22); - -To return nested arrays is easy, your nested array element just uses another -call to `RedisModule_ReplyWithArray()` followed by the calls to emit the -sub array elements. - -## Returning arrays with dynamic length - -Sometimes it is not possible to know beforehand the number of items of -an array. As an example, think of a Redis module implementing a FACTOR -command that given a number outputs the prime factors. Instead of -factorializing the number, storing the prime factors into an array, and -later produce the command reply, a better solution is to start an array -reply where the length is not known, and set it later. 
This is accomplished
-with a special argument to `RedisModule_ReplyWithArray()`:
-
-    RedisModule_ReplyWithArray(ctx, REDISMODULE_POSTPONED_ARRAY_LEN);
-
-The above call starts an array reply so we can use other `ReplyWith` calls
-in order to produce the array items. Finally in order to set the length
-we use the following call:
-
-    RedisModule_ReplySetArrayLength(ctx, number_of_items);
-
-In the case of the FACTOR command, this translates to some code similar
-to this:
-
-    RedisModule_ReplyWithArray(ctx, REDISMODULE_POSTPONED_ARRAY_LEN);
-    number_of_factors = 0;
-    while(still_factors) {
-        RedisModule_ReplyWithLongLong(ctx, some_factor);
-        number_of_factors++;
-    }
-    RedisModule_ReplySetArrayLength(ctx, number_of_factors);
-
-Another common use case for this feature is iterating over the arrays of
-some collection and only returning the ones passing some kind of filtering.
-
-It is possible to have multiple nested arrays with postponed reply.
-Each call to `SetArray()` will set the length of the latest corresponding
-call to `ReplyWithArray()`:
-
-    RedisModule_ReplyWithArray(ctx, REDISMODULE_POSTPONED_ARRAY_LEN);
-    ... generate 100 elements ...
-    RedisModule_ReplyWithArray(ctx, REDISMODULE_POSTPONED_ARRAY_LEN);
-    ... generate 10 elements ...
-    RedisModule_ReplySetArrayLength(ctx, 10);
-    RedisModule_ReplySetArrayLength(ctx, 100);
-
-This creates a 100 items array having as its last element a 10 items array.
-
-# Arity and type checks
-
-Often commands need to check that the number of arguments and type of the key
-is correct. In order to report a wrong arity, there is a specific function
-called `RedisModule_WrongArity()`.
The usage is trivial: - - if (argc != 2) return RedisModule_WrongArity(ctx); - -Checking for the wrong type involves opening the key and checking the type: - - RedisModuleKey *key = RedisModule_OpenKey(ctx,argv[1], - REDISMODULE_READ|REDISMODULE_WRITE); - - int keytype = RedisModule_KeyType(key); - if (keytype != REDISMODULE_KEYTYPE_STRING && - keytype != REDISMODULE_KEYTYPE_EMPTY) - { - RedisModule_CloseKey(key); - return RedisModule_ReplyWithError(ctx,REDISMODULE_ERRORMSG_WRONGTYPE); - } - -Note that you often want to proceed with a command both if the key -is of the expected type, or if it's empty. - -## Low level access to keys - -Low level access to keys allow to perform operations on value objects associated -to keys directly, with a speed similar to what Redis uses internally to -implement the built-in commands. - -Once a key is opened, a key pointer is returned that will be used with all the -other low level API calls in order to perform operations on the key or its -associated value. - -Because the API is meant to be very fast, it cannot do too many run-time -checks, so the user must be aware of certain rules to follow: - -* Opening the same key multiple times where at least one instance is opened for writing, is undefined and may lead to crashes. -* While a key is open, it should only be accessed via the low level key API. For example opening a key, then calling DEL on the same key using the `RedisModule_Call()` API will result into a crash. However it is safe to open a key, perform some operation with the low level API, closing it, then using other APIs to manage the same key, and later opening it again to do some more work. - -In order to open a key the `RedisModule_OpenKey` function is used. It returns -a key pointer, that we'll use with all the next calls to access and modify -the value: - - RedisModuleKey *key; - key = RedisModule_OpenKey(ctx,argv[1],REDISMODULE_READ); - -The second argument is the key name, that must be a `RedisModuleString` object. 
-The third argument is the mode: `REDISMODULE_READ` or `REDISMODULE_WRITE`.
-It is possible to use `|` to bitwise OR the two modes to open the key in
-both modes. Currently a key opened for writing can also be accessed for reading
-but this is to be considered an implementation detail. The right mode should
-be used in sane modules.
-
-You can open non existing keys for writing, since the keys will be created
-when an attempt to write to the key is performed. However when opening keys
-just for reading, `RedisModule_OpenKey` will return NULL if the key does not
-exist.
-
-Once you are done using a key, you can close it with:
-
-    RedisModule_CloseKey(key);
-
-Note that if automatic memory management is enabled, you are not forced to
-close keys. When the module function returns, Redis will take care to close
-all the keys which are still open.
-
-## Getting the key type
-
-In order to obtain the type of the value of a key, use the `RedisModule_KeyType()` function:
-
-    int keytype = RedisModule_KeyType(key);
-
-It returns one of the following values:
-
-    REDISMODULE_KEYTYPE_EMPTY
-    REDISMODULE_KEYTYPE_STRING
-    REDISMODULE_KEYTYPE_LIST
-    REDISMODULE_KEYTYPE_HASH
-    REDISMODULE_KEYTYPE_SET
-    REDISMODULE_KEYTYPE_ZSET
-
-The above are just the usual Redis key types, with the addition of an empty
-type, that signals the key pointer is associated with an empty key that
-does not yet exist.
-
-## Creating new keys
-
-To create a new key, open it for writing and then write to it using one
-of the key writing functions. Example:
-
-    RedisModuleKey *key;
-    key = RedisModule_OpenKey(ctx,argv[1],REDISMODULE_READ|REDISMODULE_WRITE);
-    if (RedisModule_KeyType(key) == REDISMODULE_KEYTYPE_EMPTY) {
-        RedisModule_StringSet(key,argv[2]);
-    }
-
-## Deleting keys
-
-Just use:
-
-    RedisModule_DeleteKey(key);
-
-The function returns `REDISMODULE_ERR` if the key is not open for writing.
-Note that after a key gets deleted, it is set up in order to be targeted
-by new key commands.
For example `RedisModule_KeyType()` will return it is -an empty key, and writing to it will create a new key, possibly of another -type (depending on the API used). - -## Managing key expires (TTLs) - -To control key expires two functions are provided, that are able to set, -modify, get, and unset the time to live associated with a key. - -One function is used in order to query the current expire of an open key: - - mstime_t RedisModule_GetExpire(RedisModuleKey *key); - -The function returns the time to live of the key in milliseconds, or -`REDISMODULE_NO_EXPIRE` as a special value to signal the key has no associated -expire or does not exist at all (you can differentiate the two cases checking -if the key type is `REDISMODULE_KEYTYPE_EMPTY`). - -In order to change the expire of a key the following function is used instead: - - int RedisModule_SetExpire(RedisModuleKey *key, mstime_t expire); - -When called on a non existing key, `REDISMODULE_ERR` is returned, because -the function can only associate expires to existing open keys (non existing -open keys are only useful in order to create new values with data type -specific write operations). - -Again the `expire` time is specified in milliseconds. If the key has currently -no expire, a new expire is set. If the key already have an expire, it is -replaced with the new value. - -If the key has an expire, and the special value `REDISMODULE_NO_EXPIRE` is -used as a new expire, the expire is removed, similarly to the Redis -`PERSIST` command. In case the key was already persistent, no operation is -performed. - -## Obtaining the length of values - -There is a single function in order to retrieve the length of the value -associated to an open key. The returned length is value-specific, and is -the string length for strings, and the number of elements for the aggregated -data types (how many elements there is in a list, set, sorted set, hash). 
-
-    size_t len = RedisModule_ValueLength(key);
-
-If the key does not exist, 0 is returned by the function.
-
-## String type API
-
-Setting a new string value, like the Redis `SET` command does, is performed
-using:
-
-    int RedisModule_StringSet(RedisModuleKey *key, RedisModuleString *str);
-
-The function works exactly like the Redis `SET` command itself, that is, if
-there is a prior value (of any type) it will be deleted.
-
-Accessing existing string values is performed using DMA (direct memory
-access) for speed. The API will return a pointer and a length, so that it's
-possible to access and, if needed, modify the string directly.
-
-    size_t len, j;
-    char *myptr = RedisModule_StringDMA(key,&len,REDISMODULE_WRITE);
-    for (j = 0; j < len; j++) myptr[j] = 'A';
-
-In the above example we write directly on the string. Note that if you want
-to write, you must be sure to ask for `WRITE` mode.
-
-DMA pointers are only valid if no other operations are performed with the key
-before using the pointer, after the DMA call.
-
-Sometimes when we want to manipulate strings directly, we need to change
-their size as well. For this purpose, the `RedisModule_StringTruncate` function
-is used. Example:
-
-    RedisModule_StringTruncate(mykey,1024);
-
-The function truncates, or enlarges the string as needed, padding it with
-zero bytes if the previous length is smaller than the new length we request.
-If the string does not exist since `key` is associated to an open empty key,
-a string value is created and associated to the key.
-
-Note that every time `StringTruncate()` is called, we need to re-obtain
-the DMA pointer, since the old one may be invalid.
-
-## List type API
-
-It's possible to push and pop values from list values:
-
-    int RedisModule_ListPush(RedisModuleKey *key, int where, RedisModuleString *ele);
-    RedisModuleString *RedisModule_ListPop(RedisModuleKey *key, int where);
-
-In both APIs the `where` argument specifies whether to push or pop from tail
-or head, using the following macros:
-
-    REDISMODULE_LIST_HEAD
-    REDISMODULE_LIST_TAIL
-
-Elements returned by `RedisModule_ListPop()` are like strings created with
-`RedisModule_CreateString()`: they must be released with
-`RedisModule_FreeString()` or by enabling automatic memory management.
-
-## Set type API
-
-Work in progress.
-
-## Sorted set type API
-
-Documentation missing, please refer to the top comments inside `module.c`
-for the following functions:
-
-* `RedisModule_ZsetAdd`
-* `RedisModule_ZsetIncrby`
-* `RedisModule_ZsetScore`
-* `RedisModule_ZsetRem`
-
-And for the sorted set iterator:
-
-* `RedisModule_ZsetRangeStop`
-* `RedisModule_ZsetFirstInScoreRange`
-* `RedisModule_ZsetLastInScoreRange`
-* `RedisModule_ZsetFirstInLexRange`
-* `RedisModule_ZsetLastInLexRange`
-* `RedisModule_ZsetRangeCurrentElement`
-* `RedisModule_ZsetRangeNext`
-* `RedisModule_ZsetRangePrev`
-* `RedisModule_ZsetRangeEndReached`
-
-## Hash type API
-
-Documentation missing, please refer to the top comments inside `module.c`
-for the following functions:
-
-* `RedisModule_HashSet`
-* `RedisModule_HashGet`
-
-## Iterating aggregated values
-
-Work in progress.
-
-# Replicating commands
-
-If you want to use module commands exactly like normal Redis commands, in the
-context of replicated Redis instances, or using the AOF file for persistence,
-it is important for module commands to handle their replication in a consistent
-way.
-
-When using the higher level APIs to invoke commands, replication happens
-automatically if you use the "!"
modifier in the format string of -`RedisModule_Call()` as in the following example: - - reply = RedisModule_Call(ctx,"INCR","!sc",argv[1],"10"); - -As you can see the format specifier is `"!sc"`. The bang is not parsed as a -format specifier, but it internally flags the command as "must replicate". - -If you use the above programming style, there are no problems. -However sometimes things are more complex than that, and you use the low level -API. In this case, if there are no side effects in the command execution, and -it consistently always performs the same work, what is possible to do is to -replicate the command verbatim as the user executed it. To do that, you just -need to call the following function: - - RedisModule_ReplicateVerbatim(ctx); - -When you use the above API, you should not use any other replication function -since they are not guaranteed to mix well. - -However this is not the only option. It's also possible to exactly tell -Redis what commands to replicate as the effect of the command execution, using -an API similar to `RedisModule_Call()` but that instead of calling the command -sends it to the AOF / slaves stream. Example: - - RedisModule_Replicate(ctx,"INCRBY","cl","foo",my_increment); - -It's possible to call `RedisModule_Replicate` multiple times, and each -will emit a command. All the sequence emitted is wrapped between a -`MULTI/EXEC` transaction, so that the AOF and replication effects are the -same as executing a single command. - -Note that `Call()` replication and `Replicate()` replication have a rule, -in case you want to mix both forms of replication (not necessarily a good -idea if there are simpler approaches). Commands replicated with `Call()` -are always the first emitted in the final `MULTI/EXEC` block, while all -the commands emitted with `Replicate()` will follow. - -# Automatic memory management - -Normally when writing programs in the C language, programmers need to manage -memory manually. 
This is why the Redis modules API has functions to release -strings, close open keys, free replies, and so forth. - -However given that commands are executed in a contained environment and -with a set of strict APIs, Redis is able to provide automatic memory management -to modules, at the cost of some performance (most of the time, a very low -cost). - -When automatic memory management is enabled: - -1. You don't need to close open keys. -2. You don't need to free replies. -3. You don't need to free RedisModuleString objects. - -However you can still do it, if you want. For example, automatic memory -management may be active, but inside a loop allocating a lot of strings, -you may still want to free strings no longer used. - -In order to enable automatic memory management, just call the following -function at the start of the command implementation: - - RedisModule_AutoMemory(ctx); - -Automatic memory management is usually the way to go, however experienced -C programmers may not use it in order to gain some speed and memory usage -benefit. - -# Allocating memory into modules - -Normal C programs use `malloc()` and `free()` in order to allocate and -release memory dynamically. While in Redis modules the use of malloc is -not technically forbidden, it is a lot better to use the Redis Modules -specific functions, that are exact replacements for `malloc`, `free`, -`realloc` and `strdup`. These functions are: - - void *RedisModule_Alloc(size_t bytes); - void *RedisModule_Realloc(void *ptr, size_t bytes); - void RedisModule_Free(void *ptr); - void *RedisModule_Calloc(size_t nmemb, size_t size); - char *RedisModule_Strdup(const char *str); - -They work exactly like their `libc` equivalent calls, however they use -the same allocator Redis uses, and the memory allocated using these -functions is reported by the `INFO` command in the memory section, is -accounted when enforcing the `maxmemory` policy, and in general is -a first citizen of the Redis executable. 
On the contrary, the memory -allocated inside modules with libc `malloc()` is transparent to Redis. - -Another reason to use the modules functions in order to allocate memory -is that, when creating native data types inside modules, the RDB loading -functions can return deserialized strings (from the RDB file) directly -as `RedisModule_Alloc()` allocations, so they can be used directly to -populate data structures after loading, instead of having to copy them -to the data structure. - -## Pool allocator - -Sometimes in commands implementations, it is required to perform many -small allocations that will be not retained at the end of the command -execution, but are just functional to execute the command itself. - -This work can be more easily accomplished using the Redis pool allocator: - - void *RedisModule_PoolAlloc(RedisModuleCtx *ctx, size_t bytes); - -It works similarly to `malloc()`, and returns memory aligned to the -next power of two greater or equal to `bytes` (for a maximum alignment -of 8 bytes). However it allocates memory in blocks, so that the overhead -of the allocations is small, and more important, the memory allocated -is automatically released when the command returns. - -So in general short living allocations are good candidates for the pool -allocator. - -# Writing commands compatible with Redis Cluster - -Documentation missing, please check the following functions inside `module.c`: - - RedisModule_IsKeysPositionRequest(ctx); - RedisModule_KeyAtPos(ctx,pos); diff --git a/src/modules/TYPES.md b/src/modules/TYPES.md deleted file mode 100644 index 4d497356a..000000000 --- a/src/modules/TYPES.md +++ /dev/null @@ -1,379 +0,0 @@ -Native types in Redis modules -=== - -Redis modules can access Redis built-in data structures both at high level, -by calling Redis commands, and at low level, by manipulating the data structures -directly. 
- -By using these capabilities in order to build new abstractions on top of existing -Redis data structures, or by using strings DMA in order to encode modules -data structures into Redis strings, it is possible to create modules that -*feel like* they are exporting new data types. However, for more complex -problems, this is not enough, and the implementation of new data structures -inside the module is needed. - -We call the ability of Redis modules to implement new data structures that -feel like native Redis ones **native types support**. This document describes -the API exported by the Redis modules system in order to create new data -structures and handle the serialization in RDB files, the rewriting process -in AOF, the type reporting via the `TYPE` command, and so forth. - -Overview of native types ---- - -A module exporting a native type is composed of the following main parts: - -* The implementation of some kind of new data structure and of commands operating on the new data structure. -* A set of callbacks that handle: RDB saving, RDB loading, AOF rewriting, releasing of a value associated with a key, calculation of a value digest (hash) to be used with the `DEBUG DIGEST` command. -* A 9 characters name that is unique to each module native data type. -* An encoding version, used to persist into RDB files a module-specific data version, so that a module will be able to load older representations from RDB files. - -While to handle RDB loading, saving and AOF rewriting may look complex as a first glance, the modules API provide very high level function for handling all this, without requiring the user to handle read/write errors, so in practical terms, writing a new data structure for Redis is a simple task. - -A **very easy** to understand but complete example of native type implementation -is available inside the Redis distribution in the `/modules/hellotype.c` file. 
-The reader is encouraged to read the documentation by looking at this example -implementation to see how things are applied in the practice. - -Registering a new data type -=== - -In order to register a new native type into the Redis core, the module needs -to declare a global variable that will hold a reference to the data type. -The API to register the data type will return a data type reference that will -be stored in the global variable. - - static RedisModuleType *MyType; - #define MYTYPE_ENCODING_VERSION 0 - - int RedisModule_OnLoad(RedisModuleCtx *ctx) { - RedisModuleTypeMethods tm = { - .version = REDISMODULE_TYPE_METHOD_VERSION, - .rdb_load = MyTypeRDBLoad, - .rdb_save = MyTypeRDBSave, - .aof_rewrite = MyTypeAOFRewrite, - .free = MyTypeFree - }; - - MyType = RedisModule_CreateDataType(ctx, "MyType-AZ", - MYTYPE_ENCODING_VERSION, &tm); - if (MyType == NULL) return REDISMODULE_ERR; - } - -As you can see from the example above, a single API call is needed in order to -register the new type. However a number of function pointers are passed as -arguments. Certain are optionals while some are mandatory. The above set -of methods *must* be passed, while `.digest` and `.mem_usage` are optional -and are currently not actually supported by the modules internals, so for -now you can just ignore them. - -The `ctx` argument is the context that we receive in the `OnLoad` function. -The type `name` is a 9 character name in the character set that includes -from `A-Z`, `a-z`, `0-9`, plus the underscore `_` and minus `-` characters. - -Note that **this name must be unique** for each data type in the Redis -ecosystem, so be creative, use both lower-case and upper case if it makes -sense, and try to use the convention of mixing the type name with the name -of the author of the module, to create a 9 character unique name. - -**NOTE:** It is very important that the name is exactly 9 chars or the -registration of the type will fail. Read more to understand why. 
- -For example if I'm building a *b-tree* data structure and my name is *antirez* -I'll call my type **btree1-az**. The name, converted to a 64 bit integer, -is stored inside the RDB file when saving the type, and will be used when the -RDB data is loaded in order to resolve what module can load the data. If Redis -finds no matching module, the integer is converted back to a name in order to -provide some clue to the user about what module is missing in order to load -the data. - -The type name is also used as a reply for the `TYPE` command when called -with a key holding the registered type. - -The `encver` argument is the encoding version used by the module to store data -inside the RDB file. For example I can start with an encoding version of 0, -but later when I release version 2.0 of my module, I can switch encoding to -something better. The new module will register with an encoding version of 1, -so when it saves new RDB files, the new version will be stored on disk. However -when loading RDB files, the module `rdb_load` method will be called even if -there is data found for a different encoding version (and the encoding version -is passed as argument to `rdb_load`), so that the module can still load old -RDB files. 
- -The last argument is a structure used in order to pass the type methods to the -registration function: `rdb_load`, `rdb_save`, `aof_rewrite`, `digest` and -`free` and `mem_usage` are all callbacks with the following prototypes and uses: - - typedef void *(*RedisModuleTypeLoadFunc)(RedisModuleIO *rdb, int encver); - typedef void (*RedisModuleTypeSaveFunc)(RedisModuleIO *rdb, void *value); - typedef void (*RedisModuleTypeRewriteFunc)(RedisModuleIO *aof, RedisModuleString *key, void *value); - typedef size_t (*RedisModuleTypeMemUsageFunc)(void *value); - typedef void (*RedisModuleTypeDigestFunc)(RedisModuleDigest *digest, void *value); - typedef void (*RedisModuleTypeFreeFunc)(void *value); - -* `rdb_load` is called when loading data from the RDB file. It loads data in the same format as `rdb_save` produces. -* `rdb_save` is called when saving data to the RDB file. -* `aof_rewrite` is called when the AOF is being rewritten, and the module needs to tell Redis what is the sequence of commands to recreate the content of a given key. -* `digest` is called when `DEBUG DIGEST` is executed and a key holding this module type is found. Currently this is not yet implemented so the function ca be left empty. -* `mem_usage` is called when the `MEMORY` command asks for the total memory consumed by a specific key, and is used in order to get the amount of bytes used by the module value. -* `free` is called when a key with the module native type is deleted via `DEL` or in any other mean, in order to let the module reclaim the memory associated with such a value. - -Ok, but *why* modules types require a 9 characters name? ---- - -Oh, I understand you need to understand this, so here is a very specific -explanation. - -When Redis persists to RDB files, modules specific data types require to -be persisted as well. 
Now RDB files are sequences of key-value pairs -like the following: - - [1 byte type] [key] [a type specific value] - -The 1 byte type identifies strings, lists, sets, and so forth. In the case -of modules data, it is set to a special value of `module data`, but of -course this is not enough, we need the information needed to link a specific -value with a specific module type that is able to load and handle it. - -So when we save a `type specific value` about a module, we prefix it with -a 64 bit integer. 64 bits is large enough to store the informations needed -in order to lookup the module that can handle that specific type, but is -short enough that we can prefix each module value we store inside the RDB -without making the final RDB file too big. At the same time, this solution -of prefixing the value with a 64 bit *signature* does not require to do -strange things like defining in the RDB header a list of modules specific -types. Everything is pretty simple. - -So, what you can store in 64 bits in order to identify a given module in -a reliable way? Well if you build a character set of 64 symbols, you can -easily store 9 characters of 6 bits, and you are left with 10 bits, that -are used in order to store the *encoding version* of the type, so that -the same type can evolve in the future and provide a different and more -efficient or updated serialization format for RDB files. - -So the 64 bit prefix stored before each module value is like the following: - - 6|6|6|6|6|6|6|6|6|10 - -The first 9 elements are 6-bits characters, the final 10 bits is the -encoding version. - -When the RDB file is loaded back, it reads the 64 bit value, masks the final -10 bits, and searches for a matching module in the modules types cache. -When a matching one is found, the method to load the RDB file value is called -with the 10 bits encoding version as argument, so that the module knows -what version of the data layout to load, if it can support multiple versions. 
- -Now the interesting thing about all this is that, if instead the module type -cannot be resolved, since there is no loaded module having this signature, -we can convert back the 64 bit value into a 9 characters name, and print -an error to the user that includes the module type name! So that she or he -immediately realizes what's wrong. - -Setting and getting keys ---- - -After registering our new data type in the `RedisModule_OnLoad()` function, -we also need to be able to set Redis keys having as value our native type. - -This normally happens in the context of commands that write data to a key. -The native types API allow to set and get keys to module native data types, -and to test if a given key is already associated to a value of a specific data -type. - -The API uses the normal modules `RedisModule_OpenKey()` low level key access -interface in order to deal with this. This is an eaxmple of setting a -native type private data structure to a Redis key: - - RedisModuleKey *key = RedisModule_OpenKey(ctx,keyname,REDISMODULE_WRITE); - struct some_private_struct *data = createMyDataStructure(); - RedisModule_ModuleTypeSetValue(key,MyType,data); - -The function `RedisModule_ModuleTypeSetValue()` is used with a key handle open -for writing, and gets three arguments: the key handle, the reference to the -native type, as obtained during the type registration, and finally a `void*` -pointer that contains the private data implementing the module native type. - -Note that Redis has no clues at all about what your data contains. It will -just call the callbacks you provided during the method registration in order -to perform operations on the type. - -Similarly we can retrieve the private data from a key using this function: - - struct some_private_struct *data; - data = RedisModule_ModuleTypeGetValue(key); - -We can also test for a key to have our native type as value: - - if (RedisModule_ModuleTypeGetType(key) == MyType) { - /* ... do something ... 
*/ - } - -However for the calls to do the right thing, we need to check if the key -is empty, if it contains a value of the right kind, and so forth. So -the idiomatic code to implement a command writing to our native type -is along these lines: - - RedisModuleKey *key = RedisModule_OpenKey(ctx,argv[1], - REDISMODULE_READ|REDISMODULE_WRITE); - int type = RedisModule_KeyType(key); - if (type != REDISMODULE_KEYTYPE_EMPTY && - RedisModule_ModuleTypeGetType(key) != MyType) - { - return RedisModule_ReplyWithError(ctx,REDISMODULE_ERRORMSG_WRONGTYPE); - } - -Then if we successfully verified the key is not of the wrong type, and -we are going to write to it, we usually want to create a new data structure if -the key is empty, or retrieve the reference to the value associated to the -key if there is already one: - - /* Create an empty value object if the key is currently empty. */ - struct some_private_struct *data; - if (type == REDISMODULE_KEYTYPE_EMPTY) { - data = createMyDataStructure(); - RedisModule_ModuleTypeSetValue(key,MyType,data); - } else { - data = RedisModule_ModuleTypeGetValue(key); - } - /* Do something with 'data'... */ - -Free method ---- - -As already mentioned, when Redis needs to free a key holding a native type -value, it needs help from the module in order to release the memory. This -is the reason why we pass a `free` callback during the type registration: - - typedef void (*RedisModuleTypeFreeFunc)(void *value); - -A trivial implementation of the free method can be something like this, -assuming our data structure is composed of a single allocation: - - void MyTypeFreeCallback(void *value) { - RedisModule_Free(value); - } - -However a more real world one will call some function that performs a more -complex memory reclaiming, by casting the void pointer to some structure -and freeing all the resources composing the value. 
- -RDB load and save methods ---- - -The RDB saving and loading callbacks need to create (and load back) a -representation of the data type on disk. Redis offers an high level API -that can automatically store inside the RDB file the following types: - -* Unsigned 64 bit integers. -* Signed 64 bit integers. -* Doubles. -* Strings. - -It is up to the module to find a viable representation using the above base -types. However note that while the integer and double values are stored -and loaded in an architecture and *endianess* agnostic way, if you use -the raw string saving API to, for example, save a structure on disk, you -have to care those details yourself. - -This is the list of functions performing RDB saving and loading: - - void RedisModule_SaveUnsigned(RedisModuleIO *io, uint64_t value); - uint64_t RedisModule_LoadUnsigned(RedisModuleIO *io); - void RedisModule_SaveSigned(RedisModuleIO *io, int64_t value); - int64_t RedisModule_LoadSigned(RedisModuleIO *io); - void RedisModule_SaveString(RedisModuleIO *io, RedisModuleString *s); - void RedisModule_SaveStringBuffer(RedisModuleIO *io, const char *str, size_t len); - RedisModuleString *RedisModule_LoadString(RedisModuleIO *io); - char *RedisModule_LoadStringBuffer(RedisModuleIO *io, size_t *lenptr); - void RedisModule_SaveDouble(RedisModuleIO *io, double value); - double RedisModule_LoadDouble(RedisModuleIO *io); - -The functions don't require any error checking from the module, that can -always assume calls succeed. 
- -As an example, imagine I've a native type that implements an array of -double values, with the following structure: - - struct double_array { - size_t count; - double *values; - }; - -My `rdb_save` method may look like the following: - - void DoubleArrayRDBSave(RedisModuleIO *io, void *ptr) { - struct double_array *da = ptr; - RedisModule_SaveUnsigned(io,da->count); - for (size_t j = 0; j < da->count; j++) - RedisModule_SaveDouble(io,da->values[j]); - } - -What we did was to store the number of elements followed by each double -value. So when later we'll have to load the structure in the `rdb_load` -method we'll do something like this: - - void *DoubleArrayRDBLoad(RedisModuleIO *io, int encver) { - if (encver != DOUBLE_ARRAY_ENC_VER) { - /* We should actually log an error here, or try to implement - the ability to load older versions of our data structure. */ - return NULL; - } - - struct double_array *da; - da = RedisModule_Alloc(sizeof(*da)); - da->count = RedisModule_LoadUnsigned(io); - da->values = RedisModule_Alloc(da->count * sizeof(double)); - for (size_t j = 0; j < da->count; j++) - da->values[j] = RedisModule_LoadDouble(io); - return da; - } - -The load callback just reconstructs the data structure from the data -we stored in the RDB file. - -Note that while there is no error handling on the API that writes and reads -from disk, still the load callback can return NULL on errors in case what -it reads does not look correct. Redis will just panic in that case. - -AOF rewriting ---- - - void RedisModule_EmitAOF(RedisModuleIO *io, const char *cmdname, const char *fmt, ...); - -Handling multiple encodings ---- - - WORK IN PROGRESS - -Allocating memory ---- - -Modules data types should try to use `RedisModule_Alloc()` functions family -in order to allocate, reallocate and release heap memory used to implement the native data structures (see the other Redis Modules documentation for detailed information). 
- -This is not just useful in order for Redis to be able to account for the memory used by the module, but there are also more advantages: - -* Redis uses the `jemalloc` allcator, that often prevents fragmentation problems that could be caused by using the libc allocator. -* When loading strings from the RDB file, the native types API is able to return strings allocated directly with `RedisModule_Alloc()`, so that the module can directly link this memory into the data structure representation, avoiding an useless copy of the data. - -Even if you are using external libraries implementing your data structures, the -allocation functions provided by the module API is exactly compatible with -`malloc()`, `realloc()`, `free()` and `strdup()`, so converting the libraries -in order to use these functions should be trivial. - -In case you have an external library that uses libc `malloc()`, and you want -to avoid replacing manually all the calls with the Redis Modules API calls, -an approach could be to use simple macros in order to replace the libc calls -with the Redis API calls. Something like this could work: - - #define malloc RedisModule_Alloc - #define realloc RedisModule_Realloc - #define free RedisModule_Free - #define strdup RedisModule_Strdup - -However take in mind that mixing libc calls with Redis API calls will result -into troubles and crashes, so if you replace calls using macros, you need to -make sure that all the calls are correctly replaced, and that the code with -the substituted calls will never, for example, attempt to call -`RedisModule_Free()` with a pointer allocated using libc `malloc()`. From 1ac18c801b64cf87096c936070bc3ef55a6d0a04 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 14 Jul 2017 12:07:52 +0200 Subject: [PATCH 0392/1722] Allow certain modules APIs only defining REDISMODULE_EXPERIMENTAL_API. Those calls may be subject to changes in the future, so the user should acknowledge it is using non stable API. 
--- src/modules/helloblock.c | 1 + src/redismodule.h | 31 +++++++++++++++++++------------ 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/src/modules/helloblock.c b/src/modules/helloblock.c index 3ebf10e11..c74fcd30f 100644 --- a/src/modules/helloblock.c +++ b/src/modules/helloblock.c @@ -31,6 +31,7 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#define REDISMODULE_EXPERIMENTAL_API #include "../redismodule.h" #include #include diff --git a/src/redismodule.h b/src/redismodule.h index dd14c5f4e..7fc0fec40 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -207,20 +207,24 @@ int REDISMODULE_API_FUNC(RedisModule_StringAppendBuffer)(RedisModuleCtx *ctx, Re void REDISMODULE_API_FUNC(RedisModule_RetainString)(RedisModuleCtx *ctx, RedisModuleString *str); int REDISMODULE_API_FUNC(RedisModule_StringCompare)(RedisModuleString *a, RedisModuleString *b); RedisModuleCtx *REDISMODULE_API_FUNC(RedisModule_GetContextFromIO)(RedisModuleIO *io); +long long REDISMODULE_API_FUNC(RedisModule_Milliseconds)(void); +void REDISMODULE_API_FUNC(RedisModule_DigestAddStringBuffer)(RedisModuleDigest *md, unsigned char *ele, size_t len); +void REDISMODULE_API_FUNC(RedisModule_DigestAddLongLong)(RedisModuleDigest *md, long long ele); +void REDISMODULE_API_FUNC(RedisModule_DigestEndSequence)(RedisModuleDigest *md); + +/* Experimental APIs */ +#ifdef REDISMODULE_EXPERIMENTAL_API RedisModuleBlockedClient *REDISMODULE_API_FUNC(RedisModule_BlockClient)(RedisModuleCtx *ctx, RedisModuleCmdFunc reply_callback, RedisModuleCmdFunc timeout_callback, void (*free_privdata)(void*), long long timeout_ms); int REDISMODULE_API_FUNC(RedisModule_UnblockClient)(RedisModuleBlockedClient *bc, void *privdata); int REDISMODULE_API_FUNC(RedisModule_IsBlockedReplyRequest)(RedisModuleCtx *ctx); int REDISMODULE_API_FUNC(RedisModule_IsBlockedTimeoutRequest)(RedisModuleCtx *ctx); void *REDISMODULE_API_FUNC(RedisModule_GetBlockedClientPrivateData)(RedisModuleCtx *ctx); int 
REDISMODULE_API_FUNC(RedisModule_AbortBlock)(RedisModuleBlockedClient *bc); -long long REDISMODULE_API_FUNC(RedisModule_Milliseconds)(void); RedisModuleCtx *REDISMODULE_API_FUNC(RedisModule_GetThreadSafeContext)(RedisModuleBlockedClient *bc); void REDISMODULE_API_FUNC(RedisModule_FreeThreadSafeContext)(RedisModuleCtx *ctx); void REDISMODULE_API_FUNC(RedisModule_ThreadSafeContextLock)(RedisModuleCtx *ctx); void REDISMODULE_API_FUNC(RedisModule_ThreadSafeContextUnlock)(RedisModuleCtx *ctx); -void REDISMODULE_API_FUNC(RedisModule_DigestAddStringBuffer)(RedisModuleDigest *md, unsigned char *ele, size_t len); -void REDISMODULE_API_FUNC(RedisModule_DigestAddLongLong)(RedisModuleDigest *md, long long ele); -void REDISMODULE_API_FUNC(RedisModule_DigestEndSequence)(RedisModuleDigest *md); +#endif /* This is included inline inside each Redis module. */ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int apiver) __attribute__((unused)); @@ -322,20 +326,23 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int REDISMODULE_GET_API(RetainString); REDISMODULE_GET_API(StringCompare); REDISMODULE_GET_API(GetContextFromIO); + REDISMODULE_GET_API(Milliseconds); + REDISMODULE_GET_API(DigestAddStringBuffer); + REDISMODULE_GET_API(DigestAddLongLong); + REDISMODULE_GET_API(DigestEndSequence); + +#ifdef REDISMODULE_EXPERIMENTAL_API + REDISMODULE_GET_API(GetThreadSafeContext); + REDISMODULE_GET_API(FreeThreadSafeContext); + REDISMODULE_GET_API(ThreadSafeContextLock); + REDISMODULE_GET_API(ThreadSafeContextUnlock); REDISMODULE_GET_API(BlockClient); REDISMODULE_GET_API(UnblockClient); REDISMODULE_GET_API(IsBlockedReplyRequest); REDISMODULE_GET_API(IsBlockedTimeoutRequest); REDISMODULE_GET_API(GetBlockedClientPrivateData); REDISMODULE_GET_API(AbortBlock); - REDISMODULE_GET_API(Milliseconds); - REDISMODULE_GET_API(GetThreadSafeContext); - REDISMODULE_GET_API(FreeThreadSafeContext); - REDISMODULE_GET_API(ThreadSafeContextLock); - 
REDISMODULE_GET_API(ThreadSafeContextUnlock); - REDISMODULE_GET_API(DigestAddStringBuffer); - REDISMODULE_GET_API(DigestAddLongLong); - REDISMODULE_GET_API(DigestEndSequence); +#endif RedisModule_SetModuleAttribs(ctx,name,ver,apiver); return REDISMODULE_OK; From b36074dae39822e07669e87d481d4ce6eda0fffb Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 14 Jul 2017 13:02:15 +0200 Subject: [PATCH 0393/1722] Modules: fix thread safe context DB selection. Before this fix the DB currenty selected by the client blocked was not respected and operations were always performed on DB 0. --- src/module.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/module.c b/src/module.c index 7b8b17479..342612a1b 100644 --- a/src/module.c +++ b/src/module.c @@ -206,6 +206,7 @@ typedef struct RedisModuleBlockedClient { RedisModule_UnblockClient() API. */ client *reply_client; /* Fake client used to accumulate replies in thread safe contexts. */ + int dbid; /* Database number selected by the original client. */ } RedisModuleBlockedClient; static pthread_mutex_t moduleUnblockedClientsMutex = PTHREAD_MUTEX_INITIALIZER; @@ -3339,6 +3340,7 @@ RedisModuleBlockedClient *RM_BlockClient(RedisModuleCtx *ctx, RedisModuleCmdFunc bc->privdata = NULL; bc->reply_client = createClient(-1); bc->reply_client->flags |= CLIENT_MODULE; + bc->dbid = c->db->id; c->bpop.timeout = timeout_ms ? (mstime()+timeout_ms) : 0; blockClient(c,BLOCKED_MODULE); @@ -3524,6 +3526,7 @@ RedisModuleCtx *RM_GetThreadSafeContext(RedisModuleBlockedClient *bc) { * in order to keep things like the currently selected database and similar * things. */ ctx->client = createClient(-1); + if (bc) selectDb(ctx->client,bc->dbid); return ctx; } From 0a18ba8719bfef0f05fe24273826df1b3407215b Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 14 Jul 2017 13:04:37 +0200 Subject: [PATCH 0394/1722] changelog.tcl: output 100 lines. 
--- utils/releasetools/changelog.tcl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/releasetools/changelog.tcl b/utils/releasetools/changelog.tcl index 4b5424ce2..9b3a2cddc 100755 --- a/utils/releasetools/changelog.tcl +++ b/utils/releasetools/changelog.tcl @@ -21,7 +21,7 @@ append template "\n\n" set date [clock format [clock seconds]] set template [string map [list %ver% $ver %date% $date] $template] -append template [exec git log $branch~30..$branch "--format=format:%an in commit %h:%n %s" --shortstat] +append template [exec git log $branch~100..$branch "--format=format:%an in commit %h:%n %s" --shortstat] #Older, more verbose version. # From 009a48d144c72df321b5a7e8d5822705527584bc Mon Sep 17 00:00:00 2001 From: Chris Lamb Date: Fri, 14 Jul 2017 22:53:14 +0100 Subject: [PATCH 0395/1722] Correct proceding -> proceeding typo. --- src/redis-check-aof.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/redis-check-aof.c b/src/redis-check-aof.c index 33bb445e7..c4d5a225e 100644 --- a/src/redis-check-aof.c +++ b/src/redis-check-aof.c @@ -194,7 +194,7 @@ int redis_check_aof_main(int argc, char **argv) { printf("RDB preamble of AOF file is not sane, aborting.\n"); exit(1); } else { - printf("RDB preamble is OK, proceding with AOF tail...\n"); + printf("RDB preamble is OK, proceeding with AOF tail...\n"); } } } From e43fafda623960299ba2d6eaf25f0fa681405669 Mon Sep 17 00:00:00 2001 From: WuYunlong Date: Sat, 15 Jul 2017 10:20:56 +0800 Subject: [PATCH 0396/1722] fix rewrite config: auto-aof-rewrite-min-size --- src/config.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/config.c b/src/config.c index 900274f65..06d869be1 100644 --- a/src/config.c +++ b/src/config.c @@ -1056,8 +1056,6 @@ void configSetCommand(client *c) { "active-defrag-cycle-max",server.active_defrag_cycle_max,1,99) { } config_set_numerical_field( "auto-aof-rewrite-percentage",server.aof_rewrite_perc,0,LLONG_MAX){ - } 
config_set_numerical_field( - "auto-aof-rewrite-min-size",server.aof_rewrite_min_size,0,LLONG_MAX) { } config_set_numerical_field( "hash-max-ziplist-entries",server.hash_max_ziplist_entries,0,LLONG_MAX) { } config_set_numerical_field( @@ -1136,6 +1134,8 @@ void configSetCommand(client *c) { } } config_set_memory_field("repl-backlog-size",ll) { resizeReplicationBacklog(ll); + } config_set_memory_field("auto-aof-rewrite-min-size",ll) { + server.aof_rewrite_min_size = ll; /* Enumeration fields. * config_set_enum_field(name,var,enum_var) */ From 9351639fe5ae12699f9e74e41d64abc87237b476 Mon Sep 17 00:00:00 2001 From: Leon Chen Date: Tue, 18 Jul 2017 02:28:24 -0500 Subject: [PATCH 0397/1722] fix mismatch argument --- src/cluster.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cluster.c b/src/cluster.c index a516e911f..eeb703b40 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -4194,7 +4194,7 @@ void clusterCommand(client *c) { } if ((n = clusterLookupNode(c->argv[4]->ptr)) == NULL) { addReplyErrorFormat(c,"I don't know about node %s", - (char*)c->argv[3]->ptr); + (char*)c->argv[4]->ptr); return; } server.cluster->importing_slots_from[slot] = n; From f32a44951a95bdf01a3cb12bf5ff5aa1b5023746 Mon Sep 17 00:00:00 2001 From: Leon Chen Date: Thu, 20 Jul 2017 17:24:38 +0800 Subject: [PATCH 0398/1722] fix return wrong value of clusterDelNodeSlots --- src/cluster.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/cluster.c b/src/cluster.c index eeb703b40..759ed4d06 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -3600,8 +3600,10 @@ int clusterDelNodeSlots(clusterNode *node) { int deleted = 0, j; for (j = 0; j < CLUSTER_SLOTS; j++) { - if (clusterNodeGetSlotBit(node,j)) clusterDelSlot(j); - deleted++; + if (clusterNodeGetSlotBit(node,j)) { + clusterDelSlot(j); + deleted++; + } } return deleted; } From a19700091a4c5fe7789f01e39a35b21d07a14d5d Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 20 Jul 2017 14:59:42 +0200 Subject: 
[PATCH 0399/1722] Fix two bugs in moduleTypeLookupModuleByID(). The function cache was not working at all, and the function returned wrong values if there where two or more modules exporting native data types. See issue #4131 for more details. --- src/module.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/module.c b/src/module.c index 342612a1b..cfa5ea4f7 100644 --- a/src/module.c +++ b/src/module.c @@ -2680,7 +2680,7 @@ moduleType *moduleTypeLookupModuleByID(uint64_t id) { /* Search in cache to start. */ int j; - for (j = 0; j < MODULE_LOOKUP_CACHE_SIZE; j++) + for (j = 0; j < MODULE_LOOKUP_CACHE_SIZE && cache[j].mt != NULL; j++) if (cache[j].id == id) return cache[j].mt; /* Slow module by module lookup. */ @@ -2688,17 +2688,20 @@ moduleType *moduleTypeLookupModuleByID(uint64_t id) { dictIterator *di = dictGetIterator(modules); dictEntry *de; - while ((de = dictNext(di)) != NULL) { + while ((de = dictNext(di)) != NULL && mt == NULL) { struct RedisModule *module = dictGetVal(de); listIter li; listNode *ln; listRewind(module->types,&li); while((ln = listNext(&li))) { - mt = ln->value; + moduleType *this_mt = ln->value; /* Compare only the 54 bit module identifier and not the * encoding version. */ - if (mt->id >> 10 == id >> 10) break; + if (this_mt->id >> 10 == id >> 10) { + mt = this_mt; + break; + } } } dictReleaseIterator(di); From 01f1eb8f874f7e75d3433efa56dd6beae6cf5512 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 20 Jul 2017 15:17:35 +0200 Subject: [PATCH 0400/1722] Make representClusterNodeFlags() more robust. This function failed when an internal-only flag was set as an only flag in a node: the string was trimmed expecting a final comma before exiting the function, causing a crash. See issue #4142. Moreover generation of flags representation only needed at DEBUG log level was always performed: a waste of CPU time. This is fixed as well by this commit. 
--- src/cluster.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/src/cluster.c b/src/cluster.c index a516e911f..89765e37d 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -1323,14 +1323,16 @@ void clusterProcessGossipSection(clusterMsg *hdr, clusterLink *link) { clusterNode *node; sds ci; - ci = representClusterNodeFlags(sdsempty(), flags); - serverLog(LL_DEBUG,"GOSSIP %.40s %s:%d@%d %s", - g->nodename, - g->ip, - ntohs(g->port), - ntohs(g->cport), - ci); - sdsfree(ci); + if (server.verbosity == LL_DEBUG) { + ci = representClusterNodeFlags(sdsempty(), flags); + serverLog(LL_DEBUG,"GOSSIP %.40s %s:%d@%d %s", + g->nodename, + g->ip, + ntohs(g->port), + ntohs(g->cport), + ci); + sdsfree(ci); + } /* Update our state accordingly to the gossip sections */ node = clusterLookupNode(g->nodename); @@ -3835,15 +3837,14 @@ static struct redisNodeFlags redisNodeFlagsTable[] = { /* Concatenate the comma separated list of node flags to the given SDS * string 'ci'. */ sds representClusterNodeFlags(sds ci, uint16_t flags) { - if (flags == 0) { - ci = sdscat(ci,"noflags,"); - } else { - int i, size = sizeof(redisNodeFlagsTable)/sizeof(struct redisNodeFlags); - for (i = 0; i < size; i++) { - struct redisNodeFlags *nodeflag = redisNodeFlagsTable + i; - if (flags & nodeflag->flag) ci = sdscat(ci, nodeflag->name); - } + size_t orig_len = sdslen(ci); + int i, size = sizeof(redisNodeFlagsTable)/sizeof(struct redisNodeFlags); + for (i = 0; i < size; i++) { + struct redisNodeFlags *nodeflag = redisNodeFlagsTable + i; + if (flags & nodeflag->flag) ci = sdscat(ci, nodeflag->name); } + /* If no flag was added, add the "noflags" special flag. */ + if (sdslen(ci) == orig_len) ci = sdscat(ci,"noflags,"); sdsIncrLen(ci,-1); /* Remove trailing comma. 
*/ return ci; } From 70f29030ae1dcb1650cfababe6356013ba8e1768 Mon Sep 17 00:00:00 2001 From: antirez Date: Sun, 23 Jul 2017 12:41:26 +0200 Subject: [PATCH 0401/1722] Fix typo in unblockClientFromModule() top comment. --- src/module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/module.c b/src/module.c index cfa5ea4f7..de1a740ec 100644 --- a/src/module.c +++ b/src/module.c @@ -3315,7 +3315,7 @@ void unblockClientFromModule(client *c) { } /* Block a client in the context of a blocking command, returning an handle - * which will be used, later, in order to block the client with a call to + * which will be used, later, in order to unblock the client with a call to * RedisModule_UnblockClient(). The arguments specify callback functions * and a timeout after which the client is unblocked. * From 4bd29fcdab5eb1118549e26d01a394e231a4ee96 Mon Sep 17 00:00:00 2001 From: antirez Date: Sun, 23 Jul 2017 12:55:37 +0200 Subject: [PATCH 0402/1722] Modules: don't crash when Lua calls a module blocking command. Lua scripting does not support calling blocking commands, however all the native Redis commands are flagged as "s" (no scripting flag), so this is not possible at all. With modules there is no such mechanism in order to flag a command as non callable by the Lua scripting engine, moreover we cannot trust the modules users from complying all the times: it is likely that modules will be released to have blocking commands without such commands being flagged correctly, even if we provide a way to signal this fact. This commit attempts to address the problem in a short term way, by detecting that a module is trying to block in the context of the Lua scripting engine client, and preventing to do this. The module will actually believe to block as usually, but what happens is that the Lua script receives an error immediately, and the background call is ignored by the Redis engine (if not for the cleanup callbacks, once it unblocks). 
Long term, the more likely solution, is to introduce a new call called RedisModule_GetClientFlags(), so that a command can detect if the caller is a Lua script, and return an error, or avoid blocking at all. Being the blocking API experimental right now, more work is needed in this regard in order to reach a level well blocking module commands and all the other Redis subsystems interact peacefully. Now the effect is like the following: 127.0.0.1:6379> eval "redis.call('hello.block',1,5000)" 0 (error) ERR Error running script (call to f_b5ba35ff97bc1ef23debc4d6e9fd802da187ed53): @user_script:1: ERR Blocking module command called from Lua script This commit fixes issue #4127 in the short term. --- src/module.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/module.c b/src/module.c index de1a740ec..fda68b273 100644 --- a/src/module.c +++ b/src/module.c @@ -3332,10 +3332,15 @@ void unblockClientFromModule(client *c) { */ RedisModuleBlockedClient *RM_BlockClient(RedisModuleCtx *ctx, RedisModuleCmdFunc reply_callback, RedisModuleCmdFunc timeout_callback, void (*free_privdata)(void*), long long timeout_ms) { client *c = ctx->client; + int islua = c->flags & CLIENT_LUA; + c->bpop.module_blocked_handle = zmalloc(sizeof(RedisModuleBlockedClient)); RedisModuleBlockedClient *bc = c->bpop.module_blocked_handle; - bc->client = c; + /* We need to handle the invalid operation of calling modules blocking + * commands from Lua. We actually create an already aborted (client set to + * NULL) blocked client handle, and actually reply to Lua with an error. */ + bc->client = islua ? NULL : c; bc->module = ctx->module; bc->reply_callback = reply_callback; bc->timeout_callback = timeout_callback; @@ -3346,7 +3351,12 @@ RedisModuleBlockedClient *RM_BlockClient(RedisModuleCtx *ctx, RedisModuleCmdFunc bc->dbid = c->db->id; c->bpop.timeout = timeout_ms ? 
(mstime()+timeout_ms) : 0; - blockClient(c,BLOCKED_MODULE); + if (islua) { + c->bpop.module_blocked_handle = NULL; + addReplyError(c,"Blocking module command called from Lua script"); + } else { + blockClient(c,BLOCKED_MODULE); + } return bc; } From 50e96b1488d4faba98e21a2bbb18fe6b70decf9d Mon Sep 17 00:00:00 2001 From: liangsijian Date: Mon, 24 Jul 2017 19:20:31 +0800 Subject: [PATCH 0403/1722] Fix lua ldb command log --- src/scripting.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/scripting.c b/src/scripting.c index 52fed6ac8..8f8145b2c 100644 --- a/src/scripting.c +++ b/src/scripting.c @@ -443,6 +443,7 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) { if (j == 10) { cmdlog = sdscatprintf(cmdlog," ... (%d more)", c->argc-j-1); + break; } else { cmdlog = sdscatlen(cmdlog," ",1); cmdlog = sdscatsds(cmdlog,c->argv[j]->ptr); From b4ea09446f7799e3f95d54b957f5fa617afc80af Mon Sep 17 00:00:00 2001 From: Mota Date: Sat, 28 Jan 2017 11:20:23 +0800 Subject: [PATCH 0404/1722] redis-benchmark: default value size usage update. default size of SET/GET value in usage should be 3 bytes as in main code. 
--- src/redis-benchmark.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/redis-benchmark.c b/src/redis-benchmark.c index dec8ecb52..928ec31e0 100644 --- a/src/redis-benchmark.c +++ b/src/redis-benchmark.c @@ -572,8 +572,8 @@ usage: " -a Password for Redis Auth\n" " -c Number of parallel connections (default 50)\n" " -n Total number of requests (default 100000)\n" -" -d Data size of SET/GET value in bytes (default 2)\n" -" --dbnum SELECT the specified db number (default 0)\n" +" -d Data size of SET/GET value in bytes (default 3)\n" +" --dbnum SELECT the specified db number (default 0)\n" " -k 1=keep alive 0=reconnect (default 1)\n" " -r Use random keys for SET/GET/INCR, random values for SADD\n" " Using this option the benchmark will expand the string __rand_int__\n" From 38a9c8c6c0fa336eb0b49edddff0100c78be48ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Fievet?= <_@sebastien-fievet.fr> Date: Wed, 26 Jul 2017 14:11:05 +0200 Subject: [PATCH 0405/1722] Fix some typos --- redis.conf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/redis.conf b/redis.conf index c54dba392..7afd35a47 100644 --- a/redis.conf +++ b/redis.conf @@ -606,7 +606,7 @@ slave-priority 100 # deletion of the object. It means that the server stops processing new commands # in order to reclaim all the memory associated with an object in a synchronous # way. If the key deleted is associated with a small object, the time needed -# in order to execute th DEL command is very small and comparable to most other +# in order to execute the DEL command is very small and comparable to most other # O(1) or O(log_N) commands in Redis. However if the key is associated with an # aggregated value containing millions of elements, the server can block for # a long time (even seconds) in order to complete the operation. 
@@ -621,7 +621,7 @@ slave-priority 100 # It's up to the design of the application to understand when it is a good # idea to use one or the other. However the Redis server sometimes has to # delete keys or flush the whole database as a side effect of other operations. -# Specifically Redis deletes objects independently of an user call in the +# Specifically Redis deletes objects independently of a user call in the # following scenarios: # # 1) On eviction, because of the maxmemory and maxmemory policy configurations, @@ -914,7 +914,7 @@ lua-time-limit 5000 # Docker and other containers). # # In order to make Redis Cluster working in such environments, a static -# configuration where each node known its public address is needed. The +# configuration where each node knows its public address is needed. The # following two options are used for this scope, and are: # # * cluster-announce-ip From c58e1199d85fce1e9395df1178e76ad8769421f1 Mon Sep 17 00:00:00 2001 From: Bo Cai Date: Wed, 26 Jul 2017 21:24:28 +0800 Subject: [PATCH 0406/1722] redis-cli.c typo: helpe -> helper. Signed-off-by: Bo Cai --- src/redis-cli.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 61068483f..524d879ea 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -624,7 +624,7 @@ int isColorTerm(void) { return t != NULL && strstr(t,"xterm") != NULL; } -/* Helpe function for sdsCatColorizedLdbReply() appending colorize strings +/* Helper function for sdsCatColorizedLdbReply() appending colorize strings * to an SDS string. */ sds sdscatcolor(sds o, char *s, size_t len, char *color) { if (!isColorTerm()) return sdscatlen(o,s,len); From c9f7f77457d18edc0f15376f3a6148d7a1baa7c0 Mon Sep 17 00:00:00 2001 From: Bo Cai Date: Wed, 26 Jul 2017 21:33:29 +0800 Subject: [PATCH 0407/1722] redis-cli.c typo: Requets -> Requests. 
Signed-off-by: Bo Cai --- src/redis-cli.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 61068483f..d4d9d6311 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2364,7 +2364,7 @@ static void statMode(void) { sprintf(buf,"%ld",aux); printf("%-8s",buf); - /* Requets */ + /* Requests */ aux = getLongInfoField(reply->str,"total_commands_processed"); sprintf(buf,"%ld (+%ld)",aux,requests == 0 ? 0 : aux-requests); printf("%-19s",buf); From 4ef0fb215857650f09888ad74b3f2bdf5ada62c6 Mon Sep 17 00:00:00 2001 From: Shaun Webb Date: Thu, 27 Jul 2017 09:37:37 +0900 Subject: [PATCH 0408/1722] Fix typo --- src/cluster.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cluster.c b/src/cluster.c index a9fedce0c..4e013313a 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -650,7 +650,7 @@ unsigned int keyHashSlot(char *key, int keylen) { for (e = s+1; e < keylen; e++) if (key[e] == '}') break; - /* No '}' or nothing betweeen {} ? Hash the whole key. */ + /* No '}' or nothing between {} ? Hash the whole key. */ if (e == keylen || e == s+1) return crc16(key,keylen) & 0x3FFF; /* If we are here there is both a { and a } on its right. Hash From 17b80429dd8579878835b0c3d6350e501acbd074 Mon Sep 17 00:00:00 2001 From: Shaun Webb Date: Thu, 27 Jul 2017 15:27:46 +0900 Subject: [PATCH 0409/1722] Typo fix --- redis.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/redis.conf b/redis.conf index c54dba392..249399e9a 100644 --- a/redis.conf +++ b/redis.conf @@ -59,7 +59,7 @@ # internet, binding to all the interfaces is dangerous and will expose the # instance to everybody on the internet. 
So by default we uncomment the # following bind directive, that will force Redis to listen only into -# the IPv4 lookback interface address (this means Redis will be able to +# the IPv4 loopback interface address (this means Redis will be able to # accept connections only from clients running into the same computer it # is running). # From 94a549d660425184092ae1abb5ccbccc32066fbf Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 28 Jul 2017 17:47:54 +0200 Subject: [PATCH 0410/1722] Add MEMORY DOCTOR to MEMORY HELP. --- src/object.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/object.c b/src/object.c index 08c9ad956..2565ed59f 100644 --- a/src/object.c +++ b/src/object.c @@ -1163,7 +1163,9 @@ void memoryCommand(client *c) { /* Nothing to do for other allocators. */ #endif } else if (!strcasecmp(c->argv[1]->ptr,"help") && c->argc == 2) { - addReplyMultiBulkLen(c,4); + addReplyMultiBulkLen(c,5); + addReplyBulkCString(c, +"MEMORY DOCTOR - Outputs memory problems report"); addReplyBulkCString(c, "MEMORY USAGE [SAMPLES ] - Estimate memory usage of key"); addReplyBulkCString(c, From 69adaf75de1367e5d2885aecd4ed7b02b60e7730 Mon Sep 17 00:00:00 2001 From: Felix Krause Date: Fri, 28 Jul 2017 13:04:52 -0400 Subject: [PATCH 0411/1722] Update link to https and use inline link --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 70a15790f..42ab47853 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -This README is just a fast *quick start* document. You can find more detailed documentation at http://redis.io. +This README is just a fast *quick start* document. You can find more detailed documentation at [redis.io](https://redis.io). What is Redis? 
-------------- From 468c15935c3bfea464f3b7c00870d02ae4d04399 Mon Sep 17 00:00:00 2001 From: jybaek Date: Thu, 3 Aug 2017 17:28:04 +0900 Subject: [PATCH 0412/1722] Add missing fclose() --- src/redis-cli.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/redis-cli.c b/src/redis-cli.c index 61068483f..705d917e9 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -1807,6 +1807,7 @@ static void getRDB(void) { } close(s); /* Close the file descriptor ASAP as fsync() may take time. */ fsync(fd); + close(fd); fprintf(stderr,"Transfer finished with success.\n"); exit(0); } From 3a207568cfa9863aafd75467a9b120b7df54e1e0 Mon Sep 17 00:00:00 2001 From: "jeesyn.liu" Date: Tue, 8 Aug 2017 17:45:51 +0800 Subject: [PATCH 0413/1722] fix a typo --- src/anet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/anet.c b/src/anet.c index 53a56b0d2..e9530398d 100644 --- a/src/anet.c +++ b/src/anet.c @@ -237,7 +237,7 @@ int anetResolveIP(char *err, char *host, char *ipbuf, size_t ipbuf_len) { static int anetSetReuseAddr(char *err, int fd) { int yes = 1; - /* Make sure connection-intensive things like the redis benckmark + /* Make sure connection-intensive things like the redis benchmark * will be able to close/open sockets a zillion of times */ if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) == -1) { anetSetError(err, "setsockopt SO_REUSEADDR: %s", strerror(errno)); From 3666698eeb119ec048dcaf8aa1e3366689568410 Mon Sep 17 00:00:00 2001 From: Chris Lamb Date: Sat, 12 Aug 2017 22:21:03 -0700 Subject: [PATCH 0414/1722] Correct spelling of "faield". --- tests/instances.tcl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/instances.tcl b/tests/instances.tcl index 2ba67ac19..357b34818 100644 --- a/tests/instances.tcl +++ b/tests/instances.tcl @@ -318,7 +318,7 @@ proc end_tests {} { puts "GOOD! No errors." exit 0 } else { - puts "WARNING $::failed tests faield." + puts "WARNING $::failed test(s) failed." 
exit 1 } } From 572d8e06098116332d4424f4236f21a1ac0d0237 Mon Sep 17 00:00:00 2001 From: Richard Li Date: Fri, 18 Aug 2017 11:27:04 +0800 Subject: [PATCH 0415/1722] Update the comment --- src/notify.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/notify.c b/src/notify.c index 94a1f2e79..79c1fc048 100644 --- a/src/notify.c +++ b/src/notify.c @@ -29,8 +29,8 @@ #include "server.h" -/* This file implements keyspace events notification via Pub/Sub ad - * described at http://redis.io/topics/keyspace-events. */ +/* This file implements keyspace events notification via Pub/Sub and + * described at https://redis.io/topics/notifications. */ /* Turn a string representing notification classes into an integer * representing notification classes flags xored. From 2f5239b038fff3451ded179d1c1e34d1e0755354 Mon Sep 17 00:00:00 2001 From: jianqingdu Date: Wed, 30 Aug 2017 21:20:14 -0500 Subject: [PATCH 0416/1722] fix not call va_end when syncWrite() failed fix not call va_end when syncWrite() failed in sendSynchronousCommand() --- src/replication.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/replication.c b/src/replication.c index 6be5d2631..b31f0da9f 100644 --- a/src/replication.c +++ b/src/replication.c @@ -1329,7 +1329,8 @@ char *sendSynchronousCommand(int flags, int fd, ...) { cmd = sdscat(cmd,arg); } cmd = sdscatlen(cmd,"\r\n",2); - + va_end(ap); + /* Transfer command to the server. */ if (syncWrite(fd,cmd,sdslen(cmd),server.repl_syncio_timeout*1000) == -1) @@ -1339,7 +1340,6 @@ char *sendSynchronousCommand(int flags, int fd, ...) { strerror(errno)); } sdsfree(cmd); - va_end(ap); } /* Read the reply from the server. */ From 000ae23df42d13c99391a3baf77f2e729e69257a Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Sun, 17 Sep 2017 07:22:16 +0300 Subject: [PATCH 0417/1722] Flush append only buffers before existing. 
when SHUTDOWN command is recived it is possible that some of the recent command were not yet flushed from the AOF buffer, and the server experiences data loss at shutdown. --- src/server.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/server.c b/src/server.c index ef05f055d..312b95048 100644 --- a/src/server.c +++ b/src/server.c @@ -2536,8 +2536,9 @@ int prepareForShutdown(int flags) { "There is a child rewriting the AOF. Killing it!"); kill(server.aof_child_pid,SIGUSR1); } - /* Append only file: fsync() the AOF and exit */ + /* Append only file: flush buffers and fsync() the AOF at exit */ serverLog(LL_NOTICE,"Calling fsync() on the AOF file."); + flushAppendOnlyFile(1); aof_fsync(server.aof_fd); } From d0c77132d87747378ab5cd7b23485f7f9e2b9966 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 19 Sep 2017 10:33:14 +0200 Subject: [PATCH 0418/1722] PSYNC2: Create backlog on slave partial sync as well. A slave may be started with an RDB file able to provide enough slave to perform a successful partial SYNC with its master. However in such a case, how outlined in issue #4268, the slave backlog will not be started, since it was only initialized on full syncs attempts. This creates different problems with successive PSYNC attempts that will always result in full synchronizations. Thanks to @fdingiit for discovering the issue. --- src/replication.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/replication.c b/src/replication.c index 6be5d2631..97ce9608d 100644 --- a/src/replication.c +++ b/src/replication.c @@ -1531,6 +1531,11 @@ int slaveTryPartialResynchronization(int fd, int read_reply) { /* Setup the replication to continue. */ sdsfree(reply); replicationResurrectCachedMaster(fd); + + /* If this instance was restarted and we read the metadata to + * PSYNC from the persistence file, our replication backlog could + * be still not initialized. Create it. 
*/ + if (server.repl_backlog == NULL) createReplicationBacklog(); return PSYNC_CONTINUE; } From bc6b7c949d5ad5d0c8f2d8f27ab5a08d78320512 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 19 Sep 2017 23:03:39 +0200 Subject: [PATCH 0419/1722] PSYNC2: Fix the way replication info is saved/loaded from RDB. This commit attempts to fix a number of bugs reported in #4316. They are related to the way replication info like replication ID, offsets, and currently selected DB in the master client, are stored and loaded by Redis. In order to avoid inconsistencies the changes in this commit try to enforce that: 1. Replication information are only stored when the RDB file is generated by a slave that has a valid 'master' client, so that we can always extract the currently selected DB. 2. When replication informations are persisted in the RDB file, all the info for a successful PSYNC or nothing is persisted. 3. The RDB replication informations are only loaded if the instance is configured as a slave, otherwise a master can start with IDs that relate to a different history of the data set, and stil retain such IDs in the future while receiving unrelated writes. --- src/rdb.c | 34 ++++++++++++++++++++++++++-------- src/rdb.h | 1 + src/replication.c | 14 ++++---------- src/server.c | 23 ++++++++++++++++++----- 4 files changed, 49 insertions(+), 23 deletions(-) diff --git a/src/rdb.c b/src/rdb.c index 792c8ff94..9c2acf7ba 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -858,16 +858,14 @@ int rdbSaveInfoAuxFields(rio *rdb, int flags, rdbSaveInfo *rsi) { /* Handle saving options that generate aux fields. 
*/ if (rsi) { - if (rsi->repl_stream_db && - rdbSaveAuxFieldStrInt(rdb,"repl-stream-db",rsi->repl_stream_db) - == -1) - { - return -1; - } + if (rdbSaveAuxFieldStrInt(rdb,"repl-stream-db",rsi->repl_stream_db) + == -1) return -1; + if (rdbSaveAuxFieldStrStr(rdb,"repl-id",server.replid) + == -1) return -1; + if (rdbSaveAuxFieldStrInt(rdb,"repl-offset",server.master_repl_offset) + == -1) return -1; } if (rdbSaveAuxFieldStrInt(rdb,"aof-preamble",aof_preamble) == -1) return -1; - if (rdbSaveAuxFieldStrStr(rdb,"repl-id",server.replid) == -1) return -1; - if (rdbSaveAuxFieldStrInt(rdb,"repl-offset",server.master_repl_offset) == -1) return -1; return 1; } @@ -2017,3 +2015,23 @@ void bgsaveCommand(client *c) { addReply(c,shared.err); } } + +/* Populate the rdbSaveInfo structure used to persist the replication + * information inside the RDB file. Currently the structure explicitly + * contains just the currently selected DB from the master stream, however + * if the rdbSave*() family functions receive a NULL rsi structure also + * the Replication ID/offset is not saved. The function popultes 'rsi' + * that is normally stack-allocated in the caller, returns the populated + * pointer if the instance has a valid master client, otherwise NULL + * is returned, and the RDB savign wil not persist any replication related + * information. 
*/ +rdbSaveInfo *rdbPopulateSaveInfo(rdbSaveInfo *rsi) { + rdbSaveInfo rsi_init = RDB_SAVE_INFO_INIT; + *rsi = rsi_init; + if (server.master) { + rsi->repl_stream_db = server.master->db->id; + return rsi; + } else { + return NULL; + } +} diff --git a/src/rdb.h b/src/rdb.h index a22cb33ce..62a13f444 100644 --- a/src/rdb.h +++ b/src/rdb.h @@ -147,5 +147,6 @@ int rdbLoadBinaryDoubleValue(rio *rdb, double *val); int rdbSaveBinaryFloatValue(rio *rdb, float val); int rdbLoadBinaryFloatValue(rio *rdb, float *val); int rdbLoadRio(rio *rdb, rdbSaveInfo *rsi); +rdbSaveInfo *rdbPopulateSaveInfo(rdbSaveInfo *rsi); #endif diff --git a/src/replication.c b/src/replication.c index 97ce9608d..2b2673a4b 100644 --- a/src/replication.c +++ b/src/replication.c @@ -569,18 +569,12 @@ int startBgsaveForReplication(int mincapa) { serverLog(LL_NOTICE,"Starting BGSAVE for SYNC with target: %s", socket_target ? "slaves sockets" : "disk"); - rdbSaveInfo rsi = RDB_SAVE_INFO_INIT; - /* If we are saving for a chained slave (that is, if we are, - * in turn, a slave of another instance), make sure after - * loadig the RDB, our slaves select the right DB: we'll just - * send the replication stream we receive from our master, so - * no way to send SELECT commands. */ - if (server.master) rsi.repl_stream_db = server.master->db->id; - + rdbSaveInfo rsi, *rsiptr; + rsiptr = rdbPopulateSaveInfo(&rsi); if (socket_target) - retval = rdbSaveToSlavesSockets(&rsi); + retval = rdbSaveToSlavesSockets(rsiptr); else - retval = rdbSaveBackground(server.rdb_filename,&rsi); + retval = rdbSaveBackground(server.rdb_filename,rsiptr); /* If we failed to BGSAVE, remove the slaves waiting for a full * resynchorinization from the list of salves, inform them with diff --git a/src/server.c b/src/server.c index 312b95048..61291fde5 100644 --- a/src/server.c +++ b/src/server.c @@ -1092,7 +1092,9 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { { serverLog(LL_NOTICE,"%d changes in %d seconds. 
Saving...", sp->changes, (int)sp->seconds); - rdbSaveBackground(server.rdb_filename,NULL); + rdbSaveInfo rsi, *rsiptr; + rsiptr = rdbPopulateSaveInfo(&rsi); + rdbSaveBackground(server.rdb_filename,rsiptr); break; } } @@ -1164,7 +1166,9 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { (server.unixtime-server.lastbgsave_try > CONFIG_BGSAVE_RETRY_DELAY || server.lastbgsave_status == C_OK)) { - if (rdbSaveBackground(server.rdb_filename,NULL) == C_OK) + rdbSaveInfo rsi, *rsiptr; + rsiptr = rdbPopulateSaveInfo(&rsi); + if (rdbSaveBackground(server.rdb_filename,rsiptr) == C_OK) server.rdb_bgsave_scheduled = 0; } @@ -2546,7 +2550,9 @@ int prepareForShutdown(int flags) { if ((server.saveparamslen > 0 && !nosave) || save) { serverLog(LL_NOTICE,"Saving the final RDB snapshot before exiting."); /* Snapshotting. Perform a SYNC SAVE and exit */ - if (rdbSave(server.rdb_filename,NULL) != C_OK) { + rdbSaveInfo rsi, *rsiptr; + rsiptr = rdbPopulateSaveInfo(&rsi); + if (rdbSave(server.rdb_filename,rsiptr) != C_OK) { /* Ooops.. error saving! The best we can do is to continue * operating. Note that if there was a background saving process, * in the next cron() Redis will be notified that the background @@ -3526,13 +3532,20 @@ void loadDataFromDisk(void) { (float)(ustime()-start)/1000000); /* Restore the replication ID / offset from the RDB file. */ - if (rsi.repl_id_is_set && rsi.repl_offset != -1) { + if (server.masterhost && + rsi.repl_id_is_set && + rsi.repl_offset != -1 && + /* Note that older implementations may save a repl_stream_db + * of -1 inside the RDB file. */ + rsi.repl_stream_db != -1) + { memcpy(server.replid,rsi.repl_id,sizeof(server.replid)); server.master_repl_offset = rsi.repl_offset; /* If we are a slave, create a cached master from this * information, in order to allow partial resynchronizations * with masters. 
*/ - if (server.masterhost) replicationCacheMasterUsingMyself(); + replicationCacheMasterUsingMyself(); + selectDb(server.cached_master,rsi.repl_stream_db); } } else if (errno != ENOENT) { serverLog(LL_WARNING,"Fatal error loading the DB: %s. Exiting.",strerror(errno)); From a45d0fc48231d222ad36bba4acb1bfba30065ae1 Mon Sep 17 00:00:00 2001 From: "zhaozhao.zz" Date: Wed, 20 Sep 2017 13:47:42 +0800 Subject: [PATCH 0420/1722] PSYNC2: make persisiting replication info more solid This commit is a reinforcement of commit c1c99e9. 1. Replication information can be stored when the RDB file is generated by a mater using server.slaveseldb when server.repl_backlog is not NULL, or set repl_stream_db be -1. That's safe, because NULL server.repl_backlog will trigger full synchronization, then master will send SELECT command to replicaiton stream. 2. Only do rdbSave* when rsiptr is not NULL, if we do rdbSave* without rdbSaveInfo, slave will miss repl-stream-db. 3. Save the replication informations also in the case of SAVE command, FLUSHALL command and DEBUG reload. --- src/db.c | 4 +++- src/debug.c | 4 +++- src/rdb.c | 20 +++++++++++++++++--- src/replication.c | 14 ++++++++++---- 4 files changed, 33 insertions(+), 9 deletions(-) diff --git a/src/db.c b/src/db.c index 7d1504d30..71c642d00 100644 --- a/src/db.c +++ b/src/db.c @@ -416,7 +416,9 @@ void flushallCommand(client *c) { /* Normally rdbSave() will reset dirty, but we don't want this here * as otherwise FLUSHALL will not be replicated nor put into the AOF. 
*/ int saved_dirty = server.dirty; - rdbSave(server.rdb_filename,NULL); + rdbSaveInfo rsi, *rsiptr; + rsiptr = rdbPopulateSaveInfo(&rsi); + rdbSave(server.rdb_filename,rsiptr); server.dirty = saved_dirty; } server.dirty++; diff --git a/src/debug.c b/src/debug.c index d6e12ec2a..5c3fd3471 100644 --- a/src/debug.c +++ b/src/debug.c @@ -335,7 +335,9 @@ void debugCommand(client *c) { if (c->argc >= 3) c->argv[2] = tryObjectEncoding(c->argv[2]); serverAssertWithInfo(c,c->argv[0],1 == 2); } else if (!strcasecmp(c->argv[1]->ptr,"reload")) { - if (rdbSave(server.rdb_filename,NULL) != C_OK) { + rdbSaveInfo rsi, *rsiptr; + rsiptr = rdbPopulateSaveInfo(&rsi); + if (rdbSave(server.rdb_filename,rsiptr) != C_OK) { addReply(c,shared.err); return; } diff --git a/src/rdb.c b/src/rdb.c index 9c2acf7ba..c83a05a32 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -1975,7 +1975,9 @@ void saveCommand(client *c) { addReplyError(c,"Background save already in progress"); return; } - if (rdbSave(server.rdb_filename,NULL) == C_OK) { + rdbSaveInfo rsi, *rsiptr; + rsiptr = rdbPopulateSaveInfo(&rsi); + if (rdbSave(server.rdb_filename,rsiptr) == C_OK) { addReply(c,shared.ok); } else { addReply(c,shared.err); @@ -2017,21 +2019,33 @@ void bgsaveCommand(client *c) { } /* Populate the rdbSaveInfo structure used to persist the replication - * information inside the RDB file. Currently the structure explicitly + * information inside the RDB file. + * For master, if server.repl_backlog is not NULL, fill rdbSaveInfo with + * server.slaveseldb, otherwise just use init -1. + * Don't worry, master will send SELECT command to replication stream, + * because if server.repl_backlog is NULL, that will trigger full synchronization, + * function replicationSetupSlaveForFullResync() sets server.slaveseldb be -1, + * then replicationFeedSlaves() will send SELECT command when server.slaveseldb is -1. 
+ * For slave, currently the structure explicitly * contains just the currently selected DB from the master stream, however * if the rdbSave*() family functions receive a NULL rsi structure also * the Replication ID/offset is not saved. The function popultes 'rsi' * that is normally stack-allocated in the caller, returns the populated * pointer if the instance has a valid master client, otherwise NULL - * is returned, and the RDB savign wil not persist any replication related + * is returned, and the RDB saving will not persist any replication related * information. */ rdbSaveInfo *rdbPopulateSaveInfo(rdbSaveInfo *rsi) { rdbSaveInfo rsi_init = RDB_SAVE_INFO_INIT; *rsi = rsi_init; + if (!server.masterhost) { + if (server.repl_backlog) rsi->repl_stream_db = server.slaveseldb; + return rsi; + } if (server.master) { rsi->repl_stream_db = server.master->db->id; return rsi; } else { + serverLog(LL_WARNING,"As a slave there is no valid master, can not persist replication information"); return NULL; } } diff --git a/src/replication.c b/src/replication.c index 2b2673a4b..4358d63e6 100644 --- a/src/replication.c +++ b/src/replication.c @@ -571,10 +571,16 @@ int startBgsaveForReplication(int mincapa) { rdbSaveInfo rsi, *rsiptr; rsiptr = rdbPopulateSaveInfo(&rsi); - if (socket_target) - retval = rdbSaveToSlavesSockets(rsiptr); - else - retval = rdbSaveBackground(server.rdb_filename,rsiptr); + /* Only do rdbSave* when rsiptr is not NULL, + * otherwise slave will miss repl-stream-db. */ + if (rsiptr) { + if (socket_target) + retval = rdbSaveToSlavesSockets(rsiptr); + else + retval = rdbSaveBackground(server.rdb_filename,rsiptr); + } else { + retval = C_ERR; + } /* If we failed to BGSAVE, remove the slaves waiting for a full * resynchorinization from the list of salves, inform them with From 4e01e51338b6a3b08c939a22226d6bb2a154e812 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 20 Sep 2017 11:28:13 +0200 Subject: [PATCH 0421/1722] PSYNC2: More refinements related to #4316. 
--- src/rdb.c | 24 +++++++++++++----------- src/replication.c | 1 + 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/src/rdb.c b/src/rdb.c index c83a05a32..125df6071 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -2019,14 +2019,7 @@ void bgsaveCommand(client *c) { } /* Populate the rdbSaveInfo structure used to persist the replication - * information inside the RDB file. - * For master, if server.repl_backlog is not NULL, fill rdbSaveInfo with - * server.slaveseldb, otherwise just use init -1. - * Don't worry, master will send SELECT command to replication stream, - * because if server.repl_backlog is NULL, that will trigger full synchronization, - * function replicationSetupSlaveForFullResync() sets server.slaveseldb be -1, - * then replicationFeedSlaves() will send SELECT command when server.slaveseldb is -1. - * For slave, currently the structure explicitly + * information inside the RDB file. Currently the structure explicitly * contains just the currently selected DB from the master stream, however * if the rdbSave*() family functions receive a NULL rsi structure also * the Replication ID/offset is not saved. The function popultes 'rsi' @@ -2037,15 +2030,24 @@ void bgsaveCommand(client *c) { rdbSaveInfo *rdbPopulateSaveInfo(rdbSaveInfo *rsi) { rdbSaveInfo rsi_init = RDB_SAVE_INFO_INIT; *rsi = rsi_init; + + /* If the instance is a master, we can populate the replication info + * in all the cases, even if sometimes in incomplete (but safe) form. */ if (!server.masterhost) { if (server.repl_backlog) rsi->repl_stream_db = server.slaveseldb; + /* Note that if repl_backlog is NULL, it means that histories + * following from this point will trigger a full synchronization + * generating a SELECT statement, so we can leave the currently + * selected DB set to -1. This allows a restarted master to reload + * its replication ID/offset when there are no connected slaves. 
*/ return rsi; } + + /* If the instance is a slave we need a connected master in order to + * fetch the currently selected DB. */ if (server.master) { rsi->repl_stream_db = server.master->db->id; return rsi; - } else { - serverLog(LL_WARNING,"As a slave there is no valid master, can not persist replication information"); - return NULL; } + return NULL; } diff --git a/src/replication.c b/src/replication.c index 4358d63e6..e0b3d910e 100644 --- a/src/replication.c +++ b/src/replication.c @@ -579,6 +579,7 @@ int startBgsaveForReplication(int mincapa) { else retval = rdbSaveBackground(server.rdb_filename,rsiptr); } else { + serverLog(LL_WARNING,"BGSAVE for replication: replication information not available, can't generate the RDB file right now. Try later."); retval = C_ERR; } From 36252140aa1181123d72023a8c5fdd06fe3e96b4 Mon Sep 17 00:00:00 2001 From: "zhaozhao.zz" Date: Thu, 21 Sep 2017 14:19:21 +0800 Subject: [PATCH 0422/1722] Lazyfree: avoid memory leak when free slowlog entry --- src/slowlog.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/slowlog.c b/src/slowlog.c index 805ee1d77..a95147153 100644 --- a/src/slowlog.c +++ b/src/slowlog.c @@ -72,9 +72,12 @@ slowlogEntry *slowlogCreateEntry(client *c, robj **argv, int argc, long long dur (unsigned long) sdslen(argv[j]->ptr) - SLOWLOG_ENTRY_MAX_STRING); se->argv[j] = createObject(OBJ_STRING,s); - } else { + } else if (argv[j]->refcount == OBJ_SHARED_REFCOUNT) { se->argv[j] = argv[j]; - incrRefCount(argv[j]); + } else { + /* Duplicate a string object, + * avoid memory leak for lazyfree. */ + se->argv[j] = dupStringObject(argv[j]); } } } From ab601f284f6e017a45474d7e7278cd7fb06de774 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 21 Sep 2017 12:35:04 +0200 Subject: [PATCH 0423/1722] Clarify comment in change fixing #4323. 
--- src/slowlog.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/slowlog.c b/src/slowlog.c index a95147153..32ec4374c 100644 --- a/src/slowlog.c +++ b/src/slowlog.c @@ -75,8 +75,12 @@ slowlogEntry *slowlogCreateEntry(client *c, robj **argv, int argc, long long dur } else if (argv[j]->refcount == OBJ_SHARED_REFCOUNT) { se->argv[j] = argv[j]; } else { - /* Duplicate a string object, - * avoid memory leak for lazyfree. */ + /* Here we need to dupliacate the string objects composing the + * argument vector of the command, because those may otherwise + * end shared with string objects stored into keys. Having + * shared objects between any part of Redis, and the data + * structure holding the data, is a problem: FLUSHALL ASYNC + * may release the shared string object and create a race. */ se->argv[j] = dupStringObject(argv[j]); } } From f5909f2c18e9c8da74035196beade316049ae940 Mon Sep 17 00:00:00 2001 From: Dvir Volk Date: Wed, 27 Sep 2017 11:52:39 +0300 Subject: [PATCH 0424/1722] Added support for module context flags with RM_GetCtxFlags --- src/module.c | 69 +++++++++++++++++++++++++++++++++ src/modules/testmodule.c | 82 ++++++++++++++++++++++++++++++++++++++++ src/redismodule.h | 26 +++++++++++++ 3 files changed, 177 insertions(+) diff --git a/src/module.c b/src/module.c index fda68b273..f467c5a49 100644 --- a/src/module.c +++ b/src/module.c @@ -1262,6 +1262,74 @@ int RM_GetSelectedDb(RedisModuleCtx *ctx) { return ctx->client->db->id; } + +/* Return the current context's flags. The flags provide information on the + * current request context (whether the client is a Lua script or in a MULTI), + * and about the Redis instance in general, i.e replication and persistence. 
+ * + * The available flags are: + * + * * REDISMODULE_CTX_FLAGS_LUA: The command is running in a Lua script + * + * * REDISMODULE_CTX_FLAGS_MULTI: The command is running inside a transaction + * + * * REDISMODULE_CTX_FLAGS_MASTER: The Redis instance is a master + * + * * REDISMODULE_CTX_FLAGS_SLAVE: The Redis instance is a slave + * + * * REDISMODULE_CTX_FLAGS_READONLY: The Redis instance is read-only + * + * * REDISMODULE_CTX_FLAGS_CLUSTER: The Redis instance is in cluster mode + * + * * REDISMODULE_CTX_FLAGS_AOF: The Redis instance has AOF enabled + * + * * REDISMODULE_CTX_FLAGS_RDB: The instance has RDB enabled + * + * * REDISMODULE_CTX_FLAGS_MAXMEMORY: The instance has Maxmemory set + * + * * REDISMODULE_CTX_FLAGS_EVICT: Maxmemory is set and has an eviction + * policy that may delete keys + */ +int RM_GetCtxFlags(RedisModuleCtx *ctx) { + + int flags = 0; + /* Client specific flags */ + if (ctx->client) { + if (ctx->client->flags & CLIENT_LUA) + flags |= REDISMODULE_CTX_FLAGS_LUA; + if (ctx->client->flags & CLIENT_MULTI) + flags |= REDISMODULE_CTX_FLAGS_MULTI; + } + + if (server.cluster_enabled) + flags |= REDISMODULE_CTX_FLAGS_CLUSTER; + + /* Maxmemory and eviction policy */ + if (server.maxmemory > 0) { + flags |= REDISMODULE_CTX_FLAGS_MAXMEMORY; + + if (server.maxmemory_policy != MAXMEMORY_NO_EVICTION) + flags |= REDISMODULE_CTX_FLAGS_EVICT; + } + + /* Persistence flags */ + if (server.aof_state != AOF_OFF) + flags |= REDISMODULE_CTX_FLAGS_AOF; + if (server.saveparamslen > 0) + flags |= REDISMODULE_CTX_FLAGS_RDB; + + /* Replication flags */ + if (server.masterhost == NULL) { + flags |= REDISMODULE_CTX_FLAGS_MASTER; + } else { + flags |= REDISMODULE_CTX_FLAGS_SLAVE; + if (server.repl_slave_ro) + flags |= REDISMODULE_CTX_FLAGS_READONLY; + } + + return flags; +} + /* Change the currently selected DB. Returns an error if the id * is out of range. 
* @@ -3891,6 +3959,7 @@ void moduleRegisterCoreAPI(void) { REGISTER_API(IsKeysPositionRequest); REGISTER_API(KeyAtPos); REGISTER_API(GetClientId); + REGISTER_API(GetCtxFlags); REGISTER_API(PoolAlloc); REGISTER_API(CreateDataType); REGISTER_API(ModuleTypeSetValue); diff --git a/src/modules/testmodule.c b/src/modules/testmodule.c index 8da45c0ea..58e397790 100644 --- a/src/modules/testmodule.c +++ b/src/modules/testmodule.c @@ -121,6 +121,81 @@ int TestStringPrintf(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { } +/* TEST.CTXFLAGS -- Test GetCtxFlags. */ +int TestCtxFlags(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + REDISMODULE_NOT_USED(argc); + REDISMODULE_NOT_USED(argv); + + RedisModule_AutoMemory(ctx); + + int ok = 1; + const char *errString = NULL; + + #define FAIL(msg) \ + { \ + ok = 0; \ + errString = msg; \ + goto end; \ + } + + int flags = RedisModule_GetCtxFlags(ctx); + if (flags == 0) { + FAIL("Got no flags"); + } + + if (flags & REDISMODULE_CTX_FLAGS_LUA) FAIL("Lua flag was set"); + if (flags & REDISMODULE_CTX_FLAGS_MULTI) FAIL("Multi flag was set"); + + if (flags & REDISMODULE_CTX_FLAGS_AOF) FAIL("AOF Flag was set") + /* Enable AOF to test AOF flags */ + RedisModule_Call(ctx, "config", "ccc", "set", "appendonly", "yes"); + flags = RedisModule_GetCtxFlags(ctx); + if (!(flags & REDISMODULE_CTX_FLAGS_AOF)) + FAIL("AOF Flag not set after config set"); + + if (flags & REDISMODULE_CTX_FLAGS_RDB) FAIL("RDB Flag was set"); + /* Enable RDB to test RDB flags */ + RedisModule_Call(ctx, "config", "ccc", "set", "save", "900 1"); + flags = RedisModule_GetCtxFlags(ctx); + if (!(flags & REDISMODULE_CTX_FLAGS_RDB)) + FAIL("RDB Flag was not set after config set"); + + if (!(flags & REDISMODULE_CTX_FLAGS_MASTER)) FAIL("Master flag was not set"); + if (flags & REDISMODULE_CTX_FLAGS_SLAVE) FAIL("Slave flag was set"); + if (flags & REDISMODULE_CTX_FLAGS_READONLY) FAIL("Read-only flag was set"); + if (flags & REDISMODULE_CTX_FLAGS_CLUSTER) 
FAIL("Cluster flag was set"); + + if (flags & REDISMODULE_CTX_FLAGS_MAXMEMORY) FAIL("Maxmemory flag was set"); + ; + RedisModule_Call(ctx, "config", "ccc", "set", "maxmemory", "100000000"); + flags = RedisModule_GetCtxFlags(ctx); + if (!(flags & REDISMODULE_CTX_FLAGS_MAXMEMORY)) + FAIL("Maxmemory flag was not set after config set"); + + if (flags & REDISMODULE_CTX_FLAGS_EVICT) FAIL("Eviction flag was set"); + RedisModule_Call(ctx, "config", "ccc", "set", "maxmemory-policy", + "allkeys-lru"); + flags = RedisModule_GetCtxFlags(ctx); + if (!(flags & REDISMODULE_CTX_FLAGS_EVICT)) + FAIL("Eviction flag was not set after config set"); + + end: + /* Revert config changes */ + RedisModule_Call(ctx, "config", "ccc", "set", "appendonly", "no"); + RedisModule_Call(ctx, "config", "ccc", "set", "save", ""); + RedisModule_Call(ctx, "config", "ccc", "set", "maxmemory", "0"); + RedisModule_Call(ctx, "config", "ccc", "set", "maxmemory-policy", "noeviction"); + + if (!ok) { + RedisModule_Log(ctx, "warning", "Failed CTXFLAGS Test. 
Reason: %s", + errString); + return RedisModule_ReplyWithSimpleString(ctx, "ERR"); + } + + return RedisModule_ReplyWithSimpleString(ctx, "OK"); + } + + /* ----------------------------- Test framework ----------------------------- */ /* Return 1 if the reply matches the specified string, otherwise log errors @@ -188,6 +263,9 @@ int TestIt(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { T("test.call",""); if (!TestAssertStringReply(ctx,reply,"OK",2)) goto fail; + T("test.ctxflags",""); + if (!TestAssertStringReply(ctx,reply,"OK",2)) goto fail; + T("test.string.append",""); if (!TestAssertStringReply(ctx,reply,"foobar",6)) goto fail; @@ -229,6 +307,10 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) TestStringPrintf,"write deny-oom",1,1,1) == REDISMODULE_ERR) return REDISMODULE_ERR; + if (RedisModule_CreateCommand(ctx,"test.ctxflags", + TestCtxFlags,"readonly",1,1,1) == REDISMODULE_ERR) + return REDISMODULE_ERR; + if (RedisModule_CreateCommand(ctx,"test.it", TestIt,"readonly",1,1,1) == REDISMODULE_ERR) return REDISMODULE_ERR; diff --git a/src/redismodule.h b/src/redismodule.h index 7fc0fec40..4aa35190b 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -58,6 +58,30 @@ #define REDISMODULE_HASH_CFIELDS (1<<2) #define REDISMODULE_HASH_EXISTS (1<<3) +/* Context Flags: Info about the current context returned by RM_GetCtxFlags */ + +/* The command is running in the context of a Lua script */ +#define REDISMODULE_CTX_FLAGS_LUA 0x0001 +/* The command is running inside a Redis transaction */ +#define REDISMODULE_CTX_FLAGS_MULTI 0x0002 +/* The instance is a master */ +#define REDISMODULE_CTX_FLAGS_MASTER 0x0004 +/* The instance is a slave */ +#define REDISMODULE_CTX_FLAGS_SLAVE 0x0008 +/* The instance is read-only (usually meaning it's a slave as well) */ +#define REDISMODULE_CTX_FLAGS_READONLY 0x0010 +/* The instance is running in cluster mode */ +#define REDISMODULE_CTX_FLAGS_CLUSTER 0x0020 +/* The instance has AOF enabled 
*/ +#define REDISMODULE_CTX_FLAGS_AOF 0x0040 // +/* The instance has RDB enabled */ +#define REDISMODULE_CTX_FLAGS_RDB 0x0080 // +/* The instance has Maxmemory set */ +#define REDISMODULE_CTX_FLAGS_MAXMEMORY 0x0100 +/* Maxmemory is set and has an eviction policy that may delete keys */ +#define REDISMODULE_CTX_FLAGS_EVICT 0x0200 + + /* A special pointer that we can use between the core and the module to signal * field deletion, and that is impossible to be a valid pointer. */ #define REDISMODULE_HASH_DELETE ((RedisModuleString*)(long)1) @@ -183,6 +207,7 @@ int REDISMODULE_API_FUNC(RedisModule_HashGet)(RedisModuleKey *key, int flags, .. int REDISMODULE_API_FUNC(RedisModule_IsKeysPositionRequest)(RedisModuleCtx *ctx); void REDISMODULE_API_FUNC(RedisModule_KeyAtPos)(RedisModuleCtx *ctx, int pos); unsigned long long REDISMODULE_API_FUNC(RedisModule_GetClientId)(RedisModuleCtx *ctx); +int REDISMODULE_API_FUNC(RedisModule_GetCtxFlags)(RedisModuleCtx *ctx); void *REDISMODULE_API_FUNC(RedisModule_PoolAlloc)(RedisModuleCtx *ctx, size_t bytes); RedisModuleType *REDISMODULE_API_FUNC(RedisModule_CreateDataType)(RedisModuleCtx *ctx, const char *name, int encver, RedisModuleTypeMethods *typemethods); int REDISMODULE_API_FUNC(RedisModule_ModuleTypeSetValue)(RedisModuleKey *key, RedisModuleType *mt, void *value); @@ -302,6 +327,7 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int REDISMODULE_GET_API(IsKeysPositionRequest); REDISMODULE_GET_API(KeyAtPos); REDISMODULE_GET_API(GetClientId); + REDISMODULE_GET_API(GetCtxFlags); REDISMODULE_GET_API(PoolAlloc); REDISMODULE_GET_API(CreateDataType); REDISMODULE_GET_API(ModuleTypeSetValue); From c3777932f6fcb234c9ae9f8acbfa9eff146ba893 Mon Sep 17 00:00:00 2001 From: Dvir Volk Date: Wed, 27 Sep 2017 11:56:40 +0300 Subject: [PATCH 0425/1722] Renamed GetCtxFlags to GetContextFlags --- src/module.c | 4 ++-- src/modules/testmodule.c | 12 ++++++------ src/redismodule.h | 6 +++--- 3 files changed, 11 insertions(+), 
11 deletions(-) diff --git a/src/module.c b/src/module.c index f467c5a49..ab5e5518a 100644 --- a/src/module.c +++ b/src/module.c @@ -1290,7 +1290,7 @@ int RM_GetSelectedDb(RedisModuleCtx *ctx) { * * REDISMODULE_CTX_FLAGS_EVICT: Maxmemory is set and has an eviction * policy that may delete keys */ -int RM_GetCtxFlags(RedisModuleCtx *ctx) { +int RM_GetContextFlags(RedisModuleCtx *ctx) { int flags = 0; /* Client specific flags */ @@ -3959,7 +3959,7 @@ void moduleRegisterCoreAPI(void) { REGISTER_API(IsKeysPositionRequest); REGISTER_API(KeyAtPos); REGISTER_API(GetClientId); - REGISTER_API(GetCtxFlags); + REGISTER_API(GetContextFlags); REGISTER_API(PoolAlloc); REGISTER_API(CreateDataType); REGISTER_API(ModuleTypeSetValue); diff --git a/src/modules/testmodule.c b/src/modules/testmodule.c index 58e397790..a0d706fea 100644 --- a/src/modules/testmodule.c +++ b/src/modules/testmodule.c @@ -121,7 +121,7 @@ int TestStringPrintf(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { } -/* TEST.CTXFLAGS -- Test GetCtxFlags. */ +/* TEST.CTXFLAGS -- Test GetContextFlags. 
*/ int TestCtxFlags(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { REDISMODULE_NOT_USED(argc); REDISMODULE_NOT_USED(argv); @@ -138,7 +138,7 @@ int TestCtxFlags(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { goto end; \ } - int flags = RedisModule_GetCtxFlags(ctx); + int flags = RedisModule_GetContextFlags(ctx); if (flags == 0) { FAIL("Got no flags"); } @@ -149,14 +149,14 @@ int TestCtxFlags(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { if (flags & REDISMODULE_CTX_FLAGS_AOF) FAIL("AOF Flag was set") /* Enable AOF to test AOF flags */ RedisModule_Call(ctx, "config", "ccc", "set", "appendonly", "yes"); - flags = RedisModule_GetCtxFlags(ctx); + flags = RedisModule_GetContextFlags(ctx); if (!(flags & REDISMODULE_CTX_FLAGS_AOF)) FAIL("AOF Flag not set after config set"); if (flags & REDISMODULE_CTX_FLAGS_RDB) FAIL("RDB Flag was set"); /* Enable RDB to test RDB flags */ RedisModule_Call(ctx, "config", "ccc", "set", "save", "900 1"); - flags = RedisModule_GetCtxFlags(ctx); + flags = RedisModule_GetContextFlags(ctx); if (!(flags & REDISMODULE_CTX_FLAGS_RDB)) FAIL("RDB Flag was not set after config set"); @@ -168,14 +168,14 @@ int TestCtxFlags(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { if (flags & REDISMODULE_CTX_FLAGS_MAXMEMORY) FAIL("Maxmemory flag was set"); ; RedisModule_Call(ctx, "config", "ccc", "set", "maxmemory", "100000000"); - flags = RedisModule_GetCtxFlags(ctx); + flags = RedisModule_GetContextFlags(ctx); if (!(flags & REDISMODULE_CTX_FLAGS_MAXMEMORY)) FAIL("Maxmemory flag was not set after config set"); if (flags & REDISMODULE_CTX_FLAGS_EVICT) FAIL("Eviction flag was set"); RedisModule_Call(ctx, "config", "ccc", "set", "maxmemory-policy", "allkeys-lru"); - flags = RedisModule_GetCtxFlags(ctx); + flags = RedisModule_GetContextFlags(ctx); if (!(flags & REDISMODULE_CTX_FLAGS_EVICT)) FAIL("Eviction flag was not set after config set"); diff --git a/src/redismodule.h b/src/redismodule.h index 4aa35190b..8df203aba 
100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -58,7 +58,7 @@ #define REDISMODULE_HASH_CFIELDS (1<<2) #define REDISMODULE_HASH_EXISTS (1<<3) -/* Context Flags: Info about the current context returned by RM_GetCtxFlags */ +/* Context Flags: Info about the current context returned by RM_GetContextFlags */ /* The command is running in the context of a Lua script */ #define REDISMODULE_CTX_FLAGS_LUA 0x0001 @@ -207,7 +207,7 @@ int REDISMODULE_API_FUNC(RedisModule_HashGet)(RedisModuleKey *key, int flags, .. int REDISMODULE_API_FUNC(RedisModule_IsKeysPositionRequest)(RedisModuleCtx *ctx); void REDISMODULE_API_FUNC(RedisModule_KeyAtPos)(RedisModuleCtx *ctx, int pos); unsigned long long REDISMODULE_API_FUNC(RedisModule_GetClientId)(RedisModuleCtx *ctx); -int REDISMODULE_API_FUNC(RedisModule_GetCtxFlags)(RedisModuleCtx *ctx); +int REDISMODULE_API_FUNC(RedisModule_GetContextFlags)(RedisModuleCtx *ctx); void *REDISMODULE_API_FUNC(RedisModule_PoolAlloc)(RedisModuleCtx *ctx, size_t bytes); RedisModuleType *REDISMODULE_API_FUNC(RedisModule_CreateDataType)(RedisModuleCtx *ctx, const char *name, int encver, RedisModuleTypeMethods *typemethods); int REDISMODULE_API_FUNC(RedisModule_ModuleTypeSetValue)(RedisModuleKey *key, RedisModuleType *mt, void *value); @@ -327,7 +327,7 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int REDISMODULE_GET_API(IsKeysPositionRequest); REDISMODULE_GET_API(KeyAtPos); REDISMODULE_GET_API(GetClientId); - REDISMODULE_GET_API(GetCtxFlags); + REDISMODULE_GET_API(GetContextFlags); REDISMODULE_GET_API(PoolAlloc); REDISMODULE_GET_API(CreateDataType); REDISMODULE_GET_API(ModuleTypeSetValue); From 6f376956204c4841a50910f7469bc41bad25cbba Mon Sep 17 00:00:00 2001 From: Dvir Volk Date: Wed, 27 Sep 2017 15:17:53 +0300 Subject: [PATCH 0426/1722] Added safety net preventing redis from crashing if a module decide to block in MULTI --- src/module.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git 
a/src/module.c b/src/module.c index ab5e5518a..ae76ae612 100644 --- a/src/module.c +++ b/src/module.c @@ -3401,14 +3401,16 @@ void unblockClientFromModule(client *c) { RedisModuleBlockedClient *RM_BlockClient(RedisModuleCtx *ctx, RedisModuleCmdFunc reply_callback, RedisModuleCmdFunc timeout_callback, void (*free_privdata)(void*), long long timeout_ms) { client *c = ctx->client; int islua = c->flags & CLIENT_LUA; + int ismulti = c->flags & CLIENT_MULTI; c->bpop.module_blocked_handle = zmalloc(sizeof(RedisModuleBlockedClient)); RedisModuleBlockedClient *bc = c->bpop.module_blocked_handle; /* We need to handle the invalid operation of calling modules blocking - * commands from Lua. We actually create an already aborted (client set to - * NULL) blocked client handle, and actually reply to Lua with an error. */ - bc->client = islua ? NULL : c; + * commands from Lua or MULTI. We actually create an already aborted + * (client set to NULL) blocked client handle, and actually reply with + * an error. */ + bc->client = (islua || ismulti) ? NULL : c; bc->module = ctx->module; bc->reply_callback = reply_callback; bc->timeout_callback = timeout_callback; @@ -3419,9 +3421,10 @@ RedisModuleBlockedClient *RM_BlockClient(RedisModuleCtx *ctx, RedisModuleCmdFunc bc->dbid = c->db->id; c->bpop.timeout = timeout_ms ? (mstime()+timeout_ms) : 0; - if (islua) { + if (islua || ismulti) { c->bpop.module_blocked_handle = NULL; - addReplyError(c,"Blocking module command called from Lua script"); + addReplyError(c, islua ? 
"Blocking module command called from Lua script" : + "Blocking module command called from transaction"); } else { blockClient(c,BLOCKED_MODULE); } From 7fb3864e952b6906e47df30a718eda080556b0ba Mon Sep 17 00:00:00 2001 From: "zhaozhao.zz" Date: Thu, 28 Sep 2017 16:21:21 +0800 Subject: [PATCH 0427/1722] Modules: handle the conflict of registering commands --- src/module.c | 49 ++++++++++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/src/module.c b/src/module.c index fda68b273..4604c0bef 100644 --- a/src/module.c +++ b/src/module.c @@ -615,7 +615,7 @@ int RM_CreateCommand(RedisModuleCtx *ctx, const char *name, RedisModuleCmdFunc c sds cmdname = sdsnew(name); /* Check if the command name is busy. */ - if (lookupCommand((char*)name) != NULL) { + if (lookupCommand(cmdname) != NULL) { sdsfree(cmdname); return REDISMODULE_ERR; } @@ -3661,6 +3661,28 @@ void moduleFreeModuleStructure(struct RedisModule *module) { zfree(module); } +void moduleUnregisterCommands(struct RedisModule *module) { + /* Unregister all the commands registered by this module. */ + dictIterator *di = dictGetSafeIterator(server.commands); + dictEntry *de; + while ((de = dictNext(di)) != NULL) { + struct redisCommand *cmd = dictGetVal(de); + if (cmd->proc == RedisModuleCommandDispatcher) { + RedisModuleCommandProxy *cp = + (void*)(unsigned long)cmd->getkeys_proc; + sds cmdname = cp->rediscmd->name; + if (cp->module == module) { + dictDelete(server.commands,cmdname); + dictDelete(server.orig_commands,cmdname); + sdsfree(cmdname); + zfree(cp->rediscmd); + zfree(cp); + } + } + } + dictReleaseIterator(di); +} + /* Load a module and initialize it. On success C_OK is returned, otherwise * C_ERR is returned. 
*/ int moduleLoad(const char *path, void **module_argv, int module_argc) { @@ -3681,7 +3703,10 @@ int moduleLoad(const char *path, void **module_argv, int module_argc) { return C_ERR; } if (onload((void*)&ctx,module_argv,module_argc) == REDISMODULE_ERR) { - if (ctx.module) moduleFreeModuleStructure(ctx.module); + if (ctx.module) { + moduleUnregisterCommands(ctx.module); + moduleFreeModuleStructure(ctx.module); + } dlclose(handle); serverLog(LL_WARNING, "Module %s initialization failed. Module not loaded",path); @@ -3715,25 +3740,7 @@ int moduleUnload(sds name) { return REDISMODULE_ERR; } - /* Unregister all the commands registered by this module. */ - dictIterator *di = dictGetSafeIterator(server.commands); - dictEntry *de; - while ((de = dictNext(di)) != NULL) { - struct redisCommand *cmd = dictGetVal(de); - if (cmd->proc == RedisModuleCommandDispatcher) { - RedisModuleCommandProxy *cp = - (void*)(unsigned long)cmd->getkeys_proc; - sds cmdname = cp->rediscmd->name; - if (cp->module == module) { - dictDelete(server.commands,cmdname); - dictDelete(server.orig_commands,cmdname); - sdsfree(cmdname); - zfree(cp->rediscmd); - zfree(cp); - } - } - } - dictReleaseIterator(di); + moduleUnregisterCommands(module); /* Unregister all the hooks. TODO: Yet no hooks support here. */ From f1333eb992b3b864ee9882ef0a0112bf75d6067d Mon Sep 17 00:00:00 2001 From: "zhaozhao.zz" Date: Thu, 28 Sep 2017 17:38:40 +0800 Subject: [PATCH 0428/1722] Modules: handle the busy module name --- src/module.c | 16 +++++++++++++++- src/redismodule.h | 5 ++++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/module.c b/src/module.c index 4604c0bef..2303b00ee 100644 --- a/src/module.c +++ b/src/module.c @@ -650,7 +650,7 @@ int RM_CreateCommand(RedisModuleCtx *ctx, const char *name, RedisModuleCmdFunc c * * This is an internal function, Redis modules developers don't need * to use it. 
*/ -void RM_SetModuleAttribs(RedisModuleCtx *ctx, const char *name, int ver, int apiver){ +void RM_SetModuleAttribs(RedisModuleCtx *ctx, const char *name, int ver, int apiver) { RedisModule *module; if (ctx->module != NULL) return; @@ -662,6 +662,19 @@ void RM_SetModuleAttribs(RedisModuleCtx *ctx, const char *name, int ver, int api ctx->module = module; } +/* Return non-zero if the module name is busy. + * Otherwise zero is returned. */ +int RM_IsModuleNameBusy(const char *name) { + sds modulename = sdsnew(name); + + /* Check if the module name is busy. */ + if (dictFind(modules,modulename) != NULL) { + sdsfree(modulename); + return 1; + } + return 0; +} + /* Return the current UNIX time in milliseconds. */ long long RM_Milliseconds(void) { return mstime(); @@ -3835,6 +3848,7 @@ void moduleRegisterCoreAPI(void) { REGISTER_API(Strdup); REGISTER_API(CreateCommand); REGISTER_API(SetModuleAttribs); + REGISTER_API(IsModuleNameBusy); REGISTER_API(WrongArity); REGISTER_API(ReplyWithLongLong); REGISTER_API(ReplyWithError); diff --git a/src/redismodule.h b/src/redismodule.h index 7fc0fec40..8078e9fe3 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -119,7 +119,8 @@ void *REDISMODULE_API_FUNC(RedisModule_Calloc)(size_t nmemb, size_t size); char *REDISMODULE_API_FUNC(RedisModule_Strdup)(const char *str); int REDISMODULE_API_FUNC(RedisModule_GetApi)(const char *, void *); int REDISMODULE_API_FUNC(RedisModule_CreateCommand)(RedisModuleCtx *ctx, const char *name, RedisModuleCmdFunc cmdfunc, const char *strflags, int firstkey, int lastkey, int keystep); -int REDISMODULE_API_FUNC(RedisModule_SetModuleAttribs)(RedisModuleCtx *ctx, const char *name, int ver, int apiver); +void REDISMODULE_API_FUNC(RedisModule_SetModuleAttribs)(RedisModuleCtx *ctx, const char *name, int ver, int apiver); +int REDISMODULE_API_FUNC(RedisModule_IsModuleNameBusy)(const char *name); int REDISMODULE_API_FUNC(RedisModule_WrongArity)(RedisModuleCtx *ctx); int 
REDISMODULE_API_FUNC(RedisModule_ReplyWithLongLong)(RedisModuleCtx *ctx, long long ll); int REDISMODULE_API_FUNC(RedisModule_GetSelectedDb)(RedisModuleCtx *ctx); @@ -238,6 +239,7 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int REDISMODULE_GET_API(Strdup); REDISMODULE_GET_API(CreateCommand); REDISMODULE_GET_API(SetModuleAttribs); + REDISMODULE_GET_API(IsModuleNameBusy); REDISMODULE_GET_API(WrongArity); REDISMODULE_GET_API(ReplyWithLongLong); REDISMODULE_GET_API(ReplyWithError); @@ -344,6 +346,7 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int REDISMODULE_GET_API(AbortBlock); #endif + if (RedisModule_IsModuleNameBusy(name)) return REDISMODULE_ERR; RedisModule_SetModuleAttribs(ctx,name,ver,apiver); return REDISMODULE_OK; } From ba954302f646fe9e19a07a76ef3fdb7fff9fa09b Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 28 Sep 2017 23:15:34 +0200 Subject: [PATCH 0429/1722] Limit statement in RM_BlockClient() to 80 cols. --- src/module.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/module.c b/src/module.c index ae76ae612..afeb6e2cf 100644 --- a/src/module.c +++ b/src/module.c @@ -3407,8 +3407,8 @@ RedisModuleBlockedClient *RM_BlockClient(RedisModuleCtx *ctx, RedisModuleCmdFunc RedisModuleBlockedClient *bc = c->bpop.module_blocked_handle; /* We need to handle the invalid operation of calling modules blocking - * commands from Lua or MULTI. We actually create an already aborted - * (client set to NULL) blocked client handle, and actually reply with + * commands from Lua or MULTI. We actually create an already aborted + * (client set to NULL) blocked client handle, and actually reply with * an error. */ bc->client = (islua || ismulti) ? NULL : c; bc->module = ctx->module; @@ -3423,8 +3423,9 @@ RedisModuleBlockedClient *RM_BlockClient(RedisModuleCtx *ctx, RedisModuleCmdFunc if (islua || ismulti) { c->bpop.module_blocked_handle = NULL; - addReplyError(c, islua ? 
"Blocking module command called from Lua script" : - "Blocking module command called from transaction"); + addReplyError(c, islua ? + "Blocking module command called from Lua script" : + "Blocking module command called from transaction"); } else { blockClient(c,BLOCKED_MODULE); } From 8eb5823f43841b739646874131f3e8b755a90dae Mon Sep 17 00:00:00 2001 From: rouzier Date: Fri, 13 Oct 2017 13:20:45 -0400 Subject: [PATCH 0430/1722] Fix file descriptor leak and error handling --- src/redis-check-rdb.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/redis-check-rdb.c b/src/redis-check-rdb.c index 4027536e5..71ac50d03 100644 --- a/src/redis-check-rdb.c +++ b/src/redis-check-rdb.c @@ -193,12 +193,12 @@ int redis_check_rdb(char *rdbfilename, FILE *fp) { buf[9] = '\0'; if (memcmp(buf,"REDIS",5) != 0) { rdbCheckError("Wrong signature trying to load DB from file"); - return 1; + goto err; } rdbver = atoi(buf+5); if (rdbver < 1 || rdbver > RDB_VERSION) { rdbCheckError("Can't handle RDB format version %d",rdbver); - return 1; + goto err; } startLoading(fp); @@ -270,7 +270,7 @@ int redis_check_rdb(char *rdbfilename, FILE *fp) { } else { if (!rdbIsObjectType(type)) { rdbCheckError("Invalid object type: %d", type); - return 1; + goto err; } rdbstate.key_type = type; } @@ -307,6 +307,7 @@ int redis_check_rdb(char *rdbfilename, FILE *fp) { rdbCheckInfo("RDB file was saved with checksum disabled: no check performed."); } else if (cksum != expected) { rdbCheckError("RDB CRC error"); + goto err; } else { rdbCheckInfo("Checksum OK"); } @@ -321,6 +322,8 @@ eoferr: /* unexpected end of file is handled here with a fatal exit */ } else { rdbCheckError("Unexpected EOF reading RDB file"); } +err: + if (closefile) fclose(fp); return 1; } From fa36221bf6d903a0ca5d208d871926e4c65e36b7 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 30 Oct 2017 13:20:13 +0100 Subject: [PATCH 0431/1722] More robust object -> double conversion. 
Certain checks were useless, at the same time certain malformed inputs were accepted without problems (emtpy strings parsed as zero). Cases where strtod() returns ERANGE but we still want to parse the input where ok in getDoubleFromObject() but not in the long variant. As a side effect of these fixes, this commit fixes #4391. --- src/object.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/object.c b/src/object.c index 2565ed59f..188ec0101 100644 --- a/src/object.c +++ b/src/object.c @@ -558,11 +558,11 @@ int getDoubleFromObject(const robj *o, double *target) { if (sdsEncodedObject(o)) { errno = 0; value = strtod(o->ptr, &eptr); - if (isspace(((const char*)o->ptr)[0]) || + if (sdslen(o->ptr) == 0 || + isspace(((const char*)o->ptr)[0]) || eptr[0] != '\0' || (errno == ERANGE && (value == HUGE_VAL || value == -HUGE_VAL || value == 0)) || - errno == EINVAL || isnan(value)) return C_ERR; } else if (o->encoding == OBJ_ENCODING_INT) { @@ -600,8 +600,12 @@ int getLongDoubleFromObject(robj *o, long double *target) { if (sdsEncodedObject(o)) { errno = 0; value = strtold(o->ptr, &eptr); - if (isspace(((char*)o->ptr)[0]) || eptr[0] != '\0' || - errno == ERANGE || isnan(value)) + if (sdslen(o->ptr) == 0 || + isspace(((const char*)o->ptr)[0]) || + eptr[0] != '\0' || + (errno == ERANGE && + (value == HUGE_VAL || value == -HUGE_VAL || value == 0)) || + isnan(value)) return C_ERR; } else if (o->encoding == OBJ_ENCODING_INT) { value = (long)o->ptr; From a07c0db3af7d6492e63e7b1852900bffa106e8b9 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 30 Oct 2017 13:45:46 +0100 Subject: [PATCH 0432/1722] Regression test for issue #4391. 
--- tests/unit/type/zset.tcl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/unit/type/zset.tcl b/tests/unit/type/zset.tcl index 82f76befe..564825ae9 100644 --- a/tests/unit/type/zset.tcl +++ b/tests/unit/type/zset.tcl @@ -696,6 +696,10 @@ start_server {tags {"zset"}} { } } + test "ZSET commands don't accept the empty strings as valid score" { + assert_error "*not*float*" {r zadd myzset "" abc} + } + proc stressers {encoding} { if {$encoding == "ziplist"} { # Little extra to allow proper fuzzing in the sorting stresser From 401cfa554f314bae8f5e6aa9ed763e28702828fe Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 31 Oct 2017 09:41:22 +0100 Subject: [PATCH 0433/1722] Fix buffer overflows occurring reading redis.conf. There was not enough sanity checking in the code loading the slots of Redis Cluster from the nodes.conf file, this resulted into the attacker's ability to write data at random addresses in the process memory, by manipulating the index of the array. The bug seems exploitable using the following techique: the config file may be altered so that one of the nodes gets, as node ID (which is the first field inside the structure) some data that is actually executable: then by writing this address in selected places, this node ID part can be executed after a jump. So it is mostly just a matter of effort in order to exploit the bug. In practice however the issue is not very critical because the bug requires an unprivileged user to be able to modify the Redis cluster nodes configuration, and at the same time this should result in some gain. However Redis normally is unprivileged as well. Yet much better to have this fixed indeed. Fix #4278. 
--- src/cluster.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/cluster.c b/src/cluster.c index a9fedce0c..2da0f54fc 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -243,6 +243,7 @@ int clusterLoadConfig(char *filename) { *p = '\0'; direction = p[1]; /* Either '>' or '<' */ slot = atoi(argv[j]+1); + if (slot < 0 || slot >= CLUSTER_SLOTS) goto fmterr; p += 3; cn = clusterLookupNode(p); if (!cn) { @@ -262,6 +263,8 @@ int clusterLoadConfig(char *filename) { } else { start = stop = atoi(argv[j]); } + if (start < 0 || start >= CLUSTER_SLOTS) goto fmterr; + if (stop < 0 || stop >= CLUSTER_SLOTS) goto fmterr; while(start <= stop) clusterAddSlot(n, start++); } From 39acc4df0edbe8abf80edddf278cb0df27492db3 Mon Sep 17 00:00:00 2001 From: "zhaozhao.zz" Date: Wed, 1 Nov 2017 17:32:23 +0800 Subject: [PATCH 0434/1722] PSYNC2: safe free backlog when reach the time limit When we free the backlog, we should use a new replication ID and clear the ID2. Since without backlog we can not increment master_repl_offset even do write commands, that may lead to inconsistency when we try to connect a "slave-before" master (if this master is our slave before, our replid equals the master's replid2). As the master have our history, so we can match the master's replid2 and second_replid_offset, that make partial sync work, but the data is inconsistent. --- src/replication.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/replication.c b/src/replication.c index e0b3d910e..fe7b0f739 100644 --- a/src/replication.c +++ b/src/replication.c @@ -2613,6 +2613,18 @@ void replicationCron(void) { time_t idle = server.unixtime - server.repl_no_slaves_since; if (idle > server.repl_backlog_time_limit) { + /* When we free the backlog, we always use a new + * replication ID and clear the ID2. 
Since without + * backlog we can not increment master_repl_offset + * even do write commands, that may lead to inconsistency + * when we try to connect a "slave-before" master + * (if this master is our slave before, our replid + * equals the master's replid2). As the master have our + * history, so we can match the master's replid2 and + * second_replid_offset, that make partial sync work, + * but the data is inconsistent. */ + changeReplicationId(); + clearReplicationId2(); freeReplicationBacklog(); serverLog(LL_NOTICE, "Replication backlog freed after %d seconds " From 015a0f42b013210d45b874c44660e03099c32bba Mon Sep 17 00:00:00 2001 From: "zhaozhao.zz" Date: Wed, 1 Nov 2017 17:52:43 +0800 Subject: [PATCH 0435/1722] PSYNC2 & RDB: fix the missing rdbSaveInfo for BGSAVE --- src/rdb.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/rdb.c b/src/rdb.c index 125df6071..70b13fb94 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -1999,6 +1999,9 @@ void bgsaveCommand(client *c) { } } + rdbSaveInfo rsi, *rsiptr; + rsiptr = rdbPopulateSaveInfo(&rsi); + if (server.rdb_child_pid != -1) { addReplyError(c,"Background save already in progress"); } else if (server.aof_child_pid != -1) { @@ -2011,7 +2014,7 @@ void bgsaveCommand(client *c) { "Use BGSAVE SCHEDULE in order to schedule a BGSAVE whenever " "possible."); } - } else if (rdbSaveBackground(server.rdb_filename,NULL) == C_OK) { + } else if (rdbSaveBackground(server.rdb_filename,rsiptr) == C_OK) { addReplyStatus(c,"Background saving started"); } else { addReply(c,shared.err); From db1d277847ee2565135686495770fc726f0a1905 Mon Sep 17 00:00:00 2001 From: "zhaozhao.zz" Date: Thu, 2 Nov 2017 10:45:33 +0800 Subject: [PATCH 0436/1722] PSYNC2: clarify the scenario when repl_stream_db can be -1 --- src/rdb.c | 27 +++++++++++++++++++-------- src/server.c | 3 ++- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/src/rdb.c b/src/rdb.c index 70b13fb94..8de3cd965 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ 
-2035,14 +2035,25 @@ rdbSaveInfo *rdbPopulateSaveInfo(rdbSaveInfo *rsi) { *rsi = rsi_init; /* If the instance is a master, we can populate the replication info - * in all the cases, even if sometimes in incomplete (but safe) form. */ - if (!server.masterhost) { - if (server.repl_backlog) rsi->repl_stream_db = server.slaveseldb; - /* Note that if repl_backlog is NULL, it means that histories - * following from this point will trigger a full synchronization - * generating a SELECT statement, so we can leave the currently - * selected DB set to -1. This allows a restarted master to reload - * its replication ID/offset when there are no connected slaves. */ + * only when repl_backlog is not NULL. If the repl_backlog is NULL, + * it means that the instance isn't in any replication chains. In this + * scenario the replication info is useless, because when a slave + * connect to us, the NULL repl_backlog will trigger a full synchronization, + * at the same time we will use a new replid and clear replid2. + * And remember that after free backlog if we reach repl_backlog_time_limit, + * we will use a new replid and clear replid2 too. So there is only one + * scenario which can make repl_stream_db be -1, that is the instance is + * a master, and it have repl_backlog, but server.slaveseldb is -1. */ + if (!server.masterhost && server.repl_backlog) { + rsi->repl_stream_db = server.slaveseldb; + /* Note that server.slaveseldb may be -1, it means that this master + * didn't apply any write commands after a full synchronization, + * so we can leave the currently selected DB set to -1, because the + * next write command must generate a SELECT statement. This allows + * a restarted slave to reload replication ID/offset even the repl_stream_db + * is -1, but we should not do that, because older implementations + * may save a repl_stream_db as -1 in a wrong way. Maybe we can fix + * it in the next release version. 
*/ return rsi; } diff --git a/src/server.c b/src/server.c index 61291fde5..d617d3ece 100644 --- a/src/server.c +++ b/src/server.c @@ -3536,7 +3536,8 @@ void loadDataFromDisk(void) { rsi.repl_id_is_set && rsi.repl_offset != -1 && /* Note that older implementations may save a repl_stream_db - * of -1 inside the RDB file. */ + * of -1 inside the RDB file in a wrong way, see more information + * in function rdbPopulateSaveInfo. */ rsi.repl_stream_db != -1) { memcpy(server.replid,rsi.repl_id,sizeof(server.replid)); From f8d379424e0cabb73717cd78e2055241a115b3ce Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 3 Nov 2017 10:19:27 +0100 Subject: [PATCH 0437/1722] SDS: improve sdsRemoveFreeSpace() to avoid useless data copy. Since SDS v2, we no longer have a single header, so the function to rewrite the SDS in terms of the minimum space required, instead of just using realloc() and let the underlying allocator decide what to do, was doing an allocation + copy every time the minimum possible header needed to represent the string was different than the current one. This could be often a bit wasteful, because if we go, for instance, from the 32 bit fields header to the 16 bit fields header, the overhead of the header is normally very small. With this commit we call realloc instead, unless the change in header size is very significant in relation to the string length. --- src/sds.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/sds.c b/src/sds.c index eafa13c29..ff633c8bc 100644 --- a/src/sds.c +++ b/src/sds.c @@ -248,16 +248,23 @@ sds sdsMakeRoomFor(sds s, size_t addlen) { sds sdsRemoveFreeSpace(sds s) { void *sh, *newsh; char type, oldtype = s[-1] & SDS_TYPE_MASK; - int hdrlen; + int hdrlen, oldhdrlen = sdsHdrSize(oldtype); size_t len = sdslen(s); - sh = (char*)s-sdsHdrSize(oldtype); + sh = (char*)s-oldhdrlen; + /* Check what would be the minimum SDS header that is just good enough to + * fit this string. 
*/ type = sdsReqType(len); hdrlen = sdsHdrSize(type); - if (oldtype==type) { - newsh = s_realloc(sh, hdrlen+len+1); + + /* If the type is the same, or at least a large enough type is still + * required, we just realloc(), letting the allocator to do the copy + * only if really needed. Otherwise if the change is huge, we manually + * reallocate the string to use the different header type. */ + if (oldtype==type || type > SDS_TYPE_8) { + newsh = s_realloc(sh, oldhdrlen+len+1); if (newsh == NULL) return NULL; - s = (char*)newsh+hdrlen; + s = (char*)newsh+oldhdrlen; } else { newsh = s_malloc(hdrlen+len+1); if (newsh == NULL) return NULL; From 50ea8843533d698d420c85364ba7a76d80c6800f Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 6 Nov 2017 12:33:42 +0100 Subject: [PATCH 0438/1722] Fix saving of zero-length lists. Normally in modern Redis you can't create zero-len lists, however it's possible to load them from old RDB files generated, for instance, using Redis 2.8 (see issue #4409). The "Right Thing" would be not loading such lists at all, but this requires to hook in rdb.c random places in a not great way, for a problem that is at this point, at best, minor. Here in this commit instead I just fix the fact that zero length lists, materialized as quicklists with the first node set to NULL, were iterated in the wrong way while they are saved, leading to a crash. The other parts of the list implementation are apparently able to deal with empty lists correctly, even if they are no longer a thing. 
--- src/rdb.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/rdb.c b/src/rdb.c index 125df6071..36e4400f4 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -656,7 +656,7 @@ ssize_t rdbSaveObject(rio *rdb, robj *o) { if ((n = rdbSaveLen(rdb,ql->len)) == -1) return -1; nwritten += n; - do { + while(node) { if (quicklistNodeIsCompressed(node)) { void *data; size_t compress_len = quicklistGetLzf(node, &data); @@ -666,7 +666,8 @@ ssize_t rdbSaveObject(rio *rdb, robj *o) { if ((n = rdbSaveRawString(rdb,node->zl,node->sz)) == -1) return -1; nwritten += n; } - } while ((node = node->next)); + node = node->next; + } } else { serverPanic("Unknown list encoding"); } From 0a8dfc8935da172410b4969a673f93d2a88445c4 Mon Sep 17 00:00:00 2001 From: Itamar Haber Date: Wed, 8 Nov 2017 16:08:29 +0200 Subject: [PATCH 0439/1722] Fixes an off-by-one in argument handling of `MEMORY USAGE` Fixes #4430 --- src/object.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/object.c b/src/object.c index 188ec0101..d2db7963e 100644 --- a/src/object.c +++ b/src/object.c @@ -1074,7 +1074,7 @@ void memoryCommand(client *c) { if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nullbulk)) == NULL) return; size_t usage = objectComputeSize(o,samples); - usage += sdsAllocSize(c->argv[1]->ptr); + usage += sdsAllocSize(c->argv[2]->ptr); usage += sizeof(dictEntry); addReplyLongLong(c,usage); } else if (!strcasecmp(c->argv[1]->ptr,"stats") && c->argc == 2) { From 7d9209e38b401549666cd81e4b5c3a9acf0b58f7 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Sun, 19 Nov 2017 16:23:42 +0000 Subject: [PATCH 0440/1722] Fix undefined behavior constant defined. 
--- src/lzfP.h | 6 +++++- src/setproctitle.c | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/lzfP.h b/src/lzfP.h index c6d2e096c..93c27b42d 100644 --- a/src/lzfP.h +++ b/src/lzfP.h @@ -79,7 +79,11 @@ * Unconditionally aligning does not cost very much, so do it if unsure */ #ifndef STRICT_ALIGN -# define STRICT_ALIGN !(defined(__i386) || defined (__amd64)) +# if !(defined(__i386) || defined (__amd64)) +# define STRICT_ALIGN 1 +# else +# define STRICT_ALIGN 0 +# endif #endif /* diff --git a/src/setproctitle.c b/src/setproctitle.c index f44253e16..6563242de 100644 --- a/src/setproctitle.c +++ b/src/setproctitle.c @@ -39,7 +39,11 @@ #include /* errno program_invocation_name program_invocation_short_name */ #if !defined(HAVE_SETPROCTITLE) -#define HAVE_SETPROCTITLE (defined __NetBSD__ || defined __FreeBSD__ || defined __OpenBSD__) +#if (defined __NetBSD__ || defined __FreeBSD__ || defined __OpenBSD__) +#define HAVE_SETPROCTITLE 1 +#else +#define HAVE_SETPROCTITLE 0 +#endif #endif From 32a1a4da1e1c621357df92f836bea5f23e1b8283 Mon Sep 17 00:00:00 2001 From: "zhaozhao.zz" Date: Sat, 18 Nov 2017 01:21:39 +0800 Subject: [PATCH 0441/1722] rehash: handle one db until finished --- src/server.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/server.c b/src/server.c index 61291fde5..c9972115a 100644 --- a/src/server.c +++ b/src/server.c @@ -908,12 +908,15 @@ void databasesCron(void) { /* Rehash */ if (server.activerehashing) { for (j = 0; j < dbs_per_call; j++) { - int work_done = incrementallyRehash(rehash_db % server.dbnum); - rehash_db++; + int work_done = incrementallyRehash(rehash_db); if (work_done) { /* If the function did some work, stop here, we'll do * more at the next cron loop. */ break; + } else { + /* If this db didn't need rehash, we'll try the next one. 
*/ + rehash_db++; + rehash_db %= server.dbnum; } } } From 91922131ce68d681fa7185a4acc2fc0e4457d93a Mon Sep 17 00:00:00 2001 From: "zhaozhao.zz" Date: Tue, 21 Nov 2017 23:35:30 +0800 Subject: [PATCH 0442/1722] expire & latency: fix the missing latency records generated by expire --- src/expire.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/expire.c b/src/expire.c index a02fe566a..81c9e23f5 100644 --- a/src/expire.c +++ b/src/expire.c @@ -103,7 +103,7 @@ void activeExpireCycle(int type) { int j, iteration = 0; int dbs_per_call = CRON_DBS_PER_CALL; - long long start = ustime(), timelimit; + long long start = ustime(), timelimit, elapsed; /* When clients are paused the dataset should be static not just from the * POV of clients not being able to write, but also from the POV of @@ -140,7 +140,7 @@ void activeExpireCycle(int type) { if (type == ACTIVE_EXPIRE_CYCLE_FAST) timelimit = ACTIVE_EXPIRE_CYCLE_FAST_DURATION; /* in microseconds. */ - for (j = 0; j < dbs_per_call; j++) { + for (j = 0; j < dbs_per_call && timelimit_exit == 0; j++) { int expired; redisDb *db = server.db+(current_db % server.dbnum); @@ -155,6 +155,7 @@ void activeExpireCycle(int type) { unsigned long num, slots; long long now, ttl_sum; int ttl_samples; + iteration++; /* If there is nothing to expire try next DB ASAP. */ if ((num = dictSize(db->expires)) == 0) { @@ -207,18 +208,20 @@ void activeExpireCycle(int type) { /* We can't block forever here even if there are many keys to * expire. So after a given amount of milliseconds return to the * caller waiting for the other active expire cycle. */ - iteration++; if ((iteration & 0xf) == 0) { /* check once every 16 iterations. 
*/ - long long elapsed = ustime()-start; - - latencyAddSampleIfNeeded("expire-cycle",elapsed/1000); - if (elapsed > timelimit) timelimit_exit = 1; + elapsed = ustime()-start; + if (elapsed > timelimit) { + timelimit_exit = 1; + break; + } } - if (timelimit_exit) return; /* We don't repeat the cycle if there are less than 25% of keys * found expired in the current DB. */ } while (expired > ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP/4); } + + elapsed = ustime()-start; + latencyAddSampleIfNeeded("expire-cycle",elapsed/1000); } /*----------------------------------------------------------------------------- From 7f0a2494ca26ee94c3f4ca55eda7c0f48825b6c1 Mon Sep 17 00:00:00 2001 From: "zhaozhao.zz" Date: Wed, 22 Nov 2017 12:05:30 +0800 Subject: [PATCH 0443/1722] PSYNC2: make repl_stream_db never be -1 it means that after this change all the replication info in RDB is valid, and it can distinguish us from the older version. --- src/rdb.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/rdb.c b/src/rdb.c index 8de3cd965..386b6a78f 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -2045,15 +2045,12 @@ rdbSaveInfo *rdbPopulateSaveInfo(rdbSaveInfo *rsi) { * scenario which can make repl_stream_db be -1, that is the instance is * a master, and it have repl_backlog, but server.slaveseldb is -1. */ if (!server.masterhost && server.repl_backlog) { - rsi->repl_stream_db = server.slaveseldb; - /* Note that server.slaveseldb may be -1, it means that this master - * didn't apply any write commands after a full synchronization, - * so we can leave the currently selected DB set to -1, because the - * next write command must generate a SELECT statement. This allows - * a restarted slave to reload replication ID/offset even the repl_stream_db - * is -1, but we should not do that, because older implementations - * may save a repl_stream_db as -1 in a wrong way. Maybe we can fix - * it in the next release version. */ + rsi->repl_stream_db = server.slaveseldb == -1 ? 
0 : server.slaveseldb; + /* Note that when server.slaveseldb is -1, it means that this master + * didn't apply any write commands after a full synchronization. + * So we can let repl_stream_db be 0, this allows a restarted slave + * to reload replication ID/offset, it's safe because the next write + * command must generate a SELECT statement. */ return rsi; } From 308fa486ed4ba4673a72e3416cd3f27112ef39b0 Mon Sep 17 00:00:00 2001 From: "zhaozhao.zz" Date: Sat, 4 Nov 2017 23:05:00 +0800 Subject: [PATCH 0444/1722] PSYNC2: persist cached_master's dbid inside the RDB --- src/rdb.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/rdb.c b/src/rdb.c index 386b6a78f..a7334449e 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -2054,11 +2054,25 @@ rdbSaveInfo *rdbPopulateSaveInfo(rdbSaveInfo *rsi) { return rsi; } - /* If the instance is a slave we need a connected master in order to - * fetch the currently selected DB. */ + /* If the instance is a slave we need a connected master + * in order to fetch the currently selected DB. */ if (server.master) { rsi->repl_stream_db = server.master->db->id; return rsi; } + /* It is useful to persist cached_master's db id inside RDB file. + * When a slave lost master's connection, server.master will be + * cached as server.cached_master, after that a slave can not + * increment the master_repl_offset because slave only apply data + * from connected master, so the cached_master can hold right + * replication info. But please note that this action is safe + * only after we fix the free backlog problem, because when a master + * turn to be a slave, it will use itself as the server.cached_master, + * that is dangerous if we didn't use a new replication ID after + * free backlog. 
*/ + if (server.cached_master) { + rsi->repl_stream_db = server.cached_master->db->id; + return rsi; + } return NULL; } From 3246b9512925c7afdf79e69259f5d406f799f0c5 Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Wed, 22 Nov 2017 15:09:30 +0200 Subject: [PATCH 0445/1722] Nested MULTI/EXEC may replicate in different cases. For example: 1. A module command called within a MULTI section. 2. A Lua script with replicate_commands() called within a MULTI section. 3. A module command called from a Lua script in the above context. --- src/module.c | 2 ++ src/scripting.c | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/src/module.c b/src/module.c index afeb6e2cf..03857ee44 100644 --- a/src/module.c +++ b/src/module.c @@ -1164,6 +1164,8 @@ int RM_ReplyWithDouble(RedisModuleCtx *ctx, double d) { * in the context of a command execution. EXEC will be handled by the * RedisModuleCommandDispatcher() function. */ void moduleReplicateMultiIfNeeded(RedisModuleCtx *ctx) { + /* Skip this if client explicitly wrap the command with MULTI */ + if (ctx->client->flags & CLIENT_MULTI) return; /* If we already emitted MULTI return ASAP. */ if (ctx->flags & REDISMODULE_CTX_MULTI_EMITTED) return; /* If this is a thread safe context, we do not want to wrap commands diff --git a/src/scripting.c b/src/scripting.c index 8f8145b2c..d9f954068 100644 --- a/src/scripting.c +++ b/src/scripting.c @@ -358,6 +358,13 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) { static size_t cached_objects_len[LUA_CMD_OBJCACHE_SIZE]; static int inuse = 0; /* Recursive calls detection. */ + /* Reflect MULTI state */ + if (server.lua_multi_emitted || (server.lua_caller->flags & CLIENT_MULTI)) { + c->flags |= CLIENT_MULTI; + } else { + c->flags &= ~CLIENT_MULTI; + } + /* By using Lua debug hooks it is possible to trigger a recursive call * to luaRedisGenericCommand(), which normally should never happen. 
* To make this function reentrant is futile and makes it slower, but @@ -535,6 +542,7 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) { * a Lua script in the context of AOF and slaves. */ if (server.lua_replicate_commands && !server.lua_multi_emitted && + !(server.lua_caller->flags & CLIENT_MULTI) && server.lua_write_dirty && server.lua_repl != PROPAGATE_NONE) { From cff7b04fdcc81e3c2b639040a9eca63b657aa188 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 23 Nov 2017 13:05:00 +0100 Subject: [PATCH 0446/1722] Modules: fix for scripting replication of modules commands. See issue #4466 / #4467. --- src/module.c | 11 ++++++----- src/server.c | 5 +++-- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/module.c b/src/module.c index 03857ee44..83249554e 100644 --- a/src/module.c +++ b/src/module.c @@ -442,9 +442,7 @@ void moduleFreeContext(RedisModuleCtx *ctx) { void moduleHandlePropagationAfterCommandCallback(RedisModuleCtx *ctx) { client *c = ctx->client; - /* We don't want any automatic propagation here since in modules we handle - * replication / AOF propagation in explicit ways. */ - preventCommandPropagation(c); + if (c->flags & CLIENT_LUA) return; /* Handle the replication of the final EXEC, since whatever a command * emits is always wrappered around MULTI/EXEC. */ @@ -1164,8 +1162,9 @@ int RM_ReplyWithDouble(RedisModuleCtx *ctx, double d) { * in the context of a command execution. EXEC will be handled by the * RedisModuleCommandDispatcher() function. */ void moduleReplicateMultiIfNeeded(RedisModuleCtx *ctx) { - /* Skip this if client explicitly wrap the command with MULTI */ - if (ctx->client->flags & CLIENT_MULTI) return; + /* Skip this if client explicitly wrap the command with MULTI, or if + * the module command was called by a script. */ + if (ctx->client->flags & (CLIENT_MULTI|CLIENT_LUA)) return; /* If we already emitted MULTI return ASAP. 
*/ if (ctx->flags & REDISMODULE_CTX_MULTI_EMITTED) return; /* If this is a thread safe context, we do not want to wrap commands @@ -1218,6 +1217,7 @@ int RM_Replicate(RedisModuleCtx *ctx, const char *cmdname, const char *fmt, ...) /* Release the argv. */ for (j = 0; j < argc; j++) decrRefCount(argv[j]); zfree(argv); + server.dirty++; return REDISMODULE_OK; } @@ -1236,6 +1236,7 @@ int RM_ReplicateVerbatim(RedisModuleCtx *ctx) { alsoPropagate(ctx->client->cmd,ctx->client->db->id, ctx->client->argv,ctx->client->argc, PROPAGATE_AOF|PROPAGATE_REPL); + server.dirty++; return REDISMODULE_OK; } diff --git a/src/server.c b/src/server.c index c9972115a..69dca456e 100644 --- a/src/server.c +++ b/src/server.c @@ -2265,8 +2265,9 @@ void call(client *c, int flags) { propagate_flags &= ~PROPAGATE_AOF; /* Call propagate() only if at least one of AOF / replication - * propagation is needed. */ - if (propagate_flags != PROPAGATE_NONE) + * propagation is needed. Note that modules commands handle replication + * in an explicit way, so we never replicate them automatically. */ + if (propagate_flags != PROPAGATE_NONE && !(c->cmd->flags & CMD_MODULE)) propagate(c->cmd,c->db->id,c->argv,c->argc,propagate_flags); } From 07e0f0f72fd280529d34aeb93e113303f2d4fb85 Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Thu, 23 Nov 2017 16:42:15 +0200 Subject: [PATCH 0447/1722] fix string to double conversion, stopped parsing on \0 even if the string has more data. getLongLongFromObject calls string2ll which has this line: /* Return if not all bytes were used. */ so if you pass an sds with 3 characters "1\01" it will fail. but getLongDoubleFromObject calls strtold, and considers it ok if eptr[0]==`\0` i.e. 
if the end of the string found by strtold ends with null terminator 127.0.0.1:6379> set a 1 OK 127.0.0.1:6379> setrange a 2 2 (integer) 3 127.0.0.1:6379> get a "1\x002" 127.0.0.1:6379> incrbyfloat a 2 "3" 127.0.0.1:6379> get a "3" --- src/object.c | 4 ++-- tests/unit/type/incr.tcl | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/object.c b/src/object.c index d2db7963e..0950837c8 100644 --- a/src/object.c +++ b/src/object.c @@ -560,7 +560,7 @@ int getDoubleFromObject(const robj *o, double *target) { value = strtod(o->ptr, &eptr); if (sdslen(o->ptr) == 0 || isspace(((const char*)o->ptr)[0]) || - eptr[0] != '\0' || + (size_t)(eptr-(char*)o->ptr) != sdslen(o->ptr) || (errno == ERANGE && (value == HUGE_VAL || value == -HUGE_VAL || value == 0)) || isnan(value)) @@ -602,7 +602,7 @@ int getLongDoubleFromObject(robj *o, long double *target) { value = strtold(o->ptr, &eptr); if (sdslen(o->ptr) == 0 || isspace(((const char*)o->ptr)[0]) || - eptr[0] != '\0' || + (size_t)(eptr-(char*)o->ptr) != sdslen(o->ptr) || (errno == ERANGE && (value == HUGE_VAL || value == -HUGE_VAL || value == 0)) || isnan(value)) diff --git a/tests/unit/type/incr.tcl b/tests/unit/type/incr.tcl index 2287aaae2..a58710d39 100644 --- a/tests/unit/type/incr.tcl +++ b/tests/unit/type/incr.tcl @@ -144,4 +144,11 @@ start_server {tags {"incr"}} { r set foo 1 roundFloat [r incrbyfloat foo -1.1] } {-0.1} + + test {string to double with null terminator} { + r set foo 1 + r setrange foo 2 2 + catch {r incrbyfloat foo 1} err + format $err + } {ERR*valid*} } From 97ca53e8ab21771ab644b5c59df4a3fdea671af6 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 24 Nov 2017 11:08:22 +0100 Subject: [PATCH 0448/1722] PSYNC2: reorganize comments related to recent fixes. Related to PR #4412 and issue #4407. 
--- src/rdb.c | 27 ++++++++++----------------- src/replication.c | 23 ++++++++++++++--------- 2 files changed, 24 insertions(+), 26 deletions(-) diff --git a/src/rdb.c b/src/rdb.c index acf3197f0..00106cac4 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -2039,19 +2039,16 @@ rdbSaveInfo *rdbPopulateSaveInfo(rdbSaveInfo *rsi) { * only when repl_backlog is not NULL. If the repl_backlog is NULL, * it means that the instance isn't in any replication chains. In this * scenario the replication info is useless, because when a slave - * connect to us, the NULL repl_backlog will trigger a full synchronization, - * at the same time we will use a new replid and clear replid2. - * And remember that after free backlog if we reach repl_backlog_time_limit, - * we will use a new replid and clear replid2 too. So there is only one - * scenario which can make repl_stream_db be -1, that is the instance is - * a master, and it have repl_backlog, but server.slaveseldb is -1. */ + * connects to us, the NULL repl_backlog will trigger a full + * synchronization, at the same time we will use a new replid and clear + * replid2. */ if (!server.masterhost && server.repl_backlog) { - rsi->repl_stream_db = server.slaveseldb == -1 ? 0 : server.slaveseldb; /* Note that when server.slaveseldb is -1, it means that this master * didn't apply any write commands after a full synchronization. * So we can let repl_stream_db be 0, this allows a restarted slave * to reload replication ID/offset, it's safe because the next write * command must generate a SELECT statement. */ + rsi->repl_stream_db = server.slaveseldb == -1 ? 0 : server.slaveseldb; return rsi; } @@ -2061,16 +2058,12 @@ rdbSaveInfo *rdbPopulateSaveInfo(rdbSaveInfo *rsi) { rsi->repl_stream_db = server.master->db->id; return rsi; } - /* It is useful to persist cached_master's db id inside RDB file. 
- * When a slave lost master's connection, server.master will be - * cached as server.cached_master, after that a slave can not - * increment the master_repl_offset because slave only apply data - * from connected master, so the cached_master can hold right - * replication info. But please note that this action is safe - * only after we fix the free backlog problem, because when a master - * turn to be a slave, it will use itself as the server.cached_master, - * that is dangerous if we didn't use a new replication ID after - * free backlog. */ + + /* If we have a cached master we can use it in order to populate the + * replication selected DB info inside the RDB file: the slave can + * increment the master_repl_offset only from data arriving from the + * master, so if we are disconnected the offset in the cached master + * is valid. */ if (server.cached_master) { rsi->repl_stream_db = server.cached_master->db->id; return rsi; diff --git a/src/replication.c b/src/replication.c index fe7b0f739..cf4db3e3a 100644 --- a/src/replication.c +++ b/src/replication.c @@ -2614,15 +2614,20 @@ void replicationCron(void) { if (idle > server.repl_backlog_time_limit) { /* When we free the backlog, we always use a new - * replication ID and clear the ID2. Since without - * backlog we can not increment master_repl_offset - * even do write commands, that may lead to inconsistency - * when we try to connect a "slave-before" master - * (if this master is our slave before, our replid - * equals the master's replid2). As the master have our - * history, so we can match the master's replid2 and - * second_replid_offset, that make partial sync work, - * but the data is inconsistent. */ + * replication ID and clear the ID2. This is needed + * because when there is no backlog, the master_repl_offset + * is not updated, but we would still retain our replication + * ID, leading to the following problem: + * + * 1. We are a master instance. + * 2. Our slave is promoted to master. 
It's repl-id-2 will + * be the same as our repl-id. + * 3. We, yet as master, receive some updates, that will not + * increment the master_repl_offset. + * 4. Later we are turned into a slave, connecto to the new + * master that will accept our PSYNC request by second + * replication ID, but there will be data inconsistency + * because we received writes. */ changeReplicationId(); clearReplicationId2(); freeReplicationBacklog(); From 63f2530a049590b450c11d1ad62cfe0bda58991f Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 24 Nov 2017 13:29:54 +0100 Subject: [PATCH 0449/1722] Modules: fix memory leak in RM_IsModuleNameBusy(). --- src/module.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/module.c b/src/module.c index 7f9402ecf..8a4c40f12 100644 --- a/src/module.c +++ b/src/module.c @@ -664,13 +664,9 @@ void RM_SetModuleAttribs(RedisModuleCtx *ctx, const char *name, int ver, int api * Otherwise zero is returned. */ int RM_IsModuleNameBusy(const char *name) { sds modulename = sdsnew(name); - - /* Check if the module name is busy. */ - if (dictFind(modules,modulename) != NULL) { - sdsfree(modulename); - return 1; - } - return 0; + dictEntry *de = dictFind(modules,modulename); + sdsfree(modulename); + return de != NULL; } /* Return the current UNIX time in milliseconds. */ From f56b7aaef2192a7d86b4ddcf8d64a071be7a391f Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 24 Nov 2017 18:33:31 +0100 Subject: [PATCH 0450/1722] Test: regression test for latency expire events logging bug. Regression for #4452. 
--- tests/unit/latency-monitor.tcl | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/unit/latency-monitor.tcl b/tests/unit/latency-monitor.tcl index b736cad98..69da13f06 100644 --- a/tests/unit/latency-monitor.tcl +++ b/tests/unit/latency-monitor.tcl @@ -47,4 +47,18 @@ start_server {tags {"latency-monitor"}} { assert {[r latency reset] > 0} assert {[r latency latest] eq {}} } + + test {LATENCY of expire events are correctly collected} { + r config set latency-monitor-threshold 20 + r eval { + local i = 0 + while (i < 1000000) do + redis.call('sadd','mybigkey',i) + i = i+1 + end + } 0 + r pexpire mybigkey 1 + after 500 + assert_match {*expire-cycle*} [r latency latest] + } } From 21479f9ebc311cb3b83f513771f7bb886bb4b415 Mon Sep 17 00:00:00 2001 From: Itamar Haber Date: Fri, 24 Nov 2017 19:58:37 +0200 Subject: [PATCH 0451/1722] Prevents `OBJECT freq` with `noeviction` When maxmemory is set to noeviction, idletime is implicitly kept. This renders access frequency nonsensical. --- src/object.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/object.c b/src/object.c index d2db7963e..bbf1ca110 100644 --- a/src/object.c +++ b/src/object.c @@ -1035,8 +1035,8 @@ void objectCommand(client *c) { } else if (!strcasecmp(c->argv[1]->ptr,"freq") && c->argc == 3) { if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nullbulk)) == NULL) return; - if (server.maxmemory_policy & MAXMEMORY_FLAG_LRU) { - addReplyError(c,"An LRU maxmemory policy is selected, access frequency not tracked. Please note that when switching between policies at runtime LRU and LFU data will take some time to adjust."); + if (!(server.maxmemory_policy & MAXMEMORY_FLAG_LFU)) { + addReplyError(c,"A non-LFU maxmemory policy is selected, access frequency not tracked. 
Please note that when switching between policies at runtime LRU and LFU data will take some time to adjust."); return; } addReplyLongLong(c,o->lru&255); From 4a9fb02acace3a6c5e8497e6d6a8d09dc27ff024 Mon Sep 17 00:00:00 2001 From: Itamar Haber Date: Fri, 24 Nov 2017 19:59:05 +0200 Subject: [PATCH 0452/1722] Adds `OBJECT help` --- src/object.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/object.c b/src/object.c index bbf1ca110..4028380d4 100644 --- a/src/object.c +++ b/src/object.c @@ -1012,11 +1012,25 @@ robj *objectCommandLookupOrReply(client *c, robj *key, robj *reply) { } /* Object command allows to inspect the internals of an Redis Object. - * Usage: OBJECT */ + * Usage: OBJECT */ void objectCommand(client *c) { robj *o; - if (!strcasecmp(c->argv[1]->ptr,"refcount") && c->argc == 3) { + if (!strcasecmp(c->argv[1]->ptr,"help") && c->argc == 2) { + void *blenp = addDeferredMultiBulkLength(c); + int blen = 0; + blen++; addReplyStatus(c, + "OBJECT key. Subcommands:"); + blen++; addReplyStatus(c, + "refcount -- Return the number of references of the value associated with the specified key."); + blen++; addReplyStatus(c, + "encoding -- Return the kind of internal representation used in order to store the value associated with a key."); + blen++; addReplyStatus(c, + "idletime -- Return the number of seconds since the object stored at the specified key is idle."); + blen++; addReplyStatus(c, + "freq -- Return the inverse logarithmic access frequency counter of the object stored at the specified key."); + setDeferredMultiBulkLength(c,blenp,blen); + } else if (!strcasecmp(c->argv[1]->ptr,"refcount") && c->argc == 3) { if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nullbulk)) == NULL) return; addReplyLongLong(c,o->refcount); @@ -1041,7 +1055,8 @@ void objectCommand(client *c) { } addReplyLongLong(c,o->lru&255); } else { - addReplyError(c,"Syntax error. 
Try OBJECT (refcount|encoding|idletime|freq)"); + addReplyErrorFormat(c, "Unknown subcommand or wrong number of arguments for '%s'. Try OBJECT help", + (char *)c->argv[1]->ptr); } } From 241526af3daef5e3590f487ee1d5d0e1d8be71ac Mon Sep 17 00:00:00 2001 From: Itamar Haber Date: Sat, 23 Jul 2016 11:41:20 -0700 Subject: [PATCH 0453/1722] Adds -u option to redis-cli. --- src/redis-cli.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/src/redis-cli.c b/src/redis-cli.c index ca9fe6ad7..84eabf391 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -198,6 +198,92 @@ static sds getDotfilePath(char *envoverride, char *dotfilename) { return dotPath; } +/* URL-style percent decoding. */ +#define isHexChar(c) (isdigit(c) || (c >= 'a' && c <= 'f')) +#define decodeHexChar(c) (isdigit(c) ? c - '0' : c - 'a' + 10) +#define decodeHex(h, l) ((decodeHexChar(h) << 4) + decodeHexChar(l)) + +static sds percentDecode(const char *pe, size_t len) { + const char *end = pe + len; + sds ret = sdsempty(); + const char *curr = pe; + + while (curr < end) { + if (*curr == '%') { + if ((end - curr) < 2) { + fprintf(stderr, "Incomplete URI encoding\n"); + exit(1); + } + + char h = tolower(*(++curr)); + char l = tolower(*(++curr)); + if (!isHexChar(h) || !isHexChar(l)) { + fprintf(stderr, "Illegal character in URI encoding\n"); + exit(1); + } + char c = decodeHex(h, l); + ret = sdscatlen(ret, &c, 1); + curr++; + } else { + ret = sdscatlen(ret, curr++, 1); + } + } + + return ret; +} + +/* Parse a URI and extract the server connection information. + * URI scheme is based on the the provisional specification[1] excluding support + * for query parameters. 
Valid URIs are: + * scheme: "redis://" + * authority: [ ":"] "@"] [ [":" ]] + * path: ["/" []] + * + * [1]: https://www.iana.org/assignments/uri-schemes/prov/redis */ +static void parseRedisUri(const char *uri) { + + const char *scheme = "redis://"; + const char *curr = uri; + const char *end = uri + strlen(uri); + const char *userinfo, *username, *port, *host, *path; + + /* URI must start with a valid scheme. */ + if (strncasecmp(scheme, curr, strlen(scheme))) { + fprintf(stderr,"Invalid URI scheme\n"); + exit(1); + } + curr += strlen(scheme); + if (curr == end) return; + + /* Extract user info. */ + if ((userinfo = strchr(curr,'@'))) { + if ((username = strchr(curr, ':')) && username < userinfo) { + /* If provided, username is ignored. */ + curr = username + 1; + } + + config.auth = percentDecode(curr, userinfo - curr); + curr = userinfo + 1; + } + if (curr == end) return; + + /* Extract host and port. */ + path = strchr(curr, '/'); + if (*curr != '/') { + host = path ? path - 1 : end; + if ((port = strchr(curr, ':'))) { + config.hostport = atoi(port + 1); + host = port - 1; + } + config.hostip = sdsnewlen(curr, host - curr + 1); + } + curr = path ? path + 1 : end; + if (curr == end) return; + + /* Extract database number. 
*/ + config.dbnum = atoi(curr); +} + /*------------------------------------------------------------------------------ * Help functions *--------------------------------------------------------------------------- */ @@ -1002,6 +1088,8 @@ static int parseOptions(int argc, char **argv) { config.dbnum = atoi(argv[++i]); } else if (!strcmp(argv[i],"-a") && !lastarg) { config.auth = argv[++i]; + } else if (!strcmp(argv[i],"-u") && !lastarg) { + parseRedisUri(argv[++i]); } else if (!strcmp(argv[i],"--raw")) { config.output = OUTPUT_RAW; } else if (!strcmp(argv[i],"--no-raw")) { @@ -1109,6 +1197,7 @@ static void usage(void) { " -p Server port (default: 6379).\n" " -s Server socket (overrides hostname and port).\n" " -a Password to use when connecting to the server.\n" +" -u Server URI.\n" " -r Execute specified command N times.\n" " -i When -r is used, waits seconds per command.\n" " It is possible to specify sub-second times like -i 0.1.\n" From af514a1155bd6cbb40b2a05f9b9403c1cef17e3f Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 27 Nov 2017 13:16:07 +0100 Subject: [PATCH 0454/1722] Fix entry command table entry for OBJECT for HELP option. After #4472 the command may have just 2 arguments. 
--- src/server.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/server.c b/src/server.c index 888926267..6bc8bc66f 100644 --- a/src/server.c +++ b/src/server.c @@ -276,7 +276,7 @@ struct redisCommand redisCommandTable[] = { {"readonly",readonlyCommand,1,"F",0,NULL,0,0,0,0,0}, {"readwrite",readwriteCommand,1,"F",0,NULL,0,0,0,0,0}, {"dump",dumpCommand,2,"r",0,NULL,1,1,1,0,0}, - {"object",objectCommand,3,"r",0,NULL,2,2,2,0,0}, + {"object",objectCommand,-2,"r",0,NULL,2,2,2,0,0}, {"memory",memoryCommand,-2,"r",0,NULL,0,0,0,0,0}, {"client",clientCommand,-2,"as",0,NULL,0,0,0,0,0}, {"eval",evalCommand,-3,"s",0,evalGetKeys,0,0,0,0,0}, From d0a716f709d2176aad2addd32fad7be30abb0925 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 27 Nov 2017 18:09:05 +0100 Subject: [PATCH 0455/1722] Improve OBJECT HELP descriptions. See #4472. --- src/object.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/object.c b/src/object.c index e62b5b3d9..8c33d7ef6 100644 --- a/src/object.c +++ b/src/object.c @@ -1026,9 +1026,9 @@ void objectCommand(client *c) { blen++; addReplyStatus(c, "encoding -- Return the kind of internal representation used in order to store the value associated with a key."); blen++; addReplyStatus(c, - "idletime -- Return the number of seconds since the object stored at the specified key is idle."); + "idletime -- Return the idle time of the key, that is the approximated number of seconds elapsed since the last access to the key."); blen++; addReplyStatus(c, - "freq -- Return the inverse logarithmic access frequency counter of the object stored at the specified key."); + "freq -- Return the access frequency index of the key. 
The returned integer is proportional to the logarithm of the recent access frequency of the key."); setDeferredMultiBulkLength(c,blenp,blen); } else if (!strcasecmp(c->argv[1]->ptr,"refcount") && c->argc == 3) { if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nullbulk)) From 44c2e4cdbfbbbd33728112541ab78d5b7e582c85 Mon Sep 17 00:00:00 2001 From: "zhaozhao.zz" Date: Fri, 13 Oct 2017 10:52:10 +0800 Subject: [PATCH 0456/1722] LFU: fix the missing of config get and rewrite --- src/config.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/config.c b/src/config.c index 06d869be1..50993666d 100644 --- a/src/config.c +++ b/src/config.c @@ -330,13 +330,13 @@ void loadServerConfigFromString(char *config) { } } else if (!strcasecmp(argv[0],"lfu-log-factor") && argc == 2) { server.lfu_log_factor = atoi(argv[1]); - if (server.maxmemory_samples < 0) { + if (server.lfu_log_factor < 0) { err = "lfu-log-factor must be 0 or greater"; goto loaderr; } } else if (!strcasecmp(argv[0],"lfu-decay-time") && argc == 2) { server.lfu_decay_time = atoi(argv[1]); - if (server.maxmemory_samples < 1) { + if (server.lfu_decay_time < 0) { err = "lfu-decay-time must be 0 or greater"; goto loaderr; } @@ -1221,6 +1221,8 @@ void configGetCommand(client *c) { /* Numerical values */ config_get_numerical_field("maxmemory",server.maxmemory); config_get_numerical_field("maxmemory-samples",server.maxmemory_samples); + config_get_numerical_field("lfu-log-factor",server.lfu_log_factor); + config_get_numerical_field("lfu-decay-time",server.lfu_decay_time); config_get_numerical_field("timeout",server.maxidletime); config_get_numerical_field("active-defrag-threshold-lower",server.active_defrag_threshold_lower); config_get_numerical_field("active-defrag-threshold-upper",server.active_defrag_threshold_upper); @@ -1992,6 +1994,8 @@ int rewriteConfig(char *path) { rewriteConfigBytesOption(state,"maxmemory",server.maxmemory,CONFIG_DEFAULT_MAXMEMORY); 
rewriteConfigEnumOption(state,"maxmemory-policy",server.maxmemory_policy,maxmemory_policy_enum,CONFIG_DEFAULT_MAXMEMORY_POLICY); rewriteConfigNumericalOption(state,"maxmemory-samples",server.maxmemory_samples,CONFIG_DEFAULT_MAXMEMORY_SAMPLES); + rewriteConfigNumericalOption(state,"lfu-log-factor",server.lfu_log_factor,CONFIG_DEFAULT_LFU_LOG_FACTOR); + rewriteConfigNumericalOption(state,"lfu-decay-time",server.lfu_decay_time,CONFIG_DEFAULT_LFU_DECAY_TIME); rewriteConfigNumericalOption(state,"active-defrag-threshold-lower",server.active_defrag_threshold_lower,CONFIG_DEFAULT_DEFRAG_THRESHOLD_LOWER); rewriteConfigNumericalOption(state,"active-defrag-threshold-upper",server.active_defrag_threshold_upper,CONFIG_DEFAULT_DEFRAG_THRESHOLD_UPPER); rewriteConfigBytesOption(state,"active-defrag-ignore-bytes",server.active_defrag_ignore_bytes,CONFIG_DEFAULT_DEFRAG_IGNORE_BYTES); From aef50770ba15f9c24bdda4adc275fae33c76bb13 Mon Sep 17 00:00:00 2001 From: "zhaozhao.zz" Date: Fri, 13 Oct 2017 11:09:48 +0800 Subject: [PATCH 0457/1722] LFU: change lfu* parameters to int --- src/evict.c | 2 +- src/server.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/evict.c b/src/evict.c index 5ce5ca07f..0a04ed1bb 100644 --- a/src/evict.c +++ b/src/evict.c @@ -334,7 +334,7 @@ uint8_t LFULogIncr(uint8_t counter) { unsigned long LFUDecrAndReturn(robj *o) { unsigned long ldt = o->lru >> 8; unsigned long counter = o->lru & 255; - if (LFUTimeElapsed(ldt) >= server.lfu_decay_time && counter) { + if (LFUTimeElapsed(ldt) >= (unsigned long)server.lfu_decay_time && counter) { if (counter > LFU_INIT_VAL*2) { counter /= 2; if (counter < LFU_INIT_VAL*2) counter = LFU_INIT_VAL*2; diff --git a/src/server.h b/src/server.h index e3b56075a..aa04344cb 100644 --- a/src/server.h +++ b/src/server.h @@ -1118,8 +1118,8 @@ struct redisServer { unsigned long long maxmemory; /* Max number of memory bytes to use */ int maxmemory_policy; /* Policy for key eviction */ int maxmemory_samples; /* 
Pricision of random sampling */ - unsigned int lfu_log_factor; /* LFU logarithmic counter factor. */ - unsigned int lfu_decay_time; /* LFU counter decay factor. */ + int lfu_log_factor; /* LFU logarithmic counter factor. */ + int lfu_decay_time; /* LFU counter decay factor. */ /* Blocked clients */ unsigned int bpop_blocked_clients; /* Number of clients blocked by lists */ list *unblocked_clients; /* list of clients to unblock before next loop */ From 660f01011cbdb618f6ee8869e19cee556f1a3c82 Mon Sep 17 00:00:00 2001 From: "zhaozhao.zz" Date: Sun, 15 Oct 2017 20:17:55 +0800 Subject: [PATCH 0458/1722] LFU: do some changes about LFU to find hotkeys Firstly, use access time to replace the decreas time of LFU. For function LFUDecrAndReturn, it should only try to get decremented counter, not update LFU fields, we will update it in an explicit way. And we will times halve the counter according to the times of elapsed time than server.lfu_decay_time. Everytime a key is accessed, we should update the LFU including update access time, and increment the counter after call function LFUDecrAndReturn. If a key is overwritten, the LFU should be also updated. Then we can use `OBJECT freq` command to get a key's frequence, and LFUDecrAndReturn should be called in `OBJECT freq` command in case of the key has not been accessed for a long time, because we update the access time only when the key is read or overwritten. --- src/db.c | 16 +++++++++++++--- src/evict.c | 31 ++++++++++++++++++------------- src/object.c | 8 ++++++-- src/server.h | 3 ++- 4 files changed, 39 insertions(+), 19 deletions(-) diff --git a/src/db.c b/src/db.c index 71c642d00..4d6999be3 100644 --- a/src/db.c +++ b/src/db.c @@ -38,6 +38,15 @@ * C-level DB API *----------------------------------------------------------------------------*/ +/* Update LFU when an object is accessed. + * Firstly, decrement the counter if the decrement time is reached. 
+ * Then logarithmically increment the counter, and update the access time. */ +void updateLFU(robj *val) { + unsigned long counter = LFUDecrAndReturn(val); + counter = LFULogIncr(counter); + val->lru = (LFUGetTimeInMinutes()<<8) | counter; +} + /* Low level key lookup API, not actually called directly from commands * implementations that should instead rely on lookupKeyRead(), * lookupKeyWrite() and lookupKeyReadWithFlags(). */ @@ -54,9 +63,7 @@ robj *lookupKey(redisDb *db, robj *key, int flags) { !(flags & LOOKUP_NOTOUCH)) { if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) { - unsigned long ldt = val->lru >> 8; - unsigned long counter = LFULogIncr(val->lru & 255); - val->lru = (ldt << 8) | counter; + updateLFU(val); } else { val->lru = LRU_CLOCK(); } @@ -180,6 +187,9 @@ void dbOverwrite(redisDb *db, robj *key, robj *val) { int saved_lru = old->lru; dictReplace(db->dict, key->ptr, val); val->lru = saved_lru; + /* LFU should be not only copied but also updated + * when a key is overwritten. */ + updateLFU(val); } else { dictReplace(db->dict, key->ptr, val); } diff --git a/src/evict.c b/src/evict.c index 0a04ed1bb..55b132123 100644 --- a/src/evict.c +++ b/src/evict.c @@ -60,8 +60,6 @@ struct evictionPoolEntry { static struct evictionPoolEntry *EvictionPoolLRU; -unsigned long LFUDecrAndReturn(robj *o); - /* ---------------------------------------------------------------------------- * Implementation of eviction, aging and LRU * --------------------------------------------------------------------------*/ @@ -302,8 +300,8 @@ unsigned long LFUGetTimeInMinutes(void) { return (server.unixtime/60) & 65535; } -/* Given an object last decrement time, compute the minimum number of minutes - * that elapsed since the last decrement. Handle overflow (ldt greater than +/* Given an object last access time, compute the minimum number of minutes + * that elapsed since the last access. 
Handle overflow (ldt greater than * the current 16 bits minutes time) considering the time as wrapping * exactly once. */ unsigned long LFUTimeElapsed(unsigned long ldt) { @@ -324,24 +322,31 @@ uint8_t LFULogIncr(uint8_t counter) { return counter; } -/* If the object decrement time is reached, decrement the LFU counter and - * update the decrement time field. Return the object frequency counter. +/* If the object decrement time is reached decrement the LFU counter but + * do not update LFU fields of the object, we update the access time + * and counter in an explicit way when the object is really accessed. + * And we will times halve the counter according to the times of + * elapsed time than server.lfu_decay_time. + * Return the object frequency counter. * * This function is used in order to scan the dataset for the best object * to fit: as we check for the candidate, we incrementally decrement the * counter of the scanned objects if needed. */ -#define LFU_DECR_INTERVAL 1 unsigned long LFUDecrAndReturn(robj *o) { unsigned long ldt = o->lru >> 8; unsigned long counter = o->lru & 255; - if (LFUTimeElapsed(ldt) >= (unsigned long)server.lfu_decay_time && counter) { - if (counter > LFU_INIT_VAL*2) { - counter /= 2; - if (counter < LFU_INIT_VAL*2) counter = LFU_INIT_VAL*2; + long halve_times = server.lfu_decay_time ? 
LFUTimeElapsed(ldt) / server.lfu_decay_time : 0; + if (halve_times > 0 && counter) { + if (halve_times == 1) { + if (counter > LFU_INIT_VAL*2) { + counter /= 2; + if (counter < LFU_INIT_VAL*2) counter = LFU_INIT_VAL*2; + } else { + counter--; + } } else { - counter--; + counter = counter >> halve_times; } - o->lru = (LFUGetTimeInMinutes()<<8) | counter; } return counter; } diff --git a/src/object.c b/src/object.c index 8c33d7ef6..d2f8d53c5 100644 --- a/src/object.c +++ b/src/object.c @@ -1050,10 +1050,14 @@ void objectCommand(client *c) { if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nullbulk)) == NULL) return; if (!(server.maxmemory_policy & MAXMEMORY_FLAG_LFU)) { - addReplyError(c,"A non-LFU maxmemory policy is selected, access frequency not tracked. Please note that when switching between policies at runtime LRU and LFU data will take some time to adjust."); + addReplyError(c,"An LFU maxmemory policy is not selected, access frequency not tracked. Please note that when switching between policies at runtime LRU and LFU data will take some time to adjust."); return; } - addReplyLongLong(c,o->lru&255); + /* LFUDecrAndReturn should be called + * in case of the key has not been accessed for a long time, + * because we update the access time only + * when the key is read or overwritten. */ + addReplyLongLong(c,LFUDecrAndReturn(o)); } else { addReplyErrorFormat(c, "Unknown subcommand or wrong number of arguments for '%s'. Try OBJECT help", (char *)c->argv[1]->ptr); diff --git a/src/server.h b/src/server.h index aa04344cb..9b7da1d37 100644 --- a/src/server.h +++ b/src/server.h @@ -586,7 +586,7 @@ typedef struct redisObject { unsigned encoding:4; unsigned lru:LRU_BITS; /* LRU time (relative to global lru_clock) or * LFU data (least significant 8 bits frequency - * and most significant 16 bits decreas time). */ + * and most significant 16 bits access time). 
*/ int refcount; void *ptr; } robj; @@ -1802,6 +1802,7 @@ void evictionPoolAlloc(void); #define LFU_INIT_VAL 5 unsigned long LFUGetTimeInMinutes(void); uint8_t LFULogIncr(uint8_t value); +unsigned long LFUDecrAndReturn(robj *o); /* Keys hashing / comparison functions for dict.c hash tables. */ uint64_t dictSdsHash(const void *key); From 0a76b9bcd8b29024c5b8a119c2a83be7c3d1ce80 Mon Sep 17 00:00:00 2001 From: "zhaozhao.zz" Date: Thu, 19 Oct 2017 14:04:39 +0800 Subject: [PATCH 0459/1722] LFU: add hotkeys option to redis-cli --- src/redis-cli.c | 135 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 135 insertions(+) diff --git a/src/redis-cli.c b/src/redis-cli.c index 84eabf391..a6f8c1139 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -107,6 +107,7 @@ static struct config { char *pattern; char *rdb_filename; int bigkeys; + int hotkeys; int stdinarg; /* get last arg from stdin. (-x option) */ char *auth; int output; /* output mode, see OUTPUT_* defines */ @@ -1129,6 +1130,8 @@ static int parseOptions(int argc, char **argv) { config.pipe_timeout = atoi(argv[++i]); } else if (!strcmp(argv[i],"--bigkeys")) { config.bigkeys = 1; + } else if (!strcmp(argv[i],"--hotkeys")) { + config.hotkeys = 1; } else if (!strcmp(argv[i],"--eval") && !lastarg) { config.eval = argv[++i]; } else if (!strcmp(argv[i],"--ldb")) { @@ -1229,6 +1232,8 @@ static void usage(void) { " no reply is received within seconds.\n" " Default timeout: %d. 
Use 0 to wait forever.\n" " --bigkeys Sample Redis keys looking for big keys.\n" +" --hotkeys Sample Redis keys looking for hot keys.\n" +" only works when maxmemory-policy is *lfu.\n" " --scan List all keys using the SCAN command.\n" " --pattern Useful with --scan to specify a SCAN pattern.\n" " --intrinsic-latency Run a test to measure intrinsic system latency.\n" @@ -2343,6 +2348,129 @@ static void findBigKeys(void) { exit(0); } +static void getKeyFreqs(redisReply *keys, unsigned long long *freqs) { + redisReply *reply; + unsigned int i; + + /* Pipeline OBJECT freq commands */ + for(i=0;ielements;i++) { + redisAppendCommand(context, "OBJECT freq %s", keys->element[i]->str); + } + + /* Retrieve freqs */ + for(i=0;ielements;i++) { + if(redisGetReply(context, (void**)&reply)!=REDIS_OK) { + fprintf(stderr, "Error getting freq for key '%s' (%d: %s)\n", + keys->element[i]->str, context->err, context->errstr); + exit(1); + } else if(reply->type != REDIS_REPLY_INTEGER) { + if(reply->type == REDIS_REPLY_ERROR) { + fprintf(stderr, "Error: %s\n", reply->str); + exit(1); + } else { + fprintf(stderr, "Warning: OBJECT freq on '%s' failed (may have been deleted)\n", keys->element[i]->str); + freqs[i] = 0; + } + } else { + freqs[i] = reply->integer; + } + freeReplyObject(reply); + } +} + +#define HOTKEYS_SAMPLE 16 +static void findHotKeys(void) { + redisReply *keys, *reply; + unsigned long long counters[HOTKEYS_SAMPLE] = {0}; + sds hotkeys[HOTKEYS_SAMPLE] = {NULL}; + unsigned long long sampled = 0, total_keys, *freqs = NULL, it = 0; + unsigned int arrsize = 0, i, k; + double pct; + + /* Total keys pre scanning */ + total_keys = getDbSize(); + + /* Status message */ + printf("\n# Scanning the entire keyspace to find hot keys as well as\n"); + printf("# average sizes per key type. 
You can use -i 0.1 to sleep 0.1 sec\n"); + printf("# per 100 SCAN commands (not usually needed).\n\n"); + + /* SCAN loop */ + do { + /* Calculate approximate percentage completion */ + pct = 100 * (double)sampled/total_keys; + + /* Grab some keys and point to the keys array */ + reply = sendScan(&it); + keys = reply->element[1]; + + /* Reallocate our freqs array if we need to */ + if(keys->elements > arrsize) { + freqs = zrealloc(freqs, sizeof(unsigned long long)*keys->elements); + + if(!freqs) { + fprintf(stderr, "Failed to allocate storage for keys!\n"); + exit(1); + } + + arrsize = keys->elements; + } + + getKeyFreqs(keys, freqs); + + /* Now update our stats */ + for(i=0;ielements;i++) { + sampled++; + /* Update overall progress */ + if(sampled % 1000000 == 0) { + printf("[%05.2f%%] Sampled %llu keys so far\n", pct, sampled); + } + + /* Use eviction pool here */ + k = 0; + while (k < HOTKEYS_SAMPLE && freqs[i] > counters[k]) k++; + if (k == 0) continue; + k--; + if (k == 0 || counters[k] == 0) { + sdsfree(hotkeys[k]); + } else { + sdsfree(hotkeys[0]); + memmove(counters,counters+1,sizeof(counters[0])*k); + memmove(hotkeys,hotkeys+1,sizeof(hotkeys[0])*k); + } + counters[k] = freqs[i]; + hotkeys[k] = sdsnew(keys->element[i]->str); + printf( + "[%05.2f%%] Hot key '%s' found so far with counter %llu\n", + pct, keys->element[i]->str, freqs[i]); + } + + /* Sleep if we've been directed to do so */ + if(sampled && (sampled %100) == 0 && config.interval) { + usleep(config.interval); + } + + freeReplyObject(reply); + } while(it != 0); + + if (freqs) zfree(freqs); + + /* We're done */ + printf("\n-------- summary -------\n\n"); + + printf("Sampled %llu keys in the keyspace!\n", sampled); + + for (i=1; i<= HOTKEYS_SAMPLE; i++) { + k = HOTKEYS_SAMPLE - i; + if(counters[k]>0) { + printf("hot key found with counter: %llu\tkeyname: %s\n", counters[k], hotkeys[k]); + sdsfree(hotkeys[k]); + } + } + + exit(0); +} + 
/*------------------------------------------------------------------------------ * Stats mode *--------------------------------------------------------------------------- */ @@ -2720,6 +2848,7 @@ int main(int argc, char **argv) { config.pipe_mode = 0; config.pipe_timeout = REDIS_CLI_DEFAULT_PIPE_TIMEOUT; config.bigkeys = 0; + config.hotkeys = 0; config.stdinarg = 0; config.auth = NULL; config.eval = NULL; @@ -2780,6 +2909,12 @@ int main(int argc, char **argv) { findBigKeys(); } + /* Find hot keys */ + if (config.hotkeys) { + if (cliConnect(0) == REDIS_ERR) exit(1); + findHotKeys(); + } + /* Stat mode */ if (config.stat_mode) { if (cliConnect(0) == REDIS_ERR) exit(1); From ac58cbc7646446c2fa352147f1c8c32cd546053b Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 27 Nov 2017 19:04:58 +0100 Subject: [PATCH 0460/1722] LFU: Fix LFUDecrAndReturn() to just decrement. Splitting the popularity in half actually just needs decrementing the counter because the counter is logarithmic. --- src/evict.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/src/evict.c b/src/evict.c index 55b132123..bf485ddc5 100644 --- a/src/evict.c +++ b/src/evict.c @@ -335,19 +335,9 @@ uint8_t LFULogIncr(uint8_t counter) { unsigned long LFUDecrAndReturn(robj *o) { unsigned long ldt = o->lru >> 8; unsigned long counter = o->lru & 255; - long halve_times = server.lfu_decay_time ? LFUTimeElapsed(ldt) / server.lfu_decay_time : 0; - if (halve_times > 0 && counter) { - if (halve_times == 1) { - if (counter > LFU_INIT_VAL*2) { - counter /= 2; - if (counter < LFU_INIT_VAL*2) counter = LFU_INIT_VAL*2; - } else { - counter--; - } - } else { - counter = counter >> halve_times; - } - } + unsigned long num_periods = server.lfu_decay_time ? LFUTimeElapsed(ldt) / server.lfu_decay_time : 0; + if (num_periods) + counter = (num_periods > counter) ? 
0 : counter - num_periods; return counter; } From 5919c4d1e94d2be054d93ced7b0ab74e2689b9d9 Mon Sep 17 00:00:00 2001 From: Itamar Haber Date: Tue, 28 Nov 2017 18:18:45 +0200 Subject: [PATCH 0461/1722] Standardizes arity handling of DEBUG --- src/debug.c | 5 ----- src/server.c | 2 +- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/src/debug.c b/src/debug.c index 5c3fd3471..a058737c1 100644 --- a/src/debug.c +++ b/src/debug.c @@ -262,11 +262,6 @@ void computeDatasetDigest(unsigned char *final) { } void debugCommand(client *c) { - if (c->argc == 1) { - addReplyError(c,"You must specify a subcommand for DEBUG. Try DEBUG HELP for info."); - return; - } - if (!strcasecmp(c->argv[1]->ptr,"help")) { void *blenp = addDeferredMultiBulkLength(c); int blen = 0; diff --git a/src/server.c b/src/server.c index 6bc8bc66f..30c5297a8 100644 --- a/src/server.c +++ b/src/server.c @@ -258,7 +258,7 @@ struct redisCommand redisCommandTable[] = { {"persist",persistCommand,2,"wF",0,NULL,1,1,1,0,0}, {"slaveof",slaveofCommand,3,"ast",0,NULL,0,0,0,0,0}, {"role",roleCommand,1,"lst",0,NULL,0,0,0,0,0}, - {"debug",debugCommand,-1,"as",0,NULL,0,0,0,0,0}, + {"debug",debugCommand,-2,"as",0,NULL,0,0,0,0,0}, {"config",configCommand,-2,"lat",0,NULL,0,0,0,0,0}, {"subscribe",subscribeCommand,-2,"pslt",0,NULL,0,0,0,0,0}, {"unsubscribe",unsubscribeCommand,-1,"pslt",0,NULL,0,0,0,0,0}, From 0cd79860ec900bfde5c96bef36904330f2be50f7 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 28 Nov 2017 18:25:14 +0100 Subject: [PATCH 0462/1722] adlist: fix listJoin() in the case the second list is empty. See #4192, the original PR removed lines of code that are actually needed, so thanks to @chunqiulfq for reporting the problem, but merging solution from @jeesyn after checking, together with @artix75, that the logic covers all the cases. 
--- src/adlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/adlist.c b/src/adlist.c index e87d25cee..ec5f8bbf4 100644 --- a/src/adlist.c +++ b/src/adlist.c @@ -353,7 +353,7 @@ void listJoin(list *l, list *o) { else l->head = o->head; - l->tail = o->tail; + if (o->tail) l->tail = o->tail; l->len += o->len; /* Setup other as an empty list. */ From ff2b31e9a312125abeb24d376066a41b390e608f Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 28 Nov 2017 18:38:49 +0100 Subject: [PATCH 0463/1722] t_hash.c: clarify calling two times the same function. --- src/t_hash.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/t_hash.c b/src/t_hash.c index 700a6233a..be73932c5 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -287,8 +287,8 @@ int hashTypeDelete(robj *o, sds field) { if (fptr != NULL) { fptr = ziplistFind(fptr, (unsigned char*)field, sdslen(field), 1); if (fptr != NULL) { - zl = ziplistDelete(zl,&fptr); - zl = ziplistDelete(zl,&fptr); + zl = ziplistDelete(zl,&fptr); /* Delete the key. */ + zl = ziplistDelete(zl,&fptr); /* Delete the value. */ o->ptr = zl; deleted = 1; } From 78aabf66ff3189642e38f27aef48bb6141e301c7 Mon Sep 17 00:00:00 2001 From: Itamar Haber Date: Mon, 27 Nov 2017 17:57:44 +0200 Subject: [PATCH 0464/1722] Standardizes the 'help' subcommand This adds a new `addReplyHelp` helper that's used by commands when returning a help text. The following commands have been touched: DEBUG, OBJECT, COMMAND, PUBSUB, SCRIPT and SLOWLOG. WIP Fix entry command table entry for OBJECT for HELP option. After #4472 the command may have just 2 arguments. Improve OBJECT HELP descriptions. See #4472. 
WIP 2 WIP 3 --- src/debug.c | 68 ++++++++++++++++++------------------------------ src/networking.c | 24 +++++++++++++++++ src/object.c | 24 +++++++---------- src/pubsub.c | 16 +++++++++--- src/scripting.c | 17 ++++++++++-- src/server.c | 16 +++++++++--- src/server.h | 1 + src/slowlog.c | 15 ++++++++--- 8 files changed, 112 insertions(+), 69 deletions(-) diff --git a/src/debug.c b/src/debug.c index 5c3fd3471..9236d806c 100644 --- a/src/debug.c +++ b/src/debug.c @@ -267,48 +267,29 @@ void debugCommand(client *c) { return; } - if (!strcasecmp(c->argv[1]->ptr,"help")) { - void *blenp = addDeferredMultiBulkLength(c); - int blen = 0; - blen++; addReplyStatus(c, - "DEBUG arg arg ... arg. Subcommands:"); - blen++; addReplyStatus(c, - "segfault -- Crash the server with sigsegv."); - blen++; addReplyStatus(c, - "panic -- Crash the server simulating a panic."); - blen++; addReplyStatus(c, - "restart -- Graceful restart: save config, db, restart."); - blen++; addReplyStatus(c, - "crash-and-recovery -- Hard crash and restart after delay."); - blen++; addReplyStatus(c, - "assert -- Crash by assertion failed."); - blen++; addReplyStatus(c, - "reload -- Save the RDB on disk and reload it back in memory."); - blen++; addReplyStatus(c, - "loadaof -- Flush the AOF buffers on disk and reload the AOF in memory."); - blen++; addReplyStatus(c, - "object -- Show low level info about key and associated value."); - blen++; addReplyStatus(c, - "sdslen -- Show low level SDS string info representing key and value."); - blen++; addReplyStatus(c, - "ziplist -- Show low level info about the ziplist encoding."); - blen++; addReplyStatus(c, - "populate [prefix] [size] -- Create string keys named key:. If a prefix is specified is used instead of the 'key' prefix."); - blen++; addReplyStatus(c, - "digest -- Outputs an hex signature representing the current DB content."); - blen++; addReplyStatus(c, - "sleep -- Stop the server for . 
Decimals allowed."); - blen++; addReplyStatus(c, - "set-active-expire (0|1) -- Setting it to 0 disables expiring keys in background when they are not accessed (otherwise the Redis behavior). Setting it to 1 reenables back the default."); - blen++; addReplyStatus(c, - "lua-always-replicate-commands (0|1) -- Setting it to 1 makes Lua replication defaulting to replicating single commands, without the script having to enable effects replication."); - blen++; addReplyStatus(c, - "error -- Return a Redis protocol error with as message. Useful for clients unit tests to simulate Redis errors."); - blen++; addReplyStatus(c, - "structsize -- Return the size of different Redis core C structures."); - blen++; addReplyStatus(c, - "htstats -- Return hash table statistics of the specified Redis database."); - setDeferredMultiBulkLength(c,blenp,blen); + if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) { + const char *help[] = { + "assert -- Crash by assertion failed.", + "crash-and-recovery -- Hard crash and restart after delay.", + "digest -- Outputs an hex signature representing the current DB content.", + "htstats -- Return hash table statistics of the specified Redis database.", + "loadaof -- Flush the AOF buffers on disk and reload the AOF in memory.", + "lua-always-replicate-commands (0|1) -- Setting it to 1 makes Lua replication defaulting to replicating single commands, without the script having to enable effects replication.", + "object -- Show low level info about key and associated value.", + "panic -- Crash the server simulating a panic.", + "populate [prefix] [size] -- Create string keys named key:. 
If a prefix is specified is used instead of the 'key' prefix.", + "reload -- Save the RDB on disk and reload it back in memory.", + "restart -- Graceful restart: save config, db, restart.", + "sdslen -- Show low level SDS string info representing key and value.", + "segfault -- Crash the server with sigsegv.", + "set-active-expire (0|1) -- Setting it to 0 disables expiring keys in background when they are not accessed (otherwise the Redis behavior). Setting it to 1 reenables back the default.", + "sleep -- Stop the server for . Decimals allowed.", + "structsize -- Return the size of different Redis core C structures.", + "ziplist -- Show low level info about the ziplist encoding.", + "error -- Return a Redis protocol error with as message. Useful for clients unit tests to simulate Redis errors.", + NULL + }; + addReplyHelp(c, help); } else if (!strcasecmp(c->argv[1]->ptr,"segfault")) { *((char*)-1) = 'x'; } else if (!strcasecmp(c->argv[1]->ptr,"panic")) { @@ -550,8 +531,9 @@ void debugCommand(client *c) { addReplyBulkSds(c,stats); } else { - addReplyErrorFormat(c, "Unknown DEBUG subcommand or wrong number of arguments for '%s'", + addReplyErrorFormat(c, "Unknown subcommand or wrong number of arguments for '%s'. Try DEBUG help", (char*)c->argv[1]->ptr); + return; } } diff --git a/src/networking.c b/src/networking.c index aeaeca967..ea8f7d0b5 100644 --- a/src/networking.c +++ b/src/networking.c @@ -584,6 +584,30 @@ void addReplyBulkLongLong(client *c, long long ll) { addReplyBulkCBuffer(c,buf,len); } +/* Add an array of strings as a bulk reply with a heading. + * This function is typically invoked by from commands that support + * subcommands in response to the 'help' subcommand. The help array + * is terminated by NULL sentinel. 
 */ +void addReplyHelp(client *c, const char **help) { + sds cmd = sdsnew((char*) c->argv[0]->ptr); + void *blenp = addDeferredMultiBulkLength(c); + int blen = 0; + int hlen = 0; + + sdstoupper(cmd); + addReplyStatusFormat(c, + "%s <subcommand> arg arg ... arg. Subcommands are:",cmd); + blen++; + sdsfree(cmd); + + while (help[hlen]) { + addReplyStatus(c,help[hlen++]); + blen++; + } + + setDeferredMultiBulkLength(c,blenp,blen); +} + /* Copy 'src' client output buffers into 'dst' client output buffers. * The function takes care of freeing the old output buffers of the * destination client. */ diff --git a/src/object.c b/src/object.c index e62b5b3d9..2b9403a49 100644 --- a/src/object.c +++ b/src/object.c @@ -1016,20 +1016,15 @@ robj *objectCommandLookupOrReply(client *c, robj *key, robj *reply) { void objectCommand(client *c) { robj *o; - if (!strcasecmp(c->argv[1]->ptr,"help") && c->argc == 2) { - void *blenp = addDeferredMultiBulkLength(c); - int blen = 0; - blen++; addReplyStatus(c, - "OBJECT <subcommand> key. Subcommands:"); - blen++; addReplyStatus(c, - "refcount -- Return the number of references of the value associated with the specified key."); - blen++; addReplyStatus(c, - "encoding -- Return the kind of internal representation used in order to store the value associated with a key."); - blen++; addReplyStatus(c, - "idletime -- Return the number of seconds since the object stored at the specified key is idle."); - blen++; addReplyStatus(c, - "freq -- Return the inverse logarithmic access frequency counter of the object stored at the specified key."); - setDeferredMultiBulkLength(c,blenp,blen); + if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) { + const char *help[] = { + "encoding -- Return the kind of internal representation used in order to store the value associated with a key.", + "freq -- Return the access frequency index of the key.
The returned integer is proportional to the logarithm of the recent access frequency of the key.", + "idletime -- Return the idle time of the key, that is the approximated number of seconds elapsed since the last access to the key.", + "refcount -- Return the number of references of the value associated with the specified key.", + NULL + }; + addReplyHelp(c, help); } else if (!strcasecmp(c->argv[1]->ptr,"refcount") && c->argc == 3) { if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nullbulk)) == NULL) return; @@ -1057,6 +1052,7 @@ void objectCommand(client *c) { } else { addReplyErrorFormat(c, "Unknown subcommand or wrong number of arguments for '%s'. Try OBJECT help", (char *)c->argv[1]->ptr); + return; } } diff --git a/src/pubsub.c b/src/pubsub.c index b6d1167db..8bd6e5d60 100644 --- a/src/pubsub.c +++ b/src/pubsub.c @@ -325,8 +325,16 @@ void publishCommand(client *c) { /* PUBSUB command for Pub/Sub introspection. */ void pubsubCommand(client *c) { - if (!strcasecmp(c->argv[1]->ptr,"channels") && - (c->argc == 2 || c->argc ==3)) + if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) { + const char *help[] = { + "channels [<pattern>] -- Return the currently active channels matching a pattern (default: all).", + "numpat -- Return number of subscriptions to patterns.", + "numsub [channel-1 .. channel-N] -- Returns the number of subscribers for the specified channels (excluding patterns, default: none).", + NULL + }; + addReplyHelp(c, help); + } else if (!strcasecmp(c->argv[1]->ptr,"channels") && + (c->argc == 2 || c->argc == 3)) { /* PUBSUB CHANNELS [<pattern>] */ sds pat = (c->argc == 2) ? NULL : c->argv[2]->ptr; @@ -364,8 +372,8 @@ void pubsubCommand(client *c) { /* PUBSUB NUMPAT */ addReplyLongLong(c,listLength(server.pubsub_patterns)); } else { - addReplyErrorFormat(c, - "Unknown PUBSUB subcommand or wrong number of arguments for '%s'", + addReplyErrorFormat(c, "Unknown subcommand or wrong number of arguments for '%s'.
Try PUBSUB help", (char*)c->argv[1]->ptr); + return; } } diff --git a/src/scripting.c b/src/scripting.c index d9f954068..3c6cc0786 100644 --- a/src/scripting.c +++ b/src/scripting.c @@ -1430,7 +1430,17 @@ void evalShaCommand(client *c) { } void scriptCommand(client *c) { - if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"flush")) { + if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) { + const char *help[] = { + "debug (yes|sync|no) -- Set the debug mode for subsequent scripts executed.", + "exists sha1 [sha1 ...] -- Return information about the existence of the scripts in the script cache.", + "flush -- Flush the Lua scripts cache.", + "kill -- Kill the currently executing Lua script.", + "load script -- Load a script into the scripts cache, without executing it.", + NULL + }; + addReplyHelp(c, help); + } else if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"flush")) { scriptingReset(); addReply(c,shared.ok); replicationScriptCacheFlush(); @@ -1489,9 +1499,12 @@ void scriptCommand(client *c) { c->flags |= CLIENT_LUA_DEBUG_SYNC; } else { addReplyError(c,"Use SCRIPT DEBUG yes/sync/no"); + return; } } else { - addReplyError(c, "Unknown SCRIPT subcommand or wrong # of args."); + addReplyErrorFormat(c, "Unknown subcommand or wrong number of arguments for '%s'. 
Try SCRIPT help", + (char*)c->argv[1]->ptr); + return; } } diff --git a/src/server.c b/src/server.c index 888926267..457cf8f2e 100644 --- a/src/server.c +++ b/src/server.c @@ -276,7 +276,7 @@ struct redisCommand redisCommandTable[] = { {"readonly",readonlyCommand,1,"F",0,NULL,0,0,0,0,0}, {"readwrite",readwriteCommand,1,"F",0,NULL,0,0,0,0,0}, {"dump",dumpCommand,2,"r",0,NULL,1,1,1,0,0}, - {"object",objectCommand,3,"r",0,NULL,2,2,2,0,0}, + {"object",objectCommand,-2,"r",0,NULL,2,2,2,0,0}, {"memory",memoryCommand,-2,"r",0,NULL,0,0,0,0,0}, {"client",clientCommand,-2,"as",0,NULL,0,0,0,0,0}, {"eval",evalCommand,-3,"s",0,evalGetKeys,0,0,0,0,0}, @@ -2733,7 +2733,16 @@ void commandCommand(client *c) { dictIterator *di; dictEntry *de; - if (c->argc == 1) { + if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) { + const char *help[] = { + "(no subcommand) -- Return details about all Redis commands.", + "count -- Return the total number of commands in this Redis server.", + "getkeys -- Return the keys from a full Redis command.", + "info [command-name ...] -- Return details about multiple Redis commands.", + NULL + }; + addReplyHelp(c, help); + } else if (c->argc == 1) { addReplyMultiBulkLen(c, dictSize(server.commands)); di = dictGetIterator(server.commands); while ((de = dictNext(di)) != NULL) { @@ -2767,7 +2776,8 @@ void commandCommand(client *c) { for (j = 0; j < numkeys; j++) addReplyBulk(c,c->argv[keys[j]+2]); getKeysFreeResult(keys); } else { - addReplyError(c, "Unknown subcommand or wrong number of arguments."); + addReplyErrorFormat(c, "Unknown subcommand or wrong number of arguments for '%s'. 
Try COMMAND help", + (char*)c->argv[1]->ptr); return; } } diff --git a/src/server.h b/src/server.h index e3b56075a..9b26221f0 100644 --- a/src/server.h +++ b/src/server.h @@ -1357,6 +1357,7 @@ void addReplyDouble(client *c, double d); void addReplyHumanLongDouble(client *c, long double d); void addReplyLongLong(client *c, long long ll); void addReplyMultiBulkLen(client *c, long length); +void addReplyHelp(client *c, const char **help); void copyClientOutputBuffer(client *dst, client *src); size_t sdsZmallocSize(sds s); size_t getStringObjectSdsUsedMemory(robj *o); diff --git a/src/slowlog.c b/src/slowlog.c index 32ec4374c..9d6f4f02d 100644 --- a/src/slowlog.c +++ b/src/slowlog.c @@ -140,7 +140,15 @@ void slowlogReset(void) { /* The SLOWLOG command. Implements all the subcommands needed to handle the * Redis slow log. */ void slowlogCommand(client *c) { - if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"reset")) { + if (!strcasecmp(c->argv[1]->ptr,"help") && c->argc == 2) { + const char *help[] = { + "get [count] -- Return the top entries from the slowlog (default: 10).", + "len -- Return the length of the slowlog.", + "reset -- Reset the slowlog.", + NULL + }; + addReplyHelp(c, help); + } else if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"reset")) { slowlogReset(); addReply(c,shared.ok); } else if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"len")) { @@ -177,7 +185,8 @@ void slowlogCommand(client *c) { } setDeferredMultiBulkLength(c,totentries,sent); } else { - addReplyError(c, - "Unknown SLOWLOG subcommand or wrong # of args. Try GET, RESET, LEN."); + addReplyErrorFormat(c, "Unknown subcommand or wrong number of arguments for '%s'. 
Try SLOWLOG help", + (char*)c->argv[1]->ptr); + return; } } From 22698eef450704c391d8e9884dd44dd4f919ffc5 Mon Sep 17 00:00:00 2001 From: Itamar Haber Date: Thu, 30 Nov 2017 00:30:30 +0200 Subject: [PATCH 0465/1722] Check arity in SLOWLOG before accessing arg --- src/slowlog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/slowlog.c b/src/slowlog.c index 9d6f4f02d..525fa8a09 100644 --- a/src/slowlog.c +++ b/src/slowlog.c @@ -140,7 +140,7 @@ void slowlogReset(void) { /* The SLOWLOG command. Implements all the subcommands needed to handle the * Redis slow log. */ void slowlogCommand(client *c) { - if (!strcasecmp(c->argv[1]->ptr,"help") && c->argc == 2) { + if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) { const char *help[] = { "get [count] -- Return the top entries from the slowlog (default: 10).", "len -- Return the length of the slowlog.", From ba788152030022f34d3df19a18ad69a29cae2e30 Mon Sep 17 00:00:00 2001 From: "zhaozhao.zz" Date: Thu, 30 Nov 2017 10:22:12 +0800 Subject: [PATCH 0466/1722] aof: fix the short write --- src/aof.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/src/aof.c b/src/aof.c index 0593b2707..1c1884b3d 100644 --- a/src/aof.c +++ b/src/aof.c @@ -266,6 +266,27 @@ int startAppendOnly(void) { return C_OK; } +ssize_t safe_write(int fd, const char *buf, size_t len) { + ssize_t nwritten = 0, totwritten = 0; + + while(len) { + nwritten = write(fd, buf, len); + + if (nwritten < 0) { + if (errno == EINTR) { + continue; + } + return totwritten ? totwritten : -1; + } + + len -= nwritten; + buf += nwritten; + totwritten += nwritten; + } + + return totwritten; +} + /* Write the append only file buffer on disk. 
* * Since we are required to write the AOF before replying to the client, @@ -323,7 +344,7 @@ void flushAppendOnlyFile(int force) { * or alike */ latencyStartMonitor(latency); - nwritten = write(server.aof_fd,server.aof_buf,sdslen(server.aof_buf)); + nwritten = safe_write(server.aof_fd,server.aof_buf,sdslen(server.aof_buf)); latencyEndMonitor(latency); /* We want to capture different events for delayed writes: * when the delay happens with a pending fsync, or with a saving child From e40beab163fdd7caee44c3115b3e1632dd7424cb Mon Sep 17 00:00:00 2001 From: "zhaozhao.zz" Date: Thu, 30 Nov 2017 10:27:12 +0800 Subject: [PATCH 0467/1722] aof: cast sdslen to ssize_t --- src/aof.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aof.c b/src/aof.c index 1c1884b3d..50abe1d34 100644 --- a/src/aof.c +++ b/src/aof.c @@ -363,7 +363,7 @@ void flushAppendOnlyFile(int force) { /* We performed the write so reset the postponed flush sentinel to zero. */ server.aof_flush_postponed_start = 0; - if (nwritten != (signed)sdslen(server.aof_buf)) { + if (nwritten != (ssize_t)sdslen(server.aof_buf)) { static time_t last_write_error_log = 0; int can_log = 0; From 44e3dc083ad8d75ba5683ca32c873189dba68aa2 Mon Sep 17 00:00:00 2001 From: "zhaozhao.zz" Date: Thu, 30 Nov 2017 18:11:05 +0800 Subject: [PATCH 0468/1722] networking: optimize unlinkClient() in freeClient() --- src/networking.c | 14 ++++++++++---- src/replication.c | 1 + src/server.h | 1 + 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/networking.c b/src/networking.c index aeaeca967..10e8b48e6 100644 --- a/src/networking.c +++ b/src/networking.c @@ -135,7 +135,12 @@ client *createClient(int fd) { c->peerid = NULL; listSetFreeMethod(c->pubsub_patterns,decrRefCountVoid); listSetMatchMethod(c->pubsub_patterns,listMatchObjects); - if (fd != -1) listAddNodeTail(server.clients,c); + if (fd != -1) { + listAddNodeTail(server.clients,c); + c->client_list_node = listLast(server.clients); + } else { + 
c->client_list_node = NULL; + } initClientMultiState(c); return c; } @@ -743,9 +748,10 @@ void unlinkClient(client *c) { * fd is already set to -1. */ if (c->fd != -1) { /* Remove from the list of active clients. */ - ln = listSearchKey(server.clients,c); - serverAssert(ln != NULL); - listDelNode(server.clients,ln); + if (c->client_list_node) { + listDelNode(server.clients,c->client_list_node); + c->client_list_node = NULL; + } /* Unregister async I/O handlers and close the socket. */ aeDeleteFileEvent(server.el,c->fd,AE_READABLE); diff --git a/src/replication.c b/src/replication.c index cf4db3e3a..1207e060b 100644 --- a/src/replication.c +++ b/src/replication.c @@ -2206,6 +2206,7 @@ void replicationResurrectCachedMaster(int newfd) { /* Re-add to the list of clients. */ listAddNodeTail(server.clients,server.master); + server.master->client_list_node = listLast(server.clients); if (aeCreateFileEvent(server.el, newfd, AE_READABLE, readQueryFromClient, server.master)) { serverLog(LL_WARNING,"Error resurrecting the cached master, impossible to add the readable handler: %s", strerror(errno)); diff --git a/src/server.h b/src/server.h index e3b56075a..82cb9a7e0 100644 --- a/src/server.h +++ b/src/server.h @@ -722,6 +722,7 @@ typedef struct client { dict *pubsub_channels; /* channels a client is interested in (SUBSCRIBE) */ list *pubsub_patterns; /* patterns a client is interested in (SUBSCRIBE) */ sds peerid; /* Cached peer ID. */ + listNode *client_list_node; /* list node in client list */ /* Response buffer */ int bufpos; From f8d42c5ec6ba401e57804c60dc86e67d5209fa8c Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 30 Nov 2017 18:08:21 +0100 Subject: [PATCH 0469/1722] Be more verbose when DEBUG RESTART fails. 
--- src/server.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/src/server.c b/src/server.c index 6bc8bc66f..e56b542a5 100644 --- a/src/server.c +++ b/src/server.c @@ -1549,16 +1549,29 @@ int restartServer(int flags, mstime_t delay) { /* Check if we still have accesses to the executable that started this * server instance. */ - if (access(server.executable,X_OK) == -1) return C_ERR; + if (access(server.executable,X_OK) == -1) { + serverLog(LL_WARNING,"Can't restart: this process has no " + "permissions to execute %s", server.executable); + return C_ERR; + } /* Config rewriting. */ if (flags & RESTART_SERVER_CONFIG_REWRITE && server.configfile && - rewriteConfig(server.configfile) == -1) return C_ERR; + rewriteConfig(server.configfile) == -1) + { + serverLog(LL_WARNING,"Can't restart: configuration rewrite process " + "failed"); + return C_ERR; + } /* Perform a proper shutdown. */ if (flags & RESTART_SERVER_GRACEFULLY && - prepareForShutdown(SHUTDOWN_NOFLAGS) != C_OK) return C_ERR; + prepareForShutdown(SHUTDOWN_NOFLAGS) != C_OK) + { + serverLog(LL_WARNING,"Can't restart: error preparing for shutdown"); + return C_ERR; + } /* Close all file descriptors, with the exception of stdin, stdout, strerr * which are useful if we restart a Redis server which is not daemonized. */ From 7f04efa1a85f7e4b4386c47b916077c6bb94bec8 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 30 Nov 2017 18:30:06 +0100 Subject: [PATCH 0470/1722] Prevent corruption of server.executable after DEBUG RESTART. Doing the following ended with a broken server.executable: 1. Start Redis with src/redis-server 2. Send CONFIG SET DIR /tmp/ 3. Send DEBUG RESTART At this point we called execve with an argv[0] that is no longer related to the new path. So after the restart the absolute path of the executable is recomputed in the wrong way. With this fix we pass the absolute path already computed as argv[0]. 
--- src/server.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/server.c b/src/server.c index e56b542a5..7498a25fd 100644 --- a/src/server.c +++ b/src/server.c @@ -1583,6 +1583,8 @@ int restartServer(int flags, mstime_t delay) { /* Execute the server with the original command line. */ if (delay) usleep(delay*1000); + zfree(server.exec_argv[0]); + server.exec_argv[0] = zstrdup(server.executable); execve(server.executable,server.exec_argv,environ); /* If an error occurred here, there is nothing we can do, but exit. */ From fbc1d534164f4cceb9ed67ec6fd6340436286a21 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 30 Nov 2017 18:37:07 +0100 Subject: [PATCH 0471/1722] Regression test: Slave restart with EVALSHA in backlog issue #4483. --- tests/integration/psync2.tcl | 65 +++++++++++++++++++++++++++++++++++- 1 file changed, 64 insertions(+), 1 deletion(-) diff --git a/tests/integration/psync2.tcl b/tests/integration/psync2.tcl index d91969e3e..3d9e5527a 100644 --- a/tests/integration/psync2.tcl +++ b/tests/integration/psync2.tcl @@ -10,7 +10,7 @@ start_server {} { # Config set debug_msg 0 ; # Enable additional debug messages - set no_exit 0; ; # Do not exit at end of the test + set no_exit 0 ; # Do not exit at end of the test set duration 20 ; # Total test seconds @@ -175,6 +175,69 @@ start_server {} { assert {$sync_count == $new_sync_count} } + test "PSYNC2: Slave RDB restart with EVALSHA in backlog issue #4483" { + # Pick a random slave + set slave_id [expr {($master_id+1)%5}] + set sync_count [status $R($master_id) sync_full] + + # Make sure to replicate the first EVAL while the salve is online + # so that it's part of the scripts the master believes it's safe + # to propagate as EVALSHA. 
+ $R($master_id) EVAL {return redis.call("incr","__mycounter")} 0 + $R($master_id) EVALSHA e6e0b547500efcec21eddb619ac3724081afee89 0 + + # Wait for the two to sync + wait_for_condition 50 1000 { + [$R($master_id) debug digest] == [$R($slave_id) debug digest] + } else { + fail "Slave not reconnecting" + } + + # Prevent the slave from receiving master updates, and at + # the same time send a new script several times to the + # master, so that we'll end with EVALSHA into the backlog. + $R($slave_id) slaveof 127.0.0.1 0 + + $R($master_id) EVALSHA e6e0b547500efcec21eddb619ac3724081afee89 0 + $R($master_id) EVALSHA e6e0b547500efcec21eddb619ac3724081afee89 0 + $R($master_id) EVALSHA e6e0b547500efcec21eddb619ac3724081afee89 0 + + catch { + $R($slave_id) config rewrite + $R($slave_id) debug restart + } + + # Reconfigure the slave correctly again, when it's back online. + set retry 50 + while {$retry} { + if {[catch { + $R($slave_id) slaveof $master_host $master_port + }]} { + after 1000 + } else { + break + } + incr retry -1 + } + + # The master should be back at 4 slaves eventually + wait_for_condition 50 1000 { + [status $R($master_id) connected_slaves] == 4 + } else { + fail "Slave not reconnecting" + } + set new_sync_count [status $R($master_id) sync_full] + assert {$sync_count == $new_sync_count} + + # However if the slave started with the full state of the + # scripting engine, we should now have the same digest. + wait_for_condition 50 1000 { + [$R($master_id) debug digest] == [$R($slave_id) debug digest] + } else { + fail "Debug digest mismatch between master and slave in post-restart handshake" + } + } + if {$no_exit} { while 1 { puts -nonewline .; flush stdout; after 1000} } From 445d37f24d36d73e8ab7ce140053363ccdaf2447 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 29 Nov 2017 15:09:07 +0100 Subject: [PATCH 0472/1722] PSYNC2: Save Lua scripts state into RDB file. 
This is currently needed in order to fix #4483, but this can be useful in other contexts, so maybe later we may want to remove the conditionals and always save/load scripts. Note that we are using the "lua" AUX field here, in order to guarantee backward compatibility of the RDB file. The unknown AUX fields must be discarded by past versions of Redis. --- src/rdb.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ src/server.h | 1 + 2 files changed, 48 insertions(+) diff --git a/src/rdb.c b/src/rdb.c index 00106cac4..d1495e79a 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -943,6 +943,27 @@ int rdbSaveRio(rio *rdb, int *error, int flags, rdbSaveInfo *rsi) { } di = NULL; /* So that we don't release it again on error. */ + /* If we are storing the replication information on disk, persist + * the script cache as well: on successful PSYNC after a restart, we need + * to be able to process any EVALSHA inside the replication backlog the + * master will send us. */ + if (rsi && dictSize(server.lua_scripts)) { + di = dictGetIterator(server.lua_scripts); + while((de = dictNext(di)) != NULL) { + sds sha = dictGetKey(de); + robj *body = dictGetVal(de); + /* Concatenate the SHA1 and the Lua script together. Because the + * SHA1 is fixed length, we will always be able to load it back + * telling apart the name from the body. */ + sds combo = sdsdup(sha); + combo = sdscatlen(combo,body->ptr,sdslen(body->ptr)); + if (rdbSaveAuxField(rdb,"lua",3,combo,sdslen(combo)) == -1) + goto werr; + sdsfree(combo); + } + dictReleaseIterator(di); + } + /* EOF opcode */ if (rdbSaveType(rdb,RDB_OPCODE_EOF) == -1) goto werr; @@ -1589,6 +1610,32 @@ int rdbLoadRio(rio *rdb, rdbSaveInfo *rsi) { } } else if (!strcasecmp(auxkey->ptr,"repl-offset")) { if (rsi) rsi->repl_offset = strtoll(auxval->ptr,NULL,10); + } else if (!strcasecmp(auxkey->ptr,"lua")) { + /* Load the string combining the function name and body + * back in memory. The format is basically: + * . 
To load it back we need + * to create the function name as "f_" and load the + * body as a Redis string object. */ + sds combo = auxval->ptr; + if (sdslen(combo) < 40) { + rdbExitReportCorruptRDB( + "Lua script stored into the RDB file has invalid " + "length < 40 bytes: '%s'", combo); + } + char funcname[42]; + funcname[0] = 'f'; + funcname[1] = '_'; + memcpy(funcname+2,combo,40); + robj *body = createRawStringObject(combo+40,sdslen(combo)-40); + + /* Register the function. */ + if (luaCreateFunction(NULL,server.lua,funcname,body) == C_ERR) { + rdbExitReportCorruptRDB( + "Can't load Lua script from RDB file! " + "Script SHA1: %.42s BODY: %s", + combo, combo+42); + } + decrRefCount(body); } else { /* We ignore fields we don't understand, as by AUX field * contract. */ diff --git a/src/server.h b/src/server.h index 9b7da1d37..11eb36f3d 100644 --- a/src/server.h +++ b/src/server.h @@ -1781,6 +1781,7 @@ void scriptingInit(int setup); int ldbRemoveChild(pid_t pid); void ldbKillForkedSessions(void); int ldbPendingChildren(void); +int luaCreateFunction(client *c, lua_State *lua, char *funcname, robj *body); /* Blocked clients */ void processUnblockedClients(void); From d14beab331f6312c1e4be5a2697c4c5dd19dd062 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 29 Nov 2017 16:21:14 +0100 Subject: [PATCH 0473/1722] PSYNC2: luaCreateFunction() should handle NULL client parameter. See #4483. This is needed because luaCreateFunction() is now called from RDB loading code outside a client context. 
--- src/scripting.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/scripting.c b/src/scripting.c index d9f954068..64de1edcd 100644 --- a/src/scripting.c +++ b/src/scripting.c @@ -1160,16 +1160,21 @@ int luaCreateFunction(client *c, lua_State *lua, char *funcname, robj *body) { funcdef = sdscatlen(funcdef,"\nend",4); if (luaL_loadbuffer(lua,funcdef,sdslen(funcdef),"@user_script")) { - addReplyErrorFormat(c,"Error compiling script (new function): %s\n", - lua_tostring(lua,-1)); + if (c != NULL) { + addReplyErrorFormat(c, + "Error compiling script (new function): %s\n", + lua_tostring(lua,-1)); + } lua_pop(lua,1); sdsfree(funcdef); return C_ERR; } sdsfree(funcdef); if (lua_pcall(lua,0,0,0)) { - addReplyErrorFormat(c,"Error running script (new function): %s\n", - lua_tostring(lua,-1)); + if (c != NULL) { + addReplyErrorFormat(c,"Error running script (new function): %s\n", + lua_tostring(lua,-1)); + } lua_pop(lua,1); return C_ERR; } @@ -1180,7 +1185,7 @@ int luaCreateFunction(client *c, lua_State *lua, char *funcname, robj *body) { { int retval = dictAdd(server.lua_scripts, sdsnewlen(funcname+2,40),body); - serverAssertWithInfo(c,NULL,retval == DICT_OK); + serverAssertWithInfo(c ? c : server.lua_client,NULL,retval == DICT_OK); incrRefCount(body); } return C_OK; From 6051ebea336d3383cb5da85184daebf08e2bd10c Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 29 Nov 2017 16:38:16 +0100 Subject: [PATCH 0474/1722] PSYNC2: just store script bodies into RDB. Related to #4483. As suggested by @soloestoy, we can retrieve the SHA1 from the body. Given that in the new implementation using AUX fields we ended copying around a lot to create new objects and strings, extremize such concept and trade CPU for space inside the RDB file. 
--- src/rdb.c | 34 ++++------------------------------ src/scripting.c | 15 +++++++++++++++ 2 files changed, 19 insertions(+), 30 deletions(-) diff --git a/src/rdb.c b/src/rdb.c index d1495e79a..19ba59ab8 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -950,16 +950,9 @@ int rdbSaveRio(rio *rdb, int *error, int flags, rdbSaveInfo *rsi) { if (rsi && dictSize(server.lua_scripts)) { di = dictGetIterator(server.lua_scripts); while((de = dictNext(di)) != NULL) { - sds sha = dictGetKey(de); robj *body = dictGetVal(de); - /* Concatenate the SHA1 and the Lua script together. Because the - * SHA1 is fixed length, we will always be able to load it back - * telling apart the name from the body. */ - sds combo = sdsdup(sha); - combo = sdscatlen(combo,body->ptr,sdslen(body->ptr)); - if (rdbSaveAuxField(rdb,"lua",3,combo,sdslen(combo)) == -1) + if (rdbSaveAuxField(rdb,"lua",3,body->ptr,sdslen(body->ptr)) == -1) goto werr; - sdsfree(combo); } dictReleaseIterator(di); } @@ -1611,31 +1604,12 @@ int rdbLoadRio(rio *rdb, rdbSaveInfo *rsi) { } else if (!strcasecmp(auxkey->ptr,"repl-offset")) { if (rsi) rsi->repl_offset = strtoll(auxval->ptr,NULL,10); } else if (!strcasecmp(auxkey->ptr,"lua")) { - /* Load the string combining the function name and body - * back in memory. The format is basically: - * . To load it back we need - * to create the function name as "f_" and load the - * body as a Redis string object. */ - sds combo = auxval->ptr; - if (sdslen(combo) < 40) { - rdbExitReportCorruptRDB( - "Lua script stored into the RDB file has invalid " - "length < 40 bytes: '%s'", combo); - } - char funcname[42]; - funcname[0] = 'f'; - funcname[1] = '_'; - memcpy(funcname+2,combo,40); - robj *body = createRawStringObject(combo+40,sdslen(combo)-40); - - /* Register the function. */ - if (luaCreateFunction(NULL,server.lua,funcname,body) == C_ERR) { + /* Load the script back in memory. 
*/ + if (luaCreateFunction(NULL,server.lua,NULL,auxval) == C_ERR) { rdbExitReportCorruptRDB( "Can't load Lua script from RDB file! " - "Script SHA1: %.42s BODY: %s", - combo, combo+42); + "BODY: %s", auxval->ptr); } - decrRefCount(body); } else { /* We ignore fields we don't understand, as by AUX field * contract. */ diff --git a/src/scripting.c b/src/scripting.c index 64de1edcd..1ef91a4de 100644 --- a/src/scripting.c +++ b/src/scripting.c @@ -1147,11 +1147,26 @@ int redis_math_randomseed (lua_State *L) { * * f_ * + * If 'funcname' is NULL, the function name is created by the function + * on the fly doing the SHA1 of the body, this means that passing the funcname + * is just an optimization in case it's already at hand. + * + * The function increments the reference count of the 'body' object as a + * side effect of a successful call. + * * On success C_OK is returned, and nothing is left on the Lua stack. * On error C_ERR is returned and an appropriate error is set in the * client context. */ int luaCreateFunction(client *c, lua_State *lua, char *funcname, robj *body) { sds funcdef = sdsempty(); + char fname[42]; + + if (funcname == NULL) { + fname[0] = 'f'; + fname[1] = '_'; + sha1hex(fname+2,body->ptr,sdslen(body->ptr)); + funcname = fname; + } funcdef = sdscat(funcdef,"function "); funcdef = sdscatlen(funcdef,funcname,42); From 95eaf3aada8819c9556b64dba1c4862d066056ea Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 29 Nov 2017 17:11:46 +0100 Subject: [PATCH 0475/1722] PSYNC2: Fix off by one buffer size in luaCreateFunction(). --- src/scripting.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scripting.c b/src/scripting.c index 1ef91a4de..848629e28 100644 --- a/src/scripting.c +++ b/src/scripting.c @@ -1159,7 +1159,7 @@ int redis_math_randomseed (lua_State *L) { * client context. 
*/ int luaCreateFunction(client *c, lua_State *lua, char *funcname, robj *body) { sds funcdef = sdsempty(); - char fname[42]; + char fname[43]; if (funcname == NULL) { fname[0] = 'f'; From 71ad8ff26e3daf24abe5d1608ca631ad9102678b Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 30 Aug 2017 12:40:27 +0200 Subject: [PATCH 0476/1722] Streams: 12 commits squashed into the initial Streams implementation. --- src/Makefile | 2 +- src/listpack.c | 783 ++++++++++++++++++++++++++++++++++++++++++ src/listpack.h | 61 ++++ src/listpack_malloc.h | 44 +++ src/object.c | 7 + src/rax.c | 24 +- src/rax.h | 1 + src/rdb.h | 3 +- src/server.c | 2 + src/server.h | 9 + src/stream.h | 21 ++ src/t_stream.c | 376 ++++++++++++++++++++ 12 files changed, 1323 insertions(+), 10 deletions(-) create mode 100644 src/listpack.c create mode 100644 src/listpack.h create mode 100644 src/listpack_malloc.h create mode 100644 src/stream.h create mode 100644 src/t_stream.c diff --git a/src/Makefile b/src/Makefile index 86e0b3fe0..b896b1263 100644 --- a/src/Makefile +++ b/src/Makefile @@ -144,7 +144,7 @@ endif REDIS_SERVER_NAME=redis-server REDIS_SENTINEL_NAME=redis-sentinel -REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o +REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o 
t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.c REDIS_CLI_NAME=redis-cli REDIS_CLI_OBJ=anet.o adlist.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o REDIS_BENCHMARK_NAME=redis-benchmark diff --git a/src/listpack.c b/src/listpack.c new file mode 100644 index 000000000..e2702b65c --- /dev/null +++ b/src/listpack.c @@ -0,0 +1,783 @@ +/* Listpack -- A lists of strings serialization format + * + * This file implements the specification you can find at: + * + * https://github.com/antirez/listpack + * + * Copyright (c) 2017, Salvatore Sanfilippo + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Redis nor the names of its contributors may be used + * to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include + +#include "listpack.h" +#include "listpack_malloc.h" + +#define LP_HDR_SIZE 6 /* 32 bit total len + 16 bit number of elements. */ +#define LP_HDR_NUMELE_UNKNOWN UINT16_MAX +#define LP_MAX_INT_ENCODING_LEN 9 +#define LP_MAX_BACKLEN_SIZE 5 +#define LP_MAX_ENTRY_BACKLEN 34359738367ULL +#define LP_ENCODING_INT 0 +#define LP_ENCODING_STRING 1 + +#define LP_ENCODING_7BIT_UINT 0 +#define LP_ENCODING_7BIT_UINT_MASK 0x80 +#define LP_ENCODING_IS_7BIT_UINT(byte) (((byte)&LP_ENCODING_7BIT_UINT_MASK)==LP_ENCODING_7BIT_UINT) + +#define LP_ENCODING_6BIT_STR 0x80 +#define LP_ENCODING_6BIT_STR_MASK 0xC0 +#define LP_ENCODING_IS_6BIT_STR(byte) (((byte)&LP_ENCODING_6BIT_STR_MASK)==LP_ENCODING_6BIT_STR) + +#define LP_ENCODING_13BIT_INT 0xC0 +#define LP_ENCODING_13BIT_INT_MASK 0xE0 +#define LP_ENCODING_IS_13BIT_INT(byte) (((byte)&LP_ENCODING_13BIT_INT_MASK)==LP_ENCODING_13BIT_INT) + +#define LP_ENCODING_12BIT_STR 0xE0 +#define LP_ENCODING_12BIT_STR_MASK 0xF0 +#define LP_ENCODING_IS_12BIT_STR(byte) (((byte)&LP_ENCODING_12BIT_STR_MASK)==LP_ENCODING_12BIT_STR) + +#define LP_ENCODING_16BIT_INT 0xF1 +#define LP_ENCODING_16BIT_INT_MASK 0xFF +#define LP_ENCODING_IS_16BIT_INT(byte) (((byte)&LP_ENCODING_16BIT_INT_MASK)==LP_ENCODING_16BIT_INT) + +#define LP_ENCODING_24BIT_INT 0xF2 +#define LP_ENCODING_24BIT_INT_MASK 0xFF +#define LP_ENCODING_IS_24BIT_INT(byte) 
(((byte)&LP_ENCODING_24BIT_INT_MASK)==LP_ENCODING_24BIT_INT) + +#define LP_ENCODING_32BIT_INT 0xF3 +#define LP_ENCODING_32BIT_INT_MASK 0xFF +#define LP_ENCODING_IS_32BIT_INT(byte) (((byte)&LP_ENCODING_32BIT_INT_MASK)==LP_ENCODING_32BIT_INT) + +#define LP_ENCODING_64BIT_INT 0xF4 +#define LP_ENCODING_64BIT_INT_MASK 0xFF +#define LP_ENCODING_IS_64BIT_INT(byte) (((byte)&LP_ENCODING_64BIT_INT_MASK)==LP_ENCODING_64BIT_INT) + +#define LP_ENCODING_32BIT_STR 0xF0 +#define LP_ENCODING_32BIT_STR_MASK 0xFF +#define LP_ENCODING_IS_32BIT_STR(byte) (((byte)&LP_ENCODING_32BIT_STR_MASK)==LP_ENCODING_32BIT_STR) + +#define LP_EOF 0xFF + +#define LP_ENCODING_6BIT_STR_LEN(p) ((p)[0] & 0x3F) +#define LP_ENCODING_12BIT_STR_LEN(p) ((((p)[0] & 0xF) << 8) | (p)[1]) +#define LP_ENCODING_32BIT_STR_LEN(p) (((uint32_t)(p)[1]<<0) | \ + ((uint32_t)(p)[2]<<8) | \ + ((uint32_t)(p)[3]<<16) | \ + ((uint32_t)(p)[4]<<24)) + +#define lpGetTotalBytes(p) (((uint32_t)(p)[0]<<0) | \ + ((uint32_t)(p)[1]<<8) | \ + ((uint32_t)(p)[2]<<16) | \ + ((uint32_t)(p)[3]<<24)) + +#define lpGetNumElements(p) (((uint32_t)(p)[4]<<0) | \ + ((uint32_t)(p)[5]<<8)) +#define lpSetTotalBytes(p,v) do { \ + (p)[0] = (v)&0xff; \ + (p)[1] = ((v)>>8)&0xff; \ + (p)[2] = ((v)>>16)&0xff; \ + (p)[3] = ((v)>>24)&0xff; \ +} while(0) + +#define lpSetNumElements(p,v) do { \ + (p)[4] = (v)&0xff; \ + (p)[5] = ((v)>>8)&0xff; \ +} while(0) + +/* Convert a string into a signed 64 bit integer. + * The function returns 1 if the string could be parsed into a (non-overflowing) + * signed 64 bit int, 0 otherwise. The 'value' will be set to the parsed value + * when the function returns success. + * + * Note that this function demands that the string strictly represents + * a int64 value: no spaces or other characters before or after the string + * representing the number are accepted, nor zeroes at the start if not + * for the string "0" representing the zero number. 
+ * + * Because of its strictness, it is safe to use this function to check if + * you can convert a string into a long long, and obtain back the string + * from the number without any loss in the string representation. * + * + * ----------------------------------------------------------------------------- + * + * Credits: this function was adapted from the Redis source code, file + * "utils.c", function string2ll(), and is copyright: + * + * Copyright(C) 2011, Pieter Noordhuis + * Copyright(C) 2011, Salvatore Sanfilippo + * + * The function is released under the BSD 3-clause license. + */ +int lpStringToInt64(const char *s, unsigned long slen, int64_t *value) { + const char *p = s; + unsigned long plen = 0; + int negative = 0; + uint64_t v; + + if (plen == slen) + return 0; + + /* Special case: first and only digit is 0. */ + if (slen == 1 && p[0] == '0') { + if (value != NULL) *value = 0; + return 1; + } + + if (p[0] == '-') { + negative = 1; + p++; plen++; + + /* Abort on only a negative sign. */ + if (plen == slen) + return 0; + } + + /* First digit should be 1-9, otherwise the string should just be 0. */ + if (p[0] >= '1' && p[0] <= '9') { + v = p[0]-'0'; + p++; plen++; + } else if (p[0] == '0' && slen == 1) { + *value = 0; + return 1; + } else { + return 0; + } + + while (plen < slen && p[0] >= '0' && p[0] <= '9') { + if (v > (UINT64_MAX / 10)) /* Overflow. */ + return 0; + v *= 10; + + if (v > (UINT64_MAX - (p[0]-'0'))) /* Overflow. */ + return 0; + v += p[0]-'0'; + + p++; plen++; + } + + /* Return if not all bytes were used. */ + if (plen < slen) + return 0; + + if (negative) { + if (v > ((uint64_t)(-(INT64_MIN+1))+1)) /* Overflow. */ + return 0; + if (value != NULL) *value = -v; + } else { + if (v > INT64_MAX) /* Overflow. */ + return 0; + if (value != NULL) *value = v; + } + return 1; +} + +/* Create a new, empty listpack. + * On success the new listpack is returned, otherwise an error is returned. 
*/ +unsigned char *lpNew(void) { + unsigned char *lp = lp_malloc(LP_HDR_SIZE+1); + if (lp == NULL) return NULL; + lpSetTotalBytes(lp,LP_HDR_SIZE+1); + lpSetNumElements(lp,0); + lp[LP_HDR_SIZE] = LP_EOF; + return lp; +} + +/* Free the specified listpack. */ +void lpFree(unsigned char *lp) { + lp_free(lp); +} + +/* Given an element 'ele' of size 'size', determine if the element can be + * represented inside the listpack encoded as integer, and returns + * LP_ENCODING_INT if so. Otherwise returns LP_ENCODING_STR if no integer + * encoding is possible. + * + * If the LP_ENCODING_INT is returned, the function stores the integer encoded + * representation of the element in the 'intenc' buffer. + * + * Regardless of the returned encoding, 'enclen' is populated by reference to + * the number of bytes that the string or integer encoded element will require + * in order to be represented. */ +int lpEncodeGetType(unsigned char *ele, uint32_t size, unsigned char *intenc, uint64_t *enclen) { + int64_t v; + if (lpStringToInt64((const char*)ele, size, &v)) { + if (v >= 0 && v <= 127) { + /* Single byte 0-127 integer. */ + intenc[0] = v; + *enclen = 1; + } else if (v >= -4096 && v <= 4095) { + /* 13 bit integer. */ + if (v < 0) v = ((int64_t)1<<13)+v; + intenc[0] = (v>>8)|LP_ENCODING_13BIT_INT; + intenc[1] = v&0xff; + *enclen = 2; + } else if (v >= -32768 && v <= 32767) { + /* 16 bit integer. */ + if (v < 0) v = ((int64_t)1<<16)+v; + intenc[0] = LP_ENCODING_16BIT_INT; + intenc[1] = v&0xff; + intenc[2] = v>>8; + *enclen = 3; + } else if (v >= -8388608 && v <= 8388607) { + /* 24 bit integer. */ + if (v < 0) v = ((int64_t)1<<24)+v; + intenc[0] = LP_ENCODING_24BIT_INT; + intenc[1] = v&0xff; + intenc[2] = (v>>8)&0xff; + intenc[3] = v>>16; + *enclen = 4; + } else if (v >= -2147483648 && v <= 2147483647) { + /* 32 bit integer. 
*/ + if (v < 0) v = ((int64_t)1<<32)+v; + intenc[0] = LP_ENCODING_32BIT_INT; + intenc[1] = v&0xff; + intenc[2] = (v>>8)&0xff; + intenc[3] = (v>>16)&0xff; + intenc[4] = v>>24; + *enclen = 5; + } else { + /* 64 bit integer. */ + uint64_t uv = v; + intenc[0] = LP_ENCODING_64BIT_INT; + intenc[1] = uv&0xff; + intenc[2] = (uv>>8)&0xff; + intenc[3] = (uv>>16)&0xff; + intenc[4] = (uv>>24)&0xff; + intenc[5] = (uv>>32)&0xff; + intenc[6] = (uv>>40)&0xff; + intenc[7] = (uv>>48)&0xff; + intenc[8] = uv>>56; + *enclen = 9; + } + return LP_ENCODING_INT; + } else { + if (size < 64) *enclen = 1+size; + else if (size < 4096) *enclen = 2+size; + else *enclen = 4+size; + return LP_ENCODING_STRING; + } +} + +/* Store a reverse-encoded variable length field, representing the length + * of the previous element of size 'l', in the target buffer 'buf'. + * The function returns the number of bytes used to encode it, from + * 1 to 5. If 'buf' is NULL the funciton just returns the number of bytes + * needed in order to encode the backlen. */ +unsigned long lpEncodeBacklen(unsigned char *buf, uint64_t l) { + if (l <= 127) { + if (buf) buf[0] = l; + return 1; + } else if (l < 16383) { + if (buf) { + buf[0] = l>>7; + buf[1] = (l&127)|128; + } + return 2; + } else if (l < 2097151) { + if (buf) { + buf[0] = l>>14; + buf[1] = ((l>>7)&127)|128; + buf[2] = (l&127)|128; + } + return 3; + } else if (l < 268435455) { + if (buf) { + buf[0] = l>>21; + buf[1] = ((l>>14)&127)|128; + buf[2] = ((l>>7)&127)|128; + buf[3] = (l&127)|128; + } + return 4; + } else { + if (buf) { + buf[0] = l>>28; + buf[1] = ((l>>21)&127)|128; + buf[2] = ((l>>14)&127)|128; + buf[3] = ((l>>7)&127)|128; + buf[4] = (l&127)|128; + } + return 5; + } +} + +/* Decode the backlen and returns it. If the encoding looks invalid (more than + * 5 bytes are used), UINT64_MAX is returned to report the problem. 
*/ +uint64_t lpDecodeBacklen(unsigned char *p) { + uint64_t val = 0; + uint64_t shift = 0; + do { + val |= (uint64_t)(p[0] & 127) << shift; + if (!(p[0] & 128)) break; + shift += 7; + p--; + if (shift > 28) return UINT64_MAX; + } while(1); + return val; +} + +/* Encode the string element pointed by 's' of size 'len' in the target + * buffer 's'. The function should be called with 'buf' having always enough + * space for encoding the string. This is done by calling lpEncodeGetType() + * before calling this function. */ +void lpEncodeString(unsigned char *buf, unsigned char *s, uint32_t len) { + if (len < 64) { + buf[0] = len | LP_ENCODING_6BIT_STR; + memcpy(buf+1,s,len); + } else if (len < 4096) { + buf[0] = (len >> 8) | LP_ENCODING_12BIT_STR; + buf[1] = len & 0xff; + memcpy(buf+2,s,len); + } else { + buf[0] = LP_ENCODING_32BIT_STR; + buf[1] = len & 0xff; + buf[2] = (len >> 8) & 0xff; + buf[3] = (len >> 16) & 0xff; + buf[4] = (len >> 24) & 0xff; + memcpy(buf+4,s,len); + } +} + +/* Return the encoded length of the listpack element pointed by 'p'. If the + * element encoding is wrong then 0 is returned. */ +uint32_t lpCurrentEncodedSize(unsigned char *p) { + if (LP_ENCODING_IS_7BIT_UINT(p[0])) return 1; + if (LP_ENCODING_IS_6BIT_STR(p[0])) return 1+LP_ENCODING_6BIT_STR_LEN(p); + if (LP_ENCODING_IS_13BIT_INT(p[0])) return 2; + if (LP_ENCODING_IS_16BIT_INT(p[0])) return 3; + if (LP_ENCODING_IS_24BIT_INT(p[0])) return 4; + if (LP_ENCODING_IS_32BIT_INT(p[0])) return 5; + if (LP_ENCODING_IS_64BIT_INT(p[0])) return 9; + if (LP_ENCODING_IS_12BIT_STR(p[0])) return 2+LP_ENCODING_12BIT_STR_LEN(p); + if (LP_ENCODING_IS_32BIT_STR(p[0])) return 5+LP_ENCODING_32BIT_STR_LEN(p); + if (p[0] == LP_EOF) return 1; + return 0; +} + +/* Skip the current entry returning the next. 
It is invalid to call this + * function if the current element is the EOF element at the end of the + * listpack, however, while this function is used to implement lpNext(), + * it does not return NULL when the EOF element is encountered. */ +unsigned char *lpSkip(unsigned char *p) { + unsigned long entrylen = lpCurrentEncodedSize(p); + entrylen += lpEncodeBacklen(NULL,entrylen); + p += entrylen; + return p; +} + +/* If 'p' points to an element of the listpack, calling lpNext() will return + * the pointer to the next element (the one on the right), or NULL if 'p' + * already pointed to the last element of the listpack. */ +unsigned char *lpNext(unsigned char *lp, unsigned char *p) { + ((void) lp); /* lp is not used for now. However lpPrev() uses it. */ + p = lpSkip(p); + if (p[0] == LP_EOF) return NULL; + return p; +} + +/* If 'p' points to an element of the listpack, calling lpPrev() will return + * the pointer to the preivous element (the one on the left), or NULL if 'p' + * already pointed to the first element of the listpack. */ +unsigned char *lpPrev(unsigned char *lp, unsigned char *p) { + if (p-lp == LP_HDR_SIZE) return NULL; + p--; /* Seek the first backlen byte of the last element. */ + uint64_t prevlen = lpDecodeBacklen(p); + prevlen += lpEncodeBacklen(NULL,prevlen); + return p-prevlen+1; /* Seek the first byte of the previous entry. */ +} + +/* Return a pointer to the first element of the listpack, or NULL if the + * listpack has no elements. */ +unsigned char *lpFirst(unsigned char *lp) { + lp += LP_HDR_SIZE; /* Skip the header. */ + if (lp[0] == LP_EOF) return NULL; + return lp; +} + +/* Return a pointer to the last element of the listpack, or NULL if the + * listpack has no elements. */ +unsigned char *lpLast(unsigned char *lp) { + unsigned char *p = lp+lpGetTotalBytes(lp)-1; /* Seek EOF element. */ + return lpPrev(lp,p); /* Will return NULL if EOF is the only element. */ +} + +/* Return the number of elements inside the listpack. 
This function attempts + * to use the cached value when within range, otherwise a full scan is + * needed. As a side effect of calling this function, the listpack header + * could be modified, because if the count is found to be already within + * the 'numele' header field range, the new value is set. */ +uint32_t lpLength(unsigned char *lp) { + uint32_t numele = lpGetNumElements(lp); + if (numele != LP_HDR_NUMELE_UNKNOWN) return numele; + + /* Too many elements inside the listpack. We need to scan in order + * to get the total number. */ + uint32_t count = 0; + unsigned char *p = lpFirst(lp); + while(p) { + count++; + p = lpNext(lp,p); + } + + /* If the count is again within range of the header numele field, + * set it. */ + if (count < LP_HDR_NUMELE_UNKNOWN) lpSetNumElements(lp,count); + return count; +} + +/* Return the listpack element pointed by 'p'. + * + * The function changes behavior depending on the passed 'intbuf' value. + * Specifically, if 'intbuf' is NULL: + * + * If the element is internally encoded as an integer, the function returns + * NULL and populates the integer value by reference in 'count'. Otherwise if + * the element is encoded as a string a pointer to the string (pointing inside + * the listpack itself) is returned, and 'count' is set to the length of the + * string. + * + * If instead 'intbuf' points to a buffer passed by the caller, that must be + * at least LP_INTBUF_SIZE bytes, the function always returns the element as + * it was a string (returning the pointer to the string and setting the + * 'count' argument to the string length by reference). However if the element + * is encoded as an integer, the 'intbuf' buffer is used in order to store + * the string representation. + * + * The user should use one or the other form depending on what the value will + * be used for. If there is immediate usage for an integer value returned + * by the function, than to pass a buffer (and convert it back to a number) + * is of course useless. 
+ * + * If the function is called against a badly encoded ziplist, so that there + * is no valid way to parse it, the function returns like if there was an + * integer encoded with value 12345678900000000 + , this may + * be an hint to understand that something is wrong. To crash in this case is + * not sensible because of the different requirements of the application using + * this lib. + * + * Similarly, there is no error returned since the listpack normally can be + * assumed to be valid, so that would be a very high API cost. However a function + * in order to check the integrity of the listpack at load time is provided, + * check lpIsValid(). */ +unsigned char *lpGet(unsigned char *p, int64_t *count, unsigned char *intbuf) { + int64_t val; + uint64_t uval, negstart, negmax; + + if (LP_ENCODING_IS_7BIT_UINT(p[0])) { + negstart = UINT64_MAX; /* 7 bit ints are always positive. */ + negmax = 0; + uval = p[0] & 0x7f; + } else if (LP_ENCODING_IS_6BIT_STR(p[0])) { + *count = LP_ENCODING_6BIT_STR_LEN(p); + return p+1; + } else if (LP_ENCODING_IS_13BIT_INT(p[0])) { + uval = ((p[0]&0x1f)<<8) | p[1]; + negstart = (uint64_t)1<<12; + negmax = 8191; + } else if (LP_ENCODING_IS_16BIT_INT(p[0])) { + uval = (uint64_t)p[1] | + (uint64_t)p[2]<<8; + negstart = (uint64_t)1<<15; + negmax = UINT16_MAX; + } else if (LP_ENCODING_IS_24BIT_INT(p[0])) { + uval = (uint64_t)p[1] | + (uint64_t)p[2]<<8 | + (uint64_t)p[3]<<16; + negstart = (uint64_t)1<<23; + negmax = UINT32_MAX>>8; + } else if (LP_ENCODING_IS_32BIT_INT(p[0])) { + uval = (uint64_t)p[1] | + (uint64_t)p[2]<<8 | + (uint64_t)p[3]<<16 | + (uint64_t)p[4]<<24; + negstart = (uint64_t)1<<31; + negmax = UINT32_MAX; + } else if (LP_ENCODING_IS_64BIT_INT(p[0])) { + uval = (uint64_t)p[1] | + (uint64_t)p[2]<<8 | + (uint64_t)p[3]<<16 | + (uint64_t)p[4]<<24 | + (uint64_t)p[5]<<32 | + (uint64_t)p[6]<<40 | + (uint64_t)p[7]<<48 | + (uint64_t)p[8]<<56; + negstart = (uint64_t)1<<63; + negmax = UINT64_MAX; + } else if 
(LP_ENCODING_IS_12BIT_STR(p[0])) { + *count = LP_ENCODING_12BIT_STR_LEN(p); + return p+2; + } else if (LP_ENCODING_IS_32BIT_STR(p[0])) { + *count = LP_ENCODING_32BIT_STR_LEN(p); + return p+5; + } else { + uval = 12345678900000000ULL + p[0]; + negstart = UINT64_MAX; + negmax = 0; + } + + /* We reach this code path only for integer encodings. + * Convert the unsigned value to the signed one using two's complement + * rule. */ + if (uval >= negstart) { + /* This three steps conversion should avoid undefined behaviors + * in the unsigned -> signed conversion. */ + uval = negmax-uval; + val = uval; + val = -val-1; + } else { + val = uval; + } + + /* Return the string representation of the integer or the value itself + * depending on intbuf being NULL or not. */ + if (intbuf) { + *count = snprintf((char*)intbuf,LP_INTBUF_SIZE,"%lld",val); + return intbuf; + } else { + *count = val; + return NULL; + } +} + +/* Insert, delete or replace the specified element 'ele' of lenght 'len' at + * the specified position 'p', with 'p' being a listpack element pointer + * obtained with lpFirst(), lpLast(), lpIndex(), lpNext(), lpPrev() or + * lpSeek(). + * + * The element is inserted before, after, or replaces the element pointed + * by 'p' depending on the 'where' argument, that can be LP_BEFORE, LP_AFTER + * or LP_REPLACE. + * + * If 'ele' is set to NULL, the function removes the element pointed by 'p' + * instead of inserting one. + * + * Returns NULL on out of memory or when the listpack total length would exceed + * the max allowed size of 2^32-1, otherwise the new pointer to the listpack + * holding the new element is returned (and the old pointer passed is no longer + * considered valid) + * + * If 'newp' is not NULL, at the end of a successful call '*newp' will be set + * to the address of the element just added, so that it will be possible to + * continue an interation with lpNext() and lpPrev(). 
+ * + * For deletion operations ('ele' set to NULL) 'newp' is set to the next + * element, on the right of the deleted one, or to NULL if the deleted element + * was the last one. */ +unsigned char *lpInsert(unsigned char *lp, unsigned char *ele, uint32_t size, unsigned char *p, int where, unsigned char **newp) { + unsigned char intenc[LP_MAX_INT_ENCODING_LEN]; + unsigned char backlen[LP_MAX_BACKLEN_SIZE]; + + uint64_t enclen; /* The length of the encoded element. */ + + /* An element pointer set to NULL means deletion, which is conceptually + * replacing the element with a zero-length element. So whatever we + * get passed as 'where', set it to LP_REPLACE. */ + if (ele == NULL) where = LP_REPLACE; + + /* If we need to insert after the current element, we just jump to the + * next element (that could be the EOF one) and handle the case of + * inserting before. So the function will actually deal with just two + * cases: LP_BEFORE and LP_REPLACE. */ + if (where == LP_AFTER) { + p = lpSkip(p); + where = LP_BEFORE; + } + + /* Store the offset of the element 'p', so that we can obtain its + * address again after a reallocation. */ + unsigned long poff = p-lp; + + /* Calling lpEncodeGetType() results into the encoded version of the + * element to be stored into 'intenc' in case it is representable as + * an integer: in that case, the function returns LP_ENCODING_INT. + * Otherwise if LP_ENCODING_STR is returned, we'll have to call + * lpEncodeString() to actually write the encoded string on place later. + * + * Whatever the returned encoding is, 'enclen' is populated with the + * length of the encoded element. */ + int enctype; + if (ele) { + enctype = lpEncodeGetType(ele,size,intenc,&enclen); + } else { + enctype = -1; + enclen = 0; + } + + /* We need to also encode the backward-parsable length of the element + * and append it to the end: this allows to traverse the listpack from + * the end to the start. */ + unsigned long backlen_size = ele ? 
lpEncodeBacklen(backlen,enclen) : 0; + uint64_t old_listpack_bytes = lpGetTotalBytes(lp); + uint32_t replaced_len = 0; + if (where == LP_REPLACE) { + replaced_len = lpCurrentEncodedSize(p); + replaced_len += lpEncodeBacklen(NULL,replaced_len); + } + + uint64_t new_listpack_bytes = old_listpack_bytes + enclen + backlen_size + - replaced_len; + if (new_listpack_bytes > UINT32_MAX) return NULL; + + /* We now need to reallocate in order to make space or shrink the + * allocation (in case 'when' value is LP_REPLACE and the new element is + * smaller). However we do that before memmoving the memory to + * make room for the new element if the final allocation will get + * larger, or we do it after if the final allocation will get smaller. */ + + unsigned char *dst = lp + poff; /* May be updated after reallocation. */ + + /* Realloc before: we need more room. */ + if (new_listpack_bytes > old_listpack_bytes) { + if ((lp = lp_realloc(lp,new_listpack_bytes)) == NULL) return NULL; + dst = lp + poff; + } + + /* Setup the listpack relocating the elements to make the exact room + * we need to store the new one. */ + if (where == LP_BEFORE) { + memmove(dst+enclen+backlen_size,dst,old_listpack_bytes-poff); + } else { /* LP_REPLACE. */ + long lendiff = (enclen+backlen_size)-replaced_len; + memmove(dst+replaced_len+lendiff, + dst+replaced_len, + old_listpack_bytes-poff-replaced_len); + } + + /* Realloc after: we need to free space. */ + if (new_listpack_bytes < old_listpack_bytes) { + if ((lp = lp_realloc(lp,new_listpack_bytes)) == NULL) return NULL; + dst = lp + poff; + } + + /* Store the entry. */ + if (newp) { + *newp = dst; + /* In case of deletion, set 'newp' to NULL if the next element is + * the EOF element. 
*/ + if (!ele && dst[0] == LP_EOF) *newp = NULL; + } + if (ele) { + if (enctype == LP_ENCODING_INT) { + memcpy(dst,intenc,enclen); + } else { + lpEncodeString(dst,ele,size); + } + dst += enclen; + memcpy(dst,backlen,backlen_size); + dst += backlen_size; + } + + /* Update header. */ + if (where != LP_REPLACE || ele == NULL) { + uint32_t num_elements = lpGetNumElements(lp); + if (num_elements != LP_HDR_NUMELE_UNKNOWN) { + if (ele) + lpSetNumElements(lp,num_elements+1); + else + lpSetNumElements(lp,num_elements-1); + } + } + lpSetTotalBytes(lp,new_listpack_bytes); + return lp; +} + +/* Append the specified element 'ele' of lenght 'len' at the end of the + * listpack. It is implemented in terms of lpInsert(), so the return value is + * the same as lpInsert(). */ +unsigned char *lpAppend(unsigned char *lp, unsigned char *ele, uint32_t size) { + uint64_t listpack_bytes = lpGetTotalBytes(lp); + unsigned char *eofptr = lp + listpack_bytes - 1; + return lpInsert(lp,ele,size,eofptr,LP_BEFORE,NULL); +} + +/* Remove the element pointed by 'p', and return the resulting listpack. + * If 'newp' is not NULL, the next element pointer (to the right of the + * deleted one) is returned by reference. If the deleted element was the + * last one, '*newp' is set to NULL. */ +unsigned char *lpDelete(unsigned char *lp, unsigned char *p, unsigned char **newp) { + return lpInsert(lp,NULL,0,p,LP_REPLACE,newp); +} + +/* Return the total number of bytes the listpack is composed of. */ +uint32_t lpBytes(unsigned char *lp) { + return lpGetTotalBytes(lp); +} + +/* Seek the specified element and returns the pointer to the seeked element. + * Positive indexes specify the zero-based element to seek from the head to + * the tail, negative indexes specify elements starting from the tail, where + * -1 means the last element, -2 the penultimate and so forth. If the index + * is out of range, NULL is returned. 
*/ +unsigned char *lpSeek(unsigned char *lp, long index) { + int forward = 1; /* Seek forward by default. */ + + /* We want to seek from left to right or the other way around + * depending on the listpack length and the element position. + * However if the listpack length cannot be obtained in constant time, + * we always seek from left to right. */ + uint32_t numele = lpGetNumElements(lp); + if (numele != LP_HDR_NUMELE_UNKNOWN) { + if (index < 0) index = (long)numele+index; + if (index < 0) return NULL; /* Index still < 0 means out of range. */ + if (index >= numele) return NULL; /* Out of range the other side. */ + /* We want to scan right-to-left if the element we are looking for + * is past the half of the listpack. */ + if (index > numele/2) { + forward = 0; + /* Left to right scanning always expects a negative index. Convert + * our index to negative form. */ + index -= numele; + } + } else { + /* If the listpack length is unspecified, for negative indexes we + * want to always scan left-to-right. */ + if (index < 0) forward = 0; + } + + /* Forward and backward scanning is trivially based on lpNext()/lpPrev(). */ + if (forward) { + unsigned char *ele = lpFirst(lp); + while (index > 0 && ele) { + ele = lpNext(lp,ele); + index--; + } + return ele; + } else { + unsigned char *ele = lpLast(lp); + while (index < -1 && ele) { + ele = lpPrev(lp,ele); + index++; + } + return ele; + } +} + diff --git a/src/listpack.h b/src/listpack.h new file mode 100644 index 000000000..af67b4b41 --- /dev/null +++ b/src/listpack.h @@ -0,0 +1,61 @@ +/* Listpack -- A lists of strings serialization format + * + * This file implements the specification you can find at: + * + * https://github.com/antirez/listpack + * + * Copyright (c) 2017, Salvatore Sanfilippo + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Redis nor the names of its contributors may be used + * to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __LISTPACK_H +#define __LISTPACK_H + +#include + +#define LP_INTBUF_SIZE 21 /* 20 digits of -2^63 + 1 null term = 21. 
*/ + +/* lpInsert() where argument possible values: */ +#define LP_BEFORE 0 +#define LP_AFTER 1 +#define LP_REPLACE 2 + +unsigned char *lpNew(void); +void lpFree(unsigned char *lp); +unsigned char *lpInsert(unsigned char *lp, unsigned char *ele, uint32_t size, unsigned char *p, int where, unsigned char **newp); +unsigned char *lpAppend(unsigned char *lp, unsigned char *ele, uint32_t size); +unsigned char *lpDelete(unsigned char *lp, unsigned char *p, unsigned char **newp); +uint32_t lpLength(unsigned char *lp); +unsigned char *lpGet(unsigned char *p, int64_t *count, unsigned char *intbuf); +unsigned char *lpFirst(unsigned char *lp); +unsigned char *lpLast(unsigned char *lp); +unsigned char *lpNext(unsigned char *lp, unsigned char *p); +unsigned char *lpPrev(unsigned char *lp, unsigned char *p); +uint32_t lpBytes(unsigned char *lp); +unsigned char *lpSeek(unsigned char *lp, long index); + +#endif diff --git a/src/listpack_malloc.h b/src/listpack_malloc.h new file mode 100644 index 000000000..a3a077fcd --- /dev/null +++ b/src/listpack_malloc.h @@ -0,0 +1,44 @@ +/* Listpack -- A lists of strings serialization format + * https://github.com/antirez/listpack + * + * Copyright (c) 2017, Salvatore Sanfilippo + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Redis nor the names of its contributors may be used + * to endorse or promote products derived from this software without + * specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* Allocator selection. + * + * This file is used in order to change the Rax allocator at compile time. + * Just define the following defines to what you want to use. Also add + * the include of your alternate allocator if needed (not needed in order + * to use the default libc allocator). 
*/ + +#ifndef LISTPACK_ALLOC_H +#define LISTPACK_ALLOC_H +#define lp_malloc malloc +#define lp_realloc realloc +#define lp_free free +#endif diff --git a/src/object.c b/src/object.c index d2f8d53c5..8eeb5c6c1 100644 --- a/src/object.c +++ b/src/object.c @@ -232,6 +232,13 @@ robj *createZsetZiplistObject(void) { return o; } +robj *createStreamObject(void) { + stream *s = streamNew(); + robj *o = createObject(OBJ_STREAM,s); + o->encoding = OBJ_ENCODING_STREAM; + return o; +} + robj *createModuleObject(moduleType *mt, void *value) { moduleValue *mv = zmalloc(sizeof(*mv)); mv->type = mt; diff --git a/src/rax.c b/src/rax.c index dda008dff..b4f5ae05d 100644 --- a/src/rax.c +++ b/src/rax.c @@ -131,7 +131,7 @@ static inline void raxStackFree(raxStack *ts) { } /* ---------------------------------------------------------------------------- - * Radis tree implementation + * Radix tree implementation * --------------------------------------------------------------------------*/ /* Allocate a new non compressed node with the specified number of children. @@ -873,7 +873,8 @@ raxNode *raxRemoveChild(raxNode *parent, raxNode *child) { memmove(((char*)cp)-1,cp,(parent->size-taillen-1)*sizeof(raxNode**)); /* Move the remaining "tail" pointer at the right position as well. */ - memmove(((char*)c)-1,c+1,taillen*sizeof(raxNode**)+parent->iskey*sizeof(void*)); + size_t valuelen = (parent->iskey && !parent->isnull) ? sizeof(void*) : 0; + memmove(((char*)c)-1,c+1,taillen*sizeof(raxNode**)+valuelen); /* 4. Update size. */ parent->size--; @@ -1175,7 +1176,7 @@ void raxIteratorDelChars(raxIterator *it, size_t count) { * The function returns 1 on success or 0 on out of memory. 
*/ int raxIteratorNextStep(raxIterator *it, int noup) { if (it->flags & RAX_ITER_EOF) { - return 0; + return 1; } else if (it->flags & RAX_ITER_JUST_SEEKED) { it->flags &= ~RAX_ITER_JUST_SEEKED; return 1; @@ -1187,10 +1188,6 @@ int raxIteratorNextStep(raxIterator *it, int noup) { size_t orig_stack_items = it->stack.items; raxNode *orig_node = it->node; - /* Clear the EOF flag: it will be set again if the EOF condition - * is still valid. */ - it->flags &= ~RAX_ITER_EOF; - while(1) { int children = it->node->iscompr ? 1 : it->node->size; if (!noup && children) { @@ -1291,7 +1288,7 @@ int raxSeekGreatest(raxIterator *it) { * effect to the one of raxIteratorPrevSte(). */ int raxIteratorPrevStep(raxIterator *it, int noup) { if (it->flags & RAX_ITER_EOF) { - return 0; + return 1; } else if (it->flags & RAX_ITER_JUST_SEEKED) { it->flags &= ~RAX_ITER_JUST_SEEKED; return 1; @@ -1412,6 +1409,7 @@ int raxSeek(raxIterator *it, const char *op, unsigned char *ele, size_t len) { it->node = it->rt->head; if (!raxSeekGreatest(it)) return 0; assert(it->node->iskey); + it->data = raxGetData(it->node); return 1; } @@ -1430,6 +1428,7 @@ int raxSeek(raxIterator *it, const char *op, unsigned char *ele, size_t len) { /* We found our node, since the key matches and we have an * "equal" condition. */ if (!raxIteratorAddChars(it,ele,len)) return 0; /* OOM. */ + it->data = raxGetData(it->node); } else if (lt || gt) { /* Exact key not found or eq flag not set. We have to set as current * key the one represented by the node we stopped at, and perform @@ -1502,6 +1501,7 @@ int raxSeek(raxIterator *it, const char *op, unsigned char *ele, size_t len) { * the previous sub-tree. */ if (nodechar < keychar) { if (!raxSeekGreatest(it)) return 0; + it->data = raxGetData(it->node); } else { if (!raxIteratorAddChars(it,it->node->data,it->node->size)) return 0; @@ -1647,6 +1647,14 @@ void raxStop(raxIterator *it) { raxStackFree(&it->stack); } +/* Return if the iterator is in an EOF state. 
This happens when raxSeek() + * failed to seek an appropriate element, so that raxNext() or raxPrev() + * will return zero, or when an EOF condition was reached while iterating + * with raxNext() and raxPrev(). */ +int raxEOF(raxIterator *it) { + return it->flags & RAX_ITER_EOF; +} + /* ----------------------------- Introspection ------------------------------ */ /* This function is mostly used for debugging and learning purposes. diff --git a/src/rax.h b/src/rax.h index 6f91f4c1b..f6985c373 100644 --- a/src/rax.h +++ b/src/rax.h @@ -155,6 +155,7 @@ int raxPrev(raxIterator *it); int raxRandomWalk(raxIterator *it, size_t steps); int raxCompare(raxIterator *iter, const char *op, unsigned char *key, size_t key_len); void raxStop(raxIterator *it); +int raxEOF(raxIterator *it); void raxShow(rax *rax); #endif diff --git a/src/rdb.h b/src/rdb.h index 62a13f444..bf1150455 100644 --- a/src/rdb.h +++ b/src/rdb.h @@ -89,10 +89,11 @@ #define RDB_TYPE_ZSET_ZIPLIST 12 #define RDB_TYPE_HASH_ZIPLIST 13 #define RDB_TYPE_LIST_QUICKLIST 14 +#define RDB_TYPE_STREAM_LISTPACKS 15 /* NOTE: WHEN ADDING NEW RDB TYPE, UPDATE rdbIsObjectType() BELOW */ /* Test if a type is an object type. */ -#define rdbIsObjectType(t) ((t >= 0 && t <= 7) || (t >= 9 && t <= 14)) +#define rdbIsObjectType(t) ((t >= 0 && t <= 7) || (t >= 9 && t <= 15)) /* Special RDB opcodes (saved/loaded with rdbSaveType/rdbLoadType). 
*/ #define RDB_OPCODE_AUX 250 diff --git a/src/server.c b/src/server.c index 7498a25fd..2c3647db6 100644 --- a/src/server.c +++ b/src/server.c @@ -302,6 +302,8 @@ struct redisCommand redisCommandTable[] = { {"pfcount",pfcountCommand,-2,"r",0,NULL,1,-1,1,0,0}, {"pfmerge",pfmergeCommand,-2,"wm",0,NULL,1,-1,1,0,0}, {"pfdebug",pfdebugCommand,-3,"w",0,NULL,0,0,0,0,0}, + {"xadd",xaddCommand,-4,"wmF",0,NULL,1,1,1,0,0}, + {"xrange",xrangeCommand,-4,"r",0,NULL,1,1,1,0,0}, {"post",securityWarningCommand,-1,"lt",0,NULL,0,0,0,0,0}, {"host:",securityWarningCommand,-1,"lt",0,NULL,0,0,0,0,0}, {"latency",latencyCommand,-2,"aslt",0,NULL,0,0,0,0,0} diff --git a/src/server.h b/src/server.h index 11eb36f3d..38a76d008 100644 --- a/src/server.h +++ b/src/server.h @@ -59,6 +59,7 @@ typedef long long mstime_t; /* millisecond time type. */ #include "anet.h" /* Networking the easy way */ #include "ziplist.h" /* Compact list data structure */ #include "intset.h" /* Compact integer set structure */ +#include "stream.h" /* Stream data type header file. */ #include "version.h" /* Version macro */ #include "util.h" /* Misc functions useful in many places */ #include "latency.h" /* Latency monitor API */ @@ -451,6 +452,7 @@ typedef long long mstime_t; /* millisecond time type. */ #define OBJ_SET 2 #define OBJ_ZSET 3 #define OBJ_HASH 4 +#define OBJ_STREAM 5 /* The "module" object type is a special one that signals that the object * is one directly managed by a Redis module. 
In this case the value points @@ -575,6 +577,7 @@ typedef struct RedisModuleDigest { #define OBJ_ENCODING_SKIPLIST 7 /* Encoded as skiplist */ #define OBJ_ENCODING_EMBSTR 8 /* Embedded sds string encoding */ #define OBJ_ENCODING_QUICKLIST 9 /* Encoded as linked list of ziplists */ +#define OBJ_ENCODING_STREAM 10 /* Encoded as a radix tree of listpacks */ #define LRU_BITS 24 #define LRU_CLOCK_MAX ((1<lru */ @@ -1414,6 +1417,9 @@ void handleClientsBlockedOnLists(void); void popGenericCommand(client *c, int where); void signalListAsReady(redisDb *db, robj *key); +/* Stream data type. */ +stream *streamNew(void); + /* MULTI/EXEC/WATCH... */ void unwatchAllKeys(client *c); void initClientMultiState(client *c); @@ -1455,6 +1461,7 @@ robj *createIntsetObject(void); robj *createHashObject(void); robj *createZsetObject(void); robj *createZsetZiplistObject(void); +robj *createStreamObject(void); robj *createModuleObject(moduleType *mt, void *value); int getLongFromObjectOrReply(client *c, robj *o, long *target, const char *msg); int checkType(client *c, robj *o, int type); @@ -1992,6 +1999,8 @@ void pfdebugCommand(client *c); void latencyCommand(client *c); void moduleCommand(client *c); void securityWarningCommand(client *c); +void xaddCommand(client *c); +void xrangeCommand(client *c); #if defined(__GNUC__) void *calloc(size_t count, size_t size) __attribute__ ((deprecated)); diff --git a/src/stream.h b/src/stream.h new file mode 100644 index 000000000..065c328eb --- /dev/null +++ b/src/stream.h @@ -0,0 +1,21 @@ +#ifndef STREAM_H +#define STREAM_H + +#include "rax.h" + +/* Stream item ID: a 128 bit number composed of a milliseconds time and + * a sequence counter. IDs generated in the same millisecond (or in a past + * millisecond if the clock jumped backward) will use the millisecond time + * of the latest generated ID and an incremented sequence. */ +typedef struct streamID { + uint64_t ms; /* Unix time in milliseconds. */ + uint64_t seq; /* Sequence number. 
*/ +} streamID; + +typedef struct stream { + rax *rax; /* The radix tree holding the stream. */ + uint64_t length; /* Number of elements inside this stream. */ + streamID last_id; /* Zero if there are yet no items. */ +} stream; + +#endif diff --git a/src/t_stream.c b/src/t_stream.c new file mode 100644 index 000000000..c64f5059f --- /dev/null +++ b/src/t_stream.c @@ -0,0 +1,376 @@ +/* + * Copyright (c) 2017, Salvatore Sanfilippo + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Redis nor the names of its contributors may be used + * to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* TODO: + * - After loading a stream, populate the last ID. + */ + +#include "server.h" +#include "listpack.h" +#include "endianconv.h" +#include "stream.h" + +#define STREAM_BYTES_PER_LISTPACK 4096 + +/* ----------------------------------------------------------------------- + * Low level stream encoding: a radix tree of listpacks. + * ----------------------------------------------------------------------- */ + +/* Create a new stream data structure. */ +stream *streamNew(void) { + stream *s = zmalloc(sizeof(*s)); + s->rax = raxNew(); + s->length = 0; + s->last_id.ms = 0; + s->last_id.seq = 0; + return s; +} + +/* Generate the next stream item ID given the previous one. If the current + * milliseconds Unix time is greater than the previous one, just use this + * as time part and start with sequence part of zero. Otherwise we use the + * previous time (and never go backward) and increment the sequence. */ +void streamNextID(streamID *last_id, streamID *new_id) { + uint64_t ms = mstime(); + if (ms > last_id->ms) { + new_id->ms = ms; + new_id->seq = 0; + } else { + new_id->ms = last_id->ms; + new_id->seq = last_id->seq+1; + } +} + +/* This is just a wrapper for lpAppend() to directly use a 64 bit integer + * instead of a string. */ +unsigned char *lpAppendInteger(unsigned char *lp, int64_t value) { + char buf[LONG_STR_SIZE]; + int slen = ll2string(buf,sizeof(buf),value); + return lpAppend(lp,(unsigned char*)buf,slen); +} + +/* This is a wrapper function for lpGet() to directly get an integer value + * from the listpack (that may store numbers as a string), converting + * the string if needed. */ +int64_t lpGetInteger(unsigned char *ele) { + int64_t v; + unsigned char *e = lpGet(ele,&v,NULL); + if (e == NULL) return v; + /* The following code path should never be used for how listpacks work: + * they should always be able to store an int64_t value in integer + * encoded form. However the implementation may change. 
*/ + int retval = string2ll((char*)e,v,&v); + serverAssert(retval != 0); + return v; +} + +/* Convert the specified stream entry ID as a 128 bit big endian number, so + * that the IDs can be sorted lexicographically. */ +void streamEncodeID(void *buf, streamID *id) { + uint64_t e[2]; + e[0] = htonu64(id->ms); + e[1] = htonu64(id->seq); + memcpy(buf,e,sizeof(e)); +} + +/* This is the reverse of streamEncodeID(): the decoded ID will be stored + * in the 'id' structure passed by reference. The buffer 'buf' must point + * to a 128 bit big-endian encoded ID. */ +void streamDecodeID(void *buf, streamID *id) { + uint64_t e[2]; + memcpy(e,buf,sizeof(e)); + id->ms = ntohu64(e[0]); + id->seq = ntohu64(e[1]); +} + +/* Adds a new item into the stream 's' having the specified number of + * field-value pairs as specified in 'numfields' and stored into 'argv'. + * Returns the new entry ID populating the 'added_id' structure. */ +void streamAppendItem(stream *s, robj **argv, int numfields, streamID *added_id) { + raxIterator ri; + raxStart(&ri,s->rax); + raxSeek(&ri,"$",NULL,0); + + size_t lp_bytes = 0; /* Total bytes in the tail listpack. */ + unsigned char *lp = NULL; /* Tail listpack pointer. */ + + /* Get a reference to the tail node listpack. */ + if (raxNext(&ri)) { + lp = ri.data; + lp_bytes = lpBytes(lp); + } + raxStop(&ri); + + /* Generate the new entry ID. */ + streamID id; + streamNextID(&s->last_id,&id); + + /* We have to add the key into the radix tree in lexicographic order, + * to do so we consider the ID as a single 128 bit number written in + * big endian, so that the most significant bytes are the first ones. */ + uint64_t rax_key[2]; /* Key in the radix tree containing the listpack.*/ + uint64_t entry_id[2]; /* Entry ID of the new item as 128 bit string. */ + streamEncodeID(entry_id,&id); + + /* Create a new listpack and radix tree node if needed. 
*/ + if (lp == NULL || lp_bytes > STREAM_BYTES_PER_LISTPACK) { + lp = lpNew(); + rax_key[0] = entry_id[0]; + rax_key[1] = entry_id[1]; + raxInsert(s->rax,(unsigned char*)&rax_key,sizeof(rax_key),lp,NULL); + } else { + serverAssert(ri.key_len == sizeof(rax_key)); + memcpy(rax_key,ri.key,sizeof(rax_key)); + } + + /* Populate the listpack with the new entry. */ + lp = lpAppend(lp,(unsigned char*)entry_id,sizeof(entry_id)); + lp = lpAppendInteger(lp,numfields); + for (int i = 0; i < numfields; i++) { + sds field = argv[i*2]->ptr, value = argv[i*2+1]->ptr; + lp = lpAppend(lp,(unsigned char*)field,sdslen(field)); + lp = lpAppend(lp,(unsigned char*)value,sdslen(value)); + } + + /* Insert back into the tree in order to update the listpack pointer. */ + raxInsert(s->rax,(unsigned char*)&rax_key,sizeof(rax_key),lp,NULL); + s->length++; + s->last_id = id; + if (added_id) *added_id = id; + raxShow(s->rax); +} + +/* Send the specified range to the client 'c'. The range the client will + * receive is between start and end inclusive, if 'count' is non zero, no more + * than 'count' elemnets are sent. The 'end' pointer can be NULL to mean that + * we want all the elements from 'start' till the end of the stream. */ +size_t streamReplyWithRange(client *c, stream *s, streamID *start, streamID *end, size_t count) { + void *arraylen_ptr = addDeferredMultiBulkLength(c); + size_t arraylen = 0; + + /* Seek the radix tree node that contains our start item. */ + uint64_t key[2]; + uint64_t end_key[2]; + streamEncodeID(key,start); + if (end) streamEncodeID(end_key,end); + raxIterator ri; + raxStart(&ri,s->rax); + + /* Seek the correct node in the radix tree. */ + if (start->ms || start->seq) { + raxSeek(&ri,"<=",(unsigned char*)key,sizeof(key)); + if (raxEOF(&ri)) raxSeek(&ri,">",(unsigned char*)key,sizeof(key)); + } else { + raxSeek(&ri,"^",NULL,0); + } + + /* For every radix tree node, iterate the corresponding listpack, + * returning elmeents when they are within range. 
*/ + while (raxNext(&ri)) { + serverAssert(ri.key_len == sizeof(key)); + unsigned char *lp = ri.data; + unsigned char *lp_ele = lpFirst(lp); + while(lp_ele) { + int64_t e_len; + unsigned char buf[LP_INTBUF_SIZE]; + unsigned char *e = lpGet(lp_ele,&e_len,buf); + serverAssert(e_len == sizeof(streamID)); + + /* Seek next field: number of elements. */ + lp_ele = lpNext(lp,lp_ele); + if (memcmp(e,key,sizeof(key)) >= 0) { /* If current >= start */ + if (end && memcmp(e,end_key,sizeof(key)) > 0) { + break; /* We are already out of range. */ + } + streamID thisid; + streamDecodeID(e,&thisid); + sds replyid = sdscatfmt(sdsempty(),"+%U.%U\r\n", + thisid.ms,thisid.seq); + + /* Emit this stream entry in the client output. */ + addReplyMultiBulkLen(c,2); + addReplySds(c,replyid); + int64_t numfields = lpGetInteger(lp_ele); + lp_ele = lpNext(lp,lp_ele); + addReplyMultiBulkLen(c,numfields*2); + for (int64_t i = 0; i < numfields; i++) { + /* Emit two items (key-value) per iteration. */ + for (int k = 0; k < 2; k++) { + e = lpGet(lp_ele,&e_len,buf); + addReplyBulkCBuffer(c,e,e_len); + lp_ele = lpNext(lp,lp_ele); + } + } + + arraylen++; + if (count && count == arraylen) break; + } else { + /* If we do not emit, we have to discard. */ + int64_t numfields = lpGetInteger(lp_ele); + lp_ele = lpNext(lp,lp_ele); + for (int64_t i = 0; i < numfields*2; i++) + lp_ele = lpNext(lp,lp_ele); + } + } + if (count && count == arraylen) break; + } + raxStop(&ri); + setDeferredMultiBulkLength(c,arraylen_ptr,arraylen); + return arraylen; +} + +/* ----------------------------------------------------------------------- + * Stream commands implementation + * ----------------------------------------------------------------------- */ + +/* Look the stream at 'key' and return the corresponding stream object. + * The function creates a key setting it to an empty stream if needed. 
*/ +robj *streamTypeLookupWriteOrCreate(client *c, robj *key) { + robj *o = lookupKeyWrite(c->db,key); + if (o == NULL) { + o = createStreamObject(); + dbAdd(c->db,key,o); + } else { + if (o->type != OBJ_STREAM) { + addReply(c,shared.wrongtypeerr); + return NULL; + } + } + return o; +} + +/* Helper function to convert a string to an unsigned long long value. + * The function attempts to use the faster string2ll() function inside + * Redis: if it fails, strtoull() is used instead. The function returns + * 1 if the conversion happened successfully or 0 if the number is + * invalid or out of range. */ +int string2ull(const char *s, unsigned long long *value) { + long long ll; + if (string2ll(s,strlen(s),&ll)) { + if (ll < 0) return 0; /* Negative values are out of range. */ + *value = ll; + return 1; + } + errno = 0; + *value = strtoull(s,NULL,10); + if (errno == EINVAL || errno == ERANGE) return 0; /* strtoull() failed. */ + return 1; /* Conversion done! */ +} + +/* Parse a stream ID in the format given by clients to Redis, that is + * ., and converts it into a streamID structure. If + * the specified ID is invalid C_ERR is returned and an error is reported + * to the client, otherwise C_OK is returned. The ID may be in incomplete + * form, just stating the milliseconds time part of the stream. In such a case + * the missing part is set according to the value of 'missing_seq' parameter. + * The IDs "-" and "+" specify respectively the minimum and maximum IDs + * that can be represented. */ +int streamParseIDOrReply(client *c, robj *o, streamID *id, uint64_t missing_seq) { + char buf[128]; + if (sdslen(o->ptr) > sizeof(buf)-1) goto invalid; + memcpy(buf,o->ptr,sdslen(o->ptr)+1); + + /* Handle the "-" and "+" special cases. */ + if (buf[0] == '-' && buf[1] == '\0') { + id->ms = 0; + id->seq = 0; + return C_OK; + } else if (buf[0] == '+' && buf[1] == '\0') { + id->ms = UINT64_MAX; + id->seq = UINT64_MAX; + return C_OK; + } + + /* Parse . form. 
*/ + char *dot = strchr(buf,'.'); + if (dot) *dot = '\0'; + uint64_t ms, seq; + if (string2ull(buf,&ms) == 0) goto invalid; + if (dot && string2ull(dot+1,&seq) == 0) goto invalid; + if (!dot) seq = missing_seq; + id->ms = ms; + id->seq = seq; + return C_OK; + +invalid: + addReplyError(c,"Invalid stream ID specified as stream command argument"); + return C_ERR; +} + +/* XADD key [field value] [field value] ... */ +void xaddCommand(client *c) { + if ((c->argc % 2) == 1) { + addReplyError(c,"wrong number of arguments for XADD"); + return; + } + + /* Lookup the stream at key. */ + robj *o; + stream *s; + if ((o = streamTypeLookupWriteOrCreate(c,c->argv[1])) == NULL) return; + s = o->ptr; + + /* Append using the low level function and return the ID. */ + streamID id; + streamAppendItem(s,c->argv+2,(c->argc-2)/2,&id); + sds reply = sdscatfmt(sdsempty(),"+%U.%U\r\n",id.ms,id.seq); + addReplySds(c,reply); + + signalModifiedKey(c->db,c->argv[1]); + notifyKeyspaceEvent(NOTIFY_HASH,"xadd",c->argv[1],c->db->id); + server.dirty++; +} + +/* XRANGE key start end [COUNT ] */ +void xrangeCommand(client *c) { + robj *o; + stream *s; + streamID startid, endid; + long long count = 0; + + if (streamParseIDOrReply(c,c->argv[2],&startid,0) == C_ERR) return; + if (streamParseIDOrReply(c,c->argv[3],&endid,UINT64_MAX) == C_ERR) return; + + /* Parse the COUNT option if any. */ + if (c->argc > 4) { + if (strcasecmp(c->argv[4]->ptr,"COUNT") == 0) { + if (getLongLongFromObjectOrReply(c,c->argv[5],&count,NULL) != C_OK) + return; + } else { + addReply(c,shared.syntaxerr); + return; + } + } + + /* Return the specified range to the user. 
*/ + if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.emptymultibulk)) == NULL + || checkType(c,o,OBJ_STREAM)) return; + s = o->ptr; + streamReplyWithRange(c,s,&startid,&endid,count); +} From e4e216f8cc9839219fbaeadbd4f4a63e43e2d08d Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 5 Sep 2017 12:13:16 +0200 Subject: [PATCH 0477/1722] Streams: assign value of 6 to OBJ_STREAM + some refactoring. --- src/rdb.h | 5 +++-- src/server.h | 14 +++++++------- src/t_stream.c | 8 +++++++- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/src/rdb.h b/src/rdb.h index bf1150455..ecb066fb0 100644 --- a/src/rdb.h +++ b/src/rdb.h @@ -69,8 +69,9 @@ #define RDB_ENC_INT32 2 /* 32 bit signed integer */ #define RDB_ENC_LZF 3 /* string compressed with FASTLZ */ -/* Dup object types to RDB object types. Only reason is readability (are we - * dealing with RDB types or with in-memory object types?). */ +/* Map object types to RDB object types. Macros starting with OBJ_ are for + * memory storage and may change. Instead RDB types must be fixed because + * we store them on disk. */ #define RDB_TYPE_STRING 0 #define RDB_TYPE_LIST 1 #define RDB_TYPE_SET 2 diff --git a/src/server.h b/src/server.h index 38a76d008..1185d119e 100644 --- a/src/server.h +++ b/src/server.h @@ -447,12 +447,11 @@ typedef long long mstime_t; /* millisecond time type. */ /* A redis object, that is a type able to hold a string / list / set */ /* The actual Redis Object */ -#define OBJ_STRING 0 -#define OBJ_LIST 1 -#define OBJ_SET 2 -#define OBJ_ZSET 3 -#define OBJ_HASH 4 -#define OBJ_STREAM 5 +#define OBJ_STRING 0 /* String object. */ +#define OBJ_LIST 1 /* List object. */ +#define OBJ_SET 2 /* Set object. */ +#define OBJ_ZSET 3 /* Sorted set object. */ +#define OBJ_HASH 4 /* Hash object. */ /* The "module" object type is a special one that signals that the object * is one directly managed by a Redis module. 
In this case the value points @@ -465,7 +464,8 @@ typedef long long mstime_t; /* millisecond time type. */ * by a 64 bit module type ID, which has a 54 bits module-specific signature * in order to dispatch the loading to the right module, plus a 10 bits * encoding version. */ -#define OBJ_MODULE 5 +#define OBJ_MODULE 5 /* Module object. */ +#define OBJ_STREAM 6 /* Stream object. */ /* Extract encver / signature from a module type ID. */ #define REDISMODULE_TYPE_ENCVER_BITS 10 diff --git a/src/t_stream.c b/src/t_stream.c index c64f5059f..dcf9fccee 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -149,7 +149,13 @@ void streamAppendItem(stream *s, robj **argv, int numfields, streamID *added_id) memcpy(rax_key,ri.key,sizeof(rax_key)); } - /* Populate the listpack with the new entry. */ + /* Populate the listpack with the new entry. We use the following + * encoding: + * + * +--------+----------+-------+-------+-/-+-------+-------+ + * |entry-id|num-fields|field-1|value-1|...|field-N|value-N| + * +--------+----------+-------+-------+-/-+-------+-------+ + */ lp = lpAppend(lp,(unsigned char*)entry_id,sizeof(entry_id)); lp = lpAppendInteger(lp,numfields); for (int i = 0; i < numfields; i++) { From f028e4e68e00cc2cc7552f689b393ead8b1f32b1 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 5 Sep 2017 13:14:13 +0200 Subject: [PATCH 0478/1722] Streams: RDB saving. --- src/rax.c | 5 +++++ src/rax.h | 1 + src/rdb.c | 22 ++++++++++++++++++++++ src/stream.h | 1 + src/t_stream.c | 2 -- 5 files changed, 29 insertions(+), 2 deletions(-) diff --git a/src/rax.c b/src/rax.c index b4f5ae05d..3ead27ed7 100644 --- a/src/rax.c +++ b/src/rax.c @@ -1655,6 +1655,11 @@ int raxEOF(raxIterator *it) { return it->flags & RAX_ITER_EOF; } +/* Return the number of elements inside the radix tree. 
*/ +uint64_t raxSize(rax *rax) { + return rax->numele; +} + /* ----------------------------- Introspection ------------------------------ */ /* This function is mostly used for debugging and learning purposes. diff --git a/src/rax.h b/src/rax.h index f6985c373..e22b6e699 100644 --- a/src/rax.h +++ b/src/rax.h @@ -157,5 +157,6 @@ int raxCompare(raxIterator *iter, const char *op, unsigned char *key, size_t key void raxStop(raxIterator *it); int raxEOF(raxIterator *it); void raxShow(rax *rax); +uint64_t raxSize(rax *rax); #endif diff --git a/src/rdb.c b/src/rdb.c index 19ba59ab8..c79bfa8d4 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -31,6 +31,7 @@ #include "lzf.h" /* LZF compression library */ #include "zipmap.h" #include "endianconv.h" +#include "stream.h" #include #include @@ -622,6 +623,8 @@ int rdbSaveObjectType(rio *rdb, robj *o) { return rdbSaveType(rdb,RDB_TYPE_HASH); else serverPanic("Unknown hash encoding"); + case OBJ_STREAM: + return rdbSaveType(rdb,RDB_TYPE_STREAM_LISTPACKS); case OBJ_MODULE: return rdbSaveType(rdb,RDB_TYPE_MODULE_2); default: @@ -762,7 +765,26 @@ ssize_t rdbSaveObject(rio *rdb, robj *o) { } else { serverPanic("Unknown hash encoding"); } + } else if (o->type == OBJ_STREAM) { + /* Store how many listpacks we have inside the radix tree. */ + stream *s = o->ptr; + rax *rax = s->rax; + if ((n = rdbSaveLen(rdb,raxSize(rax))) == -1) return -1; + nwritten += n; + /* Serialize all the listpacks inside the radix tree as they are, + * when loading back, we'll use the first entry of each listpack + * to insert it back into the radix tree. */ + raxIterator ri; + raxStart(&ri,rax); + raxSeek(&ri,"^",NULL,0); + while (raxNext(&ri)) { + unsigned char *lp = ri.data; + size_t lp_bytes = lpBytes(lp); + if ((n = rdbSaveRawString(rdb,lp,lp_bytes)) == -1) return -1; + nwritten += n; + } + raxStop(&ri); } else if (o->type == OBJ_MODULE) { /* Save a module-specific value. 
*/ RedisModuleIO io; diff --git a/src/stream.h b/src/stream.h index 065c328eb..e78af5bc5 100644 --- a/src/stream.h +++ b/src/stream.h @@ -2,6 +2,7 @@ #define STREAM_H #include "rax.h" +#include "listpack.h" /* Stream item ID: a 128 bit number composed of a milliseconds time and * a sequence counter. IDs generated in the same millisecond (or in a past diff --git a/src/t_stream.c b/src/t_stream.c index dcf9fccee..9ca001d71 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -32,7 +32,6 @@ */ #include "server.h" -#include "listpack.h" #include "endianconv.h" #include "stream.h" @@ -169,7 +168,6 @@ void streamAppendItem(stream *s, robj **argv, int numfields, streamID *added_id) s->length++; s->last_id = id; if (added_id) *added_id = id; - raxShow(s->rax); } /* Send the specified range to the client 'c'. The range the client will From 90c980b10535cec911b5c88c1397a21800e9ffa5 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 5 Sep 2017 16:24:11 +0200 Subject: [PATCH 0479/1722] Streams: RDB loading. RDB saving modified. After a few attempts it looked quite saner to just add the last item ID at the end of the serialized listpacks, instead of scanning the last listpack loaded from head to tail just to fetch it. It's a disk space VS CPU-and-simplicity tradeoff basically. --- src/rdb.c | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/src/rdb.c b/src/rdb.c index c79bfa8d4..acc6ca879 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -785,6 +785,12 @@ ssize_t rdbSaveObject(rio *rdb, robj *o) { nwritten += n; } raxStop(&ri); + + /* Save the last entry ID. */ + if ((n = rdbSaveLen(rdb,s->last_id.ms)) == -1) return -1; + nwritten += n; + if ((n = rdbSaveLen(rdb,s->last_id.seq)) == -1) return -1; + nwritten += n; } else if (o->type == OBJ_MODULE) { /* Save a module-specific value. 
*/ RedisModuleIO io; @@ -1431,6 +1437,40 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) { rdbExitReportCorruptRDB("Unknown RDB encoding type %d",rdbtype); break; } + } else if (rdbtype == RDB_TYPE_STREAM_LISTPACKS) { + o = createStreamObject(); + stream *s = o->ptr; + uint64_t listpacks = rdbLoadLen(rdb,NULL); + + while(listpacks--) { + unsigned char *lp = + rdbGenericLoadStringObject(rdb,RDB_LOAD_PLAIN,NULL); + if (lp == NULL) return NULL; + unsigned char *first = lpFirst(lp); + if (first == NULL) { + /* Serialized listpacks should never be free, since on + * deletion we should remove the radix tree key if the + * resulting listpack is emtpy. */ + rdbExitReportCorruptRDB("Empty listpack inside stream"); + } + + /* Get the ID of the first entry: we'll use it as key to add the + * listpack into the radix tree. */ + int64_t e_len; + unsigned char buf[LP_INTBUF_SIZE]; + unsigned char *e = lpGet(first,&e_len,buf); + if (e_len != sizeof(streamID)) { + rdbExitReportCorruptRDB("Listpack first entry is not the " + "size of a stream ID"); + } + int retval = raxInsert(s->rax,e,sizeof(streamID),lp,NULL); + if (!retval) + rdbExitReportCorruptRDB("Listpack re-added with existing key"); + } + + /* Load the last entry ID. */ + s->last_id.ms = rdbLoadLen(rdb,NULL); + s->last_id.seq = rdbLoadLen(rdb,NULL); } else if (rdbtype == RDB_TYPE_MODULE || rdbtype == RDB_TYPE_MODULE_2) { uint64_t moduleid = rdbLoadLen(rdb,NULL); moduleType *mt = moduleTypeLookupModuleByID(moduleid); From fac8b716abf977b631eca4f2f281262e1ef1e864 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 6 Sep 2017 12:00:03 +0200 Subject: [PATCH 0480/1722] Streams: change listpack allocator to zmalloc. 
--- src/listpack_malloc.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/listpack_malloc.h b/src/listpack_malloc.h index a3a077fcd..401ab6f74 100644 --- a/src/listpack_malloc.h +++ b/src/listpack_malloc.h @@ -38,7 +38,8 @@ #ifndef LISTPACK_ALLOC_H #define LISTPACK_ALLOC_H -#define lp_malloc malloc -#define lp_realloc realloc -#define lp_free free +#include "zmalloc.h" +#define lp_malloc zmalloc +#define lp_realloc zrealloc +#define lp_free zfree #endif From 64a7ad038da8ee8ce16e150fceb6fc2f165f5d6f Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 6 Sep 2017 12:00:18 +0200 Subject: [PATCH 0481/1722] Streams: Save stream->length in RDB. --- src/rdb.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/rdb.c b/src/rdb.c index acc6ca879..5d15539c5 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -786,6 +786,11 @@ ssize_t rdbSaveObject(rio *rdb, robj *o) { } raxStop(&ri); + /* Save the number of elements inside the stream. We cannot obtain + * this easily later, since our macro nodes should be checked for + * number of items: not a great CPU / space tradeoff. */ + if ((n = rdbSaveLen(rdb,s->length)) == -1) return -1; + nwritten += n; /* Save the last entry ID. */ if ((n = rdbSaveLen(rdb,s->last_id.ms)) == -1) return -1; nwritten += n; @@ -1467,7 +1472,8 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) { if (!retval) rdbExitReportCorruptRDB("Listpack re-added with existing key"); } - + /* Load total number of items inside the stream. */ + s->length = rdbLoadLen(rdb,NULL); /* Load the last entry ID. */ s->last_id.ms = rdbLoadLen(rdb,NULL); s->last_id.seq = rdbLoadLen(rdb,NULL); From 760ad8f65c7d8039938cc700f5776688e2bc0f54 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 6 Sep 2017 12:03:17 +0200 Subject: [PATCH 0482/1722] Streams: XLEN command. 
--- src/server.c | 1 + src/server.h | 1 + src/t_stream.c | 9 +++++++++ 3 files changed, 11 insertions(+) diff --git a/src/server.c b/src/server.c index 2c3647db6..f3338f562 100644 --- a/src/server.c +++ b/src/server.c @@ -304,6 +304,7 @@ struct redisCommand redisCommandTable[] = { {"pfdebug",pfdebugCommand,-3,"w",0,NULL,0,0,0,0,0}, {"xadd",xaddCommand,-4,"wmF",0,NULL,1,1,1,0,0}, {"xrange",xrangeCommand,-4,"r",0,NULL,1,1,1,0,0}, + {"xlen",xlenCommand,2,"rF",0,NULL,1,1,1,0,0}, {"post",securityWarningCommand,-1,"lt",0,NULL,0,0,0,0,0}, {"host:",securityWarningCommand,-1,"lt",0,NULL,0,0,0,0,0}, {"latency",latencyCommand,-2,"aslt",0,NULL,0,0,0,0,0} diff --git a/src/server.h b/src/server.h index 1185d119e..c934d7f6e 100644 --- a/src/server.h +++ b/src/server.h @@ -2001,6 +2001,7 @@ void moduleCommand(client *c); void securityWarningCommand(client *c); void xaddCommand(client *c); void xrangeCommand(client *c); +void xlenCommand(client *c); #if defined(__GNUC__) void *calloc(size_t count, size_t size) __attribute__ ((deprecated)); diff --git a/src/t_stream.c b/src/t_stream.c index 9ca001d71..3474d4786 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -378,3 +378,12 @@ void xrangeCommand(client *c) { s = o->ptr; streamReplyWithRange(c,s,&startid,&endid,count); } + +/* XLEN */ +void xlenCommand(client *c) { + robj *o; + if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL + || checkType(c,o,OBJ_STREAM)) return; + stream *s = o->ptr; + addReplyLongLong(c,s->length); +} From a168cbef133264b4a156aeac96ad28e9d532e639 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 6 Sep 2017 13:11:47 +0200 Subject: [PATCH 0483/1722] Streams: implement stream object release. 
--- src/object.c | 5 +++++ src/rax.c | 18 +++++++++++++----- src/rax.h | 1 + src/server.h | 1 + src/t_stream.c | 5 +++++ 5 files changed, 25 insertions(+), 5 deletions(-) diff --git a/src/object.c b/src/object.c index 8eeb5c6c1..b689edcf2 100644 --- a/src/object.c +++ b/src/object.c @@ -310,6 +310,10 @@ void freeModuleObject(robj *o) { zfree(mv); } +void freeStreamObject(robj *o) { + freeStream(o->ptr); +} + void incrRefCount(robj *o) { if (o->refcount != OBJ_SHARED_REFCOUNT) o->refcount++; } @@ -323,6 +327,7 @@ void decrRefCount(robj *o) { case OBJ_ZSET: freeZsetObject(o); break; case OBJ_HASH: freeHashObject(o); break; case OBJ_MODULE: freeModuleObject(o); break; + case OBJ_STREAM: freeStreamObject(o); break; default: serverPanic("Unknown object type"); break; } zfree(o); diff --git a/src/rax.c b/src/rax.c index 3ead27ed7..442e7bfef 100644 --- a/src/rax.c +++ b/src/rax.c @@ -1093,28 +1093,36 @@ int raxRemove(rax *rax, unsigned char *s, size_t len, void **old) { /* This is the core of raxFree(): performs a depth-first scan of the * tree and releases all the nodes found. */ -void raxRecursiveFree(rax *rax, raxNode *n) { +void raxRecursiveFree(rax *rax, raxNode *n, void (*free_callback)(void*)) { debugnode("free traversing",n); int numchildren = n->iscompr ? 1 : n->size; raxNode **cp = raxNodeLastChildPtr(n); while(numchildren--) { raxNode *child; memcpy(&child,cp,sizeof(child)); - raxRecursiveFree(rax,child); + raxRecursiveFree(rax,child,free_callback); cp--; } debugnode("free depth-first",n); + if (free_callback && n->iskey && !n->isnull) + free_callback(raxGetData(n)); rax_free(n); rax->numnodes--; } -/* Free a whole radix tree. */ -void raxFree(rax *rax) { - raxRecursiveFree(rax,rax->head); +/* Free a whole radix tree, calling the specified callback in order to + * free the auxiliary data. 
*/ +void raxFreeWithCallback(rax *rax, void (*free_callback)(void*)) { + raxRecursiveFree(rax,rax->head,free_callback); assert(rax->numnodes == 0); rax_free(rax); } +/* Free a whole radix tree. */ +void raxFree(rax *rax) { + raxFreeWithCallback(rax,NULL); +} + /* ------------------------------- Iterator --------------------------------- */ /* Initialize a Rax iterator. This call should be performed a single time diff --git a/src/rax.h b/src/rax.h index e22b6e699..b4e2fd91e 100644 --- a/src/rax.h +++ b/src/rax.h @@ -148,6 +148,7 @@ int raxInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old); int raxRemove(rax *rax, unsigned char *s, size_t len, void **old); void *raxFind(rax *rax, unsigned char *s, size_t len); void raxFree(rax *rax); +void raxFreeWithCallback(rax *rax, void (*free_callback)(void*)); void raxStart(raxIterator *it, rax *rt); int raxSeek(raxIterator *it, const char *op, unsigned char *ele, size_t len); int raxNext(raxIterator *it); diff --git a/src/server.h b/src/server.h index c934d7f6e..8ea183414 100644 --- a/src/server.h +++ b/src/server.h @@ -1419,6 +1419,7 @@ void signalListAsReady(redisDb *db, robj *key); /* Stream data type. */ stream *streamNew(void); +void freeStream(stream *s); /* MULTI/EXEC/WATCH... */ void unwatchAllKeys(client *c); diff --git a/src/t_stream.c b/src/t_stream.c index 3474d4786..52b0e1058 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -51,6 +51,11 @@ stream *streamNew(void) { return s; } +/* Free a stream, including the listpacks stored inside the radix tree. */ +void freeStream(stream *s) { + raxFreeWithCallback(s->rax,(void(*)(void*))lpFree); +} + /* Generate the next stream item ID given the previous one. If the current * milliseconds Unix time is greater than the previous one, just use this * as time part and start with sequence part of zero. 
Otherwise we use the From 896300be30bd7f5a731d782a81871422b29a3b9f Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 6 Sep 2017 15:43:28 +0200 Subject: [PATCH 0484/1722] Streams: initial work to use blocking lists logic for streams XREAD. --- src/blocked.c | 208 +++++++++++++++++++++++++++++++++++++++++++++++ src/db.c | 9 +- src/networking.c | 2 +- src/server.c | 15 +++- src/server.h | 11 ++- src/t_list.c | 204 ---------------------------------------------- 6 files changed, 234 insertions(+), 215 deletions(-) diff --git a/src/blocked.c b/src/blocked.c index 54b26b713..acd3b9485 100644 --- a/src/blocked.c +++ b/src/blocked.c @@ -65,6 +65,8 @@ #include "server.h" +int serveClientBlockedOnList(client *receiver, robj *key, robj *dstkey, redisDb *db, robj *value, int where); + /* Get a timeout value from an object and store it into 'timeout'. * The final timeout is always stored as milliseconds as a time where the * timeout will expire, however the parsing is performed according to @@ -193,3 +195,209 @@ void disconnectAllBlockedClients(void) { } } } + +/* This function should be called by Redis every time a single command, + * a MULTI/EXEC block, or a Lua script, terminated its execution after + * being called by a client. + * + * All the keys with at least one client blocked that received at least + * one new element via some PUSH/XADD operation are accumulated into + * the server.ready_keys list. This function will run the list and will + * serve clients accordingly. Note that the function will iterate again and + * again as a result of serving BRPOPLPUSH we can have new blocking clients + * to serve because of the PUSH side of BRPOPLPUSH. */ +void handleClientsBlockedOnKeys(void) { + while(listLength(server.ready_keys) != 0) { + list *l; + + /* Point server.ready_keys to a fresh list and save the current one + * locally. 
This way as we run the old list we are free to call + * signalKeyAsReady() that may push new elements in server.ready_keys + * when handling clients blocked into BRPOPLPUSH. */ + l = server.ready_keys; + server.ready_keys = listCreate(); + + while(listLength(l) != 0) { + listNode *ln = listFirst(l); + readyList *rl = ln->value; + + /* First of all remove this key from db->ready_keys so that + * we can safely call signalKeyAsReady() against this key. */ + dictDelete(rl->db->ready_keys,rl->key); + + /* If the key exists and it's a list, serve blocked clients + * with data. */ + robj *o = lookupKeyWrite(rl->db,rl->key); + if (o != NULL && o->type == OBJ_LIST) { + dictEntry *de; + + /* We serve clients in the same order they blocked for + * this key, from the first blocked to the last. */ + de = dictFind(rl->db->blocking_keys,rl->key); + if (de) { + list *clients = dictGetVal(de); + int numclients = listLength(clients); + + while(numclients--) { + listNode *clientnode = listFirst(clients); + client *receiver = clientnode->value; + robj *dstkey = receiver->bpop.target; + int where = (receiver->lastcmd && + receiver->lastcmd->proc == blpopCommand) ? + LIST_HEAD : LIST_TAIL; + robj *value = listTypePop(o,where); + + if (value) { + /* Protect receiver->bpop.target, that will be + * freed by the next unblockClient() + * call. */ + if (dstkey) incrRefCount(dstkey); + unblockClient(receiver); + + if (serveClientBlockedOnList(receiver, + rl->key,dstkey,rl->db,value, + where) == C_ERR) + { + /* If we failed serving the client we need + * to also undo the POP operation. */ + listTypePush(o,value,where); + } + + if (dstkey) decrRefCount(dstkey); + decrRefCount(value); + } else { + break; + } + } + } + + if (listTypeLength(o) == 0) { + dbDelete(rl->db,rl->key); + } + /* We don't call signalModifiedKey() as it was already called + * when an element was pushed on the list. */ + } + + /* Free this item. 
*/ + decrRefCount(rl->key); + zfree(rl); + listDelNode(l,ln); + } + listRelease(l); /* We have the new list on place at this point. */ + } +} + +/* This is how the current blocking POP works, we use BLPOP as example: + * - If the user calls BLPOP and the key exists and contains a non empty list + * then LPOP is called instead. So BLPOP is semantically the same as LPOP + * if blocking is not required. + * - If instead BLPOP is called and the key does not exists or the list is + * empty we need to block. In order to do so we remove the notification for + * new data to read in the client socket (so that we'll not serve new + * requests if the blocking request is not served). Also we put the client + * in a dictionary (db->blocking_keys) mapping keys to a list of clients + * blocking for this keys. + * - If a PUSH operation against a key with blocked clients waiting is + * performed, we mark this key as "ready", and after the current command, + * MULTI/EXEC block, or script, is executed, we serve all the clients waiting + * for this list, from the one that blocked first, to the last, accordingly + * to the number of elements we have in the ready list. + */ + +/* Set a client in blocking mode for the specified key, with the specified + * timeout */ +void blockForKeys(client *c, robj **keys, int numkeys, mstime_t timeout, robj *target) { + dictEntry *de; + list *l; + int j; + + c->bpop.timeout = timeout; + c->bpop.target = target; + + if (target != NULL) incrRefCount(target); + + for (j = 0; j < numkeys; j++) { + /* If the key already exists in the dict ignore it. 
*/ + if (dictAdd(c->bpop.keys,keys[j],NULL) != DICT_OK) continue; + incrRefCount(keys[j]); + + /* And in the other "side", to map keys -> clients */ + de = dictFind(c->db->blocking_keys,keys[j]); + if (de == NULL) { + int retval; + + /* For every key we take a list of clients blocked for it */ + l = listCreate(); + retval = dictAdd(c->db->blocking_keys,keys[j],l); + incrRefCount(keys[j]); + serverAssertWithInfo(c,keys[j],retval == DICT_OK); + } else { + l = dictGetVal(de); + } + listAddNodeTail(l,c); + } + blockClient(c,BLOCKED_LIST); +} + +/* Unblock a client that's waiting in a blocking operation such as BLPOP. + * You should never call this function directly, but unblockClient() instead. */ +void unblockClientWaitingData(client *c) { + dictEntry *de; + dictIterator *di; + list *l; + + serverAssertWithInfo(c,NULL,dictSize(c->bpop.keys) != 0); + di = dictGetIterator(c->bpop.keys); + /* The client may wait for multiple keys, so unblock it for every key. */ + while((de = dictNext(di)) != NULL) { + robj *key = dictGetKey(de); + + /* Remove this client from the list of clients waiting for this key. */ + l = dictFetchValue(c->db->blocking_keys,key); + serverAssertWithInfo(c,key,l != NULL); + listDelNode(l,listSearchKey(l,c)); + /* If the list is empty we need to remove it to avoid wasting memory */ + if (listLength(l) == 0) + dictDelete(c->db->blocking_keys,key); + } + dictReleaseIterator(di); + + /* Cleanup the client structure */ + dictEmpty(c->bpop.keys,NULL); + if (c->bpop.target) { + decrRefCount(c->bpop.target); + c->bpop.target = NULL; + } +} + +/* If the specified key has clients blocked waiting for list pushes, this + * function will put the key reference into the server.ready_keys list. + * Note that db->ready_keys is a hash table that allows us to avoid putting + * the same key again and again in the list in case of multiple pushes + * made by a script or in the context of MULTI/EXEC. 
+ * + * The list will be finally processed by handleClientsBlockedOnLists() */ +void signalKeyAsReady(redisDb *db, robj *key) { + readyList *rl; + + /* No clients blocking for this key? No need to queue it. */ + if (dictFind(db->blocking_keys,key) == NULL) return; + + /* Key was already signaled? No need to queue it again. */ + if (dictFind(db->ready_keys,key) != NULL) return; + + /* Ok, we need to queue this key into server.ready_keys. */ + rl = zmalloc(sizeof(*rl)); + rl->key = key; + rl->db = db; + incrRefCount(key); + listAddNodeTail(server.ready_keys,rl); + + /* We also add the key in the db->ready_keys dictionary in order + * to avoid adding it multiple times into a list with a simple O(1) + * check. */ + incrRefCount(key); + serverAssert(dictAdd(db->ready_keys,key,NULL) == DICT_OK); +} + + diff --git a/src/db.c b/src/db.c index 4d6999be3..6682e573d 100644 --- a/src/db.c +++ b/src/db.c @@ -169,9 +169,10 @@ void dbAdd(redisDb *db, robj *key, robj *val) { int retval = dictAdd(db->dict, copy, val); serverAssertWithInfo(NULL,key,retval == DICT_OK); - if (val->type == OBJ_LIST) signalListAsReady(db, key); + if (val->type == OBJ_LIST || val->type == OBJ_STREAM) + signalKeyAsReady(db, key); if (server.cluster_enabled) slotToKeyAdd(key); - } +} /* Overwrite an existing key with a new value. Incrementing the reference * count of the new value is up to the caller. 
@@ -951,8 +952,8 @@ void scanDatabaseForReadyLists(redisDb *db) { while((de = dictNext(di)) != NULL) { robj *key = dictGetKey(de); robj *value = lookupKey(db,key,LOOKUP_NOTOUCH); - if (value && value->type == OBJ_LIST) - signalListAsReady(db, key); + if (value && (value->type == OBJ_LIST || value->type == OBJ_STREAM)) + signalKeyAsReady(db, key); } dictReleaseIterator(di); } diff --git a/src/networking.c b/src/networking.c index aeaeca967..d672ec329 100644 --- a/src/networking.c +++ b/src/networking.c @@ -124,7 +124,7 @@ client *createClient(int fd) { listSetDupMethod(c->reply,dupClientReplyValue); c->btype = BLOCKED_NONE; c->bpop.timeout = 0; - c->bpop.keys = dictCreate(&objectKeyPointerValueDictType,NULL); + c->bpop.keys = dictCreate(&objectKeyHeapPointerValueDictType,NULL); c->bpop.target = NULL; c->bpop.numreplicas = 0; c->bpop.reploffset = 0; diff --git a/src/server.c b/src/server.c index f3338f562..56b2188e5 100644 --- a/src/server.c +++ b/src/server.c @@ -550,10 +550,21 @@ dictType objectKeyPointerValueDictType = { NULL, /* key dup */ NULL, /* val dup */ dictEncObjKeyCompare, /* key compare */ - dictObjectDestructor, /* key destructor */ + dictObjectDestructor, /* key destructor */ NULL /* val destructor */ }; +/* Like objectKeyPointerValueDictType(), but values can be destroyed, if + * not NULL, calling zfree(). */ +dictType objectKeyHeapPointerValueDictType = { + dictEncObjHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictEncObjKeyCompare, /* key compare */ + dictObjectDestructor, /* key destructor */ + dictVanillaFree /* val destructor */ +}; + /* Set dictionary type. Keys are SDS strings, values are ot used. 
*/ dictType setDictType = { dictSdsHash, /* hash function */ @@ -2508,7 +2519,7 @@ int processCommand(client *c) { call(c,CMD_CALL_FULL); c->woff = server.master_repl_offset; if (listLength(server.ready_keys)) - handleClientsBlockedOnLists(); + handleClientsBlockedOnKeys(); } return C_OK; } diff --git a/src/server.h b/src/server.h index 8ea183414..8e50d030e 100644 --- a/src/server.h +++ b/src/server.h @@ -256,6 +256,7 @@ typedef long long mstime_t; /* millisecond time type. */ #define BLOCKED_LIST 1 /* BLPOP & co. */ #define BLOCKED_WAIT 2 /* WAIT for synchronous replication. */ #define BLOCKED_MODULE 3 /* Blocked by a loadable module. */ +#define BLOCKED_STREAM 4 /* XREAD. */ /* Client request types */ #define PROTO_REQ_INLINE 1 @@ -641,9 +642,9 @@ typedef struct blockingState { mstime_t timeout; /* Blocking operation timeout. If UNIX current time * is > timeout then the operation timed out. */ - /* BLOCKED_LIST */ + /* BLOCKED_LIST and BLOCKED_STREAM */ dict *keys; /* The keys we are waiting to terminate a blocking - * operation such as BLPOP. Otherwise NULL. */ + * operation such as BLPOP or XREAD. Or NULL. */ robj *target; /* The key that should receive the element, * for BRPOPLPUSH. */ @@ -1291,6 +1292,7 @@ typedef struct { extern struct redisServer server; extern struct sharedObjectsStruct shared; extern dictType objectKeyPointerValueDictType; +extern dictType objectKeyHeapPointerValueDictType; extern dictType setDictType; extern dictType zsetDictType; extern dictType clusterNodesDictType; @@ -1413,9 +1415,7 @@ int listTypeEqual(listTypeEntry *entry, robj *o); void listTypeDelete(listTypeIterator *iter, listTypeEntry *entry); void listTypeConvert(robj *subject, int enc); void unblockClientWaitingData(client *c); -void handleClientsBlockedOnLists(void); void popGenericCommand(client *c, int where); -void signalListAsReady(redisDb *db, robj *key); /* Stream data type. 
*/ stream *streamNew(void); @@ -1798,6 +1798,9 @@ void unblockClient(client *c); void replyToBlockedClientTimedOut(client *c); int getTimeoutFromObjectOrReply(client *c, robj *object, mstime_t *timeout, int unit); void disconnectAllBlockedClients(void); +void handleClientsBlockedOnKeys(void); +void signalKeyAsReady(redisDb *db, robj *key); +void blockForKeys(client *c, robj **keys, int numkeys, mstime_t timeout, robj *target); /* expire.c -- Handling of expired keys */ void activeExpireCycle(int type); diff --git a/src/t_list.c b/src/t_list.c index a0a30998d..c7eacb0ee 100644 --- a/src/t_list.c +++ b/src/t_list.c @@ -603,119 +603,6 @@ void rpoplpushCommand(client *c) { * Blocking POP operations *----------------------------------------------------------------------------*/ -/* This is how the current blocking POP works, we use BLPOP as example: - * - If the user calls BLPOP and the key exists and contains a non empty list - * then LPOP is called instead. So BLPOP is semantically the same as LPOP - * if blocking is not required. - * - If instead BLPOP is called and the key does not exists or the list is - * empty we need to block. In order to do so we remove the notification for - * new data to read in the client socket (so that we'll not serve new - * requests if the blocking request is not served). Also we put the client - * in a dictionary (db->blocking_keys) mapping keys to a list of clients - * blocking for this keys. - * - If a PUSH operation against a key with blocked clients waiting is - * performed, we mark this key as "ready", and after the current command, - * MULTI/EXEC block, or script, is executed, we serve all the clients waiting - * for this list, from the one that blocked first, to the last, accordingly - * to the number of elements we have in the ready list. 
- */ - -/* Set a client in blocking mode for the specified key, with the specified - * timeout */ -void blockForKeys(client *c, robj **keys, int numkeys, mstime_t timeout, robj *target) { - dictEntry *de; - list *l; - int j; - - c->bpop.timeout = timeout; - c->bpop.target = target; - - if (target != NULL) incrRefCount(target); - - for (j = 0; j < numkeys; j++) { - /* If the key already exists in the dict ignore it. */ - if (dictAdd(c->bpop.keys,keys[j],NULL) != DICT_OK) continue; - incrRefCount(keys[j]); - - /* And in the other "side", to map keys -> clients */ - de = dictFind(c->db->blocking_keys,keys[j]); - if (de == NULL) { - int retval; - - /* For every key we take a list of clients blocked for it */ - l = listCreate(); - retval = dictAdd(c->db->blocking_keys,keys[j],l); - incrRefCount(keys[j]); - serverAssertWithInfo(c,keys[j],retval == DICT_OK); - } else { - l = dictGetVal(de); - } - listAddNodeTail(l,c); - } - blockClient(c,BLOCKED_LIST); -} - -/* Unblock a client that's waiting in a blocking operation such as BLPOP. - * You should never call this function directly, but unblockClient() instead. */ -void unblockClientWaitingData(client *c) { - dictEntry *de; - dictIterator *di; - list *l; - - serverAssertWithInfo(c,NULL,dictSize(c->bpop.keys) != 0); - di = dictGetIterator(c->bpop.keys); - /* The client may wait for multiple keys, so unblock it for every key. */ - while((de = dictNext(di)) != NULL) { - robj *key = dictGetKey(de); - - /* Remove this client from the list of clients waiting for this key. 
*/ - l = dictFetchValue(c->db->blocking_keys,key); - serverAssertWithInfo(c,key,l != NULL); - listDelNode(l,listSearchKey(l,c)); - /* If the list is empty we need to remove it to avoid wasting memory */ - if (listLength(l) == 0) - dictDelete(c->db->blocking_keys,key); - } - dictReleaseIterator(di); - - /* Cleanup the client structure */ - dictEmpty(c->bpop.keys,NULL); - if (c->bpop.target) { - decrRefCount(c->bpop.target); - c->bpop.target = NULL; - } -} - -/* If the specified key has clients blocked waiting for list pushes, this - * function will put the key reference into the server.ready_keys list. - * Note that db->ready_keys is a hash table that allows us to avoid putting - * the same key again and again in the list in case of multiple pushes - * made by a script or in the context of MULTI/EXEC. - * - * The list will be finally processed by handleClientsBlockedOnLists() */ -void signalListAsReady(redisDb *db, robj *key) { - readyList *rl; - - /* No clients blocking for this key? No need to queue it. */ - if (dictFind(db->blocking_keys,key) == NULL) return; - - /* Key was already signaled? No need to queue it again. */ - if (dictFind(db->ready_keys,key) != NULL) return; - - /* Ok, we need to queue this key into server.ready_keys. */ - rl = zmalloc(sizeof(*rl)); - rl->key = key; - rl->db = db; - incrRefCount(key); - listAddNodeTail(server.ready_keys,rl); - - /* We also add the key in the db->ready_keys dictionary in order - * to avoid adding it multiple times into a list with a simple O(1) - * check. */ - incrRefCount(key); - serverAssert(dictAdd(db->ready_keys,key,NULL) == DICT_OK); -} - /* This is a helper function for handleClientsBlockedOnLists(). 
It's work * is to serve a specific client (receiver) that is blocked on 'key' * in the context of the specified 'db', doing the following: @@ -785,97 +672,6 @@ int serveClientBlockedOnList(client *receiver, robj *key, robj *dstkey, redisDb return C_OK; } -/* This function should be called by Redis every time a single command, - * a MULTI/EXEC block, or a Lua script, terminated its execution after - * being called by a client. - * - * All the keys with at least one client blocked that received at least - * one new element via some PUSH operation are accumulated into - * the server.ready_keys list. This function will run the list and will - * serve clients accordingly. Note that the function will iterate again and - * again as a result of serving BRPOPLPUSH we can have new blocking clients - * to serve because of the PUSH side of BRPOPLPUSH. */ -void handleClientsBlockedOnLists(void) { - while(listLength(server.ready_keys) != 0) { - list *l; - - /* Point server.ready_keys to a fresh list and save the current one - * locally. This way as we run the old list we are free to call - * signalListAsReady() that may push new elements in server.ready_keys - * when handling clients blocked into BRPOPLPUSH. */ - l = server.ready_keys; - server.ready_keys = listCreate(); - - while(listLength(l) != 0) { - listNode *ln = listFirst(l); - readyList *rl = ln->value; - - /* First of all remove this key from db->ready_keys so that - * we can safely call signalListAsReady() against this key. */ - dictDelete(rl->db->ready_keys,rl->key); - - /* If the key exists and it's a list, serve blocked clients - * with data. */ - robj *o = lookupKeyWrite(rl->db,rl->key); - if (o != NULL && o->type == OBJ_LIST) { - dictEntry *de; - - /* We serve clients in the same order they blocked for - * this key, from the first blocked to the last. 
*/ - de = dictFind(rl->db->blocking_keys,rl->key); - if (de) { - list *clients = dictGetVal(de); - int numclients = listLength(clients); - - while(numclients--) { - listNode *clientnode = listFirst(clients); - client *receiver = clientnode->value; - robj *dstkey = receiver->bpop.target; - int where = (receiver->lastcmd && - receiver->lastcmd->proc == blpopCommand) ? - LIST_HEAD : LIST_TAIL; - robj *value = listTypePop(o,where); - - if (value) { - /* Protect receiver->bpop.target, that will be - * freed by the next unblockClient() - * call. */ - if (dstkey) incrRefCount(dstkey); - unblockClient(receiver); - - if (serveClientBlockedOnList(receiver, - rl->key,dstkey,rl->db,value, - where) == C_ERR) - { - /* If we failed serving the client we need - * to also undo the POP operation. */ - listTypePush(o,value,where); - } - - if (dstkey) decrRefCount(dstkey); - decrRefCount(value); - } else { - break; - } - } - } - - if (listTypeLength(o) == 0) { - dbDelete(rl->db,rl->key); - } - /* We don't call signalModifiedKey() as it was already called - * when an element was pushed on the list. */ - } - - /* Free this item. */ - decrRefCount(rl->key); - zfree(rl); - listDelNode(l,ln); - } - listRelease(l); /* We have the new list on place at this point. */ - } -} - /* Blocking RPOP/LPOP */ void blockingPopGenericCommand(client *c, int where) { robj *o; From 6195349b507a64e520a3f5823a90f087186a6b5f Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 6 Sep 2017 17:50:11 +0200 Subject: [PATCH 0485/1722] Streams: more internal preparation for blocking XREAD. --- src/blocked.c | 34 +++++++++++++++++++++++++--------- src/server.h | 2 +- src/t_list.c | 4 ++-- 3 files changed, 28 insertions(+), 12 deletions(-) diff --git a/src/blocked.c b/src/blocked.c index acd3b9485..74dab0c19 100644 --- a/src/blocked.c +++ b/src/blocked.c @@ -225,8 +225,7 @@ void handleClientsBlockedOnKeys(void) { * we can safely call signalKeyAsReady() against this key. 
*/ dictDelete(rl->db->ready_keys,rl->key); - /* If the key exists and it's a list, serve blocked clients - * with data. */ + /* Serve clients blocked on list key. */ robj *o = lookupKeyWrite(rl->db,rl->key); if (o != NULL && o->type == OBJ_LIST) { dictEntry *de; @@ -241,6 +240,8 @@ void handleClientsBlockedOnKeys(void) { while(numclients--) { listNode *clientnode = listFirst(clients); client *receiver = clientnode->value; + if (receiver->btype != BLOCKED_LIST) continue; + robj *dstkey = receiver->bpop.target; int where = (receiver->lastcmd && receiver->lastcmd->proc == blpopCommand) ? @@ -287,7 +288,8 @@ void handleClientsBlockedOnKeys(void) { } } -/* This is how the current blocking POP works, we use BLPOP as example: +/* This is how the current blocking lists/streams work, we use BLPOP as + * example, but the concept is the same for other list ops and XREAD. * - If the user calls BLPOP and the key exists and contains a non empty list * then LPOP is called instead. So BLPOP is semantically the same as LPOP * if blocking is not required. @@ -304,9 +306,15 @@ void handleClientsBlockedOnKeys(void) { * to the number of elements we have in the ready list. */ -/* Set a client in blocking mode for the specified key, with the specified - * timeout */ -void blockForKeys(client *c, robj **keys, int numkeys, mstime_t timeout, robj *target) { +/* Set a client in blocking mode for the specified key (list or stream), with + * the specified timeout. The 'type' argument is BLOCKED_LIST or BLOCKED_STREAM + * depending on the kind of operation we are waiting for an empty key in + * order to awake the client. The client is blocked for all the 'numkeys' + * keys as in the 'keys' argument. When we block for stream keys, we also + * provide an array of streamID structures: clients will be unblocked only + * when items with an ID greater or equal to the specified one is appended + * to the stream. 
*/ +void blockForKeys(client *c, int btype, robj **keys, int numkeys, mstime_t timeout, robj *target, streamID *ids) { dictEntry *de; list *l; int j; @@ -317,8 +325,16 @@ void blockForKeys(client *c, robj **keys, int numkeys, mstime_t timeout, robj *t if (target != NULL) incrRefCount(target); for (j = 0; j < numkeys; j++) { - /* If the key already exists in the dict ignore it. */ - if (dictAdd(c->bpop.keys,keys[j],NULL) != DICT_OK) continue; + /* The value associated with the key name in the bpop.keys dictionary + * is NULL for lists, or the stream ID for streams. */ + void *key_data = NULL; + if (btype == BLOCKED_STREAM) { + key_data = zmalloc(sizeof(streamID)); + memcpy(key_data,ids+j,sizeof(streamID)); + } + + /* If the key already exists in the dictionary ignore it. */ + if (dictAdd(c->bpop.keys,keys[j],key_data) != DICT_OK) continue; incrRefCount(keys[j]); /* And in the other "side", to map keys -> clients */ @@ -336,7 +352,7 @@ void blockForKeys(client *c, robj **keys, int numkeys, mstime_t timeout, robj *t } listAddNodeTail(l,c); } - blockClient(c,BLOCKED_LIST); + blockClient(c,btype); } /* Unblock a client that's waiting in a blocking operation such as BLPOP. 
diff --git a/src/server.h b/src/server.h index 8e50d030e..2c69a94cd 100644 --- a/src/server.h +++ b/src/server.h @@ -1800,7 +1800,7 @@ int getTimeoutFromObjectOrReply(client *c, robj *object, mstime_t *timeout, int void disconnectAllBlockedClients(void); void handleClientsBlockedOnKeys(void); void signalKeyAsReady(redisDb *db, robj *key); -void blockForKeys(client *c, robj **keys, int numkeys, mstime_t timeout, robj *target); +void blockForKeys(client *c, int btype, robj **keys, int numkeys, mstime_t timeout, robj *target, streamID *ids); /* expire.c -- Handling of expired keys */ void activeExpireCycle(int type); diff --git a/src/t_list.c b/src/t_list.c index c7eacb0ee..c7e6aac00 100644 --- a/src/t_list.c +++ b/src/t_list.c @@ -726,7 +726,7 @@ void blockingPopGenericCommand(client *c, int where) { } /* If the list is empty or the key does not exists we must block */ - blockForKeys(c, c->argv + 1, c->argc - 2, timeout, NULL); + blockForKeys(c,BLOCKED_LIST,c->argv + 1,c->argc - 2,timeout,NULL,NULL); } void blpopCommand(client *c) { @@ -752,7 +752,7 @@ void brpoplpushCommand(client *c) { addReply(c, shared.nullbulk); } else { /* The list is empty and the client blocks. */ - blockForKeys(c, c->argv + 1, 1, timeout, c->argv[2]); + blockForKeys(c,BLOCKED_LIST,c->argv + 1,1,timeout,c->argv[2],NULL); } } else { if (key->type != OBJ_LIST) { From 428bb7cca0a5d79849e1f074dc370f79434710da Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 7 Sep 2017 09:30:50 +0200 Subject: [PATCH 0486/1722] Streams: augment client.bpop with XREAD specific fields. 
--- src/blocked.c | 4 ++++ src/networking.c | 1 + src/server.h | 5 +++++ src/t_stream.c | 8 ++++++++ 4 files changed, 18 insertions(+) diff --git a/src/blocked.c b/src/blocked.c index 74dab0c19..376b343d6 100644 --- a/src/blocked.c +++ b/src/blocked.c @@ -384,6 +384,10 @@ void unblockClientWaitingData(client *c) { decrRefCount(c->bpop.target); c->bpop.target = NULL; } + if (c->bpop.xread_group) { + decrRefCount(c->bpop.xread_group); + c->bpop.xread_group = NULL; + } } /* If the specified key has clients blocked waiting for list pushes, this diff --git a/src/networking.c b/src/networking.c index d672ec329..f0bdacfae 100644 --- a/src/networking.c +++ b/src/networking.c @@ -126,6 +126,7 @@ client *createClient(int fd) { c->bpop.timeout = 0; c->bpop.keys = dictCreate(&objectKeyHeapPointerValueDictType,NULL); c->bpop.target = NULL; + c->bpop.xread_group = NULL; c->bpop.numreplicas = 0; c->bpop.reploffset = 0; c->woff = 0; diff --git a/src/server.h b/src/server.h index 2c69a94cd..34c5fb06d 100644 --- a/src/server.h +++ b/src/server.h @@ -648,6 +648,11 @@ typedef struct blockingState { robj *target; /* The key that should receive the element, * for BRPOPLPUSH. */ + /* BLOCK_STREAM */ + size_t xread_count; /* XREAD COUNT option. */ + robj *xread_group; /* XREAD group name. */ + mstime_t xread_retry_time, xread_retry_ttl; + /* BLOCKED_WAIT */ int numreplicas; /* Number of replicas we are waiting for ACK. */ long long reploffset; /* Replication offset to reach. */ diff --git a/src/t_stream.c b/src/t_stream.c index 52b0e1058..66c6cb891 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -392,3 +392,11 @@ void xlenCommand(client *c) { stream *s = o->ptr; addReplyLongLong(c,s->length); } + +/* XREAD [BLOCK ] [COUNT ] [GROUP ] + * [RETRY ] STREAMS key_1 ID_1 key_2 ID_2 ... 
+ * key_N ID_N */ +void xreadCommand(client *c) { +} + + From 54e221af43c4599a9e987107b1cc6fc11be79d4f Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 7 Sep 2017 16:48:20 +0200 Subject: [PATCH 0487/1722] Streams: XREAD arguments parsing. --- src/t_stream.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/src/t_stream.c b/src/t_stream.c index 66c6cb891..485ea29aa 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -371,6 +371,7 @@ void xrangeCommand(client *c) { if (strcasecmp(c->argv[4]->ptr,"COUNT") == 0) { if (getLongLongFromObjectOrReply(c,c->argv[5],&count,NULL) != C_OK) return; + if (count < 0) count = 0; } else { addReply(c,shared.syntaxerr); return; @@ -397,6 +398,77 @@ void xlenCommand(client *c) { * [RETRY ] STREAMS key_1 ID_1 key_2 ID_2 ... * key_N ID_N */ void xreadCommand(client *c) { + long long block = 0; + long long count = 0; + int streams_count = 0; + int streams_argc = 0; + #define STREAMID_STATIC_VECTOR_LEN 8 + streamID static_ids[STREAMID_STATIC_VECTOR_LEN]; + streamID *ids = static_ids; + + /* Parse arguments. */ + for (int i = 1; i < c->argc; i++) { + int moreargs = i != c->argc-1; + char *o = c->argv[i]->ptr; + if (!strcasecmp(o,"BLOCK") && moreargs) { + i++; + if (getLongLongFromObjectOrReply(c,c->argv[i],&block,NULL) != C_OK) + return; + if (block < 0) block = 0; + } else if (!strcasecmp(o,"COUNT") && moreargs) { + i++; + if (getLongLongFromObjectOrReply(c,c->argv[i],&count,NULL) != C_OK) + return; + if (count < 0) count = 0; + } else if (!strcasecmp(o,"STREAMS") && moreargs) { + streams_argc = i+1; + streams_count = (c->argc-streams_argc); + if ((streams_count % 2) != 0) { + addReplyError(c,"Unbalanced XREAD list of streams: " + "for each stream key an ID or '$' must be " + "specified."); + return; + } + streams_count /= 2; /* We have two arguments for each stream. */ + break; + } else { + addReply(c,shared.syntaxerr); + return; + } + } + + /* STREAMS option is mandatory. 
*/ + if (streams_argc == 0) { + addReply(c,shared.syntaxerr); + return; + } + + /* Parse the IDs. */ + if (streams_count > STREAMID_STATIC_VECTOR_LEN) + ids = zmalloc(sizeof(streamID)*streams_count); + + /* Try to serve the client synchronously. */ + for (int i = streams_argc + streams_count; i < c->argc; i++) { + /* Specifying "$" as last-known-id means that the client wants to be + * served with just the messages that will arrive into the stream + * starting from now. */ + if (strcmp(c->argv[i]->ptr,"$") == 0) { + robj *o = lookupKeyRead(c->db,c->argv[i-streams_count]); + if (o) { + stream *s = o->ptr; + ids[i] = s->last_id; + } else { + ids[i].ms = 0; + ids[i].seq = 0; + } + continue; + } + if (streamParseIDOrReply(c,c->argv[i],ids+i,0) != C_OK) goto cleanup; + } + +cleanup: + /* Cleanup. */ + if (ids != static_ids) zfree(ids); } From 6f79b52dfa12c1f1ab3d179868b50e5a7fc2cb21 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 7 Sep 2017 17:45:34 +0200 Subject: [PATCH 0488/1722] Streams: XREAD, first draft. Handling of blocked clients still missing. --- src/t_stream.c | 56 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 46 insertions(+), 10 deletions(-) diff --git a/src/t_stream.c b/src/t_stream.c index 485ea29aa..0820a7438 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -398,10 +398,10 @@ void xlenCommand(client *c) { * [RETRY ] STREAMS key_1 ID_1 key_2 ID_2 ... 
* key_N ID_N */ void xreadCommand(client *c) { - long long block = 0; + long long timeout = 0; long long count = 0; int streams_count = 0; - int streams_argc = 0; + int streams_arg = 0; #define STREAMID_STATIC_VECTOR_LEN 8 streamID static_ids[STREAMID_STATIC_VECTOR_LEN]; streamID *ids = static_ids; @@ -412,17 +412,17 @@ void xreadCommand(client *c) { char *o = c->argv[i]->ptr; if (!strcasecmp(o,"BLOCK") && moreargs) { i++; - if (getLongLongFromObjectOrReply(c,c->argv[i],&block,NULL) != C_OK) - return; - if (block < 0) block = 0; + if (getLongLongFromObjectOrReply(c,c->argv[i],&timeout,NULL) + != C_OK) return; + if (timeout < 0) timeout = 0; } else if (!strcasecmp(o,"COUNT") && moreargs) { i++; if (getLongLongFromObjectOrReply(c,c->argv[i],&count,NULL) != C_OK) return; if (count < 0) count = 0; } else if (!strcasecmp(o,"STREAMS") && moreargs) { - streams_argc = i+1; - streams_count = (c->argc-streams_argc); + streams_arg = i+1; + streams_count = (c->argc-streams_arg); if ((streams_count % 2) != 0) { addReplyError(c,"Unbalanced XREAD list of streams: " "for each stream key an ID or '$' must be " @@ -438,7 +438,7 @@ void xreadCommand(client *c) { } /* STREAMS option is mandatory. */ - if (streams_argc == 0) { + if (streams_arg == 0) { addReply(c,shared.syntaxerr); return; } @@ -447,8 +447,7 @@ void xreadCommand(client *c) { if (streams_count > STREAMID_STATIC_VECTOR_LEN) ids = zmalloc(sizeof(streamID)*streams_count); - /* Try to serve the client synchronously. */ - for (int i = streams_argc + streams_count; i < c->argc; i++) { + for (int i = streams_arg + streams_count; i < c->argc; i++) { /* Specifying "$" as last-known-id means that the client wants to be * served with just the messages that will arrive into the stream * starting from now. */ @@ -466,6 +465,43 @@ void xreadCommand(client *c) { if (streamParseIDOrReply(c,c->argv[i],ids+i,0) != C_OK) goto cleanup; } + /* Try to serve the client synchronously. 
*/ + for (int i = 0; i < streams_count; i++) { + robj *o = lookupKeyRead(c->db,c->argv[i+streams_arg]); + if (o == NULL) continue; + stream *s = o->ptr; + streamID *gt = ids+i; /* ID must be greater than this. */ + if (s->last_id.ms > gt->ms || + (s->last_id.ms == gt->ms && s->last_id.seq > gt->seq)) + { + /* streamReplyWithRange() handles the 'start' ID as inclusive, + * so start from the next ID, since we want only messages with + * IDs greater than start. */ + streamID start = *gt; + start.seq++; /* Can't overflow, it's an uint64_t */ + streamReplyWithRange(c,s,&start,NULL,count); + goto cleanup; + } + } + + /* Block if needed. */ + if (timeout) { + /* If we are inside a MULTI/EXEC and the list is empty the only thing + * we can do is treating it as a timeout (even with timeout 0). */ + if (c->flags & CLIENT_MULTI) { + addReply(c,shared.nullmultibulk); + goto cleanup; + } + blockForKeys(c, BLOCKED_STREAM, c->argv+streams_arg, streams_count, + timeout, NULL, ids); + goto cleanup; + } + + /* No BLOCK option, nor any stream we can serve. Reply as with a + * timeout happened. */ + addReply(c,shared.nullmultibulk); + /* Continue to cleanup... */ + cleanup: /* Cleanup. */ if (ids != static_ids) zfree(ids); From 084a8710192a4342d74b4ca9c247950fadeb17ef Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 8 Sep 2017 11:40:16 +0200 Subject: [PATCH 0489/1722] Streams: XREAD get-keys method. --- src/db.c | 28 ++++++++++++++++++++++++++++ src/server.c | 1 + src/server.h | 2 ++ 3 files changed, 31 insertions(+) diff --git a/src/db.c b/src/db.c index 6682e573d..8b43d4b51 100644 --- a/src/db.c +++ b/src/db.c @@ -1363,6 +1363,34 @@ int *georadiusGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numk return keys; } +/* XREAD [BLOCK ] [COUNT ] [GROUP ] + * [RETRY ] STREAMS key_1 ID_1 key_2 ID_2 ... 
+ * key_N ID_N */ +int *xreadGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys) { + int i, num, *keys; + UNUSED(cmd); + + /* We need to seek the last argument that contains "STREAMS", because other + * arguments before may contain it (for example the group name). */ + int streams_pos = -1; + for (i = 1; i < argc; i++) { + char *arg = argv[i]->ptr; + if (!strcasecmp(arg, "streams")) streams_pos = i; + } + + /* Syntax error. */ + if (streams_pos == -1) { + *numkeys = 0; + return NULL; + } + + num = argc - streams_pos - 1; + keys = zmalloc(sizeof(int) * num); + for (i = streams_pos+1; i < argc; i++) keys[i-streams_pos-1] = i; + *numkeys = num; + return keys; +} + /* Slot to Key API. This is used by Redis Cluster in order to obtain in * a fast way a key that belongs to a specified hash slot. This is useful * while rehashing the cluster and in other conditions when we need to diff --git a/src/server.c b/src/server.c index 56b2188e5..e1d9abeff 100644 --- a/src/server.c +++ b/src/server.c @@ -305,6 +305,7 @@ struct redisCommand redisCommandTable[] = { {"xadd",xaddCommand,-4,"wmF",0,NULL,1,1,1,0,0}, {"xrange",xrangeCommand,-4,"r",0,NULL,1,1,1,0,0}, {"xlen",xlenCommand,2,"rF",0,NULL,1,1,1,0,0}, + {"xread",xreadCommand,-3,"rs",0,xreadGetKeys,1,1,1,0,0}, {"post",securityWarningCommand,-1,"lt",0,NULL,0,0,0,0,0}, {"host:",securityWarningCommand,-1,"lt",0,NULL,0,0,0,0,0}, {"latency",latencyCommand,-2,"aslt",0,NULL,0,0,0,0,0} diff --git a/src/server.h b/src/server.h index 34c5fb06d..4b84486eb 100644 --- a/src/server.h +++ b/src/server.h @@ -1767,6 +1767,7 @@ int *evalGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys); int *sortGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys); int *migrateGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys); int *georadiusGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys); +int *xreadGetKeys(struct redisCommand *cmd, robj **argv, int argc, int 
*numkeys); /* Cluster */ void clusterInit(void); @@ -2011,6 +2012,7 @@ void securityWarningCommand(client *c); void xaddCommand(client *c); void xrangeCommand(client *c); void xlenCommand(client *c); +void xreadCommand(client *c); #if defined(__GNUC__) void *calloc(size_t count, size_t size) __attribute__ ((deprecated)); From bfeeeb9f340b2507a8de27b78286ff3d113f6005 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 8 Sep 2017 11:51:53 +0200 Subject: [PATCH 0490/1722] Streams: XREAD get-key method fixed. --- src/db.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/db.c b/src/db.c index 8b43d4b51..e422d4b81 100644 --- a/src/db.c +++ b/src/db.c @@ -1364,8 +1364,8 @@ int *georadiusGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numk } /* XREAD [BLOCK ] [COUNT ] [GROUP ] - * [RETRY ] STREAMS key_1 ID_1 key_2 ID_2 ... - * key_N ID_N */ + * [RETRY ] STREAMS key_1 key_2 ... key_N + * ID_1 ID_2 ... ID_N */ int *xreadGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys) { int i, num, *keys; UNUSED(cmd); @@ -1377,14 +1377,16 @@ int *xreadGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys) char *arg = argv[i]->ptr; if (!strcasecmp(arg, "streams")) streams_pos = i; } + if (streams_pos != -1) num = argc - streams_pos - 1; /* Syntax error. */ - if (streams_pos == -1) { + if (streams_pos == -1 || num % 2 != 0) { *numkeys = 0; return NULL; } + num /= 2; /* We have half the keys as there are arguments because + there are also the IDs, one per key. */ - num = argc - streams_pos - 1; keys = zmalloc(sizeof(int) * num); for (i = streams_pos+1; i < argc; i++) keys[i-streams_pos-1] = i; *numkeys = num; From b6f1106630ea0449e5c78b9e880dd230c507fea9 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 8 Sep 2017 12:09:02 +0200 Subject: [PATCH 0491/1722] Streams: synchronous xread fixes and improvements. 
--- src/t_stream.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/src/t_stream.c b/src/t_stream.c index 0820a7438..92c620771 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -395,8 +395,8 @@ void xlenCommand(client *c) { } /* XREAD [BLOCK ] [COUNT ] [GROUP ] - * [RETRY ] STREAMS key_1 ID_1 key_2 ID_2 ... - * key_N ID_N */ + * [RETRY ] STREAMS key_1 key_2 ... key_N + * ID_1 ID_2 ... ID_N */ void xreadCommand(client *c) { long long timeout = 0; long long count = 0; @@ -453,12 +453,13 @@ void xreadCommand(client *c) { * starting from now. */ if (strcmp(c->argv[i]->ptr,"$") == 0) { robj *o = lookupKeyRead(c->db,c->argv[i-streams_count]); + int id_idx = i - streams_arg - streams_count; if (o) { stream *s = o->ptr; - ids[i] = s->last_id; + ids[id_idx] = s->last_id; } else { - ids[i].ms = 0; - ids[i].seq = 0; + ids[id_idx].ms = 0; + ids[id_idx].seq = 0; } continue; } @@ -466,24 +467,38 @@ void xreadCommand(client *c) { } /* Try to serve the client synchronously. */ + size_t arraylen = 0; + void *arraylen_ptr = NULL; for (int i = 0; i < streams_count; i++) { - robj *o = lookupKeyRead(c->db,c->argv[i+streams_arg]); + robj *o = lookupKeyRead(c->db,c->argv[streams_arg+i]); if (o == NULL) continue; stream *s = o->ptr; streamID *gt = ids+i; /* ID must be greater than this. */ if (s->last_id.ms > gt->ms || (s->last_id.ms == gt->ms && s->last_id.seq > gt->seq)) { + arraylen++; + if (arraylen == 1) arraylen_ptr = addDeferredMultiBulkLength(c); /* streamReplyWithRange() handles the 'start' ID as inclusive, * so start from the next ID, since we want only messages with * IDs greater than start. */ streamID start = *gt; start.seq++; /* Can't overflow, it's an uint64_t */ + + /* Emit the two elements sub-array consisting of the name + * of the stream and the data we extracted from it. 
*/ + addReplyMultiBulkLen(c,2); + addReplyBulk(c,c->argv[i+streams_arg]); streamReplyWithRange(c,s,&start,NULL,count); - goto cleanup; } } + /* We replied synchronously! Set the top array len and return to caller. */ + if (arraylen) { + setDeferredMultiBulkLength(c,arraylen_ptr,arraylen); + goto cleanup; + } + /* Block if needed. */ if (timeout) { /* If we are inside a MULTI/EXEC and the list is empty the only thing From 6d54ec5e24bb1bdef607a1d2d43986edd50285bf Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 8 Sep 2017 12:25:06 +0200 Subject: [PATCH 0492/1722] Streams: XREAD ability to block fixed. --- src/blocked.c | 4 ++-- src/t_stream.c | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/blocked.c b/src/blocked.c index 376b343d6..fccce35d6 100644 --- a/src/blocked.c +++ b/src/blocked.c @@ -134,7 +134,7 @@ void processUnblockedClients(void) { /* Unblock a client calling the right function depending on the kind * of operation the client is blocking for. */ void unblockClient(client *c) { - if (c->btype == BLOCKED_LIST) { + if (c->btype == BLOCKED_LIST || c->btype == BLOCKED_STREAM) { unblockClientWaitingData(c); } else if (c->btype == BLOCKED_WAIT) { unblockClientWaitingReplicas(c); @@ -160,7 +160,7 @@ void unblockClient(client *c) { * send it a reply of some kind. After this function is called, * unblockClient() will be called with the same client as argument. 
*/ void replyToBlockedClientTimedOut(client *c) { - if (c->btype == BLOCKED_LIST) { + if (c->btype == BLOCKED_LIST || c->btype == BLOCKED_STREAM) { addReply(c,shared.nullmultibulk); } else if (c->btype == BLOCKED_WAIT) { addReplyLongLong(c,replicationCountAcksByOffset(c->bpop.reploffset)); diff --git a/src/t_stream.c b/src/t_stream.c index 92c620771..0358e6441 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -412,9 +412,8 @@ void xreadCommand(client *c) { char *o = c->argv[i]->ptr; if (!strcasecmp(o,"BLOCK") && moreargs) { i++; - if (getLongLongFromObjectOrReply(c,c->argv[i],&timeout,NULL) - != C_OK) return; - if (timeout < 0) timeout = 0; + if (getTimeoutFromObjectOrReply(c,c->argv[i],&timeout, + UNIT_MILLISECONDS) != C_OK) return; } else if (!strcasecmp(o,"COUNT") && moreargs) { i++; if (getLongLongFromObjectOrReply(c,c->argv[i],&count,NULL) != C_OK) From ea5ea8da3dca7ea9f01b813d0a31682f2b366c18 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 8 Sep 2017 16:57:32 +0200 Subject: [PATCH 0493/1722] Streams: XREAD related code to serve blocked clients. --- src/blocked.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++++- src/server.h | 1 + 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/src/blocked.c b/src/blocked.c index fccce35d6..84d74f24b 100644 --- a/src/blocked.c +++ b/src/blocked.c @@ -240,7 +240,14 @@ void handleClientsBlockedOnKeys(void) { while(numclients--) { listNode *clientnode = listFirst(clients); client *receiver = clientnode->value; - if (receiver->btype != BLOCKED_LIST) continue; + + if (receiver->btype != BLOCKED_LIST) { + /* Put on the tail, so that at the next call + * we'll not run into it again. */ + listDelNode(clients,clientnode); + listAddNodeTail(clients,receiver); + continue; + } robj *dstkey = receiver->bpop.target; int where = (receiver->lastcmd && @@ -279,6 +286,47 @@ void handleClientsBlockedOnKeys(void) { * when an element was pushed on the list. */ } + /* Serve clients blocked on stream key. 
*/ + else if (o != NULL && o->type == OBJ_STREAM) { + dictEntry *de = dictFind(rl->db->blocking_keys,rl->key); + stream *s = o->ptr; + + /* We need to provide the new data arrived on the stream + * to all the clients that are waiting for an offset smaller + * than the current top item. */ + if (de) { + list *clients = dictGetVal(de); + listNode *ln; + listIter li; + listRewind(clients,&li); + + while((ln = listNext(&li))) { + client *receiver = listNodeValue(ln); + if (receiver->btype != BLOCKED_STREAM) continue; + streamID *gt = dictFetchValue(receiver->bpop.keys, + rl->key); + if (s->last_id.ms > gt->ms || + (s->last_id.ms == gt->ms && + s->last_id.seq > gt->seq)) + { + unblockClient(receiver); + streamID start = *gt; + start.seq++; /* Can't overflow, it's an uint64_t */ + + /* Emit the two elements sub-array consisting of + * the name of the stream and the data we + * extracted from it. Wrapped in a single-item + * array, since we have just one key. */ + addReplyMultiBulkLen(receiver,1); + addReplyMultiBulkLen(receiver,2); + addReplyBulk(receiver,rl->key); + streamReplyWithRange(receiver,s,&start,NULL, + receiver->bpop.xread_count); + } + } + } + } + /* Free this item. */ decrRefCount(rl->key); zfree(rl); diff --git a/src/server.h b/src/server.h index 4b84486eb..8fa7380e4 100644 --- a/src/server.h +++ b/src/server.h @@ -1425,6 +1425,7 @@ void popGenericCommand(client *c, int where); /* Stream data type. */ stream *streamNew(void); void freeStream(stream *s); +size_t streamReplyWithRange(client *c, stream *s, streamID *start, streamID *end, size_t count); /* MULTI/EXEC/WATCH... */ void unwatchAllKeys(client *c); From f3b3ca41f7c86d8dccf22f82e811804ac916f180 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 8 Sep 2017 20:48:28 +0200 Subject: [PATCH 0494/1722] Streams: fix XREAD timeout handling, zero is valid. 
--- src/t_stream.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/t_stream.c b/src/t_stream.c index 0358e6441..afa8224cb 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -398,7 +398,7 @@ void xlenCommand(client *c) { * [RETRY ] STREAMS key_1 key_2 ... key_N * ID_1 ID_2 ... ID_N */ void xreadCommand(client *c) { - long long timeout = 0; + long long timeout = -1; /* -1 means, no BLOCK argument given. */ long long count = 0; int streams_count = 0; int streams_arg = 0; @@ -499,7 +499,7 @@ void xreadCommand(client *c) { } /* Block if needed. */ - if (timeout) { + if (timeout != -1) { /* If we are inside a MULTI/EXEC and the list is empty the only thing * we can do is treating it as a timeout (even with timeout 0). */ if (c->flags & CLIENT_MULTI) { From 06a30111a8240d93223f3e0125e4070bb0d48a28 Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 9 Sep 2017 11:10:59 +0200 Subject: [PATCH 0495/1722] Streams: fix XREAD ready-key signaling. With lists we need to signal only on key creation, but streams can provide data to clients listening at every new item added. To make this slightly more efficient we now track different classes of blocked clients to avoid signaling keys when there is nobody listening. A typical case is when the stream is used as a time series DB and accessed only by range with XRANGE. 
--- src/blocked.c | 6 ++++-- src/db.c | 3 +-- src/server.c | 6 ++++-- src/server.h | 4 +++- src/t_stream.c | 2 ++ 5 files changed, 14 insertions(+), 7 deletions(-) diff --git a/src/blocked.c b/src/blocked.c index 84d74f24b..3cf661aa8 100644 --- a/src/blocked.c +++ b/src/blocked.c @@ -102,7 +102,8 @@ int getTimeoutFromObjectOrReply(client *c, robj *object, mstime_t *timeout, int void blockClient(client *c, int btype) { c->flags |= CLIENT_BLOCKED; c->btype = btype; - server.bpop_blocked_clients++; + server.blocked_clients++; + server.blocked_clients_by_type[btype]++; } /* This function is called in the beforeSleep() function of the event loop @@ -145,9 +146,10 @@ void unblockClient(client *c) { } /* Clear the flags, and put the client in the unblocked list so that * we'll process new commands in its query buffer ASAP. */ + server.blocked_clients--; + server.blocked_clients_by_type[c->btype]--; c->flags &= ~CLIENT_BLOCKED; c->btype = BLOCKED_NONE; - server.bpop_blocked_clients--; /* The client may already be into the unblocked list because of a previous * blocking operation, don't add back it into the list multiple times. 
*/ if (!(c->flags & CLIENT_UNBLOCKED)) { diff --git a/src/db.c b/src/db.c index e422d4b81..74c2be624 100644 --- a/src/db.c +++ b/src/db.c @@ -169,8 +169,7 @@ void dbAdd(redisDb *db, robj *key, robj *val) { int retval = dictAdd(db->dict, copy, val); serverAssertWithInfo(NULL,key,retval == DICT_OK); - if (val->type == OBJ_LIST || val->type == OBJ_STREAM) - signalKeyAsReady(db, key); + if (val->type == OBJ_LIST) signalKeyAsReady(db, key); if (server.cluster_enabled) slotToKeyAdd(key); } diff --git a/src/server.c b/src/server.c index e1d9abeff..38f161796 100644 --- a/src/server.c +++ b/src/server.c @@ -1426,7 +1426,9 @@ void initServerConfig(void) { server.active_defrag_running = 0; server.notify_keyspace_events = 0; server.maxclients = CONFIG_DEFAULT_MAX_CLIENTS; - server.bpop_blocked_clients = 0; + server.blocked_clients = 0; + memset(server.blocked_clients_by_type,0, + sizeof(server.blocked_clients_by_type)); server.maxmemory = CONFIG_DEFAULT_MAXMEMORY; server.maxmemory_policy = CONFIG_DEFAULT_MAXMEMORY_POLICY; server.maxmemory_samples = CONFIG_DEFAULT_MAXMEMORY_SAMPLES; @@ -2929,7 +2931,7 @@ sds genRedisInfoString(char *section) { "blocked_clients:%d\r\n", listLength(server.clients)-listLength(server.slaves), lol, bib, - server.bpop_blocked_clients); + server.blocked_clients); } /* Memory */ diff --git a/src/server.h b/src/server.h index 8fa7380e4..2d98b6f1d 100644 --- a/src/server.h +++ b/src/server.h @@ -257,6 +257,7 @@ typedef long long mstime_t; /* millisecond time type. */ #define BLOCKED_WAIT 2 /* WAIT for synchronous replication. */ #define BLOCKED_MODULE 3 /* Blocked by a loadable module. */ #define BLOCKED_STREAM 4 /* XREAD. */ +#define BLOCKED_NUM 5 /* Number of blocked states. */ /* Client request types */ #define PROTO_REQ_INLINE 1 @@ -1130,7 +1131,8 @@ struct redisServer { int lfu_log_factor; /* LFU logarithmic counter factor. */ int lfu_decay_time; /* LFU counter decay factor. 
*/ /* Blocked clients */ - unsigned int bpop_blocked_clients; /* Number of clients blocked by lists */ + unsigned int blocked_clients; /* # of clients executing a blocking cmd.*/ + unsigned int blocked_clients_by_type[BLOCKED_NUM]; list *unblocked_clients; /* list of clients to unblock before next loop */ list *ready_keys; /* List of readyList structures for BLPOP & co */ /* Sort parameters - qsort_r() is only available under BSD so we diff --git a/src/t_stream.c b/src/t_stream.c index afa8224cb..c47c5dde1 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -354,6 +354,8 @@ void xaddCommand(client *c) { signalModifiedKey(c->db,c->argv[1]); notifyKeyspaceEvent(NOTIFY_HASH,"xadd",c->argv[1],c->db->id); server.dirty++; + if (server.blocked_clients_by_type[BLOCKED_STREAM]) + signalKeyAsReady(c->db, c->argv[1]); } /* XRANGE key start end [COUNT ] */ From 789f74bba2dfe2c7d419a7cc390498eba9443fdd Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 11 Sep 2017 11:06:53 +0200 Subject: [PATCH 0496/1722] Streams: fix handleClientsBlockedOnKeys() access to invalid ID. --- src/blocked.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/blocked.c b/src/blocked.c index 3cf661aa8..519a402cf 100644 --- a/src/blocked.c +++ b/src/blocked.c @@ -311,9 +311,12 @@ void handleClientsBlockedOnKeys(void) { (s->last_id.ms == gt->ms && s->last_id.seq > gt->seq)) { - unblockClient(receiver); streamID start = *gt; start.seq++; /* Can't overflow, it's an uint64_t */ + /* Note that after we unblock the client, 'gt' + * is no longer valid, so we must do it after + * we copied the ID into the 'start' variable. */ + unblockClient(receiver); /* Emit the two elements sub-array consisting of * the name of the stream and the data we From 6410396372e584492fbd74db0d5ef7b675272981 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 11 Sep 2017 11:20:36 +0200 Subject: [PATCH 0497/1722] Streams: When XREAD blocks without COUNT, set a default one. 
A client may lose a lot of time between invocations of blocking XREAD, for example because it is processing the messages or for any other cause. When it returns back, it may provide a low enough message ID that the server will block to send an unreasonable number of messages in a single call. For this reason we set a COUNT when the client is blocked with XREAD calls, even if no COUNT is given. This is arbitrarily set to 1000 because it's enough to avoid slowing down the reception of many messages, but low enough to avoid to block. --- src/t_stream.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/t_stream.c b/src/t_stream.c index c47c5dde1..1836ae735 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -399,6 +399,7 @@ void xlenCommand(client *c) { /* XREAD [BLOCK ] [COUNT ] [GROUP ] * [RETRY ] STREAMS key_1 key_2 ... key_N * ID_1 ID_2 ... ID_N */ +#define XREAD_BLOCKED_DEFAULT_COUNT 1000 void xreadCommand(client *c) { long long timeout = -1; /* -1 means, no BLOCK argument given. */ long long count = 0; @@ -510,6 +511,11 @@ void xreadCommand(client *c) { } blockForKeys(c, BLOCKED_STREAM, c->argv+streams_arg, streams_count, timeout, NULL, ids); + /* If no COUNT is given and we block, set a relatively small count: + * in case the ID provided is too low, we do not want the server to + * block just to serve this client a huge stream of messages. */ + c->bpop.xread_count = count ? count : XREAD_BLOCKED_DEFAULT_COUNT; + c->bpop.xread_group = NULL; /* Not used for now. */ goto cleanup; } From eb8d0671f8c9727f4c8b42ed301eeb87011d7ea5 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 11 Sep 2017 18:02:57 +0200 Subject: [PATCH 0498/1722] Streams: fix XADD API and keyspace notifications. XADD was suboptimal in the first incarnation of the command, not being able to accept an ID (very useufl for replication), nor options for having capped streams. The keyspace notification for streams was not implemented. 
--- src/notify.c | 2 ++ src/server.c | 2 +- src/server.h | 3 +- src/t_stream.c | 80 +++++++++++++++++++++++++++++++++++++++++++------- 4 files changed, 75 insertions(+), 12 deletions(-) diff --git a/src/notify.c b/src/notify.c index 94a1f2e79..9bbeb1423 100644 --- a/src/notify.c +++ b/src/notify.c @@ -54,6 +54,7 @@ int keyspaceEventsStringToFlags(char *classes) { case 'e': flags |= NOTIFY_EVICTED; break; case 'K': flags |= NOTIFY_KEYSPACE; break; case 'E': flags |= NOTIFY_KEYEVENT; break; + case 't': flags |= NOTIFY_STREAM; break; default: return -1; } } @@ -79,6 +80,7 @@ sds keyspaceEventsFlagsToString(int flags) { if (flags & NOTIFY_ZSET) res = sdscatlen(res,"z",1); if (flags & NOTIFY_EXPIRED) res = sdscatlen(res,"x",1); if (flags & NOTIFY_EVICTED) res = sdscatlen(res,"e",1); + if (flags & NOTIFY_STREAM) res = sdscatlen(res,"t",1); } if (flags & NOTIFY_KEYSPACE) res = sdscatlen(res,"K",1); if (flags & NOTIFY_KEYEVENT) res = sdscatlen(res,"E",1); diff --git a/src/server.c b/src/server.c index 38f161796..f1fd06ca0 100644 --- a/src/server.c +++ b/src/server.c @@ -302,7 +302,7 @@ struct redisCommand redisCommandTable[] = { {"pfcount",pfcountCommand,-2,"r",0,NULL,1,-1,1,0,0}, {"pfmerge",pfmergeCommand,-2,"wm",0,NULL,1,-1,1,0,0}, {"pfdebug",pfdebugCommand,-3,"w",0,NULL,0,0,0,0,0}, - {"xadd",xaddCommand,-4,"wmF",0,NULL,1,1,1,0,0}, + {"xadd",xaddCommand,-5,"wmF",0,NULL,1,1,1,0,0}, {"xrange",xrangeCommand,-4,"r",0,NULL,1,1,1,0,0}, {"xlen",xlenCommand,2,"rF",0,NULL,1,1,1,0,0}, {"xread",xreadCommand,-3,"rs",0,xreadGetKeys,1,1,1,0,0}, diff --git a/src/server.h b/src/server.h index 2d98b6f1d..37df429b4 100644 --- a/src/server.h +++ b/src/server.h @@ -427,7 +427,8 @@ typedef long long mstime_t; /* millisecond time type. 
*/ #define NOTIFY_ZSET (1<<7) /* z */ #define NOTIFY_EXPIRED (1<<8) /* x */ #define NOTIFY_EVICTED (1<<9) /* e */ -#define NOTIFY_ALL (NOTIFY_GENERIC | NOTIFY_STRING | NOTIFY_LIST | NOTIFY_SET | NOTIFY_HASH | NOTIFY_ZSET | NOTIFY_EXPIRED | NOTIFY_EVICTED) /* A */ +#define NOTIFY_STREAM (1<<10) /* t */ +#define NOTIFY_ALL (NOTIFY_GENERIC | NOTIFY_STRING | NOTIFY_LIST | NOTIFY_SET | NOTIFY_HASH | NOTIFY_ZSET | NOTIFY_EXPIRED | NOTIFY_EVICTED | NOTIFY_STREAM) /* A flag */ /* Get the first bind addr or NULL */ #define NET_FIRST_BIND_ADDR (server.bindaddr_count ? server.bindaddr[0] : NULL) diff --git a/src/t_stream.c b/src/t_stream.c index 1836ae735..0921a54b0 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -115,8 +115,24 @@ void streamDecodeID(void *buf, streamID *id) { /* Adds a new item into the stream 's' having the specified number of * field-value pairs as specified in 'numfields' and stored into 'argv'. - * Returns the new entry ID populating the 'added_id' structure. */ -void streamAppendItem(stream *s, robj **argv, int numfields, streamID *added_id) { + * Returns the new entry ID populating the 'added_id' structure. + * + * If 'use_id' is not NULL, the ID is not auto-generated by the function, + * but instead the passed ID is uesd to add the new entry. In this case + * adding the entry may fail as specified later in this comment. + * + * The function returns C_OK if the item was added, this is always true + * if the ID was generated by the function. However the function may return + * C_ERR if an ID was given via 'use_id', but adding it failed since the + * current top ID is greater or equal. */ +int streamAppendItem(stream *s, robj **argv, int numfields, streamID *added_id, streamID *use_id) { + /* If an ID was given, check that it's greater than the last entry ID + * or return an error. */ + if (use_id && (use_id->ms < s->last_id.ms || + (use_id->ms == s->last_id.ms && + use_id->seq <= s->last_id.seq))) return C_ERR; + + /* Add the new entry. 
*/ raxIterator ri; raxStart(&ri,s->rax); raxSeek(&ri,"$",NULL,0); @@ -133,7 +149,10 @@ void streamAppendItem(stream *s, robj **argv, int numfields, streamID *added_id) /* Generate the new entry ID. */ streamID id; - streamNextID(&s->last_id,&id); + if (use_id) + id = *use_id; + else + streamNextID(&s->last_id,&id); /* We have to add the key into the radix tree in lexicographic order, * to do so we consider the ID as a single 128 bit number written in @@ -173,6 +192,7 @@ void streamAppendItem(stream *s, robj **argv, int numfields, streamID *added_id) s->length++; s->last_id = id; if (added_id) *added_id = id; + return C_OK; } /* Send the specified range to the client 'c'. The range the client will @@ -299,7 +319,9 @@ int string2ull(const char *s, unsigned long long *value) { * form, just stating the milliseconds time part of the stream. In such a case * the missing part is set according to the value of 'missing_seq' parameter. * The IDs "-" and "+" specify respectively the minimum and maximum IDs - * that can be represented. */ + * that can be represented. + * + * If 'c' is set to NULL, no reply is sent to the client. */ int streamParseIDOrReply(client *c, robj *o, streamID *id, uint64_t missing_seq) { char buf[128]; if (sdslen(o->ptr) > sizeof(buf)-1) goto invalid; @@ -328,13 +350,45 @@ int streamParseIDOrReply(client *c, robj *o, streamID *id, uint64_t missing_seq) return C_OK; invalid: - addReplyError(c,"Invalid stream ID specified as stream command argument"); + if (c) addReplyError(c,"Invalid stream ID specified as stream " + "command argument"); return C_ERR; } -/* XADD key [field value] [field value] ... */ +/* XADD key [MAXLEN ] [field value] [field value] ... */ void xaddCommand(client *c) { - if ((c->argc % 2) == 1) { + streamID id; + int id_given = 0; /* Was an ID different than "*" specified? */ + + /* Parse options. */ + int i = 2; /* This is the first argument position where we could + find an option, or the ID. 
*/ + for (; i < c->argc; i++) { + int moreargs = i != c->argc-1; + char *opt = c->argv[i]->ptr; + if (opt[0] == '*' && opt[1] == '\0') { + /* This is just a fast path for the common case of auto-ID + * creation. */ + break; + } else if (!strcasecmp(opt,"maxlen") && moreargs) { + addReplyError(c,"Sorry, MAXLEN is still not implemented"); + i++; + return; + } else { + /* If we are here is a syntax error or a valid ID. */ + if (streamParseIDOrReply(NULL,c->argv[i],&id,0) == C_OK) { + id_given = 1; + break; + } else { + addReply(c,shared.syntaxerr); + return; + } + } + } + int field_pos = i+1; + + /* Check arity. */ + if ((c->argc - field_pos) < 2 || (c->argc-field_pos % 2) == 1) { addReplyError(c,"wrong number of arguments for XADD"); return; } @@ -346,13 +400,19 @@ void xaddCommand(client *c) { s = o->ptr; /* Append using the low level function and return the ID. */ - streamID id; - streamAppendItem(s,c->argv+2,(c->argc-2)/2,&id); + if (streamAppendItem(s,c->argv+field_pos,(c->argc-field_pos)/2, + &id, id_given ? &id : NULL) + == C_ERR) + { + addReplyError(c,"The ID specified in XADD is smaller than the " + "target stream top item"); + return; + } sds reply = sdscatfmt(sdsempty(),"+%U.%U\r\n",id.ms,id.seq); addReplySds(c,reply); signalModifiedKey(c->db,c->argv[1]); - notifyKeyspaceEvent(NOTIFY_HASH,"xadd",c->argv[1],c->db->id); + notifyKeyspaceEvent(NOTIFY_STREAM,"xadd",c->argv[1],c->db->id); server.dirty++; if (server.blocked_clients_by_type[BLOCKED_STREAM]) signalKeyAsReady(c->db, c->argv[1]); From 91e09d0959ba05b41a7abc45613771dafeb07676 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 12 Sep 2017 10:48:27 +0200 Subject: [PATCH 0499/1722] Streams: rewrite XADD ID argument for AOF/slaves. 
--- src/t_stream.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/t_stream.c b/src/t_stream.c index 0921a54b0..84e0541e0 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -414,6 +414,16 @@ void xaddCommand(client *c) { signalModifiedKey(c->db,c->argv[1]); notifyKeyspaceEvent(NOTIFY_STREAM,"xadd",c->argv[1],c->db->id); server.dirty++; + + /* Let's rewrite the ID argument with the one actually generated for + * AOF/replication propagation. */ + robj *idarg = createObject(OBJ_STRING, + sdscatfmt(sdsempty(),"%U.%U",id.ms,id.seq)); + rewriteClientCommandArgument(c,i,idarg); + decrRefCount(idarg); + + /* We need to signal to blocked clients that there is new data on this + * stream. */ if (server.blocked_clients_by_type[BLOCKED_STREAM]) signalKeyAsReady(c->db, c->argv[1]); } From 110e66b0c2b84903dc73d7f3da8528274769a83c Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 12 Sep 2017 10:54:20 +0200 Subject: [PATCH 0500/1722] Streams: fix memory leak in freeStream(). --- src/t_stream.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/t_stream.c b/src/t_stream.c index 84e0541e0..3b0072ec8 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -54,6 +54,7 @@ stream *streamNew(void) { /* Free a stream, including the listpacks stored inside the radix tree. */ void freeStream(stream *s) { raxFreeWithCallback(s->rax,(void(*)(void*))lpFree); + zfree(s); } /* Generate the next stream item ID given the previous one. If the current From ba13aba3a0dc6eb4df46b08b6787d40c0a85e17c Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 12 Sep 2017 16:19:26 +0200 Subject: [PATCH 0501/1722] Streams: fix bug in XREAD last received ID processing. 
--- src/t_stream.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/t_stream.c b/src/t_stream.c index 3b0072ec8..a8230109c 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -524,9 +524,9 @@ void xreadCommand(client *c) { /* Specifying "$" as last-known-id means that the client wants to be * served with just the messages that will arrive into the stream * starting from now. */ + int id_idx = i - streams_arg - streams_count; if (strcmp(c->argv[i]->ptr,"$") == 0) { robj *o = lookupKeyRead(c->db,c->argv[i-streams_count]); - int id_idx = i - streams_arg - streams_count; if (o) { stream *s = o->ptr; ids[id_idx] = s->last_id; @@ -536,7 +536,8 @@ void xreadCommand(client *c) { } continue; } - if (streamParseIDOrReply(c,c->argv[i],ids+i,0) != C_OK) goto cleanup; + if (streamParseIDOrReply(c,c->argv[i],ids+id_idx,0) != C_OK) + goto cleanup; } /* Try to serve the client synchronously. */ From 11045c4399b1a1f94aab383368dbcd8f67f31955 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 13 Sep 2017 18:05:34 +0200 Subject: [PATCH 0502/1722] Streams: stream iteration refactoring, WIP 1. --- src/t_stream.c | 114 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) diff --git a/src/t_stream.c b/src/t_stream.c index a8230109c..a1d3f8a17 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -196,6 +196,120 @@ int streamAppendItem(stream *s, robj **argv, int numfields, streamID *added_id, return C_OK; } +/* We define an iterator to iterate stream items in an abstract way, without + * caring about the radix tree + listpack representation. Technically speaking + * the iterator is only used inside streamReplyWithRange(), so could just + * be implemented inside the function, but practically there is the AOF + * rewriting code that also needs to iterate the stream to emit the XADD + * commands. */ +typedef struct streamIterator { + uint64_t start_key[2]; /* Start key as 128 bit big endian. 
*/ + uint64_t end_key[2]; /* End key as 128 bit big endian. */ + raxIterator ri; /* Rax iterator. */ + unsigned char *lp; /* Current listpack. */ + unsigned char *lp_ele; /* Current listpack cursor. */ +} streamIterator; + +/* Initialize the stream iterator, so that we can call iterating functions + * to get the next items. This requires a corresponding streamIteratorStop() + * at the end. + * + * Once the iterator is initalized, we iterate like this: + * + * streamIterator myiterator; + * streamIteratorStart(&myiterator,...); + * size_t numfields; + * while(streamIteratorGetID(&myitereator,&ID,&numfields)) { + * while(numfields--) { + * unsigned char *key, *value; + * size_t key_len, value_len; + * streamIteratorGetField(&myiterator,&key,&value,&key_len,&value_len); + * + * ... do what you want with key and value ... + * } + * } + * streamIteratorStop(&myiterator); */ +void streamIteratorStart(streamIterator *si, stream *s, streamID *start, streamID *end) { + /* Intialize the iterator and translates the iteration start/stop + * elements into a 128 big big-endian number. */ + streamEncodeID(si->start_key,start); + if (end) { + streamEncodeID(si->end_key,end); + } else { + /* We assume that UINT64_MAX is the same in little and big + * endian, that is, all bits set. */ + si->end_key[0] = UINT64_MAX; + si->end_key[0] = UINT64_MAX; + } + raxStart(&si->ri,s->rax); + + /* Seek the correct node in the radix tree. */ + if (start->ms || start->seq) { + raxSeek(&si->ri,"<=",(unsigned char*)si->start_key, + sizeof(si->start_key)); + if (raxEOF(&si->ri)) + raxSeek(&si->ri,">",(unsigned char*)si->start_key, + sizeof(si->start_key)); + } else { + raxSeek(&si->ri,"^",NULL,0); + } + si->lp = NULL; /* There is no current listpack right now. */ + si->lp_ele = NULL; /* Current listpack cursor. */ +} + +/* Return 1 and store the current item ID at 'id' if there are still + * elements within the iteration range, otherwise return 0 in order to + * signal the iteration terminated. 
*/ +int streamIteratorGetID(streamIterator *si, streamID *id, size_t *numfields) { + while(1) { /* Will stop when element > stop_key or end of radix tree. */ + /* If the current listpack is set to NULL, this is the start of the + * iteration or the previous listpack was completely iterated. + * Go to the next node. */ + if (si->lp == NULL || si->lp_ele == NULL) { + if (!raxNext(&si->ri)) return 0; + serverAssert(si->ri.key_len == sizeof(streamID)); + si->lp = si->ri.data; + si->lp_ele = lpFirst(si->lp); + } + + /* For every radix tree node, iterate the corresponding listpack, + * returning elements when they are within range. */ + while(si->lp_ele) { + int64_t e_len; + unsigned char buf[LP_INTBUF_SIZE]; + unsigned char *e = lpGet(si->lp_ele,&e_len,buf); + serverAssert(e_len == sizeof(streamID)); + + /* Go to next field: number of elements. */ + si->lp_ele = lpNext(si->lp,si->lp_ele); + + /* If current >= start */ + if (memcmp(e,si->start_key,sizeof(streamID)) >= 0) { + if (memcmp(e,si->end_key,sizeof(streamID)) > 0) + return 0; /* We are already out of range. */ + streamDecodeID(e,id); + *numfields = lpGetInteger(si->lp_ele); + return 1; /* Valid item returned. */ + } else { + /* If we do not emit, we have to discard. */ + int64_t numfields = lpGetInteger(si->lp_ele); + si->lp_ele = lpNext(si->lp,si->lp_ele); + for (int64_t i = 0; i < numfields*2; i++) + si->lp_ele = lpNext(si->lp,si->lp_ele); + } + } + + /* End of listpack reached. Try the next radix tree node. */ + } +} + +/* Stop the stream iterator. The only cleanup we need is to free the rax + * itereator, since the stream iterator itself is supposed to be stack + * allocated. */ +void streamIteratorStop(streamIterator *si) { + raxStop(&si->ri); +} + /* Send the specified range to the client 'c'. The range the client will * receive is between start and end inclusive, if 'count' is non zero, no more * than 'count' elemnets are sent. 
The 'end' pointer can be NULL to mean that From e630b172824d4c452566ddb3c813c9bd46190ede Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 14 Sep 2017 14:23:27 +0200 Subject: [PATCH 0503/1722] Streams: stream iteration refactoring, WIP 2. --- src/t_stream.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/t_stream.c b/src/t_stream.c index a1d3f8a17..03860b8e4 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -208,6 +208,11 @@ typedef struct streamIterator { raxIterator ri; /* Rax iterator. */ unsigned char *lp; /* Current listpack. */ unsigned char *lp_ele; /* Current listpack cursor. */ + /* Buffers used to hold the string of lpGet() when the element is + * integer encoded, so that there is no string representation of the + * element inside the listpack itself. */ + unsigned char field_buf[LP_INTBUF_SIZE]; + unsigned char value_buf[LP_INTBUF_SIZE]; } streamIterator; /* Initialize the stream iterator, so that we can call iterating functions @@ -289,6 +294,7 @@ int streamIteratorGetID(streamIterator *si, streamID *id, size_t *numfields) { return 0; /* We are already out of range. */ streamDecodeID(e,id); *numfields = lpGetInteger(si->lp_ele); + si->lp_ele = lpNext(si->lp,si->lp_ele); return 1; /* Valid item returned. */ } else { /* If we do not emit, we have to discard. */ @@ -303,6 +309,19 @@ int streamIteratorGetID(streamIterator *si, streamID *id, size_t *numfields) { } } +/* Get the field and value of the current item we are iterating. This should + * be called immediately after streamIteratorGetID(), and for each field + * according to the number of fields returned by streamIteratorGetID(). + * The function populates the field and value pointers and the corresponding + * lengths by reference, that are valid until the next iterator call, assuming + * no one touches the stream meanwhile. 
*/ +void streamIteratorGetField(streamIterator *si, unsigned char **fieldptr, unsigned char **valueptr, int64_t *fieldlen, int64_t *valuelen) { + *fieldptr = lpGet(si->lp_ele,fieldlen,si->field_buf); + si->lp_ele = lpNext(si->lp,si->lp_ele); + *valueptr = lpGet(si->lp_ele,valuelen,si->value_buf); + si->lp_ele = lpNext(si->lp,si->lp_ele); +} + /* Stop the stream iterator. The only cleanup we need is to free the rax * itereator, since the stream iterator itself is supposed to be stack * allocated. */ From 1b0b5fa224abfec2481641bba242eb5552123272 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 14 Sep 2017 14:46:31 +0200 Subject: [PATCH 0504/1722] Streams: implement streamReplyWithRange() in terms of the iterator. --- src/t_stream.c | 87 +++++++++++++------------------------------------- 1 file changed, 22 insertions(+), 65 deletions(-) diff --git a/src/t_stream.c b/src/t_stream.c index 03860b8e4..de9561a51 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -223,7 +223,7 @@ typedef struct streamIterator { * * streamIterator myiterator; * streamIteratorStart(&myiterator,...); - * size_t numfields; + * int64_t numfields; * while(streamIteratorGetID(&myitereator,&ID,&numfields)) { * while(numfields--) { * unsigned char *key, *value; @@ -265,7 +265,7 @@ void streamIteratorStart(streamIterator *si, stream *s, streamID *start, streamI /* Return 1 and store the current item ID at 'id' if there are still * elements within the iteration range, otherwise return 0 in order to * signal the iteration terminated. */ -int streamIteratorGetID(streamIterator *si, streamID *id, size_t *numfields) { +int streamIteratorGetID(streamIterator *si, streamID *id, int64_t *numfields) { while(1) { /* Will stop when element > stop_key or end of radix tree. */ /* If the current listpack is set to NULL, this is the start of the * iteration or the previous listpack was completely iterated. 
@@ -336,74 +336,31 @@ void streamIteratorStop(streamIterator *si) { size_t streamReplyWithRange(client *c, stream *s, streamID *start, streamID *end, size_t count) { void *arraylen_ptr = addDeferredMultiBulkLength(c); size_t arraylen = 0; + streamIterator si; + int64_t numfields; + streamID id; - /* Seek the radix tree node that contains our start item. */ - uint64_t key[2]; - uint64_t end_key[2]; - streamEncodeID(key,start); - if (end) streamEncodeID(end_key,end); - raxIterator ri; - raxStart(&ri,s->rax); + streamIteratorStart(&si,s,start,end); + while(streamIteratorGetID(&si,&id,&numfields)) { + /* Emit a two elements array for each item. The first is + * the ID, the second is an array of field-value pairs. */ + sds replyid = sdscatfmt(sdsempty(),"+%U.%U\r\n",id.ms,id.seq); + addReplyMultiBulkLen(c,2); + addReplySds(c,replyid); + addReplyMultiBulkLen(c,numfields*2); - /* Seek the correct node in the radix tree. */ - if (start->ms || start->seq) { - raxSeek(&ri,"<=",(unsigned char*)key,sizeof(key)); - if (raxEOF(&ri)) raxSeek(&ri,">",(unsigned char*)key,sizeof(key)); - } else { - raxSeek(&ri,"^",NULL,0); - } - - /* For every radix tree node, iterate the corresponding listpack, - * returning elmeents when they are within range. */ - while (raxNext(&ri)) { - serverAssert(ri.key_len == sizeof(key)); - unsigned char *lp = ri.data; - unsigned char *lp_ele = lpFirst(lp); - while(lp_ele) { - int64_t e_len; - unsigned char buf[LP_INTBUF_SIZE]; - unsigned char *e = lpGet(lp_ele,&e_len,buf); - serverAssert(e_len == sizeof(streamID)); - - /* Seek next field: number of elements. */ - lp_ele = lpNext(lp,lp_ele); - if (memcmp(e,key,sizeof(key)) >= 0) { /* If current >= start */ - if (end && memcmp(e,end_key,sizeof(key)) > 0) { - break; /* We are already out of range. */ - } - streamID thisid; - streamDecodeID(e,&thisid); - sds replyid = sdscatfmt(sdsempty(),"+%U.%U\r\n", - thisid.ms,thisid.seq); - - /* Emit this stream entry in the client output. 
*/ - addReplyMultiBulkLen(c,2); - addReplySds(c,replyid); - int64_t numfields = lpGetInteger(lp_ele); - lp_ele = lpNext(lp,lp_ele); - addReplyMultiBulkLen(c,numfields*2); - for (int64_t i = 0; i < numfields; i++) { - /* Emit two items (key-value) per iteration. */ - for (int k = 0; k < 2; k++) { - e = lpGet(lp_ele,&e_len,buf); - addReplyBulkCBuffer(c,e,e_len); - lp_ele = lpNext(lp,lp_ele); - } - } - - arraylen++; - if (count && count == arraylen) break; - } else { - /* If we do not emit, we have to discard. */ - int64_t numfields = lpGetInteger(lp_ele); - lp_ele = lpNext(lp,lp_ele); - for (int64_t i = 0; i < numfields*2; i++) - lp_ele = lpNext(lp,lp_ele); - } + /* Emit the field-value pairs. */ + while(numfields--) { + unsigned char *key, *value; + int64_t key_len, value_len; + streamIteratorGetField(&si,&key,&value,&key_len,&value_len); + addReplyBulkCBuffer(c,key,key_len); + addReplyBulkCBuffer(c,value,value_len); } + arraylen++; if (count && count == arraylen) break; } - raxStop(&ri); + streamIteratorStop(&si); setDeferredMultiBulkLength(c,arraylen_ptr,arraylen); return arraylen; } From 3d15e3f72234eaf4470e400ee4be327e6ee6eb65 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 15 Sep 2017 12:17:25 +0200 Subject: [PATCH 0505/1722] Streams: export iteration API. --- src/server.h | 5 ----- src/stream.h | 31 +++++++++++++++++++++++++++++++ src/t_stream.c | 19 ------------------- 3 files changed, 31 insertions(+), 24 deletions(-) diff --git a/src/server.h b/src/server.h index 37df429b4..bc572b1e3 100644 --- a/src/server.h +++ b/src/server.h @@ -1425,11 +1425,6 @@ void listTypeConvert(robj *subject, int enc); void unblockClientWaitingData(client *c); void popGenericCommand(client *c, int where); -/* Stream data type. */ -stream *streamNew(void); -void freeStream(stream *s); -size_t streamReplyWithRange(client *c, stream *s, streamID *start, streamID *end, size_t count); - /* MULTI/EXEC/WATCH... 
*/ void unwatchAllKeys(client *c); void initClientMultiState(client *c); diff --git a/src/stream.h b/src/stream.h index e78af5bc5..e38009321 100644 --- a/src/stream.h +++ b/src/stream.h @@ -19,4 +19,35 @@ typedef struct stream { streamID last_id; /* Zero if there are yet no items. */ } stream; +/* We define an iterator to iterate stream items in an abstract way, without + * caring about the radix tree + listpack representation. Technically speaking + * the iterator is only used inside streamReplyWithRange(), so could just + * be implemented inside the function, but practically there is the AOF + * rewriting code that also needs to iterate the stream to emit the XADD + * commands. */ +typedef struct streamIterator { + uint64_t start_key[2]; /* Start key as 128 bit big endian. */ + uint64_t end_key[2]; /* End key as 128 bit big endian. */ + raxIterator ri; /* Rax iterator. */ + unsigned char *lp; /* Current listpack. */ + unsigned char *lp_ele; /* Current listpack cursor. */ + /* Buffers used to hold the string of lpGet() when the element is + * integer encoded, so that there is no string representation of the + * element inside the listpack itself. */ + unsigned char field_buf[LP_INTBUF_SIZE]; + unsigned char value_buf[LP_INTBUF_SIZE]; +} streamIterator; + +/* Prototypes of exported APIs. 
*/ + +struct client; + +stream *streamNew(void); +void freeStream(stream *s); +size_t streamReplyWithRange(struct client *c, stream *s, streamID *start, streamID *end, size_t count); +void streamIteratorStart(streamIterator *si, stream *s, streamID *start, streamID *end); +int streamIteratorGetID(streamIterator *si, streamID *id, int64_t *numfields); +void streamIteratorGetField(streamIterator *si, unsigned char **fieldptr, unsigned char **valueptr, int64_t *fieldlen, int64_t *valuelen); +void streamIteratorStop(streamIterator *si); + #endif diff --git a/src/t_stream.c b/src/t_stream.c index de9561a51..3144adc7c 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -196,25 +196,6 @@ int streamAppendItem(stream *s, robj **argv, int numfields, streamID *added_id, return C_OK; } -/* We define an iterator to iterate stream items in an abstract way, without - * caring about the radix tree + listpack representation. Technically speaking - * the iterator is only used inside streamReplyWithRange(), so could just - * be implemented inside the function, but practically there is the AOF - * rewriting code that also needs to iterate the stream to emit the XADD - * commands. */ -typedef struct streamIterator { - uint64_t start_key[2]; /* Start key as 128 bit big endian. */ - uint64_t end_key[2]; /* End key as 128 bit big endian. */ - raxIterator ri; /* Rax iterator. */ - unsigned char *lp; /* Current listpack. */ - unsigned char *lp_ele; /* Current listpack cursor. */ - /* Buffers used to hold the string of lpGet() when the element is - * integer encoded, so that there is no string representation of the - * element inside the listpack itself. */ - unsigned char field_buf[LP_INTBUF_SIZE]; - unsigned char value_buf[LP_INTBUF_SIZE]; -} streamIterator; - /* Initialize the stream iterator, so that we can call iterating functions * to get the next items. This requires a corresponding streamIteratorStop() * at the end. 
From 7118da8954deb5563a7ffd0321959673c80644b8 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 15 Sep 2017 12:37:04 +0200 Subject: [PATCH 0506/1722] Streams: AOF rewriting + minor iterator improvements. --- src/aof.c | 33 +++++++++++++++++++++++++++++++++ src/t_stream.c | 14 +++++++++----- 2 files changed, 42 insertions(+), 5 deletions(-) diff --git a/src/aof.c b/src/aof.c index 0593b2707..5fbfdd695 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1031,6 +1031,37 @@ int rewriteHashObject(rio *r, robj *key, robj *o) { return 1; } +/* Emit the commands needed to rebuild a stream object. + * The function returns 0 on error, 1 on success. */ +int rewriteStreamObject(rio *r, robj *key, robj *o) { + streamIterator si; + streamIteratorStart(&si,o->ptr,NULL,NULL); + streamID id; + int64_t numfields; + + while(streamIteratorGetID(&si,&id,&numfields)) { + /* Emit a two elements array for each item. The first is + * the ID, the second is an array of field-value pairs. */ + + /* Emit the XADD ...fields... command. */ + if (rioWriteBulkCount(r,'*',3+numfields*2) == 0) return 0; + if (rioWriteBulkString(r,"XADD",4) == 0) return 0; + if (rioWriteBulkObject(r,key) == 0) return 0; + sds replyid = sdscatfmt(sdsempty(),"%U.%U",id.ms,id.seq); + if (rioWriteBulkString(r,replyid,sdslen(replyid)) == 0) return 0; + sdsfree(replyid); + while(numfields--) { + unsigned char *field, *value; + int64_t field_len, value_len; + streamIteratorGetField(&si,&field,&value,&field_len,&value_len); + if (rioWriteBulkString(r,(char*)field,field_len) == 0) return 0; + if (rioWriteBulkString(r,(char*)value,value_len) == 0) return 0; + } + } + streamIteratorStop(&si); + return 1; +} + /* Call the module type callback in order to rewrite a data type * that is exported by a module and is not handled by Redis itself. * The function returns 0 on error, 1 on success. 
*/ @@ -1111,6 +1142,8 @@ int rewriteAppendOnlyFileRio(rio *aof) { if (rewriteSortedSetObject(aof,&key,o) == 0) goto werr; } else if (o->type == OBJ_HASH) { if (rewriteHashObject(aof,&key,o) == 0) goto werr; + } else if (o->type == OBJ_STREAM) { + if (rewriteStreamObject(aof,&key,o) == 0) goto werr; } else if (o->type == OBJ_MODULE) { if (rewriteModuleObject(aof,&key,o) == 0) goto werr; } else { diff --git a/src/t_stream.c b/src/t_stream.c index 3144adc7c..760050085 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -218,19 +218,23 @@ int streamAppendItem(stream *s, robj **argv, int numfields, streamID *added_id, void streamIteratorStart(streamIterator *si, stream *s, streamID *start, streamID *end) { /* Intialize the iterator and translates the iteration start/stop * elements into a 128 big big-endian number. */ - streamEncodeID(si->start_key,start); + if (start) { + streamEncodeID(si->start_key,start); + } else { + si->start_key[0] = 0; + si->start_key[0] = 0; + } + if (end) { streamEncodeID(si->end_key,end); } else { - /* We assume that UINT64_MAX is the same in little and big - * endian, that is, all bits set. */ si->end_key[0] = UINT64_MAX; si->end_key[0] = UINT64_MAX; } - raxStart(&si->ri,s->rax); /* Seek the correct node in the radix tree. */ - if (start->ms || start->seq) { + raxStart(&si->ri,s->rax); + if (start && (start->ms || start->seq)) { raxSeek(&si->ri,"<=",(unsigned char*)si->start_key, sizeof(si->start_key)); if (raxEOF(&si->ri)) From d9a50e2d942735babee98bb8e4d61e4b23991aba Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 15 Sep 2017 15:54:18 +0200 Subject: [PATCH 0507/1722] Streams: basic XADD tests. 
--- tests/test_helper.tcl | 1 + tests/unit/type/stream.tcl | 42 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 tests/unit/type/stream.tcl diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl index 41c867803..7def9a7f6 100644 --- a/tests/test_helper.tcl +++ b/tests/test_helper.tcl @@ -26,6 +26,7 @@ set ::all_tests { unit/type/set unit/type/zset unit/type/hash + unit/type/stream unit/sort unit/expire unit/other diff --git a/tests/unit/type/stream.tcl b/tests/unit/type/stream.tcl new file mode 100644 index 000000000..a668ddf42 --- /dev/null +++ b/tests/unit/type/stream.tcl @@ -0,0 +1,42 @@ +# return value is like strcmp() and similar. +proc streamCompareID {a b} { + if {$a == $b} {return 0} + lassign [split $a .] a_ms a_seq + lassign [split $b .] b_ms b_seq + if {$a_ms > $b_ms} {return 1} + if {$a_ms < $b_ms} {return -1} + # Same ms case, compare seq. + if {$a_seq > $b_seq} {return 1} + if {$a_seq < $b_seq} {return -1} +} + +start_server { + tags {"stream"} +} { + test {XADD can add entries into a stream that XRANGE can fetch} { + r XADD mystream * item 1 value a + r XADD mystream * item 2 value b + assert_equal 2 [r XLEN mystream] + set items [r XRANGE mystream - +] + assert_equal [lindex $items 0 1] {item 1 value a} + assert_equal [lindex $items 1 1] {item 2 value b} + } + + test {XADD IDs are incremental} { + set id1 [r XADD mystream * item 1 value a] + set id2 [r XADD mystream * item 2 value b] + set id3 [r XADD mystream * item 3 value c] + assert {[streamCompareID $id1 $id2] == -1} + assert {[streamCompareID $id2 $id3] == -1} + } + + test {XADD IDs are incremental when ms is the same as well} { + r multi + r XADD mystream * item 1 value a + r XADD mystream * item 2 value b + r XADD mystream * item 3 value c + lassign [r exec] id1 id2 id3 + assert {[streamCompareID $id1 $id2] == -1} + assert {[streamCompareID $id2 $id3] == -1} + } +} From aa9cff04007f1aee39f01b22c16d8c21a85768d3 Mon Sep 17 00:00:00 2001 From: 
antirez Date: Fri, 15 Sep 2017 16:56:18 +0200 Subject: [PATCH 0508/1722] Streams: more advanced XADD and XRANGE tests. --- tests/unit/type/stream.tcl | 43 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/tests/unit/type/stream.tcl b/tests/unit/type/stream.tcl index a668ddf42..35de5c1f8 100644 --- a/tests/unit/type/stream.tcl +++ b/tests/unit/type/stream.tcl @@ -10,6 +10,15 @@ proc streamCompareID {a b} { if {$a_seq < $b_seq} {return -1} } +# return the ID immediately greater than the specified one. +# Note that this function does not care to handle 'seq' overflow +# since it's a 64 bit value. +proc streamNextID {id} { + lassign [split $id .] ms seq + incr seq + join [list $ms $seq] . +} + start_server { tags {"stream"} } { @@ -39,4 +48,38 @@ start_server { assert {[streamCompareID $id1 $id2] == -1} assert {[streamCompareID $id2 $id3] == -1} } + + test {XADD mass insertion and XLEN} { + r DEL mystream + r multi + for {set j 0} {$j < 10000} {incr j} { + r XADD mystream * item $j + } + r exec + + set items [r XRANGE mystream - +] + for {set j 0} {$j < 10000} {incr j} { + assert {[lindex $items $j 1] eq [list item $j]} + } + assert {[r xlen mystream] == $j} + } + + test {XRANGE COUNT works as expected} { + assert {[llength [r xrange mystream - + COUNT 10]] == 10} + } + + test {XRANGE can be used to iterate the whole stream} { + set last_id "-" + set j 0 + while 1 { + set elements [r xrange mystream $last_id + COUNT 100] + if {[llength $elements] == 0} break + foreach e $elements { + assert {[lindex $e 1] eq [list item $j]} + incr j; + } + set last_id [streamNextID [lindex $elements end 0]] + } + assert {$j == 10000} + } } From 6df222cbcd22fa8b25360426e7233f4c31f335cd Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 18 Sep 2017 10:48:27 +0200 Subject: [PATCH 0509/1722] Streams: XRANGE fuzz testing. 
--- tests/unit/type/stream.tcl | 53 +++++++++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/tests/unit/type/stream.tcl b/tests/unit/type/stream.tcl index 35de5c1f8..8a94aa5e7 100644 --- a/tests/unit/type/stream.tcl +++ b/tests/unit/type/stream.tcl @@ -1,6 +1,6 @@ # return value is like strcmp() and similar. proc streamCompareID {a b} { - if {$a == $b} {return 0} + if {$a eq $b} {return 0} lassign [split $a .] a_ms a_seq lassign [split $b .] b_ms b_seq if {$a_ms > $b_ms} {return 1} @@ -19,6 +19,36 @@ proc streamNextID {id} { join [list $ms $seq] . } +# Generate a random stream entry ID with the ms part between min and max +# and a low sequence number (0 - 999 range), in order to stress test +# XRANGE against a Tcl implementation implementing the same concept +# with Tcl-only code in a linear array. +proc streamRandomID {min_id max_id} { + lassign [split $min_id .] min_ms min_seq + lassign [split $max_id .] max_ms max_seq + set delta [expr {$max_ms-$min_ms+1}] + set ms [expr {$min_ms+[randomInt $delta]}] + set seq [randomInt 1000] + return $ms.$seq +} + +# Tcl-side implementation of XRANGE to perform fuzz testing in the Redis +# XRANGE implementation. +proc streamSimulateXRANGE {items start end} { + set res {} + foreach i $items { + set this_id [lindex $i 0] + if {[streamCompareID $this_id $start] >= 0} { + if {[streamCompareID $this_id $end] <= 0} { + lappend res $i + } + } + } + return $res +} + +set content {} ;# Will be populated with Tcl side copy of the stream content. 
+ start_server { tags {"stream"} } { @@ -82,4 +112,25 @@ start_server { } assert {$j == 10000} } + + test {XRANGE fuzzing} { + # puts $items + set low_id [lindex $items 0 0] + set high_id [lindex $items end 0] + for {set j 0} {$j < 100} {incr j} { + set start [streamRandomID $low_id $high_id] + set end [streamRandomID $low_id $high_id] + set range [r xrange mystream $start $end] + set tcl_range [streamSimulateXRANGE $items $start $end] + if {$range ne $tcl_range} { + puts "*** WARNING *** - XRANGE fuzzing mismatch: $start - $end" + puts "---" + puts "XRANGE: '$range'" + puts "---" + puts "TCL: '$tcl_range'" + puts "---" + fail "XRANGE fuzzing failed, check logs for details" + } + } + } } From a2d7e004d4fa87f771caa3fc865e8b5613aa6313 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 19 Sep 2017 16:49:56 +0200 Subject: [PATCH 0510/1722] Streams: tests for blocking and non-blocking XREAD. --- tests/unit/type/stream.tcl | 90 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/tests/unit/type/stream.tcl b/tests/unit/type/stream.tcl index 8a94aa5e7..dbed9f0a6 100644 --- a/tests/unit/type/stream.tcl +++ b/tests/unit/type/stream.tcl @@ -113,6 +113,96 @@ start_server { assert {$j == 10000} } + test {XREAD with non empty stream} { + set res [r XREAD COUNT 1 STREAMS mystream 0.0] + assert {[lindex $res 0 1 0 1] eq {item 0}} + } + + test {Non blocking XREAD with empty streams} { + set res [r XREAD STREAMS s1 s2 0.0 0.0] + assert {$res eq {}} + } + + test {XREAD with non empty second stream} { + set res [r XREAD COUNT 1 STREAMS nostream mystream 0.0 0.0] + assert {[lindex $res 0 0] eq {mystream}} + assert {[lindex $res 0 1 0 1] eq {item 0}} + } + + test {Blocking XREAD waiting new data} { + r XADD s2 * old abcd1234 + set rd [redis_deferring_client] + $rd XREAD BLOCK 20000 STREAMS s1 s2 s3 $ $ $ + r XADD s2 * new abcd1234 + set res [$rd read] + assert {[lindex $res 0 0] eq {s2}} + assert {[lindex $res 0 1 0 1] eq {new abcd1234}} + } + + test 
{Blocking XREAD waiting old data} { + set rd [redis_deferring_client] + $rd XREAD BLOCK 20000 STREAMS s1 s2 s3 $ 0.0 $ + r XADD s2 * foo abcd1234 + set res [$rd read] + assert {[lindex $res 0 0] eq {s2}} + assert {[lindex $res 0 1 0 1] eq {old abcd1234}} + } + + test "XREAD: XADD + DEL should not awake client" { + set rd [redis_deferring_client] + r del s1 + $rd XREAD BLOCK 20000 STREAMS s1 $ + r multi + r XADD s1 * old abcd1234 + r DEL s1 + r exec + r XADD s1 * new abcd1234 + set res [$rd read] + assert {[lindex $res 0 0] eq {s1}} + assert {[lindex $res 0 1 0 1] eq {new abcd1234}} + } + + test "XREAD: XADD + DEL + LPUSH should not awake client" { + set rd [redis_deferring_client] + r del s1 + $rd XREAD BLOCK 20000 STREAMS s1 $ + r multi + r XADD s1 * old abcd1234 + r DEL s1 + r LPUSH s1 foo bar + r exec + r DEL s1 + r XADD s1 * new abcd1234 + set res [$rd read] + assert {[lindex $res 0 0] eq {s1}} + assert {[lindex $res 0 1 0 1] eq {new abcd1234}} + } + + test {XREAD with same stream name multiple times should work} { + r XADD s2 * old abcd1234 + set rd [redis_deferring_client] + $rd XREAD BLOCK 20000 STREAMS s2 s2 s2 $ $ $ + r XADD s2 * new abcd1234 + set res [$rd read] + assert {[lindex $res 0 0] eq {s2}} + assert {[lindex $res 0 1 0 1] eq {new abcd1234}} + } + + test {XREAD + multiple XADD inside transaction} { + r XADD s2 * old abcd1234 + set rd [redis_deferring_client] + $rd XREAD BLOCK 20000 STREAMS s2 s2 s2 $ $ $ + r MULTI + r XADD s2 * field one + r XADD s2 * field two + r XADD s2 * field three + r EXEC + set res [$rd read] + assert {[lindex $res 0 0] eq {s2}} + assert {[lindex $res 0 1 0 1] eq {field one}} + assert {[lindex $res 0 1 1 1] eq {field two}} + } + test {XRANGE fuzzing} { # puts $items set low_id [lindex $items 0 0] From efd32685508c97a6c01e118c06375b5e16b2e27c Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 19 Sep 2017 16:57:37 +0200 Subject: [PATCH 0511/1722] Streams: fixed memory leaks when blocking again for same stream. 
blockForKeys() was not freeing the allocation holding the ID when the key was already found busy. Fortunately the unit test checked explicitly for blocking multiple times for the same key (copying a regression in the blocking lists tests), so the bug was detected by the Redis test leak checker. --- src/blocked.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/blocked.c b/src/blocked.c index 519a402cf..734e6ffd6 100644 --- a/src/blocked.c +++ b/src/blocked.c @@ -387,7 +387,10 @@ void blockForKeys(client *c, int btype, robj **keys, int numkeys, mstime_t timeo } /* If the key already exists in the dictionary ignore it. */ - if (dictAdd(c->bpop.keys,keys[j],key_data) != DICT_OK) continue; + if (dictAdd(c->bpop.keys,keys[j],key_data) != DICT_OK) { + zfree(key_data); + continue; + } incrRefCount(keys[j]); /* And in the other "side", to map keys -> clients */ From 731ad0ef1d8e4d41deb463169874c96186ce1d21 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 27 Sep 2017 17:41:32 +0200 Subject: [PATCH 0512/1722] Streams: items compression implemented. The approach used is to set a fixed header at the start of every listpack blob (that contains many entries). The header contains a "master" ID and fields, that are initially just obtained from the first entry inserted in the listpack, so that the first enty is always well compressed. Later every new entry is checked against these fields, and if it matches, the SAMEFIELD flag is set in the entry so that we know to just use the master entry flags. The IDs are always delta-encoded against the first entry. This approach avoids cascading effects in which entries are encoded depending on the previous entries, in order to avoid complexity and rewritings of the data when data is removed in the middle (which is a planned feature). 
--- src/stream.h | 5 ++ src/t_stream.c | 171 +++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 142 insertions(+), 34 deletions(-) diff --git a/src/stream.h b/src/stream.h index e38009321..df29e9e70 100644 --- a/src/stream.h +++ b/src/stream.h @@ -26,6 +26,11 @@ typedef struct stream { * rewriting code that also needs to iterate the stream to emit the XADD * commands. */ typedef struct streamIterator { + streamID master_id; /* ID of the master entry at listpack head. */ + uint64_t master_fields_count; /* Master entries # of fields. */ + unsigned char *master_fields_start; /* Master entries start in listapck. */ + unsigned char *master_fields_ptr; /* Master field to emit next. */ + int entry_flags; /* Flags of entry we are emitting. */ uint64_t start_key[2]; /* Start key as 128 bit big endian. */ uint64_t end_key[2]; /* End key as 128 bit big endian. */ raxIterator ri; /* Rax iterator. */ diff --git a/src/t_stream.c b/src/t_stream.c index 760050085..5250c36b3 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -27,16 +27,19 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/* TODO: - * - After loading a stream, populate the last ID. - */ - #include "server.h" #include "endianconv.h" #include "stream.h" #define STREAM_BYTES_PER_LISTPACK 4096 +/* Every stream item inside the listpack, has a flags field that is used to + * mark the entry as deleted, or having the same field as the "master" + * entry at the start of the listpack> */ +#define STREAM_ITEM_FLAG_NONE 0 /* No special flags. */ +#define STREAM_ITEM_FLAG_DELETED (1<<0) /* Entry is delted. Skip it. */ +#define STREAM_ITEM_FLAG_SAMEFIELDS (1<<1) /* Same fields as master entry. */ + /* ----------------------------------------------------------------------- * Low level stream encoding: a radix tree of listpacks. 
* ----------------------------------------------------------------------- */ @@ -95,6 +98,19 @@ int64_t lpGetInteger(unsigned char *ele) { return v; } +/* Debugging function to log the full content of a listpack. Useful + * for development and debugging. */ +void streamLogListpackContent(unsigned char *lp) { + unsigned char *p = lpFirst(lp); + while(p) { + unsigned char buf[LP_INTBUF_SIZE]; + int64_t v; + unsigned char *ele = lpGet(p,&v,buf); + serverLog(LL_WARNING,"- [%d] '%.*s'", (int)v, (int)v, ele); + p = lpNext(lp,p); + } +} + /* Convert the specified stream entry ID as a 128 bit big endian number, so * that the IDs can be sorted lexicographically. */ void streamEncodeID(void *buf, streamID *id) { @@ -159,32 +175,82 @@ int streamAppendItem(stream *s, robj **argv, int numfields, streamID *added_id, * to do so we consider the ID as a single 128 bit number written in * big endian, so that the most significant bytes are the first ones. */ uint64_t rax_key[2]; /* Key in the radix tree containing the listpack.*/ - uint64_t entry_id[2]; /* Entry ID of the new item as 128 bit string. */ - streamEncodeID(entry_id,&id); + streamID master_id; /* ID of the master entry in the listpack. */ /* Create a new listpack and radix tree node if needed. */ + int flags = STREAM_ITEM_FLAG_NONE; if (lp == NULL || lp_bytes > STREAM_BYTES_PER_LISTPACK) { + master_id = id; + streamEncodeID(rax_key,&id); + /* Create the listpack having the master entry ID and fields. */ lp = lpNew(); - rax_key[0] = entry_id[0]; - rax_key[1] = entry_id[1]; + lp = lpAppend(lp,(unsigned char*)rax_key,sizeof(rax_key)); + lp = lpAppendInteger(lp,numfields); + for (int i = 0; i < numfields; i++) { + sds field = argv[i*2]->ptr; + lp = lpAppend(lp,(unsigned char*)field,sdslen(field)); + } raxInsert(s->rax,(unsigned char*)&rax_key,sizeof(rax_key),lp,NULL); + /* The first entry we insert, has obviously the same fields of the + * master entry. 
*/ + flags |= STREAM_ITEM_FLAG_SAMEFIELDS; } else { serverAssert(ri.key_len == sizeof(rax_key)); memcpy(rax_key,ri.key,sizeof(rax_key)); + + /* Read the master entry ID. */ + int64_t e_len; + unsigned char *lp_ele = lpFirst(lp); + unsigned char buf[LP_INTBUF_SIZE]; + unsigned char *e = lpGet(lp_ele,&e_len,buf); + serverAssert(e_len == sizeof(streamID)); + streamDecodeID(e,&master_id); + lp_ele = lpNext(lp,lp_ele); + + /* Check if the entry we are adding, have the same fields + * as the master entry. */ + int master_fields_count = lpGetInteger(lp_ele); + lp_ele = lpNext(lp,lp_ele); + if (numfields == master_fields_count) { + int i; + for (i = 0; i < master_fields_count; i++) { + sds field = argv[i*2]->ptr; + unsigned char *e = lpGet(lp_ele,&e_len,buf); + /* Stop if there is a mismatch. */ + if (sdslen(field) != (size_t)e_len || + memcmp(e,field,e_len) != 0) break; + lp_ele = lpNext(lp,lp_ele); + } + if (i == master_fields_count) flags |= STREAM_ITEM_FLAG_SAMEFIELDS; + } } /* Populate the listpack with the new entry. We use the following * encoding: * - * +--------+----------+-------+-------+-/-+-------+-------+ - * |entry-id|num-fields|field-1|value-1|...|field-N|value-N| - * +--------+----------+-------+-------+-/-+-------+-------+ + * +-----+--------+----------+-------+-------+-/-+-------+-------+ + * |flags|entry-id|num-fields|field-1|value-1|...|field-N|value-N| + * +-----+--------+----------+-------+-------+-/-+-------+-------+ + * + * However if the SAMEFIELD flag is set, we have just to populate + * the entry with the values, so it becomes: + * + * +-----+--------+-------+-/-+-------+ + * |flags|entry-id|value-1|...|value-N| + * +-----+--------+-------+-/-+-------+ + * + * The entry-id field is actually two separated fields: the ms + * and seq difference compared to the master entry. 
*/ - lp = lpAppend(lp,(unsigned char*)entry_id,sizeof(entry_id)); - lp = lpAppendInteger(lp,numfields); + lp = lpAppendInteger(lp,flags); + lp = lpAppendInteger(lp,id.ms - master_id.ms); + lp = lpAppendInteger(lp,id.seq - master_id.seq); + if (!(flags & STREAM_ITEM_FLAG_SAMEFIELDS)) + lp = lpAppendInteger(lp,numfields); for (int i = 0; i < numfields; i++) { sds field = argv[i*2]->ptr, value = argv[i*2+1]->ptr; - lp = lpAppend(lp,(unsigned char*)field,sdslen(field)); + if (!(flags & STREAM_ITEM_FLAG_SAMEFIELDS)) + lp = lpAppend(lp,(unsigned char*)field,sdslen(field)); lp = lpAppend(lp,(unsigned char*)value,sdslen(value)); } @@ -259,35 +325,67 @@ int streamIteratorGetID(streamIterator *si, streamID *id, int64_t *numfields) { if (!raxNext(&si->ri)) return 0; serverAssert(si->ri.key_len == sizeof(streamID)); si->lp = si->ri.data; - si->lp_ele = lpFirst(si->lp); + si->lp_ele = lpFirst(si->lp); /* Seek the master ID. */ + /* Get the master ID. */ + int64_t e_len; + unsigned char buf[LP_INTBUF_SIZE]; + unsigned char *e = lpGet(si->lp_ele,&e_len,buf); + serverAssert(e_len == sizeof(streamID)); + streamDecodeID(e,&si->master_id); + si->lp_ele = lpNext(si->lp,si->lp_ele); /* Seek fields count. */ + /* Get the master fields count. */ + si->master_fields_count = lpGetInteger(si->lp_ele); + si->lp_ele = lpNext(si->lp,si->lp_ele); /* Seek first field. */ + si->master_fields_start = si->lp_ele; + /* Skip master fileds to seek the first entry. */ + for (uint64_t i = 0; i < si->master_fields_count; i++) + si->lp_ele = lpNext(si->lp,si->lp_ele); } /* For every radix tree node, iterate the corresponding listpack, * returning elements when they are within range. */ while(si->lp_ele) { - int64_t e_len; - unsigned char buf[LP_INTBUF_SIZE]; - unsigned char *e = lpGet(si->lp_ele,&e_len,buf); - serverAssert(e_len == sizeof(streamID)); + /* Get the flags entry. */ + int flags = lpGetInteger(si->lp_ele); + si->lp_ele = lpNext(si->lp,si->lp_ele); /* Seek ID. 
*/ - /* Go to next field: number of elements. */ + /* Get the ID: it is encoded as difference between the master + * ID and this entry ID. */ + *id = si->master_id; + id->ms += lpGetInteger(si->lp_ele); si->lp_ele = lpNext(si->lp,si->lp_ele); + id->seq += lpGetInteger(si->lp_ele); + si->lp_ele = lpNext(si->lp,si->lp_ele); + unsigned char buf[sizeof(streamID)]; + streamEncodeID(buf,id); - /* If current >= start */ - if (memcmp(e,si->start_key,sizeof(streamID)) >= 0) { - if (memcmp(e,si->end_key,sizeof(streamID)) > 0) - return 0; /* We are already out of range. */ - streamDecodeID(e,id); + /* The number of entries is here or not depending on the + * flags. */ + if (flags & STREAM_ITEM_FLAG_SAMEFIELDS) { + *numfields = si->master_fields_count; + } else { *numfields = lpGetInteger(si->lp_ele); si->lp_ele = lpNext(si->lp,si->lp_ele); - return 1; /* Valid item returned. */ - } else { - /* If we do not emit, we have to discard. */ - int64_t numfields = lpGetInteger(si->lp_ele); - si->lp_ele = lpNext(si->lp,si->lp_ele); - for (int64_t i = 0; i < numfields*2; i++) - si->lp_ele = lpNext(si->lp,si->lp_ele); } + + /* If current >= start, and the entry is not marked as + * deleted, emit it. */ + if (memcmp(buf,si->start_key,sizeof(streamID)) >= 0 && + !(flags & STREAM_ITEM_FLAG_DELETED)) + { + if (memcmp(buf,si->end_key,sizeof(streamID)) > 0) + return 0; /* We are already out of range. */ + si->entry_flags = flags; + if (flags & STREAM_ITEM_FLAG_SAMEFIELDS) + si->master_fields_ptr = si->master_fields_start; + return 1; /* Valid item returned. */ + } + + /* If we do not emit, we have to discard. */ + int to_discard = (flags & STREAM_ITEM_FLAG_SAMEFIELDS) ? + *numfields : *numfields*2; + for (int64_t i = 0; i < to_discard; i++) + si->lp_ele = lpNext(si->lp,si->lp_ele); } /* End of listpack reached. Try the next radix tree node. 
*/ @@ -301,8 +399,13 @@ int streamIteratorGetID(streamIterator *si, streamID *id, int64_t *numfields) { * lengths by reference, that are valid until the next iterator call, assuming * no one touches the stream meanwhile. */ void streamIteratorGetField(streamIterator *si, unsigned char **fieldptr, unsigned char **valueptr, int64_t *fieldlen, int64_t *valuelen) { - *fieldptr = lpGet(si->lp_ele,fieldlen,si->field_buf); - si->lp_ele = lpNext(si->lp,si->lp_ele); + if (si->entry_flags & STREAM_ITEM_FLAG_SAMEFIELDS) { + *fieldptr = lpGet(si->master_fields_ptr,fieldlen,si->field_buf); + si->master_fields_ptr = lpNext(si->lp,si->master_fields_ptr); + } else { + *fieldptr = lpGet(si->lp_ele,fieldlen,si->field_buf); + si->lp_ele = lpNext(si->lp,si->lp_ele); + } *valueptr = lpGet(si->lp_ele,valuelen,si->value_buf); si->lp_ele = lpNext(si->lp,si->lp_ele); } From e05a901cdcaa989d9604e37ad0cd5bd27794addd Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 27 Sep 2017 23:04:31 +0200 Subject: [PATCH 0513/1722] Streams: modify tests to stress compression. --- tests/unit/type/stream.tcl | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/tests/unit/type/stream.tcl b/tests/unit/type/stream.tcl index dbed9f0a6..c63ed8a25 100644 --- a/tests/unit/type/stream.tcl +++ b/tests/unit/type/stream.tcl @@ -83,13 +83,19 @@ start_server { r DEL mystream r multi for {set j 0} {$j < 10000} {incr j} { - r XADD mystream * item $j + # From time to time insert a field with a different set + # of fields in order to stress the stream compression code. 
+ if {rand() < 0.9} { + r XADD mystream * item $j + } else { + r XADD mystream * item $j otherfield foo + } } r exec set items [r XRANGE mystream - +] for {set j 0} {$j < 10000} {incr j} { - assert {[lindex $items $j 1] eq [list item $j]} + assert {[lrange [lindex $items $j 1] 0 1] eq [list item $j]} } assert {[r xlen mystream] == $j} } @@ -105,7 +111,7 @@ start_server { set elements [r xrange mystream $last_id + COUNT 100] if {[llength $elements] == 0} break foreach e $elements { - assert {[lindex $e 1] eq [list item $j]} + assert {[lrange [lindex $e 1] 0 1] eq [list item $j]} incr j; } set last_id [streamNextID [lindex $elements end 0]] @@ -115,7 +121,7 @@ start_server { test {XREAD with non empty stream} { set res [r XREAD COUNT 1 STREAMS mystream 0.0] - assert {[lindex $res 0 1 0 1] eq {item 0}} + assert {[lrange [lindex $res 0 1 0 1] 0 1] eq {item 0}} } test {Non blocking XREAD with empty streams} { @@ -204,7 +210,6 @@ start_server { } test {XRANGE fuzzing} { - # puts $items set low_id [lindex $items 0 0] set high_id [lindex $items end 0] for {set j 0} {$j < 100} {incr j} { From 8538eacf168d0ea4d6f72d96be2f34a892c5697b Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 27 Sep 2017 23:12:43 +0200 Subject: [PATCH 0514/1722] Streams: specify better how the master enty works. --- src/t_stream.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/src/t_stream.c b/src/t_stream.c index 5250c36b3..bfc6e4c9a 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -177,7 +177,29 @@ int streamAppendItem(stream *s, robj **argv, int numfields, streamID *added_id, uint64_t rax_key[2]; /* Key in the radix tree containing the listpack.*/ streamID master_id; /* ID of the master entry in the listpack. */ - /* Create a new listpack and radix tree node if needed. */ + /* Create a new listpack and radix tree node if needed. Note that when + * a new listpack is created, we populate it with a "master entry". 
This + * is just an ID and a set of fields that is taken as refernce in order + * to compress the stream entries that we'll add inside the listpack. + * + * Note that while we use the first added entry ID and fields to create + * the master entry, the first added entry is NOT represented in the master + * entry, which is a stand alone object. But of course, the first entry + * will compress well because it's used as reference. + * + * The master entry is composed of just: an ID and a set of fields, like: + * + * +------------+------------+---------+---------+--/--+---------+ + * | 128 bit ID | num-fields | field_1 | field_2 | ... | field_N | + * +------------+------------+---------+---------+--/--+---------+ + * + * The real entries will be encoded with an ID that is just the + * millisecond and sequence difference compared to the master entry + * (delta encoding), and if the fields of the entry are the same as + * the master enty fields, the entry flags will specify this fact + * and the entry fields and number of fields will be omitted (see later + * in the code of this function). */ + int flags = STREAM_ITEM_FLAG_NONE; if (lp == NULL || lp_bytes > STREAM_BYTES_PER_LISTPACK) { master_id = id; @@ -221,6 +243,8 @@ int streamAppendItem(stream *s, robj **argv, int numfields, streamID *added_id, memcmp(e,field,e_len) != 0) break; lp_ele = lpNext(lp,lp_ele); } + /* All fields are the same! We can compress the field names + * setting a single bit in the flags. */ if (i == master_fields_count) flags |= STREAM_ITEM_FLAG_SAMEFIELDS; } } From 79f540894a371e3d4428e188179e4d3c58908d8b Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 28 Sep 2017 16:55:46 +0200 Subject: [PATCH 0515/1722] Streams: delta encode IDs based on key. Add count + deleted fields. 
We used to have the master ID stored at the start of the listpack, however using the key directly makes more sense in order to create a space efficient representation: anyway the key at the radix tree is very unlikely to change because of how the stream is implemented. Moreover on nodes merging, to rewrite the merged listpacks is anyway the most sensible operation, and we can use the iterator and the append-to-stream function in order to avoid re-implementing the code needed for merging. This commit also adds two items at the start of the listpack: the number of valid items inside the listpack, and the number of items marked as deleted. This means that there is no need to scan a listpack in order to understand if it's a good candidate for garbage collection, if the ration between valid/deleted items triggers the GC. --- src/rdb.c | 28 ++++++++++++-------- src/t_stream.c | 72 ++++++++++++++++++++++++++++++-------------------- 2 files changed, 61 insertions(+), 39 deletions(-) diff --git a/src/rdb.c b/src/rdb.c index 5d15539c5..17a932755 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -781,6 +781,8 @@ ssize_t rdbSaveObject(rio *rdb, robj *o) { while (raxNext(&ri)) { unsigned char *lp = ri.data; size_t lp_bytes = lpBytes(lp); + if ((n = rdbSaveRawString(rdb,ri.key,ri.key_len)) == -1) return -1; + nwritten += n; if ((n = rdbSaveRawString(rdb,lp,lp_bytes)) == -1) return -1; nwritten += n; } @@ -1448,27 +1450,31 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) { uint64_t listpacks = rdbLoadLen(rdb,NULL); while(listpacks--) { + /* Get the master ID, the one we'll use as key of the radix tree + * node: the entries inside the listpack itself are delta-encoded + * relatively to this ID. */ + sds nodekey = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL); + if (sdslen(nodekey) != sizeof(streamID)) { + rdbExitReportCorruptRDB("Stream node key entry is not the " + "size of a stream ID"); + } + + /* Load the listpack. 
*/ unsigned char *lp = rdbGenericLoadStringObject(rdb,RDB_LOAD_PLAIN,NULL); if (lp == NULL) return NULL; unsigned char *first = lpFirst(lp); if (first == NULL) { - /* Serialized listpacks should never be free, since on + /* Serialized listpacks should never be empty, since on * deletion we should remove the radix tree key if the * resulting listpack is emtpy. */ rdbExitReportCorruptRDB("Empty listpack inside stream"); } - /* Get the ID of the first entry: we'll use it as key to add the - * listpack into the radix tree. */ - int64_t e_len; - unsigned char buf[LP_INTBUF_SIZE]; - unsigned char *e = lpGet(first,&e_len,buf); - if (e_len != sizeof(streamID)) { - rdbExitReportCorruptRDB("Listpack first entry is not the " - "size of a stream ID"); - } - int retval = raxInsert(s->rax,e,sizeof(streamID),lp,NULL); + /* Insert the key in the radix tree. */ + int retval = raxInsert(s->rax, + (unsigned char*)nodekey,sizeof(streamID),lp,NULL); + sdsfree(nodekey); if (!retval) rdbExitReportCorruptRDB("Listpack re-added with existing key"); } diff --git a/src/t_stream.c b/src/t_stream.c index bfc6e4c9a..00d07ac57 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -83,6 +83,16 @@ unsigned char *lpAppendInteger(unsigned char *lp, int64_t value) { return lpAppend(lp,(unsigned char*)buf,slen); } +/* This is just a wrapper for lpReplace() to directly use a 64 bit integer + * instead of a string to replace the current element. The function returns + * the new listpack as return value, and also updates the current cursor + * by updating '*pos'. */ +unsigned char *lpReplaceInteger(unsigned char *lp, unsigned char **pos, int64_t value) { + char buf[LONG_STR_SIZE]; + int slen = ll2string(buf,sizeof(buf),value); + return lpInsert(lp, (unsigned char*)buf, slen, *pos, LP_REPLACE, pos); +} + /* This is a wrapper function for lpGet() to directly get an integer value * from the listpack (that may store numbers as a string), converting * the string if needed. 
*/ @@ -179,26 +189,31 @@ int streamAppendItem(stream *s, robj **argv, int numfields, streamID *added_id, /* Create a new listpack and radix tree node if needed. Note that when * a new listpack is created, we populate it with a "master entry". This - * is just an ID and a set of fields that is taken as refernce in order - * to compress the stream entries that we'll add inside the listpack. + * is just a set of fields that is taken as refernce in order to compress + * the stream entries that we'll add inside the listpack. * - * Note that while we use the first added entry ID and fields to create + * Note that while we use the first added entry fields to create * the master entry, the first added entry is NOT represented in the master * entry, which is a stand alone object. But of course, the first entry * will compress well because it's used as reference. * - * The master entry is composed of just: an ID and a set of fields, like: + * The master entry is composed like in the following example: * - * +------------+------------+---------+---------+--/--+---------+ - * | 128 bit ID | num-fields | field_1 | field_2 | ... | field_N | - * +------------+------------+---------+---------+--/--+---------+ + * +-------+---------+------------+---------+--/--+---------+---------+ + * | count | deleted | num-fields | field_1 | field_2 | ... | field_N | + * +-------+---------+------------+---------+--/--+---------+---------+ + * + * count and deleted just represent respectively the total number of + * entires inside the listpack that are valid, and marked as deleted + * (delted flag in the entry flags set). So the total number of items + * actually inside the listpack (both deleted and not) is count+deleted. 
* * The real entries will be encoded with an ID that is just the - * millisecond and sequence difference compared to the master entry - * (delta encoding), and if the fields of the entry are the same as - * the master enty fields, the entry flags will specify this fact - * and the entry fields and number of fields will be omitted (see later - * in the code of this function). */ + * millisecond and sequence difference compared to the key stored at + * the radix tree node containing the listpack (delta encoding), and + * if the fields of the entry are the same as the master enty fields, the + * entry flags will specify this fact and the entry fields and number + * of fields will be omitted (see later in the code of this function). */ int flags = STREAM_ITEM_FLAG_NONE; if (lp == NULL || lp_bytes > STREAM_BYTES_PER_LISTPACK) { @@ -206,7 +221,8 @@ int streamAppendItem(stream *s, robj **argv, int numfields, streamID *added_id, streamEncodeID(rax_key,&id); /* Create the listpack having the master entry ID and fields. */ lp = lpNew(); - lp = lpAppend(lp,(unsigned char*)rax_key,sizeof(rax_key)); + lp = lpAppendInteger(lp,1); /* One item, the one we are adding. */ + lp = lpAppendInteger(lp,0); /* Zero deleted so far. */ lp = lpAppendInteger(lp,numfields); for (int i = 0; i < numfields; i++) { sds field = argv[i*2]->ptr; @@ -220,14 +236,15 @@ int streamAppendItem(stream *s, robj **argv, int numfields, streamID *added_id, serverAssert(ri.key_len == sizeof(rax_key)); memcpy(rax_key,ri.key,sizeof(rax_key)); - /* Read the master entry ID. */ - int64_t e_len; + /* Read the master ID from the radix tree key. */ + streamDecodeID(rax_key,&master_id); unsigned char *lp_ele = lpFirst(lp); - unsigned char buf[LP_INTBUF_SIZE]; - unsigned char *e = lpGet(lp_ele,&e_len,buf); - serverAssert(e_len == sizeof(streamID)); - streamDecodeID(e,&master_id); - lp_ele = lpNext(lp,lp_ele); + + /* Update count and skip the deleted fields. 
*/ + int64_t count = lpGetInteger(lp_ele); + lp = lpReplaceInteger(lp,&lp_ele,count+1); + lp_ele = lpNext(lp,lp_ele); /* seek delted. */ + lp_ele = lpNext(lp,lp_ele); /* seek master entry num fields. */ /* Check if the entry we are adding, have the same fields * as the master entry. */ @@ -237,6 +254,8 @@ int streamAppendItem(stream *s, robj **argv, int numfields, streamID *added_id, int i; for (i = 0; i < master_fields_count; i++) { sds field = argv[i*2]->ptr; + int64_t e_len; + unsigned char buf[LP_INTBUF_SIZE]; unsigned char *e = lpGet(lp_ele,&e_len,buf); /* Stop if there is a mismatch. */ if (sdslen(field) != (size_t)e_len || @@ -348,16 +367,13 @@ int streamIteratorGetID(streamIterator *si, streamID *id, int64_t *numfields) { if (si->lp == NULL || si->lp_ele == NULL) { if (!raxNext(&si->ri)) return 0; serverAssert(si->ri.key_len == sizeof(streamID)); - si->lp = si->ri.data; - si->lp_ele = lpFirst(si->lp); /* Seek the master ID. */ /* Get the master ID. */ - int64_t e_len; - unsigned char buf[LP_INTBUF_SIZE]; - unsigned char *e = lpGet(si->lp_ele,&e_len,buf); - serverAssert(e_len == sizeof(streamID)); - streamDecodeID(e,&si->master_id); - si->lp_ele = lpNext(si->lp,si->lp_ele); /* Seek fields count. */ + streamDecodeID(si->ri.key,&si->master_id); /* Get the master fields count. */ + si->lp = si->ri.data; + si->lp_ele = lpFirst(si->lp); /* Seek items count */ + si->lp_ele = lpNext(si->lp,si->lp_ele); /* Seek deleted count. */ + si->lp_ele = lpNext(si->lp,si->lp_ele); /* Seek num fields. */ si->master_fields_count = lpGetInteger(si->lp_ele); si->lp_ele = lpNext(si->lp,si->lp_ele); /* Seek first field. */ si->master_fields_start = si->lp_ele; From 51797185e04add345fb2fd1cddb3f49edfcae637 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 28 Sep 2017 18:12:15 +0200 Subject: [PATCH 0516/1722] Streams: reduce listpack max size to 2k to speedup range queries. Listpack max size is a tradeoff between space and time. 
A 2k max entry puts the memory usage approximately at a similar order of magnitude (5 million entries went from 96 to 120 MB), but the range queries speed doubled (because there are half entries to scan in the average case). Lower values could be considered, or maybe this parameter should be made tunable. --- src/t_stream.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/t_stream.c b/src/t_stream.c index 00d07ac57..956a9af1b 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -31,7 +31,7 @@ #include "endianconv.h" #include "stream.h" -#define STREAM_BYTES_PER_LISTPACK 4096 +#define STREAM_BYTES_PER_LISTPACK 2048 /* Every stream item inside the listpack, has a flags field that is used to * mark the entry as deleted, or having the same field as the "master" From 3eea06a2b3bde3e12b748f8421637cfaed59d7c1 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 29 Sep 2017 12:40:29 +0200 Subject: [PATCH 0517/1722] Streams: XADD MAXLEN implementation. The core of this change is the implementation of stream trimming, and the resulting MAXLEN option of XADD as a trivial result of having trimming functionalities. MAXLEN already works but in order to be more efficient listpack GC should be implemented, currently marked as a TODO item inside the comments. --- src/t_stream.c | 132 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 129 insertions(+), 3 deletions(-) diff --git a/src/t_stream.c b/src/t_stream.c index 956a9af1b..a7505d15d 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -305,6 +305,107 @@ int streamAppendItem(stream *s, robj **argv, int numfields, streamID *added_id, return C_OK; } +/* Trim the stream 's' to have no more than maxlen elements, and return the + * number of elements removed from the stream. The 'approx' option, if non-zero, + * specifies that the trimming must be performed in a approximated way in + * order to maximize performances. 
This means that the stream may contain + * more elements than 'maxlen', and elements are only removed if we can remove + * a *whole* node of the radix tree. The elements are removed from the head + * of the stream (older elements). + * + * The function may return zero if: + * + * 1) The stream is already shorter or equal to the specified max length. + * 2) The 'approx' option is true and the head node had not enough elements + * to be deleted, leaving the stream with a number of elements >= maxlen. + */ +int64_t streamTrimByLength(stream *s, size_t maxlen, int approx) { + if (s->length <= maxlen) return 0; + + raxIterator ri; + raxStart(&ri,s->rax); + raxSeek(&ri,"^",NULL,0); + + int64_t deleted = 0; + while(s->length > maxlen && raxNext(&ri)) { + unsigned char *lp = ri.data, *p = lpFirst(lp); + int64_t entries = lpGetInteger(p); + + /* Check if we can remove the whole node, and still have at + * least maxlen elements. */ + if (s->length - entries >= maxlen) { + raxRemove(s->rax,ri.key,ri.key_len,NULL); + raxSeek(&ri,">=",ri.key,ri.key_len); + s->length -= entries; + deleted += entries; + continue; + } + + /* If we cannot remove a whole element, and approx is true, + * stop here. */ + if (approx) break; + + /* Otherwise, we have to mark single entries inside the listpack + * as deleted. We start by updating the entries/deleted counters. */ + int64_t to_delete = s->length - maxlen; + serverAssert(to_delete < entries); + lp = lpReplaceInteger(lp,&p,entries-to_delete); + p = lpNext(lp,p); /* Seek deleted field. */ + int64_t deleted = lpGetInteger(p); + lp = lpReplaceInteger(lp,&p,deleted+to_delete); + p = lpNext(lp,p); /* Seek num-of-fields in the master entry. */ + + /* Skip all the master fields. */ + int64_t master_fields_count = lpGetInteger(p); + p = lpNext(lp,p); /* Seek the first field. */ + for (int64_t j = 0; j < master_fields_count; j++) + p = lpNext(lp,p); /* Skip all master fields. */ + + /* 'p' is now pointing to the first entry inside the listpack. 
+ * We have to run entry after entry, marking entries as deleted + * if they are already not deleted. */ + while(p) { + int flags = lpGetInteger(p); + int to_skip; + + /* Mark the entry as deleted. */ + if (!(flags & STREAM_ITEM_FLAG_DELETED)) { + flags |= STREAM_ITEM_FLAG_DELETED; + lp = lpReplaceInteger(lp,&p,flags); + deleted++; + s->length--; + if (s->length <= maxlen) break; /* Enough entries deleted. */ + } + + p = lpNext(lp,p); /* Skip ID ms delta. */ + p = lpNext(lp,p); /* Skip ID seq delta. */ + p = lpNext(lp,p); /* Seek num-fields or values (if compressed). */ + if (flags & STREAM_ITEM_FLAG_SAMEFIELDS) { + to_skip = master_fields_count; + } else { + to_skip = lpGetInteger(p); p = lpNext(lp,p); + to_skip = 1+(to_skip*2); + } + + while(to_skip--) p = lpNext(lp,p); /* Skip the whole entry. */ + } + + /* Here we should perform garbage collection in case at this point + * there are too many entries deleted inside the listpack. */ + entries -= to_delete; + deleted += to_delete; + if (entries + deleted > 10 && deleted > entries/2) { + /* TODO: perform a garbage collection. */ + } + + break; /* If we are here, there was enough to delete in the current + node, so no need to go to the next node. */ + } + + raxStop(&ri); + return deleted; +} + /* Initialize the stream iterator, so that we can call iterating functions * to get the next items. This requires a corresponding streamIteratorStop() * at the end. @@ -578,21 +679,32 @@ invalid: void xaddCommand(client *c) { streamID id; int id_given = 0; /* Was an ID different than "*" specified? */ + long long maxlen = 0; /* 0 means no maximum length. */ + int approx_maxlen = 0; /* If 1 only delete whole radix tree nodes, so + the maxium length is not applied verbatim. */ + int maxlen_arg_idx = 0; /* Index of the count in MAXLEN, for rewriting. */ /* Parse options. */ int i = 2; /* This is the first argument position where we could find an option, or the ID. 
*/ for (; i < c->argc; i++) { - int moreargs = i != c->argc-1; + int moreargs = (c->argc-1) - i; /* Number of additional arguments. */ char *opt = c->argv[i]->ptr; if (opt[0] == '*' && opt[1] == '\0') { /* This is just a fast path for the common case of auto-ID * creation. */ break; } else if (!strcasecmp(opt,"maxlen") && moreargs) { - addReplyError(c,"Sorry, MAXLEN is still not implemented"); + char *next = c->argv[i+1]->ptr; + /* Check for the form MAXLEN ~ . */ + if (moreargs >= 2 && next[0] == '~' && next[1] == '\0') { + approx_maxlen = 1; + i++; + } + if (getLongLongFromObjectOrReply(c,c->argv[i+1],&maxlen,NULL) + != C_OK) return; i++; - return; + maxlen_arg_idx = i; } else { /* If we are here is a syntax error or a valid ID. */ if (streamParseIDOrReply(NULL,c->argv[i],&id,0) == C_OK) { @@ -634,6 +746,20 @@ void xaddCommand(client *c) { notifyKeyspaceEvent(NOTIFY_STREAM,"xadd",c->argv[1],c->db->id); server.dirty++; + /* Remove older elements if MAXLEN was specified. */ + if (maxlen) { + if (!streamTrimByLength(s,maxlen,approx_maxlen)) { + /* If no trimming was performed, for instance because approximated + * trimming length was specified, rewrite the MAXLEN argument + * as zero, so that the command is propagated without trimming. */ + robj *zeroobj = createStringObjectFromLongLong(0); + rewriteClientCommandArgument(c,maxlen_arg_idx,zeroobj); + decrRefCount(zeroobj); + } else { + notifyKeyspaceEvent(NOTIFY_STREAM,"xtrim",c->argv[1],c->db->id); + } + } + /* Let's rewrite the ID argument with the one actually generated for * AOF/replication propagation. */ robj *idarg = createObject(OBJ_STRING, From 6bb42f8d7fdb0e714d69f9ad2a8b06afa0bf6632 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 29 Sep 2017 16:16:19 +0200 Subject: [PATCH 0518/1722] Streams: fix streamTrimByLength() standalone items skipping. 
--- src/t_stream.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/t_stream.c b/src/t_stream.c index a7505d15d..4365aa47d 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -383,7 +383,7 @@ int64_t streamTrimByLength(stream *s, size_t maxlen, int approx) { if (flags & STREAM_ITEM_FLAG_SAMEFIELDS) { to_skip = master_fields_count; } else { - to_skip = lpGetInteger(p); p = lpNext(lp,p); + to_skip = lpGetInteger(p); to_skip = 1+(to_skip*2); } From aa4a55ac9758dfebd0fdd1877e395b97a38b5893 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 29 Sep 2017 16:17:46 +0200 Subject: [PATCH 0519/1722] Streams: add XADD + MAXLEN test. --- tests/unit/type/stream.tcl | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/unit/type/stream.tcl b/tests/unit/type/stream.tcl index c63ed8a25..e9f187ae2 100644 --- a/tests/unit/type/stream.tcl +++ b/tests/unit/type/stream.tcl @@ -79,6 +79,23 @@ start_server { assert {[streamCompareID $id2 $id3] == -1} } + test {XADD with MAXLEN option} { + r DEL mystream + for {set j 0} {$j < 1000} {incr j} { + if {rand() < 0.9} { + r XADD mystream MAXLEN 5 * xitem $j + } else { + r XADD mystream MAXLEN 5 * yitem $j + } + } + set res [r xrange mystream - +] + set expected 995 + foreach r $res { + assert {[lindex $r 1 1] == $expected} + incr expected + } + } + test {XADD mass insertion and XLEN} { r DEL mystream r multi From 3f8034d75ae3c3ace475d5127cca2281379bb4f2 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 29 Sep 2017 16:18:59 +0200 Subject: [PATCH 0520/1722] Streams: fix memory leak in streamTrimByLength(). --- src/t_stream.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/t_stream.c b/src/t_stream.c index 4365aa47d..03fa5300b 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -334,6 +334,7 @@ int64_t streamTrimByLength(stream *s, size_t maxlen, int approx) { /* Check if we can remove the whole node, and still have at * least maxlen elements. 
*/ if (s->length - entries >= maxlen) { + lpFree(lp); raxRemove(s->rax,ri.key,ri.key_len,NULL); raxSeek(&ri,">=",ri.key,ri.key_len); s->length -= entries; From 8493fd310ecaa4408b70b6c57474591ecae655cf Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 29 Sep 2017 18:00:40 +0200 Subject: [PATCH 0521/1722] Streams: fix XADD + MAXLEN propagation due to var shadowing. Clang should be more prone to return warnings by default when there is same-var-name shadowing. GCC does this and can avoid bugs like that. --- src/t_stream.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/t_stream.c b/src/t_stream.c index 03fa5300b..55d06dd75 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -352,8 +352,8 @@ int64_t streamTrimByLength(stream *s, size_t maxlen, int approx) { serverAssert(to_delete < entries); lp = lpReplaceInteger(lp,&p,entries-to_delete); p = lpNext(lp,p); /* Seek deleted field. */ - int64_t deleted = lpGetInteger(p); - lp = lpReplaceInteger(lp,&p,deleted+to_delete); + int64_t marked_deleted = lpGetInteger(p); + lp = lpReplaceInteger(lp,&p,marked_deleted+to_delete); p = lpNext(lp,p); /* Seek num-of-fields in the master entry. */ /* Skip all the master fields. */ @@ -394,8 +394,8 @@ int64_t streamTrimByLength(stream *s, size_t maxlen, int approx) { /* Here we should perform garbage collection in case at this point * there are too many entries deleted inside the listpack. */ entries -= to_delete; - deleted += to_delete; - if (entries + deleted > 10 && deleted > entries/2) { + marked_deleted += to_delete; + if (entries + marked_deleted > 10 && marked_deleted > entries/2) { /* TODO: perform a garbage collection. */ } From 503e3053ee51e67b3df42b93d6db424753a583c0 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 3 Oct 2017 11:42:08 +0200 Subject: [PATCH 0522/1722] Streams: move ID ms/seq separator from '.' 
to '-' After checking with the community via Twitter (here: https://twitter.com/antirez/status/915130876861788161) the verdict was to use ":". However I later realized, after users lamented the fact that it's hard to copy IDs just with double click, that this was the reason why I moved to "." in the first instance. Fortunately "-", that was the other option with most votes, also gets selected with double click on most terminal applications on Linux and MacOS. So my reasoning was: 1) We can't retain "." because it's actually confusing to newcomers, it looks like a floating number, people may be tricked into thinking they can order IDs numerically as floats. 2) Moving to a double-click-to-select format is much better. People will work with such IDs for long time when coding / debugging. Why making now a choice that will impact this for the next years? The only other viable option was "-", and that's what I did. Thanks. --- src/t_stream.c | 8 ++++---- tests/unit/type/stream.tcl | 14 +++++++------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/t_stream.c b/src/t_stream.c index 55d06dd75..7838b92b0 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -574,7 +574,7 @@ size_t streamReplyWithRange(client *c, stream *s, streamID *start, streamID *end while(streamIteratorGetID(&si,&id,&numfields)) { /* Emit a two elements array for each item. The first is * the ID, the second is an array of field-value pairs. */ - sds replyid = sdscatfmt(sdsempty(),"+%U.%U\r\n",id.ms,id.seq); + sds replyid = sdscatfmt(sdsempty(),"+%U-%U\r\n",id.ms,id.seq); addReplyMultiBulkLen(c,2); addReplySds(c,replyid); addReplyMultiBulkLen(c,numfields*2); @@ -660,7 +660,7 @@ int streamParseIDOrReply(client *c, robj *o, streamID *id, uint64_t missing_seq) } /* Parse . form. 
*/ - char *dot = strchr(buf,'.'); + char *dot = strchr(buf,'-'); if (dot) *dot = '\0'; uint64_t ms, seq; if (string2ull(buf,&ms) == 0) goto invalid; @@ -740,7 +740,7 @@ void xaddCommand(client *c) { "target stream top item"); return; } - sds reply = sdscatfmt(sdsempty(),"+%U.%U\r\n",id.ms,id.seq); + sds reply = sdscatfmt(sdsempty(),"+%U-%U\r\n",id.ms,id.seq); addReplySds(c,reply); signalModifiedKey(c->db,c->argv[1]); @@ -764,7 +764,7 @@ void xaddCommand(client *c) { /* Let's rewrite the ID argument with the one actually generated for * AOF/replication propagation. */ robj *idarg = createObject(OBJ_STRING, - sdscatfmt(sdsempty(),"%U.%U",id.ms,id.seq)); + sdscatfmt(sdsempty(),"%U-%U",id.ms,id.seq)); rewriteClientCommandArgument(c,i,idarg); decrRefCount(idarg); diff --git a/tests/unit/type/stream.tcl b/tests/unit/type/stream.tcl index e9f187ae2..06f31e08c 100644 --- a/tests/unit/type/stream.tcl +++ b/tests/unit/type/stream.tcl @@ -1,8 +1,8 @@ # return value is like strcmp() and similar. proc streamCompareID {a b} { if {$a eq $b} {return 0} - lassign [split $a .] a_ms a_seq - lassign [split $b .] b_ms b_seq + lassign [split $a -] a_ms a_seq + lassign [split $b -] b_ms b_seq if {$a_ms > $b_ms} {return 1} if {$a_ms < $b_ms} {return -1} # Same ms case, compare seq. @@ -14,9 +14,9 @@ proc streamCompareID {a b} { # Note that this function does not care to handle 'seq' overflow # since it's a 64 bit value. proc streamNextID {id} { - lassign [split $id .] ms seq + lassign [split $id -] ms seq incr seq - join [list $ms $seq] . + join [list $ms $seq] - } # Generate a random stream entry ID with the ms part between min and max @@ -24,12 +24,12 @@ proc streamNextID {id} { # XRANGE against a Tcl implementation implementing the same concept # with Tcl-only code in a linear array. proc streamRandomID {min_id max_id} { - lassign [split $min_id .] min_ms min_seq - lassign [split $max_id .] 
max_ms max_seq + lassign [split $min_id -] min_ms min_seq + lassign [split $max_id -] max_ms max_seq set delta [expr {$max_ms-$min_ms+1}] set ms [expr {$min_ms+[randomInt $delta]}] set seq [randomInt 1000] - return $ms.$seq + return $ms-$seq } # Tcl-side implementation of XRANGE to perform fuzz testing in the Redis From 6c3b947799a655c020ae9581d352f518c1eb7edd Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 4 Nov 2017 18:05:46 +0100 Subject: [PATCH 0523/1722] Streams: fix XREAD test broken after previous tests improvements. 10% of times the data is not just "item 0" but there is also the "otherfield" part. Use [lrange] to avoid the issue. This commit fixes #4416. --- tests/unit/type/stream.tcl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/type/stream.tcl b/tests/unit/type/stream.tcl index 06f31e08c..5c58e7fb2 100644 --- a/tests/unit/type/stream.tcl +++ b/tests/unit/type/stream.tcl @@ -149,7 +149,7 @@ start_server { test {XREAD with non empty second stream} { set res [r XREAD COUNT 1 STREAMS nostream mystream 0.0 0.0] assert {[lindex $res 0 0] eq {mystream}} - assert {[lindex $res 0 1 0 1] eq {item 0}} + assert {[lrange [lindex $res 0 1 0 1] 0 1] eq {item 0}} } test {Blocking XREAD waiting new data} { From 97d57e3052e9f76748a29abb619f74729158bfe0 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 8 Nov 2017 22:57:10 +0100 Subject: [PATCH 0524/1722] Streams: fix TYPE for stream type. 
--- src/db.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/db.c b/src/db.c index 74c2be624..0ded55586 100644 --- a/src/db.c +++ b/src/db.c @@ -798,6 +798,7 @@ void typeCommand(client *c) { case OBJ_SET: type = "set"; break; case OBJ_ZSET: type = "zset"; break; case OBJ_HASH: type = "hash"; break; + case OBJ_STREAM: type = "stream"; break; case OBJ_MODULE: { moduleValue *mv = o->ptr; type = mv->type->name; From f065f551a91c12608f59e092442b8668bdff2d71 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 8 Nov 2017 22:59:15 +0100 Subject: [PATCH 0525/1722] Streams: fix redis-cli to understand the stream type. --- src/redis-cli.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 4ad325786..1f80bc615 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2074,7 +2074,8 @@ static void pipeMode(void) { #define TYPE_SET 2 #define TYPE_HASH 3 #define TYPE_ZSET 4 -#define TYPE_NONE 5 +#define TYPE_STREAM 5 +#define TYPE_NONE 6 static redisReply *sendScan(unsigned long long *it) { redisReply *reply = redisCommand(context, "SCAN %llu", *it); @@ -2133,6 +2134,8 @@ static int toIntType(char *key, char *type) { return TYPE_HASH; } else if(!strcmp(type, "zset")) { return TYPE_ZSET; + } else if(!strcmp(type, "stream")) { + return TYPE_STREAM; } else if(!strcmp(type, "none")) { return TYPE_NONE; } else { @@ -2221,7 +2224,7 @@ static void findBigKeys(void) { unsigned long long biggest[5] = {0}, counts[5] = {0}, totalsize[5] = {0}; unsigned long long sampled = 0, total_keys, totlen=0, *sizes=NULL, it=0; sds maxkeys[5] = {0}; - char *typename[] = {"string","list","set","hash","zset"}; + char *typename[] = {"string","list","set","hash","zset","stream"}; char *typeunit[] = {"bytes","items","members","fields","members"}; redisReply *reply, *keys; unsigned int arrsize=0, i; From d42f4b141433d771f0fef8f6ebbbe6302c7416e8 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 9 Nov 2017 12:04:26 +0100 Subject: [PATCH 0526/1722] 
Streams: fix COUNT parsing, issue #4433. --- src/t_stream.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/t_stream.c b/src/t_stream.c index 7838b92b0..61b229a5c 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -785,7 +785,7 @@ void xrangeCommand(client *c) { if (streamParseIDOrReply(c,c->argv[3],&endid,UINT64_MAX) == C_ERR) return; /* Parse the COUNT option if any. */ - if (c->argc > 4) { + if (c->argc > 5) { if (strcasecmp(c->argv[4]->ptr,"COUNT") == 0) { if (getLongLongFromObjectOrReply(c,c->argv[5],&count,NULL) != C_OK) return; From 77d514201c6a96c7db50081ca4626ccf016ce784 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 15 Nov 2017 12:48:32 +0100 Subject: [PATCH 0527/1722] Streams: Update listpack to fix 32bit strings encoding error. Note that streams produced by XADD in previous broken versions having elements with 4096 bytes or more will be permanently broken and must be created again from scratch. Fix #4428 Fix #4349 --- src/listpack.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/listpack.c b/src/listpack.c index e2702b65c..6db4086e9 100644 --- a/src/listpack.c +++ b/src/listpack.c @@ -283,7 +283,7 @@ int lpEncodeGetType(unsigned char *ele, uint32_t size, unsigned char *intenc, ui } else { if (size < 64) *enclen = 1+size; else if (size < 4096) *enclen = 2+size; - else *enclen = 4+size; + else *enclen = 5+size; return LP_ENCODING_STRING; } } @@ -363,7 +363,7 @@ void lpEncodeString(unsigned char *buf, unsigned char *s, uint32_t len) { buf[2] = (len >> 8) & 0xff; buf[3] = (len >> 16) & 0xff; buf[4] = (len >> 24) & 0xff; - memcpy(buf+4,s,len); + memcpy(buf+5,s,len); } } From 2ac84591875986c5437de771d67030fbb7e6a32d Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 17 Nov 2017 10:16:30 +0100 Subject: [PATCH 0528/1722] Streams: augment stream entries to allow backward scanning. 
--- src/t_stream.c | 47 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 12 deletions(-) diff --git a/src/t_stream.c b/src/t_stream.c index 61b229a5c..14eba44c0 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -199,9 +199,9 @@ int streamAppendItem(stream *s, robj **argv, int numfields, streamID *added_id, * * The master entry is composed like in the following example: * - * +-------+---------+------------+---------+--/--+---------+---------+ - * | count | deleted | num-fields | field_1 | field_2 | ... | field_N | - * +-------+---------+------------+---------+--/--+---------+---------+ + * +-------+---------+------------+---------+--/--+---------+---------+-+ + * | count | deleted | num-fields | field_1 | field_2 | ... | field_N |0| + * +-------+---------+------------+---------+--/--+---------+---------+-+ * * count and deleted just represent respectively the total number of * entires inside the listpack that are valid, and marked as deleted @@ -213,7 +213,11 @@ int streamAppendItem(stream *s, robj **argv, int numfields, streamID *added_id, * the radix tree node containing the listpack (delta encoding), and * if the fields of the entry are the same as the master enty fields, the * entry flags will specify this fact and the entry fields and number - * of fields will be omitted (see later in the code of this function). */ + * of fields will be omitted (see later in the code of this function). + * + * The "0" entry at the end is the same as the 'lp-count' entry in the + * regular stream entries (see below), and marks the fact that there are + * no more entires, when we scan the stream from right to left. 
*/ int flags = STREAM_ITEM_FLAG_NONE; if (lp == NULL || lp_bytes > STREAM_BYTES_PER_LISTPACK) { @@ -228,6 +232,7 @@ int streamAppendItem(stream *s, robj **argv, int numfields, streamID *added_id, sds field = argv[i*2]->ptr; lp = lpAppend(lp,(unsigned char*)field,sdslen(field)); } + lp = lpAppendInteger(lp,0); /* Master entry zero terminator. */ raxInsert(s->rax,(unsigned char*)&rax_key,sizeof(rax_key),lp,NULL); /* The first entry we insert, has obviously the same fields of the * master entry. */ @@ -271,20 +276,25 @@ int streamAppendItem(stream *s, robj **argv, int numfields, streamID *added_id, /* Populate the listpack with the new entry. We use the following * encoding: * - * +-----+--------+----------+-------+-------+-/-+-------+-------+ - * |flags|entry-id|num-fields|field-1|value-1|...|field-N|value-N| - * +-----+--------+----------+-------+-------+-/-+-------+-------+ + * +-----+--------+----------+-------+-------+-/-+-------+-------+--------+ + * |flags|entry-id|num-fields|field-1|value-1|...|field-N|value-N|lp-count| + * +-----+--------+----------+-------+-------+-/-+-------+-------+--------+ * * However if the SAMEFIELD flag is set, we have just to populate * the entry with the values, so it becomes: * - * +-----+--------+-------+-/-+-------+ - * |flags|entry-id|value-1|...|value-N| - * +-----+--------+-------+-/-+-------+ + * +-----+--------+-------+-/-+-------+--------+ + * |flags|entry-id|value-1|...|value-N|lp-count| + * +-----+--------+-------+-/-+-------+--------+ * * The entry-id field is actually two separated fields: the ms * and seq difference compared to the master entry. - */ + * + * The lp-count field is a number that states the number of listpack pieces + * that compose the entry, so that it's possible to travel the entry + * in reverse order: we can just start from the end of the listpack, read + * the entry, and jump back N times to seek the "flags" field to read + * the stream full entry. 
*/ lp = lpAppendInteger(lp,flags); lp = lpAppendInteger(lp,id.ms - master_id.ms); lp = lpAppendInteger(lp,id.seq - master_id.seq); @@ -296,6 +306,11 @@ int streamAppendItem(stream *s, robj **argv, int numfields, streamID *added_id, lp = lpAppend(lp,(unsigned char*)field,sdslen(field)); lp = lpAppend(lp,(unsigned char*)value,sdslen(value)); } + /* Compute and store the lp-count field. */ + int lp_count = numfields; + if (!(flags & STREAM_ITEM_FLAG_SAMEFIELDS)) lp_count *= 2; + lp_count += 3; /* Add the 3 fixed fileds flags + ms-diff + seq-diff. */ + lp = lpAppendInteger(lp,lp_count); /* Insert back into the tree in order to update the listpack pointer. */ raxInsert(s->rax,(unsigned char*)&rax_key,sizeof(rax_key),lp,NULL); @@ -361,6 +376,7 @@ int64_t streamTrimByLength(stream *s, size_t maxlen, int approx) { p = lpNext(lp,p); /* Seek the first field. */ for (int64_t j = 0; j < master_fields_count; j++) p = lpNext(lp,p); /* Skip all master fields. */ + p = lpNext(lp,p); /* Skip the zero master entry terminator. */ /* 'p' is now pointing to the first entry inside the listpack. * We have to run entry after entry, marking entries as deleted @@ -389,6 +405,7 @@ int64_t streamTrimByLength(stream *s, size_t maxlen, int approx) { } while(to_skip--) p = lpNext(lp,p); /* Skip the whole entry. */ + p = lpNext(lp,p); /* Skip the final lp-count field. */ } /* Here we should perform garbage collection in case at this point @@ -482,11 +499,17 @@ int streamIteratorGetID(streamIterator *si, streamID *id, int64_t *numfields) { /* Skip master fileds to seek the first entry. */ for (uint64_t i = 0; i < si->master_fields_count; i++) si->lp_ele = lpNext(si->lp,si->lp_ele); + /* We are now pointing the zero term of the master entry. */ } /* For every radix tree node, iterate the corresponding listpack, * returning elements when they are within range. */ - while(si->lp_ele) { + while(1) { + /* Skip the previous entry lp-count field, or in case of the + * master entry, the zero term field. 
*/ + si->lp_ele = lpNext(si->lp,si->lp_ele); + if (si->lp_ele == NULL) break; + /* Get the flags entry. */ int flags = lpGetInteger(si->lp_ele); si->lp_ele = lpNext(si->lp,si->lp_ele); /* Seek ID. */ From fbee3af6adadf4f9aa85071674af104e014ea3fd Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 17 Nov 2017 13:24:20 +0100 Subject: [PATCH 0529/1722] Streams: state machine for reverse iteration WIP 1. --- src/aof.c | 2 +- src/blocked.c | 2 +- src/stream.h | 7 +-- src/t_stream.c | 129 +++++++++++++++++++++++++++++++++++-------------- 4 files changed, 98 insertions(+), 42 deletions(-) diff --git a/src/aof.c b/src/aof.c index 5fbfdd695..79962fd0a 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1035,7 +1035,7 @@ int rewriteHashObject(rio *r, robj *key, robj *o) { * The function returns 0 on error, 1 on success. */ int rewriteStreamObject(rio *r, robj *key, robj *o) { streamIterator si; - streamIteratorStart(&si,o->ptr,NULL,NULL); + streamIteratorStart(&si,o->ptr,NULL,NULL,0); streamID id; int64_t numfields; diff --git a/src/blocked.c b/src/blocked.c index 734e6ffd6..f438c3353 100644 --- a/src/blocked.c +++ b/src/blocked.c @@ -326,7 +326,7 @@ void handleClientsBlockedOnKeys(void) { addReplyMultiBulkLen(receiver,2); addReplyBulk(receiver,rl->key); streamReplyWithRange(receiver,s,&start,NULL, - receiver->bpop.xread_count); + receiver->bpop.xread_count,0); } } } diff --git a/src/stream.h b/src/stream.h index df29e9e70..214b6d9a5 100644 --- a/src/stream.h +++ b/src/stream.h @@ -28,9 +28,10 @@ typedef struct stream { typedef struct streamIterator { streamID master_id; /* ID of the master entry at listpack head. */ uint64_t master_fields_count; /* Master entries # of fields. */ - unsigned char *master_fields_start; /* Master entries start in listapck. */ + unsigned char *master_fields_start; /* Master entries start in listpack. */ unsigned char *master_fields_ptr; /* Master field to emit next. */ int entry_flags; /* Flags of entry we are emitting. 
*/ + int rev; /* True if iterating end to start (reverse). */ uint64_t start_key[2]; /* Start key as 128 bit big endian. */ uint64_t end_key[2]; /* End key as 128 bit big endian. */ raxIterator ri; /* Rax iterator. */ @@ -49,8 +50,8 @@ struct client; stream *streamNew(void); void freeStream(stream *s); -size_t streamReplyWithRange(struct client *c, stream *s, streamID *start, streamID *end, size_t count); -void streamIteratorStart(streamIterator *si, stream *s, streamID *start, streamID *end); +size_t streamReplyWithRange(struct client *c, stream *s, streamID *start, streamID *end, size_t count, int rev); +void streamIteratorStart(streamIterator *si, stream *s, streamID *start, streamID *end, int rev); int streamIteratorGetID(streamIterator *si, streamID *id, int64_t *numfields); void streamIteratorGetField(streamIterator *si, unsigned char **fieldptr, unsigned char **valueptr, int64_t *fieldlen, int64_t *valuelen); void streamIteratorStop(streamIterator *si); diff --git a/src/t_stream.c b/src/t_stream.c index 14eba44c0..945fc28c0 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -426,7 +426,9 @@ int64_t streamTrimByLength(stream *s, size_t maxlen, int approx) { /* Initialize the stream iterator, so that we can call iterating functions * to get the next items. This requires a corresponding streamIteratorStop() - * at the end. + * at the end. The 'rev' parameter controls the direction. If it's zero the + * iteration is from the start to the end element (inclusive), otherwise + * if rev is non-zero, the iteration is reversed. 
* * Once the iterator is initalized, we iterate like this: * @@ -443,7 +445,7 @@ int64_t streamTrimByLength(stream *s, size_t maxlen, int approx) { * } * } * streamIteratorStop(&myiterator); */ -void streamIteratorStart(streamIterator *si, stream *s, streamID *start, streamID *end) { +void streamIteratorStart(streamIterator *si, stream *s, streamID *start, streamID *end, int rev) { /* Intialize the iterator and translates the iteration start/stop * elements into a 128 big big-endian number. */ if (start) { @@ -462,17 +464,26 @@ void streamIteratorStart(streamIterator *si, stream *s, streamID *start, streamI /* Seek the correct node in the radix tree. */ raxStart(&si->ri,s->rax); - if (start && (start->ms || start->seq)) { - raxSeek(&si->ri,"<=",(unsigned char*)si->start_key, - sizeof(si->start_key)); - if (raxEOF(&si->ri)) - raxSeek(&si->ri,">",(unsigned char*)si->start_key, + if (!rev) { + if (start && (start->ms || start->seq)) { + raxSeek(&si->ri,"<=",(unsigned char*)si->start_key, sizeof(si->start_key)); + if (raxEOF(&si->ri)) raxSeek(&si->ri,"^",NULL,0); + } else { + raxSeek(&si->ri,"^",NULL,0); + } } else { - raxSeek(&si->ri,"^",NULL,0); + if (end && (end->ms || end->seq)) { + raxSeek(&si->ri,"<=",(unsigned char*)si->end_key, + sizeof(si->end_key)); + if (raxEOF(&si->ri)) raxSeek(&si->ri,"$",NULL,0); + } else { + raxSeek(&si->ri,"$",NULL,0); + } } si->lp = NULL; /* There is no current listpack right now. */ si->lp_ele = NULL; /* Current listpack cursor. */ + si->rev = rev; /* Direction, if non-zero reversed, from end to start. */ } /* Return 1 and store the current item ID at 'id' if there are still @@ -484,7 +495,8 @@ int streamIteratorGetID(streamIterator *si, streamID *id, int64_t *numfields) { * iteration or the previous listpack was completely iterated. * Go to the next node. 
*/ if (si->lp == NULL || si->lp_ele == NULL) { - if (!raxNext(&si->ri)) return 0; + if (!si->rev && !raxNext(&si->ri)) return 0; + else if (si->rev && !raxPrev(&si->ri)) return 0; serverAssert(si->ri.key_len == sizeof(streamID)); /* Get the master ID. */ streamDecodeID(si->ri.key,&si->master_id); @@ -499,16 +511,38 @@ int streamIteratorGetID(streamIterator *si, streamID *id, int64_t *numfields) { /* Skip master fileds to seek the first entry. */ for (uint64_t i = 0; i < si->master_fields_count; i++) si->lp_ele = lpNext(si->lp,si->lp_ele); - /* We are now pointing the zero term of the master entry. */ + /* We are now pointing the zero term of the master entry. If + * we are iterating in reverse order, we need to seek the + * end of the listpack. */ + if (si->rev) si->lp_ele = lpLast(si->lp); + } else if (si->rev) { + /* If we are itereating in the reverse order, and this is not + * the first entry emitted for this listpack, then we already + * emitted the current entry, and have to go back to the previous + * one. */ + int lp_count = lpGetInteger(si->lp_ele); + while(lp_count--) si->lp_ele = lpPrev(si->lp,si->lp_ele); + /* Seek lp-count of prev entry. */ + si->lp_ele = lpPrev(si->lp,si->lp_ele); } /* For every radix tree node, iterate the corresponding listpack, * returning elements when they are within range. */ while(1) { - /* Skip the previous entry lp-count field, or in case of the - * master entry, the zero term field. */ - si->lp_ele = lpNext(si->lp,si->lp_ele); - if (si->lp_ele == NULL) break; + if (!si->rev) { + /* If we are going forward, skip the previous entry + * lp-count field (or in case of the master entry, the zero + * term field) */ + si->lp_ele = lpNext(si->lp,si->lp_ele); + if (si->lp_ele == NULL) break; + } else { + /* If we are going backward, read the number of elements this + * entry is composed of, and jump backward N times to seek + * its start. 
*/ + int lp_count = lpGetInteger(si->lp_ele); + if (lp_count == 0) break; /* We reached the master entry. */ + while(lp_count--) si->lp_ele = lpPrev(si->lp,si->lp_ele); + } /* Get the flags entry. */ int flags = lpGetInteger(si->lp_ele); @@ -535,15 +569,28 @@ int streamIteratorGetID(streamIterator *si, streamID *id, int64_t *numfields) { /* If current >= start, and the entry is not marked as * deleted, emit it. */ - if (memcmp(buf,si->start_key,sizeof(streamID)) >= 0 && - !(flags & STREAM_ITEM_FLAG_DELETED)) - { - if (memcmp(buf,si->end_key,sizeof(streamID)) > 0) - return 0; /* We are already out of range. */ - si->entry_flags = flags; - if (flags & STREAM_ITEM_FLAG_SAMEFIELDS) - si->master_fields_ptr = si->master_fields_start; - return 1; /* Valid item returned. */ + if (!si->rev) { + if (memcmp(buf,si->start_key,sizeof(streamID)) >= 0 && + !(flags & STREAM_ITEM_FLAG_DELETED)) + { + if (memcmp(buf,si->end_key,sizeof(streamID)) > 0) + return 0; /* We are already out of range. */ + si->entry_flags = flags; + if (flags & STREAM_ITEM_FLAG_SAMEFIELDS) + si->master_fields_ptr = si->master_fields_start; + return 1; /* Valid item returned. */ + } + } else { + if (memcmp(buf,si->end_key,sizeof(streamID)) <= 0 && + !(flags & STREAM_ITEM_FLAG_DELETED)) + { + if (memcmp(buf,si->start_key,sizeof(streamID)) < 0) + return 0; /* We are already out of range. */ + si->entry_flags = flags; + if (flags & STREAM_ITEM_FLAG_SAMEFIELDS) + si->master_fields_ptr = si->master_fields_start; + return 1; /* Valid item returned. */ + } } /* If we do not emit, we have to discard. */ @@ -553,7 +600,7 @@ int streamIteratorGetID(streamIterator *si, streamID *id, int64_t *numfields) { si->lp_ele = lpNext(si->lp,si->lp_ele); } - /* End of listpack reached. Try the next radix tree node. */ + /* End of listpack reached. Try the next/prev radix tree node. */ } } @@ -585,15 +632,16 @@ void streamIteratorStop(streamIterator *si) { /* Send the specified range to the client 'c'. 
The range the client will * receive is between start and end inclusive, if 'count' is non zero, no more * than 'count' elemnets are sent. The 'end' pointer can be NULL to mean that - * we want all the elements from 'start' till the end of the stream. */ -size_t streamReplyWithRange(client *c, stream *s, streamID *start, streamID *end, size_t count) { + * we want all the elements from 'start' till the end of the stream. If 'rev' + * is non zero, elements are produced in reversed order from end to start. */ +size_t streamReplyWithRange(client *c, stream *s, streamID *start, streamID *end, size_t count, int rev) { void *arraylen_ptr = addDeferredMultiBulkLength(c); size_t arraylen = 0; streamIterator si; int64_t numfields; streamID id; - streamIteratorStart(&si,s,start,end); + streamIteratorStart(&si,s,start,end,rev); while(streamIteratorGetID(&si,&id,&numfields)) { /* Emit a two elements array for each item. The first is * the ID, the second is an array of field-value pairs. */ @@ -797,25 +845,32 @@ void xaddCommand(client *c) { signalKeyAsReady(c->db, c->argv[1]); } -/* XRANGE key start end [COUNT ] */ +/* XRANGE key start end [COUNT ] [REV] */ void xrangeCommand(client *c) { robj *o; stream *s; streamID startid, endid; long long count = 0; + int rev = 0; if (streamParseIDOrReply(c,c->argv[2],&startid,0) == C_ERR) return; if (streamParseIDOrReply(c,c->argv[3],&endid,UINT64_MAX) == C_ERR) return; /* Parse the COUNT option if any. */ - if (c->argc > 5) { - if (strcasecmp(c->argv[4]->ptr,"COUNT") == 0) { - if (getLongLongFromObjectOrReply(c,c->argv[5],&count,NULL) != C_OK) + if (c->argc > 4) { + for (int j = 4; j < c->argc; j++) { + int additional = c->argc-j-1; + if (strcasecmp(c->argv[j]->ptr,"COUNT") == 0 && additional >= 1) { + if (getLongLongFromObjectOrReply(c,c->argv[j+1],&count,NULL) + != C_OK) return; + if (count < 0) count = 0; + j++; /* Consume additional arg. 
*/ + } else if (strcasecmp(c->argv[j]->ptr,"REV") == 0) { + rev = 1; + } else { + addReply(c,shared.syntaxerr); return; - if (count < 0) count = 0; - } else { - addReply(c,shared.syntaxerr); - return; + } } } @@ -823,7 +878,7 @@ void xrangeCommand(client *c) { if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.emptymultibulk)) == NULL || checkType(c,o,OBJ_STREAM)) return; s = o->ptr; - streamReplyWithRange(c,s,&startid,&endid,count); + streamReplyWithRange(c,s,&startid,&endid,count,rev); } /* XLEN */ @@ -931,7 +986,7 @@ void xreadCommand(client *c) { * of the stream and the data we extracted from it. */ addReplyMultiBulkLen(c,2); addReplyBulk(c,c->argv[i+streams_arg]); - streamReplyWithRange(c,s,&start,NULL,count); + streamReplyWithRange(c,s,&start,NULL,count,0); } } From 681efe652e9332b6440cb048ebd21ff6df9bcdd8 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 17 Nov 2017 13:47:50 +0100 Subject: [PATCH 0530/1722] Streams: fix reverse iteration next node jumping. --- src/t_stream.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/t_stream.c b/src/t_stream.c index 945fc28c0..f64824c9b 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -540,7 +540,11 @@ int streamIteratorGetID(streamIterator *si, streamID *id, int64_t *numfields) { * entry is composed of, and jump backward N times to seek * its start. */ int lp_count = lpGetInteger(si->lp_ele); - if (lp_count == 0) break; /* We reached the master entry. */ + if (lp_count == 0) { /* We reached the master entry. */ + si->lp = NULL; + si->lp_ele = NULL; + break; + } while(lp_count--) si->lp_ele = lpPrev(si->lp,si->lp_ele); } From c46aa40ccacd3218b547ee513411183f1f21840a Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 17 Nov 2017 16:02:11 +0100 Subject: [PATCH 0531/1722] Streams: fix reverse iterator discarding of items out of range. 
--- src/t_stream.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/t_stream.c b/src/t_stream.c index f64824c9b..efb01ef62 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -597,11 +597,18 @@ int streamIteratorGetID(streamIterator *si, streamID *id, int64_t *numfields) { } } - /* If we do not emit, we have to discard. */ - int to_discard = (flags & STREAM_ITEM_FLAG_SAMEFIELDS) ? - *numfields : *numfields*2; - for (int64_t i = 0; i < to_discard; i++) - si->lp_ele = lpNext(si->lp,si->lp_ele); + /* If we do not emit, we have to discard if we are going + * forward, or seek the previous entry if we are going + * backward. */ + if (!si->rev) { + int to_discard = (flags & STREAM_ITEM_FLAG_SAMEFIELDS) ? + *numfields : *numfields*2; + for (int64_t i = 0; i < to_discard; i++) + si->lp_ele = lpNext(si->lp,si->lp_ele); + } else { + int prev_times = 4; /* flag + id ms/seq diff + numfields. */ + while(prev_times--) si->lp_ele = lpPrev(si->lp,si->lp_ele); + } } /* End of listpack reached. Try the next/prev radix tree node. */ From 0c06581bee4b4029474191824c85b2afd5b08d1e Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 20 Nov 2017 11:25:05 +0100 Subject: [PATCH 0532/1722] Streams: XRANGE REV option -> XREVRANGE command. 
--- src/server.c | 1 + src/server.h | 1 + src/t_stream.c | 23 ++++++++++++++++------- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/src/server.c b/src/server.c index f1fd06ca0..af19b5a33 100644 --- a/src/server.c +++ b/src/server.c @@ -304,6 +304,7 @@ struct redisCommand redisCommandTable[] = { {"pfdebug",pfdebugCommand,-3,"w",0,NULL,0,0,0,0,0}, {"xadd",xaddCommand,-5,"wmF",0,NULL,1,1,1,0,0}, {"xrange",xrangeCommand,-4,"r",0,NULL,1,1,1,0,0}, + {"xrevrange",xrevrangeCommand,-4,"r",0,NULL,1,1,1,0,0}, {"xlen",xlenCommand,2,"rF",0,NULL,1,1,1,0,0}, {"xread",xreadCommand,-3,"rs",0,xreadGetKeys,1,1,1,0,0}, {"post",securityWarningCommand,-1,"lt",0,NULL,0,0,0,0,0}, diff --git a/src/server.h b/src/server.h index bc572b1e3..d65fd0c5d 100644 --- a/src/server.h +++ b/src/server.h @@ -2010,6 +2010,7 @@ void moduleCommand(client *c); void securityWarningCommand(client *c); void xaddCommand(client *c); void xrangeCommand(client *c); +void xrevrangeCommand(client *c); void xlenCommand(client *c); void xreadCommand(client *c); diff --git a/src/t_stream.c b/src/t_stream.c index efb01ef62..837a812a6 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -856,16 +856,17 @@ void xaddCommand(client *c) { signalKeyAsReady(c->db, c->argv[1]); } -/* XRANGE key start end [COUNT ] [REV] */ -void xrangeCommand(client *c) { +/* XRANGE/XREVRANGE actual implementation. */ +void xrangeGenericCommand(client *c, int rev) { robj *o; stream *s; streamID startid, endid; long long count = 0; - int rev = 0; + robj *startarg = rev ? c->argv[3] : c->argv[2]; + robj *endarg = rev ? c->argv[2] : c->argv[3]; - if (streamParseIDOrReply(c,c->argv[2],&startid,0) == C_ERR) return; - if (streamParseIDOrReply(c,c->argv[3],&endid,UINT64_MAX) == C_ERR) return; + if (streamParseIDOrReply(c,startarg,&startid,0) == C_ERR) return; + if (streamParseIDOrReply(c,endarg,&endid,UINT64_MAX) == C_ERR) return; /* Parse the COUNT option if any. 
*/ if (c->argc > 4) { @@ -876,8 +877,6 @@ void xrangeCommand(client *c) { != C_OK) return; if (count < 0) count = 0; j++; /* Consume additional arg. */ - } else if (strcasecmp(c->argv[j]->ptr,"REV") == 0) { - rev = 1; } else { addReply(c,shared.syntaxerr); return; @@ -892,6 +891,16 @@ void xrangeCommand(client *c) { streamReplyWithRange(c,s,&startid,&endid,count,rev); } +/* XRANGE key start end [COUNT ] */ +void xrangeCommand(client *c) { + xrangeGenericCommand(c,0); +} + +/* XREVRANGE key end start [COUNT ] */ +void xrevrangeCommand(client *c) { + xrangeGenericCommand(c,1); +} + /* XLEN */ void xlenCommand(client *c) { robj *o; From 620a9086db208b1f52576963786354b946aa7bca Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 21 Nov 2017 22:21:37 +0100 Subject: [PATCH 0533/1722] Streams: fix lp-count field for non-same-fields entries. --- src/t_stream.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/t_stream.c b/src/t_stream.c index 837a812a6..213a46bb1 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -308,8 +308,12 @@ int streamAppendItem(stream *s, robj **argv, int numfields, streamID *added_id, } /* Compute and store the lp-count field. */ int lp_count = numfields; - if (!(flags & STREAM_ITEM_FLAG_SAMEFIELDS)) lp_count *= 2; - lp_count += 3; /* Add the 3 fixed fileds flags + ms-diff + seq-diff. */ + lp_count += 3; /* Add the 3 fixed fields flags + ms-diff + seq-diff. */ + if (!(flags & STREAM_ITEM_FLAG_SAMEFIELDS)) { + /* If the item is not compressed, it also has the fields other than + * the values, and an additional num-fileds field. */ + lp_count += numfields+1; + } lp = lpAppendInteger(lp,lp_count); /* Insert back into the tree in order to update the listpack pointer. */ From 62836254beeadb67e3ef234e54143bda5898cb01 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 21 Nov 2017 22:22:05 +0100 Subject: [PATCH 0534/1722] Streams: add some initial test for XREVRANGE. 
--- tests/unit/type/stream.tcl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/unit/type/stream.tcl b/tests/unit/type/stream.tcl index 5c58e7fb2..d7b5ca2a8 100644 --- a/tests/unit/type/stream.tcl +++ b/tests/unit/type/stream.tcl @@ -121,6 +121,10 @@ start_server { assert {[llength [r xrange mystream - + COUNT 10]] == 10} } + test {XREVRANGE COUNT works as expected} { + assert {[llength [r xrevrange mystream + - COUNT 10]] == 10} + } + test {XRANGE can be used to iterate the whole stream} { set last_id "-" set j 0 @@ -136,6 +140,10 @@ start_server { assert {$j == 10000} } + test {XREVRANGE returns the reverse of XRANGE} { + assert {[r xrange mystream - +] == [lreverse [r xrevrange mystream + -]]} + } + test {XREAD with non empty stream} { set res [r XREAD COUNT 1 STREAMS mystream 0.0] assert {[lrange [lindex $res 0 1 0 1] 0 1] eq {item 0}} From d4d8154daf78aed05d0d9f39f209f4bef848d0f1 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 1 Dec 2017 12:50:18 +0100 Subject: [PATCH 0535/1722] Streams: add code to compute the stream memory usage. It's a bit of black magic without actually tracking it inside rax.c, however Redis usage of the radix tree for the stream data structure is quite consistent, so a few magic constants apparently are producing results that make sense. --- src/object.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/src/object.c b/src/object.c index b689edcf2..28df57907 100644 --- a/src/object.c +++ b/src/object.c @@ -800,6 +800,49 @@ size_t objectComputeSize(robj *o, size_t sample_size) { } else { serverPanic("Unknown hash encoding"); } + } else if (o->type == OBJ_STREAM) { + stream *s = o->ptr; + /* Note: to guess the size of the radix tree is not trivial, so we + * approximate it considering 64 bytes of data overhead for each + * key (the ID), and then adding the number of bare nodes, plus some + * overhead due by the data and child pointers. 
This secret recipe + * was obtained by checking the average radix tree created by real + * workloads, and then adjusting the constants to get numbers that + * more or less match the real memory usage. + * + * Actually the number of nodes and keys may be different depending + * on the insertion speed and thus the ability of the radix tree + * to compress prefixes. */ + asize = sizeof(*o); + asize += s->rax->numele * 64; + asize += s->rax->numnodes * sizeof(raxNode); + asize += s->rax->numnodes * 32*7; /* Add a few child pointers... */ + + /* Now we have to add the listpacks. The last listpack is often non + * complete, so we estimate the size of the first N listpacks, and + * use the average to compute the size of the first N-1 listpacks, and + * finally add the real size of the last node. */ + raxIterator ri; + raxStart(&ri,s->rax); + raxSeek(&ri,"^",NULL,0); + size_t lpsize = 0, samples = 0; + while(samples < sample_size && raxNext(&ri)) { + unsigned char *lp = ri.data; + lpsize += lpBytes(lp); + samples++; + } + if (s->rax->numele <= samples) { + asize += lpsize; + } else { + if (samples) lpsize /= samples; /* Compute the average. */ + asize += lpsize * (s->rax->numele-1); + /* No need to check if seek succeeded, we enter this branch only + * if there are a few elements in the radix tree. */ + raxSeek(&ri,"$",NULL,0); + raxNext(&ri); + asize += lpBytes(ri.data); + } + raxStop(&ri); } else if (o->type == OBJ_MODULE) { moduleValue *mv = o->ptr; moduleType *mt = mv->type; From 69701e2fac1bae77e6b6d1bafa0492c97b3147b2 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 1 Dec 2017 15:04:05 +0100 Subject: [PATCH 0536/1722] Streams: DEBUG DIGEST support. 
--- src/debug.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/debug.c b/src/debug.c index 5c3fd3471..5b08475a5 100644 --- a/src/debug.c +++ b/src/debug.c @@ -239,6 +239,27 @@ void computeDatasetDigest(unsigned char *final) { xorDigest(digest,eledigest,20); } hashTypeReleaseIterator(hi); + } else if (o->type == OBJ_STREAM) { + streamIterator si; + streamIteratorStart(&si,o->ptr,NULL,NULL,0); + streamID id; + int64_t numfields; + + while(streamIteratorGetID(&si,&id,&numfields)) { + sds itemid = sdscatfmt(sdsempty(),"%U.%U",id.ms,id.seq); + mixDigest(digest,itemid,sdslen(itemid)); + sdsfree(itemid); + + while(numfields--) { + unsigned char *field, *value; + int64_t field_len, value_len; + streamIteratorGetField(&si,&field,&value, + &field_len,&value_len); + mixDigest(digest,field,field_len); + mixDigest(digest,value,value_len); + } + } + streamIteratorStop(&si); } else if (o->type == OBJ_MODULE) { RedisModuleDigest md; moduleValue *mv = o->ptr; From cfef19ea1991ce1ae1add66376d255b03ef66218 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 1 Dec 2017 16:01:10 +0100 Subject: [PATCH 0537/1722] Fix loading of RDB files lua AUX fields when the script is defined. In the case of slaves loading the RDB from master, or in other similar cases, the script is already defined, and the function registering the script should not fail in the assert() call. --- src/rdb.c | 2 +- src/scripting.c | 12 +++++++++--- src/server.h | 2 +- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/rdb.c b/src/rdb.c index 17a932755..19f254996 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -1679,7 +1679,7 @@ int rdbLoadRio(rio *rdb, rdbSaveInfo *rsi) { if (rsi) rsi->repl_offset = strtoll(auxval->ptr,NULL,10); } else if (!strcasecmp(auxkey->ptr,"lua")) { /* Load the script back in memory. 
*/ - if (luaCreateFunction(NULL,server.lua,NULL,auxval) == C_ERR) { + if (luaCreateFunction(NULL,server.lua,NULL,auxval,1) == C_ERR) { rdbExitReportCorruptRDB( "Can't load Lua script from RDB file! " "BODY: %s", auxval->ptr); diff --git a/src/scripting.c b/src/scripting.c index 848629e28..ea167365a 100644 --- a/src/scripting.c +++ b/src/scripting.c @@ -1151,13 +1151,16 @@ int redis_math_randomseed (lua_State *L) { * on the fly doing the SHA1 of the body, this means that passing the funcname * is just an optimization in case it's already at hand. * + * if 'allow_dup' is true, the function can be called with a script already + * in memory without crashing in assert(). In this case C_OK is returned. + * * The function increments the reference count of the 'body' object as a * side effect of a successful call. * * On success C_OK is returned, and nothing is left on the Lua stack. * On error C_ERR is returned and an appropriate error is set in the * client context. */ -int luaCreateFunction(client *c, lua_State *lua, char *funcname, robj *body) { +int luaCreateFunction(client *c, lua_State *lua, char *funcname, robj *body, int allow_dup) { sds funcdef = sdsempty(); char fname[43]; @@ -1168,6 +1171,9 @@ int luaCreateFunction(client *c, lua_State *lua, char *funcname, robj *body) { funcname = fname; } + if (allow_dup && dictFind(server.lua_scripts,funcname+2) != NULL) + return C_OK; + funcdef = sdscat(funcdef,"function "); funcdef = sdscatlen(funcdef,funcname,42); funcdef = sdscatlen(funcdef,"() ",3); @@ -1302,7 +1308,7 @@ void evalGenericCommand(client *c, int evalsha) { addReply(c, shared.noscripterr); return; } - if (luaCreateFunction(c,lua,funcname,c->argv[1]) == C_ERR) { + if (luaCreateFunction(c,lua,funcname,c->argv[1],0) == C_ERR) { lua_pop(lua,1); /* remove the error handler from the stack. */ /* The error is sent to the client by luaCreateFunction() * itself when it returns C_ERR. 
*/ @@ -1474,7 +1480,7 @@ void scriptCommand(client *c) { sha1hex(funcname+2,c->argv[2]->ptr,sdslen(c->argv[2]->ptr)); sha = sdsnewlen(funcname+2,40); if (dictFind(server.lua_scripts,sha) == NULL) { - if (luaCreateFunction(c,server.lua,funcname,c->argv[2]) + if (luaCreateFunction(c,server.lua,funcname,c->argv[2],0) == C_ERR) { sdsfree(sha); return; diff --git a/src/server.h b/src/server.h index d65fd0c5d..498a05500 100644 --- a/src/server.h +++ b/src/server.h @@ -1794,7 +1794,7 @@ void scriptingInit(int setup); int ldbRemoveChild(pid_t pid); void ldbKillForkedSessions(void); int ldbPendingChildren(void); -int luaCreateFunction(client *c, lua_State *lua, char *funcname, robj *body); +int luaCreateFunction(client *c, lua_State *lua, char *funcname, robj *body, int allow_dup); /* Blocked clients */ void processUnblockedClients(void); From f6666c228329f0cbd498ba456f9827129c4d9c8e Mon Sep 17 00:00:00 2001 From: Itamar Haber Date: Sun, 3 Dec 2017 16:49:29 +0200 Subject: [PATCH 0538/1722] Helps CLIENT --- src/networking.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/src/networking.c b/src/networking.c index ea8f7d0b5..fc484b676 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1580,7 +1580,22 @@ void clientCommand(client *c) { listIter li; client *client; - if (!strcasecmp(c->argv[1]->ptr,"list") && c->argc == 2) { + if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) { + const char *help[] = { + "getname -- Return the name of the current connection.", + "kill -- Kill connection made from .", + "kill

Name

jemalloc — general purpose memory allocation functions

SYNOPSIS

#include <jemalloc/jemalloc.h>

Non-standard API

void *mallocx(size_t size,
 int flags);
 
void *rallocx(void *ptr,
 size_t size,
 int flags);
 
size_t xallocx(void *ptr,
 size_t size,
 size_t extra,
 int flags);
 
size_t sallocx(void *ptr,
 int flags);
 
void dallocx(void *ptr,
 int flags);
 
void sdallocx(void *ptr,
 size_t size,
 int flags);
 
size_t nallocx(size_t size,
 int flags);
 
int mallctl(const char *name,
 void *oldp,
 size_t *oldlenp,
 void *newp,
 size_t newlen);
 
int mallctlnametomib(const char *name,
 size_t *mibp,
 size_t *miblenp);
 
int mallctlbymib(const size_t *mib,
 size_t miblen,
 void *oldp,
 size_t *oldlenp,
 void *newp,
 size_t newlen);
 
void malloc_stats_print(void (*write_cb) +JEMALLOC

Name

jemalloc — general purpose memory allocation functions

LIBRARY

This manual describes jemalloc 4.0.3-0-ge9192eacf8935e29fc62fddc2701f7942b1cc02c. More information + can be found at the jemalloc website.

SYNOPSIS

#include <jemalloc/jemalloc.h>

Standard API

void *malloc(size_t size);
 
void *calloc(size_t number,
 size_t size);
 
int posix_memalign(void **ptr,
 size_t alignment,
 size_t size);
 
void *aligned_alloc(size_t alignment,
 size_t size);
 
void *realloc(void *ptr,
 size_t size);
 
void free(void *ptr);
 

Non-standard API

void *mallocx(size_t size,
 int flags);
 
void *rallocx(void *ptr,
 size_t size,
 int flags);
 
size_t xallocx(void *ptr,
 size_t size,
 size_t extra,
 int flags);
 
size_t sallocx(void *ptr,
 int flags);
 
void dallocx(void *ptr,
 int flags);
 
void sdallocx(void *ptr,
 size_t size,
 int flags);
 
size_t nallocx(size_t size,
 int flags);
 
int mallctl(const char *name,
 void *oldp,
 size_t *oldlenp,
 void *newp,
 size_t newlen);
 
int mallctlnametomib(const char *name,
 size_t *mibp,
 size_t *miblenp);
 
int mallctlbymib(const size_t *mib,
 size_t miblen,
 void *oldp,
 size_t *oldlenp,
 void *newp,
 size_t newlen);
 
void malloc_stats_print(void (*write_cb) (void *, const char *) - ,
 void *cbopaque,
 const char *opts);
 
size_t malloc_usable_size(const void *ptr);
 
void (*malloc_message)(void *cbopaque,
 const char *s);
 

const char *malloc_conf;

 void *cbopaque,
 const char *opts);
 
size_t malloc_usable_size(const void *ptr);
 
void (*malloc_message)(void *cbopaque,
 const char *s);
 

const char *malloc_conf;

DESCRIPTION

Standard API

The malloc() function allocates size bytes of uninitialized memory. The allocated space is suitably aligned (after possible pointer coercion) for storage - of any type of object.

The calloc() function allocates + of any type of object.

The calloc() function allocates space for number objects, each size bytes in length. The result is identical to - calling malloc() with an argument of + calling malloc() with an argument of number * size, with the exception that the allocated memory is explicitly initialized to zero - bytes.

The posix_memalign() function + bytes.

The posix_memalign() function allocates size bytes of memory such that the allocation's base address is a multiple of alignment, and returns the allocation in the value pointed to by ptr. The requested alignment must be a power of 2 at least as large as - sizeof(void *).

The aligned_alloc() function + sizeof(void *).

The aligned_alloc() function allocates size bytes of memory such that the allocation's base address is a multiple of alignment. The requested alignment must be a power of 2. Behavior is undefined if size is not an integral multiple of - alignment.

The realloc() function changes the + alignment.

The realloc() function changes the size of the previously allocated memory referenced by ptr to size bytes. The contents of the memory are unchanged up to the lesser of the new and old @@ -31,65 +30,69 @@ portion of the memory are undefined. Upon success, the memory referenced by ptr is freed and a pointer to the newly allocated memory is returned. Note that - realloc() may move the memory allocation, + realloc() may move the memory allocation, resulting in a different return value than ptr. If ptr is NULL, the - realloc() function behaves identically to - malloc() for the specified size.

The free() function causes the + realloc() function behaves identically to + malloc() for the specified size.

The free() function causes the allocated memory referenced by ptr to be made available for future allocations. If ptr is - NULL, no action occurs.

Non-standard API

The mallocx(), + rallocx(), + xallocx(), + sallocx(), + dallocx(), + sdallocx(), and + nallocx() functions all have a flags argument that can be used to specify options. The functions only check the options that are contextually relevant. Use bitwise or (|) operations to specify one or more of the following: -

MALLOCX_LG_ALIGN(la) +

MALLOCX_LG_ALIGN(la)

Align the memory allocation to start at an address that is a multiple of (1 << la). This macro does not validate that la is within the valid - range.

MALLOCX_ALIGN(a) + range.

MALLOCX_ALIGN(a)

Align the memory allocation to start at an address that is a multiple of a, where a is a power of two. This macro does not validate that a is a power of 2. -

MALLOCX_ZERO

Initialize newly allocated memory to contain zero +

MALLOCX_ZERO

Initialize newly allocated memory to contain zero bytes. In the growing reallocation case, the real size prior to reallocation defines the boundary between untouched bytes and those that are initialized to contain zero bytes. If this macro is - absent, newly allocated memory is uninitialized.

MALLOCX_TCACHE(tc) + absent, newly allocated memory is uninitialized.

MALLOCX_TCACHE(tc)

Use the thread-specific cache (tcache) specified by the identifier tc, which must have been - acquired via the tcache.create + acquired via the + "tcache.create" + mallctl. This macro does not validate that tc specifies a valid - identifier.

MALLOCX_TCACHE_NONE

Do not use a thread-specific cache (tcache). Unless + identifier.

MALLOCX_TCACHE_NONE

Do not use a thread-specific cache (tcache). Unless MALLOCX_TCACHE(tc) or MALLOCX_TCACHE_NONE is specified, an automatically managed tcache will be used under many circumstances. This macro cannot be used in the same flags argument as - MALLOCX_TCACHE(tc).

MALLOCX_ARENA(a) + MALLOCX_TCACHE(tc).

MALLOCX_ARENA(a)

Use the arena specified by the index a. This macro has no effect for regions that were allocated via an arena other than the one specified. This macro does not validate that a specifies an arena index in the valid range.

-

The mallocx() function allocates at +

The mallocx() function allocates at least size bytes of memory, and returns a pointer to the base address of the allocation. Behavior is undefined if - size is 0.

The rallocx() function resizes the + size is 0, or if request size + overflows due to size class and/or alignment constraints.

The rallocx() function resizes the allocation at ptr to be at least size bytes, and returns a pointer to the base address of the resulting allocation, which may or may not have moved from its original location. Behavior is undefined if - size is 0.

The xallocx() function resizes the + size is 0, or if request size + overflows due to size class and/or alignment constraints.

The xallocx() function resizes the allocation at ptr in place to be at least size bytes, and returns the real size of the allocation. If extra is non-zero, an attempt is @@ -98,24 +101,24 @@ the extra byte(s) will not by itself result in failure to resize. Behavior is undefined if size is 0, or if (size + extra - > SIZE_T_MAX).

The sallocx() function returns the - real size of the allocation at ptr.

The dallocx() function causes the + > SIZE_T_MAX).

The sallocx() function returns the + real size of the allocation at ptr.

The dallocx() function causes the memory referenced by ptr to be made available for - future allocations.

The sdallocx() function is an - extension of dallocx() with a + future allocations.

The sdallocx() function is an + extension of dallocx() with a size parameter to allow the caller to pass in the allocation size as an optimization. The minimum valid input size is the original requested size of the allocation, and the maximum valid input size is the corresponding value returned by - nallocx() or - sallocx().

The nallocx() function allocates no + nallocx() or + sallocx().

The nallocx() function allocates no memory, but it performs the same size computation as the - mallocx() function, and returns the real + mallocx() function, and returns the real size of the allocation that would result from the equivalent - mallocx() function call, or - 0 if the inputs exceed the maximum supported size - class and/or alignment. Behavior is undefined if - size is 0.

The mallctl() function provides a + mallocx() function call. Behavior is + undefined if size is 0, or if + request size overflows due to size class and/or alignment + constraints.

The mallctl() function provides a general interface for introspecting the memory allocator, as well as setting modifiable parameters and triggering actions. The period-separated name argument specifies a @@ -127,12 +130,12 @@ write a value, pass a pointer to the value via newp, and its length via newlen; otherwise pass NULL - and 0.

The mallctlnametomib() function + and 0.

The mallctlnametomib() function provides a way to avoid repeated name lookups for applications that repeatedly query the same portion of the namespace, by translating a name - to a “Management Information Base†(MIB) that can be passed - repeatedly to mallctlbymib(). Upon - successful return from mallctlnametomib(), + to a “Management Information Base” (MIB) that can be passed + repeatedly to mallctlbymib(). Upon + successful return from mallctlnametomib(), mibp contains an array of *miblenp integers, where *miblenp is the lesser of the number of components @@ -142,7 +145,9 @@ period-separated name components, which results in a partial MIB that can be used as the basis for constructing a complete MIB. For name components that are integers (e.g. the 2 in - arenas.bin.2.size), + + "arenas.bin.2.size" + ), the corresponding MIB component will always be that integer. Therefore, it is legitimate to construct code like the following:

 unsigned nbins, i;
@@ -159,62 +164,65 @@ for (i = 0; i < nbins; i++) {
 
 	mib[2] = i;
 	len = sizeof(bin_size);
-	mallctlbymib(mib, miblen, (void *)&bin_size, &len, NULL, 0);
+	mallctlbymib(mib, miblen, &bin_size, &len, NULL, 0);
 	/* Do something with bin_size... */
-}

The malloc_stats_print() function writes - summary statistics via the write_cb callback - function pointer and cbopaque data passed to - write_cb, or malloc_message() - if write_cb is NULL. The - statistics are presented in human-readable form unless “J†is - specified as a character within the opts string, in - which case the statistics are presented in JSON format. This function can be - called repeatedly. General information that never changes during - execution can be omitted by specifying “g†as a character +}

The malloc_stats_print() function + writes human-readable summary statistics via the + write_cb callback function pointer and + cbopaque data passed to + write_cb, or + malloc_message() if + write_cb is NULL. This + function can be called repeatedly. General information that never + changes during execution can be omitted by specifying "g" as a character within the opts string. Note that - malloc_message() uses the - mallctl*() functions internally, so inconsistent - statistics can be reported if multiple threads use these functions - simultaneously. If --enable-stats is specified during - configuration, “m†and “a†can be specified to - omit merged arena and per arena statistics, respectively; - “bâ€, “lâ€, and “h†can be specified - to omit per size class statistics for bins, large objects, and huge - objects, respectively. Unrecognized characters are silently ignored. - Note that thread caching may prevent some statistics from being completely - up to date, since extra locking would be required to merge counters that - track thread cache operations.

The malloc_usable_size() function + malloc_message() uses the + mallctl*() functions internally, so + inconsistent statistics can be reported if multiple threads use these + functions simultaneously. If --enable-stats is + specified during configuration, “m” and “a” can + be specified to omit merged arena and per arena statistics, respectively; + “b”, “l”, and “h” can be specified to + omit per size class statistics for bins, large objects, and huge objects, + respectively. Unrecognized characters are silently ignored. Note that + thread caching may prevent some statistics from being completely up to + date, since extra locking would be required to merge counters that track + thread cache operations. +

The malloc_usable_size() function returns the usable size of the allocation pointed to by ptr. The return value may be larger than the size that was requested during allocation. The - malloc_usable_size() function is not a - mechanism for in-place realloc(); rather + malloc_usable_size() function is not a + mechanism for in-place realloc(); rather it is provided solely as a tool for introspection purposes. Any discrepancy between the requested allocation size and the size reported - by malloc_usable_size() should not be + by malloc_usable_size() should not be depended on, since such behavior is entirely implementation-dependent. -

TUNING

Once, when the first call is made to one of the memory allocation routines, the allocator initializes its internals based in part on various - options that can be specified at compile- or run-time.

The string specified via --with-malloc-conf, the - string pointed to by the global variable malloc_conf, the - “name†of the file referenced by the symbolic link named - /etc/malloc.conf, and the value of the + options that can be specified at compile- or run-time.

The string pointed to by the global variable + malloc_conf, the “name” of the file + referenced by the symbolic link named /etc/malloc.conf, and the value of the environment variable MALLOC_CONF, will be interpreted, in that order, from left to right as options. Note that malloc_conf may be read before - main() is entered, so the declaration of + main() is entered, so the declaration of malloc_conf should specify an initializer that contains - the final value to be read by jemalloc. --with-malloc-conf - and malloc_conf are compile-time mechanisms, whereas - /etc/malloc.conf and - MALLOC_CONF can be safely set any time prior to program - invocation.

An options string is a comma-separated list of option:value pairs. - There is one key corresponding to each opt.* mallctl (see the MALLCTL NAMESPACE section for options + the final value to be read by jemalloc. malloc_conf is + a compile-time setting, whereas /etc/malloc.conf and MALLOC_CONF + can be safely set any time prior to program invocation.

An options string is a comma-separated list of option:value pairs. + There is one key corresponding to each + "opt.*" + mallctl (see the MALLCTL NAMESPACE section for options documentation). For example, abort:true,narenas:1 sets - the opt.abort and opt.narenas options. Some + the + "opt.abort" + and + "opt.narenas" + options. Some options have boolean values (true/false), others have integer values (base 8, 10, or 16, depending on prefix), and yet others have raw string - values.

IMPLEMENTATION NOTES

Traditionally, allocators have used sbrk(2) to obtain memory, which is suboptimal for several reasons, including race conditions, increased fragmentation, and artificial limitations on maximum usable memory. If @@ -238,18 +246,19 @@ for (i = 0; i < nbins; i++) { order to make it possible to completely avoid synchronization for most allocation requests. Such caching allows very fast allocation in the common case, but it increases memory usage and fragmentation, since a - bounded number of objects can remain allocated in each thread cache.

Memory is conceptually broken into equal-sized chunks, where the chunk - size is a power of two that is greater than the page size. Chunks are - always aligned to multiples of the chunk size. This alignment makes it - possible to find metadata for user objects very quickly. User objects are - broken into three categories according to size: small, large, and huge. - Multiple small and large objects can reside within a single chunk, whereas - huge objects each have one or more chunks backing them. Each chunk that - contains small and/or large objects tracks its contents as runs of + bounded number of objects can remain allocated in each thread cache.

Memory is conceptually broken into equal-sized chunks, where the + chunk size is a power of two that is greater than the page size. Chunks + are always aligned to multiples of the chunk size. This alignment makes it + possible to find metadata for user objects very quickly.

User objects are broken into three categories according to size: + small, large, and huge. Small and large objects are managed entirely by + arenas; huge objects are additionally aggregated in a single data structure + that is shared by all threads. Huge objects are typically used by + applications infrequently enough that this single data structure is not a + scalability issue.

Each chunk that is managed by an arena tracks its contents as runs of contiguous pages (unused, backing a set of small objects, or backing one - large object). The combination of chunk alignment and chunk page maps makes - it possible to determine all metadata regarding small and large allocations - in constant time.

Small objects are managed in groups by page runs. Each run maintains + large object). The combination of chunk alignment and chunk page maps + makes it possible to determine all metadata regarding small and large + allocations in constant time.

Small objects are managed in groups by page runs. Each run maintains a bitmap to track which regions are in use. Allocation requests that are no more than half the quantum (8 or 16, depending on architecture) are rounded up to the nearest power of two that is at least sizeof(double). All other object size @@ -257,20 +266,22 @@ for (i = 0; i < nbins; i++) { classes for each doubling in size, which limits internal fragmentation to approximately 20% for all but the smallest size classes. Small size classes are smaller than four times the page size, large size classes are smaller - than the chunk size (see the opt.lg_chunk option), and - huge size classes extend from the chunk size up to the largest size class - that does not exceed PTRDIFF_MAX.

Allocations are packed tightly together, which can be an issue for + than the chunk size (see the + "opt.lg_chunk" + option), and + huge size classes extend from the chunk size up to one size class less than + the full address space size.

Allocations are packed tightly together, which can be an issue for multi-threaded applications. If you need to assure that allocations do not suffer from cacheline sharing, round your allocation requests up to the nearest multiple of the cacheline size, or specify cacheline alignment when - allocating.

The realloc(), - rallocx(), and - xallocx() functions may resize allocations + allocating.

The realloc(), + rallocx(), and + xallocx() functions may resize allocations without moving them under limited circumstances. Unlike the - *allocx() API, the standard API does not + *allocx() API, the standard API does not officially round up the usable size of an allocation to the nearest size class, so technically it is necessary to call - realloc() to grow e.g. a 9-byte allocation to + realloc() to grow e.g. a 9-byte allocation to 16 bytes, or shrink a 16-byte allocation to 9 bytes. Growth and shrinkage trivially succeeds in place as long as the pre-size and post-size both round up to the same size class. No other API guarantees are made regarding @@ -278,111 +289,147 @@ for (i = 0; i < nbins; i++) { and huge allocations in place, as long as the pre-size and post-size are both large or both huge. In such cases shrinkage always succeeds for large size classes, but for huge size classes the chunk allocator must support - splitting (see arena.<i>.chunk_hooks). + splitting (see + "arena.<i>.chunk_hooks" + ). Growth only succeeds if the trailing memory is currently available, and additionally for huge size classes the chunk allocator must support merging.

Assuming 2 MiB chunks, 4 KiB pages, and a 16-byte quantum on a - 64-bit system, the size classes in each category are as shown in Table 1.


MALLCTL NAMESPACE

The following names are defined in the namespace accessible via the - mallctl*() functions. Value types are + 64-bit system, the size classes in each category are as shown in Table 1.

Table 1. Size classes

CategorySpacingSize
Smalllg[8]
16[16, 32, 48, 64, 80, 96, 112, 128]
32[160, 192, 224, 256]
64[320, 384, 448, 512]
128[640, 768, 896, 1024]
256[1280, 1536, 1792, 2048]
512[2560, 3072, 3584, 4096]
1 KiB[5 KiB, 6 KiB, 7 KiB, 8 KiB]
2 KiB[10 KiB, 12 KiB, 14 KiB]
Large2 KiB[16 KiB]
4 KiB[20 KiB, 24 KiB, 28 KiB, 32 KiB]
8 KiB[40 KiB, 48 KiB, 56 KiB, 64 KiB]
16 KiB[80 KiB, 96 KiB, 112 KiB, 128 KiB]
32 KiB[160 KiB, 192 KiB, 224 KiB, 256 KiB]
64 KiB[320 KiB, 384 KiB, 448 KiB, 512 KiB]
128 KiB[640 KiB, 768 KiB, 896 KiB, 1 MiB]
256 KiB[1280 KiB, 1536 KiB, 1792 KiB]
Huge256 KiB[2 MiB]
512 KiB[2560 KiB, 3 MiB, 3584 KiB, 4 MiB]
1 MiB[5 MiB, 6 MiB, 7 MiB, 8 MiB]
2 MiB[10 MiB, 12 MiB, 14 MiB, 16 MiB]
4 MiB[20 MiB, 24 MiB, 28 MiB, 32 MiB]
8 MiB[40 MiB, 48 MiB, 56 MiB, 64 MiB]
......

MALLCTL NAMESPACE

The following names are defined in the namespace accessible via the + mallctl*() functions. Value types are specified in parentheses, their readable/writable statuses are encoded as rw, r-, -w, or --, and required build configuration flags follow, if any. A name element encoded as <i> or <j> indicates an integer component, where the integer varies from 0 to some upper value that must be determined via - introspection. In the case of stats.arenas.<i>.*, - <i> equal to arenas.narenas can be + introspection. In the case of + "stats.arenas.<i>.*" + , + <i> equal to + "arenas.narenas" + can be used to access the summation of statistics from all arenas. Take special - note of the epoch mallctl, - which controls refreshing of cached dynamic statistics.

- version + note of the + "epoch" + mallctl, + which controls refreshing of cached dynamic statistics.

+ + "version" + (const char *) r- -

Return the jemalloc version string.

- epoch +

Return the jemalloc version string.

+ + "epoch" + (uint64_t) rw

If a value is passed in, refresh the data from which - the mallctl*() functions report values, + the mallctl*() functions report values, and increment the epoch. Return the current epoch. This is useful for - detecting whether another thread caused a refresh.

- config.cache_oblivious + detecting whether another thread caused a refresh.

+ + "config.cache_oblivious" + (bool) r-

--enable-cache-oblivious was specified - during build configuration.

- config.debug + during build configuration.

+ + "config.debug" + (bool) r-

--enable-debug was specified during - build configuration.

- config.fill + build configuration.

+ + "config.fill" + (bool) r-

--enable-fill was specified during - build configuration.

- config.lazy_lock + build configuration.

+ + "config.lazy_lock" + (bool) r-

--enable-lazy-lock was specified - during build configuration.

- config.malloc_conf - (const char *) - r- -

Embedded configure-time-specified run-time options - string, empty unless --with-malloc-conf was specified - during build configuration.

- config.munmap + during build configuration.

+ + "config.munmap" + (bool) r-

--enable-munmap was specified during - build configuration.

- config.prof + build configuration.

+ + "config.prof" + (bool) r-

--enable-prof was specified during - build configuration.

- config.prof_libgcc + build configuration.

+ + "config.prof_libgcc" + (bool) r-

--disable-prof-libgcc was not - specified during build configuration.

- config.prof_libunwind + specified during build configuration.

+ + "config.prof_libunwind" + (bool) r-

--enable-prof-libunwind was specified - during build configuration.

- config.stats + during build configuration.

+ + "config.stats" + (bool) r-

--enable-stats was specified during - build configuration.

- config.tcache + build configuration.

+ + "config.tcache" + (bool) r-

--disable-tcache was not specified - during build configuration.

- config.tls + during build configuration.

+ + "config.tls" + (bool) r-

--disable-tls was not specified during - build configuration.

- config.utrace + build configuration.

+ + "config.utrace" + (bool) r-

--enable-utrace was specified during - build configuration.

- config.valgrind + build configuration.

+ + "config.valgrind" + (bool) r-

--enable-valgrind was specified during - build configuration.

- config.xmalloc + build configuration.

+ + "config.xmalloc" + (bool) r-

--enable-xmalloc was specified during - build configuration.

- opt.abort + build configuration.

+ + "opt.abort" + (bool) r-

Abort-on-warning enabled/disabled. If true, most @@ -390,42 +437,43 @@ for (i = 0; i < nbins; i++) { abort(3) in these cases. This option is disabled by default unless --enable-debug is specified during configuration, in which case it is enabled by default. -

- opt.dss +

+ + "opt.dss" + (const char *) r-

dss (sbrk(2)) allocation precedence as related to mmap(2) allocation. The following settings are supported if sbrk(2) is supported by the operating - system: “disabledâ€, “primaryâ€, and - “secondaryâ€; otherwise only “disabled†is - supported. The default is “secondary†if + system: “disabled”, “primary”, and + “secondary”; otherwise only “disabled” is + supported. The default is “secondary” if sbrk(2) is supported by the operating - system; “disabled†otherwise. -

- opt.lg_chunk + system; “disabled” otherwise. +

+ + "opt.lg_chunk" + (size_t) r-

Virtual memory chunk size (log base 2). If a chunk size outside the supported size range is specified, the size is silently clipped to the minimum/maximum supported size. The default chunk size is 2 MiB (2^21). -

- opt.narenas - (unsigned) +

+ + "opt.narenas" + + (size_t) r-

Maximum number of arenas to use for automatic multiplexing of threads and arenas. The default is four times the - number of CPUs, or one if there is a single CPU.

- opt.purge - (const char *) - r- -

Purge mode is “ratio†(default) or - “decayâ€. See opt.lg_dirty_mult - for details of the ratio mode. See opt.decay_time for - details of the decay mode.

- opt.lg_dirty_mult + number of CPUs, or one if there is a single CPU.

+ + "opt.lg_dirty_mult" + (ssize_t) r-

Per-arena minimum ratio (log base 2) of active to dirty @@ -436,57 +484,53 @@ for (i = 0; i < nbins; i++) { provides the kernel with sufficient information to recycle dirty pages if physical memory becomes scarce and the pages remain unused. The default minimum ratio is 8:1 (2^3:1); an option value of -1 will - disable dirty page purging. See arenas.lg_dirty_mult - and arena.<i>.lg_dirty_mult - for related dynamic control options.

- opt.decay_time - (ssize_t) - r- -

Approximate time in seconds from the creation of a set - of unused dirty pages until an equivalent set of unused dirty pages is - purged and/or reused. The pages are incrementally purged according to a - sigmoidal decay curve that starts and ends with zero purge rate. A - decay time of 0 causes all unused dirty pages to be purged immediately - upon creation. A decay time of -1 disables purging. The default decay - time is 10 seconds. See arenas.decay_time - and arena.<i>.decay_time - for related dynamic control options. -

- opt.stats_print + disable dirty page purging. See + "arenas.lg_dirty_mult" + + and + "arena.<i>.lg_dirty_mult" + + for related dynamic control options.

+ + "opt.stats_print" + (bool) r-

Enable/disable statistics printing at exit. If - enabled, the malloc_stats_print() + enabled, the malloc_stats_print() function is called at program exit via an atexit(3) function. If --enable-stats is specified during configuration, this has the potential to cause deadlock for a multi-threaded process that exits while one or more threads are executing in the memory allocation - functions. Furthermore, atexit() may + functions. Furthermore, atexit() may allocate memory during application initialization and then deadlock internally when jemalloc in turn calls - atexit(), so this option is not - universally usable (though the application can register its own - atexit() function with equivalent + atexit(), so this option is not + univerally usable (though the application can register its own + atexit() function with equivalent functionality). Therefore, this option should only be used with care; it is primarily intended as a performance tuning aid during application - development. This option is disabled by default.

- opt.junk + development. This option is disabled by default.

+ + "opt.junk" + (const char *) r- [--enable-fill] -

Junk filling. If set to “allocâ€, each byte - of uninitialized allocated memory will be initialized to - 0xa5. If set to “freeâ€, all deallocated - memory will be initialized to 0x5a. If set to - “trueâ€, both allocated and deallocated memory will be - initialized, and if set to “falseâ€, junk filling be - disabled entirely. This is intended for debugging and will impact - performance negatively. This option is “false†by default - unless --enable-debug is specified during - configuration, in which case it is “true†by default unless - running inside Valgrind.

- opt.quarantine +

Junk filling. If set to "alloc", each byte of + uninitialized allocated memory will be initialized to + 0xa5. If set to "free", all deallocated memory will + be initialized to 0x5a. If set to "true", both + allocated and deallocated memory will be initialized, and if set to + "false", junk filling be disabled entirely. This is intended for + debugging and will impact performance negatively. This option is + "false" by default unless --enable-debug is specified + during configuration, in which case it is "true" by default unless + running inside Valgrind.

+ + "opt.quarantine" + (size_t) r- [--enable-fill] @@ -494,46 +538,58 @@ for (i = 0; i < nbins; i++) { thread maintains a FIFO object quarantine that stores up to the specified number of bytes of memory. The quarantined memory is not freed until it is released from quarantine, though it is immediately - junk-filled if the opt.junk option is + junk-filled if the + "opt.junk" + option is enabled. This feature is of particular use in combination with Valgrind, which can detect attempts to access quarantined objects. This is intended for debugging and will impact performance negatively. The default quarantine size is 0 unless running inside Valgrind, in which case the default is 16 - MiB.

- opt.redzone + MiB.

+ + "opt.redzone" + (bool) r- [--enable-fill]

Redzones enabled/disabled. If enabled, small allocations have redzones before and after them. Furthermore, if the - opt.junk option is + + "opt.junk" + option is enabled, the redzones are checked for corruption during deallocation. However, the primary intended purpose of this feature is to be used in combination with Valgrind, which needs redzones in order to do effective buffer overflow/underflow detection. This option is intended for debugging and will impact performance negatively. This option is disabled by - default unless running inside Valgrind.

- opt.zero + default unless running inside Valgrind.

+ + "opt.zero" + (bool) r- [--enable-fill]

Zero filling enabled/disabled. If enabled, each byte of uninitialized allocated memory will be initialized to 0. Note that this initialization only happens once for each byte, so - realloc() and - rallocx() calls do not zero memory that + realloc() and + rallocx() calls do not zero memory that was previously allocated. This is intended for debugging and will impact performance negatively. This option is disabled by default. -

- opt.utrace +

+ + "opt.utrace" + (bool) r- [--enable-utrace]

Allocation tracing based on utrace(2) enabled/disabled. This option - is disabled by default.

- opt.xmalloc + is disabled by default.

+ + "opt.xmalloc" + (bool) r- [--enable-xmalloc] @@ -546,8 +602,10 @@ for (i = 0; i < nbins; i++) { including the following in the source code:

 malloc_conf = "xmalloc:true";

- This option is disabled by default.

- opt.tcache + This option is disabled by default.

+ + "opt.tcache" + (bool) r- [--enable-tcache] @@ -555,36 +613,55 @@ malloc_conf = "xmalloc:true";

there are multiple threads, each thread uses a tcache for objects up to a certain size. Thread-specific caching allows many allocations to be satisfied without performing any thread synchronization, at the cost of - increased memory use. See the opt.lg_tcache_max + increased memory use. See the + "opt.lg_tcache_max" + option for related tuning information. This option is enabled by default unless running inside Valgrind, in which case it is - forcefully disabled.

- opt.lg_tcache_max + forcefully disabled.

+ + "opt.lg_tcache_max" + (size_t) r- [--enable-tcache]

Maximum size class (log base 2) to cache in the thread-specific cache (tcache). At a minimum, all small size classes are cached, and at a maximum all large size classes are cached. The - default maximum is 32 KiB (2^15).

- opt.prof + default maximum is 32 KiB (2^15).

+ + "opt.prof" + (bool) r- [--enable-prof]

Memory profiling enabled/disabled. If enabled, profile - memory allocation activity. See the opt.prof_active - option for on-the-fly activation/deactivation. See the opt.lg_prof_sample - option for probabilistic sampling control. See the opt.prof_accum - option for control of cumulative sample reporting. See the opt.lg_prof_interval - option for information on interval-triggered profile dumping, the opt.prof_gdump + memory allocation activity. See the + "opt.prof_active" + + option for on-the-fly activation/deactivation. See the + "opt.lg_prof_sample" + + option for probabilistic sampling control. See the + "opt.prof_accum" + + option for control of cumulative sample reporting. See the + "opt.lg_prof_interval" + + option for information on interval-triggered profile dumping, the + "opt.prof_gdump" + option for information on high-water-triggered profile dumping, and the - opt.prof_final + + "opt.prof_final" + option for final profile dumping. Profile output is compatible with the jeprof command, which is based on the pprof that is developed as part of the gperftools - package. See HEAP PROFILE - FORMAT for heap profile format documentation.

- opt.prof_prefix + package.

+ + "opt.prof_prefix" + (const char *) r- [--enable-prof] @@ -592,26 +669,40 @@ malloc_conf = "xmalloc:true";

set to the empty string, no automatic dumps will occur; this is primarily useful for disabling the automatic final heap dump (which also disables leak reporting, if enabled). The default prefix is - jeprof.

- opt.prof_active + jeprof.

+ + "opt.prof_active" + (bool) r- [--enable-prof]

Profiling activated/deactivated. This is a secondary control mechanism that makes it possible to start the application with - profiling enabled (see the opt.prof option) but + profiling enabled (see the + "opt.prof" + option) but inactive, then toggle profiling at any time during program execution - with the prof.active mallctl. - This option is enabled by default.

- opt.prof_thread_active_init + with the + "prof.active" + mallctl. + This option is enabled by default.

+ + "opt.prof_thread_active_init" + (bool) r- [--enable-prof] -

Initial setting for thread.prof.active +

Initial setting for + "thread.prof.active" + in newly created threads. The initial setting for newly created threads - can also be changed during execution via the prof.thread_active_init - mallctl. This option is enabled by default.

- opt.lg_prof_sample + can also be changed during execution via the + "prof.thread_active_init" + + mallctl. This option is enabled by default.

+ + "opt.lg_prof_sample" + (size_t) r- [--enable-prof] @@ -619,8 +710,10 @@ malloc_conf = "xmalloc:true";

samples, as measured in bytes of allocation activity. Increasing the sampling interval decreases profile fidelity, but also decreases the computational overhead. The default sample interval is 512 KiB (2^19 - B).

- opt.prof_accum + B).

+ + "opt.prof_accum" + (bool) r- [--enable-prof] @@ -629,8 +722,10 @@ malloc_conf = "xmalloc:true";

backtrace must be stored for the duration of execution. Depending on the application, this can impose a large memory overhead, and the cumulative counts are not always of interest. This option is disabled - by default.

- opt.lg_prof_interval + by default.

+ + "opt.lg_prof_interval" + (ssize_t) r- [--enable-prof] @@ -641,19 +736,27 @@ malloc_conf = "xmalloc:true";

dumped to files named according to the pattern <prefix>.<pid>.<seq>.i<iseq>.heap, where <prefix> is controlled by the - opt.prof_prefix + + "opt.prof_prefix" + option. By default, interval-triggered profile dumping is disabled (encoded as -1). -

- opt.prof_gdump +

+ + "opt.prof_gdump" + (bool) r- [--enable-prof] -

Set the initial state of prof.gdump, which when +

Set the initial state of + "prof.gdump" + , which when enabled triggers a memory profile dump every time the total virtual memory exceeds the previous maximum. This option is disabled by - default.

- opt.prof_final + default.

+ + "opt.prof_final" + (bool) r- [--enable-prof] @@ -661,73 +764,101 @@ malloc_conf = "xmalloc:true";

atexit(3) function to dump final memory usage to a file named according to the pattern <prefix>.<pid>.<seq>.f.heap, - where <prefix> is controlled by the opt.prof_prefix - option. Note that atexit() may allocate + where <prefix> is controlled by the + "opt.prof_prefix" + + option. Note that atexit() may allocate memory during application initialization and then deadlock internally - when jemalloc in turn calls atexit(), so - this option is not universally usable (though the application can - register its own atexit() function with + when jemalloc in turn calls atexit(), so + this option is not univerally usable (though the application can + register its own atexit() function with equivalent functionality). This option is disabled by - default.

- opt.prof_leak + default.

+ + "opt.prof_leak" + (bool) r- [--enable-prof]

Leak reporting enabled/disabled. If enabled, use an atexit(3) function to report memory leaks detected by allocation sampling. See the - opt.prof option for + + "opt.prof" + option for information on analyzing heap profile output. This option is disabled - by default.

- thread.arena + by default.

+ + "thread.arena" + (unsigned) rw

Get or set the arena associated with the calling thread. If the specified arena was not initialized beforehand (see the - arenas.initialized + + "arenas.initialized" + mallctl), it will be automatically initialized as a side effect of - calling this interface.

- thread.allocated + calling this interface.

+ + "thread.allocated" + (uint64_t) r- [--enable-stats]

Get the total number of bytes ever allocated by the calling thread. This counter has the potential to wrap around; it is up to the application to appropriately interpret the counter in such - cases.

- thread.allocatedp + cases.

+ + "thread.allocatedp" + (uint64_t *) r- [--enable-stats]

Get a pointer to the the value that is returned by the - thread.allocated + + "thread.allocated" + mallctl. This is useful for avoiding the overhead of repeated - mallctl*() calls.

- thread.deallocated + mallctl*() calls.

+ + "thread.deallocated" + (uint64_t) r- [--enable-stats]

Get the total number of bytes ever deallocated by the calling thread. This counter has the potential to wrap around; it is up to the application to appropriately interpret the counter in such - cases.

- thread.deallocatedp + cases.

+ + "thread.deallocatedp" + (uint64_t *) r- [--enable-stats]

Get a pointer to the the value that is returned by the - thread.deallocated + + "thread.deallocated" + mallctl. This is useful for avoiding the overhead of repeated - mallctl*() calls.

- thread.tcache.enabled + mallctl*() calls.

+ + "thread.tcache.enabled" + (bool) rw [--enable-tcache]

Enable/disable calling thread's tcache. The tcache is implicitly flushed as a side effect of becoming - disabled (see thread.tcache.flush). -

- thread.tcache.flush + disabled (see + "thread.tcache.flush" + ). +

+ + "thread.tcache.flush" + (void) -- [--enable-tcache] @@ -739,8 +870,10 @@ malloc_conf = "xmalloc:true";

a thread exits. However, garbage collection is triggered by allocation activity, so it is possible for a thread that stops allocating/deallocating to retain its cache indefinitely, in which case - the developer may find manual flushing useful.

- thread.prof.name + the developer may find manual flushing useful.

+ + "thread.prof.name" + (const char *) r- or -w @@ -753,19 +886,25 @@ malloc_conf = "xmalloc:true";

can cause asynchronous string deallocation. Furthermore, each invocation of this interface can only read or write; simultaneous read/write is not supported due to string lifetime limitations. The - name string must be nil-terminated and comprised only of characters in - the sets recognized + name string must nil-terminated and comprised only of characters in the + sets recognized by isgraph(3) and - isblank(3).

- thread.prof.active + isblank(3).

+ + "thread.prof.active" + (bool) rw [--enable-prof]

Control whether sampling is currently active for the - calling thread. This is an activation mechanism in addition to prof.active; both must + calling thread. This is an activation mechanism in addition to + "prof.active" + ; both must be active for the calling thread to sample. This flag is enabled by - default.

- tcache.create + default.

+ + "tcache.create" + (unsigned) r- [--enable-tcache] @@ -775,81 +914,76 @@ malloc_conf = "xmalloc:true";

automatically managed one that is used by default. Each explicit cache can be used by only one thread at a time; the application must assure that this constraint holds. -

- tcache.flush +

+ + "tcache.flush" + (unsigned) -w [--enable-tcache]

Flush the specified thread-specific cache (tcache). The - same considerations apply to this interface as to thread.tcache.flush, - except that the tcache will never be automatically discarded. -

- tcache.destroy + same considerations apply to this interface as to + "thread.tcache.flush" + , + except that the tcache will never be automatically be discarded. +

+ + "tcache.destroy" + (unsigned) -w [--enable-tcache]

Flush the specified thread-specific cache (tcache) and make the identifier available for use during a future tcache creation. -

- arena.<i>.purge +

+ + "arena.<i>.purge" + (void) -- -

Purge all unused dirty pages for arena <i>, or for - all arenas if <i> equals arenas.narenas. -

- arena.<i>.decay - (void) - -- -

Trigger decay-based purging of unused dirty pages for - arena <i>, or for all arenas if <i> equals arenas.narenas. - The proportion of unused dirty pages to be purged depends on the current - time; see opt.decay_time for - details.

- arena.<i>.reset - (void) - -- -

Discard all of the arena's extant allocations. This - interface can only be used with arenas created via arenas.extend. None - of the arena's discarded/cached allocations may accessed afterward. As - part of this requirement, all thread caches which were used to - allocate/deallocate in conjunction with the arena must be flushed - beforehand. This interface cannot be used if running inside Valgrind, - nor if the quarantine size is - non-zero.

- arena.<i>.dss +

Purge unused dirty pages for arena <i>, or for + all arenas if <i> equals + "arenas.narenas" + . +

+ + "arena.<i>.dss" + (const char *) rw

Set the precedence of dss allocation as related to mmap allocation for arena <i>, or for all arenas if <i> equals - arenas.narenas. See - opt.dss for supported - settings.

- arena.<i>.lg_dirty_mult + + "arenas.narenas" + . See + + "opt.dss" + for supported + settings.

+ + "arena.<i>.lg_dirty_mult" + (ssize_t) rw

Current per-arena minimum ratio (log base 2) of active to dirty pages for arena <i>. Each time this interface is set and the ratio is increased, pages are synchronously purged as necessary to - impose the new ratio. See opt.lg_dirty_mult - for additional information.

- arena.<i>.decay_time - (ssize_t) - rw -

Current per-arena approximate time in seconds from the - creation of a set of unused dirty pages until an equivalent set of - unused dirty pages is purged and/or reused. Each time this interface is - set, all currently unused dirty pages are considered to have fully - decayed, which causes immediate purging of all unused dirty pages unless - the decay time is set to -1 (i.e. purging disabled). See opt.decay_time for - additional information.

- arena.<i>.chunk_hooks + impose the new ratio. See + "opt.lg_dirty_mult" + + for additional information.

+ + "arena.<i>.chunk_hooks" + (chunk_hooks_t) rw

Get or set the chunk management hook functions for arena <i>. The functions must be capable of operating on all extant chunks associated with arena <i>, usually by passing unknown chunks to the replaced functions. In practice, it is feasible to - control allocation for arenas created via arenas.extend such + control allocation for arenas created via + "arenas.extend" + such that all chunks originate from an application-supplied chunk allocator (by setting custom chunk hook functions just after arena creation), but the automatically created arenas may have already created chunks prior @@ -875,7 +1009,7 @@ typedef struct { operations can also be opted out of, but this is mainly intended to support platforms on which virtual memory mappings provided by the operating system kernel do not automatically coalesce and split, e.g. - Windows.

typedef void *(chunk_alloc_t)(void *chunk,
 size_t size,
 size_t alignment,
 bool *zero,
 bool *commit,
 unsigned arena_ind);
 

A chunk allocation function conforms to the + Windows.

typedef void *(chunk_alloc_t)(void *chunk,
 size_t size,
 size_t alignment,
 bool *zero,
 bool *commit,
 unsigned arena_ind);
 

A chunk allocation function conforms to the chunk_alloc_t type and upon success returns a pointer to size bytes of mapped memory on behalf of arena arena_ind such that the chunk's base address is a @@ -896,8 +1030,10 @@ typedef struct { in absolute terms as on a system that does not overcommit, or in implicit terms as on a system that overcommits and satisfies physical memory needs on demand via soft page faults. Note that replacing the - default chunk allocation function makes the arena's arena.<i>.dss - setting irrelevant.

typedef bool (chunk_dalloc_t)(void *chunk,
 size_t size,
 bool committed,
 unsigned arena_ind);
 

+ default chunk allocation function makes the arena's + "arena.<i>.dss" + + setting irrelevant.

typedef bool (chunk_dalloc_t)(void *chunk,
 size_t size,
 bool committed,
 unsigned arena_ind);
 

A chunk deallocation function conforms to the chunk_dalloc_t type and deallocates a chunk of given size with @@ -906,7 +1042,7 @@ typedef struct { success. If the function returns true, this indicates opt-out from deallocation; the virtual memory mapping associated with the chunk remains mapped, in the same commit state, and available for future use, - in which case it will be automatically retained for later reuse.

typedef bool (chunk_commit_t)(void *chunk,
 size_t size,
 size_t offset,
 size_t length,
 unsigned arena_ind);
 

A chunk commit function conforms to the + in which case it will be automatically retained for later reuse.

typedef bool (chunk_commit_t)(void *chunk,
 size_t size,
 size_t offset,
 size_t length,
 unsigned arena_ind);
 

A chunk commit function conforms to the chunk_commit_t type and commits zeroed physical memory to back pages within a chunk of given size at offset bytes, @@ -916,7 +1052,7 @@ typedef struct { does not overcommit, or in implicit terms as on a system that overcommits and satisfies physical memory needs on demand via soft page faults. If the function returns true, this indicates insufficient - physical memory to satisfy the request.

typedef bool (chunk_decommit_t)(void *chunk,
 size_t size,
 size_t offset,
 size_t length,
 unsigned arena_ind);
 

A chunk decommit function conforms to the + physical memory to satisfy the request.

typedef bool (chunk_decommit_t)(void *chunk,
 size_t size,
 size_t offset,
 size_t length,
 unsigned arena_ind);
 

A chunk decommit function conforms to the chunk_decommit_t type and decommits any physical memory that is backing pages within a chunk of given size at offset bytes, @@ -925,14 +1061,14 @@ typedef struct { case the pages will be committed via the chunk commit function before being reused. If the function returns true, this indicates opt-out from decommit; the memory remains committed and available for future use, in - which case it will be automatically retained for later reuse.

typedef bool (chunk_purge_t)(void *chunk,
 size_tsize,
 size_t offset,
 size_t length,
 unsigned arena_ind);
 

A chunk purge function conforms to the chunk_purge_t + which case it will be automatically retained for later reuse.

typedef bool (chunk_purge_t)(void *chunk,
 size_tsize,
 size_t offset,
 size_t length,
 unsigned arena_ind);
 

A chunk purge function conforms to the chunk_purge_t type and optionally discards physical pages within the virtual memory mapping associated with chunk of given size at offset bytes, extending for length on behalf of arena arena_ind, returning false if pages within the purged virtual memory range will be zero-filled the next time they are - accessed.

typedef bool (chunk_split_t)(void *chunk,
 size_t size,
 size_t size_a,
 size_t size_b,
 bool committed,
 unsigned arena_ind);
 

A chunk split function conforms to the chunk_split_t + accessed.

typedef bool (chunk_split_t)(void *chunk,
 size_t size,
 size_t size_a,
 size_t size_b,
 bool committed,
 unsigned arena_ind);
 

A chunk split function conforms to the chunk_split_t type and optionally splits chunk of given size into two adjacent chunks, the first of size_a bytes, and the second of @@ -941,7 +1077,7 @@ typedef struct { behalf of arena arena_ind, returning false upon success. If the function returns true, this indicates that the chunk remains unsplit and therefore should continue to be operated on as a - whole.

typedef bool (chunk_merge_t)(void *chunk_a,
 size_t size_a,
 void *chunk_b,
 size_t size_b,
 bool committed,
 unsigned arena_ind);
 

A chunk merge function conforms to the chunk_merge_t + whole.

typedef bool (chunk_merge_t)(void *chunk_a,
 size_t size_a,
 void *chunk_b,
 size_t size_b,
 bool committed,
 unsigned arena_ind);
 

A chunk merge function conforms to the chunk_merge_t type and optionally merges adjacent chunks, chunk_a of given size_a and chunk_b of given @@ -950,106 +1086,150 @@ typedef struct { behalf of arena arena_ind, returning false upon success. If the function returns true, this indicates that the chunks remain distinct mappings and therefore should continue to be operated on - independently.

- arenas.narenas + independently.

+ + "arenas.narenas" + (unsigned) r- -

Current limit on number of arenas.

- arenas.initialized +

Current limit on number of arenas.

+ + "arenas.initialized" + (bool *) r- -

An array of arenas.narenas +

An array of + "arenas.narenas" + booleans. Each boolean indicates whether the corresponding arena is - initialized.

- arenas.lg_dirty_mult + initialized.

+ + "arenas.lg_dirty_mult" + (ssize_t) rw

Current default per-arena minimum ratio (log base 2) of - active to dirty pages, used to initialize arena.<i>.lg_dirty_mult - during arena creation. See opt.lg_dirty_mult - for additional information.

- arenas.decay_time - (ssize_t) - rw -

Current default per-arena approximate time in seconds - from the creation of a set of unused dirty pages until an equivalent set - of unused dirty pages is purged and/or reused, used to initialize arena.<i>.decay_time - during arena creation. See opt.decay_time for - additional information.

- arenas.quantum + active to dirty pages, used to initialize + "arena.<i>.lg_dirty_mult" + + during arena creation. See + "opt.lg_dirty_mult" + + for additional information.

+ + "arenas.quantum" + (size_t) r- -

Quantum size.

- arenas.page +

Quantum size.

+ + "arenas.page" + (size_t) r- -

Page size.

- arenas.tcache_max +

Page size.

+ + "arenas.tcache_max" + (size_t) r- [--enable-tcache] -

Maximum thread-cached size class.

- arenas.nbins +

Maximum thread-cached size class.

+ + "arenas.nbins" + (unsigned) r- -

Number of bin size classes.

- arenas.nhbins +

Number of bin size classes.

+ + "arenas.nhbins" + (unsigned) r- [--enable-tcache]

Total number of thread cache bin size - classes.

- arenas.bin.<i>.size + classes.

+ + "arenas.bin.<i>.size" + (size_t) r- -

Maximum size supported by size class.

- arenas.bin.<i>.nregs +

Maximum size supported by size class.

+ + "arenas.bin.<i>.nregs" + (uint32_t) r- -

Number of regions per page run.

- arenas.bin.<i>.run_size +

Number of regions per page run.

+ + "arenas.bin.<i>.run_size" + (size_t) r- -

Number of bytes per page run.

- arenas.nlruns +

Number of bytes per page run.

+ + "arenas.nlruns" + (unsigned) r- -

Total number of large size classes.

- arenas.lrun.<i>.size +

Total number of large size classes.

+ + "arenas.lrun.<i>.size" + (size_t) r-

Maximum size supported by this large size - class.

- arenas.nhchunks + class.

+ + "arenas.nhchunks" + (unsigned) r- -

Total number of huge size classes.

- arenas.hchunk.<i>.size +

Total number of huge size classes.

+ + "arenas.hchunk.<i>.size" + (size_t) r-

Maximum size supported by this huge size - class.

- arenas.extend + class.

+ + "arenas.extend" + (unsigned) r-

Extend the array of arenas by appending a new arena, - and returning the new arena index.

- prof.thread_active_init + and returning the new arena index.

+ + "prof.thread_active_init" + (bool) rw [--enable-prof] -

Control the initial setting for thread.prof.active - in newly created threads. See the opt.prof_thread_active_init - option for additional information.

- prof.active +

Control the initial setting for + "thread.prof.active" + + in newly created threads. See the + "opt.prof_thread_active_init" + + option for additional information.

+ + "prof.active" + (bool) rw [--enable-prof]

Control whether sampling is currently active. See the - opt.prof_active - option for additional information, as well as the interrelated thread.prof.active - mallctl.

- prof.dump + + "opt.prof_active" + + option for additional information, as well as the interrelated + "thread.prof.active" + + mallctl.

+ + "prof.dump" + (const char *) -w [--enable-prof] @@ -1057,9 +1237,13 @@ typedef struct { is specified, to a file according to the pattern <prefix>.<pid>.<seq>.m<mseq>.heap, where <prefix> is controlled by the - opt.prof_prefix - option.

- prof.gdump + + "opt.prof_prefix" + + option.

+ + "prof.gdump" + (bool) rw [--enable-prof] @@ -1067,67 +1251,103 @@ typedef struct { the total virtual memory exceeds the previous maximum. Profiles are dumped to files named according to the pattern <prefix>.<pid>.<seq>.u<useq>.heap, - where <prefix> is controlled by the opt.prof_prefix - option.

- prof.reset + where <prefix> is controlled by the + "opt.prof_prefix" + + option.

+ + "prof.reset" + (size_t) -w [--enable-prof]

Reset all memory profile statistics, and optionally - update the sample rate (see opt.lg_prof_sample - and prof.lg_sample). -

- prof.lg_sample + update the sample rate (see + "opt.lg_prof_sample" + + and + "prof.lg_sample" + ). +

+ + "prof.lg_sample" + (size_t) r- [--enable-prof] -

Get the current sample rate (see opt.lg_prof_sample). -

- prof.interval +

Get the current sample rate (see + "opt.lg_prof_sample" + ). +

+ + "prof.interval" + (uint64_t) r- [--enable-prof]

Average number of bytes allocated between - interval-based profile dumps. See the - opt.lg_prof_interval - option for additional information.

- stats.cactive + interval-based profile dumps. See the + + "opt.lg_prof_interval" + + option for additional information.

+ + "stats.cactive" + (size_t *) r- [--enable-stats]

Pointer to a counter that contains an approximate count of the current number of bytes in active pages. The estimate may be high, but never low, because each arena rounds up when computing its - contribution to the counter. Note that the epoch mallctl has no bearing + contribution to the counter. Note that the + "epoch" + mallctl has no bearing on this counter. Furthermore, counter consistency is maintained via atomic operations, so it is necessary to use an atomic operation in order to guarantee a consistent read when dereferencing the pointer. -

- stats.allocated +

+ + "stats.allocated" + (size_t) r- [--enable-stats]

Total number of bytes allocated by the - application.

- stats.active + application.

+ + "stats.active" + (size_t) r- [--enable-stats]

Total number of bytes in active pages allocated by the application. This is a multiple of the page size, and greater than or - equal to stats.allocated. + equal to + "stats.allocated" + . This does not include - stats.arenas.<i>.pdirty, nor pages - entirely devoted to allocator metadata.

- stats.metadata + + "stats.arenas.<i>.pdirty" + , nor pages + entirely devoted to allocator metadata.

+ + "stats.metadata" + (size_t) r- [--enable-stats]

Total number of bytes dedicated to metadata, which comprise base allocations used for bootstrap-sensitive internal - allocator data structures, arena chunk headers (see stats.arenas.<i>.metadata.mapped), - and internal allocations (see stats.arenas.<i>.metadata.allocated).

- stats.resident + allocator data structures, arena chunk headers (see + "stats.arenas.<i>.metadata.mapped" + ), + and internal allocations (see + "stats.arenas.<i>.metadata.allocated" + ).

+ + "stats.resident" + (size_t) r- [--enable-stats] @@ -1137,393 +1357,436 @@ typedef struct { pages. This is a maximum rather than precise because pages may not actually be physically resident if they correspond to demand-zeroed virtual memory that has not yet been touched. This is a multiple of the - page size, and is larger than stats.active.

- stats.mapped + page size, and is larger than + "stats.active" + .

+ + "stats.mapped" + (size_t) r- [--enable-stats]

Total number of bytes in active chunks mapped by the allocator. This is a multiple of the chunk size, and is larger than - stats.active. + + "stats.active" + . This does not include inactive chunks, even those that contain unused dirty pages, which means that there is no strict ordering between this - and stats.resident.

- stats.retained - (size_t) - r- - [--enable-stats] -

Total number of bytes in virtual memory mappings that - were retained rather than being returned to the operating system via - e.g. munmap(2). Retained virtual memory is - typically untouched, decommitted, or purged, so it has no strongly - associated physical memory (see chunk hooks for details). Retained - memory is excluded from mapped memory statistics, e.g. stats.mapped. -

- stats.arenas.<i>.dss + and + "stats.resident" + .

+ + "stats.arenas.<i>.dss" + (const char *) r-

dss (sbrk(2)) allocation precedence as - related to mmap(2) allocation. See opt.dss for details. -

- stats.arenas.<i>.lg_dirty_mult + related to mmap(2) allocation. See + "opt.dss" + for details. +

+ + "stats.arenas.<i>.lg_dirty_mult" + (ssize_t) r-

Minimum ratio (log base 2) of active to dirty pages. - See opt.lg_dirty_mult - for details.

- stats.arenas.<i>.decay_time - (ssize_t) - r- -

Approximate time in seconds from the creation of a set - of unused dirty pages until an equivalent set of unused dirty pages is - purged and/or reused. See opt.decay_time - for details.

- stats.arenas.<i>.nthreads + See + "opt.lg_dirty_mult" + + for details.

+ + "stats.arenas.<i>.nthreads" + (unsigned) r-

Number of threads currently assigned to - arena.

- stats.arenas.<i>.pactive + arena.

+ + "stats.arenas.<i>.pactive" + (size_t) r- -

Number of pages in active runs.

- stats.arenas.<i>.pdirty +

Number of pages in active runs.

+ + "stats.arenas.<i>.pdirty" + (size_t) r-

Number of pages within unused runs that are potentially - dirty, and for which madvise... - MADV_DONTNEED or - similar has not been called.

- stats.arenas.<i>.mapped + dirty, and for which madvise(..., + MADV_DONTNEED) or + similar has not been called.

+ + "stats.arenas.<i>.mapped" + (size_t) r- [--enable-stats] -

Number of mapped bytes.

- stats.arenas.<i>.retained - (size_t) - r- - [--enable-stats] -

Number of retained bytes. See stats.retained for - details.

- stats.arenas.<i>.metadata.mapped +

Number of mapped bytes.

+ + "stats.arenas.<i>.metadata.mapped" + (size_t) r- [--enable-stats]

Number of mapped bytes in arena chunk headers, which - track the states of the non-metadata pages.

- stats.arenas.<i>.metadata.allocated + track the states of the non-metadata pages.

+ + "stats.arenas.<i>.metadata.allocated" + (size_t) r- [--enable-stats]

Number of bytes dedicated to internal allocations. Internal allocations differ from application-originated allocations in that they are for internal use, and that they are omitted from heap - profiles. This statistic is reported separately from stats.metadata and - stats.arenas.<i>.metadata.mapped - because it overlaps with e.g. the stats.allocated and - stats.active + profiles. This statistic is reported separately from + "stats.metadata" + and + + "stats.arenas.<i>.metadata.mapped" + + because it overlaps with e.g. the + "stats.allocated" + and + + "stats.active" + statistics, whereas the other metadata statistics do - not.

- stats.arenas.<i>.npurge + not.

+ + "stats.arenas.<i>.npurge" + (uint64_t) r- [--enable-stats]

Number of dirty page purge sweeps performed. -

- stats.arenas.<i>.nmadvise +

+ + "stats.arenas.<i>.nmadvise" + (uint64_t) r- [--enable-stats] -

Number of madvise... - MADV_DONTNEED or - similar calls made to purge dirty pages.

- stats.arenas.<i>.purged +

Number of madvise(..., + MADV_DONTNEED) or + similar calls made to purge dirty pages.

+ + "stats.arenas.<i>.purged" + (uint64_t) r- [--enable-stats] -

Number of pages purged.

- stats.arenas.<i>.small.allocated +

Number of pages purged.

+ + "stats.arenas.<i>.small.allocated" + (size_t) r- [--enable-stats]

Number of bytes currently allocated by small objects. -

- stats.arenas.<i>.small.nmalloc +

+ + "stats.arenas.<i>.small.nmalloc" + (uint64_t) r- [--enable-stats]

Cumulative number of allocation requests served by - small bins.

- stats.arenas.<i>.small.ndalloc + small bins.

+ + "stats.arenas.<i>.small.ndalloc" + (uint64_t) r- [--enable-stats]

Cumulative number of small objects returned to bins. -

- stats.arenas.<i>.small.nrequests +

+ + "stats.arenas.<i>.small.nrequests" + (uint64_t) r- [--enable-stats]

Cumulative number of small allocation requests. -

- stats.arenas.<i>.large.allocated +

+ + "stats.arenas.<i>.large.allocated" + (size_t) r- [--enable-stats]

Number of bytes currently allocated by large objects. -

- stats.arenas.<i>.large.nmalloc +

+ + "stats.arenas.<i>.large.nmalloc" + (uint64_t) r- [--enable-stats]

Cumulative number of large allocation requests served - directly by the arena.

- stats.arenas.<i>.large.ndalloc + directly by the arena.

+ + "stats.arenas.<i>.large.ndalloc" + (uint64_t) r- [--enable-stats]

Cumulative number of large deallocation requests served - directly by the arena.

- stats.arenas.<i>.large.nrequests + directly by the arena.

+ + "stats.arenas.<i>.large.nrequests" + (uint64_t) r- [--enable-stats]

Cumulative number of large allocation requests. -

- stats.arenas.<i>.huge.allocated +

+ + "stats.arenas.<i>.huge.allocated" + (size_t) r- [--enable-stats]

Number of bytes currently allocated by huge objects. -

- stats.arenas.<i>.huge.nmalloc +

+ + "stats.arenas.<i>.huge.nmalloc" + (uint64_t) r- [--enable-stats]

Cumulative number of huge allocation requests served - directly by the arena.

- stats.arenas.<i>.huge.ndalloc + directly by the arena.

+ + "stats.arenas.<i>.huge.ndalloc" + (uint64_t) r- [--enable-stats]

Cumulative number of huge deallocation requests served - directly by the arena.

- stats.arenas.<i>.huge.nrequests + directly by the arena.

+ + "stats.arenas.<i>.huge.nrequests" + (uint64_t) r- [--enable-stats]

Cumulative number of huge allocation requests. -

- stats.arenas.<i>.bins.<j>.nmalloc +

+ + "stats.arenas.<i>.bins.<j>.nmalloc" + (uint64_t) r- [--enable-stats]

Cumulative number of allocations served by bin. -

- stats.arenas.<i>.bins.<j>.ndalloc +

+ + "stats.arenas.<i>.bins.<j>.ndalloc" + (uint64_t) r- [--enable-stats]

Cumulative number of allocations returned to bin. -

- stats.arenas.<i>.bins.<j>.nrequests +

+ + "stats.arenas.<i>.bins.<j>.nrequests" + (uint64_t) r- [--enable-stats]

Cumulative number of allocation - requests.

- stats.arenas.<i>.bins.<j>.curregs + requests.

+ + "stats.arenas.<i>.bins.<j>.curregs" + (size_t) r- [--enable-stats]

Current number of regions for this size - class.

- stats.arenas.<i>.bins.<j>.nfills + class.

+ + "stats.arenas.<i>.bins.<j>.nfills" + (uint64_t) r- [--enable-stats --enable-tcache] -

Cumulative number of tcache fills.

- stats.arenas.<i>.bins.<j>.nflushes +

Cumulative number of tcache fills.

+ + "stats.arenas.<i>.bins.<j>.nflushes" + (uint64_t) r- [--enable-stats --enable-tcache] -

Cumulative number of tcache flushes.

- stats.arenas.<i>.bins.<j>.nruns +

Cumulative number of tcache flushes.

+ + "stats.arenas.<i>.bins.<j>.nruns" + (uint64_t) r- [--enable-stats] -

Cumulative number of runs created.

- stats.arenas.<i>.bins.<j>.nreruns +

Cumulative number of runs created.

+ + "stats.arenas.<i>.bins.<j>.nreruns" + (uint64_t) r- [--enable-stats]

Cumulative number of times the current run from which - to allocate changed.

- stats.arenas.<i>.bins.<j>.curruns + to allocate changed.

+ + "stats.arenas.<i>.bins.<j>.curruns" + (size_t) r- [--enable-stats] -

Current number of runs.

- stats.arenas.<i>.lruns.<j>.nmalloc +

Current number of runs.

+ + "stats.arenas.<i>.lruns.<j>.nmalloc" + (uint64_t) r- [--enable-stats]

Cumulative number of allocation requests for this size - class served directly by the arena.

- stats.arenas.<i>.lruns.<j>.ndalloc + class served directly by the arena.

+ + "stats.arenas.<i>.lruns.<j>.ndalloc" + (uint64_t) r- [--enable-stats]

Cumulative number of deallocation requests for this - size class served directly by the arena.

- stats.arenas.<i>.lruns.<j>.nrequests + size class served directly by the arena.

+ + "stats.arenas.<i>.lruns.<j>.nrequests" + (uint64_t) r- [--enable-stats]

Cumulative number of allocation requests for this size - class.

- stats.arenas.<i>.lruns.<j>.curruns + class.

+ + "stats.arenas.<i>.lruns.<j>.curruns" + (size_t) r- [--enable-stats]

Current number of runs for this size class. -

- stats.arenas.<i>.hchunks.<j>.nmalloc +

+ + "stats.arenas.<i>.hchunks.<j>.nmalloc" + (uint64_t) r- [--enable-stats]

Cumulative number of allocation requests for this size - class served directly by the arena.

- stats.arenas.<i>.hchunks.<j>.ndalloc + class served directly by the arena.

+ + "stats.arenas.<i>.hchunks.<j>.ndalloc" + (uint64_t) r- [--enable-stats]

Cumulative number of deallocation requests for this - size class served directly by the arena.

- stats.arenas.<i>.hchunks.<j>.nrequests + size class served directly by the arena.

+ + "stats.arenas.<i>.hchunks.<j>.nrequests" + (uint64_t) r- [--enable-stats]

Cumulative number of allocation requests for this size - class.

- stats.arenas.<i>.hchunks.<j>.curhchunks + class.

+ + "stats.arenas.<i>.hchunks.<j>.curhchunks" + (size_t) r- [--enable-stats]

Current number of huge allocations for this size class. -

HEAP PROFILE FORMAT

Although the heap profiling functionality was originally designed to - be compatible with the - pprof command that is developed as part of the gperftools - package, the addition of per thread heap profiling functionality - required a different heap profile format. The jeprof - command is derived from pprof, with enhancements to - support the heap profile format described here.

In the following hypothetical heap profile, [...] - indicates elision for the sake of compactness.

-heap_v2/524288
-  t*: 28106: 56637512 [0: 0]
-  [...]
-  t3: 352: 16777344 [0: 0]
-  [...]
-  t99: 17754: 29341640 [0: 0]
-  [...]
-@ 0x5f86da8 0x5f5a1dc [...] 0x29e4d4e 0xa200316 0xabb2988 [...]
-  t*: 13: 6688 [0: 0]
-  t3: 12: 6496 [0: 0]
-  t99: 1: 192 [0: 0]
-[...]
-
-MAPPED_LIBRARIES:
-[...]

The following matches the above heap profile, but most -tokens are replaced with <description> to indicate -descriptions of the corresponding fields.

-<heap_profile_format_version>/<mean_sample_interval>
-  <aggregate>: <curobjs>: <curbytes> [<cumobjs>: <cumbytes>]
-  [...]
-  <thread_3_aggregate>: <curobjs>: <curbytes>[<cumobjs>: <cumbytes>]
-  [...]
-  <thread_99_aggregate>: <curobjs>: <curbytes>[<cumobjs>: <cumbytes>]
-  [...]
-@ <top_frame> <frame> [...] <frame> <frame> <frame> [...]
-  <backtrace_aggregate>: <curobjs>: <curbytes> [<cumobjs>: <cumbytes>]
-  <backtrace_thread_3>: <curobjs>: <curbytes> [<cumobjs>: <cumbytes>]
-  <backtrace_thread_99>: <curobjs>: <curbytes> [<cumobjs>: <cumbytes>]
-[...]
-
-MAPPED_LIBRARIES:
-</proc/<pid>/maps>

DEBUGGING MALLOC PROBLEMS

When debugging, it is a good idea to configure/build jemalloc with the --enable-debug and --enable-fill options, and recompile the program with suitable options and symbols for debugger support. When so configured, jemalloc incorporates a wide variety of run-time assertions that catch application errors such as double-free, - write-after-free, etc.

Programs often accidentally depend on “uninitialized” + write-after-free, etc.

Programs often accidentally depend on “uninitialized” memory actually being filled with zero bytes. Junk filling - (see the opt.junk + (see the + "opt.junk" + option) tends to expose such bugs in the form of obviously incorrect results and/or coredumps. Conversely, zero - filling (see the opt.zero option) eliminates + filling (see the + "opt.zero" + option) eliminates the symptoms of such bugs. Between these two options, it is usually possible to quickly detect, diagnose, and eliminate such bugs.

This implementation does not provide much detail about the problems it detects, because the performance impact for storing such information would be prohibitive. However, jemalloc does integrate with the most excellent Valgrind tool if the - --enable-valgrind configuration option is enabled.

DIAGNOSTIC MESSAGES

If any of the memory allocation/deallocation functions detect an error or warning condition, a message will be printed to file descriptor STDERR_FILENO. Errors will result in the process - dumping core. If the opt.abort option is set, most + dumping core. If the + "opt.abort" + option is set, most warnings are treated as errors.

The malloc_message variable allows the programmer to override the function which emits the text strings forming the errors and warnings if for some reason the STDERR_FILENO file descriptor is not suitable for this. - malloc_message() takes the + malloc_message() takes the cbopaque pointer argument that is NULL unless overridden by the arguments in a call to - malloc_stats_print(), followed by a string + malloc_stats_print(), followed by a string pointer. Please note that doing anything which tries to allocate memory in this function is likely to result in a crash or deadlock.

All messages are prefixed by - “<jemalloc>: ”.

RETURN VALUES

RETURN VALUES

Standard API

The malloc() and + calloc() functions return a pointer to the allocated memory if successful; otherwise a NULL pointer is returned and errno is set to - ENOMEM.

The posix_memalign() function + ENOMEM.

The posix_memalign() function returns the value 0 if successful; otherwise it returns an error value. - The posix_memalign() function will fail + The posix_memalign() function will fail if:

EINVAL

The alignment parameter is not a power of 2 at least as large as sizeof(void *).

ENOMEM

Memory allocation error.

-

The aligned_alloc() function returns +

The aligned_alloc() function returns a pointer to the allocated memory if successful; otherwise a NULL pointer is returned and errno is set. The - aligned_alloc() function will fail if: + aligned_alloc() function will fail if:

EINVAL

The alignment parameter is not a power of 2.

ENOMEM

Memory allocation error.

-

The realloc() function returns a +

The realloc() function returns a pointer, possibly identical to ptr, to the allocated memory if successful; otherwise a NULL pointer is returned, and errno is set to ENOMEM if the error was the result of an - allocation failure. The realloc() + allocation failure. The realloc() function always leaves the original buffer intact when an error occurs. -

The free() function returns no - value.

Non-standard API

The mallocx() and + rallocx() functions return a pointer to the allocated memory if successful; otherwise a NULL pointer is returned to indicate insufficient contiguous memory was - available to service the allocation request.

The xallocx() function returns the + available to service the allocation request.

The xallocx() function returns the real size of the resulting resized allocation pointed to by ptr, which is a value less than size if the allocation could not be adequately - grown in place.

The sallocx() function returns the + grown in place.

The sallocx() function returns the real size of the allocation pointed to by ptr. -

The nallocx() returns the real size +

The nallocx() returns the real size that would result from a successful equivalent - mallocx() function call, or zero if - insufficient memory is available to perform the size computation.

The mallctl(), - mallctlnametomib(), and - mallctlbymib() functions return 0 on + mallocx() function call, or zero if + insufficient memory is available to perform the size computation.

The mallctl(), + mallctlnametomib(), and + mallctlbymib() functions return 0 on success; otherwise they return an error value. The functions will fail if:

EINVAL

newp is not @@ -1535,28 +1798,28 @@ MAPPED_LIBRARIES: value.

EPERM

Attempt to read or write void value, or attempt to write read-only value.

EAGAIN

A memory allocation failure occurred.

EFAULT

An interface with side effects failed in some way - not directly related to mallctl*() + not directly related to mallctl*() read/write processing.

-

The malloc_usable_size() function +

The malloc_usable_size() function returns the usable size of the allocation pointed to by - ptr.

ENVIRONMENT

The following environment variable affects the execution of the allocation functions:

MALLOC_CONF

If the environment variable MALLOC_CONF is set, the characters it contains will be interpreted as options.

-

EXAMPLES

To dump core whenever a problem occurs:

ln -s 'abort:true' /etc/malloc.conf

To specify in the source a chunk size that is 16 MiB:

-malloc_conf = "lg_chunk:24";

SEE ALSO

madvise(2), mmap(2), sbrk(2), utrace(2), alloca(3), atexit(3), - getpagesize(3)