From 46b9b4b4684194d7e0375e8f954c8d20449e96b5 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Mon, 2 Jan 2012 15:24:32 -0800 Subject: [PATCH 01/70] string2* functions take a const pointer --- src/util.c | 6 +++--- src/util.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/util.c b/src/util.c index f5a23af2a..f7437e1c2 100644 --- a/src/util.c +++ b/src/util.c @@ -209,8 +209,8 @@ int ll2string(char *s, size_t len, long long value) { /* Convert a string into a long long. Returns 1 if the string could be parsed * into a (non-overflowing) long long, 0 otherwise. The value will be set to * the parsed value when appropriate. */ -int string2ll(char *s, size_t slen, long long *value) { - char *p = s; +int string2ll(const char *s, size_t slen, long long *value) { + const char *p = s; size_t plen = 0; int negative = 0; unsigned long long v; @@ -275,7 +275,7 @@ int string2ll(char *s, size_t slen, long long *value) { /* Convert a string into a long. Returns 1 if the string could be parsed into a * (non-overflowing) long, 0 otherwise. The value will be set to the parsed * value when appropriate. */ -int string2l(char *s, size_t slen, long *lval) { +int string2l(const char *s, size_t slen, long *lval) { long long llval; if (!string2ll(s,slen,&llval)) diff --git a/src/util.h b/src/util.h index b897a89e7..59dd10ac7 100644 --- a/src/util.h +++ b/src/util.h @@ -5,8 +5,8 @@ int stringmatchlen(const char *p, int plen, const char *s, int slen, int nocase) int stringmatch(const char *p, const char *s, int nocase); long long memtoll(const char *p, int *err); int ll2string(char *s, size_t len, long long value); -int string2ll(char *s, size_t slen, long long *value); -int string2l(char *s, size_t slen, long *value); +int string2ll(const char *s, size_t slen, long long *value); +int string2l(const char *s, size_t slen, long *value); int d2string(char *buf, size_t len, double value); #endif From ae204e542878f3c7ba572af1d37215d3f811c1a3 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Mon, 2 Jan 2012 22:14:10 -0800 Subject: [PATCH 02/70] Encode small hashes with a ziplist --- src/aof.c | 78 ++--- src/config.c | 30 +- src/object.c | 12 +- src/rdb.c | 139 ++++++--- src/rdb.h | 1 + src/redis.c | 4 +- src/redis.h | 24 +- src/t_hash.c | 619 +++++++++++++++++++++++++++------------ tests/unit/aofrw.tcl | 4 +- tests/unit/type/hash.tcl | 10 +- 10 files changed, 603 insertions(+), 318 deletions(-) diff --git a/src/aof.c b/src/aof.c index 25d6f454f..742af9052 100644 --- a/src/aof.c +++ b/src/aof.c @@ -607,53 +607,55 @@ int rewriteSortedSetObject(rio *r, robj *key, robj *o) { return 1; } +static int rioWriteHashIteratorCursor(rio *r, hashTypeIterator *hi, int what) { + if (hi->encoding == REDIS_ENCODING_ZIPLIST) { + unsigned char *vstr = NULL; + unsigned int vlen = UINT_MAX; + long long vll = LLONG_MAX; + + hashTypeCurrentFromZiplist(hi, what, &vstr, &vlen, &vll); + if (vstr) { + return rioWriteBulkString(r, (char*)vstr, vlen); + } else { + return rioWriteBulkLongLong(r, vll); + } + + } else if (hi->encoding == REDIS_ENCODING_HT) { + robj *value; + + hashTypeCurrentFromHashTable(hi, what, &value); + return rioWriteBulkObject(r, value); + } + + redisPanic("Unknown hash encoding"); + return 0; +} + /* Emit the commands needed to rebuild a hash object. * The function returns 0 on error, 1 on success. */ int rewriteHashObject(rio *r, robj *key, robj *o) { + hashTypeIterator *hi; long long count = 0, items = hashTypeLength(o); - if (o->encoding == REDIS_ENCODING_ZIPMAP) { - unsigned char *p = zipmapRewind(o->ptr); - unsigned char *field, *val; - unsigned int flen, vlen; + hi = hashTypeInitIterator(o); + while (hashTypeNext(hi) != REDIS_ERR) { + if (count == 0) { + int cmd_items = (items > REDIS_AOF_REWRITE_ITEMS_PER_CMD) ? + REDIS_AOF_REWRITE_ITEMS_PER_CMD : items; - while((p = zipmapNext(p,&field,&flen,&val,&vlen)) != NULL) { - if (count == 0) { - int cmd_items = (items > REDIS_AOF_REWRITE_ITEMS_PER_CMD) ? - REDIS_AOF_REWRITE_ITEMS_PER_CMD : items; - - if (rioWriteBulkCount(r,'*',2+cmd_items*2) == 0) return 0; - if (rioWriteBulkString(r,"HMSET",5) == 0) return 0; - if (rioWriteBulkObject(r,key) == 0) return 0; - } - if (rioWriteBulkString(r,(char*)field,flen) == 0) return 0; - if (rioWriteBulkString(r,(char*)val,vlen) == 0) return 0; - if (++count == REDIS_AOF_REWRITE_ITEMS_PER_CMD) count = 0; - items--; + if (rioWriteBulkCount(r,'*',2+cmd_items*2) == 0) return 0; + if (rioWriteBulkString(r,"HMSET",5) == 0) return 0; + if (rioWriteBulkObject(r,key) == 0) return 0; } - } else { - dictIterator *di = dictGetIterator(o->ptr); - dictEntry *de; - while((de = dictNext(di)) != NULL) { - robj *field = dictGetKey(de); - robj *val = dictGetVal(de); - - if (count == 0) { - int cmd_items = (items > REDIS_AOF_REWRITE_ITEMS_PER_CMD) ? - REDIS_AOF_REWRITE_ITEMS_PER_CMD : items; - - if (rioWriteBulkCount(r,'*',2+cmd_items*2) == 0) return 0; - if (rioWriteBulkString(r,"HMSET",5) == 0) return 0; - if (rioWriteBulkObject(r,key) == 0) return 0; - } - if (rioWriteBulkObject(r,field) == 0) return 0; - if (rioWriteBulkObject(r,val) == 0) return 0; - if (++count == REDIS_AOF_REWRITE_ITEMS_PER_CMD) count = 0; - items--; - } - dictReleaseIterator(di); + if (rioWriteHashIteratorCursor(r, hi, REDIS_HASH_KEY) == 0) return 0; + if (rioWriteHashIteratorCursor(r, hi, REDIS_HASH_VALUE) == 0) return 0; + if (++count == REDIS_AOF_REWRITE_ITEMS_PER_CMD) count = 0; + items--; } + + hashTypeReleaseIterator(hi); + return 1; } diff --git a/src/config.c b/src/config.c index 4a25489a6..26bb2ff5e 100644 --- a/src/config.c +++ b/src/config.c @@ -259,9 +259,15 @@ void loadServerConfigFromString(char *config) { zfree(server.rdb_filename); server.rdb_filename = zstrdup(argv[1]); } else if (!strcasecmp(argv[0],"hash-max-zipmap-entries") && argc == 2) { - server.hash_max_zipmap_entries = memtoll(argv[1], NULL); + redisLog(REDIS_WARNING, "Deprecated configuration directive: \"%s\"", argv[0]); + server.hash_max_ziplist_entries = memtoll(argv[1], NULL); } else if (!strcasecmp(argv[0],"hash-max-zipmap-value") && argc == 2) { - server.hash_max_zipmap_value = memtoll(argv[1], NULL); + redisLog(REDIS_WARNING, "Deprecated configuration directive: \"%s\"", argv[0]); + server.hash_max_ziplist_value = memtoll(argv[1], NULL); + } else if (!strcasecmp(argv[0],"hash-max-ziplist-entries") && argc == 2) { + server.hash_max_ziplist_entries = memtoll(argv[1], NULL); + } else if (!strcasecmp(argv[0],"hash-max-ziplist-value") && argc == 2) { + server.hash_max_ziplist_value = memtoll(argv[1], NULL); } else if (!strcasecmp(argv[0],"list-max-ziplist-entries") && argc == 2){ server.list_max_ziplist_entries = memtoll(argv[1], NULL); } else if (!strcasecmp(argv[0],"list-max-ziplist-value") && argc == 2) { @@ -491,12 +497,12 @@ void configSetCommand(redisClient *c) { addReplyErrorFormat(c,"Changing directory: %s", strerror(errno)); return; } - } else if (!strcasecmp(c->argv[2]->ptr,"hash-max-zipmap-entries")) { + } else if (!strcasecmp(c->argv[2]->ptr,"hash-max-ziplist-entries")) { if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt; - server.hash_max_zipmap_entries = ll; - } else if (!strcasecmp(c->argv[2]->ptr,"hash-max-zipmap-value")) { + server.hash_max_ziplist_entries = ll; + } else if (!strcasecmp(c->argv[2]->ptr,"hash-max-ziplist-value")) { if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt; - server.hash_max_zipmap_value = ll; + server.hash_max_ziplist_value = ll; } else if (!strcasecmp(c->argv[2]->ptr,"list-max-ziplist-entries")) { if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt; server.list_max_ziplist_entries = ll; @@ -668,14 +674,14 @@ void configGetCommand(redisClient *c) { addReplyBulkCString(c,server.repl_serve_stale_data ? "yes" : "no"); matches++; } - if (stringmatch(pattern,"hash-max-zipmap-entries",0)) { - addReplyBulkCString(c,"hash-max-zipmap-entries"); - addReplyBulkLongLong(c,server.hash_max_zipmap_entries); + if (stringmatch(pattern,"hash-max-ziplist-entries",0)) { + addReplyBulkCString(c,"hash-max-ziplist-entries"); + addReplyBulkLongLong(c,server.hash_max_ziplist_entries); matches++; } - if (stringmatch(pattern,"hash-max-zipmap-value",0)) { - addReplyBulkCString(c,"hash-max-zipmap-value"); - addReplyBulkLongLong(c,server.hash_max_zipmap_value); + if (stringmatch(pattern,"hash-max-ziplist-value",0)) { + addReplyBulkCString(c,"hash-max-ziplist-value"); + addReplyBulkLongLong(c,server.hash_max_ziplist_value); matches++; } if (stringmatch(pattern,"list-max-ziplist-entries",0)) { diff --git a/src/object.c b/src/object.c index 0711afed7..ccb072085 100644 --- a/src/object.c +++ b/src/object.c @@ -95,12 +95,9 @@ robj *createIntsetObject(void) { } robj *createHashObject(void) { - /* All the Hashes start as zipmaps. Will be automatically converted - * into hash tables if there are enough elements or big elements - * inside. */ - unsigned char *zm = zipmapNew(); - robj *o = createObject(REDIS_HASH,zm); - o->encoding = REDIS_ENCODING_ZIPMAP; + unsigned char *zl = ziplistNew(); + robj *o = createObject(REDIS_HASH, zl); + o->encoding = REDIS_ENCODING_ZIPLIST; return o; } @@ -176,7 +173,7 @@ void freeHashObject(robj *o) { case REDIS_ENCODING_HT: dictRelease((dict*) o->ptr); break; - case REDIS_ENCODING_ZIPMAP: + case REDIS_ENCODING_ZIPLIST: zfree(o->ptr); break; default: @@ -492,7 +489,6 @@ char *strEncoding(int encoding) { case REDIS_ENCODING_RAW: return "raw"; case REDIS_ENCODING_INT: return "int"; case REDIS_ENCODING_HT: return "hashtable"; - case REDIS_ENCODING_ZIPMAP: return "zipmap"; case REDIS_ENCODING_LINKEDLIST: return "linkedlist"; case REDIS_ENCODING_ZIPLIST: return "ziplist"; case REDIS_ENCODING_INTSET: return "intset"; diff --git a/src/rdb.c b/src/rdb.c index 77e2a0480..c74ec41c5 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -1,5 +1,6 @@ #include "redis.h" #include "lzf.h" /* LZF compression library */ +#include "zipmap.h" #include #include @@ -424,8 +425,8 @@ int rdbSaveObjectType(rio *rdb, robj *o) { else redisPanic("Unknown sorted set encoding"); case REDIS_HASH: - if (o->encoding == REDIS_ENCODING_ZIPMAP) - return rdbSaveType(rdb,REDIS_RDB_TYPE_HASH_ZIPMAP); + if (o->encoding == REDIS_ENCODING_ZIPLIST) + return rdbSaveType(rdb,REDIS_RDB_TYPE_HASH_ZIPLIST); else if (o->encoding == REDIS_ENCODING_HT) return rdbSaveType(rdb,REDIS_RDB_TYPE_HASH); else @@ -530,12 +531,13 @@ int rdbSaveObject(rio *rdb, robj *o) { } } else if (o->type == REDIS_HASH) { /* Save a hash value */ - if (o->encoding == REDIS_ENCODING_ZIPMAP) { - size_t l = zipmapBlobLen((unsigned char*)o->ptr); + if (o->encoding == REDIS_ENCODING_ZIPLIST) { + size_t l = ziplistBlobLen((unsigned char*)o->ptr); if ((n = rdbSaveRawString(rdb,o->ptr,l)) == -1) return -1; nwritten += n; - } else { + + } else if (o->encoding == REDIS_ENCODING_HT) { dictIterator *di = dictGetIterator(o->ptr); dictEntry *de; @@ -552,7 +554,11 @@ int rdbSaveObject(rio *rdb, robj *o) { nwritten += n; } dictReleaseIterator(di); + + } else { + redisPanic("Unknown hash encoding"); } + } else { redisPanic("Unknown object type"); } @@ -824,55 +830,69 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) { maxelelen <= server.zset_max_ziplist_value) zsetConvert(o,REDIS_ENCODING_ZIPLIST); } else if (rdbtype == REDIS_RDB_TYPE_HASH) { - size_t hashlen; + size_t len; + int ret; + + len = rdbLoadLen(rdb, NULL); + if (len == REDIS_RDB_LENERR) return NULL; - if ((hashlen = rdbLoadLen(rdb,NULL)) == REDIS_RDB_LENERR) return NULL; o = createHashObject(); + /* Too many entries? Use an hash table. */ - if (hashlen > server.hash_max_zipmap_entries) - convertToRealHash(o); - /* Load every key/value, then set it into the zipmap or hash - * table, as needed. */ - while(hashlen--) { - robj *key, *val; + if (len > server.hash_max_ziplist_entries) + hashTypeConvert(o, REDIS_ENCODING_HT); - if ((key = rdbLoadEncodedStringObject(rdb)) == NULL) return NULL; - if ((val = rdbLoadEncodedStringObject(rdb)) == NULL) return NULL; - /* If we are using a zipmap and there are too big values - * the object is converted to real hash table encoding. */ - if (o->encoding != REDIS_ENCODING_HT && - ((key->encoding == REDIS_ENCODING_RAW && - sdslen(key->ptr) > server.hash_max_zipmap_value) || - (val->encoding == REDIS_ENCODING_RAW && - sdslen(val->ptr) > server.hash_max_zipmap_value))) + /* Load every field and value into the ziplist */ + while (o->encoding == REDIS_ENCODING_ZIPLIST && len-- > 0) { + robj *field, *value; + + /* Load raw strings */ + field = rdbLoadStringObject(rdb); + if (field == NULL) return NULL; + redisAssert(field->encoding == REDIS_ENCODING_RAW); + value = rdbLoadStringObject(rdb); + if (value == NULL) return NULL; + redisAssert(field->encoding == REDIS_ENCODING_RAW); + + /* Convert to hash table if size threshold is exceeded */ + if (sdslen(field->ptr) > server.hash_max_ziplist_value || + sdslen(value->ptr) > server.hash_max_ziplist_value) { - convertToRealHash(o); + hashTypeConvert(o, REDIS_ENCODING_HT); + break; } - if (o->encoding == REDIS_ENCODING_ZIPMAP) { - unsigned char *zm = o->ptr; - robj *deckey, *decval; - - /* We need raw string objects to add them to the zipmap */ - deckey = getDecodedObject(key); - decval = getDecodedObject(val); - zm = zipmapSet(zm,deckey->ptr,sdslen(deckey->ptr), - decval->ptr,sdslen(decval->ptr),NULL); - o->ptr = zm; - decrRefCount(deckey); - decrRefCount(decval); - decrRefCount(key); - decrRefCount(val); - } else { - key = tryObjectEncoding(key); - val = tryObjectEncoding(val); - dictAdd((dict*)o->ptr,key,val); - } + /* Add pair to ziplist */ + o->ptr = ziplistPush(o->ptr, field->ptr, sdslen(field->ptr), ZIPLIST_TAIL); + o->ptr = ziplistPush(o->ptr, value->ptr, sdslen(value->ptr), ZIPLIST_TAIL); } + + /* Load remaining fields and values into the hash table */ + while (o->encoding == REDIS_ENCODING_HT && len-- > 0) { + robj *field, *value; + + /* Load encoded strings */ + field = rdbLoadEncodedStringObject(rdb); + if (field == NULL) return NULL; + value = rdbLoadEncodedStringObject(rdb); + if (value == NULL) return NULL; + + field = tryObjectEncoding(field); + value = tryObjectEncoding(value); + + /* Add pair to hash table */ + ret = dictAdd((dict*)o->ptr, field, value); + redisAssert(ret == REDIS_OK); + } + + /* All pairs should be read by now */ + redisAssert(len == 0); + } else if (rdbtype == REDIS_RDB_TYPE_HASH_ZIPMAP || rdbtype == REDIS_RDB_TYPE_LIST_ZIPLIST || rdbtype == REDIS_RDB_TYPE_SET_INTSET || - rdbtype == REDIS_RDB_TYPE_ZSET_ZIPLIST) + rdbtype == REDIS_RDB_TYPE_ZSET_ZIPLIST || + rdbtype == REDIS_RDB_TYPE_HASH_ZIPLIST) { robj *aux = rdbLoadStringObject(rdb); @@ -890,10 +910,29 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) { * converted. */ switch(rdbtype) { case REDIS_RDB_TYPE_HASH_ZIPMAP: - o->type = REDIS_HASH; - o->encoding = REDIS_ENCODING_ZIPMAP; - if (zipmapLen(o->ptr) > server.hash_max_zipmap_entries) - convertToRealHash(o); + /* Convert to ziplist encoded hash. This must be deprecated + * when loading dumps created by Redis 2.4 gets deprecated. */ + { + unsigned char *zl = ziplistNew(); + unsigned char *zi = zipmapRewind(o->ptr); + + while (zi != NULL) { + unsigned char *fstr, *vstr; + unsigned int flen, vlen; + + zi = zipmapNext(zi, &fstr, &flen, &vstr, &vlen); + zl = ziplistPush(zl, fstr, flen, ZIPLIST_TAIL); + zl = ziplistPush(zl, vstr, vlen, ZIPLIST_TAIL); + } + + zfree(o->ptr); + o->ptr = zl; + o->type = REDIS_HASH; + o->encoding = REDIS_ENCODING_ZIPLIST; + + if (hashTypeLength(o) > server.hash_max_ziplist_entries) + hashTypeConvert(o, REDIS_ENCODING_HT); + } break; case REDIS_RDB_TYPE_LIST_ZIPLIST: o->type = REDIS_LIST; @@ -913,6 +952,12 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) { if (zsetLength(o) > server.zset_max_ziplist_entries) zsetConvert(o,REDIS_ENCODING_SKIPLIST); break; + case REDIS_RDB_TYPE_HASH_ZIPLIST: + o->type = REDIS_HASH; + o->encoding = REDIS_ENCODING_ZIPLIST; + if (hashTypeLength(o) > server.hash_max_ziplist_entries) + hashTypeConvert(o, REDIS_ENCODING_HT); + break; default: redisPanic("Unknown encoding"); break; diff --git a/src/rdb.h b/src/rdb.h index 827947b4a..45beaa93a 100644 --- a/src/rdb.h +++ b/src/rdb.h @@ -47,6 +47,7 @@ #define REDIS_RDB_TYPE_LIST_ZIPLIST 10 #define REDIS_RDB_TYPE_SET_INTSET 11 #define REDIS_RDB_TYPE_ZSET_ZIPLIST 12 +#define REDIS_RDB_TYPE_HASH_ZIPLIST 13 /* Test if a type is an object type. */ #define rdbIsObjectType(t) ((t >= 0 && t <= 4) || (t >= 9 && t <= 12)) diff --git a/src/redis.c b/src/redis.c index 091461159..7de52c87a 100644 --- a/src/redis.c +++ b/src/redis.c @@ -895,8 +895,8 @@ void initServerConfig() { server.maxmemory = 0; server.maxmemory_policy = REDIS_MAXMEMORY_VOLATILE_LRU; server.maxmemory_samples = 3; - server.hash_max_zipmap_entries = REDIS_HASH_MAX_ZIPMAP_ENTRIES; - server.hash_max_zipmap_value = REDIS_HASH_MAX_ZIPMAP_VALUE; + server.hash_max_ziplist_entries = REDIS_HASH_MAX_ZIPLIST_ENTRIES; + server.hash_max_ziplist_value = REDIS_HASH_MAX_ZIPLIST_VALUE; server.list_max_ziplist_entries = REDIS_LIST_MAX_ZIPLIST_ENTRIES; server.list_max_ziplist_value = REDIS_LIST_MAX_ZIPLIST_VALUE; server.set_max_intset_entries = REDIS_SET_MAX_INTSET_ENTRIES; diff --git a/src/redis.h b/src/redis.h index aa79b4ada..7a27e56b9 100644 --- a/src/redis.h +++ b/src/redis.h @@ -27,7 +27,6 @@ #include "adlist.h" /* Linked lists */ #include "zmalloc.h" /* total memory usage aware version of malloc/free */ #include "anet.h" /* Networking the easy way */ -#include "zipmap.h" /* Compact string -> string data structure */ #include "ziplist.h" /* Compact list data structure */ #include "intset.h" /* Compact integer set structure */ #include "version.h" /* Version macro */ @@ -194,8 +193,8 @@ #define AOF_FSYNC_EVERYSEC 2 /* Zip structure related defaults */ -#define REDIS_HASH_MAX_ZIPMAP_ENTRIES 512 -#define REDIS_HASH_MAX_ZIPMAP_VALUE 64 +#define REDIS_HASH_MAX_ZIPLIST_ENTRIES 512 +#define REDIS_HASH_MAX_ZIPLIST_VALUE 64 #define REDIS_LIST_MAX_ZIPLIST_ENTRIES 512 #define REDIS_LIST_MAX_ZIPLIST_VALUE 64 #define REDIS_SET_MAX_INTSET_ENTRIES 512 @@ -610,8 +609,8 @@ struct redisServer { int sort_alpha; int sort_bypattern; /* Zip structure config, see redis.conf for more information */ - size_t hash_max_zipmap_entries; - size_t hash_max_zipmap_value; + size_t hash_max_ziplist_entries; + size_t hash_max_ziplist_value; size_t list_max_ziplist_entries; size_t list_max_ziplist_value; size_t set_max_intset_entries; @@ -715,10 +714,10 @@ typedef struct { * not both are required, store pointers in the iterator to avoid * unnecessary memory allocation for fields/values. */ typedef struct { + robj *subject; int encoding; - unsigned char *zi; - unsigned char *zk, *zv; - unsigned int zklen, zvlen; + + unsigned char *fptr, *vptr; dictIterator *di; dictEntry *de; @@ -934,10 +933,9 @@ unsigned long setTypeSize(robj *subject); void setTypeConvert(robj *subject, int enc); /* Hash data type */ -void convertToRealHash(robj *o); +void hashTypeConvert(robj *o, int enc); void hashTypeTryConversion(robj *subject, robj **argv, int start, int end); void hashTypeTryObjectEncoding(robj *subject, robj **o1, robj **o2); -int hashTypeGet(robj *o, robj *key, robj **objval, unsigned char **v, unsigned int *vlen); robj *hashTypeGetObject(robj *o, robj *key); int hashTypeExists(robj *o, robj *key); int hashTypeSet(robj *o, robj *key, robj *value); @@ -946,7 +944,11 @@ unsigned long hashTypeLength(robj *o); hashTypeIterator *hashTypeInitIterator(robj *subject); void hashTypeReleaseIterator(hashTypeIterator *hi); int hashTypeNext(hashTypeIterator *hi); -int hashTypeCurrent(hashTypeIterator *hi, int what, robj **objval, unsigned char **v, unsigned int *vlen); +void hashTypeCurrentFromZiplist(hashTypeIterator *hi, int what, + unsigned char **vstr, + unsigned int *vlen, + long long *vll); +void hashTypeCurrentFromHashTable(hashTypeIterator *hi, int what, robj **dst); robj *hashTypeCurrentObject(hashTypeIterator *hi, int what); robj *hashTypeLookupWriteOrCreate(redisClient *c, robj *key); diff --git a/src/t_hash.c b/src/t_hash.c index 8ee5485c1..9699223a8 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -1,5 +1,4 @@ #include "redis.h" - #include /*----------------------------------------------------------------------------- @@ -7,18 +6,19 @@ *----------------------------------------------------------------------------*/ /* Check the length of a number of objects to see if we need to convert a - * zipmap to a real hash. Note that we only check string encoded objects + * ziplist to a real hash. Note that we only check string encoded objects * as their string length can be queried in constant time. */ -void hashTypeTryConversion(robj *subject, robj **argv, int start, int end) { +void hashTypeTryConversion(robj *o, robj **argv, int start, int end) { int i; - if (subject->encoding != REDIS_ENCODING_ZIPMAP) return; + + if (o->encoding != REDIS_ENCODING_ZIPLIST) return; for (i = start; i <= end; i++) { if (argv[i]->encoding == REDIS_ENCODING_RAW && - sdslen(argv[i]->ptr) > server.hash_max_zipmap_value) + sdslen(argv[i]->ptr) > server.hash_max_ziplist_value) { - convertToRealHash(subject); - return; + hashTypeConvert(o, REDIS_ENCODING_HT); + break; } } } @@ -31,137 +31,262 @@ void hashTypeTryObjectEncoding(robj *subject, robj **o1, robj **o2) { } } -/* Get the value from a hash identified by key. - * - * If the string is found either REDIS_ENCODING_HT or REDIS_ENCODING_ZIPMAP - * is returned, and either **objval or **v and *vlen are set accordingly, - * so that objects in hash tables are returend as objects and pointers - * inside a zipmap are returned as such. - * - * If the object was not found -1 is returned. - * - * This function is copy on write friendly as there is no incr/decr - * of refcount needed if objects are accessed just for reading operations. */ -int hashTypeGet(robj *o, robj *key, robj **objval, unsigned char **v, - unsigned int *vlen) +/* Get the value from a ziplist encoded hash, identified by field. + * Returns -1 when the field cannot be found. */ +int hashTypeGetFromZiplist(robj *o, robj *field, + unsigned char **vstr, + unsigned int *vlen, + long long *vll) { - if (o->encoding == REDIS_ENCODING_ZIPMAP) { - int found; + unsigned char *zl, *fptr = NULL, *vptr = NULL; + int ret; - key = getDecodedObject(key); - found = zipmapGet(o->ptr,key->ptr,sdslen(key->ptr),v,vlen); - decrRefCount(key); - if (!found) return -1; - } else { - dictEntry *de = dictFind(o->ptr,key); - if (de == NULL) return -1; - *objval = dictGetVal(de); + redisAssert(o->encoding == REDIS_ENCODING_ZIPLIST); + + field = getDecodedObject(field); + + zl = o->ptr; + fptr = ziplistIndex(zl, ZIPLIST_HEAD); + while (fptr != NULL) { + /* Grab pointer to the value (fptr points to the field) */ + vptr = ziplistNext(zl, fptr); + redisAssert(vptr != NULL); + + /* Compare field in ziplist with specified field */ + if (ziplistCompare(fptr, field->ptr, sdslen(field->ptr))) { + break; + } + + /* Skip over value */ + fptr = ziplistNext(zl, vptr); } - return o->encoding; + + decrRefCount(field); + + if (fptr != NULL) { + ret = ziplistGet(vptr, vstr, vlen, vll); + redisAssert(ret); + return 0; + } + + return -1; } -/* Higher level function of hashTypeGet() that always returns a Redis +/* Get the value from a hash table encoded hash, identified by field. + * Returns -1 when the field cannot be found. */ +int hashTypeGetFromHashTable(robj *o, robj *field, robj **value) { + dictEntry *de; + + redisAssert(o->encoding == REDIS_ENCODING_HT); + + de = dictFind(o->ptr, field); + if (de == NULL) { + return -1; + } + + *value = dictGetVal(de); + return 0; +} + +/* Higher level function of hashTypeGet*() that always returns a Redis * object (either new or with refcount incremented), so that the caller * can retain a reference or call decrRefCount after the usage. * * The lower level function can prevent copy on write so it is * the preferred way of doing read operations. */ -robj *hashTypeGetObject(robj *o, robj *key) { - robj *objval; - unsigned char *v; - unsigned int vlen; +robj *hashTypeGetObject(robj *o, robj *field) { + robj *value = NULL; - int encoding = hashTypeGet(o,key,&objval,&v,&vlen); - switch(encoding) { - case REDIS_ENCODING_HT: - incrRefCount(objval); - return objval; - case REDIS_ENCODING_ZIPMAP: - objval = createStringObject((char*)v,vlen); - return objval; - default: return NULL; + if (o->encoding == REDIS_ENCODING_ZIPLIST) { + unsigned char *vstr = NULL; + unsigned int vlen = UINT_MAX; + long long vll = LLONG_MAX; + + if (hashTypeGetFromZiplist(o, field, &vstr, &vlen, &vll) == 0) { + if (vstr) { + value = createStringObject((char*)vstr, vlen); + } else { + value = createStringObjectFromLongLong(vll); + } + } + + } else if (o->encoding == REDIS_ENCODING_HT) { + robj *aux; + + if (hashTypeGetFromHashTable(o, field, &aux) == 0) { + incrRefCount(aux); + value = aux; + } + + } else { + redisPanic("Unknown hash encoding"); } + + return value; } -/* Test if the key exists in the given hash. Returns 1 if the key - * exists and 0 when it doesn't. */ -int hashTypeExists(robj *o, robj *key) { - if (o->encoding == REDIS_ENCODING_ZIPMAP) { - key = getDecodedObject(key); - if (zipmapExists(o->ptr,key->ptr,sdslen(key->ptr))) { - decrRefCount(key); +/* Test if the specified field exists in the given hash. Returns 1 if the field + * exists, and 0 when it doesn't. */ +int hashTypeExists(robj *o, robj *field) { + if (o->encoding == REDIS_ENCODING_ZIPLIST) { + unsigned char *vstr = NULL; + unsigned int vlen = UINT_MAX; + long long vll = LLONG_MAX; + + if (hashTypeGetFromZiplist(o, field, &vstr, &vlen, &vll) == 0) { return 1; } - decrRefCount(key); + + } else if (o->encoding == REDIS_ENCODING_HT) { + robj *aux; + + if (hashTypeGetFromHashTable(o, field, &aux) == 0) { + return 1; + } + } else { - if (dictFind(o->ptr,key) != NULL) { - return 1; - } + redisPanic("Unknown hash encoding"); } + return 0; } /* Add an element, discard the old if the key already exists. * Return 0 on insert and 1 on update. */ -int hashTypeSet(robj *o, robj *key, robj *value) { +int hashTypeSet(robj *o, robj *field, robj *value) { int update = 0; - if (o->encoding == REDIS_ENCODING_ZIPMAP) { - key = getDecodedObject(key); + + if (o->encoding == REDIS_ENCODING_ZIPLIST) { + unsigned char *zl, *fptr, *vptr; + + field = getDecodedObject(field); value = getDecodedObject(value); - o->ptr = zipmapSet(o->ptr, - key->ptr,sdslen(key->ptr), - value->ptr,sdslen(value->ptr), &update); - decrRefCount(key); + + zl = o->ptr; + fptr = ziplistIndex(zl, ZIPLIST_HEAD); + while (fptr != NULL) { + /* Compare field in ziplist with specified field */ + if (ziplistCompare(fptr, field->ptr, sdslen(field->ptr))) { + zl = ziplistDelete(zl,&fptr); + zl = ziplistDelete(zl,&fptr); + o->ptr = zl; + update = 1; + break; + } + + /* Grab pointer to the value (fptr points to the field) */ + vptr = ziplistNext(zl, fptr); + redisAssert(vptr != NULL); + + /* Grab pointer (if any) to the next field */ + fptr = ziplistNext(zl, vptr); + } + + /* Push new field/value pair onto the tail of the ziplist */ + zl = ziplistPush(zl, field->ptr, sdslen(field->ptr), ZIPLIST_TAIL); + zl = ziplistPush(zl, value->ptr, sdslen(value->ptr), ZIPLIST_TAIL); + o->ptr = zl; + + decrRefCount(field); decrRefCount(value); - /* Check if the zipmap needs to be upgraded to a real hash table */ - if (zipmapLen(o->ptr) > server.hash_max_zipmap_entries) - convertToRealHash(o); - } else { - if (dictReplace(o->ptr,key,value)) { - /* Insert */ - incrRefCount(key); - } else { - /* Update */ + /* Check if the ziplist needs to be converted to a hash table */ + if (hashTypeLength(o) > server.hash_max_ziplist_entries) { + hashTypeConvert(o, REDIS_ENCODING_HT); + } + + } else if (o->encoding == REDIS_ENCODING_HT) { + if (dictReplace(o->ptr, field, value)) { /* Insert */ + incrRefCount(field); + } else { /* Update */ update = 1; } + incrRefCount(value); + + } else { + redisPanic("Unknown hash encoding"); } + return update; } /* Delete an element from a hash. * Return 1 on deleted and 0 on not found. */ -int hashTypeDelete(robj *o, robj *key) { +int hashTypeDelete(robj *o, robj *field) { int deleted = 0; - if (o->encoding == REDIS_ENCODING_ZIPMAP) { - key = getDecodedObject(key); - o->ptr = zipmapDel(o->ptr,key->ptr,sdslen(key->ptr), &deleted); - decrRefCount(key); + + if (o->encoding == REDIS_ENCODING_ZIPLIST) { + unsigned char *zl, *fptr, *vptr; + + field = getDecodedObject(field); + + zl = o->ptr; + fptr = ziplistIndex(zl, ZIPLIST_HEAD); + while (fptr != NULL) { + /* Compare field in ziplist with specified field */ + if (ziplistCompare(fptr, field->ptr, sdslen(field->ptr))) { + zl = ziplistDelete(zl,&fptr); + zl = ziplistDelete(zl,&fptr); + o->ptr = zl; + deleted = 1; + break; + } + + /* Grab pointer to the value (fptr points to the field) */ + vptr = ziplistNext(zl, fptr); + redisAssert(vptr != NULL); + + /* Grab pointer (if any) to the next field */ + fptr = ziplistNext(zl, vptr); + } + + decrRefCount(field); + + } else if (o->encoding == REDIS_ENCODING_HT) { + if (dictDelete((dict*)o->ptr, field) == REDIS_OK) { + deleted = 1; + + /* Always check if the dictionary needs a resize after a delete. */ + if (htNeedsResize(o->ptr)) dictResize(o->ptr); + } + } else { - deleted = dictDelete((dict*)o->ptr,key) == DICT_OK; - /* Always check if the dictionary needs a resize after a delete. */ - if (deleted && htNeedsResize(o->ptr)) dictResize(o->ptr); + redisPanic("Unknown hash encoding"); } + return deleted; } /* Return the number of elements in a hash. */ unsigned long hashTypeLength(robj *o) { - return (o->encoding == REDIS_ENCODING_ZIPMAP) ? - zipmapLen((unsigned char*)o->ptr) : dictSize((dict*)o->ptr); + unsigned long length = ULONG_MAX; + + if (o->encoding == REDIS_ENCODING_ZIPLIST) { + length = ziplistLen(o->ptr) / 2; + } else if (o->encoding == REDIS_ENCODING_HT) { + length = dictSize((dict*)o->ptr); + } else { + redisPanic("Unknown hash encoding"); + } + + return length; } hashTypeIterator *hashTypeInitIterator(robj *subject) { hashTypeIterator *hi = zmalloc(sizeof(hashTypeIterator)); + hi->subject = subject; hi->encoding = subject->encoding; - if (hi->encoding == REDIS_ENCODING_ZIPMAP) { - hi->zi = zipmapRewind(subject->ptr); + + if (hi->encoding == REDIS_ENCODING_ZIPLIST) { + hi->fptr = NULL; + hi->vptr = NULL; } else if (hi->encoding == REDIS_ENCODING_HT) { hi->di = dictGetIterator(subject->ptr); } else { - redisAssertWithInfo(NULL,subject,0); + redisPanic("Unknown hash encoding"); } + return hi; } @@ -169,68 +294,116 @@ void hashTypeReleaseIterator(hashTypeIterator *hi) { if (hi->encoding == REDIS_ENCODING_HT) { dictReleaseIterator(hi->di); } + zfree(hi); } /* Move to the next entry in the hash. Return REDIS_OK when the next entry * could be found and REDIS_ERR when the iterator reaches the end. */ int hashTypeNext(hashTypeIterator *hi) { - if (hi->encoding == REDIS_ENCODING_ZIPMAP) { - if ((hi->zi = zipmapNext(hi->zi, &hi->zk, &hi->zklen, - &hi->zv, &hi->zvlen)) == NULL) return REDIS_ERR; + if (hi->encoding == REDIS_ENCODING_ZIPLIST) { + unsigned char *zl; + unsigned char *fptr, *vptr; + + zl = hi->subject->ptr; + fptr = hi->fptr; + vptr = hi->vptr; + + if (fptr == NULL) { + /* Initialize cursor */ + redisAssert(vptr == NULL); + fptr = ziplistIndex(zl, 0); + } else { + /* Advance cursor */ + redisAssert(vptr != NULL); + fptr = ziplistNext(zl, vptr); + } + + if (fptr == NULL) { + return REDIS_ERR; + } + + /* Grab pointer to the value (fptr points to the field) */ + vptr = ziplistNext(zl, fptr); + redisAssert(vptr != NULL); + + /* fptr, vptr now point to the first or next pair */ + hi->fptr = fptr; + hi->vptr = vptr; + + } else if (hi->encoding == REDIS_ENCODING_HT) { + if ((hi->de = dictNext(hi->di)) == NULL) { + return REDIS_ERR; + } + } else { - if ((hi->de = dictNext(hi->di)) == NULL) return REDIS_ERR; + redisPanic("Unknown hash encoding"); } + return REDIS_OK; } -/* Get key or value object at current iteration position. - * The returned item differs with the hash object encoding: - * - When encoding is REDIS_ENCODING_HT, the objval pointer is populated - * with the original object. - * - When encoding is REDIS_ENCODING_ZIPMAP, a pointer to the string and - * its length is retunred populating the v and vlen pointers. - * This function is copy on write friendly as accessing objects in read only - * does not require writing to any memory page. - * - * The function returns the encoding of the object, so that the caller - * can underestand if the key or value was returned as object or C string. */ -int hashTypeCurrent(hashTypeIterator *hi, int what, robj **objval, unsigned char **v, unsigned int *vlen) { - if (hi->encoding == REDIS_ENCODING_ZIPMAP) { - if (what & REDIS_HASH_KEY) { - *v = hi->zk; - *vlen = hi->zklen; - } else { - *v = hi->zv; - *vlen = hi->zvlen; - } +/* Get the field or value at iterator cursor, for an iterator on a hash value + * encoded as a ziplist. Prototype is similar to `hashTypeGetFromZiplist`. */ +void hashTypeCurrentFromZiplist(hashTypeIterator *hi, int what, + unsigned char **vstr, + unsigned int *vlen, + long long *vll) +{ + int ret; + + redisAssert(hi->encoding == REDIS_ENCODING_ZIPLIST); + + if (what & REDIS_HASH_KEY) { + ret = ziplistGet(hi->fptr, vstr, vlen, vll); + redisAssert(ret); } else { - if (what & REDIS_HASH_KEY) - *objval = dictGetKey(hi->de); - else - *objval = dictGetVal(hi->de); + ret = ziplistGet(hi->vptr, vstr, vlen, vll); + redisAssert(ret); } - return hi->encoding; } -/* A non copy-on-write friendly but higher level version of hashTypeCurrent() - * that always returns an object with refcount incremented by one (or a new - * object), so it's up to the caller to decrRefCount() the object if no - * reference is retained. */ -robj *hashTypeCurrentObject(hashTypeIterator *hi, int what) { - robj *obj; - unsigned char *v = NULL; - unsigned int vlen = 0; - int encoding = hashTypeCurrent(hi,what,&obj,&v,&vlen); +/* Get the field or value at iterator cursor, for an iterator on a hash value + * encoded as a ziplist. Prototype is similar to `hashTypeGetFromHashTable`. */ +void hashTypeCurrentFromHashTable(hashTypeIterator *hi, int what, robj **dst) { + redisAssert(hi->encoding == REDIS_ENCODING_HT); - if (encoding == REDIS_ENCODING_HT) { - incrRefCount(obj); - return obj; + if (what & REDIS_HASH_KEY) { + *dst = dictGetKey(hi->de); } else { - return createStringObject((char*)v,vlen); + *dst = dictGetVal(hi->de); } } +/* A non copy-on-write friendly but higher level version of hashTypeCurrent*() + * that returns an object with incremented refcount (or a new object). It is up + * to the caller to decrRefCount() the object if no reference is retained. */ +robj *hashTypeCurrentObject(hashTypeIterator *hi, int what) { + robj *dst; + + if (hi->encoding == REDIS_ENCODING_ZIPLIST) { + unsigned char *vstr = NULL; + unsigned int vlen = UINT_MAX; + long long vll = LLONG_MAX; + + hashTypeCurrentFromZiplist(hi, what, &vstr, &vlen, &vll); + if (vstr) { + dst = createStringObject((char*)vstr, vlen); + } else { + dst = createStringObjectFromLongLong(vll); + } + + } else if (hi->encoding == REDIS_ENCODING_HT) { + hashTypeCurrentFromHashTable(hi, what, &dst); + incrRefCount(dst); + + } else { + redisPanic("Unknown hash encoding"); + } + + return dst; +} + robj *hashTypeLookupWriteOrCreate(redisClient *c, robj *key) { robj *o = lookupKeyWrite(c->db,key); if (o == NULL) { @@ -245,25 +418,50 @@ robj *hashTypeLookupWriteOrCreate(redisClient *c, robj *key) { return o; } -void convertToRealHash(robj *o) { - unsigned char *key, *val, *p, *zm = o->ptr; - unsigned int klen, vlen; - dict *dict = dictCreate(&hashDictType,NULL); +void hashTypeConvertZiplist(robj *o, int enc) { + redisAssert(o->encoding == REDIS_ENCODING_ZIPLIST); - redisAssertWithInfo(NULL,o,o->type == REDIS_HASH && o->encoding != REDIS_ENCODING_HT); - p = zipmapRewind(zm); - while((p = zipmapNext(p,&key,&klen,&val,&vlen)) != NULL) { - robj *keyobj, *valobj; + if (enc == REDIS_ENCODING_ZIPLIST) { + /* Nothing to do... */ - keyobj = createStringObject((char*)key,klen); - valobj = createStringObject((char*)val,vlen); - keyobj = tryObjectEncoding(keyobj); - valobj = tryObjectEncoding(valobj); - dictAdd(dict,keyobj,valobj); + } else if (enc == REDIS_ENCODING_HT) { + hashTypeIterator *hi; + dict *dict; + int ret; + + hi = hashTypeInitIterator(o); + dict = dictCreate(&hashDictType, NULL); + + while (hashTypeNext(hi) != REDIS_ERR) { + robj *field, *value; + + field = hashTypeCurrentObject(hi, REDIS_HASH_KEY); + field = tryObjectEncoding(field); + value = hashTypeCurrentObject(hi, REDIS_HASH_VALUE); + value = tryObjectEncoding(value); + ret = dictAdd(dict, field, value); + redisAssert(ret == DICT_OK); + } + + hashTypeReleaseIterator(hi); + zfree(o->ptr); + + o->encoding = REDIS_ENCODING_HT; + o->ptr = dict; + + } else { + redisPanic("Unknown hash encoding"); + } +} + +void hashTypeConvert(robj *o, int enc) { + if (o->encoding == REDIS_ENCODING_ZIPLIST) { + hashTypeConvertZiplist(o, enc); + } else if (o->encoding == REDIS_ENCODING_HT) { + redisPanic("Not implemented"); + } else { + redisPanic("Unknown hash encoding"); } - o->encoding = REDIS_ENCODING_HT; - o->ptr = dict; - zfree(zm); } /*----------------------------------------------------------------------------- @@ -373,51 +571,69 @@ void hincrbyfloatCommand(redisClient *c) { server.dirty++; } +static void addHashFieldToReply(redisClient *c, robj *o, robj *field) { + int ret; + + if (o == NULL) { + addReply(c, shared.nullbulk); + return; + } + + if (o->encoding == REDIS_ENCODING_ZIPLIST) { + unsigned char *vstr = NULL; + unsigned int vlen = UINT_MAX; + long long vll = LLONG_MAX; + + ret = hashTypeGetFromZiplist(o, field, &vstr, &vlen, &vll); + if (ret < 0) { + addReply(c, shared.nullbulk); + } else { + if (vstr) { + addReplyBulkCBuffer(c, vstr, vlen); + } else { + addReplyBulkLongLong(c, vll); + } + } + + } else if (o->encoding == REDIS_ENCODING_HT) { + robj *value; + + ret = hashTypeGetFromHashTable(o, field, &value); + if (ret < 0) { + addReply(c, shared.nullbulk); + } else { + addReplyBulk(c, value); + } + + } else { + redisPanic("Unknown hash encoding"); + } +} + void hgetCommand(redisClient *c) { - robj *o, *value; - unsigned char *v; - unsigned int vlen; - int encoding; + robj *o; if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.nullbulk)) == NULL || checkType(c,o,REDIS_HASH)) return; - if ((encoding = hashTypeGet(o,c->argv[2],&value,&v,&vlen)) != -1) { - if (encoding == REDIS_ENCODING_HT) - addReplyBulk(c,value); - else - addReplyBulkCBuffer(c,v,vlen); - } else { - addReply(c,shared.nullbulk); - } + addHashFieldToReply(c, o, c->argv[2]); } void hmgetCommand(redisClient *c) { - int i, encoding; - robj *o, *value; - unsigned char *v; - unsigned int vlen; + robj *o; + int i; - o = lookupKeyRead(c->db,c->argv[1]); + /* Don't abort when the key cannot be found. Non-existing keys are empty + * hashes, where HMGET should respond with a series of null bulks. */ + o = lookupKeyRead(c->db, c->argv[1]); if (o != NULL && o->type != REDIS_HASH) { - addReply(c,shared.wrongtypeerr); + addReply(c, shared.wrongtypeerr); return; } - /* Note the check for o != NULL happens inside the loop. This is - * done because objects that cannot be found are considered to be - * an empty hash. The reply should then be a series of NULLs. */ - addReplyMultiBulkLen(c,c->argc-2); + addReplyMultiBulkLen(c, c->argc-2); for (i = 2; i < c->argc; i++) { - if (o != NULL && - (encoding = hashTypeGet(o,c->argv[i],&value,&v,&vlen)) != -1) { - if (encoding == REDIS_ENCODING_HT) - addReplyBulk(c,value); - else - addReplyBulkCBuffer(c,v,vlen); - } else { - addReply(c,shared.nullbulk); - } + addHashFieldToReply(c, o, c->argv[i]); } } @@ -452,42 +668,59 @@ void hlenCommand(redisClient *c) { addReplyLongLong(c,hashTypeLength(o)); } +static void addHashIteratorCursorToReply(redisClient *c, hashTypeIterator *hi, int what) { + if (hi->encoding == REDIS_ENCODING_ZIPLIST) { + unsigned char *vstr = NULL; + unsigned int vlen = UINT_MAX; + long long vll = LLONG_MAX; + + hashTypeCurrentFromZiplist(hi, what, &vstr, &vlen, &vll); + if (vstr) { + addReplyBulkCBuffer(c, vstr, vlen); + } else { + addReplyBulkLongLong(c, vll); + } + + } else if (hi->encoding == REDIS_ENCODING_HT) { + robj *value; + + hashTypeCurrentFromHashTable(hi, what, &value); + addReplyBulk(c, value); + + } else { + redisPanic("Unknown hash encoding"); + } +} + void genericHgetallCommand(redisClient *c, int flags) { robj *o; - unsigned long count = 0; hashTypeIterator *hi; - void *replylen = NULL; + int multiplier = 0; + int length, count = 0; if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.emptymultibulk)) == NULL || checkType(c,o,REDIS_HASH)) return; - replylen = addDeferredMultiBulkLength(c); + if (flags & REDIS_HASH_KEY) multiplier++; + if (flags & REDIS_HASH_VALUE) multiplier++; + + length = hashTypeLength(o) * multiplier; + addReplyMultiBulkLen(c, length); + hi = hashTypeInitIterator(o); while (hashTypeNext(hi) != REDIS_ERR) { - robj *obj; - unsigned char *v = NULL; - unsigned int vlen = 0; - int encoding; - if (flags & REDIS_HASH_KEY) { - encoding = hashTypeCurrent(hi,REDIS_HASH_KEY,&obj,&v,&vlen); - if (encoding == REDIS_ENCODING_HT) - addReplyBulk(c,obj); - else - addReplyBulkCBuffer(c,v,vlen); + addHashIteratorCursorToReply(c, hi, REDIS_HASH_KEY); count++; } if (flags & REDIS_HASH_VALUE) { - encoding = hashTypeCurrent(hi,REDIS_HASH_VALUE,&obj,&v,&vlen); - if (encoding == REDIS_ENCODING_HT) - addReplyBulk(c,obj); - else - addReplyBulkCBuffer(c,v,vlen); + addHashIteratorCursorToReply(c, hi, REDIS_HASH_VALUE); count++; } } + hashTypeReleaseIterator(hi); - setDeferredMultiBulkLength(c,replylen,count); + redisAssert(count == length); } void hkeysCommand(redisClient *c) { diff --git a/tests/unit/aofrw.tcl b/tests/unit/aofrw.tcl index a558ed4ca..358266ef7 100644 --- a/tests/unit/aofrw.tcl +++ b/tests/unit/aofrw.tcl @@ -54,10 +54,10 @@ start_server {tags {"aofrw"}} { } foreach d {string int} { - foreach e {zipmap hashtable} { + foreach e {ziplist hashtable} { test "AOF rewrite of hash with $e encoding, $d data" { r flushall - if {$e eq {zipmap}} {set len 10} else {set len 1000} + if {$e eq {ziplist}} {set len 10} else {set len 1000} for {set j 0} {$j < $len} {incr j} { if {$d eq {string}} { set data [randstring 0 16 alpha] diff --git a/tests/unit/type/hash.tcl b/tests/unit/type/hash.tcl index 04a5f4c75..141971a81 100644 --- a/tests/unit/type/hash.tcl +++ b/tests/unit/type/hash.tcl @@ -14,8 +14,8 @@ start_server {tags {"hash"}} { list [r hlen smallhash] } {8} - test {Is the small hash encoded with a zipmap?} { - assert_encoding zipmap smallhash + test {Is the small hash encoded with a ziplist?} { + assert_encoding ziplist smallhash } test {HSET/HLEN - Big hash creation} { @@ -33,7 +33,7 @@ start_server {tags {"hash"}} { list [r hlen bighash] } {1024} - test {Is the big hash encoded with a zipmap?} { + test {Is the big hash encoded with a ziplist?} { assert_encoding hashtable bighash } @@ -252,7 +252,7 @@ start_server {tags {"hash"}} { lappend rv [r hexists bighash nokey] } {1 0 1 0} - test {Is a zipmap encoded Hash promoted on big payload?} { + test {Is a ziplist encoded Hash promoted on big payload?} { r hset smallhash foo [string repeat a 1024] r debug object smallhash } {*hashtable*} @@ -382,7 +382,7 @@ start_server {tags {"hash"}} { lappend rv [string match "ERR*not*float*" $bigerr] } {1 1} - test {Hash zipmap regression test for large keys} { + test {Hash ziplist regression test for large keys} { r hset hash kkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkk a r hset hash kkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkk b r hget hash kkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkk From 3805b342125d0669b830a6148241d93ca0c662dc Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Tue, 3 Jan 2012 15:48:55 -0800 Subject: [PATCH 03/70] Implements ziplistFind To improve the performance of the ziplist implementation, some functions have been converted to macros to avoid unnecessary stack movement and duplicate variable assignments. --- src/t_hash.c | 70 +++++++--------- src/ziplist.c | 215 ++++++++++++++++++++++++++++++++------------------ src/ziplist.h | 1 + 3 files changed, 170 insertions(+), 116 deletions(-) diff --git a/src/t_hash.c b/src/t_hash.c index 9699223a8..a22de2429 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -47,23 +47,18 @@ int hashTypeGetFromZiplist(robj *o, robj *field, zl = o->ptr; fptr = ziplistIndex(zl, ZIPLIST_HEAD); - while (fptr != NULL) { - /* Grab pointer to the value (fptr points to the field) */ - vptr = ziplistNext(zl, fptr); - redisAssert(vptr != NULL); - - /* Compare field in ziplist with specified field */ - if (ziplistCompare(fptr, field->ptr, sdslen(field->ptr))) { - break; + if (fptr != NULL) { + fptr = ziplistFind(fptr, field->ptr, sdslen(field->ptr), 1); + if (fptr != NULL) { + /* Grab pointer to the value (fptr points to the field) */ + vptr = ziplistNext(zl, fptr); + redisAssert(vptr != NULL); } - - /* Skip over value */ - fptr = ziplistNext(zl, vptr); } decrRefCount(field); - if (fptr != NULL) { + if (vptr != NULL) { ret = ziplistGet(vptr, vstr, vlen, vll); redisAssert(ret); return 0; @@ -164,27 +159,28 @@ int hashTypeSet(robj *o, robj *field, robj *value) { zl = o->ptr; fptr = ziplistIndex(zl, ZIPLIST_HEAD); - while (fptr != NULL) { - /* Compare field in ziplist with specified field */ - if (ziplistCompare(fptr, field->ptr, sdslen(field->ptr))) { - zl = ziplistDelete(zl,&fptr); - zl = ziplistDelete(zl,&fptr); - o->ptr = zl; + if (fptr != NULL) { + fptr = ziplistFind(fptr, field->ptr, sdslen(field->ptr), 1); + if (fptr != NULL) { + /* Grab pointer to the value (fptr points to the field) */ + vptr = ziplistNext(zl, fptr); + redisAssert(vptr != NULL); update = 1; - break; + + /* Delete value */ + zl = ziplistDelete(zl, &vptr); + + /* Insert new value */ + zl = ziplistInsert(zl, vptr, value->ptr, sdslen(value->ptr)); } - - /* Grab pointer to the value (fptr points to the field) */ - vptr = ziplistNext(zl, fptr); - redisAssert(vptr != NULL); - - /* Grab pointer (if any) to the next field */ - fptr = ziplistNext(zl, vptr); } - /* Push new field/value pair onto the tail of the ziplist */ - zl = ziplistPush(zl, field->ptr, sdslen(field->ptr), ZIPLIST_TAIL); - zl = ziplistPush(zl, value->ptr, sdslen(value->ptr), ZIPLIST_TAIL); + if (!update) { + /* Push new field/value pair onto the tail of the ziplist */ + zl = ziplistPush(zl, field->ptr, sdslen(field->ptr), ZIPLIST_TAIL); + zl = ziplistPush(zl, value->ptr, sdslen(value->ptr), ZIPLIST_TAIL); + } + o->ptr = zl; decrRefCount(field); @@ -217,28 +213,20 @@ int hashTypeDelete(robj *o, robj *field) { int deleted = 0; if (o->encoding == REDIS_ENCODING_ZIPLIST) { - unsigned char *zl, *fptr, *vptr; + unsigned char *zl, *fptr; field = getDecodedObject(field); zl = o->ptr; fptr = ziplistIndex(zl, ZIPLIST_HEAD); - while (fptr != NULL) { - /* Compare field in ziplist with specified field */ - if (ziplistCompare(fptr, field->ptr, sdslen(field->ptr))) { + if (fptr != NULL) { + fptr = ziplistFind(fptr, field->ptr, sdslen(field->ptr), 1); + if (fptr != NULL) { zl = ziplistDelete(zl,&fptr); zl = ziplistDelete(zl,&fptr); o->ptr = zl; deleted = 1; - break; } - - /* Grab pointer to the value (fptr points to the field) */ - vptr = ziplistNext(zl, fptr); - redisAssert(vptr != NULL); - - /* Grab pointer (if any) to the next field */ - fptr = ziplistNext(zl, vptr); } decrRefCount(field); diff --git a/src/ziplist.c b/src/ziplist.c index 639e4b61e..4880013af 100644 --- a/src/ziplist.c +++ b/src/ziplist.c @@ -75,6 +75,8 @@ #define ZIP_BIGLEN 254 /* Different encoding/length possibilities */ +#define ZIP_STR_MASK (0xc0) +#define ZIP_INT_MASK (0x30) #define ZIP_STR_06B (0 << 6) #define ZIP_STR_14B (1 << 6) #define ZIP_STR_32B (2 << 6) @@ -82,9 +84,8 @@ #define ZIP_INT_32B (0xc0 | 1<<4) #define ZIP_INT_64B (0xc0 | 2<<4) -/* Macro's to determine type */ -#define ZIP_IS_STR(enc) (((enc) & 0xc0) < 0xc0) -#define ZIP_IS_INT(enc) (!ZIP_IS_STR(enc) && ((enc) & 0x30) < 0x30) +/* Macro to determine type */ +#define ZIP_IS_STR(enc) (((enc) & ZIP_STR_MASK) < ZIP_STR_MASK) /* Utility macros */ #define ZIPLIST_BYTES(zl) (*((uint32_t*)(zl))) @@ -108,19 +109,13 @@ typedef struct zlentry { unsigned char *p; } zlentry; -/* Return the encoding pointer to by 'p'. */ -static unsigned int zipEntryEncoding(unsigned char *p) { - /* String encoding: 2 MSBs */ - unsigned char b = p[0] & 0xc0; - if (b < 0xc0) { - return b; - } else { - /* Integer encoding: 4 MSBs */ - return p[0] & 0xf0; - } - assert(NULL); - return 0; -} +#define ZIP_ENTRY_ENCODING(ptr, encoding) do { \ + (encoding) = (ptr[0]) & (ZIP_STR_MASK | ZIP_INT_MASK); \ + if (((encoding) & ZIP_STR_MASK) < ZIP_STR_MASK) { \ + /* String encoding: 2 MSBs */ \ + (encoding) &= ZIP_STR_MASK; \ + } \ +} while(0) /* Return bytes needed to store integer encoded by 'encoding' */ static unsigned int zipIntSize(unsigned char encoding) { @@ -133,36 +128,6 @@ static unsigned int zipIntSize(unsigned char encoding) { return 0; } -/* Decode the encoded length pointed by 'p'. If a pointer to 'lensize' is - * provided, it is set to the number of bytes required to encode the length. */ -static unsigned int zipDecodeLength(unsigned char *p, unsigned int *lensize) { - unsigned char encoding = zipEntryEncoding(p); - unsigned int len = 0; - - if (ZIP_IS_STR(encoding)) { - switch(encoding) { - case ZIP_STR_06B: - len = p[0] & 0x3f; - if (lensize) *lensize = 1; - break; - case ZIP_STR_14B: - len = ((p[0] & 0x3f) << 8) | p[1]; - if (lensize) *lensize = 2; - break; - case ZIP_STR_32B: - len = (p[1] << 24) | (p[2] << 16) | (p[3] << 8) | p[4]; - if (lensize) *lensize = 5; - break; - default: - assert(NULL); - } - } else { - len = zipIntSize(encoding); - if (lensize) *lensize = 1; - } - return len; -} - /* Encode the length 'l' writing it in 'p'. If p is NULL it just returns * the amount of bytes required to encode such a length. */ static unsigned int zipEncodeLength(unsigned char *p, unsigned char encoding, unsigned int rawlen) { @@ -199,18 +164,33 @@ static unsigned int zipEncodeLength(unsigned char *p, unsigned char encoding, un return len; } -/* Decode the length of the previous element stored at "p". */ -static unsigned int zipPrevDecodeLength(unsigned char *p, unsigned int *lensize) { - unsigned int len = *p; - if (len < ZIP_BIGLEN) { - if (lensize) *lensize = 1; - } else { - if (lensize) *lensize = 1+sizeof(len); - memcpy(&len,p+1,sizeof(len)); - memrev32ifbe(&len); - } - return len; -} +/* Decode the length encoded in 'ptr'. The 'encoding' variable will hold the + * entries encoding, the 'lensize' variable will hold the number of bytes + * required to encode the entries length, and the 'len' variable will hold the + * entries length. */ +#define ZIP_DECODE_LENGTH(ptr, encoding, lensize, len) do { \ + ZIP_ENTRY_ENCODING((ptr), (encoding)); \ + if ((encoding) < ZIP_STR_MASK) { \ + if ((encoding) == ZIP_STR_06B) { \ + (lensize) = 1; \ + (len) = (ptr)[0] & 0x3f; \ + } else if ((encoding) == ZIP_STR_14B) { \ + (lensize) = 2; \ + (len) = (((ptr)[0] & 0x3f) << 8) | (ptr)[1]; \ + } else if (encoding == ZIP_STR_32B) { \ + (lensize) = 5; \ + (len) = ((ptr)[1] << 24) | \ + ((ptr)[2] << 16) | \ + ((ptr)[3] << 8) | \ + ((ptr)[4]); \ + } else { \ + assert(NULL); \ + } \ + } else { \ + (lensize) = 1; \ + (len) = zipIntSize(encoding); \ + } \ +} while(0); /* Encode the length of the previous entry and write it to "p". Return the * number of bytes needed to encode this length if "p" is NULL. */ @@ -239,12 +219,43 @@ static void zipPrevEncodeLengthForceLarge(unsigned char *p, unsigned int len) { memrev32ifbe(p+1); } -/* Return the difference in number of bytes needed to store the new length - * "len" on the entry pointed to by "p". */ +/* Decode the number of bytes required to store the length of the previous + * element, from the perspective of the entry pointed to by 'ptr'. */ +#define ZIP_DECODE_PREVLENSIZE(ptr, prevlensize) do { \ + if ((ptr)[0] < ZIP_BIGLEN) { \ + (prevlensize) = 1; \ + } else { \ + (prevlensize) = 5; \ + } \ +} while(0); + +/* Decode the length of the previous element, from the perspective of the entry + * pointed to by 'ptr'. */ +#define ZIP_DECODE_PREVLEN(ptr, prevlensize, prevlen) do { \ + ZIP_DECODE_PREVLENSIZE(ptr, prevlensize); \ + if ((prevlensize) == 1) { \ + (prevlen) = (ptr)[0]; \ + } else if ((prevlensize) == 5) { \ + assert(sizeof((prevlensize)) == 4); \ + memcpy(&(prevlen), ((char*)(ptr)) + 1, 4); \ + memrev32ifbe(&len); \ + } \ +} while(0); + +/* Return the difference in number of bytes needed to store the length of the + * previous element 'len', in the entry pointed to by 'p'. */ static int zipPrevLenByteDiff(unsigned char *p, unsigned int len) { unsigned int prevlensize; - zipPrevDecodeLength(p,&prevlensize); - return zipPrevEncodeLength(NULL,len)-prevlensize; + ZIP_DECODE_PREVLENSIZE(p, prevlensize); + return zipPrevEncodeLength(NULL, len) - prevlensize; +} + +/* Return the total number of bytes used by the entry pointed to by 'p'. */ +static unsigned int zipRawEntryLength(unsigned char *p) { + unsigned int prevlensize, encoding, lensize, len; + ZIP_DECODE_PREVLENSIZE(p, prevlensize); + ZIP_DECODE_LENGTH(p + prevlensize, encoding, lensize, len); + return prevlensize + lensize + len; } /* Check if string pointed to by 'entry' can be encoded as an integer. @@ -317,20 +328,14 @@ static int64_t zipLoadInteger(unsigned char *p, unsigned char encoding) { /* Return a struct with all information about an entry. */ static zlentry zipEntry(unsigned char *p) { zlentry e; - e.prevrawlen = zipPrevDecodeLength(p,&e.prevrawlensize); - e.len = zipDecodeLength(p+e.prevrawlensize,&e.lensize); - e.headersize = e.prevrawlensize+e.lensize; - e.encoding = zipEntryEncoding(p+e.prevrawlensize); + + ZIP_DECODE_PREVLEN(p, e.prevrawlensize, e.prevrawlen); + ZIP_DECODE_LENGTH(p + e.prevrawlensize, e.encoding, e.lensize, e.len); + e.headersize = e.prevrawlensize + e.lensize; e.p = p; return e; } -/* Return the total number of bytes used by the entry at "p". */ -static unsigned int zipRawEntryLength(unsigned char *p) { - zlentry e = zipEntry(p); - return e.headersize + e.len; -} - /* Create a new empty ziplist. */ unsigned char *ziplistNew(void) { unsigned int bytes = ZIPLIST_HEADER_SIZE+1; @@ -616,10 +621,14 @@ unsigned char *ziplistNext(unsigned char *zl, unsigned char *p) { * when the *next* element is ZIP_END (there is no next entry). */ if (p[0] == ZIP_END) { return NULL; - } else { - p = p+zipRawEntryLength(p); - return (p[0] == ZIP_END) ? NULL : p; } + + p += zipRawEntryLength(p); + if (p[0] == ZIP_END) { + return NULL; + } + + return p; } /* Return pointer to previous entry in ziplist. */ @@ -717,6 +726,62 @@ unsigned int ziplistCompare(unsigned char *p, unsigned char *sstr, unsigned int return 0; } +/* Find pointer to the entry equal to the specified entry. Skip 'skip' entries + * between every comparison. Returns NULL when the field could not be found. */ +unsigned char *ziplistFind(unsigned char *p, unsigned char *vstr, unsigned int vlen, unsigned int skip) { + int skipcnt = 0; + unsigned char vencoding = 0; + long long vll = 0; + + while (p[0] != ZIP_END) { + unsigned int prevlensize, encoding, lensize, len; + unsigned char *q; + + ZIP_DECODE_PREVLENSIZE(p, prevlensize); + ZIP_DECODE_LENGTH(p + prevlensize, encoding, lensize, len); + q = p + prevlensize + lensize; + + if (skipcnt == 0) { + /* Compare current entry with specified entry */ + if (ZIP_IS_STR(encoding)) { + if (len == vlen && memcmp(q, vstr, vlen) == 0) { + return p; + } + } else { + /* Find out if the specified entry can be encoded */ + if (vencoding == 0) { + /* UINT_MAX when the entry CANNOT be encoded */ + if (!zipTryEncoding(vstr, vlen, &vll, &vencoding)) { + vencoding = UCHAR_MAX; + } + + /* Must be non-zero by now */ + assert(vencoding); + } + + /* Compare current entry with specified entry */ + if (encoding == vencoding) { + long long ll = zipLoadInteger(q, encoding); + if (ll == vll) { + return p; + } + } + } + + /* Reset skip count */ + skipcnt = skip; + } else { + /* Skip entry */ + skipcnt--; + } + + /* Move to next entry */ + p = q + len; + } + + return NULL; +} + /* Return length of ziplist. */ unsigned int ziplistLen(unsigned char *zl) { unsigned int len = 0; diff --git a/src/ziplist.h b/src/ziplist.h index a07b84404..865b38b42 100644 --- a/src/ziplist.h +++ b/src/ziplist.h @@ -11,5 +11,6 @@ unsigned char *ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char unsigned char *ziplistDelete(unsigned char *zl, unsigned char **p); unsigned char *ziplistDeleteRange(unsigned char *zl, unsigned int index, unsigned int num); unsigned int ziplistCompare(unsigned char *p, unsigned char *s, unsigned int slen); +unsigned char *ziplistFind(unsigned char *p, unsigned char *vstr, unsigned int vlen, unsigned int skip); unsigned int ziplistLen(unsigned char *zl); size_t ziplistBlobLen(unsigned char *zl); From a40390001d152c0684bf0f36dbb2933de435dbb8 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Wed, 25 Jan 2012 13:26:25 -0800 Subject: [PATCH 04/70] Test that zipmap from RDB is correctly converted --- src/rdb.c | 16 +++++---- tests/assets/hash-zipmap.rdb | Bin 0 -> 35 bytes .../convert-zipmap-hash-on-load.tcl | 34 ++++++++++++++++++ 3 files changed, 44 insertions(+), 6 deletions(-) create mode 100644 tests/assets/hash-zipmap.rdb create mode 100644 tests/integration/convert-zipmap-hash-on-load.tcl diff --git a/src/rdb.c b/src/rdb.c index c74ec41c5..15555fe48 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -915,12 +915,13 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) { { unsigned char *zl = ziplistNew(); unsigned char *zi = zipmapRewind(o->ptr); + unsigned char *fstr, *vstr; + unsigned int flen, vlen; + unsigned int maxlen = 0; - while (zi != NULL) { - unsigned char *fstr, *vstr; - unsigned int flen, vlen; - - zi = zipmapNext(zi, &fstr, &flen, &vstr, &vlen); + while ((zi = zipmapNext(zi, &fstr, &flen, &vstr, &vlen)) != NULL) { + if (flen > maxlen) maxlen = flen; + if (vlen > maxlen) maxlen = vlen; zl = ziplistPush(zl, fstr, flen, ZIPLIST_TAIL); zl = ziplistPush(zl, vstr, vlen, ZIPLIST_TAIL); } @@ -930,8 +931,11 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) { o->type = REDIS_HASH; o->encoding = REDIS_ENCODING_ZIPLIST; - if (hashTypeLength(o) > server.hash_max_ziplist_entries) + if (hashTypeLength(o) > server.hash_max_ziplist_entries || + maxlen > server.hash_max_ziplist_value) + { hashTypeConvert(o, REDIS_ENCODING_HT); + } } break; case REDIS_RDB_TYPE_LIST_ZIPLIST: diff --git a/tests/assets/hash-zipmap.rdb b/tests/assets/hash-zipmap.rdb new file mode 100644 index 0000000000000000000000000000000000000000..27a42ed4bb494cee9d67b7f0feccce7e4136d50a GIT binary patch literal 35 pcmWG?b@2=~FfcIw$H2*wkyxA|z{Heh$iz@)$dqOTq>TRm2LPS*34j0q literal 0 HcmV?d00001 diff --git a/tests/integration/convert-zipmap-hash-on-load.tcl b/tests/integration/convert-zipmap-hash-on-load.tcl new file mode 100644 index 000000000..75a65d3e8 --- /dev/null +++ b/tests/integration/convert-zipmap-hash-on-load.tcl @@ -0,0 +1,34 @@ +set server_path [tmpdir "server.convert-zipmap-hash-on-load"] + +# Copy RDB with zipmap encoded hash to server path +exec cp tests/assets/hash-zipmap.rdb $server_path + +start_server [list overrides [list "dir" $server_path "dbfilename" "hash-zipmap.rdb"]] { + test "RDB load zipmap hash: converts to ziplist" { + r select 0 + + assert_match "*ziplist*" [r debug object hash] + assert_equal 2 [r hlen hash] + assert_match {v1 v2} [r hmget hash f1 f2] + } +} + +start_server [list overrides [list "dir" $server_path "dbfilename" "hash-zipmap.rdb" "hash-max-ziplist-entries" 1]] { + test "RDB load zipmap hash: converts to hash table when hash-max-ziplist-entries is exceeded" { + r select 0 + + assert_match "*hashtable*" [r debug object hash] + assert_equal 2 [r hlen hash] + assert_match {v1 v2} [r hmget hash f1 f2] + } +} + +start_server [list overrides [list "dir" $server_path "dbfilename" "hash-zipmap.rdb" "hash-max-ziplist-value" 1]] { + test "RDB load zipmap hash: converts to hash table when hash-max-ziplist-value is exceeded" { + r select 0 + + assert_match "*hashtable*" [r debug object hash] + assert_equal 2 [r hlen hash] + assert_match {v1 v2} [r hmget hash f1 f2] + } +} From 10cc40065e7e4d7c3f82148419aee5f7a1399ac2 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Wed, 25 Jan 2012 13:37:43 -0800 Subject: [PATCH 05/70] Update default configuration --- redis.conf | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/redis.conf b/redis.conf index 29e326d12..fc72b5364 100644 --- a/redis.conf +++ b/redis.conf @@ -394,12 +394,11 @@ slowlog-max-len 1024 ############################### ADVANCED CONFIG ############################### -# Hashes are encoded in a special way (much more memory efficient) when they -# have at max a given numer of elements, and the biggest element does not -# exceed a given threshold. You can configure this limits with the following -# configuration directives. -hash-max-zipmap-entries 512 -hash-max-zipmap-value 64 +# Hashes are encoded using a memory efficient data structure when they have a +# small number of entries, and the biggest entry does not exceed a given +# threshold. These thresholds can be configured using the following directives. +hash-max-ziplist-entries 512 +hash-max-ziplist-value 64 # Similarly to hashes, small lists are also encoded in a special way in order # to save a lot of space. The special representation is only used when From 4ff7b78ddf9dfbfcd40ad197006b7e3975a84c08 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 28 Feb 2012 16:17:00 +0100 Subject: [PATCH 06/70] Added command propagation API. --- src/redis.c | 42 +++++++++++++++++++++++++++++++++++------- src/redis.h | 5 +++++ 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/src/redis.c b/src/redis.c index b4acd6eaa..647c58673 100644 --- a/src/redis.c +++ b/src/redis.c @@ -1175,10 +1175,33 @@ struct redisCommand *lookupCommandByCString(char *s) { return cmd; } +/* Propagate the specified command (in the context of the specified database id) + * to AOF, Slaves and Monitors. + * + * flags are an xor between: + * + REDIS_PROPAGATE_NONE (no propagation of command at all) + * + REDIS_PROPAGATE_AOF (propagate into the AOF file if is enabled) + * + REDIS_PROPAGATE_REPL (propagate into the replication link) + */ +void propagate(struct redisCommand *cmd, int dbid, robj **argv, int argc, + int flags) +{ + if (server.aof_state != REDIS_AOF_OFF && flags & REDIS_PROPAGATE_AOF) + feedAppendOnlyFile(cmd,dbid,argv,argc); + if (flags & REDIS_PROPAGATE_REPL && listLength(server.slaves)) + replicationFeedSlaves(server.slaves,dbid,argv,argc); +} + /* Call() is the core of Redis execution of a command */ void call(redisClient *c, int flags) { long long dirty, start = ustime(), duration; + /* Sent the command to clients in MONITOR mode, only if the commands are + * not geneated from reading an AOF. */ + if (listLength(server.monitors) && !server.loading) + replicationFeedMonitors(server.monitors,c->db->id,c->argv,c->argc); + + /* Call the command. */ dirty = server.dirty; c->cmd->proc(c); dirty = server.dirty-dirty; @@ -1189,20 +1212,25 @@ void call(redisClient *c, int flags) { if (server.loading && c->flags & REDIS_LUA_CLIENT) flags &= ~(REDIS_CALL_SLOWLOG | REDIS_CALL_STATS); + /* Log the command into the Slow log if needed, and populate the + * per-command statistics that we show in INFO commandstats. */ if (flags & REDIS_CALL_SLOWLOG) slowlogPushEntryIfNeeded(c->argv,c->argc,duration); if (flags & REDIS_CALL_STATS) { c->cmd->microseconds += duration; c->cmd->calls++; } + + /* Propagate the command into the AOF and replication link */ if (flags & REDIS_CALL_PROPAGATE) { - if (server.aof_state != REDIS_AOF_OFF && dirty > 0) - feedAppendOnlyFile(c->cmd,c->db->id,c->argv,c->argc); - if ((dirty > 0 || c->cmd->flags & REDIS_CMD_FORCE_REPLICATION) && - listLength(server.slaves)) - replicationFeedSlaves(server.slaves,c->db->id,c->argv,c->argc); - if (listLength(server.monitors)) - replicationFeedMonitors(server.monitors,c->db->id,c->argv,c->argc); + int flags = REDIS_PROPAGATE_NONE; + + if (c->cmd->flags & REDIS_CMD_FORCE_REPLICATION) + flags |= REDIS_PROPAGATE_REPL; + if (dirty) + flags |= (REDIS_PROPAGATE_REPL | REDIS_PROPAGATE_AOF); + if (flags != REDIS_PROPAGATE_NONE) + propagate(c->cmd,c->db->id,c->argv,c->argc,flags); } server.stat_numcommands++; } diff --git a/src/redis.h b/src/redis.h index 982f33fe5..921e319eb 100644 --- a/src/redis.h +++ b/src/redis.h @@ -245,6 +245,11 @@ #define REDIS_CALL_PROPAGATE 4 #define REDIS_CALL_FULL (REDIS_CALL_SLOWLOG | REDIS_CALL_STATS | REDIS_CALL_PROPAGATE) +/* Command propagation flags, see propagate() function */ +#define REDIS_PROPAGATE_NONE 0 +#define REDIS_PROPAGATE_AOF 1 +#define REDIS_PROPAGATE_REPL 2 + /* We can print the stacktrace, so our assert is defined this way: */ #define redisAssertWithInfo(_c,_o,_e) ((_e)?(void)0 : (_redisAssertWithInfo(_c,_o,#_e,__FILE__,__LINE__),_exit(1))) #define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1))) From ef34fd54ed1d641e726f1af4a97a592ecee4a264 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 28 Feb 2012 16:17:55 +0100 Subject: [PATCH 07/70] Var renamed into pushGenericCommand() to better reflect what it means. --- src/t_list.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/t_list.c b/src/t_list.c index 3742ec49d..f0c6790e3 100644 --- a/src/t_list.c +++ b/src/t_list.c @@ -259,7 +259,7 @@ void listTypeConvert(robj *subject, int enc) { *----------------------------------------------------------------------------*/ void pushGenericCommand(redisClient *c, int where) { - int j, addlen = 0, pushed = 0; + int j, waiting = 0, pushed = 0; robj *lobj = lookupKeyWrite(c->db,c->argv[1]); int may_have_waiting_clients = (lobj == NULL); @@ -272,7 +272,7 @@ void pushGenericCommand(redisClient *c, int where) { c->argv[j] = tryObjectEncoding(c->argv[j]); if (may_have_waiting_clients) { if (handleClientsWaitingListPush(c,c->argv[1],c->argv[j])) { - addlen++; + waiting++; continue; } else { may_have_waiting_clients = 0; @@ -285,7 +285,7 @@ void pushGenericCommand(redisClient *c, int where) { listTypePush(lobj,c->argv[j],where); pushed++; } - addReplyLongLong(c,addlen + (lobj ? listTypeLength(lobj) : 0)); + addReplyLongLong(c, waiting + (lobj ? listTypeLength(lobj) : 0)); if (pushed) signalModifiedKey(c->db,c->argv[1]); server.dirty += pushed; } From 110a46da6a60b78a42e4c5065afc98c96933301d Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 28 Feb 2012 16:20:41 +0100 Subject: [PATCH 08/70] propagate() prototype added to redis.h --- src/redis.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/redis.h b/src/redis.h index 921e319eb..f1e407b12 100644 --- a/src/redis.h +++ b/src/redis.h @@ -955,6 +955,7 @@ void setupSignalHandlers(void); struct redisCommand *lookupCommand(sds name); struct redisCommand *lookupCommandByCString(char *s); void call(redisClient *c, int flags); +void propagate(struct redisCommand *cmd, int dbid, robj **argv, int argc, int flags); int prepareForShutdown(); void redisLog(int level, const char *fmt, ...); void redisLogRaw(int level, const char *msg); From 2b846878119323910224807106f052ac8ac7dd34 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 28 Feb 2012 18:03:08 +0100 Subject: [PATCH 09/70] Added a new API to replicate an additional command after the replication of the currently executed command, in order to propagte the LPUSH originating from RPOPLPUSH and indirectly by BRPOPLPUSH. --- src/redis.c | 26 ++++++++++++++++++++++++++ src/redis.h | 16 +++++++++++++++- src/t_list.c | 51 ++++++++++++++++++++++++++++++--------------------- 3 files changed, 71 insertions(+), 22 deletions(-) diff --git a/src/redis.c b/src/redis.c index 647c58673..2eed47030 100644 --- a/src/redis.c +++ b/src/redis.c @@ -962,6 +962,7 @@ void initServerConfig() { populateCommandTable(); server.delCommand = lookupCommandByCString("del"); server.multiCommand = lookupCommandByCString("multi"); + server.lpushCommand = lookupCommandByCString("lpush"); /* Slow log */ server.slowlog_log_slower_than = REDIS_SLOWLOG_LOG_SLOWER_THAN; @@ -1192,6 +1193,20 @@ void propagate(struct redisCommand *cmd, int dbid, robj **argv, int argc, replicationFeedSlaves(server.slaves,dbid,argv,argc); } +/* Used inside commands to propatate an additional command if needed. */ +void alsoPropagate(struct redisCommand *cmd, int dbid, robj **argv, int argc, + int target) +{ + propagatedItem *pi = &server.also_propagate; + + redisAssert(pi->target == REDIS_PROPAGATE_NONE); + pi->cmd = cmd; + pi->dbid = dbid; + pi->argv = argv; + pi->argc = argc; + pi->target = target; +} + /* Call() is the core of Redis execution of a command */ void call(redisClient *c, int flags) { long long dirty, start = ustime(), duration; @@ -1202,6 +1217,7 @@ void call(redisClient *c, int flags) { replicationFeedMonitors(server.monitors,c->db->id,c->argv,c->argc); /* Call the command. */ + server.also_propagate.target = REDIS_PROPAGATE_NONE; dirty = server.dirty; c->cmd->proc(c); dirty = server.dirty-dirty; @@ -1232,6 +1248,16 @@ void call(redisClient *c, int flags) { if (flags != REDIS_PROPAGATE_NONE) propagate(c->cmd,c->db->id,c->argv,c->argc,flags); } + /* Commands such as LPUSH or BRPOPLPUSH may propagate an additional + * PUSH command. */ + if (server.also_propagate.target != REDIS_PROPAGATE_NONE) { + int j; + propagatedItem *pi = &server.also_propagate; + + propagate(pi->cmd, pi->dbid, pi->argv, pi->argc, pi->target); + for (j = 0; j < pi->argc; j++) decrRefCount(pi->argv[j]); + zfree(pi->argv); + } server.stat_numcommands++; } diff --git a/src/redis.h b/src/redis.h index f1e407b12..09a746cc5 100644 --- a/src/redis.h +++ b/src/redis.h @@ -399,6 +399,17 @@ typedef struct clientBufferLimitsConfig { time_t soft_limit_seconds; } clientBufferLimitsConfig; +/* Currently only used to additionally propagate more commands to AOF/Replication + * after the propagation of the executed command. + * The structure contains everything needed to propagate a command: + * argv and argc, the ID of the database, pointer to the command table entry, + * and finally the target, that is an xor between REDIS_PROPAGATE_* flags. */ +typedef struct propagatedItem { + robj **argv; + int argc, dbid, target; + struct redisCommand *cmd; +} propagatedItem; + /*----------------------------------------------------------------------------- * Redis cluster data structures *----------------------------------------------------------------------------*/ @@ -562,7 +573,7 @@ struct redisServer { off_t loading_loaded_bytes; time_t loading_start_time; /* Fast pointers to often looked up command */ - struct redisCommand *delCommand, *multiCommand; + struct redisCommand *delCommand, *multiCommand, *lpushCommand; int cronloops; /* Number of times the cron function run */ time_t lastsave; /* Unix time of last save succeeede */ /* Fields used only for stats */ @@ -612,6 +623,8 @@ struct redisServer { int saveparamslen; /* Number of saving points */ char *rdb_filename; /* Name of RDB file */ int rdb_compression; /* Use compression in RDB? */ + /* Propagation of commands in AOF / replication */ + propagatedItem also_propagate; /* Additional command to propagate. */ /* Logging */ char *logfile; /* Path of log file */ int syslog_enabled; /* Is syslog enabled? */ @@ -956,6 +969,7 @@ struct redisCommand *lookupCommand(sds name); struct redisCommand *lookupCommandByCString(char *s); void call(redisClient *c, int flags); void propagate(struct redisCommand *cmd, int dbid, robj **argv, int argc, int flags); +void alsoPropagate(struct redisCommand *cmd, int dbid, robj **argv, int argc, int target); int prepareForShutdown(); void redisLog(int level, const char *fmt, ...); void redisLogRaw(int level, const char *msg); diff --git a/src/t_list.c b/src/t_list.c index f0c6790e3..636c556c2 100644 --- a/src/t_list.c +++ b/src/t_list.c @@ -288,6 +288,18 @@ void pushGenericCommand(redisClient *c, int where) { addReplyLongLong(c, waiting + (lobj ? listTypeLength(lobj) : 0)); if (pushed) signalModifiedKey(c->db,c->argv[1]); server.dirty += pushed; + + /* Alter the replication of the command accordingly to the number of + * list elements delivered to clients waiting into a blocking operation. + * We do that only if there were waiting clients, and only if still some + * element was pushed into the list (othewise dirty is 0 and nothign will + * be propagated). */ + if (waiting && pushed) { + /* CMD KEY a b C D E */ + for (j = 2; j < pushed+2; j++) + rewriteClientCommandArgument(c,j,c->argv[j+waiting]); + c->argc -= waiting; + } } void lpushCommand(redisClient *c) { @@ -655,8 +667,6 @@ void lremCommand(redisClient *c) { */ void rpoplpushHandlePush(redisClient *origclient, redisClient *c, robj *dstkey, robj *dstobj, robj *value) { - robj *aux; - if (!handleClientsWaitingListPush(origclient,dstkey,value)) { /* Create the list if the key does not exist */ if (!dstobj) { @@ -666,27 +676,19 @@ void rpoplpushHandlePush(redisClient *origclient, redisClient *c, robj *dstkey, signalModifiedKey(c->db,dstkey); } listTypePush(dstobj,value,REDIS_HEAD); - /* If we are pushing as a result of LPUSH against a key - * watched by BRPOPLPUSH, we need to rewrite the command vector - * as an LPUSH. - * - * If this is called directly by RPOPLPUSH (either directly - * or via a BRPOPLPUSH where the popped list exists) - * we should replicate the RPOPLPUSH command itself. */ - if (c != origclient) { - aux = createStringObject("LPUSH",5); - rewriteClientCommandVector(origclient,3,aux,dstkey,value); - decrRefCount(aux); - } else { - /* Make sure to always use RPOPLPUSH in the replication / AOF, - * even if the original command was BRPOPLPUSH. */ - aux = createStringObject("RPOPLPUSH",9); - rewriteClientCommandVector(origclient,3,aux,c->argv[1],c->argv[2]); - decrRefCount(aux); + /* Additionally propagate this PUSH operation together with + * the operation performed by the command. */ + { + robj **argv = zmalloc(sizeof(robj*)*3); + argv[0] = createStringObject("LPUSH",5); + argv[1] = dstkey; + argv[2] = value; + incrRefCount(argv[1]); + incrRefCount(argv[2]); + alsoPropagate(server.lpushCommand,c->db->id,argv,3, + REDIS_PROPAGATE_AOF|REDIS_PROPAGATE_REPL); } - server.dirty++; } - /* Always send the pushed value to the client. */ addReplyBulk(c,value); } @@ -717,6 +719,13 @@ void rpoplpushCommand(redisClient *c) { signalModifiedKey(c->db,touchedkey); decrRefCount(touchedkey); server.dirty++; + + /* Replicate this as a simple RPOP since the LPUSH side is replicated + * by rpoplpushHandlePush() call if needed (it may not be needed + * if a client is blocking wait a push against the list). */ + rewriteClientCommandVector(c,2, + resetRefCount(createStringObject("RPOP",4)), + c->argv[1]); } } From 332a430de82672abbdd550a74ee6af8759c47bb1 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 29 Feb 2012 00:46:50 +0100 Subject: [PATCH 10/70] Better system for additional commands replication. The new code uses a more generic data structure to describe redis operations. The new design allows for multiple alsoPropagate() calls within the scope of a single command, that is useful in different contexts. For instance there when there are multiple clients doing BRPOPLPUSH against the same list, and a variadic LPUSH is performed against this list, the blocked clients will both be served, and we should correctly replicate multiple LPUSH commands after the replication of the current command. --- src/redis.c | 63 ++++++++++++++++++++++++++++++++++++++++------------- src/redis.h | 29 +++++++++++++++++------- 2 files changed, 69 insertions(+), 23 deletions(-) diff --git a/src/redis.c b/src/redis.c index 2eed47030..78067d310 100644 --- a/src/redis.c +++ b/src/redis.c @@ -1161,6 +1161,43 @@ void resetCommandTableStats(void) { } } +/* ========================== Redis OP Array API ============================ */ + +void redisOpArrayInit(redisOpArray *oa) { + oa->ops = NULL; + oa->numops = 0; +} + +int redisOpArrayAppend(redisOpArray *oa, struct redisCommand *cmd, int dbid, + robj **argv, int argc, int target) +{ + redisOp *op; + + oa->ops = zrealloc(oa->ops,sizeof(redisOp)*(oa->numops+1)); + op = oa->ops+oa->numops; + op->cmd = cmd; + op->dbid = dbid; + op->argv = argv; + op->argc = argc; + op->target = target; + oa->numops++; + return oa->numops; +} + +void redisOpArrayFree(redisOpArray *oa) { + while(oa->numops) { + int j; + redisOp *op; + + oa->numops--; + op = oa->ops+oa->numops; + for (j = 0; j < op->argc; j++) + decrRefCount(op->argv[j]); + zfree(op->argv); + } + zfree(oa->ops); +} + /* ====================== Commands lookup and execution ===================== */ struct redisCommand *lookupCommand(sds name) { @@ -1193,18 +1230,12 @@ void propagate(struct redisCommand *cmd, int dbid, robj **argv, int argc, replicationFeedSlaves(server.slaves,dbid,argv,argc); } -/* Used inside commands to propatate an additional command if needed. */ +/* Used inside commands to schedule the propagation of additional commands + * after the current command is propagated to AOF / Replication. */ void alsoPropagate(struct redisCommand *cmd, int dbid, robj **argv, int argc, int target) { - propagatedItem *pi = &server.also_propagate; - - redisAssert(pi->target == REDIS_PROPAGATE_NONE); - pi->cmd = cmd; - pi->dbid = dbid; - pi->argv = argv; - pi->argc = argc; - pi->target = target; + redisOpArrayAppend(&server.also_propagate,cmd,dbid,argv,argc,target); } /* Call() is the core of Redis execution of a command */ @@ -1217,7 +1248,7 @@ void call(redisClient *c, int flags) { replicationFeedMonitors(server.monitors,c->db->id,c->argv,c->argc); /* Call the command. */ - server.also_propagate.target = REDIS_PROPAGATE_NONE; + redisOpArrayInit(&server.also_propagate); dirty = server.dirty; c->cmd->proc(c); dirty = server.dirty-dirty; @@ -1250,13 +1281,15 @@ void call(redisClient *c, int flags) { } /* Commands such as LPUSH or BRPOPLPUSH may propagate an additional * PUSH command. */ - if (server.also_propagate.target != REDIS_PROPAGATE_NONE) { + if (server.also_propagate.numops) { int j; - propagatedItem *pi = &server.also_propagate; + redisOp *rop; - propagate(pi->cmd, pi->dbid, pi->argv, pi->argc, pi->target); - for (j = 0; j < pi->argc; j++) decrRefCount(pi->argv[j]); - zfree(pi->argv); + for (j = 0; j < server.also_propagate.numops; j++) { + rop = &server.also_propagate.ops[j]; + propagate(rop->cmd, rop->dbid, rop->argv, rop->argc, rop->target); + } + redisOpArrayFree(&server.also_propagate); } server.stat_numcommands++; } diff --git a/src/redis.h b/src/redis.h index 09a746cc5..c26178702 100644 --- a/src/redis.h +++ b/src/redis.h @@ -399,16 +399,29 @@ typedef struct clientBufferLimitsConfig { time_t soft_limit_seconds; } clientBufferLimitsConfig; -/* Currently only used to additionally propagate more commands to AOF/Replication - * after the propagation of the executed command. - * The structure contains everything needed to propagate a command: - * argv and argc, the ID of the database, pointer to the command table entry, - * and finally the target, that is an xor between REDIS_PROPAGATE_* flags. */ -typedef struct propagatedItem { +/* The redisOp structure defines a Redis Operation, that is an instance of + * a command with an argument vector, database ID, propagation target + * (REDIS_PROPAGATE_*), and command pointer. + * + * Currently only used to additionally propagate more commands to AOF/Replication + * after the propagation of the executed command. */ +typedef struct redisOp { robj **argv; int argc, dbid, target; struct redisCommand *cmd; -} propagatedItem; +} redisOp; + +/* Defines an array of Redis operations. There is an API to add to this + * structure in a easy way. + * + * redisOpArrayInit(); + * redisOpArrayAppend(); + * redisOpArrayFree(); + */ +typedef struct redisOpArray { + redisOp *ops; + int numops; +} redisOpArray; /*----------------------------------------------------------------------------- * Redis cluster data structures @@ -624,7 +637,7 @@ struct redisServer { char *rdb_filename; /* Name of RDB file */ int rdb_compression; /* Use compression in RDB? */ /* Propagation of commands in AOF / replication */ - propagatedItem also_propagate; /* Additional command to propagate. */ + redisOpArray also_propagate; /* Additional command to propagate. */ /* Logging */ char *logfile; /* Path of log file */ int syslog_enabled; /* Is syslog enabled? */ From 84cfae38b3d385f60dea3a552d7229e0c0368ba5 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 29 Feb 2012 00:54:52 +0100 Subject: [PATCH 11/70] Version bumped to 2.9.5 --- src/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/version.h b/src/version.h index a7a2fdb24..9ef74b080 100644 --- a/src/version.h +++ b/src/version.h @@ -1 +1 @@ -#define REDIS_VERSION "2.9.4" +#define REDIS_VERSION "2.9.5" From 38c49acd48aca0d85093dee922346529ef856860 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 29 Feb 2012 13:38:30 +0100 Subject: [PATCH 12/70] lpush arguments vector rewrite modified for more speed and to memory leak removal. --- src/t_list.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/t_list.c b/src/t_list.c index 636c556c2..d6e28b7e5 100644 --- a/src/t_list.c +++ b/src/t_list.c @@ -296,8 +296,8 @@ void pushGenericCommand(redisClient *c, int where) { * be propagated). */ if (waiting && pushed) { /* CMD KEY a b C D E */ - for (j = 2; j < pushed+2; j++) - rewriteClientCommandArgument(c,j,c->argv[j+waiting]); + for (j = 0; j < waiting; j++) decrRefCount(c->argv[j+2]); + memmove(c->argv+2,c->argv+2+waiting,sizeof(robj*)*pushed); c->argc -= waiting; } } From 6b56c5fd036b822bb90b22f919bd21ed7cf475b4 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 29 Feb 2012 14:41:57 +0100 Subject: [PATCH 13/70] Better implementation for BRPOP/BLPOP in the non blocking case. --- src/redis.c | 2 ++ src/redis.h | 2 +- src/t_list.c | 40 +++++++++++++--------------------------- 3 files changed, 16 insertions(+), 28 deletions(-) diff --git a/src/redis.c b/src/redis.c index 78067d310..64df9073b 100644 --- a/src/redis.c +++ b/src/redis.c @@ -852,6 +852,8 @@ void createSharedObjects(void) { shared.psubscribebulk = createStringObject("$10\r\npsubscribe\r\n",17); shared.punsubscribebulk = createStringObject("$12\r\npunsubscribe\r\n",19); shared.del = createStringObject("DEL",3); + shared.rpop = createStringObject("RPOP",4); + shared.lpop = createStringObject("LPOP",4); for (j = 0; j < REDIS_SHARED_INTEGERS; j++) { shared.integers[j] = createObject(REDIS_STRING,(void*)(long)j); shared.integers[j]->encoding = REDIS_ENCODING_INT; diff --git a/src/redis.h b/src/redis.h index c26178702..cccdcc81b 100644 --- a/src/redis.h +++ b/src/redis.h @@ -365,7 +365,7 @@ struct sharedObjectsStruct { *select0, *select1, *select2, *select3, *select4, *select5, *select6, *select7, *select8, *select9, *messagebulk, *pmessagebulk, *subscribebulk, *unsubscribebulk, - *psubscribebulk, *punsubscribebulk, *del, + *psubscribebulk, *punsubscribebulk, *del, *rpop, *lpop, *integers[REDIS_SHARED_INTEGERS], *mbulkhdr[REDIS_SHARED_BULKHDR_LEN], /* "*\r\n" */ *bulkhdr[REDIS_SHARED_BULKHDR_LEN]; /* "$\r\n" */ diff --git a/src/t_list.c b/src/t_list.c index d6e28b7e5..2be8074a7 100644 --- a/src/t_list.c +++ b/src/t_list.c @@ -933,36 +933,22 @@ void blockingPopGenericCommand(redisClient *c, int where) { return; } else { if (listTypeLength(o) != 0) { - /* If the list contains elements fall back to the usual - * non-blocking POP operation */ - struct redisCommand *orig_cmd; - robj *argv[2], **orig_argv; - int orig_argc; + /* Non empty list, this is like a non normal [LR]POP. */ + robj *value = listTypePop(o,where); + redisAssert(value != NULL); - /* We need to alter the command arguments before to call - * popGenericCommand() as the command takes a single key. */ - orig_argv = c->argv; - orig_argc = c->argc; - orig_cmd = c->cmd; - argv[1] = c->argv[j]; - c->argv = argv; - c->argc = 2; - - /* Also the return value is different, we need to output - * the multi bulk reply header and the key name. The - * "real" command will add the last element (the value) - * for us. If this souds like an hack to you it's just - * because it is... */ addReplyMultiBulkLen(c,2); - addReplyBulk(c,argv[1]); - - popGenericCommand(c,where); - - /* Fix the client structure with the original stuff */ - c->argv = orig_argv; - c->argc = orig_argc; - c->cmd = orig_cmd; + addReplyBulk(c,c->argv[j]); + addReplyBulk(c,value); + decrRefCount(value); + if (listTypeLength(o) == 0) dbDelete(c->db,c->argv[j]); + signalModifiedKey(c->db,c->argv[j]); + server.dirty++; + /* Replicate it as an [LR]POP instead of B[LR]POP. */ + rewriteClientCommandVector(c,2, + (where == REDIS_HEAD) ? shared.lpop : shared.rpop, + c->argv[j]); return; } } From f77aff1f0555bc76550c5582d9481cc48e9bf887 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 29 Feb 2012 16:33:54 +0100 Subject: [PATCH 14/70] Ping the slave using the standard protocol instead of the inline one. --- src/replication.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/replication.c b/src/replication.c index dd42fcf17..e82978395 100644 --- a/src/replication.c +++ b/src/replication.c @@ -596,7 +596,7 @@ void replicationCron(void) { if (slave->replstate == REDIS_REPL_SEND_BULK) continue; if (slave->replstate == REDIS_REPL_ONLINE) { /* If the slave is online send a normal ping */ - addReplySds(slave,sdsnew("PING\r\n")); + addReplySds(slave,sdsnew("*1\r\n$4\r\nPING\r\n")); } else { /* Otherwise we are in the pre-synchronization stage. * Just a newline will do the work of refreshing the From 6f484a07044c758af55cf69d549bb09a51b3ab10 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 29 Feb 2012 17:10:21 +0100 Subject: [PATCH 15/70] Initial implementation of redis-cli --slave support. --- src/redis-cli.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/src/redis-cli.c b/src/redis-cli.c index 827e40611..3b3ef5044 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -64,6 +64,7 @@ static struct config { int latency_mode; int cluster_mode; int cluster_reissue_command; + int slave_mode; int stdinarg; /* get last arg from stdin. (-x option) */ char *auth; int raw_output; /* output mode per command */ @@ -606,6 +607,8 @@ static int parseOptions(int argc, char **argv) { config.raw_output = 1; } else if (!strcmp(argv[i],"--latency")) { config.latency_mode = 1; + } else if (!strcmp(argv[i],"--slave")) { + config.slave_mode = 1; } else if (!strcmp(argv[i],"--eval") && !lastarg) { config.eval = argv[++i]; } else if (!strcmp(argv[i],"-c")) { @@ -661,6 +664,7 @@ static void usage() { " -c Enable cluster mode (follow -ASK and -MOVED redirections)\n" " --raw Use raw formatting for replies (default when STDOUT is not a tty)\n" " --latency Enter a special mode continuously sampling latency.\n" +" --slave Simulate a slave showing commands received from the master.\n" " --eval Send an EVAL command using the Lua script at .\n" " --help Output this help and exit\n" " --version Output version and exit\n" @@ -866,6 +870,51 @@ static void latencyMode(void) { } } +static void slaveMode(void) { + /* To start we need to send the SYNC command and return the payload. + * The hiredis client lib does not understand this part of the protocol + * and we don't want to mess with its buffers, so everything is performed + * using direct low-level I/O. */ + int fd = context->fd; + char buf[1024], *p; + ssize_t nread; + unsigned long long payload; + + /* Send the SYNC command. */ + write(fd,"SYNC\r\n",6); + + /* Read $\r\n, making sure to read just up to "\n" */ + p = buf; + while(1) { + nread = read(fd,p,1); + if (nread <= 0) { + fprintf(stderr,"Error reading bulk length while SYNCing\n"); + exit(1); + } + if (*p == '\n') break; + p++; + } + *p = '\0'; + payload = strtoull(buf+1,NULL,10); + if (!config.raw_output) + printf("SYNC with master, discarding %lld bytes of bulk tranfer...\n", + payload); + + /* Discard the payload. */ + while(payload) { + nread = read(fd,buf,(payload > sizeof(buf)) ? sizeof(buf) : payload); + if (nread <= 0) { + fprintf(stderr,"Error reading RDB payload while SYNCing\n"); + exit(1); + } + payload -= nread; + } + if (!config.raw_output) printf("SYNC done. Logging commands from master.\n"); + + /* Now we can use the hiredis to read the incoming protocol. */ + while (cliReadReply(config.raw_output) == REDIS_OK); +} + int main(int argc, char **argv) { int firstarg; @@ -898,6 +947,12 @@ int main(int argc, char **argv) { latencyMode(); } + /* Start in slave mode if appropriate */ + if (config.slave_mode) { + cliConnect(0); + slaveMode(); + } + /* Start interactive mode when no command is provided */ if (argc == 0 && !config.eval) { /* Note that in repl mode we don't abort on connection error. From caa58bc830dae5ea586a610506492caa6d4f7cc8 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 29 Feb 2012 17:43:03 +0100 Subject: [PATCH 16/70] redis-cli: CSV output added, used for the --slave mode. --- src/redis-cli.c | 68 +++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 58 insertions(+), 10 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 3b3ef5044..c631aa791 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -49,6 +49,10 @@ #define REDIS_NOTUSED(V) ((void) V) +#define OUTPUT_STANDARD 0 +#define OUTPUT_RAW 1 +#define OUTPUT_CSV 2 + static redisContext *context; static struct config { char *hostip; @@ -67,7 +71,7 @@ static struct config { int slave_mode; int stdinarg; /* get last arg from stdin. (-x option) */ char *auth; - int raw_output; /* output mode per command */ + int output; /* output mode, see OUTPUT_* defines */ sds mb_delim; char prompt[128]; char *eval; @@ -435,6 +439,42 @@ static sds cliFormatReplyRaw(redisReply *r) { return out; } +static sds cliFormatReplyCSV(redisReply *r) { + unsigned int i; + + sds out = sdsempty(); + switch (r->type) { + case REDIS_REPLY_ERROR: + out = sdscat(out,"ERROR,"); + out = sdscatrepr(out,r->str,strlen(r->str)); + break; + case REDIS_REPLY_STATUS: + out = sdscatrepr(out,r->str,r->len); + break; + case REDIS_REPLY_INTEGER: + out = sdscatprintf(out,"%lld",r->integer); + break; + case REDIS_REPLY_STRING: + out = sdscatrepr(out,r->str,r->len); + break; + case REDIS_REPLY_NIL: + out = sdscat(out,"NIL\n"); + break; + case REDIS_REPLY_ARRAY: + for (i = 0; i < r->elements; i++) { + sds tmp = cliFormatReplyCSV(r->element[i]); + out = sdscatlen(out,tmp,sdslen(tmp)); + if (i != r->elements-1) out = sdscat(out,","); + sdsfree(tmp); + } + break; + default: + fprintf(stderr,"Unknown reply type: %d\n", r->type); + exit(1); + } + return out; +} + static int cliReadReply(int output_raw_strings) { void *_reply; redisReply *reply; @@ -490,11 +530,14 @@ static int cliReadReply(int output_raw_strings) { if (output_raw_strings) { out = cliFormatReplyRaw(reply); } else { - if (config.raw_output) { + if (config.output == OUTPUT_RAW) { out = cliFormatReplyRaw(reply); out = sdscat(out,"\n"); - } else { + } else if (config.output == OUTPUT_STANDARD) { out = cliFormatReplyTTY(reply,""); + } else if (config.output == OUTPUT_CSV) { + out = cliFormatReplyCSV(reply); + out = sdscat(out,"\n"); } } fwrite(out,sdslen(out),1,stdout); @@ -546,7 +589,7 @@ static int cliSendCommand(int argc, char **argv, int repeat) { } if (config.pubsub_mode) { - if (!config.raw_output) + if (config.output != OUTPUT_RAW) printf("Reading messages... (press Ctrl-C to quit)\n"); while (1) { if (cliReadReply(output_raw) != REDIS_OK) exit(1); @@ -604,7 +647,9 @@ static int parseOptions(int argc, char **argv) { } else if (!strcmp(argv[i],"-a") && !lastarg) { config.auth = argv[++i]; } else if (!strcmp(argv[i],"--raw")) { - config.raw_output = 1; + config.output = OUTPUT_RAW; + } else if (!strcmp(argv[i],"--csv")) { + config.output = OUTPUT_CSV; } else if (!strcmp(argv[i],"--latency")) { config.latency_mode = 1; } else if (!strcmp(argv[i],"--slave")) { @@ -896,8 +941,7 @@ static void slaveMode(void) { } *p = '\0'; payload = strtoull(buf+1,NULL,10); - if (!config.raw_output) - printf("SYNC with master, discarding %lld bytes of bulk tranfer...\n", + fprintf(stderr,"SYNC with master, discarding %lld bytes of bulk tranfer...\n", payload); /* Discard the payload. */ @@ -909,10 +953,11 @@ static void slaveMode(void) { } payload -= nread; } - if (!config.raw_output) printf("SYNC done. Logging commands from master.\n"); + fprintf(stderr,"SYNC done. Logging commands from master.\n"); /* Now we can use the hiredis to read the incoming protocol. */ - while (cliReadReply(config.raw_output) == REDIS_OK); + config.output = OUTPUT_CSV; + while (cliReadReply(0) == REDIS_OK); } int main(int argc, char **argv) { @@ -933,7 +978,10 @@ int main(int argc, char **argv) { config.stdinarg = 0; config.auth = NULL; config.eval = NULL; - config.raw_output = !isatty(fileno(stdout)) && (getenv("FAKETTY") == NULL); + if (!isatty(fileno(stdout)) && (getenv("FAKETTY") == NULL)) + config.output = OUTPUT_RAW; + else + config.output = OUTPUT_STANDARD; config.mb_delim = sdsnew("\n"); cliInitHelp(); From ebf37c3156ff98470d739880b8bb0a7466d2cd93 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 7 Mar 2012 10:38:01 +0100 Subject: [PATCH 17/70] TIME command. --- src/redis.c | 14 +++++++++++++- src/redis.h | 1 + 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/redis.c b/src/redis.c index 64df9073b..452c3bb75 100644 --- a/src/redis.c +++ b/src/redis.c @@ -242,7 +242,8 @@ struct redisCommand redisCommandTable[] = { {"eval",evalCommand,-3,"wms",0,zunionInterGetKeys,0,0,0,0,0}, {"evalsha",evalShaCommand,-3,"wms",0,zunionInterGetKeys,0,0,0,0,0}, {"slowlog",slowlogCommand,-2,"r",0,NULL,0,0,0,0,0}, - {"script",scriptCommand,-2,"ras",0,NULL,0,0,0,0,0} + {"script",scriptCommand,-2,"ras",0,NULL,0,0,0,0,0}, + {"time",timeCommand,1,"rR",0,NULL,0,0,0,0,0} }; /*============================ Utility functions ============================ */ @@ -1505,6 +1506,17 @@ void echoCommand(redisClient *c) { addReplyBulk(c,c->argv[1]); } +void timeCommand(redisClient *c) { + struct timeval tv; + + /* gettimeofday() can only fail if &tv is a bad addresss so we + * don't check for errors. */ + gettimeofday(&tv,NULL); + addReplyMultiBulkLen(c,2); + addReplyBulkLongLong(c,tv.tv_sec); + addReplyBulkLongLong(c,tv.tv_usec); +} + /* Convert an amount of bytes into a human readable string in the form * of 100B, 2G, 100M, 4K, and so forth. */ void bytesToHuman(char *s, unsigned long long n) { diff --git a/src/redis.h b/src/redis.h index cccdcc81b..870439d7b 100644 --- a/src/redis.h +++ b/src/redis.h @@ -1218,6 +1218,7 @@ void clientCommand(redisClient *c); void evalCommand(redisClient *c); void evalShaCommand(redisClient *c); void scriptCommand(redisClient *c); +void timeCommand(redisClient *c); #if defined(__GNUC__) void *calloc(size_t count, size_t size) __attribute__ ((deprecated)); From fcc81cb320be641a2dfe1eb845836f8e92ee7eee Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 7 Mar 2012 11:30:30 +0100 Subject: [PATCH 18/70] anetPeerToString() automatically populates ip/port with something that may be provided to the user as output in case of errors. --- src/anet.c | 7 ++++++- src/networking.c | 6 +----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/anet.c b/src/anet.c index 9aff4dfa1..ba4e6cce8 100644 --- a/src/anet.c +++ b/src/anet.c @@ -353,7 +353,12 @@ int anetPeerToString(int fd, char *ip, int *port) { struct sockaddr_in sa; socklen_t salen = sizeof(sa); - if (getpeername(fd,(struct sockaddr*)&sa,&salen) == -1) return -1; + if (getpeername(fd,(struct sockaddr*)&sa,&salen) == -1) { + *port = 0; + ip[0] = '?'; + ip[1] = '\0'; + return -1; + } if (ip) strcpy(ip,inet_ntoa(sa.sin_addr)); if (port) *port = ntohs(sa.sin_port); return 0; diff --git a/src/networking.c b/src/networking.c index d8fb81320..40aad8360 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1087,11 +1087,7 @@ sds getClientInfoString(redisClient *client) { time_t now = time(NULL); int emask; - if (anetPeerToString(client->fd,ip,&port) == -1) { - ip[0] = '?'; - ip[1] = '\0'; - port = 0; - } + anetPeerToString(client->fd,ip,&port); p = flags; if (client->flags & REDIS_SLAVE) { if (client->flags & REDIS_MONITOR) From 8c0915285d2dd0f74db6f571b428ee17f9005774 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 7 Mar 2012 12:12:15 +0100 Subject: [PATCH 19/70] Better MONITOR output, now includes client ip:port or the lua string if the command was executed by the scripting engine. --- src/redis.c | 2 +- src/redis.h | 2 +- src/replication.c | 12 +++++++++--- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/redis.c b/src/redis.c index 452c3bb75..6d5522e82 100644 --- a/src/redis.c +++ b/src/redis.c @@ -1248,7 +1248,7 @@ void call(redisClient *c, int flags) { /* Sent the command to clients in MONITOR mode, only if the commands are * not geneated from reading an AOF. */ if (listLength(server.monitors) && !server.loading) - replicationFeedMonitors(server.monitors,c->db->id,c->argv,c->argc); + replicationFeedMonitors(c,server.monitors,c->db->id,c->argv,c->argc); /* Call the command. */ redisOpArrayInit(&server.also_propagate); diff --git a/src/redis.h b/src/redis.h index 870439d7b..c18e91700 100644 --- a/src/redis.h +++ b/src/redis.h @@ -932,7 +932,7 @@ int syncReadLine(int fd, char *ptr, ssize_t size, int timeout); /* Replication */ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc); -void replicationFeedMonitors(list *monitors, int dictid, robj **argv, int argc); +void replicationFeedMonitors(redisClient *c, list *monitors, int dictid, robj **argv, int argc); void updateSlavesWaitingBgsave(int bgsaveerr); void replicationCron(void); diff --git a/src/replication.c b/src/replication.c index e82978395..6c0091e8c 100644 --- a/src/replication.c +++ b/src/replication.c @@ -50,17 +50,23 @@ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) { } } -void replicationFeedMonitors(list *monitors, int dictid, robj **argv, int argc) { +void replicationFeedMonitors(redisClient *c, list *monitors, int dictid, robj **argv, int argc) { listNode *ln; listIter li; - int j; + int j, port; sds cmdrepr = sdsnew("+"); robj *cmdobj; + char ip[32]; struct timeval tv; gettimeofday(&tv,NULL); cmdrepr = sdscatprintf(cmdrepr,"%ld.%06ld ",(long)tv.tv_sec,(long)tv.tv_usec); - if (dictid != 0) cmdrepr = sdscatprintf(cmdrepr,"(db %d) ", dictid); + if (c->flags & REDIS_LUA_CLIENT) { + cmdrepr = sdscatprintf(cmdrepr,"[%d lua] ", dictid); + } else { + anetPeerToString(c->fd,ip,&port); + cmdrepr = sdscatprintf(cmdrepr,"[%d %s:%d] ", dictid,ip,port); + } for (j = 0; j < argc; j++) { if (argv[j]->encoding == REDIS_ENCODING_INT) { From a380c4ad9cfc2767785e71fc41d17f37afc15e23 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 7 Mar 2012 13:05:46 +0100 Subject: [PATCH 20/70] Refuse writes if can't persist on disk. Redis now refuses accepting write queries if RDB persistence is configured, but RDB snapshots can't be generated for some reason. The status of the latest background save operation is now exposed in the INFO output as well. This fixes issue #90. --- src/rdb.c | 4 ++++ src/redis.c | 13 +++++++++++++ src/redis.h | 9 +++++---- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/src/rdb.c b/src/rdb.c index 3a10e0cec..840e99137 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -656,6 +656,7 @@ int rdbSave(char *filename) { redisLog(REDIS_NOTICE,"DB saved on disk"); server.dirty = 0; server.lastsave = time(NULL); + server.lastbgsave_status = REDIS_OK; return REDIS_OK; werr: @@ -1061,12 +1062,15 @@ void backgroundSaveDoneHandler(int exitcode, int bysignal) { "Background saving terminated with success"); server.dirty = server.dirty - server.dirty_before_bgsave; server.lastsave = time(NULL); + server.lastbgsave_status = REDIS_OK; } else if (!bysignal && exitcode != 0) { redisLog(REDIS_WARNING, "Background saving error"); + server.lastbgsave_status = REDIS_ERR; } else { redisLog(REDIS_WARNING, "Background saving terminated by signal %d", bysignal); rdbRemoveTempFile(server.rdb_child_pid); + server.lastbgsave_status = REDIS_ERR; } server.rdb_child_pid = -1; /* Possibly there are slaves waiting for a BGSAVE in order to be served diff --git a/src/redis.c b/src/redis.c index 6d5522e82..3dfef024f 100644 --- a/src/redis.c +++ b/src/redis.c @@ -833,6 +833,8 @@ void createSharedObjects(void) { "-LOADING Redis is loading the dataset in memory\r\n")); shared.slowscripterr = createObject(REDIS_STRING,sdsnew( "-BUSY Redis is busy running a script. You can only call SCRIPT KILL or SHUTDOWN NOSAVE.\r\n")); + shared.bgsaveerr = createObject(REDIS_STRING,sdsnew( + "-MISCONF Redis is configured to save RDB snapshots, but is currently not able to persist on disk. Write commands are disabled. Please check Redis logs for details about the error.\r\n")); shared.space = createObject(REDIS_STRING,sdsnew(" ")); shared.colon = createObject(REDIS_STRING,sdsnew(":")); shared.plus = createObject(REDIS_STRING,sdsnew("+")); @@ -1088,6 +1090,7 @@ void initServer() { server.stat_fork_time = 0; server.stat_rejected_conn = 0; server.unixtime = time(NULL); + server.lastbgsave_status = REDIS_OK; aeCreateTimeEvent(server.el, 1, serverCron, NULL, NULL); if (server.ipfd > 0 && aeCreateFileEvent(server.el,server.ipfd,AE_READABLE, acceptTcpHandler,NULL) == AE_ERR) oom("creating file event"); @@ -1374,6 +1377,14 @@ int processCommand(redisClient *c) { } } + /* Don't accept write commands if there are problems persisting on disk. */ + if (server.saveparamslen > 0 && server.lastbgsave_status == REDIS_ERR && + c->cmd->flags & REDIS_CMD_WRITE) + { + addReply(c, shared.bgsaveerr); + return REDIS_OK; + } + /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */ if ((dictSize(c->pubsub_channels) > 0 || listLength(c->pubsub_patterns) > 0) && @@ -1645,12 +1656,14 @@ sds genRedisInfoString(char *section) { "changes_since_last_save:%lld\r\n" "bgsave_in_progress:%d\r\n" "last_save_time:%ld\r\n" + "last_bgsave_status:%s\r\n" "bgrewriteaof_in_progress:%d\r\n", server.loading, server.aof_state != REDIS_AOF_OFF, server.dirty, server.rdb_child_pid != -1, server.lastsave, + server.lastbgsave_status == REDIS_OK ? "ok" : "err", server.aof_child_pid != -1); if (server.aof_state != REDIS_AOF_OFF) { diff --git a/src/redis.h b/src/redis.h index c18e91700..daaf362fb 100644 --- a/src/redis.h +++ b/src/redis.h @@ -361,8 +361,8 @@ struct sharedObjectsStruct { robj *crlf, *ok, *err, *emptybulk, *czero, *cone, *cnegone, *pong, *space, *colon, *nullbulk, *nullmultibulk, *queued, *emptymultibulk, *wrongtypeerr, *nokeyerr, *syntaxerr, *sameobjecterr, - *outofrangeerr, *noscripterr, *loadingerr, *slowscripterr, *plus, - *select0, *select1, *select2, *select3, *select4, + *outofrangeerr, *noscripterr, *loadingerr, *slowscripterr, *bgsaveerr, + *plus, *select0, *select1, *select2, *select3, *select4, *select5, *select6, *select7, *select8, *select9, *messagebulk, *pmessagebulk, *subscribebulk, *unsubscribebulk, *psubscribebulk, *punsubscribebulk, *del, *rpop, *lpop, @@ -567,6 +567,7 @@ struct redisServer { char *requirepass; /* Pass for AUTH command, or NULL */ char *pidfile; /* PID file path */ int arch_bits; /* 32 or 64 depending on sizeof(long) */ + int cronloops; /* Number of times the cron function run */ /* Networking */ int port; /* TCP listening port */ char *bindaddr; /* Bind address or NULL */ @@ -587,8 +588,6 @@ struct redisServer { time_t loading_start_time; /* Fast pointers to often looked up command */ struct redisCommand *delCommand, *multiCommand, *lpushCommand; - int cronloops; /* Number of times the cron function run */ - time_t lastsave; /* Unix time of last save succeeede */ /* Fields used only for stats */ time_t stat_starttime; /* Server start time */ long long stat_numcommands; /* Number of processed commands */ @@ -636,6 +635,8 @@ struct redisServer { int saveparamslen; /* Number of saving points */ char *rdb_filename; /* Name of RDB file */ int rdb_compression; /* Use compression in RDB? */ + time_t lastsave; /* Unix time of last save succeeede */ + int lastbgsave_status; /* REDIS_OK or REDIS_ERR */ /* Propagation of commands in AOF / replication */ redisOpArray also_propagate; /* Additional command to propagate. */ /* Logging */ From bab3ce5a09b0b70f275a5bf490d07cd8be5dd094 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 7 Mar 2012 18:02:26 +0100 Subject: [PATCH 21/70] By default Redis refuses writes with an error if the latest BGSAVE failed (and at least one save point is configured). However people having good monitoring systems may prefer a server that continues to work, since they are notified that there are problems by their monitoring systems. This commit implements the ability to turn the feature on or off via redis.conf and CONFIG SET. --- redis.conf | 15 +++++++++++++++ src/config.c | 14 ++++++++++++++ src/redis.c | 5 ++++- src/redis.h | 1 + 4 files changed, 34 insertions(+), 1 deletion(-) diff --git a/redis.conf b/redis.conf index 4760f2914..2b4b64796 100644 --- a/redis.conf +++ b/redis.conf @@ -93,6 +93,21 @@ save 900 1 save 300 10 save 60 10000 +# By default Redis will stop accepting writes if RDB snapshots are enabled +# (at least one save point) and the latest background save failed. +# This will make the user aware (in an hard way) that data is not persisting +# on disk properly, otherwise chances are that no one will notice and some +# distater will happen. +# +# If the background saving process will start working again Redis will +# automatically allow writes again. +# +# However if you have setup your proper monitoring of the Redis server +# and persistence, you may want to disable this feature so that Redis will +# continue to work as usually even if there are problems with disk, +# permissions, and so forth. +stop-writes-on-bgsave-error yes + # Compress string objects using LZF when dump .rdb databases? # For default that's set to 'yes' as it's almost always a win. # If you want to save some CPU in the saving child set it to 'no' but diff --git a/src/config.c b/src/config.c index 9d28d6780..6befca8ff 100644 --- a/src/config.c +++ b/src/config.c @@ -336,6 +336,11 @@ void loadServerConfigFromString(char *config) { server.client_obuf_limits[class].hard_limit_bytes = hard; server.client_obuf_limits[class].soft_limit_bytes = soft; server.client_obuf_limits[class].soft_limit_seconds = soft_seconds; + } else if (!strcasecmp(argv[0],"stop-writes-on-bgsave-error") && + argc == 2) { + if ((server.stop_writes_on_bgsave_err = yesnotoi(argv[1])) == -1) { + err = "argument must be 'yes' or 'no'"; goto loaderr; + } } else { err = "Bad directive or wrong number of arguments"; goto loaderr; } @@ -604,7 +609,11 @@ void configSetCommand(redisClient *c) { server.client_obuf_limits[class].soft_limit_seconds = soft_seconds; } sdsfreesplitres(v,vlen); + } else if (!strcasecmp(c->argv[2]->ptr,"stop-writes-on-bgsave-error")) { + int yn = yesnotoi(o->ptr); + if (yn == -1) goto badfmt; + server.stop_writes_on_bgsave_err = yn; } else { addReplyErrorFormat(c,"Unsupported CONFIG parameter: %s", (char*)c->argv[2]->ptr); @@ -822,6 +831,11 @@ void configGetCommand(redisClient *c) { sdsfree(buf); matches++; } + if (stringmatch(pattern,"stop-writes-on-bgsave-error",0)) { + addReplyBulkCString(c,"stop-writes-on-bgsave-error"); + addReplyBulkCString(c,server.stop_writes_on_bgsave_err ? "yes" : "no"); + matches++; + } setDeferredMultiBulkLength(c,replylen,matches*2); } diff --git a/src/redis.c b/src/redis.c index 3dfef024f..01ec0531e 100644 --- a/src/redis.c +++ b/src/redis.c @@ -1091,6 +1091,7 @@ void initServer() { server.stat_rejected_conn = 0; server.unixtime = time(NULL); server.lastbgsave_status = REDIS_OK; + server.stop_writes_on_bgsave_err = 1; aeCreateTimeEvent(server.el, 1, serverCron, NULL, NULL); if (server.ipfd > 0 && aeCreateFileEvent(server.el,server.ipfd,AE_READABLE, acceptTcpHandler,NULL) == AE_ERR) oom("creating file event"); @@ -1378,7 +1379,9 @@ int processCommand(redisClient *c) { } /* Don't accept write commands if there are problems persisting on disk. */ - if (server.saveparamslen > 0 && server.lastbgsave_status == REDIS_ERR && + if (server.stop_writes_on_bgsave_err && + server.saveparamslen > 0 + && server.lastbgsave_status == REDIS_ERR && c->cmd->flags & REDIS_CMD_WRITE) { addReply(c, shared.bgsaveerr); diff --git a/src/redis.h b/src/redis.h index daaf362fb..fdf7f08d3 100644 --- a/src/redis.h +++ b/src/redis.h @@ -637,6 +637,7 @@ struct redisServer { int rdb_compression; /* Use compression in RDB? */ time_t lastsave; /* Unix time of last save succeeede */ int lastbgsave_status; /* REDIS_OK or REDIS_ERR */ + int stop_writes_on_bgsave_err; /* Don't allow writes if can't BGSAVE */ /* Propagation of commands in AOF / replication */ redisOpArray also_propagate; /* Additional command to propagate. */ /* Logging */ From d05dba2e541b10514f3541f9d8b16cf0ab74bd4a Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 8 Mar 2012 10:08:44 +0100 Subject: [PATCH 22/70] clusterGetRandomName() generalized into getRandomHexChars() so that we can use it for the run_id field as well. --- src/cluster.c | 16 +--------------- src/redis.h | 3 +++ src/util.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 15 deletions(-) diff --git a/src/cluster.c b/src/cluster.c index 85cb11981..f76e8ff5c 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -19,20 +19,6 @@ int clusterAddSlot(clusterNode *n, int slot); * Initialization * -------------------------------------------------------------------------- */ -void clusterGetRandomName(char *p) { - FILE *fp = fopen("/dev/urandom","r"); - char *charset = "0123456789abcdef"; - int j; - - if (fp == NULL || fread(p,REDIS_CLUSTER_NAMELEN,1,fp) == 0) { - for (j = 0; j < REDIS_CLUSTER_NAMELEN; j++) - p[j] = rand(); - } - for (j = 0; j < REDIS_CLUSTER_NAMELEN; j++) - p[j] = charset[p[j] & 0x0F]; - fclose(fp); -} - int clusterLoadConfig(char *filename) { FILE *fp = fopen(filename,"r"); char *line; @@ -304,7 +290,7 @@ clusterNode *createClusterNode(char *nodename, int flags) { if (nodename) memcpy(node->name, nodename, REDIS_CLUSTER_NAMELEN); else - clusterGetRandomName(node->name); + getRandomHexChars(node->name, REDIS_CLUSTER_NAMELEN); node->flags = flags; memset(node->slots,0,sizeof(node->slots)); node->numslaves = 0; diff --git a/src/redis.h b/src/redis.h index fdf7f08d3..3ecedd4ca 100644 --- a/src/redis.h +++ b/src/redis.h @@ -58,6 +58,8 @@ #define REDIS_REPL_TIMEOUT 60 #define REDIS_REPL_PING_SLAVE_PERIOD 10 +#define REDIS_RUN_ID_SIZE 40 + /* Protocol and I/O related defines */ #define REDIS_MAX_QUERYBUF_LEN (1024*1024*1024) /* 1GB max query buffer. */ #define REDIS_IOBUF_LEN (1024*16) /* Generic I/O buffer size */ @@ -813,6 +815,7 @@ dictType hashDictType; /* Utils */ long long ustime(void); long long mstime(void); +void getRandomHexChars(char *p, unsigned int len); /* networking.c -- Networking and Client related operations */ redisClient *createClient(int fd); diff --git a/src/util.c b/src/util.c index f5a23af2a..b6ec2150d 100644 --- a/src/util.c +++ b/src/util.c @@ -5,6 +5,8 @@ #include #include #include +#include +#include #include "util.h" @@ -327,6 +329,52 @@ int d2string(char *buf, size_t len, double value) { return len; } +/* Generate the Redis "Run ID", a SHA1-sized random number that identifies a + * given execution of Redis, so that if you are talking with an instance + * having run_id == A, and you reconnect and it has run_id == B, you can be + * sure that it is either a different instance or it was restarted. */ +void getRandomHexChars(char *p, unsigned int len) { + FILE *fp = fopen("/dev/urandom","r"); + char *charset = "0123456789abcdef"; + unsigned int j; + + if (fp == NULL || fread(p,len,1,fp) == 0) { + /* If we can't read from /dev/urandom, do some reasonable effort + * in order to create some entropy, since this function is used to + * generate run_id and cluster instance IDs */ + char *x = p; + unsigned int l = len; + struct timeval tv; + pid_t pid = getpid(); + + /* Use time and PID to fill the initial array. */ + gettimeofday(&tv,NULL); + if (l >= sizeof(tv.tv_usec)) { + memcpy(x,&tv.tv_usec,sizeof(tv.tv_usec)); + l -= sizeof(tv.tv_usec); + x += sizeof(tv.tv_usec); + } + if (l >= sizeof(tv.tv_sec)) { + memcpy(x,&tv.tv_sec,sizeof(tv.tv_sec)); + l -= sizeof(tv.tv_sec); + x += sizeof(tv.tv_sec); + } + if (l >= sizeof(pid)) { + memcpy(x,&pid,sizeof(pid)); + l -= sizeof(pid); + x += sizeof(pid); + } + /* Finally xor it with rand() output, that was already seeded with + * time() at startup. */ + for (j = 0; j < len; j++) + p[j] ^= rand(); + } + /* Turn it into hex digits taking just 4 bits out of 8 for every byte. */ + for (j = 0; j < len; j++) + p[j] = charset[p[j] & 0x0F]; + fclose(fp); +} + #ifdef UTIL_TEST_MAIN #include From 6c939b3752e0e3abb854c9ed94fcedc936819515 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 8 Mar 2012 10:13:12 +0100 Subject: [PATCH 23/70] run_id added to INFO output. The Run ID is a field that identifies a single execution of the Redis server. It can be useful for many purposes as it makes easy to detect if the instance we are talking about is the same, or if it is a different one or was rebooted. An application of run_id will be in the partial synchronization of replication, where a slave may request a partial sync from a given offset only if it is talking with the same master. Another application is in failover and monitoring scripts. --- src/redis.c | 4 ++++ src/redis.h | 1 + 2 files changed, 5 insertions(+) diff --git a/src/redis.c b/src/redis.c index 01ec0531e..acc01b4b7 100644 --- a/src/redis.c +++ b/src/redis.c @@ -870,6 +870,8 @@ void createSharedObjects(void) { } void initServerConfig() { + getRandomHexChars(server.runid,REDIS_RUN_ID_SIZE); + server.runid[REDIS_RUN_ID_SIZE] = '\0'; server.arch_bits = (sizeof(long) == 8) ? 64 : 32; server.port = REDIS_SERVERPORT; server.bindaddr = NULL; @@ -1585,6 +1587,7 @@ sds genRedisInfoString(char *section) { "multiplexing_api:%s\r\n" "gcc_version:%d.%d.%d\r\n" "process_id:%ld\r\n" + "run_id:%s\r\n" "tcp_port:%d\r\n" "uptime_in_seconds:%ld\r\n" "uptime_in_days:%ld\r\n" @@ -1600,6 +1603,7 @@ sds genRedisInfoString(char *section) { 0,0,0, #endif (long) getpid(), + server.runid, server.port, uptime, uptime/(3600*24), diff --git a/src/redis.h b/src/redis.h index 3ecedd4ca..ee1cef4c3 100644 --- a/src/redis.h +++ b/src/redis.h @@ -570,6 +570,7 @@ struct redisServer { char *pidfile; /* PID file path */ int arch_bits; /* 32 or 64 depending on sizeof(long) */ int cronloops; /* Number of times the cron function run */ + char runid[REDIS_RUN_ID_SIZE+1]; /* ID always different at every exec. */ /* Networking */ int port; /* TCP listening port */ char *bindaddr; /* Bind address or NULL */ From 407b0e0f48c05d97b5dbd15a4b3c8ffdadaa781a Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 8 Mar 2012 12:14:23 +0100 Subject: [PATCH 24/70] Support for all the redis.conf fields in CONFIG GET. config.c refactored a bit. --- src/config.c | 219 +++++++++++++++++++++++++-------------------------- 1 file changed, 108 insertions(+), 111 deletions(-) diff --git a/src/config.c b/src/config.c index 6befca8ff..8d8fcda74 100644 --- a/src/config.c +++ b/src/config.c @@ -614,6 +614,12 @@ void configSetCommand(redisClient *c) { if (yn == -1) goto badfmt; server.stop_writes_on_bgsave_err = yn; + } else if (!strcasecmp(c->argv[2]->ptr,"repl-ping-slave-period")) { + if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll <= 0) goto badfmt; + server.repl_ping_slave_period = ll; + } else if (!strcasecmp(c->argv[2]->ptr,"repl-timeout")) { + if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll <= 0) goto badfmt; + server.repl_timeout = ll; } else { addReplyErrorFormat(c,"Unsupported CONFIG parameter: %s", (char*)c->argv[2]->ptr); @@ -628,6 +634,31 @@ badfmt: /* Bad format errors */ (char*)c->argv[2]->ptr); } +#define config_get_string_field(_name,_var) do { \ + if (stringmatch(pattern,_name,0)) { \ + addReplyBulkCString(c,_name); \ + addReplyBulkCString(c,_var ? _var : ""); \ + matches++; \ + } \ +} while(0); + +#define config_get_bool_field(_name,_var) do { \ + if (stringmatch(pattern,_name,0)) { \ + addReplyBulkCString(c,_name); \ + addReplyBulkCString(c,_var ? "yes" : "no"); \ + matches++; \ + } \ +} while(0); + +#define config_get_numerical_field(_name,_var) do { \ + if (stringmatch(pattern,_name,0)) { \ + ll2string(buf,sizeof(buf),_var); \ + addReplyBulkCString(c,_name); \ + addReplyBulkCString(c,buf); \ + matches++; \ + } \ +} while(0); + void configGetCommand(redisClient *c) { robj *o = c->argv[2]; void *replylen = addDeferredMultiBulkLength(c); @@ -636,6 +667,66 @@ void configGetCommand(redisClient *c) { int matches = 0; redisAssertWithInfo(c,o,o->encoding == REDIS_ENCODING_RAW); + /* String values */ + config_get_string_field("dbfilename",server.rdb_filename); + config_get_string_field("requirepass",server.requirepass); + config_get_string_field("masterauth",server.requirepass); + config_get_string_field("bind",server.bindaddr); + config_get_string_field("unixsocket",server.unixsocket); + config_get_string_field("logfile",server.logfile); + config_get_string_field("pidfile",server.pidfile); + + /* Numerical values */ + config_get_numerical_field("maxmemory",server.maxmemory); + config_get_numerical_field("maxmemory-samples",server.maxmemory_samples); + config_get_numerical_field("timeout",server.maxidletime); + config_get_numerical_field("auto-aof-rewrite-percentage", + server.aof_rewrite_perc); + config_get_numerical_field("auto-aof-rewrite-min-size", + server.aof_rewrite_min_size); + config_get_numerical_field("hash-max-zipmap-entries", + server.hash_max_zipmap_entries); + config_get_numerical_field("hash-max-zipmap-value", + server.hash_max_zipmap_value); + config_get_numerical_field("list-max-ziplist-entries", + server.list_max_ziplist_entries); + config_get_numerical_field("list-max-ziplist-value", + server.list_max_ziplist_value); + config_get_numerical_field("set-max-intset-entries", + server.set_max_intset_entries); + config_get_numerical_field("zset-max-ziplist-entries", + server.zset_max_ziplist_entries); + config_get_numerical_field("zset-max-ziplist-value", + server.zset_max_ziplist_value); + config_get_numerical_field("lua-time-limit",server.lua_time_limit); + config_get_numerical_field("slowlog-log-slower-than", + server.slowlog_log_slower_than); + config_get_numerical_field("slowlog-max-len", + server.slowlog_max_len); + config_get_numerical_field("port",server.port); + config_get_numerical_field("databases",server.dbnum); + config_get_numerical_field("repl-ping-slave-period",server.repl_ping_slave_period); + config_get_numerical_field("repl-timeout",server.repl_timeout); + config_get_numerical_field("maxclients",server.maxclients); + + /* Bool (yes/no) values */ + config_get_bool_field("no-appendfsync-on-rewrite", + server.aof_no_fsync_on_rewrite); + config_get_bool_field("slave-serve-stale-data", + server.repl_serve_stale_data); + config_get_bool_field("stop-writes-on-bgsave-error", + server.stop_writes_on_bgsave_err); + config_get_bool_field("daemonize", server.daemonize); + config_get_bool_field("rdbcompression", server.rdb_compression); + config_get_bool_field("activerehashing", server.activerehashing); + + /* Everything we can't handle with macros follows. */ + + if (stringmatch(pattern,"appendonly",0)) { + addReplyBulkCString(c,"appendonly"); + addReplyBulkCString(c,server.aof_state == REDIS_AOF_OFF ? "no" : "yes"); + matches++; + } if (stringmatch(pattern,"dir",0)) { char buf[1024]; @@ -646,27 +737,6 @@ void configGetCommand(redisClient *c) { addReplyBulkCString(c,buf); matches++; } - if (stringmatch(pattern,"dbfilename",0)) { - addReplyBulkCString(c,"dbfilename"); - addReplyBulkCString(c,server.rdb_filename); - matches++; - } - if (stringmatch(pattern,"requirepass",0)) { - addReplyBulkCString(c,"requirepass"); - addReplyBulkCString(c,server.requirepass); - matches++; - } - if (stringmatch(pattern,"masterauth",0)) { - addReplyBulkCString(c,"masterauth"); - addReplyBulkCString(c,server.masterauth); - matches++; - } - if (stringmatch(pattern,"maxmemory",0)) { - ll2string(buf,sizeof(buf),server.maxmemory); - addReplyBulkCString(c,"maxmemory"); - addReplyBulkCString(c,buf); - matches++; - } if (stringmatch(pattern,"maxmemory-policy",0)) { char *s; @@ -683,28 +753,6 @@ void configGetCommand(redisClient *c) { addReplyBulkCString(c,s); matches++; } - if (stringmatch(pattern,"maxmemory-samples",0)) { - ll2string(buf,sizeof(buf),server.maxmemory_samples); - addReplyBulkCString(c,"maxmemory-samples"); - addReplyBulkCString(c,buf); - matches++; - } - if (stringmatch(pattern,"timeout",0)) { - ll2string(buf,sizeof(buf),server.maxidletime); - addReplyBulkCString(c,"timeout"); - addReplyBulkCString(c,buf); - matches++; - } - if (stringmatch(pattern,"appendonly",0)) { - addReplyBulkCString(c,"appendonly"); - addReplyBulkCString(c,server.aof_state == REDIS_AOF_OFF ? "no" : "yes"); - matches++; - } - if (stringmatch(pattern,"no-appendfsync-on-rewrite",0)) { - addReplyBulkCString(c,"no-appendfsync-on-rewrite"); - addReplyBulkCString(c,server.aof_no_fsync_on_rewrite ? "yes" : "no"); - matches++; - } if (stringmatch(pattern,"appendfsync",0)) { char *policy; @@ -734,71 +782,6 @@ void configGetCommand(redisClient *c) { sdsfree(buf); matches++; } - if (stringmatch(pattern,"auto-aof-rewrite-percentage",0)) { - addReplyBulkCString(c,"auto-aof-rewrite-percentage"); - addReplyBulkLongLong(c,server.aof_rewrite_perc); - matches++; - } - if (stringmatch(pattern,"auto-aof-rewrite-min-size",0)) { - addReplyBulkCString(c,"auto-aof-rewrite-min-size"); - addReplyBulkLongLong(c,server.aof_rewrite_min_size); - matches++; - } - if (stringmatch(pattern,"slave-serve-stale-data",0)) { - addReplyBulkCString(c,"slave-serve-stale-data"); - addReplyBulkCString(c,server.repl_serve_stale_data ? "yes" : "no"); - matches++; - } - if (stringmatch(pattern,"hash-max-zipmap-entries",0)) { - addReplyBulkCString(c,"hash-max-zipmap-entries"); - addReplyBulkLongLong(c,server.hash_max_zipmap_entries); - matches++; - } - if (stringmatch(pattern,"hash-max-zipmap-value",0)) { - addReplyBulkCString(c,"hash-max-zipmap-value"); - addReplyBulkLongLong(c,server.hash_max_zipmap_value); - matches++; - } - if (stringmatch(pattern,"list-max-ziplist-entries",0)) { - addReplyBulkCString(c,"list-max-ziplist-entries"); - addReplyBulkLongLong(c,server.list_max_ziplist_entries); - matches++; - } - if (stringmatch(pattern,"list-max-ziplist-value",0)) { - addReplyBulkCString(c,"list-max-ziplist-value"); - addReplyBulkLongLong(c,server.list_max_ziplist_value); - matches++; - } - if (stringmatch(pattern,"set-max-intset-entries",0)) { - addReplyBulkCString(c,"set-max-intset-entries"); - addReplyBulkLongLong(c,server.set_max_intset_entries); - matches++; - } - if (stringmatch(pattern,"zset-max-ziplist-entries",0)) { - addReplyBulkCString(c,"zset-max-ziplist-entries"); - addReplyBulkLongLong(c,server.zset_max_ziplist_entries); - matches++; - } - if (stringmatch(pattern,"zset-max-ziplist-value",0)) { - addReplyBulkCString(c,"zset-max-ziplist-value"); - addReplyBulkLongLong(c,server.zset_max_ziplist_value); - matches++; - } - if (stringmatch(pattern,"lua-time-limit",0)) { - addReplyBulkCString(c,"lua-time-limit"); - addReplyBulkLongLong(c,server.lua_time_limit); - matches++; - } - if (stringmatch(pattern,"slowlog-log-slower-than",0)) { - addReplyBulkCString(c,"slowlog-log-slower-than"); - addReplyBulkLongLong(c,server.slowlog_log_slower_than); - matches++; - } - if (stringmatch(pattern,"slowlog-max-len",0)) { - addReplyBulkCString(c,"slowlog-max-len"); - addReplyBulkLongLong(c,server.slowlog_max_len); - matches++; - } if (stringmatch(pattern,"loglevel",0)) { char *s; @@ -831,9 +814,23 @@ void configGetCommand(redisClient *c) { sdsfree(buf); matches++; } - if (stringmatch(pattern,"stop-writes-on-bgsave-error",0)) { - addReplyBulkCString(c,"stop-writes-on-bgsave-error"); - addReplyBulkCString(c,server.stop_writes_on_bgsave_err ? "yes" : "no"); + if (stringmatch(pattern,"unixsocketperm",0)) { + char buf[32]; + snprintf(buf,sizeof(buf),"%o",server.unixsocketperm); + addReplyBulkCString(c,"unixsocketperm"); + addReplyBulkCString(c,buf); + matches++; + } + if (stringmatch(pattern,"slaveof",0)) { + char buf[256]; + + addReplyBulkCString(c,"slaveof"); + if (server.masterhost) + snprintf(buf,sizeof(buf),"%s %d", + server.masterhost, server.masterport); + else + buf[0] = '\0'; + addReplyBulkCString(c,buf); matches++; } setDeferredMultiBulkLength(c,replylen,matches*2); From 4002005dc45e0c8868226e4cf0698bf77ab3bd55 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 8 Mar 2012 16:15:37 +0100 Subject: [PATCH 25/70] Instantaneous ops/sec figure in INFO output. --- src/redis.c | 33 +++++++++++++++++++++++++++++++++ src/redis.h | 7 +++++++ 2 files changed, 40 insertions(+) diff --git a/src/redis.c b/src/redis.c index acc01b4b7..03dc2ed1e 100644 --- a/src/redis.c +++ b/src/redis.c @@ -616,6 +616,31 @@ void updateLRUClock(void) { REDIS_LRU_CLOCK_MAX; } + +/* Add a sample to the operations per second array of samples. */ +void trackOperationsPerSecond(void) { + long long t = mstime() - server.ops_sec_last_sample_time; + long long ops = server.stat_numcommands - server.ops_sec_last_sample_ops; + long long ops_sec; + + ops_sec = t > 0 ? (ops*1000/t) : 0; + + server.ops_sec_samples[server.ops_sec_idx] = ops_sec; + server.ops_sec_idx = (server.ops_sec_idx+1) % REDIS_OPS_SEC_SAMPLES; + server.ops_sec_last_sample_time = mstime(); + server.ops_sec_last_sample_ops = server.stat_numcommands; +} + +/* Return the mean of all the samples. */ +long long getOperationsPerSecond(void) { + int j; + long long sum = 0; + + for (j = 0; j < REDIS_OPS_SEC_SAMPLES; j++) + sum += server.ops_sec_samples[j]; + return sum / REDIS_OPS_SEC_SAMPLES; +} + int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { int j, loops = server.cronloops; REDIS_NOTUSED(eventLoop); @@ -628,6 +653,8 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { * To access a global var is faster than calling time(NULL) */ server.unixtime = time(NULL); + trackOperationsPerSecond(); + /* We have just 22 bits per object for LRU information. * So we use an (eventually wrapping) LRU clock with 10 seconds resolution. * 2^22 bits with 10 seconds resoluton is more or less 1.5 years. @@ -1091,6 +1118,10 @@ void initServer() { server.stat_peak_memory = 0; server.stat_fork_time = 0; server.stat_rejected_conn = 0; + memset(server.ops_sec_samples,0,sizeof(server.ops_sec_samples)); + server.ops_sec_idx = 0; + server.ops_sec_last_sample_time = mstime(); + server.ops_sec_last_sample_ops = 0; server.unixtime = time(NULL); server.lastbgsave_status = REDIS_OK; server.stop_writes_on_bgsave_err = 1; @@ -1726,6 +1757,7 @@ sds genRedisInfoString(char *section) { "# Stats\r\n" "total_connections_received:%lld\r\n" "total_commands_processed:%lld\r\n" + "instantaneous_ops_per_sec:%lld\r\n" "rejected_connections:%lld\r\n" "expired_keys:%lld\r\n" "evicted_keys:%lld\r\n" @@ -1736,6 +1768,7 @@ sds genRedisInfoString(char *section) { "latest_fork_usec:%lld\r\n", server.stat_numconnections, server.stat_numcommands, + getOperationsPerSecond(), server.stat_rejected_conn, server.stat_expiredkeys, server.stat_evictedkeys, diff --git a/src/redis.h b/src/redis.h index ee1cef4c3..8d492596c 100644 --- a/src/redis.h +++ b/src/redis.h @@ -59,6 +59,7 @@ #define REDIS_REPL_PING_SLAVE_PERIOD 10 #define REDIS_RUN_ID_SIZE 40 +#define REDIS_OPS_SEC_SAMPLES 16 /* Protocol and I/O related defines */ #define REDIS_MAX_QUERYBUF_LEN (1024*1024*1024) /* 1GB max query buffer. */ @@ -606,6 +607,12 @@ struct redisServer { long long slowlog_entry_id; /* SLOWLOG current entry ID */ long long slowlog_log_slower_than; /* SLOWLOG time limit (to get logged) */ unsigned long slowlog_max_len; /* SLOWLOG max number of items logged */ + /* The following two are used to track instantaneous "load" in terms + * of operations per second. */ + long long ops_sec_last_sample_time; /* Timestamp of last sample (in ms) */ + long long ops_sec_last_sample_ops; /* numcommands in last sample */ + long long ops_sec_samples[REDIS_OPS_SEC_SAMPLES]; + int ops_sec_idx; /* Configuration */ int verbosity; /* Loglevel in redis.conf */ int maxidletime; /* Client timeout in seconds */ From 656b852dc44688cddce1afe7ffdd63fd08ec0ba8 Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 10 Mar 2012 10:36:51 +0100 Subject: [PATCH 26/70] Added a top-function comment to rioWriteHashIteratorCursor() to better specify what the function does. Not immediately clear from the name. --- src/aof.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/aof.c b/src/aof.c index 03e324914..64cd76d3d 100644 --- a/src/aof.c +++ b/src/aof.c @@ -609,6 +609,12 @@ int rewriteSortedSetObject(rio *r, robj *key, robj *o) { return 1; } +/* Write either the key or the value of the currently selected item of an hash. + * The 'hi' argument passes a valid Redis hash iterator. + * The 'what' filed specifies if to write a key or a value and can be + * either REDIS_HASH_KEY or REDIS_HASH_VALUE. + * + * The function returns 0 on error, non-zero on success. */ static int rioWriteHashIteratorCursor(rio *r, hashTypeIterator *hi, int what) { if (hi->encoding == REDIS_ENCODING_ZIPLIST) { unsigned char *vstr = NULL; From 79370acd45678d529bc790d73f4b76275187beb6 Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 10 Mar 2012 10:38:53 +0100 Subject: [PATCH 27/70] Removed handling of deprecated hash-max-zipmap-entries nad hash-map-zipmap-value. Pieter is too good with users ;). Better to have them switch to a saner configuration ASAP after the 2.6 upgrade. --- 00-RELEASENOTES | 6 ++++++ src/config.c | 8 -------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/00-RELEASENOTES b/00-RELEASENOTES index 49221f2c0..8a225b717 100644 --- a/00-RELEASENOTES +++ b/00-RELEASENOTES @@ -22,6 +22,12 @@ but it is better to keep them in mind: starting with '#'. All the major clients should be already fixed to work with the new INFO format. +Also the following redis.conf and CONFIG GET / SET parameters changed name: + + * hash-max-zipmap-entries, now replaced by hash-max-ziplist-entries + * hash-max-zipmap-value, now replaced by hash-max-ziplist-value + * glueoutputbuf was no completely removed as it does not make sense + --------- CHANGELOG --------- diff --git a/src/config.c b/src/config.c index d84cd474d..533a2a572 100644 --- a/src/config.c +++ b/src/config.c @@ -202,8 +202,6 @@ void loadServerConfigFromString(char *config) { if ((server.repl_serve_stale_data = yesnotoi(argv[1])) == -1) { err = "argument must be 'yes' or 'no'"; goto loaderr; } - } else if (!strcasecmp(argv[0],"glueoutputbuf")) { - redisLog(REDIS_WARNING, "Deprecated configuration directive: \"%s\"", argv[0]); } else if (!strcasecmp(argv[0],"rdbcompression") && argc == 2) { if ((server.rdb_compression = yesnotoi(argv[1])) == -1) { err = "argument must be 'yes' or 'no'"; goto loaderr; @@ -262,12 +260,6 @@ void loadServerConfigFromString(char *config) { } else if (!strcasecmp(argv[0],"dbfilename") && argc == 2) { zfree(server.rdb_filename); server.rdb_filename = zstrdup(argv[1]); - } else if (!strcasecmp(argv[0],"hash-max-zipmap-entries") && argc == 2) { - redisLog(REDIS_WARNING, "Deprecated configuration directive: \"%s\"", argv[0]); - server.hash_max_ziplist_entries = memtoll(argv[1], NULL); - } else if (!strcasecmp(argv[0],"hash-max-zipmap-value") && argc == 2) { - redisLog(REDIS_WARNING, "Deprecated configuration directive: \"%s\"", argv[0]); - server.hash_max_ziplist_value = memtoll(argv[1], NULL); } else if (!strcasecmp(argv[0],"hash-max-ziplist-entries") && argc == 2) { server.hash_max_ziplist_entries = memtoll(argv[1], NULL); } else if (!strcasecmp(argv[0],"hash-max-ziplist-value") && argc == 2) { From b626635f39836c51c5e8ce671b030214f8a7242e Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 10 Mar 2012 11:09:43 +0100 Subject: [PATCH 28/70] Minor code aesthetic change to use Redis code base style rule of saving vertical space when possible. --- src/t_hash.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/t_hash.c b/src/t_hash.c index f0ecefc32..b09ff41ea 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -75,10 +75,7 @@ int hashTypeGetFromHashTable(robj *o, robj *field, robj **value) { redisAssert(o->encoding == REDIS_ENCODING_HT); de = dictFind(o->ptr, field); - if (de == NULL) { - return -1; - } - + if (de == NULL) return -1; *value = dictGetVal(de); return 0; } From 41f3921a226b7a25110449cba6cdda396d360daf Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 10 Mar 2012 11:11:28 +0100 Subject: [PATCH 29/70] More vertical space saved. --- src/t_hash.c | 25 ++++--------------------- 1 file changed, 4 insertions(+), 21 deletions(-) diff --git a/src/t_hash.c b/src/t_hash.c index b09ff41ea..b39284505 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -109,11 +109,9 @@ robj *hashTypeGetObject(robj *o, robj *field) { incrRefCount(aux); value = aux; } - } else { redisPanic("Unknown hash encoding"); } - return value; } @@ -125,21 +123,14 @@ int hashTypeExists(robj *o, robj *field) { unsigned int vlen = UINT_MAX; long long vll = LLONG_MAX; - if (hashTypeGetFromZiplist(o, field, &vstr, &vlen, &vll) == 0) { - return 1; - } - + if (hashTypeGetFromZiplist(o, field, &vstr, &vlen, &vll) == 0) return 1; } else if (o->encoding == REDIS_ENCODING_HT) { robj *aux; - if (hashTypeGetFromHashTable(o, field, &aux) == 0) { - return 1; - } - + if (hashTypeGetFromHashTable(o, field, &aux) == 0) return 1; } else { redisPanic("Unknown hash encoding"); } - return 0; } @@ -303,10 +294,7 @@ int hashTypeNext(hashTypeIterator *hi) { redisAssert(vptr != NULL); fptr = ziplistNext(zl, vptr); } - - if (fptr == NULL) { - return REDIS_ERR; - } + if (fptr == NULL) return REDIS_ERR; /* Grab pointer to the value (fptr points to the field) */ vptr = ziplistNext(zl, fptr); @@ -315,16 +303,11 @@ int hashTypeNext(hashTypeIterator *hi) { /* fptr, vptr now point to the first or next pair */ hi->fptr = fptr; hi->vptr = vptr; - } else if (hi->encoding == REDIS_ENCODING_HT) { - if ((hi->de = dictNext(hi->di)) == NULL) { - return REDIS_ERR; - } - + if ((hi->de = dictNext(hi->di)) == NULL) return REDIS_ERR; } else { redisPanic("Unknown hash encoding"); } - return REDIS_OK; } From 887bd01a804dc98446a24560dbf19dc42e320c13 Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 10 Mar 2012 12:14:17 +0100 Subject: [PATCH 30/70] hash-max-zipmap-... renamed hash-max-ziplist-... in defalt conf for tests. --- tests/assets/default.conf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/assets/default.conf b/tests/assets/default.conf index 75334426b..976852e91 100644 --- a/tests/assets/default.conf +++ b/tests/assets/default.conf @@ -297,8 +297,8 @@ no-appendfsync-on-rewrite no # have at max a given numer of elements, and the biggest element does not # exceed a given threshold. You can configure this limits with the following # configuration directives. -hash-max-zipmap-entries 64 -hash-max-zipmap-value 512 +hash-max-ziplist-entries 64 +hash-max-ziplist-value 512 # Similarly to hashes, small lists are also encoded in a special way in order # to save a lot of space. The special representation is only used when From f32d32de596984348a05576f23a3fe87bb87000d Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 10 Mar 2012 12:35:15 +0100 Subject: [PATCH 31/70] RDB version is no 4, because small hashes are now encoded as ziplists, so older versions of Redis will not understand this format. --- src/rdb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rdb.c b/src/rdb.c index 113856d43..518fef02c 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -616,7 +616,7 @@ int rdbSave(char *filename) { } rioInitWithFile(&rdb,fp); - if (rdbWriteRaw(&rdb,"REDIS0003",9) == -1) goto werr; + if (rdbWriteRaw(&rdb,"REDIS0004",9) == -1) goto werr; for (j = 0; j < server.dbnum; j++) { redisDb *db = server.db+j; @@ -1023,7 +1023,7 @@ int rdbLoad(char *filename) { return REDIS_ERR; } rdbver = atoi(buf+5); - if (rdbver < 1 || rdbver > 3) { + if (rdbver < 1 || rdbver > 4) { fclose(fp); redisLog(REDIS_WARNING,"Can't handle RDB format version %d",rdbver); errno = EINVAL; From b4d396beaad2c827c531ca965ec2c577d34bfa41 Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 10 Mar 2012 12:38:42 +0100 Subject: [PATCH 32/70] RDB4 support in redis-check-dump. --- src/redis-check-dump.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/redis-check-dump.c b/src/redis-check-dump.c index 77be686c2..42fe51810 100644 --- a/src/redis-check-dump.c +++ b/src/redis-check-dump.c @@ -20,6 +20,7 @@ #define REDIS_LIST_ZIPLIST 10 #define REDIS_SET_INTSET 11 #define REDIS_ZSET_ZIPLIST 12 +#define REDIS_HASH_ZIPLIST 13 /* Objects encoding. Some kind of objects like Strings and Hashes can be * internally represented in multiple ways. The 'encoding' field of the object @@ -136,7 +137,7 @@ int processHeader() { } dump_version = (int)strtol(buf + 5, NULL, 10); - if (dump_version < 1 || dump_version > 2) { + if (dump_version < 1 || dump_version > 4) { ERROR("Unknown RDB format version: %d\n", dump_version); } return 1; @@ -384,6 +385,7 @@ int loadPair(entry *e) { case REDIS_LIST_ZIPLIST: case REDIS_SET_INTSET: case REDIS_ZSET_ZIPLIST: + case REDIS_HASH_ZIPLIST: if (!processStringObject(NULL)) { SHIFT_ERROR(offset, "Error reading entry value"); return 0; From b1839bd367fa8d8cb7ebc80d920a480fd67b67e6 Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 10 Mar 2012 12:40:03 +0100 Subject: [PATCH 33/70] Build dependencies updated. --- src/Makefile | 61 +++++++++++++++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 27 deletions(-) diff --git a/src/Makefile b/src/Makefile index cca785cbc..f86ea859c 100644 --- a/src/Makefile +++ b/src/Makefile @@ -98,35 +98,39 @@ ae_kqueue.o: ae_kqueue.c ae_select.o: ae_select.c anet.o: anet.c fmacros.h anet.h aof.o: aof.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ - zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h + zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h rio.h bio.h bio.o: bio.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ - zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h bio.h + zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h rio.h bio.h cluster.o: cluster.c redis.h fmacros.h config.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h + adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h \ + rio.h config.o: config.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ - zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h + zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h rio.h crc16.o: crc16.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ - zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h + zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h rio.h db.o: db.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ - zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h + zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h rio.h debug.o: debug.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ - zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h sha1.h + zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h rio.h sha1.h dict.o: dict.c fmacros.h dict.h zmalloc.h endianconv.o: endianconv.c intset.o: intset.c intset.h zmalloc.h endianconv.h lzf_c.o: lzf_c.c lzfP.h lzf_d.o: lzf_d.c lzfP.h multi.o: multi.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ - zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h + zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h rio.h networking.o: networking.c redis.h fmacros.h config.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h + adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h \ + rio.h object.o: object.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ - zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h + zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h rio.h pqsort.o: pqsort.c pubsub.o: pubsub.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ - zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h + zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h rio.h +rand.o: rand.c rdb.o: rdb.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ - zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h lzf.h + zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h rio.h lzf.h \ + zipmap.h redis-benchmark.o: redis-benchmark.c fmacros.h ae.h \ ../deps/hiredis/hiredis.h sds.h adlist.h zmalloc.h redis-check-aof.o: redis-check-aof.c fmacros.h config.h @@ -134,34 +138,37 @@ redis-check-dump.o: redis-check-dump.c lzf.h redis-cli.o: redis-cli.c fmacros.h version.h ../deps/hiredis/hiredis.h \ sds.h zmalloc.h ../deps/linenoise/linenoise.h help.h redis.o: redis.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ - zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h slowlog.h \ - bio.h asciilogo.h + zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h rio.h \ + slowlog.h bio.h asciilogo.h release.o: release.c release.h replication.o: replication.c redis.h fmacros.h config.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h + adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h \ + rio.h +rio.o: rio.c fmacros.h rio.h sds.h util.h scripting.o: scripting.c redis.h fmacros.h config.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h \ - sha1.h rand.h -rio.o: rio.c sds.h + adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h \ + rio.h sha1.h rand.h sds.o: sds.c sds.h zmalloc.h sha1.o: sha1.c sha1.h config.h slowlog.o: slowlog.c redis.h fmacros.h config.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h \ - slowlog.h + adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h \ + rio.h slowlog.h sort.o: sort.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ - zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h pqsort.h + zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h rio.h \ + pqsort.h syncio.o: syncio.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ - zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h + zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h rio.h t_hash.o: t_hash.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ - zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h + zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h rio.h t_list.o: t_list.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ - zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h + zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h rio.h t_set.o: t_set.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ - zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h + zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h rio.h t_string.o: t_string.c redis.h fmacros.h config.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h + adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h \ + rio.h t_zset.o: t_zset.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ - zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h + zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h rio.h util.o: util.c fmacros.h util.h ziplist.o: ziplist.c zmalloc.h util.h ziplist.h endianconv.h zipmap.o: zipmap.c zmalloc.h endianconv.h From 5e049d41ed5e1440c9c1a45801a4b4b05818ec44 Mon Sep 17 00:00:00 2001 From: quiver Date: Sat, 10 Mar 2012 21:06:08 +0900 Subject: [PATCH 34/70] fix typo of redis.conf --- redis.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/redis.conf b/redis.conf index 80e14ad95..e03359963 100644 --- a/redis.conf +++ b/redis.conf @@ -246,7 +246,7 @@ slave-serve-stale-data yes # volatile-lru -> remove the key with an expire set using an LRU algorithm # allkeys-lru -> remove any key accordingly to the LRU algorithm # volatile-random -> remove a random key with an expire set -# allkeys->random -> remove a random key, any key +# allkeys-random -> remove a random key, any key # volatile-ttl -> remove the key with the nearest expire time (minor TTL) # noeviction -> don't expire at all, just return an error on write operations # From 3d03cba03b4aa00db617b9c20d3c03546665abca Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 13 Mar 2012 09:49:11 +0100 Subject: [PATCH 35/70] RDB hashes loading fixed removing the assertion that failed every time an HT-encoded hash was loaded. --- src/rdb.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/rdb.c b/src/rdb.c index 518fef02c..4a56659de 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -844,9 +844,10 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) { hashTypeConvert(o, REDIS_ENCODING_HT); /* Load every field and value into the ziplist */ - while (o->encoding == REDIS_ENCODING_ZIPLIST && len-- > 0) { + while (o->encoding == REDIS_ENCODING_ZIPLIST && len > 0) { robj *field, *value; + len--; /* Load raw strings */ field = rdbLoadStringObject(rdb); if (field == NULL) return NULL; @@ -869,9 +870,10 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) { } /* Load remaining fields and values into the hash table */ - while (o->encoding == REDIS_ENCODING_HT && len-- > 0) { + while (o->encoding == REDIS_ENCODING_HT && len > 0) { robj *field, *value; + len--; /* Load encoded strings */ field = rdbLoadEncodedStringObject(rdb); if (field == NULL) return NULL; From 877f76610d0b19f3b366b67752ec4f1f97429593 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 13 Mar 2012 10:59:29 +0100 Subject: [PATCH 36/70] RDB hashes loading, fixed another bug in the loading of HT-encoded hashes: when the hash entry is too big for ziplist, add the field, then convert. The code used to break before the new entry was inserted, resulting into missing fields in the loaded Hash object. --- src/rdb.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/rdb.c b/src/rdb.c index 4a56659de..519b645d1 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -856,6 +856,10 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) { if (value == NULL) return NULL; redisAssert(field->encoding == REDIS_ENCODING_RAW); + /* Add pair to ziplist */ + o->ptr = ziplistPush(o->ptr, field->ptr, sdslen(field->ptr), ZIPLIST_TAIL); + o->ptr = ziplistPush(o->ptr, value->ptr, sdslen(value->ptr), ZIPLIST_TAIL); + /* Convert to hash table if size threshold is exceeded */ if (sdslen(field->ptr) > server.hash_max_ziplist_value || sdslen(value->ptr) > server.hash_max_ziplist_value) @@ -863,10 +867,6 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) { hashTypeConvert(o, REDIS_ENCODING_HT); break; } - - /* Add pair to ziplist */ - o->ptr = ziplistPush(o->ptr, field->ptr, sdslen(field->ptr), ZIPLIST_TAIL); - o->ptr = ziplistPush(o->ptr, value->ptr, sdslen(value->ptr), ZIPLIST_TAIL); } /* Load remaining fields and values into the hash table */ From bad7813b70963995b9e7028fc6730293e489fede Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 13 Mar 2012 12:59:27 +0100 Subject: [PATCH 37/70] c->bufpos initialization moved for aesthetics. --- src/networking.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/networking.c b/src/networking.c index 40aad8360..14c5dcce6 100644 --- a/src/networking.c +++ b/src/networking.c @@ -22,7 +22,6 @@ int listMatchObjects(void *a, void *b) { redisClient *createClient(int fd) { redisClient *c = zmalloc(sizeof(redisClient)); - c->bufpos = 0; /* passing -1 as fd it is possible to create a non connected client. * This is useful since all the Redis commands needs to be executed @@ -42,6 +41,7 @@ redisClient *createClient(int fd) { selectDb(c,0); c->fd = fd; + c->bufpos = 0; c->querybuf = sdsempty(); c->reqtype = 0; c->argc = 0; From f171b199ca6db679da263b0f79f575ddaea392b4 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 13 Mar 2012 13:05:08 +0100 Subject: [PATCH 38/70] Client creation time in redisClient structure. New age field in CLIENT LIST output. --- src/networking.c | 5 +++-- src/redis.h | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/networking.c b/src/networking.c index 14c5dcce6..b0f450544 100644 --- a/src/networking.c +++ b/src/networking.c @@ -51,7 +51,7 @@ redisClient *createClient(int fd) { c->bulklen = -1; c->sentlen = 0; c->flags = 0; - c->lastinteraction = time(NULL); + c->ctime = c->lastinteraction = time(NULL); c->authenticated = 0; c->replstate = REDIS_REPL_NONE; c->reply = listCreate(); @@ -1111,8 +1111,9 @@ sds getClientInfoString(redisClient *client) { if (emask & AE_WRITABLE) *p++ = 'w'; *p = '\0'; return sdscatprintf(sdsempty(), - "addr=%s:%d fd=%d idle=%ld flags=%s db=%d sub=%d psub=%d qbuf=%lu obl=%lu oll=%lu omem=%lu events=%s cmd=%s", + "addr=%s:%d fd=%d age=%ld idle=%ld flags=%s db=%d sub=%d psub=%d qbuf=%lu obl=%lu oll=%lu omem=%lu events=%s cmd=%s", ip,port,client->fd, + (long)(now - client->ctime), (long)(now - client->lastinteraction), flags, client->db->id, diff --git a/src/redis.h b/src/redis.h index 6ead029d8..1d1f572b3 100644 --- a/src/redis.h +++ b/src/redis.h @@ -332,6 +332,7 @@ typedef struct redisClient { list *reply; unsigned long reply_bytes; /* Tot bytes of objects in reply list */ int sentlen; + time_t ctime; /* Client creation time */ time_t lastinteraction; /* time of the last interaction, used for timeout */ time_t obuf_soft_limit_reached_time; int flags; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */ From ca9f6a5457ad8cafd52b18cf77965a227b62be8a Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 13 Mar 2012 13:26:33 +0100 Subject: [PATCH 39/70] Added a qbuf-free field to CLIENT LIST output. --- src/networking.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/networking.c b/src/networking.c index b0f450544..06097b586 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1111,7 +1111,7 @@ sds getClientInfoString(redisClient *client) { if (emask & AE_WRITABLE) *p++ = 'w'; *p = '\0'; return sdscatprintf(sdsempty(), - "addr=%s:%d fd=%d age=%ld idle=%ld flags=%s db=%d sub=%d psub=%d qbuf=%lu obl=%lu oll=%lu omem=%lu events=%s cmd=%s", + "addr=%s:%d fd=%d age=%ld idle=%ld flags=%s db=%d sub=%d psub=%d qbuf=%lu qbuf-free=%lu obl=%lu oll=%lu omem=%lu events=%s cmd=%s", ip,port,client->fd, (long)(now - client->ctime), (long)(now - client->lastinteraction), @@ -1120,6 +1120,7 @@ sds getClientInfoString(redisClient *client) { (int) dictSize(client->pubsub_channels), (int) listLength(client->pubsub_patterns), (unsigned long) sdslen(client->querybuf), + (unsigned long) sdsavail(client->querybuf), (unsigned long) client->bufpos, (unsigned long) listLength(client->reply), getClientOutputBufferMemoryUsage(client), From 5bab1baf2146b43d89bb5b1749a622e298e121a2 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 13 Mar 2012 18:05:11 +0100 Subject: [PATCH 40/70] Process async client checks like client timeouts and BLPOP timeouts incrementally using a circular list. --- src/adlist.c | 16 ++++++++++++++++ src/adlist.h | 1 + src/networking.c | 28 --------------------------- src/redis.c | 49 +++++++++++++++++++++++++++++++++++++++++++++--- 4 files changed, 63 insertions(+), 31 deletions(-) diff --git a/src/adlist.c b/src/adlist.c index 51ba03bd5..e48957e3a 100644 --- a/src/adlist.c +++ b/src/adlist.c @@ -323,3 +323,19 @@ listNode *listIndex(list *list, long index) { } return n; } + +/* Rotate the list removing the tail node and inserting it to the head. */ +void listRotate(list *list) { + listNode *tail = list->tail; + + if (listLength(list) <= 1) return; + + /* Detatch current tail */ + list->tail = tail->prev; + list->tail->next = NULL; + /* Move it as head */ + list->head->prev = tail; + tail->prev = NULL; + tail->next = list->head; + list->head = tail; +} diff --git a/src/adlist.h b/src/adlist.h index 36dba1ff3..259bd0f83 100644 --- a/src/adlist.h +++ b/src/adlist.h @@ -84,6 +84,7 @@ listNode *listSearchKey(list *list, void *key); listNode *listIndex(list *list, long index); void listRewind(list *list, listIter *li); void listRewindTail(list *list, listIter *li); +void listRotate(list *list); /* Directions for iterators */ #define AL_START_HEAD 0 diff --git a/src/networking.c b/src/networking.c index 06097b586..ae77d11b2 100644 --- a/src/networking.c +++ b/src/networking.c @@ -751,34 +751,6 @@ void resetClient(redisClient *c) { if (!(c->flags & REDIS_MULTI)) c->flags &= (~REDIS_ASKING); } -void closeTimedoutClients(void) { - redisClient *c; - listNode *ln; - time_t now = time(NULL); - listIter li; - - listRewind(server.clients,&li); - while ((ln = listNext(&li)) != NULL) { - c = listNodeValue(ln); - if (server.maxidletime && - !(c->flags & REDIS_SLAVE) && /* no timeout for slaves */ - !(c->flags & REDIS_MASTER) && /* no timeout for masters */ - !(c->flags & REDIS_BLOCKED) && /* no timeout for BLPOP */ - dictSize(c->pubsub_channels) == 0 && /* no timeout for pubsub */ - listLength(c->pubsub_patterns) == 0 && - (now - c->lastinteraction > server.maxidletime)) - { - redisLog(REDIS_VERBOSE,"Closing idle client"); - freeClient(c); - } else if (c->flags & REDIS_BLOCKED) { - if (c->bpop.timeout != 0 && c->bpop.timeout < now) { - addReply(c,shared.nullmultibulk); - unblockClientWaitingData(c); - } - } - } -} - int processInlineBuffer(redisClient *c) { char *newline = strstr(c->querybuf,"\r\n"); int argc, j; diff --git a/src/redis.c b/src/redis.c index 3294eea43..5741bc90f 100644 --- a/src/redis.c +++ b/src/redis.c @@ -641,6 +641,50 @@ long long getOperationsPerSecond(void) { return sum / REDIS_OPS_SEC_SAMPLES; } +void closeTimedoutClient(redisClient *c) { + time_t now = time(NULL); + + if (server.maxidletime && + !(c->flags & REDIS_SLAVE) && /* no timeout for slaves */ + !(c->flags & REDIS_MASTER) && /* no timeout for masters */ + !(c->flags & REDIS_BLOCKED) && /* no timeout for BLPOP */ + dictSize(c->pubsub_channels) == 0 && /* no timeout for pubsub */ + listLength(c->pubsub_patterns) == 0 && + (now - c->lastinteraction > server.maxidletime)) + { + redisLog(REDIS_VERBOSE,"Closing idle client"); + freeClient(c); + } else if (c->flags & REDIS_BLOCKED) { + if (c->bpop.timeout != 0 && c->bpop.timeout < now) { + addReply(c,shared.nullmultibulk); + unblockClientWaitingData(c); + } + } +} + +void clientsCron(void) { + /* Make sure to process at least 1/100 of clients per call. + * Since this function is called 10 times per second we are sure that + * in the worst case we process all the clients in 10 seconds. + * In normal conditions (a reasonable number of clients) we process + * all the clients in a shorter time. */ + int iterations = listLength(server.clients)/100; + if (iterations < 50) iterations = 50; + + while(listLength(server.clients) && iterations--) { + redisClient *c; + listNode *head; + + /* Rotate the list, take the current head, process. + * This way if the client must be removed from the list it's the + * first element and we don't incur into O(N) computation. */ + listRotate(server.clients); + head = listFirst(server.clients); + c = listNodeValue(head); + closeTimedoutClient(c); + } +} + int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { int j, loops = server.cronloops; REDIS_NOTUSED(eventLoop); @@ -712,9 +756,8 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { zmalloc_used_memory()); } - /* Close connections of timedout clients */ - if ((server.maxidletime && !(loops % 100)) || server.bpop_blocked_clients) - closeTimedoutClients(); + /* We need to do a few operations on clients asynchronously. */ + clientsCron(); /* Start a scheduled AOF rewrite if this was requested by the user while * a BGSAVE was in progress. */ From 9704f5a00570076b6955b8560c3b14850ca054b2 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 13 Mar 2012 18:06:29 +0100 Subject: [PATCH 41/70] CLIENT LIST test modified to reflect the new output. --- tests/unit/introspection.tcl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/introspection.tcl b/tests/unit/introspection.tcl index 3daa65e07..a768e2ab9 100644 --- a/tests/unit/introspection.tcl +++ b/tests/unit/introspection.tcl @@ -1,5 +1,5 @@ start_server {tags {"introspection"}} { test {CLIENT LIST} { r client list - } {*addr=*:* fd=* idle=* flags=N db=9 sub=0 psub=0 qbuf=0 obl=0 oll=0 omem=0 events=r cmd=client*} + } {*addr=*:* fd=* age=* idle=* flags=N db=9 sub=0 psub=0 qbuf=0 qbuf-free=* obl=0 oll=0 omem=0 events=r cmd=client*} } From 94df4c639f608f8d04d04641c1f17fc87dbb70b0 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 14 Mar 2012 09:56:22 +0100 Subject: [PATCH 42/70] Call all the helper functions needed by clientsCron() as clientsCronSomething() for clarity. --- src/redis.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/redis.c b/src/redis.c index 5741bc90f..f46f2376d 100644 --- a/src/redis.c +++ b/src/redis.c @@ -641,7 +641,7 @@ long long getOperationsPerSecond(void) { return sum / REDIS_OPS_SEC_SAMPLES; } -void closeTimedoutClient(redisClient *c) { +void clientsCronHandleTimeout(redisClient *c) { time_t now = time(NULL); if (server.maxidletime && @@ -681,7 +681,7 @@ void clientsCron(void) { listRotate(server.clients); head = listFirst(server.clients); c = listNodeValue(head); - closeTimedoutClient(c); + clientsCronHandleTimeout(c); } } From a9004ec1fa20f2fff0a0afef9a7525fbd0bf4d10 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 14 Mar 2012 10:13:23 +0100 Subject: [PATCH 43/70] sds.c new function sdsRemoveFreeSpace(). The new function is used in order to resize the string allocation so that only the minimal allocation possible is used, removing all the free space at the end of the string normally used to improve efficiency of concatenation operations. --- src/sds.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/sds.c b/src/sds.c index 092a431e5..82d12e232 100644 --- a/src/sds.c +++ b/src/sds.c @@ -111,6 +111,18 @@ sds sdsMakeRoomFor(sds s, size_t addlen) { return newsh->buf; } +/* Reallocate the sds string so that it has no free space at the end. The + * contained string remains not altered, but next concatenation operations + * will require a reallocation. */ +sds sdsRemoveFreeSpace(sds s) { + struct sdshdr *sh; + + sh = (void*) (s-(sizeof(struct sdshdr))); + sh = zrealloc(sh, sizeof(struct sdshdr)+sh->len+1); + sh->free = 0; + return sh->buf; +} + /* Increment the sds length and decrements the left free space at the * end of the string accordingly to 'incr'. Also set the null term * in the new end of the string. From c0c5a7a6cc6cea7f53b927669b306d2391885ca6 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 14 Mar 2012 14:58:26 +0100 Subject: [PATCH 44/70] sds.c: sdsAllocSize() function added. --- src/sds.c | 6 ++++++ src/sds.h | 2 ++ 2 files changed, 8 insertions(+) diff --git a/src/sds.c b/src/sds.c index 82d12e232..bc6aa6b2f 100644 --- a/src/sds.c +++ b/src/sds.c @@ -123,6 +123,12 @@ sds sdsRemoveFreeSpace(sds s) { return sh->buf; } +size_t sdsAllocSize(sds s) { + struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr))); + + return sizeof(*sh)+sh->len+sh->free+1; +} + /* Increment the sds length and decrements the left free space at the * end of the string accordingly to 'incr'. Also set the null term * in the new end of the string. diff --git a/src/sds.h b/src/sds.h index b00551b41..0648381bb 100644 --- a/src/sds.h +++ b/src/sds.h @@ -94,5 +94,7 @@ sds sdsmapchars(sds s, char *from, char *to, size_t setlen); /* Low level functions exposed to the user API */ sds sdsMakeRoomFor(sds s, size_t addlen); void sdsIncrLen(sds s, int incr); +sds sdsRemoveFreeSpace(sds s); +size_t sdsAllocSize(sds s); #endif From 696c28fcf9d68153697a50062b042b8b5084bef3 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 14 Mar 2012 15:32:30 +0100 Subject: [PATCH 45/70] Reclaim space from the client querybuf if needed. --- src/aof.c | 1 + src/networking.c | 2 ++ src/redis.c | 32 +++++++++++++++++++++++++++++--- src/redis.h | 1 + 4 files changed, 33 insertions(+), 3 deletions(-) diff --git a/src/aof.c b/src/aof.c index 64cd76d3d..83633217f 100644 --- a/src/aof.c +++ b/src/aof.c @@ -287,6 +287,7 @@ struct redisClient *createFakeClient(void) { selectDb(c,0); c->fd = -1; c->querybuf = sdsempty(); + c->querybuf_peak = 0; c->argc = 0; c->argv = NULL; c->bufpos = 0; diff --git a/src/networking.c b/src/networking.c index ae77d11b2..375186d1e 100644 --- a/src/networking.c +++ b/src/networking.c @@ -43,6 +43,7 @@ redisClient *createClient(int fd) { c->fd = fd; c->bufpos = 0; c->querybuf = sdsempty(); + c->querybuf_peak = 0; c->reqtype = 0; c->argc = 0; c->argv = NULL; @@ -998,6 +999,7 @@ void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) { } qblen = sdslen(c->querybuf); + if (c->querybuf_peak < qblen) c->querybuf_peak = qblen; c->querybuf = sdsMakeRoomFor(c->querybuf, readlen); nread = read(fd, c->querybuf+qblen, readlen); if (nread == -1) { diff --git a/src/redis.c b/src/redis.c index f46f2376d..5388f5b72 100644 --- a/src/redis.c +++ b/src/redis.c @@ -642,7 +642,7 @@ long long getOperationsPerSecond(void) { } void clientsCronHandleTimeout(redisClient *c) { - time_t now = time(NULL); + time_t now = server.unixtime; if (server.maxidletime && !(c->flags & REDIS_SLAVE) && /* no timeout for slaves */ @@ -662,15 +662,40 @@ void clientsCronHandleTimeout(redisClient *c) { } } +/* The client query buffer is an sds.c string that can end with a lot of + * free space not used, this function reclaims space if needed. */ +void clientsCronResizeQueryBuffer(redisClient *c) { + size_t querybuf_size = sdsAllocSize(c->querybuf); + time_t idletime = server.unixtime - c->lastinteraction; + + /* There are two conditions to resize the query buffer: + * 1) Query buffer is > BIG_ARG and too big for latest peak. + * 2) Client is inactive and the buffer is bigger than 1k. */ + if (((querybuf_size > REDIS_MBULK_BIG_ARG) && + (querybuf_size/(c->querybuf_peak+1)) > 2) || + (querybuf_size > 1024 && idletime > 2)) + { + /* Only resize the query buffer if it is actually wasting space. */ + if (sdsavail(c->querybuf) > 1024) { + c->querybuf = sdsRemoveFreeSpace(c->querybuf); + } + } + /* Reset the peak again to capture the peak memory usage in the next + * cycle. */ + c->querybuf_peak = 0; +} + void clientsCron(void) { /* Make sure to process at least 1/100 of clients per call. * Since this function is called 10 times per second we are sure that * in the worst case we process all the clients in 10 seconds. * In normal conditions (a reasonable number of clients) we process * all the clients in a shorter time. */ - int iterations = listLength(server.clients)/100; - if (iterations < 50) iterations = 50; + int numclients = listLength(server.clients); + int iterations = numclients/100; + if (iterations < 50) + iterations = (numclients < 50) ? numclients : 50; while(listLength(server.clients) && iterations--) { redisClient *c; listNode *head; @@ -682,6 +707,7 @@ void clientsCron(void) { head = listFirst(server.clients); c = listNodeValue(head); clientsCronHandleTimeout(c); + clientsCronResizeQueryBuffer(c); } } diff --git a/src/redis.h b/src/redis.h index 1d1f572b3..49b695237 100644 --- a/src/redis.h +++ b/src/redis.h @@ -323,6 +323,7 @@ typedef struct redisClient { redisDb *db; int dictid; sds querybuf; + size_t querybuf_peak; /* Recent (100ms or more) peak of querybuf size */ int argc; robj **argv; struct redisCommand *cmd, *lastcmd; From 74d1cad274558aa0e65f67a263c69ea88018bcef Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 15 Mar 2012 20:51:35 +0100 Subject: [PATCH 46/70] Fix for issue #391. Use a simple protocol between clientsCron() and helper functions to understand if the client is still valind and clientsCron() should continue processing or if the client was freed and we should continue with the next one. --- src/redis.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/redis.c b/src/redis.c index 5388f5b72..07a38b215 100644 --- a/src/redis.c +++ b/src/redis.c @@ -641,7 +641,8 @@ long long getOperationsPerSecond(void) { return sum / REDIS_OPS_SEC_SAMPLES; } -void clientsCronHandleTimeout(redisClient *c) { +/* Check for timeouts. Returns non-zero if the client was terminated */ +int clientsCronHandleTimeout(redisClient *c) { time_t now = server.unixtime; if (server.maxidletime && @@ -654,17 +655,21 @@ void clientsCronHandleTimeout(redisClient *c) { { redisLog(REDIS_VERBOSE,"Closing idle client"); freeClient(c); + return 1; } else if (c->flags & REDIS_BLOCKED) { if (c->bpop.timeout != 0 && c->bpop.timeout < now) { addReply(c,shared.nullmultibulk); unblockClientWaitingData(c); } } + return 0; } /* The client query buffer is an sds.c string that can end with a lot of - * free space not used, this function reclaims space if needed. */ -void clientsCronResizeQueryBuffer(redisClient *c) { + * free space not used, this function reclaims space if needed. + * + * The funciton always returns 0 as it never terminates the client. */ +int clientsCronResizeQueryBuffer(redisClient *c) { size_t querybuf_size = sdsAllocSize(c->querybuf); time_t idletime = server.unixtime - c->lastinteraction; @@ -683,6 +688,7 @@ void clientsCronResizeQueryBuffer(redisClient *c) { /* Reset the peak again to capture the peak memory usage in the next * cycle. */ c->querybuf_peak = 0; + return 0; } void clientsCron(void) { @@ -706,8 +712,11 @@ void clientsCron(void) { listRotate(server.clients); head = listFirst(server.clients); c = listNodeValue(head); - clientsCronHandleTimeout(c); - clientsCronResizeQueryBuffer(c); + /* The following functions do different service checks on the client. + * The protocol is that they return non-zero if the client was + * terminated. */ + if (clientsCronHandleTimeout(c)) continue; + if (clientsCronResizeQueryBuffer(c)) continue; } } From 26330d4a064b706a9009413d01e397f86c98c262 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 16 Mar 2012 17:17:39 +0100 Subject: [PATCH 47/70] First implementation of --test-memory. Still a work in progress. --- src/Makefile | 2 +- src/redis.c | 16 +++++++++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/Makefile b/src/Makefile index f86ea859c..43922c2de 100644 --- a/src/Makefile +++ b/src/Makefile @@ -73,7 +73,7 @@ QUIET_CC = @printf ' %b %b\n' $(CCCOLOR)CC$(ENDCOLOR) $(SRCCOLOR)$@$(ENDCOLOR QUIET_LINK = @printf ' %b %b\n' $(LINKCOLOR)LINK$(ENDCOLOR) $(BINCOLOR)$@$(ENDCOLOR); endif -OBJ = adlist.o ae.o anet.o dict.o redis.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o +OBJ = adlist.o ae.o anet.o dict.o redis.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o BENCHOBJ = ae.o anet.o redis-benchmark.o sds.o adlist.o zmalloc.o CLIOBJ = anet.o sds.o adlist.o redis-cli.o zmalloc.o release.o CHECKDUMPOBJ = redis-check-dump.o lzf_c.o lzf_d.o diff --git a/src/redis.c b/src/redis.c index 07a38b215..60d45b5e6 100644 --- a/src/redis.c +++ b/src/redis.c @@ -2231,7 +2231,8 @@ void usage() { fprintf(stderr,"Usage: ./redis-server [/path/to/redis.conf] [options]\n"); fprintf(stderr," ./redis-server - (read config from stdin)\n"); fprintf(stderr," ./redis-server -v or --version\n"); - fprintf(stderr," ./redis-server -h or --help\n\n"); + fprintf(stderr," ./redis-server -h or --help\n"); + fprintf(stderr," ./redis-server --test-memory \n\n"); fprintf(stderr,"Examples:\n"); fprintf(stderr," ./redis-server (run the server with default conf)\n"); fprintf(stderr," ./redis-server /etc/redis/6379.conf\n"); @@ -2287,6 +2288,8 @@ void setupSignalHandlers(void) { return; } +void memtest(size_t megabytes, int passes); + int main(int argc, char **argv) { long long start; struct timeval tv; @@ -2308,6 +2311,17 @@ int main(int argc, char **argv) { strcmp(argv[1], "--version") == 0) version(); if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-h") == 0) usage(); + if (strcmp(argv[1], "--test-memory") == 0) { + if (argc == 3) { + memtest(atoi(argv[2]),10000); + exit(0); + } else { + fprintf(stderr,"Please specify the amount of memory to test in megabytes.\n"); + fprintf(stderr,"Example: ./redis-server --test-memory 4096\n\n"); + exit(1); + } + } + /* First argument is the config file name? */ if (argv[j][0] != '-' || argv[j][1] != '-') configfile = argv[j++]; From 76d3f5b0682ea8ce5f521454fb8d4c2a53a269a4 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 16 Mar 2012 17:21:49 +0100 Subject: [PATCH 48/70] Hem... actual memtest.c file added. --- src/memtest.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 src/memtest.c diff --git a/src/memtest.c b/src/memtest.c new file mode 100644 index 000000000..a0e76f192 --- /dev/null +++ b/src/memtest.c @@ -0,0 +1,91 @@ +#include +#include +#include +#include +#include +#include + +#if (ULONG_MAX == 4294967295UL) +#define MEMTEST_32BIT +#elif (ULONG_MAX == 18446744073709551615ULL) +#define MEMTEST_64BIT +#else +#error "ULONG_MAX value not supported." +#endif + +/* Fill words stepping a single page at every write, so we continue to + * touch all the pages in the smallest amount of time reducing the + * effectiveness of caches, and making it hard for the OS to transfer + * pages on the swap. */ +void memtest_fill(unsigned long *l, size_t bytes) { + unsigned long step = 4096/sizeof(unsigned long); + unsigned long words = bytes/sizeof(unsigned long)/2; + unsigned long iwords = words/step; /* words per iteration */ + unsigned long off, w, *l1, *l2; + + assert((bytes & 4095) == 0); + for (off = 0; off < step; off++) { + l1 = l+off; + l2 = l1+words; + for (w = 0; w < iwords; w++) { +#ifdef MEMTEST_32BIT + *l1 = *l2 = ((unsigned long) (rand()&0xffff)) | + (((unsigned long) (rand()&0xffff)) << 16); +#else + *l1 = *l2 = ((unsigned long) (rand()&0xffff)) | + (((unsigned long) (rand()&0xffff)) << 16) | + (((unsigned long) (rand()&0xffff)) << 32) | + (((unsigned long) (rand()&0xffff)) << 48); +#endif + l1 += step; + l2 += step; + } + } +} + +void memtest_compare(unsigned long *l, size_t bytes) { + unsigned long words = bytes/sizeof(unsigned long)/2; + unsigned long w, *l1, *l2; + + assert((bytes & 4095) == 0); + l1 = l; + l2 = l1+words; + for (w = 0; w < words; w++) { + if (*l1 != *l2) { + printf("\n*** MEMORY ERROR DETECTED: %p != %p (%lu vs %lu)\n", + (void*)l1, (void*)l2, *l1, *l2); + exit(1); + } + l1 ++; + l2 ++; + } +} + +void memtest_test(size_t megabytes, int passes) { + size_t bytes = megabytes*1024*1024; + unsigned long *m = malloc(bytes); + int pass = 0; + + if (m == NULL) { + fprintf(stderr,"Unable to allocate %zu megabytes: %s", + megabytes, strerror(errno)); + exit(1); + } + while (pass != passes) { + pass++; + printf("PASS %d... ", pass); + fflush(stdout); + memtest_fill(m,bytes); + memtest_compare(m,bytes); + printf("ok\n"); + } +} + +void memtest(size_t megabytes, int passes) { + memtest_test(megabytes,passes); + printf("\nYour memory passed this test.\n"); + printf("Please if you are stil in doubt use the following two tools:\n"); + printf("1) memtest86: http://www.memtest86.com/\n"); + printf("2) memtester: http://pyropus.ca/software/memtester/\n"); + exit(0); +} From fd5faf15fcceeced3086081a58f41871028f3bff Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 16 Mar 2012 21:19:53 +0100 Subject: [PATCH 49/70] Memory test function now less boring thanks to screen-wide progress bar. --- src/memtest.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 48 insertions(+), 5 deletions(-) diff --git a/src/memtest.c b/src/memtest.c index a0e76f192..56d566abd 100644 --- a/src/memtest.c +++ b/src/memtest.c @@ -4,6 +4,8 @@ #include #include #include +#include +#include #if (ULONG_MAX == 4294967295UL) #define MEMTEST_32BIT @@ -13,6 +15,37 @@ #error "ULONG_MAX value not supported." #endif +static struct winsize ws; +size_t progress_printed; /* Printed chars in screen-wide progress bar. */ +size_t progress_full; /* How many chars to write to fill the progress bar. */ + +void memtest_progress_start(char *title, int pass) { + int j; + + printf("\x1b[H\x1b[2J"); /* Cursor home, clear screen. */ + /* Fill with dots. */ + for (j = 0; j < ws.ws_col*ws.ws_row; j++) printf("."); + printf("\x1b[H\x1b[2K"); /* Cursor home, clear current line. */ + printf("%s [%d]\n", title, pass); /* Print title. */ + progress_printed = 0; + progress_full = ws.ws_col*(ws.ws_row-1); + fflush(stdout); +} + +void memtest_progress_end(void) { + printf("\x1b[H\x1b[2J"); /* Cursor home, clear screen. */ +} + +void memtest_progress_step(size_t curr, size_t size, char c) { + size_t chars = (curr*progress_full)/size, j; + + for (j = 0; j < chars-progress_printed; j++) { + printf("%c",c); + progress_printed++; + } + fflush(stdout); +} + /* Fill words stepping a single page at every write, so we continue to * touch all the pages in the smallest amount of time reducing the * effectiveness of caches, and making it hard for the OS to transfer @@ -39,6 +72,8 @@ void memtest_fill(unsigned long *l, size_t bytes) { #endif l1 += step; l2 += step; + if ((w & 0xffff) == 0) + memtest_progress_step(w+iwords*off,words,'+'); } } } @@ -58,13 +93,14 @@ void memtest_compare(unsigned long *l, size_t bytes) { } l1 ++; l2 ++; + if ((w & 0xffff) == 0) memtest_progress_step(w,words,'='); } } void memtest_test(size_t megabytes, int passes) { size_t bytes = megabytes*1024*1024; unsigned long *m = malloc(bytes); - int pass = 0; + int pass = 0, j; if (m == NULL) { fprintf(stderr,"Unable to allocate %zu megabytes: %s", @@ -73,15 +109,22 @@ void memtest_test(size_t megabytes, int passes) { } while (pass != passes) { pass++; - printf("PASS %d... ", pass); - fflush(stdout); + memtest_progress_start("Random fill",pass); memtest_fill(m,bytes); - memtest_compare(m,bytes); - printf("ok\n"); + memtest_progress_end(); + for (j = 0; j < 4; j++) { + memtest_progress_start("Compare",pass); + memtest_compare(m,bytes); + memtest_progress_end(); + } } } void memtest(size_t megabytes, int passes) { + if (ioctl(1, TIOCGWINSZ, &ws) == -1) { + ws.ws_col = 80; + ws.ws_row = 20; + } memtest_test(megabytes,passes); printf("\nYour memory passed this test.\n"); printf("Please if you are stil in doubt use the following two tools:\n"); From 28d36fd045c104d46b5b12d69534fe72b7bab720 Mon Sep 17 00:00:00 2001 From: antirez Date: Sun, 18 Mar 2012 11:35:35 +0100 Subject: [PATCH 50/70] On crash suggest to give --test-memory a try. --- src/debug.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/debug.c b/src/debug.c index a355df05f..8eea3bf8d 100644 --- a/src/debug.c +++ b/src/debug.c @@ -636,8 +636,9 @@ void sigsegvHandler(int sig, siginfo_t *info, void *secret) { redisLog(REDIS_WARNING, "\n=== REDIS BUG REPORT END. Make sure to include from START to END. ===\n\n" -" Please report the crash opening an issue on github:\n\n" -" http://github.com/antirez/redis/issues\n\n" +" Please report the crash opening an issue on github:\n\n" +" http://github.com/antirez/redis/issues\n\n" +" Suspect RAM error? Use redis-server --test-memory to veryfy it.\n\n" ); /* free(messages); Don't call free() with possibly corrupted memory. */ if (server.daemonize) unlink(server.pidfile); From eec33378563dd8bf9140f4e2439739f9b37b69b2 Mon Sep 17 00:00:00 2001 From: antirez Date: Sun, 18 Mar 2012 17:24:48 +0100 Subject: [PATCH 51/70] Number of iteration of --test-memory is now 300 (several minutes per gigabyte). Memtest86 and Memtester links are also displayed while running the test. --- src/memtest.c | 6 ++++-- src/redis.c | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/memtest.c b/src/memtest.c index 56d566abd..18e2e6e82 100644 --- a/src/memtest.c +++ b/src/memtest.c @@ -24,11 +24,13 @@ void memtest_progress_start(char *title, int pass) { printf("\x1b[H\x1b[2J"); /* Cursor home, clear screen. */ /* Fill with dots. */ - for (j = 0; j < ws.ws_col*ws.ws_row; j++) printf("."); + for (j = 0; j < ws.ws_col*(ws.ws_row-2); j++) printf("."); + printf("Please keep the test running several minutes per GB of memory.\n"); + printf("Also check http://www.memtest86.com/ and http://pyropus.ca/software/memtester/"); printf("\x1b[H\x1b[2K"); /* Cursor home, clear current line. */ printf("%s [%d]\n", title, pass); /* Print title. */ progress_printed = 0; - progress_full = ws.ws_col*(ws.ws_row-1); + progress_full = ws.ws_col*(ws.ws_row-3); fflush(stdout); } diff --git a/src/redis.c b/src/redis.c index 60d45b5e6..8e0b22eb9 100644 --- a/src/redis.c +++ b/src/redis.c @@ -2313,7 +2313,7 @@ int main(int argc, char **argv) { strcmp(argv[1], "-h") == 0) usage(); if (strcmp(argv[1], "--test-memory") == 0) { if (argc == 3) { - memtest(atoi(argv[2]),10000); + memtest(atoi(argv[2]),300); exit(0); } else { fprintf(stderr,"Please specify the amount of memory to test in megabytes.\n"); From 82003f2f1f6f3661cbd80311f7c1f6a7e468c6eb Mon Sep 17 00:00:00 2001 From: antirez Date: Sun, 18 Mar 2012 17:27:56 +0100 Subject: [PATCH 52/70] Fixed typo. --- src/memtest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/memtest.c b/src/memtest.c index 18e2e6e82..645d1da1e 100644 --- a/src/memtest.c +++ b/src/memtest.c @@ -129,7 +129,7 @@ void memtest(size_t megabytes, int passes) { } memtest_test(megabytes,passes); printf("\nYour memory passed this test.\n"); - printf("Please if you are stil in doubt use the following two tools:\n"); + printf("Please if you are still in doubt use the following two tools:\n"); printf("1) memtest86: http://www.memtest86.com/\n"); printf("2) memtester: http://pyropus.ca/software/memtester/\n"); exit(0); From c577014461acc92e79c93cd810dfe7f641556968 Mon Sep 17 00:00:00 2001 From: antirez Date: Sun, 18 Mar 2012 18:03:27 +0100 Subject: [PATCH 53/70] More memory tests implemented. Default number of iterations lowered to a more acceptable value of 50. --- src/memtest.c | 75 ++++++++++++++++++++++++++++++++++++++++++++------- src/redis.c | 2 +- 2 files changed, 67 insertions(+), 10 deletions(-) diff --git a/src/memtest.c b/src/memtest.c index 645d1da1e..01569d059 100644 --- a/src/memtest.c +++ b/src/memtest.c @@ -15,6 +15,14 @@ #error "ULONG_MAX value not supported." #endif +#ifdef MEMTEST_32BIT +#define ULONG_ONEZERO 0xaaaaaaaaaaaaaaaaUL +#define ULONG_ZEROONE 0x5555555555555555UL +#else +#define ULONG_ONEZERO 0xaaaaaaaaUL +#define ULONG_ZEROONE 0x55555555UL +#endif + static struct winsize ws; size_t progress_printed; /* Printed chars in screen-wide progress bar. */ size_t progress_full; /* How many chars to write to fill the progress bar. */ @@ -52,7 +60,7 @@ void memtest_progress_step(size_t curr, size_t size, char c) { * touch all the pages in the smallest amount of time reducing the * effectiveness of caches, and making it hard for the OS to transfer * pages on the swap. */ -void memtest_fill(unsigned long *l, size_t bytes) { +void memtest_fill_random(unsigned long *l, size_t bytes) { unsigned long step = 4096/sizeof(unsigned long); unsigned long words = bytes/sizeof(unsigned long)/2; unsigned long iwords = words/step; /* words per iteration */ @@ -75,7 +83,40 @@ void memtest_fill(unsigned long *l, size_t bytes) { l1 += step; l2 += step; if ((w & 0xffff) == 0) - memtest_progress_step(w+iwords*off,words,'+'); + memtest_progress_step(w+iwords*off,words,'R'); + } + } +} + +/* Like memtest_fill_random() but uses the two specified values to fill + * memory, in an alternated way (v1|v2|v1|v2|...) */ +void memtest_fill_value(unsigned long *l, size_t bytes, unsigned long v1, + unsigned long v2, char sym) +{ + unsigned long step = 4096/sizeof(unsigned long); + unsigned long words = bytes/sizeof(unsigned long)/2; + unsigned long iwords = words/step; /* words per iteration */ + unsigned long off, w, *l1, *l2, v; + + assert((bytes & 4095) == 0); + for (off = 0; off < step; off++) { + l1 = l+off; + l2 = l1+words; + v = (off & 1) ? v2 : v1; + for (w = 0; w < iwords; w++) { +#ifdef MEMTEST_32BIT + *l1 = *l2 = ((unsigned long) (rand()&0xffff)) | + (((unsigned long) (rand()&0xffff)) << 16); +#else + *l1 = *l2 = ((unsigned long) (rand()&0xffff)) | + (((unsigned long) (rand()&0xffff)) << 16) | + (((unsigned long) (rand()&0xffff)) << 32) | + (((unsigned long) (rand()&0xffff)) << 48); +#endif + l1 += step; + l2 += step; + if ((w & 0xffff) == 0) + memtest_progress_step(w+iwords*off,words,sym); } } } @@ -99,10 +140,20 @@ void memtest_compare(unsigned long *l, size_t bytes) { } } +void memtest_compare_times(unsigned long *m, size_t bytes, int pass, int times) { + int j; + + for (j = 0; j < times; j++) { + memtest_progress_start("Compare",pass); + memtest_compare(m,bytes); + memtest_progress_end(); + } +} + void memtest_test(size_t megabytes, int passes) { size_t bytes = megabytes*1024*1024; unsigned long *m = malloc(bytes); - int pass = 0, j; + int pass = 0; if (m == NULL) { fprintf(stderr,"Unable to allocate %zu megabytes: %s", @@ -112,13 +163,19 @@ void memtest_test(size_t megabytes, int passes) { while (pass != passes) { pass++; memtest_progress_start("Random fill",pass); - memtest_fill(m,bytes); + memtest_fill_random(m,bytes); memtest_progress_end(); - for (j = 0; j < 4; j++) { - memtest_progress_start("Compare",pass); - memtest_compare(m,bytes); - memtest_progress_end(); - } + memtest_compare_times(m,bytes,pass,4); + + memtest_progress_start("Solid fill",pass); + memtest_fill_value(m,bytes,0,(unsigned long)-1,'S'); + memtest_progress_end(); + memtest_compare_times(m,bytes,pass,4); + + memtest_progress_start("Checkerboard fill",pass); + memtest_fill_value(m,bytes,ULONG_ONEZERO,ULONG_ZEROONE,'C'); + memtest_progress_end(); + memtest_compare_times(m,bytes,pass,4); } } diff --git a/src/redis.c b/src/redis.c index 8e0b22eb9..d84a4fc2d 100644 --- a/src/redis.c +++ b/src/redis.c @@ -2313,7 +2313,7 @@ int main(int argc, char **argv) { strcmp(argv[1], "-h") == 0) usage(); if (strcmp(argv[1], "--test-memory") == 0) { if (argc == 3) { - memtest(atoi(argv[2]),300); + memtest(atoi(argv[2]),50); exit(0); } else { fprintf(stderr,"Please specify the amount of memory to test in megabytes.\n"); From 49432f6def3cbcd395685b571238ac2562550184 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 19 Mar 2012 14:02:34 +0100 Subject: [PATCH 54/70] Memory addressing test implemented. --- src/memtest.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/src/memtest.c b/src/memtest.c index 01569d059..272ec5022 100644 --- a/src/memtest.c +++ b/src/memtest.c @@ -56,6 +56,33 @@ void memtest_progress_step(size_t curr, size_t size, char c) { fflush(stdout); } +/* Test that addressing is fine. Every location is populated with its own + * address, and finally verified. This test is very fast but may detect + * ASAP big issues with the memory subsystem. */ +void memtest_addressing(unsigned long *l, size_t bytes) { + unsigned long words = bytes/sizeof(unsigned long); + unsigned long j, *p; + + /* Fill */ + p = l; + for (j = 0; j < words; j++) { + *p = (unsigned long)p; + p++; + if ((j & 0xffff) == 0) memtest_progress_step(j,words*2,'A'); + } + /* Test */ + p = l; + for (j = 0; j < words; j++) { + if (*p != (unsigned long)p) { + printf("\n*** MEMORY ADDRESSING ERROR: %p contains %lu\n", + (void*) p, *p); + exit(1); + } + p++; + if ((j & 0xffff) == 0) memtest_progress_step(j+words,words*2,'A'); + } +} + /* Fill words stepping a single page at every write, so we continue to * touch all the pages in the smallest amount of time reducing the * effectiveness of caches, and making it hard for the OS to transfer @@ -162,6 +189,11 @@ void memtest_test(size_t megabytes, int passes) { } while (pass != passes) { pass++; + + memtest_progress_start("Addressing test",pass); + memtest_addressing(m,bytes); + memtest_progress_end(); + memtest_progress_start("Random fill",pass); memtest_fill_random(m,bytes); memtest_progress_end(); From e47e5e87a2375895e7d142223c949fee2d0cf344 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 19 Mar 2012 19:16:41 +0100 Subject: [PATCH 55/70] Read-only flag removed from PUBLISH command. --- src/redis.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/redis.c b/src/redis.c index d84a4fc2d..98c12be9f 100644 --- a/src/redis.c +++ b/src/redis.c @@ -229,7 +229,7 @@ struct redisCommand redisCommandTable[] = { {"unsubscribe",unsubscribeCommand,-1,"rps",0,NULL,0,0,0,0,0}, {"psubscribe",psubscribeCommand,-2,"rps",0,NULL,0,0,0,0,0}, {"punsubscribe",punsubscribeCommand,-1,"rps",0,NULL,0,0,0,0,0}, - {"publish",publishCommand,3,"rpf",0,NULL,0,0,0,0,0}, + {"publish",publishCommand,3,"pf",0,NULL,0,0,0,0,0}, {"watch",watchCommand,-2,"rs",0,noPreloadGetKeys,1,-1,1,0,0}, {"unwatch",unwatchCommand,1,"rs",0,NULL,0,0,0,0,0}, {"cluster",clusterCommand,-2,"ar",0,NULL,0,0,0,0,0}, From 4f615b696c7e3b0a4a0532ab1fc685c5831b9932 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 19 Mar 2012 19:29:06 +0100 Subject: [PATCH 56/70] Suppress warnings compiling redis-cli with certain gcc versions. --- src/redis-cli.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index c631aa791..bdaa39642 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -478,7 +478,7 @@ static sds cliFormatReplyCSV(redisReply *r) { static int cliReadReply(int output_raw_strings) { void *_reply; redisReply *reply; - sds out; + sds out = NULL; int output = 1; if (redisGetReply(context,&_reply) != REDIS_OK) { @@ -498,7 +498,8 @@ static int cliReadReply(int output_raw_strings) { reply = (redisReply*)_reply; - /* Check if we need to connect to a different node and reissue the request. */ + /* Check if we need to connect to a different node and reissue the + * request. */ if (config.cluster_mode && reply->type == REDIS_REPLY_ERROR && (!strncmp(reply->str,"MOVED",5) || !strcmp(reply->str,"ASK"))) { @@ -926,7 +927,10 @@ static void slaveMode(void) { unsigned long long payload; /* Send the SYNC command. */ - write(fd,"SYNC\r\n",6); + if (write(fd,"SYNC\r\n",6) != 6) { + fprintf(stderr,"Error writing to master\n"); + exit(1); + } /* Read $\r\n, making sure to read just up to "\n" */ p = buf; From 2750fb5c1ac86527abf64ae6a2b6a63cf2413d14 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 20 Mar 2012 13:06:50 +0100 Subject: [PATCH 57/70] redis_init_script template updated. --- utils/redis.conf.tpl | 190 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 151 insertions(+), 39 deletions(-) diff --git a/utils/redis.conf.tpl b/utils/redis.conf.tpl index 9e2e13557..e7febedab 100644 --- a/utils/redis.conf.tpl +++ b/utils/redis.conf.tpl @@ -1,6 +1,6 @@ # Redis configuration file example -# Note on units: when memory size is needed, it is possible to specifiy +# Note on units: when memory size is needed, it is possible to specify # it in the usual form of 1k 5GB 4M and so forth: # # 1k => 1000 bytes @@ -34,9 +34,10 @@ port $REDIS_PORT # on a unix socket when not specified. # # unixsocket /tmp/redis.sock +# unixsocketperm 755 # Close the connection after a client is idle for N seconds (0 to disable) -timeout 300 +timeout 0 # Set server verbosity to 'debug' # it can be one of: @@ -44,7 +45,7 @@ timeout 300 # verbose (many rarely useful info, but not a mess like the debug level) # notice (moderately verbose, what you want in production probably) # warning (only very important / critical messages are logged) -loglevel verbose +loglevel notice # Specify the log file name. Also 'stdout' can be used to force # Redis to log on the standard output. Note that if you use standard @@ -81,11 +82,32 @@ databases 16 # after 60 sec if at least 10000 keys changed # # Note: you can disable saving at all commenting all the "save" lines. +# +# It is also possible to remove all the previously configured save +# points by adding a save directive with a single empty string argument +# like in the following example: +# +# save "" save 900 1 save 300 10 save 60 10000 +# By default Redis will stop accepting writes if RDB snapshots are enabled +# (at least one save point) and the latest background save failed. +# This will make the user aware (in an hard way) that data is not persisting +# on disk properly, otherwise chances are that no one will notice and some +# distater will happen. +# +# If the background saving process will start working again Redis will +# automatically allow writes again. +# +# However if you have setup your proper monitoring of the Redis server +# and persistence, you may want to disable this feature so that Redis will +# continue to work as usually even if there are problems with disk, +# permissions, and so forth. +stop-writes-on-bgsave-error yes + # Compress string objects using LZF when dump .rdb databases? # For default that's set to 'yes' as it's almost always a win. # If you want to save some CPU in the saving child set it to 'no' but @@ -125,7 +147,7 @@ dir $REDIS_DATA_DIR # is still in progress, the slave can act in two different ways: # # 1) if slave-serve-stale-data is set to 'yes' (the default) the slave will -# still reply to client requests, possibly with out of data data, or the +# still reply to client requests, possibly with out of date data, or the # data set may just be empty if this is the first synchronization. # # 2) if slave-serve-stale data is set to 'no' the slave will reply with @@ -134,6 +156,21 @@ dir $REDIS_DATA_DIR # slave-serve-stale-data yes +# Slaves send PINGs to server in a predefined interval. It's possible to change +# this interval with the repl_ping_slave_period option. The default value is 10 +# seconds. +# +# repl-ping-slave-period 10 + +# The following option sets a timeout for both Bulk transfer I/O timeout and +# master data or ping response timeout. The default value is 60 seconds. +# +# It is important to make sure that this value is greater than the value +# specified for repl-ping-slave-period otherwise a timeout will be detected +# every time there is low traffic between the master and the slave. +# +# repl-timeout 60 + ################################## SECURITY ################################### # Require clients to issue AUTH before processing any other @@ -151,7 +188,7 @@ slave-serve-stale-data yes # Command renaming. # -# It is possilbe to change the name of dangerous commands in a shared +# It is possible to change the name of dangerous commands in a shared # environment. For instance the CONFIG command may be renamed into something # of hard to guess so that it will be still available for internal-use # tools but not available for general clients. @@ -160,37 +197,46 @@ slave-serve-stale-data yes # # rename-command CONFIG b840fc02d524045429941cc15f59e41cb7be6c52 # -# It is also possilbe to completely kill a command renaming it into +# It is also possible to completely kill a command renaming it into # an empty string: # # rename-command CONFIG "" ################################### LIMITS #################################### -# Set the max number of connected clients at the same time. By default there -# is no limit, and it's up to the number of file descriptors the Redis process -# is able to open. The special value '0' means no limits. +# Set the max number of connected clients at the same time. By default +# this limit is set to 10000 clients, however if the Redis server is not +# able ot configure the process file limit to allow for the specified limit +# the max number of allowed clients is set to the current file limit +# minus 32 (as Redis reserves a few file descriptors for internal uses). +# # Once the limit is reached Redis will close all the new connections sending # an error 'max number of clients reached'. # -# maxclients 128 +# maxclients 10000 # Don't use more memory than the specified amount of bytes. -# When the memory limit is reached Redis will try to remove keys with an -# EXPIRE set. It will try to start freeing keys that are going to expire -# in little time and preserve keys with a longer time to live. -# Redis will also try to remove objects from free lists if possible. +# When the memory limit is reached Redis will try to remove keys +# accordingly to the eviction policy selected (see maxmemmory-policy). # -# If all this fails, Redis will start to reply with errors to commands -# that will use more memory, like SET, LPUSH, and so on, and will continue -# to reply to most read-only commands like GET. +# If Redis can't remove keys according to the policy, or if the policy is +# set to 'noeviction', Redis will start to reply with errors to commands +# that would use more memory, like SET, LPUSH, and so on, and will continue +# to reply to read-only commands like GET. # -# WARNING: maxmemory can be a good idea mainly if you want to use Redis as a -# 'state' server or cache, not as a real DB. When Redis is used as a real -# database the memory usage will grow over the weeks, it will be obvious if -# it is going to use too much memory in the long run, and you'll have the time -# to upgrade. With maxmemory after the limit is reached you'll start to get -# errors for write operations, and this may even lead to DB inconsistency. +# This option is usually useful when using Redis as an LRU cache, or to set +# an hard memory limit for an instance (using the 'noeviction' policy). +# +# WARNING: If you have slaves attached to an instance with maxmemory on, +# the size of the output buffers needed to feed the slaves are subtracted +# from the used memory count, so that network problems / resyncs will +# not trigger a loop where keys are evicted, and in turn the output +# buffer of slaves is full with DELs of keys evicted triggering the deletion +# of more keys, and so forth until the database is completely emptied. +# +# In short... if you have slaves attached it is suggested that you set a lower +# limit for maxmemory so that there is some free RAM on the system for slave +# output buffers (but this is not needed if the policy is 'noeviction'). # # maxmemory @@ -200,7 +246,7 @@ slave-serve-stale-data yes # volatile-lru -> remove the key with an expire set using an LRU algorithm # allkeys-lru -> remove any key accordingly to the LRU algorithm # volatile-random -> remove a random key with an expire set -# allkeys->random -> remove a random key, any key +# allkeys-random -> remove a random key, any key # volatile-ttl -> remove the key with the nearest expire time (minor TTL) # noeviction -> don't expire at all, just return an error on write operations # @@ -260,7 +306,7 @@ appendonly no # # The default is "everysec" that's usually the right compromise between # speed and data safety. It's up to you to understand if you can relax this to -# "no" that will will let the operating system flush the output buffer when +# "no" that will let the operating system flush the output buffer when # it wants, for better performances (but if you can live with the idea of # some data loss consider the default persistence mode that's snapshotting), # or on the contrary, use "always" that's very slow but a bit safer than @@ -284,7 +330,7 @@ appendfsync everysec # BGSAVE or BGREWRITEAOF is in progress. # # This means that while another child is saving the durability of Redis is -# the same as "appendfsync none", that in pratical terms means that it is +# the same as "appendfsync none", that in practical terms means that it is # possible to lost up to 30 seconds of log in the worst scenario (with the # default Linux settings). # @@ -306,7 +352,7 @@ no-appendfsync-on-rewrite no # is useful to avoid rewriting the AOF file even if the percentage increase # is reached but it is still pretty small. # -# Specify a precentage of zero in order to disable the automatic AOF +# Specify a percentage of zero in order to disable the automatic AOF # rewrite feature. auto-aof-rewrite-percentage 100 @@ -315,9 +361,39 @@ auto-aof-rewrite-min-size 64mb ################################ LUA SCRIPTING ############################### # Max execution time of a Lua script in milliseconds. -# This prevents that a programming error generating an infinite loop will block -# your server forever. Set it to 0 or a negative value for unlimited execution. -#lua-time-limit 60000 +# +# If the maximum execution time is reached Redis will log that a script is +# still in execution after the maximum allowed time and will start to +# reply to queries with an error. +# +# When a long running script exceed the maximum execution time only the +# SCRIPT KILL and SHUTDOWN NOSAVE commands are available. The first can be +# used to stop a script that did not yet called write commands. The second +# is the only way to shut down the server in the case a write commands was +# already issue by the script but the user don't want to wait for the natural +# termination of the script. +# +# Set it to 0 or a negative value for unlimited execution without warnings. +lua-time-limit 5000 + +################################ REDIS CLUSTER ############################### +# +# Normal Redis instances can't be part of a Redis Cluster, only nodes that are +# started as cluster nodes can. In order to start a Redis instance as a +# cluster node enable the cluster support uncommenting the following: +# +# cluster-enabled yes + +# Every cluster node has a cluster configuration file. This file is not +# intended to be edited by hand. It is created and updated by Redis nodes. +# Every Redis Cluster node requires a different cluster configuration file. +# Make sure that instances running in the same system does not have +# overlapping cluster configuration file names. +# +# cluster-config-file nodes-6379.conf + +# In order to setup your cluster make sure to read the documentation +# available at http://redis.io web site. ################################## SLOW LOG ################################### @@ -345,12 +421,11 @@ slowlog-max-len 1024 ############################### ADVANCED CONFIG ############################### -# Hashes are encoded in a special way (much more memory efficient) when they -# have at max a given numer of elements, and the biggest element does not -# exceed a given threshold. You can configure this limits with the following -# configuration directives. -hash-max-zipmap-entries 512 -hash-max-zipmap-value 64 +# Hashes are encoded using a memory efficient data structure when they have a +# small number of entries, and the biggest entry does not exceed a given +# threshold. These thresholds can be configured using the following directives. +hash-max-ziplist-entries 512 +hash-max-ziplist-value 64 # Similarly to hashes, small lists are also encoded in a special way in order # to save a lot of space. The special representation is only used when @@ -373,9 +448,9 @@ zset-max-ziplist-value 64 # Active rehashing uses 1 millisecond every 100 milliseconds of CPU time in # order to help rehashing the main Redis hash table (the one mapping top-level -# keys to values). The hash table implementation redis uses (see dict.c) +# keys to values). The hash table implementation Redis uses (see dict.c) # performs a lazy rehashing: the more operation you run into an hash table -# that is rhashing, the more rehashing "steps" are performed, so if the +# that is rehashing, the more rehashing "steps" are performed, so if the # server is idle the rehashing is never complete and some more memory is used # by the hash table. # @@ -391,10 +466,47 @@ zset-max-ziplist-value 64 # want to free memory asap when possible. activerehashing yes +# The client output buffer limits can be used to force disconnection of clients +# that are not reading data from the server fast enough for some reason (a +# common reason is that a Pub/Sub client can't consume messages as fast as the +# publisher can produce them). +# +# The limit can be set differently for the three different classes of clients: +# +# normal -> normal clients +# slave -> slave clients and MONITOR clients +# pubsub -> clients subcribed to at least one pubsub channel or pattern +# +# The syntax of every client-output-buffer-limit directive is the following: +# +# client-output-buffer-limit +# +# A client is immediately disconnected once the hard limit is reached, or if +# the soft limit is reached and remains reached for the specified number of +# seconds (continuously). +# So for instance if the hard limit is 32 megabytes and the soft limit is +# 16 megabytes / 10 seconds, the client will get disconnected immediately +# if the size of the output buffers reach 32 megabytes, but will also get +# disconnected if the client reaches 16 megabytes and continuously overcomes +# the limit for 10 seconds. +# +# By default normal clients are not limited because they don't receive data +# without asking (in a push way), but just after a request, so only +# asynchronous clients may create a scenario where data is requested faster +# than it can read. +# +# Instead there is a default limit for pubsub and slave clients, since +# subscribers and slaves receive data in a push fashion. +# +# Both the hard or the soft limit can be disabled just setting it to zero. +client-output-buffer-limit normal 0 0 0 +client-output-buffer-limit slave 256mb 64mb 60 +client-output-buffer-limit pubsub 32mb 8mb 60 + ################################## INCLUDES ################################### # Include one or more other config files here. This is useful if you -# have a standard template that goes to all redis server but also need +# have a standard template that goes to all Redis server but also need # to customize a few per-server settings. Include files can include # other files, so use this wisely. # From a15f004026ebadd6c50611031b7d1ac1fb81ea93 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 20 Mar 2012 17:32:48 +0100 Subject: [PATCH 58/70] Support for read-only slaves. Semantical fixes. This commit introduces support for read only slaves via redis.conf and CONFIG GET/SET commands. Also various semantical fixes are implemented here: 1) MULTI/EXEC with only read commands now work where the server is into a state where writes (or commands increasing memory usage) are not allowed. Before this patch everything inside a transaction would fail in this conditions. 2) Scripts just calling read-only commands will work against read only slaves, when the server is out of memory, or when persistence is into an error condition. Before the patch EVAL always failed in this condition. --- redis.conf | 8 ++++++++ src/config.c | 11 +++++++++++ src/multi.c | 13 ++++++++----- src/redis.c | 26 ++++++++++++++++++++------ src/redis.h | 6 ++++-- src/scripting.c | 38 ++++++++++++++++++++++++++++++++++---- 6 files changed, 85 insertions(+), 17 deletions(-) diff --git a/redis.conf b/redis.conf index e03359963..8396a6a47 100644 --- a/redis.conf +++ b/redis.conf @@ -156,6 +156,14 @@ dir ./ # slave-serve-stale-data yes +# You can configure a slave instance to accept writes or not. Writing against +# a slave instance may be useful to store some ephemeral data (because data +# written on a slave will be easily deleted after resync with the master) but +# may also cause problems if clients are writing to it for an error. +# +# Since Redis 2.6 by default slaves are read-only. +slave-read-only yes + # Slaves send PINGs to server in a predefined interval. It's possible to change # this interval with the repl_ping_slave_period option. The default value is 10 # seconds. diff --git a/src/config.c b/src/config.c index 533a2a572..316f0a284 100644 --- a/src/config.c +++ b/src/config.c @@ -202,6 +202,10 @@ void loadServerConfigFromString(char *config) { if ((server.repl_serve_stale_data = yesnotoi(argv[1])) == -1) { err = "argument must be 'yes' or 'no'"; goto loaderr; } + } else if (!strcasecmp(argv[0],"slave-read-only") && argc == 2) { + if ((server.repl_slave_ro = yesnotoi(argv[1])) == -1) { + err = "argument must be 'yes' or 'no'"; goto loaderr; + } } else if (!strcasecmp(argv[0],"rdbcompression") && argc == 2) { if ((server.rdb_compression = yesnotoi(argv[1])) == -1) { err = "argument must be 'yes' or 'no'"; goto loaderr; @@ -514,6 +518,11 @@ void configSetCommand(redisClient *c) { if (yn == -1) goto badfmt; server.repl_serve_stale_data = yn; + } else if (!strcasecmp(c->argv[2]->ptr,"slave-read-only")) { + int yn = yesnotoi(o->ptr); + + if (yn == -1) goto badfmt; + server.repl_slave_ro = yn; } else if (!strcasecmp(c->argv[2]->ptr,"dir")) { if (chdir((char*)o->ptr) == -1) { addReplyErrorFormat(c,"Changing directory: %s", strerror(errno)); @@ -712,6 +721,8 @@ void configGetCommand(redisClient *c) { server.aof_no_fsync_on_rewrite); config_get_bool_field("slave-serve-stale-data", server.repl_serve_stale_data); + config_get_bool_field("slave-read-only", + server.repl_slave_ro); config_get_bool_field("stop-writes-on-bgsave-error", server.stop_writes_on_bgsave_err); config_get_bool_field("daemonize", server.daemonize); diff --git a/src/multi.c b/src/multi.c index 65ec38a8d..eee9748c5 100644 --- a/src/multi.c +++ b/src/multi.c @@ -40,6 +40,13 @@ void queueMultiCommand(redisClient *c) { c->mstate.count++; } +void discardTransaction(redisClient *c) { + freeClientMultiState(c); + initClientMultiState(c); + c->flags &= ~(REDIS_MULTI|REDIS_DIRTY_CAS);; + unwatchAllKeys(c); +} + void multiCommand(redisClient *c) { if (c->flags & REDIS_MULTI) { addReplyError(c,"MULTI calls can not be nested"); @@ -54,11 +61,7 @@ void discardCommand(redisClient *c) { addReplyError(c,"DISCARD without MULTI"); return; } - - freeClientMultiState(c); - initClientMultiState(c); - c->flags &= ~(REDIS_MULTI|REDIS_DIRTY_CAS);; - unwatchAllKeys(c); + discardTransaction(c); addReply(c,shared.ok); } diff --git a/src/redis.c b/src/redis.c index 98c12be9f..737613d83 100644 --- a/src/redis.c +++ b/src/redis.c @@ -211,7 +211,7 @@ struct redisCommand redisCommandTable[] = { {"lastsave",lastsaveCommand,1,"r",0,NULL,0,0,0,0,0}, {"type",typeCommand,2,"r",0,NULL,1,1,1,0,0}, {"multi",multiCommand,1,"rs",0,NULL,0,0,0,0,0}, - {"exec",execCommand,1,"wms",0,NULL,0,0,0,0,0}, + {"exec",execCommand,1,"s",0,NULL,0,0,0,0,0}, {"discard",discardCommand,1,"rs",0,NULL,0,0,0,0,0}, {"sync",syncCommand,1,"ars",0,NULL,0,0,0,0,0}, {"flushdb",flushdbCommand,1,"w",0,NULL,0,0,0,0,0}, @@ -239,8 +239,8 @@ struct redisCommand redisCommandTable[] = { {"dump",dumpCommand,2,"ar",0,NULL,1,1,1,0,0}, {"object",objectCommand,-2,"r",0,NULL,2,2,2,0,0}, {"client",clientCommand,-2,"ar",0,NULL,0,0,0,0,0}, - {"eval",evalCommand,-3,"wms",0,zunionInterGetKeys,0,0,0,0,0}, - {"evalsha",evalShaCommand,-3,"wms",0,zunionInterGetKeys,0,0,0,0,0}, + {"eval",evalCommand,-3,"s",0,zunionInterGetKeys,0,0,0,0,0}, + {"evalsha",evalShaCommand,-3,"s",0,zunionInterGetKeys,0,0,0,0,0}, {"slowlog",slowlogCommand,-2,"r",0,NULL,0,0,0,0,0}, {"script",scriptCommand,-2,"ras",0,NULL,0,0,0,0,0}, {"time",timeCommand,1,"rR",0,NULL,0,0,0,0,0} @@ -939,7 +939,11 @@ void createSharedObjects(void) { shared.slowscripterr = createObject(REDIS_STRING,sdsnew( "-BUSY Redis is busy running a script. You can only call SCRIPT KILL or SHUTDOWN NOSAVE.\r\n")); shared.bgsaveerr = createObject(REDIS_STRING,sdsnew( - "-MISCONF Redis is configured to save RDB snapshots, but is currently not able to persist on disk. Write commands are disabled. Please check Redis logs for details about the error.\r\n")); + "-MISCONF Redis is configured to save RDB snapshots, but is currently not able to persist on disk. Commands that may modify the data set are disabled. Please check Redis logs for details about the error.\r\n")); + shared.roslaveerr = createObject(REDIS_STRING,sdsnew( + "-READONLY You can't write against a read only slave.\r\n")); + shared.oomerr = createObject(REDIS_STRING, + "-OOM command not allowed when used memory > 'maxmemory'.\r\n"); shared.space = createObject(REDIS_STRING,sdsnew(" ")); shared.colon = createObject(REDIS_STRING,sdsnew(":")); shared.plus = createObject(REDIS_STRING,sdsnew("+")); @@ -1048,6 +1052,7 @@ void initServerConfig() { server.repl_state = REDIS_REPL_NONE; server.repl_syncio_timeout = REDIS_REPL_SYNCIO_TIMEOUT; server.repl_serve_stale_data = 1; + server.repl_slave_ro = 1; server.repl_down_since = -1; /* Client output buffer limits */ @@ -1483,8 +1488,7 @@ int processCommand(redisClient *c) { if (server.maxmemory) { int retval = freeMemoryIfNeeded(); if ((c->cmd->flags & REDIS_CMD_DENYOOM) && retval == REDIS_ERR) { - addReplyError(c, - "command not allowed when used memory > 'maxmemory'"); + addReply(c, shared.oomerr); return REDIS_OK; } } @@ -1499,6 +1503,16 @@ int processCommand(redisClient *c) { return REDIS_OK; } + /* Don't accept wirte commands if this is a read only slave. But + * accept write commands if this is our master. */ + if (server.masterhost && server.repl_slave_ro && + !(c->flags & REDIS_MASTER) && + c->cmd->flags & REDIS_CMD_WRITE) + { + addReply(c, shared.roslaveerr); + return REDIS_OK; + } + /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */ if ((dictSize(c->pubsub_channels) > 0 || listLength(c->pubsub_patterns) > 0) && diff --git a/src/redis.h b/src/redis.h index 49b695237..1fc2ae393 100644 --- a/src/redis.h +++ b/src/redis.h @@ -366,8 +366,8 @@ struct sharedObjectsStruct { *colon, *nullbulk, *nullmultibulk, *queued, *emptymultibulk, *wrongtypeerr, *nokeyerr, *syntaxerr, *sameobjecterr, *outofrangeerr, *noscripterr, *loadingerr, *slowscripterr, *bgsaveerr, - *plus, *select0, *select1, *select2, *select3, *select4, - *select5, *select6, *select7, *select8, *select9, + *roslaveerr, *oomerr, *plus, *select0, *select1, *select2, *select3, + *select4, *select5, *select6, *select7, *select8, *select9, *messagebulk, *pmessagebulk, *subscribebulk, *unsubscribebulk, *psubscribebulk, *punsubscribebulk, *del, *rpop, *lpop, *integers[REDIS_SHARED_INTEGERS], @@ -671,6 +671,7 @@ struct redisServer { char *repl_transfer_tmpfile; /* Slave-> master SYNC temp file name */ time_t repl_transfer_lastio; /* Unix time of the latest read, for timeout */ int repl_serve_stale_data; /* Serve stale data when link is down? */ + int repl_slave_ro; /* Slave is read only? */ time_t repl_down_since; /* Unix time at which link with master went down */ /* Limits */ unsigned int maxclients; /* Max number of simultaneous clients */ @@ -901,6 +902,7 @@ void freeClientMultiState(redisClient *c); void queueMultiCommand(redisClient *c); void touchWatchedKey(redisDb *db, robj *key); void touchWatchedKeysOnFlush(int dbid); +void discardTransaction(redisClient *c); /* Redis object implementation */ void decrRefCount(void *o); diff --git a/src/scripting.c b/src/scripting.c index ce1f0877b..e38d08077 100644 --- a/src/scripting.c +++ b/src/scripting.c @@ -206,15 +206,45 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) { goto cleanup; } + /* There are commands that are not allowed inside scripts. */ if (cmd->flags & REDIS_CMD_NOSCRIPT) { luaPushError(lua, "This Redis command is not allowed from scripts"); goto cleanup; } - if (cmd->flags & REDIS_CMD_WRITE && server.lua_random_dirty) { - luaPushError(lua, - "Write commands not allowed after non deterministic commands"); - goto cleanup; + /* Write commands are forbidden against read-only slaves, or if a + * command marked as non-deterministic was already called in the context + * of this script. */ + if (cmd->flags & REDIS_CMD_WRITE) { + if (server.lua_random_dirty) { + luaPushError(lua, + "Write commands not allowed after non deterministic commands"); + goto cleanup; + } else if (server.masterhost && server.repl_slave_ro && + !(server.lua_caller->flags & REDIS_MASTER)) + { + luaPushError(lua, shared.roslaveerr->ptr); + goto cleanup; + } else if (server.stop_writes_on_bgsave_err && + server.saveparamslen > 0 && + server.lastbgsave_status == REDIS_ERR) + { + luaPushError(lua, shared.bgsaveerr->ptr); + goto cleanup; + } + } + + /* If we reached the memory limit configured via maxmemory, commands that + * could enlarge the memory usage are not allowed, but only if this is the + * first write in the context of this script, otherwise we can't stop + * in the middle. */ + if (server.maxmemory && server.lua_write_dirty == 0 && + (cmd->flags & REDIS_CMD_DENYOOM)) + { + if (freeMemoryIfNeeded() == REDIS_ERR) { + luaPushError(lua, shared.oomerr->ptr); + goto cleanup; + } } if (cmd->flags & REDIS_CMD_RANDOM) server.lua_random_dirty = 1; From 3cd475b25466cc3b4755aa2fef2e0ed10950fa89 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 20 Mar 2012 17:53:47 +0100 Subject: [PATCH 59/70] DEBUG should not be flagged as w otherwise we can not call DEBUG DIGEST and other commands against read only slaves. --- src/redis.c | 2 +- tests/support/redis.tcl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/redis.c b/src/redis.c index 737613d83..2558f66ae 100644 --- a/src/redis.c +++ b/src/redis.c @@ -223,7 +223,7 @@ struct redisCommand redisCommandTable[] = { {"pttl",pttlCommand,2,"r",0,NULL,1,1,1,0,0}, {"persist",persistCommand,2,"w",0,NULL,1,1,1,0,0}, {"slaveof",slaveofCommand,3,"aws",0,NULL,0,0,0,0,0}, - {"debug",debugCommand,-2,"aws",0,NULL,0,0,0,0,0}, + {"debug",debugCommand,-2,"as",0,NULL,0,0,0,0,0}, {"config",configCommand,-2,"ar",0,NULL,0,0,0,0,0}, {"subscribe",subscribeCommand,-2,"rps",0,NULL,0,0,0,0,0}, {"unsubscribe",unsubscribeCommand,-1,"rps",0,NULL,0,0,0,0,0}, diff --git a/tests/support/redis.tcl b/tests/support/redis.tcl index 4f8ac485d..ca6cf34b6 100644 --- a/tests/support/redis.tcl +++ b/tests/support/redis.tcl @@ -160,7 +160,7 @@ proc ::redis::redis_read_reply fd { - {return -code error [redis_read_line $fd]} $ {redis_bulk_read $fd} * {redis_multi_bulk_read $fd} - default {return -code error "Bad protocol, $type as reply type byte"} + default {return -code error "Bad protocol, '$type' as reply type byte"} } } From 118caec1a81b34de17eda6be74f6a313449ec644 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 21 Mar 2012 12:11:07 +0100 Subject: [PATCH 60/70] Correctly create shared.oomerr as an sds string. --- src/redis.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/redis.c b/src/redis.c index 2558f66ae..bae852b8f 100644 --- a/src/redis.c +++ b/src/redis.c @@ -942,8 +942,8 @@ void createSharedObjects(void) { "-MISCONF Redis is configured to save RDB snapshots, but is currently not able to persist on disk. Commands that may modify the data set are disabled. Please check Redis logs for details about the error.\r\n")); shared.roslaveerr = createObject(REDIS_STRING,sdsnew( "-READONLY You can't write against a read only slave.\r\n")); - shared.oomerr = createObject(REDIS_STRING, - "-OOM command not allowed when used memory > 'maxmemory'.\r\n"); + shared.oomerr = createObject(REDIS_STRING,sdsnew( + "-OOM command not allowed when used memory > 'maxmemory'.\r\n")); shared.space = createObject(REDIS_STRING,sdsnew(" ")); shared.colon = createObject(REDIS_STRING,sdsnew(":")); shared.plus = createObject(REDIS_STRING,sdsnew("+")); From 5ed9340fe486790e43dc643b4a6da7d50c0ddc6d Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 21 Mar 2012 12:26:05 +0100 Subject: [PATCH 61/70] Comments about security of slave-read-only in redis.coinf. --- redis.conf | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/redis.conf b/redis.conf index 8396a6a47..1b79e09ef 100644 --- a/redis.conf +++ b/redis.conf @@ -159,9 +159,17 @@ slave-serve-stale-data yes # You can configure a slave instance to accept writes or not. Writing against # a slave instance may be useful to store some ephemeral data (because data # written on a slave will be easily deleted after resync with the master) but -# may also cause problems if clients are writing to it for an error. +# may also cause problems if clients are writing to it because of a +# misconfiguration. # # Since Redis 2.6 by default slaves are read-only. +# +# Note: read only slaves are not designed to be exposed to untrusted clients +# on the internet. It's just a protection layer against misuse of the instance. +# Still a read only slave exports by default all the administrative commands +# such as CONFIG, DEBUG, and so forth. To a limited extend you can improve +# security of read only slaves using 'rename-command' to shadow all the +# administrative / dangerous commands. slave-read-only yes # Slaves send PINGs to server in a predefined interval. It's possible to change From e63497d4d85dd73268f66b29adbaab92d31e3512 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 22 Mar 2012 18:14:32 +0100 Subject: [PATCH 62/70] Result of INCRBYFLOAT and HINCRBYFLOAT should never be in exponential form, and also should never contain trailing zeroes. This is not possible with vanilla printf() format specifiers, so we alter the output. --- src/object.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/object.c b/src/object.c index ccb072085..9699ea53e 100644 --- a/src/object.c +++ b/src/object.c @@ -48,7 +48,7 @@ robj *createStringObjectFromLongLong(long long value) { /* Note: this function is defined into object.c since here it is where it * belongs but it is actually designed to be used just for INCRBYFLOAT */ robj *createStringObjectFromLongDouble(long double value) { - char buf[256]; + char buf[256], *p; int len; /* We use 17 digits precision since with 128 bit floats that precision @@ -56,7 +56,16 @@ robj *createStringObjectFromLongDouble(long double value) { * that is "non surprising" for the user (that is, most small decimal * numbers will be represented in a way that when converted back into * a string are exactly the same as what the user typed.) */ - len = snprintf(buf,sizeof(buf),"%.17Lg", value); + len = snprintf(buf,sizeof(buf),"%.17Lf", value); + /* Now remove trailing zeroes after the '.' */ + if ((p = strchr(buf,'.')) != NULL) { + p = buf+len-1; + while(*p == '0') { + p--; + len--; + } + if (*p == '.') len--; + } return createStringObject(buf,len); } From b02916eff7c8df9baf8dba13e57e1a3940912b4f Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 22 Mar 2012 18:16:41 +0100 Subject: [PATCH 63/70] Code style hack. --- src/object.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/object.c b/src/object.c index 9699ea53e..91e1933af 100644 --- a/src/object.c +++ b/src/object.c @@ -48,7 +48,7 @@ robj *createStringObjectFromLongLong(long long value) { /* Note: this function is defined into object.c since here it is where it * belongs but it is actually designed to be used just for INCRBYFLOAT */ robj *createStringObjectFromLongDouble(long double value) { - char buf[256], *p; + char buf[256]; int len; /* We use 17 digits precision since with 128 bit floats that precision @@ -58,8 +58,8 @@ robj *createStringObjectFromLongDouble(long double value) { * a string are exactly the same as what the user typed.) */ len = snprintf(buf,sizeof(buf),"%.17Lf", value); /* Now remove trailing zeroes after the '.' */ - if ((p = strchr(buf,'.')) != NULL) { - p = buf+len-1; + if (strchr(buf,'.') != NULL) { + char *p = buf+len-1; while(*p == '0') { p--; len--; From e5b348e0c3d202d03429bc671595b8c0ec6804e4 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 23 Mar 2012 10:22:58 +0100 Subject: [PATCH 64/70] Replicate HINCRBYFLOAT as HSET. --- src/t_hash.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/src/t_hash.c b/src/t_hash.c index b39284505..5b7a347ab 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -135,7 +135,9 @@ int hashTypeExists(robj *o, robj *field) { } /* Add an element, discard the old if the key already exists. - * Return 0 on insert and 1 on update. */ + * Return 0 on insert and 1 on update. + * This function will take care of incrementing the reference count of the + * retained fields and value objects. */ int hashTypeSet(robj *o, robj *field, robj *value) { int update = 0; @@ -168,30 +170,23 @@ int hashTypeSet(robj *o, robj *field, robj *value) { zl = ziplistPush(zl, field->ptr, sdslen(field->ptr), ZIPLIST_TAIL); zl = ziplistPush(zl, value->ptr, sdslen(value->ptr), ZIPLIST_TAIL); } - o->ptr = zl; - decrRefCount(field); decrRefCount(value); /* Check if the ziplist needs to be converted to a hash table */ - if (hashTypeLength(o) > server.hash_max_ziplist_entries) { + if (hashTypeLength(o) > server.hash_max_ziplist_entries) hashTypeConvert(o, REDIS_ENCODING_HT); - } - } else if (o->encoding == REDIS_ENCODING_HT) { if (dictReplace(o->ptr, field, value)) { /* Insert */ incrRefCount(field); } else { /* Update */ update = 1; } - incrRefCount(value); - } else { redisPanic("Unknown hash encoding"); } - return update; } @@ -520,7 +515,7 @@ void hincrbyCommand(redisClient *c) { void hincrbyfloatCommand(redisClient *c) { double long value, incr; - robj *o, *current, *new; + robj *o, *current, *new, *aux; if (getLongDoubleFromObjectOrReply(c,c->argv[3],&incr,NULL) != REDIS_OK) return; if ((o = hashTypeLookupWriteOrCreate(c,c->argv[1])) == NULL) return; @@ -540,9 +535,17 @@ void hincrbyfloatCommand(redisClient *c) { hashTypeTryObjectEncoding(o,&c->argv[2],NULL); hashTypeSet(o,c->argv[2],new); addReplyBulk(c,new); - decrRefCount(new); signalModifiedKey(c->db,c->argv[1]); server.dirty++; + + /* Always replicate HINCRBYFLOAT as an HSET command with the final value + * in order to make sure that differences in float pricision or formatting + * will not create differences in replicas or after an AOF restart. */ + aux = createStringObject("HSET",4); + rewriteClientCommandArgument(c,0,aux); + decrRefCount(aux); + rewriteClientCommandArgument(c,3,new); + decrRefCount(new); } static void addHashFieldToReply(redisClient *c, robj *o, robj *field) { From 5b89814ce31c0f7271259a9dcac8db4b99b81804 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 23 Mar 2012 12:42:20 +0100 Subject: [PATCH 65/70] Big endian fix. The bug was introduced because of a typo. --- src/ziplist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ziplist.c b/src/ziplist.c index 5962510d5..b214b2da6 100644 --- a/src/ziplist.c +++ b/src/ziplist.c @@ -240,7 +240,7 @@ static void zipPrevEncodeLengthForceLarge(unsigned char *p, unsigned int len) { } else if ((prevlensize) == 5) { \ assert(sizeof((prevlensize)) == 4); \ memcpy(&(prevlen), ((char*)(ptr)) + 1, 4); \ - memrev32ifbe(&len); \ + memrev32ifbe(&prevlen); \ } \ } while(0); From 251268cbe0b4f44d1017a003f201b76976ae320a Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 23 Mar 2012 15:22:25 +0100 Subject: [PATCH 66/70] RDB load of different encodings test added. --- tests/assets/encodings.rdb | Bin 0 -> 667 bytes tests/integration/rdb.tcl | 25 +++++++++++++++++++++++++ tests/test_helper.tcl | 1 + 3 files changed, 26 insertions(+) create mode 100644 tests/assets/encodings.rdb create mode 100644 tests/integration/rdb.tcl diff --git a/tests/assets/encodings.rdb b/tests/assets/encodings.rdb new file mode 100644 index 0000000000000000000000000000000000000000..9fd9b705d16220065ee117a1c1c094f40fb122f2 GIT binary patch literal 667 zcmbVKu}UOC5UuKJ7oAzb-~v%NHW5S&dS+bpFfmX#Qw_|N?w&>$M|aur5EcU?;j)7> zGV&XY4SF>ZBR^rkz`zf1tzKQwOdP0r-Cfn)uU@~+^|g&HrPRU;knEK1xGIbhsS?(T zOp!5$Ql%uLiRxVU_K~%gGG1r2V@aAV)EAeQf1$=iXe|~B7q#x40{=g8Nhbr35aH0(af04| z31?FkfXdOIL*v>$`m`ZiW?H~$fQQSK0B})18Q{+2^#ErNo(A|lG8gy_c?x0;Mx(`{ zoa#1QiP|F?FVK2I8DyCB=mk$S8nlC&4|~3w8;|#Ox&QtGwHmXU=HND1)gUq}8+2xM zS?Yc@4yO2OwUpuPICQ~2@KI=m_~m`huJS+FRQ_l1RQDc;oztC1%JaPY56L Date: Fri, 23 Mar 2012 20:21:19 +0100 Subject: [PATCH 67/70] Fixed memory leak in hash loading. --- src/rdb.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/rdb.c b/src/rdb.c index 519b645d1..1e23fa70c 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -859,14 +859,17 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) { /* Add pair to ziplist */ o->ptr = ziplistPush(o->ptr, field->ptr, sdslen(field->ptr), ZIPLIST_TAIL); o->ptr = ziplistPush(o->ptr, value->ptr, sdslen(value->ptr), ZIPLIST_TAIL); - /* Convert to hash table if size threshold is exceeded */ if (sdslen(field->ptr) > server.hash_max_ziplist_value || sdslen(value->ptr) > server.hash_max_ziplist_value) { + decrRefCount(field); + decrRefCount(value); hashTypeConvert(o, REDIS_ENCODING_HT); break; } + decrRefCount(field); + decrRefCount(value); } /* Load remaining fields and values into the hash table */ From 4c955a571515900bf79843ed42da0f7c02d030b2 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 23 Mar 2012 20:20:43 +0100 Subject: [PATCH 68/70] Contextualize comment. --- tests/integration/rdb.tcl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/rdb.tcl b/tests/integration/rdb.tcl index fdc5495f7..85c5db9f5 100644 --- a/tests/integration/rdb.tcl +++ b/tests/integration/rdb.tcl @@ -1,6 +1,6 @@ set server_path [tmpdir "server.rdb-encoding-test"] -# Copy RDB with zipmap encoded hash to server path +# Copy RDB with different encodings in server path exec cp tests/assets/encodings.rdb $server_path start_server [list overrides [list "dir" $server_path "dbfilename" "encodings.rdb"]] { From 626c23a540da0a8731dccfcfd3c54643d72eb1f1 Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 24 Mar 2012 11:42:20 +0100 Subject: [PATCH 69/70] convert-zipmap-hash-on-load test enabled --- tests/test_helper.tcl | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl index 4954b79e5..bef8231a7 100644 --- a/tests/test_helper.tcl +++ b/tests/test_helper.tcl @@ -31,6 +31,7 @@ set ::all_tests { integration/replication-3 integration/aof integration/rdb + integration/convert-zipmap-hash-on-load unit/pubsub unit/slowlog unit/scripting From ef34aec7196bc71f1c024a071e36fd5f0b3ab168 Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 24 Mar 2012 11:52:56 +0100 Subject: [PATCH 70/70] Add used allocator in redis-server -v output. --- src/redis.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/redis.c b/src/redis.c index bae852b8f..2f27b4ddb 100644 --- a/src/redis.c +++ b/src/redis.c @@ -2236,8 +2236,8 @@ void daemonize(void) { } void version() { - printf("Redis server version %s (%s:%d)\n", REDIS_VERSION, - redisGitSHA1(), atoi(redisGitDirty()) > 0); + printf("Redis server v=%s sha=%s:%d malloc=%s\n", REDIS_VERSION, + redisGitSHA1(), atoi(redisGitDirty()) > 0, ZMALLOC_LIB); exit(0); }