490 lines
18 KiB
C
490 lines
18 KiB
C
/*
|
|
* Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are met:
|
|
*
|
|
* * Redistributions of source code must retain the above copyright notice,
|
|
* this list of conditions and the following disclaimer.
|
|
* * Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* * Neither the name of Redis nor the names of its contributors may be used
|
|
* to endorse or promote products derived from this software without
|
|
* specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include "server.h"
|
|
|
|
/* ================================ MULTI/EXEC ============================== */
|
|
|
|
/* Client state initialization for MULTI/EXEC */
|
|
void initClientMultiState(client *c) {
|
|
c->mstate.commands = NULL;
|
|
c->mstate.count = 0;
|
|
c->mstate.cmd_flags = 0;
|
|
c->mstate.cmd_inv_flags = 0;
|
|
c->mstate.argv_len_sums = 0;
|
|
c->mstate.alloc_count = 0;
|
|
}
|
|
|
|
/* Release all the resources associated with MULTI/EXEC state */
|
|
void freeClientMultiState(client *c) {
|
|
int j;
|
|
|
|
for (j = 0; j < c->mstate.count; j++) {
|
|
int i;
|
|
multiCmd *mc = c->mstate.commands + j;
|
|
|
|
for (i = 0; i < mc->argc; i++) decrRefCount(mc->argv[i]);
|
|
zfree(mc->argv);
|
|
}
|
|
zfree(c->mstate.commands);
|
|
}
|
|
|
|
/* Add a new command into the MULTI commands queue */
|
|
void queueMultiCommand(client *c, uint64_t cmd_flags) {
|
|
multiCmd *mc;
|
|
|
|
/* No sense to waste memory if the transaction is already aborted.
|
|
* this is useful in case client sends these in a pipeline, or doesn't
|
|
* bother to read previous responses and didn't notice the multi was already
|
|
* aborted. */
|
|
if (c->flag.dirty_cas || c->flag.dirty_exec) return;
|
|
if (c->mstate.count == 0) {
|
|
/* If a client is using multi/exec, assuming it is used to execute at least
|
|
* two commands. Hence, creating by default size of 2. */
|
|
c->mstate.commands = zmalloc(sizeof(multiCmd) * 2);
|
|
c->mstate.alloc_count = 2;
|
|
}
|
|
if (c->mstate.count == c->mstate.alloc_count) {
|
|
c->mstate.alloc_count = c->mstate.alloc_count < INT_MAX / 2 ? c->mstate.alloc_count * 2 : INT_MAX;
|
|
c->mstate.commands = zrealloc(c->mstate.commands, sizeof(multiCmd) * (c->mstate.alloc_count));
|
|
}
|
|
mc = c->mstate.commands + c->mstate.count;
|
|
mc->cmd = c->cmd;
|
|
mc->argc = c->argc;
|
|
mc->argv = c->argv;
|
|
mc->argv_len = c->argv_len;
|
|
|
|
c->mstate.count++;
|
|
c->mstate.cmd_flags |= cmd_flags;
|
|
c->mstate.cmd_inv_flags |= ~cmd_flags;
|
|
c->mstate.argv_len_sums += c->argv_len_sum + sizeof(robj *) * c->argc;
|
|
|
|
/* Reset the client's args since we copied them into the mstate and shouldn't
|
|
* reference them from c anymore. */
|
|
c->argv = NULL;
|
|
c->argc = 0;
|
|
c->argv_len_sum = 0;
|
|
c->argv_len = 0;
|
|
}
|
|
|
|
/* Drop the whole transaction of the client: free and re-initialize the queued
 * commands, clear the multi / dirty_cas / dirty_exec flags, and unwatch every
 * key the client was watching. */
void discardTransaction(client *c) {
    /* Throw away the queue and start from a clean state. */
    freeClientMultiState(c);
    initClientMultiState(c);

    /* The client is no longer inside MULTI and no longer dirty. */
    c->flag.dirty_exec = 0;
    c->flag.dirty_cas = 0;
    c->flag.multi = 0;

    unwatchAllKeys(c);
}
|
|
|
|
/* Flag the transaction as DIRTY_EXEC so that EXEC will fail.
|
|
* Should be called every time there is an error while queueing a command. */
|
|
void flagTransaction(client *c) {
|
|
if (c->flag.multi) c->flag.dirty_exec = 1;
|
|
}
|
|
|
|
/* MULTI command: put the client into the MULTI state and reply OK.
 * Replies with an error if the client is already inside MULTI. */
void multiCommand(client *c) {
    if (!c->flag.multi) {
        c->flag.multi = 1;
        addReply(c, shared.ok);
    } else {
        addReplyError(c, "MULTI calls can not be nested");
    }
}
|
|
|
|
/* DISCARD command: throw away the current transaction and reply OK.
 * Replies with an error if the client is not inside MULTI. */
void discardCommand(client *c) {
    if (c->flag.multi) {
        discardTransaction(c);
        addReply(c, shared.ok);
    } else {
        addReplyError(c, "DISCARD without MULTI");
    }
}
|
|
|
|
/* Aborts a transaction, with a specific error message.
|
|
* The transaction is always aborted with -EXECABORT so that the client knows
|
|
* the server exited the multi state, but the actual reason for the abort is
|
|
* included too.
|
|
* Note: 'error' may or may not end with \r\n. see addReplyErrorFormat. */
|
|
void execCommandAbort(client *c, sds error) {
|
|
discardTransaction(c);
|
|
|
|
if (error[0] == '-') error++;
|
|
addReplyErrorFormat(c, "-EXECABORT Transaction discarded because of: %s", error);
|
|
|
|
/* Send EXEC to clients waiting data from MONITOR. We did send a MULTI
|
|
* already, and didn't send any of the queued commands, now we'll just send
|
|
* EXEC so it is clear that the transaction is over. */
|
|
replicationFeedMonitors(c, server.monitors, c->db->id, c->argv, c->argc);
|
|
}
|
|
|
|
/* EXEC command: run every command queued since MULTI and reply with an array
 * whose length is the number of queued commands.
 *
 * The queued commands are not executed when:
 * 1) there was an error while queueing commands (dirty_exec): the shared
 *    EXECABORT error is returned;
 * 2) a WATCHed key was touched, or expired after WATCH (dirty_cas): a null
 *    array is returned (technically not an error but a special behavior).
 *
 * Whenever the client was inside MULTI, the transaction is discarded before
 * returning. */
void execCommand(client *c) {
    if (!c->flag.multi) {
        addReplyError(c, "EXEC without MULTI");
        return;
    }

    /* EXEC with an expired watched key is disallowed. */
    if (isWatchedKeyExpired(c)) c->flag.dirty_cas = 1;

    /* Queueing errors take precedence over touched WATCHed keys. */
    if (c->flag.dirty_exec) {
        addReplyErrorObject(c, shared.execaborterr);
        discardTransaction(c);
        return;
    }
    if (c->flag.dirty_cas) {
        addReply(c, shared.nullarray[c->resp]);
        discardTransaction(c);
        return;
    }

    /* Remember the flags as they were before touching deny_blocking. */
    struct ClientFlags old_flags = c->flag;

    /* Blocking commands are not allowed inside a transaction. */
    c->flag.deny_blocking = 1;

    /* Unwatch ASAP, otherwise we'll waste CPU cycles. */
    unwatchAllKeys(c);

    server.in_exec = 1;

    /* Save the EXEC command itself: the client's command fields are reused to
     * run each queued command and are restored once the loop is over. */
    robj **saved_argv = c->argv;
    int saved_argv_len = c->argv_len;
    int saved_argc = c->argc;
    struct serverCommand *saved_cmd = c->cmd;

    addReplyArrayLen(c, c->mstate.count);
    for (int idx = 0; idx < c->mstate.count; idx++) {
        /* Load the queued command into the client. */
        c->argc = c->mstate.commands[idx].argc;
        c->argv = c->mstate.commands[idx].argv;
        c->argv_len = c->mstate.commands[idx].argv_len;
        c->cmd = c->realcmd = c->mstate.commands[idx].cmd;

        /* ACL permissions are checked again at execution time, in case they
         * were changed after the commands were queued. */
        int acl_errpos;
        int acl_retval = ACLCheckAllPerm(c, &acl_errpos);
        if (acl_retval == ACL_OK) {
            /* The AOF client runs the command with CMD_CALL_NONE, every other
             * client with CMD_CALL_FULL. */
            call(c, c->id == CLIENT_ID_AOF ? CMD_CALL_NONE : CMD_CALL_FULL);

            serverAssert(c->flag.blocked == 0);
        } else {
            char *reason = "no permission";
            if (acl_retval == ACL_DENIED_CMD) {
                reason = "no permission to execute the command or subcommand";
            } else if (acl_retval == ACL_DENIED_KEY) {
                reason = "no permission to touch the specified keys";
            } else if (acl_retval == ACL_DENIED_CHANNEL) {
                reason = "no permission to access one of the channels used "
                         "as arguments";
            }
            addACLLogEntry(c, acl_retval, ACL_LOG_CTX_MULTI, acl_errpos, NULL, NULL);
            addReplyErrorFormat(c,
                                "-NOPERM ACLs rules changed between the moment the "
                                "transaction was accumulated and the EXEC call. "
                                "This command is no longer allowed for the "
                                "following reason: %s",
                                reason);
        }

        /* Commands may alter argc/argv: store them back into the queue entry
         * so that whatever the client holds now is what gets released later. */
        c->mstate.commands[idx].argc = c->argc;
        c->mstate.commands[idx].argv = c->argv;
        c->mstate.commands[idx].argv_len = c->argv_len;
        c->mstate.commands[idx].cmd = c->cmd;
    }

    /* Only clear deny_blocking if it was not already set before EXEC. */
    if (!old_flags.deny_blocking) c->flag.deny_blocking = 0;

    /* Put the EXEC command back into the client and drop the transaction. */
    c->argv = saved_argv;
    c->argv_len = saved_argv_len;
    c->argc = saved_argc;
    c->cmd = c->realcmd = saved_cmd;
    discardTransaction(c);

    server.in_exec = 0;
}
|
|
|
|
/* ===================== WATCH (CAS alike for MULTI/EXEC) ===================
 *
 * The implementation uses a per-DB hash table mapping keys to lists of clients
 * WATCHing those keys, so that given a key that is going to be modified
 * we can mark all the associated clients as dirty.
 *
 * Also every client contains a list of WATCHed keys, so that it is possible
 * to un-watch such keys when the client is freed or when UNWATCH is called. */
|
|
|
|
/* The watchedKey struct is included in two lists: the client->watched_keys list,
|
|
* and db->watched_keys dict (each value in that dict is a list of watchedKey structs).
|
|
* The list in the client struct is a plain list, where each node's value is a pointer to a watchedKey.
|
|
* The list in the db db->watched_keys is different, the listnode member that's embedded in this struct
|
|
* is the node in the dict. And the value inside that listnode is a pointer to the that list, and we can use
|
|
* struct member offset math to get from the listnode to the watchedKey struct.
|
|
* This is done to avoid the need for listSearchKey and dictFind when we remove from the list. */
|
|
typedef struct watchedKey {
|
|
listNode node;
|
|
robj *key;
|
|
serverDb *db;
|
|
client *client;
|
|
unsigned expired : 1; /* Flag that we're watching an already expired key. */
|
|
} watchedKey;
|
|
|
|
/* Attach a watchedKey to the list of clients watching that key. */
|
|
static inline void watchedKeyLinkToClients(list *clients, watchedKey *wk) {
|
|
wk->node.value = clients; /* Point the value back to the list */
|
|
listLinkNodeTail(clients, &wk->node); /* Link the embedded node */
|
|
}
|
|
|
|
/* Get the list of clients watching that key. */
|
|
static inline list *watchedKeyGetClients(watchedKey *wk) {
|
|
return listNodeValue(&wk->node); /* embedded node->value points back to the list */
|
|
}
|
|
|
|
/* Get the node with wk->client in the list of clients watching that key. Actually it
|
|
* is just the embedded node. */
|
|
static inline listNode *watchedKeyGetClientNode(watchedKey *wk) {
|
|
return &wk->node;
|
|
}
|
|
|
|
/* Watch for the specified key */
|
|
void watchForKey(client *c, robj *key) {
|
|
list *clients = NULL;
|
|
listIter li;
|
|
listNode *ln;
|
|
watchedKey *wk;
|
|
|
|
if (listLength(c->watched_keys) == 0) server.watching_clients++;
|
|
|
|
/* Check if we are already watching for this key */
|
|
listRewind(c->watched_keys, &li);
|
|
while ((ln = listNext(&li))) {
|
|
wk = listNodeValue(ln);
|
|
if (wk->db == c->db && equalStringObjects(key, wk->key)) return; /* Key already watched */
|
|
}
|
|
/* This key is not already watched in this DB. Let's add it */
|
|
clients = dictFetchValue(c->db->watched_keys, key);
|
|
if (!clients) {
|
|
clients = listCreate();
|
|
dictAdd(c->db->watched_keys, key, clients);
|
|
incrRefCount(key);
|
|
}
|
|
/* Add the new key to the list of keys watched by this client */
|
|
wk = zmalloc(sizeof(*wk));
|
|
wk->key = key;
|
|
wk->client = c;
|
|
wk->db = c->db;
|
|
wk->expired = keyIsExpired(c->db, key);
|
|
incrRefCount(key);
|
|
listAddNodeTail(c->watched_keys, wk);
|
|
watchedKeyLinkToClients(clients, wk);
|
|
}
|
|
|
|
/* Unwatch all the keys watched by this client. To clean the EXEC dirty
|
|
* flag is up to the caller. */
|
|
void unwatchAllKeys(client *c) {
|
|
listIter li;
|
|
listNode *ln;
|
|
|
|
if (listLength(c->watched_keys) == 0) return;
|
|
listRewind(c->watched_keys, &li);
|
|
while ((ln = listNext(&li))) {
|
|
list *clients;
|
|
watchedKey *wk;
|
|
|
|
/* Remove the client's wk from the list of clients watching the key. */
|
|
wk = listNodeValue(ln);
|
|
clients = watchedKeyGetClients(wk);
|
|
serverAssertWithInfo(c, NULL, clients != NULL);
|
|
listUnlinkNode(clients, watchedKeyGetClientNode(wk));
|
|
/* Kill the entry at all if this was the only client */
|
|
if (listLength(clients) == 0) dictDelete(wk->db->watched_keys, wk->key);
|
|
/* Remove this watched key from the client->watched list */
|
|
listDelNode(c->watched_keys, ln);
|
|
decrRefCount(wk->key);
|
|
zfree(wk);
|
|
}
|
|
server.watching_clients--;
|
|
}
|
|
|
|
/* Iterates over the watched_keys list and looks for an expired key. Keys which
|
|
* were expired already when WATCH was called are ignored. */
|
|
int isWatchedKeyExpired(client *c) {
|
|
listIter li;
|
|
listNode *ln;
|
|
watchedKey *wk;
|
|
if (listLength(c->watched_keys) == 0) return 0;
|
|
listRewind(c->watched_keys, &li);
|
|
while ((ln = listNext(&li))) {
|
|
wk = listNodeValue(ln);
|
|
if (wk->expired) continue; /* was expired when WATCH was called */
|
|
if (keyIsExpired(wk->db, wk->key)) return 1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* "Touch" a key, so that if this key is being WATCHed by some client the
|
|
* next EXEC will fail. */
|
|
void touchWatchedKey(serverDb *db, robj *key) {
    /* Every client watching 'key' in 'db' gets its dirty_cas flag set and all
     * of its watched keys removed. The only exception is a watcher whose key
     * was already expired when WATCH was called and is now gone from the
     * database: for that client nothing changed logically, so it is left
     * clean. */
    list *clients;
    listIter li;
    listNode *ln;

    if (dictSize(db->watched_keys) == 0) return;
    clients = dictFetchValue(db->watched_keys, key);
    if (!clients) return;

    /* Mark all the clients watching this key as dirty_cas. Every watcher in
     * the list must be visited: stopping early would leave later clients
     * unflagged and let their EXEC succeed despite the modification. */
    listRewind(clients, &li);
    while ((ln = listNext(&li))) {
        watchedKey *wk = server_member2struct(watchedKey, node, ln);
        /* Read the client now: unwatchAllKeys() below frees 'wk'. */
        client *c = wk->client;

        if (wk->expired && db == wk->db && equalStringObjects(key, wk->key) && dbFind(db, key->ptr) == NULL) {
            /* The key was already expired when WATCH was called and is now
             * deleted, so logically there is no change. Clear the flag:
             * deleted keys are not flagged as expired. */
            wk->expired = 0;
            continue;
        }

        /* Either the key was alive at WATCH time, or it was expired back then
         * but is still present while being modified: in both cases the
         * transaction must not go through. */
        c->flag.dirty_cas = 1;
        /* As the client is marked as dirty, there is no point in getting here
         * again in case that key (or others) are modified again (or keep the
         * memory overhead till EXEC). */
        unwatchAllKeys(c);
    }
}
|
|
|
|
/* Set CLIENT_DIRTY_CAS to all clients of DB when DB is dirty.
|
|
* It may happen in the following situations:
|
|
* FLUSHDB, FLUSHALL, SWAPDB, end of successful diskless replication.
|
|
*
|
|
* replaced_with: for SWAPDB, the WATCH should be invalidated if
|
|
* the key exists in either of them, and skipped only if it
|
|
* doesn't exist in both. */
|
|
void touchAllWatchedKeysInDb(serverDb *emptied, serverDb *replaced_with) {
|
|
listIter li;
|
|
listNode *ln;
|
|
dictEntry *de;
|
|
|
|
if (dictSize(emptied->watched_keys) == 0) return;
|
|
|
|
dictIterator *di = dictGetSafeIterator(emptied->watched_keys);
|
|
while ((de = dictNext(di)) != NULL) {
|
|
robj *key = dictGetKey(de);
|
|
int exists_in_emptied = dbFind(emptied, key->ptr) != NULL;
|
|
if (exists_in_emptied || (replaced_with && dbFind(replaced_with, key->ptr) != NULL)) {
|
|
list *clients = dictGetVal(de);
|
|
if (!clients) continue;
|
|
listRewind(clients, &li);
|
|
while ((ln = listNext(&li))) {
|
|
watchedKey *wk = server_member2struct(watchedKey, node, ln);
|
|
if (wk->expired) {
|
|
if (!replaced_with || !dbFind(replaced_with, key->ptr)) {
|
|
/* Expired key now deleted. No logical change. Clear the
|
|
* flag. Deleted keys are not flagged as expired. */
|
|
wk->expired = 0;
|
|
continue;
|
|
} else if (keyIsExpired(replaced_with, key)) {
|
|
/* Expired key remains expired. */
|
|
continue;
|
|
}
|
|
} else if (!exists_in_emptied && keyIsExpired(replaced_with, key)) {
|
|
/* Non-existing key is replaced with an expired key. */
|
|
wk->expired = 1;
|
|
continue;
|
|
}
|
|
client *c = wk->client;
|
|
c->flag.dirty_cas = 1;
|
|
/* Note - we could potentially call unwatchAllKeys for this specific client in order to reduce
|
|
* the total number of iterations. BUT this could also free the current next entry pointer
|
|
* held by the iterator and can lead to use-after-free. */
|
|
}
|
|
}
|
|
}
|
|
dictReleaseIterator(di);
|
|
}
|
|
|
|
/* WATCH command: start watching every key given as argument and reply OK.
 * Replies with an error when called inside MULTI. */
void watchCommand(client *c) {
    if (c->flag.multi) {
        addReplyError(c, "WATCH inside MULTI is not allowed");
        return;
    }

    /* No point in watching if the client is already dirty. */
    if (!c->flag.dirty_cas) {
        for (int arg = 1; arg < c->argc; arg++) watchForKey(c, c->argv[arg]);
    }
    addReply(c, shared.ok);
}
|
|
|
|
/* UNWATCH command: forget every key watched by the client, clear its
 * dirty_cas flag and reply OK. */
void unwatchCommand(client *c) {
    unwatchAllKeys(c);

    /* With nothing watched anymore, the client starts clean again. */
    c->flag.dirty_cas = 0;

    addReply(c, shared.ok);
}
|
|
|
|
/* Return the number of bytes accounted to the MULTI/EXEC and WATCH state of
 * the client: the accumulated size of the queued arguments, the bookkeeping
 * of the watched keys, and the memory reserved for the command queue. */
size_t multiStateMemOverhead(client *c) {
    /* Size accumulated for the arguments of the queued commands. */
    size_t queued_args = c->mstate.argv_len_sums;

    /* Watched keys overhead. Note: this doesn't take into account the watched
     * keys themselves, because they aren't managed per-client. */
    size_t watched = listLength(c->watched_keys) * (sizeof(listNode) + sizeof(watchedKey));

    /* Reserved memory for queued multi commands. */
    size_t reserved = c->mstate.alloc_count * sizeof(multiCmd);

    return queued_args + watched + reserved;
}
|