Fix issue where SCAN misses elements while snapshot is in flight

Former-commit-id: ce005d748ebf0e116d674a96f74d698d17394010
parent 1fef6c42b7
commit 3692771457

src/db.cpp (43 lines changed)
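Background for the diff below: while a snapshot is in flight, the live key dict holds only the keys touched since the snapshot was taken; everything else still lives in the snapshot layer. A SCAN that walks only the raw live dict therefore skips most of the keyspace. The sketch below is a minimal model of that layering using standard containers (LayeredDb and its members are illustrative names, not KeyDB types):

    #include <map>
    #include <set>
    #include <string>
    #include <vector>

    struct LayeredDb {
        std::map<std::string, std::string> live;      // keys written after the snapshot
        std::map<std::string, std::string> snapshot;  // frozen view at snapshot time
        std::set<std::string> tombstones;             // keys deleted after the snapshot

        // Walking only `live` reproduces the bug: every unmodified key is
        // missed. A correct scan unions both layers, filtering snapshot
        // keys through the deletions recorded above them.
        std::vector<std::string> scanAll() const {
            std::vector<std::string> out;
            for (const auto &kv : live)
                out.push_back(kv.first);
            for (const auto &kv : snapshot)
                if (!live.count(kv.first) && !tombstones.count(kv.first))
                    out.push_back(kv.first);
            return out;
        }
    };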
@@ -1032,7 +1032,7 @@ void scanGenericCommand(client *c, robj_roptr o, unsigned long cursor) {
         }
     }
 
-    if (o == nullptr && count > 100)
+    if (o == nullptr && count >= 100)
     {
         // Do an async version
         const redisDbPersistentDataSnapshot *snapshot = nullptr;
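The hunk above also widens the async path to include a COUNT of exactly 100. A hedged sketch of the dispatch rule (scanInline and scanSnapshotAsync are hypothetical stand-ins for the inline dictScan path and the asyncworkqueue path):

    #include <cstdio>

    static void scanInline(unsigned long cursor, long count) {
        std::printf("inline scan: cursor=%lu count=%ld\n", cursor, count);
    }
    static void scanSnapshotAsync(unsigned long cursor, long count) {
        std::printf("async snapshot scan: cursor=%lu count=%ld\n", cursor, count);
    }

    // Whole-keyspace scans with a large COUNT are worth the snapshot
    // overhead; the boundary case COUNT == 100 now also goes async.
    static void dispatchScan(bool wholeKeyspace, unsigned long cursor, long count) {
        if (wholeKeyspace && count >= 100)
            scanSnapshotAsync(cursor, count);
        else
            scanInline(cursor, count);
    }

    int main() { dispatchScan(true, 0, 100); }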
@@ -1046,7 +1046,7 @@ void scanGenericCommand(client *c, robj_roptr o, unsigned long cursor) {
         sds patCopy = pat ? sdsdup(pat) : nullptr;
         sds typeCopy = type ? sdsdup(type) : nullptr;
         g_pserver->asyncworkqueue->AddWorkFunction([c, snapshot, cursor, count, keys, el, db, patCopy, typeCopy, use_pattern]{
-            auto cursorResult = snapshot->scan_threadsafe(cursor, count, keys, nullptr);
+            auto cursorResult = snapshot->scan_threadsafe(cursor, count, keys);
             if (use_pattern) {
                 listNode *ln = listFirst(keys);
                 int patlen = sdslen(patCopy);
@@ -1109,23 +1109,30 @@ void scanGenericCommand(client *c, robj_roptr o, unsigned long cursor) {
     }
 
     if (ht) {
-        void *privdata[2];
-        /* We set the max number of iterations to ten times the specified
-         * COUNT, so if the hash table is in a pathological state (very
-         * sparsely populated) we avoid to block too much time at the cost
-         * of returning no or very few elements. */
-        long maxiterations = count*10;
+        if (ht == c->db->dictUnsafeKeyOnly())
+        {
+            cursor = c->db->scan_threadsafe(cursor, count, keys);
+        }
+        else
+        {
+            void *privdata[2];
+            /* We set the max number of iterations to ten times the specified
+             * COUNT, so if the hash table is in a pathological state (very
+             * sparsely populated) we avoid to block too much time at the cost
+             * of returning no or very few elements. */
+            long maxiterations = count*10;
 
-        /* We pass two pointers to the callback: the list to which it will
-         * add new elements, and the object containing the dictionary so that
-         * it is possible to fetch more data in a type-dependent way. */
-        privdata[0] = keys;
-        privdata[1] = o.unsafe_robjcast();
-        do {
-            cursor = dictScan(ht, cursor, scanCallback, NULL, privdata);
-        } while (cursor &&
-                maxiterations-- &&
-                listLength(keys) < (unsigned long)count);
+            /* We pass two pointers to the callback: the list to which it will
+             * add new elements, and the object containing the dictionary so that
+             * it is possible to fetch more data in a type-dependent way. */
+            privdata[0] = keys;
+            privdata[1] = o.unsafe_robjcast();
+            do {
+                cursor = dictScan(ht, cursor, scanCallback, NULL, privdata);
+            } while (cursor &&
+                    maxiterations-- &&
+                    listLength(keys) < (unsigned long)count);
+        }
     } else if (o->type == OBJ_SET) {
         int pos = 0;
         int64_t ll;
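The hunk above is the heart of the fix: when the table being scanned is the database's own key dict (dictUnsafeKeyOnly()), the cursor is now driven through scan_threadsafe, which walks the snapshot chain as well as the live delta dict, instead of dictScan over the live dict alone. The else branch keeps the classic bounded-scan idiom; a self-contained sketch of that idiom (boundedScan and its toy table are illustrative, not the real dict API):

    #include <list>
    #include <map>
    #include <string>

    // One toy "bucket" per map slot; dictScan visits one bucket per call.
    static unsigned long boundedScan(const std::map<unsigned long, std::string> &table,
                                     unsigned long cursor, long count,
                                     std::list<std::string> &keys) {
        long maxiterations = count * 10;  // 10x COUNT: a sparse table cannot
                                          // stall the server, at the cost of
                                          // returning few or no elements
        auto it = table.lower_bound(cursor);
        while (it != table.end() && maxiterations-- > 0 &&
               keys.size() < (unsigned long)count) {
            keys.push_back(it->second);
            ++it;
        }
        return it == table.end() ? 0 : it->first;  // 0 means scan complete
    }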
@@ -1395,7 +1395,7 @@ public:
     bool FWillFreeChildDebug() const { return m_spdbSnapshotHOLDER != nullptr; }
 
     bool iterate_threadsafe(std::function<bool(const char*, robj_roptr o)> fn, bool fKeyOnly = false, bool fCacheOnly = false) const;
-    unsigned long scan_threadsafe(unsigned long iterator, long count, list *keys, struct scan_callback_data *pdata = nullptr) const;
+    unsigned long scan_threadsafe(unsigned long iterator, long count, list *keys) const;
 
     using redisDbPersistentData::createSnapshot;
     using redisDbPersistentData::endSnapshot;
@@ -20,6 +20,7 @@ const redisDbPersistentDataSnapshot *redisDbPersistentData::createSnapshot(uint6
 
     if (m_spdbSnapshotHOLDER != nullptr)
    {
+        serverLog(LL_DEBUG, "Attempting reuse of snapshot, client tstamp: %llu snapshot tstamp: %llu", mvccCheckpoint, m_spdbSnapshotHOLDER->m_mvccCheckpoint);
         // If possible reuse an existing snapshot (we want to minimize nesting)
         if (mvccCheckpoint <= m_spdbSnapshotHOLDER->m_mvccCheckpoint)
         {
@@ -59,8 +60,11 @@ const redisDbPersistentDataSnapshot *redisDbPersistentData::createSnapshot(uint6
     }
 
     // See if we have too many levels and can bail out of this to reduce load
-    if (fOptional && (levels >= 4))
+    if (fOptional && (levels >= 6))
+    {
+        serverLog(LL_DEBUG, "Snapshot nesting too deep, abondoning");
         return nullptr;
+    }
 
     auto spdb = std::unique_ptr<redisDbPersistentDataSnapshot>(new (MALLOC_LOCAL) redisDbPersistentDataSnapshot());
 
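The two createSnapshot hunks above support the scan change: presumably because SCAN now leans harder on snapshots, reuse attempts are logged and optional snapshot requests tolerate a chain of up to 6 levels before bailing out. A minimal sketch of the guard (Node is a hypothetical stand-in for the m_spdbSnapshotHOLDER chain):

    struct Node { const Node *next = nullptr; };

    static int levelsOf(const Node *n) {
        int levels = 0;
        for (; n != nullptr; n = n->next)
            ++levels;
        return levels;
    }

    // Optional snapshots are refused once nesting gets too deep, trading
    // some refused async scans for bounded memory and catch-up cost.
    static bool allowOptionalSnapshot(const Node *chain) {
        return levelsOf(chain) < 6;  // was 4 before this commit
    }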
@@ -402,44 +406,38 @@ void snapshot_scan_callback(void *privdata, const dictEntry *de)
     sds sdskey = (sds)dictGetKey(de);
     listAddNodeHead(data->keys, createStringObject(sdskey, sdslen(sdskey)));
 }
-unsigned long redisDbPersistentDataSnapshot::scan_threadsafe(unsigned long iterator, long count, list *keys, scan_callback_data *pdata) const
+unsigned long redisDbPersistentDataSnapshot::scan_threadsafe(unsigned long iterator, long count, list *keys) const
 {
     unsigned long iteratorReturn = 0;
 
-    scan_callback_data dataT;
-    if (pdata == nullptr)
-    {
-        dataT.dictTombstone = m_pdictTombstone;
-        dataT.keys = keys;
-        pdata = &dataT;
-    }
+    scan_callback_data data;
+    data.dictTombstone = m_pdictTombstone;
+    data.keys = keys;
 
     const redisDbPersistentDataSnapshot *psnapshot;
     __atomic_load(&m_pdbSnapshot, &psnapshot, __ATOMIC_ACQUIRE);
     if (psnapshot != nullptr)
     {
         // Always process the snapshot first as we assume its bigger than we are
-        iteratorReturn = psnapshot->scan_threadsafe(iterator, count, keys, pdata);
-    }
+        iteratorReturn = psnapshot->scan_threadsafe(iterator, count, keys);
 
-    if (psnapshot == nullptr)
+        // Just catch up with our snapshot
+        do
+        {
+            iterator = dictScan(m_pdict, iterator, snapshot_scan_callback, nullptr, &data);
+        } while (iterator != 0 && (iterator < iteratorReturn || iteratorReturn == 0));
+    }
+    else
     {
         long maxiterations = count * 10; // allow more iterations than keys for sparse tables
         iteratorReturn = iterator;
         do {
-            iteratorReturn = dictScan(m_pdict, iteratorReturn, snapshot_scan_callback, NULL, pdata);
+            iteratorReturn = dictScan(m_pdict, iteratorReturn, snapshot_scan_callback, NULL, &data);
         } while (iteratorReturn &&
                 maxiterations-- &&
                 listLength(keys) < (unsigned long)count);
     }
-    else
-    {
-        // Just catch up with our snapshot
-        do
-        {
-            iterator = dictScan(m_pdict, iterator, snapshot_scan_callback, nullptr, pdata);
-        } while (iterator != 0 && (iterator < iteratorReturn || iteratorReturn == 0));
-    }
 
     return iteratorReturn;
 }
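The rewritten scan_threadsafe is where the dropped pdata parameter pays off: each recursion level now builds its own scan_callback_data bound to its own tombstone dict, where the old code threaded the outermost caller's state through every nested level. The branch structure is tidied (the catch-up loop moves inside the psnapshot != nullptr arm) but does the same work; the behavioral change is the per-level filtering. A compilable model under those assumptions (SnapLevel is illustrative, and ordered map slots stand in for dictScan's reverse-binary cursor order):

    #include <map>
    #include <set>
    #include <string>
    #include <vector>

    struct SnapLevel {
        std::map<unsigned long, std::string> dict;  // delta dict at this level
        std::set<std::string> tombstone;            // deletions at this level
        const SnapLevel *snapshot = nullptr;        // older, larger snapshot

        unsigned long scan(unsigned long cursor, long count,
                           std::vector<std::string> &keys) const {
            unsigned long ret = 0;
            if (snapshot != nullptr) {
                // Snapshot first: assumed bigger than this level's delta.
                ret = snapshot->scan(cursor, count, keys);
                // Catch up: walk our delta dict to the snapshot's cursor,
                // filtering with THIS level's tombstones, not the caller's.
                for (auto it = dict.lower_bound(cursor);
                     it != dict.end() && (ret == 0 || it->first < ret); ++it)
                    if (!tombstone.count(it->second))
                        keys.push_back(it->second);
            } else {
                long maxiterations = count * 10;  // sparse-table guard
                auto it = dict.lower_bound(cursor);
                while (it != dict.end() && maxiterations-- > 0 &&
                       keys.size() < (unsigned long)count) {
                    if (!tombstone.count(it->second))
                        keys.push_back(it->second);
                    ++it;
                }
                ret = (it == dict.end()) ? 0 : it->first;  // 0 == done
            }
            return ret;
        }
    };

In the real code the per-level state is the local scan_callback_data `data`, and snapshot_scan_callback consults data->dictTombstone before adding each key to the output list.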