#include "server.h"
#include "aelocker.h"
// Threshold (in dict elements) below which tombstone/merge work is considered
// "small" enough to do synchronously under the global lock; used by both
// createSnapshot() and endSnapshotAsync().
// constexpr: the file already uses C++11 features, and this is a compile-time constant.
static constexpr size_t c_elementsSmallLimit = 500000;
// Create (or revive/reuse) a point-in-time snapshot of this database.
//   mvccCheckpoint - the MVCC timestamp the caller needs; an existing snapshot at
//                    least this new may be reused.
//   fOptional      - if true, the caller can live without a snapshot, so bail out
//                    (return nullptr) under memory pressure or deep nesting.
// Returns the snapshot (ref-counted; release with endSnapshot) or nullptr.
// Must be called with the global locks held, and never on a snapshot itself.
const redisDbPersistentDataSnapshot *redisDbPersistentData::createSnapshot(uint64_t mvccCheckpoint, bool fOptional)
{
    serverAssert(GlobalLocksAcquired());
    serverAssert(m_refCount == 0);  // do not call this on a snapshot

    if (freeMemoryIfNeededAndSafe(false /*fQuickCycle*/, true /*fPreSnapshot*/) != C_OK && fOptional)
        return nullptr; // can't create snapshot due to OOM

    // Measure how deep the existing snapshot chain already is
    int levels = 1;
    redisDbPersistentDataSnapshot *psnapshot = m_spdbSnapshotHOLDER.get();
    while (psnapshot != nullptr)
    {
        ++levels;
        psnapshot = psnapshot->m_spdbSnapshotHOLDER.get();
    }

    if (m_spdbSnapshotHOLDER != nullptr)
    {
        // If possible reuse an existing snapshot (we want to minimize nesting)
        if (mvccCheckpoint <= m_spdbSnapshotHOLDER->m_mvccCheckpoint)
        {
            if (!m_spdbSnapshotHOLDER->FStale())
            {
                m_spdbSnapshotHOLDER->m_refCount++;
                return m_spdbSnapshotHOLDER.get();
            }
            serverLog(LL_VERBOSE, "Existing snapshot too old, creating a new one");
        }
    }

    // Is there an existing snapshot owned only by us (it already served its async
    // purpose)?  If the pending tombstone set is small, revive it in place instead
    // of allocating a new nesting level.
    if (m_pdbSnapshot != nullptr && m_pdbSnapshot == m_pdbSnapshotASYNC && m_spdbSnapshotHOLDER->m_refCount == 1 && dictSize(m_pdictTombstone) < c_elementsSmallLimit)
    {
        // FIX: demoted from LL_WARNING - this is a normal fast path, not a problem,
        // and the other diagnostics in this function log at verbose/debug level
        serverLog(LL_VERBOSE, "Reusing old snapshot");

        // Apply our pending tombstones to the snapshot being revived
        dictIterator *di = dictGetIterator(m_pdictTombstone);
        dictEntry *de;
        while ((de = dictNext(di)) != nullptr)
        {
            // If the key isn't in the snapshot's dict the tombstone belongs to a
            // deeper generation - propagate it down
            if (dictDelete(m_pdbSnapshot->m_pdict, dictGetKey(de)) != DICT_OK)
                dictAdd(m_spdbSnapshotHOLDER->m_pdictTombstone, sdsdupshared((sds)dictGetKey(de)), nullptr);
        }
        dictReleaseIterator(di);
        dictForceRehash(m_spdbSnapshotHOLDER->m_pdictTombstone);

        // Fold our new keys into the snapshot and reset our working state
        dictMerge(m_pdbSnapshot->m_pdict, m_pdict);
        dictEmpty(m_pdictTombstone, nullptr);
        {
            std::unique_lock<fastlock> ul(g_expireLock);
            (*m_spdbSnapshotHOLDER->m_setexpire) = *m_setexpire;
        }
        m_pdbSnapshotASYNC = nullptr;
        serverAssert(m_pdbSnapshot->m_pdict->iterators == 1);
        serverAssert(m_spdbSnapshotHOLDER->m_refCount == 1);
        return m_pdbSnapshot;
    }

    // See if we have too many levels and can bail out of this to reduce load
    if (fOptional && (levels >= 6))
    {
        // FIX: corrected typo "abondoning" -> "abandoning"
        serverLog(LL_DEBUG, "Snapshot nesting too deep, abandoning");
        return nullptr;
    }

    // Build a fresh snapshot that takes over our current dicts (copy-on-write style)
    auto spdb = std::unique_ptr<redisDbPersistentDataSnapshot>(new (MALLOC_LOCAL) redisDbPersistentDataSnapshot());
    spdb->m_fAllChanged = false;
    spdb->m_fTrackingChanges = 0;
    spdb->m_pdict = m_pdict;
    spdb->m_pdictTombstone = m_pdictTombstone;
    // Add a fake iterator so the dicts don't rehash (they need to be read only)
    spdb->m_pdict->iterators++;
    dictForceRehash(spdb->m_pdictTombstone);    // prevent rehashing by finishing the rehash now
    spdb->m_spdbSnapshotHOLDER = std::move(m_spdbSnapshotHOLDER);
    if (m_spstorage != nullptr)
        spdb->m_spstorage = std::shared_ptr<StorageCache>(const_cast<StorageCache*>(m_spstorage->clone()));
    spdb->m_pdbSnapshot = m_pdbSnapshot;
    spdb->m_refCount = 1;
    spdb->m_mvccCheckpoint = getMvccTstamp();
    if (m_setexpire != nullptr)
    {
        std::unique_lock<fastlock> ul(g_expireLock);
        spdb->m_setexpire = new (MALLOC_LOCAL) expireset(*m_setexpire);
        spdb->m_setexpire->pause_rehash();  // needs to be const
    }

    // Start over with empty working dicts; the snapshot now owns the old ones
    m_pdict = dictCreate(&dbDictType, this);
    m_pdictTombstone = dictCreate(&dbDictTypeTombstone, this);

    serverAssert(spdb->m_pdict->iterators == 1);

    m_spdbSnapshotHOLDER = std::move(spdb);
    m_pdbSnapshot = m_spdbSnapshotHOLDER.get();

    // Finally we need to take a ref on all our children snapshots.  This ensures they aren't free'd before we are
    redisDbPersistentData *pdbSnapshotNext = m_pdbSnapshot->m_spdbSnapshotHOLDER.get();
    while (pdbSnapshotNext != nullptr)
    {
        pdbSnapshotNext->m_refCount++;
        pdbSnapshotNext = pdbSnapshotNext->m_spdbSnapshotHOLDER.get();
    }

    if (m_pdbSnapshotASYNC != nullptr)
    {
        // free the async snapshot, it's done its job
        endSnapshot(m_pdbSnapshotASYNC);    // should be just a dec ref (FAST)
        m_pdbSnapshotASYNC = nullptr;
    }

    return m_pdbSnapshot;
}
void redisDbPersistentData : : recursiveFreeSnapshots ( redisDbPersistentDataSnapshot * psnapshot )
{
std : : vector < redisDbPersistentDataSnapshot * > stackSnapshots ;
// gather a stack of snapshots, we do this so we can free them in reverse
// Note: we don't touch the incoming psnapshot since the parent is free'ing that one
while ( ( psnapshot = psnapshot - > m_spdbSnapshotHOLDER . get ( ) ) ! = nullptr )
{
stackSnapshots . push_back ( psnapshot ) ;
}
for ( auto itr = stackSnapshots . rbegin ( ) ; itr ! = stackSnapshots . rend ( ) ; + + itr )
{
endSnapshot ( * itr ) ;
}
}
2019-11-28 19:00:51 -05:00
/* static */ void redisDbPersistentDataSnapshot : : gcDisposeSnapshot ( redisDbPersistentDataSnapshot * psnapshot )
{
psnapshot - > m_refCount - - ;
if ( psnapshot - > m_refCount < = 0 )
{
serverAssert ( psnapshot - > m_refCount = = 0 ) ;
// Remove our ref from any children and dispose them too
redisDbPersistentDataSnapshot * psnapshotChild = psnapshot ;
std : : vector < redisDbPersistentDataSnapshot * > vecClean ;
while ( ( psnapshotChild = psnapshotChild - > m_spdbSnapshotHOLDER . get ( ) ) ! = nullptr )
vecClean . push_back ( psnapshotChild ) ;
for ( auto psnapshotChild : vecClean )
gcDisposeSnapshot ( psnapshotChild ) ;
//psnapshot->m_pdict->iterators--;
psnapshot - > m_spdbSnapshotHOLDER . release ( ) ;
//psnapshot->m_pdbSnapshot = nullptr;
g_pserver - > garbageCollector . enqueue ( serverTL - > gcEpoch , std : : unique_ptr < redisDbPersistentDataSnapshot > ( psnapshot ) ) ;
2020-01-12 01:22:44 -05:00
serverLog ( LL_VERBOSE , " Garbage collected snapshot " ) ;
2019-11-28 19:00:51 -05:00
}
}
2020-01-29 12:55:23 -05:00
void redisDbPersistentData : : restoreSnapshot ( const redisDbPersistentDataSnapshot * psnapshot )
{
serverAssert ( psnapshot - > m_refCount = = 1 ) ;
serverAssert ( m_spdbSnapshotHOLDER . get ( ) = = psnapshot ) ;
m_pdbSnapshot = psnapshot ; // if it was deleted restore it
size_t expectedSize = psnapshot - > size ( ) ;
dictEmpty ( m_pdict , nullptr ) ;
dictEmpty ( m_pdictTombstone , nullptr ) ;
2020-06-09 19:58:42 -04:00
{
std : : unique_lock < fastlock > ul ( g_expireLock ) ;
2020-01-29 12:55:23 -05:00
delete m_setexpire ;
m_setexpire = new ( MALLOC_LOCAL ) expireset ( * psnapshot - > m_setexpire ) ;
2020-06-09 19:58:42 -04:00
}
2020-01-29 12:55:23 -05:00
endSnapshot ( psnapshot ) ;
serverAssert ( size ( ) = = expectedSize ) ;
}
2020-02-01 22:28:24 -05:00
// This function is all about minimizing the amount of work done under global lock
// when there has been lots of changes since snapshot creation a naive endSnapshot()
// will block for a very long time and will cause latency spikes.
//
// Note that this function uses a lot more CPU time than a simple endSnapshot(), we
// have some internal heuristics to do a synchronous endSnapshot if it makes sense
void redisDbPersistentData : : endSnapshotAsync ( const redisDbPersistentDataSnapshot * psnapshot )
2019-11-24 17:59:02 -05:00
{
2020-08-04 04:37:16 +00:00
mstime_t latency ;
aeAcquireLock ( ) ; latencyStartMonitor ( latency ) ;
2020-02-01 22:28:24 -05:00
if ( m_pdbSnapshotASYNC & & m_pdbSnapshotASYNC - > m_mvccCheckpoint < = psnapshot - > m_mvccCheckpoint )
{
// Free a stale async snapshot so consolidate_children can clean it up later
endSnapshot ( m_pdbSnapshotASYNC ) ; // FAST: just a ref decrement
m_pdbSnapshotASYNC = nullptr ;
}
size_t elements = dictSize ( m_pdictTombstone ) ;
// if neither dict is rehashing then the merge is O(1) so don't count the size
if ( dictIsRehashing ( psnapshot - > m_pdict ) | | dictIsRehashing ( m_pdict ) )
elements + = dictSize ( m_pdict ) ;
2020-02-02 23:42:44 -05:00
if ( elements < c_elementsSmallLimit | | psnapshot ! = m_spdbSnapshotHOLDER . get ( ) ) // heuristic
2020-02-01 22:28:24 -05:00
{
// For small snapshots it makes more sense just to merge it directly
endSnapshot ( psnapshot ) ;
2020-08-04 04:37:16 +00:00
latencyEndMonitor ( latency ) ;
latencyAddSampleIfNeeded ( " end-snapshot-async-synchronous-path " , latency ) ;
2020-02-01 22:28:24 -05:00
aeReleaseLock ( ) ;
return ;
}
2019-11-24 17:59:02 -05:00
2020-02-01 22:28:24 -05:00
// OK this is a big snapshot so lets do the merge work outside the lock
auto psnapshotT = createSnapshot ( LLONG_MAX , false ) ;
endSnapshot ( psnapshot ) ; // this will just dec the ref count since our new snapshot has a ref
2020-02-02 23:42:44 -05:00
psnapshot = nullptr ;
2020-08-04 04:37:16 +00:00
aeReleaseLock ( ) ; latencyEndMonitor ( latency ) ;
latencyAddSampleIfNeeded ( " end-snapshot-async-phase-1 " , latency ) ;
2020-02-01 22:28:24 -05:00
// do the expensive work of merging snapshots outside the ref
2020-02-02 23:42:44 -05:00
const_cast < redisDbPersistentDataSnapshot * > ( psnapshotT ) - > freeTombstoneObjects ( 1 ) ; // depth is one because we just creted it
2020-06-04 01:07:14 -04:00
const_cast < redisDbPersistentDataSnapshot * > ( psnapshotT ) - > consolidate_children ( this , true ) ;
2020-02-01 22:28:24 -05:00
// Final Cleanup
2020-08-04 04:37:16 +00:00
aeAcquireLock ( ) ; latencyStartMonitor ( latency ) ;
2020-02-01 22:28:24 -05:00
if ( m_pdbSnapshotASYNC = = nullptr )
m_pdbSnapshotASYNC = psnapshotT ;
else
2020-02-02 23:42:44 -05:00
endSnapshot ( psnapshotT ) ; // finally clean up our temp snapshot
2020-08-04 04:37:16 +00:00
aeReleaseLock ( ) ; latencyEndMonitor ( latency ) ;
latencyAddSampleIfNeeded ( " end-snapshot-async-phase-2 " , latency ) ;
2020-02-01 22:28:24 -05:00
}
2020-02-02 23:42:44 -05:00
void redisDbPersistentDataSnapshot : : freeTombstoneObjects ( int depth )
{
if ( m_pdbSnapshot = = nullptr )
return ;
const_cast < redisDbPersistentDataSnapshot * > ( m_pdbSnapshot ) - > freeTombstoneObjects ( depth + 1 ) ;
if ( m_pdbSnapshot - > m_refCount ! = depth & & ( m_pdbSnapshot - > m_refCount ! = ( m_refCount + 1 ) ) )
return ;
dictIterator * di = dictGetIterator ( m_pdictTombstone ) ;
dictEntry * de ;
size_t freed = 0 ;
while ( ( de = dictNext ( di ) ) ! = nullptr )
{
dictEntry * deObj = dictFind ( m_pdbSnapshot - > m_pdict , dictGetKey ( de ) ) ;
if ( deObj ! = nullptr & & dictGetVal ( deObj ) ! = nullptr )
{
decrRefCount ( ( robj * ) dictGetVal ( deObj ) ) ;
deObj - > v . val = nullptr ;
+ + freed ;
}
}
dictReleaseIterator ( di ) ;
}
2020-02-01 22:28:24 -05:00
void redisDbPersistentData : : endSnapshot ( const redisDbPersistentDataSnapshot * psnapshot )
{
serverAssert ( GlobalLocksAcquired ( ) ) ;
2019-11-24 17:59:02 -05:00
if ( m_spdbSnapshotHOLDER . get ( ) ! = psnapshot )
{
2019-11-28 19:00:51 -05:00
if ( m_spdbSnapshotHOLDER = = nullptr )
{
// This is an orphaned snapshot
redisDbPersistentDataSnapshot : : gcDisposeSnapshot ( const_cast < redisDbPersistentDataSnapshot * > ( psnapshot ) ) ;
return ;
}
2019-11-24 17:59:02 -05:00
m_spdbSnapshotHOLDER - > endSnapshot ( psnapshot ) ;
return ;
}
2020-08-04 04:37:16 +00:00
mstime_t latency_endsnapshot ;
latencyStartMonitor ( latency_endsnapshot ) ;
2019-11-24 17:59:02 -05:00
// Alright we're ready to be free'd, but first dump all the refs on our child snapshots
if ( m_spdbSnapshotHOLDER - > m_refCount = = 1 )
recursiveFreeSnapshots ( m_spdbSnapshotHOLDER . get ( ) ) ;
m_spdbSnapshotHOLDER - > m_refCount - - ;
if ( m_spdbSnapshotHOLDER - > m_refCount > 0 )
return ;
2020-02-01 21:08:26 -05:00
size_t sizeStart = size ( ) ;
2019-11-24 17:59:02 -05:00
serverAssert ( m_spdbSnapshotHOLDER - > m_refCount = = 0 ) ;
serverAssert ( ( m_refCount = = 0 & & m_pdict - > iterators = = 0 ) | | ( m_refCount ! = 0 & & m_pdict - > iterators = = 1 ) ) ;
serverAssert ( m_spdbSnapshotHOLDER - > m_pdict - > iterators = = 1 ) ; // All iterators should have been free'd except the fake one from createSnapshot
if ( m_refCount = = 0 )
{
m_spdbSnapshotHOLDER - > m_pdict - > iterators - - ;
}
if ( m_pdbSnapshot = = nullptr )
{
// the database was cleared so we don't need to recover the snapshot
dictEmpty ( m_pdictTombstone , nullptr ) ;
m_spdbSnapshotHOLDER = std : : move ( m_spdbSnapshotHOLDER - > m_spdbSnapshotHOLDER ) ;
return ;
}
// Stage 1 Loop through all the tracked deletes and remove them from the snapshot DB
dictIterator * di = dictGetIterator ( m_pdictTombstone ) ;
dictEntry * de ;
while ( ( de = dictNext ( di ) ) ! = NULL )
{
2020-06-04 01:07:14 -04:00
dictEntry * * dePrev ;
dictht * ht ;
dictEntry * deSnapshot = dictFindWithPrev ( m_spdbSnapshotHOLDER - > m_pdict , dictGetKey ( de ) , & dePrev , & ht ) ;
2020-06-03 23:30:35 -04:00
if ( deSnapshot = = nullptr & & m_spdbSnapshotHOLDER - > m_pdbSnapshot )
2019-11-25 17:50:40 -05:00
{
2020-06-03 23:30:35 -04:00
// The tombstone is for a grand child, propogate it (or possibly in the storage provider - but an extra tombstone won't hurt)
2019-12-20 17:45:07 -05:00
serverAssert ( m_spdbSnapshotHOLDER - > m_pdbSnapshot - > find_cached_threadsafe ( ( const char * ) dictGetKey ( de ) ) ! = nullptr ) ;
2019-12-16 21:14:16 -05:00
dictAdd ( m_spdbSnapshotHOLDER - > m_pdictTombstone , sdsdupshared ( ( sds ) dictGetKey ( de ) ) , nullptr ) ;
2019-11-25 17:50:40 -05:00
continue ;
}
2020-06-03 23:30:35 -04:00
else if ( deSnapshot = = nullptr )
{
serverAssert ( m_spdbSnapshotHOLDER - > m_spstorage ! = nullptr ) ; // the only case where we can have a tombstone without a snapshot child is if a storage engine is set
continue ;
}
2019-11-24 17:59:02 -05:00
2020-06-04 01:07:14 -04:00
// Delete the object from the source dict, we don't use dictDelete to avoid a second search
dictFreeKey ( m_spdbSnapshotHOLDER - > m_pdict , deSnapshot ) ;
dictFreeVal ( m_spdbSnapshotHOLDER - > m_pdict , deSnapshot ) ;
serverAssert ( * dePrev = = deSnapshot ) ;
* dePrev = deSnapshot - > next ;
zfree ( deSnapshot ) ;
ht - > used - - ;
2019-11-24 17:59:02 -05:00
}
dictReleaseIterator ( di ) ;
dictEmpty ( m_pdictTombstone , nullptr ) ;
// Stage 2 Move all new keys to the snapshot DB
2020-02-01 21:08:26 -05:00
dictMerge ( m_spdbSnapshotHOLDER - > m_pdict , m_pdict ) ;
2019-11-24 17:59:02 -05:00
// Stage 3 swap the databases with the snapshot
std : : swap ( m_pdict , m_spdbSnapshotHOLDER - > m_pdict ) ;
2019-11-28 19:00:51 -05:00
if ( m_spdbSnapshotHOLDER - > m_pdbSnapshot ! = nullptr )
std : : swap ( m_pdictTombstone , m_spdbSnapshotHOLDER - > m_pdictTombstone ) ;
2019-11-24 17:59:02 -05:00
// Finally free the snapshot
if ( m_pdbSnapshot ! = nullptr & & m_spdbSnapshotHOLDER - > m_pdbSnapshot ! = nullptr )
{
m_pdbSnapshot = m_spdbSnapshotHOLDER - > m_pdbSnapshot ;
m_spdbSnapshotHOLDER - > m_pdbSnapshot = nullptr ;
}
else
{
m_pdbSnapshot = nullptr ;
}
// Fixup the about to free'd snapshots iterator count so the dtor doesn't complain
if ( m_refCount )
{
m_spdbSnapshotHOLDER - > m_pdict - > iterators - - ;
}
2020-02-01 22:28:24 -05:00
auto spsnapshotFree = std : : move ( m_spdbSnapshotHOLDER ) ;
m_spdbSnapshotHOLDER = std : : move ( spsnapshotFree - > m_spdbSnapshotHOLDER ) ;
if ( serverTL ! = nullptr )
g_pserver - > garbageCollector . enqueue ( serverTL - > gcEpoch , std : : move ( spsnapshotFree ) ) ;
// Sanity Checks
2019-11-24 17:59:02 -05:00
serverAssert ( m_spdbSnapshotHOLDER ! = nullptr | | m_pdbSnapshot = = nullptr ) ;
serverAssert ( m_pdbSnapshot = = m_spdbSnapshotHOLDER . get ( ) | | m_pdbSnapshot = = nullptr ) ;
serverAssert ( ( m_refCount = = 0 & & m_pdict - > iterators = = 0 ) | | ( m_refCount ! = 0 & & m_pdict - > iterators = = 1 ) ) ;
2019-11-25 17:50:40 -05:00
serverAssert ( m_spdbSnapshotHOLDER ! = nullptr | | dictSize ( m_pdictTombstone ) = = 0 ) ;
2020-02-01 21:08:26 -05:00
serverAssert ( sizeStart = = size ( ) ) ;
2020-05-07 23:07:31 -04:00
2020-06-04 01:07:14 -04:00
latencyEndMonitor ( latency_endsnapshot ) ;
latencyAddSampleIfNeeded ( " end-mvcc-snapshot " , latency_endsnapshot ) ;
2020-08-09 23:36:20 +00:00
freeMemoryIfNeededAndSafe ( false /*fQuickCycle*/ , false ) ;
2019-11-24 17:59:02 -05:00
}
2020-03-23 23:12:10 -04:00
dict_iter redisDbPersistentDataSnapshot : : random_cache_threadsafe ( bool fPrimaryOnly ) const
2019-11-24 17:59:02 -05:00
{
if ( size ( ) = = 0 )
return dict_iter ( nullptr ) ;
2020-03-23 23:12:10 -04:00
if ( ! fPrimaryOnly & & m_pdbSnapshot ! = nullptr & & m_pdbSnapshot - > size ( ) > 0 )
2019-11-24 17:59:02 -05:00
{
dict_iter iter ( nullptr ) ;
double pctInSnapshot = ( double ) m_pdbSnapshot - > size ( ) / ( size ( ) + m_pdbSnapshot - > size ( ) ) ;
double randval = ( double ) rand ( ) / RAND_MAX ;
if ( randval < = pctInSnapshot )
{
2019-12-20 17:45:07 -05:00
return m_pdbSnapshot - > random_cache_threadsafe ( ) ;
2019-11-24 17:59:02 -05:00
}
}
2019-12-20 17:45:07 -05:00
if ( dictSize ( m_pdict ) = = 0 )
return dict_iter ( nullptr ) ;
2019-11-24 17:59:02 -05:00
dictEntry * de = dictGetRandomKey ( m_pdict ) ;
return dict_iter ( de ) ;
}
2020-06-05 00:39:58 -04:00
dict_iter redisDbPersistentData : : find_cached_threadsafe ( const char * key ) const
2019-11-25 17:50:40 -05:00
{
dictEntry * de = dictFind ( m_pdict , key ) ;
2020-02-02 23:42:44 -05:00
if ( de = = nullptr & & m_pdbSnapshot ! = nullptr & & dictFind ( m_pdictTombstone , key ) = = nullptr )
2019-11-25 17:50:40 -05:00
{
2019-12-20 17:45:07 -05:00
auto itr = m_pdbSnapshot - > find_cached_threadsafe ( key ) ;
2020-02-02 23:42:44 -05:00
if ( itr ! = nullptr )
2019-11-25 17:50:40 -05:00
return itr ;
}
return dict_iter ( de ) ;
}
2020-07-09 12:57:35 -04:00
struct scan_callback_data
{
dict * dictTombstone ;
2020-07-10 03:43:56 +00:00
sds type ;
2020-07-09 12:57:35 -04:00
list * keys ;
} ;
void snapshot_scan_callback ( void * privdata , const dictEntry * de )
{
scan_callback_data * data = ( scan_callback_data * ) privdata ;
if ( data - > dictTombstone ! = nullptr & & dictFind ( data - > dictTombstone , dictGetKey ( de ) ) ! = nullptr )
return ;
sds sdskey = ( sds ) dictGetKey ( de ) ;
2020-07-10 03:43:56 +00:00
if ( data - > type ! = nullptr )
{
if ( strcasecmp ( data - > type , getObjectTypeName ( ( robj * ) dictGetVal ( de ) ) ) ! = 0 )
return ;
}
2020-07-09 12:57:35 -04:00
listAddNodeHead ( data - > keys , createStringObject ( sdskey , sdslen ( sdskey ) ) ) ;
}
2020-07-10 03:43:56 +00:00
unsigned long redisDbPersistentDataSnapshot : : scan_threadsafe ( unsigned long iterator , long count , sds type , list * keys ) const
2020-07-09 12:57:35 -04:00
{
unsigned long iteratorReturn = 0 ;
2020-07-10 01:43:51 +00:00
scan_callback_data data ;
data . dictTombstone = m_pdictTombstone ;
data . keys = keys ;
2020-07-10 03:43:56 +00:00
data . type = type ;
2020-07-09 12:57:35 -04:00
const redisDbPersistentDataSnapshot * psnapshot ;
__atomic_load ( & m_pdbSnapshot , & psnapshot , __ATOMIC_ACQUIRE ) ;
if ( psnapshot ! = nullptr )
{
// Always process the snapshot first as we assume its bigger than we are
2020-07-10 03:43:56 +00:00
iteratorReturn = psnapshot - > scan_threadsafe ( iterator , count , type , keys ) ;
2020-07-09 12:57:35 -04:00
2020-07-10 01:43:51 +00:00
// Just catch up with our snapshot
do
{
iterator = dictScan ( m_pdict , iterator , snapshot_scan_callback , nullptr , & data ) ;
} while ( iterator ! = 0 & & ( iterator < iteratorReturn | | iteratorReturn = = 0 ) ) ;
}
else
2020-07-09 12:57:35 -04:00
{
long maxiterations = count * 10 ; // allow more iterations than keys for sparse tables
iteratorReturn = iterator ;
do {
2020-07-10 01:43:51 +00:00
iteratorReturn = dictScan ( m_pdict , iteratorReturn , snapshot_scan_callback , NULL , & data ) ;
2020-07-09 12:57:35 -04:00
} while ( iteratorReturn & &
maxiterations - - & &
listLength ( keys ) < ( unsigned long ) count ) ;
}
2020-07-10 01:43:51 +00:00
2020-07-09 12:57:35 -04:00
return iteratorReturn ;
}
2020-06-04 00:26:51 -04:00
bool redisDbPersistentDataSnapshot : : iterate_threadsafe ( std : : function < bool ( const char * , robj_roptr o ) > fn , bool fKeyOnly , bool fCacheOnly ) const
2019-11-24 17:59:02 -05:00
{
2019-12-23 17:17:41 -05:00
// Take the size so we can ensure we visited every element exactly once
// use volatile to ensure it's not checked too late. This makes it more
// likely we'll detect races (but it won't gurantee it)
volatile size_t celem = size ( ) ;
dictEntry * de = nullptr ;
bool fResult = true ;
dictIterator * di = dictGetSafeIterator ( m_pdict ) ;
while ( fResult & & ( ( de = dictNext ( di ) ) ! = nullptr ) )
{
- - celem ;
2020-01-12 01:22:44 -05:00
robj * o = ( robj * ) dictGetVal ( de ) ;
2019-12-23 17:17:41 -05:00
if ( ! fn ( ( const char * ) dictGetKey ( de ) , o ) )
fResult = false ;
}
dictReleaseIterator ( di ) ;
2020-06-04 00:26:51 -04:00
if ( m_spstorage ! = nullptr & & ! fCacheOnly )
2019-12-20 17:45:07 -05:00
{
2019-12-23 17:17:41 -05:00
bool fSawAll = fResult & & m_spstorage - > enumerate ( [ & ] ( const char * key , size_t cchKey , const void * data , size_t cbData ) {
2019-12-20 17:45:07 -05:00
sds sdsKey = sdsnewlen ( key , cchKey ) ;
dictEntry * de = dictFind ( m_pdict , sdsKey ) ;
2019-12-23 17:17:41 -05:00
bool fContinue = true ;
if ( de = = nullptr )
2019-12-20 17:45:07 -05:00
{
2020-01-03 15:53:36 -05:00
robj * o = nullptr ;
if ( ! fKeyOnly )
{
size_t offset = 0 ;
deserializeExpire ( sdsKey , ( const char * ) data , cbData , & offset ) ;
o = deserializeStoredObject ( this , sdsKey , reinterpret_cast < const char * > ( data ) + offset , cbData - offset ) ;
}
2019-12-20 17:45:07 -05:00
fContinue = fn ( sdsKey , o ) ;
if ( o ! = nullptr )
decrRefCount ( o ) ;
}
sdsfree ( sdsKey ) ;
return fContinue ;
} ) ;
return fSawAll ;
}
2019-12-17 17:39:04 -05:00
const redisDbPersistentDataSnapshot * psnapshot ;
2019-11-28 19:00:51 -05:00
__atomic_load ( & m_pdbSnapshot , & psnapshot , __ATOMIC_ACQUIRE ) ;
if ( fResult & & psnapshot ! = nullptr )
2019-11-24 17:59:02 -05:00
{
2020-02-02 23:42:44 -05:00
fResult = psnapshot - > iterate_threadsafe ( [ this , & fn , & celem ] ( const char * key , robj_roptr o ) {
2019-11-24 17:59:02 -05:00
dictEntry * deTombstone = dictFind ( m_pdictTombstone , key ) ;
if ( deTombstone ! = nullptr )
return true ;
// Alright it's a key in the use keyspace, lets ensure it and then pass it off
2019-11-25 17:50:40 -05:00
- - celem ;
2019-11-24 17:59:02 -05:00
return fn ( key , o ) ;
2020-06-04 00:26:51 -04:00
} , fKeyOnly , fCacheOnly ) ;
2019-11-24 17:59:02 -05:00
}
2020-06-04 00:26:51 -04:00
// we should have hit all keys or had a good reason not to
serverAssert ( ! fResult | | celem = = 0 | | ( m_spstorage & & fCacheOnly ) ) ;
2019-11-24 17:59:02 -05:00
return fResult ;
}
2019-11-28 19:00:51 -05:00
int redisDbPersistentDataSnapshot : : snapshot_depth ( ) const
{
if ( m_pdbSnapshot )
return m_pdbSnapshot - > snapshot_depth ( ) + 1 ;
return 0 ;
}
void redisDbPersistentData : : consolidate_snapshot ( )
{
aeAcquireLock ( ) ;
auto psnapshot = ( m_pdbSnapshot ! = nullptr ) ? m_spdbSnapshotHOLDER . get ( ) : nullptr ;
if ( psnapshot = = nullptr )
{
aeReleaseLock ( ) ;
return ;
}
psnapshot - > m_refCount + + ; // ensure it's not free'd
aeReleaseLock ( ) ;
2020-02-01 22:28:24 -05:00
psnapshot - > consolidate_children ( this , false /* fForce */ ) ;
2019-11-28 19:00:51 -05:00
aeAcquireLock ( ) ;
endSnapshot ( psnapshot ) ;
aeReleaseLock ( ) ;
}
// only call this on the "real" database to consolidate the first child
2020-02-01 22:28:24 -05:00
void redisDbPersistentDataSnapshot : : consolidate_children ( redisDbPersistentData * pdbPrimary , bool fForce )
2019-11-28 19:00:51 -05:00
{
static fastlock s_lock { " consolidate_children " } ; // this lock ensures only one thread is consolidating at a time
std : : unique_lock < fastlock > lock ( s_lock , std : : defer_lock ) ;
if ( ! lock . try_lock ( ) )
return ; // this is a best effort function
2020-02-02 23:42:44 -05:00
if ( ! fForce & & snapshot_depth ( ) < 2 )
2019-11-28 19:00:51 -05:00
return ;
auto spdb = std : : unique_ptr < redisDbPersistentDataSnapshot > ( new ( MALLOC_LOCAL ) redisDbPersistentDataSnapshot ( ) ) ;
spdb - > initialize ( ) ;
dictExpand ( spdb - > m_pdict , m_pdbSnapshot - > size ( ) ) ;
2020-06-09 20:59:09 -04:00
volatile size_t skipped = 0 ;
2020-02-01 22:28:24 -05:00
m_pdbSnapshot - > iterate_threadsafe ( [ & ] ( const char * key , robj_roptr o ) {
2020-01-12 01:22:44 -05:00
if ( o ! = nullptr ) {
2019-12-20 17:45:07 -05:00
dictAdd ( spdb - > m_pdict , sdsdupshared ( key ) , o . unsafe_robjcast ( ) ) ;
2020-01-12 01:22:44 -05:00
incrRefCount ( o ) ;
2020-06-09 20:59:09 -04:00
} else {
+ + skipped ;
2020-01-12 01:22:44 -05:00
}
2019-11-28 19:00:51 -05:00
return true ;
2020-06-04 00:26:51 -04:00
} , true /*fKeyOnly*/ , true /*fCacheOnly*/ ) ;
2019-12-06 17:43:28 -05:00
spdb - > m_spstorage = m_pdbSnapshot - > m_spstorage ;
2020-06-09 19:58:42 -04:00
{
std : : unique_lock < fastlock > ul ( g_expireLock ) ;
delete spdb - > m_setexpire ;
spdb - > m_setexpire = new ( MALLOC_LOCAL ) expireset ( * m_pdbSnapshot - > m_setexpire ) ;
}
2019-11-28 19:00:51 -05:00
spdb - > m_pdict - > iterators + + ;
2020-06-09 20:59:09 -04:00
if ( m_spstorage ) {
serverAssert ( spdb - > size ( ) = = m_pdbSnapshot - > size ( ) ) ;
} else {
serverAssert ( ( spdb - > size ( ) + skipped ) = = m_pdbSnapshot - > size ( ) ) ;
}
2019-11-28 19:00:51 -05:00
// Now wire us in (Acquire the LOCK)
AeLocker locker ;
locker . arm ( nullptr ) ;
int depth = 0 ;
redisDbPersistentDataSnapshot * psnapshotT = pdbPrimary - > m_spdbSnapshotHOLDER . get ( ) ;
while ( psnapshotT ! = nullptr )
{
+ + depth ;
if ( psnapshotT = = this )
break ;
psnapshotT = psnapshotT - > m_spdbSnapshotHOLDER . get ( ) ;
}
if ( psnapshotT ! = this )
{
locker . disarm ( ) ; // don't run spdb's dtor in the lock
return ; // we were unlinked and this was a waste of time
}
2020-01-12 01:22:44 -05:00
serverLog ( LL_VERBOSE , " cleaned %d snapshots " , snapshot_depth ( ) - 1 ) ;
2019-11-28 19:00:51 -05:00
spdb - > m_refCount = depth ;
spdb - > m_fConsolidated = true ;
// Drop our refs from this snapshot and its children
psnapshotT = this ;
std : : vector < redisDbPersistentDataSnapshot * > vecT ;
while ( ( psnapshotT = psnapshotT - > m_spdbSnapshotHOLDER . get ( ) ) ! = nullptr )
{
vecT . push_back ( psnapshotT ) ;
}
for ( auto itr = vecT . rbegin ( ) ; itr ! = vecT . rend ( ) ; + + itr )
{
psnapshotT = * itr ;
psnapshotT - > m_refCount - = ( depth - 1 ) ; // -1 because dispose will sub another
gcDisposeSnapshot ( psnapshotT ) ;
}
std : : atomic_thread_fence ( std : : memory_order_seq_cst ) ;
m_spdbSnapshotHOLDER . release ( ) ; // GC has responsibility for it now
m_spdbSnapshotHOLDER = std : : move ( spdb ) ;
2019-12-17 17:39:04 -05:00
const redisDbPersistentDataSnapshot * ptrT = m_spdbSnapshotHOLDER . get ( ) ;
2019-11-28 19:00:51 -05:00
__atomic_store ( & m_pdbSnapshot , & ptrT , __ATOMIC_SEQ_CST ) ;
locker . disarm ( ) ; // ensure we're not locked for any dtors
2020-01-12 01:22:44 -05:00
}
bool redisDbPersistentDataSnapshot : : FStale ( ) const
{
// 0.5 seconds considered stale;
static const uint64_t msStale = 500 ;
return ( ( getMvccTstamp ( ) - m_mvccCheckpoint ) > > MVCC_MS_SHIFT ) > = msStale ;
2019-11-28 19:00:51 -05:00
}