diff --git a/src/fastlock.cpp b/src/fastlock.cpp index 19375cd0e..4f8b2e6dc 100644 --- a/src/fastlock.cpp +++ b/src/fastlock.cpp @@ -74,6 +74,10 @@ extern int g_fInCrash; #define __has_feature(x) 0 #endif +#ifdef __linux__ +extern "C" void unlock_futex(struct fastlock *lock, uint16_t ifutex); +#endif + #if __has_feature(thread_sanitizer) /* Report that a lock has been created at address "lock". */ @@ -206,6 +210,11 @@ DeadlockDetector g_dlock; static_assert(sizeof(pid_t) <= sizeof(fastlock::m_pidOwner), "fastlock::m_pidOwner not large enough"); uint64_t g_longwaits = 0; +extern "C" void fastlock_panic(struct fastlock *lock) +{ + _serverPanic(__FILE__, __LINE__, "fastlock lock/unlock mismatch for: %s", lock->szName); +} + uint64_t fastlock_getlongwaitcount() { uint64_t rval; @@ -337,31 +346,6 @@ extern "C" int fastlock_trylock(struct fastlock *lock, int fWeak) return false; } -#ifdef __linux__ -#define ROL32(v, shift) ((v << shift) | (v >> (32-shift))) -void unlock_futex(struct fastlock *lock, uint16_t ifutex) -{ - unsigned mask = (1U << (ifutex % 32)); - unsigned futexT; - __atomic_load(&lock->futex, &futexT, __ATOMIC_RELAXED); - futexT &= mask; - - if (futexT == 0) - return; - - for (;;) - { - __atomic_load(&lock->futex, &futexT, __ATOMIC_ACQUIRE); - futexT &= mask; - if (!futexT) - break; - - if (futex(&lock->m_ticket.u, FUTEX_WAKE_BITSET_PRIVATE, INT_MAX, nullptr, mask) == 1) - break; - } -} -#endif - extern "C" void fastlock_unlock(struct fastlock *lock) { --lock->m_depth; @@ -384,6 +368,26 @@ extern "C" void fastlock_unlock(struct fastlock *lock) } #endif +#ifdef __linux__ +#define ROL32(v, shift) ((v << shift) | (v >> (32-shift))) +extern "C" void unlock_futex(struct fastlock *lock, uint16_t ifutex) +{ + unsigned mask = (1U << (ifutex % 32)); + unsigned futexT; + + for (;;) + { + __atomic_load(&lock->futex, &futexT, __ATOMIC_ACQUIRE); + futexT &= mask; + if (!futexT) + break; + + if (futex(&lock->m_ticket.u, FUTEX_WAKE_BITSET_PRIVATE, INT_MAX, nullptr, mask) == 1) + break; + } +} +#endif + extern "C" void fastlock_free(struct fastlock *lock) { // NOP @@ -413,4 +417,4 @@ void fastlock_lock_recursive(struct fastlock *lock, int nesting) { fastlock_lock(lock); lock->m_depth = nesting; -} \ No newline at end of file +} diff --git a/src/fastlock_x64.asm b/src/fastlock_x64.asm index 7c9990a6d..bcea0e095 100644 --- a/src/fastlock_x64.asm +++ b/src/fastlock_x64.asm @@ -126,6 +126,7 @@ fastlock_trylock: .ALIGN 16 .global fastlock_unlock +.type fastlock_unlock,@function fastlock_unlock: # RDI points to the struct: # int32_t m_pidOwner @@ -133,34 +134,19 @@ fastlock_unlock: # [rdi+64] ... # uint16_t active # uint16_t avail - push r11 sub dword ptr [rdi+4], 1 # decrement m_depth, don't use dec because it partially writes the flag register and we don't know its state jnz .LDone # if depth is non-zero this is a recursive unlock, and we still hold it mov dword ptr [rdi], -1 # pidOwner = -1 (we don't own it anymore) - mov ecx, [rdi+64] # get current active (this one) - inc ecx # bump it to the next thread - mov [rdi+64], cx # give up our ticket (note: lock is not required here because the spinlock itself guards this variable) + mov esi, [rdi+64] # get current active (this one) + inc esi # bump it to the next thread + mov word ptr [rdi+64], si # give up our ticket (note: lock is not required here because the spinlock itself guards this variable) mfence # sync other threads # At this point the lock is removed, however we must wake up any pending futexs - mov r9d, 1 # eax is the bitmask for 2 threads - rol r9d, cl # place the mask in the right spot for the next 2 threads - add rdi, 64 # rdi now points to the token + mov edx, [rdi+64+4] # load the futex mask + bt edx, esi # is the next thread waiting on a futex? + jc unlock_futex # unlock the futex if necessary + ret # if not we're done. .ALIGN 16 -.LRetryWake: - mov r11d, [rdi+4] # load the futex mask - and r11d, r9d # are any threads waiting on a futex? - jz .LDone # if not we're done. - # we have to wake the futexs - # rdi ARG1 futex (already in rdi) - mov esi, (10 | 128) # rsi ARG2 FUTEX_WAKE_BITSET_PRIVATE - mov edx, 0x7fffffff # rdx ARG3 INT_MAX (number of threads to wake) - xor r10d, r10d # r10 ARG4 NULL - mov r8, rdi # r8 ARG5 dup rdi - # r9 ARG6 mask (already set above) - mov eax, 202 # sys_futex - syscall - cmp eax, 1 # did we wake as many as we expected? - jnz .LRetryWake .LDone: - pop r11 + js fastlock_panic # panic if we made m_depth negative ret diff --git a/src/networking.cpp b/src/networking.cpp index 242e69798..672077b2f 100644 --- a/src/networking.cpp +++ b/src/networking.cpp @@ -3193,6 +3193,7 @@ int processEventsWhileBlocked(int iel) { serverAssert(c->flags & CLIENT_PROTECTED); c->lock.unlock(); } + aeReleaseLock(); serverAssertDebug(!GlobalLocksAcquired()); try @@ -3215,6 +3216,7 @@ int processEventsWhileBlocked(int iel) { locker.release(); throw; } + AeLocker locker; if (c != nullptr) c->lock.lock();