Fix failure to wake up from futex sleep due to fastlock_unlock reading the wrong offset in the asm version. Also fix false sharing in the spinlock.
Former-commit-id: 4c8603815cf525c75dcc360fddeab9ca6fe70ae6
parent a27c1c9f0a
commit 72bbf16c2f
fastlock.cpp
@@ -258,7 +258,10 @@ extern "C" void fastlock_init(struct fastlock *lock, const char *name)
     lock->m_depth = 0;
     lock->m_pidOwner = -1;
     lock->futex = 0;
-    lock->szName = name;
+    int cch = strlen(name);
+    cch = std::min<int>(cch, sizeof(lock->szName)-1);
+    memcpy(lock->szName, name, cch);
+    lock->szName[cch] = '\0';
     ANNOTATE_RWLOCK_CREATE(lock);
 }
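The initializer now copies the name into the struct's inline 56-byte buffer instead of storing the caller's pointer. A minimal sketch of that logic, using a hypothetical standalone struct (the real fastlock also carries the ticket, futex, and owner fields):

    #include <algorithm>
    #include <cstring>

    struct name_holder {
        char szName[56];  // inline buffer; also pads the cold fields out to a cache line
    };

    void set_name(name_holder *lock, const char *name)
    {
        int cch = static_cast<int>(std::strlen(name));
        // clamp to the buffer, leaving room for the terminator
        cch = std::min<int>(cch, static_cast<int>(sizeof(lock->szName)) - 1);
        std::memcpy(lock->szName, name, cch);
        lock->szName[cch] = '\0';  // always NUL-terminated, even when truncated
    }

A side effect worth noting: the lock no longer depends on the caller keeping the name string alive.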
fastlock.h
@@ -1,5 +1,6 @@
 #pragma once
 #include <inttypes.h>
+#include <stddef.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -40,12 +41,13 @@ struct ticket
 
 struct fastlock
 {
-    volatile struct ticket m_ticket;
-
     volatile int m_pidOwner;
     volatile int m_depth;
+    char szName[56];
+    /* Volatile data on separate cache line */
+    volatile struct ticket m_ticket;
     unsigned futex;
-    const char *szName;
+    char padding[56]; // ensure ticket and futex are on their own independent cache line
 
 #ifdef __cplusplus
     fastlock(const char *name)
@@ -81,3 +83,5 @@ struct fastlock
     bool fOwnLock(); // true if this thread owns the lock, NOTE: not 100% reliable, use for debugging only
 #endif
 };
+
+static_assert(offsetof(struct fastlock, m_ticket) == 64, "ensure padding is correct");
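This layout change is the false-sharing fix: the frequently written ticket and futex words move to their own cache line at offset 64, while the rarely written owner/depth/name metadata fills the first line. A sketch of the resulting offsets, assuming 64-byte cache lines and the two 16-bit counters described in the asm comments (the field names inside the ticket struct are illustrative):

    #include <cstddef>
    #include <cstdint>

    struct ticket_sketch { uint16_t active; uint16_t avail; };  // names assumed from the asm comments

    struct fastlock_layout {
        volatile int m_pidOwner;          // offset 0
        volatile int m_depth;             // offset 4
        char szName[56];                  // offsets 8..63: fills out the first cache line
        volatile ticket_sketch m_ticket;  // offset 64: starts the second cache line
        unsigned futex;                   // offset 68
        char padding[56];                 // offsets 72..127: keeps the hot line private
    };

    // The same check the header now makes: threads spinning on m_ticket no
    // longer share a cache line with readers of m_pidOwner/szName, and vice versa.
    static_assert(offsetof(fastlock_layout, m_ticket) == 64, "ensure padding is correct");
    static_assert(sizeof(fastlock_layout) == 128, "exactly two cache lines");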
fastlock_x64.asm
@@ -16,10 +16,11 @@ fastlock_lock:
 .cfi_startproc
 .cfi_def_cfa rsp, 8
 	# RDI points to the struct:
-	#	uint16_t active
-	#	uint16_t avail
 	#	int32_t m_pidOwner
 	#	int32_t m_depth
+	# [rdi+64] ...
+	#	uint16_t active
+	#	uint16_t avail
 
 	# First get our TID and put it in ecx
 	push rdi                # we need our struct pointer (also balance the stack for the call)
@@ -29,18 +30,18 @@ fastlock_lock:
 	pop rdi                 # get our pointer back
 .cfi_adjust_cfa_offset -8
 
-	cmp [rdi+4], esi        # Is the TID we got back the owner of the lock?
+	cmp [rdi], esi          # Is the TID we got back the owner of the lock?
 	je .LLocked             # Don't spin in that case
 
 	xor eax, eax            # eliminate partial register dependency
 	inc eax                 # we want to add one
-	lock xadd [rdi+2], ax   # do the xadd, ax contains the value before the addition
+	lock xadd [rdi+66], ax  # do the xadd, ax contains the value before the addition
 	                        # ax now contains the ticket
 	# OK Start the wait loop
 	xor ecx, ecx
 .ALIGN 16
 .LLoop:
-	mov edx, [rdi]
+	mov edx, [rdi+64]
 	cmp dx, ax              # is our ticket up?
 	je .LLocked             # leave the loop
 	pause
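For readers following the asm: this is a plain ticket lock, simply relocated to offset 64. A hedged C++ rendering of the acquire path shown above, ignoring the futex-sleep slow path that lives in the lines this hunk does not show:

    #include <atomic>
    #include <cstdint>
    #include <emmintrin.h>  // _mm_pause, the intrinsic form of the asm's `pause`

    struct ticket_counters {
        std::atomic<uint16_t> active;  // [rdi+64]: whose turn it is
        std::atomic<uint16_t> avail;   // [rdi+66]: next ticket to hand out
    };

    void ticket_lock(ticket_counters &t)
    {
        // lock xadd [rdi+66], ax — atomically take the next ticket
        uint16_t my_ticket = t.avail.fetch_add(1, std::memory_order_acquire);
        // .LLoop — spin until the active counter reaches our ticket
        while (t.active.load(std::memory_order_acquire) != my_ticket)
            _mm_pause();
    }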
@@ -72,8 +73,8 @@ fastlock_lock:
 	jmp .LLoop              # Get back in the game
 .ALIGN 16
 .LLocked:
-	mov [rdi+4], esi        # lock->m_pidOwner = gettid()
-	inc dword ptr [rdi+8]   # lock->m_depth++
+	mov [rdi], esi          # lock->m_pidOwner = gettid()
+	inc dword ptr [rdi+4]   # lock->m_depth++
 	ret
 .cfi_endproc
 
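Once its ticket comes up, the winner records itself so the `cmp [rdi], esi` check at the top can satisfy re-entrant locking by just bumping the depth. In C++ terms (a hypothetical helper, offsets per the new layout):

    struct owner_fields { volatile int m_pidOwner; volatile int m_depth; };

    // Runs with the lock already held, so plain stores suffice.
    void on_acquired(owner_fields &l, int tid)
    {
        l.m_pidOwner = tid;  // mov [rdi], esi        (offset 0, was offset 4)
        ++l.m_depth;         // inc dword ptr [rdi+4] (offset 4, was offset 8)
    }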
@@ -82,10 +83,11 @@ fastlock_lock:
 .type fastlock_trylock,@function
 fastlock_trylock:
 	# RDI points to the struct:
-	#	uint16_t active
-	#	uint16_t avail
 	#	int32_t m_pidOwner
 	#	int32_t m_depth
+	# [rdi+64] ...
+	#	uint16_t active
+	#	uint16_t avail
 
 	# First get our TID and put it in ecx
 	push rdi                # we need our struct pointer (also balance the stack for the call)
@@ -93,29 +95,29 @@ fastlock_trylock:
 	mov esi, eax            # back it up in esi
 	pop rdi                 # get our pointer back
 
-	cmp [rdi+4], esi        # Is the TID we got back the owner of the lock?
+	cmp [rdi], esi          # Is the TID we got back the owner of the lock?
 	je .LRecursive          # Don't spin in that case
 
-	mov eax, [rdi]          # get both active and avail counters
+	mov eax, [rdi+64]       # get both active and avail counters
 	mov ecx, eax            # duplicate in ecx
 	ror ecx, 16             # swap upper and lower 16-bits
 	cmp eax, ecx            # are the upper and lower 16-bits the same?
 	jnz .LAlreadyLocked     # If not return failure
 
 	# at this point we know eax+ecx have [avail][active] and they are both the same
-	add ecx, 0x10000        # increment avail, ecx is now our wanted value
-	lock cmpxchg [rdi], ecx # If rdi still contains the value in eax, put in ecx (inc avail)
-	jnz .LAlreadyLocked     # If Z is not set then someone locked it while we were preparing
+	add ecx, 0x10000            # increment avail, ecx is now our wanted value
+	lock cmpxchg [rdi+64], ecx  # If rdi still contains the value in eax, put in ecx (inc avail)
+	jnz .LAlreadyLocked         # If Z is not set then someone locked it while we were preparing
 	xor eax, eax
-	inc eax                     # return SUCCESS! (eax=1)
-	mov [rdi+4], esi            # lock->m_pidOwner = gettid()
-	mov dword ptr [rdi+8], eax  # lock->m_depth = 1
+	inc eax                     # return SUCCESS! (eax=1)
+	mov [rdi], esi              # lock->m_pidOwner = gettid()
+	mov dword ptr [rdi+4], eax  # lock->m_depth = 1
 	ret
 .ALIGN 16
 .LRecursive:
 	xor eax, eax
 	inc eax                 # return SUCCESS! (eax=1)
-	inc dword ptr [rdi+8]   # lock->m_depth++
+	inc dword ptr [rdi+4]   # lock->m_depth++
 	ret
 .ALIGN 16
 .LAlreadyLocked:
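The trylock trick above deserves a note: reading active and avail as one 32-bit word and rotating by 16 makes "lock is free" a single equality test, and the 32-bit cmpxchg then claims the ticket without a xadd. A sketch of the same logic, assuming a little-endian layout with active in the low half (as the asm's `add ecx, 0x10000` implies):

    #include <atomic>
    #include <cstdint>

    bool ticket_trylock(std::atomic<uint32_t> &ticket_word)  // the word at [rdi+64]
    {
        uint32_t cur = ticket_word.load(std::memory_order_relaxed);
        if ((cur >> 16) != (cur & 0xFFFF))  // avail != active: someone holds the lock
            return false;
        uint32_t want = cur + 0x10000;      // add ecx, 0x10000: claim the next ticket
        // lock cmpxchg [rdi+64], ecx — publish only if nobody raced us
        return ticket_word.compare_exchange_strong(cur, want,
                                                   std::memory_order_acquire);
    }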
@@ -126,23 +128,25 @@ fastlock_trylock:
 .global fastlock_unlock
 fastlock_unlock:
 	# RDI points to the struct:
-	#	uint16_t active
-	#	uint16_t avail
 	#	int32_t m_pidOwner
 	#	int32_t m_depth
+	# [rdi+64] ...
+	#	uint16_t active
+	#	uint16_t avail
 	push r11
-	sub dword ptr [rdi+8], 1   # decrement m_depth, don't use dec because it partially writes the flag register and we don't know its state
+	sub dword ptr [rdi+4], 1   # decrement m_depth, don't use dec because it partially writes the flag register and we don't know its state
 	jnz .LDone                 # if depth is non-zero this is a recursive unlock, and we still hold it
-	mov dword ptr [rdi+4], -1  # pidOwner = -1 (we don't own it anymore)
-	mov ecx, [rdi]             # get current active (this one)
+	mov dword ptr [rdi], -1    # pidOwner = -1 (we don't own it anymore)
+	mov ecx, [rdi+64]          # get current active (this one)
 	inc ecx                    # bump it to the next thread
-	mov [rdi], cx              # give up our ticket (note: lock is not required here because the spinlock itself guards this variable)
+	mov [rdi+64], cx           # give up our ticket (note: lock is not required here because the spinlock itself guards this variable)
 	# At this point the lock is released, however we must wake up any pending futexes
 	mov r9d, 1                 # r9d is the bitmask for 2 threads
 	rol r9d, cl                # place the mask in the right spot for the next 2 threads
+	add rdi, 64                # rdi now points to the ticket
 .ALIGN 16
 .LRetryWake:
-	mov r11d, [rdi+12]         # load the futex mask
+	mov r11d, [rdi+4]          # load the futex mask
 	and r11d, r9d              # are any threads waiting on a futex?
 	jz .LDone                  # if not we're done.
 	# we have to wake the futexes
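The page cuts off before the wake syscall itself, but the visible setup — a single bit rotated into position by the new active ticket (`rol r9d, cl`) and tested against a futex mask word — matches the FUTEX_WAKE_BITSET pattern: each sleeper waits with a bit derived from its ticket, and the unlocker wakes only the bits for the next ticket. A hedged sketch of such a wake call (the truncated asm may differ in detail):

    #include <climits>
    #include <cstdint>
    #include <linux/futex.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    // Wake waiters on `addr` whose wait bitset intersects `mask`.
    static long futex_wake_bitset(unsigned *addr, uint32_t mask)
    {
        // futex(uaddr, FUTEX_WAKE_BITSET, nr_wake, timeout=NULL, uaddr2=NULL, val3=mask)
        return syscall(SYS_futex, addr, FUTEX_WAKE_BITSET, INT_MAX,
                       nullptr, nullptr, mask);
    }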