diff --git a/src/fastlock.cpp b/src/fastlock.cpp
index c74ca8358..4a64b20fc 100644
--- a/src/fastlock.cpp
+++ b/src/fastlock.cpp
@@ -258,7 +258,10 @@ extern "C" void fastlock_init(struct fastlock *lock, const char *name)
     lock->m_depth = 0;
     lock->m_pidOwner = -1;
     lock->futex = 0;
-    lock->szName = name;
+    int cch = strlen(name);
+    cch = std::min<int>(cch, sizeof(lock->szName)-1);
+    memcpy(lock->szName, name, cch);
+    lock->szName[cch] = '\0';
     ANNOTATE_RWLOCK_CREATE(lock);
 }
 
diff --git a/src/fastlock.h b/src/fastlock.h
index 0117049a6..44c09ac17 100644
--- a/src/fastlock.h
+++ b/src/fastlock.h
@@ -1,5 +1,6 @@
 #pragma once
 #include <inttypes.h>
+#include <stddef.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -40,12 +41,13 @@ struct ticket
 
 struct fastlock
 {
-    volatile struct ticket m_ticket;
-
     volatile int m_pidOwner;
     volatile int m_depth;
+    char szName[56];
+    /* Volatile data on separate cache line */
+    volatile struct ticket m_ticket;
     unsigned futex;
-    const char *szName;
+    char padding[56];   // ensure ticket and futex are on their own independent cache line
 
 #ifdef __cplusplus
     fastlock(const char *name)
@@ -81,3 +83,5 @@ struct fastlock
    bool fOwnLock();    // true if this thread owns the lock, NOTE: not 100% reliable, use for debugging only
 #endif
 };
+
+static_assert(offsetof(struct fastlock, m_ticket) == 64, "ensure padding is correct");
\ No newline at end of file
diff --git a/src/fastlock_x64.asm b/src/fastlock_x64.asm
index 6c9df490e..80791a6de 100644
--- a/src/fastlock_x64.asm
+++ b/src/fastlock_x64.asm
@@ -16,10 +16,11 @@ fastlock_lock:
     .cfi_startproc
     .cfi_def_cfa rsp, 8
     # RDI points to the struct:
-    #   uint16_t active
-    #   uint16_t avail
     #   int32_t m_pidOwner
     #   int32_t m_depth
+    # [rdi+64] ...
+    #   uint16_t active
+    #   uint16_t avail
 
     # First get our TID and put it in ecx
     push rdi            # we need our struct pointer (also balance the stack for the call)
@@ -29,18 +30,18 @@ fastlock_lock:
     pop rdi             # get our pointer back
     .cfi_adjust_cfa_offset -8
 
-    cmp [rdi+4], esi    # Is the TID we got back the owner of the lock?
+    cmp [rdi], esi      # Is the TID we got back the owner of the lock?
     je .LLocked         # Don't spin in that case
 
     xor eax, eax        # eliminate partial register dependency
     inc eax             # we want to add one
-    lock xadd [rdi+2], ax   # do the xadd, ax contains the value before the addition
+    lock xadd [rdi+66], ax  # do the xadd, ax contains the value before the addition
     # ax now contains the ticket
     # OK Start the wait loop
     xor ecx, ecx
.ALIGN 16
.LLoop:
-    mov edx, [rdi]
+    mov edx, [rdi+64]
     cmp dx, ax          # is our ticket up?
     je .LLocked         # leave the loop
     pause
@@ -72,8 +73,8 @@ fastlock_lock:
     jmp .LLoop          # Get back in the game
.ALIGN 16
.LLocked:
-    mov [rdi+4], esi        # lock->m_pidOwner = gettid()
-    inc dword ptr [rdi+8]   # lock->m_depth++
+    mov [rdi], esi          # lock->m_pidOwner = gettid()
+    inc dword ptr [rdi+4]   # lock->m_depth++
     ret
     .cfi_endproc
 
@@ -82,10 +83,11 @@ fastlock_lock:
.ALIGN 16
.global fastlock_trylock
.type fastlock_trylock,@function
fastlock_trylock:
     # RDI points to the struct:
-    #   uint16_t active
-    #   uint16_t avail
     #   int32_t m_pidOwner
     #   int32_t m_depth
+    # [rdi+64] ...
+    #   uint16_t active
+    #   uint16_t avail
 
     # First get our TID and put it in ecx
     push rdi            # we need our struct pointer (also balance the stack for the call)
@@ -93,29 +95,29 @@ fastlock_trylock:
     mov esi, eax        # back it up in esi
     pop rdi             # get our pointer back
 
-    cmp [rdi+4], esi    # Is the TID we got back the owner of the lock?
+    cmp [rdi], esi      # Is the TID we got back the owner of the lock?
     je .LRecursive      # Don't spin in that case
 
-    mov eax, [rdi]      # get both active and avail counters
+    mov eax, [rdi+64]   # get both active and avail counters
     mov ecx, eax        # duplicate in ecx
     ror ecx, 16         # swap upper and lower 16-bits
     cmp eax, ecx        # are the upper and lower 16-bits the same?
     jnz .LAlreadyLocked # If not return failure
 
     # at this point we know eax+ecx have [avail][active] and they are both the same
-    add ecx, 0x10000            # increment avail, ecx is now our wanted value
-    lock cmpxchg [rdi], ecx     # If rdi still contains the value in eax, put in ecx (inc avail)
-    jnz .LAlreadyLocked         # If Z is not set then someone locked it while we were preparing
+    add ecx, 0x10000            # increment avail, ecx is now our wanted value
+    lock cmpxchg [rdi+64], ecx  # If rdi still contains the value in eax, put in ecx (inc avail)
+    jnz .LAlreadyLocked         # If Z is not set then someone locked it while we were preparing
     xor eax, eax
-    inc eax                     # return SUCCESS! (eax=1)
-    mov [rdi+4], esi            # lock->m_pidOwner = gettid()
-    mov dword ptr [rdi+8], eax  # lock->m_depth = 1
+    inc eax                     # return SUCCESS! (eax=1)
+    mov [rdi], esi              # lock->m_pidOwner = gettid()
+    mov dword ptr [rdi+4], eax  # lock->m_depth = 1
     ret
.ALIGN 16
.LRecursive:
     xor eax, eax
     inc eax                     # return SUCCESS! (eax=1)
-    inc dword ptr [rdi+8]       # lock->m_depth++
+    inc dword ptr [rdi+4]       # lock->m_depth++
     ret
.ALIGN 16
.LAlreadyLocked:
@@ -126,23 +128,25 @@ fastlock_trylock:
.global fastlock_unlock
fastlock_unlock:
     # RDI points to the struct:
-    #   uint16_t active
-    #   uint16_t avail
     #   int32_t m_pidOwner
     #   int32_t m_depth
+    # [rdi+64] ...
+    #   uint16_t active
+    #   uint16_t avail
     push r11
-    sub dword ptr [rdi+8], 1    # decrement m_depth, don't use dec because it partially writes the flag register and we don't know its state
+    sub dword ptr [rdi+4], 1    # decrement m_depth, don't use dec because it partially writes the flag register and we don't know its state
     jnz .LDone                  # if depth is non-zero this is a recursive unlock, and we still hold it
-    mov dword ptr [rdi+4], -1   # pidOwner = -1 (we don't own it anymore)
-    mov ecx, [rdi]              # get current active (this one)
+    mov dword ptr [rdi], -1     # pidOwner = -1 (we don't own it anymore)
+    mov ecx, [rdi+64]           # get current active (this one)
     inc ecx                     # bump it to the next thread
-    mov [rdi], cx               # give up our ticket (note: lock is not required here because the spinlock itself guards this variable)
+    mov [rdi+64], cx            # give up our ticket (note: lock is not required here because the spinlock itself guards this variable)
     # At this point the lock is removed, however we must wake up any pending futexes
     mov r9d, 1                  # r9d is the bitmask for 2 threads
     rol r9d, cl                 # place the mask in the right spot for the next 2 threads
+    add rdi, 64                 # rdi now points to the ticket
.ALIGN 16
.LRetryWake:
-    mov r11d, [rdi+12]          # load the futex mask
+    mov r11d, [rdi+4]           # load the futex mask
     and r11d, r9d               # are any threads waiting on a futex?
     jz .LDone                   # if not we're done.
     # we have to wake the futexes
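For readers following the raw offsets, here is a minimal C++ sketch (not KeyDB code; the name `ticket_lock` and the field names are illustrative) of the ticket protocol the assembly implements at `[rdi+64]`. The real lock layers recursion tracking (`m_pidOwner`/`m_depth`), `pause` spinning, and a futex sleep path on top of this; note also that the asm's trylock reads both 16-bit counters with a single 32-bit load, where this sketch uses two loads.

```cpp
#include <atomic>
#include <cstdint>

struct ticket_lock
{
    // Mirrors struct ticket at [rdi+64]: active is the low 16 bits,
    // avail the high 16 bits of the same 32-bit word.
    std::atomic<uint16_t> active{0};
    std::atomic<uint16_t> avail{0};

    void lock()
    {
        // "lock xadd [rdi+66], ax": atomically take the next ticket.
        uint16_t my_ticket = avail.fetch_add(1, std::memory_order_relaxed);
        // ".LLoop": spin until the active counter reaches our ticket.
        while (active.load(std::memory_order_acquire) != my_ticket)
            ;  // the real code executes pause here and eventually futex-waits
    }

    bool try_lock()
    {
        // "mov eax, [rdi+64] / ror ecx, 16 / cmp": free only if active == avail.
        uint16_t act = active.load(std::memory_order_relaxed);
        uint16_t av  = avail.load(std::memory_order_relaxed);
        if (act != av)
            return false;
        // "lock cmpxchg [rdi+64], ecx": claim ticket `av` unless another
        // thread's xadd/cmpxchg moved avail first.
        return avail.compare_exchange_strong(av, static_cast<uint16_t>(av + 1),
                                             std::memory_order_acquire,
                                             std::memory_order_relaxed);
    }

    void unlock()
    {
        // "inc ecx / mov [rdi+64], cx": only the lock holder writes active,
        // so a plain load+store (no locked RMW) is sufficient.
        active.store(static_cast<uint16_t>(
                         active.load(std::memory_order_relaxed) + 1),
                     std::memory_order_release);
    }
};
```

This also explains the layout change: the contended ticket word is the one every spinning thread hammers, so the `szName[56]`/`padding[56]` arrays push it (and the futex word) onto a cache line of their own, keeping the owner's `m_pidOwner`/`m_depth` bookkeeping writes from invalidating the line the waiters are polling.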
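The mask juggling in the unlock path (`rol r9d, cl`, then `and r11d, r9d` against the futex word) pairs with the Linux futex bitset operations. A hedged sketch of those calls follows; the wait side is not shown in this diff, so treat the exact convention assumed here (a sleeper registers bit `ticket % 32` in the mask word) as an assumption rather than confirmed behavior:

```cpp
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <climits>
#include <cstdint>

// Sleep until *uaddr no longer equals `expected`, waking only for wakes
// whose bitset intersects `mask` (FUTEX_WAIT_BITSET).
static long futex_wait_bitset(uint32_t *uaddr, uint32_t expected, uint32_t mask)
{
    return syscall(SYS_futex, uaddr, FUTEX_WAIT_BITSET | FUTEX_PRIVATE_FLAG,
                   expected, nullptr, nullptr, mask);
}

// Wake waiters whose wait bitset intersects `mask` (FUTEX_WAKE_BITSET).
static long futex_wake_bitset(uint32_t *uaddr, uint32_t mask)
{
    return syscall(SYS_futex, uaddr, FUTEX_WAKE_BITSET | FUTEX_PRIVATE_FLAG,
                   INT_MAX, nullptr, nullptr, mask);
}
```

Under that assumption, `mov r9d, 1 / rol r9d, cl` builds exactly such a mask, one bit keyed to the next ticket, so the unlocker wakes only the thread whose turn is up rather than the whole queue, and the `and r11d, r9d / jz .LDone` check skips the syscall entirely when no thread with that bit is asleep.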