write fastlock_unlock in ASM... because it's faster

Former-commit-id: bad73faf28f879d32c4064389b69c83e9474115a
John Sully 2019-03-02 16:47:27 -05:00
parent 49ec69de3b
commit 465b8a9ea7
2 changed files with 18 additions and 4 deletions

fastlock.cpp

@@ -111,7 +111,6 @@ extern "C" int fastlock_trylock(struct fastlock *lock)
     }
     return false;
 }
-#endif
 extern "C" void fastlock_unlock(struct fastlock *lock)
 {
@@ -121,9 +120,10 @@ extern "C" void fastlock_unlock(struct fastlock *lock)
         assert((int)__atomic_load_4(&lock->m_pidOwner, __ATOMIC_RELAXED) >= 0); // unlock after free
         lock->m_pidOwner = -1;
         std::atomic_thread_fence(std::memory_order_acquire);
-        __atomic_fetch_add(&lock->m_ticket.m_active, 1, __ATOMIC_ACQ_REL);
+        __atomic_fetch_add(&lock->m_ticket.m_active, 1, __ATOMIC_ACQ_REL); // on x86 the atomic is not required here, but ASM handles that case
     }
 }
+#endif
 extern "C" void fastlock_free(struct fastlock *lock)
 {
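
For reference, here is a compilable C++ sketch of the ticket hand-off this hunk performs. The field names mirror the struct in the diff, but sketch_fastlock, my_tid(), and the recursive-acquire path are illustrative assumptions, not KeyDB's actual fastlock implementation.

// Sketch of the fastlock unlock hand-off in portable C++.
// Field names follow the struct in the diff; my_tid() and the recursive
// acquire check are assumptions for illustration, not KeyDB's real code.
#include <atomic>
#include <cstdint>
#include <thread>

struct sketch_fastlock
{
    std::atomic<uint16_t> m_active{0};   // ticket currently being served
    std::atomic<uint16_t> m_avail{0};    // next ticket to hand out
    std::atomic<int32_t>  m_pidOwner{-1};
    int32_t               m_depth{0};    // only touched while holding the lock
};

static int32_t my_tid()
{
    // hypothetical per-thread id helper; any unique small int works
    static std::atomic<int32_t> next{0};
    thread_local int32_t id = next.fetch_add(1, std::memory_order_relaxed);
    return id;
}

void sketch_lock(sketch_fastlock *l)
{
    if (l->m_pidOwner.load(std::memory_order_relaxed) == my_tid())
    {
        ++l->m_depth;                    // recursive acquire, already owned
        return;
    }
    uint16_t ticket = l->m_avail.fetch_add(1, std::memory_order_relaxed);
    while (l->m_active.load(std::memory_order_acquire) != ticket)
        std::this_thread::yield();       // the asm spins first, then sched_yield()s
    l->m_pidOwner.store(my_tid(), std::memory_order_relaxed);
    l->m_depth = 1;
}

void sketch_unlock(sketch_fastlock *l)
{
    if (--l->m_depth != 0)
        return;                          // recursive unlock, lock still held
    l->m_pidOwner.store(-1, std::memory_order_relaxed);
    // bumping m_active is what publishes the critical section to the next
    // ticket holder, hence the release ordering
    l->m_active.fetch_add(1, std::memory_order_release);
}

The release ordering on the final fetch_add is the load-bearing part: it is what makes the owner's writes visible to the next ticket holder (the diff uses __ATOMIC_ACQ_REL, which is at least as strong).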

fastlock_x64.asm

@@ -6,8 +6,7 @@ extern sched_yield
 ; This is the first use of assembly in this codebase, a valid question is WHY?
 ; The spinlock we implement here is performance critical, and simply put GCC
 ; emits awful code. The original C code is left in fastlock.cpp for reference
-; and x-plat. The code generated for the unlock case is reasonable and left in
-; C++.
+; and x-plat.
 ALIGN 16
 global fastlock_lock
@@ -103,3 +102,18 @@ ALIGN 16
 .LAlreadyLocked:
         xor eax, eax            ; return 0;
         ret
+ALIGN 16
+global fastlock_unlock
+fastlock_unlock:
+        ; RDI points to the struct:
+        ;       uint16_t active
+        ;       uint16_t avail
+        ;       int32_t m_pidOwner
+        ;       int32_t m_depth
+        sub dword [rdi+8], 1    ; decrement m_depth, don't use dec because it partially writes the flag register and we don't know its state
+        jnz .LDone              ; if depth is non-zero this is a recursive unlock, and we still hold it
+        mov dword [rdi+4], -1   ; pidOwner = -1 (we don't own it anymore)
+        inc word [rdi]          ; give up our ticket (note: lock is not required here because the spinlock itself guards this variable)
+.LDone:
+        ret
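
Two x86 details worth noting in the added routine: sub dword [rdi+8], 1 is used instead of dec to avoid a partial flags-register update, and the final inc word [rdi] needs no lock prefix because only the current owner ever writes m_active; the aligned 16-bit store is atomic on its own, and x86's strong store ordering makes it act as a release. To see the depth handling the sub/jnz pair implements, here is a small driver for the sketch above (it reuses the hypothetical sketch_fastlock types from the earlier block):

// exercises the sketch types defined after the fastlock.cpp hunk
#include <cstdio>
#include <thread>

int main()
{
    sketch_fastlock l;
    int counter = 0;                     // protected by the lock
    auto worker = [&] {
        for (int i = 0; i < 100000; ++i)
        {
            sketch_lock(&l);
            sketch_lock(&l);             // recursive: m_depth 1 -> 2
            ++counter;
            sketch_unlock(&l);           // m_depth 2 -> 1, still held (jnz .LDone)
            sketch_unlock(&l);           // m_depth 1 -> 0, ticket handed off
        }
    };
    std::thread a(worker), b(worker);
    a.join();
    b.join();
    std::printf("counter = %d (expected 200000)\n", counter);
    return 0;
}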