From 465b8a9ea7609b0d73ecfeac134ad848a57634c2 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sat, 2 Mar 2019 16:47:27 -0500 Subject: [PATCH] write fastlock_unlock in ASM... because it's faster Former-commit-id: bad73faf28f879d32c4064389b69c83e9474115a --- src/fastlock.cpp | 4 ++-- src/fastlock_x64.asm | 18 ++++++++++++++++-- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/src/fastlock.cpp b/src/fastlock.cpp index a1499d77b..f265f3908 100644 --- a/src/fastlock.cpp +++ b/src/fastlock.cpp @@ -111,7 +111,6 @@ extern "C" int fastlock_trylock(struct fastlock *lock) } return false; } -#endif extern "C" void fastlock_unlock(struct fastlock *lock) { @@ -121,9 +120,10 @@ extern "C" void fastlock_unlock(struct fastlock *lock) assert((int)__atomic_load_4(&lock->m_pidOwner, __ATOMIC_RELAXED) >= 0); // unlock after free lock->m_pidOwner = -1; std::atomic_thread_fence(std::memory_order_acquire); - __atomic_fetch_add(&lock->m_ticket.m_active, 1, __ATOMIC_ACQ_REL); + __atomic_fetch_add(&lock->m_ticket.m_active, 1, __ATOMIC_ACQ_REL); // on x86 the atomic is not required here, but ASM handles that case } } +#endif extern "C" void fastlock_free(struct fastlock *lock) { diff --git a/src/fastlock_x64.asm b/src/fastlock_x64.asm index d2a1a90b3..1b876350f 100644 --- a/src/fastlock_x64.asm +++ b/src/fastlock_x64.asm @@ -6,8 +6,7 @@ extern sched_yield ; This is the first use of assembly in this codebase, a valid question is WHY? ; The spinlock we implement here is performance critical, and simply put GCC ; emits awful code. The original C code is left in fastlock.cpp for reference -; and x-plat. The code generated for the unlock case is reasonable and left in -; C++. +; and x-plat. 
ALIGN 16 global fastlock_lock @@ -103,3 +102,18 @@ ALIGN 16 .LAlreadyLocked: xor eax, eax ; return 0; ret + +ALIGN 16 +global fastlock_unlock +fastlock_unlock: + ; RDI points to the struct: + ; uint16_t active + ; uint16_t avail + ; int32_t m_pidOwner + ; int32_t m_depth + sub dword [rdi+8], 1 ; decrement m_depth, don't use dec because it partially writes the flag register and we don't know its state + jnz .LDone ; if depth is non-zero this is a recursive unlock, and we still hold it + mov dword [rdi+4], -1 ; pidOwner = -1 (we don't own it anymore) + inc word [rdi] ; give up our ticket (note: lock is not required here because the spinlock itself guards this variable) +.LDone: + ret