From 6ee9d69763f337e402b5c5dd3d3e985e4a53f002 Mon Sep 17 00:00:00 2001
From: John Sully
Date: Fri, 26 Feb 2021 01:28:05 +0000
Subject: [PATCH] Eliminate the need for an mfence by tricking the CPU into
 ordering the futex read

Former-commit-id: 340e6f5bc94cd1c3b0c6fb6da833e8504acaf23a
---
 src/fastlock_x64.asm | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/fastlock_x64.asm b/src/fastlock_x64.asm
index 9e17f8995..3fecfa439 100644
--- a/src/fastlock_x64.asm
+++ b/src/fastlock_x64.asm
@@ -190,10 +190,11 @@ fastlock_unlock:
     mov dword ptr [rdi], -1 # pidOwner = -1 (we don't own it anymore)
     mov esi, [rdi+64]       # get current active (this one)
     inc esi                 # bump it to the next thread
+    sfence                  # ensure whatever was written in the lock is visible
     mov word ptr [rdi+64], si # give up our ticket (note: lock is not required here because the spinlock itself guards this variable)
-    mfence                  # sync other threads
     # At this point the lock is removed, however we must wake up any pending futexs
-    mov edx, [rdi+64+4]     # load the futex mask
+    mov rdx, [rdi+64]       # load the futex mask, note we intentionally also read the ticket we just wrote to ensure this is ordered with the above mov
+    shr rdx, 32             # isolate the mask
     bt edx, esi             # is the next thread waiting on a futex?
     jc unlock_futex         # unlock the futex if necessary
     ret                     # if not we're done.
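
Note on the technique, with a minimal C++ sketch of the unlock path this hunk
optimizes. The layout and field names below (pidOwner, active, avail,
futexwaiters) are illustrative assumptions rather than the exact KeyDB
definitions, and the overlapping-load trick itself is x86-specific, so the
sketch keeps the portable full fence and describes the replacement in comments.

    #include <atomic>
    #include <cstdint>

    // Hypothetical layout mirroring the offsets used in the assembly above.
    struct fastlock_model {
        std::atomic<int32_t>  pidOwner;      // [rdi]     owning pid, -1 when free
        char pad[60];                        // ... fields up to offset 64 ...
        std::atomic<uint16_t> active;        // [rdi+64]  ticket currently being served
        std::atomic<uint16_t> avail;         // [rdi+66]  next ticket to hand out
        std::atomic<uint32_t> futexwaiters;  // [rdi+68]  bitmask of tickets sleeping in futex()
    };

    // Returns the ticket that should receive a futex wake, or -1 if no wake is needed.
    inline int fastlock_unlock_model(fastlock_model &l)
    {
        l.pidOwner.store(-1, std::memory_order_relaxed);       // we no longer own the lock
        uint16_t next = l.active.load(std::memory_order_relaxed) + 1;
        l.active.store(next, std::memory_order_release);       // hand the lock to the next ticket

        // The load of futexwaiters below must not be satisfied before the store
        // to `active` above, or a waiter that just published its bit could be
        // missed. Portably that takes a full StoreLoad barrier, which is what
        // the removed `mfence` provided.
        std::atomic_thread_fence(std::memory_order_seq_cst);

        // The patch drops the fence and instead does `mov rdx, [rdi+64]` followed
        // by `shr rdx, 32`: one 64-bit load covering both the ticket word and the
        // futex mask. Because that load partially overlaps the 16-bit ticket store
        // just issued, the CPU cannot forward the pending store to it, so in
        // practice the load completes only after the store, giving the ordering
        // the mfence used to provide.
        uint32_t waiters = l.futexwaiters.load(std::memory_order_relaxed);
        return (waiters & (1u << (next % 32))) ? next : -1;    // mirrors `bt edx, esi` / `jc unlock_futex`
    }

The payoff, as the subject line says, is removing a full mfence from the unlock
fast path; the cost is one wider load plus a shift.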