tighten up the spinlock loop, and some other bikeshedding

Former-commit-id: 8bd56fadd6e73794415e1f9eae892c772800e559
This commit is contained in:
John Sully 2019-03-01 13:29:21 -05:00
parent 2cb187ce0a
commit c8e0070fd4

View File

@ -19,50 +19,45 @@ fastlock_lock:
; int32_t m_depth ; int32_t m_depth
; First get our TID and put it in esi ; First get our TID and put it in esi
push rdi ; we need our struct pointer (also balance the stack for the call) push rdi ; we need our struct pointer (also balance the stack for the call)
call gettid ; get our thread ID (TLS is nasty in ASM so don't bother inlining) call gettid ; get our thread ID (TLS is nasty in ASM so don't bother inlining)
mov esi, eax ; back it up in esi mov esi, eax ; back it up in esi
mov rdi, [rsp] ; get our pointer back mov rdi, [rsp] ; get our pointer back
cmp [rdi+4], esi ; Is the TID we got back the owner of the lock? cmp [rdi+4], esi ; Is the TID we got back the owner of the lock?
je .LRecursive ; Don't spin in that case je .LLocked ; Don't spin in that case
xor eax, eax ; eliminate partial register dependency xor eax, eax ; eliminate partial register dependency
mov ax, 1 ; we want to add one inc eax ; we want to add one
lock xadd [rdi+2], ax ; do the xadd, ax contains the value before the addition lock xadd [rdi+2], ax ; do the xadd, ax contains the value before the addition
; eax now contains the ticket ; eax now contains the ticket
xor ecx, ecx xor ecx, ecx
ALIGN 16 ALIGN 16
.Loop: .LLoop:
cmp [rdi], ax ; is our ticket up? cmp [rdi], ax ; is our ticket up?
je .LDone ; leave the loop je .LLocked ; leave the loop
add ecx, 1000h ; Have we been waiting a long time? (oflow if we have) pause
; 1000h is set so we overflow on the 1024*1024'th iteration (like the C code) add ecx, 1000h ; Have we been waiting a long time? (oflow if we have)
jc .LYield ; If so, give up our timeslice to someone who's doing real work ; 1000h is set so we overflow on the 1024*1024'th iteration (like the C code)
pause ; be nice to other hyperthreads jnc .LLoop ; If not, keep spinning; on overflow fall through and give up our timeslice to someone who's doing real work
jmp .Loop ; maybe next time we'll get our turn
.LDone:
mov [rdi+4], esi ; lock->m_pidOwner = gettid()
mov dword [rdi+8], 1 ; lock->m_depth = 1
add rsp, 8 ; fix stack
ret
.LYield:
; Like the compiler, you're probably thinking: "Hey! I should take these pushes out of the loop" ; Like the compiler, you're probably thinking: "Hey! I should take these pushes out of the loop"
; But the compiler doesn't know that we rarely hit this, and when we do we know the lock is ; But the compiler doesn't know that we rarely hit this, and when we do we know the lock is
; taking a long time to be released anyways. We optimize for the common case of short ; taking a long time to be released anyways. We optimize for the common case of short
; lock intervals. That's why we're using a spinlock in the first place ; lock intervals. That's why we're using a spinlock in the first place
push rsi push rsi
push rax push rax
mov rax, 24 ; sys_sched_yield mov rax, 24 ; sys_sched_yield
syscall ; give up our timeslice; we'll be here a while syscall ; give up our timeslice; we'll be here a while
pop rax pop rax
pop rsi pop rsi
mov rdi, [rsp] ; our struct pointer is on the stack already mov rdi, [rsp] ; our struct pointer is on the stack already
xor ecx, ecx ; Reset our loop counter xor ecx, ecx ; Reset our loop counter
jmp .Loop ; Get back in the game jmp .LLoop ; Get back in the game
.LRecursive: ALIGN 16
add dword [rdi+8], 1 ; increment the depth counter .LLocked:
add rsp, 8 ; fix the stack mov [rdi+4], esi ; lock->m_pidOwner = gettid()
inc dword [rdi+8] ; lock->m_depth++
add rsp, 8 ; fix stack
ret ret
ALIGN 16 ALIGN 16
@ -75,32 +70,36 @@ fastlock_trylock:
; int32_t m_depth ; int32_t m_depth
; First get our TID and put it in esi ; First get our TID and put it in esi
push rdi ; we need our struct pointer (also balance the stack for the call) push rdi ; we need our struct pointer (also balance the stack for the call)
call gettid ; get our thread ID (TLS is nasty in ASM so don't bother inlining) call gettid ; get our thread ID (TLS is nasty in ASM so don't bother inlining)
mov esi, eax ; back it up in esi mov esi, eax ; back it up in esi
pop rdi ; get our pointer back pop rdi ; get our pointer back
cmp [rdi+4], esi ; Is the TID we got back the owner of the lock? cmp [rdi+4], esi ; Is the TID we got back the owner of the lock?
je .LRecursive ; Don't spin in that case je .LRecursive ; Don't spin in that case
mov eax, [rdi] ; get both active and avail counters mov eax, [rdi] ; get both active and avail counters
mov ecx, eax ; duplicate in ecx mov ecx, eax ; duplicate in ecx
ror ecx, 16 ; swap upper and lower 16-bits ror ecx, 16 ; swap upper and lower 16-bits
cmp eax, ecx ; are the upper and lower 16-bits the same? cmp eax, ecx ; are the upper and lower 16-bits the same?
jnz .LAlreadyLocked ; If not return failure jnz .LAlreadyLocked ; If not return failure
; at this point we know eax+ecx have [avail][active] and they are both the same ; at this point we know eax+ecx have [avail][active] and they are both the same
add ecx, 10000h ; increment avail, ecx is now our wanted value add ecx, 10000h ; increment avail, ecx is now our wanted value
lock cmpxchg [rdi], ecx ; If [rdi] still contains the value in eax, store ecx there (inc avail) lock cmpxchg [rdi], ecx ; If [rdi] still contains the value in eax, store ecx there (inc avail)
jnz .LAlreadyLocked ; If Z is not set then someone locked it while we were preparing jnz .LAlreadyLocked ; If Z is not set then someone locked it while we were preparing
mov eax, 1 ; return SUCCESS! xor eax, eax
mov [rdi+4], esi ; lock->m_pidOwner = gettid() inc eax ; return SUCCESS! (eax=1)
mov dword [rdi+8], eax ; lock->m_depth = 1 mov [rdi+4], esi ; lock->m_pidOwner = gettid()
ret mov dword [rdi+8], eax ; lock->m_depth = 1
.LAlreadyLocked:
xor eax, eax ; return 0 for failure
ret ret
ALIGN 16
.LRecursive: .LRecursive:
add dword [rdi+8], 1 ; increment the depth counter xor eax, eax
mov eax, 1 ; we successfully got the lock inc eax ; return SUCCESS! (eax=1)
ret inc dword [rdi+8] ; lock->m_depth++
ret
ALIGN 16
.LAlreadyLocked:
xor eax, eax ; return 0;
ret