
move atomic_add and CAS_ into pure asm.

this is good in general (less dependence on compiler) and is intended to
work around a bug andrew has been seeing. apparently VC generates
invalid code for the parameters + ASSERT, which causes infinite
recursion on the <location> validation.

This was SVN commit r3142.
janwas 2005-11-18 05:16:43 +00:00
parent 78af045795
commit eee7fca2d3
2 changed files with 54 additions and 94 deletions
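For context on the <location> sanity check added below: CAS_ is normally reached through a CAS convenience macro that casts its arguments, so swapping the <location> and <expected> parameters compiles without any warning. The following sketch of such a wrapper and a typical retry-loop caller is illustrative only; the macro body and the set_max helper are assumptions, not code from this commit.

extern "C" bool __cdecl CAS_(uintptr_t* location, uintptr_t expected, uintptr_t new_value);
extern "C" void __cdecl atomic_add(intptr_t* location, intptr_t increment);

// hypothetical wrapper: the casts hide any pointer/integer mix-up from the
// compiler, which is why CAS_ has to validate <location> at runtime instead.
#define CAS(l, o, n) CAS_((uintptr_t*)(l), (uintptr_t)(o), (uintptr_t)(n))

// illustrative caller: retry until the swap succeeds or is no longer needed.
static void set_max(intptr_t* location, intptr_t candidate)
{
    for(;;)
    {
        const intptr_t old = *location;
        if(candidate <= old || CAS(location, old, candidate))
            return;
    }
}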


@@ -333,6 +333,60 @@ sym(ia32_cpuid):
ret
;-------------------------------------------------------------------------------
; lock-free support routines
;-------------------------------------------------------------------------------
extern sym(cpus)

; extern "C" void __cdecl atomic_add(intptr_t* location, intptr_t increment);
global sym(atomic_add)
sym(atomic_add):
    cmp     byte [sym(cpus)], 1
    mov     edx, [esp+4]            ; location
    mov     eax, [esp+8]            ; increment
    je      .no_lock
    db      0xf0                    ; LOCK prefix
.no_lock:
    add     [edx], eax
    ret
; notes:
; - this is called via CAS macro, which silently casts its inputs for
; convenience. mixing up the <expected> and <location> parameters would
; go unnoticed; we therefore perform a basic sanity check on <location> and
; raise a warning if it is invalid.
; - a 486 or later processor is required since we use CMPXCHG.
; there's no feature flag we can check, and the ia32 code doesn't
; bother detecting anything < Pentium, so this'll crash and burn if
; run on 386. we could fall back to simple MOVs there (since 386 CPUs
; aren't MP-capable), but it's not worth the trouble.
; extern "C" __declspec(naked) bool __cdecl CAS_(uintptr_t* location, uintptr_t expected, uintptr_t new_value);
global sym(CAS_)
sym(CAS_):
    ; check <location> first: the CMP/JB here must not clobber the flags
    ; that the JE below consumes from the [cpus] comparison.
    mov     edx, [esp+4]            ; location
    cmp     edx, 0x10000            ; .. valid pointer?
    jb      .invalid_location       ;    no - raise warning
    cmp     byte [sym(cpus)], 1
    mov     eax, [esp+8]            ; expected
    mov     ecx, [esp+12]           ; new_value
    je      .no_lock
    db      0xf0                    ; LOCK prefix
.no_lock:
    cmpxchg [edx], ecx
    sete    al
    movzx   eax, al
    ret
; NOTE: nasm 0.98.39 doesn't support generating debug info for win32
; output format. that means this code may be misattributed to other
; functions, which makes tracking it down very difficult.
; we therefore raise an "Invalid Opcode" exception, which is rather distinct.
.invalid_location:
    ud2
;-------------------------------------------------------------------------------
; misc
;-------------------------------------------------------------------------------
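For readers who prefer C, the semantics the new asm implements can be expressed with GCC's __sync builtins. This equivalent is illustrative only and is not part of the commit (the asm path exists precisely because it must not depend on the compiler, and the uniprocessor LOCK-skip is an optimization the builtins don't expose):

#include <stdint.h>

// same observable behaviour as the asm CAS_ above: atomically replace
// *location with new_value iff it still equals expected, and report
// whether the swap happened.
bool CAS_equivalent(uintptr_t* location, uintptr_t expected, uintptr_t new_value)
{
    return __sync_bool_compare_and_swap(location, expected, new_value);
}

// same observable behaviour as the asm atomic_add above.
void atomic_add_equivalent(intptr_t* location, intptr_t increment)
{
    (void)__sync_fetch_and_add(location, increment);
}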


@@ -169,100 +169,6 @@ void ia32_debug_break()
// support code for lock-free primitives
//-----------------------------------------------------------------------------
// CAS does a sanity check on the location parameter to see if the caller
// actually is passing an address (instead of a value, e.g. 1). this is
// important because the call is via a macro that coerces parameters.
//
// reporting is done with the regular CRT assert instead of debug_assert
// because the wdbg code relies on CAS internally (e.g. to avoid
// nested stack traces). a bug such as VC's incorrect handling of params
// in __declspec(naked) functions would then cause infinite recursion,
// which is difficult to debug (since wdbg is hosed) and quite fatal.
#define ASSERT(x) assert(x)
// note: a 486 or later processor is required since we use CMPXCHG.
// there's no feature flag we can check, and the ia32 code doesn't
// bother detecting anything < Pentium, so this'll crash and burn if
// run on 386. we could replace cmpxchg with a simple mov (since 386
// CPUs aren't MP-capable), but it's not worth the trouble.
// note: don't use __declspec(naked) because we need to access one parameter
// from C code and VC can't handle that correctly.
#if HAVE_MS_ASM

bool __cdecl CAS_(uintptr_t* location, uintptr_t expected, uintptr_t new_value)
{
    // try to see if caller isn't passing in an address
    // (CAS's arguments are silently casted)
    ASSERT(!debug_is_pointer_bogus(location));

    bool was_updated;
    __asm
    {
        cmp     byte ptr [cpus], 1
        mov     eax, [expected]
        mov     edx, [location]
        mov     ecx, [new_value]
        je      $no_lock
        _emit   0xf0                // LOCK prefix
    $no_lock:
        cmpxchg [edx], ecx
        sete    al
        mov     [was_updated], al
    }
    return was_updated;
}

void atomic_add(intptr_t* location, intptr_t increment)
{
    __asm
    {
        cmp     byte ptr [cpus], 1
        mov     edx, [location]
        mov     eax, [increment]
        je      $no_lock
        _emit   0xf0                // LOCK prefix
    $no_lock:
        add     [edx], eax
    }
}
#else   // #if HAVE_MS_ASM

bool CAS_(uintptr_t* location, uintptr_t expected, uintptr_t new_value)
{
    uintptr_t prev;

    ASSERT(location >= (uintptr_t*)0x10000);

    __asm__ __volatile__(
        "lock; cmpxchgl %1,%2"
        : "=a"(prev)            // %0: Result in eax should be stored in prev
        : "q"(new_value),       // %1: new_value -> e[abcd]x
          "m"(*location),       // %2: Memory operand
          "0"(expected)         // Stored in same place as %0
        : "memory");            // We make changes in memory
    return prev == expected;
}

void atomic_add(intptr_t* location, intptr_t increment)
{
    __asm__ __volatile__(
        "cmpb $1, %1;"
        "je 1f;"
        "lock;"
        "1: addl %3, %0"
        : "=m" (*location)      /* %0: Output into *location */
        : "m" (cpus),           /* %1: Input for cpu check */
          "m" (*location),      /* %2: *location is also an input */
          "r" (increment)       /* %3: Increment (store in register) */
        : "memory");            /* clobbers memory (*location) */
}

#endif  // #if HAVE_MS_ASM
// enforce strong memory ordering.
void mfence()
{