moved lock-free primitive (CAS) here; add support functions (memory barrier and instruction serialization)
lib: speed up round_up by requiring alignment to be a power of 2. This was SVN commit r2221.
This commit is contained in:
parent
03a3fd6091
commit
c65d966112
@ -199,16 +199,11 @@ int ilog2(const float x)
|
||||
}
|
||||
|
||||
|
||||
// multiple must be a power of two.
|
||||
// round <n> up to the nearest multiple of <multiple>.
//
// n: value to round (e.g. a size or an address).
// multiple: alignment; must be a nonzero power of two (checked by assert).
// returns: the smallest value >= n that is a multiple of <multiple>.
//
// if n is so large that the rounded value does not fit in uintptr_t,
// the intermediate sum wraps and the post-condition assert fires.
uintptr_t round_up(const uintptr_t n, const uintptr_t multiple)
{
	// a power of two has exactly one bit set, so clearing the lowest set
	// bit must yield zero. tested at full uintptr_t width (no narrowing
	// cast); the explicit != 0 also rules out the degenerate alignment.
	assert(multiple != 0 && (multiple & (multiple-1)) == 0);

	// for a power of two, multiple-1 is a mask of the low bits that must be
	// zero in an aligned value; add it to step past the next boundary, then
	// clear those bits. this replaces the slower modulo-based computation.
	const uintptr_t mask = multiple-1;
	const uintptr_t result = (n + mask) & ~mask;

	// post-condition: we moved forward by less than one full step.
	// the distance is computed as result-n because n+multiple may wrap
	// for values near the top of the range even when the result is valid.
	assert(n <= result && result - n < multiple);
	return result;
}
|
||||
|
@ -307,6 +307,7 @@ extern int ilog2(const int n);
|
||||
extern uint log2(uint x);
|
||||
|
||||
|
||||
// multiple must be a power of two.
|
||||
extern uintptr_t round_up(uintptr_t val, uintptr_t multiple);
|
||||
|
||||
extern u16 fp_to_u16(double in);
|
||||
|
@ -23,6 +23,20 @@ extern int on_each_cpu(void(*cb)());
|
||||
extern void get_cpu_info(void);
|
||||
|
||||
|
||||
// atomic "compare and swap". compare the machine word at <location> against
|
||||
// <expected>; if not equal, return false; otherwise, overwrite it with
|
||||
// <new_value> and return true.
|
||||
extern bool CAS_(uintptr_t* location, uintptr_t expected, uintptr_t new_value);
|
||||
|
||||
// convenience wrapper around CAS_ that casts its arguments to the types
// CAS_ expects. each argument is parenthesized before the cast is applied,
// so expressions such as CAS(ptr + 1, a | b, c) are converted as a whole
// (a cast binds tighter than binary operators).
#define CAS(l,o,n) CAS_((uintptr_t*)(l), (uintptr_t)(o), (uintptr_t)(n))
|
||||
|
||||
extern void atomic_add(intptr_t* location, intptr_t increment);
|
||||
|
||||
// enforce strong memory ordering.
|
||||
extern void mfence();
|
||||
|
||||
extern void serialize();
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@ -549,4 +549,71 @@ void ia32_get_cpu_info()
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// note: a 486 or later processor is required since we use CMPXCHG.
|
||||
// there's no feature flag we can check, and the ia32 code doesn't
|
||||
// bother detecting anything < Pentium, so this'll crash and burn if
|
||||
// run on 386. we could replace cmpxchg with a simple mov (since 386
|
||||
// CPUs aren't MP-capable), but it's not worth the trouble.
|
||||
|
||||
// atomic compare-and-swap built on the CMPXCHG instruction.
// compares the machine word at <location> with <expected>; if they are
// equal, <new_value> is stored there and true is returned, otherwise the
// word is left alone and false is returned.
//
// declared naked: there is no compiler-generated prolog/epilog, so the
// assembly below reads its arguments relative to esp and issues its own ret.
__declspec(naked) bool __cdecl CAS_(uintptr_t* location, uintptr_t expected, uintptr_t new_value)
|
||||
{
|
||||
// sanity check: try to catch callers that pass something other than an address
|
||||
// (CAS's arguments are silently cast, so the compiler won't complain).
|
||||
// NOTE(review): this is C code referencing a parameter inside a naked
// function that never sets up a stack frame - confirm the compiler
// addresses <location> correctly here.
assert2(location >= (uintptr_t*)0x10000);
|
||||
|
||||
__asm
|
||||
{
|
||||
// decide whether the LOCK prefix is needed (cpus is presumably the
// number of processors - defined outside this view). the MOVs that
// follow do not modify flags, so the result is still valid at the je.
cmp byte ptr [cpus], 1
|
||||
// CMPXCHG implicitly compares against eax.
mov eax, [esp+8] // expected
|
||||
mov edx, [esp+4] // location
|
||||
mov ecx, [esp+12] // new_value
|
||||
// cpus == 1: jump past the prefix byte and execute a plain cmpxchg.
je $no_lock
|
||||
_emit 0xf0 // LOCK prefix
|
||||
$no_lock:
|
||||
// if [edx] == eax, store ecx and set ZF; otherwise clear ZF.
cmpxchg [edx], ecx
|
||||
// clear the return register with mov, which leaves ZF untouched.
mov eax, 0
|
||||
// return value: al = 1 if the swap happened, else 0.
sete al
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// add <increment> to the machine word at <location> with a single
// read-modify-write ADD instruction; nothing is returned.
//
// declared naked: there is no compiler-generated prolog/epilog, so the
// assembly below reads its arguments relative to esp and issues its own ret.
__declspec(naked) void __cdecl atomic_add(intptr_t* location, intptr_t increment)
|
||||
{
|
||||
__asm
|
||||
{
|
||||
// decide whether the LOCK prefix is needed (cpus is presumably the
// number of processors - defined outside this view). the MOVs that
// follow do not modify flags, so the result is still valid at the je.
cmp byte ptr [cpus], 1
|
||||
mov edx, [esp+4] // location
|
||||
mov eax, [esp+8] // increment
|
||||
// cpus == 1: jump past the prefix byte and execute a plain add.
je $no_lock
|
||||
_emit 0xf0 // LOCK prefix
|
||||
$no_lock:
|
||||
// *location += increment
add [edx], eax
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// enforce strong memory ordering.
|
||||
void mfence()
|
||||
{
|
||||
// Pentium IV
|
||||
if(ia32_cap(SSE2))
|
||||
__asm mfence
|
||||
}
|
||||
|
||||
|
||||
void serialize()
|
||||
{
|
||||
__asm cpuid
|
||||
}
|
||||
|
||||
#endif // #ifndef _M_IX86
|
||||
|
Loading…
Reference in New Issue
Block a user