1
1
forked from 0ad/0ad

moved lock-free primitive (CAS) here; add support functions (memory barrier and instruction serialization)

lib: speed up round_up by requiring alignment to be a power of 2

This was SVN commit r2221.
This commit is contained in:
janwas 2005-05-03 05:05:16 +00:00
parent 03a3fd6091
commit c65d966112
4 changed files with 85 additions and 8 deletions

View File

@ -199,16 +199,11 @@ int ilog2(const float x)
}
// round <n> up to the nearest multiple of <multiple>.
// <multiple> must be a nonzero power of two — the bitmask trick below
// relies on it (checked via assert in debug builds).
uintptr_t round_up(const uintptr_t n, const uintptr_t multiple)
{
// power-of-two check written inline rather than is_pow2((long)multiple):
// the cast to long truncates uintptr_t on platforms where long is
// narrower (e.g. 64-bit Windows/LLP64) and could wrongly pass/fail.
// the check also catches multiple == 0 (formerly a divide-by-zero hazard).
assert(multiple != 0 && (multiple & (multiple-1)) == 0);
const uintptr_t result = (n + multiple-1) & ~(multiple-1);
// sanity: result lies in [n, n+multiple). this also fires in debug
// builds if n + multiple-1 wrapped around (unsigned overflow).
assert(n <= result && result < n+multiple);
return result;
}

View File

@ -307,6 +307,7 @@ extern int ilog2(const int n);
// integer base-2 logarithm.
// NOTE(review): the name collides with C99 math.h log2 if <math.h> is
// included in the same translation unit — confirm no conflict arises.
extern uint log2(uint x);
// round <val> up to the nearest multiple of <multiple>.
// multiple must be a power of two.
extern uintptr_t round_up(uintptr_t val, uintptr_t multiple);
// convert double to u16.
// NOTE(review): rounding/clamping semantics are defined at the
// implementation site (not visible here) — verify before relying on them.
extern u16 fp_to_u16(double in);

View File

@ -23,6 +23,20 @@ extern int on_each_cpu(void(*cb)());
extern void get_cpu_info(void);

// atomic "compare and swap". compare the machine word at <location> against
// <expected>; if not equal, return false; otherwise, overwrite it with
// <new_value> and return true.
extern bool CAS_(uintptr_t* location, uintptr_t expected, uintptr_t new_value);

// convenience wrapper that casts the arguments to the required types.
// each argument is parenthesized BEFORE casting: a cast binds tighter
// than most operators, so without parentheses CAS(p+1, o, n) would
// expand to (uintptr_t*)p+1 — pointer arithmetic on the casted p —
// instead of converting the whole expression p+1.
#define CAS(l,o,n) CAS_((uintptr_t*)(l), (uintptr_t)(o), (uintptr_t)(n))

// atomically add <increment> to the word at <location>.
extern void atomic_add(intptr_t* location, intptr_t increment);

// enforce strong memory ordering.
extern void mfence();
extern void serialize();

#ifdef __cplusplus
}
#endif

View File

@ -549,4 +549,71 @@ void ia32_get_cpu_info()
#endif
}
// note: a 486 or later processor is required since we use CMPXCHG.
// there's no feature flag we can check, and the ia32 code doesn't
// bother detecting anything < Pentium, so this'll crash and burn if
// run on 386. we could replace cmpxchg with a simple mov (since 386
// CPUs aren't MP-capable), but it's not worth the trouble.
//
// naked + __cdecl: the compiler emits no prologue/epilogue, so the
// arguments sit at fixed stack offsets (esp+4/8/12) and the bool
// result is returned in AL.
__declspec(naked) bool __cdecl CAS_(uintptr_t* location, uintptr_t expected, uintptr_t new_value)
{
// try to see if caller isn't passing in an address
// (CAS's arguments are silently casted)
// NOTE(review): C code inside a naked function relies on the compiler
// generating frameless code for it — confirm assert2 is safe here in
// all build configurations.
assert2(location >= (uintptr_t*)0x10000);
__asm
{
cmp byte ptr [cpus], 1 // uniprocessor? (cpus: presumably CPU count set by get_cpu_info — verify)
mov eax, [esp+8] // expected (CMPXCHG compares [mem] against EAX)
mov edx, [esp+4] // location
mov ecx, [esp+12] // new_value
je $no_lock // single CPU: skip LOCK prefix (bus lock unneeded)
_emit 0xf0 // LOCK prefix; applies to the following CMPXCHG
$no_lock:
cmpxchg [edx], ecx // if [edx]==eax: [edx]=ecx, ZF=1; else eax=[edx], ZF=0
mov eax, 0 // MOV does not modify flags, so ZF survives for SETE
sete al // AL = 1 if the swap happened, else 0
ret
}
}
// atomically add <increment> to the machine word at <location>.
// naked + __cdecl: no prologue is emitted, so the arguments sit at
// fixed stack offsets (esp+4/8).
__declspec(naked) void __cdecl atomic_add(intptr_t* location, intptr_t increment)
{
__asm
{
cmp byte ptr [cpus], 1 // uniprocessor? (cpus: presumably CPU count set by get_cpu_info — verify)
mov edx, [esp+4] // location
mov eax, [esp+8] // increment
je $no_lock // single CPU: skip LOCK prefix (bus lock unneeded)
_emit 0xf0 // LOCK prefix; applies to the following ADD
$no_lock:
add [edx], eax
ret
}
}
// enforce strong memory ordering.
void mfence()
{
// MFENCE was introduced with SSE2 (Pentium 4); executing it on older
// CPUs would raise #UD, hence the capability check.
// NOTE(review): on pre-SSE2 CPUs this function is a no-op — presumably
// acceptable given those CPUs' ordering behavior, but confirm callers
// do not require a fence there.
if(ia32_cap(SSE2))
__asm mfence
}
// serialize the instruction stream: CPUID is a serializing instruction,
// forcing all previous instructions to complete before continuing.
// NOTE(review): CPUID clobbers EAX/EBX/ECX/EDX; MSVC's inline assembler
// is expected to account for registers used in __asm blocks — verify
// EBX handling in optimized builds.
void serialize()
{
__asm cpuid
}
#endif // #ifndef _M_IX86