moved lock-free primitive (CAS) here; add support functions (memory barrier and instruction serialization)

lib: speed up round_up by requiring alignment to be a power of 2. This was SVN commit r2221.
2005-05-03 05:05:16 +00:00 · 2005-05-03 05:05:16 +00:00 · c65d966112
commit c65d966112
parent 03a3fd6091
4 changed files with 85 additions and 8 deletions
--- a/source/lib/lib.cpp
+++ b/source/lib/lib.cpp
@ -199,16 +199,11 @@ int ilog2(const float x)
 }


+// round n up to the next multiple of <multiple>, which must be a power of two.
 uintptr_t round_up(const uintptr_t n, const uintptr_t multiple)
 {
-	if(multiple == 0)	// paranoid divide-by-zero
-	{
-		assert(0);
-		return n;
-	}
-	const uintptr_t padded = n + multiple-1;
-	const uintptr_t remainder = padded % multiple;
-	const uintptr_t result = padded - remainder;
+	assert(is_pow2((long)multiple));	// precondition (said to also reject 0). NOTE(review): the (long) cast could truncate if long is narrower than uintptr_t - confirm.
+	const uintptr_t result = (n + multiple-1) & ~(multiple-1);	// add multiple-1, then clear the low bits; only correct for powers of two.
 	assert(n <= result && result < n+multiple);	// post-condition; also fails if n + multiple-1 wrapped around.
 	return result;
 }
--- a/source/lib/lib.h
+++ b/source/lib/lib.h
@ -307,6 +307,7 @@ extern int ilog2(const int n);
 extern uint log2(uint x);


+// round val up to the next multiple of <multiple>, which must be a power of two.
 extern uintptr_t round_up(uintptr_t val, uintptr_t multiple);

 extern u16 fp_to_u16(double in);
--- a/source/lib/sysdep/cpu.h
+++ b/source/lib/sysdep/cpu.h
@ -23,6 +23,20 @@ extern int on_each_cpu(void(*cb)());
 extern void get_cpu_info(void);


+// atomic "compare and swap". compare the machine word at <location> against
+// <expected>; if not equal, return false; otherwise, overwrite it with
+// <new_value> and return true.
+extern bool CAS_(uintptr_t* location, uintptr_t expected, uintptr_t new_value);
+
+// convenience wrapper around CAS_: casts its arguments to machine words.
+// each argument is parenthesized so that an expression (e.g. p+1) is cast as a whole.
+#define CAS(l,o,n) CAS_((uintptr_t*)(l), (uintptr_t)(o), (uintptr_t)(n))
+
+extern void atomic_add(intptr_t* location, intptr_t increment);	// add <increment> to the machine word at <location> (the ia32 version uses a LOCKed add).
+
+// enforce strong memory ordering.
+extern void mfence();	// the ia32 version only issues MFENCE if SSE2 is reported; otherwise it does nothing.
+
+extern void serialize();	// serialize instruction execution (the ia32 version issues CPUID).
+
 #ifdef __cplusplus
 }
 #endif
--- a/source/lib/sysdep/ia32.cpp
+++ b/source/lib/sysdep/ia32.cpp
@ -549,4 +549,71 @@ void ia32_get_cpu_info()
 #endif
 }

+
+
+
+
+
+
+
+
+// note: a 486 or later processor is required since we use CMPXCHG.
+// there's no feature flag we can check, and the ia32 code doesn't
+// bother detecting anything < Pentium, so this'll crash and burn if
+// run on 386. we could replace cmpxchg with a simple mov (since 386
+// CPUs aren't MP-capable), but it's not worth the trouble.
+
+__declspec(naked) bool __cdecl CAS_(uintptr_t* location, uintptr_t expected, uintptr_t new_value)
+{
+	// sanity check: catch callers that pass something other than an address
+	// (the CAS macro silently casts its arguments, so the compiler won't complain).
+	assert2(location >= (uintptr_t*)0x10000);
+	// NOTE(review): this function is naked (no compiler-generated prologue) - confirm the C-level check above is safe here.
+__asm
+{
+	cmp		byte ptr [cpus], 1	// presumably the CPU count; the result is consumed by 'je' below (the movs in between leave flags intact).
+	mov		eax, [esp+8]	// expected
+	mov		edx, [esp+4]	// location
+	mov		ecx, [esp+12]	// new_value
+	je		$no_lock	// cpus == 1: skip the LOCK prefix byte.
+_emit 0xf0	// LOCK prefix
+$no_lock:
+	cmpxchg	[edx], ecx	// if [edx] == eax: store ecx and set ZF; otherwise clear ZF.
+	mov		eax, 0	// mov (rather than xor) so that ZF from cmpxchg is preserved.
+	sete	al	// return value: 1 if the swap happened, else 0.
+	ret
+}
+}
+
+// add <increment> to the machine word at <location>; the add is LOCKed unless cpus == 1.
+__declspec(naked) void __cdecl atomic_add(intptr_t* location, intptr_t increment)
+{
+__asm
+{
+	cmp		byte ptr [cpus], 1	// presumably the CPU count; the result is consumed by 'je' below (the movs in between leave flags intact).
+	mov		edx, [esp+4]	// location
+	mov		eax, [esp+8]	// increment
+	je		$no_lock	// cpus == 1: skip the LOCK prefix byte.
+_emit 0xf0	// LOCK prefix
+$no_lock:
+	add		[edx], eax	// *location += increment
+	ret
+}
+}
+
+
+// enforce strong memory ordering. does nothing if the CPU does not report SSE2.
+void mfence()
+{
+	// the MFENCE instruction is part of SSE2 (Pentium IV and later), so check for it first.
+	if(ia32_cap(SSE2))
+		__asm mfence
+}
+
+// serialize instruction execution by issuing CPUID.
+void serialize()
+{
+	__asm cpuid	// eax is not set beforehand; the CPUID results are not used.
+}
+
 #endif	// #ifndef _M_IX86