WIP timer thread safety:
CAS functions are now implemented directly in external asm files (this avoids wrapper functions); reinstated cpu_CAS64 (checking ARCH_IA32 at every call site is onerous); temporarily disabled the timer monotonicity check. This was SVN commit r8515.
This commit is contained in:
parent
2b9774aa05
commit
657ec9aa45
@ -25,12 +25,8 @@
|
||||
#if ARCH_AMD64
|
||||
|
||||
#include "lib/sysdep/arch/amd64/amd64.h"
|
||||
#include "lib/sysdep/arch/amd64/amd64_asm.h"
|
||||
|
||||
#include "lib/sysdep/cpu.h"
|
||||
#if MSC_VERSION
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
void cpu_ConfigureFloatingPoint()
|
||||
{
|
||||
@ -43,31 +39,4 @@ void* cpu_memcpy(void* RESTRICT dst, const void* RESTRICT src, size_t size)
|
||||
return memcpy(dst, src, size);
|
||||
}
|
||||
|
||||
#if MSC_VERSION
|
||||
|
||||
bool cpu_CAS(volatile intptr_t* location, intptr_t expected, intptr_t newValue)
|
||||
{
|
||||
const intptr_t initial = _InterlockedCompareExchange64((volatile __int64*)location, newValue, expected);
|
||||
return initial == expected;
|
||||
}
|
||||
|
||||
intptr_t cpu_AtomicAdd(volatile intptr_t* location, intptr_t increment)
|
||||
{
|
||||
return _InterlockedExchangeAdd64(location, increment);
|
||||
}
|
||||
|
||||
#elif GCC_VERSION
|
||||
|
||||
intptr_t cpu_AtomicAdd(volatile intptr_t* location, intptr_t increment)
|
||||
{
|
||||
return amd64_AtomicAdd(location, increment);
|
||||
}
|
||||
|
||||
bool cpu_CAS(volatile intptr_t* location, intptr_t expected, intptr_t newValue)
|
||||
{
|
||||
return amd64_CAS(location, expected, newValue) ? true : false;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif // ARCH_AMD64
|
||||
|
@ -46,18 +46,23 @@ sym(amd64_asm_cpuid):
|
||||
ret
|
||||
ALIGN 8
|
||||
|
||||
; extern "C" intptr_t amd64_CAS(volatile uintptr_t *location, uintptr_t expected, uintptr_t newValue);
|
||||
global sym(amd64_CAS)
|
||||
sym(amd64_CAS):
|
||||
|
||||
; extern "C" intptr_t cpu_AtomicAdd(intptr_t* location, intptr_t increment);
|
||||
global sym(cpu_AtomicAdd)
|
||||
sym(cpu_AtomicAdd):
|
||||
lock xadd [arg0], arg1
|
||||
mov rax, arg1
|
||||
ret
|
||||
|
||||
|
||||
; extern "C" bool cpu_CAS(volatile intptr_t* location, intptr_t expected, intptr_t newValue);
|
||||
; extern "C" bool cpu_CAS64(volatile i64* location, i64 expected, i64 newValue);
|
||||
global sym(cpu_CAS)
|
||||
global sym(cpu_CAS64)
|
||||
sym(cpu_CAS):
|
||||
sym(cpu_CAS64):
|
||||
mov rax, arg1 ; expected -> rax
|
||||
lock cmpxchg [arg0], arg2
|
||||
sete al
|
||||
movzx rax, al
|
||||
ret
|
||||
|
||||
; extern "C" intptr_t amd64_AtomicAdd(intptr_t *location, intptr_t increment);
|
||||
global sym(amd64_AtomicAdd)
|
||||
sym(amd64_AtomicAdd):
|
||||
lock xadd [arg0], arg1
|
||||
mov rax, arg1
|
||||
ret
|
||||
|
@ -34,9 +34,7 @@ extern "C" {
|
||||
struct x86_x64_CpuidRegs;
|
||||
extern void CALL_CONV amd64_asm_cpuid(x86_x64_CpuidRegs* reg);
|
||||
|
||||
extern intptr_t CALL_CONV amd64_CAS(volatile intptr_t *location, intptr_t expected, intptr_t newValue);
|
||||
|
||||
extern intptr_t CALL_CONV amd64_AtomicAdd(volatile intptr_t *location, intptr_t increment);
|
||||
// also implements cpu_AtomicAdd, cpu_CAS and cpu_CAS64 from "sysdep/cpu.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
@ -147,18 +147,6 @@ void cpu_ConfigureFloatingPoint()
|
||||
}
|
||||
|
||||
|
||||
intptr_t cpu_AtomicAdd(volatile intptr_t* location, intptr_t increment)
|
||||
{
|
||||
return ia32_asm_AtomicAdd(location, increment);
|
||||
}
|
||||
|
||||
|
||||
bool cpu_CAS(volatile intptr_t* location, intptr_t expected, intptr_t new_value)
|
||||
{
|
||||
return ia32_asm_CAS(location, expected, new_value);
|
||||
}
|
||||
|
||||
|
||||
void* cpu_memcpy(void* RESTRICT dst, const void* RESTRICT src, size_t size)
|
||||
{
|
||||
return memcpy(dst, src, size);
|
||||
|
@ -60,9 +60,9 @@ sym(ia32_asm_cpuid):
|
||||
; lock-free support routines
|
||||
;-------------------------------------------------------------------------------
|
||||
|
||||
; extern "C" intptr_t __cdecl ia32_asm_AtomicAdd(volatile intptr_t* location, intptr_t increment);
|
||||
global sym(ia32_asm_AtomicAdd)
|
||||
sym(ia32_asm_AtomicAdd):
|
||||
; extern "C" intptr_t __cdecl cpu_AtomicAdd(volatile intptr_t* location, intptr_t increment);
|
||||
global sym(cpu_AtomicAdd)
|
||||
sym(cpu_AtomicAdd):
|
||||
mov edx, [esp+4] ; location
|
||||
mov eax, [esp+8] ; increment
|
||||
db 0xf0 ; LOCK prefix
|
||||
@ -79,9 +79,9 @@ db 0xf0 ; LOCK prefix
|
||||
; - nor do we bother skipping the LOCK prefix on single-processor systems.
|
||||
; the branch may be well-predicted, but difference in performance still
|
||||
; isn't expected to be enough to justify the effort.
|
||||
; extern "C" bool __cdecl ia32_asm_CAS(volatile uintptr_t* location, uintptr_t expected, uintptr_t new_value);
|
||||
global sym(ia32_asm_CAS)
|
||||
sym(ia32_asm_CAS):
|
||||
; extern "C" bool __cdecl cpu_CAS(volatile intptr_t* location, intptr_t expected, intptr_t new_value);
|
||||
global sym(cpu_CAS)
|
||||
sym(cpu_CAS):
|
||||
mov edx, [esp+4] ; location
|
||||
mov eax, [esp+8] ; expected
|
||||
mov ecx, [esp+12] ; new_value
|
||||
@ -92,9 +92,9 @@ db 0xf0 ; LOCK prefix
|
||||
ret
|
||||
|
||||
|
||||
; extern bool CALL_CONV ia32_asm_CAS64(volatile i64* location, i64 expected, i64 new_value);
|
||||
global sym(ia32_asm_CAS64)
|
||||
sym(ia32_asm_CAS64):
|
||||
; extern bool CALL_CONV cpu_CAS64(volatile i64* location, i64 expected, i64 new_value);
|
||||
global sym(cpu_CAS64)
|
||||
sym(cpu_CAS64):
|
||||
push ebx
|
||||
push esi
|
||||
mov esi, [esp+8+4] ; location
|
||||
|
@ -34,9 +34,7 @@ extern "C" {
|
||||
struct x86_x64_CpuidRegs;
|
||||
extern void CALL_CONV ia32_asm_cpuid(x86_x64_CpuidRegs* regs);
|
||||
|
||||
extern intptr_t CALL_CONV ia32_asm_AtomicAdd(volatile intptr_t* location, intptr_t increment);
|
||||
extern bool CALL_CONV ia32_asm_CAS(volatile intptr_t* location, intptr_t expected, intptr_t new_value);
|
||||
extern bool CALL_CONV ia32_asm_CAS64(volatile i64* location, i64 expected, i64 new_value);
|
||||
// also implements cpu_AtomicAdd, cpu_CAS and cpu_CAS64 from "sysdep/cpu.h"
|
||||
|
||||
/// control87
|
||||
// FPU control word
|
||||
|
@ -27,10 +27,6 @@
|
||||
#include "precompiled.h"
|
||||
#include "lib/sysdep/cpu.h"
|
||||
|
||||
#if ARCH_IA32
|
||||
# include "lib/sysdep/arch/ia32/ia32_asm.h" // ia32_asm_CAS64
|
||||
#endif
|
||||
|
||||
ERROR_ASSOCIATE(ERR::CPU_FEATURE_MISSING, L"This CPU doesn't support a required feature", -1);
|
||||
ERROR_ASSOCIATE(ERR::CPU_UNKNOWN_OPCODE, L"Disassembly failed", -1);
|
||||
ERROR_ASSOCIATE(ERR::CPU_UNKNOWN_VENDOR, L"CPU vendor unknown", -1);
|
||||
@ -51,11 +47,9 @@ cassert(sizeof(void*) == sizeof(intptr_t));
|
||||
|
||||
static void TestCAS64()
|
||||
{
|
||||
#if ARCH_IA32
|
||||
volatile i64 var = 1;
|
||||
ia32_asm_CAS64(&var, 1ull, 2ull);
|
||||
cpu_CAS64(&var, 1ull, 2ull);
|
||||
debug_assert(var == 2ull);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void TestAtomicAdd()
|
||||
|
@ -52,6 +52,16 @@ LIB_API const char* cpu_IdentifierString();
|
||||
//-----------------------------------------------------------------------------
|
||||
// lock-free support routines
|
||||
|
||||
extern "C" { // (assembly-language implementations)
|
||||
|
||||
/**
|
||||
* add a signed value to a variable without the possibility of interference
|
||||
* from other threads/CPUs.
|
||||
*
|
||||
* @return the previous value.
|
||||
**/
|
||||
LIB_API intptr_t cpu_AtomicAdd(volatile intptr_t* location, intptr_t increment);
|
||||
|
||||
/**
|
||||
* atomic "compare and swap".
|
||||
*
|
||||
@ -62,6 +72,9 @@ LIB_API const char* cpu_IdentifierString();
|
||||
* otherwise true (also overwriting the contents of location)
|
||||
**/
|
||||
LIB_API bool cpu_CAS(volatile intptr_t* location, intptr_t expected, intptr_t newValue);
|
||||
LIB_API bool cpu_CAS64(volatile i64* location, i64 expected, i64 newValue);
|
||||
|
||||
} // extern "C"
|
||||
|
||||
/**
|
||||
* specialization of cpu_CAS for pointer types. this avoids error-prone
|
||||
@ -74,14 +87,6 @@ bool cpu_CAS(volatile T* location, T expected, T new_value)
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* add a signed value to a variable without the possibility of interference
|
||||
* from other threads/CPUs.
|
||||
*
|
||||
* @return the previous value.
|
||||
**/
|
||||
LIB_API intptr_t cpu_AtomicAdd(volatile intptr_t* location, intptr_t increment);
|
||||
|
||||
LIB_API void cpu_Test();
|
||||
|
||||
/**
|
||||
|
@ -35,6 +35,7 @@
|
||||
|
||||
#include "lib/module_init.h"
|
||||
#include "lib/posix/posix_time.h"
|
||||
# include "lib/sysdep/cpu.h"
|
||||
#if OS_WIN
|
||||
# include "lib/sysdep/os/win/whrt/whrt.h"
|
||||
#endif
|
||||
@ -81,6 +82,25 @@ void timer_LatchStartTime()
|
||||
}
|
||||
|
||||
|
||||
static void EnsureMonotonic(double& t)
|
||||
{
|
||||
//retry:
|
||||
// static i64 t_lastBits; // initially 0.0
|
||||
// memcpy(&t_lastBits, &m_seconds, sizeof(t_lastBits));
|
||||
//
|
||||
// i64 tBits;
|
||||
// memcpy(&tBits, &seconds, sizeof(tBits));
|
||||
//
|
||||
// if(!cpu_CAS64((volatile i64*)&m_seconds, t_lastBits, tBits))
|
||||
// goto retry;
|
||||
//
|
||||
// static double t_last = 0.0;
|
||||
// if(t < t_last)
|
||||
// t = t_last;
|
||||
// t_last = t;
|
||||
}
|
||||
|
||||
|
||||
double timer_Time()
|
||||
{
|
||||
double t;
|
||||
@ -99,12 +119,7 @@ double timer_Time()
|
||||
# error "timer_Time: add timer implementation for this platform!"
|
||||
#endif
|
||||
|
||||
// make sure time is monotonic (never goes backwards)
|
||||
static double t_last = 0.0;
|
||||
if(t < t_last)
|
||||
t = t_last+DBL_EPSILON;
|
||||
t_last = t;
|
||||
|
||||
EnsureMonotonic(t);
|
||||
return t;
|
||||
}
|
||||
|
||||
|
@ -32,9 +32,6 @@
|
||||
#if ARCH_X86_X64 && CONFIG2_TIMER_ALLOW_RDTSC
|
||||
# include "lib/sysdep/os_cpu.h" // os_cpu_ClockFrequency
|
||||
# include "lib/sysdep/arch/x86_x64/x86_x64.h" // x86_x64_rdtsc
|
||||
# if ARCH_IA32
|
||||
# include "lib/sysdep/arch/ia32/ia32_asm.h" // ia32_asm_CAS64
|
||||
# endif
|
||||
#endif
|
||||
|
||||
|
||||
@ -181,7 +178,7 @@ public:
|
||||
cpu_AtomicAdd((volatile intptr_t*)&m_cycles, (intptr_t)delta);
|
||||
#elif ARCH_IA32
|
||||
retry:
|
||||
if(!ia32_asm_CAS64(&m_cycles, m_cycles, m_cycles+delta))
|
||||
if(!cpu_CAS64(&m_cycles, m_cycles, m_cycles+delta))
|
||||
goto retry;
|
||||
#else
|
||||
# error "port"
|
||||
@ -238,15 +235,8 @@ retry:
|
||||
i64 newRepresentation;
|
||||
memcpy(&newRepresentation, &seconds, sizeof(newRepresentation));
|
||||
|
||||
#if ARCH_AMD64
|
||||
if(!cpu_CAS((volatile intptr_t*)&m_seconds, oldRepresentation, newRepresentation))
|
||||
if(!cpu_CAS64((volatile i64*)&m_seconds, oldRepresentation, newRepresentation))
|
||||
goto retry;
|
||||
#elif ARCH_IA32
|
||||
if(!ia32_asm_CAS64((volatile i64*)&m_seconds, oldRepresentation, newRepresentation))
|
||||
goto retry;
|
||||
#else
|
||||
# error "port"
|
||||
#endif
|
||||
}
|
||||
|
||||
void Subtract(TimerUnit t)
|
||||
|
Loading…
Reference in New Issue
Block a user