forked from 0ad/0ad

add work-in-progress WHRT (win high res timer). works well enough, but calibration is not yet complete nor is TSC usable.

cpu: accessor functions ensure what they return is valid. no longer
needs to call wtime_reset_impl (ugh). uses ModuleInitState.
ia32: add APIC ID accessor and prevent redundant init

This was SVN commit r5093.
janwas 2007-05-26 16:57:39 +00:00
parent d478e21f2b
commit c45ef68a88
24 changed files with 1751 additions and 1057 deletions

View File

@ -12,6 +12,7 @@
#include "cpu.h" #include "cpu.h"
#include "lib/bits.h" #include "lib/bits.h"
#include "lib/module_init.h"
#include "lib/posix/posix.h" #include "lib/posix/posix.h"
#if CPU_IA32 #if CPU_IA32
# include "lib/sysdep/ia32/ia32.h" # include "lib/sysdep/ia32/ia32.h"
@ -22,7 +23,6 @@
#endif #endif
#if OS_WIN #if OS_WIN
# include "lib/sysdep/win/wcpu.h" # include "lib/sysdep/win/wcpu.h"
# include "lib/sysdep/win/wposix/wtime_internal.h" // HACK (see call to wtime_reset_impl)
#endif #endif
@ -43,26 +43,36 @@ AT_STARTUP(\
// we thus avoid needing if(already_called) return old_result.
// initially set to 'impossible' values to catch uses before cpu_Init.
-static double clock_frequency = -1.0;
+static double clockFrequency = -1.0;
-static bool is_throttling_possible = true;
-static size_t page_size = 1;
-static size_t memory_total_mib = 1;
+double cpu_ClockFrequency()
+{
+debug_assert(clockFrequency > 0.0);
+return clockFrequency;
+}
static void DetectClockFrequency()
{
#if CPU_IA32
-clock_frequency = ia32_ClockFrequency(); // authoritative, precise
+clockFrequency = ia32_ClockFrequency(); // authoritative, precise
#endif
}
+static bool isThrottlingPossible = true;
+bool cpu_IsThrottlingPossible()
+{
+debug_assert(clockFrequency > 0.0); // (can't verify isThrottlingPossible directly)
+return isThrottlingPossible;
+}
static void DetectIfThrottlingPossible()
{
#if CPU_IA32
if(ia32_IsThrottlingPossible() == 1)
{
-is_throttling_possible = true;
+isThrottlingPossible = true;
return;
}
#endif
@ -70,48 +80,40 @@ static void DetectIfThrottlingPossible()
#if OS_WIN
if(wcpu_IsThrottlingPossible() == 1)
{
-is_throttling_possible = true;
+isThrottlingPossible = true;
return;
}
#endif
-is_throttling_possible = false;
+isThrottlingPossible = false;
}
-static void DetectMemory()
-{
-page_size = (size_t)sysconf(_SC_PAGESIZE);
-size_t memory_total = cpu_MemorySize(CPU_MEM_TOTAL);
-// account for inaccurate reporting by rounding up (see wposix sysconf)
-const size_t memory_total_pow2 = (size_t)round_up_to_pow2((uint)memory_total);
-// .. difference too great, just round up to 1 MiB
-if(memory_total_pow2 - memory_total > 3*MiB)
-memory_total = round_up(memory_total, 1*MiB);
-// .. difference acceptable, use next power of two
-else
-memory_total = memory_total_pow2;
-memory_total_mib = memory_total / MiB;
-}
-double cpu_ClockFrequency()
-{
-return clock_frequency;
-}
-bool cpu_IsThrottlingPossible()
-{
-return is_throttling_possible;
-}
+static size_t memoryTotalMib = 1;
size_t cpu_MemoryTotalMiB()
{
-return memory_total_mib;
+debug_assert(memoryTotalMib > 1);
+return memoryTotalMib;
}
+static void DetectMemory()
+{
+size_t memoryTotal = cpu_MemorySize(CPU_MEM_TOTAL);
+// account for inaccurate reporting by rounding up (see wposix sysconf)
+const size_t memoryTotalPow2 = (size_t)round_up_to_pow2((uint)memoryTotal);
+// .. difference too great, just round up to 1 MiB
+if(memoryTotalPow2 - memoryTotal > 3*MiB)
+memoryTotal = round_up(memoryTotal, 1*MiB);
+// .. difference acceptable, use next power of two
+else
+memoryTotal = memoryTotalPow2;
+memoryTotalMib = memoryTotal / MiB;
+}
const char* cpu_IdentifierString()
{
#if CPU_IA32
@ -173,18 +175,15 @@ static void InitAndConfigureIA32()
#endif
-// note: can't use ModuleInitState for this because it changes as soon as
-// init has *begun*, which isn't what we want.
-static bool isDetectFinished = false;
-bool cpu_IsDetectFinished()
-{
-return isDetectFinished;
-}
+//-----------------------------------------------------------------------------
+static ModuleInitState initState;
void cpu_Init()
{
+if(!ModuleShouldInitialize(&initState))
+return;
#if CPU_IA32
InitAndConfigureIA32();
@ -193,19 +192,15 @@ void cpu_Init()
DetectMemory();
DetectIfThrottlingPossible();
DetectClockFrequency();
-// must be set before wtime_reset_impl since it queries this flag via
-// cpu_IsDetectFinished.
-isDetectFinished = true;
-// HACK: on Windows, the HRT makes its final implementation choice
-// in the first calibrate call where CPU info is available.
-// call wtime_reset_impl here to have that happen now so app code isn't
-// surprised by a timer change, although the HRT does try to
-// keep the timer continuous.
-#if OS_WIN
-wtime_reset_impl();
-#endif
+}
+void cpu_Shutdown()
+{
+if(!ModuleShouldShutdown(&initState))
+return;
+// currently nothing to do
}
@ -311,7 +306,8 @@ size_t cpu_MemorySize(CpuMemoryIndicators mem_type)
// quasi-POSIX
#if defined(_SC_AVPHYS_PAGES)
const int sc_name = SysconfFromMemType(mem_type);
-const size_t memory_size = sysconf(sc_name) * page_size;
+const size_t pageSize = sysconf(_SC_PAGESIZE);
+const size_t memory_size = sysconf(sc_name) * pageSize;
return memory_size;
// BSD / Mac OS X
#else
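
As a side note on the DetectMemory heuristic above, here is a small standalone sketch of the same rounding rule (NextPow2/RoundUp are stand-ins for the lib's round_up_to_pow2/round_up, and the example values are hypothetical):

    #include <cstddef>

    static const size_t MiB = 1024*1024;

    // stand-ins for the lib helpers (assumed behavior, not the real implementations)
    static size_t NextPow2(size_t x) { size_t p = 1; while(p < x) p *= 2; return p; }
    static size_t RoundUp(size_t x, size_t multiple) { return ((x + multiple - 1) / multiple) * multiple; }

    // same rule as DetectMemory: prefer the next power of two unless that
    // overshoots by more than 3 MiB, in which case round up to 1 MiB only.
    static size_t RoundedTotalMiB(size_t reportedBytes)
    {
        const size_t pow2 = NextPow2(reportedBytes);
        if(pow2 - reportedBytes > 3*MiB)
            return RoundUp(reportedBytes, MiB) / MiB;
        return pow2 / MiB;
    }

    // RoundedTotalMiB(1022*MiB) == 1024 (only 2 MiB short of a power of two)
    // RoundedTotalMiB(1000*MiB) == 1000 (24 MiB short => keep the reported size)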

View File

@ -21,9 +21,9 @@ namespace ERR
// must be called before any of the below accessors.
-extern void cpu_Init(void);
+extern void cpu_Init();
+extern void cpu_Shutdown();
-extern bool cpu_IsDetectFinished();
extern const char* cpu_IdentifierString();
extern double cpu_ClockFrequency();
extern bool cpu_IsThrottlingPossible();

View File

@ -1,88 +0,0 @@
/**
* =========================================================================
* File : hpet.cpp
* Project : 0 A.D.
* Description : HPET timer backend
* =========================================================================
*/
// license: GPL; see lib/license.txt
#include "precompiled.h"
#include "hpet.h"
#include "acpi.h"
#include "win/mahaf.h"
#include "lib/bits.h"
#include "lib/module_init.h"
#pragma pack(1)
struct HpetDescriptionTable
{
AcpiTable header;
u32 eventTimerBlockId;
AcpiGenericAddress baseAddress;
u8 sequenceNumber;
u16 minimumPeriodicTicks;
u8 attributes;
};
struct HpetRegisters
{
u64 capabilities;
u64 reserved1;
u64 config;
u64 reserved2;
u64 interruptStatus;
u64 reserved3[25];
u64 counterValue;
u64 reserved4;
// .. followed by blocks for timers 0..31
};
static volatile HpetRegisters* hpetRegisters;
static const u64 CONFIG_ENABLE = BIT64(0);
//-----------------------------------------------------------------------------
static ModuleInitState initState;
bool hpetInit()
{
if(!ModuleShouldInitialize(&initState))
return true;
if(!acpiInit())
return false;
const HpetDescriptionTable* desc = (const HpetDescriptionTable*)acpiGetTable("HPET");
debug_assert(desc->baseAddress.addressSpaceId == ACPI_AS_MEMORY);
hpetRegisters = (volatile HpetRegisters*)MapPhysicalMemory(desc->baseAddress.address, sizeof(HpetRegisters));
if(!hpetRegisters)
return false;
const u32 timerPeriod_fs = bits64(hpetRegisters->capabilities, 32, 63);
const double freq = 1e15 / timerPeriod_fs;
hpetRegisters->config &= ~CONFIG_ENABLE;
hpetRegisters->counterValue = 0ull;
hpetRegisters->config |= CONFIG_ENABLE;
debug_printf("HPET freq=%f counter=%I64d\n", freq, hpetRegisters->counterValue);
return true;
}
void hpetShutdown()
{
if(!ModuleShouldShutdown(&initState))
return;
UnmapPhysicalMemory((void*)hpetRegisters);
acpiShutdown();
}

View File

@ -1,17 +0,0 @@
/**
* =========================================================================
* File : hpet.h
* Project : 0 A.D.
* Description : HPET timer backend
* =========================================================================
*/
// license: GPL; see lib/license.txt
#ifndef INCLUDED_HPET
#define INCLUDED_HPET
extern bool hpetInit();
extern void hpetShutdown();
#endif // #ifndef INCLUDED_HPET

View File

@ -19,6 +19,7 @@
#include "lib/posix/posix_pthread.h" #include "lib/posix/posix_pthread.h"
#include "lib/bits.h" #include "lib/bits.h"
#include "lib/timer.h" #include "lib/timer.h"
#include "lib/module_init.h"
#include "lib/sysdep/cpu.h" #include "lib/sysdep/cpu.h"
#if !HAVE_MS_ASM && !HAVE_GNU_ASM #if !HAVE_MS_ASM && !HAVE_GNU_ASM
@ -421,6 +422,16 @@ double ia32_ClockFrequency()
// detect processor types / topology
//-----------------------------------------------------------------------------
+uint ia32_ApicId()
+{
+u32 regs[4];
+if(!ia32_asm_cpuid(1, regs))
+DEBUG_WARN_ERR(ERR::CPU_FEATURE_MISSING);
+const uint apicId = bits(regs[EBX], 24, 31);
+return apicId;
+}
// OSes report hyperthreading units and cores as "processors". we need to
// drill down and find out the exact counts (for thread pool dimensioning
// and cache sharing considerations).
@ -429,23 +440,23 @@ double ia32_ClockFrequency()
static uint CoresPerPackage()
{
-static uint cores_per_package = 0;
-if(cores_per_package == 0)
+static uint coresPerPackage = 0;
+if(coresPerPackage == 0)
{
u32 regs[4];
if(ia32_asm_cpuid(4, regs))
-cores_per_package = bits(regs[EAX], 26, 31)+1;
+coresPerPackage = bits(regs[EAX], 26, 31)+1;
else
-cores_per_package = 1; // single-core
+coresPerPackage = 1; // single-core
}
-return cores_per_package;
+return coresPerPackage;
}
static uint LogicalPerCore()
{
-static uint logical_per_core = 0;
-if(logical_per_core == 0)
+static uint logicalPerCore = 0;
+if(logicalPerCore == 0)
{
if(ia32_cap(IA32_CAP_HT))
{
@ -455,13 +466,13 @@ static uint LogicalPerCore()
const uint logical_per_package = bits(regs[EBX], 16, 23);
// cores ought to be uniform WRT # logical processors
debug_assert(logical_per_package % CoresPerPackage() == 0);
-logical_per_core = logical_per_package / CoresPerPackage();
+logicalPerCore = logical_per_package / CoresPerPackage();
}
else
-logical_per_core = 1; // not Hyperthreading capable
+logicalPerCore = 1; // not Hyperthreading capable
}
-return logical_per_core;
+return logicalPerCore;
}
// the above two functions give the maximum number of cores/logical units.
@ -472,9 +483,9 @@ static uint LogicalPerCore()
// determining the exact topology as well as number of packages.
// these are set by DetectProcessorTopology, called from ia32_Init.
-static uint num_packages = 0; // i.e. sockets; > 1 => true SMP system
-static uint enabled_cores_per_package = 0;
-static uint enabled_logical_per_core = 0; // hyperthreading units
+static uint numPackages = 0; // i.e. sockets; > 1 => true SMP system
+static uint enabledCoresPerPackage = 0;
+static uint enabledLogicalPerCore = 0; // hyperthreading units
typedef std::vector<u8> Ids;
typedef std::set<u8> IdSet;
@ -482,21 +493,16 @@ typedef std::set<u8> IdSet;
// add the currently running processor's APIC ID to a list of IDs.
static void StoreApicId(void* param)
{
-u32 regs[4];
-if(!ia32_asm_cpuid(1, regs))
-DEBUG_WARN_ERR(ERR::CPU_FEATURE_MISSING);
-const uint apic_id = bits(regs[EBX], 24, 31);
-Ids* apic_ids = (Ids*)param;
-apic_ids->push_back(apic_id);
+Ids* apicIds = (Ids*)param;
+apicIds->push_back(ia32_ApicId());
}
// field := a range of bits sufficient to represent <num_values> integers.
-// for each id in apic_ids: extract the value of the field at offset bit_pos
+// for each id in apicIds: extract the value of the field at offset bit_pos
// and insert it into ids. afterwards, adjust bit_pos to the next field.
// used to gather e.g. all core IDs from all APIC IDs.
-static void ExtractFieldsIntoSet(const Ids& apic_ids, uint& bit_pos, uint num_values, IdSet& ids)
+static void ExtractFieldsIntoSet(const Ids& apicIds, uint& bit_pos, uint num_values, IdSet& ids)
{
const uint id_bits = ceil_log2(num_values);
if(id_bits == 0)
@ -504,9 +510,9 @@ static void ExtractFieldsIntoSet(const Ids& apic_ids, uint& bit_pos, uint num_va
const uint mask = bit_mask(id_bits);
-for(size_t i = 0; i < apic_ids.size(); i++)
+for(size_t i = 0; i < apicIds.size(); i++)
{
-const u8 apic_id = apic_ids[i];
+const u8 apic_id = apicIds[i];
const u8 field = (apic_id >> bit_pos) & mask;
ids.insert(field);
}
@ -520,27 +526,27 @@ static void ExtractFieldsIntoSet(const Ids& apic_ids, uint& bit_pos, uint num_va
// (scans the APIC IDs, which requires OS support for thread affinity)
static void DetectProcessorTopology()
{
-Ids apic_ids;
-if(cpu_CallByEachCPU(StoreApicId, &apic_ids) != INFO::OK)
+Ids apicIds;
+if(cpu_CallByEachCPU(StoreApicId, &apicIds) != INFO::OK)
return;
// .. if they're not unique, cpu_CallByEachCPU is broken.
-std::sort(apic_ids.begin(), apic_ids.end());
-debug_assert(std::unique(apic_ids.begin(), apic_ids.end()) == apic_ids.end());
+std::sort(apicIds.begin(), apicIds.end());
+debug_assert(std::unique(apicIds.begin(), apicIds.end()) == apicIds.end());
// extract values from all 3 ID bitfields into separate sets
uint bit_pos = 0;
IdSet logical_ids;
-ExtractFieldsIntoSet(apic_ids, bit_pos, LogicalPerCore(), logical_ids);
+ExtractFieldsIntoSet(apicIds, bit_pos, LogicalPerCore(), logical_ids);
IdSet core_ids;
-ExtractFieldsIntoSet(apic_ids, bit_pos, CoresPerPackage(), core_ids);
+ExtractFieldsIntoSet(apicIds, bit_pos, CoresPerPackage(), core_ids);
IdSet package_ids;
-ExtractFieldsIntoSet(apic_ids, bit_pos, 0xFF, package_ids);
+ExtractFieldsIntoSet(apicIds, bit_pos, 0xFF, package_ids);
// (the set cardinality is representative of all packages/cores since
// they are uniform.)
-num_packages = std::max((uint)package_ids.size(), 1u);
-enabled_cores_per_package = std::max((uint)core_ids .size(), 1u);
-enabled_logical_per_core = std::max((uint)logical_ids.size(), 1u);
+numPackages = std::max((uint)package_ids.size(), 1u);
+enabledCoresPerPackage = std::max((uint)core_ids .size(), 1u);
+enabledLogicalPerCore = std::max((uint)logical_ids.size(), 1u);
// note: even though APIC IDs are assigned sequentially, we can't make any
// assumptions about the values/ordering because we get them according to
@ -551,25 +557,25 @@ static void DetectProcessorTopology()
uint ia32_NumPackages()
{
#ifndef NDEBUG
-debug_assert(num_packages != 0);
+debug_assert(numPackages != 0);
#endif
-return (uint)num_packages;
+return (uint)numPackages;
}
uint ia32_CoresPerPackage()
{
#ifndef NDEBUG
-debug_assert(enabled_cores_per_package != 0);
+debug_assert(enabledCoresPerPackage != 0);
#endif
-return (uint)enabled_cores_per_package;
+return (uint)enabledCoresPerPackage;
}
uint ia32_LogicalPerCore()
{
#ifndef NDEBUG
-debug_assert(enabled_logical_per_core != 0);
+debug_assert(enabledLogicalPerCore != 0);
#endif
-return (uint)enabled_logical_per_core;
+return (uint)enabledLogicalPerCore;
}
@ -641,8 +647,13 @@ LibError ia32_GetCallTarget(void* ret_addr, void** target)
//-----------------------------------------------------------------------------
+static ModuleInitState initState;
void ia32_Init()
{
+if(!ModuleShouldInitialize(&initState))
+return;
ia32_asm_cpuid_init();
ia32_cap_init();
@ -651,3 +662,12 @@ void ia32_Init()
DetectProcessorTopology();
}
void ia32_Shutdown()
{
if(!ModuleShouldShutdown(&initState))
return;
// nothing to do
}
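
To make the APIC-ID decomposition performed by DetectProcessorTopology above concrete, here is an illustrative decomposition for a hypothetical 2-package, dual-core, Hyperthreaded machine (1-bit fields, per ceil_log2(2); the IDs and field widths are invented for the example):

    #include <set>
    #include <cstdio>

    int main()
    {
        // hypothetical APIC IDs laid out as [package|core|logical], one bit per field
        const unsigned char apicIds[] = { 0, 1, 2, 3, 4, 5, 6, 7 };
        std::set<unsigned char> logicalIds, coreIds, packageIds;
        for(unsigned i = 0; i < 8; i++)
        {
            logicalIds.insert((apicIds[i] >> 0) & 1);     // logical-unit field
            coreIds   .insert((apicIds[i] >> 1) & 1);     // core field
            packageIds.insert((apicIds[i] >> 2) & 0xFF);  // remaining bits = package
        }
        // set cardinalities correspond to enabledLogicalPerCore,
        // enabledCoresPerPackage and numPackages: 2, 2, 2
        printf("%u %u %u\n", (unsigned)logicalIds.size(), (unsigned)coreIds.size(),
            (unsigned)packageIds.size());
        return 0;
    }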

View File

@ -19,46 +19,10 @@
#include "ia32_memcpy.h" #include "ia32_memcpy.h"
/** /**
* must be called exactly once before any of the following functions. * must be called before any of the following functions.
**/ **/
extern void ia32_Init(); extern void ia32_Init();
extern void ia32_Shutdown();
/// fpclassify return values
#define IA32_FP_NAN 0x0100
#define IA32_FP_NORMAL 0x0400
#define IA32_FP_INFINITE (IA32_FP_NAN | IA32_FP_NORMAL)
#define IA32_FP_ZERO 0x4000
#define IA32_FP_SUBNORMAL (IA32_FP_NORMAL | IA32_FP_ZERO)
// FPU control word (for ia32_asm_control87)
// .. Precision Control:
#define IA32_MCW_PC 0x0300
#define IA32_PC_24 0x0000
// .. Rounding Control:
#define IA32_MCW_RC 0x0C00
#define IA32_RC_NEAR 0x0000
#define IA32_RC_DOWN 0x0400
#define IA32_RC_UP 0x0800
#define IA32_RC_CHOP 0x0C00
// .. Exception Mask:
#define IA32_MCW_EM 0x003f
#define IA32_EM_INVALID BIT(0)
#define IA32_EM_DENORMAL BIT(1)
#define IA32_EM_ZERODIVIDE BIT(2)
#define IA32_EM_OVERFLOW BIT(3)
#define IA32_EM_UNDERFLOW BIT(4)
#define IA32_EM_INEXACT BIT(5)
/**
* order in which ia32_asm_cpuid stores register values
**/
enum IA32Regs
{
EAX,
EBX,
ECX,
EDX
};
/**
* bit indices of CPU capability flags (128 bits).
@ -93,39 +57,6 @@ enum IA32Cap
**/
extern bool ia32_cap(IA32Cap cap);
/**
* check if there is an IA-32 CALL instruction right before ret_addr.
* @return INFO::OK if so and ERR::FAIL if not.
*
* also attempts to determine the call target. if that is possible
* (directly addressed relative or indirect jumps), it is stored in
* target, which is otherwise 0.
*
* this function is used for walking the call stack.
**/
extern LibError ia32_GetCallTarget(void* ret_addr, void** target);
/// safe but slow inline-asm version
extern u64 ia32_rdtsc_safe(void);
/**
* @return the current value of the TimeStampCounter (a counter of
* CPU cycles since power-on, which is useful for high-resolution timing
* but potentially differs between multiple CPUs)
**/
extern u64 ia32_rdtsc(); // only for CppDoc's benefit
#if CONFIG_RETURN64_EDX_EAX
# define ia32_rdtsc ia32_asm_rdtsc_edx_eax
#else
# define ia32_rdtsc ia32_rdtsc_safe
#endif
/**
* trigger a breakpoint inside this function when it is called.
**/
extern void ia32_DebugBreak(void);
// CPU detection
@ -165,6 +96,49 @@ extern uint ia32_CoresPerPackage()
extern uint ia32_LogicalPerCore();
//-----------------------------------------------------------------------------
// stateless
/**
* @return APIC ID of the currently executing processor
**/
extern uint ia32_ApicId();
/**
* check if there is an IA-32 CALL instruction right before ret_addr.
* @return INFO::OK if so and ERR::FAIL if not.
*
* also attempts to determine the call target. if that is possible
* (directly addressed relative or indirect jumps), it is stored in
* target, which is otherwise 0.
*
* this function is used for walking the call stack.
**/
extern LibError ia32_GetCallTarget(void* ret_addr, void** target);
/// safe but slow inline-asm version
extern u64 ia32_rdtsc_safe(void);
/**
* @return the current value of the TimeStampCounter (a counter of
* CPU cycles since power-on, which is useful for high-resolution timing
* but potentially differs between multiple CPUs)
**/
extern u64 ia32_rdtsc(); // only for CppDoc's benefit
#if CONFIG_RETURN64_EDX_EAX
# define ia32_rdtsc ia32_asm_rdtsc_edx_eax
#else
# define ia32_rdtsc ia32_rdtsc_safe
#endif
/**
* trigger a breakpoint inside this function when it is called.
**/
extern void ia32_DebugBreak(void);
// implementations of the cpu.h interface
/// see cpu_MemoryFence
@ -173,4 +147,31 @@ extern void ia32_MemoryFence()
// see cpu_Serialize
extern void ia32_Serialize();
/// fpclassify return values
#define IA32_FP_NAN 0x0100
#define IA32_FP_NORMAL 0x0400
#define IA32_FP_INFINITE (IA32_FP_NAN | IA32_FP_NORMAL)
#define IA32_FP_ZERO 0x4000
#define IA32_FP_SUBNORMAL (IA32_FP_NORMAL | IA32_FP_ZERO)
// FPU control word (for ia32_asm_control87)
// .. Precision Control:
#define IA32_MCW_PC 0x0300
#define IA32_PC_24 0x0000
// .. Rounding Control:
#define IA32_MCW_RC 0x0C00
#define IA32_RC_NEAR 0x0000
#define IA32_RC_DOWN 0x0400
#define IA32_RC_UP 0x0800
#define IA32_RC_CHOP 0x0C00
// .. Exception Mask:
#define IA32_MCW_EM 0x003f
#define IA32_EM_INVALID BIT(0)
#define IA32_EM_DENORMAL BIT(1)
#define IA32_EM_ZERODIVIDE BIT(2)
#define IA32_EM_OVERFLOW BIT(3)
#define IA32_EM_UNDERFLOW BIT(4)
#define IA32_EM_INEXACT BIT(5)
#endif // #ifndef INCLUDED_IA32

View File

@ -21,6 +21,17 @@ extern "C" {
**/
extern void ia32_asm_cpuid_init();
/**
* order in which ia32_asm_cpuid stores register values
**/
enum IA32Regs
{
EAX,
EBX,
ECX,
EDX
};
/**
* try to call the specified CPUID sub-function.
* (note: ECX is set to 0 beforehand as required by sub-function 4)

View File

@ -0,0 +1,127 @@
/**
* =========================================================================
* File : hpet.cpp
* Project : 0 A.D.
* Description : Timer implementation using the HPET
* =========================================================================
*/
// license: GPL; see lib/license.txt
#include "precompiled.h"
#include "hpet.h"
#include "lib/sysdep/win/win.h"
#include "lib/sysdep/win/mahaf.h"
#include "lib/sysdep/acpi.h"
#include "lib/bits.h"
#pragma pack(push, 1)
struct HpetDescriptionTable
{
AcpiTable header;
u32 eventTimerBlockId;
AcpiGenericAddress baseAddress;
u8 sequenceNumber;
u16 minimumPeriodicTicks;
u8 attributes;
};
struct TickSourceHpet::HpetRegisters
{
u64 capabilities;
u64 reserved1;
u64 config;
u64 reserved2;
u64 interruptStatus;
u64 reserved3[25];
u64 counterValue;
u64 reserved4;
// .. followed by blocks for timers 0..31
};
#pragma pack(pop)
static const u64 CAP_SIZE64 = BIT64(13);
static const u64 CONFIG_ENABLE = BIT64(0);
//-----------------------------------------------------------------------------
TickSourceHpet::TickSourceHpet()
{
// (no need to check return value - valid hpet implies success)
(void)mahaf_Init();
(void)acpi_Init();
const HpetDescriptionTable* hpet = (const HpetDescriptionTable*)acpi_GetTable("HPET");
if(!hpet)
throw TickSourceUnavailable("HPET: no ACPI table");
debug_assert(hpet->baseAddress.addressSpaceId == ACPI_AS_MEMORY);
m_hpetRegisters = (volatile HpetRegisters*)mahaf_MapPhysicalMemory(hpet->baseAddress.address, sizeof(HpetRegisters));
if(!m_hpetRegisters)
throw TickSourceUnavailable("HPET: map failed");
// get counter parameters
const u64 caps = m_hpetRegisters->capabilities;
const u32 timerPeriod_fs = bits64(caps, 32, 63);
m_frequency = 1e15 / timerPeriod_fs;
m_counterBits = (caps & CAP_SIZE64)? 64 : 32;
// start the counter (if not already running)
// note: do not reset value to 0 to avoid interfering with any
// other users of the timer (e.g. Vista QPC)
m_hpetRegisters->config |= CONFIG_ENABLE;
}
TickSourceHpet::~TickSourceHpet()
{
mahaf_UnmapPhysicalMemory((void*)m_hpetRegisters);
mahaf_Shutdown();
acpi_Shutdown();
}
bool TickSourceHpet::IsSafe() const
{
return false;
// the HPET being created to address other timers' problems, it has
// no issues of its own.
return true;
}
u64 TickSourceHpet::Ticks() const
{
u64 ticks = m_hpetRegisters->counterValue;
// note: the spec allows 32 or 64 bit counters. given the typical
// frequency of 14.318 MHz, worst case is rollover within 300 s. this is
// obviously not long enough to never happen more than once, so it must
// be handled; there is no benefit in using all 64 bits where available.
//
// note that limiting ourselves to 32 bits also avoids the potential
// headache of non-atomic bus reads.
ticks &= 0xFFFFFFFFu;
return ticks;
}
/**
* WHRT uses this to ensure the counter (running at nominal frequency)
* doesn't overflow more than once during CALIBRATION_INTERVAL_MS.
**/
uint TickSourceHpet::CounterBits() const
{
return 32;
}
/**
* initial measurement of the tick rate. not necessarily correct
* (e.g. when using TSC: cpu_ClockFrequency isn't exact).
**/
double TickSourceHpet::NominalFrequency() const
{
return m_frequency;
}
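
For reference, the arithmetic behind the numbers used above (the 69,841,279 fs period is the typical value for the 14.31818 MHz oscillator mentioned in the comments; this is an illustrative calculation only):

    #include <cstdio>

    int main()
    {
        const double periodFs = 69841279.0;              // typical HPET period in femtoseconds
        const double freq = 1e15 / periodFs;             // ~14.318 MHz, as computed in the ctor
        const double rollover32 = 4294967296.0 / freq;   // ~300 s until a 32-bit read wraps
        printf("freq=%.3f MHz, 32-bit rollover=%.0f s\n", freq / 1e6, rollover32);
        return 0;
    }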

View File

@ -0,0 +1,51 @@
/**
* =========================================================================
* File : hpet.h
* Project : 0 A.D.
* Description : Timer implementation using the HPET
* =========================================================================
*/
// license: GPL; see lib/license.txt
#ifndef INCLUDED_HPET
#define INCLUDED_HPET
#include "tick_source.h"
class TickSourceHpet : public TickSource
{
public:
TickSourceHpet();
virtual ~TickSourceHpet();
virtual const char* Name() const
{
return "HPET";
}
virtual bool IsSafe() const;
virtual u64 Ticks() const;
/**
* WHRT uses this to ensure the counter (running at nominal frequency)
* doesn't overflow more than once during CALIBRATION_INTERVAL_MS.
**/
virtual uint CounterBits() const;
/**
* initial measurement of the tick rate. not necessarily correct
* (e.g. when using TSC: cpu_ClockFrequency isn't exact).
**/
virtual double NominalFrequency() const;
private:
double m_frequency;
struct HpetRegisters;
volatile HpetRegisters* m_hpetRegisters;
uint m_counterBits;
};
#endif // #ifndef INCLUDED_HPET

View File

@ -0,0 +1,25 @@
/**
* =========================================================================
* File : pit.h
* Project : 0 A.D.
* Description : Timer implementation using 82C53/4 PIT
* =========================================================================
*/
// license: GPL; see lib/license.txt
#ifndef INCLUDED_PIT
#define INCLUDED_PIT
// note: we don't access the PIT for two reasons:
// - it rolls over every 55 ms (1.193 MHz, 16 bit) and would have to be
// read at least that often, which would require setting high thread
// priority (dangerous).
// - reading it is slow and cannot be done by two independent users
// (the second being QPC) since the counter value must be latched.
//
// there are enough other tick sources anyway.
static const i64 PIT_FREQ = 1193182; // (= master oscillator frequency/12)
#endif // #ifndef INCLUDED_PIT
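
The 55 ms figure above follows directly from the counter width and PIT_FREQ; a quick check of the arithmetic:

    // 16-bit counter at 1.193182 MHz:
    //   65536 / 1193182 Hz ~= 54.9 ms until rollover,
    // so the PIT would have to be read roughly every 50 ms to avoid missing a wrap,
    // which is why a high-priority polling thread would otherwise be needed.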

View File

@ -0,0 +1,90 @@
/**
* =========================================================================
* File : pmt.cpp
* Project : 0 A.D.
* Description : Timer implementation using ACPI PM timer
* =========================================================================
*/
// license: GPL; see lib/license.txt
#include "precompiled.h"
#include "pmt.h"
#include "lib/sysdep/win/win.h"
#include "lib/sysdep/acpi.h"
#include "lib/sysdep/win/mahaf.h"
#include "lib/bits.h"
#pragma pack(push,1)
struct FADT
{
AcpiTable header;
u8 unused[40];
u32 pmTimerPortAddress;
u8 unused2[32];
u32 flags;
};
#pragma pack(pop)
static const u32 TMR_VAL_EXT = BIT(8);
TickSourcePmt::TickSourcePmt()
{
// (no need to check return value - valid fadt implies success)
(void)mahaf_Init();
(void)acpi_Init();
const FADT* fadt = (const FADT*)acpi_GetTable("FADT");
if(!fadt)
throw TickSourceUnavailable("PMT: no FADT");
m_portAddress = u16_from_larger(fadt->pmTimerPortAddress);
m_counterBits = (fadt->flags & TMR_VAL_EXT)? 32 : 24;
}
TickSourcePmt::~TickSourcePmt()
{
acpi_Shutdown();
mahaf_Shutdown();
}
bool TickSourcePmt::IsSafe() const
{
return false;
// the PMT has one issue: "Performance counter value may unexpectedly
// leap forward" (Q274323). This happens on some buggy Pentium-era
// systems under heavy PCI bus load. We are clever and observe that
// the TSC implementation would be used on such systems (because it
// has higher precedence and is safe on P5 CPUs), so the PMT is fine
// in general.
return true;
}
u64 TickSourcePmt::Ticks() const
{
u32 ticks = mahaf_ReadPort32(m_portAddress);
// note: the spec allows 24 or 32 bit counters. given the fixed
// frequency of 3.57 MHz, worst case is rollover within 4.6 s. this is
// obviously not long enough to never happen more than once, so it must
// be handled; there is no benefit in using all 32 bits where available.
ticks &= 0xFFFFFFu;
return (u64)ticks;
}
/**
* WHRT uses this to ensure the counter (running at nominal frequency)
* doesn't overflow more than once during CALIBRATION_INTERVAL_MS.
**/
uint TickSourcePmt::CounterBits() const
{
return m_counterBits;
}
/**
* initial measurement of the tick rate. not necessarily correct
* (e.g. when using TSC: cpu_ClockFrequency isn't exact).
**/
double TickSourcePmt::NominalFrequency() const
{
return (double)PMT_FREQ;
}
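
Worked numbers for the rollover discussion in Ticks() above (illustrative arithmetic):

    //   24-bit counter: 2^24 / 3579545 Hz ~= 4.69 s until rollover
    //   32-bit counter: 2^32 / 3579545 Hz ~= 1200 s
    // masking to 24 bits is therefore harmless as long as the counter is read more
    // often than every ~4 s, e.g. by the ~1 s WHRT calibration interval.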

View File

@ -0,0 +1,50 @@
/**
* =========================================================================
* File : pmt.h
* Project : 0 A.D.
* Description : Timer implementation using ACPI PM timer
* =========================================================================
*/
// license: GPL; see lib/license.txt
#ifndef INCLUDED_PMT
#define INCLUDED_PMT
#include "tick_source.h"
static const i64 PMT_FREQ = 3579545; // (= master oscillator frequency/4)
class TickSourcePmt : public TickSource
{
public:
TickSourcePmt();
virtual ~TickSourcePmt();
virtual const char* Name() const
{
return "PMT";
}
virtual bool IsSafe() const;
virtual u64 Ticks() const;
/**
* WHRT uses this to ensure the counter (running at nominal frequency)
* doesn't overflow more than once during CALIBRATION_INTERVAL_MS.
**/
virtual uint CounterBits() const;
/**
* initial measurement of the tick rate. not necessarily correct
* (e.g. when using TSC: cpu_ClockFrequency isn't exact).
**/
virtual double NominalFrequency() const;
private:
u16 m_portAddress;
uint m_counterBits;
};
#endif // #ifndef INCLUDED_PMT

View File

@ -0,0 +1,129 @@
/**
* =========================================================================
* File : qpc.cpp
* Project : 0 A.D.
* Description : Timer implementation using QueryPerformanceCounter
* =========================================================================
*/
// license: GPL; see lib/license.txt
#include "precompiled.h"
#include "qpc.h"
#include "lib/sysdep/win/win.h"
#include "lib/sysdep/win/wcpu.h"
#include "pit.h" // PIT_FREQ
#include "pmt.h" // PMT_FREQ
TickSourceQpc::TickSourceQpc()
{
// note: QPC is observed to be universally supported, but the API
// provides for failure, so play it safe.
LARGE_INTEGER qpcFreq, qpcValue;
const BOOL ok1 = QueryPerformanceFrequency(&qpcFreq);
const BOOL ok2 = QueryPerformanceCounter(&qpcValue);
if(!ok1 || !ok2 || !qpcFreq.QuadPart || !qpcValue.QuadPart)
throw TickSourceUnavailable("QPC not supported?!");
m_frequency = (i64)qpcFreq.QuadPart;
}
TickSourceQpc::~TickSourceQpc()
{
}
bool TickSourceQpc::IsSafe() const
{
// the PIT is entirely safe (even if annoyingly slow to read)
if(m_frequency == PIT_FREQ)
return true;
// note: we have separate modules that directly access some of the
// tick sources potentially used by QPC. marking them or QPC unsafe is
// risky because users can override either of those decisions.
// directly disabling them is ugly (increased coupling).
// instead, we'll make sure our implementations can coexist with QPC and
// verify the secondary reference timer has a different frequency.
// the PMT is safe (see discussion in TickSourcePmt::IsSafe);
if(m_frequency == PMT_FREQ)
return true;
// two other implementations have been observed: HPET
// (on Vista) and RDTSC (on MP HAL).
//
// - the HPET is reliable but can't easily be recognized since its
// frequency is variable (the spec says > 10 MHz; the master 14.318 MHz
// oscillator is often used). note: considering frequencies between
// 10..100 MHz to be a HPET would be dangerous because it may actually
// be faster or RDTSC slower.
//
// - the TSC implementation has been known to be buggy (even mentioned
// in MSDN) and we don't know which systems have been patched. it is
// therefore considered unsafe and recognized by comparing frequency
// against the CPU clock.
const double cpuClockFrequency = wcpu_ClockFrequency();
// failed for some reason => can't tell if RDTSC is being used
// => assume unsafe
if(cpuClockFrequency == 0.0)
return false;
// QPC frequency matches the CPU clock => it uses RDTSC => unsafe.
if(IsSimilarMagnitude(m_frequency, cpuClockFrequency))
return false;
// unconfirmed reports indicate QPC sometimes uses 1/3 of the
// CPU clock frequency, so check that as well.
if(IsSimilarMagnitude(m_frequency, cpuClockFrequency/3))
return false;
// otherwise: it's apparently using the HPET => safe.
return true;
}
u64 TickSourceQpc::Ticks() const
{
// fairly time-critical here, don't check the return value
// (IsSupported made sure it succeeded initially)
LARGE_INTEGER qpc_value;
(void)QueryPerformanceCounter(&qpc_value);
return qpc_value.QuadPart;
}
/**
* WHRT uses this to ensure the counter (running at nominal frequency)
* doesn't overflow more than once during CALIBRATION_INTERVAL_MS.
**/
uint TickSourceQpc::CounterBits() const
{
// note: the PMT is either 24 or 32 bits; older QPC implementations
// apparently had troubles with rollover.
// "System clock problem can inflate benchmark scores"
// (http://www.lionbridge.com/bi/cont2000/200012/perfcnt.asp ; no longer
// online, nor findable in Google Cache / archive.org) reports
// incorrect values every 4.6 seconds unless the timer is polled in
// the meantime. the given timeframe corresponds to 24 bits @ 3.57 MHz.
//
// we will therefore return the worst case value of 24 when using PMT
// (don't bother checking if it's 32-bit because there's no harm in
// ignoring the upper bits since we read it often enough)
if(m_frequency == PMT_FREQ)
return 24;
// no reports of trouble with the other implementations have surfaced,
// so we'll assume Windows correctly handles rollover and that we
// have the full 64 bits.
return 64;
}
/**
* initial measurement of the tick rate. not necessarily correct
* (e.g. when using TSC: cpu_ClockFrequency isn't exact).
**/
double TickSourceQpc::NominalFrequency() const
{
return (double)m_frequency;
}
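
A sketch of the frequency-based classification performed by IsSafe() above. SameMagnitude is an assumption standing in for the lib's IsSimilarMagnitude, and the constants mirror PIT_FREQ/PMT_FREQ from pit.h/pmt.h; this is illustrative, not the actual decision code:

    static bool SameMagnitude(double a, double b)
    {
        const double ratio = a / b;
        return 0.5 < ratio && ratio < 2.0;   // assumed tolerance
    }

    static const char* GuessQpcImplementation(double qpcFrequency, double cpuClockFrequency)
    {
        if(qpcFrequency == 1193182.0)   // PIT_FREQ
            return "PIT (safe, slow)";
        if(qpcFrequency == 3579545.0)   // PMT_FREQ
            return "ACPI PMT (safe)";
        if(cpuClockFrequency != 0.0 &&
           (SameMagnitude(qpcFrequency, cpuClockFrequency) ||
            SameMagnitude(qpcFrequency, cpuClockFrequency / 3)))
            return "TSC (treated as unsafe)";
        return "probably HPET (safe)";
    }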

View File

@ -0,0 +1,49 @@
/**
* =========================================================================
* File : qpc.h
* Project : 0 A.D.
* Description : Timer implementation using QueryPerformanceCounter
* =========================================================================
*/
// license: GPL; see lib/license.txt
#ifndef INCLUDED_QPC
#define INCLUDED_QPC
#include "tick_source.h"
class TickSourceQpc : public TickSource
{
public:
TickSourceQpc();
virtual ~TickSourceQpc();
virtual const char* Name() const
{
return "QPC";
}
virtual bool IsSafe() const;
virtual u64 Ticks() const;
/**
* WHRT uses this to ensure the counter (running at nominal frequency)
* doesn't overflow more than once during CALIBRATION_INTERVAL_MS.
**/
virtual uint CounterBits() const;
/**
* initial measurement of the tick rate. not necessarily correct
* (e.g. when using TSC: cpu_ClockFrequency isn't exact).
**/
virtual double NominalFrequency() const;
private:
// cached because QPF is a bit slow.
// (i64 allows easier conversion to double)
i64 m_frequency;
};
#endif // #ifndef INCLUDED_QPC

View File

@ -0,0 +1,83 @@
/**
* =========================================================================
* File : tgt.cpp
* Project : 0 A.D.
* Description : Timer implementation using timeGetTime
* =========================================================================
*/
// license: GPL; see lib/license.txt
// note: WinMM is delay-loaded to avoid dragging it in when this timer
// implementation isn't used. (this is relevant because its startup is
// fairly slow)
#include "precompiled.h"
#include "tgt.h"
#include "lib/sysdep/win/win.h"
#include <mmsystem.h>
#if MSC_VERSION
#pragma comment(lib, "winmm.lib")
#endif
// "Guidelines For Providing Multimedia Timer Support" claims that
// speeding the timer up to 2 ms has little impact, while 1 ms
// causes significant slowdown.
static const UINT PERIOD_MS = 2;
TickSourceTgt::TickSourceTgt()
{
// note: timeGetTime is always available and cannot fail.
MMRESULT ret = timeBeginPeriod(PERIOD_MS);
debug_assert(ret == TIMERR_NOERROR);
}
TickSourceTgt::~TickSourceTgt()
{
timeEndPeriod(PERIOD_MS);
}
bool TickSourceTgt::IsSafe() const
{
// the only point of criticism is the possibility of falling behind
// due to lost interrupts. this can happen to any interrupt-based timer
// and some systems may lack a counter-based timer, so consider TGT
// 'safe'. note that it is still only chosen when all other timers fail.
return true;
}
u64 TickSourceTgt::Ticks() const
{
return timeGetTime();
}
/**
* WHRT uses this to ensure the counter (running at nominal frequency)
* doesn't overflow more than once during CALIBRATION_INTERVAL_MS.
**/
uint TickSourceTgt::CounterBits() const
{
return 32;
}
/**
* initial measurement of the tick rate. not necessarily correct
* (e.g. when using TSC: cpu_ClockFrequency isn't exact).
**/
double TickSourceTgt::NominalFrequency() const
{
return 1000.0;
}
/**
* actual resolution [s]
**/
double TickSourceTgt::Resolution() const
{
return PERIOD_MS*1e-3;
}

View File

@ -0,0 +1,49 @@
/**
* =========================================================================
* File : tgt.h
* Project : 0 A.D.
* Description : Timer implementation using timeGetTime
* =========================================================================
*/
// license: GPL; see lib/license.txt
#ifndef INCLUDED_TGT
#define INCLUDED_TGT
#include "tick_source.h"
class TickSourceTgt : public TickSource
{
public:
TickSourceTgt();
virtual ~TickSourceTgt();
virtual const char* Name() const
{
return "TGT";
}
virtual bool IsSafe() const;
virtual u64 Ticks() const;
/**
* WHRT uses this to ensure the counter (running at nominal frequency)
* doesn't overflow more than once during CALIBRATION_INTERVAL_MS.
**/
virtual uint CounterBits() const;
/**
* initial measurement of the tick rate. not necessarily correct
* (e.g. when using TSC: cpu_ClockFrequency isn't exact).
**/
virtual double NominalFrequency() const;
/**
* actual resolution [s]
**/
virtual double Resolution() const;
};
#endif // #ifndef INCLUDED_TGT

View File

@ -0,0 +1,57 @@
/**
* =========================================================================
* File : tick_source.h
* Project : 0 A.D.
* Description : Interface for timer implementations
* =========================================================================
*/
// license: GPL; see lib/license.txt
#ifndef INCLUDED_TICK_SOURCE
#define INCLUDED_TICK_SOURCE
class TickSourceUnavailable : public std::runtime_error
{
public:
TickSourceUnavailable(const std::string& msg)
: std::runtime_error(msg)
{
}
};
class TickSource
{
public:
TickSource() {}
virtual ~TickSource() {}
virtual bool IsSafe() const = 0;
virtual const char* Name() const = 0;
virtual u64 Ticks() const = 0;
/**
* WHRT uses this to ensure the counter (running at nominal frequency)
* doesn't overflow more than once during CALIBRATION_INTERVAL_MS.
**/
virtual uint CounterBits() const = 0;
/**
* initial measurement of the tick rate. not necessarily correct
* (e.g. when using TSC: cpu_ClockFrequency isn't exact).
**/
virtual double NominalFrequency() const = 0;
/**
* actual resolution [s]
* (override if the timer adjustment is greater than 1 tick).
**/
virtual double Resolution() const
{
return 1.0 / NominalFrequency();
}
};
#endif // #ifndef INCLUDED_TICK_SOURCE
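
A minimal hypothetical implementation of the interface above, mainly to show which members a new backend must provide (the win/whrt/*.cpp files in this commit are the real examples):

    class TickSourceFake : public TickSource
    {
    public:
        TickSourceFake() : m_ticks(0) {}
        virtual const char* Name() const { return "fake"; }
        virtual bool IsSafe() const { return true; }
        virtual u64 Ticks() const { return ++m_ticks; }             // dummy monotonic counter
        virtual uint CounterBits() const { return 64; }
        virtual double NominalFrequency() const { return 1000.0; }  // pretend 1 tick = 1 ms
        // Resolution() falls back to 1/NominalFrequency() = 1 ms
    private:
        mutable u64 m_ticks;
    };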

View File

@ -0,0 +1,184 @@
/**
* =========================================================================
* File : tsc.cpp
* Project : 0 A.D.
* Description : Timer implementation using RDTSC
* =========================================================================
*/
// license: GPL; see lib/license.txt
#include "precompiled.h"
#include "tsc.h"
#include "lib/sysdep/win/win.h"
#include "lib/sysdep/win/wcpu.h"
#include "lib/sysdep/ia32/ia32.h"
#include "lib/sysdep/cpu.h" // cpu_CAS
//-----------------------------------------------------------------------------
// per-CPU state
// necessary because CPUs are initialized one-by-one and the TSC values
// differ significantly. (while at it, we also keep per-CPU frequency values
// in case the clocks aren't exactly synced)
//
// note: only reading the TSC from one CPU (possible via thread affinity)
// would work but take much longer (context switch).
struct PerCpuTscState
{
u64 calibrationTicks;
double calibrationTime;
// mark this struct used just in case cpu_CallByEachCPU doesn't ensure
// only one thread is running. a flag is safer than a magic APIC ID value.
uintptr_t isInitialized;
uint apicId;
float observedFrequency;
};
static const size_t MAX_CPUS = 32; // Win32 also imposes this limit
static PerCpuTscState cpuTscStates[MAX_CPUS];
static PerCpuTscState& NextUnusedPerCpuTscState()
{
for(size_t i = 0; i < MAX_CPUS; i++)
{
PerCpuTscState& cpuTscState = cpuTscStates[i];
if(cpu_CAS(&cpuTscState.isInitialized, 0, 1))
return cpuTscState;
}
throw std::runtime_error("allocated too many PerCpuTscState");
}
static PerCpuTscState& CurrentCpuTscState()
{
const uint apicId = ia32_ApicId();
for(size_t i = 0; i < MAX_CPUS; i++)
{
PerCpuTscState& cpuTscState = cpuTscStates[i];
if(cpuTscState.isInitialized && cpuTscState.apicId == apicId)
return cpuTscState;
}
throw std::runtime_error("no matching PerCpuTscState found");
}
static void InitPerCpuTscState(void* param) // callback
{
const double cpuClockFrequency = *(double*)param;
PerCpuTscState& cpuTscState = NextUnusedPerCpuTscState();
cpuTscState.apicId = ia32_ApicId();
cpuTscState.calibrationTicks = ia32_rdtsc();
cpuTscState.calibrationTime = 0.0;
cpuTscState.observedFrequency = cpuClockFrequency;
}
static LibError InitPerCpuTscStates(double cpuClockFrequency)
{
LibError ret = cpu_CallByEachCPU(InitPerCpuTscState, &cpuClockFrequency);
CHECK_ERR(ret);
return INFO::OK;
}
//-----------------------------------------------------------------------------
// note: calibration is necessary due to long-term thermal drift
// (oscillator is usually poor quality) and inaccurate initial measurement.
//-----------------------------------------------------------------------------
TickSourceTsc::TickSourceTsc()
{
if(!ia32_cap(IA32_CAP_TSC))
throw TickSourceUnavailable("TSC: unsupported");
if(InitPerCpuTscStates(wcpu_ClockFrequency()) != INFO::OK)
throw TickSourceUnavailable("TSC: per-CPU init failed");
}
TickSourceTsc::~TickSourceTsc()
{
}
bool TickSourceTsc::IsSafe() const
{
return false;
u32 regs[4];
if(ia32_asm_cpuid(0x80000007, regs))
{
// if(regs[EDX] & POWERNOW_FREQ_ID_CTRL)
}
/*
AMD has defined a CPUID feature bit that
software can test to determine if the TSC is
invariant. Issuing a CPUID instruction with an %eax register
value of 0x8000_0007, on a processor whose base family is
0xF, returns "Advanced Power Management Information" in the
%eax, %ebx, %ecx, and %edx registers. Bit 8 of the return
%edx is the "TscInvariant" feature flag which is set when
TSC is P-state, C-state, and STPCLK-throttling invariant; it
is clear otherwise.
*/
#if 0
if (CPUID.base_family < 0xf) {
// TSC drift doesn't exist on 7th Gen or less
// However, OS still needs to consider effects
// of P-state changes on TSC
return TRUE;
} else if (CPUID.AdvPowerMgmtInfo.TscInvariant) {
// Invariant TSC on 8th Gen or newer, use it
// (assume all cores have invariant TSC)
return TRUE;
// - deep sleep modes: TSC may not be advanced.
// not a problem though, because if the TSC is disabled, the CPU
// isn't doing any other work, either.
// - SpeedStep/'gearshift' CPUs: frequency may change.
// this happens on notebooks now, but eventually desktop systems
// will do this as well (if not to save power, for heat reasons).
// frequency changes are too often and drastic to correct,
// and we don't want to mess with the system power settings => unsafe.
if(cpu_IsThrottlingPossible() == 0)
return true;
/* But TSC doesn't tick in C3 so don't use it there */
if (acpi_fadt.length > 0 && acpi_fadt.plvl3_lat < 1000)
return 1;
#endif
return false;
}
u64 TickSourceTsc::Ticks() const
{
return ia32_rdtsc();
}
/**
* WHRT uses this to ensure the counter (running at nominal frequency)
* doesn't overflow more than once during CALIBRATION_INTERVAL_MS.
**/
uint TickSourceTsc::CounterBits() const
{
return 64;
}
/**
* initial measurement of the tick rate. not necessarily correct
* (e.g. when using TSC: cpu_ClockFrequency isn't exact).
**/
double TickSourceTsc::NominalFrequency() const
{
return wcpu_ClockFrequency();
}
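
One way the commented-out invariance check above could be completed, sketched with the ia32_asm_cpuid/IA32Regs/BIT helpers that appear elsewhere in this commit (the leaf and bit follow the AMD text quoted above; a sketch only):

    // CPUID leaf 0x80000007, EDX bit 8 ("TscInvariant"): set when the TSC ticks at a
    // constant rate regardless of P-states, C-states and STPCLK throttling.
    static bool IsTscInvariant()
    {
        u32 regs[4];
        if(!ia32_asm_cpuid(0x80000007, regs))
            return false;   // leaf unsupported => assume a variant TSC
        return (regs[EDX] & BIT(8)) != 0;
    }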

View File

@ -0,0 +1,46 @@
/**
* =========================================================================
* File : tsc.h
* Project : 0 A.D.
* Description : Timer implementation using RDTSC
* =========================================================================
*/
// license: GPL; see lib/license.txt
#ifndef INCLUDED_TSC
#define INCLUDED_TSC
#include "tick_source.h"
class TickSourceTsc : public TickSource
{
public:
TickSourceTsc();
~TickSourceTsc();
virtual const char* Name() const
{
return "TSC";
}
virtual bool IsSafe() const;
virtual u64 Ticks() const;
/**
* WHRT uses this to ensure the counter (running at nominal frequency)
* doesn't overflow more than once during CALIBRATION_INTERVAL_MS.
**/
virtual uint CounterBits() const;
/**
* initial measurement of the tick rate. not necessarily correct
* (e.g. when using TSC: cpu_ClockFrequency isn't exact).
**/
virtual double NominalFrequency() const;
private:
};
#endif // #ifndef INCLUDED_TSC

View File

@ -0,0 +1,470 @@
/**
* =========================================================================
* File : whrt.cpp
* Project : 0 A.D.
* Description : Windows High Resolution Timer
* =========================================================================
*/
// license: GPL; see lib/license.txt
#include "precompiled.h"
#include "whrt.h"
#include <process.h> // _beginthreadex
#include "lib/sysdep/win/win.h"
#include "lib/sysdep/win/winit.h"
#include "lib/sysdep/win/wcpu.h"
#include "lib/adts.h"
#include "lib/bits.h"
#include "hpet.h"
#include "pmt.h"
#include "qpc.h"
#include "tgt.h"
#include "tsc.h"
#pragma SECTION_PRE_LIBC(L) // dependencies: wposix
WIN_REGISTER_FUNC(whrt_Init);
#pragma FORCE_INCLUDE(whrt_Init)
#pragma SECTION_POST_ATEXIT(D)
WIN_REGISTER_FUNC(whrt_Shutdown);
#pragma FORCE_INCLUDE(whrt_Shutdown)
#pragma SECTION_RESTORE
// see http://www.gamedev.net/reference/programming/features/timing/ .
static bool IsTickSourceEstablished();
static int RolloversPerCalibrationInterval(double frequency, uint counterBits);
//-----------------------------------------------------------------------------
// safety recommendation / override
// while we do our best to work around timer problems or avoid them if unsafe,
// future requirements and problems may be different. allow the user or app
// override TickSource::IsSafe decisions.
cassert(WHRT_DEFAULT == 0); // ensure 0 is the correct initializer
static WhrtOverride overrides[WHRT_NUM_TICK_SOURCES]; // indexed by WhrtTickSourceId
void whrt_OverrideRecommendation(WhrtTickSourceId id, WhrtOverride override)
{
// calling this function only makes sense when tick source hasn't
// been chosen yet
debug_assert(!IsTickSourceEstablished());
debug_assert(id < WHRT_NUM_TICK_SOURCES);
overrides[id] = override;
}
static bool IsSafe(const TickSource* tickSource, WhrtTickSourceId id)
{
debug_assert(id < WHRT_NUM_TICK_SOURCES);
if(overrides[id] == WHRT_DISABLE)
return false;
if(overrides[id] == WHRT_FORCE)
return true;
return tickSource->IsSafe();
}
//-----------------------------------------------------------------------------
// manage tick sources
// use static array to avoid allocations (max #implementations is known)
static TickSource* tickSources[WHRT_NUM_TICK_SOURCES];
static uint nextTickSourceId = 0;
// factory
static TickSource* CreateTickSource(WhrtTickSourceId id)
{
switch(id)
{
case WHRT_TSC:
return new TickSourceTsc();
case WHRT_QPC:
return new TickSourceQpc();
case WHRT_HPET:
return new TickSourceHpet();
case WHRT_PMT:
return new TickSourcePmt();
case WHRT_TGT:
return new TickSourceTgt();
NODEFAULT;
}
}
/**
* @return the newly created and unique instance of the next tick source,
* or 0 if all have already been created.
*
* notes:
* - stores the tick source in tickSources[] with index = id.
* - don't always create all tick sources - some require 'lengthy' init.
**/
static TickSource* CreateNextBestTickSource()
{
for(;;)
{
if(nextTickSourceId == WHRT_NUM_TICK_SOURCES)
return 0;
WhrtTickSourceId id = (WhrtTickSourceId)nextTickSourceId++;
try
{
TickSource* tickSource = CreateTickSource(id);
debug_printf("HRT/ create id=%d name=%s freq=%f\n", id, tickSource->Name(), tickSource->NominalFrequency());
tickSources[id] = tickSource;
return tickSource;
}
catch(TickSourceUnavailable& e)
{
debug_printf("HRT/ create id=%d failed: %s\n", id, e.what());
}
}
}
static bool IsTickSourceAcceptable(TickSource* tickSource, WhrtTickSourceId id, TickSource* undesiredTickSource = 0)
{
// not (yet|successfully) created
if(!tickSource)
return false;
// it's the one we don't want (typically the primary source)
if(tickSource == undesiredTickSource)
return false;
// unsafe
if(!IsSafe(tickSource, id))
return false;
// duplicate source (i.e. frequency matches that of another)
for(uint id = 0; ; id++)
{
TickSource* tickSource2 = tickSources[id];
// not (yet|successfully) created
if(!tickSource2)
continue;
// if there are two sources with the same frequency, the one with
// higher precedence (lower ID) should be taken, so stop when we
// reach tickSource's ID.
if(tickSource == tickSource2)
break;
if(IsSimilarMagnitude(tickSource->NominalFrequency(), tickSource2->NominalFrequency()))
return false;
}
return true;
}
static TickSource* DetermineBestSafeTickSource(TickSource* undesiredTickSource = 0)
{
// until one is found or all have been created:
for(;;)
{
// check all existing sources in decreasing order of precedence
for(uint id = 0; id < WHRT_NUM_TICK_SOURCES; id++)
{
TickSource* tickSource = tickSources[id];
if(IsTickSourceAcceptable(tickSource, (WhrtTickSourceId)id, undesiredTickSource))
return tickSource;
}
// no acceptable source found; create the next one
if(!CreateNextBestTickSource())
return 0; // have already created all sources
}
}
static void ShutdownTickSources()
{
for(uint i = 0; i < WHRT_NUM_TICK_SOURCES; i++)
{
SAFE_DELETE(tickSources[i]);
}
}
//-----------------------------------------------------------------------------
// (primary) tick source
static TickSource* primaryTickSource;
static bool IsTickSourceEstablished()
{
return (primaryTickSource != 0);
};
static void ChooseTickSource()
{
// we used to support switching tick sources at runtime, but that's
// unnecessarily complex. it need only, and should only, be done once.
debug_assert(!IsTickSourceEstablished());
primaryTickSource = DetermineBestSafeTickSource();
const int rollovers = RolloversPerCalibrationInterval(primaryTickSource->NominalFrequency(), primaryTickSource->CounterBits());
debug_assert(rollovers <= 1);
}
/// @return ticks (unspecified start point)
i64 whrt_Ticks()
{
const u64 ticks = primaryTickSource->Ticks();
return (i64)ticks;
}
double whrt_NominalFrequency()
{
const double frequency = primaryTickSource->NominalFrequency();
return frequency;
}
double whrt_Resolution()
{
const double resolution = primaryTickSource->Resolution();
return resolution;
}
//-----------------------------------------------------------------------------
static u64 initialTicks;
double whrt_Time()
{
i64 deltaTicks = whrt_Ticks() - initialTicks;
double seconds = deltaTicks / whrt_NominalFrequency();
return seconds;
}
// must be an object so we can CAS-in the pointer to it
#if 0
class Calibrator
{
// ticks at init or last calibration.
// ticks since then are scaled by 1/hrt_cur_freq and added to hrt_cal_time
// to yield the current time.
u64 lastTicks;
//IHighResTimer safe;
u64 safe_last;
double LastFreqs[8]; // ring buffer
// used to calibrate and second-guess the primary
static TickSource* secondaryTickSource;
// value of hrt_time() at last calibration. needed so that changes to
// hrt_cur_freq don't affect the previous ticks (example: 72 ticks elapsed,
// nominal freq = 8 => time = 9.0. if freq is calculated as 9, time would
// go backwards to 8.0).
static double hrt_cal_time = 0.0;
// current ticks per second; average of last few values measured in
// calibrate(). needed to prevent long-term drift, and because
// hrt_nominal_freq isn't necessarily correct. only affects the ticks since
// last calibration - don't want to retroactively change the time.
double CurFreq;
};
calibrationTickSource = DetermineBestSafeTickSource(primaryTickSource);
// return seconds since init.
//
// split to allow calling from calibrate without recursive locking.
// (not a problem, but avoids a BoundsChecker warning)
static double time_lk()
{
debug_assert(hrt_cur_freq > 0.0);
debug_assert(hrt_cal_ticks > 0);
// elapsed ticks and time since last calibration
const i64 delta_ticks = ticks_lk() - hrt_cal_ticks;
const double delta_time = delta_ticks / hrt_cur_freq;
return hrt_cal_time + delta_time;
}
// measure current HRT freq - prevents long-term drift; also useful because
// hrt_nominal_freq isn't necessarily exact.
//
// lock must be held.
static void calibrate_lk()
{
debug_assert(hrt_cal_ticks > 0);
// we're called from a WinMM event or after thread wakeup,
// so the timer has just been updated.
// no need to determine tick / compensate.
// get elapsed HRT ticks
const i64 hrt_cur = ticks_lk();
const i64 hrt_d = hrt_cur - hrt_cal_ticks;
hrt_cal_ticks = hrt_cur;
hrt_cal_time += hrt_d / hrt_cur_freq;
// get elapsed time from safe millisecond timer
static long safe_last = LONG_MAX;
// chosen so that dt and therefore hrt_est_freq will be negative
// on first call => it won't be added to buffer
const long safe_cur = safe_time();
const double dt = (safe_cur - safe_last) / safe_timer_freq;
safe_last = safe_cur;
double hrt_est_freq = hrt_d / dt;
// past couple of calculated hrt freqs, for averaging
typedef RingBuf<double, 8> SampleBuf;
static SampleBuf samples;
// only add to buffer if within 10% of nominal
// (don't want to pollute buffer with flukes / incorrect results)
if(fabs(hrt_est_freq/hrt_nominal_freq - 1.0) < 0.10)
{
samples.push_back(hrt_est_freq);
// average all samples in buffer
double freq_sum = std::accumulate(samples.begin(), samples.end(), 0.0);
hrt_cur_freq = freq_sum / (int)samples.size();
}
else
{
samples.clear();
hrt_cur_freq = hrt_nominal_freq;
}
debug_assert(hrt_cur_freq > 0.0);
}
#endif
//-----------------------------------------------------------------------------
// calibration thread
// note: we used to discipline the HRT timestamp to the system time, so it
// was advantageous to wake up the calibration thread via WinMM event
// (reducing instances where we're called in the middle of a scheduler tick).
// since that's no longer relevant, we prefer using a thread, because that
// avoids the dependency on WinMM and its lengthy startup time.
// rationale: (+ and - are reasons for longer and shorter lengths)
// + minimize CPU usage
// + tolerate possibly low secondary tick source resolution
// - notice frequency drift quickly enough
// - no more than 1 counter rollover per interval (this is checked via
// RolloversPerCalibrationInterval)
static const DWORD CALIBRATION_INTERVAL_MS = 1000;
static int RolloversPerCalibrationInterval(double frequency, uint counterBits)
{
const double period = BIT64(counterBits) / frequency;
const i64 period_ms = cpu_i64FromDouble(period*1000.0);
return (int)(CALIBRATION_INTERVAL_MS / period_ms);	// explicit cast avoids i64 -> int truncation warning
}
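// Editor's illustration (a sketch, not part of this commit): how the above
// check might be applied when vetting a candidate tick source. The frequency
// and counter width are assumed example values for a 24-bit ACPI PM timer.
#if 0
static void ExampleRolloverCheck()
{
	const double pmtFrequency = 3579545.0;	// [Hz] (assumed)
	const uint pmtCounterBits = 24;	// (assumed)
	// 2^24 / 3579545 Hz ~= 4.69 s => period_ms = 4687, so
	// 1000 ms / 4687 ms = 0 rollovers per calibration interval - acceptable.
	// (a 16-bit counter at the same rate would wrap every ~18 ms and fail.)
	debug_assert(RolloversPerCalibrationInterval(pmtFrequency, pmtCounterBits) <= 1);
}
#endif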
static HANDLE hExitEvent;
static HANDLE hCalibrationThread;
static unsigned __stdcall CalibrationThread(void* UNUSED(data))
{
debug_set_thread_name("whrt_calibrate");
for(;;)
{
const DWORD ret = WaitForSingleObject(hExitEvent, CALIBRATION_INTERVAL_MS);
// owner terminated or wait failed or exit event signaled - exit thread
if(ret != WAIT_TIMEOUT)
break;
/// Calibrate();
}
return 0;
}
static inline LibError InitCalibrationThread()
{
hExitEvent = CreateEvent(0, TRUE, FALSE, 0); // manual reset, initially false
if(hExitEvent == 0)	// CreateEvent returns 0 (not INVALID_HANDLE_VALUE) on failure
WARN_RETURN(ERR::LIMIT);
hCalibrationThread = (HANDLE)_beginthreadex(0, 0, CalibrationThread, 0, 0, 0);
if(!hCalibrationThread)
WARN_RETURN(ERR::LIMIT);
return INFO::OK;
}
static inline void ShutdownCalibrationThread()
{
// signal thread
BOOL ok = SetEvent(hExitEvent);
WARN_IF_FALSE(ok);
// the nice way is to wait for it to exit
if(WaitForSingleObject(hCalibrationThread, 100) != WAIT_OBJECT_0)
TerminateThread(hCalibrationThread, 0); // forcibly exit (dangerous)
CloseHandle(hExitEvent);
CloseHandle(hCalibrationThread);
}
//-----------------------------------------------------------------------------
static LibError whrt_Init()
{
ChooseTickSource();
// latch start times
initialTicks = whrt_Ticks();
// RETURN_ERR(InitCalibrationThread());
return INFO::OK;
}
static LibError whrt_Shutdown()
{
// ShutdownCalibrationThread();
ShutdownTickSources();
return INFO::OK;
}

View File

@ -0,0 +1,51 @@
/**
* =========================================================================
* File : whrt.h
* Project : 0 A.D.
* Description : Windows High Resolution Timer
* =========================================================================
*/
// license: GPL; see lib/license.txt
#ifndef INCLUDED_WHRT
#define INCLUDED_WHRT
// arranged in decreasing order of preference with values = 0..N-1 so that
// the next best timer can be chosen by incrementing a counter.
//
// rationale for the ordering:
// - TSC must come before QPC and PMT so that a bug affecting the latter on
// Pentium systems isn't triggered.
// - timeGetTime really isn't as safe as the others, so it should be last.
// - low-overhead and high-resolution tick sources are preferred.
enum WhrtTickSourceId
{
WHRT_TSC,
WHRT_HPET,
WHRT_PMT,
WHRT_QPC,
WHRT_TGT,
WHRT_NUM_TICK_SOURCES
};
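// Editor's sketch (not from this commit) of the selection loop the ordering
// above enables: step through the ids in order of preference until a source
// is available and deemed safe. TickSourceIsAvailableAndSafe is a hypothetical
// predicate standing in for whatever the whrt implementation actually provides.
#if 0
uint id;
for(id = 0; id < (uint)WHRT_NUM_TICK_SOURCES; id++)
{
	if(TickSourceIsAvailableAndSafe((WhrtTickSourceId)id))	// hypothetical predicate
		break;	// first (i.e. most preferred) usable source
}
#endif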
enum WhrtOverride
{
// allow use of a tick source if available and we think it's safe.
WHRT_DEFAULT = 0, // (value obviates initialization of overrides[])
// override our IsSafe decision.
WHRT_DISABLE,
WHRT_FORCE
};
extern void whrt_OverrideRecommendation(WhrtTickSourceId id, WhrtOverride override);
extern i64 whrt_Ticks();
extern double whrt_NominalFrequency();
extern double whrt_Resolution();
extern double whrt_Time();
#endif // #ifndef INCLUDED_WHRT
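// Editor's usage sketch (not part of the header): measuring a short interval
// via the raw tick interface; whrt_Time() already returns seconds since init.
#if 0
static double ElapsedSeconds(i64 t0)
{
	const i64 t1 = whrt_Ticks();
	return (t1 - t0) / whrt_NominalFrequency();
}
#endif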

View File

@ -2,700 +2,39 @@
* ========================================================================= * =========================================================================
* File : wtime.cpp * File : wtime.cpp
* Project : 0 A.D. * Project : 0 A.D.
* Description : emulate POSIX high resolution timer on Windows. * Description : emulate POSIX time functionality on Windows.
* ========================================================================= * =========================================================================
*/ */
// license: GPL; see lib/license.txt // license: GPL; see lib/license.txt
// note: clock_gettime et al. have been removed. callers should use the
// WHRT directly, rather than needlessly translating s -> ns -> s,
// which costs time and accuracy.
#include "precompiled.h" #include "precompiled.h"
#include "wtime.h" #include "wtime.h"
#include <algorithm>
#include <numeric>
#include <cmath>
#include <ctime>
#include "lib/adts.h"
#include "lib/sysdep/ia32/ia32.h"
#include "lib/sysdep/cpu.h"
#include "wposix_internal.h" #include "wposix_internal.h"
#include "wpthread.h" #include "lib/sysdep/cpu.h" // cpu_i64FromDouble
#include "lib/sysdep/win/whrt/whrt.h"
// define to disable time sources (useful for simulating other systems) #pragma SECTION_PRE_LIBC(M) // late; dependent on whrt
//#define NO_QPC WIN_REGISTER_FUNC(wtime_Init);
//#define NO_TSC #pragma FORCE_INCLUDE(wtime_Init)
static const int CALIBRATION_FREQ = 1;
#pragma SECTION_PRE_LIBC(G)
WIN_REGISTER_FUNC(wtime_init);
#pragma FORCE_INCLUDE(wtime_init)
#pragma SECTION_POST_ATEXIT(D)
WIN_REGISTER_FUNC(wtime_shutdown);
#pragma FORCE_INCLUDE(wtime_shutdown)
#pragma SECTION_RESTORE #pragma SECTION_RESTORE
namespace ERR // NT system time and FILETIME are hectonanoseconds since Jan. 1, 1601 UTC.
{ // SYSTEMTIME is a struct containing month, year, etc.
const LibError TIMER_NO_SAFE_IMPL = -130100;
}
AT_STARTUP(\ static const long _1e3 = 1000;
error_setDescription(ERR::TIMER_NO_SAFE_IMPL, "No safe time source available");\
)
// see http://www.gamedev.net/reference/programming/features/timing/ .
// rationale:
// we no longer use TGT, due to issues on Win9x; GTC is just as good.
// (don't want to accelerate the tick rate, because performance will suffer).
// avoid dependency on WinMM (event timer) to shorten startup time.
//
// we go to the trouble of allowing switching time sources at runtime
// (=> have to be careful to keep the timer continuous) because we want
// to allow overriding the implementation choice via command line switch,
// in case a time source turns out to have a serious problem.
// (default values for HRT_NONE impl)
// initial measurement of the time source's tick rate. not necessarily
// correct (e.g. when using TSC: cpu_ClockFrequency isn't exact).
static double hrt_nominal_freq = -1.0;
// actual resolution of the time source (may differ from hrt_nominal_freq
// for timers with adjustment > 1 tick).
static double hrt_res = -1.0;
// current ticks per second; average of last few values measured in
// calibrate(). needed to prevent long-term drift, and because
// hrt_nominal_freq isn't necessarily correct. only affects the ticks since
// last calibration - don't want to retroactively change the time.
static double hrt_cur_freq = -1.0;
// ticks at init or last calibration.
// ticks since then are scaled by 1/hrt_cur_freq and added to hrt_cal_time
// to yield the current time.
static i64 hrt_cal_ticks = 0;
// value of hrt_time() at last calibration. needed so that changes to
// hrt_cur_freq don't affect the previous ticks (example: 72 ticks elapsed,
// nominal freq = 8 => time = 9.0. if freq is calculated as 9, time would
// go backwards to 8.0).
static double hrt_cal_time = 0.0;
// possible high resolution timers, in order of preference.
// see below for timer properties + problems.
// used as index into overrides[].
enum HRTImpl
{
// CPU timestamp counter
HRT_TSC,
// Windows QueryPerformanceCounter
HRT_QPC,
// Windows GetTickCount
HRT_GTC,
// there will always be a valid timer in use.
// this is only used with hrt_override_impl.
HRT_NONE,
HRT_NUM_IMPLS
};
static HRTImpl hrt_impl = HRT_NONE;
// while we do our best to work around timer problems or avoid them if unsafe,
// future requirements and problems may be different:
// allow the user or app to override our decisions (via hrt_override_impl)
enum HRTOverride
{
// allow use of this implementation if available,
// and we can work around its problems
//
// HACK: give it value 0 for easier static data initialization
HRT_DEFAULT = 0,
// override our 'safe to use' recommendation
// set by hrt_override_impl (via command line arg or console function)
HRT_DISABLE,
HRT_FORCE
};
// HRTImpl enums as index
// HACK: no init needed - static data is zeroed (= HRT_DEFAULT)
static HRTOverride overrides[HRT_NUM_IMPLS];
cassert((int)HRT_DEFAULT == 0);
// convenience
static const long _1e6 = 1000000; static const long _1e6 = 1000000;
static const long _1e7 = 10000000; static const long _1e7 = 10000000;
static const i64 _1e9 = 1000000000; static const i64 _1e9 = 1000000000;
static inline void lock(void)
{
win_lock(WTIME_CS);
}
static inline void unlock(void)
{
win_unlock(WTIME_CS);
}
static bool IsSimilarMagnitude(double d1, double d2, const double relative_error_tolerance = 0.05)
{
const double relative_error = fabs(d1/d2 - 1.0);
if(relative_error > relative_error_tolerance)
return false;
return true;
}
// decide upon a HRT implementation, checking if we can work around
// each timer's issues on this platform, but allow user override
// in case there are unforeseen problems with one of them.
// order of preference (due to resolution and speed): TSC, QPC, GTC.
// split out of reset_impl so we can just return when impl is chosen.
static LibError choose_impl()
{
bool safe;
#define SAFETY_OVERRIDE(impl)\
if(overrides[impl] == HRT_DISABLE)\
safe = false;\
if(overrides[impl] == HRT_FORCE)\
safe = true;
// used several times below, so latch it for convenience.
const double cpu_freq = cpu_IsDetectFinished()? cpu_ClockFrequency() : 0.0;
#if CPU_IA32 && !defined(NO_TSC)
// CPU Timestamp Counter (incremented every clock)
// ns resolution, moderate precision (poor clock crystal?)
//
// issues:
// - multiprocessor systems: may be inconsistent across CPUs.
// we could discard really bad values, but that's still inaccurate.
// having a high-priority thread with set CPU affinity read the TSC
// might work, but would be rather slow. could fix the problem by
// keeping per-CPU timer state (freq and delta). we'd use the APIC ID
// (cpuid, function 1) or GetCurrentProcessorNumber (only available
// on Win Server 2003) to determine the CPU. however, this is
// too much work for little benefit ATM, so call it unsafe.
// - deep sleep modes: TSC may not be advanced.
// not a problem though, because if the TSC is disabled, the CPU
// isn't doing any other work, either.
// - SpeedStep/'gearshift' CPUs: frequency may change.
// this happens on notebooks now, but eventually desktop systems
// will do this as well (if not to save power, for heat reasons).
// frequency changes are too often and drastic to correct,
// and we don't want to mess with the system power settings => unsafe.
/*
AMD has defined a CPUID feature bit that
software can test to determine if the TSC is
invariant. Issuing a CPUID instruction with an %eax register
value of 0x8000_0007, on a processor whose base family is
0xF, returns "Advanced Power Management Information" in the
%eax, %ebx, %ecx, and %edx registers. Bit 8 of the return
%edx is the "TscInvariant" feature flag which is set when
TSC is P-state, C-state, and STPCLK-throttling invariant; it
is clear otherwise.
*/
/*
if (CPUID.base_family < 0xf) {
// TSC drift doesn't exist on 7th Gen or less
// However, OS still needs to consider effects
// of P-state changes on TSC
return TRUE;
} else if (CPUID.AdvPowerMgmtInfo.TscInvariant) {
// Invariant TSC on 8th Gen or newer, use it
// (assume all cores have invariant TSC)
return TRUE;
} else if ((number_processors == 1)&&(number_cores == 1)){
// OK to use TSC on uni-processor-uni-core
// However, OS still needs to consider effects
// of P-state changes on TSC
return TRUE;
} else if ( (number_processors == 1) &&
(CPUID.effective_family == 0x0f) &&
!C1_ramp_8gen ){
// Use TSC on 8th Gen uni-proc with C1_ramp off
// However, OS still needs to consider effects
// of P-state changes on TSC
return TRUE;
} else {
return FALSE;
}
}
C1_ramp_8gen() {
// Check if C1-Clock ramping enabled in PMM7.CpuLowPwrEnh
// On 8th-Generation cores only. Assume BIOS has setup
// all Northbridges equivalently.
return (1 & read_pci_byte(bus=0,dev=0x18,fcn=3,offset=0x87));
}
*/
if(cpu_freq > 0.0 && ia32_cap(IA32_CAP_TSC))
{
safe = (cpu_CoresPerPackage() == 1 && cpu_NumPackages() == 1 && cpu_IsThrottlingPossible() == 0);
SAFETY_OVERRIDE(HRT_TSC);
if(safe)
{
hrt_impl = HRT_TSC;
hrt_nominal_freq = cpu_ClockFrequency();
hrt_res = (1.0 / hrt_nominal_freq);
return INFO::OK;
}
}
#endif // TSC
#if OS_WIN && !defined(NO_QPC)
// Windows QueryPerformanceCounter API
// implementations:
// - PIT on Win2k - 838 ns resolution, slow to read (~3 µs)
// - PMT on WinXP - 279 ns ", moderate overhead (700 ns?)
// issues:
// 1) Q274323: may jump several seconds under heavy PCI bus load.
// not a problem, because the older systems on which this occurs
// have safe TSCs, so that is used instead.
// 2) "System clock problem can inflate benchmark scores":
// incorrect value if not polled every 4.5 seconds? solved
// by calibration thread, which reads timer every second anyway.
// - TSC on MP HAL, sometimes with 1/3 of CPU freq.
// cache freq because QPF is fairly slow.
static i64 qpc_freq = -1; // set to 0 if unsupported
if(qpc_freq == -1) // first call
{
LARGE_INTEGER freq;
BOOL qpc_ok = QueryPerformanceFrequency(&freq);
qpc_freq = qpc_ok? freq.QuadPart : 0;
}
// QPC is available
if(qpc_freq > 0)
{
// PIT and PMT are safe.
if(qpc_freq == 1193182 || qpc_freq == 3579545)
safe = true;
// make sure QPC doesn't use the TSC
// (if it were safe, we would have chosen it above)
else
{
// can't decide yet - assume unsafe
if(!cpu_IsDetectFinished())
safe = false;
else
{
safe = true;
// compare QPC freq to CPU clock freq. note: we can't
// single out the HPET (as with PIT and PMT above) because
// its frequency is variable and at least 10 MHz.
if(IsSimilarMagnitude(qpc_freq, cpu_freq))
safe = false;
if(IsSimilarMagnitude(qpc_freq, cpu_freq/3)) // QPC sometimes uses RDTSC/3
safe = false;
}
}
SAFETY_OVERRIDE(HRT_QPC);
if(safe)
{
hrt_impl = HRT_QPC;
hrt_nominal_freq = (double)qpc_freq;
hrt_res = (1.0 / hrt_nominal_freq);
return INFO::OK;
}
}
#endif // QPC
//
// GTC
//
safe = true;
SAFETY_OVERRIDE(HRT_GTC);
if(safe)
{
hrt_impl = HRT_GTC;
hrt_nominal_freq = 1000.0; // units returned
hrt_res = 1e-2; // guess, in case the following fails
// get actual resolution
DWORD adj; BOOL adj_disabled; // unused, but must be passed to GSTA
DWORD timer_period; // [hectonanoseconds]
if(GetSystemTimeAdjustment(&adj, &timer_period, &adj_disabled))
hrt_res = (timer_period / 1e7);
return INFO::OK;
}
hrt_impl = HRT_NONE;
hrt_nominal_freq = -1.0;
WARN_RETURN(ERR::TIMER_NO_SAFE_IMPL);
}
// return ticks (unspecified start point). lock must be held.
//
// split to allow calling from reset_impl_lk without recursive locking.
// (not a problem, but avoids a BoundsChecker warning)
static i64 ticks_lk()
{
switch(hrt_impl)
{
// TSC
#if CPU_IA32 && !defined(NO_TSC)
case HRT_TSC:
return (i64)ia32_rdtsc();
#endif
// QPC
#if OS_WIN && !defined(NO_QPC)
case HRT_QPC:
{
LARGE_INTEGER i;
BOOL ok = QueryPerformanceCounter(&i);
WARN_IF_FALSE(ok); // shouldn't fail if it was chosen above
return i.QuadPart;
}
#endif
// GTC
#if OS_WIN
case HRT_GTC:
return (i64)GetTickCount();
#endif
// add further timers here.
default:
debug_warn("invalid impl");
return 0;
} // switch(impl)
}
// return seconds since init. lock must be held.
//
// split to allow calling from calibrate without recursive locking.
// (not a problem, but avoids a BoundsChecker warning)
static double time_lk()
{
debug_assert(hrt_cur_freq > 0.0);
debug_assert(hrt_cal_ticks > 0);
// elapsed ticks and time since last calibration
const i64 delta_ticks = ticks_lk() - hrt_cal_ticks;
const double delta_time = delta_ticks / hrt_cur_freq;
return hrt_cal_time + delta_time;
}
// this module is dependent upon cpu.cpp (supplies information needed to
// choose a HRT), which in turn uses our timer to detect the CPU clock
// when running on Windows (clock(), the only cross platform HRT available on
// Windows, isn't good enough - only 10..15 ms resolution).
//
// we first use a safe timer, and choose again after client code calls
// hrt_override_impl when system information is available.
// the timer will work without this call, but it won't use certain
// implementations. we do it this way, instead of polling on each timer use,
// because a timer implementation change may cause the timer to jump a bit.
// choose a HRT implementation and prepare it for use. lock must be held.
//
// don't want to saddle timer module with the problem of initializing
// us on first call - it wouldn't otherwise need to be thread-safe.
static LibError reset_impl_lk()
{
HRTImpl old_impl = hrt_impl;
// if changing implementation: get time at which to continue
// (when switching, we set everything calibrate() would output)
double old_time;
// .. first call; hrt_cur_freq not initialized; can't call time_lk.
// setting to 0 will start the timer at 0.
if(hrt_cur_freq <= 0.0)
old_time = 0.0;
// .. timer has been initialized; use current reported time.
else
old_time = time_lk();
RETURN_ERR(choose_impl());
debug_assert(hrt_impl != HRT_NONE && hrt_nominal_freq > 0.0);
// impl has changed; reset timer state.
if(old_impl != hrt_impl)
{
hrt_cur_freq = hrt_nominal_freq;
hrt_cal_time = old_time;
hrt_cal_ticks = ticks_lk();
}
debug_printf("HRT impl=%d nominal_freq=%f cur_freq=%f\n", hrt_impl, hrt_nominal_freq, hrt_cur_freq);
return INFO::OK;
}
// return ticks (unspecified start point)
static i64 hrt_ticks()
{
i64 t;
lock();
t = ticks_lk();
unlock();
return t;
}
// return seconds since init.
static double hrt_time()
{
lock();
const double t = time_lk();
unlock();
return t;
}
// return seconds between start and end timestamps (returned by hrt_ticks).
// negative if end comes before start. not intended to be called for long
// intervals (start -> end), since the current frequency is used!
static double hrt_delta_s(i64 start, i64 end)
{
// paranoia: reading double may not be atomic.
lock();
const double freq = hrt_cur_freq;
unlock();
debug_assert(freq != -1.0 && "hrt_delta_s: hrt_cur_freq not set");
return (end - start) / freq;
}
// return current timer implementation and its nominal (rated) frequency.
// nominal_freq is never 0.
// implementation only changes after hrt_override_impl.
static void hrt_query_impl(HRTImpl& impl, double& nominal_freq, double& res)
{
lock();
impl = hrt_impl;
nominal_freq = hrt_nominal_freq;
res = hrt_res;
unlock();
debug_assert(nominal_freq > 0.0 && "hrt_query_impl: invalid hrt_nominal_freq");
}
// override our 'safe to use' decision.
// resets (and chooses another, if applicable) implementation;
// the timer may jump after doing so.
// call with HRT_DEFAULT, HRT_NONE to re-evaluate implementation choice
// after system info becomes available.
static LibError hrt_override_impl(HRTOverride ovr, HRTImpl impl)
{
if((ovr != HRT_DISABLE && ovr != HRT_FORCE && ovr != HRT_DEFAULT) ||
(impl != HRT_TSC && impl != HRT_QPC && impl != HRT_GTC && impl != HRT_NONE))
WARN_RETURN(ERR::INVALID_PARAM);
lock();
overrides[impl] = ovr;
LibError ret = reset_impl_lk();
unlock();
return ret;
}
//-----------------------------------------------------------------------------
// calibration
//-----------------------------------------------------------------------------
// 'safe' timer, used to measure HRT freq in calibrate()
static const long safe_timer_freq = 1000;
static long safe_time()
{
#if OS_WIN
return (long)GetTickCount();
#else
return (long)(clock() * 1000.0 / CLOCKS_PER_SEC);
#endif
}
// measure current HRT freq - prevents long-term drift; also useful because
// hrt_nominal_freq isn't necessarily exact.
//
// lock must be held.
static void calibrate_lk()
{
debug_assert(hrt_cal_ticks > 0);
// we're called from a WinMM event or after thread wakeup,
// so the timer has just been updated.
// no need to determine tick / compensate.
// get elapsed HRT ticks
const i64 hrt_cur = ticks_lk();
const i64 hrt_d = hrt_cur - hrt_cal_ticks;
hrt_cal_ticks = hrt_cur;
hrt_cal_time += hrt_d / hrt_cur_freq;
// get elapsed time from safe millisecond timer
static long safe_last = LONG_MAX;
// chosen so that dt and therefore hrt_est_freq will be negative
// on first call => it won't be added to buffer
const long safe_cur = safe_time();
const double dt = (safe_cur - safe_last) / safe_timer_freq;
safe_last = safe_cur;
double hrt_est_freq = hrt_d / dt;
// past couple of calculated hrt freqs, for averaging
typedef RingBuf<double, 8> SampleBuf;
static SampleBuf samples;
// only add to buffer if within 10% of nominal
// (don't want to pollute buffer with flukes / incorrect results)
if(fabs(hrt_est_freq/hrt_nominal_freq - 1.0) < 0.10)
{
samples.push_back(hrt_est_freq);
// average all samples in buffer
double freq_sum = std::accumulate(samples.begin(), samples.end(), 0.0);
hrt_cur_freq = freq_sum / (int)samples.size();
}
else
{
samples.clear();
hrt_cur_freq = hrt_nominal_freq;
}
debug_assert(hrt_cur_freq > 0.0);
}
// calibration thread
// note: winmm event is better than a thread or just checking elapsed time
// in hrt_ticks, because it's called right after GTC is updated;
// otherwise, we may be in the middle of a tick.
// however, we want to avoid dependency on WinMM to shorten startup time.
// hence, start a thread.
static pthread_t thread;
static sem_t exit_flag;
static void* calibration_thread(void* UNUSED(data))
{
debug_set_thread_name("wtime");
for(;;)
{
// calculate absolute timeout for sem_timedwait
struct timespec abs_timeout;
clock_gettime(CLOCK_REALTIME, &abs_timeout);
abs_timeout.tv_nsec += _1e9 / CALIBRATION_FREQ;
// .. handle nanosecond wraparound (tv_nsec must not reach 1e9)
if(abs_timeout.tv_nsec >= _1e9)
{
abs_timeout.tv_nsec -= _1e9;
abs_timeout.tv_sec++;
}
errno = 0;
// if we acquire the semaphore, exit was requested.
if(sem_timedwait(&exit_flag, &abs_timeout) == 0)
break;
// actual error: warn
if(errno != ETIMEDOUT)
debug_warn("wtime calibration_thread: sem_timedwait failed");
lock();
calibrate_lk();
unlock();
}
return 0;
}
static inline LibError init_calibration_thread()
{
sem_init(&exit_flag, 0, 0);
pthread_create(&thread, 0, calibration_thread, 0);
return INFO::OK;
}
static inline LibError shutdown_calibration_thread()
{
sem_post(&exit_flag);
pthread_join(thread, 0);
sem_destroy(&exit_flag);
return INFO::OK;
}
static LibError hrt_init()
{
// no lock needed - calibration thread hasn't yet been created
RETURN_ERR(reset_impl_lk());
return init_calibration_thread();
}
static LibError hrt_shutdown()
{
// don't take a lock here! race condition:
// 1) calibration_thread is about to call clock_gettime
// 2) we take the lock and wait for the thread to exit
// 3) thread's clock_gettime waits on the lock we're holding => deadlock
//
// the calibration thread protects itself anyway, so nothing breaks.
return shutdown_calibration_thread();
}
//-----------------------------------------------------------------------------
// wtime wrapper: emulates POSIX functions
//-----------------------------------------------------------------------------
// NT system time and FILETIME are hectonanoseconds since Jan. 1, 1601 UTC.
// SYSTEMTIME is a struct containing month, year, etc.
// //
// FILETIME -> time_t routines; used by wposix filetime_to_time_t wrapper. // FILETIME -> time_t routines; used by wposix filetime_to_time_t wrapper.
// //
@ -727,87 +66,54 @@ time_t wtime_utc_filetime_to_time_t(FILETIME* ft)
} }
// return nanoseconds since posix epoch as reported by system time //-----------------------------------------------------------------------------
// only 10 or 15 ms resolution!
static i64 st_time_ns() // system clock at startup [nanoseconds since POSIX epoch]
// note: the HRT starts at 0; any increase by the time we get here
// just makes our notion of the start time more accurate)
static i64 stInitial_ns;
static void LatchInitialSystemTime()
{ {
FILETIME ft; FILETIME ft;
GetSystemTimeAsFileTime(&ft); GetSystemTimeAsFileTime(&ft);
u64 hns = u64_from_FILETIME(&ft); const u64 hns = u64_from_FILETIME(&ft);
return (hns - posix_epoch_hns) * 100; stInitial_ns = (hns - posix_epoch_hns) * 100;
} }
// return nanoseconds since POSIX epoch.
// return nanoseconds since posix epoch as reported by HRT. // algorithm: add current HRT value to the startup system time
// we get system time at init and add HRT elapsed time. static i64 CurrentSystemTime_ns()
static i64 time_ns()
{ {
// we don't really need to get the HRT start time (it starts at 0, const i64 ns = stInitial_ns + cpu_i64FromDouble(whrt_Time() * _1e9);
// and will be slightly higher when we get here; doesn't matter if the
// time returned is a few ms off the real system time). do so anyway,
// because we have to get the starting ST value anyway.
static double hrt_start_time;
static i64 st_start;
if(!st_start)
{
hrt_start_time = hrt_time();
st_start = st_time_ns();
}
const double dt = hrt_time() - hrt_start_time;
const i64 ns = st_start + cpu_i64FromDouble(dt * _1e9);
return ns; return ns;
} }
static timespec TimespecFromNs(i64 ns)
static LibError wtime_init()
{ {
hrt_init(); timespec ts;
ts.tv_sec = (time_t)((ns / _1e9) & 0xFFFFFFFF);
ts.tv_nsec = (long)(ns % _1e9);
return ts;
}
// first call latches start times static uint MsFromTimespec(const timespec& ts)
time_ns(); {
i64 ms = ts.tv_sec; // avoid overflow
return INFO::OK; ms *= _1e3;
ms += ts.tv_nsec / _1e6;
return ms;
} }
static LibError wtime_shutdown() //-----------------------------------------------------------------------------
{
return hrt_shutdown();
}
int clock_gettime(clockid_t clock, struct timespec* ts)
void wtime_reset_impl()
{
hrt_override_impl(HRT_DEFAULT, HRT_NONE);
}
static void sleep_ns(i64 ns)
{
DWORD ms = DWORD(ns / _1e6);
if(ms != 0)
Sleep(ms);
else
{
i64 t0 = hrt_ticks(), t1;
do
t1 = hrt_ticks();
while(hrt_delta_s(t0, t1) * _1e9 < ns);
}
}
int clock_gettime(clockid_t clock, struct timespec* t)
{ {
debug_assert(clock == CLOCK_REALTIME); debug_assert(clock == CLOCK_REALTIME);
const i64 ns = time_ns(); const i64 ns = CurrentSystemTime_ns();
t->tv_sec = (time_t)((ns / _1e9) & 0xFFFFFFFF); *ts = TimespecFromNs(ns);
t->tv_nsec = (long) (ns % _1e9);
return 0; return 0;
} }
@ -816,38 +122,30 @@ int clock_getres(clockid_t clock, struct timespec* ts)
{ {
debug_assert(clock == CLOCK_REALTIME); debug_assert(clock == CLOCK_REALTIME);
HRTImpl impl; const i64 ns = cpu_i64FromDouble(whrt_Resolution() * 1e9);
double nominal_freq, res; *ts = TimespecFromNs(ns);
hrt_query_impl(impl, nominal_freq, res);
ts->tv_sec = 0;
ts->tv_nsec = (long)(res * 1e9);
return 0; return 0;
} }
int nanosleep(const struct timespec* rqtp, struct timespec* /* rmtp */) int nanosleep(const struct timespec* rqtp, struct timespec* /* rmtp */)
{ {
i64 ns = rqtp->tv_sec; // make sure we don't overflow const DWORD ms = (DWORD)MsFromTimespec(*rqtp);
ns *= _1e9; if(ms)
ns += rqtp->tv_nsec; Sleep(ms);
sleep_ns(ns);
return 0;
}
int gettimeofday(struct timeval* tv, void* UNUSED(tzp))
{
const long us = (long)(time_ns() / 1000);
tv->tv_sec = (time_t) (us / _1e6);
tv->tv_usec = (suseconds_t)(us % _1e6);
return 0; return 0;
} }
uint sleep(uint sec) uint sleep(uint sec)
{ {
Sleep(sec * 1000); // don't bother checking for overflow (user's fault) // warn if overflow would result (it would be insane to ask for
// such lengthy sleep timeouts, but still)
debug_assert(sec < std::numeric_limits<uint>::max()/1000);
const DWORD ms = sec * 1000;
if(ms)
Sleep(ms);
return sec; return sec;
} }
@ -855,6 +153,18 @@ uint sleep(uint sec)
int usleep(useconds_t us) int usleep(useconds_t us)
{ {
debug_assert(us < _1e6); debug_assert(us < _1e6);
sleep_ns(us * 1000); // can't overflow due to <us> limit
const DWORD ms = us/1000;
if(ms)
Sleep(ms);
return 0; return 0;
} }
//-----------------------------------------------------------------------------
static LibError wtime_Init()
{
LatchInitialSystemTime();
return INFO::OK;
}

View File

@ -1,14 +1,6 @@
#ifndef INCLUDED_WTIME_INTERNAL #ifndef INCLUDED_WTIME_INTERNAL
#define INCLUDED_WTIME_INTERNAL #define INCLUDED_WTIME_INTERNAL
// HACK: on Windows, the HRT makes its final implementation choice
// in the first calibrate call where cpu_freq is available.
// provide a routine that makes the choice when called,
// so app code isn't surprised by a timer change, although the HRT
// does try to keep the timer continuous.
extern void wtime_reset_impl(void);
// convert UTC FILETIME to seconds-since-1970 UTC. // convert UTC FILETIME to seconds-since-1970 UTC.
// used by wfilesystem. // used by wfilesystem.
#ifndef _FILETIME_ // prevent ICE on VC7 #ifndef _FILETIME_ // prevent ICE on VC7

View File

@ -19,6 +19,9 @@
#include "lib/posix/posix_time.h" #include "lib/posix/posix_time.h"
#include "adts.h" #include "adts.h"
#include "lib/sysdep/cpu.h" #include "lib/sysdep/cpu.h"
#if OS_WIN
#include "lib/sysdep/win/whrt/whrt.h"
#endif
#if CONFIG_TIMER_ALLOW_RDTSC #if CONFIG_TIMER_ALLOW_RDTSC
# include "lib/sysdep/ia32/ia32.h" // ia32_rdtsc # include "lib/sysdep/ia32/ia32.h" // ia32_rdtsc
@ -29,28 +32,35 @@
// than their us / ns interface, via double [seconds]. they're also not // than their us / ns interface, via double [seconds]. they're also not
// guaranteed to be monotonic. // guaranteed to be monotonic.
#if HAVE_CLOCK_GETTIME
static struct timespec start;
#elif HAVE_GETTIMEOFDAY
static struct timeval start;
#endif
void timer_Init()
{
#if HAVE_CLOCK_GETTIME
(void)clock_gettime(CLOCK_REALTIME, &start);
#elif HAVE_GETTIMEOFDAY
gettimeofday(&start, 0);
#endif
}
double get_time() double get_time()
{ {
double t; double t;
#if HAVE_CLOCK_GETTIME #if HAVE_CLOCK_GETTIME
static struct timespec start = {0}; struct timespec cur;
struct timespec ts; (void)clock_gettime(CLOCK_REALTIME, &cur);
t = (cur.tv_sec - start.tv_sec) + (cur.tv_nsec - start.tv_nsec)*1e-9;
if(!start.tv_sec)
(void)clock_gettime(CLOCK_REALTIME, &start);
(void)clock_gettime(CLOCK_REALTIME, &ts);
t = (ts.tv_sec - start.tv_sec) + (ts.tv_nsec - start.tv_nsec)*1e-9;
#elif HAVE_GETTIMEOFDAY #elif HAVE_GETTIMEOFDAY
static struct timeval start;
struct timeval cur; struct timeval cur;
if(!start.tv_sec)
gettimeofday(&start, 0);
gettimeofday(&cur, 0); gettimeofday(&cur, 0);
t = (cur.tv_sec - start.tv_sec) + (cur.tv_usec - start.tv_usec)*1e-6; t = (cur.tv_sec - start.tv_sec) + (cur.tv_usec - start.tv_usec)*1e-6;
#elif OS_WIN
t = whrt_Time();
#else #else
# error "get_time: add timer implementation for this platform!" # error "get_time: add timer implementation for this platform!"
#endif #endif
@ -77,19 +87,17 @@ double timer_res()
double res = 0.0; double res = 0.0;
#if HAVE_CLOCK_GETTIME #if HAVE_CLOCK_GETTIME
struct timespec ts; struct timespec ts;
if(clock_getres(CLOCK_REALTIME, &ts) == 0) if(clock_getres(CLOCK_REALTIME, &ts) == 0)
res = ts.tv_nsec * 1e-9; res = ts.tv_nsec * 1e-9;
#elif OS_WIN
res = whrt_Resolution();
#else #else
const double t0 = get_time(); const double t0 = get_time();
double t1, t2; double t1, t2;
do t1 = get_time(); while(t1 == t0); do t1 = get_time(); while(t1 == t0);
do t2 = get_time(); while(t2 == t1); do t2 = get_time(); while(t2 == t1);
res = t2-t1; res = t2-t1;
#endif #endif
cached_res = res; cached_res = res;