0ad/source/lib/sysdep/win/wposix/wtime.cpp
janwas 160bd14bb0 robustify CPU freq detection and timer code, hopefully solve lockup issue
cpu: cache CPU info to prevent calling freq measurement code several
times
config: mention e.g. fopen_s instead of only the secure string functions
ia32: exception-safe scheduler setting; no longer use absolute max
priority (risky, could hang machine if loop contains a bug)
wtime: add note on TSC safety, slight improvements
timer: try and prevent timer from returning the same value

This was SVN commit r5075.
2007-05-16 15:56:47 +00:00

861 lines
22 KiB
C++

/**
* =========================================================================
* File : wtime.cpp
* Project : 0 A.D.
* Description : emulate POSIX high resolution timer on Windows.
* =========================================================================
*/
// license: GPL; see lib/license.txt
#include "precompiled.h"
#include "wtime.h"
#include <algorithm>
#include <numeric>
#include <cmath>
#include <ctime>
#include "lib/adts.h"
#include "lib/sysdep/ia32/ia32.h"
#include "lib/sysdep/cpu.h"
#include "wposix_internal.h"
#include "wpthread.h"
// define to disable time sources (useful for simulating other systems)
//#define NO_QPC
//#define NO_TSC
// number of calibration passes per second: the calibration thread waits
// 1/CALIBRATION_FREQ seconds between calls to calibrate_lk.
static const int CALIBRATION_FREQ = 1;
// hook wtime_init / wtime_shutdown into the startup and shutdown sequences.
// NOTE(review): the exact call order implied by the section names
// (PRE_LIBC, POST_ATEXIT) is defined by macros outside this file - confirm.
#pragma SECTION_PRE_LIBC(G)
WIN_REGISTER_FUNC(wtime_init);
#pragma FORCE_INCLUDE(wtime_init)
#pragma SECTION_POST_ATEXIT(D)
WIN_REGISTER_FUNC(wtime_shutdown);
#pragma FORCE_INCLUDE(wtime_shutdown)
#pragma SECTION_RESTORE
// error codes specific to this module.
namespace ERR
{
// returned by choose_impl when every time source has been ruled out.
const LibError TIMER_NO_SAFE_IMPL = -130100;
}
// associate a human-readable description with the error code at startup.
AT_STARTUP(\
error_setDescription(ERR::TIMER_NO_SAFE_IMPL, "No safe time source available");\
)
// see http://www.gamedev.net/reference/programming/features/timing/ .
// rationale:
// we no longer use TGT (timeGetTime), due to issues on Win9x; GTC (GetTickCount) is just as good.
// (don't want to accelerate the tick rate, because performance will suffer).
// avoid dependency on WinMM (event timer) to shorten startup time.
//
// we go to the trouble of allowing switching time sources at runtime
// (=> have to be careful to keep the timer continuous) because we want
// to allow overriding the implementation choice via command line switch,
// in case a time source turns out to have a serious problem.
// timer state; all of it is read/written with the lock held.
// (the initial values below correspond to the HRT_NONE impl, i.e.
// "not yet chosen"; -1.0 marks a frequency/resolution as unset.)

// initial measurement of the time source's tick rate [Hz]. not necessarily
// correct (e.g. when using TSC: cpu_ClockFrequency isn't exact).
static double hrt_nominal_freq = -1.0;
// actual resolution of the time source [s] (may differ from
// 1/hrt_nominal_freq for timers with adjustment > 1 tick).
static double hrt_res = -1.0;
// current ticks per second; average of last few values measured in
// calibrate_lk(). needed to prevent long-term drift, and because
// hrt_nominal_freq isn't necessarily correct. only affects the ticks since
// last calibration - don't want to retroactively change the time.
static double hrt_cur_freq = -1.0;
// ticks at init or last calibration.
// ticks since then are scaled by 1/hrt_cur_freq and added to hrt_cal_time
// to yield the current time.
static i64 hrt_cal_ticks = 0;
// value of hrt_time() at last calibration. needed so that changes to
// hrt_cur_freq don't affect the previous ticks (example: 72 ticks elapsed,
// nominal freq = 8 => time = 9.0. if freq is calculated as 9, time would
// go backwards to 8.0).
static double hrt_cal_time = 0.0;
// possible high resolution timers, in order of preference.
// see choose_impl for timer properties + problems.
// the enumerators are also used as index into overrides[], so their
// values must remain contiguous and start at 0.
enum HRTImpl
{
// CPU timestamp counter
HRT_TSC,
// Windows QueryPerformanceCounter
HRT_QPC,
// Windows GetTickCount
HRT_GTC,
// "no implementation": after a successful reset_impl_lk there will always
// be a valid timer in use. apart from being the initial value of
// hrt_impl, this is only passed to hrt_override_impl.
HRT_NONE,
// number of enumerators above (= size of overrides[])
HRT_NUM_IMPLS
};
// implementation currently in use; set by choose_impl.
static HRTImpl hrt_impl = HRT_NONE;
// while we do our best to work around timer problems or avoid them if unsafe,
// future requirements and problems may be different:
// allow the user or app to override our decisions (via hrt_override_impl)
enum HRTOverride
{
// allow use of this implementation if available,
// and we can work around its problems
//
// HACK: give it value 0 for easier static data initialization
HRT_DEFAULT = 0,
// override our 'safe to use' recommendation
// set by hrt_override_impl (via command line arg or console function):
// .. never use this implementation
HRT_DISABLE,
// .. treat this implementation as safe (it must still be available)
HRT_FORCE
};
// per-implementation override, indexed by HRTImpl.
// HACK: no init needed - static data is zeroed (= HRT_DEFAULT)
static HRTOverride overrides[HRT_NUM_IMPLS];
// compile-time check that the zero-initialization above means HRT_DEFAULT.
cassert((int)HRT_DEFAULT == 0);
// convenience: powers of ten used for unit conversions
// (ms<->ns, hectonanoseconds<->s, s<->ns).
static const long _1e6 = 1000000;
static const long _1e7 = 10000000;
// i64 so that products/sums involving nanosecond counts are done in 64 bits.
static const i64 _1e9 = 1000000000;
// acquire the critical section (WTIME_CS) that guards all timer state.
static inline void lock()
{
	win_lock(WTIME_CS);
}
// release the critical section (WTIME_CS) acquired by lock().
static inline void unlock()
{
	win_unlock(WTIME_CS);
}
// return whether d1 deviates from d2 by no more than the given relative
// tolerance, i.e. |d1/d2 - 1| <= relative_error_tolerance.
// note: the test is written as "not greater than" so that a NaN ratio
// (e.g. 0/0) is reported as similar, exactly as a plain '>' comparison does.
static bool IsSimilarMagnitude(double d1, double d2, const double relative_error_tolerance = 0.05)
{
	const double ratio = d1/d2;
	const double deviation = fabs(ratio - 1.0);
	return !(deviation > relative_error_tolerance);
}
// decide upon a HRT implementation, checking if we can work around
// each timer's issues on this platform, but allow user override
// in case there are unforeseen problems with one of them.
// order of preference (due to resolution and speed): TSC, QPC, GTC.
// split out of reset_impl so we can just return when impl is chosen.
//
// on success, sets hrt_impl, hrt_nominal_freq and hrt_res and returns
// INFO::OK. if every implementation has been ruled out (only possible if
// GTC was disabled via override), sets hrt_impl = HRT_NONE and
// hrt_nominal_freq = -1.0 and returns ERR::TIMER_NO_SAFE_IMPL.
// lock must be held (called from reset_impl_lk).
static LibError choose_impl()
{
// our verdict on the implementation currently being examined.
bool safe;
// apply the user's override (if any) to <safe>. expands to two unbraced
// if-statements, so only use it as a standalone statement.
#define SAFETY_OVERRIDE(impl)\
if(overrides[impl] == HRT_DISABLE)\
safe = false;\
if(overrides[impl] == HRT_FORCE)\
safe = true;
// used several times below, so latch it for convenience.
// 0.0 means "not yet known" (cpu module hasn't been initialized).
const double cpu_freq = cpu_IsModuleInitialized()? cpu_ClockFrequency() : 0.0;
#if CPU_IA32 && !defined(NO_TSC)
// CPU Timestamp Counter (incremented every clock)
// ns resolution, moderate precision (poor clock crystal?)
//
// issues:
// - multiprocessor systems: may be inconsistent across CPUs.
// we could discard really bad values, but that's still inaccurate.
// having a high-priority thread with set CPU affinity read the TSC
// might work, but would be rather slow. could fix the problem by
// keeping per-CPU timer state (freq and delta). we'd use the APIC ID
// (cpuid, function 1) or GetCurrentProcessorNumber (only available
// on Win Server 2003) to determine the CPU. however, this is
// too much work for little benefit ATM, so call it unsafe.
// - deep sleep modes: TSC may not be advanced.
// not a problem though, because if the TSC is disabled, the CPU
// isn't doing any other work, either.
// - SpeedStep/'gearshift' CPUs: frequency may change.
// this happens on notebooks now, but eventually desktop systems
// will do this as well (if not to save power, for heat reasons).
// frequency changes are too often and drastic to correct,
// and we don't want to mess with the system power settings => unsafe.
//
// reference notes on AMD's TSC invariance check follow; they are kept
// for information only and are not implemented by the code below.
/*
AMD has defined a CPUID feature bit that
software can test to determine if the TSC is
invariant. Issuing a CPUID instruction with an %eax register
value of 0x8000_0007, on a processor whose base family is
0xF, returns "Advanced Power Management Information" in the
%eax, %ebx, %ecx, and %edx registers. Bit 8 of the return
%edx is the "TscInvariant" feature flag which is set when
TSC is P-state, C-state, and STPCLK-throttling invariant; it
is clear otherwise.
*/
/*
if (CPUID.base_family < 0xf) {
// TSC drift doesn't exist on 7th Gen or less
// However, OS still needs to consider effects
// of P-state changes on TSC
return TRUE;
} else if (CPUID.AdvPowerMgmtInfo.TscInvariant) {
// Invariant TSC on 8th Gen or newer, use it
// (assume all cores have invariant TSC)
return TRUE;
} else if ((number_processors == 1)&&(number_cores == 1)){
// OK to use TSC on uni-processor-uni-core
// However, OS still needs to consider effects
// of P-state changes on TSC
return TRUE;
} else if ( (number_processors == 1) &&
(CPUID.effective_family == 0x0f) &&
!C1_ramp_8gen ){
// Use TSC on 8th Gen uni-proc with C1_ramp off
// However, OS still needs to consider effects
// of P-state changes on TSC
return TRUE;
} else {
return FALSE;
}
}
C1_ramp_8gen() {
// Check if C1-Clock ramping enabled in PMM7.CpuLowPwrEnh
// On 8th-Generation cores only. Assume BIOS has setup
// all Northbridges equivalently.
return (1 & read_pci_byte(bus=0,dev=0x18,fcn=3,offset=0x87));
}
*/
// TSC is only considered if the CPU clock frequency is known
// (needed as nominal frequency) and the CPU reports TSC support.
if(cpu_freq > 0.0 && ia32_cap(IA32_CAP_TSC))
{
// safe only on single-core, single-package systems without throttling
// (see issues above).
safe = (cpu_CoresPerPackage() == 1 && cpu_NumPackages() == 1 && cpu_IsThrottlingPossible() == 0);
SAFETY_OVERRIDE(HRT_TSC);
if(safe)
{
hrt_impl = HRT_TSC;
hrt_nominal_freq = cpu_ClockFrequency();
hrt_res = (1.0 / hrt_nominal_freq);
return INFO::OK;
}
}
#endif // TSC
#if OS_WIN && !defined(NO_QPC)
// Windows QueryPerformanceCounter API
// implementations:
// - PIT on Win2k - 838 ns resolution, slow to read (~3 us)
// - PMT on WinXP - 279 ns ", moderate overhead (700 ns?)
// issues:
// 1) Q274323: may jump several seconds under heavy PCI bus load.
// not a problem, because the older systems on which this occurs
// have safe TSCs, so that is used instead.
// 2) "System clock problem can inflate benchmark scores":
// incorrect value if not polled every 4.5 seconds? solved
// by calibration thread, which reads timer every second anyway.
// - TSC on MP HAL, sometimes with 1/3 of CPU freq.
// cache freq because QPF is fairly slow.
static i64 qpc_freq = -1; // set to 0 if unsupported
if(qpc_freq == -1) // first call
{
LARGE_INTEGER freq;
BOOL qpc_ok = QueryPerformanceFrequency(&freq);
qpc_freq = qpc_ok? freq.QuadPart : 0;
}
// QPC is available
if(qpc_freq > 0)
{
// PIT and PMT are safe (recognized by their fixed frequencies).
if(qpc_freq == 1193182 || qpc_freq == 3579545)
safe = true;
// make sure QPC doesn't use the TSC
// (if it were safe, we would have chosen it above)
else
{
// can't decide yet - assume unsafe
if(!cpu_IsModuleInitialized())
safe = false;
else
{
safe = true;
// compare QPC freq to CPU clock freq. note: we can't
// single out the HPET (as with PIT and PMT above) because
// its frequency is variable and at least 10 MHz.
if(IsSimilarMagnitude(qpc_freq, cpu_freq))
safe = false;
if(IsSimilarMagnitude(qpc_freq, cpu_freq/3)) // QPC sometimes uses RDTSC/3
safe = false;
}
}
SAFETY_OVERRIDE(HRT_QPC);
if(safe)
{
hrt_impl = HRT_QPC;
hrt_nominal_freq = (double)qpc_freq;
hrt_res = (1.0 / hrt_nominal_freq);
return INFO::OK;
}
}
#endif // QPC
//
// GTC
//
// last resort: considered safe unless explicitly disabled.
safe = true;
SAFETY_OVERRIDE(HRT_GTC);
if(safe)
{
hrt_impl = HRT_GTC;
hrt_nominal_freq = 1000.0; // units returned
hrt_res = 1e-2; // guess, in case the following fails
// get actual resolution
DWORD adj; BOOL adj_disabled; // unused, but must be passed to GSTA
DWORD timer_period; // [hectonanoseconds]
if(GetSystemTimeAdjustment(&adj, &timer_period, &adj_disabled))
hrt_res = (timer_period / 1e7);
return INFO::OK;
}
// nothing left to choose from.
hrt_impl = HRT_NONE;
hrt_nominal_freq = -1.0;
WARN_RETURN(ERR::TIMER_NO_SAFE_IMPL);
}
// return the current tick count of the active implementation (hrt_impl);
// the start point is unspecified and the unit depends on the
// implementation. lock must be held.
// if hrt_impl has no matching case (e.g. HRT_NONE), warns and returns 0.
//
// split to allow calling from reset_impl_lk without recursive locking.
// (not a problem, but avoids a BoundsChecker warning)
static i64 ticks_lk()
{
switch(hrt_impl)
{
// TSC
#if CPU_IA32 && !defined(NO_TSC)
case HRT_TSC:
return (i64)ia32_rdtsc();
#endif
// QPC
#if OS_WIN && !defined(NO_QPC)
case HRT_QPC:
{
LARGE_INTEGER i;
BOOL ok = QueryPerformanceCounter(&i);
WARN_IF_FALSE(ok); // shouldn't fail if it was chosen above
return i.QuadPart;
}
#endif
// GTC (milliseconds)
#if OS_WIN
case HRT_GTC:
return (i64)GetTickCount();
#endif
// add further timers here.
default:
debug_warn("invalid impl");
return 0;
} // switch(impl)
}
// return seconds since init. lock must be held.
//
// split to allow calling from calibrate without recursive locking.
// (not a problem, but avoids a BoundsChecker warning)
static double time_lk()
{
debug_assert(hrt_cur_freq > 0.0);
debug_assert(hrt_cal_ticks > 0);
// elapsed ticks and time since last calibration
const i64 delta_ticks = ticks_lk() - hrt_cal_ticks;
const double delta_time = delta_ticks / hrt_cur_freq;
return hrt_cal_time + delta_time;
}
// this module is dependent upon cpu.cpp (supplies information needed to
// choose a HRT), which in turn uses our timer to detect the CPU clock
// when running on Windows (clock(), the only cross platform HRT available on
// Windows, isn't good enough - only 10..15 ms resolution).
//
// we first use a safe timer, and choose again after client code calls
// hrt_override_impl when system information is available.
// the timer will work without this call, but it won't use certain
// implementations. we do it this way, instead of polling on each timer use,
// because a timer implementation change may cause the timer to jump a bit.
// (re)choose a HRT implementation and prepare it for use. lock must be held.
// returns INFO::OK, or the error reported by choose_impl.
//
// if the implementation changes, the timer state is re-seeded so that the
// reported time continues from its previous value.
//
// don't want to saddle timer module with the problem of initializing
// us on first call - it wouldn't otherwise need to be thread-safe.
static LibError reset_impl_lk()
{
	const HRTImpl prev_impl = hrt_impl;

	// time at which to continue if the implementation changes:
	// .. first call (hrt_cur_freq not yet set, so time_lk can't be called):
	//    0, i.e. the timer starts at 0.
	// .. otherwise: the time currently reported.
	const double resume_time = (hrt_cur_freq <= 0.0)? 0.0 : time_lk();

	RETURN_ERR(choose_impl());
	debug_assert(hrt_impl != HRT_NONE && hrt_nominal_freq > 0.0);

	// implementation changed: set everything calibration would output.
	if(hrt_impl != prev_impl)
	{
		hrt_cal_time = resume_time;
		hrt_cur_freq = hrt_nominal_freq;
		hrt_cal_ticks = ticks_lk();
	}

	debug_printf("HRT impl=%d nominal_freq=%f cur_freq=%f\n", hrt_impl, hrt_nominal_freq, hrt_cur_freq);
	return INFO::OK;
}
// thread-safe wrapper around ticks_lk: return the current tick count
// (unspecified start point).
static i64 hrt_ticks()
{
	lock();
	const i64 now = ticks_lk();
	unlock();
	return now;
}
// thread-safe wrapper around time_lk: return seconds since init.
static double hrt_time()
{
	double seconds;
	lock();
	seconds = time_lk();
	unlock();
	return seconds;
}
// convert the interval between two timestamps (as returned by hrt_ticks)
// to seconds; the result is negative if <end> precedes <start>.
// the *current* frequency is used for the entire interval, so this is
// not intended for long intervals.
static double hrt_delta_s(i64 start, i64 end)
{
	// take a snapshot under the lock (paranoia: reading a double
	// may not be atomic).
	lock();
	const double ticks_per_sec = hrt_cur_freq;
	unlock();

	debug_assert(ticks_per_sec != -1.0 && "hrt_delta_s: hrt_cur_freq not set");

	const i64 elapsed_ticks = end - start;
	return elapsed_ticks / ticks_per_sec;
}
// report the timer implementation currently in use, its nominal (rated)
// frequency and its resolution via the output parameters.
// nominal_freq is expected to be positive (asserted).
// the implementation only changes after hrt_override_impl.
static void hrt_query_impl(HRTImpl& impl, double& nominal_freq, double& res)
{
	// copy a consistent set of values while holding the lock ..
	lock();
	const HRTImpl cur_impl = hrt_impl;
	const double cur_nominal_freq = hrt_nominal_freq;
	const double cur_res = hrt_res;
	unlock();

	// .. and hand them to the caller.
	impl = cur_impl;
	nominal_freq = cur_nominal_freq;
	res = cur_res;

	debug_assert(nominal_freq > 0.0 && "hrt_query_impl: invalid hrt_nominal_freq");
}
// override our 'safe to use' decision for one implementation, then
// re-run the implementation choice; the timer may jump as a result.
// call with (HRT_DEFAULT, HRT_NONE) to merely re-evaluate the choice
// once system information has become available.
// returns ERR::INVALID_PARAM if either argument isn't a known enumerator,
// otherwise the result of reset_impl_lk.
static LibError hrt_override_impl(HRTOverride ovr, HRTImpl impl)
{
	const bool ovr_is_valid  = (ovr == HRT_DISABLE || ovr == HRT_FORCE || ovr == HRT_DEFAULT);
	const bool impl_is_valid = (impl == HRT_TSC || impl == HRT_QPC || impl == HRT_GTC || impl == HRT_NONE);
	if(!ovr_is_valid || !impl_is_valid)
		WARN_RETURN(ERR::INVALID_PARAM);

	lock();
	overrides[impl] = ovr;
	const LibError ret = reset_impl_lk();
	unlock();
	return ret;
}
//-----------------------------------------------------------------------------
// calibration
//-----------------------------------------------------------------------------
// 'safe' timer, used as the reference against which the HRT frequency is
// measured in calibrate_lk.
// .. its tick rate [Hz]: safe_time returns milliseconds.
static const long safe_timer_freq = 1000;
// return the current reading of the safe timer [ms].
// the value is truncated to long, so it may wrap around or be negative.
static long safe_time()
{
#if OS_WIN
return (long)GetTickCount();
#else
return (long)(clock() * 1000.0 / CLOCKS_PER_SEC);
#endif
}
// measure current HRT freq - prevents long-term drift; also useful because
// hrt_nominal_freq isn't necessarily exact.
//
// the ticks elapsed since the previous calibration are first converted to
// time using the *old* frequency and folded into hrt_cal_time (so that time
// already reported is never changed retroactively); then a new frequency
// estimate is derived by comparing against the safe millisecond timer.
//
// lock must be held.
static void calibrate_lk()
{
	debug_assert(hrt_cal_ticks > 0);

	// we're called from a WinMM event or after thread wakeup,
	// so the timer has just been updated.
	// no need to determine tick / compensate.

	// get elapsed HRT ticks and advance the calibration point.
	const i64 hrt_cur = ticks_lk();
	const i64 hrt_d = hrt_cur - hrt_cal_ticks;
	hrt_cal_ticks = hrt_cur;
	hrt_cal_time += hrt_d / hrt_cur_freq;

	// get elapsed time from safe millisecond timer.
	// safe_last's initial value is chosen so that dt is negative on the
	// first call => no estimate is added to the buffer.
	static long safe_last = LONG_MAX;
	const long safe_cur = safe_time();
	// note: must be calculated in floating point. integer division would
	// truncate dt to whole seconds (yielding 0 - and thus an infinite
	// estimate - for intervals slightly below 1 s), and subtracting the
	// longs directly could overflow.
	const double dt = ((double)safe_cur - (double)safe_last) / (double)safe_timer_freq;
	safe_last = safe_cur;

	// no (or negative) time elapsed: use a value that is certain to be
	// rejected by the plausibility check below.
	const double hrt_est_freq = (dt > 0.0)? (hrt_d / dt) : -1.0;

	// past couple of calculated hrt freqs, for averaging
	typedef RingBuf<double, 8> SampleBuf;
	static SampleBuf samples;

	// only add to buffer if within 10% of nominal
	// (don't want to pollute buffer with flukes / incorrect results)
	if(fabs(hrt_est_freq/hrt_nominal_freq - 1.0) < 0.10)
	{
		samples.push_back(hrt_est_freq);

		// average all samples in buffer
		const double freq_sum = std::accumulate(samples.begin(), samples.end(), 0.0);
		hrt_cur_freq = freq_sum / (int)samples.size();
	}
	// implausible estimate: discard history and fall back to nominal.
	else
	{
		samples.clear();
		hrt_cur_freq = hrt_nominal_freq;
	}

	debug_assert(hrt_cur_freq > 0.0);
}
// calibration thread
// note: winmm event is better than a thread or just checking elapsed time
// in hrt_ticks, because it's called right after GTC is updated;
// otherwise, we may be in the middle of a tick.
// however, we want to avoid dependency on WinMM to shorten startup time.
// hence, start a thread.
// .. handle of the calibration thread (created by init_calibration_thread,
//    joined by shutdown_calibration_thread).
static pthread_t thread;
// .. posted by shutdown_calibration_thread to request that the thread exit;
//    the thread waits on it with a timeout between calibration passes.
static sem_t exit_flag;
static void* calibration_thread(void* UNUSED(data))
{
debug_set_thread_name("wtime");
for(;;)
{
// calculate absolute timeout for sem_timedwait
struct timespec abs_timeout;
clock_gettime(CLOCK_REALTIME, &abs_timeout);
abs_timeout.tv_nsec += _1e9 / CALIBRATION_FREQ;
// .. handle nanosecond wraparound (must not be > 1000m)
if(abs_timeout.tv_nsec >= _1e9)
{
abs_timeout.tv_nsec -= _1e9;
abs_timeout.tv_sec++;
}
errno = 0;
// if we acquire the semaphore, exit was requested.
if(sem_timedwait(&exit_flag, &abs_timeout) == 0)
break;
// actual error: warn
if(errno != ETIMEDOUT)
debug_warn("wtime calibration_thread: sem_timedwait failed");
lock();
calibrate_lk();
unlock();
}
return 0;
}
// create the exit semaphore (initially 0) and start the calibration thread.
// the results of sem_init / pthread_create are not checked;
// always returns INFO::OK.
static inline LibError init_calibration_thread()
{
	(void)sem_init(&exit_flag, 0, 0);
	(void)pthread_create(&thread, 0, calibration_thread, 0);
	return INFO::OK;
}
// ask the calibration thread to exit (by posting exit_flag), wait until it
// has done so and then destroy the semaphore. always returns INFO::OK.
static inline LibError shutdown_calibration_thread()
{
	(void)sem_post(&exit_flag);
	(void)pthread_join(thread, 0);
	(void)sem_destroy(&exit_flag);
	return INFO::OK;
}
// choose the initial timer implementation, then start the calibration
// thread. returns the error from reset_impl_lk if choosing failed
// (in which case the thread is not started).
static LibError hrt_init()
{
	// the lock isn't needed yet: the calibration thread (the only other
	// party) is created after this call.
	RETURN_ERR(reset_impl_lk());

	const LibError ret = init_calibration_thread();
	return ret;
}
// stop the calibration thread.
static LibError hrt_shutdown()
{
	// the lock must NOT be held while waiting for the thread, or we could
	// deadlock:
	// 1) calibration_thread is about to call clock_gettime
	// 2) we take the lock and wait for the thread to exit
	// 3) the thread's clock_gettime waits on the lock we're holding
	//
	// nothing breaks without it, because the calibration thread takes the
	// lock itself where needed.
	const LibError ret = shutdown_calibration_thread();
	return ret;
}
//-----------------------------------------------------------------------------
// wtime wrapper: emulates POSIX functions
//-----------------------------------------------------------------------------
// NT system time and FILETIME are hectonanoseconds (100 ns units)
// since Jan. 1, 1601 UTC.
// SYSTEMTIME is a struct containing month, year, etc.
//
// FILETIME -> time_t routines; used by wposix filetime_to_time_t wrapper.
//
// hectonanoseconds between Windows and POSIX epoch, i.e. the FILETIME value
// that corresponds to Jan. 1, 1970 UTC. subtracting it converts a
// FILETIME-based count into one relative to the POSIX epoch.
static const u64 posix_epoch_hns = 0x019DB1DED53E8000;
// assemble the two 32-bit halves of a FILETIME into a 64-bit value.
// doing so field by field avoids the pitfall of casting FILETIME* to u64*,
// which is not safe due to differing alignment guarantees (on some
// platforms, that would result in an exception).
static u64 u64_from_FILETIME(const FILETIME* ft)
{
	const DWORD hi = ft->dwHighDateTime;
	const DWORD lo = ft->dwLowDateTime;
	return u64_from_u32(hi, lo);
}
// convert UTC FILETIME to seconds-since-1970 UTC: rebase from the Windows
// epoch to the POSIX epoch, then scale hectonanoseconds down to seconds.
// only the low 32 bits of the result are returned.
// NOTE(review): a FILETIME before 1970 makes the unsigned subtraction
// wrap around - presumably callers never pass such values; confirm.
//
// used by wfilesystem.
//
// note: RtlTimeToSecondsSince1970 isn't officially documented,
// so don't use that.
time_t wtime_utc_filetime_to_time_t(FILETIME* ft)
{
	const u64 hns_since_1601 = u64_from_FILETIME(ft);
	const u64 hns_since_1970 = hns_since_1601 - posix_epoch_hns;
	const u64 sec_since_1970 = hns_since_1970 / _1e7;
	return (time_t)(sec_since_1970 & 0xFFFFFFFF);
}
// return the system time as nanoseconds since the POSIX epoch.
// beware: the system time only has 10 or 15 ms resolution!
static i64 st_time_ns()
{
	FILETIME now;
	GetSystemTimeAsFileTime(&now);

	// rebase to the POSIX epoch; 1 hectonanosecond = 100 ns.
	const u64 hns_since_1970 = u64_from_FILETIME(&now) - posix_epoch_hns;
	const u64 ns_since_1970 = hns_since_1970 * 100;
	return ns_since_1970;
}
// return nanoseconds since the POSIX epoch with HRT resolution:
// the system time is latched once (on first call) and the time elapsed
// on the HRT since then is added to it.
static i64 time_ns()
{
	// reference point, latched on first call (st_base_ns == 0 means
	// "not yet latched").
	// strictly speaking, the HRT reading isn't needed (the HRT starts at 0
	// and will only be slightly higher by the time we get here; it wouldn't
	// matter if the result were a few ms off the real system time), but
	// since the system time has to be read anyway, do both.
	static double hrt_base_s;
	static i64 st_base_ns;
	if(st_base_ns == 0)
	{
		hrt_base_s = hrt_time();
		st_base_ns = st_time_ns();
	}

	const double elapsed_s = hrt_time() - hrt_base_s;
	return st_base_ns + cpu_i64FromDouble(elapsed_s * _1e9);
}
// module init (registered via WIN_REGISTER_FUNC): set up the HRT and
// latch the start times used by time_ns.
// the result of hrt_init is ignored; always returns INFO::OK.
static LibError wtime_init()
{
	(void)hrt_init();

	// called for its side effect only: the first call latches start times.
	(void)time_ns();

	return INFO::OK;
}
// module shutdown (registered via WIN_REGISTER_FUNC): shut down the HRT.
static LibError wtime_shutdown()
{
	const LibError ret = hrt_shutdown();
	return ret;
}
// re-evaluate the choice of timer implementation without changing any
// override (see hrt_override_impl). the result is ignored.
void wtime_reset_impl()
{
	(void)hrt_override_impl(HRT_DEFAULT, HRT_NONE);
}
// suspend the caller for about <ns> nanoseconds.
// intervals of at least 1 ms are passed to Sleep (in whole milliseconds);
// anything shorter is handled by busy-waiting on the HRT.
static void sleep_ns(i64 ns)
{
	const DWORD ms = DWORD(ns / _1e6);
	if(ms != 0)
	{
		Sleep(ms);
		return;
	}

	// sub-millisecond: spin until the requested time has elapsed.
	// (the timer is always read at least once)
	const i64 t0 = hrt_ticks();
	for(;;)
	{
		const i64 t1 = hrt_ticks();
		if(!(hrt_delta_s(t0, t1) * _1e9 < ns))
			break;
	}
}
int clock_gettime(clockid_t clock, struct timespec* t)
{
debug_assert(clock == CLOCK_REALTIME);
const i64 ns = time_ns();
t->tv_sec = (time_t)((ns / _1e9) & 0xFFFFFFFF);
t->tv_nsec = (long) (ns % _1e9);
return 0;
}
int clock_getres(clockid_t clock, struct timespec* ts)
{
debug_assert(clock == CLOCK_REALTIME);
HRTImpl impl;
double nominal_freq, res;
hrt_query_impl(impl, nominal_freq, res);
ts->tv_sec = 0;
ts->tv_nsec = (long)(res * 1e9);
return 0;
}
int nanosleep(const struct timespec* rqtp, struct timespec* /* rmtp */)
{
i64 ns = rqtp->tv_sec; // make sure we don't overflow
ns *= _1e9;
ns += rqtp->tv_nsec;
sleep_ns(ns);
return 0;
}
int gettimeofday(struct timeval* tv, void* UNUSED(tzp))
{
const long us = (long)(time_ns() / 1000);
tv->tv_sec = (time_t) (us / _1e6);
tv->tv_usec = (suseconds_t)(us % _1e6);
return 0;
}
// POSIX sleep emulation: suspend the caller for <sec> seconds.
// returns the number of seconds left to sleep, which is always 0 because
// Sleep isn't interrupted early here. (returning <sec> would tell callers
// that none of the requested time had elapsed.)
uint sleep(uint sec)
{
	Sleep(sec * 1000);	// don't bother checking for overflow (user's fault)
	return 0;
}
// POSIX usleep emulation: suspend the caller for <us> microseconds,
// which must be less than one second (asserted). always returns 0.
int usleep(useconds_t us)
{
	debug_assert(us < _1e6);

	// the multiplication can't overflow due to the limit on <us>.
	const i64 ns = us * 1000;
	sleep_ns(ns);
	return 0;
}