0ad/source/lib/sysdep/win/hrt.cpp

486 lines
12 KiB
C++
Raw Normal View History

// Windows-specific high resolution timer
// Copyright (c) 2003 Jan Wassenberg
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// Contact info:
// Jan.Wassenberg@stud.uni-karlsruhe.de
// http://www.stud.uni-karlsruhe.de/~urkt/
#include <cmath>
#include <cassert>
#include <cstdlib>
#include <ctime>
#include <numeric>
#include "hrt.h"
#include "lib.h"
#include "adts.h"
#include "sysdep/ia32.h"
#include "detect.h"
#include "win_internal.h"
#include <mmsystem.h> // not included by win due to WIN32_LEAN_AND_MEAN
#ifdef _MSC_VER
#pragma comment(lib, "winmm.lib")
#endif
// ticks per second; average of last few values measured in calibrate.
// reset to hrt_nominal_freq by reset_impl_lk; stays at -1.0 until then
// (hrt_delta_s asserts on that value).
static double hrt_freq = -1.0;
// used to start the hrt tick values near 0:
// ticks_lk subtracts this from every raw timer reading.
static i64 hrt_origin = 0;
// timer implementation currently in use; HRT_NONE until choose_impl has run.
static HRTImpl hrt_impl = HRT_NONE;
// user overrides of the 'safe to use' decision, consulted by choose_impl.
// HRTImpl enums as index.
static HRTOverride overrides[3] = { HRT_DEFAULT, HRT_DEFAULT, HRT_DEFAULT };
// nominal (rated) frequency of hrt_impl [ticks per second];
// -1 until an implementation has been chosen.
static i64 hrt_nominal_freq = -1;
// guard the module state above. functions with the _lk suffix expect
// the caller to already hold this lock.
#define lock() win_lock(HRT_CS)
#define unlock() win_unlock(HRT_CS)
// decide upon a HRT implementation, checking if we can work around
// each timer's issues on this platform, but allow user override
// in case there are unforeseen problems with one of them.
// order of preference (due to resolution and speed): TSC, QPC, TGT.
// split out of reset_impl so we can just return when impl is chosen.
//
// post: hrt_impl and hrt_nominal_freq describe the chosen timer.
// TGT is selected unconditionally if nothing better is deemed safe,
// so hrt_impl is never left at HRT_NONE.
static void choose_impl()
{
    bool safe = false;

// apply the user's override (if any) for <impl> to the verdict in 'safe'.
// expands to two if-statements, so only use it as a complete statement.
#define SAFETY_OVERRIDE(impl)\
    if(overrides[impl] == HRT_DISABLE)\
        safe = false;\
    if(overrides[impl] == HRT_FORCE)\
        safe = true;

#if defined(_M_IX86) && !defined(NO_TSC)
    // CPU Timestamp Counter (incremented every clock)
    // ns resolution, moderate precision (poor clock crystal?)
    //
    // issues:
    // - multiprocessor systems: may be inconsistent across CPUs.
    //   could fix by keeping per-CPU timer state, but we'd need
    //   GetCurrentProcessorNumber (only available on Win Server 2003).
    //   spinning off a thread with set CPU affinity is too slow
    //   (we may have to wait until the next timeslice).
    //   we could discard really bad values, but that's still inaccurate.
    //   => unsafe.
    // - deep sleep modes: TSC may not be advanced.
    //   not a problem though, because if the TSC is disabled, the CPU
    //   isn't doing any other work, either.
    // - SpeedStep/'gearshift' CPUs: frequency may change.
    //   this happens on notebooks now, but eventually desktop systems
    //   will do this as well (if not to save power, for heat reasons).
    //   frequency changes are too often and drastic to correct,
    //   and we don't want to mess with the system power settings.
    //   => unsafe.
    if(cpu_caps & TSC && cpu_freq > 0.0)
    {
        safe = (cpus == 1 && !cpu_speedstep);
        SAFETY_OVERRIDE(HRT_TSC);
        if(safe)
        {
            hrt_impl = HRT_TSC;
            hrt_nominal_freq = (i64)cpu_freq;
            return;
        }
    }
#endif  // TSC

#if defined(_WIN32) && !defined(NO_QPC)
    // Windows QueryPerformanceCounter API
    // implementations:
    // - PIT on Win2k - 838 ns resolution, slow to read (~3 us)
    // - PMT on WinXP - 279 ns resolution, moderate overhead (700 ns?)
    //   issues:
    //   1) Q274323: may jump several seconds under heavy PCI bus load.
    //      not a problem, because the older systems on which this occurs
    //      have safe TSCs, so that is used instead.
    //   2) "System clock problem can inflate benchmark scores":
    //      incorrect value if not polled every 4.5 seconds? solved
    //      by calibration thread, which reads timer every second anyway.
    // - TSC on MP HAL - see TSC above.

    // cache freq because QPF is fairly slow.
    static i64 qpc_freq = -1;

    // first call - check if QPC is supported
    if(qpc_freq == -1)
    {
        LARGE_INTEGER i;
        BOOL qpc_ok = QueryPerformanceFrequency(&i);
        qpc_freq = qpc_ok? i.QuadPart : 0;
    }

    // QPC is available
    if(qpc_freq > 0)
    {
        // PIT and PMT are safe.
        if(qpc_freq == 1193182 || qpc_freq == 3579545)
            safe = true;
        // make sure QPC doesn't use the TSC
        // (if it were safe, we would have chosen it above)
        //
        // CPU clock not known: can't decide yet - assume unsafe.
        // tested with <= so that any non-positive value counts as
        // 'unknown', matching the cpu_freq > 0.0 check of the TSC branch.
        else if(cpu_freq <= 0.0)
            safe = false;
        else
        {
            // compare QPC freq to CPU clock freq - can't rule out HPET,
            // because its frequency isn't known (it's at least 10 MHz).
            double freq_dist = fabs(cpu_freq / qpc_freq - 1.0);
            // safe if freqs not within 5% (i.e. it doesn't use TSC)
            safe = freq_dist > 0.05;
        }

        SAFETY_OVERRIDE(HRT_QPC);
        if(safe)
        {
            hrt_impl = HRT_QPC;
            hrt_nominal_freq = qpc_freq;
            return;
        }
    }
#endif  // QPC

    //
    // TGT
    //
    // unconditional fallback; ticks_lk reads it via timeGetTime,
    // which counts milliseconds => 1000 ticks per second.
    hrt_impl = HRT_TGT;
    hrt_nominal_freq = 1000;
}
// read the currently selected timer and return its value relative to
// hrt_origin, i.e. ticks since first call. lock must be held.
//
// split to allow calling from reset_impl_lk without recursive locking.
static i64 ticks_lk()
{
    // raw reading of the active timer; remains 0 if there is none.
    i64 raw = 0;

    switch(hrt_impl)
    {
#if defined(_M_IX86) && !defined(NO_TSC)
    // TSC
    case HRT_TSC:
        raw = rdtsc();
        break;
#endif

#if defined(_WIN32) && !defined(NO_QPC)
    // QPC
    case HRT_QPC:
    {
        LARGE_INTEGER counter;
        QueryPerformanceCounter(&counter);
        raw = counter.QuadPart;
        break;
    }
#endif

#ifdef _WIN32
    // TGT
    case HRT_TGT:
        raw = (i64)timeGetTime();
        break;
#endif

    // add further timers here.

    // no timer chosen yet: report a raw value of 0.
    case HRT_NONE:
        break;

    // unknown (or compiled-out) implementation: complain in debug builds,
    // then behave as if no timer had been chosen.
    default:
        assert(0 && "hrt_ticks: invalid impl");
        break;
    }   // switch(impl)

    return raw - hrt_origin;
}
// this module is dependent upon detect (supplies system information needed to
// choose a HRT), which in turn uses our timer to detect the CPU clock
// when running on Windows (clock(), the only cross platform HRT available on
// Windows, isn't good enough - only 10..15 ms resolution).
//
// we first use a safe timer, and choose again after client code calls
// hrt_override_impl when system information is available.
// the timer will work without this call, but it won't use certain
// implementations. we do it this way, instead of polling every hrt_ticks,
// because a timer implementation change causes hrt_ticks to jump.
// choose a HRT implementation. lock must be held.
//
// don't want to saddle timer with the problem of initializing us
// on first call - it wouldn't otherwise need to be thread-safe.
static void reset_impl_lk()
{
HRTImpl old_impl = hrt_impl;
double old_time = 0.0;
// if not first time: want to reset tick origin
if(hrt_nominal_freq > 0)
old_time = ticks_lk() / hrt_freq;
// don't call hrt_time to avoid recursive lock.
choose_impl();
// post: hrt_impl != HRT_NONE, hrt_nominal_freq > 0
hrt_freq = (double)hrt_nominal_freq;
// if impl has changed, re-base tick counter.
// want it 0-based, but it must not go backwards WRT previous reading.
if(old_impl != hrt_impl)
hrt_origin = ticks_lk() - (i64)(old_time * hrt_freq);
}
// set once init_lk has completed.
// multiple entry points, can't use ONCE.
static bool initialized = false;

static void init_calibration_thread();

// one-time module setup: pick a timer implementation, then start the
// periodic calibration.
// call iff !initialized. lock must be held.
static void init_lk()
{
    assert(!initialized && "init_lk called more than once!");

    reset_impl_lk();
    init_calibration_thread();

    initialized = true;
}
// return ticks since first call.
// initializes the module if no other entry point has done so yet.
i64 hrt_ticks()
{
    lock();

    // only taken on the very first call into the module; lock is held.
    if(!initialized)
        init_lk();

    const i64 now = ticks_lk();

    unlock();
    return now;
}
// return seconds since first call.
// initializes the module if no other entry point has done so yet.
double hrt_time()
{
    lock();

    // only taken on the very first call into the module; lock is held.
    if(!initialized)
        init_lk();

    // convert the tick count using the current (calibrated) frequency.
    const double seconds = ticks_lk() / hrt_freq;

    unlock();
    return seconds;
}
// return seconds between start and end timestamps (returned by hrt_ticks).
// negative if end comes before start.
// uses the timer frequency in effect at the time of this call.
double hrt_delta_s(i64 start, i64 end)
{
    // take a snapshot of the frequency under the lock.
    // paranoia: reading double may not be atomic.
    lock();
    const double freq = hrt_freq;
    unlock();

    // hrt_freq only holds -1.0 before the module has been initialized.
    assert(freq != -1.0 && "hrt_delta_s called before hrt_ticks");

    const i64 elapsed_ticks = end - start;
    return elapsed_ticks / freq;
}
// return current timer implementation and its nominal (rated) frequency.
// nominal_freq is never 0.
// implementation only changes after hrt_override_impl.
//
// may be called before first hrt_ticks / hrt_time, so do init here also.
void hrt_query_impl(HRTImpl& impl, i64& nominal_freq)
{
    lock();

    if(!initialized)
        init_lk();

    // copy both values while the lock is held so they belong together.
    const HRTImpl cur_impl = hrt_impl;
    const i64 cur_nominal_freq = hrt_nominal_freq;

    unlock();

    impl = cur_impl;
    nominal_freq = cur_nominal_freq;

    assert(nominal_freq > 0 && "hrt_query_impl: invalid hrt_nominal_freq");
}
// override our 'safe to use' decision.
// resets (and chooses another, if applicable) implementation;
// the timer may jump after doing so.
// call with HRT_DEFAULT, HRT_NONE to re-evaluate implementation choice
// after system info becomes available.
//
// returns 0 on success, or -1 (after asserting) if ovr or impl is not one
// of the known enumerators.
int hrt_override_impl(HRTOverride ovr, HRTImpl impl)
{
    if((ovr != HRT_DISABLE && ovr != HRT_FORCE && ovr != HRT_DEFAULT) ||
       (impl != HRT_TSC && impl != HRT_QPC && impl != HRT_TGT && impl != HRT_NONE))
    {
        assert(0 && "hrt_override: invalid ovr or impl param");
        return -1;
    }

    lock();

    // HRT_NONE merely requests re-evaluation, so there is nothing to store.
    // the range check is needed because four impl values pass validation
    // above, but overrides[] has fewer entries - never write past its end.
    const size_t num_overrides = sizeof(overrides) / sizeof(overrides[0]);
    if(impl != HRT_NONE && (size_t)impl < num_overrides)
        overrides[impl] = ovr;

    reset_impl_lk();

    unlock();
    return 0;
}
// 'safe' millisecond timer, used to measure HRT freq.
// returns a running millisecond count; calibrate only uses the difference
// between two readings.
static long ms_time()
{
#ifdef _WIN32
    return (long)timeGetTime();
#else
    // clock() counts in units of 1/CLOCKS_PER_SEC, which need not be
    // milliseconds - scale explicitly.
    return (long)(clock() * 1000.0 / CLOCKS_PER_SEC);
#endif
}
static void calibrate()
{
lock();
// past couple of calculated hrt freqs, for averaging
typedef RingBuf<double, 8> SampleBuf;
static SampleBuf samples;
const i64 hrt_cur = ticks_lk();
const long ms_cur = ms_time();
// get elapsed times since last call
static long ms_cal_time;
static i64 hrt_cal_time;
double hrt_ds = (hrt_cur - hrt_cal_time) / hrt_freq;
double ms_ds = (ms_cur - ms_cal_time) / 1e3;
hrt_cal_time = hrt_cur;
ms_cal_time = ms_cur;
//
// when we wake up, we don't know if timer has been updated yet.
// they may be off by 1 tick - try to compensate.
//
double dt = ms_ds; // actual elapsed time since last calibration
double hrt_err = ms_ds - hrt_ds;
double hrt_abs_err = fabs(hrt_err), hrt_rel_err = hrt_abs_err / ms_ds;
double hrt_est_freq = hrt_ds / ms_ds;
// only add to buffer if within 10% of nominal
// (don't want to pollute buffer with flukes / incorrect results)
if(fabs(hrt_est_freq / hrt_nominal_freq - 1.0) < 0.10)
{
samples.push_back(hrt_est_freq);
// average all samples in buffer
double freq_sum = std::accumulate(samples.begin(), samples.end(), 0.0);
hrt_freq = freq_sum / (int)samples.size();
}
else
{
samples.clear();
hrt_freq = (double)hrt_nominal_freq;
}
unlock();
}
#ifdef _WIN32
// setup calibration thread
// note: winmm event is better than a thread or just checking elapsed time
// in hrt_ticks, because it's called right after TGT is updated;
// otherwise, we may be in the middle of a tick.

// handle of the periodic winmm timer event, as returned by timeSetEvent.
static UINT mm_event;

// winmm timer callback; simply forwards to calibrate().
// keep calibrate() portable, don't need args anyway
static void CALLBACK trampoline(UINT /*uTimerID*/, UINT /*uMsg*/, DWORD_PTR /*dwUser*/, DWORD_PTR /*dw1*/, DWORD_PTR /*dw2*/)
{
    calibrate();
}
#endif
// start the periodic (once per second) calibration of the HRT frequency.
// on Windows this is a winmm timer event that calls calibrate() via
// trampoline; the event is killed at exit. no-op on other platforms.
static void init_calibration_thread()
{
#ifdef _WIN32
    // choosing resolution of winmm timer. don't want to increase the
    // system clock interrupt rate (=> higher system load),
    // so set res to current tick rate.
    //
    // the tick period is reported in the 'time increment' out-parameter
    // [100 ns units]; the 'time adjustment' value is not what we want.
    DWORD adj = 0, incr = 0;
    BOOL adj_disabled = FALSE;
    // fallback if the period can't be determined (conservative guess).
    DWORD res = 10; // [ms]
    // require at least 1 ms: a resolution of 0 would request the greatest
    // possible accuracy, which is exactly what we're trying to avoid.
    if(GetSystemTimeAdjustment(&adj, &incr, &adj_disabled) && incr >= 10000)
        res = incr / 10000;

    mm_event = timeSetEvent(1000, res, trampoline, 0, TIME_PERIODIC);
    // timeSetEvent returns 0 on failure - nothing to clean up then.
    // the timer still works without calibration (hrt_freq stays nominal).
    if(mm_event != 0)
        atexit2(timeKillEvent, mm_event);
#else
    // TODO: port thread. no big deal, and the timer works without.
#endif
}