1
0
forked from 0ad/0ad

fix stupidly incorrect calculation of smoothed frequency and timer resolution (fixes observed dependence of sim rate on framerate, see http://www.wildfiregames.com/forum/index.php?showtopic=1671)

also improved filtering of the frame delta times.
while at it, improved interface of whrt backends to avoid leaking
implementation details.

This was SVN commit r6401.
This commit is contained in:
janwas 2008-09-19 17:56:05 +00:00
parent 33ca85cb4c
commit c3a42633fa
15 changed files with 490 additions and 575 deletions

View File

@ -1,6 +1,8 @@
#include "precompiled.h"
#include "frequency_filter.h"
// relative deviation of the smoothed frequency from the stable frequency
// beyond which the stable frequency is updated.
// (double literal: 0.05f would be widened from float and not equal 0.05)
static const double errorTolerance = 0.05;
// base gain handed to the IIR filter (raised to a bias-dependent power).
static const double sensitivity = 0.10;
/**
* variable-width window for frequency determination
@ -29,7 +31,7 @@ public:
if(deltaTime <= m_minDeltaTime)
return false;
frequency = (1.0 / deltaTime) / m_numEvents;
frequency = m_numEvents / deltaTime;
m_numEvents = 0;
m_lastTime = time;
return true; // success
@ -48,8 +50,8 @@ private:
class IirFilter
{
public:
IirFilter(double initialValue)
: m_prev(initialValue)
IirFilter(double sensitivity, double initialValue)
: m_sensitivity(sensitivity), m_prev(initialValue)
{
}
@ -57,8 +59,7 @@ public:
double operator()(double x, int bias)
{
// sensitivity to changes ([0,1]).
// approximately equal to a 16 sample average.
const double gain = pow(0.08, ComputeExponent(bias));
const double gain = pow(m_sensitivity, ComputeExponent(bias));
return m_prev = x*gain + m_prev*(1.0-gain);
}
@ -73,6 +74,7 @@ private:
return -bias; // power-of-n
}
double m_sensitivity;
double m_prev;
};
@ -94,31 +96,32 @@ public:
// bias := exponential change to gain, (-inf, inf)
int ComputeBias(double smoothedValue, double value)
{
if(!WasOnSameSide(value)) // (must be done before updating history)
m_timesOnSameSide = 0; // see below
if(WasOnSameSide(value)) // (must be checked before updating history)
m_timesOnSameSide++;
else
m_timesOnSameSide = 0;
// update history
std::copy(m_history, m_history+m_historySize, m_history+1);
m_history[m_historySize-1] = value;
// suppress large jumps.
if(Change(m_history[m_historySize-1], value) > 0.30)
return -4; // gain -> 0
// dampen jitter
if(Change(smoothedValue, value) < 0.04)
return -1;
// dampen spikes/bounces.
if(WasSpike())
return -1;
return -2;
if(Change(smoothedValue, value) > 0.02) // ignore minor jitter
{
m_timesOnSameSide++;
// if the past few samples have been consistently above/below
// average, the function is changing and we need to catch up.
// (similar to I in a PID)
if(m_timesOnSameSide >= 3)
return std::min(m_timesOnSameSide, 4);
// if the past few samples have been consistently above/below
// average, the function is changing and we need to catch up.
// (similar to I in a PID)
if(m_timesOnSameSide >= 3)
return std::min(m_timesOnSameSide, 4);
}
// suppress large jumps.
if(Change(m_history[m_historySize-1], value) > 0.30)
return -4; // gain -> 0
return 0;
}
@ -164,7 +167,7 @@ class FrequencyFilter : public IFrequencyFilter
{
public:
FrequencyFilter(double resolution, double expectedFrequency)
: m_controller(expectedFrequency), m_frequencyEstimator(resolution), m_iirFilter(expectedFrequency)
: m_controller(expectedFrequency), m_frequencyEstimator(resolution), m_iirFilter(sensitivity, expectedFrequency)
, m_stableFrequency(expectedFrequency), m_smoothedFrequency(expectedFrequency)
{
}
@ -182,7 +185,7 @@ public:
// previous stable FPS value. round up because values are more often
// too low than too high.
const double difference = fabs(m_smoothedFrequency - m_stableFrequency);
if(difference > fminf(5.f, 0.05f*m_stableFrequency))
if(difference > errorTolerance*m_stableFrequency)
m_stableFrequency = (int)(m_smoothedFrequency + 0.99);
}

View File

@ -44,20 +44,15 @@ static ICounter* ConstructCounterAt(size_t id, void* address, size_t size)
switch(id)
{
case 0:
debug_assert(sizeof(CounterHPET) <= size);
return new(address) CounterHPET();
return CreateCounterHPET(address, size);
case 1:
debug_assert(sizeof(CounterTSC) <= size);
return new(address) CounterTSC();
return CreateCounterTSC(address, size);
case 2:
debug_assert(sizeof(CounterQPC) <= size);
return new(address) CounterQPC();
return CreateCounterQPC(address, size);
case 3:
debug_assert(sizeof(CounterPMT) <= size);
return new(address) CounterPMT();
return CreateCounterPMT(address, size);
case 4:
debug_assert(sizeof(CounterTGT) <= size);
return new(address) CounterTGT();
return CreateCounterTGT(address, size);
default:
return 0;
}

View File

@ -13,7 +13,7 @@
// derived implementations must be called CounterIMPL,
// where IMPL matches the WHRT_IMPL identifier. (see CREATE)
class ICounter : noncopyable
class ICounter
{
public:
// (compiler-generated) ctor only sets up the vptr
@ -51,13 +51,10 @@ public:
virtual double NominalFrequency() const = 0;
/**
* actual resolution [s]
* (override if the timer adjustment is greater than 1 tick).
* actual resolution [s]. differs from 1/NominalFrequency if the
* timer adjustment is greater than 1 tick.
**/
virtual double Resolution() const
{
return 1.0 / NominalFrequency();
}
virtual double Resolution() const = 0;
};

View File

@ -11,6 +11,8 @@
#include "precompiled.h"
#include "hpet.h"
#include "counter.h"
#include "lib/sysdep/os/win/win.h"
#include "lib/sysdep/os/win/mahaf.h"
#include "lib/sysdep/acpi.h"
@ -28,7 +30,7 @@ struct HpetDescriptionTable
u8 attributes;
};
struct CounterHPET::HpetRegisters
struct HpetRegisters
{
u64 capabilities;
u64 reserved1;
@ -51,78 +53,98 @@ static const u64 CONFIG_ENABLE = Bit<u64>(0);
//-----------------------------------------------------------------------------
LibError CounterHPET::Activate()
class CounterHPET : public ICounter
{
if(mahaf_IsPhysicalMappingDangerous())
return ERR::FAIL; // NOWARN (happens on Win2k)
if(!mahaf_Init())
return ERR::FAIL; // NOWARN (no Administrator privileges)
if(!acpi_Init())
WARN_RETURN(ERR::FAIL); // shouldn't fail, since we've checked mahaf_IsPhysicalMappingDangerous
const HpetDescriptionTable* hpet = (const HpetDescriptionTable*)acpi_GetTable("HPET");
if(!hpet)
return ERR::NO_SYS; // NOWARN (HPET not reported by BIOS)
debug_assert(hpet->baseAddress.addressSpaceId == ACPI_AS_MEMORY);
m_hpetRegisters = (volatile HpetRegisters*)mahaf_MapPhysicalMemory(hpet->baseAddress.address, sizeof(HpetRegisters));
if(!m_hpetRegisters)
WARN_RETURN(ERR::NO_MEM);
// start the counter (if not already running)
// note: do not reset value to 0 to avoid interfering with any
// other users of the timer (e.g. Vista QPC)
m_hpetRegisters->config |= CONFIG_ENABLE;
return INFO::OK;
}
void CounterHPET::Shutdown()
{
if(m_hpetRegisters)
public:
CounterHPET()
: m_hpetRegisters(0)
{
mahaf_UnmapPhysicalMemory((void*)m_hpetRegisters);
m_hpetRegisters = 0;
}
acpi_Shutdown();
mahaf_Shutdown();
}
virtual const char* Name() const
{
return "HPET";
}
bool CounterHPET::IsSafe() const
{
// the HPET having been created to address other timers' problems,
// it has no issues of its own.
return true;
}
LibError Activate()
{
if(mahaf_IsPhysicalMappingDangerous())
return ERR::FAIL; // NOWARN (happens on Win2k)
if(!mahaf_Init())
return ERR::FAIL; // NOWARN (no Administrator privileges)
if(!acpi_Init())
WARN_RETURN(ERR::FAIL); // shouldn't fail, since we've checked mahaf_IsPhysicalMappingDangerous
const HpetDescriptionTable* hpet = (const HpetDescriptionTable*)acpi_GetTable("HPET");
if(!hpet)
return ERR::NO_SYS; // NOWARN (HPET not reported by BIOS)
debug_assert(hpet->baseAddress.addressSpaceId == ACPI_AS_MEMORY);
m_hpetRegisters = (volatile HpetRegisters*)mahaf_MapPhysicalMemory(hpet->baseAddress.address, sizeof(HpetRegisters));
if(!m_hpetRegisters)
WARN_RETURN(ERR::NO_MEM);
u64 CounterHPET::Counter() const
{
// note: we assume the data bus can do atomic 64-bit transfers,
// which has been the case since the original Pentium.
// (note: see implementation of GetTickCount for an algorithm to
// cope with non-atomic reads)
return m_hpetRegisters->counterValue;
}
// start the counter (if not already running)
// note: do not reset value to 0 to avoid interfering with any
// other users of the timer (e.g. Vista QPC)
m_hpetRegisters->config |= CONFIG_ENABLE;
/**
* WHRT uses this to ensure the counter (running at nominal frequency)
* doesn't overflow more than once during CALIBRATION_INTERVAL_MS.
**/
size_t CounterHPET::CounterBits() const
{
const u64 caps = m_hpetRegisters->capabilities;
const size_t counterBits = (caps & CAP_SIZE64)? 64 : 32;
return counterBits;
}
return INFO::OK;
}
/**
* initial measurement of the tick rate. not necessarily correct
* (e.g. when using TSC: os_cpu_ClockFrequency isn't exact).
**/
double CounterHPET::NominalFrequency() const
void Shutdown()
{
if(m_hpetRegisters)
{
mahaf_UnmapPhysicalMemory((void*)m_hpetRegisters);
m_hpetRegisters = 0;
}
acpi_Shutdown();
mahaf_Shutdown();
}
bool IsSafe() const
{
// the HPET having been created to address other timers' problems,
// it has no issues of its own.
return true;
}
u64 Counter() const
{
// note: we assume the data bus can do atomic 64-bit transfers,
// which has been the case since the original Pentium.
// (note: see implementation of GetTickCount for an algorithm to
// cope with non-atomic reads)
return m_hpetRegisters->counterValue;
}
size_t CounterBits() const
{
const u64 caps = m_hpetRegisters->capabilities;
const size_t counterBits = (caps & CAP_SIZE64)? 64 : 32;
return counterBits;
}
double NominalFrequency() const
{
const u64 caps = m_hpetRegisters->capabilities;
const u32 timerPeriod_fs = (u32)bits(caps, 32, 63);
debug_assert(timerPeriod_fs != 0); // guaranteed by HPET spec
const double frequency = 1e15 / timerPeriod_fs;
return frequency;
}
double Resolution() const
{
return 1.0 / NominalFrequency();
}
private:
volatile HpetRegisters* m_hpetRegisters;
};
ICounter* CreateCounterHPET(void* address, size_t size)
{
const u64 caps = m_hpetRegisters->capabilities;
const u32 timerPeriod_fs = (u32)bits(caps, 32, 63);
debug_assert(timerPeriod_fs != 0); // guaranteed by HPET spec
const double frequency = 1e15 / timerPeriod_fs;
return frequency;
debug_assert(sizeof(CounterHPET) <= size);
return new(address) CounterHPET();
}

View File

@ -11,44 +11,7 @@
#ifndef INCLUDED_HPET
#define INCLUDED_HPET
#include "counter.h"
class CounterHPET : public ICounter
{
public:
CounterHPET()
: m_hpetRegisters(0)
{
}
virtual const char* Name() const
{
return "HPET";
}
virtual LibError Activate();
virtual void Shutdown();
virtual bool IsSafe() const;
virtual u64 Counter() const;
/**
* WHRT uses this to ensure the counter (running at nominal frequency)
* doesn't overflow more than once during CALIBRATION_INTERVAL_MS.
**/
virtual size_t CounterBits() const;
/**
* initial measurement of the tick rate. not necessarily correct
* (e.g. when using TSC: os_cpu_ClockFrequency isn't exact).
**/
virtual double NominalFrequency() const;
private:
struct HpetRegisters;
volatile HpetRegisters* m_hpetRegisters;
};
class ICounter;
extern ICounter* CreateCounterHPET(void* address, size_t size);
#endif // #ifndef INCLUDED_HPET

View File

@ -11,6 +11,8 @@
#include "precompiled.h"
#include "pmt.h"
#include "counter.h"
#include "lib/sysdep/os/win/win.h"
#include "lib/sysdep/acpi.h"
#include "lib/sysdep/os/win/mahaf.h"
@ -31,62 +33,83 @@ static const u32 TMR_VAL_EXT = Bit<u32>(8);
//-----------------------------------------------------------------------------
LibError CounterPMT::Activate()
{
// mahaf is needed for port I/O.
if(!mahaf_Init())
return ERR::FAIL; // NOWARN (no Administrator privileges)
if(!acpi_Init())
return ERR::FAIL; // NOWARN (happens on Win2k; see mahaf_IsPhysicalMappingDangerous)
// (note: it's called FADT, but the signature is "FACP")
const FADT* fadt = (const FADT*)acpi_GetTable("FACP");
if(!fadt)
WARN_RETURN(ERR::NO_SYS);
m_portAddress = u16_from_larger(fadt->pmTimerPortAddress);
return INFO::OK;
}
void CounterPMT::Shutdown()
class CounterPMT : public ICounter
{
acpi_Shutdown();
mahaf_Shutdown();
}
public:
CounterPMT()
: m_portAddress(0xFFFF)
{
}
bool CounterPMT::IsSafe() const
{
// the PMT has one issue: "Performance counter value may unexpectedly
// leap forward" (Q274323). This happens on some buggy Pentium-era
// systems under heavy PCI bus load. We are clever and observe that
// the TSC implementation would be used on such systems (because it
// has higher precedence and is safe on P5 CPUs), so the PMT is fine
// in general.
return true;
}
virtual const char* Name() const
{
return "PMT";
}
u64 CounterPMT::Counter() const
{
return mahaf_ReadPort32(m_portAddress);
}
LibError Activate()
{
// mahaf is needed for port I/O.
if(!mahaf_Init())
return ERR::FAIL; // NOWARN (no Administrator privileges)
if(!acpi_Init())
return ERR::FAIL; // NOWARN (happens on Win2k; see mahaf_IsPhysicalMappingDangerous)
// (note: it's called FADT, but the signature is "FACP")
const FADT* fadt = (const FADT*)acpi_GetTable("FACP");
if(!fadt)
WARN_RETURN(ERR::NO_SYS);
m_portAddress = u16_from_larger(fadt->pmTimerPortAddress);
/**
* WHRT uses this to ensure the counter (running at nominal frequency)
* doesn't overflow more than once during CALIBRATION_INTERVAL_MS.
**/
size_t CounterPMT::CounterBits() const
{
// (see previous acpi_GetTable call)
const FADT* fadt = (const FADT*)acpi_GetTable("FACP");
debug_assert(fadt); // Activate made sure FADT is available
const size_t counterBits = (fadt->flags & TMR_VAL_EXT)? 32 : 24;
return counterBits;
}
return INFO::OK;
}
/**
* initial measurement of the tick rate. not necessarily correct
* (e.g. when using TSC: os_cpu_ClockFrequency isn't exact).
**/
double CounterPMT::NominalFrequency() const
void Shutdown()
{
acpi_Shutdown();
mahaf_Shutdown();
}
bool IsSafe() const
{
// the PMT has one issue: "Performance counter value may unexpectedly
// leap forward" (Q274323). This happens on some buggy Pentium-era
// systems under heavy PCI bus load. We are clever and observe that
// the TSC implementation would be used on such systems (because it
// has higher precedence and is safe on P5 CPUs), so the PMT is fine
// in general.
return true;
}
u64 Counter() const
{
return mahaf_ReadPort32(m_portAddress);
}
size_t CounterBits() const
{
// (see previous acpi_GetTable call)
const FADT* fadt = (const FADT*)acpi_GetTable("FACP");
debug_assert(fadt); // Activate made sure FADT is available
const size_t counterBits = (fadt->flags & TMR_VAL_EXT)? 32 : 24;
return counterBits;
}
double NominalFrequency() const
{
return (double)PMT_FREQ;
}
double Resolution() const
{
return 1.0 / PMT_FREQ;
}
private:
u16 m_portAddress;
};
ICounter* CreateCounterPMT(void* address, size_t size)
{
return (double)PMT_FREQ;
debug_assert(sizeof(CounterPMT) <= size);
return new(address) CounterPMT();
}

View File

@ -11,44 +11,9 @@
#ifndef INCLUDED_PMT
#define INCLUDED_PMT
#include "counter.h"
static const i64 PMT_FREQ = 3579545; // (= master oscillator frequency/4)
class CounterPMT : public ICounter
{
public:
CounterPMT()
: m_portAddress(0xFFFF)
{
}
virtual const char* Name() const
{
return "PMT";
}
virtual LibError Activate();
virtual void Shutdown();
virtual bool IsSafe() const;
virtual u64 Counter() const;
/**
* WHRT uses this to ensure the counter (running at nominal frequency)
* doesn't overflow more than once during CALIBRATION_INTERVAL_MS.
**/
virtual size_t CounterBits() const;
/**
* initial measurement of the tick rate. not necessarily correct
* (e.g. when using TSC: os_cpu_ClockFrequency isn't exact).
**/
virtual double NominalFrequency() const;
private:
u16 m_portAddress;
};
class ICounter;
extern ICounter* CreateCounterPMT(void* address, size_t size);
#endif // #ifndef INCLUDED_PMT

View File

@ -11,6 +11,8 @@
#include "precompiled.h"
#include "qpc.h"
#include "counter.h"
#include "lib/sysdep/os_cpu.h"
#include "lib/sysdep/os/win/win.h"
#include "lib/sysdep/os/win/wutil.h" // wutil_argv
@ -18,105 +20,121 @@
#include "pmt.h" // PMT_FREQ
CounterQPC::CounterQPC()
: m_frequency(-1)
class CounterQPC : public ICounter
{
}
LibError CounterQPC::Activate()
{
// note: QPC is observed to be universally supported, but the API
// provides for failure, so play it safe.
LARGE_INTEGER qpcFreq, qpcValue;
const BOOL ok1 = QueryPerformanceFrequency(&qpcFreq);
const BOOL ok2 = QueryPerformanceCounter(&qpcValue);
WARN_RETURN_IF_FALSE(ok1 && ok2);
if(!qpcFreq.QuadPart || !qpcValue.QuadPart)
WARN_RETURN(ERR::FAIL);
m_frequency = (i64)qpcFreq.QuadPart;
return INFO::OK;
}
void CounterQPC::Shutdown()
{
}
bool CounterQPC::IsSafe() const
{
// note: we have separate modules that directly access some of the
// counters potentially used by QPC. disabling the redundant counters
// would be ugly (increased coupling). instead, we'll make sure our
// implementations could (if necessary) coexist with QPC, but it
// shouldn't come to that since only one counter is needed/used.
// the PIT is entirely safe (even if annoyingly slow to read)
if(m_frequency == PIT_FREQ)
return true;
// the PMT is generally safe (see discussion in CounterPMT::IsSafe),
// but older QPC implementations had problems with 24-bit rollover.
// "System clock problem can inflate benchmark scores"
// (http://www.lionbridge.com/bi/cont2000/200012/perfcnt.asp ; no longer
// online, nor findable in Google Cache / archive.org) tells of
// incorrect values every 4.6 seconds (i.e. 24 bits @ 3.57 MHz) unless
// the timer is polled in the meantime. fortunately, this is guaranteed
// by our periodic updates (which come at least that often).
if(m_frequency == PMT_FREQ)
return true;
// the TSC has been known to be buggy (even mentioned in MSDN). it is
// used on MP HAL systems and can be detected by comparing QPF with the
// CPU clock. we consider it unsafe unless the user promises (via
// command line) that it's patched and thus reliable on their system.
bool usesTsc = IsSimilarMagnitude(m_frequency, os_cpu_ClockFrequency());
// unconfirmed reports indicate QPC sometimes uses 1/3 of the
// CPU clock frequency, so check that as well.
usesTsc |= IsSimilarMagnitude(m_frequency, os_cpu_ClockFrequency()/3);
if(usesTsc)
public:
CounterQPC()
: m_frequency(-1)
{
const bool isTscSafe = wutil_HasCommandLineArgument("-wQpcTscSafe");
return isTscSafe;
}
// the HPET is reliable and used on Vista. it can't easily be recognized
// since its frequency is variable (the spec says > 10 MHz; the master
// 14.318 MHz oscillator is often used). considering frequencies in
// [10, 100 MHz) to be a HPET would be dangerous because it may actually
// be faster or RDTSC slower. we have to exclude all other cases and
// assume it's a HPET - and thus safe - if we get here.
return true;
}
virtual const char* Name() const
{
return "QPC";
}
u64 CounterQPC::Counter() const
{
// fairly time-critical here, don't check the return value
// (Activate made sure it succeeded initially)
LARGE_INTEGER qpc_value;
(void)QueryPerformanceCounter(&qpc_value);
return qpc_value.QuadPart;
}
LibError Activate()
{
// note: QPC is observed to be universally supported, but the API
// provides for failure, so play it safe.
/**
* WHRT uses this to ensure the counter (running at nominal frequency)
* doesn't overflow more than once during CALIBRATION_INTERVAL_MS.
**/
size_t CounterQPC::CounterBits() const
{
// there are reports of incorrect rollover handling in the PMT
// implementation of QPC (see CounterPMT::IsSafe). however, other
// counters would be used on those systems, so it's irrelevant.
// we'll report the full 64 bits.
return 64;
}
LARGE_INTEGER qpcFreq, qpcValue;
const BOOL ok1 = QueryPerformanceFrequency(&qpcFreq);
const BOOL ok2 = QueryPerformanceCounter(&qpcValue);
WARN_RETURN_IF_FALSE(ok1 && ok2);
if(!qpcFreq.QuadPart || !qpcValue.QuadPart)
WARN_RETURN(ERR::FAIL);
/**
* initial measurement of the tick rate. not necessarily correct
* (e.g. when using TSC: os_cpu_ClockFrequency isn't exact).
**/
double CounterQPC::NominalFrequency() const
m_frequency = (i64)qpcFreq.QuadPart;
return INFO::OK;
}
void Shutdown()
{
}
bool IsSafe() const
{
// note: we have separate modules that directly access some of the
// counters potentially used by QPC. disabling the redundant counters
// would be ugly (increased coupling). instead, we'll make sure our
// implementations could (if necessary) coexist with QPC, but it
// shouldn't come to that since only one counter is needed/used.
// the PIT is entirely safe (even if annoyingly slow to read)
if(m_frequency == PIT_FREQ)
return true;
// the PMT is generally safe (see discussion in CounterPMT::IsSafe),
// but older QPC implementations had problems with 24-bit rollover.
// "System clock problem can inflate benchmark scores"
// (http://www.lionbridge.com/bi/cont2000/200012/perfcnt.asp ; no longer
// online, nor findable in Google Cache / archive.org) tells of
// incorrect values every 4.6 seconds (i.e. 24 bits @ 3.57 MHz) unless
// the timer is polled in the meantime. fortunately, this is guaranteed
// by our periodic updates (which come at least that often).
if(m_frequency == PMT_FREQ)
return true;
// the TSC has been known to be buggy (even mentioned in MSDN). it is
// used on MP HAL systems and can be detected by comparing QPF with the
// CPU clock. we consider it unsafe unless the user promises (via
// command line) that it's patched and thus reliable on their system.
bool usesTsc = IsSimilarMagnitude(m_frequency, os_cpu_ClockFrequency());
// unconfirmed reports indicate QPC sometimes uses 1/3 of the
// CPU clock frequency, so check that as well.
usesTsc |= IsSimilarMagnitude(m_frequency, os_cpu_ClockFrequency()/3);
if(usesTsc)
{
const bool isTscSafe = wutil_HasCommandLineArgument("-wQpcTscSafe");
return isTscSafe;
}
// the HPET is reliable and used on Vista. it can't easily be recognized
// since its frequency is variable (the spec says > 10 MHz; the master
// 14.318 MHz oscillator is often used). considering frequencies in
// [10, 100 MHz) to be a HPET would be dangerous because it may actually
// be faster or RDTSC slower. we have to exclude all other cases and
// assume it's a HPET - and thus safe - if we get here.
return true;
}
u64 Counter() const
{
// fairly time-critical here, don't check the return value
// (Activate made sure it succeeded initially)
LARGE_INTEGER qpc_value;
(void)QueryPerformanceCounter(&qpc_value);
return qpc_value.QuadPart;
}
size_t CounterBits() const
{
// there are reports of incorrect rollover handling in the PMT
// implementation of QPC (see CounterPMT::IsSafe). however, other
// counters would be used on those systems, so it's irrelevant.
// we'll report the full 64 bits.
return 64;
}
double NominalFrequency() const
{
return (double)m_frequency;
}
double Resolution() const
{
return 1.0 / m_frequency;
}
private:
// used in several places and QPF is a bit slow+cumbersome.
// (i64 allows easier conversion to double)
i64 m_frequency;
};
ICounter* CreateCounterQPC(void* address, size_t size)
{
return (double)m_frequency;
debug_assert(sizeof(CounterQPC) <= size);
return new(address) CounterQPC();
}

View File

@ -11,41 +11,7 @@
#ifndef INCLUDED_QPC
#define INCLUDED_QPC
#include "counter.h"
class CounterQPC : public ICounter
{
public:
CounterQPC();
virtual const char* Name() const
{
return "QPC";
}
virtual LibError Activate();
virtual void Shutdown();
virtual bool IsSafe() const;
virtual u64 Counter() const;
/**
* WHRT uses this to ensure the counter (running at nominal frequency)
* doesn't overflow more than once during CALIBRATION_INTERVAL_MS.
**/
virtual size_t CounterBits() const;
/**
* initial measurement of the tick rate. not necessarily correct
* (e.g. when using TSC: os_cpu_ClockFrequency isn't exact).
**/
virtual double NominalFrequency() const;
private:
// used in several places and QPF is a bit slow+cumbersome.
// (i64 allows easier conversion to double)
i64 m_frequency;
};
class ICounter;
extern ICounter* CreateCounterQPC(void* address, size_t size);
#endif // #ifndef INCLUDED_QPC

View File

@ -15,6 +15,8 @@
#include "precompiled.h"
#include "tgt.h"
#include "counter.h"
#include "lib/sysdep/os/win/win.h"
#include <mmsystem.h>
@ -28,58 +30,61 @@
// causes significant slowdown.
static const UINT PERIOD_MS = 2;
LibError CounterTGT::Activate()
class CounterTGT : public ICounter
{
// note: timeGetTime is always available and cannot fail.
public:
virtual const char* Name() const
{
return "TGT";
}
MMRESULT ret = timeBeginPeriod(PERIOD_MS);
debug_assert(ret == TIMERR_NOERROR);
LibError Activate()
{
// note: timeGetTime is always available and cannot fail.
return INFO::OK;
}
MMRESULT ret = timeBeginPeriod(PERIOD_MS);
debug_assert(ret == TIMERR_NOERROR);
void CounterTGT::Shutdown()
return INFO::OK;
}
void Shutdown()
{
timeEndPeriod(PERIOD_MS);
}
bool IsSafe() const
{
// the only point of criticism is the possibility of falling behind
// due to lost interrupts. this can happen to any interrupt-based timer
// and some systems may lack a counter-based timer, so consider TGT
// 'safe'. note that it is still only chosen when all other timers fail.
return true;
}
u64 Counter() const
{
return timeGetTime();
}
size_t CounterBits() const
{
return 32;
}
double NominalFrequency() const
{
return 1000.0;
}
double Resolution() const
{
return PERIOD_MS*1e-3;
}
};
ICounter* CreateCounterTGT(void* address, size_t size)
{
timeEndPeriod(PERIOD_MS);
}
bool CounterTGT::IsSafe() const
{
// the only point of criticism is the possibility of falling behind
// due to lost interrupts. this can happen to any interrupt-based timer
// and some systems may lack a counter-based timer, so consider TGT
// 'safe'. note that it is still only chosen when all other timers fail.
return true;
}
u64 CounterTGT::Counter() const
{
return timeGetTime();
}
/**
* WHRT uses this to ensure the counter (running at nominal frequency)
* doesn't overflow more than once during CALIBRATION_INTERVAL_MS.
**/
size_t CounterTGT::CounterBits() const
{
return 32;
}
/**
* initial measurement of the tick rate. not necessarily correct
* (e.g. when using TSC: os_cpu_ClockFrequency isn't exact).
**/
double CounterTGT::NominalFrequency() const
{
return 1000.0;
}
/**
* actual resolution [s]
**/
double CounterTGT::Resolution() const
{
return PERIOD_MS*1e-3;
debug_assert(sizeof(CounterTGT) <= size);
return new(address) CounterTGT();
}

View File

@ -11,39 +11,7 @@
#ifndef INCLUDED_TGT
#define INCLUDED_TGT
#include "counter.h"
class CounterTGT : public ICounter
{
public:
virtual const char* Name() const
{
return "TGT";
}
virtual LibError Activate();
virtual void Shutdown();
virtual bool IsSafe() const;
virtual u64 Counter() const;
/**
* WHRT uses this to ensure the counter (running at nominal frequency)
* doesn't overflow more than once during CALIBRATION_INTERVAL_MS.
**/
virtual size_t CounterBits() const;
/**
* initial measurement of the tick rate. not necessarily correct
* (e.g. when using TSC: os_cpu_ClockFrequency isn't exact).
**/
virtual double NominalFrequency() const;
/**
* actual resolution [s]
**/
virtual double Resolution() const;
};
class ICounter;
extern ICounter* CreateCounterTGT(void* address, size_t size);
#endif // #ifndef INCLUDED_TGT

View File

@ -11,6 +11,8 @@
#include "precompiled.h"
#include "tsc.h"
#include "counter.h"
#include "lib/bits.h"
#include "lib/sysdep/os_cpu.h"
#include "lib/sysdep/os/win/win.h"
@ -59,116 +61,128 @@ static bool IsThrottlingPossible()
//-----------------------------------------------------------------------------
LibError CounterTSC::Activate()
class CounterTSC : public ICounter
{
public:
virtual const char* Name() const
{
return "TSC";
}
LibError Activate()
{
#if ARCH_IA32 || ARCH_AMD64
if(!x86_x64_cap(X86_X64_CAP_TSC))
return ERR::NO_SYS; // NOWARN (CPU doesn't support RDTSC)
if(!x86_x64_cap(X86_X64_CAP_TSC))
return ERR::NO_SYS; // NOWARN (CPU doesn't support RDTSC)
#endif
return INFO::OK;
}
void CounterTSC::Shutdown()
{
}
bool CounterTSC::IsSafe() const
{
// use of the TSC for timing is subject to a litany of potential problems:
// - separate, unsynchronized counters with offset and drift;
// - frequency changes (P-state transitions and STPCLK throttling);
// - failure to increment in C3 and C4 deep-sleep states.
// we will discuss the specifics below.
// SMP or multi-core => counters are unsynchronized. this could be
// solved by maintaining separate per-core counter states, but that
// requires atomic reads of the TSC and the current processor number.
//
// (otherwise, we have a subtle race condition: if preempted while
// reading the time and rescheduled on a different core, incorrect
// results may be returned, which would be unacceptable.)
//
// unfortunately this isn't possible without OS support or the
// as yet unavailable RDTSCP instruction => unsafe.
//
// (note: if the TSC is invariant, drift is no longer a concern.
// we could synchronize the TSC MSRs during initialization and avoid
// per-core counter state and the abovementioned race condition.
// however, we won't bother, since such platforms aren't yet widespread
// and would surely support the nice and safe HPET, anyway)
{
WinScopedLock lock(WHRT_CS);
const CpuTopology* topology = cpu_topology_Detect();
if(cpu_topology_NumPackages(topology) != 1 || cpu_topology_CoresPerPackage(topology) != 1)
return false;
return INFO::OK;
}
#if ARCH_IA32 || ARCH_AMD64
// recent CPU:
if(x86_x64_Generation() >= 7)
void Shutdown()
{
// note: 8th generation CPUs support C1-clock ramping, which causes
// drift on multi-core systems, but those were excluded above.
}
x86_x64_CpuidRegs regs;
regs.eax = 0x80000007;
if(x86_x64_cpuid(&regs))
bool IsSafe() const
{
// use of the TSC for timing is subject to a litany of potential problems:
// - separate, unsynchronized counters with offset and drift;
// - frequency changes (P-state transitions and STPCLK throttling);
// - failure to increment in C3 and C4 deep-sleep states.
// we will discuss the specifics below.
// SMP or multi-core => counters are unsynchronized. this could be
// solved by maintaining separate per-core counter states, but that
// requires atomic reads of the TSC and the current processor number.
//
// (otherwise, we have a subtle race condition: if preempted while
// reading the time and rescheduled on a different core, incorrect
// results may be returned, which would be unacceptable.)
//
// unfortunately this isn't possible without OS support or the
// as yet unavailable RDTSCP instruction => unsafe.
//
// (note: if the TSC is invariant, drift is no longer a concern.
// we could synchronize the TSC MSRs during initialization and avoid
// per-core counter state and the abovementioned race condition.
// however, we won't bother, since such platforms aren't yet widespread
// and would surely support the nice and safe HPET, anyway)
{
// TSC is invariant WRT P-state, C-state and STPCLK => safe.
if(regs.edx & PN_INVARIANT_TSC)
return true;
WinScopedLock lock(WHRT_CS);
const CpuTopology* topology = cpu_topology_Detect();
if(cpu_topology_NumPackages(topology) != 1 || cpu_topology_CoresPerPackage(topology) != 1)
return false;
}
// in addition to P-state transitions, we're also subject to
// STPCLK throttling. this happens when the chipset thinks the
// system is dangerously overheated; the OS isn't even notified.
// it may be rare, but could cause incorrect results => unsafe.
return false;
#if ARCH_IA32 || ARCH_AMD64
// recent CPU:
if(x86_x64_Generation() >= 7)
{
// note: 8th generation CPUs support C1-clock ramping, which causes
// drift on multi-core systems, but those were excluded above.
// newer systems also support the C3 Deep Sleep state, in which
// the TSC isn't incremented. that's not nice, but irrelevant
// since STPCLK dooms the TSC on those systems anyway.
}
x86_x64_CpuidRegs regs;
regs.eax = 0x80000007;
if(x86_x64_cpuid(&regs))
{
// TSC is invariant WRT P-state, C-state and STPCLK => safe.
if(regs.edx & PN_INVARIANT_TSC)
return true;
}
// in addition to P-state transitions, we're also subject to
// STPCLK throttling. this happens when the chipset thinks the
// system is dangerously overheated; the OS isn't even notified.
// it may be rare, but could cause incorrect results => unsafe.
return false;
// newer systems also support the C3 Deep Sleep state, in which
// the TSC isn't incremented. that's not nice, but irrelevant
// since STPCLK dooms the TSC on those systems anyway.
}
#endif
// we're dealing with a single older CPU; the only problem there is
// throttling, i.e. changes to the TSC frequency. we don't want to
// disable this because it may be important for cooling. the OS
// initiates changes but doesn't notify us; jumps are too frequent
// and drastic to detect and account for => unsafe.
if(IsThrottlingPossible())
return false;
// we're dealing with a single older CPU; the only problem there is
// throttling, i.e. changes to the TSC frequency. we don't want to
// disable this because it may be important for cooling. the OS
// initiates changes but doesn't notify us; jumps are too frequent
// and drastic to detect and account for => unsafe.
if(IsThrottlingPossible())
return false;
return true;
}
return true;
}
u64 CounterTSC::Counter() const
u64 Counter() const
{
return x86_x64_rdtsc();
}
/**
 * @return width of the counter value in bits. this is the constant 64;
 * nothing is queried from the hardware.
 **/
size_t CounterBits() const
{
return 64;
}
/**
 * initial estimate of the counter's tick rate.
 *
 * @return whatever os_cpu_ClockFrequency() reports; the value is passed
 * through unchanged (presumably in Hz - confirm against os_cpu).
 **/
double NominalFrequency() const
{
// WARNING: do not call x86_x64_ClockFrequency because it uses the
// HRT, which we're currently in the process of initializing.
// instead query CPU clock frequency via OS.
//
// note: even here, initial accuracy isn't critical because the
// clock is subject to thermal drift and would require continual
// recalibration anyway.
return os_cpu_ClockFrequency();
}
/**
 * @return reciprocal of NominalFrequency(), i.e. the duration of one
 * counter tick (in seconds if the frequency is in Hz - confirm).
 *
 * note: NominalFrequency() is re-evaluated on every call, and there is
 * no guard here against it returning zero.
 **/
double Resolution() const
{
return 1.0 / NominalFrequency();
}
};
ICounter* CreateCounterTSC(void* address, size_t size)
{
return x86_x64_rdtsc();
}
/**
* WHRT uses this to ensure the counter (running at nominal frequency)
* doesn't overflow more than once during CALIBRATION_INTERVAL_MS.
**/
size_t CounterTSC::CounterBits() const
{
return 64;
}
/**
* initial measurement of the tick rate. not necessarily correct
* (e.g. when using TSC: os_cpu_ClockFrequency isn't exact).
**/
double CounterTSC::NominalFrequency() const
{
// WARNING: do not call x86_x64_ClockFrequency because it uses the
// HRT, which we're currently in the process of initializing.
// instead query CPU clock frequency via OS.
//
// note: even here, initial accuracy isn't critical because the
// clock is subject to thermal drift and would require continual
// recalibration anyway.
return os_cpu_ClockFrequency();
debug_assert(sizeof(CounterTSC) <= size);
return new(address) CounterTSC();
}

View File

@ -11,34 +11,7 @@
#ifndef INCLUDED_TSC
#define INCLUDED_TSC
#include "counter.h"
/**
 * ICounter implementation that identifies itself as "TSC".
 * only Name() is defined inline; the remaining members are declared here
 * and must be defined in the corresponding source file.
 **/
class CounterTSC : public ICounter
{
public:
// @return the fixed identifier string for this counter.
virtual const char* Name() const
{
return "TSC";
}
// prepare the counter for use; reports success/failure via LibError.
// (presumably paired with Shutdown - confirm against ICounter.)
virtual LibError Activate();
virtual void Shutdown();
// @return whether this counter is considered safe to use.
virtual bool IsSafe() const;
// @return the current raw counter value.
virtual u64 Counter() const;
/**
* WHRT uses this to ensure the counter (running at nominal frequency)
* doesn't overflow more than once during CALIBRATION_INTERVAL_MS.
**/
virtual size_t CounterBits() const;
/**
* initial measurement of the tick rate. not necessarily correct
* (e.g. when using TSC: os_cpu_ClockFrequency isn't exact).
**/
virtual double NominalFrequency() const;
};
class ICounter;
extern ICounter* CreateCounterTSC(void* address, size_t size);
#endif // #ifndef INCLUDED_TSC

View File

@ -96,7 +96,7 @@ static void InitCounter()
nominalFrequency = counter->NominalFrequency();
resolution = counter->Resolution();
counterBits = counter->CounterBits();
debug_printf("HRT: counter=%s freq=%f res=%f bits=%d\n", counter->Name(), nominalFrequency, resolution, counterBits);
debug_printf("HRT: counter=%s freq=%g res=%g bits=%d\n", counter->Name(), nominalFrequency, resolution, counterBits);
// sanity checks
debug_assert(nominalFrequency >= 500.0-DBL_EPSILON);
@ -127,6 +127,7 @@ static inline u64 CounterDelta(u64 oldCounter, u64 newCounter)
/**
 * @return the cached `resolution` value, which InitCounter fills in from
 * counter->Resolution().
 *
 * the assertion catches reads while the value is still 0.0 (presumably
 * because InitCounter has not run yet - confirm).
 **/
double whrt_Resolution()
{
debug_assert(resolution != 0.0);
return resolution;
}

View File

@ -58,7 +58,9 @@ static struct timeval start;
void timer_LatchStartTime()
{
#if HAVE_CLOCK_GETTIME
#if OS_WIN
// whrt_Time starts at zero, nothing needs to be done.
#elif HAVE_CLOCK_GETTIME
(void)clock_gettime(CLOCK_REALTIME, &start);
#elif HAVE_GETTIMEOFDAY
gettimeofday(&start, 0);
@ -103,17 +105,17 @@ double timer_Resolution()
double res = 0.0;
#if HAVE_CLOCK_GETTIME
#if OS_WIN
res = whrt_Resolution();
#elif HAVE_CLOCK_GETTIME
struct timespec ts;
if(clock_getres(CLOCK_REALTIME, &ts) == 0)
res = ts.tv_nsec * 1e-9;
#elif OS_WIN
res = whrt_Resolution();
#else
const double t0 = timer_Time();
double t1, t2;
do t1 = timer_Time(); while(t1 == t0);
do t2 = timer_Time(); while(t2 == t1);
do t1 = timer_Time(); while(t1 == t0);
do t2 = timer_Time(); while(t2 == t1);
res = t2-t1;
#endif