1
0
forked from 0ad/0ad

fixes+improvements from work:

timer: cleanup, remove duplication
topology: fix incorrect param order
tsc: ensure MSRs are available before accessing
msr: add prefix to make clear that registers are nehalem-specific

This was SVN commit r7892.
This commit is contained in:
janwas 2010-08-10 17:03:17 +00:00
parent e6c3f1c482
commit 7eebe05485
8 changed files with 130 additions and 130 deletions

View File

@ -29,11 +29,12 @@
namespace MSR {
bool IsSupported()
bool IsAccessible()
{
if(!x86_x64_Cap(X86_X64_CAP_MSR))
return false;
// only read/writable from ring 0, so we need the driver.
if(mahaf_Init() < 0)
return false;

View File

@ -45,15 +45,15 @@ enum ModelSpecificRegisters
IA32_PERF_GLOBAL_OVF_CTRL = 0x390,
// Nehalem (requires HasNehalem)
PLATFORM_INFO = 0x0CE,
UNCORE_PERF_GLOBAL_CTRL = 0x391,
UNCORE_PERF_GLOBAL_STATUS = 0x392,
UNCORE_PERF_GLOBAL_OVF_CTRL = 0x393,
UNCORE_PMC0 = 0x3B0,
UNCORE_PERFEVTSEL0 = 0x3C0
NHM_PLATFORM_INFO = 0x0CE,
NHM_UNCORE_PERF_GLOBAL_CTRL = 0x391,
NHM_UNCORE_PERF_GLOBAL_STATUS = 0x392,
NHM_UNCORE_PERF_GLOBAL_OVF_CTRL = 0x393,
NHM_UNCORE_PMC0 = 0x3B0,
NHM_UNCORE_PERFEVTSEL0 = 0x3C0
};
LIB_API bool IsSupported();
LIB_API bool IsAccessible();
LIB_API bool HasEnergyPerfBias();
LIB_API bool HasNehalem();

View File

@ -222,7 +222,7 @@ static LibError InitCpuTopology()
std::set<size_t> values;
for(size_t processor = 0; processor < os_cpu_NumProcessors(); processor++)
{
const size_t value = ApicField(apicIds[processor], numValues, indexOfLowestBit);
const size_t value = ApicField(apicIds[processor], indexOfLowestBit, numValues);
values.insert(value);
}
return values.size();
@ -299,19 +299,19 @@ size_t cpu_topology_LogicalPerCore()
return cpuTopology.logicalPerCore;
}
size_t cpu_topology_LogicalFromId(size_t apicId)
size_t cpu_topology_LogicalFromApicId(size_t apicId)
{
ModuleInit(&cpuInitState, InitCpuTopology);
return ApicField(apicId, cpuTopology.logicalOffset, cpuTopology.maxLogicalPerCore);
}
size_t cpu_topology_CoreFromId(size_t apicId)
size_t cpu_topology_CoreFromApicId(size_t apicId)
{
ModuleInit(&cpuInitState, InitCpuTopology);
return ApicField(apicId, cpuTopology.coreOffset, cpuTopology.maxCoresPerPackage);
}
size_t cpu_topology_PackageFromId(size_t apicId)
size_t cpu_topology_PackageFromApicId(size_t apicId)
{
ModuleInit(&cpuInitState, InitCpuTopology);
return ApicField(apicId, cpuTopology.packageOffset, 256);

View File

@ -65,9 +65,9 @@ LIB_API size_t cpu_topology_CoresPerPackage();
LIB_API size_t cpu_topology_LogicalPerCore();
LIB_API size_t cpu_topology_LogicalFromId(size_t apicId);
LIB_API size_t cpu_topology_CoreFromId(size_t apicId);
LIB_API size_t cpu_topology_PackageFromId(size_t apicId);
LIB_API size_t cpu_topology_LogicalFromApicId(size_t apicId);
LIB_API size_t cpu_topology_CoreFromApicId(size_t apicId);
LIB_API size_t cpu_topology_PackageFromApicId(size_t apicId);
//-----------------------------------------------------------------------------

View File

@ -219,9 +219,9 @@ public:
// clock is subject to thermal drift and would require continual
// recalibration anyway.
#if ARCH_X86_X64
if(MSR::HasNehalem())
// IsAccessible must be *called*: a bare function name converts to a
// non-null pointer (always true), which would skip the accessibility check.
if(MSR::IsAccessible() && MSR::HasNehalem())
{
const u64 platformInfo = MSR::Read(MSR::PLATFORM_INFO);
const u64 platformInfo = MSR::Read(MSR::NHM_PLATFORM_INFO);
const u8 maxNonTurboRatio = bits(platformInfo, 8, 15);
return maxNonTurboRatio * 133.33e6f;
}

View File

@ -27,14 +27,16 @@
#include "precompiled.h"
#include "lib/timer.h"
#include <sstream> // std::stringstream
#include <numeric>
#include <math.h>
#include <float.h>
#include <stdarg.h>
#include <cmath>
#include <cfloat>
#include <cstdarg>
#include "lib/module_init.h"
#include "lib/posix/posix_time.h"
#if OS_WIN
#include "lib/sysdep/os/win/whrt/whrt.h"
# include "lib/sysdep/os/win/whrt/whrt.h"
#endif
#if OS_UNIX
# include <unistd.h>
@ -107,31 +109,33 @@ double timer_Time()
}
double timer_Resolution()
// cached because the default implementation may take several milliseconds
static double resolution;
static LibError InitResolution()
{
// may take a while to determine, so cache it
static double cached_res = 0.0;
if(cached_res != 0.0)
return cached_res;
double res = 0.0;
#if OS_WIN
res = whrt_Resolution();
resolution = whrt_Resolution();
#elif HAVE_CLOCK_GETTIME
struct timespec ts;
if(clock_getres(CLOCK_REALTIME, &ts) == 0)
res = ts.tv_nsec * 1e-9;
resolution = ts.tv_nsec * 1e-9;
#else
const double t0 = timer_Time();
double t1, t2;
do t1 = timer_Time(); while(t1 == t0);
do t2 = timer_Time(); while(t2 == t1);
res = t2-t1;
resolution = t2-t1;
#endif
cached_res = res;
return res;
return INFO::OK;
}
double timer_Resolution()
{
static ModuleInitState initState;
ModuleInit(&initState, InitResolution);
return resolution;
}
@ -182,3 +186,39 @@ void timer_DisplayClientTotals()
debug_printf(L"-----------------------------------------------------\n");
}
//-----------------------------------------------------------------------------
// formats a duration given in seconds as "<value><unit>", choosing the unit
// by magnitude: " s" above one second, " ms" above one millisecond and
// " us" for everything else (including zero and negative values).
std::wstring StringForSeconds(double seconds)
{
	// default: display as microseconds
	double multiplier = 1e6;
	const wchar_t* suffix = L" us";

	if(seconds > 1.0)
	{
		multiplier = 1;
		suffix = L" s";
	}
	else if(seconds > 1e-3)
	{
		multiplier = 1e3;
		suffix = L" ms";
	}

	std::wstringstream stream;
	stream << seconds*multiplier << suffix;
	return stream.str();
}
// formats a cycle count as "<value><unit>". the unit is " Gc", " Mc" or
// " kc" once the count exceeds ten of that unit; smaller counts are
// displayed unscaled with the unit " c".
std::wstring StringForCycles(i64 cycles)
{
	// default: display the raw count
	double multiplier = 1.0;
	const wchar_t* suffix = L" c";

	if(cycles > 10000000000LL)	// 10 Gc
	{
		multiplier = 1e-9;
		suffix = L" Gc";
	}
	else if(cycles > 10000000)	// 10 Mc
	{
		multiplier = 1e-6;
		suffix = L" Mc";
	}
	else if(cycles > 10000)	// 10 kc
	{
		multiplier = 1e-3;
		suffix = L" kc";
	}

	std::wstringstream stream;
	stream << cycles*multiplier << suffix;
	return stream.str();
}

View File

@ -34,7 +34,6 @@
# include "lib/sysdep/os_cpu.h" // os_cpu_ClockFrequency
#endif
#include <sstream> // std::stringstream
/**
* timer_Time will subsequently return values relative to the current time.
@ -52,6 +51,14 @@ LIB_API double timer_Time();
LIB_API double timer_Resolution();
/**
* internal helper functions for returning an easily readable
* string (i.e. re-scaled to appropriate units)
**/
LIB_API std::wstring StringForSeconds(double seconds);
LIB_API std::wstring StringForCycles(i64 cycles);
//-----------------------------------------------------------------------------
// scope timing
@ -67,18 +74,9 @@ public:
~ScopeTimer()
{
double t1 = timer_Time();
double dt = t1-m_t0;
// determine scale factor for pretty display
double scale = 1e6;
const wchar_t* unit = L"us";
if(dt > 1.0)
scale = 1, unit = L"s";
else if(dt > 1e-3)
scale = 1e3, unit = L"ms";
debug_printf(L"TIMER| %ls: %g %ls\n", m_description, dt*scale, unit);
const double t1 = timer_Time();
const std::wstring elapsedTimeString = StringForSeconds(t1-m_t0);
debug_printf(L"TIMER| %ls: %ls\n", m_description, elapsedTimeString.c_str());
}
private:
@ -137,7 +135,7 @@ private:
// since TIMER_ACCRUE et al. are called so often, we try to keep
// overhead to an absolute minimum. storing raw tick counts (e.g. CPU cycles
// returned by ia32_rdtsc) instead of absolute time has two benefits:
// returned by x86_x64_rdtsc) instead of absolute time has two benefits:
// - no need to convert from raw->time on every call
// (instead, it's only done once when displaying the totals)
// - possibly less overhead to querying the time itself
@ -160,63 +158,49 @@ class TimerUnit
public:
void SetToZero()
{
m_ticks = 0;
m_cycles = 0;
}
void SetFromTimer()
{
m_ticks = x86_x64_rdtsc();
m_cycles = x86_x64_rdtsc();
}
void AddDifference(TimerUnit t0, TimerUnit t1)
{
m_ticks += t1.m_ticks - t0.m_ticks;
m_cycles += t1.m_cycles - t0.m_cycles;
}
void AddDifferenceAtomic(TimerUnit t0, TimerUnit t1)
{
const i64 delta = t1.m_ticks - t0.m_ticks;
const i64 delta = t1.m_cycles - t0.m_cycles;
#if ARCH_AMD64
cpu_AtomicAdd((volatile intptr_t*)&m_ticks, (intptr_t)delta);
cpu_AtomicAdd((volatile intptr_t*)&m_cycles, (intptr_t)delta);
#else
retry:
if(!cpu_CAS64(&m_ticks, m_ticks, m_ticks+delta))
if(!cpu_CAS64(&m_cycles, m_cycles, m_cycles+delta))
goto retry;
#endif
}
void Subtract(TimerUnit t)
{
m_ticks -= t.m_ticks;
m_cycles -= t.m_cycles;
}
std::wstring ToString() const
{
debug_assert(m_ticks >= 0.0);
// determine scale factor for pretty display
double scale = 1.0;
const wchar_t* unit = L" c";
if(m_ticks > 10000000000LL) // 10 Gc
scale = 1e-9, unit = L" Gc";
else if(m_ticks > 10000000) // 10 Mc
scale = 1e-6, unit = L" Mc";
else if(m_ticks > 10000) // 10 kc
scale = 1e-3, unit = L" kc";
std::wstringstream ss;
ss << m_ticks*scale;
ss << unit;
return ss.str();
debug_assert(m_cycles >= 0.0);
return StringForCycles(m_cycles);
}
double ToSeconds() const
{
return m_ticks / os_cpu_ClockFrequency();
return m_cycles / os_cpu_ClockFrequency();
}
private:
i64 m_ticks;
i64 m_cycles;
};
#else
@ -261,19 +245,7 @@ retry:
std::wstring ToString() const
{
debug_assert(m_seconds >= 0.0);
// determine scale factor for pretty display
double scale = 1e6;
const wchar_t* unit = L" us";
if(m_seconds > 1.0)
scale = 1, unit = L" s";
else if(m_seconds > 1e-3)
scale = 1e3, unit = L" ms";
std::wstringstream ss;
ss << m_seconds*scale;
ss << unit;
return ss.str();
return StringForSeconds(m_seconds);
}
double ToSeconds() const
@ -299,7 +271,7 @@ struct TimerClient
TimerClient* next;
// how often timer_BillClient was called (helps measure relative
// how often the timer was billed (helps measure relative
// performance of something that is done indeterminately often).
intptr_t num_calls;
};
@ -307,7 +279,7 @@ struct TimerClient
/**
* make the given TimerClient (usually instantiated as static data)
* ready for use. returns its address for TIMER_ADD_CLIENT's convenience.
* this client's total (added to by timer_BillClient) will be
* this client's total (which is increased by a BillingPolicy) will be
* displayed by timer_DisplayClientTotals.
* notes:
* - may be called at any time;
@ -331,21 +303,29 @@ LIB_API TimerClient* timer_AddClient(TimerClient* tc, const wchar_t* description
/**
* bill the difference between t0 and t1 to the client's total.
**/
inline void timer_BillClient(TimerClient* tc, TimerUnit t0, TimerUnit t1)
struct BillingPolicy_Default
{
tc->sum.AddDifference(t0, t1);
tc->num_calls++;
}
void operator()(TimerClient* tc, TimerUnit t0, TimerUnit t1) const
{
tc->sum.AddDifference(t0, t1);
tc->num_calls++;
}
};
/**
* thread-safe version of timer_BillClient
* (not used by default due to its higher overhead)
* thread-safe (not used by default due to its higher overhead)
* note: we can't just use thread-local variables to avoid
* synchronization overhead because we don't have control over all
* threads (for accumulating their separate timer copies).
**/
inline void timer_BillClientAtomic(TimerClient* tc, TimerUnit t0, TimerUnit t1)
struct BillingPolicy_Atomic
{
tc->sum.AddDifferenceAtomic(t0, t1);
cpu_AtomicAdd(&tc->num_calls, +1);
}
void operator()(TimerClient* tc, TimerUnit t0, TimerUnit t1) const
{
tc->sum.AddDifferenceAtomic(t0, t1);
cpu_AtomicAdd(&tc->num_calls, +1);
}
};
/**
* display all clients' totals; does not reset them.
@ -353,7 +333,9 @@ inline void timer_BillClientAtomic(TimerClient* tc, TimerUnit t0, TimerUnit t1)
**/
LIB_API void timer_DisplayClientTotals();
/// used by TIMER_ACCRUE
template<class BillingPolicy = BillingPolicy_Default>
class ScopeTimerAccrue
{
NONCOPYABLE(ScopeTimerAccrue);
@ -368,29 +350,7 @@ public:
{
TimerUnit t1;
t1.SetFromTimer();
timer_BillClient(m_tc, m_t0, t1);
}
private:
TimerUnit m_t0;
TimerClient* m_tc;
};
class ScopeTimerAccrueAtomic
{
NONCOPYABLE(ScopeTimerAccrueAtomic);
public:
ScopeTimerAccrueAtomic(TimerClient* tc)
: m_tc(tc)
{
m_t0.SetFromTimer();
}
~ScopeTimerAccrueAtomic()
{
TimerUnit t1;
t1.SetFromTimer();
timer_BillClientAtomic(m_tc, m_t0, t1);
BillingPolicy()(m_tc, m_t0, t1);
}
private:
@ -403,22 +363,21 @@ private:
* bill it to the given TimerClient object. Can safely be nested.
* Useful for measuring total time spent in a function or basic block over the
* entire program.
* <description> must remain valid over the lifetime of this object;
* a string literal is safest.
* `client' is an identifier registered via TIMER_ADD_CLIENT.
*
* Example usage:
* TIMER_ADD_CLIENT(identifier);
*
* TIMER_ADD_CLIENT(client);
*
* void func()
* {
* TIMER_ACCRUE(name_of_pointer_to_client);
* TIMER_ACCRUE(client);
* // code to be measured
* }
*
* [at exit]
* [later or at exit]
* timer_DisplayClientTotals();
**/
#define TIMER_ACCRUE(client) ScopeTimerAccrue UID__(client)
#define TIMER_ACCRUE_ATOMIC(client) ScopeTimerAccrueAtomic UID__(client)
#define TIMER_ACCRUE(client) ScopeTimerAccrue<> UID__(client)
#define TIMER_ACCRUE_ATOMIC(client) ScopeTimerAccrue<BillingPolicy_Atomic> UID__(client)
#endif // #ifndef INCLUDED_TIMER

View File

@ -182,7 +182,7 @@ JSBool StopJsTimer(JSContext* cx, JSObject*, uintN argc, jsval* argv, jsval* rva
TimerUnit now;
now.SetFromTimer();
now.Subtract(js_timer_overhead);
timer_BillClient(&js_timer_clients[slot], js_start_times[slot], now);
BillingPolicy_Default()(&js_timer_clients[slot], js_start_times[slot], now);
js_start_times[slot].SetToZero();
return JS_TRUE;
}