had a look at CPU freq detect due to win32asm thread.
added comments + slight improvement (get system time first, then rdtsc) => good bit more accurate This was SVN commit r1206.
This commit is contained in:
parent
bc669568f9
commit
b6c1ad7126
@ -126,6 +126,7 @@ static int have_brand_string = 0;
|
|||||||
// if false, need to detect cpu_type manually.
|
// if false, need to detect cpu_type manually.
|
||||||
// int instead of bool for easier setting from asm
|
// int instead of bool for easier setting from asm
|
||||||
|
|
||||||
|
// order in which registers are stored in regs array
|
||||||
enum Regs
|
enum Regs
|
||||||
{
|
{
|
||||||
EAX,
|
EAX,
|
||||||
@ -339,21 +340,22 @@ static void get_cpu_type()
|
|||||||
|
|
||||||
static void measure_cpu_freq()
|
static void measure_cpu_freq()
|
||||||
{
|
{
|
||||||
// set max priority, to avoid interference while measuring.
|
// set max priority, to reduce interference while measuring.
|
||||||
int old_policy; static sched_param old_param; // (static => 0-init)
|
int old_policy; static sched_param old_param; // (static => 0-init)
|
||||||
pthread_getschedparam(pthread_self(), &old_policy, &old_param);
|
pthread_getschedparam(pthread_self(), &old_policy, &old_param);
|
||||||
static sched_param max_param;
|
static sched_param max_param;
|
||||||
max_param.sched_priority = sched_get_priority_max(SCHED_RR);
|
max_param.sched_priority = sched_get_priority_max(SCHED_RR);
|
||||||
pthread_setschedparam(pthread_self(), SCHED_RR, &max_param);
|
pthread_setschedparam(pthread_self(), SCHED_RR, &max_param);
|
||||||
|
|
||||||
if(ia32_cap(TSC))
|
if(ia32_cap(TSC))
|
||||||
// we require the TSC to measure actual CPU cycles per clock tick.
|
// make sure the TSC is available, because we're going to
|
||||||
|
// measure actual CPU clocks per known time interval.
|
||||||
// counting loop iterations ("bogomips") is unreliable.
|
// counting loop iterations ("bogomips") is unreliable.
|
||||||
{
|
{
|
||||||
// rdtsc() uses cpuid to serialize instruction flow. the first
|
// note: no need to "warm up" cpuid - it will already have been
|
||||||
// few calls of this instruction are documented to take longer
|
// called several times by the time this code is reached.
|
||||||
// (no idea why), so we warm it up here.
|
// (background: it's used in rdtsc() to serialize instruction flow;
|
||||||
__asm cpuid __asm cpuid __asm cpuid
|
// the first call is documented to be slower on Intel CPUs)
|
||||||
|
|
||||||
int num_samples = 16;
|
int num_samples = 16;
|
||||||
// if clock is low-res, do less samples so it doesn't take too long.
|
// if clock is low-res, do less samples so it doesn't take too long.
|
||||||
@ -371,45 +373,53 @@ static void measure_cpu_freq()
|
|||||||
// i64 because VC6 can't convert u64 -> double,
|
// i64 because VC6 can't convert u64 -> double,
|
||||||
// and we don't need all 64 bits.
|
// and we don't need all 64 bits.
|
||||||
|
|
||||||
// count # of clocks in max{1 tick, 1 ms}
|
// count # of clocks in max{1 tick, 1 ms}:
|
||||||
// .. wait for start of tick
|
// .. wait for start of tick.
|
||||||
const double t0 = get_time();
|
const double t0 = get_time();
|
||||||
u64 c1; double t1;
|
u64 c1; double t1;
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
c1 = rdtsc(); // changes quickly
|
// note: get_time effectively has a long delay (up to 5 µs)
|
||||||
|
// before returning the time. we call it before rdtsc to
|
||||||
|
// minimize the delay between actually sampling time / TSC,
|
||||||
|
// thus decreasing the chance for interference.
|
||||||
|
// (if unavoidable background activity, e.g. interrupts,
|
||||||
|
// delays the second reading, inaccuracy is introduced).
|
||||||
t1 = get_time();
|
t1 = get_time();
|
||||||
|
c1 = rdtsc();
|
||||||
}
|
}
|
||||||
while(t1 == t0);
|
while(t1 == t0);
|
||||||
// .. wait until start of next tick and at least 1 ms
|
// .. wait until start of next tick and at least 1 ms elapsed.
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
const u64 c2 = rdtsc();
|
|
||||||
const double t2 = get_time();
|
const double t2 = get_time();
|
||||||
|
const u64 c2 = rdtsc();
|
||||||
dc = (i64)(c2 - c1);
|
dc = (i64)(c2 - c1);
|
||||||
// i64 rationale: see decl
|
|
||||||
dt = t2 - t1;
|
dt = t2 - t1;
|
||||||
}
|
}
|
||||||
while(dt < 1e-3);
|
while(dt < 1e-3);
|
||||||
|
|
||||||
// .. freq = (delta_clocks) / (delta_seconds);
|
// .. freq = (delta_clocks) / (delta_seconds);
|
||||||
// cpuid/rdtsc/timer overhead is negligible
|
// cpuid/rdtsc/timer overhead is negligible.
|
||||||
const double freq = dc / dt;
|
const double freq = dc / dt;
|
||||||
samples[i] = freq;
|
samples[i] = freq;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::sort(samples.begin(), samples.end());
|
std::sort(samples.begin(), samples.end());
|
||||||
|
|
||||||
// median filter (remove upper and lower 25% and average the rest)
|
// median filter (remove upper and lower 25% and average the rest).
|
||||||
|
// note: don't just take the lowest value! it could conceivably be
|
||||||
|
// too low, if background processing delays reading c1 (see above).
|
||||||
double sum = 0.0;
|
double sum = 0.0;
|
||||||
const int lo = num_samples/4, hi = 3*num_samples/4;
|
const int lo = num_samples/4, hi = 3*num_samples/4;
|
||||||
for(i = lo; i < hi; i++)
|
for(i = lo; i < hi; i++)
|
||||||
sum += samples[i];
|
sum += samples[i];
|
||||||
cpu_freq = sum / (hi-lo);
|
cpu_freq = sum / (hi-lo);
|
||||||
}
|
|
||||||
// else: TSC not available, can't measure
|
|
||||||
|
|
||||||
// restore previous policy and priority
|
}
|
||||||
|
// else: TSC not available, can't measure; cpu_freq remains unchanged.
|
||||||
|
|
||||||
|
// restore previous policy and priority.
|
||||||
pthread_setschedparam(pthread_self(), old_policy, &old_param);
|
pthread_setschedparam(pthread_self(), old_policy, &old_param);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -514,7 +524,10 @@ void ia32_get_cpu_info()
|
|||||||
check_speedstep();
|
check_speedstep();
|
||||||
on_each_cpu(check_smp);
|
on_each_cpu(check_smp);
|
||||||
|
|
||||||
|
for(int i = 0; i < 10; i++){
|
||||||
measure_cpu_freq();
|
measure_cpu_freq();
|
||||||
|
debug_out("%f\n", cpu_freq);
|
||||||
|
}
|
||||||
|
|
||||||
// HACK: if _WIN32, the HRT makes its final implementation choice
|
// HACK: if _WIN32, the HRT makes its final implementation choice
|
||||||
// in the first calibrate call where cpu info is available.
|
// in the first calibrate call where cpu info is available.
|
||||||
|
Loading…
Reference in New Issue
Block a user