1
0
forked from 0ad/0ad

had a look at CPU freq detect due to win32asm thread.

added comments + slight improvement (get system time first, then rdtsc)
=> good bit more accurate

This was SVN commit r1206.
This commit is contained in:
janwas 2004-10-03 13:06:37 +00:00
parent bc669568f9
commit b6c1ad7126

View File

@ -126,6 +126,7 @@ static int have_brand_string = 0;
// if false, need to detect cpu_type manually.
// int instead of bool for easier setting from asm
// order in which registers are stored in regs array
enum Regs
{
EAX,
@ -339,21 +340,22 @@ static void get_cpu_type()
static void measure_cpu_freq()
{
// set max priority, to avoid interference while measuring.
int old_policy; static sched_param old_param; // (static => 0-init)
// set max priority, to reduce interference while measuring.
int old_policy; static sched_param old_param; // (static => 0-init)
pthread_getschedparam(pthread_self(), &old_policy, &old_param);
static sched_param max_param;
max_param.sched_priority = sched_get_priority_max(SCHED_RR);
pthread_setschedparam(pthread_self(), SCHED_RR, &max_param);
if(ia32_cap(TSC))
// we require the TSC to measure actual CPU cycles per clock tick.
// make sure the TSC is available, because we're going to
// measure actual CPU clocks per known time interval.
// counting loop iterations ("bogomips") is unreliable.
{
// rdtsc() uses cpuid to serialize instruction flow. the first
// few calls of this instruction are documented to take longer
// (no idea why), so we warm it up here.
__asm cpuid __asm cpuid __asm cpuid
// note: no need to "warm up" cpuid - it will already have been
// called several times by the time this code is reached.
// (background: it's used in rdtsc() to serialize instruction flow;
// the first call is documented to be slower on Intel CPUs)
int num_samples = 16;
// if clock is low-res, do less samples so it doesn't take too long.
@ -371,45 +373,53 @@ static void measure_cpu_freq()
// i64 because VC6 can't convert u64 -> double,
// and we don't need all 64 bits.
// count # of clocks in max{1 tick, 1 ms}
// .. wait for start of tick
// count # of clocks in max{1 tick, 1 ms}:
// .. wait for start of tick.
const double t0 = get_time();
u64 c1; double t1;
do
{
c1 = rdtsc(); // changes quickly
// note: get_time effectively has a long delay (up to 5 µs)
// before returning the time. we call it before rdtsc to
// minimize the delay between actually sampling time / TSC,
// thus decreasing the chance for interference.
// (if unavoidable background activity, e.g. interrupts,
// delays the second reading, inaccuracy is introduced).
t1 = get_time();
c1 = rdtsc();
}
while(t1 == t0);
// .. wait until start of next tick and at least 1 ms
// .. wait until start of next tick and at least 1 ms elapsed.
do
{
const u64 c2 = rdtsc();
const double t2 = get_time();
const u64 c2 = rdtsc();
dc = (i64)(c2 - c1);
// i64 rationale: see decl
dt = t2 - t1;
}
while(dt < 1e-3);
// .. freq = (delta_clocks) / (delta_seconds);
// cpuid/rdtsc/timer overhead is negligible
// cpuid/rdtsc/timer overhead is negligible.
const double freq = dc / dt;
samples[i] = freq;
}
std::sort(samples.begin(), samples.end());
// median filter (remove upper and lower 25% and average the rest)
// median filter (remove upper and lower 25% and average the rest).
// note: don't just take the lowest value! it could conceivably be
// too low, if background processing delays reading c1 (see above).
double sum = 0.0;
const int lo = num_samples/4, hi = 3*num_samples/4;
for(i = lo; i < hi; i++)
sum += samples[i];
cpu_freq = sum / (hi-lo);
}
// else: TSC not available, can't measure
// restore previous policy and priority
}
// else: TSC not available, can't measure; cpu_freq remains unchanged.
// restore previous policy and priority.
pthread_setschedparam(pthread_self(), old_policy, &old_param);
}
@ -514,7 +524,10 @@ void ia32_get_cpu_info()
check_speedstep();
on_each_cpu(check_smp);
for(int i = 0; i < 10; i++){
measure_cpu_freq();
debug_out("%f\n", cpu_freq);
}
// HACK: if _WIN32, the HRT makes its final implementation choice
// in the first calibrate call where cpu info is available.