fixed HT detect (thanks to philip for testing)

This was SVN commit r590.
This commit is contained in:
janwas 2004-06-23 13:45:50 +00:00
parent 0517546d19
commit da7d60e3ec
3 changed files with 102 additions and 67 deletions

View File

@ -14,10 +14,9 @@ extern int cpu_smp;
// not only logical hyperthreaded CPUs? relevant for wtime. // not only logical hyperthreaded CPUs? relevant for wtime.
// set cpu_smp if there's more than 1 physical CPU - // not possible with POSIX calls.
// need to know this for wtime's TSC safety check. // called from ia32.cpp check_smp
// call on each processor (via on_each_cpu). extern int on_each_cpu(void(*cb)());
extern void cpu_check_smp();
#ifdef __cplusplus #ifdef __cplusplus

View File

@ -44,13 +44,13 @@ double _ceil(double f)
double r; double r;
const float _49 = 0.499999f; const float _49 = 0.499999f;
__asm __asm
{ {
fld [f] fld [f]
fadd [_49] fadd [_49]
frndint frndint
fstp [r] fstp [r]
} }
UNUSED(f) UNUSED(f)
@ -109,6 +109,7 @@ __asm
static char vendor_str[13]; static char vendor_str[13];
static int family, model, ext_family; static int family, model, ext_family;
// used in manual cpu_type detect // used in manual cpu_type detect
static u32 max_ext_func;
// caps // caps
// treated as 128 bit field; order: std ecx, std edx, ext ecx, ext edx // treated as 128 bit field; order: std ecx, std edx, ext ecx, ext edx
@ -119,6 +120,37 @@ static int have_brand_string = 0;
// if false, need to detect cpu_type manually. // if false, need to detect cpu_type manually.
// int instead of bool for easier setting from asm // int instead of bool for easier setting from asm
enum Regs
{
EAX,
EBX,
ECX,
EDX
};
static bool cpuid(u32 func, u32* regs)
{
if(func > max_ext_func)
return false;
__asm
{
mov eax, [func]
cpuid
mov edi, [regs]
stosd
xchg eax, ebx
stosd
xchg eax, ecx
stosd
xchg eax, edx
stosd
}
return true;
}
// (optimized for size) // (optimized for size)
static void __declspec(naked) cpuid() static void __declspec(naked) cpuid()
{ {
@ -168,6 +200,7 @@ __asm
mov esi, 0x80000000 mov esi, 0x80000000
mov eax, esi mov eax, esi
cpuid cpuid
mov [max_ext_func], eax
cmp eax, esi ; max ext <= 0x80000000? cmp eax, esi ; max ext <= 0x80000000?
jbe no_ext_funcs ; yes - no ext funcs at all jbe no_ext_funcs ; yes - no ext funcs at all
lea esi, [esi+4] ; esi = 0x80000004 lea esi, [esi+4] ; esi = 0x80000004
@ -300,63 +333,63 @@ static void get_cpu_type()
static void measure_cpu_freq() static void measure_cpu_freq()
{ {
// get old policy and priority // set max priority, to avoid interference while measuring.
int old_policy; int old_policy; static sched_param old_param; // (static => 0-init)
static sched_param old_param;
pthread_getschedparam(pthread_self(), &old_policy, &old_param); pthread_getschedparam(pthread_self(), &old_policy, &old_param);
// set max priority
static sched_param max_param; static sched_param max_param;
max_param.sched_priority = sched_get_priority_max(SCHED_RR); max_param.sched_priority = sched_get_priority_max(SCHED_RR);
pthread_setschedparam(pthread_self(), SCHED_RR, &max_param); pthread_setschedparam(pthread_self(), SCHED_RR, &max_param);
// measure CPU frequency. if(ia32_cap(TSC))
// balance measuring time (~ 10 ms) and accuracy (< 1 0/00 error - // we require the TSC to measure actual CPU cycles per clock tick.
// ok for using the TSC as a time reference) // counting loop iterations ("bogomips") is unreliable.
if(ia32_cap(TSC)) // needed to calculate freq; bogomips are a WAG
{ {
// stabilize CPUID for timing (first few calls take longer) // rdtsc() uses cpuid to serialize instruction flow. the first
// few calls of this instruction are documented to take longer
// (no idea why), so we warm it up here.
__asm cpuid __asm cpuid __asm cpuid __asm cpuid __asm cpuid __asm cpuid
u64 c0, c1;
std::vector<double> samples;
int num_samples = 16; int num_samples = 16;
// if clock is low-res, do less samples so it doesn't take too long // if clock is low-res, do less samples so it doesn't take too long.
// balance measuring time (~ 10 ms) and accuracy (< 1 0/00 error -
// ok for using the TSC as a time reference)
if(timer_res() >= 1e-3) if(timer_res() >= 1e-3)
num_samples = 8; num_samples = 8;
std::vector<double> samples(num_samples);
int i; int i;
for(i = 0; i < num_samples; i++) for(i = 0; i < num_samples; i++)
{ {
again: double dt;
i64 dc;
// i64 because VC6 can't convert u64 -> double,
// and we don't need all 64 bits.
// count # of clocks in max{1 tick, 1 ms} // count # of clocks in max{1 tick, 1 ms}
double t0;
double t1 = get_time();
// .. wait for start of tick // .. wait for start of tick
const double t0 = get_time();
u64 c1; double t1;
do do
{ {
c0 = rdtsc(); // changes quickly c1 = rdtsc(); // changes quickly
t0 = get_time(); t1 = get_time();
} }
while(t0 == t1); while(t1 == t0);
// .. wait until start of next tick and at least 1 ms // .. wait until start of next tick and at least 1 ms
do do
{ {
c1 = rdtsc(); const u64 c2 = rdtsc();
t1 = get_time(); const double t2 = get_time();
dc = (i64)(c2 - c1);
// i64 rationale: see decl
dt = t2 - t1;
} }
while(t1 < t0 + 1e-3); while(dt < 1e-3);
double ds = t1 - t0;
if(ds < 0.0) // bogus time delta - take another sample
goto again;
// .. freq = (delta_clocks) / (delta_seconds); // .. freq = (delta_clocks) / (delta_seconds);
// cpuid/rdtsc/timer overhead is negligible // cpuid/rdtsc/timer overhead is negligible
double freq = (i64)(c1-c0) / ds; const double freq = dc / dt;
// VC6 can't convert u64 -> double, and we don't need full range samples[i] = freq;
samples.push_back(freq);
} }
std::sort(samples.begin(), samples.end()); std::sort(samples.begin(), samples.end());
@ -401,8 +434,7 @@ int get_cur_processor_id()
// set cpu_smp if there's more than 1 physical CPU - // set cpu_smp if there's more than 1 physical CPU -
// need to know this for wtime's TSC safety check. // need to know this for wtime's TSC safety check.
// call on each processor (via on_each_cpu). static void check_smp()
void cpu_check_smp()
{ {
assert(cpus > 0 && "must know # CPUs (call OS-specific detect first)"); assert(cpus > 0 && "must know # CPUs (call OS-specific detect first)");
@ -447,15 +479,24 @@ void cpu_check_smp()
// more than 1 physical CPU found // more than 1 physical CPU found
static int last_phys_id = -1; static int last_phys_id = -1;
if(last_phys_id != phys_id) if(last_phys_id != -1 && last_phys_id != phys_id)
cpu_smp = 1; cpu_smp = 1;
last_phys_id = phys_id;
} }
static void check_speedstep() static void check_speedstep()
{ {
if(vendor == INTEL && ia32_cap(EST)) if(vendor == INTEL && ia32_cap(EST))
cpu_speedstep = true; cpu_speedstep = 1;
if(vendor == AMD)
{
u32 regs[4];
if(cpuid(0x80000007, regs))
if(regs[EDX] & 2) // frequency ID control
cpu_speedstep = 1;
}
} }
@ -474,6 +515,7 @@ void ia32_get_cpu_info()
get_cpu_type(); get_cpu_type();
measure_cpu_freq(); measure_cpu_freq();
check_speedstep(); check_speedstep();
on_each_cpu(check_smp);
} }
#endif // #ifndef _M_IX86 #endif // #ifndef _M_IX86

View File

@ -7,7 +7,8 @@
// not possible with POSIX calls. // not possible with POSIX calls.
static int on_each_cpu(void(*cb)()) // called from ia32.cpp check_smp
int on_each_cpu(void(*cb)())
{ {
const HANDLE hProcess = GetCurrentProcess(); const HANDLE hProcess = GetCurrentProcess();
@ -45,12 +46,6 @@ static int on_each_cpu(void(*cb)())
} }
static void check_smp()
{
on_each_cpu(cpu_check_smp);
}
static void check_speedstep() static void check_speedstep()
{ {
// CallNtPowerInformation // CallNtPowerInformation
@ -135,7 +130,6 @@ int win_get_cpu_info()
} }
check_speedstep(); check_speedstep();
check_smp();
return 0; return 0;
} }