Fixed hyperthreading (HT) detection (thanks to Philip for testing)
This was SVN commit r590.
This commit is contained in:
parent
0517546d19
commit
da7d60e3ec
@ -14,10 +14,9 @@ extern int cpu_smp;
|
|||||||
// not only logical hyperthreaded CPUs? relevant for wtime.
|
// not only logical hyperthreaded CPUs? relevant for wtime.
|
||||||
|
|
||||||
|
|
||||||
// set cpu_smp if there's more than 1 physical CPU -
|
// not possible with POSIX calls.
|
||||||
// need to know this for wtime's TSC safety check.
|
// called from ia32.cpp check_smp
|
||||||
// call on each processor (via on_each_cpu).
|
extern int on_each_cpu(void(*cb)());
|
||||||
extern void cpu_check_smp();
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
@ -109,6 +109,7 @@ __asm
|
|||||||
static char vendor_str[13];
|
static char vendor_str[13];
|
||||||
static int family, model, ext_family;
|
static int family, model, ext_family;
|
||||||
// used in manual cpu_type detect
|
// used in manual cpu_type detect
|
||||||
|
static u32 max_ext_func;
|
||||||
|
|
||||||
// caps
|
// caps
|
||||||
// treated as 128 bit field; order: std ecx, std edx, ext ecx, ext edx
|
// treated as 128 bit field; order: std ecx, std edx, ext ecx, ext edx
|
||||||
@ -119,6 +120,37 @@ static int have_brand_string = 0;
|
|||||||
// if false, need to detect cpu_type manually.
|
// if false, need to detect cpu_type manually.
|
||||||
// int instead of bool for easier setting from asm
|
// int instead of bool for easier setting from asm
|
||||||
|
|
||||||
|
// indices into the 4-element register array filled by cpuid(func, regs).
// the enumerator order (EAX, EBX, ECX, EDX) matches the order in which
// that function stores the registers, so regs[EDX] is the EDX result.
enum Regs
|
||||||
|
{
|
||||||
|
EAX,
|
||||||
|
EBX,
|
||||||
|
ECX,
|
||||||
|
EDX
|
||||||
|
};
|
||||||
|
|
||||||
|
// execute the CPUID instruction for function <func> and store the
// resulting eax, ebx, ecx, edx (in that order) into regs[0..3];
// index the results with enum Regs.
//
// returns false - without executing CPUID or touching regs - if
// func > max_ext_func; otherwise returns true.
//
// notes:
// - max_ext_func is a zero-initialized static that is written by the
//   detection asm (result of CPUID 0x80000000). until that has run,
//   every func other than 0 is rejected.
// - each stosd writes eax to [edi] and advances edi; the xchg
//   instructions move the next result register into eax first.
//   NOTE(review): this assumes the direction flag is clear - confirm.
static bool cpuid(u32 func, u32* regs)
|
||||||
|
{
|
||||||
|
// refuse function numbers above the highest one the CPU reported
if(func > max_ext_func)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
__asm
|
||||||
|
{
|
||||||
|
mov eax, [func]
|
||||||
|
cpuid
|
||||||
|
mov edi, [regs]
|
||||||
|
stosd
|
||||||
|
xchg eax, ebx
|
||||||
|
stosd
|
||||||
|
xchg eax, ecx
|
||||||
|
stosd
|
||||||
|
xchg eax, edx
|
||||||
|
stosd
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// (optimized for size)
|
// (optimized for size)
|
||||||
static void __declspec(naked) cpuid()
|
static void __declspec(naked) cpuid()
|
||||||
{
|
{
|
||||||
@ -168,6 +200,7 @@ __asm
|
|||||||
mov esi, 0x80000000
|
mov esi, 0x80000000
|
||||||
mov eax, esi
|
mov eax, esi
|
||||||
cpuid
|
cpuid
|
||||||
|
mov [max_ext_func], eax
|
||||||
cmp eax, esi ; max ext <= 0x80000000?
|
cmp eax, esi ; max ext <= 0x80000000?
|
||||||
jbe no_ext_funcs ; yes - no ext funcs at all
|
jbe no_ext_funcs ; yes - no ext funcs at all
|
||||||
lea esi, [esi+4] ; esi = 0x80000004
|
lea esi, [esi+4] ; esi = 0x80000004
|
||||||
@ -300,63 +333,63 @@ static void get_cpu_type()
|
|||||||
|
|
||||||
static void measure_cpu_freq()
|
static void measure_cpu_freq()
|
||||||
{
|
{
|
||||||
// get old policy and priority
|
// set max priority, to avoid interference while measuring.
|
||||||
int old_policy;
|
int old_policy; static sched_param old_param; // (static => 0-init)
|
||||||
static sched_param old_param;
|
|
||||||
pthread_getschedparam(pthread_self(), &old_policy, &old_param);
|
pthread_getschedparam(pthread_self(), &old_policy, &old_param);
|
||||||
// set max priority
|
|
||||||
static sched_param max_param;
|
static sched_param max_param;
|
||||||
max_param.sched_priority = sched_get_priority_max(SCHED_RR);
|
max_param.sched_priority = sched_get_priority_max(SCHED_RR);
|
||||||
pthread_setschedparam(pthread_self(), SCHED_RR, &max_param);
|
pthread_setschedparam(pthread_self(), SCHED_RR, &max_param);
|
||||||
|
|
||||||
// measure CPU frequency.
|
if(ia32_cap(TSC))
|
||||||
// balance measuring time (~ 10 ms) and accuracy (< 1 0/00 error -
|
// we require the TSC to measure actual CPU cycles per clock tick.
|
||||||
// ok for using the TSC as a time reference)
|
// counting loop iterations ("bogomips") is unreliable.
|
||||||
if(ia32_cap(TSC)) // needed to calculate freq; bogomips are a WAG
|
|
||||||
{
|
{
|
||||||
// stabilize CPUID for timing (first few calls take longer)
|
// rdtsc() uses cpuid to serialize instruction flow. the first
|
||||||
|
// few calls of this instruction are documented to take longer
|
||||||
|
// (no idea why), so we warm it up here.
|
||||||
__asm cpuid __asm cpuid __asm cpuid
|
__asm cpuid __asm cpuid __asm cpuid
|
||||||
|
|
||||||
u64 c0, c1;
|
|
||||||
|
|
||||||
std::vector<double> samples;
|
|
||||||
int num_samples = 16;
|
int num_samples = 16;
|
||||||
// if clock is low-res, do less samples so it doesn't take too long
|
// if clock is low-res, do less samples so it doesn't take too long.
|
||||||
|
// balance measuring time (~ 10 ms) and accuracy (< 1 0/00 error -
|
||||||
|
// ok for using the TSC as a time reference)
|
||||||
if(timer_res() >= 1e-3)
|
if(timer_res() >= 1e-3)
|
||||||
num_samples = 8;
|
num_samples = 8;
|
||||||
|
std::vector<double> samples(num_samples);
|
||||||
|
|
||||||
int i;
|
int i;
|
||||||
for(i = 0; i < num_samples; i++)
|
for(i = 0; i < num_samples; i++)
|
||||||
{
|
{
|
||||||
again:
|
double dt;
|
||||||
|
i64 dc;
|
||||||
|
// i64 because VC6 can't convert u64 -> double,
|
||||||
|
// and we don't need all 64 bits.
|
||||||
|
|
||||||
// count # of clocks in max{1 tick, 1 ms}
|
// count # of clocks in max{1 tick, 1 ms}
|
||||||
double t0;
|
|
||||||
double t1 = get_time();
|
|
||||||
// .. wait for start of tick
|
// .. wait for start of tick
|
||||||
|
const double t0 = get_time();
|
||||||
|
u64 c1; double t1;
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
c0 = rdtsc(); // changes quickly
|
c1 = rdtsc(); // changes quickly
|
||||||
t0 = get_time();
|
t1 = get_time();
|
||||||
}
|
}
|
||||||
while(t0 == t1);
|
while(t1 == t0);
|
||||||
// .. wait until start of next tick and at least 1 ms
|
// .. wait until start of next tick and at least 1 ms
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
c1 = rdtsc();
|
const u64 c2 = rdtsc();
|
||||||
t1 = get_time();
|
const double t2 = get_time();
|
||||||
|
dc = (i64)(c2 - c1);
|
||||||
|
// i64 rationale: see decl
|
||||||
|
dt = t2 - t1;
|
||||||
}
|
}
|
||||||
while(t1 < t0 + 1e-3);
|
while(dt < 1e-3);
|
||||||
|
|
||||||
double ds = t1 - t0;
|
|
||||||
if(ds < 0.0) // bogus time delta - take another sample
|
|
||||||
goto again;
|
|
||||||
|
|
||||||
// .. freq = (delta_clocks) / (delta_seconds);
|
// .. freq = (delta_clocks) / (delta_seconds);
|
||||||
// cpuid/rdtsc/timer overhead is negligible
|
// cpuid/rdtsc/timer overhead is negligible
|
||||||
double freq = (i64)(c1-c0) / ds;
|
const double freq = dc / dt;
|
||||||
// VC6 can't convert u64 -> double, and we don't need full range
|
samples[i] = freq;
|
||||||
|
|
||||||
samples.push_back(freq);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::sort(samples.begin(), samples.end());
|
std::sort(samples.begin(), samples.end());
|
||||||
@ -401,8 +434,7 @@ int get_cur_processor_id()
|
|||||||
|
|
||||||
// set cpu_smp if there's more than 1 physical CPU -
|
// set cpu_smp if there's more than 1 physical CPU -
|
||||||
// need to know this for wtime's TSC safety check.
|
// need to know this for wtime's TSC safety check.
|
||||||
// call on each processor (via on_each_cpu).
|
static void check_smp()
|
||||||
void cpu_check_smp()
|
|
||||||
{
|
{
|
||||||
assert(cpus > 0 && "must know # CPUs (call OS-specific detect first)");
|
assert(cpus > 0 && "must know # CPUs (call OS-specific detect first)");
|
||||||
|
|
||||||
@ -447,15 +479,24 @@ void cpu_check_smp()
|
|||||||
|
|
||||||
// more than 1 physical CPU found
|
// more than 1 physical CPU found
|
||||||
static int last_phys_id = -1;
|
static int last_phys_id = -1;
|
||||||
if(last_phys_id != phys_id)
|
if(last_phys_id != -1 && last_phys_id != phys_id)
|
||||||
cpu_smp = 1;
|
cpu_smp = 1;
|
||||||
|
last_phys_id = phys_id;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static void check_speedstep()
|
static void check_speedstep()
|
||||||
{
|
{
|
||||||
if(vendor == INTEL && ia32_cap(EST))
|
if(vendor == INTEL && ia32_cap(EST))
|
||||||
cpu_speedstep = true;
|
cpu_speedstep = 1;
|
||||||
|
|
||||||
|
if(vendor == AMD)
|
||||||
|
{
|
||||||
|
u32 regs[4];
|
||||||
|
if(cpuid(0x80000007, regs))
|
||||||
|
if(regs[EDX] & 2) // frequency ID control
|
||||||
|
cpu_speedstep = 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -474,6 +515,7 @@ void ia32_get_cpu_info()
|
|||||||
get_cpu_type();
|
get_cpu_type();
|
||||||
measure_cpu_freq();
|
measure_cpu_freq();
|
||||||
check_speedstep();
|
check_speedstep();
|
||||||
|
on_each_cpu(check_smp);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // #ifndef _M_IX86
|
#endif // #ifndef _M_IX86
|
||||||
|
@ -7,7 +7,8 @@
|
|||||||
|
|
||||||
|
|
||||||
// not possible with POSIX calls.
|
// not possible with POSIX calls.
|
||||||
static int on_each_cpu(void(*cb)())
|
// called from ia32.cpp check_smp
|
||||||
|
int on_each_cpu(void(*cb)())
|
||||||
{
|
{
|
||||||
const HANDLE hProcess = GetCurrentProcess();
|
const HANDLE hProcess = GetCurrentProcess();
|
||||||
|
|
||||||
@ -45,12 +46,6 @@ static int on_each_cpu(void(*cb)())
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static void check_smp()
|
|
||||||
{
|
|
||||||
on_each_cpu(cpu_check_smp);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
static void check_speedstep()
|
static void check_speedstep()
|
||||||
{
|
{
|
||||||
// CallNtPowerInformation
|
// CallNtPowerInformation
|
||||||
@ -135,7 +130,6 @@ int win_get_cpu_info()
|
|||||||
}
|
}
|
||||||
|
|
||||||
check_speedstep();
|
check_speedstep();
|
||||||
check_smp();
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user