1
0
forked from 0ad/0ad

read intel doc on multicore, realized it'd break our #cpu detect (which is actually critical because it determines safety of TSC). wrote new CPU detect code which should fix it (wasn't easy, grr); cannot test for lack of Pentium EE hardware ;p

This was SVN commit r2722.
This commit is contained in:
janwas 2005-09-14 16:58:10 +00:00
parent 4a7d0e9038
commit c50505f54c
9 changed files with 153 additions and 113 deletions

View File

@ -122,10 +122,9 @@ double cpu_freq = 0.f;
// -1 if detect not yet called, or cannot be determined
int cpus = -1;
int cpu_ht_units = -1;
int cpu_cores = -1;
int cpu_speedstep = -1;
int cpu_smp = -1;
// are there actually multiple physical processors,
// not only logical hyperthreaded CPUs? relevant for wtime.
void get_cpu_info()
{

View File

@ -11,18 +11,16 @@ extern char cpu_type[CPU_TYPE_LEN];
extern double cpu_freq;
// -1 if detect not yet called, or cannot be determined
extern int cpus;
// -1 if detect not yet called, or cannot be determined:
extern int cpus; // # packages (i.e. sockets; > 1 => SMP system)
extern int cpu_ht_units; // degree of hyperthreading, typically 2
extern int cpu_cores; // cores per package, typically 2
extern int cpu_speedstep;
extern int cpu_smp;
// are there actually multiple physical processors,
// not only logical hyperthreaded CPUs? relevant for wtime.
// not possible with POSIX calls.
// called from ia32.cpp check_smp
extern int on_each_cpu(void(*cb)());
extern void get_cpu_info(void);
@ -44,6 +42,7 @@ extern void mfence();
extern void serialize();
// Win32 CONTEXT field abstraction
// (there's no harm also defining this for other platforms)
#if CPU_AMD64

View File

@ -386,6 +386,108 @@ static void get_cpu_type()
}
//-----------------------------------------------------------------------------
static uint log_id_bits; // bit index; divides APIC ID into log and phys
static const uint INVALID_ID = ~0u;
static uint last_phys_id = INVALID_ID, last_log_id = INVALID_ID;
static uint phys_ids = 0, log_ids = 0;
// count # distinct physical and logical APIC IDs for get_cpu_count.
// called on each OS-visible "CPU" by on_each_cpu.
static void count_ids()
{
// get APIC id
u32 regs[4];
if(!ia32_cpuid(1, regs))
debug_warn("cpuid 1 failed");
const uint id = bits(regs[EBX], 24, 31);
// partition into physical and logical ID
const uint phys_id = bits(id, 0, log_id_bits-1);
const uint log_id = bits(id, log_id_bits, 7);
// note: APIC IDs are assigned sequentially, so we compare against the
// last one encountered.
if(last_phys_id != INVALID_ID && last_phys_id != phys_id)
cpus++;
if(last_log_id != INVALID_ID && last_log_id != log_id )
cpus++;
last_phys_id = phys_id;
last_log_id = log_id;
}
// fix CPU count reported by OS (incorrect if HT active or multicore);
// also separates it into cpu_ht_units and cpu_cores.
static void get_cpu_count()
{
debug_assert(cpus > 0 && "must know # 'CPU's (call OS-specific detect first)");
// get # "logical CPUs" per package (uniform over all packages).
// TFM is unclear but seems to imply this includes HT units *and* cores!
u32 regs[4];
if(!ia32_cpuid(1, regs))
debug_warn("ia32_cpuid(1) failed");
const uint log_cpu_per_package = bits(regs[EBX], 16, 23);
// .. and # cores
if(ia32_cpuid(4, regs))
cpu_cores = bits(regs[EBX], 26, 31)+1;
else
cpu_cores = 1;
// if HT is active (enabled in BIOS and OS), we have a problem:
// OSes (Windows at least) report # CPUs as packages * cores * HT_units.
// there is no direct way to determine if HT is actually enabled,
// so if it is supported, we have to examine all APIC IDs and
// figure out what kind of "CPU" each one is. *sigh*
//
// note: we don't check if it's Intel and P4 or above - HT may be
// supported on other CPUs in future. all processors should set this
// feature bit correctly, so it's not a problem.
if(ia32_cap(HT))
{
log_id_bits = log2(log_cpu_per_package); // see above
last_phys_id = last_log_id = INVALID_ID;
phys_ids = log_ids = 0;
if(on_each_cpu(count_ids) == 0)
{
cpus = phys_ids;
cpu_ht_units = log_ids / cpu_cores;
return; // this is authoritative
}
// OS apparently doesn't support CPU affinity.
// HT might be disabled, but return # units anyway.
else
cpu_ht_units = log_cpu_per_package / cpu_cores;
}
// not HT-capable; return 1 to allow total = cpus * HT_units * cores.
else
cpu_ht_units = 1;
cpus /= cpu_cores;
}
// set cpu_speedstep to 1 if the CPU reports a frequency-scaling feature:
// the EST capability on Intel, or the POWERNOW_FREQ_ID_CTRL bit of
// CPUID function 0x80000007 (EDX) on AMD. otherwise leaves it unchanged.
static void check_for_speedstep()
{
	bool can_change_freq = false;

	if(vendor == INTEL)
	{
		if(ia32_cap(EST))
			can_change_freq = true;
	}
	else if(vendor == AMD)
	{
		u32 regs[4];
		if(ia32_cpuid(0x80000007, regs) && (regs[EDX] & POWERNOW_FREQ_ID_CTRL))
			can_change_freq = true;
	}

	if(can_change_freq)
		cpu_speedstep = 1;
}
static void measure_cpu_freq()
{
@ -396,10 +498,10 @@ static void measure_cpu_freq()
max_param.sched_priority = sched_get_priority_max(SCHED_FIFO);
pthread_setschedparam(pthread_self(), SCHED_FIFO, &max_param);
// make sure the TSC is available, because we're going to
// measure actual CPU clocks per known time interval.
// counting loop iterations ("bogomips") is unreliable.
if(ia32_cap(TSC))
// make sure the TSC is available, because we're going to
// measure actual CPU clocks per known time interval.
// counting loop iterations ("bogomips") is unreliable.
{
// note: no need to "warm up" cpuid - it will already have been
// called several times by the time this code is reached.
@ -419,8 +521,8 @@ static void measure_cpu_freq()
{
double dt;
i64 dc;
// i64 because VC6 can't convert u64 -> double,
// and we don't need all 64 bits.
// i64 because VC6 can't convert u64 -> double,
// and we don't need all 64 bits.
// count # of clocks in max{1 tick, 1 ms}:
// .. wait for start of tick.
@ -473,99 +575,13 @@ static void measure_cpu_freq()
}
// set cpu_smp if there's more than 1 physical CPU -
// need to know this for wtime's TSC safety check.
// called on each CPU by on_each_cpu.
static void check_smp()
{
u32 regs[4];
debug_assert(cpus > 0 && "must know # CPUs (call OS-specific detect first)");
/*
if single-core and no HT
no change
if multi-core and no HT
phys = windows_cpus / cores_per_package
*/
// we don't check if it's Intel and P4 or above - HT may be supported
// on other CPUs in future. haven't come across a processor that
// incorrectly sets the HT feature bit.
if(!ia32_cap(HT))
{
// no HT supported, just check number of CPUs as reported by OS.
cpu_smp = (cpus > 1);
return;
}
// first call. we set cpu_smp below if more than 1 physical CPU is found,
// so clear it until then.
if(cpu_smp == -1)
cpu_smp = 0;
// multicore count
uint num_cores_per_package = 1;
if(ia32_cpuid(4, regs))
num_cores_per_package = bits(regs[EBX], 26, 31)+1;
//
// still need to check if HT is actually enabled (BIOS and OS);
// there might be 2 CPUs with HT supported but disabled.
//
// get number of logical CPUs per package
// (the same for all packages on this system)
if(!ia32_cpuid(1, regs))
debug_warn("cpuid 1 failed");
const uint log_cpus_per_package = bits(regs[EBX], 16, 23);
// logical CPUs are initialized after one another =>
// they have the same physical ID.
const uint cur_id = bits(regs[EBX], 24, 31);
const int phys_shift = ilog2(log_cpus_per_package);
const int phys_id = cur_id >> phys_shift;
// more than 1 physical CPU found
static int last_phys_id = -1;
if(last_phys_id != -1 && last_phys_id != phys_id)
cpu_smp = 1;
last_phys_id = phys_id;
}
// set cpu_speedstep to 1 if the CPU reports a frequency-scaling feature
// (INTEL_EST capability on Intel; POWERNOW_FREQ_ID_CTRL bit in EDX of
// CPUID function 0x80000007 on AMD). otherwise cpu_speedstep is untouched.
static void check_speedstep()
{
	if(vendor == INTEL)
	{
		if(!ia32_cap(INTEL_EST))
			return;
		cpu_speedstep = 1;
		return;
	}

	if(vendor == AMD)
	{
		u32 regs[4];
		if(!ia32_cpuid(0x80000007, regs))
			return;
		if(regs[EDX] & POWERNOW_FREQ_ID_CTRL)
			cpu_speedstep = 1;
	}
}
void ia32_get_cpu_info()
{
get_cpu_vendor();
get_cpu_type();
check_speedstep();
// linux doesn't have CPU affinity API:s (that we've found...)
#if OS_WIN
on_each_cpu(check_smp);
#endif
get_cpu_count();
check_for_speedstep();
measure_cpu_freq();
// HACK: on Windows, the HRT makes its final implementation choice

View File

@ -57,8 +57,8 @@ extern void ia32_memcpy(void* dst, const void* src, size_t nbytes);
enum CpuCap
{
// standard (ecx) - currently only defined by Intel
INTEL_SSE3 = 0+0, // Streaming SIMD Extensions 3
INTEL_EST = 0+7, // Enhanced Speedstep Technology
SSE3 = 0+0, // Streaming SIMD Extensions 3
EST = 0+7, // Enhanced Speedstep Technology
// standard (edx)
TSC = 32+4, // TimeStamp Counter

View File

@ -5,6 +5,9 @@
#if CPU_IA32
# include "ia32.h"
#endif
#if OS_WIN
# include "win/wcpu.h"
#endif
#include <memory.h>
@ -42,4 +45,18 @@ void memcpy2(void* dst, const void* src, size_t nbytes)
#else
memcpy(dst, src, nbytes);
#endif
}
}
// platform dispatcher for running a callback on each CPU.
// called from ia32.cpp get_cpu_count.
//
// on Windows, hands cb to wcpu_on_each_cpu and returns its result;
// elsewhere returns ERR_NO_SYS without calling cb.
int on_each_cpu(void(*cb)())
{
#if !OS_WIN
	// not possible with POSIX calls: non-Windows OSes apparently lack
	// a CPU affinity API.
	return ERR_NO_SYS;
#else
	return wcpu_on_each_cpu(cb);
#endif
}

View File

@ -128,7 +128,7 @@ extern int sys_cursor_set(void* cursor);
extern int sys_cursor_free(void* cursor);
extern void memcpy2(void* dst, const void* src, size_t nbytes);
extern int get_executable_name(char* n_path, size_t buf_size);
@ -137,8 +137,17 @@ extern int get_executable_name(char* n_path, size_t buf_size);
wchar_t* get_module_filename(void* addr, wchar_t* path);
extern int pick_directory(char* n_path, size_t buf_size);
extern void memcpy2(void* dst, const void* src, size_t nbytes);
// not possible with POSIX calls.
// called from ia32.cpp get_cpu_count
extern int on_each_cpu(void(*cb)());
#if MSC_VERSION
extern double round(double);

View File

@ -23,6 +23,7 @@
#include "win_internal.h"
#include "sysdep/cpu.h"
#include "wcpu.h"
// limit allows statically allocated per-CPU structures (for simplicity).
// we're Windows-specific anyway; such systems won't foreseeably have more.
@ -30,9 +31,7 @@
static const int MAX_CPUS = 32;
// not possible with POSIX calls.
// called from ia32.cpp check_smp
int on_each_cpu(void(*cb)())
int wcpu_on_each_cpu(void(*cb)())
{
const HANDLE hProcess = GetCurrentProcess();

View File

@ -0,0 +1 @@
extern int wcpu_on_each_cpu(void(*cb)());

View File

@ -188,7 +188,7 @@ static int choose_impl()
// and we don't want to mess with the system power settings => unsafe.
if(cpu_freq > 0.0 && ia32_cap(TSC))
{
safe = (cpu_smp == 0 && cpu_speedstep == 0);
safe = (cpus == 1 && cpu_speedstep == 0);
SAFETY_OVERRIDE(HRT_TSC);
if(safe)
{