1
0
forked from 0ad/0ad

cpuid -> ia32_cpuid

considerable continued cleanup in ia32
came across article detailing HT detect on multicore systems. that
requires new code, working on it.
pthread: while reading, came across idea of wrapping thread trampoline
in __except -> more reliable exception handling

This was SVN commit r2716.
This commit is contained in:
janwas 2005-09-14 01:34:16 +00:00
parent 4ab25c7274
commit f8ed148370
5 changed files with 159 additions and 147 deletions

View File

@ -56,9 +56,6 @@ extern void serialize();
# define SP_ Esp # define SP_ Esp
#endif #endif
// internal use only
extern bool cpuid(u32 func, u32* regs);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

View File

@ -229,9 +229,9 @@ max_func dd 0x7FFFFFFF
max_ext_func dd 0xFFFFFFFF max_ext_func dd 0xFFFFFFFF
__SECT__ __SECT__
; extern "C" bool __cdecl cpuid(u32 func, u32* regs) ; extern "C" bool __cdecl ia32_cpuid(u32 func, u32* regs)
global _cpuid global _ia32_cpuid
_cpuid: _ia32_cpuid:
; if unknown, detect; if not available, fail. ; if unknown, detect; if not available, fail.
xor eax, eax ; return val on failure xor eax, eax ; return val on failure
cmp [cpuid_available], eax cmp [cpuid_available], eax
@ -291,7 +291,7 @@ _cpuid:
cpuid cpuid
mov [max_ext_func], eax mov [max_ext_func], eax
jmp _cpuid ; now try again jmp _ia32_cpuid ; now try again

View File

@ -40,12 +40,13 @@
#endif #endif
// replace pathetic MS libc implementation. // replace pathetic MS libc implementation.
// not needed on non-Win32, so don't bother converting from MS inline asm.
#if HAVE_MS_ASM #if HAVE_MS_ASM
double _ceil(double f) double _ceil(double f)
{ {
double r; UNUSED2(f); // avoid bogus warning
const float _49 = 0.499999f; const float _49 = 0.499999f;
double r;
__asm __asm
{ {
fld [f] fld [f]
@ -53,16 +54,17 @@ __asm
frndint frndint
fstp [r] fstp [r]
} }
UNUSED2(f);
return r; return r;
} }
#endif #endif
// return convention for 64 bits with VC7.1, ICC8 is in edx:eax, // rationale: this function should return its output (instead of setting
// so temp variable is unnecessary, but we play it safe. // out params) to simplify its callers. it is written in inline asm
// (instead of moving to ia32.asm) to insulate from changing compiler
// calling conventions.
// MSC, ICC and GCC currently return 64 bits in edx:eax, which even
// matches rdtsc output, but we play it safe and return a temporary.
inline u64 rdtsc() inline u64 rdtsc()
{ {
u64 c; u64 c;
@ -85,14 +87,17 @@ __asm
} }
#if OS_WIN && HAVE_MS_ASM
void ia32_debug_break() void ia32_debug_break()
{ {
#if HAVE_MS_ASM
__asm int 3 __asm int 3
} // note: this probably isn't necessary, since unix_debug_break
// (SIGTRAP) is most probably available if HAVE_GNU_ASM.
// we include it for completeness, though.
#elif HAVE_GNU_ASM
__asm__ __volatile__ ("mfence");
#endif #endif
}
void ia32_memcpy(void* dst, const void* src, size_t nbytes) void ia32_memcpy(void* dst, const void* src, size_t nbytes)
@ -225,32 +230,45 @@ void serialize()
// CPU / feature detect // CPU / feature detect
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
// bool ia32_cap(CpuCap cap)
// data returned by cpuid()
// each function using this data must call cpuid (no-op if already called)
//
static char vendor_str[13];
static int family, model, ext_family;
static int num_cores;
// caps
// treated as 128 bit field; order: std ecx, std edx, ext ecx, ext edx
// keep in sync with enum CpuCap and cpuid() code!
u32 caps[4];
static bool have_brand_string; // if false, need to detect cpu_type manually.
// order in which registers are stored in regs array
// (do not change! brand string relies on this ordering)
enum IA32Regs
{ {
EAX, // treated as 128 bit field; order: std ecx, std edx, ext ecx, ext edx
EBX, // keep in sync with enum CpuCap!
ECX, static u32 caps[4];
EDX ONCE(\
}; u32 regs[4];
if(ia32_cpuid(1, regs))\
{\
caps[0] = regs[ECX];\
caps[1] = regs[EDX];\
}\
if(ia32_cpuid(0x80000001, regs))\
{\
caps[2] = regs[ECX];\
caps[3] = regs[EDX];\
}\
);
const uint tbl_idx = cap >> 5;
const uint bit_idx = cap & 0x1f;
if(tbl_idx > 3)
{
debug_warn("cap invalid");
return false;
}
return (caps[tbl_idx] & BIT(bit_idx)) != 0;
}
// we only store enum Vendor rather than the string because that
// is easier to compare.
enum Vendor { UNKNOWN, INTEL, AMD };
static Vendor vendor = UNKNOWN;
enum MiscCpuCapBits enum MiscCpuCapBits
{ {
@ -259,90 +277,60 @@ enum MiscCpuCapBits
}; };
static int retrieve_cpuid_info()
static void get_cpu_vendor()
{ {
u32 regs[4]; u32 regs[4];
if(!ia32_cpuid(0, regs))
return;
// vendor string // copy regs to string
// notes: // note: 'strange' ebx,edx,ecx reg order is due to ModR/M encoding order.
// - vendor_str is already 0-terminated because it's static. char vendor_str[13];
// - 'strange' ebx,edx,ecx reg order is due to ModR/M encoding order.
if(!cpuid(0, regs))
return ERR_CPU_FEATURE_MISSING; // we need CPUID, i.e. Pentium+
u32* vendor_str_u32 = (u32*)vendor_str; u32* vendor_str_u32 = (u32*)vendor_str;
vendor_str_u32[0] = regs[EBX]; vendor_str_u32[0] = regs[EBX];
vendor_str_u32[1] = regs[EDX]; vendor_str_u32[1] = regs[EDX];
vendor_str_u32[2] = regs[ECX]; vendor_str_u32[2] = regs[ECX];
vendor_str[12] = '\0'; // 0-terminate
// processor signature, feature flags if(!strcmp(vendor_str, "AuthenticAMD"))
// (note: HT/SMP query is nontrivial and done below) vendor = AMD;
if(!cpuid(1, regs)) else if(!strcmp(vendor_str, "GenuineIntel"))
debug_warn("cpuid 1 failed"); vendor = INTEL;
model = bits(regs[EAX], 4, 7); else
family = bits(regs[EAX], 8, 11); debug_warn("unknown vendor");
ext_family = bits(regs[EAX], 20, 23);
caps[0] = regs[ECX];
caps[1] = regs[EDX];
// multicore count
if(cpuid(4, regs))
num_cores = bits(regs[EBX], 26, 31)+1;
// extended feature flags
if(cpuid(0x80000001, regs))
{
caps[2] = regs[ECX];
caps[3] = regs[EDX];
}
// CPU brand string (AthlonXP/P4 or above)
u32* cpu_type_u32 = (u32*)cpu_type;
if(cpuid(0x80000002, cpu_type_u32+0 ) &&
cpuid(0x80000003, cpu_type_u32+16) &&
cpuid(0x80000004, cpu_type_u32+32))
have_brand_string = true;
return 0;
} }
bool ia32_cap(CpuCap cap)
{
u32 idx = cap >> 5;
if(idx > 3)
{
debug_warn("cap invalid");
return false;
}
u32 bit = BIT(cap & 0x1f);
return (caps[idx] & bit) != 0;
}
// (for easier comparison)
enum Vendor { UNKNOWN, INTEL, AMD };
static Vendor vendor = UNKNOWN;
static void get_cpu_type() static void get_cpu_type()
{ {
// get processor signature
u32 regs[4];
if(!ia32_cpuid(1, regs))
debug_warn("cpuid 1 failed");
const uint model = bits(regs[EAX], 4, 7);
const uint family = bits(regs[EAX], 8, 11);
// get brand string (if available)
u32* cpu_type_u32 = (u32*)cpu_type;
bool have_brand_string = false;
if(ia32_cpuid(0x80000002, cpu_type_u32+0 ) &&
ia32_cpuid(0x80000003, cpu_type_u32+16) &&
ia32_cpuid(0x80000004, cpu_type_u32+32))
have_brand_string = true;
// note: cpu_type is guaranteed to hold 48+1 chars, since that's the // note: cpu_type is guaranteed to hold 48+1 chars, since that's the
// length of the CPU brand string. strcpy(cpu_type, literal) is safe. // length of the CPU brand string. strcpy(cpu_type, literal) is safe.
#define SAFE_STRCPY strcpy
// fall back to manual detect of CPU type if it didn't supply // fall back to manual detect of CPU type because either:
// a brand string, or if the brand string is useless (i.e. "Unknown"). // - CPU doesn't support brand string (we use a flag to indicate this
// rather than comparing against a default value because it is safer);
// - the brand string is useless, e.g. "Unknown". this happens on
// some older boards whose BIOS reprograms the string for CPUs it
// doesn't recognize.
if(!have_brand_string || strncmp(cpu_type, "Unknow", 6) == 0) if(!have_brand_string || strncmp(cpu_type, "Unknow", 6) == 0)
// we use an extra flag to detect if we got the brand string:
// safer than comparing against the default name, which may change.
//
// some older boards reprogram the brand string with
// "Unknow[n] CPU Type" on CPUs the BIOS doesn't recognize.
// in that case, we ignore the brand string and detect manually.
{ {
if(vendor == AMD) if(vendor == AMD)
{ {
@ -350,15 +338,15 @@ static void get_cpu_type()
if(family == 6) if(family == 6)
{ {
if(model == 3 || model == 7) if(model == 3 || model == 7)
strcpy(cpu_type, "AMD Duron"); // safe SAFE_STRCPY(cpu_type, "AMD Duron");
else if(model <= 5) else if(model <= 5)
strcpy(cpu_type, "AMD Athlon"); // safe SAFE_STRCPY(cpu_type, "AMD Athlon");
else else
{ {
if(ia32_cap(MP)) if(ia32_cap(AMD_MP))
strcpy(cpu_type, "AMD Athlon MP"); // safe SAFE_STRCPY(cpu_type, "AMD Athlon MP");
else else
strcpy(cpu_type, "AMD Athlon XP"); // safe SAFE_STRCPY(cpu_type, "AMD Athlon XP");
} }
} }
} }
@ -368,17 +356,17 @@ static void get_cpu_type()
if(family == 6) if(family == 6)
{ {
if(model == 1) if(model == 1)
strcpy(cpu_type, "Intel Pentium Pro"); // safe SAFE_STRCPY(cpu_type, "Intel Pentium Pro");
else if(model == 3 || model == 5) else if(model == 3 || model == 5)
strcpy(cpu_type, "Intel Pentium II"); // safe SAFE_STRCPY(cpu_type, "Intel Pentium II");
else if(model == 6) else if(model == 6)
strcpy(cpu_type, "Intel Celeron"); // safe SAFE_STRCPY(cpu_type, "Intel Celeron");
else else
strcpy(cpu_type, "Intel Pentium III"); // safe SAFE_STRCPY(cpu_type, "Intel Pentium III");
} }
} }
} }
// we have a valid brand string; try to pretty it up some // cpu_type already holds a valid brand string; pretty it up.
else else
{ {
// strip (tm) from Athlon string // strip (tm) from Athlon string
@ -387,15 +375,16 @@ static void get_cpu_type()
// remove 2x (R) and CPU freq from P4 string // remove 2x (R) and CPU freq from P4 string
float freq; float freq;
// the indicated frequency isn't necessarily correct - the CPU may be // we can't use this because it isn't necessarily correct - the CPU
// overclocked. need to pass a variable though, since scanf returns // may be overclocked. a variable must be passed, though, since
// the number of fields actually stored. // scanf returns the number of fields actually stored.
if(sscanf(cpu_type, " Intel(R) Pentium(R) 4 CPU %fGHz", &freq) == 1) if(sscanf(cpu_type, " Intel(R) Pentium(R) 4 CPU %fGHz", &freq) == 1)
strcpy(cpu_type, "Intel Pentium 4"); // safe SAFE_STRCPY(cpu_type, "Intel Pentium 4");
} }
} }
static void measure_cpu_freq() static void measure_cpu_freq()
{ {
// set max priority, to reduce interference while measuring. // set max priority, to reduce interference while measuring.
@ -487,8 +476,19 @@ static void measure_cpu_freq()
// called on each CPU by on_each_cpu. // called on each CPU by on_each_cpu.
static void check_smp() static void check_smp()
{ {
u32 regs[4];
debug_assert(cpus > 0 && "must know # CPUs (call OS-specific detect first)"); debug_assert(cpus > 0 && "must know # CPUs (call OS-specific detect first)");
/*
if single-core and no HT
no change
if multi-core and no HT
phys = windows_cpus / cores_per_package
*/
// we don't check if it's Intel and P4 or above - HT may be supported // we don't check if it's Intel and P4 or above - HT may be supported
// on other CPUs in future. haven't come across a processor that // on other CPUs in future. haven't come across a processor that
// incorrectly sets the HT feature bit. // incorrectly sets the HT feature bit.
@ -504,6 +504,12 @@ static void check_smp()
if(cpu_smp == -1) if(cpu_smp == -1)
cpu_smp = 0; cpu_smp = 0;
// multicore count
uint num_cores_per_package = 1;
if(ia32_cpuid(4, regs))
num_cores_per_package = bits(regs[EBX], 26, 31)+1;
// //
// still need to check if HT is actually enabled (BIOS and OS); // still need to check if HT is actually enabled (BIOS and OS);
@ -512,8 +518,7 @@ static void check_smp()
// get number of logical CPUs per package // get number of logical CPUs per package
// (the same for all packages on this system) // (the same for all packages on this system)
u32 regs[4]; if(!ia32_cpuid(1, regs))
if(!cpuid(1, regs))
debug_warn("cpuid 1 failed"); debug_warn("cpuid 1 failed");
const uint log_cpus_per_package = bits(regs[EBX], 16, 23); const uint log_cpus_per_package = bits(regs[EBX], 16, 23);
// logical CPUs are initialized after one another => // logical CPUs are initialized after one another =>
@ -535,13 +540,13 @@ static void check_speedstep()
{ {
if(vendor == INTEL) if(vendor == INTEL)
{ {
if(ia32_cap(EST)) if(ia32_cap(INTEL_EST))
cpu_speedstep = 1; cpu_speedstep = 1;
} }
else if(vendor == AMD) else if(vendor == AMD)
{ {
u32 regs[4]; u32 regs[4];
if(cpuid(0x80000007, regs)) if(ia32_cpuid(0x80000007, regs))
if(regs[EDX] & POWERNOW_FREQ_ID_CTRL) if(regs[EDX] & POWERNOW_FREQ_ID_CTRL)
cpu_speedstep = 1; cpu_speedstep = 1;
} }
@ -550,14 +555,9 @@ static void check_speedstep()
void ia32_get_cpu_info() void ia32_get_cpu_info()
{ {
WARN_ERR_RETURN(retrieve_cpuid_info()); get_cpu_vendor();
if(!strcmp(vendor_str, "AuthenticAMD"))
vendor = AMD;
else if(!strcmp(vendor_str, "GenuineIntel"))
vendor = INTEL;
get_cpu_type(); get_cpu_type();
check_speedstep(); check_speedstep();
// linux doesn't have CPU affinity API:s (that we've found...) // linux doesn't have CPU affinity API:s (that we've found...)
#if OS_WIN #if OS_WIN

View File

@ -53,13 +53,12 @@ extern void ia32_memcpy(void* dst, const void* src, size_t nbytes);
// CPU caps (128 bits) // CPU caps (128 bits)
// do not change the order! (breaks cpuid()) // do not change the order!
enum CpuCap enum CpuCap
{ {
// standard (ecx) // standard (ecx) - currently only defined by Intel
// currently only defined by Intel INTEL_SSE3 = 0+0, // Streaming SIMD Extensions 3
SSE3 = 0+0, // Streaming SIMD Extensions 3 INTEL_EST = 0+7, // Enhanced Speedstep Technology
EST = 0+7, // Enhanced Speedstep Technology
// standard (edx) // standard (edx)
TSC = 32+4, // TimeStamp Counter TSC = 32+4, // TimeStamp Counter
@ -71,11 +70,10 @@ enum CpuCap
// extended (ecx) // extended (ecx)
// extended (edx) // extended (edx) - currently only defined by AMD
// currently only defined by AMD AMD_MP = 96+19, // MultiProcessing capable; reserved on AMD64
MP = 96+19, // MultiProcessing capable; reserved on AMD64 AMD_3DNOW_PRO = 96+30,
_3DNOW_PRO = 96+30, AMD_3DNOW = 96+31
_3DNOW = 96+31
}; };
extern bool ia32_cap(CpuCap cap); extern bool ia32_cap(CpuCap cap);
@ -85,7 +83,17 @@ extern void ia32_get_cpu_info(void);
// internal use only // internal use only
extern int get_cur_processor_id();
// order in which registers are stored in regs array
// (do not change! brand string relies on this ordering)
enum IA32Regs
{
EAX,
EBX,
ECX,
EDX
};
extern bool ia32_cpuid(u32 func, u32* regs);
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@ -255,7 +255,14 @@ static unsigned __stdcall thread_start(void* UNUSED(param))
void* arg = func_and_arg.arg; void* arg = func_and_arg.arg;
win_unlock(WPTHREAD_CS); win_unlock(WPTHREAD_CS);
void* ret = func(arg); void* ret = (void*)-1;
__try
{
ret = func(arg);
}
__except(wdbg_exception_filter(GetExceptionInformation()))
{
}
call_tls_dtors(); call_tls_dtors();