From f8ed148370d7c46cdab115cb29833e5fcca2ffa7 Mon Sep 17 00:00:00 2001 From: janwas Date: Wed, 14 Sep 2005 01:34:16 +0000 Subject: [PATCH] cpuid -> ia32_cpuid considerable continued cleanup in ia32 came across article detailing HT detect on multicore systems. that requires new code, working on it. pthread: while reading, came across idea of wrapping thread trampoline in __except -> more reliable exception handling This was SVN commit r2716. --- source/lib/sysdep/cpu.h | 3 - source/lib/sysdep/ia32.asm | 8 +- source/lib/sysdep/ia32.cpp | 256 ++++++++++++++--------------- source/lib/sysdep/ia32.h | 30 ++-- source/lib/sysdep/win/wpthread.cpp | 9 +- 5 files changed, 159 insertions(+), 147 deletions(-) diff --git a/source/lib/sysdep/cpu.h b/source/lib/sysdep/cpu.h index 01d9680a5f..85adab28a8 100755 --- a/source/lib/sysdep/cpu.h +++ b/source/lib/sysdep/cpu.h @@ -56,9 +56,6 @@ extern void serialize(); # define SP_ Esp #endif -// internal use only -extern bool cpuid(u32 func, u32* regs); - #ifdef __cplusplus } #endif diff --git a/source/lib/sysdep/ia32.asm b/source/lib/sysdep/ia32.asm index f2939d52cc..47109e5a0d 100644 --- a/source/lib/sysdep/ia32.asm +++ b/source/lib/sysdep/ia32.asm @@ -229,9 +229,9 @@ max_func dd 0x7FFFFFFF max_ext_func dd 0xFFFFFFFF __SECT__ -; extern "C" bool __cdecl cpuid(u32 func, u32* regs) -global _cpuid -_cpuid: +; extern "C" bool __cdecl ia32_cpuid(u32 func, u32* regs) +global _ia32_cpuid +_ia32_cpuid: ; if unknown, detect; if not available, fail. xor eax, eax ; return val on failure cmp [cpuid_available], eax @@ -291,7 +291,7 @@ _cpuid: cpuid mov [max_ext_func], eax - jmp _cpuid ; now try again + jmp _ia32_cpuid ; now try again diff --git a/source/lib/sysdep/ia32.cpp b/source/lib/sysdep/ia32.cpp index afddcd8194..6f5ef783cf 100755 --- a/source/lib/sysdep/ia32.cpp +++ b/source/lib/sysdep/ia32.cpp @@ -40,12 +40,13 @@ #endif // replace pathetic MS libc implementation. +// not needed on non-Win32, so don't bother converting from MS inline asm. #if HAVE_MS_ASM double _ceil(double f) { - double r; - + UNUSED2(f); // avoid bogus warning const float _49 = 0.499999f; + double r; __asm { fld [f] @@ -53,16 +54,17 @@ __asm frndint fstp [r] } - - UNUSED2(f); - return r; } #endif -// return convention for 64 bits with VC7.1, ICC8 is in edx:eax, -// so temp variable is unnecessary, but we play it safe. +// rationale: this function should return its output (instead of setting +// out params) to simplify its callers. it is written in inline asm +// (instead of moving to ia32.asm) to insulate from changing compiler +// calling conventions. +// MSC, ICC and GCC currently return 64 bits in edx:eax, which even +// matches rdtsc output, but we play it safe and return a temporary. inline u64 rdtsc() { u64 c; @@ -85,14 +87,17 @@ __asm } - - -#if OS_WIN && HAVE_MS_ASM void ia32_debug_break() { +#if HAVE_MS_ASM __asm int 3 -} +// note: this probably isn't necessary, since unix_debug_break +// (SIGTRAP) is most probably available if HAVE_GNU_ASM. +// we include it for completeness, though. +#elif HAVE_GNU_ASM + __asm__ __volatile__ ("mfence"); #endif +} void ia32_memcpy(void* dst, const void* src, size_t nbytes) @@ -225,32 +230,45 @@ void serialize() // CPU / feature detect //----------------------------------------------------------------------------- -// -// data returned by cpuid() -// each function using this data must call cpuid (no-op if already called) -// - -static char vendor_str[13]; -static int family, model, ext_family; -static int num_cores; - -// caps -// treated as 128 bit field; order: std ecx, std edx, ext ecx, ext edx -// keep in sync with enum CpuCap and cpuid() code! -u32 caps[4]; - -static bool have_brand_string; // if false, need to detect cpu_type manually. - - -// order in which registers are stored in regs array -// (do not change! brand string relies on this ordering) -enum IA32Regs +bool ia32_cap(CpuCap cap) { - EAX, - EBX, - ECX, - EDX -}; + // treated as 128 bit field; order: std ecx, std edx, ext ecx, ext edx + // keep in sync with enum CpuCap! + static u32 caps[4]; + ONCE(\ + u32 regs[4]; + if(ia32_cpuid(1, regs))\ + {\ + caps[0] = regs[ECX];\ + caps[1] = regs[EDX];\ + }\ + if(ia32_cpuid(0x80000001, regs))\ + {\ + caps[2] = regs[ECX];\ + caps[3] = regs[EDX];\ + }\ + ); + + const uint tbl_idx = cap >> 5; + const uint bit_idx = cap & 0x1f; + if(tbl_idx > 3) + { + debug_warn("cap invalid"); + return false; + } + return (caps[tbl_idx] & BIT(bit_idx)) != 0; +} + + + + + +// we only store enum Vendor rather than the string because that +// is easier to compare. +enum Vendor { UNKNOWN, INTEL, AMD }; +static Vendor vendor = UNKNOWN; + + enum MiscCpuCapBits { @@ -259,90 +277,60 @@ enum MiscCpuCapBits }; -static int retrieve_cpuid_info() + +static void get_cpu_vendor() { u32 regs[4]; + if(!ia32_cpuid(0, regs)) + return; - // vendor string - // notes: - // - vendor_str is already 0-terminated because it's static. - // - 'strange' ebx,edx,ecx reg order is due to ModR/M encoding order. - if(!cpuid(0, regs)) - return ERR_CPU_FEATURE_MISSING; // we need CPUID, i.e. Pentium+ + // copy regs to string + // note: 'strange' ebx,edx,ecx reg order is due to ModR/M encoding order. + char vendor_str[13]; u32* vendor_str_u32 = (u32*)vendor_str; vendor_str_u32[0] = regs[EBX]; vendor_str_u32[1] = regs[EDX]; vendor_str_u32[2] = regs[ECX]; + vendor_str[12] = '\0'; // 0-terminate - // processor signature, feature flags - // (note: HT/SMP query is nontrivial and done below) - if(!cpuid(1, regs)) - debug_warn("cpuid 1 failed"); - model = bits(regs[EAX], 4, 7); - family = bits(regs[EAX], 8, 11); - ext_family = bits(regs[EAX], 20, 23); - caps[0] = regs[ECX]; - caps[1] = regs[EDX]; - - // multicore count - if(cpuid(4, regs)) - num_cores = bits(regs[EBX], 26, 31)+1; - - // extended feature flags - if(cpuid(0x80000001, regs)) - { - caps[2] = regs[ECX]; - caps[3] = regs[EDX]; - } - - // CPU brand string (AthlonXP/P4 or above) - u32* cpu_type_u32 = (u32*)cpu_type; - if(cpuid(0x80000002, cpu_type_u32+0 ) && - cpuid(0x80000003, cpu_type_u32+16) && - cpuid(0x80000004, cpu_type_u32+32)) - have_brand_string = true; - - return 0; + if(!strcmp(vendor_str, "AuthenticAMD")) + vendor = AMD; + else if(!strcmp(vendor_str, "GenuineIntel")) + vendor = INTEL; + else + debug_warn("unknown vendor"); } -bool ia32_cap(CpuCap cap) -{ - u32 idx = cap >> 5; - if(idx > 3) - { - debug_warn("cap invalid"); - return false; - } - u32 bit = BIT(cap & 0x1f); - - return (caps[idx] & bit) != 0; -} - - - -// (for easier comparison) -enum Vendor { UNKNOWN, INTEL, AMD }; -static Vendor vendor = UNKNOWN; - - - - - static void get_cpu_type() { + // get processor signature + u32 regs[4]; + if(!ia32_cpuid(1, regs)) + debug_warn("cpuid 1 failed"); + const uint model = bits(regs[EAX], 4, 7); + const uint family = bits(regs[EAX], 8, 11); + + // get brand string (if available) + u32* cpu_type_u32 = (u32*)cpu_type; + bool have_brand_string = false; + if(ia32_cpuid(0x80000002, cpu_type_u32+0 ) && + ia32_cpuid(0x80000003, cpu_type_u32+16) && + ia32_cpuid(0x80000004, cpu_type_u32+32)) + have_brand_string = true; + + // note: cpu_type is guaranteed to hold 48+1 chars, since that's the // length of the CPU brand string. strcpy(cpu_type, literal) is safe. +#define SAFE_STRCPY strcpy - // fall back to manual detect of CPU type if it didn't supply - // a brand string, or if the brand string is useless (i.e. "Unknown"). + // fall back to manual detect of CPU type because either: + // - CPU doesn't support brand string (we use a flag to indicate this + // rather than comparing against a default value because it is safer); + // - the brand string is useless, e.g. "Unknown". this happens on + // some older boards whose BIOS reprograms the string for CPUs it + // doesn't recognize. if(!have_brand_string || strncmp(cpu_type, "Unknow", 6) == 0) - // we use an extra flag to detect if we got the brand string: - // safer than comparing against the default name, which may change. - // - // some older boards reprogram the brand string with - // "Unknow[n] CPU Type" on CPUs the BIOS doesn't recognize. - // in that case, we ignore the brand string and detect manually. { if(vendor == AMD) { @@ -350,15 +338,15 @@ static void get_cpu_type() if(family == 6) { if(model == 3 || model == 7) - strcpy(cpu_type, "AMD Duron"); // safe + SAFE_STRCPY(cpu_type, "AMD Duron"); else if(model <= 5) - strcpy(cpu_type, "AMD Athlon"); // safe + SAFE_STRCPY(cpu_type, "AMD Athlon"); else { - if(ia32_cap(MP)) - strcpy(cpu_type, "AMD Athlon MP"); // safe + if(ia32_cap(AMD_MP)) + SAFE_STRCPY(cpu_type, "AMD Athlon MP"); else - strcpy(cpu_type, "AMD Athlon XP"); // safe + SAFE_STRCPY(cpu_type, "AMD Athlon XP"); } } } @@ -368,17 +356,17 @@ static void get_cpu_type() if(family == 6) { if(model == 1) - strcpy(cpu_type, "Intel Pentium Pro"); // safe + SAFE_STRCPY(cpu_type, "Intel Pentium Pro"); else if(model == 3 || model == 5) - strcpy(cpu_type, "Intel Pentium II"); // safe + SAFE_STRCPY(cpu_type, "Intel Pentium II"); else if(model == 6) - strcpy(cpu_type, "Intel Celeron"); // safe + SAFE_STRCPY(cpu_type, "Intel Celeron"); else - strcpy(cpu_type, "Intel Pentium III"); // safe + SAFE_STRCPY(cpu_type, "Intel Pentium III"); } } } - // we have a valid brand string; try to pretty it up some + // cpu_type already holds a valid brand string; pretty it up. else { // strip (tm) from Athlon string @@ -387,15 +375,16 @@ static void get_cpu_type() // remove 2x (R) and CPU freq from P4 string float freq; - // the indicated frequency isn't necessarily correct - the CPU may be - // overclocked. need to pass a variable though, since scanf returns - // the number of fields actually stored. + // we can't use this because it isn't necessarily correct - the CPU + // may be overclocked. a variable must be passed, though, since + // scanf returns the number of fields actually stored. if(sscanf(cpu_type, " Intel(R) Pentium(R) 4 CPU %fGHz", &freq) == 1) - strcpy(cpu_type, "Intel Pentium 4"); // safe + SAFE_STRCPY(cpu_type, "Intel Pentium 4"); } } + static void measure_cpu_freq() { // set max priority, to reduce interference while measuring. @@ -487,8 +476,19 @@ static void measure_cpu_freq() // called on each CPU by on_each_cpu. static void check_smp() { + u32 regs[4]; + debug_assert(cpus > 0 && "must know # CPUs (call OS-specific detect first)"); +/* + if single-core and no HT + no change + if multi-core and no HT + phys = windows_cpus / cores_per_package + + +*/ + // we don't check if it's Intel and P4 or above - HT may be supported // on other CPUs in future. haven't come across a processor that // incorrectly sets the HT feature bit. @@ -504,6 +504,12 @@ static void check_smp() if(cpu_smp == -1) cpu_smp = 0; + // multicore count + uint num_cores_per_package = 1; + if(ia32_cpuid(4, regs)) + num_cores_per_package = bits(regs[EBX], 26, 31)+1; + + // // still need to check if HT is actually enabled (BIOS and OS); @@ -512,8 +518,7 @@ static void check_smp() // get number of logical CPUs per package // (the same for all packages on this system) - u32 regs[4]; - if(!cpuid(1, regs)) + if(!ia32_cpuid(1, regs)) debug_warn("cpuid 1 failed"); const uint log_cpus_per_package = bits(regs[EBX], 16, 23); // logical CPUs are initialized after one another => @@ -535,13 +540,13 @@ static void check_speedstep() { if(vendor == INTEL) { - if(ia32_cap(EST)) + if(ia32_cap(INTEL_EST)) cpu_speedstep = 1; } else if(vendor == AMD) { u32 regs[4]; - if(cpuid(0x80000007, regs)) + if(ia32_cpuid(0x80000007, regs)) if(regs[EDX] & POWERNOW_FREQ_ID_CTRL) cpu_speedstep = 1; } @@ -550,14 +555,9 @@ static void check_speedstep() void ia32_get_cpu_info() { - WARN_ERR_RETURN(retrieve_cpuid_info()); - - if(!strcmp(vendor_str, "AuthenticAMD")) - vendor = AMD; - else if(!strcmp(vendor_str, "GenuineIntel")) - vendor = INTEL; - + get_cpu_vendor(); get_cpu_type(); + check_speedstep(); // linux doesn't have CPU affinity API:s (that we've found...) #if OS_WIN diff --git a/source/lib/sysdep/ia32.h b/source/lib/sysdep/ia32.h index a93c18fbdb..b6a2a7f82f 100755 --- a/source/lib/sysdep/ia32.h +++ b/source/lib/sysdep/ia32.h @@ -53,13 +53,12 @@ extern void ia32_memcpy(void* dst, const void* src, size_t nbytes); // CPU caps (128 bits) -// do not change the order! (breaks cpuid()) +// do not change the order! enum CpuCap { - // standard (ecx) - // currently only defined by Intel - SSE3 = 0+0, // Streaming SIMD Extensions 3 - EST = 0+7, // Enhanced Speedstep Technology + // standard (ecx) - currently only defined by Intel + INTEL_SSE3 = 0+0, // Streaming SIMD Extensions 3 + INTEL_EST = 0+7, // Enhanced Speedstep Technology // standard (edx) TSC = 32+4, // TimeStamp Counter @@ -71,11 +70,10 @@ enum CpuCap // extended (ecx) - // extended (edx) - // currently only defined by AMD - MP = 96+19, // MultiProcessing capable; reserved on AMD64 - _3DNOW_PRO = 96+30, - _3DNOW = 96+31 + // extended (edx) - currently only defined by AMD + AMD_MP = 96+19, // MultiProcessing capable; reserved on AMD64 + AMD_3DNOW_PRO = 96+30, + AMD_3DNOW = 96+31 }; extern bool ia32_cap(CpuCap cap); @@ -85,7 +83,17 @@ extern void ia32_get_cpu_info(void); // internal use only -extern int get_cur_processor_id(); + +// order in which registers are stored in regs array +// (do not change! brand string relies on this ordering) +enum IA32Regs +{ + EAX, + EBX, + ECX, + EDX +}; +extern bool ia32_cpuid(u32 func, u32* regs); #ifdef __cplusplus } diff --git a/source/lib/sysdep/win/wpthread.cpp b/source/lib/sysdep/win/wpthread.cpp index 15ccfcaddb..a33ed0d008 100644 --- a/source/lib/sysdep/win/wpthread.cpp +++ b/source/lib/sysdep/win/wpthread.cpp @@ -255,7 +255,14 @@ static unsigned __stdcall thread_start(void* UNUSED(param)) void* arg = func_and_arg.arg; win_unlock(WPTHREAD_CS); - void* ret = func(arg); + void* ret = (void*)-1; + __try + { + ret = func(arg); + } + __except(wdbg_exception_filter(GetExceptionInformation())) + { + } call_tls_dtors();