Further fixes from work (where I've finally managed to temporarily sign the Aken driver).

in particular, remove two hindrances to displaying the error dialog
during early init.

This was SVN commit r7754.
This commit is contained in:
janwas 2010-07-14 12:23:53 +00:00
parent c634a62f4c
commit 12568b0229
11 changed files with 157 additions and 85 deletions

View File

@ -62,6 +62,36 @@ struct AcpiGenericAddress
u64 address;
};
// Leading portion of the ACPI table that is named "FADT" but must be
// requested via the signature "FACP".
// Only the fields that callers read are named; the unusedN byte arrays
// are presumably padding that keeps the named fields at the positions
// they occupy in the firmware-provided table.
// NOTE(review): the layout appears to rely on the surrounding
// #pragma pack region - confirm before adding or resizing members.
struct FADT // signature is FACP!
{
AcpiTable header;
u8 unused1[40];
u32 pmTimerPortAddress;
u8 unused2[16];
u16 c2Latency; // [us]
u16 c3Latency; // [us]
u8 unused3[5];
// zero is interpreted as "duty cycle not supported" (see IsDutyCycleSupported)
u8 dutyWidth;
u8 unused4[6];
u32 flags;
// (ACPI4 defines additional fields after this)
// @return true if dutyWidth is nonzero.
bool IsDutyCycleSupported() const
{
return dutyWidth != 0;
}
// @return true if the reported C2 latency does not exceed 100 us.
bool IsC2Supported() const
{
return c2Latency <= 100; // magic value specified by ACPI
}
// @return true if the reported C3 latency does not exceed 1000 us.
bool IsC3Supported() const
{
return c3Latency <= 1000; // see above
}
};
#pragma pack(pop)
/**

View File

@ -46,7 +46,7 @@ static size_t MaxCoresPerPackage()
// assume single-core unless one of the following applies:
size_t maxCoresPerPackage = 1;
x86_x64_CpuidRegs regs;
x86_x64_CpuidRegs regs = { 0 };
switch(x86_x64_Vendor())
{
case X86_X64_VENDOR_INTEL:
@ -58,7 +58,6 @@ static size_t MaxCoresPerPackage()
case X86_X64_VENDOR_AMD:
regs.eax = 0x80000008;
regs.ecx = 0;
if(x86_x64_cpuid(&regs))
maxCoresPerPackage = bits(regs.ecx, 0, 7)+1;
break;
@ -88,9 +87,8 @@ static size_t MaxLogicalPerCore()
};
if(IsHyperthreadingCapable()())
{
x86_x64_CpuidRegs regs;
x86_x64_CpuidRegs regs = { 0 };
regs.eax = 1;
regs.ecx = 0;
if(!x86_x64_cpuid(&regs))
DEBUG_WARN_ERR(ERR::CPU_FEATURE_MISSING);
const size_t logicalPerPackage = bits(regs.ebx, 16, 23);

View File

@ -428,7 +428,7 @@ static void AddTLB2ParameterPair(u32 reg, size_t pageSize)
// AMD reports maxCpuidIdFunction > 4 but consider functions 2..4 to be
// "reserved". cache characteristics are returned via ext. functions.
static void InitCacheAndTLB()
static void DetectCacheAndTLB()
{
x86_x64_CpuidRegs regs = { 0 };
@ -458,6 +458,9 @@ static void InitCacheAndTLB()
} // namespace AMD
// note: CPUID 8000_0006 also returns L2 size, associativity, and
// line size, but I don't see any advantages vs. CPUID 4.
static void DetectCache_CPUID4()
{
// note: ordering is undefined (see Intel AP-485)
@ -481,7 +484,7 @@ static void DetectCache_CPUID4()
params.type = type;
params.level = level;
params.associativity = (size_t)bits(regs.ebx, 22, 31)+1;
params.lineSize = (size_t)bits(regs.ebx, 0, 11)+1; // (yes, this also uses +1 encoding)
params.lineSize = (size_t)bits(regs.ebx, 0, 11)+1; // (yes, this also uses +1 encoding)
params.sharedBy = (size_t)bits(regs.eax, 14, 25)+1;
{
const size_t partitions = (size_t)bits(regs.ebx, 12, 21)+1;
@ -680,10 +683,10 @@ static void DetectTLB_CPUID2()
static ModuleInitState cacheInitState;
static LibError InitCacheAndTLB()
static LibError DetectCacheAndTLB()
{
if(x86_x64_Vendor() == X86_X64_VENDOR_AMD)
AMD::InitCacheAndTLB();
AMD::DetectCacheAndTLB();
else
{
DetectCache_CPUID4();
@ -710,13 +713,13 @@ static LibError InitCacheAndTLB()
const x86_x64_Cache* x86_x64_ICache()
{
ModuleInit(&cacheInitState, InitCacheAndTLB);
ModuleInit(&cacheInitState, DetectCacheAndTLB);
return &icache;
}
const x86_x64_Cache* x86_x64_DCache()
{
ModuleInit(&cacheInitState, InitCacheAndTLB);
ModuleInit(&cacheInitState, DetectCacheAndTLB);
return &dcache;
}
@ -732,19 +735,19 @@ size_t x86_x64_L2CacheLineSize()
const x86_x64_TLB* x86_x64_ITLB()
{
ModuleInit(&cacheInitState, InitCacheAndTLB);
ModuleInit(&cacheInitState, DetectCacheAndTLB);
return &itlb;
}
const x86_x64_TLB* x86_x64_DTLB()
{
ModuleInit(&cacheInitState, InitCacheAndTLB);
ModuleInit(&cacheInitState, DetectCacheAndTLB);
return &dtlb;
}
size_t x86_x64_TLBCoverage(const x86_x64_TLB* tlb)
{
// note: receiving a TLB pointer means InitCacheAndTLB was called.
// note: receiving a TLB pointer means DetectCacheAndTLB was called.
const u64 pageSize = 4*KiB;
const u64 largePageSize = 4*MiB; // TODO: find out if we're using 2MB or 4MB

View File

@ -110,13 +110,14 @@ LIB_API void cpu_Serialize();
**/
inline void cpu_Pause()
{
#if MSC_VER && (ARCH_IA32 || ARCH_AMD64)
#if MSC_VERSION && ARCH_X86_X64
_mm_pause();
#elif GCC_VER && (ARCH_IA32 || ARCH_AMD64)
#elif GCC_VERSION && ARCH_X86_X64
__asm__ __volatile__( "rep; nop" : : : "memory" );
#endif
}
//-----------------------------------------------------------------------------
// misc

View File

@ -112,9 +112,8 @@ void mahaf_WritePort32(u16 port, u32 value)
bool mahaf_IsPhysicalMappingDangerous()
{
// WinXP introduced checks that ensure we don't re-map pages with
// incompatible attributes. without this, mapping physical pages risks
// disaster due to TLB corruption.
// pre-XP versions don't prevent re-mapping pages with incompatible
// attributes, which may lead to disaster due to TLB corruption.
if(wutil_WindowsVersion() < WUTIL_VERSION_XP)
return true;

View File

@ -69,19 +69,11 @@ static uintptr_t mod_base;
static WORD machine;
#endif
// note: RtlCaptureStackBackTrace (http://msinilo.pl/blog/?p=40)
// is likely to be much faster than StackWalk64 (especially relevant
// for debug_GetCaller), but wasn't known during development and
// remains undocumented.
static WUTIL_FUNC(pRtlCaptureContext, VOID, (PCONTEXT));
static LibError InitDbghelp()
{
hProcess = GetCurrentProcess();
dbghelp_ImportFunctions();
WUTIL_IMPORT_KERNEL32(RtlCaptureContext, pRtlCaptureContext);
// set options
// notes:
@ -312,6 +304,11 @@ static LibError ia32_walk_stack(_tagSTACKFRAME64* sf)
#endif
// note: RtlCaptureStackBackTrace (http://msinilo.pl/blog/?p=40)
// is likely to be much faster than StackWalk64 (especially relevant
// for debug_GetCaller), but wasn't known during development and
// remains undocumented.
LibError wdbg_sym_WalkStack(StackFrameCallback cb, uintptr_t cbData, const CONTEXT* pcontext, const wchar_t* lastFuncToSkip)
{
// to function properly, StackWalk64 requires a CONTEXT on
@ -342,8 +339,16 @@ LibError wdbg_sym_WalkStack(StackFrameCallback cb, uintptr_t cbData, const CONTE
#if ARCH_IA32
ia32_asm_GetCurrentContext(&context);
#else
// we need to capture the context ASAP, lest more registers be
// clobbered. since sym_init is no longer called from winit, the
// best we can do is import the function pointer directly.
static WUTIL_FUNC(pRtlCaptureContext, VOID, (PCONTEXT));
if(!pRtlCaptureContext)
return ERR::NOT_SUPPORTED; // NOWARN
{
WUTIL_IMPORT_KERNEL32(RtlCaptureContext, pRtlCaptureContext);
if(!pRtlCaptureContext)
return ERR::NOT_SUPPORTED; // NOWARN
}
memset(&context, 0, sizeof(context));
context.ContextFlags = CONTEXT_CONTROL|CONTEXT_INTEGER;
pRtlCaptureContext(&context);

View File

@ -71,7 +71,7 @@ public:
debug_assert(period_fs != 0); // "a value of 0 in this field is not permitted"
debug_assert(period_fs <= 0x05F5E100); // 100 ns (min freq is 10 MHz)
m_frequency = 1e15 / period_fs;
debug_printf(L"HPET: rev=%X vendor=%X bits=%d period=%X freq=%g\n", revision, vendorID, m_counterBits, period_fs, m_frequency);
debug_printf(L"HPET: rev=%X vendor=%X bits=%d period=%08X freq=%g\n", revision, vendorID, m_counterBits, period_fs, m_frequency);
}
// start the counter (if not already running)

View File

@ -34,18 +34,7 @@
#include "lib/sysdep/os/win/mahaf.h"
#include "lib/bits.h"
#pragma pack(push,1)
struct FADT
{
AcpiTable header;
u8 unused[40];
u32 pmTimerPortAddress;
u8 unused2[32];
u32 flags;
};
#pragma pack(pop)
static const u32 TMR_VAL_EXT = Bit<u32>(8);
static const u32 TMR_VAL_EXT = Bit<u32>(8); // FADT flags
//-----------------------------------------------------------------------------

View File

@ -30,6 +30,7 @@
#include "lib/sysdep/os/win/whrt/counter.h"
#include "lib/bits.h"
#include "lib/sysdep/acpi.h"
#include "lib/sysdep/os_cpu.h"
#include "lib/sysdep/os/win/win.h"
#include "lib/sysdep/os/win/wutil.h"
@ -41,19 +42,52 @@
//-----------------------------------------------------------------------------
// detect throttling
static bool IsUniprocessor()
{
const CpuTopology* topology = cpu_topology_Detect();
if(cpu_topology_NumPackages(topology) != 1)
return false;
if(cpu_topology_CoresPerPackage(topology) != 1)
return false;
return true;
}
// bit masks that are tested against the EDX value returned by
// CPUID function 0x80000007 when the vendor is AMD.
enum AmdPowerNowFlags
{
// (if any of the following three bits is set, throttling is
// considered possible)
PN_FREQ_ID_CTRL = BIT(1),
PN_HW_THERMAL_CTRL = BIT(4),
PN_SW_THERMAL_CTRL = BIT(5),
// TSC is invariant across P-state, C-state and stop grant transitions
PN_INVARIANT_TSC = BIT(8)
};
// @return true if the CPU reports an invariant TSC.
// only AMD processors are queried (via CPUID function 0x80000007);
// all other vendors and non-x86 architectures yield false.
static bool IsInvariantTSC()
{
#if ARCH_X86_X64
	if(x86_x64_Vendor() == X86_X64_VENDOR_AMD)
	{
		x86_x64_CpuidRegs regs = { 0 };
		regs.eax = 0x80000007;
		// the flag indicates that the TSC is invariant across P-state,
		// C-state and stop grant transitions (e.g. STPCLK).
		// (it is only examined if the CPUID call succeeded)
		if(x86_x64_cpuid(&regs) && (regs.edx & PN_INVARIANT_TSC))
			return true;
	}
#endif

	return false;
}
static bool IsThrottlingPossible()
{
#if ARCH_X86_X64
x86_x64_CpuidRegs regs;
x86_x64_CpuidRegs regs = { 0 };
switch(x86_x64_Vendor())
{
case X86_X64_VENDOR_INTEL:
@ -65,13 +99,14 @@ static bool IsThrottlingPossible()
regs.eax = 0x80000007;
if(x86_x64_cpuid(&regs))
{
if(regs.edx & (PN_FREQ_ID_CTRL|PN_SW_THERMAL_CTRL))
if(regs.edx & (PN_FREQ_ID_CTRL|PN_HW_THERMAL_CTRL|PN_SW_THERMAL_CTRL))
return true;
}
break;
}
return false;
#endif
return false;
}
@ -101,15 +136,16 @@ public:
bool IsSafe() const
{
// use of the TSC for timing is subject to a litany of potential problems:
// - separate, unsynchronized counters with offset and drift;
// - frequency changes (P-state transitions and STPCLK throttling);
// - failure to increment in C3 and C4 deep-sleep states.
// we will discuss the specifics below.
// using the TSC for timing is subject to a litany of
// potential problems, discussed below:
// SMP or multi-core => counters are unsynchronized. this could be
// solved by maintaining separate per-core counter states, but that
// requires atomic reads of the TSC and the current processor number.
if(IsInvariantTSC())
return true;
// SMP or multi-core => counters are unsynchronized. both offset and
// drift could be solved by maintaining separate per-core
// counter states, but that requires atomic reads of the TSC and
// the current processor number.
//
// (otherwise, we have a subtle race condition: if preempted while
// reading the time and rescheduled on a different core, incorrect
@ -120,12 +156,23 @@ public:
//
// (note: if the TSC is invariant, drift is no longer a concern.
// we could synchronize the TSC MSRs during initialization and avoid
// per-core counter state and the abovementioned race condition.
// per-core counter state and the race condition mentioned above.
// however, we won't bother, since such platforms aren't yet widespread
// and would surely support the nice and safe HPET, anyway)
if(!IsUniprocessor())
return false;
const FADT* fadt = (const FADT*)acpi_GetTable("FACP");
if(fadt)
{
const CpuTopology* topology = cpu_topology_Detect();
if(cpu_topology_NumPackages(topology) != 1 || cpu_topology_CoresPerPackage(topology) != 1)
debug_assert(fadt->header.size >= sizeof(FADT));
// TSC isn't incremented in deep-sleep states => unsafe.
if(fadt->IsC3Supported())
return false;
// frequency throttling possible => unsafe.
if(fadt->IsDutyCycleSupported())
return false;
}
@ -136,24 +183,12 @@ public:
// note: 8th generation CPUs support C1-clock ramping, which causes
// drift on multi-core systems, but those were excluded above.
x86_x64_CpuidRegs regs;
regs.eax = 0x80000007;
if(x86_x64_cpuid(&regs))
{
// TSC is invariant WRT P-state, C-state and STPCLK => safe.
if(regs.edx & PN_INVARIANT_TSC)
return true;
}
// in addition to P-state transitions, we're also subject to
// STPCLK throttling. this happens when the chipset thinks the
// system is dangerously overheated; the OS isn't even notified.
// it may be rare, but could cause incorrect results => unsafe.
// in addition to frequency changes due to P-state transitions,
// we're also subject to STPCLK throttling. this happens when
// the chipset thinks the system is dangerously overheated; the
// OS isn't even notified. this may be rare, but could cause
// incorrect results => unsafe.
return false;
// newer systems also support the C3 Deep Sleep state, in which
// the TSC isn't incremented. that's not nice, but irrelevant
// since STPCLK dooms the TSC on those systems anyway.
}
#endif

View File

@ -1426,17 +1426,18 @@ static void RedirectStdout()
// that means stdout isn't associated with a lowio handle; _close is
// called with fd = -1. oh well, there's nothing we can do.
FILE* f = 0;
errno_t ret = _wfreopen_s(&f, pathname.string().c_str(), L"wt", stdout);
// (return value ignored - it indicates 'file already exists' even
// if f is valid)
(void)_wfreopen_s(&f, pathname.string().c_str(), L"wt", stdout);
// executable directory (probably Program Files) is read-only for
// non-Administrators. we can't pick another directory because
// ah_log_dir might not be valid until the app's init has run and
// the desired subdirectory of wutil_AppdataPath is unknown.
// ah_log_dir might not be valid until the app's init has run,
// nor should we pollute the (root) wutil_AppdataPath directory.
// since stdout usually isn't critical and is seen if launching the
// app from a console, just skip the redirection in this case.
if(f == 0)
return;
UNUSED2(ret); // indicates 'file already exists' even if f is valid
#if CONFIG_PARANOIA
// disable buffering, so that no writes are lost even if the program
// crashes. only enabled in full debug mode because this is really slow!

View File

@ -34,6 +34,7 @@
#include "lib/file/vfs/vfs.h"
#include "lib/posix/posix.h"
#include "lib/sysdep/os/win/win.h"
#include "lib/sysdep/os/win/wdbg.h" // wdbg_assert
#include "lib/sysdep/os/win/winit.h"
#include <shlobj.h> // SHGetFolderPath
@ -395,10 +396,18 @@ static void DetectWindowsVersion()
DWORD size = sizeof(windowsVersionString);
(void)RegQueryValueExW(hKey, L"CurrentVersion", 0, 0, (LPBYTE)windowsVersionString, &size);
int major = 0, minor = 0;
int ret = swscanf_s(windowsVersionString, L"%d.%d", &major, &minor);
debug_assert(ret == 2);
debug_assert(major <= 0xFF && minor <= 0xFF);
unsigned major = 0, minor = 0;
// ICC 11.1.082 generates incorrect code for the following:
// const int ret = swscanf_s(windowsVersionString, L"%u.%u", &major, &minor);
std::wstringstream ss(windowsVersionString);
ss >> major;
wchar_t dot;
ss >> dot;
debug_assert(dot == '.');
ss >> minor;
debug_assert(4 <= major && major <= 0xFF);
debug_assert(minor <= 0xFF);
windowsVersion = (major << 8) | minor;
RegCloseKey(hKey);
@ -510,18 +519,20 @@ WinScopedDisableWow64Redirection::~WinScopedDisableWow64Redirection()
#ifndef LIB_STATIC_LINK
static HMODULE s_hModule;
BOOL WINAPI DllMain(HINSTANCE hInstance, DWORD UNUSED(reason), LPVOID UNUSED(reserved))
{
DisableThreadLibraryCalls(hInstance);
s_hModule = hInstance;
return TRUE; // success (ignored unless reason == DLL_PROCESS_ATTACH)
}
HMODULE wutil_LibModuleHandle()
{
return s_hModule;
HMODULE hModule;
const DWORD flags = GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT;
const BOOL ok = GetModuleHandleEx(flags, (LPCWSTR)&DllMain, &hModule);
// (avoid debug_assert etc. because we're called from debug_DisplayError)
wdbg_assert(ok);
return hModule;
}
#else