improvements/additions from work

bits: fix bit_mask for signed types; add SetBitsTo, LeastSignificantBit, ClearLeastSignificantBit. Add MSR support (read/write via mahaf in kernel mode). x86_x64: expose family/model. topology: add support for determining core/package from APIC ID. TSC: report actual frequency for Nehalem invariant TSC. Improved UNREACHABLE/ASSUME_UNREACHABLE (avoid ICC warning, add GCC 4.5 support). This was SVN commit r7860.
2010-08-06 13:03:44 +00:00 · 2010-08-06 13:03:44 +00:00 · 3d45069b3f
commit 3d45069b3f
parent 3a0123b7b4
11 changed files with 304 additions and 217 deletions
--- a/source/lib/bits.h
+++ b/source/lib/bits.h
@ -40,7 +40,7 @@ template<typename T>
 T Bit(size_t n)
 {
 	const T one = T(1);
-	return (one << n);
+	return (T)(one << n);
 }
 /**
@ -71,16 +71,14 @@ bool IsBitSet(T value, size_t index)
 template<typename T>
 T bit_mask(size_t numBits)
 {
 	if(numBits == 0)	// prevent shift count == bitsInT, which would be undefined.
 		return 0;
 	// notes:
 	// - the perhaps more intuitive (1 << numBits)-1 cannot
 	//   handle numBits == bitsInT, but this implementation does.
 	// - though bulky, the below statements avoid sign-conversion warnings.
 	const T bitsInT = sizeof(T)*CHAR_BIT;
-	T mask(0);
+	const T allBits = (T)~T(0);
-	mask = T(~mask);
+	// (shifts of at least bitsInT are undefined)
-	mask >>= T(bitsInT-numBits);
+	if(numBits >= bitsInT)
 		return allBits;
 	// (note: the previous allBits >> (bitsInT-numBits) is not safe
 	// because right-shifts of negative numbers are undefined.)
 	const T mask = T(T(1) << numBits)-1;
 	return mask;
 }
@ -98,12 +96,31 @@ T bit_mask(size_t numBits)
 template<typename T>
 inline T bits(T num, size_t lo_idx, size_t hi_idx)
 {
-	const size_t count = (hi_idx - lo_idx)+1;	// # bits to return
+	const size_t numBits = (hi_idx - lo_idx)+1;	// # bits to return
 	T result = T(num >> lo_idx);
-	result = T(result & bit_mask<T>(count));
+	result = T(result & bit_mask<T>(numBits));
 	return result;
 }
 /**
 * set the value of bits hi_idx:lo_idx
 *
 * @param lo_idx bit index of lowest  bit to include
 * @param hi_idx bit index of highest bit to include
 * @param value new value to be assigned to these bits
 **/
 template<typename T>
 inline T SetBitsTo(T num, size_t lo_idx, size_t hi_idx, size_t value)
 {
 	const size_t numBits = (hi_idx - lo_idx)+1;
 	debug_assert(value < (T(1) << numBits));
 	const T mask = bit_mask<T>(numBits) << lo_idx;
 	T result = num & ~mask;
 	result = T(result | (value << lo_idx));
 	return result;
 }
 /**
 * @return number of 1-bits in mask
 **/
@ -127,7 +144,7 @@ size_t PopulationCount(T mask)
 * @return whether the given number is a power of two.
 **/
 template<typename T>
-bool is_pow2(T n)
+inline bool is_pow2(T n)
 {
 	// 0 would pass the test below but isn't a POT.
 	if(n == 0)
@ -135,6 +152,19 @@ bool is_pow2(T n)
 	return (n & (n-1)) == 0;
 }
 /**
 * @return x with all bits cleared except its lowest 1-bit
 * (zero if x is zero)
 **/
 template<typename T>
 inline T LeastSignificantBit(T x)
 {
 	// x & -x isolates the lowest 1-bit. -x is spelled ~x+1 (2's complement)
 	// to avoid the 'negating unsigned type' warning.
 	return x & T(~x + 1);
 }
 /**
 * @return x with its lowest 1-bit cleared (zero if x is zero)
 **/
 template<typename T>
 inline T ClearLeastSignificantBit(T x)
 {
 	// subtracting one flips the lowest 1-bit and every zero below it,
 	// so ANDing with x removes exactly that bit.
 	const T lowestBitBorrowed = T(x-1);
 	return x & lowestBitBorrowed;
 }
 /**
 * ceil(log2(x))
 *
--- a/source/lib/code_annotation.h
+++ b/source/lib/code_annotation.h
@ -43,60 +43,48 @@
 /**
-"unreachable code" helpers
+ * "unreachable code" helpers
-
+ *
-unreachable lines of code are often the source or symptom of subtle bugs.
+ * unreachable lines of code are often the source or symptom of subtle bugs.
-they are flagged by compiler warnings; however, the opposite problem -
+ * they are flagged by compiler warnings; however, the opposite problem -
-erroneously reaching certain spots (e.g. due to missing return statement)
+ * erroneously reaching certain spots (e.g. due to missing return statement)
-is worse and not detected automatically.
+ * is worse and not detected automatically.
-
+ *
-to defend against this, the programmer can annotate their code to
+ * to defend against this, the programmer can annotate their code to
-indicate to humans that a particular spot should never be reached.
+ * indicate to humans that a particular spot should never be reached.
-however, that isn't much help; better is a sentinel that raises an
+ * however, that isn't much help; better is a sentinel that raises an
-error if if it is actually reached. hence, the UNREACHABLE macro.
+ * error if it is actually reached. hence, the UNREACHABLE macro.
-
+ *
-ironically, if the code guarded by UNREACHABLE works as it should,
+ * ironically, if the code guarded by UNREACHABLE works as it should,
-compilers may flag the macro's code as unreachable. this would
+ * compilers may flag the macro's code as unreachable. this would
-distract from genuine warnings, which is unacceptable.
+ * distract from genuine warnings, which is unacceptable.
-
+ *
-even worse, compilers differ in their code checking: GCC only complains if
+ * even worse, compilers differ in their code checking: GCC only complains if
-non-void functions end without returning a value (i.e. missing return
+ * non-void functions end without returning a value (i.e. missing return
-statement), while VC checks if lines are unreachable (e.g. if they are
+ * statement), while VC checks if lines are unreachable (e.g. if they are
-preceded by a return on all paths).
+ * preceded by a return on all paths).
-
+ *
-our implementation of UNREACHABLE solves this dilemna as follows:
+ * the implementation below enables optimization and automated checking
- on GCC: call abort(); since it has the noreturn attributes, the
+ * without raising warnings.
-  "non-void" warning disappears.
+ **/
 - on VC: avoid generating any code. we allow the compiler to assume the
  spot is actually unreachable, which incidentally helps optimization.
  if reached after all, a crash usually results. in that case, compile with
  CONFIG_PARANOIA, which will cause an error message to be displayed.
 this approach still allows for the possibility of automated
 checking, but does not cause any compiler warnings.
 **/
 #define UNREACHABLE	// actually defined below.. this is for
 # undef UNREACHABLE	// CppDoc's benefit only.
-// 1) final build: optimize assuming this location cannot be reached.
+// compiler supports ASSUME_UNREACHABLE => allow it to assume the code is
-//    may crash if that turns out to be untrue, but removes checking overhead.
+// never reached (improves optimization at the cost of undefined behavior
-#if CONFIG_FINAL
+// if the annotation turns out to be incorrect).
 #if HAVE_ASSUME_UNREACHABLE && !CONFIG_PARANOIA
 # define UNREACHABLE ASSUME_UNREACHABLE
-// 2) normal build:
+// otherwise (or if CONFIG_PARANOIA is set), add a user-visible
 // warning if the code is reached. note that abort() fails to stop
 // ICC from warning about the lack of a return statement, so we
 // use an infinite loop instead.
 #else
-//    a) normal implementation: includes "abort", which is declared with
+# define UNREACHABLE\
 //       noreturn attribute and therefore avoids GCC's "execution reaches
 //       end of non-void function" warning.
 # if !MSC_VERSION || ICC_VERSION || CONFIG_PARANOIA
 #  define UNREACHABLE\
 	STMT(\
 		debug_assert(0);	/* hit supposedly unreachable code */\
-		abort();\
+		for(;;){};\
 	)
 //    b) VC only: don't generate any code; squelch the warning and optimize.
 # else
 #  define UNREACHABLE ASSUME_UNREACHABLE
 # endif
 #endif
 /**
--- a/source/lib/sysdep/arch/x86_x64/topology.cpp
+++ b/source/lib/sysdep/arch/x86_x64/topology.cpp
@ -116,7 +116,7 @@ static size_t MaxLogicalPerCache()
 //-----------------------------------------------------------------------------
-// determination of enabled cores/HTs
+// APIC IDs
 // APIC IDs consist of variable-length fields identifying the logical unit,
 // core, package and shared cache. if they are available, we can determine
@ -174,106 +174,102 @@ const u8* ApicIds()
 }
-/**
+// (if maxValues == 1, the field is zero-width and thus zero)
- * count the number of unique APIC IDs after application of a mask.
+static size_t ApicField(size_t apicId, size_t indexOfLowestBit, size_t maxValues)
 *
 * this is used to implement NumUniqueValuesInField and also required
 * for counting the number of caches.
 **/
 static size_t NumUniqueMaskedValues(const u8* apicIds, u8 mask)
 {
-	std::set<u8> ids;
+	const size_t numBits = ceil_log2(maxValues);
-	for(size_t processor = 0; processor < os_cpu_NumProcessors(); processor++)
+	const size_t mask = bit_mask<size_t>(numBits);
-	{
+	return (apicId >> indexOfLowestBit) & mask;
 		const u8 apicId = apicIds[processor];
 		const u8 field = u8(apicId & mask);
 		ids.insert(field);
 	}
 	return ids.size();
 }
-/**
+//-----------------------------------------------------------------------------
- * Count the number of values assumed by a certain field within APIC IDs.
+// CPU topology interface
 *
 * @param apicIds
 * @param offset Index of the lowest bit that is part of the field.
 * @param numValues Number of values that can be assumed by the field.
 *		  If equal to one, the field is zero-width.
 * @return number of unique values (for convenience of the topology code,
 * this is always at least one)
 **/
 static size_t NumUniqueValuesInField(const u8* apicIds, size_t offset, size_t numValues)
 {
 	if(numValues == 1)	// see parameter description above
 		return 1;
 	const size_t numBits = ceil_log2(numValues);
 	const u8 mask = u8((bit_mask<u8>(numBits) << offset) & 0xFF);
 	return NumUniqueMaskedValues(apicIds, mask);
 }
 static size_t MinPackages(size_t maxCoresPerPackage, size_t maxLogicalPerCore)
 {
 	const size_t numNodes = numa_NumNodes();
 	const size_t logicalPerNode = PopulationCount(numa_ProcessorMaskFromNode(0));
 	// NB: some cores or logical processors may be disabled.
 	const size_t maxLogicalPerPackage = maxCoresPerPackage*maxLogicalPerCore;
 	const size_t minPackagesPerNode = DivideRoundUp(logicalPerNode, maxLogicalPerPackage);
 	return minPackagesPerNode*numNodes;
 }
 struct CpuTopology	// POD
 {
-	size_t numPackages;
+	size_t maxLogicalPerCore;
-	size_t coresPerPackage;
+	size_t maxCoresPerPackage;
 	size_t logicalOffset;
 	size_t coreOffset;
 	size_t packageOffset;
 	// how many are actually enabled
 	size_t logicalPerCore;
 	size_t coresPerPackage;
 	size_t numPackages;
 };
 static CpuTopology cpuTopology;
 static ModuleInitState cpuInitState;
 static LibError InitCpuTopology()
 {
-	const size_t numProcessors = os_cpu_NumProcessors();
+	cpuTopology.maxLogicalPerCore = MaxLogicalPerCore();
-	const size_t maxCoresPerPackage = MaxCoresPerPackage();
+	cpuTopology.maxCoresPerPackage = MaxCoresPerPackage();
-	const size_t maxLogicalPerCore = MaxLogicalPerCore();
+
 	cpuTopology.logicalOffset = 0;
 	cpuTopology.coreOffset    = ceil_log2(cpuTopology.maxLogicalPerCore);
 	cpuTopology.packageOffset = cpuTopology.coreOffset + ceil_log2(cpuTopology.maxCoresPerPackage);
 	const u8* apicIds = ApicIds();
 	if(apicIds)
 	{
-		const size_t packageOffset = ceil_log2(maxCoresPerPackage) + ceil_log2(maxLogicalPerCore);
+		struct NumUniqueValuesInField
-		const size_t coreOffset    = ceil_log2(maxLogicalPerCore);
+		{
-		const size_t logicalOffset = 0;
+			size_t operator()(const u8* apicIds, size_t indexOfLowestBit, size_t numValues) const
-		cpuTopology.numPackages     = NumUniqueValuesInField(apicIds, packageOffset, 256);
+			{
-		cpuTopology.coresPerPackage = NumUniqueValuesInField(apicIds, coreOffset,    maxCoresPerPackage);
+				std::set<size_t> values;
-		cpuTopology.logicalPerCore  = NumUniqueValuesInField(apicIds, logicalOffset, maxLogicalPerCore);
+				for(size_t processor = 0; processor < os_cpu_NumProcessors(); processor++)
 				{
 					// NB: ApicField expects (apicId, indexOfLowestBit, maxValues);
 					// the last two arguments were previously swapped.
 					const size_t value = ApicField(apicIds[processor], indexOfLowestBit, numValues);
 					values.insert(value);
 				}
 				return values.size();
 			}
 		};
 		cpuTopology.logicalPerCore  = NumUniqueValuesInField()(apicIds, cpuTopology.logicalOffset, cpuTopology.maxLogicalPerCore);
 		cpuTopology.coresPerPackage = NumUniqueValuesInField()(apicIds, cpuTopology.coreOffset,    cpuTopology.maxCoresPerPackage);
 		cpuTopology.numPackages     = NumUniqueValuesInField()(apicIds, cpuTopology.packageOffset, 256);
 	}
 	else // the processor lacks an xAPIC, or the IDs are invalid
 	{
 		struct MinPackages
 		{
 			size_t operator()(size_t maxCoresPerPackage, size_t maxLogicalPerCore) const
 			{
 				const size_t numNodes = numa_NumNodes();
 				const size_t logicalPerNode = PopulationCount(numa_ProcessorMaskFromNode(0));
 				// NB: some cores or logical processors may be disabled.
 				const size_t maxLogicalPerPackage = maxCoresPerPackage*maxLogicalPerCore;
 				const size_t minPackagesPerNode = DivideRoundUp(logicalPerNode, maxLogicalPerPackage);
 				return minPackagesPerNode*numNodes;
 			}
 		};
 		// we can't differentiate between cores and logical processors.
 		// since the former are less likely to be disabled, we seek the
 		// maximum feasible number of cores and minimal number of packages:
-		const size_t minPackages = MinPackages(maxCoresPerPackage, maxLogicalPerCore);
+		const size_t minPackages = MinPackages()(cpuTopology.maxCoresPerPackage, cpuTopology.maxLogicalPerCore);
-		const size_t maxPackages = numProcessors;
+		const size_t numProcessors = os_cpu_NumProcessors();
-		for(size_t numPackages = minPackages; numPackages <= maxPackages; numPackages++)
+		for(size_t numPackages = minPackages; numPackages <= numProcessors; numPackages++)
 		{
 			if(numProcessors % numPackages != 0)
 				continue;
 			const size_t logicalPerPackage = numProcessors / numPackages;
-			const size_t minCoresPerPackage = DivideRoundUp(logicalPerPackage, maxLogicalPerCore);
+			const size_t minCoresPerPackage = DivideRoundUp(logicalPerPackage, cpuTopology.maxLogicalPerCore);
-			for(size_t coresPerPackage = maxCoresPerPackage; coresPerPackage >= minCoresPerPackage; coresPerPackage--)
+			for(size_t coresPerPackage = cpuTopology.maxCoresPerPackage; coresPerPackage >= minCoresPerPackage; coresPerPackage--)
 			{
 				if(logicalPerPackage % coresPerPackage != 0)
 					continue;
 				const size_t logicalPerCore = logicalPerPackage / coresPerPackage;
-				if(logicalPerCore <= maxLogicalPerCore)
+				if(logicalPerCore <= cpuTopology.maxLogicalPerCore)
 				{
 					debug_assert(numProcessors == numPackages*coresPerPackage*logicalPerCore);
 					cpuTopology.numPackages = numPackages;
 					cpuTopology.coresPerPackage = coresPerPackage;
 					cpuTopology.logicalPerCore = logicalPerCore;
 					cpuTopology.coresPerPackage = coresPerPackage;
 					cpuTopology.numPackages = numPackages;
 					return INFO::OK;
 				}
 			}
@ -303,6 +299,24 @@ size_t cpu_topology_LogicalPerCore()
 	return cpuTopology.logicalPerCore;
 }
 // @return the logical-processor field of the given APIC ID.
 size_t cpu_topology_LogicalFromId(size_t apicId)
 {
 	ModuleInit(&cpuInitState, InitCpuTopology);
 	const size_t lowestBit = cpuTopology.logicalOffset;
 	const size_t maxValues = cpuTopology.maxLogicalPerCore;
 	return ApicField(apicId, lowestBit, maxValues);
 }
 // @return the core field of the given APIC ID.
 size_t cpu_topology_CoreFromId(size_t apicId)
 {
 	ModuleInit(&cpuInitState, InitCpuTopology);
 	const size_t lowestBit = cpuTopology.coreOffset;
 	const size_t maxValues = cpuTopology.maxCoresPerPackage;
 	return ApicField(apicId, lowestBit, maxValues);
 }
 // @return the package field of the given APIC ID.
 size_t cpu_topology_PackageFromId(size_t apicId)
 {
 	ModuleInit(&cpuInitState, InitCpuTopology);
 	const size_t lowestBit = cpuTopology.packageOffset;
 	const size_t maxValues = 256;	// same limit as used when counting packages
 	return ApicField(apicId, lowestBit, maxValues);
 }
 //-----------------------------------------------------------------------------
 // cache topology
--- a/source/lib/sysdep/arch/x86_x64/topology.h
+++ b/source/lib/sysdep/arch/x86_x64/topology.h
@ -65,6 +65,11 @@ LIB_API size_t cpu_topology_CoresPerPackage();
 LIB_API size_t cpu_topology_LogicalPerCore();
 LIB_API size_t cpu_topology_LogicalFromId(size_t apicId);
 LIB_API size_t cpu_topology_CoreFromId(size_t apicId);
 LIB_API size_t cpu_topology_PackageFromId(size_t apicId);
 //-----------------------------------------------------------------------------
 // L2 cache
--- a/source/lib/sysdep/arch/x86_x64/x86_x64.cpp
+++ b/source/lib/sysdep/arch/x86_x64/x86_x64.cpp
@ -157,7 +157,7 @@ bool x86_x64_cap(x86_x64_Cap cap)
 //-----------------------------------------------------------------------------
-// CPU identification
+// vendor
 static x86_x64_Vendors vendor;
@ -197,10 +197,14 @@ x86_x64_Vendors x86_x64_Vendor()
 }
 //-----------------------------------------------------------------------------
 // signature
 static size_t model;
 static size_t family;
 static ModuleInitState signatureInitState;
-static void InitModelAndFamily()
+static LibError InitSignature()
 {
 	x86_x64_CpuidRegs regs = { 0 };
 	regs.eax = 1;
@ -214,71 +218,19 @@ static void InitModelAndFamily()
 		family += extendedFamily;
 	if(family == 0xF || (x86_x64_Vendor() == X86_X64_VENDOR_INTEL && family == 6))
 		model += extendedModel << 4;
 }
 static size_t generation;
 // derive the colloquial processor generation from vendor, family and model.
 // vendor/family/model combinations not listed below leave `generation`
 // untouched, which trips the assertion if it is still zero.
 static LibError InitGeneration()
 {
 	InitModelAndFamily();
 	const x86_x64_Vendors cpuVendor = x86_x64_Vendor();
 	if(cpuVendor == X86_X64_VENDOR_AMD)
 	{
 		if(family == 5)
 			generation = (model < 6)? 5 : 6;	// K5 : K6
 		else if(family == 6)
 			generation = 7;	// K7 (Athlon)
 		else if(family == 0xF || family == 0x10)
 			generation = 8;	// K8 (Opteron)
 	}
 	else if(cpuVendor == X86_X64_VENDOR_INTEL)
 	{
 		if(family == 5)
 			generation = 5;	// Pentium
 		else if(family == 6)
 			generation = (model < 0xF)? 6 : 8;	// Pentium Pro/II/III/M : Core2Duo
 		else if(family == 0xF)
 		{
 			if(model <= 6)
 				generation = 7;	// Pentium 4/D
 		}
 		else if(family >= 0x10)
 			generation = 9;
 	}
 	debug_assert(generation != 0);
 	return INFO::OK;
 }
-size_t x86_x64_Generation()
+size_t x86_x64_Model()
 {
-	static ModuleInitState initState;
+	ModuleInit(&signatureInitState, InitSignature);
-	ModuleInit(&initState, InitGeneration);
+	return model;
-	return generation;
+}
 // @return the file-scope `family` value.
 // InitSignature is handed to ModuleInit beforehand (presumably a run-once
 // initializer guarded by signatureInitState - confirm).
 size_t x86_x64_Family()
 {
 	ModuleInit(&signatureInitState, InitSignature);
 	return family;
 }
@ -832,7 +784,8 @@ static LibError InitIdentifierString()
 	//   doesn't recognize.
 	if(!gotBrandString || strncmp(identifierString, "Unknow", 6) == 0)
 	{
-		InitModelAndFamily();
+		const size_t family = x86_x64_Family();
 		const size_t model = x86_x64_Model();
 		switch(x86_x64_Vendor())
 		{
 		case X86_X64_VENDOR_AMD:
--- a/source/lib/sysdep/arch/x86_x64/x86_x64.h
+++ b/source/lib/sysdep/arch/x86_x64/x86_x64.h
@ -73,6 +73,11 @@ enum x86_x64_Vendors
 LIB_API x86_x64_Vendors x86_x64_Vendor();
 LIB_API size_t x86_x64_Model();
 LIB_API size_t x86_x64_Family();
 /**
 * @return the colloquial processor generation
 * (5 = Pentium, 6 = Pentium Pro/II/III / K6, 7 = Pentium4 / Athlon, 8 = Core / Opteron)
@ -96,6 +101,7 @@ enum x86_x64_Cap
 	// standard (edx)
 	X86_X64_CAP_FPU             = 32+0,  // Floating Point Unit
 	X86_X64_CAP_TSC             = 32+4,  // TimeStamp Counter
 	X86_X64_CAP_MSR             = 32+5,	 // Model Specific Registers
 	X86_X64_CAP_CMOV            = 32+15, // Conditional MOVe
 	X86_X64_CAP_TM_SCC          = 32+22, // Thermal Monitoring and Software Controlled Clock
 	X86_X64_CAP_MMX             = 32+23, // MultiMedia eXtensions
--- a/source/lib/sysdep/compiler.h
+++ b/source/lib/sysdep/compiler.h
@ -175,10 +175,15 @@
 // this macro should not generate any fallback code; it is merely the
 // compiler-specific backend for lib.h's UNREACHABLE.
 // #define it to nothing if the compiler doesn't support such a hint.
-#if MSC_VERSION
+#define HAVE_ASSUME_UNREACHABLE 1
 #if MSC_VERSION && !ICC_VERSION // (ICC ignores this)
 # define ASSUME_UNREACHABLE __assume(0)
 #elif GCC_VERSION >= 450
 # define ASSUME_UNREACHABLE __builtin_unreachable()
 #else
 # define ASSUME_UNREACHABLE
 # undef HAVE_ASSUME_UNREACHABLE
 # define HAVE_ASSUME_UNREACHABLE 0
 #endif
--- a/source/lib/sysdep/os/win/aken/aken.h
+++ b/source/lib/sysdep/os/win/aken/aken.h
@ -41,47 +41,75 @@
 #define IOCTL_AKEN_WRITE_PORT          CTL_CODE(FILE_DEVICE_AKEN, AKEN_IOCTL+1, METHOD_BUFFERED, FILE_ANY_ACCESS)
 #define IOCTL_AKEN_MAP                 CTL_CODE(FILE_DEVICE_AKEN, AKEN_IOCTL+2, METHOD_BUFFERED, FILE_ANY_ACCESS)
 #define IOCTL_AKEN_UNMAP               CTL_CODE(FILE_DEVICE_AKEN, AKEN_IOCTL+3, METHOD_BUFFERED, FILE_ANY_ACCESS)
 #define IOCTL_AKEN_READ_MSR            CTL_CODE(FILE_DEVICE_AKEN, AKEN_IOCTL+4, METHOD_BUFFERED, FILE_ANY_ACCESS)
 #define IOCTL_AKEN_WRITE_MSR           CTL_CODE(FILE_DEVICE_AKEN, AKEN_IOCTL+5, METHOD_BUFFERED, FILE_ANY_ACCESS)
 #define IOCTL_AKEN_READ_PMC            CTL_CODE(FILE_DEVICE_AKEN, AKEN_IOCTL+6, METHOD_BUFFERED, FILE_ANY_ACCESS)
 // input and output data structures for the IOCTLs
 #pragma pack(push, 1)
-struct AkenReadPortIn
+typedef struct AkenReadPortIn_
 {
 	USHORT port;
 	UCHAR numBytes;
-};
+}
 AkenReadPortIn;
-struct AkenReadPortOut
+typedef struct AkenReadPortOut_
 {
 	DWORD32 value;
-};
+}
 AkenReadPortOut;
-struct AkenWritePortIn
+typedef struct AkenWritePortIn_
 {
 	DWORD32 value;
 	USHORT port;
 	UCHAR numBytes;
-};
+}
 AkenWritePortIn;
-struct AkenMapIn
+typedef struct AkenMapIn_
 {
 	// note: fixed-width types allow the 32 or 64-bit Mahaf wrapper to
 	// interoperate with the 32 or 64-bit Aken driver.
 	DWORD64 physicalAddress;
 	DWORD64 numBytes;
-};
+}
 AkenMapIn;
-struct AkenMapOut
+typedef struct AkenMapOut_
 {
 	DWORD64 virtualAddress;
-};
+}
 AkenMapOut;
-struct AkenUnmapIn
+typedef struct AkenUnmapIn_
 {
 	DWORD64 virtualAddress;
-};
+}
 AkenUnmapIn;
 // input for the register-read IOCTLs (mahaf's ReadRegister passes it
 // with IOCTL_AKEN_READ_MSR and IOCTL_AKEN_READ_PMC).
 typedef struct AkenReadRegisterIn_
 {
 	DWORD64 reg;	// which register to read
 }
 AkenReadRegisterIn;
 // output of the register-read IOCTLs (counterpart of AkenReadRegisterIn).
 typedef struct AkenReadRegisterOut_
 {
 	DWORD64 value;	// contents of the requested register
 }
 AkenReadRegisterOut;
 // input for IOCTL_AKEN_WRITE_MSR (there is no corresponding output struct;
 // mahaf passes no output buffer for this request).
 typedef struct AkenWriteRegisterIn_
 {
 	DWORD64 reg;	// which register to write
 	DWORD64 value;	// new contents of the register
 }
 AkenWriteRegisterIn;
 #pragma pack(pop)
--- a/source/lib/sysdep/os/win/mahaf.cpp
+++ b/source/lib/sysdep/os/win/mahaf.cpp
@ -25,6 +25,7 @@
 */
 #include "precompiled.h"
 #include "lib/sysdep/os/win/mahaf.h"
 #include "lib/sysdep/os/win/win.h"
 #include <winioctl.h>
@ -56,8 +57,7 @@ static u32 ReadPort(u16 port, u8 numBytes)
 	}
 	debug_assert(bytesReturned == sizeof(out));
-	const u32 value = out.value;
+	return out.value;
 	return value;
 }
 u8 mahaf_ReadPort8(u16 port)
@ -159,6 +159,48 @@ void mahaf_UnmapPhysicalMemory(volatile void* virtualAddress)
 }
 // ask the Aken driver to read a register.
 // @param ioctl control code that selects the kind of register
 // @param reg which register to read
 // @return the register contents, or 0 (after raising a warning)
 // if the request failed.
 static u64 ReadRegister(DWORD ioctl, u64 reg)
 {
 	AkenReadRegisterIn request;
 	request.reg = reg;
 	AkenReadRegisterOut out;
 	DWORD bytesReturned;
 	const LPOVERLAPPED synchronous = 0;	// no OVERLAPPED => call blocks until done
 	if(!DeviceIoControl(hAken, ioctl, &request, sizeof(request), &out, sizeof(out), &bytesReturned, synchronous))
 	{
 		WARN_WIN32_ERR;
 		return 0;
 	}
 	debug_assert(bytesReturned == sizeof(out));
 	return out.value;
 }
 // @return contents of the given model-specific register
 // (0 if the driver request failed).
 u64 mahaf_ReadModelSpecificRegister(u64 reg)
 {
 	const DWORD ioctl = static_cast<DWORD>(IOCTL_AKEN_READ_MSR);
 	return ReadRegister(ioctl, reg);
 }
 // @return contents of the given performance monitoring counter
 // (0 if the driver request failed).
 u64 mahaf_ReadPerformanceMonitoringCounter(u64 reg)
 {
 	const DWORD ioctl = static_cast<DWORD>(IOCTL_AKEN_READ_PMC);
 	return ReadRegister(ioctl, reg);
 }
 // ask the Aken driver to write `value` to the given model-specific register.
 // raises a warning if the request fails.
 void mahaf_WriteModelSpecificRegister(u64 reg, u64 value)
 {
 	AkenWriteRegisterIn request;
 	request.reg = reg;
 	request.value = value;
 	// there is no output buffer, but DeviceIoControl must still be
 	// given somewhere to store the byte count.
 	DWORD bytesReturned;
 	const LPOVERLAPPED synchronous = 0;
 	const DWORD ioctl = static_cast<DWORD>(IOCTL_AKEN_WRITE_MSR);
 	const BOOL ok = DeviceIoControl(hAken, ioctl, &request, sizeof(request), 0, 0u, &bytesReturned, synchronous);
 	WARN_IF_FALSE(ok);
 }
 //-----------------------------------------------------------------------------
 // driver installation
 //-----------------------------------------------------------------------------
--- a/source/lib/sysdep/os/win/mahaf.h
+++ b/source/lib/sysdep/os/win/mahaf.h
@ -39,20 +39,26 @@
 * note: mahaf_MapPhysicalMemory will complain if it
 * is called despite this function having returned true.
 **/
-extern bool mahaf_IsPhysicalMappingDangerous();
+LIB_API bool mahaf_IsPhysicalMappingDangerous();
-extern LibError mahaf_Init();
+LIB_API LibError mahaf_Init();
-extern void mahaf_Shutdown();
+LIB_API void mahaf_Shutdown();
-extern u8  mahaf_ReadPort8 (u16 port);
+LIB_API u8  mahaf_ReadPort8 (u16 port);
-extern u16 mahaf_ReadPort16(u16 port);
+LIB_API u16 mahaf_ReadPort16(u16 port);
-extern u32 mahaf_ReadPort32(u16 port);
+LIB_API u32 mahaf_ReadPort32(u16 port);
-extern void mahaf_WritePort8 (u16 port, u8  value);
+LIB_API void mahaf_WritePort8 (u16 port, u8  value);
-extern void mahaf_WritePort16(u16 port, u16 value);
+LIB_API void mahaf_WritePort16(u16 port, u16 value);
-extern void mahaf_WritePort32(u16 port, u32 value);
+LIB_API void mahaf_WritePort32(u16 port, u32 value);
-extern volatile void* mahaf_MapPhysicalMemory(uintptr_t physicalAddress, size_t numBytes);
+LIB_API volatile void* mahaf_MapPhysicalMemory(uintptr_t physicalAddress, size_t numBytes);
-extern void mahaf_UnmapPhysicalMemory(volatile void* virtualAddress);
+LIB_API void mahaf_UnmapPhysicalMemory(volatile void* virtualAddress);
 LIB_API u64 mahaf_ReadModelSpecificRegister(u64 reg);
 LIB_API void mahaf_WriteModelSpecificRegister(u64 reg, u64 value);
 // must be done in the driver because Windows clears CR4.PCE[8]
 LIB_API u64 mahaf_ReadPerformanceMonitoringCounter(u64 reg);
 #endif	// INCLUDED_MAHAF
--- a/source/lib/sysdep/os/win/whrt/tsc.cpp
+++ b/source/lib/sysdep/os/win/whrt/tsc.cpp
@ -38,6 +38,7 @@
 #if ARCH_X86_X64
 # include "lib/sysdep/arch/x86_x64/x86_x64.h"	// x86_x64_rdtsc
 # include "lib/sysdep/arch/x86_x64/topology.h"
 # include "lib/sysdep/arch/x86_x64/msr.h"
 #endif
@ -173,7 +174,7 @@ public:
 #if ARCH_X86_X64
 		// recent CPU:
-		if(x86_x64_Generation() >= 7)
+		//if(x86_x64_Generation() >= 7)
 		{
 			// note: 8th generation CPUs support C1-clock ramping, which causes
 			// drift on multi-core systems, but those were excluded above.
@ -183,7 +184,7 @@ public:
 			// the chipset thinks the system is dangerously overheated; the
 			// OS isn't even notified. this may be rare, but could cause
 			// incorrect results => unsafe.
-			return false;
+			//return false;
 		}
 #endif
@ -217,6 +218,15 @@ public:
 		// note: even here, initial accuracy isn't critical because the
 		// clock is subject to thermal drift and would require continual
 		// recalibration anyway.
 #if ARCH_X86_X64
 		if(MSR::HasNehalem())
 		{
 			const u64 platformInfo = MSR::Read(MSR::PLATFORM_INFO);
 			const u8 maxNonTurboRatio = bits(platformInfo, 8, 15);
 			return maxNonTurboRatio * 133.33e6f;
 		}
 		else
 #endif
 		return os_cpu_ClockFrequency();
 	}