forked from 0ad/0ad

improvements/additions from work

bits: fix bit_mask for signed types, add SetBitsTo, LeastSignificantBit,
ClearLeastSignificantBit.
add MSR support (read/write via mahaf in kernel mode)
x86_x64: expose family/model
topology: add support for determining core/package from APIC ID.
TSC: report actual frequency for nehalem invariant TSC.
improved UNREACHABLE/ASSUME_UNREACHABLE (avoid ICC warning, add GCC4.5
support)

This was SVN commit r7860.
janwas 2010-08-06 13:03:44 +00:00
parent 3a0123b7b4
commit 3d45069b3f
11 changed files with 304 additions and 217 deletions

View File

@ -40,7 +40,7 @@ template<typename T>
T Bit(size_t n)
{
const T one = T(1);
return (one << n);
return (T)(one << n);
}
/**
@ -71,16 +71,14 @@ bool IsBitSet(T value, size_t index)
template<typename T>
T bit_mask(size_t numBits)
{
if(numBits == 0) // prevent shift count == bitsInT, which would be undefined.
return 0;
// notes:
// - the perhaps more intuitive (1 << numBits)-1 cannot
// handle numBits == bitsInT, but this implementation does.
// - though bulky, the below statements avoid sign-conversion warnings.
const T bitsInT = sizeof(T)*CHAR_BIT;
T mask(0);
mask = T(~mask);
mask >>= T(bitsInT-numBits);
const T allBits = (T)~T(0);
// (shifts of at least bitsInT are undefined)
if(numBits >= bitsInT)
return allBits;
// (note: the previous allBits >> (bitsInT-numBits) is not safe
// because right-shifts of negative numbers are undefined.)
const T mask = T(T(1) << numBits)-1;
return mask;
}
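To make the fixed edge cases concrete, a minimal sanity sketch (illustrative only; assumes the lib's u32/i32 fixed-width typedefs):

	debug_assert(bit_mask<u32>(0)  == 0);           // zero-width field
	debug_assert(bit_mask<u32>(8)  == 0xFFu);       // ordinary case: (1 << 8) - 1
	debug_assert(bit_mask<u32>(32) == 0xFFFFFFFFu); // numBits == bitsInT: a plain shift would be undefined
	debug_assert(bit_mask<i32>(8)  == 0xFF);        // signed T: the old ~0 >> 24 was an arithmetic shift yielding -1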
@ -98,12 +96,31 @@ T bit_mask(size_t numBits)
template<typename T>
inline T bits(T num, size_t lo_idx, size_t hi_idx)
{
const size_t count = (hi_idx - lo_idx)+1; // # bits to return
const size_t numBits = (hi_idx - lo_idx)+1; // # bits to return
T result = T(num >> lo_idx);
result = T(result & bit_mask<T>(count));
result = T(result & bit_mask<T>(numBits));
return result;
}
/**
* set the value of bits hi_idx:lo_idx
*
* @param lo_idx bit index of lowest bit to include
* @param hi_idx bit index of highest bit to include
* @param value new value to be assigned to these bits
**/
template<typename T>
inline T SetBitsTo(T num, size_t lo_idx, size_t hi_idx, size_t value)
{
const size_t numBits = (hi_idx - lo_idx)+1;
debug_assert(value < (T(1) << numBits));
const T mask = bit_mask<T>(numBits) << lo_idx;
T result = num & ~mask;
result = T(result | (value << lo_idx));
return result;
}
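Illustrative use of SetBitsTo (hypothetical register value; assumes the lib's u32 typedef):

	u32 reg = 0xFFFFFFFFu;
	reg = SetBitsTo(reg, 8, 15, 0x2A); // replace bits 15:8 with 0x2A
	// reg == 0xFFFF2AFFu; all bits outside 15:8 are preserved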
/**
* @return number of 1-bits in mask
**/
@ -127,7 +144,7 @@ size_t PopulationCount(T mask)
* @return whether the given number is a power of two.
**/
template<typename T>
bool is_pow2(T n)
inline bool is_pow2(T n)
{
// 0 would pass the test below but isn't a POT.
if(n == 0)
@ -135,6 +152,19 @@ bool is_pow2(T n)
return (n & (n-1)) == 0;
}
template<typename T>
inline T LeastSignificantBit(T x)
{
const T negX = T(~x + 1); // 2's complement (avoids 'negating unsigned type' warning)
return x & negX;
}
template<typename T>
inline T ClearLeastSignificantBit(T x)
{
return x & (x-1);
}
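These two helpers combine into the usual iterate-over-set-bits idiom; a minimal sketch (hypothetical mask value, assuming the lib's u32 typedef):

	u32 mask = 0x29; // bits 0, 3 and 5 set
	while(mask != 0)
	{
		const u32 bit = LeastSignificantBit(mask); // 0x1, then 0x8, then 0x20
		// ... process bit ...
		mask = ClearLeastSignificantBit(mask);
	}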
/**
* ceil(log2(x))
*

View File

@ -43,60 +43,48 @@
/**
"unreachable code" helpers
unreachable lines of code are often the source or symptom of subtle bugs.
they are flagged by compiler warnings; however, the opposite problem -
erroneously reaching certain spots (e.g. due to missing return statement)
is worse and not detected automatically.
to defend against this, the programmer can annotate their code to
indicate to humans that a particular spot should never be reached.
however, that isn't much help; better is a sentinel that raises an
error if it is actually reached. hence, the UNREACHABLE macro.
ironically, if the code guarded by UNREACHABLE works as it should,
compilers may flag the macro's code as unreachable. this would
distract from genuine warnings, which is unacceptable.
even worse, compilers differ in their code checking: GCC only complains if
non-void functions end without returning a value (i.e. missing return
statement), while VC checks if lines are unreachable (e.g. if they are
preceded by a return on all paths).
our implementation of UNREACHABLE solves this dilemma as follows:
- on GCC: call abort(); since it has the noreturn attributes, the
"non-void" warning disappears.
- on VC: avoid generating any code. we allow the compiler to assume the
spot is actually unreachable, which incidentally helps optimization.
if reached after all, a crash usually results. in that case, compile with
CONFIG_PARANOIA, which will cause an error message to be displayed.
this approach still allows for the possibility of automated
checking, but does not cause any compiler warnings.
**/
* "unreachable code" helpers
*
* unreachable lines of code are often the source or symptom of subtle bugs.
* they are flagged by compiler warnings; however, the opposite problem -
* erroneously reaching certain spots (e.g. due to missing return statement)
* is worse and not detected automatically.
*
* to defend against this, the programmer can annotate their code to
* indicate to humans that a particular spot should never be reached.
* however, that isn't much help; better is a sentinel that raises an
* error if it is actually reached. hence, the UNREACHABLE macro.
*
* ironically, if the code guarded by UNREACHABLE works as it should,
* compilers may flag the macro's code as unreachable. this would
* distract from genuine warnings, which is unacceptable.
*
* even worse, compilers differ in their code checking: GCC only complains if
* non-void functions end without returning a value (i.e. missing return
* statement), while VC checks if lines are unreachable (e.g. if they are
* preceded by a return on all paths).
*
* the implementation below enables optimization and automated checking
* without raising warnings.
**/
#define UNREACHABLE // actually defined below.. this is for
# undef UNREACHABLE // CppDoc's benefit only.
// 1) final build: optimize assuming this location cannot be reached.
// may crash if that turns out to be untrue, but removes checking overhead.
#if CONFIG_FINAL
// compiler supports ASSUME_UNREACHABLE => allow it to assume the code is
// never reached (improves optimization at the cost of undefined behavior
// if the annotation turns out to be incorrect).
#if HAVE_ASSUME_UNREACHABLE && !CONFIG_PARANOIA
# define UNREACHABLE ASSUME_UNREACHABLE
// 2) normal build:
// otherwise (or if CONFIG_PARANOIA is set), add a user-visible
// warning if the code is reached. note that abort() fails to stop
// ICC from warning about the lack of a return statement, so we
// use an infinite loop instead.
#else
// a) normal implementation: includes "abort", which is declared with
// noreturn attribute and therefore avoids GCC's "execution reaches
// end of non-void function" warning.
# if !MSC_VERSION || ICC_VERSION || CONFIG_PARANOIA
# define UNREACHABLE\
STMT(\
debug_assert(0); /* hit supposedly unreachable code */\
abort();\
for(;;){};\
)
// b) VC only: don't generate any code; squelch the warning and optimize.
# else
# define UNREACHABLE ASSUME_UNREACHABLE
# endif
#endif
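Typical use (illustrative sketch; the Unit enum and CountFor are hypothetical, the cpu_topology accessors are the ones added in this commit): the macro guards spots an exhaustive switch should never fall through to, without triggering either compiler's warnings:

	enum Unit { UNIT_LOGICAL, UNIT_CORE };

	size_t CountFor(Unit unit)
	{
		switch(unit)
		{
		case UNIT_LOGICAL:
			return cpu_topology_LogicalPerCore();
		case UNIT_CORE:
			return cpu_topology_CoresPerPackage();
		default:	// never reached: all enumerators are handled above
			UNREACHABLE;
		}
	}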
/**

View File

@ -116,7 +116,7 @@ static size_t MaxLogicalPerCache()
//-----------------------------------------------------------------------------
// determination of enabled cores/HTs
// APIC IDs
// APIC IDs consist of variable-length fields identifying the logical unit,
// core, package and shared cache. if they are available, we can determine
@ -174,106 +174,102 @@ const u8* ApicIds()
}
/**
* count the number of unique APIC IDs after application of a mask.
*
* this is used to implement NumUniqueValuesInField and also required
* for counting the number of caches.
**/
static size_t NumUniqueMaskedValues(const u8* apicIds, u8 mask)
// (if maxValues == 1, the field is zero-width and thus zero)
static size_t ApicField(size_t apicId, size_t indexOfLowestBit, size_t maxValues)
{
std::set<u8> ids;
for(size_t processor = 0; processor < os_cpu_NumProcessors(); processor++)
{
const u8 apicId = apicIds[processor];
const u8 field = u8(apicId & mask);
ids.insert(field);
}
return ids.size();
const size_t numBits = ceil_log2(maxValues);
const size_t mask = bit_mask<size_t>(numBits);
return (apicId >> indexOfLowestBit) & mask;
}
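For instance (illustrative numbers): with maxLogicalPerCore == 2 and maxCoresPerPackage == 4, the logical/core/package fields start at bits 0, 1 and 3, and APIC ID 0xB (binary 1011) decodes as:

	ApicField(0xB, 0, 2);   // logical: bit  0    -> 1
	ApicField(0xB, 1, 4);   // core:    bits 2:1  -> 1
	ApicField(0xB, 3, 256); // package: bits 10:3 -> 1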
/**
* Count the number of values assumed by a certain field within APIC IDs.
*
* @param apicIds
* @param offset Index of the lowest bit that is part of the field.
* @param numValues Number of values that can be assumed by the field.
* If equal to one, the field is zero-width.
* @return number of unique values (for convenience of the topology code,
* this is always at least one)
**/
static size_t NumUniqueValuesInField(const u8* apicIds, size_t offset, size_t numValues)
{
if(numValues == 1) // see parameter description above
return 1;
const size_t numBits = ceil_log2(numValues);
const u8 mask = u8((bit_mask<u8>(numBits) << offset) & 0xFF);
return NumUniqueMaskedValues(apicIds, mask);
}
static size_t MinPackages(size_t maxCoresPerPackage, size_t maxLogicalPerCore)
{
const size_t numNodes = numa_NumNodes();
const size_t logicalPerNode = PopulationCount(numa_ProcessorMaskFromNode(0));
// NB: some cores or logical processors may be disabled.
const size_t maxLogicalPerPackage = maxCoresPerPackage*maxLogicalPerCore;
const size_t minPackagesPerNode = DivideRoundUp(logicalPerNode, maxLogicalPerPackage);
return minPackagesPerNode*numNodes;
}
//-----------------------------------------------------------------------------
// CPU topology interface
struct CpuTopology // POD
{
size_t numPackages;
size_t coresPerPackage;
size_t maxLogicalPerCore;
size_t maxCoresPerPackage;
size_t logicalOffset;
size_t coreOffset;
size_t packageOffset;
// how many are actually enabled
size_t logicalPerCore;
size_t coresPerPackage;
size_t numPackages;
};
static CpuTopology cpuTopology;
static ModuleInitState cpuInitState;
static LibError InitCpuTopology()
{
const size_t numProcessors = os_cpu_NumProcessors();
const size_t maxCoresPerPackage = MaxCoresPerPackage();
const size_t maxLogicalPerCore = MaxLogicalPerCore();
cpuTopology.maxLogicalPerCore = MaxLogicalPerCore();
cpuTopology.maxCoresPerPackage = MaxCoresPerPackage();
cpuTopology.logicalOffset = 0;
cpuTopology.coreOffset = ceil_log2(cpuTopology.maxLogicalPerCore);
cpuTopology.packageOffset = cpuTopology.coreOffset + ceil_log2(cpuTopology.maxCoresPerPackage);
const u8* apicIds = ApicIds();
if(apicIds)
{
const size_t packageOffset = ceil_log2(maxCoresPerPackage) + ceil_log2(maxLogicalPerCore);
const size_t coreOffset = ceil_log2(maxLogicalPerCore);
const size_t logicalOffset = 0;
cpuTopology.numPackages = NumUniqueValuesInField(apicIds, packageOffset, 256);
cpuTopology.coresPerPackage = NumUniqueValuesInField(apicIds, coreOffset, maxCoresPerPackage);
cpuTopology.logicalPerCore = NumUniqueValuesInField(apicIds, logicalOffset, maxLogicalPerCore);
struct NumUniqueValuesInField
{
size_t operator()(const u8* apicIds, size_t indexOfLowestBit, size_t numValues) const
{
std::set<size_t> values;
for(size_t processor = 0; processor < os_cpu_NumProcessors(); processor++)
{
const size_t value = ApicField(apicIds[processor], indexOfLowestBit, numValues);
values.insert(value);
}
return values.size();
}
};
cpuTopology.logicalPerCore = NumUniqueValuesInField()(apicIds, cpuTopology.logicalOffset, cpuTopology.maxLogicalPerCore);
cpuTopology.coresPerPackage = NumUniqueValuesInField()(apicIds, cpuTopology.coreOffset, cpuTopology.maxCoresPerPackage);
cpuTopology.numPackages = NumUniqueValuesInField()(apicIds, cpuTopology.packageOffset, 256);
}
else // the processor lacks an xAPIC, or the IDs are invalid
{
struct MinPackages
{
size_t operator()(size_t maxCoresPerPackage, size_t maxLogicalPerCore) const
{
const size_t numNodes = numa_NumNodes();
const size_t logicalPerNode = PopulationCount(numa_ProcessorMaskFromNode(0));
// NB: some cores or logical processors may be disabled.
const size_t maxLogicalPerPackage = maxCoresPerPackage*maxLogicalPerCore;
const size_t minPackagesPerNode = DivideRoundUp(logicalPerNode, maxLogicalPerPackage);
return minPackagesPerNode*numNodes;
}
};
// we can't differentiate between cores and logical processors.
// since the former are less likely to be disabled, we seek the
// maximum feasible number of cores and minimal number of packages:
const size_t minPackages = MinPackages(maxCoresPerPackage, maxLogicalPerCore);
const size_t maxPackages = numProcessors;
for(size_t numPackages = minPackages; numPackages <= maxPackages; numPackages++)
const size_t minPackages = MinPackages()(cpuTopology.maxCoresPerPackage, cpuTopology.maxLogicalPerCore);
const size_t numProcessors = os_cpu_NumProcessors();
for(size_t numPackages = minPackages; numPackages <= numProcessors; numPackages++)
{
if(numProcessors % numPackages != 0)
continue;
const size_t logicalPerPackage = numProcessors / numPackages;
const size_t minCoresPerPackage = DivideRoundUp(logicalPerPackage, maxLogicalPerCore);
for(size_t coresPerPackage = maxCoresPerPackage; coresPerPackage >= minCoresPerPackage; coresPerPackage--)
const size_t minCoresPerPackage = DivideRoundUp(logicalPerPackage, cpuTopology.maxLogicalPerCore);
for(size_t coresPerPackage = cpuTopology.maxCoresPerPackage; coresPerPackage >= minCoresPerPackage; coresPerPackage--)
{
if(logicalPerPackage % coresPerPackage != 0)
continue;
const size_t logicalPerCore = logicalPerPackage / coresPerPackage;
if(logicalPerCore <= maxLogicalPerCore)
if(logicalPerCore <= cpuTopology.maxLogicalPerCore)
{
debug_assert(numProcessors == numPackages*coresPerPackage*logicalPerCore);
cpuTopology.numPackages = numPackages;
cpuTopology.coresPerPackage = coresPerPackage;
cpuTopology.logicalPerCore = logicalPerCore;
cpuTopology.coresPerPackage = coresPerPackage;
cpuTopology.numPackages = numPackages;
return INFO::OK;
}
}
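Worked example for this fallback (illustrative numbers): 8 enabled processors on one NUMA node with maxCoresPerPackage == 4 and maxLogicalPerCore == 2 give maxLogicalPerPackage == 8, so MinPackages yields 1. The first iteration tries numPackages == 1, hence logicalPerPackage == 8 and minCoresPerPackage == 4; coresPerPackage == 4 then gives logicalPerCore == 2 <= maxLogicalPerCore, i.e. the topology (1 package, 4 cores, 2 logical per core).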
@ -303,6 +299,24 @@ size_t cpu_topology_LogicalPerCore()
return cpuTopology.logicalPerCore;
}
size_t cpu_topology_LogicalFromId(size_t apicId)
{
ModuleInit(&cpuInitState, InitCpuTopology);
return ApicField(apicId, cpuTopology.logicalOffset, cpuTopology.maxLogicalPerCore);
}
size_t cpu_topology_CoreFromId(size_t apicId)
{
ModuleInit(&cpuInitState, InitCpuTopology);
return ApicField(apicId, cpuTopology.coreOffset, cpuTopology.maxCoresPerPackage);
}
size_t cpu_topology_PackageFromId(size_t apicId)
{
ModuleInit(&cpuInitState, InitCpuTopology);
return ApicField(apicId, cpuTopology.packageOffset, 256);
}
//-----------------------------------------------------------------------------
// cache topology

View File

@ -65,6 +65,11 @@ LIB_API size_t cpu_topology_CoresPerPackage();
LIB_API size_t cpu_topology_LogicalPerCore();
LIB_API size_t cpu_topology_LogicalFromId(size_t apicId);
LIB_API size_t cpu_topology_CoreFromId(size_t apicId);
LIB_API size_t cpu_topology_PackageFromId(size_t apicId);
//-----------------------------------------------------------------------------
// L2 cache

View File

@ -157,7 +157,7 @@ bool x86_x64_cap(x86_x64_Cap cap)
//-----------------------------------------------------------------------------
// CPU identification
// vendor
static x86_x64_Vendors vendor;
@ -197,10 +197,14 @@ x86_x64_Vendors x86_x64_Vendor()
}
//-----------------------------------------------------------------------------
// signature
static size_t model;
static size_t family;
static ModuleInitState signatureInitState;
static void InitModelAndFamily()
static LibError InitSignature()
{
x86_x64_CpuidRegs regs = { 0 };
regs.eax = 1;
@ -214,71 +218,19 @@ static void InitModelAndFamily()
family += extendedFamily;
if(family == 0xF || (x86_x64_Vendor() == X86_X64_VENDOR_INTEL && family == 6))
model += extendedModel << 4;
}
static size_t generation;
static LibError InitGeneration()
{
InitModelAndFamily();
switch(x86_x64_Vendor())
{
case X86_X64_VENDOR_AMD:
switch(family)
{
case 5:
if(model < 6)
generation = 5; // K5
else
generation = 6; // K6
break;
case 6:
generation = 7; // K7 (Athlon)
break;
case 0xF:
case 0x10:
generation = 8; // K8 (Opteron)
break;
}
break;
case X86_X64_VENDOR_INTEL:
switch(family)
{
case 5:
generation = 5; // Pentium
break;
case 6:
if(model < 0xF)
generation = 6; // Pentium Pro/II/III/M
else
generation = 8; // Core2Duo
break;
case 0xF:
if(model <= 6)
generation = 7; // Pentium 4/D
break;
}
if(family >= 0x10)
generation = 9;
break;
}
debug_assert(generation != 0);
return INFO::OK;
}
size_t x86_x64_Generation()
size_t x86_x64_Model()
{
static ModuleInitState initState;
ModuleInit(&initState, InitGeneration);
return generation;
ModuleInit(&signatureInitState, InitSignature);
return model;
}
size_t x86_x64_Family()
{
ModuleInit(&signatureInitState, InitSignature);
return family;
}
@ -832,7 +784,8 @@ static LibError InitIdentifierString()
// doesn't recognize.
if(!gotBrandString || strncmp(identifierString, "Unknow", 6) == 0)
{
InitModelAndFamily();
const size_t family = x86_x64_Family();
const size_t model = x86_x64_Model();
switch(x86_x64_Vendor())
{
case X86_X64_VENDOR_AMD:

View File

@ -73,6 +73,11 @@ enum x86_x64_Vendors
LIB_API x86_x64_Vendors x86_x64_Vendor();
LIB_API size_t x86_x64_Model();
LIB_API size_t x86_x64_Family();
/**
* @return the colloquial processor generation
* (5 = Pentium, 6 = Pentium Pro/II/III / K6, 7 = Pentium4 / Athlon, 8 = Core / Opteron)
@ -96,6 +101,7 @@ enum x86_x64_Cap
// standard (edx)
X86_X64_CAP_FPU = 32+0, // Floating Point Unit
X86_X64_CAP_TSC = 32+4, // TimeStamp Counter
X86_X64_CAP_MSR = 32+5, // Model Specific Registers
X86_X64_CAP_CMOV = 32+15, // Conditional MOVe
X86_X64_CAP_TM_SCC = 32+22, // Thermal Monitoring and Software Controlled Clock
X86_X64_CAP_MMX = 32+23, // MultiMedia eXtensions

View File

@ -175,10 +175,15 @@
// this macro should not generate any fallback code; it is merely the
// compiler-specific backend for lib.h's UNREACHABLE.
// #define it to nothing if the compiler doesn't support such a hint.
#if MSC_VERSION
#define HAVE_ASSUME_UNREACHABLE 1
#if MSC_VERSION && !ICC_VERSION // (ICC ignores this)
# define ASSUME_UNREACHABLE __assume(0)
#elif GCC_VERSION >= 450
# define ASSUME_UNREACHABLE __builtin_unreachable()
#else
# define ASSUME_UNREACHABLE
# undef HAVE_ASSUME_UNREACHABLE
# define HAVE_ASSUME_UNREACHABLE 0
#endif

View File

@ -41,47 +41,75 @@
#define IOCTL_AKEN_WRITE_PORT CTL_CODE(FILE_DEVICE_AKEN, AKEN_IOCTL+1, METHOD_BUFFERED, FILE_ANY_ACCESS)
#define IOCTL_AKEN_MAP CTL_CODE(FILE_DEVICE_AKEN, AKEN_IOCTL+2, METHOD_BUFFERED, FILE_ANY_ACCESS)
#define IOCTL_AKEN_UNMAP CTL_CODE(FILE_DEVICE_AKEN, AKEN_IOCTL+3, METHOD_BUFFERED, FILE_ANY_ACCESS)
#define IOCTL_AKEN_READ_MSR CTL_CODE(FILE_DEVICE_AKEN, AKEN_IOCTL+4, METHOD_BUFFERED, FILE_ANY_ACCESS)
#define IOCTL_AKEN_WRITE_MSR CTL_CODE(FILE_DEVICE_AKEN, AKEN_IOCTL+5, METHOD_BUFFERED, FILE_ANY_ACCESS)
#define IOCTL_AKEN_READ_PMC CTL_CODE(FILE_DEVICE_AKEN, AKEN_IOCTL+6, METHOD_BUFFERED, FILE_ANY_ACCESS)
// input and output data structures for the IOCTLs
#pragma pack(push, 1)
struct AkenReadPortIn
typedef struct AkenReadPortIn_
{
USHORT port;
UCHAR numBytes;
};
}
AkenReadPortIn;
struct AkenReadPortOut
typedef struct AkenReadPortOut_
{
DWORD32 value;
};
}
AkenReadPortOut;
struct AkenWritePortIn
typedef struct AkenWritePortIn_
{
DWORD32 value;
USHORT port;
UCHAR numBytes;
};
}
AkenWritePortIn;
struct AkenMapIn
typedef struct AkenMapIn_
{
// note: fixed-width types allow the 32 or 64-bit Mahaf wrapper to
// interoperate with the 32 or 64-bit Aken driver.
DWORD64 physicalAddress;
DWORD64 numBytes;
};
}
AkenMapIn;
struct AkenMapOut
typedef struct AkenMapOut_
{
DWORD64 virtualAddress;
};
}
AkenMapOut;
struct AkenUnmapIn
typedef struct AkenUnmapIn_
{
DWORD64 virtualAddress;
};
}
AkenUnmapIn;
typedef struct AkenReadRegisterIn_
{
DWORD64 reg;
}
AkenReadRegisterIn;
typedef struct AkenReadRegisterOut_
{
DWORD64 value;
}
AkenReadRegisterOut;
typedef struct AkenWriteRegisterIn_
{
DWORD64 reg;
DWORD64 value;
}
AkenWriteRegisterIn;
#pragma pack(pop)

View File

@ -25,6 +25,7 @@
*/
#include "precompiled.h"
#include "lib/sysdep/os/win/mahaf.h"
#include "lib/sysdep/os/win/win.h"
#include <winioctl.h>
@ -56,8 +57,7 @@ static u32 ReadPort(u16 port, u8 numBytes)
}
debug_assert(bytesReturned == sizeof(out));
const u32 value = out.value;
return value;
return out.value;
}
u8 mahaf_ReadPort8(u16 port)
@ -159,6 +159,48 @@ void mahaf_UnmapPhysicalMemory(volatile void* virtualAddress)
}
static u64 ReadRegister(DWORD ioctl, u64 reg)
{
AkenReadRegisterIn in;
in.reg = reg;
AkenReadRegisterOut out;
DWORD bytesReturned;
LPOVERLAPPED ovl = 0; // synchronous
BOOL ok = DeviceIoControl(hAken, ioctl, &in, sizeof(in), &out, sizeof(out), &bytesReturned, ovl);
if(!ok)
{
WARN_WIN32_ERR;
return 0;
}
debug_assert(bytesReturned == sizeof(out));
return out.value;
}
u64 mahaf_ReadModelSpecificRegister(u64 reg)
{
return ReadRegister((DWORD)IOCTL_AKEN_READ_MSR, reg);
}
u64 mahaf_ReadPerformanceMonitoringCounter(u64 reg)
{
return ReadRegister((DWORD)IOCTL_AKEN_READ_PMC, reg);
}
void mahaf_WriteModelSpecificRegister(u64 reg, u64 value)
{
AkenWriteRegisterIn in;
in.reg = reg;
in.value = value;
DWORD bytesReturned; // unused but must be passed to DeviceIoControl
LPOVERLAPPED ovl = 0; // synchronous
BOOL ok = DeviceIoControl(hAken, (DWORD)IOCTL_AKEN_WRITE_MSR, &in, sizeof(in), 0, 0u, &bytesReturned, ovl);
WARN_IF_FALSE(ok);
}
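A minimal caller-side sketch of the new MSR interface (illustrative; assumes the Aken driver is installed so mahaf_Init succeeds, and uses 0xCE, Nehalem's PLATFORM_INFO MSR, as in the timer code below):

	if(mahaf_Init() == INFO::OK)
	{
		// bits 15:8 hold the maximum non-turbo bus-clock ratio
		const u64 platformInfo = mahaf_ReadModelSpecificRegister(0xCE);
		const u8 maxNonTurboRatio = bits(platformInfo, 8, 15);
		mahaf_Shutdown();
	}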
//-----------------------------------------------------------------------------
// driver installation
//-----------------------------------------------------------------------------

View File

@ -39,20 +39,26 @@
* note: mahaf_MapPhysicalMemory will complain if it
* is called despite this function having returned true.
**/
extern bool mahaf_IsPhysicalMappingDangerous();
LIB_API bool mahaf_IsPhysicalMappingDangerous();
extern LibError mahaf_Init();
extern void mahaf_Shutdown();
LIB_API LibError mahaf_Init();
LIB_API void mahaf_Shutdown();
extern u8 mahaf_ReadPort8 (u16 port);
extern u16 mahaf_ReadPort16(u16 port);
extern u32 mahaf_ReadPort32(u16 port);
extern void mahaf_WritePort8 (u16 port, u8 value);
extern void mahaf_WritePort16(u16 port, u16 value);
extern void mahaf_WritePort32(u16 port, u32 value);
LIB_API u8 mahaf_ReadPort8 (u16 port);
LIB_API u16 mahaf_ReadPort16(u16 port);
LIB_API u32 mahaf_ReadPort32(u16 port);
LIB_API void mahaf_WritePort8 (u16 port, u8 value);
LIB_API void mahaf_WritePort16(u16 port, u16 value);
LIB_API void mahaf_WritePort32(u16 port, u32 value);
extern volatile void* mahaf_MapPhysicalMemory(uintptr_t physicalAddress, size_t numBytes);
extern void mahaf_UnmapPhysicalMemory(volatile void* virtualAddress);
LIB_API volatile void* mahaf_MapPhysicalMemory(uintptr_t physicalAddress, size_t numBytes);
LIB_API void mahaf_UnmapPhysicalMemory(volatile void* virtualAddress);
LIB_API u64 mahaf_ReadModelSpecificRegister(u64 reg);
LIB_API void mahaf_WriteModelSpecificRegister(u64 reg, u64 value);
// must be done in the driver because Windows clears CR4.PCE[8]
LIB_API u64 mahaf_ReadPerformanceMonitoringCounter(u64 reg);
#endif // INCLUDED_MAHAF

View File

@ -38,6 +38,7 @@
#if ARCH_X86_X64
# include "lib/sysdep/arch/x86_x64/x86_x64.h" // x86_x64_rdtsc
# include "lib/sysdep/arch/x86_x64/topology.h"
# include "lib/sysdep/arch/x86_x64/msr.h"
#endif
@ -173,7 +174,7 @@ public:
#if ARCH_X86_X64
// recent CPU:
if(x86_x64_Generation() >= 7)
//if(x86_x64_Generation() >= 7)
{
// note: 8th generation CPUs support C1-clock ramping, which causes
// drift on multi-core systems, but those were excluded above.
@ -183,7 +184,7 @@ public:
// the chipset thinks the system is dangerously overheated; the
// OS isn't even notified. this may be rare, but could cause
// incorrect results => unsafe.
return false;
//return false;
}
#endif
@ -217,6 +218,15 @@ public:
// note: even here, initial accuracy isn't critical because the
// clock is subject to thermal drift and would require continual
// recalibration anyway.
#if ARCH_X86_X64
if(MSR::HasNehalem())
{
const u64 platformInfo = MSR::Read(MSR::PLATFORM_INFO);
const u8 maxNonTurboRatio = bits(platformInfo, 8, 15);
return maxNonTurboRatio * 133.33e6f;
}
else
#endif
return os_cpu_ClockFrequency();
}
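Worked example (illustrative): a 2.66 GHz Core i7-920 reports a maximum non-turbo ratio of 20 (PLATFORM_INFO bits 15:8 == 0x14), so the returned TSC frequency is 20 * 133.33 MHz, about 2.67 GHz, rather than a value rounded by the OS.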