313 lines
6.5 KiB
C++
Executable File
313 lines
6.5 KiB
C++
Executable File
// IA-32 (x86) specific code
|
|
// Copyright (c) 2003 Jan Wassenberg
|
|
//
|
|
// This program is free software; you can redistribute it and/or
|
|
// modify it under the terms of the GNU General Public License as
|
|
// published by the Free Software Foundation; either version 2 of the
|
|
// License, or (at your option) any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful, but
|
|
// WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
// General Public License for more details.
|
|
//
|
|
// Contact info:
|
|
// Jan.Wassenberg@stud.uni-karlsruhe.de
|
|
// http://www.stud.uni-karlsruhe.de/~urkt/
|
|
|
|
#include "precompiled.h"
|
|
|
|
#ifdef _M_IX86
|
|
|
|
#include "ia32.h"
|
|
#include "lib.h"
|
|
#include "detect.h"
|
|
#include "timer.h"
|
|
|
|
#ifdef _WIN32
|
|
#include "win/hrt.h"
|
|
#endif
|
|
|
|
|
|
// replace pathetic MS libc implementation
|
|
#ifdef _WIN32
|
|
double _ceil(double f)
|
|
{
|
|
double r;
|
|
|
|
const float _49 = 0.499999f;
|
|
__asm
|
|
{
|
|
fld [f]
|
|
fadd [_49]
|
|
frndint
|
|
fstp [r]
|
|
}
|
|
|
|
UNUSED(f)
|
|
|
|
return r;
|
|
}
|
|
#endif
|
|
|
|
|
|
// return convention for 64 bits with VC7.1, ICC8 is in edx:eax,
|
|
// so temp variable is unnecessary, but we play it safe.
|
|
inline u64 rdtsc()
|
|
{
|
|
u64 c;
|
|
__asm
|
|
{
|
|
cpuid
|
|
rdtsc
|
|
mov dword ptr [c], eax
|
|
mov dword ptr [c+4], edx
|
|
}
|
|
return c;
|
|
}
|
|
|
|
|
|
// change FPU control word (used to set precision)
|
|
uint _control87(uint new_cw, uint mask)
|
|
{
|
|
__asm
|
|
{
|
|
push eax
|
|
fnstcw [esp]
|
|
pop eax ; old_cw
|
|
mov ecx, [new_cw]
|
|
mov edx, [mask]
|
|
and ecx, edx ; new_cw & mask
|
|
not edx ; ~mask
|
|
and eax, edx ; old_cw & ~mask
|
|
or eax, ecx ; (old_cw & ~mask) | (new_cw & mask)
|
|
push eax
|
|
fldcw [esp]
|
|
pop eax
|
|
}
|
|
|
|
UNUSED(new_cw)
|
|
UNUSED(mask)
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
long cpu_caps = 0;
|
|
long cpu_ext_caps = 0;
|
|
|
|
static char cpu_vendor[13];
|
|
static int family, model; // used to detect cpu_type
|
|
|
|
int tsc_is_safe = -1;
|
|
|
|
|
|
// optimized for size
|
|
static void __declspec(naked) cpuid()
|
|
{
|
|
__asm
|
|
{
|
|
pushad
|
|
; ICC7: pusha is the 16-bit form!
|
|
|
|
; make sure CPUID is supported
|
|
pushfd
|
|
or byte ptr [esp+2], 32
|
|
popfd
|
|
pushfd
|
|
pop eax
|
|
shr eax, 22 ; bit 21 toggled?
|
|
jnc no_cpuid
|
|
|
|
; get vendor string
|
|
xor eax, eax
|
|
cpuid
|
|
mov edi, offset cpu_vendor
|
|
xchg eax, ebx
|
|
stosd
|
|
xchg eax, edx
|
|
stosd
|
|
xchg eax, ecx
|
|
stosd
|
|
; (already 0 terminated)
|
|
|
|
; get CPU signature and std feature bits
|
|
push 1
|
|
pop eax
|
|
cpuid
|
|
mov [cpu_caps], edx
|
|
mov edx, eax
|
|
shr edx, 4
|
|
and edx, 0x0f
|
|
mov [model], edx
|
|
shr eax, 8
|
|
and eax, 0x0f
|
|
mov [family], eax
|
|
|
|
; make sure CPUID ext functions are supported
|
|
mov esi, 0x80000000
|
|
mov eax, esi
|
|
cpuid
|
|
cmp eax, esi
|
|
jbe no_ext_funcs
|
|
lea esi, [esi+4]
|
|
cmp eax, esi
|
|
jb no_brand_str
|
|
|
|
; get CPU brand string (>= Athlon XP, P4)
|
|
mov edi, offset cpu_type
|
|
push -2
|
|
pop ebp ; loop counter: [-2, 0]
|
|
$1: lea eax, [ebp+esi] ; 0x80000002 .. 4
|
|
cpuid
|
|
stosd
|
|
xchg eax, ebx
|
|
stosd
|
|
xchg eax, ecx
|
|
stosd
|
|
xchg eax, edx
|
|
stosd
|
|
inc ebp
|
|
jle $1
|
|
; (already 0 terminated)
|
|
|
|
no_brand_str:
|
|
|
|
; get extended feature flags
|
|
lea eax, [esi-3] ; 0x80000001
|
|
cpuid
|
|
mov [cpu_ext_caps], edx
|
|
|
|
no_ext_funcs:
|
|
|
|
no_cpuid:
|
|
|
|
popad
|
|
ret
|
|
}
|
|
}
|
|
|
|
|
|
void ia32_get_cpu_info()
|
|
{
|
|
cpuid();
|
|
if(cpu_caps == 0) // cpuid not supported - can't do the rest
|
|
return;
|
|
|
|
// cpu_type set from brand string - clean it up some
|
|
if(strcmp(cpu_type, "unknown") != 0)
|
|
{
|
|
// strip (tm) from Athlon string
|
|
if(!strncmp(cpu_type, "AMD Athlon(tm)", 14))
|
|
memmove(cpu_type+10, cpu_type+14, 34);
|
|
|
|
// remove 2x (R) and CPU freq from P4 string
|
|
float a;
|
|
// the indicated frequency isn't necessarily correct - the CPU may be
|
|
// overclocked. need to pass a variable though, since scanf returns
|
|
// the number of fields actually stored.
|
|
if(sscanf(cpu_type, " Intel(R) Pentium(R) 4 CPU %fGHz", &a) == 1)
|
|
strcpy(cpu_type, "Intel Pentium 4");
|
|
}
|
|
// detect cpu_type ourselves
|
|
else
|
|
{
|
|
// AMD
|
|
if(!strcmp(cpu_vendor, "AuthenticAMD"))
|
|
{
|
|
if(family == 6)
|
|
strcpy(cpu_type, (model == 3)? "AMD Duron" : "AMD Athlon");
|
|
}
|
|
// Intel
|
|
else if(!strcmp(cpu_vendor, "GenuineIntel"))
|
|
{
|
|
if(family == 6 && model >= 7)
|
|
strcpy(cpu_type, "Intel Pentium III / Celeron");
|
|
}
|
|
}
|
|
|
|
// .. get old policy and priority
|
|
int old_policy;
|
|
static sched_param old_param;
|
|
pthread_getschedparam(pthread_self(), &old_policy, &old_param);
|
|
// .. set max priority
|
|
static sched_param max_param;
|
|
max_param.sched_priority = sched_get_priority_max(SCHED_RR);
|
|
pthread_setschedparam(pthread_self(), SCHED_RR, &max_param);
|
|
|
|
// calculate CPU frequency.
|
|
// balance measuring time (~ 10 ms) and accuracy (< 1 0/00 error -
|
|
// ok for using the TSC as a time reference)
|
|
if(cpu_caps & TSC) // needed to calculate freq; bogomips are a WAG
|
|
{
|
|
// stabilize CPUID for timing (first few calls take longer)
|
|
__asm cpuid __asm cpuid __asm cpuid
|
|
|
|
u64 c0, c1;
|
|
|
|
std::vector<double> samples;
|
|
int num_samples = 20;
|
|
// if clock is low-res, do less samples so it doesn't take too long
|
|
if(timer_res() >= 1e-3)
|
|
num_samples = 10;
|
|
|
|
int i;
|
|
for(i = 0; i < num_samples; i++)
|
|
{
|
|
again:
|
|
// count # of clocks in max{1 tick, 1 ms}
|
|
double t0;
|
|
double t1 = get_time();
|
|
// .. wait for start of tick
|
|
do
|
|
{
|
|
c0 = rdtsc(); // changes quickly
|
|
t0 = get_time();
|
|
}
|
|
while(t0 == t1);
|
|
// .. wait until start of next tick and at least 1 ms
|
|
do
|
|
{
|
|
c1 = rdtsc();
|
|
t1 = get_time();
|
|
}
|
|
while(t1 < t0 + 1e-3);
|
|
|
|
double ds = t1 - t0;
|
|
if(ds < 0.0) // bogus time delta - take another sample
|
|
goto again;
|
|
|
|
// .. freq = (delta_clocks) / (delta_seconds);
|
|
// cpuid/rdtsc/timer overhead is negligible
|
|
double freq = (i64)(c1-c0) / ds;
|
|
// VC6 can't convert u64 -> double, and we don't need full range
|
|
|
|
samples.push_back(freq);
|
|
}
|
|
|
|
std::sort(samples.begin(), samples.end());
|
|
// double median = samples[num_samples/2];
|
|
|
|
// median filter (remove upper and lower 25% and average the rest)
|
|
double sum = 0.0;
|
|
const int lo = num_samples/4, hi = 3*num_samples/4;
|
|
for(i = lo; i < hi; i++)
|
|
sum += samples[i];
|
|
cpu_freq = sum / (hi-lo);
|
|
|
|
// HACK: if _WIN32, the HRT makes its final implementation choice
|
|
// in the first calibrate call where cpu_freq and cpu_caps are
|
|
// available. call it here (via get_time) to have that happen now,
|
|
// so app code isn't surprised by a timer change, although the HRT
|
|
// does try to keep the timer continuous.
|
|
#ifdef _WIN32
|
|
hrt_override_impl(HRT_DEFAULT, HRT_NONE);
|
|
#endif
|
|
}
|
|
// else: TSC not available, can't measure
|
|
|
|
// restore previous policy and priority
|
|
pthread_setschedparam(pthread_self(), old_policy, &old_param);
|
|
}
|
|
|
|
#endif // #ifndef _M_IX86
|