ia32: remove _ceil, mark functions as being sysdep implementations; prefix fpucw constants with IA32 to avoid conflicts

sysdep: remove round; document several functions; use ia32.h header
instead of a hack that extern-declared sysdep functions that are
implemented as macro-redirection
wsdl: add SDL_EnableKeyRepeat stub; group related functions in header
together; no longer need OS_WIN wrapper around that

This was SVN commit r3115.
This commit is contained in:
janwas 2005-11-07 02:45:25 +00:00
parent 14c62e721d
commit 9476dc4ad0
8 changed files with 100 additions and 106 deletions

View File

@ -157,20 +157,20 @@ void cpu_init()
// we need full precision when calculating the time.
// if there's a spot where we want to speed up divides|sqrts,
// we can temporarily change precision there.
//_control87(_PC_24, _MCW_PC);
//ia32_control87(IA32_PC_24, IA32_MCW_PC);
// to help catch bugs, enable as many floating-point exceptions as
// possible. that means only zero-divide, because the JS engine is
// triggering the rest.
// note: passing a flag *disables* that exception.
_control87(_EM_INVALID|_EM_DENORMAL|_EM_OVERFLOW|_EM_UNDERFLOW|_EM_INEXACT, _MCW_EM);
ia32_control87(IA32_EM_INVALID|IA32_EM_DENORMAL|IA32_EM_OVERFLOW|IA32_EM_UNDERFLOW|IA32_EM_INEXACT, IA32_MCW_EM);
// no longer round toward zero (truncate). changing this setting
// resulted in much faster float->int casts, because the compiler
// could be told (via /QIfist) to use FISTP while still truncating
// the result as required by ANSI C. however, FPU calculation
// results were changed significantly, so it had to be disabled.
//_control87(_RC_CHOP, _MCW_RC);
//ia32_control87(IA32_RC_CHOP, IA32_MCW_RC);
// If possible, hook up capability-sensitive assembler routines
ia32_hook_capabilities();

View File

@ -40,26 +40,13 @@
#error ia32.cpp needs inline assembly support!
#endif
//-----------------------------------------------------------------------------
// fast implementations of some sysdep.h functions; see documentation there
//-----------------------------------------------------------------------------
#if HAVE_MS_ASM
// replace pathetic MS libc implementation.
// not needed on non-Win32, so don't bother converting from MS inline asm.
// _ceil: approximate the ceiling of f on the x87 FPU by adding a bias
// of just under 0.5 and then rounding to an integral value.
// f: value to round up.
// returns: the rounded value as a double.
// NOTE(review): FRNDINT rounds according to the current FPU rounding
// mode, so this presumably relies on round-to-nearest being active -
// confirm against the control word set up at init.
// NOTE(review): because the bias is slightly below 0.5, inputs that
// exceed an integer by less than roughly 1e-6 appear to round down to
// that integer instead of up - verify whether callers care.
double _ceil(double f)
{
UNUSED2(f); // avoid bogus warning (f is only referenced from the asm block)
// bias added before rounding; deliberately a bit less than 0.5.
const float _49 = 0.499999f;
double r;
__asm
{
fld [f] // push f onto the FPU stack
fadd [_49] // st(0) = f + bias
frndint // round st(0) to an integral value
fstp [r] // store the result in r and pop the FPU stack
}
return r;
}
// note: declspec naked is significantly faster: it avoids redundant
// store/load, even though it prevents inlining.
@ -135,7 +122,7 @@ __asm{
#endif // USE_IA32_FLOAT_TO_INT
//-----------------------------------------------------------------------------
// rationale: this function should return its output (instead of setting
// out params) to simplify its callers. it is written in inline asm

View File

@ -35,60 +35,46 @@ extern "C" {
extern void ia32_init();
extern double _ceil(double);
//
// fast implementations of some sysdep.h functions; see documentation there
//
extern float ia32_rintf(float f);
extern double ia32_rint(double f);
extern i32 ia32_i32_from_float(float f);
extern i32 ia32_i32_from_double(double d);
extern i64 ia32_i64_from_double(double d);
extern u64 rdtsc(void);
extern void* ia32_memcpy(void* dst, const void* src, size_t nbytes); // asm
// these may have been defined by system headers; we redefine them to
// the real IA-32 values for use with ia32_control87.
// FPU control word
// .. Precision Control:
#undef _MCW_PC
#define _MCW_PC 0x0300
#undef _PC_24
#define _PC_24 0x0000
#define IA32_MCW_PC 0x0300
#define IA32_PC_24 0x0000
// .. Rounding Control:
#undef _MCW_RC
#define _MCW_RC 0x0C00
#undef _RC_NEAR
#define _RC_NEAR 0x0000
#undef _RC_DOWN
#define _RC_DOWN 0x0400
#undef _RC_UP
#define _RC_UP 0x0800
#undef _RC_CHOP
#define _RC_CHOP 0x0C00
#define IA32_MCW_RC 0x0C00
#define IA32_RC_NEAR 0x0000
#define IA32_RC_DOWN 0x0400
#define IA32_RC_UP 0x0800
#define IA32_RC_CHOP 0x0C00
// .. Exception Mask:
#undef _MCW_EM
#define _MCW_EM 0x003f
#undef _EM_INVALID
#define _EM_INVALID BIT(0)
#undef _EM_DENORMAL
#define _EM_DENORMAL BIT(1)
#undef _EM_ZERODIVIDE
#define _EM_ZERODIVIDE BIT(2)
#undef _EM_OVERFLOW
#define _EM_OVERFLOW BIT(3)
#undef _EM_UNDERFLOW
#define _EM_UNDERFLOW BIT(4)
#undef _EM_INEXACT
#define _EM_INEXACT BIT(5)
#define IA32_MCW_EM 0x003f
#define IA32_EM_INVALID BIT(0)
#define IA32_EM_DENORMAL BIT(1)
#define IA32_EM_ZERODIVIDE BIT(2)
#define IA32_EM_OVERFLOW BIT(3)
#define IA32_EM_UNDERFLOW BIT(4)
#define IA32_EM_INEXACT BIT(5)
#define _control87 ia32_control87
extern uint ia32_control87(uint new_val, uint mask); // asm
extern u64 rdtsc(void);
extern void ia32_debug_break(void);
extern void* ia32_memcpy(void* dst, const void* src, size_t nbytes);
// write the current execution state (e.g. all register values) into
// (Win32::CONTEXT*)pcontext (defined as void* to avoid dependency).
extern void ia32_get_current_context(void* pcontext);
// CPU caps (128 bits)
// do not change the order!
@ -121,8 +107,13 @@ extern void ia32_get_cpu_info(void);
extern void ia32_hook_capabilities(void);
//-----------------------------------------------------------------------------
// internal use only
// write the current execution state (e.g. all register values) into
// (Win32::CONTEXT*)pcontext (defined as void* to avoid dependency).
extern void ia32_get_current_context(void* pcontext);
extern int ia32_get_call_target(void* ret_addr, void** target);
// order in which registers are stored in regs array

View File

@ -13,15 +13,6 @@
#include <memory.h>
#include <stdarg.h>
#if MSC_VERSION
// round: return x rounded to the nearest integral value; halfway cases
// are rounded away from zero (the behavior C99 specifies for round).
// x: value to round.
// returns: the rounded value as a double.
// the bias must follow the sign of x: always adding 0.5 and truncating
// toward zero gave wrong results for negative inputs (e.g. -2.7 became
// -2 instead of -3).
// note: the value is converted via long, so inputs whose magnitude
// exceeds the range of long are not supported.
double round(double x)
{
	const double biased = (x < 0.0)? x - 0.5 : x + 0.5;
	// the cast truncates toward zero, which completes the rounding.
	return (double)(long)biased;
}
#endif // MSC_VERSION
#if !HAVE_C99
@ -35,8 +26,12 @@ float fmaxf(float a, float b)
return (a > b)? a : b;
}
#endif
#ifndef rint
// no C99, and not running on IA-32 (where this is defined to ia32_rint)
// => need to implement our fallback version.
#if !HAVE_C99 && !defined(rint)
inline float rintf(float f)
{
@ -50,9 +45,9 @@ inline double rint(double d)
#endif
#endif // !HAVE_C99
// float->int conversion: not using the ia32 version; just implement as a
// cast. (see USE_IA32_FLOAT_TO_INT definition for details)
#if !USE_IA32_FLOAT_TO_INT
i32 i32_from_float(float f)

View File

@ -3,11 +3,19 @@
#include "config.h"
// some functions among the sysdep API are implemented as macros
// that redirect to the platform-dependent version. this is done where
// the cost of a trampoline function would be too great; VC7 does not
// always inline them.
// we therefore need to include those headers.
#if OS_WIN
# include "win/win.h"
#elif OS_UNIX
# include "unix/unix.h"
#endif
#if CPU_IA32
#include "ia32.h"
#endif
#ifdef __cplusplus
extern "C" {
@ -50,6 +58,9 @@ extern int vsnprintf2(char* buffer, size_t count, const char* format, va_list ar
extern void* alloca(size_t size);
#endif
// memcpy2: hand-tuned version; works for all sizes and alignments and is
// significantly faster. uses SSE-optimized codepath when available.
// speedup: 10% for < 64 byte transfers and up to 300% on large sizes.
#ifdef CPU_IA32
# define memcpy2 ia32_memcpy
extern void* ia32_memcpy(void* dst, const void* src, size_t nbytes);
@ -57,30 +68,33 @@ extern void* ia32_memcpy(void* dst, const void* src, size_t nbytes);
# define memcpy2 memcpy
#endif
// rint: round float to nearest integer.
// rint: round float to nearest integral value.
// provided by C99, otherwise:
#if !HAVE_C99
// .. implemented on IA-32; define as macro to avoid jmp overhead
// .. fast IA-32 version
# if CPU_IA32
# define rintf ia32_rintf
# define rint ia32_rint
// .. portable C emulation
# else
extern float rintf(float f);
extern double rint(double d);
# endif
// .. forward-declare either the IA-32 version or portable C emulation.
extern float rintf(float f);
extern double rint(double d);
#endif
// fast float->int conversion; does not specify rounding mode,
// so do not use them if exact values are needed.
// i32_from_float et al: convert float to int. much faster than _ftol2,
// which would normally be used by (int) casts.
// .. fast IA-32 version: only used in some cases; see macro definition.
#if USE_IA32_FLOAT_TO_INT
# define i32_from_float ia32_i32_from_float
# define i32_from_double ia32_i32_from_double
# define i64_from_double ia32_i64_from_double
// .. portable C emulation
#else
extern i32 i32_from_float(float);
extern i32 i32_from_double(double);
extern i64 i64_from_double(double);
#endif
// .. forward-declare either the IA-32 version or portable C emulation.
extern i32 i32_from_float(float);
extern i32 i32_from_double(double);
extern i64 i64_from_double(double);
// finite: return 0 iff the given double is infinite or NaN.
#if OS_WIN
@ -216,9 +230,6 @@ extern int on_each_cpu(void(*cb)());
#if MSC_VERSION
extern double round(double);
#endif
#if !HAVE_C99
extern float fminf(float a, float b);

View File

@ -16,7 +16,7 @@
// Jan.Wassenberg@stud.uni-karlsruhe.de
// http://www.stud.uni-karlsruhe.de/~urkt/
// TODO: should use GetMessage when not active to reduce CPU load.
// where to do this?
// - force the app to check for SDL's activation messages, and call

View File

@ -46,7 +46,10 @@ extern int SDL_Init(Uint32 flags);
extern void SDL_Quit(void);
extern Uint8 SDL_GetAppState();
//
// video
//
typedef enum
{
@ -57,14 +60,12 @@ SDL_GLattr;
extern int SDL_GL_SetAttribute(SDL_GLattr attr, int value);
// SDL_SetVideoMode() flags
#define SDL_OPENGL 0
#define SDL_FULLSCREEN 1
extern int SDL_SetVideoMode(int w, int h, int bpp, unsigned long flags);
typedef struct
{
int w, h;
@ -73,7 +74,6 @@ SDL_Surface;
extern SDL_Surface* SDL_GetVideoSurface(void);
typedef struct
{
int video_mem;
@ -82,6 +82,10 @@ SDL_VideoInfo;
extern SDL_VideoInfo* SDL_GetVideoInfo(void);
extern void* SDL_GL_GetProcAddress(const char*);
extern void SDL_GL_SwapBuffers(void);
//
// threads / sync
@ -90,10 +94,6 @@ extern SDL_VideoInfo* SDL_GetVideoInfo(void);
typedef void SDL_sem;
typedef void SDL_Thread;
extern void* SDL_GL_GetProcAddress(const char*);
extern void SDL_GL_SwapBuffers(void);
extern u32 SDL_GetTicks(void);
extern void SDL_Delay(u32 ms);
@ -105,6 +105,7 @@ extern int SDL_SemWait(SDL_sem* sem);
extern SDL_Thread* SDL_CreateThread(int(*)(void*), void*);
extern int SDL_KillThread(SDL_Thread*);
extern void SDL_WarpMouse(int, int);
enum ShowCursorToggle
@ -118,14 +119,10 @@ extern int SDL_ShowCursor(int toggle);
extern int SDL_SetGamma(float r, float g, float b);
// macros
#define SDL_GRAB_ON 0
#define SDL_WM_GrabInput(a)
#define SDL_GetError() ""
//////////////////////////////////////////////////////////////////////////////
//
// byte swapping
//
#ifdef linux
@ -285,13 +282,28 @@ extern int SDL_WaitEvent(SDL_Event*);
extern int SDL_PollEvent(SDL_Event* ev);
extern int SDL_PushEvent(SDL_Event* ev);
//
// misc
//
#define SDL_GRAB_ON 0
#define SDL_WM_GrabInput(a)
#define SDL_GetError() ""
// from real SDL, but they're ignored anyway
#define SDL_DEFAULT_REPEAT_DELAY 500
#define SDL_DEFAULT_REPEAT_INTERVAL 30
#define SDL_EnableKeyRepeat(delay, interval)
extern void SDL_WM_SetCaption(const char *title, const char *icon);
extern Uint8* SDL_GetKeyState(int* num_keys);
extern Uint8 SDL_GetMouseState(int* x, int* y);
// (SDLMod and KMOD_* are already defined by SDL_keysym.h)
extern SDLMod SDL_GetModState(void);
extern Uint8 SDL_GetAppState();
#ifdef __cplusplus

View File

@ -567,9 +567,7 @@ static void InitPs(bool setup_gui)
static void InitInput()
{
#if !OS_WIN
SDL_EnableKeyRepeat(SDL_DEFAULT_REPEAT_DELAY, SDL_DEFAULT_REPEAT_INTERVAL);
#endif
// register input handlers
// This stack is constructed so the first added, will be the last