ia32: remove _ceil, mark functions as being sysdep implementations; prefix fpucw constants with IA32 to avoid conflicts

sysdep: remove round; document several functions; include the ia32.h header instead of relying on a hack that extern-declared the sysdep functions that are implemented via macro redirection.
wsdl: add SDL_EnableKeyRepeat stub; group related functions together in the header; the OS_WIN wrapper around the SDL_EnableKeyRepeat call is no longer needed.
This was SVN commit r3115.
2005-11-07 02:45:25 +00:00 · 2005-11-07 02:45:25 +00:00 · 9476dc4ad0
commit 9476dc4ad0
parent 14c62e721d
8 changed files with 100 additions and 106 deletions
--- a/source/lib/detect.cpp
+++ b/source/lib/detect.cpp
@ -157,20 +157,20 @@ void cpu_init()
 	// we need full precision when calculating the time.
 	// if there's a spot where we want to speed up divides|sqrts,
 	// we can temporarily change precision there.
-	//_control87(_PC_24, _MCW_PC);
+	//ia32_control87(IA32_PC_24, IA32_MCW_PC);

 	// to help catch bugs, enable as many floating-point exceptions as
 	// possible. that means only zero-divide, because the JS engine is
 	// triggering the rest.
 	// note: passing a flag *disables* that exception.
-	_control87(_EM_INVALID|_EM_DENORMAL|_EM_OVERFLOW|_EM_UNDERFLOW|_EM_INEXACT, _MCW_EM);
+	ia32_control87(IA32_EM_INVALID|IA32_EM_DENORMAL|IA32_EM_OVERFLOW|IA32_EM_UNDERFLOW|IA32_EM_INEXACT, IA32_MCW_EM);

 	// no longer round toward zero (truncate). changing this setting
 	// resulted in much faster float->int casts, because the compiler
 	// could be told (via /QIfist) to use FISTP while still truncating
 	// the result as required by ANSI C. however, FPU calculation
 	// results were changed significantly, so it had to be disabled.
-	//_control87(_RC_CHOP, _MCW_RC);
+	//ia32_control87(IA32_RC_CHOP, IA32_MCW_RC);

 	// If possible, hook up capability-sensitive assembler routines
 	ia32_hook_capabilities();
--- a/source/lib/sysdep/ia32.cpp
+++ b/source/lib/sysdep/ia32.cpp
@ -40,26 +40,13 @@
 #error ia32.cpp needs inline assembly support!
 #endif

+
+//-----------------------------------------------------------------------------
+// fast implementations of some sysdep.h functions; see documentation there
+//-----------------------------------------------------------------------------
+
 #if HAVE_MS_ASM

-// replace pathetic MS libc implementation.
-// not needed on non-Win32, so don't bother converting from MS inline asm.
-double _ceil(double f)
-{
-	UNUSED2(f);	// avoid bogus warning
-	const float _49 = 0.499999f;
-	double r;
-__asm
-{
-	fld			[f]
-	fadd		[_49]
-	frndint
-	fstp		[r]
-}
-	return r;
-}
-
-
 // note: declspec naked is significantly faster: it avoids redundant
 // store/load, even though it prevents inlining.

@ -135,7 +122,7 @@ __asm{
 #endif	// USE_IA32_FLOAT_TO_INT


-
+//-----------------------------------------------------------------------------

 // rationale: this function should return its output (instead of setting
 // out params) to simplify its callers. it is written in inline asm
--- a/source/lib/sysdep/ia32.h
+++ b/source/lib/sysdep/ia32.h
@ -35,60 +35,46 @@ extern "C" {
 extern void ia32_init();


-extern double _ceil(double);
+//
+// fast implementations of some sysdep.h functions; see documentation there
+//

 extern float ia32_rintf(float f);
 extern double ia32_rint(double f);

+extern i32 ia32_i32_from_float(float f);
+extern i32 ia32_i32_from_double(double d);
+extern i64 ia32_i64_from_double(double d);

-extern u64 rdtsc(void);
+extern void* ia32_memcpy(void* dst, const void* src, size_t nbytes);	// asm


-// these may have been defined by system headers; we redefine them to
-// the real IA-32 values for use with ia32_control87.
+// FPU control word
 // .. Precision Control:
-#undef _MCW_PC
-#define _MCW_PC 0x0300
-#undef _PC_24
-#define _PC_24  0x0000
+#define IA32_MCW_PC 0x0300
+#define IA32_PC_24  0x0000
 // .. Rounding Control:
-#undef _MCW_RC
-#define _MCW_RC  0x0C00
-#undef _RC_NEAR
-#define _RC_NEAR 0x0000
-#undef _RC_DOWN
-#define _RC_DOWN 0x0400
-#undef _RC_UP
-#define _RC_UP   0x0800
-#undef _RC_CHOP
-#define _RC_CHOP 0x0C00
+#define IA32_MCW_RC  0x0C00
+#define IA32_RC_NEAR 0x0000
+#define IA32_RC_DOWN 0x0400
+#define IA32_RC_UP   0x0800
+#define IA32_RC_CHOP 0x0C00
 // .. Exception Mask:
-#undef _MCW_EM
-#define _MCW_EM 0x003f
-#undef _EM_INVALID
-#define _EM_INVALID    BIT(0)
-#undef _EM_DENORMAL
-#define _EM_DENORMAL   BIT(1)
-#undef _EM_ZERODIVIDE
-#define _EM_ZERODIVIDE BIT(2)
-#undef _EM_OVERFLOW
-#define _EM_OVERFLOW   BIT(3)
-#undef _EM_UNDERFLOW
-#define _EM_UNDERFLOW  BIT(4)
-#undef _EM_INEXACT
-#define _EM_INEXACT    BIT(5)
+#define IA32_MCW_EM 0x003f
+#define IA32_EM_INVALID    BIT(0)
+#define IA32_EM_DENORMAL   BIT(1)
+#define IA32_EM_ZERODIVIDE BIT(2)
+#define IA32_EM_OVERFLOW   BIT(3)
+#define IA32_EM_UNDERFLOW  BIT(4)
+#define IA32_EM_INEXACT    BIT(5)

-#define _control87 ia32_control87
 extern uint ia32_control87(uint new_val, uint mask);	// asm


+extern u64 rdtsc(void);
+
 extern void ia32_debug_break(void);

-extern void* ia32_memcpy(void* dst, const void* src, size_t nbytes);
-
-// write the current execution state (e.g. all register values) into
-// (Win32::CONTEXT*)pcontext (defined as void* to avoid dependency).
-extern void ia32_get_current_context(void* pcontext);

 // CPU caps (128 bits)
 // do not change the order!
@ -121,8 +107,13 @@ extern void ia32_get_cpu_info(void);
 extern void ia32_hook_capabilities(void);


+//-----------------------------------------------------------------------------
 // internal use only

+// write the current execution state (e.g. all register values) into
+// (Win32::CONTEXT*)pcontext (defined as void* to avoid dependency).
+extern void ia32_get_current_context(void* pcontext);
+
 extern int ia32_get_call_target(void* ret_addr, void** target);

 // order in which registers are stored in regs array
--- a/source/lib/sysdep/sysdep.cpp
+++ b/source/lib/sysdep/sysdep.cpp
@ -13,15 +13,6 @@
 #include <memory.h>
 #include <stdarg.h>

-#if MSC_VERSION
-
-double round(double x)
-{
-	return (long)(x + 0.5);
-}
-
-#endif	// MSC_VERSION
-

 #if !HAVE_C99

@ -35,8 +26,12 @@ float fmaxf(float a, float b)
 	return (a > b)? a : b;
 }

+#endif

-#ifndef rint
+
+// no C99, and not running on IA-32 (where this is defined to ia32_rint)
+// => need to implement our fallback version.
+#if !HAVE_C99 && !defined(rint)

 inline float rintf(float f)
 {
@ -50,9 +45,9 @@ inline double rint(double d)

 #endif

-#endif	// !HAVE_C99
-

+// float->int conversion: not using the ia32 version; just implement as a
+// cast. (see USE_IA32_FLOAT_TO_INT definition for details)
 #if !USE_IA32_FLOAT_TO_INT

 i32 i32_from_float(float f)
--- a/source/lib/sysdep/sysdep.h
+++ b/source/lib/sysdep/sysdep.h
@ -3,11 +3,19 @@

 #include "config.h"

+// some functions among the sysdep API are implemented as macros
+// that redirect to the platform-dependent version. this is done where
+// the cost of a trampoline function would be too great; VC7 does not
+// always inline them.
+// we therefore need to include those headers.
 #if OS_WIN
 # include "win/win.h"
 #elif OS_UNIX
 # include "unix/unix.h"
 #endif
+#if CPU_IA32
+#include "ia32.h"
+#endif

 #ifdef __cplusplus
 extern "C" {
@ -50,6 +58,9 @@ extern int vsnprintf2(char* buffer, size_t count, const char* format, va_list ar
 extern void* alloca(size_t size);
 #endif

+// memcpy2: hand-tuned version; works for all sizes and alignments and is
+// significantly faster. uses SSE-optimized codepath when available.
+// 10% for < 64byte transfers and up to 300% on large sizes.
 #ifdef CPU_IA32
 # define memcpy2 ia32_memcpy
 extern void* ia32_memcpy(void* dst, const void* src, size_t nbytes);
@ -57,30 +68,33 @@ extern void* ia32_memcpy(void* dst, const void* src, size_t nbytes);
 # define memcpy2 memcpy
 #endif

-// rint: round float to nearest integer.
+// rint: round float to nearest integral value.
 // provided by C99, otherwise:
 #if !HAVE_C99
-// .. implemented on IA-32; define as macro to avoid jmp overhead
+// .. fast IA-32 version
 # if CPU_IA32
 #  define rintf ia32_rintf
 #  define rint ia32_rint
+// .. portable C emulation
+# else
+   extern float rintf(float f);
+   extern double rint(double d);
 # endif
-// .. forward-declare either the IA-32 version or portable C emulation.
-extern float rintf(float f);
-extern double rint(double d);
 #endif

-// fast float->int conversion; does not specify rounding mode,
-// so do not use them if exact values are needed.
+// i32_from_float et al: convert float to int. much faster than _ftol2,
+// which would normally be used by (int) casts.
+// .. fast IA-32 version: only used in some cases; see macro definition.
 #if USE_IA32_FLOAT_TO_INT
 # define i32_from_float ia32_i32_from_float
 # define i32_from_double ia32_i32_from_double
 # define i64_from_double ia32_i64_from_double
+// .. portable C emulation
+#else
+  extern i32 i32_from_float(float);
+  extern i32 i32_from_double(double);
+  extern i64 i64_from_double(double);
 #endif
-// .. forward-declare either the IA-32 version or portable C emulation.
-extern i32 i32_from_float(float);
-extern i32 i32_from_double(double);
-extern i64 i64_from_double(double);

 // finite: return 0 iff the given double is infinite or NaN.
 #if OS_WIN
@ -216,9 +230,6 @@ extern int on_each_cpu(void(*cb)());



-#if MSC_VERSION
-extern double round(double);
-#endif

 #if !HAVE_C99
 extern float fminf(float a, float b);
--- a/source/lib/sysdep/win/wsdl.cpp
+++ b/source/lib/sysdep/win/wsdl.cpp
@ -16,7 +16,7 @@
 //   Jan.Wassenberg@stud.uni-karlsruhe.de
 //   http://www.stud.uni-karlsruhe.de/~urkt/

- 
+
 // TODO: should use GetMessage when not active to reduce CPU load.
 // where to do this?
 // - force the app to check for SDL's activation messages, and call
--- a/source/lib/sysdep/win/wsdl.h
+++ b/source/lib/sysdep/win/wsdl.h
@ -46,7 +46,10 @@ extern int SDL_Init(Uint32 flags);

 extern void SDL_Quit(void);

-extern Uint8 SDL_GetAppState();
+
+//
+// video
+//

 typedef enum
 {
@ -57,14 +60,12 @@ SDL_GLattr;

 extern int SDL_GL_SetAttribute(SDL_GLattr attr, int value);

-
 // SDL_SetVideoMode() flags
 #define SDL_OPENGL 0
 #define SDL_FULLSCREEN 1

 extern int SDL_SetVideoMode(int w, int h, int bpp, unsigned long flags);

-
 typedef struct
 {
 	int w, h;
@ -73,7 +74,6 @@ SDL_Surface;

 extern SDL_Surface* SDL_GetVideoSurface(void);

-
 typedef struct
 {
 	int video_mem;
@ -82,6 +82,10 @@ SDL_VideoInfo;

 extern SDL_VideoInfo* SDL_GetVideoInfo(void);

+extern void* SDL_GL_GetProcAddress(const char*);
+
+extern void SDL_GL_SwapBuffers(void);
+

 //
 // threads / sync
@ -90,10 +94,6 @@ extern SDL_VideoInfo* SDL_GetVideoInfo(void);
 typedef void SDL_sem;
 typedef void SDL_Thread;

-extern void* SDL_GL_GetProcAddress(const char*);
-
-extern void SDL_GL_SwapBuffers(void);
-
 extern u32 SDL_GetTicks(void);
 extern void SDL_Delay(u32 ms);

@ -105,6 +105,7 @@ extern int SDL_SemWait(SDL_sem* sem);
 extern SDL_Thread* SDL_CreateThread(int(*)(void*), void*);
 extern int SDL_KillThread(SDL_Thread*);

+
 extern void SDL_WarpMouse(int, int);

 enum ShowCursorToggle
@ -118,14 +119,10 @@ extern int SDL_ShowCursor(int toggle);

 extern int SDL_SetGamma(float r, float g, float b);

-// macros

-#define SDL_GRAB_ON 0
-#define SDL_WM_GrabInput(a)
-#define SDL_GetError() ""
-
-
-//////////////////////////////////////////////////////////////////////////////
+//
+// byte swapping
+//


 #ifdef linux
@ -285,13 +282,28 @@ extern int SDL_WaitEvent(SDL_Event*);
 extern int SDL_PollEvent(SDL_Event* ev);
 extern int SDL_PushEvent(SDL_Event* ev);

+
+//
+// misc
+//
+
+#define SDL_GRAB_ON 0
+#define SDL_WM_GrabInput(a)
+
+#define SDL_GetError() ""
+
+// from real SDL, but they're ignored anyway
+#define SDL_DEFAULT_REPEAT_DELAY	500
+#define SDL_DEFAULT_REPEAT_INTERVAL	30
+#define SDL_EnableKeyRepeat(delay, interval)
+
+
 extern void SDL_WM_SetCaption(const char *title, const char *icon);

 extern Uint8* SDL_GetKeyState(int* num_keys);
 extern Uint8 SDL_GetMouseState(int* x, int* y);

-//( SDLMod and KMOD_* already defined by SDL_keysym.h)
-extern SDLMod SDL_GetModState(void);
+extern Uint8 SDL_GetAppState();


 #ifdef __cplusplus
--- a/source/ps/GameSetup/GameSetup.cpp
+++ b/source/ps/GameSetup/GameSetup.cpp
@ -567,9 +567,7 @@ static void InitPs(bool setup_gui)

 static void InitInput()
 {
-#if !OS_WIN
 	SDL_EnableKeyRepeat(SDL_DEFAULT_REPEAT_DELAY, SDL_DEFAULT_REPEAT_INTERVAL);
-#endif

 	// register input handlers
 	// This stack is constructed so the first added, will be the last