0ad/source/lib/sysdep/arch/ia32/ia32_asm.asm
janwas 4e8c305c6e sync with work - export app_hooks_update; add boost replacement FileExists and FileSize; avoid conflict with stdint.h;
remove asm rintf implementation (sse is faster); avoid the need for an
external 64-bit asm file with vc2008/vc2010; allow arbitrarily long
command line args

This was SVN commit r8985.
2011-02-25 16:31:42 +00:00

182 lines
4.6 KiB
NASM

; Copyright (c) 2010 Wildfire Games
;
; Permission is hereby granted, free of charge, to any person obtaining
; a copy of this software and associated documentation files (the
; "Software"), to deal in the Software without restriction, including
; without limitation the rights to use, copy, modify, merge, publish,
; distribute, sublicense, and/or sell copies of the Software, and to
; permit persons to whom the Software is furnished to do so, subject to
; the following conditions:
;
; The above copyright notice and this permission notice shall be included
; in all copies or substantial portions of the Software.
;
; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
; optimized assembly code for IA-32. not provided as
; inline assembly because that's compiler-specific.
%include "ia32.inc"
; note: pure asm functions prevent inlining but also avoid redundant
; store/loads generated by VC inline asm (ugh).
;-------------------------------------------------------------------------------
; CPUID support
;-------------------------------------------------------------------------------
; extern "C" void __cdecl ia32_asm_cpuid(x86_x64_CpuidRegs* regs);
;
; Executes CPUID with eax (function) and ecx (count) taken from *regs and
; writes the resulting eax, ebx, ecx, edx back into *regs as four
; consecutive dwords.
; In:    [esp+4] = regs
; Out:   regs dwords 0..3 = eax, ebx, ecx, edx as returned by CPUID
; Saves: ebx, edi (pushed on entry, popped before returning)
global sym(ia32_asm_cpuid)
sym(ia32_asm_cpuid):
    push    edi                     ; base pointer must be a register CPUID leaves alone
    push    ebx                     ; CPUID overwrites ebx
    mov     edi, [esp+4+8]          ; regs (above return address and the 2 saved dwords)
    mov     eax, [edi]              ; function
    mov     ecx, [edi+8]            ; count
    cpuid
    mov     [edi], eax
    mov     [edi+4], ebx
    mov     [edi+8], ecx
    mov     [edi+12], edx
    pop     ebx
    pop     edi
    ret
;-------------------------------------------------------------------------------
; FPU
;-------------------------------------------------------------------------------
; extern "C" u32 __cdecl ia32_asm_control87(u32 new_cw, u32 mask);
;
; Replaces the bits of the FPU control word selected by mask with the
; corresponding bits of new_cw; all other bits keep their current value.
; In:    [esp+4] = new_cw, [esp+8] = mask
; Out:   eax = 0 (always)
; Clobb: ecx, edx, flags
global sym(ia32_asm_control87)
sym(ia32_asm_control87):
    mov     ecx, [esp+4]            ; new_cw
    mov     edx, [esp+8]            ; mask
    and     ecx, edx                ; new_cw & mask
    not     edx                     ; ~mask
    sub     esp, 4                  ; scratch dword for the control word
    fnstcw  [esp]                   ; low word = current control word (old_cw)
    and     edx, [esp]              ; old_cw & ~mask
    or      edx, ecx                ; (old_cw & ~mask) | (new_cw & mask)
    mov     [esp], edx
    fldcw   [esp]                   ; only the low 16 bits are loaded
    add     esp, 4
    xor     eax, eax                ; return value
    ret
; IA-32 FPU status word condition-code bits set by FXAM (read back via
; FNSTSW): C0 (0x0100) | C2 (0x0400) | C3 (0x4000), i.e. NAN|NORMAL|ZERO
FP_CLASSIFY_MASK equ 0x4500
; extern "C" size_t __cdecl ia32_asm_fpclassifyd(double d);
;
; Classifies d with FXAM and returns the FPU status word masked with
; FP_CLASSIFY_MASK. The FPU stack is left as it was found.
; In:    [esp+4] = d (64-bit)
; Out:   eax = status word & FP_CLASSIFY_MASK
global sym(ia32_asm_fpclassifyd)
sym(ia32_asm_fpclassifyd):
    fld     qword [esp+4]           ; st0 = d
    fxam                            ; examine st0 -> condition codes
    fnstsw  ax                      ; ax = status word (upper half of eax undefined)
    and     eax, FP_CLASSIFY_MASK   ; keep classification bits, clear everything else
    fstp    st0                     ; pop d again
    ret
; extern "C" size_t __cdecl ia32_asm_fpclassifyf(float f);
;
; Classifies f with FXAM and returns the FPU status word masked with
; FP_CLASSIFY_MASK. The FPU stack is left as it was found.
; In:    [esp+4] = f (32-bit)
; Out:   eax = status word & FP_CLASSIFY_MASK
global sym(ia32_asm_fpclassifyf)
sym(ia32_asm_fpclassifyf):
    fld     dword [esp+4]           ; st0 = f
    fxam                            ; examine st0 -> condition codes
    fnstsw  ax                      ; ax = status word (upper half of eax undefined)
    and     eax, FP_CLASSIFY_MASK   ; keep classification bits, clear everything else
    fstp    st0                     ; pop f again
    ret
;-------------------------------------------------------------------------------
; misc
;-------------------------------------------------------------------------------
; write the current execution state (e.g. all register values) into
; (Win32::CONTEXT*)pcontext (defined as void* to avoid dependency).
; optimized for size; this must be straight asm because naked-function
; support (e.g. __declspec(naked)) is compiler-specific and
; compiler-generated prolog code inserted before inline asm trashes
; EBP and ESP (unacceptable).
;
; In:  [esp+4] = pcontext
; Out: *pcontext filled front to back via STOSD (edi = write cursor).
;      all general registers are restored by POPAD; EFLAGS are not.
; relies on the direction flag being clear on entry (no CLD is executed).
;
; stack after PUSHAD + PUSHFD, relative to esp:
;   +0 eflags, +4 edi, +8 esi, +12 ebp, +16 esp, +20 ebx, +24 edx,
;   +28 ecx, +32 eax, +36 return address, +40 pcontext
; extern "C" void ia32_asm_GetCurrentContext(void* pcontext);
global sym(ia32_asm_GetCurrentContext)
sym(ia32_asm_GetCurrentContext):
    pushad
    pushfd                          ; capture EFLAGS before the XORs below alter them
    mov     edi, [esp+4+32+4]       ; pcontext

    ; ContextFlags
    mov     eax, 0x10007            ; segs, int, control
    stosd

    ; DRx and FloatSave
    ; rationale: we can't access the debug registers from Ring3, and
    ; the FPU save area is irrelevant, so zero them.
    xor     eax, eax
    push    byte 6+8+20             ; ecx = 34 dwords (push/pop is shorter than mov ecx, imm32)
    pop     ecx
    rep stosd

    ; CONTEXT_SEGMENTS
    ; (upper half of eax is still zero from the XOR above, so each
    ; selector is stored zero-extended.)
    mov     ax, gs
    stosd
    mov     ax, fs
    stosd
    mov     ax, es
    stosd
    mov     ax, ds
    stosd

    ; CONTEXT_INTEGER
    ; edi, ecx and eax have been overwritten by now, so their entry values
    ; come from the PUSHAD frame; esi, ebx and edx are still live and are
    ; swapped into eax (POPAD undoes the resulting clobbering).
    mov     eax, [esp+4+32-32]      ; edi
    stosd
    xchg    eax, esi
    stosd
    xchg    eax, ebx
    stosd
    xchg    eax, edx
    stosd
    mov     eax, [esp+4+32-8]       ; ecx
    stosd
    mov     eax, [esp+4+32-4]       ; eax
    stosd

    ; CONTEXT_CONTROL
    xchg    eax, ebp                ; ebp restored by POPAD
    stosd
    mov     eax, [esp+4+32]         ; return address
    sub     eax, 5                  ; skip CALL instruction -> call site.
                                    ; (assumes a 5-byte CALL - TODO confirm for all callers)
    stosd
    xor     eax, eax
    mov     ax, cs
    stosd
    pop     eax                     ; eflags (removes the PUSHFD dword; esp -> PUSHAD frame)
    stosd
    lea     eax, [esp+32+4+4]       ; esp: above PUSHAD frame, return address and pcontext
    stosd
    xor     eax, eax
    mov     ax, ss
    stosd

    ; ExtendedRegisters
    ; zero-fill: eax still holds the SS selector at this point, so it must
    ; be cleared or the whole 512-byte area would be filled with that value.
    xor     eax, eax
    xor     ecx, ecx
    mov     cl, 512/4               ; 128 dwords (too large for a sign-extended PUSH imm8)
    rep stosd

    popad
    ret