forked from 0ad/0ad
janwas
4e8c305c6e
remove asm rintf implementation (sse is faster); avoid the need for an external 64-bit asm file with vc2008/vc2010; allow arbitrarily long command line args This was SVN commit r8985.
182 lines
4.6 KiB
NASM
182 lines
4.6 KiB
NASM
; Copyright (c) 2010 Wildfire Games
|
|
;
|
|
; Permission is hereby granted, free of charge, to any person obtaining
|
|
; a copy of this software and associated documentation files (the
|
|
; "Software"), to deal in the Software without restriction, including
|
|
; without limitation the rights to use, copy, modify, merge, publish,
|
|
; distribute, sublicense, and/or sell copies of the Software, and to
|
|
; permit persons to whom the Software is furnished to do so, subject to
|
|
; the following conditions:
|
|
;
|
|
; The above copyright notice and this permission notice shall be included
|
|
; in all copies or substantial portions of the Software.
|
|
;
|
|
; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
|
|
; optimized assembly code for IA-32. not provided as
|
|
; inline assembly because that's compiler-specific.
|
|
|
|
%include "ia32.inc"
|
|
|
|
; note: pure asm functions prevent inlining but also avoid redundant
|
|
; store/loads generated by VC inline asm (ugh).
|
|
|
|
|
|
;-------------------------------------------------------------------------------
|
|
; CPUID support
|
|
;-------------------------------------------------------------------------------
|
|
|
|
; extern "C" void __cdecl ia32_asm_cpuid(x86_x64_CpuidRegs* regs);
;
; Executes CPUID with EAX and ECX loaded from *regs and writes the
; resulting EAX, EBX, ECX, EDX back to *regs as four consecutive dwords.
;
; In:    [esp+4]  = regs
;        regs+0   = function (loaded into EAX)
;        regs+8   = count    (loaded into ECX)
; Out:   regs+0 = EAX, regs+4 = EBX, regs+8 = ECX, regs+12 = EDX
; Saves: EBX (overwritten by CPUID), EDI (holds the regs pointer)
; Clobb: EAX, ECX, EDX
global sym(ia32_asm_cpuid)
sym(ia32_asm_cpuid):
        push    ebx                     ; (clobbered by CPUID)
        push    edi                     ; (need a register other than eax..edx)

        mov     edi, [esp+8+4]          ; skip 2 saved regs + return address -> regs

        mov     eax, [edi+0]            ; eax (function)
        mov     ecx, [edi+8]            ; ecx (count)
        cpuid

        ; store the results with plain moves: no dependence on the
        ; direction flag and no shuffling of values through EAX.
        mov     [edi+0], eax
        mov     [edi+4], ebx
        mov     [edi+8], ecx
        mov     [edi+12], edx

        pop     edi
        pop     ebx
        ret
|
|
|
|
|
|
;-------------------------------------------------------------------------------
|
|
; FPU
|
|
;-------------------------------------------------------------------------------
|
|
|
|
; extern "C" u32 __cdecl ia32_asm_control87(u32 new_cw, u32 mask);
;
; Replaces the bits of the x87 control word that are selected by mask with
; the corresponding bits of new_cw; all other bits keep their current value:
;   cw = (cw & ~mask) | (new_cw & mask)
;
; In:    [esp+4] = new_cw, [esp+8] = mask
; Out:   eax = 0 (always; the resulting control word is not returned)
; Clobb: ecx, edx, flags
global sym(ia32_asm_control87)
sym(ia32_asm_control87):
        sub     esp, 4                  ; scratch slot for the 16-bit control word
        fnstcw  [esp]
        movzx   eax, word [esp]         ; eax = old_cw

        mov     edx, [esp+4+8]          ; edx = mask    (+4: scratch slot)
        mov     ecx, [esp+4+4]          ; ecx = new_cw
        and     ecx, edx                ; new_cw & mask
        not     edx                     ; ~mask
        and     eax, edx                ; old_cw & ~mask
        or      eax, ecx                ; merged control word

        mov     [esp], ax
        fldcw   [esp]                   ; activate it
        add     esp, 4

        xor     eax, eax                ; return value
        ret
|
|
|
|
|
|
; FXAM reports the class of st0 in the condition-code bits of the x87
; *status* word (the value FNSTSW returns), not in the control word.
; the three bits kept by this mask, and the class each one denotes when
; it is the only one set:
;   C3 = 0x4000 -> ZERO,  C2 = 0x0400 -> NORMAL,  C0 = 0x0100 -> NAN
; (C1, the sign bit, is deliberately not part of the mask.)
FP_CLASSIFY_MASK equ (0x4000 | 0x0400 | 0x0100)
|
|
|
|
; extern "C" size_t __cdecl ia32_asm_fpclassifyd(double d);
;
; Examines the double d with FXAM.
;
; In:    [esp+4] = d (64 bits)
; Out:   eax = x87 status word after FXAM, masked with FP_CLASSIFY_MASK
; Note:  d is pushed on the x87 stack and popped again before returning.
global sym(ia32_asm_fpclassifyd)
sym(ia32_asm_fpclassifyd):
        fld     qword [esp+4]           ; st0 = d
        fxam                            ; classify st0 into the status word
        fnstsw  ax                      ; must be read before st0 is popped
        and     eax, FP_CLASSIFY_MASK   ; also discards the stale upper half of eax
        fstp    st0                     ; pop d again
        ret
|
|
|
|
; extern "C" size_t __cdecl ia32_asm_fpclassifyf(float f);
;
; Examines the float f with FXAM.
;
; In:    [esp+4] = f (32 bits)
; Out:   eax = x87 status word after FXAM, masked with FP_CLASSIFY_MASK
; Note:  f is pushed on the x87 stack and popped again before returning.
global sym(ia32_asm_fpclassifyf)
sym(ia32_asm_fpclassifyf):
        fld     dword [esp+4]           ; st0 = f
        fxam                            ; classify st0 into the status word
        fnstsw  ax                      ; must be read before st0 is popped
        and     eax, FP_CLASSIFY_MASK   ; also discards the stale upper half of eax
        fstp    st0                     ; pop f again
        ret
|
|
|
|
|
|
;-------------------------------------------------------------------------------
|
|
; misc
|
|
;-------------------------------------------------------------------------------
|
|
|
|
; extern "C" void ia32_asm_GetCurrentContext(void* pcontext);
;
; write the current execution state (e.g. all register values) into
; (Win32::CONTEXT*)pcontext (defined as void* to avoid dependency).
; optimized for size. this must be straight asm because compiler-generated
; prolog code inserted before inline asm trashes EBP and ESP (unacceptable),
; and the means of suppressing that prolog are compiler-specific.
;
; In:    [esp+4] = pcontext
; Out:   179 dwords written sequentially starting at pcontext:
;          1       ContextFlags
;          6+8+20  DRx and FloatSave (zeroed)
;          4       gs, fs, es, ds
;          6       edi, esi, ebx, edx, ecx, eax
;          6       ebp, eip, cs, eflags, esp, ss
;          512/4   ExtendedRegisters (zeroed)
; Saves: all general-purpose registers (PUSHAD/POPAD)
; Clobb: flags (the saved EFLAGS image is stored, not restored)
; Note:  relies on the direction flag being clear (STOSD must advance EDI).
;
; stack after PUSHAD + PUSHFD:
;   [esp+ 0] eflags   [esp+ 4] edi   [esp+ 8] esi   [esp+12] ebp
;   [esp+16] esp      [esp+20] ebx   [esp+24] edx   [esp+28] ecx
;   [esp+32] eax      [esp+36] return address       [esp+40] pcontext
global sym(ia32_asm_GetCurrentContext)
sym(ia32_asm_GetCurrentContext):
        pushad
        pushfd
        mov     edi, [esp+4+32+4]       ; pcontext

        ; ContextFlags
        mov     eax, 0x10007            ; segs, int, control
        stosd

        ; DRx and FloatSave
        ; rationale: we can't access the debug registers from Ring3, and
        ; the FPU save area is irrelevant, so zero them.
        xor     eax, eax
        push    byte 6+8+20             ; push/pop: short encoding of mov ecx, imm
        pop     ecx
        rep stosd

        ; CONTEXT_SEGMENTS
        ; (upper half of eax is still zero from the fill above)
        mov     ax, gs
        stosd
        mov     ax, fs
        stosd
        mov     ax, es
        stosd
        mov     ax, ds
        stosd

        ; CONTEXT_INTEGER
        ; edi, ecx and eax have been modified above, so take them from the
        ; PUSHAD image; esi, ebx and edx are still live in their registers.
        mov     eax, [esp+4+32-32]      ; edi
        stosd
        xchg    eax, esi
        stosd
        xchg    eax, ebx
        stosd
        xchg    eax, edx
        stosd
        mov     eax, [esp+4+32-8]       ; ecx
        stosd
        mov     eax, [esp+4+32-4]       ; eax
        stosd

        ; CONTEXT_CONTROL
        xchg    eax, ebp                ; ebp restored by POPAD
        stosd
        mov     eax, [esp+4+32]         ; return address
        sub     eax, 5                  ; skip CALL instruction -> call site.
                                        ; (5 = length of a near CALL rel32)
        stosd
        xor     eax, eax
        mov     ax, cs
        stosd
        pop     eax                     ; eflags (as saved by PUSHFD on entry)
        stosd
        ; esp now points at the PUSHAD image: skip it (32), the return
        ; address (4) and the pcontext argument (4).
        lea     eax, [esp+32+4+4]       ; esp
        stosd
        xor     eax, eax
        mov     ax, ss
        stosd

        ; ExtendedRegisters
        ; eax still holds the SS selector at this point; clear it so that
        ; the area is zero-filled like DRx/FloatSave instead of being
        ; filled with copies of the selector value.
        xor     eax, eax
        xor     ecx, ecx
        mov     cl, 512/4
        rep stosd

        popad
        ret
|