1
0
forked from 0ad/0ad
0ad/source/lib/sysdep/ia32_asm.asm

285 lines
7.3 KiB
NASM
Raw Normal View History

; =========================================================================
; File : ia32_asm.asm
; Project : 0 A.D.
; Description : optimized assembly code for IA-32. not provided as
; : inline assembly because that's compiler-specific.
;
; @author Jan.Wassenberg@stud.uni-karlsruhe.de
; =========================================================================
; Copyright (c) 2004-2005 Jan Wassenberg
;
; Redistribution and/or modification are also permitted under the
; terms of the GNU General Public License as published by th;e
; Free Software Foundation (version 2 or later, at your option).
;
; This program is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
%include "ia32.inc"
;-------------------------------------------------------------------------------
; CPUID support
;-------------------------------------------------------------------------------
[section .data]
; these are actually max_func+1, i.e. the first invalid value.
; the idea here is to avoid a separate cpuid_available flag;
; using signed values doesn't work because ext_funcs are >= 0x80000000.
max_func dd 0
max_ext_func dd 0
__SECT__
; extern "C" bool __cdecl ia32_cpuid(u32 func, u32* regs)
global sym(ia32_cpuid)
sym(ia32_cpuid):
push ebx
push edi
mov ecx, [esp+8+4+0] ; func
mov edi, [esp+8+4+4] ; -> regs
; compare against max supported func and fail if above
xor eax, eax ; return value on failure
test ecx, ecx
mov edx, [max_ext_func]
js .is_ext_func
mov edx, [max_func]
.is_ext_func:
cmp ecx, edx
jae .ret ; (see max_func decl)
; issue CPUID and store result registers in array
mov eax, ecx
cpuid
stosd
xchg eax, ebx
stosd
xchg eax, ecx
stosd
xchg eax, edx
stosd
; success
xor eax, eax
inc eax
.ret:
pop edi
pop ebx
ret
;-------------------------------------------------------------------------------
; lock-free support routines
;-------------------------------------------------------------------------------
extern sym(cpus)
; extern "C" void __cdecl atomic_add(intptr_t* location, intptr_t increment);
global sym(atomic_add)
sym(atomic_add):
cmp byte [sym(cpus)], 1
mov edx, [esp+4] ; location
mov eax, [esp+8] ; increment
je .no_lock
db 0xf0 ; LOCK prefix
.no_lock:
add [edx], eax
ret
; notes:
; - this is called via CAS macro, which silently casts its inputs for
; convenience. mixing up the <expected> and <location> parameters would
; go unnoticed; we therefore perform a basic sanity check on <location> and
; raise a warning if it is invalid.
; - a 486 or later processor is required since we use CMPXCHG.
; there's no feature flag we can check, and the ia32 code doesn't
; bother detecting anything < Pentium, so this'll crash and burn if
; run on 386. we could fall back to simple MOVs there (since 386 CPUs
; aren't MP-capable), but it's not worth the trouble.
; extern "C" __declspec(naked) bool __cdecl CAS_(uintptr_t* location, uintptr_t expected, uintptr_t new_value);
global sym(CAS_)
sym(CAS_):
cmp byte [sym(cpus)], 1
mov eax, [esp+8] ; expected
mov edx, [esp+4] ; location
cmp edx, 0x10000 ; .. valid pointer?
jb .invalid_location ; no - raise warning
mov ecx, [esp+12] ; new_value
je .no_lock
db 0xf0 ; LOCK prefix
.no_lock:
cmpxchg [edx], ecx
sete al
movzx eax, al
ret
; NOTE: nasm 0.98.39 doesn't support generating debug info for win32
; output format. that means this code may be misattributed to other
; functions, which makes tracking it down very difficult.
; we therefore raise an "Invalid Opcode" exception, which is rather distinct.
.invalid_location:
ud2
;-------------------------------------------------------------------------------
; FPU
;-------------------------------------------------------------------------------
; extern "C" uint __cdecl ia32_control87(uint new_cw, uint mask);
global sym(ia32_control87)
sym(ia32_control87):
push eax
fnstcw [esp]
pop eax ; old_cw
mov ecx, [esp+4] ; new_val
mov edx, [esp+8] ; mask
and ecx, edx ; new_val & mask
not edx ; ~mask
and eax, edx ; old_cw & ~mask
or eax, ecx ; (old_cw & ~mask) | (new_val & mask)
push eax ; = new_cw
fldcw [esp]
pop eax
xor eax, eax ; return value
ret
; possible IA-32 FPU control word flags after FXAM: NAN|NORMAL|ZERO
FP_CLASSIFY_MASK equ 0x4500
; extern "C" uint __cdecl ia32_fpclassify(double d);
global sym(ia32_fpclassify)
sym(ia32_fpclassify):
fld qword [esp+4]
fxam
fnstsw ax
and eax, FP_CLASSIFY_MASK
ret
; extern "C" uint __cdecl ia32_fpclassifyf(float f);
global sym(ia32_fpclassifyf)
sym(ia32_fpclassifyf):
fld dword [esp+4]
fxam
fnstsw ax
and eax, FP_CLASSIFY_MASK
ret
;-------------------------------------------------------------------------------
; misc
;-------------------------------------------------------------------------------
; write the current execution state (e.g. all register values) into
; (Win32::CONTEXT*)pcontext (defined as void* to avoid dependency).
; optimized for size; this must be straight asm because __declspec(naked)
; is compiler-specific and compiler-generated prolog code inserted before
; inline asm trashes EBP and ESP (unacceptable).
; extern "C" void ia32_get_current_context(void* pcontext)
global sym(ia32_get_current_context)
sym(ia32_get_current_context):
pushad
pushfd
mov edi, [esp+4+32+4] ; pcontext
; ContextFlags
mov eax, 0x10007 ; segs, int, control
stosd
; DRx and FloatSave
; rationale: we can't access the debug registers from Ring3, and
; the FPU save area is irrelevant, so zero them.
xor eax, eax
push byte 6+8+20
pop ecx
rep stosd
; CONTEXT_SEGMENTS
mov ax, gs
stosd
mov ax, fs
stosd
mov ax, es
stosd
mov ax, ds
stosd
; CONTEXT_INTEGER
mov eax, [esp+4+32-32] ; edi
stosd
xchg eax, esi
stosd
xchg eax, ebx
stosd
xchg eax, edx
stosd
mov eax, [esp+4+32-8] ; ecx
stosd
mov eax, [esp+4+32-4] ; eax
stosd
; CONTEXT_CONTROL
xchg eax, ebp ; ebp restored by POPAD
stosd
mov eax, [esp+4+32] ; return address
sub eax, 5 ; skip CALL instruction -> call site.
stosd
xor eax, eax
mov ax, cs
stosd
pop eax ; eflags
stosd
lea eax, [esp+32+4+4] ; esp
stosd
xor eax, eax
mov ax, ss
stosd
; ExtendedRegisters
xor ecx, ecx
mov cl, 512/4
rep stosd
popad
ret
;-------------------------------------------------------------------------------
; init
;-------------------------------------------------------------------------------
; extern "C" bool __cdecl ia32_asm_init()
global sym(ia32_asm_init)
sym(ia32_asm_init):
push ebx
; check if CPUID is supported
pushfd
or byte [esp+2], 32
popfd
pushfd
pop eax
xor edx, edx
shr eax, 22 ; bit 21 toggled?
jnc .no_cpuid
; determine max supported CPUID function
xor eax, eax
cpuid
inc eax ; (see max_func decl)
mov [max_func], eax
mov eax, 0x80000000
cpuid
inc eax ; (see max_func decl)
mov [max_ext_func], eax
.no_cpuid:
pop ebx
ret