0ad/source/lib/memcpy.cpp
janwas 5299dcad86 - config: all macros are defined as 1 / 0. testing with #if allows compiler warnings (testing undefined macro) to spot misspelled macros
- debug: add provision for naming threads. allows adding current thread
name to log messages and displays their names in the debugger.
- replaced various if(err < 0) complain() sequences with new variants of
CHECK_ERR (see lib.h)
- fixes to mmgr/VC debug alloc enable code
- improved h_mgr error reporting (now complains when h_free fails)
- US -> UK english (partial)
- fix tex_load double-free bug
- move win32 mouse cursor code into sysdep
- error dialog is now topmost to make sure it's visible (was a problem)
- handle WM_QUIT before displaying error dialog (makes sure it's shown)

also as in previous 3 revisions.

This was SVN commit r2588.
2005-08-09 16:23:19 +00:00

69 lines
1.2 KiB
C++
Executable File

// block prefetch memcpy for large, uncached arrays
//
// src and len must be multiples of CHUNK_SIZE.
#include "precompiled.h"
#include "config.h"
#if HAVE_ASM
void memcpy_nt(void* dst, void* src, int len)
{
__asm
{
push esi
mov edx, [dst]
mov esi, [src]
mov ecx, [len]
shr ecx, 12 ; # chunks
; smaller than sub ecx, CHUNK_SIZE below
main_loop:
; prefetch: touch each cache line in chunk
; (backwards to prevent hardware prefetches)
; add esi, CHUNK_SIZE
prefetch_loop:
mov eax, [esi-64]
mov eax, [esi-128]
sub esi, 128
test esi, 4095 ; CHUNK_SIZE-1 (icc doesnt preprocess asm)
jnz prefetch_loop
; copy the chunk 64 bytes at a time
write_loop:
movq mm0, [esi]
movq mm1, [esi+8]
movq mm2, [esi+16]
movq mm3, [esi+24]
movq mm4, [esi+32]
movq mm5, [esi+40]
movq mm6, [esi+48]
movq mm7, [esi+56]
add esi, 64
test esi, 4095 ; CHUNK_SIZE-1
movntq [edx], mm0
movntq [edx+8], mm1
movntq [edx+16], mm2
movntq [edx+24], mm3
movntq [edx+32], mm4
movntq [edx+40], mm5
movntq [edx+48], mm6
movntq [edx+56], mm7
lea edx, [edx+64] ; leave flags intact
jnz write_loop
dec ecx
jnz main_loop
sfence
emms
pop esi
}
}
#endif // #if HAVE_ASM