0ad/source/lib/memcpy.cpp
2004-03-03 00:44:06 +00:00

67 lines
1.2 KiB
C++
Executable File

/*
* block prefetch memcpy for large, uncached arrays
*
* src must be aligned to CHUNK_SIZE (4096 bytes) and len must be a multiple of CHUNK_SIZE.
*/
#if _MSC_VER >= 0x1300
void memcpy_nt(void* dst, void* src, int len)
{
__asm
{
push esi
mov edx, [dst]
mov esi, [src]
mov ecx, [len]
shr ecx, 12 ; # chunks
; smaller than sub ecx, CHUNK_SIZE below
main_loop:
; prefetch: touch each cache line in chunk
; (backwards to prevent hardware prefetches)
; add esi, CHUNK_SIZE
prefetch_loop:
mov eax, [esi-64]
mov eax, [esi-128]
sub esi, 128
test esi, 4095 ; CHUNK_SIZE-1 (icc doesn't preprocess asm)
jnz prefetch_loop
; copy the chunk 64 bytes at a time
write_loop:
movq mm0, [esi]
movq mm1, [esi+8]
movq mm2, [esi+16]
movq mm3, [esi+24]
movq mm4, [esi+32]
movq mm5, [esi+40]
movq mm6, [esi+48]
movq mm7, [esi+56]
add esi, 64
test esi, 4095 ; CHUNK_SIZE-1
movntq [edx], mm0
movntq [edx+8], mm1
movntq [edx+16], mm2
movntq [edx+24], mm3
movntq [edx+32], mm4
movntq [edx+40], mm5
movntq [edx+48], mm6
movntq [edx+56], mm7
lea edx, [edx+64] ; leave flags intact
jnz write_loop
dec ecx
jnz main_loop
sfence
emms
pop esi
}
}
#endif // #if _MSC_VER >= 0x1300