Files
xmrig-hac/src/crypto/randomx/asm/program_loop_store.inc
SChernykh 5fffeed646 RandomX: optimized loading from scratchpad
Prefetches scratchpad data as soon as possible to calculate data address for the next load.

Up to ~1.4% speedup on Ryzen 7 3700X @ 4.1 GHz, RAM 3200 MHz 14-14-14-28 with optimized sub-timings:
Variant|Before H/S|After H/S
-------|----------|---------
rx/0|8663|8777
rx/wow|9867|10009
rx/loki|8652|8731
2019-09-11 19:10:01 +02:00

19 lines
457 B
PHP

;# Loop-store fragment: writes the eight integer registers r8-r15 and the
;# XOR-combined vector registers xmm0-xmm3 to two 64-byte memory blocks
;# whose addresses are taken from the top of the stack.
;#
;# In:    [rsp]     = destination address of the integer block (popped first)
;#        [rsp+8]   = destination address of the vector block (popped second);
;#                    must be 16-byte aligned because movapd is used
;#        r8-r15    = integer values to store
;#        xmm0-xmm7 = vector values; xmm0-xmm3 are XORed with xmm4-xmm7
;# Out:   64 bytes at each destination address are overwritten
;# Clobb: rcx, xmm0-xmm3. rsp advances by 16. Flags are not modified.
;#
;# NOTE(review): the two addresses are presumably pushed by the matching
;# loop-load fragment and presumably point into the scratchpad - confirm
;# against the code that includes this file.

;# rcx = first saved address (base of the 64-byte integer block)
pop rcx
;# Store r8..r15 as eight consecutive qwords at offsets 0..56.
mov qword ptr [rcx+0], r8
mov qword ptr [rcx+8], r9
mov qword ptr [rcx+16], r10
mov qword ptr [rcx+24], r11
mov qword ptr [rcx+32], r12
mov qword ptr [rcx+40], r13
mov qword ptr [rcx+48], r14
mov qword ptr [rcx+56], r15
;# rcx = second saved address (base of the 64-byte vector block)
pop rcx
;# Fold xmm4..xmm7 into xmm0..xmm3 pairwise (bitwise XOR of all 128 bits);
;# xmm4..xmm7 themselves are left unchanged.
;# NOTE(review): presumably xmm0-3 and xmm4-7 are two floating-point
;# register groups of the virtual machine - TODO confirm.
xorpd xmm0, xmm4
xorpd xmm1, xmm5
xorpd xmm2, xmm6
xorpd xmm3, xmm7
;# Store the four combined 128-bit values at offsets 0..48. movapd is an
;# aligned store, so rcx must be a multiple of 16 or the CPU raises a fault.
movapd xmmword ptr [rcx+0], xmm0
movapd xmmword ptr [rcx+16], xmm1
movapd xmmword ptr [rcx+32], xmm2
movapd xmmword ptr [rcx+48], xmm3