soft_aes: fix previous optimization

Previously removed unrolled variant is faster on some CPUs
Some CPUs are faster with added unrolled variant
The best variant depends on number of threads on some CPUs
This commit is contained in:
cohcho
2020-10-04 14:47:58 +00:00
parent 1c369c52fe
commit a5f4548b27
6 changed files with 116 additions and 72 deletions

View File

@@ -119,15 +119,10 @@ namespace randomx {
template<int softAes>
void VmBase<softAes>::hashAndFill(void* out, uint64_t (&fill_state)[8]) {
if (!softAes) {
hashAndFillAes1Rx4<0>(scratchpad, ScratchpadSize, &reg.a, fill_state);
hashAndFillAes1Rx4<0, 2>(scratchpad, ScratchpadSize, &reg.a, fill_state);
}
else {
if (GetSoftAESImpl() == 1) {
hashAndFillAes1Rx4<1>(scratchpad, ScratchpadSize, &reg.a, fill_state);
}
else {
hashAndFillAes1Rx4<2>(scratchpad, ScratchpadSize, &reg.a, fill_state);
}
(*GetSoftAESImpl())(scratchpad, ScratchpadSize, &reg.a, fill_state);
}
rx_blake2b_wrapper::run(out, RANDOMX_HASH_SIZE, &reg, sizeof(RegisterFile));