Radical hash rate optimization
This commit is contained in:
@@ -28,6 +28,7 @@ Optimized Implementations for Haraka256 and Haraka512
|
||||
#include "crypto/haraka.h"
|
||||
|
||||
u128 rc[40];
|
||||
u128 rc0[40] = {0};
|
||||
|
||||
void load_constants() {
|
||||
rc[0] = _mm_set_epi32(0x0684704c,0xe620c00a,0xb2c5fef0,0x75817b9d);
|
||||
@@ -365,6 +366,37 @@ void haraka512(unsigned char *out, const unsigned char *in) {
|
||||
TRUNCSTORE(out, s[0], s[1], s[2], s[3]);
|
||||
}
|
||||
|
||||
void haraka512_zero(unsigned char *out, const unsigned char *in) {
|
||||
u128 s[4], tmp;
|
||||
|
||||
s[0] = LOAD(in);
|
||||
s[1] = LOAD(in + 16);
|
||||
s[2] = LOAD(in + 32);
|
||||
s[3] = LOAD(in + 48);
|
||||
|
||||
AES4_zero(s[0], s[1], s[2], s[3], 0);
|
||||
MIX4(s[0], s[1], s[2], s[3]);
|
||||
|
||||
AES4_zero(s[0], s[1], s[2], s[3], 8);
|
||||
MIX4(s[0], s[1], s[2], s[3]);
|
||||
|
||||
AES4_zero(s[0], s[1], s[2], s[3], 16);
|
||||
MIX4(s[0], s[1], s[2], s[3]);
|
||||
|
||||
AES4_zero(s[0], s[1], s[2], s[3], 24);
|
||||
MIX4(s[0], s[1], s[2], s[3]);
|
||||
|
||||
AES4_zero(s[0], s[1], s[2], s[3], 32);
|
||||
MIX4(s[0], s[1], s[2], s[3]);
|
||||
|
||||
s[0] = _mm_xor_si128(s[0], LOAD(in));
|
||||
s[1] = _mm_xor_si128(s[1], LOAD(in + 16));
|
||||
s[2] = _mm_xor_si128(s[2], LOAD(in + 32));
|
||||
s[3] = _mm_xor_si128(s[3], LOAD(in + 48));
|
||||
|
||||
TRUNCSTORE(out, s[0], s[1], s[2], s[3]);
|
||||
}
|
||||
|
||||
void haraka512_4x(unsigned char *out, const unsigned char *in) {
|
||||
u128 s[4][4], tmp;
|
||||
|
||||
|
||||
@@ -68,6 +68,16 @@ extern u128 rc[40];
|
||||
s2 = _mm_aesenc_si128(s2, rc[rci + 6]); \
|
||||
s3 = _mm_aesenc_si128(s3, rc[rci + 7]); \
|
||||
|
||||
#define AES4_zero(s0, s1, s2, s3, rci) \
|
||||
s0 = _mm_aesenc_si128(s0, rc0[rci]); \
|
||||
s1 = _mm_aesenc_si128(s1, rc0[rci + 1]); \
|
||||
s2 = _mm_aesenc_si128(s2, rc0[rci + 2]); \
|
||||
s3 = _mm_aesenc_si128(s3, rc0[rci + 3]); \
|
||||
s0 = _mm_aesenc_si128(s0, rc0[rci + 4]); \
|
||||
s1 = _mm_aesenc_si128(s1, rc0[rci + 5]); \
|
||||
s2 = _mm_aesenc_si128(s2, rc0[rci + 6]); \
|
||||
s3 = _mm_aesenc_si128(s3, rc0[rci + 7]); \
|
||||
|
||||
#define AES4_4x(s0, s1, s2, s3, rci) \
|
||||
AES4(s0[0], s0[1], s0[2], s0[3], rci); \
|
||||
AES4(s1[0], s1[1], s1[2], s1[3], rci); \
|
||||
@@ -109,6 +119,7 @@ void haraka256_4x(unsigned char *out, const unsigned char *in);
|
||||
void haraka256_8x(unsigned char *out, const unsigned char *in);
|
||||
|
||||
void haraka512(unsigned char *out, const unsigned char *in);
|
||||
void haraka512_zero(unsigned char *out, const unsigned char *in);
|
||||
void haraka512_4x(unsigned char *out, const unsigned char *in);
|
||||
void haraka512_8x(unsigned char *out, const unsigned char *in);
|
||||
|
||||
|
||||
@@ -53,6 +53,7 @@ static const unsigned char haraka_rc[40][16] = {
|
||||
};
|
||||
|
||||
static unsigned char rc[40][16];
|
||||
static unsigned char rc0[40][16];
|
||||
static unsigned char rc_sseed[40][16];
|
||||
|
||||
static const unsigned char sbox[256] =
|
||||
@@ -121,6 +122,12 @@ void unpackhi32(unsigned char *t, unsigned char *a, unsigned char *b)
|
||||
memcpy(t, tmp, 16);
|
||||
}
|
||||
|
||||
void load_constants_port()
|
||||
{
|
||||
/* Use the standard constants to generate tweaked ones. */
|
||||
memcpy(rc, haraka_rc, 40*16);
|
||||
}
|
||||
|
||||
void tweak_constants(const unsigned char *pk_seed, const unsigned char *sk_seed,
|
||||
unsigned long long seed_length)
|
||||
{
|
||||
@@ -258,6 +265,58 @@ void haraka512_port(unsigned char *out, const unsigned char *in)
|
||||
memcpy(out + 24, buf + 48, 8);
|
||||
}
|
||||
|
||||
void haraka512_perm_zero(unsigned char *out, const unsigned char *in)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
unsigned char s[64], tmp[16];
|
||||
|
||||
memcpy(s, in, 16);
|
||||
memcpy(s + 16, in + 16, 16);
|
||||
memcpy(s + 32, in + 32, 16);
|
||||
memcpy(s + 48, in + 48, 16);
|
||||
|
||||
for (i = 0; i < 5; ++i) {
|
||||
// aes round(s)
|
||||
for (j = 0; j < 2; ++j) {
|
||||
aesenc(s, rc0[4*2*i + 4*j]);
|
||||
aesenc(s + 16, rc0[4*2*i + 4*j + 1]);
|
||||
aesenc(s + 32, rc0[4*2*i + 4*j + 2]);
|
||||
aesenc(s + 48, rc0[4*2*i + 4*j + 3]);
|
||||
}
|
||||
|
||||
// mixing
|
||||
unpacklo32(tmp, s, s + 16);
|
||||
unpackhi32(s, s, s + 16);
|
||||
unpacklo32(s + 16, s + 32, s + 48);
|
||||
unpackhi32(s + 32, s + 32, s + 48);
|
||||
unpacklo32(s + 48, s, s + 32);
|
||||
unpackhi32(s, s, s + 32);
|
||||
unpackhi32(s + 32, s + 16, tmp);
|
||||
unpacklo32(s + 16, s + 16, tmp);
|
||||
}
|
||||
|
||||
memcpy(out, s, 64);
|
||||
}
|
||||
|
||||
/*
 * Portable Haraka-512 built on haraka512_perm_zero.
 *
 * Permutes the 64-byte input, XORs the input back into the permuted state
 * and writes 32 bytes to out, taken as four 8-byte pieces of the state.
 */
void haraka512_port_zero(unsigned char *out, const unsigned char *in)
{
    unsigned char state[64];
    int idx;

    haraka512_perm_zero(state, in);

    /* Feed-forward */
    for (idx = 0; idx < 64; ++idx) {
        state[idx] ^= in[idx];
    }

    /* Truncation: state offsets 8, 24, 32 and 48, eight bytes each. */
    memcpy(out, state + 8, 8);
    memcpy(out + 8, state + 24, 8);
    memcpy(out + 16, state + 32, 8);
    memcpy(out + 24, state + 48, 8);
}
|
||||
|
||||
void haraka256_port(unsigned char *out, const unsigned char *in)
|
||||
{
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
#ifndef SPX_HARAKA_H
|
||||
#define SPX_HARAKA_H
|
||||
|
||||
/* load constants */
|
||||
void load_constants_port();
|
||||
|
||||
/* Tweak constants with seed */
|
||||
void tweak_constants(const unsigned char *pk_seed, const unsigned char *sk_seed,
|
||||
unsigned long long seed_length);
|
||||
@@ -15,6 +18,12 @@ void haraka512_perm(unsigned char *out, const unsigned char *in);
|
||||
/* Implementation of Haraka-512 */
|
||||
void haraka512_port(unsigned char *out, const unsigned char *in);
|
||||
|
||||
/* Applies the 512-bit Haraka permutation to in, using zero key. */
|
||||
void haraka512_perm_zero(unsigned char *out, const unsigned char *in);
|
||||
|
||||
/* Implementation of Haraka-512, using zero key */
|
||||
void haraka512_port_zero(unsigned char *out, const unsigned char *in);
|
||||
|
||||
/* Implementation of Haraka-256 */
|
||||
void haraka256_port(unsigned char *out, const unsigned char *in);
|
||||
|
||||
|
||||
@@ -12,6 +12,8 @@ bit output.
|
||||
#include "crypto/common.h"
|
||||
#include "crypto/verus_hash.h"
|
||||
|
||||
void (*CVerusHash::haraka512Function)(unsigned char *out, const unsigned char *in);
|
||||
|
||||
void CVerusHash::Hash(void *result, const void *data, size_t len)
|
||||
{
|
||||
unsigned char buf[128];
|
||||
@@ -36,7 +38,7 @@ void CVerusHash::Hash(void *result, const void *data, size_t len)
|
||||
memcpy(bufPtr + 32, ptr + pos, i);
|
||||
memset(bufPtr + 32 + i, 0, 32 - i);
|
||||
}
|
||||
haraka512(bufPtr2, bufPtr);
|
||||
(*haraka512Function)(bufPtr2, bufPtr);
|
||||
bufPtr2 = bufPtr;
|
||||
bufPtr += nextOffset;
|
||||
nextOffset *= -1;
|
||||
@@ -44,6 +46,18 @@ void CVerusHash::Hash(void *result, const void *data, size_t len)
|
||||
memcpy(result, bufPtr, 32);
|
||||
};
|
||||
|
||||
void CVerusHash::init()
|
||||
{
|
||||
if (IsCPUVerusOptimized())
|
||||
{
|
||||
haraka512Function = &haraka512_zero;
|
||||
}
|
||||
else
|
||||
{
|
||||
haraka512Function = &haraka512_port_zero;
|
||||
}
|
||||
}
|
||||
|
||||
CVerusHash &CVerusHash::Write(const unsigned char *data, size_t len)
|
||||
{
|
||||
unsigned char *tmp;
|
||||
@@ -56,7 +70,7 @@ CVerusHash &CVerusHash::Write(const unsigned char *data, size_t len)
|
||||
if (len - pos >= room)
|
||||
{
|
||||
memcpy(curBuf + 32 + curPos, data + pos, room);
|
||||
haraka512(result, curBuf);
|
||||
(*haraka512Function)(result, curBuf);
|
||||
tmp = curBuf;
|
||||
curBuf = result;
|
||||
result = tmp;
|
||||
@@ -73,7 +87,31 @@ CVerusHash &CVerusHash::Write(const unsigned char *data, size_t len)
|
||||
return *this;
|
||||
}
|
||||
|
||||
void CVerusHashPortable::Hash(void *result, const void *data, size_t len)
|
||||
// to be declared and accessed from C
|
||||
void verus_hash(void *result, const void *data, size_t len)
|
||||
{
|
||||
return CVerusHash::Hash(result, data, len);
|
||||
}
|
||||
|
||||
void (*CVerusHashV2::haraka512Function)(unsigned char *out, const unsigned char *in);
|
||||
|
||||
void CVerusHashV2::init()
|
||||
{
|
||||
// load and tweak the haraka constants
|
||||
load_constants();
|
||||
load_constants_port();
|
||||
|
||||
if (IsCPUVerusOptimized())
|
||||
{
|
||||
haraka512Function = &haraka512;
|
||||
}
|
||||
else
|
||||
{
|
||||
haraka512Function = &haraka512_port;
|
||||
}
|
||||
}
|
||||
|
||||
void CVerusHashV2::Hash(void *result, const void *data, size_t len)
|
||||
{
|
||||
unsigned char buf[128];
|
||||
unsigned char *bufPtr = buf;
|
||||
@@ -97,7 +135,7 @@ void CVerusHashPortable::Hash(void *result, const void *data, size_t len)
|
||||
memcpy(bufPtr + 32, ptr + pos, i);
|
||||
memset(bufPtr + 32 + i, 0, 32 - i);
|
||||
}
|
||||
haraka512_port(bufPtr2, bufPtr);
|
||||
(*haraka512Function)(bufPtr2, bufPtr);
|
||||
bufPtr2 = bufPtr;
|
||||
bufPtr += nextOffset;
|
||||
nextOffset *= -1;
|
||||
@@ -105,7 +143,7 @@ void CVerusHashPortable::Hash(void *result, const void *data, size_t len)
|
||||
memcpy(result, bufPtr, 32);
|
||||
};
|
||||
|
||||
CVerusHashPortable &CVerusHashPortable::Write(const unsigned char *data, size_t len)
|
||||
CVerusHashV2 &CVerusHashV2::Write(const unsigned char *data, size_t len)
|
||||
{
|
||||
unsigned char *tmp;
|
||||
|
||||
@@ -117,7 +155,7 @@ CVerusHashPortable &CVerusHashPortable::Write(const unsigned char *data, size_t
|
||||
if (len - pos >= room)
|
||||
{
|
||||
memcpy(curBuf + 32 + curPos, data + pos, room);
|
||||
haraka512_port(result, curBuf);
|
||||
(*haraka512Function)(result, curBuf);
|
||||
tmp = curBuf;
|
||||
curBuf = result;
|
||||
result = tmp;
|
||||
@@ -135,14 +173,7 @@ CVerusHashPortable &CVerusHashPortable::Write(const unsigned char *data, size_t
|
||||
}
|
||||
|
||||
// to be declared and accessed from C
|
||||
void verus_hash(void *result, const void *data, size_t len)
|
||||
void verus_hash_v2(void *result, const void *data, size_t len)
|
||||
{
|
||||
return CVerusHashPortable::Hash(result, data, len);
|
||||
return CVerusHashV2::Hash(result, data, len);
|
||||
}
|
||||
|
||||
// to be declared and accessed from C
|
||||
void verus_hash_optimized(void *result, const void *data, size_t len)
|
||||
{
|
||||
return CVerusHash::Hash(result, data, len);
|
||||
}
|
||||
|
||||
|
||||
@@ -23,6 +23,9 @@ class CVerusHash
|
||||
{
|
||||
public:
|
||||
static void Hash(void *result, const void *data, size_t len);
|
||||
static void (*haraka512Function)(unsigned char *out, const unsigned char *in);
|
||||
|
||||
static void init();
|
||||
|
||||
CVerusHash() {}
|
||||
|
||||
@@ -36,12 +39,22 @@ class CVerusHash
|
||||
std::fill(buf1, buf1 + sizeof(buf1), 0);
|
||||
}
|
||||
|
||||
// Returns the address curBuf + 32 viewed as a pointer to int64_t.
int64_t *ExtraI64Ptr()
{
    return reinterpret_cast<int64_t *>(curBuf + 32);
}
|
||||
void ClearExtra()
|
||||
{
|
||||
if (curPos)
|
||||
{
|
||||
std::fill(curBuf + 32 + curPos, curBuf + 64, 0);
|
||||
}
|
||||
}
|
||||
// Runs the currently selected haraka512Function over curBuf, writing into hash.
void ExtraHash(unsigned char hash[32])
{
    haraka512Function(hash, curBuf);
}
|
||||
|
||||
void Finalize(unsigned char hash[32])
|
||||
{
|
||||
if (curPos)
|
||||
{
|
||||
std::fill(curBuf + 32 + curPos, curBuf + 64, 0);
|
||||
haraka512(hash, curBuf);
|
||||
(*haraka512Function)(hash, curBuf);
|
||||
}
|
||||
else
|
||||
std::memcpy(hash, curBuf, 32);
|
||||
@@ -54,16 +67,19 @@ class CVerusHash
|
||||
size_t curPos = 0;
|
||||
};
|
||||
|
||||
class CVerusHashPortable
|
||||
class CVerusHashV2
|
||||
{
|
||||
public:
|
||||
static void Hash(void *result, const void *data, size_t len);
|
||||
static void (*haraka512Function)(unsigned char *out, const unsigned char *in);
|
||||
|
||||
CVerusHashPortable() {}
|
||||
static void init();
|
||||
|
||||
CVerusHashPortable &Write(const unsigned char *data, size_t len);
|
||||
CVerusHashV2() {}
|
||||
|
||||
CVerusHashPortable &Reset()
|
||||
CVerusHashV2 &Write(const unsigned char *data, size_t len);
|
||||
|
||||
CVerusHashV2 &Reset()
|
||||
{
|
||||
curBuf = buf1;
|
||||
result = buf2;
|
||||
@@ -71,12 +87,22 @@ class CVerusHashPortable
|
||||
std::fill(buf1, buf1 + sizeof(buf1), 0);
|
||||
}
|
||||
|
||||
// Returns the address curBuf + 32 viewed as a pointer to int64_t.
int64_t *ExtraI64Ptr()
{
    return reinterpret_cast<int64_t *>(curBuf + 32);
}
|
||||
void ClearExtra()
|
||||
{
|
||||
if (curPos)
|
||||
{
|
||||
std::fill(curBuf + 32 + curPos, curBuf + 64, 0);
|
||||
}
|
||||
}
|
||||
// Runs the currently selected haraka512Function over curBuf, writing into hash.
void ExtraHash(unsigned char hash[32])
{
    haraka512Function(hash, curBuf);
}
|
||||
|
||||
void Finalize(unsigned char hash[32])
|
||||
{
|
||||
if (curPos)
|
||||
{
|
||||
std::fill(curBuf + 32 + curPos, curBuf + 64, 0);
|
||||
haraka512_port(hash, curBuf);
|
||||
(*haraka512Function)(hash, curBuf);
|
||||
}
|
||||
else
|
||||
std::memcpy(hash, curBuf, 32);
|
||||
@@ -90,6 +116,7 @@ class CVerusHashPortable
|
||||
};
|
||||
|
||||
extern void verus_hash(void *result, const void *data, size_t len);
|
||||
extern void verus_hash_v2(void *result, const void *data, size_t len);
|
||||
|
||||
inline bool IsCPUVerusOptimized()
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user