diff --git a/.gitignore b/.gitignore
index b2010491e..9fc50df80 100644
--- a/.gitignore
+++ b/.gitignore
@@ -31,7 +31,6 @@ libtool
src/config/bitcoin-config.h
src/config/bitcoin-config.h.in
src/config/stamp-h1
-share/setup.nsi
cache/
venv-mnf/
diff --git a/AUTHORS b/AUTHORS
index d2d49e2e0..20bd581e2 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -2,6 +2,7 @@
Duke Leto https://git.hush.is/duke https://github.com/leto
Miodrag https://github.com/miodragpop
+jahway603 https://git.hush.is/jahway603 https://github.com/jahway603
# The SuperNET Developers
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 3a2b1e08a..5866ab2e3 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -37,7 +37,7 @@ Before we get started, here are a few things we expect from you (and that you sh
## How to contribute
-If you'd like to contribute, start by searching through the [issues](https://github.com/MyHush/hush3/issues) and [pull requests](https://github.com/MyHush/hush3/pulls) to see whether someone else has raised a similar idea or question.
+If you'd like to contribute, start by searching through the [issues](https://git.hush.is/hush/hush3/issues) and [pull requests](https://git.hush.is/hush/hush3/pulls) to see whether someone else has raised a similar idea or question.
If you don't see your idea listed, and you think it can contribute to Hush, do one of the following:
* **If your contribution is minor,** such as a fixing a typo, open a pull request.
@@ -49,7 +49,7 @@ Don't write shitty code. Do not emulate "jl777 code style" from Komodo, we consi
## Setting up your environment
-The Hush Core (hushd) is mainly written in C++ with specific modules written in C. Follow the [Install](https://github.com/MyHush/hush3/blob/master/INSTALL.md) instructions to build hushd from sources. For more informations about the Hush Platform and a full API documentation please visit the official [Hush Developer documentation](https://gilardh.github.io/dev-website/developers/)
+The Hush Core (hushd) is mainly written in C++ with specific modules written in C. Follow the [Install](https://git.hush.is/hush/hush3/src/branch/master/INSTALL.md) instructions to build hushd from sources. For more information about the Hush Platform and full API documentation, please visit the official [Hush Developer documentation](https://faq.hush.is/rpc/)
Other Hush software is written in Rust or Go. We avoid Javascript at all costs.
diff --git a/DEVELOPING.md b/DEVELOPING.md
index 0aae790f5..1de87fcc6 100644
--- a/DEVELOPING.md
+++ b/DEVELOPING.md
@@ -78,13 +78,13 @@ of a dependency or something inside of Rust, you will need `build.sh` .
Make sure that you have updated all version numbers in hushd and compiled, then
to generate new unix man pages for that version :
- ./contrib/devtools/gen-manpages.sh
+ ./util/gen-manpages.sh
## Generating new debian packages
After successfully compiling Hush, you can generate a debian package of these binaries with:
- ./zcutil/build-debian-package.sh
+ ./util/build-debian-package.sh
This command will not work on Mac OS X. Currently you cannot generate a Debian package
from operating systems other than Linux. Oh well.
diff --git a/Dockerfile b/Dockerfile
index e1323ec2f..7e639b16f 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,4 @@
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
FROM ubuntu:16.04
@@ -26,7 +26,7 @@ RUN ln -sf /hush/src/hushd /usr/bin/hushd && \
ln -sf /hush/src/hush-tx /usr/bin/hush-tx && \
ln -sf /hush/src/wallet-utility /usr/bin/hush-wallet-utility && \
ln -sf /hush/src/hush-smart-chain /usr/bin/hush-smart-chain && \
- ln -sf /hush/zcutil/docker-entrypoint.sh /usr/bin/entrypoint && \
- ln -sf /hush/zcutil/docker-hush-cli.sh /usr/bin/hush-cli
+ ln -sf /hush/util/docker-entrypoint.sh /usr/bin/entrypoint && \
+ ln -sf /hush/util/docker-hush-cli.sh /usr/bin/hush-cli
CMD ["entrypoint"]
diff --git a/INSTALL.md b/INSTALL.md
index eba94faa0..ad81f40b9 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -25,12 +25,13 @@ sudo swapon /swapfile
# install build dependencies
sudo apt-get install build-essential pkg-config libc6-dev m4 g++-multilib \
autoconf libtool ncurses-dev unzip git python zlib1g-dev wget \
- bsdmainutils automake curl unzip nano libsodium-dev
-# pull
+ bsdmainutils automake curl unzip nano libsodium-dev cmake
+# clone git repo
git clone https://git.hush.is/hush/hush3
cd hush3
# Build
-./build.sh -j$(nproc)
+# This uses 3 build processes, you need 2GB of RAM for each.
+./build.sh -j3
```
### Building On Ubuntu 16.04 and older systems
@@ -45,6 +46,23 @@ apt-get install -y gcc-7 g++-7 && \
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-7 60
```
+### Build on mac
+
+These instructions are a work in progress. Please report issues to https://hush.is/tg_support
+
+```
+sudo port update
+sudo port upgrade outdated
+sudo port install qt5
+
+# clone git repo
+git clone https://git.hush.is/hush/hush3
+cd hush3
+# Build
+# This uses 3 build processes, you need 2GB of RAM for each.
+./build.sh -j3
+```
+
## Run a HUSH Node
After you have compiled Hush, then you can run it with the following command:
@@ -80,5 +98,5 @@ Currently, any ARMv7 machine will not be able to build this repo, because the
underlying tech (zcash and the zksnark library) do not support that instruction
set.
-This also means that RaspberryPi devices will not work, unless they have a
-newer ARMv8-based Raspberry Pi.
+This also means that older Raspberry Pi devices will not work; an ARMv8-based
+Raspberry Pi is required. Raspberry Pi 4 and newer are known to work.
diff --git a/Makefile.am b/Makefile.am
index bc04a0cdd..48c498853 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,4 +1,4 @@
-# Copyright 2016-2021 The Hush developers
+# Copyright 2016-2022 The Hush developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
@@ -24,8 +24,8 @@ BITCOIN_WIN_INSTALLER=$(PACKAGE)-$(PACKAGE_VERSION)-win$(WINDOWS_BITS)-setup$(EX
DIST_DOCS = $(wildcard doc/*.md) $(wildcard doc/release-notes/*.md)
-BIN_CHECKS=$(top_srcdir)/contrib/devtools/symbol-check.py \
- $(top_srcdir)/contrib/devtools/security-check.py
+BIN_CHECKS=$(top_srcdir)/util/symbol-check.py \
+ $(top_srcdir)/util/security-check.py
@@ -233,7 +233,7 @@ endif
endif
dist_bin_SCRIPTS =
-dist_noinst_SCRIPTS = autogen.sh zcutil/build-debian-package.sh zcutil/build.sh
+dist_noinst_SCRIPTS = autogen.sh util/build-debian-package.sh util/build.sh
EXTRA_DIST = $(top_srcdir)/share/genbuild.sh qa/pull-tester/rpc-tests.sh qa/pull-tester/run-bitcoin-cli qa/rpc-tests qa/hush $(DIST_DOCS) $(BIN_CHECKS)
@@ -247,4 +247,8 @@ DISTCHECK_CONFIGURE_FLAGS = --enable-man
clean-local:
rm -rf test_bitcoin.coverage/ total.coverage/
+manpages:
+ ./util/gen-manpages.sh
+ @echo "Please review the man pages changes to see if they look correct, then commit and push"
+
# I also heard about a bug in the UNIVAC but it's too big for this comment. -- Duke
diff --git a/autogen.sh b/autogen.sh
index 6932fae53..2aa9ad130 100755
--- a/autogen.sh
+++ b/autogen.sh
@@ -1,5 +1,5 @@
#!/bin/sh
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
set -e
diff --git a/build.sh b/build.sh
index 864a20548..1a2d1c8a0 100755
--- a/build.sh
+++ b/build.sh
@@ -1,7 +1,7 @@
-#!/bin/bash
-# Copyright (c) 2016-2021 The Hush developers
+#!/usr/bin/env bash
+# Copyright (c) 2016-2022 The Hush developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
set -eu -o pipefail
-./zcutil/build.sh --disable-tests $@
+./util/build.sh --disable-tests $@
diff --git a/configure.ac b/configure.ac
index 794c486c0..e863c6384 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2,7 +2,7 @@ dnl require autoconf 2.60 (AS_ECHO/AS_ECHO_N)
AC_PREREQ([2.60])
define(_CLIENT_VERSION_MAJOR, 3)
define(_CLIENT_VERSION_MINOR, 9)
-define(_CLIENT_VERSION_REVISION, 1)
+define(_CLIENT_VERSION_REVISION, 2)
define(_CLIENT_VERSION_BUILD, 50)
define(_ZC_BUILD_VAL, m4_if(m4_eval(_CLIENT_VERSION_BUILD < 25), 1, m4_incr(_CLIENT_VERSION_BUILD), m4_eval(_CLIENT_VERSION_BUILD < 50), 1, m4_eval(_CLIENT_VERSION_BUILD - 24), m4_eval(_CLIENT_VERSION_BUILD == 50), 1, , m4_eval(_CLIENT_VERSION_BUILD - 50)))
define(_CLIENT_VERSION_SUFFIX, m4_if(m4_eval(_CLIENT_VERSION_BUILD < 25), 1, _CLIENT_VERSION_REVISION-beta$1, m4_eval(_CLIENT_VERSION_BUILD < 50), 1, _CLIENT_VERSION_REVISION-rc$1, m4_eval(_CLIENT_VERSION_BUILD == 50), 1, _CLIENT_VERSION_REVISION, _CLIENT_VERSION_REVISION-$1)))
@@ -151,12 +151,6 @@ AC_ARG_ENABLE([glibc-back-compat],
[use_glibc_compat=$enableval],
[use_glibc_compat=no])
-AC_ARG_ENABLE([zmq],
- [AS_HELP_STRING([--disable-zmq],
- [disable ZMQ notifications])],
- [use_zmq=$enableval],
- [use_zmq=yes])
-
AC_ARG_ENABLE([experimental-asm],
[AS_HELP_STRING([--enable-experimental-asm],
[Enable experimental assembly routines (default is no)])],
@@ -673,15 +667,6 @@ if test x$use_pkgconfig = xyes; then
fi
fi
- if test "x$use_zmq" = "xyes"; then
- PKG_CHECK_MODULES([ZMQ],[libzmq >= 4],
- [AC_DEFINE([ENABLE_ZMQ],[1],[Define to 1 to enable ZMQ functions])],
- [AC_DEFINE([ENABLE_ZMQ],[0],[Define to 1 to enable ZMQ functions])
- AC_MSG_WARN([libzmq version 4.x or greater not found, disabling])
- use_zmq=no])
- else
- AC_DEFINE_UNQUOTED([ENABLE_ZMQ],[0],[Define to 1 to enable ZMQ functions])
- fi
]
)
else
@@ -718,28 +703,7 @@ fi
#AC_CHECK_HEADER([gmpxx.h],,AC_MSG_ERROR(libgmpxx headers missing))
#AC_CHECK_LIB([gmpxx],[main],GMPXX_LIBS=-lgmpxx, [AC_MSG_ERROR(libgmpxx missing)])
- if test "x$use_zmq" = "xyes"; then
- AC_CHECK_HEADER([zmq.h],
- [AC_DEFINE([ENABLE_ZMQ],[1],[Define to 1 to enable ZMQ functions])],
- [AC_MSG_WARN([zmq.h not found, disabling zmq support])
- use_zmq=no
- AC_DEFINE([ENABLE_ZMQ],[0],[Define to 1 to enable ZMQ functions])])
- AC_CHECK_LIB([zmq],[zmq_ctx_shutdown],ZMQ_LIBS=-lzmq,
- [AC_MSG_WARN([libzmq >= 4.0 not found, disabling zmq support])
- use_zmq=no
- AC_DEFINE([ENABLE_ZMQ],[0],[Define to 1 to enable ZMQ functions])])
- else
- AC_DEFINE_UNQUOTED([ENABLE_ZMQ],[0],[Define to 1 to enable ZMQ functions])
- fi
- if test "x$use_zmq" = "xyes"; then
- dnl Assume libzmq was built for static linking
- case $host in
- *mingw*)
- ZMQ_CFLAGS="$ZMQ_CFLAGS -DZMQ_STATIC"
- ;;
- esac
- fi
fi
# These packages don't provide pkgconfig config files across all
@@ -824,8 +788,6 @@ else
AC_MSG_RESULT(no)
fi
-AM_CONDITIONAL([ENABLE_ZMQ], [test "x$use_zmq" = "xyes"])
-
AC_MSG_CHECKING([whether to build test_bitcoin])
if test x$use_tests = xyes; then
AC_MSG_RESULT([yes])
@@ -894,7 +856,6 @@ AC_SUBST(CRYPTO_LIBS)
AC_SUBST(SSL_LIBS)
AC_SUBST(EVENT_LIBS)
AC_SUBST(EVENT_PTHREADS_LIBS)
-AC_SUBST(ZMQ_LIBS)
AC_SUBST(GMP_LIBS)
AC_SUBST(GMPXX_LIBS)
AC_SUBST(LIBZCASH_LIBS)
@@ -944,7 +905,6 @@ esac
echo
echo "Options used to compile and link:"
echo " with wallet = $enable_wallet"
-echo " with zmq = $use_zmq"
echo " with test = $use_tests"
echo " debug enabled = $enable_debug"
echo " werror = $enable_werror"
diff --git a/contrib/README.md b/contrib/README.md
index 916e4d094..36c04c969 100644
--- a/contrib/README.md
+++ b/contrib/README.md
@@ -6,10 +6,25 @@ Do not expect all scripts to work!
Please fix bugs and report things you find.
-## Hush Tools
+# Hush Tools
+
+## block\_time.pl
+
+Estimate when a Hush block will happen.
+
+Example:
+
+ ./contrib/block_time.pl 123456 # Print out datetime of when block height 123456 happens
+
+## gen-zaddrs.pl
+
+Generate zaddrs in bulk, by default 50 at a time. Prints one zaddr per line.
+
+Example:
+
+ ./contrib/gen-zaddrs.pl # generate 50 zaddrs
+ ./contrib/gen-zaddrs.pl 500 # generate 500 zaddrs
-checkpoints.pl - generate checkpoint data for main.cpp
-block\_time.pl - estimate when a Hush block will happen
## Wallet Tools
diff --git a/contrib/block_time.pl b/contrib/block_time.pl
index 1b8c13351..9e980ea29 100755
--- a/contrib/block_time.pl
+++ b/contrib/block_time.pl
@@ -1,5 +1,5 @@
-#!/usr/bin/perl
-# Copyright (c) 2016-2021 The Hush developers
+#!/usr/bin/env perl
+# Copyright (c) 2016-2022 The Hush developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
use warnings;
@@ -8,6 +8,9 @@ use strict;
# Given a block time, estimate when it will happen
my $block = shift || die "Usage: $0 123";
my $hush = "./src/hush-cli";
+unless (-e $hush) {
+ die "$hush does not exist, aborting";
+}
my $blockcount = qx{$hush getblockcount};
unless ($blockcount = int($blockcount)) {
@@ -19,7 +22,7 @@ if ($block <= $blockcount) {
die "That block has already happened!";
} else {
my $diff = $block - $blockcount;
- my $minutes = $diff*2.5;
+ my $minutes = $diff*1.25; # 75s in minutes
my $seconds = $minutes*60;
my $now = time;
my $then = $now + $seconds;
diff --git a/contrib/debian/control b/contrib/debian/control
index 4b542452a..8526dd6b4 100644
--- a/contrib/debian/control
+++ b/contrib/debian/control
@@ -11,6 +11,6 @@ Vcs-Git: https://git.hush.is/hush/hush3.git
Vcs-Browser: https://git.hush.is/hush/hush3
Package: hush
-Architecture: amd64
+Architecture: amd64 arm64
Depends: ${shlibs:Depends}
Description: Hush cryptocoin full node. Speak And Transact Freely. Hush inherits from Bitcoin Protocol and Zcash Protocol and is focused on private communications.
diff --git a/contrib/debian/copyright b/contrib/debian/copyright
index f2d71c287..6231c2023 100644
--- a/contrib/debian/copyright
+++ b/contrib/debian/copyright
@@ -2,7 +2,7 @@ Files: *
Copyright: 2016-2020, The Hush developers
2009-2016, Bitcoin Core developers
License: GPLv3
-Comment: https://hush.is/developers
+Comment: https://hush.is
Files: depends/sources/libsodium-*.tar.gz
Copyright: 2013-2016 Frank Denis
@@ -23,19 +23,6 @@ Copyright: 1990, 2016 Oracle and/or its affiliates;
2000-2005 INRIA, France Telecom
License: BDB
-Files: depends/sources/zeromq-*.tar.gz
-Copyright:
- 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
- 2007-2014 iMatix Corporation
- 2009-2011 250bpm s.r.o.
- 2010-2011 Miru Limited
- 2011 VMware, Inc.
- 2012 Spotify AB
- 2013 Ericsson AB
- 2014 AppDynamics Inc.
- 2015-2016 Brocade Communications Systems Inc.
-License: LGPL-with-ZeroMQ-exception
-
Files: depends/sources/google*.tar.gz
Copyright: 2008 Google Inc.
License: BSD-3clause-Google
@@ -1056,35 +1043,6 @@ Comment:
You should have received a copy of the GNU General Public License
along with this program. If not, see .
-License: LGPL-with-ZeroMQ-exception
- GNU LESSER GENERAL PUBLIC LICENSE
- Version 3, 29 June 2007
- .
- On Debian systems the GNU Lesser General Public License (LGPL) is
- located in '/usr/share/common-licenses/LGPL'.
- .
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- --------------------------------------------------------------------------------
- SPECIAL EXCEPTION GRANTED BY COPYRIGHT HOLDERS
- .
- As a special exception, copyright holders give you permission to link this
- library with independent modules to produce an executable, regardless of
- the license terms of these independent modules, and to copy and distribute
- the resulting executable under terms of your choice, provided that you also
- meet, for each linked independent module, the terms and conditions of
- the license of that module. An independent module is a module which is not
- derived from or based on this library. If you modify this library, you must
- extend this exception to your version of the library.
-
- Note: this exception relieves you of any obligations under sections 4 and 5
- of this license, and section 6 of the GNU General Public License.
-Comment:
- You should have received a copy of the GNU General Public License
- along with this program. If not, see .
-
License: GNU-All-permissive-License
Copying and distribution of this file, with or without modification, are
permitted in any medium without royalty provided the copyright notice
diff --git a/contrib/devtools/README.md b/contrib/devtools/README.md
deleted file mode 100644
index fa03b6cff..000000000
--- a/contrib/devtools/README.md
+++ /dev/null
@@ -1,42 +0,0 @@
-# Contrib Devtools
-
-This directory contains tools for developers working on this repository.
-
-## security-check.py
-
-Perform basic ELF security checks on a series of executables.
-
-## symbol-check.py
-
-A script to check that the (Linux) executables produced by gitian only contain
-allowed gcc, glibc and libstdc++ version symbols. This makes sure they are
-still compatible with the minimum supported Linux distribution versions.
-
-Example usage after a gitian build:
-
- find ../gitian-builder/build -type f -executable | xargs python contrib/devtools/symbol-check.py
-
-If only supported symbols are used the return value will be 0 and the output will be empty.
-
-If there are 'unsupported' symbols, the return value will be 1 a list like this will be printed:
-
- .../64/test_bitcoin: symbol memcpy from unsupported version GLIBC_2.14
- .../64/test_bitcoin: symbol __fdelt_chk from unsupported version GLIBC_2.15
- .../64/test_bitcoin: symbol std::out_of_range::~out_of_range() from unsupported version GLIBCXX_3.4.15
- .../64/test_bitcoin: symbol _ZNSt8__detail15_List_nod from unsupported version GLIBCXX_3.4.15
-
-## update-translations.py
-
-Run this script from the root of the repository to update all translations from transifex.
-It will do the following automatically:
-
-- fetch all translations
-- post-process them into valid and committable format
-- add missing translations to the build system (TODO)
-
-See doc/translation-process.md for more information.
-
-## gen-manpages.sh
-
-A small script to automatically create manpages in ../../doc/man by running the release binaries with the -help option.
-This requires help2man which can be found at: https://www.gnu.org/software/help2man/
diff --git a/contrib/devtools/optimize-pngs.py b/contrib/devtools/optimize-pngs.py
deleted file mode 100755
index 38aaa00f3..000000000
--- a/contrib/devtools/optimize-pngs.py
+++ /dev/null
@@ -1,73 +0,0 @@
-#!/usr/bin/env python
-
-import os
-import sys
-import subprocess
-import hashlib
-from PIL import Image
-
-def file_hash(filename):
- '''Return hash of raw file contents'''
- with open(filename, 'rb') as f:
- return hashlib.sha256(f.read()).hexdigest()
-
-def content_hash(filename):
- '''Return hash of RGBA contents of image'''
- i = Image.open(filename)
- i = i.convert('RGBA')
- data = i.tostring()
- return hashlib.sha256(data).hexdigest()
-
-#optimize png, remove various color profiles, remove ancillary chunks (alla) and text chunks (text)
-#pngcrush -brute -ow -rem gAMA -rem cHRM -rem iCCP -rem sRGB -rem alla -rem text
-
-pngcrush = 'pngcrush'
-git = 'git'
-folders = ["src/qt/res/movies", "src/qt/res/icons", "src/qt/res/images"]
-basePath = subprocess.check_output([git, 'rev-parse', '--show-toplevel']).rstrip('\n')
-totalSaveBytes = 0
-
-outputArray = []
-for folder in folders:
- absFolder=os.path.join(basePath, folder)
- for file in os.listdir(absFolder):
- extension = os.path.splitext(file)[1]
- if extension.lower() == '.png':
- print("optimizing "+file+"..."),
- file_path = os.path.join(absFolder, file)
- fileMetaMap = {'file' : file, 'osize': os.path.getsize(file_path), 'sha256Old' : file_hash(file_path)};
- fileMetaMap['contentHashPre'] = content_hash(file_path)
-
- pngCrushOutput = ""
- try:
- pngCrushOutput = subprocess.check_output(
- [pngcrush, "-brute", "-ow", "-rem", "gAMA", "-rem", "cHRM", "-rem", "iCCP", "-rem", "sRGB", "-rem", "alla", "-rem", "text", file_path],
- stderr=subprocess.STDOUT).rstrip('\n')
- except:
- print "pngcrush is not installed, aborting..."
- sys.exit(0)
-
- #verify
- if "Not a PNG file" in subprocess.check_output([pngcrush, "-n", "-v", file_path], stderr=subprocess.STDOUT):
- print "PNG file "+file+" is corrupted after crushing, check out pngcursh version"
- sys.exit(1)
-
- fileMetaMap['sha256New'] = file_hash(file_path)
- fileMetaMap['contentHashPost'] = content_hash(file_path)
-
- if fileMetaMap['contentHashPre'] != fileMetaMap['contentHashPost']:
- print "Image contents of PNG file "+file+" before and after crushing don't match"
- sys.exit(1)
-
- fileMetaMap['psize'] = os.path.getsize(file_path)
- outputArray.append(fileMetaMap)
- print("done\n"),
-
-print "summary:\n+++++++++++++++++"
-for fileDict in outputArray:
- oldHash = fileDict['sha256Old']
- newHash = fileDict['sha256New']
- totalSaveBytes += fileDict['osize'] - fileDict['psize']
- print fileDict['file']+"\n size diff from: "+str(fileDict['osize'])+" to: "+str(fileDict['psize'])+"\n old sha256: "+oldHash+"\n new sha256: "+newHash+"\n"
-
-print "completed. Total reduction: "+str(totalSaveBytes)+" bytes"
diff --git a/contrib/fresh_clone_compile_and_run.sh b/contrib/fresh_clone_compile_and_run.sh
old mode 100644
new mode 100755
index 54fc69ed6..a895f3cc3
--- a/contrib/fresh_clone_compile_and_run.sh
+++ b/contrib/fresh_clone_compile_and_run.sh
@@ -1,19 +1,28 @@
-#!/bin/bash
-# Copyright (c) 2016-2021 The Hush developers
+#!/usr/bin/env bash
+# Copyright (c) 2016-2022 The Hush developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
-# OPEN BOUNTY
-# Send your CPU and RAM and time it takes to sync a HUSH full node for
-# 5 HUSH as thanks for decentralized performance testing
+
+# Usage: $0 [BRANCH] [JOBS]
+# BRANCH defaults to master, JOBS defaults to 2 (an empty JOBS would turn into a bare -j)
+
+# we don't want this for our build.sh and make commands
+#set -eu -o pipefail
+
+BRANCH=${1:-master}
+
git clone https://git.hush.is/hush/hush3
cd hush3
-# Build with 2 cores
+git checkout "$BRANCH"
# You need 2GB of RAM per core, don't use too many
# (GB of RAM)/2 - 1 is the optimal core count for compiling Hush
# `nproc` tells you how many cores you have
-JOBS=2
+JOBS=${2:-2}
+JOBZ=$(nproc) # if build.sh fails, we can use many more jobs with make
# Want to fix this parrallel-only build system bug we inherited ? you are a new hush dev
-./build.sh -j$JOBS;make -j$JOBS;make -j$JOBS;make -j$JOBS
+# Sometimes the parallel build fails because of a race condition, so
+# we do it a few times to Make Really Sure
+./build.sh -j$JOBS;make -j$JOBZ;make -j$JOBZ;make -j$JOBZ
./src/hushd &> hush.log &
# You can give the entire or parts of this file to Hush developers for debugging,
# but there is a lot of metadata!!! We don't want any more than we need to fix bugz
diff --git a/contrib/gen-zaddrs.pl b/contrib/gen-zaddrs.pl
new file mode 100755
index 000000000..ccf8e8e78
--- /dev/null
+++ b/contrib/gen-zaddrs.pl
@@ -0,0 +1,17 @@
+#!/usr/bin/env perl
+# Copyright 2019-2022 The Hush developers
+# Released under the GPLv3
+use warnings;
+use strict;
+
+# Usage: ./contrib/gen-zaddrs.pl [COUNT] -- print COUNT (default 50) new zaddrs, one per line
+my $hush = "./src/hush-cli";
+my $znew = "$hush z_getnewaddress";
+my $howmany = shift || 50;
+
+for (1 .. $howmany) {
+    my $zaddr = qx{$znew};
+    die "ERROR: '$znew' failed, is hushd running?\n" if $?; # do not print blank lines on failure
+    chomp($zaddr);
+    print qq{$zaddr\n};
+}
diff --git a/contrib/gitian-descriptors/gitian-linux.yml b/contrib/gitian-descriptors/gitian-linux.yml
index 317bd8694..da8be060b 100644
--- a/contrib/gitian-descriptors/gitian-linux.yml
+++ b/contrib/gitian-descriptors/gitian-linux.yml
@@ -53,7 +53,7 @@ script: |
function create_global_faketime_wrappers {
for prog in ${FAKETIME_PROGS}; do
- echo '#!/bin/bash' > ${WRAP_DIR}/${prog}
+ echo '#!/usr/bin/env bash' > ${WRAP_DIR}/${prog}
echo "REAL=\`which -a ${prog} | grep -v ${WRAP_DIR}/${prog} | head -1\`" >> ${WRAP_DIR}/${prog}
echo 'export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/faketime/libfaketime.so.1' >> ${WRAP_DIR}/${prog}
echo "export FAKETIME=\"$1\"" >> ${WRAP_DIR}/${prog}
@@ -65,7 +65,7 @@ script: |
function create_per-host_faketime_wrappers {
for i in $HOSTS; do
for prog in ${FAKETIME_HOST_PROGS}; do
- echo '#!/bin/bash' > ${WRAP_DIR}/${i}-${prog}
+ echo '#!/usr/bin/env bash' > ${WRAP_DIR}/${i}-${prog}
echo "REAL=\`which -a ${i}-${prog} | grep -v ${WRAP_DIR}/${i}-${prog} | head -1\`" >> ${WRAP_DIR}/${i}-${prog}
echo 'export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/faketime/libfaketime.so.1' >> ${WRAP_DIR}/${i}-${prog}
echo "export FAKETIME=\"$1\"" >> ${WRAP_DIR}/${i}-${prog}
diff --git a/contrib/gitian-descriptors/gitian-osx-signer.yml b/contrib/gitian-descriptors/gitian-osx-signer.yml
index b00eb2fb9..73a2f12f3 100644
--- a/contrib/gitian-descriptors/gitian-osx-signer.yml
+++ b/contrib/gitian-descriptors/gitian-osx-signer.yml
@@ -20,7 +20,7 @@ script: |
# Create global faketime wrappers
for prog in ${FAKETIME_PROGS}; do
- echo '#!/bin/bash' > ${WRAP_DIR}/${prog}
+ echo '#!/usr/bin/env bash' > ${WRAP_DIR}/${prog}
echo "REAL=\`which -a ${prog} | grep -v ${WRAP_DIR}/${prog} | head -1\`" >> ${WRAP_DIR}/${prog}
echo 'export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/faketime/libfaketime.so.1' >> ${WRAP_DIR}/${prog}
echo "export FAKETIME=\"${REFERENCE_DATETIME}\"" >> ${WRAP_DIR}/${prog}
diff --git a/contrib/gitian-descriptors/gitian-osx.yml b/contrib/gitian-descriptors/gitian-osx.yml
index 794a82af8..4d5a8736f 100644
--- a/contrib/gitian-descriptors/gitian-osx.yml
+++ b/contrib/gitian-descriptors/gitian-osx.yml
@@ -48,7 +48,7 @@ script: |
function create_global_faketime_wrappers {
for prog in ${FAKETIME_PROGS}; do
- echo '#!/bin/bash' > ${WRAP_DIR}/${prog}
+ echo '#!/usr/bin/env bash' > ${WRAP_DIR}/${prog}
echo "REAL=\`which -a ${prog} | grep -v ${WRAP_DIR}/${prog} | head -1\`" >> ${WRAP_DIR}/${prog}
echo 'export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/faketime/libfaketime.so.1' >> ${WRAP_DIR}/${prog}
echo "export FAKETIME=\"$1\"" >> ${WRAP_DIR}/${prog}
@@ -60,7 +60,7 @@ script: |
function create_per-host_faketime_wrappers {
for i in $HOSTS; do
for prog in ${FAKETIME_HOST_PROGS}; do
- echo '#!/bin/bash' > ${WRAP_DIR}/${i}-${prog}
+ echo '#!/usr/bin/env bash' > ${WRAP_DIR}/${i}-${prog}
echo "REAL=\`which -a ${i}-${prog} | grep -v ${WRAP_DIR}/${i}-${prog} | head -1\`" >> ${WRAP_DIR}/${i}-${prog}
echo 'export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/faketime/libfaketime.so.1' >> ${WRAP_DIR}/${i}-${prog}
echo "export FAKETIME=\"$1\"" >> ${WRAP_DIR}/${i}-${prog}
diff --git a/contrib/gitian-descriptors/gitian-win.yml b/contrib/gitian-descriptors/gitian-win.yml
index 44fc4e356..cab6187fa 100644
--- a/contrib/gitian-descriptors/gitian-win.yml
+++ b/contrib/gitian-descriptors/gitian-win.yml
@@ -47,7 +47,7 @@ script: |
function create_global_faketime_wrappers {
for prog in ${FAKETIME_PROGS}; do
- echo '#!/bin/bash' > ${WRAP_DIR}/${prog}
+ echo '#!/usr/bin/env bash' > ${WRAP_DIR}/${prog}
echo "REAL=\`which -a ${prog} | grep -v ${WRAP_DIR}/${prog} | head -1\`" >> ${WRAP_DIR}/${prog}
echo 'export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/faketime/libfaketime.so.1' >> ${WRAP_DIR}/${prog}
echo "export FAKETIME=\"$1\"" >> ${WRAP_DIR}/${prog}
@@ -59,7 +59,7 @@ script: |
function create_per-host_faketime_wrappers {
for i in $HOSTS; do
for prog in ${FAKETIME_HOST_PROGS}; do
- echo '#!/bin/bash' > ${WRAP_DIR}/${i}-${prog}
+ echo '#!/usr/bin/env bash' > ${WRAP_DIR}/${i}-${prog}
echo "REAL=\`which -a ${i}-${prog} | grep -v ${WRAP_DIR}/${i}-${prog} | head -1\`" >> ${WRAP_DIR}/${i}-${prog}
echo 'export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/faketime/libfaketime.so.1' >> ${WRAP_DIR}/${i}-${prog}
echo "export FAKETIME=\"$1\"" >> ${WRAP_DIR}/${i}-${prog}
@@ -75,14 +75,14 @@ script: |
for i in $HOSTS; do
mkdir -p ${WRAP_DIR}/${i}
for prog in collect2; do
- echo '#!/bin/bash' > ${WRAP_DIR}/${i}/${prog}
+ echo '#!/usr/bin/env bash' > ${WRAP_DIR}/${i}/${prog}
REAL=$(${i}-gcc -print-prog-name=${prog})
echo "export MALLOC_PERTURB_=255" >> ${WRAP_DIR}/${i}/${prog}
echo "${REAL} \$@" >> $WRAP_DIR/${i}/${prog}
chmod +x ${WRAP_DIR}/${i}/${prog}
done
for prog in gcc g++; do
- echo '#!/bin/bash' > ${WRAP_DIR}/${i}-${prog}
+ echo '#!/usr/bin/env bash' > ${WRAP_DIR}/${i}-${prog}
echo "REAL=\`which -a ${i}-${prog} | grep -v ${WRAP_DIR}/${i}-${prog} | head -1\`" >> ${WRAP_DIR}/${i}-${prog}
echo 'export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/faketime/libfaketime.so.1' >> ${WRAP_DIR}/${i}-${prog}
echo "export FAKETIME=\"$1\"" >> ${WRAP_DIR}/${i}-${prog}
diff --git a/contrib/hush_halvings b/contrib/hush_halvings
index 80bb24b04..cfb899ced 100755
--- a/contrib/hush_halvings
+++ b/contrib/hush_halvings
@@ -1,5 +1,5 @@
#!/usr/bin/env perl
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Released under the GPLv3
use strict;
diff --git a/contrib/hush_scanner b/contrib/hush_scanner
index 6ec4300b9..af4ee4144 100755
--- a/contrib/hush_scanner
+++ b/contrib/hush_scanner
@@ -1,5 +1,5 @@
#!/usr/bin/env perl
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
use strict;
diff --git a/contrib/hush_supply b/contrib/hush_supply
index e12770f84..ef78fef3c 100755
--- a/contrib/hush_supply
+++ b/contrib/hush_supply
@@ -1,5 +1,5 @@
#!/usr/bin/env perl
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Released under the GPLv3
use warnings;
use strict;
diff --git a/contrib/init/hushd.init b/contrib/init/hushd.init
index 225a5ea2a..370aaf101 100644
--- a/contrib/init/hushd.init
+++ b/contrib/init/hushd.init
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# hushd The hush core server.
#
diff --git a/contrib/sdl_checkpoints.pl b/contrib/sdl_checkpoints.pl
new file mode 100755
index 000000000..e140fb0ec
--- /dev/null
+++ b/contrib/sdl_checkpoints.pl
@@ -0,0 +1,44 @@
+#!/usr/bin/env perl
+# Copyright (c) 2016-2022 The Hush developers
+# Distributed under the GPLv3 software license, see the accompanying
+# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
+
+# This script is used to generate the data used by the silentdragonlite-cli checkpoints.rs file
+# https://git.hush.is/hush/silentdragonlite-cli/src/branch/master/lib/src/lightclient/checkpoints.rs#L24
+# Usage: ./contrib/sdl_checkpoints.pl [START] [END] [STRIDE]   (defaults: 300000 840000 10000)
+
+use warnings;
+use strict;
+my $hush = "./src/hush-cli";
+my $gethash = "$hush getblockhash";
+my $gettree = "$hush getblockmerkletree";
+my $start = shift || 300000;
+my $end = shift || 840000;
+my $stride = shift || 10000;
+
+my $blocks = qx{$hush getblockcount};
+if($?) {
+    print "ERROR, is hushd running? exiting...\n";
+    exit 1;
+}
+
+if ($end > $blocks) {
+    print "The block $end is beyond how many blocks this node knows about, exiting...\n";
+    exit 1;
+}
+
+if ($start < 1) {
+    print "Invalid start block $start, exiting...\n";
+    exit 1;
+}
+
+# Print one (height, blockhash, getblockmerkletree output) entry per $stride blocks from $start to $end
+for (my $block = $start; $block <= $end; $block += $stride) {
+    my $blockhash = qx{$gethash $block};
+    die "ERROR: '$gethash $block' failed, exiting...\n" if $?; # never emit an empty checkpoint
+    my $merkle = qx{$gettree $block};
+    die "ERROR: '$gettree $block' failed, exiting...\n" if $?;
+    chomp $merkle;
+    chomp $blockhash;
+    print qq{($block,"$blockhash",\n\t"$merkle"\n),\n};
+}
diff --git a/contrib/snapshot/airdrop_hush3.sh b/contrib/snapshot/airdrop_hush3.sh
index 87a68369c..c3ee8b700 100755
--- a/contrib/snapshot/airdrop_hush3.sh
+++ b/contrib/snapshot/airdrop_hush3.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This code inspired by and dedicated to Decker
echo "Airdropping funds to HUSH3, hold onto your butts..."
diff --git a/contrib/snapshot/testnet_airdrop_hush3.sh b/contrib/snapshot/testnet_airdrop_hush3.sh
index fd20926d4..33dee5a1a 100755
--- a/contrib/snapshot/testnet_airdrop_hush3.sh
+++ b/contrib/snapshot/testnet_airdrop_hush3.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This code inspired by and dedicated to Decker
echo "Airdropping funds to HUSH3, hold onto your butts..."
diff --git a/contrib/tidy_datadir.sh b/contrib/tidy_datadir.sh
index 6e5300394..7cc5b3834 100755
--- a/contrib/tidy_datadir.sh
+++ b/contrib/tidy_datadir.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2018-2020 The Hush developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/contrib/verify-commits/gpg.sh b/contrib/verify-commits/gpg.sh
index dc6eb9c61..b25d5dc5c 100755
--- a/contrib/verify-commits/gpg.sh
+++ b/contrib/verify-commits/gpg.sh
@@ -1,5 +1,5 @@
#!/bin/sh
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
INPUT=$(.
@@ -29,7 +29,7 @@ Specify configuration file (default: HUSH3.conf)
.HP
\fB\-datadir=\fR
.IP
-Specify data directory
+Specify data directory (this path cannot use '~')
.HP
\fB\-testnet\fR
.IP
@@ -75,7 +75,7 @@ Read extra arguments from standard input, one per line until EOF/Ctrl\-D
In order to ensure you are adequately protecting your privacy when using Hush,
please see .
-Copyright (C) 2016-2021 Duke Leto and The Hush Developers
+Copyright (C) 2016-2022 Duke Leto and The Hush Developers
Copyright (C) 2016-2020 jl777 and SuperNET developers
Copyright (C) 2016-2018 The Zcash developers
Copyright (C) 2009-2014 The Bitcoin Core developers
diff --git a/doc/man/hush-tx.1 b/doc/man/hush-tx.1
index 54149df40..4a01ad1e7 100644
--- a/doc/man/hush-tx.1
+++ b/doc/man/hush-tx.1
@@ -1,9 +1,9 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH HUSH-TX "1" "January 2022" "hush-tx v3.9.1" "User Commands"
+.TH HUSH-TX "1" "September 2022" "hush-tx v3.9.2" "User Commands"
.SH NAME
-hush-tx \- manual page for hush-tx v3.9.1
+hush-tx \- manual page for hush-tx v3.9.2
.SH DESCRIPTION
-hush\-tx utility version v3.9.1\-804a4abbe\-dirty
+hush\-tx utility version v3.9.2\-54cfe1099
.SS "Usage:"
.TP
hush\-tx [options] [commands]
@@ -88,7 +88,7 @@ Set register NAME to given JSON\-STRING
In order to ensure you are adequately protecting your privacy when using Hush,
please see .
-Copyright (C) 2016-2021 Duke Leto and The Hush Developers
+Copyright (C) 2016-2022 Duke Leto and The Hush Developers
Copyright (C) 2016-2020 jl777 and SuperNET developers
Copyright (C) 2016-2018 The Zcash developers
Copyright (C) 2009-2014 The Bitcoin Core developers
diff --git a/doc/man/hushd.1 b/doc/man/hushd.1
index f5c1dd68e..3cd181f7c 100644
--- a/doc/man/hushd.1
+++ b/doc/man/hushd.1
@@ -1,9 +1,9 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH HUSHD "1" "January 2022" "hushd v3.9.1" "User Commands"
+.TH HUSHD "1" "September 2022" "hushd v3.9.2" "User Commands"
.SH NAME
-hushd \- manual page for hushd v3.9.1
+hushd \- manual page for hushd v3.9.2
.SH DESCRIPTION
-Hush Daemon version v3.9.1\-804a4abbe\-dirty
+Hush Daemon version v3.9.2\-54cfe1099
.PP
In order to ensure you are adequately protecting your privacy when using Hush,
please see .
@@ -49,7 +49,7 @@ Run in the background as a daemon and accept commands
.HP
\fB\-datadir=\fR
.IP
-Specify data directory
+Specify data directory (this path cannot use '~')
.HP
\fB\-exportdir=\fR
.IP
@@ -314,6 +314,35 @@ Specify Sapling Address to Consolidate. (default: all)
Fee amount in Puposhis used send consolidation transactions. (default
10000)
.HP
+\fB\-zsweep\fR
+.IP
+Enable zaddr sweeping, automatically move all shielded funds to one
+address once per X blocks
+.HP
+\fB\-zsweepaddress=\fR
+.IP
+Specify the shielded address where swept funds will be sent
+.HP
+\fB\-zsweepfee\fR
+.IP
+Fee amount in puposhis used to send sweep transactions. (default 10000)
+.HP
+\fB\-zsweepinterval\fR
+.IP
+Sweep shielded funds every X blocks (default 5)
+.HP
+\fB\-zsweepmaxinputs\fR
+.IP
+Maximum number of shielded inputs to sweep per transaction (default 8)
+.HP
+\fB\-zsweepexternal\fR
+.IP
+Enable sweeping to an external wallet (default false)
+.HP
+\fB\-zsweepexclude\fR
+.IP
+Addresses to exclude from sweeping (default none)
+.HP
\fB\-deletetx\fR
.IP
Enable Old Transaction Deletion
@@ -414,24 +443,6 @@ Delete all wallet transactions and only recover those parts of the
blockchain through \fB\-rescan\fR on startup (1 = keep tx meta data e.g.
account owner and payment request information, 2 = drop tx meta data)
.PP
-ZeroMQ notification options:
-.HP
-\fB\-zmqpubhashblock=\fR
-.IP
-Enable publish hash block in
-.HP
-\fB\-zmqpubhashtx=\fR
-.IP
-Enable publish hash transaction in
-.HP
-\fB\-zmqpubrawblock=\fR
-.IP
-Enable publish raw block in
-.HP
-\fB\-zmqpubrawtx=\fR
-.IP
-Enable publish raw transaction in
-.PP
Debugging/Testing options:
.HP
\fB\-debug=\fR
@@ -440,8 +451,8 @@ Output debugging information (default: 0, supplying is
optional). If is not supplied or if = 1, output
all debugging information. can be: addrman, alert, bench,
coindb, db, deletetx, estimatefee, http, libevent, lock, mempool, net,
-tls, partitioncheck, pow, proxy, prune, rand, reindex, rpc, selectcoins,
-stratum, tor, zmq, zrpc, zrpcunsafe (implies zrpc).
+tls, partitioncheck, pow, proxy, prune, rand, randomx, reindex, rpc,
+selectcoins, stratum, tor, zrpc, zrpcunsafe (implies zrpc).
.HP
\fB\-experimentalfeatures\fR
.IP
@@ -614,7 +625,8 @@ Hush Smart Chain options:
.HP
\fB\-ac_algo\fR
.IP
-Choose PoW mining algorithm, default is Equihash (200,9)
+Choose PoW mining algorithm, either 'equihash' or 'randomx'. default is
+Equihash (200,9)
.HP
\fB\-ac_blocktime\fR
.IP
@@ -712,7 +724,7 @@ Enforce transaction\-rate limit, default 0
In order to ensure you are adequately protecting your privacy when using Hush,
please see .
-Copyright (C) 2016-2021 Duke Leto and The Hush Developers
+Copyright (C) 2016-2022 Duke Leto and The Hush Developers
Copyright (C) 2016-2020 jl777 and SuperNET developers
Copyright (C) 2016-2018 The Zcash developers
Copyright (C) 2009-2014 The Bitcoin Core developers
diff --git a/doc/randomx.md b/doc/randomx.md
new file mode 100644
index 000000000..a690ded2b
--- /dev/null
+++ b/doc/randomx.md
@@ -0,0 +1,104 @@
+# RandomX
+
+Hush Smart Chains support using RandomX as a Proof-Of-Work algorithm as of release 3.9.2.
+This means you can now launch a privacy coin with Hush tech that can be mined with a CPU
+instead of requiring an ASIC or GPU. RandomX is the same algorithm that Monero (XMR) and
+various other cryptocoins use. As far as we know, Hush Smart Chains are the first coins
+based on Zcash Protocol that can use the RandomX PoW algorithm. Many thanks to all the
+people who helped make this possible.
+
+# Example
+
+The following command can be used to launch an HSC on a single computer. Each option will be explained.
+HSC CLI arguments that start with `-ac_` are named that way because they *Affect Consensus*.
+
+```
+./src/hush-smart-chain -ac_halving=100 -ac_algo=randomx -ac_name=RANDOMX -ac_private=1 -ac_blocktime=15 -ac_reward=500000000 -ac_supply=55555 -gen=1 -genproclimit=1 -testnode=1
+```
+
+ * `hush-smart-chain` is the script used to launch or connect to HSCs
+ * It lives in the `./src` directory, next to `hushd` and `hush-cli`
+ * It is called `hush-smart-chain.bat` on Windows
+ * `-ac_halving=100` means "the block reward halves every 100 blocks"
+ * `-ac_algo=randomx` means "use RandomX for Proof-Of-Work"
+ * The default is Equihash (200,9)
+ * `-ac_name=RANDOMX` sets the name of the HSC to RANDOMX
+ * `-ac_private=1` means only z2z transactions will be allowed, like HUSH mainnet
+ * `-ac_blocktime=15` means blocks will be 15 seconds on average
+ * The default is 60 seconds
+ * `-ac_reward=500000000` means the block reward will start at 5 RANDOMX coins per block
+ * This argument is given in satoshis
+ * `-ac_supply=55555` means an existing supply of 55555 will exist at block 1
+ * This argument is given in coins, not satoshis
+ * This is sometimes called a "pre-mine" and is useful when migrating an existing coin
+ * Block 0 of HSCs is always the BTC mainnet genesis block.
+ * So the genesis block of HSCs is actually block 1, not block 0
+ * `-gen=1` means this node is a mining node
+ * `-genproclimit=1` means 1 CPU thread will be used for mining
+ * `-testnode=1` means only 1 node can be used to mine a genesis block
+ * testnode is primarily for testing, when launching a real genesis block, this option should not be used
+ * By default, at least two nodes are required to mine a genesis block
+ * One node would use
+```
+# first node
+./src/hush-smart-chain -ac_halving=100 -ac_algo=randomx -ac_name=RANDOMX -ac_private=1 -ac_blocktime=15 -ac_reward=500000000 -ac_supply=55555
+```
+ * And the second node would use:
+```
+# mining node. NOTE: This node will mine the genesis block and pre-mine, if any
+./src/hush-smart-chain -ac_halving=100 -ac_algo=randomx -ac_name=RANDOMX -ac_private=1 -ac_blocktime=15 -ac_reward=500000000 -ac_supply=55555 -gen=1 -genproclimit=1
+```
+
+# Advanced Options
+
+HUSH RandomX currently has two advanced options that some may want to use:
+
+ * `ac_randomx_interval` controls how often the RandomX key block will change
+ * The default is 1024 blocks and is good for most use cases.
+ * This corresponds to ~17 hours for HSCs with the default block time of 60s
+ * `ac_randomx_lag` sets the number of blocks to wait before updating the key block
+ * The default is 64 blocks
+ * This corresponds to 64 mins for HSCs with the default block time of 60s
+ * `ac_randomx_interval` should always be larger than 2 times `ac_randomx_lag`
+ * Setting these to arbitrary values could affect the chain security of your coin
+ * It is not recommended to change these values unless you are really sure why you are doing it
+
+# RandomX Internals
+
+This section is not required reading if you just want to use it as a PoW algorithm for an HSC. Here we will explain how the internals of RandomX work inside of the Hush codebase.
+
+We use the official RandomX implementation from https://github.com/tevador/RandomX with custom configuration options. If some type of hardware is created to mine the XMR RandomX algorithm, it will not be compatible with the Hush RandomX algorithm. This is by design. All Hush Smart Chains use the same RandomX config options, so if a hardware device is created to mine one HSC that uses RandomX, it can be used to mine any HSC using RandomX. Every HSC with unique consensus parameters will start off with its own unique key block with at least 9 bytes of entropy.
+
+The source code of RandomX is embedded in the Hush source code at `./src/RandomX` and the configuration options used are at `./src/RandomX/src/configuration.h`.
+
+The changes from default RandomX configuration options are listed below.
+
+```
+ //Argon2d salt
+-#define RANDOMX_ARGON_SALT "RandomX\x03"
++#define RANDOMX_ARGON_SALT "RandomXHUSH\x03"
+
+ //Number of Argon2d iterations for Cache initialization.
+-#define RANDOMX_ARGON_ITERATIONS 3
++#define RANDOMX_ARGON_ITERATIONS 5
+
+ //Number of parallel lanes for Cache initialization.
+ #define RANDOMX_ARGON_LANES 1
+@@ -53,13 +53,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ #define RANDOMX_DATASET_EXTRA_SIZE 33554368
+
+ //Number of instructions in a RandomX program. Must be divisible by 8.
+-#define RANDOMX_PROGRAM_SIZE 256
++#define RANDOMX_PROGRAM_SIZE 512
+
+ //Number of iterations during VM execution.
+-#define RANDOMX_PROGRAM_ITERATIONS 2048
++#define RANDOMX_PROGRAM_ITERATIONS 4096
+
+ //Number of chained VM executions per hash.
+-#define RANDOMX_PROGRAM_COUNT 8
++#define RANDOMX_PROGRAM_COUNT 16
+
+```
+RandomX opcode frequencies were not modified, the defaults are used.
+
diff --git a/doc/release-process.md b/doc/release-process.md
index b5dee897d..cdfbeec24 100644
--- a/doc/release-process.md
+++ b/doc/release-process.md
@@ -1,17 +1,51 @@
# Hush Release Process
-## Pre-release
+## High-Level Philosophy
+
+Beware of making high-risk changes (such as consensus changes, p2p layer changes and wallet/transaction changes) too close to a new release, because they will not get as much testing as they should. Don't merge large branches which haven't undergone lots of testing just before a release.
+
+## Check for changes on master that should be on dev
+
+Often there are trivial changes made directly on master, such as documentation changes. In theory, no code changes should happen on master without being on dev first, but it's better to be safe than sorry. We want the dev branch which undergoes testing to be as close as possible to what the master branch will become, so we don't want to merge dev into master and just assume everything works. So it's best to merge the master branch into dev just before merging the dev branch into master.
+
+To check if the master branch has any changes that the dev branch does not:
+
+```
+# this assumes you are working with https://git.hush.is/hush/hush3 as your remote
+git checkout dev
+git pull # make sure dev is up to date
+git checkout master
+git pull # make sure master is up to date
+git diff dev...master # look at the set of changes which exist in master but not dev
+```
+
+If the last command has no output, congrats, there is nothing to do. If the last command has output, then you should merge master into dev:
+
+```
+git checkout dev
+git merge --no-ff master # using the default commit message is fine
+git push
+```
+
+The `--no-ff` flag above makes sure to make a merge commit, no matter what, even if a "fast forward" could be done. For those in the future looking back, it's much better to see evidence of when branches were merged.
+
### Git Issues
-Look for Git issues that should be fixed in the next release.
+Look for Git issues that should be fixed in the next release. Especially low-risk and simple things, like documentation changes, improvements to error messages and RPC help output.
### Pre-release checklist:
- * Is this release changing consensus rules?
+ * Is this release changing consensus rules? Definitely update protocol version.
### Protocol Safety Checks:
- * Does MIN_PROTO_VERSION need to change?
+ * Does `PROTOCOL_VERSION` in src/version.h need to be increased?
+ * All releases with a consensus change should increase the value by 1
+ * This identifies a node's protocol version to all other peers it connects to.
+ * Does `MIN_PEER_PROTO_VERSION` in src/version.h need to change?
+ * If it does, new nodes will not be able to talk to nodes with a version less than `MIN_PEER_PROTO_VERSION`
+ * The main use of this is to let newer nodes, which know they do not want to talk to older nodes, efficiently avoid connecting to them
+ * For instance, when a new release has different consensus rules than older nodes, `MIN_PEER_PROTO_VERSION` prevents wasting lots of network bandwidth talking to incompatible nodes which will eventually be banned for disagreeing on consensus rules
## Release dependencies
@@ -21,4 +55,51 @@ Install deps on Linux:
## Release process
-...
+ - Update version in configure.ac and src/clientversion.h to update the hushd version
+ - In src/clientversion.h you update `CLIENT_VERSION_*` variables. Usually you will just update `CLIENT_VERSION_REVISION`
+ - If there is a consensus change, it may be a good idea to update `CLIENT_VERSION_MINOR` or `CLIENT_VERSION_MAJOR`
+ - To make a pre-release "beta" you can modify `CLIENT_VERSION_BUILD` but that is rarely done in Hush world.
+ - A `CLIENT_VERSION_BUILD` of 50 means "actual non-beta release"
+ - Make sure to keep the values in configure.ac and src/clientversion.h the same. The variables are prefixed with an underscore in configure.ac
+ - Run `make manpages`, commit + push results
+ - hushd must be running so the script can automatically get the correct version number
+ - There is a hack in the script where you can hardcode a version number if hushd isn't running.
+ - Comment out the HUSHVER line and uncomment the line above it with a hardcoded version number
+ - PROTIP: Man page creation must be done after updating the version number and recompiling and before Debian package creation
+ - Update checkpoints in src/chainparams.cpp via util/checkpoints.pl
+ - hushd must be running to run this script, since it uses hush-cli to get the data
+ - Look for line which says "END HUSH mainnet checkpoint data" near line 560 in chainparams.cpp , that is where checkpoint data ends
+ - Find the highest block height of data, let's call it HEIGHT
+ - Run `./util/checkpoints.pl 1000 HEIGHT &> checkpoints.txt` to generate the latest checkpoint data
+ - To copy the new data from checkpoints.txt into the file, one way in Vim is to type ":r checkpoints.txt" which will read in a file and insert its contents below the current cursor line
+ - You will see 3 lines of "stats" at the end of the output, you just pasted in the newest stats. Delete the old stats that should be the 3 lines under the current stats
+ - The script generates a comment "Generated at ...", that should be moved to the very beginning of the checkpoint data
+ - Make sure the new code compiles, commit and push
+ - Run `./util/checkpoints.pl help` to see some basic help
+ - By default it will generate checkpoints for every 1000 blocks, the "stride"
+ - You can get a different "stride" by passing it in as the first arg to the script
+ - To get checkpoint data for every 5000 blocks: `./util/checkpoints.pl 5000 &> checkpoints.txt`
+ - Currently checkpoints from before block 340k are given for every 5k blocks to keep the data smaller
+ - checkpoints.pl will just generate the data you need, it must be manually copied into the correct place
+ - Checkpoints are a list of block heights and block hashes that tell a full node the correct block history of the blockchain
+ - Checkpoints make block verification a bit faster, because nodes can say "is this block a descendant of a checkpoint block?" instead of doing full consensus checks, which take more time
+ - Checkpoints also provide a bit of security against some attacks that would create malicious chainforks
+ - They only provide limited security, because they talk about the past, not future block heights.
+ - Try to generate checkpoints as close to the release as possible, so you can have a recent block height be protected.
+ - For instance, don't update checkpoints and then do a release a month later. You can always update checkpoint data again or multiple times
+ - Update copyright years (if applicable) with util/replace.pl
+ - Update doc/relnotes/README.md
+ - To get the stats of file changes: `git diff --stat master...dev`
+ - Do a fresh clone and fresh sync with new checkpoints
+ - Stop node, wait 20 minutes, and then do a partial sync with new checkpoints
+ - Merge dev into master: `git checkout dev && git pull && git checkout master && git pull && git merge --no-ff dev && git push`
+ - The above command makes sure that your local dev branch is up to date before doing anything
+ - The above command will not merge if "git pull" creates a merge conflict
+ - The above command will not push if there is a problem with merging dev
+ - Make Gitea release with git tag from master branch (make sure to merge dev in first)
+ - Use util/gen-linux-binary-release.sh to make a Linux release binary
+ - Use util/build-debian-package.sh to make an x86 Debian package for the release
+ - Debian packages should be done after you make manpages, because those are included in Debian packages
+ - Use util/build-debian-package-ARM.sh (does this still work?) to make an ARM Debian package for the release
+ - Upload the debian packages to the Gitea release page, with SHA256 sums
+
diff --git a/doc/relnotes/README.md b/doc/relnotes/README.md
index 3993799ae..6502a6143 100644
--- a/doc/relnotes/README.md
+++ b/doc/relnotes/README.md
@@ -10,6 +10,45 @@ and no longer on Github, since they banned Duke Leto and
also because they censor many people around the world and work with
evil organizations.
+# Hush 3.9.2 "Anecdotal Axolotl"
+
+```
+ 966 files changed, 26999 insertions(+), 2758 deletions(-)
+```
+
+This is an OPTIONAL release. It is recommended for exchanges, solo miners and mining pools to update to this release
+since it contains various bugfixes and improvements that will benefit busy wallets.
+
+ * This release adds RandomX support in Hush Smart Chains (HSCs)
+ * When creating a new HSC specify `-ac_algo=randomx` to use RandomX instead of Equihash
+ * Example: `hush-smart-chain -ac_name=RANDX -ac_algo=randomx ...`
+ * This allows HSCs to be mined with CPUs instead of ASICs
+ * Because of RandomX, `cmake` is a new dependency needed to compile Hush from source
+ * More detailed documentation available at https://git.hush.is/hush/hush3/src/branch/dev/doc/randomx.md
+ * For general info about HSCs see https://git.hush.is/hush/hush3/src/branch/dev/doc/hsc.md
+ * A new feature called "zsweep" has been added, which defaults to disabled.
+ * It will sweep funds from all zaddrs into a single specified zaddr
+ * A new RPC `z_sweepstatus` will show various details about the sweeping
+ * There are many advanced options to it, see full docs at https://git.hush.is/hush/hush3/src/branch/dev/doc/zsweep-consolidation.md
+ * Expired transactions will not be relayed and nodes who relay expired transactions are no longer banned
+ * A node relaying expired transactions is usually misconfigured or confused, not attacking us
+ * This change means node operators no longer need to deal with unbanning or allowlisting confused nodes
+ * Fix a rare coredump that could happen when nodes have many unconfirmed or expired transactions
+ * New RPC: `getblockmerkletree`, which will show the full Merkle Tree for a block height
+ * New RPC: `z_consolidationstatus` is similar to `z_sweepstatus` and will show config+stats about consolidation
+ * New RPC: `z_anonsettxdelta` : Returns delta (difference) in the anonset for a given txid.
+ * A delta > 0 increases the anonset
+ * A delta < 0 reduces the anonset
+ * A delta=0 leaves the anonset the same
+ * New RPC: `z_anonsetblockdelta` : Returns delta (difference) in the anonset for a given block.
+ * getrawtransaction RPC now returns a "size" key with the size in bytes of a transaction
+ * sendmany RPC will now reject transactions that send to taddrs immediately, instead of them being rejected in mempool
+ * Preliminary support for FreeBSD has been added to the Hush build system
+ * New contrib scripts:
+ * `contrib/gen-zaddrs.pl` - Generate zaddrs in bulk, defaults to 50
+ * `contrib/sdl_checkpoints.pl` - Generate SDL checkpoints using `getblockmerkletree`
+ * ZeroMQ support has been removed from Hush
+
# Hush 3.9.1 "Luciferous Locust"
This is an OPTIONAL release. It is most important for users with large wallets, as it fixes an important
diff --git a/doc/zmq.md b/doc/zmq.md
deleted file mode 100644
index c5ce11ff6..000000000
--- a/doc/zmq.md
+++ /dev/null
@@ -1,107 +0,0 @@
-# Block and Transaction Broadcasting With ZeroMQ
-
-[ZeroMQ](http://zeromq.org/) is a lightweight wrapper around TCP
-connections, inter-process communication, and shared-memory,
-providing various message-oriented semantics such as publish/subscribe,
-request/reply, and push/pull.
-
-The Hush daemon can be configured to act as a trusted "border
-router", implementing the Hush wire protocol and relay, making
-consensus decisions, maintaining the local blockchain database,
-broadcasting locally generated transactions into the network, and
-providing a queryable RPC interface to interact on a polled basis for
-requesting blockchain related data. However, there exists only a
-limited service to notify external software of events like the arrival
-of new blocks or transactions.
-
-The ZeroMQ facility implements a notification interface through a set
-of specific notifiers. Currently there are notifiers that publish
-blocks and transactions. This read-only facility requires only the
-connection of a corresponding ZeroMQ subscriber port in receiving
-software; it is not authenticated nor is there any two-way protocol
-involvement. Therefore, subscribers should validate the received data
-since it may be out of date, incomplete or even invalid.
-
-ZeroMQ sockets are self-connecting and self-healing; that is,
-connections made between two endpoints will be automatically restored
-after an outage, and either end may be freely started or stopped in
-any order.
-
-Because ZeroMQ is message oriented, subscribers receive transactions
-and blocks all-at-once and do not need to implement any sort of
-buffering or reassembly.
-
-## Prerequisites
-
-The ZeroMQ feature in Hush requires ZeroMQ API version 4.x or
-newer, which you will need to install if you are not using the depends
-system. Typically, it is packaged by distributions as something like
-*libzmq5-dev*. The C++ wrapper for ZeroMQ is *not* needed.
-
-In order to run the example Python client scripts in contrib/ one must
-also install *python-zmq*, though this is not necessary for daemon
-operation.
-
-## Enabling
-
-By default, the ZeroMQ feature is automatically compiled in if the
-necessary prerequisites are found. To disable, use --disable-zmq
-during the *configure* step of building hushd:
-
- $ ./configure --disable-zmq (other options)
-
-To actually enable operation, one must set the appropriate options on
-the commandline or in the configuration file.
-
-## Usage
-
-Currently, the following notifications are supported:
-
- -zmqpubhashtx=address
- -zmqpubhashblock=address
- -zmqpubrawblock=address
- -zmqpubrawtx=address
-
-The socket type is PUB and the address must be a valid ZeroMQ socket
-address. The same address can be used in more than one notification.
-
-For instance:
-
- $ hushd -zmqpubhashtx=tcp://127.0.0.1:28332 \
- -zmqpubrawtx=ipc:///tmp/hushd.tx.raw
-
-Each PUB notification has a topic and body, where the header
-corresponds to the notification type. For instance, for the
-notification `-zmqpubhashtx` the topic is `hashtx` (no null
-terminator) and the body is the hexadecimal transaction hash (32
-bytes).
-
-These options can also be provided in zcash.conf.
-
-ZeroMQ endpoint specifiers for TCP (and others) are documented in the
-[ZeroMQ API](http://api.zeromq.org/4-0:_start).
-
-Client side, then, the ZeroMQ subscriber socket must have the
-ZMQ_SUBSCRIBE option set to one or either of these prefixes (for
-instance, just `hash`); without doing so will result in no messages
-arriving. Please see `contrib/zmq/zmq_sub.py` for a working example.
-
-## Remarks
-
-From the perspective of hushd, the ZeroMQ socket is write-only; PUB
-sockets don't even have a read function. Thus, there is no state
-introduced into hushd directly. Furthermore, no information is
-broadcast that wasn't already received from the public P2P network.
-
-No authentication or authorization is done on connecting clients; it
-is assumed that the ZeroMQ port is exposed only to trusted entities,
-using other means such as firewalling.
-
-Note that when the block chain tip changes, a reorganisation may occur
-and just the tip will be notified. It is up to the subscriber to
-retrieve the chain from the last known block to the new tip.
-
-There are several possibilities that ZMQ notification can get lost
-during transmission depending on the communication type you are
-using. Hushd appends an up-counting sequence number to each
-notification which allows listeners to detect lost notifications.
diff --git a/doc/zsweep-consolidation.md b/doc/zsweep-consolidation.md
new file mode 100644
index 000000000..43e62ed2e
--- /dev/null
+++ b/doc/zsweep-consolidation.md
@@ -0,0 +1,76 @@
+# zsweep and consolidation
+
+This is to document zsweep and consolidation for advanced HUSH users.
+
+**Warning: If you don't know what Zsweep or Consolidation are, there is a good chance that you will not be using these advanced options. User beware!**
+
+# Table of Contents
+1. [Using Zsweep](#zsweep)
+1. [Using Consolidation](#consolidation)
+1. [Using Zsweep & Consolidation Together](#zsweep-&-consolidation-together)
+
+# Pre-Step & Further Details
+
+A user can use these options at the command line, but it is **recommended to configure these options within the HUSH3.conf file**.
+
+Consolidation combines many unspent shielded UTXOs (zutxos) into one zutxo, which makes spending them in the future faster and potentially cost less in fees. It also helps prevent certain kinds of metadata leakages and spam attacks. It is not recommended for very large wallets (wallet.dat files with thousands of transactions) for performance reasons. This is why it defaults to OFF for CLI full nodes but ON for GUI wallets that use an embedded hushd.
+
+Zsweep is when you sweep numerous zutxos into one z-address that you configure. This z-address can be local to that system or it can be configured to sweep to a remote wallet on a different system with the zsweepexternal=1 option, which is explained below in the Zsweep section.
+
+## Zsweep
+
+1. We add the following to our conf file as per the Pre-Step.
+ ```
+ zsweep=1
+ zsweepaddress=zs1...
+ ```
+
+1. The above zsweepaddress will be the z-address you want to sweep into (zs1... is a placeholder for this documentation) and it must exist within the same local wallet you are configuring this for. If you want to zsweep to an address on another computer, then set zsweepexternal=1 as explained in the options below.
+
+1. The following are optional zsweep settings with their details:
+
+ | Zsweep Option Name| Details of what it does |
+ |-------------------|-------------------------|
+ | zsweepexternal=1 | Will enable the option to zsweep to an "external" z-address which exists in a wallet on a different system. |
+ | zsweepinterval=5 | By default zsweep runs every 5 blocks, so set and modify this value to change that. |
+ | zsweepmaxinputs=50 | By default zsweep makes sure to not reduce the anonset in any tx by having a maximum number of inputs of 8. This should be fine for new wallets, but if you have an existing wallet with many zutxos it can be changed with this option. Keep in mind that large values will make sweeping faster at the expense of reducing the AnonSet. |
+ | zsweepfee=0 | The default zsweep fee is 10000 puposhis or 0.0001 HUSH, the default for all transactions. To use fee=0 for zsweep transactions, set this option. |
+ | zsweepexclude=zs1... | Exclude a certain address from being swept. Can be used multiple times to exclude multiple addresses. |
+
+1. The following HUSH RPC will let you view your zsweep configuration options and run-time stats at the command line: `hush-cli z_sweepstatus`
+
+## Consolidation
+
+1. We add the following to our conf file as per the Pre-Step.
+ ```
+ consolidation=1
+ ```
+
+1. The following are optional consolidation settings with their details:
+
+ | Consolidation Option Name| Details of what it does |
+ |--------------------------|-------------------------|
+ | consolidationtxfee=0 | The default consolidation fee is 10000 puposhis or 0.0001 HUSH, the default for all transactions. To use fee=0 for consolidation transactions, set this option. |
+ | consolidatesaplingaddress=zs1... | Default of consolidation is set to all, but you can set this option if you have one specific z-address (zs1... is a placeholder for this documentation) that you want to only consolidate to. |
+
+1. The following HUSH RPC will let you view your consolidation configuration options and run-time stats at the command line: `hush-cli z_consolidationstatus`
+
+## Zsweep & Consolidation Together
+
+1. We add the following to our conf file as per the Pre-Step.
+ ```
+ zsweep=1
+ zsweepaddress=zs1...
+ consolidation=1
+ ```
+
+1. Then follow along with the zsweep section above if you want to set specific options for the zsweep behavior.
+
+### Copyright
+
+jahway603 and The Hush Developers
+
+### License
+
+GPLv3
+
diff --git a/migratecoin.sh b/migratecoin.sh
index 95aea910c..a32d30a5f 100644
--- a/migratecoin.sh
+++ b/migratecoin.sh
@@ -1,5 +1,5 @@
-#!/usr/bin/bash
-# Copyright (c) 2016-2021 The Hush developers
+#!/usr/bin/env bash
+# Copyright (c) 2016-2022 The Hush developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
# This script makes the neccesary transactions to migrate
diff --git a/qa/hush/checksec.sh b/qa/hush/checksec.sh
index dd1f72e54..2a4897030 100755
--- a/qa/hush/checksec.sh
+++ b/qa/hush/checksec.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# The BSD License (http://www.opensource.org/licenses/bsd-license.php)
# specifies the terms and conditions of use for checksec.sh:
diff --git a/qa/hush/create_benchmark_archive.py b/qa/hush/create_benchmark_archive.py
index 68938297e..5f9a08721 100644
--- a/qa/hush/create_benchmark_archive.py
+++ b/qa/hush/create_benchmark_archive.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
import binascii
diff --git a/qa/hush/create_wallet_200k_utxos.py b/qa/hush/create_wallet_200k_utxos.py
index 3499cdd70..d2174e864 100644
--- a/qa/hush/create_wallet_200k_utxos.py
+++ b/qa/hush/create_wallet_200k_utxos.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2017 The Zcash developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/hush/full-test-suite.sh b/qa/hush/full-test-suite.sh
index 0618ccdca..b7fb37f0a 100755
--- a/qa/hush/full-test-suite.sh
+++ b/qa/hush/full-test-suite.sh
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Execute all of the automated tests related to Hush
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/hush/full_test_suite.py b/qa/hush/full_test_suite.py
index 87fcc6ac0..c8cc05e45 100755
--- a/qa/hush/full_test_suite.py
+++ b/qa/hush/full_test_suite.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
# Execute all of the automated tests related to Hush
diff --git a/qa/pull-tester/cc-tests.sh b/qa/pull-tester/cc-tests.sh
index d68671e12..bfce8ff74 100755
--- a/qa/pull-tester/cc-tests.sh
+++ b/qa/pull-tester/cc-tests.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
CURDIR=$(cd $(dirname "$0"); pwd)
diff --git a/qa/pull-tester/rpc-tests.sh b/qa/pull-tester/rpc-tests.sh
index 8b8169a2f..07e123b6f 100755
--- a/qa/pull-tester/rpc-tests.sh
+++ b/qa/pull-tester/rpc-tests.sh
@@ -1,5 +1,5 @@
-#!/bin/bash
-# Copyright (c) 2016-2021 The Hush developers
+#!/usr/bin/env bash
+# Copyright (c) 2016-2022 The Hush developers
# Released under the GPLv3
set -e -o pipefail
@@ -92,10 +92,6 @@ testScriptsExt=(
'p2p-acceptblock.py'
);
-if [ "x$ENABLE_ZMQ" = "x1" ]; then
- testScripts+=('zmq_test.py')
-fi
-
extArg="-extended"
passOn=${@#$extArg}
diff --git a/qa/pull-tester/run-bitcoin-cli b/qa/pull-tester/run-bitcoin-cli
index 74f9b19e8..c7f09ce8a 100755
--- a/qa/pull-tester/run-bitcoin-cli
+++ b/qa/pull-tester/run-bitcoin-cli
@@ -1,5 +1,5 @@
-#!/bin/bash
-# Copyright (c) 2016-2021 The Hush developers
+#!/usr/bin/env bash
+# Copyright (c) 2016-2022 The Hush developers
# Released under the GPLv3
# This is a thin wrapper around bitcoin-cli that strips the Windows-style EOLs
diff --git a/qa/pull-tester/run-bitcoind-for-test.sh.in b/qa/pull-tester/run-bitcoind-for-test.sh.in
index 0ae527293..40b6683ad 100755
--- a/qa/pull-tester/run-bitcoind-for-test.sh.in
+++ b/qa/pull-tester/run-bitcoind-for-test.sh.in
@@ -1,7 +1,7 @@
-#!/bin/bash
+#!/usr/bin/env bash
# THIS FILE IS GENERATED FROM run-bitcoind-for-test.sh.in
# Copyright (c) 2013-2014 The Bitcoin Core developers
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Released under the GPLv3
#
HUSH_LOAD_TIMEOUT=500
diff --git a/qa/pull-tester/tests-config.sh.in b/qa/pull-tester/tests-config.sh.in
index a011831b1..6718ffbc1 100755
--- a/qa/pull-tester/tests-config.sh.in
+++ b/qa/pull-tester/tests-config.sh.in
@@ -1,7 +1,7 @@
-#!/bin/bash
+#!/usr/bin/env bash
# THIS FILE IS GENERATED FROM tests-config.sh.in
# Copyright (c) 2013-2014 The Bitcoin Core developers
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Released under the GPLv3
BUILDDIR="@abs_top_builddir@"
@@ -11,7 +11,6 @@ EXEEXT="@EXEEXT@"
@ENABLE_WALLET_TRUE@ENABLE_WALLET=1
@BUILD_BITCOIN_UTILS_TRUE@ENABLE_UTILS=1
@BUILD_BITCOIND_TRUE@ENABLE_BITCOIND=1
-@ENABLE_ZMQ_TRUE@ENABLE_ZMQ=1
@ENABLE_PROTON_TRUE@ENABLE_PROTON=1
REAL_BITCOIND="$BUILDDIR/src/hushd${EXEEXT}"
diff --git a/qa/rpc-tests/ac_private.py b/qa/rpc-tests/ac_private.py
index 7c066529a..03675b6b1 100755
--- a/qa/rpc-tests/ac_private.py
+++ b/qa/rpc-tests/ac_private.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2018 SuperNET developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/addressindex.py b/qa/rpc-tests/addressindex.py
index 3f2e77c96..62483e40a 100755
--- a/qa/rpc-tests/addressindex.py
+++ b/qa/rpc-tests/addressindex.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python2
# Copyright (c) 2014-2015 The Bitcoin Core developers
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
# Test addressindex generation and fetching
diff --git a/qa/rpc-tests/bip65-cltv-p2p.py b/qa/rpc-tests/bip65-cltv-p2p.py
index f8d637e64..60bab6854 100755
--- a/qa/rpc-tests/bip65-cltv-p2p.py
+++ b/qa/rpc-tests/bip65-cltv-p2p.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/bipdersig-p2p.py b/qa/rpc-tests/bipdersig-p2p.py
index bba8ff01b..f83f0a2c7 100755
--- a/qa/rpc-tests/bipdersig-p2p.py
+++ b/qa/rpc-tests/bipdersig-p2p.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Distributed under the GPLv3/X11 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
#
diff --git a/qa/rpc-tests/blockchain.py b/qa/rpc-tests/blockchain.py
index 7cd728d30..b66a6b3bc 100755
--- a/qa/rpc-tests/blockchain.py
+++ b/qa/rpc-tests/blockchain.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2014 The Bitcoin Core developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/cryptoconditions.py b/qa/rpc-tests/cryptoconditions.py
index 85016f2d5..2c6f95847 100755
--- a/qa/rpc-tests/cryptoconditions.py
+++ b/qa/rpc-tests/cryptoconditions.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2018 SuperNET developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/cryptoconditions_channels.py b/qa/rpc-tests/cryptoconditions_channels.py
index 9273efbe9..127df77ee 100755
--- a/qa/rpc-tests/cryptoconditions_channels.py
+++ b/qa/rpc-tests/cryptoconditions_channels.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2018 SuperNET developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/cryptoconditions_dice.py b/qa/rpc-tests/cryptoconditions_dice.py
index 894b25d0d..4f9d8c7c4 100755
--- a/qa/rpc-tests/cryptoconditions_dice.py
+++ b/qa/rpc-tests/cryptoconditions_dice.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2018 SuperNET developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/cryptoconditions_faucet.py b/qa/rpc-tests/cryptoconditions_faucet.py
index af9e8e033..3cccaf2db 100755
--- a/qa/rpc-tests/cryptoconditions_faucet.py
+++ b/qa/rpc-tests/cryptoconditions_faucet.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2018 SuperNET developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/cryptoconditions_gateways.py b/qa/rpc-tests/cryptoconditions_gateways.py
index b979e99ce..d60452269 100755
--- a/qa/rpc-tests/cryptoconditions_gateways.py
+++ b/qa/rpc-tests/cryptoconditions_gateways.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2018 SuperNET developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/cryptoconditions_heir.py b/qa/rpc-tests/cryptoconditions_heir.py
index 99ddb9af0..9ce32a625 100755
--- a/qa/rpc-tests/cryptoconditions_heir.py
+++ b/qa/rpc-tests/cryptoconditions_heir.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2018 SuperNET developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/cryptoconditions_oracles.py b/qa/rpc-tests/cryptoconditions_oracles.py
index 39672ba53..255b372f6 100755
--- a/qa/rpc-tests/cryptoconditions_oracles.py
+++ b/qa/rpc-tests/cryptoconditions_oracles.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2018 SuperNET developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/cryptoconditions_rewards.py b/qa/rpc-tests/cryptoconditions_rewards.py
index 7e4f0a879..3c1452ed4 100755
--- a/qa/rpc-tests/cryptoconditions_rewards.py
+++ b/qa/rpc-tests/cryptoconditions_rewards.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2018 SuperNET developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/cryptoconditions_token.py b/qa/rpc-tests/cryptoconditions_token.py
index 6df6b11a9..dbded0062 100755
--- a/qa/rpc-tests/cryptoconditions_token.py
+++ b/qa/rpc-tests/cryptoconditions_token.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2018 SuperNET developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/decodescript.py b/qa/rpc-tests/decodescript.py
index 70a7cdfbb..f4d63d317 100755
--- a/qa/rpc-tests/decodescript.py
+++ b/qa/rpc-tests/decodescript.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2015 The Bitcoin Core developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/disablewallet.py b/qa/rpc-tests/disablewallet.py
index ecefd096f..bb4cda6e5 100755
--- a/qa/rpc-tests/disablewallet.py
+++ b/qa/rpc-tests/disablewallet.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2014 The Bitcoin Core developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/dpowconfs.py b/qa/rpc-tests/dpowconfs.py
index 340db0bd6..89fb640ad 100755
--- a/qa/rpc-tests/dpowconfs.py
+++ b/qa/rpc-tests/dpowconfs.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2019 Duke Leto who wrote "The SuperNET developers" while never agreeing to the developer agreement nor being listed in AUTHORS
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/feature_walletfile.py b/qa/rpc-tests/feature_walletfile.py
index 5487a5245..b0364a7d3 100755
--- a/qa/rpc-tests/feature_walletfile.py
+++ b/qa/rpc-tests/feature_walletfile.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python3
# Copyright (c) 2017 The Bitcoin Core developers
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
"""Test wallet file location."""
diff --git a/qa/rpc-tests/finalsaplingroot.py b/qa/rpc-tests/finalsaplingroot.py
index 15078709d..b82053eb0 100755
--- a/qa/rpc-tests/finalsaplingroot.py
+++ b/qa/rpc-tests/finalsaplingroot.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2018 The Zcash developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/forknotify.py b/qa/rpc-tests/forknotify.py
index d017d2409..1f615f930 100755
--- a/qa/rpc-tests/forknotify.py
+++ b/qa/rpc-tests/forknotify.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2014 The Bitcoin Core developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/fundrawtransaction.py b/qa/rpc-tests/fundrawtransaction.py
index daae872e4..3a9ac0ca6 100755
--- a/qa/rpc-tests/fundrawtransaction.py
+++ b/qa/rpc-tests/fundrawtransaction.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2014 The Bitcoin Core developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/getblocktemplate.py b/qa/rpc-tests/getblocktemplate.py
index 308a21729..6d771b360 100755
--- a/qa/rpc-tests/getblocktemplate.py
+++ b/qa/rpc-tests/getblocktemplate.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2016 The Zcash developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/getblocktemplate_longpoll.py b/qa/rpc-tests/getblocktemplate_longpoll.py
index f2bf82172..ab393755b 100755
--- a/qa/rpc-tests/getblocktemplate_longpoll.py
+++ b/qa/rpc-tests/getblocktemplate_longpoll.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2014 The Bitcoin Core developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/getblocktemplate_proposals.py b/qa/rpc-tests/getblocktemplate_proposals.py
index 61c33af1c..f75032091 100755
--- a/qa/rpc-tests/getblocktemplate_proposals.py
+++ b/qa/rpc-tests/getblocktemplate_proposals.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2014 The Bitcoin Core developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/getchaintips.py b/qa/rpc-tests/getchaintips.py
index e40664196..84c3f7f6c 100755
--- a/qa/rpc-tests/getchaintips.py
+++ b/qa/rpc-tests/getchaintips.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2014 The Bitcoin Core developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/hardforkdetection.py b/qa/rpc-tests/hardforkdetection.py
index f98d274a8..bcd13e0d4 100755
--- a/qa/rpc-tests/hardforkdetection.py
+++ b/qa/rpc-tests/hardforkdetection.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
#
# Test hard fork detection
diff --git a/qa/rpc-tests/httpbasics.py b/qa/rpc-tests/httpbasics.py
index 034ff2a4d..ff5544a2c 100755
--- a/qa/rpc-tests/httpbasics.py
+++ b/qa/rpc-tests/httpbasics.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2014 The Bitcoin Core developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/invalidateblock.py b/qa/rpc-tests/invalidateblock.py
index 4edad27df..ea189ff85 100755
--- a/qa/rpc-tests/invalidateblock.py
+++ b/qa/rpc-tests/invalidateblock.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2014 The Bitcoin Core developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/invalidblockrequest.py b/qa/rpc-tests/invalidblockrequest.py
index e8440a8e7..f806d07ec 100755
--- a/qa/rpc-tests/invalidblockrequest.py
+++ b/qa/rpc-tests/invalidblockrequest.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
#
# Distributed under the GPLv3/X11 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/ivk_import_export.py b/qa/rpc-tests/ivk_import_export.py
index 6431edda2..a5eaf5233 100755
--- a/qa/rpc-tests/ivk_import_export.py
+++ b/qa/rpc-tests/ivk_import_export.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2019 Bartlomiej Lisiecki
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/key_import_export.py b/qa/rpc-tests/key_import_export.py
index 54b07b2ee..f7e13a732 100755
--- a/qa/rpc-tests/key_import_export.py
+++ b/qa/rpc-tests/key_import_export.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2017 The Zcash developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/keypool.py b/qa/rpc-tests/keypool.py
index a64d79525..0a22cae8a 100755
--- a/qa/rpc-tests/keypool.py
+++ b/qa/rpc-tests/keypool.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2014 The Bitcoin Core developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/listtransactions.py b/qa/rpc-tests/listtransactions.py
index ace7e9090..f82494ca2 100755
--- a/qa/rpc-tests/listtransactions.py
+++ b/qa/rpc-tests/listtransactions.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2014 The Bitcoin Core developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/maxblocksinflight.py b/qa/rpc-tests/maxblocksinflight.py
index b0c7ad66f..4b7e6e624 100755
--- a/qa/rpc-tests/maxblocksinflight.py
+++ b/qa/rpc-tests/maxblocksinflight.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
#
# Distributed under the GPLv3/X11 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/mempool_nu_activation.py b/qa/rpc-tests/mempool_nu_activation.py
index 36d8493cc..fead09452 100755
--- a/qa/rpc-tests/mempool_nu_activation.py
+++ b/qa/rpc-tests/mempool_nu_activation.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2018 The Zcash developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/mempool_reorg.py b/qa/rpc-tests/mempool_reorg.py
index 4c39eba46..e08b5985f 100755
--- a/qa/rpc-tests/mempool_reorg.py
+++ b/qa/rpc-tests/mempool_reorg.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2014 The Bitcoin Core developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/mempool_resurrect_test.py b/qa/rpc-tests/mempool_resurrect_test.py
index 3c3b1c9d7..6afad2c7d 100755
--- a/qa/rpc-tests/mempool_resurrect_test.py
+++ b/qa/rpc-tests/mempool_resurrect_test.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2014 The Bitcoin Core developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/mempool_spendcoinbase.py b/qa/rpc-tests/mempool_spendcoinbase.py
index 7a80d034e..ee6465868 100755
--- a/qa/rpc-tests/mempool_spendcoinbase.py
+++ b/qa/rpc-tests/mempool_spendcoinbase.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2014 The Bitcoin Core developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/mempool_tx_expiry.py b/qa/rpc-tests/mempool_tx_expiry.py
index 078028a6e..de43e3bf3 100755
--- a/qa/rpc-tests/mempool_tx_expiry.py
+++ b/qa/rpc-tests/mempool_tx_expiry.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2018 The Zcash developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/mempool_tx_input_limit.py b/qa/rpc-tests/mempool_tx_input_limit.py
index e88db77f1..28761aca8 100755
--- a/qa/rpc-tests/mempool_tx_input_limit.py
+++ b/qa/rpc-tests/mempool_tx_input_limit.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2017 The Zcash developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/merkle_blocks.py b/qa/rpc-tests/merkle_blocks.py
index 4d4103ed9..4de09a02d 100755
--- a/qa/rpc-tests/merkle_blocks.py
+++ b/qa/rpc-tests/merkle_blocks.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2014 The Bitcoin Core developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/nodehandling.py b/qa/rpc-tests/nodehandling.py
index ca5c7c319..f23e19ade 100755
--- a/qa/rpc-tests/nodehandling.py
+++ b/qa/rpc-tests/nodehandling.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2014 The Bitcoin Core developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/nspv_client_test.py b/qa/rpc-tests/nspv_client_test.py
index c232389f2..45606cec1 100755
--- a/qa/rpc-tests/nspv_client_test.py
+++ b/qa/rpc-tests/nspv_client_test.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
import sys
diff --git a/qa/rpc-tests/p2p-acceptblock.py b/qa/rpc-tests/p2p-acceptblock.py
index 22bc4212d..4dccdfae5 100755
--- a/qa/rpc-tests/p2p-acceptblock.py
+++ b/qa/rpc-tests/p2p-acceptblock.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
#
# Distributed under the GPLv3/X11 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/p2p_node_bloom.py b/qa/rpc-tests/p2p_node_bloom.py
index 78d4021f3..6bbb3be57 100755
--- a/qa/rpc-tests/p2p_node_bloom.py
+++ b/qa/rpc-tests/p2p_node_bloom.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2018 The Zcash developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/p2p_txexpiry_dos.py b/qa/rpc-tests/p2p_txexpiry_dos.py
index bde410d6a..c870f2e74 100755
--- a/qa/rpc-tests/p2p_txexpiry_dos.py
+++ b/qa/rpc-tests/p2p_txexpiry_dos.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2018 The Zcash developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/prioritisetransaction.py b/qa/rpc-tests/prioritisetransaction.py
index 813f5c00d..4038c7a8f 100755
--- a/qa/rpc-tests/prioritisetransaction.py
+++ b/qa/rpc-tests/prioritisetransaction.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2017 The Zcash developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/proxy_test.py b/qa/rpc-tests/proxy_test.py
index 15b83c66c..1a200db56 100755
--- a/qa/rpc-tests/proxy_test.py
+++ b/qa/rpc-tests/proxy_test.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2015 The Bitcoin Core developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/pruning.py b/qa/rpc-tests/pruning.py
index 16714e0f2..44d8c8b5d 100755
--- a/qa/rpc-tests/pruning.py
+++ b/qa/rpc-tests/pruning.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2014 The Bitcoin Core developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/rawtransactions.py b/qa/rpc-tests/rawtransactions.py
index 872c4f28c..b59d4f073 100755
--- a/qa/rpc-tests/rawtransactions.py
+++ b/qa/rpc-tests/rawtransactions.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2014 The Bitcoin Core developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/receivedby.py b/qa/rpc-tests/receivedby.py
index bb50e07f3..a2e3bc224 100755
--- a/qa/rpc-tests/receivedby.py
+++ b/qa/rpc-tests/receivedby.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2014 The Bitcoin Core developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/regtest_signrawtransaction.py b/qa/rpc-tests/regtest_signrawtransaction.py
index aaeb9107a..520d7fca9 100755
--- a/qa/rpc-tests/regtest_signrawtransaction.py
+++ b/qa/rpc-tests/regtest_signrawtransaction.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2018 The Zcash developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/reindex.py b/qa/rpc-tests/reindex.py
index 988ee21f4..f8f06d96a 100755
--- a/qa/rpc-tests/reindex.py
+++ b/qa/rpc-tests/reindex.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python2
# Copyright (c) 2014 The Bitcoin Core developers
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Released under the GPLv3
#
# Test -reindex with CheckBlockIndex
diff --git a/qa/rpc-tests/rest.py b/qa/rpc-tests/rest.py
index 2be0254c7..a1ba0cc06 100755
--- a/qa/rpc-tests/rest.py
+++ b/qa/rpc-tests/rest.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2014 The Bitcoin Core developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/rewind_index.py b/qa/rpc-tests/rewind_index.py
index ea971a818..65d1d2a13 100755
--- a/qa/rpc-tests/rewind_index.py
+++ b/qa/rpc-tests/rewind_index.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2018 The Zcash developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/rpcbind_test.py b/qa/rpc-tests/rpcbind_test.py
index 17835c694..d95e9aca5 100755
--- a/qa/rpc-tests/rpcbind_test.py
+++ b/qa/rpc-tests/rpcbind_test.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2014 The Bitcoin Core developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/script_test.py b/qa/rpc-tests/script_test.py
index a4d59e2bc..4175e7d13 100755
--- a/qa/rpc-tests/script_test.py
+++ b/qa/rpc-tests/script_test.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
#
# Distributed under the GPLv3/X11 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/signrawtransaction_offline.py b/qa/rpc-tests/signrawtransaction_offline.py
index f0fba1c50..84f3e1888 100755
--- a/qa/rpc-tests/signrawtransaction_offline.py
+++ b/qa/rpc-tests/signrawtransaction_offline.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/signrawtransactions.py b/qa/rpc-tests/signrawtransactions.py
index 332ffd635..a7fe196b1 100755
--- a/qa/rpc-tests/signrawtransactions.py
+++ b/qa/rpc-tests/signrawtransactions.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2015 The Bitcoin Core developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/smartfees.py b/qa/rpc-tests/smartfees.py
index 0d689e9e8..19c233d4f 100755
--- a/qa/rpc-tests/smartfees.py
+++ b/qa/rpc-tests/smartfees.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2014-2015 The Bitcoin Core developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/spentindex.py b/qa/rpc-tests/spentindex.py
index f4b8f9209..2300df6c3 100755
--- a/qa/rpc-tests/spentindex.py
+++ b/qa/rpc-tests/spentindex.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2014-2015 The Bitcoin Core developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/test_framework/authproxy.py b/qa/rpc-tests/test_framework/authproxy.py
index c4bd79c04..32cf6fd0d 100644
--- a/qa/rpc-tests/test_framework/authproxy.py
+++ b/qa/rpc-tests/test_framework/authproxy.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/test_framework/bignum.py b/qa/rpc-tests/test_framework/bignum.py
index 6b764a389..cf6e9418f 100644
--- a/qa/rpc-tests/test_framework/bignum.py
+++ b/qa/rpc-tests/test_framework/bignum.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
# This file is from python-bitcoinlib.
diff --git a/qa/rpc-tests/test_framework/blockstore.py b/qa/rpc-tests/test_framework/blockstore.py
index 9a7c2bd35..6687be3df 100644
--- a/qa/rpc-tests/test_framework/blockstore.py
+++ b/qa/rpc-tests/test_framework/blockstore.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
# BlockStore: a helper class that keeps a map of blocks and implements
diff --git a/qa/rpc-tests/test_framework/blocktools.py b/qa/rpc-tests/test_framework/blocktools.py
index 9955b4324..260ea9f62 100644
--- a/qa/rpc-tests/test_framework/blocktools.py
+++ b/qa/rpc-tests/test_framework/blocktools.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
# blocktools.py - utilities for manipulating blocks and transactions
diff --git a/qa/rpc-tests/test_framework/comptool.py b/qa/rpc-tests/test_framework/comptool.py
index a0f46d464..9daaae04e 100755
--- a/qa/rpc-tests/test_framework/comptool.py
+++ b/qa/rpc-tests/test_framework/comptool.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/test_framework/equihash.py b/qa/rpc-tests/test_framework/equihash.py
index 2f7b74c1e..e0d9cf306 100755
--- a/qa/rpc-tests/test_framework/equihash.py
+++ b/qa/rpc-tests/test_framework/equihash.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
from operator import itemgetter
diff --git a/qa/rpc-tests/test_framework/mininode.py b/qa/rpc-tests/test_framework/mininode.py
index ecbef5e24..421691bcb 100755
--- a/qa/rpc-tests/test_framework/mininode.py
+++ b/qa/rpc-tests/test_framework/mininode.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
# mininode.py - Bitcoin P2P network half-a-node
diff --git a/qa/rpc-tests/test_framework/netutil.py b/qa/rpc-tests/test_framework/netutil.py
index 07e7460ce..57baecdc0 100644
--- a/qa/rpc-tests/test_framework/netutil.py
+++ b/qa/rpc-tests/test_framework/netutil.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2014 The Bitcoin Core developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/test_framework/script.py b/qa/rpc-tests/test_framework/script.py
index 320151f7d..ae8a2fc7a 100644
--- a/qa/rpc-tests/test_framework/script.py
+++ b/qa/rpc-tests/test_framework/script.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
# This file is modified from python-bitcoinlib.
diff --git a/qa/rpc-tests/test_framework/socks5.py b/qa/rpc-tests/test_framework/socks5.py
index 27f67225e..9138f221b 100644
--- a/qa/rpc-tests/test_framework/socks5.py
+++ b/qa/rpc-tests/test_framework/socks5.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2015 The Bitcoin Core developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/test_framework/test_framework.py b/qa/rpc-tests/test_framework/test_framework.py
index 55565a2d3..8fa2876ea 100755
--- a/qa/rpc-tests/test_framework/test_framework.py
+++ b/qa/rpc-tests/test_framework/test_framework.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python2
# Copyright (c) 2014 The Bitcoin Core developers
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
# Base class for RPC testing
diff --git a/qa/rpc-tests/timestampindex.py b/qa/rpc-tests/timestampindex.py
index 17401d607..ca37d469d 100755
--- a/qa/rpc-tests/timestampindex.py
+++ b/qa/rpc-tests/timestampindex.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2014-2015 The Bitcoin Core developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/txindex.py b/qa/rpc-tests/txindex.py
index 9424e75f9..89e5a6886 100755
--- a/qa/rpc-tests/txindex.py
+++ b/qa/rpc-tests/txindex.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2014-2015 The Bitcoin Core developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/txn_doublespend.py b/qa/rpc-tests/txn_doublespend.py
index dd5fbafad..e3952f43e 100755
--- a/qa/rpc-tests/txn_doublespend.py
+++ b/qa/rpc-tests/txn_doublespend.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2014 The Bitcoin Core developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/wallet.py b/qa/rpc-tests/wallet.py
index 27b603a14..6ef8e3162 100755
--- a/qa/rpc-tests/wallet.py
+++ b/qa/rpc-tests/wallet.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2014 The Bitcoin Core developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/wallet_1941.py b/qa/rpc-tests/wallet_1941.py
index f69f04d4d..678e75af4 100755
--- a/qa/rpc-tests/wallet_1941.py
+++ b/qa/rpc-tests/wallet_1941.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2016 The Zcash developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/wallet_addresses.py b/qa/rpc-tests/wallet_addresses.py
index 67ffa2975..53f16fcc4 100755
--- a/qa/rpc-tests/wallet_addresses.py
+++ b/qa/rpc-tests/wallet_addresses.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2018 The Zcash developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/wallet_anchorfork.py b/qa/rpc-tests/wallet_anchorfork.py
index cab7a6d1e..46e5cf3c5 100755
--- a/qa/rpc-tests/wallet_anchorfork.py
+++ b/qa/rpc-tests/wallet_anchorfork.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2018 The Zcash developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/wallet_changeindicator.py b/qa/rpc-tests/wallet_changeindicator.py
index b1027c09c..21274208a 100755
--- a/qa/rpc-tests/wallet_changeindicator.py
+++ b/qa/rpc-tests/wallet_changeindicator.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2018 The Zcash developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/wallet_import_export.py b/qa/rpc-tests/wallet_import_export.py
index c5c71acd8..b3a266cbe 100755
--- a/qa/rpc-tests/wallet_import_export.py
+++ b/qa/rpc-tests/wallet_import_export.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2018 The Zcash developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/wallet_listnotes.py b/qa/rpc-tests/wallet_listnotes.py
index 6c80b96ac..66715a682 100755
--- a/qa/rpc-tests/wallet_listnotes.py
+++ b/qa/rpc-tests/wallet_listnotes.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2018 The Zcash developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/wallet_listreceived.py b/qa/rpc-tests/wallet_listreceived.py
index 3cbb49aae..ca48cb975 100755
--- a/qa/rpc-tests/wallet_listreceived.py
+++ b/qa/rpc-tests/wallet_listreceived.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2018 The Zcash developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/wallet_mergetoaddress.py b/qa/rpc-tests/wallet_mergetoaddress.py
index ea4a3b910..0684ccf1e 100755
--- a/qa/rpc-tests/wallet_mergetoaddress.py
+++ b/qa/rpc-tests/wallet_mergetoaddress.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2017 The Zcash developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/wallet_nullifiers.py b/qa/rpc-tests/wallet_nullifiers.py
index 7eed3c984..d2cd485cc 100755
--- a/qa/rpc-tests/wallet_nullifiers.py
+++ b/qa/rpc-tests/wallet_nullifiers.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2016 The Zcash developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/wallet_overwintertx.py b/qa/rpc-tests/wallet_overwintertx.py
index fdd7a1947..6f8c03dce 100755
--- a/qa/rpc-tests/wallet_overwintertx.py
+++ b/qa/rpc-tests/wallet_overwintertx.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2018 The Zcash developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/wallet_persistence.py b/qa/rpc-tests/wallet_persistence.py
index 4cf2c52f0..90076b867 100755
--- a/qa/rpc-tests/wallet_persistence.py
+++ b/qa/rpc-tests/wallet_persistence.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2018 The Zcash developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/wallet_protectcoinbase.py b/qa/rpc-tests/wallet_protectcoinbase.py
index c6f9c2760..9f91ff55f 100755
--- a/qa/rpc-tests/wallet_protectcoinbase.py
+++ b/qa/rpc-tests/wallet_protectcoinbase.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2016 The Zcash developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/wallet_sapling.py b/qa/rpc-tests/wallet_sapling.py
index 7e58a08d9..9cb120583 100755
--- a/qa/rpc-tests/wallet_sapling.py
+++ b/qa/rpc-tests/wallet_sapling.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2018 The Zcash developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/wallet_shieldcoinbase.py b/qa/rpc-tests/wallet_shieldcoinbase.py
index befd8c5a1..4747c2c1e 100755
--- a/qa/rpc-tests/wallet_shieldcoinbase.py
+++ b/qa/rpc-tests/wallet_shieldcoinbase.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2017 The Zcash developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/wallet_shieldcoinbase_sapling.py b/qa/rpc-tests/wallet_shieldcoinbase_sapling.py
index 6ef6da873..aa5d733e1 100755
--- a/qa/rpc-tests/wallet_shieldcoinbase_sapling.py
+++ b/qa/rpc-tests/wallet_shieldcoinbase_sapling.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
import inspect
diff --git a/qa/rpc-tests/wallet_treestate.py b/qa/rpc-tests/wallet_treestate.py
index f3374ac21..d0dc37528 100755
--- a/qa/rpc-tests/wallet_treestate.py
+++ b/qa/rpc-tests/wallet_treestate.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2016 The Zcash developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/walletbackup.py b/qa/rpc-tests/walletbackup.py
index 33aa82694..f43947869 100755
--- a/qa/rpc-tests/walletbackup.py
+++ b/qa/rpc-tests/walletbackup.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2014 The Bitcoin Core developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/zapwallettxes.py b/qa/rpc-tests/zapwallettxes.py
index fb4b84caf..e14939fd0 100755
--- a/qa/rpc-tests/zapwallettxes.py
+++ b/qa/rpc-tests/zapwallettxes.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2014 The Bitcoin Core developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/zkey_import_export.py b/qa/rpc-tests/zkey_import_export.py
index 0fc78fd3b..b4abff512 100755
--- a/qa/rpc-tests/zkey_import_export.py
+++ b/qa/rpc-tests/zkey_import_export.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Copyright (c) 2017 The Zcash developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
diff --git a/qa/rpc-tests/zmq_test.py b/qa/rpc-tests/zmq_test.py
deleted file mode 100755
index 7ca50efb4..000000000
--- a/qa/rpc-tests/zmq_test.py
+++ /dev/null
@@ -1,93 +0,0 @@
-#!/usr/bin/env python2
-# Copyright (c) 2016-2021 The Hush developers
-# Copyright (c) 2015 The Bitcoin Core developers
-# Distributed under the GPLv3 software license, see the accompanying
-# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
-# Test ZMQ interface
-
-from test_framework.test_framework import BitcoinTestFramework
-from test_framework.util import assert_equal, bytes_to_hex_str, start_nodes
-
-import zmq
-import struct
-
-class ZMQTest(BitcoinTestFramework):
-
- port = 28332
-
- def setup_nodes(self):
- self.zmqContext = zmq.Context()
- self.zmqSubSocket = self.zmqContext.socket(zmq.SUB)
- self.zmqSubSocket.setsockopt(zmq.SUBSCRIBE, b"hashblock")
- self.zmqSubSocket.setsockopt(zmq.SUBSCRIBE, b"hashtx")
- self.zmqSubSocket.connect("tcp://127.0.0.1:%i" % self.port)
- return start_nodes(4, self.options.tmpdir, extra_args=[
- ['-zmqpubhashtx=tcp://127.0.0.1:'+str(self.port), '-zmqpubhashblock=tcp://127.0.0.1:'+str(self.port)],
- [],
- [],
- []
- ])
-
- def run_test(self):
- self.sync_all()
-
- genhashes = self.nodes[0].generate(1)
- self.sync_all()
-
- print "listen..."
- msg = self.zmqSubSocket.recv_multipart()
- topic = msg[0]
- body = msg[1]
- msgSequence = struct.unpack('/dev/null)" -a "$(git rev-parse --is-inside-work-tree 2>/dev/null)" = "true" ]; then
# clean 'dirty' status of touched files that haven't been modified
git diff >/dev/null 2>/dev/null
@@ -30,8 +32,6 @@ if [ -e "$(which git 2>/dev/null)" -a "$(git rev-parse --is-inside-work-tree 2>/
SUFFIX=$(git rev-parse --short HEAD)
git diff-index --quiet HEAD -- || SUFFIX="$SUFFIX-dirty"
- # get a string like "2012-04-10 16:27:19 +0200"
- LAST_COMMIT_DATE="$(git log -n 1 --format="%ci")"
fi
if [ -n "$DESC" ]; then
@@ -45,7 +45,4 @@ fi
# only update build.h if necessary
if [ "$INFO" != "$NEWINFO" ]; then
echo "$NEWINFO" >"$FILE"
- if [ -n "$LAST_COMMIT_DATE" ]; then
- echo "#define BUILD_DATE \"$LAST_COMMIT_DATE\"" >> "$FILE"
- fi
fi
diff --git a/src/Makefile.am b/src/Makefile.am
index 3232cabdb..5fe0118c0 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -1,4 +1,4 @@
-# Copyright 2016-2021 The Hush developers
+# Copyright 2016-2022 The Hush developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
@@ -53,10 +53,8 @@ LIBCRYPTOCONDITIONS=cryptoconditions/libcryptoconditions_core.la
LIBUNIVALUE=univalue/libunivalue.la
LIBZCASH=libzcash.a
LIBHUSH=libhush.a
+LIBRANDOMX=RandomX/build/librandomx.a
-if ENABLE_ZMQ
-LIBBITCOIN_ZMQ=libbitcoin_zmq.a
-endif
if BUILD_BITCOIN_LIBS
LIBZCASH_CONSENSUS=libzcashconsensus.la
endif
@@ -87,9 +85,6 @@ if ENABLE_WALLET
BITCOIN_INCLUDES += $(BDB_CPPFLAGS)
EXTRA_LIBRARIES += $(LIBBITCOIN_WALLET)
endif
-if ENABLE_ZMQ
-EXTRA_LIBRARIES += $(LIBBITCOIN_ZMQ)
-endif
lib_LTLIBRARIES = $(LIBZCASH_CONSENSUS)
@@ -230,6 +225,7 @@ BITCOIN_CORE_H = \
version.h \
wallet/asyncrpcoperation_mergetoaddress.h \
wallet/asyncrpcoperation_saplingconsolidation.h \
+ wallet/asyncrpcoperation_sweep.h \
wallet/asyncrpcoperation_sendmany.h \
wallet/asyncrpcoperation_shieldcoinbase.h \
wallet/crypter.h \
@@ -238,11 +234,7 @@ BITCOIN_CORE_H = \
wallet/rpchushwallet.h \
wallet/wallet.h \
wallet/wallet_ismine.h \
- wallet/walletdb.h \
- zmq/zmqabstractnotifier.h \
- zmq/zmqconfig.h\
- zmq/zmqnotificationinterface.h \
- zmq/zmqpublishnotifier.h
+ wallet/walletdb.h
LIBHUSH_H = \
hush/utiltls.h
@@ -326,23 +318,13 @@ libbitcoin_server_a_SOURCES = \
$(LIBZCASH_H) \
$(LIBHUSH_H)
-if ENABLE_ZMQ
-libbitcoin_zmq_a_CPPFLAGS = $(BITCOIN_INCLUDES) $(ZMQ_CFLAGS)
-libbitcoin_zmq_a_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS)
-libbitcoin_zmq_a_SOURCES = \
- zmq/zmqabstractnotifier.cpp \
- zmq/zmqnotificationinterface.cpp \
- zmq/zmqpublishnotifier.cpp
-endif
-
# wallet: hushd, but only linked when wallet enabled
libbitcoin_wallet_a_CPPFLAGS = $(AM_CPPFLAGS) $(BITCOIN_INCLUDES)
libbitcoin_wallet_a_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS)
libbitcoin_wallet_a_SOURCES = \
- zcbenchmarks.cpp \
- zcbenchmarks.h \
wallet/asyncrpcoperation_mergetoaddress.cpp \
wallet/asyncrpcoperation_saplingconsolidation.cpp \
+ wallet/asyncrpcoperation_sweep.cpp \
wallet/asyncrpcoperation_sendmany.cpp \
wallet/asyncrpcoperation_shieldcoinbase.cpp \
wallet/crypter.cpp \
@@ -498,14 +480,14 @@ hushd_LDADD = \
$(LIBBITCOIN_COMMON) \
$(LIBUNIVALUE) \
$(LIBBITCOIN_UTIL) \
- $(LIBBITCOIN_ZMQ) \
$(LIBBITCOIN_CRYPTO) \
$(LIBZCASH) \
$(LIBHUSH) \
$(LIBLEVELDB) \
$(LIBMEMENV) \
$(LIBSECP256K1) \
- $(LIBCRYPTOCONDITIONS)
+ $(LIBCRYPTOCONDITIONS) \
+ $(LIBRANDOMX)
if ENABLE_WALLET
hushd_LDADD += $(LIBBITCOIN_WALLET)
@@ -518,7 +500,6 @@ hushd_LDADD += \
$(CRYPTO_LIBS) \
$(EVENT_PTHREADS_LIBS) \
$(EVENT_LIBS) \
- $(ZMQ_LIBS) \
$(LIBBITCOIN_CRYPTO) \
$(LIBZCASH_LIBS)
@@ -583,7 +564,8 @@ wallet_utility_LDADD = \
$(CRYPTO_LIBS) \
$(LIBZCASH) \
$(LIBZCASH_LIBS)\
- $(LIBCRYPTOCONDITIONS)
+ $(LIBCRYPTOCONDITIONS) \
+ $(LIBRANDOMX)
endif
# hush-tx binary #
@@ -605,7 +587,8 @@ hush_tx_LDADD = \
$(LIBHUSH) \
$(LIBBITCOIN_CRYPTO) \
$(LIBZCASH_LIBS) \
- $(LIBCRYPTOCONDITIONS)
+ $(LIBCRYPTOCONDITIONS) \
+ $(LIBRANDOMX)
hush_tx_LDADD += $(BOOST_LIBS) $(CRYPTO_LIBS)
@@ -700,13 +683,13 @@ clean-local:
check-symbols: $(bin_PROGRAMS)
if GLIBC_BACK_COMPAT
@echo "Checking glibc back compat of [$(bin_PROGRAMS)]..."
- $(AM_V_at) READELF=$(READELF) CPPFILT=$(CPPFILT) $(top_srcdir)/contrib/devtools/symbol-check.py $(bin_PROGRAMS)
+ $(AM_V_at) READELF=$(READELF) CPPFILT=$(CPPFILT) $(top_srcdir)/util/symbol-check.py $(bin_PROGRAMS)
endif
check-security: $(bin_PROGRAMS)
if HARDEN
@echo "Checking binary security of [$(bin_PROGRAMS)]..."
- $(AM_V_at) READELF=$(READELF) OBJDUMP=$(OBJDUMP) $(top_srcdir)/contrib/devtools/security-check.py $(bin_PROGRAMS)
+ $(AM_V_at) READELF=$(READELF) OBJDUMP=$(OBJDUMP) $(top_srcdir)/util/security-check.py $(bin_PROGRAMS)
endif
%.pb.cc %.pb.h: %.proto
diff --git a/src/Makefile.gtest.include b/src/Makefile.gtest.include
index 457ef6bdf..8cd1a5aba 100644
--- a/src/Makefile.gtest.include
+++ b/src/Makefile.gtest.include
@@ -1,4 +1,4 @@
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Released under the GPLv3
TESTS += hush-gtest
bin_PROGRAMS += hush-gtest
@@ -41,7 +41,6 @@ zcash_gtest_SOURCES += \
gtest/test_txid.cpp \
gtest/test_libzcash_utils.cpp \
gtest/test_proofs.cpp \
- gtest/test_paymentdisclosure.cpp \
gtest/test_pedersen_hash.cpp \
gtest/test_checkblock.cpp \
gtest/test_zip32.cpp
@@ -55,9 +54,6 @@ hush_gtest_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS)
hush_gtest_LDADD = -lgtest -lgmock $(LIBBITCOIN_SERVER) $(LIBBITCOIN_CLI) $(LIBBITCOIN_COMMON) $(LIBBITCOIN_UTIL) $(LIBBITCOIN_CRYPTO) $(LIBBITCOIN_UNIVALUE) $(LIBLEVELDB) $(LIBMEMENV) \
$(BOOST_LIBS) $(BOOST_UNIT_TEST_FRAMEWORK_LIB) $(LIBSECP256K1)
-if ENABLE_ZMQ
-zcash_gtest_LDADD += $(LIBBITCOIN_ZMQ) $(ZMQ_LIBS)
-endif
if ENABLE_WALLET
hush_gtest_LDADD += $(LIBBITCOIN_WALLET)
endif
diff --git a/src/Makefile.test-hush.include b/src/Makefile.test-hush.include
index f638ebe59..0cc4e43c1 100644
--- a/src/Makefile.test-hush.include
+++ b/src/Makefile.test-hush.include
@@ -1,4 +1,4 @@
-# Copyright (c) 2016-2021 The Hush developers
+# Copyright (c) 2016-2022 The Hush developers
# Distributed under the GPLv3 software license, see the accompanying
# file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html
@@ -12,8 +12,8 @@ hush_test_SOURCES = test-hush/main.cpp
# test-hush/test_cryptoconditions.cpp \
# test-hush/test_coinimport.cpp \
# test-hush/test_eval_bet.cpp \
-# test-hush/test_eval_notarisation.cpp \
-# test-hush/test_parse_notarisation.cpp \
+# test-hush/test_eval_notarization.cpp \
+# test-hush/test_parse_notarization.cpp \
# test-hush/test_addrman.cpp \
# test-hush/test_netbase_tests.cpp
diff --git a/src/Makefile.test.include b/src/Makefile.test.include
index 63d47cd33..775cda32a 100644
--- a/src/Makefile.test.include
+++ b/src/Makefile.test.include
@@ -125,10 +125,6 @@ test_test_bitcoin_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS)
test_test_bitcoin_LDADD += $(LIBZCASH_CONSENSUS) $(BDB_LIBS) $(SSL_LIBS) $(CRYPTO_LIBS) $(LIBZCASH) $(LIBZCASH_LIBS)
test_test_bitcoin_LDFLAGS = $(RELDFLAGS) $(AM_LDFLAGS) $(LIBTOOL_APP_LDFLAGS) -static
-if ENABLE_ZMQ
-test_test_bitcoin_LDADD += $(ZMQ_LIBS)
-endif
-
nodist_test_test_bitcoin_SOURCES = $(GENERATED_TEST_FILES)
$(BITCOIN_TESTS): $(GENERATED_TEST_FILES)
diff --git a/src/RandomX/.gitattributes b/src/RandomX/.gitattributes
new file mode 100644
index 000000000..2b93d4173
--- /dev/null
+++ b/src/RandomX/.gitattributes
@@ -0,0 +1,3 @@
+.gitignore export-ignore
+.gitattributes export-ignore
+audits export-ignore
diff --git a/src/RandomX/.gitignore b/src/RandomX/.gitignore
new file mode 100644
index 000000000..ec94c2c69
--- /dev/null
+++ b/src/RandomX/.gitignore
@@ -0,0 +1,9 @@
+bin/
+obj/
+*.user
+*.suo
+.vs
+x64/
+Release/
+Debug/
+build/
diff --git a/src/RandomX/CMakeLists.txt b/src/RandomX/CMakeLists.txt
new file mode 100644
index 000000000..f41f606b9
--- /dev/null
+++ b/src/RandomX/CMakeLists.txt
@@ -0,0 +1,236 @@
+# Copyright (c) 2019, The Monero Project
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without modification, are
+# permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this list of
+# conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice, this list
+# of conditions and the following disclaimer in the documentation and/or other
+# materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its contributors may be
+# used to endorse or promote products derived from this software without specific
+# prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+# THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+cmake_minimum_required(VERSION 3.5)
+
+project(RandomX)
+
+set(randomx_sources # sources built on every target; JIT compiler sources are appended per architecture further down
+src/aes_hash.cpp
+src/argon2_ref.c
+src/argon2_ssse3.c
+src/argon2_avx2.c
+src/bytecode_machine.cpp
+src/cpu.cpp
+src/dataset.cpp
+src/soft_aes.cpp
+src/virtual_memory.cpp
+src/vm_interpreted.cpp
+src/allocator.cpp
+src/assembly_generator_x86.cpp
+src/instruction.cpp
+src/randomx.cpp
+src/superscalar.cpp
+src/vm_compiled.cpp
+src/vm_interpreted_light.cpp
+src/argon2_core.c
+src/blake2_generator.cpp
+src/instructions_portable.cpp
+src/reciprocal.c
+src/virtual_machine.cpp
+src/vm_compiled_light.cpp
+src/blake2/blake2b.c)
+
+if(NOT ARCH_ID)
+ # allow cross compiling: fall back to the host processor only when no target processor is set
+ if(CMAKE_SYSTEM_PROCESSOR STREQUAL "")
+ set(CMAKE_SYSTEM_PROCESSOR ${CMAKE_HOST_SYSTEM_PROCESSOR})
+ endif()
+ string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" ARCH_ID) # lower-cased so the STREQUAL architecture checks below match
+endif()
+
+if(NOT ARM_ID) # ARM_ID follows ARCH_ID unless it was set explicitly
+ set(ARM_ID "${ARCH_ID}")
+endif()
+
+if(NOT ARCH) # any value other than "native" selects the default (non-native) flag set
+ set(ARCH "default")
+endif()
+
+if(NOT CMAKE_BUILD_TYPE) # no build type requested: use Release
+ set(CMAKE_BUILD_TYPE Release)
+ message(STATUS "Setting default build type: ${CMAKE_BUILD_TYPE}")
+endif()
+
+include(CheckCXXCompilerFlag)
+include(CheckCCompilerFlag)
+
+function(add_flag flag) # append ${flag} to CMAKE_CXX_FLAGS and CMAKE_C_FLAGS, each only if that compiler accepts it
+ string(REPLACE "-" "_" supported_cxx ${flag}_cxx) # name of the result variable, e.g. -maes -> _maes_cxx
+ check_cxx_compiler_flag(${flag} ${supported_cxx})
+ if(${${supported_cxx}})
+ message(STATUS "Setting CXX flag ${flag}")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${flag}" PARENT_SCOPE) # PARENT_SCOPE: change the caller's variable, not a function-local copy
+ endif()
+ string(REPLACE "-" "_" supported_c ${flag}_c) # same probe again for the C compiler
+ check_c_compiler_flag(${flag} ${supported_c})
+ if(${${supported_c}})
+ message(STATUS "Setting C flag ${flag}")
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${flag}" PARENT_SCOPE)
+ endif()
+endfunction()
+
+# x86-64: add the JIT compiler sources and choose the static assembly file and flags per toolchain
+if(ARCH_ID STREQUAL "x86_64" OR ARCH_ID STREQUAL "x86-64" OR ARCH_ID STREQUAL "amd64")
+ list(APPEND randomx_sources
+ src/jit_compiler_x86.cpp)
+
+ if(MSVC) # MSVC builds the MASM (.asm) variant of the static JIT code
+ enable_language(ASM_MASM)
+ list(APPEND randomx_sources src/jit_compiler_x86_static.asm)
+
+ set_property(SOURCE src/jit_compiler_x86_static.asm PROPERTY LANGUAGE ASM_MASM)
+
+ set_source_files_properties(src/argon2_avx2.c COMPILE_FLAGS /arch:AVX2) # AVX2 is enabled for this one file only
+
+ set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} /DRELWITHDEBINFO")
+ set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} /DRELWITHDEBINFO")
+
+ add_custom_command(OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/src/asm/configuration.asm
+ COMMAND powershell -ExecutionPolicy Bypass -File h2inc.ps1 ..\\src\\configuration.h > ..\\src\\asm\\configuration.asm SET ERRORLEVEL = 0
+ COMMENT "Generating configuration.asm at ${CMAKE_CURRENT_SOURCE_DIR}"
+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/vcxproj)
+ add_custom_target(generate-asm # the randomx target is made to depend on this further down
+ DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/src/asm/configuration.asm)
+ else() # every other compiler uses the .S variant
+ list(APPEND randomx_sources src/jit_compiler_x86_static.S)
+
+ # cheat because cmake and ccache hate each other: the .S file is declared as a C source
+ set_property(SOURCE src/jit_compiler_x86_static.S PROPERTY LANGUAGE C)
+ set_property(SOURCE src/jit_compiler_x86_static.S PROPERTY XCODE_EXPLICIT_FILE_TYPE sourcecode.asm)
+
+ if(ARCH STREQUAL "native")
+ add_flag("-march=native")
+ else()
+ # default build has hardware AES enabled (software AES can be selected at runtime)
+ add_flag("-maes")
+ check_c_compiler_flag(-mssse3 HAVE_SSSE3) # SSSE3/AVX2 are applied per file to the matching Argon2 source, not globally
+ if(HAVE_SSSE3)
+ set_source_files_properties(src/argon2_ssse3.c COMPILE_FLAGS -mssse3)
+ endif()
+ check_c_compiler_flag(-mavx2 HAVE_AVX2)
+ if(HAVE_AVX2)
+ set_source_files_properties(src/argon2_avx2.c COMPILE_FLAGS -mavx2)
+ endif()
+ endif()
+ endif()
+endif()
+
+# PowerPC: no extra sources are added; only the optional native tuning flag
+if(ARCH_ID STREQUAL "ppc64" OR ARCH_ID STREQUAL "ppc64le")
+ if(ARCH STREQUAL "native")
+ add_flag("-mcpu=native")
+ endif()
+ # PowerPC AES requires ALTIVEC (POWER7+), so it cannot be enabled in the default build
+endif()
+
+# ARMv8: add the A64 JIT compiler sources (keyed on ARM_ID, which defaults to ARCH_ID)
+if(ARM_ID STREQUAL "aarch64" OR ARM_ID STREQUAL "arm64" OR ARM_ID STREQUAL "armv8-a")
+ list(APPEND randomx_sources
+ src/jit_compiler_a64_static.S
+ src/jit_compiler_a64.cpp)
+ # cheat because cmake and ccache hate each other: the .S file is declared as a C source
+ set_property(SOURCE src/jit_compiler_a64_static.S PROPERTY LANGUAGE C)
+ set_property(SOURCE src/jit_compiler_a64_static.S PROPERTY XCODE_EXPLICIT_FILE_TYPE sourcecode.asm)
+
+ # not sure if this check is needed (it defines HAVE_HWCAP when the asm/hwcap.h header is found)
+ include(CheckIncludeFile)
+ check_include_file(asm/hwcap.h HAVE_HWCAP)
+ if(HAVE_HWCAP)
+ add_definitions(-DHAVE_HWCAP)
+ endif()
+
+ if(ARCH STREQUAL "native")
+ add_flag("-march=native")
+ else()
+ # default build has hardware AES enabled (software AES can be selected at runtime)
+ add_flag("-march=armv8-a+crypto")
+ endif()
+endif()
+
+set(RANDOMX_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/src" CACHE STRING "RandomX Include path")
+
+add_library(randomx ${randomx_sources}) # base list plus whatever the architecture sections appended
+
+if(TARGET generate-asm) # this target is only created on the MSVC x86-64 path
+ add_dependencies(randomx generate-asm)
+endif()
+
+set_property(TARGET randomx PROPERTY POSITION_INDEPENDENT_CODE ON)
+set_property(TARGET randomx PROPERTY CXX_STANDARD 11)
+set_property(TARGET randomx PROPERTY CXX_STANDARD_REQUIRED ON)
+set_property(TARGET randomx PROPERTY PUBLIC_HEADER src/randomx.h) # picked up by the PUBLIC_HEADER install rule below
+
+include(GNUInstallDirs)
+install(TARGETS randomx
+ LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+ ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+ PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
+
+add_executable(randomx-tests # test executable, linked against the randomx library
+ src/tests/tests.cpp)
+target_link_libraries(randomx-tests
+ PRIVATE randomx)
+set_property(TARGET randomx-tests PROPERTY POSITION_INDEPENDENT_CODE ON)
+set_property(TARGET randomx-tests PROPERTY CXX_STANDARD 11)
+
+add_executable(randomx-codegen # code generator executable, linked against the randomx library
+ src/tests/code-generator.cpp)
+target_link_libraries(randomx-codegen
+ PRIVATE randomx)
+
+set_property(TARGET randomx-codegen PROPERTY POSITION_INDEPENDENT_CODE ON)
+set_property(TARGET randomx-codegen PROPERTY CXX_STANDARD 11)
+
+if(NOT Threads_FOUND AND UNIX AND NOT APPLE) # skipped when Threads was already found, and on Apple / non-UNIX hosts
+ set(THREADS_PREFER_PTHREAD_FLAG ON)
+ find_package(Threads)
+endif()
+
+add_executable(randomx-benchmark # the only executable here that also links the thread library
+ src/tests/benchmark.cpp
+ src/tests/affinity.cpp)
+target_link_libraries(randomx-benchmark
+ PRIVATE randomx
+ PRIVATE ${CMAKE_THREAD_LIBS_INIT})
+
+include(CheckCXXSourceCompiles) # probe: does a 64-bit std::atomic build and link without an extra library?
+check_cxx_source_compiles("
+#include <cstdint>
+#include <atomic>
+int main() {
+ std::atomic<uint64_t> a;
+ a.is_lock_free();
+}" HAVE_CXX_ATOMICS)
+
+if(NOT HAVE_CXX_ATOMICS) # the probe failed, so link libatomic explicitly
+ target_link_libraries(randomx-benchmark
+ PRIVATE "atomic")
+endif()
+set_property(TARGET randomx-benchmark PROPERTY POSITION_INDEPENDENT_CODE ON)
+set_property(TARGET randomx-benchmark PROPERTY CXX_STANDARD 11)
diff --git a/src/RandomX/LICENSE b/src/RandomX/LICENSE
new file mode 100644
index 000000000..b1572ae82
--- /dev/null
+++ b/src/RandomX/LICENSE
@@ -0,0 +1,27 @@
+Copyright (c) 2018-2019, tevador
+
+Copyright (c) 2014-2019, The Monero Project
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holder nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/src/RandomX/README.md b/src/RandomX/README.md
new file mode 100644
index 000000000..4c1dabb65
--- /dev/null
+++ b/src/RandomX/README.md
@@ -0,0 +1,158 @@
+# RandomX
+RandomX is a proof-of-work (PoW) algorithm that is optimized for general-purpose CPUs. RandomX uses random code execution (hence the name) together with several memory-hard techniques to minimize the efficiency advantage of specialized hardware.
+
+## Overview
+
+RandomX utilizes a virtual machine that executes programs in a special instruction set that consists of integer math, floating point math and branches. These programs can be translated into the CPU's native machine code on the fly (example: [program.asm](doc/program.asm)). At the end, the outputs of the executed programs are consolidated into a 256-bit result using a cryptographic hashing function ([Blake2b](https://blake2.net/)).
+
+RandomX can operate in two main modes with different memory requirements:
+
+* **Fast mode** - requires 2080 MiB of shared memory.
+* **Light mode** - requires only 256 MiB of shared memory, but runs significantly slower
+
+Both modes are interchangeable as they give the same results. The fast mode is suitable for "mining", while the light mode is expected to be used only for proof verification.
+
+## Documentation
+
+Full specification is available in [specs.md](doc/specs.md).
+
+Design description and analysis is available in [design.md](doc/design.md).
+
+## Audits
+
+Between May and August 2019, RandomX was audited by 4 independent security research teams:
+
+* [Trail of Bits](https://www.trailofbits.com/) (28 000 USD)
+* [X41 D-SEC](https://www.x41-dsec.de/) (42 000 EUR)
+* [Kudelski Security](https://www.kudelskisecurity.com/) (18 250 CHF)
+* [QuarksLab](https://quarkslab.com/en/) (52 800 USD)
+
+The first audit was generously funded by [Arweave](https://www.arweave.org/), one of the early adopters of RandomX. The remaining three audits were funded by donations from the [Monero community](https://ccs.getmonero.org/proposals/RandomX-audit.html). All four audits were coordinated by [OSTIF](https://ostif.org/).
+
+Final reports from all four audits are available in the [audits](audits/) directory. None of the audits found any critical vulnerabilities, but several changes in the algorithm and the code were made as a direct result of the audits. More details can be found in the [final report by OSTIF](https://ostif.org/four-audits-of-randomx-for-monero-and-arweave-have-been-completed-results/).
+
+## Build
+
+RandomX is written in C++11 and builds a static library with a C API provided by header file [randomx.h](src/randomx.h). A minimal API usage example is provided in [api-example1.c](src/tests/api-example1.c). The reference code includes the `randomx-benchmark` and `randomx-tests` executables for testing.
+
+### Linux
+
+Build dependencies: `cmake` (minimum 3.5) and `gcc` (minimum version 4.8, but version 7+ is recommended).
+
+To build optimized binaries for your machine, run:
+```
+git clone https://github.com/tevador/RandomX.git
+cd RandomX
+mkdir build && cd build
+cmake -DARCH=native ..
+make
+```
+
+To build portable binaries, omit the `ARCH` option when executing cmake.
+
+### Windows
+
+On Windows, it is possible to build using MinGW (same procedure as on Linux) or using Visual Studio (solution file is provided).
+
+### Precompiled binaries
+
+Precompiled `randomx-benchmark` binaries are available on the [Releases page](https://github.com/tevador/RandomX/releases).
+
+## Proof of work
+
+RandomX was primarily designed as a PoW algorithm for [Monero](https://www.getmonero.org/). The recommended usage is as follows:
+
+* The key `K` is selected to be the hash of a block in the blockchain - this block is called the 'key block'. For optimal mining and verification performance, the key should change every 2048 blocks (~2.8 days) and there should be a delay of 64 blocks (~2 hours) between the key block and the change of the key `K`. This can be achieved by changing the key when `blockHeight % 2048 == 64` and selecting key block such that `keyBlockHeight % 2048 == 0`.
+* The input `H` is the standard hashing blob with a selected nonce value.
+
+RandomX was successfully activated on the Monero network on the 30th November 2019.
+
+If you wish to use RandomX as a PoW algorithm for your cryptocurrency, please follow the [configuration guidelines](doc/configuration.md).
+
+**Note**: To achieve ASIC resistance, the key `K` must change and must not be miner-selectable. We recommend using blockchain data as the key in a similar way to the Monero example above. If blockchain data cannot be used for some reason, use a predefined sequence of keys.
+
+### CPU performance
+The table below lists the performance of selected CPUs using the optimal number of threads (T) and large pages (if possible), in hashes per second (H/s). "CNv4" refers to the CryptoNight variant 4 (CN/R) hashrate measured using [XMRig](https://github.com/xmrig/xmrig) v2.14.1. "Fast mode" and "Light mode" are the two modes of RandomX.
+
+|CPU|RAM|OS|AES|CNv4|Fast mode|Light mode|
+|---|---|--|---|-----|------|--------------|
+Intel Core i9-9900K|32G DDR4-3200|Windows 10|hw|660 (8T)|5770 (8T)|1160 (16T)|
+AMD Ryzen 7 1700|16G DDR4-2666|Ubuntu 16.04|hw|520 (8T)|4100 (8T)|620 (16T)|
+Intel Core i7-8550U|16G DDR4-2400|Windows 10|hw|200 (4T)|1700 (4T)|350 (8T)|
+Intel Core i3-3220|4G DDR3-1333|Ubuntu 16.04|soft|42 (4T)|510 (4T)|150 (4T)|
+Raspberry Pi 3|1G LPDDR2|Ubuntu 16.04|soft|3.5 (4T)|-|20 (4T)|
+
+Note that RandomX currently includes a JIT compiler for x86-64 and ARM64. Other architectures have to use the portable interpreter, which is much slower.
+
+### GPU performance
+
+SChernykh is developing GPU mining code for RandomX. Benchmarks are included in the following repositories:
+
+* [CUDA miner](https://github.com/SChernykh/RandomX_CUDA) - NVIDIA GPUs.
+* [OpenCL miner](https://github.com/SChernykh/RandomX_OpenCL) - only for AMD Vega and AMD Polaris GPUs (uses GCN machine code).
+
+The code from the above repositories is included in the open source miner [XMRig](https://github.com/xmrig/xmrig).
+
+Note that GPUs are at a disadvantage when running RandomX since the algorithm was designed to be efficient on CPUs.
+
+# FAQ
+
+### Which CPU is best for mining RandomX?
+
+Most Intel and AMD CPUs made since 2011 should be fairly efficient at RandomX. More specifically, efficient mining requires:
+
+* 64-bit architecture
+* IEEE 754 compliant floating point unit
+* Hardware AES support ([AES-NI](https://en.wikipedia.org/wiki/AES_instruction_set) extension for x86, Cryptography extensions for ARMv8)
+* 16 KiB of L1 cache, 256 KiB of L2 cache and 2 MiB of L3 cache per mining thread
+* Support for large memory pages
+* At least 2.5 GiB of free RAM per NUMA node
+* Multiple memory channels may be required:
+ * DDR3 memory is limited to about 1500-2000 H/s per channel (depending on frequency and timings)
+ * DDR4 memory is limited to about 4000-6000 H/s per channel (depending on frequency and timings)
+
+### Does RandomX facilitate botnets/malware mining or web mining?
+
+Due to the way the algorithm works, mining malware is much easier to detect. [RandomX Sniffer](https://github.com/tevador/randomx-sniffer) is a proof of concept tool that can detect illicit mining activity on Windows.
+
+Efficient mining requires more than 2 GiB of memory, which also disqualifies many low-end machines such as IoT devices, which are often parts of large botnets.
+
+Web mining is infeasible due to the large memory requirement and the lack of directed rounding support for floating point operations in both Javascript and WebAssembly.
+
+### Since RandomX uses floating point math, does it give reproducible results on different platforms?
+
+RandomX uses only operations that are guaranteed to give correctly rounded results by the [IEEE 754](https://en.wikipedia.org/wiki/IEEE_754) standard: addition, subtraction, multiplication, division and square root. Special care is taken to avoid corner cases such as NaN values or denormals.
+
+The reference implementation has been validated on the following platforms:
+* x86 (32-bit, little-endian)
+* x86-64 (64-bit, little-endian)
+* ARMv7+VFPv3 (32-bit, little-endian)
+* ARMv8 (64-bit, little-endian)
+* PPC64 (64-bit, big-endian)
+
+### Can FPGAs mine RandomX?
+
+RandomX generates multiple unique programs for every hash, so FPGAs cannot dynamically reconfigure their circuitry because a typical FPGA takes tens of seconds to load a bitstream. It is also not possible to generate bitstreams for RandomX programs in advance due to the sheer number of combinations (there are 2<sup>512</sup> unique programs).
+
+Sufficiently large FPGAs can mine RandomX in a [soft microprocessor](https://en.wikipedia.org/wiki/Soft_microprocessor) configuration by emulating a CPU. Under these circumstances, an FPGA will be much less efficient than a CPU or a specialized chip (ASIC).
+
+## Acknowledgements
+* [tevador](https://github.com/tevador) - author
+* [SChernykh](https://github.com/SChernykh) - contributed significantly to the design of RandomX
+* [hyc](https://github.com/hyc) - original idea of using random code execution for PoW
+* [Other contributors](https://github.com/tevador/RandomX/graphs/contributors)
+
+RandomX uses some source code from the following 3rd party repositories:
+* Argon2d, Blake2b hashing functions: https://github.com/P-H-C/phc-winner-argon2
+
+The author of RandomX declares no competing financial interest.
+
+## Donations
+
+If you'd like to use RandomX, please consider donating to help cover the development cost of the algorithm.
+
+Author's XMR address:
+```
+845xHUh5GvfHwc2R8DVJCE7BT2sd4YEcmjG8GNSdmeNsP5DTEjXd1CNgxTcjHjiFuthRHAoVEJjM7GyKzQKLJtbd56xbh7V
+```
+Total donations received: ~3.86 XMR (as of 30th August 2019). Thanks to all contributors.
diff --git a/src/RandomX/doc/configuration.md b/src/RandomX/doc/configuration.md
new file mode 100644
index 000000000..27b7c66d0
--- /dev/null
+++ b/src/RandomX/doc/configuration.md
@@ -0,0 +1,287 @@
+# RandomX configuration
+
+RandomX has 45 customizable parameters (see table below). We recommend each project using RandomX to select a unique configuration to prevent network attacks from hashpower rental services.
+
+These parameters can be modified in source file [configuration.h](../src/configuration.h).
+
+|parameter|description|default value|
+|---------|-----|-------|
+|`RANDOMX_ARGON_MEMORY`|The number of 1 KiB Argon2 blocks in the Cache| `262144`|
+|`RANDOMX_ARGON_ITERATIONS`|The number of Argon2d iterations for Cache initialization|`3`|
+|`RANDOMX_ARGON_LANES`|The number of parallel lanes for Cache initialization|`1`|
+|`RANDOMX_ARGON_SALT`|Argon2 salt|`"RandomX\x03"`|
+|`RANDOMX_CACHE_ACCESSES`|The number of random Cache accesses per Dataset item|`8`|
+|`RANDOMX_SUPERSCALAR_LATENCY`|Target latency for SuperscalarHash (in cycles of the reference CPU)|`170`|
+|`RANDOMX_DATASET_BASE_SIZE`|Dataset base size in bytes|`2147483648`|
+|`RANDOMX_DATASET_EXTRA_SIZE`|Dataset extra size in bytes|`33554368`|
+|`RANDOMX_PROGRAM_SIZE`|The number of instructions in a RandomX program|`256`|
+|`RANDOMX_PROGRAM_ITERATIONS`|The number of iterations per program|`2048`|
+|`RANDOMX_PROGRAM_COUNT`|The number of programs per hash|`8`|
+|`RANDOMX_JUMP_BITS`|Jump condition mask size in bits|`8`|
+|`RANDOMX_JUMP_OFFSET`|Jump condition mask offset in bits|`8`|
+|`RANDOMX_SCRATCHPAD_L3`|Scratchpad size in bytes|`2097152`|
+|`RANDOMX_SCRATCHPAD_L2`|Scratchpad L2 size in bytes|`262144`|
+|`RANDOMX_SCRATCHPAD_L1`|Scratchpad L1 size in bytes|`16384`|
+|`RANDOMX_FREQ_*` (29x)|Instruction frequencies|multiple values|
+
+Not all of the parameters can be changed safely and most parameters have some constraints on what values can be selected (checked at compile-time).
+
+**Disclaimer: The compile-time checks only prevent obviously broken configurations. Passing the checks does not imply that the configuration is safe and will not cause crashes or other issues. We recommend that each non-standard configuration is thoroughly tested before being deployed.**
+
+### RANDOMX_ARGON_MEMORY
+
+This parameter determines the amount of memory needed in the light mode. Memory is specified in KiB (1 KiB = 1024 bytes).
+
+#### Permitted values
+Integer powers of 2 in the range 8 - 2097152.
+
+#### Notes
+Lower sizes will reduce the memory-hardness of the algorithm.
+
+### RANDOMX_ARGON_ITERATIONS
+
+Determines the number of passes of Argon2 that are used to generate the Cache.
+
+#### Permitted values
+Any positive 32-bit integer.
+
+#### Notes
+The time needed to initialize the Cache is proportional to the value of this constant.
+
+### RANDOMX_ARGON_LANES
+
+The number of parallel lanes for Cache initialization.
+
+#### Permitted values
+Integers in the range 1 - 16777215.
+
+#### Notes
+This parameter determines how many threads can be used for Cache initialization.
+
+### RANDOMX_ARGON_SALT
+
+Salt value for Cache initialization.
+
+#### Permitted values
+A string of at least 8 characters.
+
+#### Note
+Every implementation should choose a unique salt value.
+
+### RANDOMX_CACHE_ACCESSES
+
+The number of random Cache accesses per Dataset item.
+
+#### Permitted values
+Any integer greater than 1.
+
+#### Notes
+This value directly determines the performance ratio between the 'fast' and 'light' modes.
+
+### RANDOMX_SUPERSCALAR_LATENCY
+Target latency for SuperscalarHash, in cycles of the reference CPU.
+
+#### Permitted values
+Integers in the range 1 - 10000.
+
+#### Notes
+The default value was tuned so that a high-performance superscalar CPU running at 2-4 GHz will execute SuperscalarHash in about the same time it takes to load data from RAM (40-80 ns). Using a lower value will make Dataset generation (and light mode) more memory bound, while increasing this value will make Dataset generation (and light mode) more compute bound.
+
+### RANDOMX_DATASET_BASE_SIZE
+
+Dataset base size in bytes.
+
+#### Permitted values
+Integer powers of 2 in the range 64 - 4294967296 (inclusive).
+
+#### Note
+This constant affects the memory requirements in fast mode. Some values are unsafe depending on other parameters. See [Unsafe configurations](#unsafe-configurations).
+
+### RANDOMX_DATASET_EXTRA_SIZE
+
+Dataset extra size in bytes.
+
+#### Permitted values
+Non-negative integer divisible by 64.
+
+#### Note
+This constant affects the memory requirements in fast mode. Some values are unsafe depending on other parameters. See [Unsafe configurations](#unsafe-configurations).
+
+### RANDOMX_PROGRAM_SIZE
+
+The number of instructions in a RandomX program.
+
+#### Permitted values
+Positive integers divisible by 8 in the range 8 - 32768 (inclusive).
+
+#### Notes
+Smaller values will make RandomX more DRAM-latency bound, while higher values will make RandomX more compute-bound. Some values are unsafe. See [Unsafe configurations](#unsafe-configurations).
+
+### RANDOMX_PROGRAM_ITERATIONS
+
+The number of iterations per program.
+
+#### Permitted values
+Any positive integer.
+
+#### Notes
+Time per hash increases linearly with this constant. Smaller values will increase the overhead of program compilation, while larger values may allow more time for optimizations. Some values are unsafe. See [Unsafe configurations](#unsafe-configurations).
+
+### RANDOMX_PROGRAM_COUNT
+
+The number of programs per hash.
+
+#### Permitted values
+Any positive integer.
+
+#### Notes
+Time per hash increases linearly with this constant. Some values are unsafe. See [Unsafe configurations](#unsafe-configurations).
+
+### RANDOMX_JUMP_BITS
+Jump condition mask size in bits.
+
+#### Permitted values
+Positive integers. The sum of `RANDOMX_JUMP_BITS` and `RANDOMX_JUMP_OFFSET` must not exceed 16.
+
+#### Notes
+This determines the jump probability of the CBRANCH instruction. The default value of 8 results in jump probability of 1/2<sup>8</sup> = 1/256. Increasing this constant will decrease the rate of jumps (and vice versa).
+
+### RANDOMX_JUMP_OFFSET
+Jump condition mask offset in bits.
+
+#### Permitted values
+Non-negative integers. The sum of `RANDOMX_JUMP_BITS` and `RANDOMX_JUMP_OFFSET` must not exceed 16.
+
+#### Notes
+Since the low-order bits of RandomX registers are slightly biased, this offset moves the condition mask to higher bits, which are less biased. Using values smaller than the default may result in a slightly lower jump probability than the theoretical value calculated from `RANDOMX_JUMP_BITS`.
+
+### RANDOMX_SCRATCHPAD_L3
+RandomX Scratchpad size in bytes.
+
+#### Permitted values
+Any integer power of 2. Must be larger than or equal to `RANDOMX_SCRATCHPAD_L2`.
+
+#### Notes
+
+The default value of 2 MiB was selected to match the typical cache/core ratio of desktop processors. Using a lower value will make RandomX more core-bound, while using larger values will make the algorithm more latency-bound. Some values are unsafe depending on other parameters. See [Unsafe configurations](#unsafe-configurations).
+
+### RANDOMX_SCRATCHPAD_L2
+
+Scratchpad L2 size in bytes.
+
+#### Permitted values
+Any integer power of 2. Must be larger than or equal to `RANDOMX_SCRATCHPAD_L1`.
+
+#### Notes
+The default value of 256 KiB was selected to match the typical per-core L2 cache size of desktop processors. Using a lower value will make RandomX more core-bound, while using larger values will make the algorithm more latency-bound.
+
+### RANDOMX_SCRATCHPAD_L1
+
+Scratchpad L1 size in bytes.
+
+#### Permitted values
+Any integer power of 2. The minimum is 64 bytes.
+
+#### Notes
+The default value of 16 KiB was selected to be about half of the per-core L1 cache size of desktop processors. Using a lower value will make RandomX more core-bound, while using larger values will make the algorithm more latency-bound.
+
+### RANDOMX_FREQ_*
+
+Instruction frequencies (per 256 instructions).
+
+#### Permitted values
+There is a total of 29 different instructions. The sum of frequencies must be equal to 256.
+
+#### Notes
+
+Making changes to the default values is not recommended. The only exceptions are the instruction pairs IROR_R/IROL_R, FADD_R/FSUB_R and FADD_M/FSUB_M, which are functionally equivalent. Example of a safe custom configuration:
+
+||default|custom|
+|-|------|------|
+|`RANDOMX_FREQ_IROR_R`|8|5|
+|`RANDOMX_FREQ_IROL_R`|2|5|
+
+||default|custom|
+|-|------|------|
+|`RANDOMX_FREQ_FADD_R`|16|17|
+|`RANDOMX_FREQ_FSUB_R`|16|15|
+
+||default|custom|
+|-|------|------|
+|`RANDOMX_FREQ_FADD_M`|5|4|
+|`RANDOMX_FREQ_FSUB_M`|5|6|
+
+## Unsafe configurations
+
+There are some configurations that are considered 'unsafe' because they affect the security of the algorithm against attacks. If the conditions listed below are not satisfied, the configuration is unsafe and a compilation error is emitted when building the RandomX library.
+
+These checks can be disabled by defining `RANDOMX_UNSAFE` when building RandomX, e.g. by using `-DRANDOMX_UNSAFE` command line switch in GCC or MSVC. It is not recommended to disable these checks except for testing purposes.
+
+
+### 1. Memory-time tradeoffs
+
+#### Condition
+````
+RANDOMX_CACHE_ACCESSES * RANDOMX_ARGON_MEMORY * 1024 + 33554432 >= RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE
+````
+
+Configurations not satisfying this condition are vulnerable to memory-time tradeoffs, which enables efficient mining in light mode.
+
+#### Solutions
+
+* Increase `RANDOMX_CACHE_ACCESSES` or `RANDOMX_ARGON_MEMORY`.
+* Decrease `RANDOMX_DATASET_BASE_SIZE` or `RANDOMX_DATASET_EXTRA_SIZE`.
+
+### 2. Insufficient Scratchpad writes
+
+#### Condition
+````
+(128 + RANDOMX_PROGRAM_SIZE * RANDOMX_FREQ_ISTORE / 256) * (RANDOMX_PROGRAM_COUNT * RANDOMX_PROGRAM_ITERATIONS) >= RANDOMX_SCRATCHPAD_L3
+````
+
+Configurations not satisfying this condition are vulnerable to Scratchpad size optimizations due to low amount of writes.
+
+#### Solutions
+
+* Increase `RANDOMX_PROGRAM_SIZE`, `RANDOMX_FREQ_ISTORE`, `RANDOMX_PROGRAM_COUNT` or `RANDOMX_PROGRAM_ITERATIONS`.
+* Decrease `RANDOMX_SCRATCHPAD_L3`.
+
+### 3. Program filtering strategies
+
+#### Condition
+```
+RANDOMX_PROGRAM_COUNT > 1
+```
+
+Configurations not satisfying this condition are vulnerable to program filtering strategies.
+
+#### Solution
+
+* Increase `RANDOMX_PROGRAM_COUNT` to at least 2.
+
+### 4. Low program entropy
+
+#### Condition
+```
+RANDOMX_PROGRAM_SIZE >= 64
+```
+
+Configurations not satisfying this condition do not have a sufficient number of instruction combinations.
+
+#### Solution
+
+* Increase `RANDOMX_PROGRAM_SIZE` to at least 64.
+
+### 5. High compilation overhead
+
+#### Condition
+```
+RANDOMX_PROGRAM_ITERATIONS >= 400
+```
+
+Configurations not satisfying this condition have a program compilation overhead exceeding 10%.
+
+#### Solution
+
+* Increase `RANDOMX_PROGRAM_ITERATIONS` to at least 400.
+
diff --git a/src/RandomX/doc/design.md b/src/RandomX/doc/design.md
new file mode 100644
index 000000000..7a1b8ef22
--- /dev/null
+++ b/src/RandomX/doc/design.md
@@ -0,0 +1,650 @@
+# RandomX design
+To minimize the performance advantage of specialized hardware, a proof of work (PoW) algorithm must achieve *device binding* by targeting specific features of existing general-purpose hardware. This is a complex task because we have to target a large class of devices with different architectures from different manufacturers.
+
+There are two distinct classes of general processing devices: central processing units (CPUs) and graphics processing units (GPUs). RandomX targets CPUs for the following reasons:
+
+* CPUs, being less specialized devices, are more prevalent and widely accessible. A CPU-bound algorithm is more egalitarian and allows more participants to join the network. This is one of the goals stated in the original CryptoNote whitepaper [[1](https://cryptonote.org/whitepaper.pdf)].
+* A large common subset of native hardware instructions exists among different CPU architectures. The same cannot be said about GPUs. For example, there is no common integer multiplication instruction for NVIDIA and AMD GPUs [[2](https://github.com/ifdefelse/ProgPOW/issues/16)].
+* All major CPU instruction sets are well documented with multiple open source compilers available. In comparison, GPU instruction sets are usually proprietary and may require vendor specific closed-source drivers for maximum performance.
+
+## 1. Design considerations
+
+The most basic idea of a CPU-bound proof of work is that the "work" must be dynamic. This takes advantage of the fact that CPUs accept two kinds of inputs: *data* (the main input) and *code* (which specifies what to perform with the data).
+
+Conversely, typical cryptographic hashing functions [[3](https://en.wikipedia.org/wiki/Cryptographic_hash_function)] do not represent suitable work for the CPU because their only input is *data*, while the sequence of operations is fixed and can be performed more efficiently by a specialized integrated circuit.
+
+### 1.1 Dynamic proof of work
+
+A dynamic proof of work algorithm can generally consist of the following 4 steps:
+
+1) Generate a random program.
+2) Translate it into the native machine code of the CPU.
+3) Execute the program.
+4) Transform the output of the program into a cryptographically secure value.
+
+The actual 'useful' CPU-bound work is performed in step 3, so the algorithm must be tuned to minimize the overhead of the remaining steps.
+
+#### 1.1.1 Generating a random program
+
+Early attempts at a dynamic proof of work design were based on generating a program in a high-level language, such as C or Javascript [[4](https://github.com/hyc/randprog), [5](https://github.com/tevador/RandomJS)]. However, this is very inefficient for two main reasons:
+
+* High level languages have a complex syntax, so generating a valid program is relatively slow since it requires the creation of an abstract syntax tree (AST).
+* Once the source code of the program is generated, the compiler will generally parse the textual representation back into the AST, which makes the whole process of generating source code redundant.
+
+The fastest way to generate a random program is to use a *logic-less* generator - simply filling a buffer with random data. This of course requires designing a syntaxless programming language (or instruction set) in which all random bit strings represent valid programs.
+
+#### 1.1.2 Translating the program into machine code
+
+This step is inevitable because we don't want to limit the algorithm to a specific CPU architecture. In order to generate machine code as fast as possible, we need our instruction set to be as close to native hardware as possible, while still generic enough to support different architectures. There is not enough time for expensive optimizations during code compilation.
+
+#### 1.1.3 Executing the program
+
+The actual program execution should utilize as many CPU components as possible. Some of the features that should be utilized in the program are:
+
+* multi-level caches (L1, L2, L3)
+* μop cache [[6](https://en.wikipedia.org/wiki/CPU_cache#Micro-operation_(%CE%BCop_or_uop)_cache)]
+* arithmetic logic unit (ALU)
+* floating point unit (FPU)
+* memory controller
+* instruction level parallelism [[7](https://en.wikipedia.org/wiki/Instruction-level_parallelism)]
+ * superscalar execution [[8](https://en.wikipedia.org/wiki/Superscalar_processor)]
+ * out-of-order execution [[9](https://en.wikipedia.org/wiki/Out-of-order_execution)]
+ * speculative execution [[10](https://en.wikipedia.org/wiki/Speculative_execution)]
+ * register renaming [[11](https://en.wikipedia.org/wiki/Register_renaming)]
+
+Chapter 2 describes how the RandomX VM takes advantage of these features.
+
+#### 1.1.4 Calculating the final result
+
+Blake2b [[12](https://blake2.net/)] is a cryptographically secure hashing function that was specifically designed to be fast in software, especially on modern 64-bit processors, where it's around three times faster than SHA-3 and can run at a speed of around 3 clock cycles per byte of input. This function is an ideal candidate to be used in a CPU-friendly proof of work.
+
+For processing larger amounts of data in a cryptographically secure way, the Advanced Encryption Standard (AES) [[13](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard)] can provide the fastest processing speed because many modern CPUs support hardware acceleration of these operations. See chapter 3 for more details about the use of AES in RandomX.
+
+### 1.2 The "Easy program problem"
+
+When a random program is generated, one may choose to execute it only when it's favorable. This strategy is viable for two main reasons:
+
+1. The runtime of randomly generated programs typically follows a log-normal distribution [[14](https://en.wikipedia.org/wiki/Log-normal_distribution)] (also see Appendix C). A generated program may be quickly analyzed and if it's likely to have above-average runtime, program execution may be skipped and a new program may be generated instead. This can significantly boost performance especially in case the runtime distribution has a heavy tail (many long-running outliers) and if program generation is cheap.
+2. An implementation may choose to optimize for a subset of the features required for program execution. For example, the support for some operations (such as division) may be dropped or some instruction sequences may be implemented more efficiently. Generated programs would then be analyzed and be executed only if they match the specific requirements of the optimized implementation.
+
+These strategies of searching for programs of particular properties deviate from the objectives of this proof of work, so they must be eliminated. This can be achieved by requiring a sequence of *N* random programs to be executed such that each program is generated from the output of the previous one. The output of the final program is then used as the result.
+
+```
+ +---------------+ +---------------+ +---------------+ +---------------+
+ | | | | | | | |
+input --> | program 1 | --> | program 2 | --> ... --> | program (N-1) | --> | program N | --> result
+ | | | | | | | |
+ +---------------+ +---------------+ +---------------+ +---------------+
+```
+
+The principle is that after the first program is executed, a miner has to either commit to finishing the whole chain (which may include unfavorable programs) or start over and waste the effort expended on the unfinished chain. Examples of how this affects the hashrate of different mining strategies are given in Appendix A.
+
+Additionally, this chained program execution has the benefit of equalizing the runtime for the whole chain since the relative deviation of a sum of identically distributed runtimes is decreased.
+
+### 1.3 Verification time
+
+Since the purpose of the proof of work is to be used in a trustless peer-to-peer network, network participants must be able to quickly verify if a proof is valid or not. This puts an upper bound on the complexity of the proof of work algorithm. In particular, we set a goal for RandomX to be at least as fast to verify as the CryptoNight hash function [[15](https://cryptonote.org/cns/cns008.txt)], which it aims to replace.
+
+### 1.4 Memory-hardness
+
+Besides pure computational resources, such as ALUs and FPUs, CPUs usually have access to a large amount of memory in the form of DRAM [[16](https://en.wikipedia.org/wiki/Dynamic_random-access_memory)]. The performance of the memory subsystem is typically tuned to match the compute capabilities, for example [[17](https://en.wikipedia.org/wiki/Multi-channel_memory_architecture)]:
+
+* single channel memory for embedded and low power CPUs
+* dual channel memory for desktop CPUs
+* triple or quad channel memory for workstation CPUs
+* six or eight channel memory for high-end server CPUs
+
+In order to utilize the external memory as well as the on-chip memory controllers, the proof of work algorithm should access a large memory buffer (called the "Dataset"). The Dataset must be:
+
+1. larger than what can be stored on-chip (to require external memory)
+2. dynamic (to require writable memory)
+
+The maximum amount of SRAM that can be put on a single chip is more than 512 MiB for a 16 nm process and more than 2 GiB for a 7 nm process [[18](https://www.grin-forum.org/t/obelisk-grn1-chip-details/4571)]. Ideally, the size of the Dataset should be at least 4 GiB. However, due to constraints on the verification time (see below), the size used by RandomX was selected to be 2080 MiB. While a single chip can theoretically be made with this amount of SRAM using current technology (7 nm in 2019), the feasibility of such solution is questionable, at least in the near future.
+
+#### 1.4.1 Light-client verification
+
+While it's reasonable to require >2 GiB for dedicated mining systems that solve the proof of work, an option must be provided for light clients to verify the proof using a much lower amount of memory.
+
+The ratio of memory required for the 'fast' and 'light' modes must be chosen carefully not to make the light mode viable for mining. In particular, the area-time (AT) product of the light mode should not be smaller than the AT product of the fast mode. Reduction of the AT product is a common way of measuring tradeoff attacks [[19](https://eprint.iacr.org/2015/227.pdf)].
+
+Given the constraints described in the previous chapters, the maximum possible performance ratio between the fast and the light verification modes was empirically determined to be 8. This is because:
+
+1. Further increase of the light verification time would violate the constraints set out in chapter 1.3.
+2. Further decrease of the fast mode runtime would violate the constraints set out in chapter 1.1, in particular the overhead time of program generation and result calculation would become too high.
+
+Additionally, 256 MiB was selected as the maximum amount of memory that can be required in the light-client mode. This amount is acceptable even for small single-board computers such as the Raspberry Pi.
+
+To keep a constant memory-time product, the maximum fast-mode memory requirement is:
+```
+8 * 256 MiB = 2048 MiB
+```
+This can be further increased since the light mode requires additional chip area for the SuperscalarHash function (see chapter 3.4 and chapter 6 of the Specification). Assuming a conservative estimate of 0.2 mm<sup>2</sup> per SuperscalarHash core and DRAM density of 0.149 Gb/mm<sup>2</sup> [[20](http://en.thelec.kr/news/articleView.html?idxno=20)], the additional memory is:
+
+```
+8 * 0.2 * 0.149 * 1024 / 8 = 30.5 MiB
+```
+or 32 MiB when rounded to the nearest power of 2. The total memory requirement of the fast mode can be 2080 MiB with a roughly constant AT product.
+
+## 2. Virtual machine architecture
+
+This section describes the design of the RandomX virtual machine (VM).
+
+### 2.1 Instruction set
+
+RandomX uses a fixed-length instruction encoding with 8 bytes per instruction. This allows a 32-bit immediate value to be included in the instruction word. The interpretation of the instruction word bits was chosen so that any 8-byte word is a valid instruction. This allows for very efficient random program generation (see chapter 1.1.1).
+
+#### 2.1.1 Instruction complexity
+
+The VM is a complex instruction set machine that allows both register and memory addressed operands. However, each RandomX instruction translates to only 1-7 x86 instructions (1.8 on average). It is important to keep the instruction complexity relatively low to minimize the efficiency advantage of specialized hardware with a tailored instruction set.
+
+### 2.2 Program
+
+The program executed by the VM has the form of a loop consisting of 256 random instructions.
+
+* 256 instructions is long enough to provide a large number of possible programs and enough space for branches. The number of different programs that can be generated is limited to 2<sup>512</sup> = 1.3e+154, which is the number of possible seed values of the random generator.
+* 256 instructions is short enough so that high-performance CPUs can execute one iteration in similar time it takes to fetch data from DRAM. This is advantageous because it allows Dataset accesses to be synchronized and fully prefetchable (see chapter 2.9).
+* Since the program is a loop, it can take advantage of the μop cache [[6](https://en.wikipedia.org/wiki/CPU_cache#Micro-operation_(%CE%BCop_or_uop)_cache)] that is present in some x86 CPUs. Running a loop from the μop cache allows the CPU to power down the x86 instruction decoders, which should help to equalize the power efficiency between x86 and architectures with simple instruction decoding.
+
+### 2.3 Registers
+
+The VM uses 8 integer registers and 12 floating point registers. This is the maximum that can be allocated as physical registers in x86-64, which has the fewest architectural registers among common 64-bit CPU architectures. Using more registers would put x86 CPUs at a disadvantage since they would have to use memory to store VM register contents.
+
+### 2.4 Integer operations
+
+RandomX uses all primitive integer operations that have high output entropy: addition (IADD_RS, IADD_M), subtraction (ISUB_R, ISUB_M, INEG_R), multiplication (IMUL_R, IMUL_M, IMULH_R, IMULH_M, ISMULH_R, ISMULH_M, IMUL_RCP), exclusive or (IXOR_R, IXOR_M) and rotation (IROR_R, IROL_R).
+
+#### 2.4.1 IADD_RS
+
+The IADD_RS instruction utilizes the address calculation logic of CPUs and can be performed in a single hardware instruction by most CPUs (x86 `lea`, arm `add`).
+
+#### 2.4.2 IMUL_RCP
+
+Because integer division is not fully pipelined in CPUs and can be made faster in ASICs, the IMUL_RCP instruction requires only one division per program to calculate the reciprocal. This forces an ASIC to include a hardware divider without giving them a performance advantage during program execution.
+
+#### 2.4.3 IROR_R/IROL_R
+
+Rotation instructions are split between rotate right and rotate left with a 4:1 ratio. Rotate right has a higher frequency because some architectures (like ARM) don't support rotate left natively (it must be emulated using rotate right).
+
+#### 2.4.4 ISWAP_R
+
+This instruction can be executed efficiently by CPUs that support register renaming/move elimination.
+
+### 2.5 Floating point operations
+
+RandomX uses double precision floating point operations, which are supported by the majority of CPUs and require more complex hardware than single precision. All operations are performed as 128-bit vector operations, which is also supported by all major CPU architectures.
+
+RandomX uses five operations that are guaranteed by the IEEE 754 standard to give correctly rounded results: addition, subtraction, multiplication, division and square root. All 4 rounding modes defined by the standard are used.
+
+#### 2.5.1 Floating point register groups
+
+The domains of floating point operations are separated into "additive" operations, which use register group F and "multiplicative" operations, which use register group E. This is done to prevent addition/subtraction from becoming no-op when a small number is added to a large number. Since the range of the F group registers is limited to around `±3.0e+14`, adding or subtracting a floating point number with absolute value larger than 1 always changes at least 5 fraction bits.
+
+Because the limited range of group F registers would allow the use of a more efficient fixed-point representation (with 80-bit numbers), the FSCAL instruction manipulates the binary representation of the floating point format to make this optimization more difficult.
+
+Group E registers are restricted to positive values, which avoids `NaN` results (such as square root of a negative number or `0 * ∞`). Division uses only memory source operand to avoid being optimized into multiplication by constant reciprocal. The exponent of group E memory operands is set to a value between -255 and 0 to avoid division and multiplication by 0 and to increase the range of numbers that can be obtained. The approximate range of possible group E register values is `1.7E-77` to `infinity`.
+
+Approximate distribution of floating point register values at the end of each program loop is shown in these figures (left - group F, right - group E):
+
+
+
+*(Note: bins are marked by the left-side value of the interval, e.g. bin marked `1e-40` contains values from `1e-40` to `1e-20`.)*
+
+The small number of F register values at `1e+14` is caused by the FSCAL instruction, which significantly increases the range of the register values.
+
+Group E registers cover a very large range of values. About 2% of programs produce at least one `infinity` value.
+
+To maximize entropy and also to fit into one 64-byte cache line, floating point registers are combined using the XOR operation at the end of each iteration before being stored into the Scratchpad.
+
+### 2.6 Branches
+
+Modern CPUs invest a lot of die area and energy to handle branches. This includes:
+
+* Branch predictor unit [[21](https://en.wikipedia.org/wiki/Branch_predictor)]
+* Checkpoint/rollback states that allow the CPU to recover in case of a branch misprediction.
+
+To take advantage of speculative designs, the random programs should contain branches. However, if branch prediction fails, the speculatively executed instructions are thrown away, which results in a certain amount of wasted energy with each misprediction. Therefore we should aim to minimize the number of mispredictions.
+
+Additionally, branches in the code are essential because they significantly reduce the amount of static optimizations that can be made. For example, consider the following x86 instruction sequence:
+```asm
+ ...
+branch_target_00:
+ ...
+ xor r8, r9
+ test r10, 2088960
+ je branch_target_00
+ xor r8, r9
+ ...
+```
+The XOR operations would normally cancel out, but cannot be optimized away due to the branch because the result will be different if the branch is taken. Similarly, the ISWAP_R instruction could be always statically optimized out if it wasn't for branches.
+
+In general, random branches must be designed in such a way that:
+
+1. Infinite loops are not possible.
+1. The number of mispredicted branches is small.
+1. Branch condition depends on a runtime value to disable static branch optimizations.
+
+#### 2.6.1 Branch prediction
+
+Unfortunately, we haven't found a way to utilize branch prediction in RandomX. Because RandomX is a consensus protocol, all the rules must be set out in advance, which includes the rules for branches. Fully predictable branches cannot depend on the runtime value of any VM register (since register values are pseudorandom and unpredictable), so they would have to be static and therefore easily optimizable by specialized hardware.
+
+#### 2.6.2 CBRANCH instruction
+
+RandomX therefore uses random branches with a jump probability of 1/256 and branch condition that depends on an integer register value. These branches will be predicted as "not taken" by the CPU. Such branches are "free" in most CPU designs unless they are taken. While this doesn't take advantage of the branch predictors, speculative designs will see a significant performance boost compared to non-speculative branch handling - see Appendix B for more information.
+
+The branching conditions and jump targets are chosen in such a way that infinite loops in RandomX code are impossible because the register controlling the branch will never be modified in the repeated code block. Each CBRANCH instruction can jump up to twice in a row. Handling CBRANCH using predicated execution [[22](https://en.wikipedia.org/wiki/Predication_(computer_architecture))] is impractical because the branch is not taken most of the time.
+
+### 2.7 Instruction-level parallelism
+
+CPUs improve their performance using several techniques that utilize instruction-level parallelism of the executed code. These techniques include:
+
+* Having multiple execution units that can execute operations in parallel (*superscalar execution*).
+* Executing instructions not in program order, but in the order of operand availability (*out-of-order execution*).
+* Predicting which way branches will go to enhance the benefits of both superscalar and out-of-order execution.
+
+RandomX benefits from all these optimizations. See Appendix B for a detailed analysis.
+
+### 2.8 Scratchpad
+
+The Scratchpad is used as read-write memory. Its size was selected to fit entirely into CPU cache.
+
+#### 2.8.1 Scratchpad levels
+
+The Scratchpad is split into 3 levels to mimic the typical CPU cache hierarchy [[23](https://en.wikipedia.org/wiki/CPU_cache)]. Most VM instructions access "L1" and "L2" Scratchpad because L1 and L2 CPU caches are located close to the CPU execution units and provide the best random access latency. The ratio of reads from L1 and L2 is 3:1, which matches the inverse ratio of typical latencies (see table below).
+
+|CPU μ-architecture|L1 latency|L2 latency|L3 latency|source|
+|----------------|----------|----------|----------|------|
+|ARM Cortex A55|2|6|-|[[24](https://www.anandtech.com/show/11441/dynamiq-and-arms-new-cpus-cortex-a75-a55/4)]|
+|AMD Zen+|4|12|40|[[25](https://en.wikichip.org/wiki/amd/microarchitectures/zen%2B#Memory_Hierarchy)]|
+|Intel Skylake|4|12|42|[[26](https://en.wikichip.org/wiki/intel/microarchitectures/skylake_(client)#Memory_Hierarchy)]|
+
+The L3 cache is much larger and located further from the CPU core. As a result, its access latencies are much higher and can cause stalls in program execution.
+
+RandomX therefore performs only 2 random accesses into "L3" Scratchpad per program iteration (steps 2 and 3 in chapter 4.6.2 of the Specification). Register values from a given iteration are written into the same locations they were loaded from, which guarantees that the required cache lines have been moved into the faster L1 or L2 caches.
+
+Additionally, integer instructions that read from a fixed address also use the whole "L3" Scratchpad (Table 5.1.4 of the Specification) because repetitive accesses will ensure that the cache line will be placed in the L1 cache of the CPU. This shows that the Scratchpad level doesn't always directly correspond to the same CPU cache level.
+
+#### 2.8.2 Scratchpad writes
+
+There are two ways the Scratchpad is modified during VM execution:
+
+1. At the end of each program iteration, all register values are written into "L3" Scratchpad (see Specification chapter 4.6.2, steps 9 and 11). This writes a total of 128 bytes per iteration in two 64-byte blocks.
+2. The ISTORE instruction does explicit stores. On average, there are 16 stores per program, out of which 2 stores are into the "L3" level. Each ISTORE instruction writes 8 bytes.
+
+The image below shows an example of the distribution of writes to the Scratchpad. Each pixel in the image represents 8 bytes of the Scratchpad. Red pixels represent portions of the Scratchpad that have been overwritten at least once during hash calculation. The "L1" and "L2" levels are on the left side (almost completely overwritten). The right side of the scratchpad represents the bottom 1792 KiB. Only about 66% of it is overwritten, but the writes are spread uniformly and randomly.
+
+
+
+See Appendix D for the analysis of Scratchpad entropy.
+
+#### 2.8.3 Read-write ratio
+
+Programs make, on average, 39 reads (instructions IADD_M, ISUB_M, IMUL_M, IMULH_M, ISMULH_M, IXOR_M, FADD_M, FSUB_M, FDIV_M) and 16 writes (instruction ISTORE) to the Scratchpad per program iteration. Additional 128 bytes are read and written implicitly to initialize and store register values. 64 bytes of data is read from the Dataset per iteration. In total:
+
+* The average amount of data read from memory per program iteration is: 39 * 8 + 128 + 64 = **504 bytes**.
+* The average amount of data written to memory per program iteration is: 16 * 8 + 128 = **256 bytes**.
+
+This is close to a 2:1 read/write ratio, which CPUs are optimized for.
+
+### 2.9 Dataset
+
+Since the Scratchpad is usually stored in the CPU cache, only Dataset accesses utilize the memory controllers.
+
+RandomX randomly reads from the Dataset once per program iteration (16384 times per hash result). Since the Dataset must be stored in DRAM, it provides a natural parallelization limit, because DRAM cannot do more than about 25 million random accesses per second per bank group. Each separately addressable bank group allows a throughput of around 1500 H/s.
+
+All Dataset accesses read one CPU cache line (64 bytes) and are fully prefetched. The time to execute one program iteration described in chapter 4.6.2 of the Specification is about the same as typical DRAM access latency (50-100 ns).
+
+#### 2.9.1 Cache
+
+The Cache, which is used for light verification and Dataset construction, is about 8 times smaller than the Dataset. To keep a constant area-time product, each Dataset item is constructed from 8 random Cache accesses.
+
+Because 256 MiB is small enough to be included on-chip, RandomX uses a custom high-latency, high-power mixing function ("SuperscalarHash") which defeats the benefits of using low-latency memory and the energy required to calculate SuperscalarHash makes light mode very inefficient for mining (see chapter 3.4).
+
+Using less than 256 MiB of memory is not possible due to the use of tradeoff-resistant Argon2d with 3 iterations. When using 3 iterations (passes), halving the memory usage increases computational cost 3423 times for the best tradeoff attack [[27](https://eprint.iacr.org/2015/430.pdf)].
+
+## 3. Custom functions
+
+### 3.1 AesGenerator1R
+
+AesGenerator1R was designed for the fastest possible generation of pseudorandom data to fill the Scratchpad. It takes advantage of hardware accelerated AES in modern CPUs. Only one AES round is performed per 16 bytes of output, which results in throughput exceeding 20 GB/s in most modern CPUs.
+
+AesGenerator1R gives a good output distribution provided that it's initialized with a sufficiently 'random' initial state (see Appendix F).
+
+### 3.2 AesGenerator4R
+
+AesGenerator4R uses 4 AES rounds to generate pseudorandom data for Program Buffer initialization. Since 2 AES rounds are sufficient for full avalanche of all input bits [[28](https://csrc.nist.gov/csrc/media/projects/cryptographic-standards-and-guidelines/documents/aes-development/rijndael-ammended.pdf)], AesGenerator4R has excellent statistical properties (see Appendix F) while maintaining very good performance.
+
+The reversible nature of this generator is not an issue since the generator state is always initialized using the output of a non-reversible hashing function (Blake2b).
+
+### 3.3 AesHash1R
+
+AesHash1R was designed for the fastest possible calculation of the Scratchpad fingerprint. It interprets the Scratchpad as a set of AES round keys, so it's equivalent to AES encryption with 32768 rounds. Two extra rounds are performed at the end to ensure avalanche of all Scratchpad bits in each lane.
+
+The reversible nature of AesHash1R is not a problem for two main reasons:
+
+* It is not possible to directly control the input of AesHash1R.
+* The output of AesHash1R is passed into the Blake2b hashing function, which is not reversible.
+
+### 3.4 SuperscalarHash
+
+SuperscalarHash was designed to burn as much power as possible while the CPU is waiting for data to be loaded from DRAM. The target latency of 170 cycles corresponds to the usual DRAM latency of 40-80 ns and clock frequency of 2-4 GHz. ASIC devices designed for light-mode mining with low-latency memory will be bottlenecked by SuperscalarHash when calculating Dataset items and their efficiency will be destroyed by the high power usage of SuperscalarHash.
+
+The average SuperscalarHash function contains a total of 450 instructions, out of which 155 are 64-bit multiplications. On average, the longest dependency chain is 95 instructions long. An ASIC design for light-mode mining, with 256 MiB of on-die memory and 1-cycle latency for all operations, will need on average 95 * 8 = 760 cycles to construct a Dataset item, assuming unlimited parallelization. It will have to execute 155 * 8 = 1240 64-bit multiplications per item, which will consume energy comparable to loading 64 bytes from DRAM.
+
+## Appendix
+
+### A. The effect of chaining VM executions
+
+Chapter 1.2 describes why `N` random programs are chained to prevent mining strategies that search for 'easy' programs. RandomX uses a value of `N = 8`.
+
+Let's define `Q` as the ratio of acceptable programs in a strategy that uses filtering. For example `Q = 0.75` means that 25% of programs are rejected.
+
+For `N = 1`, there are no wasted program executions and the only cost is program generation and the filtering itself. The calculations below assume that these costs are zero and the only real cost is program execution. However, this is a simplification because program generation in RandomX is not free (the first program generation requires full Scratchpad initialization), but it describes a best-case scenario for an attacker.
+
+
+ For `N > 1`, the first program can be filtered as usual, but after the program is executed, there is a chance of `1-Q` that the next program should be rejected and we have wasted one program execution.
+
+For `N` chained executions, the chance is only Q<sup>N</sup> that all programs in the chain are acceptable. However, during each attempt to find such a chain, we will waste the execution of some programs. For `N = 8`, the number of wasted programs per attempt is equal to (1-Q)\*(1+2\*Q+3\*Q<sup>2</sup>+4\*Q<sup>3</sup>+5\*Q<sup>4</sup>+6\*Q<sup>5</sup>+7\*Q<sup>6</sup>) (approximately 2.5 for `Q = 0.75`).
+
+Let's consider 3 mining strategies:
+
+#### Strategy I
+
+Honest miner that doesn't reject any programs (`Q = 1`).
+
+#### Strategy II
+
+Miner that uses optimized custom hardware that cannot execute 25% of programs (`Q = 0.75`), but supported programs can be executed 50% faster.
+
+#### Strategy III
+
+Miner that can execute all programs, but rejects 25% of the slowest programs for the first program in the chain. This gives a 5% performance boost for the first program in the chain (this matches the runtime distribution from Appendix C).
+
+#### Results
+
+The table below lists the results for the above 3 strategies and different values of `N`. The columns **N(I)**, **N(II)** and **N(III)** list the number of programs that each strategy has to execute on average to get one valid hash result (this includes programs wasted in rejected chains). Columns **Speed(I)**, **Speed(II)** and **Speed(III)** list the average mining performance relative to strategy I.
+
+|N|N(I)|N(II)|N(III)|Speed(I)|Speed(II)|Speed(III)|
+|---|----|----|----|---------|---------|---------|
+|1|1|1|1|1.00|1.50|1.05|
+|2|2|2.3|2|1.00|1.28|1.02|
+|4|4|6.5|4|1.00|0.92|1.01|
+|8|8|27.0|8|1.00|0.44|1.00|
+
+For `N = 8`, strategy II will perform at less than half the speed of the honest miner despite having a 50% performance advantage for selected programs. The small statistical advantage of strategy III is negligible with `N = 8`.
+
+### B. Performance simulation
+
+As discussed in chapter 2.7, RandomX aims to take advantage of the complex design of modern high-performance CPUs. To evaluate the impact of superscalar, out-of-order and speculative execution, we performed a simplified CPU simulation. Source code is available in [perf-simulation.cpp](../src/tests/perf-simulation.cpp).
+
+#### CPU model
+
+The model CPU uses a 3-stage pipeline to achieve an ideal throughput of 1 instruction per cycle:
+```
+ (1) (2) (3)
++------------------+ +----------------+ +----------------+
+| Instruction | | | | |
+| fetch | ---> | Memory access | ---> | Execute |
+| + decode | | | | |
++------------------+ +----------------+ +----------------+
+```
+The 3 stages are:
+
+1. Instruction fetch and decode. This stage loads the instruction from the Program Buffer and decodes the instruction operation and operands.
+2. Memory access. If this instruction uses a memory operand, it is loaded from the Scratchpad in this stage. This includes the calculation of the memory address. Stores are also performed in this stage. The value of the address register must be available in this stage.
+3. Execute. This stage executes the instruction using the operands retrieved in the previous stages and writes the results into the register file.
+
+Note that this is an optimistically short pipeline that would not allow very high clock speeds. Designs using a longer pipeline would significantly increase the benefits of speculative execution.
+
+#### Superscalar execution
+
+Our model CPU contains two kinds of components:
+
+* Execution unit (EXU) - it is used to perform the actual integer or floating point operation. All RandomX instructions except ISTORE must use an execution unit in the 3rd pipeline stage. All operations are considered to take only 1 clock cycle.
+* Memory unit (MEM) - it is used for loads and stores into Scratchpad. All memory instructions (including ISTORE) use a memory unit in the 2nd pipeline stage.
+
+A superscalar design will contain multiple execution or memory units to improve performance.
+
+#### Out-of-order execution
+
+The simulation model supports two designs:
+
+1. **In-order** - all instructions are executed in the order they appear in the Program Buffer. This design will stall if a dependency is encountered or the required EXU/MEM unit is not available.
+2. **Out-of-order** - instructions are not necessarily executed in program order; an instruction can be executed when its operands are ready and the required EXU/MEM units are available.
+
+#### Branch handling
+
+The simulation model supports two types of branch handling:
+
+1. **Non-speculative** - when a branch is encountered, the pipeline is stalled. This typically adds a 3-cycle penalty for each branch.
+2. **Speculative** - all branches are predicted not taken and the pipeline is flushed if a misprediction occurs (probability of 1/256).
+
+#### Results
+
+The following 10 designs were simulated and the average number of clock cycles to execute a RandomX program (256 instructions) was measured.
+
+|design|superscalar config.|reordering|branch handling|execution time [cycles]|IPC|
+|-------|-----------|----------|---------------|-----------------------|---|
+|#1|1 EXU + 1 MEM|in-order|non-speculative|293|0.87|
+|#2|1 EXU + 1 MEM|in-order|speculative|262|0.98|
+|#3|2 EXU + 1 MEM|in-order|non-speculative|197|1.3|
+|#4|2 EXU + 1 MEM|in-order|speculative|161|1.6|
+|#5|2 EXU + 1 MEM|out-of-order|non-speculative|144|1.8|
+|#6|2 EXU + 1 MEM|out-of-order|speculative|122|2.1|
+|#7|4 EXU + 2 MEM|in-order|non-speculative|135|1.9|
+|#8|4 EXU + 2 MEM|in-order|speculative|99|2.6|
+|#9|4 EXU + 2 MEM|out-of-order|non-speculative|89|2.9|
+|#10|4 EXU + 2 MEM|out-of-order|speculative|64|4.0|
+
+The benefits of superscalar, out-of-order and speculative designs are clearly demonstrated.
+
+### C. RandomX runtime distribution
+
+Runtime numbers were measured on AMD Ryzen 7 1700 running at 3.0 GHz using 1 core. Source code to measure program execution and verification times is available in [runtime-distr.cpp](../src/tests/runtime-distr.cpp). Source code to measure the performance of the x86 JIT compiler is available in [jit-performance.cpp](../src/tests/jit-performance.cpp).
+
+#### Fast mode - program execution
+
+The following figure shows the distribution of the runtimes of a single VM program (in fast mode). This includes: program generation, JIT compilation, VM execution and Blake2b hash of the register file. Program generation and JIT compilation were measured to take 3.6 μs per program.
+
+
+
+AMD Ryzen 7 1700 can calculate 625 hashes per second in fast mode (using 1 thread), which means a single hash result takes 1600 μs (1.6 ms). This consists of (approximately):
+
+* 1480 μs for VM execution (8 programs)
+* 45 μs for initial Scratchpad fill (AesGenerator1R).
+* 45 μs for final Scratchpad hash (AesHash1R).
+* 30 μs for program generation and JIT compilation (8 programs)
+
+This gives a total overhead of 7.5% (time per hash spent not executing VM).
+
+#### Light mode - verification time
+
+The following figure shows the distribution of times to calculate 1 hash result using the light mode. Most of the time is spent executing SuperscalarHash to calculate Dataset items (13.2 ms out of 14.8 ms). The average verification time exactly matches the performance of the CryptoNight algorithm.
+
+
+
+### D. Scratchpad entropy analysis
+
+The average entropy of the Scratchpad after 8 program executions was approximated using the LZMA compression algorithm:
+
+1. Hash results were calculated and the final scratchpads were written to disk as files with '.spad' extension (source code: [scratchpad-entropy.cpp](../src/tests/scratchpad-entropy.cpp))
+2. The files were compressed using 7-Zip [[29](https://www.7-zip.org/)] in Ultra compression mode: `7z.exe a -t7z -m0=lzma2 -mx=9 scratchpads.7z *.spad`
+
+The size of the resulting archive is approximately 99.98% of the uncompressed size of the scratchpad files. This shows that the Scratchpad retains high entropy during VM execution.
+
+### E. SuperscalarHash analysis
+
+SuperscalarHash is a custom function used by RandomX to generate Dataset items. It operates on 8 integer registers and uses a random sequence of instructions. About 1/3 of the instructions are multiplications.
+
+The following figure shows the sensitivity of SuperscalarHash to changing a single bit of an input register:
+
+
+
+This shows that SuperscalarHash has quite low sensitivity to high-order bits and somewhat decreased sensitivity to the lowest-order bits. Sensitivity is highest for bits 3-53 (inclusive).
+
+When calculating a Dataset item, the input of the first SuperscalarHash depends only on the item number. To ensure a good distribution of results, the constants described in section 7.3 of the Specification were chosen to provide unique values of bits 3-53 for *all* item numbers in the range 0-34078718 (the Dataset contains 34078719 items). All initial register values for all Dataset item numbers were checked to make sure bits 3-53 of each register are unique and there are no collisions (source code: [superscalar-init.cpp](../src/tests/superscalar-init.cpp)). While this is not strictly necessary to get unique output from SuperscalarHash, it's a security precaution that mitigates the non-perfect avalanche properties of the randomly generated SuperscalarHash instances.
+
+### F. Statistical tests of RNG
+
+Both AesGenerator1R and AesGenerator4R were tested using the TestU01 library [[30](http://simul.iro.umontreal.ca/testu01/tu01.html)] intended for empirical testing of random number generators. The source code is available in [rng-tests.cpp](../src/tests/rng-tests.cpp).
+
+The tests sample about 200 MB ("SmallCrush" test), 500 GB ("Crush" test) or 4 TB ("BigCrush" test) of output from each generator. This is considerably more than the amounts generated in RandomX (2176 bytes for AesGenerator4R and 2 MiB for AesGenerator1R), so failures in the tests don't necessarily imply that the generators are not suitable for their use case.
+
+
+#### AesGenerator4R
+The generator passes all tests in the "BigCrush" suite when initialized using the Blake2b hash function:
+
+```
+$ bin/rng-tests 1
+state0 = 67e8bbe567a1c18c91a316faf19fab73
+state1 = 39f7c0e0a8d96512c525852124fdc9fe
+state2 = 7abb07b2c90e04f098261e323eee8159
+state3 = 3df534c34cdfbb4e70f8c0e1826f4cf7
+
+...
+
+========= Summary results of BigCrush =========
+
+ Version: TestU01 1.2.3
+ Generator: AesGenerator4R
+ Number of statistics: 160
+ Total CPU time: 02:50:18.34
+
+ All tests were passed
+```
+
+
+The generator passes all tests in the "Crush" suite even with an initial state set to all zeroes.
+```
+$ bin/rng-tests 0
+state0 = 00000000000000000000000000000000
+state1 = 00000000000000000000000000000000
+state2 = 00000000000000000000000000000000
+state3 = 00000000000000000000000000000000
+
+...
+
+========= Summary results of Crush =========
+
+ Version: TestU01 1.2.3
+ Generator: AesGenerator4R
+ Number of statistics: 144
+ Total CPU time: 00:25:17.95
+
+ All tests were passed
+```
+
+#### AesGenerator1R
+
+The generator passes all tests in the "Crush" suite when initialized using the Blake2b hash function.
+
+```
+$ bin/rng-tests 1
+state0 = 67e8bbe567a1c18c91a316faf19fab73
+state1 = 39f7c0e0a8d96512c525852124fdc9fe
+state2 = 7abb07b2c90e04f098261e323eee8159
+state3 = 3df534c34cdfbb4e70f8c0e1826f4cf7
+
+...
+
+========= Summary results of Crush =========
+
+ Version: TestU01 1.2.3
+ Generator: AesGenerator1R
+ Number of statistics: 144
+ Total CPU time: 00:25:06.07
+
+ All tests were passed
+
+```
+
+When the initial state is initialized to all zeroes, the generator fails 1 test out of 144 tests in the "Crush" suite:
+
+```
+$ bin/rng-tests 0
+state0 = 00000000000000000000000000000000
+state1 = 00000000000000000000000000000000
+state2 = 00000000000000000000000000000000
+state3 = 00000000000000000000000000000000
+
+...
+
+========= Summary results of Crush =========
+
+ Version: TestU01 1.2.3
+ Generator: AesGenerator1R
+ Number of statistics: 144
+ Total CPU time: 00:26:12.75
+ The following tests gave p-values outside [0.001, 0.9990]:
+ (eps means a value < 1.0e-300):
+ (eps1 means a value < 1.0e-15):
+
+ Test p-value
+ ----------------------------------------------
+ 12 BirthdaySpacings, t = 3 1 - 4.4e-5
+ ----------------------------------------------
+ All other tests were passed
+
+```
+
+## References
+
+[1] CryptoNote whitepaper - https://cryptonote.org/whitepaper.pdf
+
+[2] ProgPoW: Inefficient integer multiplications - https://github.com/ifdefelse/ProgPOW/issues/16
+
+[3] Cryptographic Hashing function - https://en.wikipedia.org/wiki/Cryptographic_hash_function
+
+[4] randprog - https://github.com/hyc/randprog
+
+[5] RandomJS - https://github.com/tevador/RandomJS
+
+[6] μop cache - https://en.wikipedia.org/wiki/CPU_cache#Micro-operation_(%CE%BCop_or_uop)_cache
+
+[7] Instruction-level parallelism - https://en.wikipedia.org/wiki/Instruction-level_parallelism
+
+[8] Superscalar processor - https://en.wikipedia.org/wiki/Superscalar_processor
+
+[9] Out-of-order execution - https://en.wikipedia.org/wiki/Out-of-order_execution
+
+[10] Speculative execution - https://en.wikipedia.org/wiki/Speculative_execution
+
+[11] Register renaming - https://en.wikipedia.org/wiki/Register_renaming
+
+[12] Blake2 hashing function - https://blake2.net/
+
+[13] Advanced Encryption Standard - https://en.wikipedia.org/wiki/Advanced_Encryption_Standard
+
+[14] Log-normal distribution - https://en.wikipedia.org/wiki/Log-normal_distribution
+
+[15] CryptoNight hash function - https://cryptonote.org/cns/cns008.txt
+
+[16] Dynamic random-access memory - https://en.wikipedia.org/wiki/Dynamic_random-access_memory
+
+[17] Multi-channel memory architecture - https://en.wikipedia.org/wiki/Multi-channel_memory_architecture
+
+[18] Obelisk GRN1 chip details - https://www.grin-forum.org/t/obelisk-grn1-chip-details/4571
+
+[19] Biryukov et al.: Tradeoff Cryptanalysis of Memory-Hard Functions - https://eprint.iacr.org/2015/227.pdf
+
+[20] SK Hynix 20nm DRAM density - http://en.thelec.kr/news/articleView.html?idxno=20
+
+[21] Branch predictor - https://en.wikipedia.org/wiki/Branch_predictor
+
+[22] Predication - https://en.wikipedia.org/wiki/Predication_(computer_architecture)
+
+[23] CPU cache - https://en.wikipedia.org/wiki/CPU_cache
+
+[24] Cortex-A55 Microarchitecture - https://www.anandtech.com/show/11441/dynamiq-and-arms-new-cpus-cortex-a75-a55/4
+
+[25] AMD Zen+ Microarchitecture - https://en.wikichip.org/wiki/amd/microarchitectures/zen%2B#Memory_Hierarchy
+
+[26] Intel Skylake Microarchitecture - https://en.wikichip.org/wiki/intel/microarchitectures/skylake_(client)#Memory_Hierarchy
+
+[27] Biryukov et al.: Fast and Tradeoff-Resilient Memory-Hard Functions for
+Cryptocurrencies and Password Hashing - https://eprint.iacr.org/2015/430.pdf Table 2, page 8
+
+[28] J. Daemen, V. Rijmen: AES Proposal: Rijndael - https://csrc.nist.gov/csrc/media/projects/cryptographic-standards-and-guidelines/documents/aes-development/rijndael-ammended.pdf page 28
+
+[29] 7-Zip File archiver - https://www.7-zip.org/
+
+[30] TestU01 library - http://simul.iro.umontreal.ca/testu01/tu01.html
diff --git a/src/RandomX/doc/program.asm b/src/RandomX/doc/program.asm
new file mode 100644
index 000000000..93c32f8a0
--- /dev/null
+++ b/src/RandomX/doc/program.asm
@@ -0,0 +1,985 @@
+randomx_isn_0:
+ ; ISMULH_R r0, r3
+ mov rax, r8
+ imul r11
+ mov r8, rdx
+randomx_isn_1:
+ ; IROR_R r0, r6
+ mov ecx, r14d
+ ror r8, cl
+randomx_isn_2:
+ ; FADD_R f1, a2
+ addpd xmm1, xmm10
+randomx_isn_3:
+ ; IXOR_M r1, L1[r5+1954652011]
+ lea eax, [r13d+1954652011]
+ and eax, 16376
+ xor r9, qword ptr [rsi+rax]
+randomx_isn_4:
+ ; FMUL_R e2, a3
+ mulpd xmm6, xmm11
+randomx_isn_5:
+ ; FADD_M f0, L2[r0-772804104]
+ lea eax, [r8d-772804104]
+ and eax, 262136
+ cvtdq2pd xmm12, qword ptr [rsi+rax]
+ addpd xmm0, xmm12
+randomx_isn_6:
+ ; IMUL_R r6, r4
+ imul r14, r12
+randomx_isn_7:
+ ; CBRANCH r5, 1674196118, COND 2
+ add r13, 1674196118
+ test r13, 261120
+ jz randomx_isn_0
+randomx_isn_8:
+ ; ISWAP_R r7, r6
+ xchg r15, r14
+randomx_isn_9:
+ ; ISTORE L1[r1-439821682], r3
+ lea eax, [r9d-439821682]
+ and eax, 16376
+ mov qword ptr [rsi+rax], r11
+randomx_isn_10:
+ ; IXOR_R r2, r4
+ xor r10, r12
+randomx_isn_11:
+ ; FADD_R f2, a1
+ addpd xmm2, xmm9
+randomx_isn_12:
+ ; IXOR_M r0, L1[r1+952699079]
+ lea eax, [r9d+952699079]
+ and eax, 16376
+ xor r8, qword ptr [rsi+rax]
+randomx_isn_13:
+ ; ISMULH_R r5, r2
+ mov rax, r13
+ imul r10
+ mov r13, rdx
+randomx_isn_14:
+ ; INEG_R r4
+ neg r12
+randomx_isn_15:
+ ; INEG_R r1
+ neg r9
+randomx_isn_16:
+ ; IMUL_M r3, L1[r2+620091535]
+ lea eax, [r10d+620091535]
+ and eax, 16376
+ imul r11, qword ptr [rsi+rax]
+randomx_isn_17:
+ ; FADD_R f1, a0
+ addpd xmm1, xmm8
+randomx_isn_18:
+ ; IMUL_RCP r5, 2611385784
+ mov rax, 15169754503470242065
+ imul r13, rax
+randomx_isn_19:
+ ; IXOR_R r2, 922368940
+ xor r10, 922368940
+randomx_isn_20:
+ ; FADD_R f3, a1
+ addpd xmm3, xmm9
+randomx_isn_21:
+ ; IXOR_R r3, r6
+ xor r11, r14
+randomx_isn_22:
+ ; FSWAP_R e1
+ shufpd xmm5, xmm5, 1
+randomx_isn_23:
+ ; ISUB_R r0, r5
+ sub r8, r13
+randomx_isn_24:
+ ; ISTORE L1[r6-1574415460], r7
+ lea eax, [r14d-1574415460]
+ and eax, 16376
+ mov qword ptr [rsi+rax], r15
+randomx_isn_25:
+ ; FADD_M f3, L1[r3+1766115210]
+ lea eax, [r11d+1766115210]
+ and eax, 16376
+ cvtdq2pd xmm12, qword ptr [rsi+rax]
+ addpd xmm3, xmm12
+randomx_isn_26:
+ ; FSCAL_R f1
+ xorps xmm1, xmm15
+randomx_isn_27:
+ ; CBRANCH r2, 1731738265, COND 6
+ add r10, 1731746457
+ test r10, 4177920
+ jz randomx_isn_20
+randomx_isn_28:
+ ; IXOR_R r4, r1
+ xor r12, r9
+randomx_isn_29:
+ ; CBRANCH r4, 1937048537, COND 3
+ add r12, 1937050585
+ test r12, 522240
+ jz randomx_isn_29
+randomx_isn_30:
+ ; ISWAP_R r3, r5
+ xchg r11, r13
+randomx_isn_31:
+ ; ISMULH_R r7, r5
+ mov rax, r15
+ imul r13
+ mov r15, rdx
+randomx_isn_32:
+ ; IMULH_M r6, L1[r2+1879111790]
+ lea ecx, [r10d+1879111790]
+ and ecx, 16376
+ mov rax, r14
+ mul qword ptr [rsi+rcx]
+ mov r14, rdx
+randomx_isn_33:
+ ; IMUL_R r5, r0
+ imul r13, r8
+randomx_isn_34:
+ ; ISWAP_R r5, r0
+ xchg r13, r8
+randomx_isn_35:
+ ; CBRANCH r4, 1174490916, COND 5
+ add r12, 1174499108
+ test r12, 2088960
+ jz randomx_isn_30
+randomx_isn_36:
+ ; CBRANCH r6, -1852457840, COND 8
+ add r14, -1852490608
+ test r14, 16711680
+ jz randomx_isn_36
+randomx_isn_37:
+ ; ISMULH_R r2, r0
+ mov rax, r10
+ imul r8
+ mov r10, rdx
+randomx_isn_38:
+ ; ISUB_R r2, r0
+ sub r10, r8
+randomx_isn_39:
+ ; ISTORE L1[r0-38118463], r5
+ lea eax, [r8d-38118463]
+ and eax, 16376
+ mov qword ptr [rsi+rax], r13
+randomx_isn_40:
+ ; IXOR_R r0, r1
+ xor r8, r9
+randomx_isn_41:
+ ; IMUL_R r6, r4
+ imul r14, r12
+randomx_isn_42:
+ ; ISUB_R r7, r5
+ sub r15, r13
+randomx_isn_43:
+ ; FDIV_M e0, L1[r2+1052956160]
+ lea eax, [r10d+1052956160]
+ and eax, 16376
+ cvtdq2pd xmm12, qword ptr [rsi+rax]
+ andps xmm12, xmm13
+ orps xmm12, xmm14
+ divpd xmm4, xmm12
+randomx_isn_44:
+ ; CBRANCH r1, 1870241002, COND 11
+ add r9, 1870241002
+ test r9, 133693440
+ jz randomx_isn_37
+randomx_isn_45:
+ ; IXOR_R r1, r4
+ xor r9, r12
+randomx_isn_46:
+ ; FMUL_R e3, a1
+ mulpd xmm7, xmm9
+randomx_isn_47:
+ ; IXOR_M r0, L1[r2+839895331]
+ lea eax, [r10d+839895331]
+ and eax, 16376
+ xor r8, qword ptr [rsi+rax]
+randomx_isn_48:
+ ; CBRANCH r2, -2128896196, COND 6
+ add r10, -2128879812
+ test r10, 4177920
+ jz randomx_isn_45
+randomx_isn_49:
+ ; CFROUND r1, 13
+ mov rax, r9
+ and eax, 24576
+ or eax, 40896
+ push rax
+ ldmxcsr dword ptr [rsp]
+ pop rax
+randomx_isn_50:
+ ; ISWAP_R r3, r1
+ xchg r11, r9
+randomx_isn_51:
+ ; IMUL_RCP r1, 4205062916
+ mov rax, 9420568026795290117
+ imul r9, rax
+randomx_isn_52:
+ ; FSUB_R f0, a0
+ subpd xmm0, xmm8
+randomx_isn_53:
+ ; IMUL_R r7, r6
+ imul r15, r14
+randomx_isn_54:
+ ; IADD_RS r1, r2, SHFT 3
+ lea r9, [r9+r10*8]
+randomx_isn_55:
+ ; FSQRT_R e3
+ sqrtpd xmm7, xmm7
+randomx_isn_56:
+ ; FMUL_R e1, a0
+ mulpd xmm5, xmm8
+randomx_isn_57:
+ ; IMUL_RCP r3, 303101651
+ mov rax, 16336962008634921950
+ imul r11, rax
+randomx_isn_58:
+ ; IMUL_RCP r1, 3375482677
+ mov rax, 11735827153567160432
+ imul r9, rax
+randomx_isn_59:
+ ; CBRANCH r6, 2116776661, COND 12
+ add r14, 2117300949
+ test r14, 267386880
+ jz randomx_isn_49
+randomx_isn_60:
+ ; IMUL_R r3, r4
+ imul r11, r12
+randomx_isn_61:
+ ; FMUL_R e3, a0
+ mulpd xmm7, xmm8
+randomx_isn_62:
+ ; ISUB_R r3, 1514378938
+ sub r11, 1514378938
+randomx_isn_63:
+ ; FMUL_R e2, a0
+ mulpd xmm6, xmm8
+randomx_isn_64:
+ ; ISUB_R r4, r6
+ sub r12, r14
+randomx_isn_65:
+ ; FDIV_M e2, L1[r0+1496571595]
+ lea eax, [r8d+1496571595]
+ and eax, 16376
+ cvtdq2pd xmm12, qword ptr [rsi+rax]
+ andps xmm12, xmm13
+ orps xmm12, xmm14
+ divpd xmm6, xmm12
+randomx_isn_66:
+ ; FSUB_R f0, a2
+ subpd xmm0, xmm10
+randomx_isn_67:
+ ; FDIV_M e3, L2[r7-2139079025]
+ lea eax, [r15d-2139079025]
+ and eax, 262136
+ cvtdq2pd xmm12, qword ptr [rsi+rax]
+ andps xmm12, xmm13
+ orps xmm12, xmm14
+ divpd xmm7, xmm12
+randomx_isn_68:
+ ; FSUB_R f2, a2
+ subpd xmm2, xmm10
+randomx_isn_69:
+ ; CBRANCH r3, -1165095866, COND 7
+ add r11, -1165063098
+ test r11, 8355840
+ jz randomx_isn_63
+randomx_isn_70:
+ ; IMULH_R r0, r7
+ mov rax, r8
+ mul r15
+ mov r8, rdx
+randomx_isn_71:
+ ; FMUL_R e2, a0
+ mulpd xmm6, xmm8
+randomx_isn_72:
+ ; FMUL_R e0, a3
+ mulpd xmm4, xmm11
+randomx_isn_73:
+ ; IMUL_RCP r6, 1636610180
+ mov rax, 12102479179596746977
+ imul r14, rax
+randomx_isn_74:
+ ; FMUL_R e2, a2
+ mulpd xmm6, xmm10
+randomx_isn_75:
+ ; ISTORE L2[r2+473418592], r3
+ lea eax, [r10d+473418592]
+ and eax, 262136
+ mov qword ptr [rsi+rax], r11
+randomx_isn_76:
+ ; IADD_M r1, L1[r3-989917936]
+ lea eax, [r11d-989917936]
+ and eax, 16376
+ add r9, qword ptr [rsi+rax]
+randomx_isn_77:
+ ; CBRANCH r2, 1519854177, COND 7
+ add r10, 1519886945
+ test r10, 8355840
+ jz randomx_isn_70
+randomx_isn_78:
+ ; IMUL_R r2, r6
+ imul r10, r14
+randomx_isn_79:
+ ; IMUL_R r4, r1
+ imul r12, r9
+randomx_isn_80:
+ ; FMUL_R e2, a1
+ mulpd xmm6, xmm9
+randomx_isn_81:
+ ; FSCAL_R f2
+ xorps xmm2, xmm15
+randomx_isn_82:
+ ; IXOR_M r2, L1[r1+192323103]
+ lea eax, [r9d+192323103]
+ and eax, 16376
+ xor r10, qword ptr [rsi+rax]
+randomx_isn_83:
+ ; IMUL_R r7, r4
+ imul r15, r12
+randomx_isn_84:
+ ; FADD_R f2, a0
+ addpd xmm2, xmm8
+randomx_isn_85:
+ ; FSUB_M f1, L2[r6-1549504487]
+ lea eax, [r14d-1549504487]
+ and eax, 262136
+ cvtdq2pd xmm12, qword ptr [rsi+rax]
+ subpd xmm1, xmm12
+randomx_isn_86:
+ ; FSUB_R f0, a3
+ subpd xmm0, xmm11
+randomx_isn_87:
+ ; CFROUND r3, 31
+ mov rax, r11
+ rol rax, 46
+ and eax, 24576
+ or eax, 40896
+ push rax
+ ldmxcsr dword ptr [rsp]
+ pop rax
+randomx_isn_88:
+ ; IXOR_R r5, r6
+ xor r13, r14
+randomx_isn_89:
+ ; FADD_R f3, a2
+ addpd xmm3, xmm10
+randomx_isn_90:
+ ; FADD_R f3, a0
+ addpd xmm3, xmm8
+randomx_isn_91:
+ ; FSQRT_R e1
+ sqrtpd xmm5, xmm5
+randomx_isn_92:
+ ; ISUB_R r6, r2
+ sub r14, r10
+randomx_isn_93:
+ ; ISUB_R r0, r4
+ sub r8, r12
+randomx_isn_94:
+ ; FADD_R f1, a2
+ addpd xmm1, xmm10
+randomx_isn_95:
+ ; IMUL_R r1, r2
+ imul r9, r10
+randomx_isn_96:
+ ; FSCAL_R f1
+ xorps xmm1, xmm15
+randomx_isn_97:
+ ; ISTORE L1[r7-1901001017], r7
+ lea eax, [r15d-1901001017]
+ and eax, 16376
+ mov qword ptr [rsi+rax], r15
+randomx_isn_98:
+ ; FADD_R f1, a3
+ addpd xmm1, xmm11
+randomx_isn_99:
+ ; CBRANCH r2, -425599201, COND 9
+ add r10, -425533665
+ test r10, 33423360
+ jz randomx_isn_83
+randomx_isn_100:
+ ; IXOR_R r4, r6
+ xor r12, r14
+randomx_isn_101:
+ ; FMUL_R e0, a3
+ mulpd xmm4, xmm11
+randomx_isn_102:
+ ; FADD_M f0, L1[r0+1590646897]
+ lea eax, [r8d+1590646897]
+ and eax, 16376
+ cvtdq2pd xmm12, qword ptr [rsi+rax]
+ addpd xmm0, xmm12
+randomx_isn_103:
+ ; FMUL_R e0, a1
+ mulpd xmm4, xmm9
+randomx_isn_104:
+ ; IMUL_R r4, r7
+ imul r12, r15
+randomx_isn_105:
+ ; ISUB_R r1, r0
+ sub r9, r8
+randomx_isn_106:
+ ; FSUB_R f1, a2
+ subpd xmm1, xmm10
+randomx_isn_107:
+ ; FMUL_R e1, a1
+ mulpd xmm5, xmm9
+randomx_isn_108:
+ ; FMUL_R e1, a2
+ mulpd xmm5, xmm10
+randomx_isn_109:
+ ; FADD_R f3, a2
+ addpd xmm3, xmm10
+randomx_isn_110:
+ ; IXOR_R r0, r3
+ xor r8, r11
+randomx_isn_111:
+ ; IMUL_R r0, 1421329412
+ imul r8, 1421329412
+randomx_isn_112:
+ ; FSUB_R f0, a2
+ subpd xmm0, xmm10
+randomx_isn_113:
+ ; IMUL_R r5, r4
+ imul r13, r12
+randomx_isn_114:
+ ; IADD_RS r7, r3, SHFT 2
+ lea r15, [r15+r11*4]
+randomx_isn_115:
+ ; FADD_R f3, a3
+ addpd xmm3, xmm11
+randomx_isn_116:
+ ; ISTORE L1[r3-160363922], r0
+ lea eax, [r11d-160363922]
+ and eax, 16376
+ mov qword ptr [rsi+rax], r8
+randomx_isn_117:
+ ; IMULH_R r0, r6
+ mov rax, r8
+ mul r14
+ mov r8, rdx
+randomx_isn_118:
+ ; FSWAP_R f2
+ shufpd xmm2, xmm2, 1
+randomx_isn_119:
+ ; FMUL_R e1, a0
+ mulpd xmm5, xmm8
+randomx_isn_120:
+ ; IROR_R r0, 12
+ ror r8, 12
+randomx_isn_121:
+ ; FADD_M f0, L1[r0+282806289]
+ lea eax, [r8d+282806289]
+ and eax, 16376
+ cvtdq2pd xmm12, qword ptr [rsi+rax]
+ addpd xmm0, xmm12
+randomx_isn_122:
+ ; FADD_M f3, L1[r7+1601529113]
+ lea eax, [r15d+1601529113]
+ and eax, 16376
+ cvtdq2pd xmm12, qword ptr [rsi+rax]
+ addpd xmm3, xmm12
+randomx_isn_123:
+ ; IMUL_RCP r2, 2522040806
+ mov rax, 15707153176462985744
+ imul r10, rax
+randomx_isn_124:
+ ; ISUB_M r0, L1[r3+974906597]
+ lea eax, [r11d+974906597]
+ and eax, 16376
+ sub r8, qword ptr [rsi+rax]
+randomx_isn_125:
+ ; CBRANCH r2, 1508706439, COND 14
+ add r10, 1506609287
+ test r10, 1069547520
+ jz randomx_isn_124
+randomx_isn_126:
+ ; IXOR_R r4, r5
+ xor r12, r13
+randomx_isn_127:
+ ; IMUL_R r7, r2
+ imul r15, r10
+randomx_isn_128:
+ ; IROR_R r4, r0
+ mov ecx, r8d
+ ror r12, cl
+randomx_isn_129:
+ ; CBRANCH r0, -497803311, COND 3
+ add r8, -497804335
+ test r8, 522240
+ jz randomx_isn_126
+randomx_isn_130:
+ ; FSUB_M f0, L1[r3+1789853646]
+ lea eax, [r11d+1789853646]
+ and eax, 16376
+ cvtdq2pd xmm12, qword ptr [rsi+rax]
+ subpd xmm0, xmm12
+randomx_isn_131:
+ ; ISMULH_R r6, r3
+ mov rax, r14
+ imul r11
+ mov r14, rdx
+randomx_isn_132:
+ ; FMUL_R e0, a3
+ mulpd xmm4, xmm11
+randomx_isn_133:
+ ; FSUB_R f2, a1
+ subpd xmm2, xmm9
+randomx_isn_134:
+ ; CBRANCH r3, -1567551204, COND 11
+ add r11, -1567026916
+ test r11, 133693440
+ jz randomx_isn_130
+randomx_isn_135:
+ ; FSUB_M f2, L2[r5+1167508659]
+ lea eax, [r13d+1167508659]
+ and eax, 262136
+ cvtdq2pd xmm12, qword ptr [rsi+rax]
+ subpd xmm2, xmm12
+randomx_isn_136:
+ ; IMUL_R r4, r0
+ imul r12, r8
+randomx_isn_137:
+ ; IMULH_R r7, r6
+ mov rax, r15
+ mul r14
+ mov r15, rdx
+randomx_isn_138:
+ ; FMUL_R e3, a2
+ mulpd xmm7, xmm10
+randomx_isn_139:
+ ; IMUL_R r2, r6
+ imul r10, r14
+randomx_isn_140:
+ ; ISTORE L1[r0+1277653290], r3
+ lea eax, [r8d+1277653290]
+ and eax, 16376
+ mov qword ptr [rsi+rax], r11
+randomx_isn_141:
+ ; IXOR_M r0, L1[r6-2131931958]
+ lea eax, [r14d-2131931958]
+ and eax, 16376
+ xor r8, qword ptr [rsi+rax]
+randomx_isn_142:
+ ; FSUB_R f3, a3
+ subpd xmm3, xmm11
+randomx_isn_143:
+ ; IROL_R r6, r1
+ mov ecx, r9d
+ rol r14, cl
+randomx_isn_144:
+ ; FADD_R f1, a3
+ addpd xmm1, xmm11
+randomx_isn_145:
+ ; FMUL_R e0, a3
+ mulpd xmm4, xmm11
+randomx_isn_146:
+ ; FSQRT_R e0
+ sqrtpd xmm4, xmm4
+randomx_isn_147:
+ ; IADD_RS r7, r4, SHFT 0
+ lea r15, [r15+r12*1]
+randomx_isn_148:
+ ; FSUB_R f3, a1
+ subpd xmm3, xmm9
+randomx_isn_149:
+ ; ISTORE L2[r1-1073333533], r3
+ lea eax, [r9d-1073333533]
+ and eax, 262136
+ mov qword ptr [rsi+rax], r11
+randomx_isn_150:
+ ; FMUL_R e3, a3
+ mulpd xmm7, xmm11
+randomx_isn_151:
+ ; ISUB_R r6, r3
+ sub r14, r11
+randomx_isn_152:
+ ; IMULH_M r7, L2[r1+1647843648]
+ lea ecx, [r9d+1647843648]
+ and ecx, 262136
+ mov rax, r15
+ mul qword ptr [rsi+rcx]
+ mov r15, rdx
+randomx_isn_153:
+ ; FMUL_R e0, a0
+ mulpd xmm4, xmm8
+randomx_isn_154:
+ ; IROR_R r3, r0
+ mov ecx, r8d
+ ror r11, cl
+randomx_isn_155:
+ ; IADD_M r3, L1[r7-1322060518]
+ lea eax, [r15d-1322060518]
+ and eax, 16376
+ add r11, qword ptr [rsi+rax]
+randomx_isn_156:
+ ; CBRANCH r3, 608981196, COND 1
+ add r11, 608981708
+ test r11, 130560
+ jz randomx_isn_156
+randomx_isn_157:
+ ; FSUB_M f0, L2[r7-252644586]
+ lea eax, [r15d-252644586]
+ and eax, 262136
+ cvtdq2pd xmm12, qword ptr [rsi+rax]
+ subpd xmm0, xmm12
+randomx_isn_158:
+ ; CBRANCH r2, 868397474, COND 15
+ add r10, 864203170
+ test r10, 2139095040
+ jz randomx_isn_157
+randomx_isn_159:
+ ; ISUB_R r5, r3
+ sub r13, r11
+randomx_isn_160:
+ ; FMUL_R e0, a0
+ mulpd xmm4, xmm8
+randomx_isn_161:
+ ; FMUL_R e2, a1
+ mulpd xmm6, xmm9
+randomx_isn_162:
+ ; CBRANCH r0, 887338591, COND 6
+ add r8, 887346783
+ test r8, 4177920
+ jz randomx_isn_159
+randomx_isn_163:
+ ; IADD_RS r3, r3, SHFT 3
+ lea r11, [r11+r11*8]
+randomx_isn_164:
+ ; IMUL_RCP r7, 3593878304
+ mov rax, 11022655166993703745
+ imul r15, rax
+randomx_isn_165:
+ ; CBRANCH r0, 1452880957, COND 13
+ add r8, 1453929533
+ test r8, 534773760
+ jz randomx_isn_163
+randomx_isn_166:
+ ; ISUB_M r6, L2[r3+1539038396]
+ lea eax, [r11d+1539038396]
+ and eax, 262136
+ sub r14, qword ptr [rsi+rax]
+randomx_isn_167:
+ ; IMUL_RCP r3, 1202036339
+ mov rax, 16477905023274079568
+ imul r11, rax
+randomx_isn_168:
+ ; CBRANCH r1, -1295757940, COND 13
+ add r9, -1293660788
+ test r9, 534773760
+ jz randomx_isn_166
+randomx_isn_169:
+ ; FADD_M f2, L1[r2+876697387]
+ lea eax, [r10d+876697387]
+ and eax, 16376
+ cvtdq2pd xmm12, qword ptr [rsi+rax]
+ addpd xmm2, xmm12
+randomx_isn_170:
+ ; IMUL_R r0, r6
+ imul r8, r14
+randomx_isn_171:
+ ; FMUL_R e1, a3
+ mulpd xmm5, xmm11
+randomx_isn_172:
+ ; FMUL_R e0, a2
+ mulpd xmm4, xmm10
+randomx_isn_173:
+ ; FSUB_M f3, L1[r2-1083472792]
+ lea eax, [r10d-1083472792]
+ and eax, 16376
+ cvtdq2pd xmm12, qword ptr [rsi+rax]
+ subpd xmm3, xmm12
+randomx_isn_174:
+ ; CBRANCH r1, -1476890738, COND 14
+ add r9, -1478987890
+ test r9, 1069547520
+ jz randomx_isn_169
+randomx_isn_175:
+ ; ISUB_R r4, r7
+ sub r12, r15
+randomx_isn_176:
+ ; ISUB_R r0, 1685118604
+ sub r8, 1685118604
+randomx_isn_177:
+ ; FMUL_R e0, a1
+ mulpd xmm4, xmm9
+randomx_isn_178:
+ ; ISUB_M r0, L1[r7-1897974312]
+ lea eax, [r15d-1897974312]
+ and eax, 16376
+ sub r8, qword ptr [rsi+rax]
+randomx_isn_179:
+ ; IXOR_R r4, r0
+ xor r12, r8
+randomx_isn_180:
+ ; IXOR_R r7, r2
+ xor r15, r10
+randomx_isn_181:
+ ; FSCAL_R f1
+ xorps xmm1, xmm15
+randomx_isn_182:
+ ; ISWAP_R r6, r2
+ xchg r14, r10
+randomx_isn_183:
+ ; IADD_RS r3, r1, SHFT 3
+ lea r11, [r11+r9*8]
+randomx_isn_184:
+ ; ISTORE L1[r6-1997634426], r7
+ lea eax, [r14d-1997634426]
+ and eax, 16376
+ mov qword ptr [rsi+rax], r15
+randomx_isn_185:
+ ; IXOR_R r2, r7
+ xor r10, r15
+randomx_isn_186:
+ ; IMUL_R r4, r3
+ imul r12, r11
+randomx_isn_187:
+ ; IMUL_RCP r7, 1830833174
+ mov rax, 10818593911149047378
+ imul r15, rax
+randomx_isn_188:
+ ; FMUL_R e0, a2
+ mulpd xmm4, xmm10
+randomx_isn_189:
+ ; FADD_R f1, a3
+ addpd xmm1, xmm11
+randomx_isn_190:
+ ; CBRANCH r7, 121030040, COND 15
+ add r15, 129418648
+ test r15, 2139095040
+ jz randomx_isn_188
+randomx_isn_191:
+ ; IADD_RS r6, r1, SHFT 0
+ lea r14, [r14+r9*1]
+randomx_isn_192:
+ ; FSUB_R f3, a2
+ subpd xmm3, xmm10
+randomx_isn_193:
+ ; CBRANCH r5, 1139434462, COND 11
+ add r13, 1139434462
+ test r13, 133693440
+ jz randomx_isn_191
+randomx_isn_194:
+ ; FMUL_R e2, a0
+ mulpd xmm6, xmm8
+randomx_isn_195:
+ ; FMUL_R e2, a3
+ mulpd xmm6, xmm11
+randomx_isn_196:
+ ; CBRANCH r4, 429294077, COND 2
+ add r12, 429295101
+ test r12, 261120
+ jz randomx_isn_194
+randomx_isn_197:
+ ; IMUL_R r1, r2
+ imul r9, r10
+randomx_isn_198:
+ ; FMUL_R e3, a0
+ mulpd xmm7, xmm8
+randomx_isn_199:
+ ; IMUL_R r2, r3
+ imul r10, r11
+randomx_isn_200:
+ ; IMUL_RCP r1, 193535702
+ mov rax, 12792885514067893012
+ imul r9, rax
+randomx_isn_201:
+ ; IMUL_R r0, r5
+ imul r8, r13
+randomx_isn_202:
+ ; ISUB_R r1, r2
+ sub r9, r10
+randomx_isn_203:
+ ; FSUB_R f0, a3
+ subpd xmm0, xmm11
+randomx_isn_204:
+ ; FSQRT_R e3
+ sqrtpd xmm7, xmm7
+randomx_isn_205:
+ ; FMUL_R e0, a3
+ mulpd xmm4, xmm11
+randomx_isn_206:
+ ; IMUL_R r2, r1
+ imul r10, r9
+randomx_isn_207:
+ ; IADD_RS r1, r1, SHFT 3
+ lea r9, [r9+r9*8]
+randomx_isn_208:
+ ; ISUB_R r6, r4
+ sub r14, r12
+randomx_isn_209:
+ ; ISUB_R r0, r7
+ sub r8, r15
+randomx_isn_210:
+ ; IADD_M r6, L1[r1+313140284]
+ lea eax, [r9d+313140284]
+ and eax, 16376
+ add r14, qword ptr [rsi+rax]
+randomx_isn_211:
+ ; CBRANCH r4, 1358359929, COND 11
+ add r12, 1358622073
+ test r12, 133693440
+ jz randomx_isn_197
+randomx_isn_212:
+ ; FSQRT_R e0
+ sqrtpd xmm4, xmm4
+randomx_isn_213:
+ ; ISTORE L1[r3+18641493], r5
+ lea eax, [r11d+18641493]
+ and eax, 16376
+ mov qword ptr [rsi+rax], r13
+randomx_isn_214:
+ ; CBRANCH r2, 1232471888, COND 7
+ add r10, 1232504656
+ test r10, 8355840
+ jz randomx_isn_212
+randomx_isn_215:
+ ; IADD_M r1, L1[r3+1138069575]
+ lea eax, [r11d+1138069575]
+ and eax, 16376
+ add r9, qword ptr [rsi+rax]
+randomx_isn_216:
+ ; FSQRT_R e0
+ sqrtpd xmm4, xmm4
+randomx_isn_217:
+ ; IMUL_R r3, r4
+ imul r11, r12
+randomx_isn_218:
+ ; FMUL_R e3, a3
+ mulpd xmm7, xmm11
+randomx_isn_219:
+ ; IROL_R r7, r1
+ mov ecx, r9d
+ rol r15, cl
+randomx_isn_220:
+ ; FMUL_R e2, a1
+ mulpd xmm6, xmm9
+randomx_isn_221:
+ ; IXOR_M r2, L3[697832]
+ xor r10, qword ptr [rsi+697832]
+randomx_isn_222:
+ ; IADD_RS r1, r6, SHFT 2
+ lea r9, [r9+r14*4]
+randomx_isn_223:
+ ; ISWAP_R r6, r2
+ xchg r14, r10
+randomx_isn_224:
+ ; ISUB_R r0, r1
+ sub r8, r9
+randomx_isn_225:
+ ; FSQRT_R e3
+ sqrtpd xmm7, xmm7
+randomx_isn_226:
+ ; ISUB_R r5, r1
+ sub r13, r9
+randomx_isn_227:
+ ; ISTORE L1[r0+238217802], r2
+ lea eax, [r8d+238217802]
+ and eax, 16376
+ mov qword ptr [rsi+rax], r10
+randomx_isn_228:
+ ; IMUL_RCP r5, 324261767
+ mov rax, 15270872674734795667
+ imul r13, rax
+randomx_isn_229:
+ ; FSCAL_R f0
+ xorps xmm0, xmm15
+randomx_isn_230:
+ ; FSQRT_R e3
+ sqrtpd xmm7, xmm7
+randomx_isn_231:
+ ; IROL_R r1, r5
+ mov ecx, r13d
+ rol r9, cl
+randomx_isn_232:
+ ; ISUB_R r6, r1
+ sub r14, r9
+randomx_isn_233:
+ ; FADD_R f2, a0
+ addpd xmm2, xmm8
+randomx_isn_234:
+ ; FADD_R f1, a3
+ addpd xmm1, xmm11
+randomx_isn_235:
+ ; IXOR_R r3, 1240450588
+ xor r11, 1240450588
+randomx_isn_236:
+ ; FSUB_R f1, a2
+ subpd xmm1, xmm10
+randomx_isn_237:
+ ; IMULH_R r6, r3
+ mov rax, r14
+ mul r11
+ mov r14, rdx
+randomx_isn_238:
+ ; FSUB_R f1, a3
+ subpd xmm1, xmm11
+randomx_isn_239:
+ ; FSUB_R f1, a2
+ subpd xmm1, xmm10
+randomx_isn_240:
+ ; FSUB_M f1, L1[r7+1330184615]
+ lea eax, [r15d+1330184615]
+ and eax, 16376
+ cvtdq2pd xmm12, qword ptr [rsi+rax]
+ subpd xmm1, xmm12
+randomx_isn_241:
+ ; FMUL_R e2, a3
+ mulpd xmm6, xmm11
+randomx_isn_242:
+ ; CBRANCH r3, -427325404, COND 11
+ add r11, -427063260
+ test r11, 133693440
+ jz randomx_isn_236
+randomx_isn_243:
+ ; IMUL_R r5, r7
+ imul r13, r15
+randomx_isn_244:
+ ; FMUL_R e3, a3
+ mulpd xmm7, xmm11
+randomx_isn_245:
+ ; ISMULH_M r7, L1[r0-84959236]
+ lea ecx, [r8d-84959236]
+ and ecx, 16376
+ mov rax, r15
+ imul qword ptr [rsi+rcx]
+ mov r15, rdx
+randomx_isn_246:
+ ; IMUL_R r6, r1
+ imul r14, r9
+randomx_isn_247:
+ ; FMUL_R e2, a1
+ mulpd xmm6, xmm9
+randomx_isn_248:
+ ; IADD_M r1, L2[r3+1223504721]
+ lea eax, [r11d+1223504721]
+ and eax, 262136
+ add r9, qword ptr [rsi+rax]
+randomx_isn_249:
+ ; FADD_R f1, a2
+ addpd xmm1, xmm10
+randomx_isn_250:
+ ; IXOR_M r4, L1[r2-1447740505]
+ lea eax, [r10d-1447740505]
+ and eax, 16376
+ xor r12, qword ptr [rsi+rax]
+randomx_isn_251:
+ ; IXOR_R r0, r5
+ xor r8, r13
+randomx_isn_252:
+ ; CBRANCH r4, -1337905977, COND 4
+ add r12, -1337903929
+ test r12, 1044480
+ jz randomx_isn_251
+randomx_isn_253:
+ ; FSUB_R f1, a1
+ subpd xmm1, xmm9
+randomx_isn_254:
+ ; FMUL_R e0, a0
+ mulpd xmm4, xmm8
+randomx_isn_255:
+ ; CBRANCH r5, 437071043, COND 11
+ add r13, 436808899
+ test r13, 133693440
+ jz randomx_isn_253
diff --git a/src/RandomX/doc/specs.md b/src/RandomX/doc/specs.md
new file mode 100644
index 000000000..f2ab8b24b
--- /dev/null
+++ b/src/RandomX/doc/specs.md
@@ -0,0 +1,943 @@
+# RandomX
+
+RandomX is a proof of work (PoW) algorithm which was designed to close the gap between general-purpose CPUs and specialized hardware. The core of the algorithm is a simulation of a virtual CPU.
+
+#### Table of contents
+
+1. [Definitions](#1-definitions)
+1. [Algorithm description](#2-algorithm-description)
+1. [Custom functions](#3-custom-functions)
+1. [Virtual Machine](#4-virtual-machine)
+1. [Instruction set](#5-instruction-set)
+1. [SuperscalarHash](#6-superscalarhash)
+1. [Dataset](#7-dataset)
+
+
+## 1. Definitions
+
+### 1.1 General definitions
+
+**Hash256** and **Hash512** refer to the [Blake2b](https://blake2.net/blake2_20130129.pdf) hashing function with a 256-bit and 512-bit output size, respectively.
+
+**Floating point format** refers to the [IEEE-754 double precision floating point format](https://en.wikipedia.org/wiki/Double-precision_floating-point_format) with a sign bit, 11-bit exponent and 52-bit fraction.
+
+**Argon2d** is a tradeoff-resistant variant of [Argon2](https://github.com/P-H-C/phc-winner-argon2/blob/master/argon2-specs.pdf), a memory-hard password derivation function.
+
+**AesGenerator1R** refers to an AES-based pseudo-random number generator described in chapter 3.2. It's initialized with a 512-bit seed value and is capable of producing more than 10 bytes per clock cycle.
+
+**AesGenerator4R** is a slower but more secure AES-based pseudo-random number generator described in chapter 3.3. It's initialized with a 512-bit seed value.
+
+**AesHash1R** refers to an AES-based fingerprinting function described in chapter 3.4. It's capable of processing more than 10 bytes per clock cycle and produces a 512-bit output.
+
+**BlakeGenerator** refers to a custom pseudo-random number generator described in chapter 3.5. It's based on the Blake2b hashing function.
+
+**SuperscalarHash** refers to a custom diffusion function designed to run efficiently on superscalar CPUs (see chapter 6). It transforms a 64-byte input value into a 64-byte output value.
+
+**Virtual Machine** or **VM** refers to the RandomX virtual machine as described in chapter 4.
+
+**Programming the VM** refers to the act of loading a program and configuration into the VM. This is described in chapter 4.5.
+
+**Executing the VM** refers to the act of running the program loop as described in chapter 4.6.
+
+**Scratchpad** refers to the workspace memory of the VM. The whole scratchpad is structured into 3 levels: L3 -> L2 -> L1 with each lower level being a subset of the higher levels.
+
+**Register File** refers to a 256-byte sequence formed by concatenating VM registers in little-endian format in the following order: `r0`-`r7`, `f0`-`f3`, `e0`-`e3` and `a0`-`a3`.
+
+**Program Buffer** refers to the buffer from which the VM reads instructions.
+
+**Cache** refers to a read-only buffer initialized by Argon2d as described in chapter 7.1.
+
+**Dataset** refers to a large read-only buffer described in chapter 7. It is constructed from the Cache using the SuperscalarHash function.
+
+### 1.2 Configurable parameters
+RandomX has several configurable parameters that are listed in Table 1.2.1 with their default values.
+
+*Table 1.2.1 - Configurable parameters*
+
+|parameter|description|default value|
+|---------|-----|-------|
+|`RANDOMX_ARGON_MEMORY`|The number of 1 KiB Argon2 blocks in the Cache| `262144`|
+|`RANDOMX_ARGON_ITERATIONS`|The number of Argon2d iterations for Cache initialization|`3`|
+|`RANDOMX_ARGON_LANES`|The number of parallel lanes for Cache initialization|`1`|
+|`RANDOMX_ARGON_SALT`|Argon2 salt|`"RandomX\x03"`|
+|`RANDOMX_CACHE_ACCESSES`|The number of random Cache accesses per Dataset item|`8`|
+|`RANDOMX_SUPERSCALAR_LATENCY`|Target latency for SuperscalarHash (in cycles of the reference CPU)|`170`|
+|`RANDOMX_DATASET_BASE_SIZE`|Dataset base size in bytes|`2147483648`|
+|`RANDOMX_DATASET_EXTRA_SIZE`|Dataset extra size in bytes|`33554368`|
+|`RANDOMX_PROGRAM_SIZE`|The number of instructions in a RandomX program|`256`|
+|`RANDOMX_PROGRAM_ITERATIONS`|The number of iterations per program|`2048`|
+|`RANDOMX_PROGRAM_COUNT`|The number of programs per hash|`8`|
+|`RANDOMX_JUMP_BITS`|Jump condition mask size in bits|`8`|
+|`RANDOMX_JUMP_OFFSET`|Jump condition mask offset in bits|`8`|
+|`RANDOMX_SCRATCHPAD_L3`|Scratchpad L3 size in bytes|`2097152`|
+|`RANDOMX_SCRATCHPAD_L2`|Scratchpad L2 size in bytes|`262144`|
+|`RANDOMX_SCRATCHPAD_L1`|Scratchpad L1 size in bytes|`16384`|
+
+Instruction frequencies listed in Tables 5.2.1, 5.3.1, 5.4.1 and 5.5.1 are also configurable.
+
+
+## 2. Algorithm description
+
+The RandomX algorithm accepts two input values:
+
+* String `K` with a size of 0-60 bytes (key)
+* String `H` of arbitrary length (the value to be hashed)
+
+and outputs a 256-bit result `R`.
+
+The algorithm consists of the following steps:
+
+1. The Dataset is initialized using the key value `K` (described in chapter 7).
+1. 64-byte seed `S` is calculated as `S = Hash512(H)`.
+1. Let `gen1 = AesGenerator1R(S)`.
+1. The Scratchpad is filled with `RANDOMX_SCRATCHPAD_L3` random bytes using generator `gen1`.
+1. Let `gen4 = AesGenerator4R(gen1.state)` (use the final state of `gen1`).
+1. The value of the VM register `fprc` is set to 0 (default rounding mode - chapter 4.3).
+1. The VM is programmed using `128 + 8 * RANDOMX_PROGRAM_SIZE` random bytes using generator `gen4` (chapter 4.5).
+1. The VM is executed (chapter 4.6).
+1. A new 64-byte seed is calculated as `S = Hash512(RegisterFile)`.
+1. Set `gen4.state = S` (modify the state of the generator).
+1. Steps 7-10 are performed a total of `RANDOMX_PROGRAM_COUNT` times. The last iteration skips steps 9 and 10.
+1. Scratchpad fingerprint is calculated as `A = AesHash1R(Scratchpad)`.
+1. Bytes 192-255 of the Register File are set to the value of `A`.
+1. Result is calculated as `R = Hash256(RegisterFile)`.
+
+The input of the `Hash512` function in step 9 is the following 256 bytes:
+```
+ +---------------------------------+
+ | registers r0-r7 | (64 bytes)
+ +---------------------------------+
+ | registers f0-f3 | (64 bytes)
+ +---------------------------------+
+ | registers e0-e3 | (64 bytes)
+ +---------------------------------+
+ | registers a0-a3 | (64 bytes)
+ +---------------------------------+
+```
+
+The input of the `Hash256` function in step 14 is the following 256 bytes:
+```
+ +---------------------------------+
+ | registers r0-r7 | (64 bytes)
+ +---------------------------------+
+ | registers f0-f3 | (64 bytes)
+ +---------------------------------+
+ | registers e0-e3 | (64 bytes)
+ +---------------------------------+
+ | AesHash1R(Scratchpad) | (64 bytes)
+ +---------------------------------+
+```
+
+## 3. Custom functions
+
+### 3.1 Definitions
+
+Two of the custom functions are based on the [Advanced Encryption Standard](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard) (AES).
+
+**AES encryption round** refers to the application of the ShiftRows, SubBytes and MixColumns transformations followed by a XOR with the round key.
+
+**AES decryption round** refers to the application of inverse ShiftRows, inverse SubBytes and inverse MixColumns transformations followed by a XOR with the round key.
+
+### 3.2 AesGenerator1R
+
+AesGenerator1R produces a sequence of pseudo-random bytes.
+
+The internal state of the generator consists of 64 bytes arranged into four columns of 16 bytes each. During each output iteration, every column is decrypted (columns 0, 2) or encrypted (columns 1, 3) with one AES round using the following round keys (one key per column):
+
+```
+key0 = 53 a5 ac 6d 09 66 71 62 2b 55 b5 db 17 49 f4 b4
+key1 = 07 af 7c 6d 0d 71 6a 84 78 d3 25 17 4e dc a1 0d
+key2 = f1 62 12 3f c6 7e 94 9f 4f 79 c0 f4 45 e3 20 3e
+key3 = 35 81 ef 6a 7c 31 ba b1 88 4c 31 16 54 91 16 49
+```
+These keys were generated as:
+```
+key0, key1, key2, key3 = Hash512("RandomX AesGenerator1R keys")
+```
+
+
+A single iteration produces 64 bytes of output which also become the new generator state.
+```
+state0 (16 B) state1 (16 B) state2 (16 B) state3 (16 B)
+ | | | |
+ AES decrypt AES encrypt AES decrypt AES encrypt
+ (key0) (key1) (key2) (key3)
+ | | | |
+ v v v v
+ state0' state1' state2' state3'
+```
+
+### 3.3 AesGenerator4R
+
+AesGenerator4R works in a similar way to AesGenerator1R, except it uses 4 rounds per column. Columns 0 and 1 use a different set of keys than columns 2 and 3.
+
+```
+state0 (16 B) state1 (16 B) state2 (16 B) state3 (16 B)
+ | | | |
+ AES decrypt AES encrypt AES decrypt AES encrypt
+ (key0) (key0) (key4) (key4)
+ | | | |
+ v v v v
+ AES decrypt AES encrypt AES decrypt AES encrypt
+ (key1) (key1) (key5) (key5)
+ | | | |
+ v v v v
+ AES decrypt AES encrypt AES decrypt AES encrypt
+ (key2) (key2) (key6) (key6)
+ | | | |
+ v v v v
+ AES decrypt AES encrypt AES decrypt AES encrypt
+ (key3) (key3) (key7) (key7)
+ | | | |
+ v v v v
+ state0' state1' state2' state3'
+```
+
+AesGenerator4R uses the following 8 round keys:
+
+```
+key0 = dd aa 21 64 db 3d 83 d1 2b 6d 54 2f 3f d2 e5 99
+key1 = 50 34 0e b2 55 3f 91 b6 53 9d f7 06 e5 cd df a5
+key2 = 04 d9 3e 5c af 7b 5e 51 9f 67 a4 0a bf 02 1c 17
+key3 = 63 37 62 85 08 5d 8f e7 85 37 67 cd 91 d2 de d8
+key4 = 73 6f 82 b5 a6 a7 d6 e3 6d 8b 51 3d b4 ff 9e 22
+key5 = f3 6b 56 c7 d9 b3 10 9c 4e 4d 02 e9 d2 b7 72 b2
+key6 = e7 c9 73 f2 8b a3 65 f7 0a 66 a9 2b a7 ef 3b f6
+key7 = 09 d6 7c 7a de 39 58 91 fd d1 06 0c 2d 76 b0 c0
+```
+These keys were generated as:
+```
+key0, key1, key2, key3 = Hash512("RandomX AesGenerator4R keys 0-3")
+key4, key5, key6, key7 = Hash512("RandomX AesGenerator4R keys 4-7")
+```
+
+### 3.4 AesHash1R
+
+AesHash1R calculates a 512-bit fingerprint of its input.
+
+AesHash1R has a 64-byte internal state, which is arranged into four columns of 16 bytes each. The initial state is:
+
+```
+state0 = 0d 2c b5 92 de 56 a8 9f 47 db 82 cc ad 3a 98 d7
+state1 = 6e 99 8d 33 98 b7 c7 15 5a 12 9e f5 57 80 e7 ac
+state2 = 17 00 77 6a d0 c7 62 ae 6b 50 79 50 e4 7c a0 e8
+state3 = 0c 24 0a 63 8d 82 ad 07 05 00 a1 79 48 49 99 7e
+```
+
+The initial state vectors were generated as:
+```
+state0, state1, state2, state3 = Hash512("RandomX AesHash1R state")
+```
+
+The input is processed in 64-byte blocks. Each input block is considered to be a set of four AES round keys `key0`, `key1`, `key2`, `key3`. Each state column is encrypted (columns 0, 2) or decrypted (columns 1, 3) with one AES round using the corresponding round key:
+
+```
+state0 (16 B) state1 (16 B) state2 (16 B) state3 (16 B)
+ | | | |
+ AES encrypt AES decrypt AES encrypt AES decrypt
+ (key0) (key1) (key2) (key3)
+ | | | |
+ v v v v
+ state0' state1' state2' state3'
+```
+
+When all input bytes have been processed, the state is processed with two additional AES rounds with the following extra keys (one key per round, same pair of keys for all columns):
+
+```
+xkey0 = 89 83 fa f6 9f 94 24 8b bf 56 dc 90 01 02 89 06
+xkey1 = d1 63 b2 61 3c e0 f4 51 c6 43 10 ee 9b f9 18 ed
+```
+
+The extra keys were generated as:
+```
+xkey0, xkey1 = Hash256("RandomX AesHash1R xkeys")
+```
+
+```
+state0 (16 B) state1 (16 B) state2 (16 B) state3 (16 B)
+ | | | |
+ AES encrypt AES decrypt AES encrypt AES decrypt
+ (xkey0) (xkey0) (xkey0) (xkey0)
+ | | | |
+ v v v v
+ AES encrypt AES decrypt AES encrypt AES decrypt
+ (xkey1) (xkey1) (xkey1) (xkey1)
+ | | | |
+ v v v v
+finalState0 finalState1 finalState2 finalState3
+```
+
+The final state is the output of the function.
+
+### 3.5 BlakeGenerator
+
+BlakeGenerator is a simple pseudo-random number generator based on the Blake2b hashing function. It has a 64-byte internal state `S`.
+
+#### 3.5.1 Initialization
+
+The internal state is initialized from a seed value `K` (0-60 bytes long). The seed value is written into the internal state and padded with zeroes. Then the internal state is initialized as `S = Hash512(S)`.
+
+#### 3.5.2 Random number generation
+
+The generator can generate 1 byte or 4 bytes at a time by supplying data from its internal state `S`. If there are not enough unused bytes left, the internal state is reinitialized as `S = Hash512(S)`.
+
+## 4. Virtual Machine
+
+The components of the RandomX virtual machine are summarized in Fig. 4.1.
+
+*Figure 4.1 - Virtual Machine*
+
+
+
+The VM is a complex instruction set computer ([CISC](https://en.wikipedia.org/wiki/Complex_instruction_set_computer)). All data are loaded and stored in little-endian byte order. Signed integer numbers are represented using [two's complement](https://en.wikipedia.org/wiki/Two%27s_complement).
+
+### 4.1 Dataset
+
+Dataset is described in detail in chapter 7. It's a large read-only buffer. Its size is equal to `RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE` bytes. Each program uses only a random subset of the Dataset of size `RANDOMX_DATASET_BASE_SIZE`. All Dataset accesses read an aligned 64-byte item.
+
+### 4.2 Scratchpad
+
+Scratchpad represents the workspace memory of the VM. Its size is `RANDOMX_SCRATCHPAD_L3` bytes and it's divided into 3 "levels":
+
+* The whole scratchpad is the third level "L3".
+* The first `RANDOMX_SCRATCHPAD_L2` bytes of the scratchpad is the second level "L2".
+* The first `RANDOMX_SCRATCHPAD_L1` bytes of the scratchpad is the first level "L1".
+
+The scratchpad levels are inclusive, i.e. L3 contains both L2 and L1 and L2 contains L1.
+
+To access a particular scratchpad level, bitwise AND with a mask according to table 4.2.1 is applied to the memory address.
+
+*Table 4.2.1: Scratchpad access masks*
+
+|Level|8-byte aligned mask|64-byte aligned mask|
+|---------|-|-|
+|L1|`(RANDOMX_SCRATCHPAD_L1 - 1) & ~7`|-|
+|L2|`(RANDOMX_SCRATCHPAD_L2 - 1) & ~7`|-|
+|L3|`(RANDOMX_SCRATCHPAD_L3 - 1) & ~7`|`(RANDOMX_SCRATCHPAD_L3 - 1) & ~63`|
+
+### 4.3 Registers
+
+The VM has 8 integer registers `r0`-`r7` (group R) and a total of 12 floating point registers split into 3 groups: `f0`-`f3` (group F), `e0`-`e3` (group E) and `a0`-`a3` (group A). Integer registers are 64 bits wide, while floating point registers are 128 bits wide and contain a pair of numbers in floating point format. The lower and upper half of floating point registers are not separately addressable.
+
+Additionally, there are 3 internal registers `ma`, `mx` and `fprc`.
+
+Integer registers `r0`-`r7` can be the source or the destination operands of integer instructions or may be used as address registers for accessing the Scratchpad.
+
+Floating point registers `a0`-`a3` are read-only and their value is fixed for a given VM program. They can be the source operand of any floating point instruction. The value of these registers is restricted to the interval `[1, 4294967296)`.
+
+Floating point registers `f0`-`f3` are the "additive" registers, which can be the destination of floating point addition and subtraction instructions. The absolute value of these registers will not exceed about `3.0e+14`.
+
+Floating point registers `e0`-`e3` are the "multiplicative" registers, which can be the destination of floating point multiplication, division and square root instructions. Their value is always positive.
+
+`ma` and `mx` are the memory registers. Both are 32 bits wide. `ma` contains the memory address of the next Dataset read and `mx` contains the address of the next Dataset prefetch. The values of `ma` and `mx` registers are always aligned to be a multiple of 64.
+
+The 2-bit `fprc` register determines the rounding mode of all floating point operations according to Table 4.3.1. The four rounding modes are defined by the IEEE 754 standard.
+
+*Table 4.3.1: Rounding modes*
+
+|`fprc`|rounding mode|
+|-------|------------|
+|0|roundTiesToEven|
+|1|roundTowardNegative|
+|2|roundTowardPositive|
+|3|roundTowardZero|
+
+#### 4.3.1 Group F register conversion
+
+When an 8-byte value read from the memory is to be converted to an F group register value or operand, it is interpreted as a pair of 32-bit signed integers (in little endian, two's complement format) and converted to floating point format. This conversion is exact and doesn't need rounding because only 30 bits of the fraction significand are needed to represent the integer value.
+
+#### 4.3.2 Group E register conversion
+
+When an 8-byte value read from the memory is to be converted to an E group register value or operand, the same conversion procedure is applied as for F group registers (see 4.3.1) with additional post-processing steps for each of the two floating point values:
+
+1. The sign bit is set to `0`.
+2. Bits 0-2 of the exponent are set to the constant value of $011_2$ (binary `011`).
+3. Bits 3-6 of the exponent are set to the value of the exponent mask described in chapter 4.5.6. This value is fixed for a given VM program.
+4. The bottom 22 bits of the fraction significand are set to the value of the fraction mask described in chapter 4.5.6. This value is fixed for a given VM program.
+
+### 4.4 Program buffer
+
+The Program buffer stores the program to be executed by the VM. The program consists of `RANDOMX_PROGRAM_SIZE` instructions. Each instruction is encoded by an 8-byte word. The instruction set is described in chapter 5.
+
+### 4.5 VM programming
+
+The VM requires `128 + 8 * RANDOMX_PROGRAM_SIZE` bytes to be programmed. This is split into two parts:
+
+* `128` bytes of configuration data = 16 quadwords (16×8 bytes), used according to Table 4.5.1
+* `8 * RANDOMX_PROGRAM_SIZE` bytes of program data, copied directly into the Program Buffer
+
+*Table 4.5.1 - Configuration data*
+
+|quadword|description|
+|-----|-----------|
+|0|initialize low half of register `a0`|
+|1|initialize high half of register `a0`|
+|2|initialize low half of register `a1`|
+|3|initialize high half of register `a1`|
+|4|initialize low half of register `a2`|
+|5|initialize high half of register `a2`|
+|6|initialize low half of register `a3`|
+|7|initialize high half of register `a3`|
+|8|initialize register `ma`|
+|9|(reserved)|
+|10|initialize register `mx`|
+|11|(reserved)|
+|12|select address registers|
+|13|select Dataset offset|
+|14|initialize register masks for low half of group E registers|
+|15|initialize register masks for high half of group E registers|
+
+#### 4.5.2 Group A register initialization
+
+The values of the floating point registers `a0`-`a3` are initialized using configuration quadwords 0-7 to have the following value:
+
+$+1.\mathrm{fraction} \times 2^{\mathrm{exponent}}$
+
+The fraction has full 52 bits of precision and the exponent value ranges from 0 to 31. These values are obtained from the initialization quadword (in little endian format) according to Table 4.5.2.
+
+*Table 4.5.2 - Group A register initialization*
+
+|bits|description|
+|----|-----------|
+|0-51|fraction|
+|52-58|(reserved)|
+|59-63|exponent|
+
+#### 4.5.3 Memory registers
+
+Registers `ma` and `mx` are initialized using the low 32 bits of quadwords 8 and 10 in little endian format.
+
+#### 4.5.4 Address registers
+
+Bits 0-3 of quadword 12 are used to select 4 address registers for program execution. Each bit chooses one register from a pair of integer registers according to Table 4.5.3.
+
+*Table 4.5.3 - Address registers*
+
+|address register (bit)|value = 0|value = 1|
+|----------------------|-|-|
+|`readReg0` (0)|`r0`|`r1`|
+|`readReg1` (1)|`r2`|`r3`|
+|`readReg2` (2)|`r4`|`r5`|
+|`readReg3` (3)|`r6`|`r7`|
+
+#### 4.5.5 Dataset offset
+
+The `datasetOffset` is calculated as the remainder of dividing quadword 13 by `RANDOMX_DATASET_EXTRA_SIZE / 64 + 1`. The result is multiplied by `64`. This offset is used when reading values from the Dataset.
+
+#### 4.5.6 Group E register masks
+
+These masks are used for the conversion of group E registers (see 4.3.2). The low and high halves each have their own masks initialized from quadwords 14 and 15. The fraction mask is given by bits 0-21 and the exponent mask by bits 60-63 of the initialization quadword.
+
+### 4.6 VM execution
+
+During VM execution, 3 additional temporary registers are used: `ic`, `spAddr0` and `spAddr1`. Program execution consists of initialization and loop execution.
+
+#### 4.6.1 Initialization
+
+1. `ic` register is set to `RANDOMX_PROGRAM_ITERATIONS`.
+2. `spAddr0` is set to the value of `mx`.
+3. `spAddr1` is set to the value of `ma`.
+4. The values of all integer registers `r0`-`r7` are set to zero.
+
+#### 4.6.2 Loop execution
+
+The loop described below is repeated until the value of the `ic` register reaches zero.
+
+1. XOR of registers `readReg0` and `readReg1` (see Table 4.5.3) is calculated and `spAddr0` is XORed with the low 32 bits of the result and `spAddr1` with the high 32 bits.
+2. `spAddr0` is used to perform a 64-byte aligned read from Scratchpad level 3 (using mask from Table 4.2.1). The 64 bytes are XORed with all integer registers in order `r0`-`r7`.
+3. `spAddr1` is used to perform a 64-byte aligned read from Scratchpad level 3 (using mask from Table 4.2.1). Each floating point register `f0`-`f3` and `e0`-`e3` is initialized using an 8-byte value according to the conversion rules from chapters 4.3.1 and 4.3.2.
+4. The `RANDOMX_PROGRAM_SIZE` instructions stored in the Program Buffer are executed.
+5. The `mx` register is XORed with the low 32 bits of registers `readReg2` and `readReg3` (see Table 4.5.3).
+6. A 64-byte Dataset item at address `datasetOffset + mx % RANDOMX_DATASET_BASE_SIZE` is prefetched from the Dataset (it will be used during the next iteration).
+7. A 64-byte Dataset item at address `datasetOffset + ma % RANDOMX_DATASET_BASE_SIZE` is loaded from the Dataset. The 64 bytes are XORed with all integer registers in order `r0`-`r7`.
+8. The values of registers `mx` and `ma` are swapped.
+9. The values of all integer registers `r0`-`r7` are written to the Scratchpad (L3) at address `spAddr1` (64-byte aligned).
+10. Register `f0` is XORed with register `e0` and the result is stored in register `f0`. Register `f1` is XORed with register `e1` and the result is stored in register `f1`. Register `f2` is XORed with register `e2` and the result is stored in register `f2`. Register `f3` is XORed with register `e3` and the result is stored in register `f3`.
+11. The values of registers `f0`-`f3` are written to the Scratchpad (L3) at address `spAddr0` (64-byte aligned).
+12. `spAddr0` and `spAddr1` are both set to zero.
+13. `ic` is decreased by 1.
+
+
+## 5. Instruction set
+
+The VM executes programs in a special instruction set, which was designed in such a way that any random 8-byte word is a valid instruction and any sequence of valid instructions is a valid program. Because there are no "syntax" rules, generating a random program is as easy as filling the program buffer with random data.
+
+### 5.1 Instruction encoding
+
+Each instruction word is 64 bits long. Instruction fields are encoded as shown in Fig. 5.1.
+
+*Figure 5.1 - Instruction encoding*
+
+
+
+#### 5.1.1 opcode
+There are 256 opcodes, which are distributed between 29 distinct instructions. Each instruction can be encoded using multiple opcodes (the number of opcodes specifies the frequency of the instruction in a random program).
+
+*Table 5.1.1: Instruction groups*
+
+|group|# instructions|# opcodes||
+|---------|-----------------|----|-|
+|integer |17|120|46.9%|
+|floating point |9|94|36.7%|
+|control |2|26|10.2%|
+|store |1|16|6.2%|
+||**29**|**256**|**100%**
+
+All instructions are described below in chapters 5.2 - 5.5.
+
+#### 5.1.2 dst
+Destination register. Only bits 0-1 (register groups A, F, E) or 0-2 (groups R, F+E) are used to encode a register according to Table 5.1.2.
+
+*Table 5.1.2: Addressable register groups*
+
+|index|R|A|F|E|F+E|
+|--|--|--|--|--|--|
+|0|`r0`|`a0`|`f0`|`e0`|`f0`|
+|1|`r1`|`a1`|`f1`|`e1`|`f1`|
+|2|`r2`|`a2`|`f2`|`e2`|`f2`|
+|3|`r3`|`a3`|`f3`|`e3`|`f3`|
+|4|`r4`||||`e0`|
+|5|`r5`||||`e1`|
+|6|`r6`||||`e2`|
+|7|`r7`||||`e3`|
+
+#### 5.1.3 src
+
+The `src` flag encodes a source operand register according to Table 5.1.2 (only bits 0-1 or 0-2 are used).
+
+Some integer instructions use a constant value as the source operand in cases when `dst` and `src` encode the same register (see Table 5.2.1).
+
+For register-memory instructions, the source operand is used to calculate the memory address.
+
+#### 5.1.4 mod
+
+The `mod` flag is encoded as:
+
+*Table 5.1.3: mod flag encoding*
+
+|`mod` bits|description|range of values|
+|----|--------|----|
+|0-1|`mod.mem` flag|0-3|
+|2-3|`mod.shift` flag|0-3|
+|4-7|`mod.cond` flag|0-15|
+
+The `mod.mem` flag selects between Scratchpad levels L1 and L2 when reading from or writing to memory except for two cases:
+
+* it's a memory read and `dst` and `src` encode the same register
+* it's a memory write and `mod.cond` is 14 or 15
+
+In these two cases, the Scratchpad level is L3 (see Table 5.1.4).
+
+*Table 5.1.4: memory access Scratchpad level*
+
+|condition|Scratchpad level|
+|---------|-|
+|`src == dst` (read)|L3|
+|`mod.cond >= 14` (write)|L3|
+|`mod.mem == 0`|L2|
+|`mod.mem != 0`|L1|
+
+The address for reading/writing is calculated by applying bitwise AND operation to the address and the 8-byte aligned address mask listed in Table 4.2.1.
+
+The `mod.cond` and `mod.shift` flags are used by some instructions (see 5.2, 5.4).
+
+#### 5.1.5 imm32
+A 32-bit immediate value that can be used as the source operand and is used to calculate addresses for memory operations. The immediate value is sign-extended to 64 bits unless specified otherwise.
+
+### 5.2 Integer instructions
+For integer instructions, the destination is always an integer register (register group R). Source operand (if applicable) can be either an integer register or memory value. If `dst` and `src` refer to the same register, most instructions use `0` or `imm32` instead of the register. This is indicated in the 'src == dst' column in Table 5.2.1.
+
+`[mem]` indicates a memory operand loaded as an 8-byte value from the address `src + imm32`.
+
+*Table 5.2.1 Integer instructions*
+
+|frequency|instruction|dst|src|`src == dst ?`|operation|
+|-|-|-|-|-|-|
+|16/256|IADD_RS|R|R|`src = dst`|`dst = dst + (src << mod.shift) (+ imm32)`|
+|7/256|IADD_M|R|R|`src = 0`|`dst = dst + [mem]`|
+|16/256|ISUB_R|R|R|`src = imm32`|`dst = dst - src`|
+|7/256|ISUB_M|R|R|`src = 0`|`dst = dst - [mem]`|
+|16/256|IMUL_R|R|R|`src = imm32`|`dst = dst * src`|
+|4/256|IMUL_M|R|R|`src = 0`|`dst = dst * [mem]`|
+|4/256|IMULH_R|R|R|`src = dst`|`dst = (dst * src) >> 64`|
+|1/256|IMULH_M|R|R|`src = 0`|`dst = (dst * [mem]) >> 64`|
+|4/256|ISMULH_R|R|R|`src = dst`|`dst = (dst * src) >> 64` (signed)|
+|1/256|ISMULH_M|R|R|`src = 0`|`dst = (dst * [mem]) >> 64` (signed)|
+|8/256|IMUL_RCP|R|-|-|$\mathrm{dst} = 2^{x} / \mathrm{imm32} \cdot \mathrm{dst}$|
+|2/256|INEG_R|R|-|-|`dst = -dst`|
+|15/256|IXOR_R|R|R|`src = imm32`|`dst = dst ^ src`|
+|5/256|IXOR_M|R|R|`src = 0`|`dst = dst ^ [mem]`|
+|8/256|IROR_R|R|R|`src = imm32`|`dst = dst >>> src`|
+|2/256|IROL_R|R|R|`src = imm32`|`dst = dst <<< src`|
+|4/256|ISWAP_R|R|R|`src = dst`|`temp = src; src = dst; dst = temp`|
+
+#### 5.2.1 IADD_RS
+
+This instruction adds the values of two registers (modulo $2^{64}$). The value of the second operand is shifted left by 0-3 bits (determined by the `mod.shift` flag). Additionally, if `dst` is register `r5`, the immediate value `imm32` is added to the result.
+
+#### 5.2.2 IADD_M
+
+64-bit integer addition operation (performed modulo $2^{64}$) with a memory source operand.
+
+#### 5.2.3 ISUB_R, ISUB_M
+
+64-bit integer subtraction (performed modulo $2^{64}$). ISUB_R uses a register source operand, ISUB_M uses a memory source operand.
+
+#### 5.2.4 IMUL_R, IMUL_M
+
+64-bit integer multiplication (performed modulo $2^{64}$). IMUL_R uses a register source operand, IMUL_M uses a memory source operand.
+
+#### 5.2.5 IMULH_R, IMULH_M, ISMULH_R, ISMULH_M
+These instructions output the high 64 bits of the whole 128-bit multiplication result. The result differs for signed and unsigned multiplication (IMULH is unsigned, ISMULH is signed). The variants with a register source operand perform a squaring operation if `dst` equals `src`.
+
+#### 5.2.6 IMUL_RCP
+If `imm32` equals 0 or is a power of 2, IMUL_RCP is a no-op. In other cases, the instruction multiplies the destination register by a reciprocal of `imm32` (the immediate value is zero-extended and treated as unsigned). The reciprocal is calculated as $\mathrm{rcp} = 2^{x} / \mathrm{imm32}$ by choosing the largest integer `x` such that $\mathrm{rcp} < 2^{64}$.
+
+#### 5.2.7 INEG_R
+Performs two's complement negation of the destination register.
+
+#### 5.2.8 IXOR_R, IXOR_M
+64-bit exclusive OR operation. IXOR_R uses a register source operand, IXOR_M uses a memory source operand.
+
+#### 5.2.9 IROR_R, IROL_R
+Performs a cyclic shift (rotation) of the destination register. Source operand (shift count) is implicitly masked to 6 bits. IROR rotates bits right, IROL left.
+
+#### 5.2.10 ISWAP_R
+This instruction swaps the values of two registers. If source and destination refer to the same register, the result is a no-op.
+
+### 5.3 Floating point instructions
+For floating point instructions, the destination can be a group F or group E register. Source operand is either a group A register or a memory value.
+
+`[mem]` indicates a memory operand loaded as an 8-byte value from the address `src + imm32` and converted according to the rules in chapters 4.3.1 (group F) or 4.3.2 (group E). The lower and upper memory operands are denoted as `[mem][0]` and `[mem][1]`.
+
+All floating point operations are rounded according to the current value of the `fprc` register (see Table 4.3.1). Due to restrictions on the values of the floating point registers, no operation results in `NaN` or a denormal number.
+
+*Table 5.3.1 Floating point instructions*
+
+|frequency|instruction|dst|src|operation|
+|-|-|-|-|-|
+|4/256|FSWAP_R|F+E|-|`(dst0, dst1) = (dst1, dst0)`|
+|16/256|FADD_R|F|A|`(dst0, dst1) = (dst0 + src0, dst1 + src1)`|
+|5/256|FADD_M|F|R|`(dst0, dst1) = (dst0 + [mem][0], dst1 + [mem][1])`|
+|16/256|FSUB_R|F|A|`(dst0, dst1) = (dst0 - src0, dst1 - src1)`|
+|5/256|FSUB_M|F|R|`(dst0, dst1) = (dst0 - [mem][0], dst1 - [mem][1])`|
+|6/256|FSCAL_R|F|-|$(\mathrm{dst0}, \mathrm{dst1}) = (-2^{x0} \cdot \mathrm{dst0},\ -2^{x1} \cdot \mathrm{dst1})$|
+|32/256|FMUL_R|E|A|`(dst0, dst1) = (dst0 * src0, dst1 * src1)`|
+|4/256|FDIV_M|E|R|`(dst0, dst1) = (dst0 / [mem][0], dst1 / [mem][1])`|
+|6/256|FSQRT_R|E|-|`(dst0, dst1) = (√dst0, √dst1)`|
+
+#### 5.3.1 FSWAP_R
+
+Swaps the lower and upper halves of the destination register. This is the only instruction that is applicable to both F and E register groups.
+
+#### 5.3.2 FADD_R, FADD_M
+
+Double precision floating point addition. FADD_R uses a group A register source operand, FADD_M uses a memory operand.
+
+#### 5.3.3 FSUB_R, FSUB_M
+
+Double precision floating point subtraction. FSUB_R uses a group A register source operand, FSUB_M uses a memory operand.
+
+#### 5.3.4 FSCAL_R
+This instruction negates the number and multiplies it by $2^{x}$. `x` is calculated by taking the 4 least significant digits of the biased exponent and interpreting them as a binary number using the digit set `{+1, -1}` as opposed to the traditional `{0, 1}`. The possible values of `x` are all odd numbers from -15 to +15.
+
+The mathematical operation described above is equivalent to a bitwise XOR of the binary representation with the value of `0x80F0000000000000`.
+
+#### 5.3.5 FMUL_R
+
+Double precision floating point multiplication. This instruction uses only a register source operand.
+
+#### 5.3.6 FDIV_M
+
+Double precision floating point division. This instruction uses only a memory source operand.
+
+#### 5.3.7 FSQRT_R
+
+Double precision floating point square root of the destination register.
+
+### 5.4 Control instructions
+
+There are 2 control instructions.
+
+*Table 5.4.1 - Control instructions*
+
+|frequency|instruction|dst|src|operation|
+|-|-|-|-|-|
+|1/256|CFROUND|-|R|`fprc = src >>> imm32`
+|25/256|CBRANCH|R|-|`dst = dst + cimm`, conditional jump
+
+#### 5.4.1 CFROUND
+This instruction calculates a 2-bit value by rotating the source register right by `imm32` bits and taking the 2 least significant bits (the value of the source register is unaffected). The result is stored in the `fprc` register. This changes the rounding mode of all subsequent floating point instructions.
+
+#### 5.4.2 CBRANCH
+
+This instruction adds an immediate value `cimm` (constructed from `imm32`, see below) to the destination register and then performs a conditional jump in the Program Buffer based on the value of the destination register. The target of the jump is the instruction following the instruction where register `dst` was last modified.
+
+At the beginning of each program iteration, all registers are considered to be unmodified. A register is considered as modified by an instruction in the following cases:
+
+* It is the destination register of an integer instruction except IMUL_RCP and ISWAP_R.
+* It is the destination register of IMUL_RCP and `imm32` is not zero or a power of 2.
+* It is the source or the destination register of ISWAP_R and the destination and source registers are distinct.
+* The CBRANCH instruction is considered to modify all integer registers.
+
+If register `dst` has not been modified yet, the jump target is the first instruction in the Program Buffer.
+
+The CBRANCH instruction performs the following steps:
+
+1. A constant `b` is calculated as `mod.cond + RANDOMX_JUMP_OFFSET`.
+1. A constant `cimm` is constructed as sign-extended `imm32` with bit `b` set to 1 and bit `b-1` set to 0 (if `b > 0`).
+1. `cimm` is added to the destination register.
+1. If bits `b` to `b + RANDOMX_JUMP_BITS - 1` of the destination register are zero, the jump is executed (target is the instruction following the instruction where `dst` was last modified).
+
+Bits in immediate and register values are numbered from 0 to 63 with 0 being the least significant bit. For example, for `b = 10` and `RANDOMX_JUMP_BITS = 8`, the bits are arranged like this:
+
+```
+cimm = SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSMMMMMMMMMMMMMMMMMMMMM10MMMMMMMMM
+ dst = ..............................................XXXXXXXX..........
+```
+
+`S` is a copied sign bit from `imm32`. `M` denotes bits of `imm32`. The 9th bit is set to 0 and the 10th bit is set to 1. This value will be added to `dst`.
+
+The second line uses `X` to mark bits of `dst` that will be checked by the condition. If all these bits are 0 after adding `cimm`, the jump is executed.
+
+The construction of the CBRANCH instruction ensures that no infinite loops are possible in the program.
+
+### 5.5 Store instruction
+There is one explicit store instruction for integer values.
+
+`[mem]` indicates the destination is an 8-byte value at the address `dst + imm32`.
+
+*Table 5.5.1 - Store instruction*
+
+|frequency|instruction|dst|src|operation|
+|-|-|-|-|-|
+|16/256|ISTORE|R|R|`[mem] = src`
+
+#### 5.5.1 ISTORE
+This instruction stores the value of the source integer register to the memory at the address calculated from the value of the destination register. The `src` and `dst` can be the same register.
+
+## 6. SuperscalarHash
+
+SuperscalarHash is a custom diffusion function that was designed to burn as much power as possible using only the CPU's integer ALUs.
+
+The input and output of SuperscalarHash are 8 integer registers `r0`-`r7`, each 64 bits wide. The output of SuperscalarHash is used to construct the Dataset (see chapter 7.3).
+
+### 6.1 Instructions
+The body of SuperscalarHash is a random sequence of instructions that can run on the Virtual Machine. SuperscalarHash uses a reduced set of only integer register-register instructions listed in Table 6.1.1. `dst` refers to the destination register, `src` to the source register.
+
+*Table 6.1.1 - SuperscalarHash instructions*
+
+|freq. †|instruction|Macro-ops|operation|rules|
+|-|-|-|-|-|
+|0.11|ISUB_R|`sub_rr`|`dst = dst - src`|`dst != src`|
+|0.11|IXOR_R|`xor_rr`|`dst = dst ^ src`|`dst != src`|
+|0.11|IADD_RS|`lea_sib`|`dst = dst + (src << mod.shift)`|`dst != src`, `dst != r5`
+|0.22|IMUL_R|`imul_rr`|`dst = dst * src`|`dst != src`|
+|0.11|IROR_C|`ror_ri`|`dst = dst >>> imm32`|`imm32 % 64 != 0`
+|0.10|IADD_C|`add_ri`|`dst = dst + imm32`|
+|0.10|IXOR_C|`xor_ri`|`dst = dst ^ imm32`|
+|0.03|IMULH_R|`mov_rr`,`mul_r`,`mov_rr`|`dst = (dst * src) >> 64`|
+|0.03|ISMULH_R|`mov_rr`,`imul_r`,`mov_rr`|`dst = (dst * src) >> 64` (signed)|
+|0.06|IMUL_RCP|`mov_ri`,`imul_rr`|<code>dst = 2<sup>x</sup> / imm32 * dst</code>|`imm32 != 0`, <code>imm32 != 2<sup>N</sup></code>|
+
+†Frequencies are approximate. Instructions are generated based on complex rules.
+
+#### 6.1.1 ISUB_R
+See chapter 5.2.3. Source and destination are always distinct registers.
+
+#### 6.1.2 IXOR_R
+See chapter 5.2.8. Source and destination are always distinct registers.
+
+#### 6.1.3 IADD_RS
+See chapter 5.2.1. Source and destination are always distinct registers and register `r5` cannot be the destination.
+
+#### 6.1.4 IMUL_R
+See chapter 5.2.4. Source and destination are always distinct registers.
+
+#### 6.1.5 IROR_C
+The destination register is rotated right. The rotation count is given by `imm32` masked to 6 bits and cannot be 0.
+
+#### 6.1.6 IADD_C
+A sign-extended `imm32` is added to the destination register.
+
+#### 6.1.7 IXOR_C
+The destination register is XORed with a sign-extended `imm32`.
+
+#### 6.1.8 IMULH_R, ISMULH_R
+See chapter 5.2.5.
+
+#### 6.1.9 IMUL_RCP
+See chapter 5.2.6. `imm32` is never 0 or a power of 2.
+
+### 6.2 The reference CPU
+
+Unlike a standard RandomX program, a SuperscalarHash program is generated using a strict set of rules to achieve the maximum performance on a superscalar CPU. For this purpose, the generator runs a simulation of a reference CPU.
+
+The reference CPU is loosely based on the [Intel Ivy Bridge microarchitecture](https://en.wikipedia.org/wiki/Ivy_Bridge_(microarchitecture)). It has the following properties:
+
+* The CPU has 3 integer execution ports P0, P1 and P5 that can execute instructions in parallel. Multiplication can run only on port P1.
+* Each of the Superscalar instructions listed in Table 6.1.1 consists of one or more *Macro-ops*. Each Macro-op has a certain execution latency (in cycles) and size (in bytes) as shown in Table 6.2.1.
+* Each of the Macro-ops listed in Table 6.2.1 consists of 0-2 *Micro-ops* that can go to a subset of the 3 execution ports. If a Macro-op consists of 2 Micro-ops, both must be executed together.
+* The CPU can decode at most 16 bytes of code per cycle and at most 4 Micro-ops per cycle.
+
+*Table 6.2.1 - Macro-ops*
+
+|Macro-op|latency|size|1st Micro-op|2nd Micro-op|
+|-|-|-|-|-|
+|`sub_rr`|1|3|P015|-|
+|`xor_rr`|1|3|P015|-|
+|`lea_sib`|1|4|P01|-|
+|`imul_rr`|3|4|P1|-|
+|`ror_ri`|1|4|P05|-|
+|`add_ri`|1|7, 8, 9|P015|-|
+|`xor_ri`|1|7, 8, 9|P015|-|
+|`mov_rr`|0|3|-|-|
+|`mul_r`|4|3|P1|P5|
+|`imul_r`|4|3|P1|P5|
+|`mov_ri`|1|10|P015|-|
+
+* P015 - Micro-op can be executed on any port
+* P01 - Micro-op can be executed on ports P0 or P1
+* P05 - Micro-op can be executed on ports P0 or P5
+* P1 - Micro-op can be executed only on port P1
+* P5 - Micro-op can be executed only on port P5
+
+Macro-ops `add_ri` and `xor_ri` can be optionally padded to a size of 8 or 9 bytes for code alignment purposes. `mov_rr` has 0 execution latency and doesn't use an execution port, but still occupies space during the decoding stage (see chapter 6.3.1).
+
+### 6.3 CPU simulation
+
+SuperscalarHash programs are generated to maximize the usage of all 3 execution ports of the reference CPU. The generation consists of 4 stages:
+
+* Decoding stage
+* Instruction selection
+* Port assignment
+* Operand assignment
+
+Program generation is complete when one of two conditions is met:
+
+1. An instruction is scheduled for execution on a cycle that is equal to or greater than `RANDOMX_SUPERSCALAR_LATENCY`
+1. The number of generated instructions reaches `3 * RANDOMX_SUPERSCALAR_LATENCY + 2`.
+
+#### 6.3.1 Decoding stage
+
+The generator produces instructions in groups of 3 or 4 Macro-op slots such that the size of each group is exactly 16 bytes.
+
+*Table 6.3.1 - Decoder configurations*
+
+|decoder group|configuration|
+|-------------|-------------|
+|0|4-8-4|
+|1|7-3-3-3|
+|2|3-7-3-3|
+|3|4-9-3|
+|4|4-4-4-4|
+|5|3-3-10|
+
+The rules for the selection of the decoder group are as follows:
+
+* If the currently processed instruction is IMULH_R or ISMULH_R, the next decode group is group 5 (the only group that starts with a 3-byte slot and has only 3 slots).
+* If the total number of multiplications that have been generated is less than or equal to the current decoding cycle, the next decode group is group 4.
+* If the currently processed instruction is IMUL_RCP, the next decode group is group 0 or 3 (must begin with a 4-byte slot for multiplication).
+* Otherwise a random decode group is selected from groups 0-3.
+
+#### 6.3.2 Instruction selection
+
+Instructions are selected based on the size of the current decode group slot - see Table 6.3.2.
+
+*Table 6.3.2 - Instruction selection*
+
+|slot size|note|instructions|
+|-------------|-------------|-----|
+|3|-|ISUB_R, IXOR_R
+|3|last slot in the group|ISUB_R, IXOR_R, IMULH_R, ISMULH_R|
+|4|decode group 4, not the last slot|IMUL_R|
+|4|-|IROR_C, IADD_RS|
+|7,8,9|-|IADD_C, IXOR_C|
+|10|-|IMUL_RCP|
+
+#### 6.3.3 Port assignment
+
+Micro-ops are issued to execution ports as soon as an available port is free. The scheduling is done optimistically by checking port availability in order P5 -> P0 -> P1 to not overload port P1 (multiplication) by instructions that can go to any port. The cycle when all Micro-ops of an instruction can be executed is called the 'scheduleCycle'.
+
+#### 6.3.4 Operand assignment
+
+The source operand (if needed) is selected first. It is selected from the group of registers that are available at the 'scheduleCycle' of the instruction. A register is available if the latency of its last operation has elapsed.
+
+The destination operand is selected with more strict rules (see column 'rules' in Table 6.1.1):
+
+* value must be ready at the required cycle
+* cannot be the same as the source register unless the instruction allows it (see column 'rules' in Table 6.1.1)
+ * this avoids optimizable operations such as `reg ^ reg` or `reg - reg`
+ * it also increases intermixing of register values
+* register cannot be multiplied twice in a row unless `allowChainedMul` is true
+ * this avoids accumulation of trailing zeroes in registers due to excessive multiplication
+ * `allowChainedMul` is set to true if an attempt to find source/destination registers failed (this is quite rare, but prevents a catastrophic failure of the generator)
+* either the last instruction applied to the register or its source must be different than the current instruction
+ * this avoids optimizable instruction sequences such as `r1 = r1 ^ r2; r1 = r1 ^ r2` (can be eliminated) or `reg = reg >>> C1; reg = reg >>> C2` (can be reduced to one rotation) or `reg = reg + C1; reg = reg + C2` (can be reduced to one addition)
+* register `r5` cannot be the destination of the IADD_RS instruction (limitation of the x86 lea instruction)
+
+## 7. Dataset
+
+The Dataset is a read-only memory structure that is used during program execution (chapter 4.6.2, steps 6 and 7). The size of the Dataset is `RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE` bytes and it's divided into 64-byte 'items'.
+
+In order to allow PoW verification with a lower amount of memory, the Dataset is constructed in two steps using an intermediate structure called the "Cache", which can be used to calculate Dataset items on the fly.
+
+The whole Dataset is constructed from the key value `K`, which is an input parameter of RandomX. The whole Dataset needs to be recalculated every time the key value changes. Fig. 7.1 shows the process of Dataset construction. Note: the maximum supported length of `K` is 60 bytes. Using a longer key results in implementation-defined behavior.
+
+*Figure 7.1 - Dataset construction*
+
+
+
+### 7.1 Cache construction
+
+The key `K` is expanded into the Cache using the "memory fill" function of Argon2d with parameters according to Table 7.1.1. The key is used as the "password" field.
+
+*Table 7.1.1 - Argon2 parameters*
+
+|parameter|value|
+|------------|--|
+|parallelism|`RANDOMX_ARGON_LANES`|
+|output size|0|
+|memory|`RANDOMX_ARGON_MEMORY`|
+|iterations|`RANDOMX_ARGON_ITERATIONS`|
+|version|`0x13`|
+|hash type|0 (Argon2d)|
+|password|key value `K`|
+|salt|`RANDOMX_ARGON_SALT`
+|secret size|0|
+|assoc. data size|0|
+
+The finalizer and output calculation steps of Argon2 are omitted. The output is the filled memory array.
+
+### 7.2 SuperscalarHash initialization
+
+The key value `K` is used to initialize a BlakeGenerator (see chapter 3.5), which is then used to generate 8 SuperscalarHash instances for Dataset initialization.
+
+### 7.3 Dataset block generation
+Dataset items are numbered sequentially with `itemNumber` starting from 0. Each 64-byte Dataset item is generated independently using 8 SuperscalarHash functions (generated according to chapter 7.2) and by XORing randomly selected data from the Cache (constructed according to chapter 7.1).
+
+The item data is represented by 8 64-bit integer registers: `r0`-`r7`.
+
+1. The register values are initialized as follows (`*` = multiplication, `^` = XOR):
+ * `r0 = (itemNumber + 1) * 6364136223846793005`
+ * `r1 = r0 ^ 9298411001130361340`
+ * `r2 = r0 ^ 12065312585734608966`
+ * `r3 = r0 ^ 9306329213124626780`
+ * `r4 = r0 ^ 5281919268842080866`
+ * `r5 = r0 ^ 10536153434571861004`
+ * `r6 = r0 ^ 3398623926847679864`
+ * `r7 = r0 ^ 9549104520008361294`
+1. Let `cacheIndex = itemNumber`
+1. Let `i = 0`
+1. Load a 64-byte item from the Cache. The item index is given by `cacheIndex` modulo the total number of 64-byte items in Cache.
+1. Execute `SuperscalarHash[i](r0, r1, r2, r3, r4, r5, r6, r7)`, where `SuperscalarHash[i]` refers to the i-th SuperscalarHash function. This modifies the values of the registers `r0`-`r7`.
+1. XOR all registers with the 64 bytes loaded in step 4 (8 bytes per column in order `r0`-`r7`).
+1. Set `cacheIndex` to the value of the register that has the longest dependency chain in the SuperscalarHash function executed in step 5.
+1. Set `i = i + 1` and go back to step 4 if `i < RANDOMX_CACHE_ACCESSES`.
+1. Concatenate registers `r0`-`r7` in little endian format to get the final Dataset item data.
+
+The constants used to initialize register values in step 1 were determined as follows:
+
+* Multiplier `6364136223846793005` was selected because it gives an excellent distribution for linear generators (D. Knuth: The Art of Computer Programming – Vol 2., also listed in [Commonly used LCG parameters](https://en.wikipedia.org/wiki/Linear_congruential_generator#Parameters_in_common_use))
+* XOR constants used to initialize registers `r1`-`r7` were determined by calculating `Hash512` of the ASCII value `"RandomX SuperScalarHash initialize"` and taking bytes 8-63 as 7 little-endian unsigned 64-bit integers. Additionally, the constant for `r1` was increased by 2<sup>33</sup>+700 and the constant for `r3` was increased by 2<sup>14</sup> (these changes are necessary to ensure that all registers have unique initial values for all values of `itemNumber`).
+
diff --git a/src/RandomX/doc/tevador.asc b/src/RandomX/doc/tevador.asc
new file mode 100644
index 000000000..b998f1ef2
--- /dev/null
+++ b/src/RandomX/doc/tevador.asc
@@ -0,0 +1,13 @@
+-----BEGIN PGP PUBLIC KEY BLOCK-----
+
+mDMEXd+PeBYJKwYBBAHaRw8BAQdAZ0nqJ+nRYoScG2QLX62pl+WO1+Mkv6Yyt2Kb
+ntGUuLq0G3RldmFkb3IgPHRldmFkb3JAZ21haWwuY29tPoiWBBMWCAA+FiEEMoWj
+LVEwdmMs6CUQWijIaue9c6YFAl3fj3gCGwMFCQWnqDgFCwkIBwIGFQoJCAsCBBYC
+AwECHgECF4AACgkQWijIaue9c6YBFQD+N1XTUqSCZp9jB/yTHQ9ahSaIUMtmuvdT
+So2s+quudP4A/R5wLwukpfGN9UZ4cfpmKCJ9jO1HJ2udmlGMsJbQpDAIuDgEXd+P
+eBIKKwYBBAGXVQEFAQEHQBNbQuPcDojMCkRb5B5u7Ld/AFLClOh+6ElL+u61rIY/
+AwEIB4h+BBgWCAAmFiEEMoWjLVEwdmMs6CUQWijIaue9c6YFAl3fj3gCGwwFCQWn
+qDgACgkQWijIaue9c6YJvgD+IY1Q9mCM1P1iZIoXuafRihXJ7UgVXpQqW2yoaUT3
+bfQA/RkisI2eElYoOjdwPszPP6VfL5+SViwDmDuJG2P5llgE
+=V4vd
+-----END PGP PUBLIC KEY BLOCK-----
diff --git a/src/RandomX/randomx.sln b/src/RandomX/randomx.sln
new file mode 100644
index 000000000..3f003b78e
--- /dev/null
+++ b/src/RandomX/randomx.sln
@@ -0,0 +1,177 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 15
+VisualStudioVersion = 15.0.28307.572
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "randomx", "vcxproj\randomx.vcxproj", "{3346A4AD-C438-4324-8B77-47A16452954B}"
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tests", "tests", "{4A4A689F-86AF-41C0-A974-1080506D0923}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "superscalar-avalanche", "vcxproj\superscalar-avalanche.vcxproj", "{CF34A7EF-7DC9-4077-94A5-76F5425EA938}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "superscalar-init", "vcxproj\superscalar-init.vcxproj", "{E59DC709-9B12-4A53-BAF3-79398821C376}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "superscalar-stats", "vcxproj\superscalar-stats.vcxproj", "{0173D560-8C12-46B3-B467-0C6E7573AA0B}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "benchmark", "vcxproj\benchmark.vcxproj", "{1E8A2E2F-9F9F-43AA-BB19-9107FEC64A70}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "api-example1", "vcxproj\api-example1.vcxproj", "{83EA3E54-5D91-4E01-8EF6-C1E718334F83}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "api-example2", "vcxproj\api-example2.vcxproj", "{44947B9C-E6B1-4C06-BD01-F8EF43B59223}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "code-generator", "vcxproj\code-generator.vcxproj", "{3E490DEC-1874-43AA-92DA-1AC57C217EAC}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "scratchpad-entropy", "vcxproj\scratchpad-entropy.vcxproj", "{FF8BD408-AFD8-43C6-BE98-4D03B37E840B}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "jit-performance", "vcxproj\jit-performance.vcxproj", "{535F2111-FA81-4C76-A354-EDD2F9AA00E3}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "perf-simulation", "vcxproj\perf-simulation.vcxproj", "{F1FC7AC0-2773-4A57-AFA7-56BB07216AA2}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "runtime-distr", "vcxproj\runtime-distr.vcxproj", "{F207EC8C-C55F-46C0-8851-887A71574F54}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "randomx-dll", "vcxproj\randomx-dll.vcxproj", "{59560AD8-18E3-463E-A941-BBD808EC7C83}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "tests", "vcxproj\tests.vcxproj", "{41F3F4DF-8113-4029-9915-FDDC44C43D49}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|x64 = Debug|x64
+ Debug|x86 = Debug|x86
+ Release|x64 = Release|x64
+ Release|x86 = Release|x86
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {3346A4AD-C438-4324-8B77-47A16452954B}.Debug|x64.ActiveCfg = Debug|x64
+ {3346A4AD-C438-4324-8B77-47A16452954B}.Debug|x64.Build.0 = Debug|x64
+ {3346A4AD-C438-4324-8B77-47A16452954B}.Debug|x86.ActiveCfg = Debug|Win32
+ {3346A4AD-C438-4324-8B77-47A16452954B}.Debug|x86.Build.0 = Debug|Win32
+ {3346A4AD-C438-4324-8B77-47A16452954B}.Release|x64.ActiveCfg = Release|x64
+ {3346A4AD-C438-4324-8B77-47A16452954B}.Release|x64.Build.0 = Release|x64
+ {3346A4AD-C438-4324-8B77-47A16452954B}.Release|x86.ActiveCfg = Release|Win32
+ {3346A4AD-C438-4324-8B77-47A16452954B}.Release|x86.Build.0 = Release|Win32
+ {CF34A7EF-7DC9-4077-94A5-76F5425EA938}.Debug|x64.ActiveCfg = Debug|x64
+ {CF34A7EF-7DC9-4077-94A5-76F5425EA938}.Debug|x64.Build.0 = Debug|x64
+ {CF34A7EF-7DC9-4077-94A5-76F5425EA938}.Debug|x86.ActiveCfg = Debug|Win32
+ {CF34A7EF-7DC9-4077-94A5-76F5425EA938}.Debug|x86.Build.0 = Debug|Win32
+ {CF34A7EF-7DC9-4077-94A5-76F5425EA938}.Release|x64.ActiveCfg = Release|x64
+ {CF34A7EF-7DC9-4077-94A5-76F5425EA938}.Release|x64.Build.0 = Release|x64
+ {CF34A7EF-7DC9-4077-94A5-76F5425EA938}.Release|x86.ActiveCfg = Release|Win32
+ {CF34A7EF-7DC9-4077-94A5-76F5425EA938}.Release|x86.Build.0 = Release|Win32
+ {E59DC709-9B12-4A53-BAF3-79398821C376}.Debug|x64.ActiveCfg = Debug|x64
+ {E59DC709-9B12-4A53-BAF3-79398821C376}.Debug|x64.Build.0 = Debug|x64
+ {E59DC709-9B12-4A53-BAF3-79398821C376}.Debug|x86.ActiveCfg = Debug|Win32
+ {E59DC709-9B12-4A53-BAF3-79398821C376}.Debug|x86.Build.0 = Debug|Win32
+ {E59DC709-9B12-4A53-BAF3-79398821C376}.Release|x64.ActiveCfg = Release|x64
+ {E59DC709-9B12-4A53-BAF3-79398821C376}.Release|x64.Build.0 = Release|x64
+ {E59DC709-9B12-4A53-BAF3-79398821C376}.Release|x86.ActiveCfg = Release|Win32
+ {E59DC709-9B12-4A53-BAF3-79398821C376}.Release|x86.Build.0 = Release|Win32
+ {0173D560-8C12-46B3-B467-0C6E7573AA0B}.Debug|x64.ActiveCfg = Debug|x64
+ {0173D560-8C12-46B3-B467-0C6E7573AA0B}.Debug|x64.Build.0 = Debug|x64
+ {0173D560-8C12-46B3-B467-0C6E7573AA0B}.Debug|x86.ActiveCfg = Debug|Win32
+ {0173D560-8C12-46B3-B467-0C6E7573AA0B}.Debug|x86.Build.0 = Debug|Win32
+ {0173D560-8C12-46B3-B467-0C6E7573AA0B}.Release|x64.ActiveCfg = Release|x64
+ {0173D560-8C12-46B3-B467-0C6E7573AA0B}.Release|x64.Build.0 = Release|x64
+ {0173D560-8C12-46B3-B467-0C6E7573AA0B}.Release|x86.ActiveCfg = Release|Win32
+ {0173D560-8C12-46B3-B467-0C6E7573AA0B}.Release|x86.Build.0 = Release|Win32
+ {1E8A2E2F-9F9F-43AA-BB19-9107FEC64A70}.Debug|x64.ActiveCfg = Debug|x64
+ {1E8A2E2F-9F9F-43AA-BB19-9107FEC64A70}.Debug|x64.Build.0 = Debug|x64
+ {1E8A2E2F-9F9F-43AA-BB19-9107FEC64A70}.Debug|x86.ActiveCfg = Debug|Win32
+ {1E8A2E2F-9F9F-43AA-BB19-9107FEC64A70}.Debug|x86.Build.0 = Debug|Win32
+ {1E8A2E2F-9F9F-43AA-BB19-9107FEC64A70}.Release|x64.ActiveCfg = Release|x64
+ {1E8A2E2F-9F9F-43AA-BB19-9107FEC64A70}.Release|x64.Build.0 = Release|x64
+ {1E8A2E2F-9F9F-43AA-BB19-9107FEC64A70}.Release|x86.ActiveCfg = Release|Win32
+ {1E8A2E2F-9F9F-43AA-BB19-9107FEC64A70}.Release|x86.Build.0 = Release|Win32
+ {83EA3E54-5D91-4E01-8EF6-C1E718334F83}.Debug|x64.ActiveCfg = Debug|x64
+ {83EA3E54-5D91-4E01-8EF6-C1E718334F83}.Debug|x64.Build.0 = Debug|x64
+ {83EA3E54-5D91-4E01-8EF6-C1E718334F83}.Debug|x86.ActiveCfg = Debug|Win32
+ {83EA3E54-5D91-4E01-8EF6-C1E718334F83}.Debug|x86.Build.0 = Debug|Win32
+ {83EA3E54-5D91-4E01-8EF6-C1E718334F83}.Release|x64.ActiveCfg = Release|x64
+ {83EA3E54-5D91-4E01-8EF6-C1E718334F83}.Release|x64.Build.0 = Release|x64
+ {83EA3E54-5D91-4E01-8EF6-C1E718334F83}.Release|x86.ActiveCfg = Release|Win32
+ {83EA3E54-5D91-4E01-8EF6-C1E718334F83}.Release|x86.Build.0 = Release|Win32
+ {44947B9C-E6B1-4C06-BD01-F8EF43B59223}.Debug|x64.ActiveCfg = Debug|x64
+ {44947B9C-E6B1-4C06-BD01-F8EF43B59223}.Debug|x64.Build.0 = Debug|x64
+ {44947B9C-E6B1-4C06-BD01-F8EF43B59223}.Debug|x86.ActiveCfg = Debug|Win32
+ {44947B9C-E6B1-4C06-BD01-F8EF43B59223}.Debug|x86.Build.0 = Debug|Win32
+ {44947B9C-E6B1-4C06-BD01-F8EF43B59223}.Release|x64.ActiveCfg = Release|x64
+ {44947B9C-E6B1-4C06-BD01-F8EF43B59223}.Release|x64.Build.0 = Release|x64
+ {44947B9C-E6B1-4C06-BD01-F8EF43B59223}.Release|x86.ActiveCfg = Release|Win32
+ {44947B9C-E6B1-4C06-BD01-F8EF43B59223}.Release|x86.Build.0 = Release|Win32
+ {3E490DEC-1874-43AA-92DA-1AC57C217EAC}.Debug|x64.ActiveCfg = Debug|x64
+ {3E490DEC-1874-43AA-92DA-1AC57C217EAC}.Debug|x64.Build.0 = Debug|x64
+ {3E490DEC-1874-43AA-92DA-1AC57C217EAC}.Debug|x86.ActiveCfg = Debug|Win32
+ {3E490DEC-1874-43AA-92DA-1AC57C217EAC}.Debug|x86.Build.0 = Debug|Win32
+ {3E490DEC-1874-43AA-92DA-1AC57C217EAC}.Release|x64.ActiveCfg = Release|x64
+ {3E490DEC-1874-43AA-92DA-1AC57C217EAC}.Release|x64.Build.0 = Release|x64
+ {3E490DEC-1874-43AA-92DA-1AC57C217EAC}.Release|x86.ActiveCfg = Release|Win32
+ {3E490DEC-1874-43AA-92DA-1AC57C217EAC}.Release|x86.Build.0 = Release|Win32
+ {FF8BD408-AFD8-43C6-BE98-4D03B37E840B}.Debug|x64.ActiveCfg = Debug|x64
+ {FF8BD408-AFD8-43C6-BE98-4D03B37E840B}.Debug|x64.Build.0 = Debug|x64
+ {FF8BD408-AFD8-43C6-BE98-4D03B37E840B}.Debug|x86.ActiveCfg = Debug|Win32
+ {FF8BD408-AFD8-43C6-BE98-4D03B37E840B}.Debug|x86.Build.0 = Debug|Win32
+ {FF8BD408-AFD8-43C6-BE98-4D03B37E840B}.Release|x64.ActiveCfg = Release|x64
+ {FF8BD408-AFD8-43C6-BE98-4D03B37E840B}.Release|x64.Build.0 = Release|x64
+ {FF8BD408-AFD8-43C6-BE98-4D03B37E840B}.Release|x86.ActiveCfg = Release|Win32
+ {FF8BD408-AFD8-43C6-BE98-4D03B37E840B}.Release|x86.Build.0 = Release|Win32
+ {535F2111-FA81-4C76-A354-EDD2F9AA00E3}.Debug|x64.ActiveCfg = Debug|x64
+ {535F2111-FA81-4C76-A354-EDD2F9AA00E3}.Debug|x64.Build.0 = Debug|x64
+ {535F2111-FA81-4C76-A354-EDD2F9AA00E3}.Debug|x86.ActiveCfg = Debug|Win32
+ {535F2111-FA81-4C76-A354-EDD2F9AA00E3}.Debug|x86.Build.0 = Debug|Win32
+ {535F2111-FA81-4C76-A354-EDD2F9AA00E3}.Release|x64.ActiveCfg = Release|x64
+ {535F2111-FA81-4C76-A354-EDD2F9AA00E3}.Release|x64.Build.0 = Release|x64
+ {535F2111-FA81-4C76-A354-EDD2F9AA00E3}.Release|x86.ActiveCfg = Release|Win32
+ {535F2111-FA81-4C76-A354-EDD2F9AA00E3}.Release|x86.Build.0 = Release|Win32
+ {F1FC7AC0-2773-4A57-AFA7-56BB07216AA2}.Debug|x64.ActiveCfg = Debug|x64
+ {F1FC7AC0-2773-4A57-AFA7-56BB07216AA2}.Debug|x64.Build.0 = Debug|x64
+ {F1FC7AC0-2773-4A57-AFA7-56BB07216AA2}.Debug|x86.ActiveCfg = Debug|Win32
+ {F1FC7AC0-2773-4A57-AFA7-56BB07216AA2}.Debug|x86.Build.0 = Debug|Win32
+ {F1FC7AC0-2773-4A57-AFA7-56BB07216AA2}.Release|x64.ActiveCfg = Release|x64
+ {F1FC7AC0-2773-4A57-AFA7-56BB07216AA2}.Release|x64.Build.0 = Release|x64
+ {F1FC7AC0-2773-4A57-AFA7-56BB07216AA2}.Release|x86.ActiveCfg = Release|Win32
+ {F1FC7AC0-2773-4A57-AFA7-56BB07216AA2}.Release|x86.Build.0 = Release|Win32
+ {F207EC8C-C55F-46C0-8851-887A71574F54}.Debug|x64.ActiveCfg = Debug|x64
+ {F207EC8C-C55F-46C0-8851-887A71574F54}.Debug|x64.Build.0 = Debug|x64
+ {F207EC8C-C55F-46C0-8851-887A71574F54}.Debug|x86.ActiveCfg = Debug|Win32
+ {F207EC8C-C55F-46C0-8851-887A71574F54}.Debug|x86.Build.0 = Debug|Win32
+ {F207EC8C-C55F-46C0-8851-887A71574F54}.Release|x64.ActiveCfg = Release|x64
+ {F207EC8C-C55F-46C0-8851-887A71574F54}.Release|x64.Build.0 = Release|x64
+ {F207EC8C-C55F-46C0-8851-887A71574F54}.Release|x86.ActiveCfg = Release|Win32
+ {F207EC8C-C55F-46C0-8851-887A71574F54}.Release|x86.Build.0 = Release|Win32
+ {59560AD8-18E3-463E-A941-BBD808EC7C83}.Debug|x64.ActiveCfg = Debug|x64
+ {59560AD8-18E3-463E-A941-BBD808EC7C83}.Debug|x64.Build.0 = Debug|x64
+ {59560AD8-18E3-463E-A941-BBD808EC7C83}.Debug|x86.ActiveCfg = Debug|Win32
+ {59560AD8-18E3-463E-A941-BBD808EC7C83}.Debug|x86.Build.0 = Debug|Win32
+ {59560AD8-18E3-463E-A941-BBD808EC7C83}.Release|x64.ActiveCfg = Release|x64
+ {59560AD8-18E3-463E-A941-BBD808EC7C83}.Release|x64.Build.0 = Release|x64
+ {59560AD8-18E3-463E-A941-BBD808EC7C83}.Release|x86.ActiveCfg = Release|Win32
+ {59560AD8-18E3-463E-A941-BBD808EC7C83}.Release|x86.Build.0 = Release|Win32
+ {41F3F4DF-8113-4029-9915-FDDC44C43D49}.Debug|x64.ActiveCfg = Debug|x64
+ {41F3F4DF-8113-4029-9915-FDDC44C43D49}.Debug|x64.Build.0 = Debug|x64
+ {41F3F4DF-8113-4029-9915-FDDC44C43D49}.Debug|x86.ActiveCfg = Debug|Win32
+ {41F3F4DF-8113-4029-9915-FDDC44C43D49}.Debug|x86.Build.0 = Debug|Win32
+ {41F3F4DF-8113-4029-9915-FDDC44C43D49}.Release|x64.ActiveCfg = Release|x64
+ {41F3F4DF-8113-4029-9915-FDDC44C43D49}.Release|x64.Build.0 = Release|x64
+ {41F3F4DF-8113-4029-9915-FDDC44C43D49}.Release|x86.ActiveCfg = Release|Win32
+ {41F3F4DF-8113-4029-9915-FDDC44C43D49}.Release|x86.Build.0 = Release|Win32
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+ GlobalSection(NestedProjects) = preSolution
+ {CF34A7EF-7DC9-4077-94A5-76F5425EA938} = {4A4A689F-86AF-41C0-A974-1080506D0923}
+ {E59DC709-9B12-4A53-BAF3-79398821C376} = {4A4A689F-86AF-41C0-A974-1080506D0923}
+ {0173D560-8C12-46B3-B467-0C6E7573AA0B} = {4A4A689F-86AF-41C0-A974-1080506D0923}
+ {1E8A2E2F-9F9F-43AA-BB19-9107FEC64A70} = {4A4A689F-86AF-41C0-A974-1080506D0923}
+ {83EA3E54-5D91-4E01-8EF6-C1E718334F83} = {4A4A689F-86AF-41C0-A974-1080506D0923}
+ {44947B9C-E6B1-4C06-BD01-F8EF43B59223} = {4A4A689F-86AF-41C0-A974-1080506D0923}
+ {3E490DEC-1874-43AA-92DA-1AC57C217EAC} = {4A4A689F-86AF-41C0-A974-1080506D0923}
+ {FF8BD408-AFD8-43C6-BE98-4D03B37E840B} = {4A4A689F-86AF-41C0-A974-1080506D0923}
+ {535F2111-FA81-4C76-A354-EDD2F9AA00E3} = {4A4A689F-86AF-41C0-A974-1080506D0923}
+ {F1FC7AC0-2773-4A57-AFA7-56BB07216AA2} = {4A4A689F-86AF-41C0-A974-1080506D0923}
+ {F207EC8C-C55F-46C0-8851-887A71574F54} = {4A4A689F-86AF-41C0-A974-1080506D0923}
+ {41F3F4DF-8113-4029-9915-FDDC44C43D49} = {4A4A689F-86AF-41C0-A974-1080506D0923}
+ EndGlobalSection
+ GlobalSection(ExtensibilityGlobals) = postSolution
+ SolutionGuid = {4EBC03DB-AE37-4141-8147-692F16E0ED02}
+ EndGlobalSection
+EndGlobal
diff --git a/src/RandomX/src/aes_hash.cpp b/src/RandomX/src/aes_hash.cpp
new file mode 100644
index 000000000..a3b7395bc
--- /dev/null
+++ b/src/RandomX/src/aes_hash.cpp
@@ -0,0 +1,322 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holder nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "soft_aes.h"
+#include
+
+//NOTE: The functions below were tuned for maximum performance
+//and are not cryptographically secure outside of the scope of RandomX.
+//It's not recommended to use them as general hash functions and PRNGs.
+
+//AesHash1R:
+//state0, state1, state2, state3 = Blake2b-512("RandomX AesHash1R state")
+//xkey0, xkey1 = Blake2b-256("RandomX AesHash1R xkeys")
+//Each macro expands to four comma-separated words that form the argument list of rx_set_int_vec_i128().
+#define AES_HASH_1R_STATE0 0xd7983aad, 0xcc82db47, 0x9fa856de, 0x92b52c0d
+#define AES_HASH_1R_STATE1 0xace78057, 0xf59e125a, 0x15c7b798, 0x338d996e
+#define AES_HASH_1R_STATE2 0xe8a07ce4, 0x5079506b, 0xae62c7d0, 0x6a770017
+#define AES_HASH_1R_STATE3 0x7e994948, 0x79a10005, 0x07ad828d, 0x630a240c
+//Keys for the two extra rounds that are applied after all input has been absorbed.
+#define AES_HASH_1R_XKEY0 0x06890201, 0x90dc56bf, 0x8b24949f, 0xf6fa8389
+#define AES_HASH_1R_XKEY1 0xed18f99b, 0xee1043c6, 0x51f4e03c, 0x61b263d1
+
+/*
+ Calculate a 512-bit hash of 'input' using 4 lanes of AES.
+ The input is treated as a set of round keys for the encryption
+ of the initial state.
+ Lanes 0 and 2 are advanced with aesenc, lanes 1 and 3 with aesdec.
+ 'inputSize' must be a multiple of 64.
+ This is only checked by assert; 'hash' receives four 128-bit vectors (64 bytes).
+ For a 2 MiB input, this has the same security as 32768-round
+ AES encryption.
+ NOTE(review): the parameter list after 'template' and the arguments of the two explicit instantiations appear to be missing - confirm against upstream.
+ Hashing throughput: >20 GiB/s per CPU core with hardware AES
+*/
+template
+void hashAes1Rx4(const void *input, size_t inputSize, void *hash) {
+ assert(inputSize % 64 == 0);
+ const uint8_t* inptr = (uint8_t*)input;
+ const uint8_t* inputEnd = inptr + inputSize;
+
+ rx_vec_i128 state0, state1, state2, state3;
+ rx_vec_i128 in0, in1, in2, in3;
+
+ //initial state
+ state0 = rx_set_int_vec_i128(AES_HASH_1R_STATE0);
+ state1 = rx_set_int_vec_i128(AES_HASH_1R_STATE1);
+ state2 = rx_set_int_vec_i128(AES_HASH_1R_STATE2);
+ state3 = rx_set_int_vec_i128(AES_HASH_1R_STATE3);
+
+ //process 64 bytes at a time in 4 lanes
+ while (inptr < inputEnd) {
+ in0 = rx_load_vec_i128((rx_vec_i128*)inptr + 0);
+ in1 = rx_load_vec_i128((rx_vec_i128*)inptr + 1);
+ in2 = rx_load_vec_i128((rx_vec_i128*)inptr + 2);
+ in3 = rx_load_vec_i128((rx_vec_i128*)inptr + 3);
+ //each 16-byte input chunk is used as the round key of its lane
+ state0 = aesenc(state0, in0);
+ state1 = aesdec(state1, in1);
+ state2 = aesenc(state2, in2);
+ state3 = aesdec(state3, in3);
+
+ inptr += 64;
+ }
+
+ //two extra rounds to achieve full diffusion
+ rx_vec_i128 xkey0 = rx_set_int_vec_i128(AES_HASH_1R_XKEY0);
+ rx_vec_i128 xkey1 = rx_set_int_vec_i128(AES_HASH_1R_XKEY1);
+
+ state0 = aesenc(state0, xkey0);
+ state1 = aesdec(state1, xkey0);
+ state2 = aesenc(state2, xkey0);
+ state3 = aesdec(state3, xkey0);
+
+ state0 = aesenc(state0, xkey1);
+ state1 = aesdec(state1, xkey1);
+ state2 = aesenc(state2, xkey1);
+ state3 = aesdec(state3, xkey1);
+
+ //output hash
+ rx_store_vec_i128((rx_vec_i128*)hash + 0, state0);
+ rx_store_vec_i128((rx_vec_i128*)hash + 1, state1);
+ rx_store_vec_i128((rx_vec_i128*)hash + 2, state2);
+ rx_store_vec_i128((rx_vec_i128*)hash + 3, state3);
+}
+//Explicit instantiations. NOTE(review): the two lines below are textually identical as written - confirm the intended template arguments.
+template void hashAes1Rx4(const void *input, size_t inputSize, void *hash);
+template void hashAes1Rx4(const void *input, size_t inputSize, void *hash);
+
+//AesGenerator1R:
+//key0, key1, key2, key3 = Blake2b-512("RandomX AesGenerator1R keys")
+//One fixed round key per lane; each macro is the four-word argument list of rx_set_int_vec_i128().
+#define AES_GEN_1R_KEY0 0xb4f44917, 0xdbb5552b, 0x62716609, 0x6daca553
+#define AES_GEN_1R_KEY1 0x0da1dc4e, 0x1725d378, 0x846a710d, 0x6d7caf07
+#define AES_GEN_1R_KEY2 0x3e20e345, 0xf4c0794f, 0x9f947ec6, 0x3f1262f1
+#define AES_GEN_1R_KEY3 0x49169154, 0x16314c88, 0xb1ba317c, 0x6aef8135
+
+/*
+ Fill 'buffer' with pseudorandom data based on 512-bit 'state'.
+ The state is encrypted using a single AES round per 16 bytes of output
+ in 4 lanes.
+ Lanes 0 and 2 use aesdec, lanes 1 and 3 use aesenc, each with its own fixed key.
+ 'outputSize' must be a multiple of 64.
+ This is only checked by assert.
+ The modified state is written back to 'state' to allow multiple
+ calls to this function.
+*/
+template
+void fillAes1Rx4(void *state, size_t outputSize, void *buffer) {
+ assert(outputSize % 64 == 0);
+ const uint8_t* outptr = (uint8_t*)buffer; //NOTE(review): const-qualified, yet written through the casts in the loop below
+ const uint8_t* outputEnd = outptr + outputSize;
+
+ rx_vec_i128 state0, state1, state2, state3;
+ rx_vec_i128 key0, key1, key2, key3;
+
+ key0 = rx_set_int_vec_i128(AES_GEN_1R_KEY0);
+ key1 = rx_set_int_vec_i128(AES_GEN_1R_KEY1);
+ key2 = rx_set_int_vec_i128(AES_GEN_1R_KEY2);
+ key3 = rx_set_int_vec_i128(AES_GEN_1R_KEY3);
+ //load the four 128-bit lanes of the caller's state
+ state0 = rx_load_vec_i128((rx_vec_i128*)state + 0);
+ state1 = rx_load_vec_i128((rx_vec_i128*)state + 1);
+ state2 = rx_load_vec_i128((rx_vec_i128*)state + 2);
+ state3 = rx_load_vec_i128((rx_vec_i128*)state + 3);
+
+ while (outptr < outputEnd) {
+ state0 = aesdec(state0, key0);
+ state1 = aesenc(state1, key1);
+ state2 = aesdec(state2, key2);
+ state3 = aesenc(state3, key3);
+ //the updated lanes themselves are the next 64 bytes of output
+ rx_store_vec_i128((rx_vec_i128*)outptr + 0, state0);
+ rx_store_vec_i128((rx_vec_i128*)outptr + 1, state1);
+ rx_store_vec_i128((rx_vec_i128*)outptr + 2, state2);
+ rx_store_vec_i128((rx_vec_i128*)outptr + 3, state3);
+
+ outptr += 64;
+ }
+ //write the advanced state back so that a later call continues the stream
+ rx_store_vec_i128((rx_vec_i128*)state + 0, state0);
+ rx_store_vec_i128((rx_vec_i128*)state + 1, state1);
+ rx_store_vec_i128((rx_vec_i128*)state + 2, state2);
+ rx_store_vec_i128((rx_vec_i128*)state + 3, state3);
+}
+//Explicit instantiations. NOTE(review): 'template' above has no parameter list and the two lines below are identical as written - confirm against upstream.
+template void fillAes1Rx4(void *state, size_t outputSize, void *buffer);
+template void fillAes1Rx4(void *state, size_t outputSize, void *buffer);
+
+//AesGenerator4R:
+//key0, key1, key2, key3 = Blake2b-512("RandomX AesGenerator4R keys 0-3")
+//key4, key5, key6, key7 = Blake2b-512("RandomX AesGenerator4R keys 4-7")
+//Keys 0-3 are applied to lanes 0 and 1, keys 4-7 to lanes 2 and 3 (see fillAes4Rx4).
+#define AES_GEN_4R_KEY0 0x99e5d23f, 0x2f546d2b, 0xd1833ddb, 0x6421aadd
+#define AES_GEN_4R_KEY1 0xa5dfcde5, 0x06f79d53, 0xb6913f55, 0xb20e3450
+#define AES_GEN_4R_KEY2 0x171c02bf, 0x0aa4679f, 0x515e7baf, 0x5c3ed904
+#define AES_GEN_4R_KEY3 0xd8ded291, 0xcd673785, 0xe78f5d08, 0x85623763
+#define AES_GEN_4R_KEY4 0x229effb4, 0x3d518b6d, 0xe3d6a7a6, 0xb5826f73
+#define AES_GEN_4R_KEY5 0xb272b7d2, 0xe9024d4e, 0x9c10b3d9, 0xc7566bf3
+#define AES_GEN_4R_KEY6 0xf63befa7, 0x2ba9660a, 0xf765a38b, 0xf273c9e7
+#define AES_GEN_4R_KEY7 0xc0b0762d, 0x0c06d1fd, 0x915839de, 0x7a7cd609
+//Fill 'buffer' with 'outputSize' bytes, 64 per iteration, applying four AES rounds to every lane per iteration; 'state' is only read and is NOT written back.
+template
+void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
+ assert(outputSize % 64 == 0);
+ const uint8_t* outptr = (uint8_t*)buffer; //NOTE(review): const-qualified, yet written through the casts in the loop below
+ const uint8_t* outputEnd = outptr + outputSize;
+
+ rx_vec_i128 state0, state1, state2, state3;
+ rx_vec_i128 key0, key1, key2, key3, key4, key5, key6, key7;
+
+ key0 = rx_set_int_vec_i128(AES_GEN_4R_KEY0);
+ key1 = rx_set_int_vec_i128(AES_GEN_4R_KEY1);
+ key2 = rx_set_int_vec_i128(AES_GEN_4R_KEY2);
+ key3 = rx_set_int_vec_i128(AES_GEN_4R_KEY3);
+ key4 = rx_set_int_vec_i128(AES_GEN_4R_KEY4);
+ key5 = rx_set_int_vec_i128(AES_GEN_4R_KEY5);
+ key6 = rx_set_int_vec_i128(AES_GEN_4R_KEY6);
+ key7 = rx_set_int_vec_i128(AES_GEN_4R_KEY7);
+ //load the four 128-bit lanes of the caller's state
+ state0 = rx_load_vec_i128((rx_vec_i128*)state + 0);
+ state1 = rx_load_vec_i128((rx_vec_i128*)state + 1);
+ state2 = rx_load_vec_i128((rx_vec_i128*)state + 2);
+ state3 = rx_load_vec_i128((rx_vec_i128*)state + 3);
+
+ while (outptr < outputEnd) {
+ state0 = aesdec(state0, key0); //round 1: lanes 0/1 share key0, lanes 2/3 share key4
+ state1 = aesenc(state1, key0);
+ state2 = aesdec(state2, key4);
+ state3 = aesenc(state3, key4);
+
+ state0 = aesdec(state0, key1); //round 2: key1 / key5
+ state1 = aesenc(state1, key1);
+ state2 = aesdec(state2, key5);
+ state3 = aesenc(state3, key5);
+
+ state0 = aesdec(state0, key2); //round 3: key2 / key6
+ state1 = aesenc(state1, key2);
+ state2 = aesdec(state2, key6);
+ state3 = aesenc(state3, key6);
+
+ state0 = aesdec(state0, key3); //round 4: key3 / key7
+ state1 = aesenc(state1, key3);
+ state2 = aesdec(state2, key7);
+ state3 = aesenc(state3, key7);
+
+ rx_store_vec_i128((rx_vec_i128*)outptr + 0, state0);
+ rx_store_vec_i128((rx_vec_i128*)outptr + 1, state1);
+ rx_store_vec_i128((rx_vec_i128*)outptr + 2, state2);
+ rx_store_vec_i128((rx_vec_i128*)outptr + 3, state3);
+
+ outptr += 64;
+ }
+}
+//Explicit instantiations. NOTE(review): 'template' above has no parameter list and the two lines below are identical as written - confirm against upstream.
+template void fillAes4Rx4(void *state, size_t outputSize, void *buffer);
+template void fillAes4Rx4(void *state, size_t outputSize, void *buffer);
+//Hashes the current scratchpad contents (same lanes, constants and final rounds as hashAes1Rx4) while overwriting them with generator output (same keys as fillAes1Rx4), in one sweep.
+template
+void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state) {
+ uint8_t* scratchpadPtr = (uint8_t*)scratchpad;
+ const uint8_t* scratchpadEnd = scratchpadPtr + scratchpadSize;
+ //NOTE(review): no assert on scratchpadSize here; presumably a multiple of 64 and at least PREFETCH_DISTANCE - confirm with callers
+ // initial state
+ rx_vec_i128 hash_state0 = rx_set_int_vec_i128(AES_HASH_1R_STATE0);
+ rx_vec_i128 hash_state1 = rx_set_int_vec_i128(AES_HASH_1R_STATE1);
+ rx_vec_i128 hash_state2 = rx_set_int_vec_i128(AES_HASH_1R_STATE2);
+ rx_vec_i128 hash_state3 = rx_set_int_vec_i128(AES_HASH_1R_STATE3);
+
+ const rx_vec_i128 key0 = rx_set_int_vec_i128(AES_GEN_1R_KEY0);
+ const rx_vec_i128 key1 = rx_set_int_vec_i128(AES_GEN_1R_KEY1);
+ const rx_vec_i128 key2 = rx_set_int_vec_i128(AES_GEN_1R_KEY2);
+ const rx_vec_i128 key3 = rx_set_int_vec_i128(AES_GEN_1R_KEY3);
+
+ rx_vec_i128 fill_state0 = rx_load_vec_i128((rx_vec_i128*)fill_state + 0);
+ rx_vec_i128 fill_state1 = rx_load_vec_i128((rx_vec_i128*)fill_state + 1);
+ rx_vec_i128 fill_state2 = rx_load_vec_i128((rx_vec_i128*)fill_state + 2);
+ rx_vec_i128 fill_state3 = rx_load_vec_i128((rx_vec_i128*)fill_state + 3);
+ //the prefetch pointer runs PREFETCH_DISTANCE bytes ahead of the data pointer, so the first pass stops that far short of the end
+ constexpr int PREFETCH_DISTANCE = 4096;
+ const char* prefetchPtr = ((const char*)scratchpad) + PREFETCH_DISTANCE;
+ scratchpadEnd -= PREFETCH_DISTANCE;
+
+ for (int i = 0; i < 2; ++i) { //pass 0: everything except the last PREFETCH_DISTANCE bytes; pass 1: the remainder
+ //process 64 bytes at a time in 4 lanes
+ while (scratchpadPtr < scratchpadEnd) {
+ hash_state0 = aesenc(hash_state0, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 0));
+ hash_state1 = aesdec(hash_state1, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 1));
+ hash_state2 = aesenc(hash_state2, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 2));
+ hash_state3 = aesdec(hash_state3, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 3));
+
+ fill_state0 = aesdec(fill_state0, key0);
+ fill_state1 = aesenc(fill_state1, key1);
+ fill_state2 = aesdec(fill_state2, key2);
+ fill_state3 = aesenc(fill_state3, key3);
+ //the old 64 bytes have been absorbed by the hash above, so they can be overwritten now
+ rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 0, fill_state0);
+ rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 1, fill_state1);
+ rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 2, fill_state2);
+ rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 3, fill_state3);
+
+ rx_prefetch_t0(prefetchPtr);
+
+ scratchpadPtr += 64;
+ prefetchPtr += 64;
+ }
+ prefetchPtr = (const char*) scratchpad; //the second pass prefetches from the start of the scratchpad again
+ scratchpadEnd += PREFETCH_DISTANCE;
+ }
+ //write the advanced generator state back to the caller
+ rx_store_vec_i128((rx_vec_i128*)fill_state + 0, fill_state0);
+ rx_store_vec_i128((rx_vec_i128*)fill_state + 1, fill_state1);
+ rx_store_vec_i128((rx_vec_i128*)fill_state + 2, fill_state2);
+ rx_store_vec_i128((rx_vec_i128*)fill_state + 3, fill_state3);
+
+ //two extra rounds to achieve full diffusion
+ rx_vec_i128 xkey0 = rx_set_int_vec_i128(AES_HASH_1R_XKEY0);
+ rx_vec_i128 xkey1 = rx_set_int_vec_i128(AES_HASH_1R_XKEY1);
+
+ hash_state0 = aesenc(hash_state0, xkey0);
+ hash_state1 = aesdec(hash_state1, xkey0);
+ hash_state2 = aesenc(hash_state2, xkey0);
+ hash_state3 = aesdec(hash_state3, xkey0);
+
+ hash_state0 = aesenc(hash_state0, xkey1);
+ hash_state1 = aesdec(hash_state1, xkey1);
+ hash_state2 = aesenc(hash_state2, xkey1);
+ hash_state3 = aesdec(hash_state3, xkey1);
+
+ //output hash
+ rx_store_vec_i128((rx_vec_i128*)hash + 0, hash_state0);
+ rx_store_vec_i128((rx_vec_i128*)hash + 1, hash_state1);
+ rx_store_vec_i128((rx_vec_i128*)hash + 2, hash_state2);
+ rx_store_vec_i128((rx_vec_i128*)hash + 3, hash_state3);
+}
+//Explicit instantiations. NOTE(review): 'template' above has no parameter list and the two lines below are identical as written - confirm against upstream.
+template void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
+template void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
diff --git a/src/RandomX/src/aes_hash.hpp b/src/RandomX/src/aes_hash.hpp
new file mode 100644
index 000000000..9f75f73ae
--- /dev/null
+++ b/src/RandomX/src/aes_hash.hpp
@@ -0,0 +1,43 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holder nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include
+//Writes a 64-byte AES-based hash of 'input' to 'hash'. NOTE(review): every 'template' in this header lacks a parameter list as written - confirm against upstream.
+template
+void hashAes1Rx4(const void *input, size_t inputSize, void *hash);
+//Fills 'buffer' with 'outputSize' bytes derived from 'state' and writes the advanced state back to 'state'.
+template
+void fillAes1Rx4(void *state, size_t outputSize, void *buffer);
+//Fills 'buffer' like fillAes1Rx4 but with four AES rounds per 64 output bytes; 'state' is left unmodified.
+template
+void fillAes4Rx4(void *state, size_t outputSize, void *buffer);
+//Hashes the scratchpad into 'hash' and refills it from 'fill_state' in the same sweep.
+template
+void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
diff --git a/src/RandomX/src/allocator.cpp b/src/RandomX/src/allocator.cpp
new file mode 100644
index 000000000..4c6d86e05
--- /dev/null
+++ b/src/RandomX/src/allocator.cpp
@@ -0,0 +1,60 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holder nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include
+#include "allocator.hpp"
+#include "intrin_portable.h"
+#include "virtual_memory.hpp"
+#include "common.hpp"
+
+namespace randomx {
+ // NOTE(review): each 'template' below has no parameter list and AlignedAllocator no template arguments as written, although 'alignment' is used like a template parameter - confirm against upstream.
+ template
+ void* AlignedAllocator::allocMemory(size_t count) {
+ void *mem = rx_aligned_alloc(count, alignment);
+ if (mem == nullptr)
+ throw std::bad_alloc();
+ return mem;
+ }
+ // Releases memory through rx_aligned_free; 'count' is accepted but not used.
+ template
+ void AlignedAllocator::freeMemory(void* ptr, size_t count) {
+ rx_aligned_free(ptr);
+ }
+ // Explicit instantiation of the aligned allocator.
+ template struct AlignedAllocator;
+ // Large-page variant: both functions forward directly to allocLargePagesMemory / freePagedMemory.
+ void* LargePageAllocator::allocMemory(size_t count) {
+ return allocLargePagesMemory(count);
+ }
+
+ void LargePageAllocator::freeMemory(void* ptr, size_t count) {
+ freePagedMemory(ptr, count);
+ }
+
+}
\ No newline at end of file
diff --git a/src/RandomX/src/allocator.hpp b/src/RandomX/src/allocator.hpp
new file mode 100644
index 000000000..d7aa3f95d
--- /dev/null
+++ b/src/RandomX/src/allocator.hpp
@@ -0,0 +1,46 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holder nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include
+
+namespace randomx {
+ // Allocator whose allocMemory throws std::bad_alloc on failure (see allocator.cpp). NOTE(review): 'template' has no parameter list as written - confirm against upstream.
+ template
+ struct AlignedAllocator {
+ static void* allocMemory(size_t);
+ static void freeMemory(void*, size_t);
+ };
+ // Allocator backed by allocLargePagesMemory / freePagedMemory (see allocator.cpp).
+ struct LargePageAllocator {
+ static void* allocMemory(size_t);
+ static void freeMemory(void*, size_t);
+ };
+
+}
\ No newline at end of file
diff --git a/src/RandomX/src/argon2.h b/src/RandomX/src/argon2.h
new file mode 100644
index 000000000..9052f42a5
--- /dev/null
+++ b/src/RandomX/src/argon2.h
@@ -0,0 +1,261 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holder nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Original code from Argon2 reference source code package used under CC0 Licence
+ * https://github.com/P-H-C/phc-winner-argon2
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+*/
+
+#pragma once
+
+#include
+#include
+#include
+
+/*
+ * Argon2 input parameter restrictions
+ */
+
+ /* Minimum and maximum number of lanes (degree of parallelism) */
+#define ARGON2_MIN_LANES UINT32_C(1)
+#define ARGON2_MAX_LANES UINT32_C(0xFFFFFF)
+
+/* Minimum and maximum number of threads */
+#define ARGON2_MIN_THREADS UINT32_C(1)
+#define ARGON2_MAX_THREADS UINT32_C(0xFFFFFF)
+
+/* Number of synchronization points between lanes per pass */
+#define ARGON2_SYNC_POINTS UINT32_C(4)
+
+/* Minimum and maximum digest size in bytes */
+#define ARGON2_MIN_OUTLEN UINT32_C(4)
+#define ARGON2_MAX_OUTLEN UINT32_C(0xFFFFFFFF)
+
+/* Minimum and maximum number of memory blocks (each of BLOCK_SIZE bytes) */
+#define ARGON2_MIN_MEMORY (2 * ARGON2_SYNC_POINTS) /* 2 blocks per slice */
+/* Function-like minimum; an argument with side effects would be evaluated twice */
+#define ARGON2_MIN(a, b) ((a) < (b) ? (a) : (b))
+/* Max memory size is addressing-space/2, topping at 2^32 blocks (4 TB) */
+#define ARGON2_MAX_MEMORY_BITS \
+ ARGON2_MIN(UINT32_C(32), (sizeof(void *) * CHAR_BIT - 10 - 1))
+#define ARGON2_MAX_MEMORY \
+ ARGON2_MIN(UINT32_C(0xFFFFFFFF), UINT64_C(1) << ARGON2_MAX_MEMORY_BITS)
+/* (CHAR_BIT used above is the standard <limits.h> constant) */
+/* Minimum and maximum number of passes */
+#define ARGON2_MIN_TIME UINT32_C(1)
+#define ARGON2_MAX_TIME UINT32_C(0xFFFFFFFF)
+
+/* Minimum and maximum password length in bytes */
+#define ARGON2_MIN_PWD_LENGTH UINT32_C(0)
+#define ARGON2_MAX_PWD_LENGTH UINT32_C(0xFFFFFFFF)
+
+/* Minimum and maximum associated data length in bytes */
+#define ARGON2_MIN_AD_LENGTH UINT32_C(0)
+#define ARGON2_MAX_AD_LENGTH UINT32_C(0xFFFFFFFF)
+
+/* Minimum and maximum salt length in bytes */
+#define ARGON2_MIN_SALT_LENGTH UINT32_C(8)
+#define ARGON2_MAX_SALT_LENGTH UINT32_C(0xFFFFFFFF)
+
+/* Minimum and maximum key length in bytes */
+#define ARGON2_MIN_SECRET UINT32_C(0)
+#define ARGON2_MAX_SECRET UINT32_C(0xFFFFFFFF)
+
+/* Flags to determine which fields are securely wiped (default = no wipe). */
+#define ARGON2_DEFAULT_FLAGS UINT32_C(0)
+#define ARGON2_FLAG_CLEAR_PASSWORD (UINT32_C(1) << 0)
+#define ARGON2_FLAG_CLEAR_SECRET (UINT32_C(1) << 1)
+
+
+/* Error codes: ARGON2_OK is zero, every failure has its own negative value */
+typedef enum Argon2_ErrorCodes {
+ ARGON2_OK = 0,
+ /* -1 .. -21: a pointer or length argument is missing or outside its allowed range */
+ ARGON2_OUTPUT_PTR_NULL = -1,
+
+ ARGON2_OUTPUT_TOO_SHORT = -2,
+ ARGON2_OUTPUT_TOO_LONG = -3,
+
+ ARGON2_PWD_TOO_SHORT = -4,
+ ARGON2_PWD_TOO_LONG = -5,
+
+ ARGON2_SALT_TOO_SHORT = -6,
+ ARGON2_SALT_TOO_LONG = -7,
+
+ ARGON2_AD_TOO_SHORT = -8,
+ ARGON2_AD_TOO_LONG = -9,
+
+ ARGON2_SECRET_TOO_SHORT = -10,
+ ARGON2_SECRET_TOO_LONG = -11,
+
+ ARGON2_TIME_TOO_SMALL = -12,
+ ARGON2_TIME_TOO_LARGE = -13,
+
+ ARGON2_MEMORY_TOO_LITTLE = -14,
+ ARGON2_MEMORY_TOO_MUCH = -15,
+
+ ARGON2_LANES_TOO_FEW = -16,
+ ARGON2_LANES_TOO_MANY = -17,
+
+ ARGON2_PWD_PTR_MISMATCH = -18, /* NULL ptr with non-zero length */
+ ARGON2_SALT_PTR_MISMATCH = -19, /* NULL ptr with non-zero length */
+ ARGON2_SECRET_PTR_MISMATCH = -20, /* NULL ptr with non-zero length */
+ ARGON2_AD_PTR_MISMATCH = -21, /* NULL ptr with non-zero length */
+
+ ARGON2_MEMORY_ALLOCATION_ERROR = -22,
+
+ ARGON2_FREE_MEMORY_CBK_NULL = -23,
+ ARGON2_ALLOCATE_MEMORY_CBK_NULL = -24,
+
+ ARGON2_INCORRECT_PARAMETER = -25,
+ ARGON2_INCORRECT_TYPE = -26,
+
+ ARGON2_OUT_PTR_MISMATCH = -27,
+
+ ARGON2_THREADS_TOO_FEW = -28,
+ ARGON2_THREADS_TOO_MANY = -29,
+
+ ARGON2_MISSING_ARGS = -30,
+
+ ARGON2_ENCODING_FAIL = -31,
+
+ ARGON2_DECODING_FAIL = -32,
+
+ ARGON2_THREAD_FAIL = -33,
+
+ ARGON2_DECODING_LENGTH_FAIL = -34,
+
+ ARGON2_VERIFY_MISMATCH = -35
+} argon2_error_codes;
+
+/* Memory allocator callback types for external allocation: the allocator hands the buffer back through '*memory' and returns an int (presumably a status code - confirm); the deallocator receives the buffer and its size */
+typedef int(*allocate_fptr)(uint8_t **memory, size_t bytes_to_allocate);
+typedef void(*deallocate_fptr)(uint8_t *memory, size_t bytes_to_allocate);
+
+/* Argon2 external data structures */
+/* NOTE(review): the text below mentions three wipe flags and shows a 20-argument initializer, but the struct has 18 fields with a single 'flags' word, and this header defines only two ARGON2_FLAG_* bits - treat the example as illustrative. */
+/*
+ *****
+ * Context: structure to hold Argon2 inputs:
+ * output array and its length,
+ * password and its length,
+ * salt and its length,
+ * secret and its length,
+ * associated data and its length,
+ * number of passes, amount of used memory (in KBytes, can be rounded up a bit)
+ * number of parallel threads that will be run.
+ * All the parameters above affect the output hash value.
+ * Additionally, two function pointers can be provided to allocate and
+ * deallocate the memory (if NULL, memory will be allocated internally).
+ * Also, three flags indicate whether to erase password, secret as soon as they
+ * are pre-hashed (and thus not needed anymore), and the entire memory
+ *****
+ * Simplest situation: you have output array out[8], password is stored in
+ * pwd[32], salt is stored in salt[16], you do not have keys nor associated
+ * data. You need to spend 1 GB of RAM and you run 5 passes of Argon2d with
+ * 4 parallel lanes.
+ * You want to erase the password, but you're OK with last pass not being
+ * erased. You want to use the default memory allocator.
+ * Then you initialize:
+ Argon2_Context(out,8,pwd,32,salt,16,NULL,0,NULL,0,5,1<<20,4,4,NULL,NULL,true,false,false,false)
+ */
+typedef struct Argon2_Context {
+ uint8_t *out; /* output array */
+ uint32_t outlen; /* digest length */
+
+ uint8_t *pwd; /* password array */
+ uint32_t pwdlen; /* password length */
+
+ uint8_t *salt; /* salt array */
+ uint32_t saltlen; /* salt length */
+
+ uint8_t *secret; /* key array */
+ uint32_t secretlen; /* key length */
+
+ uint8_t *ad; /* associated data array */
+ uint32_t adlen; /* associated data length */
+
+ uint32_t t_cost; /* number of passes */
+ uint32_t m_cost; /* amount of memory requested (KB) */
+ uint32_t lanes; /* number of lanes */
+ uint32_t threads; /* maximum number of threads */
+
+ uint32_t version; /* version number */
+
+ allocate_fptr allocate_cbk; /* pointer to memory allocator */
+ deallocate_fptr free_cbk; /* pointer to memory deallocator */
+
+ uint32_t flags; /* array of bool options */
+} argon2_context;
+
+/* Argon2 primitive type (the three variants d, i and id, numbered 0 to 2) */
+typedef enum Argon2_type {
+ Argon2_d = 0,
+ Argon2_i = 1,
+ Argon2_id = 2
+} argon2_type;
+
+/* Version of the algorithm; ARGON2_VERSION_NUMBER is an alias for ARGON2_VERSION_13 (0x13) */
+typedef enum Argon2_version {
+ ARGON2_VERSION_10 = 0x10,
+ ARGON2_VERSION_13 = 0x13,
+ ARGON2_VERSION_NUMBER = ARGON2_VERSION_13
+} argon2_version;
+
+//Argon2 instance - forward declaration (the struct is not defined in this header)
+typedef struct Argon2_instance_t argon2_instance_t;
+
+//Argon2 position - forward declaration (the struct is not defined in this header)
+typedef struct Argon2_position_t argon2_position_t;
+
+//Argon2 implementation function: the signature shared by the segment-fill routines declared below
+typedef void randomx_argon2_impl(const argon2_instance_t* instance,
+ argon2_position_t position);
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/*
+ * Function that fills the segment using previous segments also from other
+ * threads
+ * (takes no context argument; instance and position are the only parameters)
+ * @param instance Pointer to the current instance
+ * @param position Current position
+ * @pre all block pointers must be valid
+ */
+void randomx_argon2_fill_segment_ref(const argon2_instance_t* instance,
+ argon2_position_t position);
+/* Getters for alternative implementations; randomx_argon2_impl_avx2 returns NULL when compiled without __AVX2__ */
+randomx_argon2_impl *randomx_argon2_impl_ssse3();
+randomx_argon2_impl *randomx_argon2_impl_avx2();
+
+#if defined(__cplusplus)
+}
+#endif
diff --git a/src/RandomX/src/argon2_avx2.c b/src/RandomX/src/argon2_avx2.c
new file mode 100644
index 000000000..213530326
--- /dev/null
+++ b/src/RandomX/src/argon2_avx2.c
@@ -0,0 +1,174 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holder nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Original code from Argon2 reference source code package used under CC0 Licence
+ * https://github.com/P-H-C/phc-winner-argon2
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+*/
+
+#include
+#include
+#include
+
+#include "argon2.h"
+
+void randomx_argon2_fill_segment_avx2(const argon2_instance_t* instance,
+ argon2_position_t position); /* defined further down, only when __AVX2__ is defined */
+/* Returns the AVX2 segment-fill routine, or NULL when this file is compiled without __AVX2__. */
+randomx_argon2_impl* randomx_argon2_impl_avx2(void) {
+#if defined(__AVX2__)
+ return &randomx_argon2_fill_segment_avx2;
+#else
+ return NULL;
+#endif
+}
+#if defined(__AVX2__)
+
+#include "argon2_core.h"
+
+#include "blake2/blamka-round-avx2.h"
+#include "blake2/blake2-impl.h"
+#include "blake2/blake2.h"
+/* Computes one block: XORs ref_block into state, permutes state with the two BLAKE2 round macros, XORs the pre-permutation value back in and stores the result to next_block; state keeps that result. When with_xor is set, the old contents of next_block are folded into the result as well. */
+static void fill_block(__m256i* state, const block* ref_block,
+ block* next_block, int with_xor) {
+ __m256i block_XY[ARGON2_HWORDS_IN_BLOCK];
+ unsigned int i;
+ /* block_XY keeps the pre-permutation value for the final feed-forward XOR */
+ if (with_xor) {
+ for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
+ state[i] = _mm256_xor_si256(
+ state[i], _mm256_loadu_si256((const __m256i*)ref_block->v + i));
+ block_XY[i] = _mm256_xor_si256(
+ state[i], _mm256_loadu_si256((const __m256i*)next_block->v + i));
+ }
+ }
+ else {
+ for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
+ block_XY[i] = state[i] = _mm256_xor_si256(
+ state[i], _mm256_loadu_si256((const __m256i*)ref_block->v + i));
+ }
+ }
+ /* first round macro: four groups of 8 consecutive vectors */
+ for (i = 0; i < 4; ++i) {
+ BLAKE2_ROUND_1(state[8 * i + 0], state[8 * i + 4], state[8 * i + 1], state[8 * i + 5],
+ state[8 * i + 2], state[8 * i + 6], state[8 * i + 3], state[8 * i + 7]);
+ }
+ /* second round macro: four groups of 8 vectors taken at a stride of 4 */
+ for (i = 0; i < 4; ++i) {
+ BLAKE2_ROUND_2(state[0 + i], state[4 + i], state[8 + i], state[12 + i],
+ state[16 + i], state[20 + i], state[24 + i], state[28 + i]);
+ }
+ /* feed-forward XOR, then write the finished block out */
+ for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
+ state[i] = _mm256_xor_si256(state[i], block_XY[i]);
+ _mm256_storeu_si256((__m256i*)next_block->v + i, state[i]);
+ }
+}
+/* Fills one segment of instance->memory - the one selected by position.lane and position.slice - one block per loop iteration; returns immediately when 'instance' is NULL. */
+void randomx_argon2_fill_segment_avx2(const argon2_instance_t* instance,
+ argon2_position_t position) {
+ block* ref_block = NULL, * curr_block = NULL;
+ /* 'state' below carries the running block value from one fill_block call to the next */
+ uint64_t pseudo_rand, ref_index, ref_lane;
+ uint32_t prev_offset, curr_offset;
+ uint32_t starting_index, i;
+ __m256i state[ARGON2_HWORDS_IN_BLOCK];
+
+ if (instance == NULL) {
+ return;
+ }
+
+ starting_index = 0;
+
+ if ((0 == position.pass) && (0 == position.slice)) {
+ starting_index = 2; /* we have already generated the first two blocks */
+ }
+
+ /* Offset of the current block */
+ curr_offset = position.lane * instance->lane_length +
+ position.slice * instance->segment_length + starting_index;
+
+ if (0 == curr_offset % instance->lane_length) {
+ /* Last block in this lane */
+ prev_offset = curr_offset + instance->lane_length - 1;
+ }
+ else {
+ /* Previous block */
+ prev_offset = curr_offset - 1;
+ }
+ /* seed the running state with the block that precedes the first one to be written */
+ memcpy(state, ((instance->memory + prev_offset)->v), ARGON2_BLOCK_SIZE);
+
+ for (i = starting_index; i < instance->segment_length;
+ ++i, ++curr_offset, ++prev_offset) {
+ /*1.1 Rotating prev_offset if needed */
+ if (curr_offset % instance->lane_length == 1) {
+ prev_offset = curr_offset - 1;
+ }
+
+ /* 1.2 Computing the index of the reference block */
+ /* 1.2.1 Taking pseudo-random value from the previous block */
+ pseudo_rand = instance->memory[prev_offset].v[0];
+
+ /* 1.2.2 Computing the lane of the reference block */
+ ref_lane = ((pseudo_rand >> 32)) % instance->lanes;
+
+ if ((position.pass == 0) && (position.slice == 0)) {
+ /* Can not reference other lanes yet */
+ ref_lane = position.lane;
+ }
+
+ /* 1.2.3 Computing the number of possible reference block within the
+ * lane.
+ */
+ position.index = i;
+ ref_index = randomx_argon2_index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,
+ ref_lane == position.lane);
+
+ /* 2 Creating a new block */
+ ref_block =
+ instance->memory + instance->lane_length * ref_lane + ref_index;
+ curr_block = instance->memory + curr_offset;
+ if (ARGON2_VERSION_10 == instance->version) {
+ /* version 1.2.1 and earlier: overwrite, not XOR */
+ fill_block(state, ref_block, curr_block, 0);
+ }
+ else {
+ if (0 == position.pass) {
+ fill_block(state, ref_block, curr_block, 0);
+ }
+ else {
+ fill_block(state, ref_block, curr_block, 1);
+ }
+ }
+ }
+}
+
+#endif
diff --git a/src/RandomX/src/argon2_core.c b/src/RandomX/src/argon2_core.c
new file mode 100644
index 000000000..f2e7f3d1a
--- /dev/null
+++ b/src/RandomX/src/argon2_core.c
@@ -0,0 +1,411 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holder nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Original code from Argon2 reference source code package used under CC0 Licence
+ * https://github.com/P-H-C/phc-winner-argon2
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+*/
+
+ /*For memory wiping*/
+#ifdef _MSC_VER
+#include <windows.h>
+#include <winbase.h> /* For SecureZeroMemory */
+#endif
+#if defined __STDC_LIB_EXT1__
+#define __STDC_WANT_LIB_EXT1__ 1
+#endif
+#define VC_GE_2005(version) (version >= 1400)
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "argon2_core.h"
+#include "blake2/blake2.h"
+#include "blake2/blake2-impl.h"
+
+#ifdef GENKAT
+#include "genkat.h"
+#endif
+
+#if defined(__clang__)
+#if __has_attribute(optnone)
+#define NOT_OPTIMIZED __attribute__((optnone))
+#endif
+#elif defined(__GNUC__)
+#define GCC_VERSION \
+ (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
+#if GCC_VERSION >= 40400
+#define NOT_OPTIMIZED __attribute__((optimize("O0")))
+#endif
+#endif
+#ifndef NOT_OPTIMIZED
+#define NOT_OPTIMIZED
+#endif
+
+/***************Instance and Position constructors**********/
+
+static void load_block(block *dst, const void *input) {
+    const uint8_t *bytes = (const uint8_t *)input; /* byte view of the input buffer */
+    unsigned word; /* index of the 64-bit word currently being decoded with load64() */
+    for (word = 0; word < ARGON2_QWORDS_IN_BLOCK; ++word)
+        dst->v[word] = load64(bytes + word * sizeof(dst->v[word]));
+}
+
+static void store_block(void *output, const block *src) {
+    uint8_t *bytes = (uint8_t *)output; /* byte view of the output buffer */
+    unsigned word; /* index of the 64-bit word currently being encoded with store64() */
+    for (word = 0; word < ARGON2_QWORDS_IN_BLOCK; ++word)
+        store64(bytes + word * sizeof(src->v[word]), src->v[word]);
+}
+
+uint32_t randomx_argon2_index_alpha(const argon2_instance_t *instance,
+ const argon2_position_t *position, uint32_t pseudo_rand,
+ int same_lane) {
+ /* Maps pseudo_rand to a block index inside a lane. Blocks that may be referenced:
+ * Pass 0:
+ * This lane : all already finished segments plus already constructed
+ * blocks in this segment
+ * Other lanes : all already finished segments
+ * Pass 1+:
+ * This lane : (SYNC_POINTS - 1) last segments plus already constructed
+ * blocks in this segment
+ * Other lanes : (SYNC_POINTS - 1) last segments
+ */
+ uint32_t reference_area_size;
+ uint64_t relative_position;
+ uint32_t start_position, absolute_position;
+
+ if (0 == position->pass) {
+ /* First pass */
+ if (0 == position->slice) {
+ /* First slice: only blocks of the current segment exist */
+ reference_area_size =
+ position->index - 1; /* all but the previous */
+ }
+ else {
+ if (same_lane) {
+ /* The same lane => finished slices plus the current segment */
+ reference_area_size =
+ position->slice * instance->segment_length +
+ position->index - 1;
+ }
+ else {
+ reference_area_size =
+ position->slice * instance->segment_length +
+ ((position->index == 0) ? (-1) : 0); /* one block fewer for the first block of a segment */
+ }
+ }
+ }
+ else {
+ /* Pass 1 and later: the whole lane except the current segment */
+ if (same_lane) {
+ reference_area_size = instance->lane_length -
+ instance->segment_length + position->index -
+ 1;
+ }
+ else {
+ reference_area_size = instance->lane_length -
+ instance->segment_length +
+ ((position->index == 0) ? (-1) : 0); /* one block fewer for the first block of a segment */
+ }
+ }
+
+ /* 1.2.4. Mapping pseudo_rand to 0..<reference_area_size - 1> and produce
+ * relative position */
+ relative_position = pseudo_rand;
+ relative_position = relative_position * relative_position >> 32; /* square, keep the high part: biased toward 0 */
+ relative_position = reference_area_size - 1 -
+ (reference_area_size * relative_position >> 32); /* small values land at the end of the window */
+
+ /* 1.2.5 Computing starting position: 0 in pass 0, otherwise the slice after the current one */
+ start_position = 0;
+
+ if (0 != position->pass) {
+ start_position = (position->slice == ARGON2_SYNC_POINTS - 1)
+ ? 0
+ : (position->slice + 1) * instance->segment_length;
+ }
+
+ /* 1.2.6. Computing absolute position (wraps around the end of the lane) */
+ absolute_position = (start_position + relative_position) %
+ instance->lane_length; /* absolute position */
+ return absolute_position;
+}
+
+/* Single-threaded version for p=1 case */
+static int fill_memory_blocks_st(argon2_instance_t *instance) {
+    uint32_t pass, slice, lane;
+    /* Segments are visited in pass -> slice -> lane order, one impl call per segment. */
+    for (pass = 0; pass < instance->passes; ++pass) {
+        for (slice = 0; slice < ARGON2_SYNC_POINTS; ++slice) {
+            for (lane = 0; lane < instance->lanes; ++lane) {
+                /* Each segment starts at block index 0; the position is handed over by value. */
+                argon2_position_t pos = { pass, lane, (uint8_t)slice, 0 };
+                instance->impl(instance, pos);
+            }
+        }
+    }
+    return ARGON2_OK;
+}
+
+int randomx_argon2_fill_memory_blocks(argon2_instance_t *instance) {
+    /* Fills all segments; returns ARGON2_INCORRECT_PARAMETER for a NULL instance, zero lanes or a NULL impl (a backend getter may return NULL). */
+    if (instance == NULL || instance->lanes == 0 || instance->impl == NULL)
+        return ARGON2_INCORRECT_PARAMETER;
+    return fill_memory_blocks_st(instance);
+}
+
+int randomx_argon2_validate_inputs(const argon2_context *context) {
+ if (NULL == context) {
+ return ARGON2_INCORRECT_PARAMETER;
+ }
+
+ /* Validate password (required param) */
+ if (NULL == context->pwd) {
+ if (0 != context->pwdlen) {
+ return ARGON2_PWD_PTR_MISMATCH;
+ }
+ }
+
+ if (ARGON2_MIN_PWD_LENGTH > context->pwdlen) {
+ return ARGON2_PWD_TOO_SHORT;
+ }
+
+ if (ARGON2_MAX_PWD_LENGTH < context->pwdlen) {
+ return ARGON2_PWD_TOO_LONG;
+ }
+
+ /* Validate salt (required param) */
+ if (NULL == context->salt) {
+ if (0 != context->saltlen) {
+ return ARGON2_SALT_PTR_MISMATCH;
+ }
+ }
+
+ if (ARGON2_MIN_SALT_LENGTH > context->saltlen) {
+ return ARGON2_SALT_TOO_SHORT;
+ }
+
+ if (ARGON2_MAX_SALT_LENGTH < context->saltlen) {
+ return ARGON2_SALT_TOO_LONG;
+ }
+
+ /* Validate secret (optional param) */
+ if (NULL == context->secret) {
+ if (0 != context->secretlen) {
+ return ARGON2_SECRET_PTR_MISMATCH;
+ }
+ }
+ else {
+ if (ARGON2_MIN_SECRET > context->secretlen) {
+ return ARGON2_SECRET_TOO_SHORT;
+ }
+ if (ARGON2_MAX_SECRET < context->secretlen) {
+ return ARGON2_SECRET_TOO_LONG;
+ }
+ }
+
+ /* Validate associated data (optional param) */
+ if (NULL == context->ad) {
+ if (0 != context->adlen) {
+ return ARGON2_AD_PTR_MISMATCH;
+ }
+ }
+ else {
+ if (ARGON2_MIN_AD_LENGTH > context->adlen) {
+ return ARGON2_AD_TOO_SHORT;
+ }
+ if (ARGON2_MAX_AD_LENGTH < context->adlen) {
+ return ARGON2_AD_TOO_LONG;
+ }
+ }
+
+ /* Validate memory cost */
+ if (ARGON2_MIN_MEMORY > context->m_cost) {
+ return ARGON2_MEMORY_TOO_LITTLE;
+ }
+
+ if (ARGON2_MAX_MEMORY < context->m_cost) {
+ return ARGON2_MEMORY_TOO_MUCH;
+ }
+
+ if (context->m_cost < 8 * context->lanes) {
+ return ARGON2_MEMORY_TOO_LITTLE;
+ }
+
+ /* Validate time cost */
+ if (ARGON2_MIN_TIME > context->t_cost) {
+ return ARGON2_TIME_TOO_SMALL;
+ }
+
+ if (ARGON2_MAX_TIME < context->t_cost) {
+ return ARGON2_TIME_TOO_LARGE;
+ }
+
+ /* Validate lanes */
+ if (ARGON2_MIN_LANES > context->lanes) {
+ return ARGON2_LANES_TOO_FEW;
+ }
+
+ if (ARGON2_MAX_LANES < context->lanes) {
+ return ARGON2_LANES_TOO_MANY;
+ }
+
+ /* Validate threads */
+ if (ARGON2_MIN_THREADS > context->threads) {
+ return ARGON2_THREADS_TOO_FEW;
+ }
+
+ if (ARGON2_MAX_THREADS < context->threads) {
+ return ARGON2_THREADS_TOO_MANY;
+ }
+
+ if (NULL != context->allocate_cbk && NULL == context->free_cbk) {
+ return ARGON2_FREE_MEMORY_CBK_NULL;
+ }
+
+ if (NULL == context->allocate_cbk && NULL != context->free_cbk) {
+ return ARGON2_ALLOCATE_MEMORY_CBK_NULL;
+ }
+
+ return ARGON2_OK;
+}
+
+void rxa2_fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance) {
+ uint32_t l;
+ /* Derive blocks 0 and 1 of every lane as G(H0 || block index || lane number)
+ and load them straight into instance->memory */
+ uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE];
+ for (l = 0; l < instance->lanes; ++l) {
+
+ store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 0); /* block index 0 */
+ store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH + 4, l); /* lane number */
+ blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, blockhash,
+ ARGON2_PREHASH_SEED_LENGTH);
+ load_block(&instance->memory[l * instance->lane_length + 0],
+ blockhash_bytes);
+
+ store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 1); /* block index 1; the lane bytes are left as written above */
+ blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, blockhash,
+ ARGON2_PREHASH_SEED_LENGTH);
+ load_block(&instance->memory[l * instance->lane_length + 1],
+ blockhash_bytes);
+ }
+}
+
+void rxa2_initial_hash(uint8_t *blockhash, argon2_context *context, argon2_type type) {
+ blake2b_state BlakeHash;
+ uint8_t value[sizeof(uint32_t)];
+
+ if (NULL == context || NULL == blockhash) {
+ return;
+ }
+
+ blake2b_init(&BlakeHash, ARGON2_PREHASH_DIGEST_LENGTH);
+
+ store32(&value, context->lanes);
+ blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
+
+ store32(&value, context->outlen);
+ blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
+
+ store32(&value, context->m_cost);
+ blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
+
+ store32(&value, context->t_cost);
+ blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
+
+ store32(&value, context->version);
+ blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
+
+ store32(&value, (uint32_t)type);
+ blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
+
+ store32(&value, context->pwdlen);
+ blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
+
+ if (context->pwd != NULL) {
+ blake2b_update(&BlakeHash, (const uint8_t *)context->pwd,
+ context->pwdlen);
+ }
+
+ store32(&value, context->saltlen);
+ blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
+
+ if (context->salt != NULL) {
+ blake2b_update(&BlakeHash, (const uint8_t *)context->salt, context->saltlen);
+ }
+
+ store32(&value, context->secretlen);
+ blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
+
+ if (context->secret != NULL) {
+ blake2b_update(&BlakeHash, (const uint8_t *)context->secret,
+ context->secretlen);
+ }
+
+ store32(&value, context->adlen);
+ blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
+
+ if (context->ad != NULL) {
+ blake2b_update(&BlakeHash, (const uint8_t *)context->ad,
+ context->adlen);
+ }
+
+ blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH);
+}
+
+int randomx_argon2_initialize(argon2_instance_t *instance, argon2_context *context) {
+    /* Hashes all context inputs into H_0 and derives blocks 0 and 1 of every lane.
+     * Returns ARGON2_OK, or ARGON2_INCORRECT_PARAMETER when a required pointer is NULL. */
+    uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH]; /* H_0 + 8 extra bytes to produce the first blocks */
+
+    if (instance == NULL || context == NULL)
+        return ARGON2_INCORRECT_PARAMETER;
+    instance->context_ptr = context;
+
+    /* 1. Memory allocation */
+    /* RandomX takes care of memory allocation, so the buffer must already be
+     * attached: fail cleanly instead of writing the first blocks through NULL. */
+    if (instance->memory == NULL)
+        return ARGON2_INCORRECT_PARAMETER;
+
+    /* 2. Initial hashing */
+    /* Hashing all inputs into the first ARGON2_PREHASH_DIGEST_LENGTH bytes */
+    rxa2_initial_hash(blockhash, context, instance->type);
+    /* The 8 extra bytes are not zeroed here: rxa2_fill_first_blocks() stores
+     * the block index and the lane number into them before hashing. */
+
+    /* 3. Creating first blocks, we always have at least two blocks in a slice
+     */
+    rxa2_fill_first_blocks(blockhash, instance);
+
+    return ARGON2_OK;
+}
diff --git a/src/RandomX/src/argon2_core.h b/src/RandomX/src/argon2_core.h
new file mode 100644
index 000000000..def27c6da
--- /dev/null
+++ b/src/RandomX/src/argon2_core.h
@@ -0,0 +1,163 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holder nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Original code from Argon2 reference source code package used under CC0 Licence
+ * https://github.com/P-H-C/phc-winner-argon2
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+*/
+
+#ifndef ARGON2_CORE_H
+#define ARGON2_CORE_H
+
+#include <stdint.h>
+#include "argon2.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#define CONST_CAST(x) (x)(uintptr_t)
+
+ /**********************Argon2 internal constants*******************************/
+
+enum argon2_core_constants {
+ /* Memory block size in bytes */
+ ARGON2_BLOCK_SIZE = 1024,
+ ARGON2_QWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 8,
+ ARGON2_OWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 16,
+ ARGON2_HWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 32,
+ ARGON2_512BIT_WORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 64,
+
+ /* Number of pseudo-random values generated by one call to Blake in Argon2i
+ to
+ generate reference block positions */
+ ARGON2_ADDRESSES_IN_BLOCK = 128,
+
+ /* Pre-hashing digest length and its extension*/
+ ARGON2_PREHASH_DIGEST_LENGTH = 64,
+ ARGON2_PREHASH_SEED_LENGTH = 72
+};
+
+/*************************Argon2 internal data types***********************/
+
+/*
+ * Structure for the (1KB) memory block implemented as 128 64-bit words.
+ * Memory blocks can be copied, XORed. Internal words can be accessed by [] (no
+ * bounds checking).
+ */
+typedef struct block_ { uint64_t v[ARGON2_QWORDS_IN_BLOCK]; } block;
+
+/*
+ * Argon2 instance: memory pointer, number of passes, amount of memory, type,
+ * and derived values.
+ * Used to evaluate the number and location of blocks to construct in each
+ * thread
+ */
+typedef struct Argon2_instance_t {
+ block *memory; /* Memory pointer */
+ uint32_t version;
+ uint32_t passes; /* Number of passes */
+ uint32_t memory_blocks; /* Number of blocks in memory */
+ uint32_t segment_length;
+ uint32_t lane_length;
+ uint32_t lanes;
+ uint32_t threads;
+ argon2_type type;
+ int print_internals; /* whether to print the memory blocks */
+ argon2_context *context_ptr; /* points back to original context */
+ randomx_argon2_impl *impl;
+} argon2_instance_t;
+
+/*
+ * Argon2 position: where we construct the block right now. Used to distribute
+ * work between threads.
+ */
+typedef struct Argon2_position_t {
+ uint32_t pass;
+ uint32_t lane;
+ uint8_t slice;
+ uint32_t index;
+} argon2_position_t;
+
+/*Struct that holds the inputs for thread handling FillSegment*/
+typedef struct Argon2_thread_data {
+ argon2_instance_t *instance_ptr;
+ argon2_position_t pos;
+} argon2_thread_data;
+
+/*************************Argon2 core functions********************************/
+
+/*
+ * Computes absolute position of reference block in the lane following a skewed
+ * distribution and using a pseudo-random value as input
+ * @param instance Pointer to the current instance
+ * @param position Pointer to the current position
+ * @param pseudo_rand 32-bit pseudo-random value used to determine the position
+ * @param same_lane Indicates if the block will be taken from the current lane.
+ * If so we can reference the current segment
+ * @pre All pointers must be valid
+ */
+uint32_t randomx_argon2_index_alpha(const argon2_instance_t *instance,
+ const argon2_position_t *position, uint32_t pseudo_rand,
+ int same_lane);
+
+/*
+ * Function that validates all inputs against predefined restrictions and return
+ * an error code
+ * @param context Pointer to current Argon2 context
+ * @return ARGON2_OK if everything is all right, otherwise one of error codes
+ * (all defined in <argon2.h>)
+ */
+int randomx_argon2_validate_inputs(const argon2_context *context);
+
+/*
+ * Function hashes the inputs with Blake2b and creates the first two blocks of
+ * each lane. It does not allocate memory: RandomX provides the buffer, so
+ * instance->memory must be set by the caller beforehand.
+ * @param context Pointer to the Argon2 context holding the inputs that are
+ * hashed (password, salt, secret, associated data and cost parameters).
+ * @param instance Current Argon2 instance; its context_ptr is set to @context
+ * @return ARGON2_OK if successful, ARGON2_INCORRECT_PARAMETER if a required
+ * pointer is NULL.
+ */
+int randomx_argon2_initialize(argon2_instance_t *instance, argon2_context *context);
+
+/*
+ * Function that fills the entire memory t_cost times based on the first two
+ * blocks in each lane
+ * @param instance Pointer to the current instance
+ * @return ARGON2_OK if successful, ARGON2_INCORRECT_PARAMETER if the instance is NULL or unusable (e.g. zero lanes)
+ */
+int randomx_argon2_fill_memory_blocks(argon2_instance_t* instance);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
diff --git a/src/RandomX/src/argon2_ref.c b/src/RandomX/src/argon2_ref.c
new file mode 100644
index 000000000..dc4a8049b
--- /dev/null
+++ b/src/RandomX/src/argon2_ref.c
@@ -0,0 +1,187 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holder nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Original code from Argon2 reference source code package used under CC0 Licence
+ * https://github.com/P-H-C/phc-winner-argon2
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+*/
+
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "argon2.h"
+#include "argon2_core.h"
+
+#include "blake2/blamka-round-ref.h"
+#include "blake2/blake2-impl.h"
+#include "blake2/blake2.h"
+
+static void copy_block(block* dst, const block* src) {
+    memcpy(dst->v, src->v, sizeof dst->v); /* the whole v[] array, i.e. ARGON2_QWORDS_IN_BLOCK 64-bit words */
+}
+
+static void xor_block(block* dst, const block* src) {
+    const uint64_t *in = src->v; /* XOR every 64-bit word of src into the matching word of dst */
+    const uint64_t *const end = in + ARGON2_QWORDS_IN_BLOCK; /* one past the last word */
+    uint64_t *out = dst->v;
+    while (in != end) *out++ ^= *in++;
+}
+
+ /*
+ * Function fills a new memory block and optionally XORs the old block over the new one.
+ * @next_block must be initialized.
+ * @param prev_block Pointer to the previous block
+ * @param ref_block Pointer to the reference block
+ * @param next_block Pointer to the block to be constructed
+ * @param with_xor Whether to XOR into the new block (1) or just overwrite (0)
+ * @pre all block pointers must be valid
+ */
+static void fill_block(const block *prev_block, const block *ref_block,
+ block *next_block, int with_xor) {
+ block blockR, block_tmp;
+ unsigned i;
+
+ copy_block(&blockR, ref_block);
+ xor_block(&blockR, prev_block);
+ copy_block(&block_tmp, &blockR);
+ /* Now blockR = ref_block + prev_block and block_tmp = ref_block + prev_block */
+ if (with_xor) {
+ /* Saving the next block contents for XOR over: */
+ xor_block(&block_tmp, next_block);
+ /* Now blockR = ref_block + prev_block and
+ block_tmp = ref_block + prev_block + next_block */
+ }
+
+ /* Apply Blake2 on columns of 64-bit words: (0,1,...,15) , then
+ (16,17,..31)... finally (112,113,...127) */
+ for (i = 0; i < 8; ++i) {
+ BLAKE2_ROUND_NOMSG(
+ blockR.v[16 * i], blockR.v[16 * i + 1], blockR.v[16 * i + 2],
+ blockR.v[16 * i + 3], blockR.v[16 * i + 4], blockR.v[16 * i + 5],
+ blockR.v[16 * i + 6], blockR.v[16 * i + 7], blockR.v[16 * i + 8],
+ blockR.v[16 * i + 9], blockR.v[16 * i + 10], blockR.v[16 * i + 11],
+ blockR.v[16 * i + 12], blockR.v[16 * i + 13], blockR.v[16 * i + 14],
+ blockR.v[16 * i + 15]);
+ }
+
+ /* Apply Blake2 on rows of 64-bit words: (0,1,16,17,...112,113), then
+ (2,3,18,19,...,114,115).. finally (14,15,30,31,...,126,127) */
+ for (i = 0; i < 8; i++) {
+ BLAKE2_ROUND_NOMSG(
+ blockR.v[2 * i], blockR.v[2 * i + 1], blockR.v[2 * i + 16],
+ blockR.v[2 * i + 17], blockR.v[2 * i + 32], blockR.v[2 * i + 33],
+ blockR.v[2 * i + 48], blockR.v[2 * i + 49], blockR.v[2 * i + 64],
+ blockR.v[2 * i + 65], blockR.v[2 * i + 80], blockR.v[2 * i + 81],
+ blockR.v[2 * i + 96], blockR.v[2 * i + 97], blockR.v[2 * i + 112],
+ blockR.v[2 * i + 113]);
+ }
+
+ copy_block(next_block, &block_tmp);
+ xor_block(next_block, &blockR);
+}
+
+void randomx_argon2_fill_segment_ref(const argon2_instance_t *instance,
+                                     argon2_position_t position) {
+    /* Reference (plain C) segment filler: constructs the blocks of the segment */
+    /* selected by position.pass / position.lane / position.slice, in order. */
+    block *ref_block = NULL, *curr_block = NULL;
+    uint64_t pseudo_rand, ref_index, ref_lane;
+    uint32_t prev_offset, curr_offset;
+    uint32_t starting_index, i;
+
+    if (instance == NULL) {
+        return;
+    }
+
+    starting_index = 0;
+
+    if ((0 == position.pass) && (0 == position.slice)) {
+        starting_index = 2; /* we have already generated the first two blocks */
+    }
+
+    /* Offset of the current block */
+    curr_offset = position.lane * instance->lane_length +
+                  position.slice * instance->segment_length + starting_index;
+
+    if (0 == curr_offset % instance->lane_length) {
+        /* Last block in this lane */
+        prev_offset = curr_offset + instance->lane_length - 1;
+    }
+    else {
+        /* Previous block */
+        prev_offset = curr_offset - 1;
+    }
+
+    for (i = starting_index; i < instance->segment_length;
+         ++i, ++curr_offset, ++prev_offset) {
+        /*1.1 Rotating prev_offset if needed */
+        if (curr_offset % instance->lane_length == 1) {
+            prev_offset = curr_offset - 1;
+        }
+
+        /* 1.2 Computing the index of the reference block */
+        /* 1.2.1 Taking pseudo-random value from the previous block */
+        pseudo_rand = instance->memory[prev_offset].v[0];
+
+        /* 1.2.2 Computing the lane of the reference block */
+        ref_lane = ((pseudo_rand >> 32)) % instance->lanes;
+
+        if ((position.pass == 0) && (position.slice == 0)) {
+            /* Can not reference other lanes yet */
+            ref_lane = position.lane;
+        }
+
+        /* 1.2.3 Computing the number of possible reference block within the
+         * lane.
+         */
+        position.index = i;
+        ref_index = randomx_argon2_index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,
+                                               ref_lane == position.lane);
+
+        /* 2 Creating a new block */
+        ref_block =
+            instance->memory + instance->lane_length * ref_lane + ref_index;
+        curr_block = instance->memory + curr_offset;
+        if (ARGON2_VERSION_10 == instance->version) {
+            /* version 1.2.1 and earlier: overwrite, not XOR */
+            fill_block(instance->memory + prev_offset, ref_block, curr_block, 0);
+        }
+        else {
+            if (0 == position.pass) {
+                fill_block(instance->memory + prev_offset, ref_block,
+                           curr_block, 0);
+            }
+            else {
+                fill_block(instance->memory + prev_offset, ref_block,
+                           curr_block, 1);
+            }
+        }
+    }
+}
diff --git a/src/RandomX/src/argon2_ssse3.c b/src/RandomX/src/argon2_ssse3.c
new file mode 100644
index 000000000..778edd7de
--- /dev/null
+++ b/src/RandomX/src/argon2_ssse3.c
@@ -0,0 +1,182 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holder nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Original code from Argon2 reference source code package used under CC0 Licence
+ * https://github.com/P-H-C/phc-winner-argon2
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+*/
+
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "argon2.h"
+
+#if defined(_MSC_VER) //MSVC doesn't define SSSE3
+#define __SSSE3__
+#endif
+
+void randomx_argon2_fill_segment_ssse3(const argon2_instance_t* instance,
+ argon2_position_t position);
+
+randomx_argon2_impl* randomx_argon2_impl_ssse3(void) {
+#if defined(__SSSE3__)
+    return &randomx_argon2_fill_segment_ssse3; /* SSSE3 segment filler is compiled in */
+#endif
+    return NULL; /* only reached when built without SSSE3: this backend is unavailable */
+}
+
+#if defined(__SSSE3__)
+
+#include <tmmintrin.h> /* for _mm_shuffle_epi8 and _mm_alignr_epi8 */
+
+#include "argon2_core.h"
+
+#include "blake2/blamka-round-ssse3.h"
+#include "blake2/blake2-impl.h"
+#include "blake2/blake2.h"
+
+static void fill_block(__m128i* state, const block* ref_block,
+ block* next_block, int with_xor) {
+ __m128i block_XY[ARGON2_OWORDS_IN_BLOCK];
+ unsigned int i;
+
+ if (with_xor) {
+ for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
+ state[i] = _mm_xor_si128(
+ state[i], _mm_loadu_si128((const __m128i*)ref_block->v + i));
+ block_XY[i] = _mm_xor_si128(
+ state[i], _mm_loadu_si128((const __m128i*)next_block->v + i));
+ }
+ }
+ else {
+ for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
+ block_XY[i] = state[i] = _mm_xor_si128(
+ state[i], _mm_loadu_si128((const __m128i*)ref_block->v + i));
+ }
+ }
+
+ for (i = 0; i < 8; ++i) {
+ BLAKE2_ROUND(state[8 * i + 0], state[8 * i + 1], state[8 * i + 2],
+ state[8 * i + 3], state[8 * i + 4], state[8 * i + 5],
+ state[8 * i + 6], state[8 * i + 7]);
+ }
+
+ for (i = 0; i < 8; ++i) {
+ BLAKE2_ROUND(state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i],
+ state[8 * 3 + i], state[8 * 4 + i], state[8 * 5 + i],
+ state[8 * 6 + i], state[8 * 7 + i]);
+ }
+
+ for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
+ state[i] = _mm_xor_si128(state[i], block_XY[i]);
+ _mm_storeu_si128((__m128i*)next_block->v + i, state[i]);
+ }
+}
+
+void randomx_argon2_fill_segment_ssse3(const argon2_instance_t* instance,
+                                       argon2_position_t position) {
+    /* SSSE3 segment filler: builds the blocks of the segment selected by `position`, carrying the latest block in `state`. */
+    block* ref_block = NULL, * curr_block = NULL;
+    uint64_t pseudo_rand, ref_index, ref_lane;
+    uint32_t prev_offset, curr_offset;
+    uint32_t starting_index, i;
+    __m128i state[ARGON2_OWORDS_IN_BLOCK];
+
+    if (instance == NULL) {
+        return;
+    }
+
+    starting_index = 0;
+
+    if ((0 == position.pass) && (0 == position.slice)) {
+        starting_index = 2; /* we have already generated the first two blocks */
+    }
+
+    /* Offset of the current block */
+    curr_offset = position.lane * instance->lane_length +
+                  position.slice * instance->segment_length + starting_index;
+
+    if (0 == curr_offset % instance->lane_length) {
+        /* Last block in this lane */
+        prev_offset = curr_offset + instance->lane_length - 1;
+    }
+    else {
+        /* Previous block */
+        prev_offset = curr_offset - 1;
+    }
+
+    memcpy(state, ((instance->memory + prev_offset)->v), ARGON2_BLOCK_SIZE); /* seed the running state with the previous block */
+
+    for (i = starting_index; i < instance->segment_length;
+         ++i, ++curr_offset, ++prev_offset) {
+        /*1.1 Rotating prev_offset if needed */
+        if (curr_offset % instance->lane_length == 1) {
+            prev_offset = curr_offset - 1;
+        }
+
+        /* 1.2 Computing the index of the reference block */
+        /* 1.2.1 Taking pseudo-random value from the previous block */
+        pseudo_rand = instance->memory[prev_offset].v[0];
+
+        /* 1.2.2 Computing the lane of the reference block */
+        ref_lane = ((pseudo_rand >> 32)) % instance->lanes;
+
+        if ((position.pass == 0) && (position.slice == 0)) {
+            /* Can not reference other lanes yet */
+            ref_lane = position.lane;
+        }
+
+        /* 1.2.3 Computing the number of possible reference block within the
+         * lane.
+         */
+        position.index = i;
+        ref_index = randomx_argon2_index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,
+                                               ref_lane == position.lane);
+
+        /* 2 Creating a new block */
+        ref_block =
+            instance->memory + instance->lane_length * ref_lane + ref_index;
+        curr_block = instance->memory + curr_offset;
+        if (ARGON2_VERSION_10 == instance->version) {
+            /* version 1.2.1 and earlier: overwrite, not XOR */
+            fill_block(state, ref_block, curr_block, 0);
+        }
+        else {
+            if (0 == position.pass) {
+                fill_block(state, ref_block, curr_block, 0);
+            }
+            else {
+                fill_block(state, ref_block, curr_block, 1);
+            }
+        }
+    }
+}
+
+#endif
diff --git a/src/RandomX/src/asm/configuration.asm b/src/RandomX/src/asm/configuration.asm
new file mode 100644
index 000000000..794d7ad01
--- /dev/null
+++ b/src/RandomX/src/asm/configuration.asm
@@ -0,0 +1,48 @@
+; File start: ..\src\configuration.h
+RANDOMX_ARGON_MEMORY EQU 262144t ; numeric equates in this file carry the MASM t suffix (decimal radix)
+RANDOMX_ARGON_ITERATIONS EQU 3t
+RANDOMX_ARGON_LANES EQU 1t
+RANDOMX_ARGON_SALT TEXTEQU <"RandomX\x03"> ; text macro (TEXTEQU), not a numeric equate
+RANDOMX_CACHE_ACCESSES EQU 8t
+RANDOMX_SUPERSCALAR_LATENCY EQU 170t
+RANDOMX_DATASET_BASE_SIZE EQU 2147483648t
+RANDOMX_DATASET_EXTRA_SIZE EQU 33554368t
+RANDOMX_PROGRAM_SIZE EQU 256t
+RANDOMX_PROGRAM_ITERATIONS EQU 2048t
+RANDOMX_PROGRAM_COUNT EQU 8t
+RANDOMX_SCRATCHPAD_L3 EQU 2097152t
+RANDOMX_SCRATCHPAD_L2 EQU 262144t
+RANDOMX_SCRATCHPAD_L1 EQU 16384t
+RANDOMX_JUMP_BITS EQU 8t
+RANDOMX_JUMP_OFFSET EQU 8t
+RANDOMX_FREQ_IADD_RS EQU 16t ; the 30 RANDOMX_FREQ_* values (this line through RANDOMX_FREQ_NOP) sum to 256
+RANDOMX_FREQ_IADD_M EQU 7t
+RANDOMX_FREQ_ISUB_R EQU 16t
+RANDOMX_FREQ_ISUB_M EQU 7t
+RANDOMX_FREQ_IMUL_R EQU 16t
+RANDOMX_FREQ_IMUL_M EQU 4t
+RANDOMX_FREQ_IMULH_R EQU 4t
+RANDOMX_FREQ_IMULH_M EQU 1t
+RANDOMX_FREQ_ISMULH_R EQU 4t
+RANDOMX_FREQ_ISMULH_M EQU 1t
+RANDOMX_FREQ_IMUL_RCP EQU 8t
+RANDOMX_FREQ_INEG_R EQU 2t
+RANDOMX_FREQ_IXOR_R EQU 15t
+RANDOMX_FREQ_IXOR_M EQU 5t
+RANDOMX_FREQ_IROR_R EQU 8t
+RANDOMX_FREQ_IROL_R EQU 2t
+RANDOMX_FREQ_ISWAP_R EQU 4t
+RANDOMX_FREQ_FSWAP_R EQU 4t
+RANDOMX_FREQ_FADD_R EQU 16t
+RANDOMX_FREQ_FADD_M EQU 5t
+RANDOMX_FREQ_FSUB_R EQU 16t
+RANDOMX_FREQ_FSUB_M EQU 5t
+RANDOMX_FREQ_FSCAL_R EQU 6t
+RANDOMX_FREQ_FMUL_R EQU 32t
+RANDOMX_FREQ_FDIV_M EQU 4t
+RANDOMX_FREQ_FSQRT_R EQU 6t
+RANDOMX_FREQ_CBRANCH EQU 25t
+RANDOMX_FREQ_CFROUND EQU 1t
+RANDOMX_FREQ_ISTORE EQU 16t
+RANDOMX_FREQ_NOP EQU 0t
+; File end: ..\src\configuration.h
diff --git a/src/RandomX/src/asm/program_epilogue_linux.inc b/src/RandomX/src/asm/program_epilogue_linux.inc
new file mode 100644
index 000000000..eaacae547
--- /dev/null
+++ b/src/RandomX/src/asm/program_epilogue_linux.inc
@@ -0,0 +1,10 @@
+ ;# restore callee-saved registers - System V AMD64 ABI
+ pop r15 ;# pops run r15, r14, r13, r12, rbp, rbx: the reverse of the pushes in program_prologue_linux.inc
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+
+ ;# program finished
+ ret 0 ;# near return; the 0 operand releases no additional stack bytes
\ No newline at end of file
diff --git a/src/RandomX/src/asm/program_epilogue_store.inc b/src/RandomX/src/asm/program_epilogue_store.inc
new file mode 100644
index 000000000..b94fa4d99
--- /dev/null
+++ b/src/RandomX/src/asm/program_epilogue_store.inc
@@ -0,0 +1,19 @@
+ ;# save VM register values
+ pop rcx ;# rcx = pointer taken from the top of the stack (presumably the registerFile pointer pushed by the prologue - confirm)
+ mov qword ptr [rcx+0], r8 ;# r8-r15 are written to byte offsets 0..56
+ mov qword ptr [rcx+8], r9
+ mov qword ptr [rcx+16], r10
+ mov qword ptr [rcx+24], r11
+ mov qword ptr [rcx+32], r12
+ mov qword ptr [rcx+40], r13
+ mov qword ptr [rcx+48], r14
+ mov qword ptr [rcx+56], r15
+ movdqa xmmword ptr [rcx+64], xmm0 ;# xmm0-xmm3 are written to offsets 64..112; movdqa needs a 16-byte aligned address
+ movdqa xmmword ptr [rcx+80], xmm1
+ movdqa xmmword ptr [rcx+96], xmm2
+ movdqa xmmword ptr [rcx+112], xmm3
+ lea rcx, [rcx+64] ;# rebase by 64, so xmm4-xmm7 below land at offsets 128..176 from the original pointer
+ movdqa xmmword ptr [rcx+64], xmm4
+ movdqa xmmword ptr [rcx+80], xmm5
+ movdqa xmmword ptr [rcx+96], xmm6
+ movdqa xmmword ptr [rcx+112], xmm7
\ No newline at end of file
diff --git a/src/RandomX/src/asm/program_epilogue_win64.inc b/src/RandomX/src/asm/program_epilogue_win64.inc
new file mode 100644
index 000000000..8d70a0a3f
--- /dev/null
+++ b/src/RandomX/src/asm/program_epilogue_win64.inc
@@ -0,0 +1,24 @@
+ ;# restore callee-saved registers - Microsoft x64 calling convention
+ movdqu xmm15, xmmword ptr [rsp] ;# xmm15-xmm11 are read from the lower 80-byte save area (5 x 16 bytes)
+ movdqu xmm14, xmmword ptr [rsp+16]
+ movdqu xmm13, xmmword ptr [rsp+32]
+ movdqu xmm12, xmmword ptr [rsp+48]
+ movdqu xmm11, xmmword ptr [rsp+64]
+ add rsp, 80 ;# release the lower save area
+ movdqu xmm10, xmmword ptr [rsp] ;# xmm10-xmm6 are read from the upper 80-byte save area
+ movdqu xmm9, xmmword ptr [rsp+16]
+ movdqu xmm8, xmmword ptr [rsp+32]
+ movdqu xmm7, xmmword ptr [rsp+48]
+ movdqu xmm6, xmmword ptr [rsp+64]
+ add rsp, 80 ;# release the upper save area
+ pop r15 ;# integer pops run in the reverse of the push order in program_prologue_win64.inc
+ pop r14
+ pop r13
+ pop r12
+ pop rsi
+ pop rdi
+ pop rbp
+ pop rbx
+
+ ;# program finished
+ ret
diff --git a/src/RandomX/src/asm/program_loop_load.inc b/src/RandomX/src/asm/program_loop_load.inc
new file mode 100644
index 000000000..c29332313
--- /dev/null
+++ b/src/RandomX/src/asm/program_loop_load.inc
@@ -0,0 +1,28 @@
+ lea rcx, [rsi+rax] ;# rcx = rsi + rax (the prologue files load the scratchpad pointer into rsi)
+ push rcx ;# keep this address on the stack
+ xor r8, qword ptr [rcx+0] ;# XOR the 64 bytes at rcx into r8-r15, one qword per register
+ xor r9, qword ptr [rcx+8]
+ xor r10, qword ptr [rcx+16]
+ xor r11, qword ptr [rcx+24]
+ xor r12, qword ptr [rcx+32]
+ xor r13, qword ptr [rcx+40]
+ xor r14, qword ptr [rcx+48]
+ xor r15, qword ptr [rcx+56]
+ lea rcx, [rsi+rdx] ;# rcx = rsi + rdx
+ push rcx ;# keep the second address on the stack, on top of the first
+ cvtdq2pd xmm0, qword ptr [rcx+0] ;# each cvtdq2pd converts two packed signed 32-bit integers into two doubles
+ cvtdq2pd xmm1, qword ptr [rcx+8]
+ cvtdq2pd xmm2, qword ptr [rcx+16]
+ cvtdq2pd xmm3, qword ptr [rcx+24]
+ cvtdq2pd xmm4, qword ptr [rcx+32]
+ cvtdq2pd xmm5, qword ptr [rcx+40]
+ cvtdq2pd xmm6, qword ptr [rcx+48]
+ cvtdq2pd xmm7, qword ptr [rcx+56]
+ andps xmm4, xmm13 ;# xmm4-xmm7 only: AND with xmm13 (named mantissaMaskReg in assembly_generator_x86.cpp)
+ andps xmm5, xmm13
+ andps xmm6, xmm13
+ andps xmm7, xmm13
+ orps xmm4, xmm14 ;# then OR with xmm14 (named exponentMaskReg in assembly_generator_x86.cpp)
+ orps xmm5, xmm14
+ orps xmm6, xmm14
+ orps xmm7, xmm14
diff --git a/src/RandomX/src/asm/program_loop_store.inc b/src/RandomX/src/asm/program_loop_store.inc
new file mode 100644
index 000000000..1ba1635c6
--- /dev/null
+++ b/src/RandomX/src/asm/program_loop_store.inc
@@ -0,0 +1,18 @@
+ pop rcx ;# first pop: the address on top of the stack
+ mov qword ptr [rcx+0], r8 ;# write r8-r15 to the 64 bytes at rcx
+ mov qword ptr [rcx+8], r9
+ mov qword ptr [rcx+16], r10
+ mov qword ptr [rcx+24], r11
+ mov qword ptr [rcx+32], r12
+ mov qword ptr [rcx+40], r13
+ mov qword ptr [rcx+48], r14
+ mov qword ptr [rcx+56], r15
+ pop rcx ;# second pop: the address stored beneath it
+ xorpd xmm0, xmm4 ;# fold xmm4-xmm7 into xmm0-xmm3 before storing
+ xorpd xmm1, xmm5
+ xorpd xmm2, xmm6
+ xorpd xmm3, xmm7
+ movapd xmmword ptr [rcx+0], xmm0 ;# write xmm0-xmm3 to the 64 bytes at rcx; movapd needs a 16-byte aligned address
+ movapd xmmword ptr [rcx+16], xmm1
+ movapd xmmword ptr [rcx+32], xmm2
+ movapd xmmword ptr [rcx+48], xmm3
diff --git a/src/RandomX/src/asm/program_prologue_linux.inc b/src/RandomX/src/asm/program_prologue_linux.inc
new file mode 100644
index 000000000..033584a7e
--- /dev/null
+++ b/src/RandomX/src/asm/program_prologue_linux.inc
@@ -0,0 +1,35 @@
+ ;# callee-saved registers - System V AMD64 ABI
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+
+ ;# function arguments
+ mov rbx, rcx ;# loop counter
+ push rdi ;# RegisterFile& registerFile
+ mov rcx, rdi ;# rcx keeps a copy of the registerFile pointer for the constant loads at the end
+ mov rbp, qword ptr [rsi] ;# "mx", "ma"
+ mov rdi, qword ptr [rsi+8] ;# uint8_t* dataset
+ mov rsi, rdx ;# uint8_t* scratchpad
+
+ mov rax, rbp ;# rax keeps the qword exactly as loaded
+ ror rbp, 32 ;# rbp gets its two 32-bit halves swapped
+
+ ;# zero integer registers
+ xor r8, r8
+ xor r9, r9
+ xor r10, r10
+ xor r11, r11
+ xor r12, r12
+ xor r13, r13
+ xor r14, r14
+ xor r15, r15
+
+ ;# load constant registers
+ lea rcx, [rcx+120] ;# rebase by 120, so the loads below read registerFile offsets 192, 208, 224 and 240
+ movapd xmm8, xmmword ptr [rcx+72]
+ movapd xmm9, xmmword ptr [rcx+88]
+ movapd xmm10, xmmword ptr [rcx+104]
+ movapd xmm11, xmmword ptr [rcx+120]
diff --git a/src/RandomX/src/asm/program_prologue_win64.inc b/src/RandomX/src/asm/program_prologue_win64.inc
new file mode 100644
index 000000000..10f21d375
--- /dev/null
+++ b/src/RandomX/src/asm/program_prologue_win64.inc
@@ -0,0 +1,48 @@
+ ;# callee-saved registers - Microsoft x64 calling convention
+ push rbx
+ push rbp
+ push rdi
+ push rsi
+ push r12
+ push r13
+ push r14
+ push r15
+ sub rsp, 80 ;# first 80-byte save area (5 x 16 bytes) for xmm6-xmm10
+ movdqu xmmword ptr [rsp+64], xmm6
+ movdqu xmmword ptr [rsp+48], xmm7
+ movdqu xmmword ptr [rsp+32], xmm8
+ movdqu xmmword ptr [rsp+16], xmm9
+ movdqu xmmword ptr [rsp+0], xmm10
+ sub rsp, 80 ;# second 80-byte save area for xmm11-xmm15
+ movdqu xmmword ptr [rsp+64], xmm11
+ movdqu xmmword ptr [rsp+48], xmm12
+ movdqu xmmword ptr [rsp+32], xmm13
+ movdqu xmmword ptr [rsp+16], xmm14
+ movdqu xmmword ptr [rsp+0], xmm15
+
+ ;# function arguments
+ push rcx ;# RegisterFile& registerFile
+ mov rbp, qword ptr [rdx] ;# "mx", "ma"
+ mov rdi, qword ptr [rdx+8] ;# uint8_t* dataset
+ mov rsi, r8 ;# uint8_t* scratchpad
+ mov rbx, r9 ;# loop counter
+
+ mov rax, rbp ;# rax keeps the qword exactly as loaded
+ ror rbp, 32 ;# rbp gets its two 32-bit halves swapped
+
+ ;# zero integer registers
+ xor r8, r8
+ xor r9, r9
+ xor r10, r10
+ xor r11, r11
+ xor r12, r12
+ xor r13, r13
+ xor r14, r14
+ xor r15, r15
+
+ ;# load constant registers
+ lea rcx, [rcx+120] ;# rcx still holds registerFile; rebase by 120 so the loads below read offsets 192, 208, 224 and 240
+ movapd xmm8, xmmword ptr [rcx+72]
+ movapd xmm9, xmmword ptr [rcx+88]
+ movapd xmm10, xmmword ptr [rcx+104]
+ movapd xmm11, xmmword ptr [rcx+120]
diff --git a/src/RandomX/src/asm/program_read_dataset.inc b/src/RandomX/src/asm/program_read_dataset.inc
new file mode 100644
index 000000000..9c61092f9
--- /dev/null
+++ b/src/RandomX/src/asm/program_read_dataset.inc
@@ -0,0 +1,16 @@
+ mov ecx, ebp ;# ecx = ma
+ and ecx, RANDOMX_DATASET_BASE_MASK ;# mask the offset used for the reads below
+ xor r8, qword ptr [rdi+rcx] ;# r8-r15 are XORed with the 64 bytes at rdi+rcx (r9-r15 follow the prefetch)
+ ror rbp, 32 ;# swap "ma" and "mx"
+ xor rbp, rax ;# modify "mx"
+ mov edx, ebp ;# edx = mx
+ and edx, RANDOMX_DATASET_BASE_MASK ;# mask the offset used for the prefetch
+ prefetchnta byte ptr [rdi+rdx] ;# non-temporal prefetch of the line at rdi+rdx
+ xor r9, qword ptr [rdi+rcx+8]
+ xor r10, qword ptr [rdi+rcx+16]
+ xor r11, qword ptr [rdi+rcx+24]
+ xor r12, qword ptr [rdi+rcx+32]
+ xor r13, qword ptr [rdi+rcx+40]
+ xor r14, qword ptr [rdi+rcx+48]
+ xor r15, qword ptr [rdi+rcx+56]
+
\ No newline at end of file
diff --git a/src/RandomX/src/asm/program_read_dataset_sshash_fin.inc b/src/RandomX/src/asm/program_read_dataset_sshash_fin.inc
new file mode 100644
index 000000000..f5a067d2c
--- /dev/null
+++ b/src/RandomX/src/asm/program_read_dataset_sshash_fin.inc
@@ -0,0 +1,10 @@
+ mov rbx, qword ptr [rsp+64] ;# reload rbx from frame slot 64 (the slot program_read_dataset_sshash_init.inc writes rbx to)
+ xor r8, qword ptr [rsp+56] ;# r8-r15 are XORed with (not replaced by) the values held in slots 56..0
+ xor r9, qword ptr [rsp+48]
+ xor r10, qword ptr [rsp+40]
+ xor r11, qword ptr [rsp+32]
+ xor r12, qword ptr [rsp+24]
+ xor r13, qword ptr [rsp+16]
+ xor r14, qword ptr [rsp+8]
+ xor r15, qword ptr [rsp+0]
+ add rsp, 72 ;# release the 72-byte frame (9 qwords)
\ No newline at end of file
diff --git a/src/RandomX/src/asm/program_read_dataset_sshash_init.inc b/src/RandomX/src/asm/program_read_dataset_sshash_init.inc
new file mode 100644
index 000000000..9491f3d2f
--- /dev/null
+++ b/src/RandomX/src/asm/program_read_dataset_sshash_init.inc
@@ -0,0 +1,17 @@
+ sub rsp, 72 ;# reserve 9 qwords: rbx plus r8-r15
+ mov qword ptr [rsp+64], rbx
+ mov qword ptr [rsp+56], r8
+ mov qword ptr [rsp+48], r9
+ mov qword ptr [rsp+40], r10
+ mov qword ptr [rsp+32], r11
+ mov qword ptr [rsp+24], r12
+ mov qword ptr [rsp+16], r13
+ mov qword ptr [rsp+8], r14
+ mov qword ptr [rsp+0], r15
+ ror rbp, 32 ;# swap "ma" and "mx"
+ xor rbp, rax ;# modify "mx"
+ mov rbx, rbp ;# ebx = ma
+ shr rbx, 38 ;# keeps bits 38-63 of rbp in the low bits of rbx
+ and ebx, RANDOMX_DATASET_BASE_MASK / 64 ;# ebx = Dataset block number
+ ;# add ebx, datasetOffset / 64
+ ;# call 32768
\ No newline at end of file
diff --git a/src/RandomX/src/asm/program_sshash_constants.inc b/src/RandomX/src/asm/program_sshash_constants.inc
new file mode 100644
index 000000000..53dc1755c
--- /dev/null
+++ b/src/RandomX/src/asm/program_sshash_constants.inc
@@ -0,0 +1,24 @@
+r0_mul:
+ ;#/ 6364136223846793005 (the db row below is this value as 8 little-endian bytes)
+ db 45, 127, 149, 76, 45, 244, 81, 88
+r1_add:
+ ;#/ 9298411001130361340
+ db 252, 161, 245, 89, 138, 151, 10, 129
+r2_add:
+ ;#/ 12065312585734608966
+ db 70, 216, 194, 56, 223, 153, 112, 167
+r3_add:
+ ;#/ 9306329213124626780
+ db 92, 73, 34, 191, 28, 185, 38, 129
+r4_add:
+ ;#/ 5281919268842080866
+ db 98, 138, 159, 23, 151, 37, 77, 73
+r5_add:
+ ;#/ 10536153434571861004
+ db 12, 236, 170, 206, 185, 239, 55, 146
+r6_add:
+ ;#/ 3398623926847679864
+ db 120, 45, 230, 108, 116, 86, 42, 47
+r7_add:
+ ;#/ 9549104520008361294
+ db 78, 229, 44, 182, 247, 59, 133, 132
\ No newline at end of file
diff --git a/src/RandomX/src/asm/program_sshash_load.inc b/src/RandomX/src/asm/program_sshash_load.inc
new file mode 100644
index 000000000..535135691
--- /dev/null
+++ b/src/RandomX/src/asm/program_sshash_load.inc
@@ -0,0 +1,8 @@
+ xor r8, qword ptr [rbx+0] ;# XOR the 64 bytes at rbx into r8-r15, one qword per register
+ xor r9, qword ptr [rbx+8]
+ xor r10, qword ptr [rbx+16]
+ xor r11, qword ptr [rbx+24]
+ xor r12, qword ptr [rbx+32]
+ xor r13, qword ptr [rbx+40]
+ xor r14, qword ptr [rbx+48]
+ xor r15, qword ptr [rbx+56]
\ No newline at end of file
diff --git a/src/RandomX/src/asm/program_sshash_prefetch.inc b/src/RandomX/src/asm/program_sshash_prefetch.inc
new file mode 100644
index 000000000..26efb5159
--- /dev/null
+++ b/src/RandomX/src/asm/program_sshash_prefetch.inc
@@ -0,0 +1,4 @@
+ and rbx, RANDOMX_CACHE_MASK ;# limit the index in rbx to the mask
+ shl rbx, 6 ;# index * 64 = byte offset
+ add rbx, rdi ;# add the base address held in rdi
+ prefetchnta byte ptr [rbx] ;# non-temporal prefetch of the line at that address
\ No newline at end of file
diff --git a/src/RandomX/src/asm/program_xmm_constants.inc b/src/RandomX/src/asm/program_xmm_constants.inc
new file mode 100644
index 000000000..296237a45
--- /dev/null
+++ b/src/RandomX/src/asm/program_xmm_constants.inc
@@ -0,0 +1,6 @@
+mantissaMask: ;# two qwords, each 0x00FFFFFFFFFFFFFF
+ db 255, 255, 255, 255, 255, 255, 255, 0, 255, 255, 255, 255, 255, 255, 255, 0
+exp240: ;# 16 zero bytes as stored in this file
+ db 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+scaleMask: ;# two qwords, each 0x80F0000000000000
+ db 0, 0, 0, 0, 0, 0, 240, 128, 0, 0, 0, 0, 0, 0, 240, 128
\ No newline at end of file
diff --git a/src/RandomX/src/asm/randomx_reciprocal.inc b/src/RandomX/src/asm/randomx_reciprocal.inc
new file mode 100644
index 000000000..e1f22fdc6
--- /dev/null
+++ b/src/RandomX/src/asm/randomx_reciprocal.inc
@@ -0,0 +1,7 @@
+ mov edx, 1 ;# rdx = 1 (a 32-bit write clears the upper half)
+ mov r8, rcx ;# r8 = divisor, taken from rcx
+ xor eax, eax ;# rax = 0
+ bsr rcx, rcx ;# rcx = index of the highest set bit of the divisor (result undefined for 0)
+ shl rdx, cl ;# rdx = 1 << index
+ div r8 ;# rax = (rdx:rax) / r8 = 2^(64 + index) / divisor; faults for 0 and for exact powers of two (quotient 2^64 does not fit)
+ ret
\ No newline at end of file
diff --git a/src/RandomX/src/assembly_generator_x86.cpp b/src/RandomX/src/assembly_generator_x86.cpp
new file mode 100644
index 000000000..e7e5258b7
--- /dev/null
+++ b/src/RandomX/src/assembly_generator_x86.cpp
@@ -0,0 +1,611 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holder nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <climits>
+#include "assembly_generator_x86.hpp"
+#include "common.hpp"
+#include "reciprocal.h"
+#include "program.hpp"
+#include "superscalar.hpp"
+
+namespace randomx {
+
+ static const char* regR[] = { "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" }; // 64-bit names of the eight integer registers
+ static const char* regR32[] = { "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" }; // 32-bit names of the same registers, same order
+ static const char* regFE[] = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" }; // regF followed by regE
+ static const char* regF[] = { "xmm0", "xmm1", "xmm2", "xmm3" }; // destinations of the FADD/FSUB/FSCAL emitters
+ static const char* regE[] = { "xmm4", "xmm5", "xmm6", "xmm7" }; // destinations of the FMUL/FDIV/FSQRT emitters
+ static const char* regA[] = { "xmm8", "xmm9", "xmm10", "xmm11" }; // source operands of the register-form F* emitters
+
+ static const char* tempRegx = "xmm12"; // scratch register for values converted from the scratchpad
+ static const char* mantissaMaskReg = "xmm13"; // ANDed into the divisor by h_FDIV_M
+ static const char* exponentMaskReg = "xmm14"; // ORed into the divisor by h_FDIV_M
+ static const char* scaleMaskReg = "xmm15"; // XORed into the destination by h_FSCAL_R
+ static const char* regIc = "rbx";
+ static const char* regIc32 = "ebx";
+ static const char* regIc8 = "bl";
+ static const char* regScratchpadAddr = "rsi"; // base register of every scratchpad memory operand
+
+ void AssemblyGeneratorX86::generateProgram(Program& prog) {
+     // Reset per-program state: every registerUsage slot becomes -1 and the output buffer is emptied.
+     for (unsigned reg = 0; reg < RegistersCount; ++reg)
+         registerUsage[reg] = -1;
+     asmCode.str(std::string());
+     for (unsigned pc = 0; pc < prog.getSize(); ++pc) {
+         asmCode << "randomx_isn_" << pc << ":" << std::endl; // label for instruction pc
+         Instruction& current = prog(pc);
+         current.src %= RegistersCount; // both operand indices are reduced in place before code generation
+         current.dst %= RegistersCount;
+         generateCode(current, pc);
+     }
+ }
+
+ void AssemblyGeneratorX86::generateAsm(SuperscalarProgram& prog) { // Emits Intel-syntax x86-64 text for a superscalar program into asmCode.
+ asmCode.str(std::string()); //clear
+#ifdef RANDOMX_ALIGN
+ asmCode << "ALIGN 16" << std::endl;
+#endif
+ for (unsigned i = 0; i < prog.getSize(); ++i) {
+ Instruction& instr = prog(i);
+ switch ((SuperscalarInstructionType)instr.opcode)
+ {
+ case SuperscalarInstructionType::ISUB_R:
+ asmCode << "sub " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
+ break;
+ case SuperscalarInstructionType::IXOR_R:
+ asmCode << "xor " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
+ break;
+ case SuperscalarInstructionType::IADD_RS:
+ asmCode << "lea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModShift())) << "]" << std::endl; // dst + src * 2^modShift in one lea
+ break;
+ case SuperscalarInstructionType::IMUL_R:
+ asmCode << "imul " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
+ break;
+ case SuperscalarInstructionType::IROR_C:
+ asmCode << "ror " << regR[instr.dst] << ", " << instr.getImm32() << std::endl;
+ break;
+ case SuperscalarInstructionType::IADD_C7:
+ asmCode << "add " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; // immediate printed as a signed 32-bit value
+ break;
+ case SuperscalarInstructionType::IXOR_C7:
+ asmCode << "xor " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl;
+ break;
+ case SuperscalarInstructionType::IADD_C8:
+ asmCode << "add " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl;
+#ifdef RANDOMX_ALIGN
+ asmCode << "nop" << std::endl; // extra filler instruction, emitted only when RANDOMX_ALIGN is defined
+#endif
+ break;
+ case SuperscalarInstructionType::IXOR_C8:
+ asmCode << "xor " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl;
+#ifdef RANDOMX_ALIGN
+ asmCode << "nop" << std::endl;
+#endif
+ break;
+ case SuperscalarInstructionType::IADD_C9:
+ asmCode << "add " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl;
+#ifdef RANDOMX_ALIGN
+ asmCode << "xchg ax, ax ;nop" << std::endl; // filler that leaves ax unchanged, emitted only when RANDOMX_ALIGN is defined
+#endif
+ break;
+ case SuperscalarInstructionType::IXOR_C9:
+ asmCode << "xor " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl;
+#ifdef RANDOMX_ALIGN
+ asmCode << "xchg ax, ax ;nop" << std::endl;
+#endif
+ break;
+ case SuperscalarInstructionType::IMULH_R:
+ asmCode << "mov rax, " << regR[instr.dst] << std::endl;
+ asmCode << "mul " << regR[instr.src] << std::endl; // one-operand mul: rdx:rax = rax * src (unsigned)
+ asmCode << "mov " << regR[instr.dst] << ", rdx" << std::endl; // dst receives the high 64 bits
+ break;
+ case SuperscalarInstructionType::ISMULH_R:
+ asmCode << "mov rax, " << regR[instr.dst] << std::endl;
+ asmCode << "imul " << regR[instr.src] << std::endl; // one-operand imul: rdx:rax = rax * src (signed)
+ asmCode << "mov " << regR[instr.dst] << ", rdx" << std::endl;
+ break;
+ case SuperscalarInstructionType::IMUL_RCP:
+ asmCode << "mov rax, " << (int64_t)randomx_reciprocal(instr.getImm32()) << std::endl; // reciprocal of imm32, printed as a signed 64-bit value
+ asmCode << "imul " << regR[instr.dst] << ", rax" << std::endl;
+ break;
+ default:
+ UNREACHABLE;
+ }
+ }
+ }
+
+ // Emits a self-contained C translation unit into asmCode: helper definitions plus superScalar(uint64_t r[8]) for prog.
+ void AssemblyGeneratorX86::generateC(SuperscalarProgram& prog) {
+     asmCode.str(std::string()); //clear
+     asmCode << "#include <stdint.h>" << std::endl; // uint64_t / int64_t used by everything emitted below
+     asmCode << "#if defined(__SIZEOF_INT128__)" << std::endl;
+     asmCode << " static inline uint64_t mulh(uint64_t a, uint64_t b) {" << std::endl;
+     asmCode << " return ((unsigned __int128)a * b) >> 64;" << std::endl;
+     asmCode << " }" << std::endl;
+     asmCode << " static inline int64_t smulh(int64_t a, int64_t b) {" << std::endl;
+     asmCode << " return ((__int128)a * b) >> 64;" << std::endl;
+     asmCode << " }" << std::endl;
+     asmCode << " #define HAVE_MULH" << std::endl;
+     asmCode << " #define HAVE_SMULH" << std::endl;
+     asmCode << "#endif" << std::endl;
+     asmCode << "#if defined(_MSC_VER)" << std::endl;
+     asmCode << " #define HAS_VALUE(X) X ## 0" << std::endl;
+     asmCode << " #define EVAL_DEFINE(X) HAS_VALUE(X)" << std::endl;
+     asmCode << " #include <intrin.h>" << std::endl; // declares __umulh and _mul128
+     asmCode << " #include <stdlib.h>" << std::endl; // declares _rotr64
+     asmCode << " static __inline uint64_t rotr(uint64_t x , int c) {" << std::endl;
+     asmCode << " return _rotr64(x, c);" << std::endl;
+     asmCode << " }" << std::endl;
+     asmCode << " #define HAVE_ROTR" << std::endl;
+     asmCode << " #if EVAL_DEFINE(__MACHINEARM64_X64(1))" << std::endl;
+     asmCode << " static __inline uint64_t mulh(uint64_t a, uint64_t b) {" << std::endl;
+     asmCode << " return __umulh(a, b);" << std::endl;
+     asmCode << " }" << std::endl;
+     asmCode << " #define HAVE_MULH" << std::endl;
+     asmCode << " #endif" << std::endl;
+     asmCode << " #if EVAL_DEFINE(__MACHINEX64(1))" << std::endl;
+     asmCode << " static __inline int64_t smulh(int64_t a, int64_t b) {" << std::endl;
+     asmCode << " int64_t hi;" << std::endl;
+     asmCode << " _mul128(a, b, &hi);" << std::endl;
+     asmCode << " return hi;" << std::endl;
+     asmCode << " }" << std::endl;
+     asmCode << " #define HAVE_SMULH" << std::endl;
+     asmCode << " #endif" << std::endl;
+     asmCode << "#endif" << std::endl;
+     asmCode << "#ifndef HAVE_ROTR" << std::endl;
+     asmCode << " static inline uint64_t rotr(uint64_t a, int b) {" << std::endl;
+     asmCode << " return (a >> (b & 63)) | (a << (-b & 63));" << std::endl; // masked counts: a shift by 64 would be undefined in C
+     asmCode << " }" << std::endl;
+     asmCode << " #define HAVE_ROTR" << std::endl;
+     asmCode << "#endif" << std::endl;
+     asmCode << "#if !defined(HAVE_MULH) || !defined(HAVE_SMULH) || !defined(HAVE_ROTR)" << std::endl;
+     asmCode << " #error \"Required functions are not defined\"" << std::endl;
+     asmCode << "#endif" << std::endl;
+     asmCode << "void superScalar(uint64_t r[8]) {" << std::endl;
+     asmCode << "uint64_t r8 = r[0], r9 = r[1], r10 = r[2], r11 = r[3], r12 = r[4], r13 = r[5], r14 = r[6], r15 = r[7];" << std::endl;
+     for (unsigned i = 0; i < prog.getSize(); ++i) {
+         Instruction& instr = prog(i);
+         switch ((SuperscalarInstructionType)instr.opcode) { // the C locals reuse the x86 register names from regR
+         case SuperscalarInstructionType::ISUB_R:
+             asmCode << regR[instr.dst] << " -= " << regR[instr.src] << ";" << std::endl;
+             break;
+         case SuperscalarInstructionType::IXOR_R:
+             asmCode << regR[instr.dst] << " ^= " << regR[instr.src] << ";" << std::endl;
+             break;
+         case SuperscalarInstructionType::IADD_RS:
+             asmCode << regR[instr.dst] << " += " << regR[instr.src] << "*" << (1 << (instr.getModShift())) << ";" << std::endl;
+             break;
+         case SuperscalarInstructionType::IMUL_R:
+             asmCode << regR[instr.dst] << " *= " << regR[instr.src] << ";" << std::endl;
+             break;
+         case SuperscalarInstructionType::IROR_C:
+             asmCode << regR[instr.dst] << " = rotr(" << regR[instr.dst] << ", " << instr.getImm32() << ");" << std::endl;
+             break;
+         case SuperscalarInstructionType::IADD_C7: // the C7/C8/C9 variants produce the same C statement
+         case SuperscalarInstructionType::IADD_C8:
+         case SuperscalarInstructionType::IADD_C9:
+             asmCode << regR[instr.dst] << " += " << (int32_t)instr.getImm32() << ";" << std::endl;
+             break;
+         case SuperscalarInstructionType::IXOR_C7:
+         case SuperscalarInstructionType::IXOR_C8:
+         case SuperscalarInstructionType::IXOR_C9:
+             asmCode << regR[instr.dst] << " ^= " << (int32_t)instr.getImm32() << ";" << std::endl;
+             break;
+         case SuperscalarInstructionType::IMULH_R:
+             asmCode << regR[instr.dst] << " = mulh(" << regR[instr.dst] << ", " << regR[instr.src] << ");" << std::endl;
+             break;
+         case SuperscalarInstructionType::ISMULH_R:
+             asmCode << regR[instr.dst] << " = smulh(" << regR[instr.dst] << ", " << regR[instr.src] << ");" << std::endl;
+             break;
+         case SuperscalarInstructionType::IMUL_RCP:
+             asmCode << regR[instr.dst] << " *= " << (int64_t)randomx_reciprocal(instr.getImm32()) << ";" << std::endl;
+             break;
+         default:
+             UNREACHABLE;
+         }
+     }
+     asmCode << "r[0] = r8; r[1] = r9; r[2] = r10; r[3] = r11; r[4] = r12; r[5] = r13; r[6] = r14; r[7] = r15;" << std::endl;
+     asmCode << "}" << std::endl;
+ }
+
+ void AssemblyGeneratorX86::traceint(Instruction& instr) {
+     // Trace hook for integer instructions: when tracing is on, push the destination register.
+     if (!trace) return;
+     asmCode << "\tpush " << regR[instr.dst] << std::endl;
+ }
+
+ void AssemblyGeneratorX86::traceflt(Instruction& instr) {
+     // Trace hook for floating-point instructions: when tracing is on, push a literal 0.
+     if (!trace) return;
+     asmCode << "\tpush 0" << std::endl;
+ }
+
+ void AssemblyGeneratorX86::tracenop(Instruction& instr) {
+     // Trace hook for instructions that emit no code: when tracing is on, push a literal 0.
+     if (!trace) return;
+     asmCode << "\tpush 0" << std::endl;
+ }
+
+ void AssemblyGeneratorX86::generateCode(Instruction& instr, int i) {
+     // Write the instruction as an assembly comment, then run the emitter stored in engine for its opcode.
+     asmCode << "\t; " << instr;
+     (this->*engine[instr.opcode])(instr, i);
+ }
+
+ void AssemblyGeneratorX86::genAddressReg(Instruction& instr, const char* reg = "eax") { // Emits code that leaves a masked scratchpad offset in reg.
+ asmCode << "\tlea " << reg << ", [" << regR32[instr.src] << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl; // reg = 32-bit src + signed imm32; showpos supplies the explicit sign
+ asmCode << "\tand " << reg << ", " << ((instr.getModMem()) ? ScratchpadL1Mask : ScratchpadL2Mask) << std::endl; // non-zero mod.mem selects the L1 mask, zero the L2 mask
+ }
+
+ void AssemblyGeneratorX86::genAddressRegDst(Instruction& instr, int maskAlign = 8) {
+     // eax = 32-bit destination register + signed imm32 (showpos supplies the explicit sign).
+     asmCode << "\tlea eax, [" << regR32[instr.dst] << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl;
+     // Conditions below StoreL3Condition pick L1 or L2 from mod.mem; everything else uses L3.
+     int mask = ScratchpadL3Mask;
+     if (instr.getModCond() < StoreL3Condition)
+         mask = instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask;
+     // ANDing with -maskAlign also clears the low bits (for a power-of-two maskAlign).
+     const int alignedMask = mask & (-maskAlign);
+     asmCode << "\tand eax" << ", " << alignedMask << std::endl;
+ }
+
+ int32_t AssemblyGeneratorX86::genAddressImm(Instruction& instr) { // Returns imm32 masked with ScratchpadL3Mask, used as an absolute scratchpad offset.
+ return (int32_t)instr.getImm32() & ScratchpadL3Mask;
+ }
+
+ void AssemblyGeneratorX86::h_IADD_RS(Instruction& instr, int i) { // IADD_RS: dst += src << modShift, emitted as a single lea
+     registerUsage[instr.dst] = i;
+     asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModShift()));
+     if (instr.dst == RegisterNeedsDisplacement) // only this destination gets the signed imm32 appended as a displacement
+         asmCode << std::showpos << (int32_t)instr.getImm32() << std::noshowpos;
+     asmCode << "]" << std::endl;
+     traceint(instr);
+ }
+
+ void AssemblyGeneratorX86::h_IADD_M(Instruction& instr, int i) { // IADD_M: dst += qword read from the scratchpad
+     const char* const dstName = regR[instr.dst];
+     registerUsage[instr.dst] = i;
+     if (instr.src == instr.dst) { // same register: use the masked immediate as the offset
+         asmCode << "\tadd " << dstName << ", qword ptr [" << regScratchpadAddr << "+" << genAddressImm(instr) << "]" << std::endl;
+     } else {
+         genAddressReg(instr);
+         asmCode << "\tadd " << dstName << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl;
+     }
+     traceint(instr);
+ }
+
+ void AssemblyGeneratorX86::h_ISUB_R(Instruction& instr, int i) { // ISUB_R: dst -= src, or dst -= signed imm32 when src and dst coincide
+     registerUsage[instr.dst] = i;
+     asmCode << "\tsub " << regR[instr.dst] << ", ";
+     if (instr.src == instr.dst)
+         asmCode << (int32_t)instr.getImm32();
+     else
+         asmCode << regR[instr.src];
+     asmCode << std::endl;
+     traceint(instr);
+ }
+
+ void AssemblyGeneratorX86::h_ISUB_M(Instruction& instr, int i) { // ISUB_M: dst -= qword read from the scratchpad
+     const char* const dstName = regR[instr.dst];
+     registerUsage[instr.dst] = i;
+     if (instr.src == instr.dst) { // same register: use the masked immediate as the offset
+         asmCode << "\tsub " << dstName << ", qword ptr [" << regScratchpadAddr << "+" << genAddressImm(instr) << "]" << std::endl;
+     } else {
+         genAddressReg(instr);
+         asmCode << "\tsub " << dstName << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl;
+     }
+     traceint(instr);
+ }
+
+ void AssemblyGeneratorX86::h_IMUL_R(Instruction& instr, int i) { // IMUL_R: dst *= src, or dst *= signed imm32 when src and dst coincide
+     registerUsage[instr.dst] = i;
+     asmCode << "\timul " << regR[instr.dst] << ", ";
+     if (instr.src == instr.dst)
+         asmCode << (int32_t)instr.getImm32();
+     else
+         asmCode << regR[instr.src];
+     asmCode << std::endl;
+     traceint(instr);
+ }
+
+ void AssemblyGeneratorX86::h_IMUL_M(Instruction& instr, int i) { // IMUL_M: dst *= qword read from the scratchpad
+     const char* const dstName = regR[instr.dst];
+     registerUsage[instr.dst] = i;
+     if (instr.src == instr.dst) { // same register: use the masked immediate as the offset
+         asmCode << "\timul " << dstName << ", qword ptr [" << regScratchpadAddr << "+" << genAddressImm(instr) << "]" << std::endl;
+     } else {
+         genAddressReg(instr);
+         asmCode << "\timul " << dstName << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl;
+     }
+     traceint(instr);
+ }
+
+ void AssemblyGeneratorX86::h_IMULH_R(Instruction& instr, int i) { // IMULH_R: dst = high 64 bits of the unsigned product dst * src
+     const char* const dstName = regR[instr.dst];
+     const char* const srcName = regR[instr.src];
+     registerUsage[instr.dst] = i;
+     asmCode << "\tmov rax, " << dstName << std::endl << "\tmul " << srcName << std::endl << "\tmov " << dstName << ", rdx" << std::endl;
+     traceint(instr);
+ }
+
+ void AssemblyGeneratorX86::h_IMULH_M(Instruction& instr, int i) { // IMULH_M: dst = high 64 bits of the unsigned product dst * [scratchpad]
+     const bool viaRegister = instr.src != instr.dst;
+     registerUsage[instr.dst] = i;
+     if (viaRegister)
+         genAddressReg(instr, "ecx"); // the offset goes to ecx because rax is loaded with the multiplicand next
+     asmCode << "\tmov rax, " << regR[instr.dst] << std::endl;
+     if (viaRegister)
+         asmCode << "\tmul qword ptr [" << regScratchpadAddr << "+rcx]" << std::endl;
+     else
+         asmCode << "\tmul qword ptr [" << regScratchpadAddr << "+" << genAddressImm(instr) << "]" << std::endl;
+     // One-operand mul leaves the high half of the product in rdx.
+     asmCode << "\tmov " << regR[instr.dst] << ", rdx" << std::endl;
+     traceint(instr);
+ }
+
+ void AssemblyGeneratorX86::h_ISMULH_R(Instruction& instr, int i) { // ISMULH_R: dst = high 64 bits of the signed product dst * src
+     const char* const dstName = regR[instr.dst];
+     const char* const srcName = regR[instr.src];
+     registerUsage[instr.dst] = i;
+     asmCode << "\tmov rax, " << dstName << std::endl << "\timul " << srcName << std::endl << "\tmov " << dstName << ", rdx" << std::endl;
+     traceint(instr);
+ }
+
+ void AssemblyGeneratorX86::h_ISMULH_M(Instruction& instr, int i) { // ISMULH_M: dst = high 64 bits of the signed product dst * [scratchpad]
+     const bool viaRegister = instr.src != instr.dst;
+     registerUsage[instr.dst] = i;
+     if (viaRegister)
+         genAddressReg(instr, "ecx"); // the offset goes to ecx because rax is loaded with the multiplicand next
+     asmCode << "\tmov rax, " << regR[instr.dst] << std::endl;
+     if (viaRegister)
+         asmCode << "\timul qword ptr [" << regScratchpadAddr << "+rcx]" << std::endl;
+     else
+         asmCode << "\timul qword ptr [" << regScratchpadAddr << "+" << genAddressImm(instr) << "]" << std::endl;
+     // One-operand imul leaves the high half of the product in rdx.
+     asmCode << "\tmov " << regR[instr.dst] << ", rdx" << std::endl;
+     traceint(instr);
+ }
+
+ void AssemblyGeneratorX86::h_INEG_R(Instruction& instr, int i) { // INEG_R: emits neg on the destination register
+ registerUsage[instr.dst] = i; // record instruction index i for the destination register
+ asmCode << "\tneg " << regR[instr.dst] << std::endl;
+ traceint(instr);
+ }
+
+ void AssemblyGeneratorX86::h_IXOR_R(Instruction& instr, int i) { // IXOR_R: dst ^= src, or dst ^= signed imm32 when src and dst coincide
+     registerUsage[instr.dst] = i;
+     asmCode << "\txor " << regR[instr.dst] << ", ";
+     if (instr.src == instr.dst)
+         asmCode << (int32_t)instr.getImm32();
+     else
+         asmCode << regR[instr.src];
+     asmCode << std::endl;
+     traceint(instr);
+ }
+
+ void AssemblyGeneratorX86::h_IXOR_M(Instruction& instr, int i) { // IXOR_M: dst ^= qword read from the scratchpad
+     const char* const dstName = regR[instr.dst];
+     registerUsage[instr.dst] = i;
+     if (instr.src == instr.dst) { // same register: use the masked immediate as the offset
+         asmCode << "\txor " << dstName << ", qword ptr [" << regScratchpadAddr << "+" << genAddressImm(instr) << "]" << std::endl;
+     } else {
+         genAddressReg(instr);
+         asmCode << "\txor " << dstName << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl;
+     }
+     traceint(instr);
+ }
+
+ void AssemblyGeneratorX86::h_IROR_R(Instruction& instr, int i) { // IROR_R: rotate dst right by src (via cl) or by imm32 & 63
+     const char* const dstName = regR[instr.dst];
+     registerUsage[instr.dst] = i;
+     if (instr.src == instr.dst) {
+         asmCode << "\tror " << dstName << ", " << (instr.getImm32() & 63) << std::endl;
+     } else {
+         asmCode << "\tmov ecx, " << regR32[instr.src] << std::endl;
+         asmCode << "\tror " << dstName << ", cl" << std::endl;
+     }
+     traceint(instr);
+ }
+
+ void AssemblyGeneratorX86::h_IROL_R(Instruction& instr, int i) { // IROL_R: rotate dst left by src (via cl) or by imm32 & 63
+     const char* const dstName = regR[instr.dst];
+     registerUsage[instr.dst] = i;
+     if (instr.src == instr.dst) {
+         asmCode << "\trol " << dstName << ", " << (instr.getImm32() & 63) << std::endl;
+     } else {
+         asmCode << "\tmov ecx, " << regR32[instr.src] << std::endl;
+         asmCode << "\trol " << dstName << ", cl" << std::endl;
+     }
+     traceint(instr);
+ }
+
+ void AssemblyGeneratorX86::h_IMUL_RCP(Instruction& instr, int i) {
+     uint64_t divisor = instr.getImm32();
+     if (isZeroOrPowerOf2(divisor)) {
+         // No multiply is emitted for these divisors; only the trace slot is produced.
+         tracenop(instr);
+         return;
+     }
+     registerUsage[instr.dst] = i;
+     asmCode << "\tmov rax, " << randomx_reciprocal(divisor) << std::endl;
+     asmCode << "\timul " << regR[instr.dst] << ", rax" << std::endl;
+     traceint(instr);
+ }
+
+ void AssemblyGeneratorX86::h_ISWAP_R(Instruction& instr, int i) {
+     if (instr.src == instr.dst) {
+         // Swapping a register with itself emits nothing; only the trace slot is produced.
+         tracenop(instr);
+         return;
+     }
+     registerUsage[instr.dst] = i;
+     registerUsage[instr.src] = i;
+     asmCode << "\txchg " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
+     traceint(instr);
+ }
+
+ void AssemblyGeneratorX86::h_FSWAP_R(Instruction& instr, int i) { // FSWAP_R: swaps the two doubles of one register; dst indexes all eight entries of regFE
+ asmCode << "\tshufpd " << regFE[instr.dst] << ", " << regFE[instr.dst] << ", 1" << std::endl; // shufpd x, x, 1 exchanges the low and high lanes
+ traceflt(instr);
+ }
+
+ void AssemblyGeneratorX86::h_FADD_R(Instruction& instr, int i) { // FADD_R: regF[dst] += regA[src]
+ instr.dst %= RegisterCountFlt; // both operand indices are reduced in place to the size of the F/A groups
+ instr.src %= RegisterCountFlt;
+ asmCode << "\taddpd " << regF[instr.dst] << ", " << regA[instr.src] << std::endl;
+ traceflt(instr);
+ }
+
+ void AssemblyGeneratorX86::h_FADD_M(Instruction& instr, int i) {
+     // FADD_M: convert two int32 values from the scratchpad to doubles in the temp register, then add them to regF[dst].
+     instr.dst %= RegisterCountFlt;
+     genAddressReg(instr);
+     asmCode << "\tcvtdq2pd " << tempRegx << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl << "\taddpd " << regF[instr.dst] << ", " << tempRegx << std::endl;
+     traceflt(instr);
+ }
+
+ void AssemblyGeneratorX86::h_FSUB_R(Instruction& instr, int i) { // emits "subpd regF[dst], regA[src]"; i is unused
+ instr.dst %= RegisterCountFlt; // wrap both operand indices into [0, RegisterCountFlt); this mutates instr
+ instr.src %= RegisterCountFlt;
+ asmCode << "\tsubpd " << regF[instr.dst] << ", " << regA[instr.src] << std::endl;
+ traceflt(instr); // sees the already-reduced indices
+ }
+
+ void AssemblyGeneratorX86::h_FSUB_M(Instruction& instr, int i) { // emits a scratchpad load followed by subpd from regF[dst]; i is unused
+ instr.dst %= RegisterCountFlt; // wrap the destination index into [0, RegisterCountFlt); this mutates instr
+ genAddressReg(instr); // NOTE(review): presumably leaves the scratchpad offset in rax, which the load below uses - confirm
+ asmCode << "\tcvtdq2pd " << tempRegx << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl; // cvtdq2pd: two packed 32-bit integers -> two doubles
+ asmCode << "\tsubpd " << regF[instr.dst] << ", " << tempRegx << std::endl;
+ traceflt(instr);
+ }
+
+ void AssemblyGeneratorX86::h_FSCAL_R(Instruction& instr, int i) { // emits "xorps regF[dst], scaleMaskReg"; i is unused
+ instr.dst %= RegisterCountFlt; // wrap the destination index into [0, RegisterCountFlt); this mutates instr
+ asmCode << "\txorps " << regF[instr.dst] << ", " << scaleMaskReg << std::endl; // flips whichever bits are set in the scale mask register
+ traceflt(instr);
+ }
+
+ void AssemblyGeneratorX86::h_FMUL_R(Instruction& instr, int i) { // emits "mulpd regE[dst], regA[src]"; i is unused
+ instr.dst %= RegisterCountFlt; // wrap both operand indices into [0, RegisterCountFlt); this mutates instr
+ instr.src %= RegisterCountFlt;
+ asmCode << "\tmulpd " << regE[instr.dst] << ", " << regA[instr.src] << std::endl; // note: destination comes from regE, not regF
+ traceflt(instr);
+ }
+
+ void AssemblyGeneratorX86::h_FDIV_M(Instruction& instr, int i) { // emits a scratchpad load, two mask operations and divpd into regE[dst]; i is unused
+ instr.dst %= RegisterCountFlt; // wrap the destination index into [0, RegisterCountFlt); this mutates instr
+ genAddressReg(instr); // NOTE(review): presumably leaves the scratchpad offset in rax, which the load below uses - confirm
+ asmCode << "\tcvtdq2pd " << tempRegx << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl; // cvtdq2pd: two packed 32-bit integers -> two doubles
+ asmCode << "\tandps " << tempRegx << ", " << mantissaMaskReg << std::endl; // keep only the bits selected by the mantissa mask
+ asmCode << "\torps " << tempRegx << ", " << exponentMaskReg << std::endl; // then force on the bits of the exponent mask (presumably to constrain the divisor - confirm)
+ asmCode << "\tdivpd " << regE[instr.dst] << ", " << tempRegx << std::endl;
+ traceflt(instr);
+ }
+
+ void AssemblyGeneratorX86::h_FSQRT_R(Instruction& instr, int i) { // emits an in-place "sqrtpd regE[dst], regE[dst]"; i is unused
+ instr.dst %= RegisterCountFlt; // wrap the destination index into [0, RegisterCountFlt); this mutates instr
+ asmCode << "\tsqrtpd " << regE[instr.dst] << ", " << regE[instr.dst] << std::endl;
+ traceflt(instr);
+ }
+
+ void AssemblyGeneratorX86::h_CFROUND(Instruction& instr, int i) {
+     asmCode << "\tmov rax, " << regR[instr.src] << std::endl;
+     const int rotation = (13 - (instr.getImm32() & 63)) & 63; // same as ror by imm, then rol by 13: selected bits land at 13-14
+     if (rotation != 0)
+         asmCode << "\trol rax, " << rotation << std::endl;
+     asmCode << "\tand eax, 24576" << std::endl // 24576 == 0x6000: keep bits 13-14 only
+             << "\tor eax, 40896" << std::endl // 40896 == 0x9FC0: fixed bits OR-ed into the value loaded into MXCSR
+             << "\tpush rax" << std::endl
+             << "\tldmxcsr dword ptr [rsp]" << std::endl
+             << "\tpop rax" << std::endl;
+     tracenop(instr);
+ }
+
+ void AssemblyGeneratorX86::h_CBRANCH(Instruction& instr, int i) {
+     // Adds a patched immediate to dst and jumps to randomx_isn_<target> when the masked condition bits are zero.
+     const int dstReg = instr.dst;
+     const int jumpTarget = registerUsage[dstReg] + 1; // instruction after the last recorded write to dst
+     const int condShift = instr.getModCond() + ConditionOffset;
+     int32_t addend = instr.getImm32() | (1L << condShift); // set bit condShift
+     if (ConditionOffset > 0 || condShift > 0)
+         addend &= ~(1L << (condShift - 1)); // clear the bit just below it
+     asmCode << "\tadd " << regR[dstReg] << ", " << addend << std::endl;
+     asmCode << "\ttest " << regR[dstReg] << ", " << (ConditionMask << condShift) << std::endl;
+     asmCode << "\tjz randomx_isn_" << jumpTarget << std::endl;
+     // After a branch every register is recorded as written at i.
+     for (int& lastWrite : registerUsage)
+         lastWrite = i;
+ }
+
+ void AssemblyGeneratorX86::h_ISTORE(Instruction& instr, int i) { // emits a 64-bit store of regR[src] into the scratchpad; i is unused
+ genAddressRegDst(instr); // NOTE(review): presumably leaves the destination offset in rax, which the store below uses - confirm
+ asmCode << "\tmov qword ptr [" << regScratchpadAddr << "+rax], " << regR[instr.src] << std::endl;
+ tracenop(instr); // no register is written, so registerUsage is left untouched
+ }
+
+ void AssemblyGeneratorX86::h_NOP(Instruction& instr, int i) { // emits a single "nop"; i is unused
+ asmCode << "\tnop" << std::endl;
+ tracenop(instr);
+ }
+
+#include "instruction_weights.hpp"
+#define INST_HANDLE(x) REPN(&AssemblyGeneratorX86::h_##x, WT(x)) /* pointer to handler h_<x>, expanded through REPN with weight WT(x) */
+ // NOTE(review): the entries below carry no separators, so REPN presumably repeats its argument WT(x) times with trailing commas - confirm in instruction_weights.hpp
+ InstructionGenerator AssemblyGeneratorX86::engine[256] = { // 256-slot table of h_* member-function pointers
+ INST_HANDLE(IADD_RS)
+ INST_HANDLE(IADD_M)
+ INST_HANDLE(ISUB_R)
+ INST_HANDLE(ISUB_M)
+ INST_HANDLE(IMUL_R)
+ INST_HANDLE(IMUL_M)
+ INST_HANDLE(IMULH_R)
+ INST_HANDLE(IMULH_M)
+ INST_HANDLE(ISMULH_R)
+ INST_HANDLE(ISMULH_M)
+ INST_HANDLE(IMUL_RCP)
+ INST_HANDLE(INEG_R)
+ INST_HANDLE(IXOR_R)
+ INST_HANDLE(IXOR_M)
+ INST_HANDLE(IROR_R)
+ INST_HANDLE(IROL_R)
+ INST_HANDLE(ISWAP_R)
+ INST_HANDLE(FSWAP_R)
+ INST_HANDLE(FADD_R)
+ INST_HANDLE(FADD_M)
+ INST_HANDLE(FSUB_R)
+ INST_HANDLE(FSUB_M)
+ INST_HANDLE(FSCAL_R)
+ INST_HANDLE(FMUL_R)
+ INST_HANDLE(FDIV_M)
+ INST_HANDLE(FSQRT_R)
+ INST_HANDLE(CBRANCH)
+ INST_HANDLE(CFROUND)
+ INST_HANDLE(ISTORE)
+ INST_HANDLE(NOP)
+ };
+}
\ No newline at end of file
diff --git a/src/RandomX/src/assembly_generator_x86.hpp b/src/RandomX/src/assembly_generator_x86.hpp
new file mode 100644
index 000000000..e96239885
--- /dev/null
+++ b/src/RandomX/src/assembly_generator_x86.hpp
@@ -0,0 +1,94 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holder nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include "common.hpp"
+#include <sstream>
+
+namespace randomx {
+
+ class Program;
+ class SuperscalarProgram;
+ class AssemblyGeneratorX86;
+ class Instruction;
+
+ typedef void(AssemblyGeneratorX86::*InstructionGenerator)(Instruction&, int);
+
+ class AssemblyGeneratorX86 { // builds x86 assembly text for RandomX programs in an internal string stream
+ public:
+     void generateProgram(Program& prog);
+     void generateAsm(SuperscalarProgram& prog);
+     void generateC(SuperscalarProgram& prog);
+     void printCode(std::ostream& os) { // writes the accumulated text to os; NOTE(review): no longer consumes the buffer, repeated calls emit the same text
+         os << asmCode.str(); // was asmCode.rdbuf(): inserting a streambuf that yields no characters sets failbit on os
+     }
+ private:
+     void genAddressReg(Instruction&, const char*);
+     void genAddressRegDst(Instruction&, int);
+     int32_t genAddressImm(Instruction&);
+     void generateCode(Instruction&, int);
+     void traceint(Instruction&);
+     void traceflt(Instruction&);
+     void tracenop(Instruction&);
+     void h_IADD_RS(Instruction&, int); // h_*: one emitter per opcode; the int is the instruction index
+     void h_IADD_M(Instruction&, int);
+     void h_ISUB_R(Instruction&, int);
+     void h_ISUB_M(Instruction&, int);
+     void h_IMUL_R(Instruction&, int);
+     void h_IMUL_M(Instruction&, int);
+     void h_IMULH_R(Instruction&, int);
+     void h_IMULH_M(Instruction&, int);
+     void h_ISMULH_R(Instruction&, int);
+     void h_ISMULH_M(Instruction&, int);
+     void h_IMUL_RCP(Instruction&, int);
+     void h_INEG_R(Instruction&, int);
+     void h_IXOR_R(Instruction&, int);
+     void h_IXOR_M(Instruction&, int);
+     void h_IROR_R(Instruction&, int);
+     void h_IROL_R(Instruction&, int);
+     void h_ISWAP_R(Instruction&, int);
+     void h_FSWAP_R(Instruction&, int);
+     void h_FADD_R(Instruction&, int);
+     void h_FADD_M(Instruction&, int);
+     void h_FSUB_R(Instruction&, int);
+     void h_FSUB_M(Instruction&, int);
+     void h_FSCAL_R(Instruction&, int);
+     void h_FMUL_R(Instruction&, int);
+     void h_FDIV_M(Instruction&, int);
+     void h_FSQRT_R(Instruction&, int);
+     void h_CBRANCH(Instruction&, int);
+     void h_CFROUND(Instruction&, int);
+     void h_ISTORE(Instruction&, int);
+     void h_NOP(Instruction&, int);
+
+     static InstructionGenerator engine[256]; // table of h_* member-function pointers
+     std::stringstream asmCode; // accumulated assembly text
+     int registerUsage[RegistersCount]; // per integer register: index of the last instruction recorded as writing it
+ };
+}
\ No newline at end of file
diff --git a/src/RandomX/src/blake2/blake2-impl.h b/src/RandomX/src/blake2/blake2-impl.h
new file mode 100644
index 000000000..617f7c8a3
--- /dev/null
+++ b/src/RandomX/src/blake2/blake2-impl.h
@@ -0,0 +1,76 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holder nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Original code from Argon2 reference source code package used under CC0 Licence
+ * https://github.com/P-H-C/phc-winner-argon2
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+*/
+
+#ifndef PORTABLE_BLAKE2_IMPL_H
+#define PORTABLE_BLAKE2_IMPL_H
+
+#include <stdint.h>
+
+#include "endian.h"
+
+static FORCE_INLINE uint64_t load48(const void *src) {
+    /* Read six bytes at src as a little-endian 48-bit value (byte 0 is least significant). */
+    const uint8_t *bytes = (const uint8_t *)src;
+    uint64_t value = 0;
+    unsigned k;
+    for (k = 0; k < 6; ++k) {
+        value |= (uint64_t)bytes[k] << (8 * k);
+    }
+    return value;
+}
+
+static FORCE_INLINE void store48(void *dst, uint64_t w) {
+    /*
+     * Write the low 48 bits of w to dst as six bytes, least significant
+     * byte first; bits 48..63 of w are ignored. Bytes are stored one at a
+     * time, so dst needs no particular alignment.
+     * dst must have room for 6 bytes.
+     */
+    uint8_t *bytes = (uint8_t *)dst;
+    unsigned k;
+
+    for (k = 0; k < 6; ++k) {
+        bytes[k] = (uint8_t)(w >> (8 * k));
+    }
+}
+
+static FORCE_INLINE uint32_t rotr32(const uint32_t w, const unsigned c) { /* rotate w right by c bits (c taken modulo 32) */
+    return (w >> (c & 31)) | (w << ((32 - c) & 31)); /* masked counts: c == 0 no longer shifts by the full width (undefined) */
+}
+
+static FORCE_INLINE uint64_t rotr64(const uint64_t w, const unsigned c) { /* rotate w right by c bits (c taken modulo 64) */
+    return (w >> (c & 63)) | (w << ((64 - c) & 63)); /* masked counts: c == 0 no longer shifts by the full width (undefined) */
+}
+
+#endif
diff --git a/src/RandomX/src/blake2/blake2.h b/src/RandomX/src/blake2/blake2.h
new file mode 100644
index 000000000..3d15be136
--- /dev/null
+++ b/src/RandomX/src/blake2/blake2.h
@@ -0,0 +1,116 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holder nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Original code from Argon2 reference source code package used under CC0 Licence
+ * https://github.com/P-H-C/phc-winner-argon2
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+*/
+
+#ifndef PORTABLE_BLAKE2_H
+#define PORTABLE_BLAKE2_H
+
+#include <stdint.h>
+#include <limits.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+ enum blake2b_constant { /* size limits, all in bytes */
+ BLAKE2B_BLOCKBYTES = 128, /* size of blake2b_state.buf, i.e. one compression block */
+ BLAKE2B_OUTBYTES = 64, /* largest digest length the init functions accept */
+ BLAKE2B_KEYBYTES = 64, /* largest key length blake2b_init_key accepts */
+ BLAKE2B_SALTBYTES = 16, /* size of blake2b_param.salt */
+ BLAKE2B_PERSONALBYTES = 16 /* size of blake2b_param.personal */
+ };
+
+#pragma pack(push, 1)
+ typedef struct __blake2b_param { /* packed parameter block; each trailing number is the byte offset at which that field ends */
+ uint8_t digest_length; /* 1 */
+ uint8_t key_length; /* 2 */
+ uint8_t fanout; /* 3 */
+ uint8_t depth; /* 4 */
+ uint32_t leaf_length; /* 8 */
+ uint64_t node_offset; /* 16 */
+ uint8_t node_depth; /* 17 */
+ uint8_t inner_length; /* 18 */
+ uint8_t reserved[14]; /* 32 */
+ uint8_t salt[BLAKE2B_SALTBYTES]; /* 48 */
+ uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */
+ } blake2b_param; /* 64 bytes in total; blake2b_init_param reads it as eight 64-bit words */
+#pragma pack(pop)
+
+ typedef struct __blake2b_state { /* streaming hash context used by the init/update/final functions */
+ uint64_t h[8]; /* chaining value: IV XOR parameter block, updated by every compression */
+ uint64_t t[2]; /* 128-bit count of bytes compressed so far (t[0] low word, t[1] high word) */
+ uint64_t f[2]; /* finalization flags: f[0] marks the last block, f[1] the last node */
+ uint8_t buf[BLAKE2B_BLOCKBYTES]; /* input bytes not yet compressed */
+ unsigned buflen; /* number of valid bytes in buf */
+ unsigned outlen; /* digest length in bytes, taken from the parameter block */
+ uint8_t last_node; /* when non-zero, finalization also sets f[1] */
+ } blake2b_state;
+
+ /* Ensure param structs have not been wrongly padded. */
+ /* Poor man's static_assert: a false condition turns the enumerator value into 1 / 0, which does not compile. */
+ enum {
+ blake2_size_check_0 = 1 / !!(CHAR_BIT == 8),
+ blake2_size_check_2 =
+ 1 / !!(sizeof(blake2b_param) == sizeof(uint64_t) * CHAR_BIT) /* right-hand side is 8 * 8 = 64 once CHAR_BIT == 8 holds */
+ };
+
+ // randomx namespace: every public BLAKE2b symbol is renamed to randomx_* (presumably to avoid clashes with another BLAKE2 copy at link time)
+#define blake2b_init randomx_blake2b_init
+#define blake2b_init_key randomx_blake2b_init_key
+#define blake2b_init_param randomx_blake2b_init_param
+#define blake2b_update randomx_blake2b_update
+#define blake2b_final randomx_blake2b_final
+#define blake2b randomx_blake2b
+#define blake2b_long randomx_blake2b_long
+
+ /* Streaming API: one init call, any number of updates, then final. Every function returns 0 on success and -1 on failure. */
+ int blake2b_init(blake2b_state *S, size_t outlen); /* unkeyed; outlen must be 1..BLAKE2B_OUTBYTES */
+ int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key,
+ size_t keylen); /* keyed; keylen must be 1..BLAKE2B_KEYBYTES */
+ int blake2b_init_param(blake2b_state *S, const blake2b_param *P); /* initialize from an explicit parameter block */
+ int blake2b_update(blake2b_state *S, const void *in, size_t inlen); /* absorb inlen bytes */
+ int blake2b_final(blake2b_state *S, void *out, size_t outlen); /* writes S->outlen bytes; outlen is the capacity of out */
+
+ /* Simple API: one-shot hash; pass key == NULL and keylen == 0 for the unkeyed variant. */
+ int blake2b(void *out, size_t outlen, const void *in, size_t inlen,
+ const void *key, size_t keylen);
+
+ /* Argon2 Team - Begin Code */
+ int blake2b_long(void *out, size_t outlen, const void *in, size_t inlen); /* variable-length output, may exceed BLAKE2B_OUTBYTES */
+ /* Argon2 Team - End Code */
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
diff --git a/src/RandomX/src/blake2/blake2b.c b/src/RandomX/src/blake2/blake2b.c
new file mode 100644
index 000000000..b9f1b565c
--- /dev/null
+++ b/src/RandomX/src/blake2/blake2b.c
@@ -0,0 +1,409 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holder nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Original code from Argon2 reference source code package used under CC0 Licence
+ * https://github.com/P-H-C/phc-winner-argon2
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+*/
+
+#include <stdint.h>
+#include <string.h>
+#include <stddef.h>
+
+#include "blake2.h"
+#include "blake2-impl.h"
+
+static const uint64_t blake2b_IV[8] = { /* BLAKE2b initialization vector: seeds h[] and the upper half of the compression work vector */
+ UINT64_C(0x6a09e667f3bcc908), UINT64_C(0xbb67ae8584caa73b),
+ UINT64_C(0x3c6ef372fe94f82b), UINT64_C(0xa54ff53a5f1d36f1),
+ UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f),
+ UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179) };
+
+static const unsigned int blake2b_sigma[12][16] = { /* message-word order per round: row r picks the m[] indices used in round r */
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+ {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
+ {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
+ {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8},
+ {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13},
+ {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9},
+ {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11},
+ {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10},
+ {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5},
+ {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0},
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, /* rounds 10 and 11 repeat the rows of rounds 0 and 1 */
+ {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
+};
+
+static FORCE_INLINE void blake2b_set_lastnode(blake2b_state *S) { /* raise the last-node finalization flag */
+ S->f[1] = (uint64_t)-1; /* all bits set */
+}
+
+static FORCE_INLINE void blake2b_set_lastblock(blake2b_state *S) { /* raise the last-block finalization flag */
+ if (S->last_node) { /* a state marked as last node also gets f[1] set */
+ blake2b_set_lastnode(S);
+ }
+ S->f[0] = (uint64_t)-1; /* non-zero f[0] makes blake2b_update and blake2b_final reject the state afterwards */
+}
+
+static FORCE_INLINE void blake2b_increment_counter(blake2b_state *S, /* add inc to the 128-bit counter held in t[0] (low) and t[1] (high) */
+ uint64_t inc) {
+ S->t[0] += inc;
+ S->t[1] += (S->t[0] < inc); /* t[0] ended up smaller than inc only if the addition wrapped: carry into the high word */
+}
+
+static FORCE_INLINE void blake2b_invalidate_state(blake2b_state *S) { /* mark S unusable after a failed init; the memory wipe is disabled */
+ //clear_internal_memory(S, sizeof(*S)); /* wipe */
+ blake2b_set_lastblock(S); /* invalidate for further use */
+}
+
+static FORCE_INLINE void blake2b_init0(blake2b_state *S) { /* reset S: zero every field, then load the IV into h[] */
+ memset(S, 0, sizeof(*S)); /* also clears the counter, flags, buffer, buflen, outlen and last_node */
+ memcpy(S->h, blake2b_IV, sizeof(S->h));
+}
+
+int blake2b_init_param(blake2b_state *S, const blake2b_param *P) {
+    const unsigned char *param_bytes = (const unsigned char *)P;
+    size_t word;
+
+    /* Both the state and the parameter block are required. */
+    if (!S || !P) {
+        return -1;
+    }
+    /* Start from the IV, then fold the parameter block in as eight 64-bit words. */
+    blake2b_init0(S);
+    for (word = 0; word < 8; ++word) {
+        S->h[word] ^= load64(param_bytes + word * sizeof(uint64_t));
+    }
+    S->outlen = P->digest_length;
+    return 0;
+}
+
+/* Sequential blake2b initialization */
+int blake2b_init(blake2b_state *S, size_t outlen) {
+    /* Returns 0 on success; -1 when S is NULL or outlen is out of range */
+    /* (in the latter case S is also marked as invalid). */
+    blake2b_param P;
+
+    if (S == NULL) {
+        return -1;
+    }
+
+    /* Digest length must be 1..BLAKE2B_OUTBYTES. */
+    if (outlen < 1 || outlen > BLAKE2B_OUTBYTES) {
+        blake2b_invalidate_state(S);
+        return -1;
+    }
+
+    /*
+     * Unkeyed, sequential parameter block: zero everything (key length,
+     * tree fields, reserved bytes, salt, personalization), then set the
+     * digest length and fanout = depth = 1.
+     */
+    memset(&P, 0, sizeof(P));
+    P.digest_length = (uint8_t)outlen;
+    P.fanout = 1;
+    P.depth = 1;
+
+    return blake2b_init_param(S, &P);
+}
+
+int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key, size_t keylen) {
+    blake2b_param P;
+
+    if (S == NULL) {
+        return -1;
+    }
+
+    /* Digest length must be 1..BLAKE2B_OUTBYTES. */
+    if (outlen < 1 || outlen > BLAKE2B_OUTBYTES) {
+        blake2b_invalidate_state(S);
+        return -1;
+    }
+
+    /* A key of 1..BLAKE2B_KEYBYTES bytes is mandatory here. */
+    if (key == NULL || keylen < 1 || keylen > BLAKE2B_KEYBYTES) {
+        blake2b_invalidate_state(S);
+        return -1;
+    }
+
+    /*
+     * Keyed, sequential parameter block: zero everything, then set the
+     * digest length, the key length and fanout = depth = 1.
+     */
+    memset(&P, 0, sizeof(P));
+    P.digest_length = (uint8_t)outlen;
+    P.key_length = (uint8_t)keylen;
+    P.fanout = 1;
+    P.depth = 1;
+
+    if (blake2b_init_param(S, &P) < 0) {
+        blake2b_invalidate_state(S);
+        return -1;
+    }
+
+    {
+        /* The key is absorbed as one zero-padded block. */
+        uint8_t key_block[BLAKE2B_BLOCKBYTES] = { 0 };
+        memcpy(key_block, key, keylen);
+        blake2b_update(S, key_block, BLAKE2B_BLOCKBYTES);
+        /* clear_internal_memory(key_block) is disabled in this port: the key */
+        /* copy stays on the stack after return. */
+    }
+
+    return 0;
+}
+
+static void blake2b_compress(blake2b_state *S, const uint8_t *block) { /* mix one BLAKE2B_BLOCKBYTES-sized block into S->h */
+ uint64_t m[16];
+ uint64_t v[16];
+ unsigned int i, r;
+ /* m[]: the block read as sixteen 64-bit words via load64 */
+ for (i = 0; i < 16; ++i) {
+ m[i] = load64(block + i * sizeof(m[i]));
+ }
+ /* v[0..7]: copy of the current chaining value */
+ for (i = 0; i < 8; ++i) {
+ v[i] = S->h[i];
+ }
+ /* v[8..15]: the IV, with the byte counter and finalization flags XOR-ed into the last four words */
+ v[8] = blake2b_IV[0];
+ v[9] = blake2b_IV[1];
+ v[10] = blake2b_IV[2];
+ v[11] = blake2b_IV[3];
+ v[12] = blake2b_IV[4] ^ S->t[0];
+ v[13] = blake2b_IV[5] ^ S->t[1];
+ v[14] = blake2b_IV[6] ^ S->f[0];
+ v[15] = blake2b_IV[7] ^ S->f[1];
+ /* G: quarter-round on (a, b, c, d) using the two message words sigma[r][2i] and sigma[r][2i + 1] */
+#define G(r, i, a, b, c, d) \
+ do { \
+ a = a + b + m[blake2b_sigma[r][2 * i + 0]]; \
+ d = rotr64(d ^ a, 32); \
+ c = c + d; \
+ b = rotr64(b ^ c, 24); \
+ a = a + b + m[blake2b_sigma[r][2 * i + 1]]; \
+ d = rotr64(d ^ a, 16); \
+ c = c + d; \
+ b = rotr64(b ^ c, 63); \
+ } while ((void)0, 0)
+ /* ROUND: G over the four columns of the 4x4 word matrix, then over its four diagonals */
+#define ROUND(r) \
+ do { \
+ G(r, 0, v[0], v[4], v[8], v[12]); \
+ G(r, 1, v[1], v[5], v[9], v[13]); \
+ G(r, 2, v[2], v[6], v[10], v[14]); \
+ G(r, 3, v[3], v[7], v[11], v[15]); \
+ G(r, 4, v[0], v[5], v[10], v[15]); \
+ G(r, 5, v[1], v[6], v[11], v[12]); \
+ G(r, 6, v[2], v[7], v[8], v[13]); \
+ G(r, 7, v[3], v[4], v[9], v[14]); \
+ } while ((void)0, 0)
+ /* twelve rounds, one blake2b_sigma row each */
+ for (r = 0; r < 12; ++r) {
+ ROUND(r);
+ }
+ /* feed-forward: fold both halves of the work vector back into the chaining value */
+ for (i = 0; i < 8; ++i) {
+ S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
+ }
+
+#undef G
+#undef ROUND
+}
+
+int blake2b_update(blake2b_state *S, const void *in, size_t inlen) { /* absorb inlen bytes from in; returns 0 on success, -1 on error */
+ const uint8_t *pin = (const uint8_t *)in;
+ /* Empty input succeeds immediately - note this happens before the pointer checks below. */
+ if (inlen == 0) {
+ return 0;
+ }
+
+ /* Sanity check */
+ if (S == NULL || in == NULL) {
+ return -1;
+ }
+
+ /* Is this a reused state? */
+ if (S->f[0] != 0) { /* f[0] is set by blake2b_final and by blake2b_invalidate_state */
+ return -1;
+ }
+ /* Only compress when the new data does not fit into the buffer together with what is already there. */
+ if (S->buflen + inlen > BLAKE2B_BLOCKBYTES) {
+ /* Complete current block */
+ size_t left = S->buflen;
+ size_t fill = BLAKE2B_BLOCKBYTES - left;
+ memcpy(&S->buf[left], pin, fill);
+ blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
+ blake2b_compress(S, S->buf);
+ S->buflen = 0;
+ inlen -= fill;
+ pin += fill;
+ /* Avoid buffer copies when possible */
+ while (inlen > BLAKE2B_BLOCKBYTES) { /* strictly greater: a trailing full block stays buffered so blake2b_final can compress it with the last-block flag set */
+ blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
+ blake2b_compress(S, pin);
+ inlen -= BLAKE2B_BLOCKBYTES;
+ pin += BLAKE2B_BLOCKBYTES;
+ }
+ }
+ memcpy(&S->buf[S->buflen], pin, inlen); /* at most BLAKE2B_BLOCKBYTES - buflen bytes remain at this point */
+ S->buflen += (unsigned int)inlen;
+ return 0;
+}
+
+int blake2b_final(blake2b_state *S, void *out, size_t outlen) {
+    uint8_t digest[BLAKE2B_OUTBYTES] = { 0 };
+    size_t word;
+
+    /* Reject missing pointers and output buffers shorter than the digest. */
+    if (!S || !out || outlen < S->outlen) {
+        return -1;
+    }
+    /* A set last-block flag means this state was already finalized or invalidated. */
+    if (S->f[0] != 0) {
+        return -1;
+    }
+
+    /* Count the buffered bytes, flag the last block, zero-pad it and compress. */
+    blake2b_increment_counter(S, S->buflen);
+    blake2b_set_lastblock(S);
+    memset(S->buf + S->buflen, 0, BLAKE2B_BLOCKBYTES - S->buflen);
+    blake2b_compress(S, S->buf);
+
+    /* Serialize all eight state words, then hand back only the first S->outlen bytes. */
+    for (word = 0; word < 8; ++word) {
+        store64(digest + word * sizeof(uint64_t), S->h[word]);
+    }
+    memcpy(out, digest, S->outlen);
+
+    /* The clear_internal_memory() wipes of the temporary digest, S->buf and */
+    /* S->h are disabled in this port, so those bytes are left in memory. */
+    return 0;
+}
+
+int blake2b(void *out, size_t outlen, const void *in, size_t inlen,
+    const void *key, size_t keylen) {
+    /*
+     * One-shot BLAKE2b: hashes inlen bytes at in (optionally keyed) into out.
+     * Returns 0 on success and -1 on any invalid argument or internal failure.
+     */
+    blake2b_state S;
+    int init_status;
+
+    /* Input may be NULL only when it is empty. */
+    if (in == NULL && inlen != 0) {
+        return -1;
+    }
+
+    /* A digest of 1..BLAKE2B_OUTBYTES bytes must be requested into a real buffer. */
+    if (out == NULL || outlen == 0 || outlen > BLAKE2B_OUTBYTES) {
+        return -1;
+    }
+
+    /* The key is optional; a non-empty key needs a pointer and at most BLAKE2B_KEYBYTES bytes. */
+    if ((key == NULL && keylen != 0) || keylen > BLAKE2B_KEYBYTES) {
+        return -1;
+    }
+
+    /* Keyed or unkeyed initialization, depending on keylen. */
+    init_status = (keylen > 0) ? blake2b_init_key(&S, outlen, key, keylen)
+                               : blake2b_init(&S, outlen);
+    if (init_status < 0) {
+        return -1;
+    }
+
+    if (blake2b_update(&S, in, inlen) < 0) {
+        return -1;
+    }
+
+    /* clear_internal_memory(&S) is disabled in this port, so S is not wiped. */
+    return blake2b_final(&S, out, outlen);
+}
+
+/* Argon2 Team - Begin Code */
+int blake2b_long(void *pout, size_t outlen, const void *in, size_t inlen) { /* hash with an output of arbitrary length outlen; returns 0 on success, negative on failure */
+ uint8_t *out = (uint8_t *)pout;
+ blake2b_state blake_state;
+ uint8_t outlen_bytes[sizeof(uint32_t)] = { 0 };
+ int ret = -1;
+ /* The length is hashed as a 32-bit prefix, so larger requests are rejected. */
+ if (outlen > UINT32_MAX) {
+ goto fail;
+ }
+
+ /* Ensure little-endian byte order! */
+ store32(outlen_bytes, (uint32_t)outlen);
+ /* TRY: run a call, keep its result in ret, and bail out to fail: on a negative result. */
+#define TRY(statement) \
+ do { \
+ ret = statement; \
+ if (ret < 0) { \
+ goto fail; \
+ } \
+ } while ((void)0, 0)
+ /* Short output: a single BLAKE2b of (length prefix || input) with digest length outlen. */
+ if (outlen <= BLAKE2B_OUTBYTES) {
+ TRY(blake2b_init(&blake_state, outlen));
+ TRY(blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
+ TRY(blake2b_update(&blake_state, in, inlen));
+ TRY(blake2b_final(&blake_state, out, outlen));
+ }
+ else { /* long output: chain 64-byte hashes and emit the first half of each */
+ uint32_t toproduce;
+ uint8_t out_buffer[BLAKE2B_OUTBYTES];
+ uint8_t in_buffer[BLAKE2B_OUTBYTES];
+ TRY(blake2b_init(&blake_state, BLAKE2B_OUTBYTES));
+ TRY(blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
+ TRY(blake2b_update(&blake_state, in, inlen));
+ TRY(blake2b_final(&blake_state, out_buffer, BLAKE2B_OUTBYTES));
+ memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
+ out += BLAKE2B_OUTBYTES / 2;
+ toproduce = (uint32_t)outlen - BLAKE2B_OUTBYTES / 2;
+ /* Each step hashes the previous full 64-byte digest and outputs 32 more bytes. */
+ while (toproduce > BLAKE2B_OUTBYTES) {
+ memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
+ TRY(blake2b(out_buffer, BLAKE2B_OUTBYTES, in_buffer,
+ BLAKE2B_OUTBYTES, NULL, 0));
+ memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
+ out += BLAKE2B_OUTBYTES / 2;
+ toproduce -= BLAKE2B_OUTBYTES / 2;
+ }
+ /* Final step: the remaining 1..64 bytes come from one hash with digest length toproduce. */
+ memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
+ TRY(blake2b(out_buffer, toproduce, in_buffer, BLAKE2B_OUTBYTES, NULL,
+ 0));
+ memcpy(out, out_buffer, toproduce);
+ }
+fail:
+ //clear_internal_memory(&blake_state, sizeof(blake_state));
+ return ret; /* also reached on success, where ret holds the result of the last TRY */
+#undef TRY
+}
+/* Argon2 Team - End Code */
+
diff --git a/src/RandomX/src/blake2/blamka-round-avx2.h b/src/RandomX/src/blake2/blamka-round-avx2.h
new file mode 100644
index 000000000..483826179
--- /dev/null
+++ b/src/RandomX/src/blake2/blamka-round-avx2.h
@@ -0,0 +1,189 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holder nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Original code from Argon2 reference source code package used under CC0 Licence
+ * https://github.com/P-H-C/phc-winner-argon2
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+*/
+
+#ifndef BLAKE_ROUND_MKA_OPT_H
+#define BLAKE_ROUND_MKA_OPT_H
+
+#include "blake2-impl.h"
+
+#ifdef __GNUC__
+#include <x86intrin.h>
+#else
+#include <intrin.h>
+#endif
+
+#define rotr32(x) _mm256_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1)) /* swaps the 32-bit halves of every 64-bit lane; NOTE: from here on this macro hides the two-argument rotr32() of blake2-impl.h */
+#define rotr24(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10)) /* byte shuffle: every 64-bit lane rotated right by 3 bytes */
+#define rotr16(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9, 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9)) /* byte shuffle: every 64-bit lane rotated right by 2 bytes */
+#define rotr63(x) _mm256_xor_si256(_mm256_srli_epi64((x), 63), _mm256_add_epi64((x), (x))) /* (x >> 63) ^ (x + x): rotate left by 1, i.e. right by 63 */
+
+#define G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+ do { /* first half of the mixing step, applied to the *0 and then the *1 register set; updates its arguments in place */ \
+ __m256i ml = _mm256_mul_epu32(A0, B0); \
+ ml = _mm256_add_epi64(ml, ml); \
+ A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
+ D0 = _mm256_xor_si256(D0, A0); \
+ D0 = rotr32(D0); \
+ /* above: A += B + 2 * lo32(A) * lo32(B); D = rotr32(D ^ A) */ \
+ ml = _mm256_mul_epu32(C0, D0); \
+ ml = _mm256_add_epi64(ml, ml); \
+ C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
+ /* above: C += D + 2 * lo32(C) * lo32(D); below: B = rotr24(B ^ C) */ \
+ B0 = _mm256_xor_si256(B0, C0); \
+ B0 = rotr24(B0); \
+ /* same sequence for the second register set */ \
+ ml = _mm256_mul_epu32(A1, B1); \
+ ml = _mm256_add_epi64(ml, ml); \
+ A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
+ D1 = _mm256_xor_si256(D1, A1); \
+ D1 = rotr32(D1); \
+ \
+ ml = _mm256_mul_epu32(C1, D1); \
+ ml = _mm256_add_epi64(ml, ml); \
+ C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
+ \
+ B1 = _mm256_xor_si256(B1, C1); \
+ B1 = rotr24(B1); \
+ } while((void)0, 0);
+
+#define G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+ do { /* second half of the mixing step: same shape as G1_AVX2 but with rotations by 16 and 63 */ \
+ __m256i ml = _mm256_mul_epu32(A0, B0); \
+ ml = _mm256_add_epi64(ml, ml); \
+ A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
+ D0 = _mm256_xor_si256(D0, A0); \
+ D0 = rotr16(D0); \
+ /* above: A += B + 2 * lo32(A) * lo32(B); D = rotr16(D ^ A) */ \
+ ml = _mm256_mul_epu32(C0, D0); \
+ ml = _mm256_add_epi64(ml, ml); \
+ C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
+ B0 = _mm256_xor_si256(B0, C0); \
+ B0 = rotr63(B0); \
+ /* same sequence for the second register set */ \
+ ml = _mm256_mul_epu32(A1, B1); \
+ ml = _mm256_add_epi64(ml, ml); \
+ A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
+ D1 = _mm256_xor_si256(D1, A1); \
+ D1 = rotr16(D1); \
+ \
+ ml = _mm256_mul_epu32(C1, D1); \
+ ml = _mm256_add_epi64(ml, ml); \
+ C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
+ B1 = _mm256_xor_si256(B1, C1); \
+ B1 = rotr63(B1); \
+ } while((void)0, 0);
+
+#define DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
+ do { /* permute the 64-bit lanes of B, C and D (A0/A1 are not touched); UNDIAGONALIZE_1 applies the inverse permutations */ \
+ B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \
+ C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
+ D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \
+ /* identical permutations for the second register set */ \
+ B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \
+ C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
+ D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \
+ } while((void)0, 0);
+
+#define DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
+    do { /* mix lanes across the two register sets: blend + permute B and D, swap C0 with C1 (A0/A1 are not touched) */ \
+        __m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
+        __m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
+        B1 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
+        B0 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
+        \
+        tmp1 = C0; \
+        C0 = C1; \
+        C1 = tmp1; \
+        \
+        tmp1 = _mm256_blend_epi32(D0, D1, 0xCC); \
+        tmp2 = _mm256_blend_epi32(D0, D1, 0x33); \
+        D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
+        D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
+    } while((void)0, 0); /* same terminator as the sibling macros (was a bare while(0)) */
+
+#define UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
+ do { /* inverse of DIAGONALIZE_1: the B and D shuffle constants are exchanged, C uses the same self-inverse one */ \
+ B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \
+ C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
+ D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \
+ /* identical permutations for the second register set */ \
+ B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \
+ C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
+ D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \
+ } while((void)0, 0);
+
+#define UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
+ do { /* counterpart of DIAGONALIZE_2: blend + permute B and D, swap C0 with C1 back (A0/A1 are not touched) */ \
+ __m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
+ __m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
+ B0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
+ B1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
+ \
+ tmp1 = C0; \
+ C0 = C1; \
+ C1 = tmp1; \
+ /* note: for D the 0x33 blend feeds D0 here, whereas DIAGONALIZE_2 feeds D0 from the 0xCC blend */ \
+ tmp1 = _mm256_blend_epi32(D0, D1, 0x33); \
+ tmp2 = _mm256_blend_epi32(D0, D1, 0xCC); \
+ D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
+ D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
+ } while((void)0, 0);
+
+#define BLAKE2_ROUND_1(A0, A1, B0, B1, C0, C1, D0, D1) \
+ do{ /* full round: G1+G2, DIAGONALIZE_1, G1+G2 again, UNDIAGONALIZE_1 */ \
+ G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+ G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+ /* the helper macros already end in ';', so they are invoked without one */ \
+ DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
+ \
+ G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+ G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+ \
+ UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
+ } while((void)0, 0);
+
+#define BLAKE2_ROUND_2(A0, A1, B0, B1, C0, C1, D0, D1) \
+ do{ /* full round: G1+G2, DIAGONALIZE_2, G1+G2 again, UNDIAGONALIZE_2 */ \
+ G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+ G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+ /* the helper macros already end in ';', so they are invoked without one */ \
+ DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
+ \
+ G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+ G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+ \
+ UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
+ } while((void)0, 0);
+
+#endif /* BLAKE_ROUND_MKA_OPT_H */
diff --git a/src/RandomX/src/blake2/blamka-round-ref.h b/src/RandomX/src/blake2/blamka-round-ref.h
new file mode 100644
index 000000000..f1fb50bf8
--- /dev/null
+++ b/src/RandomX/src/blake2/blamka-round-ref.h
@@ -0,0 +1,73 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holder nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Original code from Argon2 reference source code package used under CC0 Licence
+ * https://github.com/P-H-C/phc-winner-argon2
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+*/
+
+#ifndef BLAKE_ROUND_MKA_H
+#define BLAKE_ROUND_MKA_H
+
+#include "blake2.h"
+#include "blake2-impl.h"
+
+ /* BlaMka primitive (designed by the Lyra PHC team): x + y + 2 * lo32(x) * lo32(y), modulo 2^64 */
+static FORCE_INLINE uint64_t fBlaMka(uint64_t x, uint64_t y) {
+ const uint64_t lo_x = x & UINT64_C(0xFFFFFFFF);
+ const uint64_t lo_y = y & UINT64_C(0xFFFFFFFF);
+ return x + y + ((lo_x * lo_y) << 1);
+}
+
+#define G(a, b, c, d) \
+ do { /* Mixes four 64-bit words in place: four fBlaMka / xor / rotate-right steps with rotation counts 32, 24, 16 and 63. Arguments are read and assigned repeatedly, so pass plain lvalues. */ \
+ a = fBlaMka(a, b); \
+ d = rotr64(d ^ a, 32); \
+ c = fBlaMka(c, d); \
+ b = rotr64(b ^ c, 24); \
+ a = fBlaMka(a, b); /* second half repeats the pattern with the shorter rotations */ \
+ d = rotr64(d ^ a, 16); \
+ c = fBlaMka(c, d); \
+ b = rotr64(b ^ c, 63); \
+ } while ((void)0, 0) /* no trailing ';' here -- the caller supplies it */
+
+#define BLAKE2_ROUND_NOMSG(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, \
+ v12, v13, v14, v15) \
+ do { /* Eight G calls over sixteen 64-bit words; G takes no message input, only the words themselves. */ \
+ G(v0, v4, v8, v12); /* first four calls: (v[i], v[i+4], v[i+8], v[i+12]) for i = 0..3 */ \
+ G(v1, v5, v9, v13); \
+ G(v2, v6, v10, v14); \
+ G(v3, v7, v11, v15); \
+ G(v0, v5, v10, v15); /* last four calls: 2nd, 3rd and 4th operands advanced by 1, 2 and 3 within their group of four */ \
+ G(v1, v6, v11, v12); \
+ G(v2, v7, v8, v13); \
+ G(v3, v4, v9, v14); \
+ } while ((void)0, 0)
+
+#endif
diff --git a/src/RandomX/src/blake2/blamka-round-ssse3.h b/src/RandomX/src/blake2/blamka-round-ssse3.h
new file mode 100644
index 000000000..f2d3b5d05
--- /dev/null
+++ b/src/RandomX/src/blake2/blamka-round-ssse3.h
@@ -0,0 +1,162 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holder nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Original code from Argon2 reference source code package used under CC0 Licence
+ * https://github.com/P-H-C/phc-winner-argon2
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+*/
+
+#ifndef BLAKE_ROUND_MKA_OPT_H
+#define BLAKE_ROUND_MKA_OPT_H
+
+#include "blake2-impl.h"
+
+#ifdef __GNUC__
+#include <x86intrin.h>
+#else
+#include <intrin.h>
+#endif
+
+#ifdef _mm_roti_epi64 //clang defines it using the XOP instruction set
+#undef _mm_roti_epi64
+#endif
+
+#define r16 /* _mm_shuffle_epi8 mask: result byte i = source byte i+2 (wrapping inside each 8-byte lane), i.e. rotate each 64-bit lane right by 16 bits */ \
+ (_mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9))
+#define r24 /* same idea with an offset of 3 bytes: rotate each 64-bit lane right by 24 bits */ \
+ (_mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))
+#define _mm_roti_epi64(x, c) /* rotates each 64-bit lane of x right by -(c) bits; callers in this header pass c = -32, -24, -16 or -63 */ \
+ ((-(c) == 32) \
+ ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) /* 32: swap the two 32-bit halves of each lane */ \
+ : (-(c) == 24) \
+ ? _mm_shuffle_epi8((x), r24) /* 24 and 16: whole-byte rotations done with a byte shuffle */ \
+ : (-(c) == 16) \
+ ? _mm_shuffle_epi8((x), r16) \
+ : (-(c) == 63) \
+ ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), /* 63: (x >> 63) ^ (x + x) */ \
+ _mm_add_epi64((x), (x))) \
+ : _mm_xor_si128(_mm_srli_epi64((x), -(c)), /* any other count n: (x >> n) ^ (x << (64 - n)) */ \
+ _mm_slli_epi64((x), 64 - (-(c))))) /* outer parentheses keep the conditional chain intact when the macro is used inside a larger expression */
+
+static FORCE_INLINE __m128i fBlaMka(__m128i x, __m128i y) { /* per 64-bit lane: x + y + 2 * lo32(x) * lo32(y) */
+ const __m128i z = _mm_mul_epu32(x, y); /* 64-bit product of the low 32 bits of each lane */
+ return _mm_add_epi64(_mm_add_epi64(x, y), _mm_add_epi64(z, z)); /* z + z doubles the product */
+}
+
+#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \
+ do { /* First half of the mixing step, applied in lockstep to the register sets (A0,B0,C0,D0) and (A1,B1,C1,D1); rotation counts 32 and 24. */ \
+ A0 = fBlaMka(A0, B0); \
+ A1 = fBlaMka(A1, B1); \
+ \
+ D0 = _mm_xor_si128(D0, A0); \
+ D1 = _mm_xor_si128(D1, A1); \
+ \
+ D0 = _mm_roti_epi64(D0, -32); /* negative count = rotate right (see _mm_roti_epi64 above) */ \
+ D1 = _mm_roti_epi64(D1, -32); \
+ \
+ C0 = fBlaMka(C0, D0); \
+ C1 = fBlaMka(C1, D1); \
+ \
+ B0 = _mm_xor_si128(B0, C0); \
+ B1 = _mm_xor_si128(B1, C1); \
+ \
+ B0 = _mm_roti_epi64(B0, -24); \
+ B1 = _mm_roti_epi64(B1, -24); \
+ } while ((void)0, 0)
+
+#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \
+ do { /* Second half of the mixing step: same sequence as G1 with rotation counts 16 and 63. */ \
+ A0 = fBlaMka(A0, B0); \
+ A1 = fBlaMka(A1, B1); \
+ \
+ D0 = _mm_xor_si128(D0, A0); \
+ D1 = _mm_xor_si128(D1, A1); \
+ \
+ D0 = _mm_roti_epi64(D0, -16); /* rotate right by 16 */ \
+ D1 = _mm_roti_epi64(D1, -16); \
+ \
+ C0 = fBlaMka(C0, D0); \
+ C1 = fBlaMka(C1, D1); \
+ \
+ B0 = _mm_xor_si128(B0, C0); \
+ B1 = _mm_xor_si128(B1, C1); \
+ \
+ B0 = _mm_roti_epi64(B0, -63); /* rotate right by 63 */ \
+ B1 = _mm_roti_epi64(B1, -63); \
+ } while ((void)0, 0)
+
+#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
+ do { /* Viewing (X0, X1) as four 64-bit words [X0.lo, X0.hi, X1.lo, X1.hi]: moves row B one word toward index 0 (wrapping), row C two words, row D three words. A0/A1 are not referenced. */ \
+ __m128i t0 = _mm_alignr_epi8(B1, B0, 8); /* [B0.hi, B1.lo] */ \
+ __m128i t1 = _mm_alignr_epi8(B0, B1, 8); /* [B1.hi, B0.lo] */ \
+ B0 = t0; \
+ B1 = t1; \
+ \
+ t0 = C0; /* swap C0 and C1 */ \
+ C0 = C1; \
+ C1 = t0; \
+ \
+ t0 = _mm_alignr_epi8(D1, D0, 8); /* [D0.hi, D1.lo] */ \
+ t1 = _mm_alignr_epi8(D0, D1, 8); /* [D1.hi, D0.lo] */ \
+ D0 = t1; /* note the crossed assignment: D0 takes t1, D1 takes t0 */ \
+ D1 = t0; \
+ } while ((void)0, 0)
+
+#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
+ do { /* Reverses DIAGONALIZE: in the [X0.lo, X0.hi, X1.lo, X1.hi] view, row B moves three words toward index 0, row C two, row D one. A0/A1 are not referenced. */ \
+ __m128i t0 = _mm_alignr_epi8(B0, B1, 8); /* [B1.hi, B0.lo] */ \
+ __m128i t1 = _mm_alignr_epi8(B1, B0, 8); /* [B0.hi, B1.lo] */ \
+ B0 = t0; \
+ B1 = t1; \
+ \
+ t0 = C0; /* swap C0 and C1 */ \
+ C0 = C1; \
+ C1 = t0; \
+ \
+ t0 = _mm_alignr_epi8(D0, D1, 8); /* [D1.hi, D0.lo] */ \
+ t1 = _mm_alignr_epi8(D1, D0, 8); /* [D0.hi, D1.lo] */ \
+ D0 = t1; /* crossed assignment, as in DIAGONALIZE */ \
+ D1 = t0; \
+ } while ((void)0, 0)
+
+#define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1) \
+ do { /* One round on eight __m128i values. NOTE: parameters arrive as (A0, A1, B0, B1, ...) but the helpers take them as (A0, B0, C0, D0, A1, B1, C1, D1). */ \
+ G1(A0, B0, C0, D0, A1, B1, C1, D1); \
+ G2(A0, B0, C0, D0, A1, B1, C1, D1); \
+ \
+ DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); /* re-align rows B, C, D so the second G1/G2 pass combines different words */ \
+ \
+ G1(A0, B0, C0, D0, A1, B1, C1, D1); \
+ G2(A0, B0, C0, D0, A1, B1, C1, D1); \
+ \
+ UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); /* restore the original word positions */ \
+ } while ((void)0, 0)
+
+
+#endif /* BLAKE_ROUND_MKA_OPT_H */
diff --git a/src/RandomX/src/blake2/endian.h b/src/RandomX/src/blake2/endian.h
new file mode 100644
index 000000000..c7afed261
--- /dev/null
+++ b/src/RandomX/src/blake2/endian.h
@@ -0,0 +1,107 @@
+#pragma once
+#include <stdint.h>
+#include <string.h>
+
+#if defined(_MSC_VER) /* FORCE_INLINE: compiler-specific spelling of the inline keyword; empty for unknown compilers */
+#define FORCE_INLINE __inline
+#elif defined(__GNUC__) || defined(__clang__)
+#define FORCE_INLINE __inline__ /* NOTE: __inline / __inline__ are ordinary inline hints, not __forceinline / always_inline */
+#else
+#define FORCE_INLINE /* expands to nothing */
+#endif
+
+ /* Argon2 Team - Begin Code */
+ /*
+ Defines NATIVE_LITTLE_ENDIAN when a known little-endian compiler or target macro is set.
+ The list is not exhaustive, but should cover the majority of modern platforms. When the macro
+ is absent the load/store helpers below use their byte-by-byte path: slower, still correct.
+ */
+#if (defined(__BYTE_ORDER__) && \
+ (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) || \
+ defined(__LITTLE_ENDIAN__) || defined(__ARMEL__) || defined(__MIPSEL__) || \
+ defined(__AARCH64EL__) || defined(__amd64__) || defined(__i386__) || \
+ defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64) || \
+ defined(_M_ARM)
+#define NATIVE_LITTLE_ENDIAN
+#endif
+ /* Argon2 Team - End Code */
+
+static FORCE_INLINE uint32_t load32(const void *src) { /* reads a 32-bit little-endian value from src; src need not be aligned */
+#if defined(NATIVE_LITTLE_ENDIAN)
+ uint32_t value; /* host order is already little-endian: copy the four bytes */
+ memcpy(&value, src, sizeof value);
+ return value;
+#else
+ const uint8_t *in = (const uint8_t *)src; /* portable path: byte 0 is the least significant */
+ uint32_t value = in[0];
+ value |= (uint32_t)in[1] << 8;
+ value |= (uint32_t)in[2] << 16;
+ value |= (uint32_t)in[3] << 24;
+ return value;
+#endif
+}
+
+static FORCE_INLINE uint64_t load64_native(const void *src) { /* reads eight bytes from src in host byte order */
+ uint64_t value;
+ memcpy(&value, src, sizeof value);
+ return value;
+}
+
+static FORCE_INLINE uint64_t load64(const void *src) { /* reads a 64-bit little-endian value from src */
+#if defined(NATIVE_LITTLE_ENDIAN)
+ return load64_native(src);
+#else
+ const uint8_t *in = (const uint8_t *)src; /* portable path: byte 0 is the least significant */
+ uint64_t value = in[0];
+ value |= (uint64_t)in[1] << 8;
+ value |= (uint64_t)in[2] << 16;
+ value |= (uint64_t)in[3] << 24;
+ value |= (uint64_t)in[4] << 32;
+ value |= (uint64_t)in[5] << 40;
+ value |= (uint64_t)in[6] << 48;
+ value |= (uint64_t)in[7] << 56;
+ return value;
+#endif
+}
+
+static FORCE_INLINE void store32(void *dst, uint32_t w) { /* writes w to dst as four little-endian bytes */
+#if defined(NATIVE_LITTLE_ENDIAN)
+ memcpy(dst, &w, sizeof(uint32_t));
+#else
+ uint8_t *out = (uint8_t *)dst; /* portable path: emit the low byte, then shift the next one down */
+ out[0] = (uint8_t)w;
+ w >>= 8;
+ out[1] = (uint8_t)w;
+ w >>= 8;
+ out[2] = (uint8_t)w;
+ w >>= 8;
+ out[3] = (uint8_t)w;
+#endif
+}
+
+static FORCE_INLINE void store64_native(void *dst, uint64_t w) { /* writes the eight bytes of w to dst in host byte order */
+ memcpy(dst, &w, sizeof(uint64_t));
+}
+
+static FORCE_INLINE void store64(void *dst, uint64_t w) { /* writes w to dst as eight little-endian bytes */
+#if defined(NATIVE_LITTLE_ENDIAN)
+ store64_native(dst, w);
+#else
+ uint8_t *out = (uint8_t *)dst; /* portable path: emit the low byte, then shift the next one down */
+ out[0] = (uint8_t)w;
+ w >>= 8;
+ out[1] = (uint8_t)w;
+ w >>= 8;
+ out[2] = (uint8_t)w;
+ w >>= 8;
+ out[3] = (uint8_t)w;
+ w >>= 8;
+ out[4] = (uint8_t)w;
+ w >>= 8;
+ out[5] = (uint8_t)w;
+ w >>= 8;
+ out[6] = (uint8_t)w;
+ w >>= 8;
+ out[7] = (uint8_t)w;
+#endif
+}
diff --git a/src/RandomX/src/blake2_generator.cpp b/src/RandomX/src/blake2_generator.cpp
new file mode 100644
index 000000000..3f2d028c3
--- /dev/null
+++ b/src/RandomX/src/blake2_generator.cpp
@@ -0,0 +1,62 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holder nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <stddef.h>
+#include "blake2/blake2.h"
+#include "blake2/endian.h"
+#include "blake2_generator.hpp"
+
+namespace randomx {
+ // Largest number of seed bytes copied into the state; the nonce is stored at this offset.
+ constexpr int maxSeedSize = 60;
+ // Zero the state, copy at most maxSeedSize seed bytes, store the nonce; dataIndex starts at sizeof(data), so the first read hashes the state before returning anything.
+ Blake2Generator::Blake2Generator(const void* seed, size_t seedSize, int nonce) : dataIndex(sizeof(data)) {
+ memset(data, 0, sizeof(data));
+ memcpy(data, seed, seedSize <= maxSeedSize ? seedSize : maxSeedSize);
+ store32(&data[maxSeedSize], nonce);
+ }
+ // Returns the next unread byte of the state.
+ uint8_t Blake2Generator::getByte() {
+ checkData(sizeof(uint8_t));
+ return data[dataIndex++];
+ }
+ // Returns the next four unread bytes, decoded by load32.
+ uint32_t Blake2Generator::getUInt32() {
+ checkData(sizeof(uint32_t));
+ const size_t offset = dataIndex;
+ dataIndex = offset + sizeof(uint32_t);
+ return load32(&data[offset]);
+ }
+ // When fewer than bytesNeeded unread bytes remain, replaces the state with its own BLAKE2b hash and rewinds.
+ void Blake2Generator::checkData(const size_t bytesNeeded) {
+ if (dataIndex + bytesNeeded <= sizeof(data))
+ return;
+ blake2b(data, sizeof(data), data, sizeof(data), nullptr, 0);
+ dataIndex = 0;
+ }
+}
\ No newline at end of file
diff --git a/src/RandomX/src/blake2_generator.hpp b/src/RandomX/src/blake2_generator.hpp
new file mode 100644
index 000000000..5e7f61f25
--- /dev/null
+++ b/src/RandomX/src/blake2_generator.hpp
@@ -0,0 +1,46 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holder nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include <cstdint>
+
+namespace randomx {
+
+ class Blake2Generator { // byte / 32-bit word generator over a 64-byte buffer; member functions are defined in blake2_generator.cpp
+ public:
+ Blake2Generator(const void* seed, size_t seedSize, int nonce = 0); // nonce is optional and defaults to 0
+ uint8_t getByte();
+ uint32_t getUInt32();
+ private:
+ void checkData(const size_t); // argument: number of bytes the caller is about to read
+
+ uint8_t data[64]; // generator state, also the pool the getters read from
+ size_t dataIndex; // offset of the next unread byte in data
+ };
+}
\ No newline at end of file
diff --git a/src/RandomX/src/bytecode_machine.cpp b/src/RandomX/src/bytecode_machine.cpp
new file mode 100644
index 000000000..7d8e902d2
--- /dev/null
+++ b/src/RandomX/src/bytecode_machine.cpp
@@ -0,0 +1,482 @@
+/*
+Copyright (c) 2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holder nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "bytecode_machine.hpp"
+#include "reciprocal.h"
+
+namespace randomx {
+
+ const int_reg_t BytecodeMachine::zero = 0; // constant operand: compileInstruction points isrc here for integer memory instructions whose src equals dst
+
+#define INSTR_CASE(x) case InstructionType::x: /* one switch case that forwards to the matching exe_<x> handler */ \
+ exe_ ## x(ibc, pc, scratchpad, config); \
+ break;
+
+ void BytecodeMachine::executeInstruction(RANDOMX_EXE_ARGS) { // dispatches on ibc.type; ibc, pc, scratchpad and config presumably come from RANDOMX_EXE_ARGS (defined elsewhere)
+ switch (ibc.type)
+ {
+ INSTR_CASE(IADD_RS)
+ INSTR_CASE(IADD_M)
+ INSTR_CASE(ISUB_R)
+ INSTR_CASE(ISUB_M)
+ INSTR_CASE(IMUL_R)
+ INSTR_CASE(IMUL_M)
+ INSTR_CASE(IMULH_R)
+ INSTR_CASE(IMULH_M)
+ INSTR_CASE(ISMULH_R)
+ INSTR_CASE(ISMULH_M)
+ INSTR_CASE(INEG_R)
+ INSTR_CASE(IXOR_R)
+ INSTR_CASE(IXOR_M)
+ INSTR_CASE(IROR_R)
+ INSTR_CASE(IROL_R)
+ INSTR_CASE(ISWAP_R)
+ INSTR_CASE(FSWAP_R)
+ INSTR_CASE(FADD_R)
+ INSTR_CASE(FADD_M)
+ INSTR_CASE(FSUB_R)
+ INSTR_CASE(FSUB_M)
+ INSTR_CASE(FSCAL_R)
+ INSTR_CASE(FMUL_R)
+ INSTR_CASE(FDIV_M)
+ INSTR_CASE(FSQRT_R)
+ INSTR_CASE(CBRANCH)
+ INSTR_CASE(CFROUND)
+ INSTR_CASE(ISTORE)
+
+ case InstructionType::NOP: // nothing to execute
+ break;
+
+ case InstructionType::IMUL_RCP: //executed as IMUL_R
+ default: // compileInstruction emits IMUL_R or NOP for the IMUL_RCP opcode range, so this type (like any unknown one) should never reach the switch
+ UNREACHABLE;
+ }
+ }
+
+ void BytecodeMachine::compileInstruction(RANDOMX_GEN_ARGS) { // fills ibc from instr; the opcode is compared against the ceil_* thresholds in order and the first match returns. instr, i, ibc, nreg and registerUsage presumably come from RANDOMX_GEN_ARGS (defined elsewhere)
+ int opcode = instr.opcode;
+
+ if (opcode < ceil_IADD_RS) { // IADD_RS: both branches set isrc and shift identically; only imm differs
+ auto dst = instr.dst % RegistersCount;
+ auto src = instr.src % RegistersCount;
+ ibc.type = InstructionType::IADD_RS;
+ ibc.idst = &nreg->r[dst];
+ if (dst != RegisterNeedsDisplacement) {
+ ibc.isrc = &nreg->r[src];
+ ibc.shift = instr.getModShift();
+ ibc.imm = 0;
+ }
+ else { // only the RegisterNeedsDisplacement destination gets the sign-extended immediate
+ ibc.isrc = &nreg->r[src];
+ ibc.shift = instr.getModShift();
+ ibc.imm = signExtend2sCompl(instr.getImm32());
+ }
+ registerUsage[dst] = i; // record i for dst; CBRANCH later reads this table as its jump target (i is presumably the current instruction index -- TODO confirm)
+ return;
+ }
+
+ if (opcode < ceil_IADD_M) { // IADD_M: integer op with a memory operand
+ auto dst = instr.dst % RegistersCount;
+ auto src = instr.src % RegistersCount;
+ ibc.type = InstructionType::IADD_M;
+ ibc.idst = &nreg->r[dst];
+ ibc.imm = signExtend2sCompl(instr.getImm32());
+ if (src != dst) { // address register differs from dst: L1 or L2 mask chosen by getModMem()
+ ibc.isrc = &nreg->r[src];
+ ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
+ }
+ else { // src == dst: the register is replaced by the constant zero and the L3 mask is used
+ ibc.isrc = &zero;
+ ibc.memMask = ScratchpadL3Mask;
+ }
+ registerUsage[dst] = i;
+ return;
+ }
+
+ if (opcode < ceil_ISUB_R) { // ISUB_R: register operand, or the immediate when src == dst
+ auto dst = instr.dst % RegistersCount;
+ auto src = instr.src % RegistersCount;
+ ibc.type = InstructionType::ISUB_R;
+ ibc.idst = &nreg->r[dst];
+ if (src != dst) {
+ ibc.isrc = &nreg->r[src];
+ }
+ else { // src == dst: isrc points at the bytecode's own sign-extended immediate
+ ibc.imm = signExtend2sCompl(instr.getImm32());
+ ibc.isrc = &ibc.imm;
+ }
+ registerUsage[dst] = i;
+ return;
+ }
+
+ if (opcode < ceil_ISUB_M) { // ISUB_M: same operand setup as IADD_M
+ auto dst = instr.dst % RegistersCount;
+ auto src = instr.src % RegistersCount;
+ ibc.type = InstructionType::ISUB_M;
+ ibc.idst = &nreg->r[dst];
+ ibc.imm = signExtend2sCompl(instr.getImm32());
+ if (src != dst) {
+ ibc.isrc = &nreg->r[src];
+ ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
+ }
+ else {
+ ibc.isrc = &zero;
+ ibc.memMask = ScratchpadL3Mask;
+ }
+ registerUsage[dst] = i;
+ return;
+ }
+
+ if (opcode < ceil_IMUL_R) { // IMUL_R: same operand setup as ISUB_R
+ auto dst = instr.dst % RegistersCount;
+ auto src = instr.src % RegistersCount;
+ ibc.type = InstructionType::IMUL_R;
+ ibc.idst = &nreg->r[dst];
+ if (src != dst) {
+ ibc.isrc = &nreg->r[src];
+ }
+ else {
+ ibc.imm = signExtend2sCompl(instr.getImm32());
+ ibc.isrc = &ibc.imm;
+ }
+ registerUsage[dst] = i;
+ return;
+ }
+
+ if (opcode < ceil_IMUL_M) { // IMUL_M: same operand setup as IADD_M
+ auto dst = instr.dst % RegistersCount;
+ auto src = instr.src % RegistersCount;
+ ibc.type = InstructionType::IMUL_M;
+ ibc.idst = &nreg->r[dst];
+ ibc.imm = signExtend2sCompl(instr.getImm32());
+ if (src != dst) {
+ ibc.isrc = &nreg->r[src];
+ ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
+ }
+ else {
+ ibc.isrc = &zero;
+ ibc.memMask = ScratchpadL3Mask;
+ }
+ registerUsage[dst] = i;
+ return;
+ }
+
+ if (opcode < ceil_IMULH_R) { // IMULH_R: always register-register, even when src == dst
+ auto dst = instr.dst % RegistersCount;
+ auto src = instr.src % RegistersCount;
+ ibc.type = InstructionType::IMULH_R;
+ ibc.idst = &nreg->r[dst];
+ ibc.isrc = &nreg->r[src];
+ registerUsage[dst] = i;
+ return;
+ }
+
+ if (opcode < ceil_IMULH_M) { // IMULH_M: same operand setup as IADD_M
+ auto dst = instr.dst % RegistersCount;
+ auto src = instr.src % RegistersCount;
+ ibc.type = InstructionType::IMULH_M;
+ ibc.idst = &nreg->r[dst];
+ ibc.imm = signExtend2sCompl(instr.getImm32());
+ if (src != dst) {
+ ibc.isrc = &nreg->r[src];
+ ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
+ }
+ else {
+ ibc.isrc = &zero;
+ ibc.memMask = ScratchpadL3Mask;
+ }
+ registerUsage[dst] = i;
+ return;
+ }
+
+ if (opcode < ceil_ISMULH_R) { // ISMULH_R: always register-register, like IMULH_R
+ auto dst = instr.dst % RegistersCount;
+ auto src = instr.src % RegistersCount;
+ ibc.type = InstructionType::ISMULH_R;
+ ibc.idst = &nreg->r[dst];
+ ibc.isrc = &nreg->r[src];
+ registerUsage[dst] = i;
+ return;
+ }
+
+ if (opcode < ceil_ISMULH_M) { // ISMULH_M: same operand setup as IADD_M
+ auto dst = instr.dst % RegistersCount;
+ auto src = instr.src % RegistersCount;
+ ibc.type = InstructionType::ISMULH_M;
+ ibc.idst = &nreg->r[dst];
+ ibc.imm = signExtend2sCompl(instr.getImm32());
+ if (src != dst) {
+ ibc.isrc = &nreg->r[src];
+ ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
+ }
+ else {
+ ibc.isrc = &zero;
+ ibc.memMask = ScratchpadL3Mask;
+ }
+ registerUsage[dst] = i;
+ return;
+ }
+
+ if (opcode < ceil_IMUL_RCP) { // IMUL_RCP: never emitted as its own type -- becomes IMUL_R by a precomputed reciprocal, or NOP
+ uint64_t divisor = instr.getImm32();
+ if (!isZeroOrPowerOf2(divisor)) {
+ auto dst = instr.dst % RegistersCount;
+ ibc.type = InstructionType::IMUL_R;
+ ibc.idst = &nreg->r[dst];
+ ibc.imm = randomx_reciprocal(divisor);
+ ibc.isrc = &ibc.imm;
+ registerUsage[dst] = i;
+ }
+ else { // divisor is zero or a power of two: no operation, registerUsage left untouched
+ ibc.type = InstructionType::NOP;
+ }
+ return;
+ }
+
+ if (opcode < ceil_INEG_R) { // INEG_R: destination only
+ auto dst = instr.dst % RegistersCount;
+ ibc.type = InstructionType::INEG_R;
+ ibc.idst = &nreg->r[dst];
+ registerUsage[dst] = i;
+ return;
+ }
+
+ if (opcode < ceil_IXOR_R) { // IXOR_R: same operand setup as ISUB_R
+ auto dst = instr.dst % RegistersCount;
+ auto src = instr.src % RegistersCount;
+ ibc.type = InstructionType::IXOR_R;
+ ibc.idst = &nreg->r[dst];
+ if (src != dst) {
+ ibc.isrc = &nreg->r[src];
+ }
+ else {
+ ibc.imm = signExtend2sCompl(instr.getImm32());
+ ibc.isrc = &ibc.imm;
+ }
+ registerUsage[dst] = i;
+ return;
+ }
+
+ if (opcode < ceil_IXOR_M) { // IXOR_M: same operand setup as IADD_M
+ auto dst = instr.dst % RegistersCount;
+ auto src = instr.src % RegistersCount;
+ ibc.type = InstructionType::IXOR_M;
+ ibc.idst = &nreg->r[dst];
+ ibc.imm = signExtend2sCompl(instr.getImm32());
+ if (src != dst) {
+ ibc.isrc = &nreg->r[src];
+ ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
+ }
+ else {
+ ibc.isrc = &zero;
+ ibc.memMask = ScratchpadL3Mask;
+ }
+ registerUsage[dst] = i;
+ return;
+ }
+
+ if (opcode < ceil_IROR_R) { // IROR_R: register operand, or the immediate when src == dst
+ auto dst = instr.dst % RegistersCount;
+ auto src = instr.src % RegistersCount;
+ ibc.type = InstructionType::IROR_R;
+ ibc.idst = &nreg->r[dst];
+ if (src != dst) {
+ ibc.isrc = &nreg->r[src];
+ }
+ else { // unlike the arithmetic ops, the rotate immediate is taken as-is (no signExtend2sCompl)
+ ibc.imm = instr.getImm32();
+ ibc.isrc = &ibc.imm;
+ }
+ registerUsage[dst] = i;
+ return;
+ }
+
+ if (opcode < ceil_IROL_R) { // IROL_R: same operand setup as IROR_R
+ auto dst = instr.dst % RegistersCount;
+ auto src = instr.src % RegistersCount;
+ ibc.type = InstructionType::IROL_R;
+ ibc.idst = &nreg->r[dst];
+ if (src != dst) {
+ ibc.isrc = &nreg->r[src];
+ }
+ else {
+ ibc.imm = instr.getImm32();
+ ibc.isrc = &ibc.imm;
+ }
+ registerUsage[dst] = i;
+ return;
+ }
+
+ if (opcode < ceil_ISWAP_R) { // ISWAP_R: a NOP when src == dst
+ auto dst = instr.dst % RegistersCount;
+ auto src = instr.src % RegistersCount;
+ if (src != dst) {
+ ibc.idst = &nreg->r[dst];
+ ibc.isrc = &nreg->r[src];
+ ibc.type = InstructionType::ISWAP_R;
+ registerUsage[dst] = i; // both registers are recorded for this instruction
+ registerUsage[src] = i;
+ }
+ else {
+ ibc.type = InstructionType::NOP;
+ }
+ return;
+ }
+
+ if (opcode < ceil_FSWAP_R) { // FSWAP_R: dst is reduced modulo RegistersCount and selects from f first, then e
+ auto dst = instr.dst % RegistersCount;
+ ibc.type = InstructionType::FSWAP_R;
+ if (dst < RegisterCountFlt)
+ ibc.fdst = &nreg->f[dst];
+ else
+ ibc.fdst = &nreg->e[dst - RegisterCountFlt];
+ return;
+ }
+
+ if (opcode < ceil_FADD_R) { // FADD_R: f[dst] with a[src]; floating-point cases below do not update registerUsage
+ auto dst = instr.dst % RegisterCountFlt;
+ auto src = instr.src % RegisterCountFlt;
+ ibc.type = InstructionType::FADD_R;
+ ibc.fdst = &nreg->f[dst];
+ ibc.fsrc = &nreg->a[src];
+ return;
+ }
+
+ if (opcode < ceil_FADD_M) { // FADD_M: f[dst] with a memory operand addressed through integer register src (L1/L2 mask only)
+ auto dst = instr.dst % RegisterCountFlt;
+ auto src = instr.src % RegistersCount;
+ ibc.type = InstructionType::FADD_M;
+ ibc.fdst = &nreg->f[dst];
+ ibc.isrc = &nreg->r[src];
+ ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
+ ibc.imm = signExtend2sCompl(instr.getImm32());
+ return;
+ }
+
+ if (opcode < ceil_FSUB_R) { // FSUB_R: same operand setup as FADD_R
+ auto dst = instr.dst % RegisterCountFlt;
+ auto src = instr.src % RegisterCountFlt;
+ ibc.type = InstructionType::FSUB_R;
+ ibc.fdst = &nreg->f[dst];
+ ibc.fsrc = &nreg->a[src];
+ return;
+ }
+
+ if (opcode < ceil_FSUB_M) { // FSUB_M: same operand setup as FADD_M
+ auto dst = instr.dst % RegisterCountFlt;
+ auto src = instr.src % RegistersCount;
+ ibc.type = InstructionType::FSUB_M;
+ ibc.fdst = &nreg->f[dst];
+ ibc.isrc = &nreg->r[src];
+ ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
+ ibc.imm = signExtend2sCompl(instr.getImm32());
+ return;
+ }
+
+ if (opcode < ceil_FSCAL_R) { // FSCAL_R: destination f[dst] only
+ auto dst = instr.dst % RegisterCountFlt;
+ ibc.fdst = &nreg->f[dst];
+ ibc.type = InstructionType::FSCAL_R;
+ return;
+ }
+
+ if (opcode < ceil_FMUL_R) { // FMUL_R: e[dst] with a[src]
+ auto dst = instr.dst % RegisterCountFlt;
+ auto src = instr.src % RegisterCountFlt;
+ ibc.type = InstructionType::FMUL_R;
+ ibc.fdst = &nreg->e[dst];
+ ibc.fsrc = &nreg->a[src];
+ return;
+ }
+
+ if (opcode < ceil_FDIV_M) { // FDIV_M: e[dst] with a memory operand addressed through integer register src
+ auto dst = instr.dst % RegisterCountFlt;
+ auto src = instr.src % RegistersCount;
+ ibc.type = InstructionType::FDIV_M;
+ ibc.fdst = &nreg->e[dst];
+ ibc.isrc = &nreg->r[src];
+ ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
+ ibc.imm = signExtend2sCompl(instr.getImm32());
+ return;
+ }
+
+ if (opcode < ceil_FSQRT_R) { // FSQRT_R: destination e[dst] only
+ auto dst = instr.dst % RegisterCountFlt;
+ ibc.type = InstructionType::FSQRT_R;
+ ibc.fdst = &nreg->e[dst];
+ return;
+ }
+
+ if (opcode < ceil_CBRANCH) { // CBRANCH: target is the value last recorded in registerUsage for the condition register
+ ibc.type = InstructionType::CBRANCH;
+ //jump condition
+ int creg = instr.dst % RegistersCount;
+ ibc.idst = &nreg->r[creg];
+ ibc.target = registerUsage[creg];
+ int shift = instr.getModCond() + ConditionOffset;
+ ibc.imm = signExtend2sCompl(instr.getImm32()) | (1ULL << shift); // force bit `shift` of the immediate to 1
+ if (ConditionOffset > 0 || shift > 0) //clear the bit below the condition mask - this limits the number of successive jumps to 2
+ ibc.imm &= ~(1ULL << (shift - 1));
+ ibc.memMask = ConditionMask << shift; // memMask carries the shifted condition mask for this instruction type
+ //mark all registers as used
+ for (unsigned j = 0; j < RegistersCount; ++j) {
+ registerUsage[j] = i;
+ }
+ return;
+ }
+
+ if (opcode < ceil_CFROUND) { // CFROUND: source register plus the low 6 bits of the immediate
+ auto src = instr.src % RegistersCount;
+ ibc.isrc = &nreg->r[src];
+ ibc.type = InstructionType::CFROUND;
+ ibc.imm = instr.getImm32() & 63;
+ return;
+ }
+
+ if (opcode < ceil_ISTORE) { // ISTORE: does not update registerUsage
+ auto dst = instr.dst % RegistersCount;
+ auto src = instr.src % RegistersCount;
+ ibc.type = InstructionType::ISTORE;
+ ibc.idst = &nreg->r[dst];
+ ibc.isrc = &nreg->r[src];
+ ibc.imm = signExtend2sCompl(instr.getImm32());
+ if (instr.getModCond() < StoreL3Condition) // below the threshold: L1 or L2 mask by getModMem(); otherwise the L3 mask
+ ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
+ else
+ ibc.memMask = ScratchpadL3Mask;
+ return;
+ }
+
+ if (opcode < ceil_NOP) { // NOP: remaining opcodes below ceil_NOP
+ ibc.type = InstructionType::NOP;
+ return;
+ }
+
+ UNREACHABLE; // reached only if opcode >= ceil_NOP
+ }
+}
diff --git a/src/RandomX/src/bytecode_machine.hpp b/src/RandomX/src/bytecode_machine.hpp
new file mode 100644
index 000000000..5e82e0d37
--- /dev/null
+++ b/src/RandomX/src/bytecode_machine.hpp
@@ -0,0 +1,322 @@
+/*
+Copyright (c) 2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holder nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include "common.hpp"
+#include "intrin_portable.h"
+#include "instruction.hpp"
+#include "program.hpp"
+
+namespace randomx {
+
+ //register file in machine byte order; compiled bytecode operands point into it
+ struct NativeRegisterFile {
+ int_reg_t r[RegistersCount] = { 0 }; //integer registers, zero-initialized
+ rx_vec_f128 f[RegisterCountFlt]; //vector register group f
+ rx_vec_f128 e[RegisterCountFlt]; //vector register group e (destination of FDIV_M and FSQRT_R)
+ rx_vec_f128 a[RegisterCountFlt]; //vector register group a; NOTE(review): presumably read-only constants - confirm
+ };
+
+ struct InstructionByteCode { //one pre-decoded instruction as consumed by the exe_* handlers
+ union { //destination operand: integer or vector register
+ int_reg_t* idst;
+ rx_vec_f128* fdst;
+ };
+ union { //source operand; memory instructions use isrc as the address base
+ const int_reg_t* isrc;
+ const rx_vec_f128* fsrc;
+ };
+ union { //immediate value, unsigned and signed view of the same bits
+ uint64_t imm;
+ int64_t simm;
+ };
+ InstructionType type;
+ union {
+ int16_t target; //CBRANCH: value assigned to pc when the branch is taken
+ uint16_t shift; //IADD_RS: left shift applied to the source register
+ };
+ uint32_t memMask; //scratchpad address mask; CBRANCH reuses it as the condition mask
+ };
+
+#define OPCODE_CEIL_DECLARE(curr, prev) constexpr int ceil_ ## curr = ceil_ ## prev + RANDOMX_FREQ_ ## curr;
+ constexpr int ceil_NULL = 0; //start of the chain: ceil_X = ceil_(previous) + RANDOMX_FREQ_X
+ OPCODE_CEIL_DECLARE(IADD_RS, NULL); //the decoder compares "opcode < ceil_X" in this order, so ceil_X is an exclusive upper bound
+ OPCODE_CEIL_DECLARE(IADD_M, IADD_RS);
+ OPCODE_CEIL_DECLARE(ISUB_R, IADD_M);
+ OPCODE_CEIL_DECLARE(ISUB_M, ISUB_R);
+ OPCODE_CEIL_DECLARE(IMUL_R, ISUB_M);
+ OPCODE_CEIL_DECLARE(IMUL_M, IMUL_R);
+ OPCODE_CEIL_DECLARE(IMULH_R, IMUL_M);
+ OPCODE_CEIL_DECLARE(IMULH_M, IMULH_R);
+ OPCODE_CEIL_DECLARE(ISMULH_R, IMULH_M);
+ OPCODE_CEIL_DECLARE(ISMULH_M, ISMULH_R);
+ OPCODE_CEIL_DECLARE(IMUL_RCP, ISMULH_M);
+ OPCODE_CEIL_DECLARE(INEG_R, IMUL_RCP);
+ OPCODE_CEIL_DECLARE(IXOR_R, INEG_R);
+ OPCODE_CEIL_DECLARE(IXOR_M, IXOR_R);
+ OPCODE_CEIL_DECLARE(IROR_R, IXOR_M);
+ OPCODE_CEIL_DECLARE(IROL_R, IROR_R);
+ OPCODE_CEIL_DECLARE(ISWAP_R, IROL_R);
+ OPCODE_CEIL_DECLARE(FSWAP_R, ISWAP_R);
+ OPCODE_CEIL_DECLARE(FADD_R, FSWAP_R);
+ OPCODE_CEIL_DECLARE(FADD_M, FADD_R);
+ OPCODE_CEIL_DECLARE(FSUB_R, FADD_M);
+ OPCODE_CEIL_DECLARE(FSUB_M, FSUB_R);
+ OPCODE_CEIL_DECLARE(FSCAL_R, FSUB_M);
+ OPCODE_CEIL_DECLARE(FMUL_R, FSCAL_R);
+ OPCODE_CEIL_DECLARE(FDIV_M, FMUL_R);
+ OPCODE_CEIL_DECLARE(FSQRT_R, FDIV_M);
+ OPCODE_CEIL_DECLARE(CBRANCH, FSQRT_R);
+ OPCODE_CEIL_DECLARE(CFROUND, CBRANCH);
+ OPCODE_CEIL_DECLARE(ISTORE, CFROUND);
+ OPCODE_CEIL_DECLARE(NOP, ISTORE); //ceil_NOP equals the sum of all frequencies (asserted to be 256 in common.hpp)
+#undef OPCODE_CEIL_DECLARE
+
+#define RANDOMX_EXE_ARGS InstructionByteCode& ibc, int& pc, uint8_t* scratchpad, ProgramConfiguration& config //parameter list of every exe_* handler
+#define RANDOMX_GEN_ARGS Instruction& instr, int i, InstructionByteCode& ibc //parameter list of compileInstruction and every gen_* method
+
+ class BytecodeMachine;
+
+ typedef void(BytecodeMachine::*InstructionGenBytecode)(RANDOMX_GEN_ARGS); //pointer to a gen_* member function; element type of genTable
+
+ class BytecodeMachine { //compiles a Program into InstructionByteCode and executes that bytecode
+ public:
+ void beginCompilation(NativeRegisterFile& regFile) { //resets register usage tracking and binds the register file
+ for (unsigned i = 0; i < RegistersCount; ++i) {
+ registerUsage[i] = -1; //-1: a branch taken on this register resumes at instruction 0 (pc is incremented after the jump)
+ }
+ nreg = &regFile;
+ }
+
+ void compileProgram(Program& program, InstructionByteCode bytecode[RANDOMX_PROGRAM_SIZE], NativeRegisterFile& regFile) {
+ beginCompilation(regFile);
+ for (unsigned i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) { //one bytecode entry per program instruction, same index
+ auto& instr = program(i);
+ auto& ibc = bytecode[i];
+ compileInstruction(instr, i, ibc);
+ }
+ }
+
+ static void executeBytecode(InstructionByteCode bytecode[RANDOMX_PROGRAM_SIZE], uint8_t* scratchpad, ProgramConfiguration& config) {
+ for (int pc = 0; pc < RANDOMX_PROGRAM_SIZE; ++pc) { //pc is passed by reference so that CBRANCH can redirect execution
+ auto& ibc = bytecode[pc];
+ executeInstruction(ibc, pc, scratchpad, config);
+ }
+ }
+
+ void compileInstruction(RANDOMX_GEN_ARGS) //table dispatch when RANDOMX_GEN_TABLE is defined, otherwise defined out of line
+#ifdef RANDOMX_GEN_TABLE
+ {
+ auto generator = genTable[instr.opcode];
+ (this->*generator)(instr, i, ibc);
+ }
+#else
+ ;
+#endif
+
+ static void executeInstruction(RANDOMX_EXE_ARGS);
+
+ static void exe_IADD_RS(RANDOMX_EXE_ARGS) { //dst += (src << shift) + imm
+ *ibc.idst += (*ibc.isrc << ibc.shift) + ibc.imm;
+ }
+
+ static void exe_IADD_M(RANDOMX_EXE_ARGS) { //the _M handlers read their operand from the scratchpad
+ *ibc.idst += load64(getScratchpadAddress(ibc, scratchpad));
+ }
+
+ static void exe_ISUB_R(RANDOMX_EXE_ARGS) {
+ *ibc.idst -= *ibc.isrc;
+ }
+
+ static void exe_ISUB_M(RANDOMX_EXE_ARGS) {
+ *ibc.idst -= load64(getScratchpadAddress(ibc, scratchpad));
+ }
+
+ static void exe_IMUL_R(RANDOMX_EXE_ARGS) {
+ *ibc.idst *= *ibc.isrc;
+ }
+
+ static void exe_IMUL_M(RANDOMX_EXE_ARGS) {
+ *ibc.idst *= load64(getScratchpadAddress(ibc, scratchpad));
+ }
+
+ static void exe_IMULH_R(RANDOMX_EXE_ARGS) {
+ *ibc.idst = mulh(*ibc.idst, *ibc.isrc);
+ }
+
+ static void exe_IMULH_M(RANDOMX_EXE_ARGS) {
+ *ibc.idst = mulh(*ibc.idst, load64(getScratchpadAddress(ibc, scratchpad)));
+ }
+
+ static void exe_ISMULH_R(RANDOMX_EXE_ARGS) { //both operands are reinterpreted as signed before smulh
+ *ibc.idst = smulh(unsigned64ToSigned2sCompl(*ibc.idst), unsigned64ToSigned2sCompl(*ibc.isrc));
+ }
+
+ static void exe_ISMULH_M(RANDOMX_EXE_ARGS) {
+ *ibc.idst = smulh(unsigned64ToSigned2sCompl(*ibc.idst), unsigned64ToSigned2sCompl(load64(getScratchpadAddress(ibc, scratchpad))));
+ }
+
+ static void exe_INEG_R(RANDOMX_EXE_ARGS) {
+ *ibc.idst = ~(*ibc.idst) + 1; //two's complement negative
+ }
+
+ static void exe_IXOR_R(RANDOMX_EXE_ARGS) {
+ *ibc.idst ^= *ibc.isrc;
+ }
+
+ static void exe_IXOR_M(RANDOMX_EXE_ARGS) {
+ *ibc.idst ^= load64(getScratchpadAddress(ibc, scratchpad));
+ }
+
+ static void exe_IROR_R(RANDOMX_EXE_ARGS) {
+ *ibc.idst = rotr(*ibc.idst, *ibc.isrc & 63); //only the low 6 bits of the source give the rotation count
+ }
+
+ static void exe_IROL_R(RANDOMX_EXE_ARGS) {
+ *ibc.idst = rotl(*ibc.idst, *ibc.isrc & 63); //only the low 6 bits of the source give the rotation count
+ }
+
+ static void exe_ISWAP_R(RANDOMX_EXE_ARGS) {
+ int_reg_t temp = *ibc.isrc;
+ *(int_reg_t*)ibc.isrc = *ibc.idst; //isrc is declared const; the cast allows writing the swapped value
+ *ibc.idst = temp;
+ }
+
+ static void exe_FSWAP_R(RANDOMX_EXE_ARGS) {
+ *ibc.fdst = rx_swap_vec_f128(*ibc.fdst);
+ }
+
+ static void exe_FADD_R(RANDOMX_EXE_ARGS) {
+ *ibc.fdst = rx_add_vec_f128(*ibc.fdst, *ibc.fsrc);
+ }
+
+ static void exe_FADD_M(RANDOMX_EXE_ARGS) { //the scratchpad operand is converted from packed integers first
+ rx_vec_f128 fsrc = rx_cvt_packed_int_vec_f128(getScratchpadAddress(ibc, scratchpad));
+ *ibc.fdst = rx_add_vec_f128(*ibc.fdst, fsrc);
+ }
+
+ static void exe_FSUB_R(RANDOMX_EXE_ARGS) {
+ *ibc.fdst = rx_sub_vec_f128(*ibc.fdst, *ibc.fsrc);
+ }
+
+ static void exe_FSUB_M(RANDOMX_EXE_ARGS) {
+ rx_vec_f128 fsrc = rx_cvt_packed_int_vec_f128(getScratchpadAddress(ibc, scratchpad));
+ *ibc.fdst = rx_sub_vec_f128(*ibc.fdst, fsrc);
+ }
+
+ static void exe_FSCAL_R(RANDOMX_EXE_ARGS) {
+ const rx_vec_f128 mask = rx_set1_vec_f128(0x80F0000000000000); //bit 63 plus bits 52-55 of a 64-bit pattern
+ *ibc.fdst = rx_xor_vec_f128(*ibc.fdst, mask);
+ }
+
+ static void exe_FMUL_R(RANDOMX_EXE_ARGS) {
+ *ibc.fdst = rx_mul_vec_f128(*ibc.fdst, *ibc.fsrc);
+ }
+
+ static void exe_FDIV_M(RANDOMX_EXE_ARGS) {
+ rx_vec_f128 fsrc = maskRegisterExponentMantissa( //the divisor is masked before use
+ config,
+ rx_cvt_packed_int_vec_f128(getScratchpadAddress(ibc, scratchpad))
+ );
+ *ibc.fdst = rx_div_vec_f128(*ibc.fdst, fsrc);
+ }
+
+ static void exe_FSQRT_R(RANDOMX_EXE_ARGS) {
+ *ibc.fdst = rx_sqrt_vec_f128(*ibc.fdst);
+ }
+
+ static void exe_CBRANCH(RANDOMX_EXE_ARGS) {
+ *ibc.idst += ibc.imm;
+ if ((*ibc.idst & ibc.memMask) == 0) { //taken when every masked condition bit is zero
+ pc = ibc.target; //the executeBytecode loop increments pc afterwards
+ }
+ }
+
+ static void exe_CFROUND(RANDOMX_EXE_ARGS) {
+ rx_set_rounding_mode(rotr(*ibc.isrc, ibc.imm) % 4); //the two lowest bits of the rotated source select the mode
+ }
+
+ static void exe_ISTORE(RANDOMX_EXE_ARGS) {
+ store64(scratchpad + ((*ibc.idst + ibc.imm) & ibc.memMask), *ibc.isrc); //writes src at scratchpad offset (dst + imm) & memMask
+ }
+ protected:
+ static rx_vec_f128 maskRegisterExponentMantissa(ProgramConfiguration& config, rx_vec_f128 x) { //ANDs x with dynamicMantissaMask, then ORs in config.eMask
+ const rx_vec_f128 xmantissaMask = rx_set_vec_f128(dynamicMantissaMask, dynamicMantissaMask);
+ const rx_vec_f128 xexponentMask = rx_load_vec_f128((const double*)&config.eMask);
+ x = rx_and_vec_f128(x, xmantissaMask);
+ x = rx_or_vec_f128(x, xexponentMask);
+ return x;
+ }
+
+ private:
+ static const int_reg_t zero; //declared here without an initializer; defined out of line
+ int registerUsage[RegistersCount]; //per integer register: instruction index used as the CBRANCH target (-1 initially)
+ NativeRegisterFile* nreg; //register file bound by beginCompilation; bytecode operands point into it
+
+ static void* getScratchpadAddress(InstructionByteCode& ibc, uint8_t* scratchpad) { //returns scratchpad + ((src + imm) & memMask)
+ uint32_t addr = (*ibc.isrc + ibc.imm) & ibc.memMask;
+ return scratchpad + addr;
+ }
+
+#ifdef RANDOMX_GEN_TABLE
+ static InstructionGenBytecode genTable[256]; //indexed by instr.opcode in compileInstruction
+
+ void gen_IADD_RS(RANDOMX_GEN_ARGS);
+ void gen_IADD_M(RANDOMX_GEN_ARGS);
+ void gen_ISUB_R(RANDOMX_GEN_ARGS);
+ void gen_ISUB_M(RANDOMX_GEN_ARGS);
+ void gen_IMUL_R(RANDOMX_GEN_ARGS);
+ void gen_IMUL_M(RANDOMX_GEN_ARGS);
+ void gen_IMULH_R(RANDOMX_GEN_ARGS);
+ void gen_IMULH_M(RANDOMX_GEN_ARGS);
+ void gen_ISMULH_R(RANDOMX_GEN_ARGS);
+ void gen_ISMULH_M(RANDOMX_GEN_ARGS);
+ void gen_IMUL_RCP(RANDOMX_GEN_ARGS);
+ void gen_INEG_R(RANDOMX_GEN_ARGS);
+ void gen_IXOR_R(RANDOMX_GEN_ARGS);
+ void gen_IXOR_M(RANDOMX_GEN_ARGS);
+ void gen_IROR_R(RANDOMX_GEN_ARGS);
+ void gen_IROL_R(RANDOMX_GEN_ARGS);
+ void gen_ISWAP_R(RANDOMX_GEN_ARGS);
+ void gen_FSWAP_R(RANDOMX_GEN_ARGS);
+ void gen_FADD_R(RANDOMX_GEN_ARGS);
+ void gen_FADD_M(RANDOMX_GEN_ARGS);
+ void gen_FSUB_R(RANDOMX_GEN_ARGS);
+ void gen_FSUB_M(RANDOMX_GEN_ARGS);
+ void gen_FSCAL_R(RANDOMX_GEN_ARGS);
+ void gen_FMUL_R(RANDOMX_GEN_ARGS);
+ void gen_FDIV_M(RANDOMX_GEN_ARGS);
+ void gen_FSQRT_R(RANDOMX_GEN_ARGS);
+ void gen_CBRANCH(RANDOMX_GEN_ARGS);
+ void gen_CFROUND(RANDOMX_GEN_ARGS);
+ void gen_ISTORE(RANDOMX_GEN_ARGS);
+ void gen_NOP(RANDOMX_GEN_ARGS);
+#endif
+ };
+}
diff --git a/src/RandomX/src/common.hpp b/src/RandomX/src/common.hpp
new file mode 100644
index 000000000..a77feb3bf
--- /dev/null
+++ b/src/RandomX/src/common.hpp
@@ -0,0 +1,187 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holder nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include <cstdint>
+#include <iostream>
+#include <climits>
+#include "blake2/endian.h"
+#include "configuration.h"
+#include "randomx.h"
+
+namespace randomx {
+
+ static_assert(RANDOMX_ARGON_MEMORY >= 8, "RANDOMX_ARGON_MEMORY must be at least 8.");
+ static_assert(RANDOMX_ARGON_MEMORY <= 2097152, "RANDOMX_ARGON_MEMORY must not exceed 2097152.");
+ static_assert((RANDOMX_ARGON_MEMORY & (RANDOMX_ARGON_MEMORY - 1)) == 0, "RANDOMX_ARGON_MEMORY must be a power of 2.");
+ static_assert(RANDOMX_ARGON_ITERATIONS > 0 && RANDOMX_ARGON_ITERATIONS < UINT32_MAX, "RANDOMX_ARGON_ITERATIONS must be a positive 32-bit integer.");
+ static_assert(RANDOMX_ARGON_LANES > 0 && RANDOMX_ARGON_LANES <= 16777215, "RANDOMX_ARGON_LANES out of range");
+ static_assert(RANDOMX_DATASET_BASE_SIZE >= 64, "RANDOMX_DATASET_BASE_SIZE must be at least 64.");
+ static_assert((RANDOMX_DATASET_BASE_SIZE & (RANDOMX_DATASET_BASE_SIZE - 1)) == 0, "RANDOMX_DATASET_BASE_SIZE must be a power of 2.");
+ static_assert(RANDOMX_DATASET_BASE_SIZE <= 4294967296ULL, "RANDOMX_DATASET_BASE_SIZE must not exceed 4294967296.");
+ static_assert(RANDOMX_DATASET_EXTRA_SIZE % 64 == 0, "RANDOMX_DATASET_EXTRA_SIZE must be divisible by 64.");
+ static_assert((uint64_t)RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE <= 17179869184, "Dataset size must not exceed 16 GiB.");
+ static_assert(RANDOMX_PROGRAM_SIZE > 0, "RANDOMX_PROGRAM_SIZE must be greater than 0");
+ static_assert(RANDOMX_PROGRAM_SIZE <= 32768, "RANDOMX_PROGRAM_SIZE must not exceed 32768");
+ static_assert(RANDOMX_PROGRAM_ITERATIONS > 0, "RANDOMX_PROGRAM_ITERATIONS must be greater than 0");
+ static_assert(RANDOMX_PROGRAM_COUNT > 0, "RANDOMX_PROGRAM_COUNT must be greater than 0");
+ static_assert((RANDOMX_SCRATCHPAD_L3 & (RANDOMX_SCRATCHPAD_L3 - 1)) == 0, "RANDOMX_SCRATCHPAD_L3 must be a power of 2.");
+ static_assert(RANDOMX_SCRATCHPAD_L3 >= RANDOMX_SCRATCHPAD_L2, "RANDOMX_SCRATCHPAD_L3 must be greater than or equal to RANDOMX_SCRATCHPAD_L2.");
+ static_assert((RANDOMX_SCRATCHPAD_L2 & (RANDOMX_SCRATCHPAD_L2 - 1)) == 0, "RANDOMX_SCRATCHPAD_L2 must be a power of 2.");
+ static_assert(RANDOMX_SCRATCHPAD_L2 >= RANDOMX_SCRATCHPAD_L1, "RANDOMX_SCRATCHPAD_L2 must be greater than or equal to RANDOMX_SCRATCHPAD_L1.");
+ static_assert(RANDOMX_SCRATCHPAD_L1 >= 64, "RANDOMX_SCRATCHPAD_L1 must be at least 64.");
+ static_assert((RANDOMX_SCRATCHPAD_L1 & (RANDOMX_SCRATCHPAD_L1 - 1)) == 0, "RANDOMX_SCRATCHPAD_L1 must be a power of 2.");
+ static_assert(RANDOMX_CACHE_ACCESSES > 1, "RANDOMX_CACHE_ACCESSES must be greater than 1");
+ static_assert(RANDOMX_SUPERSCALAR_LATENCY > 0, "RANDOMX_SUPERSCALAR_LATENCY must be greater than 0");
+ static_assert(RANDOMX_SUPERSCALAR_LATENCY <= 10000, "RANDOMX_SUPERSCALAR_LATENCY must not exceed 10000");
+ static_assert(RANDOMX_JUMP_BITS > 0, "RANDOMX_JUMP_BITS must be greater than 0.");
+ static_assert(RANDOMX_JUMP_OFFSET >= 0, "RANDOMX_JUMP_OFFSET must be greater than or equal to 0.");
+ static_assert(RANDOMX_JUMP_BITS + RANDOMX_JUMP_OFFSET <= 16, "RANDOMX_JUMP_BITS + RANDOMX_JUMP_OFFSET must not exceed 16.");
+
+ constexpr int wtSum = RANDOMX_FREQ_IADD_RS + RANDOMX_FREQ_IADD_M + RANDOMX_FREQ_ISUB_R + \
+ RANDOMX_FREQ_ISUB_M + RANDOMX_FREQ_IMUL_R + RANDOMX_FREQ_IMUL_M + RANDOMX_FREQ_IMULH_R + \
+ RANDOMX_FREQ_IMULH_M + RANDOMX_FREQ_ISMULH_R + RANDOMX_FREQ_ISMULH_M + RANDOMX_FREQ_IMUL_RCP + \
+ RANDOMX_FREQ_INEG_R + RANDOMX_FREQ_IXOR_R + RANDOMX_FREQ_IXOR_M + RANDOMX_FREQ_IROR_R + RANDOMX_FREQ_IROL_R + RANDOMX_FREQ_ISWAP_R + \
+ RANDOMX_FREQ_FSWAP_R + RANDOMX_FREQ_FADD_R + RANDOMX_FREQ_FADD_M + RANDOMX_FREQ_FSUB_R + RANDOMX_FREQ_FSUB_M + \
+ RANDOMX_FREQ_FSCAL_R + RANDOMX_FREQ_FMUL_R + RANDOMX_FREQ_FDIV_M + RANDOMX_FREQ_FSQRT_R + RANDOMX_FREQ_CBRANCH + \
+ RANDOMX_FREQ_CFROUND + RANDOMX_FREQ_ISTORE + RANDOMX_FREQ_NOP;
+
+ static_assert(wtSum == 256, "Sum of instruction frequencies must be 256."); //frequencies are counted per 256 opcodes (see configuration.h)
+
+
+ constexpr uint32_t ArgonBlockSize = 1024; //multiplied with RANDOMX_ARGON_MEMORY (given in KiB) to obtain CacheSize
+ constexpr int ArgonSaltSize = sizeof("" RANDOMX_ARGON_SALT) - 1; //"" only compiles next to a string literal; -1 drops the terminating NUL
+ static_assert(ArgonSaltSize >= 8, "RANDOMX_ARGON_SALT must be at least 8 characters long");
+ constexpr int SuperscalarMaxSize = 3 * RANDOMX_SUPERSCALAR_LATENCY + 2;
+ constexpr size_t CacheLineSize = RANDOMX_DATASET_ITEM_SIZE;
+ constexpr int ScratchpadSize = RANDOMX_SCRATCHPAD_L3; //the scratchpad is as large as its L3 level
+ constexpr uint32_t CacheLineAlignMask = (RANDOMX_DATASET_BASE_SIZE - 1) & ~(CacheLineSize - 1); //offset within the base size with the in-line bits cleared (assumes CacheLineSize is a power of 2)
+ constexpr uint32_t CacheSize = RANDOMX_ARGON_MEMORY * ArgonBlockSize;
+ constexpr uint64_t DatasetSize = RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE;
+ constexpr uint32_t DatasetExtraItems = RANDOMX_DATASET_EXTRA_SIZE / RANDOMX_DATASET_ITEM_SIZE;
+ constexpr uint32_t ConditionMask = ((1 << RANDOMX_JUMP_BITS) - 1); //RANDOMX_JUMP_BITS low bits set; CBRANCH shifts it into position
+ constexpr int ConditionOffset = RANDOMX_JUMP_OFFSET;
+ constexpr int StoreL3Condition = 14; //ISTORE uses the L3 mask when getModCond() is at least this value
+
+ //Prevent some unsafe configurations.
+#ifndef RANDOMX_UNSAFE
+ static_assert((uint64_t)ArgonBlockSize * RANDOMX_CACHE_ACCESSES * RANDOMX_ARGON_MEMORY + 33554432 >= (uint64_t)RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE, "Unsafe configuration: Memory-time tradeoffs");
+ static_assert((128 + RANDOMX_PROGRAM_SIZE * RANDOMX_FREQ_ISTORE / 256) * (RANDOMX_PROGRAM_COUNT * RANDOMX_PROGRAM_ITERATIONS) >= RANDOMX_SCRATCHPAD_L3, "Unsafe configuration: Insufficient Scratchpad writes");
+ static_assert(RANDOMX_PROGRAM_COUNT > 1, "Unsafe configuration: Program filtering strategies");
+ static_assert(RANDOMX_PROGRAM_SIZE >= 64, "Unsafe configuration: Low program entropy");
+ static_assert(RANDOMX_PROGRAM_ITERATIONS >= 400, "Unsafe configuration: High compilation overhead");
+#endif
+
+#ifdef TRACE
+ constexpr bool trace = true;
+#else
+ constexpr bool trace = false;
+#endif
+
+#ifndef UNREACHABLE //optimizer hint placed where control flow must never arrive
+#ifdef __GNUC__
+#define UNREACHABLE __builtin_unreachable()
+#elif defined(_MSC_VER)
+#define UNREACHABLE __assume(false)
+#else //no known hint for this compiler: expands to nothing
+#define UNREACHABLE
+#endif
+#endif
+
+#if defined(_M_X64) || defined(__x86_64__) //x86-64 targets
+ #define RANDOMX_HAVE_COMPILER 1
+ class JitCompilerX86;
+ using JitCompiler = JitCompilerX86;
+#elif defined(__aarch64__) //64-bit ARM targets
+ #define RANDOMX_HAVE_COMPILER 1
+ class JitCompilerA64;
+ using JitCompiler = JitCompilerA64;
+#else //any other target: RANDOMX_HAVE_COMPILER is 0 and JitCompiler names the fallback class
+ #define RANDOMX_HAVE_COMPILER 0
+ class JitCompilerFallback;
+ using JitCompiler = JitCompilerFallback;
+#endif
+
+ using addr_t = uint32_t; //type of the mx/ma fields of MemoryRegisters
+
+ using int_reg_t = uint64_t; //one integer register
+
+ struct fpu_reg_t { //one floating point register stored as two doubles
+ double lo;
+ double hi;
+ };
+
+ constexpr uint32_t ScratchpadL1 = RANDOMX_SCRATCHPAD_L1 / sizeof(int_reg_t); //level sizes counted in 8-byte words
+ constexpr uint32_t ScratchpadL2 = RANDOMX_SCRATCHPAD_L2 / sizeof(int_reg_t);
+ constexpr uint32_t ScratchpadL3 = RANDOMX_SCRATCHPAD_L3 / sizeof(int_reg_t);
+ constexpr int ScratchpadL1Mask = (ScratchpadL1 - 1) * 8; //byte-offset mask: multiples of 8 below the level size
+ constexpr int ScratchpadL2Mask = (ScratchpadL2 - 1) * 8;
+ constexpr int ScratchpadL1Mask16 = (ScratchpadL1 / 2 - 1) * 16; //byte-offset mask: multiples of 16 below the level size
+ constexpr int ScratchpadL2Mask16 = (ScratchpadL2 / 2 - 1) * 16;
+ constexpr int ScratchpadL3Mask = (ScratchpadL3 - 1) * 8;
+ constexpr int ScratchpadL3Mask64 = (ScratchpadL3 / 8 - 1) * 64; //byte-offset mask: multiples of 64 below the L3 size
+ constexpr int RegistersCount = 8; //number of integer registers
+ constexpr int RegisterCountFlt = RegistersCount / 2; //number of registers in each of the f, e and a groups
+ constexpr int RegisterNeedsDisplacement = 5; //x86 r13 register
+ constexpr int RegisterNeedsSib = 4; //x86 r12 register
+
+ inline bool isZeroOrPowerOf2(uint64_t x) { //x & (x - 1) clears the lowest set bit; nothing remains iff at most one bit was set
+ return !(x & (x - 1));
+ }
+
+ constexpr int mantissaSize = 52; //52 mantissa bits + 11 exponent bits + 1 sign bit = one 64-bit double
+ constexpr int exponentSize = 11;
+ constexpr uint64_t mantissaMask = (1ULL << mantissaSize) - 1; //low 52 bits set
+ constexpr uint64_t exponentMask = (1ULL << exponentSize) - 1; //low 11 bits set (not shifted to the exponent position)
+ constexpr int exponentBias = 1023;
+ constexpr int dynamicExponentBits = 4;
+ constexpr int staticExponentBits = 4;
+ constexpr uint64_t constExponentBits = 0x300;
+ constexpr uint64_t dynamicMantissaMask = (1ULL << (mantissaSize + dynamicExponentBits)) - 1; //low 56 bits set: the mantissa plus the 4 lowest exponent bits
+
+ struct MemoryRegisters {
+ addr_t mx, ma; //NOTE(review): presumably the two dataset address registers - confirm against the VM code
+ uint8_t* memory = nullptr; //no memory attached until a caller assigns it
+ };
+
+ //register file in little-endian byte order
+ struct RegisterFile {
+ int_reg_t r[RegistersCount]; //integer registers
+ fpu_reg_t f[RegisterCountFlt]; //same f, e and a groups as NativeRegisterFile, stored as pairs of doubles
+ fpu_reg_t e[RegisterCountFlt];
+ fpu_reg_t a[RegisterCountFlt];
+ };
+
+ typedef void(ProgramFunc)(RegisterFile&, MemoryRegisters&, uint8_t* /* scratchpad */, uint64_t); //these typedefs name function types, not pointers; use e.g. ProgramFunc* to hold an address
+ typedef void(DatasetInitFunc)(randomx_cache* cache, uint8_t* dataset, uint32_t startBlock, uint32_t endBlock);
+
+ typedef void(DatasetDeallocFunc)(randomx_dataset*);
+ typedef void(CacheDeallocFunc)(randomx_cache*);
+ typedef void(CacheInitializeFunc)(randomx_cache*, const void*, size_t);
+}
diff --git a/src/RandomX/src/configuration.h b/src/RandomX/src/configuration.h
new file mode 100644
index 000000000..f74a74a4c
--- /dev/null
+++ b/src/RandomX/src/configuration.h
@@ -0,0 +1,125 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holder nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+//Cache size in KiB. Must be a power of 2 in the range 8..2097152.
+#define RANDOMX_ARGON_MEMORY 262144
+
+//Number of Argon2d iterations for Cache initialization. Must be positive and below UINT32_MAX.
+#define RANDOMX_ARGON_ITERATIONS 5
+
+//Number of parallel lanes for Cache initialization. Must be in the range 1..16777215.
+#define RANDOMX_ARGON_LANES 1
+
+//Argon2d salt. Must be at least 8 characters long.
+#define RANDOMX_ARGON_SALT "RandomXHUSH\x03"
+
+//Number of random Cache accesses per Dataset item. Minimum is 2.
+#define RANDOMX_CACHE_ACCESSES 8
+
+//Target latency for SuperscalarHash (in cycles of the reference CPU). Must be in the range 1..10000.
+#define RANDOMX_SUPERSCALAR_LATENCY 170
+
+//Dataset base size in bytes. Must be a power of 2 in the range 64..4294967296.
+#define RANDOMX_DATASET_BASE_SIZE 2147483648
+
+//Dataset extra size in bytes. Must be divisible by 64. Base size plus extra size must not exceed 16 GiB.
+#define RANDOMX_DATASET_EXTRA_SIZE 33554368
+
+//Number of instructions in a RandomX program. Must be divisible by 8 and must not exceed 32768.
+#define RANDOMX_PROGRAM_SIZE 512
+
+//Number of iterations during VM execution. Must be greater than 0.
+#define RANDOMX_PROGRAM_ITERATIONS 4096
+
+//Number of chained VM executions per hash. Must be greater than 0.
+#define RANDOMX_PROGRAM_COUNT 16
+
+//Scratchpad L3 size in bytes. Must be a power of 2 and greater than or equal to RANDOMX_SCRATCHPAD_L2.
+#define RANDOMX_SCRATCHPAD_L3 2097152
+
+//Scratchpad L2 size in bytes. Must be a power of two and less than or equal to RANDOMX_SCRATCHPAD_L3.
+#define RANDOMX_SCRATCHPAD_L2 262144
+
+//Scratchpad L1 size in bytes. Must be a power of two (minimum 64) and less than or equal to RANDOMX_SCRATCHPAD_L2.
+#define RANDOMX_SCRATCHPAD_L1 16384
+
+//Jump condition mask size in bits. Must be greater than 0.
+#define RANDOMX_JUMP_BITS 8
+
+//Jump condition mask offset in bits. Must not be negative. The sum of RANDOMX_JUMP_BITS and RANDOMX_JUMP_OFFSET must not exceed 16.
+#define RANDOMX_JUMP_OFFSET 8
+
+/*
+Instruction frequencies (per 256 opcodes)
+Total sum of frequencies must be 256
+*/
+
+//Integer instructions
+#define RANDOMX_FREQ_IADD_RS 16
+#define RANDOMX_FREQ_IADD_M 7
+#define RANDOMX_FREQ_ISUB_R 16
+#define RANDOMX_FREQ_ISUB_M 7
+#define RANDOMX_FREQ_IMUL_R 16
+#define RANDOMX_FREQ_IMUL_M 4
+#define RANDOMX_FREQ_IMULH_R 4
+#define RANDOMX_FREQ_IMULH_M 1
+#define RANDOMX_FREQ_ISMULH_R 4
+#define RANDOMX_FREQ_ISMULH_M 1
+#define RANDOMX_FREQ_IMUL_RCP 8
+#define RANDOMX_FREQ_INEG_R 2
+#define RANDOMX_FREQ_IXOR_R 15
+#define RANDOMX_FREQ_IXOR_M 5
+#define RANDOMX_FREQ_IROR_R 8
+#define RANDOMX_FREQ_IROL_R 2
+#define RANDOMX_FREQ_ISWAP_R 4
+
+//Floating point instructions
+#define RANDOMX_FREQ_FSWAP_R 4
+#define RANDOMX_FREQ_FADD_R 16
+#define RANDOMX_FREQ_FADD_M 5
+#define RANDOMX_FREQ_FSUB_R 16
+#define RANDOMX_FREQ_FSUB_M 5
+#define RANDOMX_FREQ_FSCAL_R 6
+#define RANDOMX_FREQ_FMUL_R 32
+#define RANDOMX_FREQ_FDIV_M 4
+#define RANDOMX_FREQ_FSQRT_R 6
+
+//Control instructions
+#define RANDOMX_FREQ_CBRANCH 25
+#define RANDOMX_FREQ_CFROUND 1
+
+//Store instruction
+#define RANDOMX_FREQ_ISTORE 16
+
+//No-op instruction
+#define RANDOMX_FREQ_NOP 0
+/* ------
+ 256
+*/
diff --git a/src/RandomX/src/cpu.cpp b/src/RandomX/src/cpu.cpp
new file mode 100644
index 000000000..44ac402ce
--- /dev/null
+++ b/src/RandomX/src/cpu.cpp
@@ -0,0 +1,76 @@
+/*
+Copyright (c) 2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holder nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "cpu.hpp"
+
+#if defined(_M_X64) || defined(__x86_64__)
+ #define HAVE_CPUID
+ #if defined(_MSC_VER)
+ #include <intrin.h>
+ #define cpuid(info, x) __cpuidex(info, x, 0)
+ #else //GCC
+ #include <cpuid.h>
+ void cpuid(int info[4], int InfoType) { //fills info[0..3] with EAX, EBX, ECX, EDX of leaf InfoType, subleaf 0
+ __cpuid_count(InfoType, 0, info[0], info[1], info[2], info[3]);
+ }
+ #endif
+#endif
+
+#if defined(HAVE_HWCAP) //headers that provide getauxval, AT_HWCAP and HWCAP_AES
+ #include <sys/auxv.h>
+ #include <asm/hwcap.h>
+#endif
+
+namespace randomx {
+
+ Cpu::Cpu() : aes_(false), ssse3_(false), avx2_(false) { //a flag stays false unless a probe below sets it
+#ifdef HAVE_CPUID
+ int regs[4]; //EAX, EBX, ECX, EDX returned by the last cpuid query
+ cpuid(regs, 0);
+ const int maxLeaf = regs[0]; //EAX of leaf 0, compared against the leaf numbers queried below
+ if (maxLeaf >= 1) {
+ cpuid(regs, 1);
+ ssse3_ = (regs[2] & 0x200) != 0; //ECX bit 9
+ aes_ = (regs[2] & 0x2000000) != 0; //ECX bit 25
+ }
+ if (maxLeaf >= 7) {
+ cpuid(regs, 7);
+ avx2_ = (regs[1] & 0x20) != 0; //EBX bit 5
+ }
+#elif defined(__aarch64__)
+ #if defined(HWCAP_AES)
+ const long caps = getauxval(AT_HWCAP);
+ aes_ = (caps & HWCAP_AES) != 0;
+ #elif defined(__APPLE__)
+ aes_ = true; //no probe on this path: AES is taken as present on Apple AArch64
+ #endif
+#endif
+ //TODO POWER8 AES
+ }
+
+}
diff --git a/src/RandomX/src/cpu.hpp b/src/RandomX/src/cpu.hpp
new file mode 100644
index 000000000..516dd47a2
--- /dev/null
+++ b/src/RandomX/src/cpu.hpp
@@ -0,0 +1,49 @@
+/*
+Copyright (c) 2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holder nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+namespace randomx {
+
+ //CPU feature flags; all of them are assigned by the constructor
+ class Cpu {
+ public:
+ Cpu();
+ //value of the AES flag
+ bool hasAes() const { return aes_; }
+ //value of the SSSE3 flag
+ bool hasSsse3() const { return ssse3_; }
+ //value of the AVX2 flag
+ bool hasAvx2() const { return avx2_; }
+ private:
+ bool aes_;
+ bool ssse3_;
+ bool avx2_;
+ };
+
+}
diff --git a/src/RandomX/src/dataset.cpp b/src/RandomX/src/dataset.cpp
new file mode 100644
index 000000000..675c5abc5
--- /dev/null
+++ b/src/RandomX/src/dataset.cpp
@@ -0,0 +1,196 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holder nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Original code from Argon2 reference source code package used under CC0 Licence
+ * https://github.com/P-H-C/phc-winner-argon2
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+*/
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "common.hpp"
+#include "dataset.hpp"
+#include "virtual_memory.hpp"
+#include "superscalar.hpp"
+#include "blake2_generator.hpp"
+#include "reciprocal.h"
+#include "blake2/endian.h"
+#include "argon2.h"
+#include "argon2_core.h"
+#include "jit_compiler.hpp"
+#include "intrin_portable.h"
+
+// Compile-time sanity checks on the Argon2 parameters used to build the cache:
+// the memory size must split evenly across lanes and sync points, and the
+// Argon2 block size must match the value RandomX was written for.
+static_assert(RANDOMX_ARGON_MEMORY % (RANDOMX_ARGON_LANES * ARGON2_SYNC_POINTS) == 0, "RANDOMX_ARGON_MEMORY - invalid value");
+static_assert(ARGON2_BLOCK_SIZE == randomx::ArgonBlockSize, "Unexpected value of ARGON2_BLOCK_SIZE");
+
+namespace randomx {
+
+ // Releases what a cache owns: the memory buffer is handed back through
+ // Allocator::freeMemory with size CacheSize, and the JIT compiler object is
+ // deleted. Each member is released only when non-null; the pointers are not
+ // reset afterwards.
+ // NOTE(review): the template parameter list is missing from the next line in
+ // this listing; the body expects a type named Allocator -- confirm.
+ template
+ void deallocCache(randomx_cache* cache) {
+ if (cache->memory != nullptr)
+ Allocator::freeMemory(cache->memory, CacheSize);
+ if (cache->jit != nullptr)
+ delete cache->jit;
+ }
+
+ // Explicit instantiations of deallocCache.
+ // NOTE(review): the two lines read identically here, so the template
+ // arguments appear to have been lost -- confirm which allocators are meant.
+ template void deallocCache(randomx_cache* cache);
+ template void deallocCache(randomx_cache* cache);
+
+ // Initializes a cache from `key` in two stages:
+ //  1. runs the Argon2 initialization and block fill (type Argon2_d) over
+ //     cache->memory, using `key` as the password and RANDOMX_ARGON_SALT as salt;
+ //  2. regenerates the RANDOMX_CACHE_ACCESSES superscalar programs and the
+ //     reciprocal table from a Blake2Generator seeded with the same key.
+ // cache->memory and cache->argonImpl are only read here, so they must have
+ // been set by the caller beforehand.
+ void initCache(randomx_cache* cache, const void* key, size_t keySize) {
+ uint32_t memory_blocks, segment_length;
+ argon2_instance_t instance;
+ argon2_context context;
+
+ // No output buffer, secret or associated data is used.
+ context.out = nullptr;
+ context.outlen = 0;
+ context.pwd = CONST_CAST(uint8_t *)key;
+ context.pwdlen = (uint32_t)keySize;
+ context.salt = CONST_CAST(uint8_t *)RANDOMX_ARGON_SALT;
+ context.saltlen = (uint32_t)randomx::ArgonSaltSize;
+ context.secret = NULL;
+ context.secretlen = 0;
+ context.ad = NULL;
+ context.adlen = 0;
+ context.t_cost = RANDOMX_ARGON_ITERATIONS;
+ context.m_cost = RANDOMX_ARGON_MEMORY;
+ context.lanes = RANDOMX_ARGON_LANES;
+ context.threads = 1;
+ context.allocate_cbk = NULL;
+ context.free_cbk = NULL;
+ context.flags = ARGON2_DEFAULT_FLAGS;
+ context.version = ARGON2_VERSION_NUMBER;
+
+ // The validation result is only checked by assert(); when assertions are
+ // compiled out the value is ignored.
+ int inputsValid = randomx_argon2_validate_inputs(&context);
+ assert(inputsValid == ARGON2_OK);
+
+ /* 2. Align memory size */
+ /* Minimum memory_blocks = 8L blocks, where L is the number of lanes */
+ memory_blocks = context.m_cost;
+
+ segment_length = memory_blocks / (context.lanes * ARGON2_SYNC_POINTS);
+
+ instance.version = context.version;
+ instance.memory = NULL; // overwritten below with cache->memory
+ instance.passes = context.t_cost;
+ instance.memory_blocks = memory_blocks;
+ instance.segment_length = segment_length;
+ instance.lane_length = segment_length * ARGON2_SYNC_POINTS;
+ instance.lanes = context.lanes;
+ instance.threads = context.threads;
+ instance.type = Argon2_d;
+ instance.memory = (block*)cache->memory;
+ instance.impl = cache->argonImpl;
+
+ // Never use more threads than lanes (context.threads is 1 above).
+ if (instance.threads > instance.lanes) {
+ instance.threads = instance.lanes;
+ }
+
+ /* 3. Initialization: Hashing inputs, allocating memory, filling first
+ * blocks
+ */
+ randomx_argon2_initialize(&instance, &context);
+
+ randomx_argon2_fill_memory_blocks(&instance);
+
+ // Rebuild the superscalar programs. For every IMUL_RCP instruction the
+ // reciprocal of its immediate is computed once and appended to
+ // cache->reciprocalCache; the immediate is then replaced by the index of
+ // that entry.
+ cache->reciprocalCache.clear();
+ randomx::Blake2Generator gen(key, keySize);
+ for (int i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) {
+ randomx::generateSuperscalar(cache->programs[i], gen);
+ for (unsigned j = 0; j < cache->programs[i].getSize(); ++j) {
+ auto& instr = cache->programs[i](j);
+ if ((SuperscalarInstructionType)instr.opcode == SuperscalarInstructionType::IMUL_RCP) {
+ auto rcp = randomx_reciprocal(instr.getImm32());
+ instr.setImm32(cache->reciprocalCache.size());
+ cache->reciprocalCache.push_back(rcp);
+ }
+ }
+ }
+ }
+
+ // Runs initCache and then emits the JIT code for the freshly generated
+ // programs: the compiler is switched to writable, the superscalar hash and
+ // the dataset-init code are generated, and it is switched back to executable.
+ void initCacheCompile(randomx_cache* cache, const void* key, size_t keySize) {
+     initCache(cache, key, keySize);
+
+     auto* const compiler = cache->jit;
+     compiler->enableWriting();
+     compiler->generateSuperscalarHash(cache->programs, cache->reciprocalCache);
+     compiler->generateDatasetInitCode();
+     compiler->enableExecution();
+ }
+
+ // Constants used by initDatasetItem to derive the eight initial registers
+ // from the item number: rl[0] = (itemNumber + 1) * superscalarMul0 and
+ // rl[k] = rl[0] ^ superscalarAddk for k = 1..7. Despite the "Add" in the
+ // names, they are combined with XOR.
+ constexpr uint64_t superscalarMul0 = 6364136223846793005ULL;
+ constexpr uint64_t superscalarAdd1 = 9298411001130361340ULL;
+ constexpr uint64_t superscalarAdd2 = 12065312585734608966ULL;
+ constexpr uint64_t superscalarAdd3 = 9306329213124626780ULL;
+ constexpr uint64_t superscalarAdd4 = 5281919268842080866ULL;
+ constexpr uint64_t superscalarAdd5 = 10536153434571861004ULL;
+ constexpr uint64_t superscalarAdd6 = 3398623926847679864ULL;
+ constexpr uint64_t superscalarAdd7 = 9549104520008361294ULL;
+
+ // Maps a register value to the address of one CacheLineSize-byte line inside
+ // `memory`: the value is reduced with a bit mask to a line index and scaled
+ // by the line size.
+ // NOTE(review): the mask only acts as a modulo if CacheSize / CacheLineSize
+ // is a power of two -- confirm against the configuration.
+ static inline uint8_t* getMixBlock(uint64_t registerValue, uint8_t *memory) {
+     constexpr uint32_t lineIndexMask = CacheSize / CacheLineSize - 1;
+     const auto byteOffset = (registerValue & lineIndexMask) * CacheLineSize;
+     return memory + byteOffset;
+ }
+
+ // Computes the dataset item with index `itemNumber` and writes
+ // CacheLineSize bytes of it to `out`.
+ // Eight registers are seeded from the item number; then, for each of the
+ // RANDOMX_CACHE_ACCESSES programs, a cache line is selected (and prefetched),
+ // the superscalar program is executed on the registers, the line is XORed
+ // into them, and the program's address register picks the next line.
+ void initDatasetItem(randomx_cache* cache, uint8_t* out, uint64_t itemNumber) {
+     const uint64_t seedXor[7] = {
+         superscalarAdd1, superscalarAdd2, superscalarAdd3, superscalarAdd4,
+         superscalarAdd5, superscalarAdd6, superscalarAdd7
+     };
+
+     int_reg_t rl[8];
+     rl[0] = (itemNumber + 1) * superscalarMul0;
+     for (unsigned k = 1; k < 8; ++k)
+         rl[k] = rl[0] ^ seedXor[k - 1];
+
+     // The first line is chosen by the item number itself.
+     uint64_t lineSelector = itemNumber;
+     for (unsigned pass = 0; pass < RANDOMX_CACHE_ACCESSES; ++pass) {
+         uint8_t* const line = getMixBlock(lineSelector, cache->memory);
+         rx_prefetch_nta(line);
+
+         SuperscalarProgram& program = cache->programs[pass];
+         executeSuperscalar(rl, program, &cache->reciprocalCache);
+
+         for (unsigned q = 0; q < 8; ++q)
+             rl[q] ^= load64_native(line + 8 * q);
+
+         lineSelector = rl[program.getAddressRegister()];
+     }
+
+     memcpy(out, &rl, CacheLineSize);
+ }
+
+ // Computes the items [startItem, endItem) one after another; item k is
+ // written CacheLineSize bytes after item k-1, starting at `dataset`.
+ void initDataset(randomx_cache* cache, uint8_t* dataset, uint32_t startItem, uint32_t endItem) {
+     uint8_t* destination = dataset;
+     uint32_t item = startItem;
+     while (item < endItem) {
+         initDatasetItem(cache, destination, item);
+         destination += CacheLineSize;
+         ++item;
+     }
+ }
+}
diff --git a/src/RandomX/src/dataset.hpp b/src/RandomX/src/dataset.hpp
new file mode 100644
index 000000000..d01911f9a
--- /dev/null
+++ b/src/RandomX/src/dataset.hpp
@@ -0,0 +1,103 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holder nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include
+#include
+#include
+#include "common.hpp"
+#include "superscalar_program.hpp"
+#include "allocator.hpp"
+#include "argon2.h"
+
+/* Global scope for C binding */
+// Dataset handle: a raw memory buffer plus the function used to release it.
+struct randomx_dataset {
+ uint8_t* memory = nullptr; // null until a buffer is attached; first member (see the standard-layout assertion below)
+ randomx::DatasetDeallocFunc* dealloc; // not initialized here; presumably set by whoever allocates the dataset -- confirm
+};
+
+/* Global scope for C binding */
+// Cache handle: the Argon2-filled memory buffer together with the generated
+// superscalar programs and the function pointers/objects that operate on it.
+// Only `memory` has a default initializer; the other pointers must be set by
+// the code that creates the cache.
+struct randomx_cache {
+ uint8_t* memory = nullptr; // buffer that initCache passes to Argon2 as instance.memory
+ randomx::CacheDeallocFunc* dealloc;
+ randomx::JitCompiler* jit; // used by initCacheCompile, deleted by deallocCache when non-null
+ randomx::CacheInitializeFunc* initialize;
+ randomx::DatasetInitFunc* datasetInit;
+ randomx::SuperscalarProgram programs[RANDOMX_CACHE_ACCESSES]; // regenerated by initCache
+ std::vector reciprocalCache; // NOTE(review): element type is missing in this listing; initCache stores randomx_reciprocal() results here
+ std::string cacheKey;
+ randomx_argon2_impl* argonImpl; // read by initCache as the Argon2 implementation
+
+ // True once the first superscalar program has a non-zero size.
+ bool isInitialized() {
+ return programs[0].getSize() != 0;
+ }
+};
+
+// A pointer to a standard-layout struct object points to its initial member.
+// NOTE(review): the type argument of std::is_standard_layout is missing from
+// both assertions in this listing; the messages indicate randomx_dataset and
+// randomx_cache respectively -- confirm.
+static_assert(std::is_standard_layout(), "randomx_dataset must be a standard-layout struct");
+
+// The following assert fails when compiling Debug in Visual Studio (JIT mode will crash in Debug),
+// so for MSVC Debug builds it is replaced by a compile-time message.
+#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) && defined(_DEBUG)
+#define TO_STR(x) #x
+#define STR(x) TO_STR(x)
+#pragma message ( __FILE__ "(" STR(__LINE__) ") warning: check std::is_standard_layout() is disabled for Debug configuration. JIT mode will crash." )
+#undef STR
+#undef TO_STR
+#else
+static_assert(std::is_standard_layout(), "randomx_cache must be a standard-layout struct");
+#endif
+
+namespace randomx {
+
+ using DefaultAllocator = AlignedAllocator;
+
+ // Returns the dataset buffer to Allocator::freeMemory (size DatasetSize) when
+ // one is attached; does nothing for a null buffer. The pointer is not reset.
+ // NOTE(review): the template parameter list is missing from the next line in
+ // this listing; the body expects a type named Allocator -- confirm.
+ template
+ void deallocDataset(randomx_dataset* dataset) {
+ if (dataset->memory != nullptr)
+ Allocator::freeMemory(dataset->memory, DatasetSize);
+ }
+
+ template
+ void deallocCache(randomx_cache* cache);
+
+ // Fills the cache from `key`: Argon2 fill of cache->memory followed by
+ // generation of the superscalar programs and the reciprocal table.
+ void initCache(randomx_cache* cache, const void* key, size_t keySize);
+ // Same as initCache, then generates the JIT code through cache->jit.
+ void initCacheCompile(randomx_cache* cache, const void* key, size_t keySize);
+ // Computes the dataset item `itemNumber` and writes CacheLineSize bytes to `out`.
+ void initDatasetItem(randomx_cache* cache, uint8_t* out, uint64_t itemNumber);
+ // Computes the items [startItem, endItem) into consecutive CacheLineSize-byte
+ // slots starting at `dataset`.
+ void initDataset(randomx_cache* cache, uint8_t* dataset, uint32_t startItem, uint32_t endItem);
+
+ // Picks the Argon2 fill-segment implementation requested by `flags`.
+ // AVX2 takes precedence over SSSE3; with neither flag set the reference
+ // implementation is returned. The result of the AVX2/SSSE3 getters is
+ // returned unchanged, whatever they yield.
+ inline randomx_argon2_impl* selectArgonImpl(randomx_flags flags) {
+     randomx_argon2_impl* chosen = &randomx_argon2_fill_segment_ref;
+     if (flags & RANDOMX_FLAG_ARGON2_AVX2)
+         chosen = randomx_argon2_impl_avx2();
+     else if (flags & RANDOMX_FLAG_ARGON2_SSSE3)
+         chosen = randomx_argon2_impl_ssse3();
+     return chosen;
+ }
+}
diff --git a/src/RandomX/src/instruction.cpp b/src/RandomX/src/instruction.cpp
new file mode 100644
index 000000000..12e6f49b9
--- /dev/null
+++ b/src/RandomX/src/instruction.cpp
@@ -0,0 +1,390 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holder nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "instruction.hpp"
+#include "common.hpp"
+
+namespace randomx {
+
+ // Writes the mnemonic for this opcode followed by a space, then lets the
+ // per-opcode formatter print the operands.
+ void Instruction::print(std::ostream& os) const {
+     os << names[opcode] << " ";
+     const InstructionFormatter formatter = engine[opcode];
+     (this->*formatter)(os);
+ }
+
+ // Prints a register-relative source address as "L1[rN+imm]" or "L2[rN+imm]":
+ // a non-zero mod.mem value selects "L1", zero selects "L2". The immediate is
+ // printed as a signed value with an explicit sign.
+ void Instruction::genAddressReg(std::ostream& os, int srcIndex) const {
+     const char* level = getModMem() ? "L1" : "L2";
+     os << level << "[r" << srcIndex;
+     os << std::showpos << (int32_t)getImm32() << std::noshowpos;
+     os << "]";
+ }
+
+ // Prints a register-relative destination address. While mod.cond is below
+ // StoreL3Condition the level is "L1"/"L2" as chosen by mod.mem; otherwise it
+ // is "L3". The immediate is printed signed with an explicit sign.
+ void Instruction::genAddressRegDst(std::ostream& os, int dstIndex) const {
+     const char* level = (getModCond() < StoreL3Condition)
+         ? (getModMem() ? "L1" : "L2")
+         : "L3";
+     os << level;
+     os << "[r" << dstIndex << std::showpos << (int32_t)getImm32() << std::noshowpos << "]";
+ }
+
+ // Prints an immediate address as "L3[offset]", where offset is the
+ // immediate masked with ScratchpadL3Mask.
+ void Instruction::genAddressImm(std::ostream& os) const {
+     const auto offset = getImm32() & ScratchpadL3Mask;
+     os << "L3" << "[" << offset << "]";
+ }
+
+ // IADD_RS operands: "rD, rS[, imm], SHFT n". The signed immediate is shown
+ // only when the destination is RegisterNeedsDisplacement.
+ void Instruction::h_IADD_RS(std::ostream& os) const {
+     const auto dstReg = dst % RegistersCount;
+     const auto srcReg = src % RegistersCount;
+     os << "r" << dstReg << ", r" << srcReg;
+     if (dstReg == RegisterNeedsDisplacement)
+         os << ", " << (int32_t)getImm32();
+     os << ", SHFT " << getModShift() << std::endl;
+ }
+
+ // IADD_M operands: "rD, <address>". The address is register-relative, or an
+ // immediate L3 address when src and dst name the same register.
+ void Instruction::h_IADD_M(std::ostream& os) const {
+     const auto dstReg = dst % RegistersCount;
+     const auto srcReg = src % RegistersCount;
+     os << "r" << dstReg << ", ";
+     if (dstReg == srcReg)
+         genAddressImm(os);
+     else
+         genAddressReg(os, srcReg);
+     os << std::endl;
+ }
+
+ // ISUB_R operands: "rD, rS", or "rD, imm" (signed) when src and dst name the
+ // same register.
+ void Instruction::h_ISUB_R(std::ostream& os) const {
+     const auto dstReg = dst % RegistersCount;
+     const auto srcReg = src % RegistersCount;
+     os << "r" << dstReg;
+     if (dstReg == srcReg)
+         os << ", " << (int32_t)getImm32();
+     else
+         os << ", r" << srcReg;
+     os << std::endl;
+ }
+
+ // ISUB_M operands: "rD, <address>". The address is register-relative, or an
+ // immediate L3 address when src and dst name the same register.
+ void Instruction::h_ISUB_M(std::ostream& os) const {
+     const auto dstReg = dst % RegistersCount;
+     const auto srcReg = src % RegistersCount;
+     os << "r" << dstReg << ", ";
+     if (dstReg == srcReg)
+         genAddressImm(os);
+     else
+         genAddressReg(os, srcReg);
+     os << std::endl;
+ }
+
+ // IMUL_R operands: "rD, rS", or "rD, imm" (signed) when src and dst name the
+ // same register.
+ void Instruction::h_IMUL_R(std::ostream& os) const {
+     const auto dstReg = dst % RegistersCount;
+     const auto srcReg = src % RegistersCount;
+     os << "r" << dstReg;
+     if (dstReg == srcReg)
+         os << ", " << (int32_t)getImm32();
+     else
+         os << ", r" << srcReg;
+     os << std::endl;
+ }
+
+ // IMUL_M operands: "rD, <address>". The address is register-relative, or an
+ // immediate L3 address when src and dst name the same register.
+ void Instruction::h_IMUL_M(std::ostream& os) const {
+     const auto dstReg = dst % RegistersCount;
+     const auto srcReg = src % RegistersCount;
+     os << "r" << dstReg << ", ";
+     if (dstReg == srcReg)
+         genAddressImm(os);
+     else
+         genAddressReg(os, srcReg);
+     os << std::endl;
+ }
+
+ // IMULH_R operands: always "rD, rS".
+ void Instruction::h_IMULH_R(std::ostream& os) const {
+     os << "r" << (dst % RegistersCount)
+        << ", r" << (src % RegistersCount)
+        << std::endl;
+ }
+
+ // IMULH_M operands: "rD, <address>". The address is register-relative, or an
+ // immediate L3 address when src and dst name the same register.
+ void Instruction::h_IMULH_M(std::ostream& os) const {
+     const auto dstReg = dst % RegistersCount;
+     const auto srcReg = src % RegistersCount;
+     os << "r" << dstReg << ", ";
+     if (dstReg == srcReg)
+         genAddressImm(os);
+     else
+         genAddressReg(os, srcReg);
+     os << std::endl;
+ }
+
+ // ISMULH_R operands: always "rD, rS".
+ void Instruction::h_ISMULH_R(std::ostream& os) const {
+     os << "r" << (dst % RegistersCount)
+        << ", r" << (src % RegistersCount)
+        << std::endl;
+ }
+
+ // ISMULH_M operands: "rD, <address>". The address is register-relative, or an
+ // immediate L3 address when src and dst name the same register.
+ void Instruction::h_ISMULH_M(std::ostream& os) const {
+     const auto dstReg = dst % RegistersCount;
+     const auto srcReg = src % RegistersCount;
+     os << "r" << dstReg << ", ";
+     if (dstReg == srcReg)
+         genAddressImm(os);
+     else
+         genAddressReg(os, srcReg);
+     os << std::endl;
+ }
+
+ // INEG_R has a single operand: "rD".
+ void Instruction::h_INEG_R(std::ostream& os) const {
+     os << "r" << (dst % RegistersCount) << std::endl;
+ }
+
+ // IXOR_R operands: "rD, rS", or "rD, imm" (signed) when src and dst name the
+ // same register.
+ void Instruction::h_IXOR_R(std::ostream& os) const {
+     const auto dstReg = dst % RegistersCount;
+     const auto srcReg = src % RegistersCount;
+     os << "r" << dstReg;
+     if (dstReg == srcReg)
+         os << ", " << (int32_t)getImm32();
+     else
+         os << ", r" << srcReg;
+     os << std::endl;
+ }
+
+ // IXOR_M operands: "rD, <address>". The address is register-relative, or an
+ // immediate L3 address when src and dst name the same register.
+ void Instruction::h_IXOR_M(std::ostream& os) const {
+     const auto dstReg = dst % RegistersCount;
+     const auto srcReg = src % RegistersCount;
+     os << "r" << dstReg << ", ";
+     if (dstReg == srcReg)
+         genAddressImm(os);
+     else
+         genAddressReg(os, srcReg);
+     os << std::endl;
+ }
+
+ // IROR_R operands: "rD, rS", or "rD, n" with n = imm & 63 when src and dst
+ // name the same register.
+ void Instruction::h_IROR_R(std::ostream& os) const {
+     const auto dstReg = dst % RegistersCount;
+     const auto srcReg = src % RegistersCount;
+     os << "r" << dstReg;
+     if (dstReg == srcReg)
+         os << ", " << (getImm32() & 63);
+     else
+         os << ", r" << srcReg;
+     os << std::endl;
+ }
+
+ // IROL_R operands: "rD, rS", or "rD, n" with n = imm & 63 when src and dst
+ // name the same register.
+ void Instruction::h_IROL_R(std::ostream& os) const {
+     const auto dstReg = dst % RegistersCount;
+     const auto srcReg = src % RegistersCount;
+     os << "r" << dstReg;
+     if (dstReg == srcReg)
+         os << ", " << (getImm32() & 63);
+     else
+         os << ", r" << srcReg;
+     os << std::endl;
+ }
+
+ // IMUL_RCP operands: "rD, imm", with the immediate printed unsigned.
+ void Instruction::h_IMUL_RCP(std::ostream& os) const {
+     os << "r" << (dst % RegistersCount) << ", " << getImm32() << std::endl;
+ }
+
+ // ISWAP_R operands: always "rD, rS".
+ void Instruction::h_ISWAP_R(std::ostream& os) const {
+     os << "r" << (dst % RegistersCount)
+        << ", r" << (src % RegistersCount)
+        << std::endl;
+ }
+
+ // FSWAP_R has a single operand. dst is reduced modulo RegistersCount;
+ // indices below RegisterCountFlt print as "fN", the rest print as "eN" with
+ // N reduced modulo RegisterCountFlt.
+ void Instruction::h_FSWAP_R(std::ostream& os) const {
+     auto index = dst % RegistersCount;
+     const bool upperGroup = index >= RegisterCountFlt;
+     index %= RegisterCountFlt;
+     const char prefix = upperGroup ? 'e' : 'f';
+     os << prefix << index << std::endl;
+ }
+
+ // FADD_R operands: "fD, aS", both indices reduced modulo RegisterCountFlt.
+ void Instruction::h_FADD_R(std::ostream& os) const {
+     os << "f" << (dst % RegisterCountFlt)
+        << ", a" << (src % RegisterCountFlt)
+        << std::endl;
+ }
+
+ // FADD_M operands: "fD, <register-relative address>".
+ void Instruction::h_FADD_M(std::ostream& os) const {
+     const auto srcReg = src % RegistersCount;
+     os << "f" << (dst % RegisterCountFlt) << ", ";
+     genAddressReg(os, srcReg);
+     os << std::endl;
+ }
+
+ // FSUB_R operands: "fD, aS", both indices reduced modulo RegisterCountFlt.
+ void Instruction::h_FSUB_R(std::ostream& os) const {
+     os << "f" << (dst % RegisterCountFlt)
+        << ", a" << (src % RegisterCountFlt)
+        << std::endl;
+ }
+
+ // FSUB_M operands: "fD, <register-relative address>".
+ void Instruction::h_FSUB_M(std::ostream& os) const {
+     const auto srcReg = src % RegistersCount;
+     os << "f" << (dst % RegisterCountFlt) << ", ";
+     genAddressReg(os, srcReg);
+     os << std::endl;
+ }
+
+ // FSCAL_R has a single operand: "fD".
+ void Instruction::h_FSCAL_R(std::ostream& os) const {
+     os << "f" << (dst % RegisterCountFlt) << std::endl;
+ }
+
+ // FMUL_R operands: "eD, aS", both indices reduced modulo RegisterCountFlt.
+ void Instruction::h_FMUL_R(std::ostream& os) const {
+     os << "e" << (dst % RegisterCountFlt)
+        << ", a" << (src % RegisterCountFlt)
+        << std::endl;
+ }
+
+ // FDIV_M operands: "eD, <register-relative address>".
+ void Instruction::h_FDIV_M(std::ostream& os) const {
+     const auto srcReg = src % RegistersCount;
+     os << "e" << (dst % RegisterCountFlt) << ", ";
+     genAddressReg(os, srcReg);
+     os << std::endl;
+ }
+
+ // FSQRT_R has a single operand: "eD".
+ void Instruction::h_FSQRT_R(std::ostream& os) const {
+     os << "e" << (dst % RegisterCountFlt) << std::endl;
+ }
+
+ // CFROUND operands: "rS, n" with n = imm & 63. Only the source register is
+ // printed.
+ void Instruction::h_CFROUND(std::ostream& os) const {
+     os << "r" << (src % RegistersCount) << ", " << (getImm32() & 63) << std::endl;
+ }
+
+ // CBRANCH operands: "rD, imm, COND c", where imm is printed signed and c is
+ // the mod.cond field. The src field is not part of the printed form, so no
+ // source index is computed (the previous unused local was removed).
+ void Instruction::h_CBRANCH(std::ostream& os) const {
+     auto dstIndex = dst % RegistersCount;
+     os << "r" << dstIndex << ", " << (int32_t)getImm32() << ", COND " << (int)(getModCond()) << std::endl;
+ }
+
+ // ISTORE operands: "<destination address>, rS".
+ void Instruction::h_ISTORE(std::ostream& os) const {
+     genAddressRegDst(os, dst % RegistersCount);
+     os << ", r" << (src % RegistersCount) << std::endl;
+ }
+
+ // NOP has no operands: only the line terminator is written (and the stream
+ // flushed, as std::endl does).
+ void Instruction::h_NOP(std::ostream& os) const {
+ os << std::endl;
+ }
+
+#include "instruction_weights.hpp"
+#define INST_NAME(x) REPN(#x, WT(x))
+#define INST_HANDLE(x) REPN(&Instruction::h_##x, WT(x))
+
+ // Mnemonic lookup table indexed by the opcode byte. Each INST_NAME(x) line
+ // expands to the string "x" repeated RANDOMX_FREQ_x times, so an instruction
+ // occupies as many consecutive slots as its configured frequency. The order
+ // of the lines therefore determines which opcode values map to which name
+ // and must stay in step with Instruction::engine.
+ // NOTE(review): presumably the frequencies sum to 256 so that every slot is
+ // filled -- confirm in the configuration.
+ const char* Instruction::names[256] = {
+ INST_NAME(IADD_RS)
+ INST_NAME(IADD_M)
+ INST_NAME(ISUB_R)
+ INST_NAME(ISUB_M)
+ INST_NAME(IMUL_R)
+ INST_NAME(IMUL_M)
+ INST_NAME(IMULH_R)
+ INST_NAME(IMULH_M)
+ INST_NAME(ISMULH_R)
+ INST_NAME(ISMULH_M)
+ INST_NAME(IMUL_RCP)
+ INST_NAME(INEG_R)
+ INST_NAME(IXOR_R)
+ INST_NAME(IXOR_M)
+ INST_NAME(IROR_R)
+ INST_NAME(IROL_R)
+ INST_NAME(ISWAP_R)
+ INST_NAME(FSWAP_R)
+ INST_NAME(FADD_R)
+ INST_NAME(FADD_M)
+ INST_NAME(FSUB_R)
+ INST_NAME(FSUB_M)
+ INST_NAME(FSCAL_R)
+ INST_NAME(FMUL_R)
+ INST_NAME(FDIV_M)
+ INST_NAME(FSQRT_R)
+ INST_NAME(CBRANCH)
+ INST_NAME(CFROUND)
+ INST_NAME(ISTORE)
+ INST_NAME(NOP)
+ };
+
+ // Formatter lookup table indexed by the opcode byte; Instruction::print
+ // calls engine[opcode] to print the operands. Each INST_HANDLE(x) line
+ // expands to &Instruction::h_x repeated RANDOMX_FREQ_x times, in the same
+ // order as Instruction::names so both tables agree for every opcode value.
+ InstructionFormatter Instruction::engine[256] = {
+ INST_HANDLE(IADD_RS)
+ INST_HANDLE(IADD_M)
+ INST_HANDLE(ISUB_R)
+ INST_HANDLE(ISUB_M)
+ INST_HANDLE(IMUL_R)
+ INST_HANDLE(IMUL_M)
+ INST_HANDLE(IMULH_R)
+ INST_HANDLE(IMULH_M)
+ INST_HANDLE(ISMULH_R)
+ INST_HANDLE(ISMULH_M)
+ INST_HANDLE(IMUL_RCP)
+ INST_HANDLE(INEG_R)
+ INST_HANDLE(IXOR_R)
+ INST_HANDLE(IXOR_M)
+ INST_HANDLE(IROR_R)
+ INST_HANDLE(IROL_R)
+ INST_HANDLE(ISWAP_R)
+ INST_HANDLE(FSWAP_R)
+ INST_HANDLE(FADD_R)
+ INST_HANDLE(FADD_M)
+ INST_HANDLE(FSUB_R)
+ INST_HANDLE(FSUB_M)
+ INST_HANDLE(FSCAL_R)
+ INST_HANDLE(FMUL_R)
+ INST_HANDLE(FDIV_M)
+ INST_HANDLE(FSQRT_R)
+ INST_HANDLE(CBRANCH)
+ INST_HANDLE(CFROUND)
+ INST_HANDLE(ISTORE)
+ INST_HANDLE(NOP)
+ };
+
+}
\ No newline at end of file
diff --git a/src/RandomX/src/instruction.hpp b/src/RandomX/src/instruction.hpp
new file mode 100644
index 000000000..b1863b5ed
--- /dev/null
+++ b/src/RandomX/src/instruction.hpp
@@ -0,0 +1,149 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holder nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include
+#include
+#include
+#include "blake2/endian.h"
+
+namespace randomx {
+
+ class Instruction;
+
+ typedef void(Instruction::*InstructionFormatter)(std::ostream&) const;
+
+ // Identifiers for the 30 instruction kinds, numbered 0..29 in the same order
+ // as the initializer lists of Instruction::names and Instruction::engine.
+ // NOTE(review): these values are not the raw opcode byte -- the 256-entry
+ // tables are indexed by the opcode byte, with each kind repeated according
+ // to its frequency. How callers map an opcode to this enum is not visible
+ // here.
+ enum class InstructionType : uint16_t {
+ IADD_RS = 0,
+ IADD_M = 1,
+ ISUB_R = 2,
+ ISUB_M = 3,
+ IMUL_R = 4,
+ IMUL_M = 5,
+ IMULH_R = 6,
+ IMULH_M = 7,
+ ISMULH_R = 8,
+ ISMULH_M = 9,
+ IMUL_RCP = 10,
+ INEG_R = 11,
+ IXOR_R = 12,
+ IXOR_M = 13,
+ IROR_R = 14,
+ IROL_R = 15,
+ ISWAP_R = 16,
+ FSWAP_R = 17,
+ FADD_R = 18,
+ FADD_M = 19,
+ FSUB_R = 20,
+ FSUB_M = 21,
+ FSCAL_R = 22,
+ FMUL_R = 23,
+ FDIV_M = 24,
+ FSQRT_R = 25,
+ CBRANCH = 26,
+ CFROUND = 27,
+ ISTORE = 28,
+ NOP = 29,
+ };
+
+ // One encoded instruction: four single-byte fields (opcode, dst, src, mod)
+ // followed by a 32-bit immediate. The data members are public and their
+ // order is part of the layout, which is checked by the static_asserts that
+ // follow the class. The private part holds the lookup tables and the
+ // per-opcode formatters used for textual output.
+ class Instruction {
+ public:
+     // The immediate is read and written through load32/store32.
+     uint32_t getImm32() const { return load32(&imm32); }
+     void setImm32(uint32_t val) { store32(&imm32, val); }
+
+     // Mnemonic for the current opcode.
+     const char* getName() const { return names[opcode]; }
+
+     // Streams the instruction as text (mnemonic plus operands).
+     friend std::ostream& operator<<(std::ostream& os, const Instruction& instruction) {
+         instruction.print(os);
+         return os;
+     }
+
+     // Bit fields of `mod`.
+     int getModMem() const { return mod & 3; }          //bits 0-1
+     int getModShift() const { return (mod >> 2) & 3; } //bits 2-3
+     int getModCond() const { return mod >> 4; }        //bits 4-7
+     void setMod(uint8_t val) { mod = val; }
+
+     uint8_t opcode;
+     uint8_t dst;
+     uint8_t src;
+     uint8_t mod;
+     uint32_t imm32;
+
+ private:
+     static const char* names[256];
+     static InstructionFormatter engine[256];
+
+     void print(std::ostream&) const;
+
+     // Address formatting helpers.
+     void genAddressReg(std::ostream& os, int) const;
+     void genAddressImm(std::ostream& os) const;
+     void genAddressRegDst(std::ostream&, int) const;
+
+     // Operand formatters, one per instruction kind.
+     void h_IADD_RS(std::ostream&) const;
+     void h_IADD_M(std::ostream&) const;
+     void h_ISUB_R(std::ostream&) const;
+     void h_ISUB_M(std::ostream&) const;
+     void h_IMUL_R(std::ostream&) const;
+     void h_IMUL_M(std::ostream&) const;
+     void h_IMULH_R(std::ostream&) const;
+     void h_IMULH_M(std::ostream&) const;
+     void h_ISMULH_R(std::ostream&) const;
+     void h_ISMULH_M(std::ostream&) const;
+     void h_IMUL_RCP(std::ostream&) const;
+     void h_INEG_R(std::ostream&) const;
+     void h_IXOR_R(std::ostream&) const;
+     void h_IXOR_M(std::ostream&) const;
+     void h_IROR_R(std::ostream&) const;
+     void h_IROL_R(std::ostream&) const;
+     void h_ISWAP_R(std::ostream&) const;
+     void h_FSWAP_R(std::ostream&) const;
+     void h_FADD_R(std::ostream&) const;
+     void h_FADD_M(std::ostream&) const;
+     void h_FSUB_R(std::ostream&) const;
+     void h_FSUB_M(std::ostream&) const;
+     void h_FSCAL_R(std::ostream&) const;
+     void h_FMUL_R(std::ostream&) const;
+     void h_FDIV_M(std::ostream&) const;
+     void h_FSQRT_R(std::ostream&) const;
+     void h_CBRANCH(std::ostream&) const;
+     void h_CFROUND(std::ostream&) const;
+     void h_ISTORE(std::ostream&) const;
+     void h_NOP(std::ostream&) const;
+ };
+
+ static_assert(sizeof(Instruction) == 8, "Invalid size of struct randomx::Instruction");
+ static_assert(std::is_standard_layout(), "randomx::Instruction must be a standard-layout struct");
+}
\ No newline at end of file
diff --git a/src/RandomX/src/instruction_weights.hpp b/src/RandomX/src/instruction_weights.hpp
new file mode 100644
index 000000000..f6c887349
--- /dev/null
+++ b/src/RandomX/src/instruction_weights.hpp
@@ -0,0 +1,73 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holder nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+// Repetition macros used to build the 256-entry opcode tables.
+// REPk(x) expands to k copies of "x," (each copy followed by a comma);
+// REP0 expands to nothing. REP1..REP33 are defined as a chain, each adding
+// one copy to the previous one.
+#define REP0(x)
+#define REP1(x) x,
+#define REP2(x) REP1(x) x,
+#define REP3(x) REP2(x) x,
+#define REP4(x) REP3(x) x,
+#define REP5(x) REP4(x) x,
+#define REP6(x) REP5(x) x,
+#define REP7(x) REP6(x) x,
+#define REP8(x) REP7(x) x,
+#define REP9(x) REP8(x) x,
+#define REP10(x) REP9(x) x,
+#define REP11(x) REP10(x) x,
+#define REP12(x) REP11(x) x,
+#define REP13(x) REP12(x) x,
+#define REP14(x) REP13(x) x,
+#define REP15(x) REP14(x) x,
+#define REP16(x) REP15(x) x,
+#define REP17(x) REP16(x) x,
+#define REP18(x) REP17(x) x,
+#define REP19(x) REP18(x) x,
+#define REP20(x) REP19(x) x,
+#define REP21(x) REP20(x) x,
+#define REP22(x) REP21(x) x,
+#define REP23(x) REP22(x) x,
+#define REP24(x) REP23(x) x,
+#define REP25(x) REP24(x) x,
+#define REP26(x) REP25(x) x,
+#define REP27(x) REP26(x) x,
+#define REP28(x) REP27(x) x,
+#define REP29(x) REP28(x) x,
+#define REP30(x) REP29(x) x,
+#define REP31(x) REP30(x) x,
+#define REP32(x) REP31(x) x,
+#define REP33(x) REP32(x) x,
+// Above 33 only these specific counts exist (40, 64, 128, 232, 256), built
+// from the smaller ones; any other count has no matching REPk macro.
+#define REP40(x) REP32(x) REP8(x)
+#define REP64(x) REP32(x) REP32(x)
+#define REP128(x) REP32(x) REP32(x) REP32(x) REP32(x)
+#define REP232(x) REP128(x) REP40(x) REP40(x) REP24(x)
+#define REP256(x) REP128(x) REP128(x)
+// REPN(x,N) goes through REPNX so that N is macro-expanded before it is
+// pasted onto "REP"; WT(x) yields the RANDOMX_FREQ_x value for instruction x.
+#define REPNX(x,N) REP##N(x)
+#define REPN(x,N) REPNX(x,N)
+#define NUM(x) x
+#define WT(x) NUM(RANDOMX_FREQ_##x)
diff --git a/src/RandomX/src/instructions_portable.cpp b/src/RandomX/src/instructions_portable.cpp
new file mode 100644
index 000000000..d74672714
--- /dev/null
+++ b/src/RandomX/src/instructions_portable.cpp
@@ -0,0 +1,208 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holder nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include
+#include
+#include "common.hpp"
+#include "intrin_portable.h"
+#include "blake2/endian.h"
+
+#if defined(__SIZEOF_INT128__)
+ typedef unsigned __int128 uint128_t;
+ typedef __int128 int128_t;
+ uint64_t mulh(uint64_t a, uint64_t b) { //upper 64 bits of the unsigned 128-bit product a * b
+ return ((uint128_t)a * b) >> 64;
+ }
+ int64_t smulh(int64_t a, int64_t b) { //upper 64 bits of the signed 128-bit product a * b
+ return ((int128_t)a * b) >> 64;
+ }
+ #define HAVE_MULH //disables the 32-bit-halves mulh fallback further down
+ #define HAVE_SMULH //disables the mulh-based smulh fallback further down
+#endif
+
+#if defined(_MSC_VER)
+ #define HAS_VALUE(X) X ## 0 //appends a 0 to X: a 1 becomes 10 (true in #if), an empty X becomes 0 (false)
+ #define EVAL_DEFINE(X) HAS_VALUE(X) //expands X first, then applies HAS_VALUE to the result
+ #include
+ #include
+
+ uint64_t rotl(uint64_t x, unsigned int c) { //64-bit rotate left via the MSVC intrinsic
+ return _rotl64(x, c);
+ }
+ uint64_t rotr(uint64_t x, unsigned int c) { //64-bit rotate right via the MSVC intrinsic
+ return _rotr64(x, c);
+ }
+ #define HAVE_ROTL
+ #define HAVE_ROTR
+
+ #if EVAL_DEFINE(__MACHINEARM64_X64(1)) //NOTE(review): presumably true only on targets where __umulh exists - confirm against the MSVC headers
+ uint64_t mulh(uint64_t a, uint64_t b) {
+ return __umulh(a, b);
+ }
+ #define HAVE_MULH
+ #endif
+
+ #if EVAL_DEFINE(__MACHINEX64(1)) //NOTE(review): presumably true only on targets where _mul128 exists - confirm against the MSVC headers
+ int64_t smulh(int64_t a, int64_t b) {
+ int64_t hi;
+ _mul128(a, b, &hi); //the returned low half is discarded; hi receives the high half
+ return hi;
+ }
+ #define HAVE_SMULH
+ #endif
+
+ static void setRoundMode_(uint32_t mode) {
+ _controlfp(mode, _MCW_RC); //the _MCW_RC mask restricts the update to the rounding-control field
+ }
+ #define HAVE_SETROUNDMODE_IMPL
+#endif
+
+#ifndef HAVE_SETROUNDMODE_IMPL
+ static void setRoundMode_(uint32_t mode) { //used when no compiler-specific version was defined above; mode is an FE_* rounding constant
+ fesetround(mode); //the return value (failure indication) is not checked
+ }
+#endif
+
+#ifndef HAVE_ROTR
+ uint64_t rotr(uint64_t a, unsigned int b) { //rotate a right by b bits; b is taken modulo 64
+ return (a >> (b & 63)) | (a << (-b & 63)); //both counts are masked so neither shift reaches 64, which would be undefined
+ }
+ #define HAVE_ROTR
+#endif
+
+#ifndef HAVE_ROTL
+ uint64_t rotl(uint64_t a, unsigned int b) { //rotate a left by b bits; b is taken modulo 64
+ return (a << (b & 63)) | (a >> (-b & 63)); //both counts are masked so neither shift reaches 64, which would be undefined
+ }
+ #define HAVE_ROTL
+#endif
+
+#ifndef HAVE_MULH
+ #define LO(x) ((x)&0xffffffff) //low 32 bits
+ #define HI(x) ((x)>>32) //high 32 bits
+ uint64_t mulh(uint64_t a, uint64_t b) { //upper 64 bits of the 128-bit product a * b, built from 32-bit halves
+ uint64_t ah = HI(a), al = LO(a);
+ uint64_t bh = HI(b), bl = LO(b);
+ uint64_t x00 = al * bl; //the four 32x32 -> 64 partial products
+ uint64_t x01 = al * bh;
+ uint64_t x10 = ah * bl;
+ uint64_t x11 = ah * bh;
+ uint64_t m1 = LO(x10) + LO(x01) + HI(x00); //column for product bits 32..63; HI(m1) is its carry
+ uint64_t m2 = HI(x10) + HI(x01) + LO(x11) + HI(m1); //column for product bits 64..95, plus the carry from m1
+ uint64_t m3 = HI(x11) + HI(m2); //column for product bits 96..127, plus the carry from m2
+
+ return (m3 << 32) + LO(m2);
+ }
+ #define HAVE_MULH
+#endif
+
+#ifndef HAVE_SMULH
+ int64_t smulh(int64_t a, int64_t b) { //upper 64 bits of the signed 128-bit product a * b, derived from the unsigned mulh
+ uint64_t hi = mulh((uint64_t)a, (uint64_t)b); //high word of the product of the operands' bit patterns
+ if (a < 0LL) hi -= (uint64_t)b; //signed correction done modulo 2^64; on int64_t this subtraction could overflow (undefined)
+ if (b < 0LL) hi -= (uint64_t)a;
+ return (int64_t)hi;
+ }
+ #define HAVE_SMULH
+#endif
+
+#ifdef RANDOMX_DEFAULT_FENV
+
+void rx_reset_float_state() { //fenv-based variant, compiled only when RANDOMX_DEFAULT_FENV is defined
+ setRoundMode_(FE_TONEAREST);
+ rx_set_double_precision(); //set precision to 53 bits if needed by the platform
+}
+
+void rx_set_rounding_mode(uint32_t mode) { //maps a Round* constant onto the matching FE_* mode
+ switch (mode & 3) { //only the low two bits of mode are significant
+ case RoundDown:
+ setRoundMode_(FE_DOWNWARD);
+ break;
+ case RoundUp:
+ setRoundMode_(FE_UPWARD);
+ break;
+ case RoundToZero:
+ setRoundMode_(FE_TOWARDZERO);
+ break;
+ case RoundToNearest:
+ setRoundMode_(FE_TONEAREST);
+ break;
+ default:
+ UNREACHABLE; //the four cases above cover every value of (mode & 3)
+ }
+}
+
+uint32_t rx_get_rounding_mode() { //inverse of rx_set_rounding_mode: translates the current FE_* mode back to a Round* constant
+ switch (fegetround()) {
+ case FE_DOWNWARD:
+ return RoundDown;
+ case FE_UPWARD:
+ return RoundUp;
+ case FE_TOWARDZERO:
+ return RoundToZero;
+ case FE_TONEAREST:
+ return RoundToNearest;
+ default:
+ UNREACHABLE; //NOTE(review): reached if fegetround() reports a mode outside the four above - confirm that cannot happen on supported targets
+ }
+}
+
+#endif
+
+#ifdef RANDOMX_USE_X87
+
+#if defined(_MSC_VER) && defined(_M_IX86)
+
+void rx_set_double_precision() { //32-bit MSVC build: select 53-bit precision in the x87 control word
+ _control87(_PC_53, _MCW_PC); //the _MCW_PC mask restricts the update to the precision-control field
+}
+
+#elif defined(__i386)
+
+void rx_set_double_precision() { //32-bit x86 GCC/Clang build: read-modify-write of the x87 control word
+ uint16_t volatile x87cw;
+ asm volatile("fstcw %0" : "=m" (x87cw)); //store the current control word to memory
+ x87cw &= ~0x300; //clear the two-bit field at bits 8-9
+ x87cw |= 0x200; //write binary 10 into that field (53-bit precision, per the comment in rx_reset_float_state)
+ asm volatile("fldcw %0" : : "m" (x87cw)); //load the modified control word back
+}
+
+#endif
+
+#endif //RANDOMX_USE_X87
+
+union double_ser_t { //overlays a double with a 64-bit integer so the raw bit pattern can be moved between them
+ double f;
+ uint64_t i;
+};
+
+double loadDoublePortable(const void* addr) { //reads 8 bytes at addr and returns them reinterpreted as a double
+ double_ser_t ds;
+ ds.i = load64(addr); //load64 comes from blake2/endian.h; presumably a fixed-endianness 64-bit load - confirm there
+ return ds.f; //same bits read back through the double member
+}
diff --git a/src/RandomX/src/intrin_portable.h b/src/RandomX/src/intrin_portable.h
new file mode 100644
index 000000000..05f6cd33b
--- /dev/null
+++ b/src/RandomX/src/intrin_portable.h
@@ -0,0 +1,751 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holder nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include
+#include "blake2/endian.h"
+
+constexpr int32_t unsigned32ToSigned2sCompl(uint32_t x) { //value of x's bit pattern read as a two's complement int32_t
+ return (-1 == ~0) ? (int32_t)x : (x > INT32_MAX ? (-(int32_t)(UINT32_MAX - x) - 1) : (int32_t)x); //(-1 == ~0) holds on two's complement hosts, where the plain cast is used; otherwise negative values are computed arithmetically
+}
+
+constexpr int64_t unsigned64ToSigned2sCompl(uint64_t x) { //value of x's bit pattern read as a two's complement int64_t
+ return (-1 == ~0) ? (int64_t)x : (x > INT64_MAX ? (-(int64_t)(UINT64_MAX - x) - 1) : (int64_t)x); //same scheme as the 32-bit version: plain cast on two's complement hosts, arithmetic otherwise
+}
+
+constexpr uint64_t signExtend2sCompl(uint32_t x) { //sign-extends the 32-bit pattern x to 64 bits
+ return (-1 == ~0) ? (int64_t)(int32_t)(x) : (x > INT32_MAX ? (x | 0xffffffff00000000ULL) : (uint64_t)x); //without two's complement casts, the upper 32 bits are set by hand when bit 31 of x is set
+}
+
+constexpr int RoundToNearest = 0; //rounding mode codes; the SSE2 rx_set_rounding_mode shifts them directly into bits 13-14 of MXCSR
+constexpr int RoundDown = 1;
+constexpr int RoundUp = 2;
+constexpr int RoundToZero = 3;
+
+//MSVC doesn't define __SSE2__, so we have to define it manually if SSE2 is available
+#if !defined(__SSE2__) && (defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP == 2))
+#define __SSE2__ 1
+#endif
+
+//MSVC doesn't define __AES__
+#if defined(_MSC_VER) && defined(__SSE2__)
+#define __AES__
+#endif
+
+//the library "sqrt" function provided by MSVC for x86 targets doesn't give
+//the correct results, so we have to use inline assembly to call x87 fsqrt directly
+#if !defined(__SSE2__)
+#if defined(_MSC_VER) && defined(_M_IX86)
+inline double __cdecl rx_sqrt(double x) { //square root via the x87 fsqrt instruction; there is no return statement, the result is left on the x87 stack
+ __asm {
+ fld x
+ fsqrt
+ }
+}
+#define rx_sqrt rx_sqrt //marks rx_sqrt as provided, so the "#define rx_sqrt sqrt" default below is skipped
+
+void rx_set_double_precision();
+#define RANDOMX_USE_X87
+
+#elif defined(__i386)
+
+void rx_set_double_precision();
+#define RANDOMX_USE_X87
+
+#endif
+#endif //__SSE2__
+
+#if !defined(rx_sqrt)
+#define rx_sqrt sqrt //default: the library sqrt, unless a platform-specific rx_sqrt was defined above
+#endif
+
+#if !defined(RANDOMX_USE_X87)
+#define rx_set_double_precision(x) //expands to nothing when the x87 path is not in use
+#endif
+
+#ifdef __SSE2__
+#ifdef __GNUC__
+#include
+#else
+#include
+#endif
+
+typedef __m128i rx_vec_i128;
+typedef __m128d rx_vec_f128;
+
+#define rx_aligned_alloc(a, b) _mm_malloc(a,b) //a = size in bytes, b = alignment
+#define rx_aligned_free(a) _mm_free(a)
+#define rx_prefetch_nta(x) _mm_prefetch((const char *)(x), _MM_HINT_NTA)
+#define rx_prefetch_t0(x) _mm_prefetch((const char *)(x), _MM_HINT_T0)
+
+#define rx_load_vec_f128 _mm_load_pd //the f128 operations below map one-to-one onto packed-double SSE2 intrinsics
+#define rx_store_vec_f128 _mm_store_pd
+#define rx_add_vec_f128 _mm_add_pd
+#define rx_sub_vec_f128 _mm_sub_pd
+#define rx_mul_vec_f128 _mm_mul_pd
+#define rx_div_vec_f128 _mm_div_pd
+#define rx_sqrt_vec_f128 _mm_sqrt_pd
+
+FORCE_INLINE rx_vec_f128 rx_swap_vec_f128(rx_vec_f128 a) { //returns a with its two double lanes exchanged
+ return _mm_shuffle_pd(a, a, 1); //selector 1: low lane taken from the high lane of a, high lane from the low lane of a
+}
+
+FORCE_INLINE rx_vec_f128 rx_set_vec_f128(uint64_t x1, uint64_t x0) { //x1 supplies the raw bits of the high lane, x0 those of the low lane
+ return _mm_castsi128_pd(_mm_set_epi64x(x1, x0)); //the cast reinterprets the bits; no integer-to-double conversion takes place
+}
+
+FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) { //both lanes receive the raw 64-bit pattern x
+ return _mm_castsi128_pd(_mm_set1_epi64x(x));
+}
+
+#define rx_xor_vec_f128 _mm_xor_pd
+#define rx_and_vec_f128 _mm_and_pd
+#define rx_or_vec_f128 _mm_or_pd
+
+#ifdef __AES__
+
+#define rx_aesenc_vec_i128 _mm_aesenc_si128
+#define rx_aesdec_vec_i128 _mm_aesdec_si128
+
+#define HAVE_AES 1
+
+#endif //__AES__
+
+FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) { //32-bit lane 0 (lowest)
+ return _mm_cvtsi128_si32(a);
+}
+
+FORCE_INLINE int rx_vec_i128_y(rx_vec_i128 a) { //32-bit lane 1
+ return _mm_cvtsi128_si32(_mm_shuffle_epi32(a, 0x55)); //0x55 replicates lane 1 into every lane, then lane 0 is extracted
+}
+
+FORCE_INLINE int rx_vec_i128_z(rx_vec_i128 a) { //32-bit lane 2
+ return _mm_cvtsi128_si32(_mm_shuffle_epi32(a, 0xaa)); //0xaa replicates lane 2
+}
+
+FORCE_INLINE int rx_vec_i128_w(rx_vec_i128 a) { //32-bit lane 3 (highest)
+ return _mm_cvtsi128_si32(_mm_shuffle_epi32(a, 0xff)); //0xff replicates lane 3
+}
+
+#define rx_set_int_vec_i128 _mm_set_epi32
+#define rx_xor_vec_i128 _mm_xor_si128
+#define rx_load_vec_i128 _mm_load_si128
+#define rx_store_vec_i128 _mm_store_si128
+
+FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) { //converts the two signed 32-bit integers stored at addr into two doubles
+ __m128i ix = _mm_loadl_epi64((const __m128i*)addr); //loads only the low 64 bits, i.e. the two integers
+ return _mm_cvtepi32_pd(ix);
+}
+
+constexpr uint32_t rx_mxcsr_default = 0x9FC0; //Flush to zero, denormals are zero, default rounding mode, all exceptions disabled
+
+FORCE_INLINE void rx_reset_float_state() { //SSE2 variant: writes the default control value straight into MXCSR
+ _mm_setcsr(rx_mxcsr_default);
+}
+
+FORCE_INLINE void rx_set_rounding_mode(uint32_t mode) { //mode is one of the Round* constants; only its low two bits are used
+ _mm_setcsr(rx_mxcsr_default | ((mode & 3) << 13)); //masked like the fenv-based implementation, so a stray high bit can never spill into other MXCSR fields
+}
+
+FORCE_INLINE uint32_t rx_get_rounding_mode() { //returns the two-bit rounding field currently held in MXCSR
+ return (_mm_getcsr() >> 13) & 3; //same bit position that rx_set_rounding_mode writes
+}
+
+#elif defined(__PPC64__) && defined(__ALTIVEC__) && defined(__VSX__) //sadly only POWER7 and newer will be able to use SIMD acceleration. Earlier processors cannot use doubles or 64-bit integers with SIMD
+#include
+#include
+#include
+#include
+#undef vector
+#undef pixel
+#undef bool
+
+typedef __vector uint8_t __m128i;
+typedef __vector uint32_t __m128l;
+typedef __vector int __m128li;
+typedef __vector uint64_t __m128ll;
+typedef __vector double __m128d;
+
+typedef __m128i rx_vec_i128;
+typedef __m128d rx_vec_f128;
+typedef union{ //gives element-wise access to a 128-bit vector through several views of the same bytes
+ rx_vec_i128 i;
+ rx_vec_f128 d;
+ uint64_t u64[2];
+ double d64[2];
+ uint32_t u32[4];
+ int i32[4];
+} vec_u;
+
+#define rx_aligned_alloc(a, b) malloc(a) //NOTE(review): the alignment argument b is dropped here; the block only has whatever alignment malloc provides
+#define rx_aligned_free(a) free(a)
+#define rx_prefetch_nta(x) //prefetch hints expand to nothing on this target
+#define rx_prefetch_t0(x)
+
+/* Splat 64-bit long long to 2 64-bit long longs */
+FORCE_INLINE __m128i vec_splat2sd (int64_t scalar) //copies scalar into both 64-bit lanes and returns the result as a byte vector
+{ return (__m128i) vec_splats (scalar); }
+
+FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) { //loads two doubles starting at pd
+#if defined(NATIVE_LITTLE_ENDIAN)
+ return (rx_vec_f128)vec_vsx_ld(0,pd); //direct vector load at byte offset 0
+#else
+ vec_u t;
+ t.u64[0] = load64(pd + 0); //otherwise each 64-bit lane goes through load64 (from blake2/endian.h)
+ t.u64[1] = load64(pd + 1);
+ return (rx_vec_f128)t.d;
+#endif
+}
+
+FORCE_INLINE void rx_store_vec_f128(double* mem_addr, rx_vec_f128 a) { //stores the two doubles of a starting at mem_addr
+#if defined(NATIVE_LITTLE_ENDIAN)
+ vec_vsx_st(a,0,(rx_vec_f128*)mem_addr); //direct vector store at byte offset 0
+#else
+ vec_u _a;
+ _a.d = a;
+ store64(mem_addr + 0, _a.u64[0]); //otherwise each 64-bit lane goes through store64 (from blake2/endian.h)
+ store64(mem_addr + 1, _a.u64[1]);
+#endif
+}
+
+FORCE_INLINE rx_vec_f128 rx_swap_vec_f128(rx_vec_f128 a) { //exchanges the two 8-byte halves of a
+ return (rx_vec_f128)vec_perm((__m128i)a,(__m128i)a,(__m128i){8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7}); //byte selector: bytes 8..15 first, then bytes 0..7
+}
+
+FORCE_INLINE rx_vec_f128 rx_add_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { //lane-wise a + b
+ return (rx_vec_f128)vec_add(a,b);
+}
+
+FORCE_INLINE rx_vec_f128 rx_sub_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { //lane-wise a - b
+ return (rx_vec_f128)vec_sub(a,b);
+}
+
+FORCE_INLINE rx_vec_f128 rx_mul_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { //lane-wise a * b
+ return (rx_vec_f128)vec_mul(a,b);
+}
+
+FORCE_INLINE rx_vec_f128 rx_div_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { //lane-wise a / b
+ return (rx_vec_f128)vec_div(a,b);
+}
+
+FORCE_INLINE rx_vec_f128 rx_sqrt_vec_f128(rx_vec_f128 a) { //lane-wise square root
+ return (rx_vec_f128)vec_sqrt(a);
+}
+
+FORCE_INLINE rx_vec_i128 rx_set1_long_vec_i128(uint64_t a) { //both 64-bit lanes set to a
+ return (rx_vec_i128)vec_splat2sd(a);
+}
+
+FORCE_INLINE rx_vec_f128 rx_vec_i128_vec_f128(rx_vec_i128 a) { //vector cast from the integer type to the floating-point type
+ return (rx_vec_f128)a;
+}
+
+FORCE_INLINE rx_vec_f128 rx_set_vec_f128(uint64_t x1, uint64_t x0) { //x0 is element 0, x1 is element 1
+ return (rx_vec_f128)(__m128ll){x0,x1};
+}
+
+FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) { //both lanes receive the 64-bit value x
+ return (rx_vec_f128)vec_splat2sd(x);
+}
+
+FORCE_INLINE rx_vec_f128 rx_xor_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { //bitwise XOR of the two vectors
+ return (rx_vec_f128)vec_xor(a,b);
+}
+
+FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { //bitwise AND of the two vectors
+ return (rx_vec_f128)vec_and(a,b);
+}
+
+FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { //bitwise OR of the two vectors
+ return (rx_vec_f128)vec_or(a,b);
+}
+
+#if defined(__CRYPTO__)
+
+FORCE_INLINE __m128ll vrev(__m128i v){ //byte permutation applied before and after the crypto builtins below
+#if defined(NATIVE_LITTLE_ENDIAN)
+ return (__m128ll)vec_perm((__m128i)v,(__m128i){0},(__m128i){15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0}); //selector reverses all 16 bytes
+#else
+ return (__m128ll)vec_perm((__m128i)v,(__m128i){0},(__m128i){3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}); //selector reverses the bytes inside each 4-byte group
+#endif
+}
+
+FORCE_INLINE rx_vec_i128 rx_aesenc_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) { //AES encryption round through the POWER vcipher builtin
+ __m128ll _v = vrev(v); //state and key are both permuted into the builtin's byte order
+ __m128ll _rkey = vrev(rkey);
+ __m128ll result = vrev((__m128i)__builtin_crypto_vcipher(_v,_rkey)); //and the result is permuted back
+ return (rx_vec_i128)result;
+}
+
+FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) { //AES decryption round through the POWER vncipher builtin
+ __m128ll _v = vrev(v);
+ __m128ll zero = (__m128ll){0};
+ __m128ll out = vrev((__m128i)__builtin_crypto_vncipher(_v,zero)); //the builtin is run with an all-zero key
+ return (rx_vec_i128)vec_xor((__m128i)out,rkey); //the real round key is XORed in afterwards, on the un-permuted result
+}
+#define HAVE_AES 1
+
+#endif //__CRYPTO__
+
+FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) { //element 0 of a viewed as four ints
+ vec_u _a;
+ _a.i = a;
+ return _a.i32[0];
+}
+
+FORCE_INLINE int rx_vec_i128_y(rx_vec_i128 a) { //element 1
+ vec_u _a;
+ _a.i = a;
+ return _a.i32[1];
+}
+
+FORCE_INLINE int rx_vec_i128_z(rx_vec_i128 a) { //element 2
+ vec_u _a;
+ _a.i = a;
+ return _a.i32[2];
+}
+
+FORCE_INLINE int rx_vec_i128_w(rx_vec_i128 a) { //element 3
+ vec_u _a;
+ _a.i = a;
+ return _a.i32[3];
+}
+
+FORCE_INLINE rx_vec_i128 rx_set_int_vec_i128(int _I3, int _I2, int _I1, int _I0) { //parameters run from the highest element down; element 0 is _I0
+ return (rx_vec_i128)((__m128li){_I0,_I1,_I2,_I3});
+};
+
+FORCE_INLINE rx_vec_i128 rx_xor_vec_i128(rx_vec_i128 _A, rx_vec_i128 _B) { //bitwise XOR of the two vectors
+ return (rx_vec_i128)vec_xor(_A,_B);
+}
+
+FORCE_INLINE rx_vec_i128 rx_load_vec_i128(rx_vec_i128 const *_P) { //loads 16 bytes from _P
+#if defined(NATIVE_LITTLE_ENDIAN)
+ return *_P; //plain vector read
+#else
+ uint32_t* ptr = (uint32_t*)_P;
+ vec_u c;
+ c.u32[0] = load32(ptr + 0); //otherwise each 32-bit element goes through load32 (from blake2/endian.h)
+ c.u32[1] = load32(ptr + 1);
+ c.u32[2] = load32(ptr + 2);
+ c.u32[3] = load32(ptr + 3);
+ return (rx_vec_i128)c.i;
+#endif
+}
+
+FORCE_INLINE void rx_store_vec_i128(rx_vec_i128 *_P, rx_vec_i128 _B) { //stores the 16 bytes of _B at _P
+#if defined(NATIVE_LITTLE_ENDIAN)
+ *_P = _B; //plain vector write
+#else
+ uint32_t* ptr = (uint32_t*)_P;
+ vec_u B;
+ B.i = _B;
+ store32(ptr + 0, B.u32[0]); //otherwise each 32-bit element goes through store32 (from blake2/endian.h)
+ store32(ptr + 1, B.u32[1]);
+ store32(ptr + 2, B.u32[2]);
+ store32(ptr + 3, B.u32[3]);
+#endif
+}
+
+FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) { //converts the two signed 32-bit integers stored at addr into two doubles
+ vec_u x;
+ x.d64[0] = (double)unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 0)); //first integer: bytes 0..3
+ x.d64[1] = (double)unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 4)); //second integer: bytes 4..7
+ return (rx_vec_f128)x.d;
+}
+
+#define RANDOMX_DEFAULT_FENV
+
+#elif defined(__aarch64__)
+
+#include
+#include
+#include
+
+typedef uint8x16_t rx_vec_i128;
+typedef float64x2_t rx_vec_f128;
+
+inline void* rx_aligned_alloc(size_t size, size_t align) {
+ //posix_memalign reports success with 0; any failure is surfaced as a null pointer.
+ void* block = nullptr;
+ if (posix_memalign(&block, align, size) != 0)
+ return nullptr;
+ return block;
+}
+
+#define rx_aligned_free(a) free(a)
+
+inline void rx_prefetch_nta(const void* ptr) { //const-qualified like rx_prefetch_t0, so read-only data can be prefetched on this target too
+ asm volatile ("prfm pldl1strm, [%0]\n" : : "r" (ptr)); //prefetch-for-load hint with the streaming policy; ptr is only used as an address
+}
+
+inline void rx_prefetch_t0(const void* ptr) { //prefetch hint for the address ptr
+ asm volatile ("prfm pldl1strm, [%0]\n" : : "r" (ptr)); //NOTE(review): same streaming hint as rx_prefetch_nta, whereas the SSE2 build distinguishes T0 from NTA - confirm pldl1strm is intended here
+}
+
+FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) { //loads two doubles starting at pd
+ return vld1q_f64((const float64_t*)pd);
+}
+
+FORCE_INLINE void rx_store_vec_f128(double* mem_addr, rx_vec_f128 val) { //stores the two doubles of val starting at mem_addr
+ vst1q_f64((float64_t*)mem_addr, val);
+}
+
+FORCE_INLINE rx_vec_f128 rx_swap_vec_f128(rx_vec_f128 a) {
+ //Exchange the two 64-bit lanes. vextq_f64(a, a, 1) yields lane 1 of the
+ //first operand followed by lane 0 of the second, i.e. {a[1], a[0]}, and
+ //never reads an uninitialized scratch vector.
+ return vextq_f64(a, a, 1);
+}
+
+FORCE_INLINE rx_vec_f128 rx_set_vec_f128(uint64_t x1, uint64_t x0) {
+ //Result lanes are {x0, x1}: start from x0 in both lanes, then place x1 in lane 1.
+ const uint64x2_t bits = vsetq_lane_u64(x1, vdupq_n_u64(x0), 1);
+ return vreinterpretq_f64_u64(bits);
+}
+
+FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) { //both lanes receive the raw 64-bit pattern x
+ return vreinterpretq_f64_u64(vdupq_n_u64(x)); //reinterpretation of the bits, not an integer-to-double conversion
+}
+
+#define rx_add_vec_f128 vaddq_f64
+#define rx_sub_vec_f128 vsubq_f64
+#define rx_mul_vec_f128 vmulq_f64
+#define rx_div_vec_f128 vdivq_f64
+#define rx_sqrt_vec_f128 vsqrtq_f64
+
+FORCE_INLINE rx_vec_f128 rx_xor_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { //bitwise XOR, done on the byte view of both vectors
+ return vreinterpretq_f64_u8(veorq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b)));
+}
+
+FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { //bitwise AND, done on the byte view of both vectors
+ return vreinterpretq_f64_u8(vandq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b)));
+}
+
+FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { //bitwise OR, done on the byte view of both vectors
+ return vreinterpretq_f64_u8(vorrq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b)));
+}
+
+#ifdef __ARM_FEATURE_CRYPTO
+
+
+FORCE_INLINE rx_vec_i128 rx_aesenc_vec_i128(rx_vec_i128 a, rx_vec_i128 key) { //AES encryption round built from the ARMv8 crypto intrinsics
+ const uint8x16_t zero = { 0 };
+ return vaesmcq_u8(vaeseq_u8(a, zero)) ^ key; //AESE with an all-zero key, then AESMC, then the round key is XORed in last
+}
+
+FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 a, rx_vec_i128 key) { //AES decryption round built from the ARMv8 crypto intrinsics
+ const uint8x16_t zero = { 0 };
+ return vaesimcq_u8(vaesdq_u8(a, zero)) ^ key; //AESD with an all-zero key, then AESIMC, then the round key is XORed in last
+}
+
+#define HAVE_AES 1
+
+#endif
+
+#define rx_xor_vec_i128 veorq_u8
+
+FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) { //signed 32-bit lane 0 of a
+ return vgetq_lane_s32(vreinterpretq_s32_u8(a), 0);
+}
+
+FORCE_INLINE int rx_vec_i128_y(rx_vec_i128 a) { //signed 32-bit lane 1
+ return vgetq_lane_s32(vreinterpretq_s32_u8(a), 1);
+}
+
+FORCE_INLINE int rx_vec_i128_z(rx_vec_i128 a) { //signed 32-bit lane 2
+ return vgetq_lane_s32(vreinterpretq_s32_u8(a), 2);
+}
+
+FORCE_INLINE int rx_vec_i128_w(rx_vec_i128 a) { //signed 32-bit lane 3
+ return vgetq_lane_s32(vreinterpretq_s32_u8(a), 3);
+}
+
+FORCE_INLINE rx_vec_i128 rx_set_int_vec_i128(int _I3, int _I2, int _I1, int _I0) {
+ //Parameters are listed from the highest lane down to the lowest, so the
+ //staging array is filled in reverse parameter order: lane 0 holds _I0
+ //and lane 3 holds _I3.
+ const int32_t lanes[4] = { _I0, _I1, _I2, _I3 };
+ const int32x4_t packed = vld1q_s32(lanes);
+ return vreinterpretq_u8_s32(packed);
+}
+
+#define rx_xor_vec_i128 veorq_u8 //identical to the definition a few lines above; redundant but harmless
+
+FORCE_INLINE rx_vec_i128 rx_load_vec_i128(const rx_vec_i128* mem_addr) { //loads 16 bytes from mem_addr
+ return vld1q_u8((const uint8_t*)mem_addr);
+}
+
+FORCE_INLINE void rx_store_vec_i128(rx_vec_i128* mem_addr, rx_vec_i128 val) { //stores the 16 bytes of val at mem_addr
+ vst1q_u8((uint8_t*)mem_addr, val);
+}
+
+FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) {
+ double lo = unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 0));
+ double hi = unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 4));
+ //Converts the two signed 32-bit integers at addr to doubles. lo is first
+ //broadcast to both lanes so no lane of x is ever read uninitialized.
+ rx_vec_f128 x = vdupq_n_f64(lo);
+ return vsetq_lane_f64(hi, x, 1);
+}
+
+#define RANDOMX_DEFAULT_FENV
+
+#else //portable fallback
+
+#include
+#include
+#include
+#include
+
+typedef union { //portable 128-bit integer vector: the same 16 bytes viewed at four element widths
+ uint64_t u64[2];
+ uint32_t u32[4];
+ uint16_t u16[8];
+ uint8_t u8[16];
+} rx_vec_i128;
+
+typedef union { //portable 128-bit floating-point vector: two doubles overlaid on an rx_vec_i128
+ struct {
+ double lo; //shares storage with i.u64[0]
+ double hi; //shares storage with i.u64[1]
+ };
+ rx_vec_i128 i;
+} rx_vec_f128;
+
+#define rx_aligned_alloc(a, b) malloc(a) //NOTE(review): the alignment argument b is dropped here; the block only has whatever alignment malloc provides
+#define rx_aligned_free(a) free(a)
+#define rx_prefetch_nta(x) //prefetch hints expand to nothing in the portable build
+#define rx_prefetch_t0(x)
+
+FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) { //loads two doubles starting at pd
+ rx_vec_f128 x;
+ x.i.u64[0] = load64(pd + 0); //each lane is read as raw 64 bits through load64 (from blake2/endian.h)
+ x.i.u64[1] = load64(pd + 1);
+ return x;
+}
+
+FORCE_INLINE void rx_store_vec_f128(double* mem_addr, rx_vec_f128 a) { //stores the two doubles of a starting at mem_addr
+ store64(mem_addr + 0, a.i.u64[0]); //each lane is written as raw 64 bits through store64 (from blake2/endian.h)
+ store64(mem_addr + 1, a.i.u64[1]);
+}
+
+FORCE_INLINE rx_vec_f128 rx_swap_vec_f128(rx_vec_f128 a) { //returns a with its lo and hi lanes exchanged
+ rx_vec_f128 swapped;
+ swapped.lo = a.hi;
+ swapped.hi = a.lo;
+ return swapped;
+}
+
+FORCE_INLINE rx_vec_f128 rx_add_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
+ //Lane-wise sum, accumulated into the by-value copy of a.
+ a.lo += b.lo;
+ a.hi += b.hi;
+ return a;
+}
+
+FORCE_INLINE rx_vec_f128 rx_sub_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
+ //Lane-wise difference a - b, accumulated into the by-value copy of a.
+ a.lo -= b.lo;
+ a.hi -= b.hi;
+ return a;
+}
+
+FORCE_INLINE rx_vec_f128 rx_mul_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
+ //Lane-wise product, accumulated into the by-value copy of a.
+ a.lo *= b.lo;
+ a.hi *= b.hi;
+ return a;
+}
+
+FORCE_INLINE rx_vec_f128 rx_div_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
+ //Lane-wise quotient a / b, accumulated into the by-value copy of a.
+ a.lo /= b.lo;
+ a.hi /= b.hi;
+ return a;
+}
+
+FORCE_INLINE rx_vec_f128 rx_sqrt_vec_f128(rx_vec_f128 a) {
+ //Lane-wise square root through rx_sqrt, written back into the by-value copy of a.
+ a.lo = rx_sqrt(a.lo);
+ a.hi = rx_sqrt(a.hi);
+ return a;
+}
+
+FORCE_INLINE rx_vec_i128 rx_set1_long_vec_i128(uint64_t a) {
+ //Broadcast a into both 64-bit lanes.
+ rx_vec_i128 both;
+ both.u64[0] = both.u64[1] = a;
+ return both;
+}
+
+FORCE_INLINE rx_vec_f128 rx_vec_i128_vec_f128(rx_vec_i128 a) {
+ rx_vec_f128 bits; //same 128 bits, viewed through the floating-point union
+ bits.i = a;
+ return bits;
+}
+
+FORCE_INLINE rx_vec_f128 rx_set_vec_f128(uint64_t x1, uint64_t x0) {
+ rx_vec_f128 v; //x0 becomes the raw bits of the low lane, x1 those of the high lane
+ v.i.u64[1] = x1;
+ v.i.u64[0] = x0;
+ return v;
+}
+
+FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) {
+ //Both lanes receive the same raw 64-bit pattern x.
+ rx_vec_f128 v;
+ v.i.u64[0] = v.i.u64[1] = x;
+ return v;
+}
+
+FORCE_INLINE rx_vec_f128 rx_xor_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
+ //Bitwise XOR of the raw lane bits, folded into the by-value copy of a.
+ a.i.u64[0] ^= b.i.u64[0];
+ a.i.u64[1] ^= b.i.u64[1];
+ return a;
+}
+
+FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
+ //Bitwise AND of the raw lane bits, folded into the by-value copy of a.
+ a.i.u64[0] &= b.i.u64[0];
+ a.i.u64[1] &= b.i.u64[1];
+ return a;
+}
+
+FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
+ //Bitwise OR of the raw lane bits, folded into the by-value copy of a.
+ a.i.u64[0] |= b.i.u64[0];
+ a.i.u64[1] |= b.i.u64[1];
+ return a;
+}
+
+FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) { //32-bit lane 0; the uint32_t element is converted to int on return
+ return a.u32[0];
+}
+
+FORCE_INLINE int rx_vec_i128_y(rx_vec_i128 a) { //32-bit lane 1
+ return a.u32[1];
+}
+
+FORCE_INLINE int rx_vec_i128_z(rx_vec_i128 a) { //32-bit lane 2
+ return a.u32[2];
+}
+
+FORCE_INLINE int rx_vec_i128_w(rx_vec_i128 a) { //32-bit lane 3
+ return a.u32[3];
+}
+
+FORCE_INLINE rx_vec_i128 rx_set_int_vec_i128(int _I3, int _I2, int _I1, int _I0) {
+ //Parameters run from the highest lane to the lowest; lane 0 receives _I0.
+ const int ordered[4] = { _I0, _I1, _I2, _I3 };
+ rx_vec_i128 v;
+ for (int lane = 0; lane < 4; ++lane)
+ v.u32[lane] = ordered[lane];
+ return v;
+}
+
+FORCE_INLINE rx_vec_i128 rx_xor_vec_i128(rx_vec_i128 _A, rx_vec_i128 _B) {
+ //XOR the four 32-bit lanes of _B into the by-value copy of _A and
+ //hand that copy back as the result.
+ for (int lane = 0; lane < 4; ++lane) {
+ _A.u32[lane] ^= _B.u32[lane];
+ }
+ return _A;
+}
+
+FORCE_INLINE rx_vec_i128 rx_load_vec_i128(rx_vec_i128 const*_P) { //loads 16 bytes from _P
+#if defined(NATIVE_LITTLE_ENDIAN)
+ return *_P; //plain copy of the union
+#else
+ uint32_t* ptr = (uint32_t*)_P;
+ rx_vec_i128 c;
+ c.u32[0] = load32(ptr + 0); //otherwise each 32-bit lane goes through load32 (from blake2/endian.h)
+ c.u32[1] = load32(ptr + 1);
+ c.u32[2] = load32(ptr + 2);
+ c.u32[3] = load32(ptr + 3);
+ return c;
+#endif
+}
+
+FORCE_INLINE void rx_store_vec_i128(rx_vec_i128 *_P, rx_vec_i128 _B) { //stores the 16 bytes of _B at _P
+#if defined(NATIVE_LITTLE_ENDIAN)
+ *_P = _B; //plain copy of the union
+#else
+ uint32_t* ptr = (uint32_t*)_P;
+ store32(ptr + 0, _B.u32[0]); //otherwise each 32-bit lane goes through store32 (from blake2/endian.h)
+ store32(ptr + 1, _B.u32[1]);
+ store32(ptr + 2, _B.u32[2]);
+ store32(ptr + 3, _B.u32[3]);
+#endif
+}
+
+FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) { //converts the two signed 32-bit integers stored at addr into two doubles
+ rx_vec_f128 x;
+ x.lo = (double)unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 0)); //first integer: bytes 0..3
+ x.hi = (double)unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 4)); //second integer: bytes 4..7
+ return x;
+}
+
+#define RANDOMX_DEFAULT_FENV
+
+#endif
+
+#ifndef HAVE_AES
+static const char* platformError = "Platform doesn't support hardware AES";
+
+#include
+
+FORCE_INLINE rx_vec_i128 rx_aesenc_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) { //stub for builds without an AES path: always throws, never returns
+ throw std::runtime_error(platformError);
+}
+
+FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) { //stub for builds without an AES path: always throws, never returns
+ throw std::runtime_error(platformError);
+}
+
+#define HAVE_AES 0 //after this point HAVE_AES is always defined, as 0 or 1
+
+#endif
+
+#ifdef RANDOMX_DEFAULT_FENV
+
+void rx_reset_float_state(); //fenv-based versions, implemented in instructions_portable.cpp
+
+void rx_set_rounding_mode(uint32_t mode);
+
+uint32_t rx_get_rounding_mode();
+
+#endif
+
+double loadDoublePortable(const void* addr); //the scalar helpers below are implemented in instructions_portable.cpp
+uint64_t mulh(uint64_t, uint64_t); //high 64 bits of the unsigned 128-bit product
+int64_t smulh(int64_t, int64_t); //high 64 bits of the signed 128-bit product
+uint64_t rotl(uint64_t, unsigned int); //64-bit rotate left
+uint64_t rotr(uint64_t, unsigned int); //64-bit rotate right
diff --git a/src/RandomX/src/jit_compiler.hpp b/src/RandomX/src/jit_compiler.hpp
new file mode 100644
index 000000000..17fdad4e3
--- /dev/null
+++ b/src/RandomX/src/jit_compiler.hpp
@@ -0,0 +1,41 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holder nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#if defined(_M_X64) || defined(__x86_64__)
+#include "jit_compiler_x86.hpp" //64-bit x86 targets
+#elif defined(__aarch64__)
+#include "jit_compiler_a64.hpp" //64-bit ARM targets
+#else
+#include "jit_compiler_fallback.hpp" //every other target
+#endif
+
+#if defined(__OpenBSD__) || defined(__NetBSD__) || (defined(__APPLE__) && defined(__aarch64__))
+#define RANDOMX_FORCE_SECURE //NOTE(review): the macro is only defined here; confirm its effect where it is consumed
+#endif
diff --git a/src/RandomX/src/jit_compiler_a64.cpp b/src/RandomX/src/jit_compiler_a64.cpp
new file mode 100644
index 000000000..e45774e93
--- /dev/null
+++ b/src/RandomX/src/jit_compiler_a64.cpp
@@ -0,0 +1,1072 @@
+/*
+Copyright (c) 2018-2019, tevador
+Copyright (c) 2019, SChernykh
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holder nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "jit_compiler_a64.hpp"
+#include "superscalar.hpp"
+#include "program.hpp"
+#include "reciprocal.h"
+#include "virtual_memory.hpp"
+
+namespace ARMV8A { // base AArch64 opcode words; the emitters below OR register numbers and immediates into them
+
+constexpr uint32_t B = 0x14000000; // branch; word offset is OR-ed into the low bits (see "Jump back to the main loop")
+constexpr uint32_t EOR = 0xCA000000;
+constexpr uint32_t EOR32 = 0x4A000000; // w-register form of EOR (used for "eor w18, ...")
+constexpr uint32_t ADD = 0x8B000000;
+constexpr uint32_t SUB = 0xCB000000;
+constexpr uint32_t MUL = 0x9B007C00;
+constexpr uint32_t UMULH = 0x9BC07C00;
+constexpr uint32_t SMULH = 0x9B407C00;
+constexpr uint32_t MOVZ = 0xD2800000;
+constexpr uint32_t MOVN = 0x92800000;
+constexpr uint32_t MOVK = 0xF2800000;
+constexpr uint32_t ADD_IMM_LO = 0x91000000; // add immediate, low 12 bits of the value (see emitAddImmediate)
+constexpr uint32_t ADD_IMM_HI = 0x91400000; // add immediate, bits 12..23 of the value (see emitAddImmediate)
+constexpr uint32_t LDR_LITERAL = 0x58000000; // literal load; word offset to the literal goes in at bit 5
+constexpr uint32_t ROR = 0x9AC02C00; // rotate right by register
+constexpr uint32_t ROR_IMM = 0x93C00000; // rotate right by immediate (amount at bit 10, source register repeated at bits 5 and 16)
+constexpr uint32_t MOV_REG = 0xAA0003E0; // register move: destination in low bits, source at bit 16
+constexpr uint32_t MOV_VREG_EL = 0x6E080400; // vector element move used by h_FSWAP_R
+constexpr uint32_t FADD = 0x4E60D400;
+constexpr uint32_t FSUB = 0x4EE0D400;
+constexpr uint32_t FEOR = 0x6E201C00; // vector eor used by h_FSCAL_R
+constexpr uint32_t FMUL = 0x6E60DC00;
+constexpr uint32_t FDIV = 0x6E60FC00;
+constexpr uint32_t FSQRT = 0x6EE1F800;
+
+}
+
+namespace randomx {
+
+static const size_t CodeSize = ((uint8_t*)randomx_init_dataset_aarch64_end) - ((uint8_t*)randomx_program_aarch64); // bytes of static template copied into the JIT buffer by the constructor
+static const size_t MainLoopBegin = ((uint8_t*)randomx_program_aarch64_main_loop) - ((uint8_t*)randomx_program_aarch64); // template offset of the main-loop label
+static const size_t PrologueSize = ((uint8_t*)randomx_program_aarch64_vm_instructions) - ((uint8_t*)randomx_program_aarch64); // template offset where generated VM instructions start
+static const size_t ImulRcpLiteralsEnd = ((uint8_t*)randomx_program_aarch64_imul_rcp_literals_end) - ((uint8_t*)randomx_program_aarch64); // 64-bit reciprocals are stored downward from here, 32-bit literals upward
+
+static const size_t CalcDatasetItemSize = // space reserved after CodeSize for the routine built by generateSuperscalarHash
+ // Prologue
+ ((uint8_t*)randomx_calc_dataset_item_aarch64_prefetch - (uint8_t*)randomx_calc_dataset_item_aarch64) +
+ // Main loop
+ RANDOMX_CACHE_ACCESSES * (
+ // Main loop prologue
+ ((uint8_t*)randomx_calc_dataset_item_aarch64_mix - ((uint8_t*)randomx_calc_dataset_item_aarch64_prefetch)) + 4 +
+ // Inner main loop (instructions)
+ ((RANDOMX_SUPERSCALAR_LATENCY * 3) + 2) * 16 +
+ // Main loop epilogue
+ ((uint8_t*)randomx_calc_dataset_item_aarch64_store_result - (uint8_t*)randomx_calc_dataset_item_aarch64_mix) + 4
+ ) +
+ // Epilogue
+ ((uint8_t*)randomx_calc_dataset_item_aarch64_end - (uint8_t*)randomx_calc_dataset_item_aarch64_store_result);
+
+constexpr uint32_t IntRegMap[8] = { 4, 5, 6, 7, 12, 13, 14, 15 }; // AArch64 register number used for each of the 8 VM integer registers
+
+template<typename T> static constexpr size_t Log2(T value) { return (value > 1) ? (Log2(value / 2) + 1) : 0; } // floor(log2(value)); yields 0 for value <= 1
+
+JitCompilerA64::JitCompilerA64() // allocates the JIT buffer and seeds it with the static program template
+ : code((uint8_t*) allocMemoryPages(CodeSize + CalcDatasetItemSize))
+ , literalPos(ImulRcpLiteralsEnd)
+ , num32bitLiterals(0)
+{
+ memset(reg_changed_offset, 0, sizeof(reg_changed_offset));
+ memcpy(code, (void*) randomx_program_aarch64, CodeSize); // the CalcDatasetItemSize tail is filled later by generateSuperscalarHash
+
+#ifdef __GNUC__
+ __builtin___clear_cache(reinterpret_cast<char*>(code), reinterpret_cast<char*>(code + CodeSize)); // flush the instruction cache for the bytes just copied
+#endif
+}
+
+JitCompilerA64::~JitCompilerA64() // frees the buffer obtained in the constructor, using the same size expression
+{
+ freePagedMemory(code, CodeSize + CalcDatasetItemSize);
+}
+
+void JitCompilerA64::enableWriting() // applies setPagesRW to the whole JIT buffer (presumably read+write without execute; confirm in virtual_memory.hpp)
+{
+ setPagesRW(code, CodeSize + CalcDatasetItemSize);
+}
+
+void JitCompilerA64::enableExecution() // applies setPagesRX to the whole JIT buffer (presumably read+execute without write; confirm in virtual_memory.hpp)
+{
+ setPagesRX(code, CodeSize + CalcDatasetItemSize);
+}
+
+void JitCompilerA64::enableAll() // applies setPagesRWX to the whole JIT buffer (presumably read+write+execute; confirm in virtual_memory.hpp)
+{
+ setPagesRWX(code, CodeSize + CalcDatasetItemSize);
+}
+
+void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& config) // JIT-compiles `program` into the template copy held in `code`
+{
+ uint32_t codePos = MainLoopBegin + 4; // patch the two scratchpad-mask instructions that follow the first main-loop instruction
+
+ // and w16, w10, ScratchpadL3Mask64
+ emit32(0x121A0000 | 16 | (10 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos);
+
+ // and w17, w18, ScratchpadL3Mask64
+ emit32(0x121A0000 | 17 | (18 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos);
+
+ codePos = PrologueSize; // generated VM instructions start here
+ literalPos = ImulRcpLiteralsEnd; // reset the literal pools for this program
+ num32bitLiterals = 0;
+
+ for (uint32_t i = 0; i < RegistersCount; ++i)
+ reg_changed_offset[i] = codePos; // initial CBRANCH target for every register is the start of the program
+
+ for (uint32_t i = 0; i < program.getSize(); ++i)
+ {
+ Instruction& instr = program(i);
+ instr.src %= RegistersCount; // register indices are normalized in place before dispatch
+ instr.dst %= RegistersCount;
+ (this->*engine[instr.opcode])(instr, codePos);
+ }
+
+ // Update spMix2
+ // eor w18, config.readReg2, config.readReg3
+ emit32(ARMV8A::EOR32 | 18 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
+
+ // Jump back to the main loop
+ const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end) - ((uint8_t*)randomx_program_aarch64)) - codePos;
+ emit32(ARMV8A::B | (offset / 4), code, codePos);
+
+ // and w18, w18, CacheLineAlignMask
+ codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask1) - ((uint8_t*)randomx_program_aarch64));
+ emit32(0x121A0000 | 18 | (18 << 5) | ((Log2(RANDOMX_DATASET_BASE_SIZE) - 7) << 10), code, codePos);
+
+ // and w10, w10, CacheLineAlignMask
+ codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask2) - ((uint8_t*)randomx_program_aarch64));
+ emit32(0x121A0000 | 10 | (10 << 5) | ((Log2(RANDOMX_DATASET_BASE_SIZE) - 7) << 10), code, codePos);
+
+ // Update spMix1
+ // eor x10, config.readReg0, config.readReg1
+ codePos = ((uint8_t*)randomx_program_aarch64_update_spMix1) - ((uint8_t*)randomx_program_aarch64);
+ emit32(ARMV8A::EOR | 10 | (IntRegMap[config.readReg0] << 5) | (IntRegMap[config.readReg1] << 16), code, codePos);
+
+#ifdef __GNUC__
+ __builtin___clear_cache(reinterpret_cast<char*>(code + MainLoopBegin), reinterpret_cast<char*>(code + codePos)); // flush from the main loop up to the last patched instruction
+#endif
+}
+
+void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration& config, uint32_t datasetOffset) // like generateProgram, but jumps to the *_light epilogue and patches in datasetOffset
+{
+ uint32_t codePos = MainLoopBegin + 4; // patch the two scratchpad-mask instructions that follow the first main-loop instruction
+
+ // and w16, w10, ScratchpadL3Mask64
+ emit32(0x121A0000 | 16 | (10 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos);
+
+ // and w17, w18, ScratchpadL3Mask64
+ emit32(0x121A0000 | 17 | (18 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos);
+
+ codePos = PrologueSize; // generated VM instructions start here
+ literalPos = ImulRcpLiteralsEnd; // reset the literal pools for this program
+ num32bitLiterals = 0;
+
+ for (uint32_t i = 0; i < RegistersCount; ++i)
+ reg_changed_offset[i] = codePos; // initial CBRANCH target for every register is the start of the program
+
+ for (uint32_t i = 0; i < program.getSize(); ++i)
+ {
+ Instruction& instr = program(i);
+ instr.src %= RegistersCount; // register indices are normalized in place before dispatch
+ instr.dst %= RegistersCount;
+ (this->*engine[instr.opcode])(instr, codePos);
+ }
+
+ // Update spMix2
+ // eor w18, config.readReg2, config.readReg3
+ emit32(ARMV8A::EOR32 | 18 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
+
+ // Jump back to the main loop
+ const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end_light) - ((uint8_t*)randomx_program_aarch64)) - codePos;
+ emit32(ARMV8A::B | (offset / 4), code, codePos);
+
+ // and w2, w9, CacheLineAlignMask
+ codePos = (((uint8_t*)randomx_program_aarch64_light_cacheline_align_mask) - ((uint8_t*)randomx_program_aarch64));
+ emit32(0x121A0000 | 2 | (9 << 5) | ((Log2(RANDOMX_DATASET_BASE_SIZE) - 7) << 10), code, codePos);
+
+ // Update spMix1
+ // eor x10, config.readReg0, config.readReg1
+ codePos = ((uint8_t*)randomx_program_aarch64_update_spMix1) - ((uint8_t*)randomx_program_aarch64);
+ emit32(ARMV8A::EOR | 10 | (IntRegMap[config.readReg0] << 5) | (IntRegMap[config.readReg1] << 16), code, codePos);
+
+ // Apply dataset offset
+ codePos = ((uint8_t*)randomx_program_aarch64_light_dataset_offset) - ((uint8_t*)randomx_program_aarch64);
+
+ datasetOffset /= CacheLineSize; // the offset is applied in units of cache lines
+ const uint32_t imm_lo = datasetOffset & ((1 << 12) - 1); // low 12 bits for the first add
+ const uint32_t imm_hi = datasetOffset >> 12; // remaining bits for the second add
+
+ emit32(ARMV8A::ADD_IMM_LO | 2 | (2 << 5) | (imm_lo << 10), code, codePos);
+ emit32(ARMV8A::ADD_IMM_HI | 2 | (2 << 5) | (imm_hi << 10), code, codePos);
+
+#ifdef __GNUC__
+ __builtin___clear_cache(reinterpret_cast<char*>(code + MainLoopBegin), reinterpret_cast<char*>(code + codePos)); // flush from the main loop up to the last patched instruction
+#endif
+}
+
+template<size_t N>
+void JitCompilerA64::generateSuperscalarHash(SuperscalarProgram(&programs)[N], std::vector<uint64_t> &reciprocalCache) // builds the dataset-item routine from N superscalar programs, placed right after the static template
+{
+ uint32_t codePos = CodeSize; // output starts where the copied template ends
+
+ uint8_t* p1 = (uint8_t*)randomx_calc_dataset_item_aarch64;
+ uint8_t* p2 = (uint8_t*)randomx_calc_dataset_item_aarch64_prefetch;
+ memcpy(code + codePos, p1, p2 - p1); // static prologue
+ codePos += p2 - p1;
+
+ num32bitLiterals = 64; // makes emitMovImmediate skip its 32-bit literal path (it requires num32bitLiterals < 64)
+ constexpr uint32_t tmp_reg = 12;
+
+ for (size_t i = 0; i < N; ++i)
+ {
+ // and x11, x10, CacheSize / CacheLineSize - 1
+ emit32(0x92400000 | 11 | (10 << 5) | ((Log2(CacheSize / CacheLineSize) - 1) << 10), code, codePos);
+
+ p1 = ((uint8_t*)randomx_calc_dataset_item_aarch64_prefetch) + 4; // skip the template's first instruction; the `and` above takes its place
+ p2 = (uint8_t*)randomx_calc_dataset_item_aarch64_mix;
+ memcpy(code + codePos, p1, p2 - p1);
+ codePos += p2 - p1;
+
+ SuperscalarProgram& prog = programs[i];
+ const size_t progSize = prog.getSize();
+
+ uint32_t jmp_pos = codePos; // slot reserved for the branch over the literal pool
+ codePos += 4;
+
+ // Fill in literal pool
+ for (size_t j = 0; j < progSize; ++j)
+ {
+ const Instruction& instr = prog(j);
+ if (static_cast<randomx::SuperscalarInstructionType>(instr.opcode) == randomx::SuperscalarInstructionType::IMUL_RCP)
+ emit64(reciprocalCache[instr.getImm32()], code, codePos);
+ }
+
+ // Jump over literal pool
+ uint32_t literal_pos = jmp_pos;
+ emit32(ARMV8A::B | ((codePos - jmp_pos) / 4), code, literal_pos); // emit32 advances literal_pos, leaving it at the first pool entry
+
+ for (size_t j = 0; j < progSize; ++j)
+ {
+ const Instruction& instr = prog(j);
+ const uint32_t src = instr.src;
+ const uint32_t dst = instr.dst;
+
+ switch (static_cast<randomx::SuperscalarInstructionType>(instr.opcode))
+ {
+ case randomx::SuperscalarInstructionType::ISUB_R:
+ emit32(ARMV8A::SUB | dst | (dst << 5) | (src << 16), code, codePos);
+ break;
+ case randomx::SuperscalarInstructionType::IXOR_R:
+ emit32(ARMV8A::EOR | dst | (dst << 5) | (src << 16), code, codePos);
+ break;
+ case randomx::SuperscalarInstructionType::IADD_RS:
+ emit32(ARMV8A::ADD | dst | (dst << 5) | (instr.getModShift() << 10) | (src << 16), code, codePos);
+ break;
+ case randomx::SuperscalarInstructionType::IMUL_R:
+ emit32(ARMV8A::MUL | dst | (dst << 5) | (src << 16), code, codePos);
+ break;
+ case randomx::SuperscalarInstructionType::IROR_C:
+ emit32(ARMV8A::ROR_IMM | dst | (dst << 5) | ((instr.getImm32() & 63) << 10) | (dst << 16), code, codePos);
+ break;
+ case randomx::SuperscalarInstructionType::IADD_C7:
+ case randomx::SuperscalarInstructionType::IADD_C8:
+ case randomx::SuperscalarInstructionType::IADD_C9:
+ emitAddImmediate(dst, dst, instr.getImm32(), code, codePos);
+ break;
+ case randomx::SuperscalarInstructionType::IXOR_C7:
+ case randomx::SuperscalarInstructionType::IXOR_C8:
+ case randomx::SuperscalarInstructionType::IXOR_C9:
+ emitMovImmediate(tmp_reg, instr.getImm32(), code, codePos);
+ emit32(ARMV8A::EOR | dst | (dst << 5) | (tmp_reg << 16), code, codePos);
+ break;
+ case randomx::SuperscalarInstructionType::IMULH_R:
+ emit32(ARMV8A::UMULH | dst | (dst << 5) | (src << 16), code, codePos);
+ break;
+ case randomx::SuperscalarInstructionType::ISMULH_R:
+ emit32(ARMV8A::SMULH | dst | (dst << 5) | (src << 16), code, codePos);
+ break;
+ case randomx::SuperscalarInstructionType::IMUL_RCP:
+ {
+ int32_t offset = (literal_pos - codePos) / 4; // word distance back to this instruction's literal (the pool precedes the code)
+ offset &= (1 << 19) - 1; // keep the 19 bits that fit the literal-offset field
+ literal_pos += 8; // next IMUL_RCP uses the next 64-bit pool entry
+
+ // ldr tmp_reg, reciprocal
+ emit32(ARMV8A::LDR_LITERAL | tmp_reg | (offset << 5), code, codePos);
+
+ // mul dst, dst, tmp_reg
+ emit32(ARMV8A::MUL | dst | (dst << 5) | (tmp_reg << 16), code, codePos);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ p1 = (uint8_t*)randomx_calc_dataset_item_aarch64_mix;
+ p2 = (uint8_t*)randomx_calc_dataset_item_aarch64_store_result;
+ memcpy(code + codePos, p1, p2 - p1);
+ codePos += p2 - p1;
+
+ // Update registerValue
+ emit32(ARMV8A::MOV_REG | 10 | (prog.getAddressRegister() << 16), code, codePos);
+ }
+
+ p1 = (uint8_t*)randomx_calc_dataset_item_aarch64_store_result;
+ p2 = (uint8_t*)randomx_calc_dataset_item_aarch64_end;
+ memcpy(code + codePos, p1, p2 - p1); // static epilogue
+ codePos += p2 - p1;
+
+#ifdef __GNUC__
+ __builtin___clear_cache(reinterpret_cast<char*>(code + CodeSize), reinterpret_cast<char*>(code + codePos)); // flush the routine just generated
+#endif
+}
+
+template void JitCompilerA64::generateSuperscalarHash(SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES], std::vector<uint64_t> &reciprocalCache);
+
+DatasetInitFunc* JitCompilerA64::getDatasetInitFunc() // address of the randomx_init_dataset_aarch64 copy inside the JIT buffer
+{
+ return (DatasetInitFunc*)(code + (((uint8_t*)randomx_init_dataset_aarch64) - ((uint8_t*)randomx_program_aarch64))); // same offset from `code` as the routine has from the template start
+}
+
+size_t JitCompilerA64::getCodeSize() // size in bytes of the static template; does not include the CalcDatasetItemSize area
+{
+ return CodeSize;
+}
+
+void JitCompilerA64::emitMovImmediate(uint32_t dst, uint32_t imm, uint8_t* code, uint32_t& codePos) // emits code that loads the 32-bit immediate `imm` into register `dst`; advances codePos
+{
+ uint32_t k = codePos;
+
+ if (imm < (1 << 16))
+ {
+ // movz tmp_reg, imm32 (16 low bits)
+ emit32(ARMV8A::MOVZ | dst | (imm << 5), code, k);
+ }
+ else
+ {
+ if (num32bitLiterals < 64) // a 32-bit literal slot is still free: read the value from a vector lane
+ {
+ if (static_cast<int32_t>(imm) < 0) // top bit set: use the signed forms (smov / movn)
+ {
+ // smov dst, vN.s[M]
+ emit32(0x4E042C00 | dst | ((num32bitLiterals / 4) << 5) | ((num32bitLiterals % 4) << 19), code, k);
+ }
+ else
+ {
+ // umov dst, vN.s[M]
+ emit32(0x0E043C00 | dst | ((num32bitLiterals / 4) << 5) | ((num32bitLiterals % 4) << 19), code, k);
+ }
+
+ ((uint32_t*)(code + ImulRcpLiteralsEnd))[num32bitLiterals] = imm; // literal N goes to lane N % 4 of vector N / 4
+ ++num32bitLiterals;
+ }
+ else
+ {
+ if (static_cast<int32_t>(imm) < 0)
+ {
+ // movn tmp_reg, ~imm32 (16 high bits)
+ emit32(ARMV8A::MOVN | dst | (1 << 21) | ((~imm >> 16) << 5), code, k);
+ }
+ else
+ {
+ // movz tmp_reg, imm32 (16 high bits)
+ emit32(ARMV8A::MOVZ | dst | (1 << 21) | ((imm >> 16) << 5), code, k);
+ }
+
+ // movk tmp_reg, imm32 (16 low bits)
+ emit32(ARMV8A::MOVK | dst | ((imm & 0xFFFF) << 5), code, k);
+ }
+ }
+
+ codePos = k;
+}
+
+void JitCompilerA64::emitAddImmediate(uint32_t dst, uint32_t src, uint32_t imm, uint8_t* code, uint32_t& codePos) // emits dst = src + imm; advances codePos
+{
+ uint32_t k = codePos;
+
+ if (imm < (1 << 24)) // fits in two 12-bit add-immediate fields
+ {
+ const uint32_t imm_lo = imm & ((1 << 12) - 1);
+ const uint32_t imm_hi = imm >> 12;
+
+ if (imm_lo && imm_hi)
+ {
+ emit32(ARMV8A::ADD_IMM_LO | dst | (src << 5) | (imm_lo << 10), code, k);
+ emit32(ARMV8A::ADD_IMM_HI | dst | (dst << 5) | (imm_hi << 10), code, k); // second add continues from dst, not src
+ }
+ else if (imm_lo)
+ {
+ emit32(ARMV8A::ADD_IMM_LO | dst | (src << 5) | (imm_lo << 10), code, k);
+ }
+ else
+ {
+ emit32(ARMV8A::ADD_IMM_HI | dst | (src << 5) | (imm_hi << 10), code, k); // also taken for imm == 0 (adds zero)
+ }
+ }
+ else
+ {
+ constexpr uint32_t tmp_reg = 18;
+ emitMovImmediate(tmp_reg, imm, code, k); // larger values go through register 18
+
+ // add dst, src, tmp_reg
+ emit32(ARMV8A::ADD | dst | (src << 5) | (tmp_reg << 16), code, k);
+ }
+
+ codePos = k;
+}
+
+template<uint32_t tmp_reg>
+void JitCompilerA64::emitMemLoad(uint32_t dst, uint32_t src, Instruction& instr, uint8_t* code, uint32_t& codePos) // emits a 64-bit scratchpad load into register tmp_reg; advances codePos
+{
+ uint32_t k = codePos;
+
+ uint32_t imm = instr.getImm32();
+
+ if (src != dst) // address = (src + imm) masked to L1 or L2, chosen by getModMem()
+ {
+ imm &= instr.getModMem() ? (RANDOMX_SCRATCHPAD_L1 - 1) : (RANDOMX_SCRATCHPAD_L2 - 1);
+ emitAddImmediate(tmp_reg, src, imm, code, k);
+
+ constexpr uint32_t t = 0x927d0000 | tmp_reg | (tmp_reg << 5);
+ constexpr uint32_t andInstrL1 = t | ((Log2(RANDOMX_SCRATCHPAD_L1) - 4) << 10);
+ constexpr uint32_t andInstrL2 = t | ((Log2(RANDOMX_SCRATCHPAD_L2) - 4) << 10);
+
+ emit32(instr.getModMem() ? andInstrL1 : andInstrL2, code, k);
+
+ // ldr tmp_reg, [x2, tmp_reg]
+ emit32(0xf8606840 | tmp_reg | (tmp_reg << 16), code, k);
+ }
+ else // src == dst: the address is the immediate alone, masked with ScratchpadL3Mask
+ {
+ imm = (imm & ScratchpadL3Mask) >> 3; // stored as an 8-byte index; the load below scales it back with lsl 3
+ emitMovImmediate(tmp_reg, imm, code, k);
+
+ // ldr tmp_reg, [x2, tmp_reg, lsl 3]
+ emit32(0xf8607840 | tmp_reg | (tmp_reg << 16), code, k);
+ }
+
+ codePos = k;
+}
+
+template<uint32_t tmp_reg_fp>
+void JitCompilerA64::emitMemLoadFP(uint32_t src, Instruction& instr, uint8_t* code, uint32_t& codePos) // emits a load of two 32-bit scratchpad integers converted to two doubles in vector register tmp_reg_fp
+{
+ uint32_t k = codePos;
+
+ uint32_t imm = instr.getImm32();
+ constexpr uint32_t tmp_reg = 18;
+
+ imm &= instr.getModMem() ? (RANDOMX_SCRATCHPAD_L1 - 1) : (RANDOMX_SCRATCHPAD_L2 - 1);
+ emitAddImmediate(tmp_reg, src, imm, code, k);
+
+ constexpr uint32_t t = 0x927d0000 | tmp_reg | (tmp_reg << 5);
+ constexpr uint32_t andInstrL1 = t | ((Log2(RANDOMX_SCRATCHPAD_L1) - 4) << 10);
+ constexpr uint32_t andInstrL2 = t | ((Log2(RANDOMX_SCRATCHPAD_L2) - 4) << 10);
+
+ emit32(instr.getModMem() ? andInstrL1 : andInstrL2, code, k);
+
+ // add tmp_reg, x2, tmp_reg
+ emit32(ARMV8A::ADD | tmp_reg | (2 << 5) | (tmp_reg << 16), code, k);
+
+ // ldpsw tmp_reg, tmp_reg + 1, [tmp_reg]
+ emit32(0x69400000 | tmp_reg | (tmp_reg << 5) | ((tmp_reg + 1) << 10), code, k); // register tmp_reg + 1 (19) is overwritten as well
+
+ // ins tmp_reg_fp.d[0], tmp_reg
+ emit32(0x4E081C00 | tmp_reg_fp | (tmp_reg << 5), code, k);
+
+ // ins tmp_reg_fp.d[1], tmp_reg + 1
+ emit32(0x4E181C00 | tmp_reg_fp | ((tmp_reg + 1) << 5), code, k);
+
+ // scvtf tmp_reg_fp.2d, tmp_reg_fp.2d
+ emit32(0x4E61D800 | tmp_reg_fp | (tmp_reg_fp << 5), code, k);
+
+ codePos = k;
+}
+
+void JitCompilerA64::h_IADD_RS(Instruction& instr, uint32_t& codePos) // dst += src << shift, plus imm32 when dst is RegisterNeedsDisplacement
+{
+ uint32_t k = codePos;
+
+ const uint32_t src = IntRegMap[instr.src];
+ const uint32_t dst = IntRegMap[instr.dst];
+ const uint32_t shift = instr.getModShift();
+
+ // add dst, src << shift
+ emit32(ARMV8A::ADD | dst | (dst << 5) | (shift << 10) | (src << 16), code, k);
+
+ if (instr.dst == RegisterNeedsDisplacement) // compares the VM register index, not the mapped AArch64 register
+ emitAddImmediate(dst, dst, instr.getImm32(), code, k);
+
+ reg_changed_offset[instr.dst] = k; // code position after the latest write to dst; h_CBRANCH uses it as branch target
+ codePos = k;
+}
+
+void JitCompilerA64::h_IADD_M(Instruction& instr, uint32_t& codePos) // dst += 64-bit value loaded from the scratchpad
+{
+ uint32_t k = codePos;
+
+ const uint32_t src = IntRegMap[instr.src];
+ const uint32_t dst = IntRegMap[instr.dst];
+
+ constexpr uint32_t tmp_reg = 18;
+ emitMemLoad<tmp_reg>(dst, src, instr, code, k); // loaded value ends up in tmp_reg
+
+ // add dst, dst, tmp_reg
+ emit32(ARMV8A::ADD | dst | (dst << 5) | (tmp_reg << 16), code, k);
+
+ reg_changed_offset[instr.dst] = k; // code position after the latest write to dst; h_CBRANCH uses it as branch target
+ codePos = k;
+}
+
+void JitCompilerA64::h_ISUB_R(Instruction& instr, uint32_t& codePos) // dst -= src; when src == dst the immediate is subtracted instead
+{
+ uint32_t k = codePos;
+
+ const uint32_t src = IntRegMap[instr.src];
+ const uint32_t dst = IntRegMap[instr.dst];
+
+ if (src != dst)
+ {
+ // sub dst, dst, src
+ emit32(ARMV8A::SUB | dst | (dst << 5) | (src << 16), code, k);
+ }
+ else
+ {
+ emitAddImmediate(dst, dst, -instr.getImm32(), code, k); // NOTE(review): for imm32 == 0x80000000 the unsigned negation yields the same value; confirm the emitted add still matches the reference interpreter
+ }
+
+ reg_changed_offset[instr.dst] = k; // code position after the latest write to dst; h_CBRANCH uses it as branch target
+ codePos = k;
+}
+
+void JitCompilerA64::h_ISUB_M(Instruction& instr, uint32_t& codePos) // dst -= 64-bit value loaded from the scratchpad
+{
+ uint32_t k = codePos;
+
+ const uint32_t src = IntRegMap[instr.src];
+ const uint32_t dst = IntRegMap[instr.dst];
+
+ constexpr uint32_t tmp_reg = 18;
+ emitMemLoad<tmp_reg>(dst, src, instr, code, k); // loaded value ends up in tmp_reg
+
+ // sub dst, dst, tmp_reg
+ emit32(ARMV8A::SUB | dst | (dst << 5) | (tmp_reg << 16), code, k);
+
+ reg_changed_offset[instr.dst] = k; // code position after the latest write to dst; h_CBRANCH uses it as branch target
+ codePos = k;
+}
+
+void JitCompilerA64::h_IMUL_R(Instruction& instr, uint32_t& codePos) // dst *= src; when src == dst the multiplier is the immediate
+{
+ uint32_t k = codePos;
+
+ uint32_t src = IntRegMap[instr.src];
+ const uint32_t dst = IntRegMap[instr.dst];
+
+ if (src == dst)
+ {
+ src = 18; // materialize the immediate in register 18 and multiply by that
+ emitMovImmediate(src, instr.getImm32(), code, k);
+ }
+
+ // mul dst, dst, src
+ emit32(ARMV8A::MUL | dst | (dst << 5) | (src << 16), code, k);
+
+ reg_changed_offset[instr.dst] = k; // code position after the latest write to dst; h_CBRANCH uses it as branch target
+ codePos = k;
+}
+
+void JitCompilerA64::h_IMUL_M(Instruction& instr, uint32_t& codePos) // dst *= 64-bit value loaded from the scratchpad
+{
+ uint32_t k = codePos;
+
+ const uint32_t src = IntRegMap[instr.src];
+ const uint32_t dst = IntRegMap[instr.dst];
+
+ constexpr uint32_t tmp_reg = 18;
+ emitMemLoad<tmp_reg>(dst, src, instr, code, k); // loaded value ends up in tmp_reg
+
+ // mul dst, dst, tmp_reg
+ emit32(ARMV8A::MUL | dst | (dst << 5) | (tmp_reg << 16), code, k);
+
+ reg_changed_offset[instr.dst] = k; // code position after the latest write to dst; h_CBRANCH uses it as branch target
+ codePos = k;
+}
+
+void JitCompilerA64::h_IMULH_R(Instruction& instr, uint32_t& codePos) // emits umulh dst, dst, src
+{
+ uint32_t k = codePos;
+
+ const uint32_t src = IntRegMap[instr.src];
+ const uint32_t dst = IntRegMap[instr.dst];
+
+ // umulh dst, dst, src
+ emit32(ARMV8A::UMULH | dst | (dst << 5) | (src << 16), code, k);
+
+ reg_changed_offset[instr.dst] = k; // code position after the latest write to dst; h_CBRANCH uses it as branch target
+ codePos = k;
+}
+
+void JitCompilerA64::h_IMULH_M(Instruction& instr, uint32_t& codePos) // emits umulh of dst with a 64-bit value loaded from the scratchpad
+{
+ uint32_t k = codePos;
+
+ const uint32_t src = IntRegMap[instr.src];
+ const uint32_t dst = IntRegMap[instr.dst];
+
+ constexpr uint32_t tmp_reg = 18;
+ emitMemLoad<tmp_reg>(dst, src, instr, code, k); // loaded value ends up in tmp_reg
+
+ // umulh dst, dst, tmp_reg
+ emit32(ARMV8A::UMULH | dst | (dst << 5) | (tmp_reg << 16), code, k);
+
+ reg_changed_offset[instr.dst] = k; // code position after the latest write to dst; h_CBRANCH uses it as branch target
+ codePos = k;
+}
+
+void JitCompilerA64::h_ISMULH_R(Instruction& instr, uint32_t& codePos) // emits smulh dst, dst, src
+{
+ uint32_t k = codePos;
+
+ const uint32_t src = IntRegMap[instr.src];
+ const uint32_t dst = IntRegMap[instr.dst];
+
+ // smulh dst, dst, src
+ emit32(ARMV8A::SMULH | dst | (dst << 5) | (src << 16), code, k);
+
+ reg_changed_offset[instr.dst] = k; // code position after the latest write to dst; h_CBRANCH uses it as branch target
+ codePos = k;
+}
+
+void JitCompilerA64::h_ISMULH_M(Instruction& instr, uint32_t& codePos) // emits smulh of dst with a 64-bit value loaded from the scratchpad
+{
+ uint32_t k = codePos;
+
+ const uint32_t src = IntRegMap[instr.src];
+ const uint32_t dst = IntRegMap[instr.dst];
+
+ constexpr uint32_t tmp_reg = 18;
+ emitMemLoad<tmp_reg>(dst, src, instr, code, k); // loaded value ends up in tmp_reg
+
+ // smulh dst, dst, tmp_reg
+ emit32(ARMV8A::SMULH | dst | (dst << 5) | (tmp_reg << 16), code, k);
+
+ reg_changed_offset[instr.dst] = k; // code position after the latest write to dst; h_CBRANCH uses it as branch target
+ codePos = k;
+}
+
+void JitCompilerA64::h_IMUL_RCP(Instruction& instr, uint32_t& codePos) // dst *= 64-bit reciprocal constant derived from imm32
+{
+ const uint64_t divisor = instr.getImm32();
+ if (isZeroOrPowerOf2(divisor))
+ return; // nothing is emitted and reg_changed_offset is left unchanged
+
+ uint32_t k = codePos;
+
+ constexpr uint32_t tmp_reg = 18;
+ const uint32_t dst = IntRegMap[instr.dst];
+
+ constexpr uint64_t N = 1ULL << 63;
+ const uint64_t q = N / divisor;
+ const uint64_t r = N % divisor;
+#ifdef __GNUC__
+ const uint64_t shift = 64 - __builtin_clzll(divisor); // number of significant bits in divisor
+#else
+ uint64_t shift = 32; // portable equivalent: count down from 32 until the top set bit is found
+ for (uint64_t k = 1U << 31; (k & divisor) == 0; k >>= 1) // this k shadows the code-position k above
+ --shift;
+#endif
+
+ const uint32_t literal_id = (ImulRcpLiteralsEnd - literalPos) / sizeof(uint64_t); // index of this reciprocal among those emitted so far
+
+ literalPos -= sizeof(uint64_t); // the literal area grows downward from ImulRcpLiteralsEnd
+ *(uint64_t*)(code + literalPos) = (q << shift) + ((r << shift) / divisor);
+
+ if (literal_id < 13) // first 13 reciprocals are multiplied straight from a register listed below (presumably preloaded by the static code; confirm)
+ {
+ static constexpr uint32_t literal_regs[13] = { 30 << 16, 29 << 16, 28 << 16, 27 << 16, 26 << 16, 25 << 16, 24 << 16, 23 << 16, 22 << 16, 21 << 16, 20 << 16, 11 << 16, 0 };
+
+ // mul dst, dst, literal_reg
+ emit32(ARMV8A::MUL | dst | (dst << 5) | literal_regs[literal_id], code, k);
+ }
+ else
+ {
+ // ldr tmp_reg, reciprocal
+ const uint32_t offset = (literalPos - k) / 4;
+ emit32(ARMV8A::LDR_LITERAL | tmp_reg | (offset << 5), code, k);
+
+ // mul dst, dst, tmp_reg
+ emit32(ARMV8A::MUL | dst | (dst << 5) | (tmp_reg << 16), code, k);
+ }
+
+ reg_changed_offset[instr.dst] = k; // code position after the latest write to dst; h_CBRANCH uses it as branch target
+ codePos = k;
+}
+
+void JitCompilerA64::h_INEG_R(Instruction& instr, uint32_t& codePos) // dst = 0 - dst
+{
+ const uint32_t dst = IntRegMap[instr.dst];
+
+ // sub dst, xzr, dst
+ emit32(ARMV8A::SUB | dst | (31 << 5) | (dst << 16), code, codePos);
+
+ reg_changed_offset[instr.dst] = codePos; // emit32 has already advanced codePos past the instruction
+}
+
+void JitCompilerA64::h_IXOR_R(Instruction& instr, uint32_t& codePos) // dst ^= src; when src == dst the operand is the immediate
+{
+ uint32_t k = codePos;
+
+ uint32_t src = IntRegMap[instr.src];
+ const uint32_t dst = IntRegMap[instr.dst];
+
+ if (src == dst)
+ {
+ src = 18; // materialize the immediate in register 18 and xor with that
+ emitMovImmediate(src, instr.getImm32(), code, k);
+ }
+
+ // eor dst, dst, src
+ emit32(ARMV8A::EOR | dst | (dst << 5) | (src << 16), code, k);
+
+ reg_changed_offset[instr.dst] = k; // code position after the latest write to dst; h_CBRANCH uses it as branch target
+ codePos = k;
+}
+
+void JitCompilerA64::h_IXOR_M(Instruction& instr, uint32_t& codePos) // dst ^= 64-bit value loaded from the scratchpad
+{
+ uint32_t k = codePos;
+
+ const uint32_t src = IntRegMap[instr.src];
+ const uint32_t dst = IntRegMap[instr.dst];
+
+ constexpr uint32_t tmp_reg = 18;
+ emitMemLoad<tmp_reg>(dst, src, instr, code, k); // loaded value ends up in tmp_reg
+
+ // eor dst, dst, tmp_reg
+ emit32(ARMV8A::EOR | dst | (dst << 5) | (tmp_reg << 16), code, k);
+
+ reg_changed_offset[instr.dst] = k; // code position after the latest write to dst; h_CBRANCH uses it as branch target
+ codePos = k;
+}
+
+void JitCompilerA64::h_IROR_R(Instruction& instr, uint32_t& codePos) // rotate dst right by src, or by imm32 & 63 when src == dst
+{
+ const uint32_t src = IntRegMap[instr.src];
+ const uint32_t dst = IntRegMap[instr.dst];
+
+ if (src != dst)
+ {
+ // ror dst, dst, src
+ emit32(ARMV8A::ROR | dst | (dst << 5) | (src << 16), code, codePos);
+ }
+ else
+ {
+ // ror dst, dst, imm
+ emit32(ARMV8A::ROR_IMM | dst | (dst << 5) | ((instr.getImm32() & 63) << 10) | (dst << 16), code, codePos);
+ }
+
+ reg_changed_offset[instr.dst] = codePos; // emit32 has already advanced codePos past the instruction
+}
+
+void JitCompilerA64::h_IROL_R(Instruction& instr, uint32_t& codePos) // rotate dst left, implemented as a rotate right by the negated amount
+{
+ uint32_t k = codePos;
+
+ const uint32_t src = IntRegMap[instr.src];
+ const uint32_t dst = IntRegMap[instr.dst];
+
+ if (src != dst)
+ {
+ constexpr uint32_t tmp_reg = 18;
+
+ // sub tmp_reg, xzr, src
+ emit32(ARMV8A::SUB | tmp_reg | (31 << 5) | (src << 16), code, k);
+
+ // ror dst, dst, tmp_reg
+ emit32(ARMV8A::ROR | dst | (dst << 5) | (tmp_reg << 16), code, k);
+ }
+ else
+ {
+ // ror dst, dst, imm
+ emit32(ARMV8A::ROR_IMM | dst | (dst << 5) | ((-instr.getImm32() & 63) << 10) | (dst << 16), code, k); // amount is (-imm32) & 63
+ }
+
+ reg_changed_offset[instr.dst] = k; // code position after the latest write to dst; h_CBRANCH uses it as branch target
+ codePos = k;
+}
+
+void JitCompilerA64::h_ISWAP_R(Instruction& instr, uint32_t& codePos) // swaps src and dst through register 18; emits nothing when src == dst
+{
+ const uint32_t src = IntRegMap[instr.src];
+ const uint32_t dst = IntRegMap[instr.dst];
+
+ if (src == dst)
+ return;
+
+ uint32_t k = codePos;
+
+ constexpr uint32_t tmp_reg = 18;
+ emit32(ARMV8A::MOV_REG | tmp_reg | (dst << 16), code, k); // tmp = dst
+ emit32(ARMV8A::MOV_REG | dst | (src << 16), code, k); // dst = src
+ emit32(ARMV8A::MOV_REG | src | (tmp_reg << 16), code, k); // src = tmp
+
+ reg_changed_offset[instr.src] = k; // both registers were written, so both get a new CBRANCH target
+ reg_changed_offset[instr.dst] = k;
+ codePos = k;
+}
+
+void JitCompilerA64::h_FSWAP_R(Instruction& instr, uint32_t& codePos) // swaps the two 64-bit elements of vector register 16 + instr.dst, using v28 as scratch
+{
+ uint32_t k = codePos;
+
+ const uint32_t dst = instr.dst + 16; // no % 4 here: all 8 register indices map to v16..v23
+
+ constexpr uint32_t tmp_reg_fp = 28;
+ constexpr uint32_t src_index1 = 1 << 14; // selects source element 1
+ constexpr uint32_t dst_index1 = 1 << 20; // selects destination element 1
+
+ emit32(ARMV8A::MOV_VREG_EL | tmp_reg_fp | (dst << 5) | src_index1, code, k); // tmp[0] = dst[1]
+ emit32(ARMV8A::MOV_VREG_EL | dst | (dst << 5) | dst_index1, code, k); // dst[1] = dst[0]
+ emit32(ARMV8A::MOV_VREG_EL | dst | (tmp_reg_fp << 5), code, k); // dst[0] = tmp[0]
+
+ codePos = k;
+}
+
+void JitCompilerA64::h_FADD_R(Instruction& instr, uint32_t& codePos) // vector fadd: v(16 + dst % 4) += v(24 + src % 4)
+{
+ const uint32_t src = (instr.src % 4) + 24;
+ const uint32_t dst = (instr.dst % 4) + 16;
+
+ emit32(ARMV8A::FADD | dst | (dst << 5) | (src << 16), code, codePos);
+}
+
+void JitCompilerA64::h_FADD_M(Instruction& instr, uint32_t& codePos) // vector fadd of v(16 + dst % 4) with two doubles converted from scratchpad integers
+{
+ uint32_t k = codePos;
+
+ const uint32_t src = IntRegMap[instr.src];
+ const uint32_t dst = (instr.dst % 4) + 16;
+
+ constexpr uint32_t tmp_reg_fp = 28;
+ emitMemLoadFP<tmp_reg_fp>(src, instr, code, k); // converted operand ends up in v28
+
+ emit32(ARMV8A::FADD | dst | (dst << 5) | (tmp_reg_fp << 16), code, k);
+
+ codePos = k;
+}
+
+void JitCompilerA64::h_FSUB_R(Instruction& instr, uint32_t& codePos) // vector fsub: v(16 + dst % 4) -= v(24 + src % 4)
+{
+ const uint32_t src = (instr.src % 4) + 24;
+ const uint32_t dst = (instr.dst % 4) + 16;
+
+ emit32(ARMV8A::FSUB | dst | (dst << 5) | (src << 16), code, codePos);
+}
+
+void JitCompilerA64::h_FSUB_M(Instruction& instr, uint32_t& codePos) // vector fsub of v(16 + dst % 4) by two doubles converted from scratchpad integers
+{
+ uint32_t k = codePos;
+
+ const uint32_t src = IntRegMap[instr.src];
+ const uint32_t dst = (instr.dst % 4) + 16;
+
+ constexpr uint32_t tmp_reg_fp = 28;
+ emitMemLoadFP<tmp_reg_fp>(src, instr, code, k); // converted operand ends up in v28
+
+ emit32(ARMV8A::FSUB | dst | (dst << 5) | (tmp_reg_fp << 16), code, k);
+
+ codePos = k;
+}
+
+void JitCompilerA64::h_FSCAL_R(Instruction& instr, uint32_t& codePos) // xors v(16 + dst % 4) with v31 (v31 presumably holds the scale mask; confirm in the static asm)
+{
+ const uint32_t dst = (instr.dst % 4) + 16;
+
+ emit32(ARMV8A::FEOR | dst | (dst << 5) | (31 << 16), code, codePos);
+}
+
+void JitCompilerA64::h_FMUL_R(Instruction& instr, uint32_t& codePos) // vector fmul: v(20 + dst % 4) *= v(24 + src % 4)
+{
+ const uint32_t src = (instr.src % 4) + 24;
+ const uint32_t dst = (instr.dst % 4) + 20;
+
+ emit32(ARMV8A::FMUL | dst | (dst << 5) | (src << 16), code, codePos);
+}
+
+void JitCompilerA64::h_FDIV_M(Instruction& instr, uint32_t& codePos) // vector fdiv of v(20 + dst % 4) by a masked operand loaded from the scratchpad
+{
+ uint32_t k = codePos;
+
+ const uint32_t src = IntRegMap[instr.src];
+ const uint32_t dst = (instr.dst % 4) + 20;
+
+ constexpr uint32_t tmp_reg_fp = 28;
+ emitMemLoadFP<tmp_reg_fp>(src, instr, code, k); // converted operand ends up in v28
+
+ // and tmp_reg_fp, tmp_reg_fp, and_mask_reg
+ emit32(0x4E201C00 | tmp_reg_fp | (tmp_reg_fp << 5) | (29 << 16), code, k); // and-mask is read from v29
+
+ // orr tmp_reg_fp, tmp_reg_fp, or_mask_reg
+ emit32(0x4EA01C00 | tmp_reg_fp | (tmp_reg_fp << 5) | (30 << 16), code, k); // or-mask is read from v30
+
+ emit32(ARMV8A::FDIV | dst | (dst << 5) | (tmp_reg_fp << 16), code, k);
+
+ codePos = k;
+}
+
+void JitCompilerA64::h_FSQRT_R(Instruction& instr, uint32_t& codePos) // vector fsqrt in place on v(20 + dst % 4)
+{
+ const uint32_t dst = (instr.dst % 4) + 20;
+
+ emit32(ARMV8A::FSQRT | dst | (dst << 5), code, codePos);
+}
+
+void JitCompilerA64::h_CBRANCH(Instruction& instr, uint32_t& codePos) // adds an adjusted immediate to dst, tests a bit field of it and branches back to the last write of dst when the field is zero
+{
+ uint32_t k = codePos;
+
+ const uint32_t dst = IntRegMap[instr.dst];
+ const uint32_t modCond = instr.getModCond();
+ const uint32_t shift = modCond + ConditionOffset;
+ const uint32_t imm = (instr.getImm32() | (1U << shift)) & ~(1U << (shift - 1)); // force bit `shift` on and bit `shift - 1` off
+
+ emitAddImmediate(dst, dst, imm, code, k);
+
+ // tst dst, mask
+ static_assert((ConditionMask == 0xFF) && (ConditionOffset == 8), "Update tst encoding for different mask and offset");
+ emit32((0xF2781C1F - (modCond << 16)) | (dst << 5), code, k); // modCond adjusts the mask position inside the tst encoding
+
+ int32_t offset = reg_changed_offset[instr.dst]; // target: code position right after the previous write to dst
+ offset = ((offset - k) >> 2) & ((1 << 19) - 1); // signed word distance, truncated to the 19-bit branch field
+
+ // beq target
+ emit32(0x54000000 | (offset << 5), code, k);
+
+ for (uint32_t i = 0; i < RegistersCount; ++i)
+ reg_changed_offset[i] = k; // later branches may not jump back past this one
+
+ codePos = k;
+}
+
+void JitCompilerA64::h_CFROUND(Instruction& instr, uint32_t& codePos) // writes fpcr from two bits of the rotated src register
+{
+ uint32_t k = codePos;
+
+ const uint32_t src = IntRegMap[instr.src];
+
+ constexpr uint32_t tmp_reg = 18;
+ constexpr uint32_t fpcr_tmp_reg = 8; // presumably holds the base fpcr image maintained by the static code; confirm in the asm
+
+ // ror tmp_reg, src, imm
+ emit32(ARMV8A::ROR_IMM | tmp_reg | (src << 5) | ((instr.getImm32() & 63) << 10) | (src << 16), code, k);
+
+ // bfi fpcr_tmp_reg, tmp_reg, 40, 2
+ emit32(0xB3580400 | fpcr_tmp_reg | (tmp_reg << 5), code, k);
+
+ // rbit tmp_reg, fpcr_tmp_reg
+ emit32(0xDAC00000 | tmp_reg | (fpcr_tmp_reg << 5), code, k);
+
+ // msr fpcr, tmp_reg
+ emit32(0xD51B4400 | tmp_reg, code, k);
+
+ codePos = k; // no integer VM register is written, so reg_changed_offset is untouched
+}
+
+void JitCompilerA64::h_ISTORE(Instruction& instr, uint32_t& codePos) // stores src to the scratchpad at (dst + imm), masked to L1, L2 or L3
+{
+ uint32_t k = codePos;
+
+ const uint32_t src = IntRegMap[instr.src];
+ const uint32_t dst = IntRegMap[instr.dst];
+ constexpr uint32_t tmp_reg = 18;
+
+ uint32_t imm = instr.getImm32();
+
+ if (instr.getModCond() < StoreL3Condition) // below the threshold: L1 or L2 chosen by getModMem(); otherwise L3
+ imm &= instr.getModMem() ? (RANDOMX_SCRATCHPAD_L1 - 1) : (RANDOMX_SCRATCHPAD_L2 - 1);
+ else
+ imm &= RANDOMX_SCRATCHPAD_L3 - 1;
+
+ emitAddImmediate(tmp_reg, dst, imm, code, k);
+
+ constexpr uint32_t t = 0x927d0000 | tmp_reg | (tmp_reg << 5);
+ constexpr uint32_t andInstrL1 = t | ((Log2(RANDOMX_SCRATCHPAD_L1) - 4) << 10);
+ constexpr uint32_t andInstrL2 = t | ((Log2(RANDOMX_SCRATCHPAD_L2) - 4) << 10);
+ constexpr uint32_t andInstrL3 = t | ((Log2(RANDOMX_SCRATCHPAD_L3) - 4) << 10);
+
+ emit32((instr.getModCond() < StoreL3Condition) ? (instr.getModMem() ? andInstrL1 : andInstrL2) : andInstrL3, code, k); // same level selection as for imm above
+
+ // str src, [x2, tmp_reg]
+ emit32(0xF8206840 | src | (tmp_reg << 16), code, k);
+
+ codePos = k; // only memory is written, so reg_changed_offset is untouched
+}
+
+void JitCompilerA64::h_NOP(Instruction& instr, uint32_t& codePos) // emits nothing; codePos and reg_changed_offset are left untouched
+{
+}
+
+#include "instruction_weights.hpp"
+#define INST_HANDLE(x) REPN(&JitCompilerA64::h_##x, WT(x)) // presumably repeats the handler pointer WT(x) times; confirm REPN/WT in instruction_weights.hpp
+
+ InstructionGeneratorA64 JitCompilerA64::engine[256] = { // handler table indexed by instr.opcode in generateProgram / generateProgramLight
+ INST_HANDLE(IADD_RS)
+ INST_HANDLE(IADD_M)
+ INST_HANDLE(ISUB_R)
+ INST_HANDLE(ISUB_M)
+ INST_HANDLE(IMUL_R)
+ INST_HANDLE(IMUL_M)
+ INST_HANDLE(IMULH_R)
+ INST_HANDLE(IMULH_M)
+ INST_HANDLE(ISMULH_R)
+ INST_HANDLE(ISMULH_M)
+ INST_HANDLE(IMUL_RCP)
+ INST_HANDLE(INEG_R)
+ INST_HANDLE(IXOR_R)
+ INST_HANDLE(IXOR_M)
+ INST_HANDLE(IROR_R)
+ INST_HANDLE(IROL_R)
+ INST_HANDLE(ISWAP_R)
+ INST_HANDLE(FSWAP_R)
+ INST_HANDLE(FADD_R)
+ INST_HANDLE(FADD_M)
+ INST_HANDLE(FSUB_R)
+ INST_HANDLE(FSUB_M)
+ INST_HANDLE(FSCAL_R)
+ INST_HANDLE(FMUL_R)
+ INST_HANDLE(FDIV_M)
+ INST_HANDLE(FSQRT_R)
+ INST_HANDLE(CBRANCH)
+ INST_HANDLE(CFROUND)
+ INST_HANDLE(ISTORE)
+ INST_HANDLE(NOP)
+ };
+}
diff --git a/src/RandomX/src/jit_compiler_a64.hpp b/src/RandomX/src/jit_compiler_a64.hpp
new file mode 100644
index 000000000..a02824ffb
--- /dev/null
+++ b/src/RandomX/src/jit_compiler_a64.hpp
@@ -0,0 +1,128 @@
+/*
+Copyright (c) 2018-2019, tevador
+Copyright (c) 2019, SChernykh
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holder nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include
+#include