diff --git a/src/cc/cclib.cpp b/src/cc/cclib.cpp
index c770917ea..84f229069 100644
--- a/src/cc/cclib.cpp
+++ b/src/cc/cclib.cpp
@@ -581,6 +581,7 @@ uint256 juint256(cJSON *obj)
 #else
 #include "sudoku.cpp"
 #include "musig.cpp"
-#include "../secp256k1/src/modules/musig/example.c"
+#inclide "dilithium.c"
+//#include "../secp256k1/src/modules/musig/example.c"
 #endif
 
diff --git a/src/cc/dilithium.c b/src/cc/dilithium.c
new file mode 100644
index 000000000..b6624353e
--- /dev/null
+++ b/src/cc/dilithium.c
@@ -0,0 +1,2837 @@
+/* Based on the public domain implementation in
+ * crypto_hash/keccakc512/simple/ from http://bench.cr.yp.to/supercop.html
+ * by Ronny Van Keer
+ * and the public domain "TweetFips202" implementation
+ * from https://twitter.com/tweetfips202
+ * by Gilles Van Assche, Daniel J. Bernstein, and Peter Schwabe */
+
+#include <stdint.h>
+#include "dilithium.h"
+
+
+#define NROUNDS 24
+#define ROL(a, offset) ((a << offset) ^ (a >> (64-offset)))
+
+/*************************************************
+* Name:        load64
+*
+* Description: Load 8 bytes into uint64_t in little-endian order
+*
+* Arguments:   - const uint8_t *x: pointer to input byte array
+*
+* Returns the loaded 64-bit unsigned integer
+**************************************************/
+static uint64_t load64(const uint8_t *x) {
+  uint32_t i;
+  uint64_t r = 0;
+
+  for (i = 0; i < 8; ++i)
+    r |= (uint64_t)x[i] << 8*i;
+
+  return r;
+}
+
+/*************************************************
+* Name:        store64
+*
+* Description: Store a 64-bit integer to array of 8 bytes in little-endian order
+*
+* Arguments:   - uint8_t *x: pointer to the output byte array (allocated)
+*              - uint64_t u: input 64-bit unsigned integer
+**************************************************/
+static void store64(uint8_t *x, uint64_t u) {
+  uint32_t i;
+
+  for(i = 0; i < 8; ++i)
+    x[i] = u >> 8*i;
+}
+
+/* Keccak round constants */
+static const uint64_t KeccakF_RoundConstants[NROUNDS] = {
+  (uint64_t)0x0000000000000001ULL,
+  (uint64_t)0x0000000000008082ULL,
+  (uint64_t)0x800000000000808aULL,
+  (uint64_t)0x8000000080008000ULL,
+  (uint64_t)0x000000000000808bULL,
+  (uint64_t)0x0000000080000001ULL,
+  (uint64_t)0x8000000080008081ULL,
+  (uint64_t)0x8000000000008009ULL,
+  (uint64_t)0x000000000000008aULL,
+  (uint64_t)0x0000000000000088ULL,
+  (uint64_t)0x0000000080008009ULL,
+  (uint64_t)0x000000008000000aULL,
+  (uint64_t)0x000000008000808bULL,
+  (uint64_t)0x800000000000008bULL,
+  (uint64_t)0x8000000000008089ULL,
+  (uint64_t)0x8000000000008003ULL,
+  (uint64_t)0x8000000000008002ULL,
+  (uint64_t)0x8000000000000080ULL,
+  (uint64_t)0x000000000000800aULL,
+  (uint64_t)0x800000008000000aULL,
+  (uint64_t)0x8000000080008081ULL,
+  (uint64_t)0x8000000000008080ULL,
+  (uint64_t)0x0000000080000001ULL,
+  (uint64_t)0x8000000080008008ULL
+};
+
+/*************************************************
+* Name:        KeccakF1600_StatePermute
+*
+* Description: The Keccak F1600 Permutation
+*
+* Arguments:   - uint64_t *state: pointer to input/output Keccak state
+**************************************************/
+static void KeccakF1600_StatePermute(uint64_t *state)
+{
+        int round;
+
+        uint64_t Aba, Abe, Abi, Abo, Abu;
+        uint64_t Aga, Age, Agi, Ago, Agu;
+        uint64_t Aka, Ake, Aki, Ako, Aku;
+        uint64_t Ama, Ame, Ami, Amo, Amu;
+        uint64_t Asa, Ase, Asi, Aso, Asu;
+        uint64_t BCa, BCe, BCi, BCo, BCu;
+        uint64_t Da, De, Di, Do, Du;
+        uint64_t Eba, Ebe, Ebi, Ebo, Ebu;
+        uint64_t Ega, Ege, Egi, Ego, Egu;
+        uint64_t Eka, Eke, Eki, Eko, Eku;
+        uint64_t Ema, Eme, Emi, Emo, Emu;
+        uint64_t Esa, Ese, Esi, Eso, Esu;
+
+        //copyFromState(A, state)
+        Aba = state[ 0];
+        Abe = state[ 1];
+        Abi = state[ 2];
+        Abo = state[ 3];
+        Abu = state[ 4];
+        Aga = state[ 5];
+        Age = state[ 6];
+        Agi = state[ 7];
+        Ago = state[ 8];
+        Agu = state[ 9];
+        Aka = state[10];
+        Ake = state[11];
+        Aki = state[12];
+        Ako = state[13];
+        Aku = state[14];
+        Ama = state[15];
+        Ame = state[16];
+        Ami = state[17];
+        Amo = state[18];
+        Amu = state[19];
+        Asa = state[20];
+        Ase = state[21];
+        Asi = state[22];
+        Aso = state[23];
+        Asu = state[24];
+
+        for( round = 0; round < NROUNDS; round += 2 )
+        {
+            //    prepareTheta
+            BCa = Aba^Aga^Aka^Ama^Asa;
+            BCe = Abe^Age^Ake^Ame^Ase;
+            BCi = Abi^Agi^Aki^Ami^Asi;
+            BCo = Abo^Ago^Ako^Amo^Aso;
+            BCu = Abu^Agu^Aku^Amu^Asu;
+
+            //thetaRhoPiChiIotaPrepareTheta(round  , A, E)
+            Da = BCu^ROL(BCe, 1);
+            De = BCa^ROL(BCi, 1);
+            Di = BCe^ROL(BCo, 1);
+            Do = BCi^ROL(BCu, 1);
+            Du = BCo^ROL(BCa, 1);
+
+            Aba ^= Da;
+            BCa = Aba;
+            Age ^= De;
+            BCe = ROL(Age, 44);
+            Aki ^= Di;
+            BCi = ROL(Aki, 43);
+            Amo ^= Do;
+            BCo = ROL(Amo, 21);
+            Asu ^= Du;
+            BCu = ROL(Asu, 14);
+            Eba =   BCa ^((~BCe)&  BCi );
+            Eba ^= (uint64_t)KeccakF_RoundConstants[round];
+            Ebe =   BCe ^((~BCi)&  BCo );
+            Ebi =   BCi ^((~BCo)&  BCu );
+            Ebo =   BCo ^((~BCu)&  BCa );
+            Ebu =   BCu ^((~BCa)&  BCe );
+
+            Abo ^= Do;
+            BCa = ROL(Abo, 28);
+            Agu ^= Du;
+            BCe = ROL(Agu, 20);
+            Aka ^= Da;
+            BCi = ROL(Aka,  3);
+            Ame ^= De;
+            BCo = ROL(Ame, 45);
+            Asi ^= Di;
+            BCu = ROL(Asi, 61);
+            Ega =   BCa ^((~BCe)&  BCi );
+            Ege =   BCe ^((~BCi)&  BCo );
+            Egi =   BCi ^((~BCo)&  BCu );
+            Ego =   BCo ^((~BCu)&  BCa );
+            Egu =   BCu ^((~BCa)&  BCe );
+
+            Abe ^= De;
+            BCa = ROL(Abe,  1);
+            Agi ^= Di;
+            BCe = ROL(Agi,  6);
+            Ako ^= Do;
+            BCi = ROL(Ako, 25);
+            Amu ^= Du;
+            BCo = ROL(Amu,  8);
+            Asa ^= Da;
+            BCu = ROL(Asa, 18);
+            Eka =   BCa ^((~BCe)&  BCi );
+            Eke =   BCe ^((~BCi)&  BCo );
+            Eki =   BCi ^((~BCo)&  BCu );
+            Eko =   BCo ^((~BCu)&  BCa );
+            Eku =   BCu ^((~BCa)&  BCe );
+
+            Abu ^= Du;
+            BCa = ROL(Abu, 27);
+            Aga ^= Da;
+            BCe = ROL(Aga, 36);
+            Ake ^= De;
+            BCi = ROL(Ake, 10);
+            Ami ^= Di;
+            BCo = ROL(Ami, 15);
+            Aso ^= Do;
+            BCu = ROL(Aso, 56);
+            Ema =   BCa ^((~BCe)&  BCi );
+            Eme =   BCe ^((~BCi)&  BCo );
+            Emi =   BCi ^((~BCo)&  BCu );
+            Emo =   BCo ^((~BCu)&  BCa );
+            Emu =   BCu ^((~BCa)&  BCe );
+
+            Abi ^= Di;
+            BCa = ROL(Abi, 62);
+            Ago ^= Do;
+            BCe = ROL(Ago, 55);
+            Aku ^= Du;
+            BCi = ROL(Aku, 39);
+            Ama ^= Da;
+            BCo = ROL(Ama, 41);
+            Ase ^= De;
+            BCu = ROL(Ase,  2);
+            Esa =   BCa ^((~BCe)&  BCi );
+            Ese =   BCe ^((~BCi)&  BCo );
+            Esi =   BCi ^((~BCo)&  BCu );
+            Eso =   BCo ^((~BCu)&  BCa );
+            Esu =   BCu ^((~BCa)&  BCe );
+
+            //    prepareTheta
+            BCa = Eba^Ega^Eka^Ema^Esa;
+            BCe = Ebe^Ege^Eke^Eme^Ese;
+            BCi = Ebi^Egi^Eki^Emi^Esi;
+            BCo = Ebo^Ego^Eko^Emo^Eso;
+            BCu = Ebu^Egu^Eku^Emu^Esu;
+
+            //thetaRhoPiChiIotaPrepareTheta(round+1, E, A)
+            Da = BCu^ROL(BCe, 1);
+            De = BCa^ROL(BCi, 1);
+            Di = BCe^ROL(BCo, 1);
+            Do = BCi^ROL(BCu, 1);
+            Du = BCo^ROL(BCa, 1);
+
+            Eba ^= Da;
+            BCa = Eba;
+            Ege ^= De;
+            BCe = ROL(Ege, 44);
+            Eki ^= Di;
+            BCi = ROL(Eki, 43);
+            Emo ^= Do;
+            BCo = ROL(Emo, 21);
+            Esu ^= Du;
+            BCu = ROL(Esu, 14);
+            Aba =   BCa ^((~BCe)&  BCi );
+            Aba ^= (uint64_t)KeccakF_RoundConstants[round+1];
+            Abe =   BCe ^((~BCi)&  BCo );
+            Abi =   BCi ^((~BCo)&  BCu );
+            Abo =   BCo ^((~BCu)&  BCa );
+            Abu =   BCu ^((~BCa)&  BCe );
+
+            Ebo ^= Do;
+            BCa = ROL(Ebo, 28);
+            Egu ^= Du;
+            BCe = ROL(Egu, 20);
+            Eka ^= Da;
+            BCi = ROL(Eka, 3);
+            Eme ^= De;
+            BCo = ROL(Eme, 45);
+            Esi ^= Di;
+            BCu = ROL(Esi, 61);
+            Aga =   BCa ^((~BCe)&  BCi );
+            Age =   BCe ^((~BCi)&  BCo );
+            Agi =   BCi ^((~BCo)&  BCu );
+            Ago =   BCo ^((~BCu)&  BCa );
+            Agu =   BCu ^((~BCa)&  BCe );
+
+            Ebe ^= De;
+            BCa = ROL(Ebe, 1);
+            Egi ^= Di;
+            BCe = ROL(Egi, 6);
+            Eko ^= Do;
+            BCi = ROL(Eko, 25);
+            Emu ^= Du;
+            BCo = ROL(Emu, 8);
+            Esa ^= Da;
+            BCu = ROL(Esa, 18);
+            Aka =   BCa ^((~BCe)&  BCi );
+            Ake =   BCe ^((~BCi)&  BCo );
+            Aki =   BCi ^((~BCo)&  BCu );
+            Ako =   BCo ^((~BCu)&  BCa );
+            Aku =   BCu ^((~BCa)&  BCe );
+
+            Ebu ^= Du;
+            BCa = ROL(Ebu, 27);
+            Ega ^= Da;
+            BCe = ROL(Ega, 36);
+            Eke ^= De;
+            BCi = ROL(Eke, 10);
+            Emi ^= Di;
+            BCo = ROL(Emi, 15);
+            Eso ^= Do;
+            BCu = ROL(Eso, 56);
+            Ama =   BCa ^((~BCe)&  BCi );
+            Ame =   BCe ^((~BCi)&  BCo );
+            Ami =   BCi ^((~BCo)&  BCu );
+            Amo =   BCo ^((~BCu)&  BCa );
+            Amu =   BCu ^((~BCa)&  BCe );
+
+            Ebi ^= Di;
+            BCa = ROL(Ebi, 62);
+            Ego ^= Do;
+            BCe = ROL(Ego, 55);
+            Eku ^= Du;
+            BCi = ROL(Eku, 39);
+            Ema ^= Da;
+            BCo = ROL(Ema, 41);
+            Ese ^= De;
+            BCu = ROL(Ese, 2);
+            Asa =   BCa ^((~BCe)&  BCi );
+            Ase =   BCe ^((~BCi)&  BCo );
+            Asi =   BCi ^((~BCo)&  BCu );
+            Aso =   BCo ^((~BCu)&  BCa );
+            Asu =   BCu ^((~BCa)&  BCe );
+        }
+
+        //copyToState(state, A)
+        state[ 0] = Aba;
+        state[ 1] = Abe;
+        state[ 2] = Abi;
+        state[ 3] = Abo;
+        state[ 4] = Abu;
+        state[ 5] = Aga;
+        state[ 6] = Age;
+        state[ 7] = Agi;
+        state[ 8] = Ago;
+        state[ 9] = Agu;
+        state[10] = Aka;
+        state[11] = Ake;
+        state[12] = Aki;
+        state[13] = Ako;
+        state[14] = Aku;
+        state[15] = Ama;
+        state[16] = Ame;
+        state[17] = Ami;
+        state[18] = Amo;
+        state[19] = Amu;
+        state[20] = Asa;
+        state[21] = Ase;
+        state[22] = Asi;
+        state[23] = Aso;
+        state[24] = Asu;
+}
+
+/*************************************************
+* Name:        keccak_absorb
+*
+* Description: Absorb step of Keccak;
+*              non-incremental, starts by zeroeing the state.
+*
+* Arguments:   - uint64_t *s: pointer to (uninitialized) output Keccak state
+*              - unsigned int r: rate in bytes (e.g., 168 for SHAKE128)
+*              - const uint8_t *m: pointer to input to be absorbed into s
+*              - int32_t mlen: length of input in bytes
+*              - uint8_t p: domain-separation byte for different
+*                                 Keccak-derived functions
+**************************************************/
+static void keccak_absorb(uint64_t *s,
+                          uint32_t r,
+                          const uint8_t *m,
+                          int32_t mlen,
+                          uint8_t p)
+{
+  uint32_t i;
+  uint8_t t[200];
+  DBENCH_START();
+
+  /* Zero state */
+  for(i = 0; i < 25; ++i)
+    s[i] = 0;
+
+  while(mlen >= r) {
+    for(i = 0; i < r/8; ++i)
+      s[i] ^= load64(m + 8*i);
+
+    KeccakF1600_StatePermute(s);
+    mlen -= r;
+    m += r;
+  }
+
+  for(i = 0; i < r; ++i)
+    t[i] = 0;
+  for(i = 0; i < mlen; ++i)
+    t[i] = m[i];
+  t[i] = p;
+  t[r-1] |= 128;
+  for(i = 0; i < r/8; ++i)
+    s[i] ^= load64(t + 8*i);
+
+  DBENCH_STOP(*tshake);
+}
+
+/*************************************************
+* Name:        keccak_squeezeblocks
+*
+* Description: Squeeze step of Keccak. Squeezes full blocks of r bytes each.
+*              Modifies the state. Can be called multiple times to keep
+*              squeezing, i.e., is incremental.
+*
+* Arguments:   - uint8_t *h: pointer to output blocks
+*              - int32_t int nblocks: number of blocks to be
+*                                                squeezed (written to h)
+*              - uint64_t *s: pointer to input/output Keccak state
+*              - uint32_t r: rate in bytes (e.g., 168 for SHAKE128)
+**************************************************/
+static void keccak_squeezeblocks(uint8_t *h,
+                                 int32_t nblocks,
+                                 uint64_t *s,
+                                 uint32_t r)
+{
+  uint32_t i;
+  DBENCH_START();
+
+  while(nblocks > 0) {
+    KeccakF1600_StatePermute(s);
+    for(i=0; i < (r >> 3); i++) {
+      store64(h + 8*i, s[i]);
+    }
+    h += r;
+    --nblocks;
+  }
+
+  DBENCH_STOP(*tshake);
+}
+
+/*************************************************
+* Name:        shake128_absorb
+*
+* Description: Absorb step of the SHAKE128 XOF.
+*              non-incremental, starts by zeroeing the state.
+*
+* Arguments:   - uint64_t *s: pointer to (uninitialized) output Keccak state
+*              - const uint8_t *input: pointer to input to be absorbed
+*                                            into s
+*              - int32_t inlen: length of input in bytes
+**************************************************/
+void shake128_absorb(uint64_t *s,
+                     const uint8_t *input,
+                     int32_t inlen)
+{
+  keccak_absorb(s, SHAKE128_RATE, input, inlen, 0x1F);
+}
+
+/*************************************************
+* Name:        shake128_squeezeblocks
+*
+* Description: Squeeze step of SHAKE128 XOF. Squeezes full blocks of
+*              SHAKE128_RATE bytes each. Modifies the state. Can be called
+*              multiple times to keep squeezing, i.e., is incremental.
+*
+* Arguments:   - uint8_t *output: pointer to output blocks
+*              - int32_t nblocks: number of blocks to be squeezed
+*                                            (written to output)
+*              - uint64_t *s: pointer to input/output Keccak state
+**************************************************/
+void shake128_squeezeblocks(uint8_t *output,
+                            int32_t nblocks,
+                            uint64_t *s)
+{
+  keccak_squeezeblocks(output, nblocks, s, SHAKE128_RATE);
+}
+
+/*************************************************
+* Name:        shake256_absorb
+*
+* Description: Absorb step of the SHAKE256 XOF.
+*              non-incremental, starts by zeroeing the state.
+*
+* Arguments:   - uint64_t *s: pointer to (uninitialized) output Keccak state
+*              - const uint8_t *input: pointer to input to be absorbed
+*                                            into s
+*              - int32_t inlen: length of input in bytes
+**************************************************/
+void shake256_absorb(uint64_t *s,
+                     const uint8_t *input,
+                     int32_t inlen)
+{
+  keccak_absorb(s, SHAKE256_RATE, input, inlen, 0x1F);
+}
+
+/*************************************************
+* Name:        shake256_squeezeblocks
+*
+* Description: Squeeze step of SHAKE256 XOF. Squeezes full blocks of
+*              SHAKE256_RATE bytes each. Modifies the state. Can be called
+*              multiple times to keep squeezing, i.e., is incremental.
+*
+* Arguments:   - uint8_t *output: pointer to output blocks
+*              - int32_t nblocks: number of blocks to be squeezed
+*                                            (written to output)
+*              - uint64_t *s: pointer to input/output Keccak state
+**************************************************/
+void shake256_squeezeblocks(uint8_t *output,
+                            int32_t nblocks,
+                            uint64_t *s)
+{
+  keccak_squeezeblocks(output, nblocks, s, SHAKE256_RATE);
+}
+
+/*************************************************
+* Name:        shake128
+*
+* Description: SHAKE128 XOF with non-incremental API
+*
+* Arguments:   - uint8_t *output: pointer to output
+*              - int32_t outlen: requested output length in bytes
+*              - const uint8_t *input: pointer to input
+*              - int32_t inlen: length of input in bytes
+**************************************************/
+void shake128(uint8_t *output,
+              int32_t outlen,
+              const uint8_t *input,
+              int32_t inlen)
+{
+  uint32_t i,nblocks = outlen/SHAKE128_RATE;
+  uint8_t t[SHAKE128_RATE];
+  uint64_t s[25];
+
+  shake128_absorb(s, input, inlen);
+  shake128_squeezeblocks(output, nblocks, s);
+
+  output += nblocks*SHAKE128_RATE;
+  outlen -= nblocks*SHAKE128_RATE;
+
+  if(outlen) {
+    shake128_squeezeblocks(t, 1, s);
+    for(i = 0; i < outlen; ++i)
+      output[i] = t[i];
+  }
+}
+
+/*************************************************
+* Name:        shake256
+*
+* Description: SHAKE256 XOF with non-incremental API
+*
+* Arguments:   - uint8_t *output: pointer to output
+*              - int32_t outlen: requested output length in bytes
+*              - const uint8_t *input: pointer to input
+*              - int32_t inlen: length of input in bytes
+**************************************************/
+void shake256(uint8_t *output,
+              int32_t outlen,
+              const uint8_t *input,
+              int32_t inlen)
+{
+  uint32_t i,nblocks = outlen/SHAKE256_RATE;
+  uint8_t t[SHAKE256_RATE];
+  uint64_t s[25];
+
+  shake256_absorb(s, input, inlen);
+  shake256_squeezeblocks(output, nblocks, s);
+
+  output += nblocks*SHAKE256_RATE;
+  outlen -= nblocks*SHAKE256_RATE;
+
+  if(outlen) {
+    shake256_squeezeblocks(t, 1, s);
+    for(i = 0; i < outlen; ++i)
+      output[i] = t[i];
+  }
+}
+//#include "params.h"
+//#include "reduce.h"
+//#include "ntt.h"
+//#include "poly.h"
+
+/* Roots of unity in order needed by forward ntt */
+static const uint32_t zetas[N] = {0, 25847, 5771523, 7861508, 237124, 7602457, 7504169, 466468, 1826347, 2353451, 8021166, 6288512, 3119733, 5495562, 3111497, 2680103, 2725464, 1024112, 7300517, 3585928, 7830929, 7260833, 2619752, 6271868, 6262231, 4520680, 6980856, 5102745, 1757237, 8360995, 4010497, 280005, 2706023, 95776, 3077325, 3530437, 6718724, 4788269, 5842901, 3915439, 4519302, 5336701, 3574422, 5512770, 3539968, 8079950, 2348700, 7841118, 6681150, 6736599, 3505694, 4558682, 3507263, 6239768, 6779997, 3699596, 811944, 531354, 954230, 3881043, 3900724, 5823537, 2071892, 5582638, 4450022, 6851714, 4702672, 5339162, 6927966, 3475950, 2176455, 6795196, 7122806, 1939314, 4296819, 7380215, 5190273, 5223087, 4747489, 126922, 3412210, 7396998, 2147896, 2715295, 5412772, 4686924, 7969390, 5903370, 7709315, 7151892, 8357436, 7072248, 7998430, 1349076, 1852771, 6949987, 5037034, 264944, 508951, 3097992, 44288, 7280319, 904516, 3958618, 4656075, 8371839, 1653064, 5130689, 2389356, 8169440, 759969, 7063561, 189548, 4827145, 3159746, 6529015, 5971092, 8202977, 1315589, 1341330, 1285669, 6795489, 7567685, 6940675, 5361315, 4499357, 4751448, 3839961, 2091667, 3407706, 2316500, 3817976, 5037939, 2244091, 5933984, 4817955, 266997, 2434439, 7144689, 3513181, 4860065, 4621053, 7183191, 5187039, 900702, 1859098, 909542, 819034, 495491, 6767243, 8337157, 7857917, 7725090, 5257975, 2031748, 3207046, 4823422, 7855319, 7611795, 4784579, 342297, 286988, 5942594, 4108315, 3437287, 5038140, 1735879, 203044, 2842341, 2691481, 5790267, 1265009, 4055324, 1247620, 2486353, 1595974, 4613401, 1250494, 2635921, 4832145, 5386378, 1869119, 1903435, 7329447, 7047359, 1237275, 5062207, 6950192, 7929317, 1312455, 3306115, 6417775, 7100756, 1917081, 5834105, 7005614, 1500165, 777191, 2235880, 3406031, 7838005, 5548557, 6709241, 6533464, 5796124, 4656147, 594136, 4603424, 6366809, 2432395, 2454455, 8215696, 1957272, 3369112, 185531, 7173032, 5196991, 162844, 1616392, 3014001, 810149, 1652634, 4686184, 6581310, 5341501, 3523897, 3866901, 269760, 2213111, 7404533, 1717735, 472078, 7953734, 1723600, 6577327, 1910376, 6712985, 7276084, 8119771, 4546524, 5441381, 6144432, 7959518, 6094090, 183443, 7403526, 1612842, 4834730, 7826001, 3919660, 8332111, 7018208, 3937738, 1400424, 7534263, 1976782};
+
+/* Roots of unity in order needed by inverse ntt */
+static const uint32_t zetas_inv[N] = {6403635, 846154, 6979993, 4442679, 1362209, 48306, 4460757, 554416, 3545687, 6767575, 976891, 8196974, 2286327, 420899, 2235985, 2939036, 3833893, 260646, 1104333, 1667432, 6470041, 1803090, 6656817, 426683, 7908339, 6662682, 975884, 6167306, 8110657, 4513516, 4856520, 3038916, 1799107, 3694233, 6727783, 7570268, 5366416, 6764025, 8217573, 3183426, 1207385, 8194886, 5011305, 6423145, 164721, 5925962, 5948022, 2013608, 3776993, 7786281, 3724270, 2584293, 1846953, 1671176, 2831860, 542412, 4974386, 6144537, 7603226, 6880252, 1374803, 2546312, 6463336, 1279661, 1962642, 5074302, 7067962, 451100, 1430225, 3318210, 7143142, 1333058, 1050970, 6476982, 6511298, 2994039, 3548272, 5744496, 7129923, 3767016, 6784443, 5894064, 7132797, 4325093, 7115408, 2590150, 5688936, 5538076, 8177373, 6644538, 3342277, 4943130, 4272102, 2437823, 8093429, 8038120, 3595838, 768622, 525098, 3556995, 5173371, 6348669, 3122442, 655327, 522500, 43260, 1613174, 7884926, 7561383, 7470875, 6521319, 7479715, 3193378, 1197226, 3759364, 3520352, 4867236, 1235728, 5945978, 8113420, 3562462, 2446433, 6136326, 3342478, 4562441, 6063917, 4972711, 6288750, 4540456, 3628969, 3881060, 3019102, 1439742, 812732, 1584928, 7094748, 7039087, 7064828, 177440, 2409325, 1851402, 5220671, 3553272, 8190869, 1316856, 7620448, 210977, 5991061, 3249728, 6727353, 8578, 3724342, 4421799, 7475901, 1100098, 8336129, 5282425, 7871466, 8115473, 3343383, 1430430, 6527646, 7031341, 381987, 1308169, 22981, 1228525, 671102, 2477047, 411027, 3693493, 2967645, 5665122, 6232521, 983419, 4968207, 8253495, 3632928, 3157330, 3190144, 1000202, 4083598, 6441103, 1257611, 1585221, 6203962, 4904467, 1452451, 3041255, 3677745, 1528703, 3930395, 2797779, 6308525, 2556880, 4479693, 4499374, 7426187, 7849063, 7568473, 4680821, 1600420, 2140649, 4873154, 3821735, 4874723, 1643818, 1699267, 539299, 6031717, 300467, 4840449, 2867647, 4805995, 3043716, 3861115, 4464978, 2537516, 3592148, 1661693, 4849980, 5303092, 8284641, 5674394, 8100412, 4369920, 19422, 6623180, 3277672, 1399561, 3859737, 2118186, 2108549, 5760665, 1119584, 549488, 4794489, 1079900, 7356305, 5654953, 5700314, 5268920, 2884855, 5260684, 2091905, 359251, 6026966, 6554070, 7913949, 876248, 777960, 8143293, 518909, 2608894, 8354570};
+
+/*************************************************
+* Name:        ntt
+*
+* Description: Forward NTT, in-place. No modular reduction is performed after
+*              additions or subtractions. Hence output coefficients can be up
+*              to 16*Q larger than the coefficients of the input polynomial.
+*              Output vector is in bitreversed order.
+*
+* Arguments:   - uint32_t p[N]: input/output coefficient array
+**************************************************/
+void ntt(uint32_t p[N]) {
+  uint32_t len, start, j, k;
+  uint32_t zeta, t;
+
+  k = 1;
+  for(len = 128; len > 0; len >>= 1) {
+    for(start = 0; start < N; start = j + len) {
+      zeta = zetas[k++];
+      for(j = start; j < start + len; ++j) {
+        t = montgomery_reduce((uint64_t)zeta * p[j + len]);
+        p[j + len] = p[j] + 2*Q - t;
+        p[j] = p[j] + t;
+      }
+    }
+  }
+}
+
+/*************************************************
+* Name:        invntt_frominvmont
+*
+* Description: Inverse NTT and multiplication by Montgomery factor 2^32.
+*              In-place. No modular reductions after additions or
+*              subtractions. Input coefficient need to be smaller than 2*Q.
+*              Output coefficient are smaller than 2*Q.
+*
+* Arguments:   - uint32_t p[N]: input/output coefficient array
+**************************************************/
+void invntt_frominvmont(uint32_t p[N]) {
+  uint32_t start, len, j, k;
+  uint32_t t, zeta;
+  const uint32_t f = (((uint64_t)MONT*MONT % Q) * (Q-1) % Q) * ((Q-1) >> 8) % Q;
+
+  k = 0;
+  for(len = 1; len < N; len <<= 1) {
+    for(start = 0; start < N; start = j + len) {
+      zeta = zetas_inv[k++];
+      for(j = start; j < start + len; ++j) {
+        t = p[j];
+        p[j] = t + p[j + len];
+        p[j + len] = t + 256*Q - p[j + len];
+        p[j + len] = montgomery_reduce((uint64_t)zeta * p[j + len]);
+      }
+    }
+  }
+
+  for(j = 0; j < N; ++j) {
+    p[j] = montgomery_reduce((uint64_t)f * p[j]);
+  }
+}
+//#include "params.h"
+//#include "poly.h"
+//#include "polyvec.h"
+//#include "packing.h"
+
+/*************************************************
+* Name:        pack_pk
+*
+* Description: Bit-pack public key pk = (rho, t1).
+*
+* Arguments:   - uint8_t pk[]: output byte array
+*              - const uint8_t rho[]: byte array containing rho
+*              - const polyveck *t1: pointer to vector t1
+**************************************************/
+void pack_pk(uint8_t pk[CRYPTO_PUBLICKEYBYTES],
+             const uint8_t rho[SEEDBYTES],
+             const polyveck *t1)
+{
+  uint32_t i;
+
+  for(i = 0; i < SEEDBYTES; ++i)
+    pk[i] = rho[i];
+  pk += SEEDBYTES;
+
+  for(i = 0; i < K; ++i)
+    polyt1_pack(pk + i*POLT1_SIZE_PACKED, t1->vec+i);
+}
+
+/*************************************************
+* Name:        unpack_pk
+*
+* Description: Unpack public key pk = (rho, t1).
+*
+* Arguments:   - const uint8_t rho[]: output byte array for rho
+*              - const polyveck *t1: pointer to output vector t1
+*              - uint8_t pk[]: byte array containing bit-packed pk
+**************************************************/
+void unpack_pk(uint8_t rho[SEEDBYTES],
+               polyveck *t1,
+               const uint8_t pk[CRYPTO_PUBLICKEYBYTES])
+{
+  uint32_t i;
+
+  for(i = 0; i < SEEDBYTES; ++i)
+    rho[i] = pk[i];
+  pk += SEEDBYTES;
+
+  for(i = 0; i < K; ++i)
+    polyt1_unpack(t1->vec+i, pk + i*POLT1_SIZE_PACKED);
+}
+
+/*************************************************
+* Name:        pack_sk
+*
+* Description: Bit-pack secret key sk = (rho, key, tr, s1, s2, t0).
+*
+* Arguments:   - uint8_t sk[]: output byte array
+*              - const uint8_t rho[]: byte array containing rho
+*              - const uint8_t key[]: byte array containing key
+*              - const uint8_t tr[]: byte array containing tr
+*              - const polyvecl *s1: pointer to vector s1
+*              - const polyveck *s2: pointer to vector s2
+*              - const polyveck *t0: pointer to vector t0
+**************************************************/
+void pack_sk(uint8_t sk[CRYPTO_SECRETKEYBYTES],
+             const uint8_t rho[SEEDBYTES],
+             const uint8_t key[SEEDBYTES],
+             const uint8_t tr[CRHBYTES],
+             const polyvecl *s1,
+             const polyveck *s2,
+             const polyveck *t0)
+{
+  uint32_t i;
+
+  for(i = 0; i < SEEDBYTES; ++i)
+    sk[i] = rho[i];
+  sk += SEEDBYTES;
+
+  for(i = 0; i < SEEDBYTES; ++i)
+    sk[i] = key[i];
+  sk += SEEDBYTES;
+
+  for(i = 0; i < CRHBYTES; ++i)
+    sk[i] = tr[i];
+  sk += CRHBYTES;
+
+  for(i = 0; i < L; ++i)
+    polyeta_pack(sk + i*POLETA_SIZE_PACKED, s1->vec+i);
+  sk += L*POLETA_SIZE_PACKED;
+
+  for(i = 0; i < K; ++i)
+    polyeta_pack(sk + i*POLETA_SIZE_PACKED, s2->vec+i);
+  sk += K*POLETA_SIZE_PACKED;
+
+  for(i = 0; i < K; ++i)
+    polyt0_pack(sk + i*POLT0_SIZE_PACKED, t0->vec+i);
+}
+
+/*************************************************
+* Name:        unpack_sk
+*
+* Description: Unpack secret key sk = (rho, key, tr, s1, s2, t0).
+*
+* Arguments:   - const uint8_t rho[]: output byte array for rho
+*              - const uint8_t key[]: output byte array for key
+*              - const uint8_t tr[]: output byte array for tr
+*              - const polyvecl *s1: pointer to output vector s1
+*              - const polyveck *s2: pointer to output vector s2
+*              - const polyveck *r0: pointer to output vector t0
+*              - uint8_t sk[]: byte array containing bit-packed sk
+**************************************************/
+void unpack_sk(uint8_t rho[SEEDBYTES],
+               uint8_t key[SEEDBYTES],
+               uint8_t tr[CRHBYTES],
+               polyvecl *s1,
+               polyveck *s2,
+               polyveck *t0,
+               const uint8_t sk[CRYPTO_SECRETKEYBYTES])
+{
+  uint32_t i;
+
+  for(i = 0; i < SEEDBYTES; ++i)
+    rho[i] = sk[i];
+  sk += SEEDBYTES;
+
+  for(i = 0; i < SEEDBYTES; ++i)
+    key[i] = sk[i];
+  sk += SEEDBYTES;
+
+  for(i = 0; i < CRHBYTES; ++i)
+    tr[i] = sk[i];
+  sk += CRHBYTES;
+
+  for(i=0; i < L; ++i)
+    polyeta_unpack(s1->vec+i, sk + i*POLETA_SIZE_PACKED);
+  sk += L*POLETA_SIZE_PACKED;
+
+  for(i=0; i < K; ++i)
+    polyeta_unpack(s2->vec+i, sk + i*POLETA_SIZE_PACKED);
+  sk += K*POLETA_SIZE_PACKED;
+
+  for(i=0; i < K; ++i)
+    polyt0_unpack(t0->vec+i, sk + i*POLT0_SIZE_PACKED);
+}
+
+/*************************************************
+* Name:        pack_sig
+*
+* Description: Bit-pack signature sig = (z, h, c).
+*
+* Arguments:   - uint8_t sig[]: output byte array
+*              - const polyvecl *z: pointer to vector z
+*              - const polyveck *h: pointer to hint vector h
+*              - const poly *c: pointer to challenge polynomial
+**************************************************/
+void pack_sig(uint8_t sig[CRYPTO_BYTES],
+              const polyvecl *z,
+              const polyveck *h,
+              const poly *c)
+{
+  uint32_t i, j, k;
+  uint64_t signs, mask;
+
+  for(i = 0; i < L; ++i)
+    polyz_pack(sig + i*POLZ_SIZE_PACKED, z->vec+i);
+  sig += L*POLZ_SIZE_PACKED;
+
+  /* Encode h */
+  k = 0;
+  for(i = 0; i < K; ++i) {
+    for(j = 0; j < N; ++j)
+      if(h->vec[i].coeffs[j] != 0)
+        sig[k++] = j;
+
+    sig[OMEGA + i] = k;
+  }
+  while(k < OMEGA) sig[k++] = 0;
+  sig += OMEGA + K;
+
+  /* Encode c */
+  signs = 0;
+  mask = 1;
+  for(i = 0; i < N/8; ++i) {
+    sig[i] = 0;
+    for(j = 0; j < 8; ++j) {
+      if(c->coeffs[8*i+j] != 0) {
+        sig[i] |= (1U << j);
+        if(c->coeffs[8*i+j] == (Q - 1)) signs |= mask;
+        mask <<= 1;
+      }
+    }
+  }
+  sig += N/8;
+  for(i = 0; i < 8; ++i)
+    sig[i] = signs >> 8*i;
+}
+
+/*************************************************
+* Name:        unpack_sig
+*
+* Description: Unpack signature sig = (z, h, c).
+*
+* Arguments:   - polyvecl *z: pointer to output vector z
+*              - polyveck *h: pointer to output hint vector h
+*              - poly *c: pointer to output challenge polynomial
+*              - const uint8_t sig[]: byte array containing
+*                bit-packed signature
+*
+* Returns 1 in case of malformed signature; otherwise 0.
+**************************************************/
+int unpack_sig(polyvecl *z,
+               polyveck *h,
+               poly *c,
+               const uint8_t sig[CRYPTO_BYTES])
+{
+  uint32_t i, j, k;
+  uint64_t signs, mask;
+
+  for(i = 0; i < L; ++i)
+    polyz_unpack(z->vec+i, sig + i*POLZ_SIZE_PACKED);
+  sig += L*POLZ_SIZE_PACKED;
+
+  /* Decode h */
+  k = 0;
+  for(i = 0; i < K; ++i) {
+    for(j = 0; j < N; ++j)
+      h->vec[i].coeffs[j] = 0;
+
+    if(sig[OMEGA + i] < k || sig[OMEGA + i] > OMEGA)
+      return 1;
+
+    for(j = k; j < sig[OMEGA + i]; ++j) {
+      /* Coefficients are ordered for strong unforgeability */
+      if(j > k && sig[j] <= sig[j-1]) return 1;
+      h->vec[i].coeffs[sig[j]] = 1;
+    }
+
+    k = sig[OMEGA + i];
+  }
+
+  /* Extra indices are zero for strong unforgeability */
+  for(j = k; j < OMEGA; ++j)
+    if(sig[j])
+      return 1;
+
+  sig += OMEGA + K;
+
+  /* Decode c */
+  for(i = 0; i < N; ++i)
+    c->coeffs[i] = 0;
+
+  signs = 0;
+  for(i = 0; i < 8; ++i)
+    signs |= (uint64_t)sig[N/8+i] << 8*i;
+
+  /* Extra sign bits are zero for strong unforgeability */
+  if(signs >> 60)
+    return 1;
+
+  mask = 1;
+  for(i = 0; i < N/8; ++i) {
+    for(j = 0; j < 8; ++j) {
+      if((sig[i] >> j) & 0x01) {
+        c->coeffs[8*i+j] = (signs & mask) ? Q - 1 : 1;
+        mask <<= 1;
+      }
+    }
+  }
+
+  return 0;
+}
+//#include <stdint.h>
+//#include "test/cpucycles.h"
+//#include "fips202.h"
+//#include "params.h"
+//#include "reduce.h"
+//#include "rounding.h"
+//#include "ntt.h"
+//#include "poly.h"
+
+#ifdef DBENCH
+extern const uint64_t timing_overhead;
+extern uint64_t *tred, *tadd, *tmul, *tround, *tsample, *tpack;
+#endif
+
+/*************************************************
+* Name:        poly_reduce
+*
+* Description: Reduce all coefficients of input polynomial to representative
+*              in [0,2*Q[.
+*
+* Arguments:   - poly *a: pointer to input/output polynomial
+**************************************************/
+void poly_reduce(poly *a) {
+  uint32_t i;
+  DBENCH_START();
+
+  for(i = 0; i < N; ++i)
+    a->coeffs[i] = reduce32(a->coeffs[i]);
+
+  DBENCH_STOP(*tred);
+}
+
+/*************************************************
+* Name:        poly_csubq
+*
+* Description: For all coefficients of input polynomial subtract Q if
+*              coefficient is bigger than Q.
+*
+* Arguments:   - poly *a: pointer to input/output polynomial
+**************************************************/
+void poly_csubq(poly *a) {
+  uint32_t i;
+  DBENCH_START();
+
+  for(i = 0; i < N; ++i)
+    a->coeffs[i] = csubq(a->coeffs[i]);
+
+  DBENCH_STOP(*tred);
+}
+
+/*************************************************
+* Name:        poly_freeze
+*
+* Description: Reduce all coefficients of the polynomial to standard
+*              representatives.
+*
+* Arguments:   - poly *a: pointer to input/output polynomial
+**************************************************/
+void poly_freeze(poly *a) {
+  uint32_t i;
+  DBENCH_START();
+
+  for(i = 0; i < N; ++i)
+    a->coeffs[i] = freeze(a->coeffs[i]);
+
+  DBENCH_STOP(*tred);
+}
+
+/*************************************************
+* Name:        poly_add
+*
+* Description: Add polynomials. No modular reduction is performed.
+*
+* Arguments:   - poly *c: pointer to output polynomial
+*              - const poly *a: pointer to first summand
+*              - const poly *b: pointer to second summand
+**************************************************/
+void poly_add(poly *c, const poly *a, const poly *b)  {
+  uint32_t i;
+  DBENCH_START();
+
+  for(i = 0; i < N; ++i)
+    c->coeffs[i] = a->coeffs[i] + b->coeffs[i];
+
+  DBENCH_STOP(*tadd);
+}
+
+/*************************************************
+* Name:        poly_sub
+*
+* Description: Subtract polynomials. Assumes coefficients of second input
+*              polynomial to be less than 2*Q. No modular reduction is
+*              performed.
+*
+* Arguments:   - poly *c: pointer to output polynomial
+*              - const poly *a: pointer to first input polynomial
+*              - const poly *b: pointer to second input polynomial to be
+*                               subtraced from first input polynomial
+**************************************************/
+void poly_sub(poly *c, const poly *a, const poly *b) {
+  uint32_t i;
+  DBENCH_START();
+
+  for(i = 0; i < N; ++i)
+    c->coeffs[i] = a->coeffs[i] + 2*Q - b->coeffs[i];
+
+  DBENCH_STOP(*tadd);
+}
+
+/*************************************************
+* Name:        poly_neg
+*
+* Description: Negate polynomial. Assumes input coefficients to be standard
+*              representatives.
+*
+* Arguments:   - poly *a: pointer to input/output polynomial
+**************************************************/
+void poly_neg(poly *a) {
+  uint32_t i;
+  DBENCH_START();
+
+  for(i = 0; i < N; ++i)
+    a->coeffs[i] = Q - a->coeffs[i];
+
+  DBENCH_STOP(*tadd);
+}
+
+/*************************************************
+* Name:        poly_shiftl
+*
+* Description: Multiply polynomial by 2^k without modular reduction. Assumes
+*              input coefficients to be less than 2^{32-k}.
+*
+* Arguments:   - poly *a: pointer to input/output polynomial
+*              - uint32_t k: exponent
+**************************************************/
+void poly_shiftl(poly *a, uint32_t k) {
+  uint32_t i;
+  DBENCH_START();
+
+  for(i = 0; i < N; ++i)
+    a->coeffs[i] <<= k;
+
+  DBENCH_STOP(*tmul);
+}
+
+/*************************************************
+* Name:        poly_ntt
+*
+* Description: Forward NTT. Output coefficients can be up to 16*Q larger than
+*              input coefficients.
+*
+* Arguments:   - poly *a: pointer to input/output polynomial
+**************************************************/
+void poly_ntt(poly *a) {
+  DBENCH_START();
+
+  ntt(a->coeffs);
+
+  DBENCH_STOP(*tmul);
+}
+
+/*************************************************
+* Name:        poly_invntt_montgomery
+*
+* Description: Inverse NTT and multiplication with 2^{32}. Input coefficients
+*              need to be less than 2*Q. Output coefficients are less than 2*Q.
+*
+* Arguments:   - poly *a: pointer to input/output polynomial
+**************************************************/
+void poly_invntt_montgomery(poly *a) {
+  DBENCH_START();
+
+  invntt_frominvmont(a->coeffs);
+
+  DBENCH_STOP(*tmul);
+}
+
+/*************************************************
+* Name:        poly_pointwise_invmontgomery
+*
+* Description: Pointwise multiplication of polynomials in NTT domain
+*              representation and multiplication of resulting polynomial
+*              with 2^{-32}. Output coefficients are less than 2*Q if input
+*              coefficient are less than 22*Q.
+*
+* Arguments:   - poly *c: pointer to output polynomial
+*              - const poly *a: pointer to first input polynomial
+*              - const poly *b: pointer to second input polynomial
+**************************************************/
+void poly_pointwise_invmontgomery(poly *c, const poly *a, const poly *b) {
+  uint32_t i;
+  DBENCH_START();
+
+  for(i = 0; i < N; ++i)
+    c->coeffs[i] = montgomery_reduce((uint64_t)a->coeffs[i] * b->coeffs[i]);
+
+  DBENCH_STOP(*tmul);
+}
+
+/*************************************************
+* Name:        poly_power2round
+*
+* Description: For all coefficients c of the input polynomial,
+*              compute c0, c1 such that c mod Q = c1*2^D + c0
+*              with -2^{D-1} < c0 <= 2^{D-1}. Assumes coefficients to be
+*              standard representatives.
+*
+* Arguments:   - poly *a1: pointer to output polynomial with coefficients c1
+*              - poly *a0: pointer to output polynomial with coefficients Q + a0
+*              - const poly *v: pointer to input polynomial
+**************************************************/
+void poly_power2round(poly *a1, poly *a0, const poly *a) {
+  uint32_t i;
+  DBENCH_START();
+
+  for(i = 0; i < N; ++i)
+    a1->coeffs[i] = power2round(a->coeffs[i], a0->coeffs+i);
+
+  DBENCH_STOP(*tround);
+}
+
+/*************************************************
+* Name:        poly_decompose
+*
+* Description: For all coefficients c of the input polynomial,
+*              compute high and low bits c0, c1 such c mod Q = c1*ALPHA + c0
+*              with -ALPHA/2 < c0 <= ALPHA/2 except c1 = (Q-1)/ALPHA where we
+*              set c1 = 0 and -ALPHA/2 <= c0 = c mod Q - Q < 0.
+*              Assumes coefficients to be standard representatives.
+*
+* Arguments:   - poly *a1: pointer to output polynomial with coefficients c1
+*              - poly *a0: pointer to output polynomial with coefficients Q + a0
+*              - const poly *c: pointer to input polynomial
+**************************************************/
+void poly_decompose(poly *a1, poly *a0, const poly *a) {
+  uint32_t i;
+  DBENCH_START();
+
+  for(i = 0; i < N; ++i)
+    a1->coeffs[i] = decompose(a->coeffs[i], a0->coeffs+i);
+
+  DBENCH_STOP(*tround);
+}
+
+/*************************************************
+* Name:        poly_make_hint
+*
+* Description: Compute hint polynomial. The coefficients of which indicate
+*              whether the high bits of the corresponding coefficients
+*              of the first input polynomial and of the sum of the input
+*              polynomials differ.
+*
+* Arguments:   - poly *h: pointer to output hint polynomial
+*              - const poly *a: pointer to first input polynomial
+*              - const poly *b: pointer to second input polynomial
+*
+* Returns number of 1 bits.
+**************************************************/
+uint32_t poly_make_hint(poly *h, const poly *a, const poly *b) {
+  uint32_t i, s = 0;
+  DBENCH_START();
+
+  for(i = 0; i < N; ++i) {
+    h->coeffs[i] = make_hint(a->coeffs[i], b->coeffs[i]);
+    s += h->coeffs[i];
+  }
+
+  DBENCH_STOP(*tround);
+  return s;
+}
+
+/*************************************************
+* Name:        poly_use_hint
+*
+* Description: Use hint polynomial to correct the high bits of a polynomial.
+*
+* Arguments:   - poly *a: pointer to output polynomial with corrected high bits
+*              - const poly *b: pointer to input polynomial
+*              - const poly *h: pointer to input hint polynomial
+**************************************************/
+void poly_use_hint(poly *a, const poly *b, const poly *h) {
+  uint32_t i;
+  DBENCH_START();
+
+  for(i = 0; i < N; ++i)
+    a->coeffs[i] = use_hint(b->coeffs[i], h->coeffs[i]);
+
+  DBENCH_STOP(*tround);
+}
+
+/*************************************************
+* Name:        poly_chknorm
+*
+* Description: Check infinity norm of polynomial against given bound.
+*              Assumes input coefficients to be standard representatives.
+*
+* Arguments:   - const poly *a: pointer to polynomial
+*              - uint32_t B: norm bound
+*
+* Returns 0 if norm is strictly smaller than B and 1 otherwise.
+**************************************************/
+int poly_chknorm(const poly *a, uint32_t B) {
+  uint32_t i;
+  int32_t t;
+  DBENCH_START();
+
+  /* It is ok to leak which coefficient violates the bound since
+     the probability for each coefficient is independent of secret
+     data but we must not leak the sign of the centralized representative. */
+  for(i = 0; i < N; ++i) {
+    /* Absolute value of centralized representative */
+    t = (Q-1)/2 - a->coeffs[i];
+    t ^= (t >> 31);
+    t = (Q-1)/2 - t;
+
+    if((uint32_t)t >= B) {
+      DBENCH_STOP(*tsample);
+      return 1;
+    }
+  }
+
+  DBENCH_STOP(*tsample);
+  return 0;
+}
+
+/*************************************************
+* Name:        poly_uniform
+*
+* Description: Sample uniformly random polynomial using stream of random bytes.
+*              Assumes that enough random bytes are given (e.g.
+*              5*SHAKE128_RATE bytes).
+*
+* Arguments:   - poly *a: pointer to output polynomial
+*              - const uint8_t *buf: array of random bytes
+**************************************************/
+void poly_uniform(poly *a, const uint8_t *buf) {
+  uint32_t ctr, pos;
+  uint32_t t;
+  DBENCH_START();
+
+  ctr = pos = 0;
+  while(ctr < N) {
+    t  = buf[pos++];
+    t |= (uint32_t)buf[pos++] << 8;
+    t |= (uint32_t)buf[pos++] << 16;
+    t &= 0x7FFFFF;
+
+    if(t < Q)
+      a->coeffs[ctr++] = t;
+  }
+
+  DBENCH_STOP(*tsample);
+}
+
+/*************************************************
+* Name:        rej_eta
+*
+* Description: Sample uniformly random coefficients in [-ETA, ETA] by
+*              performing rejection sampling using array of random bytes.
+*
+* Arguments:   - uint32_t *a: pointer to output array (allocated)
+*              - uint32_t len: number of coefficients to be sampled
+*              - const uint8_t *buf: array of random bytes
+*              - uint32_t buflen: length of array of random bytes
+*
+* Returns number of sampled coefficients. Can be smaller than len if not enough
+* random bytes were given.
+**************************************************/
+static uint32_t rej_eta(uint32_t *a,
+                            uint32_t len,
+                            const uint8_t *buf,
+                            uint32_t buflen)
+{
+#if ETA > 7
+#error "rej_eta() assumes ETA <= 7"
+#endif
+  uint32_t ctr, pos;
+  uint8_t t0, t1;
+  DBENCH_START();
+
+  ctr = pos = 0;
+  while(ctr < len && pos < buflen) {
+#if ETA <= 3
+    t0 = buf[pos] & 0x07;
+    t1 = buf[pos++] >> 5;
+#else
+    t0 = buf[pos] & 0x0F;
+    t1 = buf[pos++] >> 4;
+#endif
+
+    if(t0 <= 2*ETA)
+      a[ctr++] = Q + ETA - t0;
+    if(t1 <= 2*ETA && ctr < len)
+      a[ctr++] = Q + ETA - t1;
+  }
+
+  DBENCH_STOP(*tsample);
+  return ctr;
+}
+
+/*************************************************
+* Name:        poly_uniform_eta
+*
+* Description: Sample polynomial with uniformly random coefficients
+*              in [-ETA,ETA] by performing rejection sampling using the
+*              output stream from SHAKE256(seed|nonce).
+*
+* Arguments:   - poly *a: pointer to output polynomial
+*              - const uint8_t seed[]: byte array with seed of length
+*                                            SEEDBYTES
+*              - uint8_t nonce: nonce byte
+**************************************************/
+void poly_uniform_eta(poly *a,
+                      const uint8_t seed[SEEDBYTES],
+                      uint8_t nonce)
+{
+  uint32_t i, ctr;
+  uint8_t inbuf[SEEDBYTES + 1];
+  /* Probability that we need more than 2 blocks: < 2^{-84}
+     Probability that we need more than 3 blocks: < 2^{-352} */
+  uint8_t outbuf[2*SHAKE256_RATE];
+  uint64_t state[25];
+
+  for(i= 0; i < SEEDBYTES; ++i)
+    inbuf[i] = seed[i];
+  inbuf[SEEDBYTES] = nonce;
+
+  shake256_absorb(state, inbuf, SEEDBYTES + 1);
+  shake256_squeezeblocks(outbuf, 2, state);
+
+  ctr = rej_eta(a->coeffs, N, outbuf, 2*SHAKE256_RATE);
+  if(ctr < N) {
+    shake256_squeezeblocks(outbuf, 1, state);
+    rej_eta(a->coeffs + ctr, N - ctr, outbuf, SHAKE256_RATE);
+  }
+}
+
+/*************************************************
+* Name:        rej_gamma1m1
+*
+* Description: Sample uniformly random coefficients
+*              in [-(GAMMA1 - 1), GAMMA1 - 1] by performing rejection sampling
+*              using array of random bytes.
+*
+* Arguments:   - uint32_t *a: pointer to output array (allocated)
+*              - uint32_t len: number of coefficients to be sampled
+*              - const uint8_t *buf: array of random bytes
+*              - uint32_t buflen: length of array of random bytes
+*
+* Returns number of sampled coefficients. Can be smaller than len if not enough
+* random bytes were given.
+**************************************************/
+static uint32_t rej_gamma1m1(uint32_t *a,
+                                 uint32_t len,
+                                 const uint8_t *buf,
+                                 uint32_t buflen)
+{
+#if GAMMA1 > (1 << 19)
+#error "rej_gamma1m1() assumes GAMMA1 - 1 fits in 19 bits"
+#endif
+  uint32_t ctr, pos;
+  uint32_t t0, t1;
+  DBENCH_START();
+
+  ctr = pos = 0;
+  while(ctr < len && pos + 5 <= buflen) {
+    t0  = buf[pos];
+    t0 |= (uint32_t)buf[pos + 1] << 8;
+    t0 |= (uint32_t)buf[pos + 2] << 16;
+    t0 &= 0xFFFFF;
+
+    t1  = buf[pos + 2] >> 4;
+    t1 |= (uint32_t)buf[pos + 3] << 4;
+    t1 |= (uint32_t)buf[pos + 4] << 12;
+
+    pos += 5;
+
+    if(t0 <= 2*GAMMA1 - 2)
+      a[ctr++] = Q + GAMMA1 - 1 - t0;
+    if(t1 <= 2*GAMMA1 - 2 && ctr < len)
+      a[ctr++] = Q + GAMMA1 - 1 - t1;
+  }
+
+  DBENCH_STOP(*tsample);
+  return ctr;
+}
+
+/*************************************************
+* Name:        poly_uniform_gamma1m1
+*
+* Description: Sample polynomial with uniformly random coefficients
+*              in [-(GAMMA1 - 1), GAMMA1 - 1] by performing rejection
+*              sampling on output stream of SHAKE256(seed|nonce).
+*
+* Arguments:   - poly *a: pointer to output polynomial
+*              - const uint8_t seed[]: byte array with seed of length
+*                                            SEEDBYTES + CRHBYTES
+*              - uint16_t nonce: 16-bit nonce
+**************************************************/
+void poly_uniform_gamma1m1(poly *a,
+                           const uint8_t seed[SEEDBYTES + CRHBYTES],
+                           uint16_t nonce)
+{
+  uint32_t i, ctr;
+  uint8_t inbuf[SEEDBYTES + CRHBYTES + 2];
+  /* Probability that we need more than 5 blocks: < 2^{-81}
+     Probability that we need more than 6 blocks: < 2^{-467} */
+  uint8_t outbuf[5*SHAKE256_RATE];
+  uint64_t state[25];
+
+  for(i = 0; i < SEEDBYTES + CRHBYTES; ++i)
+    inbuf[i] = seed[i];
+  inbuf[SEEDBYTES + CRHBYTES] = nonce & 0xFF;
+  inbuf[SEEDBYTES + CRHBYTES + 1] = nonce >> 8;
+
+  shake256_absorb(state, inbuf, SEEDBYTES + CRHBYTES + 2);
+  shake256_squeezeblocks(outbuf, 5, state);
+
+  ctr = rej_gamma1m1(a->coeffs, N, outbuf, 5*SHAKE256_RATE);
+  if(ctr < N) {
+    /* There are no bytes left in outbuf
+       since 5*SHAKE256_RATE is divisible by 5 */
+    shake256_squeezeblocks(outbuf, 1, state);
+    rej_gamma1m1(a->coeffs + ctr, N - ctr, outbuf, SHAKE256_RATE);
+  }
+}
+
+/*************************************************
+* Name:        polyeta_pack
+*
+* Description: Bit-pack polynomial with coefficients in [-ETA,ETA].
+*              Input coefficients are assumed to lie in [Q-ETA,Q+ETA].
+*
+* Arguments:   - uint8_t *r: pointer to output byte array with at least
+*                                  POLETA_SIZE_PACKED bytes
+*              - const poly *a: pointer to input polynomial
+**************************************************/
+void polyeta_pack(uint8_t *r, const poly *a) {
+#if ETA > 7
+#error "polyeta_pack() assumes ETA <= 7"
+#endif
+  uint32_t i;
+  uint8_t t[8];
+  DBENCH_START();
+
+#if ETA <= 3
+  for(i = 0; i < N/8; ++i) {
+    t[0] = Q + ETA - a->coeffs[8*i+0];
+    t[1] = Q + ETA - a->coeffs[8*i+1];
+    t[2] = Q + ETA - a->coeffs[8*i+2];
+    t[3] = Q + ETA - a->coeffs[8*i+3];
+    t[4] = Q + ETA - a->coeffs[8*i+4];
+    t[5] = Q + ETA - a->coeffs[8*i+5];
+    t[6] = Q + ETA - a->coeffs[8*i+6];
+    t[7] = Q + ETA - a->coeffs[8*i+7];
+
+    r[3*i+0]  = t[0];
+    r[3*i+0] |= t[1] << 3;
+    r[3*i+0] |= t[2] << 6;
+    r[3*i+1]  = t[2] >> 2;
+    r[3*i+1] |= t[3] << 1;
+    r[3*i+1] |= t[4] << 4;
+    r[3*i+1] |= t[5] << 7;
+    r[3*i+2]  = t[5] >> 1;
+    r[3*i+2] |= t[6] << 2;
+    r[3*i+2] |= t[7] << 5;
+  }
+#else
+  for(i = 0; i < N/2; ++i) {
+    t[0] = Q + ETA - a->coeffs[2*i+0];
+    t[1] = Q + ETA - a->coeffs[2*i+1];
+    r[i] = t[0] | (t[1] << 4);
+  }
+#endif
+
+  DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        polyeta_unpack
+*
+* Description: Unpack polynomial with coefficients in [-ETA,ETA].
+*              Output coefficients lie in [Q-ETA,Q+ETA].
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *a: byte array with bit-packed polynomial
+**************************************************/
+void polyeta_unpack(poly *r, const uint8_t *a) {
+  uint32_t i;
+  DBENCH_START();
+
+#if ETA <= 3
+  for(i = 0; i < N/8; ++i) {
+    r->coeffs[8*i+0] = a[3*i+0] & 0x07;
+    r->coeffs[8*i+1] = (a[3*i+0] >> 3) & 0x07;
+    r->coeffs[8*i+2] = (a[3*i+0] >> 6) | ((a[3*i+1] & 0x01) << 2);
+    r->coeffs[8*i+3] = (a[3*i+1] >> 1) & 0x07;
+    r->coeffs[8*i+4] = (a[3*i+1] >> 4) & 0x07;
+    r->coeffs[8*i+5] = (a[3*i+1] >> 7) | ((a[3*i+2] & 0x03) << 1);
+    r->coeffs[8*i+6] = (a[3*i+2] >> 2) & 0x07;
+    r->coeffs[8*i+7] = (a[3*i+2] >> 5);
+
+    r->coeffs[8*i+0] = Q + ETA - r->coeffs[8*i+0];
+    r->coeffs[8*i+1] = Q + ETA - r->coeffs[8*i+1];
+    r->coeffs[8*i+2] = Q + ETA - r->coeffs[8*i+2];
+    r->coeffs[8*i+3] = Q + ETA - r->coeffs[8*i+3];
+    r->coeffs[8*i+4] = Q + ETA - r->coeffs[8*i+4];
+    r->coeffs[8*i+5] = Q + ETA - r->coeffs[8*i+5];
+    r->coeffs[8*i+6] = Q + ETA - r->coeffs[8*i+6];
+    r->coeffs[8*i+7] = Q + ETA - r->coeffs[8*i+7];
+  }
+#else
+  for(i = 0; i < N/2; ++i) {
+    r->coeffs[2*i+0] = a[i] & 0x0F;
+    r->coeffs[2*i+1] = a[i] >> 4;
+    r->coeffs[2*i+0] = Q + ETA - r->coeffs[2*i+0];
+    r->coeffs[2*i+1] = Q + ETA - r->coeffs[2*i+1];
+  }
+#endif
+
+  DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        polyt1_pack
+*
+* Description: Bit-pack polynomial t1 with coefficients fitting in 9 bits.
+*              Input coefficients are assumed to be standard representatives.
+*
+* Arguments:   - uint8_t *r: pointer to output byte array with at least
+*                                  POLT1_SIZE_PACKED bytes
+*              - const poly *a: pointer to input polynomial
+**************************************************/
+void polyt1_pack(uint8_t *r, const poly *a) {
+#if D != 14
+#error "polyt1_pack() assumes D == 14"
+#endif
+  uint32_t i;
+  DBENCH_START();
+
+  for(i = 0; i < N/8; ++i) {
+    r[9*i+0]  =  a->coeffs[8*i+0] & 0xFF;
+    r[9*i+1]  = (a->coeffs[8*i+0] >> 8) | ((a->coeffs[8*i+1] & 0x7F) << 1);
+    r[9*i+2]  = (a->coeffs[8*i+1] >> 7) | ((a->coeffs[8*i+2] & 0x3F) << 2);
+    r[9*i+3]  = (a->coeffs[8*i+2] >> 6) | ((a->coeffs[8*i+3] & 0x1F) << 3);
+    r[9*i+4]  = (a->coeffs[8*i+3] >> 5) | ((a->coeffs[8*i+4] & 0x0F) << 4);
+    r[9*i+5]  = (a->coeffs[8*i+4] >> 4) | ((a->coeffs[8*i+5] & 0x07) << 5);
+    r[9*i+6]  = (a->coeffs[8*i+5] >> 3) | ((a->coeffs[8*i+6] & 0x03) << 6);
+    r[9*i+7]  = (a->coeffs[8*i+6] >> 2) | ((a->coeffs[8*i+7] & 0x01) << 7);
+    r[9*i+8]  =  a->coeffs[8*i+7] >> 1;
+  }
+
+  DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        polyt1_unpack
+*
+* Description: Unpack polynomial t1 with 9-bit coefficients.
+*              Output coefficients are standard representatives.
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *a: byte array with bit-packed polynomial
+**************************************************/
+void polyt1_unpack(poly *r, const uint8_t *a) {
+  uint32_t i;
+  DBENCH_START();
+
+  for(i = 0; i < N/8; ++i) {
+    r->coeffs[8*i+0] =  a[9*i+0]       | ((uint32_t)(a[9*i+1] & 0x01) << 8);
+    r->coeffs[8*i+1] = (a[9*i+1] >> 1) | ((uint32_t)(a[9*i+2] & 0x03) << 7);
+    r->coeffs[8*i+2] = (a[9*i+2] >> 2) | ((uint32_t)(a[9*i+3] & 0x07) << 6);
+    r->coeffs[8*i+3] = (a[9*i+3] >> 3) | ((uint32_t)(a[9*i+4] & 0x0F) << 5);
+    r->coeffs[8*i+4] = (a[9*i+4] >> 4) | ((uint32_t)(a[9*i+5] & 0x1F) << 4);
+    r->coeffs[8*i+5] = (a[9*i+5] >> 5) | ((uint32_t)(a[9*i+6] & 0x3F) << 3);
+    r->coeffs[8*i+6] = (a[9*i+6] >> 6) | ((uint32_t)(a[9*i+7] & 0x7F) << 2);
+    r->coeffs[8*i+7] = (a[9*i+7] >> 7) | ((uint32_t)(a[9*i+8] & 0xFF) << 1);
+  }
+
+  DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        polyt0_pack
+*
+* Description: Bit-pack polynomial t0 with coefficients in ]-2^{D-1}, 2^{D-1}].
+*              Input coefficients are assumed to lie in ]Q-2^{D-1}, Q+2^{D-1}].
+*
+* Arguments:   - uint8_t *r: pointer to output byte array with at least
+*                                  POLT0_SIZE_PACKED bytes
+*              - const poly *a: pointer to input polynomial
+**************************************************/
+void polyt0_pack(uint8_t *r, const poly *a) {
+  uint32_t i;
+  uint32_t t[4];
+  DBENCH_START();
+
+  for(i = 0; i < N/4; ++i) {
+    t[0] = Q + (1 << (D-1)) - a->coeffs[4*i+0];
+    t[1] = Q + (1 << (D-1)) - a->coeffs[4*i+1];
+    t[2] = Q + (1 << (D-1)) - a->coeffs[4*i+2];
+    t[3] = Q + (1 << (D-1)) - a->coeffs[4*i+3];
+
+    r[7*i+0]  =  t[0];
+    r[7*i+1]  =  t[0] >> 8;
+    r[7*i+1] |=  t[1] << 6;
+    r[7*i+2]  =  t[1] >> 2;
+    r[7*i+3]  =  t[1] >> 10;
+    r[7*i+3] |=  t[2] << 4;
+    r[7*i+4]  =  t[2] >> 4;
+    r[7*i+5]  =  t[2] >> 12;
+    r[7*i+5] |=  t[3] << 2;
+    r[7*i+6]  =  t[3] >> 6;
+  }
+
+  DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        polyt0_unpack
+*
+* Description: Unpack polynomial t0 with coefficients in ]-2^{D-1}, 2^{D-1}].
+*              Output coefficients lie in ]Q-2^{D-1},Q+2^{D-1}].
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *a: byte array with bit-packed polynomial
+**************************************************/
+void polyt0_unpack(poly *r, const uint8_t *a) {
+  uint32_t i;
+  DBENCH_START();
+
+  for(i = 0; i < N/4; ++i) {
+    r->coeffs[4*i+0]  = a[7*i+0];
+    r->coeffs[4*i+0] |= (uint32_t)(a[7*i+1] & 0x3F) << 8;
+
+    r->coeffs[4*i+1]  = a[7*i+1] >> 6;
+    r->coeffs[4*i+1] |= (uint32_t)a[7*i+2] << 2;
+    r->coeffs[4*i+1] |= (uint32_t)(a[7*i+3] & 0x0F) << 10;
+
+    r->coeffs[4*i+2]  = a[7*i+3] >> 4;
+    r->coeffs[4*i+2] |= (uint32_t)a[7*i+4] << 4;
+    r->coeffs[4*i+2] |= (uint32_t)(a[7*i+5] & 0x03) << 12;
+
+    r->coeffs[4*i+3]  = a[7*i+5] >> 2;
+    r->coeffs[4*i+3] |= (uint32_t)a[7*i+6] << 6;
+
+    r->coeffs[4*i+0] = Q + (1 << (D-1)) - r->coeffs[4*i+0];
+    r->coeffs[4*i+1] = Q + (1 << (D-1)) - r->coeffs[4*i+1];
+    r->coeffs[4*i+2] = Q + (1 << (D-1)) - r->coeffs[4*i+2];
+    r->coeffs[4*i+3] = Q + (1 << (D-1)) - r->coeffs[4*i+3];
+  }
+
+  DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        polyz_pack
+*
+* Description: Bit-pack polynomial z with coefficients
+*              in [-(GAMMA1 - 1), GAMMA1 - 1].
+*              Input coefficients are assumed to be standard representatives.
+*
+* Arguments:   - uint8_t *r: pointer to output byte array with at least
+*                                  POLZ_SIZE_PACKED bytes
+*              - const poly *a: pointer to input polynomial
+**************************************************/
+void polyz_pack(uint8_t *r, const poly *a) {
+#if GAMMA1 > (1 << 19)
+#error "polyz_pack() assumes GAMMA1 <= 2^{19}"
+#endif
+  uint32_t i;
+  uint32_t t[2];
+  DBENCH_START();
+
+  for(i = 0; i < N/2; ++i) {
+    /* Map to {0,...,2*GAMMA1 - 2} */
+    t[0] = GAMMA1 - 1 - a->coeffs[2*i+0];
+    t[0] += ((int32_t)t[0] >> 31) & Q;
+    t[1] = GAMMA1 - 1 - a->coeffs[2*i+1];
+    t[1] += ((int32_t)t[1] >> 31) & Q;
+
+    r[5*i+0]  = t[0];
+    r[5*i+1]  = t[0] >> 8;
+    r[5*i+2]  = t[0] >> 16;
+    r[5*i+2] |= t[1] << 4;
+    r[5*i+3]  = t[1] >> 4;
+    r[5*i+4]  = t[1] >> 12;
+  }
+
+  DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        polyz_unpack
+*
+* Description: Unpack polynomial z with coefficients
+*              in [-(GAMMA1 - 1), GAMMA1 - 1].
+*              Output coefficients are standard representatives.
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *a: byte array with bit-packed polynomial
+**************************************************/
+void polyz_unpack(poly *r, const uint8_t *a) {
+  uint32_t i;
+  DBENCH_START();
+
+  for(i = 0; i < N/2; ++i) {
+    r->coeffs[2*i+0]  = a[5*i+0];
+    r->coeffs[2*i+0] |= (uint32_t)a[5*i+1] << 8;
+    r->coeffs[2*i+0] |= (uint32_t)(a[5*i+2] & 0x0F) << 16;
+
+    r->coeffs[2*i+1]  = a[5*i+2] >> 4;
+    r->coeffs[2*i+1] |= (uint32_t)a[5*i+3] << 4;
+    r->coeffs[2*i+1] |= (uint32_t)a[5*i+4] << 12;
+
+    r->coeffs[2*i+0] = GAMMA1 - 1 - r->coeffs[2*i+0];
+    r->coeffs[2*i+0] += ((int32_t)r->coeffs[2*i+0] >> 31) & Q;
+    r->coeffs[2*i+1] = GAMMA1 - 1 - r->coeffs[2*i+1];
+    r->coeffs[2*i+1] += ((int32_t)r->coeffs[2*i+1] >> 31) & Q;
+  }
+
+  DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        polyw1_pack
+*
+* Description: Bit-pack polynomial w1 with coefficients in [0, 15].
+*              Input coefficients are assumed to be standard representatives.
+*
+* Arguments:   - uint8_t *r: pointer to output byte array with at least
+*                                  POLW1_SIZE_PACKED bytes
+*              - const poly *a: pointer to input polynomial
+**************************************************/
+void polyw1_pack(uint8_t *r, const poly *a) {
+  uint32_t i;
+  DBENCH_START();
+
+  for(i = 0; i < N/2; ++i)
+    r[i] = a->coeffs[2*i+0] | (a->coeffs[2*i+1] << 4);
+
+  DBENCH_STOP(*tpack);
+}
+//#include <stdint.h>
+//#include "params.h"
+//#include "poly.h"
+//#include "polyvec.h"
+
+/**************************************************************/
+/************ Vectors of polynomials of length L **************/
+/**************************************************************/
+
+/*************************************************
+* Name:        polyvecl_freeze
+*
+* Description: Reduce coefficients of polynomials in vector of length L
+*              to standard representatives.
+*
+* Arguments:   - polyvecl *v: pointer to input/output vector
+**************************************************/
+void polyvecl_freeze(polyvecl *v) {
+  uint32_t i;
+
+  for(i = 0; i < L; ++i)
+    poly_freeze(v->vec+i);
+}
+
+/*************************************************
+* Name:        polyvecl_add
+*
+* Description: Add vectors of polynomials of length L.
+*              No modular reduction is performed.
+*
+* Arguments:   - polyvecl *w: pointer to output vector
+*              - const polyvecl *u: pointer to first summand
+*              - const polyvecl *v: pointer to second summand
+**************************************************/
+void polyvecl_add(polyvecl *w, const polyvecl *u, const polyvecl *v) {
+  uint32_t i;
+
+  for(i = 0; i < L; ++i)
+    poly_add(w->vec+i, u->vec+i, v->vec+i);
+}
+
+/*************************************************
+* Name:        polyvecl_ntt
+*
+* Description: Forward NTT of all polynomials in vector of length L. Output
+*              coefficients can be up to 16*Q larger than input coefficients.
+*
+* Arguments:   - polyvecl *v: pointer to input/output vector
+**************************************************/
+void polyvecl_ntt(polyvecl *v) {
+  uint32_t i;
+
+  for(i = 0; i < L; ++i)
+    poly_ntt(v->vec+i);
+}
+
+/*************************************************
+* Name:        polyvecl_pointwise_acc_invmontgomery
+*
+* Description: Pointwise multiply vectors of polynomials of length L, multiply
+*              resulting vector by 2^{-32} and add (accumulate) polynomials
+*              in it. Input/output vectors are in NTT domain representation.
+*              Input coefficients are assumed to be less than 22*Q. Output
+*              coeffcient are less than 2*L*Q.
+*
+* Arguments:   - poly *w: output polynomial
+*              - const polyvecl *u: pointer to first input vector
+*              - const polyvecl *v: pointer to second input vector
+**************************************************/
+void polyvecl_pointwise_acc_invmontgomery(poly *w,
+                                          const polyvecl *u,
+                                          const polyvecl *v)
+{
+  uint32_t i;
+  poly t;
+
+  poly_pointwise_invmontgomery(w, u->vec+0, v->vec+0);
+
+  for(i = 1; i < L; ++i) {
+    poly_pointwise_invmontgomery(&t, u->vec+i, v->vec+i);
+    poly_add(w, w, &t);
+  }
+}
+
+/*************************************************
+* Name:        polyvecl_chknorm
+*
+* Description: Check infinity norm of polynomials in vector of length L.
+*              Assumes input coefficients to be standard representatives.
+*
+* Arguments:   - const polyvecl *v: pointer to vector
+*              - uint32_t B: norm bound
+*
+* Returns 0 if norm of all polynomials is strictly smaller than B and 1
+* otherwise.
+**************************************************/
+int polyvecl_chknorm(const polyvecl *v, uint32_t bound)  {
+  uint32_t i;
+  int ret = 0;
+
+  for(i = 0; i < L; ++i)
+    ret |= poly_chknorm(v->vec+i, bound);
+
+  return ret;
+}
+
+/**************************************************************/
+/************ Vectors of polynomials of length K **************/
+/**************************************************************/
+
+
+/*************************************************
+* Name:        polyveck_reduce
+*
+* Description: Reduce coefficients of polynomials in vector of length K
+*              to representatives in [0,2*Q[.
+*
+* Arguments:   - polyveck *v: pointer to input/output vector
+**************************************************/
+void polyveck_reduce(polyveck *v) {
+  uint32_t i;
+
+  for(i = 0; i < K; ++i)
+    poly_reduce(v->vec+i);
+}
+
+/*************************************************
+* Name:        polyveck_csubq
+*
+* Description: For all coefficients of polynomials in vector of length K
+*              subtract Q if coefficient is bigger than Q.
+*
+* Arguments:   - polyveck *v: pointer to input/output vector
+**************************************************/
+void polyveck_csubq(polyveck *v) {
+  uint32_t i;
+
+  for(i = 0; i < K; ++i)
+    poly_csubq(v->vec+i);
+}
+
+/*************************************************
+* Name:        polyveck_freeze
+*
+* Description: Reduce coefficients of polynomials in vector of length K
+*              to standard representatives.
+*
+* Arguments:   - polyveck *v: pointer to input/output vector
+**************************************************/
+void polyveck_freeze(polyveck *v)  {
+  uint32_t i;
+
+  for(i = 0; i < K; ++i)
+    poly_freeze(v->vec+i);
+}
+
+/*************************************************
+* Name:        polyveck_add
+*
+* Description: Add vectors of polynomials of length K.
+*              No modular reduction is performed.
+*
+* Arguments:   - polyveck *w: pointer to output vector
+*              - const polyveck *u: pointer to first summand
+*              - const polyveck *v: pointer to second summand
+**************************************************/
+void polyveck_add(polyveck *w, const polyveck *u, const polyveck *v) {
+  uint32_t i;
+
+  for(i = 0; i < K; ++i)
+    poly_add(w->vec+i, u->vec+i, v->vec+i);
+}
+
+/*************************************************
+* Name:        polyveck_sub
+*
+* Description: Subtract vectors of polynomials of length K.
+*              Assumes coefficients of polynomials in second input vector
+*              to be less than 2*Q. No modular reduction is performed.
+*
+* Arguments:   - polyveck *w: pointer to output vector
+*              - const polyveck *u: pointer to first input vector
+*              - const polyveck *v: pointer to second input vector to be
+*                                   subtracted from first input vector
+**************************************************/
+void polyveck_sub(polyveck *w, const polyveck *u, const polyveck *v) {
+  uint32_t i;
+
+  for(i = 0; i < K; ++i)
+    poly_sub(w->vec+i, u->vec+i, v->vec+i);
+}
+
+/*************************************************
+* Name:        polyveck_shiftl
+*
+* Description: Multiply vector of polynomials of Length K by 2^k without modular
+*              reduction. Assumes input coefficients to be less than 2^{32-k}.
+*
+* Arguments:   - polyveck *v: pointer to input/output vector
+*              - uint32_t k: exponent
+**************************************************/
+void polyveck_shiftl(polyveck *v, uint32_t k) {
+  uint32_t i;
+
+  for(i = 0; i < K; ++i)
+    poly_shiftl(v->vec+i, k);
+}
+
+/*************************************************
+* Name:        polyveck_ntt
+*
+* Description: Forward NTT of all polynomials in vector of length K. Output
+*              coefficients can be up to 16*Q larger than input coefficients.
+*
+* Arguments:   - polyveck *v: pointer to input/output vector
+**************************************************/
+void polyveck_ntt(polyveck *v) {
+  uint32_t i;
+
+  for(i = 0; i < K; ++i)
+    poly_ntt(v->vec+i);
+}
+
+/*************************************************
+* Name:        polyveck_invntt_montgomery
+*
+* Description: Inverse NTT and multiplication by 2^{32} of polynomials
+*              in vector of length K. Input coefficients need to be less
+*              than 2*Q.
+*
+* Arguments:   - polyveck *v: pointer to input/output vector
+**************************************************/
+void polyveck_invntt_montgomery(polyveck *v) {
+  uint32_t i;
+
+  for(i = 0; i < K; ++i)
+    poly_invntt_montgomery(v->vec+i);
+}
+
+/*************************************************
+* Name:        polyveck_chknorm
+*
+* Description: Check infinity norm of polynomials in vector of length K.
+*              Assumes input coefficients to be standard representatives.
+*
+* Arguments:   - const polyveck *v: pointer to vector
+*              - uint32_t B: norm bound
+*
+* Returns 0 if norm of all polynomials are strictly smaller than B and 1
+* otherwise.
+**************************************************/
+int polyveck_chknorm(const polyveck *v, uint32_t bound) {
+  uint32_t i;
+  int ret = 0;
+
+  for(i = 0; i < K; ++i)
+    ret |= poly_chknorm(v->vec+i, bound);
+
+  return ret;
+}
+
+/*************************************************
+* Name:        polyveck_power2round
+*
+* Description: For all coefficients a of polynomials in vector of length K,
+*              compute a0, a1 such that a mod Q = a1*2^D + a0
+*              with -2^{D-1} < a0 <= 2^{D-1}. Assumes coefficients to be
+*              standard representatives.
+*
+* Arguments:   - polyveck *v1: pointer to output vector of polynomials with
+*                              coefficients a1
+*              - polyveck *v0: pointer to output vector of polynomials with
+*                              coefficients Q + a0
+*              - const polyveck *v: pointer to input vector
+**************************************************/
+void polyveck_power2round(polyveck *v1, polyveck *v0, const polyveck *v) {
+  uint32_t i;
+
+  for(i = 0; i < K; ++i)
+    poly_power2round(v1->vec+i, v0->vec+i, v->vec+i);
+}
+
+/*************************************************
+* Name:        polyveck_decompose
+*
+* Description: For all coefficients a of polynomials in vector of length K,
+*              compute high and low bits a0, a1 such a mod Q = a1*ALPHA + a0
+*              with -ALPHA/2 < a0 <= ALPHA/2 except a1 = (Q-1)/ALPHA where we
+*              set a1 = 0 and -ALPHA/2 <= a0 = a mod Q - Q < 0.
+*              Assumes coefficients to be standard representatives.
+*
+* Arguments:   - polyveck *v1: pointer to output vector of polynomials with
+*                              coefficients a1
+*              - polyveck *v0: pointer to output vector of polynomials with
+*                              coefficients Q + a0
+*              - const polyveck *v: pointer to input vector
+**************************************************/
+void polyveck_decompose(polyveck *v1, polyveck *v0, const polyveck *v) {
+  uint32_t i;
+
+  for(i = 0; i < K; ++i)
+    poly_decompose(v1->vec+i, v0->vec+i, v->vec+i);
+}
+
+/*************************************************
+* Name:        polyveck_make_hint
+*
+* Description: Compute hint vector.
+*
+* Arguments:   - polyveck *h: pointer to output vector
+*              - const polyveck *u: pointer to first input vector
+*              - const polyveck *u: pointer to second input vector
+*
+* Returns number of 1 bits.
+**************************************************/
+uint32_t polyveck_make_hint(polyveck *h,
+                                const polyveck *u,
+                                const polyveck *v)
+{
+  uint32_t i, s = 0;
+
+  for(i = 0; i < K; ++i)
+    s += poly_make_hint(h->vec+i, u->vec+i, v->vec+i);
+
+  return s;
+}
+
+/*************************************************
+* Name:        polyveck_use_hint
+*
+* Description: Use hint vector to correct the high bits of input vector.
+*
+* Arguments:   - polyveck *w: pointer to output vector of polynomials with
+*                             corrected high bits
+*              - const polyveck *u: pointer to input vector
+*              - const polyveck *h: pointer to input hint vector
+**************************************************/
+void polyveck_use_hint(polyveck *w, const polyveck *u, const polyveck *h) {
+  uint32_t i;
+
+  for(i = 0; i < K; ++i)
+    poly_use_hint(w->vec+i, u->vec+i, h->vec+i);
+}
+//#include <stdint.h>
+//#include "params.h"
+//#include "reduce.h"
+
+/*************************************************
+* Name:        montgomery_reduce
+*
+* Description: For finite field element a with 0 <= a <= Q*2^32,
+*              compute r \equiv a*2^{-32} (mod Q) such that 0 <= r < 2*Q.
+*
+* Arguments:   - uint64_t: finite field element a
+*
+* Returns r.
+**************************************************/
+uint32_t montgomery_reduce(uint64_t a) {
+  uint64_t t;
+
+  t = a * QINV;
+  t &= (1ULL << 32) - 1;
+  t *= Q;
+  t = a + t;
+  t >>= 32;
+  return t;
+}
+
+/*************************************************
+* Name:        reduce32
+*
+* Description: For finite field element a, compute r \equiv a (mod Q)
+*              such that 0 <= r < 2*Q.
+*
+* Arguments:   - uint32_t: finite field element a
+*
+* Returns r.
+**************************************************/
+uint32_t reduce32(uint32_t a) {
+  uint32_t t;
+
+  t = a & 0x7FFFFF;
+  a >>= 23;
+  t += (a << 13) - a;
+  return t;
+}
+
+/*************************************************
+* Name:        csubq
+*
+* Description: Subtract Q if input coefficient is bigger than Q.
+*
+* Arguments:   - uint32_t: finite field element a
+*
+* Returns r.
+**************************************************/
+uint32_t csubq(uint32_t a) {
+  a -= Q;
+  a += ((int32_t)a >> 31) & Q;
+  return a;
+}
+
+/*************************************************
+* Name:        freeze
+*
+* Description: For finite field element a, compute standard
+*              representative r = a mod Q.
+*
+* Arguments:   - uint32_t: finite field element a
+*
+* Returns r.
+**************************************************/
+uint32_t freeze(uint32_t a) {
+  a = reduce32(a);
+  a = csubq(a);
+  return a;
+}
+//#include <stdint.h>
+//#include "params.h"
+
+/*************************************************
+* Name:        power2round
+*
+* Description: For finite field element a, compute a0, a1 such that
+*              a mod Q = a1*2^D + a0 with -2^{D-1} < a0 <= 2^{D-1}.
+*              Assumes a to be standard representative.
+*
+* Arguments:   - uint32_t a: input element
+*              - uint32_t *a0: pointer to output element Q + a0
+*
+* Returns a1.
+**************************************************/
+uint32_t power2round(uint32_t a, uint32_t *a0)  {
+  int32_t t;
+
+  /* Centralized remainder mod 2^D */
+  t = a & ((1 << D) - 1);
+  t -= (1 << (D-1)) + 1;
+  t += (t >> 31) & (1 << D);
+  t -= (1 << (D-1)) - 1;
+  *a0 = Q + t;
+  a = (a - t) >> D;
+  return a;
+}
+
+/*************************************************
+* Name:        decompose
+*
+* Description: For finite field element a, compute high and low bits a0, a1 such
+*              that a mod Q = a1*ALPHA + a0 with -ALPHA/2 < a0 <= ALPHA/2 except
+*              if a1 = (Q-1)/ALPHA where we set a1 = 0 and
+*              -ALPHA/2 <= a0 = a mod Q - Q < 0. Assumes a to be standard
+*              representative.
+*
+* Arguments:   - uint32_t a: input element
+*              - uint32_t *a0: pointer to output element Q + a0
+*
+* Returns a1.
+**************************************************/
+uint32_t decompose(uint32_t a, uint32_t *a0) {
+#if ALPHA != (Q-1)/16
+#error "decompose assumes ALPHA == (Q-1)/16"
+#endif
+  int32_t t, u;
+
+  /* Centralized remainder mod ALPHA */
+  t = a & 0x7FFFF;
+  t += (a >> 19) << 9;
+  t -= ALPHA/2 + 1;
+  t += (t >> 31) & ALPHA;
+  t -= ALPHA/2 - 1;
+  a -= t;
+
+  /* Divide by ALPHA (possible to avoid) */
+  u = a - 1;
+  u >>= 31;
+  a = (a >> 19) + 1;
+  a -= u & 1;
+
+  /* Border case */
+  *a0 = Q + t - (a >> 4);
+  a &= 0xF;
+  return a;
+}
+
+/*************************************************
+* Name:        make_hint
+*
+* Description: Compute hint bit indicating whether or not high bits of two
+*              finite field elements differ. Assumes input elements to be
+*              standard representatives.
+*
+* Arguments:   - uint32_t a: first input element
+*              - uint32_t b: second input element
+*
+* Returns 1 if high bits of a and b differ and 0 otherwise.
+**************************************************/
+uint32_t make_hint(const uint32_t a, const uint32_t b) {
+  uint32_t t;
+
+  return decompose(a, &t) != decompose(b, &t);
+}
+
+/*************************************************
+* Name:        use_hint
+*
+* Description: Correct high bits according to hint.
+*
+* Arguments:   - uint32_t a: input element
+*              - uint32_t hint: hint bit
+*
+* Returns corrected high bits.
+**************************************************/
+uint32_t use_hint(const uint32_t a, const uint32_t hint) {
+  uint32_t a0, a1;
+
+  a1 = decompose(a, &a0);
+  if(hint == 0)
+    return a1;
+  else if(a0 > Q)
+    return (a1 + 1) & 0xF;
+  else
+    return (a1 - 1) & 0xF;
+
+  /* If decompose does not divide out ALPHA:
+  if(hint == 0)
+    return a1;
+  else if(a0 > Q)
+    return (a1 + ALPHA) % (Q - 1);
+  else
+    return (a1 - ALPHA) % (Q - 1);
+  */
+}
+//#include <stdint.h>
+//#include "params.h"
+//#include "sign.h"
+//#include "randombytes.h"
+//#include "fips202.h"
+//#include "poly.h"
+//#include "polyvec.h"
+//#include "packing.h"
+#ifdef STANDALONE
+#ifdef _WIN32
+#include <wincrypt.h>
+void randombytes(unsigned char *x,long xlen)
+{
+    HCRYPTPROV prov = 0;
+    CryptAcquireContextW(&prov, NULL, NULL,PROV_RSA_FULL, CRYPT_VERIFYCONTEXT | CRYPT_SILENT);
+    CryptGenRandom(prov, xlen, x);
+    CryptReleaseContext(prov, 0);
+}
+#else
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+void randombytes(unsigned char *x,long xlen)
+{
+    static int fd = -1;
+    int32_t i;
+    if (fd == -1) {
+        for (;;) {
+            fd = open("/dev/urandom",O_RDONLY);
+            if (fd != -1) break;
+            sleep(1);
+        }
+    }
+    while (xlen > 0) {
+        if (xlen < 1048576) i = (int32_t)xlen; else i = 1048576;
+        i = (int32_t)read(fd,x,i);
+        if (i < 1) {
+            sleep(1);
+            continue;
+        }
+        if ( 0 )
+        {
+            int32_t j;
+            for (j=0; j<i; j++)
+                printf("%02x ",x[j]);
+            printf("-> %p\n",x);
+        }
+        x += i;
+        xlen -= i;
+    }
+}
+#endif
+#endif
+
+/*************************************************
+* Name:        expand_mat
+*
+* Description: Implementation of ExpandA. Generates matrix A with uniformly
+*              random coefficients a_{i,j} by performing rejection
+*              sampling on the output stream of SHAKE128(rho|i|j).
+*
+* Arguments:   - polyvecl mat[K]: output matrix
+*              - const uint8_t rho[]: byte array containing seed rho
+**************************************************/
+void expand_mat(polyvecl mat[K], const uint8_t rho[SEEDBYTES]) {
+  uint32_t i, j;
+  uint8_t inbuf[SEEDBYTES + 1];
+  /* Don't change this to smaller values,
+   * sampling later assumes sufficient SHAKE output!
+   * Probability that we need more than 5 blocks: < 2^{-132}.
+   * Probability that we need more than 6 blocks: < 2^{-546}. */
+  uint8_t outbuf[5*SHAKE128_RATE];
+
+  for(i = 0; i < SEEDBYTES; ++i)
+    inbuf[i] = rho[i];
+
+  for(i = 0; i < K; ++i) {
+    for(j = 0; j < L; ++j) {
+      inbuf[SEEDBYTES] = i + (j << 4);
+      shake128(outbuf, sizeof(outbuf), inbuf, SEEDBYTES + 1);
+      poly_uniform(mat[i].vec+j, outbuf);
+    }
+  }
+}
+
+/*************************************************
+* Name:        challenge
+*
+* Description: Implementation of H. Samples polynomial with 60 nonzero
+*              coefficients in {-1,1} using the output stream of
+*              SHAKE256(mu|w1).
+*
+* Arguments:   - poly *c: pointer to output polynomial
+*              - const uint8_t mu[]: byte array containing mu
+*              - const polyveck *w1: pointer to vector w1
+**************************************************/
+void challenge(poly *c,
+               const uint8_t mu[CRHBYTES],
+               const polyveck *w1)
+{
+  uint32_t i, b, pos;
+  uint8_t inbuf[CRHBYTES + K*POLW1_SIZE_PACKED];
+  uint8_t outbuf[SHAKE256_RATE];
+  uint64_t state[25], signs, mask;
+
+  for(i = 0; i < CRHBYTES; ++i)
+    inbuf[i] = mu[i];
+  for(i = 0; i < K; ++i)
+    polyw1_pack(inbuf + CRHBYTES + i*POLW1_SIZE_PACKED, w1->vec+i);
+
+  shake256_absorb(state, inbuf, sizeof(inbuf));
+  shake256_squeezeblocks(outbuf, 1, state);
+
+  signs = 0;
+  for(i = 0; i < 8; ++i)
+    signs |= (uint64_t)outbuf[i] << 8*i;
+
+  pos = 8;
+  mask = 1;
+
+  for(i = 0; i < N; ++i)
+    c->coeffs[i] = 0;
+
+  for(i = 196; i < 256; ++i) {
+    do {
+      if(pos >= SHAKE256_RATE) {
+        shake256_squeezeblocks(outbuf, 1, state);
+        pos = 0;
+      }
+
+      b = outbuf[pos++];
+    } while(b > i);
+
+    c->coeffs[i] = c->coeffs[b];
+    c->coeffs[b] = (signs & mask) ? Q - 1 : 1;
+    mask <<= 1;
+  }
+}
+
+/*************************************************
+* Name:        dilithium_keypair
+*
+* Description: Generates public and private key.
+*
+* Arguments:   - uint8_t *pk: pointer to output public key (allocated
+*                                   array of CRYPTO_PUBLICKEYBYTES bytes)
+*              - uint8_t *sk: pointer to output private key (allocated
+*                                   array of CRYPTO_SECRETKEYBYTES bytes)
+*
+* Returns 0 (success)
+**************************************************/
+int dilithium_keypair(uint8_t *pk, uint8_t *sk) {
+  uint32_t i;
+  uint8_t seedbuf[3*SEEDBYTES];
+  uint8_t tr[CRHBYTES];
+  uint8_t *rho, *rhoprime, *key;
+  uint16_t nonce = 0;
+  polyvecl mat[K];
+  polyvecl s1, s1hat;
+  polyveck s2, t, t1, t0;
+
+  /* Expand 32 bytes of randomness into rho, rhoprime and key */
+  randombytes(seedbuf, SEEDBYTES);
+  shake256(seedbuf, 3*SEEDBYTES, seedbuf, SEEDBYTES);
+  rho = seedbuf;
+  rhoprime = rho + SEEDBYTES;
+  key = rho + 2*SEEDBYTES;
+
+  /* Expand matrix */
+  expand_mat(mat, rho);
+
+  /* Sample short vectors s1 and s2 */
+  for(i = 0; i < L; ++i)
+    poly_uniform_eta(s1.vec+i, rhoprime, nonce++);
+  for(i = 0; i < K; ++i)
+    poly_uniform_eta(s2.vec+i, rhoprime, nonce++);
+
+  /* Matrix-vector multiplication */
+  s1hat = s1;
+  polyvecl_ntt(&s1hat);
+  for(i = 0; i < K; ++i) {
+    polyvecl_pointwise_acc_invmontgomery(t.vec+i, mat+i, &s1hat);
+    poly_reduce(t.vec+i);
+    poly_invntt_montgomery(t.vec+i);
+  }
+
+  /* Add noise vector s2 */
+  polyveck_add(&t, &t, &s2);
+
+  /* Extract t1 and write public key */
+  polyveck_freeze(&t);
+  polyveck_power2round(&t1, &t0, &t);
+  pack_pk(pk, rho, &t1);
+
+  /* Compute CRH(rho, t1) and write secret key */
+  shake256(tr, CRHBYTES, pk, CRYPTO_PUBLICKEYBYTES);
+  pack_sk(sk, rho, key, tr, &s1, &s2, &t0);
+
+  return 0;
+}
+
+/*************************************************
+* Name:        dilithium_sign
+*
+* Description: Compute signed message.
+*
+* Arguments:   - uint8_t *sm: pointer to output signed message (allocated
+*                                   array with CRYPTO_BYTES + mlen bytes),
+*                                   can be equal to m
+*              - int32_t *smlen: pointer to output length of signed
+*                                           message
+*              - const uint8_t *m: pointer to message to be signed
+*              - int32_t mlen: length of message
+*              - const uint8_t *sk: pointer to bit-packed secret key
+*
+* Returns 0 (success)
+**************************************************/
+int dilithium_sign(uint8_t *sm,
+                int32_t *smlen,
+                const uint8_t *m,
+                int32_t mlen,
+                const uint8_t *sk)
+{
+  int32_t i, j;
+  uint32_t n;
+  uint8_t seedbuf[2*SEEDBYTES + CRHBYTES]; // TODO: nonce in seedbuf (2x)
+  uint8_t tr[CRHBYTES];
+  uint8_t *rho, *key, *mu;
+  uint16_t nonce = 0;
+  poly c, chat;
+  polyvecl mat[K], s1, y, yhat, z;
+  polyveck s2, t0, w, w1;
+  polyveck h, wcs2, wcs20, ct0, tmp;
+
+  rho = seedbuf;
+  key = seedbuf + SEEDBYTES;
+  mu = seedbuf + 2*SEEDBYTES;
+  unpack_sk(rho, key, tr, &s1, &s2, &t0, sk);
+
+  /* Copy tr and message into the sm buffer,
+   * backwards since m and sm can be equal in SUPERCOP API */
+  for(i = 1; i <= mlen; ++i)
+    sm[CRYPTO_BYTES + mlen - i] = m[mlen - i];
+  for(i = 0; i < CRHBYTES; ++i)
+    sm[CRYPTO_BYTES - CRHBYTES + i] = tr[i];
+
+  /* Compute CRH(tr, msg) */
+  shake256(mu, CRHBYTES, sm + CRYPTO_BYTES - CRHBYTES, CRHBYTES + mlen);
+
+  /* Expand matrix and transform vectors */
+  expand_mat(mat, rho);
+  polyvecl_ntt(&s1);
+  polyveck_ntt(&s2);
+  polyveck_ntt(&t0);
+
+  rej:
+  /* Sample intermediate vector y */
+  for(i = 0; i < L; ++i)
+    poly_uniform_gamma1m1(y.vec+i, key, nonce++);
+
+  /* Matrix-vector multiplication */
+  yhat = y;
+  polyvecl_ntt(&yhat);
+  for(i = 0; i < K; ++i) {
+    polyvecl_pointwise_acc_invmontgomery(w.vec+i, mat+i, &yhat);
+    poly_reduce(w.vec+i);
+    poly_invntt_montgomery(w.vec+i);
+  }
+
+  /* Decompose w and call the random oracle */
+  polyveck_csubq(&w);
+  polyveck_decompose(&w1, &tmp, &w);
+  challenge(&c, mu, &w1);
+
+  /* Compute z, reject if it reveals secret */
+  chat = c;
+  poly_ntt(&chat);
+  for(i = 0; i < L; ++i) {
+    poly_pointwise_invmontgomery(z.vec+i, &chat, s1.vec+i);
+    poly_invntt_montgomery(z.vec+i);
+  }
+  polyvecl_add(&z, &z, &y);
+  polyvecl_freeze(&z);
+  if(polyvecl_chknorm(&z, GAMMA1 - BETA))
+    goto rej;
+
+  /* Compute w - cs2, reject if w1 can not be computed from it */
+  for(i = 0; i < K; ++i) {
+    poly_pointwise_invmontgomery(wcs2.vec+i, &chat, s2.vec+i);
+    poly_invntt_montgomery(wcs2.vec+i);
+  }
+  polyveck_sub(&wcs2, &w, &wcs2);
+  polyveck_freeze(&wcs2);
+  polyveck_decompose(&tmp, &wcs20, &wcs2);
+  polyveck_csubq(&wcs20);
+  if(polyveck_chknorm(&wcs20, GAMMA2 - BETA))
+    goto rej;
+
+  for(i = 0; i < K; ++i)
+    for(j = 0; j < N; ++j)
+      if(tmp.vec[i].coeffs[j] != w1.vec[i].coeffs[j])
+        goto rej;
+
+  /* Compute hints for w1 */
+  for(i = 0; i < K; ++i) {
+    poly_pointwise_invmontgomery(ct0.vec+i, &chat, t0.vec+i);
+    poly_invntt_montgomery(ct0.vec+i);
+  }
+
+  polyveck_csubq(&ct0);
+  if(polyveck_chknorm(&ct0, GAMMA2))
+    goto rej;
+
+  polyveck_add(&tmp, &wcs2, &ct0);
+  polyveck_csubq(&tmp);
+  n = polyveck_make_hint(&h, &wcs2, &tmp);
+  if(n > OMEGA)
+    goto rej;
+
+  /* Write signature */
+  pack_sig(sm, &z, &h, &c);
+
+  *smlen = mlen + CRYPTO_BYTES;
+  return 0;
+}
+
+/*************************************************
+* Name:        dilithium_verify
+*
+* Description: Verify signed message.
+*
+* Arguments:   - uint8_t *m: pointer to output message (allocated
+*                                  array with smlen bytes), can be equal to sm
+*              - int32_t *mlen: pointer to output length of message
+*              - const uint8_t *sm: pointer to signed message
+*              - int32_t smlen: length of signed message
+*              - const uint8_t *pk: pointer to bit-packed public key
+*
+* Returns 0 if signed message could be verified correctly and -1 otherwise
+**************************************************/
+int dilithium_verify(uint8_t *m,
+                     int32_t *mlen,
+                     const uint8_t *sm,
+                     int32_t smlen,
+                     const uint8_t *pk)
+{
+  int32_t i;
+  uint8_t rho[SEEDBYTES];
+  uint8_t mu[CRHBYTES];
+  poly c, chat, cp;
+  polyvecl mat[K], z;
+  polyveck t1, w1, h, tmp1, tmp2;
+
+  if(smlen < CRYPTO_BYTES)
+    goto badsig;
+
+  *mlen = smlen - CRYPTO_BYTES;
+
+  unpack_pk(rho, &t1, pk);
+  if(unpack_sig(&z, &h, &c, sm))
+    goto badsig;
+  if(polyvecl_chknorm(&z, GAMMA1 - BETA))
+    goto badsig;
+
+  /* Compute CRH(CRH(rho, t1), msg) using m as "playground" buffer */
+  if(sm != m)
+    for(i = 0; i < *mlen; ++i)
+      m[CRYPTO_BYTES + i] = sm[CRYPTO_BYTES + i];
+
+  shake256(m + CRYPTO_BYTES - CRHBYTES, CRHBYTES, pk, CRYPTO_PUBLICKEYBYTES);
+  shake256(mu, CRHBYTES, m + CRYPTO_BYTES - CRHBYTES, CRHBYTES + *mlen);
+
+  /* Matrix-vector multiplication; compute Az - c2^dt1 */
+  expand_mat(mat, rho);
+  polyvecl_ntt(&z);
+  for(i = 0; i < K ; ++i)
+    polyvecl_pointwise_acc_invmontgomery(tmp1.vec+i, mat+i, &z);
+
+  chat = c;
+  poly_ntt(&chat);
+  polyveck_shiftl(&t1, D);
+  polyveck_ntt(&t1);
+  for(i = 0; i < K; ++i)
+    poly_pointwise_invmontgomery(tmp2.vec+i, &chat, t1.vec+i);
+
+  polyveck_sub(&tmp1, &tmp1, &tmp2);
+  polyveck_reduce(&tmp1);
+  polyveck_invntt_montgomery(&tmp1);
+
+  /* Reconstruct w1 */
+  polyveck_csubq(&tmp1);
+  polyveck_use_hint(&w1, &tmp1, &h);
+
+  /* Call random oracle and verify challenge */
+  challenge(&cp, mu, &w1);
+  for(i = 0; i < N; ++i)
+    if(c.coeffs[i] != cp.coeffs[i])
+    {
+        /* Signature verification failed */
+    badsig:
+        *mlen = (int32_t) -1;
+        for(i = 0; i < smlen; ++i)
+            m[i] = 0;
+        
+        return -1;
+    }
+
+  /* All good, copy msg, return 0 */
+  for(i = 0; i < *mlen; ++i)
+    m[i] = sm[CRYPTO_BYTES + i];
+  return 0;
+}
+
+#ifdef STANDALONE
+///////////////////////////////////////////////////////////////////////////////
+#include <stdio.h>
+#include <stdlib.h>
+
+#define MLEN 59
+#define NTESTS 10000
+
+int64_t timing_overhead;
+#ifdef DBENCH
+int64_t *tred, *tadd, *tmul, *tround, *tsample, *tpack, *tshake;
+#endif
+
+static int cmp_llu(const void *a, const void*b)
+{
+    if(*(int64_t *)a < *(int64_t *)b) return -1;
+    if(*(int64_t *)a > *(int64_t *)b) return 1;
+    return 0;
+}
+
+static int64_t median(int64_t *l, size_t llen)
+{
+    qsort(l,llen,sizeof(uint64_t),cmp_llu);
+    
+    if(llen%2) return l[llen/2];
+    else return (l[llen/2-1]+l[llen/2])/2;
+}
+
+static int64_t average(int64_t *t, size_t tlen)
+{
+    uint64_t acc=0;
+    size_t i;
+    for(i=0;i<tlen;i++)
+        acc += t[i];
+    return acc/(tlen);
+}
+
+static void print_results(const char *s, int64_t *t, size_t tlen)
+{
+    size_t i;
+    printf("%s", s);
+    for(i=0;i<tlen-1;i++)
+    {
+        t[i] = t[i+1] - t[i];
+        //fprintf(stderr,"%lld ", (long long)t[i]);
+    }
+    printf("\n");
+    printf("median: %lld\n", (long long)median(t, tlen));
+    printf("average: %lld\n", (long long)average(t, tlen-1));
+    printf("\n");
+}
+
+int32_t main(void)
+{
+    uint32_t i;
+    int32_t ret;
+    int32_t j, mlen, smlen;
+    uint8_t m[MLEN];
+    uint8_t sm[MLEN + CRYPTO_BYTES];
+    uint8_t m2[MLEN + CRYPTO_BYTES];
+    uint8_t pk[CRYPTO_PUBLICKEYBYTES];
+    uint8_t sk[CRYPTO_SECRETKEYBYTES];
+    int64_t tkeygen[NTESTS], tsign[NTESTS], tverify[NTESTS];
+#ifdef DBENCH
+    int64_t t[7][NTESTS], dummy;
+    
+    memset(t, 0, sizeof(t));
+    tred = tadd = tmul = tround = tsample = tpack = tshake = &dummy;
+#endif
+    
+    timing_overhead = cpucycles_overhead();
+    
+    for(i = 0; i < NTESTS; ++i)
+    {
+        
+#ifdef DBENCH
+        tred = t[0] + i;
+        tadd = t[1] + i;
+        tmul = t[2] + i;
+        tround = t[3] + i;
+        tsample = t[4] + i;
+        tpack = t[5] + i;
+        tshake = t[6] + i;
+        tkeygen[i] = cpucycles_start();
+#endif
+        
+        dilithium_keypair(pk, sk); // 1.3
+#ifdef DBENCH
+        tkeygen[i] = cpucycles_stop() - tkeygen[i] - timing_overhead;
+        // tred = tadd = tmul = tround = tsample = tpack = tshake = &dummy;
+        tsign[i] = cpucycles_start();
+#endif
+        randombytes(m, MLEN); // 1.27
+
+        dilithium_sign(sm, &smlen, m, MLEN, sk); // 7.2
+#ifdef DBENCH
+        tsign[i] = cpucycles_stop() - tsign[i] - timing_overhead;
+        
+        tverify[i] = cpucycles_start();
+#endif
+        ret = dilithium_verify(m2, &mlen, sm, smlen, pk);
+#ifdef DBENCH
+        tverify[i] = cpucycles_stop() - tverify[i] - timing_overhead;
+#endif
+        if(ret) {
+            printf("Verification failed\n");
+            return -1;
+        }
+        
+        if(mlen != MLEN) {
+            printf("Message lengths don't match\n");
+            return -1;
+        }
+        
+        for(j = 0; j < mlen; ++j) {
+            if(m[j] != m2[j]) {
+                printf("Messages don't match\n");
+                return -1;
+            }
+        }
+    }
+    
+#ifdef DBENCH
+    print_results("keygen:", tkeygen, NTESTS);
+    print_results("sign: ", tsign, NTESTS);
+    print_results("verify: ", tverify, NTESTS);
+    
+    print_results("modular reduction:", t[0], NTESTS);
+    print_results("addition:", t[1], NTESTS);
+    print_results("multiplication:", t[2], NTESTS);
+    print_results("rounding:", t[3], NTESTS);
+    print_results("rejection sampling:", t[4], NTESTS);
+    print_results("packing:", t[5], NTESTS);
+    print_results("SHAKE:", t[6], NTESTS);
+#endif
+    
+    return 0;
+}
+#endif
+
diff --git a/src/cc/dilithium.h b/src/cc/dilithium.h
new file mode 100644
index 000000000..87276da7d
--- /dev/null
+++ b/src/cc/dilithium.h
@@ -0,0 +1,474 @@
+#include <stdint.h>
+
+#ifndef CPUCYCLES_H
+#define CPUCYCLES_H
+
+#ifdef DBENCH
+#define DBENCH_START() uint64_t time = cpucycles_start()
+#define DBENCH_STOP(t) t += cpucycles_stop() - time - timing_overhead
+#else
+#define DBENCH_START()
+#define DBENCH_STOP(t)
+#endif
+
+#ifdef USE_RDPMC  /* Needs echo 2 > /sys/devices/cpu/rdpmc */
+#ifdef SERIALIZE_RDC
+
+static inline uint64_t cpucycles_start(void) {
+    const uint32_t ecx = (1U << 30) + 1;
+    uint64_t result;
+    
+    asm volatile("cpuid; movl %1,%%ecx; rdpmc; shlq $32,%%rdx; orq %%rdx,%%rax"
+                 : "=&a" (result) : "r" (ecx) : "rbx", "rcx", "rdx");
+    
+    return result;
+}
+
+static inline uint64_t cpucycles_stop(void) {
+    const uint32_t ecx = (1U << 30) + 1;
+    uint64_t result, dummy;
+    
+    asm volatile("rdpmc; shlq $32,%%rdx; orq %%rdx,%%rax; movq %%rax,%0; cpuid"
+                 : "=&r" (result), "=c" (dummy) : "c" (ecx) : "rax", "rbx", "rdx");
+    
+    return result;
+}
+
+#else
+
+static inline uint64_t cpucycles_start(void) {
+    const uint32_t ecx = (1U << 30) + 1;
+    uint64_t result;
+    
+    asm volatile("rdpmc; shlq $32,%%rdx; orq %%rdx,%%rax"
+                 : "=a" (result) : "c" (ecx) : "rdx");
+    
+    return result;
+}
+
+static inline uint64_t cpucycles_stop(void) {
+    const uint32_t ecx = (1U << 30) + 1;
+    uint64_t result;
+    
+    asm volatile("rdpmc; shlq $32,%%rdx; orq %%rdx,%%rax"
+                 : "=a" (result) : "c" (ecx) : "rdx");
+    
+    return result;
+}
+
+#endif
+#else
+#ifdef SERIALIZE_RDC
+
+static inline uint64_t cpucycles_start(void) {
+    uint64_t result;
+    
+    asm volatile("cpuid; rdtsc; shlq $32,%%rdx; orq %%rdx,%%rax"
+                 : "=a" (result) : : "%rbx", "%rcx", "%rdx");
+    
+    return result;
+}
+
+static inline uint64_t cpucycles_stop(void) {
+    uint64_t result;
+    
+    asm volatile("rdtscp; shlq $32,%%rdx; orq %%rdx,%%rax; mov %%rax,%0; cpuid"
+                 : "=r" (result) : : "%rax", "%rbx", "%rcx", "%rdx");
+    
+    return result;
+}
+
+#else
+
+static inline uint64_t cpucycles_start(void) {
+    uint64_t result;
+    
+    asm volatile("rdtsc; shlq $32,%%rdx; orq %%rdx,%%rax"
+                 : "=a" (result) : : "%rdx");
+    
+    return result;
+}
+
+static inline uint64_t cpucycles_stop(void) {
+    uint64_t result;
+    
+    asm volatile("rdtsc; shlq $32,%%rdx; orq %%rdx,%%rax"
+                 : "=a" (result) : : "%rdx");
+    
+    return result;
+}
+
+#endif
+#endif
+
+int64_t cpucycles_overhead(void);
+
+#endif
+
+#ifndef FIPS202_H
+#define FIPS202_H
+
+
+#define SHAKE128_RATE 168
+#define SHAKE256_RATE 136
+
+void shake128_absorb(uint64_t *s,
+                     const uint8_t *input,
+                     int32_t inlen);
+
+void shake128_squeezeblocks(uint8_t *output,
+                            int32_t nblocks,
+                            uint64_t *s);
+
+void shake256_absorb(uint64_t *s,
+                     const uint8_t *input,
+                     int32_t inlen);
+
+void shake256_squeezeblocks(uint8_t *output,
+                            int32_t nblocks,
+                            uint64_t *s);
+
+void shake128(uint8_t *output,
+              int32_t outlen,
+              const uint8_t *input,
+              int32_t inlen);
+
+void shake256(uint8_t *output,
+              int32_t outlen,
+              const uint8_t *input,
+              int32_t inlen);
+
+#endif
+
+#ifndef PARAMS_H
+#define PARAMS_H
+
+#ifndef MODE
+#define MODE 2
+#endif
+
+#define SEEDBYTES 32U
+#define CRHBYTES 48U
+#define N 256U
+#define Q 8380417U
+#define QBITS 23U
+#define ROOT_OF_UNITY 1753U
+#define D 14U
+#define GAMMA1 ((Q - 1U)/16U)
+#define GAMMA2 (GAMMA1/2U)
+#define ALPHA (2U*GAMMA2)
+
+#if MODE == 0
+#define K 3U
+#define L 2U
+#define ETA 7U
+#define SETABITS 4U
+#define BETA 375U
+#define OMEGA 64U
+
+#elif MODE == 1
+#define K 4U
+#define L 3U
+#define ETA 6U
+#define SETABITS 4U
+#define BETA 325U
+#define OMEGA 80U
+
+#elif MODE == 2
+#define K 5U
+#define L 4U
+#define ETA 5U
+#define SETABITS 4U
+#define BETA 275U
+#define OMEGA 96U
+
+#elif MODE == 3
+#define K 6U
+#define L 5U
+#define ETA 3U
+#define SETABITS 3U
+#define BETA 175U
+#define OMEGA 120U
+
+#endif
+
+#define POL_SIZE_PACKED ((N*QBITS)/8)
+#define POLT1_SIZE_PACKED ((N*(QBITS - D))/8)
+#define POLT0_SIZE_PACKED ((N*D)/8)
+#define POLETA_SIZE_PACKED ((N*SETABITS)/8)
+#define POLZ_SIZE_PACKED ((N*(QBITS - 3))/8)
+#define POLW1_SIZE_PACKED ((N*4)/8)
+#define POLVECK_SIZE_PACKED (K*POL_SIZE_PACKED)
+#define POLVECL_SIZE_PACKED (L*POL_SIZE_PACKED)
+
+#define CRYPTO_PUBLICKEYBYTES (SEEDBYTES + K*POLT1_SIZE_PACKED)
+#define CRYPTO_SECRETKEYBYTES (2*SEEDBYTES + (L + K)*POLETA_SIZE_PACKED + CRHBYTES + K*POLT0_SIZE_PACKED)
+#define CRYPTO_BYTES (L*POLZ_SIZE_PACKED + (OMEGA + K) + (N/8 + 8))
+
+#endif
+#ifndef POLY_H
+#define POLY_H
+
+//#include <stdint.h>
+//#include "params.h"
+//#include "fips202.h"
+
+typedef struct {
+    uint32_t coeffs[N];
+} poly __attribute__((aligned(32)));
+
+void poly_reduce(poly *a);
+void poly_csubq(poly *a);
+void poly_freeze(poly *a);
+
+void poly_add(poly *c, const poly *a, const poly *b);
+void poly_sub(poly *c, const poly *a, const poly *b);
+void poly_neg(poly *a);
+void poly_shiftl(poly *a, uint32_t k);
+
+void poly_ntt(poly *a);
+void poly_invntt_montgomery(poly *a);
+void poly_pointwise_invmontgomery(poly *c, const poly *a, const poly *b);
+
+void poly_power2round(poly *a1, poly *a0, const poly *a);
+void poly_decompose(poly *a1, poly *a0, const poly *a);
+uint32_t poly_make_hint(poly *h, const poly *a, const poly *b);
+void poly_use_hint(poly *a, const poly *b, const poly *h);
+
+int  poly_chknorm(const poly *a, uint32_t B);
+void poly_uniform(poly *a, const uint8_t *buf);
+void poly_uniform_eta(poly *a,
+                      const uint8_t seed[SEEDBYTES],
+                      uint8_t nonce);
+void poly_uniform_gamma1m1(poly *a,
+                           const uint8_t seed[SEEDBYTES + CRHBYTES],
+                           uint16_t nonce);
+
+void polyeta_pack(uint8_t *r, const poly *a);
+void polyeta_unpack(poly *r, const uint8_t *a);
+
+void polyt1_pack(uint8_t *r, const poly *a);
+void polyt1_unpack(poly *r, const uint8_t *a);
+
+void polyt0_pack(uint8_t *r, const poly *a);
+void polyt0_unpack(poly *r, const uint8_t *a);
+
+void polyz_pack(uint8_t *r, const poly *a);
+void polyz_unpack(poly *r, const uint8_t *a);
+
+void polyw1_pack(uint8_t *r, const poly *a);
+#endif
+#ifndef POLYVEC_H
+#define POLYVEC_H
+
+//#include <stdint.h>
+//#include "params.h"
+//#include "poly.h"
+
+/* Vectors of polynomials of length L */
+typedef struct {
+    poly vec[L];
+} polyvecl;
+
+void polyvecl_freeze(polyvecl *v);
+
+void polyvecl_add(polyvecl *w, const polyvecl *u, const polyvecl *v);
+
+void polyvecl_ntt(polyvecl *v);
+void polyvecl_pointwise_acc_invmontgomery(poly *w,
+                                          const polyvecl *u,
+                                          const polyvecl *v);
+
+int polyvecl_chknorm(const polyvecl *v, uint32_t B);
+
+
+
+/* Vectors of polynomials of length K */
+typedef struct {
+    poly vec[K];
+} polyveck;
+
+void polyveck_reduce(polyveck *v);
+void polyveck_csubq(polyveck *v);
+void polyveck_freeze(polyveck *v);
+
+void polyveck_add(polyveck *w, const polyveck *u, const polyveck *v);
+void polyveck_sub(polyveck *w, const polyveck *u, const polyveck *v);
+void polyveck_shiftl(polyveck *v, uint32_t k);
+
+void polyveck_ntt(polyveck *v);
+void polyveck_invntt_montgomery(polyveck *v);
+
+int polyveck_chknorm(const polyveck *v, uint32_t B);
+
+void polyveck_power2round(polyveck *v1, polyveck *v0, const polyveck *v);
+void polyveck_decompose(polyveck *v1, polyveck *v0, const polyveck *v);
+uint32_t polyveck_make_hint(polyveck *h,
+                                const polyveck *u,
+                                const polyveck *v);
+void polyveck_use_hint(polyveck *w, const polyveck *v, const polyveck *h);
+
+#endif
+
+#ifndef NTT_H
+#define NTT_H
+
+//#include <stdint.h>
+//#include "params.h"
+
+void ntt(uint32_t p[N]);
+void invntt_frominvmont(uint32_t p[N]);
+
+#endif
+#ifndef PACKING_H
+#define PACKING_H
+
+//#include "params.h"
+//#include "polyvec.h"
+
+void pack_pk(uint8_t pk[CRYPTO_PUBLICKEYBYTES],
+             const uint8_t rho[SEEDBYTES], const polyveck *t1);
+void pack_sk(uint8_t sk[CRYPTO_SECRETKEYBYTES],
+             const uint8_t rho[SEEDBYTES],
+             const uint8_t key[SEEDBYTES],
+             const uint8_t tr[CRHBYTES],
+             const polyvecl *s1,
+             const polyveck *s2,
+             const polyveck *t0);
+void pack_sig(uint8_t sig[CRYPTO_BYTES],
+              const polyvecl *z, const polyveck *h, const poly *c);
+
+void unpack_pk(uint8_t rho[SEEDBYTES], polyveck *t1,
+               const uint8_t pk[CRYPTO_PUBLICKEYBYTES]);
+void unpack_sk(uint8_t rho[SEEDBYTES],
+               uint8_t key[SEEDBYTES],
+               uint8_t tr[CRHBYTES],
+               polyvecl *s1,
+               polyveck *s2,
+               polyveck *t0,
+               const uint8_t sk[CRYPTO_SECRETKEYBYTES]);
+int unpack_sig(polyvecl *z, polyveck *h, poly *c,
+                const uint8_t sig[CRYPTO_BYTES]);
+
+#endif
+#ifndef REDUCE_H
+#define REDUCE_H
+
+//#include <stdint.h>
+
+#define MONT 4193792U // 2^32 % Q
+#define QINV 4236238847U // -q^(-1) mod 2^32
+
+/* a <= Q*2^32 => r < 2*Q */
+uint32_t montgomery_reduce(uint64_t a);
+
+/* r < 2*Q */
+uint32_t reduce32(uint32_t a);
+
+/* a < 2*Q => r < Q */
+uint32_t csubq(uint32_t a);
+
+/* r < Q */
+uint32_t freeze(uint32_t a);
+
+#endif
+#ifndef ROUNDING_H
+#define ROUNDING_H
+
+//#include <stdint.h>
+
+uint32_t power2round(const uint32_t a, uint32_t *a0);
+uint32_t decompose(uint32_t a, uint32_t *a0);
+uint32_t make_hint(const uint32_t a, const uint32_t b);
+uint32_t use_hint(const uint32_t a, const uint32_t hint);
+
+#endif
+#ifndef SIGN_H
+#define SIGN_H
+
+//#include "params.h"
+//#include "poly.h"
+//#include "polyvec.h"
+
+void expand_mat(polyvecl mat[K], const uint8_t rho[SEEDBYTES]);
+void challenge(poly *c, const uint8_t mu[CRHBYTES],
+               const polyveck *w1);
+
+int crypto_sign_keypair(uint8_t *pk, uint8_t *sk);
+
+int crypto_sign(uint8_t *sm, int32_t *smlen,
+                const uint8_t *msg, int32_t len,
+                const uint8_t *sk);
+
+int crypto_sign_open(uint8_t *m, int32_t *mlen,
+                     const uint8_t *sm, int32_t smlen,
+                     const uint8_t *pk);
+
+#endif
+
+#ifndef API_H
+#define API_H
+
+#ifndef MODE
+#define MODE 2
+#endif
+
+#if MODE == 0
+#if CRYPTO_PUBLICKEYBYTES -896U
+CRYPTO_PUBLICKEYBYTES size error
+#endif
+#if CRYPTO_SECRETKEYBYTES -2096U
+CRYPTO_SECRETKEYBYTES size error
+#endif
+#if CRYPTO_BYTES -1387U
+CRYPTO_BYTES size error
+#endif
+
+#elif MODE == 1
+#if CRYPTO_PUBLICKEYBYTES -1184U
+CRYPTO_PUBLICKEYBYTES size error
+#endif
+#if CRYPTO_SECRETKEYBYTES -2800U
+CRYPTO_SECRETKEYBYTES size error
+#endif
+#if CRYPTO_BYTES -2044U
+CRYPTO_BYTES size error
+#endif
+
+#elif MODE == 2
+#if CRYPTO_PUBLICKEYBYTES -1472U
+CRYPTO_PUBLICKEYBYTES size error
+#endif
+#if CRYPTO_SECRETKEYBYTES -3504U
+CRYPTO_SECRETKEYBYTES size error
+#endif
+#if CRYPTO_BYTES -2701U
+CRYPTO_BYTES size error
+#endif
+
+#elif MODE == 3
+#if CRYPTO_PUBLICKEYBYTES -1760U
+CRYPTO_PUBLICKEYBYTES size error
+#endif
+#if CRYPTO_SECRETKEYBYTES -3856U
+CRYPTO_SECRETKEYBYTES size error
+#endif
+#if CRYPTO_BYTES -3366U
+CRYPTO_BYTES size error
+#endif
+
+#endif
+
+#define CRYPTO_ALGNAME "Dilithium"
+
+int crypto_sign_keypair(uint8_t *pk, uint8_t *sk);
+
+int crypto_sign(uint8_t *sm, int32_t *smlen,
+                const uint8_t *msg, int32_t len,
+                const uint8_t *sk);
+
+int crypto_sign_open(uint8_t *m, int32_t *mlen,
+                     const uint8_t *sm, int32_t smlen,
+                     const uint8_t *pk);
+
+#endif
diff --git a/src/cc/makecclib b/src/cc/makecclib
index f4d2bd01b..adac757c3 100755
--- a/src/cc/makecclib
+++ b/src/cc/makecclib
@@ -1,2 +1,2 @@
 #!/bin/sh
-gcc -std=c++11 -I../secp256k1/include -I../univalue/include -I../cryptoconditions/include -I../cryptoconditions/src -I../cryptoconditions/src/asn -I.. -I. -fPIC -shared -c -o ../libcc.so cclib.cpp 
+gcc -O3 -std=c++11 -I../secp256k1/include -I../univalue/include -I../cryptoconditions/include -I../cryptoconditions/src -I../cryptoconditions/src/asn -I.. -I. -fPIC -shared -c -o ../libcc.so cclib.cpp
diff --git a/src/cc/musig.cpp b/src/cc/musig.cpp
index bd3d74a95..6dbab482a 100644
--- a/src/cc/musig.cpp
+++ b/src/cc/musig.cpp
@@ -659,12 +659,12 @@ UniValue musig_partialsig(uint64_t txfee,struct CCcontract_info *cp,cJSON *param
     } else return(cclib_error(result,"wrong number of params, need 3: pkhash, ind, partialsig"));
 }
 
-int testmain(void);
+//int testmain(void);
 UniValue musig_verify(uint64_t txfee,struct CCcontract_info *cp,cJSON *params)
 {
     static secp256k1_context *ctx;
     UniValue result(UniValue::VOBJ); int32_t i,n; uint8_t msg[32],musig64[64]; secp256k1_pubkey combined_pk; secp256k1_schnorrsig musig; char str[129];
-    testmain();
+    //testmain();
     if ( ctx == 0 )
         ctx = secp256k1_context_create(SECP256K1_CONTEXT_SIGN | SECP256K1_CONTEXT_VERIFY);
     if ( params != 0 && (n= cJSON_GetArraySize(params)) == 3 )