From 8d68e969342e00547793fa1517aede9ec096fb74 Mon Sep 17 00:00:00 2001 From: troky Date: Tue, 10 Mar 2015 09:47:46 +0100 Subject: [PATCH] Added Lyra2RE algo --- Makefile.am | 1 + algorithm.c | 53 +- algorithm.h | 3 +- algorithm/lyra2.c | 208 +++ algorithm/lyra2.h | 50 + algorithm/lyra2re.c | 169 +++ algorithm/lyra2re.h | 10 + algorithm/sponge.c | 742 +++++++++++ algorithm/sponge.h | 108 ++ driver-opencl.c | 166 +-- findnonce.c | 139 +- findnonce.h | 1 + kernel/Lyra2.cl | 145 +++ kernel/Lyra2RE.cl | 392 ++++++ kernel/arebyp.cl | 2 +- kernel/blake256.cl | 96 ++ kernel/diamond.cl | 2042 ++++-------------------------- kernel/groestl256.cl | 415 ++++++ kernel/groestlcoin.cl | 2036 ++++------------------------- kernel/keccak1600.cl | 84 ++ kernel/skein256.cl | 107 ++ sgminer.c | 1 + winbuild/sgminer.vcxproj | 6 + winbuild/sgminer.vcxproj.filters | 18 + 24 files changed, 3307 insertions(+), 3687 deletions(-) create mode 100644 algorithm/lyra2.c create mode 100644 algorithm/lyra2.h create mode 100644 algorithm/lyra2re.c create mode 100644 algorithm/lyra2re.h create mode 100644 algorithm/sponge.c create mode 100644 algorithm/sponge.h create mode 100644 kernel/Lyra2.cl create mode 100644 kernel/Lyra2RE.cl create mode 100644 kernel/blake256.cl create mode 100644 kernel/groestl256.cl create mode 100644 kernel/keccak1600.cl create mode 100644 kernel/skein256.cl diff --git a/Makefile.am b/Makefile.am index 158baa6d0..9d272c96e 100644 --- a/Makefile.am +++ b/Makefile.am @@ -69,6 +69,7 @@ sgminer_SOURCES += algorithm/fresh.c algorithm/fresh.h sgminer_SOURCES += algorithm/whirlcoin.c algorithm/whirlcoin.h sgminer_SOURCES += algorithm/neoscrypt.c algorithm/neoscrypt.h sgminer_SOURCES += algorithm/whirlpoolx.c algorithm/whirlpoolx.h +sgminer_SOURCES += algorithm/lyra2re.c algorithm/lyra2re.h algorithm/lyra2.c algorithm/lyra2.h algorithm/sponge.c algorithm/sponge.h bin_SCRIPTS = $(top_srcdir)/kernel/*.cl diff --git a/algorithm.c b/algorithm.c index cc7e683e0..fd9d9f073 100644 --- a/algorithm.c +++ b/algorithm.c @@ -32,6 +32,7 @@ #include "algorithm/whirlcoin.h" #include "algorithm/neoscrypt.h" #include "algorithm/whirlpoolx.h" +#include "algorithm/lyra2re.h" #include "compat.h" @@ -54,7 +55,8 @@ const char *algorithm_type_str[] = { "Fresh", "Whirlcoin", "Neoscrypt", - "WhirlpoolX" + "WhirlpoolX", + "Lyra2RE" }; void sha256(const unsigned char *message, unsigned int len, unsigned char *digest) @@ -653,6 +655,50 @@ static cl_int queue_whirlpoolx_kernel(struct __clState *clState, struct _dev_blk return status; } +static cl_int queue_lyra2RE_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads) +{ + cl_kernel *kernel; + unsigned int num; + cl_int status = 0; + cl_ulong le_target; + + le_target = *(cl_ulong *)(blk->work->device_target + 24); + flip80(clState->cldata, blk->work->data); + status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL); + + // blake - search + kernel = &clState->kernel; + num = 0; + + CL_SET_ARG(clState->padbuffer8); + CL_SET_ARG(blk->work->blk.ctx_a); + CL_SET_ARG(blk->work->blk.ctx_b); + CL_SET_ARG(blk->work->blk.ctx_c); + CL_SET_ARG(blk->work->blk.ctx_d); + CL_SET_ARG(blk->work->blk.ctx_e); + CL_SET_ARG(blk->work->blk.ctx_f); + CL_SET_ARG(blk->work->blk.ctx_g); + CL_SET_ARG(blk->work->blk.ctx_h); + CL_SET_ARG(blk->work->blk.cty_a); + CL_SET_ARG(blk->work->blk.cty_b); + CL_SET_ARG(blk->work->blk.cty_c); + + // bmw - search1 + kernel = clState->extra_kernels; + CL_SET_ARG_0(clState->padbuffer8); + // groestl - search2 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // skein - search3 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // jh - search4 + num = 0; + CL_NEXTKERNEL_SET_ARG(clState->padbuffer8); + CL_SET_ARG(clState->outputBuffer); + CL_SET_ARG(le_target); + + return status; +} + typedef struct _algorithm_settings_t { const char *name; /* Human-readable identifier */ algorithm_type_t type; //common algorithm type @@ -728,6 +774,8 @@ static algorithm_settings_t algos[] = { { "fresh", ALGO_FRESH, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 4 * 16 * 4194304, 0, fresh_regenhash, queue_fresh_kernel, gen_hash, NULL}, + { "lyra2re", ALGO_LYRA2RE, "", 1, 128, 128, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 2 * 8 * 4194304 , 0, lyra2re_regenhash, queue_lyra2RE_kernel, gen_hash, NULL}, + // kernels starting from this will have difficulty calculated by using fuguecoin algorithm #define A_FUGUE(a, b, c) \ { a, ALGO_FUGUE, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, c, NULL} @@ -737,7 +785,6 @@ static algorithm_settings_t algos[] = { #undef A_FUGUE { "whirlcoin", ALGO_WHIRL, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 3, 8 * 16 * 4194304, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, whirlcoin_regenhash, queue_whirlcoin_kernel, sha256, NULL}, - { "whirlpoolx", ALGO_WHIRL, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, 0, whirlpoolx_regenhash, queue_sph_kernel, gen_hash, NULL }, // Terminator (do not remove) @@ -812,6 +859,8 @@ static const char *lookup_algorithm_alias(const char *lookup_alias, uint8_t *nfa ALGO_ALIAS("keccak", "maxcoin"); ALGO_ALIAS("whirlpool", "whirlcoin"); ALGO_ALIAS("whirlpoolx", "whirlpoolx"); + ALGO_ALIAS("Lyra2RE", "lyra2re"); + ALGO_ALIAS("lyra2", "lyra2re"); #undef ALGO_ALIAS #undef ALGO_ALIAS_NF diff --git a/algorithm.h b/algorithm.h index bbe63beb3..a121be198 100644 --- a/algorithm.h +++ b/algorithm.h @@ -25,7 +25,8 @@ typedef enum { ALGO_NIST, ALGO_FRESH, ALGO_WHIRL, - ALGO_NEOSCRYPT + ALGO_NEOSCRYPT, + ALGO_LYRA2RE } algorithm_type_t; extern const char *algorithm_type_str[]; diff --git a/algorithm/lyra2.c b/algorithm/lyra2.c new file mode 100644 index 000000000..6944b22f3 --- /dev/null +++ b/algorithm/lyra2.c @@ -0,0 +1,208 @@ +/** + * Implementation of the Lyra2 Password Hashing Scheme (PHS). + * + * Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014. + * + * This software is hereby placed in the public domain. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include +#include +#include +#include "lyra2.h" +#include "sponge.h" + +/** + * Executes Lyra2 based on the G function from Blake2b. This version supports salts and passwords + * whose combined length is smaller than the size of the memory matrix, (i.e., (nRows x nCols x b) bits, + * where "b" is the underlying sponge's bitrate). In this implementation, the "basil" is composed by all + * integer parameters (treated as type "unsigned int") in the order they are provided, plus the value + * of nCols, (i.e., basil = kLen || pwdlen || saltlen || timeCost || nRows || nCols). + * + * @param K The derived key to be output by the algorithm + * @param kLen Desired key length + * @param pwd User password + * @param pwdlen Password length + * @param salt Salt + * @param saltlen Salt length + * @param timeCost Parameter to determine the processing time (T) + * @param nRows Number or rows of the memory matrix (R) + * @param nCols Number of columns of the memory matrix (C) + * + * @return 0 if the key is generated correctly; -1 if there is an error (usually due to lack of memory for allocation) + */ +int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *salt, uint64_t saltlen, uint64_t timeCost, uint64_t nRows, uint64_t nCols) { + + //============================= Basic variables ============================// + int64_t row = 2; //index of row to be processed + int64_t prev = 1; //index of prev (last row ever computed/modified) + int64_t rowa = 0; //index of row* (a previous row, deterministically picked during Setup and randomly picked while Wandering) + int64_t tau; //Time Loop iterator + int64_t step = 1; //Visitation step (used during Setup and Wandering phases) + int64_t window = 2; //Visitation window (used to define which rows can be revisited during Setup) + int64_t gap = 1; //Modifier to the step, assuming the values 1 or -1 + int64_t i; //auxiliary iteration counter + //==========================================================================/ + + //========== Initializing the Memory Matrix and pointers to it =============// + //Tries to allocate enough space for the whole memory matrix + i = (int64_t) ((int64_t) nRows * (int64_t) ROW_LEN_BYTES); + uint64_t *wholeMatrix = (uint64_t*)malloc(i); + if (wholeMatrix == NULL) { + return -1; + } + memset(wholeMatrix, 0, i); + + //Allocates pointers to each row of the matrix + uint64_t **memMatrix = (uint64_t**)malloc(nRows * sizeof (uint64_t*)); + if (memMatrix == NULL) { + return -1; + } + //Places the pointers in the correct positions + uint64_t *ptrWord = wholeMatrix; + for (i = 0; i < nRows; i++) { + memMatrix[i] = ptrWord; + ptrWord += ROW_LEN_INT64; + } + //==========================================================================/ + + //============= Getting the password + salt + basil padded with 10*1 ===============// + //OBS.:The memory matrix will temporarily hold the password: not for saving memory, + //but this ensures that the password copied locally will be overwritten as soon as possible + + //First, we clean enough blocks for the password, salt, basil and padding + uint64_t nBlocksInput = ((saltlen + pwdlen + 6 * sizeof (uint64_t)) / BLOCK_LEN_BLAKE2_SAFE_BYTES) + 1; + byte *ptrByte = (byte*) wholeMatrix; + memset(ptrByte, 0, nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES); + + //Prepends the password + memcpy(ptrByte, pwd, pwdlen); + ptrByte += pwdlen; + + //Concatenates the salt + memcpy(ptrByte, salt, saltlen); + ptrByte += saltlen; + + //Concatenates the basil: every integer passed as parameter, in the order they are provided by the interface + memcpy(ptrByte, &kLen, sizeof (uint64_t)); + ptrByte += sizeof (uint64_t); + memcpy(ptrByte, &pwdlen, sizeof (uint64_t)); + ptrByte += sizeof (uint64_t); + memcpy(ptrByte, &saltlen, sizeof (uint64_t)); + ptrByte += sizeof (uint64_t); + memcpy(ptrByte, &timeCost, sizeof (uint64_t)); + ptrByte += sizeof (uint64_t); + memcpy(ptrByte, &nRows, sizeof (uint64_t)); + ptrByte += sizeof (uint64_t); + memcpy(ptrByte, &nCols, sizeof (uint64_t)); + ptrByte += sizeof (uint64_t); + + //Now comes the padding + *ptrByte = 0x80; //first byte of padding: right after the password + ptrByte = (byte*) wholeMatrix; //resets the pointer to the start of the memory matrix + ptrByte += nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES - 1; //sets the pointer to the correct position: end of incomplete block + *ptrByte ^= 0x01; //last byte of padding: at the end of the last incomplete block + //==========================================================================/ + + //======================= Initializing the Sponge State ====================// + //Sponge state: 16 uint64_t, BLOCK_LEN_INT64 words of them for the bitrate (b) and the remainder for the capacity (c) + uint64_t *state = (uint64_t*)malloc(16 * sizeof (uint64_t)); + if (state == NULL) { + return -1; + } + initState(state); + //==========================================================================/ + + //================================ Setup Phase =============================// + //Absorbing salt, password and basil: this is the only place in which the block length is hard-coded to 512 bits + ptrWord = wholeMatrix; + for (i = 0; i < nBlocksInput; i++) { + absorbBlockBlake2Safe(state, ptrWord); //absorbs each block of pad(pwd || salt || basil) + ptrWord += BLOCK_LEN_BLAKE2_SAFE_BYTES; //goes to next block of pad(pwd || salt || basil) + } + + //Initializes M[0] and M[1] + reducedSqueezeRow0(state, memMatrix[0]); //The locally copied password is most likely overwritten here + reducedDuplexRow1(state, memMatrix[0], memMatrix[1]); + + do { + //M[row] = rand; //M[row*] = M[row*] XOR rotW(rand) + reducedDuplexRowSetup(state, memMatrix[prev], memMatrix[rowa], memMatrix[row]); + + + //updates the value of row* (deterministically picked during Setup)) + rowa = (rowa + step) & (window - 1); + //update prev: it now points to the last row ever computed + prev = row; + //updates row: goes to the next row to be computed + row++; + + //Checks if all rows in the window where visited. + if (rowa == 0) { + step = window + gap; //changes the step: approximately doubles its value + window *= 2; //doubles the size of the re-visitation window + gap = -gap; //inverts the modifier to the step + } + + } while (row < nRows); + //==========================================================================/ + + //============================ Wandering Phase =============================// + row = 0; //Resets the visitation to the first row of the memory matrix + for (tau = 1; tau <= timeCost; tau++) { + //Step is approximately half the number of all rows of the memory matrix for an odd tau; otherwise, it is -1 + step = (tau % 2 == 0) ? -1 : nRows / 2 - 1; + do { + //Selects a pseudorandom index row* + //------------------------------------------------------------------------------------------ + //rowa = ((unsigned int)state[0]) & (nRows-1); //(USE THIS IF nRows IS A POWER OF 2) + rowa = ((uint64_t) (state[0])) % nRows; //(USE THIS FOR THE "GENERIC" CASE) + //------------------------------------------------------------------------------------------ + + //Performs a reduced-round duplexing operation over M[row*] XOR M[prev], updating both M[row*] and M[row] + reducedDuplexRow(state, memMatrix[prev], memMatrix[rowa], memMatrix[row]); + + //update prev: it now points to the last row ever computed + prev = row; + + //updates row: goes to the next row to be computed + //------------------------------------------------------------------------------------------ + //row = (row + step) & (nRows-1); //(USE THIS IF nRows IS A POWER OF 2) + row = (row + step) % nRows; //(USE THIS FOR THE "GENERIC" CASE) + //------------------------------------------------------------------------------------------ + + } while (row != 0); + } + //==========================================================================/ + + //============================ Wrap-up Phase ===============================// + //Absorbs the last block of the memory matrix + absorbBlock(state, memMatrix[rowa]); + + //Squeezes the key + squeeze(state, (unsigned char*)K, kLen); + //==========================================================================/ + + //========================= Freeing the memory =============================// + free(memMatrix); + free(wholeMatrix); + + //Wiping out the sponge's internal state before freeing it + memset(state, 0, 16 * sizeof (uint64_t)); + free(state); + //==========================================================================/ + + return 0; +} diff --git a/algorithm/lyra2.h b/algorithm/lyra2.h new file mode 100644 index 000000000..13c7dbd3b --- /dev/null +++ b/algorithm/lyra2.h @@ -0,0 +1,50 @@ +/** + * Header file for the Lyra2 Password Hashing Scheme (PHS). + * + * Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014. + * + * This software is hereby placed in the public domain. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef LYRA2_H_ +#define LYRA2_H_ + +#include + +typedef unsigned char byte; + +//Block length required so Blake2's Initialization Vector (IV) is not overwritten (THIS SHOULD NOT BE MODIFIED) +#define BLOCK_LEN_BLAKE2_SAFE_INT64 8 //512 bits (=64 bytes, =8 uint64_t) +#define BLOCK_LEN_BLAKE2_SAFE_BYTES (BLOCK_LEN_BLAKE2_SAFE_INT64 * 8) //same as above, in bytes + + +#ifdef BLOCK_LEN_BITS + #define BLOCK_LEN_INT64 (BLOCK_LEN_BITS/64) //Block length: 768 bits (=96 bytes, =12 uint64_t) + #define BLOCK_LEN_BYTES (BLOCK_LEN_BITS/8) //Block length, in bytes +#else //default block lenght: 768 bits + #define BLOCK_LEN_INT64 12 //Block length: 768 bits (=96 bytes, =12 uint64_t) + #define BLOCK_LEN_BYTES (BLOCK_LEN_INT64 * 8) //Block length, in bytes +#endif + +#ifndef N_COLS + #define N_COLS 8 //Number of columns in the memory matrix: fixed to 64 by default +#endif + +#define ROW_LEN_INT64 (BLOCK_LEN_INT64 * N_COLS) //Total length of a row: N_COLS blocks +#define ROW_LEN_BYTES (ROW_LEN_INT64 * 8) //Number of bytes per row + + +int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *salt, uint64_t saltlen, uint64_t timeCost, uint64_t nRows, uint64_t nCols); + +#endif /* LYRA2_H_ */ diff --git a/algorithm/lyra2re.c b/algorithm/lyra2re.c new file mode 100644 index 000000000..ba37094ab --- /dev/null +++ b/algorithm/lyra2re.c @@ -0,0 +1,169 @@ +/*- + * Copyright 2014 James Lovejoy + * Copyright 2014 phm + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "config.h" +#include "miner.h" + +#include +#include +#include + +#include "sph/sph_blake.h" +#include "sph/sph_groestl.h" +#include "sph/sph_skein.h" +#include "sph/sph_keccak.h" +#include "lyra2.h" + +/* + * Encode a length len/4 vector of (uint32_t) into a length len vector of + * (unsigned char) in big-endian form. Assumes len is a multiple of 4. + */ +static inline void +be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) +{ + uint32_t i; + + for (i = 0; i < len; i++) + dst[i] = htobe32(src[i]); +} + + +inline void lyra2rehash(void *state, const void *input) +{ + sph_blake256_context ctx_blake; + sph_groestl256_context ctx_groestl; + sph_keccak256_context ctx_keccak; + sph_skein256_context ctx_skein; + + uint32_t hashA[8], hashB[8]; + + sph_blake256_init(&ctx_blake); + sph_blake256 (&ctx_blake, input, 80); + sph_blake256_close (&ctx_blake, hashA); + + + + + sph_keccak256_init(&ctx_keccak); + sph_keccak256 (&ctx_keccak,hashA, 32); + sph_keccak256_close(&ctx_keccak, hashB); + + LYRA2(hashA, 32, hashB, 32, hashB, 32, 1, 8, 8); + + + sph_skein256_init(&ctx_skein); + sph_skein256 (&ctx_skein, hashA, 32); + sph_skein256_close(&ctx_skein, hashB); + + + sph_groestl256_init(&ctx_groestl); + sph_groestl256 (&ctx_groestl, hashB, 32); + sph_groestl256_close(&ctx_groestl, hashA); + +//printf("cpu hash %08x %08x %08x %08x\n",hashA[0],hashA[1],hashA[2],hashA[3]); + + memcpy(state, hashA, 32); +} + +static const uint32_t diff1targ = 0x0000ffff; + + +/* Used externally as confirmation of correct OCL code */ +int lyra2re_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce) +{ + uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]); + uint32_t data[20], ohash[8]; + + be32enc_vect(data, (const uint32_t *)pdata, 19); + data[19] = htobe32(nonce); + lyra2rehash(ohash, data); + tmp_hash7 = be32toh(ohash[7]); + + applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx", + (long unsigned int)Htarg, + (long unsigned int)diff1targ, + (long unsigned int)tmp_hash7); + if (tmp_hash7 > diff1targ) + return -1; + if (tmp_hash7 > Htarg) + return 0; + return 1; +} + +void lyra2re_regenhash(struct work *work) +{ + uint32_t data[20]; + uint32_t *nonce = (uint32_t *)(work->data + 76); + uint32_t *ohash = (uint32_t *)(work->hash); + + be32enc_vect(data, (const uint32_t *)work->data, 19); + data[19] = htobe32(*nonce); + lyra2rehash(ohash, data); +} + +bool scanhash_lyra2re(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate, + unsigned char *pdata, unsigned char __maybe_unused *phash1, + unsigned char __maybe_unused *phash, const unsigned char *ptarget, + uint32_t max_nonce, uint32_t *last_nonce, uint32_t n) +{ + uint32_t *nonce = (uint32_t *)(pdata + 76); + uint32_t data[20]; + uint32_t tmp_hash7; + uint32_t Htarg = le32toh(((const uint32_t *)ptarget)[7]); + bool ret = false; + + be32enc_vect(data, (const uint32_t *)pdata, 19); + + while(1) { + uint32_t ostate[8]; + + *nonce = ++n; + data[19] = (n); + lyra2rehash(ostate, data); + tmp_hash7 = (ostate[7]); + + applog(LOG_INFO, "data7 %08lx", + (long unsigned int)data[7]); + + if (unlikely(tmp_hash7 <= Htarg)) { + ((uint32_t *)pdata)[19] = htobe32(n); + *last_nonce = n; + ret = true; + break; + } + + if (unlikely((n >= max_nonce) || thr->work_restart)) { + *last_nonce = n; + break; + } + } + + return ret; +} + + + diff --git a/algorithm/lyra2re.h b/algorithm/lyra2re.h new file mode 100644 index 000000000..8a58e7471 --- /dev/null +++ b/algorithm/lyra2re.h @@ -0,0 +1,10 @@ +#ifndef LYRA2RE_H +#define LYRA2RE_H + +#include "miner.h" + +extern int lyra2re_test(unsigned char *pdata, const unsigned char *ptarget, + uint32_t nonce); +extern void lyra2re_regenhash(struct work *work); + +#endif /* LYRA2RE_H */ diff --git a/algorithm/sponge.c b/algorithm/sponge.c new file mode 100644 index 000000000..e717a508d --- /dev/null +++ b/algorithm/sponge.c @@ -0,0 +1,742 @@ +/** + * A simple implementation of Blake2b's internal permutation + * in the form of a sponge. + * + * Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014. + * + * This software is hereby placed in the public domain. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include +#include +#include "sponge.h" +#include "lyra2.h" + + + +/** + * Initializes the Sponge State. The first 512 bits are set to zeros and the remainder + * receive Blake2b's IV as per Blake2b's specification. Note: Even though sponges + * typically have their internal state initialized with zeros, Blake2b's G function + * has a fixed point: if the internal state and message are both filled with zeros. the + * resulting permutation will always be a block filled with zeros; this happens because + * Blake2b does not use the constants originally employed in Blake2 inside its G function, + * relying on the IV for avoiding possible fixed points. + * + * @param state The 1024-bit array to be initialized + */ +void initState(uint64_t state[/*16*/]) { + //First 512 bis are zeros + memset(state, 0, 64); + //Remainder BLOCK_LEN_BLAKE2_SAFE_BYTES are reserved to the IV + state[8] = blake2b_IV[0]; + state[9] = blake2b_IV[1]; + state[10] = blake2b_IV[2]; + state[11] = blake2b_IV[3]; + state[12] = blake2b_IV[4]; + state[13] = blake2b_IV[5]; + state[14] = blake2b_IV[6]; + state[15] = blake2b_IV[7]; +} + +/** + * Execute Blake2b's G function, with all 12 rounds. + * + * @param v A 1024-bit (16 uint64_t) array to be processed by Blake2b's G function + */ +static void blake2bLyra(uint64_t *v) { + ROUND_LYRA(0); + ROUND_LYRA(1); + ROUND_LYRA(2); + ROUND_LYRA(3); + ROUND_LYRA(4); + ROUND_LYRA(5); + ROUND_LYRA(6); + ROUND_LYRA(7); + ROUND_LYRA(8); + ROUND_LYRA(9); + ROUND_LYRA(10); + ROUND_LYRA(11); +} + +/** + * Executes a reduced version of Blake2b's G function with only one round + * @param v A 1024-bit (16 uint64_t) array to be processed by Blake2b's G function + */ +static void reducedBlake2bLyra(uint64_t *v) { + ROUND_LYRA(0); +} + +/** + * Performs a squeeze operation, using Blake2b's G function as the + * internal permutation + * + * @param state The current state of the sponge + * @param out Array that will receive the data squeezed + * @param len The number of bytes to be squeezed into the "out" array + */ +void squeeze(uint64_t *state, byte *out, unsigned int len) { + int fullBlocks = len / BLOCK_LEN_BYTES; + byte *ptr = out; + int i; + //Squeezes full blocks + for (i = 0; i < fullBlocks; i++) { + memcpy(ptr, state, BLOCK_LEN_BYTES); + blake2bLyra(state); + ptr += BLOCK_LEN_BYTES; + } + + //Squeezes remaining bytes + memcpy(ptr, state, (len % BLOCK_LEN_BYTES)); +} + +/** + * Performs an absorb operation for a single block (BLOCK_LEN_INT64 words + * of type uint64_t), using Blake2b's G function as the internal permutation + * + * @param state The current state of the sponge + * @param in The block to be absorbed (BLOCK_LEN_INT64 words) + */ +void absorbBlock(uint64_t *state, const uint64_t *in) { + //XORs the first BLOCK_LEN_INT64 words of "in" with the current state + state[0] ^= in[0]; + state[1] ^= in[1]; + state[2] ^= in[2]; + state[3] ^= in[3]; + state[4] ^= in[4]; + state[5] ^= in[5]; + state[6] ^= in[6]; + state[7] ^= in[7]; + state[8] ^= in[8]; + state[9] ^= in[9]; + state[10] ^= in[10]; + state[11] ^= in[11]; + + //Applies the transformation f to the sponge's state + blake2bLyra(state); +} + +/** + * Performs an absorb operation for a single block (BLOCK_LEN_BLAKE2_SAFE_INT64 + * words of type uint64_t), using Blake2b's G function as the internal permutation + * + * @param state The current state of the sponge + * @param in The block to be absorbed (BLOCK_LEN_BLAKE2_SAFE_INT64 words) + */ +void absorbBlockBlake2Safe(uint64_t *state, const uint64_t *in) { + //XORs the first BLOCK_LEN_BLAKE2_SAFE_INT64 words of "in" with the current state + state[0] ^= in[0]; + state[1] ^= in[1]; + state[2] ^= in[2]; + state[3] ^= in[3]; + state[4] ^= in[4]; + state[5] ^= in[5]; + state[6] ^= in[6]; + state[7] ^= in[7]; + + //Applies the transformation f to the sponge's state + blake2bLyra(state); +} + +/** + * Performs a reduced squeeze operation for a single row, from the highest to + * the lowest index, using the reduced-round Blake2b's G function as the + * internal permutation + * + * @param state The current state of the sponge + * @param rowOut Row to receive the data squeezed + */ +void reducedSqueezeRow0(uint64_t* state, uint64_t* rowOut) { + uint64_t* ptrWord = rowOut + (N_COLS-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to M[0][C-1] + int i; + //M[row][C-1-col] = H.reduced_squeeze() + for (i = 0; i < N_COLS; i++) { + ptrWord[0] = state[0]; + ptrWord[1] = state[1]; + ptrWord[2] = state[2]; + ptrWord[3] = state[3]; + ptrWord[4] = state[4]; + ptrWord[5] = state[5]; + ptrWord[6] = state[6]; + ptrWord[7] = state[7]; + ptrWord[8] = state[8]; + ptrWord[9] = state[9]; + ptrWord[10] = state[10]; + ptrWord[11] = state[11]; + + //Goes to next block (column) that will receive the squeezed data + ptrWord -= BLOCK_LEN_INT64; + + //Applies the reduced-round transformation f to the sponge's state + reducedBlake2bLyra(state); + } +} + +/** + * Performs a reduced duplex operation for a single row, from the highest to + * the lowest index, using the reduced-round Blake2b's G function as the + * internal permutation + * + * @param state The current state of the sponge + * @param rowIn Row to feed the sponge + * @param rowOut Row to receive the sponge's output + */ +void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut) { + uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev + uint64_t* ptrWordOut = rowOut + (N_COLS-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row + int i; + + for (i = 0; i < N_COLS; i++) { + + //Absorbing "M[prev][col]" + state[0] ^= (ptrWordIn[0]); + state[1] ^= (ptrWordIn[1]); + state[2] ^= (ptrWordIn[2]); + state[3] ^= (ptrWordIn[3]); + state[4] ^= (ptrWordIn[4]); + state[5] ^= (ptrWordIn[5]); + state[6] ^= (ptrWordIn[6]); + state[7] ^= (ptrWordIn[7]); + state[8] ^= (ptrWordIn[8]); + state[9] ^= (ptrWordIn[9]); + state[10] ^= (ptrWordIn[10]); + state[11] ^= (ptrWordIn[11]); + + //Applies the reduced-round transformation f to the sponge's state + reducedBlake2bLyra(state); + + //M[row][C-1-col] = M[prev][col] XOR rand + ptrWordOut[0] = ptrWordIn[0] ^ state[0]; + ptrWordOut[1] = ptrWordIn[1] ^ state[1]; + ptrWordOut[2] = ptrWordIn[2] ^ state[2]; + ptrWordOut[3] = ptrWordIn[3] ^ state[3]; + ptrWordOut[4] = ptrWordIn[4] ^ state[4]; + ptrWordOut[5] = ptrWordIn[5] ^ state[5]; + ptrWordOut[6] = ptrWordIn[6] ^ state[6]; + ptrWordOut[7] = ptrWordIn[7] ^ state[7]; + ptrWordOut[8] = ptrWordIn[8] ^ state[8]; + ptrWordOut[9] = ptrWordIn[9] ^ state[9]; + ptrWordOut[10] = ptrWordIn[10] ^ state[10]; + ptrWordOut[11] = ptrWordIn[11] ^ state[11]; + + + //Input: next column (i.e., next block in sequence) + ptrWordIn += BLOCK_LEN_INT64; + //Output: goes to previous column + ptrWordOut -= BLOCK_LEN_INT64; + } +} + +/** + * Performs a duplexing operation over "M[rowInOut][col] [+] M[rowIn][col]" (i.e., + * the wordwise addition of two columns, ignoring carries between words). The + * output of this operation, "rand", is then used to make + * "M[rowOut][(N_COLS-1)-col] = M[rowIn][col] XOR rand" and + * "M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)", where rotW is a 64-bit + * rotation to the left and N_COLS is a system parameter. + * + * @param state The current state of the sponge + * @param rowIn Row used only as input + * @param rowInOut Row used as input and to receive output after rotation + * @param rowOut Row receiving the output + * + */ +void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) { + uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev + uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row* + uint64_t* ptrWordOut = rowOut + (N_COLS-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row + int i; + + for (i = 0; i < N_COLS; i++) { + //Absorbing "M[prev] [+] M[row*]" + state[0] ^= (ptrWordIn[0] + ptrWordInOut[0]); + state[1] ^= (ptrWordIn[1] + ptrWordInOut[1]); + state[2] ^= (ptrWordIn[2] + ptrWordInOut[2]); + state[3] ^= (ptrWordIn[3] + ptrWordInOut[3]); + state[4] ^= (ptrWordIn[4] + ptrWordInOut[4]); + state[5] ^= (ptrWordIn[5] + ptrWordInOut[5]); + state[6] ^= (ptrWordIn[6] + ptrWordInOut[6]); + state[7] ^= (ptrWordIn[7] + ptrWordInOut[7]); + state[8] ^= (ptrWordIn[8] + ptrWordInOut[8]); + state[9] ^= (ptrWordIn[9] + ptrWordInOut[9]); + state[10] ^= (ptrWordIn[10] + ptrWordInOut[10]); + state[11] ^= (ptrWordIn[11] + ptrWordInOut[11]); + + //Applies the reduced-round transformation f to the sponge's state + reducedBlake2bLyra(state); + + //M[row][col] = M[prev][col] XOR rand + ptrWordOut[0] = ptrWordIn[0] ^ state[0]; + ptrWordOut[1] = ptrWordIn[1] ^ state[1]; + ptrWordOut[2] = ptrWordIn[2] ^ state[2]; + ptrWordOut[3] = ptrWordIn[3] ^ state[3]; + ptrWordOut[4] = ptrWordIn[4] ^ state[4]; + ptrWordOut[5] = ptrWordIn[5] ^ state[5]; + ptrWordOut[6] = ptrWordIn[6] ^ state[6]; + ptrWordOut[7] = ptrWordIn[7] ^ state[7]; + ptrWordOut[8] = ptrWordIn[8] ^ state[8]; + ptrWordOut[9] = ptrWordIn[9] ^ state[9]; + ptrWordOut[10] = ptrWordIn[10] ^ state[10]; + ptrWordOut[11] = ptrWordIn[11] ^ state[11]; + + //M[row*][col] = M[row*][col] XOR rotW(rand) + ptrWordInOut[0] ^= state[11]; + ptrWordInOut[1] ^= state[0]; + ptrWordInOut[2] ^= state[1]; + ptrWordInOut[3] ^= state[2]; + ptrWordInOut[4] ^= state[3]; + ptrWordInOut[5] ^= state[4]; + ptrWordInOut[6] ^= state[5]; + ptrWordInOut[7] ^= state[6]; + ptrWordInOut[8] ^= state[7]; + ptrWordInOut[9] ^= state[8]; + ptrWordInOut[10] ^= state[9]; + ptrWordInOut[11] ^= state[10]; + + //Inputs: next column (i.e., next block in sequence) + ptrWordInOut += BLOCK_LEN_INT64; + ptrWordIn += BLOCK_LEN_INT64; + //Output: goes to previous column + ptrWordOut -= BLOCK_LEN_INT64; + } +} + +/** + * Performs a duplexing operation over "M[rowInOut][col] [+] M[rowIn][col]" (i.e., + * the wordwise addition of two columns, ignoring carries between words). The + * output of this operation, "rand", is then used to make + * "M[rowOut][col] = M[rowOut][col] XOR rand" and + * "M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)", where rotW is a 64-bit + * rotation to the left. + * + * @param state The current state of the sponge + * @param rowIn Row used only as input + * @param rowInOut Row used as input and to receive output after rotation + * @param rowOut Row receiving the output + * + */ +void reducedDuplexRow(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) { + uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row* + uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev + uint64_t* ptrWordOut = rowOut; //In Lyra2: pointer to row + int i; + + for (i = 0; i < N_COLS; i++) { + + //Absorbing "M[prev] [+] M[row*]" + state[0] ^= (ptrWordIn[0] + ptrWordInOut[0]); + state[1] ^= (ptrWordIn[1] + ptrWordInOut[1]); + state[2] ^= (ptrWordIn[2] + ptrWordInOut[2]); + state[3] ^= (ptrWordIn[3] + ptrWordInOut[3]); + state[4] ^= (ptrWordIn[4] + ptrWordInOut[4]); + state[5] ^= (ptrWordIn[5] + ptrWordInOut[5]); + state[6] ^= (ptrWordIn[6] + ptrWordInOut[6]); + state[7] ^= (ptrWordIn[7] + ptrWordInOut[7]); + state[8] ^= (ptrWordIn[8] + ptrWordInOut[8]); + state[9] ^= (ptrWordIn[9] + ptrWordInOut[9]); + state[10] ^= (ptrWordIn[10] + ptrWordInOut[10]); + state[11] ^= (ptrWordIn[11] + ptrWordInOut[11]); + + //Applies the reduced-round transformation f to the sponge's state + reducedBlake2bLyra(state); + + //M[rowOut][col] = M[rowOut][col] XOR rand + ptrWordOut[0] ^= state[0]; + ptrWordOut[1] ^= state[1]; + ptrWordOut[2] ^= state[2]; + ptrWordOut[3] ^= state[3]; + ptrWordOut[4] ^= state[4]; + ptrWordOut[5] ^= state[5]; + ptrWordOut[6] ^= state[6]; + ptrWordOut[7] ^= state[7]; + ptrWordOut[8] ^= state[8]; + ptrWordOut[9] ^= state[9]; + ptrWordOut[10] ^= state[10]; + ptrWordOut[11] ^= state[11]; + + //M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand) + ptrWordInOut[0] ^= state[11]; + ptrWordInOut[1] ^= state[0]; + ptrWordInOut[2] ^= state[1]; + ptrWordInOut[3] ^= state[2]; + ptrWordInOut[4] ^= state[3]; + ptrWordInOut[5] ^= state[4]; + ptrWordInOut[6] ^= state[5]; + ptrWordInOut[7] ^= state[6]; + ptrWordInOut[8] ^= state[7]; + ptrWordInOut[9] ^= state[8]; + ptrWordInOut[10] ^= state[9]; + ptrWordInOut[11] ^= state[10]; + + //Goes to next block + ptrWordOut += BLOCK_LEN_INT64; + ptrWordInOut += BLOCK_LEN_INT64; + ptrWordIn += BLOCK_LEN_INT64; + } +} + + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +/** + * Performs a duplex operation over "M[rowInOut] [+] M[rowIn]", writing the output "rand" + * on M[rowOut] and making "M[rowInOut] = M[rowInOut] XOR rotW(rand)", where rotW is a 64-bit + * rotation to the left. + * + * @param state The current state of the sponge + * @param rowIn Row used only as input + * @param rowInOut Row used as input and to receive output after rotation + * @param rowOut Row receiving the output + * + */ +/* +void reducedDuplexRowSetupOLD(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) { + uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev + uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row* + uint64_t* ptrWordOut = rowOut; //In Lyra2: pointer to row + int i; + for (i = 0; i < N_COLS; i++) { + + //Absorbing "M[rowInOut] XOR M[rowIn]" + state[0] ^= ptrWordInOut[0] ^ ptrWordIn[0]; + state[1] ^= ptrWordInOut[1] ^ ptrWordIn[1]; + state[2] ^= ptrWordInOut[2] ^ ptrWordIn[2]; + state[3] ^= ptrWordInOut[3] ^ ptrWordIn[3]; + state[4] ^= ptrWordInOut[4] ^ ptrWordIn[4]; + state[5] ^= ptrWordInOut[5] ^ ptrWordIn[5]; + state[6] ^= ptrWordInOut[6] ^ ptrWordIn[6]; + state[7] ^= ptrWordInOut[7] ^ ptrWordIn[7]; + state[8] ^= ptrWordInOut[8] ^ ptrWordIn[8]; + state[9] ^= ptrWordInOut[9] ^ ptrWordIn[9]; + state[10] ^= ptrWordInOut[10] ^ ptrWordIn[10]; + state[11] ^= ptrWordInOut[11] ^ ptrWordIn[11]; + + //Applies the reduced-round transformation f to the sponge's state + reducedBlake2bLyra(state); + + //M[row][col] = rand + ptrWordOut[0] = state[0]; + ptrWordOut[1] = state[1]; + ptrWordOut[2] = state[2]; + ptrWordOut[3] = state[3]; + ptrWordOut[4] = state[4]; + ptrWordOut[5] = state[5]; + ptrWordOut[6] = state[6]; + ptrWordOut[7] = state[7]; + ptrWordOut[8] = state[8]; + ptrWordOut[9] = state[9]; + ptrWordOut[10] = state[10]; + ptrWordOut[11] = state[11]; + + + //M[row*][col] = M[row*][col] XOR rotW(rand) + ptrWordInOut[0] ^= state[10]; + ptrWordInOut[1] ^= state[11]; + ptrWordInOut[2] ^= state[0]; + ptrWordInOut[3] ^= state[1]; + ptrWordInOut[4] ^= state[2]; + ptrWordInOut[5] ^= state[3]; + ptrWordInOut[6] ^= state[4]; + ptrWordInOut[7] ^= state[5]; + ptrWordInOut[8] ^= state[6]; + ptrWordInOut[9] ^= state[7]; + ptrWordInOut[10] ^= state[8]; + ptrWordInOut[11] ^= state[9]; + + //Goes to next column (i.e., next block in sequence) + ptrWordInOut += BLOCK_LEN_INT64; + ptrWordIn += BLOCK_LEN_INT64; + ptrWordOut += BLOCK_LEN_INT64; + } +} +*/ + +/** + * Performs a duplex operation over "M[rowInOut] XOR M[rowIn]", writing the output "rand" + * on M[rowOut] and making "M[rowInOut] = M[rowInOut] XOR rotW(rand)", where rotW is a 64-bit + * rotation to the left. + * + * @param state The current state of the sponge + * @param rowIn Row used only as input + * @param rowInOut Row used as input and to receive output after rotation + * @param rowOut Row receiving the output + * + */ +/* +void reducedDuplexRowSetupv5(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) { + uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev + uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row* + uint64_t* ptrWordOut = rowOut; //In Lyra2: pointer to row + int i; + for (i = 0; i < N_COLS; i++) { + + //Absorbing "M[rowInOut] XOR M[rowIn]" + state[0] ^= ptrWordInOut[0] + ptrWordIn[0]; + state[1] ^= ptrWordInOut[1] + ptrWordIn[1]; + state[2] ^= ptrWordInOut[2] + ptrWordIn[2]; + state[3] ^= ptrWordInOut[3] + ptrWordIn[3]; + state[4] ^= ptrWordInOut[4] + ptrWordIn[4]; + state[5] ^= ptrWordInOut[5] + ptrWordIn[5]; + state[6] ^= ptrWordInOut[6] + ptrWordIn[6]; + state[7] ^= ptrWordInOut[7] + ptrWordIn[7]; + state[8] ^= ptrWordInOut[8] + ptrWordIn[8]; + state[9] ^= ptrWordInOut[9] + ptrWordIn[9]; + state[10] ^= ptrWordInOut[10] + ptrWordIn[10]; + state[11] ^= ptrWordInOut[11] + ptrWordIn[11]; + + //Applies the reduced-round transformation f to the sponge's state + reducedBlake2bLyra(state); + + + //M[row*][col] = M[row*][col] XOR rotW(rand) + ptrWordInOut[0] ^= state[10]; + ptrWordInOut[1] ^= state[11]; + ptrWordInOut[2] ^= state[0]; + ptrWordInOut[3] ^= state[1]; + ptrWordInOut[4] ^= state[2]; + ptrWordInOut[5] ^= state[3]; + ptrWordInOut[6] ^= state[4]; + ptrWordInOut[7] ^= state[5]; + ptrWordInOut[8] ^= state[6]; + ptrWordInOut[9] ^= state[7]; + ptrWordInOut[10] ^= state[8]; + ptrWordInOut[11] ^= state[9]; + + + //M[row][col] = rand + ptrWordOut[0] = state[0] ^ ptrWordIn[0]; + ptrWordOut[1] = state[1] ^ ptrWordIn[1]; + ptrWordOut[2] = state[2] ^ ptrWordIn[2]; + ptrWordOut[3] = state[3] ^ ptrWordIn[3]; + ptrWordOut[4] = state[4] ^ ptrWordIn[4]; + ptrWordOut[5] = state[5] ^ ptrWordIn[5]; + ptrWordOut[6] = state[6] ^ ptrWordIn[6]; + ptrWordOut[7] = state[7] ^ ptrWordIn[7]; + ptrWordOut[8] = state[8] ^ ptrWordIn[8]; + ptrWordOut[9] = state[9] ^ ptrWordIn[9]; + ptrWordOut[10] = state[10] ^ ptrWordIn[10]; + ptrWordOut[11] = state[11] ^ ptrWordIn[11]; + + //Goes to next column (i.e., next block in sequence) + ptrWordInOut += BLOCK_LEN_INT64; + ptrWordIn += BLOCK_LEN_INT64; + ptrWordOut += BLOCK_LEN_INT64; + } +} +*/ + +/** + * Performs a duplex operation over "M[rowInOut] XOR M[rowIn]", writing the output "rand" + * on M[rowOut] and making "M[rowInOut] = M[rowInOut] XOR rotW(rand)", where rotW is a 64-bit + * rotation to the left. + * + * @param state The current state of the sponge + * @param rowIn Row used only as input + * @param rowInOut Row used as input and to receive output after rotation + * @param rowOut Row receiving the output + * + */ +/* +void reducedDuplexRowSetupv5c(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) { + uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev + uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row* + uint64_t* ptrWordOut = rowOut; + int i; + + for (i = 0; i < N_COLS / 2; i++) { + //Absorbing "M[rowInOut] XOR M[rowIn]" + state[0] ^= ptrWordInOut[0] + ptrWordIn[0]; + state[1] ^= ptrWordInOut[1] + ptrWordIn[1]; + state[2] ^= ptrWordInOut[2] + ptrWordIn[2]; + state[3] ^= ptrWordInOut[3] + ptrWordIn[3]; + state[4] ^= ptrWordInOut[4] + ptrWordIn[4]; + state[5] ^= ptrWordInOut[5] + ptrWordIn[5]; + state[6] ^= ptrWordInOut[6] + ptrWordIn[6]; + state[7] ^= ptrWordInOut[7] + ptrWordIn[7]; + state[8] ^= ptrWordInOut[8] + ptrWordIn[8]; + state[9] ^= ptrWordInOut[9] + ptrWordIn[9]; + state[10] ^= ptrWordInOut[10] + ptrWordIn[10]; + state[11] ^= ptrWordInOut[11] + ptrWordIn[11]; + + //Applies the reduced-round transformation f to the sponge's state + reducedBlake2bLyra(state); + + + //M[row*][col] = M[row*][col] XOR rotW(rand) + ptrWordInOut[0] ^= state[10]; + ptrWordInOut[1] ^= state[11]; + ptrWordInOut[2] ^= state[0]; + ptrWordInOut[3] ^= state[1]; + ptrWordInOut[4] ^= state[2]; + ptrWordInOut[5] ^= state[3]; + ptrWordInOut[6] ^= state[4]; + ptrWordInOut[7] ^= state[5]; + ptrWordInOut[8] ^= state[6]; + ptrWordInOut[9] ^= state[7]; + ptrWordInOut[10] ^= state[8]; + ptrWordInOut[11] ^= state[9]; + + + //M[row][col] = rand + ptrWordOut[0] = state[0] ^ ptrWordIn[0]; + ptrWordOut[1] = state[1] ^ ptrWordIn[1]; + ptrWordOut[2] = state[2] ^ ptrWordIn[2]; + ptrWordOut[3] = state[3] ^ ptrWordIn[3]; + ptrWordOut[4] = state[4] ^ ptrWordIn[4]; + ptrWordOut[5] = state[5] ^ ptrWordIn[5]; + ptrWordOut[6] = state[6] ^ ptrWordIn[6]; + ptrWordOut[7] = state[7] ^ ptrWordIn[7]; + ptrWordOut[8] = state[8] ^ ptrWordIn[8]; + ptrWordOut[9] = state[9] ^ ptrWordIn[9]; + ptrWordOut[10] = state[10] ^ ptrWordIn[10]; + ptrWordOut[11] = state[11] ^ ptrWordIn[11]; + + //Goes to next column (i.e., next block in sequence) + ptrWordInOut += BLOCK_LEN_INT64; + ptrWordIn += BLOCK_LEN_INT64; + ptrWordOut += 2 * BLOCK_LEN_INT64; + } + + ptrWordOut = rowOut + BLOCK_LEN_INT64; + for (i = 0; i < N_COLS / 2; i++) { + //Absorbing "M[rowInOut] XOR M[rowIn]" + state[0] ^= ptrWordInOut[0] + ptrWordIn[0]; + state[1] ^= ptrWordInOut[1] + ptrWordIn[1]; + state[2] ^= ptrWordInOut[2] + ptrWordIn[2]; + state[3] ^= ptrWordInOut[3] + ptrWordIn[3]; + state[4] ^= ptrWordInOut[4] + ptrWordIn[4]; + state[5] ^= ptrWordInOut[5] + ptrWordIn[5]; + state[6] ^= ptrWordInOut[6] + ptrWordIn[6]; + state[7] ^= ptrWordInOut[7] + ptrWordIn[7]; + state[8] ^= ptrWordInOut[8] + ptrWordIn[8]; + state[9] ^= ptrWordInOut[9] + ptrWordIn[9]; + state[10] ^= ptrWordInOut[10] + ptrWordIn[10]; + state[11] ^= ptrWordInOut[11] + ptrWordIn[11]; + + //Applies the reduced-round transformation f to the sponge's state + reducedBlake2bLyra(state); + + + //M[row*][col] = M[row*][col] XOR rotW(rand) + ptrWordInOut[0] ^= state[10]; + ptrWordInOut[1] ^= state[11]; + ptrWordInOut[2] ^= state[0]; + ptrWordInOut[3] ^= state[1]; + ptrWordInOut[4] ^= state[2]; + ptrWordInOut[5] ^= state[3]; + ptrWordInOut[6] ^= state[4]; + ptrWordInOut[7] ^= state[5]; + ptrWordInOut[8] ^= state[6]; + ptrWordInOut[9] ^= state[7]; + ptrWordInOut[10] ^= state[8]; + ptrWordInOut[11] ^= state[9]; + + + //M[row][col] = rand + ptrWordOut[0] = state[0] ^ ptrWordIn[0]; + ptrWordOut[1] = state[1] ^ ptrWordIn[1]; + ptrWordOut[2] = state[2] ^ ptrWordIn[2]; + ptrWordOut[3] = state[3] ^ ptrWordIn[3]; + ptrWordOut[4] = state[4] ^ ptrWordIn[4]; + ptrWordOut[5] = state[5] ^ ptrWordIn[5]; + ptrWordOut[6] = state[6] ^ ptrWordIn[6]; + ptrWordOut[7] = state[7] ^ ptrWordIn[7]; + ptrWordOut[8] = state[8] ^ ptrWordIn[8]; + ptrWordOut[9] = state[9] ^ ptrWordIn[9]; + ptrWordOut[10] = state[10] ^ ptrWordIn[10]; + ptrWordOut[11] = state[11] ^ ptrWordIn[11]; + + //Goes to next column (i.e., next block in sequence) + ptrWordInOut += BLOCK_LEN_INT64; + ptrWordIn += BLOCK_LEN_INT64; + ptrWordOut += 2 * BLOCK_LEN_INT64; + } +} +*/ + +/** + * Performs a duplex operation over "M[rowInOut] XOR M[rowIn]", using the output "rand" + * to make "M[rowOut][col] = M[rowOut][col] XOR rand" and "M[rowInOut] = M[rowInOut] XOR rotW(rand)", + * where rotW is a 64-bit rotation to the left. + * + * @param state The current state of the sponge + * @param rowIn Row used only as input + * @param rowInOut Row used as input and to receive output after rotation + * @param rowOut Row receiving the output + * + */ +/* +void reducedDuplexRowd(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) { + uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row* + uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev + uint64_t* ptrWordOut = rowOut; //In Lyra2: pointer to row + int i; + for (i = 0; i < N_COLS; i++) { + + //Absorbing "M[rowInOut] XOR M[rowIn]" + state[0] ^= ptrWordInOut[0] + ptrWordIn[0]; + state[1] ^= ptrWordInOut[1] + ptrWordIn[1]; + state[2] ^= ptrWordInOut[2] + ptrWordIn[2]; + state[3] ^= ptrWordInOut[3] + ptrWordIn[3]; + state[4] ^= ptrWordInOut[4] + ptrWordIn[4]; + state[5] ^= ptrWordInOut[5] + ptrWordIn[5]; + state[6] ^= ptrWordInOut[6] + ptrWordIn[6]; + state[7] ^= ptrWordInOut[7] + ptrWordIn[7]; + state[8] ^= ptrWordInOut[8] + ptrWordIn[8]; + state[9] ^= ptrWordInOut[9] + ptrWordIn[9]; + state[10] ^= ptrWordInOut[10] + ptrWordIn[10]; + state[11] ^= ptrWordInOut[11] + ptrWordIn[11]; + + //Applies the reduced-round transformation f to the sponge's state + reducedBlake2bLyra(state); + + //M[rowOut][col] = M[rowOut][col] XOR rand + ptrWordOut[0] ^= state[0]; + ptrWordOut[1] ^= state[1]; + ptrWordOut[2] ^= state[2]; + ptrWordOut[3] ^= state[3]; + ptrWordOut[4] ^= state[4]; + ptrWordOut[5] ^= state[5]; + ptrWordOut[6] ^= state[6]; + ptrWordOut[7] ^= state[7]; + ptrWordOut[8] ^= state[8]; + ptrWordOut[9] ^= state[9]; + ptrWordOut[10] ^= state[10]; + ptrWordOut[11] ^= state[11]; + + //M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand) + + + //Goes to next block + ptrWordOut += BLOCK_LEN_INT64; + ptrWordInOut += BLOCK_LEN_INT64; + ptrWordIn += BLOCK_LEN_INT64; + } +} +*/ + +/** + Prints an array of unsigned chars + */ +void printArray(unsigned char *array, unsigned int size, char *name) { + int i; + printf("%s: ", name); + for (i = 0; i < size; i++) { + printf("%2x|", array[i]); + } + printf("\n"); +} + +//////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/algorithm/sponge.h b/algorithm/sponge.h new file mode 100644 index 000000000..3fcff0d7e --- /dev/null +++ b/algorithm/sponge.h @@ -0,0 +1,108 @@ +/** + * Header file for Blake2b's internal permutation in the form of a sponge. + * This code is based on the original Blake2b's implementation provided by + * Samuel Neves (https://blake2.net/) + * + * Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014. + * + * This software is hereby placed in the public domain. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef SPONGE_H_ +#define SPONGE_H_ + +#include + +#if defined(__GNUC__) +#define ALIGN __attribute__ ((aligned(32))) +#elif defined(_MSC_VER) +#define ALIGN __declspec(align(32)) +#else +#define ALIGN +#endif + + +/*Blake2b IV Array*/ +static const uint64_t blake2b_IV[8] = +{ + 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, + 0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL, + 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL, + 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL +}; + +/*Blake2b's rotation*/ +static inline uint64_t rotr64( const uint64_t w, const unsigned c ){ + return ( w >> c ) | ( w << ( 64 - c ) ); +} + +/*Blake2b's G function*/ +#define G(r,i,a,b,c,d) \ + do { \ + a = a + b; \ + d = rotr64(d ^ a, 32); \ + c = c + d; \ + b = rotr64(b ^ c, 24); \ + a = a + b; \ + d = rotr64(d ^ a, 16); \ + c = c + d; \ + b = rotr64(b ^ c, 63); \ + } while(0) + + +/*One Round of the Blake2b's compression function*/ +#define ROUND_LYRA(r) \ + G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ + G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ + G(r,2,v[ 2],v[ 6],v[10],v[14]); \ + G(r,3,v[ 3],v[ 7],v[11],v[15]); \ + G(r,4,v[ 0],v[ 5],v[10],v[15]); \ + G(r,5,v[ 1],v[ 6],v[11],v[12]); \ + G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ + G(r,7,v[ 3],v[ 4],v[ 9],v[14]); + + +//---- Housekeeping +void initState(uint64_t state[/*16*/]); + +//---- Squeezes +void squeeze(uint64_t *state, unsigned char *out, unsigned int len); +void reducedSqueezeRow0(uint64_t* state, uint64_t* row); + +//---- Absorbs +void absorbBlock(uint64_t *state, const uint64_t *in); +void absorbBlockBlake2Safe(uint64_t *state, const uint64_t *in); + +//---- Duplexes +void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut); +void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut); +void reducedDuplexRow(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut); + +//---- Misc +void printArray(unsigned char *array, unsigned int size, char *name); + +//////////////////////////////////////////////////////////////////////////////////////////////// + + +////TESTS//// +//void reducedDuplexRowc(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut); +//void reducedDuplexRowd(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut); +//void reducedDuplexRowSetupv4(uint64_t *state, uint64_t *rowIn1, uint64_t *rowIn2, uint64_t *rowOut1, uint64_t *rowOut2); +//void reducedDuplexRowSetupv5(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut); +//void reducedDuplexRowSetupv5c(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut); +//void reducedDuplexRowSetupv5d(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut); +///////////// + + +#endif /* SPONGE_H_ */ diff --git a/driver-opencl.c b/driver-opencl.c index 48ffc517d..72ee95596 100644 --- a/driver-opencl.c +++ b/driver-opencl.c @@ -284,7 +284,7 @@ char *set_gpu_engine(const char *_arg) char *arg = (char *)alloca(strlen(_arg) + 1); strcpy(arg, _arg); - if(!(nextptr = strtok(arg, ","))) + if (!(nextptr = strtok(arg, ","))) return "Invalid parameters for set gpu engine"; do { @@ -408,12 +408,12 @@ char *set_gpu_memdiff(char *arg) gpus[device++].gpu_memdiff = val; } - if (device == 1) { - for (i = device; i < MAX_GPUDEVICES; i++) - gpus[i].gpu_memdiff = gpus[0].gpu_memdiff; - } + if (device == 1) { + for (i = device; i < MAX_GPUDEVICES; i++) + gpus[i].gpu_memdiff = gpus[0].gpu_memdiff; + } - return NULL; + return NULL; } char *set_gpu_powertune(char *arg) @@ -630,12 +630,12 @@ char *set_xintensity(const char *_arg) device++; } if (device == 1) - for (i = device; i < MAX_GPUDEVICES; i++) { - gpus[i].dynamic = gpus[0].dynamic; - gpus[i].intensity = gpus[0].intensity; - gpus[i].rawintensity = gpus[0].rawintensity; - gpus[i].xintensity = gpus[0].xintensity; - } + for (i = device; i < MAX_GPUDEVICES; i++) { + gpus[i].dynamic = gpus[0].dynamic; + gpus[i].intensity = gpus[0].intensity; + gpus[i].rawintensity = gpus[0].rawintensity; + gpus[i].xintensity = gpus[0].xintensity; + } return NULL; } @@ -673,12 +673,12 @@ char *set_rawintensity(const char *_arg) device++; } if (device == 1) - for (i = device; i < MAX_GPUDEVICES; i++) { - gpus[i].dynamic = gpus[0].dynamic; - gpus[i].intensity = gpus[0].intensity; - gpus[i].rawintensity = gpus[0].rawintensity; - gpus[i].xintensity = gpus[0].xintensity; - } + for (i = device; i < MAX_GPUDEVICES; i++) { + gpus[i].dynamic = gpus[0].dynamic; + gpus[i].intensity = gpus[0].intensity; + gpus[i].rawintensity = gpus[0].rawintensity; + gpus[i].xintensity = gpus[0].xintensity; + } return NULL; } @@ -799,22 +799,22 @@ void manage_gpu(void) displayed_rolling = thr->rolling; if (!mhash_base) displayed_rolling *= 1000; - wlog("Thread %d: %.1f %sh/s %s ", i, displayed_rolling, mhash_base ? "M" : "K" , cgpu->deven != DEV_DISABLED ? "Enabled" : "Disabled"); + wlog("Thread %d: %.1f %sh/s %s ", i, displayed_rolling, mhash_base ? "M" : "K", cgpu->deven != DEV_DISABLED ? "Enabled" : "Disabled"); switch (cgpu->status) { - default: - case LIFE_WELL: - wlog("ALIVE"); - break; - case LIFE_SICK: - wlog("SICK reported in %s", checkin); - break; - case LIFE_DEAD: - wlog("DEAD reported in %s", checkin); - break; - case LIFE_INIT: - case LIFE_NOSTART: - wlog("Never started"); - break; + default: + case LIFE_WELL: + wlog("ALIVE"); + break; + case LIFE_SICK: + wlog("SICK reported in %s", checkin); + break; + case LIFE_DEAD: + wlog("DEAD reported in %s", checkin); + break; + case LIFE_INIT: + case LIFE_NOSTART: + wlog("Never started"); + break; } if (thr->pause) wlog(" paused"); @@ -825,7 +825,7 @@ void manage_gpu(void) wlog("\n"); } - wlogprint("[E]nable [D]isable [R]estart GPU %s\n",adl_active ? "[C]hange settings" : ""); + wlogprint("[E]nable [D]isable [R]estart GPU %s\n", adl_active ? "[C]hange settings" : ""); wlogprint("[I]ntensity E[x]perimental intensity R[a]w Intensity\n"); wlogprint("Or press any other key to continue\n"); @@ -868,7 +868,8 @@ void manage_gpu(void) } rd_unlock(&mining_thr_lock); goto retry; - } else if (!strncasecmp(&input, "d", 1)) { + } + else if (!strncasecmp(&input, "d", 1)) { if (selected) selected = curses_int("Select GPU to disable"); if (selected < 0 || selected >= nDevs) { @@ -881,7 +882,8 @@ void manage_gpu(void) } gpus[selected].deven = DEV_DISABLED; goto retry; - } else if (!strncasecmp(&input, "i", 1)) { + } + else if (!strncasecmp(&input, "i", 1)) { int intensity; char *intvar; @@ -893,8 +895,8 @@ void manage_gpu(void) } intvar = curses_input("Set GPU scan intensity (d or " - MIN_INTENSITY_STR " -> " - MAX_INTENSITY_STR ")"); + MIN_INTENSITY_STR " -> " + MAX_INTENSITY_STR ")"); if (!intvar) { wlogprint("Invalid input\n"); goto retry; @@ -927,7 +929,8 @@ void manage_gpu(void) pause_dynamic_threads(selected); goto retry; - } else if (!strncasecmp(&input, "x", 1)) { + } + else if (!strncasecmp(&input, "x", 1)) { int xintensity; char *intvar; @@ -960,7 +963,8 @@ void manage_gpu(void) pause_dynamic_threads(selected); goto retry; - } else if (!strncasecmp(&input, "a", 1)) { + } + else if (!strncasecmp(&input, "a", 1)) { int rawintensity; char *intvar; @@ -993,7 +997,8 @@ void manage_gpu(void) pause_dynamic_threads(selected); goto retry; - } else if (!strncasecmp(&input, "r", 1)) { + } + else if (!strncasecmp(&input, "r", 1)) { if (selected) selected = curses_int("Select GPU to attempt to restart"); if (selected < 0 || selected >= nDevs) { @@ -1003,7 +1008,8 @@ void manage_gpu(void) wlogprint("Attempting to restart threads of GPU %d\n", selected); reinit_device(&gpus[selected]); goto retry; - } else if (adl_active && (!strncasecmp(&input, "c", 1))) { + } + else if (adl_active && (!strncasecmp(&input, "c", 1))) { if (selected) selected = curses_int("Select GPU to change settings on"); if (selected < 0 || selected >= nDevs) { @@ -1012,7 +1018,8 @@ void manage_gpu(void) } change_gpusettings(selected); goto retry; - } else + } + else clear_logwin(); immedok(logwin, false); @@ -1027,8 +1034,8 @@ void manage_gpu(void) static _clState *clStates[MAX_GPUDEVICES]; static void set_threads_hashes(unsigned int vectors, unsigned int compute_shaders, int64_t *hashes, size_t *globalThreads, - unsigned int minthreads, __maybe_unused int *intensity, __maybe_unused int *xintensity, - __maybe_unused int *rawintensity, algorithm_t *algorithm) + unsigned int minthreads, __maybe_unused int *intensity, __maybe_unused int *xintensity, + __maybe_unused int *rawintensity, algorithm_t *algorithm) { unsigned int threads = 0; while (threads < minthreads) { @@ -1037,7 +1044,7 @@ static void set_threads_hashes(unsigned int vectors, unsigned int compute_shader threads = *rawintensity; } else if (*xintensity > 0) { - threads = compute_shaders * ((algorithm->xintensity_shift)?(1 << (algorithm->xintensity_shift + *xintensity)):*xintensity); + threads = compute_shaders * ((algorithm->xintensity_shift) ? (1 << (algorithm->xintensity_shift + *xintensity)) : *xintensity); } else { threads = 1 << (algorithm->intensity_shift + *intensity); @@ -1102,7 +1109,8 @@ void *reinit_gpu(void *userdata) applog(LOG_WARNING, "Thread %d still exists, killing it off", thr_id); cg_completion_timeout(&thr_info_cancel_join, thr, 5000); thr->cgpu->drv->thread_shutdown(thr); - } else + } + else applog(LOG_WARNING, "Thread %d no longer exists", thr_id); } rd_unlock(&mining_thr_lock); @@ -1234,7 +1242,8 @@ static void get_opencl_statline_before(char *buf, size_t bufsiz, struct cgpu_inf else tailsprintf(buf, bufsiz, " "); tailsprintf(buf, bufsiz, "| "); - } else + } + else gpu->drv->get_statline_before = &blank_get_statline_before; } #endif @@ -1250,7 +1259,7 @@ static void get_opencl_statline(char *buf, size_t bufsiz, struct cgpu_info *gpu) } struct opencl_thread_data { - cl_int (*queue_kernel_parameters)(_clState *, dev_blk_ctx *, cl_uint); + cl_int(*queue_kernel_parameters)(_clState *, dev_blk_ctx *, cl_uint); uint32_t *res; }; @@ -1340,7 +1349,7 @@ static bool opencl_thread_init(struct thr_info *thr) } status |= clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0, - buffersize, blank_res, 0, NULL, NULL); + buffersize, blank_res, 0, NULL, NULL); if (unlikely(status != CL_SUCCESS)) { free(thrdata->res); free(thrdata); @@ -1357,7 +1366,13 @@ static bool opencl_thread_init(struct thr_info *thr) static bool opencl_prepare_work(struct thr_info __maybe_unused *thr, struct work *work) { - work->blk.work = work; + if (!safe_cmp(work->pool->algorithm.name, "Lyra2RE")) { + work->blk.work = work; + precalc_hash_blake256(&work->blk, 0, (uint32_t *)(work->data)); + } + else { + work->blk.work = work; + } thr->pool_no = work->pool->pool_no; return true; } @@ -1365,7 +1380,7 @@ static bool opencl_prepare_work(struct thr_info __maybe_unused *thr, struct work extern int opt_dynamic_interval; static int64_t opencl_scanhash(struct thr_info *thr, struct work *work, - int64_t __maybe_unused max_nonce) + int64_t __maybe_unused max_nonce) { const int thr_id = thr->id; struct opencl_thread_data *thrdata = (struct opencl_thread_data *)thr->cgpu_data; @@ -1376,11 +1391,11 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work, cl_int status; size_t globalThreads[1]; size_t localThreads[1] = { clState->wsize }; - size_t *p_global_work_offset = NULL; + size_t *p_global_work_offset = NULL; int64_t hashes; int found = gpu->algorithm.found_idx; int buffersize = BUFFERSIZE; - unsigned int i; + unsigned int i; /* Windows' timer resolution is only 15ms so oversample 5x */ if (gpu->dynamic && (++gpu->intervals * dynamic_us) > 70000) { @@ -1392,7 +1407,8 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work, if (gpu_us > dynamic_us) { if (gpu->intensity > MIN_INTENSITY) --gpu->intensity; - } else if (gpu_us < dynamic_us / 2) { + } + else if (gpu_us < dynamic_us / 2) { if (gpu->intensity < MAX_INTENSITY) ++gpu->intensity; } @@ -1401,7 +1417,7 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work, } set_threads_hashes(clState->vwidth, clState->compute_shaders, &hashes, globalThreads, localThreads[0], - &gpu->intensity, &gpu->xintensity, &gpu->rawintensity, &gpu->algorithm); + &gpu->intensity, &gpu->xintensity, &gpu->rawintensity, &gpu->algorithm); if (hashes > gpu->max_hashes) gpu->max_hashes = hashes; @@ -1411,27 +1427,27 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work, return -1; } - if (clState->goffset) - p_global_work_offset = (size_t *)&work->blk.nonce; + if (clState->goffset) + p_global_work_offset = (size_t *)&work->blk.nonce; - status = clEnqueueNDRangeKernel(clState->commandQueue, clState->kernel, 1, p_global_work_offset, - globalThreads, localThreads, 0, NULL, NULL); + status = clEnqueueNDRangeKernel(clState->commandQueue, clState->kernel, 1, p_global_work_offset, + globalThreads, localThreads, 0, NULL, NULL); if (unlikely(status != CL_SUCCESS)) { applog(LOG_ERR, "Error %d: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)", status); return -1; } for (i = 0; i < clState->n_extra_kernels; i++) { - status = clEnqueueNDRangeKernel(clState->commandQueue, clState->extra_kernels[i], 1, p_global_work_offset, - globalThreads, localThreads, 0, NULL, NULL); - if (unlikely(status != CL_SUCCESS)) { - applog(LOG_ERR, "Error %d: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)", status); - return -1; - } + status = clEnqueueNDRangeKernel(clState->commandQueue, clState->extra_kernels[i], 1, p_global_work_offset, + globalThreads, localThreads, 0, NULL, NULL); + if (unlikely(status != CL_SUCCESS)) { + applog(LOG_ERR, "Error %d: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)", status); + return -1; + } } status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0, - buffersize, thrdata->res, 0, NULL, NULL); + buffersize, thrdata->res, 0, NULL, NULL); if (unlikely(status != CL_SUCCESS)) { applog(LOG_ERR, "Error: clEnqueueReadBuffer failed error %d. (clEnqueueReadBuffer)", status); return -1; @@ -1449,7 +1465,7 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work, if (thrdata->res[found]) { /* Clear the buffer again */ status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0, - buffersize, blank_res, 0, NULL, NULL); + buffersize, blank_res, 0, NULL, NULL); if (unlikely(status != CL_SUCCESS)) { applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); return -1; @@ -1471,7 +1487,7 @@ static void opencl_thread_shutdown(struct thr_info *thr) const int thr_id = thr->id; _clState *clState = clStates[thr_id]; clStates[thr_id] = NULL; - unsigned int i; + unsigned int i; if (clState) { clFinish(clState->commandQueue); @@ -1480,8 +1496,8 @@ static void opencl_thread_shutdown(struct thr_info *thr) if (clState->padbuffer8) clReleaseMemObject(clState->padbuffer8); clReleaseKernel(clState->kernel); - for (i = 0; i < clState->n_extra_kernels; i++) - clReleaseKernel(clState->extra_kernels[i]); + for (i = 0; i < clState->n_extra_kernels; i++) + clReleaseKernel(clState->extra_kernels[i]); clReleaseProgram(clState->program); clReleaseCommandQueue(clState->commandQueue); clReleaseContext(clState->context); @@ -1503,7 +1519,7 @@ struct device_drv opencl_drv = { #ifdef HAVE_ADL /*.get_statline_before = */ get_opencl_statline_before, #else - NULL, + NULL, #endif /*.get_statline = */ get_opencl_statline, /*.api_data = */ NULL, @@ -1524,7 +1540,7 @@ struct device_drv opencl_drv = { /*.hw_error = */ NULL, /*.thread_shutdown = */ opencl_thread_shutdown, /*.thread_enable =*/ NULL, - false, - 0, - 0 + false, + 0, + 0 }; diff --git a/findnonce.c b/findnonce.c index be9ba0dfd..c24029972 100644 --- a/findnonce.c +++ b/findnonce.c @@ -92,7 +92,7 @@ void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data) blk->T1 = blk->fcty_e2 = (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G))); blk->PreVal4_2 = blk->PreVal4 + blk->T1; blk->PreVal0 = blk->PreVal4 + blk->ctx_a; - blk->PreW31 = 0x00000280 + (rotr(blk->W16, 7) ^ rotr(blk->W16, 18) ^ (blk->W16 >> 3)); + blk->PreW31 = 0x00000280 + (rotr(blk->W16, 7) ^ rotr(blk->W16, 18) ^ (blk->W16 >> 3)); blk->PreW32 = blk->W16 + (rotr(blk->W17, 7) ^ rotr(blk->W17, 18) ^ (blk->W17 >> 3)); blk->PreW18 = data[2] + (rotr(blk->W16, 17) ^ rotr(blk->W16, 19) ^ (blk->W16 >> 10)); blk->PreW19 = 0x11002000 + (rotr(blk->W17, 17) ^ rotr(blk->W17, 19) ^ (blk->W17 >> 10)); @@ -191,7 +191,7 @@ static void *postcalc_hash(void *userdata) * end of the res[] array */ if (unlikely(pcd->res[found] & ~found)) { applog(LOG_WARNING, "%s%d: invalid nonce count - HW error", - thr->cgpu->drv->name, thr->cgpu->device_id); + thr->cgpu->drv->name, thr->cgpu->device_id); hw_errors++; thr->cgpu->hw_errors++; pcd->res[found] &= found; @@ -200,7 +200,7 @@ static void *postcalc_hash(void *userdata) for (entry = 0; entry < pcd->res[found]; entry++) { uint32_t nonce = pcd->res[entry]; if (found == 0x0F) - nonce = swab32(nonce); + nonce = swab32(nonce); applog(LOG_DEBUG, "[THR%d] OCL NONCE %08x (%lu) found in slot %d (found = %d)", thr->id, nonce, nonce, entry, found); submit_nonce(thr, pcd->work, nonce); @@ -234,3 +234,136 @@ void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t *res) free(pcd); } } + +// BLAKE 256 14 rounds (standard) + +typedef struct +{ + uint32_t h[8]; + uint32_t t; +} blake_state256; + +#define NB_ROUNDS32 14 + +const uint8_t blake_sigma[][16] = +{ + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 }, + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 }, + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 }, + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }, + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 }, + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 }, + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 }, + { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 }, + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 }, + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 }, + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 }, + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } +}; + +const uint32_t blake_u256[16] = +{ + 0x243f6a88, 0x85a308d3, 0x13198a2e, 0x03707344, + 0xa4093822, 0x299f31d0, 0x082efa98, 0xec4e6c89, + 0x452821e6, 0x38d01377, 0xbe5466cf, 0x34e90c6c, + 0xc0ac29b7, 0xc97c50dd, 0x3f84d5b5, 0xb5470917 +}; + +#define ROT32(x,n) (((x)<<(32-n))|( (x)>>(n))) +//#define ROT32(x,n) (rotate((uint)x, (uint)32-n)) +#define ADD32(x,y) ((uint32_t)((x) + (y))) +#define XOR32(x,y) ((uint32_t)((x) ^ (y))) + +#define G(a,b,c,d,i) \ +do { \ + v[a] += XOR32(m[blake_sigma[r][i]], blake_u256[blake_sigma[r][i + 1]]) + v[b]; \ + v[d] = ROT32(XOR32(v[d], v[a]), 16); \ + v[c] += v[d]; \ + v[b] = ROT32(XOR32(v[b], v[c]), 12); \ + v[a] += XOR32(m[blake_sigma[r][i + 1]], blake_u256[blake_sigma[r][i]]) + v[b]; \ + v[d] = ROT32(XOR32(v[d], v[a]), 8); \ + v[c] += v[d]; \ + v[b] = ROT32(XOR32(v[b], v[c]), 7); \ +} while (0) + +// compress a block +void blake256_compress_block(blake_state256 *S, uint32_t *m) +{ + uint32_t v[16]; + int i, r; + for (i = 0; i < 8; ++i) v[i] = S->h[i]; + + v[8] = blake_u256[0]; + v[9] = blake_u256[1]; + v[10] = blake_u256[2]; + v[11] = blake_u256[3]; + v[12] = blake_u256[4]; + v[13] = blake_u256[5]; + v[14] = blake_u256[6]; + v[15] = blake_u256[7]; + + v[12] ^= S->t; + v[13] ^= S->t; + + for (r = 0; r < NB_ROUNDS32; ++r) + { + /* column step */ + G(0, 4, 8, 12, 0); + G(1, 5, 9, 13, 2); + G(2, 6, 10, 14, 4); + G(3, 7, 11, 15, 6); + /* diagonal step */ + G(0, 5, 10, 15, 8); + G(1, 6, 11, 12, 10); + G(2, 7, 8, 13, 12); + G(3, 4, 9, 14, 14); + } + + for (i = 0; i < 16; ++i) S->h[i & 7] ^= v[i]; +} + +void blake256_init(blake_state256 *S) +{ + S->h[0] = 0x6a09e667; + S->h[1] = 0xbb67ae85; + S->h[2] = 0x3c6ef372; + S->h[3] = 0xa54ff53a; + S->h[4] = 0x510e527f; + S->h[5] = 0x9b05688c; + S->h[6] = 0x1f83d9ab; + S->h[7] = 0x5be0cd19; + S->t = 0; +} + +void blake256_update(blake_state256 *S, const uint32_t *in) +{ + uint32_t m[16]; + int i; + S->t = 512; + for (i = 0; i < 16; ++i) m[i] = in[i]; + blake256_compress_block(S, m); +} + +void precalc_hash_blake256(dev_blk_ctx *blk, uint32_t *state, uint32_t *data) +{ + blake_state256 S; + blake256_init(&S); + blake256_update(&S, data); + + blk->ctx_a = S.h[0]; + blk->ctx_b = S.h[1]; + blk->ctx_c = S.h[2]; + blk->ctx_d = S.h[3]; + blk->ctx_e = S.h[4]; + blk->ctx_f = S.h[5]; + blk->ctx_g = S.h[6]; + blk->ctx_h = S.h[7]; + + blk->cty_a = data[16]; + blk->cty_b = data[17]; + blk->cty_c = data[18]; +} \ No newline at end of file diff --git a/findnonce.h b/findnonce.h index 9376a57be..49b1aa9a5 100644 --- a/findnonce.h +++ b/findnonce.h @@ -10,5 +10,6 @@ extern void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data); extern void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t *res); +extern void precalc_hash_blake256(dev_blk_ctx *blk, uint32_t *state, uint32_t *data); #endif /*FINDNONCE_H*/ diff --git a/kernel/Lyra2.cl b/kernel/Lyra2.cl new file mode 100644 index 000000000..24aa5eb85 --- /dev/null +++ b/kernel/Lyra2.cl @@ -0,0 +1,145 @@ +/* +* Lyra2 kernel implementation. +* +* ==========================(LICENSE BEGIN)============================ +* Copyright (c) 2014 djm34 +* +* +* Permission is hereby granted, free of charge, to any person obtaining +* a copy of this software and associated documentation files (the +* "Software"), to deal in the Software without restriction, including +* without limitation the rights to use, copy, modify, merge, publish, +* distribute, sublicense, and/or sell copies of the Software, and to +* permit persons to whom the Software is furnished to do so, subject to +* the following conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +* +* ===========================(LICENSE END)============================= +* +* @author djm34 +*/ + +/*Blake2b IV Array*/ +__constant static const sph_u64 blake2b_IV[8] = +{ + 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, + 0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL, + 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL, + 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL +}; + +/*Blake2b's rotation*/ +static inline sph_u64 rotr64( const sph_u64 w, const unsigned c ){ + return rotate(w, (ulong)(64-c)); +} + +/*Blake2b's G function*/ +#define G(a,b,c,d) \ + do { \ +a += b; d ^= a; d = SPH_ROTR64(d, 32); \ +c += d; b ^= c; b = SPH_ROTR64(b, 24); \ +a += b; d ^= a; d = SPH_ROTR64(d, 16); \ +c += d; b ^= c; b = SPH_ROTR64(b, 63); \ + } while(0) + + +/*One Round of the Blake2b's compression function*/ +#define round_lyra(v) \ + do { \ + G(v[ 0],v[ 4],v[ 8],v[12]); \ + G(v[ 1],v[ 5],v[ 9],v[13]); \ + G(v[ 2],v[ 6],v[10],v[14]); \ + G(v[ 3],v[ 7],v[11],v[15]); \ + G(v[ 0],v[ 5],v[10],v[15]); \ + G(v[ 1],v[ 6],v[11],v[12]); \ + G(v[ 2],v[ 7],v[ 8],v[13]); \ + G(v[ 3],v[ 4],v[ 9],v[14]); \ + } while(0) + + +#define reduceDuplexRowSetup(rowIn, rowInOut, rowOut) \ + { \ + for (int i = 0; i < 8; i++) \ + { \ +\ + for (int j = 0; j < 12; j++) {state[j] ^= Matrix[12 * i + j][rowIn] + Matrix[12 * i + j][rowInOut];} \ + round_lyra(state); \ + for (int j = 0; j < 12; j++) {Matrix[j + 84 - 12 * i][rowOut] = Matrix[12 * i + j][rowIn] ^ state[j];} \ +\ + Matrix[0 + 12 * i][rowInOut] ^= state[11]; \ + Matrix[1 + 12 * i][rowInOut] ^= state[0]; \ + Matrix[2 + 12 * i][rowInOut] ^= state[1]; \ + Matrix[3 + 12 * i][rowInOut] ^= state[2]; \ + Matrix[4 + 12 * i][rowInOut] ^= state[3]; \ + Matrix[5 + 12 * i][rowInOut] ^= state[4]; \ + Matrix[6 + 12 * i][rowInOut] ^= state[5]; \ + Matrix[7 + 12 * i][rowInOut] ^= state[6]; \ + Matrix[8 + 12 * i][rowInOut] ^= state[7]; \ + Matrix[9 + 12 * i][rowInOut] ^= state[8]; \ + Matrix[10 + 12 * i][rowInOut] ^= state[9]; \ + Matrix[11 + 12 * i][rowInOut] ^= state[10]; \ + } \ + \ + } + +#define reduceDuplexRow(rowIn, rowInOut, rowOut) \ + { \ + for (int i = 0; i < 8; i++) \ + { \ + for (int j = 0; j < 12; j++) \ + state[j] ^= Matrix[12 * i + j][rowIn] + Matrix[12 * i + j][rowInOut]; \ + \ + round_lyra(state); \ + for (int j = 0; j < 12; j++) {Matrix[j + 12 * i][rowOut] ^= state[j];} \ +\ + Matrix[0 + 12 * i][rowInOut] ^= state[11]; \ + Matrix[1 + 12 * i][rowInOut] ^= state[0]; \ + Matrix[2 + 12 * i][rowInOut] ^= state[1]; \ + Matrix[3 + 12 * i][rowInOut] ^= state[2]; \ + Matrix[4 + 12 * i][rowInOut] ^= state[3]; \ + Matrix[5 + 12 * i][rowInOut] ^= state[4]; \ + Matrix[6 + 12 * i][rowInOut] ^= state[5]; \ + Matrix[7 + 12 * i][rowInOut] ^= state[6]; \ + Matrix[8 + 12 * i][rowInOut] ^= state[7]; \ + Matrix[9 + 12 * i][rowInOut] ^= state[8]; \ + Matrix[10 + 12 * i][rowInOut] ^= state[9]; \ + Matrix[11 + 12 * i][rowInOut] ^= state[10]; \ + } \ + \ + } +#define absorbblock(in) { \ + state[0] ^= Matrix[0][in]; \ + state[1] ^= Matrix[1][in]; \ + state[2] ^= Matrix[2][in]; \ + state[3] ^= Matrix[3][in]; \ + state[4] ^= Matrix[4][in]; \ + state[5] ^= Matrix[5][in]; \ + state[6] ^= Matrix[6][in]; \ + state[7] ^= Matrix[7][in]; \ + state[8] ^= Matrix[8][in]; \ + state[9] ^= Matrix[9][in]; \ + state[10] ^= Matrix[10][in]; \ + state[11] ^= Matrix[11][in]; \ + round_lyra(state); \ + round_lyra(state); \ + round_lyra(state); \ + round_lyra(state); \ + round_lyra(state); \ + round_lyra(state); \ + round_lyra(state); \ + round_lyra(state); \ + round_lyra(state); \ + round_lyra(state); \ + round_lyra(state); \ + round_lyra(state); \ + } \ No newline at end of file diff --git a/kernel/Lyra2RE.cl b/kernel/Lyra2RE.cl new file mode 100644 index 000000000..03ddc15ed --- /dev/null +++ b/kernel/Lyra2RE.cl @@ -0,0 +1,392 @@ +/* + * Lyra2RE kernel implementation. + * + * ==========================(LICENSE BEGIN)============================ + * Copyright (c) 2014 djm34 + * Copyright (c) 2014 James Lovejoy + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author djm34 + */ + +#pragma OPENCL EXTENSION cl_amd_printf : enable + +#ifndef LYRA2RE_CL +#define LYRA2RE_CL + +#if __ENDIAN_LITTLE__ +#define SPH_LITTLE_ENDIAN 1 +#else +#define SPH_BIG_ENDIAN 1 +#endif + +#define SPH_UPTR sph_u64 + +typedef unsigned int sph_u32; +typedef int sph_s32; +#ifndef __OPENCL_VERSION__ +typedef unsigned long long sph_u64; +typedef long long sph_s64; +#else +typedef unsigned long sph_u64; +typedef long sph_s64; +#endif + + +#define SPH_64 1 +#define SPH_64_TRUE 1 + +#define SPH_C32(x) ((sph_u32)(x ## U)) +#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) + +#define SPH_C64(x) ((sph_u64)(x ## UL)) +#define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF)) + +//#define SPH_ROTL32(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) +//#define SPH_ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n)))) +//#define SPH_ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n)))) +//#define SPH_ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) + +#define SPH_ROTL32(x,n) rotate(x,(uint)n) //faster with driver 14.6 +#define SPH_ROTR32(x,n) rotate(x,(uint)(32-n)) +#define SPH_ROTL64(x,n) rotate(x,(ulong)n) +#define SPH_ROTR64(x,n) rotate(x,(ulong)(64-n)) + + +#include "blake256.cl" +#include "groestl256.cl" +#include "Lyra2.cl" +#include "keccak1600.cl" +#include "skein256.cl" + +#define SWAP4(x) as_uint(as_uchar4(x).wzyx) +#define SWAP8(x) as_ulong(as_uchar8(x).s76543210) + +#if SPH_BIG_ENDIAN + #define DEC64E(x) (x) + #define DEC64BE(x) (*(const __global sph_u64 *) (x)); + #define DEC64LE(x) SWAP8(*(const __global sph_u64 *) (x)); + #define DEC32LE(x) (*(const __global sph_u32 *) (x)); +#else + #define DEC64E(x) SWAP8(x) + #define DEC64BE(x) SWAP8(*(const __global sph_u64 *) (x)); + #define DEC64LE(x) (*(const __global sph_u64 *) (x)); +#define DEC32LE(x) SWAP4(*(const __global sph_u32 *) (x)); +#endif + +typedef union { + unsigned char h1[64]; + uint h4[16]; + ulong h8[8]; +} hash_t; + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search( + __global hash_t* hashes, + // precalc hash from fisrt part of message + const uint h0, + const uint h1, + const uint h2, + const uint h3, + const uint h4, + const uint h5, + const uint h6, + const uint h7, + // last 12 bytes of original message + const uint in16, + const uint in17, + const uint in18 +) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + sph_u32 h[8]; + sph_u32 m[16]; + sph_u32 v[16]; + + +h[0]=h0; +h[1]=h1; +h[2]=h2; +h[3]=h3; +h[4]=h4; +h[5]=h5; +h[6]=h6; +h[7]=h7; +// compress 2nd round + m[0] = in16; + m[1] = in17; + m[2] = in18; + m[3] = SWAP4(gid); + + for (int i = 4; i < 16; i++) {m[i] = c_Padding[i];} + + for (int i = 0; i < 8; i++) {v[i] = h[i];} + + v[8] = c_u256[0]; + v[9] = c_u256[1]; + v[10] = c_u256[2]; + v[11] = c_u256[3]; + v[12] = c_u256[4] ^ 640; + v[13] = c_u256[5] ^ 640; + v[14] = c_u256[6]; + v[15] = c_u256[7]; + + for (int r = 0; r < 14; r++) { + GS(0, 4, 0x8, 0xC, 0x0); + GS(1, 5, 0x9, 0xD, 0x2); + GS(2, 6, 0xA, 0xE, 0x4); + GS(3, 7, 0xB, 0xF, 0x6); + GS(0, 5, 0xA, 0xF, 0x8); + GS(1, 6, 0xB, 0xC, 0xA); + GS(2, 7, 0x8, 0xD, 0xC); + GS(3, 4, 0x9, 0xE, 0xE); + } + + for (int i = 0; i < 16; i++) { + int j = i & 7; + h[j] ^= v[i];} + +for (int i=0;i<8;i++) {hash->h4[i]=SWAP4(h[i]);} + + barrier(CLK_GLOBAL_MEM_FENCE); + +} + +// keccak256 + + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search1(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + sph_u64 keccak_gpu_state[25]; + + for (int i = 0; i<25; i++) { + if (i<4) { keccak_gpu_state[i] = hash->h8[i]; } + else { keccak_gpu_state[i] = 0; } + } + keccak_gpu_state[4] = 0x0000000000000001; + keccak_gpu_state[16] = 0x8000000000000000; + + keccak_block(keccak_gpu_state); + for (int i = 0; i<4; i++) { hash->h8[i] = keccak_gpu_state[i]; } + + barrier(CLK_GLOBAL_MEM_FENCE); + + +} + +/// lyra2 algo + + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search2(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + + + sph_u64 state[16]; + + for (int i = 0; i<4; i++) { state[i] = hash->h8[i];} //password + for (int i = 0; i<4; i++) { state[i + 4] = state[i]; } //salt + + for (int i = 0; i<8; i++) { state[i + 8] = blake2b_IV[i]; } + + // blake2blyra x2 + + for (int i = 0; i<24; i++) { round_lyra(state); } //because 12 is not enough + + sph_u64 Matrix[96][8]; // very uncool + /// reducedSqueezeRow0 + + for (int i = 0; i < 8; i++) + { + for (int j = 0; j<12; j++) { Matrix[j + 84 - 12 * i][0] = state[j]; } + round_lyra(state); + } + + /// reducedSqueezeRow1 + + for (int i = 0; i < 8; i++) + { + for (int j = 0; j<12; j++) { state[j] ^= Matrix[j + 12 * i][0]; } + round_lyra(state); + for (int j = 0; j<12; j++) { Matrix[j + 84 - 12 * i][1] = Matrix[j + 12 * i][0] ^ state[j]; } + } + + + reduceDuplexRowSetup(1, 0, 2); + reduceDuplexRowSetup(2, 1, 3); + reduceDuplexRowSetup(3, 0, 4); + reduceDuplexRowSetup(4, 3, 5); + reduceDuplexRowSetup(5, 2, 6); + reduceDuplexRowSetup(6, 1, 7); + + sph_u64 rowa; + rowa = state[0] & 7; + + reduceDuplexRow(7, rowa, 0); + rowa = state[0] & 7; + reduceDuplexRow(0, rowa, 3); + rowa = state[0] & 7; + reduceDuplexRow(3, rowa, 6); + rowa = state[0] & 7; + reduceDuplexRow(6, rowa, 1); + rowa = state[0] & 7; + reduceDuplexRow(1, rowa, 4); + rowa = state[0] & 7; + reduceDuplexRow(4, rowa, 7); + rowa = state[0] & 7; + reduceDuplexRow(7, rowa, 2); + rowa = state[0] & 7; + reduceDuplexRow(2, rowa, 5); + + absorbblock(rowa); + + for (int i = 0; i<4; i++) {hash->h8[i] = state[i];} + + barrier(CLK_GLOBAL_MEM_FENCE); + +} + +//skein256 + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search3(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + + sph_u64 h[9]; + sph_u64 t[3]; + sph_u64 dt0,dt1,dt2,dt3; + sph_u64 p0, p1, p2, p3, p4, p5, p6, p7; + h[8] = skein_ks_parity; + + for (int i = 0; i<8; i++) { + h[i] = SKEIN_IV512_256[i]; + h[8] ^= h[i];} + + t[0]=t12[0]; + t[1]=t12[1]; + t[2]=t12[2]; + + dt0=hash->h8[0]; + dt1=hash->h8[1]; + dt2=hash->h8[2]; + dt3=hash->h8[3]; + + p0 = h[0] + dt0; + p1 = h[1] + dt1; + p2 = h[2] + dt2; + p3 = h[3] + dt3; + p4 = h[4]; + p5 = h[5] + t[0]; + p6 = h[6] + t[1]; + p7 = h[7]; + + #pragma unroll + for (int i = 1; i<19; i+=2) {Round_8_512(p0,p1,p2,p3,p4,p5,p6,p7,i);} + p0 ^= dt0; + p1 ^= dt1; + p2 ^= dt2; + p3 ^= dt3; + + h[0] = p0; + h[1] = p1; + h[2] = p2; + h[3] = p3; + h[4] = p4; + h[5] = p5; + h[6] = p6; + h[7] = p7; + h[8] = skein_ks_parity; + + for (int i = 0; i<8; i++) { h[8] ^= h[i]; } + + t[0] = t12[3]; + t[1] = t12[4]; + t[2] = t12[5]; + p5 += t[0]; //p5 already equal h[5] + p6 += t[1]; + + #pragma unroll + for (int i = 1; i<19; i+=2) { Round_8_512(p0, p1, p2, p3, p4, p5, p6, p7, i); } + + hash->h8[0] = p0; + hash->h8[1] = p1; + hash->h8[2] = p2; + hash->h8[3] = p3; + + + + barrier(CLK_GLOBAL_MEM_FENCE); + +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search4(__global hash_t* hashes, __global uint* output, const uint target) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid - get_global_offset(0)]); + + sph_u64 message[8], state[8]; + sph_u64 t[8]; + for (int k = 0; k<4; k++) { message[k] = hash->h8[k]; } + + message[4] = 0x80UL; + message[5] = 0UL; + message[6] = 0UL; + message[7] = 0x0100000000000000UL; + + for (int u = 0; u<8; u++) { state[u] = message[u]; } + state[7] ^= 0x0001000000000000UL; + + PERM_SMALL_P(state); + state[7] ^= 0x0001000000000000UL; + PERM_SMALL_Q(message); + + for (int u = 0; u<8; u++) { state[u] ^= message[u]; } + message[7] = state[7]; + + PERM_SMALL_Pf(state); + + state[7] ^= message[7]; + barrier(CLK_GLOBAL_MEM_FENCE); + + bool result = ( as_uint2(state[7]).y <= target); + if (result) { + output[atomic_inc(output + 0xFF)] = SWAP4(gid); + } + +} + + +#endif // LYRA2RE_CL \ No newline at end of file diff --git a/kernel/arebyp.cl b/kernel/arebyp.cl index 2e00e2cd8..ecfa9863d 100644 --- a/kernel/arebyp.cl +++ b/kernel/arebyp.cl @@ -904,7 +904,7 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup) //------------------------------------------------------------------------------------------------------------ uint cotmp = x * zSIZE; progress = 0; - for (i = 0; i < N[NFACTOR] + 512 + 42; i++) + for (i = 0; i < N[NFACTOR] + (N[NFACTOR] / LOOKUP_GAP) + 42; i++) { //if (progress < 2 * N[NFACTOR]) { diff --git a/kernel/blake256.cl b/kernel/blake256.cl new file mode 100644 index 000000000..012285a5e --- /dev/null +++ b/kernel/blake256.cl @@ -0,0 +1,96 @@ +/* +* blake256 kernel implementation. +* +* ==========================(LICENSE BEGIN)============================ +* Copyright (c) 2014 djm34 +* Copyright (c) 2014 tpruvot +* Permission is hereby granted, free of charge, to any person obtaining +* a copy of this software and associated documentation files (the +* "Software"), to deal in the Software without restriction, including +* without limitation the rights to use, copy, modify, merge, publish, +* distribute, sublicense, and/or sell copies of the Software, and to +* permit persons to whom the Software is furnished to do so, subject to +* the following conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +* +* ===========================(LICENSE END)============================= +* +* @author djm34 +*/ +__constant static const int sigma[16][16] = { + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 }, + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 }, + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 }, + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }, + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 }, + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 }, + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 }, + { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 }, + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 }, + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 }, + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 }, + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } +}; + + +__constant static const sph_u32 c_IV256[8] = { + 0x6A09E667, 0xBB67AE85, + 0x3C6EF372, 0xA54FF53A, + 0x510E527F, 0x9B05688C, + 0x1F83D9AB, 0x5BE0CD19 +}; + +/* Second part (64-80) msg never change, store it */ +__constant static const sph_u32 c_Padding[16] = { + 0, 0, 0, 0, + 0x80000000, 0, 0, 0, + 0, 0, 0, 0, + 0, 1, 0, 640, +}; +__constant static const sph_u32 c_u256[16] = { + 0x243F6A88, 0x85A308D3, + 0x13198A2E, 0x03707344, + 0xA4093822, 0x299F31D0, + 0x082EFA98, 0xEC4E6C89, + 0x452821E6, 0x38D01377, + 0xBE5466CF, 0x34E90C6C, + 0xC0AC29B7, 0xC97C50DD, + 0x3F84D5B5, 0xB5470917 +}; + +#define GS(a,b,c,d,x) { \ + const sph_u32 idx1 = sigma[r][x]; \ + const sph_u32 idx2 = sigma[r][x+1]; \ + v[a] += (m[idx1] ^ c_u256[idx2]) + v[b]; \ + v[d] ^= v[a]; \ + v[d] = SPH_ROTR32(v[d], 16); \ + v[c] += v[d]; \ + v[b] ^= v[c]; \ + v[b] = SPH_ROTR32(v[b], 12); \ +\ + v[a] += (m[idx2] ^ c_u256[idx1]) + v[b]; \ + v[d] ^= v[a]; \ + v[d] = SPH_ROTR32(v[d], 8); \ + v[c] += v[d]; \ + v[b] ^= v[c]; \ + v[b] = SPH_ROTR32(v[b], 7); \ +} + + + + + diff --git a/kernel/diamond.cl b/kernel/diamond.cl index b02edd7a4..de3768cf9 100644 --- a/kernel/diamond.cl +++ b/kernel/diamond.cl @@ -1,8 +1,10 @@ /* - * ==========================(LICENSE BEGIN)============================ + * GroestlCoin kernel implementation. * - * GroestlCoin kernel implementation: Copyright (c) 2014 pallas + * ==========================(LICENSE BEGIN)============================ * + * Copyright (c) 2014 phm + * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including @@ -10,10 +12,10 @@ * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. @@ -24,1451 +26,164 @@ * * ===========================(LICENSE END)============================= * - * GroestlCoin kernel implementation: @author pallas - * Forum thread: http://bitcointalk.org/index.php?topic=779598 - * Donations to: BTC 1H7qC5uHuGX2d5s9Kuw3k7Wm7xMQzL16SN + * @author phm */ -#ifndef DIAMOND_CL -#define DIAMOND_CL - -#define DC64(x) ((ulong)(x ## UL)) -#define DEC64E(x) (*(const __global ulong *) (x)); -#define H15 (((ulong)(512 & 0xFF) << 56) | ((ulong)(512 & 0xFF00) << 40)) -#define M15 0x100000000000000 -#define ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n)))) // rotate - -#define C64e(x) ((DC64(x) >> 56) \ - | ((DC64(x) >> 40) & DC64(0x000000000000FF00)) \ - | ((DC64(x) >> 24) & DC64(0x0000000000FF0000)) \ - | ((DC64(x) >> 8) & DC64(0x00000000FF000000)) \ - | ((DC64(x) << 8) & DC64(0x000000FF00000000)) \ - | ((DC64(x) << 24) & DC64(0x0000FF0000000000)) \ - | ((DC64(x) << 40) & DC64(0x00FF000000000000)) \ - | ((DC64(x) << 56))) -#define B64_0(x) ((x) & 0xFF) -#define B64_1(x) (((x) >> 8) & 0xFF) -#define B64_2(x) (((x) >> 16) & 0xFF) -#define B64_3(x) (((x) >> 24) & 0xFF) -#define B64_4(x) (((x) >> 32) & 0xFF) -#define B64_5(x) (((x) >> 40) & 0xFF) -#define B64_6(x) (((x) >> 48) & 0xFF) -#define B64_7(x) ((x) >> 56) -#define PC64(j, r) ((ulong)((j) + (r))) -#define QC64(j, r) (((ulong)(r) << 56) ^ ~((ulong)(j) << 56)) - -__constant static const ulong T0[] = { - C64e(0xc632f4a5f497a5c6), C64e(0xf86f978497eb84f8), - C64e(0xee5eb099b0c799ee), C64e(0xf67a8c8d8cf78df6), - C64e(0xffe8170d17e50dff), C64e(0xd60adcbddcb7bdd6), - C64e(0xde16c8b1c8a7b1de), C64e(0x916dfc54fc395491), - C64e(0x6090f050f0c05060), C64e(0x0207050305040302), - C64e(0xce2ee0a9e087a9ce), C64e(0x56d1877d87ac7d56), - C64e(0xe7cc2b192bd519e7), C64e(0xb513a662a67162b5), - C64e(0x4d7c31e6319ae64d), C64e(0xec59b59ab5c39aec), - C64e(0x8f40cf45cf05458f), C64e(0x1fa3bc9dbc3e9d1f), - C64e(0x8949c040c0094089), C64e(0xfa68928792ef87fa), - C64e(0xefd03f153fc515ef), C64e(0xb29426eb267febb2), - C64e(0x8ece40c94007c98e), C64e(0xfbe61d0b1ded0bfb), - C64e(0x416e2fec2f82ec41), C64e(0xb31aa967a97d67b3), - C64e(0x5f431cfd1cbefd5f), C64e(0x456025ea258aea45), - C64e(0x23f9dabfda46bf23), C64e(0x535102f702a6f753), - C64e(0xe445a196a1d396e4), C64e(0x9b76ed5bed2d5b9b), - C64e(0x75285dc25deac275), C64e(0xe1c5241c24d91ce1), - C64e(0x3dd4e9aee97aae3d), C64e(0x4cf2be6abe986a4c), - C64e(0x6c82ee5aeed85a6c), C64e(0x7ebdc341c3fc417e), - C64e(0xf5f3060206f102f5), C64e(0x8352d14fd11d4f83), - C64e(0x688ce45ce4d05c68), C64e(0x515607f407a2f451), - C64e(0xd18d5c345cb934d1), C64e(0xf9e1180818e908f9), - C64e(0xe24cae93aedf93e2), C64e(0xab3e9573954d73ab), - C64e(0x6297f553f5c45362), C64e(0x2a6b413f41543f2a), - C64e(0x081c140c14100c08), C64e(0x9563f652f6315295), - C64e(0x46e9af65af8c6546), C64e(0x9d7fe25ee2215e9d), - C64e(0x3048782878602830), C64e(0x37cff8a1f86ea137), - C64e(0x0a1b110f11140f0a), C64e(0x2febc4b5c45eb52f), - C64e(0x0e151b091b1c090e), C64e(0x247e5a365a483624), - C64e(0x1badb69bb6369b1b), C64e(0xdf98473d47a53ddf), - C64e(0xcda76a266a8126cd), C64e(0x4ef5bb69bb9c694e), - C64e(0x7f334ccd4cfecd7f), C64e(0xea50ba9fbacf9fea), - C64e(0x123f2d1b2d241b12), C64e(0x1da4b99eb93a9e1d), - C64e(0x58c49c749cb07458), C64e(0x3446722e72682e34), - C64e(0x3641772d776c2d36), C64e(0xdc11cdb2cda3b2dc), - C64e(0xb49d29ee2973eeb4), C64e(0x5b4d16fb16b6fb5b), - C64e(0xa4a501f60153f6a4), C64e(0x76a1d74dd7ec4d76), - C64e(0xb714a361a37561b7), C64e(0x7d3449ce49face7d), - C64e(0x52df8d7b8da47b52), C64e(0xdd9f423e42a13edd), - C64e(0x5ecd937193bc715e), C64e(0x13b1a297a2269713), - C64e(0xa6a204f50457f5a6), C64e(0xb901b868b86968b9), - C64e(0x0000000000000000), C64e(0xc1b5742c74992cc1), - C64e(0x40e0a060a0806040), C64e(0xe3c2211f21dd1fe3), - C64e(0x793a43c843f2c879), C64e(0xb69a2ced2c77edb6), - C64e(0xd40dd9bed9b3bed4), C64e(0x8d47ca46ca01468d), - C64e(0x671770d970ced967), C64e(0x72afdd4bdde44b72), - C64e(0x94ed79de7933de94), C64e(0x98ff67d4672bd498), - C64e(0xb09323e8237be8b0), C64e(0x855bde4ade114a85), - C64e(0xbb06bd6bbd6d6bbb), C64e(0xc5bb7e2a7e912ac5), - C64e(0x4f7b34e5349ee54f), C64e(0xedd73a163ac116ed), - C64e(0x86d254c55417c586), C64e(0x9af862d7622fd79a), - C64e(0x6699ff55ffcc5566), C64e(0x11b6a794a7229411), - C64e(0x8ac04acf4a0fcf8a), C64e(0xe9d9301030c910e9), - C64e(0x040e0a060a080604), C64e(0xfe66988198e781fe), - C64e(0xa0ab0bf00b5bf0a0), C64e(0x78b4cc44ccf04478), - C64e(0x25f0d5bad54aba25), C64e(0x4b753ee33e96e34b), - C64e(0xa2ac0ef30e5ff3a2), C64e(0x5d4419fe19bafe5d), - C64e(0x80db5bc05b1bc080), C64e(0x0580858a850a8a05), - C64e(0x3fd3ecadec7ead3f), C64e(0x21fedfbcdf42bc21), - C64e(0x70a8d848d8e04870), C64e(0xf1fd0c040cf904f1), - C64e(0x63197adf7ac6df63), C64e(0x772f58c158eec177), - C64e(0xaf309f759f4575af), C64e(0x42e7a563a5846342), - C64e(0x2070503050403020), C64e(0xe5cb2e1a2ed11ae5), - C64e(0xfdef120e12e10efd), C64e(0xbf08b76db7656dbf), - C64e(0x8155d44cd4194c81), C64e(0x18243c143c301418), - C64e(0x26795f355f4c3526), C64e(0xc3b2712f719d2fc3), - C64e(0xbe8638e13867e1be), C64e(0x35c8fda2fd6aa235), - C64e(0x88c74fcc4f0bcc88), C64e(0x2e654b394b5c392e), - C64e(0x936af957f93d5793), C64e(0x55580df20daaf255), - C64e(0xfc619d829de382fc), C64e(0x7ab3c947c9f4477a), - C64e(0xc827efacef8bacc8), C64e(0xba8832e7326fe7ba), - C64e(0x324f7d2b7d642b32), C64e(0xe642a495a4d795e6), - C64e(0xc03bfba0fb9ba0c0), C64e(0x19aab398b3329819), - C64e(0x9ef668d16827d19e), C64e(0xa322817f815d7fa3), - C64e(0x44eeaa66aa886644), C64e(0x54d6827e82a87e54), - C64e(0x3bdde6abe676ab3b), C64e(0x0b959e839e16830b), - C64e(0x8cc945ca4503ca8c), C64e(0xc7bc7b297b9529c7), - C64e(0x6b056ed36ed6d36b), C64e(0x286c443c44503c28), - C64e(0xa72c8b798b5579a7), C64e(0xbc813de23d63e2bc), - C64e(0x1631271d272c1d16), C64e(0xad379a769a4176ad), - C64e(0xdb964d3b4dad3bdb), C64e(0x649efa56fac85664), - C64e(0x74a6d24ed2e84e74), C64e(0x1436221e22281e14), - C64e(0x92e476db763fdb92), C64e(0x0c121e0a1e180a0c), - C64e(0x48fcb46cb4906c48), C64e(0xb88f37e4376be4b8), - C64e(0x9f78e75de7255d9f), C64e(0xbd0fb26eb2616ebd), - C64e(0x43692aef2a86ef43), C64e(0xc435f1a6f193a6c4), - C64e(0x39dae3a8e372a839), C64e(0x31c6f7a4f762a431), - C64e(0xd38a593759bd37d3), C64e(0xf274868b86ff8bf2), - C64e(0xd583563256b132d5), C64e(0x8b4ec543c50d438b), - C64e(0x6e85eb59ebdc596e), C64e(0xda18c2b7c2afb7da), - C64e(0x018e8f8c8f028c01), C64e(0xb11dac64ac7964b1), - C64e(0x9cf16dd26d23d29c), C64e(0x49723be03b92e049), - C64e(0xd81fc7b4c7abb4d8), C64e(0xacb915fa1543faac), - C64e(0xf3fa090709fd07f3), C64e(0xcfa06f256f8525cf), - C64e(0xca20eaafea8fafca), C64e(0xf47d898e89f38ef4), - C64e(0x476720e9208ee947), C64e(0x1038281828201810), - C64e(0x6f0b64d564ded56f), C64e(0xf073838883fb88f0), - C64e(0x4afbb16fb1946f4a), C64e(0x5cca967296b8725c), - C64e(0x38546c246c702438), C64e(0x575f08f108aef157), - C64e(0x732152c752e6c773), C64e(0x9764f351f3355197), - C64e(0xcbae6523658d23cb), C64e(0xa125847c84597ca1), - C64e(0xe857bf9cbfcb9ce8), C64e(0x3e5d6321637c213e), - C64e(0x96ea7cdd7c37dd96), C64e(0x611e7fdc7fc2dc61), - C64e(0x0d9c9186911a860d), C64e(0x0f9b9485941e850f), - C64e(0xe04bab90abdb90e0), C64e(0x7cbac642c6f8427c), - C64e(0x712657c457e2c471), C64e(0xcc29e5aae583aacc), - C64e(0x90e373d8733bd890), C64e(0x06090f050f0c0506), - C64e(0xf7f4030103f501f7), C64e(0x1c2a36123638121c), - C64e(0xc23cfea3fe9fa3c2), C64e(0x6a8be15fe1d45f6a), - C64e(0xaebe10f91047f9ae), C64e(0x69026bd06bd2d069), - C64e(0x17bfa891a82e9117), C64e(0x9971e858e8295899), - C64e(0x3a5369276974273a), C64e(0x27f7d0b9d04eb927), - C64e(0xd991483848a938d9), C64e(0xebde351335cd13eb), - C64e(0x2be5ceb3ce56b32b), C64e(0x2277553355443322), - C64e(0xd204d6bbd6bfbbd2), C64e(0xa9399070904970a9), - C64e(0x07878089800e8907), C64e(0x33c1f2a7f266a733), - C64e(0x2decc1b6c15ab62d), C64e(0x3c5a66226678223c), - C64e(0x15b8ad92ad2a9215), C64e(0xc9a96020608920c9), - C64e(0x875cdb49db154987), C64e(0xaab01aff1a4fffaa), - C64e(0x50d8887888a07850), C64e(0xa52b8e7a8e517aa5), - C64e(0x03898a8f8a068f03), C64e(0x594a13f813b2f859), - C64e(0x09929b809b128009), C64e(0x1a2339173934171a), - C64e(0x651075da75cada65), C64e(0xd784533153b531d7), - C64e(0x84d551c65113c684), C64e(0xd003d3b8d3bbb8d0), - C64e(0x82dc5ec35e1fc382), C64e(0x29e2cbb0cb52b029), - C64e(0x5ac3997799b4775a), C64e(0x1e2d3311333c111e), - C64e(0x7b3d46cb46f6cb7b), C64e(0xa8b71ffc1f4bfca8), - C64e(0x6d0c61d661dad66d), C64e(0x2c624e3a4e583a2c) -}; - -__constant static const ulong T1[] = { - C64e(0xc6c632f4a5f497a5), C64e(0xf8f86f978497eb84), - C64e(0xeeee5eb099b0c799), C64e(0xf6f67a8c8d8cf78d), - C64e(0xffffe8170d17e50d), C64e(0xd6d60adcbddcb7bd), - C64e(0xdede16c8b1c8a7b1), C64e(0x91916dfc54fc3954), - C64e(0x606090f050f0c050), C64e(0x0202070503050403), - C64e(0xcece2ee0a9e087a9), C64e(0x5656d1877d87ac7d), - C64e(0xe7e7cc2b192bd519), C64e(0xb5b513a662a67162), - C64e(0x4d4d7c31e6319ae6), C64e(0xecec59b59ab5c39a), - C64e(0x8f8f40cf45cf0545), C64e(0x1f1fa3bc9dbc3e9d), - C64e(0x898949c040c00940), C64e(0xfafa68928792ef87), - C64e(0xefefd03f153fc515), C64e(0xb2b29426eb267feb), - C64e(0x8e8ece40c94007c9), C64e(0xfbfbe61d0b1ded0b), - C64e(0x41416e2fec2f82ec), C64e(0xb3b31aa967a97d67), - C64e(0x5f5f431cfd1cbefd), C64e(0x45456025ea258aea), - C64e(0x2323f9dabfda46bf), C64e(0x53535102f702a6f7), - C64e(0xe4e445a196a1d396), C64e(0x9b9b76ed5bed2d5b), - C64e(0x7575285dc25deac2), C64e(0xe1e1c5241c24d91c), - C64e(0x3d3dd4e9aee97aae), C64e(0x4c4cf2be6abe986a), - C64e(0x6c6c82ee5aeed85a), C64e(0x7e7ebdc341c3fc41), - C64e(0xf5f5f3060206f102), C64e(0x838352d14fd11d4f), - C64e(0x68688ce45ce4d05c), C64e(0x51515607f407a2f4), - C64e(0xd1d18d5c345cb934), C64e(0xf9f9e1180818e908), - C64e(0xe2e24cae93aedf93), C64e(0xabab3e9573954d73), - C64e(0x626297f553f5c453), C64e(0x2a2a6b413f41543f), - C64e(0x08081c140c14100c), C64e(0x959563f652f63152), - C64e(0x4646e9af65af8c65), C64e(0x9d9d7fe25ee2215e), - C64e(0x3030487828786028), C64e(0x3737cff8a1f86ea1), - C64e(0x0a0a1b110f11140f), C64e(0x2f2febc4b5c45eb5), - C64e(0x0e0e151b091b1c09), C64e(0x24247e5a365a4836), - C64e(0x1b1badb69bb6369b), C64e(0xdfdf98473d47a53d), - C64e(0xcdcda76a266a8126), C64e(0x4e4ef5bb69bb9c69), - C64e(0x7f7f334ccd4cfecd), C64e(0xeaea50ba9fbacf9f), - C64e(0x12123f2d1b2d241b), C64e(0x1d1da4b99eb93a9e), - C64e(0x5858c49c749cb074), C64e(0x343446722e72682e), - C64e(0x363641772d776c2d), C64e(0xdcdc11cdb2cda3b2), - C64e(0xb4b49d29ee2973ee), C64e(0x5b5b4d16fb16b6fb), - C64e(0xa4a4a501f60153f6), C64e(0x7676a1d74dd7ec4d), - C64e(0xb7b714a361a37561), C64e(0x7d7d3449ce49face), - C64e(0x5252df8d7b8da47b), C64e(0xdddd9f423e42a13e), - C64e(0x5e5ecd937193bc71), C64e(0x1313b1a297a22697), - C64e(0xa6a6a204f50457f5), C64e(0xb9b901b868b86968), - C64e(0x0000000000000000), C64e(0xc1c1b5742c74992c), - C64e(0x4040e0a060a08060), C64e(0xe3e3c2211f21dd1f), - C64e(0x79793a43c843f2c8), C64e(0xb6b69a2ced2c77ed), - C64e(0xd4d40dd9bed9b3be), C64e(0x8d8d47ca46ca0146), - C64e(0x67671770d970ced9), C64e(0x7272afdd4bdde44b), - C64e(0x9494ed79de7933de), C64e(0x9898ff67d4672bd4), - C64e(0xb0b09323e8237be8), C64e(0x85855bde4ade114a), - C64e(0xbbbb06bd6bbd6d6b), C64e(0xc5c5bb7e2a7e912a), - C64e(0x4f4f7b34e5349ee5), C64e(0xededd73a163ac116), - C64e(0x8686d254c55417c5), C64e(0x9a9af862d7622fd7), - C64e(0x666699ff55ffcc55), C64e(0x1111b6a794a72294), - C64e(0x8a8ac04acf4a0fcf), C64e(0xe9e9d9301030c910), - C64e(0x04040e0a060a0806), C64e(0xfefe66988198e781), - C64e(0xa0a0ab0bf00b5bf0), C64e(0x7878b4cc44ccf044), - C64e(0x2525f0d5bad54aba), C64e(0x4b4b753ee33e96e3), - C64e(0xa2a2ac0ef30e5ff3), C64e(0x5d5d4419fe19bafe), - C64e(0x8080db5bc05b1bc0), C64e(0x050580858a850a8a), - C64e(0x3f3fd3ecadec7ead), C64e(0x2121fedfbcdf42bc), - C64e(0x7070a8d848d8e048), C64e(0xf1f1fd0c040cf904), - C64e(0x6363197adf7ac6df), C64e(0x77772f58c158eec1), - C64e(0xafaf309f759f4575), C64e(0x4242e7a563a58463), - C64e(0x2020705030504030), C64e(0xe5e5cb2e1a2ed11a), - C64e(0xfdfdef120e12e10e), C64e(0xbfbf08b76db7656d), - C64e(0x818155d44cd4194c), C64e(0x1818243c143c3014), - C64e(0x2626795f355f4c35), C64e(0xc3c3b2712f719d2f), - C64e(0xbebe8638e13867e1), C64e(0x3535c8fda2fd6aa2), - C64e(0x8888c74fcc4f0bcc), C64e(0x2e2e654b394b5c39), - C64e(0x93936af957f93d57), C64e(0x5555580df20daaf2), - C64e(0xfcfc619d829de382), C64e(0x7a7ab3c947c9f447), - C64e(0xc8c827efacef8bac), C64e(0xbaba8832e7326fe7), - C64e(0x32324f7d2b7d642b), C64e(0xe6e642a495a4d795), - C64e(0xc0c03bfba0fb9ba0), C64e(0x1919aab398b33298), - C64e(0x9e9ef668d16827d1), C64e(0xa3a322817f815d7f), - C64e(0x4444eeaa66aa8866), C64e(0x5454d6827e82a87e), - C64e(0x3b3bdde6abe676ab), C64e(0x0b0b959e839e1683), - C64e(0x8c8cc945ca4503ca), C64e(0xc7c7bc7b297b9529), - C64e(0x6b6b056ed36ed6d3), C64e(0x28286c443c44503c), - C64e(0xa7a72c8b798b5579), C64e(0xbcbc813de23d63e2), - C64e(0x161631271d272c1d), C64e(0xadad379a769a4176), - C64e(0xdbdb964d3b4dad3b), C64e(0x64649efa56fac856), - C64e(0x7474a6d24ed2e84e), C64e(0x141436221e22281e), - C64e(0x9292e476db763fdb), C64e(0x0c0c121e0a1e180a), - C64e(0x4848fcb46cb4906c), C64e(0xb8b88f37e4376be4), - C64e(0x9f9f78e75de7255d), C64e(0xbdbd0fb26eb2616e), - C64e(0x4343692aef2a86ef), C64e(0xc4c435f1a6f193a6), - C64e(0x3939dae3a8e372a8), C64e(0x3131c6f7a4f762a4), - C64e(0xd3d38a593759bd37), C64e(0xf2f274868b86ff8b), - C64e(0xd5d583563256b132), C64e(0x8b8b4ec543c50d43), - C64e(0x6e6e85eb59ebdc59), C64e(0xdada18c2b7c2afb7), - C64e(0x01018e8f8c8f028c), C64e(0xb1b11dac64ac7964), - C64e(0x9c9cf16dd26d23d2), C64e(0x4949723be03b92e0), - C64e(0xd8d81fc7b4c7abb4), C64e(0xacacb915fa1543fa), - C64e(0xf3f3fa090709fd07), C64e(0xcfcfa06f256f8525), - C64e(0xcaca20eaafea8faf), C64e(0xf4f47d898e89f38e), - C64e(0x47476720e9208ee9), C64e(0x1010382818282018), - C64e(0x6f6f0b64d564ded5), C64e(0xf0f073838883fb88), - C64e(0x4a4afbb16fb1946f), C64e(0x5c5cca967296b872), - C64e(0x3838546c246c7024), C64e(0x57575f08f108aef1), - C64e(0x73732152c752e6c7), C64e(0x979764f351f33551), - C64e(0xcbcbae6523658d23), C64e(0xa1a125847c84597c), - C64e(0xe8e857bf9cbfcb9c), C64e(0x3e3e5d6321637c21), - C64e(0x9696ea7cdd7c37dd), C64e(0x61611e7fdc7fc2dc), - C64e(0x0d0d9c9186911a86), C64e(0x0f0f9b9485941e85), - C64e(0xe0e04bab90abdb90), C64e(0x7c7cbac642c6f842), - C64e(0x71712657c457e2c4), C64e(0xcccc29e5aae583aa), - C64e(0x9090e373d8733bd8), C64e(0x0606090f050f0c05), - C64e(0xf7f7f4030103f501), C64e(0x1c1c2a3612363812), - C64e(0xc2c23cfea3fe9fa3), C64e(0x6a6a8be15fe1d45f), - C64e(0xaeaebe10f91047f9), C64e(0x6969026bd06bd2d0), - C64e(0x1717bfa891a82e91), C64e(0x999971e858e82958), - C64e(0x3a3a536927697427), C64e(0x2727f7d0b9d04eb9), - C64e(0xd9d991483848a938), C64e(0xebebde351335cd13), - C64e(0x2b2be5ceb3ce56b3), C64e(0x2222775533554433), - C64e(0xd2d204d6bbd6bfbb), C64e(0xa9a9399070904970), - C64e(0x0707878089800e89), C64e(0x3333c1f2a7f266a7), - C64e(0x2d2decc1b6c15ab6), C64e(0x3c3c5a6622667822), - C64e(0x1515b8ad92ad2a92), C64e(0xc9c9a96020608920), - C64e(0x87875cdb49db1549), C64e(0xaaaab01aff1a4fff), - C64e(0x5050d8887888a078), C64e(0xa5a52b8e7a8e517a), - C64e(0x0303898a8f8a068f), C64e(0x59594a13f813b2f8), - C64e(0x0909929b809b1280), C64e(0x1a1a233917393417), - C64e(0x65651075da75cada), C64e(0xd7d784533153b531), - C64e(0x8484d551c65113c6), C64e(0xd0d003d3b8d3bbb8), - C64e(0x8282dc5ec35e1fc3), C64e(0x2929e2cbb0cb52b0), - C64e(0x5a5ac3997799b477), C64e(0x1e1e2d3311333c11), - C64e(0x7b7b3d46cb46f6cb), C64e(0xa8a8b71ffc1f4bfc), - C64e(0x6d6d0c61d661dad6), C64e(0x2c2c624e3a4e583a) -}; -/* -__constant static const ulong T2_G[] = { - C64e(0xa5c6c632f4a5f497), C64e(0x84f8f86f978497eb), - C64e(0x99eeee5eb099b0c7), C64e(0x8df6f67a8c8d8cf7), - C64e(0x0dffffe8170d17e5), C64e(0xbdd6d60adcbddcb7), - C64e(0xb1dede16c8b1c8a7), C64e(0x5491916dfc54fc39), - C64e(0x50606090f050f0c0), C64e(0x0302020705030504), - C64e(0xa9cece2ee0a9e087), C64e(0x7d5656d1877d87ac), - C64e(0x19e7e7cc2b192bd5), C64e(0x62b5b513a662a671), - C64e(0xe64d4d7c31e6319a), C64e(0x9aecec59b59ab5c3), - C64e(0x458f8f40cf45cf05), C64e(0x9d1f1fa3bc9dbc3e), - C64e(0x40898949c040c009), C64e(0x87fafa68928792ef), - C64e(0x15efefd03f153fc5), C64e(0xebb2b29426eb267f), - C64e(0xc98e8ece40c94007), C64e(0x0bfbfbe61d0b1ded), - C64e(0xec41416e2fec2f82), C64e(0x67b3b31aa967a97d), - C64e(0xfd5f5f431cfd1cbe), C64e(0xea45456025ea258a), - C64e(0xbf2323f9dabfda46), C64e(0xf753535102f702a6), - C64e(0x96e4e445a196a1d3), C64e(0x5b9b9b76ed5bed2d), - C64e(0xc27575285dc25dea), C64e(0x1ce1e1c5241c24d9), - C64e(0xae3d3dd4e9aee97a), C64e(0x6a4c4cf2be6abe98), - C64e(0x5a6c6c82ee5aeed8), C64e(0x417e7ebdc341c3fc), - C64e(0x02f5f5f3060206f1), C64e(0x4f838352d14fd11d), - C64e(0x5c68688ce45ce4d0), C64e(0xf451515607f407a2), - C64e(0x34d1d18d5c345cb9), C64e(0x08f9f9e1180818e9), - C64e(0x93e2e24cae93aedf), C64e(0x73abab3e9573954d), - C64e(0x53626297f553f5c4), C64e(0x3f2a2a6b413f4154), - C64e(0x0c08081c140c1410), C64e(0x52959563f652f631), - C64e(0x654646e9af65af8c), C64e(0x5e9d9d7fe25ee221), - C64e(0x2830304878287860), C64e(0xa13737cff8a1f86e), - C64e(0x0f0a0a1b110f1114), C64e(0xb52f2febc4b5c45e), - C64e(0x090e0e151b091b1c), C64e(0x3624247e5a365a48), - C64e(0x9b1b1badb69bb636), C64e(0x3ddfdf98473d47a5), - C64e(0x26cdcda76a266a81), C64e(0x694e4ef5bb69bb9c), - C64e(0xcd7f7f334ccd4cfe), C64e(0x9feaea50ba9fbacf), - C64e(0x1b12123f2d1b2d24), C64e(0x9e1d1da4b99eb93a), - C64e(0x745858c49c749cb0), C64e(0x2e343446722e7268), - C64e(0x2d363641772d776c), C64e(0xb2dcdc11cdb2cda3), - C64e(0xeeb4b49d29ee2973), C64e(0xfb5b5b4d16fb16b6), - C64e(0xf6a4a4a501f60153), C64e(0x4d7676a1d74dd7ec), - C64e(0x61b7b714a361a375), C64e(0xce7d7d3449ce49fa), - C64e(0x7b5252df8d7b8da4), C64e(0x3edddd9f423e42a1), - C64e(0x715e5ecd937193bc), C64e(0x971313b1a297a226), - C64e(0xf5a6a6a204f50457), C64e(0x68b9b901b868b869), - C64e(0x0000000000000000), C64e(0x2cc1c1b5742c7499), - C64e(0x604040e0a060a080), C64e(0x1fe3e3c2211f21dd), - C64e(0xc879793a43c843f2), C64e(0xedb6b69a2ced2c77), - C64e(0xbed4d40dd9bed9b3), C64e(0x468d8d47ca46ca01), - C64e(0xd967671770d970ce), C64e(0x4b7272afdd4bdde4), - C64e(0xde9494ed79de7933), C64e(0xd49898ff67d4672b), - C64e(0xe8b0b09323e8237b), C64e(0x4a85855bde4ade11), - C64e(0x6bbbbb06bd6bbd6d), C64e(0x2ac5c5bb7e2a7e91), - C64e(0xe54f4f7b34e5349e), C64e(0x16ededd73a163ac1), - C64e(0xc58686d254c55417), C64e(0xd79a9af862d7622f), - C64e(0x55666699ff55ffcc), C64e(0x941111b6a794a722), - C64e(0xcf8a8ac04acf4a0f), C64e(0x10e9e9d9301030c9), - C64e(0x0604040e0a060a08), C64e(0x81fefe66988198e7), - C64e(0xf0a0a0ab0bf00b5b), C64e(0x447878b4cc44ccf0), - C64e(0xba2525f0d5bad54a), C64e(0xe34b4b753ee33e96), - C64e(0xf3a2a2ac0ef30e5f), C64e(0xfe5d5d4419fe19ba), - C64e(0xc08080db5bc05b1b), C64e(0x8a050580858a850a), - C64e(0xad3f3fd3ecadec7e), C64e(0xbc2121fedfbcdf42), - C64e(0x487070a8d848d8e0), C64e(0x04f1f1fd0c040cf9), - C64e(0xdf6363197adf7ac6), C64e(0xc177772f58c158ee), - C64e(0x75afaf309f759f45), C64e(0x634242e7a563a584), - C64e(0x3020207050305040), C64e(0x1ae5e5cb2e1a2ed1), - C64e(0x0efdfdef120e12e1), C64e(0x6dbfbf08b76db765), - C64e(0x4c818155d44cd419), C64e(0x141818243c143c30), - C64e(0x352626795f355f4c), C64e(0x2fc3c3b2712f719d), - C64e(0xe1bebe8638e13867), C64e(0xa23535c8fda2fd6a), - C64e(0xcc8888c74fcc4f0b), C64e(0x392e2e654b394b5c), - C64e(0x5793936af957f93d), C64e(0xf25555580df20daa), - C64e(0x82fcfc619d829de3), C64e(0x477a7ab3c947c9f4), - C64e(0xacc8c827efacef8b), C64e(0xe7baba8832e7326f), - C64e(0x2b32324f7d2b7d64), C64e(0x95e6e642a495a4d7), - C64e(0xa0c0c03bfba0fb9b), C64e(0x981919aab398b332), - C64e(0xd19e9ef668d16827), C64e(0x7fa3a322817f815d), - C64e(0x664444eeaa66aa88), C64e(0x7e5454d6827e82a8), - C64e(0xab3b3bdde6abe676), C64e(0x830b0b959e839e16), - C64e(0xca8c8cc945ca4503), C64e(0x29c7c7bc7b297b95), - C64e(0xd36b6b056ed36ed6), C64e(0x3c28286c443c4450), - C64e(0x79a7a72c8b798b55), C64e(0xe2bcbc813de23d63), - C64e(0x1d161631271d272c), C64e(0x76adad379a769a41), - C64e(0x3bdbdb964d3b4dad), C64e(0x5664649efa56fac8), - C64e(0x4e7474a6d24ed2e8), C64e(0x1e141436221e2228), - C64e(0xdb9292e476db763f), C64e(0x0a0c0c121e0a1e18), - C64e(0x6c4848fcb46cb490), C64e(0xe4b8b88f37e4376b), - C64e(0x5d9f9f78e75de725), C64e(0x6ebdbd0fb26eb261), - C64e(0xef4343692aef2a86), C64e(0xa6c4c435f1a6f193), - C64e(0xa83939dae3a8e372), C64e(0xa43131c6f7a4f762), - C64e(0x37d3d38a593759bd), C64e(0x8bf2f274868b86ff), - C64e(0x32d5d583563256b1), C64e(0x438b8b4ec543c50d), - C64e(0x596e6e85eb59ebdc), C64e(0xb7dada18c2b7c2af), - C64e(0x8c01018e8f8c8f02), C64e(0x64b1b11dac64ac79), - C64e(0xd29c9cf16dd26d23), C64e(0xe04949723be03b92), - C64e(0xb4d8d81fc7b4c7ab), C64e(0xfaacacb915fa1543), - C64e(0x07f3f3fa090709fd), C64e(0x25cfcfa06f256f85), - C64e(0xafcaca20eaafea8f), C64e(0x8ef4f47d898e89f3), - C64e(0xe947476720e9208e), C64e(0x1810103828182820), - C64e(0xd56f6f0b64d564de), C64e(0x88f0f073838883fb), - C64e(0x6f4a4afbb16fb194), C64e(0x725c5cca967296b8), - C64e(0x243838546c246c70), C64e(0xf157575f08f108ae), - C64e(0xc773732152c752e6), C64e(0x51979764f351f335), - C64e(0x23cbcbae6523658d), C64e(0x7ca1a125847c8459), - C64e(0x9ce8e857bf9cbfcb), C64e(0x213e3e5d6321637c), - C64e(0xdd9696ea7cdd7c37), C64e(0xdc61611e7fdc7fc2), - C64e(0x860d0d9c9186911a), C64e(0x850f0f9b9485941e), - C64e(0x90e0e04bab90abdb), C64e(0x427c7cbac642c6f8), - C64e(0xc471712657c457e2), C64e(0xaacccc29e5aae583), - C64e(0xd89090e373d8733b), C64e(0x050606090f050f0c), - C64e(0x01f7f7f4030103f5), C64e(0x121c1c2a36123638), - C64e(0xa3c2c23cfea3fe9f), C64e(0x5f6a6a8be15fe1d4), - C64e(0xf9aeaebe10f91047), C64e(0xd06969026bd06bd2), - C64e(0x911717bfa891a82e), C64e(0x58999971e858e829), - C64e(0x273a3a5369276974), C64e(0xb92727f7d0b9d04e), - C64e(0x38d9d991483848a9), C64e(0x13ebebde351335cd), - C64e(0xb32b2be5ceb3ce56), C64e(0x3322227755335544), - C64e(0xbbd2d204d6bbd6bf), C64e(0x70a9a93990709049), - C64e(0x890707878089800e), C64e(0xa73333c1f2a7f266), - C64e(0xb62d2decc1b6c15a), C64e(0x223c3c5a66226678), - C64e(0x921515b8ad92ad2a), C64e(0x20c9c9a960206089), - C64e(0x4987875cdb49db15), C64e(0xffaaaab01aff1a4f), - C64e(0x785050d8887888a0), C64e(0x7aa5a52b8e7a8e51), - C64e(0x8f0303898a8f8a06), C64e(0xf859594a13f813b2), - C64e(0x800909929b809b12), C64e(0x171a1a2339173934), - C64e(0xda65651075da75ca), C64e(0x31d7d784533153b5), - C64e(0xc68484d551c65113), C64e(0xb8d0d003d3b8d3bb), - C64e(0xc38282dc5ec35e1f), C64e(0xb02929e2cbb0cb52), - C64e(0x775a5ac3997799b4), C64e(0x111e1e2d3311333c), - C64e(0xcb7b7b3d46cb46f6), C64e(0xfca8a8b71ffc1f4b), - C64e(0xd66d6d0c61d661da), C64e(0x3a2c2c624e3a4e58) -}; - -__constant static const ulong T3_G[] = { - C64e(0x97a5c6c632f4a5f4), C64e(0xeb84f8f86f978497), - C64e(0xc799eeee5eb099b0), C64e(0xf78df6f67a8c8d8c), - C64e(0xe50dffffe8170d17), C64e(0xb7bdd6d60adcbddc), - C64e(0xa7b1dede16c8b1c8), C64e(0x395491916dfc54fc), - C64e(0xc050606090f050f0), C64e(0x0403020207050305), - C64e(0x87a9cece2ee0a9e0), C64e(0xac7d5656d1877d87), - C64e(0xd519e7e7cc2b192b), C64e(0x7162b5b513a662a6), - C64e(0x9ae64d4d7c31e631), C64e(0xc39aecec59b59ab5), - C64e(0x05458f8f40cf45cf), C64e(0x3e9d1f1fa3bc9dbc), - C64e(0x0940898949c040c0), C64e(0xef87fafa68928792), - C64e(0xc515efefd03f153f), C64e(0x7febb2b29426eb26), - C64e(0x07c98e8ece40c940), C64e(0xed0bfbfbe61d0b1d), - C64e(0x82ec41416e2fec2f), C64e(0x7d67b3b31aa967a9), - C64e(0xbefd5f5f431cfd1c), C64e(0x8aea45456025ea25), - C64e(0x46bf2323f9dabfda), C64e(0xa6f753535102f702), - C64e(0xd396e4e445a196a1), C64e(0x2d5b9b9b76ed5bed), - C64e(0xeac27575285dc25d), C64e(0xd91ce1e1c5241c24), - C64e(0x7aae3d3dd4e9aee9), C64e(0x986a4c4cf2be6abe), - C64e(0xd85a6c6c82ee5aee), C64e(0xfc417e7ebdc341c3), - C64e(0xf102f5f5f3060206), C64e(0x1d4f838352d14fd1), - C64e(0xd05c68688ce45ce4), C64e(0xa2f451515607f407), - C64e(0xb934d1d18d5c345c), C64e(0xe908f9f9e1180818), - C64e(0xdf93e2e24cae93ae), C64e(0x4d73abab3e957395), - C64e(0xc453626297f553f5), C64e(0x543f2a2a6b413f41), - C64e(0x100c08081c140c14), C64e(0x3152959563f652f6), - C64e(0x8c654646e9af65af), C64e(0x215e9d9d7fe25ee2), - C64e(0x6028303048782878), C64e(0x6ea13737cff8a1f8), - C64e(0x140f0a0a1b110f11), C64e(0x5eb52f2febc4b5c4), - C64e(0x1c090e0e151b091b), C64e(0x483624247e5a365a), - C64e(0x369b1b1badb69bb6), C64e(0xa53ddfdf98473d47), - C64e(0x8126cdcda76a266a), C64e(0x9c694e4ef5bb69bb), - C64e(0xfecd7f7f334ccd4c), C64e(0xcf9feaea50ba9fba), - C64e(0x241b12123f2d1b2d), C64e(0x3a9e1d1da4b99eb9), - C64e(0xb0745858c49c749c), C64e(0x682e343446722e72), - C64e(0x6c2d363641772d77), C64e(0xa3b2dcdc11cdb2cd), - C64e(0x73eeb4b49d29ee29), C64e(0xb6fb5b5b4d16fb16), - C64e(0x53f6a4a4a501f601), C64e(0xec4d7676a1d74dd7), - C64e(0x7561b7b714a361a3), C64e(0xface7d7d3449ce49), - C64e(0xa47b5252df8d7b8d), C64e(0xa13edddd9f423e42), - C64e(0xbc715e5ecd937193), C64e(0x26971313b1a297a2), - C64e(0x57f5a6a6a204f504), C64e(0x6968b9b901b868b8), - C64e(0x0000000000000000), C64e(0x992cc1c1b5742c74), - C64e(0x80604040e0a060a0), C64e(0xdd1fe3e3c2211f21), - C64e(0xf2c879793a43c843), C64e(0x77edb6b69a2ced2c), - C64e(0xb3bed4d40dd9bed9), C64e(0x01468d8d47ca46ca), - C64e(0xced967671770d970), C64e(0xe44b7272afdd4bdd), - C64e(0x33de9494ed79de79), C64e(0x2bd49898ff67d467), - C64e(0x7be8b0b09323e823), C64e(0x114a85855bde4ade), - C64e(0x6d6bbbbb06bd6bbd), C64e(0x912ac5c5bb7e2a7e), - C64e(0x9ee54f4f7b34e534), C64e(0xc116ededd73a163a), - C64e(0x17c58686d254c554), C64e(0x2fd79a9af862d762), - C64e(0xcc55666699ff55ff), C64e(0x22941111b6a794a7), - C64e(0x0fcf8a8ac04acf4a), C64e(0xc910e9e9d9301030), - C64e(0x080604040e0a060a), C64e(0xe781fefe66988198), - C64e(0x5bf0a0a0ab0bf00b), C64e(0xf0447878b4cc44cc), - C64e(0x4aba2525f0d5bad5), C64e(0x96e34b4b753ee33e), - C64e(0x5ff3a2a2ac0ef30e), C64e(0xbafe5d5d4419fe19), - C64e(0x1bc08080db5bc05b), C64e(0x0a8a050580858a85), - C64e(0x7ead3f3fd3ecadec), C64e(0x42bc2121fedfbcdf), - C64e(0xe0487070a8d848d8), C64e(0xf904f1f1fd0c040c), - C64e(0xc6df6363197adf7a), C64e(0xeec177772f58c158), - C64e(0x4575afaf309f759f), C64e(0x84634242e7a563a5), - C64e(0x4030202070503050), C64e(0xd11ae5e5cb2e1a2e), - C64e(0xe10efdfdef120e12), C64e(0x656dbfbf08b76db7), - C64e(0x194c818155d44cd4), C64e(0x30141818243c143c), - C64e(0x4c352626795f355f), C64e(0x9d2fc3c3b2712f71), - C64e(0x67e1bebe8638e138), C64e(0x6aa23535c8fda2fd), - C64e(0x0bcc8888c74fcc4f), C64e(0x5c392e2e654b394b), - C64e(0x3d5793936af957f9), C64e(0xaaf25555580df20d), - C64e(0xe382fcfc619d829d), C64e(0xf4477a7ab3c947c9), - C64e(0x8bacc8c827efacef), C64e(0x6fe7baba8832e732), - C64e(0x642b32324f7d2b7d), C64e(0xd795e6e642a495a4), - C64e(0x9ba0c0c03bfba0fb), C64e(0x32981919aab398b3), - C64e(0x27d19e9ef668d168), C64e(0x5d7fa3a322817f81), - C64e(0x88664444eeaa66aa), C64e(0xa87e5454d6827e82), - C64e(0x76ab3b3bdde6abe6), C64e(0x16830b0b959e839e), - C64e(0x03ca8c8cc945ca45), C64e(0x9529c7c7bc7b297b), - C64e(0xd6d36b6b056ed36e), C64e(0x503c28286c443c44), - C64e(0x5579a7a72c8b798b), C64e(0x63e2bcbc813de23d), - C64e(0x2c1d161631271d27), C64e(0x4176adad379a769a), - C64e(0xad3bdbdb964d3b4d), C64e(0xc85664649efa56fa), - C64e(0xe84e7474a6d24ed2), C64e(0x281e141436221e22), - C64e(0x3fdb9292e476db76), C64e(0x180a0c0c121e0a1e), - C64e(0x906c4848fcb46cb4), C64e(0x6be4b8b88f37e437), - C64e(0x255d9f9f78e75de7), C64e(0x616ebdbd0fb26eb2), - C64e(0x86ef4343692aef2a), C64e(0x93a6c4c435f1a6f1), - C64e(0x72a83939dae3a8e3), C64e(0x62a43131c6f7a4f7), - C64e(0xbd37d3d38a593759), C64e(0xff8bf2f274868b86), - C64e(0xb132d5d583563256), C64e(0x0d438b8b4ec543c5), - C64e(0xdc596e6e85eb59eb), C64e(0xafb7dada18c2b7c2), - C64e(0x028c01018e8f8c8f), C64e(0x7964b1b11dac64ac), - C64e(0x23d29c9cf16dd26d), C64e(0x92e04949723be03b), - C64e(0xabb4d8d81fc7b4c7), C64e(0x43faacacb915fa15), - C64e(0xfd07f3f3fa090709), C64e(0x8525cfcfa06f256f), - C64e(0x8fafcaca20eaafea), C64e(0xf38ef4f47d898e89), - C64e(0x8ee947476720e920), C64e(0x2018101038281828), - C64e(0xded56f6f0b64d564), C64e(0xfb88f0f073838883), - C64e(0x946f4a4afbb16fb1), C64e(0xb8725c5cca967296), - C64e(0x70243838546c246c), C64e(0xaef157575f08f108), - C64e(0xe6c773732152c752), C64e(0x3551979764f351f3), - C64e(0x8d23cbcbae652365), C64e(0x597ca1a125847c84), - C64e(0xcb9ce8e857bf9cbf), C64e(0x7c213e3e5d632163), - C64e(0x37dd9696ea7cdd7c), C64e(0xc2dc61611e7fdc7f), - C64e(0x1a860d0d9c918691), C64e(0x1e850f0f9b948594), - C64e(0xdb90e0e04bab90ab), C64e(0xf8427c7cbac642c6), - C64e(0xe2c471712657c457), C64e(0x83aacccc29e5aae5), - C64e(0x3bd89090e373d873), C64e(0x0c050606090f050f), - C64e(0xf501f7f7f4030103), C64e(0x38121c1c2a361236), - C64e(0x9fa3c2c23cfea3fe), C64e(0xd45f6a6a8be15fe1), - C64e(0x47f9aeaebe10f910), C64e(0xd2d06969026bd06b), - C64e(0x2e911717bfa891a8), C64e(0x2958999971e858e8), - C64e(0x74273a3a53692769), C64e(0x4eb92727f7d0b9d0), - C64e(0xa938d9d991483848), C64e(0xcd13ebebde351335), - C64e(0x56b32b2be5ceb3ce), C64e(0x4433222277553355), - C64e(0xbfbbd2d204d6bbd6), C64e(0x4970a9a939907090), - C64e(0x0e89070787808980), C64e(0x66a73333c1f2a7f2), - C64e(0x5ab62d2decc1b6c1), C64e(0x78223c3c5a662266), - C64e(0x2a921515b8ad92ad), C64e(0x8920c9c9a9602060), - C64e(0x154987875cdb49db), C64e(0x4fffaaaab01aff1a), - C64e(0xa0785050d8887888), C64e(0x517aa5a52b8e7a8e), - C64e(0x068f0303898a8f8a), C64e(0xb2f859594a13f813), - C64e(0x12800909929b809b), C64e(0x34171a1a23391739), - C64e(0xcada65651075da75), C64e(0xb531d7d784533153), - C64e(0x13c68484d551c651), C64e(0xbbb8d0d003d3b8d3), - C64e(0x1fc38282dc5ec35e), C64e(0x52b02929e2cbb0cb), - C64e(0xb4775a5ac3997799), C64e(0x3c111e1e2d331133), - C64e(0xf6cb7b7b3d46cb46), C64e(0x4bfca8a8b71ffc1f), - C64e(0xdad66d6d0c61d661), C64e(0x583a2c2c624e3a4e) -}; - -__constant static const ulong T4_G[] = { - C64e(0xf497a5c6c632f4a5), C64e(0x97eb84f8f86f9784), - C64e(0xb0c799eeee5eb099), C64e(0x8cf78df6f67a8c8d), - C64e(0x17e50dffffe8170d), C64e(0xdcb7bdd6d60adcbd), - C64e(0xc8a7b1dede16c8b1), C64e(0xfc395491916dfc54), - C64e(0xf0c050606090f050), C64e(0x0504030202070503), - C64e(0xe087a9cece2ee0a9), C64e(0x87ac7d5656d1877d), - C64e(0x2bd519e7e7cc2b19), C64e(0xa67162b5b513a662), - C64e(0x319ae64d4d7c31e6), C64e(0xb5c39aecec59b59a), - C64e(0xcf05458f8f40cf45), C64e(0xbc3e9d1f1fa3bc9d), - C64e(0xc00940898949c040), C64e(0x92ef87fafa689287), - C64e(0x3fc515efefd03f15), C64e(0x267febb2b29426eb), - C64e(0x4007c98e8ece40c9), C64e(0x1ded0bfbfbe61d0b), - C64e(0x2f82ec41416e2fec), C64e(0xa97d67b3b31aa967), - C64e(0x1cbefd5f5f431cfd), C64e(0x258aea45456025ea), - C64e(0xda46bf2323f9dabf), C64e(0x02a6f753535102f7), - C64e(0xa1d396e4e445a196), C64e(0xed2d5b9b9b76ed5b), - C64e(0x5deac27575285dc2), C64e(0x24d91ce1e1c5241c), - C64e(0xe97aae3d3dd4e9ae), C64e(0xbe986a4c4cf2be6a), - C64e(0xeed85a6c6c82ee5a), C64e(0xc3fc417e7ebdc341), - C64e(0x06f102f5f5f30602), C64e(0xd11d4f838352d14f), - C64e(0xe4d05c68688ce45c), C64e(0x07a2f451515607f4), - C64e(0x5cb934d1d18d5c34), C64e(0x18e908f9f9e11808), - C64e(0xaedf93e2e24cae93), C64e(0x954d73abab3e9573), - C64e(0xf5c453626297f553), C64e(0x41543f2a2a6b413f), - C64e(0x14100c08081c140c), C64e(0xf63152959563f652), - C64e(0xaf8c654646e9af65), C64e(0xe2215e9d9d7fe25e), - C64e(0x7860283030487828), C64e(0xf86ea13737cff8a1), - C64e(0x11140f0a0a1b110f), C64e(0xc45eb52f2febc4b5), - C64e(0x1b1c090e0e151b09), C64e(0x5a483624247e5a36), - C64e(0xb6369b1b1badb69b), C64e(0x47a53ddfdf98473d), - C64e(0x6a8126cdcda76a26), C64e(0xbb9c694e4ef5bb69), - C64e(0x4cfecd7f7f334ccd), C64e(0xbacf9feaea50ba9f), - C64e(0x2d241b12123f2d1b), C64e(0xb93a9e1d1da4b99e), - C64e(0x9cb0745858c49c74), C64e(0x72682e343446722e), - C64e(0x776c2d363641772d), C64e(0xcda3b2dcdc11cdb2), - C64e(0x2973eeb4b49d29ee), C64e(0x16b6fb5b5b4d16fb), - C64e(0x0153f6a4a4a501f6), C64e(0xd7ec4d7676a1d74d), - C64e(0xa37561b7b714a361), C64e(0x49face7d7d3449ce), - C64e(0x8da47b5252df8d7b), C64e(0x42a13edddd9f423e), - C64e(0x93bc715e5ecd9371), C64e(0xa226971313b1a297), - C64e(0x0457f5a6a6a204f5), C64e(0xb86968b9b901b868), - C64e(0x0000000000000000), C64e(0x74992cc1c1b5742c), - C64e(0xa080604040e0a060), C64e(0x21dd1fe3e3c2211f), - C64e(0x43f2c879793a43c8), C64e(0x2c77edb6b69a2ced), - C64e(0xd9b3bed4d40dd9be), C64e(0xca01468d8d47ca46), - C64e(0x70ced967671770d9), C64e(0xdde44b7272afdd4b), - C64e(0x7933de9494ed79de), C64e(0x672bd49898ff67d4), - C64e(0x237be8b0b09323e8), C64e(0xde114a85855bde4a), - C64e(0xbd6d6bbbbb06bd6b), C64e(0x7e912ac5c5bb7e2a), - C64e(0x349ee54f4f7b34e5), C64e(0x3ac116ededd73a16), - C64e(0x5417c58686d254c5), C64e(0x622fd79a9af862d7), - C64e(0xffcc55666699ff55), C64e(0xa722941111b6a794), - C64e(0x4a0fcf8a8ac04acf), C64e(0x30c910e9e9d93010), - C64e(0x0a080604040e0a06), C64e(0x98e781fefe669881), - C64e(0x0b5bf0a0a0ab0bf0), C64e(0xccf0447878b4cc44), - C64e(0xd54aba2525f0d5ba), C64e(0x3e96e34b4b753ee3), - C64e(0x0e5ff3a2a2ac0ef3), C64e(0x19bafe5d5d4419fe), - C64e(0x5b1bc08080db5bc0), C64e(0x850a8a050580858a), - C64e(0xec7ead3f3fd3ecad), C64e(0xdf42bc2121fedfbc), - C64e(0xd8e0487070a8d848), C64e(0x0cf904f1f1fd0c04), - C64e(0x7ac6df6363197adf), C64e(0x58eec177772f58c1), - C64e(0x9f4575afaf309f75), C64e(0xa584634242e7a563), - C64e(0x5040302020705030), C64e(0x2ed11ae5e5cb2e1a), - C64e(0x12e10efdfdef120e), C64e(0xb7656dbfbf08b76d), - C64e(0xd4194c818155d44c), C64e(0x3c30141818243c14), - C64e(0x5f4c352626795f35), C64e(0x719d2fc3c3b2712f), - C64e(0x3867e1bebe8638e1), C64e(0xfd6aa23535c8fda2), - C64e(0x4f0bcc8888c74fcc), C64e(0x4b5c392e2e654b39), - C64e(0xf93d5793936af957), C64e(0x0daaf25555580df2), - C64e(0x9de382fcfc619d82), C64e(0xc9f4477a7ab3c947), - C64e(0xef8bacc8c827efac), C64e(0x326fe7baba8832e7), - C64e(0x7d642b32324f7d2b), C64e(0xa4d795e6e642a495), - C64e(0xfb9ba0c0c03bfba0), C64e(0xb332981919aab398), - C64e(0x6827d19e9ef668d1), C64e(0x815d7fa3a322817f), - C64e(0xaa88664444eeaa66), C64e(0x82a87e5454d6827e), - C64e(0xe676ab3b3bdde6ab), C64e(0x9e16830b0b959e83), - C64e(0x4503ca8c8cc945ca), C64e(0x7b9529c7c7bc7b29), - C64e(0x6ed6d36b6b056ed3), C64e(0x44503c28286c443c), - C64e(0x8b5579a7a72c8b79), C64e(0x3d63e2bcbc813de2), - C64e(0x272c1d161631271d), C64e(0x9a4176adad379a76), - C64e(0x4dad3bdbdb964d3b), C64e(0xfac85664649efa56), - C64e(0xd2e84e7474a6d24e), C64e(0x22281e141436221e), - C64e(0x763fdb9292e476db), C64e(0x1e180a0c0c121e0a), - C64e(0xb4906c4848fcb46c), C64e(0x376be4b8b88f37e4), - C64e(0xe7255d9f9f78e75d), C64e(0xb2616ebdbd0fb26e), - C64e(0x2a86ef4343692aef), C64e(0xf193a6c4c435f1a6), - C64e(0xe372a83939dae3a8), C64e(0xf762a43131c6f7a4), - C64e(0x59bd37d3d38a5937), C64e(0x86ff8bf2f274868b), - C64e(0x56b132d5d5835632), C64e(0xc50d438b8b4ec543), - C64e(0xebdc596e6e85eb59), C64e(0xc2afb7dada18c2b7), - C64e(0x8f028c01018e8f8c), C64e(0xac7964b1b11dac64), - C64e(0x6d23d29c9cf16dd2), C64e(0x3b92e04949723be0), - C64e(0xc7abb4d8d81fc7b4), C64e(0x1543faacacb915fa), - C64e(0x09fd07f3f3fa0907), C64e(0x6f8525cfcfa06f25), - C64e(0xea8fafcaca20eaaf), C64e(0x89f38ef4f47d898e), - C64e(0x208ee947476720e9), C64e(0x2820181010382818), - C64e(0x64ded56f6f0b64d5), C64e(0x83fb88f0f0738388), - C64e(0xb1946f4a4afbb16f), C64e(0x96b8725c5cca9672), - C64e(0x6c70243838546c24), C64e(0x08aef157575f08f1), - C64e(0x52e6c773732152c7), C64e(0xf33551979764f351), - C64e(0x658d23cbcbae6523), C64e(0x84597ca1a125847c), - C64e(0xbfcb9ce8e857bf9c), C64e(0x637c213e3e5d6321), - C64e(0x7c37dd9696ea7cdd), C64e(0x7fc2dc61611e7fdc), - C64e(0x911a860d0d9c9186), C64e(0x941e850f0f9b9485), - C64e(0xabdb90e0e04bab90), C64e(0xc6f8427c7cbac642), - C64e(0x57e2c471712657c4), C64e(0xe583aacccc29e5aa), - C64e(0x733bd89090e373d8), C64e(0x0f0c050606090f05), - C64e(0x03f501f7f7f40301), C64e(0x3638121c1c2a3612), - C64e(0xfe9fa3c2c23cfea3), C64e(0xe1d45f6a6a8be15f), - C64e(0x1047f9aeaebe10f9), C64e(0x6bd2d06969026bd0), - C64e(0xa82e911717bfa891), C64e(0xe82958999971e858), - C64e(0x6974273a3a536927), C64e(0xd04eb92727f7d0b9), - C64e(0x48a938d9d9914838), C64e(0x35cd13ebebde3513), - C64e(0xce56b32b2be5ceb3), C64e(0x5544332222775533), - C64e(0xd6bfbbd2d204d6bb), C64e(0x904970a9a9399070), - C64e(0x800e890707878089), C64e(0xf266a73333c1f2a7), - C64e(0xc15ab62d2decc1b6), C64e(0x6678223c3c5a6622), - C64e(0xad2a921515b8ad92), C64e(0x608920c9c9a96020), - C64e(0xdb154987875cdb49), C64e(0x1a4fffaaaab01aff), - C64e(0x88a0785050d88878), C64e(0x8e517aa5a52b8e7a), - C64e(0x8a068f0303898a8f), C64e(0x13b2f859594a13f8), - C64e(0x9b12800909929b80), C64e(0x3934171a1a233917), - C64e(0x75cada65651075da), C64e(0x53b531d7d7845331), - C64e(0x5113c68484d551c6), C64e(0xd3bbb8d0d003d3b8), - C64e(0x5e1fc38282dc5ec3), C64e(0xcb52b02929e2cbb0), - C64e(0x99b4775a5ac39977), C64e(0x333c111e1e2d3311), - C64e(0x46f6cb7b7b3d46cb), C64e(0x1f4bfca8a8b71ffc), - C64e(0x61dad66d6d0c61d6), C64e(0x4e583a2c2c624e3a) -}; - -__constant static const ulong T5_G[] = { - C64e(0xa5f497a5c6c632f4), C64e(0x8497eb84f8f86f97), - C64e(0x99b0c799eeee5eb0), C64e(0x8d8cf78df6f67a8c), - C64e(0x0d17e50dffffe817), C64e(0xbddcb7bdd6d60adc), - C64e(0xb1c8a7b1dede16c8), C64e(0x54fc395491916dfc), - C64e(0x50f0c050606090f0), C64e(0x0305040302020705), - C64e(0xa9e087a9cece2ee0), C64e(0x7d87ac7d5656d187), - C64e(0x192bd519e7e7cc2b), C64e(0x62a67162b5b513a6), - C64e(0xe6319ae64d4d7c31), C64e(0x9ab5c39aecec59b5), - C64e(0x45cf05458f8f40cf), C64e(0x9dbc3e9d1f1fa3bc), - C64e(0x40c00940898949c0), C64e(0x8792ef87fafa6892), - C64e(0x153fc515efefd03f), C64e(0xeb267febb2b29426), - C64e(0xc94007c98e8ece40), C64e(0x0b1ded0bfbfbe61d), - C64e(0xec2f82ec41416e2f), C64e(0x67a97d67b3b31aa9), - C64e(0xfd1cbefd5f5f431c), C64e(0xea258aea45456025), - C64e(0xbfda46bf2323f9da), C64e(0xf702a6f753535102), - C64e(0x96a1d396e4e445a1), C64e(0x5bed2d5b9b9b76ed), - C64e(0xc25deac27575285d), C64e(0x1c24d91ce1e1c524), - C64e(0xaee97aae3d3dd4e9), C64e(0x6abe986a4c4cf2be), - C64e(0x5aeed85a6c6c82ee), C64e(0x41c3fc417e7ebdc3), - C64e(0x0206f102f5f5f306), C64e(0x4fd11d4f838352d1), - C64e(0x5ce4d05c68688ce4), C64e(0xf407a2f451515607), - C64e(0x345cb934d1d18d5c), C64e(0x0818e908f9f9e118), - C64e(0x93aedf93e2e24cae), C64e(0x73954d73abab3e95), - C64e(0x53f5c453626297f5), C64e(0x3f41543f2a2a6b41), - C64e(0x0c14100c08081c14), C64e(0x52f63152959563f6), - C64e(0x65af8c654646e9af), C64e(0x5ee2215e9d9d7fe2), - C64e(0x2878602830304878), C64e(0xa1f86ea13737cff8), - C64e(0x0f11140f0a0a1b11), C64e(0xb5c45eb52f2febc4), - C64e(0x091b1c090e0e151b), C64e(0x365a483624247e5a), - C64e(0x9bb6369b1b1badb6), C64e(0x3d47a53ddfdf9847), - C64e(0x266a8126cdcda76a), C64e(0x69bb9c694e4ef5bb), - C64e(0xcd4cfecd7f7f334c), C64e(0x9fbacf9feaea50ba), - C64e(0x1b2d241b12123f2d), C64e(0x9eb93a9e1d1da4b9), - C64e(0x749cb0745858c49c), C64e(0x2e72682e34344672), - C64e(0x2d776c2d36364177), C64e(0xb2cda3b2dcdc11cd), - C64e(0xee2973eeb4b49d29), C64e(0xfb16b6fb5b5b4d16), - C64e(0xf60153f6a4a4a501), C64e(0x4dd7ec4d7676a1d7), - C64e(0x61a37561b7b714a3), C64e(0xce49face7d7d3449), - C64e(0x7b8da47b5252df8d), C64e(0x3e42a13edddd9f42), - C64e(0x7193bc715e5ecd93), C64e(0x97a226971313b1a2), - C64e(0xf50457f5a6a6a204), C64e(0x68b86968b9b901b8), - C64e(0x0000000000000000), C64e(0x2c74992cc1c1b574), - C64e(0x60a080604040e0a0), C64e(0x1f21dd1fe3e3c221), - C64e(0xc843f2c879793a43), C64e(0xed2c77edb6b69a2c), - C64e(0xbed9b3bed4d40dd9), C64e(0x46ca01468d8d47ca), - C64e(0xd970ced967671770), C64e(0x4bdde44b7272afdd), - C64e(0xde7933de9494ed79), C64e(0xd4672bd49898ff67), - C64e(0xe8237be8b0b09323), C64e(0x4ade114a85855bde), - C64e(0x6bbd6d6bbbbb06bd), C64e(0x2a7e912ac5c5bb7e), - C64e(0xe5349ee54f4f7b34), C64e(0x163ac116ededd73a), - C64e(0xc55417c58686d254), C64e(0xd7622fd79a9af862), - C64e(0x55ffcc55666699ff), C64e(0x94a722941111b6a7), - C64e(0xcf4a0fcf8a8ac04a), C64e(0x1030c910e9e9d930), - C64e(0x060a080604040e0a), C64e(0x8198e781fefe6698), - C64e(0xf00b5bf0a0a0ab0b), C64e(0x44ccf0447878b4cc), - C64e(0xbad54aba2525f0d5), C64e(0xe33e96e34b4b753e), - C64e(0xf30e5ff3a2a2ac0e), C64e(0xfe19bafe5d5d4419), - C64e(0xc05b1bc08080db5b), C64e(0x8a850a8a05058085), - C64e(0xadec7ead3f3fd3ec), C64e(0xbcdf42bc2121fedf), - C64e(0x48d8e0487070a8d8), C64e(0x040cf904f1f1fd0c), - C64e(0xdf7ac6df6363197a), C64e(0xc158eec177772f58), - C64e(0x759f4575afaf309f), C64e(0x63a584634242e7a5), - C64e(0x3050403020207050), C64e(0x1a2ed11ae5e5cb2e), - C64e(0x0e12e10efdfdef12), C64e(0x6db7656dbfbf08b7), - C64e(0x4cd4194c818155d4), C64e(0x143c30141818243c), - C64e(0x355f4c352626795f), C64e(0x2f719d2fc3c3b271), - C64e(0xe13867e1bebe8638), C64e(0xa2fd6aa23535c8fd), - C64e(0xcc4f0bcc8888c74f), C64e(0x394b5c392e2e654b), - C64e(0x57f93d5793936af9), C64e(0xf20daaf25555580d), - C64e(0x829de382fcfc619d), C64e(0x47c9f4477a7ab3c9), - C64e(0xacef8bacc8c827ef), C64e(0xe7326fe7baba8832), - C64e(0x2b7d642b32324f7d), C64e(0x95a4d795e6e642a4), - C64e(0xa0fb9ba0c0c03bfb), C64e(0x98b332981919aab3), - C64e(0xd16827d19e9ef668), C64e(0x7f815d7fa3a32281), - C64e(0x66aa88664444eeaa), C64e(0x7e82a87e5454d682), - C64e(0xabe676ab3b3bdde6), C64e(0x839e16830b0b959e), - C64e(0xca4503ca8c8cc945), C64e(0x297b9529c7c7bc7b), - C64e(0xd36ed6d36b6b056e), C64e(0x3c44503c28286c44), - C64e(0x798b5579a7a72c8b), C64e(0xe23d63e2bcbc813d), - C64e(0x1d272c1d16163127), C64e(0x769a4176adad379a), - C64e(0x3b4dad3bdbdb964d), C64e(0x56fac85664649efa), - C64e(0x4ed2e84e7474a6d2), C64e(0x1e22281e14143622), - C64e(0xdb763fdb9292e476), C64e(0x0a1e180a0c0c121e), - C64e(0x6cb4906c4848fcb4), C64e(0xe4376be4b8b88f37), - C64e(0x5de7255d9f9f78e7), C64e(0x6eb2616ebdbd0fb2), - C64e(0xef2a86ef4343692a), C64e(0xa6f193a6c4c435f1), - C64e(0xa8e372a83939dae3), C64e(0xa4f762a43131c6f7), - C64e(0x3759bd37d3d38a59), C64e(0x8b86ff8bf2f27486), - C64e(0x3256b132d5d58356), C64e(0x43c50d438b8b4ec5), - C64e(0x59ebdc596e6e85eb), C64e(0xb7c2afb7dada18c2), - C64e(0x8c8f028c01018e8f), C64e(0x64ac7964b1b11dac), - C64e(0xd26d23d29c9cf16d), C64e(0xe03b92e04949723b), - C64e(0xb4c7abb4d8d81fc7), C64e(0xfa1543faacacb915), - C64e(0x0709fd07f3f3fa09), C64e(0x256f8525cfcfa06f), - C64e(0xafea8fafcaca20ea), C64e(0x8e89f38ef4f47d89), - C64e(0xe9208ee947476720), C64e(0x1828201810103828), - C64e(0xd564ded56f6f0b64), C64e(0x8883fb88f0f07383), - C64e(0x6fb1946f4a4afbb1), C64e(0x7296b8725c5cca96), - C64e(0x246c70243838546c), C64e(0xf108aef157575f08), - C64e(0xc752e6c773732152), C64e(0x51f33551979764f3), - C64e(0x23658d23cbcbae65), C64e(0x7c84597ca1a12584), - C64e(0x9cbfcb9ce8e857bf), C64e(0x21637c213e3e5d63), - C64e(0xdd7c37dd9696ea7c), C64e(0xdc7fc2dc61611e7f), - C64e(0x86911a860d0d9c91), C64e(0x85941e850f0f9b94), - C64e(0x90abdb90e0e04bab), C64e(0x42c6f8427c7cbac6), - C64e(0xc457e2c471712657), C64e(0xaae583aacccc29e5), - C64e(0xd8733bd89090e373), C64e(0x050f0c050606090f), - C64e(0x0103f501f7f7f403), C64e(0x123638121c1c2a36), - C64e(0xa3fe9fa3c2c23cfe), C64e(0x5fe1d45f6a6a8be1), - C64e(0xf91047f9aeaebe10), C64e(0xd06bd2d06969026b), - C64e(0x91a82e911717bfa8), C64e(0x58e82958999971e8), - C64e(0x276974273a3a5369), C64e(0xb9d04eb92727f7d0), - C64e(0x3848a938d9d99148), C64e(0x1335cd13ebebde35), - C64e(0xb3ce56b32b2be5ce), C64e(0x3355443322227755), - C64e(0xbbd6bfbbd2d204d6), C64e(0x70904970a9a93990), - C64e(0x89800e8907078780), C64e(0xa7f266a73333c1f2), - C64e(0xb6c15ab62d2decc1), C64e(0x226678223c3c5a66), - C64e(0x92ad2a921515b8ad), C64e(0x20608920c9c9a960), - C64e(0x49db154987875cdb), C64e(0xff1a4fffaaaab01a), - C64e(0x7888a0785050d888), C64e(0x7a8e517aa5a52b8e), - C64e(0x8f8a068f0303898a), C64e(0xf813b2f859594a13), - C64e(0x809b12800909929b), C64e(0x173934171a1a2339), - C64e(0xda75cada65651075), C64e(0x3153b531d7d78453), - C64e(0xc65113c68484d551), C64e(0xb8d3bbb8d0d003d3), - C64e(0xc35e1fc38282dc5e), C64e(0xb0cb52b02929e2cb), - C64e(0x7799b4775a5ac399), C64e(0x11333c111e1e2d33), - C64e(0xcb46f6cb7b7b3d46), C64e(0xfc1f4bfca8a8b71f), - C64e(0xd661dad66d6d0c61), C64e(0x3a4e583a2c2c624e) -}; - -__constant static const ulong T6_G[] = { - C64e(0xf4a5f497a5c6c632), C64e(0x978497eb84f8f86f), - C64e(0xb099b0c799eeee5e), C64e(0x8c8d8cf78df6f67a), - C64e(0x170d17e50dffffe8), C64e(0xdcbddcb7bdd6d60a), - C64e(0xc8b1c8a7b1dede16), C64e(0xfc54fc395491916d), - C64e(0xf050f0c050606090), C64e(0x0503050403020207), - C64e(0xe0a9e087a9cece2e), C64e(0x877d87ac7d5656d1), - C64e(0x2b192bd519e7e7cc), C64e(0xa662a67162b5b513), - C64e(0x31e6319ae64d4d7c), C64e(0xb59ab5c39aecec59), - C64e(0xcf45cf05458f8f40), C64e(0xbc9dbc3e9d1f1fa3), - C64e(0xc040c00940898949), C64e(0x928792ef87fafa68), - C64e(0x3f153fc515efefd0), C64e(0x26eb267febb2b294), - C64e(0x40c94007c98e8ece), C64e(0x1d0b1ded0bfbfbe6), - C64e(0x2fec2f82ec41416e), C64e(0xa967a97d67b3b31a), - C64e(0x1cfd1cbefd5f5f43), C64e(0x25ea258aea454560), - C64e(0xdabfda46bf2323f9), C64e(0x02f702a6f7535351), - C64e(0xa196a1d396e4e445), C64e(0xed5bed2d5b9b9b76), - C64e(0x5dc25deac2757528), C64e(0x241c24d91ce1e1c5), - C64e(0xe9aee97aae3d3dd4), C64e(0xbe6abe986a4c4cf2), - C64e(0xee5aeed85a6c6c82), C64e(0xc341c3fc417e7ebd), - C64e(0x060206f102f5f5f3), C64e(0xd14fd11d4f838352), - C64e(0xe45ce4d05c68688c), C64e(0x07f407a2f4515156), - C64e(0x5c345cb934d1d18d), C64e(0x180818e908f9f9e1), - C64e(0xae93aedf93e2e24c), C64e(0x9573954d73abab3e), - C64e(0xf553f5c453626297), C64e(0x413f41543f2a2a6b), - C64e(0x140c14100c08081c), C64e(0xf652f63152959563), - C64e(0xaf65af8c654646e9), C64e(0xe25ee2215e9d9d7f), - C64e(0x7828786028303048), C64e(0xf8a1f86ea13737cf), - C64e(0x110f11140f0a0a1b), C64e(0xc4b5c45eb52f2feb), - C64e(0x1b091b1c090e0e15), C64e(0x5a365a483624247e), - C64e(0xb69bb6369b1b1bad), C64e(0x473d47a53ddfdf98), - C64e(0x6a266a8126cdcda7), C64e(0xbb69bb9c694e4ef5), - C64e(0x4ccd4cfecd7f7f33), C64e(0xba9fbacf9feaea50), - C64e(0x2d1b2d241b12123f), C64e(0xb99eb93a9e1d1da4), - C64e(0x9c749cb0745858c4), C64e(0x722e72682e343446), - C64e(0x772d776c2d363641), C64e(0xcdb2cda3b2dcdc11), - C64e(0x29ee2973eeb4b49d), C64e(0x16fb16b6fb5b5b4d), - C64e(0x01f60153f6a4a4a5), C64e(0xd74dd7ec4d7676a1), - C64e(0xa361a37561b7b714), C64e(0x49ce49face7d7d34), - C64e(0x8d7b8da47b5252df), C64e(0x423e42a13edddd9f), - C64e(0x937193bc715e5ecd), C64e(0xa297a226971313b1), - C64e(0x04f50457f5a6a6a2), C64e(0xb868b86968b9b901), - C64e(0x0000000000000000), C64e(0x742c74992cc1c1b5), - C64e(0xa060a080604040e0), C64e(0x211f21dd1fe3e3c2), - C64e(0x43c843f2c879793a), C64e(0x2ced2c77edb6b69a), - C64e(0xd9bed9b3bed4d40d), C64e(0xca46ca01468d8d47), - C64e(0x70d970ced9676717), C64e(0xdd4bdde44b7272af), - C64e(0x79de7933de9494ed), C64e(0x67d4672bd49898ff), - C64e(0x23e8237be8b0b093), C64e(0xde4ade114a85855b), - C64e(0xbd6bbd6d6bbbbb06), C64e(0x7e2a7e912ac5c5bb), - C64e(0x34e5349ee54f4f7b), C64e(0x3a163ac116ededd7), - C64e(0x54c55417c58686d2), C64e(0x62d7622fd79a9af8), - C64e(0xff55ffcc55666699), C64e(0xa794a722941111b6), - C64e(0x4acf4a0fcf8a8ac0), C64e(0x301030c910e9e9d9), - C64e(0x0a060a080604040e), C64e(0x988198e781fefe66), - C64e(0x0bf00b5bf0a0a0ab), C64e(0xcc44ccf0447878b4), - C64e(0xd5bad54aba2525f0), C64e(0x3ee33e96e34b4b75), - C64e(0x0ef30e5ff3a2a2ac), C64e(0x19fe19bafe5d5d44), - C64e(0x5bc05b1bc08080db), C64e(0x858a850a8a050580), - C64e(0xecadec7ead3f3fd3), C64e(0xdfbcdf42bc2121fe), - C64e(0xd848d8e0487070a8), C64e(0x0c040cf904f1f1fd), - C64e(0x7adf7ac6df636319), C64e(0x58c158eec177772f), - C64e(0x9f759f4575afaf30), C64e(0xa563a584634242e7), - C64e(0x5030504030202070), C64e(0x2e1a2ed11ae5e5cb), - C64e(0x120e12e10efdfdef), C64e(0xb76db7656dbfbf08), - C64e(0xd44cd4194c818155), C64e(0x3c143c3014181824), - C64e(0x5f355f4c35262679), C64e(0x712f719d2fc3c3b2), - C64e(0x38e13867e1bebe86), C64e(0xfda2fd6aa23535c8), - C64e(0x4fcc4f0bcc8888c7), C64e(0x4b394b5c392e2e65), - C64e(0xf957f93d5793936a), C64e(0x0df20daaf2555558), - C64e(0x9d829de382fcfc61), C64e(0xc947c9f4477a7ab3), - C64e(0xefacef8bacc8c827), C64e(0x32e7326fe7baba88), - C64e(0x7d2b7d642b32324f), C64e(0xa495a4d795e6e642), - C64e(0xfba0fb9ba0c0c03b), C64e(0xb398b332981919aa), - C64e(0x68d16827d19e9ef6), C64e(0x817f815d7fa3a322), - C64e(0xaa66aa88664444ee), C64e(0x827e82a87e5454d6), - C64e(0xe6abe676ab3b3bdd), C64e(0x9e839e16830b0b95), - C64e(0x45ca4503ca8c8cc9), C64e(0x7b297b9529c7c7bc), - C64e(0x6ed36ed6d36b6b05), C64e(0x443c44503c28286c), - C64e(0x8b798b5579a7a72c), C64e(0x3de23d63e2bcbc81), - C64e(0x271d272c1d161631), C64e(0x9a769a4176adad37), - C64e(0x4d3b4dad3bdbdb96), C64e(0xfa56fac85664649e), - C64e(0xd24ed2e84e7474a6), C64e(0x221e22281e141436), - C64e(0x76db763fdb9292e4), C64e(0x1e0a1e180a0c0c12), - C64e(0xb46cb4906c4848fc), C64e(0x37e4376be4b8b88f), - C64e(0xe75de7255d9f9f78), C64e(0xb26eb2616ebdbd0f), - C64e(0x2aef2a86ef434369), C64e(0xf1a6f193a6c4c435), - C64e(0xe3a8e372a83939da), C64e(0xf7a4f762a43131c6), - C64e(0x593759bd37d3d38a), C64e(0x868b86ff8bf2f274), - C64e(0x563256b132d5d583), C64e(0xc543c50d438b8b4e), - C64e(0xeb59ebdc596e6e85), C64e(0xc2b7c2afb7dada18), - C64e(0x8f8c8f028c01018e), C64e(0xac64ac7964b1b11d), - C64e(0x6dd26d23d29c9cf1), C64e(0x3be03b92e0494972), - C64e(0xc7b4c7abb4d8d81f), C64e(0x15fa1543faacacb9), - C64e(0x090709fd07f3f3fa), C64e(0x6f256f8525cfcfa0), - C64e(0xeaafea8fafcaca20), C64e(0x898e89f38ef4f47d), - C64e(0x20e9208ee9474767), C64e(0x2818282018101038), - C64e(0x64d564ded56f6f0b), C64e(0x838883fb88f0f073), - C64e(0xb16fb1946f4a4afb), C64e(0x967296b8725c5cca), - C64e(0x6c246c7024383854), C64e(0x08f108aef157575f), - C64e(0x52c752e6c7737321), C64e(0xf351f33551979764), - C64e(0x6523658d23cbcbae), C64e(0x847c84597ca1a125), - C64e(0xbf9cbfcb9ce8e857), C64e(0x6321637c213e3e5d), - C64e(0x7cdd7c37dd9696ea), C64e(0x7fdc7fc2dc61611e), - C64e(0x9186911a860d0d9c), C64e(0x9485941e850f0f9b), - C64e(0xab90abdb90e0e04b), C64e(0xc642c6f8427c7cba), - C64e(0x57c457e2c4717126), C64e(0xe5aae583aacccc29), - C64e(0x73d8733bd89090e3), C64e(0x0f050f0c05060609), - C64e(0x030103f501f7f7f4), C64e(0x36123638121c1c2a), - C64e(0xfea3fe9fa3c2c23c), C64e(0xe15fe1d45f6a6a8b), - C64e(0x10f91047f9aeaebe), C64e(0x6bd06bd2d0696902), - C64e(0xa891a82e911717bf), C64e(0xe858e82958999971), - C64e(0x69276974273a3a53), C64e(0xd0b9d04eb92727f7), - C64e(0x483848a938d9d991), C64e(0x351335cd13ebebde), - C64e(0xceb3ce56b32b2be5), C64e(0x5533554433222277), - C64e(0xd6bbd6bfbbd2d204), C64e(0x9070904970a9a939), - C64e(0x8089800e89070787), C64e(0xf2a7f266a73333c1), - C64e(0xc1b6c15ab62d2dec), C64e(0x66226678223c3c5a), - C64e(0xad92ad2a921515b8), C64e(0x6020608920c9c9a9), - C64e(0xdb49db154987875c), C64e(0x1aff1a4fffaaaab0), - C64e(0x887888a0785050d8), C64e(0x8e7a8e517aa5a52b), - C64e(0x8a8f8a068f030389), C64e(0x13f813b2f859594a), - C64e(0x9b809b1280090992), C64e(0x39173934171a1a23), - C64e(0x75da75cada656510), C64e(0x533153b531d7d784), - C64e(0x51c65113c68484d5), C64e(0xd3b8d3bbb8d0d003), - C64e(0x5ec35e1fc38282dc), C64e(0xcbb0cb52b02929e2), - C64e(0x997799b4775a5ac3), C64e(0x3311333c111e1e2d), - C64e(0x46cb46f6cb7b7b3d), C64e(0x1ffc1f4bfca8a8b7), - C64e(0x61d661dad66d6d0c), C64e(0x4e3a4e583a2c2c62) -}; - -__constant static const ulong T7_G[] = { - C64e(0x32f4a5f497a5c6c6), C64e(0x6f978497eb84f8f8), - C64e(0x5eb099b0c799eeee), C64e(0x7a8c8d8cf78df6f6), - C64e(0xe8170d17e50dffff), C64e(0x0adcbddcb7bdd6d6), - C64e(0x16c8b1c8a7b1dede), C64e(0x6dfc54fc39549191), - C64e(0x90f050f0c0506060), C64e(0x0705030504030202), - C64e(0x2ee0a9e087a9cece), C64e(0xd1877d87ac7d5656), - C64e(0xcc2b192bd519e7e7), C64e(0x13a662a67162b5b5), - C64e(0x7c31e6319ae64d4d), C64e(0x59b59ab5c39aecec), - C64e(0x40cf45cf05458f8f), C64e(0xa3bc9dbc3e9d1f1f), - C64e(0x49c040c009408989), C64e(0x68928792ef87fafa), - C64e(0xd03f153fc515efef), C64e(0x9426eb267febb2b2), - C64e(0xce40c94007c98e8e), C64e(0xe61d0b1ded0bfbfb), - C64e(0x6e2fec2f82ec4141), C64e(0x1aa967a97d67b3b3), - C64e(0x431cfd1cbefd5f5f), C64e(0x6025ea258aea4545), - C64e(0xf9dabfda46bf2323), C64e(0x5102f702a6f75353), - C64e(0x45a196a1d396e4e4), C64e(0x76ed5bed2d5b9b9b), - C64e(0x285dc25deac27575), C64e(0xc5241c24d91ce1e1), - C64e(0xd4e9aee97aae3d3d), C64e(0xf2be6abe986a4c4c), - C64e(0x82ee5aeed85a6c6c), C64e(0xbdc341c3fc417e7e), - C64e(0xf3060206f102f5f5), C64e(0x52d14fd11d4f8383), - C64e(0x8ce45ce4d05c6868), C64e(0x5607f407a2f45151), - C64e(0x8d5c345cb934d1d1), C64e(0xe1180818e908f9f9), - C64e(0x4cae93aedf93e2e2), C64e(0x3e9573954d73abab), - C64e(0x97f553f5c4536262), C64e(0x6b413f41543f2a2a), - C64e(0x1c140c14100c0808), C64e(0x63f652f631529595), - C64e(0xe9af65af8c654646), C64e(0x7fe25ee2215e9d9d), - C64e(0x4878287860283030), C64e(0xcff8a1f86ea13737), - C64e(0x1b110f11140f0a0a), C64e(0xebc4b5c45eb52f2f), - C64e(0x151b091b1c090e0e), C64e(0x7e5a365a48362424), - C64e(0xadb69bb6369b1b1b), C64e(0x98473d47a53ddfdf), - C64e(0xa76a266a8126cdcd), C64e(0xf5bb69bb9c694e4e), - C64e(0x334ccd4cfecd7f7f), C64e(0x50ba9fbacf9feaea), - C64e(0x3f2d1b2d241b1212), C64e(0xa4b99eb93a9e1d1d), - C64e(0xc49c749cb0745858), C64e(0x46722e72682e3434), - C64e(0x41772d776c2d3636), C64e(0x11cdb2cda3b2dcdc), - C64e(0x9d29ee2973eeb4b4), C64e(0x4d16fb16b6fb5b5b), - C64e(0xa501f60153f6a4a4), C64e(0xa1d74dd7ec4d7676), - C64e(0x14a361a37561b7b7), C64e(0x3449ce49face7d7d), - C64e(0xdf8d7b8da47b5252), C64e(0x9f423e42a13edddd), - C64e(0xcd937193bc715e5e), C64e(0xb1a297a226971313), - C64e(0xa204f50457f5a6a6), C64e(0x01b868b86968b9b9), - C64e(0x0000000000000000), C64e(0xb5742c74992cc1c1), - C64e(0xe0a060a080604040), C64e(0xc2211f21dd1fe3e3), - C64e(0x3a43c843f2c87979), C64e(0x9a2ced2c77edb6b6), - C64e(0x0dd9bed9b3bed4d4), C64e(0x47ca46ca01468d8d), - C64e(0x1770d970ced96767), C64e(0xafdd4bdde44b7272), - C64e(0xed79de7933de9494), C64e(0xff67d4672bd49898), - C64e(0x9323e8237be8b0b0), C64e(0x5bde4ade114a8585), - C64e(0x06bd6bbd6d6bbbbb), C64e(0xbb7e2a7e912ac5c5), - C64e(0x7b34e5349ee54f4f), C64e(0xd73a163ac116eded), - C64e(0xd254c55417c58686), C64e(0xf862d7622fd79a9a), - C64e(0x99ff55ffcc556666), C64e(0xb6a794a722941111), - C64e(0xc04acf4a0fcf8a8a), C64e(0xd9301030c910e9e9), - C64e(0x0e0a060a08060404), C64e(0x66988198e781fefe), - C64e(0xab0bf00b5bf0a0a0), C64e(0xb4cc44ccf0447878), - C64e(0xf0d5bad54aba2525), C64e(0x753ee33e96e34b4b), - C64e(0xac0ef30e5ff3a2a2), C64e(0x4419fe19bafe5d5d), - C64e(0xdb5bc05b1bc08080), C64e(0x80858a850a8a0505), - C64e(0xd3ecadec7ead3f3f), C64e(0xfedfbcdf42bc2121), - C64e(0xa8d848d8e0487070), C64e(0xfd0c040cf904f1f1), - C64e(0x197adf7ac6df6363), C64e(0x2f58c158eec17777), - C64e(0x309f759f4575afaf), C64e(0xe7a563a584634242), - C64e(0x7050305040302020), C64e(0xcb2e1a2ed11ae5e5), - C64e(0xef120e12e10efdfd), C64e(0x08b76db7656dbfbf), - C64e(0x55d44cd4194c8181), C64e(0x243c143c30141818), - C64e(0x795f355f4c352626), C64e(0xb2712f719d2fc3c3), - C64e(0x8638e13867e1bebe), C64e(0xc8fda2fd6aa23535), - C64e(0xc74fcc4f0bcc8888), C64e(0x654b394b5c392e2e), - C64e(0x6af957f93d579393), C64e(0x580df20daaf25555), - C64e(0x619d829de382fcfc), C64e(0xb3c947c9f4477a7a), - C64e(0x27efacef8bacc8c8), C64e(0x8832e7326fe7baba), - C64e(0x4f7d2b7d642b3232), C64e(0x42a495a4d795e6e6), - C64e(0x3bfba0fb9ba0c0c0), C64e(0xaab398b332981919), - C64e(0xf668d16827d19e9e), C64e(0x22817f815d7fa3a3), - C64e(0xeeaa66aa88664444), C64e(0xd6827e82a87e5454), - C64e(0xdde6abe676ab3b3b), C64e(0x959e839e16830b0b), - C64e(0xc945ca4503ca8c8c), C64e(0xbc7b297b9529c7c7), - C64e(0x056ed36ed6d36b6b), C64e(0x6c443c44503c2828), - C64e(0x2c8b798b5579a7a7), C64e(0x813de23d63e2bcbc), - C64e(0x31271d272c1d1616), C64e(0x379a769a4176adad), - C64e(0x964d3b4dad3bdbdb), C64e(0x9efa56fac8566464), - C64e(0xa6d24ed2e84e7474), C64e(0x36221e22281e1414), - C64e(0xe476db763fdb9292), C64e(0x121e0a1e180a0c0c), - C64e(0xfcb46cb4906c4848), C64e(0x8f37e4376be4b8b8), - C64e(0x78e75de7255d9f9f), C64e(0x0fb26eb2616ebdbd), - C64e(0x692aef2a86ef4343), C64e(0x35f1a6f193a6c4c4), - C64e(0xdae3a8e372a83939), C64e(0xc6f7a4f762a43131), - C64e(0x8a593759bd37d3d3), C64e(0x74868b86ff8bf2f2), - C64e(0x83563256b132d5d5), C64e(0x4ec543c50d438b8b), - C64e(0x85eb59ebdc596e6e), C64e(0x18c2b7c2afb7dada), - C64e(0x8e8f8c8f028c0101), C64e(0x1dac64ac7964b1b1), - C64e(0xf16dd26d23d29c9c), C64e(0x723be03b92e04949), - C64e(0x1fc7b4c7abb4d8d8), C64e(0xb915fa1543faacac), - C64e(0xfa090709fd07f3f3), C64e(0xa06f256f8525cfcf), - C64e(0x20eaafea8fafcaca), C64e(0x7d898e89f38ef4f4), - C64e(0x6720e9208ee94747), C64e(0x3828182820181010), - C64e(0x0b64d564ded56f6f), C64e(0x73838883fb88f0f0), - C64e(0xfbb16fb1946f4a4a), C64e(0xca967296b8725c5c), - C64e(0x546c246c70243838), C64e(0x5f08f108aef15757), - C64e(0x2152c752e6c77373), C64e(0x64f351f335519797), - C64e(0xae6523658d23cbcb), C64e(0x25847c84597ca1a1), - C64e(0x57bf9cbfcb9ce8e8), C64e(0x5d6321637c213e3e), - C64e(0xea7cdd7c37dd9696), C64e(0x1e7fdc7fc2dc6161), - C64e(0x9c9186911a860d0d), C64e(0x9b9485941e850f0f), - C64e(0x4bab90abdb90e0e0), C64e(0xbac642c6f8427c7c), - C64e(0x2657c457e2c47171), C64e(0x29e5aae583aacccc), - C64e(0xe373d8733bd89090), C64e(0x090f050f0c050606), - C64e(0xf4030103f501f7f7), C64e(0x2a36123638121c1c), - C64e(0x3cfea3fe9fa3c2c2), C64e(0x8be15fe1d45f6a6a), - C64e(0xbe10f91047f9aeae), C64e(0x026bd06bd2d06969), - C64e(0xbfa891a82e911717), C64e(0x71e858e829589999), - C64e(0x5369276974273a3a), C64e(0xf7d0b9d04eb92727), - C64e(0x91483848a938d9d9), C64e(0xde351335cd13ebeb), - C64e(0xe5ceb3ce56b32b2b), C64e(0x7755335544332222), - C64e(0x04d6bbd6bfbbd2d2), C64e(0x399070904970a9a9), - C64e(0x878089800e890707), C64e(0xc1f2a7f266a73333), - C64e(0xecc1b6c15ab62d2d), C64e(0x5a66226678223c3c), - C64e(0xb8ad92ad2a921515), C64e(0xa96020608920c9c9), - C64e(0x5cdb49db15498787), C64e(0xb01aff1a4fffaaaa), - C64e(0xd8887888a0785050), C64e(0x2b8e7a8e517aa5a5), - C64e(0x898a8f8a068f0303), C64e(0x4a13f813b2f85959), - C64e(0x929b809b12800909), C64e(0x2339173934171a1a), - C64e(0x1075da75cada6565), C64e(0x84533153b531d7d7), - C64e(0xd551c65113c68484), C64e(0x03d3b8d3bbb8d0d0), - C64e(0xdc5ec35e1fc38282), C64e(0xe2cbb0cb52b02929), - C64e(0xc3997799b4775a5a), C64e(0x2d3311333c111e1e), - C64e(0x3d46cb46f6cb7b7b), C64e(0xb71ffc1f4bfca8a8), - C64e(0x0c61d661dad66d6d), C64e(0x624e3a4e583a2c2c) -}; -*/ -#define RBTT(d, b0, b1, b2, b3, b4, b5, b6, b7) do { \ - d = T0[t0[b0]] \ - ^ T1[t1[b1]] \ - ^ T2[t2[b2]] \ - ^ T3[t3[b3]] \ - ^ T4[t4[b4]] \ - ^ T5[t5[b5]] \ - ^ T6[t6[b6]] \ - ^ T7[t7[b7]]; \ - } while (0) - -#define ROUND_BIG_P(a, r) do { \ - t0[0x0] = B64_0(a[0x0]) ^ PC64(0x00, r); \ - t1[0x0] = B64_1(a[0x0]); \ - t2[0x0] = B64_2(a[0x0]); \ - t3[0x0] = B64_3(a[0x0]); \ - t4[0x0] = B64_4(a[0x0]); \ - t5[0x0] = B64_5(a[0x0]); \ - t6[0x0] = B64_6(a[0x0]); \ - t7[0x0] = B64_7(a[0x0]); \ - t0[0x1] = B64_0(a[0x1]) ^ PC64(0x10, r); \ - t1[0x1] = B64_1(a[0x1]); \ - t2[0x1] = B64_2(a[0x1]); \ - t3[0x1] = B64_3(a[0x1]); \ - t4[0x1] = B64_4(a[0x1]); \ - t5[0x1] = B64_5(a[0x1]); \ - t6[0x1] = B64_6(a[0x1]); \ - t7[0x1] = B64_7(a[0x1]); \ - t0[0x2] = B64_0(a[0x2]) ^ PC64(0x20, r); \ - t1[0x2] = B64_1(a[0x2]); \ - t2[0x2] = B64_2(a[0x2]); \ - t3[0x2] = B64_3(a[0x2]); \ - t4[0x2] = B64_4(a[0x2]); \ - t5[0x2] = B64_5(a[0x2]); \ - t6[0x2] = B64_6(a[0x2]); \ - t7[0x2] = B64_7(a[0x2]); \ - t0[0x3] = B64_0(a[0x3]) ^ PC64(0x30, r); \ - t1[0x3] = B64_1(a[0x3]); \ - t2[0x3] = B64_2(a[0x3]); \ - t3[0x3] = B64_3(a[0x3]); \ - t4[0x3] = B64_4(a[0x3]); \ - t5[0x3] = B64_5(a[0x3]); \ - t6[0x3] = B64_6(a[0x3]); \ - t7[0x3] = B64_7(a[0x3]); \ - t0[0x4] = B64_0(a[0x4]) ^ PC64(0x40, r); \ - t1[0x4] = B64_1(a[0x4]); \ - t2[0x4] = B64_2(a[0x4]); \ - t3[0x4] = B64_3(a[0x4]); \ - t4[0x4] = B64_4(a[0x4]); \ - t5[0x4] = B64_5(a[0x4]); \ - t6[0x4] = B64_6(a[0x4]); \ - t7[0x4] = B64_7(a[0x4]); \ - t0[0x5] = B64_0(a[0x5]) ^ PC64(0x50, r); \ - t1[0x5] = B64_1(a[0x5]); \ - t2[0x5] = B64_2(a[0x5]); \ - t3[0x5] = B64_3(a[0x5]); \ - t4[0x5] = B64_4(a[0x5]); \ - t5[0x5] = B64_5(a[0x5]); \ - t6[0x5] = B64_6(a[0x5]); \ - t7[0x5] = B64_7(a[0x5]); \ - t0[0x6] = B64_0(a[0x6]) ^ PC64(0x60, r); \ - t1[0x6] = B64_1(a[0x6]); \ - t2[0x6] = B64_2(a[0x6]); \ - t3[0x6] = B64_3(a[0x6]); \ - t4[0x6] = B64_4(a[0x6]); \ - t5[0x6] = B64_5(a[0x6]); \ - t6[0x6] = B64_6(a[0x6]); \ - t7[0x6] = B64_7(a[0x6]); \ - t0[0x7] = B64_0(a[0x7]) ^ PC64(0x70, r); \ - t1[0x7] = B64_1(a[0x7]); \ - t2[0x7] = B64_2(a[0x7]); \ - t3[0x7] = B64_3(a[0x7]); \ - t4[0x7] = B64_4(a[0x7]); \ - t5[0x7] = B64_5(a[0x7]); \ - t6[0x7] = B64_6(a[0x7]); \ - t7[0x7] = B64_7(a[0x7]); \ - t0[0x8] = B64_0(a[0x8]) ^ PC64(0x80, r); \ - t1[0x8] = B64_1(a[0x8]); \ - t2[0x8] = B64_2(a[0x8]); \ - t3[0x8] = B64_3(a[0x8]); \ - t4[0x8] = B64_4(a[0x8]); \ - t5[0x8] = B64_5(a[0x8]); \ - t6[0x8] = B64_6(a[0x8]); \ - t7[0x8] = B64_7(a[0x8]); \ - t0[0x9] = B64_0(a[0x9]) ^ PC64(0x90, r); \ - t1[0x9] = B64_1(a[0x9]); \ - t2[0x9] = B64_2(a[0x9]); \ - t3[0x9] = B64_3(a[0x9]); \ - t4[0x9] = B64_4(a[0x9]); \ - t5[0x9] = B64_5(a[0x9]); \ - t6[0x9] = B64_6(a[0x9]); \ - t7[0x9] = B64_7(a[0x9]); \ - t0[0xA] = B64_0(a[0xA]) ^ PC64(0xA0, r); \ - t1[0xA] = B64_1(a[0xA]); \ - t2[0xA] = B64_2(a[0xA]); \ - t3[0xA] = B64_3(a[0xA]); \ - t4[0xA] = B64_4(a[0xA]); \ - t5[0xA] = B64_5(a[0xA]); \ - t6[0xA] = B64_6(a[0xA]); \ - t7[0xA] = B64_7(a[0xA]); \ - t0[0xB] = B64_0(a[0xB]) ^ PC64(0xB0, r); \ - t1[0xB] = B64_1(a[0xB]); \ - t2[0xB] = B64_2(a[0xB]); \ - t3[0xB] = B64_3(a[0xB]); \ - t4[0xB] = B64_4(a[0xB]); \ - t5[0xB] = B64_5(a[0xB]); \ - t6[0xB] = B64_6(a[0xB]); \ - t7[0xB] = B64_7(a[0xB]); \ - t0[0xC] = B64_0(a[0xC]) ^ PC64(0xC0, r); \ - t1[0xC] = B64_1(a[0xC]); \ - t2[0xC] = B64_2(a[0xC]); \ - t3[0xC] = B64_3(a[0xC]); \ - t4[0xC] = B64_4(a[0xC]); \ - t5[0xC] = B64_5(a[0xC]); \ - t6[0xC] = B64_6(a[0xC]); \ - t7[0xC] = B64_7(a[0xC]); \ - t0[0xD] = B64_0(a[0xD]) ^ PC64(0xD0, r); \ - t1[0xD] = B64_1(a[0xD]); \ - t2[0xD] = B64_2(a[0xD]); \ - t3[0xD] = B64_3(a[0xD]); \ - t4[0xD] = B64_4(a[0xD]); \ - t5[0xD] = B64_5(a[0xD]); \ - t6[0xD] = B64_6(a[0xD]); \ - t7[0xD] = B64_7(a[0xD]); \ - t0[0xE] = B64_0(a[0xE]) ^ PC64(0xE0, r); \ - t1[0xE] = B64_1(a[0xE]); \ - t2[0xE] = B64_2(a[0xE]); \ - t3[0xE] = B64_3(a[0xE]); \ - t4[0xE] = B64_4(a[0xE]); \ - t5[0xE] = B64_5(a[0xE]); \ - t6[0xE] = B64_6(a[0xE]); \ - t7[0xE] = B64_7(a[0xE]); \ - t0[0xF] = B64_0(a[0xF]) ^ PC64(0xF0, r); \ - t1[0xF] = B64_1(a[0xF]); \ - t2[0xF] = B64_2(a[0xF]); \ - t3[0xF] = B64_3(a[0xF]); \ - t4[0xF] = B64_4(a[0xF]); \ - t5[0xF] = B64_5(a[0xF]); \ - t6[0xF] = B64_6(a[0xF]); \ - t7[0xF] = B64_7(a[0xF]); \ - RBTT(a[0x0], 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0xB); \ - RBTT(a[0x1], 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xC); \ - RBTT(a[0x2], 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0xD); \ - RBTT(a[0x3], 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xE); \ - RBTT(a[0x4], 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xF); \ - RBTT(a[0x5], 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0x0); \ - RBTT(a[0x6], 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, 0x1); \ - RBTT(a[0x7], 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0x2); \ - RBTT(a[0x8], 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0x3); \ - RBTT(a[0x9], 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x4); \ - RBTT(a[0xA], 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x0, 0x5); \ - RBTT(a[0xB], 0xB, 0xC, 0xD, 0xE, 0xF, 0x0, 0x1, 0x6); \ - RBTT(a[0xC], 0xC, 0xD, 0xE, 0xF, 0x0, 0x1, 0x2, 0x7); \ - RBTT(a[0xD], 0xD, 0xE, 0xF, 0x0, 0x1, 0x2, 0x3, 0x8); \ - RBTT(a[0xE], 0xE, 0xF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x9); \ - RBTT(a[0xF], 0xF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xA); \ - } while (0) - -#define ROUND_BIG_Q(a, r) do { \ - a[0x0] ^= QC64(0x00, r); \ - a[0x1] ^= QC64(0x10, r); \ - a[0x2] ^= QC64(0x20, r); \ - a[0x3] ^= QC64(0x30, r); \ - a[0x4] ^= QC64(0x40, r); \ - a[0x5] ^= QC64(0x50, r); \ - a[0x6] ^= QC64(0x60, r); \ - a[0x7] ^= QC64(0x70, r); \ - a[0x8] ^= QC64(0x80, r); \ - a[0x9] ^= QC64(0x90, r); \ - a[0xA] ^= QC64(0xA0, r); \ - a[0xB] ^= QC64(0xB0, r); \ - a[0xC] ^= QC64(0xC0, r); \ - a[0xD] ^= QC64(0xD0, r); \ - a[0xE] ^= QC64(0xE0, r); \ - a[0xF] ^= QC64(0xF0, r); \ - t0[0x0] = B64_0(a[0x0]); \ - t1[0x0] = B64_1(a[0x0]); \ - t2[0x0] = B64_2(a[0x0]); \ - t3[0x0] = B64_3(a[0x0]); \ - t4[0x0] = B64_4(a[0x0]); \ - t5[0x0] = B64_5(a[0x0]); \ - t6[0x0] = B64_6(a[0x0]); \ - t7[0x0] = B64_7(a[0x0]); \ - t0[0x1] = B64_0(a[0x1]); \ - t1[0x1] = B64_1(a[0x1]); \ - t2[0x1] = B64_2(a[0x1]); \ - t3[0x1] = B64_3(a[0x1]); \ - t4[0x1] = B64_4(a[0x1]); \ - t5[0x1] = B64_5(a[0x1]); \ - t6[0x1] = B64_6(a[0x1]); \ - t7[0x1] = B64_7(a[0x1]); \ - t0[0x2] = B64_0(a[0x2]); \ - t1[0x2] = B64_1(a[0x2]); \ - t2[0x2] = B64_2(a[0x2]); \ - t3[0x2] = B64_3(a[0x2]); \ - t4[0x2] = B64_4(a[0x2]); \ - t5[0x2] = B64_5(a[0x2]); \ - t6[0x2] = B64_6(a[0x2]); \ - t7[0x2] = B64_7(a[0x2]); \ - t0[0x3] = B64_0(a[0x3]); \ - t1[0x3] = B64_1(a[0x3]); \ - t2[0x3] = B64_2(a[0x3]); \ - t3[0x3] = B64_3(a[0x3]); \ - t4[0x3] = B64_4(a[0x3]); \ - t5[0x3] = B64_5(a[0x3]); \ - t6[0x3] = B64_6(a[0x3]); \ - t7[0x3] = B64_7(a[0x3]); \ - t0[0x4] = B64_0(a[0x4]); \ - t1[0x4] = B64_1(a[0x4]); \ - t2[0x4] = B64_2(a[0x4]); \ - t3[0x4] = B64_3(a[0x4]); \ - t4[0x4] = B64_4(a[0x4]); \ - t5[0x4] = B64_5(a[0x4]); \ - t6[0x4] = B64_6(a[0x4]); \ - t7[0x4] = B64_7(a[0x4]); \ - t0[0x5] = B64_0(a[0x5]); \ - t1[0x5] = B64_1(a[0x5]); \ - t2[0x5] = B64_2(a[0x5]); \ - t3[0x5] = B64_3(a[0x5]); \ - t4[0x5] = B64_4(a[0x5]); \ - t5[0x5] = B64_5(a[0x5]); \ - t6[0x5] = B64_6(a[0x5]); \ - t7[0x5] = B64_7(a[0x5]); \ - t0[0x6] = B64_0(a[0x6]); \ - t1[0x6] = B64_1(a[0x6]); \ - t2[0x6] = B64_2(a[0x6]); \ - t3[0x6] = B64_3(a[0x6]); \ - t4[0x6] = B64_4(a[0x6]); \ - t5[0x6] = B64_5(a[0x6]); \ - t6[0x6] = B64_6(a[0x6]); \ - t7[0x6] = B64_7(a[0x6]); \ - t0[0x7] = B64_0(a[0x7]); \ - t1[0x7] = B64_1(a[0x7]); \ - t2[0x7] = B64_2(a[0x7]); \ - t3[0x7] = B64_3(a[0x7]); \ - t4[0x7] = B64_4(a[0x7]); \ - t5[0x7] = B64_5(a[0x7]); \ - t6[0x7] = B64_6(a[0x7]); \ - t7[0x7] = B64_7(a[0x7]); \ - t0[0x8] = B64_0(a[0x8]); \ - t1[0x8] = B64_1(a[0x8]); \ - t2[0x8] = B64_2(a[0x8]); \ - t3[0x8] = B64_3(a[0x8]); \ - t4[0x8] = B64_4(a[0x8]); \ - t5[0x8] = B64_5(a[0x8]); \ - t6[0x8] = B64_6(a[0x8]); \ - t7[0x8] = B64_7(a[0x8]); \ - t0[0x9] = B64_0(a[0x9]); \ - t1[0x9] = B64_1(a[0x9]); \ - t2[0x9] = B64_2(a[0x9]); \ - t3[0x9] = B64_3(a[0x9]); \ - t4[0x9] = B64_4(a[0x9]); \ - t5[0x9] = B64_5(a[0x9]); \ - t6[0x9] = B64_6(a[0x9]); \ - t7[0x9] = B64_7(a[0x9]); \ - t0[0xA] = B64_0(a[0xA]); \ - t1[0xA] = B64_1(a[0xA]); \ - t2[0xA] = B64_2(a[0xA]); \ - t3[0xA] = B64_3(a[0xA]); \ - t4[0xA] = B64_4(a[0xA]); \ - t5[0xA] = B64_5(a[0xA]); \ - t6[0xA] = B64_6(a[0xA]); \ - t7[0xA] = B64_7(a[0xA]); \ - t0[0xB] = B64_0(a[0xB]); \ - t1[0xB] = B64_1(a[0xB]); \ - t2[0xB] = B64_2(a[0xB]); \ - t3[0xB] = B64_3(a[0xB]); \ - t4[0xB] = B64_4(a[0xB]); \ - t5[0xB] = B64_5(a[0xB]); \ - t6[0xB] = B64_6(a[0xB]); \ - t7[0xB] = B64_7(a[0xB]); \ - t0[0xC] = B64_0(a[0xC]); \ - t1[0xC] = B64_1(a[0xC]); \ - t2[0xC] = B64_2(a[0xC]); \ - t3[0xC] = B64_3(a[0xC]); \ - t4[0xC] = B64_4(a[0xC]); \ - t5[0xC] = B64_5(a[0xC]); \ - t6[0xC] = B64_6(a[0xC]); \ - t7[0xC] = B64_7(a[0xC]); \ - t0[0xD] = B64_0(a[0xD]); \ - t1[0xD] = B64_1(a[0xD]); \ - t2[0xD] = B64_2(a[0xD]); \ - t3[0xD] = B64_3(a[0xD]); \ - t4[0xD] = B64_4(a[0xD]); \ - t5[0xD] = B64_5(a[0xD]); \ - t6[0xD] = B64_6(a[0xD]); \ - t7[0xD] = B64_7(a[0xD]); \ - t0[0xE] = B64_0(a[0xE]); \ - t1[0xE] = B64_1(a[0xE]); \ - t2[0xE] = B64_2(a[0xE]); \ - t3[0xE] = B64_3(a[0xE]); \ - t4[0xE] = B64_4(a[0xE]); \ - t5[0xE] = B64_5(a[0xE]); \ - t6[0xE] = B64_6(a[0xE]); \ - t7[0xE] = B64_7(a[0xE]); \ - t0[0xF] = B64_0(a[0xF]); \ - t1[0xF] = B64_1(a[0xF]); \ - t2[0xF] = B64_2(a[0xF]); \ - t3[0xF] = B64_3(a[0xF]); \ - t4[0xF] = B64_4(a[0xF]); \ - t5[0xF] = B64_5(a[0xF]); \ - t6[0xF] = B64_6(a[0xF]); \ - t7[0xF] = B64_7(a[0xF]); \ - RBTT(a[0x0], 0x1, 0x3, 0x5, 0xB, 0x0, 0x2, 0x4, 0x6); \ - RBTT(a[0x1], 0x2, 0x4, 0x6, 0xC, 0x1, 0x3, 0x5, 0x7); \ - RBTT(a[0x2], 0x3, 0x5, 0x7, 0xD, 0x2, 0x4, 0x6, 0x8); \ - RBTT(a[0x3], 0x4, 0x6, 0x8, 0xE, 0x3, 0x5, 0x7, 0x9); \ - RBTT(a[0x4], 0x5, 0x7, 0x9, 0xF, 0x4, 0x6, 0x8, 0xA); \ - RBTT(a[0x5], 0x6, 0x8, 0xA, 0x0, 0x5, 0x7, 0x9, 0xB); \ - RBTT(a[0x6], 0x7, 0x9, 0xB, 0x1, 0x6, 0x8, 0xA, 0xC); \ - RBTT(a[0x7], 0x8, 0xA, 0xC, 0x2, 0x7, 0x9, 0xB, 0xD); \ - RBTT(a[0x8], 0x9, 0xB, 0xD, 0x3, 0x8, 0xA, 0xC, 0xE); \ - RBTT(a[0x9], 0xA, 0xC, 0xE, 0x4, 0x9, 0xB, 0xD, 0xF); \ - RBTT(a[0xA], 0xB, 0xD, 0xF, 0x5, 0xA, 0xC, 0xE, 0x0); \ - RBTT(a[0xB], 0xC, 0xE, 0x0, 0x6, 0xB, 0xD, 0xF, 0x1); \ - RBTT(a[0xC], 0xD, 0xF, 0x1, 0x7, 0xC, 0xE, 0x0, 0x2); \ - RBTT(a[0xD], 0xE, 0x0, 0x2, 0x8, 0xD, 0xF, 0x1, 0x3); \ - RBTT(a[0xE], 0xF, 0x1, 0x3, 0x9, 0xE, 0x0, 0x2, 0x4); \ - RBTT(a[0xF], 0x0, 0x2, 0x4, 0xA, 0xF, 0x1, 0x3, 0x5); \ - } while (0) - -#define PERM_BIG_P(a, start, end) do { \ - for (u = start; u < end; u++) { \ - ROUND_BIG_P(a, u); \ - } \ - } while (0) +#ifndef GROESTLCOIN_CL +#define GROESTLCOIN_CL + +#if __ENDIAN_LITTLE__ +#define SPH_LITTLE_ENDIAN 1 +#else +#define SPH_BIG_ENDIAN 1 +#endif + +#define SPH_UPTR sph_u64 + +typedef unsigned int sph_u32; +typedef int sph_s32; +#ifndef __OPENCL_VERSION__ +typedef unsigned long long sph_u64; +typedef long long sph_s64; +#else +typedef unsigned long sph_u64; +typedef long sph_s64; +#endif + +#define SPH_64 1 +#define SPH_64_TRUE 1 + +#define SPH_C32(x) ((sph_u32)(x ## U)) +#define SPH_T32(x) (as_uint(x)) +#define SPH_ROTL32(x, n) rotate(as_uint(x), as_uint(n)) +#define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n))) + +#define SPH_C64(x) ((sph_u64)(x ## UL)) +#define SPH_T64(x) (as_ulong(x)) +#define SPH_ROTL64(x, n) rotate(as_ulong(x), (n) & 0xFFFFFFFFFFFFFFFFUL) +#define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n))) + +#define SPH_ECHO_64 1 +#define SPH_SIMD_NOCOPY 0 +#define SPH_CUBEHASH_UNROLL 0 + +#ifndef SPH_LUFFA_PARALLEL + #define SPH_LUFFA_PARALLEL 0 +#endif + +#include "groestl.cl" + +#define SWAP4(x) as_uint(as_uchar4(x).wzyx) +#define SWAP8(x) as_ulong(as_uchar8(x).s76543210) + +#if SPH_BIG_ENDIAN + #define ENC64E(x) SWAP8(x) + #define DEC64E(x) SWAP8(*(const __global sph_u64 *) (x)); +#else + #define ENC64E(x) (x) + #define DEC64E(x) (*(const __global sph_u64 *) (x)); +#endif + +#define ROL32(x, n) rotate(x, (uint) n) +#define SHR(x, n) ((x) >> n) +#define SWAP32(a) (as_uint(as_uchar4(a).wzyx)) + +#define S0(x) (ROL32(x, 25) ^ ROL32(x, 14) ^ SHR(x, 3)) +#define S1(x) (ROL32(x, 15) ^ ROL32(x, 13) ^ SHR(x, 10)) + +#define S2(x) (ROL32(x, 30) ^ ROL32(x, 19) ^ ROL32(x, 10)) +#define S3(x) (ROL32(x, 26) ^ ROL32(x, 21) ^ ROL32(x, 7)) + +#define P(a,b,c,d,e,f,g,h,x,K) \ +{ \ + temp1 = h + S3(e) + F1(e,f,g) + (K + x); \ + d += temp1; h = temp1 + S2(a) + F0(a,b,c); \ +} -#define PERM_BIG_Q(a) do { \ - /* for (ulong u = 0; u < (14UL << 56); u += (1UL << 56)) { */ \ - for (u = 0; u < 14; u++) { \ - ROUND_BIG_Q(a, u); \ - } \ - } while (0) +#define PLAST(a,b,c,d,e,f,g,h,x,K) \ +{ \ + d += h + S3(e) + F1(e,f,g) + (x + K); \ +} +#define F0(y, x, z) bitselect(z, y, z ^ x) +#define F1(x, y, z) bitselect(z, y, x) + +#define R0 (W0 = S1(W14) + W9 + S0(W1) + W0) +#define R1 (W1 = S1(W15) + W10 + S0(W2) + W1) +#define R2 (W2 = S1(W0) + W11 + S0(W3) + W2) +#define R3 (W3 = S1(W1) + W12 + S0(W4) + W3) +#define R4 (W4 = S1(W2) + W13 + S0(W5) + W4) +#define R5 (W5 = S1(W3) + W14 + S0(W6) + W5) +#define R6 (W6 = S1(W4) + W15 + S0(W7) + W6) +#define R7 (W7 = S1(W5) + W0 + S0(W8) + W7) +#define R8 (W8 = S1(W6) + W1 + S0(W9) + W8) +#define R9 (W9 = S1(W7) + W2 + S0(W10) + W9) +#define R10 (W10 = S1(W8) + W3 + S0(W11) + W10) +#define R11 (W11 = S1(W9) + W4 + S0(W12) + W11) +#define R12 (W12 = S1(W10) + W5 + S0(W13) + W12) +#define R13 (W13 = S1(W11) + W6 + S0(W14) + W13) +#define R14 (W14 = S1(W12) + W7 + S0(W15) + W14) +#define R15 (W15 = S1(W13) + W8 + S0(W0) + W15) + +#define RD14 (S1(W12) + W7 + S0(W15) + W14) +#define RD15 (S1(W13) + W8 + S0(W0) + W15) __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) -__kernel void search(__global unsigned char* block, volatile __global uint* output, const ulong target) { - __local ulong T2[256], T3[256], T4[256], T5[256], T6[256], T7[256]; - uint u; - - // for (u = get_local_id(0); u < 256; u += get_local_size(0)) { - u = get_local_id(0); - /* - T1[u] = T1_G[u]; - T2[u] = T2_G[u]; - T3[u] = T3_G[u]; - T4[u] = T4_G[u]; - T5[u] = T5_G[u]; - T6[u] = T6_G[u]; - T7[u] = T7_G[u]; - */ - // create other tables based on T0: avoids keeping them in the kernel. -// T1[u] = ROTL64(T0[u], 8UL); - T2[u] = ROTL64(T0[u], 16UL); - T3[u] = ROTL64(T0[u], 24UL); - T4[u] = ROTL64(T0[u], 32UL); - T5[u] = ROTL64(T0[u], 40UL); - T6[u] = ROTL64(T0[u], 48UL); - T7[u] = ROTL64(T0[u], 56UL); - barrier(CLK_LOCAL_MEM_FENCE); - - ulong g[16], m[16], t0[16], t1[16], t2[16], t3[16], t4[16], t5[16], t6[16], t7[16]; - uint flag = 0, gid = get_global_id(0), r = 13; - +__kernel void search(__global unsigned char* block, volatile __global uint* output, const ulong target) +{ + uint gid = get_global_id(0); + union { + unsigned char h1[64]; + uint h4[16]; + ulong h8[8]; + } hash; + +#if !SPH_SMALL_FOOTPRINT_GROESTL + __local sph_u64 T0_C[256], T1_C[256], T2_C[256], T3_C[256]; + __local sph_u64 T4_C[256], T5_C[256], T6_C[256], T7_C[256]; +#else + __local sph_u64 T0_C[256], T4_C[256]; +#endif + int init = get_local_id(0); + int step = get_local_size(0); + + for (int i = init; i < 256; i += step) + { + T0_C[i] = T0[i]; + T4_C[i] = T4[i]; +#if !SPH_SMALL_FOOTPRINT_GROESTL + T1_C[i] = T1[i]; + T2_C[i] = T2[i]; + T3_C[i] = T3[i]; + T5_C[i] = T5[i]; + T6_C[i] = T6[i]; + T7_C[i] = T7[i]; +#endif + } + barrier(CLK_LOCAL_MEM_FENCE); // groestl +#define T0 T0_C +#define T1 T1_C +#define T2 T2_C +#define T3 T3_C +#define T4 T4_C +#define T5 T5_C +#define T6 T6_C +#define T7 T7_C + + + // groestl + { + sph_u64 H[16]; +//#pragma unroll 15 + for (unsigned int u = 0; u < 15; u ++) + H[u] = 0; +#if USE_LE + H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40); +#else + H[15] = (sph_u64)512; +#endif + + sph_u64 g[16], m[16]; m[0] = DEC64E(block + 0 * 8); m[1] = DEC64E(block + 1 * 8); m[2] = DEC64E(block + 2 * 8); @@ -1480,374 +195,93 @@ __kernel void search(__global unsigned char* block, volatile __global uint* outp m[8] = DEC64E(block + 8 * 8); m[9] = DEC64E(block + 9 * 8); m[9] &= 0x00000000FFFFFFFF; - m[9] |= ((ulong) gid << 32); - m[10] = 0x80; - -perm: - m[11] = 0; - m[12] = 0; - m[13] = 0; - m[14] = 0; - m[15] = M15; - -#pragma unroll - for (u = 0; u < 15; u++) g[u] = m[u]; - g[15] = M15 ^ H15; - - - g[0x0] ^= PC64(0x00, 0); - g[0x1] ^= PC64(0x10, 0); - g[0x2] ^= PC64(0x20, 0); - g[0x3] ^= PC64(0x30, 0); - g[0x4] ^= PC64(0x40, 0); - g[0x5] ^= PC64(0x50, 0); - g[0x6] ^= PC64(0x60, 0); - g[0x7] ^= PC64(0x70, 0); - g[0x8] ^= PC64(0x80, 0); - g[0x9] ^= PC64(0x90, 0); - g[0xA] ^= PC64(0xA0, 0); - g[0xB] = PC64(0xB0, 0); - g[0xC] = PC64(0xC0, 0); - g[0xD] = PC64(0xD0, 0); - g[0xE] = PC64(0xE0, 0); - g[0xF] ^= PC64(0xF0, 0); - t0[0x0] = B64_0(g[0x0]); - t1[0x0] = B64_1(g[0x0]); - t2[0x0] = B64_2(g[0x0]); - t3[0x0] = B64_3(g[0x0]); - t4[0x0] = B64_4(g[0x0]); - t5[0x0] = B64_5(g[0x0]); - t6[0x0] = B64_6(g[0x0]); - t7[0x0] = B64_7(g[0x0]); - t0[0x1] = B64_0(g[0x1]); - t1[0x1] = B64_1(g[0x1]); - t2[0x1] = B64_2(g[0x1]); - t3[0x1] = B64_3(g[0x1]); - t4[0x1] = B64_4(g[0x1]); - t5[0x1] = B64_5(g[0x1]); - t6[0x1] = B64_6(g[0x1]); - t7[0x1] = B64_7(g[0x1]); - t0[0x2] = B64_0(g[0x2]); - t1[0x2] = B64_1(g[0x2]); - t2[0x2] = B64_2(g[0x2]); - t3[0x2] = B64_3(g[0x2]); - t4[0x2] = B64_4(g[0x2]); - t5[0x2] = B64_5(g[0x2]); - t6[0x2] = B64_6(g[0x2]); - t7[0x2] = B64_7(g[0x2]); - t0[0x3] = B64_0(g[0x3]); - t1[0x3] = B64_1(g[0x3]); - t2[0x3] = B64_2(g[0x3]); - t3[0x3] = B64_3(g[0x3]); - t4[0x3] = B64_4(g[0x3]); - t5[0x3] = B64_5(g[0x3]); - t6[0x3] = B64_6(g[0x3]); - t7[0x3] = B64_7(g[0x3]); - t0[0x4] = B64_0(g[0x4]); - t1[0x4] = B64_1(g[0x4]); - t2[0x4] = B64_2(g[0x4]); - t3[0x4] = B64_3(g[0x4]); - t4[0x4] = B64_4(g[0x4]); - t5[0x4] = B64_5(g[0x4]); - t6[0x4] = B64_6(g[0x4]); - t7[0x4] = B64_7(g[0x4]); - t0[0x5] = B64_0(g[0x5]); - t1[0x5] = B64_1(g[0x5]); - t2[0x5] = B64_2(g[0x5]); - t3[0x5] = B64_3(g[0x5]); - t4[0x5] = B64_4(g[0x5]); - t5[0x5] = B64_5(g[0x5]); - t6[0x5] = B64_6(g[0x5]); - t7[0x5] = B64_7(g[0x5]); - t0[0x6] = B64_0(g[0x6]); - t1[0x6] = B64_1(g[0x6]); - t2[0x6] = B64_2(g[0x6]); - t3[0x6] = B64_3(g[0x6]); - t4[0x6] = B64_4(g[0x6]); - t5[0x6] = B64_5(g[0x6]); - t6[0x6] = B64_6(g[0x6]); - t7[0x6] = B64_7(g[0x6]); - t0[0x7] = B64_0(g[0x7]); - t1[0x7] = B64_1(g[0x7]); - t2[0x7] = B64_2(g[0x7]); - t3[0x7] = B64_3(g[0x7]); - t4[0x7] = B64_4(g[0x7]); - t5[0x7] = B64_5(g[0x7]); - t6[0x7] = B64_6(g[0x7]); - t7[0x7] = B64_7(g[0x7]); - t0[0x8] = B64_0(g[0x8]); - t1[0x8] = B64_1(g[0x8]); - t2[0x8] = B64_2(g[0x8]); - t3[0x8] = B64_3(g[0x8]); - t4[0x8] = B64_4(g[0x8]); - t5[0x8] = B64_5(g[0x8]); - t6[0x8] = B64_6(g[0x8]); - t7[0x8] = B64_7(g[0x8]); - t0[0x9] = B64_0(g[0x9]); - t1[0x9] = B64_1(g[0x9]); - t2[0x9] = B64_2(g[0x9]); - t3[0x9] = B64_3(g[0x9]); - t4[0x9] = B64_4(g[0x9]); - t5[0x9] = B64_5(g[0x9]); - t6[0x9] = B64_6(g[0x9]); - t7[0x9] = B64_7(g[0x9]); - t0[0xA] = B64_0(g[0xA]); - t1[0xA] = B64_1(g[0xA]); - t2[0xA] = B64_2(g[0xA]); - t3[0xA] = B64_3(g[0xA]); - t4[0xA] = B64_4(g[0xA]); - t5[0xA] = B64_5(g[0xA]); - t6[0xA] = B64_6(g[0xA]); - t7[0xA] = B64_7(g[0xA]); - t0[0xB] = B64_0(g[0xB]); - t1[0xB] = B64_1(g[0xB]); - t2[0xB] = B64_2(g[0xB]); - t3[0xB] = B64_3(g[0xB]); - t4[0xB] = B64_4(g[0xB]); - t5[0xB] = B64_5(g[0xB]); - t6[0xB] = B64_6(g[0xB]); - t7[0xB] = B64_7(g[0xB]); - t0[0xC] = B64_0(g[0xC]); - t1[0xC] = B64_1(g[0xC]); - t2[0xC] = B64_2(g[0xC]); - t3[0xC] = B64_3(g[0xC]); - t4[0xC] = B64_4(g[0xC]); - t5[0xC] = B64_5(g[0xC]); - t6[0xC] = B64_6(g[0xC]); - t7[0xC] = B64_7(g[0xC]); - t0[0xD] = B64_0(g[0xD]); - t1[0xD] = B64_1(g[0xD]); - t2[0xD] = B64_2(g[0xD]); - t3[0xD] = B64_3(g[0xD]); - t4[0xD] = B64_4(g[0xD]); - t5[0xD] = B64_5(g[0xD]); - t6[0xD] = B64_6(g[0xD]); - t7[0xD] = B64_7(g[0xD]); - t0[0xE] = B64_0(g[0xE]); - t1[0xE] = B64_1(g[0xE]); - t2[0xE] = B64_2(g[0xE]); - t3[0xE] = B64_3(g[0xE]); - t4[0xE] = B64_4(g[0xE]); - t5[0xE] = B64_5(g[0xE]); - t6[0xE] = B64_6(g[0xE]); - t7[0xE] = B64_7(g[0xE]); - t0[0xF] = B64_0(g[0xF]); - t1[0xF] = B64_1(g[0xF]); - t2[0xF] = B64_2(g[0xF]); - t3[0xF] = B64_3(g[0xF]); - t4[0xF] = B64_4(g[0xF]); - t5[0xF] = B64_5(g[0xF]); - t6[0xF] = B64_6(g[0xF]); - t7[0xF] = B64_7(g[0xF]); - g[0x0] = T0[t0[0x0]] ^ T1[t1[0x1]] ^ T2[t2[0x2]] ^ T3[t3[0x3]] ^ T4[t4[0x4]] ^ T5[t5[0x5]] ^ T6[t6[0x6]] ^ C64e(0x32f4a5f497a5c6c6); - g[0x1] = T0[t0[0x1]] ^ T1[t1[0x2]] ^ T2[t2[0x3]] ^ T3[t3[0x4]] ^ T4[t4[0x5]] ^ T5[t5[0x6]] ^ T6[t6[0x7]] ^ C64e(0x32f4a5f497a5c6c6); - g[0x2] = T0[t0[0x2]] ^ T1[t1[0x3]] ^ T2[t2[0x4]] ^ T3[t3[0x5]] ^ T4[t4[0x6]] ^ T5[t5[0x7]] ^ T6[t6[0x8]] ^ C64e(0x32f4a5f497a5c6c6); - g[0x3] = T0[t0[0x3]] ^ T1[t1[0x4]] ^ T2[t2[0x5]] ^ T3[t3[0x6]] ^ T4[t4[0x7]] ^ T5[t5[0x8]] ^ T6[t6[0x9]] ^ C64e(0x32f4a5f497a5c6c6); - g[0x4] = T0[t0[0x4]] ^ T1[t1[0x5]] ^ T2[t2[0x6]] ^ T3[t3[0x7]] ^ T4[t4[0x8]] ^ T5[t5[0x9]] ^ C64e(0xf4a5f497a5c6c632) ^ T7[t7[0xF]]; - g[0x5] = T0[t0[0x5]] ^ T1[t1[0x6]] ^ T2[t2[0x7]] ^ T3[t3[0x8]] ^ T4[t4[0x9]] ^ C64e(0xa5f497a5c6c632f4) ^ C64e(0xf4a5f497a5c6c632) ^ T7[t7[0x0]]; - g[0x6] = T0[t0[0x6]] ^ T1[t1[0x7]] ^ T2[t2[0x8]] ^ T3[t3[0x9]] ^ C64e(0xf497a5c6c632f4a5) ^ C64e(0xa5f497a5c6c632f4) ^ C64e(0xf4a5f497a5c6c632) ^ T7[t7[0x1]]; - g[0x7] = T0[t0[0x7]] ^ T1[t1[0x8]] ^ T2[t2[0x9]] ^ C64e(0x97a5c6c632f4a5f4) ^ C64e(0xf497a5c6c632f4a5) ^ C64e(0xa5f497a5c6c632f4) ^ C64e(0xf4a5f497a5c6c632) ^ T7[t7[0x2]]; - g[0x8] = T0[t0[0x8]] ^ T1[t1[0x9]] ^ C64e(0xa5c6c632f4a5f497) ^ C64e(0x97a5c6c632f4a5f4) ^ C64e(0xf497a5c6c632f4a5) ^ C64e(0xa5f497a5c6c632f4) ^ C64e(0xf4a5f497a5c6c632) ^ T7[t7[0x3]]; - g[0x9] = T0[t0[0x9]] ^ C64e(0xc6c632f4a5f497a5) ^ C64e(0xa5c6c632f4a5f497) ^ C64e(0x97a5c6c632f4a5f4) ^ C64e(0xf497a5c6c632f4a5) ^ C64e(0xa5f497a5c6c632f4) ^ T6[t6[0xF]] ^ T7[t7[0x4]]; - g[0xA] = T0[t0[0xA]] ^ C64e(0xc6c632f4a5f497a5) ^ C64e(0xa5c6c632f4a5f497) ^ C64e(0x97a5c6c632f4a5f4) ^ C64e(0xf497a5c6c632f4a5) ^ T5[t5[0xF]] ^ T6[t6[0x0]] ^ T7[t7[0x5]]; - g[0xB] = T0[t0[0xB]] ^ C64e(0xc6c632f4a5f497a5) ^ C64e(0xa5c6c632f4a5f497) ^ C64e(0x97a5c6c632f4a5f4) ^ T4[t4[0xF]] ^ T5[t5[0x0]] ^ T6[t6[0x1]] ^ T7[t7[0x6]]; - g[0xC] = T0[t0[0xC]] ^ C64e(0xc6c632f4a5f497a5) ^ C64e(0xa5c6c632f4a5f497) ^ T3[t3[0xF]] ^ T4[t4[0x0]] ^ T5[t5[0x1]] ^ T6[t6[0x2]] ^ T7[t7[0x7]]; - g[0xD] = T0[t0[0xD]] ^ C64e(0xc6c632f4a5f497a5) ^ T2[t2[0xF]] ^ T3[t3[0x0]] ^ T4[t4[0x1]] ^ T5[t5[0x2]] ^ T6[t6[0x3]] ^ T7[t7[0x8]]; - g[0xE] = T0[t0[0xE]] ^ T1[t1[0xF]] ^ T2[t2[0x0]] ^ T3[t3[0x1]] ^ T4[t4[0x2]] ^ T5[t5[0x3]] ^ T6[t6[0x4]] ^ T7[t7[0x9]]; - g[0xF] = T0[t0[0xF]] ^ T1[t1[0x0]] ^ T2[t2[0x1]] ^ T3[t3[0x2]] ^ T4[t4[0x3]] ^ T5[t5[0x4]] ^ T6[t6[0x5]] ^ T7[t7[0xA]]; - + m[9] |= ((sph_u64) gid << 32); + +//#pragma unroll 16 + for (unsigned int u = 0; u < 16; u ++) + g[u] = m[u] ^ H[u]; + m[10] = 0x80; g[10] = m[10] ^ H[10]; + m[11] = 0; g[11] = m[11] ^ H[11]; + m[12] = 0; g[12] = m[12] ^ H[12]; + m[13] = 0; g[13] = m[13] ^ H[13]; + m[14] = 0; g[14] = m[14] ^ H[14]; + m[15] = 0x100000000000000; g[15] = m[15] ^ H[15]; + PERM_BIG_P(g); + PERM_BIG_Q(m); - PERM_BIG_P(g, 1, 14); +//#pragma unroll 16 + for (unsigned int u = 0; u < 16; u ++) + H[u] ^= g[u] ^ m[u]; + sph_u64 xH[16]; + +//#pragma unroll 16 + for (unsigned int u = 0; u < 16; u ++) + xH[u] = H[u]; + PERM_BIG_P(xH); + +//#pragma unroll 16 + for (unsigned int u = 0; u < 16; u ++) + H[u] ^= xH[u]; + +//#pragma unroll 8 + for (unsigned int u = 0; u < 8; u ++) + hash.h8[u] = ENC64E(H[u + 8]); + +//#pragma unroll 15 + for (unsigned int u = 0; u < 15; u ++) + H[u] = 0; + #if USE_LE + H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40); + #else + H[15] = (sph_u64)512; + #endif + + m[0] = hash.h8[0]; + m[1] = hash.h8[1]; + m[2] = hash.h8[2]; + m[3] = hash.h8[3]; + m[4] = hash.h8[4]; + m[5] = hash.h8[5]; + m[6] = hash.h8[6]; + m[7] = hash.h8[7]; + +//#pragma unroll 16 + for (unsigned int u = 0; u < 16; u ++) + g[u] = m[u] ^ H[u]; + m[8] = 0x80; g[8] = m[8] ^ H[8]; + m[9] = 0; g[9] = m[9] ^ H[9]; + m[10] = 0; g[10] = m[10] ^ H[10]; + m[11] = 0; g[11] = m[11] ^ H[11]; + m[12] = 0; g[12] = m[12] ^ H[12]; + m[13] = 0; g[13] = m[13] ^ H[13]; + m[14] = 0; g[14] = m[14] ^ H[14]; + m[15] = 0x100000000000000; g[15] = m[15] ^ H[15]; + PERM_BIG_P(g); PERM_BIG_Q(m); -#pragma unroll - for (u = 0; u < 16; u++) g[u] ^= m[u]; -#pragma unroll - for (u = 0; u < 8; u++) m[u] = g[u + 8]; - g[15] ^= H15; - PERM_BIG_P(g, 0, r); +//#pragma unroll 16 + for (unsigned int u = 0; u < 16; u ++) + H[u] ^= g[u] ^ m[u]; -round: -// move the ^= to the relevant first byte down here? tried that, was slower?!?!? - g[0x0] ^= PC64(0x00, r); - g[0x1] ^= PC64(0x10, r); - g[0x6] ^= PC64(0x60, r); - g[0xB] ^= PC64(0xB0, r); - g[0xC] ^= PC64(0xC0, r); - g[0xD] ^= PC64(0xD0, r); - g[0xE] ^= PC64(0xE0, r); - g[0xF] ^= PC64(0xF0, r); - t0[0x0] = B64_0(g[0x0]); - t1[0x0] = B64_1(g[0x0]); - t2[0x0] = B64_2(g[0x0]); - t3[0x0] = B64_3(g[0x0]); - t4[0x0] = B64_4(g[0x0]); - t5[0x0] = B64_5(g[0x0]); - t6[0x0] = B64_6(g[0x0]); - t7[0x0] = B64_7(g[0x0]); - t0[0x1] = B64_0(g[0x1]); - t1[0x1] = B64_1(g[0x1]); - t2[0x1] = B64_2(g[0x1]); - t3[0x1] = B64_3(g[0x1]); - t4[0x1] = B64_4(g[0x1]); - t5[0x1] = B64_5(g[0x1]); - t6[0x1] = B64_6(g[0x1]); - t7[0x1] = B64_7(g[0x1]); - t0[0x6] = B64_0(g[0x6]); - t1[0x6] = B64_1(g[0x6]); - t2[0x6] = B64_2(g[0x6]); - t3[0x6] = B64_3(g[0x6]); - t4[0x6] = B64_4(g[0x6]); - t5[0x6] = B64_5(g[0x6]); - t6[0x6] = B64_6(g[0x6]); - t7[0x6] = B64_7(g[0x6]); - t0[0xB] = B64_0(g[0xB]); - t1[0xB] = B64_1(g[0xB]); - t2[0xB] = B64_2(g[0xB]); - t3[0xB] = B64_3(g[0xB]); - t4[0xB] = B64_4(g[0xB]); - t5[0xB] = B64_5(g[0xB]); - t6[0xB] = B64_6(g[0xB]); - t7[0xB] = B64_7(g[0xB]); - t0[0xC] = B64_0(g[0xC]); - t1[0xC] = B64_1(g[0xC]); - t2[0xC] = B64_2(g[0xC]); - t3[0xC] = B64_3(g[0xC]); - t4[0xC] = B64_4(g[0xC]); - t5[0xC] = B64_5(g[0xC]); - t6[0xC] = B64_6(g[0xC]); - t7[0xC] = B64_7(g[0xC]); - t0[0xD] = B64_0(g[0xD]); - t1[0xD] = B64_1(g[0xD]); - t2[0xD] = B64_2(g[0xD]); - t3[0xD] = B64_3(g[0xD]); - t4[0xD] = B64_4(g[0xD]); - t5[0xD] = B64_5(g[0xD]); - t6[0xD] = B64_6(g[0xD]); - t7[0xD] = B64_7(g[0xD]); - t0[0xE] = B64_0(g[0xE]); - t1[0xE] = B64_1(g[0xE]); - t2[0xE] = B64_2(g[0xE]); - t3[0xE] = B64_3(g[0xE]); - t4[0xE] = B64_4(g[0xE]); - t5[0xE] = B64_5(g[0xE]); - t6[0xE] = B64_6(g[0xE]); - t7[0xE] = B64_7(g[0xE]); - t0[0xF] = B64_0(g[0xF]); - t1[0xF] = B64_1(g[0xF]); - t2[0xF] = B64_2(g[0xF]); - t3[0xF] = B64_3(g[0xF]); - t4[0xF] = B64_4(g[0xF]); - t5[0xF] = B64_5(g[0xF]); - t6[0xF] = B64_6(g[0xF]); - t7[0xF] = B64_7(g[0xF]); + //#pragma unroll 16 + for (unsigned int u = 0; u < 16; u ++) + xH[u] = H[u]; + PERM_BIG_P(xH); - if (flag < 2) { - g[0x2] ^= PC64(0x20, r); - g[0x3] ^= PC64(0x30, r); - g[0x4] ^= PC64(0x40, r); - g[0x5] ^= PC64(0x50, r); - g[0x7] ^= PC64(0x70, r); - g[0x8] ^= PC64(0x80, r); - g[0x9] ^= PC64(0x90, r); - g[0xA] ^= PC64(0xA0, r); - t0[0x2] = B64_0(g[0x2]); - t1[0x2] = B64_1(g[0x2]); - t2[0x2] = B64_2(g[0x2]); - t3[0x2] = B64_3(g[0x2]); - t4[0x2] = B64_4(g[0x2]); - t5[0x2] = B64_5(g[0x2]); - t6[0x2] = B64_6(g[0x2]); - t7[0x2] = B64_7(g[0x2]); - t0[0x3] = B64_0(g[0x3]); - t1[0x3] = B64_1(g[0x3]); - t2[0x3] = B64_2(g[0x3]); - t3[0x3] = B64_3(g[0x3]); - t4[0x3] = B64_4(g[0x3]); - t5[0x3] = B64_5(g[0x3]); - t6[0x3] = B64_6(g[0x3]); - t7[0x3] = B64_7(g[0x3]); - t0[0x4] = B64_0(g[0x4]); - t1[0x4] = B64_1(g[0x4]); - t2[0x4] = B64_2(g[0x4]); - t3[0x4] = B64_3(g[0x4]); - t4[0x4] = B64_4(g[0x4]); - t5[0x4] = B64_5(g[0x4]); - t6[0x4] = B64_6(g[0x4]); - t7[0x4] = B64_7(g[0x4]); - t0[0x5] = B64_0(g[0x5]); - t1[0x5] = B64_1(g[0x5]); - t2[0x5] = B64_2(g[0x5]); - t3[0x5] = B64_3(g[0x5]); - t4[0x5] = B64_4(g[0x5]); - t5[0x5] = B64_5(g[0x5]); - t6[0x5] = B64_6(g[0x5]); - t7[0x5] = B64_7(g[0x5]); - t0[0x7] = B64_0(g[0x7]); - t1[0x7] = B64_1(g[0x7]); - t2[0x7] = B64_2(g[0x7]); - t3[0x7] = B64_3(g[0x7]); - t4[0x7] = B64_4(g[0x7]); - t5[0x7] = B64_5(g[0x7]); - t6[0x7] = B64_6(g[0x7]); - t7[0x7] = B64_7(g[0x7]); - t0[0x8] = B64_0(g[0x8]); - t1[0x8] = B64_1(g[0x8]); - t2[0x8] = B64_2(g[0x8]); - t3[0x8] = B64_3(g[0x8]); - t4[0x8] = B64_4(g[0x8]); - t5[0x8] = B64_5(g[0x8]); - t6[0x8] = B64_6(g[0x8]); - t7[0x8] = B64_7(g[0x8]); - t0[0x9] = B64_0(g[0x9]); - t1[0x9] = B64_1(g[0x9]); - t2[0x9] = B64_2(g[0x9]); - t3[0x9] = B64_3(g[0x9]); - t4[0x9] = B64_4(g[0x9]); - t5[0x9] = B64_5(g[0x9]); - t6[0x9] = B64_6(g[0x9]); - t7[0x9] = B64_7(g[0x9]); - t0[0xA] = B64_0(g[0xA]); - t1[0xA] = B64_1(g[0xA]); - t2[0xA] = B64_2(g[0xA]); - t3[0xA] = B64_3(g[0xA]); - t4[0xA] = B64_4(g[0xA]); - t5[0xA] = B64_5(g[0xA]); - t6[0xA] = B64_6(g[0xA]); - t7[0xA] = B64_7(g[0xA]); - if (flag == 0) { - RBTT(g[0x8], 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0x3); - RBTT(g[0x9], 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x4); - RBTT(g[0xA], 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x0, 0x5); - } else { - RBTT(g[0x0], 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0xB); - RBTT(g[0x1], 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xC); - RBTT(g[0x6], 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, 0x1); - } - RBTT(g[0xC], 0xC, 0xD, 0xE, 0xF, 0x0, 0x1, 0x2, 0x7); - RBTT(g[0xD], 0xD, 0xE, 0xF, 0x0, 0x1, 0x2, 0x3, 0x8); - RBTT(g[0xE], 0xE, 0xF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x9); - RBTT(g[0xF], 0xF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xA); - } - RBTT(g[0xB], 0xB, 0xC, 0xD, 0xE, 0xF, 0x0, 0x1, 0x6); + //#pragma unroll 16 + for (unsigned int u = 0; u < 16; u ++) + H[u] ^= xH[u]; - if (flag == 2) goto end; - if (flag++ == 1) { - r = 13; - goto round; - } +//#pragma unroll 8 + for (unsigned int u = 0; u < 8; u ++) + hash.h8[u] = H[u + 8]; - r = 12; -#pragma unroll - for (u = 0; u < 8; u++) m[u] ^= g[u + 8]; - m[7] ^= H15; - m[8] = 0x80; - m[9] = 0; - m[10] = 0; - goto perm; + barrier(CLK_GLOBAL_MEM_FENCE); + } -end: - if ((g[3 + 8] ^ m[3]) <= target) output[output[0xFF]++] = as_uint(as_uchar4(gid).wzyx); + bool result = (hash.h8[3] <= target); + if (result) + output[output[0xFF]++] = SWAP4(gid); } -#endif // DIAMOND_CL \ No newline at end of file +#endif // GROESTLCOIN_CL diff --git a/kernel/groestl256.cl b/kernel/groestl256.cl new file mode 100644 index 000000000..824910a35 --- /dev/null +++ b/kernel/groestl256.cl @@ -0,0 +1,415 @@ +/* $Id: groestl.c 260 2011-07-21 01:02:38Z tp $ */ +/* + * Groestl256 + * + * ==========================(LICENSE BEGIN)============================ + * Copyright (c) 2014 djm34 + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + +/* + * Apparently, the 32-bit-only version is not faster than the 64-bit + * version unless using the "small footprint" code on a 32-bit machine. + */ + +#define C64e(x) ((SPH_C64(x) >> 56) \ + | ((SPH_C64(x) >> 40) & SPH_C64(0x000000000000FF00)) \ + | ((SPH_C64(x) >> 24) & SPH_C64(0x0000000000FF0000)) \ + | ((SPH_C64(x) >> 8) & SPH_C64(0x00000000FF000000)) \ + | ((SPH_C64(x) << 8) & SPH_C64(0x000000FF00000000)) \ + | ((SPH_C64(x) << 24) & SPH_C64(0x0000FF0000000000)) \ + | ((SPH_C64(x) << 40) & SPH_C64(0x00FF000000000000)) \ + | ((SPH_C64(x) << 56) & SPH_C64(0xFF00000000000000))) +#define dec64e_aligned sph_dec64le_aligned +#define enc64e sph_enc64le +#define B64_0(x) ((x) & 0xFF) +#define B64_1(x) (((x) >> 8) & 0xFF) +#define B64_2(x) (((x) >> 16) & 0xFF) +#define B64_3(x) (((x) >> 24) & 0xFF) +#define B64_4(x) (((x) >> 32) & 0xFF) +#define B64_5(x) (((x) >> 40) & 0xFF) +#define B64_6(x) (((x) >> 48) & 0xFF) +#define B64_7(x) ((x) >> 56) +#define R64 SPH_ROTL64 +#define PC64(j, r) ((sph_u64)((j) + (r))) +#define QC64(j, r) (((sph_u64)(r) << 56) ^ SPH_T64(~((sph_u64)(j) << 56))) + +__constant static const sph_u64 T0[] = { + C64e(0xc632f4a5f497a5c6), C64e(0xf86f978497eb84f8), + C64e(0xee5eb099b0c799ee), C64e(0xf67a8c8d8cf78df6), + C64e(0xffe8170d17e50dff), C64e(0xd60adcbddcb7bdd6), + C64e(0xde16c8b1c8a7b1de), C64e(0x916dfc54fc395491), + C64e(0x6090f050f0c05060), C64e(0x0207050305040302), + C64e(0xce2ee0a9e087a9ce), C64e(0x56d1877d87ac7d56), + C64e(0xe7cc2b192bd519e7), C64e(0xb513a662a67162b5), + C64e(0x4d7c31e6319ae64d), C64e(0xec59b59ab5c39aec), + C64e(0x8f40cf45cf05458f), C64e(0x1fa3bc9dbc3e9d1f), + C64e(0x8949c040c0094089), C64e(0xfa68928792ef87fa), + C64e(0xefd03f153fc515ef), C64e(0xb29426eb267febb2), + C64e(0x8ece40c94007c98e), C64e(0xfbe61d0b1ded0bfb), + C64e(0x416e2fec2f82ec41), C64e(0xb31aa967a97d67b3), + C64e(0x5f431cfd1cbefd5f), C64e(0x456025ea258aea45), + C64e(0x23f9dabfda46bf23), C64e(0x535102f702a6f753), + C64e(0xe445a196a1d396e4), C64e(0x9b76ed5bed2d5b9b), + C64e(0x75285dc25deac275), C64e(0xe1c5241c24d91ce1), + C64e(0x3dd4e9aee97aae3d), C64e(0x4cf2be6abe986a4c), + C64e(0x6c82ee5aeed85a6c), C64e(0x7ebdc341c3fc417e), + C64e(0xf5f3060206f102f5), C64e(0x8352d14fd11d4f83), + C64e(0x688ce45ce4d05c68), C64e(0x515607f407a2f451), + C64e(0xd18d5c345cb934d1), C64e(0xf9e1180818e908f9), + C64e(0xe24cae93aedf93e2), C64e(0xab3e9573954d73ab), + C64e(0x6297f553f5c45362), C64e(0x2a6b413f41543f2a), + C64e(0x081c140c14100c08), C64e(0x9563f652f6315295), + C64e(0x46e9af65af8c6546), C64e(0x9d7fe25ee2215e9d), + C64e(0x3048782878602830), C64e(0x37cff8a1f86ea137), + C64e(0x0a1b110f11140f0a), C64e(0x2febc4b5c45eb52f), + C64e(0x0e151b091b1c090e), C64e(0x247e5a365a483624), + C64e(0x1badb69bb6369b1b), C64e(0xdf98473d47a53ddf), + C64e(0xcda76a266a8126cd), C64e(0x4ef5bb69bb9c694e), + C64e(0x7f334ccd4cfecd7f), C64e(0xea50ba9fbacf9fea), + C64e(0x123f2d1b2d241b12), C64e(0x1da4b99eb93a9e1d), + C64e(0x58c49c749cb07458), C64e(0x3446722e72682e34), + C64e(0x3641772d776c2d36), C64e(0xdc11cdb2cda3b2dc), + C64e(0xb49d29ee2973eeb4), C64e(0x5b4d16fb16b6fb5b), + C64e(0xa4a501f60153f6a4), C64e(0x76a1d74dd7ec4d76), + C64e(0xb714a361a37561b7), C64e(0x7d3449ce49face7d), + C64e(0x52df8d7b8da47b52), C64e(0xdd9f423e42a13edd), + C64e(0x5ecd937193bc715e), C64e(0x13b1a297a2269713), + C64e(0xa6a204f50457f5a6), C64e(0xb901b868b86968b9), + C64e(0x0000000000000000), C64e(0xc1b5742c74992cc1), + C64e(0x40e0a060a0806040), C64e(0xe3c2211f21dd1fe3), + C64e(0x793a43c843f2c879), C64e(0xb69a2ced2c77edb6), + C64e(0xd40dd9bed9b3bed4), C64e(0x8d47ca46ca01468d), + C64e(0x671770d970ced967), C64e(0x72afdd4bdde44b72), + C64e(0x94ed79de7933de94), C64e(0x98ff67d4672bd498), + C64e(0xb09323e8237be8b0), C64e(0x855bde4ade114a85), + C64e(0xbb06bd6bbd6d6bbb), C64e(0xc5bb7e2a7e912ac5), + C64e(0x4f7b34e5349ee54f), C64e(0xedd73a163ac116ed), + C64e(0x86d254c55417c586), C64e(0x9af862d7622fd79a), + C64e(0x6699ff55ffcc5566), C64e(0x11b6a794a7229411), + C64e(0x8ac04acf4a0fcf8a), C64e(0xe9d9301030c910e9), + C64e(0x040e0a060a080604), C64e(0xfe66988198e781fe), + C64e(0xa0ab0bf00b5bf0a0), C64e(0x78b4cc44ccf04478), + C64e(0x25f0d5bad54aba25), C64e(0x4b753ee33e96e34b), + C64e(0xa2ac0ef30e5ff3a2), C64e(0x5d4419fe19bafe5d), + C64e(0x80db5bc05b1bc080), C64e(0x0580858a850a8a05), + C64e(0x3fd3ecadec7ead3f), C64e(0x21fedfbcdf42bc21), + C64e(0x70a8d848d8e04870), C64e(0xf1fd0c040cf904f1), + C64e(0x63197adf7ac6df63), C64e(0x772f58c158eec177), + C64e(0xaf309f759f4575af), C64e(0x42e7a563a5846342), + C64e(0x2070503050403020), C64e(0xe5cb2e1a2ed11ae5), + C64e(0xfdef120e12e10efd), C64e(0xbf08b76db7656dbf), + C64e(0x8155d44cd4194c81), C64e(0x18243c143c301418), + C64e(0x26795f355f4c3526), C64e(0xc3b2712f719d2fc3), + C64e(0xbe8638e13867e1be), C64e(0x35c8fda2fd6aa235), + C64e(0x88c74fcc4f0bcc88), C64e(0x2e654b394b5c392e), + C64e(0x936af957f93d5793), C64e(0x55580df20daaf255), + C64e(0xfc619d829de382fc), C64e(0x7ab3c947c9f4477a), + C64e(0xc827efacef8bacc8), C64e(0xba8832e7326fe7ba), + C64e(0x324f7d2b7d642b32), C64e(0xe642a495a4d795e6), + C64e(0xc03bfba0fb9ba0c0), C64e(0x19aab398b3329819), + C64e(0x9ef668d16827d19e), C64e(0xa322817f815d7fa3), + C64e(0x44eeaa66aa886644), C64e(0x54d6827e82a87e54), + C64e(0x3bdde6abe676ab3b), C64e(0x0b959e839e16830b), + C64e(0x8cc945ca4503ca8c), C64e(0xc7bc7b297b9529c7), + C64e(0x6b056ed36ed6d36b), C64e(0x286c443c44503c28), + C64e(0xa72c8b798b5579a7), C64e(0xbc813de23d63e2bc), + C64e(0x1631271d272c1d16), C64e(0xad379a769a4176ad), + C64e(0xdb964d3b4dad3bdb), C64e(0x649efa56fac85664), + C64e(0x74a6d24ed2e84e74), C64e(0x1436221e22281e14), + C64e(0x92e476db763fdb92), C64e(0x0c121e0a1e180a0c), + C64e(0x48fcb46cb4906c48), C64e(0xb88f37e4376be4b8), + C64e(0x9f78e75de7255d9f), C64e(0xbd0fb26eb2616ebd), + C64e(0x43692aef2a86ef43), C64e(0xc435f1a6f193a6c4), + C64e(0x39dae3a8e372a839), C64e(0x31c6f7a4f762a431), + C64e(0xd38a593759bd37d3), C64e(0xf274868b86ff8bf2), + C64e(0xd583563256b132d5), C64e(0x8b4ec543c50d438b), + C64e(0x6e85eb59ebdc596e), C64e(0xda18c2b7c2afb7da), + C64e(0x018e8f8c8f028c01), C64e(0xb11dac64ac7964b1), + C64e(0x9cf16dd26d23d29c), C64e(0x49723be03b92e049), + C64e(0xd81fc7b4c7abb4d8), C64e(0xacb915fa1543faac), + C64e(0xf3fa090709fd07f3), C64e(0xcfa06f256f8525cf), + C64e(0xca20eaafea8fafca), C64e(0xf47d898e89f38ef4), + C64e(0x476720e9208ee947), C64e(0x1038281828201810), + C64e(0x6f0b64d564ded56f), C64e(0xf073838883fb88f0), + C64e(0x4afbb16fb1946f4a), C64e(0x5cca967296b8725c), + C64e(0x38546c246c702438), C64e(0x575f08f108aef157), + C64e(0x732152c752e6c773), C64e(0x9764f351f3355197), + C64e(0xcbae6523658d23cb), C64e(0xa125847c84597ca1), + C64e(0xe857bf9cbfcb9ce8), C64e(0x3e5d6321637c213e), + C64e(0x96ea7cdd7c37dd96), C64e(0x611e7fdc7fc2dc61), + C64e(0x0d9c9186911a860d), C64e(0x0f9b9485941e850f), + C64e(0xe04bab90abdb90e0), C64e(0x7cbac642c6f8427c), + C64e(0x712657c457e2c471), C64e(0xcc29e5aae583aacc), + C64e(0x90e373d8733bd890), C64e(0x06090f050f0c0506), + C64e(0xf7f4030103f501f7), C64e(0x1c2a36123638121c), + C64e(0xc23cfea3fe9fa3c2), C64e(0x6a8be15fe1d45f6a), + C64e(0xaebe10f91047f9ae), C64e(0x69026bd06bd2d069), + C64e(0x17bfa891a82e9117), C64e(0x9971e858e8295899), + C64e(0x3a5369276974273a), C64e(0x27f7d0b9d04eb927), + C64e(0xd991483848a938d9), C64e(0xebde351335cd13eb), + C64e(0x2be5ceb3ce56b32b), C64e(0x2277553355443322), + C64e(0xd204d6bbd6bfbbd2), C64e(0xa9399070904970a9), + C64e(0x07878089800e8907), C64e(0x33c1f2a7f266a733), + C64e(0x2decc1b6c15ab62d), C64e(0x3c5a66226678223c), + C64e(0x15b8ad92ad2a9215), C64e(0xc9a96020608920c9), + C64e(0x875cdb49db154987), C64e(0xaab01aff1a4fffaa), + C64e(0x50d8887888a07850), C64e(0xa52b8e7a8e517aa5), + C64e(0x03898a8f8a068f03), C64e(0x594a13f813b2f859), + C64e(0x09929b809b128009), C64e(0x1a2339173934171a), + C64e(0x651075da75cada65), C64e(0xd784533153b531d7), + C64e(0x84d551c65113c684), C64e(0xd003d3b8d3bbb8d0), + C64e(0x82dc5ec35e1fc382), C64e(0x29e2cbb0cb52b029), + C64e(0x5ac3997799b4775a), C64e(0x1e2d3311333c111e), + C64e(0x7b3d46cb46f6cb7b), C64e(0xa8b71ffc1f4bfca8), + C64e(0x6d0c61d661dad66d), C64e(0x2c624e3a4e583a2c) +}; + +__constant static const sph_u64 T4[] = { + C64e(0xf497a5c6c632f4a5), C64e(0x97eb84f8f86f9784), + C64e(0xb0c799eeee5eb099), C64e(0x8cf78df6f67a8c8d), + C64e(0x17e50dffffe8170d), C64e(0xdcb7bdd6d60adcbd), + C64e(0xc8a7b1dede16c8b1), C64e(0xfc395491916dfc54), + C64e(0xf0c050606090f050), C64e(0x0504030202070503), + C64e(0xe087a9cece2ee0a9), C64e(0x87ac7d5656d1877d), + C64e(0x2bd519e7e7cc2b19), C64e(0xa67162b5b513a662), + C64e(0x319ae64d4d7c31e6), C64e(0xb5c39aecec59b59a), + C64e(0xcf05458f8f40cf45), C64e(0xbc3e9d1f1fa3bc9d), + C64e(0xc00940898949c040), C64e(0x92ef87fafa689287), + C64e(0x3fc515efefd03f15), C64e(0x267febb2b29426eb), + C64e(0x4007c98e8ece40c9), C64e(0x1ded0bfbfbe61d0b), + C64e(0x2f82ec41416e2fec), C64e(0xa97d67b3b31aa967), + C64e(0x1cbefd5f5f431cfd), C64e(0x258aea45456025ea), + C64e(0xda46bf2323f9dabf), C64e(0x02a6f753535102f7), + C64e(0xa1d396e4e445a196), C64e(0xed2d5b9b9b76ed5b), + C64e(0x5deac27575285dc2), C64e(0x24d91ce1e1c5241c), + C64e(0xe97aae3d3dd4e9ae), C64e(0xbe986a4c4cf2be6a), + C64e(0xeed85a6c6c82ee5a), C64e(0xc3fc417e7ebdc341), + C64e(0x06f102f5f5f30602), C64e(0xd11d4f838352d14f), + C64e(0xe4d05c68688ce45c), C64e(0x07a2f451515607f4), + C64e(0x5cb934d1d18d5c34), C64e(0x18e908f9f9e11808), + C64e(0xaedf93e2e24cae93), C64e(0x954d73abab3e9573), + C64e(0xf5c453626297f553), C64e(0x41543f2a2a6b413f), + C64e(0x14100c08081c140c), C64e(0xf63152959563f652), + C64e(0xaf8c654646e9af65), C64e(0xe2215e9d9d7fe25e), + C64e(0x7860283030487828), C64e(0xf86ea13737cff8a1), + C64e(0x11140f0a0a1b110f), C64e(0xc45eb52f2febc4b5), + C64e(0x1b1c090e0e151b09), C64e(0x5a483624247e5a36), + C64e(0xb6369b1b1badb69b), C64e(0x47a53ddfdf98473d), + C64e(0x6a8126cdcda76a26), C64e(0xbb9c694e4ef5bb69), + C64e(0x4cfecd7f7f334ccd), C64e(0xbacf9feaea50ba9f), + C64e(0x2d241b12123f2d1b), C64e(0xb93a9e1d1da4b99e), + C64e(0x9cb0745858c49c74), C64e(0x72682e343446722e), + C64e(0x776c2d363641772d), C64e(0xcda3b2dcdc11cdb2), + C64e(0x2973eeb4b49d29ee), C64e(0x16b6fb5b5b4d16fb), + C64e(0x0153f6a4a4a501f6), C64e(0xd7ec4d7676a1d74d), + C64e(0xa37561b7b714a361), C64e(0x49face7d7d3449ce), + C64e(0x8da47b5252df8d7b), C64e(0x42a13edddd9f423e), + C64e(0x93bc715e5ecd9371), C64e(0xa226971313b1a297), + C64e(0x0457f5a6a6a204f5), C64e(0xb86968b9b901b868), + C64e(0x0000000000000000), C64e(0x74992cc1c1b5742c), + C64e(0xa080604040e0a060), C64e(0x21dd1fe3e3c2211f), + C64e(0x43f2c879793a43c8), C64e(0x2c77edb6b69a2ced), + C64e(0xd9b3bed4d40dd9be), C64e(0xca01468d8d47ca46), + C64e(0x70ced967671770d9), C64e(0xdde44b7272afdd4b), + C64e(0x7933de9494ed79de), C64e(0x672bd49898ff67d4), + C64e(0x237be8b0b09323e8), C64e(0xde114a85855bde4a), + C64e(0xbd6d6bbbbb06bd6b), C64e(0x7e912ac5c5bb7e2a), + C64e(0x349ee54f4f7b34e5), C64e(0x3ac116ededd73a16), + C64e(0x5417c58686d254c5), C64e(0x622fd79a9af862d7), + C64e(0xffcc55666699ff55), C64e(0xa722941111b6a794), + C64e(0x4a0fcf8a8ac04acf), C64e(0x30c910e9e9d93010), + C64e(0x0a080604040e0a06), C64e(0x98e781fefe669881), + C64e(0x0b5bf0a0a0ab0bf0), C64e(0xccf0447878b4cc44), + C64e(0xd54aba2525f0d5ba), C64e(0x3e96e34b4b753ee3), + C64e(0x0e5ff3a2a2ac0ef3), C64e(0x19bafe5d5d4419fe), + C64e(0x5b1bc08080db5bc0), C64e(0x850a8a050580858a), + C64e(0xec7ead3f3fd3ecad), C64e(0xdf42bc2121fedfbc), + C64e(0xd8e0487070a8d848), C64e(0x0cf904f1f1fd0c04), + C64e(0x7ac6df6363197adf), C64e(0x58eec177772f58c1), + C64e(0x9f4575afaf309f75), C64e(0xa584634242e7a563), + C64e(0x5040302020705030), C64e(0x2ed11ae5e5cb2e1a), + C64e(0x12e10efdfdef120e), C64e(0xb7656dbfbf08b76d), + C64e(0xd4194c818155d44c), C64e(0x3c30141818243c14), + C64e(0x5f4c352626795f35), C64e(0x719d2fc3c3b2712f), + C64e(0x3867e1bebe8638e1), C64e(0xfd6aa23535c8fda2), + C64e(0x4f0bcc8888c74fcc), C64e(0x4b5c392e2e654b39), + C64e(0xf93d5793936af957), C64e(0x0daaf25555580df2), + C64e(0x9de382fcfc619d82), C64e(0xc9f4477a7ab3c947), + C64e(0xef8bacc8c827efac), C64e(0x326fe7baba8832e7), + C64e(0x7d642b32324f7d2b), C64e(0xa4d795e6e642a495), + C64e(0xfb9ba0c0c03bfba0), C64e(0xb332981919aab398), + C64e(0x6827d19e9ef668d1), C64e(0x815d7fa3a322817f), + C64e(0xaa88664444eeaa66), C64e(0x82a87e5454d6827e), + C64e(0xe676ab3b3bdde6ab), C64e(0x9e16830b0b959e83), + C64e(0x4503ca8c8cc945ca), C64e(0x7b9529c7c7bc7b29), + C64e(0x6ed6d36b6b056ed3), C64e(0x44503c28286c443c), + C64e(0x8b5579a7a72c8b79), C64e(0x3d63e2bcbc813de2), + C64e(0x272c1d161631271d), C64e(0x9a4176adad379a76), + C64e(0x4dad3bdbdb964d3b), C64e(0xfac85664649efa56), + C64e(0xd2e84e7474a6d24e), C64e(0x22281e141436221e), + C64e(0x763fdb9292e476db), C64e(0x1e180a0c0c121e0a), + C64e(0xb4906c4848fcb46c), C64e(0x376be4b8b88f37e4), + C64e(0xe7255d9f9f78e75d), C64e(0xb2616ebdbd0fb26e), + C64e(0x2a86ef4343692aef), C64e(0xf193a6c4c435f1a6), + C64e(0xe372a83939dae3a8), C64e(0xf762a43131c6f7a4), + C64e(0x59bd37d3d38a5937), C64e(0x86ff8bf2f274868b), + C64e(0x56b132d5d5835632), C64e(0xc50d438b8b4ec543), + C64e(0xebdc596e6e85eb59), C64e(0xc2afb7dada18c2b7), + C64e(0x8f028c01018e8f8c), C64e(0xac7964b1b11dac64), + C64e(0x6d23d29c9cf16dd2), C64e(0x3b92e04949723be0), + C64e(0xc7abb4d8d81fc7b4), C64e(0x1543faacacb915fa), + C64e(0x09fd07f3f3fa0907), C64e(0x6f8525cfcfa06f25), + C64e(0xea8fafcaca20eaaf), C64e(0x89f38ef4f47d898e), + C64e(0x208ee947476720e9), C64e(0x2820181010382818), + C64e(0x64ded56f6f0b64d5), C64e(0x83fb88f0f0738388), + C64e(0xb1946f4a4afbb16f), C64e(0x96b8725c5cca9672), + C64e(0x6c70243838546c24), C64e(0x08aef157575f08f1), + C64e(0x52e6c773732152c7), C64e(0xf33551979764f351), + C64e(0x658d23cbcbae6523), C64e(0x84597ca1a125847c), + C64e(0xbfcb9ce8e857bf9c), C64e(0x637c213e3e5d6321), + C64e(0x7c37dd9696ea7cdd), C64e(0x7fc2dc61611e7fdc), + C64e(0x911a860d0d9c9186), C64e(0x941e850f0f9b9485), + C64e(0xabdb90e0e04bab90), C64e(0xc6f8427c7cbac642), + C64e(0x57e2c471712657c4), C64e(0xe583aacccc29e5aa), + C64e(0x733bd89090e373d8), C64e(0x0f0c050606090f05), + C64e(0x03f501f7f7f40301), C64e(0x3638121c1c2a3612), + C64e(0xfe9fa3c2c23cfea3), C64e(0xe1d45f6a6a8be15f), + C64e(0x1047f9aeaebe10f9), C64e(0x6bd2d06969026bd0), + C64e(0xa82e911717bfa891), C64e(0xe82958999971e858), + C64e(0x6974273a3a536927), C64e(0xd04eb92727f7d0b9), + C64e(0x48a938d9d9914838), C64e(0x35cd13ebebde3513), + C64e(0xce56b32b2be5ceb3), C64e(0x5544332222775533), + C64e(0xd6bfbbd2d204d6bb), C64e(0x904970a9a9399070), + C64e(0x800e890707878089), C64e(0xf266a73333c1f2a7), + C64e(0xc15ab62d2decc1b6), C64e(0x6678223c3c5a6622), + C64e(0xad2a921515b8ad92), C64e(0x608920c9c9a96020), + C64e(0xdb154987875cdb49), C64e(0x1a4fffaaaab01aff), + C64e(0x88a0785050d88878), C64e(0x8e517aa5a52b8e7a), + C64e(0x8a068f0303898a8f), C64e(0x13b2f859594a13f8), + C64e(0x9b12800909929b80), C64e(0x3934171a1a233917), + C64e(0x75cada65651075da), C64e(0x53b531d7d7845331), + C64e(0x5113c68484d551c6), C64e(0xd3bbb8d0d003d3b8), + C64e(0x5e1fc38282dc5ec3), C64e(0xcb52b02929e2cbb0), + C64e(0x99b4775a5ac39977), C64e(0x333c111e1e2d3311), + C64e(0x46f6cb7b7b3d46cb), C64e(0x1f4bfca8a8b71ffc), + C64e(0x61dad66d6d0c61d6), C64e(0x4e583a2c2c624e3a) +}; + +#define RSTT(d, a, b0, b1, b2, b3, b4, b5, b6, b7) do { \ + t[d] = T0[B64_0(a[b0])] \ + ^ R64(T0[B64_1(a[b1])], 8) \ + ^ R64(T0[B64_2(a[b2])], 16) \ + ^ R64(T0[B64_3(a[b3])], 24) \ + ^ T4[B64_4(a[b4])] \ + ^ R64(T4[B64_5(a[b5])], 8) \ + ^ R64(T4[B64_6(a[b6])], 16) \ + ^ R64(T4[B64_7(a[b7])], 24); \ + } while (0) + +#define ROUND_SMALL_P(a, r) do { \ + a[0] ^= PC64(0x00, r); \ + a[1] ^= PC64(0x10, r); \ + a[2] ^= PC64(0x20, r); \ + a[3] ^= PC64(0x30, r); \ + a[4] ^= PC64(0x40, r); \ + a[5] ^= PC64(0x50, r); \ + a[6] ^= PC64(0x60, r); \ + a[7] ^= PC64(0x70, r); \ + RSTT(0, a, 0, 1, 2, 3, 4, 5, 6, 7); \ + RSTT(1, a, 1, 2, 3, 4, 5, 6, 7, 0); \ + RSTT(2, a, 2, 3, 4, 5, 6, 7, 0, 1); \ + RSTT(3, a, 3, 4, 5, 6, 7, 0, 1, 2); \ + RSTT(4, a, 4, 5, 6, 7, 0, 1, 2, 3); \ + RSTT(5, a, 5, 6, 7, 0, 1, 2, 3, 4); \ + RSTT(6, a, 6, 7, 0, 1, 2, 3, 4, 5); \ + RSTT(7, a, 7, 0, 1, 2, 3, 4, 5, 6); \ + a[0] = t[0]; \ + a[1] = t[1]; \ + a[2] = t[2]; \ + a[3] = t[3]; \ + a[4] = t[4]; \ + a[5] = t[5]; \ + a[6] = t[6]; \ + a[7] = t[7]; \ + } while (0) + +#define ROUND_SMALL_Pf(a, r) do { \ + a[0] ^= PC64(0x00, r); \ + a[1] ^= PC64(0x10, r); \ + a[2] ^= PC64(0x20, r); \ + a[3] ^= PC64(0x30, r); \ + a[4] ^= PC64(0x40, r); \ + a[5] ^= PC64(0x50, r); \ + a[6] ^= PC64(0x60, r); \ + a[7] ^= PC64(0x70, r); \ + RSTT(7, a, 7, 0, 1, 2, 3, 4, 5, 6); \ + a[7] = t[7]; \ + } while (0) + +#define ROUND_SMALL_Q(a, r) do { \ + a[0] ^= QC64(0x00, r); \ + a[1] ^= QC64(0x10, r); \ + a[2] ^= QC64(0x20, r); \ + a[3] ^= QC64(0x30, r); \ + a[4] ^= QC64(0x40, r); \ + a[5] ^= QC64(0x50, r); \ + a[6] ^= QC64(0x60, r); \ + a[7] ^= QC64(0x70, r); \ + RSTT(0, a, 1, 3, 5, 7, 0, 2, 4, 6); \ + RSTT(1, a, 2, 4, 6, 0, 1, 3, 5, 7); \ + RSTT(2, a, 3, 5, 7, 1, 2, 4, 6, 0); \ + RSTT(3, a, 4, 6, 0, 2, 3, 5, 7, 1); \ + RSTT(4, a, 5, 7, 1, 3, 4, 6, 0, 2); \ + RSTT(5, a, 6, 0, 2, 4, 5, 7, 1, 3); \ + RSTT(6, a, 7, 1, 3, 5, 6, 0, 2, 4); \ + RSTT(7, a, 0, 2, 4, 6, 7, 1, 3, 5); \ + a[0] = t[0]; \ + a[1] = t[1]; \ + a[2] = t[2]; \ + a[3] = t[3]; \ + a[4] = t[4]; \ + a[5] = t[5]; \ + a[6] = t[6]; \ + a[7] = t[7]; \ + } while (0) + +#define PERM_SMALL_P(a) do { \ + for (int r = 0; r < 10; r ++) \ + ROUND_SMALL_P(a, r); \ + } while (0) + +#define PERM_SMALL_Pf(a) do { \ + for (int r = 0; r < 9; r ++) { \ + ROUND_SMALL_P(a, r);} \ + ROUND_SMALL_Pf(a,9); \ + } while (0) + +#define PERM_SMALL_Q(a) do { \ + for (int r = 0; r < 10; r ++) \ + ROUND_SMALL_Q(a, r); \ + } while (0) + diff --git a/kernel/groestlcoin.cl b/kernel/groestlcoin.cl index 54420d126..de3768cf9 100644 --- a/kernel/groestlcoin.cl +++ b/kernel/groestlcoin.cl @@ -1,8 +1,10 @@ /* - * ==========================(LICENSE BEGIN)============================ + * GroestlCoin kernel implementation. * - * GroestlCoin kernel implementation: Copyright (c) 2014 pallas + * ==========================(LICENSE BEGIN)============================ * + * Copyright (c) 2014 phm + * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including @@ -10,10 +12,10 @@ * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. @@ -24,1451 +26,164 @@ * * ===========================(LICENSE END)============================= * - * GroestlCoin kernel implementation: @author pallas - * Forum thread: http://bitcointalk.org/index.php?topic=779598 - * Donations to: BTC 1H7qC5uHuGX2d5s9Kuw3k7Wm7xMQzL16SN + * @author phm */ #ifndef GROESTLCOIN_CL #define GROESTLCOIN_CL -#define DC64(x) ((ulong)(x ## UL)) -#define DEC64E(x) (*(const __global ulong *) (x)); -#define H15 (((ulong)(512 & 0xFF) << 56) | ((ulong)(512 & 0xFF00) << 40)) -#define M15 0x100000000000000 -#define ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n)))) // rotate - -#define C64e(x) ((DC64(x) >> 56) \ - | ((DC64(x) >> 40) & DC64(0x000000000000FF00)) \ - | ((DC64(x) >> 24) & DC64(0x0000000000FF0000)) \ - | ((DC64(x) >> 8) & DC64(0x00000000FF000000)) \ - | ((DC64(x) << 8) & DC64(0x000000FF00000000)) \ - | ((DC64(x) << 24) & DC64(0x0000FF0000000000)) \ - | ((DC64(x) << 40) & DC64(0x00FF000000000000)) \ - | ((DC64(x) << 56))) -#define B64_0(x) ((x) & 0xFF) -#define B64_1(x) (((x) >> 8) & 0xFF) -#define B64_2(x) (((x) >> 16) & 0xFF) -#define B64_3(x) (((x) >> 24) & 0xFF) -#define B64_4(x) (((x) >> 32) & 0xFF) -#define B64_5(x) (((x) >> 40) & 0xFF) -#define B64_6(x) (((x) >> 48) & 0xFF) -#define B64_7(x) ((x) >> 56) -#define PC64(j, r) ((ulong)((j) + (r))) -#define QC64(j, r) (((ulong)(r) << 56) ^ ~((ulong)(j) << 56)) - -__constant static const ulong T0[] = { - C64e(0xc632f4a5f497a5c6), C64e(0xf86f978497eb84f8), - C64e(0xee5eb099b0c799ee), C64e(0xf67a8c8d8cf78df6), - C64e(0xffe8170d17e50dff), C64e(0xd60adcbddcb7bdd6), - C64e(0xde16c8b1c8a7b1de), C64e(0x916dfc54fc395491), - C64e(0x6090f050f0c05060), C64e(0x0207050305040302), - C64e(0xce2ee0a9e087a9ce), C64e(0x56d1877d87ac7d56), - C64e(0xe7cc2b192bd519e7), C64e(0xb513a662a67162b5), - C64e(0x4d7c31e6319ae64d), C64e(0xec59b59ab5c39aec), - C64e(0x8f40cf45cf05458f), C64e(0x1fa3bc9dbc3e9d1f), - C64e(0x8949c040c0094089), C64e(0xfa68928792ef87fa), - C64e(0xefd03f153fc515ef), C64e(0xb29426eb267febb2), - C64e(0x8ece40c94007c98e), C64e(0xfbe61d0b1ded0bfb), - C64e(0x416e2fec2f82ec41), C64e(0xb31aa967a97d67b3), - C64e(0x5f431cfd1cbefd5f), C64e(0x456025ea258aea45), - C64e(0x23f9dabfda46bf23), C64e(0x535102f702a6f753), - C64e(0xe445a196a1d396e4), C64e(0x9b76ed5bed2d5b9b), - C64e(0x75285dc25deac275), C64e(0xe1c5241c24d91ce1), - C64e(0x3dd4e9aee97aae3d), C64e(0x4cf2be6abe986a4c), - C64e(0x6c82ee5aeed85a6c), C64e(0x7ebdc341c3fc417e), - C64e(0xf5f3060206f102f5), C64e(0x8352d14fd11d4f83), - C64e(0x688ce45ce4d05c68), C64e(0x515607f407a2f451), - C64e(0xd18d5c345cb934d1), C64e(0xf9e1180818e908f9), - C64e(0xe24cae93aedf93e2), C64e(0xab3e9573954d73ab), - C64e(0x6297f553f5c45362), C64e(0x2a6b413f41543f2a), - C64e(0x081c140c14100c08), C64e(0x9563f652f6315295), - C64e(0x46e9af65af8c6546), C64e(0x9d7fe25ee2215e9d), - C64e(0x3048782878602830), C64e(0x37cff8a1f86ea137), - C64e(0x0a1b110f11140f0a), C64e(0x2febc4b5c45eb52f), - C64e(0x0e151b091b1c090e), C64e(0x247e5a365a483624), - C64e(0x1badb69bb6369b1b), C64e(0xdf98473d47a53ddf), - C64e(0xcda76a266a8126cd), C64e(0x4ef5bb69bb9c694e), - C64e(0x7f334ccd4cfecd7f), C64e(0xea50ba9fbacf9fea), - C64e(0x123f2d1b2d241b12), C64e(0x1da4b99eb93a9e1d), - C64e(0x58c49c749cb07458), C64e(0x3446722e72682e34), - C64e(0x3641772d776c2d36), C64e(0xdc11cdb2cda3b2dc), - C64e(0xb49d29ee2973eeb4), C64e(0x5b4d16fb16b6fb5b), - C64e(0xa4a501f60153f6a4), C64e(0x76a1d74dd7ec4d76), - C64e(0xb714a361a37561b7), C64e(0x7d3449ce49face7d), - C64e(0x52df8d7b8da47b52), C64e(0xdd9f423e42a13edd), - C64e(0x5ecd937193bc715e), C64e(0x13b1a297a2269713), - C64e(0xa6a204f50457f5a6), C64e(0xb901b868b86968b9), - C64e(0x0000000000000000), C64e(0xc1b5742c74992cc1), - C64e(0x40e0a060a0806040), C64e(0xe3c2211f21dd1fe3), - C64e(0x793a43c843f2c879), C64e(0xb69a2ced2c77edb6), - C64e(0xd40dd9bed9b3bed4), C64e(0x8d47ca46ca01468d), - C64e(0x671770d970ced967), C64e(0x72afdd4bdde44b72), - C64e(0x94ed79de7933de94), C64e(0x98ff67d4672bd498), - C64e(0xb09323e8237be8b0), C64e(0x855bde4ade114a85), - C64e(0xbb06bd6bbd6d6bbb), C64e(0xc5bb7e2a7e912ac5), - C64e(0x4f7b34e5349ee54f), C64e(0xedd73a163ac116ed), - C64e(0x86d254c55417c586), C64e(0x9af862d7622fd79a), - C64e(0x6699ff55ffcc5566), C64e(0x11b6a794a7229411), - C64e(0x8ac04acf4a0fcf8a), C64e(0xe9d9301030c910e9), - C64e(0x040e0a060a080604), C64e(0xfe66988198e781fe), - C64e(0xa0ab0bf00b5bf0a0), C64e(0x78b4cc44ccf04478), - C64e(0x25f0d5bad54aba25), C64e(0x4b753ee33e96e34b), - C64e(0xa2ac0ef30e5ff3a2), C64e(0x5d4419fe19bafe5d), - C64e(0x80db5bc05b1bc080), C64e(0x0580858a850a8a05), - C64e(0x3fd3ecadec7ead3f), C64e(0x21fedfbcdf42bc21), - C64e(0x70a8d848d8e04870), C64e(0xf1fd0c040cf904f1), - C64e(0x63197adf7ac6df63), C64e(0x772f58c158eec177), - C64e(0xaf309f759f4575af), C64e(0x42e7a563a5846342), - C64e(0x2070503050403020), C64e(0xe5cb2e1a2ed11ae5), - C64e(0xfdef120e12e10efd), C64e(0xbf08b76db7656dbf), - C64e(0x8155d44cd4194c81), C64e(0x18243c143c301418), - C64e(0x26795f355f4c3526), C64e(0xc3b2712f719d2fc3), - C64e(0xbe8638e13867e1be), C64e(0x35c8fda2fd6aa235), - C64e(0x88c74fcc4f0bcc88), C64e(0x2e654b394b5c392e), - C64e(0x936af957f93d5793), C64e(0x55580df20daaf255), - C64e(0xfc619d829de382fc), C64e(0x7ab3c947c9f4477a), - C64e(0xc827efacef8bacc8), C64e(0xba8832e7326fe7ba), - C64e(0x324f7d2b7d642b32), C64e(0xe642a495a4d795e6), - C64e(0xc03bfba0fb9ba0c0), C64e(0x19aab398b3329819), - C64e(0x9ef668d16827d19e), C64e(0xa322817f815d7fa3), - C64e(0x44eeaa66aa886644), C64e(0x54d6827e82a87e54), - C64e(0x3bdde6abe676ab3b), C64e(0x0b959e839e16830b), - C64e(0x8cc945ca4503ca8c), C64e(0xc7bc7b297b9529c7), - C64e(0x6b056ed36ed6d36b), C64e(0x286c443c44503c28), - C64e(0xa72c8b798b5579a7), C64e(0xbc813de23d63e2bc), - C64e(0x1631271d272c1d16), C64e(0xad379a769a4176ad), - C64e(0xdb964d3b4dad3bdb), C64e(0x649efa56fac85664), - C64e(0x74a6d24ed2e84e74), C64e(0x1436221e22281e14), - C64e(0x92e476db763fdb92), C64e(0x0c121e0a1e180a0c), - C64e(0x48fcb46cb4906c48), C64e(0xb88f37e4376be4b8), - C64e(0x9f78e75de7255d9f), C64e(0xbd0fb26eb2616ebd), - C64e(0x43692aef2a86ef43), C64e(0xc435f1a6f193a6c4), - C64e(0x39dae3a8e372a839), C64e(0x31c6f7a4f762a431), - C64e(0xd38a593759bd37d3), C64e(0xf274868b86ff8bf2), - C64e(0xd583563256b132d5), C64e(0x8b4ec543c50d438b), - C64e(0x6e85eb59ebdc596e), C64e(0xda18c2b7c2afb7da), - C64e(0x018e8f8c8f028c01), C64e(0xb11dac64ac7964b1), - C64e(0x9cf16dd26d23d29c), C64e(0x49723be03b92e049), - C64e(0xd81fc7b4c7abb4d8), C64e(0xacb915fa1543faac), - C64e(0xf3fa090709fd07f3), C64e(0xcfa06f256f8525cf), - C64e(0xca20eaafea8fafca), C64e(0xf47d898e89f38ef4), - C64e(0x476720e9208ee947), C64e(0x1038281828201810), - C64e(0x6f0b64d564ded56f), C64e(0xf073838883fb88f0), - C64e(0x4afbb16fb1946f4a), C64e(0x5cca967296b8725c), - C64e(0x38546c246c702438), C64e(0x575f08f108aef157), - C64e(0x732152c752e6c773), C64e(0x9764f351f3355197), - C64e(0xcbae6523658d23cb), C64e(0xa125847c84597ca1), - C64e(0xe857bf9cbfcb9ce8), C64e(0x3e5d6321637c213e), - C64e(0x96ea7cdd7c37dd96), C64e(0x611e7fdc7fc2dc61), - C64e(0x0d9c9186911a860d), C64e(0x0f9b9485941e850f), - C64e(0xe04bab90abdb90e0), C64e(0x7cbac642c6f8427c), - C64e(0x712657c457e2c471), C64e(0xcc29e5aae583aacc), - C64e(0x90e373d8733bd890), C64e(0x06090f050f0c0506), - C64e(0xf7f4030103f501f7), C64e(0x1c2a36123638121c), - C64e(0xc23cfea3fe9fa3c2), C64e(0x6a8be15fe1d45f6a), - C64e(0xaebe10f91047f9ae), C64e(0x69026bd06bd2d069), - C64e(0x17bfa891a82e9117), C64e(0x9971e858e8295899), - C64e(0x3a5369276974273a), C64e(0x27f7d0b9d04eb927), - C64e(0xd991483848a938d9), C64e(0xebde351335cd13eb), - C64e(0x2be5ceb3ce56b32b), C64e(0x2277553355443322), - C64e(0xd204d6bbd6bfbbd2), C64e(0xa9399070904970a9), - C64e(0x07878089800e8907), C64e(0x33c1f2a7f266a733), - C64e(0x2decc1b6c15ab62d), C64e(0x3c5a66226678223c), - C64e(0x15b8ad92ad2a9215), C64e(0xc9a96020608920c9), - C64e(0x875cdb49db154987), C64e(0xaab01aff1a4fffaa), - C64e(0x50d8887888a07850), C64e(0xa52b8e7a8e517aa5), - C64e(0x03898a8f8a068f03), C64e(0x594a13f813b2f859), - C64e(0x09929b809b128009), C64e(0x1a2339173934171a), - C64e(0x651075da75cada65), C64e(0xd784533153b531d7), - C64e(0x84d551c65113c684), C64e(0xd003d3b8d3bbb8d0), - C64e(0x82dc5ec35e1fc382), C64e(0x29e2cbb0cb52b029), - C64e(0x5ac3997799b4775a), C64e(0x1e2d3311333c111e), - C64e(0x7b3d46cb46f6cb7b), C64e(0xa8b71ffc1f4bfca8), - C64e(0x6d0c61d661dad66d), C64e(0x2c624e3a4e583a2c) -}; - -__constant static const ulong T1[] = { - C64e(0xc6c632f4a5f497a5), C64e(0xf8f86f978497eb84), - C64e(0xeeee5eb099b0c799), C64e(0xf6f67a8c8d8cf78d), - C64e(0xffffe8170d17e50d), C64e(0xd6d60adcbddcb7bd), - C64e(0xdede16c8b1c8a7b1), C64e(0x91916dfc54fc3954), - C64e(0x606090f050f0c050), C64e(0x0202070503050403), - C64e(0xcece2ee0a9e087a9), C64e(0x5656d1877d87ac7d), - C64e(0xe7e7cc2b192bd519), C64e(0xb5b513a662a67162), - C64e(0x4d4d7c31e6319ae6), C64e(0xecec59b59ab5c39a), - C64e(0x8f8f40cf45cf0545), C64e(0x1f1fa3bc9dbc3e9d), - C64e(0x898949c040c00940), C64e(0xfafa68928792ef87), - C64e(0xefefd03f153fc515), C64e(0xb2b29426eb267feb), - C64e(0x8e8ece40c94007c9), C64e(0xfbfbe61d0b1ded0b), - C64e(0x41416e2fec2f82ec), C64e(0xb3b31aa967a97d67), - C64e(0x5f5f431cfd1cbefd), C64e(0x45456025ea258aea), - C64e(0x2323f9dabfda46bf), C64e(0x53535102f702a6f7), - C64e(0xe4e445a196a1d396), C64e(0x9b9b76ed5bed2d5b), - C64e(0x7575285dc25deac2), C64e(0xe1e1c5241c24d91c), - C64e(0x3d3dd4e9aee97aae), C64e(0x4c4cf2be6abe986a), - C64e(0x6c6c82ee5aeed85a), C64e(0x7e7ebdc341c3fc41), - C64e(0xf5f5f3060206f102), C64e(0x838352d14fd11d4f), - C64e(0x68688ce45ce4d05c), C64e(0x51515607f407a2f4), - C64e(0xd1d18d5c345cb934), C64e(0xf9f9e1180818e908), - C64e(0xe2e24cae93aedf93), C64e(0xabab3e9573954d73), - C64e(0x626297f553f5c453), C64e(0x2a2a6b413f41543f), - C64e(0x08081c140c14100c), C64e(0x959563f652f63152), - C64e(0x4646e9af65af8c65), C64e(0x9d9d7fe25ee2215e), - C64e(0x3030487828786028), C64e(0x3737cff8a1f86ea1), - C64e(0x0a0a1b110f11140f), C64e(0x2f2febc4b5c45eb5), - C64e(0x0e0e151b091b1c09), C64e(0x24247e5a365a4836), - C64e(0x1b1badb69bb6369b), C64e(0xdfdf98473d47a53d), - C64e(0xcdcda76a266a8126), C64e(0x4e4ef5bb69bb9c69), - C64e(0x7f7f334ccd4cfecd), C64e(0xeaea50ba9fbacf9f), - C64e(0x12123f2d1b2d241b), C64e(0x1d1da4b99eb93a9e), - C64e(0x5858c49c749cb074), C64e(0x343446722e72682e), - C64e(0x363641772d776c2d), C64e(0xdcdc11cdb2cda3b2), - C64e(0xb4b49d29ee2973ee), C64e(0x5b5b4d16fb16b6fb), - C64e(0xa4a4a501f60153f6), C64e(0x7676a1d74dd7ec4d), - C64e(0xb7b714a361a37561), C64e(0x7d7d3449ce49face), - C64e(0x5252df8d7b8da47b), C64e(0xdddd9f423e42a13e), - C64e(0x5e5ecd937193bc71), C64e(0x1313b1a297a22697), - C64e(0xa6a6a204f50457f5), C64e(0xb9b901b868b86968), - C64e(0x0000000000000000), C64e(0xc1c1b5742c74992c), - C64e(0x4040e0a060a08060), C64e(0xe3e3c2211f21dd1f), - C64e(0x79793a43c843f2c8), C64e(0xb6b69a2ced2c77ed), - C64e(0xd4d40dd9bed9b3be), C64e(0x8d8d47ca46ca0146), - C64e(0x67671770d970ced9), C64e(0x7272afdd4bdde44b), - C64e(0x9494ed79de7933de), C64e(0x9898ff67d4672bd4), - C64e(0xb0b09323e8237be8), C64e(0x85855bde4ade114a), - C64e(0xbbbb06bd6bbd6d6b), C64e(0xc5c5bb7e2a7e912a), - C64e(0x4f4f7b34e5349ee5), C64e(0xededd73a163ac116), - C64e(0x8686d254c55417c5), C64e(0x9a9af862d7622fd7), - C64e(0x666699ff55ffcc55), C64e(0x1111b6a794a72294), - C64e(0x8a8ac04acf4a0fcf), C64e(0xe9e9d9301030c910), - C64e(0x04040e0a060a0806), C64e(0xfefe66988198e781), - C64e(0xa0a0ab0bf00b5bf0), C64e(0x7878b4cc44ccf044), - C64e(0x2525f0d5bad54aba), C64e(0x4b4b753ee33e96e3), - C64e(0xa2a2ac0ef30e5ff3), C64e(0x5d5d4419fe19bafe), - C64e(0x8080db5bc05b1bc0), C64e(0x050580858a850a8a), - C64e(0x3f3fd3ecadec7ead), C64e(0x2121fedfbcdf42bc), - C64e(0x7070a8d848d8e048), C64e(0xf1f1fd0c040cf904), - C64e(0x6363197adf7ac6df), C64e(0x77772f58c158eec1), - C64e(0xafaf309f759f4575), C64e(0x4242e7a563a58463), - C64e(0x2020705030504030), C64e(0xe5e5cb2e1a2ed11a), - C64e(0xfdfdef120e12e10e), C64e(0xbfbf08b76db7656d), - C64e(0x818155d44cd4194c), C64e(0x1818243c143c3014), - C64e(0x2626795f355f4c35), C64e(0xc3c3b2712f719d2f), - C64e(0xbebe8638e13867e1), C64e(0x3535c8fda2fd6aa2), - C64e(0x8888c74fcc4f0bcc), C64e(0x2e2e654b394b5c39), - C64e(0x93936af957f93d57), C64e(0x5555580df20daaf2), - C64e(0xfcfc619d829de382), C64e(0x7a7ab3c947c9f447), - C64e(0xc8c827efacef8bac), C64e(0xbaba8832e7326fe7), - C64e(0x32324f7d2b7d642b), C64e(0xe6e642a495a4d795), - C64e(0xc0c03bfba0fb9ba0), C64e(0x1919aab398b33298), - C64e(0x9e9ef668d16827d1), C64e(0xa3a322817f815d7f), - C64e(0x4444eeaa66aa8866), C64e(0x5454d6827e82a87e), - C64e(0x3b3bdde6abe676ab), C64e(0x0b0b959e839e1683), - C64e(0x8c8cc945ca4503ca), C64e(0xc7c7bc7b297b9529), - C64e(0x6b6b056ed36ed6d3), C64e(0x28286c443c44503c), - C64e(0xa7a72c8b798b5579), C64e(0xbcbc813de23d63e2), - C64e(0x161631271d272c1d), C64e(0xadad379a769a4176), - C64e(0xdbdb964d3b4dad3b), C64e(0x64649efa56fac856), - C64e(0x7474a6d24ed2e84e), C64e(0x141436221e22281e), - C64e(0x9292e476db763fdb), C64e(0x0c0c121e0a1e180a), - C64e(0x4848fcb46cb4906c), C64e(0xb8b88f37e4376be4), - C64e(0x9f9f78e75de7255d), C64e(0xbdbd0fb26eb2616e), - C64e(0x4343692aef2a86ef), C64e(0xc4c435f1a6f193a6), - C64e(0x3939dae3a8e372a8), C64e(0x3131c6f7a4f762a4), - C64e(0xd3d38a593759bd37), C64e(0xf2f274868b86ff8b), - C64e(0xd5d583563256b132), C64e(0x8b8b4ec543c50d43), - C64e(0x6e6e85eb59ebdc59), C64e(0xdada18c2b7c2afb7), - C64e(0x01018e8f8c8f028c), C64e(0xb1b11dac64ac7964), - C64e(0x9c9cf16dd26d23d2), C64e(0x4949723be03b92e0), - C64e(0xd8d81fc7b4c7abb4), C64e(0xacacb915fa1543fa), - C64e(0xf3f3fa090709fd07), C64e(0xcfcfa06f256f8525), - C64e(0xcaca20eaafea8faf), C64e(0xf4f47d898e89f38e), - C64e(0x47476720e9208ee9), C64e(0x1010382818282018), - C64e(0x6f6f0b64d564ded5), C64e(0xf0f073838883fb88), - C64e(0x4a4afbb16fb1946f), C64e(0x5c5cca967296b872), - C64e(0x3838546c246c7024), C64e(0x57575f08f108aef1), - C64e(0x73732152c752e6c7), C64e(0x979764f351f33551), - C64e(0xcbcbae6523658d23), C64e(0xa1a125847c84597c), - C64e(0xe8e857bf9cbfcb9c), C64e(0x3e3e5d6321637c21), - C64e(0x9696ea7cdd7c37dd), C64e(0x61611e7fdc7fc2dc), - C64e(0x0d0d9c9186911a86), C64e(0x0f0f9b9485941e85), - C64e(0xe0e04bab90abdb90), C64e(0x7c7cbac642c6f842), - C64e(0x71712657c457e2c4), C64e(0xcccc29e5aae583aa), - C64e(0x9090e373d8733bd8), C64e(0x0606090f050f0c05), - C64e(0xf7f7f4030103f501), C64e(0x1c1c2a3612363812), - C64e(0xc2c23cfea3fe9fa3), C64e(0x6a6a8be15fe1d45f), - C64e(0xaeaebe10f91047f9), C64e(0x6969026bd06bd2d0), - C64e(0x1717bfa891a82e91), C64e(0x999971e858e82958), - C64e(0x3a3a536927697427), C64e(0x2727f7d0b9d04eb9), - C64e(0xd9d991483848a938), C64e(0xebebde351335cd13), - C64e(0x2b2be5ceb3ce56b3), C64e(0x2222775533554433), - C64e(0xd2d204d6bbd6bfbb), C64e(0xa9a9399070904970), - C64e(0x0707878089800e89), C64e(0x3333c1f2a7f266a7), - C64e(0x2d2decc1b6c15ab6), C64e(0x3c3c5a6622667822), - C64e(0x1515b8ad92ad2a92), C64e(0xc9c9a96020608920), - C64e(0x87875cdb49db1549), C64e(0xaaaab01aff1a4fff), - C64e(0x5050d8887888a078), C64e(0xa5a52b8e7a8e517a), - C64e(0x0303898a8f8a068f), C64e(0x59594a13f813b2f8), - C64e(0x0909929b809b1280), C64e(0x1a1a233917393417), - C64e(0x65651075da75cada), C64e(0xd7d784533153b531), - C64e(0x8484d551c65113c6), C64e(0xd0d003d3b8d3bbb8), - C64e(0x8282dc5ec35e1fc3), C64e(0x2929e2cbb0cb52b0), - C64e(0x5a5ac3997799b477), C64e(0x1e1e2d3311333c11), - C64e(0x7b7b3d46cb46f6cb), C64e(0xa8a8b71ffc1f4bfc), - C64e(0x6d6d0c61d661dad6), C64e(0x2c2c624e3a4e583a) -}; -/* -__constant static const ulong T2_G[] = { - C64e(0xa5c6c632f4a5f497), C64e(0x84f8f86f978497eb), - C64e(0x99eeee5eb099b0c7), C64e(0x8df6f67a8c8d8cf7), - C64e(0x0dffffe8170d17e5), C64e(0xbdd6d60adcbddcb7), - C64e(0xb1dede16c8b1c8a7), C64e(0x5491916dfc54fc39), - C64e(0x50606090f050f0c0), C64e(0x0302020705030504), - C64e(0xa9cece2ee0a9e087), C64e(0x7d5656d1877d87ac), - C64e(0x19e7e7cc2b192bd5), C64e(0x62b5b513a662a671), - C64e(0xe64d4d7c31e6319a), C64e(0x9aecec59b59ab5c3), - C64e(0x458f8f40cf45cf05), C64e(0x9d1f1fa3bc9dbc3e), - C64e(0x40898949c040c009), C64e(0x87fafa68928792ef), - C64e(0x15efefd03f153fc5), C64e(0xebb2b29426eb267f), - C64e(0xc98e8ece40c94007), C64e(0x0bfbfbe61d0b1ded), - C64e(0xec41416e2fec2f82), C64e(0x67b3b31aa967a97d), - C64e(0xfd5f5f431cfd1cbe), C64e(0xea45456025ea258a), - C64e(0xbf2323f9dabfda46), C64e(0xf753535102f702a6), - C64e(0x96e4e445a196a1d3), C64e(0x5b9b9b76ed5bed2d), - C64e(0xc27575285dc25dea), C64e(0x1ce1e1c5241c24d9), - C64e(0xae3d3dd4e9aee97a), C64e(0x6a4c4cf2be6abe98), - C64e(0x5a6c6c82ee5aeed8), C64e(0x417e7ebdc341c3fc), - C64e(0x02f5f5f3060206f1), C64e(0x4f838352d14fd11d), - C64e(0x5c68688ce45ce4d0), C64e(0xf451515607f407a2), - C64e(0x34d1d18d5c345cb9), C64e(0x08f9f9e1180818e9), - C64e(0x93e2e24cae93aedf), C64e(0x73abab3e9573954d), - C64e(0x53626297f553f5c4), C64e(0x3f2a2a6b413f4154), - C64e(0x0c08081c140c1410), C64e(0x52959563f652f631), - C64e(0x654646e9af65af8c), C64e(0x5e9d9d7fe25ee221), - C64e(0x2830304878287860), C64e(0xa13737cff8a1f86e), - C64e(0x0f0a0a1b110f1114), C64e(0xb52f2febc4b5c45e), - C64e(0x090e0e151b091b1c), C64e(0x3624247e5a365a48), - C64e(0x9b1b1badb69bb636), C64e(0x3ddfdf98473d47a5), - C64e(0x26cdcda76a266a81), C64e(0x694e4ef5bb69bb9c), - C64e(0xcd7f7f334ccd4cfe), C64e(0x9feaea50ba9fbacf), - C64e(0x1b12123f2d1b2d24), C64e(0x9e1d1da4b99eb93a), - C64e(0x745858c49c749cb0), C64e(0x2e343446722e7268), - C64e(0x2d363641772d776c), C64e(0xb2dcdc11cdb2cda3), - C64e(0xeeb4b49d29ee2973), C64e(0xfb5b5b4d16fb16b6), - C64e(0xf6a4a4a501f60153), C64e(0x4d7676a1d74dd7ec), - C64e(0x61b7b714a361a375), C64e(0xce7d7d3449ce49fa), - C64e(0x7b5252df8d7b8da4), C64e(0x3edddd9f423e42a1), - C64e(0x715e5ecd937193bc), C64e(0x971313b1a297a226), - C64e(0xf5a6a6a204f50457), C64e(0x68b9b901b868b869), - C64e(0x0000000000000000), C64e(0x2cc1c1b5742c7499), - C64e(0x604040e0a060a080), C64e(0x1fe3e3c2211f21dd), - C64e(0xc879793a43c843f2), C64e(0xedb6b69a2ced2c77), - C64e(0xbed4d40dd9bed9b3), C64e(0x468d8d47ca46ca01), - C64e(0xd967671770d970ce), C64e(0x4b7272afdd4bdde4), - C64e(0xde9494ed79de7933), C64e(0xd49898ff67d4672b), - C64e(0xe8b0b09323e8237b), C64e(0x4a85855bde4ade11), - C64e(0x6bbbbb06bd6bbd6d), C64e(0x2ac5c5bb7e2a7e91), - C64e(0xe54f4f7b34e5349e), C64e(0x16ededd73a163ac1), - C64e(0xc58686d254c55417), C64e(0xd79a9af862d7622f), - C64e(0x55666699ff55ffcc), C64e(0x941111b6a794a722), - C64e(0xcf8a8ac04acf4a0f), C64e(0x10e9e9d9301030c9), - C64e(0x0604040e0a060a08), C64e(0x81fefe66988198e7), - C64e(0xf0a0a0ab0bf00b5b), C64e(0x447878b4cc44ccf0), - C64e(0xba2525f0d5bad54a), C64e(0xe34b4b753ee33e96), - C64e(0xf3a2a2ac0ef30e5f), C64e(0xfe5d5d4419fe19ba), - C64e(0xc08080db5bc05b1b), C64e(0x8a050580858a850a), - C64e(0xad3f3fd3ecadec7e), C64e(0xbc2121fedfbcdf42), - C64e(0x487070a8d848d8e0), C64e(0x04f1f1fd0c040cf9), - C64e(0xdf6363197adf7ac6), C64e(0xc177772f58c158ee), - C64e(0x75afaf309f759f45), C64e(0x634242e7a563a584), - C64e(0x3020207050305040), C64e(0x1ae5e5cb2e1a2ed1), - C64e(0x0efdfdef120e12e1), C64e(0x6dbfbf08b76db765), - C64e(0x4c818155d44cd419), C64e(0x141818243c143c30), - C64e(0x352626795f355f4c), C64e(0x2fc3c3b2712f719d), - C64e(0xe1bebe8638e13867), C64e(0xa23535c8fda2fd6a), - C64e(0xcc8888c74fcc4f0b), C64e(0x392e2e654b394b5c), - C64e(0x5793936af957f93d), C64e(0xf25555580df20daa), - C64e(0x82fcfc619d829de3), C64e(0x477a7ab3c947c9f4), - C64e(0xacc8c827efacef8b), C64e(0xe7baba8832e7326f), - C64e(0x2b32324f7d2b7d64), C64e(0x95e6e642a495a4d7), - C64e(0xa0c0c03bfba0fb9b), C64e(0x981919aab398b332), - C64e(0xd19e9ef668d16827), C64e(0x7fa3a322817f815d), - C64e(0x664444eeaa66aa88), C64e(0x7e5454d6827e82a8), - C64e(0xab3b3bdde6abe676), C64e(0x830b0b959e839e16), - C64e(0xca8c8cc945ca4503), C64e(0x29c7c7bc7b297b95), - C64e(0xd36b6b056ed36ed6), C64e(0x3c28286c443c4450), - C64e(0x79a7a72c8b798b55), C64e(0xe2bcbc813de23d63), - C64e(0x1d161631271d272c), C64e(0x76adad379a769a41), - C64e(0x3bdbdb964d3b4dad), C64e(0x5664649efa56fac8), - C64e(0x4e7474a6d24ed2e8), C64e(0x1e141436221e2228), - C64e(0xdb9292e476db763f), C64e(0x0a0c0c121e0a1e18), - C64e(0x6c4848fcb46cb490), C64e(0xe4b8b88f37e4376b), - C64e(0x5d9f9f78e75de725), C64e(0x6ebdbd0fb26eb261), - C64e(0xef4343692aef2a86), C64e(0xa6c4c435f1a6f193), - C64e(0xa83939dae3a8e372), C64e(0xa43131c6f7a4f762), - C64e(0x37d3d38a593759bd), C64e(0x8bf2f274868b86ff), - C64e(0x32d5d583563256b1), C64e(0x438b8b4ec543c50d), - C64e(0x596e6e85eb59ebdc), C64e(0xb7dada18c2b7c2af), - C64e(0x8c01018e8f8c8f02), C64e(0x64b1b11dac64ac79), - C64e(0xd29c9cf16dd26d23), C64e(0xe04949723be03b92), - C64e(0xb4d8d81fc7b4c7ab), C64e(0xfaacacb915fa1543), - C64e(0x07f3f3fa090709fd), C64e(0x25cfcfa06f256f85), - C64e(0xafcaca20eaafea8f), C64e(0x8ef4f47d898e89f3), - C64e(0xe947476720e9208e), C64e(0x1810103828182820), - C64e(0xd56f6f0b64d564de), C64e(0x88f0f073838883fb), - C64e(0x6f4a4afbb16fb194), C64e(0x725c5cca967296b8), - C64e(0x243838546c246c70), C64e(0xf157575f08f108ae), - C64e(0xc773732152c752e6), C64e(0x51979764f351f335), - C64e(0x23cbcbae6523658d), C64e(0x7ca1a125847c8459), - C64e(0x9ce8e857bf9cbfcb), C64e(0x213e3e5d6321637c), - C64e(0xdd9696ea7cdd7c37), C64e(0xdc61611e7fdc7fc2), - C64e(0x860d0d9c9186911a), C64e(0x850f0f9b9485941e), - C64e(0x90e0e04bab90abdb), C64e(0x427c7cbac642c6f8), - C64e(0xc471712657c457e2), C64e(0xaacccc29e5aae583), - C64e(0xd89090e373d8733b), C64e(0x050606090f050f0c), - C64e(0x01f7f7f4030103f5), C64e(0x121c1c2a36123638), - C64e(0xa3c2c23cfea3fe9f), C64e(0x5f6a6a8be15fe1d4), - C64e(0xf9aeaebe10f91047), C64e(0xd06969026bd06bd2), - C64e(0x911717bfa891a82e), C64e(0x58999971e858e829), - C64e(0x273a3a5369276974), C64e(0xb92727f7d0b9d04e), - C64e(0x38d9d991483848a9), C64e(0x13ebebde351335cd), - C64e(0xb32b2be5ceb3ce56), C64e(0x3322227755335544), - C64e(0xbbd2d204d6bbd6bf), C64e(0x70a9a93990709049), - C64e(0x890707878089800e), C64e(0xa73333c1f2a7f266), - C64e(0xb62d2decc1b6c15a), C64e(0x223c3c5a66226678), - C64e(0x921515b8ad92ad2a), C64e(0x20c9c9a960206089), - C64e(0x4987875cdb49db15), C64e(0xffaaaab01aff1a4f), - C64e(0x785050d8887888a0), C64e(0x7aa5a52b8e7a8e51), - C64e(0x8f0303898a8f8a06), C64e(0xf859594a13f813b2), - C64e(0x800909929b809b12), C64e(0x171a1a2339173934), - C64e(0xda65651075da75ca), C64e(0x31d7d784533153b5), - C64e(0xc68484d551c65113), C64e(0xb8d0d003d3b8d3bb), - C64e(0xc38282dc5ec35e1f), C64e(0xb02929e2cbb0cb52), - C64e(0x775a5ac3997799b4), C64e(0x111e1e2d3311333c), - C64e(0xcb7b7b3d46cb46f6), C64e(0xfca8a8b71ffc1f4b), - C64e(0xd66d6d0c61d661da), C64e(0x3a2c2c624e3a4e58) -}; - -__constant static const ulong T3_G[] = { - C64e(0x97a5c6c632f4a5f4), C64e(0xeb84f8f86f978497), - C64e(0xc799eeee5eb099b0), C64e(0xf78df6f67a8c8d8c), - C64e(0xe50dffffe8170d17), C64e(0xb7bdd6d60adcbddc), - C64e(0xa7b1dede16c8b1c8), C64e(0x395491916dfc54fc), - C64e(0xc050606090f050f0), C64e(0x0403020207050305), - C64e(0x87a9cece2ee0a9e0), C64e(0xac7d5656d1877d87), - C64e(0xd519e7e7cc2b192b), C64e(0x7162b5b513a662a6), - C64e(0x9ae64d4d7c31e631), C64e(0xc39aecec59b59ab5), - C64e(0x05458f8f40cf45cf), C64e(0x3e9d1f1fa3bc9dbc), - C64e(0x0940898949c040c0), C64e(0xef87fafa68928792), - C64e(0xc515efefd03f153f), C64e(0x7febb2b29426eb26), - C64e(0x07c98e8ece40c940), C64e(0xed0bfbfbe61d0b1d), - C64e(0x82ec41416e2fec2f), C64e(0x7d67b3b31aa967a9), - C64e(0xbefd5f5f431cfd1c), C64e(0x8aea45456025ea25), - C64e(0x46bf2323f9dabfda), C64e(0xa6f753535102f702), - C64e(0xd396e4e445a196a1), C64e(0x2d5b9b9b76ed5bed), - C64e(0xeac27575285dc25d), C64e(0xd91ce1e1c5241c24), - C64e(0x7aae3d3dd4e9aee9), C64e(0x986a4c4cf2be6abe), - C64e(0xd85a6c6c82ee5aee), C64e(0xfc417e7ebdc341c3), - C64e(0xf102f5f5f3060206), C64e(0x1d4f838352d14fd1), - C64e(0xd05c68688ce45ce4), C64e(0xa2f451515607f407), - C64e(0xb934d1d18d5c345c), C64e(0xe908f9f9e1180818), - C64e(0xdf93e2e24cae93ae), C64e(0x4d73abab3e957395), - C64e(0xc453626297f553f5), C64e(0x543f2a2a6b413f41), - C64e(0x100c08081c140c14), C64e(0x3152959563f652f6), - C64e(0x8c654646e9af65af), C64e(0x215e9d9d7fe25ee2), - C64e(0x6028303048782878), C64e(0x6ea13737cff8a1f8), - C64e(0x140f0a0a1b110f11), C64e(0x5eb52f2febc4b5c4), - C64e(0x1c090e0e151b091b), C64e(0x483624247e5a365a), - C64e(0x369b1b1badb69bb6), C64e(0xa53ddfdf98473d47), - C64e(0x8126cdcda76a266a), C64e(0x9c694e4ef5bb69bb), - C64e(0xfecd7f7f334ccd4c), C64e(0xcf9feaea50ba9fba), - C64e(0x241b12123f2d1b2d), C64e(0x3a9e1d1da4b99eb9), - C64e(0xb0745858c49c749c), C64e(0x682e343446722e72), - C64e(0x6c2d363641772d77), C64e(0xa3b2dcdc11cdb2cd), - C64e(0x73eeb4b49d29ee29), C64e(0xb6fb5b5b4d16fb16), - C64e(0x53f6a4a4a501f601), C64e(0xec4d7676a1d74dd7), - C64e(0x7561b7b714a361a3), C64e(0xface7d7d3449ce49), - C64e(0xa47b5252df8d7b8d), C64e(0xa13edddd9f423e42), - C64e(0xbc715e5ecd937193), C64e(0x26971313b1a297a2), - C64e(0x57f5a6a6a204f504), C64e(0x6968b9b901b868b8), - C64e(0x0000000000000000), C64e(0x992cc1c1b5742c74), - C64e(0x80604040e0a060a0), C64e(0xdd1fe3e3c2211f21), - C64e(0xf2c879793a43c843), C64e(0x77edb6b69a2ced2c), - C64e(0xb3bed4d40dd9bed9), C64e(0x01468d8d47ca46ca), - C64e(0xced967671770d970), C64e(0xe44b7272afdd4bdd), - C64e(0x33de9494ed79de79), C64e(0x2bd49898ff67d467), - C64e(0x7be8b0b09323e823), C64e(0x114a85855bde4ade), - C64e(0x6d6bbbbb06bd6bbd), C64e(0x912ac5c5bb7e2a7e), - C64e(0x9ee54f4f7b34e534), C64e(0xc116ededd73a163a), - C64e(0x17c58686d254c554), C64e(0x2fd79a9af862d762), - C64e(0xcc55666699ff55ff), C64e(0x22941111b6a794a7), - C64e(0x0fcf8a8ac04acf4a), C64e(0xc910e9e9d9301030), - C64e(0x080604040e0a060a), C64e(0xe781fefe66988198), - C64e(0x5bf0a0a0ab0bf00b), C64e(0xf0447878b4cc44cc), - C64e(0x4aba2525f0d5bad5), C64e(0x96e34b4b753ee33e), - C64e(0x5ff3a2a2ac0ef30e), C64e(0xbafe5d5d4419fe19), - C64e(0x1bc08080db5bc05b), C64e(0x0a8a050580858a85), - C64e(0x7ead3f3fd3ecadec), C64e(0x42bc2121fedfbcdf), - C64e(0xe0487070a8d848d8), C64e(0xf904f1f1fd0c040c), - C64e(0xc6df6363197adf7a), C64e(0xeec177772f58c158), - C64e(0x4575afaf309f759f), C64e(0x84634242e7a563a5), - C64e(0x4030202070503050), C64e(0xd11ae5e5cb2e1a2e), - C64e(0xe10efdfdef120e12), C64e(0x656dbfbf08b76db7), - C64e(0x194c818155d44cd4), C64e(0x30141818243c143c), - C64e(0x4c352626795f355f), C64e(0x9d2fc3c3b2712f71), - C64e(0x67e1bebe8638e138), C64e(0x6aa23535c8fda2fd), - C64e(0x0bcc8888c74fcc4f), C64e(0x5c392e2e654b394b), - C64e(0x3d5793936af957f9), C64e(0xaaf25555580df20d), - C64e(0xe382fcfc619d829d), C64e(0xf4477a7ab3c947c9), - C64e(0x8bacc8c827efacef), C64e(0x6fe7baba8832e732), - C64e(0x642b32324f7d2b7d), C64e(0xd795e6e642a495a4), - C64e(0x9ba0c0c03bfba0fb), C64e(0x32981919aab398b3), - C64e(0x27d19e9ef668d168), C64e(0x5d7fa3a322817f81), - C64e(0x88664444eeaa66aa), C64e(0xa87e5454d6827e82), - C64e(0x76ab3b3bdde6abe6), C64e(0x16830b0b959e839e), - C64e(0x03ca8c8cc945ca45), C64e(0x9529c7c7bc7b297b), - C64e(0xd6d36b6b056ed36e), C64e(0x503c28286c443c44), - C64e(0x5579a7a72c8b798b), C64e(0x63e2bcbc813de23d), - C64e(0x2c1d161631271d27), C64e(0x4176adad379a769a), - C64e(0xad3bdbdb964d3b4d), C64e(0xc85664649efa56fa), - C64e(0xe84e7474a6d24ed2), C64e(0x281e141436221e22), - C64e(0x3fdb9292e476db76), C64e(0x180a0c0c121e0a1e), - C64e(0x906c4848fcb46cb4), C64e(0x6be4b8b88f37e437), - C64e(0x255d9f9f78e75de7), C64e(0x616ebdbd0fb26eb2), - C64e(0x86ef4343692aef2a), C64e(0x93a6c4c435f1a6f1), - C64e(0x72a83939dae3a8e3), C64e(0x62a43131c6f7a4f7), - C64e(0xbd37d3d38a593759), C64e(0xff8bf2f274868b86), - C64e(0xb132d5d583563256), C64e(0x0d438b8b4ec543c5), - C64e(0xdc596e6e85eb59eb), C64e(0xafb7dada18c2b7c2), - C64e(0x028c01018e8f8c8f), C64e(0x7964b1b11dac64ac), - C64e(0x23d29c9cf16dd26d), C64e(0x92e04949723be03b), - C64e(0xabb4d8d81fc7b4c7), C64e(0x43faacacb915fa15), - C64e(0xfd07f3f3fa090709), C64e(0x8525cfcfa06f256f), - C64e(0x8fafcaca20eaafea), C64e(0xf38ef4f47d898e89), - C64e(0x8ee947476720e920), C64e(0x2018101038281828), - C64e(0xded56f6f0b64d564), C64e(0xfb88f0f073838883), - C64e(0x946f4a4afbb16fb1), C64e(0xb8725c5cca967296), - C64e(0x70243838546c246c), C64e(0xaef157575f08f108), - C64e(0xe6c773732152c752), C64e(0x3551979764f351f3), - C64e(0x8d23cbcbae652365), C64e(0x597ca1a125847c84), - C64e(0xcb9ce8e857bf9cbf), C64e(0x7c213e3e5d632163), - C64e(0x37dd9696ea7cdd7c), C64e(0xc2dc61611e7fdc7f), - C64e(0x1a860d0d9c918691), C64e(0x1e850f0f9b948594), - C64e(0xdb90e0e04bab90ab), C64e(0xf8427c7cbac642c6), - C64e(0xe2c471712657c457), C64e(0x83aacccc29e5aae5), - C64e(0x3bd89090e373d873), C64e(0x0c050606090f050f), - C64e(0xf501f7f7f4030103), C64e(0x38121c1c2a361236), - C64e(0x9fa3c2c23cfea3fe), C64e(0xd45f6a6a8be15fe1), - C64e(0x47f9aeaebe10f910), C64e(0xd2d06969026bd06b), - C64e(0x2e911717bfa891a8), C64e(0x2958999971e858e8), - C64e(0x74273a3a53692769), C64e(0x4eb92727f7d0b9d0), - C64e(0xa938d9d991483848), C64e(0xcd13ebebde351335), - C64e(0x56b32b2be5ceb3ce), C64e(0x4433222277553355), - C64e(0xbfbbd2d204d6bbd6), C64e(0x4970a9a939907090), - C64e(0x0e89070787808980), C64e(0x66a73333c1f2a7f2), - C64e(0x5ab62d2decc1b6c1), C64e(0x78223c3c5a662266), - C64e(0x2a921515b8ad92ad), C64e(0x8920c9c9a9602060), - C64e(0x154987875cdb49db), C64e(0x4fffaaaab01aff1a), - C64e(0xa0785050d8887888), C64e(0x517aa5a52b8e7a8e), - C64e(0x068f0303898a8f8a), C64e(0xb2f859594a13f813), - C64e(0x12800909929b809b), C64e(0x34171a1a23391739), - C64e(0xcada65651075da75), C64e(0xb531d7d784533153), - C64e(0x13c68484d551c651), C64e(0xbbb8d0d003d3b8d3), - C64e(0x1fc38282dc5ec35e), C64e(0x52b02929e2cbb0cb), - C64e(0xb4775a5ac3997799), C64e(0x3c111e1e2d331133), - C64e(0xf6cb7b7b3d46cb46), C64e(0x4bfca8a8b71ffc1f), - C64e(0xdad66d6d0c61d661), C64e(0x583a2c2c624e3a4e) -}; - -__constant static const ulong T4_G[] = { - C64e(0xf497a5c6c632f4a5), C64e(0x97eb84f8f86f9784), - C64e(0xb0c799eeee5eb099), C64e(0x8cf78df6f67a8c8d), - C64e(0x17e50dffffe8170d), C64e(0xdcb7bdd6d60adcbd), - C64e(0xc8a7b1dede16c8b1), C64e(0xfc395491916dfc54), - C64e(0xf0c050606090f050), C64e(0x0504030202070503), - C64e(0xe087a9cece2ee0a9), C64e(0x87ac7d5656d1877d), - C64e(0x2bd519e7e7cc2b19), C64e(0xa67162b5b513a662), - C64e(0x319ae64d4d7c31e6), C64e(0xb5c39aecec59b59a), - C64e(0xcf05458f8f40cf45), C64e(0xbc3e9d1f1fa3bc9d), - C64e(0xc00940898949c040), C64e(0x92ef87fafa689287), - C64e(0x3fc515efefd03f15), C64e(0x267febb2b29426eb), - C64e(0x4007c98e8ece40c9), C64e(0x1ded0bfbfbe61d0b), - C64e(0x2f82ec41416e2fec), C64e(0xa97d67b3b31aa967), - C64e(0x1cbefd5f5f431cfd), C64e(0x258aea45456025ea), - C64e(0xda46bf2323f9dabf), C64e(0x02a6f753535102f7), - C64e(0xa1d396e4e445a196), C64e(0xed2d5b9b9b76ed5b), - C64e(0x5deac27575285dc2), C64e(0x24d91ce1e1c5241c), - C64e(0xe97aae3d3dd4e9ae), C64e(0xbe986a4c4cf2be6a), - C64e(0xeed85a6c6c82ee5a), C64e(0xc3fc417e7ebdc341), - C64e(0x06f102f5f5f30602), C64e(0xd11d4f838352d14f), - C64e(0xe4d05c68688ce45c), C64e(0x07a2f451515607f4), - C64e(0x5cb934d1d18d5c34), C64e(0x18e908f9f9e11808), - C64e(0xaedf93e2e24cae93), C64e(0x954d73abab3e9573), - C64e(0xf5c453626297f553), C64e(0x41543f2a2a6b413f), - C64e(0x14100c08081c140c), C64e(0xf63152959563f652), - C64e(0xaf8c654646e9af65), C64e(0xe2215e9d9d7fe25e), - C64e(0x7860283030487828), C64e(0xf86ea13737cff8a1), - C64e(0x11140f0a0a1b110f), C64e(0xc45eb52f2febc4b5), - C64e(0x1b1c090e0e151b09), C64e(0x5a483624247e5a36), - C64e(0xb6369b1b1badb69b), C64e(0x47a53ddfdf98473d), - C64e(0x6a8126cdcda76a26), C64e(0xbb9c694e4ef5bb69), - C64e(0x4cfecd7f7f334ccd), C64e(0xbacf9feaea50ba9f), - C64e(0x2d241b12123f2d1b), C64e(0xb93a9e1d1da4b99e), - C64e(0x9cb0745858c49c74), C64e(0x72682e343446722e), - C64e(0x776c2d363641772d), C64e(0xcda3b2dcdc11cdb2), - C64e(0x2973eeb4b49d29ee), C64e(0x16b6fb5b5b4d16fb), - C64e(0x0153f6a4a4a501f6), C64e(0xd7ec4d7676a1d74d), - C64e(0xa37561b7b714a361), C64e(0x49face7d7d3449ce), - C64e(0x8da47b5252df8d7b), C64e(0x42a13edddd9f423e), - C64e(0x93bc715e5ecd9371), C64e(0xa226971313b1a297), - C64e(0x0457f5a6a6a204f5), C64e(0xb86968b9b901b868), - C64e(0x0000000000000000), C64e(0x74992cc1c1b5742c), - C64e(0xa080604040e0a060), C64e(0x21dd1fe3e3c2211f), - C64e(0x43f2c879793a43c8), C64e(0x2c77edb6b69a2ced), - C64e(0xd9b3bed4d40dd9be), C64e(0xca01468d8d47ca46), - C64e(0x70ced967671770d9), C64e(0xdde44b7272afdd4b), - C64e(0x7933de9494ed79de), C64e(0x672bd49898ff67d4), - C64e(0x237be8b0b09323e8), C64e(0xde114a85855bde4a), - C64e(0xbd6d6bbbbb06bd6b), C64e(0x7e912ac5c5bb7e2a), - C64e(0x349ee54f4f7b34e5), C64e(0x3ac116ededd73a16), - C64e(0x5417c58686d254c5), C64e(0x622fd79a9af862d7), - C64e(0xffcc55666699ff55), C64e(0xa722941111b6a794), - C64e(0x4a0fcf8a8ac04acf), C64e(0x30c910e9e9d93010), - C64e(0x0a080604040e0a06), C64e(0x98e781fefe669881), - C64e(0x0b5bf0a0a0ab0bf0), C64e(0xccf0447878b4cc44), - C64e(0xd54aba2525f0d5ba), C64e(0x3e96e34b4b753ee3), - C64e(0x0e5ff3a2a2ac0ef3), C64e(0x19bafe5d5d4419fe), - C64e(0x5b1bc08080db5bc0), C64e(0x850a8a050580858a), - C64e(0xec7ead3f3fd3ecad), C64e(0xdf42bc2121fedfbc), - C64e(0xd8e0487070a8d848), C64e(0x0cf904f1f1fd0c04), - C64e(0x7ac6df6363197adf), C64e(0x58eec177772f58c1), - C64e(0x9f4575afaf309f75), C64e(0xa584634242e7a563), - C64e(0x5040302020705030), C64e(0x2ed11ae5e5cb2e1a), - C64e(0x12e10efdfdef120e), C64e(0xb7656dbfbf08b76d), - C64e(0xd4194c818155d44c), C64e(0x3c30141818243c14), - C64e(0x5f4c352626795f35), C64e(0x719d2fc3c3b2712f), - C64e(0x3867e1bebe8638e1), C64e(0xfd6aa23535c8fda2), - C64e(0x4f0bcc8888c74fcc), C64e(0x4b5c392e2e654b39), - C64e(0xf93d5793936af957), C64e(0x0daaf25555580df2), - C64e(0x9de382fcfc619d82), C64e(0xc9f4477a7ab3c947), - C64e(0xef8bacc8c827efac), C64e(0x326fe7baba8832e7), - C64e(0x7d642b32324f7d2b), C64e(0xa4d795e6e642a495), - C64e(0xfb9ba0c0c03bfba0), C64e(0xb332981919aab398), - C64e(0x6827d19e9ef668d1), C64e(0x815d7fa3a322817f), - C64e(0xaa88664444eeaa66), C64e(0x82a87e5454d6827e), - C64e(0xe676ab3b3bdde6ab), C64e(0x9e16830b0b959e83), - C64e(0x4503ca8c8cc945ca), C64e(0x7b9529c7c7bc7b29), - C64e(0x6ed6d36b6b056ed3), C64e(0x44503c28286c443c), - C64e(0x8b5579a7a72c8b79), C64e(0x3d63e2bcbc813de2), - C64e(0x272c1d161631271d), C64e(0x9a4176adad379a76), - C64e(0x4dad3bdbdb964d3b), C64e(0xfac85664649efa56), - C64e(0xd2e84e7474a6d24e), C64e(0x22281e141436221e), - C64e(0x763fdb9292e476db), C64e(0x1e180a0c0c121e0a), - C64e(0xb4906c4848fcb46c), C64e(0x376be4b8b88f37e4), - C64e(0xe7255d9f9f78e75d), C64e(0xb2616ebdbd0fb26e), - C64e(0x2a86ef4343692aef), C64e(0xf193a6c4c435f1a6), - C64e(0xe372a83939dae3a8), C64e(0xf762a43131c6f7a4), - C64e(0x59bd37d3d38a5937), C64e(0x86ff8bf2f274868b), - C64e(0x56b132d5d5835632), C64e(0xc50d438b8b4ec543), - C64e(0xebdc596e6e85eb59), C64e(0xc2afb7dada18c2b7), - C64e(0x8f028c01018e8f8c), C64e(0xac7964b1b11dac64), - C64e(0x6d23d29c9cf16dd2), C64e(0x3b92e04949723be0), - C64e(0xc7abb4d8d81fc7b4), C64e(0x1543faacacb915fa), - C64e(0x09fd07f3f3fa0907), C64e(0x6f8525cfcfa06f25), - C64e(0xea8fafcaca20eaaf), C64e(0x89f38ef4f47d898e), - C64e(0x208ee947476720e9), C64e(0x2820181010382818), - C64e(0x64ded56f6f0b64d5), C64e(0x83fb88f0f0738388), - C64e(0xb1946f4a4afbb16f), C64e(0x96b8725c5cca9672), - C64e(0x6c70243838546c24), C64e(0x08aef157575f08f1), - C64e(0x52e6c773732152c7), C64e(0xf33551979764f351), - C64e(0x658d23cbcbae6523), C64e(0x84597ca1a125847c), - C64e(0xbfcb9ce8e857bf9c), C64e(0x637c213e3e5d6321), - C64e(0x7c37dd9696ea7cdd), C64e(0x7fc2dc61611e7fdc), - C64e(0x911a860d0d9c9186), C64e(0x941e850f0f9b9485), - C64e(0xabdb90e0e04bab90), C64e(0xc6f8427c7cbac642), - C64e(0x57e2c471712657c4), C64e(0xe583aacccc29e5aa), - C64e(0x733bd89090e373d8), C64e(0x0f0c050606090f05), - C64e(0x03f501f7f7f40301), C64e(0x3638121c1c2a3612), - C64e(0xfe9fa3c2c23cfea3), C64e(0xe1d45f6a6a8be15f), - C64e(0x1047f9aeaebe10f9), C64e(0x6bd2d06969026bd0), - C64e(0xa82e911717bfa891), C64e(0xe82958999971e858), - C64e(0x6974273a3a536927), C64e(0xd04eb92727f7d0b9), - C64e(0x48a938d9d9914838), C64e(0x35cd13ebebde3513), - C64e(0xce56b32b2be5ceb3), C64e(0x5544332222775533), - C64e(0xd6bfbbd2d204d6bb), C64e(0x904970a9a9399070), - C64e(0x800e890707878089), C64e(0xf266a73333c1f2a7), - C64e(0xc15ab62d2decc1b6), C64e(0x6678223c3c5a6622), - C64e(0xad2a921515b8ad92), C64e(0x608920c9c9a96020), - C64e(0xdb154987875cdb49), C64e(0x1a4fffaaaab01aff), - C64e(0x88a0785050d88878), C64e(0x8e517aa5a52b8e7a), - C64e(0x8a068f0303898a8f), C64e(0x13b2f859594a13f8), - C64e(0x9b12800909929b80), C64e(0x3934171a1a233917), - C64e(0x75cada65651075da), C64e(0x53b531d7d7845331), - C64e(0x5113c68484d551c6), C64e(0xd3bbb8d0d003d3b8), - C64e(0x5e1fc38282dc5ec3), C64e(0xcb52b02929e2cbb0), - C64e(0x99b4775a5ac39977), C64e(0x333c111e1e2d3311), - C64e(0x46f6cb7b7b3d46cb), C64e(0x1f4bfca8a8b71ffc), - C64e(0x61dad66d6d0c61d6), C64e(0x4e583a2c2c624e3a) -}; - -__constant static const ulong T5_G[] = { - C64e(0xa5f497a5c6c632f4), C64e(0x8497eb84f8f86f97), - C64e(0x99b0c799eeee5eb0), C64e(0x8d8cf78df6f67a8c), - C64e(0x0d17e50dffffe817), C64e(0xbddcb7bdd6d60adc), - C64e(0xb1c8a7b1dede16c8), C64e(0x54fc395491916dfc), - C64e(0x50f0c050606090f0), C64e(0x0305040302020705), - C64e(0xa9e087a9cece2ee0), C64e(0x7d87ac7d5656d187), - C64e(0x192bd519e7e7cc2b), C64e(0x62a67162b5b513a6), - C64e(0xe6319ae64d4d7c31), C64e(0x9ab5c39aecec59b5), - C64e(0x45cf05458f8f40cf), C64e(0x9dbc3e9d1f1fa3bc), - C64e(0x40c00940898949c0), C64e(0x8792ef87fafa6892), - C64e(0x153fc515efefd03f), C64e(0xeb267febb2b29426), - C64e(0xc94007c98e8ece40), C64e(0x0b1ded0bfbfbe61d), - C64e(0xec2f82ec41416e2f), C64e(0x67a97d67b3b31aa9), - C64e(0xfd1cbefd5f5f431c), C64e(0xea258aea45456025), - C64e(0xbfda46bf2323f9da), C64e(0xf702a6f753535102), - C64e(0x96a1d396e4e445a1), C64e(0x5bed2d5b9b9b76ed), - C64e(0xc25deac27575285d), C64e(0x1c24d91ce1e1c524), - C64e(0xaee97aae3d3dd4e9), C64e(0x6abe986a4c4cf2be), - C64e(0x5aeed85a6c6c82ee), C64e(0x41c3fc417e7ebdc3), - C64e(0x0206f102f5f5f306), C64e(0x4fd11d4f838352d1), - C64e(0x5ce4d05c68688ce4), C64e(0xf407a2f451515607), - C64e(0x345cb934d1d18d5c), C64e(0x0818e908f9f9e118), - C64e(0x93aedf93e2e24cae), C64e(0x73954d73abab3e95), - C64e(0x53f5c453626297f5), C64e(0x3f41543f2a2a6b41), - C64e(0x0c14100c08081c14), C64e(0x52f63152959563f6), - C64e(0x65af8c654646e9af), C64e(0x5ee2215e9d9d7fe2), - C64e(0x2878602830304878), C64e(0xa1f86ea13737cff8), - C64e(0x0f11140f0a0a1b11), C64e(0xb5c45eb52f2febc4), - C64e(0x091b1c090e0e151b), C64e(0x365a483624247e5a), - C64e(0x9bb6369b1b1badb6), C64e(0x3d47a53ddfdf9847), - C64e(0x266a8126cdcda76a), C64e(0x69bb9c694e4ef5bb), - C64e(0xcd4cfecd7f7f334c), C64e(0x9fbacf9feaea50ba), - C64e(0x1b2d241b12123f2d), C64e(0x9eb93a9e1d1da4b9), - C64e(0x749cb0745858c49c), C64e(0x2e72682e34344672), - C64e(0x2d776c2d36364177), C64e(0xb2cda3b2dcdc11cd), - C64e(0xee2973eeb4b49d29), C64e(0xfb16b6fb5b5b4d16), - C64e(0xf60153f6a4a4a501), C64e(0x4dd7ec4d7676a1d7), - C64e(0x61a37561b7b714a3), C64e(0xce49face7d7d3449), - C64e(0x7b8da47b5252df8d), C64e(0x3e42a13edddd9f42), - C64e(0x7193bc715e5ecd93), C64e(0x97a226971313b1a2), - C64e(0xf50457f5a6a6a204), C64e(0x68b86968b9b901b8), - C64e(0x0000000000000000), C64e(0x2c74992cc1c1b574), - C64e(0x60a080604040e0a0), C64e(0x1f21dd1fe3e3c221), - C64e(0xc843f2c879793a43), C64e(0xed2c77edb6b69a2c), - C64e(0xbed9b3bed4d40dd9), C64e(0x46ca01468d8d47ca), - C64e(0xd970ced967671770), C64e(0x4bdde44b7272afdd), - C64e(0xde7933de9494ed79), C64e(0xd4672bd49898ff67), - C64e(0xe8237be8b0b09323), C64e(0x4ade114a85855bde), - C64e(0x6bbd6d6bbbbb06bd), C64e(0x2a7e912ac5c5bb7e), - C64e(0xe5349ee54f4f7b34), C64e(0x163ac116ededd73a), - C64e(0xc55417c58686d254), C64e(0xd7622fd79a9af862), - C64e(0x55ffcc55666699ff), C64e(0x94a722941111b6a7), - C64e(0xcf4a0fcf8a8ac04a), C64e(0x1030c910e9e9d930), - C64e(0x060a080604040e0a), C64e(0x8198e781fefe6698), - C64e(0xf00b5bf0a0a0ab0b), C64e(0x44ccf0447878b4cc), - C64e(0xbad54aba2525f0d5), C64e(0xe33e96e34b4b753e), - C64e(0xf30e5ff3a2a2ac0e), C64e(0xfe19bafe5d5d4419), - C64e(0xc05b1bc08080db5b), C64e(0x8a850a8a05058085), - C64e(0xadec7ead3f3fd3ec), C64e(0xbcdf42bc2121fedf), - C64e(0x48d8e0487070a8d8), C64e(0x040cf904f1f1fd0c), - C64e(0xdf7ac6df6363197a), C64e(0xc158eec177772f58), - C64e(0x759f4575afaf309f), C64e(0x63a584634242e7a5), - C64e(0x3050403020207050), C64e(0x1a2ed11ae5e5cb2e), - C64e(0x0e12e10efdfdef12), C64e(0x6db7656dbfbf08b7), - C64e(0x4cd4194c818155d4), C64e(0x143c30141818243c), - C64e(0x355f4c352626795f), C64e(0x2f719d2fc3c3b271), - C64e(0xe13867e1bebe8638), C64e(0xa2fd6aa23535c8fd), - C64e(0xcc4f0bcc8888c74f), C64e(0x394b5c392e2e654b), - C64e(0x57f93d5793936af9), C64e(0xf20daaf25555580d), - C64e(0x829de382fcfc619d), C64e(0x47c9f4477a7ab3c9), - C64e(0xacef8bacc8c827ef), C64e(0xe7326fe7baba8832), - C64e(0x2b7d642b32324f7d), C64e(0x95a4d795e6e642a4), - C64e(0xa0fb9ba0c0c03bfb), C64e(0x98b332981919aab3), - C64e(0xd16827d19e9ef668), C64e(0x7f815d7fa3a32281), - C64e(0x66aa88664444eeaa), C64e(0x7e82a87e5454d682), - C64e(0xabe676ab3b3bdde6), C64e(0x839e16830b0b959e), - C64e(0xca4503ca8c8cc945), C64e(0x297b9529c7c7bc7b), - C64e(0xd36ed6d36b6b056e), C64e(0x3c44503c28286c44), - C64e(0x798b5579a7a72c8b), C64e(0xe23d63e2bcbc813d), - C64e(0x1d272c1d16163127), C64e(0x769a4176adad379a), - C64e(0x3b4dad3bdbdb964d), C64e(0x56fac85664649efa), - C64e(0x4ed2e84e7474a6d2), C64e(0x1e22281e14143622), - C64e(0xdb763fdb9292e476), C64e(0x0a1e180a0c0c121e), - C64e(0x6cb4906c4848fcb4), C64e(0xe4376be4b8b88f37), - C64e(0x5de7255d9f9f78e7), C64e(0x6eb2616ebdbd0fb2), - C64e(0xef2a86ef4343692a), C64e(0xa6f193a6c4c435f1), - C64e(0xa8e372a83939dae3), C64e(0xa4f762a43131c6f7), - C64e(0x3759bd37d3d38a59), C64e(0x8b86ff8bf2f27486), - C64e(0x3256b132d5d58356), C64e(0x43c50d438b8b4ec5), - C64e(0x59ebdc596e6e85eb), C64e(0xb7c2afb7dada18c2), - C64e(0x8c8f028c01018e8f), C64e(0x64ac7964b1b11dac), - C64e(0xd26d23d29c9cf16d), C64e(0xe03b92e04949723b), - C64e(0xb4c7abb4d8d81fc7), C64e(0xfa1543faacacb915), - C64e(0x0709fd07f3f3fa09), C64e(0x256f8525cfcfa06f), - C64e(0xafea8fafcaca20ea), C64e(0x8e89f38ef4f47d89), - C64e(0xe9208ee947476720), C64e(0x1828201810103828), - C64e(0xd564ded56f6f0b64), C64e(0x8883fb88f0f07383), - C64e(0x6fb1946f4a4afbb1), C64e(0x7296b8725c5cca96), - C64e(0x246c70243838546c), C64e(0xf108aef157575f08), - C64e(0xc752e6c773732152), C64e(0x51f33551979764f3), - C64e(0x23658d23cbcbae65), C64e(0x7c84597ca1a12584), - C64e(0x9cbfcb9ce8e857bf), C64e(0x21637c213e3e5d63), - C64e(0xdd7c37dd9696ea7c), C64e(0xdc7fc2dc61611e7f), - C64e(0x86911a860d0d9c91), C64e(0x85941e850f0f9b94), - C64e(0x90abdb90e0e04bab), C64e(0x42c6f8427c7cbac6), - C64e(0xc457e2c471712657), C64e(0xaae583aacccc29e5), - C64e(0xd8733bd89090e373), C64e(0x050f0c050606090f), - C64e(0x0103f501f7f7f403), C64e(0x123638121c1c2a36), - C64e(0xa3fe9fa3c2c23cfe), C64e(0x5fe1d45f6a6a8be1), - C64e(0xf91047f9aeaebe10), C64e(0xd06bd2d06969026b), - C64e(0x91a82e911717bfa8), C64e(0x58e82958999971e8), - C64e(0x276974273a3a5369), C64e(0xb9d04eb92727f7d0), - C64e(0x3848a938d9d99148), C64e(0x1335cd13ebebde35), - C64e(0xb3ce56b32b2be5ce), C64e(0x3355443322227755), - C64e(0xbbd6bfbbd2d204d6), C64e(0x70904970a9a93990), - C64e(0x89800e8907078780), C64e(0xa7f266a73333c1f2), - C64e(0xb6c15ab62d2decc1), C64e(0x226678223c3c5a66), - C64e(0x92ad2a921515b8ad), C64e(0x20608920c9c9a960), - C64e(0x49db154987875cdb), C64e(0xff1a4fffaaaab01a), - C64e(0x7888a0785050d888), C64e(0x7a8e517aa5a52b8e), - C64e(0x8f8a068f0303898a), C64e(0xf813b2f859594a13), - C64e(0x809b12800909929b), C64e(0x173934171a1a2339), - C64e(0xda75cada65651075), C64e(0x3153b531d7d78453), - C64e(0xc65113c68484d551), C64e(0xb8d3bbb8d0d003d3), - C64e(0xc35e1fc38282dc5e), C64e(0xb0cb52b02929e2cb), - C64e(0x7799b4775a5ac399), C64e(0x11333c111e1e2d33), - C64e(0xcb46f6cb7b7b3d46), C64e(0xfc1f4bfca8a8b71f), - C64e(0xd661dad66d6d0c61), C64e(0x3a4e583a2c2c624e) -}; - -__constant static const ulong T6_G[] = { - C64e(0xf4a5f497a5c6c632), C64e(0x978497eb84f8f86f), - C64e(0xb099b0c799eeee5e), C64e(0x8c8d8cf78df6f67a), - C64e(0x170d17e50dffffe8), C64e(0xdcbddcb7bdd6d60a), - C64e(0xc8b1c8a7b1dede16), C64e(0xfc54fc395491916d), - C64e(0xf050f0c050606090), C64e(0x0503050403020207), - C64e(0xe0a9e087a9cece2e), C64e(0x877d87ac7d5656d1), - C64e(0x2b192bd519e7e7cc), C64e(0xa662a67162b5b513), - C64e(0x31e6319ae64d4d7c), C64e(0xb59ab5c39aecec59), - C64e(0xcf45cf05458f8f40), C64e(0xbc9dbc3e9d1f1fa3), - C64e(0xc040c00940898949), C64e(0x928792ef87fafa68), - C64e(0x3f153fc515efefd0), C64e(0x26eb267febb2b294), - C64e(0x40c94007c98e8ece), C64e(0x1d0b1ded0bfbfbe6), - C64e(0x2fec2f82ec41416e), C64e(0xa967a97d67b3b31a), - C64e(0x1cfd1cbefd5f5f43), C64e(0x25ea258aea454560), - C64e(0xdabfda46bf2323f9), C64e(0x02f702a6f7535351), - C64e(0xa196a1d396e4e445), C64e(0xed5bed2d5b9b9b76), - C64e(0x5dc25deac2757528), C64e(0x241c24d91ce1e1c5), - C64e(0xe9aee97aae3d3dd4), C64e(0xbe6abe986a4c4cf2), - C64e(0xee5aeed85a6c6c82), C64e(0xc341c3fc417e7ebd), - C64e(0x060206f102f5f5f3), C64e(0xd14fd11d4f838352), - C64e(0xe45ce4d05c68688c), C64e(0x07f407a2f4515156), - C64e(0x5c345cb934d1d18d), C64e(0x180818e908f9f9e1), - C64e(0xae93aedf93e2e24c), C64e(0x9573954d73abab3e), - C64e(0xf553f5c453626297), C64e(0x413f41543f2a2a6b), - C64e(0x140c14100c08081c), C64e(0xf652f63152959563), - C64e(0xaf65af8c654646e9), C64e(0xe25ee2215e9d9d7f), - C64e(0x7828786028303048), C64e(0xf8a1f86ea13737cf), - C64e(0x110f11140f0a0a1b), C64e(0xc4b5c45eb52f2feb), - C64e(0x1b091b1c090e0e15), C64e(0x5a365a483624247e), - C64e(0xb69bb6369b1b1bad), C64e(0x473d47a53ddfdf98), - C64e(0x6a266a8126cdcda7), C64e(0xbb69bb9c694e4ef5), - C64e(0x4ccd4cfecd7f7f33), C64e(0xba9fbacf9feaea50), - C64e(0x2d1b2d241b12123f), C64e(0xb99eb93a9e1d1da4), - C64e(0x9c749cb0745858c4), C64e(0x722e72682e343446), - C64e(0x772d776c2d363641), C64e(0xcdb2cda3b2dcdc11), - C64e(0x29ee2973eeb4b49d), C64e(0x16fb16b6fb5b5b4d), - C64e(0x01f60153f6a4a4a5), C64e(0xd74dd7ec4d7676a1), - C64e(0xa361a37561b7b714), C64e(0x49ce49face7d7d34), - C64e(0x8d7b8da47b5252df), C64e(0x423e42a13edddd9f), - C64e(0x937193bc715e5ecd), C64e(0xa297a226971313b1), - C64e(0x04f50457f5a6a6a2), C64e(0xb868b86968b9b901), - C64e(0x0000000000000000), C64e(0x742c74992cc1c1b5), - C64e(0xa060a080604040e0), C64e(0x211f21dd1fe3e3c2), - C64e(0x43c843f2c879793a), C64e(0x2ced2c77edb6b69a), - C64e(0xd9bed9b3bed4d40d), C64e(0xca46ca01468d8d47), - C64e(0x70d970ced9676717), C64e(0xdd4bdde44b7272af), - C64e(0x79de7933de9494ed), C64e(0x67d4672bd49898ff), - C64e(0x23e8237be8b0b093), C64e(0xde4ade114a85855b), - C64e(0xbd6bbd6d6bbbbb06), C64e(0x7e2a7e912ac5c5bb), - C64e(0x34e5349ee54f4f7b), C64e(0x3a163ac116ededd7), - C64e(0x54c55417c58686d2), C64e(0x62d7622fd79a9af8), - C64e(0xff55ffcc55666699), C64e(0xa794a722941111b6), - C64e(0x4acf4a0fcf8a8ac0), C64e(0x301030c910e9e9d9), - C64e(0x0a060a080604040e), C64e(0x988198e781fefe66), - C64e(0x0bf00b5bf0a0a0ab), C64e(0xcc44ccf0447878b4), - C64e(0xd5bad54aba2525f0), C64e(0x3ee33e96e34b4b75), - C64e(0x0ef30e5ff3a2a2ac), C64e(0x19fe19bafe5d5d44), - C64e(0x5bc05b1bc08080db), C64e(0x858a850a8a050580), - C64e(0xecadec7ead3f3fd3), C64e(0xdfbcdf42bc2121fe), - C64e(0xd848d8e0487070a8), C64e(0x0c040cf904f1f1fd), - C64e(0x7adf7ac6df636319), C64e(0x58c158eec177772f), - C64e(0x9f759f4575afaf30), C64e(0xa563a584634242e7), - C64e(0x5030504030202070), C64e(0x2e1a2ed11ae5e5cb), - C64e(0x120e12e10efdfdef), C64e(0xb76db7656dbfbf08), - C64e(0xd44cd4194c818155), C64e(0x3c143c3014181824), - C64e(0x5f355f4c35262679), C64e(0x712f719d2fc3c3b2), - C64e(0x38e13867e1bebe86), C64e(0xfda2fd6aa23535c8), - C64e(0x4fcc4f0bcc8888c7), C64e(0x4b394b5c392e2e65), - C64e(0xf957f93d5793936a), C64e(0x0df20daaf2555558), - C64e(0x9d829de382fcfc61), C64e(0xc947c9f4477a7ab3), - C64e(0xefacef8bacc8c827), C64e(0x32e7326fe7baba88), - C64e(0x7d2b7d642b32324f), C64e(0xa495a4d795e6e642), - C64e(0xfba0fb9ba0c0c03b), C64e(0xb398b332981919aa), - C64e(0x68d16827d19e9ef6), C64e(0x817f815d7fa3a322), - C64e(0xaa66aa88664444ee), C64e(0x827e82a87e5454d6), - C64e(0xe6abe676ab3b3bdd), C64e(0x9e839e16830b0b95), - C64e(0x45ca4503ca8c8cc9), C64e(0x7b297b9529c7c7bc), - C64e(0x6ed36ed6d36b6b05), C64e(0x443c44503c28286c), - C64e(0x8b798b5579a7a72c), C64e(0x3de23d63e2bcbc81), - C64e(0x271d272c1d161631), C64e(0x9a769a4176adad37), - C64e(0x4d3b4dad3bdbdb96), C64e(0xfa56fac85664649e), - C64e(0xd24ed2e84e7474a6), C64e(0x221e22281e141436), - C64e(0x76db763fdb9292e4), C64e(0x1e0a1e180a0c0c12), - C64e(0xb46cb4906c4848fc), C64e(0x37e4376be4b8b88f), - C64e(0xe75de7255d9f9f78), C64e(0xb26eb2616ebdbd0f), - C64e(0x2aef2a86ef434369), C64e(0xf1a6f193a6c4c435), - C64e(0xe3a8e372a83939da), C64e(0xf7a4f762a43131c6), - C64e(0x593759bd37d3d38a), C64e(0x868b86ff8bf2f274), - C64e(0x563256b132d5d583), C64e(0xc543c50d438b8b4e), - C64e(0xeb59ebdc596e6e85), C64e(0xc2b7c2afb7dada18), - C64e(0x8f8c8f028c01018e), C64e(0xac64ac7964b1b11d), - C64e(0x6dd26d23d29c9cf1), C64e(0x3be03b92e0494972), - C64e(0xc7b4c7abb4d8d81f), C64e(0x15fa1543faacacb9), - C64e(0x090709fd07f3f3fa), C64e(0x6f256f8525cfcfa0), - C64e(0xeaafea8fafcaca20), C64e(0x898e89f38ef4f47d), - C64e(0x20e9208ee9474767), C64e(0x2818282018101038), - C64e(0x64d564ded56f6f0b), C64e(0x838883fb88f0f073), - C64e(0xb16fb1946f4a4afb), C64e(0x967296b8725c5cca), - C64e(0x6c246c7024383854), C64e(0x08f108aef157575f), - C64e(0x52c752e6c7737321), C64e(0xf351f33551979764), - C64e(0x6523658d23cbcbae), C64e(0x847c84597ca1a125), - C64e(0xbf9cbfcb9ce8e857), C64e(0x6321637c213e3e5d), - C64e(0x7cdd7c37dd9696ea), C64e(0x7fdc7fc2dc61611e), - C64e(0x9186911a860d0d9c), C64e(0x9485941e850f0f9b), - C64e(0xab90abdb90e0e04b), C64e(0xc642c6f8427c7cba), - C64e(0x57c457e2c4717126), C64e(0xe5aae583aacccc29), - C64e(0x73d8733bd89090e3), C64e(0x0f050f0c05060609), - C64e(0x030103f501f7f7f4), C64e(0x36123638121c1c2a), - C64e(0xfea3fe9fa3c2c23c), C64e(0xe15fe1d45f6a6a8b), - C64e(0x10f91047f9aeaebe), C64e(0x6bd06bd2d0696902), - C64e(0xa891a82e911717bf), C64e(0xe858e82958999971), - C64e(0x69276974273a3a53), C64e(0xd0b9d04eb92727f7), - C64e(0x483848a938d9d991), C64e(0x351335cd13ebebde), - C64e(0xceb3ce56b32b2be5), C64e(0x5533554433222277), - C64e(0xd6bbd6bfbbd2d204), C64e(0x9070904970a9a939), - C64e(0x8089800e89070787), C64e(0xf2a7f266a73333c1), - C64e(0xc1b6c15ab62d2dec), C64e(0x66226678223c3c5a), - C64e(0xad92ad2a921515b8), C64e(0x6020608920c9c9a9), - C64e(0xdb49db154987875c), C64e(0x1aff1a4fffaaaab0), - C64e(0x887888a0785050d8), C64e(0x8e7a8e517aa5a52b), - C64e(0x8a8f8a068f030389), C64e(0x13f813b2f859594a), - C64e(0x9b809b1280090992), C64e(0x39173934171a1a23), - C64e(0x75da75cada656510), C64e(0x533153b531d7d784), - C64e(0x51c65113c68484d5), C64e(0xd3b8d3bbb8d0d003), - C64e(0x5ec35e1fc38282dc), C64e(0xcbb0cb52b02929e2), - C64e(0x997799b4775a5ac3), C64e(0x3311333c111e1e2d), - C64e(0x46cb46f6cb7b7b3d), C64e(0x1ffc1f4bfca8a8b7), - C64e(0x61d661dad66d6d0c), C64e(0x4e3a4e583a2c2c62) -}; - -__constant static const ulong T7_G[] = { - C64e(0x32f4a5f497a5c6c6), C64e(0x6f978497eb84f8f8), - C64e(0x5eb099b0c799eeee), C64e(0x7a8c8d8cf78df6f6), - C64e(0xe8170d17e50dffff), C64e(0x0adcbddcb7bdd6d6), - C64e(0x16c8b1c8a7b1dede), C64e(0x6dfc54fc39549191), - C64e(0x90f050f0c0506060), C64e(0x0705030504030202), - C64e(0x2ee0a9e087a9cece), C64e(0xd1877d87ac7d5656), - C64e(0xcc2b192bd519e7e7), C64e(0x13a662a67162b5b5), - C64e(0x7c31e6319ae64d4d), C64e(0x59b59ab5c39aecec), - C64e(0x40cf45cf05458f8f), C64e(0xa3bc9dbc3e9d1f1f), - C64e(0x49c040c009408989), C64e(0x68928792ef87fafa), - C64e(0xd03f153fc515efef), C64e(0x9426eb267febb2b2), - C64e(0xce40c94007c98e8e), C64e(0xe61d0b1ded0bfbfb), - C64e(0x6e2fec2f82ec4141), C64e(0x1aa967a97d67b3b3), - C64e(0x431cfd1cbefd5f5f), C64e(0x6025ea258aea4545), - C64e(0xf9dabfda46bf2323), C64e(0x5102f702a6f75353), - C64e(0x45a196a1d396e4e4), C64e(0x76ed5bed2d5b9b9b), - C64e(0x285dc25deac27575), C64e(0xc5241c24d91ce1e1), - C64e(0xd4e9aee97aae3d3d), C64e(0xf2be6abe986a4c4c), - C64e(0x82ee5aeed85a6c6c), C64e(0xbdc341c3fc417e7e), - C64e(0xf3060206f102f5f5), C64e(0x52d14fd11d4f8383), - C64e(0x8ce45ce4d05c6868), C64e(0x5607f407a2f45151), - C64e(0x8d5c345cb934d1d1), C64e(0xe1180818e908f9f9), - C64e(0x4cae93aedf93e2e2), C64e(0x3e9573954d73abab), - C64e(0x97f553f5c4536262), C64e(0x6b413f41543f2a2a), - C64e(0x1c140c14100c0808), C64e(0x63f652f631529595), - C64e(0xe9af65af8c654646), C64e(0x7fe25ee2215e9d9d), - C64e(0x4878287860283030), C64e(0xcff8a1f86ea13737), - C64e(0x1b110f11140f0a0a), C64e(0xebc4b5c45eb52f2f), - C64e(0x151b091b1c090e0e), C64e(0x7e5a365a48362424), - C64e(0xadb69bb6369b1b1b), C64e(0x98473d47a53ddfdf), - C64e(0xa76a266a8126cdcd), C64e(0xf5bb69bb9c694e4e), - C64e(0x334ccd4cfecd7f7f), C64e(0x50ba9fbacf9feaea), - C64e(0x3f2d1b2d241b1212), C64e(0xa4b99eb93a9e1d1d), - C64e(0xc49c749cb0745858), C64e(0x46722e72682e3434), - C64e(0x41772d776c2d3636), C64e(0x11cdb2cda3b2dcdc), - C64e(0x9d29ee2973eeb4b4), C64e(0x4d16fb16b6fb5b5b), - C64e(0xa501f60153f6a4a4), C64e(0xa1d74dd7ec4d7676), - C64e(0x14a361a37561b7b7), C64e(0x3449ce49face7d7d), - C64e(0xdf8d7b8da47b5252), C64e(0x9f423e42a13edddd), - C64e(0xcd937193bc715e5e), C64e(0xb1a297a226971313), - C64e(0xa204f50457f5a6a6), C64e(0x01b868b86968b9b9), - C64e(0x0000000000000000), C64e(0xb5742c74992cc1c1), - C64e(0xe0a060a080604040), C64e(0xc2211f21dd1fe3e3), - C64e(0x3a43c843f2c87979), C64e(0x9a2ced2c77edb6b6), - C64e(0x0dd9bed9b3bed4d4), C64e(0x47ca46ca01468d8d), - C64e(0x1770d970ced96767), C64e(0xafdd4bdde44b7272), - C64e(0xed79de7933de9494), C64e(0xff67d4672bd49898), - C64e(0x9323e8237be8b0b0), C64e(0x5bde4ade114a8585), - C64e(0x06bd6bbd6d6bbbbb), C64e(0xbb7e2a7e912ac5c5), - C64e(0x7b34e5349ee54f4f), C64e(0xd73a163ac116eded), - C64e(0xd254c55417c58686), C64e(0xf862d7622fd79a9a), - C64e(0x99ff55ffcc556666), C64e(0xb6a794a722941111), - C64e(0xc04acf4a0fcf8a8a), C64e(0xd9301030c910e9e9), - C64e(0x0e0a060a08060404), C64e(0x66988198e781fefe), - C64e(0xab0bf00b5bf0a0a0), C64e(0xb4cc44ccf0447878), - C64e(0xf0d5bad54aba2525), C64e(0x753ee33e96e34b4b), - C64e(0xac0ef30e5ff3a2a2), C64e(0x4419fe19bafe5d5d), - C64e(0xdb5bc05b1bc08080), C64e(0x80858a850a8a0505), - C64e(0xd3ecadec7ead3f3f), C64e(0xfedfbcdf42bc2121), - C64e(0xa8d848d8e0487070), C64e(0xfd0c040cf904f1f1), - C64e(0x197adf7ac6df6363), C64e(0x2f58c158eec17777), - C64e(0x309f759f4575afaf), C64e(0xe7a563a584634242), - C64e(0x7050305040302020), C64e(0xcb2e1a2ed11ae5e5), - C64e(0xef120e12e10efdfd), C64e(0x08b76db7656dbfbf), - C64e(0x55d44cd4194c8181), C64e(0x243c143c30141818), - C64e(0x795f355f4c352626), C64e(0xb2712f719d2fc3c3), - C64e(0x8638e13867e1bebe), C64e(0xc8fda2fd6aa23535), - C64e(0xc74fcc4f0bcc8888), C64e(0x654b394b5c392e2e), - C64e(0x6af957f93d579393), C64e(0x580df20daaf25555), - C64e(0x619d829de382fcfc), C64e(0xb3c947c9f4477a7a), - C64e(0x27efacef8bacc8c8), C64e(0x8832e7326fe7baba), - C64e(0x4f7d2b7d642b3232), C64e(0x42a495a4d795e6e6), - C64e(0x3bfba0fb9ba0c0c0), C64e(0xaab398b332981919), - C64e(0xf668d16827d19e9e), C64e(0x22817f815d7fa3a3), - C64e(0xeeaa66aa88664444), C64e(0xd6827e82a87e5454), - C64e(0xdde6abe676ab3b3b), C64e(0x959e839e16830b0b), - C64e(0xc945ca4503ca8c8c), C64e(0xbc7b297b9529c7c7), - C64e(0x056ed36ed6d36b6b), C64e(0x6c443c44503c2828), - C64e(0x2c8b798b5579a7a7), C64e(0x813de23d63e2bcbc), - C64e(0x31271d272c1d1616), C64e(0x379a769a4176adad), - C64e(0x964d3b4dad3bdbdb), C64e(0x9efa56fac8566464), - C64e(0xa6d24ed2e84e7474), C64e(0x36221e22281e1414), - C64e(0xe476db763fdb9292), C64e(0x121e0a1e180a0c0c), - C64e(0xfcb46cb4906c4848), C64e(0x8f37e4376be4b8b8), - C64e(0x78e75de7255d9f9f), C64e(0x0fb26eb2616ebdbd), - C64e(0x692aef2a86ef4343), C64e(0x35f1a6f193a6c4c4), - C64e(0xdae3a8e372a83939), C64e(0xc6f7a4f762a43131), - C64e(0x8a593759bd37d3d3), C64e(0x74868b86ff8bf2f2), - C64e(0x83563256b132d5d5), C64e(0x4ec543c50d438b8b), - C64e(0x85eb59ebdc596e6e), C64e(0x18c2b7c2afb7dada), - C64e(0x8e8f8c8f028c0101), C64e(0x1dac64ac7964b1b1), - C64e(0xf16dd26d23d29c9c), C64e(0x723be03b92e04949), - C64e(0x1fc7b4c7abb4d8d8), C64e(0xb915fa1543faacac), - C64e(0xfa090709fd07f3f3), C64e(0xa06f256f8525cfcf), - C64e(0x20eaafea8fafcaca), C64e(0x7d898e89f38ef4f4), - C64e(0x6720e9208ee94747), C64e(0x3828182820181010), - C64e(0x0b64d564ded56f6f), C64e(0x73838883fb88f0f0), - C64e(0xfbb16fb1946f4a4a), C64e(0xca967296b8725c5c), - C64e(0x546c246c70243838), C64e(0x5f08f108aef15757), - C64e(0x2152c752e6c77373), C64e(0x64f351f335519797), - C64e(0xae6523658d23cbcb), C64e(0x25847c84597ca1a1), - C64e(0x57bf9cbfcb9ce8e8), C64e(0x5d6321637c213e3e), - C64e(0xea7cdd7c37dd9696), C64e(0x1e7fdc7fc2dc6161), - C64e(0x9c9186911a860d0d), C64e(0x9b9485941e850f0f), - C64e(0x4bab90abdb90e0e0), C64e(0xbac642c6f8427c7c), - C64e(0x2657c457e2c47171), C64e(0x29e5aae583aacccc), - C64e(0xe373d8733bd89090), C64e(0x090f050f0c050606), - C64e(0xf4030103f501f7f7), C64e(0x2a36123638121c1c), - C64e(0x3cfea3fe9fa3c2c2), C64e(0x8be15fe1d45f6a6a), - C64e(0xbe10f91047f9aeae), C64e(0x026bd06bd2d06969), - C64e(0xbfa891a82e911717), C64e(0x71e858e829589999), - C64e(0x5369276974273a3a), C64e(0xf7d0b9d04eb92727), - C64e(0x91483848a938d9d9), C64e(0xde351335cd13ebeb), - C64e(0xe5ceb3ce56b32b2b), C64e(0x7755335544332222), - C64e(0x04d6bbd6bfbbd2d2), C64e(0x399070904970a9a9), - C64e(0x878089800e890707), C64e(0xc1f2a7f266a73333), - C64e(0xecc1b6c15ab62d2d), C64e(0x5a66226678223c3c), - C64e(0xb8ad92ad2a921515), C64e(0xa96020608920c9c9), - C64e(0x5cdb49db15498787), C64e(0xb01aff1a4fffaaaa), - C64e(0xd8887888a0785050), C64e(0x2b8e7a8e517aa5a5), - C64e(0x898a8f8a068f0303), C64e(0x4a13f813b2f85959), - C64e(0x929b809b12800909), C64e(0x2339173934171a1a), - C64e(0x1075da75cada6565), C64e(0x84533153b531d7d7), - C64e(0xd551c65113c68484), C64e(0x03d3b8d3bbb8d0d0), - C64e(0xdc5ec35e1fc38282), C64e(0xe2cbb0cb52b02929), - C64e(0xc3997799b4775a5a), C64e(0x2d3311333c111e1e), - C64e(0x3d46cb46f6cb7b7b), C64e(0xb71ffc1f4bfca8a8), - C64e(0x0c61d661dad66d6d), C64e(0x624e3a4e583a2c2c) -}; -*/ -#define RBTT(d, b0, b1, b2, b3, b4, b5, b6, b7) do { \ - d = T0[t0[b0]] \ - ^ T1[t1[b1]] \ - ^ T2[t2[b2]] \ - ^ T3[t3[b3]] \ - ^ T4[t4[b4]] \ - ^ T5[t5[b5]] \ - ^ T6[t6[b6]] \ - ^ T7[t7[b7]]; \ - } while (0) - -#define ROUND_BIG_P(a, r) do { \ - t0[0x0] = B64_0(a[0x0]) ^ PC64(0x00, r); \ - t1[0x0] = B64_1(a[0x0]); \ - t2[0x0] = B64_2(a[0x0]); \ - t3[0x0] = B64_3(a[0x0]); \ - t4[0x0] = B64_4(a[0x0]); \ - t5[0x0] = B64_5(a[0x0]); \ - t6[0x0] = B64_6(a[0x0]); \ - t7[0x0] = B64_7(a[0x0]); \ - t0[0x1] = B64_0(a[0x1]) ^ PC64(0x10, r); \ - t1[0x1] = B64_1(a[0x1]); \ - t2[0x1] = B64_2(a[0x1]); \ - t3[0x1] = B64_3(a[0x1]); \ - t4[0x1] = B64_4(a[0x1]); \ - t5[0x1] = B64_5(a[0x1]); \ - t6[0x1] = B64_6(a[0x1]); \ - t7[0x1] = B64_7(a[0x1]); \ - t0[0x2] = B64_0(a[0x2]) ^ PC64(0x20, r); \ - t1[0x2] = B64_1(a[0x2]); \ - t2[0x2] = B64_2(a[0x2]); \ - t3[0x2] = B64_3(a[0x2]); \ - t4[0x2] = B64_4(a[0x2]); \ - t5[0x2] = B64_5(a[0x2]); \ - t6[0x2] = B64_6(a[0x2]); \ - t7[0x2] = B64_7(a[0x2]); \ - t0[0x3] = B64_0(a[0x3]) ^ PC64(0x30, r); \ - t1[0x3] = B64_1(a[0x3]); \ - t2[0x3] = B64_2(a[0x3]); \ - t3[0x3] = B64_3(a[0x3]); \ - t4[0x3] = B64_4(a[0x3]); \ - t5[0x3] = B64_5(a[0x3]); \ - t6[0x3] = B64_6(a[0x3]); \ - t7[0x3] = B64_7(a[0x3]); \ - t0[0x4] = B64_0(a[0x4]) ^ PC64(0x40, r); \ - t1[0x4] = B64_1(a[0x4]); \ - t2[0x4] = B64_2(a[0x4]); \ - t3[0x4] = B64_3(a[0x4]); \ - t4[0x4] = B64_4(a[0x4]); \ - t5[0x4] = B64_5(a[0x4]); \ - t6[0x4] = B64_6(a[0x4]); \ - t7[0x4] = B64_7(a[0x4]); \ - t0[0x5] = B64_0(a[0x5]) ^ PC64(0x50, r); \ - t1[0x5] = B64_1(a[0x5]); \ - t2[0x5] = B64_2(a[0x5]); \ - t3[0x5] = B64_3(a[0x5]); \ - t4[0x5] = B64_4(a[0x5]); \ - t5[0x5] = B64_5(a[0x5]); \ - t6[0x5] = B64_6(a[0x5]); \ - t7[0x5] = B64_7(a[0x5]); \ - t0[0x6] = B64_0(a[0x6]) ^ PC64(0x60, r); \ - t1[0x6] = B64_1(a[0x6]); \ - t2[0x6] = B64_2(a[0x6]); \ - t3[0x6] = B64_3(a[0x6]); \ - t4[0x6] = B64_4(a[0x6]); \ - t5[0x6] = B64_5(a[0x6]); \ - t6[0x6] = B64_6(a[0x6]); \ - t7[0x6] = B64_7(a[0x6]); \ - t0[0x7] = B64_0(a[0x7]) ^ PC64(0x70, r); \ - t1[0x7] = B64_1(a[0x7]); \ - t2[0x7] = B64_2(a[0x7]); \ - t3[0x7] = B64_3(a[0x7]); \ - t4[0x7] = B64_4(a[0x7]); \ - t5[0x7] = B64_5(a[0x7]); \ - t6[0x7] = B64_6(a[0x7]); \ - t7[0x7] = B64_7(a[0x7]); \ - t0[0x8] = B64_0(a[0x8]) ^ PC64(0x80, r); \ - t1[0x8] = B64_1(a[0x8]); \ - t2[0x8] = B64_2(a[0x8]); \ - t3[0x8] = B64_3(a[0x8]); \ - t4[0x8] = B64_4(a[0x8]); \ - t5[0x8] = B64_5(a[0x8]); \ - t6[0x8] = B64_6(a[0x8]); \ - t7[0x8] = B64_7(a[0x8]); \ - t0[0x9] = B64_0(a[0x9]) ^ PC64(0x90, r); \ - t1[0x9] = B64_1(a[0x9]); \ - t2[0x9] = B64_2(a[0x9]); \ - t3[0x9] = B64_3(a[0x9]); \ - t4[0x9] = B64_4(a[0x9]); \ - t5[0x9] = B64_5(a[0x9]); \ - t6[0x9] = B64_6(a[0x9]); \ - t7[0x9] = B64_7(a[0x9]); \ - t0[0xA] = B64_0(a[0xA]) ^ PC64(0xA0, r); \ - t1[0xA] = B64_1(a[0xA]); \ - t2[0xA] = B64_2(a[0xA]); \ - t3[0xA] = B64_3(a[0xA]); \ - t4[0xA] = B64_4(a[0xA]); \ - t5[0xA] = B64_5(a[0xA]); \ - t6[0xA] = B64_6(a[0xA]); \ - t7[0xA] = B64_7(a[0xA]); \ - t0[0xB] = B64_0(a[0xB]) ^ PC64(0xB0, r); \ - t1[0xB] = B64_1(a[0xB]); \ - t2[0xB] = B64_2(a[0xB]); \ - t3[0xB] = B64_3(a[0xB]); \ - t4[0xB] = B64_4(a[0xB]); \ - t5[0xB] = B64_5(a[0xB]); \ - t6[0xB] = B64_6(a[0xB]); \ - t7[0xB] = B64_7(a[0xB]); \ - t0[0xC] = B64_0(a[0xC]) ^ PC64(0xC0, r); \ - t1[0xC] = B64_1(a[0xC]); \ - t2[0xC] = B64_2(a[0xC]); \ - t3[0xC] = B64_3(a[0xC]); \ - t4[0xC] = B64_4(a[0xC]); \ - t5[0xC] = B64_5(a[0xC]); \ - t6[0xC] = B64_6(a[0xC]); \ - t7[0xC] = B64_7(a[0xC]); \ - t0[0xD] = B64_0(a[0xD]) ^ PC64(0xD0, r); \ - t1[0xD] = B64_1(a[0xD]); \ - t2[0xD] = B64_2(a[0xD]); \ - t3[0xD] = B64_3(a[0xD]); \ - t4[0xD] = B64_4(a[0xD]); \ - t5[0xD] = B64_5(a[0xD]); \ - t6[0xD] = B64_6(a[0xD]); \ - t7[0xD] = B64_7(a[0xD]); \ - t0[0xE] = B64_0(a[0xE]) ^ PC64(0xE0, r); \ - t1[0xE] = B64_1(a[0xE]); \ - t2[0xE] = B64_2(a[0xE]); \ - t3[0xE] = B64_3(a[0xE]); \ - t4[0xE] = B64_4(a[0xE]); \ - t5[0xE] = B64_5(a[0xE]); \ - t6[0xE] = B64_6(a[0xE]); \ - t7[0xE] = B64_7(a[0xE]); \ - t0[0xF] = B64_0(a[0xF]) ^ PC64(0xF0, r); \ - t1[0xF] = B64_1(a[0xF]); \ - t2[0xF] = B64_2(a[0xF]); \ - t3[0xF] = B64_3(a[0xF]); \ - t4[0xF] = B64_4(a[0xF]); \ - t5[0xF] = B64_5(a[0xF]); \ - t6[0xF] = B64_6(a[0xF]); \ - t7[0xF] = B64_7(a[0xF]); \ - RBTT(a[0x0], 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0xB); \ - RBTT(a[0x1], 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xC); \ - RBTT(a[0x2], 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0xD); \ - RBTT(a[0x3], 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xE); \ - RBTT(a[0x4], 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xF); \ - RBTT(a[0x5], 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0x0); \ - RBTT(a[0x6], 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, 0x1); \ - RBTT(a[0x7], 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0x2); \ - RBTT(a[0x8], 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0x3); \ - RBTT(a[0x9], 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x4); \ - RBTT(a[0xA], 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x0, 0x5); \ - RBTT(a[0xB], 0xB, 0xC, 0xD, 0xE, 0xF, 0x0, 0x1, 0x6); \ - RBTT(a[0xC], 0xC, 0xD, 0xE, 0xF, 0x0, 0x1, 0x2, 0x7); \ - RBTT(a[0xD], 0xD, 0xE, 0xF, 0x0, 0x1, 0x2, 0x3, 0x8); \ - RBTT(a[0xE], 0xE, 0xF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x9); \ - RBTT(a[0xF], 0xF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xA); \ - } while (0) - -#define ROUND_BIG_Q(a, r) do { \ - a[0x0] ^= QC64(0x00, r); \ - a[0x1] ^= QC64(0x10, r); \ - a[0x2] ^= QC64(0x20, r); \ - a[0x3] ^= QC64(0x30, r); \ - a[0x4] ^= QC64(0x40, r); \ - a[0x5] ^= QC64(0x50, r); \ - a[0x6] ^= QC64(0x60, r); \ - a[0x7] ^= QC64(0x70, r); \ - a[0x8] ^= QC64(0x80, r); \ - a[0x9] ^= QC64(0x90, r); \ - a[0xA] ^= QC64(0xA0, r); \ - a[0xB] ^= QC64(0xB0, r); \ - a[0xC] ^= QC64(0xC0, r); \ - a[0xD] ^= QC64(0xD0, r); \ - a[0xE] ^= QC64(0xE0, r); \ - a[0xF] ^= QC64(0xF0, r); \ - t0[0x0] = B64_0(a[0x0]); \ - t1[0x0] = B64_1(a[0x0]); \ - t2[0x0] = B64_2(a[0x0]); \ - t3[0x0] = B64_3(a[0x0]); \ - t4[0x0] = B64_4(a[0x0]); \ - t5[0x0] = B64_5(a[0x0]); \ - t6[0x0] = B64_6(a[0x0]); \ - t7[0x0] = B64_7(a[0x0]); \ - t0[0x1] = B64_0(a[0x1]); \ - t1[0x1] = B64_1(a[0x1]); \ - t2[0x1] = B64_2(a[0x1]); \ - t3[0x1] = B64_3(a[0x1]); \ - t4[0x1] = B64_4(a[0x1]); \ - t5[0x1] = B64_5(a[0x1]); \ - t6[0x1] = B64_6(a[0x1]); \ - t7[0x1] = B64_7(a[0x1]); \ - t0[0x2] = B64_0(a[0x2]); \ - t1[0x2] = B64_1(a[0x2]); \ - t2[0x2] = B64_2(a[0x2]); \ - t3[0x2] = B64_3(a[0x2]); \ - t4[0x2] = B64_4(a[0x2]); \ - t5[0x2] = B64_5(a[0x2]); \ - t6[0x2] = B64_6(a[0x2]); \ - t7[0x2] = B64_7(a[0x2]); \ - t0[0x3] = B64_0(a[0x3]); \ - t1[0x3] = B64_1(a[0x3]); \ - t2[0x3] = B64_2(a[0x3]); \ - t3[0x3] = B64_3(a[0x3]); \ - t4[0x3] = B64_4(a[0x3]); \ - t5[0x3] = B64_5(a[0x3]); \ - t6[0x3] = B64_6(a[0x3]); \ - t7[0x3] = B64_7(a[0x3]); \ - t0[0x4] = B64_0(a[0x4]); \ - t1[0x4] = B64_1(a[0x4]); \ - t2[0x4] = B64_2(a[0x4]); \ - t3[0x4] = B64_3(a[0x4]); \ - t4[0x4] = B64_4(a[0x4]); \ - t5[0x4] = B64_5(a[0x4]); \ - t6[0x4] = B64_6(a[0x4]); \ - t7[0x4] = B64_7(a[0x4]); \ - t0[0x5] = B64_0(a[0x5]); \ - t1[0x5] = B64_1(a[0x5]); \ - t2[0x5] = B64_2(a[0x5]); \ - t3[0x5] = B64_3(a[0x5]); \ - t4[0x5] = B64_4(a[0x5]); \ - t5[0x5] = B64_5(a[0x5]); \ - t6[0x5] = B64_6(a[0x5]); \ - t7[0x5] = B64_7(a[0x5]); \ - t0[0x6] = B64_0(a[0x6]); \ - t1[0x6] = B64_1(a[0x6]); \ - t2[0x6] = B64_2(a[0x6]); \ - t3[0x6] = B64_3(a[0x6]); \ - t4[0x6] = B64_4(a[0x6]); \ - t5[0x6] = B64_5(a[0x6]); \ - t6[0x6] = B64_6(a[0x6]); \ - t7[0x6] = B64_7(a[0x6]); \ - t0[0x7] = B64_0(a[0x7]); \ - t1[0x7] = B64_1(a[0x7]); \ - t2[0x7] = B64_2(a[0x7]); \ - t3[0x7] = B64_3(a[0x7]); \ - t4[0x7] = B64_4(a[0x7]); \ - t5[0x7] = B64_5(a[0x7]); \ - t6[0x7] = B64_6(a[0x7]); \ - t7[0x7] = B64_7(a[0x7]); \ - t0[0x8] = B64_0(a[0x8]); \ - t1[0x8] = B64_1(a[0x8]); \ - t2[0x8] = B64_2(a[0x8]); \ - t3[0x8] = B64_3(a[0x8]); \ - t4[0x8] = B64_4(a[0x8]); \ - t5[0x8] = B64_5(a[0x8]); \ - t6[0x8] = B64_6(a[0x8]); \ - t7[0x8] = B64_7(a[0x8]); \ - t0[0x9] = B64_0(a[0x9]); \ - t1[0x9] = B64_1(a[0x9]); \ - t2[0x9] = B64_2(a[0x9]); \ - t3[0x9] = B64_3(a[0x9]); \ - t4[0x9] = B64_4(a[0x9]); \ - t5[0x9] = B64_5(a[0x9]); \ - t6[0x9] = B64_6(a[0x9]); \ - t7[0x9] = B64_7(a[0x9]); \ - t0[0xA] = B64_0(a[0xA]); \ - t1[0xA] = B64_1(a[0xA]); \ - t2[0xA] = B64_2(a[0xA]); \ - t3[0xA] = B64_3(a[0xA]); \ - t4[0xA] = B64_4(a[0xA]); \ - t5[0xA] = B64_5(a[0xA]); \ - t6[0xA] = B64_6(a[0xA]); \ - t7[0xA] = B64_7(a[0xA]); \ - t0[0xB] = B64_0(a[0xB]); \ - t1[0xB] = B64_1(a[0xB]); \ - t2[0xB] = B64_2(a[0xB]); \ - t3[0xB] = B64_3(a[0xB]); \ - t4[0xB] = B64_4(a[0xB]); \ - t5[0xB] = B64_5(a[0xB]); \ - t6[0xB] = B64_6(a[0xB]); \ - t7[0xB] = B64_7(a[0xB]); \ - t0[0xC] = B64_0(a[0xC]); \ - t1[0xC] = B64_1(a[0xC]); \ - t2[0xC] = B64_2(a[0xC]); \ - t3[0xC] = B64_3(a[0xC]); \ - t4[0xC] = B64_4(a[0xC]); \ - t5[0xC] = B64_5(a[0xC]); \ - t6[0xC] = B64_6(a[0xC]); \ - t7[0xC] = B64_7(a[0xC]); \ - t0[0xD] = B64_0(a[0xD]); \ - t1[0xD] = B64_1(a[0xD]); \ - t2[0xD] = B64_2(a[0xD]); \ - t3[0xD] = B64_3(a[0xD]); \ - t4[0xD] = B64_4(a[0xD]); \ - t5[0xD] = B64_5(a[0xD]); \ - t6[0xD] = B64_6(a[0xD]); \ - t7[0xD] = B64_7(a[0xD]); \ - t0[0xE] = B64_0(a[0xE]); \ - t1[0xE] = B64_1(a[0xE]); \ - t2[0xE] = B64_2(a[0xE]); \ - t3[0xE] = B64_3(a[0xE]); \ - t4[0xE] = B64_4(a[0xE]); \ - t5[0xE] = B64_5(a[0xE]); \ - t6[0xE] = B64_6(a[0xE]); \ - t7[0xE] = B64_7(a[0xE]); \ - t0[0xF] = B64_0(a[0xF]); \ - t1[0xF] = B64_1(a[0xF]); \ - t2[0xF] = B64_2(a[0xF]); \ - t3[0xF] = B64_3(a[0xF]); \ - t4[0xF] = B64_4(a[0xF]); \ - t5[0xF] = B64_5(a[0xF]); \ - t6[0xF] = B64_6(a[0xF]); \ - t7[0xF] = B64_7(a[0xF]); \ - RBTT(a[0x0], 0x1, 0x3, 0x5, 0xB, 0x0, 0x2, 0x4, 0x6); \ - RBTT(a[0x1], 0x2, 0x4, 0x6, 0xC, 0x1, 0x3, 0x5, 0x7); \ - RBTT(a[0x2], 0x3, 0x5, 0x7, 0xD, 0x2, 0x4, 0x6, 0x8); \ - RBTT(a[0x3], 0x4, 0x6, 0x8, 0xE, 0x3, 0x5, 0x7, 0x9); \ - RBTT(a[0x4], 0x5, 0x7, 0x9, 0xF, 0x4, 0x6, 0x8, 0xA); \ - RBTT(a[0x5], 0x6, 0x8, 0xA, 0x0, 0x5, 0x7, 0x9, 0xB); \ - RBTT(a[0x6], 0x7, 0x9, 0xB, 0x1, 0x6, 0x8, 0xA, 0xC); \ - RBTT(a[0x7], 0x8, 0xA, 0xC, 0x2, 0x7, 0x9, 0xB, 0xD); \ - RBTT(a[0x8], 0x9, 0xB, 0xD, 0x3, 0x8, 0xA, 0xC, 0xE); \ - RBTT(a[0x9], 0xA, 0xC, 0xE, 0x4, 0x9, 0xB, 0xD, 0xF); \ - RBTT(a[0xA], 0xB, 0xD, 0xF, 0x5, 0xA, 0xC, 0xE, 0x0); \ - RBTT(a[0xB], 0xC, 0xE, 0x0, 0x6, 0xB, 0xD, 0xF, 0x1); \ - RBTT(a[0xC], 0xD, 0xF, 0x1, 0x7, 0xC, 0xE, 0x0, 0x2); \ - RBTT(a[0xD], 0xE, 0x0, 0x2, 0x8, 0xD, 0xF, 0x1, 0x3); \ - RBTT(a[0xE], 0xF, 0x1, 0x3, 0x9, 0xE, 0x0, 0x2, 0x4); \ - RBTT(a[0xF], 0x0, 0x2, 0x4, 0xA, 0xF, 0x1, 0x3, 0x5); \ - } while (0) - -#define PERM_BIG_P(a, start, end) do { \ - for (u = start; u < end; u++) { \ - ROUND_BIG_P(a, u); \ - } \ - } while (0) +#if __ENDIAN_LITTLE__ +#define SPH_LITTLE_ENDIAN 1 +#else +#define SPH_BIG_ENDIAN 1 +#endif + +#define SPH_UPTR sph_u64 + +typedef unsigned int sph_u32; +typedef int sph_s32; +#ifndef __OPENCL_VERSION__ +typedef unsigned long long sph_u64; +typedef long long sph_s64; +#else +typedef unsigned long sph_u64; +typedef long sph_s64; +#endif + +#define SPH_64 1 +#define SPH_64_TRUE 1 + +#define SPH_C32(x) ((sph_u32)(x ## U)) +#define SPH_T32(x) (as_uint(x)) +#define SPH_ROTL32(x, n) rotate(as_uint(x), as_uint(n)) +#define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n))) + +#define SPH_C64(x) ((sph_u64)(x ## UL)) +#define SPH_T64(x) (as_ulong(x)) +#define SPH_ROTL64(x, n) rotate(as_ulong(x), (n) & 0xFFFFFFFFFFFFFFFFUL) +#define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n))) + +#define SPH_ECHO_64 1 +#define SPH_SIMD_NOCOPY 0 +#define SPH_CUBEHASH_UNROLL 0 + +#ifndef SPH_LUFFA_PARALLEL + #define SPH_LUFFA_PARALLEL 0 +#endif + +#include "groestl.cl" + +#define SWAP4(x) as_uint(as_uchar4(x).wzyx) +#define SWAP8(x) as_ulong(as_uchar8(x).s76543210) + +#if SPH_BIG_ENDIAN + #define ENC64E(x) SWAP8(x) + #define DEC64E(x) SWAP8(*(const __global sph_u64 *) (x)); +#else + #define ENC64E(x) (x) + #define DEC64E(x) (*(const __global sph_u64 *) (x)); +#endif + +#define ROL32(x, n) rotate(x, (uint) n) +#define SHR(x, n) ((x) >> n) +#define SWAP32(a) (as_uint(as_uchar4(a).wzyx)) + +#define S0(x) (ROL32(x, 25) ^ ROL32(x, 14) ^ SHR(x, 3)) +#define S1(x) (ROL32(x, 15) ^ ROL32(x, 13) ^ SHR(x, 10)) + +#define S2(x) (ROL32(x, 30) ^ ROL32(x, 19) ^ ROL32(x, 10)) +#define S3(x) (ROL32(x, 26) ^ ROL32(x, 21) ^ ROL32(x, 7)) + +#define P(a,b,c,d,e,f,g,h,x,K) \ +{ \ + temp1 = h + S3(e) + F1(e,f,g) + (K + x); \ + d += temp1; h = temp1 + S2(a) + F0(a,b,c); \ +} -#define PERM_BIG_Q(a) do { \ - /* for (ulong u = 0; u < (14UL << 56); u += (1UL << 56)) { */ \ - for (u = 0; u < 14; u++) { \ - ROUND_BIG_Q(a, u); \ - } \ - } while (0) +#define PLAST(a,b,c,d,e,f,g,h,x,K) \ +{ \ + d += h + S3(e) + F1(e,f,g) + (x + K); \ +} +#define F0(y, x, z) bitselect(z, y, z ^ x) +#define F1(x, y, z) bitselect(z, y, x) + +#define R0 (W0 = S1(W14) + W9 + S0(W1) + W0) +#define R1 (W1 = S1(W15) + W10 + S0(W2) + W1) +#define R2 (W2 = S1(W0) + W11 + S0(W3) + W2) +#define R3 (W3 = S1(W1) + W12 + S0(W4) + W3) +#define R4 (W4 = S1(W2) + W13 + S0(W5) + W4) +#define R5 (W5 = S1(W3) + W14 + S0(W6) + W5) +#define R6 (W6 = S1(W4) + W15 + S0(W7) + W6) +#define R7 (W7 = S1(W5) + W0 + S0(W8) + W7) +#define R8 (W8 = S1(W6) + W1 + S0(W9) + W8) +#define R9 (W9 = S1(W7) + W2 + S0(W10) + W9) +#define R10 (W10 = S1(W8) + W3 + S0(W11) + W10) +#define R11 (W11 = S1(W9) + W4 + S0(W12) + W11) +#define R12 (W12 = S1(W10) + W5 + S0(W13) + W12) +#define R13 (W13 = S1(W11) + W6 + S0(W14) + W13) +#define R14 (W14 = S1(W12) + W7 + S0(W15) + W14) +#define R15 (W15 = S1(W13) + W8 + S0(W0) + W15) + +#define RD14 (S1(W12) + W7 + S0(W15) + W14) +#define RD15 (S1(W13) + W8 + S0(W0) + W15) __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) -__kernel void search(__global unsigned char* block, volatile __global uint* output, const ulong target) { - __local ulong T2[256], T3[256], T4[256], T5[256], T6[256], T7[256]; - uint u; - - // for (u = get_local_id(0); u < 256; u += get_local_size(0)) { - u = get_local_id(0); - /* - T1[u] = T1_G[u]; - T2[u] = T2_G[u]; - T3[u] = T3_G[u]; - T4[u] = T4_G[u]; - T5[u] = T5_G[u]; - T6[u] = T6_G[u]; - T7[u] = T7_G[u]; - */ - // create other tables based on T0: avoids keeping them in the kernel. -// T1[u] = ROTL64(T0[u], 8UL); - T2[u] = ROTL64(T0[u], 16UL); - T3[u] = ROTL64(T0[u], 24UL); - T4[u] = ROTL64(T0[u], 32UL); - T5[u] = ROTL64(T0[u], 40UL); - T6[u] = ROTL64(T0[u], 48UL); - T7[u] = ROTL64(T0[u], 56UL); - barrier(CLK_LOCAL_MEM_FENCE); - - ulong g[16], m[16], t0[16], t1[16], t2[16], t3[16], t4[16], t5[16], t6[16], t7[16]; - uint flag = 0, gid = get_global_id(0), r = 13; - +__kernel void search(__global unsigned char* block, volatile __global uint* output, const ulong target) +{ + uint gid = get_global_id(0); + union { + unsigned char h1[64]; + uint h4[16]; + ulong h8[8]; + } hash; + +#if !SPH_SMALL_FOOTPRINT_GROESTL + __local sph_u64 T0_C[256], T1_C[256], T2_C[256], T3_C[256]; + __local sph_u64 T4_C[256], T5_C[256], T6_C[256], T7_C[256]; +#else + __local sph_u64 T0_C[256], T4_C[256]; +#endif + int init = get_local_id(0); + int step = get_local_size(0); + + for (int i = init; i < 256; i += step) + { + T0_C[i] = T0[i]; + T4_C[i] = T4[i]; +#if !SPH_SMALL_FOOTPRINT_GROESTL + T1_C[i] = T1[i]; + T2_C[i] = T2[i]; + T3_C[i] = T3[i]; + T5_C[i] = T5[i]; + T6_C[i] = T6[i]; + T7_C[i] = T7[i]; +#endif + } + barrier(CLK_LOCAL_MEM_FENCE); // groestl +#define T0 T0_C +#define T1 T1_C +#define T2 T2_C +#define T3 T3_C +#define T4 T4_C +#define T5 T5_C +#define T6 T6_C +#define T7 T7_C + + + // groestl + { + sph_u64 H[16]; +//#pragma unroll 15 + for (unsigned int u = 0; u < 15; u ++) + H[u] = 0; +#if USE_LE + H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40); +#else + H[15] = (sph_u64)512; +#endif + + sph_u64 g[16], m[16]; m[0] = DEC64E(block + 0 * 8); m[1] = DEC64E(block + 1 * 8); m[2] = DEC64E(block + 2 * 8); @@ -1480,374 +195,93 @@ __kernel void search(__global unsigned char* block, volatile __global uint* outp m[8] = DEC64E(block + 8 * 8); m[9] = DEC64E(block + 9 * 8); m[9] &= 0x00000000FFFFFFFF; - m[9] |= ((ulong) gid << 32); - m[10] = 0x80; - -perm: - m[11] = 0; - m[12] = 0; - m[13] = 0; - m[14] = 0; - m[15] = M15; - -#pragma unroll - for (u = 0; u < 15; u++) g[u] = m[u]; - g[15] = M15 ^ H15; - - - g[0x0] ^= PC64(0x00, 0); - g[0x1] ^= PC64(0x10, 0); - g[0x2] ^= PC64(0x20, 0); - g[0x3] ^= PC64(0x30, 0); - g[0x4] ^= PC64(0x40, 0); - g[0x5] ^= PC64(0x50, 0); - g[0x6] ^= PC64(0x60, 0); - g[0x7] ^= PC64(0x70, 0); - g[0x8] ^= PC64(0x80, 0); - g[0x9] ^= PC64(0x90, 0); - g[0xA] ^= PC64(0xA0, 0); - g[0xB] = PC64(0xB0, 0); - g[0xC] = PC64(0xC0, 0); - g[0xD] = PC64(0xD0, 0); - g[0xE] = PC64(0xE0, 0); - g[0xF] ^= PC64(0xF0, 0); - t0[0x0] = B64_0(g[0x0]); - t1[0x0] = B64_1(g[0x0]); - t2[0x0] = B64_2(g[0x0]); - t3[0x0] = B64_3(g[0x0]); - t4[0x0] = B64_4(g[0x0]); - t5[0x0] = B64_5(g[0x0]); - t6[0x0] = B64_6(g[0x0]); - t7[0x0] = B64_7(g[0x0]); - t0[0x1] = B64_0(g[0x1]); - t1[0x1] = B64_1(g[0x1]); - t2[0x1] = B64_2(g[0x1]); - t3[0x1] = B64_3(g[0x1]); - t4[0x1] = B64_4(g[0x1]); - t5[0x1] = B64_5(g[0x1]); - t6[0x1] = B64_6(g[0x1]); - t7[0x1] = B64_7(g[0x1]); - t0[0x2] = B64_0(g[0x2]); - t1[0x2] = B64_1(g[0x2]); - t2[0x2] = B64_2(g[0x2]); - t3[0x2] = B64_3(g[0x2]); - t4[0x2] = B64_4(g[0x2]); - t5[0x2] = B64_5(g[0x2]); - t6[0x2] = B64_6(g[0x2]); - t7[0x2] = B64_7(g[0x2]); - t0[0x3] = B64_0(g[0x3]); - t1[0x3] = B64_1(g[0x3]); - t2[0x3] = B64_2(g[0x3]); - t3[0x3] = B64_3(g[0x3]); - t4[0x3] = B64_4(g[0x3]); - t5[0x3] = B64_5(g[0x3]); - t6[0x3] = B64_6(g[0x3]); - t7[0x3] = B64_7(g[0x3]); - t0[0x4] = B64_0(g[0x4]); - t1[0x4] = B64_1(g[0x4]); - t2[0x4] = B64_2(g[0x4]); - t3[0x4] = B64_3(g[0x4]); - t4[0x4] = B64_4(g[0x4]); - t5[0x4] = B64_5(g[0x4]); - t6[0x4] = B64_6(g[0x4]); - t7[0x4] = B64_7(g[0x4]); - t0[0x5] = B64_0(g[0x5]); - t1[0x5] = B64_1(g[0x5]); - t2[0x5] = B64_2(g[0x5]); - t3[0x5] = B64_3(g[0x5]); - t4[0x5] = B64_4(g[0x5]); - t5[0x5] = B64_5(g[0x5]); - t6[0x5] = B64_6(g[0x5]); - t7[0x5] = B64_7(g[0x5]); - t0[0x6] = B64_0(g[0x6]); - t1[0x6] = B64_1(g[0x6]); - t2[0x6] = B64_2(g[0x6]); - t3[0x6] = B64_3(g[0x6]); - t4[0x6] = B64_4(g[0x6]); - t5[0x6] = B64_5(g[0x6]); - t6[0x6] = B64_6(g[0x6]); - t7[0x6] = B64_7(g[0x6]); - t0[0x7] = B64_0(g[0x7]); - t1[0x7] = B64_1(g[0x7]); - t2[0x7] = B64_2(g[0x7]); - t3[0x7] = B64_3(g[0x7]); - t4[0x7] = B64_4(g[0x7]); - t5[0x7] = B64_5(g[0x7]); - t6[0x7] = B64_6(g[0x7]); - t7[0x7] = B64_7(g[0x7]); - t0[0x8] = B64_0(g[0x8]); - t1[0x8] = B64_1(g[0x8]); - t2[0x8] = B64_2(g[0x8]); - t3[0x8] = B64_3(g[0x8]); - t4[0x8] = B64_4(g[0x8]); - t5[0x8] = B64_5(g[0x8]); - t6[0x8] = B64_6(g[0x8]); - t7[0x8] = B64_7(g[0x8]); - t0[0x9] = B64_0(g[0x9]); - t1[0x9] = B64_1(g[0x9]); - t2[0x9] = B64_2(g[0x9]); - t3[0x9] = B64_3(g[0x9]); - t4[0x9] = B64_4(g[0x9]); - t5[0x9] = B64_5(g[0x9]); - t6[0x9] = B64_6(g[0x9]); - t7[0x9] = B64_7(g[0x9]); - t0[0xA] = B64_0(g[0xA]); - t1[0xA] = B64_1(g[0xA]); - t2[0xA] = B64_2(g[0xA]); - t3[0xA] = B64_3(g[0xA]); - t4[0xA] = B64_4(g[0xA]); - t5[0xA] = B64_5(g[0xA]); - t6[0xA] = B64_6(g[0xA]); - t7[0xA] = B64_7(g[0xA]); - t0[0xB] = B64_0(g[0xB]); - t1[0xB] = B64_1(g[0xB]); - t2[0xB] = B64_2(g[0xB]); - t3[0xB] = B64_3(g[0xB]); - t4[0xB] = B64_4(g[0xB]); - t5[0xB] = B64_5(g[0xB]); - t6[0xB] = B64_6(g[0xB]); - t7[0xB] = B64_7(g[0xB]); - t0[0xC] = B64_0(g[0xC]); - t1[0xC] = B64_1(g[0xC]); - t2[0xC] = B64_2(g[0xC]); - t3[0xC] = B64_3(g[0xC]); - t4[0xC] = B64_4(g[0xC]); - t5[0xC] = B64_5(g[0xC]); - t6[0xC] = B64_6(g[0xC]); - t7[0xC] = B64_7(g[0xC]); - t0[0xD] = B64_0(g[0xD]); - t1[0xD] = B64_1(g[0xD]); - t2[0xD] = B64_2(g[0xD]); - t3[0xD] = B64_3(g[0xD]); - t4[0xD] = B64_4(g[0xD]); - t5[0xD] = B64_5(g[0xD]); - t6[0xD] = B64_6(g[0xD]); - t7[0xD] = B64_7(g[0xD]); - t0[0xE] = B64_0(g[0xE]); - t1[0xE] = B64_1(g[0xE]); - t2[0xE] = B64_2(g[0xE]); - t3[0xE] = B64_3(g[0xE]); - t4[0xE] = B64_4(g[0xE]); - t5[0xE] = B64_5(g[0xE]); - t6[0xE] = B64_6(g[0xE]); - t7[0xE] = B64_7(g[0xE]); - t0[0xF] = B64_0(g[0xF]); - t1[0xF] = B64_1(g[0xF]); - t2[0xF] = B64_2(g[0xF]); - t3[0xF] = B64_3(g[0xF]); - t4[0xF] = B64_4(g[0xF]); - t5[0xF] = B64_5(g[0xF]); - t6[0xF] = B64_6(g[0xF]); - t7[0xF] = B64_7(g[0xF]); - g[0x0] = T0[t0[0x0]] ^ T1[t1[0x1]] ^ T2[t2[0x2]] ^ T3[t3[0x3]] ^ T4[t4[0x4]] ^ T5[t5[0x5]] ^ T6[t6[0x6]] ^ C64e(0x32f4a5f497a5c6c6); - g[0x1] = T0[t0[0x1]] ^ T1[t1[0x2]] ^ T2[t2[0x3]] ^ T3[t3[0x4]] ^ T4[t4[0x5]] ^ T5[t5[0x6]] ^ T6[t6[0x7]] ^ C64e(0x32f4a5f497a5c6c6); - g[0x2] = T0[t0[0x2]] ^ T1[t1[0x3]] ^ T2[t2[0x4]] ^ T3[t3[0x5]] ^ T4[t4[0x6]] ^ T5[t5[0x7]] ^ T6[t6[0x8]] ^ C64e(0x32f4a5f497a5c6c6); - g[0x3] = T0[t0[0x3]] ^ T1[t1[0x4]] ^ T2[t2[0x5]] ^ T3[t3[0x6]] ^ T4[t4[0x7]] ^ T5[t5[0x8]] ^ T6[t6[0x9]] ^ C64e(0x32f4a5f497a5c6c6); - g[0x4] = T0[t0[0x4]] ^ T1[t1[0x5]] ^ T2[t2[0x6]] ^ T3[t3[0x7]] ^ T4[t4[0x8]] ^ T5[t5[0x9]] ^ C64e(0xf4a5f497a5c6c632) ^ T7[t7[0xF]]; - g[0x5] = T0[t0[0x5]] ^ T1[t1[0x6]] ^ T2[t2[0x7]] ^ T3[t3[0x8]] ^ T4[t4[0x9]] ^ C64e(0xa5f497a5c6c632f4) ^ C64e(0xf4a5f497a5c6c632) ^ T7[t7[0x0]]; - g[0x6] = T0[t0[0x6]] ^ T1[t1[0x7]] ^ T2[t2[0x8]] ^ T3[t3[0x9]] ^ C64e(0xf497a5c6c632f4a5) ^ C64e(0xa5f497a5c6c632f4) ^ C64e(0xf4a5f497a5c6c632) ^ T7[t7[0x1]]; - g[0x7] = T0[t0[0x7]] ^ T1[t1[0x8]] ^ T2[t2[0x9]] ^ C64e(0x97a5c6c632f4a5f4) ^ C64e(0xf497a5c6c632f4a5) ^ C64e(0xa5f497a5c6c632f4) ^ C64e(0xf4a5f497a5c6c632) ^ T7[t7[0x2]]; - g[0x8] = T0[t0[0x8]] ^ T1[t1[0x9]] ^ C64e(0xa5c6c632f4a5f497) ^ C64e(0x97a5c6c632f4a5f4) ^ C64e(0xf497a5c6c632f4a5) ^ C64e(0xa5f497a5c6c632f4) ^ C64e(0xf4a5f497a5c6c632) ^ T7[t7[0x3]]; - g[0x9] = T0[t0[0x9]] ^ C64e(0xc6c632f4a5f497a5) ^ C64e(0xa5c6c632f4a5f497) ^ C64e(0x97a5c6c632f4a5f4) ^ C64e(0xf497a5c6c632f4a5) ^ C64e(0xa5f497a5c6c632f4) ^ T6[t6[0xF]] ^ T7[t7[0x4]]; - g[0xA] = T0[t0[0xA]] ^ C64e(0xc6c632f4a5f497a5) ^ C64e(0xa5c6c632f4a5f497) ^ C64e(0x97a5c6c632f4a5f4) ^ C64e(0xf497a5c6c632f4a5) ^ T5[t5[0xF]] ^ T6[t6[0x0]] ^ T7[t7[0x5]]; - g[0xB] = T0[t0[0xB]] ^ C64e(0xc6c632f4a5f497a5) ^ C64e(0xa5c6c632f4a5f497) ^ C64e(0x97a5c6c632f4a5f4) ^ T4[t4[0xF]] ^ T5[t5[0x0]] ^ T6[t6[0x1]] ^ T7[t7[0x6]]; - g[0xC] = T0[t0[0xC]] ^ C64e(0xc6c632f4a5f497a5) ^ C64e(0xa5c6c632f4a5f497) ^ T3[t3[0xF]] ^ T4[t4[0x0]] ^ T5[t5[0x1]] ^ T6[t6[0x2]] ^ T7[t7[0x7]]; - g[0xD] = T0[t0[0xD]] ^ C64e(0xc6c632f4a5f497a5) ^ T2[t2[0xF]] ^ T3[t3[0x0]] ^ T4[t4[0x1]] ^ T5[t5[0x2]] ^ T6[t6[0x3]] ^ T7[t7[0x8]]; - g[0xE] = T0[t0[0xE]] ^ T1[t1[0xF]] ^ T2[t2[0x0]] ^ T3[t3[0x1]] ^ T4[t4[0x2]] ^ T5[t5[0x3]] ^ T6[t6[0x4]] ^ T7[t7[0x9]]; - g[0xF] = T0[t0[0xF]] ^ T1[t1[0x0]] ^ T2[t2[0x1]] ^ T3[t3[0x2]] ^ T4[t4[0x3]] ^ T5[t5[0x4]] ^ T6[t6[0x5]] ^ T7[t7[0xA]]; - + m[9] |= ((sph_u64) gid << 32); + +//#pragma unroll 16 + for (unsigned int u = 0; u < 16; u ++) + g[u] = m[u] ^ H[u]; + m[10] = 0x80; g[10] = m[10] ^ H[10]; + m[11] = 0; g[11] = m[11] ^ H[11]; + m[12] = 0; g[12] = m[12] ^ H[12]; + m[13] = 0; g[13] = m[13] ^ H[13]; + m[14] = 0; g[14] = m[14] ^ H[14]; + m[15] = 0x100000000000000; g[15] = m[15] ^ H[15]; + PERM_BIG_P(g); + PERM_BIG_Q(m); - PERM_BIG_P(g, 1, 14); +//#pragma unroll 16 + for (unsigned int u = 0; u < 16; u ++) + H[u] ^= g[u] ^ m[u]; + sph_u64 xH[16]; + +//#pragma unroll 16 + for (unsigned int u = 0; u < 16; u ++) + xH[u] = H[u]; + PERM_BIG_P(xH); + +//#pragma unroll 16 + for (unsigned int u = 0; u < 16; u ++) + H[u] ^= xH[u]; + +//#pragma unroll 8 + for (unsigned int u = 0; u < 8; u ++) + hash.h8[u] = ENC64E(H[u + 8]); + +//#pragma unroll 15 + for (unsigned int u = 0; u < 15; u ++) + H[u] = 0; + #if USE_LE + H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40); + #else + H[15] = (sph_u64)512; + #endif + + m[0] = hash.h8[0]; + m[1] = hash.h8[1]; + m[2] = hash.h8[2]; + m[3] = hash.h8[3]; + m[4] = hash.h8[4]; + m[5] = hash.h8[5]; + m[6] = hash.h8[6]; + m[7] = hash.h8[7]; + +//#pragma unroll 16 + for (unsigned int u = 0; u < 16; u ++) + g[u] = m[u] ^ H[u]; + m[8] = 0x80; g[8] = m[8] ^ H[8]; + m[9] = 0; g[9] = m[9] ^ H[9]; + m[10] = 0; g[10] = m[10] ^ H[10]; + m[11] = 0; g[11] = m[11] ^ H[11]; + m[12] = 0; g[12] = m[12] ^ H[12]; + m[13] = 0; g[13] = m[13] ^ H[13]; + m[14] = 0; g[14] = m[14] ^ H[14]; + m[15] = 0x100000000000000; g[15] = m[15] ^ H[15]; + PERM_BIG_P(g); PERM_BIG_Q(m); -#pragma unroll - for (u = 0; u < 16; u++) g[u] ^= m[u]; -#pragma unroll - for (u = 0; u < 8; u++) m[u] = g[u + 8]; - g[15] ^= H15; - PERM_BIG_P(g, 0, r); +//#pragma unroll 16 + for (unsigned int u = 0; u < 16; u ++) + H[u] ^= g[u] ^ m[u]; -round: -// move the ^= to the relevant first byte down here? tried that, was slower?!?!? - g[0x0] ^= PC64(0x00, r); - g[0x1] ^= PC64(0x10, r); - g[0x6] ^= PC64(0x60, r); - g[0xB] ^= PC64(0xB0, r); - g[0xC] ^= PC64(0xC0, r); - g[0xD] ^= PC64(0xD0, r); - g[0xE] ^= PC64(0xE0, r); - g[0xF] ^= PC64(0xF0, r); - t0[0x0] = B64_0(g[0x0]); - t1[0x0] = B64_1(g[0x0]); - t2[0x0] = B64_2(g[0x0]); - t3[0x0] = B64_3(g[0x0]); - t4[0x0] = B64_4(g[0x0]); - t5[0x0] = B64_5(g[0x0]); - t6[0x0] = B64_6(g[0x0]); - t7[0x0] = B64_7(g[0x0]); - t0[0x1] = B64_0(g[0x1]); - t1[0x1] = B64_1(g[0x1]); - t2[0x1] = B64_2(g[0x1]); - t3[0x1] = B64_3(g[0x1]); - t4[0x1] = B64_4(g[0x1]); - t5[0x1] = B64_5(g[0x1]); - t6[0x1] = B64_6(g[0x1]); - t7[0x1] = B64_7(g[0x1]); - t0[0x6] = B64_0(g[0x6]); - t1[0x6] = B64_1(g[0x6]); - t2[0x6] = B64_2(g[0x6]); - t3[0x6] = B64_3(g[0x6]); - t4[0x6] = B64_4(g[0x6]); - t5[0x6] = B64_5(g[0x6]); - t6[0x6] = B64_6(g[0x6]); - t7[0x6] = B64_7(g[0x6]); - t0[0xB] = B64_0(g[0xB]); - t1[0xB] = B64_1(g[0xB]); - t2[0xB] = B64_2(g[0xB]); - t3[0xB] = B64_3(g[0xB]); - t4[0xB] = B64_4(g[0xB]); - t5[0xB] = B64_5(g[0xB]); - t6[0xB] = B64_6(g[0xB]); - t7[0xB] = B64_7(g[0xB]); - t0[0xC] = B64_0(g[0xC]); - t1[0xC] = B64_1(g[0xC]); - t2[0xC] = B64_2(g[0xC]); - t3[0xC] = B64_3(g[0xC]); - t4[0xC] = B64_4(g[0xC]); - t5[0xC] = B64_5(g[0xC]); - t6[0xC] = B64_6(g[0xC]); - t7[0xC] = B64_7(g[0xC]); - t0[0xD] = B64_0(g[0xD]); - t1[0xD] = B64_1(g[0xD]); - t2[0xD] = B64_2(g[0xD]); - t3[0xD] = B64_3(g[0xD]); - t4[0xD] = B64_4(g[0xD]); - t5[0xD] = B64_5(g[0xD]); - t6[0xD] = B64_6(g[0xD]); - t7[0xD] = B64_7(g[0xD]); - t0[0xE] = B64_0(g[0xE]); - t1[0xE] = B64_1(g[0xE]); - t2[0xE] = B64_2(g[0xE]); - t3[0xE] = B64_3(g[0xE]); - t4[0xE] = B64_4(g[0xE]); - t5[0xE] = B64_5(g[0xE]); - t6[0xE] = B64_6(g[0xE]); - t7[0xE] = B64_7(g[0xE]); - t0[0xF] = B64_0(g[0xF]); - t1[0xF] = B64_1(g[0xF]); - t2[0xF] = B64_2(g[0xF]); - t3[0xF] = B64_3(g[0xF]); - t4[0xF] = B64_4(g[0xF]); - t5[0xF] = B64_5(g[0xF]); - t6[0xF] = B64_6(g[0xF]); - t7[0xF] = B64_7(g[0xF]); + //#pragma unroll 16 + for (unsigned int u = 0; u < 16; u ++) + xH[u] = H[u]; + PERM_BIG_P(xH); - if (flag < 2) { - g[0x2] ^= PC64(0x20, r); - g[0x3] ^= PC64(0x30, r); - g[0x4] ^= PC64(0x40, r); - g[0x5] ^= PC64(0x50, r); - g[0x7] ^= PC64(0x70, r); - g[0x8] ^= PC64(0x80, r); - g[0x9] ^= PC64(0x90, r); - g[0xA] ^= PC64(0xA0, r); - t0[0x2] = B64_0(g[0x2]); - t1[0x2] = B64_1(g[0x2]); - t2[0x2] = B64_2(g[0x2]); - t3[0x2] = B64_3(g[0x2]); - t4[0x2] = B64_4(g[0x2]); - t5[0x2] = B64_5(g[0x2]); - t6[0x2] = B64_6(g[0x2]); - t7[0x2] = B64_7(g[0x2]); - t0[0x3] = B64_0(g[0x3]); - t1[0x3] = B64_1(g[0x3]); - t2[0x3] = B64_2(g[0x3]); - t3[0x3] = B64_3(g[0x3]); - t4[0x3] = B64_4(g[0x3]); - t5[0x3] = B64_5(g[0x3]); - t6[0x3] = B64_6(g[0x3]); - t7[0x3] = B64_7(g[0x3]); - t0[0x4] = B64_0(g[0x4]); - t1[0x4] = B64_1(g[0x4]); - t2[0x4] = B64_2(g[0x4]); - t3[0x4] = B64_3(g[0x4]); - t4[0x4] = B64_4(g[0x4]); - t5[0x4] = B64_5(g[0x4]); - t6[0x4] = B64_6(g[0x4]); - t7[0x4] = B64_7(g[0x4]); - t0[0x5] = B64_0(g[0x5]); - t1[0x5] = B64_1(g[0x5]); - t2[0x5] = B64_2(g[0x5]); - t3[0x5] = B64_3(g[0x5]); - t4[0x5] = B64_4(g[0x5]); - t5[0x5] = B64_5(g[0x5]); - t6[0x5] = B64_6(g[0x5]); - t7[0x5] = B64_7(g[0x5]); - t0[0x7] = B64_0(g[0x7]); - t1[0x7] = B64_1(g[0x7]); - t2[0x7] = B64_2(g[0x7]); - t3[0x7] = B64_3(g[0x7]); - t4[0x7] = B64_4(g[0x7]); - t5[0x7] = B64_5(g[0x7]); - t6[0x7] = B64_6(g[0x7]); - t7[0x7] = B64_7(g[0x7]); - t0[0x8] = B64_0(g[0x8]); - t1[0x8] = B64_1(g[0x8]); - t2[0x8] = B64_2(g[0x8]); - t3[0x8] = B64_3(g[0x8]); - t4[0x8] = B64_4(g[0x8]); - t5[0x8] = B64_5(g[0x8]); - t6[0x8] = B64_6(g[0x8]); - t7[0x8] = B64_7(g[0x8]); - t0[0x9] = B64_0(g[0x9]); - t1[0x9] = B64_1(g[0x9]); - t2[0x9] = B64_2(g[0x9]); - t3[0x9] = B64_3(g[0x9]); - t4[0x9] = B64_4(g[0x9]); - t5[0x9] = B64_5(g[0x9]); - t6[0x9] = B64_6(g[0x9]); - t7[0x9] = B64_7(g[0x9]); - t0[0xA] = B64_0(g[0xA]); - t1[0xA] = B64_1(g[0xA]); - t2[0xA] = B64_2(g[0xA]); - t3[0xA] = B64_3(g[0xA]); - t4[0xA] = B64_4(g[0xA]); - t5[0xA] = B64_5(g[0xA]); - t6[0xA] = B64_6(g[0xA]); - t7[0xA] = B64_7(g[0xA]); - if (flag == 0) { - RBTT(g[0x8], 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0x3); - RBTT(g[0x9], 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x4); - RBTT(g[0xA], 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x0, 0x5); - } else { - RBTT(g[0x0], 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0xB); - RBTT(g[0x1], 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xC); - RBTT(g[0x6], 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, 0x1); - } - RBTT(g[0xC], 0xC, 0xD, 0xE, 0xF, 0x0, 0x1, 0x2, 0x7); - RBTT(g[0xD], 0xD, 0xE, 0xF, 0x0, 0x1, 0x2, 0x3, 0x8); - RBTT(g[0xE], 0xE, 0xF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x9); - RBTT(g[0xF], 0xF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xA); - } - RBTT(g[0xB], 0xB, 0xC, 0xD, 0xE, 0xF, 0x0, 0x1, 0x6); + //#pragma unroll 16 + for (unsigned int u = 0; u < 16; u ++) + H[u] ^= xH[u]; - if (flag == 2) goto end; - if (flag++ == 1) { - r = 13; - goto round; - } +//#pragma unroll 8 + for (unsigned int u = 0; u < 8; u ++) + hash.h8[u] = H[u + 8]; - r = 12; -#pragma unroll - for (u = 0; u < 8; u++) m[u] ^= g[u + 8]; - m[7] ^= H15; - m[8] = 0x80; - m[9] = 0; - m[10] = 0; - goto perm; + barrier(CLK_GLOBAL_MEM_FENCE); + } -end: - if ((g[3 + 8] ^ m[3]) <= target) output[output[0xFF]++] = as_uint(as_uchar4(gid).wzyx); + bool result = (hash.h8[3] <= target); + if (result) + output[output[0xFF]++] = SWAP4(gid); } -#endif // GROESTLCOIN_CL \ No newline at end of file +#endif // GROESTLCOIN_CL diff --git a/kernel/keccak1600.cl b/kernel/keccak1600.cl new file mode 100644 index 000000000..d870a155e --- /dev/null +++ b/kernel/keccak1600.cl @@ -0,0 +1,84 @@ +/* + * keccak_1600 function + * C. Buchner 2014 + * + */ + +__constant static const sph_u64 RC[] = { + SPH_C64(0x0000000000000001), SPH_C64(0x0000000000008082), + SPH_C64(0x800000000000808A), SPH_C64(0x8000000080008000), + SPH_C64(0x000000000000808B), SPH_C64(0x0000000080000001), + SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008009), + SPH_C64(0x000000000000008A), SPH_C64(0x0000000000000088), + SPH_C64(0x0000000080008009), SPH_C64(0x000000008000000A), + SPH_C64(0x000000008000808B), SPH_C64(0x800000000000008B), + SPH_C64(0x8000000000008089), SPH_C64(0x8000000000008003), + SPH_C64(0x8000000000008002), SPH_C64(0x8000000000000080), + SPH_C64(0x000000000000800A), SPH_C64(0x800000008000000A), + SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008080), + SPH_C64(0x0000000080000001), SPH_C64(0x8000000080008008) +}; + + +inline void keccak_block(ulong *s) { + size_t i; + ulong t[5], u[5], v, w; + + for (i = 0; i < 24; i++) { + /* theta: c = a[0,i] ^ a[1,i] ^ .. a[4,i] */ + t[0] = s[0] ^ s[5] ^ s[10] ^ s[15] ^ s[20]; + t[1] = s[1] ^ s[6] ^ s[11] ^ s[16] ^ s[21]; + t[2] = s[2] ^ s[7] ^ s[12] ^ s[17] ^ s[22]; + t[3] = s[3] ^ s[8] ^ s[13] ^ s[18] ^ s[23]; + t[4] = s[4] ^ s[9] ^ s[14] ^ s[19] ^ s[24]; + + /* theta: d[i] = c[i+4] ^ rotl(c[i+1],1) */ + u[0] = t[4] ^ SPH_ROTL64(t[1], 1); + u[1] = t[0] ^ SPH_ROTL64(t[2], 1); + u[2] = t[1] ^ SPH_ROTL64(t[3], 1); + u[3] = t[2] ^ SPH_ROTL64(t[4], 1); + u[4] = t[3] ^ SPH_ROTL64(t[0], 1); + + /* theta: a[0,i], a[1,i], .. a[4,i] ^= d[i] */ + s[0] ^= u[0]; s[5] ^= u[0]; s[10] ^= u[0]; s[15] ^= u[0]; s[20] ^= u[0]; + s[1] ^= u[1]; s[6] ^= u[1]; s[11] ^= u[1]; s[16] ^= u[1]; s[21] ^= u[1]; + s[2] ^= u[2]; s[7] ^= u[2]; s[12] ^= u[2]; s[17] ^= u[2]; s[22] ^= u[2]; + s[3] ^= u[3]; s[8] ^= u[3]; s[13] ^= u[3]; s[18] ^= u[3]; s[23] ^= u[3]; + s[4] ^= u[4]; s[9] ^= u[4]; s[14] ^= u[4]; s[19] ^= u[4]; s[24] ^= u[4]; + + /* rho pi: b[..] = rotl(a[..], ..) */ + v = s[1]; + s[1] = SPH_ROTL64(s[6], 44); + s[6] = SPH_ROTL64(s[9], 20); + s[9] = SPH_ROTL64(s[22], 61); + s[22] = SPH_ROTL64(s[14], 39); + s[14] = SPH_ROTL64(s[20], 18); + s[20] = SPH_ROTL64(s[2], 62); + s[2] = SPH_ROTL64(s[12], 43); + s[12] = SPH_ROTL64(s[13], 25); + s[13] = SPH_ROTL64(s[19], 8); + s[19] = SPH_ROTL64(s[23], 56); + s[23] = SPH_ROTL64(s[15], 41); + s[15] = SPH_ROTL64(s[4], 27); + s[4] = SPH_ROTL64(s[24], 14); + s[24] = SPH_ROTL64(s[21], 2); + s[21] = SPH_ROTL64(s[8], 55); + s[8] = SPH_ROTL64(s[16], 45); + s[16] = SPH_ROTL64(s[5], 36); + s[5] = SPH_ROTL64(s[3], 28); + s[3] = SPH_ROTL64(s[18], 21); + s[18] = SPH_ROTL64(s[17], 15); + s[17] = SPH_ROTL64(s[11], 10); + s[11] = SPH_ROTL64(s[7], 6); + s[7] = SPH_ROTL64(s[10], 3); + s[10] = SPH_ROTL64(v, 1); + + v = s[0]; w = s[1]; s[0] ^= (~w) & s[2]; s[1] ^= (~s[2]) & s[3]; s[2] ^= (~s[3]) & s[4]; s[3] ^= (~s[4]) & v; s[4] ^= (~v) & w; + v = s[5]; w = s[6]; s[5] ^= (~w) & s[7]; s[6] ^= (~s[7]) & s[8]; s[7] ^= (~s[8]) & s[9]; s[8] ^= (~s[9]) & v; s[9] ^= (~v) & w; + v = s[10]; w = s[11]; s[10] ^= (~w) & s[12]; s[11] ^= (~s[12]) & s[13]; s[12] ^= (~s[13]) & s[14]; s[13] ^= (~s[14]) & v; s[14] ^= (~v) & w; + v = s[15]; w = s[16]; s[15] ^= (~w) & s[17]; s[16] ^= (~s[17]) & s[18]; s[17] ^= (~s[18]) & s[19]; s[18] ^= (~s[19]) & v; s[19] ^= (~v) & w; + v = s[20]; w = s[21]; s[20] ^= (~w) & s[22]; s[21] ^= (~s[22]) & s[23]; s[22] ^= (~s[23]) & s[24]; s[23] ^= (~s[24]) & v; s[24] ^= (~v) & w; + + s[0] ^= RC[i]; + } +}; \ No newline at end of file diff --git a/kernel/skein256.cl b/kernel/skein256.cl new file mode 100644 index 000000000..8a5e8481f --- /dev/null +++ b/kernel/skein256.cl @@ -0,0 +1,107 @@ + +/* +* skein256 kernel implementation. +* +* ==========================(LICENSE BEGIN)============================ +* Copyright (c) 2014 djm34 +* +* Permission is hereby granted, free of charge, to any person obtaining +* a copy of this software and associated documentation files (the +* "Software"), to deal in the Software without restriction, including +* without limitation the rights to use, copy, modify, merge, publish, +* distribute, sublicense, and/or sell copies of the Software, and to +* permit persons to whom the Software is furnished to do so, subject to +* the following conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +* +* ===========================(LICENSE END)============================= +* +* @author djm34 +*/ + + +__constant static const sph_u64 SKEIN_IV512[] = { + SPH_C64(0x4903ADFF749C51CE), SPH_C64(0x0D95DE399746DF03), + SPH_C64(0x8FD1934127C79BCE), SPH_C64(0x9A255629FF352CB1), + SPH_C64(0x5DB62599DF6CA7B0), SPH_C64(0xEABE394CA9D5C3F4), + SPH_C64(0x991112C71A75B523), SPH_C64(0xAE18A40B660FCC33) +}; + +__constant static const sph_u64 SKEIN_IV512_256[8] = { + 0xCCD044A12FDB3E13UL, 0xE83590301A79A9EBUL, + 0x55AEA0614F816E6FUL, 0x2A2767A4AE9B94DBUL, + 0xEC06025E74DD7683UL, 0xE7A436CDC4746251UL, + 0xC36FBAF9393AD185UL, 0x3EEDBA1833EDFC13UL +}; + + + +__constant static const int ROT256[8][4] = +{ + 46, 36, 19, 37, + 33, 27, 14, 42, + 17, 49, 36, 39, + 44, 9, 54, 56, + 39, 30, 34, 24, + 13, 50, 10, 17, + 25, 29, 39, 43, + 8, 35, 56, 22, +}; + +__constant static const sph_u64 skein_ks_parity = 0x1BD11BDAA9FC1A22; + +__constant static const sph_u64 t12[6] = +{ 0x20UL, +0xf000000000000000UL, +0xf000000000000020UL, +0x08UL, +0xff00000000000000UL, +0xff00000000000008UL +}; +static inline ulong ROTL64(const ulong v, const ulong n){ + return rotate(v,n); +} + +#define Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT) { \ +p0 += p1; p1 = SPH_ROTL64(p1, ROT256[ROT][0]); p1 ^= p0; \ +p2 += p3; p3 = SPH_ROTL64(p3, ROT256[ROT][1]); p3 ^= p2; \ +p4 += p5; p5 = SPH_ROTL64(p5, ROT256[ROT][2]); p5 ^= p4; \ +p6 += p7; p7 = SPH_ROTL64(p7, ROT256[ROT][3]); p7 ^= p6; \ +} + +#define Round_8_512(p0, p1, p2, p3, p4, p5, p6, p7, R) { \ + Round512(p0, p1, p2, p3, p4, p5, p6, p7, 0); \ + Round512(p2, p1, p4, p7, p6, p5, p0, p3, 1); \ + Round512(p4, p1, p6, p3, p0, p5, p2, p7, 2); \ + Round512(p6, p1, p0, p7, p2, p5, p4, p3, 3); \ + p0 += h[((R)+0) % 9]; \ + p1 += h[((R)+1) % 9]; \ + p2 += h[((R)+2) % 9]; \ + p3 += h[((R)+3) % 9]; \ + p4 += h[((R)+4) % 9]; \ + p5 += h[((R)+5) % 9] + t[((R)+0) % 3]; \ + p6 += h[((R)+6) % 9] + t[((R)+1) % 3]; \ + p7 += h[((R)+7) % 9] + R; \ + Round512(p0, p1, p2, p3, p4, p5, p6, p7, 4); \ + Round512(p2, p1, p4, p7, p6, p5, p0, p3, 5); \ + Round512(p4, p1, p6, p3, p0, p5, p2, p7, 6); \ + Round512(p6, p1, p0, p7, p2, p5, p4, p3, 7); \ + p0 += h[((R)+1) % 9]; \ + p1 += h[((R)+2) % 9]; \ + p2 += h[((R)+3) % 9]; \ + p3 += h[((R)+4) % 9]; \ + p4 += h[((R)+5) % 9]; \ + p5 += h[((R)+6) % 9] + t[((R)+1) % 3]; \ + p6 += h[((R)+7) % 9] + t[((R)+2) % 3]; \ + p7 += h[((R)+8) % 9] + (R+1); \ +} \ No newline at end of file diff --git a/sgminer.c b/sgminer.c index f3e409334..67ce4291d 100644 --- a/sgminer.c +++ b/sgminer.c @@ -48,6 +48,7 @@ char *curly = ":D"; #endif #include #include "sph/sph_sha2.h" +#include "sph/sph_blake.h" #include "compat.h" #include "miner.h" diff --git a/winbuild/sgminer.vcxproj b/winbuild/sgminer.vcxproj index a76b65c9e..a2a8e8f3c 100644 --- a/winbuild/sgminer.vcxproj +++ b/winbuild/sgminer.vcxproj @@ -263,7 +263,10 @@ + + + @@ -325,7 +328,10 @@ + + + diff --git a/winbuild/sgminer.vcxproj.filters b/winbuild/sgminer.vcxproj.filters index 58da41ee0..feeb80907 100644 --- a/winbuild/sgminer.vcxproj.filters +++ b/winbuild/sgminer.vcxproj.filters @@ -209,6 +209,15 @@ Source Files\algorithm + + Source Files\algorithm + + + Source Files\algorithm + + + Source Files\algorithm + @@ -397,6 +406,15 @@ Header Files\algorithm + + Header Files\algorithm + + + Header Files\algorithm + + + Header Files\algorithm +