From 0020df9f954e5aa8a83cb05d9c8a32df02dfc39d Mon Sep 17 00:00:00 2001 From: shikokuchuo <53399081+shikokuchuo@users.noreply.github.com> Date: Fri, 1 Mar 2024 22:42:12 +0000 Subject: [PATCH 1/2] xxHash64 implementation --- NAMESPACE | 1 + NEWS.md | 2 + R/base.R | 30 ++++ man/xxh64.Rd | 47 ++++++ src/init.c | 2 + src/secret.h | 21 ++- src/secret2.c | 4 +- src/secret3.c | 448 ++++++++++++++++++++++++++++++++++++++++++++++++++ tests/tests.R | 16 ++ 9 files changed, 567 insertions(+), 4 deletions(-) create mode 100644 man/xxh64.Rd create mode 100644 src/secret3.c diff --git a/NAMESPACE b/NAMESPACE index 25484f0..524858c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,4 +2,5 @@ export(sha256) export(sha3) +export(xxh64) useDynLib(secretbase, .registration = TRUE) diff --git a/NEWS.md b/NEWS.md index 6951f6b..7c1ee7e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # secretbase 0.3.0.9000 (development) +* Adds 'xxHash64' as a fast non-cryptographic hash function. + # secretbase 0.3.0.1 * CRAN release correcting for Clang-UBSAN checks. diff --git a/R/base.R b/R/base.R index 52ed8b6..dd688eb 100644 --- a/R/base.R +++ b/R/base.R @@ -124,3 +124,33 @@ sha3 <- function(x, bits = 256L, convert = TRUE, file) sha256 <- function(x, convert = TRUE, file) if (missing(file)) .Call(secretbase_sha256, x, convert) else .Call(secretbase_sha256_file, file, convert) + +#' Fast Non-Cryptographic Hashing Using xxHash64 +#' +#' Returns the 'xxHash64' of the supplied R object or file. This is an extremely +#' fast hash, processing at RAM speed limits. +#' +#' @inheritParams sha3 +#' +#' @return A character string, raw or integer vector depending on 'convert'. +#' +#' @details This implementation uses the algorithm released by Yann Collet at +#' \url{https://xxhash.com/}. +#' +#' @examples +#' # xxHash64 as character string: +#' xxh64("secret base") +#' +#' # xxHash64 as raw vector: +#' xxh64("secret base", convert = FALSE) +#' +#' # xxHash64 a file: +#' file <- tempfile(); cat("secret base", file = file) +#' xxh64(file = file) +#' unlink(file) +#' +#' @export +#' +xxh64 <- function(x, convert = TRUE, file) + if (missing(file)) .Call(secretbase_xxh64, x, convert) else + .Call(secretbase_xxh64_file, file, convert) diff --git a/man/xxh64.Rd b/man/xxh64.Rd new file mode 100644 index 0000000..aa3e7d8 --- /dev/null +++ b/man/xxh64.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/base.R +\name{xxh64} +\alias{xxh64} +\title{Fast Non-Cryptographic Hashing Using xxHash64} +\usage{ +xxh64(x, convert = TRUE, file) +} +\arguments{ +\item{x}{R object to hash. A character string or raw vector (without +attributes) is hashed 'as is'. All other objects are hashed using R +serialization in a memory-efficient 'streaming' manner, without +allocation of the serialized object. To ensure portability, serialization +v3 XDR is always used with headers skipped (as these contain R version +and encoding information).} + +\item{convert}{[default TRUE] if TRUE, the hash is converted to its hex +representation as a character string, if FALSE, output directly as a raw +vector, or if NA, a vector of (32-bit) integer values.} + +\item{file}{character file name / path. If specified, 'x' is ignored. The +file is hashed in a streaming fashion and may be larger than memory.} +} +\value{ +A character string, raw or integer vector depending on 'convert'. +} +\description{ +Returns the 'xxHash64' of the supplied R object or file. This is an extremely + fast hash, processing at RAM speed limits. +} +\details{ +This implementation uses the algorithm released by Yann Collet at + \url{https://xxhash.com/}. +} +\examples{ +# xxHash64 as character string: +xxh64("secret base") + +# xxHash64 as raw vector: +xxh64("secret base", convert = FALSE) + +# xxHash64 a file: +file <- tempfile(); cat("secret base", file = file) +xxh64(file = file) +unlink(file) + +} diff --git a/src/init.c b/src/init.c index 913228c..4a8950b 100644 --- a/src/init.c +++ b/src/init.c @@ -23,6 +23,8 @@ static const R_CallMethodDef callMethods[] = { {"secretbase_sha3_file", (DL_FUNC) &secretbase_sha3_file, 3}, {"secretbase_sha256", (DL_FUNC) &secretbase_sha256, 2}, {"secretbase_sha256_file", (DL_FUNC) &secretbase_sha256_file, 2}, + {"secretbase_xxh64", (DL_FUNC) &secretbase_xxh64, 2}, + {"secretbase_xxh64_file", (DL_FUNC) &secretbase_xxh64_file, 2}, {NULL, NULL, 0} }; diff --git a/src/secret.h b/src/secret.h index a26b612..52afa6e 100644 --- a/src/secret.h +++ b/src/secret.h @@ -30,10 +30,11 @@ #define SB_SERIAL_HEADERS 6 #define SB_BUF_SIZE 4096 + #ifdef WORDS_BIGENDIAN -# define MBEDTLS_IS_BIG_ENDIAN 1 +# define SB_IS_BIG_ENDIAN 1 #else -# define MBEDTLS_IS_BIG_ENDIAN 0 +# define SB_IS_BIG_ENDIAN 0 #endif typedef enum { @@ -67,6 +68,15 @@ typedef struct mbedtls_sha256_context { uint32_t state[8]; } mbedtls_sha256_context; +typedef struct XXH64_state_s { + uint64_t total_len; + uint64_t v[4]; + uint64_t mem64[4]; + uint32_t memsize; + uint32_t reserved32; + uint64_t reserved64; +} XXH64_state_t; + typedef struct secretbase_sha3_context { int skip; mbedtls_sha3_context *ctx; @@ -77,11 +87,18 @@ typedef struct secretbase_sha256_context { mbedtls_sha256_context *ctx; } secretbase_sha256_context; +typedef struct secretbase_xxh64_context { + int skip; + XXH64_state_t *ctx; +} secretbase_xxh64_context; + SEXP hash_to_sexp(unsigned char *, size_t, int); SEXP secretbase_sha3(SEXP, SEXP, SEXP); SEXP secretbase_sha3_file(SEXP, SEXP, SEXP); SEXP secretbase_sha256(SEXP, SEXP); SEXP secretbase_sha256_file(SEXP, SEXP); +SEXP secretbase_xxh64(SEXP, SEXP); +SEXP secretbase_xxh64_file(SEXP, SEXP); #endif diff --git a/src/secret2.c b/src/secret2.c index c13dcae..9f494e4 100644 --- a/src/secret2.c +++ b/src/secret2.c @@ -116,14 +116,14 @@ static inline uint64_t mbedtls_bswap64(uint64_t x) { #endif /* !defined(MBEDTLS_BSWAP64) */ #define MBEDTLS_GET_UINT32_BE(data, offset) \ -((MBEDTLS_IS_BIG_ENDIAN) \ +((SB_IS_BIG_ENDIAN) \ ? mbedtls_get_unaligned_uint32((data) + (offset)) \ : MBEDTLS_BSWAP32(mbedtls_get_unaligned_uint32((data) + (offset))) \ ) #define MBEDTLS_PUT_UINT32_BE(n, data, offset) \ { \ - if (MBEDTLS_IS_BIG_ENDIAN) \ + if (SB_IS_BIG_ENDIAN) \ { \ mbedtls_put_unaligned_uint32((data) + (offset), (uint32_t) (n)); \ } \ diff --git a/src/secret3.c b/src/secret3.c new file mode 100644 index 0000000..71a057d --- /dev/null +++ b/src/secret3.c @@ -0,0 +1,448 @@ +// Copyright (C) 2024 Hibiki AI Limited +// +// This file is part of secretbase. +// +// secretbase is free software: you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free Software +// Foundation, either version 3 of the License, or (at your option) any later +// version. +// +// secretbase is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along with +// secretbase. If not, see . + +// secretbase ------------------------------------------------------------------ + +#include "secret.h" + +// secretbase - xxHash64 implementation ---------------------------------------- + +/* + * xxHash - Extremely Fast Hash algorithm + * Header File + * Copyright (C) 2012-2023 Yann Collet + * + * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You can contact the author at: + * - xxHash homepage: https://www.xxhash.com + * - xxHash source repository: https://github.com/Cyan4973/xxHash + */ + +// XXH_VERSION_MAJOR 0 XXH_VERSION_MINOR 8 XXH_VERSION_RELEASE 2 + +#if defined (__GNUC__) +# define XXH_PUREF __attribute__((pure)) +#else +# define XXH_PUREF +#endif + +#ifdef __has_attribute +# define XXH_HAS_ATTRIBUTE(x) __has_attribute(x) +#else +# define XXH_HAS_ATTRIBUTE(x) 0 +#endif + +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 202311L) && defined(__has_c_attribute) +# define XXH_HAS_C_ATTRIBUTE(x) __has_c_attribute(x) +#else +# define XXH_HAS_C_ATTRIBUTE(x) 0 +#endif + +#if XXH_HAS_C_ATTRIBUTE(fallthrough) +# define XXH_FALLTHROUGH [[fallthrough]] +#elif XXH_HAS_ATTRIBUTE(__fallthrough__) +# define XXH_FALLTHROUGH __attribute__ ((__fallthrough__)) +#else +# define XXH_FALLTHROUGH /* fallthrough */ +#endif + +#if XXH_HAS_ATTRIBUTE(noescape) +# define XXH_NOESCAPE __attribute__((noescape)) +#else +# define XXH_NOESCAPE +#endif + +#if defined(__GNUC__) && !(defined(__ARM_ARCH) && __ARM_ARCH < 7 && defined(__ARM_FEATURE_UNALIGNED)) +# define XXH_FORCE_MEMORY_ACCESS 1 +#endif + +static void* XXH_memcpy(void* dest, const void* src, size_t size) +{ + return memcpy(dest,src,size); +} + +#if defined(__GNUC__) || defined(__clang__) +# define XXH_FORCE_INLINE static __inline__ __attribute__((always_inline, unused)) +# define XXH_NO_INLINE static __attribute__((noinline)) +#elif defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */ +# define XXH_FORCE_INLINE static inline +# define XXH_NO_INLINE static +#else +# define XXH_FORCE_INLINE static +# define XXH_NO_INLINE static +#endif + +#if defined(__INTEL_COMPILER) +# define XXH_ASSERT(c) XXH_ASSUME((unsigned char) (c)) +#else +# define XXH_ASSERT(c) XXH_ASSUME(c) +#endif + +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11 */ +# define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { _Static_assert((c),m); } while(0) +#else +# define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { struct xxh_sa { char x[(c) ? 1 : -1]; }; } while(0) +#endif + +#define XXH_STATIC_ASSERT(c) XXH_STATIC_ASSERT_WITH_MESSAGE((c),#c) + +#define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +#ifdef __has_builtin +# define XXH_HAS_BUILTIN(x) __has_builtin(x) +#else +# define XXH_HAS_BUILTIN(x) 0 +#endif + +#if XXH_HAS_BUILTIN(__builtin_unreachable) +# define XXH_UNREACHABLE() __builtin_unreachable() + +#else +# define XXH_UNREACHABLE() +#endif + +#if XXH_HAS_BUILTIN(__builtin_assume) +# define XXH_ASSUME(c) __builtin_assume(c) +#else +# define XXH_ASSUME(c) if (!(c)) { XXH_UNREACHABLE(); } +#endif + +#if (defined(XXH_FORCE_MEMORY_ACCESS)) + +static uint64_t XXH_read64(const void* ptr) +{ + typedef __attribute__((aligned(1))) uint64_t xxh_unalign64; + return *((const xxh_unalign64*)ptr); +} + +#else + +static uint64_t XXH_read64(const void* memPtr) +{ + uint64_t val; + XXH_memcpy(&val, memPtr, sizeof(val)); + return val; +} + +#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ + +#if !defined(NO_CLANG_BUILTIN) && XXH_HAS_BUILTIN(__builtin_rotateleft32) \ + && XXH_HAS_BUILTIN(__builtin_rotateleft64) +# define XXH_rotl32 __builtin_rotateleft32 +# define XXH_rotl64 __builtin_rotateleft64 + +#else +# define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r)))) +# define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r)))) +#endif + +#if XXH_GCC_VERSION >= 403 +# define XXH_swap32 __builtin_bswap32 +# define XXH_swap64 __builtin_bswap64 +#else +static uint32_t XXH_swap32 (uint32_t x) +{ + return ((x << 24) & 0xff000000 ) | + ((x << 8) & 0x00ff0000 ) | + ((x >> 8) & 0x0000ff00 ) | + ((x >> 24) & 0x000000ff ); +} + +static uint64_t XXH_swap64(uint64_t x) +{ + return ((x << 56) & 0xff00000000000000ULL) | + ((x << 40) & 0x00ff000000000000ULL) | + ((x << 24) & 0x0000ff0000000000ULL) | + ((x << 8) & 0x000000ff00000000ULL) | + ((x >> 8) & 0x00000000ff000000ULL) | + ((x >> 24) & 0x0000000000ff0000ULL) | + ((x >> 40) & 0x000000000000ff00ULL) | + ((x >> 56) & 0x00000000000000ffULL); +} +#endif + +XXH_FORCE_INLINE uint32_t XXH_get32bits(const void* ptr) +{ + return SB_IS_BIG_ENDIAN ? XXH_swap32(*(const uint32_t*)ptr) : *(const uint32_t*)ptr; +} + +XXH_FORCE_INLINE uint64_t XXH_readLE64(const void* ptr) +{ + return SB_IS_BIG_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr); +} + +XXH_FORCE_INLINE uint64_t XXH_get64bits(const void* ptr) +{ + return SB_IS_BIG_ENDIAN ? XXH_swap64(*(const uint64_t*)ptr) : *(const uint64_t*)ptr; +} + +#define XXH_PRIME64_1 0x9E3779B185EBCA87ULL +#define XXH_PRIME64_2 0xC2B2AE3D27D4EB4FULL +#define XXH_PRIME64_3 0x165667B19E3779F9ULL +#define XXH_PRIME64_4 0x85EBCA77C2B2AE63ULL +#define XXH_PRIME64_5 0x27D4EB2F165667C5ULL + +static uint64_t XXH64_round(uint64_t acc, uint64_t input) +{ + acc += input * XXH_PRIME64_2; + acc = XXH_rotl64(acc, 31); + acc *= XXH_PRIME64_1; + return acc; +} + +static uint64_t XXH64_mergeRound(uint64_t acc, uint64_t val) +{ + val = XXH64_round(0, val); + acc ^= val; + acc = acc * XXH_PRIME64_1 + XXH_PRIME64_4; + return acc; +} + +static uint64_t XXH64_avalanche(uint64_t hash) +{ + hash ^= hash >> 33; + hash *= XXH_PRIME64_2; + hash ^= hash >> 29; + hash *= XXH_PRIME64_3; + hash ^= hash >> 32; + return hash; +} + +static XXH_PUREF uint64_t +XXH64_finalize(uint64_t hash, const uint8_t* ptr, size_t len) +{ + if (ptr==NULL) XXH_ASSERT(len == 0); + len &= 31; + while (len >= 8) { + uint64_t const k1 = XXH64_round(0, XXH_get64bits(ptr)); + ptr += 8; + hash ^= k1; + hash = XXH_rotl64(hash,27) * XXH_PRIME64_1 + XXH_PRIME64_4; + len -= 8; + } + if (len >= 4) { + hash ^= (uint64_t)(XXH_get32bits(ptr)) * XXH_PRIME64_1; + ptr += 4; + hash = XXH_rotl64(hash, 23) * XXH_PRIME64_2 + XXH_PRIME64_3; + len -= 4; + } + while (len > 0) { + hash ^= (*ptr++) * XXH_PRIME64_5; + hash = XXH_rotl64(hash, 11) * XXH_PRIME64_1; + --len; + } + return XXH64_avalanche(hash); +} + +static void XXH64_reset(XXH_NOESCAPE XXH64_state_t* statePtr) +{ + XXH_ASSERT(statePtr != NULL); + memset(statePtr, 0, sizeof(*statePtr)); + statePtr->v[0] = XXH_PRIME64_1 + XXH_PRIME64_2; + statePtr->v[1] = XXH_PRIME64_2; + statePtr->v[2] = 0; + statePtr->v[3] = -XXH_PRIME64_1; +} + +static void XXH64_update (XXH_NOESCAPE XXH64_state_t* state, + XXH_NOESCAPE const void* input, size_t len) +{ + + const uint8_t* p = (const uint8_t*)input; + const uint8_t* const bEnd = p + len; + + state->total_len += len; + + if (state->memsize + len < 32) { /* fill in tmp buffer */ + XXH_memcpy(((uint8_t*)state->mem64) + state->memsize, input, len); + state->memsize += (uint32_t)len; + return; + } + + if (state->memsize) { /* tmp buffer is full */ + XXH_memcpy(((uint8_t*)state->mem64) + state->memsize, input, 32-state->memsize); + state->v[0] = XXH64_round(state->v[0], XXH_readLE64(state->mem64+0)); + state->v[1] = XXH64_round(state->v[1], XXH_readLE64(state->mem64+1)); + state->v[2] = XXH64_round(state->v[2], XXH_readLE64(state->mem64+2)); + state->v[3] = XXH64_round(state->v[3], XXH_readLE64(state->mem64+3)); + p += 32 - state->memsize; + state->memsize = 0; + } + + if (p+32 <= bEnd) { + const uint8_t* const limit = bEnd - 32; + + do { + state->v[0] = XXH64_round(state->v[0], XXH_readLE64(p)); p+=8; + state->v[1] = XXH64_round(state->v[1], XXH_readLE64(p)); p+=8; + state->v[2] = XXH64_round(state->v[2], XXH_readLE64(p)); p+=8; + state->v[3] = XXH64_round(state->v[3], XXH_readLE64(p)); p+=8; + } while (p<=limit); + + } + + if (p < bEnd) { + XXH_memcpy(state->mem64, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); + } + +} + +static uint64_t XXH64_digest(XXH_NOESCAPE const XXH64_state_t* state) +{ + uint64_t h64; + + if (state->total_len >= 32) { + h64 = XXH_rotl64(state->v[0], 1) + XXH_rotl64(state->v[1], 7) + XXH_rotl64(state->v[2], 12) + XXH_rotl64(state->v[3], 18); + h64 = XXH64_mergeRound(h64, state->v[0]); + h64 = XXH64_mergeRound(h64, state->v[1]); + h64 = XXH64_mergeRound(h64, state->v[2]); + h64 = XXH64_mergeRound(h64, state->v[3]); + } else { + h64 = state->v[2] /*seed*/ + XXH_PRIME64_5; + } + + h64 += (uint64_t) state->total_len; + + h64 = XXH64_finalize(h64, (const uint8_t*)state->mem64, (size_t)state->total_len); + + return SB_IS_BIG_ENDIAN ? h64 : XXH_swap64(h64); +} + +// secretbase - internals ------------------------------------------------------ + +static void hash_bytes(R_outpstream_t stream, void *src, int len) { + + secretbase_xxh64_context *sctx = (secretbase_xxh64_context *) stream->data; + sctx->skip ? (void) sctx->skip-- : XXH64_update(sctx->ctx, (uint8_t *) src, (size_t) len); + +} + +static void hash_file(XXH64_state_t *ctx, const SEXP x) { + + if (TYPEOF(x) != STRSXP) + Rf_error("'file' must be specified as a character string"); + const char *file = R_ExpandFileName(CHAR(STRING_ELT(x, 0))); + unsigned char buf[SB_BUF_SIZE]; + FILE *f; + size_t cur; + + if ((f = fopen(file, "rb")) == NULL) + Rf_error("file not found or no read permission at '%s'", file); + + setbuf(f, NULL); + + while ((cur = fread(buf, sizeof(char), SB_BUF_SIZE, f))) { + XXH64_update(ctx, buf, cur); + } + + if (ferror(f)) { + fclose(f); + Rf_error("file read error at '%s'", file); + } + fclose(f); + +} + +static void hash_object(XXH64_state_t *ctx, const SEXP x) { + + switch (TYPEOF(x)) { + case STRSXP: + if (XLENGTH(x) == 1 && ATTRIB(x) == R_NilValue) { + const char *s = CHAR(STRING_ELT(x, 0)); + XXH64_update(ctx, (uint8_t *) s, strlen(s)); + return; + } + break; + case RAWSXP: + if (ATTRIB(x) == R_NilValue) { + XXH64_update(ctx, (uint8_t *) STDVEC_DATAPTR(x), (size_t) XLENGTH(x)); + return; + } + break; + } + + secretbase_xxh64_context sctx; + sctx.skip = SB_SERIAL_HEADERS; + sctx.ctx = ctx; + + struct R_outpstream_st output_stream; + R_InitOutPStream( + &output_stream, + (R_pstream_data_t) &sctx, + R_pstream_xdr_format, + SB_R_SERIAL_VER, + NULL, + hash_bytes, + NULL, + R_NilValue + ); + R_Serialize(x, &output_stream); + +} + +SEXP secretbase_xxh64_impl(const SEXP x, const SEXP convert, + void (*const hash_func)(XXH64_state_t *, SEXP)) { + + const int conv = LOGICAL(convert)[0]; + uint64_t buf; + struct XXH64_state_s state; + + XXH64_reset(&state); + hash_func(&state, x); + buf = XXH64_digest(&state); + + return hash_to_sexp((unsigned char *) &buf, sizeof(uint64_t), conv); + +} + +// secretbase - exported functions --------------------------------------------- + +SEXP secretbase_xxh64(SEXP x, SEXP convert) { + + return secretbase_xxh64_impl(x, convert, hash_object); + +} + +SEXP secretbase_xxh64_file(SEXP x, SEXP convert) { + + return secretbase_xxh64_impl(x, convert, hash_file); + +} diff --git a/tests/tests.R b/tests/tests.R index 8e6cfbb..3dcc6a7 100644 --- a/tests/tests.R +++ b/tests/tests.R @@ -58,3 +58,19 @@ test_error(hash_func("", ""), "file not found or no read permission") if (.Platform[["OS.type"]] == "unix") test_error(sha256(file = "~/"), "file read error") test_equal(sha256(paste(1:888, collapse = "")), "ec5df945d0ff0c927812ec503fe9ffd5cbdf7cf79b5391ad5002b3a80760183b") test_equal(sha256(NULL), "71557d1c8bac9bbe3cbec8d00bb223a2f372279827064095447e569fbf5a760a") +# xxHash64 tests: +test_equal(xxh64("secret base"), "ac1f7520cd9f49e9") +test_equal(xxh64("secret base", convert = NA)[1L], 544546732L) +test_that(xxh64("secret base", convert = FALSE), is.raw) +test_equal(xxh64(data.frame(a = 1, b = 2)), "cbc8601c2c4a3c16") +xhash_func <- function(file, string) { + on.exit(unlink(file)) + cat(string, file = file) + xxh64(file = file) +} +test_equal(xhash_func(tempfile(), "secret base"), "ac1f7520cd9f49e9") +test_error(xhash_func("", ""), "file not found or no read permission") +if (.Platform[["OS.type"]] == "unix") test_error(xxh64(file = "~/"), "file read error") +test_equal(xxh64(paste(1:888, collapse = "")), "faf5050519852b31") +test_equal(xxh64(NULL), "c85d88fc56f4e042") +test_equal(xxh64(c("secret base", "")), "de232a89d184aac9") From 2b56ebe1354277881d3932eccabb513303078af5 Mon Sep 17 00:00:00 2001 From: shikokuchuo <53399081+shikokuchuo@users.noreply.github.com> Date: Tue, 5 Mar 2024 11:18:37 +0000 Subject: [PATCH 2/2] add tests --- tests/tests.R | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/tests.R b/tests/tests.R index 3dcc6a7..5eca101 100644 --- a/tests/tests.R +++ b/tests/tests.R @@ -47,6 +47,9 @@ test_equal(sha256("secret base"), "1951c1ca3d50e95e6ede2b1c26fefd0f0e8eba1e51a83 test_equal(sha256("secret base", convert = NA)[2L], 1592348733L) test_equal(sha256(sha256("secret base", convert = FALSE)), "bd45eca9cbd4404cd467909fa8a2196ee9ffc7cb7f70f6343ff6647419744d41") test_equal(sha256(data.frame(a = 1, b = 2)), "189874c3ac59edecb4eab95a2d7c1bbb293a6ccd04e3da5b28daca91ebc7f15b") +test_equal(sha256(c("secret", "base")), "6a38552b0dab8bf0a6c2f9a6a7acf764631319843f58e85f883301a3cd08b1f2") +test_equal(sha256(`attr<-`("base", "secret", "base")), "6e9e5b1a42304047ba73161360917d001c19974aebd98d345251ff138cddf6ea") +test_equal(sha256(`class<-`(sha256(character(), convert = FALSE), "hash")), "c762db048ff48f7ba3e9df5539db3aa678cb2f336d96d5c81b4e5d3e19783d14") test_error(sha256(file = NULL), "'file' must be specified as a character string") hash_func <- function(file, string) { on.exit(unlink(file)) @@ -61,8 +64,12 @@ test_equal(sha256(NULL), "71557d1c8bac9bbe3cbec8d00bb223a2f372279827064095447e56 # xxHash64 tests: test_equal(xxh64("secret base"), "ac1f7520cd9f49e9") test_equal(xxh64("secret base", convert = NA)[1L], 544546732L) -test_that(xxh64("secret base", convert = FALSE), is.raw) +test_equal(xxh64(xxh64("secret base", convert = FALSE)), "f0d7a8c177d5a602") test_equal(xxh64(data.frame(a = 1, b = 2)), "cbc8601c2c4a3c16") +test_equal(xxh64(c("secret", "base")), "c601d788544a7385") +test_equal(xxh64(`attr<-`("base", "secret", "base")), "801151a163c18252") +test_equal(xxh64(`class<-`(xxh64(character(), convert = FALSE), "hash")), "68ba986ecfccf4fc") +test_error(xxh64(file = NULL), "'file' must be specified as a character string") xhash_func <- function(file, string) { on.exit(unlink(file)) cat(string, file = file)