From 352843c71c1f65ae91d7230e3ce0de6eea3e3f11 Mon Sep 17 00:00:00 2001
From: Ulrich Weigand
Date: Mon, 17 May 2021 14:01:43 +0200
Subject: [PATCH] Add basic support for s390x

---
 build.rs                           | 12 ++---
 crypto/fipsmodule/aes/aes_nohw.c   | 32 ++++++------
 crypto/fipsmodule/bn/montgomery.c  | 14 ++++++
 crypto/fipsmodule/ec/p256_shared.h |  9 +++-
 crypto/internal.h                  | 80 ++++++++++++++++++++++++++----
 crypto/poly1305/poly1305.c         |  7 +--
 include/ring-core/base.h           |  3 ++
 7 files changed, 117 insertions(+), 40 deletions(-)

diff --git a/build.rs b/build.rs
index da1379f7b3..eae7bcaf24 100644
--- a/build.rs
+++ b/build.rs
@@ -40,12 +40,12 @@ const RING_SRCS: &[(&[&str], &str)] = &[
     (&[], "crypto/mem.c"),
     (&[], "crypto/poly1305/poly1305.c"),
 
-    (&[AARCH64, ARM, X86_64, X86], "crypto/crypto.c"),
-    (&[AARCH64, ARM, X86_64, X86], "crypto/curve25519/curve25519.c"),
-    (&[AARCH64, ARM, X86_64, X86], "crypto/fipsmodule/ec/ecp_nistz.c"),
-    (&[AARCH64, ARM, X86_64, X86], "crypto/fipsmodule/ec/gfp_p256.c"),
-    (&[AARCH64, ARM, X86_64, X86], "crypto/fipsmodule/ec/gfp_p384.c"),
-    (&[AARCH64, ARM, X86_64, X86], "crypto/fipsmodule/ec/p256.c"),
+    (&[], "crypto/crypto.c"),
+    (&[], "crypto/curve25519/curve25519.c"),
+    (&[], "crypto/fipsmodule/ec/ecp_nistz.c"),
+    (&[], "crypto/fipsmodule/ec/gfp_p256.c"),
+    (&[], "crypto/fipsmodule/ec/gfp_p384.c"),
+    (&[], "crypto/fipsmodule/ec/p256.c"),
 
     (&[X86_64, X86], "crypto/cpu-intel.c"),
 
diff --git a/crypto/fipsmodule/aes/aes_nohw.c b/crypto/fipsmodule/aes/aes_nohw.c
index a6306f049e..d871d82c79 100644
--- a/crypto/fipsmodule/aes/aes_nohw.c
+++ b/crypto/fipsmodule/aes/aes_nohw.c
@@ -346,19 +346,18 @@ static inline uint8_t lo(uint32_t a) {
 
 static inline void aes_nohw_compact_block(aes_word_t out[AES_NOHW_BLOCK_WORDS],
                                           const uint8_t in[16]) {
-  OPENSSL_memcpy(out, in, 16);
 #if defined(OPENSSL_SSE2)
-  // No conversions needed.
+  OPENSSL_memcpy(out, in, 16);  // No conversions needed.
 #elif defined(OPENSSL_64_BIT)
-  uint64_t a0 = aes_nohw_compact_word(out[0]);
-  uint64_t a1 = aes_nohw_compact_word(out[1]);
+  uint64_t a0 = aes_nohw_compact_word(CRYPTO_read_le64(in));
+  uint64_t a1 = aes_nohw_compact_word(CRYPTO_read_le64(in + 8));
   out[0] = (a0 & UINT64_C(0x00000000ffffffff)) | (a1 << 32);
   out[1] = (a1 & UINT64_C(0xffffffff00000000)) | (a0 >> 32);
 #else
-  uint32_t a0 = aes_nohw_compact_word(out[0]);
-  uint32_t a1 = aes_nohw_compact_word(out[1]);
-  uint32_t a2 = aes_nohw_compact_word(out[2]);
-  uint32_t a3 = aes_nohw_compact_word(out[3]);
+  uint32_t a0 = aes_nohw_compact_word(CRYPTO_read_le32(in));
+  uint32_t a1 = aes_nohw_compact_word(CRYPTO_read_le32(in + 4));
+  uint32_t a2 = aes_nohw_compact_word(CRYPTO_read_le32(in + 8));
+  uint32_t a3 = aes_nohw_compact_word(CRYPTO_read_le32(in + 12));
   // Note clang, when building for ARM Thumb2, will sometimes miscompile
   // expressions such as (a0 & 0x0000ff00) << 8, particularly when building
   // without optimizations. This bug was introduced in
@@ -382,8 +381,8 @@ static inline void aes_nohw_uncompact_block(
       aes_nohw_uncompact_word((a0 & UINT64_C(0x00000000ffffffff)) | (a1 << 32));
   uint64_t b1 =
       aes_nohw_uncompact_word((a1 & UINT64_C(0xffffffff00000000)) | (a0 >> 32));
-  OPENSSL_memcpy(out, &b0, 8);
-  OPENSSL_memcpy(out + 8, &b1, 8);
+  CRYPTO_write_le64(b0, out);
+  CRYPTO_write_le64(b1, out + 8);
 #else
   uint32_t a0 = in[0];
   uint32_t a1 = in[1];
@@ -404,10 +403,10 @@ static inline void aes_nohw_uncompact_block(
   b1 = aes_nohw_uncompact_word(b1);
   b2 = aes_nohw_uncompact_word(b2);
   b3 = aes_nohw_uncompact_word(b3);
-  OPENSSL_memcpy(out, &b0, 4);
-  OPENSSL_memcpy(out + 4, &b1, 4);
-  OPENSSL_memcpy(out + 8, &b2, 4);
-  OPENSSL_memcpy(out + 12, &b3, 4);
+  CRYPTO_write_le32(b0, out);
+  CRYPTO_write_le32(b1, out + 4);
+  CRYPTO_write_le32(b2, out + 8);
+  CRYPTO_write_le32(b3, out + 12);
 #endif
 }
 
@@ -925,18 +924,17 @@ void aes_nohw_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out,
 
   // Make |AES_NOHW_BATCH_SIZE| copies of |ivec|.
   alignas(AES_NOHW_WORD_SIZE) union {
-    uint32_t u32[AES_NOHW_BATCH_SIZE * 4];
     uint8_t u8[AES_NOHW_BATCH_SIZE * 16];
   } ivs, enc_ivs;
   for (size_t i = 0; i < AES_NOHW_BATCH_SIZE; i++) {
     OPENSSL_memcpy(ivs.u8 + 16 * i, ivec, 16);
   }
 
-  uint32_t ctr = CRYPTO_bswap4(ivs.u32[3]);
+  uint32_t ctr = CRYPTO_read_be32(ivs.u8 + 12);
   for (;;) {
     // Update counters.
     for (uint32_t i = 0; i < AES_NOHW_BATCH_SIZE; i++) {
-      ivs.u32[4 * i + 3] = CRYPTO_bswap4(ctr + i);
+      CRYPTO_write_be32(ctr + i, ivs.u8 + 16 * i + 12);
     }
 
     size_t todo = blocks >= AES_NOHW_BATCH_SIZE ? AES_NOHW_BATCH_SIZE : blocks;
diff --git a/crypto/fipsmodule/bn/montgomery.c b/crypto/fipsmodule/bn/montgomery.c
index b1f1c69329..e047bf5a6a 100644
--- a/crypto/fipsmodule/bn/montgomery.c
+++ b/crypto/fipsmodule/bn/montgomery.c
@@ -156,3 +156,17 @@ int bn_from_montgomery_in_place(BN_ULONG r[], size_t num_r, BN_ULONG a[],
   }
   return 1;
 }
+
+#if !defined(OPENSSL_X86) && !defined(OPENSSL_X86_64) && \
+    !defined(OPENSSL_ARM) && !defined(OPENSSL_AARCH64)
+void bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
+                 const BN_ULONG *np, const BN_ULONG *n0, size_t num) {
+  Limb tmp[2 * num];
+  for (size_t i = 0; i < num; i++)
+    tmp[i] = 0;
+  for (size_t i = 0; i < num; i++)
+    tmp[num + i] = limbs_mul_add_limb(tmp + i, ap, bp[i], num);
+
+  bn_from_montgomery_in_place(rp, num, tmp, 2 * num, np, num, n0);
+}
+#endif
diff --git a/crypto/fipsmodule/ec/p256_shared.h b/crypto/fipsmodule/ec/p256_shared.h
index 4dd325bee1..3c1f107a65 100644
--- a/crypto/fipsmodule/ec/p256_shared.h
+++ b/crypto/fipsmodule/ec/p256_shared.h
@@ -50,7 +50,14 @@ typedef unsigned char P256_SCALAR_BYTES[33];
 
 static inline void p256_scalar_bytes_from_limbs(
     P256_SCALAR_BYTES bytes_out, const BN_ULONG limbs[P256_LIMBS]) {
-  OPENSSL_memcpy(bytes_out, limbs, 32);
+  for (int i = 0; i < P256_LIMBS; i++)
+  {
+#if BN_BITS2 == 64
+    CRYPTO_write_le64(limbs[i], bytes_out + i * 8);
+#else
+    CRYPTO_write_le32(limbs[i], bytes_out + i * 4);
+#endif
+  }
   bytes_out[32] = 0;
 }
 
diff --git a/crypto/internal.h b/crypto/internal.h
index b975c0b53a..88e19e5b92 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -254,19 +254,77 @@ static inline crypto_word constant_time_select_w(crypto_word mask,
 
 // Endianness conversions.
 
-#if defined(__GNUC__) && __GNUC__ >= 2
-static inline uint32_t CRYPTO_bswap4(uint32_t x) {
-  return __builtin_bswap32(x);
+static inline uint32_t CRYPTO_read_le32(const uint8_t *p) {
+  return (((uint32_t)p[0]) |
+          (((uint32_t)p[1]) << 8) |
+          (((uint32_t)p[2]) << 16) |
+          (((uint32_t)p[3]) << 24));
 }
-#elif defined(_MSC_VER)
-#pragma warning(push, 3)
-#include <stdlib.h>
-#pragma warning(pop)
-#pragma intrinsic(_byteswap_uint64, _byteswap_ulong)
-static inline uint32_t CRYPTO_bswap4(uint32_t x) {
-  return _byteswap_ulong(x);
+
+static inline uint32_t CRYPTO_read_be32(const uint8_t *p) {
+  return ((((uint32_t)p[0]) << 24) |
+          (((uint32_t)p[1]) << 16) |
+          (((uint32_t)p[2]) << 8) |
+          ((uint32_t)p[3]));
+}
+
+static inline uint64_t CRYPTO_read_le64(const uint8_t *p) {
+  return (((uint64_t)p[0]) |
+          (((uint64_t)p[1]) << 8) |
+          (((uint64_t)p[2]) << 16) |
+          (((uint64_t)p[3]) << 24) |
+          (((uint64_t)p[4]) << 32) |
+          (((uint64_t)p[5]) << 40) |
+          (((uint64_t)p[6]) << 48) |
+          (((uint64_t)p[7]) << 56));
+}
+
+static inline uint64_t CRYPTO_read_be64(const uint8_t *p) {
+  return ((((uint64_t)p[0]) << 56) |
+          (((uint64_t)p[1]) << 48) |
+          (((uint64_t)p[2]) << 40) |
+          (((uint64_t)p[3]) << 32) |
+          (((uint64_t)p[4]) << 24) |
+          (((uint64_t)p[5]) << 16) |
+          (((uint64_t)p[6]) << 8) |
+          ((uint64_t)p[7]));
+}
+
+static inline void CRYPTO_write_le32(uint32_t v, uint8_t *p) {
+  p[0] = (uint8_t)(v & 0xff);
+  p[1] = (uint8_t)((v >> 8) & 0xff);
+  p[2] = (uint8_t)((v >> 16) & 0xff);
+  p[3] = (uint8_t)((v >> 24) & 0xff);
+}
+
+static inline void CRYPTO_write_be32(uint32_t v, uint8_t *p) {
+  p[0] = (uint8_t)((v >> 24) & 0xff);
+  p[1] = (uint8_t)((v >> 16) & 0xff);
+  p[2] = (uint8_t)((v >> 8) & 0xff);
+  p[3] = (uint8_t)(v & 0xff);
+}
+
+static inline void CRYPTO_write_le64(uint64_t v, uint8_t *p) {
+  p[0] = (uint8_t)(v & 0xff);
+  p[1] = (uint8_t)((v >> 8) & 0xff);
+  p[2] = (uint8_t)((v >> 16) & 0xff);
+  p[3] = (uint8_t)((v >> 24) & 0xff);
+  p[4] = (uint8_t)((v >> 32) & 0xff);
+  p[5] = (uint8_t)((v >> 40) & 0xff);
+  p[6] = (uint8_t)((v >> 48) & 0xff);
+  p[7] = (uint8_t)((v >> 56) & 0xff);
+}
+
+static inline void CRYPTO_write_be64(uint64_t v, uint8_t *p) {
+  p[0] = (uint8_t)((v >> 56) & 0xff);
+  p[1] = (uint8_t)((v >> 48) & 0xff);
+  p[2] = (uint8_t)((v >> 40) & 0xff);
+  p[3] = (uint8_t)((v >> 32) & 0xff);
+  p[4] = (uint8_t)((v >> 24) & 0xff);
+  p[5] = (uint8_t)((v >> 16) & 0xff);
+  p[6] = (uint8_t)((v >> 8) & 0xff);
+  p[7] = (uint8_t)(v & 0xff);
 }
-#endif
 
 #if !defined(RING_CORE_NOSTDLIBINC)
 #include <string.h>
diff --git a/crypto/poly1305/poly1305.c b/crypto/poly1305/poly1305.c
index a2f0b987f0..cd1c7d3d5e 100644
--- a/crypto/poly1305/poly1305.c
+++ b/crypto/poly1305/poly1305.c
@@ -29,15 +29,12 @@
 #pragma GCC diagnostic ignored "-Wconversion"
 #endif
 
-// We can assume little-endian.
 static uint32_t U8TO32_LE(const uint8_t *m) {
-  uint32_t r;
-  OPENSSL_memcpy(&r, m, sizeof(r));
-  return r;
+  return CRYPTO_read_le32(m);
 }
 
 static void U32TO8_LE(uint8_t *m, uint32_t v) {
-  OPENSSL_memcpy(m, &v, sizeof(v));
+  CRYPTO_write_le32(v, m);
 }
 
 static uint64_t mul32x32_64(uint32_t a, uint32_t b) { return (uint64_t)a * b; }
diff --git a/include/ring-core/base.h b/include/ring-core/base.h
index f1a027d1a4..69cf40ca20 100644
--- a/include/ring-core/base.h
+++ b/include/ring-core/base.h
@@ -89,6 +89,9 @@
 #elif defined(__MIPSEL__) && defined(__LP64__)
 #define OPENSSL_64_BIT
 #define OPENSSL_MIPS64
+#elif defined(__s390x__)
+#define OPENSSL_64_BIT
+#define OPENSSL_S390X
 #elif defined(__wasm__)
 #define OPENSSL_32_BIT
 #else