diff --git a/crypto/chacha/asm/chacha-x86_64.pl b/crypto/chacha/asm/chacha-x86_64.pl index 1fe75bd08..2526c7564 100755 --- a/crypto/chacha/asm/chacha-x86_64.pl +++ b/crypto/chacha/asm/chacha-x86_64.pl @@ -78,6 +78,7 @@ .extern OPENSSL_ia32cap_P +.section .rodata .align 64 .Lzero: .long 0,0,0,0 @@ -107,6 +108,7 @@ .Lsixteen: .long 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16 .asciz "ChaCha20 for x86_64, CRYPTOGAMS by " +.text ___ sub AUTOLOAD() # thunk [simplified] 32-bit style perlasm diff --git a/crypto/cipher_extra/asm/chacha20_poly1305_x86_64.pl b/crypto/cipher_extra/asm/chacha20_poly1305_x86_64.pl index b2067c78d..f0430c3e8 100644 --- a/crypto/cipher_extra/asm/chacha20_poly1305_x86_64.pl +++ b/crypto/cipher_extra/asm/chacha20_poly1305_x86_64.pl @@ -42,6 +42,7 @@ chacha20_poly1305_constants: +.section .rodata .align 64 .Lchacha20_consts: .byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k' @@ -79,6 +80,7 @@ .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff +.text ___ my ($oup,$inp,$inl,$adp,$keyp,$itr1,$itr2,$adl)=("%rdi","%rsi","%rbx","%rcx","%r9","%rcx","%r8","%r8"); diff --git a/crypto/curve25519/curve25519.c b/crypto/curve25519/curve25519.c index d2a841669..5916e31c5 100644 --- a/crypto/curve25519/curve25519.c +++ b/crypto/curve25519/curve25519.c @@ -43,10 +43,6 @@ // Various pre-computed constants. #include "./curve25519_tables.h" -#if defined(OPENSSL_NO_ASM) -#define FIAT_25519_NO_ASM -#endif - #if defined(BORINGSSL_CURVE25519_64BIT) #if defined(__GNUC__) #pragma GCC diagnostic ignored "-Wpedantic" diff --git a/crypto/fipsmodule/aes/asm/aesni-x86_64.pl b/crypto/fipsmodule/aes/asm/aesni-x86_64.pl index 4e4a2774d..e9cc528c0 100644 --- a/crypto/fipsmodule/aes/asm/aesni-x86_64.pl +++ b/crypto/fipsmodule/aes/asm/aesni-x86_64.pl @@ -1505,6 +1505,7 @@ sub aesni_generate8 { } $code.=<<___; +.section .rodata .align 64 .Lbswap_mask: .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 @@ -1525,6 +1526,7 @@ sub aesni_generate8 { .asciz "AES for Intel AES-NI, CRYPTOGAMS by " .align 64 +.text ___ # EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, diff --git a/crypto/fipsmodule/aes/asm/vpaes-x86_64.pl b/crypto/fipsmodule/aes/asm/vpaes-x86_64.pl index c663cbf76..d088ba2d5 100644 --- a/crypto/fipsmodule/aes/asm/vpaes-x86_64.pl +++ b/crypto/fipsmodule/aes/asm/vpaes-x86_64.pl @@ -873,6 +873,7 @@ ## ## ######################################################## .type _vpaes_consts,\@object +.section .rodata .align 64 _vpaes_consts: .Lk_inv: # inv, inva @@ -941,6 +942,7 @@ .asciz "Vector Permutation AES for x86_64/SSSE3, Mike Hamburg (Stanford University)" .align 64 .size _vpaes_consts,.-_vpaes_consts +.text ___ if ($win64) { diff --git a/crypto/fipsmodule/bn/asm/bn-armv8.pl b/crypto/fipsmodule/bn/asm/bn-armv8.pl new file mode 100755 index 000000000..783fb96ad --- /dev/null +++ b/crypto/fipsmodule/bn/asm/bn-armv8.pl @@ -0,0 +1,118 @@ +#!/usr/bin/env perl +# Copyright (c) 2023, Google Inc. +# +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +use strict; + +my $flavour = shift; +my $output = shift; +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; +my $dir = $1; +my $xlate; +( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../../perlasm/arm-xlate.pl" and -f $xlate) or +die "can't locate arm-xlate.pl"; + +open OUT, "| \"$^X\" \"$xlate\" $flavour \"$output\""; +*STDOUT = *OUT; + +my ($rp, $ap, $bp, $num) = ("x0", "x1", "x2", "x3"); +my ($a0, $a1, $b0, $b1, $num_pairs) = ("x4", "x5", "x6", "x7", "x8"); +my $code = <<____; +#include + +.text + +// BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, +// size_t num); +.type bn_add_words, %function +.globl bn_add_words +.align 4 +bn_add_words: + AARCH64_VALID_CALL_TARGET + # Clear the carry flag. + cmn xzr, xzr + + # aarch64 can load two registers at a time, so we do two loop iterations at + # at a time. Split $num = 2 * $num_pairs + $num. This allows loop + # operations to use CBNZ without clobbering the carry flag. + lsr $num_pairs, $num, #1 + and $num, $num, #1 + + cbz $num_pairs, .Ladd_tail +.Ladd_loop: + ldp $a0, $a1, [$ap], #16 + ldp $b0, $b1, [$bp], #16 + sub $num_pairs, $num_pairs, #1 + adcs $a0, $a0, $b0 + adcs $a1, $a1, $b1 + stp $a0, $a1, [$rp], #16 + cbnz $num_pairs, .Ladd_loop + +.Ladd_tail: + cbz $num, .Ladd_exit + ldr $a0, [$ap], #8 + ldr $b0, [$bp], #8 + adcs $a0, $a0, $b0 + str $a0, [$rp], #8 + +.Ladd_exit: + cset x0, cs + ret +.size bn_add_words,.-bn_add_words + +// BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, +// size_t num); +.type bn_sub_words, %function +.globl bn_sub_words +.align 4 +bn_sub_words: + AARCH64_VALID_CALL_TARGET + # Set the carry flag. Arm's borrow bit is flipped from the carry flag, + # so we want C = 1 here. + cmp xzr, xzr + + # aarch64 can load two registers at a time, so we do two loop iterations at + # at a time. Split $num = 2 * $num_pairs + $num. This allows loop + # operations to use CBNZ without clobbering the carry flag. + lsr $num_pairs, $num, #1 + and $num, $num, #1 + + cbz $num_pairs, .Lsub_tail +.Lsub_loop: + ldp $a0, $a1, [$ap], #16 + ldp $b0, $b1, [$bp], #16 + sub $num_pairs, $num_pairs, #1 + sbcs $a0, $a0, $b0 + sbcs $a1, $a1, $b1 + stp $a0, $a1, [$rp], #16 + cbnz $num_pairs, .Lsub_loop + +.Lsub_tail: + cbz $num, .Lsub_exit + ldr $a0, [$ap], #8 + ldr $b0, [$bp], #8 + sbcs $a0, $a0, $b0 + str $a0, [$rp], #8 + +.Lsub_exit: + cset x0, cc + ret +size bn_sub_words,.-bn_sub_words +____ + +print $code; +close STDOUT or die "error closing STDOUT: $!"; diff --git a/crypto/fipsmodule/bn/asm/x86_64-mont5.pl b/crypto/fipsmodule/bn/asm/x86_64-mont5.pl index ed3534fbd..c01a7aea8 100755 --- a/crypto/fipsmodule/bn/asm/x86_64-mont5.pl +++ b/crypto/fipsmodule/bn/asm/x86_64-mont5.pl @@ -3576,11 +3576,13 @@ ___ } $code.=<<___; +.section .rodata .align 64 .Linc: .long 0,0, 1,1 .long 2,2, 2,2 .asciz "Montgomery Multiplication with scatter/gather for x86_64, CRYPTOGAMS by " +.text ___ # EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, diff --git a/crypto/fipsmodule/ec/asm/p256-armv8-asm.pl b/crypto/fipsmodule/ec/asm/p256-armv8-asm.pl index 04235bed7..edd6f6b11 100644 --- a/crypto/fipsmodule/ec/asm/p256-armv8-asm.pl +++ b/crypto/fipsmodule/ec/asm/p256-armv8-asm.pl @@ -56,7 +56,7 @@ $code.=<<___; #include "ring-core/arm_arch.h" -.text +.section .rodata .align 5 .Lpoly: .quad 0xffffffffffffffff,0x00000000ffffffff,0x0000000000000000,0xffffffff00000001 @@ -71,6 +71,7 @@ .LordK: .quad 0xccd1c8aaee00bc4f .asciz "ECP_NISTZ256 for ARMv8, CRYPTOGAMS by " +.text // void ecp_nistz256_mul_mont(BN_ULONG x0[4],const BN_ULONG x1[4], // const BN_ULONG x2[4]); @@ -86,8 +87,10 @@ ldr $bi,[$bp] // bp[0] ldp $a0,$a1,[$ap] ldp $a2,$a3,[$ap,#16] - ldr $poly1,.Lpoly+8 - ldr $poly3,.Lpoly+24 + adrp $poly3,:pg_hi21:.Lpoly + add $poly3,$poly3,:lo12:.Lpoly + ldr $poly1,[$poly3,#8] + ldr $poly3,[$poly3,#24] bl __ecp_nistz256_mul_mont @@ -109,8 +112,10 @@ ldp $a0,$a1,[$ap] ldp $a2,$a3,[$ap,#16] - ldr $poly1,.Lpoly+8 - ldr $poly3,.Lpoly+24 + adrp $poly3,:pg_hi21:.Lpoly + add $poly3,$poly3,:lo12:.Lpoly + ldr $poly1,[$poly3,#8] + ldr $poly3,[$poly3,#24] bl __ecp_nistz256_sqr_mont @@ -134,8 +139,10 @@ mov $acc1,xzr mov $acc2,xzr mov $acc3,xzr - ldr $poly1,.Lpoly+8 - ldr $poly3,.Lpoly+24 + adrp $poly3,:pg_hi21:.Lpoly + add $poly3,$poly3,:lo12:.Lpoly + ldr $poly1,[$poly3,#8] + ldr $poly3,[$poly3,#24] bl __ecp_nistz256_sub_from @@ -513,9 +520,11 @@ mov $rp_real,$rp ldp $acc2,$acc3,[$ap,#48] mov $ap_real,$ap - ldr $poly1,.Lpoly+8 + adrp $poly3,:pg_hi21:.Lpoly + add $poly3,$poly3,:lo12:.Lpoly + ldr $poly1,[$poly3,#8] mov $t0,$acc0 - ldr $poly3,.Lpoly+24 + ldr $poly3,[$poly3,#24] mov $t1,$acc1 ldp $a0,$a1,[$ap_real,#64] // forward load for p256_sqr_mont mov $t2,$acc2 @@ -674,8 +683,10 @@ mov $rp_real,$rp mov $ap_real,$ap mov $bp_real,$bp - ldr $poly1,.Lpoly+8 - ldr $poly3,.Lpoly+24 + adrp $poly3,:pg_hi21:.Lpoly + add $poly3,$poly3,:lo12:.Lpoly + ldr $poly1,[$poly3,#8] + ldr $poly3,[$poly3,#24] orr $t0,$a0,$a1 orr $t2,$a2,$a3 orr $in2infty,$t0,$t2 @@ -928,8 +939,10 @@ mov $rp_real,$rp mov $ap_real,$ap mov $bp_real,$bp - ldr $poly1,.Lpoly+8 - ldr $poly3,.Lpoly+24 + adrp $poly3,:pg_hi21:.Lpoly + add $poly3,$poly3,:lo12:.Lpoly + ldr $poly1,[$poly3,#8] + ldr $poly3,[$poly3,#24] ldp $a0,$a1,[$ap,#64] // in1_z ldp $a2,$a3,[$ap,#64+16] @@ -1080,7 +1093,8 @@ stp $acc2,$acc3,[$rp_real,#$i+16] ___ $code.=<<___ if ($i == 0); - adr $bp_real,.Lone_mont-64 + adrp $bp_real,:pg_hi21:.Lone_mont-64 + add $bp_real,$bp_real,:lo12:.Lone_mont-64 ___ } $code.=<<___; @@ -1131,7 +1145,8 @@ stp x21,x22,[sp,#32] stp x23,x24,[sp,#48] - adr $ordk,.Lord + adrp $ordk,:pg_hi21:.Lord + add $ordk,$ordk,:lo12:.Lord ldr $bi,[$bp] // bp[0] ldp $a0,$a1,[$ap] ldp $a2,$a3,[$ap,#16] @@ -1274,7 +1289,8 @@ stp x21,x22,[sp,#32] stp x23,x24,[sp,#48] - adr $ordk,.Lord + adrp $ordk,:pg_hi21:.Lord + add $ordk,$ordk,:lo12:.Lord ldp $a0,$a1,[$ap] ldp $a2,$a3,[$ap,#16] diff --git a/crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl b/crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl index 449f5086a..e187e0f42 100755 --- a/crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl +++ b/crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl @@ -62,6 +62,7 @@ .extern OPENSSL_ia32cap_P # The polynomial +.section .rodata .align 64 .Lpoly: .quad 0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001 @@ -80,6 +81,7 @@ .quad 0xf3b9cac2fc632551, 0xbce6faada7179e84, 0xffffffffffffffff, 0xffffffff00000000 .LordK: .quad 0xccd1c8aaee00bc4f +.text ___ { diff --git a/crypto/fipsmodule/ec/p256.c b/crypto/fipsmodule/ec/p256.c index 5c15fac8f..a64cc0496 100644 --- a/crypto/fipsmodule/ec/p256.c +++ b/crypto/fipsmodule/ec/p256.c @@ -46,10 +46,6 @@ #pragma GCC diagnostic ignored "-Winline" #endif -#if defined(OPENSSL_NO_ASM) -#define FIAT_P256_NO_ASM -#endif - #if defined(BORINGSSL_HAS_UINT128) #if defined(__GNUC__) #pragma GCC diagnostic ignored "-Wpedantic" diff --git a/crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl b/crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl index f2f6e3266..385718ffd 100644 --- a/crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl +++ b/crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl @@ -1053,6 +1053,7 @@ ___ $code.=<<___; +.section .rodata .align 64 .Lbswap_mask: .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 @@ -1066,6 +1067,7 @@ .byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 .asciz "AES-NI GCM module for x86_64, CRYPTOGAMS by " .align 64 +.text ___ }}} else {{{ $code=<<___; # assembler is too old diff --git a/crypto/fipsmodule/modes/asm/ghash-x86_64.pl b/crypto/fipsmodule/modes/asm/ghash-x86_64.pl index ff3c992fd..c59ffd73e 100644 --- a/crypto/fipsmodule/modes/asm/ghash-x86_64.pl +++ b/crypto/fipsmodule/modes/asm/ghash-x86_64.pl @@ -1286,6 +1286,7 @@ sub reduction_avx { } $code.=<<___; +.section .rodata .align 64 .Lbswap_mask: .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 @@ -1297,6 +1298,7 @@ sub reduction_avx { .asciz "GHASH for x86_64, CRYPTOGAMS by " .align 64 +.text ___ $code =~ s/\`([^\`]*)\`/eval($1)/gem; diff --git a/crypto/fipsmodule/sha/asm/sha512-x86_64.pl b/crypto/fipsmodule/sha/asm/sha512-x86_64.pl index 25a551bbe..9f3c11f48 100755 --- a/crypto/fipsmodule/sha/asm/sha512-x86_64.pl +++ b/crypto/fipsmodule/sha/asm/sha512-x86_64.pl @@ -404,6 +404,7 @@ () if ($SZ==4) { $code.=<<___; +.section .rodata .align 64 .type $TABLE,\@object $TABLE: @@ -447,9 +448,11 @@ () .long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 .long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 .asciz "SHA256 block transform for x86_64, CRYPTOGAMS by " +.text ___ } else { $code.=<<___; +.section .rodata .align 64 .type $TABLE,\@object $TABLE: @@ -537,6 +540,7 @@ () .quad 0x0001020304050607,0x08090a0b0c0d0e0f .quad 0x0001020304050607,0x08090a0b0c0d0e0f .asciz "SHA512 block transform for x86_64, CRYPTOGAMS by " +.text ___ } diff --git a/crypto/internal.h b/crypto/internal.h index cb2f703cc..f7b6c6241 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -189,7 +189,7 @@ typedef uint32_t crypto_word; // always has the same output for a given input. This allows it to eliminate // dead code, move computations across loops, and vectorize. static inline crypto_word value_barrier_w(crypto_word a) { -#if !defined(OPENSSL_NO_ASM) && (defined(__GNUC__) || defined(__clang__)) +#if defined(__GNUC__) || defined(__clang__) __asm__("" : "+r"(a) : /* no inputs */); #endif return a; @@ -197,7 +197,7 @@ static inline crypto_word value_barrier_w(crypto_word a) { // value_barrier_u32 behaves like |value_barrier_w| but takes a |uint32_t|. static inline uint32_t value_barrier_u32(uint32_t a) { -#if !defined(OPENSSL_NO_ASM) && (defined(__GNUC__) || defined(__clang__)) +#if defined(__GNUC__) || defined(__clang__) __asm__("" : "+r"(a) : /* no inputs */); #endif return a; @@ -205,7 +205,7 @@ static inline uint32_t value_barrier_u32(uint32_t a) { // value_barrier_u64 behaves like |value_barrier_w| but takes a |uint64_t|. static inline uint64_t value_barrier_u64(uint64_t a) { -#if !defined(OPENSSL_NO_ASM) && (defined(__GNUC__) || defined(__clang__)) +#if defined(__GNUC__) || defined(__clang__) __asm__("" : "+r"(a) : /* no inputs */); #endif return a; diff --git a/crypto/perlasm/x86_64-xlate.pl b/crypto/perlasm/x86_64-xlate.pl index a780b8154..508cf4986 100755 --- a/crypto/perlasm/x86_64-xlate.pl +++ b/crypto/perlasm/x86_64-xlate.pl @@ -1029,6 +1029,27 @@ } } { package directive; # pick up directives, which start with . + my %sections; + sub nasm_section { + my ($name, $qualifiers) = @_; + my $ret = "section\t$name"; + if (exists $sections{$name}) { + # Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392701. Only + # emit section qualifiers the first time a section is referenced. + # For all subsequent references, require the qualifiers match and + # omit them. + # + # See also https://crbug.com/1422018 and b/270643835. + my $old = $sections{$name}; + die "Inconsistent qualifiers: $qualifiers vs $old" if ($qualifiers ne "" && $qualifiers ne $old); + } else { + $sections{$name} = $qualifiers; + if ($qualifiers ne "") { + $ret .= " $qualifiers"; + } + } + return $ret; + } sub re { my ($class, $line) = @_; my $self = {}; @@ -1107,6 +1128,9 @@ $self->{value} = ".p2align\t" . (log($$line)/log(2)); } elsif ($dir eq ".section") { $current_segment=$$line; + if (!$elf && $current_segment eq ".rodata") { + if ($flavour eq "macosx") { $self->{value} = ".section\t__DATA,__const"; } + } if (!$elf && $current_segment eq ".init") { if ($flavour eq "macosx") { $self->{value} = ".mod_init_func"; } elsif ($flavour eq "mingw64") { $self->{value} = ".section\t.ctors"; } @@ -1134,7 +1158,7 @@ SWITCH: for ($dir) { /\.text/ && do { my $v=undef; if ($nasm) { - $v="section .text code align=64\n"; + $v=nasm_section(".text", "code align=64")."\n"; } else { $v="$current_segment\tENDS\n" if ($current_segment); $current_segment = ".text\$"; @@ -1147,7 +1171,7 @@ }; /\.data/ && do { my $v=undef; if ($nasm) { - $v="section .data data align=8\n"; + $v=nasm_section(".data", "data align=8")."\n"; } else { $v="$current_segment\tENDS\n" if ($current_segment); $current_segment = "_DATA"; @@ -1159,18 +1183,20 @@ /\.section/ && do { my $v=undef; $$line =~ s/([^,]*).*/$1/; $$line = ".CRT\$XCU" if ($$line eq ".init"); + $$line = ".rdata" if ($$line eq ".rodata"); if ($nasm) { - $v="section $$line"; - if ($$line=~/\.([px])data/) { - $v.=" rdata align="; - $v.=$1 eq "p"? 4 : 8; + my $qualifiers = ""; + if ($$line=~/\.([prx])data/) { + $qualifiers = "rdata align="; + $qualifiers .= $1 eq "p"? 4 : 8; } elsif ($$line=~/\.CRT\$/i) { - $v.=" rdata align=8"; + $qualifiers = "rdata align=8"; } + $v = nasm_section($$line, $qualifiers); } else { $v="$current_segment\tENDS\n" if ($current_segment); $v.="$$line\tSEGMENT"; - if ($$line=~/\.([px])data/) { + if ($$line=~/\.([prx])data/) { $v.=" READONLY"; $v.=" ALIGN(".($1 eq "p" ? 4 : 8).")" if ($masm>=$masmref); } elsif ($$line=~/\.CRT\$/i) {