Skip to content

Commit

Permalink
SIMDSHA512body(): Add compile-time toggle for all-zeroes optimization
Browse files Browse the repository at this point in the history
And turn off the manual optimization since it started causing a major
sha512crypt performance regression on AVX with RHEL6's old gcc after the
iterated hashing commit.  There appear to be no regressions from turning
this off now, meaning that the compiler is hopefully able to figure the
zeroes out now that they're written to w[] by this very function.
  • Loading branch information
solardiz committed Jan 14, 2024
1 parent eda4f9e commit 7957f7c
Showing 1 changed file with 24 additions and 16 deletions.
40 changes: 24 additions & 16 deletions src/simd-intrinsics.c
Original file line number Diff line number Diff line change
Expand Up @@ -2272,6 +2272,24 @@ void SIMDSHA256body(vtype *data, uint32_t *out, uint32_t *reload_state, unsigned
w[i][(t)&0xf] = vadd_epi64(tmp1[i], tmp2[i]); \
}

/*
 * One SHA-512 round, repeated for every index i produced by
 * SHA512_PARA_DO(i).
 *
 * a..h - the eight working-state arrays, each indexed by [i].  Only d and h
 *        are written; callers rotate the argument order from one round to
 *        the next rather than moving values between the arrays.
 * x    - round number; the message word read is w[i][(x)&0xf].
 * K    - 64-bit round constant, broadcast with vset1_epi64().
 *
 * Per index i this computes
 *     T1 = h + w[x & 0xf] + S1(e) + K + Ch(e,f,g)
 *     T2 = S0(a) + Maj(a,b,c)
 *     d += T1
 *     h  = T1 + T2
 * and then, for x < 64 only, invokes R(x).  R() is defined outside this
 * excerpt; presumably it refreshes the w[] entry for a later round - confirm
 * against its definition.
 *
 * Relies on i, tmp1, tmp2 and w being declared in the enclosing scope.
 */
#define SHA512_STEP(a,b,c,d,e,f,g,h,x,K) \
{ \
SHA512_PARA_DO(i) \
{ \
tmp1[i] = vadd_epi64(h[i], w[i][(x)&0xf]); \
tmp2[i] = vadd_epi64(S1(e[i]),vset1_epi64(K)); \
tmp1[i] = vadd_epi64(tmp1[i], Ch(e[i],f[i],g[i])); \
tmp1[i] = vadd_epi64(tmp1[i], tmp2[i]); \
tmp2[i] = vadd_epi64(S0(a[i]),Maj(a[i],b[i],c[i])); \
d[i] = vadd_epi64(tmp1[i], d[i]); \
h[i] = vadd_epi64(tmp1[i], tmp2[i]); \
if (x < 64) R(x); \
} \
}

/*
 * Compile-time switch for the hand-written "all-zeroes" optimization.
 *
 * Non-zero: the reduced R0() and the SHA512_STEP0() macro are compiled in,
 *           and SIMDSHA512univ() uses SHA512_STEP0() for rounds 0..14 when
 *           SSEi_HALF_IN is set in SSEi_flags.
 * Zero:     those macros are left out, SIMDSHA512univ() assigns w[k][9] and
 *           the following words itself, and rounds 0..14 always go through
 *           the generic SHA512_STEP().
 *
 * Currently off: per the commit description, the manual variant caused a
 * sha512crypt slowdown on AVX with an old gcc, and turning it off showed no
 * regressions.
 */
#define SHA512_MANUAL_OPT 0

#if SHA512_MANUAL_OPT
/*
 * Override of R0(), compiled only when SHA512_MANUAL_OPT is non-zero; the
 * #undef discards whatever R0() was defined earlier in the file.
 *
 * Sets w[i][t] = s0(w[i][(t-15)&0xf]) + w[i][(t-16)&0xf].  Note that the
 * destination index t is used unmasked, while the source indices are masked
 * with 0xf.
 *
 * NOTE(review): presumably a shortened message-schedule update that omits
 * terms known to be zero for the half-input case - confirm against the
 * general definition, which is outside this excerpt.
 *
 * Relies on i and w being declared at the expansion site.
 */
#undef R0
#define R0(t) \
w[i][t] = vadd_epi64(s0(w[i][(t-15)&0xf]), w[i][(t-16)&0xf]);
Expand Down Expand Up @@ -2303,21 +2321,6 @@ void SIMDSHA256body(vtype *data, uint32_t *out, uint32_t *reload_state, unsigned
w[i][t] = vadd_epi64(tmp1[i], tmp2[i]); \
}

/*
 * Generic SHA-512 round macro, expanded once per round and looped over every
 * index i given by SHA512_PARA_DO(i).
 *
 * Arguments a..h name the eight working-state arrays (only d and h are
 * assigned), x is the round number and K the round constant, which is
 * broadcast through vset1_epi64().
 *
 * For each i:
 *     tmp1 = h + w[(x)&0xf] + Ch(e,f,g) + (S1(e) + K)
 *     tmp2 = S0(a) + Maj(a,b,c)
 *     d    = tmp1 + d
 *     h    = tmp1 + tmp2
 * When x < 64 the macro finishes by invoking R(x), whose definition is not
 * part of this excerpt.
 *
 * i, tmp1, tmp2 and w must already exist at the expansion site.
 */
#define SHA512_STEP(a,b,c,d,e,f,g,h,x,K) \
{ \
SHA512_PARA_DO(i) \
{ \
tmp1[i] = vadd_epi64(h[i], w[i][(x)&0xf]); \
tmp2[i] = vadd_epi64(S1(e[i]),vset1_epi64(K)); \
tmp1[i] = vadd_epi64(tmp1[i], Ch(e[i],f[i],g[i])); \
tmp1[i] = vadd_epi64(tmp1[i], tmp2[i]); \
tmp2[i] = vadd_epi64(S0(a[i]),Maj(a[i],b[i],c[i])); \
d[i] = vadd_epi64(tmp1[i], d[i]); \
h[i] = vadd_epi64(tmp1[i], tmp2[i]); \
if (x < 64) R(x); \
} \
}

#define SHA512_STEP0(a,b,c,d,e,f,g,h,x,K) \
{ \
SHA512_PARA_DO(i) \
Expand All @@ -2337,6 +2340,7 @@ void SIMDSHA256body(vtype *data, uint32_t *out, uint32_t *reload_state, unsigned
if (x < 64) R(x); \
} \
}
#endif

#define INIT_D 0x152fecd8f70e5939ULL

Expand Down Expand Up @@ -2436,7 +2440,7 @@ static MAYBE_INLINE void SIMDSHA512univ(vtype* data, uint64_t *out, uint64_t *re
SHA512_PARA_DO(k)
{
w[k][8] = vset1_epi64(0x8000000000000000ULL);
#if 0
#if !SHA512_MANUAL_OPT
w[k][9] =
w[k][10] =
w[k][11] =
Expand Down Expand Up @@ -2510,6 +2514,7 @@ static MAYBE_INLINE void SIMDSHA512univ(vtype* data, uint64_t *out, uint64_t *re
}
}

#if SHA512_MANUAL_OPT
if (SSEi_flags & SSEi_HALF_IN) {
SHA512_STEP0(a, b, c, d, e, f, g, h, 0, 0x428a2f98d728ae22ULL);
SHA512_STEP0(h, a, b, c, d, e, f, g, 1, 0x7137449123ef65cdULL);
Expand All @@ -2527,6 +2532,7 @@ static MAYBE_INLINE void SIMDSHA512univ(vtype* data, uint64_t *out, uint64_t *re
SHA512_STEP0(d, e, f, g, h, a, b, c, 13, 0x80deb1fe3b1696b1ULL);
SHA512_STEP0(c, d, e, f, g, h, a, b, 14, 0x9bdc06a725c71235ULL);
} else {
#endif
SHA512_STEP(a, b, c, d, e, f, g, h, 0, 0x428a2f98d728ae22ULL);
SHA512_STEP(h, a, b, c, d, e, f, g, 1, 0x7137449123ef65cdULL);
SHA512_STEP(g, h, a, b, c, d, e, f, 2, 0xb5c0fbcfec4d3b2fULL);
Expand All @@ -2542,7 +2548,9 @@ static MAYBE_INLINE void SIMDSHA512univ(vtype* data, uint64_t *out, uint64_t *re
SHA512_STEP(e, f, g, h, a, b, c, d, 12, 0x72be5d74f27b896fULL);
SHA512_STEP(d, e, f, g, h, a, b, c, 13, 0x80deb1fe3b1696b1ULL);
SHA512_STEP(c, d, e, f, g, h, a, b, 14, 0x9bdc06a725c71235ULL);
#if SHA512_MANUAL_OPT
}
#endif
SHA512_STEP(b, c, d, e, f, g, h, a, 15, 0xc19bf174cf692694ULL);

SHA512_STEP(a, b, c, d, e, f, g, h, 16, 0xe49b69c19ef14ad2ULL);
Expand Down

0 comments on commit 7957f7c

Please sign in to comment.