From a842cb2275c3b51c06a683d304f890890077402a Mon Sep 17 00:00:00 2001 From: "L. Pereira" Date: Sun, 14 Apr 2024 09:33:40 -0700 Subject: [PATCH] Use AVX2 when unmasking websockets frames This already used SSE2, but let's use a wider vector if available to get this over more quickly. --- src/lib/lwan-websocket.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/src/lib/lwan-websocket.c b/src/lib/lwan-websocket.c index b9ca7bc4c..ab4f4253c 100644 --- a/src/lib/lwan-websocket.c +++ b/src/lib/lwan-websocket.c @@ -26,7 +26,7 @@ #include #if defined(__x86_64__) -#include <emmintrin.h> +#include <immintrin.h> #endif #include "lwan-io-wrappers.h" @@ -156,19 +156,39 @@ static void unmask(char *msg, size_t msg_len, char mask[static 4]) if (sizeof(void *) == 8) { const uint64_t mask64 = (uint64_t)mask32 << 32 | mask32; -#if defined(__x86_64__) +#if defined(__AVX2__) + const size_t len256 = msg_len / 32; + if (len256) { + const __m256i mask256 = + _mm256_setr_epi64x((int64_t)mask64, (int64_t)mask64, + (int64_t)mask64, (int64_t)mask64); + for (size_t i = 0; i < len256; i++) { + __m256i v = _mm256_loadu_si256((__m256i *)msg); + _mm256_storeu_si256((__m256i *)msg, + _mm256_xor_si256(v, mask256)); + msg += 32; + } + + msg_len = (size_t)(msg_end - msg); + } +#endif + +#if defined(__SSE2__) const size_t len128 = msg_len / 16; if (len128) { - const __m128i mask128 = _mm_setr_epi64((__m64)mask64, (__m64)mask64); + const __m128i mask128 = + _mm_setr_epi64((__m64)mask64, (__m64)mask64); for (size_t i = 0; i < len128; i++) { __m128i v = _mm_loadu_si128((__m128i *)msg); _mm_storeu_si128((__m128i *)msg, _mm_xor_si128(v, mask128)); msg += 16; } + + msg_len = (size_t)(msg_end - msg); } #endif - const size_t len64 = (size_t)((msg_end - msg) / 8); + const size_t len64 = msg_len / 8; for (size_t i = 0; i < len64; i++) { uint64_t v = string_as_uint64(msg); v ^= mask64;