checksum: guard the AVX2 roll-asm path with a runtime CPUID check

When built with --enable-roll-asm, get_checksum1() called the AVX2 asm
routine get_checksum1_avx2_asm() unconditionally. Unlike the intrinsic
path (get_checksum1_avx2_64), which is function-multiversioned with a
target("default") fallback and so resolves safely on any CPU, the asm
routine is a single AVX2-only symbol with no fallback. On an x86-64 host
without AVX2 (an older CPU, or a VM that does not expose AVX2) the first
block checksum executes a VEX-encoded instruction and dies with SIGILL,
which surfaces as "connection unexpectedly closed (0 bytes received so
far)" and a code-12 protocol error.

Gate the asm call on a cached __builtin_cpu_supports("avx2") check, the
same signal the intrinsic resolver uses. When AVX2 is absent we skip it
and the SSSE3/SSE2/scalar steps (safe everywhere) do the work. Apply the
same guard in the simdtest harness so it can run on non-AVX2 hosts too.
This commit is contained in:
Andrew Tridgell
2026-06-09 12:04:17 +10:00
parent 806dff20d9
commit 85eedb242e

View File

@@ -317,6 +317,21 @@ __attribute__ ((target("sse2"))) MVSTATIC int32 get_checksum1_sse2_32(schar* buf
extern "C" __attribute__ ((target("avx2"))) int32 get_checksum1_avx2_asm(schar* buf, int32 len, int32 i, uint32* ps1, uint32* ps2);
/* Report whether the running CPU can execute the AVX2 asm checksum routine.
 *
 * get_checksum1_avx2_asm() is a single AVX2-only symbol: unlike the
 * multi-versioned intrinsic paths it has no compiler-generated fallback, so
 * calling it on a CPU without AVX2 would fault with SIGILL. Callers must
 * test this function first; when it returns 0 they skip the asm routine and
 * let the SSSE3/SSE2/scalar steps, which are safe everywhere, do all the
 * work.
 *
 * Returns 1 if __builtin_cpu_supports("avx2") reports AVX2, otherwise 0.
 * The probe runs only on the first call; the answer is remembered in a
 * function-local static (plain int, no locking) and returned directly on
 * every later call. */
static int roll_asm_have_avx2(void)
{
	/* -1 means "not probed yet"; 0 or 1 is the remembered answer. */
	static int cached_result = -1;

	if (cached_result >= 0)
		return cached_result;

	/* Initialise the compiler's CPU feature data before querying it. */
	__builtin_cpu_init();
	if (__builtin_cpu_supports("avx2"))
		cached_result = 1;
	else
		cached_result = 0;

	return cached_result;
}
#else /* } { */
/*
@@ -461,7 +476,8 @@ static inline uint32 get_checksum1_cpp(char *buf1, int32 len)
// multiples of 64 bytes using AVX2 (if available)
#ifdef USE_ROLL_ASM
i = get_checksum1_avx2_asm((schar*)buf1, len, i, &s1, &s2);
if (roll_asm_have_avx2())
i = get_checksum1_avx2_asm((schar*)buf1, len, i, &s1, &s2);
#else
i = get_checksum1_avx2_64((schar*)buf1, len, i, &s1, &s2);
#endif
@@ -579,7 +595,10 @@ static uint32 checksum_via_avx2(char *buf, int32 len)
int32 i;
uint32 s1 = 0, s2 = 0;
#ifdef USE_ROLL_ASM
i = get_checksum1_avx2_asm((schar*)buf, len, 0, &s1, &s2);
if (roll_asm_have_avx2())
i = get_checksum1_avx2_asm((schar*)buf, len, 0, &s1, &s2);
else
i = 0;
#else
i = get_checksum1_avx2_64((schar*)buf, len, 0, &s1, &s2);
#endif