From 85eedb242edf81d930c81575912d47ddc2f8cd87 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Tue, 9 Jun 2026 12:04:17 +1000 Subject: [PATCH] checksum: guard the AVX2 roll-asm path with a runtime CPUID check When built with --enable-roll-asm, get_checksum1() called the AVX2 asm routine get_checksum1_avx2_asm() unconditionally. Unlike the intrinsic path (get_checksum1_avx2_64), which is function-multiversioned with a target("default") fallback and so resolves safely on any CPU, the asm routine is a single AVX2-only symbol with no fallback. On an x86-64 host without AVX2 (an older CPU, or a VM that does not expose AVX2) the first block checksum executes a VEX-encoded instruction and dies with SIGILL, which surfaces as "connection unexpectedly closed (0 bytes received so far)" and a code-12 protocol error. Gate the asm call on a cached __builtin_cpu_supports("avx2") check, the same signal the intrinsic resolver uses. When AVX2 is absent we skip it and the SSSE3/SSE2/scalar steps (safe everywhere) do the work. Apply the same guard in the simdtest harness so it can run on non-AVX2 hosts too. --- simd-checksum-x86_64.cpp | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/simd-checksum-x86_64.cpp b/simd-checksum-x86_64.cpp index 99391cbe..7b0a7546 100644 --- a/simd-checksum-x86_64.cpp +++ b/simd-checksum-x86_64.cpp @@ -317,6 +317,21 @@ __attribute__ ((target("sse2"))) MVSTATIC int32 get_checksum1_sse2_32(schar* buf extern "C" __attribute__ ((target("avx2"))) int32 get_checksum1_avx2_asm(schar* buf, int32 len, int32 i, uint32* ps1, uint32* ps2); +/* The asm routine is AVX2-only and, unlike the multi-versioned intrinsic + * paths, has no compiler-generated fallback, so it must not be called on a + * CPU without AVX2 (it would fault with SIGILL). Gate it on a cached runtime + * check; when AVX2 is absent we skip it and the SSSE3/SSE2/scalar steps, + * which are safe everywhere, do all the work. */ +static int roll_asm_have_avx2(void) +{ + static int have = -1; + if (have < 0) { + __builtin_cpu_init(); + have = __builtin_cpu_supports("avx2") ? 1 : 0; + } + return have; +} + #else /* } { */ /* @@ -461,7 +476,8 @@ static inline uint32 get_checksum1_cpp(char *buf1, int32 len) // multiples of 64 bytes using AVX2 (if available) #ifdef USE_ROLL_ASM - i = get_checksum1_avx2_asm((schar*)buf1, len, i, &s1, &s2); + if (roll_asm_have_avx2()) + i = get_checksum1_avx2_asm((schar*)buf1, len, i, &s1, &s2); #else i = get_checksum1_avx2_64((schar*)buf1, len, i, &s1, &s2); #endif @@ -579,7 +595,10 @@ static uint32 checksum_via_avx2(char *buf, int32 len) int32 i; uint32 s1 = 0, s2 = 0; #ifdef USE_ROLL_ASM - i = get_checksum1_avx2_asm((schar*)buf, len, 0, &s1, &s2); + if (roll_asm_have_avx2()) + i = get_checksum1_avx2_asm((schar*)buf, len, 0, &s1, &s2); + else + i = 0; #else i = get_checksum1_avx2_64((schar*)buf, len, 0, &s1, &s2); #endif