[runtime] Fixed bug in x86 polynomial hash

Forced the compiler to use the legacy SSE instruction form instead of the modern AVX form.
This commit is contained in:
Igor Chevdar
2021-02-06 19:01:09 +05:00
committed by Vasily Levchenko
parent 4d335e9d58
commit 3164709fb7
2 changed files with 30 additions and 39 deletions
@@ -39,10 +39,6 @@ constexpr std::array<uint32_t, Count> RepeatingPowers(uint32_t base, uint8_t exp
return result;
}
#if defined(__x86_64__) or defined(__i386__)
#pragma clang attribute push (__attribute__((target("avx2"))), apply_to=function)
#endif
template<typename Traits>
ALWAYS_INLINE void polyHashTail(int& n, uint16_t const*& str, typename Traits::Vec128Type& res, uint32_t const* b, uint32_t const* p) {
using VecType = typename Traits::VecType;
@@ -194,8 +190,4 @@ ALWAYS_INLINE void polyHashUnroll8(int& n, uint16_t const*& str, typename Traits
res = Traits::vec128Add(res, Traits::vec128Add(sum1, sum2));
}
#if defined(__x86_64__) or defined(__i386__)
#pragma clang attribute pop
#endif
#endif // RUNTIME_POLYHASH_COMMON_H
@@ -8,9 +8,10 @@
#if defined(__x86_64__) or defined(__i386__)
#include <immintrin.h>
#define __SSE41__ __attribute__((target("sse4.1")))
#define __AVX2__ __attribute__((target("avx2")))
#pragma clang attribute push (__attribute__((target("avx2"))), apply_to=function)
#include <immintrin.h>
namespace {
@@ -26,24 +27,24 @@ struct SSETraits {
using Vec128Type = __m128i;
using U16VecType = __m128i;
ALWAYS_INLINE static VecType initVec() { return _mm_setzero_si128(); }
ALWAYS_INLINE static Vec128Type initVec128() { return _mm_setzero_si128(); }
ALWAYS_INLINE static int vec128toInt(Vec128Type x) { return _mm_cvtsi128_si32(x); }
ALWAYS_INLINE static VecType u16Load(U16VecType x) { return _mm_cvtepu16_epi32(x); }
ALWAYS_INLINE static Vec128Type vec128Mul(Vec128Type x, Vec128Type y) { return _mm_mullo_epi32(x, y); }
ALWAYS_INLINE static Vec128Type vec128Add(Vec128Type x, Vec128Type y) { return _mm_add_epi32(x, y); }
ALWAYS_INLINE static VecType vecMul(VecType x, VecType y) { return _mm_mullo_epi32(x, y); }
ALWAYS_INLINE static VecType vecAdd(VecType x, VecType y) { return _mm_add_epi32(x, y); }
ALWAYS_INLINE static Vec128Type squash2(VecType x, VecType y) {
__SSE41__ static VecType initVec() { return _mm_setzero_si128(); }
__SSE41__ static Vec128Type initVec128() { return _mm_setzero_si128(); }
__SSE41__ static int vec128toInt(Vec128Type x) { return _mm_cvtsi128_si32(x); }
__SSE41__ static VecType u16Load(U16VecType x) { return _mm_cvtepu16_epi32(x); }
__SSE41__ static Vec128Type vec128Mul(Vec128Type x, Vec128Type y) { return _mm_mullo_epi32(x, y); }
__SSE41__ static Vec128Type vec128Add(Vec128Type x, Vec128Type y) { return _mm_add_epi32(x, y); }
__SSE41__ static VecType vecMul(VecType x, VecType y) { return _mm_mullo_epi32(x, y); }
__SSE41__ static VecType vecAdd(VecType x, VecType y) { return _mm_add_epi32(x, y); }
__SSE41__ static Vec128Type squash2(VecType x, VecType y) {
return squash1(_mm_hadd_epi32(x, y)); // [x0 + x1, x2 + x3, y0 + y1, y2 + y3]
}
ALWAYS_INLINE static Vec128Type squash1(VecType z) {
__SSE41__ static Vec128Type squash1(VecType z) {
VecType sum = _mm_hadd_epi32(z, z); // [z0 + z1, z2 + z3, z0 + z1, z2 + z3]
return _mm_hadd_epi32(sum, sum); // [z0..3, same, same, same]
}
static int polyHashUnalignedUnrollUpTo8(int n, uint16_t const* str) {
__SSE41__ static int polyHashUnalignedUnrollUpTo8(int n, uint16_t const* str) {
Vec128Type res = initVec128();
polyHashUnroll2<SSETraits>(n, str, res, &b8[0], &p64[56]);
@@ -52,7 +53,7 @@ struct SSETraits {
return vec128toInt(res);
}
static int polyHashUnalignedUnrollUpTo16(int n, uint16_t const* str) {
__SSE41__ static int polyHashUnalignedUnrollUpTo16(int n, uint16_t const* str) {
Vec128Type res = initVec128();
polyHashUnroll4<SSETraits>(n, str, res, &b16[0], &p64[48]);
@@ -68,19 +69,19 @@ struct AVX2Traits {
using Vec128Type = __m128i;
using U16VecType = __m128i;
ALWAYS_INLINE static VecType initVec() { return _mm256_setzero_si256(); }
ALWAYS_INLINE static Vec128Type initVec128() { return _mm_setzero_si128(); }
ALWAYS_INLINE static int vec128toInt(Vec128Type x) { return _mm_cvtsi128_si32(x); }
ALWAYS_INLINE static VecType u16Load(U16VecType x) { return _mm256_cvtepu16_epi32(x); }
ALWAYS_INLINE static Vec128Type vec128Mul(Vec128Type x, Vec128Type y) { return _mm_mullo_epi32(x, y); }
ALWAYS_INLINE static Vec128Type vec128Add(Vec128Type x, Vec128Type y) { return _mm_add_epi32(x, y); }
ALWAYS_INLINE static VecType vecMul(VecType x, VecType y) { return _mm256_mullo_epi32(x, y); }
ALWAYS_INLINE static VecType vecAdd(VecType x, VecType y) { return _mm256_add_epi32(x, y); }
ALWAYS_INLINE static Vec128Type squash2(VecType x, VecType y) {
__AVX2__ static VecType initVec() { return _mm256_setzero_si256(); }
__AVX2__ static Vec128Type initVec128() { return _mm_setzero_si128(); }
__AVX2__ static int vec128toInt(Vec128Type x) { return _mm_cvtsi128_si32(x); }
__AVX2__ static VecType u16Load(U16VecType x) { return _mm256_cvtepu16_epi32(x); }
__AVX2__ static Vec128Type vec128Mul(Vec128Type x, Vec128Type y) { return _mm_mullo_epi32(x, y); }
__AVX2__ static Vec128Type vec128Add(Vec128Type x, Vec128Type y) { return _mm_add_epi32(x, y); }
__AVX2__ static VecType vecMul(VecType x, VecType y) { return _mm256_mullo_epi32(x, y); }
__AVX2__ static VecType vecAdd(VecType x, VecType y) { return _mm256_add_epi32(x, y); }
__AVX2__ static Vec128Type squash2(VecType x, VecType y) {
return squash1(_mm256_hadd_epi32(x, y)); // [x0 + x1, x2 + x3, y0 + y1, y2 + y3, x4 + x5, x6 + x7, y4 + y5, y6 + y7]
}
ALWAYS_INLINE static Vec128Type squash1(VecType z) {
__AVX2__ static Vec128Type squash1(VecType z) {
VecType sum = _mm256_hadd_epi32(z, z); // [z0 + z1, z2 + z3, z0 + z1, z2 + z3, z4 + z5, z6 + z7, z4 + z5, z6 + z7]
sum = _mm256_hadd_epi32(sum, sum); // [z0..3, z0..3, z0..3, z0..3, z4..7, z4..7, z4..7, z4..7]
Vec128Type lo = _mm256_extracti128_si256(sum, 0); // [z0..3, same, same, same]
@@ -88,7 +89,7 @@ struct AVX2Traits {
return _mm_add_epi32(lo, hi); // [z0..7, same, same, same]
}
static int polyHashUnalignedUnrollUpTo16(int n, uint16_t const* str) {
__AVX2__ static int polyHashUnalignedUnrollUpTo16(int n, uint16_t const* str) {
Vec128Type res = initVec128();
polyHashUnroll2<AVX2Traits>(n, str, res, &b16[0], &p64[48]);
@@ -98,7 +99,7 @@ struct AVX2Traits {
return vec128toInt(res);
}
static int polyHashUnalignedUnrollUpTo32(int n, uint16_t const* str) {
__AVX2__ static int polyHashUnalignedUnrollUpTo32(int n, uint16_t const* str) {
Vec128Type res = initVec128();
polyHashUnroll4<AVX2Traits>(n, str, res, &b32[0], &p64[32]);
@@ -109,7 +110,7 @@ struct AVX2Traits {
return vec128toInt(res);
}
static int polyHashUnalignedUnrollUpTo64(int n, uint16_t const* str) {
__AVX2__ static int polyHashUnalignedUnrollUpTo64(int n, uint16_t const* str) {
Vec128Type res = initVec128();
polyHashUnroll8<AVX2Traits>(n, str, res, &b64[0], &p64[0]);
@@ -128,8 +129,8 @@ struct AVX2Traits {
const bool x64 = false;
#endif
bool initialized = false;
bool sseSupported;
bool avx2Supported;
bool sseSupported = false;
bool avx2Supported = false;
}
@@ -161,6 +162,4 @@ int polyHash_x86(int length, uint16_t const* str) {
return res;
}
#pragma clang attribute pop
#endif