diff --git a/kotlin-native/runtime/src/main/cpp/KString.cpp b/kotlin-native/runtime/src/main/cpp/KString.cpp index bcfcedd7628..3f96007b706 100644 --- a/kotlin-native/runtime/src/main/cpp/KString.cpp +++ b/kotlin-native/runtime/src/main/cpp/KString.cpp @@ -28,6 +28,8 @@ #include "utf8.h" +#include "polyhash/PolyHash.h" + namespace { typedef std::back_insert_iterator KStdStringInserter; @@ -1165,10 +1167,7 @@ KInt Kotlin_String_lastIndexOfString(KString thiz, KString other, KInt fromIndex KInt Kotlin_String_hashCode(KString thiz) { // TODO: consider caching strings hashes. - // TODO: maybe use some simpler hashing algorithm? - // Note that we don't use Java's string hash. - return CityHash64( - CharArrayAddressOfElementAt(thiz, 0), thiz->count_ * sizeof(KChar)); + return polyHash(thiz->count_, CharArrayAddressOfElementAt(thiz, 0)); } const KChar* Kotlin_String_utf16pointer(KString message) { diff --git a/kotlin-native/runtime/src/main/cpp/polyhash/PolyHash.cpp b/kotlin-native/runtime/src/main/cpp/polyhash/PolyHash.cpp new file mode 100644 index 00000000000..194c27fb856 --- /dev/null +++ b/kotlin-native/runtime/src/main/cpp/polyhash/PolyHash.cpp @@ -0,0 +1,19 @@ +/* + * Copyright 2010-2021 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license + * that can be found in the LICENSE file. + */ + +#include "polyhash/PolyHash.h" +#include "polyhash/naive.h" +#include "polyhash/x86.h" +#include "polyhash/arm.h" + +int polyHash(int length, uint16_t const* str) { +#if defined(__x86_64__) or defined(__i386__) + return polyHash_x86(length, str); +#elif defined(__arm__) or defined(__aarch64__) + return polyHash_arm(length, str); +#else + return polyHash_naive(length, str); +#endif +} \ No newline at end of file diff --git a/kotlin-native/runtime/src/main/cpp/polyhash/PolyHash.h b/kotlin-native/runtime/src/main/cpp/polyhash/PolyHash.h new file mode 100644 index 00000000000..b9bcf48d0c3 --- /dev/null +++ b/kotlin-native/runtime/src/main/cpp/polyhash/PolyHash.h @@ -0,0 +1,14 @@ +/* + * Copyright 2010-2021 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license + * that can be found in the LICENSE file. + */ + +#ifndef RUNTIME_POLYHASH_H +#define RUNTIME_POLYHASH_H + +#include + +// Computes polynomial hash with base = 31. +int polyHash(int length, uint16_t const* str); + +#endif // RUNTIME_POLYHASH_H diff --git a/kotlin-native/runtime/src/main/cpp/polyhash/PolyHashTest.cpp b/kotlin-native/runtime/src/main/cpp/polyhash/PolyHashTest.cpp new file mode 100644 index 00000000000..ca2e7afb804 --- /dev/null +++ b/kotlin-native/runtime/src/main/cpp/polyhash/PolyHashTest.cpp @@ -0,0 +1,27 @@ +/* + * Copyright 2010-2021 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license + * that can be found in the LICENSE file. + */ + +#include "polyhash/PolyHash.h" +#include "polyhash/naive.h" + +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace { + +TEST(PolyHashTest, Correctness) { + const int maxLength = 10000; + uint16_t str[maxLength + 100]; + for (int k = 1; k <= maxLength; ++k) { + for (int i = 0; i < k; ++i) + str[i] = k * maxLength + i; + str[k] = 0; + + for (int shift = 0; shift < 8 && k - shift > 0; ++shift) + EXPECT_EQ(polyHash_naive(k - shift, str + shift), polyHash(k - shift, str + shift)); + } +} + +} \ No newline at end of file diff --git a/kotlin-native/runtime/src/main/cpp/polyhash/arm.cpp b/kotlin-native/runtime/src/main/cpp/polyhash/arm.cpp new file mode 100644 index 00000000000..1e194af701f --- /dev/null +++ b/kotlin-native/runtime/src/main/cpp/polyhash/arm.cpp @@ -0,0 +1,115 @@ +/* + * Copyright 2010-2021 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license + * that can be found in the LICENSE file. + */ + +#include "polyhash/common.h" +#include "polyhash/arm.h" + +#if defined(__arm__) or defined(__aarch64__) + +#ifndef __ARM_NEON + +int polyHash_arm(int length, uint16_t const* str) { + return polyHash_naive(length, str); +} + +#else + +#include + +namespace { + +alignas(32) constexpr auto p32 = DecreasingPowers<32>(31); // [base^31, base^30, .., base^2, base, 1] +alignas(32) constexpr auto b32 = RepeatingPowers<8>(31, 32); // [base^32, base^32, .., base^32] (8) +alignas(32) constexpr auto b16 = RepeatingPowers<8>(31, 16); // [base^16, base^16, .., base^16] (8) +alignas(32) constexpr auto b8 = RepeatingPowers<8>(31, 8); // [base^8, base^8, .., base^8 ] (8) +alignas(32) constexpr auto b4 = RepeatingPowers<8>(31, 4); // [base^4, base^4, .., base^4 ] (8) + +struct NeonTraits { + using VecType = uint32x4_t; + using Vec128Type = uint32x4_t; + using U16VecType = uint16x4_t; + + ALWAYS_INLINE static VecType initVec() { return vdupq_n_u32(0); } + ALWAYS_INLINE static Vec128Type initVec128() { return vdupq_n_u32(0); } + ALWAYS_INLINE static int vec128toInt(Vec128Type x) { return vgetq_lane_u32(x, 0); } + ALWAYS_INLINE static VecType u16Load(U16VecType x) { return vmovl_u16(x); } + ALWAYS_INLINE static Vec128Type vec128Mul(Vec128Type x, Vec128Type y) { return vmulq_u32(x, y); } + ALWAYS_INLINE static Vec128Type vec128Add(Vec128Type x, Vec128Type y) { return vaddq_u32(x, y); } + ALWAYS_INLINE static VecType vecMul(VecType x, VecType y) { return vmulq_u32(x, y); } + ALWAYS_INLINE static VecType vecAdd(VecType x, VecType y) { return vaddq_u32(x, y); } + ALWAYS_INLINE static Vec128Type squash2(VecType x, VecType y) { + return squash1(vaddq_u32(x, y)); // [x0 + y0, x1 + y1, x2 + y2, x3 + y3] + } + + ALWAYS_INLINE static uint32x4_t squash1(uint32x4_t z) { + #ifdef __aarch64__ + return vdupq_n_u32(vaddvq_u32(z)); // [z0..3, same, same, same] + #else + uint32x2_t lo = vget_low_u32(z); // [z0, z1] + uint32x2_t hi = vget_high_u32(z); // [z2, z3] + uint32x2_t sum = vadd_u32(lo, hi); // [z0 + z2, z1 + z3] + sum = vpadd_u32(sum, sum); // [z0..3, same] + return vcombine_u32(sum, sum); // [z0..3, same, same, same] + #endif + }; + + static int polyHashUnalignedUnrollUpTo16(int n, uint16_t const* str) { + Vec128Type res = initVec128(); + + polyHashUnroll4(n, str, res, &b16[0], &p32[16]); + polyHashUnroll2(n, str, res, &b8[0], &p32[24]); + polyHashTail(n, str, res, &b4[0], &p32[28]); + + return vec128toInt(res); + } + + static int polyHashUnalignedUnrollUpTo32(int n, uint16_t const* str) { + Vec128Type res = initVec128(); + + polyHashUnroll8(n, str, res, &b32[0], &p32[0]); + polyHashUnroll4(n, str, res, &b16[0], &p32[16]); + polyHashUnroll2(n, str, res, &b8[0], &p32[24]); + polyHashTail(n, str, res, &b4[0], &p32[28]); + + return vec128toInt(res); + } +}; + +#if defined(__aarch64__) + const bool neonSupported = true; // AArch64 always supports Neon. +#elif defined(__ANDROID__) + #include + const bool neonSupported = android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON; +#elif defined(__APPLE__) + const bool neonSupported = true; // It is supported starting from iPhone 3GS. +#elif defined(__linux__) or defined(__unix__) + #include + #include + const bool neonSupported = getauxval(AT_HWCAP) & HWCAP_NEON; +#else + #error "Not supported" +#endif + +} + +int polyHash_arm(int length, uint16_t const* str) { + if (!neonSupported) { + // Vectorization is not supported. + return polyHash_naive(length, str); + } + int res; + if (length < 488) + res = NeonTraits::polyHashUnalignedUnrollUpTo16(length / 4, str); + else + res = NeonTraits::polyHashUnalignedUnrollUpTo32(length / 4, str); + // Handle the tail naively. + for (int i = length & 0xFFFFFFFC; i < length; ++i) + res = res * 31 + str[i]; + return res; +} + +#endif // __ARM_NEON + +#endif // defined(__arm__) or defined(__aarch64__) \ No newline at end of file diff --git a/kotlin-native/runtime/src/main/cpp/polyhash/arm.h b/kotlin-native/runtime/src/main/cpp/polyhash/arm.h new file mode 100644 index 00000000000..919b76a8d94 --- /dev/null +++ b/kotlin-native/runtime/src/main/cpp/polyhash/arm.h @@ -0,0 +1,11 @@ +/* + * Copyright 2010-2021 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license + * that can be found in the LICENSE file. + */ + +#ifndef RUNTIME_POLYHASH_ARM_H +#define RUNTIME_POLYHASH_ARM_H + +int polyHash_arm(int length, uint16_t const* str); + +#endif // RUNTIME_POLYHASH_ARM_H diff --git a/kotlin-native/runtime/src/main/cpp/polyhash/common.h b/kotlin-native/runtime/src/main/cpp/polyhash/common.h new file mode 100644 index 00000000000..5ed9302dbdc --- /dev/null +++ b/kotlin-native/runtime/src/main/cpp/polyhash/common.h @@ -0,0 +1,201 @@ +/* + * Copyright 2010-2021 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license + * that can be found in the LICENSE file. + */ + +#ifndef RUNTIME_POLYHASH_COMMON_H +#define RUNTIME_POLYHASH_COMMON_H + +#include +#include +#include "polyhash/naive.h" +#include "../Common.h" + +constexpr uint32_t Power(uint32_t base, uint8_t exponent) { + uint32_t result = 1; + for (uint8_t i = 0; i < exponent; ++i) { + result *= base; + } + return result; +} + +template +constexpr std::array DecreasingPowers(uint32_t base) { + std::array result = {}; + uint32_t current = 1; + for (auto it = result.rbegin(); it != result.rend(); ++it) { + *it = current; + current *= base; + } + return result; +} + +template +constexpr std::array RepeatingPowers(uint32_t base, uint8_t exponent) { + std::array result = {}; + uint32_t value = Power(base, exponent); + for (auto& element : result) + element = value; + return result; +} + +#if defined(__x86_64__) or defined(__i386__) +#pragma clang attribute push (__attribute__((target("avx2"))), apply_to=function) +#endif + +template +ALWAYS_INLINE void polyHashTail(int& n, uint16_t const*& str, typename Traits::Vec128Type& res, uint32_t const* b, uint32_t const* p) { + using VecType = typename Traits::VecType; + using Vec128Type = typename Traits::Vec128Type; + using U16VecType = typename Traits::U16VecType; + + const int vecLength = sizeof(VecType) / 4; + if (n < vecLength / 4) return; + + VecType x = Traits::u16Load(*reinterpret_cast(str)); + res = Traits::vec128Mul(res, *reinterpret_cast(b)); + VecType z = Traits::vecMul(x, *reinterpret_cast(p)); + res = Traits::vec128Add(res, Traits::squash1(z)); + + str += vecLength; + n -= vecLength / 4; +} + +template +ALWAYS_INLINE void polyHashUnroll2(int& n, uint16_t const*& str, typename Traits::Vec128Type& res, uint32_t const* b, uint32_t const* p) { + using VecType = typename Traits::VecType; + using Vec128Type = typename Traits::Vec128Type; + using U16VecType = typename Traits::U16VecType; + + const int vecLength = sizeof(VecType) / 4; + if (n < vecLength / 2) return; + + res = Traits::vec128Mul(res, *reinterpret_cast(b)); + + VecType res0 = Traits::initVec(); + VecType res1 = Traits::initVec(); + + do { + VecType x0 = Traits::u16Load(*reinterpret_cast(str)); + VecType x1 = Traits::u16Load(*reinterpret_cast(str + vecLength)); + res0 = Traits::vecMul(res0, *reinterpret_cast(b)); + res1 = Traits::vecMul(res1, *reinterpret_cast(b)); + VecType z0 = Traits::vecMul(x0, *reinterpret_cast(p)); + VecType z1 = Traits::vecMul(x1, *reinterpret_cast(p + vecLength)); + res0 = Traits::vecAdd(res0, z0); + res1 = Traits::vecAdd(res1, z1); + + str += vecLength * 2; + n -= vecLength / 2; + } while (n >= vecLength / 2); + + res = Traits::vec128Add(res, Traits::squash2(res0, res1)); +} + +template +ALWAYS_INLINE void polyHashUnroll4(int& n, uint16_t const*& str, typename Traits::Vec128Type& res, uint32_t const* b, uint32_t const* p) { + using VecType = typename Traits::VecType; + using Vec128Type = typename Traits::Vec128Type; + using U16VecType = typename Traits::U16VecType; + + const int vecLength = sizeof(VecType) / 4; + if (n < vecLength) return; + + res = Traits::vec128Mul(res, *reinterpret_cast(b)); + + VecType res0 = Traits::initVec(); + VecType res1 = Traits::initVec(); + VecType res2 = Traits::initVec(); + VecType res3 = Traits::initVec(); + + do { + VecType x0 = Traits::u16Load(*reinterpret_cast(str)); + VecType x1 = Traits::u16Load(*reinterpret_cast(str + vecLength)); + VecType x2 = Traits::u16Load(*reinterpret_cast(str + vecLength * 2)); + VecType x3 = Traits::u16Load(*reinterpret_cast(str + vecLength * 3)); + res0 = Traits::vecMul(res0, *reinterpret_cast(b)); + res1 = Traits::vecMul(res1, *reinterpret_cast(b)); + res2 = Traits::vecMul(res2, *reinterpret_cast(b)); + res3 = Traits::vecMul(res3, *reinterpret_cast(b)); + VecType z0 = Traits::vecMul(x0, *reinterpret_cast(p)); + VecType z1 = Traits::vecMul(x1, *reinterpret_cast(p + vecLength)); + VecType z2 = Traits::vecMul(x2, *reinterpret_cast(p + vecLength * 2)); + VecType z3 = Traits::vecMul(x3, *reinterpret_cast(p + vecLength * 3)); + res0 = Traits::vecAdd(res0, z0); + res1 = Traits::vecAdd(res1, z1); + res2 = Traits::vecAdd(res2, z2); + res3 = Traits::vecAdd(res3, z3); + + str += vecLength * 4; + n -= vecLength; + } while (n >= vecLength); + + res = Traits::vec128Add(res, Traits::vec128Add(Traits::squash2(res0, res1), Traits::squash2(res2, res3))); +} + +template +ALWAYS_INLINE void polyHashUnroll8(int& n, uint16_t const*& str, typename Traits::Vec128Type& res, uint32_t const* b, uint32_t const* p) { + using VecType = typename Traits::VecType; + using Vec128Type = typename Traits::Vec128Type; + using U16VecType = typename Traits::U16VecType; + + const int vecLength = sizeof(VecType) / 4; + if (n < vecLength * 2) return; + + VecType res0 = Traits::initVec(); + VecType res1 = Traits::initVec(); + VecType res2 = Traits::initVec(); + VecType res3 = Traits::initVec(); + VecType res4 = Traits::initVec(); + VecType res5 = Traits::initVec(); + VecType res6 = Traits::initVec(); + VecType res7 = Traits::initVec(); + + do { + VecType x0 = Traits::u16Load(*reinterpret_cast(str)); + VecType x1 = Traits::u16Load(*reinterpret_cast(str + vecLength)); + VecType x2 = Traits::u16Load(*reinterpret_cast(str + vecLength * 2)); + VecType x3 = Traits::u16Load(*reinterpret_cast(str + vecLength * 3)); + VecType x4 = Traits::u16Load(*reinterpret_cast(str + vecLength * 4)); + VecType x5 = Traits::u16Load(*reinterpret_cast(str + vecLength * 5)); + VecType x6 = Traits::u16Load(*reinterpret_cast(str + vecLength * 6)); + VecType x7 = Traits::u16Load(*reinterpret_cast(str + vecLength * 7)); + res0 = Traits::vecMul(res0, *reinterpret_cast(b)); + res1 = Traits::vecMul(res1, *reinterpret_cast(b)); + res2 = Traits::vecMul(res2, *reinterpret_cast(b)); + res3 = Traits::vecMul(res3, *reinterpret_cast(b)); + res4 = Traits::vecMul(res4, *reinterpret_cast(b)); + res5 = Traits::vecMul(res5, *reinterpret_cast(b)); + res6 = Traits::vecMul(res6, *reinterpret_cast(b)); + res7 = Traits::vecMul(res7, *reinterpret_cast(b)); + VecType z0 = Traits::vecMul(x0, *reinterpret_cast(p)); + VecType z1 = Traits::vecMul(x1, *reinterpret_cast(p + vecLength)); + VecType z2 = Traits::vecMul(x2, *reinterpret_cast(p + vecLength * 2)); + VecType z3 = Traits::vecMul(x3, *reinterpret_cast(p + vecLength * 3)); + VecType z4 = Traits::vecMul(x4, *reinterpret_cast(p + vecLength * 4)); + VecType z5 = Traits::vecMul(x5, *reinterpret_cast(p + vecLength * 5)); + VecType z6 = Traits::vecMul(x6, *reinterpret_cast(p + vecLength * 6)); + VecType z7 = Traits::vecMul(x7, *reinterpret_cast(p + vecLength * 7)); + res0 = Traits::vecAdd(res0, z0); + res1 = Traits::vecAdd(res1, z1); + res2 = Traits::vecAdd(res2, z2); + res3 = Traits::vecAdd(res3, z3); + res4 = Traits::vecAdd(res4, z4); + res5 = Traits::vecAdd(res5, z5); + res6 = Traits::vecAdd(res6, z6); + res7 = Traits::vecAdd(res7, z7); + + str += vecLength * 8; + n -= vecLength * 2; + } while (n >= vecLength * 2); + + Vec128Type sum1 = Traits::vec128Add(Traits::squash2(res0, res1), Traits::squash2(res2, res3)); + Vec128Type sum2 = Traits::vec128Add(Traits::squash2(res4, res5), Traits::squash2(res6, res7)); + res = Traits::vec128Add(res, Traits::vec128Add(sum1, sum2)); +} + +#if defined(__x86_64__) or defined(__i386__) +#pragma clang attribute pop +#endif + +#endif // RUNTIME_POLYHASH_COMMON_H diff --git a/kotlin-native/runtime/src/main/cpp/polyhash/naive.h b/kotlin-native/runtime/src/main/cpp/polyhash/naive.h new file mode 100644 index 00000000000..20ca0a85052 --- /dev/null +++ b/kotlin-native/runtime/src/main/cpp/polyhash/naive.h @@ -0,0 +1,18 @@ +/* + * Copyright 2010-2021 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license + * that can be found in the LICENSE file. + */ + +#ifndef RUNTIME_POLYHASH_NAIVE_H +#define RUNTIME_POLYHASH_NAIVE_H + +#include + +inline int polyHash_naive(int length, uint16_t const* str) { + int res = 0; + for (int i = 0; i < length; ++i) + res = res * 31 + str[i]; + return res; +} + +#endif // RUNTIME_POLYHASH_NAIVE_H diff --git a/kotlin-native/runtime/src/main/cpp/polyhash/x86.cpp b/kotlin-native/runtime/src/main/cpp/polyhash/x86.cpp new file mode 100644 index 00000000000..a5b2b321637 --- /dev/null +++ b/kotlin-native/runtime/src/main/cpp/polyhash/x86.cpp @@ -0,0 +1,166 @@ +/* + * Copyright 2010-2021 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license + * that can be found in the LICENSE file. + */ + +#include "polyhash/common.h" +#include "polyhash/x86.h" + +#if defined(__x86_64__) or defined(__i386__) + +#include + +#pragma clang attribute push (__attribute__((target("avx2"))), apply_to=function) + +namespace { + +alignas(32) constexpr auto p64 = DecreasingPowers<64>(31); // [base^63, base^62, .., base^2, base, 1] +alignas(32) constexpr auto b64 = RepeatingPowers<8>(31, 64); // [base^64, base^64, .., base^64] (8) +alignas(32) constexpr auto b32 = RepeatingPowers<8>(31, 32); // [base^32, base^32, .., base^32] (8) +alignas(32) constexpr auto b16 = RepeatingPowers<8>(31, 16); // [base^16, base^16, .., base^16] (8) +alignas(32) constexpr auto b8 = RepeatingPowers<8>(31, 8); // [base^8, base^8, .., base^8 ] (8) +alignas(32) constexpr auto b4 = RepeatingPowers<8>(31, 4); // [base^4, base^4, .., base^4 ] (8) + +struct SSETraits { + using VecType = __m128i; + using Vec128Type = __m128i; + using U16VecType = __m128i; + + ALWAYS_INLINE static VecType initVec() { return _mm_setzero_si128(); } + ALWAYS_INLINE static Vec128Type initVec128() { return _mm_setzero_si128(); } + ALWAYS_INLINE static int vec128toInt(Vec128Type x) { return _mm_cvtsi128_si32(x); } + ALWAYS_INLINE static VecType u16Load(U16VecType x) { return _mm_cvtepu16_epi32(x); } + ALWAYS_INLINE static Vec128Type vec128Mul(Vec128Type x, Vec128Type y) { return _mm_mullo_epi32(x, y); } + ALWAYS_INLINE static Vec128Type vec128Add(Vec128Type x, Vec128Type y) { return _mm_add_epi32(x, y); } + ALWAYS_INLINE static VecType vecMul(VecType x, VecType y) { return _mm_mullo_epi32(x, y); } + ALWAYS_INLINE static VecType vecAdd(VecType x, VecType y) { return _mm_add_epi32(x, y); } + ALWAYS_INLINE static Vec128Type squash2(VecType x, VecType y) { + return squash1(_mm_hadd_epi32(x, y)); // [x0 + x1, x2 + x3, y0 + y1, y2 + y3] + } + + ALWAYS_INLINE static Vec128Type squash1(VecType z) { + VecType sum = _mm_hadd_epi32(z, z); // [z0 + z1, z2 + z3, z0 + z1, z2 + z3] + return _mm_hadd_epi32(sum, sum); // [z0..3, same, same, same] + } + + static int polyHashUnalignedUnrollUpTo8(int n, uint16_t const* str) { + Vec128Type res = initVec128(); + + polyHashUnroll2(n, str, res, &b8[0], &p64[56]); + polyHashTail(n, str, res, &b4[0], &p64[60]); + + return vec128toInt(res); + } + + static int polyHashUnalignedUnrollUpTo16(int n, uint16_t const* str) { + Vec128Type res = initVec128(); + + polyHashUnroll4(n, str, res, &b16[0], &p64[48]); + polyHashUnroll2(n, str, res, &b8[0], &p64[56]); + polyHashTail(n, str, res, &b4[0], &p64[60]); + + return vec128toInt(res); + } +}; + +struct AVX2Traits { + using VecType = __m256i; + using Vec128Type = __m128i; + using U16VecType = __m128i; + + ALWAYS_INLINE static VecType initVec() { return _mm256_setzero_si256(); } + ALWAYS_INLINE static Vec128Type initVec128() { return _mm_setzero_si128(); } + ALWAYS_INLINE static int vec128toInt(Vec128Type x) { return _mm_cvtsi128_si32(x); } + ALWAYS_INLINE static VecType u16Load(U16VecType x) { return _mm256_cvtepu16_epi32(x); } + ALWAYS_INLINE static Vec128Type vec128Mul(Vec128Type x, Vec128Type y) { return _mm_mullo_epi32(x, y); } + ALWAYS_INLINE static Vec128Type vec128Add(Vec128Type x, Vec128Type y) { return _mm_add_epi32(x, y); } + ALWAYS_INLINE static VecType vecMul(VecType x, VecType y) { return _mm256_mullo_epi32(x, y); } + ALWAYS_INLINE static VecType vecAdd(VecType x, VecType y) { return _mm256_add_epi32(x, y); } + ALWAYS_INLINE static Vec128Type squash2(VecType x, VecType y) { + return squash1(_mm256_hadd_epi32(x, y)); // [x0 + x1, x2 + x3, y0 + y1, y2 + y3, x4 + x5, x6 + x7, y4 + y5, y6 + y7] + } + + ALWAYS_INLINE static Vec128Type squash1(VecType z) { + VecType sum = _mm256_hadd_epi32(z, z); // [z0 + z1, z2 + z3, z0 + z1, z2 + z3, z4 + z5, z6 + z7, z4 + z5, z6 + z7] + sum = _mm256_hadd_epi32(sum, sum); // [z0..3, z0..3, z0..3, z0..3, z4..7, z4..7, z4..7, z4..7] + Vec128Type lo = _mm256_extracti128_si256(sum, 0); // [z0..3, same, same, same] + Vec128Type hi = _mm256_extracti128_si256(sum, 1); // [z4..7, same, same, same] + return _mm_add_epi32(lo, hi); // [z0..7, same, same, same] + } + + static int polyHashUnalignedUnrollUpTo16(int n, uint16_t const* str) { + Vec128Type res = initVec128(); + + polyHashUnroll2(n, str, res, &b16[0], &p64[48]); + polyHashTail(n, str, res, &b8[0], &p64[56]); + polyHashTail(n, str, res, &b4[0], &p64[60]); + + return vec128toInt(res); + } + + static int polyHashUnalignedUnrollUpTo32(int n, uint16_t const* str) { + Vec128Type res = initVec128(); + + polyHashUnroll4(n, str, res, &b32[0], &p64[32]); + polyHashUnroll2(n, str, res, &b16[0], &p64[48]); + polyHashTail(n, str, res, &b8[0], &p64[56]); + polyHashTail(n, str, res, &b4[0], &p64[60]); + + return vec128toInt(res); + } + + static int polyHashUnalignedUnrollUpTo64(int n, uint16_t const* str) { + Vec128Type res = initVec128(); + + polyHashUnroll8(n, str, res, &b64[0], &p64[0]); + polyHashUnroll4(n, str, res, &b32[0], &p64[32]); + polyHashUnroll2(n, str, res, &b16[0], &p64[48]); + polyHashTail(n, str, res, &b8[0], &p64[56]); + polyHashTail(n, str, res, &b4[0], &p64[60]); + + return vec128toInt(res); + } +}; + +#if defined(__x86_64__) + const bool x64 = true; +#else + const bool x64 = false; +#endif + bool initialized = false; + bool sseSupported; + bool avx2Supported; + +} + +int polyHash_x86(int length, uint16_t const* str) { + if (!initialized) { + initialized = true; + sseSupported = __builtin_cpu_supports("sse4.1"); + avx2Supported = __builtin_cpu_supports("avx2"); + } + if (length < 16 || (!sseSupported && !avx2Supported)) { + // Either vectorization is not supported or the string is too short to gain from it. + return polyHash_naive(length, str); + } + int res; + if (length < 32) + res = SSETraits::polyHashUnalignedUnrollUpTo8(length / 4, str); + else if (!avx2Supported) + res = SSETraits::polyHashUnalignedUnrollUpTo16(length / 4, str); + else if (length < 128) + res = AVX2Traits::polyHashUnalignedUnrollUpTo16(length / 4, str); + else if (!x64 || length < 576) + res = AVX2Traits::polyHashUnalignedUnrollUpTo32(length / 4, str); + else // Such big unrolling requires 64-bit mode (in 32-bit mode there are only 8 vector registers) + res = AVX2Traits::polyHashUnalignedUnrollUpTo64(length / 4, str); + + // Handle the tail naively. + for (int i = length & 0xFFFFFFFC; i < length; ++i) + res = res * 31 + str[i]; + return res; +} + +#pragma clang attribute pop + +#endif diff --git a/kotlin-native/runtime/src/main/cpp/polyhash/x86.h b/kotlin-native/runtime/src/main/cpp/polyhash/x86.h new file mode 100644 index 00000000000..d9f7f93288b --- /dev/null +++ b/kotlin-native/runtime/src/main/cpp/polyhash/x86.h @@ -0,0 +1,11 @@ +/* + * Copyright 2010-2021 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license + * that can be found in the LICENSE file. + */ + +#ifndef RUNTIME_POLYHASH_X86_H +#define RUNTIME_POLYHASH_X86_H + +int polyHash_x86(int length, uint16_t const* str); + +#endif // RUNTIME_POLYHASH_X86_H