From b468a2ab5f1981f63a071f319d5818b24a4b537f Mon Sep 17 00:00:00 2001 From: Merry Date: Sat, 7 May 2022 11:53:40 +0100 Subject: [PATCH] mcl: meta_byte: Split off meta_byte_group --- include/mcl/container/detail/meta_byte.hpp | 251 ----------------- .../mcl/container/detail/meta_byte_group.hpp | 263 ++++++++++++++++++ include/mcl/container/hmap.hpp | 1 + include/mcl/container/ihmap.hpp | 1 + 4 files changed, 265 insertions(+), 251 deletions(-) create mode 100644 include/mcl/container/detail/meta_byte_group.hpp diff --git a/include/mcl/container/detail/meta_byte.hpp b/include/mcl/container/detail/meta_byte.hpp index 563fd8b..c3fc8c4 100644 --- a/include/mcl/container/detail/meta_byte.hpp +++ b/include/mcl/container/detail/meta_byte.hpp @@ -4,24 +4,9 @@ #pragma once -#include -#include - -#include "mcl/assert.hpp" #include "mcl/bitsizeof.hpp" -#include "mcl/macro/architecture.hpp" #include "mcl/stdint.hpp" -#if defined(MCL_ARCHITECTURE_ARM64) -# include -#elif defined(MCL_ARCHITECTURE_X86_64) -# include - -# include "mcl/bit_cast.hpp" -#else -# include -#endif - namespace mcl::detail { /// if MSB is 0, this is a full slot. remaining 7 bits is a partial hash of the key. @@ -47,240 +32,4 @@ inline size_t group_index_from_hash(size_t hash, size_t group_index_mask) return hash & group_index_mask; } -#if defined(MCL_ARCHITECTURE_ARM64) - -struct meta_byte_group { - static constexpr size_t max_group_size{16}; - - explicit meta_byte_group(meta_byte* ptr) - : data{vld1q_u8(reinterpret_cast(ptr))} - {} - - explicit meta_byte_group(const std::array& array) - : data{vld1q_u8(reinterpret_cast(array.data()))} - {} - - uint64x2_t match(meta_byte cmp) const - { - return vreinterpretq_u64_u8(vandq_u8(vceqq_u8(data, - vdupq_n_u8(static_cast(cmp))), - vdupq_n_u8(0x80))); - } - - uint64x2_t match_empty_or_tombstone() const - { - return vreinterpretq_u64_u8(vandq_u8(data, - vdupq_n_u8(0x80))); - } - - bool is_any_empty() const - { - static_assert(meta_byte::empty == static_cast(0xff), "empty must be maximal u8 value"); - return vmaxvq_u8(data) == 0xff; - } - - bool is_all_empty_or_tombstone() const - { - return vminvq_u8(vandq_u8(data, vdupq_n_u8(0x80))) == 0x80; - } - - meta_byte get(size_t index) const - { - return static_cast(data[index]); - } - - void set(size_t index, meta_byte value) - { - data[index] = static_cast(value); - } - - uint8x16_t data; -}; - -# define MCL_HMAP_MATCH_META_BYTE_GROUP(MATCH, ...) \ - { \ - const uint64x2_t match_result{MATCH}; \ - \ - for (u64 match_result_v{match_result[0]}; match_result_v != 0; match_result_v &= match_result_v - 1) { \ - const size_t match_index{static_cast(std::countr_zero(match_result_v) / 8)}; \ - __VA_ARGS__ \ - } \ - \ - for (u64 match_result_v{match_result[1]}; match_result_v != 0; match_result_v &= match_result_v - 1) { \ - const size_t match_index{static_cast(8 + std::countr_zero(match_result_v) / 8)}; \ - __VA_ARGS__ \ - } \ - } - -# define MCL_HMAP_MATCH_META_BYTE_GROUP_EXCEPT_LAST(MATCH, ...) \ - { \ - const uint64x2_t match_result{MATCH}; \ - \ - for (u64 match_result_v{match_result[0]}; match_result_v != 0; match_result_v &= match_result_v - 1) { \ - const size_t match_index{static_cast(std::countr_zero(match_result_v) / 8)}; \ - __VA_ARGS__ \ - } \ - \ - for (u64 match_result_v{match_result[1] & 0x00ffffffffffffff}; match_result_v != 0; match_result_v &= match_result_v - 1) { \ - const size_t match_index{static_cast(8 + std::countr_zero(match_result_v) / 8)}; \ - __VA_ARGS__ \ - } \ - } - -#elif defined(MCL_ARCHITECTURE_X86_64) - -struct meta_byte_group { - static constexpr size_t max_group_size{16}; - - explicit meta_byte_group(meta_byte* ptr) - : data{_mm_load_si128(reinterpret_cast<__m128i const*>(ptr))} - {} - - explicit meta_byte_group(const std::array& array) - : data{_mm_loadu_si128(reinterpret_cast<__m128i const*>(array.data()))} - {} - - u16 match(meta_byte cmp) const - { - return _mm_movemask_epi8(_mm_cmpeq_epi8(data, _mm_set1_epi8(static_cast(cmp)))); - } - - u16 match_empty_or_tombstone() const - { - return _mm_movemask_epi8(data); - } - - bool is_any_empty() const - { - return match(meta_byte::empty); - } - - bool is_all_empty_or_tombstone() const - { - return match_empty_or_tombstone() const == 0xffff; - } - - meta_byte get(size_t index) const - { - return mcl::bit_cast>(data)[index]; - } - - void set(size_t index, meta_byte value) - { - auto array = mcl::bit_cast>(data); - array[index] = value; - data = mcl::bit_cast<__m128i>(array); - } - - __m128i data; -}; - -# define MCL_HMAP_MATCH_META_BYTE_GROUP(MATCH, ...) \ - { \ - for (const u32 match_result{MATCH}; match_result != 0; match_result &= match_result - 1) { \ - const size_t match_index{static_cast(std::countr_zero(match_result))}; \ - __VA_ARGS__ \ - } \ - } - -# define MCL_HMAP_MATCH_META_BYTE_GROUP_EXCEPT_LAST(MATCH, ...) \ - { \ - for (const u32 match_result{(MATCH) & (0x7fff)}; match_result != 0; match_result &= match_result - 1) { \ - const size_t match_index{static_cast(std::countr_zero(match_result))}; \ - __VA_ARGS__ \ - } \ - } - -#else - -struct meta_byte_group { - static constexpr size_t max_group_size{16}; - - static constexpr u64 msb{0x8080808080808080}; - static constexpr u64 lsb{0x0101010101010101}; - static constexpr u64 not_msb{0x7f7f7f7f7f7f7f7f}; - static constexpr u64 not_lsb{0xfefefefefefefefe}; - - explicit meta_byte_group(meta_byte* ptr) - { - std::memcpy(data.data(), ptr, sizeof(data)); - } - - explicit meta_byte_group(const std::array& array) - : data{array} - {} - - std::array match(meta_byte cmp) const - { - DEBUG_ASSERT(is_full(cmp)); - - const u64 vcmp{lsb * static_cast(cmp)}; - return {(msb - ((data[0] ^ vcmp) & not_msb)) & ~data[0] & msb, (msb - ((data[1] ^ vcmp) & not_msb)) & ~data[1] & msb}; - } - - std::array match_empty_or_tombstone() const - { - return {data[0] & msb, data[1] & msb}; - } - - bool is_any_empty() const - { - static_assert((static_cast(meta_byte::empty) & 0xc0) == 0xc0); - static_assert((static_cast(meta_byte::tombstone) & 0xc0) == 0x80); - - return (data[0] & (data[0] << 1) & msb) || (data[1] & (data[1] << 1) & msb); - } - - bool is_all_empty_or_tombstone() const - { - return (data[0] & data[1] & msb) == msb; - } - - meta_byte get(size_t index) const - { - return mcl::bit_cast>(data)[index]; - } - - void set(size_t index, meta_byte value) - { - auto array = mcl::bit_cast>(data); - array[index] = value; - data = mcl::bit_cast>(array); - } - - std::array data; -}; - -# define MCL_HMAP_MATCH_META_BYTE_GROUP(MATCH, ...) \ - { \ - const std::array match_result{MATCH}; \ - \ - for (u64 match_result_v{match_result[0]}; match_result_v != 0; match_result_v &= match_result_v - 1) { \ - const size_t match_index{static_cast(std::countr_zero(match_result_v) / 8)}; \ - __VA_ARGS__ \ - } \ - \ - for (u64 match_result_v{match_result[1]}; match_result_v != 0; match_result_v &= match_result_v - 1) { \ - const size_t match_index{static_cast(8 + std::countr_zero(match_result_v) / 8)}; \ - __VA_ARGS__ \ - } \ - } - -# define MCL_HMAP_MATCH_META_BYTE_GROUP_EXCEPT_LAST(MATCH, ...) \ - { \ - const std::array match_result{MATCH}; \ - \ - for (u64 match_result_v{match_result[0]}; match_result_v != 0; match_result_v &= match_result_v - 1) { \ - const size_t match_index{static_cast(std::countr_zero(match_result_v) / 8)}; \ - __VA_ARGS__ \ - } \ - \ - for (u64 match_result_v{match_result[1] & 0x00ffffffffffffff}; match_result_v != 0; match_result_v &= match_result_v - 1) { \ - const size_t match_index{static_cast(8 + std::countr_zero(match_result_v) / 8)}; \ - __VA_ARGS__ \ - } \ - } - -#endif - } // namespace mcl::detail diff --git a/include/mcl/container/detail/meta_byte_group.hpp b/include/mcl/container/detail/meta_byte_group.hpp new file mode 100644 index 0000000..592d1f7 --- /dev/null +++ b/include/mcl/container/detail/meta_byte_group.hpp @@ -0,0 +1,263 @@ +// This file is part of the mcl project. +// Copyright (c) 2022 merryhime +// SPDX-License-Identifier: MIT + +#pragma once + +#include +#include + +#include "mcl/assert.hpp" +#include "mcl/container/detail/meta_byte.hpp" +#include "mcl/macro/architecture.hpp" +#include "mcl/stdint.hpp" + +#if defined(MCL_ARCHITECTURE_ARM64) +# include +#elif defined(MCL_ARCHITECTURE_X86_64) +# include + +# include "mcl/bit_cast.hpp" +#else +# include +#endif + +namespace mcl::detail { + +#if defined(MCL_ARCHITECTURE_ARM64) + +struct meta_byte_group { + static constexpr size_t max_group_size{16}; + + explicit meta_byte_group(meta_byte* ptr) + : data{vld1q_u8(reinterpret_cast(ptr))} + {} + + explicit meta_byte_group(const std::array& array) + : data{vld1q_u8(reinterpret_cast(array.data()))} + {} + + uint64x2_t match(meta_byte cmp) const + { + return vreinterpretq_u64_u8(vandq_u8(vceqq_u8(data, + vdupq_n_u8(static_cast(cmp))), + vdupq_n_u8(0x80))); + } + + uint64x2_t match_empty_or_tombstone() const + { + return vreinterpretq_u64_u8(vandq_u8(data, + vdupq_n_u8(0x80))); + } + + bool is_any_empty() const + { + static_assert(meta_byte::empty == static_cast(0xff), "empty must be maximal u8 value"); + return vmaxvq_u8(data) == 0xff; + } + + bool is_all_empty_or_tombstone() const + { + return vminvq_u8(vandq_u8(data, vdupq_n_u8(0x80))) == 0x80; + } + + meta_byte get(size_t index) const + { + return static_cast(data[index]); + } + + void set(size_t index, meta_byte value) + { + data[index] = static_cast(value); + } + + uint8x16_t data; +}; + +# define MCL_HMAP_MATCH_META_BYTE_GROUP(MATCH, ...) \ + { \ + const uint64x2_t match_result{MATCH}; \ + \ + for (u64 match_result_v{match_result[0]}; match_result_v != 0; match_result_v &= match_result_v - 1) { \ + const size_t match_index{static_cast(std::countr_zero(match_result_v) / 8)}; \ + __VA_ARGS__ \ + } \ + \ + for (u64 match_result_v{match_result[1]}; match_result_v != 0; match_result_v &= match_result_v - 1) { \ + const size_t match_index{static_cast(8 + std::countr_zero(match_result_v) / 8)}; \ + __VA_ARGS__ \ + } \ + } + +# define MCL_HMAP_MATCH_META_BYTE_GROUP_EXCEPT_LAST(MATCH, ...) \ + { \ + const uint64x2_t match_result{MATCH}; \ + \ + for (u64 match_result_v{match_result[0]}; match_result_v != 0; match_result_v &= match_result_v - 1) { \ + const size_t match_index{static_cast(std::countr_zero(match_result_v) / 8)}; \ + __VA_ARGS__ \ + } \ + \ + for (u64 match_result_v{match_result[1] & 0x00ffffffffffffff}; match_result_v != 0; match_result_v &= match_result_v - 1) { \ + const size_t match_index{static_cast(8 + std::countr_zero(match_result_v) / 8)}; \ + __VA_ARGS__ \ + } \ + } + +#elif defined(MCL_ARCHITECTURE_X86_64) + +struct meta_byte_group { + static constexpr size_t max_group_size{16}; + + explicit meta_byte_group(meta_byte* ptr) + : data{_mm_load_si128(reinterpret_cast<__m128i const*>(ptr))} + {} + + explicit meta_byte_group(const std::array& array) + : data{_mm_loadu_si128(reinterpret_cast<__m128i const*>(array.data()))} + {} + + u16 match(meta_byte cmp) const + { + return _mm_movemask_epi8(_mm_cmpeq_epi8(data, _mm_set1_epi8(static_cast(cmp)))); + } + + u16 match_empty_or_tombstone() const + { + return _mm_movemask_epi8(data); + } + + bool is_any_empty() const + { + return match(meta_byte::empty); + } + + bool is_all_empty_or_tombstone() const + { + return match_empty_or_tombstone() const == 0xffff; + } + + meta_byte get(size_t index) const + { + return mcl::bit_cast>(data)[index]; + } + + void set(size_t index, meta_byte value) + { + auto array = mcl::bit_cast>(data); + array[index] = value; + data = mcl::bit_cast<__m128i>(array); + } + + __m128i data; +}; + +# define MCL_HMAP_MATCH_META_BYTE_GROUP(MATCH, ...) \ + { \ + for (const u32 match_result{MATCH}; match_result != 0; match_result &= match_result - 1) { \ + const size_t match_index{static_cast(std::countr_zero(match_result))}; \ + __VA_ARGS__ \ + } \ + } + +# define MCL_HMAP_MATCH_META_BYTE_GROUP_EXCEPT_LAST(MATCH, ...) \ + { \ + for (const u32 match_result{(MATCH) & (0x7fff)}; match_result != 0; match_result &= match_result - 1) { \ + const size_t match_index{static_cast(std::countr_zero(match_result))}; \ + __VA_ARGS__ \ + } \ + } + +#else + +struct meta_byte_group { + static constexpr size_t max_group_size{16}; + + static constexpr u64 msb{0x8080808080808080}; + static constexpr u64 lsb{0x0101010101010101}; + static constexpr u64 not_msb{0x7f7f7f7f7f7f7f7f}; + static constexpr u64 not_lsb{0xfefefefefefefefe}; + + explicit meta_byte_group(meta_byte* ptr) + { + std::memcpy(data.data(), ptr, sizeof(data)); + } + + explicit meta_byte_group(const std::array& array) + : data{array} + {} + + std::array match(meta_byte cmp) const + { + DEBUG_ASSERT(is_full(cmp)); + + const u64 vcmp{lsb * static_cast(cmp)}; + return {(msb - ((data[0] ^ vcmp) & not_msb)) & ~data[0] & msb, (msb - ((data[1] ^ vcmp) & not_msb)) & ~data[1] & msb}; + } + + std::array match_empty_or_tombstone() const + { + return {data[0] & msb, data[1] & msb}; + } + + bool is_any_empty() const + { + static_assert((static_cast(meta_byte::empty) & 0xc0) == 0xc0); + static_assert((static_cast(meta_byte::tombstone) & 0xc0) == 0x80); + + return (data[0] & (data[0] << 1) & msb) || (data[1] & (data[1] << 1) & msb); + } + + bool is_all_empty_or_tombstone() const + { + return (data[0] & data[1] & msb) == msb; + } + + meta_byte get(size_t index) const + { + return mcl::bit_cast>(data)[index]; + } + + void set(size_t index, meta_byte value) + { + auto array = mcl::bit_cast>(data); + array[index] = value; + data = mcl::bit_cast>(array); + } + + std::array data; +}; + +# define MCL_HMAP_MATCH_META_BYTE_GROUP(MATCH, ...) \ + { \ + const std::array match_result{MATCH}; \ + \ + for (u64 match_result_v{match_result[0]}; match_result_v != 0; match_result_v &= match_result_v - 1) { \ + const size_t match_index{static_cast(std::countr_zero(match_result_v) / 8)}; \ + __VA_ARGS__ \ + } \ + \ + for (u64 match_result_v{match_result[1]}; match_result_v != 0; match_result_v &= match_result_v - 1) { \ + const size_t match_index{static_cast(8 + std::countr_zero(match_result_v) / 8)}; \ + __VA_ARGS__ \ + } \ + } + +# define MCL_HMAP_MATCH_META_BYTE_GROUP_EXCEPT_LAST(MATCH, ...) \ + { \ + const std::array match_result{MATCH}; \ + \ + for (u64 match_result_v{match_result[0]}; match_result_v != 0; match_result_v &= match_result_v - 1) { \ + const size_t match_index{static_cast(std::countr_zero(match_result_v) / 8)}; \ + __VA_ARGS__ \ + } \ + \ + for (u64 match_result_v{match_result[1] & 0x00ffffffffffffff}; match_result_v != 0; match_result_v &= match_result_v - 1) { \ + const size_t match_index{static_cast(8 + std::countr_zero(match_result_v) / 8)}; \ + __VA_ARGS__ \ + } \ + } + +#endif + +} // namespace mcl::detail diff --git a/include/mcl/container/hmap.hpp b/include/mcl/container/hmap.hpp index 9abe650..0a2544c 100644 --- a/include/mcl/container/hmap.hpp +++ b/include/mcl/container/hmap.hpp @@ -12,6 +12,7 @@ #include "mcl/assert.hpp" #include "mcl/container/detail/meta_byte.hpp" +#include "mcl/container/detail/meta_byte_group.hpp" #include "mcl/container/detail/slot_union.hpp" #include "mcl/hash/xmrx.hpp" #include "mcl/hint/assume.hpp" diff --git a/include/mcl/container/ihmap.hpp b/include/mcl/container/ihmap.hpp index c9d666c..79360ea 100644 --- a/include/mcl/container/ihmap.hpp +++ b/include/mcl/container/ihmap.hpp @@ -13,6 +13,7 @@ #include "mcl/assert.hpp" #include "mcl/container/detail/meta_byte.hpp" +#include "mcl/container/detail/meta_byte_group.hpp" #include "mcl/container/detail/slot_union.hpp" #include "mcl/hash/xmrx.hpp" #include "mcl/hint/assume.hpp"