J'ai finalement implémenté ma fonction de hachage pour les types de type span.
template <typename T>
concept Scalar = is_scalar_v<T>;
template <typename T>
concept SpanType = !Scalar<T> && requires(T t) {
t.data();
t.size();
};
inline constexpr size_t hash_a_base = 123UL;
template <size_t a> struct HashFunc {
constexpr size_t operator()(size_t k) const noexcept {
return 2 * k * k + a * k;
}
};
template <unsigned_integral T, size_t a> struct HashBase {
constexpr size_t operator()(T k) const noexcept {
// TODO: take care when sizeof(T) > sizeof(size_t)...
return HashFunc<a + sizeof(T) * 16>{}(static_cast<size_t>(k));
}
};
template <Scalar T, size_t a> struct HashImpl {
constexpr size_t operator()(const T &key) const noexcept {
if constexpr (sizeof(T) == sizeof(uint64_t)) {
return HashBase<uint64_t, a>{}(bit_cast<uint64_t>(key));
} else if constexpr (sizeof(T) == sizeof(uint8_t)) {
return HashBase<uint8_t, a>{}(bit_cast<uint8_t>(key));
} else if constexpr (sizeof(T) == sizeof(uint16_t)) {
return HashBase<uint16_t, a>{}(bit_cast<uint16_t>(key));
} else if constexpr (sizeof(T) == sizeof(uint32_t)) {
return HashBase<uint32_t, a>{}(bit_cast<uint32_t>(key));
} else {
[]<bool flag = false>() {
static_assert(flag, "Not supported sizeof(T)");
}
();
}
}
};
template <SpanType T>
using SpanBaseType = remove_pointer_t<decltype(data<T>(declval<T>()))>;
template <size_t a> struct HashSpan {
size_t operator()(span<const size_t> arr) const noexcept {
size_t val = arr.size();
for (auto num : arr) {
val = detail::HashBase<size_t, a>{}(num + val);
}
return val;
}
size_t operator()(span<const unsigned char> arr, size_t val) const noexcept {
for (auto num : arr) {
val = detail::HashBase<size_t, a>{}(static_cast<size_t>(num) + val);
}
return val;
}
};
} // namespace detail
template <typename T, size_t a = detail::hash_a_base> struct Hash {
constexpr size_t operator()(const T &key) const noexcept requires(Scalar<T>) {
// negative zero -> positive zero for floating points
if constexpr (is_same_v<T, float>) {
return detail::HashImpl<T, a>{}(key == 0.0f ? 0.0f : key);
} else if constexpr (is_same_v<T, double>) {
return detail::HashImpl<T, a>{}(key == 0.0 ? 0.0 : key);
} else if constexpr (is_same_v<T, long double>) {
return detail::HashImpl<T, a>{}(key == 0.0l ? 0.0l : key);
} else {
return detail::HashImpl<T, a>{}(key);
}
}
constexpr size_t operator()(const T &key) const noexcept requires(SpanType<T>) {
using base_type = detail::SpanBaseType<T>;
auto num_bytes = key.size() * sizeof(base_type);
if (num_bytes == 0) {
return 0UL;
} else {
auto num_words = num_bytes / sizeof(size_t);
size_t val = 0;
if (num_words) {
val = detail::HashSpan<a>{}(span{reinterpret_cast<const size_t *>(key.data()), num_words});
}
auto last_bytes = num_bytes % sizeof(size_t);
if (last_bytes) {
return detail::HashSpan<a>{}(span{reinterpret_cast<const unsigned char *>(key.data() + num_words), last_bytes}, last_bytes + val);
} else {
return val;
}
}
}
};
Pour std::string
de grande longueur, cette fonction était beaucoup plus rapide que la fonction std::hash<std::string>
.
Résultat de l'étude comparative : (Génération de 10000 échantillons aléatoires d'ADN). std::string
de longueur 1~500, avec char
ayant les valeurs 32~126, et les insère dans un ordre aléatoire, les récupère dans un ordre aléatoire, les efface dans un ordre aléatoire. Essayé 1000 fois)
std::unordered_set<std::string, std::hash<std::string>> test
Time to insert 10000 elements:
Average : 10980.6104 us,
Stdev : 973.3181 us,
95% : 13485.1094 us,
99% : 14788.1992 us,
99.9% : 14824.3770 us,
Time to find 10000 elements:
Average : 4738.7148 us,
Stdev : 782.7706 us,
95% : 6460.4658 us,
99% : 7513.4780 us,
99.9% : 9732.9951 us,
Time to erase 10000 elements:
Average : 5242.2021 us,
Stdev : 975.2573 us,
95% : 6961.9961 us,
99% : 10397.3525 us,
99.9% : 11742.9883 us,
std::unordered_set<std::string, frozenca::hard::Hash<std::string>> test
Time to insert 10000 elements:
Average : 4377.6470 us,
Stdev : 411.8980 us,
95% : 5214.9204 us,
99% : 6192.5166 us,
99.9% : 6333.6099 us,
Time to find 10000 elements:
Average : 2407.2400 us,
Stdev : 725.3784 us,
95% : 3846.8633 us,
99% : 6181.4131 us,
99.9% : 7530.2129 us,
Time to erase 10000 elements:
Average : 3017.9241 us,
Stdev : 766.5061 us,
95% : 5034.3765 us,
99% : 6032.3950 us,
99.9% : 7971.5703 us,