#pragma once #include #include #include #include #include namespace DB { namespace ErrorCodes { extern const int LOGICAL_ERROR; } namespace ColumnsHashing { /// Generic context for HashMethod. Context is shared between multiple threads, all methods must be thread-safe. /// Is used for caching. class HashMethodContext { public: virtual ~HashMethodContext() = default; struct Settings { size_t max_threads; }; }; using HashMethodContextPtr = std::shared_ptr; struct LastElementCacheStats { UInt64 hits = 0; UInt64 misses = 0; void update(size_t num_tries, size_t num_misses) { hits += num_tries - num_misses; misses += num_misses; } }; namespace columns_hashing_impl { struct LastElementCacheBase { bool empty = true; bool found = false; UInt64 misses = 0; void onNewValue(bool is_found) { empty = false; found = is_found; ++misses; } bool hasOnlyOneValue() const { return found && misses == 1; } }; template struct LastElementCache; template struct LastElementCache : public LastElementCacheBase { Value value{}; bool is_null = false; template bool check(const Key & key) const { return !is_null && value.first == key; } bool check(const Value & rhs) const { return !is_null && value == rhs; } }; template struct LastElementCache : public LastElementCacheBase { Value value{}; template bool check(const Key & key) const { return value.first == key; } bool check(const Value & rhs) const { return value == rhs; } }; template class EmplaceResultImpl { Mapped & value; Mapped & cached_value; bool inserted; public: EmplaceResultImpl(Mapped & value_, Mapped & cached_value_, bool inserted_) : value(value_), cached_value(cached_value_), inserted(inserted_) {} bool isInserted() const { return inserted; } auto & getMapped() const { return value; } void setMapped(const Mapped & mapped) { cached_value = mapped; value = mapped; } }; template <> class EmplaceResultImpl { bool inserted; public: explicit EmplaceResultImpl(bool inserted_) : inserted(inserted_) {} bool isInserted() const { return inserted; } }; /// FindResult optionally may contain pointer to value and offset in hashtable buffer. /// Only bool found is required. /// So we will have 4 different specializations for FindResultImpl class FindResultImplBase { bool found; public: explicit FindResultImplBase(bool found_) : found(found_) {} bool isFound() const { return found; } }; template class FindResultImplOffsetBase { public: constexpr static bool has_offset = need_offset; explicit FindResultImplOffsetBase(size_t /* off */) {} }; template <> class FindResultImplOffsetBase { size_t offset; public: constexpr static bool has_offset = true; explicit FindResultImplOffsetBase(size_t off) : offset(off) {} ALWAYS_INLINE size_t getOffset() const { return offset; } }; template class FindResultImpl : public FindResultImplBase, public FindResultImplOffsetBase { Mapped * value; public: FindResultImpl() : FindResultImplBase(false), FindResultImplOffsetBase(0) // NOLINT(clang-analyzer-optin.cplusplus.UninitializedObject) intentionally allow uninitialized value here {} FindResultImpl(Mapped * value_, bool found_, size_t off) : FindResultImplBase(found_), FindResultImplOffsetBase(off), value(value_) {} Mapped & getMapped() const { return *value; } }; template class FindResultImpl : public FindResultImplBase, public FindResultImplOffsetBase { public: FindResultImpl(bool found_, size_t off) : FindResultImplBase(found_), FindResultImplOffsetBase(off) {} }; template class HashMethodBase { public: using EmplaceResult = EmplaceResultImpl; using FindResult = FindResultImpl; static constexpr bool has_mapped = !std::is_same_v; using Cache = LastElementCache; static HashMethodContextPtr createContext(const HashMethodContext::Settings &) { return nullptr; } template ALWAYS_INLINE EmplaceResult emplaceKey(Data & data, size_t row, Arena & pool) { if constexpr (nullable) { if (isNullAt(row)) { if constexpr (consecutive_keys_optimization) { if (!cache.is_null) { cache.onNewValue(true); cache.is_null = true; } } bool has_null_key = data.hasNullKeyData(); data.hasNullKeyData() = true; if constexpr (has_mapped) return EmplaceResult(data.getNullKeyData(), data.getNullKeyData(), !has_null_key); else return EmplaceResult(!has_null_key); } } auto key_holder = static_cast(*this).getKeyHolder(row, pool); return emplaceImpl(key_holder, data); } template ALWAYS_INLINE FindResult findKey(Data & data, size_t row, Arena & pool) { if constexpr (nullable) { if (isNullAt(row)) { bool has_null_key = data.hasNullKeyData(); if constexpr (consecutive_keys_optimization) { if (!cache.is_null) { cache.onNewValue(has_null_key); cache.is_null = true; } } if constexpr (has_mapped) return FindResult(&data.getNullKeyData(), has_null_key, 0); else return FindResult(has_null_key, 0); } } auto key_holder = static_cast(*this).getKeyHolder(row, pool); return findKeyImpl(keyHolderGetKey(key_holder), data); } template ALWAYS_INLINE size_t getHash(const Data & data, size_t row, Arena & pool) { auto key_holder = static_cast(*this).getKeyHolder(row, pool); return data.hash(keyHolderGetKey(key_holder)); } ALWAYS_INLINE void resetCache() { if constexpr (consecutive_keys_optimization) { cache.empty = true; cache.found = false; cache.misses = 0; } } ALWAYS_INLINE bool hasOnlyOneValueSinceLastReset() const { if constexpr (consecutive_keys_optimization) return cache.hasOnlyOneValue(); return false; } ALWAYS_INLINE UInt64 getCacheMissesSinceLastReset() const { if constexpr (consecutive_keys_optimization) return cache.misses; return 0; } ALWAYS_INLINE bool isNullAt(size_t row) const { if constexpr (nullable) { return null_map->getBool(row); } else { return false; } } protected: Cache cache; const IColumn * null_map = nullptr; bool has_null_data = false; /// column argument only for nullable column explicit HashMethodBase(const IColumn * column = nullptr) { if constexpr (consecutive_keys_optimization) { if constexpr (has_mapped) { /// Init PairNoInit elements. cache.value.second = Mapped(); cache.value.first = {}; } else cache.value = Value(); } if constexpr (nullable) null_map = &checkAndGetColumn(*column).getNullMapColumn(); } template ALWAYS_INLINE EmplaceResult emplaceImpl(KeyHolder & key_holder, Data & data) { if constexpr (consecutive_keys_optimization) { if (cache.found && cache.check(keyHolderGetKey(key_holder))) { if constexpr (has_mapped) return EmplaceResult(cache.value.second, cache.value.second, false); else return EmplaceResult(false); } } typename Data::LookupResult it; bool inserted = false; data.emplace(key_holder, it, inserted); [[maybe_unused]] Mapped * cached = nullptr; if constexpr (has_mapped) cached = &it->getMapped(); if constexpr (has_mapped) { if (inserted) { new (&it->getMapped()) Mapped(); } } if constexpr (consecutive_keys_optimization) { cache.onNewValue(true); if constexpr (nullable) cache.is_null = false; if constexpr (has_mapped) { cache.value.first = it->getKey(); cache.value.second = it->getMapped(); cached = &cache.value.second; } else { cache.value = it->getKey(); } } if constexpr (has_mapped) return EmplaceResult(it->getMapped(), *cached, inserted); else return EmplaceResult(inserted); } template ALWAYS_INLINE FindResult findKeyImpl(Key key, Data & data) { if constexpr (consecutive_keys_optimization) { /// It's possible to support such combination, but code will became more complex. /// Now there's not place where we need this options enabled together static_assert(!FindResult::has_offset, "`consecutive_keys_optimization` and `has_offset` are conflicting options"); if (likely(!cache.empty) && cache.check(key)) { if constexpr (has_mapped) return FindResult(&cache.value.second, cache.found, 0); else return FindResult(cache.found, 0); } } auto it = data.find(key); if constexpr (consecutive_keys_optimization) { cache.onNewValue(it != nullptr); if constexpr (nullable) cache.is_null = false; if constexpr (has_mapped) { cache.value.first = key; if (it) cache.value.second = it->getMapped(); } else { cache.value = key; } } size_t offset = 0; if constexpr (FindResult::has_offset) offset = it ? data.offsetInternal(it) : 0; if constexpr (has_mapped) return FindResult(it ? &it->getMapped() : nullptr, it != nullptr, offset); else return FindResult(it != nullptr, offset); } }; template struct MappedCache : public PaddedPODArray {}; template <> struct MappedCache {}; /// This class is designed to provide the functionality that is required for /// supporting nullable keys in HashMethodKeysFixed. If there are /// no nullable keys, this class is merely implemented as an empty shell. template class BaseStateKeysFixed; /// Case where nullable keys are supported. template class BaseStateKeysFixed { protected: explicit BaseStateKeysFixed(const ColumnRawPtrs & key_columns) { null_maps.reserve(key_columns.size()); actual_columns.reserve(key_columns.size()); for (const auto & col : key_columns) { if (const auto * nullable_col = checkAndGetColumn(col)) { actual_columns.push_back(&nullable_col->getNestedColumn()); null_maps.push_back(&nullable_col->getNullMapColumn()); } else { actual_columns.push_back(col); null_maps.push_back(nullptr); } } } /// Return the columns which actually contain the values of the keys. /// For a given key column, if it is nullable, we return its nested /// column. Otherwise we return the key column itself. const ColumnRawPtrs & getActualColumns() const { return actual_columns; } /// Create a bitmap that indicates whether, for a particular row, /// a key column bears a null value or not. KeysNullMap createBitmap(size_t row) const { KeysNullMap bitmap{}; for (size_t k = 0; k < null_maps.size(); ++k) { if (null_maps[k] != nullptr) { const auto & null_map = assert_cast(*null_maps[k]).getData(); if (null_map[row] == 1) { size_t bucket = k / 8; size_t offset = k % 8; bitmap[bucket] |= UInt8(1) << offset; } } } return bitmap; } private: ColumnRawPtrs actual_columns; ColumnRawPtrs null_maps; }; /// Case where nullable keys are not supported. template class BaseStateKeysFixed { protected: explicit BaseStateKeysFixed(const ColumnRawPtrs & columns) : actual_columns(columns) {} const ColumnRawPtrs & getActualColumns() const { return actual_columns; } KeysNullMap createBitmap(size_t) const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Internal error: calling createBitmap() for non-nullable keys is forbidden"); } private: ColumnRawPtrs actual_columns; }; } } }