#pragma once #include #include #include #include #include namespace DB { class Arena; class Aggregator; struct AggregatedDataVariants : private boost::noncopyable { /** Working with states of aggregate functions in the pool is arranged in the following (inconvenient) way: * - when aggregating, states are created in the pool using IAggregateFunction::create (inside - `placement new` of arbitrary structure); * - they must then be destroyed using IAggregateFunction::destroy (inside - calling the destructor of arbitrary structure); * - if aggregation is complete, then, in the Aggregator::convertToBlocks function, pointers to the states of aggregate functions * are written to ColumnAggregateFunction; ColumnAggregateFunction "acquires ownership" of them, that is - calls `destroy` in its destructor. * - if during the aggregation, before call to Aggregator::convertToBlocks, an exception was thrown, * then the states of aggregate functions must still be destroyed, * otherwise, for complex states (eg, AggregateFunctionUniq), there will be memory leaks; * - in this case, to destroy states, the destructor calls Aggregator::destroyAggregateStates method, * but only if the variable aggregator (see below) is not nullptr; * - that is, until you transfer ownership of the aggregate function states in the ColumnAggregateFunction, set the variable `aggregator`, * so that when an exception occurs, the states are correctly destroyed. * * PS. This can be corrected by making a pool that knows about which states of aggregate functions and in which order are put in it, and knows how to destroy them. * But this can hardly be done simply because it is planned to put variable-length strings into the same pool. * In this case, the pool will not be able to know with what offsets objects are stored. */ const Aggregator * aggregator = nullptr; size_t keys_size{}; /// Number of keys. NOTE do we need this field? Sizes key_sizes; /// Dimensions of keys, if keys of fixed length /// Pools for states of aggregate functions. Ownership will be later transferred to ColumnAggregateFunction. using ArenaPtr = std::shared_ptr; using Arenas = std::vector; Arenas aggregates_pools; Arena * aggregates_pool{}; /// The pool that is currently used for allocation. /** Specialization for the case when there are no keys, and for keys not fitted into max_rows_to_group_by. */ AggregatedDataWithoutKey without_key = nullptr; /// Stats of a cache for consecutive keys optimization. /// Stats can be used to disable the cache in case of a lot of misses. ColumnsHashing::LastElementCacheStats consecutive_keys_cache_stats; // Disable consecutive key optimization for Uint8/16, because they use a FixedHashMap // and the lookup there is almost free, so we don't need to cache the last lookup result std::unique_ptr> key8; std::unique_ptr> key16; std::unique_ptr> key32; std::unique_ptr> key64; std::unique_ptr> key_string; std::unique_ptr> key_fixed_string; std::unique_ptr> keys16; std::unique_ptr> keys32; std::unique_ptr> keys64; std::unique_ptr> keys128; std::unique_ptr> keys256; std::unique_ptr> serialized; std::unique_ptr> nullable_serialized; std::unique_ptr> prealloc_serialized; std::unique_ptr> nullable_prealloc_serialized; std::unique_ptr> key32_two_level; std::unique_ptr> key64_two_level; std::unique_ptr> key_string_two_level; std::unique_ptr> key_fixed_string_two_level; std::unique_ptr> keys32_two_level; std::unique_ptr> keys64_two_level; std::unique_ptr> keys128_two_level; std::unique_ptr> keys256_two_level; std::unique_ptr> serialized_two_level; std::unique_ptr> nullable_serialized_two_level; std::unique_ptr> prealloc_serialized_two_level; std::unique_ptr> nullable_prealloc_serialized_two_level; std::unique_ptr> key64_hash64; std::unique_ptr> key_string_hash64; std::unique_ptr> key_fixed_string_hash64; std::unique_ptr> keys128_hash64; std::unique_ptr> keys256_hash64; std::unique_ptr> serialized_hash64; std::unique_ptr> nullable_serialized_hash64; std::unique_ptr> prealloc_serialized_hash64; std::unique_ptr> nullable_prealloc_serialized_hash64; /// Support for nullable keys. std::unique_ptr> nullable_key8; std::unique_ptr> nullable_key16; std::unique_ptr> nullable_key32; std::unique_ptr> nullable_key64; std::unique_ptr> nullable_key32_two_level; std::unique_ptr> nullable_key64_two_level; std::unique_ptr> nullable_key_string; std::unique_ptr> nullable_key_fixed_string; std::unique_ptr> nullable_key_string_two_level; std::unique_ptr> nullable_key_fixed_string_two_level; std::unique_ptr> nullable_keys128; std::unique_ptr> nullable_keys256; std::unique_ptr> nullable_keys128_two_level; std::unique_ptr> nullable_keys256_two_level; /// Support for low cardinality. std::unique_ptr>> low_cardinality_key8; std::unique_ptr>> low_cardinality_key16; std::unique_ptr>> low_cardinality_key32; std::unique_ptr>> low_cardinality_key64; std::unique_ptr>> low_cardinality_key_string; std::unique_ptr>> low_cardinality_key_fixed_string; std::unique_ptr>> low_cardinality_key32_two_level; std::unique_ptr>> low_cardinality_key64_two_level; std::unique_ptr>> low_cardinality_key_string_two_level; std::unique_ptr>> low_cardinality_key_fixed_string_two_level; std::unique_ptr> low_cardinality_keys128; std::unique_ptr> low_cardinality_keys256; std::unique_ptr> low_cardinality_keys128_two_level; std::unique_ptr> low_cardinality_keys256_two_level; /// In this and similar macros, the option without_key is not considered. #define APPLY_FOR_AGGREGATED_VARIANTS(M) \ M(key8, false) \ M(key16, false) \ M(key32, false) \ M(key64, false) \ M(key_string, false) \ M(key_fixed_string, false) \ M(keys16, false) \ M(keys32, false) \ M(keys64, false) \ M(keys128, false) \ M(keys256, false) \ M(serialized, false) \ M(nullable_serialized, false) \ M(prealloc_serialized, false) \ M(nullable_prealloc_serialized, false) \ M(key32_two_level, true) \ M(key64_two_level, true) \ M(key_string_two_level, true) \ M(key_fixed_string_two_level, true) \ M(keys32_two_level, true) \ M(keys64_two_level, true) \ M(keys128_two_level, true) \ M(keys256_two_level, true) \ M(serialized_two_level, true) \ M(nullable_serialized_two_level, true) \ M(prealloc_serialized_two_level, true) \ M(nullable_prealloc_serialized_two_level, true) \ M(key64_hash64, false) \ M(key_string_hash64, false) \ M(key_fixed_string_hash64, false) \ M(keys128_hash64, false) \ M(keys256_hash64, false) \ M(serialized_hash64, false) \ M(nullable_serialized_hash64, false) \ M(prealloc_serialized_hash64, false) \ M(nullable_prealloc_serialized_hash64, false) \ M(nullable_key8, false) \ M(nullable_key16, false) \ M(nullable_key32, false) \ M(nullable_key64, false) \ M(nullable_key32_two_level, true) \ M(nullable_key64_two_level, true) \ M(nullable_key_string, false) \ M(nullable_key_fixed_string, false) \ M(nullable_key_string_two_level, true) \ M(nullable_key_fixed_string_two_level, true) \ M(nullable_keys128, false) \ M(nullable_keys256, false) \ M(nullable_keys128_two_level, true) \ M(nullable_keys256_two_level, true) \ M(low_cardinality_key8, false) \ M(low_cardinality_key16, false) \ M(low_cardinality_key32, false) \ M(low_cardinality_key64, false) \ M(low_cardinality_keys128, false) \ M(low_cardinality_keys256, false) \ M(low_cardinality_key_string, false) \ M(low_cardinality_key_fixed_string, false) \ M(low_cardinality_key32_two_level, true) \ M(low_cardinality_key64_two_level, true) \ M(low_cardinality_keys128_two_level, true) \ M(low_cardinality_keys256_two_level, true) \ M(low_cardinality_key_string_two_level, true) \ M(low_cardinality_key_fixed_string_two_level, true) \ #define APPLY_FOR_VARIANTS_CONVERTIBLE_TO_TWO_LEVEL(M) \ M(key32) \ M(key64) \ M(key_string) \ M(key_fixed_string) \ M(keys32) \ M(keys64) \ M(keys128) \ M(keys256) \ M(serialized) \ M(nullable_serialized) \ M(prealloc_serialized) \ M(nullable_prealloc_serialized) \ M(nullable_key32) \ M(nullable_key64) \ M(nullable_key_string) \ M(nullable_key_fixed_string) \ M(nullable_keys128) \ M(nullable_keys256) \ M(low_cardinality_key32) \ M(low_cardinality_key64) \ M(low_cardinality_keys128) \ M(low_cardinality_keys256) \ M(low_cardinality_key_string) \ M(low_cardinality_key_fixed_string) \ /// NOLINTNEXTLINE #define APPLY_FOR_VARIANTS_NOT_CONVERTIBLE_TO_TWO_LEVEL(M) \ M(key8) \ M(key16) \ M(nullable_key8) \ M(nullable_key16) \ M(keys16) \ M(key64_hash64) \ M(key_string_hash64)\ M(key_fixed_string_hash64) \ M(keys128_hash64) \ M(keys256_hash64) \ M(serialized_hash64) \ M(nullable_serialized_hash64) \ M(prealloc_serialized_hash64) \ M(nullable_prealloc_serialized_hash64) \ M(low_cardinality_key8) \ M(low_cardinality_key16) \ /// NOLINTNEXTLINE #define APPLY_FOR_VARIANTS_SINGLE_LEVEL(M) \ APPLY_FOR_VARIANTS_NOT_CONVERTIBLE_TO_TWO_LEVEL(M) \ APPLY_FOR_VARIANTS_CONVERTIBLE_TO_TWO_LEVEL(M) \ /// NOLINTNEXTLINE #define APPLY_FOR_VARIANTS_TWO_LEVEL(M) \ M(key32_two_level) \ M(key64_two_level) \ M(key_string_two_level) \ M(key_fixed_string_two_level) \ M(keys32_two_level) \ M(keys64_two_level) \ M(keys128_two_level) \ M(keys256_two_level) \ M(serialized_two_level) \ M(nullable_serialized_two_level) \ M(prealloc_serialized_two_level) \ M(nullable_prealloc_serialized_two_level) \ M(nullable_key32_two_level) \ M(nullable_key64_two_level) \ M(nullable_key_string_two_level) \ M(nullable_key_fixed_string_two_level) \ M(nullable_keys128_two_level) \ M(nullable_keys256_two_level) \ M(low_cardinality_key32_two_level) \ M(low_cardinality_key64_two_level) \ M(low_cardinality_keys128_two_level) \ M(low_cardinality_keys256_two_level) \ M(low_cardinality_key_string_two_level) \ M(low_cardinality_key_fixed_string_two_level) \ #define APPLY_FOR_LOW_CARDINALITY_VARIANTS(M) \ M(low_cardinality_key8) \ M(low_cardinality_key16) \ M(low_cardinality_key32) \ M(low_cardinality_key64) \ M(low_cardinality_keys128) \ M(low_cardinality_keys256) \ M(low_cardinality_key_string) \ M(low_cardinality_key_fixed_string) \ M(low_cardinality_key32_two_level) \ M(low_cardinality_key64_two_level) \ M(low_cardinality_keys128_two_level) \ M(low_cardinality_keys256_two_level) \ M(low_cardinality_key_string_two_level) \ M(low_cardinality_key_fixed_string_two_level) enum class Type : uint8_t { EMPTY = 0, without_key, #define M(NAME, IS_TWO_LEVEL) NAME, APPLY_FOR_AGGREGATED_VARIANTS(M) #undef M }; Type type = Type::EMPTY; AggregatedDataVariants(); ~AggregatedDataVariants(); bool empty() const { return type == Type::EMPTY; } void invalidate() { type = Type::EMPTY; } void init(Type type_, std::optional size_hint = std::nullopt); /// Number of rows (different keys). size_t size() const; size_t sizeWithoutOverflowRow() const; const char * getMethodName() const; bool isTwoLevel() const; bool isConvertibleToTwoLevel() const; void convertToTwoLevel(); bool isLowCardinality() const; static ColumnsHashing::HashMethodContextPtr createCache(Type type, const ColumnsHashing::HashMethodContext::Settings & settings); }; using AggregatedDataVariantsPtr = std::shared_ptr; using ManyAggregatedDataVariants = std::vector; using ManyAggregatedDataVariantsPtr = std::shared_ptr; }