#pragma once #include #include #include #include #include #include #include #include #include #include #include "config.h" #if USE_MULTITARGET_CODE # include #endif namespace DB { namespace ErrorCodes { extern const int NOT_IMPLEMENTED; } /** A template for columns that store their values in a simple array (the `data` member, of type Container). */ template class ColumnVector final : public COWHelper, ColumnFixedSizeHelper>, ColumnVector> { static_assert(!is_decimal); private: using Self = ColumnVector; friend class COWHelper, Self>; struct less; struct less_stable; struct greater; struct greater_stable; struct equals; public: using ValueType = T; using Container = PaddedPODArray; /* All constructors below are private; COWHelper is declared a friend above. */ private: ColumnVector() = default; explicit ColumnVector(const size_t n) : data(n) {} ColumnVector(const size_t n, const ValueType x) : data(n, x) {} ColumnVector(const ColumnVector & src) : data(src.data.begin(), src.data.end()) {} /// Sugar constructor. ColumnVector(std::initializer_list il) : data{il} {} public: bool isNumeric() const override { return is_arithmetic_v; } size_t size() const override { return data.size(); } /* The next two methods are named insertFrom / insertManyFrom in regular builds and doInsertFrom / doInsertManyFrom when DEBUG_OR_SANITIZER_BUILD is defined; the bodies are shared. */ #if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src, size_t n) override #else void doInsertFrom(const IColumn & src, size_t n) override #endif { data.push_back(assert_cast(src).getData()[n]); } #if !defined(DEBUG_OR_SANITIZER_BUILD) void insertManyFrom(const IColumn & src, size_t position, size_t length) override #else void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override #endif { ValueType v = assert_cast(src).getData()[position]; data.resize_fill(data.size() + length, v); } void insertMany(const Field & field, size_t length) override { data.resize_fill(data.size() + length, static_cast(field.safeGet())); } /* The size argument is unnamed and unused: a single value is read from pos with unalignedLoad. */ void insertData(const char * pos, size_t) override { data.emplace_back(unalignedLoad(pos)); } void insertDefault() override { data.push_back(T()); } void insertManyDefaults(size_t length) override { data.resize_fill(data.size() + length, T()); } void 
popBack(size_t n) override { data.resize_assume_reserved(data.size() - n); } /* NOTE(review): popBack does not compare n with data.size() here; presumably callers guarantee n <= size() - confirm. */ const char * deserializeAndInsertFromArena(const char * pos) override; const char * skipSerializedInArena(const char * pos) const override; void updateHashWithValue(size_t n, SipHash & hash) const override; WeakHash32 getWeakHash32() const override; void updateHashFast(SipHash & hash) const override; size_t byteSize() const override { return data.size() * sizeof(data[0]); } size_t byteSizeAt(size_t) const override { return sizeof(data[0]); } size_t allocatedBytes() const override { return data.allocated_bytes(); } void protect() override { data.protect(); } void insertValue(const T value) { data.push_back(value); } /* Compares element n of this column with element m of rhs through CompareHelper. */ template constexpr int compareAtOther(size_t n, size_t m, const ColumnVector & rhs, int nan_direction_hint) const { return CompareHelper::compare(data[n], rhs.data[m], nan_direction_hint); } /// This method is implemented in the header because it could possibly be devirtualized. #if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override #else int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override #endif { return CompareHelper::compare(data[n], assert_cast(rhs_).data[m], nan_direction_hint); } #if USE_EMBEDDED_COMPILER bool isComparatorCompilable() const override; llvm::Value * compileComparator(llvm::IRBuilderBase & /*builder*/, llvm::Value * /*lhs*/, llvm::Value * /*rhs*/, llvm::Value * /*nan_direction_hint*/) const override; #endif void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override; void updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges& equal_ranges) const override; size_t 
estimateCardinalityInPermutedRange(const IColumn::Permutation & permutation, const EqualRange & equal_range) const override; void reserve(size_t n) override { data.reserve_exact(n); } size_t capacity() const override { return data.capacity(); } void shrinkToFit() override { data.shrink_to_fit(); } const char * getFamilyName() const override { return TypeName.data(); } TypeIndex getDataType() const override { return TypeToTypeIndex; } MutableColumnPtr cloneResized(size_t size) const override; Field operator[](size_t n) const override { assert(n < data.size()); /// This assert is more strict than the corresponding assert inside PODArray. return data[n]; } void get(size_t n, Field & res) const override { res = (*this)[n]; } UInt64 get64(size_t n) const override; Float64 getFloat64(size_t n) const override; Float32 getFloat32(size_t n) const override; /* getUInt, getInt and getBool convert data[n] when the is_arithmetic_v condition holds and throw NOT_IMPLEMENTED otherwise. */ /// Out of range conversion is permitted. UInt64 NO_SANITIZE_UNDEFINED getUInt(size_t n) const override { if constexpr (is_arithmetic_v) return UInt64(data[n]); else throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot get the value of {} as UInt", TypeName); } /// Out of range conversion is permitted. 
Int64 NO_SANITIZE_UNDEFINED getInt(size_t n) const override { if constexpr (is_arithmetic_v) return Int64(data[n]); else throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot get the value of {} as Int", TypeName); } bool getBool(size_t n) const override { if constexpr (is_arithmetic_v) return bool(data[n]); else throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot get the value of {} as bool", TypeName); } void insert(const Field & x) override { data.push_back(static_cast(x.safeGet())); } bool tryInsert(const DB::Field & x) override; #if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; #else void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; #endif ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override; void expand(const IColumn::Filter & mask, bool inverted) override; ColumnPtr permute(const IColumn::Permutation & perm, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override; template ColumnPtr indexImpl(const PaddedPODArray & indexes, size_t limit) const; ColumnPtr replicate(const IColumn::Offsets & offsets) const override; void getExtremes(Field & min, Field & max) const override; bool canBeInsideNullable() const override { return true; } bool isFixedAndContiguous() const override { return true; } size_t sizeOfValueIfFixed() const override { return sizeof(T); } /* Raw view of the whole array: byteSize() bytes starting at data.data(). */ std::string_view getRawData() const override { return {reinterpret_cast(data.data()), byteSize()}; } StringRef getDataAt(size_t n) const override { return StringRef(reinterpret_cast(&data[n]), sizeof(data[n])); } bool isDefaultAt(size_t n) const override { return data[n] == T{}; } /* True only when the dynamic type of rhs equals ColumnVector (typeid comparison). */ bool structureEquals(const IColumn & rhs) const override { return typeid(rhs) == typeid(ColumnVector); } ColumnPtr createWithOffsets(const IColumn::Offsets & offsets, const ColumnConst & column_with_default_value, size_t total_rows, size_t shift) const 
override; ColumnPtr compress() const override; /// Replace elements that match the filter with zeroes. If inverted, replaces the elements that do not match instead. void applyZeroMap(const IColumn::Filter & filt, bool inverted = false); /** More efficient methods of manipulation - direct access to the underlying data. */ Container & getData() { return data; } const Container & getData() const { return data; } const T & getElement(size_t n) const { return data[n]; } T & getElement(size_t n) { return data[n]; } protected: Container data; }; /* Generic gather: res_data[i] = data[indexes[i]] for every i in [0, limit). */ DECLARE_DEFAULT_CODE( template inline void vectorIndexImpl(const Container & data, const PaddedPODArray & indexes, size_t limit, Container & res_data) { for (size_t i = 0; i < limit; ++i) res_data[i] = data[indexes[i]]; } ); /* AVX512VBMI gather on byte lanes: loads the source into up to four 64-byte tables (at most the first 256 bytes of data), looks up 64 indexes per iteration with byte-permute instructions, and handles the remaining limit - limit64 elements with masked load/store. */ DECLARE_AVX512VBMI_SPECIFIC_CODE( template inline void vectorIndexImpl(const Container & data, const PaddedPODArray & indexes, size_t limit, Container & res_data) { static constexpr UInt64 MASK64 = 0xffffffffffffffff; const size_t limit64 = limit & ~63; /* limit rounded down to a multiple of 64 */ size_t pos = 0; size_t data_size = data.size(); auto data_pos = reinterpret_cast(data.data()); auto indexes_pos = reinterpret_cast(indexes.data()); auto res_pos = reinterpret_cast(res_data.data()); if (limit == 0) return; /// nothing to do, just return if (data_size <= 64) { /// one single mask load for table size <= 64 __mmask64 last_mask = MASK64 >> (64 - data_size); /* low data_size bits set; NOTE(review): data_size == 0 would make this a shift by 64 - presumably excluded because limit > 0 needs a non-empty table, confirm */ __m512i table1 = _mm512_maskz_loadu_epi8(last_mask, data_pos); /// 64 bytes table lookup using one single permutexvar_epi8 while (pos < limit64) { __m512i vidx = _mm512_loadu_epi8(indexes_pos + pos); __m512i out = _mm512_permutexvar_epi8(vidx, table1); _mm512_storeu_epi8(res_pos + pos, out); pos += 64; } /// tail handling if (limit > limit64) { __mmask64 tail_mask = MASK64 >> (limit64 + 64 - limit); /* low (limit - limit64) bits set */ __m512i vidx = _mm512_maskz_loadu_epi8(tail_mask, indexes_pos + pos); __m512i out = _mm512_permutexvar_epi8(vidx, table1); _mm512_mask_storeu_epi8(res_pos + pos, tail_mask, out); } } else if (data_size <= 128) { /// table 
size (64, 128] requires 2 zmm load __mmask64 last_mask = MASK64 >> (128 - data_size); /* low (data_size - 64) bits set: valid bytes of the second table */ __m512i table1 = _mm512_loadu_epi8(data_pos); __m512i table2 = _mm512_maskz_loadu_epi8(last_mask, data_pos + 64); /// 128 bytes table lookup using one single permutex2var_epi8 while (pos < limit64) { __m512i vidx = _mm512_loadu_epi8(indexes_pos + pos); __m512i out = _mm512_permutex2var_epi8(table1, vidx, table2); _mm512_storeu_epi8(res_pos + pos, out); pos += 64; } if (limit > limit64) { __mmask64 tail_mask = MASK64 >> (limit64 + 64 - limit); /* low (limit - limit64) bits set */ __m512i vidx = _mm512_maskz_loadu_epi8(tail_mask, indexes_pos + pos); __m512i out = _mm512_permutex2var_epi8(table1, vidx, table2); _mm512_mask_storeu_epi8(res_pos + pos, tail_mask, out); } } else { if (data_size > 256) { /// byte index will not exceed 256 boundary. data_size = 256; } __m512i table1 = _mm512_loadu_epi8(data_pos); __m512i table2 = _mm512_loadu_epi8(data_pos + 64); __m512i table3, table4; if (data_size <= 192) { /// only 3 tables need to load if size <= 192 __mmask64 last_mask = MASK64 >> (192 - data_size); table3 = _mm512_maskz_loadu_epi8(last_mask, data_pos + 128); table4 = _mm512_setzero_si512(); } else { __mmask64 last_mask = MASK64 >> (256 - data_size); table3 = _mm512_loadu_epi8(data_pos + 128); table4 = _mm512_maskz_loadu_epi8(last_mask, data_pos + 192); } /// 256 bytes table lookup can use: 2 permutex2var_epi8 plus 1 blender with MSB while (pos < limit64) { __m512i vidx = _mm512_loadu_epi8(indexes_pos + pos); __m512i tmp1 = _mm512_permutex2var_epi8(table1, vidx, table2); __m512i tmp2 = _mm512_permutex2var_epi8(table3, vidx, table4); __mmask64 msb = _mm512_movepi8_mask(vidx); /* top bit of each index byte: set means take tmp2 (tables 3 and 4), clear means take tmp1 (tables 1 and 2) */ __m512i out = _mm512_mask_blend_epi8(msb, tmp1, tmp2); _mm512_storeu_epi8(res_pos + pos, out); pos += 64; } if (limit > limit64) { __mmask64 tail_mask = MASK64 >> (limit64 + 64 - limit); /* low (limit - limit64) bits set */ __m512i vidx = _mm512_maskz_loadu_epi8(tail_mask, indexes_pos + pos); __m512i tmp1 = _mm512_permutex2var_epi8(table1, vidx, table2); __m512i tmp2 = 
_mm512_permutex2var_epi8(table3, vidx, table4); __mmask64 msb = _mm512_movepi8_mask(vidx); __m512i out = _mm512_mask_blend_epi8(msb, tmp1, tmp2); _mm512_mask_storeu_epi8(res_pos + pos, tail_mask, out); } } } ); /* Returns a new column of `limit` elements gathered from this column by `indexes`. When USE_MULTITARGET_CODE is enabled, both T and the index type are one byte wide, and isArchSupported reports AVX512VBMI, the VBMI kernel is used; otherwise the default loop runs. */ template template ColumnPtr ColumnVector::indexImpl(const PaddedPODArray & indexes, size_t limit) const { assert(limit <= indexes.size()); auto res = this->create(limit); typename Self::Container & res_data = res->getData(); #if USE_MULTITARGET_CODE if constexpr (sizeof(T) == 1 && sizeof(Type) == 1) { /// VBMI optimization only applicable for (U)Int8 types if (isArchSupported(TargetArch::AVX512VBMI)) { TargetSpecific::AVX512VBMI::vectorIndexImpl(data, indexes, limit, res_data); return res; } } #endif TargetSpecific::Default::vectorIndexImpl(data, indexes, limit, res_data); return res; } template concept is_col_vector = std::is_same_v>; /// Prevent implicit template instantiation of ColumnVector for common types extern template class ColumnVector; extern template class ColumnVector; extern template class ColumnVector; extern template class ColumnVector; extern template class ColumnVector; extern template class ColumnVector; extern template class ColumnVector; extern template class ColumnVector; extern template class ColumnVector; extern template class ColumnVector; extern template class ColumnVector; extern template class ColumnVector; extern template class ColumnVector; extern template class ColumnVector; extern template class ColumnVector; extern template class ColumnVector; extern template class ColumnVector; }