#pragma once

/// NOTE(review): in this copy of the file the #include directives below carry no header names, and
/// template parameter / argument lists appear to be missing throughout (e.g. `template class ...`,
/// `std::make_shared()`, `checkAndGetColumn>(&data)`). Presumably the angle-bracket contents were lost
/// in extraction — restore them from the upstream source before compiling. The code tokens are left
/// exactly as found; only layout and comments were changed.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

namespace DB
{

namespace ErrorCodes
{
    extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION;
    extern const int ILLEGAL_COLUMN;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int SIZES_OF_ARRAYS_DONT_MATCH;
}

class FunctionArrayEnumerateUniq;
class FunctionArrayEnumerateDense;

/// Shared implementation of a variadic function over one or more arrays of equal lengths.
/// For every array element it writes a UInt32 number into a result array that reuses the offsets
/// of the first argument. Two numbering schemes exist (marked "Unique" and "Dense" in
/// executeMethodImpl); which one is used is chosen at compile time.
/// The function name is taken from Derived::name.
template class FunctionArrayEnumerateExtended : public IFunction
{
public:
    static FunctionPtr create(ContextPtr) { return std::make_shared(); }

    String getName() const override { return Derived::name; }

    /// Variadic: the argument count is validated in getReturnTypeImpl instead.
    bool isVariadic() const override { return true; }
    size_t getNumberOfArguments() const override { return 0; }
    bool useDefaultImplementationForConstants() const override { return true; }
    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }

    /// Requires at least one argument and requires every argument to be an array type.
    /// Throws TOO_FEW_ARGUMENTS_FOR_FUNCTION or ILLEGAL_TYPE_OF_ARGUMENT otherwise.
    /// NOTE(review): the type arguments of the returned type are missing in this copy; presumably
    /// an array of UInt32, matching the ColumnUInt32 nested column built in executeImpl — confirm.
    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
    {
        if (arguments.empty())
            throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION,
                "Number of arguments for function {} doesn't match: passed {}, should be at least 1.",
                getName(), arguments.size());

        for (size_t i = 0; i < arguments.size(); ++i)
        {
            const DataTypeArray * array_type = checkAndGetDataType(arguments[i].get());
            if (!array_type)
                throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                    "All arguments for function {} must be arrays but argument {} has type {}.",
                    getName(), i + 1, arguments[i]->getName());
        }

        return std::make_shared(std::make_shared());
    }

    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override;

private:
    /// Initially allocate a piece of memory for 64 elements. NOTE: This is just a guess.
    static constexpr size_t INITIAL_SIZE_DEGREE = 6;

    /// Each Method* struct below pairs a hash map type (`Set`) with a ColumnsHashing key-extraction
    /// type (`Method`). executeMethodImpl instantiates both: `Set` holds the per-array state and
    /// `Method` inserts the key found at a given flat element position.
    template struct MethodOneNumber
    {
        using Set = ClearableHashMapWithStackMemory, INITIAL_SIZE_DEGREE>;
        using Method = ColumnsHashing::HashMethodOneNumber;
    };

    struct MethodString
    {
        using Set = ClearableHashMapWithStackMemory;
        using Method = ColumnsHashing::HashMethodString;
    };

    struct MethodFixedString
    {
        using Set = ClearableHashMapWithStackMemory;
        using Method = ColumnsHashing::HashMethodFixedString;
    };

    /// Used by execute128bit (keys of fixed size, at most 16 bytes in total).
    struct MethodFixed
    {
        using Set = ClearableHashMapWithStackMemory;
        using Method = ColumnsHashing::HashMethodKeysFixed;
    };

    /// Used by executeHashed (the generic fallback).
    struct MethodHashed
    {
        using Set = ClearableHashMapWithStackMemory;
        using Method = ColumnsHashing::HashMethodHashed;
    };

    /// Selects the executeMethodImpl variant depending on whether `null_map` is set.
    template void executeMethod(const ColumnArray::Offsets & offsets, const ColumnRawPtrs & columns, const Sizes & key_sizes, const NullMap * null_map, ColumnUInt32::Container & res_values) const;

    /// The numbering loop itself; fills `res_values` for every element position covered by `offsets`.
    template void executeMethodImpl(const ColumnArray::Offsets & offsets, const ColumnRawPtrs & columns, const Sizes & key_sizes, const NullMap * null_map, ColumnUInt32::Container & res_values) const;

    /// The bool-returning helpers below return false without touching `res_values`
    /// when the column(s) are not of the kind they handle, so the caller can try the next one.
    template bool executeNumber(const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values) const;
    bool executeString(const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values) const;
    bool executeFixedString(const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values) const;
    bool execute128bit(const ColumnArray::Offsets & offsets, const ColumnRawPtrs & columns, ColumnUInt32::Container & res_values) const;

    /// Fallback that accepts any columns; always produces a result.
    void executeHashed(const ColumnArray::Offsets & offsets, const ColumnRawPtrs & columns, ColumnUInt32::Container & res_values) const;
};


/// Builds the result array column.
///
/// Steps:
///  1. Take the array column of every argument (constant arrays are converted to full columns),
///     check that all offsets equal those of the first argument, and collect the nested data columns.
///  2. Look for a Nullable data column and remember its null map.
///  3. Size the result to the total number of elements and fill it through the most specific
///     execution path that accepts the column(s).
///
/// Throws ILLEGAL_COLUMN if an argument is neither an array column nor a constant array column,
/// and SIZES_OF_ARRAYS_DONT_MATCH if the offsets of the arguments differ.
template ColumnPtr FunctionArrayEnumerateExtended::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const
{
    /// Points at the offsets of the first argument; set on the first loop iteration.
    /// getReturnTypeImpl rejects an empty argument list, which is what keeps this non-null below.
    const ColumnArray::Offsets * offsets = nullptr;
    size_t num_arguments = arguments.size();
    ColumnRawPtrs data_columns(num_arguments);

    /// Owns the full columns created from constant arguments, so that the raw pointers stored
    /// in `data_columns` and `offsets` stay valid until the end of the function.
    Columns array_holders;
    ColumnPtr offsets_column;

    for (size_t i = 0; i < num_arguments; ++i)
    {
        const ColumnPtr & array_ptr = arguments[i].column;
        const ColumnArray * array = checkAndGetColumn(array_ptr.get());
        if (!array)
        {
            const ColumnConst * const_array = checkAndGetColumnConst( arguments[i].column.get());
            if (!const_array)
                throw Exception(ErrorCodes::ILLEGAL_COLUMN,
                    "Illegal column {} of {}-th argument of function {}",
                    arguments[i].column->getName(), i + 1, getName());

            array_holders.emplace_back(const_array->convertToFullColumn());
            array = checkAndGetColumn(array_holders.back().get());
        }

        const ColumnArray::Offsets & offsets_i = array->getOffsets();
        if (i == 0)
        {
            /// The first argument defines the array boundaries of the result.
            offsets = &offsets_i;
            offsets_column = array->getOffsetsPtr();
        }
        else if (offsets_i != *offsets)
            throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
                "Lengths of all arrays passed to {} must be equal.", getName());

        const auto * array_data = &array->getData();
        data_columns[i] = array_data;
    }

    /// Only the first Nullable data column found is considered (note the `break`).
    /// The Nullable wrapper is replaced by its nested column only in the single-argument case.
    /// NOTE(review): with several arguments `null_map` is still assigned here, but neither
    /// execute128bit nor executeHashed below receives it — confirm that this is intended.
    const NullMap * null_map = nullptr;

    for (size_t i = 0; i < num_arguments; ++i)
    {
        if (const auto * nullable_col = checkAndGetColumn(data_columns[i]))
        {
            if (num_arguments == 1)
                data_columns[i] = &nullable_col->getNestedColumn();

            null_map = &nullable_col->getNullMapData();
            break;
        }
    }

    auto res_nested = ColumnUInt32::create();

    /// One result value per array element: the last offset is the total element count.
    ColumnUInt32::Container & res_values = res_nested->getData();
    if (!offsets->empty())
        res_values.resize(offsets->back());

    if (num_arguments == 1)
    {
        /// Try the typed paths in order; `||` stops at the first one that accepts the column.
        /// NOTE(review): the ten executeNumber calls are textually identical in this copy;
        /// presumably each names a different numeric type as template argument.
        /// NOTE(review): the executeHashed fallback does not receive `null_map`, although the
        /// Nullable wrapper has already been removed from data_columns[0] above — verify how
        /// NULL elements are meant to be numbered on this path.
        if (!(executeNumber(*offsets, *data_columns[0], null_map, res_values)
            || executeNumber(*offsets, *data_columns[0], null_map, res_values)
            || executeNumber(*offsets, *data_columns[0], null_map, res_values)
            || executeNumber(*offsets, *data_columns[0], null_map, res_values)
            || executeNumber(*offsets, *data_columns[0], null_map, res_values)
            || executeNumber(*offsets, *data_columns[0], null_map, res_values)
            || executeNumber(*offsets, *data_columns[0], null_map, res_values)
            || executeNumber(*offsets, *data_columns[0], null_map, res_values)
            || executeNumber(*offsets, *data_columns[0], null_map, res_values)
            || executeNumber(*offsets, *data_columns[0], null_map, res_values)
            || executeString(*offsets, *data_columns[0], null_map, res_values)
            || executeFixedString(*offsets, *data_columns[0], null_map, res_values)))
            executeHashed(*offsets, data_columns, res_values);
    }
    else
    {
        /// Several arguments: use the packed fixed-size key path when possible, else the generic one.
        if (!execute128bit(*offsets, data_columns, res_values))
            executeHashed(*offsets, data_columns, res_values);
    }

    return ColumnArray::create(std::move(res_nested), offsets_column);
}


/// Fills `res_values` array by array.
///
/// `offsets` holds the end position of every array in the flattened data, so array k covers the
/// positions [prev_off, off). The hash map `indices` is cleared at the start of every array:
/// numbering never carries over from one array (row) to the next.
///
/// `null_map` is dereferenced only inside `if constexpr (has_null_map)` blocks, hence it may be
/// nullptr in the variant without a null map (and is marked [[maybe_unused]]).
template template void FunctionArrayEnumerateExtended::executeMethodImpl(
    const ColumnArray::Offsets & offsets,
    const ColumnRawPtrs & columns,
    const Sizes & key_sizes,
    [[maybe_unused]] const NullMap * null_map,
    ColumnUInt32::Container & res_values) const
{
    typename Method::Set indices;
    typename Method::Method method(columns, key_sizes, nullptr);
    Arena pool; /// Won't use it;

    ColumnArray::Offset prev_off = 0;

    /// NOTE(review): the template arguments of std::is_same_v are missing in this copy; judging by
    /// the branch comments it selects between the "Unique" and the "Dense" numbering.
    if constexpr (std::is_same_v)
    {
        // Unique
        /// Every element gets the number of times its key has occurred so far within the current
        /// array: 1 for the first occurrence, 2 for the second, and so on.
        for (size_t off : offsets)
        {
            indices.clear();
            UInt32 null_count = 0;
            for (size_t j = prev_off; j < off; ++j)
            {
                if constexpr (has_null_map)
                {
                    /// NULL elements are counted separately and are not inserted into the map.
                    if ((*null_map)[j])
                    {
                        res_values[j] = ++null_count;
                        continue;
                    }
                }

                /// Increment the counter stored for this key and write the new value back.
                /// Assumes the mapped value of a newly inserted key is 0 — TODO confirm.
                auto emplace_result = method.emplaceKey(indices, j, pool);
                auto idx = emplace_result.getMapped() + 1;
                emplace_result.setMapped(idx);

                res_values[j] = idx;
            }
            prev_off = off;
        }
    }
    else
    {
        // Dense
        /// Every distinct key gets the next rank (1, 2, 3, ...) when it is first met within the
        /// current array; later occurrences of the same key reuse that rank.
        for (size_t off : offsets)
        {
            indices.clear();
            UInt32 rank = 0;
            [[maybe_unused]] UInt32 null_index = 0;
            for (size_t j = prev_off; j < off; ++j)
            {
                if constexpr (has_null_map)
                {
                    /// All NULL elements of one array share a single rank, taken at the first NULL.
                    if ((*null_map)[j])
                    {
                        if (!null_index)
                            null_index = ++rank;

                        res_values[j] = null_index;
                        continue;
                    }
                }

                auto emplace_result = method.emplaceKey(indices, j, pool);
                auto idx = emplace_result.getMapped();

                /// A mapped value of 0 is treated as "no rank assigned yet" (ranks start at 1).
                if (!idx)
                {
                    idx = ++rank;
                    emplace_result.setMapped(idx);
                }

                res_values[j] = idx;
            }
            prev_off = off;
        }
    }
}


/// Dispatches to executeMethodImpl depending on whether a null map was supplied.
/// NOTE(review): both calls are textually identical in this copy; presumably they differ in the
/// `has_null_map` template argument that executeMethodImpl tests — confirm against upstream.
template template void FunctionArrayEnumerateExtended::executeMethod(
    const ColumnArray::Offsets & offsets,
    const ColumnRawPtrs & columns,
    const Sizes & key_sizes,
    const NullMap * null_map,
    ColumnUInt32::Container & res_values) const
{
    if (null_map)
        executeMethodImpl(offsets, columns, key_sizes, null_map, res_values);
    else
        executeMethodImpl(offsets, columns, key_sizes, null_map, res_values);
}


/// Single-column path for numeric data.
/// Returns false, leaving `res_values` untouched, if `data` is not of the column type checked for;
/// otherwise fills `res_values` (with an empty key size list) and returns true.
template template bool FunctionArrayEnumerateExtended::executeNumber(
    const ColumnArray::Offsets & offsets,
    const IColumn & data,
    const NullMap * null_map,
    ColumnUInt32::Container & res_values) const
{
    const auto * nested = checkAndGetColumn>(&data);
    if (!nested)
        return false;

    executeMethod>(offsets, {nested}, {}, null_map, res_values);
    return true;
}


/// Single-column path for string data.
/// The returned bool is the column pointer converted to bool: true iff the type check succeeded
/// and `res_values` has been filled.
template bool FunctionArrayEnumerateExtended::executeString(
    const ColumnArray::Offsets & offsets,
    const IColumn & data,
    const NullMap * null_map,
    ColumnUInt32::Container & res_values) const
{
    const auto * nested = checkAndGetColumn(&data);
    if (nested)
        executeMethod(offsets, {nested}, {}, null_map, res_values);

    return nested;
}


/// Single-column path for fixed-length string data; same contract as executeString.
template bool FunctionArrayEnumerateExtended::executeFixedString(
    const ColumnArray::Offsets & offsets,
    const IColumn & data,
    const NullMap * null_map,
    ColumnUInt32::Container & res_values) const
{
    const auto * nested = checkAndGetColumn(&data);
    if (nested)
        executeMethod(offsets, {nested}, {}, null_map, res_values);

    return nested;
}


/// Multi-column path for keys made of fixed-size values.
/// Applicable only if every column reports isFixedAndContiguous() and the value sizes add up to
/// at most 16 bytes (128 bits); otherwise returns false without touching `res_values`.
/// No null map is passed on this path.
template bool FunctionArrayEnumerateExtended::execute128bit(
    const ColumnArray::Offsets & offsets,
    const ColumnRawPtrs & columns,
    ColumnUInt32::Container & res_values) const
{
    size_t count = columns.size();
    size_t keys_bytes = 0;
    Sizes key_sizes(count);

    for (size_t j = 0; j < count; ++j)
    {
        if (!columns[j]->isFixedAndContiguous())
            return false;

        key_sizes[j] = columns[j]->sizeOfValueIfFixed();
        keys_bytes += key_sizes[j];
    }

    if (keys_bytes > 16)
        return false;

    executeMethod(offsets, columns, key_sizes, nullptr, res_values);
    return true;
}


/// Generic fallback for any set of columns: runs executeMethod with an empty key size list
/// and without a null map.
template void FunctionArrayEnumerateExtended::executeHashed(
    const ColumnArray::Offsets & offsets,
    const ColumnRawPtrs & columns,
    ColumnUInt32::Container & res_values) const
{
    executeMethod(offsets, columns, {}, nullptr, res_values);
}

}