#pragma once #include #include #include #include #include #include #include #include #include #include #include #include #include

namespace DB
{

/// Concept built on std::derived_from.
/// NOTE(review): presumably used to constrain the BaseStorageConfiguration
/// parameter of DataLakeConfiguration below - confirm.
template concept StorageConfiguration = std::derived_from;

/// Storage configuration for a data lake table.
///
/// Extends BaseStorageConfiguration and keeps the most recently loaded
/// DataLakeMetadata object in `current_metadata`. The metadata is what supplies
/// the list of data files, the partition columns, the table schema and the
/// logical-to-physical column name mapping used by the methods below.
template class DataLakeConfiguration : public BaseStorageConfiguration, public std::enable_shared_from_this
{
public:
    using Configuration = StorageObjectStorage::Configuration;

    /// Always true for this class.
    bool isDataLakeConfiguration() const override { return true; }

    /// The engine name is taken from the metadata type (DataLakeMetadata::name).
    std::string getEngineName() const override { return DataLakeMetadata::name; }

    /// Refreshes the configuration from the object storage.
    ///
    /// First runs the base class update, then builds a new metadata object via
    /// DataLakeMetadata::create (passing a weak reference to this configuration).
    /// If metadata is already held and compares equal to the new one, nothing
    /// else changes. Otherwise the new metadata replaces the current one and its
    /// data files and partition columns are pushed into the base configuration.
    void update(ObjectStoragePtr object_storage, ContextPtr local_context) override
    {
        BaseStorageConfiguration::update(object_storage, local_context);
        auto new_metadata = DataLakeMetadata::create(object_storage, weak_from_this(), local_context);
        /// Metadata unchanged: keep the current paths and partition columns as they are.
        if (current_metadata && *current_metadata == *new_metadata)
            return;

        current_metadata = std::move(new_metadata);
        BaseStorageConfiguration::setPaths(current_metadata->getDataFiles());
        BaseStorageConfiguration::setPartitionColumns(current_metadata->getPartitionColumns());
    }

    /// Returns the table structure described by the current metadata.
    ///
    /// Yields std::nullopt when no metadata has been loaded yet, or when the
    /// schema returned by the metadata is empty.
    std::optional tryGetTableStructureFromMetadata() const override
    {
        if (!current_metadata)
            return std::nullopt;
        auto schema_from_metadata = current_metadata->getTableSchema();
        if (!schema_from_metadata.empty())
        {
            return ColumnsDescription(std::move(schema_from_metadata));
        }
        return std::nullopt;
    }

private:
    /// Metadata loaded by update() or, lazily, by prepareReadingFromFormat().
    /// Null until one of them has run.
    DataLakeMetadataPtr current_metadata;

    /// Builds the ReadFromFormatInfo for a read and renames header columns to
    /// their physical names.
    ///
    /// Delegates to DB::prepareReadingFromFormat, creates the metadata if it has
    /// not been loaded yet, and then, for every entry of the metadata's
    /// column-name-to-physical-name mapping, renames the matching column of
    /// `info.format_header`.
    ReadFromFormatInfo prepareReadingFromFormat(
        ObjectStoragePtr object_storage,
        const Strings & requested_columns,
        const StorageSnapshotPtr & storage_snapshot,
        bool supports_subset_of_columns,
        ContextPtr local_context) override
    {
        auto info = DB::prepareReadingFromFormat(requested_columns, storage_snapshot, local_context, supports_subset_of_columns);
        /// Metadata may not exist yet if update() has not been called before this read.
        if (!current_metadata)
        {
            current_metadata = DataLakeMetadata::create(object_storage, weak_from_this(), local_context);
        }
        auto column_mapping = current_metadata->getColumnNameToPhysicalNameMapping();
        if (!column_mapping.empty())
        {
            /// NOTE(review): getByName is called for every mapping entry, not only for
            /// columns present in format_header. If the mapping can contain columns that
            /// were not requested (e.g. with supports_subset_of_columns), this lookup may
            /// fail - confirm against getByName's contract.
            /// NOTE(review): the column is renamed in place through a reference; verify that
            /// any name-based lookup kept by format_header stays consistent - TODO confirm.
            for (const auto & [column_name, physical_name] : column_mapping)
            {
                auto & column = info.format_header.getByName(column_name);
                column.name = physical_name;
            }
        }
        return info;
    }
};

/// Concrete configuration aliases. Each one is only declared when the build
/// features it depends on are enabled.

/// Iceberg aliases: all require USE_AVRO; the S3, Azure and HDFS variants
/// additionally require their own storage feature flag.
#if USE_AVRO
#if USE_AWS_S3
using StorageS3IcebergConfiguration = DataLakeConfiguration;
# endif
#if USE_AZURE_BLOB_STORAGE
using StorageAzureIcebergConfiguration = DataLakeConfiguration;
# endif
#if USE_HDFS
using StorageHDFSIcebergConfiguration = DataLakeConfiguration;
# endif
using StorageLocalIcebergConfiguration = DataLakeConfiguration;
#endif

/// DeltaLake alias: requires both USE_PARQUET and USE_AWS_S3.
#if USE_PARQUET
#if USE_AWS_S3
using StorageS3DeltaLakeConfiguration = DataLakeConfiguration;
# endif
#endif

/// Hudi alias: requires USE_AWS_S3.
#if USE_AWS_S3
using StorageS3HudiConfiguration = DataLakeConfiguration;
#endif

}