#pragma once #include #include #include #include #include namespace DB { const size_t DEFAULT_SCHEMA_CACHE_ELEMENTS = 4096; /// Cache that stores columns description and/or number of rows by some string key. /// It's used in schema inference and fast count from format file. /// It implements LRU semantic: after each access to a key in cache we move this key to /// the end of the queue, if we reached the limit of maximum elements in the cache we /// remove keys from the beginning of the queue. /// It also supports keys invalidations by last modification time. If last modification time /// is provided and last modification happened after a key was added to the cache, this key /// will be removed from cache. class SchemaCache { public: explicit SchemaCache(size_t max_elements_); struct Key { String source; String format; String additional_format_info; String schema_inference_mode; bool operator==(const Key & other) const { return source == other.source && format == other.format && additional_format_info == other.additional_format_info && schema_inference_mode == other.schema_inference_mode; } }; using Keys = std::vector; struct KeyHash { size_t operator()(const Key & key) const { return std::hash()(key.source + key.format + key.additional_format_info + key.schema_inference_mode); } }; struct SchemaInfo { std::optional columns; std::optional num_rows; time_t registration_time; }; using LastModificationTimeGetter = std::function()>; /// Add new key or update existing with a schema void addColumns(const Key & key, const ColumnsDescription & columns); /// Add/update many keys with the same schema (usually used for globs) void addManyColumns(const Keys & keys, const ColumnsDescription & columns); /// Add new key or update existing with number of rows void addNumRows(const Key & key, size_t num_rows); std::optional tryGetColumns(const Key & key, LastModificationTimeGetter get_last_mod_time = {}); std::optional tryGetNumRows(const Key & key, LastModificationTimeGetter get_last_mod_time = {}); void clear(); std::unordered_map getAll(); private: void addUnlocked(const Key & key, const std::optional & columns, std::optional num_rows); std::optional tryGetImpl(const Key & key, LastModificationTimeGetter get_last_mod_time); void checkOverflow(); using Queue = std::list; using QueueIterator = Queue::iterator; struct Cell { SchemaInfo schema_info; QueueIterator iterator; }; Queue queue; std::unordered_map data; size_t max_elements; std::mutex mutex; }; }