#pragma once

// NOTE(review): in this copy of the file the five include directives below carry no header names,
// and `HashMap` / `std::vector` further down carry no template arguments. They look stripped
// (angle-bracketed text lost) rather than intentional — TODO restore them from the upstream file.
#include
#include
#include
#include
#include

namespace DB
{

class ReadBuffer;

/** Stream for reading data in TSKV format.
  * TSKV is a very inefficient data format.
  * It is similar to TSV, but each field is written as key=value.
  * Fields can be listed in any order (the order may even differ from line to line),
  *  and some fields may be missing.
  * An equal sign can be escaped in the field name.
  * A line may also contain an additional, useless `tskv` fragment - it needs to be ignored.
  */
class TSKVRowInputFormat final : public IRowInputFormat
{
public:
    TSKVRowInputFormat(ReadBuffer & in_, Block header_, Params params_, const FormatSettings & format_settings_);

    /// Name of this input format.
    String getName() const override { return "TSKVRowInputFormat"; }

    /// Defined outside this header.
    void resetParser() override;

private:
    /// The overrides below that have no inline body are defined outside this header.
    void readPrefix() override;
    bool readRow(MutableColumns & columns, RowReadExtension &) override;

    /// Syncing after a parse error is allowed for this format (always returns true).
    bool allowSyncAfterError() const override { return true; }
    void syncAfterError() override;

    /// Row counting is supported for this format (always returns true).
    bool supportsCountRows() const override { return true; }
    size_t countRows(size_t max_block_size) override;

    /// Custom serializations are supported for this format (always returns true).
    bool supportsCustomSerializations() const override { return true; }

    /// Format settings, stored by value (the constructor receives them by const reference).
    const FormatSettings format_settings;

    /// Buffer for the field name read from the stream. Used when the name has to be copied.
    String name_buf;

    /// Hash table mapping `field name -> position in the block`. NOTE: a perfect hash map could be used here.
    using NameMap = HashMap;
    NameMap name_map;

    /// Set of columns for which values were read. The rest will be filled with default values.
    std::vector read_columns;
    /// Set of columns already encountered in the row. An exception is thrown if a column name occurs more than once.
    std::vector seen_columns;
    /// These two sets may differ: if null_as_default=1, read_columns[i] will be false while seen_columns[i] will be true
    /// for a row like ..., non-nullable column name=\N, ...
};

/// Schema reader for the TSKV format, built on IRowWithNamesSchemaReader.
class TSKVSchemaReader : public IRowWithNamesSchemaReader
{
public:
    TSKVSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_);

private:
    /// Defined outside this header. Presumably reads one row, returns the column names and types found in it,
    /// and reports end of input through `eof` — TODO confirm against the implementation.
    NamesAndTypesList readRowAndGetNamesAndDataTypes(bool & eof) override;

    /// Starts as true. Presumably cleared once the first row has been handled — TODO confirm against the implementation.
    bool first_row = true;
};

}