#pragma once #include #include #include #include #include #include #include #include #include namespace DB { class ReadBuffer; struct NumpyHeader { std::vector shape; std::shared_ptr numpy_type; }; class NpyRowInputFormat final : public IRowInputFormat { public: NpyRowInputFormat(ReadBuffer & in_, Block header_, Params params_); String getName() const override { return "NpyRowInputFormat"; } private: bool supportsCountRows() const override { return true; } size_t countRows(size_t max_block_size) override; void readPrefix() override; bool readRow(MutableColumns & columns, RowReadExtension &) override; void readData(MutableColumns & columns); template void readAndInsertInteger(IColumn * column, const DataTypePtr & data_type, const NumpyDataType & npy_type); template void readAndInsertFloat(IColumn * column, const DataTypePtr & data_type, const NumpyDataType & npy_type); template void readAndInsertString(MutableColumnPtr column, const DataTypePtr & data_type, const NumpyDataType & npy_type, bool is_fixed); template void readBinaryValueAndInsert(MutableColumnPtr column, NumpyDataType::Endianness endianness); template void readBinaryValueAndInsertFloat16(MutableColumnPtr column, NumpyDataType::Endianness endianness); void readRows(MutableColumns & columns); void readValue(IColumn * column); DataTypePtr nested_type; NumpyHeader header; size_t counted_rows = 0; }; class NpySchemaReader : public ISchemaReader { public: explicit NpySchemaReader(ReadBuffer & in_); private: std::optional readNumberOrRows() override; NamesAndTypesList readSchema() override; NumpyHeader header; }; }