#pragma once #include namespace DB { namespace ErrorCodes { extern const int NO_AVAILABLE_DATA; extern const int INCORRECT_DATA; extern const int TOO_LARGE_ARRAY_SIZE; } } /** Replacement of the hash table for a small number (<10) of keys. * Implemented as an array with linear search. * The array is located inside the object. * The interface is a subset of the HashTable interface. * * Insert is possible only if the `full` method returns false. * With an unknown number of different keys, * you should check if the table is not full, * and do a `fallback` in this case (for example, use a real hash table). */ template < typename Key, typename Cell, size_t capacity > class SmallTable : private boost::noncopyable, protected Cell::State { protected: friend class const_iterator; friend class iterator; friend class Reader; using Self = SmallTable; size_t m_size = 0; /// Amount of elements. Cell buf[capacity]; /// A piece of memory for all elements. /// Find a cell with the same key or an empty cell, starting from the specified position and then by the collision resolution chain. const Cell * ALWAYS_INLINE findCell(const Key & x) const { const Cell * it = buf; while (it < buf + m_size) { if (it->keyEquals(x)) break; ++it; } return it; } Cell * ALWAYS_INLINE findCell(const Key & x) { Cell * it = buf; while (it < buf + m_size) { if (it->keyEquals(x)) break; ++it; } return it; } public: using key_type = Key; using mapped_type = typename Cell::mapped_type; using value_type = typename Cell::value_type; class Reader final : private Cell::State { public: explicit Reader(DB::ReadBuffer & in_) : in(in_) { } Reader(const Reader &) = delete; Reader & operator=(const Reader &) = delete; bool next() { if (!is_initialized) { Cell::State::read(in); DB::readVarUInt(size, in); if (size > capacity) throw DB::Exception(DB::ErrorCodes::INCORRECT_DATA, "Illegal size"); is_initialized = true; } if (read_count == size) { is_eof = true; return false; } cell.read(in); ++read_count; return true; } const value_type & get() const { if (!is_initialized || is_eof) throw DB::Exception(DB::ErrorCodes::NO_AVAILABLE_DATA, "No available data"); return cell.getValue(); } private: DB::ReadBuffer & in; Cell cell; size_t read_count = 0; size_t size = 0; bool is_eof = false; bool is_initialized = false; }; class iterator /// NOLINT { Self * container = nullptr; Cell * ptr = nullptr; friend class SmallTable; public: iterator() {} /// NOLINT iterator(Self * container_, Cell * ptr_) : container(container_), ptr(ptr_) {} bool operator== (const iterator & rhs) const { return ptr == rhs.ptr; } bool operator!= (const iterator & rhs) const { return ptr != rhs.ptr; } iterator & operator++() { ++ptr; return *this; } Cell & operator* () const { return *ptr; } Cell * operator->() const { return ptr; } Cell * getPtr() const { return ptr; } }; class const_iterator /// NOLINT { const Self * container = nullptr; const Cell * ptr = nullptr; friend class SmallTable; public: const_iterator() = default; const_iterator(const Self * container_, const Cell * ptr_) : container(container_), ptr(ptr_) {} /// NOLINT const_iterator(const iterator & rhs) : container(rhs.container), ptr(rhs.ptr) {} /// NOLINT bool operator== (const const_iterator & rhs) const { return ptr == rhs.ptr; } bool operator!= (const const_iterator & rhs) const { return ptr != rhs.ptr; } const_iterator & operator++() { ++ptr; return *this; } const Cell & operator* () const { return *ptr; } const Cell * operator->() const { return ptr; } const Cell * getPtr() const { return ptr; } }; const_iterator begin() const { return iteratorTo(buf); } iterator begin() { return iteratorTo(buf); } const_iterator end() const { return iteratorTo(buf + m_size); } iterator end() { return iteratorTo(buf + m_size); } protected: const_iterator iteratorTo(const Cell * ptr) const { return const_iterator(this, ptr); } iterator iteratorTo(Cell * ptr) { return iterator(this, ptr); } public: /** The table is full. * You can not insert anything into the full table. */ bool full() { return m_size == capacity; } /// Insert the value. In the case of any more complex values, it is better to use the `emplace` function. std::pair ALWAYS_INLINE insert(const value_type & x) { std::pair res; emplace(Cell::getKey(x), res.first, res.second); if (res.second) res.first.ptr->setMapped(x); return res; } /** Insert the key, * return an iterator to a position that can be used for `placement new` of value, * as well as the flag - whether a new key was inserted. * * You have to make `placement new` of value if you inserted a new key, * since when destroying a hash table, a destructor will be called for it! * * Example usage: * * Map::iterator it; * bool inserted; * map.emplace(key, it, inserted); * if (inserted) * new(&it->second) Mapped(value); */ void ALWAYS_INLINE emplace(Key x, iterator & it, bool & inserted) { Cell * res = findCell(x); it = iteratorTo(res); inserted = res == buf + m_size; if (inserted) { new(res) Cell(x, *this); ++m_size; } } iterator ALWAYS_INLINE find(Key x) { return iteratorTo(findCell(x)); } const_iterator ALWAYS_INLINE find(Key x) const { return iteratorTo(findCell(x)); } void write(DB::WriteBuffer & wb) const { Cell::State::write(wb); DB::writeVarUInt(m_size, wb); for (size_t i = 0; i < m_size; ++i) buf[i].write(wb); } void writeText(DB::WriteBuffer & wb) const { Cell::State::writeText(wb); DB::writeText(m_size, wb); for (size_t i = 0; i < m_size; ++i) { DB::writeChar(',', wb); buf[i].writeText(wb); } } void read(DB::ReadBuffer & rb) { Cell::State::read(rb); m_size = 0; size_t new_size = 0; DB::readVarUInt(new_size, rb); if (new_size > 1000'000) throw DB::Exception(DB::ErrorCodes::TOO_LARGE_ARRAY_SIZE, "The size of serialized small table is suspiciously large: {}", new_size); if (new_size > capacity) throw DB::Exception(DB::ErrorCodes::INCORRECT_DATA, "Illegal size"); for (size_t i = 0; i < new_size; ++i) buf[i].read(rb); m_size = new_size; } void readText(DB::ReadBuffer & rb) { Cell::State::readText(rb); m_size = 0; size_t new_size = 0; DB::readText(new_size, rb); if (new_size > capacity) throw DB::Exception(DB::ErrorCodes::INCORRECT_DATA, "Illegal size"); for (size_t i = 0; i < new_size; ++i) { DB::assertChar(',', rb); buf[i].readText(rb); } m_size = new_size; } size_t size() const { return m_size; } bool empty() const { return 0 == m_size; } void clear() { if (!std::is_trivially_destructible_v) for (iterator it = begin(); it != end(); ++it) it.ptr->~Cell(); m_size = 0; } size_t getBufferSizeInBytes() const { return sizeof(buf); } }; struct HashUnused {}; template < typename Key, size_t capacity > using SmallSet = SmallTable, capacity>;