#pragma once #include "config.h" #if USE_NLP #include #include #include #include #include #include #include #include #include #include #include #include namespace DB { /// FrequencyHolder class is responsible for storing and loading dictionaries /// needed for text classification functions: /// /// 1. detectLanguageUnknown /// 2. detectCharset /// 3. detectTonality /// 4. detectProgrammingLanguage class FrequencyHolder { public: struct Language { String name; HashMap map; }; struct Encoding { String name; String lang; HashMap map; }; using Map = HashMap; using Container = std::vector; using EncodingMap = HashMap; using EncodingContainer = std::vector; static FrequencyHolder & getInstance(); const Map & getEmotionalDict() const { return emotional_dict; } const EncodingContainer & getEncodingsFrequency() const { return encodings_freq; } const Container & getProgrammingFrequency() const { return programming_freq; } private: FrequencyHolder(); void loadEncodingsFrequency(); void loadEmotionalDict(); void loadProgrammingFrequency(); Arena string_pool; Map emotional_dict; Container programming_freq; EncodingContainer encodings_freq; }; } #endif