#pragma once
#include "config.h"
#if USE_NLP
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
namespace DB
{
/// FrequencyHolder class is responsible for storing and loading dictionaries
/// needed for text classification functions:
///
/// 1. detectLanguageUnknown
/// 2. detectCharset
/// 3. detectTonality
/// 4. detectProgrammingLanguage
///
/// The constructor is private, so outside code obtains the object through
/// getInstance() and reads the dictionaries through the const accessors below.
class FrequencyHolder
{
public:
/// A frequency table labelled with a name.
/// NOTE(review): presumably the element type of Container, one entry per
/// programming language - confirm against the loader.
struct Language
{
String name;
HashMap map;
};
/// A frequency table labelled with a name and a language.
/// NOTE(review): `name` is presumably the charset name and `lang` the language
/// the table was built for - confirm against the loader.
struct Encoding
{
String name;
String lang;
HashMap map;
};
/// Type of the dictionary returned by getEmotionalDict().
using Map = HashMap;
/// Type of the collection returned by getProgrammingFrequency().
using Container = std::vector;
/// NOTE(review): not referenced anywhere else in this class; presumably the
/// type of Encoding::map - confirm.
using EncodingMap = HashMap;
/// Type of the collection returned by getEncodingsFrequency().
using EncodingContainer = std::vector;
/// Returns a reference to a FrequencyHolder. Only declared here.
/// NOTE(review): given the private constructor this presumably always yields
/// the same shared instance - confirm in the implementation.
static FrequencyHolder & getInstance();
/// Read-only access to the emotional_dict member.
/// NOTE(review): presumably the dictionary used by detectTonality - confirm.
const Map & getEmotionalDict() const
{
return emotional_dict;
}
/// Read-only access to the encodings_freq member.
const EncodingContainer & getEncodingsFrequency() const
{
return encodings_freq;
}
/// Read-only access to the programming_freq member.
const Container & getProgrammingFrequency() const
{
return programming_freq;
}
private:
/// Private constructor; only declared here.
/// NOTE(review): presumably invokes the load* functions below - confirm.
FrequencyHolder();
/// Loaders are only declared here.
/// NOTE(review): judging by the names, each one presumably fills the member
/// with the matching name (encodings_freq, emotional_dict, programming_freq).
void loadEncodingsFrequency();
void loadEmotionalDict();
void loadProgrammingFrequency();
/// NOTE(review): presumably owns the memory for string keys stored in the
/// maps below; if so, it has to outlive them - confirm in the loaders.
Arena string_pool;
/// Storage behind getEmotionalDict().
Map emotional_dict;
/// Storage behind getProgrammingFrequency().
Container programming_freq;
/// Storage behind getEncodingsFrequency().
EncodingContainer encodings_freq;
};
}
#endif