#pragma once

#include <irt/common/lib/language/language.h>

#include <map>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>


// Unordered collection of unique words, stored as std::string.
using WordSet = std::unordered_set<std::string>;

// Dictionary holder used for word normalization. It stores a table of
// languages, a word -> norm mapping, per-key counters and a set of "bad" words.
// NOTE(review): only the inline members are defined in this header; what the
// out-of-line methods do exactly must be confirmed against their definitions.
class NormDict {
public:
    NormDict() = default;
    ~NormDict() = default;

    // Dictionary loading.
    // Every loader takes a file name (and, for some, a language identifier
    // string) and returns bool — presumably true on success; confirm against
    // the implementation.
    bool            LoadConfig(const char *filename);
    bool            LoadNorm(const char *filename);
    bool            LoadWordCount(const char *filename);
    bool            LoadStops(const char *filename, const char *lang);
    bool            LoadSynCells(const char *filename, const char *lang);
    // `words` is passed by non-const reference — presumably the output set
    // that gets filled from the file; verify against the definition.
    bool            LoadWordSet(const char *filename, const char *lang, WordSet& words);

    // Number of entries currently held in the language table.
    size_t          NumLanguages() const    { return languages.size(); }
    // Number of entries currently held in the word -> norm mapping.
    size_t          NumWords() const        { return word2norm.size(); }

    // Normalization.
    // NOTE(review): the returned pointer's owner/lifetime and the not-found
    // result of WordToNorm are not visible here — confirm before relying on them.
    const char*     WordToNorm(const char *word, const char *lang) const;
    void            GetNormWords(const char *text, const char *lang, std::vector<std::string>&) const;
    unsigned        GetNormCount(const char *word) const;
    std::string     Normalize(const char *text, const char *lang, bool uniq=false) const;
    std::string     Snormalize(const char *text, const char *lang) const;

    // Returns true when `text` is present in the bad-word set.
    // A null `text` yields false: building the std::string lookup key from a
    // null pointer would be undefined behavior, so it is rejected up front.
    bool            IsNormBad(const char *text) const {
        return text != nullptr && bad_words.find(text) != bad_words.end();
    }

private:

    // Language table keyed by a string (presumably a language name/code — TODO confirm).
    std::map<std::string, NIRT::TLanguage>     languages;
    // Mapping from a word to its normalized form.
    std::unordered_map<std::string, std::string>  word2norm;
    // Counter stored per string key (presumably per normalized form — TODO confirm).
    std::unordered_map<std::string, unsigned>  norm2count;
    // Set consulted by IsNormBad().
    WordSet bad_words;
};
