#pragma once

#include <util/generic/deque.h>
#include <util/generic/hash_set.h>
#include <util/generic/set.h>
#include <util/generic/string.h>
#include <util/string/join.h>

#include <kernel/lemmer/core/language.h>
#include <kernel/lemmer/tools/handler.h>
#include <library/cpp/tokenizer/tokenizer.h>

namespace NWebmaster {

/// Rule matcher that indexes textual rules by lemmatized word n-grams and
/// looks those n-grams up in arbitrary text.
///
/// A rule text is tokenized and lemmatized; every combination that picks one
/// lemma variant per token yields a key. A key string is the "+"-joined
/// *sorted set* of the chosen lemmas, so it ignores word order and repeated
/// lemmas. Searching slides windows of 1..MaxNGram tokens over the text,
/// builds the same kind of keys and collects the rules registered under them.
///
/// @tparam TTag  Payload stored next to the rule text. It is kept inside a
///               TSet as part of TRule, so it has to be orderable.
template<class TTag>
struct TSearcher {
    Y_SAVELOAD_DEFINE(Rules, MaxNGram)

    /// (key string, number of tokens the key was built from).
    using TKey = std::pair<TString, int>;
    /// One entry per token; each entry holds the lemma variants of that token.
    using TSequence = TDeque<TSet<TString>>;
    /// (original rule text, user tag).
    using TRule = std::pair<TString, TTag>;
    using TRules = TSet<TRule>;

    /// Lemmer callback that appends the variants of every token to a
    /// caller-owned sequence.
    struct TTokenHandler : public NLemmer::ILemmerResultsHandler {
        TTokenHandler(TSequence &tokenSequence)
            : TokenSequence(tokenSequence)
        {
        }

        /// Collects the UTF-8 text of every entry in `results` into one set and
        /// appends that set to TokenSequence. Tokens without any result add
        /// nothing. Always returns true.
        // NOTE(review): `result.first.GetText()` is presumably the lemma text — confirm against the lemmer API.
        bool OnLemmerResults(const TWideToken& token, NLP_TYPE, const NLemmer::TLemmerResults* results) override {
            Y_UNUSED(token);

            TSet<TString> tokensSet;
            if (results) {
                for (const auto& result : *results) {
                    tokensSet.insert(WideToUTF8(result.first.GetText()));
                }
            }
            if (!tokensSet.empty()) {
                // The local set is not used afterwards, so hand it over instead of copying.
                TokenSequence.push_back(std::move(tokensSet));
            }
            return true;
        }

        void Flush() override {}

    public:
        /// Destination sequence; owned by the caller and must outlive the handler.
        TSequence &TokenSequence;
    };

    /// Builds a key from the strings in [begin, end).
    /// The strings are put through a TSet first, i.e. sorted and de-duplicated,
    /// and then joined with "+". `parts` is stored as the key's token count.
    template<class T>
    static TKey GetKey(const T &begin, const T &end, size_t parts) {
        return TKey(JoinSeq("+", TSet<TString>(begin, end)), static_cast<int>(parts));
    }

    /// Builds a key from a whole container; the token count is its size.
    template<class T>
    static TKey GetKey(const T &sequence) {
        return GetKey(sequence.begin(), sequence.end(), sequence.size());
    }

    /// Tokenizes and lemmatizes UTF-8 `text` (Russian and English language mask)
    /// and appends one variant set per token to `tokenSequence`.
    /// The output sequence is not cleared beforehand.
    static void GetTokenSequence(const TString &text, TSequence &tokenSequence) {
        const static auto params = NLemmer::TLemmerHandler::TParams(TLangMask(LANG_RUS, LANG_ENG));
        TTokenHandler handler(tokenSequence);
        NLemmer::TLemmerHandler lemmer(params, handler);
        TNlpTokenizer tokenizer(lemmer);
        TUtf16String wText = UTF8ToWide(text);
        tokenizer.Tokenize(wText.c_str(), wText.size());
        lemmer.Flush();
    }

    /// Recursive worker of GetKeysFromSequence.
    /// Tries every variant of the token at `begin`, stores it in `variants[i]`
    /// and recurses into the rest of the range. When the last token has been
    /// assigned, a key built from all of `variants` is appended to `keys`.
    /// `variants` must already have one slot per token of the full range.
    static void GetKeysFromSequence0(const TSequence::iterator &begin, const TSequence::iterator &end, size_t i, TDeque<TString> &variants, TDeque<TKey> &keys) {
        if (begin == end) {
            return;
        }

        const auto next = std::next(begin);
        for (const auto &token : *begin) {
            variants[i] = token;
            if (next == end) {
                keys.emplace_back(GetKey(variants));
            }
            GetKeysFromSequence0(next, end, i + 1, variants, keys);
        }
    }

    /// Appends to `keys` one key per combination of variants of the tokens in
    /// [begin, end) (the cartesian product of the variant sets).
    /// An empty range appends nothing.
    static void GetKeysFromSequence(const TSequence::iterator &begin, const TSequence::iterator &end, TDeque<TKey> &keys) {
        TDeque<TString> variants;
        variants.resize(std::distance(begin, end));
        GetKeysFromSequence0(begin, end, 0, variants, keys);
    }

    /// Registers `rule` with `tag` under every key derived from the rule text
    /// and raises MaxNGram to the rule's token count when needed.
    /// The generated keys are appended to `keys` for the caller.
    void AddRule(const TString &rule, const TTag &tag, TDeque<TKey> &keys) {
        TSequence tokenSequence;
        GetTokenSequence(rule, tokenSequence);

        // Only keys produced by this call belong to this rule. Entries the
        // caller already had in `keys` (e.g. a deque reused across calls) must
        // not get the rule attached to them.
        const size_t firstNewKey = keys.size();
        GetKeysFromSequence(tokenSequence.begin(), tokenSequence.end(), keys);
        for (size_t k = firstNewKey; k < keys.size(); ++k) {
            const TKey &key = keys[k];
            MaxNGram = Max<int>(MaxNGram, key.second);
            Rules[key.first].insert(TRule(rule, tag));
        }
    }

    /// Convenience overload for callers that do not need the generated keys.
    void AddRule(const TString &rule, const TTag &tag) {
        TDeque<TKey> keys;
        AddRule(rule, tag, keys);
    }

    /// Adds every rule registered under `key` to `foundRules`; does nothing
    /// when the key is unknown.
    void FindRule(const TString &key, TRules &foundRules) const {
        const auto it = Rules.find(key);
        if (it != Rules.end()) {
            foundRules.insert(it->second.begin(), it->second.end());
        }
    }

    /// Collects into `hits` every rule whose key matches some window of
    /// 1..MaxNGram consecutive tokens of `text`.
    void Search(const TString &text, TRules &hits) const {
        TSequence tokenSequence;
        GetTokenSequence(text, tokenSequence);

        const size_t tokenCount = tokenSequence.size();
        const size_t maxNGram = MaxNGram > 0 ? static_cast<size_t>(MaxNGram) : 0;

        TDeque<TKey> keys;
        auto begin = tokenSequence.begin();
        // A window may start at any token. Its length is capped by the tokens
        // that remain, so short rules near the end of the text and texts
        // shorter than MaxNGram are still probed.
        for (size_t i = 0; i < tokenCount; ++i, ++begin) {
            const size_t longest = Min(maxNGram, tokenCount - i);
            auto end = begin;
            for (size_t j = 1; j <= longest; ++j) {
                ++end;
                keys.clear();
                GetKeysFromSequence(begin, end, keys);
                for (const auto &key : keys) {
                    FindRule(key.first, hits);
                }
            }
        }
    }

public:
    /// Key string -> rules registered under that key.
    THashMap<TString, TRules> Rules;
    /// Largest token count among the keys of all registered rules.
    int MaxNGram = 0;
};

} //namespace NWebmaster
