#include "get_lemmas.h"

#include <kernel/lemmer/core/language.h>

#include <util/generic/hash_set.h>
#include <util/generic/string.h>
#include <util/string/split.h>

namespace {
    // Delimiter used to break the input text into tokens.
    const TUtf16String SPACE = u" ";

    // Lemmatizes every space-separated token of inputText and appends the lemma
    // texts to result.
    //
    // T is the output container type; it must work with std::inserter
    // (the callers in this file use THashSet<TUtf16String> and TVector<TUtf16String>).
    //
    // inputText - text to process. It is handed to StringSplitter as an rvalue,
    //             so the caller must treat it as moved-from after the call.
    // langMask  - language mask forwarded to NLemmer::AnalyzeWord.
    // result    - container that receives the lemmas. Its contents are moved into
    //             the return value, so after the call it is in a moved-from state;
    //             use the returned container instead.
    //
    // Lemmas are deduplicated within a single token; whether the same lemma may
    // appear again for another token depends on the container type T.
    template <typename T>
    T GetLemmasTemplate(TUtf16String& inputText, const TLangMask& langMask, T& result) {
        const auto& tokens = StringSplitter(std::move(inputText)).SplitByString(SPACE.c_str()).SkipEmpty();

        auto it = std::inserter(result, result.end());

        for (const auto& iterState : tokens) {
            const TWtringBuf token(iterState);
            TWLemmaArray lemmas;
            NLemmer::AnalyzeWord(token.data(), token.length(), lemmas, langMask);

            // Collapse repeated lemma texts produced for this one token.
            THashSet<TUtf16String> uniqueLemmas;
            for (const auto& lemma : lemmas) {
                uniqueLemmas.insert(lemma.GetText());
            }

            for (const auto& uniqueLemma : uniqueLemmas) {
                // Assigning to an insert iterator inserts the value into result.
                it = uniqueLemma;
            }
        }

        // result is a reference parameter, so a plain `return result;` would
        // copy the whole container; move it out instead.
        return std::move(result);
    }
}

// Returns the set of distinct lemma texts obtained for the space-separated
// tokens of inputText, analyzed with the given language mask.
// inputText is consumed: the helper moves from it.
THashSet<TUtf16String> NCrypta::GetLemmas(TUtf16String&& inputText, const TLangMask& langMask) {
    using TLemmaSet = THashSet<TUtf16String>;

    TLemmaSet lemmaSet;
    return GetLemmasTemplate<TLemmaSet>(inputText, langMask, lemmaSet);
}

// Returns the lemma texts obtained for the space-separated tokens of inputText,
// analyzed with the given language mask, in token order.
// Lemmas are deduplicated only within a single token, so the same lemma can
// appear several times when different tokens share it.
// inputText is consumed: the helper moves from it.
TVector<TUtf16String> NCrypta::GetNotUniqueLemmas(TUtf16String&& inputText, const TLangMask& langMask) {
    using TLemmaList = TVector<TUtf16String>;

    TLemmaList lemmaList;
    return GetLemmasTemplate<TLemmaList>(inputText, langMask, lemmaList);
}
