#include <jni.h>

#include <kernel/lemmer/core/language.h>

#include <util/generic/hash_set.h>
#include <util/generic/string.h>
#include <util/system/defaults.h>

// Builds the analysis options used for every lemmatization request in this
// file. The indexer preset is taken as the baseline and then adjusted:
// ReturnFoundlingAnyway is switched off and LANG_TUR is added to the
// AcceptDictionary language mask.
static NLemmer::TAnalyzeWordOpt ConstructAnalyzeOpt() {
    // Copy of the indexer preset; only the copy is modified below.
    NLemmer::TAnalyzeWordOpt opts(NLemmer::TAnalyzeWordOpt::IndexerOpt());

    opts.ReturnFoundlingAnyway = false;

    // Extend (not replace) the preset's dictionary mask with Turkish.
    opts.AcceptDictionary |= TLangMask(LANG_TUR);

    return opts;
}

// Analysis options shared by all calls in this file; built once during static
// initialization by ConstructAnalyzeOpt() and never modified afterwards.
static const NLemmer::TAnalyzeWordOpt AnalyzeOpt = ConstructAnalyzeOpt();

// Language mask handed to NLemmer::AnalyzeWord; constructed from
// LI_ALL_LANGUAGES (presumably "every supported language" -- confirm against
// the lemmer headers).
static const TLangMask LanguagesMask(LI_ALL_LANGUAGES);

/**
 * JNI entry point; by the JNI naming convention this backs the native method
 * analyzeWord(char[], int) of the Java class ru.yandex.lemmer.Lemmer.
 *
 * Runs NLemmer::AnalyzeWord over the first `len` UTF-16 code units of `buf`
 * using the file-level LanguagesMask and AnalyzeOpt, keeps only lemmas whose
 * quality is exactly TYandexLemma::QDictionary or TYandexLemma::QBastard,
 * removes duplicates and returns the remaining lemma texts.
 *
 * @param env  JNI environment of the calling thread.
 * @param buf  Java char[] holding the word to analyze.
 * @param len  Number of leading chars of `buf` that form the word.
 * @return     A new String[] with the distinct lemma texts (order follows
 *             hash-set iteration and is therefore unspecified), or NULL when
 *             - `buf` is null, or `len` is negative or exceeds the array
 *               length,
 *             - the lemmer produced no lemma of an accepted quality,
 *             - a JNI call failed (a Java exception is then left pending for
 *               the caller, e.g. OutOfMemoryError).
 */
extern "C"
__attribute__((__visibility__("default")))
JNIEXPORT jobjectArray JNICALL
    Java_ru_yandex_lemmer_Lemmer_analyzeWord
    (JNIEnv* env, jclass, jcharArray buf, jint len)
{
    // Never let the lemmer read past the Java array: `len` is caller-supplied
    // and a negative value would turn into a huge size_t.
    if (!buf || len < 0 || len > env->GetArrayLength(buf)) {
        return NULL;
    }

    jchar* wordChars = env->GetCharArrayElements(buf, NULL);
    if (!wordChars) {
        // The VM could not provide the elements; nothing to release.
        return NULL;
    }
    TWLemmaArray out;
    size_t count = NLemmer::AnalyzeWord(
        TWideToken(reinterpret_cast<const wchar16*>(wordChars), len),
        out,
        LanguagesMask,
        NULL,
        AnalyzeOpt);
    // JNI_ABORT: the buffer was only read, so skip copying it back.
    env->ReleaseCharArrayElements(buf, wordChars, JNI_ABORT);

    if (!count) {
        return NULL;
    }

    // A set is used so that identical lemma texts are reported only once.
    typedef THashSet<TUtf16String> TLemmas;
    TLemmas lemmas(count);
    for (size_t i = 0; i < count; ++i) {
        ui32 quality = out[i].GetQuality();
        if (quality == TYandexLemma::QDictionary ||
            quality == TYandexLemma::QBastard)
        {
            lemmas.insert(out[i].GetText());
        }
    }
    if (lemmas.empty()) {
        return NULL;
    }

    jclass stringClass = env->FindClass("java/lang/String");
    if (!stringClass) {
        // Class lookup failed; an exception is pending in the VM.
        return NULL;
    }
    jobjectArray result = env->NewObjectArray(
        static_cast<jsize>(lemmas.size()), stringClass, NULL);
    env->DeleteLocalRef(stringClass);
    if (!result) {
        return NULL;
    }

    jsize pos = 0;
    for (TLemmas::const_iterator iter = lemmas.begin(), end = lemmas.end();
        iter != end; ++iter, ++pos)
    {
        jstring lemma = env->NewString(
            reinterpret_cast<const jchar*>(iter->c_str()),
            static_cast<jsize>(iter->size()));
        if (!lemma) {
            // Allocation failed; no further JNI calls may be made while the
            // exception is pending, so bail out and let Java see it.
            return NULL;
        }
        env->SetObjectArrayElement(result, pos, lemma);
        // The array now holds the string; drop the local reference so the
        // local reference table does not grow with the number of lemmas.
        env->DeleteLocalRef(lemma);
    }
    return result;
}

