// Python defines ssize_t; the line below disables our definition of the same
#define HAVE_SSIZE_T 1

#include "lemma_info.h"
#include "python_wrappers.h"

#include <util/generic/singleton.h>
#include <kernel/lemmer/dictlib/grammar_index.h>
#include <kernel/lemmer/core/lemmeraux.h>

// using namespace NPython;

// Definitions of the exported functions.

// Allocates a TLemmaInfo Python object and resets its fields with Initialize().
// Returns the new object on success. On failure the error is reported through
// NPython::Raise and nullptr is returned.
TLemmaInfo* NewLemmaInfo() {
    TLemmaInfo* created = nullptr;
    try {
        created = PyObject_New(TLemmaInfo, &TLemmaInfo::Type);
        if (!created) {
            ythrow yexception() << "failed to create TLemmaInfo";
        }
        created->Initialize();
    } catch (const yexception& error) {
        // Free a partially constructed object before reporting the error.
        if (created) {
            PyObject_Del(created);
        }
        NPython::Raise(error);
        return nullptr;
    }
    return created;
}

// Deallocator installed as tp_dealloc of TLemmaInfo::Type: releases everything
// the object holds via Deinitialize() and then frees the object memory.
// A null argument is ignored; a yexception is forwarded to NPython::Raise.
void DeleteLemmaInfo(PyObject* arg) {
    try {
        if (TLemmaInfo* const info = reinterpret_cast<TLemmaInfo*>(arg)) {
            info->Deinitialize();
            PyObject_Del(arg);
        }
    } catch (const yexception& error) {
        NPython::Raise(error);
    }
}

// Tests whether any bit of `flag` is set in lemmaInfo->Flags and returns the
// corresponding Python boolean. If lemmaInfo is null, the error is reported
// through NPython::Raise and nullptr is returned.
static PyObject* FlagCheck(TLemmaInfo* lemmaInfo, TCharCategory flag) {
    try {
        if (!lemmaInfo) {
            ythrow yexception() << "lemma is absent";
        }
        if (lemmaInfo->Flags & flag) {
            return NPython::True().Release();
        }
        return NPython::False().Release();
    } catch (const yexception& error) {
        NPython::Raise(error);
        return nullptr;
    }
}

namespace NPython {
    // Handlers referenced from TLemmaInfo::Methods.
    //
    // All of them are registered as METH_NOARGS, which CPython invokes as
    // PyCFunction(self, NULL). They therefore use the exact PyCFunction
    // signature: calling a function through a pointer of a different function
    // type is undefined behaviour, even if the extra argument is never read.
    // `self` is the TLemmaInfo instance the method was called on.

    static PyObject* IsAlpha(PyObject* self, PyObject* /*unused*/) {
        return FlagCheck(reinterpret_cast<TLemmaInfo*>(self), CC_ALPHA);
    }

    static PyObject* IsNMToken(PyObject* self, PyObject* /*unused*/) {
        return FlagCheck(reinterpret_cast<TLemmaInfo*>(self), CC_NMTOKEN);
    }

    // Exposed to Python as "IsNumber".
    static PyObject* IsNumberX(PyObject* self, PyObject* /*unused*/) {
        return FlagCheck(reinterpret_cast<TLemmaInfo*>(self), CC_NUMBER);
    }

    static PyObject* IsNUToken(PyObject* self, PyObject* /*unused*/) {
        return FlagCheck(reinterpret_cast<TLemmaInfo*>(self), CC_NUTOKEN);
    }

    static PyObject* IsAscii(PyObject* self, PyObject* /*unused*/) {
        return FlagCheck(reinterpret_cast<TLemmaInfo*>(self), CC_ASCII);
    }

    static PyObject* IsNonAscii(PyObject* self, PyObject* /*unused*/) {
        return FlagCheck(reinterpret_cast<TLemmaInfo*>(self), CC_NONASCII);
    }

    static PyObject* IsTitleCase(PyObject* self, PyObject* /*unused*/) {
        return FlagCheck(reinterpret_cast<TLemmaInfo*>(self), CC_TITLECASE);
    }

    static PyObject* IsUpperCase(PyObject* self, PyObject* /*unused*/) {
        return FlagCheck(reinterpret_cast<TLemmaInfo*>(self), CC_UPPERCASE);
    }

    static PyObject* IsLowerCase(PyObject* self, PyObject* /*unused*/) {
        return FlagCheck(reinterpret_cast<TLemmaInfo*>(self), CC_LOWERCASE);
    }

    static PyObject* IsMixedCase(PyObject* self, PyObject* /*unused*/) {
        return FlagCheck(reinterpret_cast<TLemmaInfo*>(self), CC_MIXEDCASE);
    }

    static PyObject* IsCompound(PyObject* self, PyObject* /*unused*/) {
        return FlagCheck(reinterpret_cast<TLemmaInfo*>(self), CC_COMPOUND);
    }

    // Exposed to Python as "Generate": forwards to TLemmaInfo::Generate() and
    // converts a yexception into a Python Exception.
    static PyObject* GenerateInt(PyObject* self, PyObject* /*unused*/) {
        try {
            return reinterpret_cast<TLemmaInfo*>(self)->Generate();
        } catch (const yexception& exc) {
            PyErr_SetString(PyExc_Exception, exc.what());
            return nullptr;
        }
    }
} // namespace NPython

// TLemmaInfo implementation.

void TLemmaInfo::Initialize() {
    Lemmata = nullptr;
    LemmaPtr = nullptr;

    Lemma = nullptr;
    Language = nullptr;
    Form = nullptr;
    LexicalFeature = nullptr;
    FormFeature = nullptr;
    Flags = 0;
    RuleId = 0;
    Bastardness = 0;
    Weight = 0;
}

void TLemmaInfo::Deinitialize() {
    delete Lemmata;

    NPython::SafeDecRefCount(Lemma);
    NPython::SafeDecRefCount(Language);
    NPython::SafeDecRefCount(Form);
    NPython::SafeDecRefCount(LexicalFeature);
    NPython::SafeDecRefCount(FormFeature);
}

// Unqualified class name; the same name is used as the suffix of tp_name in
// TLemmaInfo::Type.
const char* TLemmaInfo::ClassName = "LemmaInfo";

// Docstrings of the Python-visible methods of LemmaInfo.
PyDoc_STRVAR(DocStr_IsAlpha, "Check if lemma contains only alphabetic symbols.");
PyDoc_STRVAR(DocStr_IsNMToken, "Check if lemma contains only alpha-numeric symbols and does not start with a digit.");
PyDoc_STRVAR(DocStr_IsNumber, "Check if lemma contains only numbers.");
PyDoc_STRVAR(DocStr_IsNUToken, "Check if lemma contains only alpha-numeric symbols and starts with a digit.");
PyDoc_STRVAR(DocStr_IsAscii, "Check if lemma contains only ASCII symbols (characters with code < 128).");
PyDoc_STRVAR(DocStr_IsNonAscii, "Check if lemma contains only non-ASCII symbols (characters with code >= 128).");
PyDoc_STRVAR(DocStr_IsTitleCase, "Check if lemma is titlecased.");
PyDoc_STRVAR(DocStr_IsUpperCase, "Check if lemma contains only uppercase symbols.");
PyDoc_STRVAR(DocStr_IsLowerCase, "Check if lemma contains only lowercase symbols.");
PyDoc_STRVAR(DocStr_IsMixedCase, "Check if lemma is in mixed cases.");
PyDoc_STRVAR(DocStr_IsCompound, "Check if lemma is compound.");
PyDoc_STRVAR(DocStr_Generate, "Generate word forms of the lemma. Returns a list of (form, grammar) tuples, where grammar is a list of strings.");

// Builds a PyMethodDef entry for a method that takes no arguments besides
// self; the handler is cast to PyCFunction as PyMethodDef requires.
#define METHOD_WITH_NO_ARGS(name, method, docstr) \
    {name, reinterpret_cast<PyCFunction>(method), METH_NOARGS, docstr}

// Method table of the LemmaInfo type; terminated by an all-null sentinel.
PyMethodDef TLemmaInfo::Methods[] = {
    METHOD_WITH_NO_ARGS("IsAlpha", NPython::IsAlpha, DocStr_IsAlpha),
    METHOD_WITH_NO_ARGS("IsNMToken", NPython::IsNMToken, DocStr_IsNMToken),
    METHOD_WITH_NO_ARGS("IsNumber", NPython::IsNumberX, DocStr_IsNumber),
    METHOD_WITH_NO_ARGS("IsNUToken", NPython::IsNUToken, DocStr_IsNUToken),
    METHOD_WITH_NO_ARGS("IsAscii", NPython::IsAscii, DocStr_IsAscii),
    METHOD_WITH_NO_ARGS("IsNonAscii", NPython::IsNonAscii, DocStr_IsNonAscii),
    METHOD_WITH_NO_ARGS("IsTitleCase", NPython::IsTitleCase, DocStr_IsTitleCase),
    METHOD_WITH_NO_ARGS("IsUpperCase", NPython::IsUpperCase, DocStr_IsUpperCase),
    METHOD_WITH_NO_ARGS("IsLowerCase", NPython::IsLowerCase, DocStr_IsLowerCase),
    METHOD_WITH_NO_ARGS("IsMixedCase", NPython::IsMixedCase, DocStr_IsMixedCase),
    METHOD_WITH_NO_ARGS("IsCompound", NPython::IsCompound, DocStr_IsCompound),
    METHOD_WITH_NO_ARGS("Generate", NPython::GenerateInt, DocStr_Generate),
    {nullptr, nullptr, 0, nullptr}
};

#undef METHOD_WITH_NO_ARGS

// Docstrings of the Python-visible attributes of LemmaInfo.
PyDoc_STRVAR(DocStr_Lemma, "Lemma.");
PyDoc_STRVAR(DocStr_Language, "Language of lemma.");
PyDoc_STRVAR(DocStr_Form, "Form of lemma.");
PyDoc_STRVAR(DocStr_LexicalFeature, "Lexical feature of lemma.");
PyDoc_STRVAR(DocStr_FormFeature, "Form feature of lemma.");
PyDoc_STRVAR(DocStr_Bastardness, "Bastardness of lemma.");
PyDoc_STRVAR(DocStr_Weight, "Lemma weight.");
PyDoc_STRVAR(DocStr_RuleId, "Rule ID of lemma.");

// Attribute table of the LemmaInfo type: every entry maps a read-only Python
// attribute directly onto a field of TLemmaInfo. Terminated by an all-null
// sentinel. First, Last and FlexLen are exposed without a docstring.
// NOTE(review): T_INT / T_DOUBLE require the C fields to be exactly int /
// double; the field declarations live in the header and are not visible
// here - confirm they match.
// The (char *) casts are presumably there for Python versions that declare
// PyMemberDef::name as non-const char* - TODO confirm.
PyMemberDef TLemmaInfo::Members[] = {
    {(char *)"Lemma", T_OBJECT_EX, offsetof(TLemmaInfo, Lemma), READONLY, DocStr_Lemma},
    {(char *)"Language", T_OBJECT_EX, offsetof(TLemmaInfo, Language), READONLY, DocStr_Language},
    {(char *)"Form", T_OBJECT_EX, offsetof(TLemmaInfo, Form), READONLY, DocStr_Form},
    {(char *)"LexicalFeature", T_OBJECT_EX, offsetof(TLemmaInfo, LexicalFeature), READONLY, DocStr_LexicalFeature},
    {(char *)"FormFeature", T_OBJECT_EX, offsetof(TLemmaInfo, FormFeature), READONLY, DocStr_FormFeature},
    {(char *)"Bastardness", T_INT, offsetof(TLemmaInfo, Bastardness), READONLY, DocStr_Bastardness},
    {(char *)"First", T_INT, offsetof(TLemmaInfo, First), READONLY, nullptr},
    {(char *)"Last", T_INT, offsetof(TLemmaInfo, Last), READONLY, nullptr},
    {(char *)"FlexLen", T_INT, offsetof(TLemmaInfo, FlexLen), READONLY, nullptr},
    {(char *)"Weight", T_DOUBLE, offsetof(TLemmaInfo, Weight), READONLY, DocStr_Weight},
    {(char *)"RuleId", T_INT, offsetof(TLemmaInfo, RuleId), READONLY, DocStr_RuleId},
    {nullptr, 0, 0, 0, nullptr}
};

// Static type object of the LemmaInfo Python class.
// The initializer is positional, so the order of the entries must follow the
// layout of PyTypeObject for the Python version being compiled against; the
// preprocessor conditions below account for slots that were added, moved or
// removed between versions. Only the deallocator, flags, docstring, methods
// and members are set explicitly; every other slot is left null/zero.
PyTypeObject TLemmaInfo::Type = {
    PyVarObject_HEAD_INIT(nullptr, 0)
    MODULE_NAME_STR".LemmaInfo",/*tp_name*/
    sizeof(TLemmaInfo),         /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    DeleteLemmaInfo,            /*tp_dealloc*/
#if PY_VERSION_HEX < 0x030800b4
    nullptr,                          /*tp_print*/
#endif
#if PY_VERSION_HEX >= 0x030800b4
    0,                                /*tp_vectorcall_offset*/
#endif
    nullptr,                          /*tp_getattr*/
    nullptr,                          /*tp_setattr*/
    nullptr,                          /*tp_compare (Python 2) / tp_as_async (Python 3)*/
    nullptr,                          /*tp_repr*/
    nullptr,                          /*tp_as_number*/
    nullptr,                          /*tp_as_sequence*/
    nullptr,                          /*tp_as_mapping*/
    nullptr,                          /*tp_hash */
    nullptr,                          /*tp_call*/
    nullptr,                          /*tp_str*/
    nullptr,                          /*tp_getattro*/
    nullptr,                          /*tp_setattro*/
    nullptr,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
    "LemmaInfo class: contains an information about lemma.", /* tp_doc */
    nullptr,                            /* tp_traverse */
    nullptr,                            /* tp_clear */
    nullptr,                          /* tp_richcompare */
    0,                            /* tp_weaklistoffset */
    nullptr,                            /* tp_iter */
    nullptr,                            /* tp_iternext */
    TLemmaInfo::Methods,        /* tp_methods */
    TLemmaInfo::Members,        /* tp_members */
    // tp_getset, tp_base, tp_dict, tp_descr_get, tp_descr_set, tp_dictoffset,
    // tp_init, tp_alloc, tp_new, tp_free, tp_is_gc, tp_bases, tp_mro,
    // tp_cache, tp_subclasses, tp_weaklist, tp_del
    nullptr, nullptr, nullptr, nullptr, nullptr, 0, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr  // ... and others
#if PY_MAJOR_VERSION > 1 && (PY_MAJOR_VERSION > 2 || PY_MINOR_VERSION > 5)
    ,0  // tp_version_tag
#endif
#if PY_MAJOR_VERSION >= 3
    ,0  // tp_finalize
#endif
#if PY_VERSION_HEX >= 0x030800b1
    ,nullptr /*tp_vectorcall*/
#endif
#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000
    ,nullptr /*tp_print*/
#endif
};

// Auxiliary functions: inner structures -> Python objects conversions.

class TGrammarMap {
private:
    typedef TVector<NPython::TPyObjPtr> TData;
    TData Data;

public:
    TGrammarMap() {
        int top = gMax;
        Data.resize(top + 1);

        for (int i = gBefore; i < top; ++i) {
            const char* gName = TGrammarIndex::GetLatinName(static_cast<EGrammar>(i));
            if (gName == nullptr || *gName == 0) {
                continue;
            }
            TData::value_type val = NPython::PythonString(gName);
            Data.at(i) = val;
        }
    }

    const NPython::TPyObjPtr& Get(EGrammar key) {
        if (key < 0 || static_cast<size_t>(key) >= Data.size())
            return Default<NPython::TPyObjPtr>();

        return Data[key];
    }
};

// Converts a zero-terminated grammar string into a Python list: every
// character is decoded with ch2tg() and looked up in the TGrammarMap
// singleton; characters without an entry are skipped. A null `grammar`
// produces an empty list.
static NPython::TPyObjPtr PyListFromGrammar(const char* grammar) {
    using NTGrammarProcessing::ch2tg;

    NPython::TPyObjPtr names(NPython::List());
    if (grammar != nullptr) {
        for (const char* cur = grammar; *cur; ++cur) {
            const NPython::TPyObjPtr& name = Singleton<TGrammarMap>()->Get(ch2tg(*cur));
            if (name.Get() != nullptr) {
                ListAppend(names, name);
            }
        }
    }

    return names;
}

namespace {
class TLanguagesMap {
private:
    typedef TVector<NPython::TPyObjPtr> TData;
    TData Data;

public:
    TLanguagesMap() {
        int top = LANG_MAX;
        Data.resize(top + 1);

        for (int i = LANG_UNK; i < top; ++i) {
            const TLanguage* lang = NLemmer::GetLanguageById(static_cast<ELanguage>(i));
            if (lang == nullptr)
                continue;

            TData::value_type val = NPython::PythonString(lang->Code());
            Data.at(i) = val;
        }
    }

    const NPython::TPyObjPtr& Get(ELanguage key) {
        if (key < 0 || static_cast<size_t>(key) >= Data.size())
            return Default<NPython::TPyObjPtr>();

        return Data[key];
    }
};
} // namespace

// Returns the entry the TLanguagesMap singleton holds for `id`; the result
// is an empty pointer when the table has no entry for that language.
static NPython::TPyObjPtr PyLanguage(ELanguage id) {
    TLanguagesMap* const languages = Singleton<TLanguagesMap>();
    return languages->Get(id);
}

// Pointer wrapper used by WrapLemma() to hold a TLemmaInfo while it is being
// filled in (presumably drops the reference on destruction unless Release()
// was called - confirm against NPython::TPythonPointer).
typedef NPython::TPythonPointer<TLemmaInfo> TLemmaInfoPtr;


// Generates the word forms of the wrapped lemma.
// Returns a Python list of 2-tuples (form text, list of grammar strings).
// If no lemma is attached or the stored language name is unknown, the error
// is reported through NPython::Raise and nullptr is returned.
PyObject* TLemmaInfo::Generate() const {
    try {
        NPython::TPyObjPtr generated(NPython::List());

        if (!LemmaPtr) {
            ythrow yexception() << "lemma is absent";
        }

        TWordformArray wordforms;
        TString languageName = NPython::CppString(Language);
        // The lookup only validates the stored language name.
        if (NLemmer::GetLanguageByName(languageName.c_str()) == nullptr) {
            ythrow yexception() << "bad language name: " << languageName;
        }

        NLemmer::Generate(*LemmaPtr, wordforms);

        for (const auto& wordform : wordforms) {
            NPython::TPyObjPtr grammemes(NPython::List());
            const size_t gramCount = wordform.FlexGramNum();
            for (size_t idx = 0; idx < gramCount; ++idx) {
                NPython::TPyObjPtr gramStr(NPython::PythonString(sprint_grammar(wordform.GetFlexGram()[idx], 0, false).c_str()));
                ListAppend(grammemes, gramStr);
            }

            NPython::TPyObjPtr entry(NPython::Tuple(2));
            SetTupleItem(entry, 0, NPython::PythonString(wordform.GetText()));
            SetTupleItem(entry, 1, grammemes);

            ListAppend(generated, entry);
        }

        return generated.Release();
    } catch (const yexception& error) {
        NPython::Raise(error);
        return nullptr;
    }
}

// Creates a LemmaInfo Python object describing `lemma`.
// The object stores its own copy of the `lemmata` handle together with a raw
// pointer to `lemma` (presumably the copy keeps the collection that owns
// `lemma` alive - confirm against TLemmas). Throws yexception when the
// object cannot be allocated.
NPython::TPyObjPtr TLemmaInfo::WrapLemma(const TLemmaInfo::TLemmas& lemmata, const TYandexLemma& lemma) {
    TLemmaInfoPtr info(NewLemmaInfo());
    if (!info.Get()) {
        ythrow yexception() << "bad alloc";
    }

    info->Lemmata = new TLemmaInfo::TLemmas(lemmata);
    info->LemmaPtr = &lemma;

    // Python-object fields.
    info->RuleId = NLemmerAux::GetRuleId(lemma);
    info->Lemma = NPython::PythonString(lemma.GetText()).Release();
    info->Language = PyLanguage(lemma.GetLanguage()).Release();
    info->Form = NPython::PythonString(lemma.GetNormalizedForm()).Release();
    info->LexicalFeature = PyListFromGrammar(lemma.GetStemGram()).Release();

    // FormFeature: a tuple with one grammar list per flex grammar.
    const size_t formCount = lemma.FlexGramNum();
    NPython::TPyObjPtr formFeatures = NPython::Tuple(formCount);
    if (formCount > 0) {
        const char* const* grammars = lemma.GetFlexGram();
        for (size_t idx = 0; idx < formCount; ++idx) {
            NPython::TPyObjPtr features = PyListFromGrammar(grammars[idx]);
            SetTupleItem(formFeatures, idx, features);
        }
    }
    info->FormFeature = formFeatures.Release();

    // Plain scalar fields.
    info->Flags = lemma.GetCaseFlags();
    info->Bastardness = lemma.GetQuality();
    info->First = lemma.GetTokenPos();
    info->Last = info->First + lemma.GetTokenSpan();
    info->FlexLen = lemma.GetFlexLen();
    info->Weight = lemma.GetWeight();

    return NPython::TPyObjPtr(reinterpret_cast<PyObject*>(info.Release()));
}

// Wraps every lemma of `lemmata` with WrapLemma() and returns the wrappers
// as a Python list, in the order of the collection. Throws yexception if a
// lemma could not be wrapped.
NPython::TPyObjPtr TLemmaInfo::Wrap(const TLemmaInfo::TLemmas& lemmata) {
    NPython::TPyObjPtr wrapped(NPython::List());
    for (auto it = lemmata->begin(), last = lemmata->end(); it != last; ++it) {
        NPython::TPyObjPtr item(WrapLemma(lemmata, *it));
        if (!item.Get()) {
            ythrow yexception() << "failed to wrap lemma" << Endl;
        }
        ListAppend(wrapped, item);
    }
    return wrapped;
}
