#include <library/cpp/pybind/typedesc.h>
#include <kernel/lemmer/core/language.h>
#include <dict/easyparser/easyparser.h>

#include <util/charset/wide.h>
#include <util/generic/string.h>
#include <util/generic/vector.h>

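/*
 * Python-visible interface of this module:
 *
 * class EasyParser:
 *
 *   def __init__(self, langs=("ru", "en")):
 *      """Initializes a TEasyParser object with lemmatization languages <langs>."""
 *
 *   def parse_text(self, text, rich_mode=False, token_types=("NLP_WORD",), save_all_tokens=False):
 *      """Returns a list of lemmas from <text>. Accepts unicode only.
 *
 *      With rich_mode=True, returns a list of dicts with the keys
 *      "Token", "Lemma" and "StemGrammar" instead of plain lemmas."""
 */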

namespace {
    // Name under which the extension module is created (see the module init function).
    const TString MODULE_NAME = "_easyparser";

    // Module-level function table. It holds only the terminating sentinel entry,
    // i.e. the module exposes no free functions.
    PyMethodDef MODULE_METHODS[] = {
        {nullptr, nullptr, 0, nullptr}
    };

    // Converts a grammar bit set into the list of names of its members, in the
    // iteration order of the bit set.
    //   grammar   - set of grammar values to name
    //   isLatinic - forwarded unchanged to TGrammarIndex::GetName
    //               (presumably selects Latin-script names - TODO confirm)
    TVector<TString> GetGrammarVector(const TGramBitSet& grammar, bool isLatinic = true) {
        TVector<TString> names;
        // One slot per set bit, so the loop below never reallocates.
        names.reserve(grammar.Count());
        for (const EGrammar value : grammar) {
            names.emplace_back(TGrammarIndex::GetName(value, isLatinic));
        }
        return names;
    }
}

/*
 * Owning wrapper around a TEasyParser instance. This is the holder type that
 * TEasyParserType hands to the NPyBind type machinery.
 */
struct TEasyParserHolder {
    // The wrapped parser; destroyed together with the holder.
    THolder<TEasyParser> Parser;

    // Takes ownership of <parser>.
    // Marked explicit so that a raw TEasyParser* can never be converted into an
    // owning holder implicitly (which would silently schedule it for deletion).
    explicit TEasyParserHolder(TEasyParser* parser)
        : Parser(parser)
    {
    }
};

/*
 * NPyBind type descriptor that exposes TEasyParser to Python.
 * Instances on the Python side are backed by a TEasyParserHolder.
 */
class TEasyParserType : public NPyBind::TPythonType<TEasyParserHolder, TEasyParser, TEasyParserType> {
private:
    using TParent = NPyBind::TPythonType<TEasyParserHolder, TEasyParser, TEasyParserType>;

    // The base template needs access to the private constructor below.
    friend TParent;

    // Registers the Python-callable methods; defined out of line.
    TEasyParserType();

public:
    // Builds the holder for a new Python object from the constructor arguments.
    // Returns nullptr (with a Python error set) on failure; defined out of line.
    static TEasyParserHolder* DoInitObject(PyObject* args, PyObject* kwargs);

    // Returns a non-owning pointer to the parser stored in <holder>.
    static TEasyParser* GetObject(const TEasyParserHolder& holder) {
        return holder.Parser.Get();
    }
};

/*
 * Method caller behind EasyParser.parse_text().
 *
 * Python arguments (the first one is positional, the rest are optional keywords):
 *   text            - text to parse, extracted as a UTF-16 string
 *   rich_mode       - false (default): return a list of lemmas;
 *                     true: return a list of dicts with the keys
 *                     "Token", "Lemma" and "StemGrammar"
 *   token_types     - names of accepted NLP_TYPE values, default ["NLP_WORD"]
 *   save_all_tokens - copied into TParseOptions::SaveAllTokens, default false
 */
class TParseText : public NPyBind::TBaseMethodCaller<TEasyParser> {
public:
    // Always returns true. On success <res> receives the result object; on any
    // std::exception a Python RuntimeError is set and <res> is nullptr.
    bool CallMethod(PyObject*, TEasyParser* self, PyObject* args, PyObject* kwargs, PyObject*& res) const override {
        try {
            // The empty name marks the leading argument as positional-only;
            // the list is terminated by nullptr.
            TVector<const char*> argNames = {"", "rich_mode", "token_types", "save_all_tokens", nullptr};

            TUtf16String text;
            bool richMode = false;
            TVector<TString> typeNames = {TString("NLP_WORD")};
            bool keepAllTokens = false;

            if (!NPyBind::ExtractOptionalArgs(args, kwargs, argNames.data(), text, richMode, typeNames, keepAllTokens)) {
                ythrow yexception() << "wrong parameters for parse_text";
            }

            // Translate the textual token type names into parser options.
            TEasyParser::TParseOptions options;
            for (const TString& typeName : typeNames) {
                NLP_TYPE tokenType = NLP_END;
                if (!TryFromString(typeName, tokenType)) {
                    ythrow yexception() << "unknown token type '" << typeName << "' in params to parse_text";
                }
                options.AcceptedTypes.insert(tokenType);
            }
            options.SaveAllTokens = keepAllTokens;

            if (!richMode) {
                // Plain mode: just the lemmas.
                TVector<TUtf16String> lemmas;
                self->ParseText(text, &lemmas, options);
                res = NPyBind::BuildPyObject(lemmas);
                return true;
            }

            // Rich mode: one dict per parsed word.
            TVector<TEasyParser::TWord> parsedWords;
            self->ParseText(text, &parsedWords, options);

            // The same map is reused for every word; only its three values are
            // replaced before it is converted into a fresh Python object.
            // NOTE(review): the PyObject* values stored here and in <pyItems> are
            // never released in this function. Whether that leaks references depends
            // on the ownership semantics of NPyBind::BuildPyObject - TODO confirm.
            TMap<TString, PyObject*> fields;
            PyObject*& tokenSlot = fields["Token"];
            PyObject*& lemmaSlot = fields["Lemma"];
            PyObject*& grammarSlot = fields["StemGrammar"];

            TVector<PyObject*> pyItems;
            for (const auto& parsed : parsedWords) {
                tokenSlot = NPyBind::BuildPyObject(parsed.Token);
                lemmaSlot = NPyBind::BuildPyObject(parsed.Lemma);
                grammarSlot = NPyBind::BuildPyObject(GetGrammarVector(parsed.StemGrammar));
                pyItems.push_back(NPyBind::BuildPyObject(fields));
            }

            res = NPyBind::BuildPyObject(pyItems);
            return true;
        } catch (const std::exception& e) {
            PyErr_SetString(PyExc_RuntimeError, e.what());
        }
        // Failure path: the Python error is set above, signal it with a null result.
        res = nullptr;
        return true;
    }
};

TEasyParserType::TEasyParserType()
    : TParent((MODULE_NAME + ".EasyParser").data(), "module for all-inclusive parsing")
{
    AddCaller("parse_text", new TParseText);
}

/*
 * Creates the holder for a new EasyParser Python object.
 *
 * <args> must contain a sequence of language names; keyword arguments are ignored.
 * Every name has to resolve to a known, non-"unknown" language for which
 * NLemmer::GetLanguageById returns a non-null result.
 *
 * Returns a newly allocated holder, or nullptr with a Python RuntimeError set
 * when the arguments are malformed or any language is rejected.
 */
TEasyParserHolder* TEasyParserType::DoInitObject(PyObject* args, PyObject* /*kwargs*/) {
    try {
        TVector<TString> langNames;
        if (!NPyBind::ExtractArgs(args, langNames)) {
            ythrow yexception() << "wrong parameters for EasyParser::__init__()";
        }

        // Accumulate all requested languages into a single mask.
        TLangMask mask;
        for (const TString& name : langNames) {
            const ELanguage langId = LanguageByNameStrict(name);
            Y_ENSURE(langId != LANG_MAX, "unknown language name: \"" << name << "\"");
            Y_ENSURE(langId != LANG_UNK, "unknown language cannot be used: \"" << name << "\"");
            Y_ENSURE(NLemmer::GetLanguageById(langId), "language not supported: \"" << name << "\"");
            mask.Set(langId);
        }

        return new TEasyParserHolder(new TEasyParser(mask));
    } catch (const std::exception& e) {
        // Report the failure to Python instead of letting the exception escape.
        PyErr_SetString(PyExc_RuntimeError, e.what());
    }
    return nullptr;
}

#if PY_MAJOR_VERSION >= 3
// Python 3 entry point: creates the "_easyparser" module and registers the
// EasyParser type in it. Returns nullptr if the module could not be created.
PyMODINIT_FUNC PyInit__easyparser(void) {
    static struct PyModuleDef moduledef = {
        PyModuleDef_HEAD_INIT, MODULE_NAME.data(), nullptr, -1, MODULE_METHODS, nullptr, nullptr, nullptr, nullptr
    };
    PyObject* module = PyModule_Create(&moduledef);
    if (module == nullptr) {
        // Module creation failed; do not pass a null module on to Register.
        return nullptr;
    }

    TEasyParserType::Instance().Register(module, "EasyParser");

    return module;
}
#else
// Python 2 entry point: creates the "_easyparser" module and registers the
// EasyParser type in it.
PyMODINIT_FUNC init_easyparser(void) {
    PyObject* module = Py_InitModule(MODULE_NAME.data(), MODULE_METHODS);
    if (module == nullptr) {
        // Module creation failed; do not pass a null module on to Register.
        return;
    }
    TEasyParserType::Instance().Register(module, "EasyParser");
}
#endif
