import _easyparser
import json
import yatest.common


# Input fixture for do_test_simple_texts, resolved through yatest's source_path.
# That function reads it line by line and treats blank lines as text separators.
SIMPLE_TEXTS = yatest.common.source_path("bindings/python/easyparser/tests/simple_texts.txt")


def do_test_bad_langs(langs):
    """Assert that constructing an EasyParser with ``langs`` raises an error.

    :param langs: list of language codes passed to ``_easyparser.EasyParser``.
    :raises AssertionError: if the constructor completes without raising.
    """
    try:
        _easyparser.EasyParser(langs)
    except Exception:
        # Only ordinary errors count as a rejection; KeyboardInterrupt and
        # SystemExit must propagate instead of making the test pass.
        is_error = True
    else:
        is_error = False

    assert is_error, "EasyParser unexpectedly accepted langs {0!r}".format(langs)


def _encode_words(words, rich_mode):
    """Encode the unicode parts of a ``parse_text`` result to UTF-8.

    In rich mode each item is handled as a dict: its "Token" and "Lemma"
    entries are replaced in place by their UTF-8 encoding (a missing entry
    becomes an empty string) and the same list is returned. Otherwise every
    item is encoded and a new list is returned.
    """
    if not rich_mode:
        return [word.encode("utf-8") for word in words]

    for word in words:
        for key in ("Token", "Lemma"):
            word[key] = word.get(key, u"").encode("utf-8")
    return words


def do_test_simple_texts(rich_mode, token_types=None):
    """Parse the texts from SIMPLE_TEXTS and canonize the serialized result.

    Consecutive non-empty lines of the input are stripped and joined with a
    single space into one text; a blank line terminates the text. Every parsed
    text is written as one JSON line to ``out_simple_texts.<infix>.txt``, where
    ``<infix>`` is "rich" or "plain", optionally followed by "." and the token
    types joined with "_".

    :param rich_mode: forwarded to ``parse_text``; also selects how the result
        items are encoded before serialization (dicts vs. plain strings).
    :param token_types: optional list of token type names forwarded to
        ``parse_text``; the argument is omitted from the call when falsy.
    :returns: one-element list holding ``yatest.common.canonical_file`` of the
        output path.
    """
    infix_parts = ["rich" if rich_mode else "plain"]
    if token_types:
        infix_parts.append("_".join(token_types))
    out_path = "out_simple_texts.{0}.txt".format(".".join(infix_parts))

    # The parse options do not depend on the text, so build them once.
    params = {
        "save_all_tokens": True,
        "rich_mode": rich_mode,
    }
    if token_types:
        params["token_types"] = token_types

    parser = _easyparser.EasyParser(["ru"])

    # Context managers guarantee both files are closed (and the output is
    # flushed) before the path is handed over for canonization.
    with open(out_path, "w") as out, open(SIMPLE_TEXTS) as texts:
        chunk = []
        for line in texts:
            line = line.strip()
            if line:
                chunk.append(line)
                continue
            if not chunk:
                # Repeated blank lines: nothing accumulated yet.
                continue

            text = " ".join(chunk).decode("utf-8")
            words = _encode_words(parser.parse_text(text, **params), rich_mode)
            out.write("{0}\n".format(json.dumps(words, ensure_ascii=False)))
            chunk = []
        # NOTE(review): a final text that is not followed by a blank line is
        # never parsed. Kept as is so the canonical output does not change --
        # confirm whether this is intended.

    return [yatest.common.canonical_file(out_path)]


def test_simple_texts_plain():
    """Canonize plain-mode parsing without an explicit token type list."""
    canonized = do_test_simple_texts(rich_mode=False)
    return canonized


def test_simple_texts_plain_all_tokens():
    """Canonize plain-mode parsing with four token types requested explicitly."""
    requested_types = ["NLP_WORD", "NLP_MARK", "NLP_INTEGER", "NLP_FLOAT"]
    return do_test_simple_texts(rich_mode=False, token_types=requested_types)


def test_simple_texts_rich():
    """Canonize rich-mode parsing without an explicit token type list."""
    canonized = do_test_simple_texts(rich_mode=True)
    return canonized


def test_simple_texts_rich_all_tokens():
    """Canonize rich-mode parsing with four token types requested explicitly."""
    requested_types = ["NLP_WORD", "NLP_MARK", "NLP_INTEGER", "NLP_FLOAT"]
    return do_test_simple_texts(rich_mode=True, token_types=requested_types)


def test_bad_langs():
    """Check that EasyParser construction fails for each language list below.

    The lists are tried in order; the first one that is accepted fails the test.
    """
    rejected_lang_lists = (
        ["rrrrrr"],
        [""],
        ["jpn"],
        ["unk"],
        ["ka", "fa", "uz", "sr"],
    )
    for lang_list in rejected_lang_lists:
        do_test_bad_langs(lang_list)
