#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
import sys
import argparse
import logging
import codecs

# When BINARY_ROOT_PATH is set in the environment, add the lemmer binding's
# directory under it to the import path so the import below can resolve.
if 'BINARY_ROOT_PATH' in os.environ:
    sys.path.append(os.path.join(os.environ['BINARY_ROOT_PATH'], 'bindings', 'python', 'lemmer'))

# The binding can be found in arcadia/bindings/python/lemmer.
from liblemmer_python_binding import AnalyzeWord


def print_paradigms(infos, out):
    """Write the generated paradigm of every analysis in *infos* to *out*.

    For each analysis a blank line is written first, followed by one
    tab-separated line per generated form, in sorted order:

        form <TAB> lexical features (comma-joined) <TAB>
        form grammemes (semicolon-joined) <TAB> language

    A final blank line is written after all analyses (also when *infos* is
    empty).

    Args:
        infos: iterable of analysis objects exposing ``Generate()`` (returning
            an iterable of ``(form, grammemes)`` pairs), ``LexicalFeature``
            (an iterable of strings) and ``Language``.
        out: text stream with a ``write`` method accepting unicode text.
    """
    # Explicit write() calls instead of the Python-2-only ``print >>out``
    # statement: the emitted text is the same, and the function no longer
    # breaks (TypeError on ``print >> out``) when run under Python 3.
    for info in infos:
        paradigm = info.Generate()
        out.write(u"\n")
        for form, gr in sorted(paradigm):
            line = u"{f}\t{lgr}\t{gr}\t{l}".format(
                f=form,
                lgr=",".join(info.LexicalFeature),
                gr=";".join(gr),
                l=info.Language,
            )
            out.write(line + u"\n")
    out.write(u"\n")

# ----------------------------------------------------
# --- Main section -----------------------------------
# ----------------------------------------------------
if __name__ == "__main__":
    # Command-line interface: language selection and a quiet "test" mode.
    cli = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    cli.add_argument(
        "-l",
        "--langs",
        dest="langs",
        default="ru",
        type=lambda x: x.split(","),
        help="languages, comma separated",
    )
    cli.add_argument(
        "-t",
        "--test-mode",
        dest="test",
        action="store_true",
        help="test mode, supress logging messages, use all possible languages",
    )
    args = cli.parse_args()

    # Test mode only lets warnings and above through; otherwise log everything.
    log_level = logging.WARNING if args.test else logging.DEBUG
    logging.basicConfig(
        stream=sys.stderr,
        level=log_level,
        format=u'[%(asctime)s] %(levelname)s\t%(message)s',
    )
    logging.debug("Reading from stdin...")

    # Keyword arguments for every AnalyzeWord call. In test mode no language
    # list is passed, so the binding's own default applies.
    analyze_options = {"split": False}
    if not args.test:
        analyze_options["langs"] = args.langs

    source = sys.stdin
    out = codecs.getwriter("utf-8")(sys.stdout)
    with source, out:
        # readline() returns "" at end of input, which is the iter() sentinel.
        for raw_line in iter(source.readline, ""):
            stripped = raw_line.strip()
            if stripped:
                # Skip blank lines; each remaining line is analyzed as one word.
                word = stripped.decode("utf-8")
                print_paradigms(AnalyzeWord(word, **analyze_options), out)

