#!/usr/bin/python2
# encoding: utf-8
# kate: space-indent on; indent-width 4; replace-tabs on;
#
import sys, argparse
from urllib import urlopen
import yt.wrapper as ytw

# Template of the YT table path holding the rules dictionary. The main block
# fills the first placeholder with "_all" (when --all is given) or "" and the
# second placeholder with the route.
YT_RULES_DICT_PATH = "//home/so_fml/nirvana/rules{}_dict_{}"
# Default endpoint the rules dictionary is downloaded from. The route value is
# appended to it, followed by "&atom=no" when --all is given.
RULES_DICT_URL = "https://web.so.yandex-team.ru/ml/collect_dict/?route="
# Default path of the text file the dictionary is written to; can be
# overridden with --text_output.
RULES_DICT_OUTPUT_TEXT_PATH = './rules_dict.txt'

def parse_rule_fields(parts):
    """Extract ``(rule, act, atom)`` from the tab-split fields of one line.

    ``parts[0]`` is ignored. ``parts[1]`` is the rule name (trailing
    whitespace, including the line terminator, is stripped). ``parts[2]``,
    when present, is the integer "act" value and defaults to 1 otherwise.
    ``parts[3]``, when present, is the integer "atom" value; ``None`` is
    returned for it when the field is absent.

    Raises IndexError when there are fewer than two fields and ValueError
    when "act" or "atom" is not an integer. Nothing is mutated, so a caller
    can validate a line completely before touching its own state.
    """
    rule = parts[1].rstrip()
    act = int(parts[2]) if len(parts) > 2 else 1
    atom = int(parts[3]) if len(parts) > 3 else None
    return rule, act, atom


if __name__ == "__main__":
    # Refresh the rules dictionary:
    #   1. read the current dictionary (rule -> number) from the YT table;
    #   2. download the fresh rule list, keeping the numbers of known rules
    #      and assigning new numbers to unknown ones;
    #   3. carry over rules that disappeared from the fresh list with act=0;
    #   4. save the result to a text file and to the YT table(s).
    parser = argparse.ArgumentParser()
    parser.add_argument('-a', '--all', action='store_true', help="Whether all rules or atom only")
    parser.add_argument('-u', '--url',            type=str, help="URL for obtaining rules dictionary")
    parser.add_argument('-r', '--route',          type=str, help="The type of mail for which the model is calculated")
    parser.add_argument('-o', '--output',         type=str, help="output YT table for saving fresh rules dictionary")
    parser.add_argument('-t', '--text_output',    type=str, help="output text file for saving fresh rules dictionary")
    # Unknown command-line arguments are deliberately ignored.
    args = parser.parse_known_args()[0]
    ROUTE = args.route if args.route else 'in'
    if args.url:
        RULES_DICT_URL = args.url
        # A custom URL without a query string gets the default "?route="
        # suffix; one that already has a query string is used as given and
        # the route value is appended directly to it.
        if '?' not in RULES_DICT_URL:
            if not RULES_DICT_URL.endswith('/'):
                RULES_DICT_URL += '/'
            RULES_DICT_URL += '?route='
    RULES_DICT_URL += ROUTE
    if args.all:
        RULES_DICT_URL += "&atom=no"
    YT_RULES_DICT_PATH = YT_RULES_DICT_PATH.format("_all" if args.all else "", ROUTE)
    # The template is fully expanded by now, so it is used as is. Applying
    # "% ROUTE" to it raised TypeError whenever --output was omitted.
    YT_RULES_DICT_OUTPUT_PATH = args.output if args.output else YT_RULES_DICT_PATH
    if args.text_output:
        RULES_DICT_OUTPUT_TEXT_PATH = args.text_output

    # curDict maps rule name -> [num, act] or [num, act, atom] as currently
    # stored in YT; maxNum is the highest rule number seen so far.
    rules, curDict, maxNum, txt_lines = [], {}, 0, []
    try:
        if ytw.exists(YT_RULES_DICT_PATH):
            for r in ytw.read_table(YT_RULES_DICT_PATH, format=ytw.JsonFormat(), raw=False):
                t = [r['num'], r['act']]
                if "atom" in r:
                    t.append(r["atom"])
                curDict[r['rule']] = t
                maxNum = max(maxNum, r['num'])
        response = urlopen(RULES_DICT_URL)
        try:
            for line in response:
                parts = line.split("\t")
                try:
                    # Validate the whole line first: a malformed line must
                    # neither consume a new number nor remove a known rule
                    # from curDict (it would then lose its stable number).
                    rule, act, atom = parse_rule_fields(parts)
                    if rule in curDict:
                        num = curDict.pop(rule)[0]
                    else:
                        maxNum += 1
                        num = maxNum
                    text = "%d\t%s\t%d" % (num, rule, act)
                    row = {'num': num, 'rule': rule, 'act': act}
                    if atom is not None:
                        text += "\t%d" % atom
                        row["atom"] = atom
                    # Only complete lines reach the text output.
                    txt_lines.append(text + "\n")
                    rules.append(row)
                except Exception as e:
                    sys.stderr.write("Error '%s' in row: %s\n" % (str(e), parts))
        finally:
            response.close()
        # Rules still left in curDict were absent from the fresh list: keep
        # their numbers but mark them inactive (act=0). They go to the YT
        # table only, not to the text file.
        for rule, known in curDict.items():
            row = {"num": known[0], "rule": rule, "act": 0}
            if len(known) > 2:
                row["atom"] = known[2]
            rules.append(row)
    except Exception as e:
        sys.stderr.write("Error: %s\n" % str(e))

    sys.stdout.write("Rows count: %d\n" % len(rules))
    # NOTE(review): the count includes rules carried over with act=0, not
    # only the freshly downloaded ones.
    if len(rules) >= 100:    # minimum reasonable value
        try:
            with open(RULES_DICT_OUTPUT_TEXT_PATH, 'w') as f:
                f.write("".join(txt_lines))
        except Exception as e:
            sys.stderr.write("Error writing text result in file: %s\n" % str(e))
        try:
            # Write the main table and, when it is a different path, the
            # requested output table; avoid rewriting the same table twice.
            table_paths = [YT_RULES_DICT_PATH]
            if YT_RULES_DICT_OUTPUT_PATH != YT_RULES_DICT_PATH:
                table_paths.append(YT_RULES_DICT_OUTPUT_PATH)
            for table_path in table_paths:
                if ytw.exists(table_path):
                    ytw.remove(table_path)
                ytw.write_table(table_path, rules, format=ytw.YsonFormat(), raw=False)
        except Exception as e:
            sys.stderr.write("YT error: %s\n" % str(e))
