#!/usr/bin/env python2
# encoding: utf-8
# kate: space-indent on; indent-width 4; replace-tabs on;
#
from __future__ import print_function
import os, os.path, sys, re, subprocess
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, 'WORKING_DIR')
import yt.wrapper as ytw
from collections import OrderedDict
from shutil import move
from tempfile import mkstemp
from web.common import writelog
from web.rules_common import RULES_DIRS, CheckingRepoContext

# Static configuration for rule parsing and for the MatrixNet model rules.
RULES = {
    # Absolute weight written to the MATRIXNET_*_ALL_C rules when the model is
    # enabled for the given route (see enableProdSlotModel).
    "mn_weight_in":   9.0,
    "mn_weight_out":  9.0,
    "mn_weight_corp": 9.0,
    # Path handed to CheckingRepoContext while the rules repository is in use.
    "update_lock":    "WORKING_DIR/temp/update_rules.lock",
    # One rule definition: from the "rule" keyword through "describe" plus the
    # shortest possible non-empty tail on that line (the final quantifier is lazy).
    "parse_re":       re.compile(r"(?:\n|^)\s*(rule.*?describe[^\r\n]+?)", re.S),
    # Marks a rule as composite: a bexpr/arithmetic line or an R_ANTI rule header.
    "comp_re":        re.compile(r"\n(?:bexpr|arithmetic)\s+[\w!(]|rule\s+\w+\s+R_ANTI\s+", re.M),
    # R_ANTI rule header. Raw string: "\s" and "\w" are not valid string escapes.
    "anti_re":        re.compile(r"^\s*rule\s+\w+\s+R_ANTI\s+", re.M),
    # Rule name: the first word after the "rule" keyword.
    "name_re":        re.compile(r"^\s*rule\s+(\w+)\s", re.M),
    # Rule weight: the trailing number on the "rule" header line.
    "weight_re":      re.compile(r"^\s*rule.+\s([-0-9.]+)\s*\r?\n", re.M),
    # A "#" comment up to and including the end of line, except "#rullng" markers.
    "text_re":        re.compile(r"#(?!rullng).*?\n"),
    # "#rullng" marker followed by whitespace.
    "rullng":         re.compile(r"#rullng[\s\t]", re.M)
}
# YT connection settings and table paths.
# "token" starts empty and is filled in by initYTwrapper().
# The "dict_*" tables are used when only atomic rules are requested and the
# "all_*" tables otherwise (see getCurDict); the suffix is the route name.
YT = dict(
    proxy="hahn.yt.yandex.net",
    token="",
    dict_in="//home/so_fml/nirvana/rules_dict_in",
    dict_out="//home/so_fml/nirvana/rules_dict_out",
    dict_corp="//home/so_fml/nirvana/rules_dict_corp",
    all_in="//home/so_fml/nirvana/rules_all_dict_in",
    all_out="//home/so_fml/nirvana/rules_all_dict_out",
    all_corp="//home/so_fml/nirvana/rules_all_dict_corp",
)

def initYTwrapper():
    """Load the YT token from disk and configure the ``yt.wrapper`` client.

    The token is read from ``WORKING_DIR/.yt_token`` or, when that file does
    not exist, from ``WORKING_DIR/.yt/token``. If neither file exists, the
    value already stored in ``YT["token"]`` is used as is. The proxy URL and
    the token are then written into ``ytw.config``.

    Exceptions are logged through ``writelog`` and are not re-raised.
    """
    ytTokenPath = "WORKING_DIR/.yt_token"
    try:
        if not os.path.exists(ytTokenPath):
            ytTokenPath = "WORKING_DIR/.yt/token"
        if os.path.exists(ytTokenPath):
            with open(ytTokenPath) as f:
                YT["token"] = f.read().strip()
        ytw.config['proxy']['url'] = YT["proxy"]
        ytw.config['token'] = YT["token"]
    except Exception as e:  # "as" form: valid on Python 2.6+ and Python 3
        writelog("loadYTtoken error: %s" % str(e), True)

class Rule:
    """A single rule definition parsed from rule-file text.

    The constructor strips "#" comments (except ``#rullng`` markers) from the
    given text and extracts the rule name, weight, type and language flag
    using the regular expressions stored in ``RULES``.
    """

    def __init__(self, text):
        """Parse *text*; raises IndexError when no rule name (or, for a
        non-R_ANTI rule, no weight) can be found in it."""
        body = RULES["text_re"].sub("", text)
        self.__code = 0
        self.__text = body
        self.__name = RULES["name_re"].findall(body)[0]
        # R_ANTI rules keep the integer weight 0; any other rule carries the
        # weight exactly as written in the file (a string).
        isAnti = RULES["anti_re"].search(body) is not None
        self.__weight = 0 if isAnti else RULES["weight_re"].findall(body)[0]
        self.__rtype = "COMP" if RULES["comp_re"].search(body) else "ATOM"
        self.__rullng = bool(RULES["rullng"].search(body))

    def name(self):
        """Return the rule name."""
        return self.__name

    def weight(self):
        """Return the weight: 0 for R_ANTI rules, else the matched string."""
        return self.__weight

    def text(self):
        """Return the rule text with comments removed."""
        return self.__text

    def rtype(self):
        """Return "COMP" for composite rules and "ATOM" otherwise."""
        return self.__rtype

    def islingvo(self):
        """Return True when the text contains a ``#rullng`` marker."""
        return self.__rullng


def _readExcludedRules(mnFolder):
    """Return the set of entries listed in the ``*.not`` files of *mnFolder*.

    Each line of each file is one entry with all whitespace removed.
    A missing folder yields an empty set.
    """
    excluded = set()
    if not os.path.exists(mnFolder):
        return excluded
    for ff in sorted(os.listdir(mnFolder)):
        if not re.search(r'\.not$', ff):
            continue
        with open(mnFolder + ff, 'r') as fin:
            for line in fin:
                excluded.add(re.sub(r'\s', '', line))
    return excluded


def getAtomRules(route='in', atom=True):
    """Collect rule names from the ``*.rul`` / ``*.dlv`` files of a route.

    Args:
        route: route name; ``route.capitalize()`` selects the directory list
            in ``RULES_DIRS``. For "in", "out" and "corp" the ``common/``
            sub-directory of ``RULES_DIRS["In"][0]`` is scanned as well.
        atom: when true, only "ATOM" rules are returned and names listed in
            the ``mn/*.not`` files of the scanned directories are skipped.
            When false, composite rules are returned too and no exclusion
            list is read.

    Returns:
        An ``OrderedDict`` mapping rule name to 1 for "ATOM" rules and to 0
        for any other rule type, in the order the rules were found.
    """
    # Work on a copy: RULES_DIRS is shared module state, and extending the
    # stored list in place would add another "common/" entry on every call.
    rulesDirs = list(RULES_DIRS[route.capitalize()])
    if route in ("in", "out", "corp"):
        rulesDirs.append("{}common/".format(RULES_DIRS["In"][0]))

    excludeRules, rules = set(), OrderedDict()
    with CheckingRepoContext(RULES['update_lock']):
        for rulesDir in rulesDirs:
            if atom:
                # Exclusions accumulate across directories, so entries found
                # in an earlier directory also apply to the later ones.
                excludeRules |= _readExcludedRules(rulesDir + "mn/")
            for ff in sorted(os.listdir(rulesDir)):
                if not re.search(r"\.(rul|dlv)$", ff):
                    continue
                if not os.access(rulesDir + ff, os.R_OK):
                    continue
                with open(rulesDir + ff, 'r') as fin:
                    content = fin.read().decode('koi8-r', 'ignore')
                for ruleInfo in RULES["parse_re"].findall(content):
                    rule = Rule(ruleInfo)
                    if rule.name() in excludeRules:
                        continue
                    if rule.rtype() == "ATOM":
                        rules[rule.name()] = 1
                    elif not atom:
                        rules[rule.name()] = 0
    return rules

def updateRulesRepo():
    """Run ``git pull`` in the rules repository ``RULES_DIRS['In'][0]``.

    The command is started without a shell, with the repository as working
    directory and its standard output discarded.

    Returns:
        ``0`` (the value returned by ``subprocess.check_call``) on success,
        otherwise the string ``"updateRulesRepo error: '<details>'"``.
        Exceptions are caught and reported through the return value.
    """
    try:
        with open(os.devnull, 'w') as devnull:
            return subprocess.check_call(['git', 'pull'],
                                         cwd=RULES_DIRS['In'][0],
                                         stdout=devnull)
    except Exception as e:
        return "updateRulesRepo error: '%s'" % str(e)

def getCurDict(route='in', atom=True):
    """Read the current rule dictionary for *route* from YT.

    Args:
        route: suffix of the ``YT`` table key ("in", "out" or "corp").
        atom: selects the ``dict_<route>`` table when true and the
            ``all_<route>`` table otherwise.

    Returns:
        An ``OrderedDict`` mapping each record's ``rule`` field to its
        ``num`` field, in table order. It is empty when the table does not
        exist or when a ``YtError`` occurred (the error is logged).
    """
    curDict = OrderedDict()
    try:
        initYTwrapper()
        table = YT['%s_%s' % ("dict" if atom else "all", route)]
        if ytw.exists(table):
            for rec in ytw.read_table(table, format=ytw.JsonFormat(), raw=False):
                curDict[rec['rule']] = rec['num']
    except ytw.errors.YtError as e:
        writelog("YT error: %s" % str(e), True)
    return curDict

def collectRules(route='in', atom=True):
    """Build the rule dictionary text for *route*.

    The rules repository is updated first (a failure is only logged). The
    dictionary currently stored in YT is then extended with rules found in
    the rule files: every new rule receives the next free number, counting
    on from the size of the stored dictionary.

    Args:
        route: route name passed on to getCurDict and getAtomRules.
        atom: when true only atomic rules are collected; when false every
            row also carries a fourth "atom" column.

    Returns:
        A string with one ``num<TAB>rule<TAB>act[<TAB>atom]`` line per rule,
        ordered by number and then by name. ``act`` is 1 for rules present
        in the rule files and 0 for rules known only from YT. Errors while
        building the rows are logged; the rows built so far are returned.
    """
    with CheckingRepoContext(RULES['update_lock']):
        pullResult = updateRulesRepo()
    if pullResult:
        writelog(pullResult)

    curDict = getCurDict(route, atom)
    rows = []
    try:
        maxRuleNumber = len(curDict)
        rules = getAtomRules(route, atom)
        for rule in rules:
            if rule not in curDict:
                maxRuleNumber += 1
                curDict[rule] = maxRuleNumber

        items = [{"num": curDict[k],
                  "rule": k,
                  "act": 1 if k in rules else 0,
                  "atom": rules.get(k, "0")} for k in curDict]
        # Right-aligned number followed by the name: orders by number first
        # and by rule name among equal numbers.
        items.sort(key=lambda el: "%10d%s" % (el['num'], el['rule']))

        for item in items:
            row = "%s\t%s\t%s" % (item['num'], item['rule'], item['act'])
            if not atom:
                row += "\t%s" % item["atom"]
            rows.append(row + "\n")
    except Exception as e:
        writelog("collectRules error: %s." % str(e), True)
    return str("".join(rows))

def makeCommit(msg):
    """Commit all tracked changes in the rules repository and push them.

    The repository is updated with updateRulesRepo() first. A non-empty
    result of that call is logged, and when it reports an error the commit
    is not attempted.

    Args:
        msg: commit message. It is passed to git as a single argument, so
            quotes and shell metacharacters in it are taken literally.

    Returns:
        ``0`` on success, otherwise an error string: either the one produced
        by updateRulesRepo() or ``"makeCommit error: '<details>'"``.
    """
    ret = updateRulesRepo()
    # updateRulesRepo() returns 0 on success and a message on failure, so a
    # truthy value is what has to be logged and checked.
    if ret:
        writelog("Updating rules: %s" % ret)
        if re.search(r'error|fatal|reject', str(ret)):
            return ret
    try:
        repoDir = RULES_DIRS['In'][0]
        subprocess.check_call(
            ['git', 'commit', '-am', msg, '--author=robot-mailspam'],
            cwd=repoDir)
        # Only the output of "git push" is discarded.
        with open(os.devnull, 'w') as devnull:
            ret = subprocess.check_call(['git', 'push', 'origin', 'master'],
                                        cwd=repoDir, stdout=devnull)
    except Exception as e:
        ret = "makeCommit error: '%s'" % str(e)
    return ret

def getProdSlotModelWeight(route='in'):
    """Read the weights of the MatrixNet model rules from ``ml.rul``.

    Args:
        route: route name; ``RULES_DIRS[route.capitalize()][0]`` is the
            directory that contains ``ml.rul``.

    Returns:
        A tuple ``(hw, sw)`` with the weights of ``MATRIXNET_HAM_ALL_C`` and
        ``MATRIXNET_SPAM_ALL_C``. A weight that is not found stays ``0``.
    """
    hw = sw = 0
    # Separate counter of matched rule lines. It must not share a name with
    # the file object, otherwise incrementing it fails with a TypeError.
    found = 0
    with open(RULES_DIRS[route.capitalize()][0] + 'ml.rul') as rulesFile:
        for line in rulesFile:
            line = line.strip()
            m = re.match(r'rule\s+MATRIXNET_HAM_ALL_C\s+(\-?\d[\d\.]*)', line)
            if m:
                hw = float(m.group(1))
                found += 1
            m = re.match(r'rule\s+MATRIXNET_SPAM_ALL_C\s+(\-?\d[\d\.]*)', line)
            if m:
                sw = float(m.group(1))
                found += 1
            if found > 1:
                break
    return (hw, sw)

def assignRuleWeight(rule, weight, filename, route='in'):
    """Rewrite the weight of *rule* in a rule file.

    Every line that starts with ``rule <rule> <number>`` gets its number
    replaced by *weight*; all other lines are copied unchanged.

    Args:
        rule: rule name (matched literally).
        weight: new weight, written as ``'{0}'.format(weight)``.
        filename: file name relative to ``RULES_DIRS[route.capitalize()][0]``.
        route: route name selecting the rules directory.

    Raises:
        IOError / OSError when the file cannot be read or replaced. In that
        case the original file is left untouched.
    """
    target = RULES_DIRS[route.capitalize()][0] + filename
    pattern = re.compile(r'^(rule\s+{0}\s+)(\-?\d\S*)'.format(re.escape(rule)))
    newWeight = '{0}'.format(weight)

    def withNewWeight(match):
        return match.group(1) + newWeight

    # The temporary file lives next to the target so that the final rename
    # stays on one filesystem and replaces the target in a single step.
    fd, tmp_filename = mkstemp(dir=os.path.dirname(target) or '.')
    try:
        with os.fdopen(fd, 'w') as f_out:
            with open(target) as f:
                for line in f:
                    f_out.write(pattern.sub(withNewWeight, line))
        # mkstemp creates the file with mode 0600; keep the target's mode.
        os.chmod(tmp_filename, os.stat(target).st_mode & 0o7777)
        os.rename(tmp_filename, target)
    finally:
        # Still present only when something failed before the rename.
        if os.path.exists(tmp_filename):
            os.unlink(tmp_filename)

def enableProdSlotModel(enable=True, route='in'):
    """Enable or disable the MatrixNet model rules for *route* and commit.

    ``MATRIXNET_HAM_ALL_C`` is set to ``-weight`` and ``MATRIXNET_SPAM_ALL_C``
    to ``weight`` in ``ml.rul``, where ``weight`` is
    ``RULES['mn_weight_<route>']`` when enabling and ``0`` when disabling.
    The change is then committed with makeCommit() while the repository lock
    is held. A failure reported by makeCommit() is written to the log.

    Args:
        enable: true to enable the model, false to disable it.
        route: route name ("in", "out" or "corp").
    """
    weight = RULES['mn_weight_%s' % route] if enable else 0
    with CheckingRepoContext(RULES['update_lock']):
        assignRuleWeight('MATRIXNET_HAM_ALL_C', -weight, 'ml.rul', route)
        assignRuleWeight('MATRIXNET_SPAM_ALL_C', weight, 'ml.rul', route)
        ret = makeCommit('Automatic %sabling of model in test slot' % ('en' if weight else 'dis'))
    # makeCommit() returns 0 on success and an error string otherwise.
    if ret:
        writelog(str(ret))

# Script entry point: print the rule dictionary for the default route.
if __name__ == '__main__':
    try:
        print(collectRules())
    except Exception as e:  # "as" form: valid on Python 2.6+ and Python 3
        writelog("Error: %s" % str(e), True)
