#!/skynet/python/bin/python

import sys
import os
import subprocess
import optparse
import re
import time
import socket
import collections

from api.cqueue import Client
from library.sky.hostresolver import Resolver

def make_tree(line, first=0):
    """Parse a bracket-nested log line into a nested list.

    Scanning starts at index ``first`` and stops at the first unmatched
    ``]`` or at the end of ``line``.  At each nesting level:

    * every ``[...]`` group becomes a nested list (parsed recursively);
    * the first space seen before any bracket group ends the leading token,
      so the text after it is collected as a separate string;
    * text segments are stripped, and empty strings / empty sub-lists are
      dropped from the result.

    Returns a ``(tree, next_index)`` tuple, where ``next_index`` is one past
    the position where scanning stopped (i.e. just after the closing ``]``,
    or ``len(line) + 1`` when the input ran out).
    """
    parts = [""]
    pos = first
    end = len(line)

    while pos < end:
        ch = line[pos]

        if ch == ']':
            # Closing bracket of this level; the caller resumes after it.
            break

        if ch == '[':
            child, pos = make_tree(line, pos + 1)
            # A fresh text segment is opened after every nested group.
            parts.extend((child, ""))
            continue

        if ch == ' ' and len(parts) == 1:
            # First space at this level: start the second text segment.
            # The space itself lands in that segment and is stripped later.
            parts.append("")

        parts[-1] += ch
        pos += 1

    stripped = (p.strip() if isinstance(p, str) else p for p in parts)
    cleaned = [p for p in stripped if p]

    return cleaned, pos + 1

def get_main_proxy(tree):
    """Collect the outermost ``proxy`` nodes of a parsed log tree.

    ``tree`` is a nested list as produced by ``make_tree``: element 0 is the
    node name, the remaining elements are strings or child nodes (lists).

    The search descends depth-first through child nodes.  A node named
    ``proxy`` is returned as-is and not searched further; subtrees rooted at
    ``sub_antirobot``, ``geo`` or ``uaas`` are ignored entirely (presumably
    because their proxies are auxiliary sub-requests -- confirm).

    Returns a list of the matching ``proxy`` nodes, empty when there are none
    or when ``tree`` itself is empty.
    """
    # make_tree yields [] for blank or bracket-only input; indexing it would
    # raise IndexError.
    if not tree:
        return []

    head = tree[0]

    if head in ("sub_antirobot", "geo", "uaas"):
        return []

    if head == "proxy":
        return [tree]

    found = []
    for node in tree:
        # Plain strings are payload text, only lists are child nodes.
        if not isinstance(node, str):
            found.extend(get_main_proxy(node))

    return found

def has_on_error(tree):
    """Tell whether a parsed log tree contains an ``on_error`` node.

    ``tree`` is a nested list as produced by ``make_tree``.  Subtrees rooted
    at ``sub_antirobot``, ``geo`` or ``uaas`` are not inspected.

    Returns ``True`` if the node itself or any reachable child node is named
    ``on_error``, ``False`` otherwise (including for an empty tree).
    """
    # make_tree yields [] for blank or bracket-only input; indexing it would
    # raise IndexError.
    if not tree:
        return False

    head = tree[0]

    if head in ("sub_antirobot", "geo", "uaas"):
        return False

    if head == "on_error":
        return True

    # Only lists are child nodes; strings are payload text.
    return any(has_on_error(node) for node in tree if not isinstance(node, str))

def get_section(tree):
    """Return the section name recorded in a parsed log tree.

    ``tree`` is a nested list as produced by ``make_tree``.  The section is
    the element following the node name of the first ``regexp_path`` or
    ``prefix_path_router`` node found by a depth-first search.  Subtrees
    rooted at ``sub_antirobot``, ``geo`` or ``uaas`` are not inspected.

    Returns the section, or ``None`` when no router node is found, when the
    router node carries nothing after its name, or when ``tree`` is empty.
    """
    # make_tree yields [] for blank or bracket-only input; indexing it would
    # raise IndexError.
    if not tree:
        return None

    head = tree[0]

    if head in ("sub_antirobot", "geo", "uaas"):
        return None

    if head in ('regexp_path', 'prefix_path_router'):
        # A router node without anything after its name has no section.
        return tree[1] if len(tree) > 1 else None

    for node in tree:
        # Only lists are child nodes; strings are payload text.
        if isinstance(node, str):
            continue
        section = get_section(node)
        if section:
            return section

    return None

def is_robot(tree):
    """Tell whether a parsed log tree marks the request as a robot.

    ``tree`` is a nested list as produced by ``make_tree``.  A request counts
    as a robot when some node named ``sub_antirobot`` has the string
    ``robot`` as its last element; the whole tree is searched depth-first.

    Returns ``True`` or ``False`` (``False`` for an empty tree).
    """
    # make_tree yields [] for blank or bracket-only input; indexing it would
    # raise IndexError.
    if not tree:
        return False

    if tree[0] == "sub_antirobot" and tree[-1] == "robot":
        return True

    # Only lists are child nodes; strings are payload text.
    return any(is_robot(node) for node in tree if not isinstance(node, str))


class Stat(object):
    """Accumulates findings from balancer access-log lines.

    Attributes:
        errors: ``(section, proxy_payload)`` for every main proxy record
            whose payload does not end in ``succ <3 digits>``.
        failures: ``(section, line)`` for lines that have no successful main
            proxy and are not covered by one of the exemptions in
            ``add_line``.
        on_error: ``(section, line)`` for lines containing an ``on_error``
            node.
        section_filter: optional regexp; when set, only lines whose section
            matches it (``re.match``) are counted.
    """

    # NOTE(review): not read anywhere in this file; presumably consulted by
    # the cqueue transport when results are sent back -- confirm.
    allow_unpickle = True

    def __init__(self, section_filter):
        self.errors = []
        self.failures = []
        self.on_error = []
        self.section_filter = section_filter

    def add_line(self, line):
        """Parse one access-log line and record what it shows.

        Empty and whitespace-only lines, and lines that parse to an empty
        tree, are ignored.
        """
        # A whitespace-only line is not caught by a plain truthiness check
        # and would parse to an empty tree.
        if not line or not line.strip():
            return

        tree = make_tree(line)[0]
        if not tree:
            # e.g. a line consisting only of "[]"
            return

        section = get_section(tree)

        if self.section_filter and (not section or not re.match(self.section_filter, section)):
            return

        has_succ = False
        for proxy in get_main_proxy(tree):
            if re.match(r'.+succ \d\d\d$', proxy[1]):
                has_succ = True
            else:
                self.errors.append((section, proxy[1]))

        # A line is a failure when no main proxy succeeded, unless it is one
        # of the known request kinds listed here.
        is_failure = (
            not has_succ
            and "click click_lib_answer " not in line
            and section not in ("fast_static_check", "noc_static_check")
            and "errordocument succ " not in line
            and not is_robot(tree)
        )
        if is_failure:
            self.failures.append((section, line))

        if has_on_error(tree):
            self.on_error.append((section, line))

    def merge(self, another_stat):
        """Append all records of ``another_stat`` to this object."""
        self.errors.extend(another_stat.errors)
        self.failures.extend(another_stat.failures)
        self.on_error.extend(another_stat.on_error)



class LogGetter:
    """Callable job: tail the local balancer access log and summarise it.

    ``port_map`` maps a host name (as reported by ``socket.gethostname()``)
    to the port that is part of the access-log file name.  Calling the
    object returns a ``Stat`` built from the last ``request_count`` lines of
    that log, filtered by ``section_filter``.
    """

    def __init__(self, section_filter, request_count, port_map):
        self.section_filter = section_filter
        self.request_count = request_count
        self.port_map = port_map

    def __call__(self):
        try:
            log_path = "/usr/local/www/logs/current-access_log-balancer-{}".format(
                self.port_map[socket.gethostname()])
            tail_cmd = ["tail", "-n", str(self.request_count), log_path]
            output = subprocess.check_output(tail_cmd)

            collected = Stat(self.section_filter)
            for entry in output.split('\n'):
                collected.add_line(entry)

            return collected
        except Exception as e:
            # Any failure is re-raised as a plain Exception carrying only the
            # message text (presumably so it survives the trip back to the
            # caller -- confirm).
            raise Exception(str(e))

# Host groups used by the "term" preset and when no groups are given at all.
L7_GROUPS = [
    "G@SAS_L7_BALANCER",
    "G@VLA_L7_BALANCER_PROD",
    "G@MAN_L7_BALANCER",
]

# Usage text handed to the option parser.
usage = (
    "./log_stat.py [options] [preset]\n"
    "\n"
    "will go to specified balancer groups, get accesslog tail, "
    "print smth (e.g. aggregated backend errors) "
)

# Values accepted by the -m/--mode option.
available_modes = [
    "errors_by_backend",
    "raw_errors",
    "raw_failures",
    "on_error",
]

def main():
    opt_parser = optparse.OptionParser(usage=usage)
    opt_parser.add_option("-s", "--section", action="store", type=str, dest="section", help="section (morda, search, etc) regexp filter")
    opt_parser.add_option("-l", "--lines", action="store", type=int, dest="tail_lines", default=1000, help="number of tail lines of accesslog")
    opt_parser.add_option("-i", "--instances", action="store", type=int, dest="instance_count", default=10, help="number of instances to get log")
    opt_parser.add_option("-p", "--log_port", action="store", type=int, dest="log_port", default=0, help="override port (part of accesslog filename)")
    opt_parser.add_option("-g", "--groups", action="append", type=str, dest="groups", default=[], help="groups of hosts to get log (e.g., G@SAS_L7_BALANCER)" )
    opt_parser.add_option("-m", "--mode", dest="mode", default="errors_by_backend", choices=available_modes, help="output mode, one of: {}".format(", ".join(available_modes)))

    (options, args) = opt_parser.parse_args()

    stat = Stat(options.section)

    if len(args) > 0:
        if args[0] == "term":
            options.groups = L7_GROUPS
            options.log_port = 8081
        elif args[0] == "knoss_search":
            options.groups = ["G@SAS_L7_BALANCER_KNOSS_SEARCH", "G@VLA_L7_BALANCER_KNOSS_SEARCH", "G@MAN_L7_BALANCER_KNOSS_SEARCH"]
        elif args[0] == "knoss_morda":
            options.groups = ["G@SAS_PORTAL_MORDA_L7", "G@VLA_PORTAL_MORDA_L7", "G@MAN_PORTAL_MORDA_L7"]
        elif args[0] == "fast":
            options.groups = ["G@SAS_L7PROD_BALANCER_FAST", "G@VLA_L7PROD_BALANCER_FAST", "G@MAN_L7PROD_BALANCER_FAST"]
            options.log_port = 23011
        else:
            assert False, "unknown preset"
    else:
        if not options.groups:
            options.groups = L7_GROUPS

        if not options.log_port:
            options.log_port = 8081

    with Client() as cqueue:
        hosts = []
        port_map = {}

        for g in options.groups:
            group_hosts = []
            resolver_result = Resolver().resolveInstances(g)
            if resolver_result:
                for host, instances in resolver_result.iteritems():
                    group_hosts.append(host)

                    if len(instances) > 1:
                        assert False, "multiple instances on host"

                    port = list(instances)[0][1].split('@')[0].split(':')[1]
                    assert not host in port_map, "hosts intersection"
                    port_map[host] = port
            else:
                resolver_result = Resolver().resolveHosts(g)
                assert options.log_port, "only hosts resolved, please provide port explicitly (-p option)"
                for host in resolver_result:
                    group_hosts.append(host)
                    assert not host in port_map, "hosts intersection"
                    port_map[host] = options.log_port

            if 0 < options.instance_count and options.instance_count < len(group_hosts):
                group_hosts = group_hosts[0:options.instance_count]

            hosts.extend(group_hosts)

        if options.log_port:
            for host in port_map:
                port_map[host] = options.log_port

        with cqueue.run(hosts, LogGetter(options.section, options.tail_lines, port_map)) as session:
            for host, result, err in session.wait():
                if err:
                    print >>sys.stderr, 'Error on {0}: `{1}`'.format(host, err[0:100])
                elif result is not None:
                    stat.merge(result)

    if options.mode == "raw_errors":
        for err in sorted(stat.errors):
            print err[0], err[1]
    elif options.mode == "raw_failures":
        for failure in sorted(stat.failures):
            print failure[0], failure[1]
    elif options.mode == "on_error":
        for on_error in sorted(stat.on_error):
            print on_error[0], on_error[1]
    elif options.mode == "errors_by_backend":
        counter = collections.defaultdict(int)
        for err in sorted(stat.errors):
            fields = err[1].split()

            s = 2
            if re.match("\d+/\d+", fields[2]):
                s = 3

            counter[(err[0], fields[0], ' '.join(fields[s:]))] += 1

        for key, value in sorted(counter.items(), key=lambda x: x[1]):
            print value, ' '.join(key)


# Run the command-line tool only when executed as a script, not on import.
if __name__ == "__main__":
    main()
