#!/usr/bin/env python
# coding=utf-8
import argparse
import json
import os
import re
import socket
import sys
import time
import types
# from pprint import pprint as pp

from thrift.Thrift import TException

import iss_thrift3.ttypes as ttypes
from iss_thrift3.IssService import Client

import thriftlite

# Program version; handed to the argument parser in get_cli_options().
VERSION = '0.1.16'

# Switched on by main() when --verbose is given; debug() prints only when it is true.
VERBOSE = False
# Default value of --cacher-url. main() enables switching to another replica
# only while this exact URL is in use.
DEFAULT_CACHER_URL = 'thrift://iss3.yandex-team.ru:9090'
# Host names in raw input are separated by any run of whitespace, commas or plus signs.
HOST_DELIMITER_RE = re.compile(r'[\s,+]+', re.I | re.S)
# State value; instances whose current and target states both equal it are skipped by the filters.
REMOVED = 'REMOVED'
# NOTE(review): not referenced anywhere in the visible part of this file.
DEFAULT_HOSTS_PER_QUERY = 200
# Default value of -t/--timeout, in seconds.
DEFAULT_TIMEOUT = 60
# get_config_hosts() exits with an error when asked about more configurations than this.
MAX_CONFIGS_TO_QUERY = 10
# Number of hosts handled per iteration in do_report_instances().
HOSTS_PER_BATCH = 10
# When true, a PersistentHostStatus stored in '.host-status' is created and
# load_state_of_hosts() records every host as bad before querying it and as good afterwards.
DEBUG_FIND_BAD_HOSTS = False  # catching bad host data in Cacher


def console(msg, prefix=True, eol=True):
    """
    Write ``msg`` to stdout.

    :param msg: text to write.
    :param prefix: when true, ``'# '`` is written before the message.
    :param eol: when true, a newline is written after the message.
    """
    chunks = []
    if prefix:
        chunks.append('# ')
    chunks.append(msg)
    if eol:
        chunks.append('\n')

    # Each piece is written separately, in order.
    for chunk in chunks:
        sys.stdout.write(chunk)


def console_append(msg, eol=True):
    """
    Write ``msg`` to stdout without the ``'# '`` prefix.

    :param msg: text to write.
    :param eol: when true, a newline is written after the message.
    """
    console(msg, False, eol)


def debug(msg):
    """Print ``msg`` through console() only when the module-level VERBOSE flag is set."""
    if not VERBOSE:
        return
    console(msg)


def read_stdin():
    """
    Return everything available on stdin, or ``''`` when stdin is a terminal.

    A notice is printed before reading so the user knows the program is waiting for input.
    """
    if not sys.stdin.isatty():
        console("reading host names from stdin...")
        return sys.stdin.read()

    return ''


def read_file(fn, **kw):
    """
    Read a file either as raw data or as a list of lines.

    :param fn: path of the file to read.
    :param kw: ``binmode`` - when true the file is opened in binary mode and its whole
        content is returned; otherwise a list of lines without line terminators is returned.
    :return: file content (binary mode) or a list of lines (text mode).
    :raises AssertionError: when ``fn`` is not an existing file, or when the amount of
        data read in binary mode differs from the size of the file on disk.
    """
    # Raised explicitly (rather than with ``assert``) so the checks survive ``python -O``
    # while callers still see the same exception type.
    if not os.path.isfile(fn):
        raise AssertionError('file not found: "%s"' % fn)

    if kw.get('binmode', 0):
        # ``with`` closes the file even when reading fails.
        with open(fn, 'rb') as f:
            rc = f.read()
        file_size = os.path.getsize(fn)
        if len(rc) != file_size:
            raise AssertionError(
                'read_file error: read %d bytes, file on disk: %d bytes -- %s' % (len(rc), file_size, fn))
        return rc

    with open(fn, 'r') as f:
        # Build a real list while the file is still open; a lazy ``map`` would be
        # consumed only after the file had been closed.
        return [line.splitlines()[0] for line in f]


def write_file(fn, data, **kw):
    """
    Write data to a file, replacing its previous content.

    :param fn: path of the file to write.
    :param data: in binary mode, the content to write as is; in text mode, an iterable
        of strings that is joined with ``'\\n'`` (no trailing newline is added).
    :param kw: ``binmode`` - when true the file is opened in binary mode.
    :return: always ``True``.
    """
    binmode = kw.get('binmode', 0)

    # ``with`` closes the file even when joining or writing fails.
    with open(fn, 'wb' if binmode else 'w') as f:
        if binmode:
            f.write(data)
        else:
            f.write('\n'.join(data))

    return True


class PersistentHostStatus(object):
    """
    Two disjoint sets of host names, "good" and "bad", stored as JSON in a file.

    The state is loaded from the file on construction (when the file exists) and
    written back after every add_good()/add_bad() call.
    """

    def __init__(self, fn):
        self.fn = fn
        self.good = set()
        self.bad = set()
        self.load_state()

    def load_state(self):
        """Replace both sets with the content of ``self.fn``; do nothing when the file is absent."""
        if os.path.isfile(self.fn):
            stored = json.loads(read_file(self.fn, binmode=True))
            self.good = set(stored['good'])
            self.bad = set(stored['bad'])
            # Single-argument print() prints the same text under the Python 2 print statement.
            print("load bad hosts: %s" % (self.bad, ))
            overlap = self.good.intersection(self.bad)
            assert not overlap, "common items in good and bad: %r" % (overlap, )

    def is_good(self, host):
        """Return True when ``host`` is recorded as good."""
        return host in self.good

    def is_bad(self, host):
        """Return True when ``host`` is recorded as bad."""
        return host in self.bad

    def add_good(self, host):
        """Record ``host`` as good (removing it from the bad set) and save the state."""
        self.bad.discard(host)  # no-op when the host is not there
        self.good.add(host)
        self.flush()

    def add_bad(self, host):
        """Record ``host`` as bad (removing it from the good set) and save the state."""
        self.good.discard(host)  # no-op when the host is not there
        self.bad.add(host)
        self.flush()

    def flush(self):
        """Serialize both sets to JSON and write them to ``self.fn``."""
        serialized = json.dumps(dict(good=list(self.good), bad=list(self.bad)))
        write_file(self.fn, serialized, binmode=True)
        print("written: %d good, %d bad" % (len(self.good), len(self.bad)))


# The persistent good/bad host registry exists only in bad-host debugging mode.
HOST_STATUS = PersistentHostStatus('.host-status') if DEBUG_FIND_BAD_HOSTS else None


class InstanceState(object):
    """
    Compact record describing one instance: its slot, configuration and states.

    The slot is split at the first ``'@'`` into ``port`` (also stored as ``service``)
    and ``host``. Attributes can also be read with ``obj['name']``, which lets an
    instance be used as the mapping in ``'%(name)s' % obj`` formatting.
    """
    __slots__ = ('current_state', 'target_state', 'configuration', 'slot', 'host', 'port', 'service', 'bad_resources',
                 'fb_time_hms', )

    def __init__(self, thrift_instance, thrift_instance_state):
        """
        :param thrift_instance: object providing ``targetState``, ``id.configuration`` and ``id.slot``.
        :param thrift_instance_state: object providing ``instance.feedbackState`` and ``instance.targetState``.
        """
        reported = thrift_instance_state.instance
        self.current_state = reported.feedbackState
        self.target_state = reported.targetState
        # The two sources are expected to agree on the target state; a mismatch is only reported.
        if thrift_instance.targetState != reported.targetState:
            console("error: h_i_state=%r != i_state=%r" %
                    (thrift_instance.targetState, reported.targetState))
        identity = thrift_instance.id
        self.configuration = identity.configuration
        self.slot = identity.slot
        self.port, self.host = self.slot.split('@', 1)
        self.service = self.port
        self.bad_resources = []
        self.fb_time_hms = ''

    def __cmp__(self, other):
        """Order by host, then by service: numerically when both services are digits, else as strings."""
        by_host = cmp(self.host, other.host)
        if by_host != 0:
            return by_host

        mine, theirs = self.service, other.service
        if mine.isdigit() and theirs.isdigit():
            return cmp(int(mine), int(theirs))
        return cmp(mine, theirs)

    def __getitem__(self, item):
        """Return the attribute named ``item``."""
        return getattr(self, item)

    def __sizeof__(self):
        """Return the sum of ``sys.getsizeof`` over all slot values (unset slots count as ``None``)."""
        return sum(sys.getsizeof(getattr(self, slot_name, None)) for slot_name in self.__slots__)


def format_time_hms(mtime_msec, now=None):
    """
    Format the age of a timestamp as a compact string such as ``'1h2m3s'``.

    Units are weeks, days, hours, minutes and seconds. Leading units that are zero
    are dropped, but seconds are always present, so a zero age is ``'0s'``.
    A timestamp that lies in the future (for example because of clock skew) is
    rendered as the absolute difference with a leading ``'-'``.

    :param mtime_msec: timestamp in milliseconds since the epoch.
    :param now: current time in seconds since the epoch; defaults to ``time.time()``.
    :return: formatted age.
    """
    if now is None:
        now = time.time()

    age = int(now) - mtime_msec // 1000

    # divmod() on a negative age would yield a misleading value such as "-1w6d23h59m55s".
    sign = ''
    if age < 0:
        sign = '-'
        age = -age

    age, sec = divmod(age, 60)
    age, minute = divmod(age, 60)
    days, hour = divmod(age, 24)
    week, day = divmod(days, 7)
    parts = [
        (week, 'w'),
        (day, 'd'),
        (hour, 'h'),
        (minute, 'm'),
        (sec, 's'),
    ]

    # Skip leading zero units; the last entry (seconds) is always kept.
    first = 0
    while first < len(parts) - 1 and not parts[first][0]:
        first += 1

    return sign + ''.join("%d%s" % part for part in parts[first:])


def add_error_details(proxy, instance):
    """
    Fill ``instance.fb_time_hms`` and ``instance.bad_resources`` from the detailed instance state.

    The detailed state is requested through ``proxy.getDetailedInstanceState``. A feedback
    property named ``BASE.SUFFIX`` whose ``BASE`` is itself a property key makes ``BASE``
    a reported resource: its value is the cause and its path is taken from ``BASE.path``,
    then ``BASE.to``, then ``'UNKNOWN'``. Each resource is reported once; the result is
    sorted by path.

    :param proxy: cacher client.
    :param instance: object with ``slot`` and ``configuration``; it is updated in place.
    :return: the same ``instance`` object.
    """
    detailed = proxy.getDetailedInstanceState(ttypes.InstanceId(instance.slot, instance.configuration))

    feedback = detailed.instance.feedback
    properties = feedback.properties

    instance.fb_time_hms = 'feedback_age=' + format_time_hms(feedback.timestamp)

    reported = []
    seen_bases = set()

    for prop_name in properties.iterkeys():
        base, dot, _ = prop_name.rpartition('.')

        # Names without a dot cannot refer to a resource.
        if not dot:
            continue

        if base not in properties or base in seen_bases:
            continue

        seen_bases.add(base)

        path = properties.get(base + '.path', 'UNKNOWN')
        if path == 'UNKNOWN':
            path = properties.get(base + '.to', 'UNKNOWN')

        reported.append({'name': base, 'path': path, 'cause': properties[base]})

    reported.sort(key=lambda resource: resource['path'])
    instance.bad_resources = reported

    return instance


def load_state_of_hosts(proxy, hosts, instance_filter_proc, host_instance_conf_filter=None):
    """
    Query the cacher for the instances of the given hosts and return the filtered states.

    :param proxy: cacher client; ``getHostConfiguration`` and ``getInstanceState`` are called on it.
    :param hosts: host names to query.
    :param instance_filter_proc: predicate applied to every InstanceState; only accepted ones are returned.
    :param host_instance_conf_filter: optional predicate applied to the raw per-host instance
        records before their state is requested.
    :return: list of InstanceState objects accepted by ``instance_filter_proc``.
    """
    host_records = []

    for fqdn in hosts:
        if DEBUG_FIND_BAD_HOSTS:
            if HOST_STATUS.is_good(fqdn):
                continue
            # Marked bad up front; flipped to good only when the query below succeeds.
            HOST_STATUS.add_bad(fqdn)

        debug('query host: %s' % (fqdn, ))
        host_config = proxy.getHostConfiguration(fqdn, None)

        if DEBUG_FIND_BAD_HOSTS:
            HOST_STATUS.add_good(fqdn)

        host_records.extend(host_config.instances)

    debug("total instances: %d" % len(host_records))

    if callable(host_instance_conf_filter):
        host_records = [record for record in host_records if host_instance_conf_filter(record)]
        debug("total instances: %d (after host filtering)" % len(host_records))

    states = [InstanceState(record, proxy.getInstanceState(record.id)) for record in host_records]

    console('  %d hosts in batch, %d total instances' %
            (len(hosts), len(states)))

    accepted = [state for state in states if instance_filter_proc(state)]
    console('  %d filtered instances' % len(accepted))

    return accepted


def get_list_instance_filter(options):
    """
    Build the instance predicate for the ``list`` command.

    The predicate rejects instances whose current and target states are both REMOVED and,
    unless ``options.full_instance_list`` is set, instances whose current state equals
    the target state.
    """
    assert is_list_command(options.command)

    def keep_this_instance(instance_state):
        current = instance_state.current_state
        target = instance_state.target_state

        # removed instances are never shown
        if current == REMOVED and target == REMOVED:
            return False

        if options.full_instance_list:
            return True

        # without --full only instances that are out of sync are kept
        return not (current == target)

    return keep_this_instance


def get_listconfig_filters(options, config_names):
    """
    Build the two predicates used by the ``listconfig`` command.

    :param options: parsed command-line options.
    :param config_names: configuration names of interest.
    :return: ``(instance_filter, host_instance_filter)``. The first one takes an
        InstanceState and rejects removed instances, synced instances (unless
        ``options.full_instance_list`` is set) and instances of other configurations.
        The second one takes a raw per-host instance record and accepts it only when
        its ``id.configuration`` is in ``config_names``.
    """
    assert is_listconfig_command(options.command)

    def keep_this_instance(instance_state):
        current = instance_state.current_state
        target = instance_state.target_state

        # removed instances are never shown
        if current == REMOVED and target == REMOVED:
            return False

        # without --full, synced instances are hidden
        if not options.full_instance_list and current == target:
            return False

        return instance_state.configuration in config_names

    def keep_this_host_instance(host_instance):
        return host_instance.id.configuration in config_names

    return keep_this_instance, keep_this_host_instance


# Hosts that had at least one instance whose current state differed from its target state;
# filled by report_instances() and printed by report_failed().
FAILED_HOSTS = set()
# Number of such instances, counted by report_instances().
FAILED_INSTANCE_COUNT = 0


def report_failed():
    """Print the number of failed instances and, when there are any, the sorted list of failed hosts."""
    console('failed instances: %d' % FAILED_INSTANCE_COUNT)

    if not FAILED_HOSTS:
        return

    # Every host is prefixed with "+" and the hosts are separated by spaces.
    host_list = " ".join("+" + h for h in sorted(FAILED_HOSTS))
    console("failed hosts (%d): %s" % (len(FAILED_HOSTS), host_list))


def report_instances(proxy, options, instances):
    """
    Print one line per instance and update the module-level failure counters.

    Instances are printed in sorted order. Unless ``options.full_instance_list`` is set,
    instances whose current state equals the target state are skipped. With
    ``options.expand_failed_resources`` or ``options.feedback_age`` extra details are
    requested through add_error_details(); with ``options.expand_failed_resources`` the
    resources collected there are printed below the instance line.

    :param proxy: cacher client, passed on to add_error_details().
    :param options: parsed command-line options.
    :param instances: iterable of InstanceState objects.
    """
    global FAILED_HOSTS
    global FAILED_INSTANCE_COUNT

    show_synced = options.full_instance_list

    for instance in sorted(instances):
        in_sync = instance.current_state == instance.target_state

        if in_sync and not show_synced:
            continue

        if not in_sync:
            FAILED_HOSTS.add(instance.host)
            FAILED_INSTANCE_COUNT += 1

        if options.expand_failed_resources or options.feedback_age:
            instance = add_error_details(proxy, instance)

        # Single-argument print() prints the same text under the Python 2 print statement.
        print("%(slot)s %(configuration)s target=%(target_state)s current=%(current_state)s %(fb_time_hms)s" % instance)

        if options.expand_failed_resources:
            for resource in instance['bad_resources']:
                print('    %(cause)s - %(path)s' % resource)


def add_default_domain(host, default_domain):
    """
    Append ``default_domain`` to a host name that contains no dot.

    :param host: host name; returned unchanged when it already contains a dot.
    :param default_domain: domain to append, with or without a leading dot.
    :return: host name with the domain attached by exactly one dot.
    """
    if '.' in host:
        return host

    glue = '' if default_domain.startswith('.') else '.'
    return host + glue + default_domain


def extract_hosts(raw_host_data):
    """
    Extract a sorted list of unique host names from raw input.

    :param raw_host_data: a string with host names separated according to
        HOST_DELIMITER_RE, or a tuple/list/set of such values (nesting is allowed).
    :return: sorted list of unique, non-empty host names.
    :raises TypeError: when the input (or a nested item) has an unsupported type.
    """
    result = set()

    if isinstance(raw_host_data, types.StringTypes):
        stripped = (h.strip() for h in HOST_DELIMITER_RE.split(raw_host_data))
        result.update(h for h in stripped if h)
    elif isinstance(raw_host_data, (tuple, list, set)):
        for item in raw_host_data:
            # The recursive call returns a list; ``set |= list`` raises TypeError,
            # so the items are merged with update(), which accepts any iterable.
            result.update(extract_hosts(item))
    else:
        raise TypeError('unsupported type: %r -- %r' % (type(raw_host_data), raw_host_data))

    return sorted(result)


def get_configs_with_prefix(proxy, prefix, branch, exact_config_name):
    """
    Return the configuration names to process.

    :param proxy: cacher client; ``getBranchHeads`` is called unless ``exact_config_name`` is set.
    :param prefix: configuration name prefix, or the full name when ``exact_config_name`` is set.
    :param branch: branch whose configurations are examined.
    :param exact_config_name: when true, ``prefix`` is returned as the only name and the
        branch is not queried.
    :return: list of configuration names.
    """
    if not exact_config_name:
        heads = proxy.getBranchHeads(branch)
        console("total %d configurations in branch %s" % (len(heads.configurations), branch))
        matching = [name for name in heads.configurations if name.startswith(prefix)]
        console('%d configurations with prefix %r found in branch %s' % (len(matching), prefix, branch))
        return matching

    matching = [prefix, ]
    console('use specified configuration: %s, skip querying branch' % (prefix, ))
    return matching


def get_host_from_slot(slot):
    """
    Return the part of ``slot`` after the first ``'@'``.

    When the slot contains no ``'@'`` it is returned unchanged.
    """
    _, at_sign, host = slot.partition('@')
    return host if at_sign else slot


def get_config_hosts(proxy, config_names):
    """
    Return the set of hosts that have instances of the given configurations.

    filter syntax:
    https://wiki.yandex-team.ru/iss3/Specifications/queries

    :param proxy: cacher client; its ``query`` method is called once.
    :param config_names: one configuration name (string) or a list/tuple of names.
    :return: set of host names taken from the slots in the query result.
    :raises TypeError: when ``config_names`` has an unsupported type.
    :raises SystemExit: when more than MAX_CONFIGS_TO_QUERY names are given.
    """
    names = []
    if isinstance(config_names, types.StringTypes):
        # A single name is wrapped into a list (the list used to be appended to itself here).
        names.append(config_names)
    elif isinstance(config_names, (list, tuple)):
        names.extend(config_names)
    else:
        raise TypeError("unsupported type for config_names: %r, %r" % (type(config_names), config_names))

    names.sort()

    if len(names) > MAX_CONFIGS_TO_QUERY:
        for i, cname in enumerate(names, start=1):
            console("[%d] %s" % (i, cname))

        message = "ERROR: too many (%d) configurations in query. " \
                  "Maximum %d configurations allowed, " \
                  "please clarify config name prefix" % (len(names), MAX_CONFIGS_TO_QUERY)
        sys.exit(message)

    config_clause = ' || '.join('(configuration.id == "%s")' % cname for cname in names)
    # NOTE(review): with several names the filter reads "A || B && any-instance"; if "&&"
    # binds tighter than "||" in the query language, the whole "||" chain needs
    # parentheses -- confirm against the filter syntax specification.
    mvel_filter = config_clause + ' && any-instance'
    debug('mvel filter: %r' % mvel_filter)

    response = proxy.query(mvel_filter, ttypes.Portion(0, 2147483647))

    # presumably every item of ``dump.instances`` is a slot string -- verify against the cacher API
    slots = set()
    for dump in response.result:
        slots.update(dump.instances)

    hosts = set(get_host_from_slot(slot) for slot in slots)
    debug("get_config_hosts: got %d hosts" % len(hosts))

    return hosts


class AliasedSubParsersAction(argparse._SubParsersAction):
    """
    Sub-parsers action whose ``add_parser`` accepts an ``aliases`` keyword.

    https://gist.github.com/sampsyo/471779
    """

    class _AliasedPseudoAction(argparse.Action):
        """Help-only action whose ``dest`` is shown as ``name (alias1,alias2)``."""

        def __init__(self, name, aliases, help):
            shown_name = name
            if aliases:
                shown_name = '%s (%s)' % (name, ','.join(aliases))
            argparse.Action.__init__(self, option_strings=[], dest=shown_name, help=help)

    def add_parser(self, name, **kwargs):
        """
        Create a sub-parser reachable by ``name`` and by every alias.

        :param name: primary command name.
        :param kwargs: arguments for the parent ``add_parser`` plus optional ``aliases``.
        :return: the created parser.
        """
        # The parent class must not see the ``aliases`` keyword.
        aliases = kwargs.pop('aliases', [])

        parser = super(AliasedSubParsersAction, self).add_parser(name, **kwargs)

        # Route every alias to the same parser.
        for alias in aliases:
            self._name_parser_map[alias] = parser

        if 'help' not in kwargs:
            return parser

        # Replace the help entry added by the parent class with one that lists the aliases.
        help_text = kwargs.pop('help')
        self._choices_actions.pop()
        self._choices_actions.append(self._AliasedPseudoAction(name, aliases, help_text))

        return parser


def is_list_command(command):
    """Return True when ``command`` is the ``list`` command or its alias."""
    accepted = ('list', 'l')
    return command in accepted


def is_listconfig_command(command):
    """Return True when ``command`` is the ``listconfig`` command or one of its aliases."""
    accepted = ('listconfig', 'lc', 'listconf')
    return command in accepted


def get_cli_options(args):
    """
    Parse command-line arguments.

    sample from http://pymotw.com/2/argparse/

    :param args: list of arguments (without the program name).
    :return: ``argparse.Namespace`` with the global options, the chosen ``command``
        and the options of that command.
    """
    basename = os.path.basename(sys.argv[0])
    parser = argparse.ArgumentParser(prog=basename,
                                     usage='%s [global_options] [list [host1 host2]/'
                                           'listconfig config_prefix]' % basename,
                                     description='ISS3 instance state printer')
    # The ``version`` keyword of ArgumentParser is deprecated in Python 2.7 and absent in
    # Python 3; this explicit action provides the same -v/--version options.
    parser.add_argument('-v', '--version', action='version', version=VERSION)
    parser.register('action', 'parsers', AliasedSubParsersAction)
    parser.add_argument('--cacher-url', action='store', dest='cacher_url', default=DEFAULT_CACHER_URL,
                        help='ISS Cacher address, default to %s' % DEFAULT_CACHER_URL)
    parser.add_argument('--verbose', action="store_true", default=False, dest="verbose",
                        help="verbose mode")
    parser.add_argument('--full', action="store_true", default=False, dest="full_instance_list",
                        help="print full instance list, even normal ones")
    parser.add_argument('--resources', action="store_true", default=False, dest="expand_failed_resources",
                        help="print list of resources which are not ready yet")
    parser.add_argument('--feedback-age', action="store_true", default=False, dest="feedback_age",
                        help="print feedback age, implied by --resources")
    parser.add_argument('-t', "--timeout", action="store", type=int, default=DEFAULT_TIMEOUT, dest="timeout",
                        help="Thrift connection timeout, default %d sec" % DEFAULT_TIMEOUT)
    parser.add_argument("--default-domain", action="store", default='search.yandex.net', dest="default_domain",
                        help="default domain for hostnames without it, default search.yandex.net")
    parser.add_argument('--dont-force-search-yandex-net', action="store_true", default=False, dest="dont_force_domain",
                        help="turn off casting *.yandex.ru hosts to *.search.yandex.net (Hi, SKyNet!)")

    subparsers = parser.add_subparsers(help='commands', dest="command")

    list_parser = subparsers.add_parser('list',
                                        aliases=('l', ),
                                        help="list instances for specified hosts. Hosts are obtained in order: "
                                             "FQDN list in commandline, stdin, hostname for localhost."
                                             " Only wrong instances will be printed by default"
                                             " (target state != current state),"
                                             " use --full option to print all instances."
                                        )
    list_parser.add_argument('hostnames', nargs='*', help="optional hostnames for print information about")

    listconfig_parser = subparsers.add_parser('listconfig',
                                              aliases=('lc', 'listconf'),
                                              help="list configuration instances, "
                                                   "you must specify configuration name prefix."
                                                   " Only wrong instances will be printed by default"
                                                   " (target state != current state),"
                                                   " use --full option to print all instances."
                                              )
    listconfig_parser.add_argument('config_name_prefix', nargs=1, help="configuration name prefix")
    listconfig_parser.add_argument('--default-branch', action="store", default="CURRENT", dest="default_branch",
                                   help="branch name to query cacher for configurations")
    listconfig_parser.add_argument('--exact', action="store_true", default=False, dest="exact_config_name",
                                   help="will process specified configuration without querying BRANCH")

    return parser.parse_args(args)


def explain_cli_options(options):
    """
    Print a human-readable explanation of the parsed command-line options.

    The command and its own parameters are explained first, the remaining options
    follow in alphabetical order. Options for which no explanation exists are
    listed in a final error line.

    :param options: ``argparse.Namespace`` returned by get_cli_options().
    """
    console("raw options: %r" % options, )
    console('explanation:')
    options_dict = vars(options)
    processed = set()

    if 'command' in options_dict:
        # 'hostnames' needs no explanation of its own: main() prints which hosts it uses.
        processed |= set(['command', 'hostnames', 'config_name_prefix', 'default_branch'])
        command = options_dict['command']
        if is_list_command(command):
            console('list: process specified hosts instances')
        elif is_listconfig_command(command):
            console('listconfig: process specified configurations instances')
            console('    process configurations with prefix %r (listconfig parameter)' %
                    (options_dict['config_name_prefix'][0], ))
            console('    --default-branch = %s, find configurations in this branch (default to CURRENT)' %
                    (options_dict['default_branch'], ))
        else:
            console("command = %r -- invalid value" % command)

    for option in sorted(options_dict):
        if option in processed:
            continue

        option_value = options_dict[option]

        if option == 'cacher_url':
            console('--cacher-url = %s' % (option_value, ))
            if option_value == DEFAULT_CACHER_URL:
                console("  will switch to another cacher replica on timeout (default cacher URL)")
            else:
                console("  WILL NOT switch to another cacher replica on timeout (custom cacher URL)")
        elif option == 'timeout':
            console('-t,--timeout = %s sec, cacher request timeout (default %s sec)' % (option_value, DEFAULT_TIMEOUT))
        elif option == 'verbose':
            console('--verbose = %r' % (option_value, ))
        elif option == 'full_instance_list':
            if option_value:
                console('--full specified, print good instances along with bad')
            else:
                console('--full not specified, print bad instances only')
        elif option == 'default_domain':
            console('--default-domain = %s, add this domain for hosts without domain' % (option_value, ))
        elif option == 'dont_force_domain':
            # A true value means the flag was given, i.e. the domain is NOT replaced.
            if option_value:
                console("--dont-force-search-yandex-net specified: "
                        "do not replace .yandex.ru domain with .search.yandex.net")
            else:
                console("--dont-force-search-yandex-net not specified: "
                        "replace .yandex.ru domain with .search.yandex.net")
        elif option == 'expand_failed_resources':
            console('--resources = %s: if specified, print resources status and age of last feedback '
                    'for bad instances' % (option_value, ))
        elif option == 'feedback_age':
            console('--feedback-age = %s, will print feedback age for all instances, including good ones' %
                    (option_value, ))
        elif option == 'exact_config_name':
            if option_value:
                console('--exact specified, process instances on this configuration without querying branch')
            else:
                console('--exact not specified, will request configurations'
                        ' with specified prefix from branch (default)')
        else:
            # No explanation exists: keep the option out of ``processed`` so that
            # it is reported as unexplained below.
            continue

        processed.add(option)

    missed_options = set(options_dict.keys()) - processed

    if missed_options:
        console('error: unexplained options left: %s' % (", ".join(sorted(missed_options)), ))

    console('===')


def do_report_instances(proxy, options, hosts, instance_filter, host_instance_filter=None):
    """
    Load and report instance states for all hosts, HOSTS_PER_BATCH hosts at a time.

    :param proxy: cacher client.
    :param options: parsed command-line options.
    :param hosts: iterable of host names; processed in sorted order.
    :param instance_filter: predicate passed to load_state_of_hosts() for instance states.
    :param host_instance_filter: optional predicate passed to load_state_of_hosts()
        for raw per-host instance records.
    """
    ordered_hosts = sorted(hosts)
    total = len(ordered_hosts)

    for offset in range(0, total, HOSTS_PER_BATCH):
        batch = ordered_hosts[offset:offset + HOSTS_PER_BATCH]
        # Number of hosts that remain after the current batch.
        console('hosts left: %d' % (total - offset - len(batch)))

        loaded = load_state_of_hosts(proxy, batch, instance_filter, host_instance_filter)
        report_instances(proxy, options, loaded)


def make_next_replica_url():
    """
    Build a function that picks the cacher replica URL to try next.

    The returned function takes the previously used URL. When that URL is one of the
    replicas (iss02 .. iss18) the following replica is returned, wrapping around after
    the last one; otherwise the first replica is returned. Every call prints the choice.
    """
    # thrift://iss3.yandex-team.ru:9090

    replicas = ['thrift://iss%02d.search.yandex.net:9090' % number for number in range(2, 18 + 1)]
    replica_count = len(replicas)

    def getter(prev_url):
        try:
            position = replicas.index(prev_url)
        except ValueError:
            # unknown URL (e.g. the default one): start from the first replica
            next_url = replicas[0]
        else:
            next_url = replicas[(position + 1) % replica_count]

        console('TIMEOUT: switch URL to %s' % next_url)
        return next_url

    return getter


def main(args=None):
    """
    Command-line entry point: parse options, connect to the cacher and run the command.

    ``list`` takes hosts from the command line, then from stdin, then falls back to the
    local host name. ``listconfig`` resolves configuration names and the hosts that
    carry them. The failure summary is always printed at the end.

    :param args: argument list; ``sys.argv[1:]`` is used when it is None.
    """
    global VERBOSE

    if args is None:  # beware of None!
        args = sys.argv[1:]

    started_at = time.time()

    options = get_cli_options(args)
    explain_cli_options(options)

    if options.verbose:
        VERBOSE = True

    proxy_kwargs = {'url': options.cacher_url, 'client_class': Client, 'timeout': options.timeout}

    # Replica switching is enabled only for the default cacher URL.
    if options.cacher_url == DEFAULT_CACHER_URL:
        proxy_kwargs['get_next_url_func'] = make_next_replica_url()

    proxy = thriftlite.ThriftServerProxy(**proxy_kwargs)
    console("cacher info: %s" % proxy.getVersion())

    try:
        if is_list_command(options.command):
            # hosts come from three sources, tried in this order
            hosts = extract_hosts(' '.join(options.hostnames))

            if not hosts:
                hosts = extract_hosts(read_stdin())

                if not hosts:
                    # fall back to the local host
                    this_host = socket.getfqdn()
                    console('use this host: %s' % this_host)
                    hosts = [this_host, ]
                else:
                    console("use %d hosts from stdin: %s..." % (len(hosts), hosts[:5], ))
            else:
                console("use hosts from CLI: %s" % (hosts, ))

            if not options.dont_force_domain:
                debug('replace .yandex.ru domain to .search.yandex.net '
                      '(option --dont-force-search-yandex-net not specified)')
                hosts = [thriftlite.force_domain_to_search_yandex_net(name) for name in hosts]

            hosts = [add_default_domain(name, options.default_domain) for name in hosts]

            console('hosts to process: %d' % len(hosts))

            do_report_instances(proxy, options, hosts, get_list_instance_filter(options))

        elif is_listconfig_command(options.command):
            prefix = options.config_name_prefix[0]
            config_names = get_configs_with_prefix(proxy, prefix=prefix, branch=options.default_branch,
                                                   exact_config_name=options.exact_config_name)
            config_names.sort()
            console('found %d configurations: %s' % (len(config_names), config_names))

            if not config_names:
                sys.exit('ERROR: no configurations with prefix %r found' % (prefix, ))

            hosts = set(get_config_hosts(proxy, config_names))
            console('hosts with specified configurations: %d' % len(hosts))

            instance_filter, host_instance_filter = get_listconfig_filters(options, config_names)
            do_report_instances(proxy, options, hosts, instance_filter, host_instance_filter)
    except (StandardError, KeyboardInterrupt) as exc:
        # Same output as the Python 2 statement: print "Error:", str(exc)
        print("Error:" + " " + str(exc))
        raise
    except TException as exc:
        print(thriftlite.dump_thrift_exception(exc))
    finally:
        report_failed()

        for stat_line in proxy.dump_stat():
            debug(stat_line)

        debug('execution time: %.3f sec' % (time.time() - started_at, ))


# Run the command-line interface when the file is executed as a script.
if __name__ == '__main__':
    main(sys.argv[1:])
