# encoding: utf-8
""" Getting host lists from external sources """
import itertools
import json
import os
import re
import subprocess
import sys
import traceback
import urllib2
import yaml
from collections import defaultdict, namedtuple
from functools import partial
from concurrent.futures import ThreadPoolExecutor, as_completed
from threading import Lock
from uuid import uuid4
from uhashring import HashRing
from time import sleep

# Timeout (seconds) passed as ``timeout=`` to every urllib2.urlopen() call in this module.
URLOPEN_TIMEOUT = 30.0

# Interact with external systems


def _extract_host_list(expr, fetcher):
    hosts, exclude_hosts = [], []
    params = expr.split()

    for item in params:
        if item.startswith('+'):
            hosts.extend(fetcher(item))

        elif item.startswith('-'):
            tt = "+%s" % item[1:]
            exclude_hosts.extend(fetcher(tt))

    return hosts, exclude_hosts


# sky


def _run_cmd(cmd, log):
    """Run an external command and return its stdout split into lines.

    :param cmd: command, either a whitespace-separated string or an argv sequence
    :param log: logger-like object providing ``info(fmt, *args)``
    :return: list of lines of the command's stdout (surrounding whitespace stripped)
    :raises Exception: when the command exits with a non-zero code
    """
    # Hand the values to the logger as arguments instead of pre-formatting the
    # message: a pre-formatted message that happens to contain '%' breaks
    # loggers which apply ``fmt % args`` themselves.
    log.info("execute command: %s", cmd)

    # A plain Python 2 string has no __iter__, so only strings get split here
    if not hasattr(cmd, '__iter__'):
        cmd = cmd.split()

    # Only stdout is captured; the child's stderr goes to our own stderr
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=sys.stderr)
    out, _ = p.communicate()  # communicate() waits for the process to finish
    retcode = p.returncode

    log.info("command exit code %s", retcode)

    if retcode != 0:
        raise Exception("exit with code %s" % retcode)

    return out.strip().split('\n')


def use_sky(expr, log):
    """Resolve hosts by running ``/usr/local/bin/sky list <expr>``.

    The expression is passed to ``sky list`` as-is; any intersections are
    expected to be handled by ``sky list`` itself.

    :param expr: str - arguments for ``sky list``
    :param log: Logger
    :return: tuple ``(hosts, [])`` - output lines of the command and an empty exclude list
    """
    try:
        expr = '/usr/local/bin/sky list ' + expr
        hosts = _run_cmd(expr, log)
    except Exception as err:
        log.error('error fetching data from %s: %s', expr, err)
        raise
    return hosts, []


# cat


def use_cat(expr, log):
    """Resolve hosts by running ``cat <expr>`` and reading its output lines.

    :param expr: str - arguments for ``cat``
    :param log: Logger
    :return: tuple ``(hosts, [])`` - output lines of the command and an empty exclude list
    """
    try:
        expr = 'cat ' + expr
        hosts = _run_cmd(expr, log)
    except Exception as err:
        log.error('error fetching data from %s: %s', expr, err)
        raise
    return hosts, []


# Conductor Groups

# Groups can be listed as +<group>
# Subtractions -<group> work as well
# To filter hosts by a single DC, put the parameter dc=<dc> at the beginning
# To filter by several DCs, put include_dc=<dc1>,<dc2>... at the beginning
# To include all hosts except those of some DCs, put exclude_dc=<dc1>,<dc2>... at the beginning
# For multi-DC selections -<group> does not work, only +<group>


CON_URL = 'http://c.yandex-team.ru/api/groups2hosts/%s'
CON_DC_URL = 'https://c.yandex-team.ru/api-cached/generator/group_dc?%s&group=%%s'  # First apply % 'dc=...', then % group
CON_URL_WITH_DC = 'https://c.yandex-team.ru/api/groups2hosts/%s?fields=fqdn,root_datacenter_name'


def _http_fetch(url):
    """Download ``url`` and return the response body split into lines.

    :param url: str - URL to fetch
    :return: list of lines of the stripped body; an empty list when the body
             is empty or consists of whitespace only
    """
    response = urllib2.urlopen(url, timeout=URLOPEN_TIMEOUT)
    try:
        body = response.read()
    finally:
        # release the connection even when read() fails
        response.close()

    # Strip before testing for emptiness: a whitespace-only body would
    # otherwise turn into [''], i.e. a single bogus empty host name.
    body = body.strip()
    return body.split('\n') if body else []


def use_con(expr, log):
    """Resolve hosts of conductor groups over the conductor HTTP interface.

    The first token of ``expr`` may be an option:

    * ``dc=<dc>`` - groups are fetched through ``CON_DC_URL`` with this option
      substituted in; the option is removed from the expression;
    * ``include_dc=...`` / ``exclude_dc=...`` - the rest of the expression is
      delegated to ``multi_dc_con``.

    Otherwise groups are fetched through ``CON_URL``.

    :param expr: str - optional option followed by ``+<group>`` / ``-<group>`` tokens
    :param log: Logger
    :return: tuple ``(include_hosts, exclude_hosts)``
    """
    head = expr.split(None, 1)[0]

    if head.startswith(('include_dc=', 'exclude_dc=')):
        return multi_dc_con(expr.split(None, 1)[1], head)

    if head.startswith('dc='):
        url = CON_DC_URL % head
        expr = expr.split(None, 1)[1]
    else:
        url = CON_URL

    def fetch_group(item):
        # item is '+<group>'; the sign is dropped before building the URL
        return _http_fetch(url % item[1:])

    try:
        return _extract_host_list(expr, fetch_group)
    except Exception as err:
        log.error('error fetching data from %s: %s', expr, err)
        raise


def multi_dc_con(expr, opt):
    """Resolve conductor groups and filter the hosts by a set of datacenters.

    Every fetched include line is split on whitespace into ``(host, dc)``.
    With an ``include...`` option only hosts whose dc is listed are kept,
    otherwise only hosts whose dc is not listed are kept.

    The exclude list is returned exactly as ``_extract_host_list`` produced
    it, i.e. as unsplit response lines.

    :param expr: str - ``+<group>`` / ``-<group>`` tokens
    :param opt: str - ``include_dc=<dc1>,<dc2>...`` or ``exclude_dc=<dc1>,<dc2>...``
    :return: tuple ``(include_hosts, exclude_lines)``
    """
    dcs = set(opt.split('=')[1].split(','))

    def fetch_with_dc(item):
        return _http_fetch(CON_URL_WITH_DC % item[1:])

    rows, excluded = _extract_host_list(expr, fetch_with_dc)
    keep_listed = opt.startswith('include')

    selected = []
    for host, dc in map(str.split, rows):
        # keep the host when "dc is listed" agrees with the requested mode
        if (dc in dcs) == keep_listed:
            selected.append(host)
    return selected, excluded


# Conductor Tags

# Usage:
# con_tag <tagname> <region>
# tagname - name of the conductor tag attached to the hosts or groups that should be monitored
# region - key from the file server.conf/con_locations.json
#          that is used to obtain the DC filter
# +default - additionally monitor all DCs known to conductor
# but not listed in con_locations.json, as well as hosts without a DC

CON_DCS_URL = 'https://c.yandex-team.ru/api/datacenters?format=json'

CON_TAG_DC = 'https://c.yandex-team.ru/api/tag2hosts/{tagname}?format=json'
# The doubled braces keep a literal '{dcs}' placeholder after this first format() call
CON_TAG_DC_INCLUDE_URL = '{common_part}&include_dcs={{dcs}}'.format(common_part=CON_TAG_DC)
CON_TAG_DC_EXCLUDE_URL = '{common_part}&exclude_dcs={{dcs}}'.format(common_part=CON_TAG_DC)
# region name -> iterable of datacenter names; replaced with the content of
# con_locations.json by generate_yasm_agent_placement()
REGIONS = {}


def fetch_con_hosts_without_dc(all_dcs, tagname):
    """Fetch hosts of a conductor tag while excluding every root datacenter.

    :param all_dcs: iterable of dicts with ``name`` and ``parent`` keys
    :param tagname: str - conductor tag name
    :return: decoded JSON response of the exclude-DCs tag request
    """
    # root datacenters are the ones without a parent
    root_names = [dc["name"] for dc in all_dcs if dc["parent"] is None]
    url = CON_TAG_DC_EXCLUDE_URL.format(tagname=tagname, dcs=",".join(root_names))
    response = urllib2.urlopen(url, timeout=URLOPEN_TIMEOUT)
    return json.load(response)


def use_con_tag(expr, log):
    """Resolve hosts carrying a conductor tag within the datacenters of a region.

    ``expr`` is ``<tagname> <region> [+default]``. The region is looked up in
    ``REGIONS``; datacenters of the region that conductor does not report as
    root datacenters are dropped with a warning. With ``+default`` the root
    datacenters not mentioned in any region are added, and hosts without a
    datacenter are fetched as well (failures of that extra request are only
    logged).

    :param expr: str - tag name, region key and optional ``+default``
    :param log: Logger
    :return: tuple ``(hosts, [])``
    """
    tokens = expr.strip().split()
    tagname, region = tokens[0], tokens[1]

    all_dcs = json.load(urllib2.urlopen(CON_DCS_URL, timeout=URLOPEN_TIMEOUT))
    root_dcs = set(dc['name'] for dc in all_dcs if dc['parent'] is None)
    region_dcs = set(REGIONS[region])

    # datacenters configured for the region but unknown to conductor
    unknown_dcs = region_dcs - root_dcs
    if unknown_dcs:
        log.warn('Exceed datacenters: %s', ','.join(unknown_dcs))
        region_dcs -= unknown_dcs

    with_default = '+default' in tokens
    if with_default:
        # root datacenters that no region claims
        orphans = set(root_dcs)
        for known in REGIONS.itervalues():
            orphans.difference_update(known)
        if orphans:
            log.warning('orphaned datacenters: %s', ','.join(orphans))
            region_dcs |= orphans

    dcs = ','.join(region_dcs)
    log.info('used dcs: %s', dcs)
    include_url = CON_TAG_DC_INCLUDE_URL.format(tagname=tagname, dcs=dcs)
    hosts = json.load(urllib2.urlopen(include_url, timeout=URLOPEN_TIMEOUT))

    if with_default:
        try:
            hosts.extend(fetch_con_hosts_without_dc(all_dcs, tagname))
        except Exception:
            # best effort: hosts without a DC are optional
            log.exception("uncaught error while fetch hosts without DC")

    return hosts, []


# echo module


def use_echo(cmd, log):
    """Treat the expression itself as the host list.

    :param cmd: str - whitespace-separated host names
    :param log: unused; present to match the common fetcher signature
    :return: tuple ``(hosts, [])``
    """
    # split() without arguments collapses runs of whitespace, so repeated
    # spaces or a blank expression never produce empty host names.
    return cmd.split(), []


# clusterstate and wall-e

# Besides the queue name, the following parameters are accepted:
# -d=some.domain   Exclude hosts with the given domain. May be specified several times.
# +d=some.domain   Include only hosts with the given domain. May be specified several times.
# -k=conductor-group  Exclude hosts that belong to the given conductor group. May be specified several times.
# +k=conductor-group  Include only hosts that belong to the given conductor group. May be specified several times.

CLUSTERSTATE_LINE_URL = 'https://clusterstate.yandex-team.ru/api/v1/hardware/lines/{}'
CLUSTERSTATE_LINE_DISCOVER_URL = 'https://clusterstate.yandex-team.ru/api/v1/hardware/lines'
# Formatted with limit, cursor and tag by _walle_expand_request()
WALLE_LINE_URL = 'https://api.wall-e.yandex-team.ru/v1/hosts?fields=inv,name,location.short_datacenter_name&strict=true&limit={limit}&cursor={cursor}&tags={tag}&state__nin=free'
WALLE_TAG = 'yasm_monitored'
QLOUD_WALLE_TAG = 'yasm_qloud_monitored'


def _walle_expand_request(tag, datacenter, limit=10000):
    """Yield names of wall-e hosts with ``tag`` located in ``datacenter``.

    Pages through ``WALLE_LINE_URL``: after every row the cursor is moved past
    the largest ``inv`` seen so far, and paging stops once a page holds fewer
    than ``limit`` rows.

    :param tag: str - wall-e tag to query
    :param datacenter: str - compared with ``location.short_datacenter_name`` of each row
    :param limit: int - page size
    """
    next_cursor = 0
    last_page = False
    while not last_page:
        url = WALLE_LINE_URL.format(cursor=next_cursor, tag=tag, limit=limit)
        page = json.load(urllib2.urlopen(url, timeout=URLOPEN_TIMEOUT))
        rows = page["result"]
        for row in rows:
            next_cursor = max(next_cursor, row["inv"]) + 1
            if row["location"]["short_datacenter_name"] == datacenter:
                yield row["name"]
        # a short page means there is nothing left to request
        last_page = len(rows) < limit


def _fetcher_decorator(fetcher_func):
    """Turn a ``name -> hosts`` function into a standard ``(expr, log)`` fetcher.

    The wrapper passes the first token of ``expr`` to ``fetcher_func``, sorts
    the result and hands it together with the remaining tokens (selector
    expressions) to ``select_hosts``. Errors are logged and re-raised.
    """

    def wrapper(expr, log):
        try:
            tokens = expr.split()
            source, filters = tokens[0], tokens[1:]
            return select_hosts(sorted(fetcher_func(source)), filters)
        except Exception as err:
            log.error('error fetching data from %s: %s', expr, err)
            raise

    return wrapper


@_fetcher_decorator
def use_clusterstate_datacenters(name):
    """Collect hosts of all clusterstate hardware lines that belong to ``name``.

    A line belongs to ``name`` when the first run of lowercase latin letters
    in the line name equals ``name``.

    :param name: str - datacenter name to match against line names
    :return: list of hosts of all matching lines
    """
    result = []
    # NOTE(review): "man-3_b.1.08" is always checked in addition to the
    # discovered lines - presumably it is absent from the discovery listing; confirm.
    line_names = itertools.chain(_http_fetch(CLUSTERSTATE_LINE_DISCOVER_URL), ("man-3_b.1.08",))
    for line_name in line_names:
        match = re.search(r"([a-z]+)", line_name)
        # A line name without lowercase letters (e.g. a blank line in the
        # response) cannot belong to any datacenter: skip it instead of
        # failing on ``None.group()``.
        if match is None:
            continue
        if match.group() == name:
            result.extend(_http_fetch(CLUSTERSTATE_LINE_URL.format(line_name)))
    return result


@_fetcher_decorator
def use_walle_datacenters(name):
    """Return the names of wall-e hosts tagged ``WALLE_TAG`` in datacenter ``name``."""
    hosts = []
    hosts.extend(_walle_expand_request(WALLE_TAG, name))
    return hosts


@_fetcher_decorator
def use_qloud_datacenters(name):
    """Return the names of wall-e hosts tagged ``QLOUD_WALLE_TAG`` in datacenter ``name``."""
    hosts = []
    hosts.extend(_walle_expand_request(QLOUD_WALLE_TAG, name))
    return hosts


def select_hosts(hosts, select_exprs):
    """Apply ``+``/``-`` selector expressions to a host list.

    Without ``+`` selectors all hosts are included; without ``-`` selectors
    nothing is excluded. Both selector groups are evaluated against the full
    ``hosts`` list.

    :param hosts: non-empty list of host names
    :param select_exprs: list of selector expressions understood by ``Sieve``
    :return: tuple ``(include_hosts, exclude_hosts)``
    :raises Exception: when ``hosts`` is empty
    """
    if not hosts:
        raise Exception("hosts can't be empty, maybe something goes wrong")

    sieve = Sieve(select_exprs)
    include = list(sieve.apply_selectors('+', hosts)) if sieve.selectors['+'] else hosts
    exclude = list(sieve.apply_selectors('-', hosts)) if sieve.selectors['-'] else []
    return include, exclude


class Sieve(object):
    """Host filter built from selector expressions ``<group><method>=<param>``.

    ``<group>`` is the first character of the expression (``+``, ``-``, ...),
    ``<method>`` is ``d`` (exact domain) or ``k`` (conductor group).
    """

    def __init__(self, select_exprs):
        # conductor group name -> set of its hosts, filled lazily
        self.con_groups = {}
        # group character -> list of one-argument predicates
        self.selectors = defaultdict(list)
        for expr in select_exprs:
            sign, rule = expr[0], expr[1:]
            method, param = rule.split('=')
            self.selectors[sign].append(self.make_selector(method, param))

    def make_selector(self, method, param):
        """Return a predicate ``host -> bool`` for the given method and parameter."""
        checks = {
            'd': self.check_domain,  # exact domain
            'k': self.in_conductor,  # conductor group
        }
        if method not in checks:
            raise UserWarning('Unknown filter method: "%s"' % method)
        return partial(checks[method], param)

    def in_conductor(self, con_group, host):
        """Tell whether ``host`` is listed in the conductor group (fetched once per group)."""
        members = self.con_groups.get(con_group)
        if members is None:
            members = self.con_groups[con_group] = set(_http_fetch(CON_URL % con_group))
        return host in members

    @staticmethod
    def check_domain(proper_domain, host):
        """Tell whether everything after the first dot of ``host`` equals ``proper_domain``."""
        domain = host.split('.', 1)[-1]
        return domain == proper_domain

    def apply_selectors(self, group, objects):
        """Yield the objects accepted by at least one selector of ``group``."""
        checks = self.selectors[group]
        for obj in objects:
            for check in checks:
                if check(obj):
                    yield obj
                    break


# Construct host list

# Command prefix (lower-cased first word of a command, see _parse_cmd) -> fetcher.
# Every fetcher is called as fetcher(expr, log) and returns (include_hosts, exclude_hosts).
FETCHERS = {
    'echo': use_echo,
    'sky': use_sky,
    'cat': use_cat,
    'con': use_con,
    'con_tag': use_con_tag,
    'clusterstate_datacenter': use_clusterstate_datacenters,
    'walle_datacenter': use_walle_datacenters,
    'qloud_datacenter': use_qloud_datacenters,
}


def fetch_hosts(name, cmd, subgroup_splitting, log, cache=None):
    """
    Resolve a command into the final host list.

    :param name: str - name of group (not used by this function)
    :param cmd: str - command for obtaining host list, ``<prefix> <expr>``
    :param subgroup_splitting: str - option, describes splitting host list on subgroups
    :param log: Logger
    :param cache: dict|None, {("<prefix>", "<expr>"): ([include_hosts], [exclude_hosts])}
    :return: str - host names separated by newline
    """
    prefix, expr = _parse_cmd(cmd)
    loaded = _load_hosts(cmd, prefix, expr, log, cache=cache)
    include_hosts, exclude_hosts = loaded
    host_list = _build_host_list(include_hosts, exclude_hosts, subgroup_splitting, log)
    return "\n".join(host_list)


def _parse_cmd(cmd):
    prefix, expr = cmd.split(None, 1)
    prefix = prefix.lower()
    return prefix, expr


def _load_hosts(cmd, prefix, expr, log, cache=None):
    if isinstance(cache, dict) and (prefix, expr) in cache:
        return cache[(prefix, expr)]

    fetcher = FETCHERS.get(prefix)

    if fetcher:
        include_hosts, exclude_hosts = fetcher(expr, log)
    else:
        raise Exception("unknown fetcher {!r} specified".format(prefix))

    if isinstance(cache, dict):
        cache[(prefix, expr)] = (include_hosts, exclude_hosts)

    return include_hosts, exclude_hosts


class SubgroupOptions(namedtuple("SubgroupOptions", ("total", "index", "vnodes"))):
    """Subgroup splitting options: number of subgroups, selected index and vnodes.

    An instance is truthy only when there is more than one subgroup.
    """

    @classmethod
    def parse(cls, subGroups):
        """Parse ``<ignored>/<total>/<index>[/<vnodes>]``.

        The first ``/``-separated field is not used. ``vnodes`` defaults to 640.

        :param subGroups: str - splitting option
        :return: SubgroupOptions
        :raises Exception: on a wrong number of fields, an index outside
                           ``[0, total)`` or non-positive vnodes
        :raises ValueError: when a numeric field is not an integer
        """
        options = subGroups.split("/")
        if len(options) not in (3, 4):
            raise Exception("wrong subgroup splitting given, %r" % subGroups)

        total = int(options[1])
        idx = int(options[2])
        vnodes = int(options[3]) if len(options) == 4 else 640

        # selected subgroup started from zero (last group idx = total-1)
        if not 0 <= idx < total:
            raise Exception("wrong index specified, should be less than total")
        if vnodes <= 0:
            raise Exception("wrong vnodes specified, should be greater than zero")
        return cls(total, idx, vnodes)

    def __nonzero__(self):
        return self.total > 1

    # Python 3 spelling of the truthiness hook
    __bool__ = __nonzero__


def _build_host_list(include_hosts, exclude_hosts, subgroup_splitting, log):
    # resolve subgroups
    hosts = set(include_hosts) - set(exclude_hosts)

    return sorted(hosts)


class ThreadSafeLogger:
    """Minimal logger that writes every record to stderr under a lock.

    All levels are written the same way; ``exception`` additionally appends
    the traceback of the exception being handled.
    """

    def __init__(self):
        self.lock = Lock()

    def _emit(self, fmt, args, with_traceback=False):
        """Format one record and write it to stderr as a single line block."""
        # Like the standard logging module, apply %-formatting only when
        # arguments were supplied, so a ready-made message that contains '%'
        # does not blow up.
        message = fmt % args if args else fmt
        if with_traceback and sys.exc_info()[0] is not None:
            message = "%s\n%s" % (message, traceback.format_exc().rstrip())
        with self.lock:
            sys.stderr.write("%s\n" % (message,))

    def info(self, fmt, *args):
        self._emit(fmt, args)

    def warn(self, fmt, *args):
        self._emit(fmt, args)

    def warning(self, fmt, *args):
        self._emit(fmt, args)

    def error(self, fmt, *args):
        self._emit(fmt, args)

    def exception(self, fmt, *args):
        self._emit(fmt, args, with_traceback=True)


def parse_yaml(file, log):
    """Extract placement-related fields from one YAML config.

    Always returns a 4-tuple. When the file is missing the defaults are
    returned; when parsing fails midway the values collected before the
    failure are returned together with defaults for the rest.

    :param file: str - path to the YAML config
    :param log: Logger
    :return: tuple ``(metagroup, src, aggr_names, vnodes)`` with defaults
             ``(None, None, [], 640)``
    """
    metagroup = None
    src = None
    aggr_names = []
    # Defined up front so that every return path has a value for it
    vnodes = 640

    if not os.path.isfile(file):
        log.error("Can't parse file %s: such file doesn't exist", file)
        return (metagroup, src, aggr_names, vnodes)

    with open(file) as stream:
        try:
            y = yaml.safe_load(stream)
            metagroup = y['front_properties']['metagroup']
            src = y['instance_properties']['src']
            # vnodes is optional in the config
            vnodes = y['instance_properties'].get('vnodes', 640)
            for inst in y['instances']:
                aggr_names.append(inst['aggr_name'])
        except Exception as e:
            log.exception("Can't parse file %s. Message: %s", file, e)

    return (metagroup, src, aggr_names, vnodes)


def get_config_files(conf_dir, log):
    """Yield ``<dir>/<file>`` for every ``*.yaml`` file found under ``conf_dir``.

    :param conf_dir: str - directory that is walked recursively
    :param log: not used by this function
    """
    for dirpath, _dirnames, filenames in os.walk(conf_dir):
        for filename in filenames:
            if filename.endswith('.yaml'):
                yield "{}/{}".format(dirpath, filename)


def write_csv(output_file, items, delim):
    """Write ``(host, group, metagroup)`` triples, one per line, joined by ``delim``.

    :param output_file: str - path of the file to (over)write
    :param items: iterable of 3-item sequences of strings
    :param delim: str - field separator
    """
    with open(output_file, "w") as stream:
        for host, group, metagroup in items:
            for piece in (host, delim, group, delim, metagroup, "\n"):
                stream.write(piece)


def read_csv(input_file):
    """Read whitespace-separated records, one per line, skipping blank lines.

    :param input_file: str - path of the file to read
    :return: list of lists of fields
    """
    items = []
    with open(input_file, "r") as stream:
        for line in stream:
            fields = line.split()
            # Lines read from a file always contain at least the newline, so
            # test the parsed fields rather than the raw line length.
            if fields:
                items.append(fields)
    return items


class Fetcher:
    """Resolves host lists of many groups concurrently on a thread pool.

    Usage: ``add()`` every group, then ``finilize()`` to collect the results
    and ``close()`` to shut the pool down.
    """

    def __init__(self, threads, log):
        """
        :param threads: int - number of worker threads
        :param log: Logger shared by all workers
        """
        self.log = log
        self.executor = ThreadPoolExecutor(max_workers=threads)
        # futures submitted since the last finilize()
        self.f_list = []

    def __set_group(self, hosts, aggr_names, vnodes):
        """Assign every host to one of ``aggr_names`` through a consistent hash ring."""
        # One ring node per aggregate; the node's "instance" is the index into
        # aggr_names. NOTE(review): relies on HashRing[key] returning that
        # instance value - confirm against the uhashring documentation.
        hash_ring = HashRing(
            {"server_shard_index_{}".format(idx): {"instance": idx} for idx in xrange(len(aggr_names))}, vnodes=vnodes
        )
        return [(h, aggr_names[hash_ring[h]]) for h in hosts]

    def __fetch_hosts_wrapper(self, aggr_names, src, group, vnodes, retries):
        """Fetch and place the hosts of one group, retrying on failure.

        :return: tuple ``(set of (host, aggr_name), group)``; the set is empty
                 when every attempt failed
        """
        attempts_left = retries
        while True:
            try:
                fetched = fetch_hosts(aggr_names, src, None, self.log).split("\n")
                # An empty result comes back as "" and splits into ['']; drop
                # empty names from the list before hashing, so that an empty
                # group is an empty result rather than a failure.
                hosts = [h for h in fetched if h]
                return set(self.__set_group(hosts, aggr_names, vnodes)), group
            except Exception as e:
                # Values go in as logger arguments: a pre-formatted message
                # containing '%' would break ``fmt % args`` style loggers.
                self.log.exception(
                    "Exception while fetching hosts with aggrname=%s, src=%s, group=%s. Message: %s",
                    aggr_names,
                    src,
                    group,
                    e,
                )
                if attempts_left <= 0:
                    return set(), group
                attempts_left -= 1
                sleep(0.5)

    def __collect_hosts_set(self, names, src, group, vnodes):
        # one initial attempt plus up to 3 retries
        self.f_list.append(self.executor.submit(self.__fetch_hosts_wrapper, names, src, group, vnodes, 3))

    def add(self, metagroup, src, aggr_names, vnodes):
        """Schedule fetching of one group.

        :param metagroup: key under which the hosts are reported by ``finilize``
        :param src: str - command for obtaining the host list
        :param aggr_names: list of aggregate names the hosts are spread over
        :param vnodes: int - vnodes setting of the hash ring
        """
        self.__collect_hosts_set(aggr_names, src, metagroup, vnodes)

    def finilize(self):
        """Wait for all scheduled fetches.

        :return: sorted list of ``(host, aggr_name, metagroup)`` tuples
        """
        hosts = defaultdict(set)
        for f in as_completed(self.f_list):
            s, group = f.result()
            hosts[group] |= s

        items = []
        for group, pairs in hosts.items():
            items.extend((host, aggr_name, group) for host, aggr_name in pairs)

        self.f_list = []
        return sorted(items)

    def close(self):
        """Shut the thread pool down, waiting for running tasks."""
        self.executor.shutdown(wait=True)


def generate_yasm_agent_placement():
    """Build the ``host group metagroup`` placement file from the group configs.

    Command line: ``<prog> agent-placement <configs_dir> <output_file>``.
    Loads ``REGIONS`` from ``con_locations.json`` in the current directory,
    resolves the hosts of every ``*.yaml`` config under ``configs_dir`` and
    writes the result to a temporary file that is then renamed over
    ``output_file``.
    """
    global REGIONS

    log = ThreadSafeLogger()
    # Validate the argument count before indexing sys.argv, otherwise a short
    # command line dies with IndexError instead of this message.
    if len(sys.argv) != 4:
        log.error("invalid arguments number")
        sys.exit(1)
    configs_dir = sys.argv[2]
    output_hosts_file = sys.argv[3]

    with open('con_locations.json') as _reg_inp:
        # https://c.yandex-team.ru/api/datacenters?format=yaml&field=name,golem_name,parent
        REGIONS = json.load(_reg_inp)

    fetcher = Fetcher(32, log)
    try:
        for config_path in get_config_files(configs_dir, log):
            metagroup, src, aggr_names, vnodes = parse_yaml(config_path, log)
            if src is None or not aggr_names:
                # Nothing can be fetched or placed for such a config; skipping
                # it avoids a series of retries that are bound to fail.
                log.error("skip config %s: no src or no instances", config_path)
                continue
            fetcher.add(metagroup, src, aggr_names, vnodes)

        items = fetcher.finilize()
    finally:
        # stop the worker threads even when collecting the results failed
        fetcher.close()

    # write to a unique temporary name first, then rename over the target
    tmp_file = "{}.{}".format(output_hosts_file, uuid4())
    write_csv(tmp_file, items, " ")
    os.rename(tmp_file, output_hosts_file)


def generate_tsdb_placement():
    """Build the ``aggr_name -> hosts`` mapping from the group configs.

    Command line: ``<prog> tsdb-placement <configs_dir> <output_file>``.
    Every ``*.yaml`` config under ``configs_dir`` contributes its
    ``instances``; the first definition of a group wins and later ones are
    reported as clashes. The mapping is written as JSON once, after all
    configs were processed, provided at least one config was parsed.
    """
    mapping = {}
    log = ThreadSafeLogger()

    if len(sys.argv) != 4:
        log.error("invalid arguments number")
        sys.exit(1)

    configs_dir = sys.argv[2]
    output_file = sys.argv[3]
    parsed_any = False

    for dirpath, _dirnames, filenames in os.walk(configs_dir):
        for filename in filenames:
            if not filename.endswith('.yaml'):
                continue

            with open('{}/{}'.format(dirpath, filename)) as f:
                try:
                    y = yaml.safe_load(f)

                    for aggr in y['instances']:
                        group = aggr['aggr_name']
                        hosts = aggr['hosts']

                        if group in mapping:
                            print('CLASH! {}({}) is already in the mapping: {}'.format(group, hosts, mapping[group]))
                        else:
                            mapping[group] = hosts

                    parsed_any = True
                except Exception:
                    # format_exc() takes an optional frame limit, not the
                    # exception; it reports the exception being handled.
                    print('error while parsing {}/{}: {}'.format(dirpath, filename, traceback.format_exc()))

    # Dump once at the end instead of rewriting the file after every config.
    # Nothing is written when no config could be parsed, so an existing
    # output file is not replaced by an empty mapping.
    if parsed_any:
        with open(output_file, 'w') as fd:
            json.dump(mapping, fd)


def print_usage():
    """Print the command-line help to stdout."""
    parts = [
        'Usage: ',
        sys.argv[0],
        ' <agent-placement|tsdb-placement> <configs_dir> <output_file>\n\n',
        'agent-placement Discovers yasm agents and constructs a mapping of yasm agent hosts\n',
        'tsdb-placement  Constructs a mapping of yasm groups to TSDB replicas',
    ]
    print(''.join(parts))


def main():
    """Dispatch to the sub-command named by the first command-line argument.

    Prints the usage text when the sub-command is missing or unknown.
    """
    commands = {
        "agent-placement": generate_yasm_agent_placement,
        "tsdb-placement": generate_tsdb_placement,
    }
    handler = commands.get(sys.argv[1]) if len(sys.argv) >= 2 else None
    if handler is None:
        print_usage()
    else:
        handler()

# Run the command-line entry point only when the module is executed as a script
if __name__ == "__main__":
    main()
