import sys
import os
import json
import logging
from argparse import ArgumentParser

import yt.wrapper as yt
from yql.api.v1.client import YqlClient
from saas.tools.devops.check_backup.lib import routines

# Module-level logger (named 'script') shared by the functions in this file;
# its level is set in _init_logs().
logger = logging.getLogger('script')


def load_arguments(json_input, service_name, service_ctype):
    """Build the list of service descriptions the current command works on.

    Args:
        json_input: path to a JSON file, "-" for stdin, or a falsy value.
        service_name: optional service name (filter, or the service itself).
        service_ctype: optional ctype (filter, or the ctype of the service).

    Returns:
        * With no ``json_input`` and at least one of name/ctype given: a
          one-element list ``[{'service': ..., 'ctype': ...}]``.
        * Otherwise the JSON document read from the file or stdin, narrowed
          down to entries matching the given name and/or ctype (if any).

    Raises:
        ValueError: no ``json_input`` and only one of name/ctype was given.
    """
    has_filter = bool(service_name or service_ctype)

    if not json_input and has_filter:
        # Without a JSON source the command line must fully describe a service.
        if not (service_name and service_ctype):
            raise ValueError('Incorrect arguments: if json_input (--input) is not used, please provide both --service and --ctype')
        return [{'service': service_name, 'ctype': service_ctype}]

    if not json_input:
        logger.info('Reading json_input from stdin')
        source = sys.stdin
    elif json_input == "-":
        source = sys.stdin
    else:
        source = open(json_input)

    try:
        loaded = json.load(source)
    finally:
        # stdin is not ours to close; a file we opened is.
        if source is not sys.stdin:
            source.close()

    if not has_filter:
        return loaded

    return [entry for entry in loaded
            if (not service_name or service_name == entry['service']) and
               (not service_ctype or service_ctype == entry['ctype'])]


#
# "Prepare" operations (fetch configuration details from Ferryman config and other sources)
#

def _get_fm_lazy(fm_cfg, service):
    """Return ``fm_cfg`` if one is already available, otherwise fetch it.

    When ``fm_cfg`` is None the config is requested through
    ``routines.get_ferryman_config`` (using the optional 'fm_host' entry of
    ``service``) and, if the result is truthy, passed to
    ``routines.validate_ferryman`` before being returned.
    """
    if fm_cfg is None:
        fm_cfg = routines.get_ferryman_config(service['service'], service['ctype'], service.get('fm_host', None))
        if fm_cfg:
            routines.validate_ferryman(fm_cfg, service['service'], service['ctype'])
    return fm_cfg


def _get_name(service):
    """Return a "<service> <ctype>" label for log messages.

    Missing keys fall back to '<unnamed>' and '' respectively.
    """
    service_part = service.get('service', '<unnamed>')
    ctype_part = service.get('ctype', '')
    return service_part + " " + ctype_part


def fetch_service_data(service):
    """Fill in the 'yt_cluster' and 'dishes' entries of ``service`` in place.

    Entries that are already present are left untouched. Missing ones are
    derived from the Ferryman config, which is obtained through
    ``_get_fm_lazy`` only when actually needed.

    Args:
        service: dict describing a service; must contain 'service' and
            'ctype' if any entry has to be fetched.
    """
    fm_cfg = None
    if 'yt_cluster' not in service:
        # Keep the fetched config so the next lookup can reuse it instead of
        # requesting (and validating) the Ferryman config a second time.
        fm_cfg = _get_fm_lazy(fm_cfg, service)
        service['yt_cluster'] = routines.get_ferryman_yt_cluster(fm_cfg)
    if 'dishes' not in service:
        fm_cfg = _get_fm_lazy(fm_cfg, service)
        service['dishes'] = routines.get_ferryman_dishes_path(fm_cfg)


def prepare_list(json_input, service_name, service_ctype):
    """Complete every selected service via fetch_service_data and print JSON.

    Services are selected with ``load_arguments``. A failure while processing
    a service is logged and recorded in that service's 'ERROR' entry; the
    service is still included in the output. The resulting list is written
    to stdout as indented JSON with sorted keys.
    """
    completed = []
    for entry in load_arguments(json_input, service_name, service_ctype):
        try:
            logger.info('Processing %s', _get_name(entry))
            fetch_service_data(entry)
        except Exception as err:
            entry['ERROR'] = str(err)
            logger.exception('Failed to process service %s', _get_name(entry))
        completed.append(entry)

    payload = json.dumps(completed, sort_keys=True, indent=4)
    sys.stdout.write(payload)


def _create_yt_client(yt_proxy):
    """Create a YtClient for ``yt_proxy`` using the config taken from the environment.

    Raises:
        ValueError: ``yt_proxy`` is empty or None.
    """
    if yt_proxy:
        return yt.YtClient(proxy=yt_proxy, config=yt.default_config.get_config_from_env())
    raise ValueError('yt_cluster is not specified')


def find_last_dish(service):
    """Fill in 'last_dish_timestamp' and 'yt_index_table' of ``service`` in place.

    Nothing is done when both entries are already present. Otherwise a YT
    client for ``service['yt_cluster']`` is created and the missing entries
    are obtained through ``routines.get_last_dish_ts`` and
    ``routines.read_path_from_dish``. If no table path is returned, a warning
    is logged and 'yt_index_table' is set to an empty string.

    Args:
        service: dict describing a service; needs 'dishes' and 'yt_cluster'
            unless both target entries are already set.

    Raises:
        ValueError: 'dishes' is missing, or 'yt_cluster' is missing/empty.
    """
    if 'last_dish_timestamp' in service and 'yt_index_table' in service:
        return  # Nothing to do

    # An explicit exception (rather than `assert`) survives `python -O` and
    # gives callers a non-empty message to store in the 'ERROR' entry.
    if 'dishes' not in service:
        raise ValueError('dishes path is not specified')

    # .get() lets _create_yt_client report a missing cluster with its own
    # descriptive ValueError instead of a bare KeyError.
    yt_client = _create_yt_client(service.get('yt_cluster'))
    if 'last_dish_timestamp' not in service:
        last_dish_ts = routines.get_last_dish_ts(yt_client, service['dishes'])
        service['last_dish_timestamp'] = last_dish_ts
    else:
        last_dish_ts = int(service['last_dish_timestamp'])

    if 'yt_index_table' not in service:
        yt_index_table = routines.read_path_from_dish(yt_client, service['dishes'], last_dish_ts)
        if yt_index_table is None:
            # Use the module logger (not the root logger) like the rest of the file.
            logger.warning('No backup found for service %s', _get_name(service))
            yt_index_table = ''
        service['yt_index_table'] = yt_index_table


def prepare_last_backups(json_input, service_name, service_ctype):
    """Complete every selected service with its latest backup data and print JSON.

    Services are selected with ``load_arguments``; each one is passed through
    ``fetch_service_data`` and then ``find_last_dish``. A failure is logged
    and recorded in the service's 'ERROR' entry; the service is still
    included in the output. The resulting list is written to stdout as
    indented JSON with sorted keys.
    """
    completed = []
    for entry in load_arguments(json_input, service_name, service_ctype):
        try:
            logger.info('Processing %s', _get_name(entry))
            fetch_service_data(entry)
            find_last_dish(entry)
        except Exception as err:
            entry['ERROR'] = str(err)
            logger.exception('Failed to process service %s', _get_name(entry))
        completed.append(entry)

    payload = json.dumps(completed, sort_keys=True, indent=4)
    sys.stdout.write(payload)


#
# Script builders
#
def format_detach_to_yt(service, method_args):
    """Build the ``./detach_to_yt`` shell command for one service.

    The index and result paths are ``<wc>/<ctype>/<service>/data`` and
    ``<wc>/<ctype>/<service>/pub``; output is redirected to a per-service log.

    Args:
        service: dict with 'service', 'ctype' and 'yt_cluster'.
        method_args: dict with 'wc' (workcopy folder on YT).

    Raises:
        ValueError: 'yt_cluster' is missing from ``service``.
    """
    if 'yt_cluster' not in service:
        raise ValueError('Incomplete data')

    base = method_args['wc'] + '/' + service['ctype'] + '/' + service['service']
    params = {
        'ctype': service['ctype'],
        'service': service['service'],
        'yt_cluster': service['yt_cluster'],
        'index_path': base + '/data',
        'pub_path': base + '/pub',
    }
    template = './detach_to_yt -t {ctype} -s {service} --yt {yt_cluster} --index-path {index_path} --result-path {pub_path} > ./detach_{service}_{ctype}.log 2>&1'
    return template.format(**params)


def format_clean_detach(service, method_args):
    """Build the ``yt remove`` commands for the tables created by the format_detach_to_yt script.

    Args:
        service: dict with 'service', 'ctype' and 'yt_cluster'.
        method_args: dict with 'wc' (workcopy folder on YT).

    Returns:
        A list of two shell commands: one removing ``<wc>/<ctype>/<service>/data``
        and one recursively removing ``<wc>/<ctype>/<service>/pub``.

    Raises:
        ValueError: 'yt_cluster' is missing, or the service name is missing/empty.
    """
    if 'yt_cluster' not in service:
        raise ValueError('Incomplete data')

    if not service.get('service', ''):
        # An extra check, because a recursive remove is dangerous: an empty
        # name would make the paths point at the whole ctype folder.
        raise ValueError('Incomplete data: service name is empty')

    wc = method_args['wc']
    path = wc + '/' + service['ctype'] + '/' + service['service']

    tmpl = ['yt remove --proxy {yt_cluster} --path {index_path} 2>&1',
            'yt remove --proxy {yt_cluster} --path {pub_path} --recursive 2>&1']

    return [msg.format(yt_cluster=service['yt_cluster'],
                       index_path=path + '/data',
                       pub_path=path + '/pub')
            for msg in tmpl]


def format_dump_backup(service, method_args):
    """Build the ``./dump_fullarc_to_yt`` command that dumps the backup table.

    The index dir is the service's 'yt_index_table' with everything from the
    first '[' onwards cut off; the dump goes to
    ``<wc>/<ctype>/<service>/backup.dump``.

    Args:
        service: dict with 'service', 'ctype', 'yt_cluster', 'yt_index_table'.
        method_args: dict with 'wc' (workcopy folder on YT).

    Returns:
        The shell command, or None (with a warning logged) when
        'yt_index_table' is missing or empty.

    Raises:
        ValueError: 'yt_cluster' is missing from ``service``.
    """
    if 'yt_cluster' not in service:
        raise ValueError('Incomplete data')
    if not service.get('yt_index_table', ''):
        logger.warning("Skipping service %s: no data provided about backup", _get_name(service))
        return None

    svc_path = method_args['wc'] + '/' + service['ctype'] + '/' + service['service']
    tmp_path = '//tmp/saas/dump_b_' + service['ctype'] + '_' + service['service']

    template = ("./dump_fullarc_to_yt --yt {yt_cluster} --mode yt --index-dir '{yt_index_table}' --output '{svc_path}/backup.dump' "
                "--tmp-dir '{tmp_path}' --dump-format UrlsOnly > ./{service}_{ctype}_backup_dump.log 2>&1")
    params = {
        'yt_cluster': service['yt_cluster'],
        # Keep only the table path, without any '[...]' suffix.
        'yt_index_table': service['yt_index_table'].split('[')[0],
        'svc_path': svc_path,
        'tmp_path': tmp_path,
        'ctype': service['ctype'],
        'service': service['service'],
    }
    return template.format(**params)


def format_dump_detach(service, method_args):
    """Build the ``./dump_fullarc_to_yt`` command that dumps the detached index.

    Reads ``<wc>/<ctype>/<service>/data`` and writes
    ``<wc>/<ctype>/<service>/index.dump``.

    Args:
        service: dict with 'service', 'ctype', 'yt_cluster'. A non-empty
            'yt_index_table' is also required, although its value is not
            part of the generated command.
        method_args: dict with 'wc' (workcopy folder on YT).

    Returns:
        The shell command, or None (with a warning logged) when
        'yt_index_table' is missing or empty.

    Raises:
        ValueError: 'yt_cluster' is missing from ``service``.
    """
    if 'yt_cluster' not in service:
        raise ValueError('Incomplete data')
    if not service.get('yt_index_table', ''):
        logger.warning("Skipping service %s: no data provided about backup", _get_name(service))
        return None

    svc_path = method_args['wc'] + '/' + service['ctype'] + '/' + service['service']
    tmp_path = '//tmp/saas/dump_i_' + service['ctype'] + '_' + service['service']

    template = ("./dump_fullarc_to_yt --yt {yt_cluster} --mode yt --index-dir '{svc_path}/data' --output '{svc_path}/index.dump' "
                "--tmp-dir '{tmp_path}' --dump-format UrlsOnly > ./{service}_{ctype}_index_dump.log 2>&1")
    params = {
        'yt_cluster': service['yt_cluster'],
        'svc_path': svc_path,
        'tmp_path': tmp_path,
        'ctype': service['ctype'],
        'service': service['service'],
    }
    return template.format(**params)


def format_compare_dumps(service, method_args):
    """Build the ``./check_backup compare-dumps`` command for one service.

    The index dump, backup dump and result paths all live under
    ``<wc>/<ctype>/<service>``.

    Args:
        service: dict with 'service', 'ctype', 'yt_cluster', 'last_dish_timestamp'.
        method_args: dict with 'wc' (workcopy folder on YT).

    Returns:
        The shell command, or None (with a warning logged) when
        'last_dish_timestamp' is missing.

    Raises:
        ValueError: 'yt_cluster' is missing from ``service``.
    """
    if 'yt_cluster' not in service:
        raise ValueError('Incomplete data')

    if 'last_dish_timestamp' not in service:
        logger.warning("Skipping service %s: no data provided about backup", _get_name(service))
        return None

    base = method_args['wc'] + '/' + service['ctype'] + '/' + service['service']
    # Positional values, in the order the placeholders appear in the template.
    fields = (
        service['yt_cluster'],
        base + '/index.dump',
        base + '/backup.dump',
        service['last_dish_timestamp'],
        base + '/diff.sample',
        '[C] ' + service['service'] + ' ' + service['ctype'],
        service['service'],
        service['ctype'],
    )
    template = "./check_backup compare-dumps --yt {} --index-dump '{}' --backup-dump '{}' --backup-ts {} --result '{}' --title '{}' --service '{}' --ctype '{}'"
    return template.format(*fields)


def _make_script(output, services, method, method_args, num_threads):
    """Write the shell commands produced by ``method`` for each service.

    ``method(service, method_args)`` may return a single command, a list of
    commands, or a falsy value (the service is then skipped); an exception is
    logged and the service is skipped as well.

    With ``num_threads`` greater than 1 every command is suffixed with " &"
    and a ``wait`` line is written after each group of ``num_threads``
    services that produced output, plus a final one for an incomplete group.

    Args:
        output: object with a ``write`` method receiving the script text.
        services: iterable of service dicts.
        method: formatter callable.
        method_args: extra arguments forwarded to ``method``.
        num_threads: group size; a falsy value means 1.
    """
    threads = num_threads or 1
    parallel = threads > 1
    emitted = 0  # number of services that contributed at least one command

    for service in services:
        commands = None
        try:
            logger.info('Processing %s', _get_name(service))
            commands = method(service, method_args)
        except Exception:
            logger.exception('Failed to process service %s', _get_name(service))

        if not commands:
            continue
        if not isinstance(commands, list):
            commands = [commands]

        for command in commands:
            line = command + " &" if parallel else command
            output.write(line)
            output.write('\n')

        emitted += 1
        if parallel and emitted % threads == 0:
            output.write('wait\n')

    # Wait for the trailing, incomplete group of background jobs.
    if parallel and emitted % threads != 0:
        output.write('wait\n')


def write_script(json_input, service_name, service_ctype, workcopy, num_threads, cmd):
    """Print to stdout the script built by applying ``cmd`` to the selected services.

    Args:
        json_input, service_name, service_ctype: forwarded to load_arguments.
        workcopy: workcopy folder on YT, passed to ``cmd`` as ``method_args['wc']``.
        num_threads: group size forwarded to _make_script.
        cmd: formatter callable such as format_detach_to_yt.
    """
    assert cmd  # e.g. "format_detach_to_yt"
    selected = load_arguments(json_input, service_name, service_ctype)
    formatter_args = {'wc': workcopy}
    _make_script(sys.stdout, selected, cmd, method_args=formatter_args, num_threads=num_threads)


#
# Other operations
#
def compare_dumps(yt_proxy, index_dump, backup_dump, backup_ts, result, title, display_name, display_ctype):
    """Run the dump comparison and print a one-line wiki-formatted summary.

    The comparison itself is delegated to ``routines.compare_dumps`` using a
    YQL client bound to ``yt_proxy``. Afterwards the row count of the
    ``result`` table decides the summary: a link to the table when it has
    rows, "Looks good" otherwise. When both ``display_name`` and
    ``display_ctype`` are given the summary is wrapped into a wiki table row
    linking to the service page.

    Args:
        yt_proxy: YT cluster used for both the YQL and the YT client.
        index_dump: path of the index dump.
        backup_dump: path of the backup dump.
        backup_ts: backup timestamp forwarded to the comparison routine.
        result: path of the output table.
        title: title forwarded to the comparison routine.
        display_name: optional service name, used only in the printed summary.
        display_ctype: optional ctype, used only in the printed summary.
    """
    yql_client = YqlClient(db=yt_proxy,
                           token=os.environ.get('YQL_TOKEN', None),
                           token_path=os.path.expanduser('~/.yql/token'))
    # Progress output is turned off only when YQL_LOG_LEVEL is exactly 'CRITICAL'.
    yql_client.config.progress_on_stderr = os.environ.get('YQL_LOG_LEVEL', '') != 'CRITICAL'

    routines.compare_dumps(yql_client, backup_dump, backup_ts, index_dump, result, title)

    diff_rows = _create_yt_client(yt_proxy).row_count(result)
    if diff_rows > 0:
        logger.warning('Diff found(%i): %s', diff_rows, result)
        wiki_link = 'compare_dumps: ((https://yt.yandex-team.ru/{}/navigation?path={} Diff))'.format(yt_proxy, result)
    else:
        logger.info('Looks good')
        wiki_link = 'compare_dumps: Looks good'

    wiki_text = wiki_link
    if display_name and display_ctype:
        row_template = ('||((https://saas-mon.n.yandex-team.ru/service_info?service={service_name}&ctype={service_ctype} {service_name})) '
                        '| {service_ctype} | {link} ||')
        wiki_text = row_template.format(service_name=display_name,
                                        service_ctype=display_ctype,
                                        link=wiki_link)

    sys.stdout.write('    ' + wiki_text)
    sys.stdout.write('\n')


def parse_args():
    """Parse the command line into a namespace ready to be dispatched.

    Every sub-command stores the function to run under ``method``. The
    ``script-*`` sub-commands use ``write_script`` as the method and
    additionally store the per-service formatter under ``cmd``.

    Returns:
        argparse.Namespace holding ``method`` plus the keyword arguments
        that method expects.

    Exits with a usage error (via ``parser.error``) when no sub-command is given.
    """
    def add_args_for_input(subparser):
        # Options consumed by load_arguments().
        subparser.add_argument('--service', help='SaaS service name', dest='service_name')
        subparser.add_argument('--ctype', help='SaaS service ctype', dest='service_ctype')
        subparser.add_argument('--input', help='Json list of services', dest='json_input')

    def add_args_for_script_gen(subparser):
        # Options consumed by write_script().
        subparser.add_argument('--wc', help='Workcopy folder on YT', dest='workcopy', required=True)
        subparser.add_argument('--threads', help='Number of threads in the generated script', dest='num_threads', type=int)
        subparser.set_defaults(method=write_script)

    parser = ArgumentParser()
    subparsers = parser.add_subparsers()

    mode = subparsers.add_parser('prepare-services-list')
    mode.set_defaults(method=prepare_list)
    add_args_for_input(mode)

    mode = subparsers.add_parser('prepare-last-backups')
    mode.set_defaults(method=prepare_last_backups)
    add_args_for_input(mode)

    mode = subparsers.add_parser('script-dump-backup')
    mode.set_defaults(cmd=format_dump_backup)
    add_args_for_input(mode)
    add_args_for_script_gen(mode)

    mode = subparsers.add_parser('script-detach')
    mode.set_defaults(cmd=format_detach_to_yt)
    add_args_for_input(mode)
    add_args_for_script_gen(mode)

    mode = subparsers.add_parser('script-dump-detach')
    mode.set_defaults(cmd=format_dump_detach)
    add_args_for_input(mode)
    add_args_for_script_gen(mode)

    mode = subparsers.add_parser('script-compare-dumps')
    mode.set_defaults(cmd=format_compare_dumps)
    add_args_for_input(mode)
    add_args_for_script_gen(mode)

    mode = subparsers.add_parser('compare-dumps')
    mode.set_defaults(method=compare_dumps)
    mode.add_argument('--yt', help='YT cluster to use', dest='yt_proxy', required=True)
    mode.add_argument('--index-dump', help='Index dump, generated with dump_fullarc_to_yt', required=True)
    mode.add_argument('--backup-dump', help='Backup dump, generated with dump_fullarc_to_yt', required=True)
    mode.add_argument('--backup-ts', help='Backup timestamp (the query tries to ignore changes newer than that)', type=int, required=True)
    mode.add_argument('--title', help='Display name for YQL web interface', required=False)
    mode.add_argument('--service', help='SaaS service name (optional, logging only)', dest='display_name')
    mode.add_argument('--ctype', help='SaaS service ctype (optional, logging only)', dest='display_ctype')
    mode.add_argument('--result', help='Output path', required=True)

    mode = subparsers.add_parser('script-clean-detach')
    mode.set_defaults(cmd=format_clean_detach)
    add_args_for_input(mode)
    add_args_for_script_gen(mode)

    args = parser.parse_args()
    if not hasattr(args, 'method'):
        # On Python 3 argparse treats sub-commands as optional by default, so
        # an empty command line parses successfully but carries no 'method'.
        # Report a usage error here instead of failing later with a KeyError.
        parser.error('a sub-command is required')
    return args


def _init_logs():
    """Configure logging at INFO level and default YQL_LOG_LEVEL to 'CRITICAL'.

    An already present YQL_LOG_LEVEL environment variable is left untouched.
    """
    log_format = '%(levelname)-8s %(asctime)s %(filename)s:%(lineno)d: %(message)s'
    logging.basicConfig(level=logging.INFO, format=log_format)
    logger.setLevel(logging.INFO)
    os.environ.setdefault('YQL_LOG_LEVEL', 'CRITICAL')


if __name__ == '__main__':
    _init_logs()

    # The parsed namespace carries the function to run under 'method'; every
    # remaining entry is passed to that function as a keyword argument.
    call_kwargs = vars(parse_args())
    handler = call_kwargs.pop('method')
    handler(**call_kwargs)
