#!/usr/bin/env python
# -*- coding: utf-8 -*-

import subprocess
import hashlib
import requests
import argparse
import logging
import os
import time
import shutil
import sys

import saas.library.st_indexer_report.python.report_handler as report_handler

import saas.tools.ssm.modules.dm_api as dm_api
import saas.tools.ssm.modules.sandbox_api as sandbox_api
from saas.tools.devops.lib23.deploy_manager_api import DeployManagerApiClient
from saas.library.python.errorbooster import ErrorBooster, handle_exception


# Instantiate ErrorBooster for the 'saas-gsi' name once, at import time.
# NOTE(review): presumably this configures where handle_exception() reports errors —
# confirm against saas.library.python.errorbooster.
ErrorBooster('saas-gsi')


def calc_md5(file_path):
    """
    Calculate md5 hex digest of a local file, reading it in 4096-byte chunks
    :param file_path: type str
    :return: type str
    """
    digest = hashlib.md5()
    with open(file_path, 'rb') as stream:
        while True:
            block = stream.read(4096)
            if not block:
                # Empty read means end of file
                break
            digest.update(block)
    return digest.hexdigest()


def compare_hash(file_path, hash_sum):
    """
    Tell whether md5 sum of a local file equals the expected hash sum
    :param file_path: type str, path of the file to hash
    :param hash_sum: type str, expected md5 hex digest
    :return: type boolean, True when both sums are equal
    """
    actual_sum = calc_md5(file_path)
    logging.debug('Hash sum comparison. File: {}  Target: {}'.format(actual_sum, hash_sum))
    is_same = actual_sum == hash_sum
    return is_same


def get_configuration_data(dm_client, service_name='', service_type='', slot=''):
    """
    Build a mapping of config file name to its url from the config list returned by dm_client
    :param dm_client: type saas.tools.ssm.modules.dm_api.DeployManagerAPI
    :param service_name: type str
    :param service_type: type str
    :param slot: type str
    :return: type dict, {'rename' value: 'url' value} for every entry of 'files'
    """
    listing = dm_client._get_list_conf(service_name, service_type, slot).json()
    # Later entries with the same 'rename' override earlier ones
    return dict((entry['rename'], entry['url']) for entry in listing.get('files'))


def get_config_and_write(dm_client, config_name, config_url, config_path):
    """
    Fetch one config file through dm_client and store it as config_path/config_name
    Up to four attempts are made, sleeping 1, 2 and 4 seconds before the retries
    :param dm_client: type saas.tools.ssm.modules.dm_api.DeployManagerAPI
    :param config_name: type str
    :param config_url: type str
    :param config_path: type str
    :return:
    :raises RuntimeError: no attempt returned an ok response
    """
    logging.info('Getting and writing config file: %s', config_name)
    for pause in (0, 1, 2, 4):
        if pause:
            time.sleep(pause)
        response = dm_client._get_conf(config_url)
        if not response.ok:
            continue
        write_file(response, config_path + '/' + config_name)
        return

    raise RuntimeError('Getting config file {} FAILED'.format(config_name))


def get_searchproxy_instance(dm_client):
    """
    Pick the first slot of the first searchproxy instance reported by dm_client
    :param dm_client: type saas.tools.ssm.modules.dm_api.DeployManagerAPI
    :return: type str
    """
    logging.info('Getting searchproxy instance')
    instances = dm_client._get_searchproxy_instances().json()
    first_slot = instances[0]['slots'][0]
    slot_name = first_slot['slot']
    logging.info('Found searchproxy instance: {}'.format(slot_name))
    return slot_name


def write_file(file_data, file_path):
    """
    Write chunks of data to a file on disk, truncating any existing content
    :param file_data: iterable yielding bytes chunks
    :param file_path: type str
    :return:
    """
    with open(file_path, 'wb') as out:
        out.writelines(file_data)


def download_file(file_dir, rbtorrent, file_url, file_name, binary):
    """
    Download a resource into file_dir with skynet, optionally falling back to a direct HTTP download
    :param file_dir: type str, directory the resource is downloaded into
    :param rbtorrent: type str, skynet id passed to `sky get`
    :param file_url: type str, url used by the direct download fallback
    :param file_name: type str, name of the file written by the direct download fallback
    :param binary: type bool, when false a skynet failure is re-raised instead of falling back
    :raises OSError, subprocess.CalledProcessError: skynet failed and binary is false
    :raises requests.HTTPError: direct download answered with an error status
    """
    try:
        logging.info('Try to download file with skynet %s', rbtorrent)
        subprocess.check_call(['sky', 'get', '-u', '-w', '-d', file_dir, rbtorrent])
        return
    except (OSError, subprocess.CalledProcessError) as exception:
        logging.error(exception)
        if not binary:
            # Bare raise keeps the original traceback
            raise
        logging.error('Skynet is probably not found in system. Download using direct method.')

    logging.info('Downloading {} from url: {}'.format(file_name, file_url))
    file_data = requests.get(file_url, stream=True)
    try:
        # Do not store an HTTP error page as if it were the resource
        file_data.raise_for_status()
        write_file(file_data, os.path.join(file_dir, file_name))
    finally:
        # Streamed responses hold the connection until closed
        file_data.close()


def get_service_file_info(dm_client, service_name, filename='rtyserver'):
    """
    Look up sandbox task id and resource type of a service file in the files list of dm_client
    :param dm_client: type saas.tools.ssm.modules.dm_api.DeployManagerAPI
    :param service_name: type str
    :param filename: type str
    :return: type dict format {'task_id': ..., 'resource_type': ...}
    """
    logging.info('Get files of service')
    listing = dm_client._get_files().json()
    entry = listing['services'][service_name]['files'][filename]
    return dict((key, entry[key]) for key in ('task_id', 'resource_type'))


def get_from_sandbox(sandbox_client, resource_name, task_info, target_path, binary):
    """
    Fetch a resource of a sandbox task into target_path unless an up-to-date copy is already there
    :param sandbox_client: type saas.tools.ssm.modules.sandbox_api.SandboxAPI
    :param resource_name: type str, name of the file or directory expected inside target_path
    :param task_info: type dict format {'task_id': ..., 'resource_type': ...}
    :param target_path: type str, directory to download into (created when missing)
    :param binary: type bool, forwarded to download_file
    :return:
    :raises RuntimeError: resource is not found in the task or md5 of the downloaded file differs
    """
    filepath = os.path.join(target_path, resource_name)

    # Get information about resource from sandbox
    logging.info('Get {} information from task {}'.format(resource_name, task_info['task_id']))
    resource_id = sandbox_client.get_target_resource(task_info['task_id'], task_info['resource_type'])
    if not resource_id:
        raise RuntimeError('Resource {} not found from task {}'.format(task_info['resource_type'], task_info['task_id']))

    resource_data = sandbox_client.get_resource(resource_id)
    resource_rbtorrent = resource_data['skynet_id']
    resource_md5 = resource_data['md5']
    resource_url = resource_data['http']['proxy']

    logging.info('Resource md5sum : {}'.format(resource_md5))

    # Check for already downloaded binary
    if not os.path.exists(target_path):
        os.makedirs(target_path)
    if os.path.exists(filepath):
        logging.info('Found already exists {} binary'.format(resource_name))
        is_regular_file = os.path.isfile(filepath)
        # Only a regular file can be hashed; anything else found at this path is replaced
        if is_regular_file and resource_md5 and compare_hash(filepath, resource_md5):
            logging.info('md5sum for {} is same with resource, no need to download'.format(resource_name))
            return
        logging.info('md5sum for {} is not same with resource, remove old resource'.format(resource_name))
        if is_regular_file:
            os.remove(filepath)
        else:
            shutil.rmtree(filepath)

    # Download and check md5 (skipped when sandbox reports no md5)
    download_file(target_path, resource_rbtorrent, resource_url, resource_name, binary)
    logging.info('Downloading finished')

    if resource_md5 and not compare_hash(filepath, resource_md5):
        raise RuntimeError('md5 hash sum of downloaded file does not correspond target md5 sum of resource {}'.format(resource_md5))


def get_additional_resource_info(additional_resource):
    """
    Split additional resource description of format NAME:DEST_DIR
    :param additional_resource: type str
    :return: type list format [name, dest_dir]
    :raises RuntimeError: description does not consist of exactly two ':'-separated parts
    """
    parts = additional_resource.split(":")
    if len(parts) != 2:
        raise RuntimeError("wrong additional resource, read flags description")
    return parts


def prepare_logging(options):
    """
    Configure the root logger: level from options.debug, handlers for the stream
    and, when requested, for options.logfile and options.reportfile
    :param options: argparse.Namespace
    """
    root_logger = logging.getLogger()
    sinks = [logging.StreamHandler()]
    root_logger.setLevel(logging.DEBUG if options.debug else logging.INFO)

    if options.logfile:
        sinks.append(logging.FileHandler(options.logfile))
    if options.reportfile:
        sinks.append(report_handler.ReportHandler(options.reportfile))

    # Every handler shares one message layout
    layout = logging.Formatter('[%(asctime)s GSI [%(process)d] [%(levelname)s] %(message)s',
                               datefmt='%Y-%m-%d %H:%M:%S')
    for sink in sinks:
        sink.setFormatter(layout)
        root_logger.addHandler(sink)


def parse_args(*args):
    """
    Parse command line options of the tool
    Allowed values of --ctype are taken from DeployManagerApiClient; when they can not
    be obtained a built-in list is used instead and the failure is logged
    :param args: command line arguments; when none are given the process arguments are parsed
    :return: type argparse.Namespace
    """
    parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter,
                                     prog="Get standalone_indexer tool",
                                     description="""
DESCRIPTION
Getting configurations for standalone indexer and standalone_indexer binary file
Required arguments: --service, --ctype
Optional arguments: --logfile, --debug, --path, --symlinks, --no-binary, --no-dict, --download-and-put-nanny-resource
""")
    try:
        dm = DeployManagerApiClient()
        allowed_ctypes = dm.ctypes
    except Exception:
        # Not a bare except: KeyboardInterrupt and SystemExit must not be swallowed here
        allowed_ctypes = [
            'prestable_turbo', 'stable_patents', 'stable_hamster_unisearch', 'testing', 'prestable', 'prestable_drive', 'prestable_ymusic',
            'stable_middle_kv', 'prestable_ydo', 'stable_lavka', 'stable_turbo', 'prestable_ugc', 'stable_hamster_ymusic', 'stable_dj', 'stable_market',
            'stable_gemini', 'stable_rapid_clicks', 'stable_canary', 'stable', 'stable_ydo', 'stable_ymusic', 'prestable_zen', 'prestable_market',
            'prestable_market_idx', 'stable_drive', 'stable_hamster_ydo', 'stable_kv', 'stable_hamster_turbo', 'stable_zen', 'stable_refresh', 'stable_unisearch',
            'prestable_dj', u'stable_hamster', u'prestable_lavka', u'stable_market_idx', u'stable_ugc'
        ]

        logging.exception("Can't reliably obtain ctypes from DM, assuming %s", allowed_ctypes)

    parser.add_argument('--service', dest='service', required=True)
    parser.add_argument('--ctype', dest='ctype', required=True, choices=allowed_ctypes)
    parser.add_argument('--path', dest='dst_dir', default='./')
    parser.add_argument('--logfile', dest='logfile')
    parser.add_argument('--report-path', dest='reportfile')
    parser.add_argument('--debug', dest='debug', action='store_true', default=False)
    parser.add_argument('--symlinks', dest='symlinks', action='store_true', default=False)
    parser.add_argument('--no-binary', dest='fetch_binary', action='store_false', default=True)
    parser.add_argument('--no-dict', dest='fetch_dict', action='store_false', default=True)
    parser.add_argument('--remove-old', dest='remove_old', action='store_true', default=False)
    parser.add_argument('--download-and-put-nanny-resource', dest='additional_nanny_resources', action='append', default=[],
                        help='download from nanny resource with name and put it to local dst. format: NAME:DEST_DIR; example: dssm_model:/models/dssm or AnotherFile:/path/to/another/file')

    # parse_args(None) makes argparse read the process arguments
    return parser.parse_args(args or None)


def create_symlink(src, dst):
    """
    Create symlink named dst in the current directory pointing to src
    Whatever is already found at that name (including a dangling symlink) is unlinked first
    :param src: type str, target of the symlink
    :param dst: type str, name of the symlink relative to the current directory
    :return:
    """
    dst_path = os.path.join(os.path.curdir, dst)
    # Compare normalised paths: unlinking dst when it IS src would destroy the target itself
    if os.path.abspath(src) == os.path.abspath(dst_path):
        return
    # lexists also sees dangling symlinks, which os.path.exists reports as missing
    if os.path.lexists(dst_path):
        os.unlink(dst_path)
    os.symlink(src, dst_path)


def _remove_stale_files(root_dir, actual_files):
    """
    Remove every file under root_dir which is not an actual resource
    :param root_dir: type str
    :param actual_files: type set of absolute paths of files and directories to keep
    """
    for directory, dirs, files in os.walk(root_dir):
        if os.path.abspath(directory) in actual_files:
            # Directory itself is an actual resource: keep it with everything below it
            del dirs[:]
            continue
        for file_name in files:
            path = os.path.abspath(os.path.join(directory, file_name))
            if path not in actual_files:
                logging.info('Removing old resource: {0}'.format(path))
                os.remove(path)


def main(*args):
    """
    Fetch additional resources, standalone_indexer binary, dict.dict and config files
    of a service into the destination directory
    RuntimeError is reported and logged without changing the exit code,
    any other exception is reported and terminates the process with exit code 1
    :param args: command line arguments; when none are given the process arguments are parsed
    """
    # Options
    options = parse_args(*args)

    # Logging
    prepare_logging(options)

    # Check configs directory
    configs_dir = os.path.join(options.dst_dir, 'configs')
    if not os.path.isdir(configs_dir):
        os.makedirs(configs_dir)

    # Absolute paths of everything that belongs to this run; --remove-old keeps only these.
    # Paths are normalised because the cleanup compares against os.path.abspath results.
    actual_files = set()
    # Clients
    dm_client = dm_api.DeployManager(options.service, options.ctype, use_proxy=True)
    sandbox_client = sandbox_api.SandboxAPI()

    try:
        # Check service exists
        if not dm_client.check_service_exists(strong_ctype=True):
            raise RuntimeError('Service %s with ctype %s not found.' % (options.service, options.ctype))

        for additional_resource in options.additional_nanny_resources:
            resource_name, dst_dir = get_additional_resource_info(additional_resource)
            # Entries outside of options.dst_dir are harmless: cleanup never walks there
            actual_files.add(os.path.abspath(os.path.join(dst_dir, resource_name)))

            logging.info('Fetching additional resource {} to {}'.format(resource_name, dst_dir))
            task_info = get_service_file_info(dm_client, options.service, filename=resource_name)
            get_from_sandbox(sandbox_client, resource_name, task_info, dst_dir, False)

        binary_path = os.path.join(options.dst_dir, 'standalone_indexer')
        if options.fetch_binary:
            # Get standalone indexer binary
            rtyserver_task_info = get_service_file_info(dm_client, options.service, filename='rtyserver')
            rtyserver_task_info['resource_type'] = 'SAAS_UTIL_STANDALONE_INDEXER'
            get_from_sandbox(sandbox_client, 'standalone_indexer', rtyserver_task_info, options.dst_dir, True)
            # make binary runnable
            os.chmod(binary_path, 0o0755)

        # Binary is kept even when it was not fetched on this run
        actual_files.add(os.path.abspath(binary_path))

        # Get dict.dict
        if options.fetch_dict and options.ctype in ['stable', 'prestable']:
            dict_dict_task_info = get_service_file_info(dm_client, options.service, filename='dict.dict')
            get_from_sandbox(sandbox_client, 'dict.dict', dict_dict_task_info, options.dst_dir, True)
            actual_files.add(os.path.abspath(os.path.join(options.dst_dir, 'dict.dict')))

        # Get config files
        configuration_data = get_configuration_data(dm_client)
        for config in configuration_data:
            get_config_and_write(dm_client, config, configuration_data[config], configs_dir)
            actual_files.add(os.path.abspath(os.path.join(configs_dir, config)))

        # Get searchmap.json
        searchproxy_instance = get_searchproxy_instance(dm_client)
        searchproxy_configuration_data = get_configuration_data(dm_client, 'searchproxy', 'searchproxy', searchproxy_instance)
        get_config_and_write(dm_client, 'searchmap.json', searchproxy_configuration_data['searchmap.json'], configs_dir)
        actual_files.add(os.path.abspath(os.path.join(configs_dir, 'searchmap.json')))

        # Create symlinks
        if options.symlinks:
            create_symlink(configs_dir, 'configs')
            create_symlink(os.path.join(options.dst_dir, 'standalone_indexer'), 'standalone_indexer')

        if options.remove_old:
            _remove_stale_files(options.dst_dir, actual_files)
        logging.info('All done! Now you can run standalone_indexer')
    except RuntimeError as e:
        handle_exception(e)
        logging.critical(e)
        logging.critical('During program operation errors occured. Correct work of standalone_indexer is not guaranteed.')
    except Exception as e:
        handle_exception(e)
        logging.critical(e)
        sys.exit(1)


# Executed as a script: run the tool with options taken from the process command line
if __name__ == '__main__':
    main()
