# -*- coding: utf-8 -*-

from datetime import datetime
import os
import time
import logging
import tempfile
import random
import socket

from sandbox import common
from sandbox.projects.common import file_utils as fu
from sandbox.projects.common import error_handlers as eh
from sandbox.projects.common import utils
from sandbox.projects.common import cms
from sandbox.projects.common import string
from sandbox.projects.common.search import settings as media_settings
from sandbox.sandboxsdk import copy
from sandbox.sandboxsdk.task import SandboxTask
from sandbox.sandboxsdk.channel import channel
from sandbox.sandboxsdk import paths
from sandbox.sandboxsdk import parameters as sp
from sandbox.sandboxsdk.errors import SandboxTaskFailureError


# Task context key under which on_enqueue stores the id (as a string) of the created database resource
SEARCH_DATABASE_RESOURCE_ID = 'search_database_resource_id'
# Task context key holding the list of hosts excluded by filter_hosts_list (filled in by on_execute)
BAD_HOSTS = 'bad_hosts'


class DatabaseRsyncPath(sp.SandboxStringParameter):
    """
        Full rsync-link to search database.

        BaseGetDatabaseTask.on_execute downloads directly from this link when it is
        non-empty; otherwise it falls back to the default rsync path and then to
        a lookup by shard name. Links starting with 'rbtorrent' are accepted as well.
    """
    name = 'search_database_rsync_path'
    description = 'Shard download link (rsync://host::full/path/, etc)'
    default_value = ''


class BackupDatabaseResource(sp.SandboxBoolParameter):
    """
        Create async backup task for created database resources
        (like auto_backup = True in resource props).

        When set, BaseGetDatabaseTask._set_attrs puts the 'backup_task' attribute
        on the downloaded database resource.
    """
    name = 'backup_database'
    description = 'Create backup task for the created db resource'
    default_value = False


class DatabaseShardName(sp.SandboxStringParameter):
    """
        Download database by given shard name (e.g. primus015-050-<timestamp>).

        Used by BaseGetDatabaseTask.on_execute only when no rsync link is given;
        if it is empty as well, the task asks _get_default_shard_name() for a value.
    """
    name = 'database_shard_name'
    description = 'Specify a shard name'
    default_value = ''


class DatabaseShardHosts(sp.SandboxStringParameter):
    """
        Try to download shard from specified hosts (comma-separated).
        If not set, shard hosts will be detected using CMS.

        BaseGetDatabaseTask.on_execute writes the filtered host list back
        into this context value, joined with the same separator.
    """
    name = 'database_shard_hosts'
    description = 'Try to download the shard from hosts (ex.: host1,host2,host3)'
    default_value = ''
    # Delimiter used by on_execute both to split the parameter value and to join the selected hosts
    separator = ','


class DatabaseType(sp.SandboxStringParameter):
    """
        Downloaded search database type.

        A non-empty value is stored by BaseGetDatabaseTask.on_enqueue
        as the 'db_type' attribute of the created resource.
    """
    name = 'db_type'
    description = 'Choose database type'
    # (label, value) pairs offered to the user -- presumably in that order, TODO confirm against the SDK
    choices = [
        ('basesearch', 'basesearch'),
        ('middlesearch', 'middlesearch'),
        ('oxygen', 'oxygen'),
    ]
    # NOTE(review): the empty default is not one of the choices; on_enqueue treats it as "type not set"
    default_value = ''


class DatabaseAttr(sp.SandboxStringParameter):
    """
        Add attributes to created shard resource (format: attr1=v1, attr2=v2).

        The value is parsed with string.parse_attrs in BaseGetDatabaseTask.on_enqueue
        and passed as attributes of the created resource.
    """
    name = 'resource_attrs'
    description = 'Add attributes to the created db resource (ex.: attr1=v1, attr2=v2)'
    # presumably keeps the value from being carried over when a task is copied -- TODO confirm against the SDK
    do_not_copy = True
    default_value = ''


class CheckDbState(sp.SandboxBoolParameter):
    """
        Check shards on readiness or not
        (check acquire:OK and install:OK flags in shard.state).

        When disabled, BaseGetDatabaseTask.get_database_state reports every shard
        as ready without downloading its shard.state file.
    """
    name = 'check_if_shard_is_ready'
    description = 'Check if shard is ready'
    default_value = True


class BaseGetDatabaseTask(SandboxTask):
    """
        Base class for all database downloading tasks.

        The database resource is created in on_enqueue and filled in on_execute,
        either from an explicit download link or from one of the hosts that hold
        the requested shard. Subclasses must implement _get_database_resource_type()
        and may override the other _get_* / _on_* hooks to customise the behaviour.
    """
    # Parameter classes whose values this task reads from self.ctx
    input_parameters = [
        DatabaseRsyncPath,
        DatabaseShardName,
        DatabaseShardHosts,
        BackupDatabaseResource,
        DatabaseType,
        DatabaseAttr,
        CheckDbState,
    ]

    @property
    def database_owner(self):
        return 'SEARCH-DATABASE-USERS'  # SEARCH-1776

    @property
    def database_filename(self):
        return 'basesearch_database'

    def on_enqueue(self):
        """
            Register the (still empty) database resource before the task runs,
            so that other tasks can be chained to it.

            Resource attributes come from the DatabaseAttr parameter; a non-empty
            DatabaseType is added on top as 'db_type'. The id of the created
            resource is saved to the context as a string.
        """
        SandboxTask.on_enqueue(self)

        resource_attrs = string.parse_attrs(self.ctx[DatabaseAttr.name])
        selected_db_type = utils.get_or_default(self.ctx, DatabaseType)
        if selected_db_type:
            resource_attrs['db_type'] = selected_db_type

        created_resource = self.create_resource(
            self.descr,
            self.database_filename,
            self._get_database_resource_type(),
            attributes=resource_attrs,
            owner=self.database_owner,
        )
        self.ctx[SEARCH_DATABASE_RESOURCE_ID] = str(created_resource.id)

    def check_downloaded_database(self, path, expected_files=None, unexpected_files=None):
        """
            Sanity-check the contents of a downloaded search database
            :path: path to the database directory
            :expected_files: names (relative to path) that must exist
            :unexpected_files: names (relative to path) that must not exist
            :return: True when all checks have passed (failures are reported via eh.ensure)
            TODO: find out what else can be verified and implement a proper check
        """
        eh.ensure(os.path.exists(path), 'Basesearch database was not created successfully')

        for file_name in expected_files or ():
            is_present = os.path.exists(os.path.join(path, file_name))
            eh.ensure(
                is_present,
                'Expected {} file not found, probably wrong database type'.format(file_name)
            )

        for file_name in unexpected_files or ():
            is_absent = not os.path.exists(os.path.join(path, file_name))
            eh.ensure(
                is_absent,
                'Unexpected {} file found, probably wrong database type'.format(file_name)
            )

        return True

    def get_database_state(self, rsync_path):
        """
            Obtain search shard state
            :rsync_path: rsync path to database shard (must end with '/',
                         'shard.state' is appended to it as is)
            :return: True, if shard is ready, False otherwise

            The shard is considered ready without any check when the CheckDbState
            parameter is switched off or when the link is an rbtorrent one.
            Otherwise shard.state is downloaded into a temporary file and must
            contain both 'acquire:OK' and 'install:OK'. A failed download of
            shard.state means "not ready".
        """
        if not self.ctx.get(CheckDbState.name, True) or rsync_path.startswith('rbtorrent'):
            logging.info('Do not check if db is ready')
            return True

        # mkstemp returns an already opened descriptor; only the path is needed,
        # so close the descriptor right away instead of leaking it
        state_fd, shard_state_file_path = tempfile.mkstemp()
        os.close(state_fd)
        try:
            try:
                copy.RemoteCopy(rsync_path + 'shard.state', shard_state_file_path)()
            except common.errors.SubprocessError as e:
                logging.info('Failed to download shard.state file: {}'.format(str(e)))
                return False
            with open(shard_state_file_path) as state_file:
                shard_state = state_file.read()
        finally:
            # best-effort cleanup: the temporary file is of no use after the check
            try:
                os.remove(shard_state_file_path)
            except OSError:
                pass
        return 'acquire:OK' in shard_state and 'install:OK' in shard_state

    def _check_database_on_hosts(self, shard_name, shard_hosts):
        """
            Look for the first host whose copy of shard 'shard_name' is ready
            (readiness is decided by get_database_state)
            :shard_name: database shard name
            :shard_hosts: hosts to check, in order of preference
            :return: pair (rsync link to the shard, host) for the first ready host;
                     (None, None) if the shard is not ready on any of the hosts
        """
        logging.info('Check database shard %s on hosts %s', shard_name, shard_hosts)
        link_template = 'rsync://{}/BASE/{}/'
        for candidate in shard_hosts:
            shard_link = link_template.format(socket.getfqdn(candidate), shard_name)
            if not self.get_database_state(shard_link):
                continue
            return shard_link, candidate
        return None, None

    def _get_unexpected_files(self):
        """
            Список файлов, которые не должны присутствовать в шарде (при их наличии таск фейлится)
        """
        return []

    def _get_expected_files(self):
        """
            Список файлов, которые должны присутствовать в шарде (при их отсутствии таск фейлится)
        """
        return []

    def _get_ignored_files(self):
        """
            Список файлов, которые игнорируются при скачивании шарда
        """
        return []

    def _get_filter_out_host_patterns(self):
        """
            С хостов, в которых есть любая из заданных подстрок, шард не будет качаться.
        """
        return []

    def _get_database_resource_type(self):
        """
            Тип ресурса скачиваемой базы
        """
        raise NotImplementedError

    def _get_cms_configurations(self):
        """
            Список CMS-конфигураций, в которых нужно искать шарды.
            Поиск осуществляется в том же порядке, в котором конфигурации идут в списке
        """
        return [
            'WEB',
            'IMGS',
            'HEAD',
        ]

    def _on_shard_not_ready(self, shard_description):
        """
            Reaction to a shard that cannot be obtained in a ready state
            (the shard at the given link is not ready, or no host has a ready shard)
            :shard_description: shard name or download link, used in the failure message
        """
        failure_message = 'Shard {} is not ready'.format(shard_description)
        eh.check_failed(failure_message)

    def _get_default_shard_name(self):
        """
            Shard name to use when the parameters specify neither a download link nor a shard name.
            The base implementation has no default and reports a failure
        """
        failure_message = 'Shard name or rsync link are not specified, no default shard either'
        eh.check_failed(failure_message)

    def _get_default_rsync_path(self):
        """
            rsync шарда по умолчанию, если в параметрах не задан
        """
        return None

    def download_database(self, remote_path, timeout=7200):
        """
            Download the database from the given link (rsync, rbtorrent, etc)
            into the resource created in on_enqueue
            :remote_path: link to download the database from
            :timeout: timeout passed to the remote copy call
            :return: id of the sandbox resource with the database
        """
        logging.info('Start downloading %s', remote_path)
        resource_id = int(self.ctx[SEARCH_DATABASE_RESOURCE_ID])
        database_resource = channel.sandbox.get_resource(resource_id)
        target_path = database_resource.path

        # get rid of whatever is already stored at the target path
        if os.path.exists(target_path):
            paths.remove_path(target_path)

        fetch = copy.RemoteCopy(remote_path, target_path)
        fetch(exclude=self._get_ignored_files(), timeout=timeout)

        expected = self._get_expected_files()
        unexpected = self._get_unexpected_files()
        self.check_downloaded_database(target_path, expected, unexpected)

        self._set_attrs(database_resource)
        return database_resource.id

    def _set_attrs(self, db):
        """
            Set attributes of the downloaded database resource
            :db: database resource (its id and path are used)

            The shard name from the parameters and the backup request are set
            unconditionally. Attributes taken from the stamp.TAG file (timestamp
            from its first line plus shard name, robot scatter group and search
            zone) are best-effort: any error there is only logged.
        """
        def set_attr(attr_name, attr_value):
            channel.sandbox.set_resource_attribute(db.id, attr_name, attr_value)

        # (marker inside a stamp.TAG line, resource attribute name); the first matching marker wins
        stamp_markers = (
            ('ShardName=', 'shard_name'),
            ('robotScatterGroup=', 'robot_scatter_group'),
            ('SearchZone=', 'search_zone'),
        )

        requested_shard = self.ctx.get(DatabaseShardName.name)
        if requested_shard:
            set_attr(media_settings.SHARD_INSTANCE_ATTRIBUTE_NAME, requested_shard)
        if self.ctx.get(BackupDatabaseResource.name):
            logging.info('Request backup database, id = %s', db.id)
            set_attr('backup_task', True)

        try:
            stamp_lines = fu.read_lines(os.path.join(db.path, 'stamp.TAG'))

            timestamp = int(stamp_lines[0].strip())
            readable_timestamp = datetime.utcfromtimestamp(timestamp).strftime('%Y-%m-%d_%H:%M')
            set_attr('database_timestamp', timestamp)
            set_attr('shard_timestamp_hr', readable_timestamp)

            for stamp_line in stamp_lines:
                for marker, attr_name in stamp_markers:
                    if marker in stamp_line:
                        set_attr(attr_name, stamp_line.strip().replace(marker, ''))
                        break
        except Exception as e:
            logging.info("WARNING!\n%s ", e)

    def filter_hosts_list(self, host_list):
        """
            Filter host list from banned DCs and bad hosts
            :host_list: candidate host names
            :return: new list with the hosts that are neither listed in the
                     BAD_HOSTS context value nor contain any of the substrings
                     returned by _get_filter_out_host_patterns(); the original
                     order is kept

            Always returns a real list (never a lazy iterator), because callers
            test the result for emptiness and take its length.
        """
        bad_hosts = self.ctx.get(BAD_HOSTS, [])
        logging.info("Bad shard hosts: %s", bad_hosts)

        # the patterns do not depend on the host, so fetch them once
        banned_patterns = list(self._get_filter_out_host_patterns())

        logging.info("Shard hosts before filter: %s", host_list)
        return [
            host for host in host_list
            if host not in bad_hosts
            and not any(pattern in host for pattern in banned_patterns)
        ]

    def sort_by_dc(self, hosts):
        """
            Reorder hosts so that the ones from the same DC
            where the task is running come first
            :hosts: host names
            :return: hosts of the task DC followed by all the other hosts (relative
                     order is kept inside both groups); the input itself is returned
                     when the DC of the task host is unknown
        """
        domain_suffix = ".search.yandex.net"

        task_dc = common.utils.HostInfo.dc(self.host + domain_suffix)[:3]
        logging.info("Got '{}' DC for task execution host '{}'".format(task_dc, self.host))
        if task_dc == "unk":
            logging.info("Hosts are not sorted because task dc is unknown")
            return hosts

        local_hosts, remote_hosts = [], []
        for host in hosts:
            host_dc = common.utils.HostInfo.dc(host + domain_suffix)[:3]
            if host_dc == "unk":
                # host first 3 letters are DC signature (e.g. man1-8159)
                host_dc = host[:3]
            logging.info("Host '{}' DC is '{}'".format(host, host_dc))
            bucket = local_hosts if host_dc == task_dc else remote_hosts
            bucket.append(host)
        return local_hosts + remote_hosts

    def on_execute(self):
        """
            Download the database into the resource created in on_enqueue.

            1. If a download link is set (explicitly or via _get_default_rsync_path),
               the shard state is checked and the database is downloaded from that link.
            2. Otherwise the shard is looked up by name: candidate hosts come from the
               DatabaseShardHosts parameter or, if it is empty, from CMS. Hosts are
               filtered, reordered to prefer the DC of the task, and the download is
               attempted up to 3 times with a growing pause between the attempts.
               A host that keeps failing on a retry is added to the BAD_HOSTS list.

            Failures are reported via eh / SandboxTaskFailureError.
        """
        if not self.ctx[DatabaseRsyncPath.name]:
            self.ctx[DatabaseRsyncPath.name] = self._get_default_rsync_path()
        database_rsync_path = self.ctx[DatabaseRsyncPath.name]
        if database_rsync_path:
            logging.info('Get database by rsync link %s', database_rsync_path)
            # append a slash to plain rsync links so that a directory is always downloaded
            if (
                not database_rsync_path.endswith(('/', '*')) and
                not database_rsync_path.startswith('rbtorrent')
            ):
                database_rsync_path += '/'
                self.ctx[DatabaseRsyncPath.name] = database_rsync_path
            if self.get_database_state(database_rsync_path):
                self.download_database(database_rsync_path, timeout=self.ctx.get("kill_timeout", 7200))
            else:
                self._on_shard_not_ready(database_rsync_path)
            return

        if not self.ctx.get(DatabaseShardName.name, ''):
            self.ctx[DatabaseShardName.name] = self._get_default_shard_name()
            logging.info("Setting shard name %s", self.ctx[DatabaseShardName.name])
        shard_name = self.ctx[DatabaseShardName.name]
        shard_hosts = self.ctx.get(DatabaseShardHosts.name)
        if shard_hosts:
            # Drop empty entries (e.g. caused by a trailing comma): an empty name
            # would be resolved by socket.getfqdn() to the local host
            shard_hosts = [
                host.strip()
                for host in shard_hosts.split(DatabaseShardHosts.separator)
                if host.strip()
            ]
        else:
            cms_configurations = self._get_cms_configurations()
            for conf in cms_configurations:
                logging.info("Got cms configuration %s", conf)

            for conf in cms_configurations:
                try:
                    shard_hosts = cms.get_shard_hosts(shard_name, cms_configuration=conf)
                    random.shuffle(shard_hosts)
                except Exception as e:
                    # an unknown configuration is not fatal, just try the next one
                    if "no such conf" not in str(e):
                        raise
                    logging.warning(
                        'Failed to get hosts for shard %s in configuration %s',
                        shard_name, conf
                    )
                    shard_hosts = []
                if shard_hosts:
                    break

            if not shard_hosts:
                # TODO/FIXME (incremental deploy case): instead of failing, the shard's
                # 'resource_url' could be requested from CMS over XML-RPC
                # (http://cmsearch.yandex.ru/xmlrpc/bs, getShard) and downloaded directly
                raise SandboxTaskFailureError('Could not find hosts to download database using CMS')

        # list(): the code below needs truthiness and len() of the result
        shard_hosts = list(self.filter_hosts_list(shard_hosts))
        eh.ensure(shard_hosts, 'Could not find hosts to download database from')

        self.ctx[DatabaseShardHosts.name] = DatabaseShardHosts.separator.join(shard_hosts)
        logging.info("Selected hosts with shard: %s", shard_hosts)
        shard_hosts = self.sort_by_dc(shard_hosts)
        logging.info("Hosts sorted by dc: {}".format(shard_hosts))

        max_attempt = 3
        current_host = None

        for attempt in range(max_attempt):
            logging.info("Current hosts-candidates: {}".format(shard_hosts))
            # if can't download db from one host - wait and try again
            try:
                database_rsync_path, current_host = self._check_database_on_hosts(shard_name, shard_hosts)
                if database_rsync_path:
                    self.ctx[DatabaseRsyncPath.name] = database_rsync_path
                    self.download_database(database_rsync_path, timeout=7200 + 3600 * attempt)
                else:
                    self._on_shard_not_ready(shard_name)
            except Exception as e:
                logging.info('Cannot load db:\n%s', str(e))
                if attempt == max_attempt - 1:
                    eh.fail('Cannot download db, giving up after {} attempts:\n{}'.format(max_attempt, str(e)))

                # The first failure is treated as a temporary problem and the same hosts
                # are retried; from the second failure on the failing host is banned.
                # (The previous condition 'attempt > 1' could only be true on the last
                # attempt, which never reaches this point, so no host was ever banned.)
                if current_host and len(shard_hosts) > 1 and attempt >= 1:
                    self.ctx.setdefault(BAD_HOSTS, []).append(current_host)
                    shard_hosts = list(self.filter_hosts_list(shard_hosts))

                # handle temporary networking problems
                sleep_time = 60 + 300 * attempt
                logging.info("Sleep for %s seconds", sleep_time)
                time.sleep(sleep_time)
            else:
                break
