# -*- coding: utf-8 -*-

import logging
import os
import tempfile

from sandbox import common

from sandbox.projects import resource_types
from sandbox.sandboxsdk import parameters as sp

from sandbox.projects.common.search import shards
from sandbox.projects.common import BaseGetDatabaseTask as bgdt


# Context key set by GetBasesearchDatabase.on_enqueue when a resource for the
# requested shard already exists; on_execute then skips the download.
_ALREADY_DOWNLOADED = 'already_downloaded'

# CMS configurations used when the task context has no (or an empty)
# 'cms_configurations' value.
_DEFAULT_CMS_CONFIGURATIONS = [
    'ASPAM',
    'WEB',
    'HEAD',
]


class WaitDatabaseReady(sp.SandboxBoolParameter):
    """
        Wait until the database is ready (the shard's shard.state file is checked)
    """
    name = 'wait_ready'
    description = 'Wait the database shard ready'
    default_value = False


class CheckSpamState(sp.SandboxBoolParameter):
    """
        Whether to check that spam data of the shard is ready.
        When disabled, GetBasesearchDatabase._check_spam_ready skips the check
        and reports the spam data as ready.
    """
    name = 'check_if_spam_is_ready'
    description = 'Check if spam is ready'
    default_value = True


class GetBasesearchDatabase(bgdt.BaseGetDatabaseTask):
    """
        **Description**

            Downloads a database for the web basesearch. Two ways are supported.

            1. By an explicit URL

                The rsync:// scheme is known to work. The scheme must be given explicitly,
                for example: rsync://ws29-490/BASE/primus022-017-1404903995
                Whether anything else (rbtorrent:XXX, scp, etc) works has not been checked,
                feel free to check it yourself.
                Most likely rbtorrent:XXX will work.

            2. By shard name

                Example name: primus022-017-1404903995. The task uses cms to find all hosts
                holding this database and tries to download it from them over rsync,
                one host after another.

            To download databases of other (non-web) searches prefer the following tasks:

            * For a fusion database - GET_FUSION_DATABASE
            * For user data for the middlesearch - GET_MIDDLESEARCH_DATABASE
    """
    type = 'GET_BASESEARCH_DATABASE'

    input_parameters = bgdt.BaseGetDatabaseTask.input_parameters + [CheckSpamState, WaitDatabaseReady]

    execution_space = 220000 + 5000  # bss.MAX_BASE_SHARD_SIZE does not fit for WebTier1

    # Delay (in seconds) passed to wait_time() when the shard is not ready yet.
    WAIT_TIME = 1200

    def _get_cms_configurations(self):
        """
            Return CMS configurations to use for the requested shard.

            Starts from the 'cms_configurations' context value (or the module defaults)
            and, when the shard name ends with a numeric timestamp, appends the matching
            'production_base-{timestamp}' configuration.

            NOTE(review): the result is the original sequence in the early-return
            branches and a tuple otherwise; callers presumably only iterate over it.
        """
        configurations = self.ctx.get('cms_configurations') or _DEFAULT_CMS_CONFIGURATIONS
        # only in production_base-{timestamp} configuration
        # for details please consult osol@
        shard_name = self.ctx.get(bgdt.DatabaseShardName.name)
        if not shard_name:  # suddenly it can be empty or None
            return configurations
        timestamp = shard_name.split('-')[-1]
        if not timestamp.isdigit():  # something strange, skip
            return configurations
        conf = 'production_base-{0}'.format(timestamp)
        return tuple(configurations) + (conf,)

    def _get_filter_out_host_patterns(self):
        """
            Return host patterns that should be filtered out
            (presumably applied by the base task to candidate source hosts).
        """
        return ['las', 'ams']

    def _get_database_resource_type(self):
        """
            Return the resource type used for the downloaded database.
        """
        return resource_types.SEARCH_DATABASE

    def _check_spam_ready(self, rsync_path):
        """
            Check whether spam data of the shard is ready.

            Copies '<rsync_path>Spam/bans.rev' to a temporary file and treats the spam
            data as ready when the file contains a non-zero integer.

            :rsync_path: rsync path to database shard; 'Spam/bans.rev' is appended
                         as is, so the path is presumably expected to end with '/'
            :return: True if the check is disabled or spam data is ready,
                     False if the file cannot be copied or has no non-zero integer.
        """
        if not self.ctx.get(CheckSpamState.name, True):
            logging.info('Do not check if spam is ready')
            return True

        fd, spam_file_path = tempfile.mkstemp()
        # Only the path is needed: close the descriptor right away so it does not leak.
        os.close(fd)
        try:
            try:
                self.remote_copy(rsync_path + 'Spam/bans.rev', spam_file_path)
            except common.errors.SubprocessError:
                return False
            with open(spam_file_path) as spam_file:
                spam_state = spam_file.read()
        finally:
            # Best-effort cleanup: the temporary file is not needed after reading.
            try:
                os.remove(spam_file_path)
            except OSError:
                pass

        try:
            return bool(int(spam_state.strip()))
        except ValueError:
            return False

    def _set_waiting_reason(self, reason):
        """
            Update information about why are we waiting.
        """
        self.ctx['waiting_reason'] = reason

    def _unset_waiting_reason(self):
        """
            Drop information that we're waiting for shard readiness
        """
        self.ctx['waiting_reason'] = ''

    def on_enqueue(self):
        """
            It is assumed that when we start this task as subtask,
            we will first check shard existence via search.shards::get_shard_resource_by_name
            and will not even start this task.

            When task is started separately (e.g. from GUI) then we'll check
            here that shard already exists and will do nothing (set 'already_downloaded' flag)
        """
        shard_name = self.ctx.get(bgdt.DatabaseShardName.name)
        if not shard_name:
            # NOTE(review): the base on_enqueue is not called when no shard name is given
            # (e.g. download by URL) -- confirm this is intended.
            return

        # Per-tier disk space overrides of the class-level default.
        if "WebTier1" in shard_name:
            self.execution_space = 520 * 1024  # Detected 514 Gb usage
        elif "PlatinumTier0" in shard_name:
            # NOTE(review): 15 * 1024 is far below the usage quoted in the comment and
            # below the class default -- confirm the intended value.
            self.execution_space = 15 * 1024  # Detected 347 Gb usage

        database_resource = shards.get_shard_resource_by_name(shard_name)
        if database_resource is not None:
            # The shard is already available: remember it and skip the download.
            self.ctx[bgdt.SEARCH_DATABASE_RESOURCE_ID] = str(database_resource.id)
            self.ctx[_ALREADY_DOWNLOADED] = True
            return

        bgdt.BaseGetDatabaseTask.on_enqueue(self)

    def on_execute(self):
        """
            Reset the waiting reason and run the base download unless on_enqueue
            has found an already existing resource for the shard.
        """
        # See SEARCH-1680: Do not check here for shard existence
        self._unset_waiting_reason()

        if not self.ctx.get(_ALREADY_DOWNLOADED):
            # do not download anything if it was detected in on_enqueue
            bgdt.BaseGetDatabaseTask.on_execute(self)

    def get_database_state(self, rsync_path):
        """
            Obtain shard database state.

            On a negative result the reason is stored in the 'waiting_reason' context field.

            :rsync_path: rsync path to database shard
            :return: True if ready, False otherwise.
        """
        shard_state_ok = bgdt.BaseGetDatabaseTask.get_database_state(self, rsync_path)
        if not shard_state_ok:
            self._set_waiting_reason('Shard is not ready (according to shard.state)')
            return False

        # Spam readiness is not checked for paths containing 'geoshard'.
        if 'geoshard' in rsync_path:
            return True

        spam_state_ok = self._check_spam_ready(rsync_path)
        if not spam_state_ok:
            self._set_waiting_reason('Spam is not ready on {}'.format(rsync_path))
        return spam_state_ok

    def _get_ignored_files(self):
        """
            Return file name patterns to ignore
            (presumably excluded by the base task when copying the shard).
        """
        return ['old*', 'delta*', 'indexarr', 'indexiarr', 'bin', 'archmap.dat', 'experimental']

    def _on_shard_not_ready(self, shard_description):
        """
            Handle a shard that is not ready.

            When the WaitDatabaseReady parameter is set, wait for WAIT_TIME seconds
            via wait_time(); otherwise delegate to the base implementation.
        """
        if self.ctx.get(WaitDatabaseReady.name, False):
            # Derive the logged delay from WAIT_TIME so the message cannot go stale.
            logging.info(
                'Shard %s is not ready yet, wait %d minutes.',
                shard_description, self.WAIT_TIME // 60
            )
            self.wait_time(self.WAIT_TIME)
        else:
            bgdt.BaseGetDatabaseTask._on_shard_not_ready(self, shard_description)


# Module-level alias of the task class (presumably the name the Sandbox task loader looks up -- confirm).
__Task__ = GetBasesearchDatabase
