import logging
from datetime import datetime

from sandbox import sdk2
from sandbox.projects.collections.CopyDumpToYt import YDB_CONVERTED_DUMPS_BASEDIR
from sandbox.projects.collections.DumpYdbToYt import CollectionsDumpYdbToYt
from sandbox.common.types.task import Status
from sandbox.common.errors import TaskFailure
from sandbox.sandboxsdk import environments
from sandbox.projects.collections.mixins import YasmReportable


# Module-level logger named after this module.
LOGGER = logging.getLogger(__name__)

# Well-known node names. LATEST is the name of the link created by
# CollectionsDumpYdbToYtPlanner._create_latest_link; the other three are not
# referenced in this module (presumably imported elsewhere -- confirm before
# removing).
LATEST, KEEP, WEEKLY, MONTHLY = 'latest', 'keep', 'weekly', 'monthly'

# Not referenced in this module either; kept as a mutable set on purpose so
# that the exported object keeps its type.
DEFAULT_CONVERTED_DIRS = {'//home/collections-backups/ydb-converted'}


class CollectionsDumpYdbToYtPlanner(sdk2.Task, YasmReportable):
    """
    YDB dumper entrypoint.

    Runs a CollectionsDumpYdbToYt subtask for every table listed in the
    ``collections`` parameter and waits for all of them. When every subtask
    has succeeded it optionally repoints the ``latest`` links to the freshly
    created dump directories and reports the lag signal.
    """
    class Requirements(sdk2.Task.Requirements):
        # Provides the ``yt`` package; the methods below import yt.wrapper
        # lazily (presumably because it is only available once this
        # environment has been prepared -- confirm).
        environments = [
            environments.PipEnvironment('yandex-yt', version='0.9.26'),
        ]

    class Parameters(sdk2.Task.Parameters):
        monitoring_server_host = sdk2.parameters.String(
            'Monitoring server',
            default='monit.n.yandex-team.ru',
        )
        with sdk2.parameters.Group('YT parameters') as yt_parameters:
            yt_proxy = sdk2.parameters.String(
                'YT proxy',
                required=True,
            )
            yt_token_vault = sdk2.parameters.String(
                'YT token vault for creating links',
                required=True,
            )
            yt_dump_directory = sdk2.parameters.String(
                'Base dir for dumps',
                default='//home/collections-backups/ydb-new',
                required=False,
            )
            yt_converted_directory = sdk2.parameters.String(
                'Base dir for converted dump. Debug option, don\'t override it in scheduler!',
                default=YDB_CONVERTED_DUMPS_BASEDIR,
                required=False,
            )
        with sdk2.parameters.Group('YDB parameters') as ydb_parameters:
            ydb_token_vault = sdk2.parameters.String(
                'YDB token vault for cli tool',
                required=True,
            )
            endpoint = sdk2.parameters.String(
                'YDB endpoint',
                required=True,
            )
            database = sdk2.parameters.String(
                'YDB database name',
                required=True,
            )
        with sdk2.parameters.Group('Dump parameters') as dump_parameters:
            collections = sdk2.parameters.List(
                'List of YDB tables for dump',
                required=True,
            )
            # When empty/unset, the current local time is used instead
            # (see _prepare_context).
            timestamp = sdk2.parameters.Float(
                'Dump creation timestamp',
                required=False,
            )
            create_latest_link = sdk2.parameters.Bool(
                'Create link to latest dump',
                default=True,
                required=False,
            )
            # Maps a collection name to a comma-separated list of fields
            # (parsed by _sort_by).
            sort_by = sdk2.parameters.Dict(
                'collection:sort_by_fields(separated by comma)',
                default={},
                required=False,
            )
        kill_timeout = 50400  # 14 hours

    @property
    def _dump_directory(self):
        """Return (and log) the YT directory for dumps of the configured database."""
        from yt.wrapper import ypath_join
        directory = ypath_join(self.Parameters.yt_dump_directory, self.Parameters.database)
        LOGGER.info('Dump dir is %s', directory)
        return directory

    @property
    def _converted_directory(self):
        """Return (and log) the YT directory for converted dumps of the configured database."""
        from yt.wrapper import ypath_join
        directory = ypath_join(self.Parameters.yt_converted_directory, self.Parameters.database)
        LOGGER.info('Converted dir is %s', directory)
        return directory

    def _sort_by(self, collection):
        """
        Return the list of sort-by field names configured for ``collection``.

        The ``sort_by`` parameter maps a collection name to a comma-separated
        string of fields. Whitespace around every field is stripped and empty
        items (for example produced by a trailing comma) are dropped, so
        ``'a, b,'`` yields ``['a', 'b']``. An empty list is returned when
        nothing is configured for the collection.
        """
        raw_fields = self.Parameters.sort_by.get(collection)
        if not raw_fields:
            return []
        stripped_fields = (field.strip() for field in raw_fields.split(','))
        return [field for field in stripped_fields if field]

    def _create_output_dirs(self, yt_client, paths):
        """Create every YT directory from ``paths`` that does not exist yet."""
        for path in paths:
            if not yt_client.exists(path):
                yt_client.mkdir(path, recursive=True)

    def _prepare_context(self):
        """
        Compute the directories of the current run and store them in Context.

        The directory name is the ISO-formatted ``timestamp`` parameter or,
        when it is not set, the current local time. The resulting paths are
        saved as ``Context.current_dump_dir`` and
        ``Context.current_converted_dir``.
        """
        from yt.wrapper import ypath_join
        # like in CopyDumpToYt
        current_time = (
            datetime.fromtimestamp(float(self.Parameters.timestamp))
            if self.Parameters.timestamp else
            datetime.now()  # kept as is for backward compatibility
        ).isoformat()
        LOGGER.info('Use current time %s', current_time)
        current_dump_dir = ypath_join(self._dump_directory, str(current_time))
        current_converted_dir = ypath_join(self._converted_directory, str(current_time))
        LOGGER.info('Current dump dir %s', current_dump_dir)
        LOGGER.info('Current converted dir %s', current_converted_dir)
        self.Context.current_dump_dir = current_dump_dir
        self.Context.current_converted_dir = current_converted_dir
        self.Context.save()

    def on_execute(self):
        """
        Task body, executed in two stages.

        First stage: prepare the output directories, enqueue one
        CollectionsDumpYdbToYt subtask per collection and wait for all of
        them to reach a BREAK or FINISH status.

        Second stage: fail with TaskFailure if any subtask did not end in
        SUCCESS; otherwise update the ``latest`` links (when enabled) and
        report the lag signal.
        """
        from yt.wrapper import ypath_join, YtClient
        yt_token = sdk2.Vault.data(self.owner, self.Parameters.yt_token_vault)
        yt_client = YtClient(proxy=self.Parameters.yt_proxy, token=yt_token)

        with self.memoize_stage.first_step:
            LOGGER.info('First step')
            self._prepare_context()
            self._create_output_dirs(
                yt_client,
                [self.Context.current_dump_dir, self.Context.current_converted_dir],
            )
            tasks = []
            for collection in self.Parameters.collections:
                task = CollectionsDumpYdbToYt(
                    self,
                    description="Child of {}".format(self.id),
                    owner=self.owner,
                    yt_proxy=self.Parameters.yt_proxy,
                    yt_token_vault=self.Parameters.yt_token_vault,
                    ydb_token_vault=self.Parameters.ydb_token_vault,
                    endpoint=self.Parameters.endpoint,
                    database=self.Parameters.database,
                    collection=collection,
                    yt_dump_path=ypath_join(self.Context.current_dump_dir, collection),
                    yt_converted_path=ypath_join(self.Context.current_converted_dir, collection),
                    sort_by=self._sort_by(collection),
                )
                task.enqueue()
                LOGGER.info('Schedule task %s', task.id)
                tasks.append(task.id)
            # Remember the children so that the second stage can check them.
            self.Context.child_task_ids = tasks
            self.Context.save()
            raise sdk2.WaitTask(
                tasks,
                list(Status.Group.BREAK + Status.Group.FINISH),
                wait_all=True,
            )
        # NOTE(review): the first stage is entered via ``with`` while this one
        # is only tested for truthiness -- confirm that this is intended.
        if self.memoize_stage.second_step:
            LOGGER.info('Second step')
            bad_tasks = [
                task_id
                for task_id in self.Context.child_task_ids
                if sdk2.Task[task_id].status != Status.SUCCESS
            ]
            if bad_tasks:
                raise TaskFailure('Subtasks has been failed: {0}'.format(bad_tasks))
            if self.Parameters.create_latest_link:
                self._create_latest_link(yt_client, self._dump_directory, self.Context.current_dump_dir)
                self._create_latest_link(yt_client, self._converted_directory, self.Context.current_converted_dir)
            self._report_lag(
                self.make_signal_name(
                    self.Parameters.database,
                ),
            )

    def _create_latest_link(self, yt_client, base_path, current_path):
        """
        Point ``<base_path>/latest`` to ``current_path`` if it is newer.

        The link is (re)created when it cannot be resolved or when
        ``current_path`` compares greater than the path it currently resolves
        to. Any YT error other than a resolve error is re-raised.
        """
        from yt.wrapper import ypath_join, YtHttpResponseError

        link_path = ypath_join(base_path, LATEST)
        latest_path = None
        try:
            latest_path = yt_client.get(ypath_join(link_path, '@path'))
        except YtHttpResponseError as e:
            # A resolve error is treated as "there is no latest link yet".
            if not e.is_resolve_error():
                raise
        # Plain string comparison: the last path component is an ISO
        # timestamp (see _prepare_context), so a greater string means a
        # later dump under the same base directory.
        if latest_path is None or current_path > latest_path:
            yt_client.link(
                link_path=link_path,
                target_path=current_path,
                recursive=True,
                force=True,
            )

    @staticmethod
    def make_signal_name(database):
        """
        Build the signal name for ``database``.

        A single leading ``/`` is removed if present and the remaining
        slashes are replaced with underscores, e.g. ``'/ru/home/db'`` gives
        ``'collections_ydb_backup_planner_ru_home_db'``. A name without a
        leading slash is used as is (its first character is no longer lost).
        """
        if database.startswith('/'):
            database = database[1:]
        return 'collections_ydb_backup_planner_{}'.format(
            database.replace('/', '_'),
        )


# Module-level alias for the task class defined above.
# NOTE(review): presumably consumed by Sandbox task discovery -- confirm.
__TASK__ = CollectionsDumpYdbToYtPlanner
