import logging
import json
import os
import time
from functools import wraps

from sandbox import sdk2
from sandbox.common.types.task import Semaphores
from sandbox.projects.collections.resources import CollectionsYdbCli, CollectionsYdbDumpConverter
from sandbox.projects.common import utils
from sandbox.sandboxsdk import process, environments


# Pause between two polls of the export operation status and between two
# attempts of a retried YDB CLI call.
YDB_RETRIES_TIMEOUT = 15  # in secs
# Value passed to the YDB CLI export command through its --retries option.
YDB_NATIVE_RETRIES = 1000
# Total number of attempts made by the `retry` decorator (the last one is not
# guarded, so its error reaches the caller).
ERROR_RETRIES_COUNT = 5
# Maximum length, in characters, of the --description value sent with the export.
DUMP_DESCRIPTION_LIMIT = 120

# Module-level logger used by both the task and the dumper.
LOGGER = logging.getLogger(__name__)


class CollectionsDumpYdbToYt(sdk2.Task):
    """
    Worker for CollectionsDumpYDBToYtPlanner.

    Dumps one table from YDB into YT, converts the dump in YT to the old Mongo
    format and, when sort fields are given, sorts the dumped table.
    """
    class Requirements(sdk2.Task.Requirements):
        # yt.wrapper is imported inside _sort_dump, so the package is requested here.
        environments = [environments.PipEnvironment('yandex-yt', version='0.9.26')]
        # Every run acquires this semaphore.
        semaphores = Semaphores(
            acquires=[Semaphores.Acquire(name='COLLECTIONS_YDB_DUMP_AND_CONVERT')],
        )

    class Parameters(sdk2.Task.Parameters):
        with sdk2.parameters.Group('YT parameters') as yt_parameters:
            yt_proxy = sdk2.parameters.String('YT proxy', required=True)
            # Name of the Vault record holding the YT token (read with the task owner).
            yt_token_vault = sdk2.parameters.String('YT token vault', required=True)
            # Destination of the raw YDB export.
            yt_dump_path = sdk2.parameters.String('Path to dump in YT', required=True)
            # Output table of the converter tool.
            yt_converted_path = sdk2.parameters.String('Path to dump in YT in mongo format', required=True)
        with sdk2.parameters.Group('YDB parameters') as ydb_parameters:
            # Name of the Vault record holding the YDB token (read with the task owner).
            ydb_token_vault = sdk2.parameters.String('YDB token vault', required=True)
            endpoint = sdk2.parameters.String('YDB endpoint', required=True)
            database = sdk2.parameters.String('YDB database name', required=True)
            collection = sdk2.parameters.String('YDB table name', required=True)
        with sdk2.parameters.Group('Postprocessing params') as postprocessing_params:
            # An empty list disables the sort step.
            sort_by = sdk2.parameters.List('Sort by fields after dump')
        kill_timeout = 12 * 60 * 60  # 12 hours, in seconds

    @property
    def ydb_token(self):
        """YDB token read from the owner's Vault record on every access."""
        vault_name = self.Parameters.ydb_token_vault
        return sdk2.Vault.data(self.owner, vault_name)

    @property
    def yt_token(self):
        """YT token read from the owner's Vault record on every access."""
        vault_name = self.Parameters.yt_token_vault
        return sdk2.Vault.data(self.owner, vault_name)

    @property
    def ydb_tool_path(self):
        """Local path of the last released YDB CLI resource (resolved on every access)."""
        return self._get_tool_path(CollectionsYdbCli)

    def on_execute(self):
        """Run the pipeline: export from YDB, convert the dump, optionally sort it."""
        # The converter is resolved before the export is started.
        converter = self._get_tool_path(CollectionsYdbDumpConverter)

        self._dump_ydb()
        self._convert_dump(converter, self.yt_token)

        sort_fields = self.Parameters.sort_by
        if not sort_fields:
            return
        self._sort_dump(token=self.yt_token, sort_by=sort_fields)

    # Each of the status hooks below forgets the export operation recorded
    # in the task context, if there is one.

    def on_failure(self, prev_status):
        self._forget()

    def on_terminate(self):
        self._forget()

    def on_timeout(self, prev_status):
        self._forget()

    def on_success(self, prev_status):
        self._forget()

    def _get_tool_path(self, resource_class):
        """
        Return the local path (as str) of the last released resource of
        ``resource_class``.
        """
        resource_id = utils.get_and_check_last_released_resource_id(resource_class)
        LOGGER.info('Found last released resource %s', resource_id)
        resource = sdk2.Resource[resource_id]
        tool_path = str(sdk2.ResourceData(resource).path)
        LOGGER.info('Found tool\'s path: %s', tool_path)
        return tool_path

    def _dump_ydb(self):
        """
        Start the YDB export and poll it until it is no longer in progress.

        Raises YDBDumperError when the final status is not SUCCESS.
        """
        dumper = self._get_dumper()
        operation_id = dumper.dump(
            yt_proxy=self.Parameters.yt_proxy,
            yt_table=self.Parameters.yt_dump_path,
            collection=self.Parameters.collection,
        )
        # Stored in the context right away: _forget reads it from there.
        self.Context.operation_id = operation_id
        self.Context.save()

        while True:
            operation_status = dumper.check(operation_id)
            LOGGER.info('Got operation_status %s', operation_status)
            if operation_status != dumper.CONTINUE:
                break
            time.sleep(YDB_RETRIES_TIMEOUT)

        if operation_status == dumper.SUCCESS:
            return
        raise YDBDumperError('YDB dumper fails with bad status: {0}'.format(operation_status))

    def _convert_dump(self, tool_path, token):
        """
        Run the converter binary at ``tool_path`` from the dump table to the
        converted table, passing ``token`` as YT_TOKEN in its environment.
        """
        args = [tool_path]
        args += ['--yt-proxy', self.Parameters.yt_proxy]
        args += ['--input-table', self.Parameters.yt_dump_path]
        args += ['--output-table', self.Parameters.yt_converted_path]
        args += ['--collection', self.Parameters.collection]

        converter_env = {'YT_TOKEN': token}
        LOGGER.info('Run command %s', args)
        process.run_process(args, log_prefix='convertion', environment=converter_env, wait=True)
        LOGGER.info('Finish command %s', args)

    def _sort_dump(self, token, sort_by):
        """Sort the table at ``yt_dump_path`` by the ``sort_by`` fields."""
        # Imported here rather than at module level; the package comes from
        # the PipEnvironment listed in Requirements.
        import yt.wrapper as yt

        client = yt.YtClient(proxy=self.Parameters.yt_proxy, token=token)
        yt.run_sort(self.Parameters.yt_dump_path, sort_by=sort_by, client=client)

    def _get_dumper(self):
        """Build a YDBDumper from the task parameters, tokens and CLI path."""
        endpoint = self.Parameters.endpoint
        database = self.Parameters.database
        yt_token = self.yt_token
        ydb_token = self.ydb_token
        tool_path = self.ydb_tool_path
        return YDBDumper(
            endpoint=endpoint,
            database=database,
            yt_token=yt_token,
            ydb_token=ydb_token,
            path=tool_path,
        )

    def _forget(self):
        """Forget the export operation stored in the context; no-op when none is stored."""
        if not self.Context.operation_id:
            return
        self._get_dumper().forget(self.Context.operation_id)


class YDBDumperError(Exception):
    """Raised when a YDB CLI call fails or an export does not end successfully."""


def retry(foo):
    """
    Decorator that calls ``foo`` again when it raises YDBDumperError.

    ``foo`` gets ERROR_RETRIES_COUNT attempts in total. After every failed
    attempt except the last one the error is logged and the wrapper sleeps for
    YDB_RETRIES_TIMEOUT seconds. The last attempt is not guarded, so its error
    is raised to the caller. Exceptions of other types are never retried.
    """
    @wraps(foo)
    def _wrapper(*args, **kwargs):
        attempt = 0
        while attempt < ERROR_RETRIES_COUNT - 1:
            try:
                return foo(*args, **kwargs)
            except YDBDumperError:
                LOGGER.exception(
                    'Something went wrong, retry %s from %s with timeout %s',
                    attempt, ERROR_RETRIES_COUNT, YDB_RETRIES_TIMEOUT,
                )
                time.sleep(YDB_RETRIES_TIMEOUT)
            attempt += 1
        # Final attempt: nothing is caught here, a failure goes to the caller.
        return foo(*args, **kwargs)
    return _wrapper


class YDBDumper(object):
    """
    Wrapper around the YDB CLI binary for exporting a table to YT.

    Each command is run as a subprocess with YT_TOKEN and YDB_TOKEN set in its
    environment. Command failures are reported as YDBDumperError.
    """
    # Values returned by check().
    CONTINUE = 'continue'
    FAIL = 'fail'
    SUCCESS = 'success'

    def __init__(self, path, yt_token, ydb_token, endpoint, database):
        """
        :param path: path to the YDB CLI binary
        :param yt_token: value exported as YT_TOKEN for every command
        :param ydb_token: value exported as YDB_TOKEN for every command
        :param endpoint: value of the CLI ``-e`` option
        :param database: value of the CLI ``-d`` option; also used as the
            prefix of the exported table path
        """
        self._yt_token = yt_token
        self._ydb_token = ydb_token
        self._path = path
        self._endpoint = endpoint
        self._database = database

    def _base_args(self):
        """Return a fresh list with the binary and the options shared by all commands."""
        return [self._path, '-e', self._endpoint, '-d', self._database]

    def _run_command(self, args, decode=True):
        """
        Run ``args`` and return its output.

        :param args: full command line as a list
        :param decode: when true, parse stdout as JSON and return the result;
            otherwise return stdout as is
        :raises YDBDumperError: on a non-zero return code or, with ``decode``,
            when stdout is not valid JSON
        """
        LOGGER.info('Run command %s', args)
        p = process.run_process(
            args,
            log_prefix='dump',
            environment={
                'YT_TOKEN': self._yt_token,
                'YDB_TOKEN': self._ydb_token,
            },
            outs_to_pipe=True,
            # The return code is inspected below so that a failure becomes
            # YDBDumperError, which is the type the retry decorator handles.
            check=False,
        )
        stdout, stderr = p.communicate()
        # Lazy %-style arguments: formatting is left to the logging module.
        LOGGER.info('STDOUT: %s', stdout)
        LOGGER.info('STDERR: %s', stderr)
        if p.returncode:
            raise YDBDumperError('Dumper fail with error {0}'.format(stderr))
        if not decode:
            return stdout
        try:
            return json.loads(stdout)
        except ValueError as e:
            raise YDBDumperError(e)

    def dump(self, yt_proxy, yt_table, collection):
        """
        Start an export of ``collection`` to ``yt_table`` on ``yt_proxy``.

        :return: value of the ``id`` field of the command's JSON reply
        :raises YDBDumperError: when the command fails or its reply does not
            carry the ``SUCCESS`` status
        """
        description = '{collection} {table} {proxy}'.format(
            collection=collection,
            table=yt_table,
            proxy=yt_proxy,
        )
        # Two characters are reserved for the surrounding quotes, so the
        # result never exceeds DUMP_DESCRIPTION_LIMIT and truncation cannot
        # cut off the closing quote.
        inner_limit = max(DUMP_DESCRIPTION_LIMIT - 2, 0)
        description = '"{0}"'.format(description[:inner_limit])

        args = self._base_args() + [
            'export',
            'yt',
            '--proxy', yt_proxy,
            '--description', description,
            '--item', 'Source={ydb_table},Destination={yt_table}'.format(
                ydb_table=os.path.join(self._database, collection),
                yt_table=yt_table,
            ),
            '--retries', str(YDB_NATIVE_RETRIES),
            '--use-type-v3',
            '--json',
        ]
        result = self._run_command(args)
        # .get: a reply without a status is reported as YDBDumperError
        # instead of surfacing as KeyError.
        status = result.get('status')
        if status != 'SUCCESS':
            raise YDBDumperError('Dumper returns non-success status: {0}'.format(status))
        LOGGER.info('Complete command %s', args)
        return result['id']

    @retry
    def check(self, operation_id):
        """
        Query the state of the operation ``operation_id``.

        :return: CONTINUE while the reply is not marked ready; SUCCESS when it
            is ready with status ``SUCCESS`` and progress ``PROGRESS_DONE``;
            FAIL otherwise
        :raises YDBDumperError: when the command still fails after all retries
        """
        args = self._base_args() + [
            'operation',
            'get',
            operation_id,
            '--json',
        ]
        result = self._run_command(args)
        if not result.get('ready', False):
            return self.CONTINUE
        # Missing fields in a ready reply count as a failure; direct indexing
        # would raise KeyError, which the retry decorator does not handle.
        metadata = result.get('metadata') or {}
        if result.get('status') == 'SUCCESS' and metadata.get('progress') == 'PROGRESS_DONE':
            return self.SUCCESS
        return self.FAIL

    def forget(self, operation_id):
        """
        Run ``operation forget`` for ``operation_id``.

        :raises YDBDumperError: when the command fails
        """
        args = self._base_args() + [
            'operation',
            'forget',
            operation_id,
        ]
        # The output is not parsed as JSON for this command.
        self._run_command(args, decode=False)


# NOTE(review): presumably the module-level name through which the task class
# is looked up by the framework - confirm against the Sandbox conventions.
__TASK__ = CollectionsDumpYdbToYt
