# -*- coding: UTF-8 -*-

import sandbox.common.errors as ce
from sandbox.projects.common import binary_task
from sandbox.projects.metrika.utils import CommonParameters
from sandbox.projects.metrika.utils.base_metrika_task import BaseMetrikaTask, with_parents

from sandbox.sandboxsdk.environments import PipEnvironment
from sandbox import sdk2
from sandbox.projects.kikimr.resources import YdbCliBinary
from sandbox.sdk2.helpers import subprocess as sp, ProcessLog
from sandbox.projects.metrika.utils.mixins.juggler_reporter import JugglerReporterMixin

import os
import json
import logging
import time
from datetime import datetime


@with_parents
class MetrikaVisitsYdbToYtBackup(BaseMetrikaTask, JugglerReporterMixin):
    """Sandbox task that exports YDB tables into a timestamped YT directory.

    The execution flow (see ``on_execute``) is:

    1. Prepare the YT destination: remove the oldest backup directories so
       that at most ``min_backup_count`` of them remain, then create a new
       directory named after the current Unix timestamp.
    2. Locate (and, if packed, unpack) the ``ydb`` CLI binary resource.
    3. Launch ``ydb export yt`` for the requested tables.
    4. Poll ``ydb operation get`` until the progress equals ``PROGRESS_DONE``.
    5. Call ``ydb operation forget`` for the finished operation.
    """

    # Value of ``metadata.progress`` that marks a finished export operation.
    PROGRESS_DONE = "PROGRESS_DONE"
    # Value of ``status`` expected in every ydb CLI JSON response.
    SUCCESS = "SUCCESS"
    # Path to the ydb CLI executable; filled in by ``get_ydb_cli``.
    ydb_cli = None
    # Secrets read from Vault at the beginning of ``on_execute``.
    yt_token = None
    ydb_token = None

    class Requirements(BaseMetrikaTask.Requirements):
        # YT python client; imported lazily in ``prepare_yt_target_directory``.
        environments = (PipEnvironment('yandex-yt'), PipEnvironment('yandex-yt-yson-bindings-skynet'))

    class Parameters(CommonParameters):
        with sdk2.parameters.Group("Step 1. YT Prepare") as yt_prepare:
            yt_token = sdk2.parameters.Vault(
                "YT token from Vault",
                description='"name" or "owner:name"',
                required=True,
            )

            proxy = sdk2.parameters.String(
                "YT proxy (cluster)",
                default="hahn",
                required=True,
            )

            destination_path = sdk2.parameters.String(
                "Path to the dir where backup dir will be saved",
                description="There must not be a slash in the end of destination!"
                            " In mentioned directory the new directory with name"
                            " represented by timestamp as name will be created",
                default_value="//home/metrika/disaev/ydb_backup",
                required=True,
            )

            # Number of already existing backup directories that survive the
            # cleanup performed before a new backup directory is created.
            min_backup_count = sdk2.parameters.Integer(
                "Minimum count of backup dirs in YT destination path",
                default=1,
                required=False,
            )

        with sdk2.parameters.Group("Step 2. YDB Prepare") as ydb_prepare:
            ydb_token = sdk2.parameters.Vault(
                "YDB token from Vault",
                description='"name" or "owner:name"',
                required=True,
            )

            ydb_endpoint = sdk2.parameters.String(
                "YDB endpoint",
                description="host:port",
                default_value="ydb-ru-prestable.yandex.net:2135",
                required=True,
            )

            ydb_database = sdk2.parameters.String(
                "YDB database name",
                default_value="/ru-prestable/metrika/testing/visits",
                required=True,
            )

            ydb_tables = sdk2.parameters.List(
                "Path to a table in ydb to be backed up. Can end with a wildcard *",
                description="Path to a table in ydb, can end with a *",
                required=True,
            )

            # Pause between two consecutive ``ydb operation get`` calls.
            check_interval_time = sdk2.parameters.Integer(
                "Check progress status interval time (sec.)",
                default=10,
                required=False,
            )

        with sdk2.parameters.Group("Step 3. Juggler Notifications") as juggler_prepare:
            juggler_host_name = sdk2.parameters.String(
                "juggler-host (for schedulers)",
                default="metrika-sandbox"
            )
            juggler_service_name = sdk2.parameters.String(
                "juggler-service (for schedulers)",
                default="metrika-visits4d-ydb-to-yt-backup"
            )

        _binary = binary_task.binary_release_parameters_list(stable=True)

    def on_prepare(self):
        """Copy non-empty Juggler host/service parameters onto the task."""
        # Truthiness check also covers an unset (None) parameter, on which
        # len() would raise TypeError.
        if self.Parameters.juggler_host_name:
            self.juggler_host = self.Parameters.juggler_host_name
        if self.Parameters.juggler_service_name:
            self.juggler_service = self.Parameters.juggler_service_name

    def get_common_cmd_part(self):
        """Return the argv prefix shared by all ydb CLI calls.

        The prefix is the CLI path followed by the ``--endpoint`` and
        ``--database`` options built from the task parameters.
        """
        return [
            self.ydb_cli,
            "--endpoint={}".format(self.Parameters.ydb_endpoint),
            "--database={}".format(self.Parameters.ydb_database),
        ]

    def get_ydb_cli(self):
        """Find the stable linux ydb CLI resource and store its path in ``self.ydb_cli``.

        If the resource path ends with ``.tgz`` it is extracted into the
        current working directory and ``self.ydb_cli`` is pointed at the file
        named like the archive without that suffix.

        Raises:
            ce.TaskError: if no matching resource is found.
        """
        ydb_cli_resource = YdbCliBinary.find(
            attrs=dict(released="stable", platform="linux")
        ).first()

        if ydb_cli_resource is None:
            raise ce.TaskError("Cannot find {} resource".format(YdbCliBinary.name))
        self.ydb_cli = str(sdk2.ResourceData(ydb_cli_resource).path)

        postfix = ".tgz"
        if not self.ydb_cli.endswith(postfix):
            return

        with ProcessLog(self, logger="ydb_cli_unpack") as pl:
            work_dir = os.getcwd()
            sp.check_call(["tar", "-zxf", self.ydb_cli], shell=False, stdout=pl.stdout, stderr=pl.stderr, cwd=work_dir)
            pl.logger.info("Data after extraction : {}".format(os.listdir(work_dir)))
            # NOTE(review): assumes the archive contains a file named like the
            # archive itself minus ".tgz" -- confirm against the resource layout.
            unpacked_name = os.path.basename(self.ydb_cli)[:-len(postfix)]
            self.ydb_cli = os.path.join(work_dir, unpacked_name)

    def check_operation_status(self, parsed_output):
        """Ensure a parsed ydb CLI response reports the ``SUCCESS`` status.

        Raises:
            ce.TaskError: if ``status`` is absent or differs from ``SUCCESS``.
        """
        # .get() keeps a response without "status" on the same TaskError path
        # instead of surfacing as a bare KeyError.
        status = parsed_output.get("status")
        if status != self.SUCCESS:
            raise ce.TaskError("Output status on backup starting isn't SUCCESS. Current status: {}. Output: {}".format(status, parsed_output))

    def prepare_yt_target_directory(self):
        """Rotate old backups in YT and create the directory for the new one.

        The new directory path is stored in ``self.Context.yt_backup_path``.

        Raises:
            ce.TaskError: if a node named after the current timestamp already
                exists in the destination directory.
        """
        logging.info("YT target directory preparing started.")

        from yt import wrapper as yt
        yt.config.set_proxy(self.Parameters.proxy)
        yt.config["token"] = self.yt_token

        destination = self.Parameters.destination_path
        # time.time() yields the Unix timestamp directly; going through a naive
        # local datetime and mktime() is ambiguous around DST transitions.
        current_timestamp = str(int(time.time()))
        self.Context.yt_backup_path = os.path.join(destination, current_timestamp)

        logging.info("Check data for deletion in YT directory {}".format(destination))

        all_files_list = yt.list(destination)
        logging.info("List of all files in YT directory {}: {}".format(destination, all_files_list))

        if current_timestamp in all_files_list:
            raise ce.TaskError("File with name {} is already exists in {} YT directory.".format(current_timestamp, destination))

        # Only directories (map nodes) are treated as backups.
        backup_list = []
        for node in all_files_list:
            node_path = os.path.join(destination, node)
            if yt.get_attribute(node_path, "type") == "map_node":
                backup_list.append(node_path)
        # Plain string sort: oldest first as long as directory names are
        # timestamps with the same number of digits.
        backup_list.sort()
        logging.info("List of backup dirs in YT directory {}: {}".format(destination, backup_list))

        excess_count = len(backup_list) - self.Parameters.min_backup_count
        if excess_count > 0:
            for backup in backup_list[:excess_count]:
                logging.info("Backup dir {} is old and will be deleted".format(backup))
                yt.remove(backup, recursive=True)

        logging.info("Create {} directory in YT".format(self.Context.yt_backup_path))
        yt.create("map_node", self.Context.yt_backup_path)

    def _args_for_tables(self, tables):
        """Build one ``--item=source=...,destination=...`` argument per table.

        The destination is the table's base name inside the backup directory
        stored in ``self.Context.yt_backup_path``.
        """
        backup_root = self.Context.yt_backup_path
        return [
            "--item=source={},destination={}".format(table, os.path.join(backup_root, os.path.basename(table)))
            for table in tables
        ]

    def _export_item_args(self):
        """Return ``--item`` arguments for all requested tables, expanding wildcards.

        A path ending with ``*`` is expanded by listing its parent directory
        in YDB and keeping the children whose names start with the given
        prefix. The YDB driver is created only when a wildcard is present.
        """
        requested = self.Parameters.ydb_tables
        if not any(path.endswith('*') for path in requested):
            return self._args_for_tables(requested)

        import ydb
        driver_config = ydb.DriverConfig(
            endpoint=self.Parameters.ydb_endpoint,
            database=self.Parameters.ydb_database,
            auth_token=str(self.ydb_token)
        )
        item_args = []
        with ydb.Driver(driver_config) as driver:
            driver.wait(timeout=5, fail_fast=True)
            for path in requested:
                if not path.endswith('*'):
                    item_args += self._args_for_tables((path,))
                    continue
                folder, wildcarded_name = os.path.split(path)
                prefix = wildcarded_name[:-1]
                directory = driver.scheme_client.list_directory(folder)
                matched = [
                    os.path.join(folder, child.name)
                    for child in directory.children
                    if child.name.startswith(prefix)
                ]
                item_args += self._args_for_tables(matched)
        return item_args

    def _run_ydb_cli_json(self, args, logger_name, log_template):
        """Run the ydb CLI, log its stdout and return the parsed JSON response.

        Args:
            args: full argv of the CLI call.
            logger_name: name of the ``ProcessLog`` logger receiving stderr.
            log_template: ``str.format`` template with one placeholder for the
                raw stdout, used for the info-level log line.

        Raises:
            ce.TaskError: if the CLI exits with a non-zero code or prints
                something that is not valid JSON.
        """
        with ProcessLog(self, logger=logger_name) as pl:
            process = sp.Popen(args, env={"YDB_TOKEN": self.ydb_token}, stdout=sp.PIPE, stderr=pl.stderr)
            output = process.communicate()[0]
        logging.info(log_template.format(output))

        # Without this check a failed CLI call would only show up as an opaque
        # JSON decoding error on empty or non-JSON output.
        if process.returncode != 0:
            raise ce.TaskError("ydb cli exited with code {}. Output: {}".format(process.returncode, output))
        try:
            return json.loads(output)
        except ValueError as error:
            raise ce.TaskError("Cannot parse ydb cli output as JSON: {}. Output: {}".format(error, output))

    def start_backup(self):
        """Launch ``ydb export yt`` and remember the operation id.

        The id from the CLI response is stored in ``self.Context.backup_id``.

        Raises:
            ce.TaskError: if the CLI call fails or its status is not ``SUCCESS``.
        """
        logging.info("YDB backup initializing.")

        token_option = "--token="
        args = self.get_common_cmd_part()
        args += ["export", "yt"]
        args += ["--proxy={}".format(self.Parameters.proxy)]
        args += ["{}{}".format(token_option, self.yt_token)]
        args += self._export_item_args()
        args += ["--format=proto-json-base64"]

        # The command line carries the YT token; mask it so the secret never
        # reaches the task logs.
        printable_args = [
            token_option + "***" if arg.startswith(token_option) else arg
            for arg in args
        ]
        logging.debug("args={}".format(printable_args))

        parsed_output = self._run_ydb_cli_json(
            args, "backup_launching", "Backup started. Script output: {}"
        )
        self.check_operation_status(parsed_output)
        self.Context.backup_id = parsed_output["id"]

    def check_progress_status(self):
        """Poll the export operation until its progress is ``PROGRESS_DONE``.

        Sleeps ``check_interval_time`` seconds before every poll. There is no
        upper bound on the number of polls in this method.

        Raises:
            ce.TaskError: if a poll fails or reports a non-``SUCCESS`` status.
        """
        logging.info("Start waiting for {} status.".format(self.PROGRESS_DONE))

        args = self.get_common_cmd_part()
        args += ["operation", "get", self.Context.backup_id]
        args += ["--format=proto-json-base64"]

        while True:
            time.sleep(self.Parameters.check_interval_time)

            parsed_output = self._run_ydb_cli_json(
                args, "backup_status_check", "Ask YDB for progress status. Full output: {}"
            )
            self.check_operation_status(parsed_output)
            progress_status = parsed_output["metadata"]["progress"]
            if progress_status == self.PROGRESS_DONE:
                logging.info("Progress status: {}. Now the copy of the data is in YT.".format(progress_status))
                return
            logging.info("Progress status: {}".format(progress_status))

    def forget_backuped_data(self):
        """Run ``ydb operation forget`` for the finished export operation."""
        logging.info("Prepare to clear(forget) backuped data.")

        args = self.get_common_cmd_part()
        args += ["operation", "forget", self.Context.backup_id]
        with ProcessLog(self, logger="forget_backuped_data") as pl:
            sp.check_call(args, env={"YDB_TOKEN": self.ydb_token}, shell=False, stdout=pl.stdout, stderr=pl.stderr)
        logging.info("Backuped data has been forgotten.")

    def on_execute(self):
        """Read the secrets and run all backup stages in order."""
        self.yt_token = self.Parameters.yt_token.data()
        self.ydb_token = self.Parameters.ydb_token.data()
        self.prepare_yt_target_directory()
        self.get_ydb_cli()
        self.start_backup()
        self.check_progress_status()
        self.forget_backuped_data()
        logging.info("Congratulations! Backup is finished.")