import json
import logging
import os
import time
from datetime import datetime

from sandbox import sdk2
from sandbox.sandboxsdk.environments import PipEnvironment
from sandbox.projects.intranet.admins.ydbHelper import YdbHelper
import sandbox.common.errors as ce


class IntranetYdbToYtBackup(sdk2.Task):
    """Export YDB database to YT"""

    class Requirements(sdk2.Task.Requirements):
        # Pip environments declared for this task: the YT client package and
        # its YSON bindings, both requested with use_wheel=False. The methods
        # below import the client lazily (``from yt import wrapper as yt``)
        # instead of at module level.
        environments = (
            PipEnvironment('yandex-yt', use_wheel=False),
            PipEnvironment('yandex-yt-yson-bindings-skynet', use_wheel=False)
        )

    class Parameters(sdk2.Parameters):

        # YT cluster name; passed to yt.config.set_proxy() and to YdbHelper.
        proxy = sdk2.parameters.String(
            "YT proxy (cluster)",
            default="hahn",
            required=True,
        )

        # Parent YT directory. The task lists it, prunes old backup
        # directories inside it and creates the new backup directory as
        # <destination_path>/<backup folder name>.
        destination_path = sdk2.parameters.String(
            "Path to the dir where backup dir will be saved",
            description="There must not be a slash in the end of destination!"
                        " In mentioned directory the new directory with name"
                        " represented by timestamp as name will be created",
            required=True,
        )

        # YDB connection settings; both values are handed to YdbHelper.
        ydb_endpoint = sdk2.parameters.String(
            "YDB endpoint",
            description="host:port",
            default_value="ydb-ru-prestable.yandex.net:2135",
            required=True,
        )

        ydb_database = sdk2.parameters.String(
            "YDB database name",
            required=True,
        )

        # Delay (seconds) used for sdk2.WaitTime between export status checks.
        check_interval_time = sdk2.parameters.Integer(
            "Check progress status interval time (sec.)",
            default=300,
            required=False,
        )

        # Number of already existing backup directories that survive pruning
        # in prepare_yt_target_directory(); older ones beyond it are removed.
        min_backup_count = sdk2.parameters.Integer(
            "Minimum guaranteed count of backup dirs in YT destination path",
            default=0,
            required=False,
        )

        # Keys looked up inside the YAV secrets below
        # (``<secret>.data()[<key>]`` in on_execute).
        yt_yav_secret_key = sdk2.parameters.String(
            "yt token name",
            default_value='robot-mb-yt',
            description="Token name to extract from YAV, "
                        "necessary to access tables on yt directly"
        )

        ydb_yav_secret_key = sdk2.parameters.String(
            "ydb token name",
            default_value='ydb.token',
            description="Token name to extract from YAV, "
                        "necessary if you want to export an environment different from PROD"
        )

        # YAV secrets holding the YDB and YT tokens.
        ydb_yav_secret = sdk2.parameters.YavSecret("YDB YAV secret", required=True)
        yt_yav_secret = sdk2.parameters.YavSecret("YT YAV secret", required=True)
        # Table names to export; tables reported by YDB but missing from this
        # list are skipped by the table extraction step.
        table_filter = sdk2.parameters.List(
            'Table filter',
            required=True,
            description="Backup only tables in this list"
        )

        # Selects the backup folder name: '%Y-%m-%d' date when True,
        # otherwise the current unix timestamp.
        folder_name_date_format = sdk2.parameters.Bool(
            "Use 'YYYY-mm-dd' format for the folder name",
            description="Otherwise, the unix timestamp is used",
            default=False,
        )

        # When True, on_execute links <destination_path>/latest to the new
        # backup directory after the tables are sorted.
        create_latest_link = sdk2.parameters.Bool(
            "Create 'latest' link in YT folder",
            default=False,
        )

        # Tables whose name starts with this prefix are not exported;
        # an empty value disables the check.
        tables_ignore_prefix = sdk2.parameters.String(
            "Prefix of ignored tables",
            description="Tables with this prefix in the name will be ignored",
            required=False
        )

    def prepare_yt_target_directory(self, yt_token, tables):
        """Prune old backups and create the directory tree for a new backup in YT.

        The new backup directory name is either the current date
        (``%Y-%m-%d``) or the current unix timestamp, depending on the
        ``folder_name_date_format`` parameter. Its full path is stored in
        ``self.Context.yt_backup_path``.

        Existing directories in the destination are sorted by path and the
        oldest ones are removed so that at most ``min_backup_count`` of them
        remain. The ``latest`` link maintained by ``on_execute`` is never
        counted as a backup and never removed here.

        :param yt_token: token written into the YT client config.
        :param tables: table paths relative to the database root; the
            directory part of every path is created under the backup dir.
        :raises ce.TaskError: if a node named like the new backup directory
            already exists in the destination directory.
        """
        logging.info("YT target directory preparing started.")

        from yt import wrapper as yt
        yt.config.set_proxy(self.Parameters.proxy)
        yt.config["token"] = yt_token

        if self.Parameters.folder_name_date_format:
            backup_folder_name = datetime.now().strftime('%Y-%m-%d')
        else:
            backup_folder_name = '{:.0f}'.format(time.time())
        destination = self.Parameters.destination_path
        # Name of the link created by on_execute when create_latest_link is set.
        latest_link_name = "latest"

        backup_path = os.path.join(destination, backup_folder_name)
        self.Context.yt_backup_path = backup_path

        logging.info("Check data for deletion in YT directory {}".format(destination))

        all_files_list = yt.list(destination)
        logging.info("List of all files in YT directory {}: {}".format(destination, all_files_list))

        if backup_folder_name in all_files_list:
            raise ce.TaskError("File with name {} already exists in {} YT directory.".format(backup_folder_name,
                                                                                             destination))

        backup_list = []
        for node in all_files_list:
            # The 'latest' link points at a backup directory, so a type check
            # that resolves links reports it as a map_node. It sorts after
            # timestamp/date names, so it would take a retention slot and push
            # a real backup out; skip it explicitly.
            if node == latest_link_name:
                continue
            node_path = os.path.join(destination, node)
            if yt.get_type(node_path) == "map_node":
                backup_list.append(node_path)
        backup_list.sort()
        logging.info("List of backup dirs in YT directory {}: {}".format(destination, backup_list))

        # Everything except the last `min_backup_count` entries is outdated.
        excess = len(backup_list) - self.Parameters.min_backup_count
        if excess > 0:
            for backup in backup_list[:excess]:
                logging.info("Backup dir {} is old and will be deleted".format(backup))
                yt.remove(backup, recursive=True)

        logging.info("Create {} directory in YT".format(backup_path))
        yt.create("map_node", backup_path)
        for table in tables:
            # Tables may live in sub-directories of the database; mirror them.
            table_dir = os.path.dirname(table)
            if table_dir:
                yt.create(
                    "map_node",
                    backup_path + '/' + table_dir,
                    recursive=True,
                    ignore_existing=True,
                )

    @staticmethod
    def __extract_tables_from_ls(data, table_filter=None, tables_ignore_prefix=""):
        """Parse the textual table printed by YDB ``ls`` and return table paths.

        ``data`` is expected to be an ASCII table whose cells are separated
        by ``|``: line 0 is a border, line 1 the header, line 2 a border,
        then one line per scheme entry and a closing border. The header must
        contain ``Name`` and ``Type`` columns.

        :param data: raw ``ls`` output.
        :param table_filter: optional collection of table names; when
            non-empty only these tables are returned.
        :param tables_ignore_prefix: optional prefix; tables whose name
            starts with it are skipped.
        :return: list of table paths in the order they appear in ``data``.
        :raises ce.TaskError: if no table passes the filters.
        """
        # no json format available before https://st.yandex-team.ru/KIKIMR-10435
        import re

        # Build a real list: the rows are indexed and sliced below, which a
        # lazy filter() object (Python 3) does not support.
        lines = [line for line in data.splitlines() if line]

        # extract column names from table output
        column_number = {}
        for idx, col in enumerate(lines[1].split('|')[1:-1]):
            m = re.search(r'\w+', col)
            if m:  # an unnamed header cell keeps its position but gets no key
                column_number[m.group(0)] = idx
        name_idx = column_number['Name']
        type_idx = column_number['Type']

        allowed = set(table_filter) if table_filter else None

        tables = []
        for row in lines[3:-1]:
            columns = row.split('|')[1:-1]
            path = columns[name_idx].strip()
            col_type = columns[type_idx].strip()
            # ".sys" also covers ".sys_health" and other system entries.
            if col_type != 'table' or path.startswith(".sys"):
                continue
            if allowed is not None and path not in allowed:
                continue
            if tables_ignore_prefix and path.startswith(tables_ignore_prefix):
                continue
            tables.append(path)
        if not tables:
            raise ce.TaskError("Nothing to backup: check table filter or task parameters")
        return tables

    @staticmethod
    def __extract_export_yt_operation_id(data):
        """Return the operation id from the JSON reply to an export start request.

        :param data: JSON document with at least the ``status`` and ``id``
            keys.
        :return: value of the ``id`` key.
        :raises ce.TaskError: if ``status`` is anything but ``SUCCESS``.
        """
        parsed_output = json.loads(data)
        status = parsed_output["status"]
        if status != "SUCCESS":
            # Note the separating space: the two literals are concatenated.
            raise ce.TaskError(
                "Output status on backup starting isn't SUCCESS. "
                "Current status: {}. Output: {}".format(status, parsed_output)
            )
        return parsed_output['id']

    @staticmethod
    def __check_progress_status(data):
        """Tell whether the export described by ``data`` has finished.

        :param data: JSON document carrying ``metadata.progress``.
        :return: True when the progress equals ``PROGRESS_DONE``, False for
            any other value. The progress is logged in both cases.
        """
        # NOTE(review): every non-DONE value means "keep waiting" here,
        # including any terminal failure state YDB may report - confirm
        # whether such states should fail the task instead.
        progress = json.loads(data)["metadata"]["progress"]
        if progress != 'PROGRESS_DONE':
            logging.info("Progress status: {}".format(progress))
            return False
        logging.info("Progress status: {}. Now the copy of the data is in YT.".format(progress))
        return True

    def start_export_operation(self, yh, database, tables_to_export):
        """Start the export to YT and return the id of the started operation.

        :param yh: helper object exposing ``start_export``.
        :param database: YDB database name passed through to the helper.
        :param tables_to_export: tables passed through to the helper.
        :return: operation id extracted from the helper's reply.
        """
        # The export target is the backup directory stored in the task context.
        target_path = self.Context.yt_backup_path
        raw_reply = yh.start_export(database, tables_to_export, target_path)
        return IntranetYdbToYtBackup.__extract_export_yt_operation_id(raw_reply)

    def on_execute(self):
        """Run the backup: start the export, wait for it, sort tables, link 'latest'.

        The method is written to be entered repeatedly: each
        ``sdk2.WaitTime`` interrupts it, and the memoize stages decide which
        steps still have to run.

        1. ``start_export`` stage: pick the tables, prepare the YT
           directory, start the export, remember the operation id and the
           table list in the task context, then wait.
        2. ``wait_for_completion_and_forget`` stage: poll the operation and
           wait again until it reports completion, then forget it.
        3. Sort every exported table in YT by the primary key reported by
           ``describe``.
        4. Optionally point ``<destination_path>/latest`` at the new backup.
        """
        from yt import wrapper as yt

        ydb_token = self.Parameters.ydb_yav_secret.data()[self.Parameters.ydb_yav_secret_key]
        yt_token = self.Parameters.yt_yav_secret.data()[self.Parameters.yt_yav_secret_key]

        database = self.Parameters.ydb_database

        yt.config.set_proxy(self.Parameters.proxy)
        yt.config["token"] = yt_token
        yh = YdbHelper(
            ydb_token,
            self.Parameters.ydb_endpoint,
            database,
            yt_token,
            self.Parameters.proxy,
            self
        )

        with self.memoize_stage.start_export:
            tables_to_export = IntranetYdbToYtBackup.__extract_tables_from_ls(yh.ls(), self.Parameters.table_filter,
                                                                              self.Parameters.tables_ignore_prefix)
            self.prepare_yt_target_directory(yt_token, tables_to_export)
            # Reuse the dedicated helper instead of repeating its body inline.
            self.Context.operation_id = self.start_export_operation(yh, database, tables_to_export)
            self.Context.tables_to_export = tables_to_export

            # no reason to ask yet, there's no way export is going to finish that fast
            raise sdk2.WaitTime(self.Parameters.check_interval_time)

        # commit_on_entrance=False: the stage is entered again after each wait
        # until its body completes without raising.
        with self.memoize_stage.wait_for_completion_and_forget(commit_on_entrance=False):
            if not IntranetYdbToYtBackup.__check_progress_status(yh.operation_get(self.Context.operation_id)):
                raise sdk2.WaitTime(self.Parameters.check_interval_time)

            yh.operation_forget(self.Context.operation_id)
            logging.info("Backup is finished. Will now sort tables in place")

        # The table list is read back from the context because the local
        # variable only exists on the run that executed the first stage.
        tables_to_export = self.Context.tables_to_export
        for table in tables_to_export:
            destination = os.path.join(self.Context.yt_backup_path, table)
            primary_keys = list(json.loads(yh.describe(table))['primary_key'])
            yt.run_sort(destination, sort_by=primary_keys)
        logging.info("Backup is done, sort operations finished.")

        if self.Parameters.create_latest_link:
            latest_link_path = os.path.join(self.Parameters.destination_path, "latest")
            yt.link(self.Context.yt_backup_path, latest_link_path, force=True)
            logging.info("Latest link created")
