# -*- coding: UTF-8 -*-

import json
import logging
import os
import random
import string
import time
from multiprocessing import TimeoutError

import sandbox.common.errors as ce
from sandbox import sdk2
from sandbox.projects.music.deployment.helpers.MusicBaseTask import MusicBaseTask
from sandbox.projects.music.deployment.helpers.YdbHelper import YdbHelper
from sandbox.sandboxsdk.environments import PipEnvironment


class MusicExportYdbToYt(MusicBaseTask, sdk2.Task):
    """Exports all ydb user-created tables to YT cluster.

    Execution flow (see ``on_execute``):
      1. List YDB scheme objects and keep tables owned by ``ALLOWED_TABLE_OWNERS``.
      2. Rotate old backup directories in YT and create a fresh one.
      3. Start the YDB -> YT export and poll it until it reports ``PROGRESS_DONE``.
      4. Sort every exported YT table by the primary key of its source table.

    Between polls the task raises ``sdk2.WaitTime``; all progress is kept in
    ``self.Context`` so that the next execution continues where it stopped.
    """

    # Only tables whose owner is listed here are exported.
    ALLOWED_TABLE_OWNERS = {'robot-muz-ydb-qa@staff', 'robot-music-ydb@staff'}

    class Requirements(sdk2.Task.Requirements):
        # YT client libraries; imported lazily (``from yt import wrapper``) inside the methods.
        environments = (
            PipEnvironment('yandex-yt', use_wheel=False),
            PipEnvironment('yandex-yt-yson-bindings-skynet', use_wheel=False)
        )

    class Parameters(sdk2.Task.Parameters):
        # NOTE(review): parameters below mix the ``default=`` and ``default_value=``
        # keywords -- confirm that sdk2.parameters honours both spellings.

        proxy = sdk2.parameters.String(
            "YT proxy (cluster)",
            default="hahn",
            required=True,
        )

        destination_path = sdk2.parameters.String(
            "Path to the dir where backup dir will be saved",
            description="There must not be a slash in the end of destination!"
                        " In mentioned directory the new directory with name"
                        " represented by timestamp as name will be created",
            default_value="//home/music/noobgam/tmp",
            required=True,
        )

        ydb_endpoint = sdk2.parameters.String(
            "YDB endpoint",
            description="host:port",
            default_value="ydb-ru-prestable.yandex.net:2135",
            required=True,
        )

        ydb_database = sdk2.parameters.String(
            "YDB database name",
            default_value="/ru-prestable/musicbackend/qa/music/",
            required=True,
        )

        # Used both as the delay after starting the export and between status polls.
        check_interval_time = sdk2.parameters.Integer(
            "Check progress status interval time (sec.)",
            default=300,
            required=False,
        )

        # Number of the newest pre-existing backup dirs that survive rotation.
        min_backup_count = sdk2.parameters.Integer(
            "Minimum guaranteed count of backup dirs in YT destination path",
            default=0,
            required=False,
        )

        # Key inside ``ydb_from_yt_token`` that holds the YT token.
        yt_token_name = sdk2.parameters.String(
            "yt token name",
            default_value='yt_prod_token',
            description="Token name to extract from YAV, "
                        "necessary to access tables on yt directly"
        )

        # Key inside ``ydb_from_yt_token`` that holds the YDB token.
        ydb_token_name = sdk2.parameters.String(
            "ydb token name",
            default_value='ydb_prod_token',
            description="Token name to extract from YAV, "
                        "necessary if you want to export an environment different from PROD"
        )

        ydb_from_yt_token = sdk2.parameters.YavSecret(
            "YAV secret with ydb-yt migration tokens",
            default='sec-01ej69jtmv3t8g2675y81dc5bv',
            required=True
        )

    def prepare_yt_target_directory(self, yt_token, tables):
        """Rotate old backups and create the directory tree for a new one in YT.

        The new backup directory is named ``<unix timestamp>_<6 random chars>`` and
        is created under ``Parameters.destination_path``; its full path is stored in
        ``self.Context.yt_backup_path``.

        :param yt_token: token used to authorize the YT client
        :param tables: table paths to be exported; the parent directory of every
            path is pre-created inside the new backup directory
        :raises ce.TaskError: if a node with the generated name already exists
        """
        logging.info("YT target directory preparing started.")

        from yt import wrapper as yt
        yt.config.set_proxy(self.Parameters.proxy)
        yt.config["token"] = yt_token

        current_timestamp = '{:.0f}'.format(time.time())
        random_letters = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6))
        dir_name = current_timestamp + '_' + random_letters
        destination = self.Parameters.destination_path

        self.Context.yt_backup_path = os.path.join(destination, dir_name)

        logging.info("Check data for deletion in YT directory {}".format(destination))

        all_files_list = yt.list(destination)
        logging.info("List of all files in YT directory {}: {}".format(destination, all_files_list))

        # Compare against the name that is actually going to be created
        # (timestamp + random suffix), not against the bare timestamp.
        if dir_name in all_files_list:
            raise ce.TaskError("File with name {} already exists in {} YT directory.".format(dir_name,
                                                                                             destination))

        # Every map_node directly under the destination is treated as a backup dir.
        backup_list = []
        for node in all_files_list:
            node_path = os.path.join(destination, node)
            if yt.get_type(node_path) == "map_node":
                backup_list.append(node_path)
        # Lexicographic order; backup names start with a unix timestamp, so the
        # oldest ones are expected to come first.
        backup_list.sort()
        logging.info("List of backup dirs in YT directory {}: {}".format(destination, backup_list))

        # The parameter is optional, treat an unset value as "keep nothing".
        number_of_backups = self.Parameters.min_backup_count or 0
        # NOTE: stale backups are removed before the new export has even started.
        if len(backup_list) > number_of_backups:
            for backup in backup_list[:len(backup_list) - number_of_backups]:
                logging.info("Backup dir {} is old and will be deleted".format(backup))
                yt.remove(backup, recursive=True)

        logging.info("Create {} directory in YT".format(self.Context.yt_backup_path))
        yt.create("map_node", self.Context.yt_backup_path)

        # Several tables usually share a parent directory -- create each one once.
        table_dirs = sorted({os.path.dirname(table) for table in tables} - {''})
        for table_dir in table_dirs:
            yt.create(
                "map_node",
                self.Context.yt_backup_path + '/' + table_dir,
                recursive=True,
                ignore_existing=True,
            )

    @staticmethod
    def __extract_tables_from_ls(data):
        """Parse the textual table printed by ``ls`` and return exportable table paths.

        Expected layout (after dropping empty lines): line 0 is a border, line 1 is
        the header with ``|``-separated column names, line 2 is a border, the last
        line is a border and everything in between is data rows.

        :param data: raw ``ls`` output as a single string
        :return: list of values from the ``Name`` column for rows whose ``Type`` is
            ``table`` and whose ``Owner`` is in ``ALLOWED_TABLE_OWNERS``
        :raises ce.TaskError: if the header line or a required column is missing
        """
        # no json format available before https://st.yandex-team.ru/KIKIMR-10435
        import re

        # A real list (not a lazy ``filter`` object) so that indexing works on Python 3 too.
        lines = [line for line in data.splitlines() if line]
        if len(lines) < 2:
            raise ce.TaskError("Unexpected ls output, header line is missing: {!r}".format(data))

        # extract column names from table output
        column_number = {}
        for idx, col in enumerate(lines[1].split('|')[1:-1]):
            m = re.search(r'\w+', col)
            if m:
                column_number[m.group(0)] = idx

        missing_columns = [name for name in ('Owner', 'Name', 'Type') if name not in column_number]
        if missing_columns:
            raise ce.TaskError(
                "Columns {} not found in ls output header: {!r}".format(missing_columns, lines[1])
            )

        tables = []
        for row in lines[3:-1]:
            columns = row.split('|')[1:-1]
            owner = columns[column_number['Owner']].strip()
            path = columns[column_number['Name']].strip()
            col_type = columns[column_number['Type']].strip()
            if col_type == 'table' and owner in MusicExportYdbToYt.ALLOWED_TABLE_OWNERS:
                tables.append(path)
        return tables

    @staticmethod
    def __extract_export_yt_operation_id(data):
        """Return the operation id from the JSON answer of a started export.

        :param data: JSON string with at least ``status`` and ``id`` keys
        :return: value of the ``id`` key
        :raises ce.TaskError: if ``status`` is not ``SUCCESS``
        """
        parsed_output = json.loads(data)
        status = parsed_output["status"]
        if status != "SUCCESS":
            raise ce.TaskError(
                "Output status on backup starting isn't SUCCESS. "
                "Current status: {}. Output: {}".format(status, parsed_output)
            )
        return parsed_output['id']

    @staticmethod
    def __check_progress_status(data):
        """Tell whether the export operation described by ``data`` has finished.

        :param data: JSON string containing ``metadata.progress``
        :return: True if the progress equals ``PROGRESS_DONE``, False otherwise
        """
        parsed_output = json.loads(data)
        progress_status = parsed_output["metadata"]["progress"]
        # NOTE(review): any other value (including possible terminal failure
        # states) is reported as "not done yet" -- confirm this cannot poll forever.
        if progress_status == 'PROGRESS_DONE':
            logging.info("Progress status: {}. Now the copy of the data is in YT.".format(progress_status))
            return True
        logging.info("Progress status: {}".format(progress_status))
        return False

    def start_export_operation(self, yh, database, tables_to_export):
        """Start the export into ``self.Context.yt_backup_path`` and return its operation id.

        :param yh: YdbHelper instance used to talk to YDB
        :param database: YDB database name
        :param tables_to_export: list of table paths to export
        :raises ce.TaskError: if the export was not started successfully
        """
        return MusicExportYdbToYt.__extract_export_yt_operation_id(
            yh.start_export(
                database,
                tables_to_export,
                self.Context.yt_backup_path
            )
        )

    def on_execute(self):
        """Task entry point; re-entered after every ``sdk2.WaitTime``.

        State kept in ``self.Context``: ``yt_backup_path``, ``operation_id``,
        ``tables_to_export``, ``operation_done``, ``tables_not_done`` and
        ``tables_done`` (the last two map table path -> YT sort operation id).

        :raises sdk2.WaitTime: while the export or any sort operation is unfinished
        :raises ce.TaskError: if the export could not be started or a sort failed
        """
        from yt import wrapper as yt

        # Fetch the secret once; both tokens live in the same YAV secret.
        secret_data = self.Parameters.ydb_from_yt_token.data()
        yt_token = secret_data[self.Parameters.yt_token_name]
        ydb_token = secret_data[self.Parameters.ydb_token_name]

        database = self.Parameters.ydb_database

        yt.config.set_proxy(self.Parameters.proxy)
        yt.config["token"] = yt_token
        yh = YdbHelper(
            ydb_token,
            self.Parameters.ydb_endpoint,
            database,
            yt_token,
            self.Parameters.proxy,
            self
        )

        with self.memoize_stage.start_export:
            tables_to_export = MusicExportYdbToYt.__extract_tables_from_ls(yh.ls())
            self.prepare_yt_target_directory(yt_token, tables_to_export)
            self.Context.operation_id = self.start_export_operation(yh, database, tables_to_export)
            self.Context.tables_to_export = tables_to_export

            # no reason to ask yet, there's no way export is going to finish that fast
            raise sdk2.WaitTime(self.Parameters.check_interval_time)

        if not self.Context.operation_done:
            self.Context.operation_done = MusicExportYdbToYt.__check_progress_status(
                yh.operation_get(self.Context.operation_id))
            if not self.Context.operation_done:
                raise sdk2.WaitTime(self.Parameters.check_interval_time)
            yh.operation_forget(self.Context.operation_id)
            logging.info("Backup is finished. Will now sort tables in place")

        tables_to_export = self.Context.tables_to_export
        tables_done = self.Context.tables_done or dict()
        tables_not_done = self.Context.tables_not_done or dict()
        for table in tables_to_export:
            if table in tables_not_done:
                # Sort already started on a previous pass -- check how it is doing.
                op_id = tables_not_done[table]
                try:
                    op_state = yt.get_operation_state(op_id)
                except TimeoutError:
                    logging.info("Operation {} is still running".format(op_id))
                    continue
                if op_state.is_finished():
                    if op_state.is_unsuccessfully_finished():
                        raise ce.TaskError('Operation {} failed'.format(op_id))
                    tables_done[table] = op_id
                    del tables_not_done[table]
            elif table not in tables_done:
                # First time we see this table: sort the exported copy in place
                # by the primary key reported for the source table.
                destination = os.path.join(self.Context.yt_backup_path, table)
                primary_keys = list(json.loads(yh.describe(table))['primary_key'])
                op = yt.run_sort(destination, sort_by=primary_keys, sync=False)
                tables_not_done[table] = op.id
        self.Context.tables_not_done = tables_not_done
        self.Context.tables_done = tables_done
        if tables_not_done:
            raise sdk2.WaitTime(self.Parameters.check_interval_time)

        logging.info("Backup is done, sort operations finished.")
