# coding=utf-8
import logging
import os

from sandbox import sdk2
from sandbox.common.errors import TaskFailure
from sandbox.common.utils import singleton_property
from sandbox.projects.common import binary_task
from sandbox.projects.metrika.utils import CommonParameters
from sandbox.projects.metrika.utils.base_metrika_task import with_parents, BaseMetrikaTask


@with_parents
class UploadCypressDump(BaseMetrikaTask):
    """
    Upload a local Cypress dump into YT.

    Useful for pushing a dump produced by yt-local to a real cluster for further analysis.
    """

    class Parameters(CommonParameters):
        with sdk2.parameters.Group("Назначение") as destination:
            yt_cluster = sdk2.parameters.String("YT cluster name", default="hahn", required=True)
            destination_path = sdk2.parameters.String("Базовый каталог назначения", required=True, default="//home/metrika/lambda-test-data")
            create_subdir = sdk2.parameters.Bool("Cоздать подкаталог с идентификатором задачи", required=True, default=True)
            cypress_resource = sdk2.parameters.Resource("Ресурс с данными", required=True,
                                                        description="Должен содержать каталог с файлами, см. https://wiki.yandex-team.ru/yt/userdoc/localmode/#lokalnyjjkiparis")
            cypress_root_dir = sdk2.parameters.String("Путь в ресурсе до корневого каталога Кипариса", required=True, default=".")
        with sdk2.parameters.Group("Секреты") as secrets:
            yt_token = sdk2.parameters.Vault("YT-токен", required=True, default="METRIKA:robot-metrika-test-yt")

        _binary = binary_task.binary_release_parameters_list(stable=True)

    @singleton_property
    def yt_client(self):
        """
        Build a YtClient for the cluster from the task parameters, using the token stored in Vault.

        NOTE(review): `singleton_property` presumably caches the client per task instance - confirm.
        """
        # Imported lazily, like every other yt.wrapper usage in this class.
        from yt.wrapper import YtClient
        return YtClient(proxy=self.Parameters.yt_cluster, token=self.Parameters.yt_token.data())

    def on_execute(self):
        """
        Upload the dump in four steps:

        1. Sync the resource to the local disk.
        2. Scan it from the configured root for directories and *.meta files; the resulting
           list is needed to report progress.
        3. Create the destination directory/subdirectory, applying attributes from the root
           .meta file when it is present.
        4. Upload the items one by one, reporting progress; each item is wrapped into its own
           memoize_stage.

        :raises TaskFailure: if the configured root directory is missing in the resource.
        """
        from yt.wrapper import ypath_join

        with sdk2.helpers.ProgressMeter("Загрузка ресурса с дампом"):
            root_dir = sdk2.ResourceData(self.Parameters.cypress_resource).path / self.Parameters.cypress_root_dir

        if not root_dir.exists():
            raise TaskFailure("{} doesn't exists".format(root_dir.as_posix()))

        destination = self.destination_path

        with self.memoize_stage.create_root_dir(commit_on_entrance=False):
            # ignore_existing=True: with create_subdir switched off the destination is normally
            # an already existing directory, and a re-run of this stage must not fail either.
            # This matches how nested directories are created in create_map_node.
            self.yt_client.create(
                path=destination,
                type="map_node",
                recursive=True,
                ignore_existing=True,
                attributes=UploadCypressDump.read_attributes_for_dir(root_dir),
            )
            self.set_info('Каталог для загрузки данных: <a href="https://yt.yandex-team.ru/{db}/navigation?path={dst}">{dst}</a>'.format(
                db=self.Parameters.yt_cluster, dst=destination), do_escape=False)

        logging.info("====> Start scanning resource")
        workload = self._collect_workload(root_dir)
        logging.info("====> Finish scanning resource. Total workload %s", len(workload))

        with sdk2.helpers.ProgressMeter("Загрузка в YT", maxval=len(workload)) as progress:
            for item in workload:
                if item.is_dir():
                    self.create_map_node(item, ypath_join(destination, item.relative_to(root_dir).as_posix()))
                    progress.add(1)
                elif item.is_file() and item.suffix == '.meta':
                    # The table path is the metafile path without the ".meta" suffix.
                    self.create_table_node(item, ypath_join(destination, item.with_suffix('').relative_to(root_dir).as_posix()))
                    progress.add(1)

    @staticmethod
    def _collect_workload(root_dir):
        """
        Walk `root_dir` and return the list of paths to upload: every directory and every
        file whose suffix is '.meta'.

        os.walk is top-down by default, so a directory is listed before anything nested in it.
        """
        workload = []
        for current, dir_names, file_names in os.walk(root_dir.as_posix()):
            current_path = sdk2.Path(current)
            for dir_name in dir_names:
                directory = current_path / dir_name
                logging.debug("Directory: %s", directory.as_posix())
                workload.append(directory)
            for file_name in file_names:
                candidate = current_path / file_name
                # NOTE(review): assuming sdk2.Path follows pathlib semantics, a per-directory
                # file named exactly ".meta" has an empty suffix and is not picked up here;
                # it is read by read_attributes_for_dir instead - confirm.
                if candidate.suffix == '.meta':
                    logging.debug("Metafile: %s", candidate.as_posix())
                    workload.append(candidate)
        return workload

    def create_map_node(self, local_dir, remote_path):
        """
        Create the map node `remote_path` with attributes read from the `.meta` file of `local_dir`.

        An already existing node is not an error (ignore_existing=True).
        """
        with self.memoize_stage[remote_path](commit_on_entrance=False):
            logging.info("Create directory %s", remote_path)
            self.yt_client.create(path=remote_path, type="map_node", attributes=UploadCypressDump.read_attributes_for_dir(local_dir), ignore_existing=True)

    def create_table_node(self, meta_file, remote_path):
        """
        Create the table `remote_path` described by `meta_file` and fill it from the data file
        lying next to it (the same path without the '.meta' suffix).

        The item is skipped with a log record when the data file is missing, the metafile is
        not valid YSON, its type is absent or is not "table", or the format is not specified.
        Creation, write and the optional sort are done inside one YT transaction.
        """
        from yt.wrapper import yson
        with self.memoize_stage[remote_path](commit_on_entrance=False):
            logging.info("Create table %s", remote_path)
            data_file = meta_file.with_suffix('')
            if not data_file.is_file():
                logging.warning("Datafile for metafile %s not found, skipping", meta_file.as_posix())
                return

            # Parse the metafile and close it right away: it is not needed during the upload.
            with meta_file.open("rb") as f:
                try:
                    meta = yson.load(f)
                except yson.YsonError:
                    logging.exception("Failed to load meta file %s, skipping", meta_file.as_posix())
                    return

            # .get(): a metafile without "type" is skipped like any other unsupported one
            # instead of failing the whole task with KeyError.
            meta_type = meta.get("type")
            if meta_type != "table":
                logging.warning("Found metafile %s with currently unsupported type %s, skipping", meta_file.as_posix(), meta_type)
                return

            if "format" not in meta:
                logging.warning("Found metafile %s with unspecified format, skipping", meta_file.as_posix())
                return

            attributes = meta.get("attributes", {})
            # "sorted_by" is not passed to create(); the table is sorted explicitly after the write.
            sorted_by = attributes.pop("sorted_by", [])

            with self.yt_client.Transaction():
                self.yt_client.create(path=remote_path, type="table", attributes=attributes)
                with data_file.open("rb") as table_file:
                    try:
                        self.yt_client.write_table(remote_path, table_file, format=meta["format"], raw=True)
                    except yson.YsonError:
                        logging.warning("Probably bad written yson, try to fix.", exc_info=True)
                        self.write_fixed_table(remote_path, data_file)
                if sorted_by:
                    self.yt_client.run_sort(remote_path, sort_by=sorted_by)

    def write_fixed_table(self, remote_path, data_file):
        """
        Fallback upload: parse every line of `data_file` as a separate YSON value locally and
        write the parsed rows to `remote_path` (raw=False).
        """
        from yt.wrapper import yson
        with data_file.open("rb") as table_file:
            self.yt_client.write_table(remote_path, [yson.loads(s) for s in table_file], raw=False)

    @staticmethod
    def read_attributes_for_dir(local_path):
        """
        Return the "attributes" value from the `.meta` file inside the directory `local_path`.

        An empty dict is returned when the file is absent, cannot be parsed as YSON
        (the error is logged) or has no "attributes" key.
        """
        from yt.wrapper import yson
        meta_file = local_path / '.meta'
        if not meta_file.is_file():
            return {}

        with meta_file.open(mode="rb") as f:
            try:
                meta = yson.load(f)
            except yson.YsonError:
                logging.exception("Failed to load meta file %s, meta will not be processed", meta_file.as_posix())
                return {}
        return meta.get("attributes", {})

    @property
    def destination_path(self):
        """
        YT path to upload into: the base destination path, extended with a subdirectory named
        after the task id when the `create_subdir` parameter is set.
        """
        from yt.wrapper import ypath_join
        if self.Parameters.create_subdir:
            return ypath_join(self.Parameters.destination_path, str(self.id))
        else:
            return str(self.Parameters.destination_path)
