# -*- coding: UTF-8 -*-

import csv
from datetime import datetime
from dateutil import tz
import json
import logging
import urllib

from sandbox import sdk2
import sandbox.common.errors as ce
from sandbox.projects.kikimr.resources import YdbBackupData
from sandbox.sdk2.service_resources import SandboxTasksBinary


class YdbRestoreToYt(sdk2.Task):
    """Restore previously saved YDB backup to YT

    The task takes a ``YdbBackupData`` resource, looks for every ``scheme.pb``
    file inside it and, for each one found, creates a static YT table with a
    matching schema and fills it with rows read from the ``data_*.csv`` files
    located in the same directory as the scheme.
    """

    class Parameters(sdk2.Task.Parameters):

        # Selects which released task binary is looked up in on_save().
        with sdk2.parameters.RadioGroup("YdbRestoreToYt binary type") as release_type:
            release_type.values.stable = release_type.Value("stable", default=True)
            release_type.values.test = release_type.Value("test")

        # Backup to restore; its creation time feeds the %backup_*% placeholders.
        backup_resource = sdk2.parameters.Resource(
            "Select resource with backup",
            resource_type=YdbBackupData,
            multiple=False,
            required=True,
        )

        # Table paths relative to the backup root (compared with str(table_path)).
        tables_to_ignore = sdk2.parameters.List(
            "List of tables to ignore", default=[],
        )

        with sdk2.parameters.Group("Restore parameters") as config_group:
            proxy = sdk2.parameters.String(
                "YT proxy (cluster)",
                default="hahn",
                required=True,
            )
            # NOTE: this folder is removed recursively before the restore starts.
            root = sdk2.parameters.String(
                "Root folder",
                description="available placeholders: "
                            "%backup_date% = backup date in format YYYY-MM-DD; "
                            "%now% = current timestamp; "
                            "%backup_time% = backup creation timestamp",
                required=True,
            )
            yt_token = sdk2.parameters.Vault(
                "YT token from Vault",
                description='"name" or "owner:name"',
                required=True,
            )

    @staticmethod
    def _primitive_type_id(column_type):
        """Return the primitive type id of a YDB column type.

        Columns wrapped into ``Optional<T>`` are unwrapped one level; columns
        that are not optional are read directly. Without this check a
        non-optional column would report the default (unset) id of the empty
        ``optional_type`` message and fail the type lookup.

        :param column_type: ``type`` field of a column from the table scheme
        :return: value of ``type_id`` of the (possibly unwrapped) type
        """
        if column_type.HasField("optional_type"):
            column_type = column_type.optional_type.item
        return column_type.type_id

    def on_save(self):
        """Pin the task binary matching the selected release type.

        Looks up a ``SandboxTasksBinary`` resource by target and release
        attributes and stores its id in ``Requirements.tasks_resource``.

        :raises ce.ResourceNotFound: if no such resource exists
        """
        attrs = {
            "target": "sandbox/projects/kikimr/tasks/YdbRestoreToYt",
            "release": self.Parameters.release_type or "stable"
        }
        binary = SandboxTasksBinary.find(attrs=attrs).first()
        if binary is None:
            raise ce.ResourceNotFound("Can't find binary for %(type)s task (%(res)s with attrs: %(attrs)s)" % {
                "type": self.type.name,
                "res": SandboxTasksBinary.name,
                "attrs": attrs
            })
        self.Requirements.tasks_resource = binary.id

    def on_execute(self):
        """Recreate the target YT folder and restore every table of the backup.

        Steps:
          1. Resolve placeholders in the ``root`` parameter and remove that
             YT path recursively.
          2. For each ``scheme.pb`` in the backup (except ignored tables):
             parse the scheme, create a static YT table with the mapped
             schema, parse all ``data_*.csv`` files next to the scheme and
             write the rows to the table.

        :raises KeyError: if a column has a type missing from the type mapping
        """
        # Imported at execution time rather than at module import
        # (presumably these packages are only shipped inside the task binary).
        from yt import wrapper as yt, yson
        from google.protobuf import text_format
        from kikimr.public.api.protos import ydb_table_pb2, ydb_value_pb2

        def nullable(parse):
            """Wrap a parser so that the literal "null" is restored as None."""
            def parse_or_none(val):
                return None if val == "null" else parse(val)
            return parse_or_none

        def unquote(val):
            """Decode a URL-quoted CSV value."""
            return urllib.unquote_plus(val)

        # Parsers for raw CSV cells, keyed by a YT-like type name.
        csv_parsers = {
            "boolean": nullable(lambda val: val == "1"),
            "double": nullable(lambda val: yson.YsonDouble(val)),
            "int64": nullable(lambda val: yson.YsonInt64(val)),
            "uint64": nullable(lambda val: yson.YsonUint64(val)),
            "string": nullable(unquote),
            "utf8": nullable(unquote),
            "json": nullable(lambda val: json.loads(unquote(val))),
            "yson": nullable(lambda val: yson.loads(unquote(val))),
        }

        # YDB primitive type id -> YT column type.
        type_mapper = {
            ydb_value_pb2.Type.BOOL: "boolean",
            ydb_value_pb2.Type.INT8: "int64",
            ydb_value_pb2.Type.INT16: "int64",
            ydb_value_pb2.Type.INT32: "int64",
            ydb_value_pb2.Type.INT64: "int64",
            ydb_value_pb2.Type.UINT8: "uint64",
            ydb_value_pb2.Type.UINT16: "uint64",
            ydb_value_pb2.Type.UINT32: "uint64",
            ydb_value_pb2.Type.UINT64: "uint64",
            ydb_value_pb2.Type.FLOAT: "double",
            ydb_value_pb2.Type.DOUBLE: "double",
            ydb_value_pb2.Type.DATE: "string",
            ydb_value_pb2.Type.DATETIME: "string",
            ydb_value_pb2.Type.TIMESTAMP: "string",
            ydb_value_pb2.Type.INTERVAL: "string",
            ydb_value_pb2.Type.TZ_DATE: "string",
            ydb_value_pb2.Type.TZ_DATETIME: "string",
            ydb_value_pb2.Type.TZ_TIMESTAMP: "string",
            ydb_value_pb2.Type.STRING: "string",
            ydb_value_pb2.Type.UTF8: "utf8",
            ydb_value_pb2.Type.YSON: "any",
            ydb_value_pb2.Type.JSON: "any",
            ydb_value_pb2.Type.UUID: "string",
        }

        # YDB primitive type id -> CSV cell parser. YSON and JSON both map to
        # the YT type "any" but need different parsers, so they are set
        # explicitly; the rest follow the YT type name.
        type_parsers = {
            ydb_value_pb2.Type.YSON: csv_parsers["yson"],
            ydb_value_pb2.Type.JSON: csv_parsers["json"],
        }
        for type_id, yt_type in type_mapper.items():
            if type_id not in type_parsers:
                type_parsers[type_id] = csv_parsers[yt_type]

        yt.config.set_proxy(self.Parameters.proxy)
        yt.config["token"] = self.Parameters.yt_token.data()

        backup_resource = self.Parameters.backup_resource
        backup_data = sdk2.ResourceData(backup_resource)
        backup_root = backup_data.path
        # Resource creation time as a naive UTC datetime.
        backup_time = backup_resource.created.astimezone(tz.tzutc()).replace(tzinfo=None)

        root_template = self.Parameters.root
        yt_root = yt.YPath(
            root_template
            .replace("%now%", datetime.utcnow().strftime("%Y%m%dT%H%M%S"))
            .replace("%backup_time%", backup_time.strftime("%Y%m%dT%H%M%S"))
            .replace("%backup_date%", backup_time.strftime("%Y-%m-%d"))
        )

        logging.info("1. Remove old YT directory %s", yt_root)
        yt.remove(yt_root, force=True, recursive=True)

        logging.info("2. Search for data in backup")
        logging.info("  Will ignore tables [%s]", " ".join(self.Parameters.tables_to_ignore))
        # Built once so the membership test inside the loop is O(1).
        ignored_tables = frozenset(self.Parameters.tables_to_ignore)

        for table_schema_path in backup_root.glob("**/scheme.pb"):
            table_dir = table_schema_path.parent
            table_path = table_dir.relative_to(backup_root)
            logging.info("  Found %s", table_schema_path.relative_to(backup_root))

            if not table_schema_path.is_file():
                logging.warning("    It's not file, skip")
                continue

            if str(table_path) in ignored_tables:
                logging.info("    It's in ignored tables, skip")
                continue

            logging.info("    1. Read scheme")
            schema = ydb_table_pb2.CreateTableRequest()
            text_format.Parse(table_schema_path.read_text(), schema)

            yt_table_name = yt_root.join(str(table_path))
            yt_table_schema = []
            # Column order of the scheme is used as the CSV column order.
            csv_fieldnames = []
            column_parsers = {}

            for column in schema.columns:
                type_id = self._primitive_type_id(column.type)
                yt_table_schema.append({
                    "name": column.name,
                    "type": type_mapper[type_id]
                })
                csv_fieldnames.append(column.name)
                column_parsers[column.name] = type_parsers[type_id]

            logging.info("    2. Create table in YT")
            yt.create("table", yt_table_name, recursive=True)
            yt.alter_table(yt_table_name, schema=yt_table_schema, dynamic=False)

            logging.info("    3. Search for data_*.csv in %s", table_path)
            # NOTE(review): all rows of a table are buffered in memory before
            # the write; very large tables may need a streaming write instead.
            table_data = []
            for data_path in table_dir.glob("data_*.csv"):
                logging.info("      Found %s", data_path.relative_to(backup_root))

                if not data_path.is_file():
                    logging.warning("        It's not file, skip")
                    continue

                logging.info("        Collecting data")
                with data_path.open("r") as csv_file:
                    # The files carry no header row: field names come from the scheme.
                    reader = csv.DictReader(csv_file, fieldnames=csv_fieldnames)
                    table_data.extend(
                        {
                            column_name: parse(record[column_name])
                            for column_name, parse in column_parsers.items()
                        }
                        for record in reader
                    )

            logging.info("    4. Writing collected data to YT table")
            yt.write_table(yt_table_name, table_data, raw=False)

        logging.info(
            "Restored data is available now at https://yt.yandex-team.ru/%(proxy)s/navigation?path=%(path)s",
            {
                "proxy": self.Parameters.proxy,
                "path": yt_root
            }
        )
