# -*- coding: utf-8 -*-

import json
import logging
import os
import os.path
import shutil

from sandbox.common.types import client
from sandbox import sdk2
from sandbox.projects import resource_types
from sandbox.projects.userdata import resources as userdata_resources


class CombineYtTables(sdk2.Task):
    """
    Replacement for old CombineMRTables task:

    Take several USERDATA_TABLES_ARCHIVE resources and create one with all the tables.

    Execution flow (see ``on_execute``):

    1. Create the output USERDATA_TABLES_ARCHIVE resource and, when metadata
       resource ids are given, the output UserfeatYtMetadata resource.
    2. Look up every input resource, remembering its data handle and the
       attributes listed in ``RESOURCE_ATTRIBUTES_OF_INTEREST``.
    3. Merge the ``userfeat_meta.json`` files of the metadata resources;
       a key present in more than one input is an error.
    4. Copy every ``*.lenval.gz`` file of every input into the output
       directory, replacing the input's table prefix with the one returned
       by ``get_tables_prefix``, and set the merged attributes on the result.
    5. Dump the merged metadata into the output metadata resource.
    """

    class Requirements(sdk2.Task.Requirements):
        client_tags = client.Tag.LINUX_PRECISE
        required_ram = 23 << 10
        execution_space = 40000

    class Parameters(sdk2.Task.Parameters):
        with sdk2.parameters.Group("Input parameters") as input_parameters_group:
            state_resource_ids = sdk2.parameters.List(
                label="Resources with stored tables",
                value_type=sdk2.parameters.Integer,
                required=True,
                description="IDs of USERDATA_TABLES_ARCHIVE resources to merge",
            )
            metadata_resource_ids = sdk2.parameters.List(
                label="Resources with dumped metadata",
                value_type=sdk2.parameters.Integer,
                required=False,
                description="IDs of USERFEAT_YT_METADATA resources to merge",
            )
        with sdk2.parameters.Group("Misc parameters") as misc_parameters_group:
            # NOTE(review): this flag is declared but no method of this class
            # reads it -- attributes of the inputs are always merged. Confirm
            # whether it should gate the attribute collection.
            merge_resource_attrs = sdk2.parameters.Bool(
                label="Merge resources' attributes",
                required=False,
                default_value=True
            )
        with sdk2.parameters.Group("Output parameters") as output_parameters_group:
            result_description = sdk2.parameters.String(
                label="Result description",
                default_value="<no description>",
            )
            extra_attrs = sdk2.parameters.Dict(
                label="Add attributes to resulting resource (will be formatted via task context)",
                value_type=sdk2.parameters.String,
                required=False,
            )
            drop_attrs = sdk2.parameters.List(
                label="Drop these attributes from merged",
                value_type=sdk2.parameters.String,
                required=False,
            )

    class Context(sdk2.Task.Context):
        pass

    # Attributes copied from the input resources onto the resulting one.
    RESOURCE_ATTRIBUTES_OF_INTEREST = [
        "base_timestamp",
        "browse_days_per_period",
        "browse_first_date",
        "browse_last_date",
        "browse_num_periods",
        "counters_first_date",
        "counters_last_date",
        "data_format_version",
        "sc_first_date",
        "sc_last_date",
        "tables_prefix",
        "yandex_first_date",
        "yandex_last_date",
    ]
    # Name of the metadata file inside both input and output metadata resources.
    USERFEAT_META_JSON = "userfeat_meta.json"

    # Text types whose values get formatted in ``save_state``: just ``str`` on
    # Python 3, ``str`` and ``unicode`` on Python 2.
    _STRING_TYPES = (str, type(u""))

    # Class-level placeholders only. ``on_execute`` rebinds fresh per-instance
    # containers (see ``_init_state``) so that nothing is accumulated in
    # objects shared by every instance of the class.
    input_state_resources = []
    input_state_resource_data = []
    metadata_json = {}

    state_attrs = {}
    state_resource = None
    state_resource_data = None
    metadata_resource = None
    metadata_resource_data = None

    def _init_state(self):
        """Give this instance its own, empty working containers."""
        self.input_state_resources = []
        self.input_state_resource_data = []
        self.metadata_json = {}
        self.state_attrs = {}

    @staticmethod
    def _reprefix(name, old_prefix, new_prefix):
        """
        Replace the leading ``old_prefix`` part of file name ``name``.

        The first ``len(old_prefix)`` characters of ``name`` are dropped
        (without checking that they actually equal the old prefix) and
        ``new_prefix`` with every '/' turned into ':' is put in their place.
        ``name`` is returned untouched when ``new_prefix`` is None.
        """
        if new_prefix is None:
            return name
        return new_prefix.replace('/', ':') + name[len(old_prefix):]

    def create_resources(self):
        """
        Create the output resources and their (empty) directories.

        The state resource is always created; the metadata resource only
        when ``metadata_resource_ids`` is non-empty.
        """
        self.state_resource = resource_types.USERDATA_TABLES_ARCHIVE(
            task=self,
            description=self.Parameters.result_description,
            path="result_state",
            arch="any",
        )
        self.state_resource_data = sdk2.ResourceData(self.state_resource)
        self.state_resource_data.path.mkdir(0o755, parents=True, exist_ok=True)

        if self.Parameters.metadata_resource_ids:
            self.metadata_resource = userdata_resources.UserfeatYtMetadata(
                task=self,
                description=self.Parameters.result_description,
                path="result_metadata",
                arch="any",
            )
            self.metadata_resource_data = sdk2.ResourceData(self.metadata_resource)
            self.metadata_resource_data.path.mkdir(0o755, parents=True, exist_ok=True)

    def get_tables_prefix(self):
        """Return the table prefix used for the resulting resource."""
        return "sandbox/"

    def collect_input_states(self):
        """
        Look up every input state resource and gather its attributes.

        Fills ``input_state_resources`` / ``input_state_resource_data`` (kept
        in the same order) and ``state_attrs``. When several inputs carry the
        same attribute, the value of the last one wins.

        Raises:
            Exception: an id from ``state_resource_ids`` matches no resource.
        """
        for resource_id in self.Parameters.state_resource_ids:
            resource = sdk2.Resource.find(id=resource_id).first()
            if not resource:
                logging.error("Input resource #%d is not found", resource_id)
                raise Exception("Input resource #{} is not found".format(resource_id))
            data = sdk2.ResourceData(resource)
            for attr_name in self.RESOURCE_ATTRIBUTES_OF_INTEREST:
                if hasattr(resource, attr_name):
                    self.state_attrs[attr_name] = getattr(resource, attr_name)
            self.input_state_resources.append(resource)
            self.input_state_resource_data.append(data)
        logging.info("Attributes collected off the input resources: %s", self.state_attrs)

    def save_state(self):
        """
        Copy the input tables into the output resource and finalize it.

        Every ``*.lenval.gz`` file of every collected input is copied into
        the output directory under a re-prefixed name. Then the attribute set
        is adjusted (``extra_attrs`` added, ``drop_attrs`` removed,
        ``tables_prefix`` forced to the new prefix), written onto the output
        resource, and the resource data is marked ready.
        """
        new_prefix = self.get_tables_prefix()
        target_dir = self.state_resource_data.path

        for resource, data in zip(self.input_state_resources, self.input_state_resource_data):
            source_dir = data.path
            for file_name in os.listdir(str(source_dir)):
                if not file_name.endswith(".lenval.gz"):
                    continue
                target_name = self._reprefix(file_name, getattr(resource, "tables_prefix"), new_prefix)
                target_path = str(target_dir.joinpath(target_name))
                # Inputs may map to the same target name; the later copy
                # still wins, but no longer silently.
                if os.path.exists(target_path):
                    logging.warning("Target file %s already exists and will be overwritten", target_path)
                shutil.copyfile(str(source_dir.joinpath(file_name)), target_path)

        extra = self.Parameters.extra_attrs
        if extra and isinstance(extra, dict):
            # NOTE(review): formatting relies on ``self.ctx`` being a mapping;
            # this class only declares ``Context`` -- confirm ``ctx`` exists.
            ctx = self.ctx
            for key, value in extra.items():
                # Only textual values are templates; anything else is stored as is.
                if isinstance(value, self._STRING_TYPES):
                    value = value.format(**ctx)
                self.state_attrs[key.format(**ctx)] = value

        drop_attrs = self.Parameters.drop_attrs
        if drop_attrs and isinstance(drop_attrs, list):
            for key in drop_attrs:
                self.state_attrs.pop(key, None)

        # Set after the drop so the result always advertises its real prefix.
        self.state_attrs["tables_prefix"] = new_prefix
        for key, value in self.state_attrs.items():
            setattr(self.state_resource, key, value)
        self.state_resource_data.ready()

    def collect_input_metadata(self):
        """
        Merge ``userfeat_meta.json`` of every input metadata resource.

        Does nothing when ``metadata_resource_ids`` is unset or empty.
        Resources whose JSON is empty are skipped with a warning.

        Raises:
            Exception: an id matches no resource, or a top-level key occurs
                in more than one input.
        """
        # The parameter is optional, so it may be unset.
        for resource_id in self.Parameters.metadata_resource_ids or []:
            resource = sdk2.Resource.find(id=resource_id).first()
            if not resource:
                logging.error("Resource #%d is not found", resource_id)
                raise Exception("Input metadata resource #{} is not found".format(resource_id))
            data = sdk2.ResourceData(resource)
            json_path = data.path.joinpath(self.USERFEAT_META_JSON)
            with open(str(json_path)) as json_input:
                loaded = json.load(json_input)
            if not loaded:
                logging.warning("Resource #%d has no metadata, skipping it", resource_id)
                continue
            for key, value in loaded.items():
                if key in self.metadata_json:
                    raise Exception(
                        "key {} from resource {} is already present".format(key, resource_id)
                    )
                self.metadata_json[key] = value
        logging.info("Collected metadata: %s", self.metadata_json)

    def save_metadata(self):
        """
        Dump the merged metadata as JSON into the output metadata resource.

        Does nothing when the metadata resource was not created.
        """
        if self.metadata_resource is None or self.metadata_resource_data is None:
            return
        # write_bytes() needs bytes; json.dumps() yields text on Python 3.
        self.metadata_resource_data.path.joinpath(self.USERFEAT_META_JSON).write_bytes(
            json.dumps(self.metadata_json).encode("utf-8")
        )
        self.metadata_resource_data.ready()

    def on_execute(self):
        """Run the whole merge: create outputs, collect inputs, save results."""
        self._init_state()
        self.create_resources()
        self.collect_input_states()
        self.collect_input_metadata()
        self.save_state()
        self.save_metadata()
