# -*- coding: utf-8 -*-

import os
import logging
import uuid
from collections import defaultdict
import json

from sandbox import common, sdk2
import sandbox.sandboxsdk.environments as sdk_environments
import sandbox.common.types.resource as ctr
from sandbox.sdk2.helpers import subprocess as sp

from sandbox.projects.adv_machine.common import AdvMachineReleaseTask, process_wrapper, get_yt_config
from sandbox.projects.adv_machine.common.parameters import YTParameters
from sandbox.projects.adv_machine.common.resources import AdvMachineCmInputTablesSample, AdvMachineRobotScripts


# Module-level logger named after this module.
logger = logging.getLogger(__name__)


class MakeAdvMachineCmTablesSample(AdvMachineReleaseTask):

    """Create AdvMachine ClusterMaster tables sample.

    The task runs ``am_cm_tables sample`` from the robot scripts resource to
    produce tables under a temporary YT directory, then dumps every table
    found there (as YSON) together with a ``data_spec.json`` description into
    an ``AdvMachineCmInputTablesSample`` resource.
    """

    class Requirements(AdvMachineReleaseTask.Requirements):
        cores = 2
        disk_space = 2 * 1024  # 2 * 1024 MiB = 2 GiB (assuming the unit is MiB -- TODO confirm)
        environments = [sdk_environments.PipEnvironment('yandex-yt'), ]

    class Parameters(AdvMachineReleaseTask.Parameters):
        with sdk2.parameters.Group('Input resources') as input_res_block:
            robot_scripts = sdk2.parameters.LastReleasedResource(
                'Robot Executables (with am_cm_tables)',
                resource_type=AdvMachineRobotScripts,
                state=(ctr.State.READY, ),
                required=True,
            )

        yt = YTParameters

    def on_execute(self):
        """Build the sample in YT and store it as a task resource."""
        # A unique directory per run so that concurrent tasks do not collide.
        yt_tmp_dir = "//tmp/sandbox/make_cm_tables_sample/" + str(uuid.uuid4())  # TODO: replace with normal YT tmp dir

        common_path = str(sdk2.ResourceData(self.Parameters.robot_scripts).path)
        tables_tool_bin = os.path.join(common_path, "tables_tool/am_cm_tables")
        tables_description = os.path.join(common_path, "tables.json")
        cmd = [
            tables_tool_bin, 'sample',
            '-t', tables_description,
            '-p', yt_tmp_dir,
            # NOTE(review): the sample date is hard-coded; confirm this is intentional.
            '-d', "20190131",
            '-v',
        ]
        env = get_yt_config(self.Parameters.yt, self.author)
        # NOTE(review): presumably pins Intel MKL to a reproducible code path -- confirm.
        env.update({'MKL_CBWR': 'COMPATIBLE'})
        with process_wrapper(self, logger='am_cm_tables') as process:
            sp.check_call(cmd, stdout=process.stdout, stderr=process.stderr, env=env)

        sample_resource = AdvMachineCmInputTablesSample(
            self,
            'AdvMachine ClusterMaster input tables sample',
            'tables_snapshot',
        )
        sample_dir = str(sdk2.ResourceData(sample_resource).path)
        os.mkdir(sample_dir)

        self.do_prepare(
            output_dir=sample_dir,
            path=yt_tmp_dir,
            prefix=yt_tmp_dir,
            attributes_to_save=["_yql_read_udf", "_yql_read_udf_type_config"],
            check_sorting=False,
            mock=False,
            save_meta=False,
        )

    def _get_st_message(self, resources, release_services):
        """Return the release notification text (author and task link).

        ``resources`` and ``release_services`` are accepted but not used.
        """
        return 'Готов новый семпл таблиц для ClusterMaster.\nАвтор:{}. \nТаск {}'.format(
            self.author,
            common.utils.get_task_link(self.id),
        )

    def read_table(self, path, path_stripped, output_dir, data_spec, check_sorting, mock):
        """Dump one YT table to disk and record its description in ``data_spec``.

        :param path: full YT path of the table.
        :param path_stripped: path relative to the sample root; used both as
            the output file name under ``output_dir`` and as the key in
            ``data_spec``.
        :param output_dir: local directory that receives the dump.
        :param data_spec: mapping updated in place; ``data_spec[path_stripped]``
            must already exist.
        :param check_sorting: when true, fail if the table's ``sorted``
            attribute is ``False``.
        :param mock: when true, only rows with indexes [0, 10) are read.
        :raises Exception: if ``check_sorting`` is set and the table is not sorted.
        """
        import yt.wrapper as yt

        # Test the flag first so the attribute is only fetched when it is needed.
        if check_sorting and yt.get_attribute(path, 'sorted') is False:
            raise Exception('{} is not sorted'.format(path))

        # The raw response is a byte stream, hence the binary mode.
        with open(os.path.join(output_dir, path_stripped), 'wb') as output_file:
            if mock:
                rich_path = yt.TablePath(path, start_index=0, end_index=10)
            else:
                rich_path = path
            response = yt.read_table(
                rich_path,
                raw=True,
                format=yt.format.YsonFormat(),
            )
            output_file.write(response.read())

        table_spec = data_spec[path_stripped]
        table_spec['max_row_weight'] = 64 * yt.common.MB
        if yt.get_attribute(path, 'dynamic'):
            table_spec['dynamic'] = True
            table_spec['mount'] = (yt.get_attribute(path, 'tablet_state') == 'mounted')
            schema = yt.get_attribute(path, 'schema')
            table_spec['schema'] = {
                'attributes': schema.attributes,
                'value': list(schema)
            }
        else:
            if yt.exists(path + '/@sorted_by'):
                table_spec['sorted_by'] = list(yt.get_attribute(path, 'sorted_by'))

    def recursive_read(
            self,
            path,
            strip_prefix,
            output_dir,
            data_spec,
            user_meta_attrs_to_save,
            check_sorting,
            mock,
            save_meta
    ):
        """Walk the YT subtree at ``path`` and dump every table found in it.

        Map nodes become local directories under ``output_dir``; tables are
        handled by :meth:`read_table`. Entries of ``data_spec`` that stay empty
        (map nodes) are removed again before returning.

        :param path: YT node to process; must start with ``strip_prefix``.
        :param strip_prefix: YT path prefix removed to obtain relative names.
        :param output_dir: local directory that mirrors the YT subtree.
        :param data_spec: mapping of relative path to table description,
            updated in place.
        :param user_meta_attrs_to_save: currently unused; only passed down.
        :param check_sorting: forwarded to :meth:`read_table`.
        :param mock: forwarded to :meth:`read_table`.
        :param save_meta: currently unused; only passed down.
        :returns: the same ``data_spec`` object.
        :raises ValueError: if ``path`` does not start with ``strip_prefix``.
        :raises Exception: if the node is neither a table nor a map node.
        """
        import yt.wrapper as yt
        # NOTE(review): this rebinds the ``config`` attribute of yt.wrapper to the
        # task's parameter group on every call; confirm this is what actually
        # configures the client.
        yt.config = self.Parameters.yt

        YT_LIST_RESPONSE_LIMIT = 15000
        logger.info(path)
        node_type = yt.get_type(path)

        if not path.startswith(strip_prefix):
            # Fail early with a clear message instead of an obscure error from
            # os.path.join(output_dir, None) further down.
            raise ValueError('{} is not located under {}'.format(path, strip_prefix))
        # Slice exactly at the prefix length and drop the separators afterwards,
        # so a prefix with a trailing slash does not swallow a real character.
        path_stripped = path[len(strip_prefix):].lstrip('/')

        data_spec[path_stripped] = defaultdict(dict)

        if node_type == 'table':
            self.read_table(path, path_stripped, output_dir, data_spec, check_sorting, mock)
        elif node_type == 'map_node':
            local_dir = os.path.join(output_dir, path_stripped)
            if not os.path.exists(local_dir):
                os.makedirs(local_dir)

            for node in sorted(yt.list(path, absolute=True, max_size=YT_LIST_RESPONSE_LIMIT)):
                self.recursive_read(
                    node,
                    strip_prefix,
                    output_dir,
                    data_spec,
                    user_meta_attrs_to_save,
                    check_sorting,
                    mock,
                    save_meta,
                )
        else:
            # Report the offending node itself; no loop variable is bound in this branch.
            logger.error('%s has unsupported node type %r', path, node_type)
            raise Exception('Unknown node type.')

        # Map nodes never get any keys, so their placeholder entry is dropped.
        if not data_spec[path_stripped]:
            data_spec.pop(path_stripped)

        return data_spec

    def do_prepare(self, output_dir, path, prefix, attributes_to_save, check_sorting, mock, save_meta):
        """Dump the YT subtree at ``path`` into ``output_dir`` and write ``data_spec.json``.

        :param output_dir: local target directory; created if missing.
        :param path: YT node to start from.
        :param prefix: YT path prefix stripped from node paths to get relative names.
        :param attributes_to_save: forwarded to :meth:`recursive_read`
            as ``user_meta_attrs_to_save``.
        :param check_sorting: forwarded to :meth:`recursive_read`.
        :param mock: forwarded to :meth:`recursive_read`.
        :param save_meta: forwarded to :meth:`recursive_read`.
        """
        output_dir_abspath = os.path.abspath(output_dir)
        if not os.path.exists(output_dir_abspath):
            os.makedirs(output_dir_abspath)

        data_spec_dict = {}
        self.recursive_read(
            path=path,
            strip_prefix=prefix,
            data_spec=data_spec_dict,
            user_meta_attrs_to_save=attributes_to_save,
            check_sorting=check_sorting,
            mock=mock,
            output_dir=output_dir_abspath,
            save_meta=save_meta
        )

        # Sorted keys and fixed separators keep the spec file stable between runs.
        with open(os.path.join(output_dir_abspath, 'data_spec.json'), 'w') as data_spec_file:
            json.dump(data_spec_dict, data_spec_file, indent=4, separators=(',', ': '), sort_keys=True)
