# -*- coding: utf-8 -*-

import logging
import os
import sandbox.sandboxsdk.task as sdk_task
import sandbox.sandboxsdk.process as sdk_process
import sandbox.sandboxsdk.parameters as sdk_parameters
import sandbox.sandboxsdk.paths as sdk_paths

import sandbox.projects.resource_types as resource_types
import sandbox.projects.common.mapreduce_runner as mapreduce_runner


class MrServerParameter(sdk_parameters.SandboxStringParameter):
    """
        Name of the MapReduce server the task should work with.

        The default value, 'local', makes the task create its own
        MapreduceRunner instead of addressing an external server.
    """
    name = 'mr_server'
    required = True
    default_value = 'local'
    description = 'Mapreduce server ("local" in a locally set up MR microcluster):'


class MapreduceExecutableResourceParameter(sdk_parameters.LastReleasedResource):
    """
        Resource with the mapreduce executable used to upload and download
        tables (and to start the local server when one is requested).
    """
    resource_type = resource_types.MAPREDUCE_EXECUTABLE
    name = 'mapreduce_resource_id'
    description = 'mapreduce-executable resource'


class MetadocExecutableResourceParameter(sdk_parameters.ResourceSelector):
    """
        Resource with the metadoc executable under test; its synced path is
        substituted for ``{metadoc_bin}`` in the command template.
    """
    resource_type = resource_types.IMAGES_MR_INDEX_METADOC
    name = 'metadoc_resource_id'
    description = 'metadoc-executable resource'


class Command(sdk_parameters.SandboxStringParameter):
    """
        Command line used to launch metadoc.

        The value is a ``str.format`` template: the task substitutes the
        ``{metadoc_bin}``, ``{config_dir}`` and ``{mr_server}`` placeholders
        and then splits the result on whitespace to build the argument list,
        so individual arguments must not contain spaces.
    """
    name = 'command'
    # The description used to be empty, leaving the parameter unlabelled.
    description = 'Metadoc command line template ({metadoc_bin}, {config_dir}, {mr_server} are substituted):'
    # NOTE(review): resource_type on a string parameter looks like a
    # copy-paste leftover from the resource selectors; kept as-is because it
    # is unclear whether anything reads it -- confirm before removing.
    resource_type = resource_types.IMAGES_MR_INDEX_METADOC
    # --index-prefix / --index-state below must stay in sync with the table
    # prefix hard-coded in ImagesTestMrindexMetadoc.run().
    default_value = """{metadoc_bin} CreateMetadoc
--fast-index
--config-dir {config_dir}
--shard-number 0
--link-filter junk
--link-quota per_lang
--link-ranker mxnet
--job-memory-limit 4831838208
--image-group-limit 5
--server {mr_server}
--index-prefix sandbox/images
--index-state 20220102-235959
"""

# TODO: maybe use USERDATA_TABLES_ARCHIVE?


class InputImagesTableParameter(sdk_parameters.ResourceSelector):
    """
        Resource with the dump of the ``inputdoc.images`` table that is
        uploaded to MapReduce before metadoc runs.
    """
    resource_type = resource_types.IMAGES_MR_TABLE
    name = 'input_images_table_resource_id'
    description = 'inputdoc.images table resource'


class InputImagesUrlTableParameter(sdk_parameters.ResourceSelector):
    """
        Resource with the dump of the ``inputdoc.images.url`` table that is
        uploaded to MapReduce before metadoc runs.
    """
    resource_type = resource_types.IMAGES_MR_TABLE
    name = 'input_images_url_table_resource_id'
    description = 'inputdoc.images.url table resource'


class InputLinksTableParameter(sdk_parameters.ResourceSelector):
    """
        Resource with the dump of the ``inputdoc.links`` table that is
        uploaded to MapReduce before metadoc runs.
    """
    resource_type = resource_types.IMAGES_MR_TABLE
    name = 'input_links_table_resource_id'
    description = 'inputdoc.links table resource'


class MetadocConfig(sdk_parameters.ResourceSelector):
    """
        Resource with the metadoc configuration; its synced path is
        substituted for ``{config_dir}`` in the command template.
    """
    resource_type = resource_types.IMAGES_MR_INDEX_CONFIG
    name = 'metadoc_config'
    description = 'metadoc config'


class ImagesTestMrindexMetadoc(sdk_task.SandboxTask):
    """
        Task for testing the main operating mode of metadoc.

        Uploads the three input tables to MapReduce, runs the metadoc command
        and stores the resulting ``erf`` and ``metadoc`` tables as resources.
    """
    type = 'IMAGES_TEST_MRINDEX_METADOC'
    input_parameters = [
        MetadocExecutableResourceParameter,
        MapreduceExecutableResourceParameter,
        Command,
        MrServerParameter,
        InputImagesTableParameter,
        InputImagesUrlTableParameter,
        InputLinksTableParameter,
        MetadocConfig,
    ]
    # Set by setup_server_if_needed() only when the 'local' server is requested.
    server_runner = None

    def on_execute(self):
        """
            Task entry point: prepare the server, run the test, always clean up.
        """
        try:
            self.setup_server_if_needed()
            self.run()
        finally:
            # Safe even if setup failed early: teardown is a no-op while
            # server_runner is still None.
            self.teardown_server()

    def run(self):
        """
            Upload the input tables, execute metadoc and save its output tables.
        """
        self.metadoc_bin = self.sync_resource(self.ctx[MetadocExecutableResourceParameter.name])
        self.config_dir = self.sync_resource(self.ctx[MetadocConfig.name])

        # Must agree with --index-prefix / --index-state of the command; the
        # default command value uses exactly this prefix and state.
        prefix = 'sandbox/images/index/20220102-235959/'
        self.load_parameter_to_mr(InputImagesTableParameter, prefix + 'inputdoc/0/inputdoc.images')
        self.load_parameter_to_mr(InputImagesUrlTableParameter, prefix + 'inputdoc/0/inputdoc.images.url')
        self.load_parameter_to_mr(InputLinksTableParameter, prefix + 'inputdoc/0/inputdoc.links')

        cmd = self.ctx[Command.name]
        cmd = cmd.format(metadoc_bin=self.metadoc_bin, config_dir=self.config_dir, mr_server=self.mapreduce_server)
        logging.info("will run %s", cmd)
        # The template is split on any whitespace (the default value is
        # multi-line), so arguments containing spaces are not supported.
        sdk_process.run_process(cmd.split(), log_prefix='metadoc')

        # NOTE(review): the descriptions mention CreatePortion while the default
        # command runs CreateMetadoc -- confirm which one is intended.
        self.create_resource_from_table(prefix + 'metadoc/0/erf', 'erf', 'erf table (output of metadoc CreatePortion)')
        self.create_resource_from_table(prefix + 'metadoc/0/metadoc', 'metadoc', 'metadoc table (output of metadoc CreatePortion)')

    def load_to_mr(self, file_name, table_name):
        """
            Write the contents of a local file into a MapReduce table.

            :param file_name: path to the local table dump fed to the
                mapreduce binary on stdin
            :param table_name: destination table on ``self.mapreduce_server``
        """
        logging.info("Uploading %s to %s/%s", file_name, self.mapreduce_server, table_name)
        upload_cmd = [
            self.mapreduce_bin,
            '-server', self.mapreduce_server,
            '-subkey', '-lenval',
            '-writesorted', table_name,
        ]
        # The file is only handed to the child process as its stdin, so open
        # it in binary mode and make sure the descriptor is closed afterwards.
        with open(file_name, 'rb') as table_dump:
            sdk_process.run_process(upload_cmd, stdin=table_dump)

    def load_from_mr(self, file_name, table_name):
        """
            Read a MapReduce table into a local file.

            :param file_name: path of the local file receiving the stdout of
                the mapreduce binary (created or truncated)
            :param table_name: source table on ``self.mapreduce_server``
        """
        logging.info("Downloading %s/%s to %s", self.mapreduce_server, table_name, file_name)
        download_cmd = [
            self.mapreduce_bin,
            '-server', self.mapreduce_server,
            '-subkey', '-lenval',
            '-read', table_name,
        ]
        # Close the output file as soon as the download is over so that the
        # descriptor is not leaked and the file is released before the
        # directory is registered as a resource.
        with open(file_name, 'wb') as table_dump:
            sdk_process.run_process(download_cmd, stdout=table_dump)

    def load_parameter_to_mr(self, parameter_class, table_name):
        """
            Sync the resource selected by a parameter and upload it as a table.

            :param parameter_class: parameter class whose ``name`` keys the
                resource id in ``self.ctx``
            :param table_name: destination MapReduce table
        """
        table_file = self.sync_resource(self.ctx[parameter_class.name])
        self.load_to_mr(table_file, table_name)

    def create_resource_from_table(self, table_name, short_name, description,
                                   resource_type=resource_types.IMAGES_MR_TABLE, attributes=None):
        """
            Download a MapReduce table and register its directory as a resource.

            The table is saved as ``mr_result_<short_name>/<short_name>``.

            :param table_name: source MapReduce table
            :param short_name: used for both the directory suffix and the file name
            :param description: description of the created resource
            :param resource_type: type of the created resource
            :param attributes: passed through to ``create_resource``
        """
        dir_name = 'mr_result_%s' % short_name
        sdk_paths.make_folder(dir_name, True)
        self.load_from_mr(os.path.join(dir_name, short_name), table_name)
        self.create_resource(description, dir_name, resource_type, None, attributes)

    def setup_server_if_needed(self):
        """
            Resolve the server name and mapreduce binary; create a local
            runner when the 'local' server is requested.
        """
        self.mapreduce_server = self.ctx[MrServerParameter.name]
        self.mapreduce_bin = self.sync_resource(self.ctx[MapreduceExecutableResourceParameter.name])

        if self.mapreduce_server == 'local':
            # NOTE(review): only the runner object is created here and
            # mapreduce_server stays 'local'; presumably the constructor
            # starts the server -- confirm against mapreduce_runner.
            self.server_runner = mapreduce_runner.MapreduceRunner(
                self.mapreduce_bin,
                self.abs_path('mr_dir'),
                self.client_info,
            )

    def teardown_server(self):
        """
            Shut down the local runner if one was created; otherwise do nothing.
        """
        if self.server_runner is not None:
            self.server_runner.teardown_server()


# Module-level alias for the task class defined in this file
# (presumably the name the Sandbox task loader looks up -- confirm).
__Task__ = ImagesTestMrindexMetadoc
