import os
import logging
import json
import time
import shutil
import os.path

import sandbox.sandboxsdk.environments as sdk_environments

from sandbox import sdk2
from sandbox.sdk2.helpers import subprocess as sp

from sandbox import common
import sandbox.common.types.task as ctt
from sandbox.common.errors import TaskFailure

from sandbox.projects import resource_types
from sandbox.projects.common import apihelpers

from sandbox.projects.SuggestTestSecond import suggest_build_shard
from sandbox.projects.SuggestTestSecond import utils
from sandbox.projects.SuggestTestSecond.suggest_build_shard import PrepareResourceShard  # noqa


class PrepareResource(sdk2.Resource):
    """Sandbox resource type for the prepared files of a dictionary build.

    SuggestTestSecond creates one in ``main`` mode from the ``prepare_files``
    directory (``groups`` and ``regions_streams.meta``).
    """
    # Not tied to a particular host architecture.
    any_arch = True
    # The resource may be released; the list below names who may do it.
    releasable = True
    releasers = ['temnajab']


class SuggestTestSecond(sdk2.Task):
    """Build suggest shard dict.

    The task runs in two stages:

    1. First run: fetch scripts and resources, optionally run the
       ``build_shards.sh`` prepare script (``main`` mode), start one
       ``SuggestBuildShard`` child task per shard and wait for them.
    2. After the children finish: read each child's output, assemble the
       shards map and publish it as a ``SUGGET_SHARD_MAP`` resource
       (``shards.json``).
    """

    class Requirements(sdk2.Requirements):
        # The first stage imports ``yt.wrapper``, provided by this pip package.
        environments = [sdk_environments.PipEnvironment('yandex-yt'), ]

    class Parameters(sdk2.Task.Parameters):

        # Build mode: 'main' runs the prepare script and derives the shard
        # count from the YT table; 'test' reuses an existing prepare resource
        # and builds a fixed number of shards. Any other value is rejected
        # in on_execute.
        mode = sdk2.parameters.String(
            "test(build N shards with skiped prepare stage(shard size about 10k-100k)|main",
            name="mode",
            required=True,
            default='main'
        )

        with sdk2.parameters.Group("Sandbox resources download for build") as app_params_block:
            # Extra resources downloaded by download_resources() before the build.
            sandbox_resources = sdk2.parameters.Resource(
                "Sandbox resources. binary name|bash|configs",
                multiple=True
            )

        with sdk2.parameters.Group("YT setting") as yt_settings_block:
            # Used as the YT proxy url.
            yt_cluster = sdk2.parameters.String(
                "YT cluster (i.e. hahn)",
                name="yt_cluster",
                default="hahn",
                required=True,
            )
            # Name of the Vault entry holding the token, not the token itself.
            yt_token = sdk2.parameters.String(
                "YT token vault name",
                name="yt_token",
                default="YT_TOKEN_SUGGEST",
                required=True,
            )
            # When empty, a timestamped path under //home/suggest-prod/bigdict/
            # is generated in on_execute.
            table_prefix = sdk2.parameters.String(
                "yt path for build dir",
                name="table_prefix",
                required=False,
            )

        with sdk2.parameters.Group("mode - main build") as mode_main_block:
            shard_size = sdk2.parameters.Integer(
                'line count in ready for one shard, max = 100kk',
                name="shard_size",
                default=50000000,
                required=True
            )
            # Passed to build_shards.sh as its third argument.
            days_count = sdk2.parameters.Integer(
                "days for build",
                name="days count",
                default=70,
                required=True
            )
            # Both child requirements are given in Gb and multiplied by 1024
            # when applied to the child task.
            child_ram = sdk2.parameters.Integer(
                "ram requirements for childs tasks in Gb",
                name="child ram requirements",
                required=True,
                default=128
            )
            child_disk = sdk2.parameters.Integer(
                "disk requirements for childs tasks in Gb",
                name="child disk requirements",
                required=True,
                default=256
            )
            # Copied verbatim into the ``autodeploy`` attribute of the shards map resource.
            autodeploy = sdk2.parameters.String(
                "deploy resource after build to suggest_shard",
                name="autodeploy",
                default="yes",
                required=True,
            )

        with sdk2.parameters.Group("mode - test build") as mode_test_block:
            shards_count = sdk2.parameters.Integer(
                "haw many shards build",
                name="shards count",
                default=22,
                required=False
            )
            # Id of a prepare resource from an earlier build; handed to the
            # children in 'test' mode instead of building a new one.
            prepare_data_resource_id = sdk2.parameters.Integer(
                "prepare reoucese from normal build",
                name="prepare_data resource id",
                required=False
            )

    def on_execute(self):
        """Run the two-stage build.

        The first stage is wrapped in ``memoize_stage.first_run`` and ends by
        raising ``sdk2.WaitTask`` on the started children; the code after the
        ``with`` block then collects their results and publishes the shards
        map resource.

        Raises:
            sdk2.WaitTask: at the end of the first stage, to wait for children.
            TaskFailure: if ``build_shards.sh`` exits with a non-zero code or
                a child task did not finish with status SUCCESS.
            NameError: if ``mode`` is neither ``main`` nor ``test``.
        """

        with self.memoize_stage.first_run:
            # NOTE(review): this flag is not read anywhere in this class.
            self.Context.second_step = True
            logging.info('stage filters and prepare data')

            logging.info('checkout files from svn')
            path_suggest_framework = utils.checkout_from_svn('suggest_framework', 'arcadia/quality/trailer/suggest_dict/suggest_framework')

            logging.info('download sabdbox resources')
            self.download_resources()

            # logging.info('download main script')
            # main_script = apihelpers.get_last_resource_with_attribute(resource_types.SUGGET_SHARD_MAIN_SCRIPT)
            # logging.info('main script id:' + str(main_script.id))
            # utils.download_sandbox_resource(main_script.id)

            logging.info('download filters')
            filters = apihelpers.get_last_resource_with_attribute(resource_types.SUGGET_SHARD_FILTERS)
            logging.info('filters id:' + str(filters.id))
            utils.download_sandbox_resource(filters.id)

            logging.info('download build shard files')
            build_one_shard = apihelpers.get_last_resource_with_attribute(resource_types.SUGGET_SHARD_BUILD_ONE_SHARD)
            logging.info('build shard files id:' + str(build_one_shard.id))
            utils.download_sandbox_resource(build_one_shard.id)

            logging.info('download geodata')
            # utils.download_sandbox_resource(self.Parameters.geodata.id)
            # NOTE(review): unlike the module-level imports this path has no
            # ``sandbox.`` prefix - confirm it resolves in the target runtime.
            from projects.geobase.Geodata5BinStable import resource as gbr
            geodata = apihelpers.get_last_resource_with_attribute(gbr.GEODATA5BIN_STABLE)
            logging.info('geodata id:' + str(geodata.id))
            utils.download_sandbox_resource(geodata.id)

            logging.info('checkout shards scripts')
            path_shards_scripts = utils.checkout_from_svn(
                'shards_scripts',
                'arcadia/quality/trailer/suggest_dict/tools/bigdata/scripts'
            )

            # for c in range(60):
            #     logging.info('STARTING')
            #     time.sleep(1)

            hahn_table_prefix = self.Parameters.table_prefix if self.Parameters.table_prefix else '//home/suggest-prod/bigdict/' + str(time.time())
            logging.info('hahn dir:' + hahn_table_prefix)

            if self.Parameters.mode == 'main':
                prepare_data_resource_id = self._build_prepare_files(
                    path_shards_scripts, path_suggest_framework, hahn_table_prefix
                )
            elif self.Parameters.mode == 'test':
                prepare_data_resource_id = self.Parameters.prepare_data_resource_id
            else:
                raise NameError('bad mode: expexted test|main' + ' got ' + self.Parameters.mode)

            shards_count, shards_size = self._plan_shards(hahn_table_prefix)

            childs = self._start_children(
                shards_count,
                shards_size,
                prepare_data_resource_id,
                build_one_shard.id,
                geodata.id,
                hahn_table_prefix,
            )

            self.Context.childs_ids = [x.id for x in childs]
            logging.info('stage filters and prepare data done')
            raise sdk2.WaitTask(childs, ctt.Status.Group.FINISH + ctt.Status.Group.BREAK)

        logging.info('stage sum shards')

        api = common.rest.Client()

        shards_map = self._collect_shards_map(api)

        shard_map_resource = resource_types.SUGGET_SHARD_MAP(self, "shards map", "shards.json")
        shard_map_resource.autodeploy = self.Parameters.autodeploy
        shard_map_resource.backup_task = 'True'
        shard_map_resource.ttl = 999
        api.resource[shard_map_resource.id].attribute(name="name", value="shards_map")

        shard_map_data = sdk2.ResourceData(shard_map_resource)
        # json.dumps escapes non-ASCII by default, so encoding gives the same
        # bytes as before while always handing write_bytes() real bytes.
        shard_map_data.path.write_bytes(json.dumps(shards_map).encode('utf-8'))

        logging.info('stage sum shards done')
    # end on_execute

    def _build_prepare_files(self, path_shards_scripts, path_suggest_framework, hahn_table_prefix):
        """Run ``build_shards.sh`` and publish its output as a PrepareResource.

        Args:
            path_shards_scripts: directory containing ``build_shards.sh``.
            path_suggest_framework: checkout path passed to the script.
            hahn_table_prefix: YT build directory passed to the script.

        Returns:
            Id of the created PrepareResource.

        Raises:
            TaskFailure: if the script exits with a non-zero code.
        """
        logging.info('start main script')
        with sdk2.helpers.ProcessLog(self, logger="run_filters") as pl:
            cmd_without_token = ' '.join([
                path_shards_scripts + '/build_shards.sh',
                path_suggest_framework,
                hahn_table_prefix,
                str(self.Parameters.days_count),
            ])
            # The token is the last script argument; keep it out of the task log.
            logging.info('call: %s %s', cmd_without_token, '<yt token hidden>')
            cmd = cmd_without_token + ' ' + sdk2.Vault.data(self.Parameters.yt_token)
            return_code = sp.Popen(cmd, shell=True, stdout=pl.stdout, stderr=sp.STDOUT).wait()

        # Fail here with a clear message instead of later on a missing file.
        if return_code != 0:
            raise TaskFailure('build_shards.sh exited with code {}'.format(return_code))

        # At this point the script is expected to have produced
        # groups.1 and regions_streams.meta in the working directory.

        logging.info('craete prepare_files dir')
        if not os.path.exists('prepare_files'):
            os.makedirs('prepare_files')
        logging.info('copy files for prepare_files')
        shutil.copy('groups.1', 'prepare_files/groups')
        shutil.copy('regions_streams.meta', 'prepare_files/regions_streams.meta')

        desc = 'prepare files for build dict(groups|regions_streams.meta)'
        path = 'prepare_files'
        resource = PrepareResource(self, desc, path)
        data = sdk2.ResourceData(resource)
        data.ready()

        logging.info('prepare files id: %i', resource.id)
        return resource.id

    def _plan_shards(self, hahn_table_prefix):
        """Configure the YT client and decide how many shards to build.

        In ``main`` mode the count is derived from the row count of
        ``<hahn_table_prefix>/ready`` and the ``shard_size`` parameter, with a
        lower bound of 22 shards; when the bound applies the shard size is
        recomputed so the rows are spread over those 22 shards. In ``test``
        mode the ``shards_count`` parameter is used and the shard size is left
        at ``shard_size``.

        Returns:
            Tuple ``(shards_count, shards_size)``.
        """
        # Imported here because the module comes from the pip environment
        # declared in Requirements.
        import yt.wrapper as yt
        yt.config["proxy"]["url"] = self.Parameters.yt_cluster
        yt.config["token"] = sdk2.Vault.data(self.Parameters.yt_token)

        min_shards = 22
        shards_count = None
        shards_size = self.Parameters.shard_size
        if self.Parameters.mode == 'main':
            row_count = yt.get(hahn_table_prefix + '/ready/@row_count')
            logging.info('row_count: %i', row_count)
            # Explicit floor division keeps the record bounds integral
            # regardless of the interpreter's ``/`` semantics.
            shards_count = int(row_count // self.Parameters.shard_size) + 1
            if shards_count < min_shards:
                shards_count = min_shards
                shards_size = row_count // shards_count + 1
        elif self.Parameters.mode == 'test':
            shards_count = self.Parameters.shards_count
        logging.info('shards count: %i', shards_count)
        logging.info('shards size: %i', shards_size)
        return shards_count, shards_size

    def _start_children(self, shards_count, shards_size, prepare_data_resource_id,
                        shard_files_id, geodata_id, hahn_table_prefix):
        """Create and enqueue one SuggestBuildShard child per shard.

        Child ``c`` (0-based) is given records ``c * shards_size + 1`` through
        ``c * shards_size + shards_size``.

        Returns:
            List of the started child tasks.
        """
        childs = []
        for part in range(shards_count):
            first_record = part * shards_size + 1
            child = suggest_build_shard.SuggestBuildShard(
                self,
                # priority=ctt.Priority(ctt.Priority.Class.BACKGROUND, ctt.Priority.Subclass.LOW),
                priority=ctt.Priority(ctt.Priority.Class.SERVICE, ctt.Priority.Subclass.NORMAL),
                # priority=self.Parameters.priority,
                description='Child of {}'.format(self.id) + ' shard ' + str(part),
                owner=self.Parameters.owner,
                part=part,
                rec_from=first_record,
                rec_to=part * shards_size + shards_size,
                prepare_files=prepare_data_resource_id,
                shard_files=shard_files_id,
                geodata_id=geodata_id,
                # The child receives the Vault entry name, not the token value.
                yt_token_temnajab=self.Parameters.yt_token,
                table_prefix=hahn_table_prefix
            )
            # Parameters are in Gb; multiply by 1024 for the requirement fields.
            child.Requirements.disk_space = self.Parameters.child_disk * 1024
            child.Requirements.ram = self.Parameters.child_ram * 1024
            child.save()
            child.enqueue()
            childs.append(child)
        return childs

    @staticmethod
    def _parse_percent(value):
        """Convert a string such as ``'12.5%'`` to a float; ``''`` gives 0.0."""
        return float(value.replace('%', '')) if value != '' else 0.0

    def _collect_shards_map(self, api):
        """Assemble the shards map from the outputs of the child tasks.

        Each child's ``shard_resouce_ext`` parameter is parsed as JSON with the
        keys ``recall``, ``porno_recall`` and ``shard`` (a resource id). The
        recall values are summed over all children.

        Args:
            api: REST client used to read the ``skynet_id`` of each shard resource.

        Returns:
            Dict with keys ``ts``, ``shards``, ``recall`` and ``porno_recall``.

        Raises:
            TaskFailure: if any child task's status is not SUCCESS.
        """
        logging.info('douing shard map')
        shards_map = dict()
        shards_map['ts'] = int(time.time())
        shards = dict()

        recall = 0.0
        porno_recall = 0.0

        for child_id in self.Context.childs_ids:
            # Look the child up once instead of on every attribute access.
            task = sdk2.Task[child_id]
            child_info = task.Parameters.shard_resouce_ext
            logging.info('child: %i out: %s', child_id, child_info)

            logging.info('task status:' + str(task.status))
            if task.status != ctt.Status.SUCCESS:
                raise TaskFailure('fail build')

            child_json = json.loads(child_info)
            res_id = child_json['shard']

            recall += self._parse_percent(child_json['recall'])
            porno_recall += self._parse_percent(child_json['porno_recall'])

            part = task.Parameters.part
            shard = dict()
            shard['id'] = part
            shard['res_id'] = res_id
            shard['skynet_id'] = api.resource[res_id].read()['skynet_id']
            shards[part] = shard

        shards_map['shards'] = shards
        shards_map['recall'] = recall
        shards_map['porno_recall'] = porno_recall
        return shards_map

    def download_resources(self):
        """Download every resource listed in the ``sandbox_resources`` parameter.

        Does nothing when the parameter is empty.
        """
        if self.Parameters.sandbox_resources:
            for resource in self.Parameters.sandbox_resources:
                logging.info('download resrouce %s id %i', str(resource), resource.id)
                utils.download_sandbox_resource(resource.id)
