# -*- coding: utf-8 -*-

from sandbox.projects.common.news.YtScriptTaskV2 import YtScriptV2

from sandbox.projects import resource_types

from sandbox import sdk2
from sandbox.projects.common import utils
from sandbox.projects.common import apihelpers
from sandbox.sandboxsdk import sandboxapi
from sandbox.sandboxsdk import parameters as sp
from sandbox.sandboxsdk import process
from sandbox.sandboxsdk.channel import channel
from sandbox.sandboxsdk.errors import SandboxTaskFailureError

from sandbox.yasandbox import manager


import os
import logging
import shutil
import json


class MakeArcNewsdShardV2(YtScriptV2):
    '''
    Download and register archive slave_newsd shard

    The task runs the YT script supplied by YtScriptV2 to dump the newsd table
    into a local ``shard_data`` directory, computes md5 sums of the shard files
    and compares them with the ``shard_md5`` attribute of the last READY
    SLAVE_NEWSD_ARCHIVE_SHARD resource that has the same ``shard_number``.
    If the sums match (and ``restore_shard`` is off) the old resource is only
    touched; otherwise the directory is configured and registered with the
    ISS_SHARDS tool and published as a new resource.
    '''

    class Requirements(YtScriptV2.Requirements):
        disk_space = 40 * 1024
        ram = 8192

    class Parameters(YtScriptV2.Parameters):

        newsd_table = sdk2.parameters.String("Path to newsd table on YT", required=True, default_value="//home/news-prod/archive/newsd/newsd")
        # Either `days` or both bounds must be set; `days` wins when both are given.
        lower_bound = sdk2.parameters.Integer("Lower bound of shard (inclusive), format: YYYYMMDD", required=False, default_value=None)
        upper_bound = sdk2.parameters.Integer("Upper bound of shard (exclusive), format: YYYYMMDD", required=False, default_value=None)
        days = sdk2.parameters.String("List of days for shard instead of upper and lower bound", required=False, default_value=None)
        shard_name = sdk2.parameters.String("Shard name", required=True, default_value=None)
        shard_number = sdk2.parameters.Integer("Shard number", required=True, default_value=None)
        newsd_statework_binary = sdk2.parameters.Resource("Resource with newsd_statework", required=True, resource_type=resource_types.NEWSD_STATEWORK)
        ttl = sdk2.parameters.Integer("TTL of shard resource", required=True, default=14)
        # When set, the md5 comparison is skipped and a failing `register` step is tolerated.
        restore_shard = sdk2.parameters.Bool("Restore this shard", default_value=False)

    def _get_iss_shards_tool(self):
        '''
        Return the local path of the last released ISS_SHARDS tool.

        Returns None (after logging the error with its traceback) when looking
        up or resolving the resource raises; callers must check for None.
        '''
        try:
            tool_id = utils.get_and_check_last_released_resource_id(
                resource_types.ISS_SHARDS,
                arch=sandboxapi.ARCH_ANY
            )
            return str(sdk2.ResourceData(sdk2.Resource[tool_id]).path)
        except Exception:
            logging.exception("Cannot get latest stable ISS_SHARDS tool")
            return None

    def calc_shard_md5(self, directory):
        '''
        Run ``md5sum`` over the shard files inside `directory`.

        Returns a dict mapping each file name to its md5 digest string.
        '''
        p = process.run_process(
            ["md5sum", "stindex", "stories", "urlindex", "newsdata2", "tagindex"],
            work_dir=directory,
            log_prefix='md5sum',
            outputs_to_one_file=False,
        )
        md5 = {}
        with open(p.stdout_path) as fd:
            for line in fd:
                line = line.rstrip()
                if not line:
                    continue
                # md5sum prints "<digest>  <name>"; split only once so the
                # name part is never broken up.
                md5sum, filename = line.split("  ", 1)
                md5[filename] = md5sum
        return md5

    def _build_script_cmdline(self, python_binary, newsd_statework, shard_dir):
        '''
        Build the shell command line that dumps the shard into `shard_dir`.

        Raises SandboxTaskFailureError when neither `days` nor both bounds
        are set in the task parameters.
        '''
        cmdline = "{python} {cmd} --yt-proxy {proxy} --table {table} --binary {binary} --out-dir {out}".format(
            python=python_binary,
            cmd=self.get_cmdline(),
            proxy=self.get_yt_proxy(),
            table=self.Parameters.newsd_table,
            binary=newsd_statework,
            out=shard_dir
        )
        # Truthiness check: an empty `days` string must fall through to the
        # bounds instead of producing a dangling "--days" option.
        if self.Parameters.days:
            return cmdline + " --days " + self.Parameters.days
        if self.Parameters.lower_bound is not None and self.Parameters.upper_bound is not None:
            return cmdline + " --since {} --to {}".format(self.Parameters.lower_bound, self.Parameters.upper_bound)
        raise SandboxTaskFailureError("Bad task params: failed to get days for shard")

    def _load_last_shard_md5(self, last_shard):
        '''
        Return the decoded ``shard_md5`` attribute of `last_shard`.

        Returns None when the attribute is missing or is not valid JSON, so
        the caller treats the shard as changed instead of crashing.
        '''
        md5_json_str = channel.sandbox.get_resource_attribute(last_shard.id, attribute_name="shard_md5")
        if not md5_json_str:
            logging.info("Last resource has no shard_md5 attribute")
            return None
        try:
            return json.loads(md5_json_str)
        except ValueError:
            logging.exception("Cannot parse shard_md5 attribute of last resource")
            return None

    def _publish_shard(self, shard_dir, shard_md5, restore):
        '''
        Configure and register `shard_dir` with ISS_SHARDS and publish it
        as a SLAVE_NEWSD_ARCHIVE_SHARD resource.

        Raises SandboxTaskFailureError when the ISS_SHARDS tool is unavailable.
        '''
        iss_shards = self._get_iss_shards_tool()
        if iss_shards is None:
            # Without this check the literal string "None" would be executed.
            raise SandboxTaskFailureError("Cannot get latest stable ISS_SHARDS tool")

        work_dir = str(self.path())
        process.run_process(
            [
                iss_shards,
                'configure',
                shard_dir,
                '--id',
                self.Parameters.shard_name
            ],
            work_dir=work_dir,
            log_prefix='iss_shards_configure'
        )

        # In restore mode a failing `register` is tolerated (check=False).
        process.run_process(
            [
                iss_shards,
                'register',
                shard_dir
            ],
            work_dir=work_dir,
            log_prefix='iss_shards_register',
            check=not restore
        )

        resource = resource_types.SLAVE_NEWSD_ARCHIVE_SHARD(
            self,
            'Slave_newsd shard - ' + self.Parameters.shard_name,
            shard_dir,
            arch='any',
            ttl=self.Parameters.ttl,
            shard_name=self.Parameters.shard_name,
            shard_number=self.Parameters.shard_number,
            shard_md5=json.dumps(shard_md5, ensure_ascii=False).encode('utf-8'),
        )
        sdk2.ResourceData(resource).ready()

    def on_execute(self):
        '''
        Build the shard, then either refresh the previous resource (same md5)
        or publish a new one.
        '''
        restore = self.Parameters.restore_shard

        # Always start from an empty output directory.
        shard_dir = str(self.path('shard_data'))
        if os.path.exists(shard_dir):
            shutil.rmtree(shard_dir)
        os.makedirs(shard_dir)

        resource_id = self.Parameters.newsd_statework_binary
        newsd_statework = str(sdk2.ResourceData(sdk2.Resource[resource_id]).path)
        # `0o775` is the octal spelling accepted by both Python 2.6+ and 3.
        os.chmod(newsd_statework, 0o775)

        scriptdir = self.get_scripts()
        python_binary = self.get_python()

        env = os.environ.copy()
        env['PYTHONPATH'] = os.path.join(scriptdir, "lib/python")
        env['YT_TOKEN'] = self.get_token()

        cmdline = self._build_script_cmdline(python_binary, newsd_statework, shard_dir)

        # NOTE(review): task parameters are interpolated unquoted into a
        # command run with shell=True; values containing shell metacharacters
        # will be interpreted by the shell.
        process.run_process(cmdline, shell=True, work_dir=scriptdir, environment=env, log_prefix='script_run')

        shard_md5 = self.calc_shard_md5(shard_dir)

        last_shard = apihelpers.get_last_resource_with_attribute(
            resource_type=resource_types.SLAVE_NEWSD_ARCHIVE_SHARD,
            attribute_name='shard_number',
            attribute_value=str(self.Parameters.shard_number),
            status='READY',
        )
        if last_shard is None:
            logging.info("Last resource not found")
        else:
            logging.info("Last resource: {}".format(last_shard))

        if last_shard is not None and not restore:
            last_shard_md5 = self._load_last_shard_md5(last_shard)
            if last_shard_md5 is not None and shard_md5 == last_shard_md5:
                # Nothing changed: touch the old resource instead of
                # publishing a duplicate.
                manager.resource_manager.touch(last_shard.id)
                logging.info("MD5 are same, just update old resource")
                return
            logging.info("MD5 differ, publish new resource")

        self._publish_shard(shard_dir, shard_md5, restore)
