# -*- coding: utf-8 -*-

import sandbox.projects.common.news.YtScriptTask as ys

from sandbox.projects import resource_types

from sandbox.projects.common import utils
from sandbox.projects.common import apihelpers
from sandbox.sandboxsdk import sandboxapi
from sandbox.sandboxsdk import parameters as sp
from sandbox.sandboxsdk import process
from sandbox.sandboxsdk.channel import channel
from sandbox.sandboxsdk.errors import SandboxTaskFailureError

import os
import logging
import shutil
import json


class NewsdTable(sp.SandboxStringParameter):
    # String parameter holding the YT path of the newsd table; its value is
    # forwarded to the shard-building script via the --table option.
    required = True
    default_value = '//home/news-prod/archive/newsd/newsd'
    name = "newsd_table"
    description = "Path to newsd table on YT"


class ShardName(sp.SandboxStringParameter):
    # Mandatory string parameter with no default: the shard identifier that
    # is passed to `iss_shards configure --id` and stored on the resource.
    required = True
    default_value = None
    name = "shard"
    description = "Shard name"


class ShardNumber(sp.SandboxIntegerParameter):
    # Mandatory integer parameter with no default. Its name doubles as the
    # resource attribute used to look up the previously built shard.
    required = True
    default_value = None
    name = "shard_number"
    description = "Shard number"


class LowerBound(sp.SandboxIntegerParameter):
    # Optional integer parameter; forwarded to the script as --since.
    # Only used together with UpperBound and when no day list is given.
    required = False
    default_value = None
    name = "lower_bound"
    description = "Lower bound of shard (inclusive), format: YYYYMMDD"


class UpperBound(sp.SandboxIntegerParameter):
    # Optional integer parameter; forwarded to the script as --to.
    # Only used together with LowerBound and when no day list is given.
    required = False
    default_value = None
    name = "upper_bound"
    description = "Upper bound of shard (exclusive), format: YYYYMMDD"


class Days(sp.SandboxStringParameter):
    # Optional string parameter; when set it is forwarded to the script as
    # --days and takes precedence over the lower/upper bound pair.
    required = False
    default_value = None
    name = "days"
    description = "List of days for shard instead of upper and lower bound"


class Binary(sp.ResourceSelector):
    # Mandatory resource selector restricted to NEWSD_STATEWORK resources;
    # the chosen resource is synced and handed to the script as --binary.
    resource_type = resource_types.NEWSD_STATEWORK
    required = True
    name = "binary"
    description = "Resource with newsd_statework"


class Ttl(sp.SandboxIntegerParameter):
    # Mandatory integer parameter (default 14) written to the 'ttl'
    # attribute of the created shard resource.
    required = True
    default_value = 14
    name = "ttl"
    description = "TTL of shard resource"


class Restore(sp.SandboxBoolParameter):
    # Boolean switch, off by default. When on, the task rebuilds the shard
    # resource even if a previous shard with the same number exists, and the
    # `iss_shards register` step is run with check disabled.
    required = True
    default_value = False
    name = "restore"
    description = "Restore this shard"


class MakeArcNewsdShard(ys.YtScript):
    '''
    Download and register archive slave_newsd shard
    '''

    # Overview of on_execute():
    #   1. run the YT script (with the newsd_statework binary) to dump shard
    #      files into a fresh 'shard_data' directory;
    #   2. checksum the shard files and compare them with the checksums stored
    #      on the last READY shard resource that has the same shard number;
    #   3. if nothing changed (and the task is not in restore mode) just touch
    #      the old resource, otherwise configure/register the shard with the
    #      iss_shards tool and publish a new SLAVE_NEWSD_ARCHIVE_SHARD resource.

    type = 'MAKE_ARCHIVE_NEWSD_SHARD'

    execution_space = 40 * 1024
    required_ram = 8192

    input_parameters = ys.get_base_params() + [
        NewsdTable,
        LowerBound,
        UpperBound,
        Days,
        ShardName,
        ShardNumber,
        Binary,
        Ttl,
        Restore
    ]

    def _get_iss_shards_tool(self):
        """Sync the last released ISS_SHARDS resource and return its local path.

        Returns None (after logging the error) when the resource cannot be
        found or synced; callers must check for that.
        """
        try:
            tool_id = utils.get_and_check_last_released_resource_id(
                resource_types.ISS_SHARDS,
                arch=sandboxapi.ARCH_ANY
            )
            return self.sync_resource(tool_id)
        except Exception as e:
            logging.error("Cannot get latest stable ISS_SHARDS tool: %s", e)
            return None

    def calc_shard_md5(self, directory):
        """Run ``md5sum`` over the shard files inside *directory*.

        Returns a dict mapping file name -> hex digest, parsed from the
        stdout of the ``md5sum`` process.
        """
        md5 = {}
        p = process.run_process(
            ["md5sum", "stindex", "stories", "urlindex", "newsdata2", "tagindex"],
            work_dir=directory,
            log_prefix='md5sum',
            # stdout must not be mixed with stderr, it is parsed below
            outputs_to_one_file=False,
        )
        with open(p.stdout_path) as fd:
            for line in fd:
                line = line.rstrip()
                if not line:
                    continue
                # md5sum prints "<digest>  <name>"; split only once so that a
                # name containing two consecutive spaces does not break unpacking
                (md5sum, filename) = line.split("  ", 1)
                md5[filename] = md5sum
        return md5

    @staticmethod
    def _share_shard_data(directory):
        """ shares shard data without registration """

        # imported lazily: the module is only needed for this step
        import api.copier
        copier = api.copier.Copier()
        files = os.listdir(directory)
        resid = copier.create(files, cwd=directory).resid()
        logging.info("Share content of directory %s %s", directory, resid)

    def _shard_period_args(self):
        """Return the command-line suffix selecting the days of the shard.

        An explicit day list wins over the lower/upper bound pair.

        Raises SandboxTaskFailureError when neither a day list nor both
        bounds are provided.
        """
        days = self.ctx.get(Days.name)
        # truthiness check: an empty string must not produce a dangling "--days"
        if days:
            return " --days " + days

        lower = self.ctx.get(LowerBound.name)
        upper = self.ctx.get(UpperBound.name)
        if lower and upper:
            return " --since {} --to {}".format(lower, upper)

        raise SandboxTaskFailureError("Bad task params: failed to get days for shard")

    @staticmethod
    def _load_shard_md5(resource_id):
        """Return the checksum dict stored in the 'shard_md5' attribute of a resource.

        Returns None when the attribute is absent or is not valid JSON, so that
        the caller treats the shard as changed instead of crashing.
        """
        md5_json_str = channel.sandbox.get_resource_attribute(resource_id, attribute_name="shard_md5")
        if not md5_json_str:
            return None
        try:
            return json.loads(md5_json_str)
        except (TypeError, ValueError):
            logging.warning("Resource %s has malformed shard_md5 attribute", resource_id)
            return None

    def on_execute(self):
        """Build the shard and publish it unless it is identical to the last one.

        Raises SandboxTaskFailureError on bad day/bound parameters or when the
        ISS_SHARDS tool cannot be obtained.
        """
        from sandbox.yasandbox.api.xmlrpc.resource import touch_resource

        restore = self.ctx[Restore.name]

        # always start from an empty output directory
        shard_dir = self.path('shard_data')
        if os.path.exists(shard_dir):
            shutil.rmtree(shard_dir)
        os.makedirs(shard_dir)

        resource_id = self.ctx[Binary.name]
        newsd_statework = self.sync_resource(resource_id)
        # make sure the synced binary is executable
        # (0o775 is accepted by Python 2.6+ and Python 3, unlike 0775)
        os.chmod(newsd_statework, 0o775)

        scriptdir = self.get_scripts()
        python_binary = self.get_python()

        env = os.environ.copy()
        python_path = os.path.join(scriptdir, "lib/python")
        env['PYTHONPATH'] = python_path

        # the token travels via the environment, not via the command line
        token = self.get_token()
        env['YT_TOKEN'] = token
        env['YT_LOG_LEVEL'] = 'debug'

        # NOTE(review): parameter values are interpolated unquoted into a
        # command that is run through the shell; they must not contain shell
        # metacharacters.
        cmdline = "{python} {cmd} --yt-proxy {proxy} --table {table} --binary {binary} --out-dir {out}".format(
            python=python_binary,
            cmd=self.get_cmdline(),
            proxy=self.get_yt_proxy(),
            table=self.ctx[NewsdTable.name],
            binary=newsd_statework,
            out=shard_dir
        )
        cmdline += self._shard_period_args()

        process.run_process(cmdline, shell=True, work_dir=scriptdir, environment=env, log_prefix='script_run')

        shard_md5 = self.calc_shard_md5(shard_dir)

        last_shard = apihelpers.get_last_resource_with_attribute(
            resource_type=resource_types.SLAVE_NEWSD_ARCHIVE_SHARD,
            attribute_name=ShardNumber.name,
            attribute_value=str(self.ctx[ShardNumber.name]),
            status='READY',
        )
        if last_shard is not None and not restore:
            # identical content: keep the existing resource and stop here
            if shard_md5 == self._load_shard_md5(last_shard.id):
                touch_resource(last_shard.id)
                return

        iss_shards = self._get_iss_shards_tool()
        if iss_shards is None:
            # fail with a clear message instead of passing None as the executable
            raise SandboxTaskFailureError("Cannot get latest stable ISS_SHARDS tool")

        process.run_process(
            [
                iss_shards,
                'configure',
                shard_dir,
                '--id',
                self.ctx[ShardName.name]
            ],
            work_dir=self.path(),
            log_prefix='iss_shards_configure'
        )

        # pre-sharing is best effort: a failure is logged and ignored
        try:
            self._share_shard_data(shard_dir)
        except Exception:
            logging.exception("Unable to preshare shard's content (non critical error)")

        process.run_process(
            [
                iss_shards,
                'register',
                shard_dir
            ],
            work_dir=self.path(),
            log_prefix='iss_shards_register',
            # the check is switched off in restore mode
            check=not restore
        )

        resource = self.create_resource(
            description='Slave_newsd shard - ' + self.ctx[ShardName.name],
            resource_path=shard_dir,
            resource_type=resource_types.SLAVE_NEWSD_ARCHIVE_SHARD,
            arch='any',
            attributes={
                'ttl': self.ctx[Ttl.name],
                'shard_name': self.ctx[ShardName.name],
                'shard_number': self.ctx[ShardNumber.name],
                # stored so that the next run can detect an unchanged shard
                'shard_md5': json.dumps(shard_md5, ensure_ascii=False).encode('utf-8'),
            }
        )

        self.mark_resource_ready(resource.id)


# Module-level alias of the task class defined above.
# NOTE(review): presumably the Sandbox task loader discovers the task through
# this conventional name -- confirm before renaming or removing it.
__Task__ = MakeArcNewsdShard
