# -*- coding: utf-8 -*-

import sandbox.projects.common.news.YtScriptTask as ys

from sandbox.projects.common import utils
from sandbox.projects.common import apihelpers
from sandbox.projects.common import solomon

from sandbox.sandboxsdk import parameters as sp
from sandbox.sandboxsdk import process
from sandbox.sandboxsdk.errors import SandboxTaskFailureError
from sandbox.sandboxsdk.channel import channel

import os
import tarfile
import shutil
import time


class Package(sp.ResourceSelector):
    # Optional NEWS_INDEXER_YT_PACKAGE resource. When it is left empty the
    # task looks up the last released resource of this type instead.
    resource_type = 'NEWS_INDEXER_YT_PACKAGE'
    required = False
    name = 'package'
    description = 'Builder package. Leave empty if you want the last released resource'


class NewsData(sp.ResourceSelector):
    # Optional NEWS_BACKOFFICE_DATA resource. When it is left empty the task
    # searches for the last READY resource carrying the attribute production=1.
    resource_type = 'NEWS_BACKOFFICE_DATA'
    required = False
    name = 'news_data'
    description = 'Archive with news data. leave empty if you want the last resource'


class Storage(sp.SandboxStringParameter):
    # Storage path; the task forwards it as `--storage` to both the
    # yt_media_mapping binary and the main script.
    default_value = "//home/news/storage/info/entries"
    required = True
    name = 'storage'
    description = 'Path to storage'


class MediaMeta(sp.SandboxStringParameter):
    # Media metainfo prefix; the task forwards it as `--meta` to the
    # yt_media_mapping binary.
    default_value = "//home/news/storage/meta"
    required = True
    name = 'media_meta'
    description = 'Prefix of metainfo for media'


class Media(sp.SandboxStringParameter):
    # Media data prefix; used as `--dest` for yt_media_mapping and as
    # `--media-prefix` for the main script.
    default_value = "//home/news-prod/archive/media"
    required = True
    name = 'media'
    description = 'Prefix of media data'


class Result(sp.SandboxStringParameter):
    # Output table prefix; the task forwards it to the main script as
    # `--target-prefix`.
    default_value = "//home/news-prod/archive/newsd"
    required = True
    name = 'result'
    description = 'Prefix of output tables'


class Since(sp.SandboxIntegerParameter):
    # Optional start date. The task requires it to be given together with
    # `to`; when unset, yt_media_mapping is started with "20000101".
    default_value = None
    required = False
    name = 'since'
    description = 'Date to start from (inclusive) format: YYYYMMDD'


class To(sp.SandboxIntegerParameter):
    # Optional end date. The task requires it to be given together with
    # `since`; when unset, yt_media_mapping is started with today's date.
    default_value = None
    required = False
    name = 'to'
    description = 'Date to stop at (exclusive) format: YYYYMMDD'


class ShardsCount(sp.SandboxIntegerParameter):
    # Forwarded to the main script as `--shards-count`.
    default_value = 78
    required = True
    name = 'shards_count'
    description = 'Shards count'


class Clear(sp.SandboxBoolParameter):
    # When set, the task adds `--rewrite` to the main script command line and
    # refuses to run unless both `since` and `to` are provided.
    default_value = False
    name = 'clear'
    description = 'Clear output tables before start'


class Threads(sp.SandboxIntegerParameter):
    # Forwarded to the main script as `--threads`.
    default_value = 8
    required = True
    name = 'threads'
    description = 'Number of threads on worker'


class DoNotRemapMedia(sp.SandboxBoolParameter):
    # When set, the task skips launching the yt_media_mapping binary and goes
    # straight to the main script.
    default_value = False
    name = 'do_not_run_yt_media_mapping'
    description = 'Do not run yt_media_mapping'


class AdditionalScriptParameters(sp.ListRepeater, sp.SandboxStringParameter):
    # Extra arguments; the task appends them verbatim to the end of the main
    # script command line.
    required = False
    default_value = []
    name = 'additional_script_parameters'
    description = 'Additional command line parameters for the main script'


class AdditionalEnvironmentVariables(sp.ListRepeater, sp.SandboxStringParameter):
    # Extra environment entries; the task cuts each one at the first `=` and
    # adds the resulting name/value pair to the environment of the processes
    # it launches.
    required = False
    default_value = []
    name = 'additional_environment_variables'
    description = 'Additional environment variables for scripts (`key=value` format)'


class VaultTvmEnv(sp.SandboxStringParameter):
    # `owner:key` pair; the task passes the two halves to get_vault_data() and
    # uses the returned secret as the token for Solomon pushes.
    required = True
    name = 'vault_solomon_tvm_env'
    description = 'Solomon TVM token from Vault, format: vault_owner:vault_key'


class RunNewsArchiveClustering(ys.YtScript):
    '''
    Run Builder on archive in YT
    '''

    # Task flow (see on_execute): validate the date parameters, fetch the
    # package and news data resources, optionally run the yt_media_mapping
    # binary from the package, run the main script, and push "start"/"finish"
    # timestamp sensors to Solomon around the work.

    type = 'RUN_NEWS_ARCHIVE_CLUSTERING'

    cores = 1

    input_parameters = ys.get_base_params() + [
        VaultTvmEnv,
        Package,
        NewsData,
        Storage,
        MediaMeta,
        Media,
        Result,
        Since,
        To,
        ShardsCount,
        Clear,
        Threads,
        DoNotRemapMedia,
        AdditionalScriptParameters,
        AdditionalEnvironmentVariables,
    ]

    def on_execute(self):
        '''
        Run the optional media mapping step and the main clustering script.

        Raises:
            SandboxTaskFailureError: if the vault spec or an additional
                environment entry is malformed, if the date parameters are
                inconsistent, or if no news data resource can be found.
        '''
        token = self.get_token()
        solomon_tvm = self.__fetch_solomon_secret()

        since_date = self.ctx.get(Since.name)
        to_date = self.ctx.get(To.name)
        rewrite = self.ctx.get(Clear.name)

        if rewrite and not (since_date and to_date):
            raise SandboxTaskFailureError('Start and finish date are required in "clear" mode')

        if bool(since_date) ^ bool(to_date):
            raise SandboxTaskFailureError('You have to specify both start and finish date')

        package = self.sync_resource(self.__resolve_package_id())
        news_data = self.sync_resource(self.__resolve_news_data_id())

        scriptdir = self.get_scripts()
        python_binary = self.get_python()
        pool = self.get_pool()

        env = self.__build_environment(scriptdir, token, pool)
        yt_media_mapping = self.__extract_media_mapping_binary(package)

        # yt_media_mapping always gets an explicit range, even when the task
        # itself was started without one.
        since = str(since_date) if since_date else "20000101"
        to = str(to_date) if to_date else time.strftime("%Y%m%d")

        self.__push_timestamp_sensor('start', solomon_tvm)

        media_cmd = [
            yt_media_mapping,
            "--yt-proxy", self.get_yt_proxy(),
            "--storage", self.ctx[Storage.name],
            "--meta", self.ctx[MediaMeta.name],
            "--dest", self.ctx[Media.name],
            "--since", since,
            "--to", to,
        ]
        if not self.ctx.get(DoNotRemapMedia.name):
            self.__run_process_with_output_link(
                media_cmd,
                work_dir=self.path(),
                environment=env,
                log_prefix='media_mapping',
                process_name='media_mapping',
            )

        cmd = [
            python_binary, self.get_cmdline(),
            "--yt-proxy", self.get_yt_proxy(),
            "--since-file", "since",
            "--storage", self.ctx[Storage.name],
            "--media-prefix", self.ctx[Media.name],
            "--package", package,
            "--news-data", news_data,
            "--target-prefix", self.ctx[Result.name],
            "--threads", str(self.ctx[Threads.name]),
            "--shards-count", str(self.ctx[ShardsCount.name]),
        ]
        if pool:
            cmd.extend(["--pool", pool])
        if since_date:
            # The validation above guarantees that to_date is set as well.
            cmd.extend(["--since", str(since_date), "--to", str(to_date)])
            # Dates are YYYYMMDD integers, so a difference above 30000 means
            # a range longer than roughly 3 years; such runs are allowed more
            # failed jobs.
            if to_date - since_date > 30000:
                cmd.extend(["--max-failed-jobs", "1000"])
        if rewrite:
            cmd.append("--rewrite")
        cmd.extend(utils.get_or_default(self.ctx, AdditionalScriptParameters))
        self.__run_process_with_output_link(
            cmd,
            work_dir=scriptdir,
            environment=env,
            log_prefix='script_run',
            process_name='script',
        )

        # Reached only when both processes finished successfully.
        self.__push_timestamp_sensor('finish', solomon_tvm)

    def __fetch_solomon_secret(self):
        '''
        Read the Solomon secret referenced by the VaultTvmEnv parameter.

        Raises:
            SandboxTaskFailureError: if the parameter is not exactly one
                `owner:key` pair.
        '''
        vault_spec = self.ctx.get(VaultTvmEnv.name) or ''
        parts = vault_spec.split(':')
        if len(parts) != 2:
            # Report a bad parameter the same way as the other input checks
            # instead of failing with a bare unpacking error.
            raise SandboxTaskFailureError(
                'Parameter "{0}" must have the format vault_owner:vault_key'.format(VaultTvmEnv.name)
            )
        owner, key = parts
        return self.get_vault_data(owner, key)

    def __resolve_package_id(self):
        '''Return the selected package resource id, or the last released one.'''
        package_resource_id = self.ctx.get(Package.name)
        if not package_resource_id:
            package_resource_id = utils.get_and_check_last_released_resource_id(
                Package.resource_type,
                arch=self.client_info['arch']
            )
        return package_resource_id

    def __resolve_news_data_id(self):
        '''
        Return the selected news data resource id, or the id of the last
        READY resource with the attribute production=1.

        Raises:
            SandboxTaskFailureError: if no such resource is found.
        '''
        news_data_resource_id = self.ctx.get(NewsData.name)
        if news_data_resource_id:
            return news_data_resource_id

        res = apihelpers.get_last_resource_with_attrs(
            resource_type=NewsData.resource_type,
            attrs={
                'production': 1
            },
            params={
                'status': 'READY',
                'omit_failed': True,
            }
        )
        if res is None:
            raise SandboxTaskFailureError('Cannot find resource ' + str(NewsData.resource_type))
        return res.id

    def __build_environment(self, scriptdir, token, pool):
        '''
        Build the environment shared by both launched processes.

        Starts from a copy of the current process environment and then sets
        PYTHONPATH, YT_TOKEN, optionally YT_POOL, the YT log level and the
        user-supplied additional variables (which are applied last).

        Raises:
            SandboxTaskFailureError: if an additional variable has no `=`.
        '''
        env = os.environ.copy()
        env['PYTHONPATH'] = os.path.join(scriptdir, "lib/python")
        env['YT_TOKEN'] = token

        if pool:
            env['YT_POOL'] = pool

        if env.get('YT_LOG_LEVEL', '').upper() != 'DEBUG':
            env['YT_LOG_LEVEL'] = 'INFO'  # Use at least INFO verbosity level

        for entry in utils.get_or_default(self.ctx, AdditionalEnvironmentVariables):
            var_name, separator, var_value = entry.partition('=')
            if not separator:
                raise SandboxTaskFailureError(
                    'Invalid additional environment variable {0!r}: expected `key=value` format'.format(entry)
                )
            env[var_name] = var_value
        return env

    def __extract_media_mapping_binary(self, package):
        '''
        Unpack bin/yt_media_mapping from the package archive into a fresh
        "bin" directory of the task and return the path to the binary.
        '''
        bin_dir = self.path("bin")
        if os.path.exists(bin_dir):
            shutil.rmtree(bin_dir)
        os.makedirs(bin_dir)
        # The context manager closes the archive even if extraction fails.
        with tarfile.open(package) as tar:
            tar.extract("bin/yt_media_mapping", path=bin_dir)
        return os.path.join(bin_dir, "bin/yt_media_mapping")

    def __push_timestamp_sensor(self, sensor_name, token):
        '''
        Push one Solomon sensor whose value is the current unix time.

        `sensor_name` becomes the `sensor` label ('start' or 'finish' in this
        task); `token` is passed through to push_to_solomon_v2.
        '''
        # One clock reading so that `ts` and `value` can never disagree.
        now = int(time.time())
        common_labels = {
            'project': 'news',
            'cluster': 'main',
            'service': 'main',
        }
        sensors = [
            {
                'labels': {
                    'archive': 'clustering',
                    'sensor': sensor_name,
                },
                'ts': now,
                'value': now,
            }
        ]
        solomon.push_to_solomon_v2(params=common_labels, sensors=sensors, token=token)

    def __run_process_with_output_link(self, *args, **kwargs):
        '''
        Start a process, publish a link to its stdout in the task info, then
        wait for it and check its return code.

        All arguments are forwarded to process.run_process, except:
        `process_name` (keyword, default "Process") is only used in the info
        message, and `wait` is always forced to False so that the link can be
        published while the process is still running.
        '''
        process_name = kwargs.pop("process_name", "Process")
        kwargs['wait'] = False
        proc = process.run_process(*args, **kwargs)
        # The link is best-effort: it is skipped when the task has no log
        # resource id or the resource cannot be fetched.
        logs_rid = getattr(getattr(channel.task, '_log_resource', None), 'id', None)
        if logs_rid is not None:
            res = channel.sandbox.get_resource(logs_rid)
            if res:
                url = '/'.join([res.proxy_url, proc.stdout_path_filename])
                self.set_info('{0} started. <a href="{1}" target="_blank">output</a>'.format(process_name, url), do_escape=False)
        proc.wait()
        process.check_process_return_code(proc)


# Module-level alias of the task class; presumably this is how the Sandbox
# task loader discovers the task defined in this module — TODO confirm.
__Task__ = RunNewsArchiveClustering
