from sandbox import sdk2

from sandbox.common.types import task as ctt
from sandbox.projects import resource_types
from sandbox.projects.common.news.YtScriptTaskV2 import YtScriptV2
from sandbox.sandboxsdk.errors import SandboxTaskFailureError
from sandbox.sandboxsdk import process
from sandbox.sandboxsdk.channel import channel

import logging
import os
import tarfile
import shutil
import time


class RunNewsArchiveClusteringV2(YtScriptV2):
    """Sandbox task that runs the news archive clustering script on YT.

    The task extracts the ``yt_media_mapping`` binary from the builder
    package, optionally runs it, then launches the main script returned by
    ``self.get_cmdline()``.  Outside test mode it also pushes ``start`` and
    ``finish`` timestamp sensors to Solomon.
    """

    class Parameters(YtScriptV2.Parameters):

        # Secrets: the Solomon token is only read outside test mode, the YQL
        # token only when a secret is selected (see on_execute).
        solomon_tvm_vault_selector = sdk2.parameters.YavSecret("Solomon TVM token from YAV", required=True)
        solomon_tvm_vault_field = sdk2.parameters.String("Token field in YAV secret", required=True, default="monitoring-oauth")
        yql_vault_selector = sdk2.parameters.YavSecret("YQL token from YAV", required=False)
        yql_token_field = sdk2.parameters.String("Token field in YAV secret", required=True, default="asp437_yql_token")
        # Input resources; when left empty the latest suitable resource is
        # looked up in on_execute.
        package = sdk2.parameters.Resource("Builder package. Leave empty if you want the last released resource", resource_type=resource_types.NEWS_INDEXER_YT_PACKAGE)
        news_data = sdk2.parameters.Resource("Archive with news data. leave empty if you want the last resource", resource_type=resource_types.NEWS_BACKOFFICE_DATA)
        # YT paths passed through to the scripts.
        storage_path = sdk2.parameters.String("Path to storage with documents", required=True, default_value="//home/news/storage/info/entries")
        media_meta_prefix = sdk2.parameters.String("Prefix path for media metainfo", required=True, default_value="//home/news/storage/meta")
        media_prefix = sdk2.parameters.String("Prefix path for media storage", required=True, default_value="//home/news/storage/media")
        result_prefix = sdk2.parameters.String("Prefix path for output tables", required=True, default_value="//home/news-prod/archive/newsd")
        # Date range as YYYYMMDD integers; either both or neither must be set.
        since = sdk2.parameters.Integer("Data to start from (inclusive) format: YYYYMMDD", default_value=None)
        to = sdk2.parameters.Integer("Data to stop at (exclusive) format: YYYYMMDD", default_value=None)
        shards_count = sdk2.parameters.Integer("Shards count", required=True, default_value=112)
        clear_build = sdk2.parameters.Bool("Clear output tables before start", default_value=False)
        threads = sdk2.parameters.Integer("Number of threads on worker", default_value=8)
        do_not_remap_media = sdk2.parameters.Bool("Do not run yt_media_mapping", default_value=False)
        yt_media_mapping_use_dynstorage = sdk2.parameters.Bool("Use dynstorage in yt_media_mapping", default_value=False)
        with yt_media_mapping_use_dynstorage.value[True]:
            media_images_path = sdk2.parameters.String("Prefix path for media images info")
            media_extlinks_path = sdk2.parameters.String("Prefix path for media extlinks info")
            media_video_path = sdk2.parameters.String("Prefix path for media videos info")
            media_hosted_video_path = sdk2.parameters.String("Prefix path for hosted videos info")

        additional_script_parameters = sdk2.parameters.List("Additional command line parameters for the main script")
        additional_environment_variables = sdk2.parameters.List("Additional environment variables for scripts (`key=value` format)")
        test_mode = sdk2.parameters.Bool("Run in test mode", default_value=False)
        solomon_cluster_label = sdk2.parameters.String("Value for label `cluster` in Solomon. Default 'main'", default_value="main")

    def on_save(self):
        """Default the end date to today when saving a test-mode task without one."""
        if self.Parameters.test_mode and not self.Parameters.to:
            # `to` is an Integer parameter (YYYYMMDD), so store an int rather
            # than the string produced by strftime.
            self.Parameters.to = int(time.strftime("%Y%m%d"))

    def _extract_yt_media_mapping(self, package):
        """Extract ``bin/yt_media_mapping`` from the package tarball into a fresh ``bin`` dir.

        Returns the path to the extracted binary.
        """
        bin_dir = str(self.path('bin'))
        # Start from an empty directory so a stale binary is never reused.
        if os.path.exists(bin_dir):
            shutil.rmtree(bin_dir)
        os.makedirs(bin_dir)
        # Context manager guarantees the archive handle is closed.
        with tarfile.open(package) as tar:
            tar.extract("bin/yt_media_mapping", path=bin_dir)
        return os.path.join(bin_dir, 'bin/yt_media_mapping')

    def _build_media_mapping_cmd(self, yt_media_mapping, since, to):
        """Build the ``yt_media_mapping`` command line for the given date range (strings)."""
        use_dynstorage = self.Parameters.yt_media_mapping_use_dynstorage
        media_cmd = [
            yt_media_mapping,
            '--yt-proxy', self.get_yt_proxy(),
            '--storage', self.Parameters.storage_path,
        ]
        if not use_dynstorage:
            # Meta tables are only an input in the non-dynstorage mode.
            media_cmd += ['--meta', self.Parameters.media_meta_prefix]
        media_cmd += [
            '--dest', self.Parameters.media_prefix,
            '--since', since,
            '--to', to,
        ]
        if use_dynstorage:
            media_cmd += [
                '--dynstorage-images', self.Parameters.media_images_path,
                '--dynstorage-extlinks', self.Parameters.media_extlinks_path,
                '--dynstorage-video', self.Parameters.media_video_path,
                # Hosted videos have their own path parameter; do not reuse
                # the plain video path here.
                '--dynstorage-hosted-video', self.Parameters.media_hosted_video_path,
                '--use-dynstorage-input',
            ]
        return media_cmd

    def _push_clustering_timestamp(self, sensor_name, common_labels, token):
        """Push the current unix time as Solomon sensor ``sensor_name``; no-op in test mode."""
        if self.Parameters.test_mode:
            return
        from sandbox.projects.common import solomon

        now = int(time.time())
        sensors = [
            {
                'labels': {
                    'archive': 'clustering',
                    'sensor': sensor_name,
                },
                'ts': now,
                'value': now,
            },
        ]
        solomon.push_to_solomon_v2(params=common_labels, sensors=sensors, token=token)

    def on_execute(self):
        """Run media mapping (unless disabled) and the main clustering script.

        Raises:
            SandboxTaskFailureError: if the date parameters are inconsistent,
                no NEWS_BACKOFFICE_DATA resource can be found, or an
                additional environment variable is not in ``key=value`` form.
        """
        from sandbox.projects.common import apihelpers, utils

        yt_token = self.get_token()

        # The Solomon token is only needed when sensors are actually pushed.
        solomon_tvm = ""
        if not self.Parameters.test_mode:
            solomon_tvm = self.Parameters.solomon_tvm_vault_selector.data()[self.Parameters.solomon_tvm_vault_field]

        yql_token = ""
        if self.Parameters.yql_vault_selector:
            yql_token = self.Parameters.yql_vault_selector.data()[self.Parameters.yql_token_field]

        if self.Parameters.clear_build and not (self.Parameters.since and self.Parameters.to):
            raise SandboxTaskFailureError('Start and finish date are required in "clear" mode')

        # Exactly one of the two dates being set is an error.
        if bool(self.Parameters.since) ^ bool(self.Parameters.to):
            raise SandboxTaskFailureError('You have to specify both start and finish date')

        package_resource_id = self.Parameters.package
        if not package_resource_id:
            # Test runs take the prestable package, regular runs the stable one.
            package_resource_id = utils.get_and_check_last_released_resource_id(
                resource_types.NEWS_INDEXER_YT_PACKAGE,
                release_status=ctt.ReleaseStatus.PRESTABLE if self.Parameters.test_mode else ctt.ReleaseStatus.STABLE,
            )
        package = str(sdk2.ResourceData(sdk2.Resource[package_resource_id]).path)

        news_data_resource_id = self.Parameters.news_data
        if not news_data_resource_id:
            res = apihelpers.get_last_resource_with_attrs(
                resource_type=resource_types.NEWS_BACKOFFICE_DATA,
                attrs={
                    'production': 1,
                },
                params={
                    'status': 'READY',
                    'omit_failed': True,
                },
            )
            if res is None:
                raise SandboxTaskFailureError("Cannot find NEWS_BACKOFFICE_DATA resource")
            news_data_resource_id = res.id
        news_data = str(sdk2.ResourceData(sdk2.Resource[news_data_resource_id]).path)

        scriptdir = self.get_scripts()
        python_binary = self.get_python()
        # Read once and reuse; assumes get_pool() has no side effects.
        pool = self.get_pool()

        env = os.environ.copy()
        env['PYTHONPATH'] = os.path.join(scriptdir, "lib/python")
        env['YT_TOKEN'] = yt_token
        env['YQL_TOKEN'] = yql_token

        if pool:
            env['YT_POOL'] = pool

        # Keep an inherited DEBUG level, otherwise force INFO.
        if env.get('YT_LOG_LEVEL', '').upper() != 'DEBUG':
            env['YT_LOG_LEVEL'] = 'INFO'

        for env_entry in self.Parameters.additional_environment_variables or []:
            env_name, separator, env_value = env_entry.partition('=')
            if not separator:
                # Report malformed user input as a task failure instead of an
                # opaque unpacking error.
                raise SandboxTaskFailureError(
                    'Additional environment variable "{}" is not in `key=value` format'.format(env_entry)
                )
            env[env_name] = env_value

        yt_media_mapping = self._extract_yt_media_mapping(package)

        # Date range for media mapping; falls back to 2010-01-01 .. today.
        since = str(self.Parameters.since) if self.Parameters.since else "20100101"
        to = str(self.Parameters.to) if self.Parameters.to else time.strftime("%Y%m%d")

        common_labels = {
            'project': 'news',
            'cluster': self.Parameters.solomon_cluster_label,
            'service': 'main'
        }
        self._push_clustering_timestamp('start', common_labels, solomon_tvm)

        if not self.Parameters.do_not_remap_media:
            self.run_process_with_output_link(
                self._build_media_mapping_cmd(yt_media_mapping, since, to),
                work_dir=str(self.path()),
                environment=env,
                log_prefix='media_mapping',
                process_name='media_mapping'
            )

        cmd = [
            python_binary, self.get_cmdline(),
            '--yt-proxy', self.get_yt_proxy(),
            # Literal file name; presumably resolved by the script relative to
            # its working directory (scriptdir) -- confirm against the script.
            '--since-file', 'since',
            '--storage', self.Parameters.storage_path,
            '--media-prefix', self.Parameters.media_prefix,
            '--package', package,
            '--news-data', news_data,
            '--target-prefix', self.Parameters.result_prefix,
            '--threads', str(self.Parameters.threads),
            '--shards-count', str(self.Parameters.shards_count),
        ]

        if pool:
            cmd.extend(['--pool', pool])
        if self.Parameters.since:
            # `to` is guaranteed to be set here by the both-or-neither check above.
            cmd.extend(['--since', str(self.Parameters.since), '--to', str(self.Parameters.to)])
            # YYYYMMDD integers: a difference above 30000 means more than
            # three years, so allow more failed jobs for such long ranges.
            if self.Parameters.to - self.Parameters.since > 30000:  # 3 years
                cmd.extend(['--max-failed-jobs', '1000'])
        if self.Parameters.clear_build:
            cmd.append('--rewrite')

        if self.Parameters.additional_script_parameters:
            cmd.extend(self.Parameters.additional_script_parameters)

        if self.Parameters.test_mode:
            cmd.append('--testing-day-selection')

        logging.info("cmd: %s", cmd)
        self.run_process_with_output_link(
            cmd,
            work_dir=scriptdir,
            environment=env,
            log_prefix='script_run',
            process_name='script',
        )

        self._push_clustering_timestamp('finish', common_labels, solomon_tvm)
