from sandbox import sdk2

from sandbox.common.types import task as ctt
from sandbox.projects import resource_types
from sandbox.projects.common.news.YtScriptTaskV2 import YtScriptV2

import logging
import os
import tarfile
import shutil
import time


class RunNewsArchiveTolokaPreprocessing(YtScriptV2):
    """Sandbox task that launches the Toloka preprocessing script for the news archive.

    The task resolves a NEWS_INDEXER_YT_PACKAGE resource (either the one given
    in the parameters or the last STABLE release), prepares an environment
    with YT/YQL tokens and runs the script returned by ``get_cmdline()`` with
    the interpreter returned by ``get_python()``.
    """

    # The nested class must be named exactly `Parameters` to override the
    # parameters inherited from YtScriptV2; `on_execute` reads every field
    # below through `self.Parameters`.
    class Parameters(YtScriptV2.Parameters):
        yql_vault_selector = sdk2.parameters.YavSecret("YQL token from YAV", required=True)
        yql_token_field = sdk2.parameters.String("Token field in YAV secret", required=True, default="yql_token")
        package = sdk2.parameters.Resource(
            "Builder package. Leave empty if you want the last released resource",
            resource_type=resource_types.NEWS_INDEXER_YT_PACKAGE,
        )
        # NOTE(review): the three path parameters use `default_value=` while
        # `yql_token_field` uses `default=`; confirm sdk2 accepts both spellings.
        storage_path = sdk2.parameters.String("Path to storage with documents", required=True, default_value="//home/news/storage/info/entries")
        toloka_prefix = sdk2.parameters.String("Prefix path for input toloka tables", required=True, default_value="//home/news/rt-toloka/full")
        result_prefix = sdk2.parameters.String("Prefix path for output tables", required=True, default_value="//home/news-prod/archive/toloka")
        # Name must match the attribute read in `on_execute`.
        additional_script_parameters = sdk2.parameters.List("Additional command line parameters for the main script")
        additional_environment_variables = sdk2.parameters.List("Additional environment variables for scripts (`key=value` format)")

    def on_execute(self):
        """Build the environment and command line, then run the preprocessing script.

        Raises:
            ValueError: if an entry of ``additional_environment_variables``
                is not in ``key=value`` format (missing ``=`` or empty key).
        """
        from sandbox.projects.common import utils

        yt_token = self.get_token()
        yql_token = self.Parameters.yql_vault_selector.data()[self.Parameters.yql_token_field]

        # Fall back to the last STABLE release when no package is selected.
        package_resource_id = self.Parameters.package
        if not package_resource_id:
            package_resource_id = utils.get_and_check_last_released_resource_id(
                resource_types.NEWS_INDEXER_YT_PACKAGE,
                release_status=ctt.ReleaseStatus.STABLE,
            )
        package = str(sdk2.ResourceData(sdk2.Resource[package_resource_id]).path)

        scriptdir = self.get_scripts()
        python_binary = self.get_python()
        # Query the pool once; it is used for both the environment and the command line.
        pool = self.get_pool()

        env = os.environ.copy()
        env['PYTHONPATH'] = os.path.join(scriptdir, 'lib/python')
        env['YT_TOKEN'] = yt_token
        env['YQL_TOKEN'] = yql_token

        if pool:
            env['YT_POOL'] = pool

        # Keep an inherited DEBUG level; force INFO in every other case.
        if env.get('YT_LOG_LEVEL', '').upper() != 'DEBUG':
            env['YT_LOG_LEVEL'] = 'INFO'

        # User-supplied variables are applied last, so they can override
        # anything set above.
        for additional_env_str in self.Parameters.additional_environment_variables or []:
            env_name, separator, env_value = additional_env_str.partition('=')
            if not separator or not env_name:
                raise ValueError(
                    'Invalid additional environment variable {!r}: expected `key=value` format'.format(
                        additional_env_str
                    )
                )
            env[env_name] = env_value

        cmd = [
            python_binary, self.get_cmdline(),
            '--yt-proxy', self.get_yt_proxy(),
            '--storage', self.Parameters.storage_path,
            '--toloka-prefix', self.Parameters.toloka_prefix,
            '--dst', self.Parameters.result_prefix,
            '--package', package,
        ]

        if pool:
            cmd.extend(['--pool', pool])
        if self.Parameters.additional_script_parameters:
            cmd.extend(self.Parameters.additional_script_parameters)

        # Tokens are passed through the environment only, so the logged
        # command line does not contain them.
        logging.info('cmd: {}'.format(cmd))
        # NOTE(review): 'script_rin' looks like a typo for 'script_run'; left
        # unchanged because log consumers may rely on the existing prefix.
        self.run_process_with_output_link(
            cmd,
            work_dir=scriptdir,
            environment=env,
            log_prefix='script_rin',
            process_name='toloka_preprocessing',
        )
