import os
import time
import shutil
import tarfile
import logging

from sandbox import sdk2, common
from sandbox.projects import resource_types
from sandbox.sandboxsdk import environments, process, parameters as sp
from sandbox.common.types import task as ctt
from sandbox.projects.common import apihelpers, utils
from sandbox.sandboxsdk.errors import SandboxTaskFailureError
from sandbox.sandboxsdk.channel import channel


class PythonBinary(sp.ResourceSelector):
    """Resource selector parameter (sandboxsdk style) for the NEWS_PYTHON resource.

    The selector is optional; its ``resource_type`` is also used elsewhere in
    this module to look up the last released python resource.
    """

    resource_type = 'NEWS_PYTHON'
    name = 'news_python'
    required = False
    description = 'Resource with News python, leave empty if you want last released resource'


class SportArchiveClusteringV2(sdk2.Task):
    """Optionally run ``yt_media_mapping`` and then the configured script from an Arcadia export.

    The task exports a script directory from SVN, resolves the package, news
    data and python resources, prepares the process environment (YT/YQL
    tokens, PYTHONPATH, extra variables) and launches the processes, exposing
    links to their output in the task info.
    """

    class Parameters(sdk2.Task.Parameters):
        owner = 'SPORT'
        tags = ['SPORT-ARCHIVES', 'SPORT']

        with sdk2.parameters.Group('Config') as config:
            # In test mode the prestable package is used and the script gets
            # the `--testing-day-selection` flag.
            test_mode = sdk2.parameters.Bool(
                'Run in test mode',
                default_value=False
            )

        with sdk2.parameters.Group('YT') as yt:
            with sdk2.parameters.RadioGroup('YT cluster') as yt_cluster:
                yt_cluster.values['arnold'] = yt_cluster.Value('arnold', default=True)
                yt_cluster.values['hahn'] = yt_cluster.Value('hahn')
            yt_token = sdk2.parameters.YavSecretWithKey(
                'YT token',
                default_value='sec-01cpt5v1fddttqtcxwakkshvt3#YT_TOKEN',
                required=True,
            )
        with sdk2.parameters.Group('Script') as script:
            script_url = sdk2.parameters.String(
                'Script directory svn url',
                required=True,
            )
            # Passed to the python binary as a single argument.
            script_cmdline = sdk2.parameters.String(
                'Script cmdline',
                required=True,
            )
            script_patch = sdk2.parameters.String(
                'Arcadia patch (arc patches not supported)',
                default_value='',
            )
            news_python = sdk2.parameters.Resource(
                'Resource with News python, leave emtpy if you want last released resource',
                resource_type=resource_types.NEWS_PYTHON,
            )
            additional_script_parameters = sdk2.parameters.List(
                'Additional command line parameters for the main script'
            )

        with sdk2.parameters.Group('YQL') as yql:
            # NOTE(review): the default points at the YT_TOKEN key of the same
            # secret as the YT token - confirm this is intended.
            yql_vault_selector = sdk2.parameters.YavSecretWithKey(
                'YQL token from YAV',
                default_value='sec-01cpt5v1fddttqtcxwakkshvt3#YT_TOKEN',
                required=False
            )

        package = sdk2.parameters.Resource(
            'Builder package. Leave empty if you want the last released resource',
            resource_type=resource_types.NEWS_INDEXER_YT_PACKAGE
        )
        news_data = sdk2.parameters.Resource(
            'Archive with news data. leave empty if you want the last resource',
            resource_type=resource_types.NEWS_BACKOFFICE_DATA
        )
        storage_path = sdk2.parameters.String(
            'Path to storage with documents',
            default_value='//home/sport/storage/info/entries',
            required=True,
        )
        media_meta_prefix = sdk2.parameters.String(
            'Prefix path for media metainfo',
            default_value='//home/sport/storage/meta',
            required=True,
        )
        media_prefix = sdk2.parameters.String(
            'Prefix path for media storage',
            default_value='//home/sport/storage/media',
            required=True,
        )
        result_prefix = sdk2.parameters.String(
            'Prefix path for output tables',
            default_value='//home/sport-prod/archive/newsd',
            required=True,
        )
        # `since` and `to` are YYYYMMDD integers; they must be given together.
        since = sdk2.parameters.Integer(
            'Data to start from (inclusive) format: YYYYMMDD',
            default_value=None,
        )
        to = sdk2.parameters.Integer(
            'Data to stop at (exclusive) format: YYYYMMDD',
            default_value=None,
        )
        shards_count = sdk2.parameters.Integer(
            'Shards count',
            required=True,
            default_value=112
        )
        clear_build = sdk2.parameters.Bool(
            'Clear output tables before start',
            default_value=False,
        )
        threads = sdk2.parameters.Integer(
            'Number of threads on worker',
            default_value=8,
        )
        do_not_remap_media = sdk2.parameters.Bool(
            'Do not run yt_media_mapping',
            default_value=False,
        )
        yt_media_mapping_use_dynstorage = sdk2.parameters.Bool(
            'Use dynstorage in yt_media_mapping',
            default_value=False,
        )
        with yt_media_mapping_use_dynstorage.value[True]:
            media_images_path = sdk2.parameters.String(
                'Prefix path for media images info'
            )
            media_extlinks_path = sdk2.parameters.String(
                'Prefix path for media extlinks info'
            )
            media_video_path = sdk2.parameters.String(
                'Prefix path for media videos info'
            )
            media_hosted_video_path = sdk2.parameters.String(
                'Prefix path for hosted videos info'
            )

        additional_environment_variables = sdk2.parameters.List(
            'Additional environment variables for scripts (`key=value` format)'
        )

    class Requirements(sdk2.Task.Requirements):
        environment = (environments.SvnEnvironment(), environments.PipEnvironment('yandex-yt'))
        cores = 1
        ram = 1024  # 1GB

        class Caches(sdk2.Requirements.Caches):
            pass

    @property
    def yt_token(self):
        """Value of the YT token secret selected in the parameters."""
        return self.Parameters.yt_token.value()

    @property
    def yt_cluster(self):
        """Selected YT cluster name (one of the `yt_cluster` radio group values)."""
        return self.Parameters.yt_cluster

    @property
    def yt_proxy(self):
        """Value passed to the tools as ``--yt-proxy``.

        The task declares no dedicated proxy parameter, so the selected
        cluster name is used.
        """
        # NOTE(review): assumes the consumers of --yt-proxy accept a short
        # cluster name ("arnold"/"hahn") - confirm.
        return self.yt_cluster

    @property
    def yt_pool(self):
        """``YT_POOL`` value from the additional environment variables, or None.

        The last ``YT_POOL=...`` entry wins, matching the order in which the
        additional variables are applied to the process environment.
        """
        # NOTE(review): the task has no dedicated pool parameter; the
        # additional environment variables are the only place a pool can be
        # configured - confirm this is the intended source.
        pool = None
        for entry in self.Parameters.additional_environment_variables or []:
            name, separator, value = entry.partition('=')
            if separator and name == 'YT_POOL':
                pool = value
        return pool

    @property
    def cmd_line(self):
        """Script command line as given in the `script_cmdline` parameter."""
        return self.Parameters.script_cmdline

    @property
    def scripts(self):
        """Export the script directory from SVN into a fresh ``script`` dir and return its path.

        Any previous content of the directory is removed. When `script_patch`
        is set, the patch is fetched and applied on top of the export.

        NOTE: every access performs the export again; read it once and keep
        the result.
        """
        script_dir = str(self.path('script'))
        if os.path.exists(script_dir):
            shutil.rmtree(script_dir)
        os.makedirs(script_dir)

        svn_url = self.Parameters.script_url

        sdk2.svn.Arcadia.check(svn_url)
        sdk2.svn.Arcadia.export(svn_url, script_dir)

        arcadia_patch = common.encoding.force_unicode_safe(self.Parameters.script_patch)
        if arcadia_patch:
            arcadia_patch_path, is_zipatch = sdk2.svn.Arcadia.fetch_patch(arcadia_patch, str(self.path()))
            if arcadia_patch_path:
                sdk2.svn.Arcadia.apply_patch_file(script_dir, arcadia_patch_path, is_zipatch, strip_depth=3)

        return script_dir

    @property
    def python(self):
        """Path to the python binary used to run the script.

        Uses the `news_python` resource when given, otherwise the last
        released resource of `PythonBinary.resource_type`; falls back to plain
        ``python`` when no resource id is available.
        """
        python_resource_id = self.Parameters.news_python
        if not python_resource_id:
            # XXX: Check, may not work in SDK2 task
            python_resource_id = utils.get_and_check_last_released_resource_id(
                PythonBinary.resource_type,
                # arch=self.arch
            )
        if not python_resource_id:
            return 'python'
        return str(sdk2.ResourceData(sdk2.Resource[python_resource_id]).path)

    def run_process_with_output_link(self, *args, **kwargs):
        """Run a process, publish a link to its stdout in the task info and wait for it.

        Accepts the same arguments as ``process.run_process`` plus an optional
        ``process_name`` keyword used in the info message (default 'Process').
        The process is always started with ``wait=False`` so that the link can
        be published while it is running; its return code is checked after it
        finishes.
        """
        process_name = kwargs.pop('process_name', 'Process')
        kwargs['wait'] = False
        proc = process.run_process(*args, **kwargs)
        logs_rid = getattr(getattr(channel.task, '_log_resource', None), 'id', None)
        if logs_rid is not None:
            res = channel.sandbox.get_resource(logs_rid)
            if res:
                url = '/'.join([res.proxy_url, proc.stdout_path_filename])
                self.set_info('{0} started. <a href="{1}" target="_blank">output</a>'.format(process_name, url), do_escape=False)
        proc.wait()
        process.check_process_return_code(proc)

    def on_save(self):
        """In test mode default the finish date to today when it is not set."""
        if self.Parameters.test_mode and not bool(self.Parameters.to):
            # `to` is an Integer parameter (YYYYMMDD), so store an int, not the
            # formatted string.
            self.Parameters.to = int(time.strftime('%Y%m%d'))

    def on_execute(self):
        """Validate parameters, prepare resources and environment, run the processes.

        Raises:
            SandboxTaskFailureError: on inconsistent date parameters, a missing
                NEWS_BACKOFFICE_DATA resource or a malformed additional
                environment variable.
        """
        params = self.Parameters
        yt_token = self.yt_token

        yql_token = ''
        if params.yql_vault_selector:
            # The parameter is a secret-with-key, same as the YT token.
            yql_token = params.yql_vault_selector.value()

        if params.clear_build and not (params.since and params.to):
            raise SandboxTaskFailureError('Start and finish date are required in "clear" mode')

        if bool(params.since) ^ bool(params.to):
            raise SandboxTaskFailureError('You have to specify both start and finish date')

        package = self._resolve_package()
        news_data = self._resolve_news_data()

        # Both properties do real work (SVN export / resource sync): read once.
        scriptdir = self.scripts
        python_binary = self.python

        env = self._build_environment(scriptdir, yt_token, yql_token)

        if not params.do_not_remap_media:
            self.run_process_with_output_link(
                self._build_media_mapping_cmd(package),
                work_dir=str(self.path()),
                environment=env,
                log_prefix='media_mapping',
                process_name='media_mapping'
            )

        cmd = self._build_script_cmd(python_binary, package, news_data)
        logging.info('cmd: %s', cmd)
        self.run_process_with_output_link(
            cmd,
            work_dir=scriptdir,
            environment=env,
            log_prefix='script_run',
            process_name='script',
        )

    def _resolve_package(self):
        """Return the local path of the builder package (given or last released)."""
        package_resource_id = self.Parameters.package
        if not package_resource_id:
            package_resource_id = utils.get_and_check_last_released_resource_id(
                resource_types.NEWS_INDEXER_YT_PACKAGE,
                release_status=ctt.ReleaseStatus.PRESTABLE if self.Parameters.test_mode else ctt.ReleaseStatus.STABLE,
            )
        return str(sdk2.ResourceData(sdk2.Resource[package_resource_id]).path)

    def _resolve_news_data(self):
        """Return the local path of the news data archive (given or last production one)."""
        news_data_resource_id = self.Parameters.news_data
        if not news_data_resource_id:
            res = apihelpers.get_last_resource_with_attrs(
                resource_type=resource_types.NEWS_BACKOFFICE_DATA,
                attrs={
                    'production': 1,
                },
                params={
                    'status': 'READY',
                    'omit_failed': True,
                },
            )
            if res is None:
                raise SandboxTaskFailureError('Cannot find NEWS_BACKOFFICE_DATA resource')
            news_data_resource_id = res.id
        return str(sdk2.ResourceData(sdk2.Resource[news_data_resource_id]).path)

    def _build_environment(self, scriptdir, yt_token, yql_token):
        """Return the environment dict for the child processes."""
        env = os.environ.copy()
        env['PYTHONPATH'] = os.path.join(scriptdir, 'lib/python')
        env['YT_TOKEN'] = yt_token
        env['YQL_TOKEN'] = yql_token

        pool = self.yt_pool
        if pool:
            env['YT_POOL'] = pool

        # Keep an explicitly requested DEBUG level, otherwise force INFO.
        if env.get('YT_LOG_LEVEL', '').upper() != 'DEBUG':
            env['YT_LOG_LEVEL'] = 'INFO'

        # Additional variables are applied last and may override the above.
        for entry in self.Parameters.additional_environment_variables or []:
            name, separator, value = entry.partition('=')
            if not separator:
                raise SandboxTaskFailureError(
                    'Malformed environment variable (expected `key=value`): {!r}'.format(entry)
                )
            env[name] = value
        return env

    def _build_media_mapping_cmd(self, package):
        """Extract ``bin/yt_media_mapping`` from the package and return its command line."""
        params = self.Parameters

        bin_dir = str(self.path('bin'))
        if os.path.exists(bin_dir):
            shutil.rmtree(bin_dir)
        os.makedirs(bin_dir)
        with tarfile.open(package) as tar:
            tar.extract('bin/yt_media_mapping', path=bin_dir)
        yt_media_mapping = os.path.join(bin_dir, 'bin/yt_media_mapping')

        since = str(params.since) if params.since else '20100101'
        to = str(params.to) if params.to else time.strftime('%Y%m%d')

        media_cmd = [
            yt_media_mapping,
            '--yt-proxy', self.yt_proxy,
            '--storage', params.storage_path,
        ]
        if params.yt_media_mapping_use_dynstorage:
            media_cmd.extend([
                '--dest', params.media_prefix,
                '--since', since,
                '--to', to,
                '--dynstorage-images', params.media_images_path,
                '--dynstorage-extlinks', params.media_extlinks_path,
                '--dynstorage-video', params.media_video_path,
                '--dynstorage-hosted-video', params.media_hosted_video_path,
                '--use-dynstorage-input'
            ])
        else:
            media_cmd.extend([
                '--meta', params.media_meta_prefix,
                '--dest', params.media_prefix,
                '--since', since,
                '--to', to,
            ])
        return media_cmd

    def _build_script_cmd(self, python_binary, package, news_data):
        """Return the command line for the main script."""
        params = self.Parameters
        cmd = [
            python_binary, self.cmd_line,
            '--yt-proxy', self.yt_proxy,
            '--since-file', 'since',
            '--storage', params.storage_path,
            '--media-prefix', params.media_prefix,
            '--package', package,
            '--news-data', news_data,
            '--target-prefix', params.result_prefix,
            '--threads', str(params.threads),
            '--shards-count', str(params.shards_count),
        ]

        pool = self.yt_pool
        if pool:
            cmd.extend(['--pool', pool])
        if params.since:
            # `to` is guaranteed to be set here by the validation in on_execute.
            cmd.extend(['--since', str(params.since), '--to', str(params.to)])
            # Difference of YYYYMMDD integers: 30000 corresponds to 3 years.
            if int(params.to) - int(params.since) > 30000:
                cmd.extend(['--max-failed-jobs', '1000'])
        if params.clear_build:
            cmd.append('--rewrite')

        if params.additional_script_parameters:
            cmd.extend(params.additional_script_parameters)

        if params.test_mode:
            cmd.append('--testing-day-selection')
        return cmd
