# -*- coding: utf-8 -*-
import json
import logging
import os
import copy
import time
import re
import glob
from contextlib import nested
from subprocess import Popen, PIPE
from urlparse import urlparse
from distutils.dir_util import copy_tree

from sandbox import sdk2
from sandbox.common import config
from sandbox.common import urls
from sandbox.common.types import task as ctt
from sandbox.common.types import misc as ctm
from sandbox.common.errors import TaskFailure
from sandbox.common.utils import singleton_property
from sandbox.sandboxsdk import environments

from sandbox.projects.common.yasm import push_api

from sandbox.projects.trendbox_ci.beta.modules.escape_json_str import escape_json_str
from sandbox.projects.trendbox_ci.beta.modules.process import run_process
from sandbox.projects.trendbox_ci.beta.modules.isolation import isolate_container
from sandbox.projects.trendbox_ci.beta.modules.logs_placeholder import job_logs_placeholder
from sandbox.projects.trendbox_ci.beta.modules.schedulers import is_scheduler_allowed_to_run, is_owner_allowed_to_run_task
from sandbox.projects.trendbox_ci.beta.managers.resources import ResourcesManager
from sandbox.projects.trendbox_ci.beta.managers.vault import VaultManager
from sandbox.projects.trendbox_ci.beta.managers.task_dependencies import TaskDependenciesManager
from sandbox.projects.trendbox_ci.beta.managers.arc.context import create_arc_context, prepare_arc_environments
from sandbox.projects.trendbox_ci.beta.tasks.trendbox_ci_base_task import TrendboxCiBaseTask
from sandbox.projects.trendbox_ci.beta.tasks.trendbox_ci_job.parameters import WakingOutputParametersPool
from sandbox.projects.trendbox_ci.beta.resources import TRENDBOX_CI_LXC_IMAGE_BETA


class TrendboxJobException(Exception):
    """Raised when job meta, job logs or the checkout configuration are missing or invalid."""


class TrendboxCiJob(TrendboxCiBaseTask):
    """Trendbox CI Job"""

    name = 'TRENDBOX_CI_JOB_BETA'

    class Requirements(TrendboxCiBaseTask.Requirements):
        # Execution requirements of the job task, extending the base task requirements.
        dns = ctm.DnsType.LOCAL
        disk_space = 20 * 1024  # presumably MiB (i.e. 20 GiB) — TODO confirm against Sandbox SDK
        # NOTE: NodeJS must stay the first entry — `_node_environment` reads `environments[0]`.
        environments = (
            environments.NodeJS('12.13.0'),
            environments.ArcEnvironment(export_token_env=False),
        )

    class Context(TrendboxCiBaseTask.Context):
        # ID of the job log resource: written by `_create_log_res`, read by `_get_log_res`.
        log_resource_id = None

    class Parameters(TrendboxCiBaseTask.Parameters):
        # Input and output parameters of the job task, extending the base task parameters.

        with sdk2.parameters.Group('Job Identifiers') as job_id_block:
            job_id = sdk2.parameters.String(
                'Job ID',
            )
            # Checked in `on_save`: non-CI launches are rejected unless the owner is allowed.
            is_ci = sdk2.parameters.Bool(
                'Is task started from CI',
                default=False
            )

        with sdk2.parameters.Group('Container') as container_block:
            _container = sdk2.parameters.Container(
                'LXC resource',
                description='Commands of lifecycle will be launched in specified Linux Container',
                resource_type=TRENDBOX_CI_LXC_IMAGE_BETA,
                platform='linux_ubuntu_14.04_trusty',
                required=True,
            )
            # Either of the two flags below makes `on_save` request a privileged task.
            is_privileged_container = sdk2.parameters.Bool('Privileged', description='You will be root inside LXC', default=False)
            isolated = sdk2.parameters.Bool('Isolated network', description='Allow requests only to yandex network (act like there is no internet)', default=False)

        with sdk2.parameters.Group('Config') as trendbox_block:
            # Read through `_get_trendbox_config_value` and passed to the job cli in `run_job_cli`.
            trendbox_config = sdk2.parameters.JSON(
                'Job configuration',
                description='JSON with job info (lifecycle steps, environments, etc)',
                required=True,
            )
            workcopies = sdk2.parameters.JSON(
                'Workcopies configuration',
            )
            # Used by `header` to build the link to the Trendbox UI logs page.
            trendbox_web_logs_url_template = sdk2.parameters.String(
                'Logs URL template',
                description='Steps log will appear on this Trendbox UI page',
            )

        # Output parameters, filled in by `process_job_meta` and `_output_meta`.
        with sdk2.parameters.Output():
            with sdk2.parameters.Group('Job API') as public_api:
                logs_urls = sdk2.parameters.JSON('Logs urls', description='URLs to log files of lifecycle steps', default=[])
                resources = sdk2.parameters.JSON('Resources', description='Resources meta info', default=[])
                reports = sdk2.parameters.JSON('Reports', description='Report resources meta info', default=[])
                exit_code = sdk2.parameters.Integer('Exit code', default=-1)

        waking_output_parameters_pool = WakingOutputParametersPool

    def on_save(self):
        """
        Validate who is launching the task and derive the privileged requirement.

        :raises Exception: when the task is started outside of CI by an owner that is not
            allowed to run it, or from a scheduler that is not allowed to run
        """
        started_outside_ci = not self.Parameters.is_ci

        if started_outside_ci and not is_owner_allowed_to_run_task(self):
            # User-facing notice (in Russian): Trendbox CI is being shut down, non-CI launches are forbidden.
            message_template = (
                'Trendbox CI закрывается, запуски таски не из CI запрещены. '
                'Подробнее — {post_link}. '
                'Если требуется перезапустить проверку в CI и рестарт таски из UI или Sandbox не работает, поребейзите PR.'
            )
            raise Exception(message_template.format(post_link='https://clubs.at.yandex-team.ru/trendbox-ci/396'))

        if self.scheduler and not is_scheduler_allowed_to_run(self):
            # User-facing notice (in Russian): new schedulers are forbidden.
            raise Exception("Trendbox CI закрывается, новые планировщики запрещены. Подробнее — https://clubs.at.yandex-team.ru/trendbox-ci/396")

        super(TrendboxCiJob, self).on_save()

        # Network isolation requires sudo, because of using ip6tables
        # https://unix.stackexchange.com/a/385256
        needs_privileges = self.Parameters.is_privileged_container or self.Parameters.isolated
        if needs_privileges:
            self.Requirements.privileged = True

        self.set_task_archive_resource()

    def on_enqueue(self):
        """
        Run the base enqueue hook, then call ``wait()`` on the task dependencies manager.
        """
        super(TrendboxCiJob, self).on_enqueue()

        # presumably suspends the task until the configured dependencies are ready — TODO confirm
        self.task_dependencies.wait()

    def on_prepare(self):
        """
        Run the base prepare hook, log the prepared paths and pre-warm Arc environments
        for privileged containers working with an Arcadia repository.
        """
        super(TrendboxCiJob, self).on_prepare()

        working_dir = self._working_path
        logging.debug('Prepared working directory: {}'.format(working_dir))

        if self._is_ramdrive_supported:
            ramdrive_dir = self.ramdrive.path
            logging.debug('Working directory mounted in ramdrive: {}'.format(ramdrive_dir))

        # This part is to prepare Arc-related environments before privileged container starts.
        # Caches and environments folders are read-only in on_execute method of privileged tasks.
        needs_arc_environments = self.Parameters.is_privileged_container and self._is_arcanum_provider()
        if needs_arc_environments:
            logging.debug('Preparing Arc environments')
            prepare_arc_environments()

        node_bin = self._node_path
        logging.info('Node.js installed to "{}"'.format(node_bin))

        npm_bin = self._npm_path
        logging.info('npm installed to "{}"'.format(npm_bin))

    def on_finish(self, prev_status, status):
        """
        Finish hook: notify the task dependencies manager, run the base hook, report the status.

        :param prev_status: forwarded to the base hook
        :param status: status passed to ``task_dependencies.awake``, the base hook and the status signal
        """
        self.task_dependencies.awake(status=status)

        super(TrendboxCiJob, self).on_finish(prev_status, status)

        # Best-effort: failures while sending the signal are logged, not raised.
        self._safe_send_status_signal(status=status)

    def on_break(self, prev_status, status):
        """
        Break hook: remember the latest job log, notify dependencies, run the base hook, report the status.

        :param prev_status: forwarded to the base hook
        :param status: status passed to ``task_dependencies.awake``, the base hook and the status signal
        """
        # Store the name of the most recently modified log file so the header can show its tail.
        self._save_job_error_log_name()
        self.header()

        self.task_dependencies.awake(status=status)

        super(TrendboxCiJob, self).on_break(prev_status, status)

        # Best-effort: failures while sending the signal are logged, not raised.
        self._safe_send_status_signal(status=status)

    def on_failure(self, prev_status):
        """
        Failure hook: remember the latest job log for the header, then run the base hook.

        :param prev_status: forwarded to the base hook
        """
        # Store the name of the most recently modified log file so the header can show its tail.
        self._save_job_error_log_name()
        self.header()

        super(TrendboxCiJob, self).on_failure(prev_status)

    @sdk2.header()
    def header(self):
        """
        Build the task header: Trendbox UI logs link, report links and the tail of the latest job log.

        :return: merged header sections
        :rtype: dict
        """
        sections = (
            self.reports.trendbox_ui_logs_link(self.Parameters.trendbox_web_logs_url_template),
            self.reports.reports(self.Parameters.reports),
            self._get_job_error_report(),
        )

        merged = {}
        for section in sections:
            merged.update(section)

        return merged

    @singleton_property
    def task_dependencies(self):
        """
        :return: dependencies manager built from the ``cloud.options.task_dependencies`` config section
        :rtype: TaskDependenciesManager
        """
        options = self._get_task_dependencies_options()

        return TaskDependenciesManager(self, options)

    def track_execution_time(self):
        """
        Store the start time of the current execution in the task context.

        The very first start time is written only once; the current start time is overwritten on every call.
        """
        started_at = time.time()
        context = self.Context

        if context.first_execution_started_at is ctm.NotExists:
            context.first_execution_started_at = started_at

        context.execution_started_at = started_at

    @property
    def execution_time(self):
        """
        :return: whole seconds passed since the current execution started, 0 if it was never tracked
        :rtype: int
        """
        started_at = self.Context.execution_started_at

        if started_at is ctm.NotExists:
            return 0

        elapsed = time.time() - started_at
        return int(elapsed)

    @property
    def total_execution_time(self):
        """
        :return: whole seconds passed since the first execution started, 0 if it was never tracked
        :rtype: int
        """
        first_started_at = self.Context.first_execution_started_at

        if first_started_at is ctm.NotExists:
            return 0

        elapsed = time.time() - first_started_at
        return int(elapsed)

    @property
    def _log_dir_name(self):
        """
        :return: name of the job logs directory inside the working directory
        :rtype: str
        """
        return 'job_logs'

    @property
    def _node_environment(self):
        """
        :return: the first entry of ``Requirements.environments``, which is declared as NodeJS
        :rtype: sandbox.sandboxsdk.environments.NodeJS
        """
        return self.Requirements.environments[0]

    @property
    def _node_path(self):
        """
        :return: path to Node.js bin, as exposed by the ``node`` attribute of the Node.js environment
        :rtype: pathlib2.Path
        """
        return self._node_environment.node

    @property
    def _npm_path(self):
        """
        :return: path to npm bin, as exposed by the ``npm`` attribute of the Node.js environment
        :rtype: pathlib2.Path
        """
        return self._node_environment.npm

    @property
    def _node_modules_path(self):
        """
        :return: path to the ``node_modules`` directory inside the working directory
        :rtype: pathlib2.Path
        """
        return self._working_path.joinpath('node_modules')

    def _run_process(self, *args, **kwargs):
        """
        Runs subprocess, write logs to task log resource.

        Thin wrapper over ``run_process`` that binds the current task as ``task``;
        all other positional and keyword arguments are forwarded unchanged.

        :return: exit code
        :rtype: int
        """
        return run_process(task=self, *args, **kwargs)

    def on_execute(self):
        """
        Run the job lifecycle through the job cli and fail the task unless it exits with code 0.

        :raises TaskFailure: when the stage is entered more than once, when the lifecycle
            was killed by timeout, or when it exited with a non-zero code
        """
        super(TrendboxCiJob, self).on_execute()

        self.resource_manager = ResourcesManager(self)

        self.init_environ()
        self.track_execution_time()

        if self.Parameters.isolated:
            isolate_container(self)

        # NOTE(review): the stage appears to be allowed to be entered twice so that a second
        # entry (i.e. a restart) can be detected and turned into a failure — confirm against
        # memoize_stage semantics.
        with self.memoize_stage.running_job_cli(2) as stage:
            if stage.runs > 1:
                raise TaskFailure('Task was unexpectedly restarted, finishing with error')

            self.run_job_cli()

            # Publishes logs/resources/reports output parameters and returns the parsed meta file.
            metainfo = self.process_job_meta()

            exit_code = self._get_exit_code(metainfo)
            self.send_webhook(status=ctt.Status.EXECUTING)

            if exit_code == 0:
                return

            if self._is_killed_by_timeout(metainfo):
                raise TaskFailure('Job lifecycle timeout exceeded: {}'.format(self._get_job_timeout()))

            raise TaskFailure('Job lifecycle exit with code {}'.format(exit_code))

    def init_environ(self):
        """
        Init environment variables for job

        Exports CI/Trendbox markers and Sandbox task identifiers into ``os.environ``.
        For Arcadia repositories the Arc token is read from Vault and exported as well.
        Values of secret-looking variables are masked in the debug dump of the environment.
        """
        logging.debug('Initializing environ')

        os.environ['CI'] = 'true'
        os.environ['TRENDBOX_CI'] = 'true'
        os.environ['TRENDBOX_CACHE_ARTIFACT_TYPE'] = 'TRENDBOX_CI_RESOURCE_BETA'
        os.environ['TRENDBOX_JOB_HOST'] = config.Registry().this.id
        os.environ['DEBUG'] = 'trendbox-ci:*,si:*'
        # TODO: Remove after FEI-10005
        os.environ['SANDBOX_SYNCHROPHAZOTRON_PATH'] = str(self.synchrophazotron)
        os.environ['TRENDBOX_SYNCHROPHAZOTRON'] = str(self.synchrophazotron)
        # TODO: Remove after: FEI-13554, FEI-14026
        os.environ['SANDBOX_TASK_ID'] = str(self.id)
        os.environ['SANDBOX_TASK_DIR'] = str(self.path())
        os.environ['SANDBOX_SESSION_TOKEN'] = str(self.agentr.token)

        if self._is_arcanum_provider():
            vault_manager = VaultManager(self)
            os.environ['ARC_TOKEN'] = vault_manager.read_vault('ARC_TOKEN', self.owner)
            os.environ['USE_ARC'] = 'true'

        # Never write secrets (ARC_TOKEN, SANDBOX_SESSION_TOKEN, ...) to the task log:
        # mask every variable whose name looks like a credential.
        secret_markers = ('TOKEN', 'SECRET', 'PASSWORD')
        printable_environ = dict(
            (name, '***' if any(marker in name.upper() for marker in secret_markers) else value)
            for name, value in os.environ.items()
        )

        logging.debug('Inited environ: {}'.format(printable_environ))

    def run_job_cli(self):
        """
        Run job cli

        Creates the log directory and log resource, then runs ``run-job`` of the job cli inside
        the logs placeholder context (and the Arc mount context for Arcadia repositories).
        Whatever the outcome of the process, the log resource is marked as ready and Arc logs
        are dumped afterwards.
        """
        logging.debug('Running job cli')

        vault_manager = VaultManager(self)
        vault_envs = vault_manager.get_all_vault_env().items()

        contexts = (job_logs_placeholder(self._get_log_dir(), 'README_NO_LOGS.md'),)

        trendbox_config = self.Parameters.trendbox_config

        if self._is_arcanum_provider():
            trendbox_config = self._get_arc_prepared_config(trendbox_config)

            mount_options = dict(
                mount_point=self.arc_mount_path,
                store_path=self.arc_store_path
            )

            if self.Parameters.is_privileged_container:
                arc_object_store_path = self.arc_object_store_path
                arc_object_store_path.mkdir(parents=True, exist_ok=True)

                mount_options['object_store_path'] = str(arc_object_store_path)

            contexts += (create_arc_context(**mount_options),)

        self._create_log_dir()
        self._create_log_res()

        with nested(*contexts):
            try:
                # Environment of the child process: current environ overridden by Vault variables.
                process_env = dict(os.environ)
                process_env.update(vault_envs)

                self._run_process(
                    process_name='run_job_cli',
                    cmd=u'{trendbox} run-job --config \'{config}\' --log-dir={log_dir} --meta-file={meta_file} --timeout={timeout} --silent-lifecycle'.format(  # nopep8
                        trendbox=self._job_cli_path,
                        config=escape_json_str(json.dumps(trendbox_config)),
                        log_dir=self._get_log_dir(),
                        meta_file=self._get_meta_file_path(),
                        timeout=self._get_job_timeout(),
                    ),
                    env=process_env,
                )
            finally:
                # Arc logs must be dumped even when finalizing the log resource fails.
                try:
                    self._mark_as_ready_log_res()
                finally:
                    self._dump_arc_logs()

        logging.debug('Finished job cli')

    def process_job_meta(self):
        """
        Read the job meta file and publish its content as task output parameters.

        Fills ``exit_code``, ``logs_urls``, ``resources`` and ``reports`` output parameters;
        resources and reports listed in the meta file are created through the resource manager.
        Missing (or null) ``resources``/``reports`` sections are treated as empty lists.

        :return: parsed content of the meta file
        :rtype: dict
        :raises TrendboxJobException: when the meta file has no logs or the logs directory has no entries
        """
        meta = self._read_job_meta()
        self._output_meta(meta)

        logs = meta.get('logs')
        resources = meta.get('resources') or []
        reports = meta.get('reports') or []

        if not logs:
            raise TrendboxJobException('Logs not found in meta file')

        # NOTE: despite its name, `_check_log_dir_empty` returns True when the directory has entries.
        if not self._check_log_dir_empty():
            raise TrendboxJobException('Logs dir is empty')

        log_res_url = self.resource_manager.get_resource_http_proxy(self._get_log_res().id)
        logs_output = ['{}/{}'.format(log_res_url, os.path.basename(log)) for log in logs]

        resources_output = []
        for r in resources:
            res_attrs = r['attrs']
            res = self.resource_manager.create_resource(
                relative_path=r['path'],
                resource_type=r['type'],
                description=r['description'],
                **res_attrs
            )
            if res:
                resources_output.append(dict(
                    id=res.id,
                    url=self.resource_manager.get_resource_http_proxy(res.id),
                    name=r['name'],
                    attrs=res_attrs,
                ))

        job_status = ctt.Status.SUCCESS if self._get_exit_code(meta) == 0 else ctt.Status.FAILURE

        reports_output = []
        for r in reports:
            res_attrs = r['attrs']
            # A report without its own status inherits the status of the whole job.
            res_status = res_attrs.setdefault('status', job_status)
            res = self.resource_manager.create_report(r['path'], **res_attrs)
            if res:
                reports_output.append(dict(
                    status=res_status,
                    url=self.resource_manager.get_resource_http_proxy(res.id),
                    id=res.id,
                    title=r['title'],
                    main=r['main'],
                ))

        self.Parameters.logs_urls = logs_output
        self.Parameters.resources = resources_output
        self.Parameters.reports = reports_output

        return meta

    def _dump_arc_logs(self):
        """
        Best-effort copy of Arc traces into the ``arc`` directory of the task logs.

        Does nothing for non-Arcadia repositories; any failure is logged and swallowed.
        """
        try:
            if not self._is_arcanum_provider():
                return

            traces_dir = self._working_path.joinpath('arc_store', '.arc', 'traces')
            target_dir = self.log_path().joinpath('arc')

            target_dir.mkdir()
            copy_tree(str(traces_dir), str(target_dir))
        except Exception:
            logging.exception('Unable to copy arc logs to working copy')

    def _create_log_res(self):
        """
        Create the log resource for the job logs directory and remember its ID in the task context.
        """
        res = self.resource_manager.create_log_resource(self._get_log_dir())
        self.Context.log_resource_id = res.id

    def _get_log_res(self):
        """
        Find the log resource of this task by the ID saved in the task context.

        :raises TrendboxJobException: when no log resource ID was saved in the context
        """
        resource_id = self.Context.log_resource_id

        if resource_id is None:
            raise TrendboxJobException('There is no log resource ID saved in context')

        return sdk2.Resource.find(task=self, id=resource_id).first()

    def _mark_as_ready_log_res(self):
        """
        Mark the data of the job log resource as ready.
        """
        log_resource = self._get_log_res()
        sdk2.ResourceData(log_resource).ready()

    def _create_log_dir(self):
        """
        Create the job logs directory, requesting mode 0o777 (the effective mode is subject to umask).
        """
        self._get_log_dir().mkdir(mode=0o777)

    def _check_log_dir_empty(self):
        return len(list(self._get_log_dir().iterdir())) > 0

    def _get_log_dir(self):
        """
        :return: path to the job logs directory inside the working directory
        """
        return self._working_path.joinpath(self._log_dir_name)

    def _get_meta_file_path(self):
        """
        :return: path to the ``job_meta.json`` file inside the working directory
        """
        return self._working_path.joinpath('job_meta.json')

    def _read_job_meta(self):
        """
        Load the job meta file as JSON.

        :return: parsed content of the meta file
        :raises TrendboxJobException: when the file does not exist or its content is empty
        """
        meta_path = self._get_meta_file_path()

        if not meta_path.exists():
            raise TrendboxJobException('Job meta file does not exist')

        with meta_path.open() as meta_stream:
            content = json.load(meta_stream)

        if not content:
            raise TrendboxJobException('Job meta file is empty')

        logging.debug('Meta file content: {}'.format(content))

        return content

    def _output_meta(self, meta_content):
        """
        Publish the exit code from the job meta as the ``exit_code`` output parameter.

        :type meta_content: dict
        """
        self.Parameters.exit_code = self._get_exit_code(meta_content)

    @staticmethod
    def _get_exit_code(meta_content):
        return int(meta_content.get('exitCode', -1))

    @staticmethod
    def _is_killed_by_timeout(meta_content):
        return bool(meta_content.get('killedByTimeout', False))

    @staticmethod
    def ensure_list(value):
        return value if isinstance(value, list) else [value]

    def _get_job_timeout(self):
        """
        :return: the ``timeout`` value of the job config (0 when absent) with an ``ms`` suffix, e.g. ``'0ms'``
        :rtype: str
        """
        return '{}ms'.format(self._get_trendbox_config_value(['timeout'], 0))

    def _get_task_dependencies_options(self):
        """
        :return: the ``cloud.options.task_dependencies`` section of the job config, ``{}`` when absent
        """
        return self._get_trendbox_config_value(['cloud', 'options', 'task_dependencies'], {})

    def _extract_path_from_url(self, url):
        parsed = urlparse(url)

        return parsed.path

    def _remove_preceding_slash(self, path):
        if path.startswith('/'):
            return path[1:]

        return path

    def _get_arc_prepared_config(self, original_trendbox_config):
        trendbox_config = copy.deepcopy(original_trendbox_config)

        if 'checkout' in trendbox_config:
            checkout_config = trendbox_config.get('checkout')
        else:
            workcopies = trendbox_config.get('workcopies')
            [after, before, base] = [workcopies.get(key, None) for key in ['after', 'before', 'base']]
            checkout_config = after or before or base

        configs = self.ensure_list(checkout_config)

        for config_item in configs:
            repository_url = config_item.get('repository', '')
            project_path = self._remove_preceding_slash(self._extract_path_from_url(repository_url))

            if not project_path:
                raise TrendboxJobException('Arcadia project path for repository {} is empty'.format(repository_url))

            logging.debug('Found project in checkout config: {}'.format(project_path))

            config_item.update({
                'type': 'arc',
                'vcs': 'arc',
                'path': project_path,
                'mount_path': str(self.arc_mount_path),
                'store_path': str(self.arc_store_path)
            })

        return trendbox_config

    def _is_arcanum_provider(self):
        ARCADIA_REPO_PREFIX = 'arcadia:'

        workcopies_base_repo = self._get_base_repository()
        workcopies_before_repo = self._get_trendbox_config_value(['workcopies', 'before', 'repository'], '')
        workcopies_after_repo = self._get_trendbox_config_value(['workcopies', 'after', 'repository'], '')

        if (workcopies_base_repo or workcopies_before_repo or workcopies_after_repo).startswith(ARCADIA_REPO_PREFIX):
            return True

        return False

    def _get_trendbox_config_value(self, path, default=None):
        get = lambda d, key: d.get(key, default) if isinstance(d, dict) else default

        return reduce(get, path, self.Parameters.trendbox_config)

    def _normalize_arcadia_repo(self, repo):
        if repo.startswith('arcadia:/'):
            return repo.split('#')[0]

        return repo

    def _get_workcopy_repository(self, key):
        workcopy = self._get_trendbox_config_value(['workcopies', key], {'repository': ''})

        return self.ensure_list(workcopy)[0].get('repository', '')

    def _get_repository(self):
        return self._normalize_arcadia_repo(self._get_workcopy_repository('base') or self._get_workcopy_repository('after') or self._get_workcopy_repository('before'))

    def _get_base_repository(self):
        """
        :return: repository of the ``base`` workcopy, ``''`` when absent
        """
        return self._get_workcopy_repository('base')

    def _safe_send_status_signal(self, status):
        """
        Send the status signal without ever failing the caller: any exception is logged and swallowed.

        :param status: task status
        """
        try:
            self._send_status_signal(status)
        except Exception:
            logging.exception('Exception while sending status signal to yasm')

    def _send_status_signal(self, status):
        """
        Push the task status and execution time to yasm and report the task to zeroline.

        Nothing is sent when the task context has a truthy ``copy_of`` value.

        :param status: task status
        :type status: str
        """
        if self.Context.copy_of:
            return

        repo = self._get_repository() or 'unknown'
        job_name = self._get_trendbox_config_value(['name']) or 'unknown'
        workflow_name = self._get_trendbox_config_value(['workflow', 'name']) or 'unknown'

        def sanitize(tag_value):
            # Yasm tag values are limited: https://wiki.yandex-team.ru/golovan/userdocs/tagsandsignalnaming
            return re.sub(r'[^a-zA-Z0-9\-_]', '_', tag_value)

        signals = {
            'status_{}_mmmm'.format(status.lower()): 1,
            'execution_time_min_hgram': [self.total_execution_time / 60, 1],
        }
        tags = {
            'itype': 'trendboxci',
            'prj': 'trendboxci',
            'job_name': sanitize(job_name),
            'workflow_name': sanitize(workflow_name),
            'repo': sanitize(repo),
        }
        push_api.push_signals(signals=signals, tags=tags)

        task_report = {
            'build_id': self.Parameters.build_id,
            'task_type': job_name,
            'task_id': self.id,
            'project': repo,
            'build_context': workflow_name,
            'status': status,
            'duration': self.total_execution_time,
        }
        self.zeroline_reporter.report_task(task_report)

    def _save_job_error_log_name(self):
        log_dir = self._get_log_dir()
        list_of_files = glob.glob('{}/*'.format(log_dir))

        if len(list_of_files):
            latest_file = max(list_of_files, key=os.path.getmtime)
            short = latest_file.split('/')[-1]
            self.Context.job_error_log_name = short

    def _get_log_tail(self, file, lines=10):
        output = ''
        p = Popen(['tail', str(-lines), file], shell=False, stderr=PIPE, stdout=PIPE)
        res, err = p.communicate()
        if err:
            logging.exception('Error while reading tail from {}: {}'.format(file, err.decode()))
        else:
            output = res.decode()

        return output

    def _get_job_error_report(self):
        """
        Build the "Latest logs" header section with the tail of the log saved in the task context.

        :return: header section, ``{}`` when no log name is saved in the context
        :rtype: dict
        """
        log_name = self.Context.job_error_log_name

        if not log_name:
            return {}

        exec_dir_link = urls.execution_dir_link(self.id)
        log_url = os.path.join(exec_dir_link, self._log_dir_name, log_name)
        log_path = os.path.join(str(self._get_log_dir()), log_name)
        link = '<a href="{}">{}</a>'.format(log_url, log_name)
        tail = self._get_log_tail(log_path)

        # The header is rendered as HTML, so line breaks are turned into <br/>.
        tail_html = '...<br/>' + tail.replace('\n', '<br/>')

        return {'<h3 id="latest-logs">Latest logs</h3>': [{link: tail_html}]}
