# coding=utf-8
from __future__ import unicode_literals

import datetime
import logging
import os
import random
import resource
import signal
import stat
import subprocess
import time

import gevent
import gevent.event
from yp_proto.yp.client.hq.proto import types_pb2
from enum import IntEnum
from sepelib.util import fs
from sepelib.util import retry
from sepelib.gevent import greenthread

from instancectl.lib import specutil
from instancectl.lib import envutil
from instancectl.status import prober
from instancectl import utils
from instancectl.jobs import errors as job_errors
from instancectl.lib import pbutil
from instancectl.lib import procutil
from instancectl.lib.process.popen import PopenProcess
from instancectl.lib.process import porto_container
from instancectl.lib.procutil import ExecuteHelper
from infra.nanny.instancectl.proto import instancectl_pb2
from yp_proto.yp.client.hq.proto import types_pb2 as hq_types_pb2
from instancectl.lib import handler
from instancectl import constants
from instancectl.coredumps import sender
from instancectl import errors
from . import helpers


class ArgumentEvaluationError(job_errors.JobError):
    """Raised when a script that evaluates a command argument fails or is killed by timeout."""


class ArgumentFormatError(job_errors.JobError):
    """Raised when a command argument cannot be formatted with the evaluated values."""


class ScriptNotFoundError(job_errors.JobError):
    """A required script is not set for the section."""


class IncorrectJobParametersError(job_errors.JobError):
    """The job parameters are incorrect."""


class JobStatusCheckResult(IntEnum):
    """
    Result of checking the state of the binary.

    The members are plain integers (IntEnum), so they compare equal to their numeric values.
    """
    STARTED = 0
    NOT_STARTED = 10
    NOT_INSTALLED = 20
    UNKNOWN = 30
    PENDING = 40


class Job(greenthread.GreenThread):
    """
    Daemon manager.

    FSM:

    PREPARED --> INSTALLED --> STARTED --> CRASHED --> INSTALLED

    Transitions between states:
    PREPARED --> INSTALLED (install_script ran successfully; a failure returns to PREPARED)

    INSTALLED --> STARTED (launch via Popen; a failure moves to CRASHED)

    STARTED --> CRASHED (the daemon stopped)

    CRASHED --> INSTALLED (restart_script completed successfully)

    Stopping is extremely simple: we kill the main thread and then guess from the state. If STARTED, we shut the
    instance down via stop_script. In any case we finish the process off, starting with signals.

    :type process: instancectl.process.process.Process
    :type script_runner: instancectl.lib.procutil.ScriptRunner
    """

    USER_SCRIPT_RETRY_MAX_TIMEOUT = 120
    SUBPROCESS_WAIT = 5

    def __init__(self, spec, env, minidump_sender, limits, environment,
                 successful_start_timeout, install_script, restart_script, uninstall_script,
                 args_to_eval, rename_binary, coredump_probability, always_coredump, coredumps_dir, expand_spec,
                 porto_mode, work_dir, coredump_filemask, core_files_limit,
                 restart_sleeper):
        """
        Store the job configuration and create the initial NOT_PREPARED state.

        No process is started here. The script runner, the prober and the handler runner
        stay ``None`` until :meth:`init` is called, and ``run_flag_fd`` stays ``None`` until
        :meth:`set_run_flag_fd` is called.

        :type spec: yp_proto.yp.client.hq.proto.types_pb2.Container
        :type minidump_sender: optional[instancectl.coredumps.sender.MinidumpSender]
        :type limits: dict
        :type environment: dict[unicode, unicode]
        :type successful_start_timeout: int | float | None
        :type args_to_eval: dict[unicode, unicode]
        :type env: instancectl.lib.envutil.InstanceCtlEnv
        :type porto_mode: porto_container.PortoMode
        :type work_dir: unicode
        :type coredump_filemask: unicode
        :type core_files_limit: int
        :type restart_sleeper: sepelib.util.retry.RetrySleeper
        """
        super(Job, self).__init__()
        self.name = spec.name
        # Logger name looks like "Job[<section name>]".
        self.log = logging.getLogger('{}[{}]'.format(self.__class__.__name__, self.name))
        # Set by start_binary(), reset to None once the termination has been handled.
        self.process = None
        self.crash_log = logging.getLogger('crash')

        # Initial internal state: NOT_PREPARED, stamped with the current time.
        self._state = instancectl_pb2.ContainerInternalState()
        self._state.last_state_change_time.GetCurrentTime()
        self._state.state = instancectl_pb2.ContainerInternalState.NOT_PREPARED

        self._env = env

        self.spec = spec
        self._rename_binary = rename_binary
        self._successful_start_timeout = successful_start_timeout

        # Filled in by set_run_flag_fd().
        self.run_flag_fd = None

        self._limits = limits
        # Fall back to EmptyMinidumpSender when no sender is given.
        self._minidump_sender = minidump_sender or sender.EmptyMinidumpSender()
        self._work_dir = work_dir
        # The binary's output goes to "<work_dir>/<section name>.out" and ".err".
        self._stdout_path = os.path.join(self._work_dir, '{}.out'.format(self.name))
        self._stderr_path = os.path.join(self._work_dir, '{}.err'.format(self.name))
        # Opened lazily by _get_stdout_stderr().
        self._stdout = None
        self._stderr = None
        self._event_log = utils.get_event_logger('section.{}'.format(self.name))
        self._restart_sleeper = restart_sleeper
        self._process_environment = environment
        # script_runner and prober are created in init().
        self.script_runner = None
        self.prober = None
        self._install_script = install_script
        self._install_script_restart_sleeper = envutil.create_restart_sleeper(env.install_script_restart_policy)
        self._restart_script = restart_script
        self._uninstall_script = uninstall_script
        self._args_to_eval = args_to_eval
        self._coredump_probability = coredump_probability
        self._always_coredump = always_coredump
        self._coredumps_dir = coredumps_dir
        # start_event is set in handle_process_start(); termination_event is set when an exit or crash is handled.
        self.start_event = gevent.event.Event()
        self.termination_event = gevent.event.Event()
        self._expand_spec = expand_spec
        # Created in init().
        self._handler_runner = None
        # Set by stop_container() once the pre-stop phase is over.
        self.pre_stop_action_finished = gevent.event.Event()
        self.porto_mode = porto_mode
        self._coredump_filemask = coredump_filemask
        self._core_files_limit = core_files_limit
        # Name (not number) of the CPU policy enum value from the spec.
        self._cpu_policy = hq_types_pb2.CpuPolicy.Type.Name(spec.cpu_policy.type)
        # When set, _sleep_on_retry() writes sleep durations to this file instead of sleeping.
        self.mock_retry_sleeper_output = env.mock_retry_sleeper_output

    @staticmethod
    def _state_value_to_name(value):
        """
        Look up the name of a ContainerInternalState value via ``pbutil.enum_value_to_name``.

        :type value: int
        :rtype: unicode
        """
        descriptor = instancectl_pb2.ContainerInternalState.DESCRIPTOR
        return pbutil.enum_value_to_name(descriptor, value)

    def set_state(self, state_int):
        """
        Switch to a new state, carrying over the restart counter and the last termination status.

        The new state object is stamped with the current time as its state change time.

        :type state_int: int
        """
        previous = self._state
        fresh = instancectl_pb2.ContainerInternalState()
        fresh.last_state_change_time.GetCurrentTime()
        fresh.state = state_int
        fresh.restart_count = previous.restart_count
        fresh.last_termination_status.CopyFrom(previous.last_termination_status)
        self._set_state(fresh)

    def _set_state(self, state_pb):
        """
        :type state_pb: instancectl_pb2.ContainerInternalState
        """
        state_name = self._state_value_to_name(state_pb.state)
        self.log.info('New state: %s', state_name)
        self._event_log.info('New state: %s', state_name)
        self._state = state_pb

    def handle_process_termination(self, exit_status):
        if exit_status is None:
            return None
        elif exit_status == 0:
            state = self.handle_process_exit(self.process.pid)
        else:
            state = self.handle_process_crash(self.process.pid, exit_status)
        self.process = None
        return state

    def handle_process_exit(self, pid):
        """
        Record a clean exit (status 0): switch to EXITED and set the termination event.

        The restart counter and the last start time are carried over from the current state.

        :type pid: int
        :return: the new EXITED state
        """
        previous = self._state
        exited = instancectl_pb2.ContainerInternalState()
        exited.last_state_change_time.GetCurrentTime()
        exited.state = instancectl_pb2.ContainerInternalState.EXITED
        exited.restart_count = previous.restart_count
        exited.last_start_time.CopyFrom(previous.last_start_time)
        self.fill_last_termination_status(0, exited.last_termination_status)
        self._set_state(exited)
        for logger in (self.log, self._event_log):
            logger.info('Exited with code 0')
        self.termination_event.set()
        return exited

    def handle_process_crash(self, pid, exit_status):
        """
        Record a crash: switch to CRASHED, log the details, write a crash log record and set the termination event.

        :type pid: int
        :type exit_status: int | None
        :return: the new CRASHED state
        """
        previous = self._state
        crashed = instancectl_pb2.ContainerInternalState()
        crashed.last_state_change_time.GetCurrentTime()
        crashed.state = instancectl_pb2.ContainerInternalState.CRASHED
        crashed.restart_count = previous.restart_count
        crashed.last_start_time.CopyFrom(previous.last_start_time)
        self.fill_last_termination_status(exit_status, crashed.last_termination_status)
        self._set_state(crashed)

        exit_pb = crashed.last_termination_status.exit_status
        loggers = (self.log, self._event_log)
        if exit_pb.if_exited:
            for logger in loggers:
                logger.error('Exited with code %s', exit_pb.exit_status)
        if exit_pb.if_signaled:
            for logger in loggers:
                logger.error('Terminated with signal %s, coredumped: %s', exit_pb.term_signal, exit_pb.coredumped)
        if not (exit_pb.if_exited or exit_pb.if_signaled):
            # Neither flag is set when no exit status was available.
            for logger in loggers:
                logger.error('Container not found unexpectedly, exit status is unknown')

        now = time.time()
        record = {
            'timestamp_ms': int(now * 1000),
            'datetime': datetime.datetime.fromtimestamp(now),
            'section': self.name,
            'exit_code': self._make_legacy_exit_code(exit_pb),
            'pid': pid,
            'instance_name': self._env.instance_name,
            'exit_status': exit_pb.exit_status if exit_pb.if_exited else '',
            'term_signal': exit_pb.term_signal if exit_pb.if_signaled else '',
            'coredumped': '1' if exit_pb.coredumped else '',
        }
        self.crash_log.error(record)
        self.termination_event.set()
        return crashed

    @staticmethod
    def _make_legacy_exit_code(s):
        """
        We have to continue log this strange exit code for backward compatibility

        :type s: clusterpb.types_pb2.ExitStatus
        :rtype: int | unicode
        """
        if s.if_signaled:
            return -s.term_signal
        elif s.if_exited:
            return s.exit_status
        return ''

    def handle_process_start(self):
        """
        Record a successful start: switch to STARTED, bump the restart counter and set the start event.

        The last termination status is carried over from the current state.

        :return: the new STARTED state
        """
        previous = self._state
        started = instancectl_pb2.ContainerInternalState()
        started.last_state_change_time.GetCurrentTime()
        started.state = instancectl_pb2.ContainerInternalState.STARTED
        started.restart_count = previous.restart_count + 1
        started.last_start_time.GetCurrentTime()
        started.last_termination_status.CopyFrom(previous.last_termination_status)
        self._set_state(started)
        self.start_event.set()
        return started

    def get_state(self):
        """
        :rtype: infra.nanny.instancectl.proto.instancectl_pb2.ContainerInternalState
        """
        return self._state

    def fill_last_termination_status(self, exit_status, pb):
        """
        Fill ``pb`` with the exit status, start/termination times and the tails of stderr/stdout.

        Failures to read an output tail are logged as warnings and otherwise ignored.

        :type exit_status: int | None
        :type pb: clusterpb.types_pb2.LastTerminationStatus
        """
        if exit_status is not None:
            utils.cast_exit_status_to_pb(exit_status, pb.exit_status)
        pb.start_time.CopyFrom(self._state.last_start_time)
        pb.termination_time.GetCurrentTime()

        # Best effort: a missing or unreadable output file must not break state handling.
        try:
            err_tail = utils.read_last_bytes_from_file(self._stderr_path, constants.HQ_LOG_TAIL_BYTE_LIMIT)
            pb.stderr_tail = err_tail
        except Exception:
            self.log.warning('Could not read from binary stderr')

        try:
            out_tail = utils.read_last_bytes_from_file(self._stdout_path, constants.HQ_LOG_TAIL_BYTE_LIMIT)
            pb.stdout_tail = out_tail
        except Exception:
            self.log.warning('Could not read from binary stdout')

    @property
    def pid_file_path(self):
        """
        Path to the PID file: ``constants.PIDS_DIR`` joined with the section name.
        """
        pids_dir = constants.PIDS_DIR
        return os.path.join(pids_dir, self.name)

    def _store_pid_ignore_errors(self, pid):
        """
        Try to put started process pid into pid-file.

        We must ignore errors: RTCSUPPORT-4760

        :param int pid: PID
        :return:
        """
        if not pid:
            return
        try:
            self._store_pid(pid)
        except Exception:
            self.log.exception('cannot put pid %s to pid-file', pid)

    def _store_pid(self, pid):
        """
        Create the PID directory if needed and write ``pid`` to the PID file, overwriting it.

        :type pid: int
        """
        fs.makedirs_ignore(constants.PIDS_DIR)

        target = self.pid_file_path
        self.log.debug('storing pid (%s) to file %s', pid, target)
        with open(target, 'w') as pid_file:
            pid_file.write(str(pid))

    def eval_argument(self, arg, script):
        """
        Evaluate one argument by running a shell script and taking the first line of its stdout.

        The script runs through the shell with the job's limits and environment; its stderr
        is appended to the job's stderr file (or /dev/null when that file cannot be opened).
        The script gets 5 seconds to finish.

        :param arg: name of the argument being evaluated (used in logs and error messages)
        :param script: shell command line to run
        :return: first line of the script's stdout, stripped
        :raises ArgumentEvaluationError: if the script is killed by timeout or exits with a non-zero code
        """
        self.log.debug('evaluating %s', arg)

        timeout = 5
        self.log.debug('timeout: %d', timeout)

        starttime = time.time()
        self.log.debug('starting %s', script)

        execute_helper = ExecuteHelper(self._limits)

        stderr = utils.tryopen(self._stderr_path, 'a')
        if stderr is None:
            self.log.info('Redirecting stderr to /dev/null for %r.', self.name)
            stderr = utils.tryopen('/dev/null', 'a')

        process = None
        try:
            process = subprocess.Popen(
                script,
                shell=True,
                preexec_fn=execute_helper,
                stdout=subprocess.PIPE,
                stderr=stderr,
                env=self._process_environment,
            )

            # Poll instead of blocking in wait() so that other greenlets keep running.
            while time.time() - starttime < timeout and process.poll() is None:
                gevent.sleep(0.1)

            return_code = process.poll()
            if return_code is None:
                os.kill(process.pid, signal.SIGKILL)
                # Reap the killed child, otherwise it stays around as a zombie.
                process.wait()
                self.log.error('script for %s killed by timeout', arg)
                raise ArgumentEvaluationError('script for %s killed by timeout' % arg)

            if return_code:
                msg = 'evaluating argument {} ended with return code {}'.format(arg, return_code)
                self.log.error(msg)
                raise ArgumentEvaluationError(msg)

            res = process.stdout.readline().strip()
        finally:
            # Release the descriptors opened for this evaluation on every exit path.
            if process is not None and process.stdout is not None:
                process.stdout.close()
            if stderr is not None:
                stderr.close()

        self.log.info('evaluation of argument %s: %s', arg, res)
        return res

    def eval_arguments(self, command, args_to_eval):
        """
        Evaluate every script in ``args_to_eval`` and substitute the results into ``command``.

        Each element of ``command`` is treated as a ``str.format`` template whose named
        fields are the keys of ``args_to_eval``.

        :type command: list[unicode]
        :type args_to_eval: dict[unicode, unicode]
        :rtype: list[unicode]
        :raises ArgumentEvaluationError: if one of the scripts fails (see :meth:`eval_argument`)
        :raises ArgumentFormatError: if an element of ``command`` is not a valid template for
            the evaluated values (unknown field name, positional field, unbalanced brace)
        """
        self.log.debug('starting to evaluate arguments')
        self.log.debug('incoming args: %s', command)
        evaluated = {}
        for arg, script in args_to_eval.items():
            evaluated[arg] = self.eval_argument(arg, script)
        self.log.debug('evaluated arguments: %r', evaluated)
        result = []
        for c in command:
            try:
                r = c.format(**evaluated)
            except (KeyError, IndexError, ValueError) as e:
                # KeyError: unknown field name; IndexError: positional field such as "{}" or "{0}";
                # ValueError: malformed template such as a lone "{".
                self.log.error('Failed formatting arguments', exc_info=True)
                raise ArgumentFormatError(str(e))
            result.append(r)
        self.log.info('Outgoing args: %s', result)
        return result

    def _hardlink_file(self, source_file, destination_file):
        """
        Replace ``destination_file`` with a hard link to ``source_file``.

        Any existing destination is removed first. Failures are logged, not raised.

        :return: True if the link was created, False if unlinking or linking failed
        :rtype: bool
        """
        self.log.info('Hard linking file: %s to %s', source_file, destination_file)

        try:
            fs.remove_ignore(destination_file)
        except EnvironmentError:
            self.log.exception('Failed unlinking %s', destination_file)

        # Second attempt in case the destination survived the removal above.
        still_there = os.path.exists(destination_file)
        if still_there:
            try:
                os.unlink(destination_file)
            except OSError:
                self.log.exception('Failed unlinking %s', destination_file)
                return False

        try:
            os.link(source_file, destination_file)
        except OSError:
            self.log.exception('Failed hard linking %s to %s', source_file, destination_file)
            return False
        else:
            self.log.debug('Hard linked %s to %s', source_file, destination_file)
            return True

    def run_script(self, script_name, script, arguments, additional_env):
        """
        Run a user script (e.g. restart_script or install_script) through the script runner.

        :param script_name: name of the script, used in log messages
        :type script_name unicode
        :type script: unicode
        :type arguments: list[unicode]
        :type additional_env: dict[unicode, unicode]
        :return: True if the runner finished without HandlerRunnerError, False otherwise
        :rtype: bool
        """
        self.log.debug('Running %s: %r', script_name, script)
        self._event_log.info('Running %s', script_name)

        try:
            self.script_runner.run(
                script,
                arguments=arguments,
                timeout=None,
                additional_env=additional_env,
            )
        except handler.HandlerRunnerError as e:
            for logger in (self.log, self._event_log):
                logger.info('%s failed: %s', script_name, e)
            return False
        else:
            self.log.info('%s OK', script_name)
            return True

    def _mock_sleep(self, script_name, duration):
        with open(self.mock_retry_sleeper_output, 'a') as f:
            f.write("retry_sleep {} {}\n".format(script_name, duration))

    def _sleep_on_retry(self, retry_sleeper, script_name):
        sleep_time = retry_sleeper.get_next_time_to_sleep()
        if not self.mock_retry_sleeper_output:
            self.log.info('Sleep %s seconds before restart %s', sleep_time, script_name)
            gevent.sleep(sleep_time)
            return
        self._mock_sleep(script_name, sleep_time)

    def run(self):
        """
        Main loop of the Job that implements the FSM.

        Runs install_script (retrying until it succeeds), switches to INSTALLED and then
        loops: start the binary, wait for it to terminate, decide whether a restart is
        needed and, if so, optionally run restart_script and go back to INSTALLED.
        Returns when the restart policy says that no restart is needed.
        """
        self.log.debug("Starting section: %s", self.name)

        # Try to install the instance
        if self._install_script:
            self._run_script_with_retries('install_script',
                                          self._install_script,
                                          arguments=[],
                                          retry_sleeper=self._install_script_restart_sleeper
                                          )

        self.set_state(instancectl_pb2.ContainerInternalState.INSTALLED)

        while True:
            if self._state.state == instancectl_pb2.ContainerInternalState.INSTALLED:
                try:
                    # Start the binary
                    self.start_binary()
                except Exception:
                    # The state stays INSTALLED, so the start is attempted again on the next iteration.
                    self.log.exception('Failed starting binary')
                    self._event_log.exception('Failed starting binary')
                    # NOTE(review): presumably increment() also delays the next attempt -- confirm
                    # against sepelib.util.retry.RetrySleeper.
                    self._restart_sleeper.increment()
                else:
                    self.handle_process_start()
                    self._store_pid_ignore_errors(self.process.pid)
            elif self._state.state == instancectl_pb2.ContainerInternalState.STARTED:
                try:
                    # Wait up to SUBPROCESS_WAIT for the process; the result is passed to
                    # handle_process_termination(), which ignores None.
                    s = self.process.wait(self.SUBPROCESS_WAIT)
                except errors.ContainerNotFoundError:
                    # No exit status can be obtained: record a crash with an unknown status.
                    self.log.error('Container not found and its exit status cannot be retrieved')
                    self.handle_process_crash(self.process.pid, None)
                    self.process = None
                except errors.ContainerCommunicationError:
                    self.log.error('Container wait call failed, will retry', exc_info=True)
                    gevent.sleep(1.0)
                else:
                    self.handle_process_termination(s)
            elif self._state.state in (instancectl_pb2.ContainerInternalState.CRASHED,
                                       instancectl_pb2.ContainerInternalState.EXITED):
                need_restart = specutil.is_restart_needed(self.spec.restart_policy,
                                                          self._state.last_termination_status.exit_status)
                if not need_restart:
                    # The restart policy forbids a restart: the main loop ends here.
                    return
                if self._need_sleep_on_restart():

                    self._sleep_on_retry(self._restart_sleeper, self.name)
                else:
                    # The process ran long enough to count as a successful start: reset the sleeper.
                    self._restart_sleeper.reset()

                if self._restart_script:
                    # The exit status of the terminated process is passed to restart_script via environment.
                    env = utils.cast_exit_status_pb_to_env(self._state.last_termination_status.exit_status)
                    # The result of restart_script is not checked: the job goes to INSTALLED either way.
                    self.run_script('restart_script', self._restart_script, arguments=[], additional_env=env)

                self.set_state(instancectl_pb2.ContainerInternalState.INSTALLED)

            # Yield to other greenlets between iterations.
            gevent.sleep()

    def _need_sleep_on_restart(self):
        """
        :rtype: bool
        """
        if self._successful_start_timeout is None:
            return True
        return self._state.last_start_time.ToMicroseconds() / 1000000.0 > time.time() - self._successful_start_timeout

    def start_binary(self):
        """
        Launch the binary itself.

        Resolves the binary path (optionally hard-linking it under another name), makes it
        executable, computes the core limit, evaluates the dynamic arguments and starts the
        process either in a Porto container or through Popen. The started process is stored
        in ``self.process``.

        :raises ValueError: if the resolved binary path is a directory
        """
        self.log.info('Attempting to start a binary')

        spec_command = self.spec.command

        binary = spec_command[0]
        if self._rename_binary:
            # The result of the hard-linking is not checked; the renamed path is used either way.
            self._hardlink_file(binary, self._rename_binary)
            binary = self._rename_binary

        if not os.path.isabs(binary):
            # A relative path is looked up in the current directory first, then in the instance directory.
            if os.path.exists(binary):
                binary = os.path.join('.', os.path.relpath(binary, start='.'))
            elif os.path.exists(os.path.join(self._env.instance_dir, binary)):
                binary = os.path.join(self._env.instance_dir, binary)

        if os.path.isdir(binary):
            raise ValueError('Failed to start a binary "{}": it is a directory. '
                             'Validate the sandbox resource you are using and the service config.'.format(binary))

        # Best effort: add the execute bits for user, group and others.
        exec_bits = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
        try:
            current_mode = os.stat(binary).st_mode
            wanted_mode = current_mode | exec_bits
            if wanted_mode != current_mode:
                os.chmod(binary, wanted_mode)
        except Exception as err:
            self.log.warning('Failed updating %s mode: %r', binary, err)

        main_binary_limits = self._limits.copy()
        core_limit = self._make_core_limit(binary)
        if core_limit is not None:
            main_binary_limits['core'] = helpers.make_limit(resource.RLIMIT_CORE, core_limit)

        args = spec_command[1:]
        if self._args_to_eval:
            args = self.eval_arguments(args, self._args_to_eval)
        command = [binary] + args

        cmd_line = subprocess.list2cmdline(command)
        self.log.info('Starting: %s', cmd_line)
        self._event_log.info('Starting: %s', cmd_line)

        if not self.porto_mode.enabled:
            stdout, stderr = self._get_stdout_stderr()
            self.process = PopenProcess(
                section=self.name,
                command=command,
                pass_fds=[self.run_flag_fd],
                cwd=self._work_dir,
                stdout=stdout,
                stderr=stderr,
                env=self._process_environment,
                limits=main_binary_limits,
            )
        else:
            core_command = self._make_core_command()
            self.process = porto_container.PortoContainer(
                sub_container=self.name,
                command=command,
                stdout=self._stdout_path,
                stderr=self._stderr_path,
                env=self._process_environment,
                limits=main_binary_limits,
                cwd=self._work_dir,
                security_context=self.spec.security_context,
                allocation=self.spec.resource_allocation,
                porto_mode=self.porto_mode,
                core_command=core_command,
                cpu_policy=self._cpu_policy,
            )

        self.process.start()

    def _run_script_with_retries(self, script_name, script, arguments, retry_sleeper):
        """
        Выполняет :param script_name: до тех пор, пока он не вернёт 0.

        Попытки повторяются с экспоненциально растущими перерывами, ограниченнымы сверху :param max_timeout:

        :type script_name: str | unicode
        :type script: unicode
        :type arguments: list[unicode]
        :type retry_sleeper: sepelib.util.retry.RetrySleeper
        """
        retry_sleeper.reset()

        while not self.run_script(script_name, script, arguments, additional_env={}):
            self._sleep_on_retry(retry_sleeper, script_name)

    def uninstall(self):
        if self._uninstall_script:
            self._run_script_with_retries('uninstall_script',
                                          self._uninstall_script,
                                          arguments=[],
                                          retry_sleeper=retry.RetrySleeper(max_delay=self.USER_SCRIPT_RETRY_MAX_TIMEOUT)
                                          )

    def _get_stdout_stderr(self):
        """
        Возвращает stdout и stderr для пользовательских скриптов и бинарника
        """
        if self._stdout is None:
            self._stdout = utils.tryopen(self._stdout_path, 'a')
            if self._stdout is None:
                self.log.info('Redirecting stdout to /dev/null for %r.', self.name)
                self._stdout = utils.tryopen('/dev/null', 'a')

        if self._stderr is None:
            self._stderr = utils.tryopen(self._stderr_path, 'a')
            if self._stderr is None:
                self.log.info('Redirecting stderr to /dev/null for %r.', self.name)
                self._stderr = utils.tryopen('/dev/null', 'a')

        return self._stdout, self._stderr

    def stop_container(self, term_barrier):
        """
        Stop the watcher thread and then bring the managed process down.

        Steps:

        1. Stop the watcher green thread (up to 10 attempts of 1 second each).
        2. If the process is running and a pre-stop handler is configured, run it and,
           when ``stop_grace_period_seconds`` is set, wait that long for the process to exit.
        3. Set ``pre_stop_action_finished`` and, if the spec asks for it, wait for ``term_barrier``.
        4. If the process is still alive: terminate it, wait ``termination_grace_period_seconds``
           and then kill it if it has not exited.
        5. Record the final exit status, if it is available.

        :type term_barrier: gevent.event.Event
        :return: False if sending the terminate signal failed (the kill step is skipped then),
            None otherwise
        """
        def wait_for_exit(timeout):
            # Poll the exit status until the termination has been handled or `timeout` seconds elapse.
            with gevent.Timeout(timeout, False):
                while True:
                    s = self.process.get_exit_status()
                    if self.handle_process_termination(s) is not None:
                        break
                    gevent.sleep(0.1)

        self.log.info('Stop requested')
        for i in range(10):
            try:
                with gevent.Timeout(1):
                    super(Job, self).stop()
            except gevent.Timeout:
                self.log.warning('Cannot stop job watcher thread on %s attempt', i + 1)
            else:
                self.log.info('Job watcher thread stopped')
                # Stopped successfully: no further attempts are needed.
                break

        is_started = self._state.state == instancectl_pb2.ContainerInternalState.STARTED
        pre_stop = self.spec.lifecycle.pre_stop
        if is_started and self.process is not None and pre_stop.type != types_pb2.Handler.NONE:
            args = handler.HandlerRunnerArguments(exec_args=[])
            try:
                self._handler_runner.run(pre_stop, args)
            except handler.HandlerRunnerError as e:
                self.log.warning('Prestop action failed: %s', e)
            if self.spec.lifecycle.stop_grace_period_seconds:
                timeout = self.spec.lifecycle.stop_grace_period_seconds
                self.log.info('Waiting for instance to stop for terminate_timeout=%s', timeout)
                self._event_log.info('Waiting for instance to stop for terminate_timeout=%s', timeout)
                wait_for_exit(timeout)
        self.pre_stop_action_finished.set()
        if self.spec.lifecycle.term_barrier == types_pb2.Lifecycle.WAIT:
            self.log.info('Waiting for TERM barrier (waiting for all containers pre stop actions to be made)')
            term_barrier.wait()
        if self.process is not None and self.process.get_exit_status() is None:
            try:
                self.process.terminate()
                self.log.info('Sent terminate signal')
                self._event_log.info('Sent terminate signal')
            except Exception as err:
                self.log.warning(
                    'Failed terminating %r: %r.',
                    self.name,
                    err
                )
                return False

            self.log.info('Waiting %s before sending SIGKILL to process',
                          self.spec.lifecycle.termination_grace_period_seconds)
            wait_for_exit(self.spec.lifecycle.termination_grace_period_seconds)

            # self.process is reset to None once its termination has been handled.
            if self.process is not None:
                try:
                    self.process.kill()
                    self.log.info('Sent kill to process')
                    self._event_log.info('Sent kill to process')
                except Exception:
                    self.log.exception('Failed to kill job')
                    self._event_log.error('Failed to kill job')

        if self.process is not None:
            s = self.process.get_exit_status()
            self.handle_process_termination(s)
        self.log.info('Process stopped')

    def init(self):
        """
        Create the exec runner and everything built on top of it: prober, script runner and handler runner.

        The runner uses the job's stdout/stderr streams, environment and limits, and passes
        the run flag descriptor to the commands it starts.
        """
        stdout, stderr = self._get_stdout_stderr()
        exec_runner = handler.ExecRunner(
            stdout=stdout,
            stderr=stderr,
            env=self._process_environment,
            limits=self._limits,
            pass_fds=[self.run_flag_fd],
            log=logging.getLogger('{}-ready-probe'.format(self.name)),
        )
        self.prober = prober.Prober(prober.ExecProber(exec_runner))
        self.script_runner = procutil.ScriptRunner(exec_runner)
        http_runner = handler.HttpGetRunner()
        self._handler_runner = handler.HandlerRunner(
            http_get_runner=http_runner,
            exec_runner=exec_runner,
        )

    def get_prober(self):
        """
        :rtype: instancectl.status.prober.prober.Prober
        """
        return self.prober

    def set_run_flag_fd(self, fd):
        """
        :type fd: file
        """
        self.run_flag_fd = fd.fileno()

    def start(self):
        """
        Start the minidump sender and then the job's own green thread.
        """
        self._minidump_sender.start()
        super(Job, self).start()

    def reopenlogs(self):
        """
        Запуск скрипта переоткрытия логов демона
        """
        if self.spec.HasField('reopen_log_action'):
            h = self.spec.reopen_log_action.handler
            self.log.info('Running reopenlog action')
            self._event_log.info('Running reopenlog action')
            args = handler.HandlerRunnerArguments(exec_args=[])
            try:
                self._handler_runner.run(h, args)
            except handler.HandlerRunnerError as e:
                self.log.info('Reopenlog failed: %s', e)
                self._event_log.info('Reopenlog failed: %s', e)
            else:
                self.log.info('Reopenlog action: OK')
                self._event_log.info('Reopenlog action: OK')

    def _make_core_limit(self, binary):
        """
        Decide whether the binary is allowed to dump cores.

        :return: ``helpers.LIMIT_UNLIMITED`` to allow cores, 0 to forbid them, or None when
            no coredump probability is configured (the limit is then left untouched by the caller)
        :rtype: int | None
        """
        # Probability will be set in core command
        if self.spec.coredump_policy.type != types_pb2.CoredumpPolicy.NONE:
            return helpers.LIMIT_UNLIMITED

        probability = self._coredump_probability
        if probability is None:
            return None

        # Toss a coin: with probability coredump_probability (in percent) the core limit becomes
        # unlimited, otherwise it is 0.
        #
        # If the coredumps directory already holds a core that looks like it belongs to our daemon and
        # is newer than the daemon's binary, cores are forced to 0. The always_coredump option turns
        # this check off, so the outcome depends on coredump_probability only.
        if random.randint(0, 99) >= probability:
            return 0

        if self._always_coredump:
            return helpers.LIMIT_UNLIMITED

        binary_mtime = os.stat(binary).st_mtime
        # Without an explicit mask, look for files whose names start with the binary name.
        core_mask = self._coredump_filemask or '{}*'.format(os.path.basename(binary))

        self.log.info('Checking for cores for mask "%s" in directory "%s", count limit: "%s", mtime: "%s"',
                      core_mask,
                      self._coredumps_dir,
                      self._core_files_limit,
                      binary_mtime)

        try:
            cores_exist = utils.is_fresh_core_exists(core_mask,
                                                     binary_mtime,
                                                     self._core_files_limit,
                                                     self._coredumps_dir)
        except Exception:
            # When the check itself fails, err on the side of not producing cores.
            self.log.exception('Core checking failed')
            return 0

        if not cores_exist:
            return helpers.LIMIT_UNLIMITED

        self.log.info('Disabled coredumps due to existing cores')
        return 0

    def process_spec(self, vault_client, yav_client):
        """
        Prepare the job spec using the process environment.

        Calls ``specutil.populate_container_env`` with the spec, the process
        environment and both secret clients; afterwards, if ``_expand_spec``
        is set, calls ``specutil.expand_container_spec_variables``.

        :param vault_client: passed through to ``specutil.populate_container_env``
        :param yav_client: passed through to ``specutil.populate_container_env``
        """
        specutil.populate_container_env(
            self.spec,
            self._process_environment,
            vault_client,
            yav_client,
        )

        if not self._expand_spec:
            return

        specutil.expand_container_spec_variables(self.spec, self._process_environment)

    def _make_core_command(self):
        """
        Build the core-processing command line from the spec's coredump policy.

        :return: ``None`` for the ``NONE`` policy; the configured custom
            processor command for ``CUSTOM_CORE_COMMAND``; for ``COREDUMP`` a
            ``core_process`` invocation of the current executable
            (``/proc/self/exe``) with the options taken from the spec
        :raises IncorrectJobParametersError: on an unknown policy type, or when
            probability, count limit, total size limit, TTL or the SaaS
            aggregator parameters are missing/zero
        """
        policy = self.spec.coredump_policy
        policy_type = policy.type

        if policy_type == types_pb2.CoredumpPolicy.NONE:
            return None

        if policy_type == types_pb2.CoredumpPolicy.CUSTOM_CORE_COMMAND:
            return policy.custom_processor.command

        if policy_type != types_pb2.CoredumpPolicy.COREDUMP:
            raise IncorrectJobParametersError("Unknown coredump policy type")

        processor = policy.coredump_processor

        # Command fragments are collected here and concatenated at the end;
        # every option fragment carries its own leading space.
        executable = os.readlink("/proc/self/exe")
        core_file_name = (
            "{section_name}-${{CORE_TASK_NAME}}-${{CORE_PID}}-${{CORE_TID}}-${{CORE_SIG}}"
            .format(section_name=self.name)
        )
        fragments = [
            "{binary_path} core_process --output={output_path}".format(
                binary_path=executable,
                output_path="/".join([processor.path, core_file_name])
            )
        ]

        deploy_engine = self._env.default_container_env.get('DEPLOY_ENGINE', "")
        if deploy_engine == 'YP_LITE':
            fragments.append(" --instance-name={}".format(self._env.hostname))

        # Mandatory numeric settings: an unset/zero value is rejected
        required_settings = (
            (processor.probability, "Invalid core probability"),
            (processor.count_limit, "Invalid core count limit"),
            (processor.total_size_limit, "Invalid core total size limit"),
        )
        for setting_value, error_message in required_settings:
            if not setting_value:
                raise IncorrectJobParametersError(error_message)

        fragments.append(" --probability={}".format(str(processor.probability)))

        cleanup_policy = processor.cleanup_policy
        if cleanup_policy.type == types_pb2.CoredumpCleanupPolicy.TTL:
            ttl_seconds = cleanup_policy.ttl.seconds
            if not ttl_seconds:
                raise IncorrectJobParametersError("Invalid ttl set")
            fragments.append(" --ttl={}".format(str(ttl_seconds)))

        fragments.append(" --count-limit={}".format(str(processor.count_limit)))

        # The spec stores the limit in megabytes; the command line takes bytes
        total_size_bytes = processor.total_size_limit * 1024 * 1024
        fragments.append(" --total-size-limit={}".format(str(total_size_bytes)))

        aggregator = processor.aggregator
        if aggregator.type == types_pb2.CoredumpAggregator.SAAS_AGGREGATOR:
            saas = aggregator.saas

            if not saas.url or not saas.service_name or not saas.gdb.exec_path:
                raise IncorrectJobParametersError("Invalid core aggregation parameters")

            gdb_path = saas.gdb.exec_path
            if not os.path.isabs(gdb_path):
                # A relative gdb path is resolved against the instance dir,
                # but only if such a file actually exists there
                instance_gdb_path = os.path.join(self._env.instance_dir, gdb_path)
                if os.path.exists(instance_gdb_path):
                    gdb_path = instance_gdb_path

            fragments.append(" --gdb-path={}".format(gdb_path))
            fragments.append(" --aggr-url={}".format(saas.url))
            fragments.append(" --svc-name={}".format(saas.service_name))

            ctype = self._env.orthogonal_tags.get('a_ctype', "")
            if ctype:
                fragments.append(" --ctype={}".format(ctype))

        return "".join(fragments)
