# coding: utf-8
import os
import re
import sys
import json
import shlex
import signal
import weakref
import time
from itertools import chain
from functools import partial

from ..service_config import ServiceConfig
from ..procs import Proc, ProcStartException
from ..interval import IntervalController
from ..framework.component import Component
from ..framework.greendeblock import Deblock
from ..framework.utils import Path
from ..framework import event, utils
from ..kernel_util.sys.gettime import monoTime
from ..kernel_util.functional import memoized

import yaml
import gevent
try:
    from gevent.coros import Semaphore
except ImportError:
    from gevent.lock import Semaphore


# Service finite-state automaton (FSA): allowed state transitions
#   STOPPED → DEPENDENCIES_WAIT
#   DEPENDENCIES_WAIT → STARTING, STOPPED
#   STARTING → PRERUNNING, STOPPING
#   PRERUNNING → STARTING, RUNNING, STOPPING
#   RUNNING → STARTING, PREFAIL, STOPPING
#   PREFAIL → STARTING, RUNNING, STOPPING
#   STOPPING → KILLING, CLEANUP
#   KILLING → CLEANUP
#   CLEANUP → COOLDOWN, STOPPED, STARTING
#   COOLDOWN → STARTING, STOPPED
class State(object):
    """Names of the service FSA states.

    The values are plain strings; StateController lower-cases them to build
    the names of its ``_<state>_to_<required_state>`` handler methods.
    """
    STOPPED = 'STOPPED'
    DEPENDENCIES_WAIT = 'DEPENDENCIES_WAIT'
    STARTING = 'STARTING'
    PRERUNNING = 'PRERUNNING'
    RUNNING = 'RUNNING'
    PREFAIL = 'PREFAIL'
    STOPPING = 'STOPPING'
    KILLING = 'KILLING'
    CLEANUP = 'CLEANUP'
    COOLDOWN = 'COOLDOWN'


class ProcAdapter(Proc):
    """Proc-shaped object describing an already finished action.

    It is always ``ready``; ``succeeded``/``failed`` mirror the ``successful``
    flag given to the constructor.  Note that ``Proc.__init__`` is not invoked
    by this class.
    """
    ready = True
    succeeded = True
    failed = False

    def __init__(self, successful, exitstatus):
        """Remember the outcome (``successful``) and the ``exitstatus`` to report."""
        self.exitstatus = exitstatus
        self.succeeded = successful
        self.failed = not successful
        self.ready = True

    def link(self, cb):
        """Invoke ``cb`` right away with this object, since the result is already known."""
        cb(self)

    def kill(self):
        """Do nothing: this adapter sends no signal to anything."""
        return None


class ProcsTracker(object):
    """Bookkeeping for the processes attached to a single service.

    Two sets are maintained:

    * ``_procs`` -- the main service processes;
    * ``_fast_procs`` -- short-lived helpers (checks or stops) that are not
      meant to remain after the service is stopped.

    Three externally owned notification objects are poked on changes:
    ``notify_event.set()`` when the set of main procs changes,
    and ``context_changed_event.release()`` / ``fields_changed_event.set()``
    when a proc with a truthy ``restorable`` attribute starts or finishes.
    """

    def __init__(self, notify_event, context_changed_event, fields_changed_event, procs=None):
        """
        :param notify_event: object with ``set()``
        :param context_changed_event: object with ``release()``
        :param fields_changed_event: object with ``set()``
        :param procs: optional iterable of already running main procs
        """
        self._notify_event = notify_event
        self._context_changed_event = context_changed_event
        self._fields_changed_event = fields_changed_event
        self._procs = set(procs) if procs else set()
        self._fast_procs = set()  # checks or stops, they aren't meant to remain after stop

    def grab_from(self, other):
        """Take over the procs of ``other`` and leave ``other`` empty.

        Only procs with a falsy ``exitstatus`` are adopted.
        NOTE(review): an exit status of ``0`` is falsy as well, so a proc that
        already exited successfully would also be adopted -- confirm what
        ``exitstatus`` holds for a live proc.
        """
        # NOTE we always assume, that we're attaching procs to 'empty' service,
        # hence there cannot be a situation when the same proc is started twice
        for proc in other._procs:
            if not proc.exitstatus:
                self.started(proc, False)

        for proc in other._fast_procs:
            if not proc.exitstatus:
                self.started(proc, True)

        other._procs = set()
        other._fast_procs = set()

    def _raw_args(self):
        """Return the set of non-None ``raw_args`` of the tracked main procs."""
        return {proc.raw_args for proc in self._procs if getattr(proc, 'raw_args', None) is not None}

    def pending_execs(self, required):
        """Return the items of ``required`` that no tracked main proc has as ``raw_args``."""
        return set(required) - self._raw_args()

    def on_finished(self, proc):
        """Callback linked to every tracked proc; forgets ``proc`` and notifies listeners."""
        if proc not in self._procs and proc not in self._fast_procs:
            return

        self._fast_procs.discard(proc)
        if proc in self._procs:
            self._procs.discard(proc)
            # procs whose raw_args were cleared (see started()) finish silently
            if getattr(proc, 'raw_args', None) is not None:
                self._notify_event.set()
        if proc.restorable:
            self._context_changed_event.release()
            self._fields_changed_event.set()

    def is_running(self, path):
        """Tell whether some main proc has ``path`` as its ``raw_args``."""
        return path in self._raw_args()

    def all_tasks_running(self, paths):
        """Tell whether the ``raw_args`` of the main procs are exactly ``paths``."""
        return self._raw_args() == set(paths)

    def running(self):
        """Return the number of tracked main procs."""
        return len(self._procs)

    def pids(self):
        """Lazily yield ``pid`` of every main proc that has one."""
        return (proc.pid for proc in self._procs if hasattr(proc, 'pid'))

    def uuids(self):
        """Lazily yield ``uuid`` of every main proc that has one."""
        return (proc.uuid for proc in self._procs if hasattr(proc, 'uuid'))

    def signal(self, sig, all=False, fast=False):
        """Send ``sig`` to tracked procs.

        * default -- main procs only;
        * ``fast=True`` -- fast (check/stop) procs only;
        * ``all=True`` -- both main and fast procs.
        """
        if all or not fast:
            for proc in self._procs.copy():
                proc.send_signal(sig)

        # ``fast=True`` alone used to match neither branch and signalled
        # nothing at all, although callers pass it to get rid of helpers.
        if all or fast:
            for proc in self._fast_procs.copy():
                proc.send_signal(sig)

    def started(self, proc, fast):
        """Start tracking ``proc`` (as a fast helper when ``fast`` is true)."""
        if fast:
            self._fast_procs.add(proc)
        else:
            raw = getattr(proc, 'raw_args', None)
            # under certain conditions we can attempt to start new proc
            # even before previous one is finalized; the newcomer is
            # anonymized and killed here so it gets restarted later.
            # NOTE(review): the original comment spoke of killing both procs,
            # but only the new one is signalled.
            if raw and raw in self._raw_args():
                proc.raw_args = None
                proc.send_signal(signal.SIGKILL)
            self._procs.add(proc)
        proc.link(self.on_finished)
        self._notify_event.set()
        if proc.restorable:
            self._context_changed_event.release()
            self._fields_changed_event.set()

    def context(self, base):
        """Return the truthy ``proc.context(base)`` results of the main procs."""
        return filter(None, (proc.context(base) for proc in self._procs.copy()))

    def rss(self):
        """Return the summed ``rss`` of the main procs."""
        return sum(proc.rss for proc in self._procs)

    def cpu_usage(self):
        """Return the summed ``cpu_usage`` of the main procs."""
        return sum(proc.cpu_usage for proc in self._procs)

    def reset_poll_interval(self):
        """Reset ``poll_interval`` and set ``wake_event`` on every main proc that has a poll interval."""
        for proc in self._procs:
            if hasattr(proc, 'poll_interval'):
                proc.poll_interval.reset()
                proc.wake_event.set()

    def __nonzero__(self):
        """Truthy when at least one proc (main or fast) is tracked.

        This matches ``__iter__``, which walks both sets; the previous ``and``
        required both sets to be non-empty.
        """
        return bool(self._procs or self._fast_procs)

    # Python 3 spelling of the truth-value hook
    __bool__ = __nonzero__

    def __iter__(self):
        """Iterate over the main procs followed by the fast procs."""
        return chain(self._procs, self._fast_procs)


class StateController(object):
    """Drives a service through the FSA described at the top of this module.

    The controller keeps the current ``state`` and the ``required_state``;
    every ``tick`` dispatches to a ``_<state>_to_<required_state>`` method
    which may switch the state (via ``_set_state``) and returns a numeric
    delay (timeouts / remaining sleep values) for the caller.
    """

    def __init__(self,
                 state, required_state, parent,
                 successful_checks=0,
                 start_attempts=0,
                 minimum_cooldown=900,
                 maximum_cooldown=7200,
                 ):
        """
        :param state: initial current state (a ``State`` value)
        :param required_state: initial target state
        :param parent: owning service; only a weak reference is stored
        :param int successful_checks: initial counter of successful checks
        :param int start_attempts: initial counter of start attempts
        :param minimum_cooldown: initial interval of the cooldown controller
        :param maximum_cooldown: maximum interval of the cooldown controller
        """
        self.state = state
        self._required_state = required_state
        self.parent = weakref.ref(parent)
        self.check = None  # currently tracked check proc, if any
        self.stop = None  # currently tracked stop proc, if any
        self.state_started = monoTime()
        self.successful_checks = successful_checks
        self.start_attempts = start_attempts
        self.cooldown_controller = IntervalController(initial=minimum_cooldown, maximum=maximum_cooldown)
        self.check_controller = IntervalController(initial=0.1, multiplier=1.2, variance=0.1, maximum=300.)
        self.start_controller = IntervalController(initial=0.1, maximum=300.)

    def tick(self, parent):
        """Run the handler for the (state, required_state) pair and return its result.

        Raises AttributeError when no ``_<state>_to_<required_state>`` method exists.
        """
        fnname = '_%s_to_%s' % (self.state.lower(), self.required_state.lower())
        state_fun = getattr(self, fnname)
        return state_fun(parent)

    def restart(self, parent):
        """Require RUNNING, reset the start bookkeeping and push an active service to STOPPING.

        The state is assigned directly here, i.e. without the side effects of
        ``_set_state`` (``state_started`` is not refreshed).
        NOTE(review): PREFAIL is not in the list below -- confirm that is intended.
        """
        self.set_required_state(State.RUNNING, parent)
        self.start_attempts = 0
        self.start_controller.reset()

        if self.state in (
            State.STARTING,
            State.PRERUNNING,
            State.RUNNING,
        ):
            self.state = State.STOPPING
        parent.context_changed_event.release()

    @property
    def time_in_state(self):
        """Monotonic time elapsed since ``state_started``."""
        return monoTime() - self.state_started

    @property
    def state_change_expired(self):
        """True once ``time_in_state`` exceeds ``Service.STATE_CHANGE_TIMEOUT``."""
        return self.time_in_state > Service.STATE_CHANGE_TIMEOUT

    @property
    def required_state(self):
        """The state the service is asked to reach."""
        return self._required_state

    def set_required_state(self, val, parent):
        """Record a new target state (logging the change); a STOPPED request also resets the start interval."""
        if val != self._required_state:
            parent.log.info("required_state %s -> %s", self._required_state, val)
            self._required_state = val
        if val == State.STOPPED:
            self.start_controller.reset()

    def _set_state(self, state, parent):
        """Switch to ``state``, restart the state timer and run the entry side effects of that state."""
        parent.log.debug("state %s -> %s", self.state, state)

        self.state = state
        self.state_started = monoTime()
        parent.context_changed_event.release()

        if state == State.STOPPED:
            self.check_controller.reset()
            self.cooldown_controller.reset()
            self.start_attempts = 0
            self.successful_checks = 0
            parent._stopped_event.set()
        elif state == State.DEPENDENCIES_WAIT:
            # ask the namespace to start the services listed in parent.after
            after = parent.after
            if after:
                try:
                    parent.namespace.start_services(after, 0)
                except Exception:
                    # FIXME we should spawn start asynchronously and don't catch timeout
                    pass
        elif state == State.STARTING:
            self.successful_checks = 0
            parent._started_event.clear()
            parent._stopped_event.clear()
            parent.make_rundir()
            parent.write_all_registry(parent._registry_data)
            parent.write_api()
        elif state == State.PRERUNNING:
            parent._started_event.set()
        elif state == State.RUNNING:
            self.start_controller.reset()
            self.cooldown_controller.reset()
            self.start_attempts = 0
        elif state == State.PREFAIL:
            self.check_controller.reset()
        elif state == State.STOPPING:
            parent._started_event.clear()
            parent._procs_tracker.reset_poll_interval()
            self.check_controller.reset()
            # a check that is still in flight is of no use any more
            if self.check is not None:
                self.check.kill()
            self.check = None
        elif state == State.CLEANUP:
            parent._stop_root_container()
        elif state == State.COOLDOWN:
            self.cooldown_controller.schedule_next()
            self.check_controller.reset()
            parent._stopped_event.set()

        parent.send_state_report(state)
        # second pass, since side-effects can take some time
        parent.context_changed_event.release()

    def _perform_check(self, parent):
        """Advance the service check by one step.

        :return: ``(status, delay)`` where ``status`` is ``True`` (check
                 succeeded), ``False`` (check failed) or ``None`` (no verdict
                 yet) and ``delay`` is the numeric delay to hand back to the
                 caller of ``tick``
        """
        if self.check is not None:
            # a check was spawned earlier: collect its verdict if there is one
            if self.check.succeeded:
                self.successful_checks += 1
                self.check = None
                return True, self.check_controller.schedule_next()
            elif self.check.failed:
                parent.log.warning('service check FAILED: %r', self.check.exitstatus)
                self.successful_checks = 0
                self.check = None
                return False, self.check_controller.schedule_next()
            return None, 5.0

        # the check interval is honoured only when the last check happened
        # after the current state was entered
        if self.state_started < parent.last_check_time:
            remaining_sleep = self.check_controller.interval - (monoTime() - parent.last_check_time)
            if remaining_sleep > 0:
                return None, remaining_sleep

        self.check = parent.spawn_check()
        if self.check is None:  # failed to start check
            return None, 1.0
        if self.check.ready:
            # NOTE: self.check is kept, so the next call will collect the
            # same verdict again through the branch above
            return bool(self.check.succeeded), 0
        return None, 5.0

    # FSA processing

    def _stopped_to_running(self, parent):
        """STOPPED: a start was requested, begin with the dependencies."""
        self._set_state(State.DEPENDENCIES_WAIT, parent)
        return 0

    def _stopped_to_stopped(self, parent):
        """STOPPED and nothing requested: idle."""
        return Service.IDLE_LOOP_TIMEOUT

    def _dependencies_wait_to_running(self, parent):
        """DEPENDENCIES_WAIT: go to STARTING when deps are ready, absent, or the wait expired."""
        if parent._deps_state.is_set() or not parent.after:
            self._set_state(State.STARTING, parent)
            return 0

        if self.state_change_expired:
            parent.log.warning("prerequisites start failed, continuing start without 'em")
            self._set_state(State.STARTING, parent)
            return 0

        return Service.STATE_CHANGE_TIMEOUT

    def _dependencies_wait_to_stopped(self, parent):
        """DEPENDENCIES_WAIT: nothing was started yet, so stopping is immediate."""
        self._set_state(State.STOPPED, parent)
        return Service.IDLE_LOOP_TIMEOUT

    def _starting_to_running(self, parent):
        """STARTING: spawn missing procs (rate limited), then wait for a successful check."""
        if self.time_in_state > Service.STATE_CHANGE_TIMEOUT:
            parent.log.info("service failed to start")
            self._set_state(State.STOPPING, parent)
            return 0

        if not parent.all_procs_running():
            # respect the start interval unless nothing was ever started
            remaining_sleep = (self.start_controller.interval - (monoTime() - parent.last_start_time)
                               if parent.last_start_time > 0
                               else 0)
            if remaining_sleep > 0:
                return remaining_sleep

            self.start_attempts += 1
            parent.spawn_missing()
            self.start_controller.schedule_next()
            return 5.

        success_status, sleep = self._perform_check(parent)
        if success_status:
            parent.log.info("service is up and running")
            self._set_state(State.PRERUNNING, parent)
        return sleep

    def _starting_to_stopped(self, parent):
        """STARTING: stop requested."""
        self._set_state(State.STOPPING, parent)
        return 0

    def _prerunning_to_running(self, parent):
        """PRERUNNING: fall back to STARTING on any problem, promote to RUNNING after 1800 in state."""
        if not parent.all_procs_running():
            if monoTime() - parent.last_start_time > 900:
                self.check_controller.reset()
            self._set_state(State.STARTING, parent)
            return 0

        try:
            success_status, sleep = self._perform_check(parent)
        except Exception as e:
            parent.log.warning("check failed: %s", e)
            self._set_state(State.STARTING, parent)
            sleep = 0
        else:
            if success_status is False:
                self._set_state(State.STARTING, parent)
            elif success_status is True and self.time_in_state > 1800:
                self._set_state(State.RUNNING, parent)

        return sleep

    def _prerunning_to_stopped(self, parent):
        """PRERUNNING: stop requested."""
        self._set_state(State.STOPPING, parent)
        return 0

    def _running_to_running(self, parent):
        """RUNNING: restart missing procs via STARTING, degrade to PREFAIL on a failed check."""
        if not parent.all_procs_running():
            if monoTime() - parent.last_start_time > 900:
                self.check_controller.reset()
            self._set_state(State.STARTING, parent)
            return 0

        try:
            success_status, sleep = self._perform_check(parent)
        except Exception as e:
            parent.log.warning("check failed: %s", e)
            self._set_state(State.PREFAIL, parent)
            sleep = 0
        else:
            if success_status is False:
                self._set_state(State.PREFAIL, parent)
                sleep = 0

        return sleep

    def _running_to_stopped(self, parent):
        """RUNNING: stop requested."""
        self._set_state(State.STOPPING, parent)
        return 0

    def _prefail_to_running(self, parent):
        """PREFAIL: recover to RUNNING on a successful check, give up (STOPPING) after the timeout."""
        if not parent.all_procs_running():
            if monoTime() - parent.last_start_time > 900:
                self.check_controller.reset()
            self._set_state(State.STARTING, parent)
            return 0

        success_status, sleep = self._perform_check(parent)
        if success_status:
            parent.log.info("check succeeded, service recovered")
            self._set_state(State.RUNNING, parent)
            self.check_controller.interval = self.check_controller.maximum
        elif self.time_in_state > Service.STATE_CHANGE_TIMEOUT:
            parent.log.info("service failed to recover")
            self._set_state(State.STOPPING, parent)
            return 0

        return sleep

    def _prefail_to_stopped(self, parent):
        """PREFAIL: stop requested."""
        self._set_state(State.STOPPING, parent)
        return 0

    def _stopping_to_running(self, parent):
        """STOPPING is always completed first, whatever the required state is."""
        return self._stopping_to_stopped(parent)

    def _stopping_to_stopped(self, parent):
        """STOPPING: run the stop proc until the check fails and no main procs are left.

        Escalates to KILLING once the state timeout expires.
        """
        if self.stop is None:
            self.stop = parent.spawn_stop()
            if not self.stop.ready:
                return 1.0

        if self.state_change_expired:
            self.stop = None
            self._set_state(State.KILLING, parent)
            return 0

        success_status, sleep = self._perform_check(parent)

        # a finished stop proc is forgotten, so the next tick spawns a new one
        if self.stop.ready:
            self.stop = None

        if success_status is False and not parent.procs_left(all=False):
            self.stop = None
            self._set_state(State.CLEANUP, parent)
            return 0

        return sleep

    def _killing_to_running(self, parent):
        """KILLING is always completed first, whatever the required state is."""
        return self._killing_to_stopped(parent)

    def _killing_to_stopped(self, parent):
        """KILLING: kill procs (``all=False``) until none of them is left."""
        parent.kill_procs(all=False)

        if not parent.procs_left(all=False):
            self._set_state(State.CLEANUP, parent)
            return 0

        return 1.0

    def _cleanup_to_running(self, parent):
        """CLEANUP with RUNNING required: start again, or cool down after more than 60 start attempts."""
        # give stop-proc a chance to die by itself
        if not self.state_change_expired and parent.procs_left(all=True):
            return 5.0

        parent.kill_procs(all=True)

        if not parent.procs_left(all=True):
            if self.start_attempts > 60:
                self._set_state(State.COOLDOWN, parent)
                parent.check_source_hash()
            else:
                self._set_state(State.STARTING, parent)
            return 0

        return 1.0

    def _cleanup_to_stopped(self, parent):
        """CLEANUP with STOPPED required: kill everything, then become STOPPED."""
        parent.kill_procs(all=True)

        if not parent.procs_left(all=True):
            parent.log.info("service stopped")
            self._set_state(State.STOPPED, parent)
            return Service.IDLE_LOOP_TIMEOUT

        return 1.0

    def _cooldown_to_running(self, parent):
        """COOLDOWN: wait out the cooldown interval, then try STARTING again."""
        remaining_sleep = self.cooldown_controller.interval - self.time_in_state
        if remaining_sleep > 0:
            return remaining_sleep

        self._set_state(State.STARTING, parent)
        return 0

    def _cooldown_to_stopped(self, parent):
        """COOLDOWN: stop requested, nothing is running so become STOPPED at once."""
        parent.log.info("service stopped")
        self._set_state(State.STOPPED, parent)
        return Service.IDLE_LOOP_TIMEOUT


class ServiceCorrupted(Exception):
    """Raised by Service.__init__ when the service base directory is not found."""


class Service(Component):
    # matches ``${name}`` and ``${type:name}`` placeholders
    # (group 1 -- optional type, group 2 -- name)
    arg_re = re.compile(r'\$\{(?:([^:}]+):)?([^}]+)\}', re.UNICODE)
    # split points: ':' / '.' that are not escaped with a backslash
    cfg_item_split_re = re.compile(r'(?<!\\):', re.UNICODE)
    cfg_parts_split_re = re.compile(r'(?<!\\)\.', re.UNICODE)

    # limit StateController applies to the time spent in transitional states
    STATE_CHANGE_TIMEOUT = 90.0
    # delay StateController returns while the service is idle (stopped)
    IDLE_LOOP_TIMEOUT = 300.0
    # NOTE(review): the intervals below are not referenced in the visible part
    # of this file; presumably they are used further down -- confirm
    MINIMAL_CHECK_INTERVAL = 1.0
    DEPS_CHECK_INTERVAL = 5.0
    STOPPING_CHECK_INTERVAL = 0.5
    STATE_CHANGE_CHECK_INTERVAL = 10.0
    CLEANUP_KILL_INTERVAL = 1.0
    CHECK_TIMEOUT = 10.0

    def __init__(self,
                 namespace,
                 starter,
                 deblock,
                 output_logger_factory,
                 context_changed_event,
                 apifile=None,
                 fieldsfile=None,
                 cfg=None,
                 procs=None,
                 registry=None,
                 state=State.STOPPED,
                 required_state=State.STOPPED,
                 context=None,
                 stats=None,
                 reporter=None,
                 force_restart=None,
                 ):
        """
        Create the service, optionally restoring it from a saved ``context``.

        On any failure during the final set-up stage the error is logged, the
        tracked procs are killed, the service is removed from ``namespace.childs``
        and the exception is re-raised.

        :param Namespace namespace: parent namespace containing this service
        :param Starter starter: fabric to start processes
        :param ..framework.greendeblock.Deblock deblock: deblock for i/o operations
        :param output_logger_factory: provides ``make_logger`` and ``logdir`` for the service output
        :param Semaphore context_changed_event: event set when some context data changed
        :param str apifile: path to file where rendered API should be stored
        :param str fieldsfile: path to file where the service fields should be stored
        :param ServiceConfig cfg: service description (replaced by the one from ``context`` if given)
        :param set procs: set of Proc attached to this service
        :param ConfigUpdater registry: config registry to query for service config sections
        :param State state: current service state
        :param State required_state: state to achieve
        :param dict context: saved context to load from
        :param stats: statistics storage used to report resource usage
        :param reporter: optional object used to send state reports
        :param str force_restart: if 'ALL', all imported, if 'SKYDEPS', all skydeps dependent services will be restarted
        :raises ServiceCorrupted: when the service base directory does not exist
        """
        self.cfg = cfg
        self.starter = starter
        self.deblock = deblock
        self.registry = registry
        self.apifile = apifile
        self.fieldsfile = fieldsfile
        self.reporter = reporter

        self.context_changed_event = context_changed_event
        self.fields_changed_event = event.Event()
        self._deps_state = event.Event()
        self._started_event = event.Event()

        self._stopped_event = event.Event()
        self.service_event = event.Event()
        self._deps_state.link_event(self.service_event)

        self._procs_tracker = ProcsTracker(self.service_event,
                                           self.context_changed_event,
                                           self.fields_changed_event, procs)
        self._state_controller = StateController(state, required_state, self)
        self.stats = stats

        self.fsa_paused = False
        self._registry_state = {}
        self._registry_data = {}
        self._proc_start_lock = Semaphore(1)
        self.last_check_time = -1
        self.last_start_time = -1
        self.valid_cgroups = None

        if context is not None:
            # restore the persisted state; the saved cfg overrides the ``cfg`` argument
            self.cfg = ServiceConfig.from_context(base=namespace.base, **context['cfg'])
            restart = False
            if force_restart == 'ALL':
                restart = True
            elif force_restart == 'SKYDEPS':
                if self.cfg.cfg_version() >= 6:
                    restart = self.cfg.restart_on_skydeps_upgrade

            self._registry_state = context.get('registry_state', {})
            self._state_controller.state = self._translate_state(context['state'], force_restart=restart)
            self._state_controller._required_state = context['required_state']
            # NOTE(review): a context without these keys sets the intervals to None
            self._state_controller.check_controller.interval = context.get('check_interval')
            self._state_controller.start_controller.interval = context.get('start_interval')
            # timestamps that end up in the future are discarded (-1)
            self.last_check_time = utils.to_monotime(context.get('last_check_time', -1))
            self.last_check_time = -1 if self.last_check_time > monoTime() else self.last_check_time
            self.last_start_time = utils.to_monotime(context.get('last_start_time', -1))
            self.last_start_time = -1 if self.last_start_time > monoTime() else self.last_start_time

            state_start_time = context.get('state_start_time', -1)
            if state_start_time > 0:
                self._state_controller.state_started = utils.to_monotime(state_start_time)

        # bring the waitable events in line with the (possibly restored) state
        if self.state in (State.PRERUNNING, State.RUNNING):
            self._started_event.set()
        elif self.state in (State.COOLDOWN, State.STOPPED):
            self._stopped_event.set()

        self._state_controller.check_controller.maximum = self.cfg.max_check_interval

        super(Service, self).__init__(logname=None, log_msg_prefix=self.cfg.name, parent=namespace)

        # no dependencies -- nothing to wait for
        if not self.after:
            self._deps_state.set()

        self.out_log = self.deblock.apply(output_logger_factory.make_logger, self.name)
        self.check_log = self.deblock.apply(output_logger_factory.make_logger, self.name, "check")
        self.logdir = output_logger_factory.logdir.join(self.name).strpath

        try:
            self.make_rundir()

            if context is not None:
                # re-attach to the procs recorded in the saved context
                for proc_ctx in context['procs']:
                    proc = self.starter.reconnect(
                        base=self.base,
                        log=self.log,
                        out_log=self.out_log,
                        ctx=proc_ctx)
                    if proc:
                        self._procs_tracker.started(proc, False)

            self.register_cfg_handlers()

            if not self.deblock.apply(
                Path(self.cfg.format_item('${CURDIR}')).check,
                exists=1,
                dir=1,
            ):
                raise ServiceCorrupted("Service base is not found")

        except Exception as e:
            self.log.exception(
                "service creation failed, service will be removed from namespace: %s" % (e,),
                exc_info=sys.exc_info()
            )
            if namespace is not None:
                self._procs_tracker.signal(signal.SIGKILL, all=True)
                namespace.childs.remove(self)
            raise

    def _translate_state(self, state, force_restart):
        """Map a saved ``state`` to the state to resume from.

        Without ``force_restart`` the state is returned untouched.  With it,
        active states (STARTING, PRERUNNING, RUNNING, PREFAIL) become STOPPING
        and COOLDOWN becomes STOPPED; any other state is kept.
        """
        if force_restart:
            if state in (State.STARTING, State.PRERUNNING, State.RUNNING, State.PREFAIL):
                return State.STOPPING
            if state == State.COOLDOWN:
                return State.STOPPED
        return state

    def start(self):
        """Start the component and subscribe it to the registry.

        On failure the error is logged, the service is stopped, its procs are
        killed and it is removed from the namespace; the exception is NOT
        re-raised.

        :return: self
        :raises RuntimeError: when the config declares sections but no registry was given
        """
        if self.cfg.conf_sections and not self.registry:
            raise RuntimeError("Cannot run service subscribed to config, without registry")

        try:
            super(Service, self).start()

            if self.cfg.conf_sections:
                self.registry.subscribe(
                    self.registry_changed,
                    [self.cfg_parts_split_re.split(section) for section in self.cfg.configs],
                    self.cfg.as_dict()['version'] >= 4
                )
            if self.cfg.cgroup is None:
                # NOTE(review): self.registry is used here even when the guard
                # above did not require one -- confirm it can never be None
                self.registry.subscribe(
                    self.default_cgroup_changed,
                    [('skynet', 'skycore', 'config')],
                    config_only=True
                )
            else:
                self.valid_cgroups = self.starter.get_valid_cgroups(self.cfg.take('cgroup'))

            self.fields_changed_event.set()
        except Exception as e:
            self.log.exception(
                "service start failed, service will be removed from namespace: %s" % (e,),
                exc_info=sys.exc_info()
            )
            self.stop()
            namespace = self.namespace
            if namespace is not None:
                self._procs_tracker.signal(signal.SIGKILL, all=True)
                namespace.childs.remove(self)

        return self

    def stop(self):
        """Stop the component: unsubscribe from the registry and detach cfg handlers.

        Finally the proc tracker is asked to SIGKILL with ``fast=True``.

        :return: self
        """
        super(Service, self).stop()

        cfg = self.cfg
        if cfg.conf_sections:
            self.log.info("unsubscribing from %s", cfg.configs)
            self.registry.unsubscribe(self.registry_changed)

        self.registry.unsubscribe(self.default_cgroup_changed)

        # remove the references cfg holds to this service
        cfg.set_type_handler('cfg', None)
        for var_name in ('RUNNING_PROCESSES', 'PIDS'):
            cfg.set_var_handler(var_name, None)

        self._procs_tracker.signal(signal.SIGKILL, fast=True)

        return self

    def pause_fsa(self):
        """Raise the ``fsa_paused`` flag."""
        self.fsa_paused = True

    def unpause_fsa(self):
        """Clear the ``fsa_paused`` flag."""
        self.fsa_paused = False

    def register_cfg_handlers(self):
        """Install the variable and type handlers the service config resolves placeholders with."""
        set_var = self.cfg.set_var_handler

        def _pids():
            # space separated pids of the tracked main procs
            return ' '.join(str(pid) for pid in self._procs_tracker.pids())

        set_var('NAMESPACE', self.namespace.name)
        set_var('RUNDIR', self.rundir)
        set_var('SKYNET', self.namespace.skynetdir or '')
        set_var('SUPERVISOR', self.namespace.supervisordir or '')
        set_var('RUNNING_PROCESSES', self._procs_tracker.running)
        set_var('PIDS', _pids)
        self.cfg.set_type_handler('cfg', self._get_config_var)
        self.fields_changed_event.set()

    @Deblock.wrap_fun
    def use_symlink(self, link_name):
        """Make ``link_name`` a (relative) symlink to the service base and use it as CURDIR."""
        link_dir = os.path.dirname(link_name)
        relative_target = os.path.relpath(self.cfg.basepath, link_dir)
        utils.ensure_link(relative_target, link_name)
        self.cfg.set_var_handler('CURDIR', link_name)
        self.fields_changed_event.set()

    @Deblock.wrap_fun
    def install_service_symlinks(self, linkdir):
        """Create ``<linkdir>/<service name>/{src,var,outstreams}`` links to the service data.

        A failure to create one link is logged as a warning and does not
        prevent the remaining links from being created.
        """
        maindir = os.path.join(linkdir, self.name)
        utils.ensure_dir(maindir)

        links = (
            ('src', self.cfg.basepath),
            ('var', self.rundir),
            ('outstreams', self.logdir),
        )
        for link_basename, source in links:
            target = os.path.join(maindir, link_basename)
            self.log.debug("creating link %r -> %r", target, source)
            try:
                utils.ensure_link(source, target)
            except Exception as err:
                self.log.warning("%r creation failed: %s", target, err)

    def attach_procs(self, old_service):
        """Adopt the tracked procs of ``old_service``; a ``None`` argument is ignored."""
        if old_service is None:
            return

        self._procs_tracker.grab_from(old_service._procs_tracker)
        self.fields_changed_event.set()

    def proc_uuids(self):
        """Return the uuids reported by the proc tracker."""
        tracker = self._procs_tracker
        return tracker.uuids()

    def set_required_state(self, state):
        """Pass the new target ``state`` to the state controller and signal a context change."""
        controller = self._state_controller
        controller.set_required_state(state, self)
        self.context_changed_event.release()

    def start_service(self, timeout=90.0):
        """Request the RUNNING state and wait up to ``timeout`` for the started event.

        :return: the result of waiting on the started event
        """
        self.set_required_state(State.RUNNING)
        self.service_event.set()
        started = self._started_event.wait(timeout=timeout)
        return started

    def stop_service(self, timeout=STATE_CHANGE_TIMEOUT * 1.5):
        """Request the STOPPED state and wait up to ``timeout`` for the stopped event.

        :return: ``True`` when the service already is in COOLDOWN or STOPPED,
                 otherwise the result of waiting on the stopped event
        """
        self.set_required_state(State.STOPPED)
        self.service_event.set()
        if self.state in (State.COOLDOWN, State.STOPPED):
            return True
        return self._stopped_event.wait(timeout=timeout)

    def restart_service(self, timeout=STATE_CHANGE_TIMEOUT * 3.):
        """Trigger a restart and wait for it to go through.

        Unless the service already is in COOLDOWN or STOPPED, up to half of
        ``timeout`` is spent waiting for the stopped event; afterwards up to
        ``timeout`` is spent waiting for the started event.  Nothing is returned.
        """
        self._state_controller.restart(self)
        self.service_event.set()
        if self.state not in (State.COOLDOWN, State.STOPPED):
            self._stopped_event.wait(timeout=timeout / 2.)
        self._started_event.wait(timeout=timeout)

    @memoized
    def get_api(self, kind):
        """Return the ``kind`` entry of the configured api, or ``None`` when no api is configured."""
        api = self.cfg.take('api')
        return None if api is None else api.get(kind)

    def get_field(self, field, raw):
        """Delegate to ``cfg.get_field(field, raw)``."""
        cfg = self.cfg
        return cfg.get_field(field, raw)

    def send_state_report(self, state):
        """Report ``state`` through the reporter, together with the release version.

        Nothing is sent when there is no reporter or no metainfo for the service.
        """
        metainfo = self.namespace.get_service_metainfo(self.name)
        if not (self.reporter and metainfo):
            return

        self.reporter.send_state_report(
            self.namespace.name,
            self.name,
            state,
            str(metainfo.release.get('version', None)),
        )

    @Deblock.wrap_fun
    def make_rundir(self):
        """Ensure ``<namespace rundir>/<service name>`` exists and remember it as ``self.rundir``.

        When running as uid 0 for a non-root service user, the directory is
        chowned (recursively) to that user.
        """
        owner = self.cfg.take('user')

        run_path = Path(self.namespace.rundir).join(self.name)
        run_path.ensure(dir=True)

        running_as_root = os.getuid() == 0
        if running_as_root and owner != 'root':
            run_path.chown(owner, 0, rec=1)

        self.rundir = run_path.strpath

    @Deblock.wrap_fun
    def _write_registry(self, key, new_cfg):
        """Dump ``new_cfg`` into the service configuration file.

        The data is written to ``<target>.new``, flushed and fsynced, and then
        renamed over the target, so the target is replaced in one step.

        :param key: registry key; when truthy the file is
                    ``<cfg.registry_filename_dir>/<key>.<format>``, otherwise
                    the old single-file layout is used
        :param new_cfg: data to serialise in the ``conf_format`` of the service
                        (``json``; anything else is dumped as YAML)
        """
        fmt = self.cfg.take('conf_format')
        if key:
            target_filename = os.path.join(self.cfg.registry_filename_dir, key + '.' + fmt)
        else:
            # old notification format
            target_filename = self.cfg.registry_filename
            # with format > 2 we can use what it provides, otherwise the path is constant
            if not os.path.isabs(target_filename):
                target_filename = os.path.join(self.rundir, 'configuration.' + fmt)

        dirname = Path(target_filename).dirpath()
        dirname.ensure(dir=True)
        if os.getuid() == 0:
            dirname.chown(self.cfg.take('user'), 0, rec=1)
        new_filename = target_filename + '.new'

        # Text mode, like write_api/write_fields: json.dump and yaml.dump
        # (without an explicit encoding) emit text, which a binary-mode file
        # rejects on Python 3.
        with open(new_filename, 'w') as f:
            if fmt == 'json':
                json.dump(new_cfg, f)
            else:
                yaml.dump(new_cfg, f, default_flow_style=False, Dumper=getattr(yaml, 'CSafeDumper', yaml.SafeDumper))
            f.flush()
            os.fsync(f.fileno())

        os.rename(new_filename, target_filename)

    def write_all_registry(self, registry_data):
        """Write every ``key -> config`` pair of ``registry_data`` with ``_write_registry``."""
        for registry_key, section_data in registry_data.items():
            self._write_registry(registry_key, section_data)

    @Deblock.wrap_fun
    def write_api(self):
        """Dump the configured api as YAML into ``self.apifile``.

        Does nothing when no api file is set or the config has no api.  The
        data goes to ``<apifile>.new`` first and is then moved over the target.
        """
        if self.apifile is None:
            return

        target = Path(self.apifile)
        api = self.cfg.take('api')
        if api is None:
            return

        target.dirpath().ensure(dir=True)
        tmp_name = target.strpath + ".new"
        with open(tmp_name, 'w') as out:
            dumper = getattr(yaml, 'CSafeDumper', yaml.SafeDumper)
            yaml.dump(api, out, default_flow_style=False, Dumper=dumper)
            out.flush()
            os.fsync(out.fileno())
        Path(target.strpath + '.new').move(target)

    @Deblock.wrap_fun
    def write_fields(self):
        """Dump the raw and rendered values of all config fields as YAML into ``self.fieldsfile``.

        Does nothing when no fields file is set.  The data goes to
        ``<fieldsfile>.new`` first and is then moved over the target.
        """
        if self.fieldsfile is None:
            return

        def as_plain(value):
            # sets are turned into lists before dumping
            return list(value) if isinstance(value, (set, frozenset)) else value

        target = Path(self.fieldsfile)
        raw_fields = {
            name: as_plain(self.cfg.get_field(name, True))
            for name in self.cfg.field_mappings
        }
        rendered_fields = {
            name: as_plain(self.cfg.get_field(name, False))
            for name in self.cfg.field_mappings
        }
        fields = {'raw': raw_fields, 'rendered': rendered_fields}

        target.dirpath().ensure(dir=True)
        with open(target.strpath + '.new', 'w') as out:
            dumper = getattr(yaml, 'CSafeDumper', yaml.SafeDumper)
            yaml.dump(fields, out, default_flow_style=False, Dumper=dumper)
            out.flush()
            os.fsync(out.fileno())
        Path(target.strpath + '.new').move(target)

    @Deblock.wrap_fun
    def check_source_hash(self):
        """Compare the service directory contents with the hash kept in its metainfo.

        Skipped when the namespace has no metainfo for this service, or the
        metainfo lacks `content_md5` / `content_paths`. When the base path is
        not an existing directory, or the hashes differ, a warning is logged
        and the unit is marked dirty.
        """
        unit = self.namespace.get_service_metainfo(self.name)
        if not (unit and unit.content_md5 and unit.content_paths):
            return

        expected = unit.content_md5
        source = Path(self.cfg.basepath)

        if source.check(dir=1, exists=1):
            mismatch = expected != source.hash_contents(unit.content_paths)
        else:
            # nothing to hash: treated the same way as a mismatch
            mismatch = True

        if mismatch:
            self.log.warning("service contents check failed, hash mismatch, service is probably corrupted")
            unit.dirty = True

    def report_rusage(self):
        """Publish summed CPU usage and RSS of the tracked procs to the stats.

        The CPU counter 'main_cpu_usage' is incremented by the difference
        against the previously stored 'main_last_cpu_usage' (if there is one),
        then 'main_last_cpu_usage' and 'main_rss' are overwritten.
        """
        ns_name = self.namespace.name
        svc_name = self.name
        # FIXME remove protected property access
        total_cpu = sum(p.cpu_usage for p in self._procs_tracker._procs)
        total_rss = sum(p.rss for p in self._procs_tracker._procs)

        previous = self.stats.service_get_val(ns_name, svc_name, 'main_last_cpu_usage', inmemory=True)
        if previous is not None:
            # a negative difference is reported as zero
            delta = max(0, total_cpu - float(previous))
            self.stats.service_inc_num(ns_name, svc_name, 'main_cpu_usage', delta)

        self.stats.service_set_val(ns_name, svc_name, 'main_last_cpu_usage', total_cpu, inmemory=True)
        self.stats.service_set_val(ns_name, svc_name, 'main_rss', total_rss, inmemory=True)

    def _service_send_registry(self):
        """Run the configured `conf_action` with kind 'notify-config'.

        Any exception raised while processing the action is logged and
        swallowed; reading the action from the config is not guarded.
        """
        conf_action = self.cfg.take('conf_action')
        try:
            self._process_action(conf_action, 'notify-config')
        except Exception:
            failure = sys.exc_info()
            self.log.exception(
                "failed to notify service about registry change, service continues flight without changes",
                exc_info=failure
            )

    def _process_action(self, action, kind):
        """Carry out a configured action.

        :param action: None (nothing to do), an int (signal number sent through
            the procs tracker), the string 'RESTART' (restart through the state
            controller, only for config version >= 5) or anything else, which
            is started as a command
        :param kind: proc kind passed to `_start_proc` for command actions
        :return: the started proc for command actions, otherwise None
        """
        if action is None:
            return None

        if isinstance(action, int):
            self._procs_tracker.signal(action)
            return None

        if action == 'RESTART' and self.cfg.cfg_version() >= 5:
            self._state_controller.restart(self)
            self.service_event.set()
            return None

        return self._start_proc(action, fast=True, kind=kind)

    def _update_last_check_time(self, proc):
        """Store the current `monoTime()` value as `last_check_time`.

        :param proc: accepted so the method can be linked to a proc; not used
        """
        # Deliberately no context_changed_event notification here: this
        # timestamp is not important enough, losing it on an accidental
        # restart is fine, while a yaml dump costs a significant amount of CPU.
        checked_at = monoTime()
        self.last_check_time = checked_at

    def _update_usage_stats(self, kind, proc):
        """Account the CPU usage of `proc` in the service stats.

        Does nothing when stats or the namespace are not available. The update
        itself runs in a separately spawned greenlet.

        For the 'main' kind the counter is incremented by the (non-negative)
        difference against the previously stored 'main_last_cpu_usage'; when
        nothing was stored before, only the last value is recorded. For every
        other kind the counter is incremented by the proc's whole CPU usage.

        :param kind: proc kind; 'service' is accounted under the 'main' prefix
        :param proc: proc object exposing `cpu_usage`
        """
        if not self.stats or not self.namespace:
            return

        if kind == 'service':
            kind = 'main'

        def _update(self, kind, proc):
            nsname = self.namespace.name
            name = self.name
            cpu_usage = proc.cpu_usage

            old_cpu_usage = None
            if kind == 'main':
                # Must be read BEFORE the store below: for 'main' the key
                # written there is this very key, so reading it afterwards
                # always yields the fresh value and a zero difference.
                old_cpu_usage = self.stats.service_get_val(
                    nsname,
                    name,
                    'main_last_cpu_usage',
                    inmemory=True,
                )

            self.stats.service_set_val(
                nsname,
                name,
                kind + '_last_cpu_usage',
                cpu_usage,
                inmemory=True,
            )

            if kind == 'main':
                if old_cpu_usage is None:
                    return
                # a negative difference is accounted as zero
                cpu_diff = max(0, cpu_usage - float(old_cpu_usage))
            else:
                cpu_diff = cpu_usage

            self.stats.service_inc_num(
                nsname,
                name,
                kind + '_cpu_usage',
                cpu_diff,
            )

        gevent.spawn(_update, self, kind, proc)

    def _stop_root_container(self, log=None):
        """Ask the starter to stop the service's root container.

        Skipped when the `porto` config item is 'no' or False. A failure to
        stop is only logged as a warning.

        :param log: logger to use instead of `self.log`
        """
        logger = log or self.log
        porto = self.cfg.take('porto')
        if porto in ('no', False):
            return

        try:
            self.starter.stop_container(self.root_container)
        except Exception as err:
            logger.warning("root container stop failed: %s", err)

    def _start_proc(self, path, fast, kind, root=False, args=None, raw_args=None):
        """Start a process via `self.starter` and hand it to the procs tracker.

        The whole start sequence runs under `self._proc_start_lock`.

        :param path: command line; split with `shlex` when `args` is empty
        :param fast: passed through to the starter and to the procs tracker
        :param kind: one of service, check, stop, install, upgrade, uninstall,
            preupgrade, notify-config
        :param root: run as 'root' instead of the configured user
        :param args: ready argument list; takes precedence over `path`
        :param raw_args: passed through to the starter unchanged
        :return: the proc object returned by the starter
        :raises RuntimeError: when the start lock is not acquired within 60s
        """
        if not self._proc_start_lock.acquire(timeout=60.):
            self.log.error("failed to acquire start lock for 60s, proc is not started: %r", path)
            raise RuntimeError("proc start lock is blocked")

        try:
            if self.stats and self.namespace:
                # upgrade / preupgrade are deliberately absent: not counted
                counter_by_kind = {
                    'service': 'proc_main_count',
                    'check': 'proc_check_count',
                    'stop': 'proc_stop_count',
                    'install': 'proc_install_count',
                    'uninstall': 'proc_uninstall_count',
                    'notify-config': 'proc_config_notify_count',
                }
                statname = counter_by_kind.get(kind)
                if statname is not None:
                    self.stats.service_inc_num(self.namespace.name, self.name, statname)

            # name of the porto options set per kind; unlisted kinds map to themselves
            options_by_kind = {
                'service': 'exec',
                'upgrade': 'install',
                'preupgrade': 'uninstall',
                'notify-config': 'notify',
            }
            options_kind = options_by_kind.get(kind, kind)

            if not args:
                args = shlex.split(path)
            env = self.cfg.take('env') or {}
            is_check = kind == 'check'

            proc = self.starter.run(
                log=self.log,
                out_log=self.check_log if is_check else self.out_log,
                args=args,
                service_root=self.cfg.format_item('${CURDIR}'),
                porto=self.porto_mode,
                user='root' if root else self.cfg.take('user'),
                cgroups=self.valid_cgroups,
                limits=self.cfg.take('limits'),
                env=env,
                fast=fast,
                root_container=self.root_container,
                porto_meta_options=self.cfg.get_porto_options('meta'),
                porto_options=self.cfg.get_porto_options(options_kind),
                tags=(kind, self.name, '@' + self.namespace.name),
                raw_args=raw_args,
            )
            self._procs_tracker.started(proc, fast)
            if is_check:
                proc.link(self._update_last_check_time)
            proc.link(self.service_event)
            proc.link(partial(self._update_usage_stats, kind))
        finally:
            self._proc_start_lock.release()

        return proc

    def spawn_missing(self):
        """Start every pending executable that is still configured and not running.

        Each start is spawned in its own greenlet; after spawning,
        `last_start_time` is refreshed and `context_changed_event` is released.
        """
        for raw_path in self._procs_tracker.pending_execs(self.cfg.executables):
            if raw_path not in self.cfg.executables:
                self.log.debug("not starting %r, this service don't use this executable anymore", raw_path)
            elif self._procs_tracker.is_running(raw_path):
                self.log.debug("not starting %r, it's already running", raw_path)
            else:
                gevent.spawn(
                    self._start_proc,
                    path=self.cfg.format_item(raw_path),
                    fast=False,
                    kind='service',
                    raw_args=raw_path,
                )
                self.last_start_time = monoTime()
                self.context_changed_event.release()

    def _get_config_var(self, var_type, var_name):
        """Resolve a registry-backed variable to its string value.

        `var_name` is split once with `cfg_item_split_re` into a registry
        section and a key; both are split into parts with `cfg_parts_split_re`.
        The section is queried from the registry and the key parts are walked
        under the section's 'config' item.

        :param var_type: not used by this method
        :param var_name: variable reference to resolve
        :return: `str()` of the value found
        :raises KeyError, TypeError: when the section or the key path cannot be
            resolved; a warning is logged before the error is re-raised
        """
        section, key = self.cfg_item_split_re.split(var_name, 1)
        section_parts = self.cfg_parts_split_re.split(section)
        try:
            section_conf = self.registry.query(section_parts, deepcopy=False)
        except (KeyError, TypeError):
            self.log.warning("registry section not found: `%r`, cannot format variable: %r", section, var_name)
            raise

        key_parts = self.cfg_parts_split_re.split(key)
        # Initialised before the try block: the handler below reads it, and the
        # very first lookup (section_conf['config']) may already fail. Binding
        # it inside the try would turn that failure into an UnboundLocalError.
        processed = []
        try:
            val = section_conf['config']
            for part in key_parts:
                processed.append(part)
                val = val[part]
        except (KeyError, TypeError):
            self.log.warning("path `%r` not found in registry section %r", '.'.join(processed), section)
            raise

        return str(val)

    def spawn_stop(self):
        """Carry out the configured `stop` action.

        :return: the started stop proc; a successful `ProcAdapter` when the
            action produced no proc; a failed `ProcAdapter` carrying the error
            text when the action raised
        """
        try:
            proc = self._process_action(self.cfg.take('stop'), 'stop')
        except Exception as e:
            self.log.exception("failed to start stop proc")
            return ProcAdapter(False, str(e))

        if proc is None:
            return ProcAdapter(True, "no stop-process needed")
        return proc

    def spawn_check(self):
        """Start the configured `check` command, reporting resource usage first.

        :return: the started check proc. Without a configured check, a
            `ProcAdapter` that is successful iff the required state is RUNNING.
            None when the start failed with `ProcStartException`. A failed
            `ProcAdapter` carrying the error text on any other exception.
        """
        try:
            if self.stats and self.namespace:
                self.report_rusage()

            check = self.cfg.take('check')
            if check is not None:
                return self._start_proc(check, fast=True, kind='check')

            self.last_check_time = monoTime()
            wanted_running = self._state_controller.required_state == State.RUNNING
            return ProcAdapter(wanted_running, "no check-process needed")
        except ProcStartException:
            self.log.exception("failed to start check proc, internal error, will retry later")
            return None
        except Exception as e:
            self.log.exception("failed to start check proc, assuming service not running")
            return ProcAdapter(False, str(e))

    def all_procs_running(self):
        """Tell whether the procs tracker reports all configured executables as running.

        A debug line with the expected and the running counts is logged when
        the answer is falsy.

        :return: whatever `all_tasks_running` returned
        """
        all_up = self._procs_tracker.all_tasks_running(self.cfg.executables)
        if all_up:
            return all_up

        expected_count = len(self.cfg.executables)
        running_count = self._procs_tracker.running()
        self.log.debug(
            "not all procs are running (%d expected, %d is up)",
            expected_count, running_count
        )
        return all_up

    def kill_procs(self, all=False):
        """Send SIGKILL through the procs tracker.

        :param all: forwarded to the tracker's `signal()` unchanged
        """
        tracker = self._procs_tracker
        tracker.signal(signal.SIGKILL, all=all)

    def procs_left(self, all=False):
        """Report whether procs remain.

        :param all: when truthy, return the truthiness of the whole tracker;
            otherwise return what the tracker's `running()` reports
        """
        if all:
            return bool(self._procs_tracker)
        return self._procs_tracker.running()

    def on_install(self, upgrade):
        """Run the configured install script and wait for it to finish.

        :param upgrade: truthy to run in 'upgrade' mode, otherwise 'install'
        :return: the job's exit status, or None when no install script is
            configured
        :raises Exception: when the script does not finish within
            `Service.STATE_CHANGE_TIMEOUT` (it is sent SIGKILL first) or exits
            with a non-zero status
        """
        mode = 'upgrade' if upgrade else 'install'
        script = self.cfg.take('install_script', MODE=mode)
        if not script:
            return

        job = self._start_proc(script, fast=False, kind=mode, root=self.cfg.take('install_as_privileged'))
        if not job.wait(Service.STATE_CHANGE_TIMEOUT):
            job.send_signal(signal.SIGKILL)
            self.log.error("failed to %s (timed out)", mode)
            # Interpolate explicitly: unlike logging calls, Exception() keeps
            # extra positional arguments as a tuple and never formats them.
            raise Exception("%s: failed to %s (timed out)" % (self.name, mode))

        if job.exitstatus.get('exitstatus') != 0:
            raise Exception("%s: install script (mode %r) failed: %s" % (self.name, mode, job.exitstatus))

        return job.exitstatus

    def on_uninstall(self, upgrade):
        """Run the configured uninstall script and wait for it to finish.

        :param upgrade: truthy to run in 'preupgrade' mode, otherwise 'uninstall'
        :return: the job's exit status, or None when no uninstall script is
            configured
        :raises Exception: when the script does not finish within
            `Service.STATE_CHANGE_TIMEOUT` (it is sent SIGKILL first) or exits
            with a non-zero status
        """
        mode = 'preupgrade' if upgrade else 'uninstall'
        script = self.cfg.take('uninstall_script', MODE=mode)
        if not script:
            return

        job = self._start_proc(script, fast=False, kind=mode, root=self.cfg.take('install_as_privileged'))
        if not job.wait(Service.STATE_CHANGE_TIMEOUT):
            job.send_signal(signal.SIGKILL)
            self.log.error("failed to %s (timed out)", mode)
            # Interpolate explicitly: unlike logging calls, Exception() keeps
            # extra positional arguments as a tuple and never formats them.
            raise Exception("%s: failed to %s (timed out)" % (self.name, mode))

        if job.exitstatus.get('exitstatus') != 0:
            raise Exception("%s: uninstall script (mode %r) failed: %s" % (self.name, mode, job.exitstatus))

        return job.exitstatus

    def deps_state_changed(self, deps_ready):
        """Mirror the readiness of dependencies into `self._deps_state`.

        :param deps_ready: truthy sets the flag, falsy clears it
        """
        switch = self._deps_state.set if deps_ready else self._deps_state.clear
        switch()

    def registry_changed(self, path, new_cfg):
        """React to a registry update for this service.

        The new data is always remembered in `_registry_data`. Only when a
        config hash actually differs from the remembered one is the registry
        file rewritten, the api file rewritten, the service notified (in
        STARTING / PRERUNNING / RUNNING states) and a fields rewrite requested.

        :param path: sequence of section name parts, or a falsy value for the
            old mode in which `new_cfg` covers every section in `cfg.configs`
        :param new_cfg: new registry data; with a non-empty `path` its
            '__config_hash' item is popped from it
        """
        key = '.'.join(path) if path else None
        self._registry_data[key] = new_cfg

        really_changed = False
        if path:
            old_hash = self._registry_state.get(key)
            new_hash = new_cfg.pop('__config_hash', None) if new_cfg else None
            if old_hash != new_hash:
                self._registry_state[key] = new_hash
                really_changed = True
        else:
            # old mode
            for section in self.cfg.configs:
                section_path = self.cfg_parts_split_re.split(section)
                known_hash = self._registry_state.get(section)
                fresh_hash = self.registry.query(section_path, new_cfg)['config_hash']
                if known_hash != fresh_hash:
                    self._registry_state[section] = fresh_hash
                    really_changed = True

        if not really_changed:
            return

        self._write_registry(key, new_cfg)
        self.write_api()
        if self.state in (State.STARTING, State.PRERUNNING, State.RUNNING):
            self._service_send_registry()
        self.fields_changed_event.set()

    def default_cgroup_changed(self, path, new_config):
        """React to a change of the starter's default cgroup.

        When `new_config['starter']['default_cgroup']` differs from the value
        remembered under '__default_cgroup__', the new value is remembered, the
        service is restarted through the state controller and both the context
        and the fields events are triggered.

        :param path: not used by this method
        :param new_config: configuration expected to hold the
            'starter' -> 'default_cgroup' item
        :return: always True
        """
        try:
            cgroup = new_config['starter']['default_cgroup']
        except (KeyError, TypeError):
            # Some section is not resolved yet, so there is nothing to do and
            # the previous default stays in effect.
            # NB: if the config was never applied successfully, the default
            # 'skycore/services' remains.
            self.log.info("cgroup config changed but section is not resolved yet, service is not restarted")
            return True

        if self._registry_state.get('__default_cgroup__') != cgroup:
            self._registry_state['__default_cgroup__'] = cgroup
            self._state_controller.restart(self)
            self.context_changed_event.release()
            self.fields_changed_event.set()

        return True

    # properties
    @property
    def base(self):
        """The `base` attribute of the owning namespace."""
        owner = self.namespace
        return owner.base

    @property
    def state(self):
        """Current state as reported by the state controller."""
        controller = self._state_controller
        return controller.state

    @property
    def current_state_uptime(self):
        """Time spent in the current state, formatted by `utils.human_time`."""
        now = time.time()
        entered = utils.from_monotime(self._state_controller.state_started)
        return utils.human_time(now - entered)

    @property
    def name(self):
        """Service name, taken from its config."""
        config = self.cfg
        return config.name

    @property
    def namespace(self):
        """Owning namespace, obtained by calling `self.parent`.

        When `self.parent` is falsy it is returned as is, without being called.
        """
        parent_ref = self.parent
        if not parent_ref:
            return parent_ref
        return parent_ref()

    @property
    def after(self):
        """Names from the config's `after` item that are services of the namespace.

        :return: list of names in configured order, or None when the config
            has no `after` attribute or the item is empty
        """
        after = self.cfg.take('after') if hasattr(self.cfg, 'after') else None
        if not after:
            return None

        # A real list rather than `filter(...)`: on Python 3 `filter` yields a
        # lazy one-shot iterator that is truthy even when nothing matches.
        services = self.namespace.services
        return [name for name in after if name in services]

    @property
    def needs_restart(self):
        """Value of the `restart_on_upgrade` config item."""
        restart_on_upgrade = self.cfg.take('restart_on_upgrade')
        return restart_on_upgrade

    @property
    def dependencies(self):
        """Value of the `dependencies` config item."""
        configured = self.cfg.take('dependencies')
        return configured

    @property
    def root_container(self):
        """Name of the service's root container.

        The `porto_container` config item when it is set, otherwise
        'skycore/<namespace name>/<service name>'.
        """
        explicit = self.cfg.take('porto_container')
        if explicit:
            return explicit
        return 'skycore/%s/%s' % (self.namespace.name, self.name)

    @property
    def porto_mode(self):
        """Effective porto mode for starting procs.

        The `porto` config item is returned as is unless it is 'auto' and the
        procs tracker is non-empty; in that case the answer is 'yes' or 'no'
        according to `in_porto` of the first proc the tracker yields.
        """
        mode = self.cfg.take('porto')
        if mode != 'auto' or not self._procs_tracker:
            return mode

        first_proc = next(iter(self._procs_tracker))
        return 'yes' if first_proc.in_porto else 'no'

    @Component.green_loop(logname='fsa_ticker')
    def _fsa_ticker(self, log):
        """Endless loop driving the state controller.

        Every iteration clears `service_event`, ticks the state controller
        (unless `fsa_paused` is set) and then waits on `service_event` for as
        long as the tick asked for, or 1.0s while paused. A failed tick is
        logged and followed by a 0.5s sleep.

        :param log: logger supplied for this loop
        """
        while True:
            try:
                self.service_event.clear()
                if self.fsa_paused:
                    sleep = 1.0
                else:
                    sleep = self._state_controller.tick(self)
            except Exception:
                log.exception("tick failed, will sleep 0.5s before next attempt")
                gevent.sleep(0.5)
                continue

            self.service_event.wait(sleep)

    @Component.green_loop(logname='fld_watchr')
    def _fields_writer(self, log):
        """Endless loop rewriting the fields file whenever a change is signalled.

        Waits on `fields_changed_event` in 30s slices; once the event is seen
        it is cleared and `write_fields()` is called. Failures are logged and
        the loop keeps going.

        :param log: logger supplied for this loop
        """
        while True:
            try:
                if not self.fields_changed_event.wait(30.):
                    continue
                self.fields_changed_event.clear()
                self.write_fields()
            except Exception:
                log.exception("fields write failed")

    @property
    def context(self):
        """Snapshot of the service as a plain dict.

        Combines the config, current and required states, the procs tracker
        context, the service metainfo (when the namespace has one), the
        remembered registry hashes and the check/start timing data.
        """
        unit = self.namespace.get_service_metainfo(self.name)
        if unit:
            meta, unit_info = unit.release, unit.as_dict()
        else:
            meta, unit_info = None, None

        controller = self._state_controller
        snapshot = {
            'service': self.name,
            'cfg': self.cfg.as_dict(self.base),
            'state': self.state,
            'required_state': controller.required_state,
            'procs': self._procs_tracker.context(self.base),
            'meta': meta,  # backward compatibility
            'unit': unit_info,
            'registry_state': self._registry_state,
            'last_check_time': utils.from_monotime(self.last_check_time),
            'last_start_time': utils.from_monotime(self.last_start_time),
            'check_interval': controller.check_controller._interval,
            'start_interval': controller.start_controller._interval,
            'state_start_time': utils.from_monotime(controller.state_started),
        }
        return snapshot

    def __eq__(self, other):
        """Two services are equal when their configs and required states are equal."""
        same_cfg = self.cfg == other.cfg
        if not same_cfg:
            # hand back the comparison result itself, as `and` would
            return same_cfg
        return self._state_controller.required_state == other._state_controller.required_state

    def __str__(self):
        """Short description: class name, service name, required and current state."""
        cls_name = self.__class__.__name__
        svc_name = self.name
        required = self._state_controller.required_state
        return '%s ("%s", required state %s, state %s)' % (cls_name, svc_name, required, self.state)
