# coding: utf-8
import logging
import os

import enum
import gevent
import gevent.event
import gevent.queue
import gevent.threading
from infra.swatlib.gevent.geventutil import force_kill_greenlet
from infra.swatlib.gevent import greenthread
from infra.swatlib.zk.client import CoordError, KazooState
from kazoo.exceptions import LockTimeout, ConnectionClosedError
from sepelib.util import retry
from sepelib.util.exc.format import format_exc

# Sentinels that ExclusiveService._wait_disconnect puts on its result channel;
# ExclusiveService.run tells them apart from service results by identity (``is``).
# The standoff timeout elapsed without a disconnect being observed.
_STANDOFF_TIMEOUT_EXCEEDED = object()
# The coordinator reported a state other than CONNECTED.
_DISCONNECTED = object()


@enum.unique
class RestartPolicy(enum.Enum):
    """Decides whether ExclusiveService.run loops again once the wrapped service finishes by itself."""

    # Leave the run loop whether the service returned normally or raised.
    DO_NOT_RESTART = 1
    # Loop again only when the service raised an exception.
    RESTART_ON_EXCEPTION = 2
    # Loop again after a normal return as well as after an exception.
    ALWAYS_RESTART = 3


class Runnable(object):
    """Interface for objects whose ``run`` method ExclusiveService executes as the service."""

    def run(self):
        """Do the service's work; the base implementation is abstract and always raises."""
        raise NotImplementedError()


def set_on_disconnect(state, event):
    """
    Set ``event`` when ``state`` is anything other than ``KazooState.CONNECTED``.

    :param state: connection state passed to a coordinator state listener
    :param event: object with a ``set()`` method (a gevent Event in this module)
    """
    not_connected = state != KazooState.CONNECTED
    if not_connected:
        event.set()


def cancel_on_disconnect(state, lock):
    """
    Call ``lock.cancel()`` when ``state`` is anything other than ``KazooState.CONNECTED``.

    :param state: connection state passed to a coordinator state listener
    :param lock: lock object whose ``cancel()`` is invoked on a non-connected state
    """
    not_connected = state != KazooState.CONNECTED
    if not_connected:
        lock.cancel()


class ExclusiveService(greenthread.GreenThread):
    """
    A service that runs under a Zookeeper lock.
    Designed to be used as a wrapper for unaware service.

    The wrapped callable is started in its own greenlet only while the lock is held.
    It is killed when the coordinator reports a non-connected state or when the
    standoff timeout expires, after which the lock is released and re-contended.
    """
    # it's a good practice, mentioned in google chubby papers
    # to give up control once in a while
    # (12 * 3600 seconds, i.e. 12 hours of continuous leadership)
    DEFAULT_STANDOFF_STRATEGY = lambda: 12 * 3600
    # None is handed to lock.acquire(timeout=...) as is;
    # presumably this means "wait without a timeout" - confirm against the lock implementation.
    DEFAULT_ACQUIRE_TIMEOUT_STRATEGY = lambda: None

    # Parent path joined with the service name to form the lock path.
    ZK_PATH = '/exclusive_services'

    def __init__(self, coord, name, runnable,
                 acquire_timeout_strategy=DEFAULT_ACQUIRE_TIMEOUT_STRATEGY,
                 standoff_strategy=DEFAULT_STANDOFF_STRATEGY,
                 restart_policy=RestartPolicy.DO_NOT_RESTART,
                 exit_instead_of_release_lock=False):
        """
        :param coord: zookeeper client instance
        :param name: service name; used for the lock path under ZK_PATH and for the logger name
        :type runnable: Runnable or callable
        :param callable standoff_strategy:
            a callable that returns current standoff timeout,
            MUST NOT raise any exceptions.
        :param callable acquire_timeout_strategy:
            a callable that returns amount of seconds we wait before giving up on acquiring the lock,
            MUST NOT raise any exceptions.
        :param RestartPolicy restart_policy: RestartPolicy.*
        :param bool exit_instead_of_release_lock:
            if true, every place that would release the lock raises SystemExit(123) instead.
        """
        super(ExclusiveService, self).__init__()

        # Greenlet running the wrapped service; None until the lock is acquired for the first time.
        self._service = None
        if callable(runnable):
            self._run = runnable
        else:
            self._run = runnable.run

        self.name = 'exclusive({})'.format(name)
        self._log = logging.getLogger(self.name)
        self._coord = coord
        self._lock = self._coord.lock(os.path.join(self.ZK_PATH, name))
        self._standoff_strategy = standoff_strategy
        self._acquire_timeout_strategy = acquire_timeout_strategy
        self._restart_policy = restart_policy

        self._stopped = False
        # Serializes "start the service greenlet" in run() against "mark stopped and kill it" in stop().
        self._stopping_lock = gevent.threading.Lock()
        self._exit_instead_of_release_lock = exit_instead_of_release_lock

    def _wait_disconnect(self, chan, standoff_timeout=None):
        """
        Wait until the coordinator reports a non-connected state or the standoff
        timeout expires, then put the matching sentinel on ``chan``.

        :param chan: queue receiving ``_DISCONNECTED`` or ``_STANDOFF_TIMEOUT_EXCEEDED``
        :param standoff_timeout: seconds passed to ``Event.wait``; None waits without a timeout
        """
        event = gevent.event.Event()
        listener = lambda state: set_on_disconnect(state, event)
        self._coord.add_listener(listener)
        try:
            event.wait(timeout=standoff_timeout)
        finally:
            # This greenlet is force-killed by _wait_disconnect_or_service when the
            # service finishes first, which interrupts the wait above. Removing the
            # listener in a finally keeps it from staying registered on the
            # coordinator after every such run.
            self._coord.remove_listener(listener)
        if event.is_set():
            chan.put(_DISCONNECTED)
        else:
            chan.put(_STANDOFF_TIMEOUT_EXCEEDED)

    def _wait_service(self, chan):
        """
        Wait for the service greenlet to finish and put its outcome on ``chan``:
        the return value, or the exception instance if ``get()`` raised one.

        :param chan: queue receiving the service result or exception
        """
        try:
            rv = self._service.get()
        except Exception as e:
            chan.put(e)
        else:
            chan.put(rv)

    def _wait_disconnect_or_service(self, standoff_timeout=None):
        """
        Race the disconnect/standoff watcher against the service itself.

        :param standoff_timeout: seconds forwarded to ``_wait_disconnect``
        :return: the first item put on the channel: ``_DISCONNECTED``,
            ``_STANDOFF_TIMEOUT_EXCEEDED``, the service's return value,
            or the exception instance it raised
        """
        chan = gevent.queue.Queue()
        gs = (
            gevent.spawn(self._wait_disconnect, chan=chan, standoff_timeout=standoff_timeout),
            gevent.spawn(self._wait_service, chan=chan),
        )
        try:
            return chan.get()
        finally:
            # Whichever waiter lost the race is still blocked; kill both helpers.
            for g in gs:
                force_kill_greenlet(g, kill_timeout=1)

    def _release_lock(self):
        """
        Release the Zookeeper lock, or exit if configured to do so.

        :raises SystemExit: with code 123 when ``exit_instead_of_release_lock`` was set
        """
        if self._exit_instead_of_release_lock:
            # DEPLOY-1583: exit in any of cases:
            # * zk session failure
            # * standoff timeout
            # * service exception
            # * service stop
            self._log.info('exiting...')
            raise SystemExit(123)
        self._lock.release()

    def run(self):
        """
        Main loop: acquire the lock, run the service while holding it, release, repeat.

        The loop ends when ``stop()`` marks the service as stopped, or when the
        service finishes on its own and the restart policy does not ask for a restart.
        After a disconnect or a standoff timeout the loop always goes back to
        contending for the lock.
        """
        sleeper = retry.RetrySleeper(max_delay=5)

        self._stopped = False

        # Cancels a pending/held lock as soon as the coordinator is not connected.
        listener = lambda state: cancel_on_disconnect(state, self._lock)
        while not self._stopped:
            acquire_timeout = self._acquire_timeout_strategy()
            self._log.info("acquiring lock with timeout of {} seconds...".format(acquire_timeout))
            assert not self._lock.is_acquired
            self._coord.add_listener(listener)
            try:
                self._lock.acquire(timeout=acquire_timeout)
            except LockTimeout:
                self._log.info('failed to acquire lock within timeout of {} seconds'.format(acquire_timeout))
            except CoordError as e:
                self._log.info(format_exc('failed to acquire lock', e))
                # presumably sleeps with a growing delay capped by max_delay - see RetrySleeper
                sleeper.increment()
            else:
                # we locked, yey!
                # now start service and wait for session state change
                sleeper.reset()
                standoff_timeout = self._standoff_strategy()
                self._log.info("became singleton - "
                               "starting service with standoff timeout of {} seconds...".format(standoff_timeout))
                with self._stopping_lock:
                    # stop() may have run while we were acquiring; it releases the lock itself.
                    if self._stopped:
                        return
                    self._service = gevent.Greenlet(self._run)
                    self._service.start()
                res = self._wait_disconnect_or_service(standoff_timeout=standoff_timeout)
                if res is _DISCONNECTED:
                    self._log.warning("disconnected - stopping service")
                    force_kill_greenlet(self._service, log=self._log)
                    self._release_lock()
                    self._log.info("service stopped")
                elif res is _STANDOFF_TIMEOUT_EXCEEDED:
                    self._log.info("was leading for too long (more than {} seconds), "
                                   "stand off - stopping service".format(standoff_timeout))
                    # First we stop service, then release lock
                    # Thus we encounter less races
                    force_kill_greenlet(self._service, log=self._log)
                    self._release_lock()
                    self._log.info("service stopped")
                elif isinstance(res, Exception):
                    self._log.info("service raised an exception")
                    self._release_lock()
                    if self._restart_policy not in (RestartPolicy.ALWAYS_RESTART, RestartPolicy.RESTART_ON_EXCEPTION):
                        break
                    self._log.info("restarting according to the restart policy...")
                else:
                    self._log.info("service returned {!r}".format(res))
                    self._release_lock()
                    if self._restart_policy != RestartPolicy.ALWAYS_RESTART:
                        break
                    self._log.info("restarting according to the restart policy...")

                # sleep for sometime to let someone take leadership
                gevent.sleep(2)
            finally:
                # Runs on every path out of the try, including the return/break above.
                self._coord.remove_listener(listener)

    def stop(self):
        """
        Stop the wrapper: cancel a pending lock acquisition, kill the service
        greenlet if one was started, release the lock and stop the green thread.

        :raises SystemExit: with code 123 when ``exit_instead_of_release_lock`` was set
            (raised by ``_release_lock`` before the green thread itself is stopped)
        """
        self._log.info("stopping exclusive service...")
        self._lock.cancel()
        with self._stopping_lock:
            self._stopped = True
            if self._service:
                self._log.info("stopping service itself...")
                force_kill_greenlet(self._service, log=self._log)
        try:
            self._log.info("releasing lock...")
            self._release_lock()
        except ConnectionClosedError:
            # it's okay if we failed
            # application must be shutting down
            self._log.info("releasing lock ended with ConnectionClosedError, ignoring")
        else:
            self._log.info("successfully released lock")
        super(ExclusiveService, self).stop()
        # NOTE(review): if the release above failed with ConnectionClosedError the lock
        # object may still report is_acquired - confirm this assert cannot fire on shutdown.
        assert not self._lock.is_acquired
        self._log.info("stopped exclusive service")
