# -*- coding: utf-8 -*-

import logging
import os

from sandbox import common
from sandbox import sdk2
import sandbox.common.types.client as ctc
from sandbox.projects.rtmr.clusters import RTMR_CLUSTERS
from sandbox.projects.rtmr.common import get_rtmr_scripts
from sandbox.sdk2.helpers import subprocess as sp

# Values offered by the task's "Action" selector and passed verbatim to
# rolling_restart.py via --action. Order matters: the first entry is the one
# pre-selected as the default choice.
_ACTIONS = [
    "restart-storage",
    "restart-server",
    "reboot",
    "power-cycle",
    "update-man4",
]


class RtmrRollingRestart(sdk2.Task):
    """Rolling restart RTMR cluster

    Runs ``rolling_restart.py`` from the RTMR scripts checkout against the
    selected cluster with an SSH key taken from Vault. The set of hosts to
    skip is kept both in a local state file (handed to the script via
    ``--state-file``) and in ``Context.state``; the two are merged before and
    after the script runs.
    """

    class Requirements(sdk2.Task.Requirements):
        # GENERIC clients, excluding those tagged LXC.
        client_tags = ctc.Tag.GENERIC & ~ctc.Tag.LXC
        disk_space = 2 * 1024  # 2Gb

    class Parameters(sdk2.Task.Parameters):
        description = "Rolling restart RTMR cluster"
        kill_timeout = 7 * 24 * 3600

        # Cluster selector: one choice per RTMR_CLUSTERS entry, the first one
        # iterated is marked as the default.
        with sdk2.parameters.String("Cluster name", multiline=True, required=True) as cluster_name:
            _first = True
            for _name in RTMR_CLUSTERS:
                if _first:
                    cluster_name.values[_name] = cluster_name.Value(default=True)
                    _first = False
                else:
                    cluster_name.values[_name] = None
        excluded_nodes = sdk2.parameters.List(
            "Excluded nodes",
            sdk2.parameters.String
        )
        # Action selector: one choice per _ACTIONS entry, _ACTIONS[0] is the
        # default.
        with sdk2.parameters.String("Action", multiline=True, required=True) as action:
            for _action_name in _ACTIONS:
                if _ACTIONS[0] == _action_name:
                    # First action
                    action.values[_action_name] = action.Value(default=True)
                else:
                    action.values[_action_name] = None

        with sdk2.parameters.Group("Restart settings") as timeouts_block:
            # When the throttler is off, on_execute passes --rack instead of
            # the batch size/time options below.
            enable_throttler = sdk2.parameters.Bool(
                "Throttler",
                default_value=False
            )
            with enable_throttler.value[True]:
                throttler_batch_size = sdk2.parameters.Integer(
                    "Batch Size",
                    required=True,
                    default_value=3
                )
                throttler_batch_time = sdk2.parameters.Integer(
                    "Batch Time, seconds",
                    required=True,
                    default_value=120
                )

            restart_timeout = sdk2.parameters.Integer(
                "Restart timeout, seconds",
                required=True,
                default_value=600
            )
            restart_force = sdk2.parameters.Bool(
                "Ignore restart failures",
                default_value=False
            )

        with sdk2.parameters.Group("Cluster health check settings") as cluster_health_block:
            waitok_timeout = sdk2.parameters.Integer(
                "Waiting time, seconds",
                required=True,
                default_value=600
            )
            waitok_ignore = sdk2.parameters.Bool(
                "Ignore waiting cluster health failures",
                default_value=False
            )
            waitok_delay = sdk2.parameters.Integer(
                "Health check delay, seconds",
                required=True,
                default_value=15
            )
            max_non_active_tablets = sdk2.parameters.Integer(
                "Max non active tablets",
                required=True,
                default_value=0
            )
            max_nonreplicated_bsgroups = sdk2.parameters.Integer(
                "Max non replicated bsgroups",
                required=True,
                default_value=50
            )
            # Passed to the script as --max-damaged-bsgroups.
            max_incomplete_bsgroups = sdk2.parameters.Integer(
                "Max incompleted bsgroups",
                required=True,
                default_value=50
            )
        secret_name = sdk2.parameters.String(
            "Vault secret name with SSH key",
            required=True,
            default_value="robot-rtmr-mnt-ssh"
        )
        secret_owner = sdk2.parameters.String(
            "Vault secret owner",
            required=True,
            default_value="RTMR-DEV"
        )
        # Required like the other credential parameters: the value is always
        # passed to the script as --user.
        remote_user = sdk2.parameters.String(
            "Remote username",
            required=True,
            default_value="robot-rtmr-mnt"
        )
        scripts_url = sdk2.parameters.ArcadiaUrl(
            "RTMR Scripts Arcadia url",
            required=True,
            default_value="arcadia:/arc/trunk/arcadia"
        )

    class Context(sdk2.Task.Context):
        # Absolute path of the state file; filled in by update_state().
        state_file = None
        # List of excluded hosts mirrored from the state file.
        state = None

    def update_state(self):
        """Merge all known excluded hosts and persist them.

        The union of ``Context.state``, the non-empty lines of the state file
        and the ``excluded_nodes`` parameter is stored back into
        ``Context.state`` and rewritten to the state file, one host per line.
        ``Context.state_file`` is initialised to ``state.txt`` in the current
        directory on first use.
        """
        if self.Context.state_file is None:
            self.Context.state_file = str(sdk2.Path("state.txt").absolute())
        state_file = self.Context.state_file

        excluded_hosts = set(self.Context.state or [])

        try:
            with open(state_file, "r") as fd:
                stripped = (line.strip() for line in fd)
                excluded_hosts.update(host for host in stripped if host)
        except IOError:
            # A missing/unreadable state file is expected on the first call;
            # the file is (re)created below.
            pass

        # Tolerate an unset list parameter instead of failing on set(None).
        excluded_hosts.update(self.Parameters.excluded_nodes or [])

        # Sorted so that the persisted context and file are deterministic.
        hosts = sorted(excluded_hosts)
        self.Context.state = hosts
        with open(state_file, "w") as fd:
            fd.writelines(node + "\n" for node in hosts)

    def _build_command(self, scripts_path):
        """Return the rolling_restart.py argv list built from task parameters."""
        params = self.Parameters
        cmd = [
            os.path.join(scripts_path, "rolling_restart.py"),
            "--state-file", self.Context.state_file,
            "--cluster", params.cluster_name,
            "--wait-ok-timeout", str(params.waitok_timeout),
            "--restart-timeout", str(params.restart_timeout),
            "--status-check-delay", str(params.waitok_delay),
            "--max-nonactive-tablets", str(params.max_non_active_tablets),
            "--max-nonreplicated-bsgroups", str(params.max_nonreplicated_bsgroups),
            "--max-damaged-bsgroups", str(params.max_incomplete_bsgroups),
            "--action", params.action,
            "--user", params.remote_user,
        ]
        if params.waitok_ignore:
            cmd.append("--wait-ok-force")
        if params.restart_force:
            cmd.append("--restart-force")
        if params.enable_throttler:
            cmd.extend([
                "--batch-size", str(params.throttler_batch_size),
                "--batch-time", str(params.throttler_batch_time),
            ])
        else:
            cmd.append("--rack")
        return cmd

    def on_execute(self):
        """Run rolling_restart.py and keep the excluded-hosts state in sync.

        Raises:
            common.errors.TaskError: the script exited with a non-zero code.
        """
        self.update_state()
        scripts_path = get_rtmr_scripts(self, self.Parameters.scripts_url)
        cmd = self._build_command(scripts_path)

        try:
            with sdk2.helpers.ProcessLog(self, logger=logging.getLogger("rolling_restart.py")) as pl:
                with sdk2.ssh.Key(self, self.Parameters.secret_owner, self.Parameters.secret_name):
                    proc = sp.Popen(cmd, stdout=pl.stdout, stderr=sp.STDOUT, env=os.environ.copy())
                    return_code = proc.wait()
                    if return_code != 0:
                        raise common.errors.TaskError("Rolling restart error return code: " + str(return_code))
        finally:
            # Merge whatever the script wrote to the state file back into the
            # context even when it failed, so progress made before the failure
            # is not kept only in the local file.
            # NOTE(review): relies on the task context being saved after a
            # failed on_execute - confirm against Sandbox behaviour.
            self.update_state()
        self.set_info("Done")
