"""Host health status management."""

import logging

from gevent.lock import Semaphore
from mongoengine import StringField, LongField

from sepelib.core import constants
from walle.expert.types import CheckType, CheckStatus
from walle.models import Document, timestamp
from walle.util.gevent_tools import gevent_idle_iter
from walle.util.mongo.bulk_group import SingleCollectionBulkGroup

log = logging.getLogger(__name__)


class HealthCheck(Document):
    """Stored status of one check type for one host.

    Documents live in the ``health_checks`` collection of the ``health`` database alias.
    The bulk writers in this module address documents by ``_id`` using keys produced
    by :meth:`mk_check_key`.
    """

    id = StringField(primary_key=True, help_text="Check's synthetic primary key")
    fqdn = StringField(min_length=1, required=True, help_text="FQDN of the host checks were received for")
    type = StringField(choices=CheckType.ALL, required=True, help_text="Juggler check type a.k.a. service_name")
    status = StringField(choices=CheckStatus.ALL, required=True, help_text="Wall-E check status")
    status_mtime = LongField(required=True, help_text="Last check status modification time")
    timestamp = LongField(required=False, help_text="Time check status had been received")
    metadata = StringField(help_text="Check metadata for some of check types")

    # Names of the fields listed for API use (every declared field).
    api_fields = ("id", "fqdn", "type", "status", "status_mtime", "timestamp", "metadata")

    meta = {
        "collection": "health_checks",
        "db_alias": "health",
        "indexes": [
            {"name": "fqdn", "fields": ["fqdn"]},
        ],
        # NOTE(rocco66): do not raise an exception when an undeclared field is met;
        # make sure all documents have the same fields.
        "strict": False,
    }

    # Empirical value that seems optimal for performance (when running from one instance).
    # Increasing it may make a bulk query exceed the query size limit.
    # Passed to the bulk group as ``bulk_size_limit``.
    MONGODB_BULK_SIZE = 2000

    # WALLE-3448: experimentally found value; it is the initial value of the semaphore below.
    PARALLEL_MONGODB_UPDATES = 20

    # Class-level semaphore, so it is shared by every updater created in this process.
    # Passed to the bulk group as ``parallel_bulks_lock``.
    PARALLEL_MONGODB_UPDATES_LOCK = Semaphore(value=PARALLEL_MONGODB_UPDATES)

    @staticmethod
    def mk_check_key(host_name: str, check_type: str) -> str:
        """Return the synthetic check key: ``host_name`` and ``check_type`` joined by ``|``."""
        return "|".join([host_name, check_type])

    @classmethod
    def bulk_update(cls) -> "BulkHealthUpdater":
        """Return a new :class:`BulkHealthUpdater` for batched writes of health checks."""
        return BulkHealthUpdater()


class BulkHealthUpdater:
    """Queues health check upserts and removals into bulk operations.

    Usable as a context manager: leaving the ``with`` block calls ``finish()`` on the
    underlying bulk group. The ``received``, ``changed`` and ``deleted`` counters are
    incremented as checks are queued.
    """

    def __init__(self):
        # All counters start from zero.
        self.changed = self.deleted = self.received = 0

        health_collection = HealthCheck.get_collection()
        self._bulk_group = _HealthUpdaterBulkGroup(
            collection=health_collection,
            stats_key=("juggler", "check_storing"),
            bulk_size_limit=HealthCheck.MONGODB_BULK_SIZE,
            parallel_bulks_lock=HealthCheck.PARALLEL_MONGODB_UPDATES_LOCK,
        )

    def save_checks(self, host_name, checks, receive_time):
        """Queue an upsert for every check of ``host_name``.

        :param host_name: host the checks belong to; part of each check key.
        :param checks: iterable of ``(check, changed)`` pairs, where ``check`` is a mapping
            with at least a ``"type"`` key and ``changed`` is a flag counted in ``self.changed``.
        :param receive_time: timestamp forwarded to the bulk group's ``save_check``.
        """
        group = self._bulk_group
        for check, is_changed in checks:
            key = HealthCheck.mk_check_key(host_name, check["type"])

            with group.current() as bulk:
                if is_changed:
                    self.changed += 1
                group.save_check(bulk, key, check, receive_time)
                # Counted only after the operation has been queued.
                self.received += 1

    def delete_checks(self, host_name, checks):
        """Queue a removal for every check of ``host_name``.

        :param host_name: host the checks belong to; part of each check key.
        :param checks: iterable of mappings with ``"type"`` and ``"timestamp"`` keys.
        """
        group = self._bulk_group
        for check in checks:
            key = HealthCheck.mk_check_key(host_name, check["type"])

            with group.current() as bulk:
                self.deleted += 1
                group.delete_check(bulk, key, check["timestamp"])

    def __enter__(self):
        """Return the updater itself."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Finish the bulk group; called whether or not the ``with`` body raised."""
        self._bulk_group.finish()


class _HealthUpdaterBulkGroup(SingleCollectionBulkGroup):
    """Bulk group that queues health check upserts and removals.

    Tracks whether anything has been queued into the current bulk so that empty bulks
    are never executed, and tolerates the duplicate key errors that ``save_check``
    upserts are expected to produce.
    """

    # Flags telling whether the current bulk has any upsert / removal queued.
    # They are reset by ``_start_bulk``.
    _check, _delete = None, None

    def save_check(self, bulk, check_key, check, timestamp):
        """Queue an upsert of ``check`` under ``check_key``.

        :param bulk: bulk operation to add the upsert to.
        :param check_key: value of the document ``_id``.
        :param check: mapping of fields applied with ``$set``.
        :param timestamp: only a stored document with a smaller ``timestamp`` is updated.
        """
        self._check = True
        # special case when check with this id exists, but its timestamp >= our timestamp:
        # insert op will fail because of duplicate primary key,
        # but other operations in the bulk will be executed (because it is unordered)
        query = {"_id": check_key, "timestamp": {"$lt": timestamp}}
        bulk.find(query).upsert().update_one({"$set": check})

    def delete_check(self, bulk, check_key, check_timestamp):
        """Queue a removal of the document with exactly this ``_id`` and ``timestamp``."""
        self._delete = True
        query = {"_id": check_key, "timestamp": check_timestamp}
        bulk.find(query).remove()

    def execute(self):
        """Spawn execution of the current bulk, unless nothing has been queued into it."""
        if self._check or self._delete:
            self._workers_pool.spawn(self._execute_bulk, self._bulk, self._parallel_bulks_lock)

    def _start_bulk(self):
        """Reset the "has queued operations" flags, then let the base class start a bulk."""
        self._check, self._delete = False, False
        super()._start_bulk()

    def _handle_bulk_write_error(self, error):
        """Ignore errors that consist solely of duplicate key failures; delegate the rest.

        :param error: bulk write error whose ``details`` mapping may contain ``writeErrors``.
        """
        DUPLICATE_KEY_ERROR = 11000
        write_errors = error.details.get('writeErrors', [])
        # some bulk items can fail with duplicate id error (code 11000), it's ok for us (see comment for .save_check)
        # An empty list must not count as "only duplicates": all() of nothing is True, which
        # would silently drop errors that carry no per-operation write errors at all.
        only_duplicates = bool(write_errors) and all(err['code'] == DUPLICATE_KEY_ERROR for err in write_errors)
        if not only_duplicates:
            super()._handle_bulk_write_error(error)


def _sort_dict_to_str(d):
    """Serialize mapping ``d`` as ``key:value`` pairs joined by ``|``, ordered by key.

    Keys are concatenated with ``":"`` and therefore have to be strings.
    """
    # I found that generator expression works 50% slower here. Don't sure why, just use list comprehension.
    return "|".join([key + ":" + _sort_value_to_string(value) for key, value in sorted(d.items())])


def _sort_list_to_str(val):
    """Serialize the items of ``val`` in sorted order, joined by ``|``.

    Items are ordered by value when they are mutually comparable. Otherwise (dicts,
    mixed types, ...) they are ordered by their serialized form, so the result is still
    deterministic and independent of the input order instead of raising ``TypeError``.
    """
    try:
        ordered = sorted(val)
    except TypeError:
        # Items can't be compared with each other: fall back to ordering the serialized strings.
        return "|".join(sorted([_sort_value_to_string(item) for item in val]))

    # I found that generator expression works 50% slower here. Don't sure why, just use list comprehension.
    return "|".join([_sort_value_to_string(item) for item in ordered])


def _sort_value_to_string(value):
    """Return an order-independent string form of ``value``.

    Dicts and lists/tuples are serialized recursively with sorted content; any other
    value is converted with ``str()``, yielding ``""`` if that conversion fails.
    """
    # json.dumps is too slow
    if isinstance(value, dict):
        return _sort_dict_to_str(value)
    elif isinstance(value, (list, tuple)):
        return _sort_list_to_str(value)
    else:
        try:
            return str(value)
        except Exception:
            # don't bother, really
            return ""


def _gc_outdated_host_health():
    """Delete health checks whose ``timestamp`` is at least a day behind ``timestamp()``.

    Matching documents are deleted one at a time while iterating through
    ``gevent_idle_iter``. Nothing is logged when no document matches.
    """
    deadline = timestamp() - constants.DAY_SECONDS
    outdated = HealthCheck.objects(timestamp__lte=deadline)

    total = outdated.count()
    if not total:
        return

    log.info("Found %d outdated host health entries", total)
    for entry in gevent_idle_iter(outdated):
        entry.delete()
    log.info("Deleted outdated host health entries")
