import math
import logging
import calendar
import collections
import datetime as dt

from sandbox import common
from sandbox.common.types import resource as ctr

from sandbox.services.base import service as base_service

from sandbox.yasandbox import controller
from sandbox.yasandbox.database import mapping


logger = logging.getLogger(__name__)


class CleanResources(base_service.SingletonService):
    """
    Resource records cleanup service thread.
    """
    # Interval in seconds to which resource expire time is rounded.
    EXPIRES_TIME_INTERVAL = 600
    # Default dataset limit for single query.
    UPDATE_CHUNK_SIZE = 100000
    # Maximum amount of resources examined by a single garbage collector pass.
    GARBAGE_COLLECTOR_CHUNK_SIZE = 400000  # processed in ~20-30sec
    # Purge hosts records after the specified amount of days. For example, if a resource is marked
    # as DELETED 14 days after its last access, it will be physically removed from all hosts no later
    # than 16 days after the moment it was deleted.
    MAX_RESOURCE_AFTERLIFE = 30
    # NOTE(review): not referenced by any method visible in this class; presumably seconds - confirm.
    NOTIFICATION_TIMEOUT = 120

    # NOTE(review): presumably the pause in seconds between `tick()` calls, read by the base service - confirm.
    tick_interval = 300

    def __init__(self, *args, **kwargs):
        """
        Create the service; all arguments are forwarded unchanged to the parent class constructor.
        """
        super(CleanResources, self).__init__(*args, **kwargs)

    def tick(self):
        """
        Perform one full maintenance pass over resource records.

        The steps are executed strictly in the following order:

        1. Split READY resources with a passed expiration time into expired/outdated/immortal groups and
           drop, refresh or apply the expiration accordingly.
        2. Run one chunk of the garbage collector.
        3. Purge long unused BROKEN resources, mark READY resources without hosts as BROKEN, drop records of
           unknown hosts and finally purge hosts records of old BROKEN/DELETED resources.
        4. Remove temporary resource links which were not accessed for `resource_link_ttl` days.
        """
        doomed, stale, eternal = self._find_expired_resources()
        self._reset_expiration_on_resources(eternal)
        self._update_expiration_on_resources(stale)
        self._delete_expired_resources(doomed)

        self._run_garbage_collector()
        # The config is intentionally read right here, after the garbage collector has finished.
        broken_lifetime = self.service_config.get("default_ttl")
        self._purge_unused_broken_resources(broken_lifetime)
        self._mark_resources_without_hosts_as_broken()
        self._check_unknown_hosted_resources()
        self._purge_unused_broken_resources_hosts(self.MAX_RESOURCE_AFTERLIFE)

        link_ttl = self.service_config.get("resource_link_ttl")
        logger.info("Remove resource links not accessed more than %s days", link_ttl)
        removed_links = controller.Resource.clean_temporary_resource_links(link_ttl)
        logger.info("Removed %s resource links", removed_links)

    @staticmethod
    def _get_sorted_ids_from_pymongo_resources(resources):
        return sorted(r["_id"] for r in resources)

    @staticmethod
    def _get_total_size_of_pymongo_resources(resources):
        return sum(int(r["size"]) for r in resources) << 10

    @classmethod
    def _round_expire_time(cls, expire_time):
        """
        Round expire time up to the nearest interval
        """
        if expire_time is None:
            return expire_time
        timestamp = float(calendar.timegm(expire_time.timetuple()))
        timestamp = int(math.ceil(timestamp / cls.EXPIRES_TIME_INTERVAL) * cls.EXPIRES_TIME_INTERVAL)
        return dt.datetime.utcfromtimestamp(timestamp)

    def _get_ttl_from_pymongo_resource(self, r):
        ttl = self.service_config.get("default_ttl")
        for attr in r.get("attrs", []):
            if attr["k"] == "ttl":
                ttl = controller.Resource.cast_attr(attr["v"]).strip().lower()  # noqa
                break
        if ttl == "inf":
            return None
        try:
            td = dt.timedelta(days=int(ttl))
            _ = r["time"]["at"] + td  # noqa
            return td.days
        except (ValueError, TypeError, OverflowError):
            logger.warning("Resource's #%d TTL %r is in wrong format. Treat it as default.", r["_id"], ttl)
            return self.service_config.get("default_ttl")

    def _find_expired_resources(self):
        """
            Fetch READY resources with expire time lower than the current time and split them in three lists:
            * immortal - resources with expiration time, but ttl=inf, so expiration should be reset
            * outdated - expired but still used resources, expiration time should be updated
            * expired - expired and not used resources, should be deleted

            The `time.ex` field of the returned documents is adjusted in memory: it is set to `None` for
            immortal resources and to last access time plus TTL for outdated ones.

            :return: tuple of `(expired, outdated, immortal)` lists of raw documents
        """
        now = dt.datetime.utcnow()
        overdue = (
            mapping.Resource.objects.all()
            .only("id", "size", "attributes", "time")
            .as_pymongo()
            .filter(state=mapping.Resource.State.READY, time__expires__lt=now)
        )
        expired, outdated, immortal = [], [], []
        for document in overdue:
            ttl = self._get_ttl_from_pymongo_resource(document)
            if ttl is None:
                document["time"]["ex"] = None
                immortal.append(document)
                continue
            deadline = document["time"]["at"] + dt.timedelta(days=ttl)
            if deadline > now:
                document["time"]["ex"] = deadline
                outdated.append(document)
            else:
                expired.append(document)
        return expired, outdated, immortal

    def _reset_expiration_on_resources(self, resources):
        if not resources:
            return
        resource_ids = self._get_sorted_ids_from_pymongo_resources(resources)
        logger.debug(
            "Resetting expiration information for %d resources in total for %s: %r",
            len(resources), common.utils.size2str(self._get_total_size_of_pymongo_resources(resources)), resource_ids
        )
        for ids_chunk in common.utils.grouper(resource_ids, self.UPDATE_CHUNK_SIZE):
            mapping.Resource.objects(id__in=ids_chunk).update(unset__time__expires=True)

    def _update_expiration_on_resources(self, resources):
        logger.debug(
            "Actualizing expiration information for %d resources in total for %s",
            len(resources), common.utils.size2str(self._get_total_size_of_pymongo_resources(resources))
        )
        resource_ids_by_expiration_time = collections.defaultdict(list)
        for resource in resources:
            rounded_expiration_time = self._round_expire_time(resource["time"]["ex"])
            resource_ids_by_expiration_time[rounded_expiration_time].append(resource["_id"])

        for expire_time, resource_ids in resource_ids_by_expiration_time.iteritems():
            for ids_chunk in common.utils.grouper(resource_ids, self.UPDATE_CHUNK_SIZE):
                mapping.Resource.objects(id__in=ids_chunk).update(set__time__expires=expire_time)
                logger.debug("Updated to expire at %s: %r", common.utils.utcdt2iso(expire_time), ids_chunk)

    def _delete_expired_resources(self, resources):
        if not resources:
            return
        resource_ids = self._get_sorted_ids_from_pymongo_resources(resources)
        logger.info(
            "Marking %d expired resources in total for %s as DELETED: %r",
            len(resources), common.utils.size2str(self._get_total_size_of_pymongo_resources(resources)), resource_ids
        )
        controller.Resource.list_resources_audit("Mark resource delete", resource_ids, state=ctr.State.DELETED)
        for ids_chunk in common.utils.chunker(resource_ids, self.UPDATE_CHUNK_SIZE):
            mapping.Resource.objects(id__in=ids_chunk).update(
                set__state=mapping.Resource.State.DELETED,
                set__time__updated=dt.datetime.utcnow()
            )
        logger.info("Totally %d expired resources are deleted", len(resources))

    def _run_garbage_collector(self):
        """
        Examine the next chunk of READY resources which have no expiration time at all.

        At most `GARBAGE_COLLECTOR_CHUNK_SIZE` resources are read in ascending identifier order,
        starting after the identifier kept in the service context under `last_gc_resource_id`.
        For every resource:

        * if its TTL is finite, the expiration time (last access time plus TTL) is stored to the database;
        * if its TTL is infinite and it has no backup task attribute yet, the attribute is added with value 0.

        The identifier of the last examined resource is saved back to the context; it is reset to -1
        when the chunk was not full, so the next pass starts from the beginning.
        """
        resume_after = self.context.get("last_gc_resource_id", -1)
        logger.info("Running garbage collector for immortal resources starting from %d", resume_after)
        projection = mapping.Resource.objects.all().only("id", "size", "attributes", "time").as_pymongo()
        candidates = projection.filter(
            state=mapping.Resource.State.READY,
            time__expires__exists=False,
            read_preference=mapping.ReadPreference.SECONDARY,
            id__gt=resume_after
        )
        cursor = candidates.timeout(None).limit(self.GARBAGE_COLLECTOR_CHUNK_SIZE).order_by("id")

        checked = 0
        fixed = 0
        size_sum = 0
        to_backup = []
        for document in cursor:
            checked += 1
            ttl = self._get_ttl_from_pymongo_resource(document)
            if ttl is None:
                # A truly immortal resource - make sure it carries the backup task attribute.
                already_marked = any(
                    attribute["k"] == ctr.ServiceAttributes.BACKUP_TASK
                    for attribute in document.get("attrs", [])
                )
                if not already_marked:
                    to_backup.append(document["_id"])
            else:
                # The resource is mortal in fact, but its expiration time has been lost.
                expires_at = document["time"]["at"] + dt.timedelta(days=ttl)
                mapping.Resource.objects(id=document["_id"]).update_one(set__time__expires=expires_at)
                fixed += 1
            size_sum += int(document["size"])
            resume_after = document["_id"]

        if checked < self.GARBAGE_COLLECTOR_CHUNK_SIZE:
            # The end of the collection has been reached - start over on the next pass.
            resume_after = -1
        self.context["last_gc_resource_id"] = resume_after
        logger.debug("Set last resource garbage collector id to %d", resume_after)

        if to_backup:
            backup_mark = mapping.Resource.Attribute(key=ctr.ServiceAttributes.BACKUP_TASK, value=0)
            mapping.Resource.objects(id__in=to_backup).update(push__attributes=backup_mark)
        logger.debug(
            "Garbage collector checked %d resources in total for %s. "
            "Marked %d resources with actual expiration time, %d for backup",
            checked, common.utils.size2str(size_sum << 10), fixed, len(to_backup)
        )

    def _purge_unused_broken_resources(self, lifetime_days):
        """
        Schedule removal of BROKEN resources which were not accessed for too long.

        For every BROKEN resource accessed earlier than `lifetime_days` days ago and having a host record
        in the OK state, the matched host record (positional operator `S`) is switched to MARK_TO_DELETE.

        :param lifetime_days: amount of days since the last access after which a resource is purged
        """
        logger.info("Purging broken resources that are unused longer than %d days", lifetime_days)
        accessed_before = dt.datetime.utcnow() - dt.timedelta(days=lifetime_days)
        abandoned = mapping.Resource.objects(
            state=mapping.Resource.State.BROKEN,
            hosts_states__state=mapping.Resource.HostState.State.OK,
            time__accessed__lt=accessed_before,
        )
        purged_count = abandoned.update(
            set__hosts_states__S__state=mapping.Resource.HostState.State.MARK_TO_DELETE
        )
        logger.info("Totally %d broken resources are purged", purged_count)

    def _mark_resources_without_hosts_as_broken(self):
        """
        Switch READY resources which have no copies left to the BROKEN state.

        A resource is selected if it has no host records, its `mds` is `None` and its MD5 differs from
        the one of an empty file. At most `UPDATE_CHUNK_SIZE` resources are processed per call.
        Access and update times of the selected resources are set to the current time,
        and an audit record is registered for them.
        """
        orphans = mapping.Resource.objects(
            state=mapping.Resource.State.READY,
            hosts_states__0__exists=False,
            mds=None,
            md5__ne=ctr.EMPTY_FILE_MD5,
        )
        resource_ids = list(orphans.limit(self.UPDATE_CHUNK_SIZE).fast_scalar("id"))
        resource_ids.sort()

        mapping.Resource.objects(id__in=resource_ids).update(
            set__state=mapping.Resource.State.BROKEN,
            set__time__accessed=dt.datetime.utcnow(),
            set__time__updated=dt.datetime.utcnow(),
        )
        logger.info(
            "Totally %d (limit is %d) resources are marked as broken because of no copies available: %r",
            len(resource_ids), self.UPDATE_CHUNK_SIZE, resource_ids
        )
        audit_message = "Mark as BROKEN because of no copies available"
        controller.Resource.list_resources_audit(audit_message, resource_ids, state=ctr.State.BROKEN)

    def _purge_unused_broken_resources_hosts(self, limit_days):
        """
        Drop all hosts records of BROKEN and DELETED resources which were last accessed
        more than the given amount of days ago.

        :param limit_days: amount of days since the last access after which hosts records are dropped
        """
        logger.info("Purging hosts record for resources that are unused longer than %d days", limit_days)
        dead_states = [mapping.Resource.State.BROKEN, mapping.Resource.State.DELETED]
        accessed_before = dt.datetime.utcnow() - dt.timedelta(days=limit_days)
        forgotten = mapping.Resource.objects(
            state__in=dead_states,
            time__accessed__lt=accessed_before,
            hosts_states__ne=None
        )
        purged_count = forgotten.update(unset__hosts_states=True)
        logger.info("Purged hosts records for %d deleted/broken resources", purged_count)

    def _check_unknown_hosted_resources(self, hosts_limit=5):
        """
        Remove host records which refer to hosts not registered as clients from READY resources.

        :param hosts_limit: an error is logged if more than this amount of unknown hosts is detected

        NOTE(review): the cleanup goes on even after the "possible data error" has been logged -
        confirm this is intended.
        """
        known_hosts = set(client.hostname for client in controller.Client.list())
        unknown_hosts = list(set(mapping.Resource.hosters()).difference(known_hosts))
        if len(unknown_hosts) > hosts_limit:
            logger.error(
                "Possible data error - detected more than %d unknown hosts: %r",
                hosts_limit, sorted(unknown_hosts)
            )

        affected = mapping.Resource.objects(
            state=mapping.Resource.State.READY,
            hosts_states__host__in=unknown_hosts
        )
        resource_ids = list(affected.fast_scalar("id"))
        # TODO: Native mongoengine query doesn't work due to a bug in parameter validation logic.
        # Mongoengine thinks update value to a `hosts_states.host` (StringField) should also be a string.
        #
        # Correct (but buggy) expression:
        # `pull__hosts_states__host__in=unknown_hosts`
        pull_unknown_hosts = {"$pull": {"hosts": {"h": {"$in": unknown_hosts}}}}
        for chunk in common.utils.grouper(resource_ids, self.UPDATE_CHUNK_SIZE):
            mapping.Resource.objects(id__in=chunk).update(__raw__=pull_unknown_hosts)
        logger.info(
            "Totally %d resource records found with unknown hosts (%r): %r",
            len(resource_ids), unknown_hosts, resource_ids
        )
