import os
import json
import math
import uuid
import time
import random
import urllib
import datetime
import urlparse
import itertools
import collections

import six

from sandbox import common
import sandbox.common.types.misc as ctm
import sandbox.common.types.task as ctt
import sandbox.common.types.client as ctc
import sandbox.common.types.resource as ctr
import sandbox.common.types.notification as ctn

from sandbox.yasandbox.controller import user as user_controller
from sandbox.yasandbox.controller import task as task_controller
from sandbox.yasandbox.controller import client as client_controller
from sandbox.yasandbox.database import mapping


#: Maximum chunk size in GiB for resource backup thread.
#: `Resource.force_backup` reports the total size of the resources being backed up
#: as a percentage of this value (the `chunk_size` task parameter).
MAX_BACKUP_CHUNK_SIZE = 300

#: Default notification settings for backup tasks.
#: Stays `None` after import and is filled in by the first `Resource.initialize()` call.
BACKUP_NOTIFICATION = None


class Resource(object):
    # Database model all queries of this controller are issued against
    Model = mapping.Resource
    # Audit logger; stays `None` until `initialize()` assigns it
    logger = None
    # Lightweight record returned by `extra_resources_to_drop()` and `resources_to_remove()`
    ListEntry = collections.namedtuple("Resource", ["id", "type", "size"])
    UPDATE_SANDBOX_RESOURCES = "UpdateSandboxResources"
    # Fields which `update()` assigns without flagging the resource as updated.
    # NOTE(review): with the current constants both names are intercepted by earlier branches of `update()`
    # ("attributes" by its own branch, "description" by `RENAMED_FILEDS`), so that check is never reached.
    SHORT_UPDATE_FIELDS = ("attributes", "description")

    # Keys are the query parameter names accepted from the API; the map is passed to `common.api.remap_query`.
    # The first item of each pair is the name the parameter has in `prepare_api_query()`/`list_query()`;
    # the second one presumably is the database field used for ordering -- TODO confirm against `remap_query`.
    LIST_QUERY_MAP = {
        "id": ("id", "id"),
        "type": ("resource_type", "type"),
        "arch": ("arch", "arch"),
        "state": ("state", "state"),
        "owner": ("owner", "owner"),
        "client": ("host", "hosts__h"),
        "task_id": ("task_id", "task_id"),
        "dependant": ("dependant", "task_id"),
        "accessed": ("accessed", "time__at"),
        "created": ("created", "time__ct"),
        "updated": (None, "time__up"),
        "attr_name": ("attr_name", None),
        "attr_value": ("attr_value", None),
        "any_attr": ("any_attr", None),
        "attrs": ("attrs", None),
        "limit": ("limit", None),
        "offset": ("offset", None),
        "order": ("order_by", None),
    }  # Query map for resource request

    # Minimal task projection used by `create()` to check who owns the target task
    ShortTask = collections.namedtuple("ShortTask", ["id", "author", "owner", "parent_id"])

    # Field names accepted by `update()` mapped to the model attributes which are actually assigned
    RENAMED_FILEDS = {
        "file_name": "path",
        "description": "name"
    }

    @classmethod
    def list_query(
        cls,
        id=0, resource_type=None, owner=None, task_id=None, arch=None, date=None, state=None, skynet_id="",
        any_attrs=None, all_attrs=None, host=None, omit_failed=False, accessed=None, created=None,
    ):
        """
        Translate resource filter parameters into keyword arguments of a database query.

        Falsy parameters add nothing to the resulting query.

        :param id: a resource id, or a list/tuple of ids
        :param resource_type: a resource type name, or a list/tuple of names (every type must be a string)
        :param owner: resource owner
        :param task_id: id of the task owning the resource, or a list/tuple of such ids
        :param arch: resource arch; resources with "any" arch match as well
        :param date: day in `%Y-%m-%d` format, matches resources created after `date` and before `date` + one day;
            takes precedence over `created`
        :param state: a resource state, or a list/tuple of states
        :param skynet_id: skynet id of the resource
        :param any_attrs: attributes to match with `__in` semantics; an empty key or value means
            "match by value only" or "match by key only" respectively
        :param all_attrs: same as `any_attrs`, but with `__all` semantics; used only when `any_attrs` is empty
        :param host: host of the resource
        :param omit_failed: omit broken resources, i.e. force `state` to READY and NOT_READY
        :param accessed: filter by access time, from `accessed[0]` to `accessed[1]` inclusive
        :param created: filter by creation time, from `created[0]` to `created[1]` inclusive
        :return: dict with database query
        """

        if omit_failed:
            state = (cls.Model.State.READY, cls.Model.State.NOT_READY,)

        query = {}

        def match_one_or_many(field, value):
            # Sequences turn into an `__in` lookup, scalars into an exact match, falsy values are skipped
            if not value:
                return
            if isinstance(value, (list, tuple)):
                query[field + "__in"] = value
            else:
                query[field] = value

        match_one_or_many("id", id)
        match_one_or_many("type", resource_type)
        if owner:
            query["owner"] = owner
        match_one_or_many("task_id", task_id)
        if arch:
            query["arch__in"] = (arch, "any",)

        if date:
            parsed = time.strptime(date, "%Y-%m-%d")
            day_start = datetime.datetime(parsed.tm_year, parsed.tm_mon, parsed.tm_mday)
            day_end = day_start + datetime.timedelta(days=1)
            query["time__created__gt"] = day_start
            query["time__created__lt"] = day_end
        elif created:
            query["time__created__gte"] = created[0]
            query["time__created__lte"] = created[1]

        if accessed:
            query["time__accessed__gte"] = accessed[0]
            query["time__accessed__lte"] = accessed[1]

        match_one_or_many("state", state)
        if skynet_id:
            query["skynet_id"] = skynet_id

        # Split requested attributes into full pairs, key-only and value-only conditions
        nulls = (None, "")
        exact_pairs, only_keys, only_values = [], [], []
        for key, value in (any_attrs or all_attrs or {}).iteritems():
            key_missing = key in nulls
            value_missing = value in nulls
            if key_missing and value_missing:
                continue
            if value_missing:
                only_keys.append(cls.cast_attr(key))
            elif key_missing:
                only_values.append(cls.cast_attr(value))
            else:
                exact_pairs.append({"key": cls.cast_attr(key), "value": cls.cast_attr(value)})

        suffix = "__in" if any_attrs else "__all"

        if len(only_keys) > 1:
            query["attributes__key" + suffix] = only_keys
        elif only_keys:
            query["attributes__key"] = only_keys[0]

        if len(only_values) > 1:
            query["attributes__value" + suffix] = only_values
        elif only_values:
            query["attributes__value"] = only_values[0]

        if len(exact_pairs) > 1:
            query["attributes" + suffix] = exact_pairs
        elif exact_pairs:
            query["attributes__match"] = exact_pairs[0]

        if host:
            query["hosts_states__host"] = host

        return query

    @classmethod
    def prepare_api_query(cls, query):
        """
        Turn a remapped API query into a ready-to-run database query.

        Service keys (`limit`, `offset`, `order_by`, `attrs`, `attr_name`, `attr_value`, `any_attr`, `dependant`)
        are popped from `query`, so the passed dict is modified in place; what is left is passed to `list_query`.

        :param query: dict of query parameters named after `list_query` arguments plus the service keys above
        :return: tuple of (database query, kwargs built by `list_query` or `None` if it was not called,
            offset, limit)
        :raises ValueError: if `attrs` is not a valid JSON or is neither a dict nor a list
        """
        limit = query.pop("limit", 1)
        offset = query.pop("offset", 0)
        mongo_query = None

        order_by = query.pop("order_by", None)  # Empty list if not specified
        # Default ordering is checked before `dependant` may add `id` to the query below
        if not order_by and "id" not in query:
            order_by = ["-id"]

        attrs = {}
        raw_attrs = query.pop("attrs", None)
        if raw_attrs:
            try:
                attrs = json.loads(raw_attrs)
            except ValueError as ex:
                raise ValueError("Bad json value for 'attrs': {}, Error: {}".format(raw_attrs, ex))

            if isinstance(attrs, list):
                # List of pairs: wrap it into a list subclass with `iteritems`,
                # so that `list_query` can iterate it the same way as a dict
                attrs = type("", (type(attrs),), {"iteritems": lambda _: iter(_)})(attrs)
            elif not isinstance(attrs, dict):
                raise ValueError(
                    "Parameter 'attrs' must be dict or list of pairs, not: {!r}".format(type(attrs))
                )

        attr_name = query.pop("attr_name", None)
        attr_value = query.pop("attr_value", None)

        if attr_name:
            if isinstance(attrs, dict):
                attrs[attr_name] = attr_value
            else:
                # `attrs` is a list of pairs here: assigning an item by name would raise `TypeError`
                attrs.append([attr_name, attr_value])
        if query.pop("any_attr", False):
            query["any_attrs"] = attrs
        elif attrs:
            query["all_attrs"] = attrs

        dependant = query.pop("dependant", None)
        if dependant:
            # Restrict the search to the resources listed in requirements of the given task
            ids = list(mapping.Task.objects(id=dependant).fast_scalar("requirements__resources"))
            if ids and ids[0]:
                query["id"] = ids[0]
            else:
                # Nothing is required by the task (or there is no such task): the result is empty
                mongo_query = mapping.Resource.objects.none()

        query_kwargs = None

        # Convert query to mongo
        if mongo_query is None:
            query_kwargs = cls.list_query(**query)
            mongo_query = cls.Model.objects(**query_kwargs)

        if order_by:
            mongo_query = mongo_query.order_by(*order_by)
        if offset:
            mongo_query = mongo_query.skip(offset)

        # `limit` is required and thus always present
        mongo_query = mongo_query.limit(limit)

        # Resource hosts are requested separately by `ResourceLinks` if needed
        mongo_query = mongo_query.exclude("hosts_states")

        # Lite mongo query
        mongo_query = mongo_query.lite()

        return mongo_query, query_kwargs, offset, limit

    @classmethod
    def template_mongo_query(cls, query):
        """
        Build a database query for resources from an API-style query dict.

        :param query: query dict, remapped with `LIST_QUERY_MAP` before use
        :return: database query, i.e. the first item of `prepare_api_query` result
        """
        remapped, _offset, _limit = common.api.remap_query(query, cls.LIST_QUERY_MAP, save_query=True)
        prepared = cls.prepare_api_query(remapped)
        return prepared[0]

    @classmethod
    def last_resource(cls, resource_id=None, filter=None):
        """
        Fetch a single resource either by its id or by a filter.

        :param resource_id: resource id; when it is not `None`, `filter` is ignored
        :param filter: API-style query dict for `template_mongo_query`; a copy of it is used
        :return: result of the lookup, or `None` when neither `resource_id` nor `filter` is given
        """
        if resource_id is not None:
            return cls.Model.objects.with_id(resource_id)
        if not filter:
            return None
        return cls.template_mongo_query(filter.copy()).first()

    @classmethod
    def initialize(cls):
        """
        Prepare the controller for use: fill in default backup notification settings (only once),
        ensure database indexes of resource-related models and set up the audit logger.
        """
        global BACKUP_NOTIFICATION
        if BACKUP_NOTIFICATION is None:
            BACKUP_NOTIFICATION = {
                "transport": ctn.Transport.EMAIL,
                "statuses": list(ctt.Status.Group.BREAK),
                "recipients": [user_controller.Group.service_group],
            }

        for model in (cls.Model, mapping.ResourceLink, mapping.ResourceMeta):
            model.ensure_indexes()

        cls.logger = common.log.get_core_log("resources_audit")

    @classmethod
    def resource_audit_item(
        cls, message, resource=None, timestamp=None, resource_id=None, state=None, attributes=None
    ):
        """
        Build a single resource audit record.

        Explicitly passed `resource_id`, `state` and `attributes` take precedence. Falsy ones are taken
        from `resource`, or replaced with `0` / empty string when no resource is given.

        :param message: audit message
        :param resource: resource object to describe
        :param timestamp: time of the event, current UTC time by default
        :param resource_id: resource id
        :param state: resource state
        :param attributes: resource attributes in string form
        :return: record made by `yt_schemas.ResourceAudit.make`
        """
        from sandbox.services.modules.statistics_processor.schemas import yt_schemas

        if timestamp is None:
            timestamp = datetime.datetime.utcnow()

        if not resource_id:
            resource_id = 0 if resource is None else resource.id
        if not state:
            state = "" if resource is None else resource.state
        if not attributes:
            attributes = "" if resource is None else str(resource.attributes_dict())

        # The author is the user of the current request, if there is any
        if task_controller.Task.request:
            author = task_controller.Task.request.user.login
        else:
            author = "None"

        return yt_schemas.ResourceAudit.make(
            timestamp=timestamp,
            id=resource_id,
            state=state,
            attributes=attributes,
            author=author,
            message=message
        )

    @classmethod
    def resource_audit(
        cls, message, resource=None, timestamp=None, resource_id=None, state=None, attributes=None
    ):
        """
        Build an audit record with `resource_audit_item` and push it to the statistics signaler.

        All the parameters are passed to `resource_audit_item` as is.
        """
        item = cls.resource_audit_item(
            message,
            resource=resource,
            timestamp=timestamp,
            resource_id=resource_id,
            state=state,
            attributes=attributes,
        )
        common.statistics.Signaler().push(item)

    @classmethod
    def list_resources_audit(cls, message, resource_ids, timestamp=None, state=None, attributes=None):
        """
        Push the same audit message for a bunch of resources at once.

        :param message: audit message
        :param resource_ids: iterable of resource ids
        :param timestamp: time of the event shared by all records, current UTC time by default
        :param state: state to put into every record
        :param attributes: attributes to put into every record
        """
        if timestamp is None:
            timestamp = datetime.datetime.utcnow()

        audit = []
        for resource_id in resource_ids:
            audit.append(
                cls.resource_audit_item(
                    message, timestamp=timestamp, resource_id=resource_id, state=state, attributes=attributes
                )
            )
        common.statistics.Signaler().push(audit)

    @classmethod
    def resource_meta(cls, resource):
        """
        Find ResourceMeta object by the first meta hash of the resource

        :param resource: mapping.Resource object
        :return: ResourceMeta object, or None if the resource has no meta hashes
        """
        hashes = resource.resource_meta
        if not hashes:
            return None
        return mapping.ResourceMeta.objects(hash=hashes[0]).first()

    @classmethod
    def resource_meta_list(cls, resource):
        """
        Load all ResourceMeta objects of the resource, keeping the order of hashes stored in the resource
        (from the resource meta to its parents)

        :param resource: mapping.Resource object
        :return: list of ResourceMeta objects, or None if the resource has no meta hashes
        """
        hashes = resource.resource_meta
        if not hashes:
            return None

        # Single query for all the hashes, the order is restored afterwards
        by_hash = {}
        for meta in mapping.ResourceMeta.objects(hash__in=hashes):
            by_hash[meta.hash] = meta
        return [by_hash[digest] for digest in hashes]

    @classmethod
    def update_meta_objects(cls, resource_meta=None):
        """
        Make ResourceMeta objects from dicts and save those of them which are not in the database yet.

        :param resource_meta: list of ResourceMeta dicts of resource from resource meta to its parents
        :return: tuple of (list of ResourceMeta hashes, list of ResourceMeta objects), both in the input order;
            a pair of empty lists if nothing was passed
        """
        if not resource_meta:
            return [], []

        meta_objects = [
            mapping.ResourceMeta(resource_meta=mapping.ResourceMeta.Meta(**meta))
            for meta in resource_meta
        ]
        hashes = [meta_object.calculated_hash for meta_object in meta_objects]

        known_hashes = set(mapping.ResourceMeta.objects(hash__in=hashes).fast_scalar("hash"))
        for digest, meta_object in zip(hashes, meta_objects):
            if digest not in known_hashes:
                meta_object.save()
        return hashes, meta_objects

    @classmethod
    def cast_attr(cls, value):
        """
        Convert a value to a byte string: unicode strings are encoded as UTF-8,
        anything else goes through `str()`

        :param value: value to convert
        :return: str value
        """
        if isinstance(value, unicode):
            return value.encode("utf-8")
        return str(value)

    @classmethod
    def _check_abs_paths(cls, path1, path2):
        """
        Check whether two paths intersect, i.e. whether the components of one normalized path
        form a prefix of the components of the other one (equal paths intersect as well)

        :param path1: linux path 1
        :param path2: linux path 2
        :return: True if one path is included in the other one
        """

        parts1 = os.path.normpath(path1).split(os.sep)  # TODO: don't work for windows
        parts2 = os.path.normpath(path2).split(os.sep)
        # Only the common length is compared, so the shorter path decides
        for left, right in zip(parts1, parts2):
            if left != right:
                return False
        return True

    @classmethod
    def duplicates_on_creation(cls, resource_type, path, attrs, task_id):
        """ Look for a resource of the task which duplicates the one to be created

        Only READY and NOT_READY resources of the task are examined; service attributes are not compared.

        :param resource_type: type of Resource
        :param path: resource path
        :param attrs: resource attrs dict
        :param task_id: resource task id
        :return: duplicated resource object if found else None
        :raises common.errors.TaskError: if a resource with the same path but different type or attributes
            exists, or if the path intersects with the path of another resource
        """

        own_attrs = {}
        for key, value in attrs.iteritems():
            if key not in ctr.ServiceAttributes:
                own_attrs[key] = str(value)

        candidates = cls.Model.objects(
            task_id=task_id,
            state__in=[cls.Model.State.READY, cls.Model.State.NOT_READY],
        ).order_by("id")

        for res in list(candidates):
            if path != res.path:
                if cls._check_abs_paths(path, res.path):
                    raise common.errors.TaskError(
                        "Path '{}' of the resource {} has intersection with the path '{}' of resource {} #{}".format(
                            path, resource_type, res.path, res.type, res.id
                        )
                    )
                continue

            # Same path: it is either an exact duplicate or a conflict
            if res.type == str(resource_type) and own_attrs == res.attributes_dict(exclude=ctr.ServiceAttributes):
                return res

            raise common.errors.TaskError(
                "Resource already exists (#{}) with different attributes:\n{!r} vs {!r}".format(
                    res.id, own_attrs, res.attributes_dict(exclude=ctr.ServiceAttributes)
                )
            )

        return None

    @classmethod
    def duplicates_on_update(cls, resource):
        """
        Delete NOT_READY duplicates of the resource among other resources of the same task.

        A duplicate has the same type, path and non-service attributes. For a READY resource
        path intersections with other resources of the task are checked as well.

        :param resource: mapping.Resource object
        :return: None
        :raises common.errors.TaskError: if the resource is READY and its path intersects
            with the path of another resource of the task
        """

        own_attrs = {}
        for key, value in resource.attributes_dict().iteritems():
            if key not in ctr.ServiceAttributes:
                own_attrs[key] = str(value)

        query = {
            "task_id": resource.task_id,
            "id__ne": resource.id,
        }
        if resource.state == cls.Model.State.READY:
            query["state__in"] = [cls.Model.State.READY, cls.Model.State.NOT_READY]
        else:
            query["state"] = cls.Model.State.NOT_READY
            query["type"] = resource.type

        for res in cls.Model.objects(**query).order_by("id"):
            same_slot = (
                res.state == cls.Model.State.NOT_READY and
                res.type == str(resource.type) and
                res.path == resource.path
            )
            if same_slot:
                if own_attrs == res.attributes_dict(exclude=ctr.ServiceAttributes):
                    cls.logger.warning(
                        "Resource #%s was deleted as duplicate of #%s", res.id, resource.id
                    )
                    res.delete()
                continue

            if resource.state == cls.Model.State.READY and cls._check_abs_paths(res.path, resource.path):
                raise common.errors.TaskError(
                    'Path `{}` of the resource {} has intersection with the path `{}` of resource {} #{}'.format(
                        resource.path, resource.type, res.path, res.type, res.id
                    )
                )

    @classmethod
    def prepare_attrs(cls, attrs):
        """
        Normalize resource attributes: both keys and values are cast with `cast_attr`
        and stripped of surrounding whitespace

        :param attrs: resource attrs dict, may be None
        :return: new dict with normalized attrs
        """
        prepared = {}
        for key, value in (attrs or {}).iteritems():
            prepared[cls.cast_attr(key).strip()] = cls.cast_attr(value).strip()
        return prepared

    @classmethod
    def _is_valid_ttl(cls, value):
        """
        Check whether the value is an acceptable TTL

        :param value: TTL value, e.g. "14" or "inf"
        :return: True if the value is infinite or a number in range [1, 10000), False otherwise
            (including empty, non-numeric and NaN values)
        """
        if not value:
            return False
        try:
            ttl = float(value)
        except (TypeError, ValueError, OverflowError):
            # `float()` raises `ValueError` for malformed strings, `TypeError` for objects which are
            # neither strings nor numbers and `OverflowError` for too large integers: none of them is valid
            return False
        return math.isinf(ttl) or 1 <= ttl < 10000

    @classmethod
    def update_ttl(cls, resource, attributes_changed):
        """
        Validate TTL attribute of the resource and update its expiration time.

        The resource is modified in place and is not saved here.

        :param resource: mapping.Resource object
        :param attributes_changed: bool whether resource attributes were changed
        :return: None
        :raises common.errors.TaskError: if TTL attribute is set to an invalid value
        """
        attrs = resource.attributes_dict()
        # Validation for the TTL attribute, it should be either 'inf' or a number less than 10000
        # (`_is_valid_ttl` additionally rejects numbers below 1)
        ttl = attrs.get(ctr.ServiceAttributes.TTL)
        if ttl:
            if not cls._is_valid_ttl(ttl):
                raise common.errors.TaskError(
                    "Bad TTL: {}. It should be either 'inf' or a number less than 10000".format(ttl)
                )
            ttl = float(ttl)

        # All the other resource timestamps are naive UTC, so expiration time must not be in local time
        # Remove expiration time if resource is RELEASED or ttl is set to infinity
        if attrs.get(ctr.ServiceAttributes.RELEASED) or (ttl and math.isinf(ttl)):
            resource.time.expires = None
        elif not resource.time.expires:
            # If expiration date was not provided for the resource yet, set `expires` to 1 day.
            # The service thread, which checks expired resources, will actualize it later.
            resource.time.expires = datetime.datetime.utcnow() + datetime.timedelta(days=1)
        elif attributes_changed:
            # If attributes were changed, set `expires` to now. Service process clean_resources will actualize it later
            resource.time.expires = datetime.datetime.utcnow()

    @classmethod
    def create(
        cls, name, path, md5, resource_type, task_id, resource_meta, task,
        state=None, created_time=0, skynet_id=None, attrs=None,
        arch=ctm.OSFamily.ANY, size=0, mds=None, multifile=None, executable=None,
        system_attributes=None
    ):
        """
        Create new resource, or return an already existing duplicate of it.

        The new resource is owned by the owner of `task`, inserted into the database and reported to audit.

        :param name: resource description
        :param path: resource path
        :param md5: md5 hash
        :param resource_type: type of resource
        :param task_id: resource task id
        :param resource_meta: list of ResourceMeta dicts with meta information about resource and its parents;
            NOTE(review): an empty value makes `resource_meta_objects[0]` below raise IndexError
        :param task: short or full task object
        :param state: resource state, NOT_READY by default
        :param created_time: resource time of creation as a UNIX timestamp, current time if not set
        :param skynet_id: resource skynet id
        :param attrs: resource attrs dict
        :param arch: resource arch
        :param size: resource size, stored as is
        :param mds: resource mds information
        :param multifile: resource is a directory
        :param executable: resource is executable
        :param system_attributes: dict of resource system attributes
        :return: mapping.Resource object
        :raises ValueError: if the resource is created for another task and the author of that task
            has no permission for the owner of `task`
        :raises common.errors.TaskError: on a conflict with an existing resource (see `duplicates_on_creation`)
            or on a bad TTL attribute (see `update_ttl`)
        """
        # Save missing meta objects; the first one describes the resource itself
        hashes, resource_meta_objects = cls.update_meta_objects(resource_meta)
        attrs = cls.prepare_attrs(attrs)
        current_resource_meta = resource_meta_objects[0]

        # Resources with auto backup get `backup_task` attribute, which `force_backup` later
        # replaces with the id of the backup task
        if current_resource_meta.resource_meta.auto_backup:
            attrs["backup_task"] = True

        # Creation of the same resource twice returns the existing one
        obj = cls.duplicates_on_creation(resource_type, path, attrs, task_id)
        if obj is not None:
            return obj

        # The resource is created for another task: check permissions of that task's author
        if task_id != task.id:
            other_task = cls.ShortTask(
                *mapping.Task.objects.fast_scalar("id", "author", "owner", "parent_id").with_id(task_id)
            )
            if (
                other_task.owner and
                other_task.owner != task.owner and
                not user_controller.user_has_permission(other_task.author, (task.owner,))
            ):
                raise ValueError(
                    "User {!r} not allowed to create resource owned by {!r} for task #{}".format(
                        other_task.author, task.owner, task_id
                    )
                )

        model = cls.Model()
        model.resource_meta = hashes
        model.time = mapping.Resource.Time()

        model.type = resource_type
        model.name = name
        model.path = path
        model.owner = task.owner

        model.task_id = task_id
        # Arch is forced to "any" if the resource type is declared as arch-independent or no arch is given
        model.arch = (
            ctm.OSFamily.ANY
            if current_resource_meta.resource_meta.any_arch or not arch else
            common.platform.get_arch_from_platform(arch)
        )
        created_time = created_time or int(time.time())
        model.time.created = datetime.datetime.utcfromtimestamp(created_time)
        model.time.accessed = datetime.datetime.utcfromtimestamp(created_time)

        model.state = state if state else cls.Model.State.NOT_READY
        model.size = size
        model.md5 = md5
        model.skynet_id = skynet_id
        model.multifile = multifile
        model.executable = executable
        model.attributes = [cls.Model.Attribute(key=k, value=v) for k, v in attrs.iteritems()]

        cls.set_mds(model, mds)
        # Validates TTL attribute and sets initial expiration time
        cls.update_ttl(model, False)

        model.time.updated = datetime.datetime.utcnow()
        if system_attributes:
            model.system_attributes = model.SystemAttributes()
            for attr_name, attr_value in system_attributes.iteritems():
                setattr(model.system_attributes, attr_name, attr_value)

        # `force_insert` makes sure an existing document is never overwritten
        model.save(force_insert=True, write_concern=common.config.Registry().server.mongodb.write_concern)
        cls.logger.info(
            "Created resource #%s of type '%s' in state '%s' with attrs: %s",
            model.id, model.type, model.state, attrs
        )
        cls.resource_audit("Resource created", resource=model)
        return model

    @classmethod
    def mds_model_to_dict(cls, resource):
        """
        Represent mds information of the resource as a plain dict

        :param resource: mapping.Resource object
        :return: dict with `key` and `namespace`, or the stored falsy value as is if there is no mds info
        """
        mds = resource.mds
        if not mds:
            return mds
        return {"key": mds.key, "namespace": mds.namespace}

    @classmethod
    def update(cls, resource, data, client_id, resource_meta, full_update=True, db_update=True):
        """
        Update resource with the given data.

        The resource object is always modified in place; it is saved only if `db_update` is set.
        An audit record is pushed in any case.

        :param resource: mapping.Resource object
        :param data: dict of fields to update; `mds`, `size` and `attributes` are processed specially,
            names from `RENAMED_FILEDS` are mapped to model attributes, other names are set as is
        :param client_id: client id of request, treated as a new host of the resource
        :param resource_meta: ResourceMeta object with meta information about resource
        :param full_update: need full update of resource, i.e. its arch and hosts are also actualized
        :param db_update: need update resource in database
        :return: resource is updated or need to be updated; replacing attributes alone does not count
        :raises common.errors.TaskError: from `duplicates_on_update` and `update_ttl` (only with `db_update`)
        """
        updated = False
        attributes_changed = False

        for name, value in data.iteritems():
            if name == "mds":
                # Compare in dict form to detect real changes
                if value:
                    value = dict(value)
                if cls.mds_model_to_dict(resource) != value:
                    cls.set_mds(resource, value)
                    updated = True
            elif name == "size":
                # Incoming size is divided by 1024 before storing (presumably bytes -> KiB)
                value = value >> 10
                if value != resource.size:
                    updated = True
                    resource.size = value
            elif name == "attributes":
                # Attributes are replaced as a whole; this affects expiration time only (see `update_ttl`)
                resource.attributes = [
                    mapping.Resource.Attribute(key=k, value=v)
                    for k, v in cls.prepare_attrs(value).iteritems()
                ]
                attributes_changed = True
            elif name in cls.RENAMED_FILEDS:
                real_name = cls.RENAMED_FILEDS[name]
                if value != getattr(resource, real_name):
                    updated = True
                    setattr(resource, real_name, value)
            else:
                # With the current constants every name from `SHORT_UPDATE_FIELDS` is handled by
                # the branches above, so the first check here is never true
                if name in cls.SHORT_UPDATE_FIELDS:
                    setattr(resource, name, value)
                elif getattr(resource, name) != value:
                    setattr(resource, name, value)
                    updated = True

        cls.logger.info(
            "Updating resource #%s in state '%s' with attrs: %r",
            resource.id, resource.state, resource.attributes_dict()
        )

        if full_update and resource.state in (ctr.State.READY, ctr.State.BROKEN):
            new_host = client_id
            # Resource of arch-dependent type still has "any" arch: take the platform of the client
            if resource.arch == ctm.OSFamily.ANY and not resource_meta.resource_meta.any_arch:
                cl = client_controller.Client.get(new_host)
                client_arch = cl.platform if cl else ""
                if resource.arch != client_arch:
                    updated = True
                    resource.arch = client_arch
            # Register the client as a host of the resource, if it is not registered yet.
            # Note that `updated` is set even if the host is not actually added.
            if resource.state == ctr.State.READY or data.get("size"):
                if not any(new_host == hs.host for hs in resource.hosts_states):
                    updated = True
                    if db_update and new_host:
                        cls.add_host(resource, new_host)
        if db_update:
            cls.duplicates_on_update(resource)
            cls.update_ttl(resource, attributes_changed)
            if updated:
                resource.time.updated = datetime.datetime.utcnow()
            resource.save()

        cls.resource_audit("Update resource", resource=resource)

        return updated

    @classmethod
    def force_backup(cls, ids, dc=None, size=None):
        """
        Create a service task `BACKUP_RESOURCE_2` which backs up the given resources.

        :param ids: resource ids, passed through `common.utils.chain`
        :param dc: datacenter to run the backup in; if not set, it is taken from the client
            which hosts most of the resources
        :param size: total size of the resources; it is divided by 1024 before use (presumably bytes -> KiB),
            sum of sizes stored in the resources is used if not set
        :return: id of the created backup task, or None if no resources were found
        """
        ids = list(common.utils.chain(ids))
        resources = cls.Model.objects.in_bulk(ids).values()
        if not resources:
            return

        # From here on `size` is in the units of `resource.size` (shifted by 10 bits to get bytes below)
        size = size >> 10 if size else sum(_.size for _ in resources)
        if not dc:
            # Count how many of the resources every host holds
            hosts = collections.Counter()
            for r in resources:
                for hs in r.hosts_states:
                    hosts[hs.host] += 1

            dc = None
            if hosts:
                client = client_controller.Client.get(hosts.most_common(1)[0][0])
                if client:
                    dc = client.info.get("system", {}).get("dc")

        parameters = {
            "resource_id": ",".join(map(str, ids)),
            "validate_backup_task": True,
            "dc": dc,
            # Total size as a percentage of `MAX_BACKUP_CHUNK_SIZE` (GiB)
            "chunk_size": size * 100 / (MAX_BACKUP_CHUNK_SIZE << 20),
        }

        # A little hack here - ignore low disk space tag for fresh tasks bundle backup
        if len(resources) == 1 and resources[0].type == "SANDBOX_TASKS_ARCHIVE":
            parameters["ignore_low_disk"] = True

        backup_id = task_controller.Task.create_service_task(
            "BACKUP_RESOURCE_2",
            description="Backing up resources of task #{} with total size {}".format(
                resources[0].task_id, common.utils.size2str(size << 10)
            ),
            parameters=parameters,
            requirements=dict(
                disk_space=(size + (10 << 10)) << 10  # REST API expects bytes here
            ),
            priority=ctt.Priority(ctt.Priority.Class.SERVICE, ctt.Priority.Subclass.LOW),
            notifications=[BACKUP_NOTIFICATION],
            tags=["AUTOBACKUP", "FORCE", (dc or "unk").upper()]
        )

        cls.logger.info(
            "Created backup task #%s for resources %r with total size %s",
            backup_id, ids, common.utils.size2str(size << 10)
        )
        # Store the id of the backup task in `backup_task` attribute of the resources which have it
        mapping.Resource.objects(id__in=ids, attributes__key="backup_task").update(
            set__attributes__S__value=backup_id,
            set__time__updated=datetime.datetime.utcnow()
        )
        return backup_id

    @classmethod
    def insufficient_resources_query(cls, host):
        """
        Build a query for resources of the given storage host which have redundancy problems:
        READY resources (task logs excluded) which are stored on `host`, but on none of
        the other configured storage hosts.
        Notice: the request can take significant amount of time (up to several minutes).

        :param host: Host to be checked.
        :return: A query for such resources, ordered by id
        """

        settings = common.config.Registry()
        other_storages = set(settings.server.storage_hosts) - {host}
        from sandbox import sdk2
        filters = {
            "type__ne": str(sdk2.service_resources.TaskLogs),
            "state": cls.Model.State.READY,
            "hosts_states__state": cls.Model.HostState.State.OK,
            "hosts_states__host__nin": other_storages,
            "hosts_states__host__in": [host],
        }
        return cls.Model.objects(**filters).order_by("+id")

    @classmethod
    def extra_resources_to_drop(cls, host, limit=50, copies=2):
        """
        List extra resources of the host, as reported by `Model.client_excessive_redundancy`
        for the given amount of copies. Returns empty list for storage hosts.

        :param host: Host to be checked.
        :param limit: Maximum amount of resources to return, passed to the model as is.
        :param copies: Amount of copies to be available on other hosts.
        :return: A list of `ListEntry` tuples with resource ID, type and size.
        """

        storage_hosts = set(common.config.Registry().server.storage_hosts)
        if host in storage_hosts:
            return []

        entries = []
        for row in cls.Model.client_excessive_redundancy(host, copies, limit):
            entries.append(cls.ListEntry(row["_id"], row["type"], row["size"]))
        return entries

    @classmethod
    def resources_to_remove(cls, host, limit=100, replicated=False):
        """
        List resources which can be safely removed from the given host, least recently accessed first.

        Without `replicated` these are the resources marked to be deleted on the host or, if there are none,
        the DELETED resources of the host. With `replicated` these are the resources of the host stored in MDS,
        followed by the ones which have at least three sources including a storage host;
        nothing is returned for storage hosts in this mode.

        :param host: Host ID to be checked.
        :param limit: Maximum amount of resources to return, a falsy value means no limit.
        :param replicated: Flags to check only "DELETED" resources or "READY" resources with copy on storage host.
        :return: A list of `ListEntry` tuples with resource ID, type and size.
        """

        def fetch(queryset, cap):
            # Falsy `cap` means that the query is not limited
            if cap:
                queryset = queryset.limit(cap)
            return queryset.order_by("+time.accessed").fast_scalar("id", "type", "size")

        settings = common.config.Registry()
        if replicated:
            if host in settings.server.storage_hosts:
                return []

            rows = list(fetch(cls.Model.objects(hosts_states__host=host, mds__ne=None), limit))
            if not limit or len(rows) < limit:
                # There is still a room for resources replicated to storage hosts
                remaining = limit - len(rows) if limit else limit
                well_replicated = cls.Model.objects(
                    hosts_states__host=host,
                    hosts_states__host__in=list(settings.server.storage_hosts),  # at least one storage
                    hosts_states__2__exists=True,  # at least 3 sources
                )
                rows.extend(fetch(well_replicated, remaining))
        else:
            marked = {"host": host, "state": cls.Model.HostState.State.MARK_TO_DELETE}
            rows = fetch(cls.Model.objects(hosts_states__match=marked), limit)
            if not rows.first():
                # no results with this query, construct more broad one
                rows = fetch(cls.Model.objects(hosts_states__host=host, state=cls.Model.State.DELETED), limit)

        return [cls.ListEntry(*row) for row in rows]

    @classmethod
    def drop_host_resources(cls, host, resources):
        """
        Remove host from sources of resources

        :param host: host to be removed from resources
        :param resources: iterable of resource ids; it is left untouched
        """

        # Work on a sorted copy: the caller's collection must not be reordered as a side effect,
        # and any iterable (tuple, set, generator) is accepted this way.
        resource_ids = sorted(resources)
        cls.logger.info("Remove host '%s' for resources %r", host, resource_ids)
        cls.Model.objects(id__in=resource_ids).update(pull__hosts_states__host=host)
        cls.list_resources_audit("Remove host {}".format(host), resource_ids)

    @classmethod
    def touch(cls, resource_id, host=None):
        """
        Refresh the last access time of the resource.

        When `host` is given, the access time is updated unconditionally and the state of this host
        among the resource's sources is set to OK. Otherwise the access time is updated only for
        a not deleted resource which was accessed at least `common.resources.touch_delay` seconds ago.
        In both cases a resource which has MDS metadata without a backup namespace is additionally
        saved to `mapping.ResourcesToWarehouse`.

        :param resource_id: id of resource to touch
        :param host: hostname to mark its state as OK
        """

        if host is None:
            touch_delay = common.config.Registry().common.resources.touch_delay
            threshold = datetime.datetime.utcnow() - datetime.timedelta(seconds=touch_delay)
            stale = cls.Model.objects(
                id=resource_id, state__ne=ctr.State.DELETED, time__accessed__lte=threshold
            )
            # Unacknowledged write (w=0): the result of the update is not waited for
            stale.update_one(set__time__accessed=datetime.datetime.utcnow(), write_concern={"w": 0})
        else:
            on_host = cls.Model.objects(id=resource_id, hosts_states__host=host)
            on_host.update_one(
                set__time__accessed=datetime.datetime.utcnow(),
                set__hosts_states__S__state=cls.Model.HostState.State.OK
            )

        # Do backup to warehouse bucket for in demand resource
        in_demand = cls.Model.objects(id=resource_id, mds__exists=True, mds__backup_namespace=None)
        try:
            for bucket, size in in_demand.fast_scalar("mds__namespace", "size"):
                mapping.ResourcesToWarehouse(id=resource_id, size=size, bucket=bucket).save()
        except mapping.NotUniqueError:
            # presumably the resource is already queued for the warehouse - nothing to do
            pass

    @classmethod
    def hard_add_host(cls, resource_id, host):
        """
        Add the host in OK state to the set of resource sources.
        For a host which is not listed in `server.storage_hosts` the access time of the resource
        is refreshed as well.

        :param resource_id: ID of resource
        :param host: name of host
        """

        ok_record = cls.Model.HostState(host=host, state=cls.Model.HostState.State.OK)
        changes = {"add_to_set__hosts_states": ok_record}
        if host not in common.config.Registry().server.storage_hosts:
            changes["set__time__accessed"] = datetime.datetime.utcnow()
        cls.Model.objects(id=resource_id).update_one(**changes)

    @classmethod
    def add_host(cls, resource, host):
        """
        Make the host a source of the resource.

        An unknown host is added with `hard_add_host`. For a known host the resource is touched:
        without the host if its state is already OK, with the host (which resets the state to OK) otherwise.

        :param resource: mapping.Resource model
        :param host: name of host
        """
        known = None
        for record in resource.hosts_states:
            if record.host == host:
                known = record
                break

        if known is None:
            cls.hard_add_host(resource.id, host)
        elif known.state == cls.Model.HostState.State.OK:
            cls.touch(resource.id)
        else:
            cls.touch(resource.id, host)

        cls.logger.debug("Added host '%s' for resource #%s", host, resource.id)
        cls.resource_audit("Update resource", resource=resource)

    @classmethod
    def remove_host(cls, resource, host):
        """
        Mark the host as to be deleted from resource sources.
        The host record is kept in the resource, only its state is switched to `MARK_TO_DELETE`.

        :param resource: mapping.Resource model
        :param host: name of host
        """
        source = cls.Model.objects(id=resource.id, hosts_states__host=host)
        source.update_one(set__hosts_states__S__state=cls.Model.HostState.State.MARK_TO_DELETE)

        cls.logger.debug("Removed host '%s' from resource sources #%s", host, resource.id)
        cls.resource_audit("Remove host {}".format(host), resource=resource)

    @classmethod
    def set_mds(cls, resource, mds):
        """
        Attach MDS metadata to the resource object; nothing is done when `mds` is None.
        The resource is only modified in memory here, it is not saved by this method.

        :param resource: mapping.Resource model
        :param mds: dict with schema sandbox.web.api.v1.schemas.resource.MDS
        """
        if mds is None:
            return

        # XXX: filter out defaults to save free space in DB
        fields = {"key": mds["key"]}
        if mds.get("executable"):
            fields["executable"] = True
        fields["namespace"] = mds.get("namespace") or ctr.DEFAULT_S3_BUCKET

        resource.mds = resource.MDS(**fields)
        cls.logger.debug("Set MDS metadata for resource #%s: %r", resource.id, fields)
        cls.resource_audit("Set MDS metadata for resource. {}".format(fields), resource=resource)

    @classmethod
    def get(cls, resource_id):
        """
        Look the resource up by its ID.

        :param resource_id: resource id
        :return: whatever `Model.objects.with_id` gives for this ID: the mapping.Resource model
                 or None if it does not exist
        """

        found = cls.Model.objects.with_id(resource_id)
        return found

    @classmethod
    def get_hosts(cls, resource_or_id, all=False):
        """
        Return names of resource source hosts. Only hosts in OK state are returned unless `all` is set.

        :param resource_or_id: mapping.Resource (or lite document) object, or resource id;
                               for an id the host records are read from the database
        :param all: include hosts in any state, e.g. MARK_TO_DELETE
        :return: list of resource source hosts
        """

        ok_state = cls.Model.HostState.State.OK

        if not isinstance(resource_or_id, (cls.Model, mapping.base.LiteDocument)):
            # Raw records come with short field names: "h" for host and "st" for state
            raw = cls.Model.objects(id=resource_or_id).fast_scalar("hosts_states").first() or []
            return [item.get("h") for item in raw if all or item.get("st") == ok_state]

        return [record.host for record in resource_or_id.hosts_states if all or record.state == ok_state]

    @classmethod
    def group_hosts(cls, hosts):
        """
        Load clients of the given hosts, group them by the presence of the STORAGE tag
        and shuffle clients within each group. Hosts for which no client is loaded are dropped.

        :param hosts: list of host names
        :return: list of mapping.Client: clients with the STORAGE tag come first,
                 clients without it follow
        """

        def not_storage(client):
            return ctc.Tag.STORAGE not in client.tags

        clients = [client for client in client_controller.Client.load_list(hosts) if client]

        ordered = []
        # `False` sorts before `True`, so the STORAGE group leads; `groupby` relies on this pre-sorting.
        for _, group in itertools.groupby(sorted(clients, key=not_storage), not_storage):
            members = list(group)
            # Explicit loop instead of `map(random.shuffle, ...)`: a lazy `map` would never shuffle anything
            random.shuffle(members)
            ordered.extend(members)
        return ordered

    @classmethod
    def sources(cls, resource):
        """
        Return clients of the hosts which hold the resource in OK state.

        :param resource: mapping.Resource object
        :return: list of mapping.Client as ordered by `group_hosts`
                 (clients with the STORAGE tag sort first there)
        """

        return cls.group_hosts(cls.get_hosts(resource))

    @classmethod
    def local_task_path(cls, resource, *arg):
        """
        Join path components given by `ctt.relpath` for the resource's task with the extra ones.

        :param resource: mapping.Resource object
        :param arg: extra path components to be appended
        :return: path built with `os.path.join`
        """
        components = ctt.relpath(resource.task_id) + list(arg)
        return os.path.join(*components)

    @classmethod
    def local_path(cls, resource):
        """
        Return `local_task_path` of the resource extended with the resource's own path.

        :param resource: mapping.Resource object
        """
        own_path = resource.path
        return cls.local_task_path(resource, own_path)

    @classmethod
    def http_url(cls, resource, host=None):
        """
        Return http url for resource

        :param resource: mapping.Resource object
        :param host: host with this resource: a host name or a client object;
                     if omitted, the first of `sources(resource)` is used
        :return: http url for resource or empty string if the host has no fileserver address
        """
        if isinstance(host, basestring):
            host = client_controller.Client.get(host)
        if not host:
            host = next(iter(cls.sources(resource)), None)

        info = host.info if host else {}
        http_prefix = info.get("system", {}).get("fileserver", "")

        quoted_filename = urllib.quote(resource.path)
        if host and ctc.Tag.NEW_LAYOUT in host.tags:
            path = "/".join(("resource", str(resource.id), os.path.basename(quoted_filename)))
        else:
            path = cls.local_task_path(resource, quoted_filename)

        if not http_prefix:
            return ""
        return urlparse.urljoin(http_prefix, path)

    @classmethod
    def rsync_url(cls, resource, host=None, http_url=None):
        """
        Return url to rsync resource

        The url is derived from the http one: the scheme is replaced with "rsync", the port is dropped
        and the path is rebuilt depending on the presence of the NEW_LAYOUT tag on the host.

        :param resource: mapping.Resource object
        :param host: host with this resource
        :param http_url: http url for the given resource and host; passed to avoid computing it multiple times
        :return: url to rsync resource or empty string if there is no http url
        """

        http_prefix = http_url or cls.http_url(resource, host)
        if not http_prefix:
            return ""
        if not host:
            host = next(iter(cls.sources(resource)), None)

        new_layout = ctc.Tag.NEW_LAYOUT in getattr(host, "tags", {})
        parsed = urlparse.urlparse(http_prefix)
        root = parsed._replace(
            scheme="rsync",
            netloc=parsed.netloc.split(":")[0],
            path="/sandbox-resources" if new_layout else "/sandbox-tasks"
        ).geturl()

        if new_layout:
            tail = ctr.relpath(resource.id) + (os.path.basename(resource.path),)
            return "/".join((root,) + tail)
        return "/".join((root, cls.local_path(resource)))

    @classmethod
    def proxy_url(cls, resource):
        """
        Return proxy url of resource.
        If no proxy host is configured, http url of the resource on the current host is returned instead.

        :param resource: mapping.Resource object
        :return: proxy url of resource
        """

        settings = common.config.Registry()
        proxy_settings = settings.client.fileserver.proxy
        if not proxy_settings.host:
            return cls.http_url(resource, settings.this.id)
        return "{}://{}/{}".format(proxy_settings.scheme.http, proxy_settings.host, resource.id)

    @classmethod
    def get_rsync_links(cls, resource, all_hosts=False):
        """
        Collect rsync links of the resource, one per source client.

        :param resource: mapping.Resource object
        :param all_hosts: if False, clients which are not alive are skipped
        :return: list of rsync links for resource
        """

        links = []
        for client in cls.sources(resource):
            if all_hosts or client.alive:
                links.append(cls.rsync_url(resource, client))
        return links

    @classmethod
    def get_http_links(cls, resource, all_hosts=False):
        """
        Collect http links of the resource, one per source client.

        :param resource: mapping.Resource object
        :param all_hosts: if False, clients which are not alive are skipped
        :return: list of http links for resource
        """

        links = []
        for client in cls.sources(resource):
            if all_hosts or client.alive:
                links.append(cls.http_url(resource, client))
        return links

    @classmethod
    def user_has_permission(cls, resource, user):
        """
        Check write access from user to resource.
        The access is granted via the resource owner or, failing that, via the author or the owner
        of the task the resource belongs to.

        :param resource: mapping.Resource object
        :param user: user login or mapping.User object
        :return: True if user has write access to resource and False in another way
        """
        if user_controller.user_has_permission(user, [resource.owner]):
            return True

        try:
            parent_task = task_controller.Task.get(resource.task_id)
        except task_controller.Task.NotExists:
            # No task - nobody else to grant the access through
            return False
        else:
            return user_controller.user_has_permission(user, (parent_task.author, parent_task.owner))

    @classmethod
    def _conv_lists(cls, res):
        if res is not None:
            return {k: (list(v) if k in ("rsync", "http") else v) for k, v in res.iteritems()}

    @classmethod
    def _get_updated_sandbox_resource(cls, resource_name):
        """
        Read an entry of "resources" from the context of the `UPDATE_SANDBOX_RESOURCES` service record.

        :param resource_name: key in the "resources" dictionary
        :return: dict copy of the entry passed through `_conv_lists`; empty dict if there is no service record
        """
        service = mapping.Service.objects.with_id(cls.UPDATE_SANDBOX_RESOURCES)
        if not service:
            return {}
        entry = service.context.get("resources", {}).get(resource_name)
        return cls._conv_lists(dict(entry or {}))

    @common.utils.classproperty
    def tasks_image(cls):
        """ The "tasks_image" entry as returned by `_get_updated_sandbox_resource`. """
        entry_name = "tasks_image"
        return cls._get_updated_sandbox_resource(entry_name)

    @common.utils.classproperty
    def tasks_resource(cls):
        """ The "tasks_res" entry as returned by `_get_updated_sandbox_resource`. """
        entry_name = "tasks_res"
        return cls._get_updated_sandbox_resource(entry_name)

    @classmethod
    def venv_resource(cls, client_platform):
        """
        Read the "venv_res" entry for the given platform from the context
        of the `UPDATE_SANDBOX_RESOURCES` service record.

        :param client_platform: key in the "venv_res" dictionary
        :return: dict copy of the platform's entry passed through `_conv_lists`; empty dict if the service
                 record, the "resources" / "venv_res" dictionaries or the platform's entry are missing or empty
        """
        obj = mapping.Service.objects.with_id(cls.UPDATE_SANDBOX_RESOURCES)
        if not obj:
            return {}
        # `or {}` also covers keys stored with `None` value, on which `.get()` / `dict()` would fail
        resources = obj.context.get("resources") or {}
        per_platform = dict(resources.get("venv_res") or {})
        return cls._conv_lists(dict(per_platform.get(client_platform) or {}))

    @classmethod
    def list_task_resources(
        cls,
        task_id=0, resource_type='', state='', omit_failed=False, owner='', arch='',
        attr=None, attr_name='', attr_value=None, any_attrs=None, all_attrs=None, id=0,
        limit=0, offset=0, skynet_id='', host='', order_by='-id', date='', load=True
    ):
        """
        List resources matching the given filter.

        The filter itself is built by `cls.list_query` from the filtering arguments.

        :param attr: pair of attribute name and value, overrides `attr_name` and `attr_value`
        :param attr_name: attribute name, used together with `attr_value` as `any_attrs`
                          if the latter is not given
        :param limit: maximum amount of resources to return, falsy value means no limit
        :param offset: amount of resources to skip
        :param order_by: field to order the result by, falsy value means no ordering
        :param load: return resource objects if set, resource IDs otherwise
        :return: list of resources or list of their IDs
        """
        if attr:
            attr_name, attr_value = attr
        if (attr_name or attr_value) and not any_attrs:
            any_attrs = {attr_name: attr_value}

        filters = cls.list_query(
            id=id,
            resource_type=resource_type,
            owner=owner,
            task_id=task_id,
            arch=arch,
            date=date,
            state=state,
            skynet_id=skynet_id,
            any_attrs=any_attrs,
            all_attrs=all_attrs,
            host=host,
            omit_failed=omit_failed,
        )

        found = cls.Model.objects(**filters).skip(int(offset))
        if limit:
            found = found.limit(int(limit))
        if order_by:
            found = found.order_by(order_by)

        if load:
            return list(found)
        return list(found.scalar('id'))

    @common.utils.classproperty
    def lxc_resources(cls):
        """
        Dict copy of the "lxc_res" entry of "resources" stored in the context of the
        `UPDATE_SANDBOX_RESOURCES` service record; empty dict if any of them is missing or empty.
        """
        service = mapping.Service.objects.with_id(cls.UPDATE_SANDBOX_RESOURCES)
        if not service:
            return {}
        resources = service.context.get("resources")
        if not resources or "lxc_res" not in resources:
            return {}
        return dict(resources["lxc_res"] or {})

    @common.utils.classproperty
    def porto_layers_resources(cls):
        """
        Dict copy of the "porto_layers_res" entry of "resources" stored in the context of the
        `UPDATE_SANDBOX_RESOURCES` service record; empty dict if any of them is missing or empty.
        """
        service = mapping.Service.objects.with_id(cls.UPDATE_SANDBOX_RESOURCES)
        if not service:
            return {}
        resources = service.context.get("resources")
        if not resources or "porto_layers_res" not in resources:
            return {}
        return dict(resources["porto_layers_res"] or {})

    @classmethod
    def update_resource_temporary_link(cls, link_id):
        """
        Refresh access time of the temporary link, unless it was refreshed
        less than `common.resources.touch_delay` seconds ago.

        :param link_id: ID of `mapping.ResourceLink`
        """
        touch_delay = common.config.Registry().common.resources.touch_delay
        threshold = datetime.datetime.utcnow() - datetime.timedelta(seconds=touch_delay)
        stale = mapping.ResourceLink.objects(id=link_id, accessed__lte=threshold)
        # Unacknowledged write (w=0): the result of the update is not waited for
        stale.update_one(set__accessed=datetime.datetime.utcnow(), write_concern={"w": 0})

    @classmethod
    def create_temporary_link(cls, resource_id, author):
        """
        Return temporary link of the author to the resource: the existing one (its access time
        gets refreshed) or a newly created and saved one.

        :param resource_id: ID of resource the link points to
        :param author: author of the link
        :return: `mapping.ResourceLink` object
        """
        existing = mapping.ResourceLink.objects(author=author, resource_id=resource_id).first()
        if existing is None:
            created = mapping.ResourceLink(
                id=uuid.uuid4().hex, resource_id=resource_id, author=author, accessed=datetime.datetime.utcnow()
            )
            cls.logger.info("Create link for resource %s and user %s.", resource_id, author)
            created.save()
            return created

        cls.update_resource_temporary_link(existing.id)
        return existing

    @classmethod
    def proxy_temporary_link(cls, link):
        """
        Build proxy url of the temporary link.

        :param link: object with `id` of the link
        :return: url of the link on the configured proxy host or None if no proxy host is configured
        """
        proxy_config = common.config.Registry().client.fileserver.proxy
        if not proxy_config.host:
            return None
        return "{}://{}/resource/link/{}".format(proxy_config.scheme.http, proxy_config.host, link.id)

    @classmethod
    def clean_temporary_resource_links(cls, ttl):
        """
        Delete temporary links which were not accessed for the last `ttl` days.

        :param ttl: maximum idle time of a link in days
        :return: result of the queryset `delete()` call
        """
        max_idle = datetime.timedelta(days=ttl)
        expired = mapping.ResourceLink.objects(accessed__lte=datetime.datetime.utcnow() - max_idle)
        return expired.delete()

    @classmethod
    def lock_resources_for_backup(cls, host, limit=10):
        """
        Pick resources of the host for backup and stamp them with the current time.

        Candidates are READY resources on the host with attribute `ttl=inf` and without `mds` attribute
        which were not accessed for 3 hours. Their access time is set to "now" and only those
        which carry exactly this timestamp afterwards are returned - presumably to keep concurrent
        callers from taking the same resource.

        :param host: host to look for resources on
        :param limit: maximum amount of candidates
        :return: list of IDs of the stamped resources
        """
        now = datetime.datetime.utcnow()
        idle_since = now - datetime.timedelta(hours=3)

        candidates = list(cls.Model.objects(
            state=cls.Model.State.READY,
            time__accessed__lt=idle_since,
            attributes=cls.Model.Attribute(key="ttl", value="inf"),
            attributes__key__ne="mds",
            hosts_states__host=host,
        ).limit(limit).fast_scalar("id"))

        # The idle condition is re-checked on update: a candidate touched in between is left alone
        still_idle = cls.Model.objects(
            id__in=candidates,
            state=cls.Model.State.READY,
            time__accessed__lt=idle_since,
        )
        still_idle.update(set__time__accessed=now)

        stamped = cls.Model.objects(
            id__in=candidates,
            state=cls.Model.State.READY,
            time__accessed=now,
        )
        return list(stamped.fast_scalar("id"))

    @classmethod
    def mark_force_cleanup_mds_bucket_resources(cls, bucket_id):
        """
        Set `force_cleanup` flag on all BROKEN and DELETED resources stored in the given MDS namespace.

        :param bucket_id: value of `mds.namespace` to select resources by
        """
        dead_in_bucket = mapping.Resource.objects(
            mds__namespace=bucket_id,
            state__in=[ctr.State.BROKEN, ctr.State.DELETED]
        )
        dead_in_bucket.update(force_cleanup=True)


class ResourceLinks(object):
    """
    Stateful controller for fast resource sources retrieval.

    An instance is bound to a fixed set of resources; sources and clients are fetched
    for the whole set at once instead of querying them per resource.
    """

    Client = collections.namedtuple("Client", ("host", "tags", "storage", "info", "alive"))

    def __init__(self, resources, shuffle=True):
        """
        :param resources: iterable of `mapping.Resource` objects
        :param shuffle: order hosts randomly within a group (storages / other clients)
                        instead of sorting them by hostname
        """
        self._resources = {r.id: r for r in resources}
        self._shuffle = shuffle
        # (resource ID, hostname) -> HTTP URL; filled by `http_url_with_cache`, reused by `rsync_url_with_cache`
        self._http_url_cache = {}

    @common.patterns.singleton_property
    def all_sources(self):
        """
        Mapping from resource ID to hostnames of all its sources in "OK" state,
        without any liveness checks and reordering.
        While inaccurate, it is extremely fast and should be used for anonymous requests.
        :rtype: Dict[int, list]
        """
        all_hosts = mapping.Resource.objects.filter(id__in=list(self._resources)).fast_scalar("id", "hosts_states")
        return {rid: [h["h"] for h in (hosts or []) if h["st"] == "OK"] for rid, hosts in all_hosts}

    @common.patterns.singleton_property
    def sources(self):
        """
        Mapping from resource ID to its source hostnames (either alive or not).
        :rtype: Dict[int, list]
        """
        return self._sources(make_source_func=lambda resource, host: host.host, alive_only=False)

    def http_url_with_cache(self, resource, host=None):
        """
        Return HTTP URL of the resource on the given host, remembering it for `rsync_url_with_cache`.

        :param resource: `mapping.Resource` object
        :param host: `Client` to build the URL for; without it the call goes to `Resource.http_url` uncached
        :return: HTTP URL as returned by `Resource.http_url`
        """
        if host is None:
            # There is no hostname to key the cache with, let `Resource.http_url` pick a source itself
            return Resource.http_url(resource)

        key = (resource.id, host.host)
        if key not in self._http_url_cache:
            self._http_url_cache[key] = Resource.http_url(resource, host)
        return self._http_url_cache[key]

    @common.patterns.singleton_property
    def http(self):
        """
        Mapping from resource ID to its HTTP URLs (alive only)
        :rtype: Dict[int, list]
        """
        return self._sources(make_source_func=self.http_url_with_cache)

    @common.patterns.singleton_property
    def http_verbose(self):
        """
        Mapping from resource ID to descriptions of its HTTP sources (alive only):
        dicts with "url", "host", "storage", "alive" and "state" keys.
        :rtype: Dict[int, list]
        """
        return self._sources(make_source_func=lambda resource, host: {
            "url": Resource.http_url(resource, host),
            "host": host.host,
            "storage": host.storage,
            "alive": host.alive,
            "state": ctr.HostState.OK,
        })

    def rsync_url_with_cache(self, resource, host=None):
        """
        Return rsync URL of the resource on the given host, reusing HTTP URL
        remembered by `http_url_with_cache` if there is one.

        :param resource: `mapping.Resource` object
        :param host: `Client` to build the URL for; without it the call goes to `Resource.rsync_url` as is
        :return: rsync URL as returned by `Resource.rsync_url`
        """
        if host is None:
            return Resource.rsync_url(resource)

        http_url = self._http_url_cache.get((resource.id, host.host))
        return Resource.rsync_url(resource, host=host, http_url=http_url)

    @common.patterns.singleton_property
    def rsync(self):
        """
        Mapping from resource ID to its rsync URLs (alive only)
        :rtype: Dict[int, list]
        """
        return self._sources(make_source_func=self.rsync_url_with_cache)

    @common.patterns.singleton_property
    def rsync_verbose(self):
        """
        Mapping from resource ID to descriptions of its rsync sources (alive only):
        dicts with "url", "host", "storage", "alive" and "state" keys.
        :rtype: Dict[int, list]
        """
        return self._sources(make_source_func=lambda resource, host: {
            "url": Resource.rsync_url(resource, host),
            "host": host.host,
            "storage": host.storage,
            "alive": host.alive,
            "state": ctr.HostState.OK,
        })

    @common.patterns.singleton_property
    def proxy(self):
        """
        Mapping from resource ID to its proxy URL
        :rtype: Dict[int, str]
        """
        proxy_config = common.config.Registry().client.fileserver.proxy

        if proxy_config.host:
            scheme, host = proxy_config.scheme.http, proxy_config.host
            return {rid: "{}://{}/{}".format(scheme, host, rid) for rid in self._resources}

        # Proxy is not configured: take any http source.
        http_links = self._sources(make_source_func=Resource.http_url, limit=1, alive_only=False)
        return {rid: (urls[0] if urls else "") for rid, urls in six.iteritems(http_links)}

    @common.patterns.singleton_property
    def _clients(self):
        """
        Mapping from hostname to `Client` for all resources.
        :rtype: Dict[str, Client]
        """
        all_hosts = set(itertools.chain.from_iterable(six.itervalues(self.all_sources)))

        dead_timeout = common.config.Registry().server.web.mark_client_as_dead_after
        dead_threshold = datetime.datetime.utcnow() - datetime.timedelta(seconds=dead_timeout)

        clients = {}

        query = mapping.Client.objects(hostname__in=all_hosts).fast_scalar("hostname", "tags", "context", "updated")
        for hostname, tags, context, updated in query:
            tags = tags or []  # I guess it can be None for some reason. See comments for SANDBOX-7046.
            clients[hostname] = self.Client(
                host=hostname,
                tags=tags,
                storage=ctc.Tag.STORAGE in tags,
                # NOTE(review): unpickling is safe only as long as client contexts
                # in the database are written by trusted code
                info=six.moves.cPickle.loads(context),
                alive=(updated > dead_threshold),
            )

        return clients

    @common.patterns.singleton_property
    def _filtered_sources(self):
        """
        Mapping from resource ID to its source hostnames limited to known clients and ordered:
        storages first, then other clients; within a group hosts are shuffled or,
        if shuffling is off, sorted by hostname.
        :rtype: Dict[int, list]
        """
        clients = self._clients
        sources = self.all_sources.copy()

        def host_sort_key(host):
            # storages come first
            primary = -1 if clients[host].storage else 1
            secondary = random.random() if self._shuffle else host
            return (primary, secondary)

        for rid in sources:
            # Remove sources that are not currently in Sandbox
            sandbox_hosts = six.moves.filter(clients.__contains__, sources[rid])
            # Group hosts: storages first, then clients. Shuffle hosts within each group.
            sources[rid] = sorted(sandbox_hosts, key=host_sort_key)

        return sources

    def _sources(self, make_source_func, limit=None, alive_only=True):
        """
        Generic method returning a mapping from resource ID to a list of its sources.
        The type of sources (http/rsync/hostname/etc) depends on the `make_source_func` function.

        :param make_source_func: function that returns source string
                                 from the given `mapping.Resource` and `Client` objects
        :param limit: the maximum number of sources for each resource; hosts skipped
                      as not alive are not counted
        :param alive_only: return alive clients only
        :rtype: Dict[int, list]
        """
        sources = {}
        clients = self._clients

        for rid, hosts in six.iteritems(self._filtered_sources):
            resource = self._resources[rid]
            sources[rid] = urls = []

            for host in hosts:
                # The limit counts produced sources only, so skipped dead hosts do not use it up
                if limit is not None and len(urls) >= limit:
                    break

                client = clients[host]
                if alive_only and not client.alive:
                    continue

                urls.append(make_source_func(resource, host=client))

        return sources
