import collections
import itertools
import logging
import time

# Module-level logger used by the planners for progress and failure reports.
_log = logging.getLogger(__name__)

# One resource to place:
#   resource    - object exposing .path, .shard_number and .tier.name
#                 (read by resource_group / resource_shard); used as the
#                 key of the resulting mapping
#   size        - space the resource takes on a host (compared with freespace)
#   replication - number of distinct hosts the resource must be mapped to
#   parts_count - divisor used by resource_hash to spread the parts of one
#                 chunk over the hash ring
ResourceSpec = collections.namedtuple('ResourceSpec', ['resource', 'size', 'replication', 'parts_count'])
# One candidate host:
#   host      - host identifier; it is hashed and stored in the mapping
#   freespace - space available for resources, in the same unit as
#               ResourceSpec.size (presumably bytes: consistent_mapping2
#               divides it by 1024**4 per "1Tb" - confirm)
#   rack      - rack identifier, used as the anti-affinity key when
#               rack_antiaffinity is enabled
HostSpec = collections.namedtuple('HostSpec', ['host', 'freespace', 'rack'])


class NoAvailableHosts(RuntimeError):
    """Raised by the planners when some replicas cannot be placed on any host."""


def _is_planned(mapping, spec):
    return len(mapping[spec.resource]) >= spec.replication


class ShardedStack(object):
    """Set of LIFO stacks of resource specs, one stack per resource group.

    Every appended spec is tagged with a monotonically growing index so
    that the tops of different stacks can be ordered by insertion time.
    """

    def __init__(self):
        # group -> list of (insertion index, spec); the last element is the top
        self.dict = collections.defaultdict(list)
        # groups found empty by iterate(); they are dropped by the next clear()
        self.to_remove = []
        # insertion counter handed out to the next appended spec
        self.index = 0

    def append(self, res_spec):
        """Push ``res_spec`` on top of the stack of its resource group."""
        stack = self.dict[resource_group(res_spec)]
        stack.append((self.index, res_spec))
        self.index += 1

    def clear(self):
        """Drop the groups scheduled for removal that are still empty."""
        pending, self.to_remove = self.to_remove, []
        for group in pending:
            stack = self.dict.get(group)
            if stack is not None and not stack:
                del self.dict[group]

    def iterate(self, banned_groups, mapping):
        """Return the current candidates, most recently appended first.

        For every group outside ``banned_groups`` the already planned specs
        are popped from the top of its stack, and the first spec that is not
        planned yet (if any) becomes the candidate of that group.  Groups
        whose stack ran empty are remembered and deleted on the next call,
        since the dictionary cannot shrink while it is being walked.

        The result is a generator over the candidate specs.
        """
        self.clear()
        tops = []
        for group, stack in self.dict.items():
            if group in banned_groups:
                continue
            while stack and _is_planned(mapping, stack[-1][1]):
                stack.pop()
            if not stack:
                self.to_remove.append(group)
                continue
            tops.append(stack[-1])
        newest_first = sorted(tops, key=lambda entry: entry[0], reverse=True)
        return (spec for _, spec in newest_first)


def dump_mapping(mapping, dump_path=None):
    """Write the planned placement to ``<dump_path>/mapping`` for debugging.

    :param mapping: dict-like object mapping a resource (an object with
        ``shard_number`` and ``path`` attributes) to an iterable of hosts.
    :param dump_path: directory to write the ``mapping`` file into.  When
        it is empty or ``None`` (the default) nothing is written, which
        keeps the historical behaviour of ``dump_mapping(mapping)``.

    The file gets one ``host shard_number path`` line per (host, resource)
    pair, ordered by host.
    """
    if not dump_path:
        return

    placements = []
    for resource, hosts in mapping.items():
        for host in hosts:
            placements.append((host, resource))
    # Order by host only; the sort is stable, so resources of one host keep
    # the order in which the mapping yielded them.
    placements.sort(key=lambda pair: pair[0])

    with open(dump_path + "/mapping", "w") as f:
        for host, resource in placements:
            # f.write instead of the Python 2 only ``print >>f`` statement
            f.write("%s %s %s\n" % (host, resource.shard_number, resource.path))


def consistent_mapping1(resources_specs, hosts_specs, hard_limit_multiplier, max_shard_parts_on_host, rack_antiaffinity, hash_function):
    """Plan resource placement by walking a hash ring of resources and hosts.

    Resources and hosts are put on one ring ordered by hash (a host precedes
    a resource with the same hash).  The ring is walked once, followed by an
    extra pass over the hosts alone.  Every resource met on the way is queued
    once per missing replica; every host met takes candidates from the queue
    while the constraints below allow it.

    Constraints applied to a host:

    * at most ``max_shard_parts_on_host`` resources of one shard per host;
    * the resource must fit into the remaining free space of the host;
    * at most ``(len(resources) // len(hosts) + 1) * hard_limit_multiplier``
      resources per host;
    * at most one resource of a resource group per rack, or per host when
      ``rack_antiaffinity`` is false.

    :param resources_specs: sized sequence of ``ResourceSpec``.
    :param hosts_specs: sized sequence of ``HostSpec``.
    :param hard_limit_multiplier: multiplier of the per-host resource limit.
    :param max_shard_parts_on_host: per-host quota for one shard.
    :param rack_antiaffinity: use the rack (true) or the host (false) as the
        key within which a resource group may be placed only once.
    :param hash_function: callable used to hash hosts and resource groups.
    :returns: ``defaultdict`` mapping each resource to the set of hosts
        chosen for it.
    :raises NoAvailableHosts: when some resource got fewer hosts than its
        replication requires; this includes an empty ``hosts_specs`` with
        resources still to place.
    """
    start_time = time.time()
    order = []
    freespace = collections.defaultdict(int)

    for spec in resources_specs:
        order.append((resource_hash(spec, spec.parts_count, hash_function), True, spec))
    for spec in hosts_specs:
        order.append((host_hash(spec, hash_function), False, spec))
        freespace[spec.host] = spec.freespace
    # Sort by (hash, is_resource): False < True puts hosts before resources on ties.
    order.sort(key=lambda x: x[:2])

    banned_groups = collections.defaultdict(set)
    shard_quotas = collections.defaultdict(lambda: max_shard_parts_on_host)
    resources_on_host = collections.defaultdict(lambda: 0)

    hosts_count = len(hosts_specs)
    if hosts_count:
        # Explicit floor division: the same value under Python 2 and 3.
        hard_resources_limit = (len(resources_specs) // hosts_count + 1) * hard_limit_multiplier
    else:
        # No hosts at all: nothing can be placed, so the limit is never
        # consulted.  Avoid the ZeroDivisionError and let the regular
        # "can't fit" report below raise NoAvailableHosts if needed.
        hard_resources_limit = 0

    def _key(item):
        if rack_antiaffinity:
            return item.rack
        else:
            return item.host

    mapping = collections.defaultdict(set)
    queue = ShardedStack()
    hosts = [tup for tup in order if not tup[1]]
    # The second pass over the hosts lets resources queued late on the ring
    # reach hosts located before them.
    for _hash, is_resource, item in itertools.chain(order, hosts):
        if is_resource:
            if not _is_planned(mapping, item):
                # one queue entry per replica that is still missing
                for _ in range(item.replication - len(mapping[item.resource])):
                    queue.append(item)
        else:
            for res_spec in queue.iterate(banned_groups[_key(item)], mapping):
                shard = resource_shard(res_spec)
                if _is_planned(mapping, res_spec):
                    continue
                if shard_quotas[(item.host, shard)] <= 0:
                    continue
                # The two checks below stop offering candidates to this host
                # altogether instead of trying the next candidate.
                if freespace[item.host] < res_spec.size:
                    break
                if resources_on_host[item.host] >= hard_resources_limit:
                    break

                group = resource_group(res_spec)
                assert group not in banned_groups[_key(item)]

                freespace[item.host] -= res_spec.size
                banned_groups[_key(item)].add(group)
                shard_quotas[(item.host, shard)] -= 1
                resources_on_host[item.host] += 1

                mapping[res_spec.resource].add(item.host)

    not_fitted = 0
    not_fitted_replicas = 0
    not_fitted_sample = None
    for spec in resources_specs:
        if not _is_planned(mapping, spec):
            not_fitted += 1
        if len(mapping[spec.resource]) < spec.replication:
            replicas = spec.replication - len(mapping[spec.resource])
            not_fitted_replicas += replicas
            # size of the last unplaced resource, used as a sample below
            not_fitted_sample = spec.size

    if not_fitted > 0 or not_fitted_replicas > 0:
        _log.error("can't fit %d resources, %d replicas", not_fitted, not_fitted_replicas)
        # Report the hosts that still have room for the sample resource.
        for _, is_resource, item in order:
            if not is_resource:
                if freespace[item.host] > not_fitted_sample:
                    _log.debug("Host %s not empty. Left space %d. Resources on host %d/%d", item.host, freespace[item.host], resources_on_host[item.host], hard_resources_limit)

        raise NoAvailableHosts()

    finish_time = time.time()
    secs = int(finish_time - start_time)
    _log.debug("planning took %.2d:%.2d", secs // 60, secs % 60)

    dump_mapping(mapping)

    return mapping


def consistent_mapping2(resources_specs, hosts_specs, hard_limit_multiplier=2.0, max_shard_parts_on_host=2, rack_antiaffinity=True, hash_function=hash):
    """Plan resource placement on a hash ring with weighted virtual hosts.

    Unlike ``consistent_mapping1`` every host is put on the ring several
    times: ``host_multiplier`` points per 1024**4 units of its free space,
    each next point being the hash of the previous one.  The ring is walked
    once, followed by an extra pass over the host points alone.  Every
    resource met is queued once per missing replica; every host point scans
    the whole queue, newest entry first, and takes each entry that fits.

    An entry fits a host when all of these hold:

    * the host holds fewer than ``max_shard_parts_on_host`` resources of the
      shard;
    * the resource size does not exceed the remaining free space;
    * the host holds fewer than
      ``(len(resources) // len(hosts) + 1) * hard_limit_multiplier`` resources;
    * the resource group was not placed yet on the rack, or on the host when
      ``rack_antiaffinity`` is false.

    :param resources_specs: sized sequence of ``ResourceSpec``.
    :param hosts_specs: sized sequence of ``HostSpec``.
    :param hard_limit_multiplier: multiplier of the per-host resource limit.
    :param max_shard_parts_on_host: per-host quota for one shard.
    :param rack_antiaffinity: use the rack (true) or the host (false) as the
        key within which a resource group may be placed only once.
    :param hash_function: callable used to hash hosts, resource groups and
        the string form of the previous virtual point.
    :returns: ``defaultdict`` mapping each resource to the set of hosts
        chosen for it.
    :raises NoAvailableHosts: when some resource got fewer hosts than its
        replication requires; this includes an empty ``hosts_specs`` with
        resources still to place.
    """
    _log.debug("use consistent_mapping2")

    start_time = time.time()
    order = []
    freespace = collections.defaultdict(int)

    max_freespace = 0
    for spec in hosts_specs:
        max_freespace = max(max_freespace, spec.freespace)

    # Start above any host's free space so that, without resources, every
    # host point is skipped by the cheap check in the main loop.
    min_resource_size = max_freespace + 1

    for spec in resources_specs:
        order.append((resource_hash(spec, spec.parts_count, hash_function), True, spec))
        min_resource_size = min(min_resource_size, spec.size)

    host_multiplier = 3000  # map 1Tb to host_multiplier points

    for spec in hosts_specs:
        hash_value = host_hash(spec, hash_function)

        # Explicit floor division: the same point count under Python 2 and 3.
        for _ in range(0, int(host_multiplier * spec.freespace // (1024*1024*1024*1024))):
            order.append((hash_value, False, spec))
            # NOTE(review): unlike host_hash, the chained value is not reduced
            # modulo 2 ** 64, so with a signed hash_function some points fall
            # outside the range of the resource hashes.  Left as is because
            # changing it would move existing placements - confirm intent.
            hash_value = hash_function(str(hash_value))

        freespace[spec.host] = spec.freespace

    # Sort by (hash, is_resource): False < True puts hosts before resources on ties.
    order.sort(key=lambda x: x[:2])

    banned_groups = collections.defaultdict(set)
    shard_quotas = collections.defaultdict(lambda: max_shard_parts_on_host)
    resources_on_host = collections.defaultdict(lambda: 0)

    hosts_count = len(hosts_specs)
    if hosts_count:
        hard_resources_limit = (len(resources_specs) // hosts_count + 1) * hard_limit_multiplier
    else:
        # No hosts at all: nothing can be placed, so the limit is never
        # consulted.  Avoid the ZeroDivisionError and let the regular
        # "can't fit" report below raise NoAvailableHosts if needed.
        hard_resources_limit = 0

    def _key(item):
        if rack_antiaffinity:
            return item.rack
        else:
            return item.host

    mapping = collections.defaultdict(set)

    queue = collections.deque()

    hosts = [tup for tup in order if not tup[1]]
    # The second pass over the host points lets resources queued late on the
    # ring reach hosts located before them.
    for _hash, is_resource, item in itertools.chain(order, hosts):
        if is_resource:
            if not _is_planned(mapping, item):
                # one queue entry per replica that is still missing,
                # newest entries at the left end
                for _ in range(item.replication - len(mapping[item.resource])):
                    queue.appendleft(item)
        else:
            # Cheap rejections: the host cannot take even the smallest
            # resource, or it is already at its hard limit.
            if freespace[item.host] < min_resource_size:
                continue
            if resources_on_host[item.host] >= hard_resources_limit:
                continue

            # Entries this host does not take, in their original order.
            queue_next = collections.deque()

            for res_spec in queue:
                assert not _is_planned(mapping, res_spec)

                shard = resource_shard(res_spec)

                fits = True
                if shard_quotas[(item.host, shard)] <= 0:
                    fits = False
                if freespace[item.host] < res_spec.size:
                    fits = False
                if resources_on_host[item.host] >= hard_resources_limit:
                    fits = False

                group = resource_group(res_spec)
                if group in banned_groups[_key(item)]:
                    fits = False

                if fits:
                    freespace[item.host] -= res_spec.size
                    banned_groups[_key(item)].add(group)
                    shard_quotas[(item.host, shard)] -= 1
                    resources_on_host[item.host] += 1

                    mapping[res_spec.resource].add(item.host)
                else:
                    queue_next.append(res_spec)

            queue = queue_next

    not_fitted = 0
    not_fitted_replicas = 0
    not_fitted_sample = None
    for spec in resources_specs:
        if not _is_planned(mapping, spec):
            not_fitted += 1
        if len(mapping[spec.resource]) < spec.replication:
            replicas = spec.replication - len(mapping[spec.resource])
            not_fitted_replicas += replicas
            # size of the last unplaced resource, used as a sample below
            not_fitted_sample = spec.size

    if not_fitted > 0 or not_fitted_replicas > 0:
        _log.error("can't fit %d resources, %d replicas", not_fitted, not_fitted_replicas)
        # Report the host points that still have room for the sample resource.
        for _, is_resource, item in order:
            if not is_resource:
                if freespace[item.host] > not_fitted_sample:
                    _log.debug("Host %s not empty. Left space %d. Resources on host %d/%d", item.host, freespace[item.host], resources_on_host[item.host], hard_resources_limit)

        raise NoAvailableHosts()

    finish_time = time.time()
    secs = int(finish_time - start_time)
    _log.debug("planning took %.2d:%.2d", secs // 60, secs % 60)

    dump_mapping(mapping)
    return mapping


def consistent_mapping(resources_specs, hosts_specs, hard_limit_multiplier=2.0, max_shard_parts_on_host=2, rack_antiaffinity=True, hash_function=hash, planner_version=1):
    """Plan resource placement with the planner chosen by ``planner_version``.

    ``planner_version == 2`` dispatches to ``consistent_mapping2``; any other
    value dispatches to ``consistent_mapping1``.  All remaining arguments are
    forwarded unchanged and the planner's result is returned as is.
    """
    planner_args = (
        resources_specs,
        hosts_specs,
        hard_limit_multiplier,
        max_shard_parts_on_host,
        rack_antiaffinity,
        hash_function,
    )
    if planner_version != 2:
        return consistent_mapping1(*planner_args)
    return consistent_mapping2(*planner_args)


def resource_shard(spec):
    """Return the ``(tier name, shard number)`` pair identifying the shard of ``spec``."""
    resource = spec.resource
    tier_name = resource.tier.name
    return (tier_name, resource.shard_number)


def resource_group(spec):
    """Return the group key of ``spec``: ``<tier name>/<shard number>/<chunk>``.

    The chunk is the resource path without its last ``/``-separated
    component.  A path without any ``/`` raises ``ValueError``.
    """
    resource = spec.resource
    # Unpacking (rather than indexing) keeps the failure on slash-less paths.
    chunk, _part = resource.path.rsplit('/', 1)
    pieces = [resource.tier.name, resource.shard_number, chunk]
    return '/'.join(pieces)


def host_hash(spec, hash_function):
    """Return the ring position of a host: its hash reduced modulo 2 ** 64."""
    ring_size = 2 ** 64
    raw_hash = hash_function(spec.host)
    return raw_hash % ring_size


def resource_hash(spec, parts_per_chunk, hash_function):
    """Return the ring position of a resource, in ``[0, 2 ** 64)``.

    The position is the hash of the resource group shifted by
    ``number / parts_per_chunk`` of the ring, where ``number`` is the last
    ``/``-separated component of the resource path parsed as an integer.

    :param spec: ``ResourceSpec`` whose resource path ends with ``/<number>``.
    :param parts_per_chunk: divisor of the ring for the per-part shift;
        expected to be a non-zero integer.
    :param hash_function: callable applied to the resource group key.
    :raises ValueError: when the path has no ``/`` or its last component is
        not an integer.
    :raises ZeroDivisionError: when ``parts_per_chunk`` is zero.
    """
    _, number = spec.resource.path.rsplit('/', 1)
    ring_size = 2 ** 64
    # Floor division keeps the shift an exact integer on Python 3 as well;
    # true division would turn it into a float with only 53 bits of precision.
    shift = int(number) * ring_size // parts_per_chunk
    return (hash_function(resource_group(spec)) + shift) % ring_size
