#!/home/dldmitry/walle_venv/bin/python
from __future__ import print_function

import os
import json
from collections import defaultdict
from analyze import base_images, infra_resource_names, datacenter

# When True, process_spec attributes shared infra resources (and layers of
# containers that use one of our base images) to the host instead of the pod,
# which leaves them out of the per-pod quota accounting.
USE_INFRA_CACHE = True

# (deploy_engine, service_id) -> storage class -> list of per-pod cache sizes.
# Filled by process_spec and summarized by compute_quota.
service_quota = defaultdict(lambda: defaultdict(list))


def fix_storage_class(storage_class):
    """Normalize a storage class name.

    "nvme" is reported as "ssd"; every other value is returned unchanged.
    """
    return "ssd" if storage_class == "nvme" else storage_class


def process_spec(spec):
    """Accumulate per-pod resource cache sizes from one host spec.

    Reads ``spec["hostname"]``, ``spec["resources"]`` and
    ``spec["capacities"]``. For every container other than the host itself,
    the capacities of its distinct resources (keyed by uuid) are summed per
    storage class; sums of at least 1024 * 1024 are appended to the
    module-level ``service_quota`` under the container's
    ``(deploy_engine, service_id)`` key.

    When ``USE_INFRA_CACHE`` is set, resources named in
    ``infra_resource_names`` and layers of containers that use one of
    ``base_images`` are attributed to the host and therefore not counted.

    Returns None; the only effect is the update of ``service_quota``.
    """
    hostname = spec["hostname"]
    resources = spec["resources"]
    if not resources:
        return

    # Containers (other than the host) that have at least one layer whose id
    # is listed in base_images.
    layer_uuid_prefix = "layer_rbtorrent_"
    container_use_our_base_layer = set()
    for resource in resources:
        resource_uuid = resource["uuid"]
        if not resource_uuid or resource["type"] != "layer" or resource["container_fqdn"] == hostname:
            continue
        # str.strip() treats its argument as a set of characters and would
        # also eat leading/trailing id characters such as "a", "b" or "e",
        # so the prefix is removed explicitly instead.
        if resource_uuid.startswith(layer_uuid_prefix):
            resource_uuid = resource_uuid[len(layer_uuid_prefix):]
        if resource_uuid in base_images:
            container_use_our_base_layer.add(resource["container_fqdn"])

    service_map = {}
    storage_class_set = {fix_storage_class(capacity_spec["storage_class"]) for capacity_spec in spec["capacities"]}
    # storage class -> container fqdn -> resource uuid -> [resource, ...]
    per_pod_resource_cache = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
    for resource in resources:
        storage_class = fix_storage_class(resource["storage_class"])
        if not resource["resource_capacity"]:
            continue

        deploy_engine = resource["deploy_engine"]
        service_id = resource["service_id"]
        resource_uuid = resource["uuid"]
        if not resource_uuid or not deploy_engine or not service_id:
            continue

        resource_name = resource.get("resource_name")
        # Resources without a container are attributed to the host.
        container_fqdn = resource["container_fqdn"] or hostname
        has_shared_resource = bool(resource_name and resource_name in infra_resource_names)
        has_shared_layer = bool(resource["type"] in ("layer", "unpacked_layer") and container_fqdn in container_use_our_base_layer)
        if USE_INFRA_CACHE and (has_shared_resource or has_shared_layer):
            # Shared resources are accounted to the host, on hdd when the
            # host has hdd capacity and on ssd otherwise.
            container_fqdn = hostname
            storage_class = "hdd" if "hdd" in storage_class_set else "ssd"
        if container_fqdn != hostname:
            per_pod_resource_cache[storage_class][container_fqdn][resource_uuid].append(resource)
            service_map[container_fqdn] = (deploy_engine, service_id)

    # Host-level entries were never stored above, so every key here is a pod.
    for storage_class, resource_cache in per_pod_resource_cache.items():
        for container_fqdn, pod_resource_cache in resource_cache.items():
            # A uuid seen several times in one pod is counted once, using the
            # capacity of its first occurrence.
            per_pod_cache_size = sum(
                pod_resource_list[0]["resource_capacity"]
                for pod_resource_list in pod_resource_cache.values()
            )
            if per_pod_cache_size >= 1024 * 1024:
                service_quota[service_map[container_fqdn]][storage_class].append(per_pod_cache_size)


def compute_quota():
    """Print the quota for every service and the totals per storage class.

    Each service/storage-class row shows 1.5 times the sum of the recorded
    per-pod cache sizes, truncated to an integer. Rows are ordered by
    (deploy_engine, service_id). A final line per storage class prints the
    grand total divided by 1024 four times, labelled as TB.
    """
    totals_by_class = defaultdict(int)
    row_template = "{}\t{}\t{}\t{}"
    for service_key, sizes_by_class in sorted(service_quota.items()):
        engine, service = service_key
        for class_name in sizes_by_class:
            reserved = int(1.5 * sum(sizes_by_class[class_name]))
            totals_by_class[class_name] = totals_by_class[class_name] + reserved
            print(row_template.format(engine, service, class_name, reserved))

    for class_name in totals_by_class:
        in_tb = totals_by_class[class_name] / 1024. / 1024. / 1024. / 1024
        print("total, TB: {} {}".format(class_name, in_tb))


def main():
    """Process every spec file for the configured datacenter and print quotas.

    Scans the ``data`` directory, loads each file whose name contains
    ``datacenter`` as JSON, feeds it to ``process_spec`` and finally prints
    the report via ``compute_quota``.
    """
    root_path = "data"
    for name in os.listdir(root_path):
        if datacenter not in name:
            continue
        # Close each spec file promptly instead of leaking the handle.
        with open(os.path.join(root_path, name)) as spec_file:
            spec = json.load(spec_file)
        process_spec(spec)
    compute_quota()


# Produce the quota report only when run as a script, not on import.
if __name__ == "__main__":
    main()

