#!/home/zomb-sandbox/venv/bin/python

from __future__ import print_function

import six
import sys
import argparse
import collections

# Keep only the module search paths that live under /skynet or /home/zomb-sandbox.
# The result is built as a real list: on Python 3 ``filter()`` returns a one-shot
# iterator, which is not a usable value for ``sys.path``.
sys.path = [
    path for path in sys.path
    if path.startswith(("/skynet", "/home/zomb-sandbox"))
]

from sandbox.common import fs as common_fs
from sandbox.common import rest as common_rest
from sandbox.common import config as common_config
from sandbox.common import format as common_format
from sandbox.common import console as common_console
from sandbox.common import patterns as common_patterns
from sandbox.common import itertools as common_itertools

from sandbox.yasandbox.database import mapping


# Passed as ``service_id`` to the S3 stats API when listing buckets (see S3Buckets.__init__).
# NOTE(review): presumably the ABC id of the Sandbox service itself -- confirm.
SANDBOX_SERVICE_ID = 469


@six.add_metaclass(common_patterns.ThreadSafeSingletonMeta)
class Abc(object):
    """
    Cache of ABC service records, keyed by lower-cased slug and filled through the ABC REST API.

    The class is created through ``ThreadSafeSingletonMeta``; callers in this file rely on
    every ``Abc()`` call sharing the same cache.
    """

    # Maximum number of slugs sent in one ``slug__in`` request.
    _CHUNK_SIZE = 30

    def __init__(self):
        # Lower-cased slug -> service record (requested with fields "id,slug").
        self._slug_to_service = {}

    @common_patterns.singleton_classproperty
    def client(cls):
        """ REST client for the ABC API URL taken from the config, created with the configured OAuth token """
        config = common_config.Registry()
        token = common_fs.read_settings_value_from_file(
            config.server.auth.oauth.token, ignore_file_existence=True
        )
        return common_rest.Client(config.server.services.group_synchronizer.abc_api_url, token)

    def init_services(self, slugs):
        """
        Fetch and cache service records for those of `slugs` that are not cached yet.

        Only the missing slugs are requested, in chunks of `_CHUNK_SIZE`. Slugs that the API
        does not return simply stay absent from the cache.

        :param slugs: iterable of ABC service slugs (any letter case)
        """
        # The cache is keyed by lower-cased slug, so the lookup has to be lower-cased as well.
        missing = [slug for slug in slugs if slug.lower() not in self._slug_to_service]
        if not missing:
            return

        for chunk in common_itertools.chunker(missing, self._CHUNK_SIZE):
            resp = self.client.services["/"].read(fields="id,slug", slug__in=",".join(chunk))
            # Pagination is not handled here: a chunk is expected to fit into a single page.
            assert not resp["next"], "Unexpected paginated response from ABC"
            for service in resp["results"]:
                self._slug_to_service[service["slug"].lower()] = service

    def slug_to_id(self, slug):
        """
        Return the ABC service id for `slug`, fetching the record on a cache miss.

        :param slug: ABC service slug (case-insensitive)
        :raises ValueError: if the service is still unknown after querying the API
        """
        slug = slug.lower()
        self.init_services([slug])

        if slug not in self._slug_to_service:
            raise ValueError("Unknown service '{}'".format(slug))
        return self._slug_to_service[slug]["id"]


@six.add_metaclass(common_patterns.SingletonMeta)
class S3Buckets(object):
    """ Lookup table of S3 bucket records for the Sandbox service, indexed by bucket name """

    def __init__(self):
        # Bucket name -> bucket record as returned by the stats API.
        self._bucket_info = {
            record["name"]: record
            for record in self._buckets(SANDBOX_SERVICE_ID)
        }

    @common_patterns.singleton
    def _buckets(self, service_id):
        """ Collect bucket records of `service_id` from every page of the S3 stats API """
        api = common_rest.Client("https://s3-idm.mds.yandex.net")

        page = api.stats.buckets.read(service_id=service_id)
        collected = page["results"]
        cursor = page["next"]
        # Follow the "next" cursor until the API reports no further page.
        while cursor:
            page = api.stats.buckets.read(service_id=service_id, cursor=cursor)
            collected.extend(page["results"])
            cursor = page["next"]

        return collected

    def bucket_info(self, bucket_name):
        """ Return the record of `bucket_name`, or None when there is no such bucket """
        return self._bucket_info.get(bucket_name)


class Bucket(object):
    """ Per-ABC-service statistics: its Sandbox groups, their resource sizes and the matching S3 bucket """

    # Lower bound of the recommended S3 quota, in bytes (64 GiB).
    _MIN_RECOMMENDED_SIZE = 64 << 30

    def __init__(self, abc_name, sb_groups):
        """
        :param abc_name: ABC service slug, resolved to a service id through `Abc`
        :param sb_groups: names of the Sandbox groups attached to the service
        :raises ValueError: if `Abc` does not know the slug
        """
        self.abc_name = abc_name
        self.groups = sb_groups

        # `s3_bucket_name` is derived from `abc_id`, so the id has to be resolved first.
        self.abc_id = Abc().slug_to_id(abc_name)
        # None when there is no S3 bucket with the expected name.
        self.s3_info = S3Buckets().bucket_info(self.s3_bucket_name)

        # Per-group statistics, filled by `update_group`.
        self._group_sizes_bytes = {}
        self._group_deleted_sizes_bytes = {}
        self._group_sizes_amount = {}

    @property
    def s3_bucket_name(self):
        """ Name of the S3 bucket expected for this service """
        return "sandbox-{}".format(self.abc_id)

    @property
    def s3_bucket_size(self):
        """ `max_size` of the S3 bucket in bytes, or None if the bucket does not exist """
        if self.s3_info:
            return int(self.s3_info["max_size"])
        return None

    @property
    def total_size(self):
        """ Sum of the non-deleted sizes of all groups, in bytes """
        return sum(self._group_sizes_bytes.values())

    @property
    def total_deleted_size(self):
        """ Sum of the deleted sizes of all groups, in bytes """
        return sum(self._group_deleted_sizes_bytes.values())

    @property
    def total_amount(self):
        """ Number of non-deleted resources over all groups """
        return sum(self._group_sizes_amount.values())

    def update_group(self, group, size_in_bytes, amount, deleted=False):
        """
        Store statistics of a single group, replacing any previous value.

        :param group: group name
        :param size_in_bytes: total resource size of the group, in bytes
        :param amount: number of resources; ignored when `deleted` is set
        :param deleted: record the size as the "deleted" size instead of the regular one
        """
        if deleted:
            self._group_deleted_sizes_bytes[group] = size_in_bytes
        else:
            self._group_sizes_bytes[group] = size_in_bytes
            self._group_sizes_amount[group] = amount

    @property
    def recommended_s3_bucket_size(self):
        """ Recommended quota in bytes: total size plus 25% headroom, but at least 64 GiB """
        # Floor division keeps the value an integer on Python 3 too; callers shift it by bits.
        return max(self.total_size * 10 // 8, self._MIN_RECOMMENDED_SIZE)


def get_buckets():
    """
    Build a `Bucket` for every ABC service that has at least one Sandbox group.

    :return: list of `Bucket` objects (a real list, so callers may sort and slice it)
    """
    abc2groups = collections.defaultdict(list)
    for group in mapping.Group.objects(abc__ne=None):
        abc2groups[group.abc].append(group.name)

    # Warm up the ABC cache in bulk, so that `Bucket()` does not have to request services one by one.
    Abc().init_services(list(abc2groups))
    return [Bucket(abc_name, groups) for abc_name, groups in abc2groups.items()]


def _calc_resources_max_storage(calc_for_all):
    """
    Collect per-group resource sizes for the buckets and return them ordered by total size.

    :param calc_for_all: process every service; otherwise only services without an S3 bucket
    :return: list of processed `Bucket` objects, largest total size first
    """
    # Materialise the result: it is measured, iterated and sorted in place below.
    all_buckets = list(get_buckets())

    groups_total = mapping.Group.objects().count()
    groups_with_abc = mapping.Group.objects(abc__ne=None).count()
    print(
        "There are {} groups within {} abc-services. Total group count: {}".format(
            groups_with_abc, len(all_buckets), groups_total
        )
    )

    no_s3_buckets = [_ for _ in all_buckets if not _.s3_info]
    print("There are {} services without buckets in S3".format(len(no_s3_buckets)))

    buckets = all_buckets if calc_for_all else no_s3_buckets

    pb = common_console.ProgressBar("Counting resources", maxval=len(buckets))
    for bucket in buckets:
        # One aggregation per service: READY/DELETED resources owned by its groups that match
        # "attrs.k" == "backup_task", summed up per (owner, state) pair.
        pipeline = [
            {"$match": {
                "state": {"$in": ["READY", "DELETED"]},
                "attrs.k": "backup_task",
                "owner": {"$in": bucket.groups},
            }},
            {"$group": {
                "_id": {"owner": "$owner", "state": "$state"},
                "size": {"$sum": "$size"},  # KiB
                "amount": {"$sum": 1},
            }},
        ]
        group_sizes = mapping.Resource.aggregate(pipeline)
        for info in group_sizes:
            bucket.update_group(
                info["_id"]["owner"],
                int(info["size"]) << 10,  # KiB -> bytes
                info["amount"],
                deleted=info["_id"]["state"] == "DELETED",
            )
        pb.add(1)
    pb.finish()

    buckets.sort(key=lambda b: b.total_size, reverse=True)
    return buckets


def print_buckets(buckets):
    """ Print a table with one row per bucket followed by a row of totals """

    def prettify_size(size):
        # A missing size (no S3 bucket) is shown as a dash.
        return "-" if size is None else common_format.size2str(size)

    header = [
        "Bucket",
        "Total size",
        "Total DELETED size",
        "Resources amount",
        "S3-bucket size",
        "Recommended S3-bucket size",
        "Sandbox groups",
    ]

    rows = []
    sum_size = sum_deleted = sum_amount = sum_s3 = sum_recommended = 0
    for item in buckets:
        size = item.total_size
        deleted_size = item.total_deleted_size
        amount = item.total_amount
        s3_size = item.s3_bucket_size
        recommended = item.recommended_s3_bucket_size

        sum_size += size
        sum_deleted += deleted_size
        sum_amount += amount
        sum_s3 += s3_size or 0
        sum_recommended += recommended

        rows.append([
            item.s3_bucket_name,
            prettify_size(size),
            prettify_size(deleted_size),
            amount,
            prettify_size(s3_size),
            prettify_size(recommended),
            " ".join(item.groups),
        ])

    footer = [
        "Total:",
        prettify_size(sum_size),
        prettify_size(sum_deleted),
        sum_amount,
        prettify_size(sum_s3),
        prettify_size(sum_recommended),
        "",
    ]

    # The None entries are passed through to print_table between header, body and totals.
    common_format.print_table([header, None] + rows + [None, footer])


def _args_to_set_recommended_quota(bucket):
    total_gib = bucket.recommended_s3_bucket_size >> 30
    return [
        "--bucket", bucket.s3_bucket_name,
        "--measure", "G",
        "--max-size", str(total_gib),
    ]


def _print_create_s3_bucket_actions(buckets, limit):
    no_buckets = [_ for _ in buckets if _.s3_bucket_size is None]
    if no_buckets:
        print("There are {} services without buckets".format(len(no_buckets)))
        to_create = min(limit, len(no_buckets))
        print("Create {} of them with following commands:".format(to_create))
        for bucket in no_buckets[:to_create]:
            args = ["./excavator", "mds", "create-bucket"] + _args_to_set_recommended_quota(bucket)
            print(" ".join(args))
    else:
        print("All services have S3 buckets")


def _print_update_s3_quota_actions(buckets, limit):
    lack_of_quota = [_ for _ in buckets if _.s3_bucket_size is not None and _.total_size > _.s3_bucket_size]
    if lack_of_quota:
        print("\nThere are {} services with lack of S3 quota".format(len(lack_of_quota)))
        to_update = min(limit, len(lack_of_quota))
        print("Update quota for {} of them with following commands:".format(to_update))
        for bucket in lack_of_quota[:to_update]:
            args = ["./excavator", "mds", "set-quota"] + _args_to_set_recommended_quota(bucket)
            print(" ".join(args))


def _print_groups_to_force_upload(buckets, limit):
    """
    Print the first groups, in bucket order, whose `mds_transfer_resources` flag is not True.

    :param buckets: `Bucket` objects whose groups are examined
    :param limit: maximum number of group names to print
    """
    # Flat comprehension instead of sum(lists, []), which re-copies the accumulator on every step.
    all_groups = [group for bucket in buckets for group in bucket.groups]
    no_transfer = set(mapping.Group.objects(name__in=all_groups, mds_transfer_resources__ne=True).scalar("name"))
    print("\nThere are {} groups with mds_transfer_resources=False".format(len(no_transfer)))

    limit = min(limit, len(no_transfer))
    groups = []
    for bucket in buckets:
        for group in bucket.groups:
            if group not in no_transfer:
                continue
            groups.append(group)
            if len(groups) >= limit:
                break
        # Re-checked after each bucket so that the outer loop stops as soon as the limit is reached.
        if len(groups) >= limit:
            break

    print("First {} groups to force resources transfer:\n{}".format(len(groups), " ".join(groups)))


def main():
    """ Parse the command line, count resource sizes per abc-service, print the table and optional actions """
    parser = argparse.ArgumentParser(
        description="Count resource sizes for groups"
    )
    parser.add_argument(
        "--all", action="store_true",
        # Without the flag only services lacking an S3 bucket are processed.
        help="Process all abc-services, including those that already have S3 buckets "
             "(by default only services without buckets are processed)",
    )
    parser.add_argument(
        "-n", metavar="N", type=int, help="Show groups only for top N abc-services. 0 means 'show all'",
    )
    parser.add_argument(
        "--show-actions", default=0, type=int, metavar="N", help="Print next N actions to do",
    )

    args = parser.parse_args()

    mapping.ensure_connection()
    buckets = all_buckets = _calc_resources_max_storage(args.all)

    # Only a positive N limits the output; a negative value would otherwise cut rows off the tail.
    if args.n and 0 < args.n < len(all_buckets):
        buckets = all_buckets[:args.n]
    print_buckets(buckets)

    if args.show_actions:
        _print_create_s3_bucket_actions(buckets, args.show_actions)
        _print_update_s3_quota_actions(buckets, args.show_actions)
        _print_groups_to_force_upload(buckets, args.show_actions)


# Run the command-line tool only when the file is executed as a script.
if __name__ == "__main__":
    main()
