#!/usr/bin/env python

from __future__ import print_function

import argparse
import collections
import datetime as dt

import numpy
import requests
import progressbar as pb

from sandbox import common
import sandbox.common.types.client as ctc
import sandbox.common.types.resource as ctr

import sandbox.sdk2

import sandbox.yasandbox.manager
import sandbox.yasandbox.database.mapping as mp


def _storages():
    """
    Return hostnames of all clients tagged as storage.

    The result is ordered by hostname length first and by the hostname itself second.
    """
    client_manager = sandbox.yasandbox.manager.client_manager
    storage_hostnames = [
        client.hostname
        for client in client_manager.list()
        if ctc.Tag.STORAGE in client.tags
    ]
    storage_hostnames.sort(key=lambda hostname: (len(hostname), hostname))
    return storage_hostnames


def _free_space_color(amount, total, bcz, cz):
    """
    Pick the color function for a free-space figure from its percentage of `total`.

    Returns `bcz.magenta` below 4%, `cz.yellow` below 8% and `cz.cyan` otherwise.
    A zero `total` is treated as 100%.
    """
    percent = amount * 100 / total if total else 100
    if percent < 4:
        return bcz.magenta
    if percent < 8:
        return cz.yellow
    return cz.cyan


def per_storage(__):
    """
    Print the per-host usage table for all storage hosts.

    For every host returned by `_storages()` the resource statistics, the list of
    insufficiently redundant resources and the client's disk figures are fetched,
    selected cells are colored, and the table is printed with a "Grand Total" row
    at the end.

    :param __: parsed command line arguments; not used by this mode.
    """
    hdr = [
        "Host",
        "DC",
        "Total Amt",
        "Pure Amt",
        "Unique Amt",
        "Trash Amt",
        "Total Size",
        "Pure Size",
        "Unique Size",
        "Trash Size",
        "Total Disk",
        "Used Disk",
        "Free Disk",
        "Available*",
        "Delta"
    ]
    # Resolve the columns addressed by position below while the header still holds plain strings
    # (the header cells are replaced by their colorized form at the very end).
    first_size_col = hdr.index("Total Size")
    unique_size_col = hdr.index("Unique Size")
    trash_size_col = hdr.index("Trash Size")
    free_disk_col = hdr.index("Free Disk")
    available_col = hdr.index("Available*")

    # `data` and `colorizer` are kept index-aligned: one entry per table row, `None` for separators.
    data = [hdr, None]
    hosts = _storages()
    bcz = common.console.AnsiColorizer()
    cz = common.console.AnsiColorizer(False)
    colorizer = [[bcz.white] * len(hdr), None]
    palette = [cz.green, cz.cyan, cz.yellow, cz.white]

    dcs = []
    pbar = pb.ProgressBar(
        widgets=["Fetching Data: ", pb.Bar(), " ", pb.Percentage(), " ", pb.Timer(), " ", pb.ETA()],
        max_value=len(hosts) * 3
    ).start()
    for i, host in enumerate(hosts):
        # Three fetch steps per host, each reported to the progress bar.
        pbar.update(i * 3)
        resources = mp.Resource.host_resources_stats(host)

        pbar.update(i * 3 + 1)
        rs = sandbox.yasandbox.manager.resource_manager.storage_insufficient_redundancy(host)

        pbar.update(i * 3 + 2)
        client = sandbox.yasandbox.manager.client_manager.load(host)

        system_info = client.info["system"]
        # NOTE(review): the shifts/multipliers below presumably convert MiB and KiB to bytes -- confirm.
        total_disk = system_info["total_space"] << 20
        free_disk = system_info["free_space"] << 20
        total_amount = sum(v[0] for v in resources.itervalues())
        pure_amount = sum(v[0] for k, v in resources.iteritems() if k == ctr.State.READY)
        total_size = int(sum(v[1] for v in resources.itervalues()) * 1024)
        pure_size = int(sum(v[1] for k, v in resources.iteritems() if k == ctr.State.READY) * 1024)
        available = free_disk + total_size - pure_size

        # Same fallback as `weak_backup` uses for clients which do not report their DC.
        dc = system_info.get("dc", "unknown")
        if dc not in dcs:
            dcs.append(dc)

        row_colors = [None] * len(hdr)
        row_colors[0] = bcz.green if client.is_alive() else bcz.magenta
        # Every DC gets its own palette color, cycling when there are more DCs than colors.
        row_colors[1] = palette[dcs.index(dc) % len(palette)]
        # NOTE(review): both percentages are taken relative to the resources' total size,
        # not to the total disk size -- confirm this is intended.
        row_colors[free_disk_col] = _free_space_color(free_disk, total_size, bcz, cz)
        row_colors[available_col] = _free_space_color(available, total_size, bcz, cz)
        colorizer.append(row_colors)

        data.append([
            host,
            dc,
            total_amount,
            pure_amount,
            len(rs),
            total_amount - pure_amount,
            total_size,
            pure_size,
            sum(r[-1] for r in rs) << 10,
            total_size - pure_size,
            total_disk,
            total_disk - free_disk,
            free_disk,
            available,
            total_size - total_disk + free_disk
        ])
    pbar.finish()

    host_rows = data[2:]
    if host_rows:
        # Percentiles are undefined for an empty sample, so they are computed only when hosts exist.
        trashes = numpy.array([row[trash_size_col] for row in host_rows])
        tp1, tp2 = [numpy.percentile(trashes, q) for q in (20, 33)]
        for no, row in enumerate(host_rows, 2):
            # Unique size is colored by fixed thresholds.
            unique_size = row[unique_size_col]
            if unique_size <= 300 << 30:
                colorizer[no][unique_size_col] = cz.cyan
            elif unique_size <= 600 << 30:
                colorizer[no][unique_size_col] = cz.yellow
            else:
                colorizer[no][unique_size_col] = bcz.magenta

            # Trash size is colored relative to the other hosts: the lowest values are highlighted.
            trash_size = row[trash_size_col]
            if trash_size <= tp1:
                colorizer[no][trash_size_col] = bcz.magenta
            elif trash_size <= tp2:
                colorizer[no][trash_size_col] = cz.yellow
            else:
                colorizer[no][trash_size_col] = cz.cyan

    data += [
        None,
        ["Grand Total", len(set(row[1] for row in host_rows))] +
        [sum(row[col] for row in host_rows) for col in range(2, len(hdr))]
    ]
    colorizer += [None, [bcz.white] * len(hdr)]

    for no, row in enumerate(data):
        if not row:
            # Separator row.
            continue
        if no > 1:
            # Everything from "Total Size" to the last column is a size; the header row is left as is.
            for col in range(first_size_col, len(hdr)):
                row[col] = common.utils.size2str(row[col])
        for col, paint in enumerate(colorizer[no]):
            if paint:
                row[col] = paint(row[col])
    common.utils.print_table(data)


def top(args):
    """
    Print the groups of resources which consume the most storage space.

    Resources are grouped by `args.groupby` (always by type when `args.owner` is given),
    ordered by `args.orderby` and limited to `args.rows` rows. Only resources created in
    the period between `args.since` and `args.to` (both in "%Y-%m-%d" format) are requested.
    With `args.mds` set, per-group columns for resources with infinite TTL and for those of
    them carrying an "mds" attribute are added, and overall MDS statistics are printed.

    :param args: parsed command line arguments of the "top" mode.
    """
    group_by = "type" if args.owner else args.groupby.lower()
    order_by = args.orderby.lower()

    # Table header: two rows normally, three rows when the MDS columns are shown.
    title_row = [group_by.capitalize(), "Total", None]
    units_row = ["", "Size", "Amount"]
    if args.mds:
        title_row += ["MDS", None, None, None]
        units_row += ["Size", "Amount", "Size", "Amount"]
        data = [title_row, ["", "", "", "Expected", None, "Actual", None], units_row]
    else:
        data = [title_row, units_row]
    data.append(None)
    # Index of the first body row.
    head = len(data)

    date_format = "%Y-%m-%d"
    since = dt.datetime.strptime(args.since, date_format)
    to = dt.datetime.strptime(args.to, date_format)
    row_limit = args.rows
    print("Storage resources statistics for period from {} to {}".format(
        common.utils.dt2str(since), common.utils.dt2str(to)
    ))

    if args.host:
        hosts, state = [args.host], None
    else:
        # Without an explicit host all storages are queried with the READY state filter.
        hosts, state = _storages(), ctr.State.READY

    query = mp.Resource.storage_top_usage(
        hosts, since, to, args.immortal or args.mds, group_by, order_by, args.owner, state
    )
    for shown, record in enumerate(query, 1):
        row = list(record)
        if args.mds:
            immortal_filter = {
                "attributes": mp.Resource.Attribute(key="ttl", value="inf"),
                "state": ctr.State.READY,
                "time__created__gt": since,
                "time__created__lte": to,
                group_by: row[0],
            }
            expected = list(mp.Resource.objects(**immortal_filter).scalar("id", "size", "attributes"))
            actual = [item for item in expected if any(attr.key == "mds" for attr in item[2])]
            # "Expected" size/amount first, then "Actual" size/amount.
            for subset in (expected, actual):
                row.extend([sum(item[1] for item in subset), len(subset)])
        data.append(row)
        if shown == row_limit:
            break

    body_rows = data[head:]
    totals = [sum(row[col] for row in body_rows) for col in range(1, len(data[0]))]
    data.extend([None, ["Grand Total"] + totals])

    size_columns = [1, 3, 5] if args.mds else [1]
    for row in data[head:]:
        if not row:
            # Separator row.
            continue
        for col in size_columns:
            row[col] = common.utils.size2str(row[col] << 10)
    common.utils.print_table(data)

    if not args.mds:
        return

    # NOTE(review): the request is made without a timeout.
    response = requests.get(
        "{}/statistics-{}".format(common.config.Registry().common.mds.dl.url, ctr.DEFAULT_MDS_NAMESPACE)
    )
    response.raise_for_status()
    stats = response.json()
    print("MDS statistics: {} out of {} free.".format(
        common.utils.size2str(stats["free_space"]), common.utils.size2str(stats["total_space"])
    ))


def _get_weak_backuped_resources(host, dc_hosts, not_dc_hosts, exclude_types):
    """
    Build the query for resources of `host` which are backed up only within its DC.

    First the ids of resources backed up on `host` and not on any of `not_dc_hosts`
    are selected (skipping `exclude_types`); the result is then narrowed down to those
    which are also backed up on some other host from `dc_hosts`.

    :return: whatever `resource_manager.backuped_resources` returns for the second query.
    """
    manager = sandbox.yasandbox.manager.resource_manager
    only_in_dc = manager.backuped_resources(
        on_hosts=[host],
        not_on_hosts=not_dc_hosts,
        exclude_types=exclude_types,
    )
    candidate_ids = only_in_dc.scalar("id")
    neighbours = list(set(dc_hosts) - {host})
    return manager.backuped_resources(ids=candidate_ids, on_hosts=neighbours)


def _purge_weak_backups(args, host, dc_hosts, not_dc_hosts, exclude_types):
    """
    Remove `host` from the weakly backed up resources it holds.

    Resources are taken from `_get_weak_backuped_resources` and processed in descending id
    order. A resource with fewer than two entries in `hosts_states` is never touched; its id
    is collected and reported at the end. With `args.dry_run` set nothing is removed.
    """
    manager = sandbox.yasandbox.manager.resource_manager
    with common.console.LongOperation("Counting resources to update") as operation:
        resources = _get_weak_backuped_resources(host, dc_hosts, not_dc_hosts, exclude_types)
        resource_count = resources.count()
        operation.intermediate("There are {} resources to update".format(resource_count))

    single_copy_ids = []
    progress = common.console.ProgressBar("Remove resources from {}".format(host), maxval=resource_count)
    for resource in resources.order_by("-id"):
        has_other_copy = len(resource.hosts_states) >= 2
        if not has_other_copy:
            single_copy_ids.append(resource.id)
        elif not args.dry_run:
            manager.remove_host(resource.id, host)
        progress.add(1)
    progress.finish()

    if single_copy_ids:
        print("Resource ids with no copy on other hosts:", ", ".join(str(rid) for rid in single_copy_ids))


def weak_backup(args):
    """
    Report or purge weak backups, i.e. resources backed up within a single DC only.

    Storage hosts are grouped by the DC reported in their client info ("unknown" if absent).
    With `args.purge` set, weak backups are purged from that host via `_purge_weak_backups`.
    Otherwise a table with the per-host counts of weak and all backups is printed.

    :param args: parsed command line arguments of the "weak" mode.
    """
    data = [["DC", "Storage", "Weak backups", "Backups"], None]

    hosts = _storages()
    client_manager = sandbox.yasandbox.manager.client_manager
    clients = [client_manager.load(host) for host in hosts]

    dc2hosts = collections.defaultdict(list)
    host2dc = {}
    for host, client in zip(hosts, clients):
        host_dc = client.info["system"].get("dc", "unknown")
        host2dc[host] = host_dc
        dc2hosts[host_dc].append(host)

    common.projects_handler.load_project_types(reuse=True)
    resource_manager = sandbox.yasandbox.manager.resource_manager
    # Resource types flagged as `service` are excluded from all the queries below.
    exclude_types = [rt.name for rt in sandbox.sdk2.Resource if rt.service]

    if args.purge:
        purge_host = args.purge
        dc_hosts = dc2hosts[host2dc[purge_host]]
        not_dc_hosts = list(set(hosts) - set(dc_hosts))
        _purge_weak_backups(args, purge_host, dc_hosts, not_dc_hosts, exclude_types)
        return

    total_weak_backup_count = 0
    # Two counting queries are made per host.
    progress = common.console.ProgressBar("Counting backups", maxval=len(hosts) * 2)
    for dc, dc_hosts in dc2hosts.items():
        not_dc_hosts = list(set(hosts) - set(dc_hosts))

        for position, host in enumerate(dc_hosts):
            backup_count = resource_manager.backuped_resources(
                on_hosts=[host], exclude_types=exclude_types
            ).count()
            progress.add(1)

            weak_backup_count = _get_weak_backuped_resources(
                host, dc_hosts, not_dc_hosts, exclude_types
            ).count()
            progress.add(1)
            total_weak_backup_count += weak_backup_count

            # The DC name is shown only on the first row of its group.
            dc_label = dc if position == 0 else ""
            data.append([dc_label, host, weak_backup_count, backup_count])
        data.append(None)
    progress.finish()

    data.append(["Total", len(hosts), total_weak_backup_count, ""])
    common.utils.print_table(data)


def main():
    """
    Entry point: parse the command line and run the selected operational mode.

    Switches the manager to local usage and ensures the database connection first,
    then dispatches to `per_storage` ("usage"), `top` ("top") or `weak_backup` ("weak").
    """
    sandbox.yasandbox.manager.use_locally()
    mp.ensure_connection()

    def wide_defaults_formatter(*args, **kwargs):
        # Help formatter which shows argument defaults and uses a width of 120.
        return argparse.ArgumentDefaultsHelpFormatter(*args, width=120, **kwargs)

    # Top-level parser.
    parser = argparse.ArgumentParser(
        formatter_class=wide_defaults_formatter,
        description="Storage resources statistics calculation tool."
    )
    modes = parser.add_subparsers(help="operational mode")

    # "usage" mode.
    usage_parser = modes.add_parser("usage", help="show per storage host usage")
    usage_parser.set_defaults(func=per_storage)

    # "top" mode.
    top_parser = modes.add_parser("top", help="show top N resources with biggest storage space consumption")
    top_parser.add_argument(
        "--groupby", "-g",
        choices=("type", "owner"),
        default="type",
        help="group the table by given field"
    )
    top_parser.add_argument(
        "--orderby", "-o",
        choices=("size", "amount"),
        default="size",
        help="order group the table by given field"
    )
    # The default period is the date part of (now - 30 days) up to the date part of (now + 23:59), in UTC.
    now = dt.datetime.utcnow()
    default_since = common.utils.dt2str(now - dt.timedelta(days=30)).split(" ")[0]
    default_to = common.utils.dt2str(now + dt.timedelta(hours=23, minutes=59)).split(" ")[0]
    top_parser.add_argument(
        "--since", "-s",
        default=default_since,
        help="resource creation UTC date in ISO 8601 format to check period from"
    )
    top_parser.add_argument(
        "--to", "-t",
        default=default_to,
        help="resource creation UTC date in ISO 8601 format to check period to"
    )
    top_parser.add_argument(
        "--immortal", "-i",
        action="store_true",
        help="query resources only with infinite TTL"
    )
    top_parser.add_argument(
        "--mds", "-m",
        action="store_true",
        help="show statistics related to MDS"
    )
    top_parser.add_argument("--rows", "-r", default=25, type=int, help="amount of rows to show")
    top_parser.add_argument(
        "--owner", "-O",
        type=str,
        help="show statistics by resource type for given owner (forces group by type)"
    )
    top_parser.add_argument(
        "host",
        metavar="HOST",
        nargs="?",
        help="host to count the statistics; counts over all storage hosts if not specified"
    )
    top_parser.set_defaults(func=top)

    # "weak" mode.
    weak_parser = modes.add_parser("weak", help="show resources with weak backups (in one DC)")
    weak_parser.set_defaults(func=weak_backup)
    weak_parser.add_argument(
        "--purge",
        metavar="HOST",
        default=None,
        type=str,
        # Only known storage hosts are accepted.
        choices=_storages(),
        help="host to purge: remove resources that are backuped here",
    )
    weak_parser.add_argument("--dry_run", action="store_true", help="do not perform updates")

    # Run the handler registered by the selected mode.
    parsed = parser.parse_args()
    parsed.func(parsed)


# Run the command line tool only when this file is executed as a script.
if __name__ == "__main__":
    main()
