from __future__ import absolute_import, print_function, division

import argparse
import msgpack
import os
import json
import datetime
import shlex
import time
import subprocess
from threading import Timer
from collections import namedtuple

from pprint import pprint


# Version number stored under the 'report_version' key of the generated report.
REPORT_VERSION = 1

# Value returned by run_command(): the process exit code, captured stdout and stderr,
# whether the timeout killer fired, and the elapsed wall-clock time in seconds.
CommandResult = namedtuple('CommandResult', ['returncode', 'out', 'err', 'has_timeout', 'elapsed'])


def kill_process(process, dto):
    """
    Timer callback: kill ``process`` and record in ``dto`` that a timeout happened.

    timed out recipe from
      https://stackoverflow.com/questions/1191374/using-module-subprocess-with-timeout/10768774#10768774

    :param process: object exposing ``returncode`` and ``kill()`` (a ``subprocess.Popen``).
    :param dto: mutable dict shared with the caller; ``dto["value"]`` is set to ``True``
        when the process is actually killed because of the timeout.
    """
    # The timer may fire in the short window between the process finishing and the
    # caller cancelling the timer.  In that case the process has already been reaped:
    # reporting a timeout would be wrong, and there is nothing left to signal.
    if process.returncode is not None:
        return

    # Set the flag before killing so the caller, which wakes up as soon as the process
    # dies, always sees it.
    dto["value"] = True
    process.kill()


def run_command(args, lines=False, timeout_sec=60, exception_on_timeout=True, update_env=None):
    """
    Run an external command, capture its output and kill it if it runs too long.

    :param args: command as a list of arguments, or as a single string that is split
        with ``shlex.split``.
    :param lines: when true, ``out`` and ``err`` are returned as lists of non-empty
        lines instead of raw output.
    :param timeout_sec: seconds to wait before the process is killed.
    :param exception_on_timeout: when true, raise if the timeout killer fired;
        otherwise the timeout is only reported through ``has_timeout``.
    :param update_env: optional mapping of environment variables added on top of a
        copy of the current environment.
    :return: ``CommandResult`` with return code, stdout, stderr, timeout flag and
        elapsed seconds.
    :raises Exception: if the command timed out and ``exception_on_timeout`` is true.
    """
    # ``type(u"")`` is ``unicode`` on Python 2 and ``str`` on Python 3, so command
    # strings of either text type are split into an argument list.
    if isinstance(args, (str, type(u""))):
        args = shlex.split(args)

    cmdline = " ".join(args)

    started_time = time.time()

    new_env = os.environ.copy()
    if update_env:
        new_env.update(update_env)

    proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=new_env)

    timeout_dto = {"value": False}
    timer = Timer(timeout_sec, kill_process, [proc, timeout_dto])

    timer.start()
    try:
        out, err = proc.communicate()
    finally:
        # Always stop the timer, even if communicate() is interrupted; otherwise the
        # timer thread stays alive until the timeout expires.
        timer.cancel()

    elapsed_time = time.time() - started_time
    has_timeout = timeout_dto["value"]

    if exception_on_timeout and has_timeout:
        raise Exception("got timeout (%r sec) on [%s]" % (timeout_sec, cmdline))

    if lines:
        # Build real lists: a lazy filter object can be consumed only once.
        out = [line for line in out.splitlines() if line]
        err = [line for line in err.splitlines() if line]

    return CommandResult(returncode=proc.returncode, out=out, err=err, has_timeout=has_timeout,
                         elapsed=elapsed_time)


# Multipliers for the single-letter binary unit suffixes seen in ``du -h`` output.
_SIZE_UNIT_FACTORS = {
    "B": 1,
    "K": 1024,
    "M": 1024 ** 2,
    "G": 1024 ** 3,
    "T": 1024 ** 4,
    "P": 1024 ** 5,
    "E": 1024 ** 6,
}


def parse_size(size_str):
    """
    Convert a human-readable size such as ``"4.0K"`` or ``"1.5G"`` to a byte count.

    A trailing ``B``, ``K``, ``M``, ``G``, ``T``, ``P`` or ``E`` is treated as a power
    of 1024.  A value without a suffix is taken as a plain number of bytes.  A decimal
    comma (``"1,5G"``) is accepted as well as a decimal point.

    :param size_str: size text to parse.
    :return: size in bytes as an ``int``; ``0`` for empty or unrecognised input.
    :raises ValueError: if a known unit suffix follows something that is not a number.
    """
    text = size_str.strip().replace(",", ".")
    if not text:
        return 0

    factor = _SIZE_UNIT_FACTORS.get(text[-1])
    if factor is not None:
        return int(float(text[:-1]) * factor)

    try:
        return int(float(text))
    except (ValueError, OverflowError):
        # Not a number at all (or inf/nan): treat as "size unknown".
        return 0


def parse_du_result(data, top_users_count, user_top_dirs_count):
    """
    Build a per-user disk usage summary from ``du`` output lines.

    Each useful line has the form ``<size>\\t<path>``.  A path with exactly one
    component below ``/home`` (``/home/<user>``) gives the user's total size; deeper
    paths are recorded as that user's directories, keyed by their last path component.
    Lines that do not split into exactly two tab-separated fields, the ``/home/`` line
    itself, and paths with fewer than three ``/``-separated parts are ignored.

    :param data: iterable of text lines; byte lines are decoded as UTF-8.
    :param top_users_count: keep only this many users, largest total size first.
    :param user_top_dirs_count: keep only this many directories per user, largest first.
    :return: dict mapping user name to
        ``{'total_size': <bytes>, 'top_dir_sizes': {<dir name>: <bytes>}}``.
    """
    users_to_home = {}
    for raw_line in data:
        # On Python 3 subprocess output is bytes; on Python 2 ``bytes is str`` so this
        # branch is skipped and the line is used as is.
        if isinstance(raw_line, bytes) and not isinstance(raw_line, str):
            raw_line = raw_line.decode("utf-8", "replace")

        fields = raw_line.strip().split("\t")
        if len(fields) != 2:
            continue
        size, path = fields
        if path == '/home/':
            continue
        path_parts = path.strip().split('/')
        if len(path_parts) < 3:
            continue

        # entry is [total size of the home directory, [(size, dir name), ...]]
        entry = users_to_home.setdefault(path_parts[2], [0, []])
        if len(path_parts) == 3:
            # example: 24    /home/ubuntu
            entry[0] = parse_size(size)
        else:
            # example: 24    /home/ubuntu/any
            entry[1].append((parse_size(size), path_parts[-1]))

    # ``items()`` works on both Python 2 and 3 (``iteritems()`` exists only on 2).
    users_by_size = sorted(users_to_home.items(), key=lambda item: item[1][0], reverse=True)

    result = {}
    for user, (total_size, dirs) in users_by_size[:top_users_count]:
        top_dirs = sorted(dirs, reverse=True)[:user_top_dirs_count]
        result[user] = {
            'total_size': total_size,
            'top_dir_sizes': {p: s for s, p in top_dirs}
        }

    return result


def get_du_top(top_users_count, user_top_dirs_count, du_result_file=None, timeout_sec=60*10):
    """
    Return the disk usage summary for the home directories.

    The ``du`` lines come from ``du_result_file`` when one is given, otherwise from
    running ``du -d 2 -h /home/`` with the given timeout.  The lines are then handed
    to ``parse_du_result`` together with the two limits.

    :raises Exception: if ``du_result_file`` is given but does not exist.
    """
    if not du_result_file:
        # No saved output supplied: collect it live.
        command_result = run_command('du -d 2 -h /home/', lines=True, timeout_sec=timeout_sec)
        return parse_du_result(command_result.out, top_users_count, user_top_dirs_count)

    if not os.path.exists(du_result_file):
        raise Exception('du_result_file: "{}" passed, but does not exists.'.format(du_result_file))

    with open(du_result_file) as du_file:
        saved_lines = du_file.readlines()
    return parse_du_result(saved_lines, top_users_count, user_top_dirs_count)


def parse_args():
    """
    Parse the command line options of the script.

    Options: ``-f/--format`` (``pretty`` or ``msgpack``), ``--timeout``,
    ``--du-result-file``, ``--top-users-count`` and ``--user-top-dirs-count``.

    :return: the ``argparse`` namespace with the parsed values.
    """
    # (flags, keyword arguments) for every option, in registration order.
    option_specs = (
        (('-f', '--format'), {'choices': ('pretty', 'msgpack'), 'default': 'pretty'}),
        (('--timeout',), {'type': int, 'default': 60*10}),
        (('--du-result-file',), {'default': ""}),
        (('--top-users-count',), {'type': int, 'default': 5}),
        (('--user-top-dirs-count',), {'type': int, 'default': 5}),
    )

    arg_parser = argparse.ArgumentParser()
    for flags, options in option_specs:
        arg_parser.add_argument(*flags, **options)
    return arg_parser.parse_args()


def main():
    """
    Script entry point: build the disk usage report and write it to stdout.

    The report contains the report version and the ``du`` summary produced by
    ``get_du_top``.  With ``--format pretty`` it is pretty-printed; with
    ``--format msgpack`` the packed bytes are written to stdout unmodified.
    """
    import sys

    args = parse_args()
    result = {
        'report_version': REPORT_VERSION,
        'du_top': get_du_top(
            top_users_count=args.top_users_count,
            user_top_dirs_count=args.user_top_dirs_count,
            du_result_file=args.du_result_file,
            timeout_sec=args.timeout
        )
    }

    if args.format == 'pretty':
        pprint(result)
    elif args.format == 'msgpack':
        # The payload is binary: print() would emit the b'...' repr on Python 3 and
        # append a newline, both of which corrupt the stream.  Write the bytes as is
        # (Python 3 exposes the binary stream as sys.stdout.buffer).
        binary_stdout = getattr(sys.stdout, 'buffer', sys.stdout)
        binary_stdout.write(msgpack.packb(result))
        binary_stdout.flush()


# Run the report only when the file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()
