import os
import re
import json
import os.path
import requests
import logging
import time
import argparse

import msgpack

import porto

from common import _update_iss_instance_env, ISS_ILIST_URL, instance_major_tag, instance_minor_tag, instance_version, instance_group


VOLUMES_ROOT = '/place/db/iss3/volumes'  # root directory for all volumes (RX-303)


class DU(object):
    """Disk usage split into an HDD part and an SSD part.

    Both parts default to 0. Instances can be combined with ``+`` (which
    produces a new object) and ``+=`` (which updates the left operand in
    place), so they work with ``sum(..., DU())``.
    """

    def __init__(self, hdd=0, ssd=0):
        self.hdd, self.ssd = hdd, ssd

    def __add__(self, other):
        # Build a fresh object; neither operand is modified.
        hdd_sum = self.hdd + other.hdd
        ssd_sum = self.ssd + other.ssd
        return DU(hdd=hdd_sum, ssd=ssd_sum)

    def __iadd__(self, other):
        # In-place accumulation: mutate and hand back the same object.
        self.hdd, self.ssd = self.hdd + other.hdd, self.ssd + other.ssd
        return self

    def total(self):
        """Return the combined HDD + SSD usage."""
        return self.ssd + self.hdd


def is_on_ssd(path):
    """Return True when ``path`` begins with the string '/ssd'.

    This is a plain prefix test on the path text, so e.g. '/ssdfoo' matches
    as well; the filesystem is not consulted.
    """
    return path.startswith('/ssd')


def get_iss_instances_env():
    """Fetch the ISS instance list and index it by (short host, port).

    Performs an HTTP GET on ISS_ILIST_URL with a 10 second timeout and raises
    if the response carries an HTTP error status. Every entry of the returned
    JSON object is passed through _update_iss_instance_env; an entry for which
    processing raises is logged and left out of the result.

    Returns:
        dict mapping ``(host, port)`` to ``{"type": ..., "env": ...}``. The
        host is the part of the instance key before the first ':' cut at its
        first '.'; the port is an int when it parses as one, otherwise the raw
        string from the key.
    """
    response = requests.get(ISS_ILIST_URL, timeout=10)
    response.raise_for_status()

    instances = {}
    for key, raw_env in response.json().items():
        try:
            env, instance_type = _update_iss_instance_env(raw_env)

            host, _, port = key.partition(':')
            try:
                port = int(port)
            except ValueError:
                # qloud/yp ports: not numeric, keep the original string
                pass

            short_host = host.partition('.')[0]
            instances[(short_host, port)] = {
                "type": instance_type,
                "env": env,
            }
        except Exception:
            logging.exception("Exception while updating iss instance env")

    return instances


def _get_dump_json(instance_dir):
    """Load and return the parsed ``dump.json`` found in ``instance_dir``.

    Returns None when the file does not exist. Errors raised while opening or
    parsing an existing file propagate to the caller.
    """
    path = os.path.join(instance_dir, 'dump.json')
    if os.path.exists(path):
        with open(path) as handle:
            return json.load(handle)
    return None


def get_file_usage(file_path, follow_symlink=False):
    """Return the disk usage of a single filesystem entry as a DU.

    Args:
        file_path: path of the entry to measure.
        follow_symlink: when True the path is first resolved with
            os.path.realpath and then stat'ed; when False the entry itself is
            lstat'ed, so a symlink is measured rather than its target.

    Returns:
        DU with the usage placed in ``ssd`` or ``hdd`` according to
        is_on_ssd() applied to the (possibly resolved) path. An empty DU is
        returned when the entry cannot be stat'ed (e.g. it vanished or is not
        accessible) -- measuring is best-effort.
    """
    try:
        if follow_symlink:
            file_path = os.path.realpath(file_path)
            stat_result = os.stat(file_path)
        else:
            stat_result = os.lstat(file_path)
    except Exception:
        # Best-effort: skip entries that cannot be stat'ed. Deliberately not a
        # bare ``except`` so Ctrl-C / SystemExit still stop a long disk walk.
        return DU()

    try:
        # st_blocks counts 512-byte units, i.e. space actually allocated.
        usage = stat_result.st_blocks * 512
    except AttributeError:
        # st_blocks is not provided on every platform; use the apparent size.
        usage = stat_result.st_size

    if is_on_ssd(file_path):
        return DU(ssd=usage)
    return DU(hdd=usage)


def get_disk_usage_if_exists_or_0(path):
    """Return get_disk_usage(path), or an empty DU when ``path`` does not exist."""
    return get_disk_usage(path) if os.path.exists(path) else DU()


def get_sum_usage(original_paths):
    """Return the combined disk usage of several paths.

    Every path is resolved with os.path.realpath and duplicates are dropped,
    so two names for the same location are counted once. Paths that do not
    exist contribute nothing.
    """
    resolved_paths = {os.path.realpath(candidate) for candidate in original_paths}

    combined = DU()
    for resolved in resolved_paths:
        combined = combined + get_disk_usage_if_exists_or_0(resolved)
    return combined


def get_disk_usage(path):
    """
    Return the disk usage of a file or of a whole directory tree as a DU.

    If path is a symlink, it is followed: a non-directory is measured at its
    resolved location and a directory is walked from its resolved location.
    Child symlinks are NOT followed -- every entry found during the walk is
    measured as-is. The top-level directory entry itself is measured at the
    path given, without resolving it.
    """
    path = str(path)

    if not os.path.isdir(path):
        return get_file_usage(path, follow_symlink=True)

    # Start with the directory's own entry, then add everything below it.
    usage = get_file_usage(path)
    for parent, subdirs, files in os.walk(os.path.realpath(path)):
        for entry in files + subdirs:
            usage += get_file_usage(os.path.join(parent, entry))
    return usage


def get_resources_size(instance_dir, dump_json, resource_class):
    """Sum the disk usage of all resources of one class for an instance.

    Args:
        instance_dir: directory the resource names are relative to.
        dump_json: parsed dump.json; its 'resources' mapping goes from
            resource name to a dict carrying an '@class' entry.
        resource_class: only resources whose '@class' equals this value are
            counted.

    Returns:
        DU with the accumulated usage (empty when nothing matches).
    """
    total = DU()

    # .items() rather than .iteritems(): the latter does not exist on
    # Python 3, while .items() behaves the same for this loop on both.
    for resource_name, resource_data in dump_json['resources'].items():
        if resource_data['@class'] == resource_class:
            total += get_disk_usage(os.path.join(instance_dir, resource_name))

    return total


def get_instance_name(dump_json):
    """Return the instance name derived from the dump's 'properties'.

    The name is 'BSCONFIG_IHOST:BSCONFIG_IPORT' when that can be built;
    otherwise (a key is missing, or the values cannot be concatenated as
    strings) the 'BSCONFIG_INAME' property is returned instead.

    Raises:
        KeyError: if 'properties' is absent, or the fallback
            'BSCONFIG_INAME' is needed but missing.
    """
    properties = dump_json['properties']
    try:
        return properties['BSCONFIG_IHOST'] + ":" + properties['BSCONFIG_IPORT']
    except Exception:
        # Not a bare ``except``: KeyboardInterrupt/SystemExit must not be
        # turned into a silent fallback.
        return properties['BSCONFIG_INAME']


def get_callisto_usage(instance_dir, dump_json):
    """Return the disk usage of the instance's callisto generation directory.

    The generation name is taken from the first line of
    ``<instance_dir>/generation.conf`` matching ``export GENERATION="..."``.
    The measured directory is ``<prefix>/<generation>/<port>``, where the
    prefix is '/ssd' if 'itag_copy_on_ssd' occurs in the 'tags' property and
    '/db/www' otherwise, and the port is the 'BSCONFIG_IPORT' property.

    An empty DU is returned when generation.conf is missing, contains no
    GENERATION line, or the resulting directory does not exist.
    """
    properties = dump_json['properties']
    port = properties['BSCONFIG_IPORT']

    conf_path = os.path.join(instance_dir, 'generation.conf')
    if not os.path.exists(conf_path):
        return DU()

    generation = None
    with open(conf_path) as conf_file:
        for conf_line in conf_file:
            found = re.match('export GENERATION="(.*)"$', conf_line)
            if found:
                generation = found.group(1)
                break

    if generation is None:
        return DU()

    prefix = '/ssd' if 'itag_copy_on_ssd' in properties['tags'] else '/db/www'
    generation_dir = os.path.join(prefix, generation, str(port))

    return get_disk_usage_if_exists_or_0(generation_dir)


def get_webcache_usage(dump_json):
    """Return the combined usage of the instance's HDD and SSD webcache dirs.

    The directories are '/db/bsconfig/webcache/<name>' and
    '/ssd/webcache/<name>', where <name> comes from get_instance_name().
    """
    name = get_instance_name(dump_json)
    webcache_roots = ('/db/bsconfig/webcache', '/ssd/webcache')
    return get_sum_usage([os.path.join(root, name) for root in webcache_roots])


def get_logs_usage(dump_json):
    """Return the disk usage of log files and directories of an instance.

    Walks '/db/www/logs/' and '/ssd/www/logs/' and collects every file or
    directory whose name contains the instance port ('BSCONFIG_IPORT') as a
    substring. A matching directory is counted as a whole and not descended
    into.
    """
    port_token = str(dump_json['properties']['BSCONFIG_IPORT'])

    def collect_matches(logs_root):
        """Return paths under ``logs_root`` whose names contain the port."""
        matches = []
        if not os.path.exists(logs_root):
            return matches

        for parent, subdirs, files in os.walk(logs_root):
            matches.extend(
                os.path.join(parent, name) for name in files if port_token in name
            )

            remaining = []
            for name in subdirs:
                if port_token in name:
                    matches.append(os.path.join(parent, name))
                else:
                    remaining.append(name)
            # Pruning in place keeps os.walk out of directories that were
            # already recorded as a whole.
            subdirs[:] = remaining

        return matches

    return get_sum_usage(
        collect_matches('/db/www/logs/') + collect_matches('/ssd/www/logs/')
    )


def get_instance_disk_usage_breakdown(instance_dir):
    """Return a flat dict with the per-category disk usage of one instance.

    Categories are the instance working directory, ISS resources, ISS shards,
    webcache, callisto and logs; each yields a '<category>_hdd_size' and a
    '<category>_ssd_size' entry. The dict also carries 'version': 2.

    NOTE(review): _get_dump_json returns None when dump.json is absent, and
    the helpers called here index into that value, so a missing dump.json
    makes this function raise.
    """
    workdir = get_disk_usage(instance_dir)
    dump = _get_dump_json(instance_dir)

    resources = get_resources_size(instance_dir, dump, "ru.yandex.iss.Resource")
    shards = get_resources_size(instance_dir, dump, "ru.yandex.iss.Shard")

    webcache = get_webcache_usage(dump)
    callisto = get_callisto_usage(instance_dir, dump)
    logs = get_logs_usage(dump)

    return dict(
        version=2,

        workdir_hdd_size=workdir.hdd,
        workdir_ssd_size=workdir.ssd,

        iss_resources_hdd_size=resources.hdd,
        iss_resources_ssd_size=resources.ssd,

        iss_shards_hdd_size=shards.hdd,
        iss_shards_ssd_size=shards.ssd,

        webcache_hdd_size=webcache.hdd,
        webcache_ssd_size=webcache.ssd,

        callisto_hdd_size=callisto.hdd,
        callisto_ssd_size=callisto.ssd,

        logs_hdd_size=logs.hdd,
        logs_ssd_size=logs.ssd
    )


def get_volume_size(porto_conn, volume_path):
    """Return the 'space_used' property of a porto volume as an int.

    Returns 0 when porto reports that no volume exists at ``volume_path``.
    """
    try:
        volume = porto_conn.FindVolume(volume_path)
        space_used = volume.GetProperty('space_used')
        return int(space_used)
    except porto.exceptions.VolumeNotFound:
        return 0


def get_volumes_disk_usage(instance_data):
    """Get disk usage for instance volumes (RX-303)

    Returns a dict mapping mount point to used space. It holds one entry per
    item of ``instance_data['volumes']`` that has a non-empty 'uuid', plus
    '/' (the 'root' of the parent of 'containerPath') and '/workdir' (the
    'cwd' of 'containerPath' itself).

    When 'volumes' is missing or empty an empty dict is returned. Any error
    is logged and whatever was collected up to that point is returned.
    """
    sizes = dict()

    try:
        if 'volumes' not in instance_data or len(instance_data['volumes']) == 0:
            return sizes

        conn = porto.Connection(timeout=5)
        conn.connect()

        # ordinary volumes: looked up by uuid under VOLUMES_ROOT
        for volume in instance_data['volumes']:
            if volume['uuid'] == '':
                continue
            sizes[volume['mountPoint']] = get_volume_size(
                conn, '{}/{}'.format(VOLUMES_ROOT, volume['uuid'])
            )

        workdir_container = instance_data['containerPath']

        # root volume: belongs to the parent of the instance container
        root_container = workdir_container.rpartition('/')[0]
        sizes['/'] = get_volume_size(conn, conn.Find(root_container).GetData('root'))

        # workdir volume: the cwd of the instance container itself
        sizes['/workdir'] = get_volume_size(conn, conn.Find(workdir_container).GetData('cwd'))
    except Exception as e:
        logging.exception('Got exception when calculating volumes disk usage: {}'.format(str(e)))

    return sizes


def get_instances_with_usages():
    """Collect tags and disk usage for every ISS instance on the host.

    For each instance returned by get_iss_instances_env() a dict is built
    with its tags, group, port, a timestamp, the disk usage breakdown of its
    instance directory and the sizes of its volumes ('volumes_size').

    Processing is isolated per instance: if anything fails for one instance
    (for example its dump.json is missing), the problem is logged and that
    instance is left out, instead of aborting the whole collection.

    NOTE(review): the breakdown dict carries its own 'version' key, which
    replaces the instance version stored under the same key when the two are
    merged. This is kept as-is because consumers may rely on it -- confirm
    whether it is intended.

    Returns:
        list of per-instance dicts.
    """
    instance_to_du = {}

    for instance, instance_data in get_iss_instances_env().items():
        try:
            env = instance_data['env']
            entry = {
                'major_tag': instance_major_tag(instance_data),
                'minor_tag': instance_minor_tag(instance_data),
                'version': instance_version(instance_data),
                'group': instance_group(instance_data),
                "port": env['properties']['port'],
                "ts": time.time()
            }
            entry.update(get_instance_disk_usage_breakdown(str(env['instanceDir'])))
            entry['volumes_size'] = get_volumes_disk_usage(env)
        except Exception:
            logging.exception("Problem when adding instance {}".format(instance))
        else:
            # Store only fully built entries.
            instance_to_du[instance] = entry

    # An explicit list keeps the result JSON/msgpack serializable on Python 3
    # too, where dict.values() is a view.
    return list(instance_to_du.values())


def main():
    """Command-line entry point: print usage data for all instances.

    The -f/--format option selects the output: 'pretty' (default) prints
    indented JSON with sorted keys, 'binary' prints the msgpack-packed data.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '-f', '--format',
        type=str,
        default='pretty',
        choices=('pretty', 'binary'),
        help='Output in serialized or pretty format'
    )
    output_format = arg_parser.parse_args().format

    usages = get_instances_with_usages()

    if output_format == 'binary':
        print(msgpack.packb(usages))
    elif output_format == 'pretty':
        print(json.dumps(usages, sort_keys=True, indent=4))

# Run the collector only when this file is executed as a script.
if __name__ == '__main__':
    main()
