import json
import re
import subprocess
from subprocess import PIPE

# svn URL of the JSON file whose history is analysed; it is passed to
# `svn log` (get_revisions) and `svn cat` (get_jsons).
PATH_TO_FILE = "svn+ssh://arcadia.yandex.ru/robots/trunk/directadmin-data/abc-hardware/direct-hardware.json"
# Size of one batch of `svn cat` processes that get_jsons starts in parallel.
DOWNLOAD_CHUNK_SIZE = 10

# Notes keyed by date string (YYYY-MM-DD). create_table looks the row's date
# up here and prints the note in the last column of that row.
comments = {
    '2020-03-17': 'Стали учитывать полки'  # roughly: "started counting shelves"
}


# main
def create_table():
    """Print a tab-separated table of unit counts, one row per revision date.

    Takes up to 100 ``(revision, date)`` pairs from ``get_revisions``,
    downloads the file content of every revision with ``get_jsons`` and
    computes the counts with ``get_stats``. Each row consists of the date, the
    five numbers returned by ``get_stats`` and the note stored in ``comments``
    for that date (an empty string when there is none). Rows are printed in
    the reverse of the order in which ``get_revisions`` returned them.

    When no revisions are found only the title and the header are printed.
    """
    revisions_with_dates = get_revisions(limit=100)
    # Split the pairs explicitly: ``a, b = zip(*pairs)`` raises ValueError
    # when ``pairs`` is empty (e.g. because ``svn log`` failed).
    revisions = [revision for revision, _ in revisions_with_dates]
    dates = [date for _, date in revisions_with_dates]
    jsons = get_jsons(revisions)

    rows = [
        [date, *get_stats(json_str), comments.get(date, '')]
        for date, json_str in zip(dates, jsons)
    ]

    header = ('#дата', 'всего', 'mysql', 'ppchouse', 'graphite', 'прочее', 'коммент')
    # One '{}' per column, each followed by a tab (including the last one).
    template = '{}\t' * len(header)
    print("Подсчёт юнитов:")
    print(template.format(*header))
    for row in reversed(rows):
        print(template.format(*row))


# returns revision names with dates (take last per day)
def get_revisions(limit):
    """Return ``(revision, date)`` pairs parsed from ``svn log``, one per day.

    Only the header line of each log entry is parsed, i.e. a line shaped like
    ``r123 | author | 2020-03-17 12:00:00 +0300 (...) | 1 line``. Requiring at
    least one digit after ``r`` and the ``|`` separators keeps commit-message
    lines that merely start with "r" and mention a date from being mistaken
    for revisions.

    Args:
        limit: maximum number of pairs to return. When more are available,
            ``limit=<limit>`` is printed to signal the truncation.

    Returns:
        A list of ``(revision, date)`` tuples such as
        ``('r123', '2020-03-17')``, in the order they appear in the log
        output. Among consecutive entries that share a date only the first is
        kept; ``svn log`` lists the newest revisions first by default, so
        that entry is the last revision of the day.
    """
    result = subprocess.getoutput(f"svn log {PATH_TO_FILE}")
    header_re = re.compile(r"(r\d+) \| [^|]* \| (\d{4}-\d{2}-\d{2}) ")
    matches = (header_re.match(line) for line in result.splitlines())
    revisions = [(match[1], match[2]) for match in matches if match is not None]

    # take the last per day
    revisions_unique = []
    prev_date = None
    for revision, date in revisions:
        if date != prev_date:
            revisions_unique.append((revision, date))
        prev_date = date

    if len(revisions_unique) > limit:
        print(f'limit={limit}')
    return revisions_unique[:limit]


def get_jsons(revisions):
    """Download the file content at every given revision with ``svn cat``.

    The commands are run in batches of ``DOWNLOAD_CHUNK_SIZE`` processes that
    are started together; progress is reported on a single line that is
    rewritten with a carriage return.

    Args:
        revisions: iterable of revision identifiers; each one is appended to
            ``PATH_TO_FILE`` after an ``@``.

    Returns:
        A list with the UTF-8 decoded output of each command, in the same
        order as ``revisions``.

    Raises:
        subprocess.CalledProcessError: if an ``svn cat`` process exits with a
            non-zero status. The exception's ``stderr`` attribute holds what
            svn wrote to its standard error.
    """
    commands = [["svn", "cat", f"{PATH_TO_FILE}@{revision}"] for revision in revisions]
    print(f'скачано 0/{len(commands)} ревизий', end='')

    jsons = []
    for commands_chunk in chunked(commands, DOWNLOAD_CHUNK_SIZE):
        # start in parallel
        processes = [subprocess.Popen(cmd, stdout=PIPE, stderr=PIPE) for cmd in commands_chunk]
        try:
            for cmd, process in zip(commands_chunk, processes):
                stdout, stderr = process.communicate()
                if process.returncode != 0:
                    # Fail here with svn's own message instead of returning
                    # empty content that cannot be parsed as JSON later.
                    print()  # terminate the progress line first
                    raise subprocess.CalledProcessError(
                        process.returncode, cmd, output=stdout, stderr=stderr)
                jsons.append(stdout.decode('utf-8'))
                print(f'\rскачано {len(jsons)}/{len(commands)} ревизий', end='')
        finally:
            # No-op on success; after a failure, do not leave the remaining
            # processes of the batch running.
            for process in processes:
                if process.poll() is None:
                    process.kill()
                    process.communicate()

    print()
    return jsons


def chunked(arr, chunk_size):
    """Split a sliceable sequence into consecutive slices of ``chunk_size`` items.

    The last slice is shorter when ``len(arr)`` is not a multiple of
    ``chunk_size``. Returns a list of slices (empty for an empty ``arr``).
    """
    pieces = []
    for start in range(0, len(arr), chunk_size):
        stop = start + chunk_size
        pieces.append(arr[start:stop])
    return pieces


def get_stats(json_str):
    """Count units in one JSON snapshot, overall and per host category.

    ``json_str`` is parsed as an iterable of hardware records. A record whose
    ``type`` is ``'storage'`` adds its own unit count (looked up by ``bot_id``
    in the result of ``precompute_storage_units``) to the total. Any other
    record adds one unit to the total; its category weight is one plus the
    units of every storage listed under ``storages`` that is known. The
    weight is added to each category whose marker occurs in the record's
    ``fqdn``.

    Returns:
        A tuple ``(total, mysql, ppchouse, graphite, other)`` where ``other``
        is the total minus the three category counts.
    """
    mysql_markers = ('ppcdata', 'mysql', 'ppcdict', 'ppclog', 'ppcmonitor', 'ppcstandby')

    inventory = json.loads(json_str)
    units_of_storage = precompute_storage_units(inventory)

    total = 0
    mysql = 0
    ppchouse = 0
    graphite = 0

    for item in inventory:
        if item['type'] == 'storage':
            total += units_of_storage[item['bot_id']]
            continue

        total += 1

        # one unit for the server itself plus the units of its known storages
        attached = item.get('storages', [])
        host_units = sum(units_of_storage.get(int(bot_id), 0) for bot_id in attached) + 1
        name = item['fqdn']

        if any(marker in name for marker in mysql_markers):
            mysql += host_units
        if 'ppchouse' in name:
            ppchouse += host_units
        if 'graphite' in name:
            graphite += host_units

    other = total - mysql - ppchouse - graphite
    return total, mysql, ppchouse, graphite, other


# returns dict bot_id -> units
def precompute_storage_units(parsed):
    """Map the ``bot_id`` of every storage record to its size in units.

    Records whose ``type`` is not ``'storage'`` are ignored. The size is
    taken from a fixed table keyed by the record's ``kind``; for a kind that
    is not in the table a notice is printed and 3 units are assumed.
    """
    known_units = {
        '15/SATA/3.5/AIC-XJ-115-02': 1,
        '15/SATA/3.5/DELL-MD1000': 4,
        '24/SATA/3.5/AIC-J2024-01': 2,
        '24/SATA/3.5/AIC-XJ3000-2243S': 2,
        '24/SATA/3.5/GB/T250-S3R/2U': 2,
        '24/SATA/3.5/Huawei-OceanSpaceS2300': 4,
        '24/SATA/3.5/Huawei-OceanStorS2300': 4,
        '60/SATA/3.5/AIC-XJ3000-4603S': 4,
    }
    result = {}

    for item in parsed:
        if item['type'] == 'storage':
            kind = item['kind']
            units = known_units.get(kind)
            if units is None:
                print(f'unknown storage kind: {kind}, 3 units by default.')
                units = 3  # fallback for kinds missing from the table
            result[item['bot_id']] = units
    return result


# Build and print the table only when the file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    create_table()
