# -*- coding: utf-8 -*-
"""
Export of user space counters to YT

https://yt.yandex-team.ru/hahn/navigation?path=//home/mpfs-stat/storage/user_space_counters

The export runs in several processes; each one writes to its own temporary table, and the temporary tables are then concatenated into a single result table.
"""
import itertools
import yt.wrapper as yt
import ujson
from datetime import date, datetime
from contextlib import contextmanager
from multiprocessing.pool import ThreadPool
from multiprocessing import Pool
from multiprocessing import Process
from mpfs.engine.process import setup_admin_script
setup_admin_script()
from mpfs.dao.base import Session
from mpfs.metastorage.postgres.query_executer import PGQueryExecuter
from mpfs.core.mrstat.stat_utils import set_yt_proxy, quit_if_mrstat_disabled
from mpfs.common.util import chunks2

# Export date, evaluated once at import time; it names both the result table
# and the temporary folder used by this run.
today_date = date.today()

# Folder holding the per-date result tables.
RESULT_FOLDER = '//home/mpfs-stat/storage/user_space_counters'
# Per-date scratch folder where every worker writes its own temporary table.
TMP_FOLDER = '//home/mpfs-stat/tmp/user_space_counters/%s' % (today_date,)
# Final table for this run: <RESULT_FOLDER>/<today_date>.
RESULT_TABLE = '%s/%s' % (RESULT_FOLDER, today_date)

# The query is ordered by uid; the consumer groups consecutive rows by uid.
SQL = (
    "SELECT uid, path, data, version FROM disk.disk_info WHERE path IN ('/limit', '/total_size') ORDER BY uid;"
)


def fetch_all_counters_from_shard(shard_endpoint):
    """Yield one JSON-encoded counter record per uid read from a shard.

    Executes the module-level ``SQL`` query through a session created for
    *shard_endpoint* and groups consecutive rows by ``uid``. The grouping
    relies on the query's ``ORDER BY uid``.

    Each yielded value is the ``ujson.dumps`` output for a dict with the keys
    ``uid``, ``limit``, ``total_size``, ``db_host`` and ``export_dt``.
    ``limit`` and ``total_size`` stay 0 when no row supplies them or when the
    row's ``data`` cannot be converted to a number.
    """
    session = Session.create_from_shard_endpoint(shard_endpoint)
    rows_cursor = session.execute(SQL)
    shard_host = session._pg_connection.engine.url.host
    # Taken once per shard, so every record from this shard shares the value.
    exported_at = '%s' % datetime.now()

    def uid_of(db_row):
        return db_row.uid

    for uid, uid_rows in itertools.groupby(rows_cursor, key=uid_of):
        counters = {
            'uid': uid,
            'limit': 0,
            'total_size': 0,
            'db_host': shard_host,
            'export_dt': exported_at
        }
        for db_row in uid_rows:
            try:
                # Go through float() first so values such as "12.0" are accepted.
                counter_value = int(float(db_row.data))
                # '/limit' -> 'limit', '/total_size' -> 'total_size'
                counters[db_row.path.strip('/')] = counter_value
            except Exception:
                # Best effort: a row that cannot be parsed leaves the default 0.
                continue
        yield ujson.dumps(counters)


def fetch_all_counters(shard_endpoints):
    """Yield the JSON records of every shard in *shard_endpoints*, in order.

    Shards are read lazily one after another: the next shard is queried only
    once the previous shard's records have been exhausted.
    """
    per_shard_records = (
        fetch_all_counters_from_shard(endpoint) for endpoint in shard_endpoints
    )
    for record in itertools.chain.from_iterable(per_shard_records):
        yield record


def upload_to_yt(shard_endpoints):
    """Write the counters of the given shards into one temporary YT table.

    The table lives in ``TMP_FOLDER`` and is named after the shard names
    joined with ``_``. Any exception is printed (its repr) and then re-raised
    to the caller.
    """
    try:
        shard_names = (str(endpoint.get_name()) for endpoint in shard_endpoints)
        destination = '%s/%s' % (TMP_FOLDER, '_'.join(shard_names))
        records = fetch_all_counters(shard_endpoints)
        yt.write_table(destination, records, raw=True, format="json")
    except Exception as error:
        # Print the error where it happened, then let it propagate.
        print("%r" % error)
        raise


def before_export():
    """Recreate ``TMP_FOLDER`` from scratch so the run starts with no stale tables."""
    leftovers_present = yt.exists(TMP_FOLDER)
    if leftovers_present:
        # Drop whatever a previous run for the same date left behind.
        yt.remove(TMP_FOLDER, recursive=True)
    yt.mkdir(TMP_FOLDER, recursive=True)


def after_export():
    """Merge all temporary tables into ``RESULT_TABLE`` and drop ``TMP_FOLDER``.

    Creates ``RESULT_FOLDER`` when it is missing and removes an already
    existing ``RESULT_TABLE`` before concatenating.
    """
    result_folder_missing = not yt.exists(RESULT_FOLDER)
    if result_folder_missing:
        yt.mkdir(RESULT_FOLDER, recursive=True)

    stale_result_present = yt.exists(RESULT_TABLE)
    if stale_result_present:
        yt.remove(RESULT_TABLE)

    # Every node in the temporary folder is one worker's output table.
    tmp_tables = yt.list(TMP_FOLDER, absolute=True)
    yt.concatenate(tmp_tables, RESULT_TABLE)

    yt.remove(TMP_FOLDER, recursive=True)


@contextmanager
def print_duration(msg):
    """Context manager that prints how long the wrapped block took.

    On exit, including exit through an exception, prints one line of the form
    ``<end time> <elapsed> msg``. Exceptions from the block are not suppressed.
    """
    started_at = datetime.now()
    try:
        yield
    finally:
        finished_at = datetime.now()
        elapsed = finished_at - started_at
        print("<%s> <%s> %s" % (finished_at, elapsed, msg))

def _run_in_subprocess(target):
    """Run *target* in a child process and raise if the child did not exit cleanly.

    Raises:
        RuntimeError: when the child's exit code is not 0 (for example, an
            uncaught exception in *target*).
    """
    child = Process(target=target)
    child.start()
    child.join()
    if child.exitcode != 0:
        raise RuntimeError(
            '%s failed in a subprocess (exit code %s)' % (target.__name__, child.exitcode)
        )


if __name__ == '__main__':
    quit_if_mrstat_disabled()
    set_yt_proxy()

    # The YT client misbehaves if any request is made in the main process
    # before the children are forked (errors show up in the children), so all
    # preparatory work runs in a subprocess rather than in the main process.
    with print_duration('Prepare'):
        # Abort here on failure instead of exporting into a folder that was
        # never prepared.
        _run_in_subprocess(before_export)

    with print_duration('Export'):
        # Shard endpoints are split into tasks by chunks2; one pool worker is
        # started per task.
        tasks = list(chunks2(PGQueryExecuter().get_all_shard_endpoints(), 4))
        pool = Pool(len(tasks))
        pool.map(upload_to_yt, tasks)
        pool.close()
        pool.join()

    with print_duration('Concatenate and clean up'):
        # A failed concatenation must not pass silently: without this check
        # the script would exit successfully with no result table.
        _run_in_subprocess(after_export)
