#!/usr/bin/env python
# -*- coding: utf-8 -*-
import datetime
import yt.wrapper as yt

from nile.api.v1 import clusters
from qb2.api.v1 import extractors as se
from qb2.api.v1 import filters as sf

# NOTE(review): setup_admin_script() is called before the remaining mpfs
# imports below; presumably they rely on the environment it prepares, so keep
# this ordering -- TODO confirm.
from mpfs.engine.process import setup_admin_script
setup_admin_script()

from mpfs.config import settings
from mpfs.core.mrstat.stat_utils import set_yt_proxy, StatPublisher, quit_if_mrstat_disabled
from mpfs.core.mrstat.report import parse_args
from mpfs.core.file_recovery.logic.manager import process_raw_reports


# YT directory that holds the extracted report tables; the script writes one
# table per run, named after the first requested date.
YT_REPORTS_FOLDER = '//home/mpfs-stat/storage/file_recovery_reports'


def convert_rest_path_to_mpfs(rest_path):
    """Translate a REST-style ``disk:/...`` path into an MPFS ``/disk/...`` path.

    :param rest_path: path string as received in the REST request
    :return: the path with the ``disk:/`` prefix replaced by ``/disk/``,
        or ``None`` when the path does not start with ``disk:/``
    """
    rest_prefix = 'disk:/'
    if rest_path.startswith(rest_prefix):
        relative_part = rest_path[len(rest_prefix):]
        return "/disk/" + relative_part
    return None


def extaract_reports(dates, result_path):
    """Build and run a YT job that extracts file recovery reports from access logs.

    The job concatenates the ``ydisk-mpfs-access-log`` tables for every entry
    of ``dates``, keeps records with appname ``platform``, status ``204`` and
    a page under ``/v1/disk/restore/report/`` whose path converts to an MPFS
    ``/disk`` path, leaves unique records by ``(uid, mpfs_path)`` and puts
    the result into ``result_path``.

    :param dates: iterable of table names appended to the access-log folder
    :param result_path: YT path of the output table
    """
    hahn = clusters.Hahn(settings.mrstat['yt_token'])
    job = hahn.job()

    # One input table per requested date.
    access_logs = []
    for day in dates:
        access_logs.append(job.table('//statbox/ydisk-mpfs-access-log/%s' % day))
    merged_logs = job.concat(*access_logs)

    report_fields = [
        se.log_field('uid'),
        # Report name is the last segment of the requested page.
        se.custom('report_name', lambda p: p.rstrip('/').rsplit('/', 1)[-1], 'page'),
        se.parameters('local_md5', 'local_sha256', 'local_size',
                      'remote_md5', 'remote_sha256', 'remote_size',
                      'resource_id'),
        se.parameter('path').rename('rest_path'),
        se.custom('mpfs_path', convert_rest_path_to_mpfs, 'rest_path'),
    ]
    report_filters = [
        sf.equals('appname', 'platform'),
        sf.equals('status', '204'),
        sf.startswith('page', '/v1/disk/restore/report/'),
        sf.startswith('mpfs_path', '/disk'),
    ]

    parsed_reports = merged_logs.qb2(
        log='ydisk-mpfs-access-log',
        fields=report_fields,
        filters=report_filters
    )
    unique_reports = parsed_reports.unique('uid', 'mpfs_path')
    unique_reports.put(result_path)

    job.run()


def download_and_process_reports(reports_yt_path):
    """Read the extracted reports table from YT and hand it to the processor.

    :param reports_yt_path: YT path of the table with extracted reports
    :return: whatever ``process_raw_reports`` returns for the raw JSON rows,
        or ``None`` (after printing a message) when the table does not exist
    """
    set_yt_proxy()
    if yt.exists(reports_yt_path):
        # raw=True passes the rows through unparsed; parsing is left to
        # process_raw_reports.
        raw_rows = yt.read_table(reports_yt_path, format="json", raw=True)
        return process_raw_reports(raw_rows)

    print("Table %s not exists" % reports_yt_path)
    return None


# Report configuration (YAML) passed to StatPublisher.create_and_upload:
# rows are keyed by date and report-processing status and carry a single
# numeric measure, the number of events.
REPORT_CONFIG_YAML = """
---
dimensions:
- fielddate: date
- status: string
measures:
- num: number
titles:
  num: Кол-во событий
  status: Статус обработки репорта
"""


def publish(counter):
    """Publish report-processing counters via ``StatPublisher``.

    Every ``status -> count`` pair of ``counter`` becomes one report row
    stamped with today's date. Publishing is best-effort: an exception raised
    by the publisher is printed and swallowed so that it does not fail the
    whole script.

    :param counter: mapping-like object exposing ``iteritems()``; a falsy
        value (``None`` or empty) makes the call a no-op
    :return: None
    """
    if not counter:
        return

    # Take the date once so that all rows of one run share the same
    # fielddate, even if the run happens to cross midnight.
    fielddate = str(datetime.date.today())
    report_data = [
        {'fielddate': fielddate, 'status': str(status), 'num': int(num)}
        for status, num in counter.iteritems()
    ]

    # We do not want to fail the whole process because of publishing.
    try:
        StatPublisher.create_and_upload(
            'Disk/DiskInternal/FileRecoveryReportsProcessing',
            'Восстановление. Обработка репортов',
            REPORT_CONFIG_YAML,
            report_data
        )
    except Exception as e:
        print("Got exception during publishing. %r" % e)


if __name__ == '__main__':
    # NOTE(review): presumably stops the script when mrstat is disabled --
    # confirm in stat_utils.
    quit_if_mrstat_disabled()
    # Only the first of the three values returned by parse_args() is used.
    dates, _, _ = parse_args()
    # Logs for all requested dates end up in a single table named after the
    # first date.
    result_path = '%s/%s' % (YT_REPORTS_FOLDER, dates[0])

    # Pipeline: extract reports on YT, process them, publish the outcome.
    extaract_reports(dates, result_path)
    stat = download_and_process_reports(result_path)
    publish(stat)
