#!/usr/bin/env python2.7
# -*- coding: utf-8 -*-

from os import getenv

from yt.wrapper import YtClient, JsonFormat, TablePath
from datetime import datetime, timedelta
import yt.yson as yson


# Default YT cluster proxy handed to YtClient.
PROXY = 'hahn'
# YT access token, read from the environment (None when the variable is unset).
TOKEN = getenv('DISK_AXIS_YT_TOKEN')
# YT node whose children are enumerated as per-day log tables.
LOGS_PATH = '//statbox/axis-all-persfacts-log'
# YT node that holds the per-day precharged output tables.
PRECHARGED_PATH = '//home/axis/precharged'

# Default look-back window (in days) used when no non-empty precharged
# table is available to continue from.
MAX_PRECHARGE_DAYS = 100

# Number of most recent precharged tables retained by the cleanup step.
MAX_KEEP_OLD = 3

# Script uploaded to the map and reduce jobs; it is invoked with a
# 'map' or 'reduce' argument.
MR_SCRIPT_FILE = 'precharge_facts_mr.py'

# Format of the date used as a table name under LOGS_PATH and PRECHARGED_PATH.
DATE_FORMAT = '%Y-%m-%d'

# Schema attached to the output table; every column is declared as a string.
SCHEMA = yson.YsonList([
    {"name": "uid", "type": "string"},
    {"name": "data", "type": "string"},
    {"name": "extract_time", "type": "string"},
    {"name": "iso_eventtime", "type": "string"},
    {"name": "source", "type": "string"},
    {"name": "source_uri", "type": "string"},
    {"name": "subkey", "type": "string"},
    {"name": "tskv_format", "type": "string"},
    {"name": "type", "type": "string"},
    {"name": "unixtime", "type": "string"}
])


def parse_date_safe(date, default=None):
    """Parse a DATE_FORMAT string into a ``datetime.date``.

    :param date: value to parse; expected to be a string in DATE_FORMAT.
    :param default: value returned when ``date`` cannot be parsed.
    :return: the parsed ``datetime.date``, or ``default`` when parsing
        raises ``ValueError`` (malformed text) or ``TypeError``
        (non-string input such as ``None``).
    """
    try:
        parsed = datetime.strptime(date, DATE_FORMAT)
    except (TypeError, ValueError):
        return default
    return parsed.date()


def resolve_day(day):
    """Turn ``day`` into a ``datetime.date``, falling back to today.

    :param day: a DATE_FORMAT string, or anything unparsable (e.g. ``None``).
    :return: the parsed date, or the current local date when ``day``
        cannot be parsed.
    """
    today = datetime.now().date()
    return parse_date_safe(day, today)


class FactsPrecharger(object):
    """Builds the per-day precharged table on YT from the daily log tables.

    Each run combines the log tables of a date range with the most recent
    non-empty precharged table, pushes them through the map-reduce script
    MR_SCRIPT_FILE and writes the result to PRECHARGED_PATH/<day>.
    """

    def __init__(self, proxy=PROXY, token=TOKEN):
        """Create the YT client used by all other methods.

        :param proxy: YT cluster proxy name.
        :param token: YT access token.
        """
        super(FactsPrecharger, self).__init__()
        self.client = YtClient(proxy=proxy, token=token)

    @staticmethod
    def _precharged_table_path(date):
        """Return the path of the precharged table for ``date``."""
        return PRECHARGED_PATH + '/' + date.strftime(DATE_FORMAT)

    def get_new_logs(self, edge, day):
        """Return paths of the log tables dated within ``[edge, day]``.

        :param edge: earliest date to include (inclusive).
        :param day: latest date to include (inclusive).
        :return: list of table paths under LOGS_PATH; children whose names
            are not DATE_FORMAT dates are ignored.
        """
        paths = []
        for name in self.client.get(LOGS_PATH).keys():
            date = parse_date_safe(name)
            # Children that are not named after a date parse to None.
            # Comparing None with a date raises TypeError, so skip them.
            if date is not None and edge <= date <= day:
                paths.append(LOGS_PATH + '/' + date.strftime(DATE_FORMAT))
        return paths

    def get_latest_not_empty_precharged(self, day):
        """Find the newest precharged table, not later than ``day``, with rows.

        As a side effect, when the table found is among the MAX_KEEP_OLD
        newest candidates, every older candidate beyond the first
        MAX_KEEP_OLD is removed from YT.

        :param day: latest date to consider (inclusive).
        :return: the date of the newest non-empty precharged table, or
            ``None`` when there is no candidate or all candidates are empty.
        """
        candidates = (parse_date_safe(name)
                      for name in self.client.get(PRECHARGED_PATH).keys())
        # Newest first, so the first non-empty table found is the latest one.
        dates = sorted((date for date in candidates
                        if date is not None and date <= day),
                       reverse=True)

        latest_index = None
        for index, date in enumerate(dates):
            row_count = self.client.get(
                self._precharged_table_path(date) + '/@row_count')
            if row_count > 0:
                latest_index = index
                break

        # Nothing usable: do not report an empty table as "not empty".
        if latest_index is None:
            return None

        # Only clean up when the table we rely on survives the cleanup.
        if latest_index < MAX_KEEP_OLD:
            for stale in dates[MAX_KEEP_OLD:]:
                self.client.remove(self._precharged_table_path(stale),
                                   force=True)

        return dates[latest_index]

    def precharge(self, day=None,
                  max_precharge_days=MAX_PRECHARGE_DAYS,
                  force_precharge_days=None):
        """Build and sort the precharged table for ``day``.

        :param day: target day as a DATE_FORMAT string; anything unparsable
            (including ``None``) means today.
        :param max_precharge_days: look-back window in days, used when there
            is no non-empty precharged table to continue from.
        :param force_precharge_days: when not ``None``, the look-back window
            in days that overrides both the latest precharged date and
            ``max_precharge_days``.
        """
        self.client.mkdir(PRECHARGED_PATH, recursive=True)

        day = resolve_day(day)
        latest_precharged = self.get_latest_not_empty_precharged(day)

        if force_precharge_days is not None:
            edge = day - timedelta(days=force_precharge_days)
        elif latest_precharged:
            edge = latest_precharged
        else:
            edge = day - timedelta(days=max_precharge_days)

        input_tables = self.get_new_logs(edge, day)
        # The previous result is fed back in together with the new logs.
        if latest_precharged:
            input_tables.append(
                self._precharged_table_path(latest_precharged))

        output_table = TablePath(self._precharged_table_path(day),
                                 attributes={"schema": SCHEMA})

        self.client.run_map_reduce('python ' + MR_SCRIPT_FILE + ' map',
                                   'python ' + MR_SCRIPT_FILE + ' reduce',
                                   input_tables,
                                   output_table,
                                   sync=True,
                                   reduce_by=['uid', 'unixtime',
                                              'type', 'data'],
                                   map_files=[MR_SCRIPT_FILE],
                                   reduce_files=[MR_SCRIPT_FILE],
                                   format=JsonFormat())

        self.client.run_sort(output_table, sort_by=['uid'], sync=True)

if __name__ == '__main__':
    # Script entry point: precharge with the default proxy, token and day.
    FactsPrecharger().precharge()
