#!/usr/bin/env python2.7
# -*- coding: utf-8 -*-

from yt.wrapper import YtClient, JsonFormat, TablePath
from datetime import datetime, timedelta

# strptime/strftime pattern used both to parse child node names into dates
# and to build table paths from dates.
DATE_FORMAT = '%Y-%m-%d'
# Default for DataAppender.update(max_prev_days=...): how many days before
# the target day the log window starts when no previous result is available.
MAX_PREV_DAYS = 3
# Number of most recent dated result tables that the cleanup in
# DataAppender.get_latest_not_empty_result leaves in place.
MAX_KEEP_OLD = 3


def parse_date_safe(date, default=None):
    """Parse ``date`` (a string in ``DATE_FORMAT``) into a ``datetime.date``.

    Returns ``default`` instead of raising when the value does not match the
    format (``ValueError``) or is not a string at all (``TypeError``).
    """
    try:
        parsed = datetime.strptime(date, DATE_FORMAT)
    except (TypeError, ValueError):
        return default
    return parsed.date()


def resolve_day(day):
    """Turn ``day`` into a ``datetime.date``, falling back to today.

    ``day`` is parsed with ``parse_date_safe``; when it cannot be parsed
    (including ``day=None``) the current local date is returned instead.
    """
    today = datetime.now().date()
    return parse_date_safe(day, default=today)


class DataAppender(object):
    """Appends dated log tables to a dated result table via YT map-reduce.

    ``logs_path`` and ``result_path`` are read as nodes whose children are
    named by date in ``DATE_FORMAT``; children whose names do not parse as
    such a date are ignored.
    """

    def __init__(self, proxy, token, logs_path, result_path):
        """Create the YT client and remember the working paths.

        Args:
            proxy: passed to ``YtClient`` as ``proxy``.
            token: passed to ``YtClient`` as ``token``.
            logs_path: node whose date-named children are the input logs.
            result_path: node under which date-named result tables live.
        """
        super(DataAppender, self).__init__()
        self.client = YtClient(proxy=proxy, token=token)
        self.logs_path = logs_path
        self.result_path = result_path

    @staticmethod
    def _dated_path(root, date):
        """Return the path of the child of ``root`` named after ``date``."""
        return root + '/' + date.strftime(DATE_FORMAT)

    def _child_dates(self, root):
        """Return the dates encoded in the names of the children of ``root``.

        Names that do not parse with ``DATE_FORMAT`` are skipped. The order
        follows whatever order the client returns the keys in.
        """
        parsed = (parse_date_safe(name)
                  for name in self.client.get(root).keys())
        return [date for date in parsed if date is not None]

    def get_new_logs(self, edge, day):
        """Return paths of log tables dated within ``[edge, day]`` inclusive.

        Args:
            edge: first date (``datetime.date``) to include.
            day: last date (``datetime.date``) to include.
        """
        return [self._dated_path(self.logs_path, date)
                for date in self._child_dates(self.logs_path)
                if edge <= date <= day]

    def get_latest_not_empty_result(self, day):
        """Find the newest result table dated on or before ``day`` with rows.

        Tables are probed from newest to oldest by reading their
        ``@row_count`` attribute. As a side effect, when the table found is
        among the ``MAX_KEEP_OLD`` newest ones, every older result table
        beyond those ``MAX_KEEP_OLD`` is removed; otherwise nothing is
        removed, so the table found is never deleted.

        Returns:
            The ``datetime.date`` of that table, or ``None`` when there is no
            result table with a positive row count (nothing is removed then).
        """
        dates = sorted((date for date in self._child_dates(self.result_path)
                        if date <= day), reverse=True)

        latest_index = None
        for index, date in enumerate(dates):
            row_count = self.client.get(
                self._dated_path(self.result_path, date) + '/@row_count')
            if row_count > 0:
                latest_index = index
                break

        # No table has any rows: report "no result" rather than falling
        # through to the oldest empty table.
        if latest_index is None:
            return None

        # Only trim when the table we depend on is inside the kept window.
        if latest_index < MAX_KEEP_OLD:
            for date in dates[MAX_KEEP_OLD:]:
                self.client.remove(self._dated_path(self.result_path, date),
                                   force=True)

        return dates[latest_index]

    def update(self, schema, mr_script_file, reduce_by, sort_by,
               day=None, force_prev_days=None, max_prev_days=MAX_PREV_DAYS):
        """Build the result table for ``day`` from new logs and the last result.

        The log window ends at ``day`` and starts at:
          * ``day - force_prev_days`` when ``force_prev_days`` is given;
          * otherwise the date of the latest non-empty result, if any;
          * otherwise ``day - max_prev_days``.
        The latest non-empty result table (if any) is always added to the
        inputs, then map-reduce and sort are run on the output table.

        Args:
            schema: stored as the ``schema`` attribute of the output table path.
            mr_script_file: script run as ``python <file> map`` and
                ``python <file> reduce``; also attached to both stages.
            reduce_by: forwarded to ``run_map_reduce``.
            sort_by: forwarded to ``run_sort``.
            day: target date string in ``DATE_FORMAT``; ``None`` or an
                unparsable value means today (see ``resolve_day``).
            force_prev_days: explicit window length in days, or ``None``.
            max_prev_days: window length in days used when there is no
                previous non-empty result.

        Returns:
            The ``TablePath`` of the output table.
        """
        self.client.mkdir(self.result_path, recursive=True)

        day = resolve_day(day)
        latest_result = self.get_latest_not_empty_result(day)

        if force_prev_days is not None:
            edge = day - timedelta(days=force_prev_days)
        elif latest_result is not None:
            edge = latest_result
        else:
            edge = day - timedelta(days=max_prev_days)

        input_tables = self.get_new_logs(edge, day)
        if latest_result is not None:
            # Carry the previously accumulated data over into the new table.
            input_tables.append(
                self._dated_path(self.result_path, latest_result))

        output_table = TablePath(self._dated_path(self.result_path, day),
                                 attributes={"schema": schema})

        # NOTE(review): the commands reference mr_script_file verbatim;
        # presumably the attached file is visible to the job under that same
        # name - confirm for paths that contain directories.
        self.client.run_map_reduce('python ' + mr_script_file + ' map',
                                   'python ' + mr_script_file + ' reduce',
                                   input_tables,
                                   output_table,
                                   sync=True,
                                   reduce_by=reduce_by,
                                   map_files=[mr_script_file],
                                   reduce_files=[mr_script_file],
                                   format=JsonFormat())

        self.client.run_sort(output_table, sort_by=sort_by, sync=True)

        return output_table
