# coding=utf-8
import logging
import functools
from datetime import datetime, timedelta, date
from typing import Union, Tuple

from django.conf import settings
import yt.wrapper as yt

from travel.avia.admin.lib.yt_helpers import configure_wrapper

# Keys that reduce_old_records leaves out when it compares two records
# for equality.
REMOVE_FIELDS = [
    '_stbx',
    'id',
    'source_uri',
    'tskv_format',
    'unixtime',
    'original_log_datetime',
    'iso_eventtime',
    'lmt',
    'title',
    '_source_table',
]


@yt.aggregator
def add_table_name_to_record(source_table, record):
    """
    Tag a record with the table it came from and pass it on.

    :param source_table: value stored under the '_source_table' key
    :param record: mutable mapping; it is modified in place
    :return: generator yielding the same (tagged) record once
    """
    # NOTE(review): per the YT Python wrapper docs, `yt.aggregator` marks
    # callables that consume an iterator of rows, while this body treats
    # `record` as a single mapping and the function is wrapped in
    # functools.partial where it is used in this file - confirm that the
    # decorator is intended here.
    record.update({'_source_table': source_table})
    yield record


def reduce_old_records(current_table, _, records):
    """
    Yield the records of ``current_table`` that differ from the record
    preceding them in time.

    Records are processed in ascending ``unixtime`` order. Two records are
    considered equal when they match after the keys listed in
    REMOVE_FIELDS have been dropped. A record is yielded (unmodified) only
    when it differs from the previous distinct record and its
    '_source_table' value equals ``current_table``.

    :param current_table: '_source_table' value of the records to emit
    :param _: unused (presumably the reduce key passed by YT - TODO confirm)
    :param records: iterable of dict-like records, each with a 'unixtime'
        value convertible by ``int``
    :return: generator of the records to keep
    """
    # Built once per call: set membership instead of a list scan per key.
    ignored_fields = frozenset(REMOVE_FIELDS)
    last_record = None

    # Sort by time so that records coming from already compressed tables
    # are seen first.
    for r in sorted(records, key=lambda row: int(row['unixtime'])):
        # Stop processing if there is no flight number; a None value is
        # treated the same way as a missing or blank one.
        number = r.get('number')
        if not number or not number.strip():
            return

        # `.items()` (not `.iteritems()`) works on both Python 2 and 3.
        cleaned_record = {
            k: v for k, v in r.items() if k not in ignored_fields
        }
        if cleaned_record == last_record:
            continue
        last_record = cleaned_record

        if '_source_table' in r and r['_source_table'] == current_table:
            yield r


class TabloCompressor(object):
    """
    Builds a de-duplicated ("compressed") copy of a daily tablo dump in YT.

    The tablo is written to YT many times a day. Out of all those records
    only the ones unique by the key ['number', 'arrival', 'departure'] are
    kept and stored in a separate table.
    """
    def __init__(self, yt, logger, precise=30):
        """
        :param yt: YT client object; every YT call of this class goes
            through it
        :param logger: logger used for progress and error messages
        :param precise: number of previous days whose compressed tables
            are taken into account when looking for duplicates
        """
        self._yt = yt
        self._logger = logger
        self._precise = precise

    def create_compressed_tablo(self, current_date, force=False):
        # type: (date, bool) -> Tuple[str, Union[str, None]]
        """
        Compress the tablo of the given day, never raising to the caller.

        :param current_date: date for which the compressed table is built
        :param force: ignore the result of a previous compression?
        :return: (status, error message); status is 'done', 'skip' or
            'error', the message is None unless the status is 'error'
        """
        try:
            return self._create_compressed_tablo(current_date, force)
        except Exception as e:
            # Keep the traceback in the log: the caller only gets str(e).
            self._logger.exception(
                'can not compress tablo for %s', current_date
            )
            return 'error', str(e)

    def _create_compressed_tablo(self, current_date, force):
        """
        Do the actual compression; see create_compressed_tablo for the
        meaning of the arguments and of the returned pair.
        """
        source_table_path = self._create_tablo_name(current_date)
        result_table_path = self._create_compressed_tablo_name(current_date)

        source_table = self._find_table(source_table_path)
        result_table = self._find_table(result_table_path)

        if not source_table:
            return 'error', 'can not compress tablo, because source [{}] is not found'.format(source_table_path)
        source_table_modification_time = self._get_modification_date(source_table)

        # NOTE(review): the result table is created here, before any
        # operation has run. If a later step fails, the next non-forced
        # call may find this table, see it as newer than the source and
        # return 'skip' - confirm this is acceptable.
        if not result_table:
            self._yt.create('table', result_table_path, recursive=True)
            result_modification_time = None
        else:
            result_modification_time = self._get_modification_date(result_table)

        if force:
            result_modification_time = None

        # Nothing to do when the source has not changed since the last
        # compression.
        if result_modification_time and source_table_modification_time <= result_modification_time:
            return 'skip', None

        with self._yt.TempTable() as tmp_table:
            self._logger.info('add table name to record')
            self._yt.run_map(
                functools.partial(add_table_name_to_record, source_table_path),
                source_table=source_table_path,
                destination_table=tmp_table,
                spec={"data_size_per_job": settings.YT_DATA_SIZE_PER_JOB},
            )

            # Compressed tables of the previous `precise` days that exist.
            previous_compressed_tables = []
            for i in range(1, self._precise + 1):
                previous_compressed_table_path = self._create_compressed_tablo_name(current_date - timedelta(days=i))
                if self._yt.exists(previous_compressed_table_path):
                    previous_compressed_tables.append(previous_compressed_table_path)
            self._logger.info('count previous tables: %d', len(previous_compressed_tables))

            self._logger.info('sort by [number, arrival, departure]')
            # Use the injected client (not the module-level `yt`) so that
            # the sort runs against the same client as every other step.
            self._yt.run_sort(
                [tmp_table] + previous_compressed_tables,
                tmp_table,
                sort_by=['number', 'arrival', 'departure']
            )

            self._logger.info('uniq by [number, arrival, departure]')
            self._yt.run_reduce(
                functools.partial(reduce_old_records, source_table_path),
                tmp_table,
                result_table_path,
                reduce_by=['number', 'arrival', 'departure'],
                spec={"data_size_per_job": settings.YT_DATA_SIZE_PER_JOB},
                memory_limit=3 * 1024 * 1024 * 1024,
            )

        return 'done', None

    def _create_tablo_name(self, current_date):
        """Return the path of the raw tablo dump table for the date."""
        formatted_date = current_date.strftime('%Y-%m-%d')
        return '//home/rasp/logs/rasp-tablo-dump-log/{}'.format(formatted_date)

    def _create_compressed_tablo_name(self, current_date):
        """Return the path of the compressed tablo table for the date."""
        formatted_date = current_date.strftime('%Y-%m-%d')
        return '//home/rasp/logs/rasp-tablo-compressed-log/{}'.format(formatted_date)

    def _find_table(self, path):
        """
        Return the first table node found at ``path`` (requested with the
        'modification_time' and 'row_count' attributes), or None when the
        path does not exist or the search finds no table.
        """
        if not self._yt.exists(path):
            return None
        tables = list(
            self._yt.search(
                path,
                node_type="table",
                attributes=['modification_time', 'row_count']
            )
        )

        if not tables:
            return None

        return tables[0]

    def _get_modification_date(self, table):
        """
        Parse the 'modification_time' attribute of a table node.

        Only the first 19 characters ('YYYY-MM-DDTHH:MM:SS') are parsed;
        whatever follows them is ignored.
        """
        return datetime.strptime(
            table.attributes['modification_time'][:19],
            '%Y-%m-%dT%H:%M:%S'
        )


class TabloCompressorFactory(object):
    """Creates TabloCompressor instances bound to the global yt wrapper."""

    def create(self, proxy=None):
        """
        Configure the module-level yt wrapper and wrap it in a compressor.

        :param proxy: YT proxy url to use; when None the url left in place
            by ``configure_wrapper`` is kept instead of being overwritten
            with None
        :return: TabloCompressor using the yt wrapper and this module's
            logger
        """
        configure_wrapper(yt)
        # Only override the proxy when the caller actually supplied one.
        if proxy is not None:
            yt.config['proxy']['url'] = proxy

        return TabloCompressor(yt, logging.getLogger(__name__))


# Shared module-level factory instance.
tablo_compressor_factory = TabloCompressorFactory()
