# -*- encoding: utf-8 -*-
import os
import logging

from collections import defaultdict
from datetime import datetime, date, timedelta

import sandbox.common.types.notification as ctn
from sandbox.projects.avia.lib.yt_helpers import YtClientFactory
from sandbox import sdk2
from sandbox.sandboxsdk.environments import PipEnvironment

from sandbox.projects.avia.base import AviaBaseTask


def parse_table_name(path):
    """Return the log directory name encoded in a YT table path.

    Two layouts are recognised:

    * ``//<path-to-log>/avia-some-day-logs/YYYY-MM-DD`` (the default layout);
    * ``//<path-to-log>/avia-some-day-logs/1d/YYYY-MM-DD`` (newer logs).

    :param path: slash-separated table path.
    :return: the path component preceding the last one, or the one before
        that when the component preceding the last one is ``1d``.
    :raises IndexError: if the path has too few components.
    """
    components = path.split('/')
    # Newer logs carry an extra "1d" level between the log name and the date,
    # so the readable name sits one component further from the end.
    offset = 3 if components[-2] == '1d' else 2
    return components[-offset]


def path_filter(path):
    """Tell whether the log name parsed from ``path`` starts with ``avia``.

    Passed as the ``path_filter`` callback to ``yt_client.search`` in
    ``get_last_yt_logs``.
    """
    log_name = parse_table_name(path)
    return log_name.startswith('avia')


def get_last_yt_logs(yt_client, logs_path):
    """Collect up to two most recent dated tables for every log under a path.

    Tables whose last path component is not a ``YYYY-MM-DD`` date, and tables
    dated today, are ignored.

    :param yt_client: client object providing ``search``.
    :param logs_path: root path handed to ``yt_client.search``.
    :return: dict mapping the table path without its ``/YYYY-MM-DD`` suffix to
        a list of at most two table paths, sorted in descending order.
    """
    date_suffix_length = len("/YYYY-MM-DD")
    latest_by_log = defaultdict(list)

    found_tables = yt_client.search(
        logs_path,
        node_type=['table'],
        path_filter=path_filter,
        depth_bound=2,
    )
    for table_path in found_tables:
        date_component = table_path.split('/')[-1]
        try:
            parsed = datetime.strptime(date_component, '%Y-%m-%d')
        except ValueError:
            # The last component is not a date, so this is not a daily table.
            continue

        if parsed.date() == date.today():
            continue

        log_path = table_path[:-date_suffix_length]
        # Paths end with an ISO date, so descending string order puts the
        # newest tables first; only the top two are kept.
        candidates = latest_by_log[log_path] + [table_path]
        candidates.sort(reverse=True)
        latest_by_log[log_path] = candidates[:2]

    return dict(latest_by_log)


def convert_eventtime(record):
    """Map a record to its event time truncated to minutes.

    Used as the mapper passed to ``run_map`` in ``_check_hourly``.

    :param record: mapping that may contain ``iso_eventtime`` formatted as
        ``YYYY-MM-DD HH:MM:SS``.
    :return: generator yielding one ``{'eventtime': 'YYYY-MM-DD HH:MM'}`` dict,
        or nothing when the field is missing, empty or cannot be parsed.
    """
    raw_value = record.get('iso_eventtime')
    if not raw_value:
        return

    try:
        moment = datetime.strptime(raw_value, '%Y-%m-%d %H:%M:%S')
    except ValueError:
        # Records with a malformed timestamp are dropped.
        return

    minute_stamp = moment.strftime('%Y-%m-%d %H:%M')
    yield {'eventtime': minute_stamp}


def aggregate_eventtime(key, records):
    """Collapse all records sharing a reduce key into a single row.

    Used as the reducer passed to ``run_reduce`` in ``_check_hourly``.

    :param key: mapping with the reduce key; only ``eventtime`` is read.
    :param records: records of the group (not inspected).
    :return: generator yielding one ``{'eventtime': ...}`` dict.
    """
    eventtime = key['eventtime']
    yield {'eventtime': eventtime}


class AviaCheckYtTables(AviaBaseTask):
    """ Checks YT logs tables """

    # Two modes are supported, selected by ``Parameters.mode``:
    #   * ``daily``  - for every log found under ``Parameters.destination``
    #     compare the row counts of the two most recent daily tables and mail
    #     the resulting report to ``Parameters.daily_recipients``;
    #   * ``hourly`` - find the newest ``iso_eventtime`` (minute precision) in
    #     today's table under ``Parameters.destination_table`` and mail a
    #     warning to ``Parameters.hourly_recipients`` when it is missing, in
    #     the future, or older than ``Parameters.hourly_max_timedelta`` hours.

    class Requirements(sdk2.Task.Requirements):
        environments = (
            PipEnvironment('yandex-yt', version='0.10.8'),
            PipEnvironment('yandex-yt-yson-bindings-skynet', version='0.3.32-0'),
        )

        cores = 1
        ram = 1024

        class Caches(sdk2.Requirements.Caches):
            pass

    class Parameters(sdk2.Task.Parameters):

        with sdk2.parameters.Group('Task settings') as task_settings:
            with sdk2.parameters.RadioGroup('Mode') as mode:
                mode.values['hourly'] = mode.Value('hourly', default=True)
                mode.values['daily'] = mode.Value('daily')

                # Parameters used by _check_hourly.
                with mode.value['hourly']:
                    destination_table = sdk2.parameters.String(
                        'Destination log YT path',
                        default='//home/avia/logs/avia-json-redir-log',
                        required=True,
                    )
                    hourly_recipients = sdk2.parameters.List(
                        'Recipients',
                        default=['avia-alerts@yandex-team.ru', 'arancio@yandex-team.ru'],
                        required=True,
                    )
                    hourly_max_timedelta = sdk2.parameters.Float(
                        'Hourly max timedelta (hours)',
                        required=True,
                        default=1.5,
                    )

                # Parameters used by _check_daily.
                with mode.value['daily']:
                    destination = sdk2.parameters.List(
                        'Destination logs YT paths',
                        default=['//home/avia/logs'],
                        required=True,
                    )
                    daily_recipients = sdk2.parameters.List(
                        'Recipients',
                        default=['avia-alerts@yandex-team.ru', 'arancio@yandex-team.ru'],
                        required=True,
                    )

        with sdk2.parameters.Group('YT Settings') as yt_settings:
            vaults_owner = sdk2.parameters.String('Token vault owner', default='AVIA', required=True)
            yt_vault_name = sdk2.parameters.String('YT Token vault name', required=True, default='YT_TOKEN')
            yt_proxy = sdk2.parameters.String('YT cluster', required=True, default='hahn')

    def fill_task_tracker_config(self):
        """
        Define different source and host in solomon metric for different modes

        The selected mode is appended as a ``.<mode>`` suffix to both the
        ``source`` and the ``host`` of the task tracker config.
        """
        super(AviaCheckYtTables, self).fill_task_tracker_config()
        suffix = self.Parameters.mode
        self.task_tracker_config.source = '{}.{}'.format(self.task_tracker_config.source, suffix)
        self.task_tracker_config.host = '{}.{}'.format(self.task_tracker_config.host, suffix)

    def on_execute(self):
        """Create a YT client and run the check matching ``Parameters.mode``.

        An unknown mode is only logged as an error; no check is run.
        """
        logging.info('Start')
        yt_client = YtClientFactory().create(
            proxy=self.Parameters.yt_proxy,
            token=sdk2.Vault.data(self.Parameters.vaults_owner, self.Parameters.yt_vault_name),
        )

        logging.info('Start work')

        if self.Parameters.mode == 'daily':
            self._check_daily(yt_client)
        elif self.Parameters.mode == 'hourly':
            self._check_hourly(yt_client)
        else:
            logging.error('Unknown type: %s', self.Parameters.mode)
        logging.info('End')

    def _check_daily(self, yt_client):
        """Build and mail a per-log report on the two latest daily tables.

        For every log returned by ``get_last_yt_logs`` one report line is
        produced: missing yesterday table, only one table found, empty
        day-before-yesterday table, or the row count delta between the two.

        :param yt_client: client object providing ``search`` and
            ``get_attribute``.
        """
        yesterday_str = (date.today() - timedelta(days=1)).strftime('%Y-%m-%d')
        body_lines = []

        for path in self.Parameters.destination:
            tables = get_last_yt_logs(yt_client, path)

            for table_path, last_tables in tables.items():
                # last_tables is sorted newest first and holds one or two paths.
                yesterday_log_date = last_tables[0].split('/')[-1]

                if not last_tables[0].endswith(yesterday_str):
                    line = '%s:\tTABLE DOES NOT EXIST (last: %s)' % (table_path, yesterday_log_date)
                    body_lines.append(line)
                    logging.info(line)
                    continue

                if len(last_tables) != 2:
                    # Must be a formatted string: a tuple here would break
                    # the '\n'.join() in _send_mail.
                    line = '%s:\tONLY ONE LOG FOUND' % (table_path,)
                    body_lines.append(line)
                    logging.info(line)
                    continue

                # Only safe to touch last_tables[1] after the length check above.
                day_before_yesterday_log_date = last_tables[1].split('/')[-1]

                day_before_yesterday_rows_count = int(yt_client.get_attribute(last_tables[1], 'row_count'))
                yesterday_log_rows_count = int(yt_client.get_attribute(last_tables[0], 'row_count'))

                if not day_before_yesterday_rows_count:
                    # Also guards the division below against a zero baseline.
                    line = '%s:\tEMPTY TABLE (last: %s)' % (table_path, day_before_yesterday_log_date)
                    body_lines.append(line)
                    logging.info(line)
                    continue

                rows_count_delta = yesterday_log_rows_count - day_before_yesterday_rows_count
                rows_count_delta_percent = (float(rows_count_delta) / day_before_yesterday_rows_count) * 100

                # Negative numbers already carry their sign; add '+' for growth.
                rows_count_delta_sign = '+' if rows_count_delta > 0 else ''
                rows_count_delta_percent_sign = '+' if rows_count_delta_percent > 0 else ''

                line = '%s:\t%s%s (%s%.2f %%) (%s - %s)' % (
                    table_path,
                    rows_count_delta_sign, rows_count_delta,
                    rows_count_delta_percent_sign, rows_count_delta_percent,
                    day_before_yesterday_log_date, yesterday_log_date
                )
                body_lines.append(line)
                logging.info(line)

        self._send_mail(self.Parameters.daily_recipients, body_lines)

    def _check_hourly(self, yt_client):
        """Check that today's table has a recent enough latest event time.

        Runs map / sort / reduce over today's table to get the distinct event
        times with minute precision, takes the maximum and mails a warning
        when there are no rows, the latest row is in the future, or it is
        older than ``Parameters.hourly_max_timedelta`` hours.

        :param yt_client: client object providing ``exists``,
            ``create_temp_table``, ``run_map``, ``run_sort``, ``run_reduce``
            and ``read_table``.
        """
        # Imported here rather than at module level; presumably the package is
        # only available inside the task environment - confirm.
        import yt.wrapper as yt_wrapper

        max_timedelta = timedelta(hours=self.Parameters.hourly_max_timedelta)
        now = datetime.now()
        table = os.path.join(self.Parameters.destination_table, now.strftime('%Y-%m-%d'))

        # Right after midnight the table for the new day may not exist yet.
        midnight = now.replace(hour=0, minute=0, second=0, microsecond=0)
        if not yt_client.exists(table) and (now - midnight) < max_timedelta:
            logging.warning(
                'Table "%s" does not exists for less than %.1f hours, ignore',
                table,
                max_timedelta.total_seconds() / 3600,
            )
            return

        # NOTE(review): when the table is still missing after the grace period
        # the code proceeds to run_map on it; presumably the operation fails
        # and the task errors out - confirm this is the intended alert path.
        tmp_table = yt_client.create_temp_table()

        logging.info('Run map')
        yt_client.run_map(
            convert_eventtime,
            source_table=table,
            destination_table=tmp_table,
            format=yt_wrapper.YsonFormat(),
        )

        logging.info('Run sort')
        yt_client.run_sort(
            tmp_table,
            tmp_table,
            sort_by=['eventtime']
        )

        logging.info('Run reduce')
        yt_client.run_reduce(
            aggregate_eventtime,
            tmp_table,
            tmp_table,
            format=yt_wrapper.YsonFormat(),
            reduce_by=['eventtime'],
        )

        logging.info('Calc last eventtime')
        last_eventtime = None
        for record in yt_client.read_table(tmp_table, format=yt_wrapper.YsonFormat(), raw=False):
            eventtime = datetime.strptime(record['eventtime'], '%Y-%m-%d %H:%M')

            if last_eventtime is None or eventtime > last_eventtime:
                last_eventtime = eventtime

        # Take a fresh timestamp after the YT operations above and use this
        # single value for both comparisons below.
        now = datetime.now()

        if last_eventtime is None:
            err_msg = 'No rows in the table: {}'.format(table)
        elif last_eventtime > now:
            err_msg = 'The latest row in the table {} is in the future: {}'.format(
                table,
                last_eventtime.strftime('%Y-%m-%d %H:%M'),
            )
        elif (last_eventtime + max_timedelta) < now:
            err_msg = 'The latest row in the table {} was made too long ago: {}'.format(
                table,
                last_eventtime.strftime('%Y-%m-%d %H:%M'),
            )
        else:
            logging.info('Everything is ok for table %s', table)
            err_msg = None

        if err_msg:
            logging.warning(err_msg)
            body_lines = ['WARNING!', '', err_msg]
            self._send_mail(self.Parameters.hourly_recipients, body_lines)

    def _send_mail(self, recipients, body_lines):
        """Send ``body_lines`` joined with newlines as an e-mail notification.

        :param recipients: recipients passed to ``self.server.notification``.
        :param body_lines: list of strings forming the message body.
        """
        logging.info('Sending message to recipients: %s', recipients)
        message_body = '\n'.join(body_lines)
        logging.info('Message body: %s', message_body)
        self.server.notification(
            subject=u'Проверка YT',
            body=message_body,
            recipients=recipients,
            transport=ctn.Transport.EMAIL,
            type='html',
        )
