# coding=utf-8
import logging
import os
from datetime import datetime, timedelta

from sandbox import sdk2
from sandbox.sandboxsdk.environments import PipEnvironment
from sandbox.projects.avia.lib.logs import configure_logging, get_sentry_dsn
from sandbox.projects.avia.lib.yt_helpers import tables_for_daterange, YtClientFactory

from sandbox.projects.avia.base import AviaBaseTask


# YT directory holding one table per day; each day's table is fed to
# success_book_mapper.
SUCCESS_BOOK_LOG_PATH = '//home/rasp/logs/avia-success-book-log'
# YT directory holding one table per day; each day's table is fed to
# partner_booking_mapper.
PARTNER_BOOK_LOG_PATH = '//home/avia/logs/avia-partner-booking-log'
# Source of the "start" tables: the tables inside the date window are sorted
# by show_id and used as the primary input of the show_id join.
REDIR_LOG_PATH = '//home/avia/logs/avia-json-redir-log'
# Source of the "show" tables: the tables inside the date window are sorted
# by show_id and used as the foreign input of the show_id join.
SHOW_LOG_PATH = '//home/rasp/logs/rasp-tickets-show-log'
# Second destination table of the final per-day reduce.
ERROR_TABLE = '//home/rasp/merge_books_json_error_table'

# Format of the per-day table names (also the format parse_date works with).
DATE_FORMAT = '%Y-%m-%d'
# Format of the event timestamps parsed and re-emitted by utc_to_mow.
DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'

# Column schema passed as the 'schema' attribute when a per-day output table
# is created by the task. All columns are strings except the three seat
# counters, which are int64.
SCHEMA = [
    {'type': 'string', 'name': 'marker'},
    {'type': 'string', 'name': 'status'},
    {'type': 'string', 'name': 'success_eventtime'},
    {'type': 'string', 'name': 'mow_eventtime'},
    {'type': 'string', 'name': 'source'},
    {'type': 'string', 'name': 'fromId'},
    {'type': 'string', 'name': 'toId'},
    {'type': 'string', 'name': 'when'},
    {'type': 'string', 'name': 'return_date'},
    {'type': 'string', 'name': 'klass'},
    {'type': 'string', 'name': 'national_version'},
    {'type': 'string', 'name': 'lang'},
    {'type': 'string', 'name': 'qid'},
    {'type': 'string', 'name': 'forward'},
    {'type': 'string', 'name': 'backward'},
    {'type': 'string', 'name': 'partner'},
    {'type': 'string', 'name': 'start_eventtime'},
    {'type': 'string', 'name': 'yandexuid'},
    {'type': 'string', 'name': 'passportuid'},
    {'type': 'string', 'name': 'userip'},
    {'type': 'int64',  'name': 'adult_seats'},
    {'type': 'int64',  'name': 'children_seats'},
    {'type': 'int64',  'name': 'infant_seats'},
    {'type': 'string', 'name': 'show_id'},
    {'type': 'string', 'name': 'user_from_geo_id'},
    {'type': 'string', 'name': 'user_from_key'},
]


# Fields that clean_start_record copies verbatim from the merged start/show
# row. Each of them is read with record[field], so a missing one raises
# KeyError.
START_COPY_FIELDS = (
    'marker',
    'fromId',
    'toId',
    'when',
    'return_date',
    'klass',
    'national_version',
    'lang',
    'qid',
    'yandexuid',
    'passportuid',
    'userip',
    'adult_seats',
    'children_seats',
    'infant_seats',
    'show_id',
    'user_from_geo_id',
    'user_from_key',
)

# Optional fields that clean_start_record copies with a default of '' when
# they are absent (presumably they come from the show log -- TODO confirm).
SHOW_COPY_FIELDS = (
    'forward',
    'backward',
    'partner',
)


def utc_to_mow(utc_dt_string):
    """Return the given DATETIME_FORMAT timestamp string shifted by +3 hours.

    The input is treated as UTC and the result as Moscow time (fixed UTC+3).
    """
    # This function is pickled, shipped to YT and executed there.
    # pytz does not work in the YT interpreter, hence the fixed UTC+3 hack.
    parsed_utc = datetime.strptime(utc_dt_string, DATETIME_FORMAT)
    moscow_offset = timedelta(hours=3)
    shifted = parsed_utc + moscow_offset
    return shifted.strftime(DATETIME_FORMAT)


def success_book_mapper(record):
    """Project a success-book log row onto the common "success" row shape.

    Yields exactly one dict with the row's status and marker, its
    ``eventtime`` renamed to ``success_eventtime`` and a constant
    ``source`` of ``'pixel'``. All other input fields are dropped.
    """
    status = record['status']
    marker = record['marker']
    event_time = record['eventtime']
    yield dict(
        status=status,
        marker=marker,
        success_eventtime=event_time,
        source='pixel',
    )


def partner_booking_mapper(record):
    """Project a partner-booking log row onto the common "success" row shape.

    Yields exactly one dict with the row's status, marker and source, and
    its ``created_at`` renamed to ``success_eventtime``. All other input
    fields are dropped.
    """
    status = record['status']
    marker = record['marker']
    created_at = record['created_at']
    source = record['source']
    yield dict(
        status=status,
        marker=marker,
        success_eventtime=created_at,
        source=source,
    )


def with_mow_eventtime_mapper(record):
    """Add ``mow_eventtime`` to the row and yield it.

    The value is ``success_eventtime`` passed through utc_to_mow. The input
    dict is modified in place and the same object is yielded.
    """
    moscow_time = utc_to_mow(record['success_eventtime'])
    record['mow_eventtime'] = moscow_time
    yield record


def filter_empty_markers(key, records):
    """Reducer: collapse all rows of one key into one cleaned start row.

    Rows are folded into a single dict in arrival order, so on a field clash
    the later row wins. Nothing is yielded when the resulting ``marker`` is
    falsy; otherwise the folded row is passed through clean_start_record.

    :param key: reduce key supplied by YT (unused).
    :param records: iterable of dict-like rows sharing that key.
    """
    combined = {}
    for row in records:
        combined.update(row)

    # Guard clause: rows without a usable marker cannot be joined later on.
    if not combined['marker']:
        return

    yield clean_start_record(combined)


def clean_start_record(record):
    """Build the normalized "start" row from a merged start/show row.

    * every START_COPY_FIELDS entry is copied as is (KeyError if missing);
    * every SHOW_COPY_FIELDS entry is copied, defaulting to ``''``;
    * ``eventtime`` is stored under ``start_eventtime``;
    * ``user_from_geo_id`` is converted with ``str()``.

    :param record: dict-like merged row.
    :return: a new dict; the input is not modified.
    """
    cleaned = {}
    for name in START_COPY_FIELDS:
        cleaned[name] = record[name]
    cleaned.update(
        (name, record.get(name, '')) for name in SHOW_COPY_FIELDS
    )
    cleaned['start_eventtime'] = record['eventtime']

    # NOTE(review): str() turns a None geo id into the literal 'None' --
    # confirm that this is the intended value for the string column.
    geo_id = cleaned['user_from_geo_id']
    cleaned['user_from_geo_id'] = str(geo_id)

    return cleaned


def merge_records(key, records):
    """Reducer: join every "success" row with every "start" row of one key.

    Rows that contain a ``status`` field are treated as success rows, all
    other rows as start rows. For each (success, start) pair one merged row
    is yielded: a copy of the success row without its ``'@table_index'``
    control field, updated with the start row (start fields win on a clash).
    Nothing is yielded for a key that lacks either kind of row.

    Every yielded row is a fresh dict. Previously a single dict was mutated
    and re-yielded for each start row, so a consumer that kept the rows saw
    only the last start row's values, and fields of an earlier start row
    leaked into later merged rows.

    :param key: reduce key supplied by YT (unused).
    :param records: iterable of dict-like rows sharing that key.
    """
    success_records = []
    start_records = []

    for row in records:
        if 'status' in row:
            success_records.append(row)
        else:
            start_records.append(row)

    for success_record in success_records:
        base_row = dict(success_record)
        # Drop the input table index of the success row; tolerate its absence.
        base_row.pop('@table_index', None)
        for start_record in start_records:
            merged_row = dict(base_row)
            # NOTE(review): if start rows also carry '@table_index', update()
            # copies it into the merged row -- confirm this is intended.
            merged_row.update(start_record)
            yield merged_row


class AviaMergeStartAndSuccessJsonLogs(AviaBaseTask):
    """ Merge start and success json logs

    For every day between the ``start_date`` and ``end_date`` parameters the
    task collects that day's success-book and partner-booking rows, joins
    them by ``marker`` with the start (redirect) rows of the preceding
    window -- themselves joined with show rows by ``show_id`` -- and writes
    the result to ``<output_path>/<day>``.
    """

    class Requirements(sdk2.Task.Requirements):
        cores = 1
        ram = 8192

        class Caches(sdk2.Requirements.Caches):
            pass  # We do not need caches

        # Python packages installed into the task environment.
        environments = (
            PipEnvironment('raven'),
            PipEnvironment('yandex-yt', version='0.10.8'),
            PipEnvironment('yandex-yt-yson-bindings-skynet', version='0.3.32-0'),
        )

    class Parameters(sdk2.Task.Parameters):

        with sdk2.parameters.Group('YT Settings') as yt_settings:
            vaults_owner = sdk2.parameters.String('Token vault owner', required=True)
            vault_name = sdk2.parameters.String('Token vault name', required=True, default='YT_TOKEN')
            proxy = sdk2.parameters.String('Proxy', required=True, default='hahn')

        with sdk2.parameters.Group('Task settings') as task_settings:
            # Both dates are DATE_FORMAT strings; an empty value means "today".
            start_date = sdk2.parameters.String('Minimal date', required=False, default=None)
            end_date = sdk2.parameters.String('Maximal date', required=False, default=None)
            output_path = sdk2.parameters.String('Output path', required=True, default='//home/rasp/logs/avia-json-result-book-log')
            # Number of days before start_date whose start/show tables are read.
            window_width = sdk2.parameters.Integer('Window width', required=True, default=31)

    def parse_date(self, value, default):
        """Parse a DATE_FORMAT string into a datetime.

        :param value: date string, or an empty/None value.
        :param default: returned unchanged when ``value`` is empty.
        :return: ``datetime`` parsed from ``value``, or ``default``.
        :raises ValueError: if ``value`` does not match DATE_FORMAT.
        """
        if value:
            # The string to parse must be passed explicitly; calling strptime
            # with the format alone raises TypeError.
            return datetime.strptime(value, DATE_FORMAT)

        return default

    def on_execute(self):
        """Run the merge for every day of the configured date range."""
        # Imported here because the package is provided by the task's
        # PipEnvironment requirements.
        import yt.wrapper as yt
        logging.info('Start')
        self._configure_sentry()

        today = datetime.now()
        start_date = self.parse_date(self.Parameters.start_date, today)
        end_date = self.parse_date(self.Parameters.end_date, today)

        current_date = start_date

        logging.info('Sorting start tables')
        # Start/show tables are taken from window_width days before
        # start_date up to end_date.
        table_range = (
            (start_date - timedelta(self.Parameters.window_width)).date(),
            end_date.date()
        )

        ytc = YtClientFactory.create(
            proxy=self.Parameters.proxy,
            token=sdk2.Vault.data(self.Parameters.vaults_owner, self.Parameters.vault_name),
        )

        start_table = ytc.create_temp_table()
        show_table = ytc.create_temp_table()
        merged_start_show_table = ytc.create_temp_table()
        start_tables = tables_for_daterange(ytc, REDIR_LOG_PATH, *table_range)
        show_tables = tables_for_daterange(ytc, SHOW_LOG_PATH, *table_range)

        # Sort both inputs by show_id so that they can be join-reduced.
        ytc.run_sort(
            source_table=start_tables,
            destination_table=start_table,
            sort_by='show_id',
            spec={
                'schema_inference_mode': 'from_output',
            },
        )
        ytc.run_sort(
            source_table=show_tables,
            destination_table=show_table,
            sort_by='show_id',
        )
        # Join start rows (primary) with show rows (foreign) by show_id; the
        # reducer drops rows without a marker and normalizes the rest.
        ytc.run_join_reduce(
            binary=filter_empty_markers,
            source_table=['<primary=%true>' + start_table, '<foreign=%true>' + show_table],
            destination_table=merged_start_show_table,
            reduce_by=['show_id'],
            join_by=['show_id'],
        )

        # Re-sort the joined rows in place by marker for the per-day reduce.
        ytc.run_sort(
            source_table=merged_start_show_table,
            destination_table=merged_start_show_table,
            sort_by='marker',
            spec={
                'schema_inference_mode': 'from_output',
            },
        )

        while current_date <= end_date:
            curr_date = current_date.strftime(DATE_FORMAT)
            logging.info('Work: %s', curr_date)
            success_log_table = os.path.join(SUCCESS_BOOK_LOG_PATH, curr_date)
            partner_log_table = os.path.join(PARTNER_BOOK_LOG_PATH, curr_date)

            # Collect the day's success rows from both sources, if present.
            temp_table = ytc.create_temp_table()
            if ytc.exists(success_log_table):
                ytc.run_map(
                    source_table=success_log_table,
                    destination_table=temp_table,
                    binary=success_book_mapper,
                )

            if ytc.exists(partner_log_table):
                # Append so that rows written by the previous map are kept.
                ytc.run_map(
                    source_table=partner_log_table,
                    destination_table='<append=true>' + temp_table,
                    binary=partner_booking_mapper,
                )

            # Add mow_eventtime to every success row, in place.
            ytc.run_map(
                source_table=temp_table,
                destination_table=temp_table,
                binary=with_mow_eventtime_mapper,
            )

            ytc.run_sort(
                source_table=temp_table,
                destination_table=temp_table,
                sort_by='marker',
            )

            output_table = os.path.join(self.Parameters.output_path, curr_date)

            if not ytc.exists(output_table):
                ytc.create_table(
                    output_table,
                    recursive=True,
                    attributes={'schema': SCHEMA, 'optimize_for': 'scan'}
                )

            # Join the day's success rows (primary) with the start rows
            # (foreign) by marker. Rows carry control attributes as fields,
            # which is where merge_records gets '@table_index' from.
            ytc.run_reduce(
                source_table=[
                    '<primary=%true>' + temp_table,
                    '<foreign=%true>' + merged_start_show_table
                ],
                destination_table=[output_table, ERROR_TABLE],
                binary=merge_records,
                reduce_by=['marker'],
                join_by=['marker'],
                format=yt.YsonFormat(control_attributes_mode='row_fields'),
            )

            current_date += timedelta(days=1)

        logging.info('End')

    def _configure_sentry(self):
        """Configure logging with the Sentry DSN resolved for this task."""
        configure_logging(get_sentry_dsn(self))
