# -*- encoding: utf-8 -*-

import inspect
import os
import sys
from datetime import datetime, timedelta
from optparse import OptionParser

import pytz

import yt.wrapper as yt
import yt.logger_config as yt_logger_config
import yt.logger as yt_logger


# YT directories holding per-day input tables; main() appends a date formatted
# with DATE_FORMAT to build the table path for each processed day.
SUCCESS_BOOK_LOG_PATH = '//home/rasp/logs/avia-success-book-log'
PARTNER_BOOK_LOG_PATH = '//home/avia/logs/avia-partner-booking-log'
# Directory with start-book tables; main() selects a date range of them via
# yth.tables_for_daterange and sorts them by 'marker'.
START_BOOK_LOG_PATH = '//home/rasp/logs/avia-start-book-log'
# Directory where main() writes one merged table per processed day.
OUTPUT_PATH = '//home/rasp/logs/result-book-log'
# Second reduce output (appended to): merge_records emits a row here for every
# marker that has no start record.
ERROR_TABLE = '//home/rasp/merge_books_error_table'

# main() exits without processing when settings.ENVIRONMENT is not in this list.
ALLOWED_ENVS = ['production', 'dev']


# Directory of the imported pytz package; main() adds every file under it to
# yt.config['pickling']['additional_files_to_archive'].
PYTZ_PATH = os.path.abspath(os.path.dirname(inspect.getfile(pytz)))


# Number of days before the start date for which start-book tables are also
# included in the join.
WINDOW_WIDTH = 31
# Format of the command-line dates and of per-day table names.
DATE_FORMAT = '%Y-%m-%d'
# Format used by utc_to_mow to parse and render event timestamps.
DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'

# Schema of the per-day output table; main() passes it to yt.create when the
# table does not exist yet. Column order below is the schema's column order.
# A generator expression (rather than a list comprehension) keeps the loop
# variables from leaking into the module namespace under Python 2.
SCHEMA = list(
    {'type': column_type, 'name': column_name}
    for column_name, column_type in (
        ('marker', 'string'),
        ('status', 'string'),
        ('success_eventtime', 'string'),
        ('mow_eventtime', 'string'),
        ('source', 'string'),
        ('fromId', 'string'),
        ('toId', 'string'),
        ('when', 'string'),
        ('return_date', 'string'),
        ('klass', 'string'),
        ('national_version', 'string'),
        ('lang', 'string'),
        ('qid', 'string'),
        ('order_key', 'string'),
        ('partner_code', 'string'),
        ('start_eventtime', 'string'),
        ('yandexuid', 'string'),
        ('passportuid', 'string'),
        ('userip', 'string'),
        ('adult_seats', 'int64'),
        ('children_seats', 'int64'),
        ('infant_seats', 'int64'),
    )
)


# Fields that clean_start_record copies unchanged from a start-book record.
START_COPY_FIELDS = [
    'fromId', 'toId', 'when', 'return_date',
    'klass', 'national_version', 'lang',
    'qid', 'order_key', 'partner_code',
    'yandexuid', 'passportuid', 'userip',
    # Seat counts: clean_start_record casts these three to int after copying.
    'adult_seats', 'children_seats', 'infant_seats',
]


def utc_to_mow(utc_dt_string):
    """Convert a UTC timestamp string to the same instant in Moscow time.

    The input is parsed with DATETIME_FORMAT and interpreted as UTC; the
    result is rendered with DATETIME_FORMAT in the 'Europe/Moscow' zone.
    """
    naive_utc = datetime.strptime(utc_dt_string, DATETIME_FORMAT)
    aware_utc = pytz.UTC.localize(naive_utc)
    moscow_dt = aware_utc.astimezone(pytz.timezone('Europe/Moscow'))
    return moscow_dt.strftime(DATETIME_FORMAT)


def success_book_mapper(record):
    """Map a success-book log row to the common booking-row layout.

    Yields a single dict carrying the row's 'status' and 'marker', its
    'eventtime' under the key 'success_eventtime', and the fixed source
    'pixel'. All other input fields are dropped.
    """
    booking_row = dict(
        status=record['status'],
        marker=record['marker'],
        success_eventtime=record['eventtime'],
        source='pixel',
    )
    yield booking_row


def partner_booking_mapper(record):
    """Map a partner-booking log row to the common booking-row layout.

    Yields a single dict carrying the row's 'status', 'marker' and 'source',
    with its 'created_at' value stored under 'success_eventtime'. All other
    input fields are dropped.
    """
    booking_row = dict(
        status=record['status'],
        marker=record['marker'],
        success_eventtime=record['created_at'],
        source=record['source'],
    )
    yield booking_row


def with_mow_eventtime_mapper(record):
    """Add a 'mow_eventtime' field to the row and yield it.

    The value is the row's 'success_eventtime' converted by utc_to_mow. The
    incoming dict is modified in place and the same object is yielded.
    """
    success_time_utc = record['success_eventtime']
    record['mow_eventtime'] = utc_to_mow(success_time_utc)
    yield record


def clean_start_record(record):
    """Reduce a start-book row to the fields used in the merged output.

    Copies every field named in START_COPY_FIELDS, stores the row's
    'eventtime' under 'start_eventtime', and casts the three seat counts to
    int. Returns a new dict; the input row is not modified.
    """
    cleaned = {}
    for field in START_COPY_FIELDS:
        cleaned[field] = record[field]

    cleaned['start_eventtime'] = record['eventtime']

    # Change the types: seat counts are stored as integers in the result.
    for seats_field in ['adult_seats', 'children_seats', 'infant_seats']:
        cleaned[seats_field] = int(cleaned[seats_field])

    return cleaned


def merge_records(key, records):
    """Join booking rows with start-book rows that share one marker.

    Rows containing a 'status' key are treated as booking ("success") rows;
    every other row is treated as a start-book row and passed through
    clean_start_record.

    Yields:
        * for every (booking row, start row) pair: a new dict with the
          booking row's fields overlaid by the cleaned start row's fields and
          '@table_index' set to 0;
        * if the group has no start rows at all: one dict with
          '@table_index' set to 1, holding the marker taken from ``key`` and
          the current local time as 'timestamp'.

    main() runs this with control attributes as row fields, so '@table_index'
    selects which destination table receives the row.

    :param key: mapping with the reduce key; only key['marker'] is read.
    :param records: iterable of row dicts for that key.
    """
    success_records = []
    start_records = []

    for record in records:
        if 'status' in record:
            success_records.append(record)
        else:
            start_records.append(clean_start_record(record))

    for success_record in success_records:
        for start_record in start_records:
            # Build a fresh dict for each pair: re-yielding one mutated dict
            # would make previously yielded rows change under any consumer
            # that keeps references to them.
            merged = dict(success_record)
            merged['@table_index'] = 0
            merged.update(start_record)
            yield merged

    if not start_records:
        yield {
            '@table_index': 1,
            'marker': key['marker'],
            'timestamp': datetime.now().strftime('%Y-%m-%dT%H:%M:%S')
        }


def find_all_files_recursive(source_dir):
    """Return the paths of all files under ``source_dir``, at any depth.

    Each path is the directory reported by os.walk joined with the file
    name, so it carries ``source_dir`` as its prefix. Directories themselves
    are not listed.
    """
    found = []
    for dirpath, _dirnames, filenames in os.walk(source_dir):
        for filename in filenames:
            found.append(os.path.join(dirpath, filename))
    return found


def replace_head_of_path(path, head, target_prefix):
    """Re-root ``path`` from ``head`` onto ``target_prefix``.

    Computes ``path`` relative to ``head`` and joins that relative part onto
    ``target_prefix``.
    """
    relative_tail = os.path.relpath(path, head)
    return os.path.join(target_prefix, relative_tail)


def generate_path_for_yt(paths, head, target_prefix):
    """Pair every path with its re-rooted counterpart.

    Returns a list of ``(path, new_path)`` tuples in input order, where
    ``new_path`` is ``replace_head_of_path(path, head, target_prefix)``.
    """
    pairs = []
    for local_path in paths:
        archive_path = replace_head_of_path(local_path, head, target_prefix)
        pairs.append((local_path, archive_path))
    return pairs


def main():
    """Merge booking logs with start-book logs in YT for a range of dates.

    Command-line options:
        -v / --verbose     also log to stdout (otherwise the yt logger level
                           is set to WARNING)
        -p / --proxy       YT proxy URL (default: settings.YT_PROXY)
        -s / --start-date  first day to process, DATE_FORMAT (default: today)
        -e / --end-date    last day to process, inclusive (default: today)

    Steps:
        1. Sort the start-book tables for the range
           [start date - WINDOW_WIDTH days, end date] by 'marker' into one
           temporary table.
        2. For every day in [start date, end date]: map the day's
           success-book and partner-booking tables (whichever exist) into a
           temporary table, add 'mow_eventtime', sort by 'marker', and
           join-reduce it against the sorted start table with merge_records.
           Merged rows go to the day's table under OUTPUT_PATH; rows for
           markers without a start record are appended to ERROR_TABLE.

    Exits via sys.exit() without processing when settings.ENVIRONMENT is not
    in ALLOWED_ENVS.
    """
    import travel.avia.admin.init_project  # noqa

    import logging

    from django.conf import settings

    import travel.avia.admin.lib.yt_helpers as yth
    from travel.avia.admin.lib.logs import add_stdout_handler, create_current_file_run_log
    from travel.avia.admin.lib.yt_helpers import configure_wrapper

    log = logging.getLogger(__name__)
    create_current_file_run_log()

    today_str = datetime.today().strftime(DATE_FORMAT)

    optparser = OptionParser()

    optparser.add_option('-v', '--verbose', action='store_true')
    optparser.add_option('-p', '--proxy', dest='proxy', default=settings.YT_PROXY)
    optparser.add_option('-s', '--start-date', dest='start_date', default=today_str)
    optparser.add_option('-e', '--end-date', dest='end_date', default=today_str)

    # Positional arguments are not used.
    options, _args = optparser.parse_args()

    if options.verbose:
        add_stdout_handler(log)

    else:
        yt_logger_config.LOG_LEVEL = 'WARNING'
        # Re-import yt.logger after changing the configured level
        # (`reload` here is the Python 2 builtin).
        reload(yt_logger)

    log.debug('Pytz path: %s', PYTZ_PATH)
    configure_wrapper(yt)
    if options.proxy != settings.YT_PROXY:
        yt.config['proxy']['url'] = options.proxy

    # Ship the local pytz package with the pickled job code; presumably the
    # cluster nodes need it because the mappers call pytz.
    yt.config['pickling']['additional_files_to_archive'] = generate_path_for_yt(
        find_all_files_recursive(PYTZ_PATH), PYTZ_PATH, 'pytz'
    )

    log.info('Start')

    current_env = settings.ENVIRONMENT
    if current_env not in ALLOWED_ENVS:
        allowed_envs_str = ', '.join(ALLOWED_ENVS)
        log.info('Current ENVIRONMENT %s. Run only %s allowed.', current_env, allowed_envs_str)
        sys.exit()

    start_date = datetime.strptime(options.start_date, DATE_FORMAT)
    end_date = datetime.strptime(options.end_date, DATE_FORMAT)

    current_date = start_date

    log.info('Sorting start tables')
    start_table = yt.create_temp_table()
    # Start records may precede the booking by up to WINDOW_WIDTH days, so the
    # range of start tables begins that many days before the first day.
    # The helper's result is passed on as is (no accidental 1-tuple wrapper).
    start_tables = yth.tables_for_daterange(
        yt,
        START_BOOK_LOG_PATH,
        (start_date - timedelta(WINDOW_WIDTH)).date(),
        end_date.date(),
    )
    yt.run_sort(
        source_table=start_tables,
        destination_table=start_table,
        sort_by='marker',
        spec={
            'schema_inference_mode': 'from_output',
        },
    )

    while current_date <= end_date:
        date_str = current_date.strftime(DATE_FORMAT)
        log.info('Work: %s', date_str)
        success_log_table = os.path.join(SUCCESS_BOOK_LOG_PATH, date_str)
        partner_log_table = os.path.join(PARTNER_BOOK_LOG_PATH, date_str)

        # Both booking sources are mapped into one temporary table; either
        # source may be missing for a given day.
        temp_table = yt.create_temp_table()
        if yt.exists(success_log_table):
            yt.run_map(
                source_table=success_log_table,
                destination_table=temp_table,
                binary=success_book_mapper,
            )

        if yt.exists(partner_log_table):
            yt.run_map(
                source_table=partner_log_table,
                # Append so rows written by the previous map are kept.
                destination_table='<append=true>' + temp_table,
                binary=partner_booking_mapper,
            )

        yt.run_map(
            source_table=temp_table,
            destination_table=temp_table,
            binary=with_mow_eventtime_mapper,
        )

        # The reduce below joins by 'marker', so its primary input is sorted
        # by that column first.
        yt.run_sort(
            source_table=temp_table,
            destination_table=temp_table,
            sort_by='marker',
        )

        output_table = os.path.join(OUTPUT_PATH, date_str)

        if not yt.exists(output_table):
            yt.create(
                'table',
                output_table,
                attributes={
                    'schema': SCHEMA,
                    'optimize_for': 'scan',
                },
                recursive=True
            )

        # merge_records sets '@table_index' on every row it yields: 0 for the
        # day's output table, 1 for the error table.
        yt.run_reduce(
            source_table=['<primary=true>' + temp_table, '<foreign=true>' + start_table],
            destination_table=[output_table, '<append=true>' + ERROR_TABLE],
            binary=merge_records,
            reduce_by=['marker'],
            join_by=['marker'],
            format=yt.YsonFormat(control_attributes_mode="row_fields")
        )

        current_date += timedelta(days=1)

    log.info('Done')
