# -*- encoding: utf-8 -*-
import heapq
import sys

from datetime import datetime, timedelta
from optparse import OptionParser

import yt.wrapper as yt
import yt.logger_config as yt_logger_config
import yt.logger as yt_logger


def yt_last_logs_tables(path, days):
    """Return up to `days` tables found under `path` with the latest dates.

    Every table's last path component is parsed as a ``YYYY-MM-DD`` date and
    used as the ranking key; the result is ordered from newest to oldest.
    A table whose name does not match that format raises ValueError.
    """
    def table_date(table):
        # The date is the final component of the table path.
        table_name = table.split('/')[-1]
        return datetime.strptime(table_name, '%Y-%m-%d')

    found_tables = yt.search(path, node_type="table")
    return heapq.nlargest(days, found_tables, key=table_date)


def fix_flight_number(flight_number):
    """Normalize a flight number of the form ``"<code> <digits>"``.

    The value is stripped and split on a single space; the numeric part is
    passed through ``int`` (so leading zeros are dropped) and the result is
    re-joined as ``"<code> <number>"``.

    Returns None when the value is missing or empty, does not consist of
    exactly two space-separated parts, has a non-integer second part, or has
    a code that is not exactly two characters long.
    """
    # The caller passes record.get('number'), which is None when the field is
    # absent; without this guard .strip() would raise AttributeError.
    if not flight_number:
        return None

    try:
        iata, number = flight_number.strip().split(' ')
        number = int(number)

    except ValueError:
        # Wrong number of parts or a non-numeric flight number.
        return None

    if len(iata) != 2:
        return None

    return '%s %s' % (iata, number)


def fast_datetime_convert(pydate):
    """Parse a ``YYYY-MM-DD HH:MM:SS`` string by fixed character positions.

    Example input: ``2013-12-03 01:06:51``.

    Only the digit groups at the fixed offsets are read; separator characters
    are not inspected. Returns a datetime, or None for a falsy argument
    (None or an empty string). Non-numeric groups raise ValueError.
    """
    if not pydate:
        return None

    # (start, stop) offsets of year, month, day, hour, minute, second.
    field_bounds = ((0, 4), (5, 7), (8, 10), (11, 13), (14, 16), (17, 19))
    parts = [int(pydate[start:stop]) for start, stop in field_bounds]
    return datetime(*parts)


def map_tablo(record):
    """Mapper: turn one raw flight-board record into at most one output row.

    The flight number is normalized with fix_flight_number, delays are
    computed in whole seconds from planned vs. real timestamps (0 when either
    one is missing), and a labelled flight date is derived from the first
    available of: start_datetime, planned arrival, planned departure.

    Nothing is yielded when the flight number is invalid, when no date could
    be determined, or when the chosen date is later than the current time.
    """
    def delay_seconds(planned, actual):
        # A delay is only defined when both timestamps are present.
        if planned and actual:
            return (actual - planned).total_seconds()
        return 0

    now = datetime.now()
    flight_number = fix_flight_number(record.get('number'))

    planned_arrival = fast_datetime_convert(record['arrival'])
    actual_arrival = fast_datetime_convert(record['real_arrival'])
    arrival_delay = delay_seconds(planned_arrival, actual_arrival)

    planned_departure = fast_datetime_convert(record['departure'])
    actual_departure = fast_datetime_convert(record['real_departure'])
    departure_delay = delay_seconds(planned_departure, actual_departure)

    started_at = fast_datetime_convert(record['start_datetime'])

    # Candidates in priority order, each paired with its label template.
    date_candidates = (
        (started_at, '%s (start)'),
        (planned_arrival, '%s (arr)'),
        (planned_departure, '%s (dep)'),
    )

    event_date = None
    flight_date = None
    for moment, template in date_candidates:
        if moment:
            event_date = moment
            flight_date = template % moment.strftime('%Y-%m-%d')
            break

    # Skip undated records, invalid flight numbers and future events.
    if not (flight_date and flight_number) or event_date > now:
        return

    if planned_arrival or actual_arrival:
        event_type = 'arrival'
    else:
        event_type = 'departure'

    yield {
        'number': flight_number,
        'type': event_type,
        'flight_date': flight_date,
        'unixtime': record['unixtime'],
        'arrival_cancelled': record['arrival_cancelled'],
        'arrival': record['arrival'],
        'arrival_delay': int(arrival_delay),
        'departure_cancelled': record['departure_cancelled'],
        'departure_delay': int(departure_delay),
        'departure': record['departure'],
        'real_arrival': record['real_arrival'],
        'real_departure': record['real_departure'],
    }


def reduce_tablo(key, records):
    """Reducer: keep only the record with the greatest ``unixtime``.

    On equal timestamps the record seen first wins. Yields None when
    `records` is empty. `key` is not used.
    """
    newest = None
    newest_time = None

    # Look for the latest event.
    for candidate in records:
        candidate_time = int(candidate['unixtime'])

        if newest is None or newest_time < candidate_time:
            newest = candidate
            newest_time = candidate_time

    yield newest


def map_scoring(record):
    """Mapper: assign a penalty score to a single flight event.

    Departure events that are not cancelled are dropped. A cancelled event
    scores -5; otherwise the score depends on the arrival delay in hours:
    >= 6 -> -4, >= 3 -> -3, >= 1 -> -2, >= 0.25 -> -1, else 0.

    Yields one row with the score, a ``good`` flag (1 only for score 0) and
    a human-readable description.
    """
    arrival_cancelled = record['arrival_cancelled']
    departure_cancelled = record['departure_cancelled']

    delay_hours = float(record['arrival_delay']) / 3600
    event_type = record['type']

    if event_type == 'departure' and not departure_cancelled:
        return

    is_cancelled = bool(arrival_cancelled or departure_cancelled)

    if is_cancelled:
        score = -5
    else:
        # (minimum delay in hours, penalty), checked from the largest down.
        penalty_table = ((6, -4), (3, -3), (1, -2), (0.25, -1))
        score = 0
        for min_hours, penalty in penalty_table:
            if delay_hours >= min_hours:
                score = penalty
                break

    if is_cancelled:
        description = '%s, %s: cancelled, score %s' % (
            record['number'], record['flight_date'], score
        )
    else:
        planned = record[event_type]
        actual = record['real_%s' % event_type]
        description = '%s, %s: %s planned: %s, real: %s, score %s' % (
            record['number'], record['flight_date'], event_type,
            planned, actual, score
        )

    result = {
        'number': record['number'],
        'flight_date': record['flight_date'],
        'good': 0 if score else 1,
        'score': score,
        'type': event_type,
        'arrival': record['arrival'],
        'departure': record['departure'],
        'description': description
    }
    yield result


def fix_flight_date(key, records):
    """Reducer: shift the arrival's flight date back when it follows departure.

    Records are indexed by their ``type`` (a later record of the same type
    replaces an earlier one). When both a departure and an arrival record
    are present and the arrival time is later than the departure time, the
    arrival record's ``flight_date`` is overwritten with the arrival date
    minus one day. One record per distinct type is yielded. `key` is not
    used.

    NOTE(review): the rewritten ``flight_date`` is a bare ``YYYY-MM-DD``
    without the ``(start)``/``(arr)``/``(dep)`` suffix that map_tablo
    produces - confirm this is intended.
    """
    # If the arrival date is greater than the departure date,
    # this arrival belongs to the previous day.
    events = {}

    for r in records:
        events[r['type']] = r

    departure_record = events.get('departure')
    arrival_record = events.get('arrival')

    if departure_record and arrival_record:
        departure = fast_datetime_convert(departure_record['departure'])
        arrival = fast_datetime_convert(arrival_record['arrival'])

        # fast_datetime_convert returns None for an empty timestamp, and
        # comparing a datetime with None raises TypeError, so only compare
        # when both values were parsed.
        if arrival and departure and arrival > departure:
            arrival_record['flight_date'] = (arrival - timedelta(days=1)).strftime('%Y-%m-%d')

    for r in events.values():
        yield r


def merge_scoring(key, records):
    """Reducer: collapse departure and arrival events into a single row.

    Records are indexed by ``type`` (the last record of each type wins).
    When both events exist, the arrival is kept if the departure's ``good``
    flag is non-zero, otherwise the departure is kept. When only one exists
    it is yielded as is; when neither exists, None is yielded. `key` is not
    used.
    """
    by_type = {rec['type']: rec for rec in records}

    departure_event = by_type.get('departure')
    arrival_event = by_type.get('arrival')

    if not (departure_event and arrival_event):
        yield (departure_event or arrival_event)
        return

    # A good departure carries no penalty, so the arrival decides the score;
    # a bad departure takes precedence over the arrival.
    departure_is_good = int(departure_event.get('good'))
    yield arrival_event if departure_is_good else departure_event


def reduce_scoring(key, records):
    """Reducer: aggregate per-event scores for one flight number.

    Yields a single row with the sum of all non-zero scores, the number of
    events with a zero score (``good``), the number with a non-zero score
    (``bad``) and all descriptions sorted and joined with newlines.
    """
    all_scores = []
    notes = []

    for rec in records:
        all_scores.append(int(rec['score']))
        notes.append(rec['description'])

    penalties = [value for value in all_scores if value]

    yield {
        'number': key['number'],
        'scores': sum(penalties),
        'good': len(all_scores) - len(penalties),
        'bad': len(penalties),
        'description': '\n'.join(sorted(notes)),
    }


def main():
    """Rebuild flight ratings from the latest flight-board log tables in YT.

    Steps: pick the newest `--days` log tables, run the map/sort/reduce
    pipeline into a temporary YT table, replace all FlightRating rows with
    the aggregated result, then remove the temporary table. Any exception is
    logged and the process exits with status 1.
    """
    # Imported for its side effects; must come before the Django imports.
    import travel.avia.admin.init_project  # noqa

    import logging

    from django.conf import settings
    from django.db import transaction

    from travel.avia.library.python.avia_data.models import FlightRating
    from travel.avia.admin.lib.logs import add_stdout_handler, create_current_file_run_log
    from travel.avia.admin.lib.yt_helpers import configure_wrapper

    log = logging.getLogger(__name__)
    create_current_file_run_log()

    @transaction.atomic
    def save_results(table):
        """Replace all FlightRating rows with the ratings read from `table`.

        Flights with fewer than ``options.days / 6`` scored events are
        skipped. On any exception the error is logged and the process exits
        with status 1.
        """
        min_events_count = options.days / 6
        try:
            log.info('Save results from %s' % table)
            FlightRating.objects.all().delete()

            # Read from the table passed in rather than the enclosing
            # tmp_table variable, so the parameter is actually honoured.
            for record in yt.read_table(table, format=yt.DsvFormat(), raw=False):
                scores = int(record['scores'])
                good = int(record['good'])
                bad = int(record['bad'])
                total = good + bad
                description = str(record['description'])

                if total >= min_events_count:
                    flight_rating = FlightRating(
                        number=record['number'],
                        scores=scores,
                        good_count=good,
                        bad_count=bad,
                        bad_percent=float(bad) / total * 100,
                        avg_scores=float(scores) / total,
                        scores_description=description,
                    )

                    flight_rating.save()

        except Exception:
            log.exception('Error:')
            sys.exit(1)

    # BEGIN main()
    optparser = OptionParser()

    optparser.add_option('-v', '--verbose', action='store_true')
    optparser.add_option("-d", "--days", dest="days", type=int, help="number of logs", default=90)
    optparser.add_option("-p", "--proxy", dest="proxy", default=settings.YT_PROXY)

    options, args = optparser.parse_args()

    if options.verbose:
        add_stdout_handler(log)

    else:
        # Lower YT logging verbosity; the logger module is reloaded so that
        # the new level is picked up.
        yt_logger_config.LOG_LEVEL = 'WARNING'
        reload(yt_logger)

    log.info('Start')
    try:
        configure_wrapper(yt)

        source_tables = yt_last_logs_tables('//home/rasp/logs/rasp-tablo-compressed-log', options.days)

        log.info('Create temp table')
        tmp_table = yt.create_temp_table()

        # Stage 1: normalize raw records.
        log.info('Run tablo map')
        yt.run_map(
            map_tablo,
            source_table=source_tables,
            destination_table=tmp_table,
            format=yt.DsvFormat(),
            spec={"data_size_per_job": settings.YT_DATA_SIZE_PER_JOB},
        )

        log.info('Run tablo sort')
        yt.run_sort(
            source_table=tmp_table,
            sort_by=['number', 'flight_date', 'type'],
        )

        # Stage 2: keep the latest record per (number, flight_date, type).
        log.info('Run tablo reduce')
        yt.run_reduce(
            reduce_tablo,
            tmp_table,
            tmp_table,
            format=yt.DsvFormat(),
            reduce_by=['number', 'flight_date', 'type'],
            spec={"data_size_per_job": settings.YT_DATA_SIZE_PER_JOB},
        )

        log.info('Run tablo sort')
        yt.run_sort(
            source_table=tmp_table,
            sort_by=['number', 'flight_date'],
        )

        # Stage 3: score every event.
        log.info('Run scoring map')
        yt.run_map(
            map_scoring,
            source_table=tmp_table,
            destination_table=tmp_table,
            format=yt.DsvFormat(),
            spec={"data_size_per_job": settings.YT_DATA_SIZE_PER_JOB},

        )

        log.info('Run sort before merge')
        yt.run_sort(
            source_table=tmp_table,
            sort_by=['number', 'flight_date'],
        )

        # Stage 4: one row per (number, flight_date).
        log.info('Run scoring reduce (merge results)')
        yt.run_reduce(
            merge_scoring,
            source_table=tmp_table,
            destination_table=tmp_table,
            format=yt.DsvFormat(),
            spec={"data_size_per_job": settings.YT_DATA_SIZE_PER_JOB},
            reduce_by=['number', 'flight_date'],
        )

        log.info('Run sort before cals scores')
        yt.run_sort(
            source_table=tmp_table,
            sort_by=['number', 'flight_date'],
        )

        # Stage 5: aggregate scores per flight number.
        log.info('Run scoring reduce (calc scores)')
        yt.run_reduce(
            reduce_scoring,
            source_table=tmp_table,
            destination_table=tmp_table,
            format=yt.DsvFormat(),
            spec={"data_size_per_job": settings.YT_DATA_SIZE_PER_JOB},
            reduce_by='number',
        )

        save_results(tmp_table)

        log.info('Remove tmp table: %s' % tmp_table)
        yt.remove(tmp_table)

    except Exception:
        log.exception('Error:')
        sys.exit(1)

    log.info('Done')
