# -*- coding: utf-8 -*-
import logging
import os
import sys
from collections import defaultdict, Counter
from datetime import datetime, timedelta
from operator import itemgetter

from travel.avia.library.python.iata_correction import IATACorrector


def direct_flight(flight_number):
    """Check that a flight-number value is non-empty and contains no comma.

    A comma in the value is taken to mean several flight numbers joined
    together, so only comma-free values count as direct flights.

    Falsy input (``None``, ``''``) is returned unchanged rather than coerced
    to ``False``; callers should rely on truthiness only.
    """
    if not flight_number:
        return flight_number
    return ',' not in flight_number


def mapper(record):
    """Map one input record to zero, one or two direct-flight rows.

    Reads ``city_from_key`` / ``city_to_key`` and the ``forward`` /
    ``backward`` flight-number fields of ``record``. For every leg whose
    flight number passes ``direct_flight`` a row is yielded with:

    * ``direction`` -- ``"<departure key>_<arrival key>"`` (reversed for the
      backward leg);
    * ``flight_number`` -- the flight number of that leg.

    Records without both city keys produce nothing. A missing ``forward`` or
    ``backward`` field is treated the same as an empty one instead of raising
    ``KeyError``, so a single incomplete record cannot fail the whole job.
    """
    from_key = record.get('city_from_key')
    to_key = record.get('city_to_key')

    if not (from_key and to_key):
        return

    # (record field, departure key, arrival key) for each leg; the forward leg
    # is emitted first, exactly as before.
    legs = (
        ('forward', from_key, to_key),
        ('backward', to_key, from_key),
    )
    for field, departure_key, arrival_key in legs:
        flight_number = record.get(field)
        if direct_flight(flight_number):
            yield {
                'direction': '%s_%s' % (departure_key, arrival_key),
                'flight_number': flight_number,
            }


def most_frequent(key, records):
    """Reduce a group of rows to the single most common ``direction``.

    Yields exactly one dict: the fields of ``key`` plus ``direction`` set to
    the value that occurs most often among ``records``. Raises ``IndexError``
    when ``records`` is empty.
    """
    direction_counts = Counter(row['direction'] for row in records)
    top_direction = direction_counts.most_common(1)[0][0]
    yield dict(key, direction=top_direction)


def calculate_airline_intersections(records, logger, max_for_airline=6, min_rate=0.1):
    """Find, for every airline, the airlines that serve the same directions.

    :param records: iterable of mappings with ``flight_number`` and
        ``direction`` keys.
    :param logger: logger used for progress and error reporting; it is also
        passed to ``IATACorrector``.
    :param max_for_airline: maximum number of similar airlines kept per
        airline (taken from the top of the rate ranking).
    :param min_rate: rates must be strictly greater than this value to be
        kept.
    :return: dict mapping an airline id (whatever
        ``IATACorrector.flight_numbers_to_carriers`` returns for a flight
        number) to a list of ``(other_airline_id, rate)`` tuples sorted by
        descending rate.

    The rate is asymmetric: it is the share of *this* airline's directions
    that the other airline serves as well.
    """
    logger.info(u'Start')
    processed_flights = set()
    unknown_flights = set()
    airline_directions = defaultdict(set)

    # Materialised because the records are traversed twice.
    records = list(records)

    iata_corrector = IATACorrector(logger=logger)
    company_ids_by_flight_number = iata_corrector.flight_numbers_to_carriers(
        [record['flight_number'] for record in records]
    )

    for record in records:
        try:
            flight_number = record['flight_number']
            # Read the direction before touching the defaultdict: otherwise a
            # record without it would leave an empty direction set behind and
            # cause a division by zero further down.
            direction = record['direction']
            company_id = company_ids_by_flight_number.get(flight_number)
            if not company_id:
                unknown_flights.add(flight_number)
                continue
            processed_flights.add(flight_number)
            airline_directions[company_id].add(direction)
        except Exception:
            # Best effort: skip the broken record but keep the traceback.
            logger.exception('Error while parsing record %r', record)

    logger.info('Processed %d flights for %d airlines', len(processed_flights), len(airline_directions))
    if unknown_flights:
        logger.info('Could not resolve an airline for %d flight numbers', len(unknown_flights))

    airline_intersection_rates = {}
    for airline, directions in airline_directions.items():
        # Float divisor keeps true division under Python 2 as well.
        direction_count = float(len(directions))
        intersection_rates = {
            other: len(directions & other_directions) / direction_count
            for other, other_directions in airline_directions.items()
            if other != airline
        }

        # Take the top candidates first, then drop the ones below threshold.
        best_matches = sorted(
            intersection_rates.items(), key=itemgetter(1), reverse=True
        )[:max_for_airline]
        airline_intersection_rates[airline] = [
            (other, rate) for other, rate in best_matches if rate > min_rate
        ]

        logger.debug(
            '%r. Directions_count: %d. Similar airlines: %r',
            airline, len(directions), airline_intersection_rates[airline]
        )
    return airline_intersection_rates


def _main():
    """Rebuild the ``SimilarAirlines`` table from the ticket show logs in YT.

    Steps:

    1. parse command-line options (``--verbose``, ``--days``) and set up
       logging;
    2. run a map-reduce over the show-log tables of the last ``--days`` days
       into a temporary table, one row per flight number with its most
       frequent direction;
    3. compute airline intersection rates from that table;
    4. replace the contents of ``SimilarAirlines`` with the result inside a
       single DB transaction.

    Any failure is logged and the process exits with status 1.
    """
    import yt.wrapper as yt
    import yt.logger_config as yt_logger_config
    import yt.logger as yt_logger
    from django.db import transaction
    from travel.avia.admin.lib.logs import add_stdout_handler, create_current_file_run_log
    from travel.avia.admin.lib.yt_helpers import safe_tables_for_daterange, configure_wrapper
    from travel.avia.library.python.avia_data.models import SimilarAirlines

    log = logging.getLogger(__name__)
    create_current_file_run_log()
    from optparse import OptionParser

    optparser = OptionParser()
    optparser.add_option('-v', '--verbose', action='store_true')
    optparser.add_option('-d', '--days', dest='days', type='int',
                         help="number of last logs to aggregate", default=365)
    options, args = optparser.parse_args()

    if options.verbose:
        add_stdout_handler(log)

    else:
        # Quiet mode: raise the YT log level and reload the YT logger module
        # so that it picks the new level up.
        yt_logger_config.LOG_LEVEL = 'WARNING'
        reload(yt_logger)

    @transaction.atomic
    def store_result(airline_intersection_rates):
        """Replace all ``SimilarAirlines`` rows with the given rates."""
        log.info('Store SimilarAirlines to DB')
        # NOTE(review): an empty result wipes the table without writing
        # anything back -- confirm that this is the intended behaviour.
        SimilarAirlines.objects.all().delete()
        objects = []
        for airline, similar_airlines in airline_intersection_rates.items():
            for similar_airline, rate in similar_airlines:
                objects.append(SimilarAirlines(
                    airline_id=airline,
                    similar_airline_id=similar_airline,
                    score=rate,
                ))
        try:
            SimilarAirlines.objects.bulk_create(objects)
        except Exception:
            log.error('Error on saving SimilarAirline')
            raise

    def aggregate_show_logs(tmp_table):
        """Run the map-reduce over the show-log tables into ``tmp_table``."""
        now = datetime.now()
        left_dt = now - timedelta(days=options.days)
        source_tables = safe_tables_for_daterange(
            yt, '//home/rasp/logs/rasp-tickets-show-log',
            left_dt.date(), now.date()
        )

        log.info('Map show log tables: %s -> %s', source_tables, tmp_table)
        yt.run_map_reduce(
            mapper=mapper,
            reducer=most_frequent,
            source_table=source_tables,
            destination_table=tmp_table,
            sort_by='flight_number',
            reduce_by='flight_number',
            format=yt.DsvFormat()
        )

    try:
        configure_wrapper(yt)
        tmp_table = yt.create_temp_table()
        try:
            aggregate_show_logs(tmp_table)
            airline_intersections = calculate_airline_intersections(
                yt.read_table(tmp_table, format=yt.DsvFormat(), raw=False), log
            )
        finally:
            # Drop the temporary table even when the aggregation or the
            # calculation fails, so failed runs do not leave it behind.
            yt.remove(tmp_table)
        store_result(airline_intersections)

    except Exception:
        log.exception('Error:')
        sys.exit(1)

    log.info('Done')


def main():
    """Entry point: initialise the project and run ``_main`` via the script runner.

    The script code passed to the runner is this file's base name without
    its extension.
    """
    import travel.avia.admin.init_project  # noqa
    from travel.avia.admin.lib.script_helpers.script_runner import script_runner

    # splitext instead of a fixed [:-3] slice: the slice is only right for a
    # ".py" suffix and would turn "name.pyc" into "name.".
    script_code = os.path.splitext(os.path.basename(__file__))[0]
    script_runner.run_in_process(_main, script_code=script_code)
