# -*- encoding: utf-8 -*-
import functools
import operator
import sys

from collections import defaultdict
from copy import copy
from datetime import datetime
from optparse import Option, OptionParser, OptionValueError

import yt.wrapper as yt
import yt.logger_config as yt_logger_config
import yt.logger as yt_logger


def yt_last_logs_tables(path, days):
    """Return the paths of the most recent daily log tables under ``path``.

    Every table found by ``yt.search`` must be named after its date
    (``YYYY-MM-DD`` as the last path component); any other name makes
    ``datetime.strptime`` raise ``ValueError``.

    :param path: YT directory to search for tables.
    :param days: maximum number of tables to return.
    :return: list of table paths, newest first.
    """
    dated_tables = [
        (datetime.strptime(table_path.split('/')[-1], '%Y-%m-%d'), table_path)
        for table_path in yt.search(path, node_type="table")
    ]
    # Newest first; ties on the date fall back to comparing the paths.
    dated_tables.sort(reverse=True)

    return [table_path for _, table_path in dated_tables[:days]]


def check_number(option, opt, value):
    """optparse type checker that converts an option value to ``int``.

    :param option: the Option instance being processed (unused).
    :param opt: the option string as given on the command line, e.g. ``-d``.
    :param value: the raw string value supplied for the option.
    :return: ``value`` converted with ``int()``.
    :raises OptionValueError: if ``value`` is not a valid integer literal.
    """
    try:
        return int(value)
    except ValueError:
        raise OptionValueError("option %s: bad number: %r" % (opt, value))


# Convert station to settlement
def convert_point(point_key, stations_map):
    """Map a station point key to its settlement key; pass other keys through.

    :param point_key: point key string; keys starting with ``'s'`` are
        looked up in ``stations_map``.
    :param stations_map: dict of station point key -> settlement point key.
    :return: the mapped key, ``None`` for a station key that is not in
        ``stations_map``, or ``point_key`` itself when it does not start
        with ``'s'``.
    """
    if not point_key.startswith('s'):
        return point_key

    return stations_map.get(point_key)


def main():
    """Rebuild the SimilarDirection table from recent user search logs in YT.

    Steps:
      1. Parse the command-line options (``--days``, ``--number``,
         ``--proxy``, ``--verbose``).
      2. Map the last ``--days`` daily log tables into (direction, uuid) rows.
      3. Reduce them into a "directions per user" table and a
         "users per direction" table.
      4. For every direction count how many users also searched each other
         direction and store the top ``--number`` of them as
         SimilarDirection rows (the table is fully replaced).

    Temporary YT tables are removed whether or not the run succeeds.
    Any exception raised by the processing steps is logged and the process
    exits with status 1.
    """
    # Imported for its side effects only; it has to precede the Django imports
    # (presumably it configures the project settings -- verify).
    import travel.avia.admin.init_project  # noqa

    import logging

    from django.conf import settings
    from django.db import transaction

    from travel.avia.library.python.avia_data.models import SimilarDirection
    from travel.avia.library.python.common.models.geo import Station
    from travel.avia.admin.lib.logs import add_stdout_handler, create_current_file_run_log
    from travel.avia.admin.lib.yt_helpers import configure_wrapper

    log = logging.getLogger(__name__)
    create_current_file_run_log()

    def search_map(stations_map, record):
        """Mapper: turn one search-log record into a (direction, uuid) row.

        Stations are replaced by their settlements via ``convert_point``.
        The user id is ``passportuid`` when it is a non-zero integer,
        otherwise ``yandexuid``. Nothing is yielded when either point cannot
        be resolved, when no usable user id is present or when the record's
        ``service`` is not ``'ticket'``.
        """
        from_key = convert_point(record['from_id'], stations_map)
        to_key = convert_point(record['to_id'], stations_map)

        # Parse the two id fields independently: a missing field (None) or an
        # empty / non-numeric value must neither crash the job nor block the
        # fallback to the other field.
        uuid = 0
        for field in ('passportuid', 'yandexuid'):
            try:
                uuid = long(record.get(field))

            except (TypeError, ValueError):
                continue

            if uuid:
                break

        if all([from_key, to_key, uuid, record.get('service') == 'ticket']):
            direction = '%s_%s' % (from_key, to_key)

            yield {
                'direction': direction,
                'uuid': uuid
            }

    def search_reduce(key, records):
        """Reducer over rows grouped by uuid.

        Yields the comma-joined set of distinct directions of a user, but
        only for users with more than one distinct direction.
        """
        directions = set()

        for r in records:
            directions.add(r['direction'])

        if len(directions) > 1:
            yield {
                'uuid': key['uuid'],
                'directions': ','.join(directions)
            }

    def search_reverse_reduce(key, records):
        """Reducer over rows grouped by direction.

        Yields the comma-joined set of distinct uuids of a direction, but
        only for directions searched by more than one distinct uuid.
        """
        uuids = set()

        for r in records:
            uuids.add(r['uuid'])

        if len(uuids) > 1:
            yield {
                'direction': key['direction'],
                'uuids': ','.join(uuids)
            }

    def precached_objects():
        """Build the station -> settlement point key map used by the mapper.

        Only visible plane stations that have a settlement and an IATA or
        Sirena code are included.
        """
        stations = Station.objects.filter(
            t_type__code='plane',
            settlement_id__isnull=False,
            hidden=False
        )

        stations = [s for s in stations if s.iata or s.sirena_id]

        return {s.point_key: s.settlement.point_key for s in stations}

    def prepare_data(stations_map, intermediate_table, tmp_table_data, tmp_table_index, log):
        """Run the YT operations that fill the data and index tables.

        :param stations_map: station -> settlement map passed to the mapper.
        :param intermediate_table: table receiving the (direction, uuid) rows;
            it is sorted in place twice, first by uuid and then by direction.
        :param tmp_table_data: output table with the directions of each uuid.
        :param tmp_table_index: output table with the uuids of each direction.
        :param log: logger used for progress messages.
        """
        source_tables = yt_last_logs_tables('//home/rasp/logs/rasp-users-search-log', options.days)

        log.info('Prepare intermediate table: %s -> %s', source_tables, intermediate_table)
        yt.run_map(
            functools.partial(search_map, stations_map),
            source_table=source_tables,
            destination_table=intermediate_table,
            format=yt.DsvFormat(),
            spec={"data_size_per_job": settings.YT_DATA_SIZE_PER_JOB},
        )

        log.info('Sort intermediate table: %s ', intermediate_table)
        yt.run_sort(
            source_table=intermediate_table,
            sort_by=['uuid'],
        )

        log.info('Build data table: %s ', tmp_table_data)
        yt.run_reduce(
            search_reduce,
            intermediate_table,
            tmp_table_data,
            format=yt.DsvFormat(),
            reduce_by=['uuid'],
            spec={"data_size_per_job": settings.YT_DATA_SIZE_PER_JOB},
        )

        # The same intermediate table is re-sorted for the second reduce.
        log.info('Sort intermediate table: %s ', intermediate_table)
        yt.run_sort(
            source_table=intermediate_table,
            sort_by=['direction'],
        )

        log.info('Build index table: %s ', tmp_table_index)
        yt.run_reduce(
            search_reverse_reduce,
            intermediate_table,
            tmp_table_index,
            format=yt.DsvFormat(),
            reduce_by=['direction'],
            spec={"data_size_per_job": settings.YT_DATA_SIZE_PER_JOB},
        )

    @transaction.atomic
    def make_results(tmp_table_data, tmp_table_index, log):
        """Replace all SimilarDirection rows with freshly computed ones.

        Runs inside one transaction, so the old rows are only lost when the
        new ones are committed. A row that fails to save is logged and
        skipped; the remaining rows are still saved.

        :param tmp_table_data: YT table with ``uuid`` / ``directions`` columns.
        :param tmp_table_index: YT table with ``direction`` / ``uuids`` columns.
        :param log: logger used for progress messages.
        """
        log.info('Load data from %s', tmp_table_data)

        uuids_data = {}  # uuid -> list of directions searched by that user
        index_data = {}  # direction -> list of uuids that searched it
        # results[direction][other] = number of users who searched both
        results = defaultdict(lambda: defaultdict(int))

        for record in yt.read_table(tmp_table_data, format=yt.DsvFormat(), raw=False):
            uuid = long(record['uuid'])
            directions = record['directions'].split(',')

            uuids_data[uuid] = directions

        log.info('Load index from %s', tmp_table_index)
        for record in yt.read_table(tmp_table_index, format=yt.DsvFormat(), raw=False):
            direction = record['direction']
            uuids = [long(u) for u in record['uuids'].split(',')]

            index_data[direction] = uuids

        all_directions = set()

        for directions in uuids_data.values():
            for direction in directions:
                all_directions.add(direction)

        log.info('Make data')
        for direction in all_directions:
            uuids = index_data.get(direction)

            if not uuids:
                continue

            for uuid in uuids:
                uuid_directions = uuids_data.get(uuid)

                if not uuid_directions:
                    continue

                for uuid_direction in uuid_directions:
                    results[direction][uuid_direction] += 1

        log.info('Save data')
        SimilarDirection.objects.all().delete()

        for direction, sim_dirs in results.items():
            # Drop the direction itself and pairs seen for a single user only.
            filtered_sim_dirs = {k: v for k, v in sim_dirs.iteritems() if v > 1 and k != direction}
            sorted_sim_dirs = sorted(
                filtered_sim_dirs.items(),
                key=operator.itemgetter(1),
                reverse=True
            )[:options.quantity]

            for sd, count in sorted_sim_dirs:
                from_key, to_key = sd.split('_')

                # The first character of each key is stripped to get the id
                # (presumably a one-letter point type prefix such as 'c' --
                # verify that only settlement keys can reach this point).
                similar_direction = SimilarDirection(
                    direction=direction,
                    similar_from_id=int(from_key[1:]),
                    similar_to_id=int(to_key[1:]),
                    count=count
                )

                try:
                    # Nested atomic() creates a savepoint: a failed save is
                    # rolled back on its own instead of leaving the outer
                    # transaction marked for rollback, which would make every
                    # following query fail.
                    with transaction.atomic():
                        similar_direction.save()

                except Exception:
                    log.warning(
                        'Can not save similar direction %s for %s', sd, direction,
                        exc_info=True
                    )
                    continue

        sd_count = SimilarDirection.objects.all().count()

        log.info('%s rows saved', sd_count)

    # BEGIN main()
    optparser = OptionParser(option_class=Yoption)

    optparser.add_option('-v', '--verbose', action='store_true')
    optparser.add_option("-d", "--days", dest="days", type="days", help="number of last logs to aggregate", default=7)
    optparser.add_option("-n", "--number", dest="quantity", type="quantity", help="quantity of max directions", default=10)
    optparser.add_option("-p", "--proxy", dest="proxy", default=settings.YT_PROXY)

    options, args = optparser.parse_args()

    if options.verbose:
        add_stdout_handler(log)

    else:
        # Quiet mode: lower the YT log level and reload the logger module so
        # that the new level is applied.
        yt_logger_config.LOG_LEVEL = 'WARNING'
        reload(yt_logger)

    log.info('Start')

    # Every temporary table created so far; removed in ``finally`` below.
    temp_tables = []

    try:
        configure_wrapper(yt)

        stations_map = precached_objects()

        log.info('Number of days: %s', options.days)
        log.info('%s airports with settlemet loaded', len(stations_map))

        log.info('---------------------------------')

        intermediate_table = yt.create_temp_table()
        temp_tables.append(intermediate_table)

        tmp_table_data = yt.create_temp_table()
        temp_tables.append(tmp_table_data)

        tmp_table_index = yt.create_temp_table()
        temp_tables.append(tmp_table_index)

        prepare_data(stations_map, intermediate_table, tmp_table_data, tmp_table_index, log)

        make_results(tmp_table_data, tmp_table_index, log)

    except Exception:
        log.exception('Error:')
        sys.exit(1)

    finally:
        # Best-effort cleanup that also runs on failure; a removal error is
        # logged but must not hide the original outcome of the run.
        for temp_table in temp_tables:
            try:
                yt.remove(temp_table)

            except Exception:
                log.exception('Can not remove temporary table %s', temp_table)

    log.info('Done')


class Yoption(Option):
    """optparse ``Option`` subclass that adds the script's custom value types.

    ``days`` and ``quantity`` are validated with ``check_number``.
    ``countries`` is declared as a type but has no checker registered here.
    """

    TYPES = Option.TYPES + ("days", "quantity", "countries")
    # Start from a copy of the stock checkers so the base class mapping is
    # left untouched.
    TYPE_CHECKER = dict(
        Option.TYPE_CHECKER,
        days=check_number,
        quantity=check_number
    )
