# coding: utf8
""" Export of search-form statistics for the Rasp (Schedules) desktop/touch front pages and the Avia front page. """
import logging
from collections import defaultdict

import yt
from django.conf import settings

from common.data_api.search_stats.search_stats_saver import SearchStatsSaver
from common.db.mongo import databases
from common.models.geo import Station
from common.utils.lock import lock
from travel.rasp.library.python.common23.logging import log_run_time, create_current_file_run_log
from travel.rasp.suggests_tasks.suggests.generate.ytwork.search_stat_runner import create_client, StatTablesHelper, SearchStatRunner
from travel.rasp.suggests_tasks.suggests.storage import Storage
from travel.rasp.suggests_tasks.suggests.utils import enumer

# Logger used by every function in this module.
logger = logging.getLogger('yt_search_stat')
# YT client created once at import time from the Django settings
# (YT_CLUSTER / YT_TOKEN) and used by the table-reading functions below.
client = create_client(proxy=settings.YT_CLUSTER, token=settings.YT_TOKEN)


def get_geoid(row):
    """Return the row's 'geoid' value converted to int.

    None is returned when the key is absent or when its value cannot be
    converted (int() raising ValueError or TypeError).
    """
    try:
        geoid = int(row.get('geoid'))
    except (ValueError, TypeError):
        geoid = None
    else:
        pass
    return geoid


def get_stat_from_yt(table):
    """Load per-object search totals from a YT table.

    Every row is read through the module-level YT client in 'json' format.
    The first element of row['obj_id'] is taken as the object type and the
    remainder is parsed as an integer id.

    Returns a nested mapping:
        {(obj_type, obj_id): {transport_type: {geoid_or_None: total}}}
    A later row with the same keys overwrites an earlier one.
    """
    totals = defaultdict(lambda: defaultdict(dict))

    rows = client.read_table(table, format='json')
    # `each` is handed to the project's enumer helper -- presumably a
    # progress-reporting interval; confirm against its implementation.
    for record in enumer(rows, each=10 ** 5):
        raw_obj_id = record['obj_id']
        obj_key = (raw_obj_id[0], int(raw_obj_id[1:]))
        transport = record['transport_type']
        geo = get_geoid(record)
        count = int(record['total'])
        totals[obj_key][transport][geo] = count

    return totals


def get_routes_stat_from_yt(table):
    """Load per-route search totals from a YT table.

    row['from_id'] and row['to_id'] are each split into a
    (first element, int(rest)) key.

    Returns a nested mapping:
        {transport_type: {from_key: {to_key: {geoid_or_None: total}}}}
    A later row with the same keys overwrites an earlier one.
    """
    def split_key(raw):
        # First element is the type marker, the remainder is the numeric id.
        return raw[0], int(raw[1:])

    result = defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))

    rows = client.read_table(table, format='json')
    for record in enumer(rows, each=10 ** 5):
        departure = split_key(record['from_id'])
        arrival = split_key(record['to_id'])

        transport = record['transport_type']
        geo = get_geoid(record)
        count = int(record['total'])

        result[transport][departure][arrival][geo] = count

    return result


def gen_stat(stations_settlements, days_back, yt_path_prefix):
    """Run the YT search-stat computation and load its results.

    Args:
        stations_settlements: mapping passed through to SearchStatRunner
            (see get_stations_settlements in this module).
        days_back: number of days handed to StatTablesHelper.get_dates_back
            to select the source log tables.
        yt_path_prefix: path component placed between settings.YT_ROOT_PATH
            and 'search_stat' to form the YT working directory.

    Returns:
        dict with keys:
            'source_tables' -- sorted list of the source tables that were
                               given to the runner;
            'by_obj'        -- result of get_stat_from_yt on 'stat_result';
            'routes'        -- result of get_routes_stat_from_yt on
                               'routes_result'.
    """
    yt_work_path = '{}/{}/search_stat'.format(settings.YT_ROOT_PATH, yt_path_prefix)

    # Resolve the source tables exactly once. Listing them again after the
    # run (as was done before) costs an extra YT request and may report a
    # different set than the one actually processed if the log directory
    # changed while the job was running.
    # NOTE(review): assumes get_dates_back returns an iterable of table paths.
    source_tables = list(
        StatTablesHelper(client, settings.YT_SEARCH_LOG_PATH).get_dates_back(days_back)
    )
    # Snapshot before the run so the report does not depend on whatever the
    # runner does with the list it receives.
    processed_tables = sorted(source_tables)

    suggests_stat = SearchStatRunner(client, source_tables, stations_settlements, yt_work_path)
    suggests_stat.run()

    def get_table(name):
        # Result tables live directly under the working directory.
        table_path = '{}/{}'.format(yt_work_path, name)
        return yt.wrapper.TablePath(table_path, client=client)

    stat_table, routes_table = get_table('stat_result'), get_table('routes_result')

    with log_run_time('get_stat_from_yt', logger=logger):
        stat_by_obj = get_stat_from_yt(stat_table)

    with log_run_time('get_routes_stat_from_yt', logger=logger):
        routes_stat = get_routes_stat_from_yt(routes_table)

    return {
        'source_tables': processed_tables,
        'by_obj': stat_by_obj,
        'routes': routes_stat,
    }


def get_stations_settlements():
    """Build a {str(station id): settlement_id} mapping for all stations."""
    settlement_by_station = {}
    for station in Station.objects.values('id', 'settlement_id'):
        # The station id is stored in JSON as a string, settlement_id as a number.
        station_key = str(station['id'])
        settlement_by_station[station_key] = station['settlement_id']
    return settlement_by_station


@lock('suggests_yt_search_stat', timeout=3600)
def main(work_dir, ytprefix=None, days=90):
    """Compute search statistics on YT and persist them.

    The statistics are saved through Storage(work_dir), and the route
    statistics are additionally written to MongoDB via SearchStatsSaver.

    Args:
        work_dir: directory handed to Storage.
        ytprefix: YT path prefix; settings.YT_PREFIX is used when None.
        days: how many days back of search logs to process.

    Any exception is logged with its traceback and not re-raised.
    """
    try:
        yt_prefix = settings.YT_PREFIX if ytprefix is None else ytprefix

        with log_run_time('ytstat', logger=logger):
            stat = gen_stat(
                get_stations_settlements(),
                days_back=days,
                yt_path_prefix=yt_prefix
            )

            Storage(work_dir).save_stat(stat)

            # Store the popular-direction data in MongoDB.
            mongo_saver = SearchStatsSaver(
                databases['default_no_timeout'].search_stats_from,
                databases['default_no_timeout'].search_stats_to,
                top_size=50,
                logger=logger
            )
            with log_run_time('save_routes_stat_to_mongo', logger=logger):
                mongo_saver.save_stats(stat['routes'])

    except Exception as error:
        logger.error(repr(error), exc_info=True)

def run(*args, **kwargs):
    """Set up the run log for this file, then call main with the given arguments.

    All positional and keyword arguments are forwarded to main unchanged.
    """
    create_current_file_run_log()
    main(*args, **kwargs)


if __name__ == '__main__':
    # main() has a required `work_dir` parameter, so a bare run() call would
    # fail with TypeError. Take the arguments from the command line instead.
    import argparse

    parser = argparse.ArgumentParser(
        description='Export search-form statistics from YT.'
    )
    parser.add_argument('work_dir', help='directory passed to Storage')
    parser.add_argument('--ytprefix', default=None,
                        help='YT path prefix (defaults to settings.YT_PREFIX)')
    parser.add_argument('--days', type=int, default=90,
                        help='number of days of search logs to process')
    cli_args = parser.parse_args()

    run(cli_args.work_dir, ytprefix=cli_args.ytprefix, days=cli_args.days)
