# -*- encoding: utf-8 -*-
import csv
import os
import sys
from collections import defaultdict
from datetime import datetime, timedelta
from functools import partial
from itertools import chain
from optparse import OptionParser
from urlparse import urljoin

import requests

import yt.wrapper as yt
import yt.logger_config as yt_logger_config
import yt.logger as yt_logger

# YT paths of the input logs.
# NOTE(review): the "1d" / "30min" suffixes look like table granularity -- inferred
# from the path names only, confirm against the YT layout.
YT_VARIANTS_LOG_1_DAY_ROOT_PATH = '//logs/avia-variants-log/1d'
YT_VARIANTS_LOG_30_MIN_ROOT_PATH = '//logs/avia-variants-log/30min'
YT_RASP_VARIANTS_LOG_ROOT_PATH = '//logs/rasp-im-trainpricing-log/1d'
# Used as the source table range of YAKS_YQL_QUERY.
YT_TICKET_DAEMON_QUERY_LOG = '//logs/rasp-partners-query-log/1d'
# Cache tables with the already mapped (filtered) avia / rasp variants logs.
# main() creates them on first run and refreshes them from the new source tables.
YT_VARIANTS_LOG_MAPPER_RESULT_PATH = '//home/avia/air-traffic-stat/reduced-variants-log'
YT_RASP_VARIANTS_LOG_MAPPER_RESULT_PATH = '//home/avia/air-traffic-stat/reduced-rasp-variants-log'

# CSV files shipped next to this module; their first column is read as settlement ids
# by get_rasp_allowed_settlements().
DATA_PATH = os.path.join(os.path.dirname(__file__), 'data', 'after_covid_resurrection')
MAP_LITTLE_DOTS_CSV = os.path.join(DATA_PATH, 'little_dots.csv')
BIG_SETTLEMENTS_CSV = os.path.join(DATA_PATH, 'big_settlements.csv')

# YQL template that produces "yak" rows: searches that returned no variants.
# Placeholders: {source_table}, {start_date}, {end_date}, {destination_table}.
# Steps performed by the query:
#   1. Read the table range and keep only qids containing 'None_economy_1_0_0_ru';
#      the 4th dot-separated part of qid is split on "_" into from point, to point
#      and forward date.
#   2. Sum variants_len per (from_point, to_point, forward_date, event day), keeping
#      only points whose code starts with 'c'.
#   3. Overwrite the destination table with the groups whose sum is 0, emitting the
#      numeric settlement ids and zero direct / connecting route counts.
YAKS_YQL_QUERY = """
USE hahn;

$q = (
SELECT
    CAST(variants_len as Int64) as variants_len,
    -- qid,
    String::SplitToList(String::SplitToList(qid,".")[3],"_")[0] as from_point,
    String::SplitToList(String::SplitToList(qid,".")[3],"_")[1] as to_point,
    String::SplitToList(String::SplitToList(qid,".")[3],"_")[2] as forward_date,
    String::SplitToList(iso_eventtime, ' ')[0] as iso_event_day

FROM Range(`{source_table}`, `{start_date}`, `{end_date}`)
WHERE String::Contains(qid, 'None_economy_1_0_0_ru')
);

$sum = SELECT SUM(variants_len) as variants_sum, from_point, to_point, forward_date, iso_event_day
FROM $q WHERE String::StartsWith(from_point, 'c') AND String::StartsWith(to_point, 'c')
GROUP BY (from_point, to_point, forward_date, iso_event_day)
;

INSERT INTO `{destination_table}` WITH TRUNCATE
SELECT
    CAST(String::SplitToList(from_point, 'c')[1] as Int64) as from_settlement_id,
    CAST(String::SplitToList(to_point, 'c')[1] as Int64) as to_settlement_id,
    forward_date,
    iso_event_day,
    0 as direct_flight_routes,
    0 as connecting_flight_routes
FROM $sum WHERE variants_sum = 0;
"""


def ordered_group_by(iterable, key):
    """Group items by ``key(item)``, keeping groups in first-seen order.

    Unlike ``itertools.groupby`` the input does not need to be sorted by the key:
    all items with the same key end up in one group.

    :param iterable: items to group
    :param key: callable returning a hashable group key for an item
    :return: iterator over ``(group_key, [items])`` pairs
    """
    from collections import OrderedDict
    groups = OrderedDict()
    for item in iterable:
        groups.setdefault(key(item), []).append(item)
    # items() exists on both Python 2 and 3 (iteritems() is Python 2 only);
    # iter() keeps the "returns an iterator" contract of the original.
    return iter(groups.items())


def read_csv_settlement_ids(csv_path):
    """Yield settlement ids from the first column of a ';'-delimited CSV file.

    The first row is treated as a header and skipped. Completely empty rows
    (for example trailing blank lines) are ignored instead of raising IndexError.

    :param csv_path: path to the CSV file
    :raises ValueError: if a first-column value is not an integer
    """
    # The csv module expects a binary file on Python 2 and a text file on Python 3.
    mode = 'rb' if sys.version_info[0] == 2 else 'r'
    with open(csv_path, mode) as f:
        reader = csv.reader(f, delimiter=';')
        # Skip title. The default keeps an empty file from leaking StopIteration
        # out of this generator (an error under PEP 479).
        next(reader, None)
        for line in reader:
            if not line:
                continue
            yield int(line[0])

def chunks(lst, n):
    """Split ``lst`` into consecutive slices of at most ``n`` elements, lazily."""
    total = len(lst)
    for start in range(0, total, n):
        stop = start + n
        yield lst[start:stop]


def reduce_by_key_with_min_price(key, records):
    """Reducer: emit one row per key carrying the smallest ``national_price`` of the group."""
    cheapest = min(records, key=lambda row: row['national_price'])
    yield dict(key, national_price=cheapest['national_price'])


def rasp_filter_and_prepare(express_code_to_settlement, record):
    """Mapper: turn a train pricing log record into a settlement-to-settlement row.

    Records without a minimal price, with a station code missing from
    ``express_code_to_settlement``, or with both stations in the same settlement
    produce nothing.

    :param dict express_code_to_settlement: station code (int) -> settlement id
    :param dict record: raw log record
    """
    price = record['cMinPrice']
    if price:
        origin = express_code_to_settlement.get(int(record['tOriginStationCode']))
        destination = express_code_to_settlement.get(int(record['tDestinationStationCode']))

        if origin and destination and origin != destination:
            yield {
                'from_settlement_id': origin,
                'to_settlement_id': destination,
                'forward_date': record['tDepartureDateTime'][:10],
                'direct_flight': True,
                'route_key': record['tDisplayTrainNumber'],
                'iso_event_day': record['iso_eventtime'][:10],
                'national_price': int(price),
            }


def get_rasp_allowed_settlements():
    """Return the set of settlement ids that rail statistics are computed for.

    The set is the union of Russian settlements whose majority is at most
    REGION_CAPITAL_ID and the ids listed in the two bundled CSV files.
    """
    from travel.avia.library.python.common.models.geo import Country, CityMajority, Settlement

    region_capitals_query = Settlement.objects.filter(
        country_id=Country.RUSSIA_GEO_ID,
        majority_id__lte=CityMajority.REGION_CAPITAL_ID
    )
    allowed = set(region_capitals_query.values_list('id', flat=True))
    for csv_path in (MAP_LITTLE_DOTS_CSV, BIG_SETTLEMENTS_CSV):
        allowed.update(read_csv_settlement_ids(csv_path))
    return allowed


def filter_allowed_settlements(allowed_settlements, record):
    """Mapper: pass the record through only when both endpoints are allowed."""
    if record['from_settlement_id'] not in allowed_settlements:
        return
    if record['to_settlement_id'] not in allowed_settlements:
        return
    yield record


def add_rasp_yaks(key, records):
    """Reducer: add "yak" placeholder rows to train statistics.

    All incoming records are passed through unchanged. Then, for each of the next
    60 days starting today that has no record with that ``forward_date``, a
    placeholder row is emitted with today's date as ``iso_event_day``, no price
    and zero route counts.

    :param dict key: reduce key of the group; copied into every placeholder row
    :param records: records of the group, each with a string ``forward_date``
    """
    today = datetime.now().date()
    event_day = str(today)

    completed_forward_dates = set()
    for record in records:
        completed_forward_dates.add(record['forward_date'])
        yield record

    # range() is used instead of the Python 2 only xrange(); 60 items is negligible.
    for forward_delta in range(60):
        yak_date = str(today + timedelta(days=forward_delta))
        if yak_date in completed_forward_dates:
            continue
        yield dict(
            key,
            forward_date=yak_date,
            iso_event_day=event_day,
            min_price=None,
            direct_flight_routes=0,
            connecting_flight_routes=0
        )


def filter_and_prepare(record):
    """Mapper: keep plain one-way economy searches and reshape them for aggregation.

    Yields at most one row with the settlement pair, forward date, route key,
    search day, query key and the price converted to rubles.
    """
    skip = (
        # TODO(olegpro): round trips are ignored for now; they could be added for
        # completeness if one-way searches alone turn out to be insufficient
        record.get('date_backward')
        # TODO(olegpro): for some reason the 'date_backward' condition is not enough and
        # qkeys with return dates remain: c10493_c100_2020-12-28_2020-12-30_economy_1_0_0_ru
        or 'None' not in record['query_id']
        # TODO(olegpro): other national versions have worse coverage, the index would
        # not come out well, so it is better to ignore them for now
        or record['national_version_id'] != 1
        # Only economy class searches are taken into account.
        or record['class_id'] != 1
        # TODO(olegpro): other passenger sets are ignored for now. Their flight
        # combinations could be used for index completeness without computing a min price
        or record['adults'] != 1
        or record['children'] != 0
        or record['infants'] != 0
        # TODO(olegpro): searches to/from airports are ignored, but they could be
        # mapped to cities and added.
        or record['from_airport_id']
        or record['to_airport_id']
    )
    if skip:
        return

    transfers = record['forward_count_transfers']
    # Every itinerary with more than one transfer shares the same ';;' route key.
    route_key = ';;' if transfers > 1 else ';'.join(
        segment['route'] for segment in record['forward_segments']
    )
    query_parts = record['query_id'].split('.')
    qkey = query_parts[3]

    prepared = {
        'from_settlement_id': record['from_settlement_id'],
        'to_settlement_id': record['to_settlement_id'],
        'forward_date': record['forward_date'],
        'direct_flight': bool(transfers == 0),
        'route_key': route_key,
        'iso_event_day': record['iso_eventtime'][:10],
        'qkey': qkey,
        'national_price': int(record['national_price'] / 100),  # convert kopecks to rubles
    }
    yield prepared


def combine_by_search_day(key, records):
    """Reducer: aggregate one search day of a direction and forward date.

    Emits a single row with the minimal price and the number of distinct direct
    and connecting route keys. A non-string ``forward_date`` in the key is treated
    as a timestamp and converted to an ISO date string.
    """
    direct_routes, connecting_routes = set(), set()
    cheapest = None

    for row in records:
        price = row['national_price']
        cheapest = price if cheapest is None else min(cheapest, price)

        bucket = direct_routes if row['direct_flight'] else connecting_routes
        bucket.add(row['route_key'])

    summary = dict(
        key,
        min_price=cheapest,
        direct_flight_routes=len(direct_routes),
        connecting_flight_routes=len(connecting_routes),
    )
    forward_date = key['forward_date']
    # Avia keys carry a timestamp, rasp keys already carry a string; only the former is converted.
    if not isinstance(forward_date, str):
        summary['forward_date'] = str(datetime.fromtimestamp(forward_date).date())
    yield summary


def full_data_reducer(key, records):
    """Reducer: collapse the per-search-day aggregates of one direction and forward date.

    Emits one row with the values of the latest search day (``direct_flights``,
    ``connecting_flights``, ``last_min_price``) and the extremes over all search days
    (``historical_max_*`` and ``historical_min_price``).

    Note: "yak" rows, which have no ``min_price``, are mixed into the input. A yak and
    a variants-log record should not coincide for one search day, but if they do the
    yak wins: once a yak is stored for a day it is never replaced.

    :param dict key: must contain from_settlement_id, to_settlement_id, forward_date
    :param records: rows with iso_event_day, direct_flight_routes,
        connecting_flight_routes and an optional min_price
    """
    by_search_day = {}

    for record in records:
        result = {
            'min_price': record.get('min_price'),
            'direct_flight_routes': record['direct_flight_routes'],
            'connecting_flight_routes': record['connecting_flight_routes'],
        }
        search_day = record['iso_event_day']
        # Overwrite only a priced entry: a stored yak (falsy min_price) is kept, so an
        # empty ticket-daemon result takes precedence over a non-empty variants log.
        if search_day not in by_search_day or by_search_day[search_day]['min_price']:
            by_search_day[search_day] = result

    # values() works on both Python 2 and 3 (itervalues() is Python 2 only).
    aggregates = list(by_search_day.values())
    prices = [v['min_price'] for v in aggregates if v['min_price']]
    min_price = min(prices) if prices else None
    max_direct_flights_count = max(v['direct_flight_routes'] for v in aggregates)
    max_connecting_flights_count = max(v['connecting_flight_routes'] for v in aggregates)

    # Todo(olegpro): consider computing the last search date with yaks taken into account
    # while not NULL-ing the min price and taking the last known one instead
    last_search_date = max(by_search_day)
    event_day_aggregation = by_search_day[last_search_date]

    yield {
        'from_settlement_id': key['from_settlement_id'],
        'to_settlement_id': key['to_settlement_id'],
        'forward_date': key['forward_date'],
        'last_search_date': last_search_date,
        'direct_flights': event_day_aggregation['direct_flight_routes'],
        'connecting_flights': event_day_aggregation['connecting_flight_routes'],
        'last_min_price': event_day_aggregation['min_price'],
        'historical_max_direct_flights': max_direct_flights_count,
        'historical_max_connecting_flights': max_connecting_flights_count,
        'historical_min_price': min_price,
    }


def add_next_direct_flight_search_day(search_days_window, s2s_settlements, key, records):
    """Reducer: enrich the rows of one direction with the next direct-flight day and coefficients.

    For every forward date of the direction it adds ``next_direct_flight_day`` (the
    nearest date, this one or a later one, that has direct flights according to recent
    searches) and the statistics returned by ``calculate_coefficient``.

    Rows are dropped when the forward date is already in the past, when the direction
    was last searched more than ``search_days_window`` days ago, or when there were no
    direct flights either around the forward date or around the same date a year before.

    :param int search_days_window: searches older than this many days are not used
    :param s2s_settlements: settlement ids; directions arriving there are skipped entirely
    :param dict key: reduce key, must contain ``to_settlement_id``
    :param records: rows produced by ``full_data_reducer`` for one direction
    """
    if key['to_settlement_id'] in s2s_settlements:
        return

    by_forward_date = {}
    for record in records:
        by_forward_date[record['forward_date']] = record

    # Sample the clock once so "today" and the window start cannot land on
    # different days when the job runs across midnight.
    now = datetime.now()
    today = str(now.date())
    actual_search_day = str((now - timedelta(days=search_days_window)).date())

    direct_flight_days = calculate_next_direct_flight_day(by_forward_date, actual_search_day)
    new_coefficients_by_forward_date = calculate_coefficient(by_forward_date)

    # items() works on both Python 2 and 3 (iteritems() is Python 2 only).
    for forward_date, record in by_forward_date.items():
        statistic = new_coefficients_by_forward_date[forward_date]
        # Drop what has already departed: the figures about the past are already computed.
        # Drop what has not been searched on the site for more than search_days_window days.
        # Drop what had no direct flights in the search history, neither around the
        # forward date nor in the week around the same date a year ago.
        if (
            forward_date >= today
            and record['last_search_date'] >= actual_search_day
            and (
                statistic['last_year_week_historical_max_direct_flights']
                or statistic['this_year_week_direct_flights']
            )
        ):
            yield dict(
                record,
                next_direct_flight_day=direct_flight_days.get(forward_date),
                **statistic
            )


def calculate_next_direct_flight_day(by_forward_date, actual_search_day):
    """For every forward date find the nearest date (itself or later) with direct flights.

    Dates are walked from the latest to the earliest. A date counts as having direct
    flights only if it was last searched on or after ``actual_search_day``; that
    boundary is inclusive so it matches the ``>=`` freshness filter applied to the
    emitted rows in ``add_next_direct_flight_search_day``.

    :param dict by_forward_date: ISO forward date -> row with ``last_search_date``
        and ``direct_flights``
    :param str actual_search_day: ISO date of the oldest search still considered fresh
    :return: dict ISO forward date -> ISO date of the next direct flight, or None
    """
    direct_flight_days = {}
    next_direct_flight_day = None

    for forward_date in sorted(by_forward_date, reverse=True):
        record = by_forward_date[forward_date]
        # todo(olegpro): this check could be moved to the caller
        is_fresh = record['last_search_date'] >= actual_search_day
        if is_fresh and record['direct_flights'] > 0:
            next_direct_flight_day = forward_date
        direct_flight_days[forward_date] = next_direct_flight_day
    return direct_flight_days


def calculate_coefficient(by_forward_date):
    """Compute the recovery coefficient for every forward date.

    For a window of +-3 days around the forward date, the current direct flights are
    compared with the historical maximum of direct flights on the same dates one year
    earlier: ``coefficient = 5 * current / past``, rounded to one digit and capped at 5.
    If there were no direct flights a year earlier, the coefficient is 5 when there are
    direct flights now and 0 otherwise.

    :param dict by_forward_date: ISO forward date -> row with ``direct_flights`` and
        ``historical_max_direct_flights``
    :return: dict ISO forward date -> dict with ``coefficient``,
        ``last_year_week_historical_max_direct_flights`` and ``this_year_week_direct_flights``
    """
    coefficients_by_forward_date = {}

    for forward_date in by_forward_date:
        past_direct_flights = 0
        current_direct_flights = 0
        fwd_date = datetime.strptime(forward_date, '%Y-%m-%d').date()

        # range() is used instead of the Python 2 only xrange().
        for days_shift in range(-3, 4):
            current_date = fwd_date + timedelta(days=days_shift)
            if current_date.day == 29 and current_date.month == 2:
                # February 29 does not exist in the previous year: fall back to February 28.
                past_date = current_date.replace(year=current_date.year - 1, day=current_date.day - 1)
            else:
                past_date = current_date.replace(year=current_date.year - 1)

            past_row = by_forward_date.get(str(past_date), {})
            current_row = by_forward_date.get(str(current_date), {})
            past_direct_flights += past_row.get('historical_max_direct_flights', 0)
            current_direct_flights += current_row.get('direct_flights', 0)

        if past_direct_flights:
            coefficient = round((current_direct_flights * 5. / past_direct_flights), 1)
            coefficient = min(coefficient, 5.)
        elif current_direct_flights > 0:
            coefficient = 5.
        else:
            coefficient = 0.

        coefficients_by_forward_date[forward_date] = {
            'coefficient': coefficient,
            'last_year_week_historical_max_direct_flights': past_direct_flights,
            'this_year_week_direct_flights': current_direct_flights,
        }

    return coefficients_by_forward_date


def add_city_coefficient(key, records):
    """Reducer: attach a per-group ``city_coefficient`` to every record.

    The coefficient is ``5 * actual / historical max`` over the whole group, rounded to
    one digit. Direct flights are used when the group has any historical direct
    flights, otherwise connecting flights; with no history at all it is 0.
    """
    rows = list(records)

    def total(field):
        return sum(row[field] for row in rows)

    actual_flights = total('direct_flights')
    historical_max_flights = total('historical_max_direct_flights')
    if not historical_max_flights:
        actual_flights = total('connecting_flights')
        historical_max_flights = total('historical_max_connecting_flights')

    if historical_max_flights:
        coefficient = round(5. * (float(actual_flights) / historical_max_flights), 1)
    else:
        coefficient = 0.

    for row in rows:
        yield dict(row, city_coefficient=coefficient)


class ResultsWriter(object):
    """Copies the computed statistic tables from YT into AirTrafficRecoveryStat rows."""

    def __init__(self, yt_client, logger):
        self._logger = logger
        self._yt_client = yt_client

    @staticmethod
    def _parse_date(date):
        """Parse a 'YYYY-MM-DD' string into a date; falsy input gives None."""
        if not date:
            return None
        return datetime.strptime(date, '%Y-%m-%d').date()

    def _fill_model(self, air_traffic_stat_model, record, transport):
        """Copy the fields of a YT record onto the model instance and set its transport."""
        # (model attribute, record column, converter or None for a plain copy)
        field_spec = (
            ('departure_settlement_id', 'from_settlement_id', None),
            ('arrival_settlement_id', 'to_settlement_id', None),
            ('date_forward', 'forward_date', self._parse_date),
            ('last_search_date', 'last_search_date', self._parse_date),
            ('direct_flights', 'direct_flights', None),
            ('connecting_flights', 'connecting_flights', None),
            ('last_min_price', 'last_min_price', None),
            ('historical_max_direct_flights', 'historical_max_direct_flights', None),
            ('historical_max_connecting_flights', 'historical_max_connecting_flights', None),
            ('historical_min_price', 'historical_min_price', None),
            ('next_direct_flight_day', 'next_direct_flight_day', self._parse_date),
            ('coefficient', 'coefficient', None),
            ('city_coefficient', 'city_coefficient', None),
        )
        for attribute, column, convert in field_spec:
            value = record[column]
            setattr(air_traffic_stat_model, attribute, value if convert is None else convert(value))
        air_traffic_stat_model.transport = transport

    def _write_rows(self, settlement_ids, table_path, transport):
        """Replace all stored rows of the given transport with the contents of a YT table.

        Records whose departure or arrival settlement is not in ``settlement_ids``
        are skipped. The delete and the bulk insert run in one transaction.
        """
        from travel.avia.library.python.avia_data.models.air_traffic_recovery import AirTrafficRecoveryStat, TransportType
        from django.db import transaction

        def is_known(record):
            return (
                int(record['from_settlement_id']) in settlement_ids
                and int(record['to_settlement_id']) in settlement_ids
            )

        def build(record):
            stat = AirTrafficRecoveryStat()
            self._fill_model(stat, record, transport)
            return stat

        self._logger.info('Read %s', table_path)
        rows = self._yt_client.read_table(table_path, format=yt.JsonFormat(), raw=False)
        objects = [build(record) for record in rows if is_known(record)]

        transport_name = TransportType(transport).name
        self._logger.info('Save %s statistic', transport_name)
        with transaction.atomic():
            AirTrafficRecoveryStat.objects.filter(transport=transport).delete()
            AirTrafficRecoveryStat.objects.bulk_create(objects, batch_size=10000)

        self._logger.info('%d %s lines updated', len(objects), transport_name)

    def upload_to_db(self, avia_results_table_path, rasp_results_table_path):
        """Upload the plane table and then the train table into the database."""
        from travel.avia.library.python.avia_data.models.air_traffic_recovery import TransportType
        from travel.avia.library.python.common.models.geo import Settlement

        settlement_ids = set(Settlement.objects.all().values_list('id', flat=True))

        uploads = (
            (avia_results_table_path, TransportType.plane.value),
            (rasp_results_table_path, TransportType.train.value),
        )
        for table_path, transport in uploads:
            self._write_rows(settlement_ids, table_path, transport)


def get_s2s_settlements():
    """Return the settlement ids referenced by Station2Settlement, minus a fixed exclusion list."""
    from travel.avia.library.python.common.models.geo import Station2Settlement

    exclusions = {
        99,  # Munich
        213,  # Moscow
        10448,  # Milan
        10441,  # Tenerife
        11511,  # Antalya
        21275,  # Monastir
    }
    settlement_ids = set(Station2Settlement.objects.all().values_list('settlement_id', flat=True))
    settlement_ids.difference_update(exclusions)
    return settlement_ids


class Heater(object):
    """Builds the list of directions to pre-heat and posts it to YEAH as a custom heater config."""

    EXP_CONFIG_NAME = 'kudamozhno'
    LAST_SEARCH_DATE_DELTA = 13
    MAX_FORWARD_DELTA = 60  # Number of days ahead for which directions from Moscow and St. Petersburg are heated
    MAX_EMPTY_DATES_DIRECTIONS = 50000

    def __init__(self, logger):
        self._logger = logger

    @staticmethod
    def _direction_config(experiment, departure_settlement_id, arrival_settlement_id,
                          min_forward_delta, max_forward_delta):
        """Build one heater request; every field except the arguments is fixed."""
        return dict(
            experiment=experiment,
            national_version='ru',
            lang='ru',
            travel_time=0,
            req_count=1,
            code_from='c%d' % departure_settlement_id,
            code_to='c%d' % arrival_settlement_id,
            min_forward_delta=min_forward_delta,
            max_forward_delta=max_forward_delta,
        )

    def heat(self):
        """Collect the directions of all three experiments and post them as one config."""
        config = list(chain(self.old_search_directions(), self.mow_led_directions(), self.empty_dates_directions()))
        self._logger.info('Prepared %d directions for YEAH', len(config))

        self._post_config(config, self.EXP_CONFIG_NAME)

    def empty_dates_directions(self):
        """Yield requests for near-future dates that have no statistic row yet.

        Only plane directions with historical direct flights are considered; departures
        from settlements 2 and 213 are excluded. At most MAX_EMPTY_DATES_DIRECTIONS
        requests are produced.
        """
        from travel.avia.library.python.avia_data.models.air_traffic_recovery import AirTrafficRecoveryStat, TransportType

        directions_cnt = 0
        today = datetime.now().date()

        directions = (
            AirTrafficRecoveryStat.objects.filter(
                transport=TransportType.plane.value,
                historical_max_direct_flights__gt=0,
                date_forward__lte=today + timedelta(days=self.MAX_FORWARD_DELTA),
            )
            .exclude(departure_settlement_id__in={2, 213})  # LED and MOW are heated separately, unconditionally
            .order_by('-last_search_date')
            .values_list('departure_settlement_id', 'arrival_settlement_id', 'date_forward')
        )

        # Explicit floor division: identical to "/" on Python 2 ints, still an int on Python 3.
        half_horizon = self.MAX_FORWARD_DELTA // 2

        for direction_key, group in ordered_group_by(directions, key=lambda x: x[:2]):
            completed_forward_dates = set(row[2] for row in group)

            # Directions with many known dates get the full horizon, the rest only half of it.
            max_forward_delta = self.MAX_FORWARD_DELTA if len(group) > half_horizon else half_horizon
            for forward_delta in range(max_forward_delta):
                yak_date = today + timedelta(days=forward_delta)
                if yak_date in completed_forward_dates:
                    continue
                yield self._direction_config(
                    'empty-dates', direction_key[0], direction_key[1], forward_delta, forward_delta,
                )

                directions_cnt += 1
                if directions_cnt == self.MAX_EMPTY_DATES_DIRECTIONS:
                    return

    def old_search_directions(self):
        """Yield requests for future dates with direct flights whose last search is stale.

        A row is stale when its last search is at least LAST_SEARCH_DATE_DELTA days old.
        Only departures from ``settlements()`` are considered.
        """
        from travel.avia.library.python.avia_data.models.air_traffic_recovery import AirTrafficRecoveryStat, TransportType

        # Sampled once so the staleness filter and the forward delta use the same day.
        now = datetime.now().date()

        directions = AirTrafficRecoveryStat.objects.filter(
            transport=TransportType.plane.value,
            last_search_date__lte=(now - timedelta(days=self.LAST_SEARCH_DATE_DELTA)),
            direct_flights__gt=0,
            departure_settlement_id__in=self.settlements(),
        ).values_list('departure_settlement_id', 'arrival_settlement_id', 'date_forward')

        for departure_settlement_id, arrival_settlement_id, date_forward in directions:
            date_forward_delta = (date_forward - now).days
            if date_forward_delta <= 0:
                continue

            yield self._direction_config(
                'old-searches', departure_settlement_id, arrival_settlement_id,
                date_forward_delta, date_forward_delta,
            )

    def mow_led_directions(self):
        """Yield one request per known plane direction departing from settlement 213 or 2.

        Each request covers the whole range from today to MAX_FORWARD_DELTA days ahead.
        """
        from travel.avia.library.python.avia_data.models.air_traffic_recovery import AirTrafficRecoveryStat, TransportType

        directions = set(AirTrafficRecoveryStat.objects.filter(
            transport=TransportType.plane.value,
            departure_settlement_id__in={213, 2},
        ).values_list('departure_settlement_id', 'arrival_settlement_id'))

        for departure_settlement_id, arrival_settlement_id in directions:
            yield self._direction_config(
                'russian-capitals', departure_settlement_id, arrival_settlement_id,
                0, self.MAX_FORWARD_DELTA,
            )

    def _post_config(self, directions, config_name):
        """POST the directions as JSON to YEAH; a non-200 answer is logged, not raised."""
        from django.conf import settings

        if not settings.YEAH_URL:
            self._logger.warning('YEAH_Url is not set')
        else:
            # NOTE(review): no timeout is passed, so a hung YEAH endpoint blocks the run
            # indefinitely -- consider adding one once an acceptable value is agreed on.
            response = requests.post(
                urljoin(settings.YEAH_URL, 'heater_config/custom/{}'.format(config_name)),
                json=directions
            )
            if response.status_code != 200:
                self._logger.error(
                    'YEAH Error. Status code: %d, message: %r',
                    response.status_code, response.content
                )

    @staticmethod
    def settlements():
        """Return ids of Russian settlements up to region-capital majority plus three extra cities."""
        from travel.avia.library.python.common.models.geo import Settlement, CityMajority, Country

        return tuple(Settlement.objects.filter(
            country_id=Country.RUSSIA_GEO_ID,
            majority_id__lte=CityMajority.REGION_CAPITAL_ID
        ).values_list('id', flat=True)) + (
            10335,  # Tashkent
            22177,  # Almaty
            163,  # Nur-Sultan
        )


def main():
    import travel.avia.admin.init_project  # noqa

    import logging

    from django.conf import settings

    from yql.api.v1.client import YqlClient

    from travel.avia.admin.lib import yql_helpers
    from travel.avia.admin.lib.logs import add_stdout_handler, create_current_file_run_log
    from travel.avia.admin.lib.yt_helpers import configure_wrapper, last_logs_tables, temp_table, safe_tables_for_daterange

    logger = logging.getLogger(__name__)
    create_current_file_run_log()

    def update_rasp_statistic(ytc, yql, options, source_tables, destination_table):
        primary_reduce_key = (
            'from_settlement_id', 'to_settlement_id', 'forward_date', 'iso_event_day', 'direct_flight', 'route_key',
        )
        if not ytc.exists(YT_RASP_VARIANTS_LOG_MAPPER_RESULT_PATH):
            ytc.create(
                'table',
                YT_RASP_VARIANTS_LOG_MAPPER_RESULT_PATH,
                recursive=True,
                attributes={
                    'schema': [
                        {'name': 'from_settlement_id', 'type': 'int64'},
                        {'name': 'to_settlement_id', 'type': 'int64'},
                        {'name': 'forward_date', 'type': 'string'},
                        {'name': 'direct_flight', 'type': 'boolean'},
                        {'name': 'route_key', 'type': 'string'},
                        {'name': 'iso_event_day', 'type': 'string'},
                        {'name': 'national_price', 'type': 'int64'},
                    ],
                    'optimize_for': 'scan',
                },
            )
            ytc.run_sort(
                source_table=YT_RASP_VARIANTS_LOG_MAPPER_RESULT_PATH,
                sort_by=primary_reduce_key,
            )

        if options.days > 0:
            for source_tables_chunk in chunks(source_tables, 80):
                update_rasp_cached_map_result(primary_reduce_key, source_tables_chunk, ytc, yql)
        else:
            logger.info('Skip updating %s', YT_RASP_VARIANTS_LOG_MAPPER_RESULT_PATH)

        with temp_table(ytc) as reduced_table_path:
            allowed_settlements = get_rasp_allowed_settlements()
            ytc.run_map_reduce(
                mapper=partial(filter_allowed_settlements, allowed_settlements),
                reducer=combine_by_search_day,
                source_table=YT_RASP_VARIANTS_LOG_MAPPER_RESULT_PATH,
                destination_table=reduced_table_path,
                reduce_by=('from_settlement_id', 'to_settlement_id', 'forward_date', 'iso_event_day'),
            )  # Предварительный подсчет агрегатов

            ytc.run_map_reduce(
                mapper=None,
                reducer=add_rasp_yaks,
                source_table=reduced_table_path,
                destination_table=reduced_table_path,
                reduce_by=('from_settlement_id', 'to_settlement_id'),
            )

            ytc.run_map_reduce(
                mapper=None,
                reducer=full_data_reducer,
                source_table=reduced_table_path,
                destination_table=destination_table,
                reduce_by=('from_settlement_id', 'to_settlement_id', 'forward_date'),
            )

        ytc.run_map_reduce(
            mapper=None,
            reducer=partial(add_next_direct_flight_search_day, options.search_days_window, set()),
            source_table=destination_table,
            destination_table=destination_table,
            reduce_by=('from_settlement_id', 'to_settlement_id'),
        )

        ytc.run_map_reduce(
            mapper=None,
            reducer=add_city_coefficient,
            source_table=destination_table,
            destination_table=destination_table,
            reduce_by=('from_settlement_id', 'forward_date'),
        )

    def calc_direction_statistic(ytc, yql, options, source_tables, destination_table):
        primary_reduce_key = (
            'from_settlement_id', 'to_settlement_id', 'forward_date', 'iso_event_day',
            'qkey', 'direct_flight', 'route_key',
        )
        if not ytc.exists(YT_VARIANTS_LOG_MAPPER_RESULT_PATH):
            ytc.create(
                'table',
                YT_VARIANTS_LOG_MAPPER_RESULT_PATH,
                recursive=True,
                attributes={
                    'schema': [
                        {'name': 'from_settlement_id', 'type': 'int64'},
                        {'name': 'to_settlement_id', 'type': 'int64'},
                        {'name': 'forward_date', 'type': 'int64'},
                        {'name': 'qkey', 'type': 'string'},
                        {'name': 'direct_flight', 'type': 'boolean'},
                        {'name': 'route_key', 'type': 'string'},
                        {'name': 'iso_event_day', 'type': 'string'},
                        {'name': 'national_price', 'type': 'int64'},
                    ],
                    'optimize_for': 'scan',
                },
            )
            ytc.run_sort(
                source_table=YT_VARIANTS_LOG_MAPPER_RESULT_PATH,
                sort_by=primary_reduce_key,
            )

        if options.days > 0:
            for source_tables_chunk in chunks(source_tables, 80):
                update_cached_map_result(primary_reduce_key, source_tables_chunk, ytc, yql)
        else:
            logger.info('Skip updating %s', YT_VARIANTS_LOG_MAPPER_RESULT_PATH)

        with temp_table(ytc) as reduced_table_path:
            ytc.run_reduce(
                combine_by_search_day,
                source_table=YT_VARIANTS_LOG_MAPPER_RESULT_PATH,
                destination_table=reduced_table_path,
                reduce_by=('from_settlement_id', 'to_settlement_id', 'forward_date', 'iso_event_day'),
            )  # Предварительный подсчет агрегатов

            with temp_table(ytc) as yaks_table_path:
                query = YAKS_YQL_QUERY.format(
                    source_table=YT_TICKET_DAEMON_QUERY_LOG,
                    start_date=datetime.now().date() - timedelta(days=options.search_days_window),
                    end_date=datetime.now().date(),
                    destination_table=yaks_table_path,
                )

                operation = yql.query(query, syntax_version=1)
                operation.run()
                operation.wait_progress()
                if not operation.is_success:
                    yql_helpers.log_errors(operation, logger)
                    sys.exit(1)

                ytc.run_map_reduce(
                    mapper=None,
                    reducer=full_data_reducer,
                    source_table=(reduced_table_path, yaks_table_path),
                    destination_table=destination_table,
                    reduce_by=('from_settlement_id', 'to_settlement_id', 'forward_date'),
                )

        ytc.run_map_reduce(
            mapper=None,
            reducer=partial(add_next_direct_flight_search_day, options.search_days_window, get_s2s_settlements()),
            source_table=destination_table,
            destination_table=destination_table,
            reduce_by=('from_settlement_id', 'to_settlement_id'),
        )

        ytc.run_map_reduce(
            mapper=None,
            reducer=add_city_coefficient,
            source_table=destination_table,
            destination_table=destination_table,
            reduce_by=('from_settlement_id', 'forward_date'),
        )

    def _refresh_cached_map_result(mapper, cached_table_path, primary_reduce_key, source_tables, ytc, yql):
        """Merge freshly mapped log rows into a cached, sorted result table.

        Shared implementation behind ``update_cached_map_result`` and
        ``update_rasp_cached_map_result``; the two differ only in the mapper
        and in the cached table they maintain.

        Steps:
          1. map ``source_tables`` with ``mapper`` into a temporary table;
          2. sort that table by ``primary_reduce_key``;
          3. reduce it together with the current ``cached_table_path`` using
             ``reduce_by_key_with_min_price`` into a second temporary table;
          4. overwrite ``cached_table_path`` with the reduced rows via YQL;
          5. sort ``cached_table_path`` by ``primary_reduce_key``.

        :param mapper: callable passed to ``ytc.run_map``.
        :param cached_table_path: YT path of the cached table to refresh.
        :param primary_reduce_key: key columns used for sorting and reducing.
        :param source_tables: input log tables for the map step.
        :param ytc: YT client (anything exposing run_map/run_sort/run_reduce).
        :param yql: YQL client exposing ``query(text, syntax_version=...)``.

        Terminates the process with ``sys.exit(1)`` if the YQL operation does
        not succeed (errors are logged first).
        """
        with temp_table(ytc) as tmp_table:
            ytc.run_map(
                mapper,
                source_table=source_tables,
                destination_table=tmp_table,
            )  # filter out everything we do not need from the logs
            ytc.run_sort(
                source_table=tmp_table,
                sort_by=primary_reduce_key,
            )
            with temp_table(ytc) as reduced_map_table_path:
                # NOTE(review): the original code carried the YT error text
                # "Output table //home/avia/air-traffic-stat/reduced-variants-log
                # is not sorted: job outputs have overlapping key ranges" at this
                # point. Presumably that is why the reduce writes to a temporary
                # table and the cached table is then overwritten and re-sorted
                # instead of being used as the reduce destination -- confirm.
                ytc.run_reduce(
                    reduce_by_key_with_min_price,
                    source_table=(tmp_table, cached_table_path),
                    destination_table=reduced_map_table_path,
                    reduce_by=primary_reduce_key,
                )  # drop duplicates to lower the complexity of the later reducer

                query = '''
                    USE hahn;
                    PRAGMA yt.InferSchema = '1';

                    INSERT INTO `{destination_table}` WITH TRUNCATE
                    SELECT * FROM `{source_table}`;
                '''.format(
                    source_table=reduced_map_table_path,
                    destination_table=cached_table_path,
                )
                operation = yql.query(query, syntax_version=1)
                operation.run()
                operation.wait_progress()
                if not operation.is_success:
                    yql_helpers.log_errors(operation, logger)
                    sys.exit(1)

        # The reduce step above passes the cached table as a reduce input, so
        # leave it sorted by the same key once it has been rewritten.
        ytc.run_sort(
            source_table=cached_table_path,
            sort_by=primary_reduce_key,
        )

    def update_cached_map_result(primary_reduce_key, source_tables, ytc, yql):
        """Refresh YT_VARIANTS_LOG_MAPPER_RESULT_PATH from avia variants logs.

        Rows from ``source_tables`` are mapped with ``filter_and_prepare`` and
        merged into the cached table; see ``_refresh_cached_map_result``.
        """
        _refresh_cached_map_result(
            filter_and_prepare,
            YT_VARIANTS_LOG_MAPPER_RESULT_PATH,
            primary_reduce_key,
            source_tables,
            ytc,
            yql,
        )

    def update_rasp_cached_map_result(primary_reduce_key, source_tables, ytc, yql):
        """Refresh YT_RASP_VARIANTS_LOG_MAPPER_RESULT_PATH from rasp logs.

        Builds a mapping from station code (as int) to settlement id out of
        ``StationCode`` rows with ``system_id=2``, binds it to
        ``rasp_filter_and_prepare`` and merges the mapped rows into the cached
        table; see ``_refresh_cached_map_result``.
        """
        from travel.avia.library.python.common.models.geo import StationCode

        express_code_to_settlement = {
            int(code): settlement_id for code, settlement_id in StationCode.objects
            .filter(system_id=2).values_list('code', 'station__settlement')
        }  # system_id=2 is the "Express" coding system

        _refresh_cached_map_result(
            partial(rasp_filter_and_prepare, express_code_to_settlement),
            YT_RASP_VARIANTS_LOG_MAPPER_RESULT_PATH,
            primary_reduce_key,
            source_tables,
            ytc,
            yql,
        )

    # BEGIN main()
    # Entry-point body: parse options, build the avia and rasp statistics in
    # temporary YT tables, upload them, and (in production) run the heater.
    optparser = OptionParser()

    optparser.add_option('-v', '--verbose', action='store_true')
    optparser.add_option('-d', '--days', dest='days', type='int', help='number of last logs to aggregate', default=2)
    optparser.add_option('-w', '--search-days-window', dest='search_days_window', type='int', help='', default=14)

    options, args = optparser.parse_args()

    if options.verbose:
        add_stdout_handler(logger)

    else:
        # Quiet mode: raise the YT logger threshold and reload the module,
        # presumably so that it re-reads LOG_LEVEL -- confirm against yt.logger.
        yt_logger_config.LOG_LEVEL = 'WARNING'
        reload(yt_logger)

    logger.info('Start')

    try:
        configure_wrapper(yt)
        yql_client = YqlClient(token=settings.YQL_TOKEN)
        # Pass the value as a logging argument (as the calls below do) rather
        # than pre-formatting the message with `%`.
        logger.info('Number of days: %s', options.days)
        avia_source_tables = last_logs_tables(yt, YT_VARIANTS_LOG_1_DAY_ROOT_PATH, options.days)

        if options.days:
            # Add the 30-minute tables for the range [today, today + 1 day)
            # on top of the daily tables collected above.
            today = datetime.now().date()
            avia_source_tables.extend(safe_tables_for_daterange(
                yt, YT_VARIANTS_LOG_30_MIN_ROOT_PATH, today, today + timedelta(days=1), date_format='%Y-%m-%dT%H:%M:%S'
            ))

        logger.info('Start operation with %d tables', len(avia_source_tables))
        avia_destination_tmp_table = yt.create_temp_table()
        calc_direction_statistic(yt, yql_client, options, avia_source_tables, avia_destination_tmp_table)

        rasp_source_tables = sorted(last_logs_tables(yt, YT_RASP_VARIANTS_LOG_ROOT_PATH, options.days))
        logger.info('Start trains operation with %d tables', len(rasp_source_tables))
        rasp_destination_tmp_table = yt.create_temp_table()
        update_rasp_statistic(yt, yql_client, options, rasp_source_tables, rasp_destination_tmp_table)

        # Both result tables are uploaded together in a single call.
        writer = ResultsWriter(yt, logger)
        writer.upload_to_db(avia_destination_tmp_table, rasp_destination_tmp_table)

        if settings.ENVIRONMENT == 'production':
            logger.info('Start preparing YEAH config')
            heater = Heater(logger)
            heater.heat()

    except Exception:
        # Top-level boundary: log the traceback and exit with a failure code.
        # SystemExit raised by nested helpers is not an Exception subclass and
        # therefore passes through this handler untouched.
        logger.exception('Error:')
        sys.exit(1)

    logger.info('Done')
