# -*- encoding: utf-8 -*-
import travel.avia.admin.init_project  # noqa

import abc
import argparse
import hashlib
import itertools
import logging
import os
import time
from collections import OrderedDict, Counter, namedtuple
from datetime import datetime, timedelta
from operator import itemgetter

import ujson
from django.conf import settings
import pandas as pd

from yql.api.v1.client import YqlClient
from yql.client.parameter_value_builder import YqlParameterValueBuilder as ValueBuilder

from travel.avia.admin.lib.logs import add_stdout_handler, create_current_file_run_log
from travel.avia.admin.lib.yt_helpers import yt_client_fabric


# Module-level logger; a stdout handler is attached to it in _main().
logger = logging.getLogger(__name__)

# Root YT directory of the project and the sub-directory used for cached query results.
YT_AVIA_HOME = '//home/avia/'
YT_TMP_DIRECTORY = YT_AVIA_HOME + 'tmp/'
# YQL query files; they are expected to be located next to this script.
REDIRECT_QUERY_FILE = os.path.join(os.path.dirname(__file__), 'misses_by_TTL.sql')
PRICES_RELEVANCE_QUERY_FILE = os.path.join(os.path.dirname(__file__), 'revise_price_relevance.sql')
LOOK2BOOK_QUERY_FILE = os.path.join(os.path.dirname(__file__), 'look2book.sql')
# Default value of the --start-day command line argument.
DEFAULT_START_DAY = datetime(2019, 7, 4)
# Request sources the statistics are split by; ALL is the aggregate over every source.
QUERY_SOURCES = ('wizard', 'portal', 'rasp')
ALL = 'all'
SOURCES = QUERY_SOURCES + (ALL,)
# Key of the "misses per depth interval" sub-dict inside a counter (see create_empty_counter).
BY_DEPTH_MISSES_KEY = 'by_depth_misses'
MAGIC_L2B_CONSTANT = 100  # average number of bookings per day for an average partner

# globals
# Both storages stay None until _main() assigns them before the statistics loop.
RELEVANCE_STORAGE = None
BOOKS_TO_SEARCH_DEPTH = None
# Maps every TTL strategy object to its accumulated counters; filled by _init_ttl_counters().
TTL_COUNTERS = OrderedDict()
# TTL values (in minutes) used to build the ConstTTLCounter strategies.
CONST_TTLS = (30, 60, 120, 240, 360, 480, 600, 720, 1080, 1440)

# Parsed form of a query key; produced by parse_qkey().
Query = namedtuple(
    'Query', (
        'from_id', 'to_id', 'date_forward', 'date_backward', 'klass', 'adults',
        'children', 'infants', 'national_version', 'date_forward_unixtime'
    )
)


def naive_datetime_to_timestamp(dt, epoch_date=datetime(1970, 1, 1)):
    """
    Convert a naive datetime into the number of whole seconds since ``epoch_date``.

    :param datetime dt: naive datetime to convert
    :param datetime epoch_date: reference point, 1970-01-01 by default
    :return int: seconds between ``epoch_date`` and ``dt``; the fractional part is truncated
    """
    return int((dt - epoch_date).total_seconds())


def create_empty_counter():
    """
    Build a fresh, zeroed statistics container.

    :return dict: one ``{'misses': 0, 'hits': 0, 'relevance': 0}`` dict per entry of SOURCES
     plus an empty dict under BY_DEPTH_MISSES_KEY
    """
    counter = {}
    for source in SOURCES:
        counter[source] = {'misses': 0, 'hits': 0, 'relevance': 0}
    counter[BY_DEPTH_MISSES_KEY] = {}
    return counter


def parse_qkey(qkey):
    """
    Split an underscore-separated query key into a Query tuple.

    The key must consist of exactly nine parts: from_id, to_id, date_forward,
    date_backward, klass, adults, children, infants, national_version.
    Dates are dash-separated integers (year-month-day); a backward date equal to
    'None', 'null' or an empty string means there is no return flight.

    :param str qkey: raw query key
    :return Query: parsed query; the ids and passenger counts stay strings
    :raises ValueError: if the key does not contain exactly nine parts
    """
    def to_datetime(raw_date):
        return datetime(*map(int, raw_date.split('-')))

    (from_id, to_id, raw_forward, raw_backward, klass,
     adults, children, infants, national_version) = qkey.split('_')

    date_forward = to_datetime(raw_forward)
    if raw_backward in {'None', 'null', ''}:
        date_backward = None
    else:
        date_backward = to_datetime(raw_backward)

    return Query(
        from_id=from_id,
        to_id=to_id,
        date_forward=date_forward,
        date_backward=date_backward,
        klass=klass,
        adults=adults,
        children=children,
        infants=infants,
        national_version=national_version,
        date_forward_unixtime=naive_datetime_to_timestamp(date_forward),
    )


def interval_by_depth(depth):
    """
    Map a search depth (days until departure) to the label of its bucket.

    Depths below 14 get their own bucket (the number itself as a string),
    depths from 14 up to 62 are grouped by weeks ('14-21', '21-28', ..., '56-63',
    upper bound exclusive), everything else is '63+'.

    :param int depth: days until departure
    :return str: bucket label
    """
    if depth < 14:
        return str(depth)

    for lower in range(14, 63, 7):
        upper = lower + 7
        if depth < upper:
            return '{}-{}'.format(lower, upper)

    return '63+'


class TTLCounter(object):
    """Abstract interface of a cache TTL strategy that is evaluated against real searches."""
    # Python 2 style metaclass declaration.
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def __call__(self, query, timestamps):
        """
        For the cache parameters of this strategy, compute how many cache hits/misses
         there were for the given query.
        :param Query query:
        :param timestamps: user searches together with their source, as (timestamp, source) pairs.
         Must be sorted in ascending order
         ((100, 'wizard'), (200, 'rasp'), (201, 'portal'))
        :return dict: counters of hits and misses, e.g. {'misses': 1000, 'hits': 10}.
         NOTE(review): the implementations in this file return the per-source structure
         built by create_empty_counter() rather than a flat dict.
        """


class TTLCounterResultUtils(object):
    """Helpers that register a single cache hit or miss in a counter dict."""

    @staticmethod
    def calc_depth(query, timestamp):
        """
        Number of whole days between ``timestamp`` and the forward flight date.

        :param Query query: only ``date_forward_unixtime`` is read
        :param int timestamp: moment of the search, unix time
        :return int: days until departure, 0 if the departure is not in the future
        """
        seconds_left = query.date_forward_unixtime - timestamp
        if seconds_left <= 0:
            return 0
        return int(seconds_left / 86400.)

    @staticmethod
    def _add_relevance(result, source, query, timestamp, result_page_created_at):
        """Add the price relevance of one search to the aggregate and per-source sums."""
        price_relevance = RELEVANCE_STORAGE.get_relevance(
            query, timestamp, result_page_created_at
        )
        for bucket in (ALL, source):
            result[bucket]['relevance'] += price_relevance

    @staticmethod
    def add_miss(result, source, query, timestamp, result_page_created_at):
        """
        Register a cache miss: bump the miss counters, accumulate the price relevance
        and count the miss in its search depth interval.
        """
        for bucket in (ALL, source):
            result[bucket]['misses'] += 1

        TTLCounterResultUtils._add_relevance(
            result, source, query, timestamp, result_page_created_at
        )

        # Misses are grouped by the label of the depth interval, not by the raw depth.
        interval = interval_by_depth(TTLCounterResultUtils.calc_depth(query, timestamp))
        misses_by_depth = result[BY_DEPTH_MISSES_KEY]
        misses_by_depth[interval] = misses_by_depth.get(interval, 0) + 1

    @staticmethod
    def add_hit(result, source, query, timestamp, result_page_created_at):
        """Register a cache hit: bump the hit counters and accumulate the price relevance."""
        for bucket in (ALL, source):
            result[bucket]['hits'] += 1

        TTLCounterResultUtils._add_relevance(
            result, source, query, timestamp, result_page_created_at
        )


class ConstTTLCounter(TTLCounter):
    """Cache strategy with a single TTL that does not depend on the query."""

    def __init__(self, ttl):
        """
        :param int ttl: cache lifetime in minutes
        """
        self.ttl = ttl
        self._ttl_secs = ttl * 60

    def __str__(self):
        return 'ttl={}'.format(self.ttl)

    def __call__(self, query, timestamps):
        """
        Replay the searches of one query and count cache hits and misses.

        :param Query query:
        :param timestamps: (timestamp, source) pairs sorted by timestamp
        :return dict: counters in the create_empty_counter() layout
        """
        result = create_empty_counter()

        # Moment the cached result was created; a falsy value (None, or a timestamp
        # equal to 0) means that nothing is cached yet.
        cached_at = None
        for ts, source in timestamps:
            if cached_at and ts <= cached_at + self._ttl_secs:
                TTLCounterResultUtils.add_hit(result, source, query, ts, cached_at)
            else:
                cached_at = ts
                TTLCounterResultUtils.add_miss(result, source, query, ts, cached_at)

        return result


class TwoTTLCounter(TTLCounter):
    """Cache strategy with two TTLs: a short one near the departure and a long one otherwise."""

    def __init__(self, short_ttl, long_ttl, depth):
        """
        :param int short_ttl: TTL in minutes used when days until departure <= ``depth``
        :param int long_ttl: TTL in minutes used for deeper searches
        :param int depth: threshold in days until departure
        """
        self.short_ttl = short_ttl
        self.long_ttl = long_ttl
        self.depth = depth
        self._short_ttl_secs = short_ttl * 60
        self._long_ttl_secs = long_ttl * 60

    def __str__(self):
        return 'ttl={}/{} depth={}'.format(
            self.short_ttl, self.long_ttl, self.depth,
        )

    def __call__(self, query, timestamps):
        """
        Replay the searches of one query and count cache hits and misses.

        :param Query query:
        :param timestamps: (timestamp, source) pairs sorted by timestamp
        :return dict: counters in the create_empty_counter() layout
        """
        result = create_empty_counter()

        # Falsy cached_at (None or 0) means that nothing is cached yet.
        cached_at = None
        for ts, source in timestamps:
            # The TTL is chosen by the depth of the current search.
            if TTLCounterResultUtils.calc_depth(query, ts) <= self.depth:
                ttl = self._short_ttl_secs
            else:
                ttl = self._long_ttl_secs

            if cached_at and ts <= cached_at + ttl:
                TTLCounterResultUtils.add_hit(result, source, query, ts, cached_at)
            else:
                cached_at = ts
                TTLCounterResultUtils.add_miss(result, source, query, ts, cached_at)

        return result


class ThreeTTLCounter(TTLCounter):
    """Cache strategy with three TTLs selected by two search depth thresholds."""

    def __init__(self, short_ttl, long_ttl, very_long_ttl, depth, depth_multiplier):
        """
        :param int short_ttl: TTL in minutes for searches with depth <= ``depth``
        :param int long_ttl: TTL in minutes for depth <= ``depth * depth_multiplier``
        :param int very_long_ttl: TTL in minutes for everything deeper
        :param int depth: first threshold, days until departure
        :param int depth_multiplier: the second threshold is ``depth * depth_multiplier``
        """
        self.short_ttl = short_ttl
        self.long_ttl = long_ttl
        self.very_long_ttl = very_long_ttl
        self.depth = depth
        self.very_long_depth = depth * depth_multiplier
        self._short_ttl_secs = short_ttl * 60
        self._long_ttl_secs = long_ttl * 60
        self._very_long_ttl_secs = very_long_ttl * 60

    def __str__(self):
        return 'ttl={}/{}/{} depth={}/{}'.format(
            self.short_ttl, self.long_ttl, self.very_long_ttl, self.depth,
            self.very_long_depth
        )

    def __call__(self, query, timestamps):
        """
        Replay the searches of one query and count cache hits and misses.

        :param Query query:
        :param timestamps: (timestamp, source) pairs sorted by timestamp
        :return dict: counters in the create_empty_counter() layout
        """
        result = create_empty_counter()

        # Falsy cached_at (None or 0) means that nothing is cached yet.
        cached_at = None
        for ts, source in timestamps:
            # The TTL is chosen by the depth of the current search.
            ttl = self._calc_ttl(TTLCounterResultUtils.calc_depth(query, ts))

            if cached_at and ts <= cached_at + ttl:
                TTLCounterResultUtils.add_hit(result, source, query, ts, cached_at)
            else:
                cached_at = ts
                TTLCounterResultUtils.add_miss(result, source, query, ts, cached_at)

        return result

    def _calc_ttl(self, depth):
        """Return the TTL in seconds for a search ``depth`` days before departure."""
        if depth <= self.depth:
            return self._short_ttl_secs
        if depth <= self.very_long_depth:
            return self._long_ttl_secs
        return self._very_long_ttl_secs


class LinearTTLCounter(TTLCounter):
    """Cache strategy whose TTL grows linearly with the search depth up to a maximum."""

    def __init__(self, min_ttl, max_ttl, max_depth):
        """
        :param int min_ttl: TTL in minutes for depth 0
        :param int max_ttl: TTL in minutes reached at ``max_depth`` and used beyond it
        :param int max_depth: depth in days at which the TTL stops growing
        """
        self.min_ttl = min_ttl
        self.max_ttl = max_ttl
        self.max_depth = max_depth
        self._min_ttl_secs = min_ttl * 60
        self._max_ttl_secs = max_ttl * 60

        # TTL increment (in seconds) per one day of depth.
        ttl_range_secs = self._max_ttl_secs - self._min_ttl_secs
        self.step = ttl_range_secs / float(self.max_depth)

    def __str__(self):
        return 'line={}-{}, depth={}'.format(self.min_ttl, self.max_ttl, self.max_depth)

    def __call__(self, query, timestamps):
        """
        Replay the searches of one query and count cache hits and misses.

        :param Query query:
        :param timestamps: (timestamp, source) pairs sorted by timestamp
        :return dict: counters in the create_empty_counter() layout
        """
        result = create_empty_counter()

        # Falsy cached_at (None or 0) means that nothing is cached yet.
        cached_at = None
        for ts, source in timestamps:
            # The TTL is chosen by the depth of the current search.
            ttl = self._calc_ttl(TTLCounterResultUtils.calc_depth(query, ts))

            if cached_at and ts <= cached_at + ttl:
                TTLCounterResultUtils.add_hit(result, source, query, ts, cached_at)
            else:
                cached_at = ts
                TTLCounterResultUtils.add_miss(result, source, query, ts, cached_at)

        return result

    def _calc_ttl(self, depth):
        """Return the TTL in seconds for a search ``depth`` days before departure."""
        return (
            self._max_ttl_secs
            if depth > self.max_depth
            else self._min_ttl_secs + depth * self.step
        )


def _init_ttl_counters():
    """
    Fill TTL_COUNTERS with every TTL strategy to evaluate, each paired with an empty counter.

    Strategies are registered in this order: three-TTL, two-TTL, constant, linear.
    TTLs are given in minutes, depth thresholds in days until departure.
    """
    logger.info('Init ttl counters configuration')

    short_ttls = (30, 60, 120, 240, 360)  # shortest ttl, minutes

    three_ttl_grid = itertools.product(
        short_ttls,
        (240, 360, 720, 1440),  # long ttl, minutes
        (1440, 2880, 4320),  # very long ttl, minutes
        (1, 2, 3),  # first depth threshold, days
        (2, 3, 4),  # multipliers producing the second depth threshold
    )
    for params in three_ttl_grid:
        short_ttl, long_ttl, very_long_ttl = params[:3]
        # Only strictly increasing TTL triples make sense.
        if short_ttl < long_ttl < very_long_ttl:
            TTL_COUNTERS[ThreeTTLCounter(*params)] = create_empty_counter()

    two_ttl_grid = itertools.product(
        short_ttls,
        (240, 360, 720, 1440, 2880, 4320),  # long ttl, minutes
        (1, 2, 3, 4, 5, 6),  # depth threshold, days
    )
    for params in two_ttl_grid:
        short_ttl, long_ttl = params[:2]
        if short_ttl < long_ttl:
            TTL_COUNTERS[TwoTTLCounter(*params)] = create_empty_counter()

    for ttl in CONST_TTLS:
        TTL_COUNTERS[ConstTTLCounter(ttl)] = create_empty_counter()

    # (min ttl, max ttl, depth at which the max ttl is reached)
    linear_configs = (
        (30, 1440, 60),
        (30, 1440, 30),
        (30, 720, 30),
        (30, 720, 60),
    )
    for min_ttl, max_ttl, max_depth in linear_configs:
        TTL_COUNTERS[LinearTTLCounter(min_ttl, max_ttl, max_depth)] = create_empty_counter()

    logger.info('Initialised %d ttl counters', len(TTL_COUNTERS))


class PriceRelevanceStorage(object):
    """
    Computes how relevant (up to date) the prices of a search result page are.

    ``self.intervals`` is a list with one dict per ``ttl_interval`` group of the source
    table; each dict maps a search depth (0..998) to a relevance value computed as
    ``1 - problem_cnt / total_cnt``.
    """
    MIN_REVISES_CNT = 300  # minimal number of checks required to treat the relevance estimate as valid

    def __init__(self, cache_table_name):
        """
        :param str cache_table_name: table with the columns
         ttl_interval, search_depth, problem_cnt, total_cnt
        """
        self.intervals = []
        # groupby only merges adjacent rows, hence the dependency on the row order below.
        for ttl_interval, group in itertools.groupby(
                read_from_yt_cache(
                    cache_table_name, fields=(
                    'ttl_interval', 'search_depth', 'problem_cnt', 'total_cnt')
                ),
                key=itemgetter(0)
        ):  # Relies on the rows being sorted: order by ttl_interval, search_depth
            interval_data = {}
            problem_cnt_sum = 0
            total_cnt_sum = 0
            last_depth = 0
            relevance = 0
            for _, search_depth, problem_cnt, total_cnt in group:
                # Neighbouring depths are merged until enough checks are accumulated.
                problem_cnt_sum += problem_cnt
                total_cnt_sum += total_cnt
                if total_cnt_sum > self.MIN_REVISES_CNT:
                    relevance = 1. - float(problem_cnt_sum) / total_cnt_sum
                    # Every depth of the merged span gets the same relevance.
                    for depth in xrange(last_depth, search_depth + 1):
                        interval_data[depth] = relevance

                    last_depth = search_depth + 1
                    problem_cnt_sum = 0
                    total_cnt_sum = 0

            # Depths without enough data reuse the last computed relevance
            # (0 if nothing was computed for this group). Depths >= 999 are not filled.
            for depth in xrange(last_depth, 999):
                interval_data[depth] = relevance

            self.intervals.append(interval_data)

        # get_relevance() addresses exactly six intervals (indexes 0..5).
        assert len(self.intervals) == 6

    def get_relevance(self, query, timestamp, result_page_created_at):
        """
        Relevance of a result page that was created at ``result_page_created_at``
        and shown to the user at ``timestamp``.

        :param Query query:
        :param int timestamp: moment of the user search, unix time
        :param int result_page_created_at: moment the shown result was created, unix time
        :return: relevance stored for the age interval and the search depth
        :raises KeyError: if the search depth is 999 days or more
        """
        days_until_departure = TTLCounterResultUtils.calc_depth(query, timestamp)
        # NOTE(review): the divisor is 360, i.e. the age is measured in 6-minute units;
        # confirm against revise_price_relevance.sql whether 3600 (hours) was intended.
        # NOTE(review): with int arguments this is an integer division under Python 2,
        # so the age is truncated before it is compared with the boundaries below.
        result_age = (timestamp - result_page_created_at) / 360
        # The branches are checked in order, so a boundary value (1, 2, 3, 5, 7)
        # falls into the earlier interval; negative ages end up in the last one.
        if 0 <= result_age <= 1:
            interval_idx = 0
        elif 1 <= result_age <= 2:
            interval_idx = 1
        elif 2 <= result_age <= 3:
            interval_idx = 2
        elif 3 <= result_age <= 5:
            interval_idx = 3
        elif 5 <= result_age <= 7:
            interval_idx = 4
        else:
            interval_idx = 5
        return self.intervals[interval_idx][days_until_departure]


class BooksToSearchStorage(object):
    """Number of bookings per search depth interval, loaded from a cache table."""

    def __init__(self, cache_table_name):
        """
        :param str cache_table_name: table with the columns depth and books_count,
         where depth holds an interval label as produced by interval_by_depth()
        :raises AssertionError: if some interval of the first 365 days is missing
        """
        self._l2b = {}
        for depth, books in read_from_yt_cache(cache_table_name, fields=('depth', 'books_count')):
            self._l2b[depth] = int(books)

        # Every depth of the first year must be covered, otherwise get_books() would fail later.
        for depth in range(365):
            interval = interval_by_depth(depth)
            assert interval in self._l2b, 'No bookings data for depth interval {}'.format(interval)

    def get_books(self, depth):
        """
        Number of bookings for a search depth.

        :param depth: either a raw depth in days or an already computed interval label
         (the keys of the by-depth miss counters are such labels)
        :return int: bookings count of the matching interval
        :raises KeyError: if the interval is unknown
        """
        # An interval label must be used as is: passing a string through
        # interval_by_depth() compares str with int, which maps every label
        # to '63+' under Python 2 and raises TypeError under Python 3.
        if depth in self._l2b:
            return self._l2b[depth]
        return self._l2b[interval_by_depth(depth)]


class IataFinder(object):
    """Lookup of IATA codes by point key, backed by ``codes.json`` located next to this script."""

    def __init__(self):
        import json

        codes_path = os.path.join(os.path.dirname(__file__), 'codes.json')
        with open(codes_path) as codes_file:
            self._iata_codes_for_search_by_point_key = json.load(codes_file)

    def get_iata_codes_for_search(self, point_key):
        """
        :param unicode point_key:
        :return: the value stored for the point key (presumably a list of iata codes),
         or None if the key is unknown
        """
        return self._iata_codes_for_search_by_point_key.get(point_key)

    def get_iata_code_for_search(self, point_key):
        """
        :param unicode point_key:
        :return: first related iata code, or None if there are no codes for the key
        :rtype: unicode
        """
        codes = self.get_iata_codes_for_search(point_key)
        if not codes:
            return None
        return next(iter(codes), None)


def _query_cache_table(query, *args):
    """
    Build the path of the cache table for a query and its parameters.

    The table name is the md5 hex digest of the query text followed by the
    string form of every extra argument, so any change of the query or of its
    parameters leads to a different table.

    :param str query: text of the query
    :param args: query parameters that affect the result
    :return str: table path inside YT_TMP_DIRECTORY
    """
    digest = hashlib.md5(query)
    for arg in args:
        digest.update(str(arg))

    return YT_TMP_DIRECTORY + digest.hexdigest()


def group_timestamps_by_queries(args):
    """
    Run the query from REDIRECT_QUERY_FILE and store its result in a YT table.

    With ``args.yt_cache`` the table path is derived from the query text, ``args.days``
    and ``args.start_day``; an already existing table is reused without running the query.
    Otherwise a temporary table is created.

    :param args: parsed command line arguments (``yt_cache``, ``days``, ``start_day`` are read)
    :return: path of the table with the columns query_key and timeshots,
     or None if the YQL query failed
    """
    logger.info(
        'Start fill yt cache {}'.format(datetime.fromtimestamp(time.time())))
    yt_client = yt_client_fabric.create()

    with open(REDIRECT_QUERY_FILE, 'r') as f:
        query = f.read()

    if args.yt_cache:
        cache_table = _query_cache_table(query, args.days, args.start_day)
        if yt_client.exists(cache_table):
            logger.info('Node %s already exists. Using as a cache table', cache_table)
            return cache_table

        # timeshots is a list of (uint64, string) tuples.
        timeshot_type = {
            'type_name': 'list',
            'item': {
                'type_name': 'tuple',
                'elements': [
                    {'type': 'uint64'},
                    {'type': 'string'},
                ],
            },
        }
        yt_client.create(
            'table', cache_table, attributes={
                'optimize_for': 'scan',
                'schema': [
                    {'type': 'string', 'name': 'query_key'},
                    {'type_v3': timeshot_type, 'name': 'timeshots'},
                ],
            }
        )
    else:
        cache_table = yt_client.create_temp_table()
    logger.info('Cache table: %s', cache_table)

    yql_client = YqlClient(token=settings.YQL_TOKEN)

    yql_query = yql_client.query(query, syntax_version=1)
    yql_query.run(
        parameters=ValueBuilder.build_json_map({
            '$day_count': ValueBuilder.make_int32(args.days + 1),
            '$start_day': ValueBuilder.make_date(args.start_day),
            '$cache_table': ValueBuilder.make_string(cache_table),
        }),
    )
    yql_query.wait_progress()

    if not yql_query.is_ok or yql_query.status == 'ERROR':
        logger.error('FILL TEMPORARY TABLE IS FAILED')
        if args.yt_cache:
            # The table was created above but never filled; if it stayed in place,
            # the next run would find it and use an empty table as a valid cache.
            yt_client.remove(cache_table, force=True)
        return None
    logger.info('timestamps by queries yt cache is ready')
    return cache_table


def calc_price_relevance_yql(args):
    """
    Run the query from PRICES_RELEVANCE_QUERY_FILE and store its result in a YT table.

    With ``args.yt_cache`` the table path is derived from the query text; an already
    existing table is reused without running the query. Otherwise a temporary table
    is created.

    :param args: parsed command line arguments (only ``yt_cache`` is read)
    :return: path of the result table, or None if the YQL query failed
    """
    # Nearly a copy of group_timestamps_by_queries, could be extracted into a class
    logger.info('Start calculating price relevance {}'.format(
        datetime.fromtimestamp(time.time())))
    yt_client = yt_client_fabric.create()

    with open(PRICES_RELEVANCE_QUERY_FILE, 'r') as f:
        query = f.read()

    if args.yt_cache:
        cache_table = _query_cache_table(query)
        if yt_client.exists(cache_table):
            logger.info('Node %s already exists. Using as a cache table',
                        cache_table)
            return cache_table

        yt_client.create('table', cache_table)

    else:
        cache_table = yt_client.create_temp_table()
    logger.info('Cache table: %s', cache_table)

    yql_client = YqlClient(token=settings.YQL_TOKEN)

    yql_query = yql_client.query(query, syntax_version=1)
    yql_query.run(
        parameters=ValueBuilder.build_json_map({
            '$cache_table': ValueBuilder.make_string(cache_table),
        }),
    )
    yql_query.wait_progress()

    if not yql_query.is_ok or yql_query.status == 'ERROR':
        logger.error('FILL TEMPORARY TABLE IS FAILED')
        if args.yt_cache:
            # The table was created above but never filled; if it stayed in place,
            # the next run would find it and use an empty table as a valid cache.
            yt_client.remove(cache_table, force=True)
        return None
    logger.info('Price relevance cache is ready')
    return cache_table


def calc_look2book_yql(args):
    """
    Run the query from LOOK2BOOK_QUERY_FILE and store its result in a YT table.

    With ``args.yt_cache`` the table path is derived from the query text; an already
    existing table is reused without running the query. Otherwise a temporary table
    is created.

    :param args: parsed command line arguments (only ``yt_cache`` is read)
    :return: path of the result table, or None if the YQL query failed
    """
    # Nearly a copy of group_timestamps_by_queries, could be extracted into a class
    logger.info('Start calculating look2book')
    yt_client = yt_client_fabric.create()

    with open(LOOK2BOOK_QUERY_FILE, 'r') as f:
        query = f.read()

    if args.yt_cache:
        cache_table = _query_cache_table(query)
        if yt_client.exists(cache_table):
            logger.info('Node %s already exists. Using as a cache table', cache_table)
            return cache_table

        yt_client.create('table', cache_table)

    else:
        cache_table = yt_client.create_temp_table()
    logger.info('Cache table: %s', cache_table)

    yql_client = YqlClient(token=settings.YQL_TOKEN)

    yql_query = yql_client.query(query, syntax_version=1)
    yql_query.run(
        parameters=ValueBuilder.build_json_map({
            '$cache_table': ValueBuilder.make_string(cache_table),
        }),
    )
    yql_query.wait_progress()

    if not yql_query.is_ok or yql_query.status == 'ERROR':
        logger.error('FILL TEMPORARY TABLE IS FAILED')
        if args.yt_cache:
            # The table was created above but never filled; if it stayed in place,
            # the next run would find it and use an empty table as a valid cache.
            yt_client.remove(cache_table, force=True)
        return None
    # The message used to be a copy-paste of the price relevance one.
    logger.info('Look2book cache is ready')
    return cache_table


def print_table(data_frame):
    """
    Log a data frame in full.

    The pandas display limits are raised globally so that no row or column
    of the frame is elided in the output.

    :param pandas.DataFrame data_frame: table to log
    """
    shape = data_frame.shape
    display_options = (
        ('display.max_rows', shape[0] + 1),
        ('display.max_columns', shape[1] + 1),
        ('display.width', 1000),
    )
    for option, value in display_options:
        pd.set_option(option, value)
    logger.info(data_frame)


def gen_results(args, read_from_cache, variants_count):
    """
    Build and log the two result tables from the accumulated TTL_COUNTERS.

    :param args: parsed command line arguments (only ``days`` is read)
    :param read_from_cache: ordered mapping "ttl counter -> Counter", where a Counter maps
     the number of cache hits of a query to the number of queries with that many hits
    :param int variants_count: number of processed unique queries, the base of the percentages
    :return tuple: (result_df, multiple_readings_df) - per-strategy summary and
     distribution of repeated cache reads (percent of queries per hits interval)
    """
    def percent(part, whole):
        # A source without any request must not break the whole report.
        return float(part) / whole * 100 if whole else 0.0

    def source_hits(counter):
        return percent(counter['hits'], counter['misses'] + counter['hits'])

    rows = []
    columns = ['exp', 'misses_abs', 'hits_abs', 'rasp_hits(%)',
               'portal_hits(%)', 'wizard_hits(%)', 'all_misses(%)',
               'all_hits(%)', 'look2book_old', 'look2book', 'relevance']

    for k, counter in TTL_COUNTERS.items():
        all_counter = counter[ALL]

        total = all_counter['misses'] + all_counter['hits']
        misses = percent(all_counter['misses'], total)
        hits = percent(all_counter['hits'], total)
        # NOTE(review): integer division under Python 2 when both operands are ints.
        look2book_old = all_counter['misses'] / (MAGIC_L2B_CONSTANT * args.days)

        # Keys are depth interval labels as produced by interval_by_depth().
        misses_by_depth = counter[BY_DEPTH_MISSES_KEY]
        # Loop invariant, computed once instead of once per interval.
        depth_misses_total = sum(misses_by_depth.values())
        # Per-interval look2book weighted by the share of misses of the interval.
        look2book = sum(
            float(cnt) / (BOOKS_TO_SEARCH_DEPTH.get_books(depth) * args.days) *
            float(cnt) / depth_misses_total
            for depth, cnt in misses_by_depth.items()
        )

        relevance = all_counter['relevance'] / total if total else 0.0

        rows.append([
            str(k), all_counter['misses'], all_counter['hits'],
            source_hits(counter['rasp']), source_hits(counter['portal']),
            source_hits(counter['wizard']),
            misses, hits,
            look2book_old,
            int(look2book),
            relevance,
        ])

    # Strategies with look2book <= 5000 go first, the most relevant ones on top.
    rows.sort(key=lambda row: (row[-2] > 5000, -row[-1]))
    result_df = pd.DataFrame(
        rows,
        columns=columns,
    )
    print_table(result_df)

    columns = [str(ttl_counter) for ttl_counter in TTL_COUNTERS]
    index = []
    rows = []

    # Upper bounds of the hits intervals: 0, 1, 2, 4, 8, ..., 256.
    # Queries with more than 256 hits are not reported, so there is no need
    # to scan hit counts beyond the last bound.
    intervals = [0] + [2 ** i for i in range(9)]
    reads_by_ttl = list(read_from_cache.values())

    previous = None
    for bound in intervals:
        first = 0 if previous is None else previous + 1
        res = [
            sum(reads.get(i, 0) for i in range(first, bound + 1))
            for reads in reads_by_ttl
        ]

        if any(res):
            interval_repr = bound if bound <= 2 else '{}-{}'.format(previous, bound)
            index.append(interval_repr)
            rows.append([float(reads) / variants_count * 100 for reads in res])

        previous = bound

    multiple_readings_df = pd.DataFrame(
        rows,
        index=index,
        columns=columns,
    )
    print_table(multiple_readings_df)
    return result_df, multiple_readings_df


def read_from_yt_cache(table, fields=()):
    """
    Iterate over the rows of a YT table, yielding the requested fields of every row.

    Reading through YQL is up to 3 times slower and the script may fail while reading,
    but this logic is kept for reproducibility. The table may be downloaded in the
    JSON-lines format and put into /tmp (file name: 'yt_' + table path with '/'
    replaced by '_'); such a file takes precedence over YQL.

    :param str table: path of the YT table
    :param fields: names of the columns to read, in the order they must be yielded
    :return: generator of rows; a row holds the values of ``fields``
    :raises RuntimeError: if the YQL query that reads the table failed
    """
    local_cache_path = '/tmp/yt_' + table.replace('/', '_')
    # yt___home_avia_tmp_1dc6166ba647dad2a991308669682cfc

    # Only a missing/unreadable file means "no local copy". Errors that happen while
    # the file is being read must not silently switch to YQL after some rows
    # have already been yielded.
    try:
        local_cache = open(local_cache_path)
    except IOError:
        local_cache = None

    if local_cache is not None:
        with local_cache:
            logger.info('start read table %s from local file: %s', table, local_cache_path)
            for line in local_cache:
                data = ujson.loads(line)
                yield [data[field] for field in fields]
        return

    query = '''
    USE hahn;
    PRAGMA yt.InferSchema = '1';

    SELECT {fields} FROM `{table}`;
    '''.format(fields=', '.join(fields), table=table)
    yql_client = YqlClient(token=settings.YQL_TOKEN)

    yql_query = yql_client.query(query, syntax_version=1)
    yql_query.run()
    yql_query.wait_progress()

    if not yql_query.is_ok:
        # An empty row used to be yielded here, after which the results of the
        # failed query were still read; fail explicitly instead.
        raise RuntimeError('Failed to read table {} via YQL'.format(table))

    logger.info('start read table from yql: %s', table)

    for results in yql_query.get_results():
        for row in results.get_iterator():
            yield row


def write_yt_result(args, data_frame):
    """
    Save the result table to YT.

    The table is created under ``ttl-statistics/`` in YT_AVIA_HOME and is named
    '<days>-days-<current local time up to seconds>'.

    :param args: parsed command line arguments (only ``days`` is read)
    :param pandas.DataFrame data_frame: data to store, one table row per frame row
    :return str: path of the created table
    """
    created_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    table = '{}ttl-statistics/{}-days-{}'.format(YT_AVIA_HOME, args.days, created_at)

    yt_client = yt_client_fabric.create()
    yt_client.create('table', table, recursive=True)
    records = data_frame.to_dict('records')
    yt_client.write_table(table, records)
    return table


def _main(args):
    """
    Run the whole TTL experiment.

    Prepares the three source tables, replays every query against each TTL strategy
    from TTL_COUNTERS, logs the summary tables and writes the summary to YT.
    Stops silently if any of the source tables could not be prepared.

    :param args: parsed command line arguments (``days``, ``start_day``, ``yt_cache``)
    """
    global RELEVANCE_STORAGE
    global BOOKS_TO_SEARCH_DEPTH

    create_current_file_run_log(format='%(message)s')
    add_stdout_handler(logger)
    logger.info('Start')

    separator = '=' * 88
    last_day = args.start_day + timedelta(days=args.days)
    logger.info(separator)
    logger.info('Выборка за {} дней. Даты с {} по {}'.format(args.days, args.start_day, last_day))
    logger.info(separator)

    # Every table is mandatory; the first failure stops the run.
    tables = []
    for prepare_table in (group_timestamps_by_queries, calc_price_relevance_yql, calc_look2book_yql):
        prepared = prepare_table(args)
        if prepared is None:
            return
        tables.append(prepared)
    cache_table, prices_relevance_table, look2book_table = tables

    logger.info('Start statistics calculation')
    _init_ttl_counters()
    cache_reads = OrderedDict()
    for ttl_counter in TTL_COUNTERS:
        cache_reads[ttl_counter] = Counter()

    uniq_queries_count = 0
    queries_without_codes_count = 0
    bad_date_forward_count = 0
    iata_finder = IataFinder()
    RELEVANCE_STORAGE = PriceRelevanceStorage(prices_relevance_table)
    BOOKS_TO_SEARCH_DEPTH = BooksToSearchStorage(look2book_table)

    max_days_ahead = 365 - args.days
    for qkey, timestamps in read_from_yt_cache(cache_table, fields=('query_key', 'timeshots')):
        query = parse_qkey(qkey)

        # Both ends of the route must have an iata code.
        if any(
            iata_finder.get_iata_code_for_search(point_key) is None
            for point_key in (query.from_id, query.to_id)
        ):
            queries_without_codes_count += 1
            continue

        # Skip flights in the past or too far in the future.
        days_ahead = (query.date_forward - args.start_day).days
        if query.date_forward < args.start_day or days_ahead > max_days_ahead:
            bad_date_forward_count += 1
            continue

        uniq_queries_count += 1

        for ttl_counter in TTL_COUNTERS:
            # Compute the cache hits of this query and merge them into the totals
            _result = ttl_counter(query, timestamps)
            totals = TTL_COUNTERS[ttl_counter]

            for source in SOURCES:
                for metric in ('misses', 'hits', 'relevance'):
                    totals[source][metric] += _result[source][metric]

            depth_totals = totals[BY_DEPTH_MISSES_KEY]
            for depth in _result[BY_DEPTH_MISSES_KEY]:
                depth_totals[depth] = depth_totals.get(depth, 0) + _result[BY_DEPTH_MISSES_KEY][depth]

            # Histogram "number of cache hits of a query -> number of such queries".
            cache_reads[ttl_counter][_result['all']['hits']] += 1

    logger.info('End calc summary TTL')
    logger.info('Количество запросов без кодов поиска: {}'.format(
        queries_without_codes_count))
    logger.info('Количество запросов c date_forward в прошлом или далеком будущем: {}'.format(bad_date_forward_count))
    result_df, multiple_readings_df = gen_results(args, cache_reads, uniq_queries_count)

    # Every strategy has seen the same searches, so any counter gives the total.
    first_counter = TTL_COUNTERS[next(iter(TTL_COUNTERS))]
    all_requests_count = first_counter[ALL]['misses'] + first_counter[ALL]['hits']
    logger.info('Количество всех запросов: {}'.format(all_requests_count))
    logger.info('Количество уникальных запросов: {}\n'.format(uniq_queries_count))

    print_table(result_df)
    logger.info('Распредедение количества повторных считываний из кэша:\n')
    print_table(multiple_readings_df)
    yt_result_table = write_yt_result(args, result_df)
    logger.info('Таблица с результатами: https://yt.yandex-team.ru/hahn/navigation?path={}'.format(yt_result_table))
    logger.info('End')


def main():
    """
    Command line entry point: parse the arguments and run the experiment.

    Usage example:
    ./tools/run-dev-script.sh avia_scripts/ttl_experiments/ttl_statistics.py --days=1 --yt-cache
    """
    parser = argparse.ArgumentParser(description='Calculate TTL statistics')
    add_argument = parser.add_argument

    add_argument('--days', type=int, default=7, help='Выборка за N дней')
    add_argument(
        '--start-day',
        type=lambda s: datetime.strptime(s, '%Y-%m-%d'),
        default=DEFAULT_START_DAY,
        help='День, начиная с которого брать выборку поисков',
    )
    add_argument(
        '--yt-cache',
        action='store_true',
        help='Использовать закешированные результаты YQL запросов',
    )

    _main(parser.parse_args())
