import argparse
import logging
import sys
from datetime import datetime

import yt.wrapper as yt


# Module logger that writes its records to stdout.
logger = logging.getLogger(__name__)
logger.addHandler(logging.StreamHandler(sys.stdout))
# A logger without an explicit level inherits the root logger's default
# WARNING threshold, which would silently drop the INFO progress messages
# emitted by main().  Enable INFO explicitly so they reach the handler.
logger.setLevel(logging.INFO)


def _table_date(table):
    """Return the date encoded in the last '/'-separated part of *table*.

    The last path component must be formatted as YYYY-MM-DD; otherwise
    ``datetime.strptime`` raises ``ValueError``.
    """
    _, _, table_name = table.rpartition('/')
    parsed = datetime.strptime(table_name, '%Y-%m-%d')
    return parsed.date()


def safe_tables_for_daterange(yt, path, begin, end):
    """Collect tables under *path* whose name is a date within [begin, end].

    Args:
        yt: client object providing ``search(path, node_type=...)``.  Note
            that inside this function the parameter shadows the
            module-level ``yt`` import.
        path: root path handed to ``search``.
        begin: inclusive lower bound, compared against a ``date``.
        end: inclusive upper bound, compared against a ``date``.

    Returns:
        A list of the matching table paths, in the order ``search``
        produced them.  Tables whose name is not a YYYY-MM-DD date are
        skipped.
    """
    tables = []
    for t in yt.search(path, node_type='table'):
        try:
            table_date = _table_date(t)
        except ValueError:
            # The table name is not a YYYY-MM-DD date: ignore this table.
            # Only ValueError is caught so that unrelated failures (and
            # KeyboardInterrupt / SystemExit) are not silently swallowed.
            continue

        if begin <= table_date <= end:
            tables.append(t)

    return tables


def create_parser():
    """Build the command-line parser for this script.

    All three options are mandatory: main() parses both dates with
    strptime and uses the output table path directly, so a missing value
    would otherwise only surface later as an obscure error.  Marking them
    as required lets argparse report the problem up front.

    Returns:
        An ``argparse.ArgumentParser`` exposing ``left_date``,
        ``right_date`` and ``output_table`` (all strings).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--left-date',
        required=True,
        help='first date of the range (inclusive), formatted as YYYY-MM-DD',
    )
    parser.add_argument(
        '--right-date',
        required=True,
        help='last date of the range (inclusive), formatted as YYYY-MM-DD',
    )
    parser.add_argument(
        '--output-table',
        required=True,
        help='path of the table to (re)create and fill with the result',
    )

    return parser


def parse_args():
    """Parse the process command line with the parser from create_parser()."""
    parser = create_parser()
    return parser.parse_args()


def _copy_from_dict(dct, fields):
    """Return a new dict holding ``dct.get(field)`` for every field.

    Fields absent from *dct* are still present in the result, mapped to
    ``None``.
    """
    projected = {}
    for name in fields:
        projected[name] = dct.get(name)
    return projected


def _create_output_table(ytc, output_table):
    """Create *output_table* through *ytc* with the result schema.

    The table is created with ``optimize_for`` set to ``'scan'`` and one
    schema column per field of the output rows.
    """
    column_types = (
        ('route', 'string'),
        ('arrival_station_id', 'uint64'),
        ('departure_station_id', 'uint64'),
        ('departure_timestamp', 'string'),
        ('arrival_timestamp', 'string'),
        ('unixtime', 'uint64'),
        ('query_id', 'string'),
    )
    schema = [
        {'name': column, 'type': column_type}
        for column, column_type in column_types
    ]
    table_attributes = {'optimize_for': 'scan', 'schema': schema}

    ytc.create('table', output_table, attributes=table_attributes)


# Columns passed as ``reduce_by`` to the map-reduce operation in
# extract_routes(): mapper rows are grouped by these fields before being
# handed to key_reducer().
_KEY_COLUMNS = [
    'departure_timestamp',
    'arrival_timestamp',
    'arrival_station_id',
    'departure_station_id',
    'route',
]


def key_reducer(key, records):
    """Reducer: keep the earliest record of a key group.

    Args:
        key: mapping with the key columns of the group; it is copied into
            the output row.
        records: iterable of dict rows for that key.  Every row must
            contain ``unixtime``; ``query_id`` is read with ``.get``.

    Yields:
        A single dict: the key columns plus ``unixtime`` and ``query_id``
        of the record with the smallest ``unixtime`` (the first such record
        on ties).  If *records* is empty only the key columns are emitted.
    """
    answer = dict(key)
    # None (rather than 0) marks "nothing seen yet", so that a genuine
    # unixtime of 0 is treated as a real minimum and not overwritten by
    # later records.
    min_unixtime = None
    for record in records:
        unixtime = record['unixtime']
        if min_unixtime is None or unixtime < min_unixtime:
            min_unixtime = unixtime
            answer['unixtime'] = unixtime
            answer['query_id'] = record.get('query_id')

    yield answer

# Fields that route_extractor() copies unchanged from each route segment
# into the row it emits.
_COLUMNS_TO_COPY = [
    'route',
    'arrival_station_id',
    'departure_station_id',
]


def route_extractor(record):
    """Mapper: emit one row per usable route segment of *record*.

    Routes are read from the ``forward_segments`` and ``backward_segments``
    lists of the record (missing or empty lists are skipped).  A route is
    ignored when any of ``departure_time``, ``departure_offset``,
    ``arrival_time`` or ``arrival_offset`` is ``None``.

    Args:
        record: dict-like input row.

    Yields:
        A new dict per route holding ``unixtime`` and ``query_id`` of the
        record, the ``_COLUMNS_TO_COPY`` fields of the route, and
        ``departure_timestamp`` / ``arrival_timestamp``: the sum of the
        corresponding time and offset, converted with
        ``datetime.utcfromtimestamp`` and formatted as
        ``YYYY-MM-DDTHH:MM:SS``.
    """
    timestamp_format = '%Y-%m-%dT%H:%M:%S'
    required_fields = (
        'departure_time',
        'departure_offset',
        'arrival_time',
        'arrival_offset',
    )
    common = _copy_from_dict(record, ['unixtime', 'query_id'])

    for segment in ('forward_segments', 'backward_segments'):
        segments = record.get(segment)
        if not segments:
            continue

        for route in segments:
            if any(route.get(field) is None for field in required_fields):
                continue

            # Build a fresh dict for every row: yielding one shared,
            # repeatedly mutated dict would make all rows alias each other
            # for any consumer that keeps them around before serializing.
            row = dict(common)
            row.update(_copy_from_dict(route, _COLUMNS_TO_COPY))
            row['departure_timestamp'] = datetime.utcfromtimestamp(
                route['departure_time'] + route['departure_offset']
            ).strftime(timestamp_format)
            row['arrival_timestamp'] = datetime.utcfromtimestamp(
                route['arrival_time'] + route['arrival_offset']
            ).strftime(timestamp_format)
            yield row


def extract_routes(ytc, left_date, right_date, output_table):
    """Rebuild *output_table* from the daily logs in the given date range.

    Inside a single ``ytc.Transaction()`` the function removes
    *output_table* if it exists, recreates it via _create_output_table(),
    selects the daily log tables dated within [left_date, right_date] and
    runs a map-reduce over them with route_extractor() as mapper and
    key_reducer() as reducer, reducing by ``_KEY_COLUMNS``.
    """
    log_root = '//home/logfeller/logs/avia-variants-log/1d'
    input_columns = ('backward_segments', 'forward_segments', 'unixtime', 'query_id')

    with ytc.Transaction():
        if ytc.exists(output_table):
            ytc.remove(output_table)
        _create_output_table(ytc, output_table)

        # Only the columns the mapper reads are requested from each table.
        source_tables = []
        for table in safe_tables_for_daterange(ytc, log_root, left_date, right_date):
            source_tables.append(yt.TablePath(table, columns=list(input_columns)))

        ytc.run_map_reduce(
            source_table=source_tables,
            destination_table=output_table,
            mapper=route_extractor,
            reducer=key_reducer,
            reduce_by=_KEY_COLUMNS,
        )


def _parse_datetime(dt):
    """Convert a YYYY-MM-DD string into a ``date``.

    ``datetime.strptime`` raises ``ValueError`` if *dt* does not match the
    format.
    """
    parsed = datetime.strptime(dt, '%Y-%m-%d')
    return parsed.date()


def main():
    """Script entry point: parse arguments and run the extraction.

    Logs 'Start' and 'End' around the work, creates a ``yt.YtClient`` for
    the 'hahn' proxy and calls extract_routes() with the parsed date range
    and the output table from the command line.
    """
    args = parse_args()
    logger.info('Start')

    client = yt.YtClient(proxy='hahn')
    left_date = _parse_datetime(args.left_date)
    right_date = _parse_datetime(args.right_date)
    extract_routes(client, left_date, right_date, args.output_table)

    logger.info('End')


# Run the extraction only when the file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()
