import os
from datetime import datetime

import haversine
import yt.wrapper as yt

import cars.settings
from cars.aggregator.static_data.operators import OPERATORS
from cars.core.util import make_yt_client
from cars.feeds import Feed
from . import regions


# Duration thresholds. They are compared against differences of 'unixtime'
# values in BuildOrders (presumably seconds, as the inline comments assume).
MIN_ORDER_DURATION = 5 * 60         # 5min
MIN_SERVICE_DURATION = 1 * 60 * 30  # 30min
# NOTE(review): MAX_ORDER_DURATION is not referenced in the visible code.
MAX_ORDER_DURATION = 6 * 60 * 60    # 6h


def flatten_mapper(row):
    """Expand one raw 'feed' log row into one flat record per car.

    Rows whose 'type' is not 'feed', rows without an operator name, and rows
    whose operator is not present in OPERATORS produce no output.

    Each yielded dict starts with 'operator' and 'unixtime', followed by the
    fields of ``car.to_dict()`` (with 'id' renamed to 'car_id') and finally
    the contents of the car's 'position' entry merged into the top level.
    """
    if row.get('type') != 'feed':
        return

    payload = row['data']

    short_name = payload.get('operator')
    # An empty/missing name never reaches the OPERATORS lookup.
    operator = OPERATORS.get(short_name) if short_name else None
    if not operator:
        return

    timestamp = row['unixtime']

    raw_feed = payload.get('feed')
    if raw_feed is None:
        # Support old logs format.
        raw_feed = payload.get('data', [])

    for car in Feed.parse_feed(operator=operator, feed=raw_feed):
        fields = car.to_dict()
        fields['car_id'] = fields.pop('id')
        location = fields.pop('position')

        flat = {
            'operator': operator.short_name,
            'unixtime': timestamp,
        }
        # Later updates win on key clashes: car fields first, position last.
        for part in (fields, location):
            flat.update(part)
        yield flat


class BuildOrders(object):
    """Reducer that derives orders and out-of-service gaps for one car.

    It is called with the rows of a single reduce key (the caller reduces by
    operator and car_id, sorted by unixtime) and looks at every pair of
    consecutive snapshots. A pair is emitted

    * to output table 0 (orders) when the gap between the snapshots is longer
      than MIN_ORDER_DURATION and the car has moved;
    * otherwise to output table 1 (out of service) when the gap is longer
      than MIN_SERVICE_DURATION.

    Only pairs whose first snapshot falls on ``date`` are emitted.

    :param date: day to keep, formatted as ``'%Y-%m-%d'``.
    """

    # Snapshots closer than this are treated as the same location. The value
    # is compared with haversine.haversine(), which returns kilometres by
    # default, i.e. roughly 60 m.
    SAME_LOCATION_THRESHOLD = 0.06

    # Indexes into the list of output tables passed to the reduce operation.
    ORDERS_TABLE_INDEX = 0
    OUT_OF_SERVICE_TABLE_INDEX = 1

    def __init__(self, date):
        self.date = date

    def __call__(self, key, rows):
        """Yield table switches and records for consecutive snapshot pairs.

        :param key: reduce key (unused).
        :param rows: iterable of flattened snapshots of a single car; each
            must provide 'unixtime', 'lat', 'lon', 'operator' and 'car_id'.
        """
        prev_state = None

        for state in rows:
            if prev_state is not None:
                table_index = self._classify(prev_state, state)
                # Classify and date-filter first so that the region lookups
                # are only paid for pairs that are actually emitted.
                if table_index is not None and self._starts_on_date(prev_state):
                    yield yt.create_table_switch(table_index)
                    yield self._build_record(prev_state, state)

            prev_state = state

    def _classify(self, prev_state, state):
        """Return the output table index for the pair, or None to drop it."""
        duration = state['unixtime'] - prev_state['unixtime']

        prev_ll = (prev_state['lat'], prev_state['lon'])
        curr_ll = (state['lat'], state['lon'])
        distance = haversine.haversine(prev_ll, curr_ll)
        is_same_location = distance < self.SAME_LOCATION_THRESHOLD

        if duration > MIN_ORDER_DURATION and not is_same_location:
            return self.ORDERS_TABLE_INDEX
        if duration > MIN_SERVICE_DURATION:
            return self.OUT_OF_SERVICE_TABLE_INDEX
        return None

    def _starts_on_date(self, prev_state):
        """Tell whether the pair starts on the day this reducer keeps."""
        # NOTE(review): fromtimestamp() converts to the local timezone of the
        # host running the job -- confirm this is the intended day boundary.
        started_at = datetime.fromtimestamp(prev_state['unixtime'])
        return started_at.strftime('%Y-%m-%d') == self.date

    def _build_record(self, prev_state, state):
        """Build the output row describing the move from prev_state to state."""
        src_region = regions.get_by_location(lon=prev_state['lon'], lat=prev_state['lat'])
        dst_region = regions.get_by_location(lon=state['lon'], lat=state['lat'])

        return {
            'city_id': state.get('city_id'),
            'operator': state['operator'],
            'car_id': state['car_id'],
            # The record is timestamped with the start of the interval.
            'unixtime': prev_state['unixtime'],
            'duration': state['unixtime'] - prev_state['unixtime'],
            'src_lat': prev_state['lat'],
            'src_lon': prev_state['lon'],
            'dst_lat': state['lat'],
            'dst_lon': state['lon'],
            'src_region': src_region.id if src_region is not None else None,
            'dst_region': dst_region.id if dst_region is not None else None,
        }


def main():
    """Build signals, orders and out-of-service tables for unprocessed days.

    For every daily log table that has no matching table in the signals
    directory yet (and that is followed by a later log table), the feed is
    flattened into a signals table, sorted, reduced into the orders and
    out-of-service tables, and the orders table is sorted by time.

    Requires the YT_TOKEN environment variable; raises KeyError without it.
    """
    yt_config = cars.settings.YT['data']
    # Presumably make_yt_client() reads the token from these settings.
    yt_config['token'] = os.environ['YT_TOKEN']
    y = make_yt_client('data')

    log_tables = sorted(y.list(yt_config['logs_dir'], absolute=True))

    base_dir = yt_config['base_dir']
    output_dir = os.path.join(base_dir, 'external', 'orders')
    signals_dir = os.path.join(base_dir, 'external', 'signals')
    out_of_service_dir = os.path.join(base_dir, 'external', 'out-of-service')
    for directory in (output_dir, signals_dir, out_of_service_dir):
        y.mkdir(directory, recursive=True)

    # Listed once: each iteration handles a different date, so tables created
    # during this run never affect the check for the remaining dates.
    processed_dates = y.list(signals_dir, absolute=False)

    # Each day is processed together with the following log table, so the
    # last log table (which has no successor yet) is never processed.
    for table, next_table in zip(log_tables, log_tables[1:]):
        date = table.split('/')[-1]
        if date in processed_dates:
            continue

        signals_table = os.path.join(signals_dir, date)
        orders_table = os.path.join(output_dir, date)
        out_of_service_table = os.path.join(out_of_service_dir, date)

        y.run_map(
            flatten_mapper,
            [table, next_table],
            signals_table,
            spec={
                'data_size_per_job': 256 * yt.common.MB,
            },
        )
        y.run_sort(signals_table, sort_by=['operator', 'car_id', 'unixtime'])

        y.run_reduce(
            BuildOrders(date),
            signals_table,
            [orders_table, out_of_service_table],
            reduce_by=['operator', 'car_id'],
            sort_by=['operator', 'car_id', 'unixtime'],
            spec={
                'partition_data_size': 256 * yt.common.MB,
            },
        )
        y.run_sort(orders_table, sort_by=['unixtime'])


# Run the pipeline only when the module is executed as a script.
if __name__ == '__main__':
    main()
