import datetime
import os

import yt.wrapper as yt

import cars.settings
from cars.core.util import make_yt_client


@yt.aggregator
def car_pool_mapper(rows):
    """Expand signal rows into unique per-day car sightings.

    Every input row is read for its 'unixtime', 'operator', 'city_id' and
    'car_id' fields. The timestamp is converted to a UTC calendar date and the
    car is attributed to that date and to each of the following six dates, so
    a given output date collects the cars seen on it or on any of the six
    preceding days.

    Duplicates are collapsed across all rows passed to this call before
    anything is emitted, so each (city_id, operator, date, car_id)
    combination is yielded at most once per call.

    Args:
        rows: iterable of dict-like signal rows.

    Yields:
        dict with keys 'city_id', 'operator', 'date' (ISO-formatted string)
        and 'car_id'.
    """
    window_days = 7
    output_fields = ('city_id', 'operator', 'date', 'car_id')
    seen = set()

    for row in rows:
        unixtime = row['unixtime']
        operator = row['operator']
        city_id = row['city_id']
        first_day = datetime.datetime.utcfromtimestamp(unixtime).date()
        car_id = row['car_id']

        # One tuple per day of the window, starting at the signal's own date.
        seen.update(
            (
                city_id,
                operator,
                (first_day + datetime.timedelta(days=offset)).isoformat(),
                car_id,
            )
            for offset in range(window_days)
        )

    for record in seen:
        yield dict(zip(output_fields, record))


def car_pool_reducer(key, rows):
    """Count the distinct cars in one (city_id, operator, date) group.

    Args:
        key: mapping holding the group's 'city_id', 'operator' and 'date'.
        rows: iterable of dict-like rows of the group, each with a 'car_id'.

    Yields:
        A single dict that repeats the three key fields and adds 'n_cars',
        the number of distinct 'car_id' values seen in ``rows``.
    """
    distinct_cars = set()
    for row in rows:
        distinct_cars.add(row['car_id'])

    summary = {field: key[field] for field in ('city_id', 'operator', 'date')}
    summary['n_cars'] = len(distinct_cars)
    yield summary


def main():
    """Run the car-pool map-reduce job over the signals table.

    Reads the YT token from the ``YT_TOKEN`` environment variable (a missing
    variable raises ``KeyError``), stores it in the 'data' entry of
    ``cars.settings.YT`` and builds a client for that entry. The job reads
    '<base_dir>/signals' and writes '<base_dir>/car-pool', grouping mapper
    output by city, operator and date.
    """
    data_config = cars.settings.YT['data']
    # The token is set before the client is created; presumably
    # make_yt_client reads this same settings entry -- keep this order.
    data_config['token'] = os.environ['YT_TOKEN']
    client = make_yt_client('data')

    base_dir = data_config['base_dir']
    source_table = os.path.join(base_dir, 'signals')
    target_table = os.path.join(base_dir, 'car-pool')

    # The same per-job input size is used for both stages.
    data_size_per_job = 128 * yt.common.MB
    job_spec = {
        'mapper_data_size_per_job': data_size_per_job,
        'reducer_data_size_per_job': data_size_per_job,
    }

    client.run_map_reduce(
        car_pool_mapper,
        car_pool_reducer,
        source_table,
        target_table,
        reduce_by=['city_id', 'operator', 'date'],
        format='json',
        spec=job_spec,
    )


# Launch the job only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
