#!/usr/bin/env python3
import collections
import random

import yt.wrapper as yt

import cars.settings
from cars.aggregator.static_data.cities import CITIES
from cars.aggregator.static_data.operators import OPERATORS
from cars.core.util import make_yt_client
from cars.feeds import Feed


def main():
    """Aggregate car counts from recent log tables on YT and print them.

    Takes the last 14 tables (by sorted path) from the configured logs
    directory, runs ``mapper`` over them into a temporary table, sums the
    partial counts per (operator, city_id, model) and prints one line per
    key in sorted order, using the city's short name.
    """
    client = make_yt_client('data')
    logs_dir = cars.settings.YT['data']['logs_dir']
    # Only the 14 lexicographically-last tables are processed
    # (presumably the most recent ones -- confirm table naming scheme).
    recent_tables = sorted(client.list(logs_dir, absolute=True))[-14:]

    map_spec = {'data_size_per_job': 128 * yt.common.MB}

    with client.TempTable() as output_table:
        client.run_map(
            mapper,
            recent_tables,
            output_table,
            spec=map_spec,
            format='json',
        )
        # Each map job emits its own partial counts; merge them here while
        # the temporary table still exists.
        totals = collections.Counter()
        for record in client.read_table(output_table):
            group = (record['operator'], record['city_id'], record['model'])
            totals[group] += record['count']

    for group, total in sorted(totals.items()):
        operator, city_id, model = group
        city = CITIES[city_id]
        print(operator, city.short_name, model, total)


@yt.aggregator
def mapper(rows):
    """Count cars per (operator, city_id, model) in a sample of feed rows.

    Iterates over all input ``rows`` of the job, keeps only rows whose
    ``type`` is ``'feed'``, randomly samples about 1% of them, parses the
    feed of each sampled row and counts every parsed car. Rows with an
    unknown operator, a missing/malformed ``data`` payload, or a feed that
    fails to parse are skipped.

    Yields:
        One dict per distinct key with fields ``operator``, ``city_id``,
        ``model`` and ``count``. Counts reflect the sampled rows only;
        they are not scaled back up.
    """
    model_counter = collections.Counter()

    for row in rows:
        if row.get('type') != 'feed':
            continue

        # Keep roughly 1% of feed rows to bound the parsing cost.
        if random.random() > 0.01:
            continue

        operator_short_name = row.get('operator')
        operator = OPERATORS.get(operator_short_name)
        if not operator:
            continue

        # A row without a dict payload is malformed; skip it like any other
        # unparsable feed instead of failing the whole job.
        data = row.get('data')
        if not isinstance(data, dict):
            continue

        feed = data.get('feed', [])
        try:
            parsed_feed = Feed.parse_feed(operator, feed)
        except Exception:
            # Best-effort statistics: unparsable feeds are ignored.
            continue

        for car in parsed_feed:
            key = (car.operator.short_name, car.city_id, car.model)
            model_counter[key] += 1

    # Emit the aggregated counts once, after all rows have been consumed.
    for (operator, city_id, model), count in model_counter.items():
        yield {
            'operator': operator,
            'city_id': city_id,
            'model': model,
            'count': count,
        }


# Run the report only when this file is executed as a script.
if __name__ == '__main__':
    main()
