# -*- coding: UTF-8 -*-
"""Сохранить баллы пробок из архива на yt в локальный файл"""

from __future__ import print_function

import argparse
import collections
import csv
import datetime
import operator

import yt.wrapper as yt

from analytics.geo.tools.dates.lib.dates_tools import iterate_dates_by_dates_str, get_dates_begin_end, daterange

# Root of the legacy archive; get_tables() addresses per-month tables
# named `levels_YYYY_MM` under it.
JAMS_ARCHIVE_PATH = '//home/maps/jams/production/jamsarchive/jamsarchive'
# Root of the newer archive; get_tables() addresses per-day tables
# named `YYYY-MM-DD` under it.
JAMS_ARCHIVE_NEW_PATH = '//home/logfeller/logs/maps-core-jams-info-production-levels/1d'
# Cutoff used by get_tables(): dates before it are served from the legacy
# archive, dates on or after it from the newer one.
JAMS_ARCHIVE_NEW_STARTDATE = datetime.datetime(2020, 2, 1)


def get_tables(dates):
    """Map a dates range to the YT tables that have to be read.

    ``dates`` is handed to ``get_dates_begin_end`` to obtain the first and
    the last date of the range. The part of the range that lies before
    ``JAMS_ARCHIVE_NEW_STARTDATE`` is mapped to legacy per-month tables
    ``<JAMS_ARCHIVE_PATH>/levels_YYYY_MM`` (one entry per distinct month,
    sorted). The part on or after the cutoff is mapped to per-day tables
    ``<JAMS_ARCHIVE_NEW_PATH>/YYYY-MM-DD``, one per date produced by
    ``daterange``.

    Returns a ``(legacy_tables, new_tables)`` pair of lists of table paths;
    either list may be empty.
    """
    range_start, range_end = get_dates_begin_end(dates, return_str=False)
    cutoff = JAMS_ARCHIVE_NEW_STARTDATE

    legacy_tables = []
    if range_start < cutoff:
        # The legacy part ends at the range end or one day before the cutoff.
        legacy_end = range_end if range_end < cutoff else cutoff - datetime.timedelta(days=1)
        month_keys = sorted({
            day.strftime('%Y%m%d')[:6]
            for day in daterange(range_start, legacy_end)
        })
        legacy_tables = [
            '{}/levels_{}_{}'.format(JAMS_ARCHIVE_PATH, key[:4], key[4:])
            for key in month_keys
        ]

    new_tables = []
    if range_end >= cutoff:
        # The new part starts at the cutoff or at the range start, whichever is later.
        new_start = cutoff if range_start < cutoff else range_start
        new_tables = [
            JAMS_ARCHIVE_NEW_PATH + "/" + day.strftime('%Y-%m-%d')
            for day in daterange(new_start, range_end)
        ]

    return legacy_tables, new_tables


class MapFilterRegion:
    """Callable record filter that keeps only records of selected regions."""

    def __init__(self, regions):
        # Container that each record's 'region_id' is tested against with `in`.
        self.regions = regions

    def __call__(self, rec):
        """Yield ``rec`` unchanged if its 'region_id' is in ``self.regions``.

        Yields nothing for any other record.
        """
        is_selected = rec['region_id'] in self.regions
        if is_selected:
            yield rec


def mean(values_list):
    """Return the arithmetic mean of ``values_list``.

    True division is requested explicitly: the plain ``/`` operator
    floor-divides integers on Python 2 unless ``division`` is imported from
    ``__future__``, which would silently truncate the mean of integer levels.
    On Python 3 the result is the same as ``sum(values_list) / len(values_list)``.

    Raises ZeroDivisionError if ``values_list`` is empty.
    """
    return operator.truediv(sum(values_list), len(values_list))


def _parse_args():
    """Build the command-line parser and return the parsed arguments."""
    parser = argparse.ArgumentParser(description='saves traffic level archive from YT to local file')
    parser.add_argument('--proxy', default='hahn')
    parser.add_argument(
        '--dates', '-d', required=True,
        # The cutoff is rendered from the constant that get_tables() actually
        # uses, so the help text cannot drift from the behaviour.
        help='dates range in format like 201412_201501 days are ignored for dates older than {}'.format(
            JAMS_ARCHIVE_NEW_STARTDATE.strftime('%Y%m%d')))
    parser.add_argument('--regions', '-r', type=int, nargs='+')
    parser.add_argument('--sample', '-s', choices=['hour'])
    parser.add_argument('--output', '-o', default='levels.csv', help='output filename')
    parser.add_argument('--full', action='store_true', help='save all fields (adds timestamp and jams_length)')
    return parser.parse_args()


def _write_old_tables(writer, tables, regions, full):
    """Stream rows of the legacy archive tables into ``writer``.

    Each record carries the region id in 'subkey' and a tab-separated
    'value' (presumably timestamp, date_time, jams_length, level, judging by
    the CSV header written in main() -- confirm against the table schema).
    With ``full`` every field is written; otherwise only the region id and
    fields 1 and 3 are, and rows with an empty field 3 are skipped.
    """
    for table in tables:
        print(table)
        for rec in yt.read_table(table, format=yt.JsonFormat(attributes={'encode_utf8': False})):
            region_id = rec['subkey']
            if regions and region_id not in regions:
                continue
            values = rec['value'].split('\t')
            if full:
                writer.writerow([region_id] + values)
                continue
            level = values[3]
            if not level:
                continue
            writer.writerow([region_id, values[1], level])


def _existing_tables(tables):
    """Return the tables that exist on YT, printing each one that does not."""
    found = []
    for table in tables:
        if yt.exists(table):
            found.append(table)
        else:
            print(table, 'not found')
    return found


def _read_new_table(table, regions, full):
    """Load one new-archive table into a dict keyed by (region_id, time).

    The key uses 'timestamp' when ``full`` is set and 'isotime' otherwise, so
    a later record with the same key replaces an earlier one. Values are
    lists holding the remaining output columns.
    """
    storage = {}
    for rec in yt.read_table(table, format=yt.JsonFormat(attributes={'encode_utf8': False})):
        region_id = rec['region_id']
        if regions and region_id not in regions:
            continue
        if full:
            storage[(region_id, rec['timestamp'])] = [rec['isotime'], rec['jams_length'], rec['level']]
        else:
            storage[(region_id, rec['isotime'])] = [rec['level']]
    return storage


def _sample_hourly(data_storage):
    """Average levels per region and per hour.

    Expects the non-full layout ``{(region_id, isotime): [level]}``. The time
    string is cut to its first 13 characters and ':00' is appended
    (presumably 'YYYY-MM-DDTHH:00' -- confirm the isotime format).
    """
    levels_by_hour = collections.defaultdict(list)
    for (region_id, time_str), (level,) in data_storage.items():
        levels_by_hour[(region_id, time_str[:13] + ':00')].append(int(level))
    return {key: [mean(levels)] for key, levels in levels_by_hour.items()}


def main():
    """Export traffic levels for the requested dates and regions to a CSV file.

    Legacy (per-month) tables are streamed straight into the output. New
    (per-day) tables are loaded one at a time, optionally averaged per hour,
    sorted and then written. When more than 10 new tables are requested
    together with a region filter, they are first filtered on YT into a
    temporary table by a map operation.

    Raises NotImplementedError when --sample is combined with dates that fall
    into the legacy archive, or with --full.
    """
    args = _parse_args()

    yt.config.set_proxy(args.proxy)

    tables_old, tables_new = get_tables(args.dates)
    if tables_old and args.sample:
        raise NotImplementedError("Can't use sample for old tables")
    # Reject the unsupported combination before the output file is truncated
    # and before any table is downloaded.
    if args.sample and args.full:
        raise NotImplementedError("Can't use sample together with full")

    # Region ids are compared as strings against the values stored in the tables.
    regions = set(map(str, args.regions)) if args.regions else None

    with open(args.output, 'w') as wf:
        writer = csv.writer(wf, delimiter=';', quoting=csv.QUOTE_MINIMAL)
        if args.full:
            writer.writerow(['region_id', 'timestamp', 'date_time', 'jams_length', 'level'])
        else:
            writer.writerow(['region_id', 'date_time', 'level'])

        _write_old_tables(writer, tables_old, regions, args.full)

        tables_new = _existing_tables(tables_new)

        if len(tables_new) > 10 and regions:
            # Pre-filter on the cluster so that only the selected regions are
            # downloaded, as a single temporary table.
            temp_table = yt.create_temp_table()
            mapper = MapFilterRegion(regions)
            print(mapper, tables_new, temp_table)
            yt.run_map(mapper, tables_new, temp_table)
            tables_new = [temp_table]

        for table in tables_new:
            print(table)
            data_storage = _read_new_table(table, regions, args.full)
            if args.sample:
                data_storage = _sample_hourly(data_storage)
            writer.writerows(sorted(list(key) + value for key, value in data_storage.items()))


# Run the export only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()
