#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""Выгружаем данные по нужным сегментам из хранилища приматченных треков

get_segments_data

"""

from __future__ import print_function

from analytics.geo.tools.dates.lib.dates_tools import get_tables_by_dates_str


TRAVEL_TIMES_TABLE = '//home/maps/jams/production/data/travel_times/'


class MapFilterIds(object):
    """Mapper that keeps only records for selected persistent ids (and clids).

    An instance is callable: given one input record (a dict) it yields zero or
    more output records. A record passes when its 'persistent_id' is contained
    in ``ids`` and, if ``clids_filter`` is given, its 'clid' is contained in
    ``clids_filter``.

    Membership is tested with ``in`` for every record, so passing a set or a
    dict as ``ids`` / ``clids_filter`` keeps the test cheap.
    """

    def __init__(self, ids, clids_filter=None, use_fields=None, mark_ids=None):
        """
        :param ids: container of persistent ids to keep. If it is a dict, it
            maps persistent_id -> group (or -> list of groups) and every output
            record gets a 'group' field; a list value produces one output
            record per group.
        :param clids_filter: optional container of allowed 'clid' values;
            None disables the clid check.
        :param use_fields: optional list of field names to keep in the output
            records. When ``ids`` is a dict, 'group' (and 'marked', if
            ``mark_ids`` is given) are added automatically. The list passed in
            is copied and never modified.
        :param mark_ids: ids that must be flagged: a 'marked' field is added,
            True for these ids and False for all the others. Only supported
            when ``ids`` is a dict.
        :raises NotImplementedError: if ``mark_ids`` is non-empty while
            ``ids`` is not a dict.
        """
        self.ids = ids
        self.clids_filter = clids_filter
        # Work on a private copy: 'group'/'marked' are appended below and the
        # caller's list must not grow as a side effect.
        self.use_fields = list(use_fields) if use_fields is not None else None
        if mark_ids is not None and not isinstance(mark_ids, set):
            self.mark_ids = set(mark_ids)
        else:
            self.mark_ids = mark_ids

        if isinstance(ids, dict):
            if any(isinstance(name, list) for name in ids.values()):
                self.call = self.filter_id_clid_add_multi_group
            else:
                self.call = self.filter_id_clid_add_group
            if self.use_fields is not None:
                if 'group' not in self.use_fields:
                    self.use_fields.append('group')
                if mark_ids is not None and 'marked' not in self.use_fields:
                    self.use_fields.append('marked')
        else:
            self.call = self.filter_id_clid
            if mark_ids:
                raise NotImplementedError(
                    'mark_ids is only supported when ids is a dict')

    def __call__(self, rec):
        for res in self.call(rec):
            yield res

    def _passes_filters(self, rec):
        """Return True if rec matches the id filter and the optional clid filter."""
        if rec.get('persistent_id') not in self.ids:
            return False
        return (self.clids_filter is None
                or rec.get('clid') in self.clids_filter)

    def _project(self, rec):
        """Return rec restricted to use_fields, or rec itself if none are set."""
        if self.use_fields:
            return {f: rec.get(f) for f in self.use_fields}
        return rec

    def filter_id_clid(self, rec):
        """Yield rec (restricted to use_fields, if set) when it passes the filters."""
        if self._passes_filters(rec):
            yield self._project(rec)

    def filter_id_clid_add_group(self, rec):
        """Yield a passing rec with its 'group' (and 'marked') field filled in.

        Used when ``ids`` is a dict whose values are all single groups.
        The output is restricted to use_fields when they are set.
        """
        if not self._passes_filters(rec):
            return

        persistent_id = rec['persistent_id']
        rec['group'] = self.ids[persistent_id]
        if self.mark_ids is not None:
            rec['marked'] = persistent_id in self.mark_ids
        yield self._project(rec)

    def filter_id_clid_add_multi_group(self, rec):
        """Yield one output record per group the record's id belongs to.

        Used when at least one value of the ``ids`` dict is a list of groups;
        a non-list value is treated as a single group.
        """
        if not self._passes_filters(rec):
            return

        persistent_id = rec['persistent_id']
        groups = self.ids[persistent_id]
        if not isinstance(groups, list):
            groups = [groups]
        if self.mark_ids is not None:
            rec['marked'] = persistent_id in self.mark_ids

        several_groups = len(groups) > 1
        for group in groups:
            rec['group'] = group
            out = self._project(rec)
            if out is rec and several_groups:
                # Without a projection the very same dict would be yielded for
                # every group, and the next iteration would overwrite 'group'
                # in records that were already handed out.
                out = dict(rec)
            yield out


def edge_namer(id_name_dict):
    """Build a function that maps a row to the name of its edge.

    :param id_name_dict: mapping persistent_id -> name.
    :return: function ``namer(row)`` returning a one-element list: the name
        registered for ``row['persistent_id']`` or 'unknown' if there is none.
        Called with None it returns ``['name']`` (presumably the column
        header -- confirm against the caller).
    """
    def namer(row):
        if row is not None:
            edge_name = id_name_dict.get(row['persistent_id'], 'unknown')
            return [edge_name]
        return ['name']

    return namer

# def get_segments_data_daily(yt, persistent_ids, dates, clids_filter=None):
#     for table in get_tables_by_dates_str(TRAVEL_TIMES_TABLE, dates, '-'):
#         yield (table,
#                get_segments_data(yt, persistent_ids, table_in=table, clids_filter=clids_filter))

def get_segments_data(yt, persistent_ids,
                      dates=None, clids_filter=None, table_in=None, table_out=None, mark_ids=None, use_fields=None):
    """Filter travel-time records by edge ids and, optionally, by clids.

    The input is either the tables selected by ``dates`` under
    TRAVEL_TIMES_TABLE or the explicitly given ``table_in``; exactly one of
    the two must be provided. The filtering is run as a YT map operation with
    a MapFilterIds mapper.

    :param yt: YT client; must provide ``create_temp_table`` and ``run_map``.
    :param persistent_ids: ids of the edges to keep, in any form accepted by
        MapFilterIds (e.g. a dict id -> group).
    :param dates: dates to take the data for, passed to
        get_tables_by_dates_str together with the '-' separator.
    :param clids_filter: optional container of allowed clids.
    :param table_in: explicit input table (instead of ``dates``).
    :param table_out: output table; a temporary table is created when None.
    :param mark_ids: ids to flag in the output; all of them must be present in
        ``persistent_ids``.
    :param use_fields: optional list of fields to keep in the output.
    :return: path of the table with the selected records.
    :raises ValueError: if neither or both of ``dates`` and ``table_in`` are
        given, or if ``mark_ids`` contains ids missing from ``persistent_ids``.
    """
    # Validate everything up front (with real exceptions, since asserts vanish
    # under ``python -O``) so that no temp table is created for a bad call.
    if table_in is None and not dates:
        raise ValueError('give dates or table_in')
    if table_in is not None and dates:
        raise ValueError("if you give table_in don't give dates")
    if mark_ids:
        unknown_ids = set(mark_ids) - set(persistent_ids)
        if unknown_ids:
            raise ValueError(
                'mark_ids must be a subset of persistent_ids, %d unknown id(s)' % len(unknown_ids))

    if table_in is None:
        table_in = get_tables_by_dates_str(TRAVEL_TIMES_TABLE, dates, '-')
    if table_out is None:
        table_out = yt.create_temp_table()

    mapper = MapFilterIds(persistent_ids, clids_filter=clids_filter, mark_ids=mark_ids, use_fields=use_fields)
    print(mapper, table_in, table_out)
    # data_size_per_job is set to 1 GiB.
    yt.run_map(mapper, table_in, table_out, spec={'data_size_per_job': 1024 * 1024 * 1024})
    return table_out

