
def dyn_table_format_mapper():
    """Build a record mapper and return it.

    Returns:
        A generator function ``mapper(records)`` that yields one ``Record``
        per input record, carrying the input's ``dbid_uuid`` and a constant
        ``courier_info`` of ``{'ar': 1, 'cr': 0}``.
    """

    def mapper(records):
        for row in records:
            courier_id = row.dbid_uuid
            # A fresh dict per output row, so rows never share one object.
            info = {'ar': 1, 'cr': 0}
            yield Record(dbid_uuid=courier_id, courier_info=info)

    return mapper


def order_proc_mapper(records):
    """Extract the source geopoint from every record that has one.

    For each record the path ``doc -> order -> request -> source ->
    geopoint`` is walked. A level that is missing *or* explicitly ``None``
    ends the walk and the record is skipped. Chaining ``.get(key, {})``
    would only cover the missing-key case and raise ``AttributeError`` on a
    ``None`` value.

    Args:
        records: iterable of objects exposing ``get(key, default)``; the
            nested levels are expected to be dict-like.

    Yields:
        ``Record`` with ``geopoint`` (the raw value), ``lat``
        (``geopoint[1]``), ``lon`` (``geopoint[0]``) and a constant ``d``
        of ``{'ar': 1, 'cr': 0}``.
    """
    geopoint_path = ('doc', 'order', 'request', 'source', 'geopoint')
    for record in records:
        node = record
        for key in geopoint_path:
            # Two-argument form: the record type is only known to support
            # get(key, default).
            node = node.get(key, None)
            if node is None:
                break
        if node is None:
            continue
        yield Record(
            geopoint=node,
            lat=node[1],
            lon=node[0],
            d={'ar': 1, 'cr': 0},
        )


def default_mapper_courier(records):
    """Yield one ``Record`` per input record with default courier info.

    Each output carries the input's ``dbid_uuid`` and a constant
    ``courier_info`` of ``{'ar': 1, 'cr': 0}``.
    """
    for row in records:
        courier_id = row.dbid_uuid
        # A fresh dict per output row, so rows never share one object.
        info = {'ar': 1, 'cr': 0}
        yield Record(dbid_uuid=courier_id, courier_info=info)

def default_mapper_tz(records):
    """Yield one ``Record`` per input record with default tariff-zone info.

    Each output carries the input's ``tariff_geo_zone_code`` as
    ``tariff_zone`` and a constant ``tariff_zone_info`` of
    ``{'ar_': 1, 'cr_': 0}``.
    """
    for row in records:
        zone_code = row.tariff_geo_zone_code
        # A fresh dict per output row, so rows never share one object.
        info = {'ar_': 1, 'cr_': 0}
        yield Record(tariff_zone=zone_code, tariff_zone_info=info)


# schema.update({'courier_info': qt.Json})
# schema.update({'dbid_uuid': qt.String})

# schema.update({'tariff_zone_info': qt.Json})
# schema.update({'tariff_zone': qt.String})

# schema.update({'geo_hash_info': qt.Json})
# schema.update({'geo_hash': qt.String})

from nile.api.v1 import extractors as ne

# One extractor per schema column, keyed by column name. Each one wraps an
# identity function over that same column.
# NOTE(review): presumably this passes every schema column through
# `project(**custom_extractors)` unchanged — confirm against the Nile docs.
custom_extractors = {
    column: ne.custom(lambda value: value, column)
    for column in schema.keys()
}

# The job name gets the current timestamp appended, so each launch has a
# distinct name.
job = cluster.job('Couriers collection' + str(time.time())).env(
    bytes_decode_mode='strict',
    yt_spec_defaults={'max_failed_job_count': 1000},
)

# Data context over the 'sessions' logs, bound to `job`.
# NOTE(review): the two datetimes are presumably the start and end of the
# covered period — confirm against OrdersSessionsLogsDataContext.
orders_d_c = OrdersSessionsLogsDataContext(
    job,
    datetime.datetime.strptime('2021-02-20', '%Y-%m-%d'),
    datetime.datetime.strptime('2021-02-21', '%Y-%m-%d'),
    ['sessions'],
)



# tmp_features = orders_d_c.get_sessions().unique(
#     'dbid_uuid'
# ).map(
#     default_mapper
# ).project(**custom_extractors).sort('dbid_uuid').put(
#     args.usual_features_table_yt_path + '_tmp', schema=schema,
# )


# tmp_features = orders_d_c.get_sessions().unique(
#     'tariff_geo_zone_code'
# ).map(
#     default_mapper
# ).filter(
#     qf.defined('tariff_zone')
# ).project(**custom_extractors).sort('tariff_zone').put(
#     args.usual_features_table_yt_path, schema=schema,
# )


# job.table('//home/taxi-dwh/etl/raw_history/orders/order_proc/2021-02-26').map(
#     order_proc_mapper
# ).put(
#     '//home/taxi-delivery/analytics/dev/ml_availability/tmp_orders'
# )

# Build the de-duplicated geohash table: keep `geo_hash_info`, pass
# `geo_hash` through str(), deduplicate and sort by `geo_hash`, then write
# the result with the shared `schema`.
job.table(
    '//home/taxi-delivery/analytics/dev/ml_availability/tmp_orders_geohash'
).project(
    'geo_hash_info',
    geo_hash=ne.custom(lambda x: str(x), 'geo_hash'),
).unique(
    'geo_hash'
).project(
    **custom_extractors
).sort(
    'geo_hash'
).put(
    '//home/taxi-delivery/analytics/dev/ml_availability/tmp_orders_geohash_unique',
    # Must be the ASCII name `schema`; a stray non-ASCII letter in the
    # identifier makes it an undefined name.
    schema=schema,
)

# Datamarts context bound to `job`, given the two days up to `utc_datetime`.
# NOTE(review): the two datetimes are presumably the period bounds — confirm
# against DatamartsDataContext.
dm_dc = DatamartsDataContext(
    job, utc_datetime - datetime.timedelta(days=2), utc_datetime
)

# Write the cube returned by the context to a temporary table.
dm_dc.get_dima_frolov_cube().put(
    '//home/taxi-delivery/analytics/dev/ml_availability/tmp/driver_tags'
)

# job.table('//home/taxi-dwh/etl/raw_history/orders/order_proc/2021-02-26').map(
#     order_proc_mapper
# ).put(
#     '//home/taxi-delivery/analytics/dev/ml_availability/tmp_orders'
# )

# job.table('//home/taxi-delivery/analytics/dev/ml_availability/tmp_orders_geohash').project(
#     'geo_hash_info',
#     geo_hash=ne.custom(lambda x: str(x), 'geo_hash')
# ).unique('geo_hash').project(**custom_extractors).sort('geo_hash').put(
#     '//home/taxi-delivery/analytics/dev/ml_availability/tmp_orders_geohash_unique',
#     schema=schema
# )

# Launch the job.
# NOTE(review): presumably this is the point where the table operations
# declared on `job` above are actually executed — confirm against Nile docs.
job.run()

# from collections import defaultdict
# aggregators_by_tz = defaultdict(lambda: defaultdict(list)) # defaultdict(lambda : defaultdict(int))
# aggregators_by_geohash = defaultdict(lambda: defaultdict(list))
#
# statuses = Counter()
#
# for r in cluster.read('//home/taxi-delivery/analytics/dev/ml_availability/tmp/light_order_proc_wo'):
#
#     ts_created = datetime_2_timestamp(parse_timestring(r.created, 'UTC'))
#     ts_seen = datetime_2_timestamp(parse_timestring(r.seen, 'UTC'))
#
#     # print(look_for_performer_time)
#
#     tz = r.tariff_zone
#     geohash=r.geohash
#
#     n_cands = r.n_candidates
#     c_i = r.candidate_index
#     sp = r.sp
#     look_for_performer_time = ts_seen - ts_created
#
#     status = '{}_{}'.format(
#         six.ensure_str(r.status or 'None'),
#         six.ensure_str(r.taxi_status or 'None')
#     )
#     statuses[status] += 1 #??
#
#     tz_final = {}
#     geohash_final = {}
#     #
#
#
#     aggregators_by_tz[tz] = tz_final
#     aggregators_by_geohash[geohash] = geohash_final


# number of orders in the geozone

# ratio of unique entities with thermal bags (presumably couriers) to unique orders in the geozone

# for cl in r.doc_class_list:
#     cl_set.add(cl)

# for req in r.unique_special_reqs:
#     reqs_set.add(req)


# cargo_d_c = CargoLogsDataContext(
#     job,
#     from_dttm,
#     to_dttm
# )
#
# cargo_d_c.get_claims()