import argparse
from collections import Counter
import time
import re

import datetime
from nile.api.v1 import aggregators as na
from nile.api.v1 import extractors as ne
from nile.api.v1 import filters as nf
from nile.api.v1 import Record
from qb2.api.v1 import filters as qf
import numpy as np


from projects.batching.batching_custom_params import (
    VKUSVILL_CORP_CLIENT_ID, VV_ADDRESSES, VV_ADDRESSES_IDS,
    YARCHE_CORP_CLIENT_ID, YARCHE_ADDRESSES
)
from projects.data_sources.data_context.cargo import \
    DataContext as CargoLogsDataContext
from projects.data_sources.data_context.raworders_dmorders_sessions import \
    DataContext as OrdersSessionsLogsDataContext

from projects.data_sources.data_context.raw_services_logs import \
    DataContext as RawServicesLogsDataContext
from projects.efficiency_metrics.project_config import get_project_cluster
from projects.batching.nile_blocks.corps import calc_a_density_reducer


def get_last_record(groups):
    """Reducer: emit one record per group with the last record's coordinates.

    Args:
        groups: iterable of ``(key, records)`` pairs, as passed to a reducer.

    Yields:
        ``Record(key, geo={'lon': ..., 'lat': ...})`` built from the
        ``longitude`` / ``latitude`` fields of the last record iterated in
        the group. ``geo`` is ``None`` if the group produced no records.
    """
    for key, records in groups:
        # Reset per group so coordinates can never leak from the previous key.
        last_record = None
        for record in records:
            last_record = record

        geo = None
        if last_record is not None:
            # Only the final record matters, so the dict is built once.
            geo = {'lon': last_record.longitude, 'lat': last_record.latitude}

        yield Record(
            key,
            geo=geo
        )


def get_first_record(groups):
    """Reducer: emit the first record of every group unchanged.

    Args:
        groups: iterable of ``(key, records)`` pairs, as passed to a reducer.

    Yields:
        ``Record(first)`` where ``first`` is the first record iterated in the
        group (``None`` is passed through if the group produced no records).
    """
    for _key, records in groups:
        remaining = iter(records)
        head = next(remaining, None)
        # The group is still iterated to the end, exactly as before.
        for _unused in remaining:
            pass
        yield Record(head)


def agg_reducer_by_time(groups):
    """Reducer: collect ``subsidy_w_commission_value`` per group.

    Args:
        groups: iterable of ``(key, records)`` pairs, as passed to a reducer.

    Yields:
        ``Record(key, ...)`` with two extra fields:
        ``courier_subsidies_w_commision_sum`` - running total of the values,
        ``courier_subsidies_w_commision_list`` - the values in iteration order.
    """
    for key, records in groups:
        subsidy_total = 0
        subsidy_values = []

        for record in records:
            # Per the original author's note, this value already has the
            # commission deducted.
            value = record.subsidy_w_commission_value
            subsidy_total += value
            subsidy_values.append(value)

        yield Record(
            key,
            courier_subsidies_w_commision_sum=subsidy_total,
            courier_subsidies_w_commision_list=subsidy_values
        )


def mapper(records):
    """Mapper: extract the shop id mentioned in a record's ``comment``.

    The id is the first run of digits that follows the literal
    ``Магазин:`` (optionally separated by whitespace) in the comment text.

    Args:
        records: iterable of input records.

    Yields:
        ``Record(record, id_vv_from_comment=...)`` - the input record plus
        the parsed integer id, or ``None`` when the comment is missing, is
        not a string, or does not contain the marker.
    """
    # Compiled once per mapper run instead of two findall calls per record.
    shop_id_pattern = re.compile(r'Магазин:\s*(\d+)')

    for record in records:
        id_vv_from_comment = None

        comment = record.get('comment')
        # Some comments are absent or lack the "Магазин: " substring; those
        # records keep id_vv_from_comment=None instead of raising.
        if isinstance(comment, str):
            match = shop_id_pattern.search(comment)
            if match is not None:
                id_vv_from_comment = int(match.group(1))

        yield Record(
            record,
            id_vv_from_comment=id_vv_from_comment
        )


def calc_time_till_cancell(groups):
    """Reducer: pick the ``updated_ts`` of selected status changes per group.

    For every record whose ``new_status`` matches one of the tracked statuses
    the corresponding output field is set to that record's ``updated_ts``.
    When a status occurs several times, the record iterated last wins.

    Args:
        groups: iterable of ``(key, records)`` pairs, as passed to a reducer.

    Yields:
        ``Record(key, new_ts, performer_lookup_ts, cancelled_ts,
        performer_found_ts, delivered_returned_ts, delivered_ts)``; a field
        is ``None`` when its status never appeared in the group.
    """
    # (statuses that trigger the update, output field) - checked in order.
    # 'delivered' appears twice on purpose: it feeds both the combined
    # delivered/returned field and the delivered-only field.
    rules = (
        (('new',), 'new_ts'),
        (('performer_lookup',), 'performer_lookup_ts'),
        (('cancelled',), 'cancelled_ts'),
        (('performer_found',), 'performer_found_ts'),
        (('delivered', 'returned'), 'delivered_returned_ts'),
        (('delivered',), 'delivered_ts'),
    )

    for key, records in groups:
        stamps = {}
        for _statuses, field in rules:
            stamps[field] = None

        for record in records:
            status = record.get('new_status')
            for statuses, field in rules:
                if status in statuses:
                    stamps[field] = record.updated_ts

        yield Record(key, **stamps)

import json
def calc_not_batched_prices(records):
    """Mapper: add ``driver_potential_price`` parsed from ``recalc_response``.

    Args:
        records: iterable of input records.

    Yields:
        The input record unchanged when ``recalc_response`` is ``None``;
        otherwise the record plus ``driver_potential_price``, taken from
        ``driver -> price -> total`` of the JSON-decoded response (``None``
        when any of those keys is absent).
    """
    for record in records:
        raw_response = record.get('recalc_response')

        if raw_response is None:
            # Nothing to parse: pass the record through without the field.
            yield Record(record)
            continue

        response = json.loads(raw_response)
        driver_part = response.get('driver', {})
        price_part = driver_part.get('price', {})
        yield Record(
            record,
            driver_potential_price=price_part.get('total')
        )

if __name__ == '__main__':
    # Entry point. Builds one job that joins cargo points, claim points,
    # claims, segments, waybills, cargo orders, per-claim status timestamps
    # and corp clients, and writes the result to
    # <yt-path-dir-to>/<finish-date>/cl_p_s_w_o_st_final.

    parser = argparse.ArgumentParser()
    # NOTE: --yt-proxy is accepted for compatibility but is not read below;
    # the cluster comes from get_project_cluster().
    parser.add_argument('--yt-proxy', type=str, default='Hahn')
    parser.add_argument('--yt-path-dir-to', type=str,
                        default='//home/taxi-delivery/analytics/production/claims_logs/{}')
    # Both dates are mandatory: they are parsed unconditionally below, so
    # omitting one used to fail later with an opaque strptime error.
    parser.add_argument('--start-date', type=str, required=True,
                        help='first day of the period, YYYY-MM-DD')
    parser.add_argument('--finish-date', type=str, required=True,
                        help='last day of the period, YYYY-MM-DD')

    args = parser.parse_args()

    yt_path_dir_to = args.yt_path_dir_to
    # The raw string (not the parsed date) is used in output paths.
    finish_date = args.finish_date

    # Parse the period once; both data contexts receive the same bounds.
    date_format = '%Y-%m-%d'
    period_start = datetime.datetime.strptime(args.start_date, date_format)
    period_finish = datetime.datetime.strptime(finish_date, date_format)

    def _seconds_since_new(event_ts, new_ts):
        # Seconds between the 'new' status and a later status change;
        # None when that status change never happened.
        return (event_ts - new_ts) if event_ts is not None else None

    cluster = get_project_cluster()

    job = cluster.job('Couriers collection' + str(time.time()))
    job = job.env(
        bytes_decode_mode='strict',
        yt_spec_defaults={'max_failed_job_count': 1000}
    )

    cargo_d_c = CargoLogsDataContext(job, period_start, period_finish)

    # NOTE(review): ne.all([...]) presumably projects every field except the
    # listed ones - confirm against the nile extractors API.
    points = cargo_d_c.get_points().project(ne.all(['timestamp', 'source']))

    claim_points = cargo_d_c.get_claim_points().project(
        ne.all(['timestamp', 'source'])
    )

    segments = cargo_d_c.get_segments().project(ne.all(['timestamp', 'source']))

    # The argument-less .filter() that used to precede this projection was
    # dropped; its only predicate (taxi_order_id is not None) had already
    # been disabled.
    claims = cargo_d_c.get_claims().project(
        'taxi_order_id', 'uuid_id', 'timestamp', 'final_price', 'corp_client_id',
        'status', 'zone_id', 'dispatch_flow', 'final_pricing_calc_id',
        'is_delayed', 'due', 'eta', 'currency',
        'last_status_change_ts', 'claim_kind',
        claim_id='id',
        utc_date=ne.custom(
            lambda x:
            datetime.datetime.utcfromtimestamp(x).strftime('%Y-%m-%d'),
            'timestamp'
        ),
        sec_till_delivered_deadline=ne.custom(
            lambda x, y: (x - y) if x is not None else None, 'due', 'timestamp'
        ),
        # Synthetic deadline: one hour after the claim timestamp.
        sec_till_fake_1_hour_deadline=ne.const(3600.),
        fake_deadline_1_hour_timestamp=ne.custom(
            lambda x: x + 60 * 60, 'timestamp'
        )
    )

    o_s = cargo_d_c.get_cargo_orders().project(
        'cargo_pricing_price', 'waybill_ref', 'provider_order_id'
    )

    # TODO: investigate how to get properly normalized addresses.
    waybills = cargo_d_c.get_waybills().project(ne.all(['timestamp', 'source']))

    # One row per claim with the timestamps of its status changes, plus the
    # delay of each change relative to the 'new' status. Claims without a
    # 'new' status are dropped because every delay is measured from it.
    claims_log_statuses = cargo_d_c.get_claims_change_log().groupby(
        'claim_id'
    ).sort(
        'updated_ts'
    ).reduce(
        calc_time_till_cancell
    ).filter(
        qf.defined('new_ts')
    ).project(
        ne.all(),
        sec_till_performer_lookup=ne.custom(
            _seconds_since_new, 'performer_lookup_ts', 'new_ts'
        ),
        sec_till_cancelled=ne.custom(
            _seconds_since_new, 'cancelled_ts', 'new_ts'
        ),
        sec_till_performer_found=ne.custom(
            _seconds_since_new, 'performer_found_ts', 'new_ts'
        ),
        sec_till_delivered_returned=ne.custom(
            _seconds_since_new, 'delivered_returned_ts', 'new_ts'
        ),
        sec_till_delivered=ne.custom(
            _seconds_since_new, 'delivered_ts', 'new_ts'
        )
    )

    # Only consumed by the disabled subsidies / pricing stage below.
    cpc_table = cargo_d_c.get_cargo_pricing_calculations()

    orders_d_c = OrdersSessionsLogsDataContext(job, period_start, period_finish)

    corp_clients = orders_d_c.get_corp_clients(get_last_dttm_flag=True)

    # Main join. Rows for all point types are kept: the filter on
    # type == 'source' that used to follow the last join is disabled.
    pre_final = points.join(
        claim_points, by='point_id', type='inner'
    ).join(
        claims,
        type='inner', by='claim_id'
    ).join(
        segments, by_left='uuid_id', by_right='claim_id', type='left'
    ).join(
        waybills, by_left='chosen_waybill', by_right='external_ref', type='left'
    ).join(
        o_s, by_left='chosen_waybill', by_right='waybill_ref', type='left'
    ).join(
        claims_log_statuses,
        by='claim_id', type='left'
    ).join(
        corp_clients,
        by_left='corp_client_id', by_right='client_id', type='left'
    ).put(
        yt_path_dir_to.format('{}/cl_p_s_w_o_st_final'.format(finish_date))
    )

    # TODO: uncomment - subsidies / pricing stage, currently disabled.
    # subventions = orders_d_c.get_dm_subvention_transaction_log().groupby(
    #     'order_id'
    # ).sort(
    #     'utc_transaction_dttm'
    # ).reduce(
    #     agg_reducer_by_time
    # )
    #
    # final = pre_final.join(
    #     subventions, by_left='taxi_order_id', by_right='order_id', type='left'
    # ).join(
    #     cpc_table, by='final_pricing_calc_id', type='left'
    # ).map(
    #     calc_not_batched_prices
    # ).project(
    #     ne.all(
    #         'recalc_response'
    #     )
    # ).put(
    #     yt_path_dir_to.format('{}/cl_p_s_w_o_st_pr_final_w_subsidies'.format(finish_date))
    # )

    # Further exploratory stages that were kept here only as commented-out
    # code (VkusVill / Yarche source-point extraction, per-address B2B
    # statistics, cancellation-timing tables, batched-price parsing) were
    # never executed by this script and have been removed from this block.

    job.run()


