import argparse
from collections import Counter
import time
import re

import datetime
from nile.api.v1 import aggregators as na
from nile.api.v1 import extractors as ne
from nile.api.v1 import filters as nf
from nile.api.v1 import Record
from qb2.api.v1 import filters as qf
import numpy as np

import geohash as gh


from projects.batching.batching_custom_params import (
    VKUSVILL_CORP_CLIENT_ID, VV_ADDRESSES, VV_ADDRESSES_IDS,
    YARCHE_CORP_CLIENT_ID, YARCHE_ADDRESSES
)
from projects.data_sources.data_context.cargo import \
    DataContext as CargoLogsDataContext
from projects.data_sources.data_context.raworders_dmorders_sessions import \
    DataContext as OrdersSessionsLogsDataContext

from projects.data_sources.data_context.raw_services_logs import \
    DataContext as RawServicesLogsDataContext
from projects.efficiency_metrics.project_config import get_project_cluster
from projects.batching.nile_blocks.corps import calc_a_density_reducer


def get_last_record(groups):
    """Reduce every group to the coordinates of its last record.

    Args:
        groups: iterable of ``(key, records)`` pairs, as handed to a reducer.

    Yields:
        ``Record(key, geo={'lon': ..., 'lat': ...})`` where the values are the
        ``longitude`` / ``latitude`` of the last record in iteration order.
        ``geo`` is ``None`` when a group has no records.
    """
    for key, records in groups:
        # Reset per group so coordinates can never leak from a previous group.
        last_record = None
        for record in records:
            last_record = record

        geo = None
        if last_record is not None:
            geo = {
                'lon': last_record.longitude,
                'lat': last_record.latitude,
            }
        yield Record(
            key,
            geo=geo
        )

def get_first_record(groups):
    """Keep only the first record of every group.

    The group is still iterated to the end; records after the first one are
    ignored. The group key is not used.

    Yields:
        ``Record(first)`` where ``first`` is the first record seen in the
        group (``None`` is passed through if the group had no records).
    """
    for _key, group in groups:
        head = None
        for item in group:
            head = item if head is None else head
        yield Record(head)

def agg_reducer_by_time(groups):
    """Collect and total ``subsidy_w_commission_value`` for every group.

    Args:
        groups: iterable of ``(key, records)`` pairs, as handed to a reducer.

    Yields:
        ``Record(key, courier_subsidies_w_commision_sum=...,
        courier_subsidies_w_commision_list=...)`` holding the running total
        and the individual values in iteration order.
    """
    for key, records in groups:
        subsidies = []
        total = 0
        for record in records:
            # Per the original author's note: value with commission deducted.
            value = record.subsidy_w_commission_value
            total += value
            subsidies.append(value)
        yield Record(
            key,
            courier_subsidies_w_commision_sum=total,
            courier_subsidies_w_commision_list=subsidies
        )


def mapper(records):
    """Attach the shop id parsed from the free-text comment to every record.

    The id is the first run of digits that follows the literal ``Магазин:``
    marker (optional whitespace allowed) in ``record.comment``.

    Args:
        records: iterable of records exposing a ``comment`` attribute.

    Yields:
        ``Record(record, id_vv_from_comment=<int or None>)``. The id is
        ``None`` when the comment has no marker, is missing, or is not text.
    """
    shop_id_pattern = re.compile(r'Магазин:\s*(\d+)')

    for record in records:
        id_vv_from_comment = None
        try:
            match = shop_id_pattern.search(record.comment)
        except (AttributeError, TypeError):
            # Missing ``comment`` field or a non-text (e.g. None) comment:
            # keep the best-effort behaviour and leave the id undefined.
            match = None

        # Comments without the "Магазин: " substring exist; they yield None.
        if match is not None:
            id_vv_from_comment = int(match.group(1))

        yield Record(
            record,
            id_vv_from_comment=id_vv_from_comment
        )

def calc_time_till_cancell(groups):
    """Extract the timestamps of three status transitions for every group.

    For each record whose ``new_status`` is ``'new'``, ``'performer_lookup'``
    or ``'cancelled'`` the record's ``updated_ts`` is remembered; a later
    record with the same status overwrites an earlier one.

    Yields:
        ``Record(key, new_ts=..., performer_lookup_ts=..., cancelled_ts=...)``
        with ``None`` for every status that never occurred in the group.
    """
    tracked_statuses = ('new', 'performer_lookup', 'cancelled')

    for key, records in groups:
        seen_at = {status: None for status in tracked_statuses}
        for record in records:
            status = record.get('new_status')
            if status in tracked_statuses:
                seen_at[status] = record.updated_ts
        yield Record(
            key,
            new_ts=seen_at['new'],
            performer_lookup_ts=seen_at['performer_lookup'],
            cancelled_ts=seen_at['cancelled']
        )

import json
def _get_nested(mapping, *path):
    """Follow ``path`` through nested dicts; return None if any level is absent or not a dict."""
    node = mapping
    for step in path:
        if not isinstance(node, dict):
            return None
        node = node.get(step)
    return node


def calc_not_batched_prices(records):
    """Extract the driver's total price from the JSON ``recalc_response`` field.

    Two response layouts are recognised:

    * a top-level ``driver`` key -> ``driver.price.total``
      (``is_new_format_api=0``);
    * otherwise a top-level ``price`` key -> ``price.driver.price.total``
      (``is_new_format_api=1``).

    Args:
        records: iterable of records supporting ``.get('recalc_response')``.

    Yields:
        The record unchanged when ``recalc_response`` is ``None``; otherwise
        ``Record(record, driver_potential_price=..., is_new_format_api=...)``.
        Both extra fields are ``None`` when the response matches neither
        layout.

    Raises:
        ValueError: if ``recalc_response`` is not valid JSON (propagated from
            ``json.loads``).
    """
    for record in records:
        raw_response = record.get('recalc_response')
        if raw_response is None:
            # Nothing to parse: pass the record through untouched.
            yield Record(
                record,
            )
            continue

        recalc_response = json.loads(raw_response)

        # Reset per record so a value can never leak from the previous one.
        driver_potential_price = None
        is_new_format_api = None
        if isinstance(recalc_response, dict):
            if 'driver' in recalc_response:
                driver_potential_price = _get_nested(
                    recalc_response, 'driver', 'price', 'total')
                is_new_format_api = 0
            elif 'price' in recalc_response:
                driver_potential_price = _get_nested(
                    recalc_response, 'price', 'driver', 'price', 'total')
                is_new_format_api = 1

        yield Record(
            record,
            driver_potential_price=driver_potential_price,
            is_new_format_api=is_new_format_api
        )



def duplicate_mapper(records):
    """Fan every record out into one copy per time window.

    Windows are expressed in minutes. For each window the copy carries

    * ``is_cancelled_till_window_sec_or_not_cancelled``: 1 when
      ``sec_till_cancelled`` (a missing or falsy value counts as 0) is
      strictly below the window length in seconds, else 0;
    * ``window``: the window as a string, e.g. ``'0.25'`` or ``'10'``.
    """
    window_minutes = (0.25, 0.5, 1, 2, 5, 10, 20, 30, 60)

    for record in records:
        for minutes in window_minutes:
            elapsed_sec = record.get('sec_till_cancelled', 0) or 0
            within_window = elapsed_sec < (minutes * 60)
            yield Record(
                record,
                is_cancelled_till_window_sec_or_not_cancelled=int(within_window),
                window=str(minutes)
            )

def calc_cancelled_prob_by_reducer(groups):
    """Compute per-group cancellation counters and the derived share.

    For every group the reducer counts the orders, splits them by
    ``status == 'cancelled'`` and sums the
    ``is_cancelled_till_window_sec_or_not_cancelled`` flag. It then emits

    * ``abs``  = (sum of flags) - (number of non-cancelled orders)
    * ``prob`` = ``abs`` / (number of orders)

    Presumably ``abs`` is the number of orders cancelled inside the window,
    given that non-cancelled orders always carry the flag - verify against
    the mapper that sets it.

    NOTE(review): ``cnt_iscancelledtillwindowsec_or_notcancelled_all_statuses``
    is emitted but never incremented, so it is always 0.
    """
    for key, records in groups:
        total_orders = 0
        cancelled = 0
        not_cancelled = 0
        flagged = 0
        flagged_all_statuses = 0  # never updated; kept for the output schema

        for record in records:
            if record.status == 'cancelled':
                cancelled += 1
            else:
                not_cancelled += 1
            flagged += record.is_cancelled_till_window_sec_or_not_cancelled
            total_orders += 1

        excess = flagged - not_cancelled
        yield Record(
            key,
            prob=excess / total_orders,
            abs=excess,
            cnt_orders=total_orders,
            cnt_cancelled=cancelled,
            cnt_not_cancelled=not_cancelled,
            cnt_iscancelledtillwindowsec_or_notcancelled_cancelled=flagged,
            cnt_iscancelledtillwindowsec_or_notcancelled_all_statuses=flagged_all_statuses,
        )

def _dict_reducer(groups):
    """Fold the per-window rows of a group into two ``window -> value`` dicts.

    Yields:
        ``Record(key, cnt_orders=..., probs={window: prob},
        abs={window: abs})``. ``cnt_orders`` is taken from the last row of
        the group (0 if the group is empty).
    """
    for key, records in groups:
        order_count = 0
        prob_by_window = {}
        abs_by_window = {}
        for row in records:
            order_count = row.cnt_orders
            window = row.window
            prob_by_window[window] = row.prob
            abs_by_window[window] = row.abs
        yield Record(
            key,
            cnt_orders=order_count,
            probs=prob_by_window,
            abs=abs_by_window
        )


def _reducer_geo_and_fullname(groups):
    """Collect the distinct names and coordinates seen in every group.

    Yields:
        ``Record(key, fullname_list=..., lat_lon_list=..., latitude=...,
        longitude=...)`` where the two lists hold the distinct ``fullname``
        values and distinct ``'<latitude>_<longitude>'`` strings, and
        ``latitude`` / ``longitude`` come from the last record of the group
        (``None`` if the group is empty).
    """
    for key, records in groups:
        names = set()
        points = set()
        lat = None
        lon = None
        for row in records:
            names.add(row.fullname)
            lat = row.latitude
            lon = row.longitude
            points.add('{}_{}'.format(lat, lon))
        yield Record(
            key,
            fullname_list=list(names),
            lat_lon_list=list(points),
            latitude=lat,
            longitude=lon
        )


# ``n_mins`` values found in ``tmp_less`` and the table each slice goes to.
# In the previous revision of this script ``n_mins`` was derived from the
# ``estimated_cancells_10`` / ``_1`` / ``_05`` flags as 600 / 60 / 15.
_N_MINS_SLICES = (
    (600, 'tmp_600_sec'),
    (60, 'tmp_60_sec'),
    (15, 'tmp_15_sec'),
)


def _n_mins_equals(expected):
    """Return a predicate that is true when its argument equals ``expected``.

    A factory is used instead of a lambda written inside the loop so that
    each predicate captures its own value rather than the loop variable.
    """
    return lambda x: x == expected


def main():
    """Split ``<dir>/<finish_date>/tmp_less`` into one table per ``n_mins``.

    Example::

        python launcher_other_corps.py \
            --yt-path-dir-to //home/taxi-delivery/analytics/dev/batching/dead_tmp/{} \
            --start-date 2021-03-16 --finish-date 2021-03-23

    Command-line arguments:
        --yt-path-dir-to: path template with one ``{}`` placeholder that is
            filled with ``<finish_date>/<table name>``.
        --finish-date: date component of the input/output paths (required;
            without it the paths would be formatted with ``None``).
        --yt-proxy, --start-date: accepted for command-line compatibility;
            not used by the stage that remains in this script.

    Input:
        ``tmp_less`` is NOT built here. Earlier revisions of this file kept
        the stages that produced it as commented-out code. It was derived
        from ``cl_p_s_w_o_st_pr_final_w_subsidies`` through
        ``all_b2b_for_probs_cancells`` /
        ``all_b2b_for_probs_cancells_windows_tmp``,
        ``all_b2b_a_statistics_by_geo_string`` /
        ``all_b2b_a_statistics_filtered_by_geo_string``,
        ``geo_string_2_addresses``,
        ``all_b2b_a_statistics_filtered_w_geo_by_geo_string`` and
        ``final_tmp``. Consult version control history to re-run them.

    Output:
        ``tmp_600_sec``, ``tmp_60_sec`` and ``tmp_15_sec`` next to the input,
        holding the rows whose ``n_mins`` equals 600, 60 and 15 respectively.
    """
    parser = argparse.ArgumentParser(
        description='Split the tmp_less table by n_mins.'
    )
    parser.add_argument('--yt-proxy', type=str, default='Hahn')
    parser.add_argument('--yt-path-dir-to', type=str,
                        default='//home/taxi-delivery/analytics/dev/{}')
    parser.add_argument('--start-date', type=str)
    parser.add_argument('--finish-date', type=str, required=True)
    args = parser.parse_args()

    yt_path_dir_to = args.yt_path_dir_to
    finish_date = args.finish_date

    cluster = get_project_cluster()

    # The timestamp suffix keeps job names unique between launches.
    job = cluster.job('Couriers collection' + str(time.time()))
    job = job.env(
        bytes_decode_mode='strict',
        yt_spec_defaults={'max_failed_job_count': 1000}
    )

    source_path = yt_path_dir_to.format('{}/tmp_less'.format(finish_date))
    for n_mins, table_name in _N_MINS_SLICES:
        job.table(
            source_path
        ).filter(
            nf.custom(_n_mins_equals(n_mins), 'n_mins')
        ).put(
            yt_path_dir_to.format('{}/{}'.format(finish_date, table_name))
        )

    job.run()


if __name__ == '__main__':
    main()


