import argparse
from collections import Counter
from itertools import chain
import time
import re

import datetime
import six
from nile.api.v1 import aggregators as na
from nile.api.v1 import extractors as ne
from nile.api.v1 import filters as nf
from nile.api.v1 import Record
from qb2.api.v1 import filters as qf
from qb2.api.v1 import typing as qt


from projects.batching.batching_custom_params import (
    YARCHE_ADDRESSES, VV_ADDRESSES_TMP
)
from projects.data_sources.data_context.cargo import \
    DataContext as CargoLogsDataContext
from projects.data_sources.data_context.raworders_dmorders_sessions import \
    DataContext as OrdersSessionsLogsDataContext
from projects.data_sources.data_context.raw_services_logs import \
    DataContext as RawServicesLogsDataContext
from projects.efficiency_metrics.project_config import get_project_cluster
from projects.batching.nile_blocks.corps import calc_a_density_reducer
from projects.common.gp_transfer import transfer_from_yt_2_gp


def calc_main_metrics_reducer(groups):
    """Compute claim and batching counters for every group of claim records.

    Written as a reducer for ``.groupby(...).reduce(...)`` (see the disabled
    ``main_metrics`` stages in the script body of this file).

    Args:
        groups: iterable of ``(key, records)`` pairs. Every record must
            expose ``taxi_order_id``; ``chosen_waybill`` may be missing
            or empty.

    Yields:
        Record: the group key extended with

        * ``n_claims`` - number of records in the group;
        * ``n_claims_ld`` - records whose waybill contains
          ``'logistic-dispatch'``;
        * ``n_unique_taxi_orders`` - number of distinct ``taxi_order_id``
          values;
        * ``n_batched`` - records whose waybill is shared with at least one
          other record of the same group;
        * ``batched_share`` - ``n_batched / n_claims``.
    """
    for key, records in groups:
        n_claims = 0
        n_claims_ld = 0
        unique_taxi_orders = set()
        waybill_counter = Counter()
        for record in records:
            n_claims += 1
            unique_taxi_orders.add(record.taxi_order_id)

            waybill = record.get('chosen_waybill')
            if not waybill:
                # A claim without a waybill is neither logistic-dispatch nor
                # batched. Counting it under a shared None key would make all
                # such claims look batched with each other.
                continue
            n_claims_ld += int('logistic-dispatch' in waybill)
            waybill_counter[waybill] += 1

        # A waybill seen more than once means all of its claims were batched.
        n_batched = sum(
            count for count in waybill_counter.values() if count > 1
        )
        yield Record(
            key,
            n_claims_ld=n_claims_ld,
            n_claims=n_claims,
            n_unique_taxi_orders=len(unique_taxi_orders),
            # n_batched_pairs=(n_claims - n_unique_taxi_orders) * 2,
            n_batched=n_batched,
            # `* 1.` forces float division regardless of interpreter version.
            batched_share=(n_batched * 1. / n_claims)
            # batched_share=(n_claims - n_unique_taxi_orders) * 2 / n_claims
        )


if __name__ == '__main__':
    # Script entry point.
    #
    # Reads the `<finish_date>/cl_p_s_w_o_st_pr_final_w_subsidies` table
    # under `--yt-path-dir-to`, filters it on
    # claim_kind != 'platform_usage', derives per-claim quality flags
    # and writes the result to
    # `<finish_date>/batching_export_quality_included`.
    #
    # NOTE(review): `--yt-proxy` and `--start-date` are parsed but not used
    # by any active stage below; the cluster comes from
    # get_project_cluster().
    # NOTE(review): `--finish-date` has no default, so omitting it formats
    # the literal 'None' into every table path.

    parser = argparse.ArgumentParser()
    parser.add_argument('--yt-proxy', type=str, default='Hahn')
    parser.add_argument(
        '--yt-path-dir-to', type=str,
        default='//home/taxi-delivery/analytics/dev/batching/dead/{}'
    )
    parser.add_argument('--start-date', type=str)
    parser.add_argument('--finish-date', type=str)

    args = parser.parse_args()

    cluster = get_project_cluster()

    yt_path_dir_to = args.yt_path_dir_to
    start_date = args.start_date
    finish_date = args.finish_date

    # ------------------------------------------------------------------
    # Job 1: aggregated "main metrics". All of its stages are currently
    # disabled, so the job is created and run without any operations.
    # NOTE(review): confirm that running an empty job is intended.
    # ------------------------------------------------------------------
    job = cluster.job('Couriers collection' + str(time.time()))
    job = job.env(
        bytes_decode_mode='strict',
        yt_spec_defaults={'max_failed_job_count': 1000}
    )

    # Output schema of calc_main_metrics_reducer grouped by `utc_date`.
    schema = {
        "batched_share": qt.Float,
        "n_batched": qt.Int64,
        "n_claims": qt.Int64,
        "n_claims_ld": qt.Int64,
        "n_unique_taxi_orders": qt.Int64,
        "utc_date": qt.String
    }

    # job.table(yt_path_dir_to.format(
    #     '{}/cl_p_s_w_o_st_pr_final_w_subsidies'.format(finish_date))
    # ).filter(
    #     nf.custom(lambda x: x != 'platform_usage', 'claim_kind')
    # ).groupby('utc_date').reduce(
    #     calc_main_metrics_reducer
    # ).put(
    #     yt_path_dir_to.format(
    #         '{}/main_metrics'.format(finish_date)),
    #     schema=schema
    # )

    # NOTE: dict.update() returns None, so the schema for the by-zone table
    # has to be built as a new dict when this stage is re-enabled.
    # job.table(yt_path_dir_to.format(
    #     '{}/cl_p_s_w_o_st_pr_final_w_subsidies'.format(finish_date))
    # ).filter(
    #     nf.custom(lambda x: x != 'platform_usage', 'claim_kind')
    # ).groupby('utc_date', 'zone_id').reduce(
    #     calc_main_metrics_reducer
    # ).put(
    #     yt_path_dir_to.format(
    #         '{}/main_metrics_by_zone'.format(finish_date)),
    #     schema=dict(schema, zone_id=qt.String)
    # )

    job.run()

    # transfer_from_yt_2_gp(
    #     yt_table_path=yt_path_dir_to.format(
    #         '{}/main_metrics'.format(finish_date)),
    #     gp_table_name='analyst.nkozlovskaya_batching_fst',
    #     schema=schema
    # )
    #
    #
    # ------------------------------------------------------------------
    # Job 2: per-claim export with quality flags.
    # ------------------------------------------------------------------
    job = cluster.job('Couriers collection' + str(time.time()))
    job = job.env(
        bytes_decode_mode='strict',
        yt_spec_defaults={'max_failed_job_count': 1000}
    )

    # Schema of the per-claim export table. Each column is listed exactly
    # once (the original literal repeated 'tariff_zone').
    schema = {
        'tariff_zone': qt.String,
        'status': qt.String,
        # 'claim_id': qt.Int64,
        'claim_id': qt.String,
        'utc_date': qt.String,
        'corp_client_id': qt.String,
        'taxi_order_id': qt.String,
        'fullname': qt.String,
        'sec_till_cancelled': qt.Float,
        'sec_till_performer_lookup': qt.Float,
        'sec_till_performer_found': qt.Float,
        'sec_till_delivered_returned': qt.Float,
        'sec_till_delivered': qt.Float,
        'sec_till_delivered_deadline': qt.Float,
        'sec_till_fake_1_hour_deadline': qt.Float,
        'cargo_pricing_price': qt.Float,
        'final_price': qt.Float,
        'is_cancelled_after_2_min': qt.Int64,
        'is_cancelled_after_5_min': qt.Int64,
        'is_cancelled_after_10_min': qt.Int64,
        'is_cancelled_after_30_min': qt.Int64,
        'is_failed_by_clients_flag': qt.Int64,
        'is_pay_cancelled_flag': qt.Int64,
        'is_cancelled_flag': qt.Int64,
        'is_failed_by_us_flag': qt.Int64,
        'is_cancelled_by_courier_flag': qt.Int64,
        'is_performer_not_found_flag': qt.Int64,
        'is_success_flag': qt.Int64,
        'dispatch_type': qt.String,
        'is_batched_flag': qt.Int64,
        'corp_name': qt.String,
        'is_late': qt.Int64,
        'is_fake_late': qt.Int64,
        'is_delayed_order': qt.Int64,
        'time_2_delivery': qt.Float,
        'employer': qt.String,
        'custom_settings': qt.String,
        'group': qt.String,
    }

    # Terminal claim statuses, split by who caused the outcome.
    FINISHED_STATUSES = [
        'delivered_finish', 'returned_finish',
        'performer_not_found',
        'cancelled', 'cancelled_with_payment', 'cancelled_by_taxi',
        'cancelled_with_items_on_hands'
    ]
    SUCCESS_FINISHED_STATUSES = ['delivered_finish', 'returned_finish']
    FAILED_FINISHED_STATUSES_BECAUSE_OF_US = [
        'performer_not_found', 'cancelled_by_taxi'
    ]
    CANCELLED_STATUSES_BECAUSE_OF_CLIENT = [
        'cancelled'
    ]
    PAY_CANCELLED_STATUSES_BECAUSE_OF_CLIENT = [
        'cancelled_with_payment', 'cancelled_with_items_on_hands'
    ]

    # Source claims: filtered on claim_kind, with single and double quotes
    # stripped from `fullname`; every other column is passed through.
    final = job.table(yt_path_dir_to.format(
        '{}/cl_p_s_w_o_st_pr_final_w_subsidies'.format(finish_date))
    ).filter(
        nf.custom(lambda x: x != 'platform_usage', 'claim_kind')
    ).project(
        ne.all(['fullname']),
        fullname=ne.custom(
            lambda x: str(x).replace("'", '').replace('"', ''), 'fullname'
        )
    )

    # TODO join with corp names (filter)
    # TODO add to schema
    # TODO join with table, where batches are

    # TODO: add here the settings the batches are launched with
    # TODO: move to the production table a folder named after the launch
    #  date (production)

    # TODO: custom_settings + the rollout table

    # employers = job.table(
    #     '//home/taxi-delivery/production/batching/ld_experiment/employers_2021-03-14'
    # ).project('employer', 'corp_client_id')


    # YARCHE_AND_VV_EXPERIMENTS_YT_PATH = (
    #     '//home/taxi-delivery/production/batching/dead_batching/prod/first_addresses'
    # )
    # FIRST_WAVE = job.concat(
    #     job.table(
    #         '//home/taxi-delivery/analytics/dev/batching/dead/2021-03-07/aa_test_test_group_2_points'
    #     ).project('fullname', group=ne.const('exp')),
    #     # job.table(
    #     #     '//home/taxi-delivery/analytics/dev/batching/dead/2021-03-07/aa_test_control_group'
    #     # ).project('fullname', group=ne.const('control'))
    # )
    # whitelist_custom_settings_table = job.concat(
    #     *[
    #         job.table(YARCHE_AND_VV_EXPERIMENTS_YT_PATH),
    #         FIRST_WAVE
    #     ]
    # ).unique(
    #     'fullname'
    # )

    # final = final.join(
    #     whitelist_custom_settings_table, by='fullname', type='inner',
    #     assume_unique_right=True
    # )#.join(
    # #     employers, by='corp_client_id', type='left'
    # # )
    #
    # # final = final.filter(
    # #     nf.custom(lambda x: x in WHITELIST_CORPS, 'fullname')
    # # ).project(
    # #     ne.all('fullname'),
    # #     fullname=ne.custom(lambda x: addr_2_unique_addr[x],'fullname')
    # # )#.put(
    # # '//home/taxi-delivery/analytics/dev/batching/dead/2021-03-07/change_fullname_whitelist'
    # # '//home/taxi-delivery/analytics/dev/batching/dead/2021-03-07/change_fullname_whitelist'
    # # #     yt_path_dir_to.format('{}/change_fullname_whitelist'.format(finish_date))
    # # # )
    #
    # is_batched_flag_table = final.groupby(
    #     'taxi_order_id'
    # ).aggregate(
    #     number_of_claims_on_one_order=na.count()
    # ).project(
    #     'taxi_order_id',
    #     is_batched_flag=ne.custom(
    #         lambda x: 1 if (x > 1) else 0, 'number_of_claims_on_one_order'
    #     )
    #
    # )#.put(
    #     yt_path_dir_to.format(
    #         '{}/is_batched_flag'.format(finish_date)),
    # )

    # is_batched_flag_table = job.table(
    #     yt_path_dir_to.format(
    #         '{}/is_batched_flag'.format(finish_date)),
    # )


    # .join(
    #         is_batched_flag_table, by='taxi_order_id', type='left'
    #     )

    # Per-claim export: pass-through columns plus derived flags.
    final.project(
        'status', 'claim_id', 'utc_date', 'corp_client_id', 'taxi_order_id',
        # 'is_batched_flag',
        'sec_till_cancelled', 'sec_till_performer_lookup', 'sec_till_delivered',
        'sec_till_performer_found', 'sec_till_delivered_returned',
        'sec_till_delivered_deadline', 'sec_till_fake_1_hour_deadline',
        'employer',
        'custom_settings',
        # settings!!!! project 4 vs 10
        # TODO: cpo - per success order
        is_delayed_order=ne.custom(
            lambda x: int(x) if (x is not None) else None, 'is_delayed'
        ),
        # Late: the delivery time reached or exceeded the deadline.
        is_late=ne.custom(
            lambda x, y: int(y >= x) if (
                (x is not None) and (y is not None)) else None,
            'sec_till_delivered_deadline', 'sec_till_delivered'
        ),
        # "Fake" late: delivery took one hour (3600 s) or more.
        is_fake_late=ne.custom(
            lambda x: int(x >= 3600.) if x is not None else None,
            'sec_till_delivered'
        ),
        time_2_delivery='sec_till_delivered',
        tariff_zone='zone_id',
        corp_name='name',
        # The part of the waybill before the first '/'.
        dispatch_type=ne.custom(
            lambda x: x.split('/')[0] if x is not None else None,
            'chosen_waybill'
        ),
        # Prices are divided by 1e6; the fallback is a float so the value
        # type matches the qt.Float column in `schema`.
        cargo_pricing_price=ne.custom(
            lambda x: x / 1000000. if x is not None else 0.,
            'cargo_pricing_price'
        ),
        final_price=ne.custom(
            lambda x: x / 1000000. if x is not None else 0., 'final_price'
        ),
        is_success_flag=ne.custom(
            lambda x: int(x in SUCCESS_FINISHED_STATUSES), 'status'
        ),
        # problems with supply/dispatch
        is_performer_not_found_flag=ne.custom(
            lambda x: int(x in ['performer_not_found']), 'status'
        ),
        # problems with unhappy couriers
        is_cancelled_by_courier_flag=ne.custom(
            lambda x: int(x in ['cancelled_by_taxi']), 'status'
        ),
        is_failed_by_us_flag=ne.custom(
            lambda x: int(x in FAILED_FINISHED_STATUSES_BECAUSE_OF_US),
            'status'
        ),
        # problems with bad clients
        is_cancelled_flag=ne.custom(
            lambda x: int(x in CANCELLED_STATUSES_BECAUSE_OF_CLIENT), 'status'
        ),
        is_pay_cancelled_flag=ne.custom(
            lambda x: int(x in PAY_CANCELLED_STATUSES_BECAUSE_OF_CLIENT),
            'status'
        ),

        is_failed_by_clients_flag=ne.custom(
            lambda x: int(x in (
                    CANCELLED_STATUSES_BECAUSE_OF_CLIENT +
                    PAY_CANCELLED_STATUSES_BECAUSE_OF_CLIENT
            )),
            'status'
        ),
        fullname=ne.custom(
            lambda x: six.ensure_str(x).replace("'", '').replace('"', ''),
            'fullname'
        ),
        # Cancellation-delay buckets; a missing `sec_till_cancelled` gives 0.
        is_cancelled_after_30_min=ne.custom(
            lambda x: int((x is not None) and (x > 30 * 60)),
            'sec_till_cancelled'
        ),
        is_cancelled_after_10_min=ne.custom(
            lambda x: int((x is not None) and (x > 10 * 60)),
            'sec_till_cancelled'
        ),
        is_cancelled_after_5_min=ne.custom(
            lambda x: int((x is not None) and (x > 5 * 60)),
            'sec_till_cancelled'
        ),
        is_cancelled_after_2_min=ne.custom(
            lambda x: int((x is not None) and (x > 2 * 60)),
            'sec_till_cancelled'
        ),
        # TODO: are there cases where sec_till_performer_lookup is None
        #  but there has been no cancellation yet
    ).put(
        yt_path_dir_to.format(
            '{}/batching_export_quality_included'.format(finish_date)),
        schema=schema
    )

    # job.table(
    #     yt_path_dir_to.format(
    #                 '{}/batching_export_quality_included_cp'.format(finish_date)),
    # ).unique(
    #     'claim_id'
    # ).put(
    #     yt_path_dir_to.format(
    #         '{}/batching_export_quality_included'.format(finish_date)),
    #     schema=schema
    # )

    job.run()

    # transfer_from_yt_2_gp(
    #     yt_path_dir_to.format(
    #         '{}/batching_export_quality_included'.format(finish_date)),
    #     'analyst.nkozlovskaya_batching_export_quality',
    #     schema
    # )


