
from collections import Counter
from collections import defaultdict

from nile.api.v1 import Record
import numpy as np

# Edge kinds that, judging by the name, may be ignored when tallying gamble
# verdicts.  NOTE(review): not referenced by any live code in this part of
# the file -- confirm it is still needed.
IGNORABLE_GAMBLE_VERDICTS = ['too-heavy-edge', 'other-proposition']

# Reject reasons for which, judging by the name, profile clean-up data used
# to be attached to a report.  NOTE(review): not referenced by any live code
# in this part of the file -- confirm it is still needed.
ATTACH_CLEANUP_DATA_REASONS = [
    'payment-methods-mismatch',
    'special-requirements-mismatch',
    'no-class-intersection',
]

# Values of the ``type`` field that the reducers below dispatch on.
TYPES = [
    'candidates',
    'edges',
    'assignment',
    'segment_info',
    'waybill_data',
]


def aggregate_all_drivers_reducer(groups):
    """Collapse every group into one record holding its event queue ``q``.

    For each input record two rows are appended to ``q``:
    ``[timestamp_ld, type]`` and
    ``[timestamp_ld, type, n_candidates, n_edges]``, where the two counters
    hold the length of ``available_candidates`` from the most recent
    'candidates' / 'edges' record seen so far (``None`` until one is seen).

    NOTE(review): the short two-element row looks like a leftover of an
    earlier version; it is kept because consumers may rely on the shape.
    """
    for key, records in groups:
        latest_candidates = None
        latest_edges = None
        queue = []
        for rec in records:
            queue.append([rec.timestamp_ld, rec.type])
            if rec.type == 'candidates':
                latest_candidates = len(rec.available_candidates)
            elif rec.type == 'edges':
                latest_edges = len(rec.available_candidates)
            queue.append([
                rec.timestamp_ld,
                rec.type,
                latest_candidates,
                latest_edges,
            ])

        yield Record(key, q=queue)

def calc_ts_bw(groups):
    """Emit, per group, the first and last ``timestamp_ld`` of each event type.

    Tracked types are 'candidates', 'edges' and 'assignment'.  The output
    record also carries ``queue``: a ``[timestamp_ld, type]`` row for every
    input record, in input order.  Timestamps of types that never occur stay
    ``None``.
    """
    tracked_types = ('candidates', 'edges', 'assignment')

    for key, records in groups:
        queue = []
        first_seen = dict.fromkeys(tracked_types)
        last_seen = dict.fromkeys(tracked_types)

        for rec in records:
            queue.append([rec.timestamp_ld, rec.type])

            for event_type in tracked_types:
                if rec.type != event_type:
                    continue
                last_seen[event_type] = rec.timestamp_ld
                # A first timestamp that is still None gets overwritten by
                # the next record of the same type.
                if first_seen[event_type] is None:
                    first_seen[event_type] = rec.timestamp_ld

        yield Record(
            key,
            queue=queue,
            last_assign_timestamp=last_seen['assignment'],
            last_cands_timestamp=last_seen['candidates'],
            last_edges_timestamp=last_seen['edges'],
            fst_cands_timestamp=first_seen['candidates'],
            fst_edges_timestamp=first_seen['edges'],
            fst_assign_timestamp=first_seen['assignment']
        )



def calc_different_stats(groups):
    """Summarise the dispatch event stream of every segment.

    Each group is ``(key, rows)``; ``key['segment_id']`` is copied to the
    output.  Rows are dispatched on ``row['type']``:

    * ``assignment``      -- counted; contractor id goes to the timeline.
    * ``waybill_data``    -- non-null ``taxi_order_id`` values are collected.
    * ``segment_info``    -- employer / corp client / zone / tariffs keep the
      first truthy value seen; ``router_id`` keeps the latest value.
    * ``propose_request`` -- counted; ``external_ref`` goes to the timeline.
    * ``candidates``      -- candidates are remembered until the next gamble.
    * ``edges``           -- a gamble: every remembered candidate is matched
      against the returned edges by id, and the edge ``kind`` (``None`` when
      the candidate has no edge) is tallied as its verdict.  Ignored until at
      least one ``candidates`` row with a positive timestamp was seen.

    Every handled row also appends ``[type, timestamp_ld, ...]`` to
    ``timeline``.  One record per group is yielded, and only when a truthy
    ``router_id`` was seen.
    """
    # Upper bound on per-candidate reports kept in ``suspicious_entries``.
    max_suspicious_entries = 100

    for key, rows in groups:
        n_candidates_requests = 0
        n_times_assigned = 0
        n_candidates_in_reply = 0
        n_gambles = 0
        n_propose_requests = 0
        n_total_edges = 0

        employer_code = None
        zone_id = None
        router_id = None
        corp_client_id = None
        tariffs_substitution = None
        # Never filled at the moment; kept so the output column stays present.
        time_to_due = None

        last_candidates_timestamp = 0
        last_candidates = {}
        reject_reasons_count = {}
        suspicious_entries = []
        taxi_order_ids = set()
        timeline = []

        for row in rows:
            row_type = row['type']

            if row_type == 'assignment':
                n_times_assigned += 1
                timeline.append(
                    ['assignment', row.timestamp_ld, row.contractor_id]
                )

            # TODO: seems like this branch doesn't work
            elif row_type == 'waybill_data' and (
                    row['taxi_order_id'] is not None):
                taxi_order_ids.add(row['taxi_order_id'])
                timeline.append(['waybill_data', row.timestamp_ld])

            elif row_type == 'segment_info':
                employer_code = employer_code or row['employer_code']
                router_id = row['router_id']
                corp_client_id = corp_client_id or row['corp_client_id']
                zone_id = zone_id or row['zone_id']
                tariffs_substitution = (
                    tariffs_substitution or row['tariffs_substitution']
                )
                timeline.append(
                    ['segment_info', row.timestamp_ld, router_id]
                )

            elif row_type == 'propose_request':
                n_propose_requests += 1
                # TODO: collect special_requirements /
                # taxi_order_requirements from propose_api_request
                timeline.append([
                    'propose_request',
                    row.timestamp_ld,
                    row.propose_api_request.get('external_ref'),
                ])

            elif row_type == 'candidates':
                n_candidates_requests += 1
                last_candidates_timestamp = row['timestamp']
                n_in_reply = 0
                for candidate in row['available_candidates']:
                    candidate_id = candidate['candidate_id']
                    last_candidates[candidate_id] = {
                        'route_info': candidate['route_info'],
                        'transport': candidate['transport'],
                    }
                    n_in_reply += 1
                # Count the candidates of THIS reply.  Adding
                # len(last_candidates) before the reply is stored would add
                # 0 whenever the previous reply was followed by a gamble.
                n_candidates_in_reply += n_in_reply
                # TODO: also put a per-transport-type breakdown here
                timeline.append(['candidates', row.timestamp_ld, n_in_reply])

            elif row_type == 'edges' and last_candidates_timestamp > 0:
                n_gambles += 1
                edges = row['available_candidates']
                n_kind_all = len(edges)
                n_total_edges += n_kind_all

                n_kind_regular = 0
                reject_reasons = {}
                for edge in edges:
                    reject_reasons[edge['contractor_id']] = edge['kind']
                    if edge['kind'] == 'regular':
                        n_kind_regular += 1

                for candidate_id, meta in last_candidates.items():
                    # None means the candidate got no edge in this gamble.
                    reject_reason = reject_reasons.get(candidate_id)
                    reject_reasons_count[reject_reason] = (
                        reject_reasons_count.get(reject_reason, 0) + 1
                    )
                    if len(suspicious_entries) >= max_suspicious_entries:
                        continue
                    suspicious_entries.append({
                        'candidate_id': candidate_id,
                        'lag': row['timestamp'] - last_candidates_timestamp,
                        'kind': reject_reason,
                        'meta': meta,
                    })

                timeline.append(
                    ['edges', row.timestamp_ld, n_kind_all, n_kind_regular]
                )
                # Candidates are consumed by the gamble.
                last_candidates = {}

        # Most frequent verdict, not counting "no edge at all" (None).
        most_popular_reject_reason = None
        for reason, count in reject_reasons_count.items():
            if reason is None:
                continue
            if not most_popular_reject_reason or (
                    count > reject_reasons_count[most_popular_reject_reason]):
                most_popular_reject_reason = reason

        if router_id:
            yield Record(**{
                'suspicious_entries': suspicious_entries,
                'n_gambles': n_gambles,
                'n_candidates_requests': n_candidates_requests,
                'n_total_edges': n_total_edges,
                'n_candidates_in_reply': n_candidates_in_reply,
                'n_times_assigned': n_times_assigned,
                'employer_code': employer_code,
                'dispatch_zone_id': zone_id,
                'router_id': router_id,
                'corp_client_id': corp_client_id,
                'time_to_due': time_to_due,
                'most_popular_reject_reason': most_popular_reject_reason,
                'segment_id': key['segment_id'],
                'propose_rejections': reject_reasons_count.get(None, 0),
                'n_propose_requests': n_propose_requests,
                'tariffs_substitution': tariffs_substitution,
                'taxi_order_ids': list(taxi_order_ids),
                'timeline': timeline,
                'n_timeline': len(timeline),
            })



def _count_timeline_events(timeline, event_type, start_ts, n_secs):
    """Count ``event_type`` entries stamped less than ``n_secs`` after start.

    Timeline entries are indexed as ``[event_type, timestamp, ...]``.
    """
    return sum(
        1 for el in timeline
        if (el[1] - start_ts) < n_secs and el[0] == event_type
    )


def _median_by_status(samples_by_status):
    """Map every status to the 50th percentile of its collected samples."""
    return {
        status: np.percentile(secs, 50)
        for status, secs in samples_by_status.items()
    }


def main_reducer(groups):
    """Aggregate the claim records of every group into one summary record.

    Fields read from each record: ``status``, ``timestamp``, ``s_timestamp``
    and, via ``get``, ``timeline``, ``is_delayed``, ``due``, ``waybills``,
    ``sec_till_cancelled``, ``sec_till_ld_finish`` and
    ``empty_interval_in_ld_working``.  Timeline entries are indexed as
    ``[type, timestamp, ...]``; 'edges' entries additionally carry the
    number of all edges at index 2 and of 'regular' edges at index 3.

    The yielded record extends ``key`` with:

    * per-status columns (``<metric>_<status>``): claim counts, waybill
      version counts, medians of several durations, and gamble / assignment
      counters for the 15/30/60/120/300 second windows after the claim start;
    * group-wide columns: cancellation timing counters, the median and mean
      time to the event that follows the first gamble, and candidate
      conversion figures.

    TODO: share of due claims that were cancelled 30 minutes before due.
    TODO: percentiles of sec_btw_ld_finishes_and_cancel.
    """
    # NOTE(review): 180 and 600 second windows used to be tallied as well,
    # but were never written to the output record; add them here if those
    # columns are wanted.
    event_windows = (15, 30, 60, 120, 300)
    event_metrics = (
        'at_least_1_gamble',
        'n_gambles',
        'at_least_1_assignment',
        'n_assignment',
    )
    cancel_limits_sec = (15, 30, 60, 120, 180, 300, 600)
    before_due_limits_min = (30, 20, 10)

    for key, records in groups:
        n_claims = 0
        # NOTE(review): despite the name this counts claims that DO have a
        # non-empty timeline -- confirm the intended meaning.
        n_claims_not_in_ld = 0
        cnt_statuses = Counter()
        n_waybill_versions = Counter()

        sec_till_ld_starts = defaultdict(list)
        sec_till_ld_finishes = defaultdict(list)
        sec_of_ld_works = defaultdict(list)
        sec_till_segment_is_ready = defaultdict(list)

        sec_btw_ld_finishes_and_cancel = []
        sec_max_interval_in_ld_working = []
        sec_till_cancel_s = []
        sec_till_1st_edge = []

        n_cancelled_within = dict.fromkeys(before_due_limits_min, 0)
        n_claims_cancelled_before_due = 0

        windowed = {
            (metric, window): Counter()
            for metric in event_metrics
            for window in event_windows
        }

        total_regular_edges = 0
        total_edges = 0

        for record in records:
            n_claims += 1
            status = record.status
            cnt_statuses[status] += 1

            # An empty list is skipped as well: max() of nothing would raise.
            waybills = record.get('waybills')
            if waybills:
                n_waybill_versions[status] += (
                    max(int(el[1]) for el in waybills) - 1
                )

            timeline = record.get('timeline')
            if not timeline:
                continue
            n_claims_not_in_ld += 1

            # For due (delayed) orders the search happens differently: the
            # reference points are clamped to 30 minutes before ``due``.
            ld_start = timeline[0][1]
            claim_start = record.timestamp
            if record.get('is_delayed') == True:  # noqa: E712
                earliest = record.due - 30 * 60
                ld_start = max(earliest, ld_start)
                claim_start = max(earliest, claim_start)

            # Timestamp of the event that directly follows the first gamble.
            # The timeline tags gambles as 'edges' (comparing against 'edge'
            # never matches).  NOTE(review): the output column is called
            # sec_till_1st_assignment -- confirm "event after the first
            # gamble" is the intended definition.
            next_event_ts = None
            for pos, event in enumerate(timeline[:-1]):
                if event[0] == 'edges':
                    next_event_ts = timeline[pos + 1][1]
                    break
            if next_event_ts is not None:
                sec_till_1st_edge.append(next_event_ts - ld_start)

            # TODO: for due orders measure from 30 minutes before due
            # instead of record.timestamp
            sec_till_ld_starts[status].append(ld_start - record.timestamp)
            sec_till_ld_finishes[status].append(
                timeline[-1][1] - record.timestamp
            )
            sec_of_ld_works[status].append(timeline[-1][1] - ld_start)
            sec_till_segment_is_ready[status].append(
                record.s_timestamp - record.timestamp
            )

            sec_till_cancelled = record.get('sec_till_cancelled')
            if sec_till_cancelled is not None:
                sec_btw_ld_finishes_and_cancel.append(
                    record.get('sec_till_ld_finish') - sec_till_cancelled
                )
                sec_till_cancel_s.append(sec_till_cancelled)

            empty_interval = record.get('empty_interval_in_ld_working')
            if empty_interval is not None:
                sec_max_interval_in_ld_working.append(empty_interval)

            if (record.get('due') is not None) and (
                    sec_till_cancelled is not None):
                # Seconds left until ``due`` at the moment of cancellation.
                sec_left = (record.due - record.timestamp) - sec_till_cancelled
                for minutes in before_due_limits_min:
                    n_cancelled_within[minutes] += (
                        0 < sec_left < minutes * 60
                    )
                n_claims_cancelled_before_due += (0 < sec_left)

            for window in event_windows:
                n_gambles = _count_timeline_events(
                    timeline, 'edges', claim_start, window)
                n_assignments = _count_timeline_events(
                    timeline, 'assignment', claim_start, window)
                windowed['at_least_1_gamble', window][status] += (
                    n_gambles > 0)
                windowed['n_gambles', window][status] += n_gambles
                windowed['at_least_1_assignment', window][status] += (
                    n_assignments > 0)
                windowed['n_assignment', window][status] += n_assignments

            # Pooled over the whole group, so the conversion does not depend
            # on which claim happens to be processed last.
            total_regular_edges += sum(
                el[3] for el in timeline if el[0] == 'edges')
            total_edges += sum(
                el[2] for el in timeline if el[0] == 'edges')

        # ---- per-status columns: "<prefix>_<status>" ----
        per_status = [
            ('status', cnt_statuses),
            ('n_waybills_version', n_waybill_versions),
            ('sec_till_segment_is_ready_perc_50',
             _median_by_status(sec_till_segment_is_ready)),
            ('sec_of_ld_works_perc_50', _median_by_status(sec_of_ld_works)),
            ('sec_till_ld_finishes_perc_50',
             _median_by_status(sec_till_ld_finishes)),
            ('sec_till_ld_starts_perc_50',
             _median_by_status(sec_till_ld_starts)),
        ]
        for window in event_windows:
            for metric in event_metrics:
                per_status.append((
                    '{}_till_{}_sec'.format(metric, window),
                    windowed[metric, window],
                ))

        fields = {}
        for prefix, by_status in per_status:
            for status, value in by_status.items():
                fields['{}_{}'.format(prefix, status)] = value

        # ---- group-wide columns ----
        has_first_edge = len(sec_till_1st_edge) > 0
        fields.update(
            sec_till_1st_assignment_perc_50=(
                np.percentile(sec_till_1st_edge, 50)
                if has_first_edge else None
            ),
            sec_till_1st_assignment_mean=(
                np.mean(sec_till_1st_edge) if has_first_edge else None
            ),
            n_claims_not_in_ld=n_claims_not_in_ld,
            n_claims=n_claims,
            n_claims_sec_till_cancelled_perc_50=(
                np.percentile(sec_till_cancel_s, 50)
                if len(sec_till_cancel_s) else None
            ),
            n_claims_cancelled_before_due=n_claims_cancelled_before_due,
            conversion_from_available_to_regular=(
                total_regular_edges * 1. / total_edges
                if total_edges != 0 else None
            ),
            avg_number_of_regular_candidates=(
                total_regular_edges * 1. / n_claims_not_in_ld
                if n_claims_not_in_ld > 0 else None
            ),
        )
        for limit in cancel_limits_sec:
            name = 'n_claims_sec_till_cancelled_less_than_{}_sec'.format(limit)
            fields[name] = sum(1 for el in sec_till_cancel_s if el < limit)
        for minutes in (5, 10):
            name = (
                'n_claims_sec_max_interval_in_ld_working_is_more_than_{}_mins'
            ).format(minutes)
            fields[name] = sum(
                1 for el in sec_max_interval_in_ld_working
                if el > minutes * 60
            )
        for minutes in (5, 10, 30):
            name = (
                'n_claims_sec_btw_ld_finishes_and_cancel_is_more_than_{}_mins'
            ).format(minutes)
            fields[name] = sum(
                1 for el in sec_btw_ld_finishes_and_cancel
                if el > minutes * 60
            )
        for minutes in before_due_limits_min:
            name = 'n_claims_cancelled_{}_mins_before_due'.format(minutes)
            fields[name] = n_cancelled_within[minutes]

        yield Record(key, **fields)


def get_all_waybills_by_segment(groups):
    """Gather the waybills of each group into a single ``waybills`` list.

    Every input record contributes one row:
    ``[external_ref, waybill_building_version, created_ts, resolved_at]``.
    """
    for key, records in groups:
        rows = [
            [
                rec.external_ref,
                rec.waybill_building_version,
                rec.created_ts,
                rec.resolved_at,
            ]
            for rec in records
        ]
        yield Record(key, waybills=rows)


def dup_mapper(records):
    """Emit each record three times with progressively wider roll-ups.

    Per input record the output is, in this order:

    1. the record unchanged;
    2. a copy with ``zone_id`` replaced by ``'all'``;
    3. a copy with both ``zone_id`` and ``group`` replaced by ``'all'``.

    A record lacking ``zone_id`` or ``group`` raises ``KeyError``.
    """
    for record in records:
        yield Record(record)

        fields = record.to_dict()
        # The overrides accumulate: the second emitted copy keeps the
        # zone_id='all' set for the first one.
        for column in ('zone_id', 'group'):
            fields.pop(column)
            fields[column] = 'all'
            yield Record(**fields)



# def get_dry_run_info_burnt_orders(
#         job
#     ):
#     job.table('//home/taxi-delivery/analytics/production/ld_ab_test/monitorings/dry_run_wave_2_2021-04-28/diff_in_cands').join(
#         job.table(
#             '//home/taxi-delivery/analytics/dev/batching/dead_tmp/2021-04-30/cl_p_s_w_o_st_pr_final_w_subsidies'
#         ),
#         by='segment_id', type='left_only'
#     ).put(
#         '//home/taxi-delivery/analytics/production/ld_ab_test/monitorings/dry_run_wave_2_2021-04-28/diff_in_cands_v1'
#     )
#
#     job.table(
#         '//home/taxi-delivery/analytics/production/ld_ab_test/monitorings/dry_run_wave_2_2021-04-28/diff_in_cands').join(
#         job.table(
#             '//home/taxi-delivery/analytics/dev/batching/dead_tmp/2021-04-30/cl_p_s_w_o_st_pr_final_w_subsidies'
#         ).filter(
#             nf.custom(
#                 lambda x: x not in ['delivered_finish', 'returned_finish'],
#                 'status')
#         ),
#         by='segment_id', type='inner'
#     ).put(
#         '//home/taxi-delivery/analytics/production/ld_ab_test/monitorings/dry_run_wave_2_2021-04-28/diff_in_cands_v2'
#     )
#     return job

# def get_tmp():
#
#     # tmp = claims.filter(
#     #     nf.custom(lambda x: x != 'platform_usage', 'claim_kind')
#     # ).join(
#     #     segments,
#     #     by_left='uuid_id', by_right='claim_id', type='left',
#     # ).project(
#     #     ne.all(),
#     #     utc_date=ne.custom(
#     #         lambda x:
#     #         datetime.datetime.utcfromtimestamp(x).strftime('%Y-%m-%d'),
#     #         'timestamp'
#     #     ),
#     #     utc_date_hour=ne.custom(
#     #         lambda x:
#     #         datetime.datetime.utcfromtimestamp(x).strftime('%Y-%m-%d %H'),
#     #         'timestamp'
#     #     )
#     # ).put(
#     #     yt_path_to.format('segments_claims')
#     # )
#
#     # raw_logs_dc = RawLogs(
#     #     job,
#     #     datetime.datetime.strptime(
#     #         from_date, '%Y-%m-%d')
#     #     ,
#     #     datetime.datetime.strptime(
#     #         to_date, '%Y-%m-%d'
#     #     ),
#     # )
#     # tmp = job.table(yt_path_to.format('segments_claims'))
#     # orders_2_dispatch = (
#     #     raw_logs_dc.get_ld_dispatch().join(
#     #         tmp.project(
#     #             ne.all(['chosen_waybill'])
#     #         ), type='inner',
#     #         by_right='segment_id',
#     #         by_left='cargo_ref_id',
#     #         # assume_small_right=True
#     #     )
#     # ).sort(
#     #     'segment_id', 'timestamp_ld'
#     #     # TODO: some orders are duplicated here because taxi drivers declined them
#     # ).put(
#     #     yt_path_to.format('tmp_log_dispatch')
#     # )
#     # orders_2_dispatch.map(
#     #     LogEntrySegmentDetector()
#     # ).sort('segment_id', 'timestamp_ld').put(
#     #     yt_path_to.format('tmp_log_dispatch_parsed')
#     # )
#
#
#     # job.table(yt_path_to.format('tmp_log_dispatch_parsed')).groupby(
#     #     'segment_id'
#     # ).sort(
#     #     'timestamp_ld'
#     # ).reduce(
#     #     calc_ts_bw
#     # ).put(
#     #     yt_path_to.format('calc_ts_bw')
#     # )
#
#     # arr = job.table(yt_path_to.format('tmp_log_dispatch_parsed')).groupby(
#     #     'segment_id'
#     # ).sort(
#     #     'timestamp_ld'
#     # ).reduce(
#     #     calc_different_stats
#     # ).put(
#     #     yt_path_to.format('calc_different_stats')
#     # )
#
#     # arr = job.table(
#     #     yt_path_to.format('calc_different_stats')
#     # )
#     # arr.join(
#     #     job.table('//home/taxi-delivery/analytics/production/ld_ab_test/monitorings/realtime_schema').filter(
#     #         nf.custom(
#     #                 lambda x: x in [
#     #                     'ld_vs_taxi_dispatch_experiment_2021-03-11',
#     #                     'corps_v4_31_03',
#     #                     'ld_vs_taxi_dispatch_experiment_2021-04-14',
#     #                     'russia_cities_15_04'
#     #                     #
#     #                     # 'ld_vs_taxi_dispatch_experiment_2021-03-11_control',
#     #                     # 'corps_v4_31_03_control',
#     #                     # 'ld_vs_taxi_dispatch_experiment_2021-04-14_control',
#     #                     # 'russia_cities_15_04_control'
#     #                 ],
#     #                 'group'
#     #             )
#     #         ).join(
#     #             job.table(yt_path_to.format('segments_claims')).project(
#     #                 "claim_id",
#     #                 "due",
#     #                 "is_delayed",
#     #                 "s_timestamp",
#     #                 "segment_id"
#     #             ), type='left', by='claim_id'
#     #     ), type='right', by='segment_id'
#     # ).put(yt_path_to.format('calc_different_stats_only_test_group'))
#
#     # job.table((yt_path_to.format('calc_different_stats_only_test_group'))).filter(
#     #     nf.custom(lambda x: x not in [
#     #         'delivered_finish', 'returned_finish', 'cancelled_with_payment',
#     #         # 'cancelled_by_taxi'
#     #     ], 'status')
#     # ).put(
#     #     (yt_path_to.format('calc_different_stats_only_test_group_only_cancelled'))
#     # )
#
#
#     # job.table((yt_path_to.format('calc_different_stats_only_test_group'))).filter(
#     #     nf.custom(lambda x: x not in [
#     #         'delivered_finish', 'returned_finish', 'cancelled_with_payment',
#     #         'cancelled_by_taxi', 'cancelled_with_items_on_hands'
#     #     ], 'status')
#     # ).project(
#     #     ne.all(),
#     #     last_timestamp_in_ld=ne.custom(lambda x: x[-1][1] if ((x is not None) and (len(x) > 0)) else None, 'timeline'),
#     #     sec_till_ld_finish=ne.custom(lambda x, y: (x[-1][1] - y) if ((x is not None) and (len(x) > 0)) else None, 'timeline', 'timestamp'),
#     #
#     #
#     #     empty_interval_in_ld_working=ne.custom(lambda x: max(
#     #             np.array([el[1] for el in x][1:])
#     #             -
#     #             np.array([el[1] for el in x][:-1])
#     #     ) if ((x is not None) and (len(x) > 1)) else None, 'timeline')
#     # ).put(
#     #     (yt_path_to.format('calc_different_stats_only_test_group_only_cancelled_restrcited'))
#     # )
#
#     # arr_ = job.table(yt_path_to.format('calc_ts_bw'))
#     # arr_.join(
#     #     job.table('//home/taxi-delivery/analytics/production/ld_ab_test/monitorings/realtime_schema').filter(
#     #         nf.custom(
#     #                 lambda x: x in [
#     #                     'ld_vs_taxi_dispatch_experiment_2021-03-11',
#     #                     'corps_v4_31_03',
#     #                     'ld_vs_taxi_dispatch_experiment_2021-04-14',
#     #                     'russia_cities_15_04'
#     #                     # 'ld_vs_taxi_dispatch_experiment_2021-03-11_control',
#     #                     # 'corps_v4_31_03_control',
#     #                     # 'ld_vs_taxi_dispatch_experiment_2021-04-14_control',
#     #                     # 'russia_cities_15_04_control'
#     #                 ],
#     #                 'group'
#     #             )
#     #         ).join(
#     #             job.table(yt_path_to.format('segments_claims')).project(
#     #                 "claim_id",
#     #                 "due",
#     #                 "is_delayed",
#     #                 "s_timestamp",
#     #                 "segment_id"
#     #             ), type='left', by='claim_id'
#     #     ), type='right', by='segment_id'
#     # ).put(yt_path_to.format('calc_ts_bw_only_test_group'))
#
#
#     # job.table(
#     #     yt_path_to.format(
#     #         'tpm_log_dispatch_parsed_mapper_upd_with_milliseconds_calc_ts')
#     # ).filter(
#     #     nf.custom(lambda x: (x[-1][0] - x[0][0]) > 30 * 60, 'queue')
#     # ).put(
#     #     yt_path_to.format(
#     #         'filtered_long_search')
#     # )
#
#
#
#
#
#
#     # job.table(
#     #     yt_path_to.format(
#     #         'filtered_long_search')
#     # ).filter(
#     #         # nf.custom(lambda x: x in ['candidates', 'edges'], 'type'),
#     #         qf.defined('segment_id')
#     # ).join(
#     #     job.table(
#     #         '//home/taxi-delivery/analytics/dev/burning_research/tmp/tpm_log_dispatch_parsed_mapper_upd_with_milliseconds'
#     #     ).filter(
#     #         nf.custom(lambda x: x in ['candidates', 'edges'], 'type'),
#     #         qf.defined('segment_id')
#     #     ),
#     #     by='segment_id', type='inner'
#     # ).put(
#     #     yt_path_to.format(
#     #         'filtered_long_search_w_cands')
#     # )
#
#
#     #
#     # job.table(yt_path_to.format(
#     #         'filtered_long_search_w_cands')).groupby(
#     #     'segment_id'
#     # ).reduce(
#     #     aggregate_all_drivers_reducer
#     # ).put(
#     #     yt_path_to.format(
#     #         'filtered_long_search_w_cands_agg')
#     # )
#
#     # job.table(
#     #     yt_path_to.format('tpm_log_dispatch_parsed_mapper_calc_ts')
#     # ).join(
#     #     job.table(yt_path_to.format('tmp_segments_claims')),
#     #     by='segment_id', type='inner'
#     # ).join(
#     #     job.table('//home/taxi-delivery/analytics/production/ld_ab_test/monitorings/realtime_schema'),
#     #     by='claim_id', type='inner'
#     # ).put(
#     #     yt_path_to.format('final')
#     # )
#
#     # def _reducer(groups):
#     #     for key, records in groups:
#     #         for record in records:
#     #             pass
#     #         yield Record(
#     #             key,
#     #             ??
#     #         )
#     #
#     # job.table(yt_path_to.format('final')).unique('claim_id').put(
#     #     yt_path_to.format('final_2_unique')
#     # )
#
#     #     .groupby(
#     #     'claim_id'
#     # ).reduce(
#     #     _reducers
#     # )
#
#     # .put(
#     #         yt_path_to.format('final_tmp')
#     #     )#\
#     #
#     #     job.table(yt_path_to.format('final_tmp'))
#     pass



def claims_pattern_ld_stages_reducer(groups):
    """Aggregate the ``queue_secs`` vectors of every group position-wise.

    For each ``(key, records)`` pair one ``Record`` is emitted with:

    * ``arr_mean`` - mean of the ``queue_secs`` vectors along axis 0;
    * ``arr_perc_50`` - 50th percentile of the same vectors along axis 0;
    * ``cnt`` - number of records in the group;
    * ``minus_cnt`` - ``-cnt``.

    NOTE(review): presumably all ``queue_secs`` within a group have the
    same length, so that they stack into a 2-D array - confirm with caller.
    """
    for key, records in groups:
        rows = [rec.queue_secs for rec in records]
        n_rows = len(rows)
        stacked = np.array(rows)

        yield Record(
            key,
            arr_mean=list(np.mean(stacked, axis=0)),
            arr_perc_50=list(np.percentile(stacked, 50, axis=0)),
            cnt=n_rows,
            minus_cnt=-n_rows
        )



def _sec_between(later_ts, earlier_ts):
    """Return ``later_ts - earlier_ts``, or None when either one is None."""
    if later_ts is None or earlier_ts is None:
        return None
    return later_ts - earlier_ts


def claim_ld_stages_mapper(records):
    """Derive per-stage timing and volume features from a record's timeline.

    Each timeline element is indexed positionally: ``el[0]`` is the event
    type and ``el[1]`` its timestamp; for ``'edges'`` events ``el[2]`` and
    ``el[3]`` are accumulated as candidate counts (presumably all candidates
    and regular candidates respectively - TODO confirm with the producer).

    Records are skipped when ``timeline`` is None, when ``n_timeline`` is
    800 or more, or when the timeline is empty (an empty timeline used to
    crash with IndexError).

    For every remaining record the input is re-emitted with these extras:

    * ``sec_till_1st_segment_info`` - first ``segment_info`` timestamp minus
      ``record.timestamp``;
    * ``sec_till_1st_candidates`` / ``sec_till_1st_edges`` /
      ``sec_till_1st_assignment`` / ``sec_till_1st_propose_request`` - gap
      between the first event of that type and the first event of the
      preceding stage (segment_info -> candidates -> edges -> assignment ->
      propose_request); None if either of the two events never occurred;
    * ``queue_str`` - space-joined event types in timeline order;
    * ``queue_secs`` - gap between consecutive timeline timestamps, with a
      leading 0 for the first event;
    * ``n_steps_candidates`` / ``n_steps_edges`` / ``n_steps_assignment`` -
      number of events of that type;
    * ``n_candidates_per_edge`` - sum of ``el[2]`` over ``'edges'`` events
      divided by their number, None when there are no such events;
    * ``share_regular_edges`` - sum of ``el[3]`` divided by sum of ``el[2]``
      over ``'edges'`` events, None when the latter sum is 0.
    """
    stage_types = (
        'segment_info', 'candidates', 'edges', 'assignment', 'propose_request'
    )
    counted_types = ('candidates', 'edges', 'assignment')

    for record in records:
        timeline = record.get('timeline')
        if timeline is None:
            continue
        # NOTE(review): presumably guards against oversized timelines; the
        # origin of the 800 threshold is not visible here - confirm.
        if record.n_timeline >= 800:
            continue
        if len(timeline) == 0:
            # Nothing to measure, and timeline[0] below would raise.
            continue

        first_ts = dict.fromkeys(stage_types)
        n_steps = dict.fromkeys(counted_types, 0)
        sum_candidates_in_edges = 0
        sum_candidates_regular_in_edges = 0

        for el in timeline:
            event_type, event_ts = el[0], el[1]

            if event_type in first_ts and first_ts[event_type] is None:
                first_ts[event_type] = event_ts
            if event_type in n_steps:
                n_steps[event_type] += 1
            if event_type == 'edges':
                sum_candidates_in_edges += el[2]
                sum_candidates_regular_in_edges += el[3]

        # NOTE: two further metrics were sketched here but never enabled:
        #   time_1 = max(last assignment, last propose_request)
        #            - first assignment
        #   time_2 = (first segment_info after the first assignment)
        #            - first assignment

        # Prepend the first timestamp so the diff has one entry per event,
        # the first of which is 0.
        stamps = np.array([timeline[0][1]] + [el[1] for el in timeline])
        queue_secs = list(stamps[1:] - stamps[:-1])

        ts_segment_info = first_ts['segment_info']
        n_edges = n_steps['edges']

        yield Record(
            record,
            # record.timestamp is only read when a segment_info event exists.
            sec_till_1st_segment_info=(
                (ts_segment_info - record.timestamp)
                if ts_segment_info is not None else None
            ),
            sec_till_1st_candidates=_sec_between(
                first_ts['candidates'], ts_segment_info),
            sec_till_1st_edges=_sec_between(
                first_ts['edges'], first_ts['candidates']),
            sec_till_1st_assignment=_sec_between(
                first_ts['assignment'], first_ts['edges']),
            sec_till_1st_propose_request=_sec_between(
                first_ts['propose_request'], first_ts['assignment']),
            queue_str=' '.join([el[0] for el in timeline]),
            queue_secs=queue_secs,
            n_steps_candidates=n_steps['candidates'],
            n_steps_edges=n_edges,
            n_steps_assignment=n_steps['assignment'],
            n_candidates_per_edge=(
                (sum_candidates_in_edges * 1. / n_edges)
                if n_edges != 0 else None
            ),
            share_regular_edges=(
                (sum_candidates_regular_in_edges * 1. / sum_candidates_in_edges)
                if sum_candidates_in_edges != 0 else None
            )
        )