import argparse
# import time

import datetime

from projects.efficiency_metrics.manager import Manager
from projects.efficiency_metrics.project_config import get_project_cluster

from nile.api.v1 import extractors as ne
from nile.api.v1 import filters as nf

from projects.data_sources.data_context.eda_logs import \
    DataContext as EdaOrdersDataContext
from projects.data_sources.data_context.raw_services_logs import \
    DataContext as RawLogs

from projects.data_sources.data_context.cargo import \
    DataContext as CargoLogsDataContext
import time

from nile.api.v1 import filters as nf

import geohash as gh

from qb2.api.v1 import filters as qf
from projects.burnt_orders_research.ld_parsing_nile_block import \
    LogEntrySegmentDetector


def extract_entity(log, field_name, start_symbol, end_symbol):
    """Cut the raw text of a field's value out of a JSON-like string.

    Searches *log* for the first occurrence of the literal prefix
    ``"<field_name>":<start_symbol>`` (no whitespace after the colon) and
    returns the text between that prefix and the next occurrence of
    *end_symbol*. The string is scanned textually, not parsed, so nested
    or escaped delimiters end the value early.

    Args:
        log: Text to search in.
        field_name: Name of the field, without quotes.
        start_symbol: Opening delimiter of the value (e.g. ``'"'``, ``'['``).
        end_symbol: Closing delimiter of the value (e.g. ``'"'``, ``']'``).

    Returns:
        The text strictly between the delimiters, or ``None`` when the
        prefix is absent or the value is not terminated by *end_symbol*.
    """
    prefix = '"' + field_name + '":' + start_symbol
    prefix_position = log.find(prefix)
    if prefix_position == -1:
        return None
    value_start = prefix_position + len(prefix)
    value_end = log.find(end_symbol, value_start)
    if value_end == -1:
        # Unterminated value (e.g. a truncated log line). Slicing with the
        # -1 returned by find() would silently drop the last character and
        # hand back garbage, so report "not found" instead.
        return None
    return log[value_start:value_end]


def extract_string(log, field_name):
    """Return the text of a double-quoted field value found in *log*.

    Delegates to ``extract_entity`` using ``"`` as both the opening and
    the closing delimiter.
    """
    quote = '"'
    return extract_entity(log, field_name, quote, quote)


def extract_array(log, field_name):
    """Return the text between ``[`` and the next ``]`` of a field in *log*.

    Delegates to ``extract_entity``; the result is the unparsed inner text
    of the array, without the surrounding brackets.
    """
    opening, closing = '[', ']'
    return extract_entity(log, field_name, opening, closing)


def extract_dict(log, field_name):
    """Return the text between ``{`` and the next ``}`` of a field in *log*.

    Delegates to ``extract_entity``; the result is the unparsed inner text
    of the object, without the surrounding braces. The value is cut at the
    first ``}`` that follows the opening brace.
    """
    opening, closing = '{', '}'
    return extract_entity(log, field_name, opening, closing)


import json
import six
from nile.api.v1 import Record


# def segment_tariffs_mapper(records):
#     for record in records:
#
#         if (
#                 (record.get('response') is not None)
#                 and
#                 (record.get('response').startswith("""{\"waybill"""))
#         ):
#             # try:
#             response = record.get('response')  # json.loads(record['response'])
#             taxi_order_requirements = extract_dict(
#                 six.ensure_str(record['response']).replace('\n', ''),
#                 'taxi_order_requirements'
#                 # 'performer_requirements'
#             )
#
#             # 'external_ref'
#
#             external_ref = extract_string(
#                 six.ensure_str(record['response']).replace('\n', ''),
#                 'external_ref'
#                 # 'performer_requirements'
#             )  # .replace('"','')
#             yield Record(
#                 response=response,
#                 taxi_order_requirements=taxi_order_requirements,
#                 external_ref=external_ref
#
#             )
#             # except Exception:
#             #     pass


if __name__ == '__main__':
    # Script entry point: builds one Nile job that groups pre-filtered
    # dispatch log rows by segment and reduces each group with
    # _by_gamble_reducer, then runs it.

    # NOTE(review): the parser is configured but its arguments are never
    # parsed or used; the cluster comes from get_project_cluster().
    parser = argparse.ArgumentParser()
    parser.add_argument('--yt-proxy', type=str, default='Hahn')

    cluster = get_project_cluster()

    # Only referenced by the commented-out preparation step below.
    to_date = '2021-06-01'

    yt_path_to = '//home/taxi-delivery/analytics/production/ld/dispatch_metrics/{}'

    # The timestamp suffix keeps job names distinct between runs.
    job = cluster.job('Couriers collection' + str(time.time()))
    job = job.env(bytes_decode_mode='strict')

    def _by_gamble_reducer(groups):
        """Reduce the rows of one segment into candidate snapshots.

        For every group the rows are scanned in the order they arrive
        (the pipeline below sorts them by ``timestamp_ld``):

        * ``candidates`` rows add their candidates to the running
          ``last_candidates`` mapping and emit a snapshot record;
        * ``edges`` rows seen after at least one ``candidates`` row pair
          every remembered candidate with the ``kind`` reported for it,
          append an entry to the timeline and reset the mapping;
        * ``assignment`` rows append an entry to the timeline.

        A per-segment summary record is emitted only when ``router_id``
        is set. Nothing in this reducer sets it yet, so the summary is
        currently never produced (see the TODO below).
        """
        for key, rows in groups:
            last_candidates_timestamp = 0
            last_candidates = {}
            suspicious_entries = []
            timeline = []

            # TODO: the source left `cycle_id` unfinished (`cycle_id=??`)
            # and never assigned the segment metadata used by the summary
            # record. They are placeholders until their source is decided.
            cycle_id = None
            router_id = None
            zone_id = None
            corp_client_id = None
            time_to_due = None

            for row in rows:
                row_type = row['type']

                if row_type == 'candidates':
                    # NOTE(review): reads `timestamp` while the rest of the
                    # reducer uses `timestamp_ld` - confirm the input table
                    # carries both columns.
                    last_candidates_timestamp = row['timestamp']
                    for candidate in row['available_candidates']:
                        last_candidates[candidate['candidate_id']] = {
                            'route_info': candidate['route_info'],
                            'transport': candidate['transport'],
                            'timestamp_ld': row.timestamp_ld,
                        }
                    yield Record(
                        key,
                        type='candidates',
                        # Snapshot: the running dict keeps being mutated
                        # by later `candidates` rows.
                        candidates=dict(last_candidates),
                        timestamp_ld=row.timestamp_ld,
                        cycle_id=cycle_id,
                    )

                elif row_type == 'edges' and last_candidates_timestamp > 0:
                    reject_reasons = {}
                    # Presumably "all" means every edge regardless of kind;
                    # the original referenced n_kind_all without defining it.
                    n_kind_all = 0
                    n_kind_regular = 0
                    for edge in row['available_candidates']:
                        reject_reasons[edge['contractor_id']] = edge['kind']
                        n_kind_all += 1
                        if edge['kind'] == 'regular':
                            n_kind_regular += 1

                    lag = row['timestamp'] - last_candidates_timestamp
                    for candidate_id, meta in last_candidates.items():
                        suspicious_entries.append({
                            'candidate_id': candidate_id,
                            'lag': lag,
                            # None when the candidate has no edge at all.
                            'kind': reject_reasons.get(candidate_id),
                            'meta': meta,
                        })

                    timeline.append(
                        ['edges', row.timestamp_ld, n_kind_all, n_kind_regular]
                    )
                    last_candidates = {}

                elif row_type == 'assignment':
                    timeline.append(
                        ['assignment', row.timestamp_ld, row.contractor_id]
                    )

            if router_id:
                yield Record(**{
                    'suspicious_entries': suspicious_entries,
                    'dispatch_zone_id': zone_id,
                    'router_id': router_id,
                    'corp_client_id': corp_client_id,
                    'time_to_due': time_to_due,
                    'segment_id': key['segment_id'],
                    'timeline': timeline,
                })

    # Preparation step that produced the input table (kept for reference):
    # job.table('//home/taxi-delivery/analytics/production/parsed_logs/segments_2_ld_parsed/{}'.format(
    #     to_date
    # )).project(
    #     'segment_id', 'timestamp_ld', 'type', 'available_candidates', 'contractor_id'
    # ).filter(
    #     nf.custom(lambda x: x in ['candidates', 'edges', 'assignment'], 'type')
    # ).put(
    #     yt_path_to.format('by_gambles_tmp')
    # )

    job.table(
        yt_path_to.format('by_gambles_tmp')
    ).groupby(
        'segment_id'
    ).sort(
        'timestamp_ld'
    ).reduce(
        _by_gamble_reducer
    ).put(
        yt_path_to.format('by_gambles_tmp_tmp')
    )

    job.run()

    # raw_logs_dc = RawLogs(
    #     job,
    #     datetime.datetime.strptime(
    #         from_date, '%Y-%m-%d')
    #     ,
    #     datetime.datetime.strptime(
    #         to_date, '%Y-%m-%d'
    #     ),
    # )

    # (
    #     raw_logs_dc.get_ld_dispatch(cargo_ref_id_is_defined=False).map(
    #         segment_tariffs_mapper
    #     )
    #         # .sort('segment_id', 'timestamp_ld')
    #         .put(
    #         yt_path_to.format('log_dispatch_raw_2021-04-28')
    #     )
    # )
    # import six
    #
    # for r in cluster.read(
    #     '//home/taxi-delivery/analytics/dev/burning_research/tmp/log_dispatch_raw_2021-04-28'
    # )[:1]:
    #     'external_ref'
    #
    #     tmp = extract_array(
    #         six.ensure_str(r['response']).replace('\n', ''),
    #         'virtual_tariffs'
    #         # 'performer_requirements'
    #     )#.replace('"','')
    #     # print("\\" in tmp) #[1:-1])
    #     print (tmp)
    #     # json.loads(tmp)#[1:-1])#, strict=False)