import argparse
# import time
import json

import datetime

# from projects.efficiency_metrics.manager import Manager
from projects.efficiency_metrics.project_config import get_project_cluster

from nile.api.v1 import extractors as ne
from nile.api.v1 import filters as nf

from projects.data_sources.data_context.eda_logs import \
    DataContext as EdaOrdersDataContext
from projects.data_sources.data_context.raw_services_logs import \
    DataContext as RawLogs

from projects.data_sources.data_context.cargo import \
    DataContext as CargoLogsDataContext
import time

from nile.api.v1 import filters as nf

import geohash as gh

from qb2.api.v1 import filters as qf
from projects.burnt_orders_research.ld_parsing_nile_block import \
    LogEntrySegmentDetector


def generate_yt_table(job, yt_path_segments, table_date=None):
    """Collect dry-run dispatch segments from the parsed experiments log.

    Reads the daily ``experiments_3_0_parsed`` table, derives ``group`` as the
    ``alias`` of the first element of the JSON list stored in ``matched``,
    keeps only rows whose group is ``cargo_claims_dispatch_flow_dryrun`` and
    whose ``iso_eventtime`` is defined and greater than ``'2021-04-26'``, and
    stores ``group`` together with ``segment_id`` (taken from the JSON object
    in ``kwargs``) in ``yt_path_segments``.

    :param job: nile job the operations are attached to.
    :param yt_path_segments: YT path of the output table.
    :param table_date: ``datetime`` selecting which daily input table to
        read. When omitted, the module-level ``local_now`` is used if it
        exists (this is how the script entry point has always passed the
        date); otherwise the current local time is used.
    :return: the same ``job``; nothing is executed until ``job.run()``.
    """
    if table_date is None:
        # Backward compatibility: the date used to be read from a global that
        # only exists when the module is run as a script. Looking it up
        # explicitly keeps that behaviour without raising NameError when the
        # function is imported and called from elsewhere.
        table_date = globals().get('local_now')
    if table_date is None:
        table_date = datetime.datetime.now()

    source_path = (
        '//home/taxi-delivery/analytics/production/ld_ab_test/monitorings/experiments_3_0_parsed/{}'.format(
            table_date.strftime('%Y-%m-%d'))
    )

    job.table(
        source_path
    ).project(
        'kwargs', 'matched', 'iso_eventtime',
        group=ne.custom(lambda x: json.loads(x)[0].get('alias'),
                        'matched')
    ).filter(
        nf.custom(
            lambda x: x in [
                'cargo_claims_dispatch_flow_dryrun'
            ],
            'group',
        ),
        qf.defined('iso_eventtime'),
        # NOTE(review): plain ``>`` against a date literal; presumably
        # iso_eventtime is an ISO-formatted string -- confirm.
        nf.custom(lambda x: x > '2021-04-26', 'iso_eventtime'),
    ).project(
        'group',
        segment_id=ne.custom(lambda x: json.loads(x).get('segment_id'),
                             'kwargs')
    ).put(
        yt_path_segments
    )
    return job


def get_ld_logs(job, from_date, to_date, yt_path_segments, yt_pah_segments_2_ld):
    """Join LD dispatch logs with the segments table and store the result.

    The LD dispatch stream is obtained from ``RawLogs(...).get_ld_dispatch()``
    for the ``from_date`` .. ``to_date`` period and inner-joined with the
    table at ``yt_path_segments`` (left key ``cargo_ref_id``, right key
    ``segment_id``). The joined rows are written to ``yt_pah_segments_2_ld``.

    :param job: nile job the operations are attached to.
    :param from_date: period start as a ``'%Y-%m-%d'`` string.
    :param to_date: period end as a ``'%Y-%m-%d'`` string.
    :param yt_path_segments: YT path of the segments table to join with.
    :param yt_pah_segments_2_ld: YT path of the output table.
    :return: the same ``job``; nothing is executed until ``job.run()``.
    """
    segments = job.table(yt_path_segments)

    date_format = '%Y-%m-%d'
    period_start = datetime.datetime.strptime(from_date, date_format)
    period_end = datetime.datetime.strptime(to_date, date_format)
    logs_context = RawLogs(job, period_start, period_end)

    ld_dispatch = logs_context.get_ld_dispatch()
    segments_with_ld = ld_dispatch.join(
        segments,
        type='inner',
        by_left='cargo_ref_id',
        by_right='segment_id',
    )
    # TODO: some orders are duplicated here because taxi drivers declined
    # them; the output is not sorted by ('segment_id', 'timestamp_ld').
    segments_with_ld.put(yt_pah_segments_2_ld)

    return job


def get_td_logs(job, from_date, to_date, yt_path_segments, yt_pah_segments_2_td):
    """Join taxi-dispatch logs with the segments table and store the result.

    The taxi-dispatch stream is obtained from
    ``RawLogs(...).get_taxi_dispatch()`` for the ``from_date`` .. ``to_date``
    period and inner-joined with the rows of ``yt_path_segments`` that have
    ``taxi_order_id`` defined (left key ``order_id``, right key
    ``taxi_order_id``). The joined rows are written to
    ``yt_pah_segments_2_td``.

    :param job: nile job the operations are attached to.
    :param from_date: period start as a ``'%Y-%m-%d'`` string.
    :param to_date: period end as a ``'%Y-%m-%d'`` string.
    :param yt_path_segments: YT path of the segments table to join with.
    :param yt_pah_segments_2_td: YT path of the output table.
    :return: the same ``job``; nothing is executed until ``job.run()``.
    """
    segments = job.table(yt_path_segments)

    date_format = '%Y-%m-%d'
    period_start = datetime.datetime.strptime(from_date, date_format)
    period_end = datetime.datetime.strptime(to_date, date_format)
    logs_context = RawLogs(job, period_start, period_end)

    td_dispatch = logs_context.get_taxi_dispatch()
    # Rows without a taxi order cannot be matched against dispatch logs.
    segments_with_order = segments.filter(qf.defined('taxi_order_id'))
    segments_with_td = td_dispatch.join(
        segments_with_order,
        type='inner',
        by_left='order_id',
        by_right='taxi_order_id',
    )
    # TODO: some orders are duplicated here because taxi drivers declined
    # them; the output is not sorted by ('taxi_order_id', 'timestamp_td').
    segments_with_td.put(yt_pah_segments_2_td)

    return job


from projects.burnt_orders_research.ld_parsing_nile_block import \
    LogEntrySegmentDetector
def parse_ld_logs(job, from_date, to_date, yt_path_segments, yt_pah_segments_2_td):
    """Map a table through ``LogEntrySegmentDetector`` and store the output.

    :param job: nile job the operations are attached to.
    :param from_date: not used by this function.
    :param to_date: not used by this function.
    :param yt_path_segments: YT path of the input table.
    :param yt_pah_segments_2_td: YT path of the output table.
    :return: the same ``job``; nothing is executed until ``job.run()``.
    """
    source = job.table(yt_path_segments)
    detector = LogEntrySegmentDetector()
    parsed = source.map(detector)
    # The output is not sorted by ('segment_id', 'timestamp_ld').
    parsed.put(yt_pah_segments_2_td)
    return job


from nile.api.v1 import Record

def get_diff_in_candidates(
        job, from_date, to_date, yt_path_from_ld, yt_path_from_td, yt_path_to
):
    """Per segment, report TD candidates that LD did not accept as regular.

    The LD table (``yt_path_from_ld``) and the TD table (``yt_path_from_td``)
    are concatenated, each projected with a common ``timestamp_dispatch``
    column (from ``timestamp_ld`` / ``timestamp_td`` respectively), grouped
    by ``segment_id``, sorted by ``timestamp_dispatch`` and reduced to one
    output row per segment, which is written to ``yt_path_to``.

    Output fields, in addition to the group key:

    * ``utc_date`` / ``utc_date_hour`` -- UTC date and date-hour of the last
      record of the group.
    * ``in_td_set_not_in_ld_set`` -- contractors that have a TD score but do
      not appear among LD candidates at all.
    * ``in_td_set_in_ld_set_but_rejected`` -- contractors that have a TD
      score and appear among LD candidates, but not with kind ``regular``;
      maps contractor id to the LD ``kind``.
    * ``l1_far_first_action`` -- the subset of the previous mapping whose
      kind is ``l1-far-first-action``; maps contractor id to the TD ``rd``
      value (presumably a distance -- TODO confirm).

    :param job: nile job the operations are attached to.
    :param from_date: not used by this function.
    :param to_date: not used by this function.
    :param yt_path_from_ld: YT path of the parsed LD table.
    :param yt_path_from_td: YT path of the TD table.
    :param yt_path_to: YT path of the output table.
    :return: the same ``job``; nothing is executed until ``job.run()``.
    """
    def _reducer(groups):
        for key, records in groups:
            # contractor_id -> kind, only for LD candidates of kind 'regular'.
            contractors_ld = {}
            # contractor_id -> kind, for every LD candidate that carries a
            # non-empty contractor_id.
            potential_contractors_ld = {}

            # Contractors that received a score on the TD side, and their
            # 'rd' value.
            contractors_td = set()
            candidates_td_2_dist = {}

            utc_date = None
            utc_date_hour = None

            for record in records:
                # Overwritten on every record, so the values of the last
                # record of the group end up in the output.
                # NOTE(review): int() raises if timestamp_dispatch is
                # missing -- confirm the inputs always carry it.
                dispatched_at = datetime.datetime.utcfromtimestamp(
                    int(record.timestamp_dispatch))
                utc_date = dispatched_at.strftime('%Y-%m-%d')
                utc_date_hour = dispatched_at.strftime('%Y-%m-%d %H')

                if record.get('timestamp_ld') is not None:
                    ld_candidates = record.get('available_candidates')
                    if ld_candidates is None:
                        continue
                    for candidate in ld_candidates:
                        kind = candidate.get('kind', '')
                        if kind == 'regular':
                            # NOTE(review): a 'regular' candidate without
                            # 'contractor_id' raises KeyError here.
                            contractors_ld[candidate['contractor_id']] = kind
                        if candidate.get('contractor_id', '') != '':
                            potential_contractors_ld[
                                candidate['contractor_id']] = kind

                if record.get('timestamp_td') is not None:
                    td_candidates = record.get('candidates_meta')
                    if td_candidates is None:
                        continue
                    for contractor_id, meta in td_candidates.items():
                        if meta.get('score') is not None:
                            contractors_td.add(contractor_id)
                            candidates_td_2_dist[contractor_id] = meta['rd']

            in_td_set_not_in_ld_set = set()
            in_td_set_in_ld_set_but_rejected = {}

            # difference() accepts any iterable; passing the dict compares
            # against its keys (the regular LD contractor ids).
            for contractor_id in contractors_td.difference(contractors_ld):
                if contractor_id in potential_contractors_ld:
                    in_td_set_in_ld_set_but_rejected[contractor_id] = (
                        potential_contractors_ld[contractor_id])
                else:
                    in_td_set_not_in_ld_set.add(contractor_id)

            l1_far_first_action = {}
            for contractor_id, kind in in_td_set_in_ld_set_but_rejected.items():
                if kind == 'l1-far-first-action':
                    l1_far_first_action[contractor_id] = (
                        candidates_td_2_dist[contractor_id])

            yield Record(
                key,
                utc_date=utc_date,
                utc_date_hour=utc_date_hour,
                in_td_set_not_in_ld_set=list(in_td_set_not_in_ld_set),
                in_td_set_in_ld_set_but_rejected=in_td_set_in_ld_set_but_rejected,
                l1_far_first_action=l1_far_first_action
            )

    ld = job.table(yt_path_from_ld)
    td = job.table(yt_path_from_td)

    job.concat(
        ld.project(
            ne.all(),
            timestamp_dispatch='timestamp_ld'
        ),
        td.project(
            ne.all(),
            timestamp_dispatch='timestamp_td'
        )
    ).groupby('segment_id').sort('timestamp_dispatch').reduce(
        _reducer
    ).put(
        yt_path_to
    )

    return job


if __name__ == '__main__':
    # Example invocation: python -m dry_run_ld --dttm 2021-05-06
    #
    # NOTE(review): the parser below is configured but parse_args() is never
    # called, so --yt-proxy and --dttm currently have no effect on the run.
    parser = argparse.ArgumentParser()
    parser.add_argument('--yt-proxy', type=str, default='Hahn')
    parser.add_argument('--dttm', type=str)

    utc_datetime = datetime.datetime.utcnow()
    to_dttm = utc_datetime

    cluster = get_project_cluster()

    # Processed period: fixed start date up to the current UTC date.
    from_date = '2021-04-26'
    to_date = to_dttm.strftime('%Y-%m-%d')

    yt_path_dir_to = '//home/taxi-delivery/analytics/production/ld_ab_test/monitorings/dry_run_3_wave'
    # Must stay a module-level name: generate_yt_table() looks it up globally.
    local_now = datetime.datetime.now()

    # Output tables of the individual stages.
    segments_path = '{}/segments'.format(yt_path_dir_to)
    segments_2_taxi_order_id_path = '{}/segments_2_taxi_order_id'.format(yt_path_dir_to)
    segments_2_ld_path = '{}/{}'.format(yt_path_dir_to, 'segments_2_ld')
    segments_2_ld_parsed_path = '{}/{}'.format(yt_path_dir_to, 'segments_2_ld_parsed')
    segments_2_td_path = '{}/{}'.format(yt_path_dir_to, 'segments_2_td')
    diff_in_cands_path = '{}/{}'.format(yt_path_dir_to, 'diff_in_cands')

    def _start_job():
        """Create a uniquely named job with the environment shared by all stages."""
        new_job = cluster.job('Couriers collection' + str(time.time()))
        return new_job.env(
            bytes_decode_mode='strict',
            yt_spec_defaults={'max_failed_job_count': 1000}
        )

    # Stage 1: dry-run segments from the experiments log.
    job = _start_job()
    job = generate_yt_table(job, segments_path)
    job.run()

    # Stage 2: attach cargo segment and claim data (including taxi_order_id)
    # to every dry-run segment.
    job = _start_job()
    cargo_d_c = CargoLogsDataContext(
        job,
        datetime.datetime.strptime(from_date, '%Y-%m-%d'),
        datetime.datetime.strptime(to_date, '%Y-%m-%d'),
    )
    claims = cargo_d_c.get_claims().project(
        'taxi_order_id', 'uuid_id', 'is_delayed', 'due', 'timestamp'
    )
    segments = cargo_d_c.get_segments().project(ne.all(['chosen_waybill']))

    segments_with_cargo = job.table(segments_path).join(
        segments,
        by='segment_id', type='left', assume_small_left=True
    )
    segments_with_claims = segments_with_cargo.join(
        claims, type='left', assume_small_left=True,
        by_right='uuid_id', by_left='claim_id'
    )
    segments_with_claims.put(segments_2_taxi_order_id_path)
    job.run()

    # Reference query left by the author:
    #   select taxi_order_id, claim_segment_uuid
    #   from taxi_ods_cargo_claims.claim_segment
    #   where claim_segment_uuid in ()

    # Stage 3: LD dispatch logs for the dry-run segments.
    job = _start_job()
    job = get_ld_logs(
        job, from_date, to_date,
        segments_path,
        segments_2_ld_path
    )
    job.run()

    # Stage 4: parse the LD log entries.
    job = _start_job()
    job = parse_ld_logs(
        job, from_date, to_date,
        segments_2_ld_path,
        segments_2_ld_parsed_path
    )
    job.run()

    # Stage 5: taxi-dispatch logs for the segments that have a taxi order.
    job = _start_job()
    job = get_td_logs(
        job, from_date, to_date,
        segments_2_taxi_order_id_path,
        segments_2_td_path
    )
    job.run()

    # Stage 6: per-segment difference between TD and LD candidates.
    job = _start_job()
    job = get_diff_in_candidates(
        job, from_date, to_date,
        segments_2_ld_parsed_path,
        segments_2_td_path,
        diff_in_cands_path
    )
    job.run()