import argparse
# import time

import datetime

from projects.efficiency_metrics.manager import Manager
from projects.efficiency_metrics.project_config import get_project_cluster

from nile.api.v1 import extractors as ne
from nile.api.v1 import filters as nf

from projects.data_sources.data_context.eda_logs import \
    DataContext as EdaOrdersDataContext
from projects.data_sources.data_context.raw_services_logs import \
    DataContext as RawLogs

from projects.data_sources.data_context.cargo import \
    DataContext as CargoLogsDataContext
import time

from nile.api.v1 import filters as nf

import geohash as gh

if __name__ == '__main__':
    # Entry point of the research script.
    # NOTE(review): the parser is configured, but parse_args() is never called
    # in the visible code, so the --yt-proxy option currently has no effect.
    parser = argparse.ArgumentParser()
    parser.add_argument('--yt-proxy', type=str, default='Hahn')

    cluster = get_project_cluster()

    # Boundaries of the studied period; kept as strings because later
    # stages of the script parse them again.
    from_date = '2021-04-10'
    to_date = '2021-04-17'

    # Order numbers under investigation. In the visible code they are only
    # referenced by the disabled EDA extraction step below.
    BURNT_ORDER_NR_S = ['210415-162181', '210415-218433', '210415-198049']

    # Path template shared by every table this script reads or writes.
    yt_path_to = '//home/taxi-delivery/analytics/dev/burning_research/{}'

    # ---- Job 1 ----------------------------------------------------------
    # The time.time() suffix makes the job name differ between launches.
    first_job_name = 'Couriers collection' + str(time.time())
    job = cluster.job(first_job_name).env(bytes_decode_mode='strict')
    # , yt_spec_defaults={'max_failed_job_count': 1000})

    # Disabled step: this is how the '2021-04-17_burnt_orders_eda' table
    # read below was originally produced.
    # eda_table = EdaOrdersDataContext(
    #     job,
    #     datetime.datetime.strptime(
    #         from_date, '%Y-%m-%d')
    #     ,
    #     datetime.datetime.strptime(
    #         to_date, '%Y-%m-%d'
    #     ),
    # ).get_eda_orders().filter(
    #     nf.custom(
    #             lambda x: x in BURNT_ORDER_NR_S,
    #             'order_nr'
    #     ),
    #     # nf.custom('taxi_dispatch_cargo_uuid_id')
    #
    # ).project(
    #     "brand_name",
    #     "order_id",
    #     "utc_created_dttm",
    #     "utc_approved_dttm",
    #     "utc_place_confirmed_dttm",
    #     "utc_order_taken_plan_dttm",
    #     "utc_arrival_to_place_plan_dttm",
    #     "utc_dttm_winner_applied",
    #     "utc_cancelled_dttm",
    #     'taxi_dispatch_cargo_uuid_id'
    # ).put(
    #     yt_path_to.format('2021-04-17_burnt_orders_eda')
    # )

    # Reuse the previously materialised EDA orders table.
    eda_orders_path = yt_path_to.format('2021-04-17_burnt_orders_eda')
    eda_table = job.table(eda_orders_path)

    # Cargo data context over the studied period.
    date_format = '%Y-%m-%d'
    cargo_period_start = datetime.datetime.strptime(from_date, date_format)
    cargo_period_end = datetime.datetime.strptime(to_date, date_format)
    cargo_d_c = CargoLogsDataContext(
        job, cargo_period_start, cargo_period_end,
    )

    # Only these claim columns are kept for the (disabled) join below.
    claim_columns = (
        'taxi_order_id', 'uuid_id', 'is_delayed', 'due', 'timestamp',
    )
    claims = cargo_d_c.get_claims().project(*claim_columns)

    segments = cargo_d_c.get_segments()

    # Disabled step: this is how the '2021-04-17_burnt_orders' table used by
    # the next job was originally produced.
    # eda_table.join(
    #     claims,
    #     type='inner', by_right='uuid_id', by_left='taxi_dispatch_cargo_uuid_id',
    #     assume_small_left=True
    # ).join(
    #     segments,
    #     by_left='uuid_id', by_right='claim_id', type='inner',
    #     assume_small_left=True
    # ).put(
    #     yt_path_to.format('2021-04-17_burnt_orders')
    # )

    # taxi_order_id, - taxi_dispatch
    # segment_id - ld
    # NOTE(review): with the join above disabled, no output is attached to
    # this job in the visible code.
    job.run()



    # ---- Job 2 ----------------------------------------------------------
    # A fresh job (again with a time-based name suffix); this one also
    # raises the allowed number of failed YT jobs.
    second_job_name = 'Couriers collection' + str(time.time())
    job = cluster.job(second_job_name).env(
        bytes_decode_mode='strict',
        yt_spec_defaults={'max_failed_job_count': 1000},
    )

    # Previously materialised table of the orders under study.
    tmp = job.table(yt_path_to.format('2021-04-17_burnt_orders'))

    # Raw service logs over the same period as the rest of the script.
    logs_period_start = datetime.datetime.strptime(from_date, '%Y-%m-%d')
    logs_period_end = datetime.datetime.strptime(to_date, '%Y-%m-%d')
    raw_logs_dc = RawLogs(job, logs_period_start, logs_period_end)

    # Keep only the dispatch log records that match a studied segment:
    # log 'cargo_ref_id' is matched against 'segment_id' of the small table.
    ld_dispatch_log = raw_logs_dc.get_ld_dispatch()
    matched_dispatch_log = ld_dispatch_log.join(
        tmp,
        type='inner',
        by_left='cargo_ref_id',
        by_right='segment_id',
        assume_small_right=True,
    )
    ordered_dispatch_log = matched_dispatch_log.sort(
        'segment_id', 'timestamp_ld',
    )
    # TODO: some orders are duplicated here because taxi drivers declined them
    orders_2_dispatch = ordered_dispatch_log.put(
        yt_path_to.format('2021-04-17_log_dispatch')
    )

    import json
    def extr(module, resp, text, uri):
        """Extract candidate information from one dispatch log record.

        Args:
            module: name of the module that produced the record.
            resp: JSON-encoded response body, or None.
            text: JSON-encoded log text, or None.
            uri: request URI of the record.

        Returns:
            * ``planner-segment-edges`` records: a list of
              ``{'contractor_id': ..., 'kind': ...}`` dicts, one per entry of
              ``edges`` in ``text`` (empty list when ``text`` is None);
            * ``ServiceApi`` records for ``/order-search`` with a response:
              a list of candidate ids, or None when the response has no
              ``candidates`` key;
            * any other record: an empty list.

        Raises:
            ValueError: if the inspected field is not valid JSON.
            KeyError: if an expected key (``edges``, ``driver_id``, ``kind``,
                ``id``) is missing from the decoded payload.
        """
        available_candidates = []

        # Guard against a missing text field the same way the ServiceApi
        # branch guards against a missing response; json.loads(None) would
        # otherwise raise TypeError and fail the whole operation.
        if module == 'planner-segment-edges' and text is not None:
            available_candidates.extend(
                {'contractor_id': edge['driver_id'], 'kind': edge['kind']}
                for edge in json.loads(text)['edges']
            )

        if (module == 'ServiceApi') and (resp is not None) and (
                uri == '/order-search'):
            response = json.loads(resp)
            if 'candidates' not in response:
                # Kept as None (not []) so that a response without the
                # 'candidates' key stays distinguishable downstream.
                return None
            available_candidates.extend(
                candidate['id'] for candidate in response['candidates']
            )

        return available_candidates


    from qb2.api.v1 import filters as qf
    from projects.burnt_orders_research.ld_parsing_nile_block import LogEntrySegmentDetector

    # Disabled alternative: parse the dispatch log with a dedicated mapper.
    # def _mapper(records):
    #     le = LogEntrySegmentDetector()
    #     for record in records:
    #         le(record)


    # job.table(yt_path_to.format('2021-04-17_log_dispatch')).map(
    #     LogEntrySegmentDetector()
    # ).sort('segment_id', 'timestamp').put(
    #     yt_path_to.format('2021-04-17_log_dispatch_parsed_mapper')
    # )
    import yt.wrapper as yt

    # The proxy is hard-coded here; it is only needed by the disabled
    # yt.run_map / yt.run_sort calls below in the visible code.
    yt.config.set_proxy("hahn")

    # le_mapper = LogEntrySegmentDetector()
    # table_routes = yt_path_to.format('2021-04-17_log_dispatch')
    # aux_table_route = yt_path_to.format('2021-04-17_log_dispatch_parsed_mapper')
    # yt.run_map(le_mapper, table_routes, aux_table_route,)
               # spec={'combine_chunks': True, 'data_size_per_job': 1535852842})
    # yt.run_sort(aux_table_route, sort_by=['segment_id', 'timestamp'])

    dispatch_log_path = yt_path_to.format('2021-04-17_log_dispatch')
    parsed_log_path = yt_path_to.format('2021-04-17_log_dispatch_parsed')

    # NOTE(review): this table is also written by a .put() earlier in the
    # same job; confirm that the job orders the write before this read.
    segment_records = job.table(dispatch_log_path).filter(
        qf.defined('segment_id')
    )

    parsed_records = segment_records.project(
        # Earlier variant of the projection, kept for reference:
        # 'segment_id', 'claim_id', 'taxi_order_id', 'utc_date', 'zone_id',
        # 'module', 'response', 'text', 'uri',
        # candidates_meta=ne.custom(
        #     lambda x, y, z, w: extr(x, y, z, w),
        #     'module', 'response', 'text', 'uri'
        # ),
        # NOTE(review): presumably the list passed to ne.all() is the set of
        # fields to exclude - confirm against the nile documentation.
        ne.all(['module', 'response', 'text', 'uri']),
        # Candidate information decoded from the raw JSON fields.
        candidates_meta=ne.custom(
            lambda module, response, text, uri: extr(
                module, response, text, uri
            ),
            'module', 'response', 'text', 'uri',
        ),
        # Minute-of-hour and 'YYYY-MM-DD HH' bucket of the record, both
        # derived from 'timestamp_ld' interpreted as a UTC unix timestamp.
        minute=ne.custom(
            lambda timestamp_ld:
            datetime.datetime.utcfromtimestamp(timestamp_ld).minute,
            'timestamp_ld',
        ),
        utc_date_hour=ne.custom(
            lambda timestamp_ld:
            datetime.datetime.utcfromtimestamp(timestamp_ld).strftime(
                '%Y-%m-%d %H'
            ),
            'timestamp_ld',
        ),
        # geo_hash=ne.custom(lambda x, y: gh.encode(x, y, precision=5),
        #                    'latitude', 'longitude')#'lat', 'lon')
    )

    parsed_records.sort(
        'segment_id', 'timestamp_ld',
    ).put(
        parsed_log_path
    )

    #.filter(
    #     nf.custom(lambda x: x == "planner-segment-edges", 'module'),
    #     # nf.custom(lambda x: x is None, 'taxi_order_id')
    # )

    # orders_2_dispatch = (
    #     raw_logs_dc.get_taxi_dispatch().join(
    #         tmp, type='inner',
    #         by_left='order_id',
    #         by_right='taxi_order_id',
    #         assume_small_right=True
    #     )
    # ).sort(
    #     'taxi_order_id', 'timestamp_td'
    # ).put(
    #     yt_path_to.format('2021-04-17_taxi_dispatch')
    #     # TODO: some orders are duplicated here because taxi drivers declined them
    # )

    # from qb2.api.v1 import filters as qf
    # raw_logs_dc.get_taxi_candidates().filter(
    #     qf.defined('trace_id')
    # ).join(
    #     orders_2_dispatch, by='trace_id', type='inner', assume_small_right=True
    # ).project(
    #         "taxi_order_id",
    #         "timestamp",# (candidates, not dispath)
    #         "not_filtered_candidates",
    #         "candidates_meta",
    #         "trace_id",
    #         "utc_created_dttm",
    #         "text",
    #         "timestamp_taxi_dispatch"
    #     ).map(
    #         parse_text_field_mapper
    #     )
    #     .project(
    #         ne.all(['text'])
    #     )
    #     .put(
    #         '//home/taxi_ml/dev/drivers/delivery/tmp_tmp/inner_orders_candidates_burnt_msc_applied_cands_service_final'
    #     )
    # )

    from projects.efficiency_metrics.nile_blocks.junk import (
        not_filtered_candidates_reducer, parse_text_field_mapper
    )



    # job.table(
    #     '//home/taxi_ml/dev/drivers/delivery/tmp_tmp/inner_orders_candidates_burnt_msc_applied_cands_service_final'
    # ).groupby(
    #     'taxi_order_id', 'trace_id', "utc_created_dttm", "timestamp_taxi_dispatch"
    # ).reduce(
    #     not_filtered_candidates_reducer,
    #     memory_limit=50000
    # ).put(
    #     '//home/taxi_ml/dev/drivers/delivery/tmp_tmp/causes'
    # )

    # How long did the search for a performer take?

    # order_id -> several trace_id (dispatch draws) -> ..

    # job.table('//home/taxi_ml/dev/drivers/delivery/tmp_tmp/inner_orders_candidates_burnt_msc_applied').project(
    #     ne.all(),
    #     utc_dttm_winner_applied= ne.custom(
    #         lambda x: datetime.datetime.utcfromtimestamp(x).strftime('%Y-%m-%dT%H:%M:%S'),
    #         'timestamp'
    #     )
    # ).put(
    #     '//home/taxi_ml/dev/drivers/delivery/tmp_tmp/inner_orders_candidates_burnt_msc_applied_2_dttm'
    # )


    from nile.api.v1 import filters as nf


    # job.table('//home/taxi_ml/dev/drivers/delivery/tmp_tmp/inner_orders_candidates').filter(
    #     nf.custom(lambda x: x == 1, 'flag'),
    #     nf.custom(lambda x: x =='Москва', 'region_name'),
    # ).project(
    #     ne.all(),
    #     utc_dttm_candidates= ne.custom(
    #         lambda x: datetime.datetime.utcfromtimestamp(x).strftime('%Y-%m-%dT%H:%M:%S'),
    #         'timestamp'
    #     )
    # ).groupby(
    #     'taxi_order_id'
    # ).sort(
    #     'timestamp'
    # ).reduce(
    #     _reducer
    # ).put(
    #     '//home/taxi_ml/dev/drivers/delivery/tmp_tmp/inner_orders_candidates_burnt_msc_applied_not'
    # # TODO: some orders are duplicated here because taxi drivers declined them
    # )

    # Execute the second job, i.e. every operation attached to `job` since it
    # was re-created above (the dispatch-log join and its parsed projection).
    job.run()

    # yt_p = '//home/logfeller/logs/taxi-candidates-yandex-taxi-candidates-log/1d/2021-01-30[#0:#20]'
    # for r in cluster.read(yt_p):
    #     import json
    #
    #     asd = None
    #     import six
    #     text = six.ensure_str(r.text)
    #
    #     if 'results' in six.ensure_str(text):
    #         asd = text.split(' ')[1].strip()
    #         tmp = '{"results":' + text.split(' ')[1] + '}'
    #         print (json.loads(tmp)[u'results'])
