import argparse
from collections import Counter
import io
import json
import os
import time

import datetime
from nile.api.v1 import aggregators as na
from nile.api.v1 import extractors as ne
from nile.api.v1 import filters as nf
from nile.api.v1 import Record
from qb2.api.v1 import filters as qf
from qb2.api.v1 import typing as qt
import six
import yt.wrapper as yt

from projects.data_sources.data_context.raworders_dmorders_sessions import \
    DataContext as OrdersSessionsLogsDataContext
from projects.data_sources.data_context.cargo import \
    DataContext as CargoLogsDataContext
from projects.efficiency_metrics.project_config import get_project_cluster
from projects.ml_handler_availability.dynamic_tables_util import \
    create_dyn_table_from_usual
from projects.data_sources.data_context.datamarts import \
    DataContext as DatamartsDataContext
from projects.ml_handler_availability.nile_blocks.main import (
    _reducer, calc_orders_features, duplicate_mapper, orders_duplicate_mapper,
    calc_features, concat_different_windows_reducer
)
from projects.common.time_utils import datetime_2_timestamp, parse_timestring

from projects.common.time_utils import datetime_2_timestamp, \
    parse_timestring
# [b'cargo_eds', b'cargo_eds_for_cargo_tariff', b'raiting_4.85',
#  b'cargo_multipoints', b'too_heavy_no_walking_courier', b'cargo_ozon_msk',
#  b'thermobox_option_on', b'car_couriers', b'experience_3w_or_less',
#  b'thermobag_delivery', b'thermobag_confirmed', b'lavka_wo_frauders',
#  b'afcd_req_new', b'disable_corp_delivery_rating_below_4_80',
#  b'raiting_4.9']

# [b'courier', b'eda', b'lavka', b'express']


def get_schema(type_of_dyn_table):
    """Return the output table schema for the given dynamic table type.

    :param type_of_dyn_table: one of 'dbid_uuid', 'tariff_zone', 'geo_hash'.
    :return: dict mapping column name to its qb2 type; every schema has one
        ``qt.String`` column named after the table type and one ``qt.Json``
        column.
    :raises AssertionError: if ``type_of_dyn_table`` is not a supported type.
    """
    supported_types = ('dbid_uuid', 'tariff_zone', 'geo_hash')
    if type_of_dyn_table not in supported_types:
        # Raised explicitly instead of via an `assert` statement: asserts are
        # stripped under `python -O`, which would let an unknown type fall
        # through and yield an empty schema. The exception type is kept the
        # same so existing callers are not affected.
        raise AssertionError(
            'unsupported type_of_dyn_table {!r}; expected one of: {}'.format(
                type_of_dyn_table, ', '.join(supported_types)
            )
        )

    if type_of_dyn_table == 'dbid_uuid':
        return {
            'dbid_uuid': qt.String,
            'courier_info': qt.Json
        }
    if type_of_dyn_table == 'tariff_zone':
        return {
            'tariff_zone_info': qt.Json,
            'tariff_zone': qt.String
        }
    # Only 'geo_hash' is left after the validation above.
    return {
        'geo_hash_info': qt.Json,
        'geo_hash': qt.String
    }


# Order state labels; each value looks like two parts joined by '_'
# (e.g. 'assigned_driving'), with a literal 'None' where the second part is
# absent — presumably '<status>_<taxi_status>', TODO confirm.
# NOTE(review): not referenced in the visible part of this file.
TAXI_ORDERS_STATUSES = [
    'assigned_driving', 'pending_None', 'cancelled_driving',
    'finished_failed', 'assigned_transporting', 'cancelled_None',
    'cancelled_waiting', 'finished_expired', 'cancelled_transporting',
    'finished_cancelled', 'assigned_waiting', 'finished_complete'
]


def build_local_dttm_grid(now, lookback_hours=6, step_minutes=5):
    """Return evenly spaced timestamp strings covering a trailing interval.

    ``now`` is floored to a multiple of ``step_minutes`` within its hour; the
    grid runs from that moment minus ``lookback_hours`` up to and including
    that moment whenever the interval is a whole number of steps.

    :param now: ``datetime.datetime`` used as the right edge of the grid.
    :param lookback_hours: width of the covered interval, in hours.
    :param step_minutes: distance between neighbouring points, in minutes.
    :return: list of '%Y-%m-%dT%H:%M:%S' strings in ascending order.
    """
    floored_minute = (now.minute // step_minutes) * step_minutes
    grid_end = now.replace(minute=floored_minute, second=0, microsecond=0)
    lookback_minutes = int(lookback_hours * 60)
    grid_start = grid_end - datetime.timedelta(minutes=lookback_minutes)
    return [
        (
            grid_start + datetime.timedelta(minutes=offset)
        ).strftime('%Y-%m-%dT%H:%M:%S')
        # `+ 1` makes the right edge inclusive without ever overshooting it.
        for offset in range(0, lookback_minutes + 1, step_minutes)
    ]


if __name__ == '__main__':
    # Script-local import: only used for the debug output further down.
    from projects.common.nile.dates import range_selector

    parser = argparse.ArgumentParser()
    parser.add_argument('--yt-proxy', default='Hahn')
    parser.add_argument(
        '--dyn-features-table-yt-path',
        default='',
    )
    parser.add_argument(
        '--usual-features-table-yt-path',
        default=''
    )
    parser.add_argument(
        '--type-of-dyn-table',
        default=''
    )
    # NOTE(review): --dttm is accepted but never read below.
    parser.add_argument(
        '--dttm',
        default=''
    )
    args = parser.parse_args()

    start = time.time()

    yt.config.set_proxy(args.yt_proxy)
    cluster = get_project_cluster(proxy=args.yt_proxy)

    print(args.usual_features_table_yt_path)
    type_of_dyn_table = args.type_of_dyn_table

    # START:
    schema = get_schema(type_of_dyn_table)
    print(schema)

    # Names of the key column and the value column handed to
    # concat_different_windows_reducer for each table type.
    KEY_2_D = {
        'geo_hash': {'key': 'geo_hash', 'value': 'geo_hash_info'},
        'tariff_zone': {'key': "tariff_zone", "value": "tariff_zone_info"}
    }
    if type_of_dyn_table not in KEY_2_D:
        # get_schema() also accepts 'dbid_uuid', which has no entry here.
        # Fail right away with a readable message instead of a bare KeyError
        # raised in the middle of pipeline construction.
        raise KeyError(
            'no key/value columns configured for table type {!r}; '
            'supported: {}'.format(
                type_of_dyn_table, ', '.join(sorted(KEY_2_D))
            )
        )

    # Date range handed to the data context: the trailing 6 hours, in UTC.
    utc_datetime = datetime.datetime.utcnow()
    from_dttm = utc_datetime - datetime.timedelta(hours=6)
    to_dttm = utc_datetime

    # Reference moment passed (as a timestamp) to the duplicate mappers;
    # it trails the current UTC time by 15 minutes.
    to_lag_date = utc_datetime - datetime.timedelta(minutes=15)

    # Timestamps requested from get_atlas_drivers: every MINS minutes over
    # the trailing 6 hours.
    # NOTE(review): this grid uses local wall-clock time while
    # from_dttm/to_dttm are UTC — confirm get_atlas_drivers expects local
    # timestamps.
    MINS = 5
    dttm_needed = build_local_dttm_grid(
        datetime.datetime.now(), lookback_hours=6, step_minutes=MINS
    )
    print(dttm_needed)

    # The current time in the job name keeps names distinct between runs.
    job = cluster.job('Couriers collection' + str(time.time())).env(
        bytes_decode_mode='strict',
        yt_spec_defaults={'max_failed_job_count': 1000}
    )

    print(to_dttm)
    print(to_dttm + datetime.timedelta(hours=5))

    # The right edge given to the data context is pushed 28 hours past
    # to_dttm. NOTE(review): the reason for 28 hours is not visible here.
    orders_to_dttm = to_dttm + datetime.timedelta(hours=28)
    print(range_selector(from_dttm, orders_to_dttm, '%Y-%m-%d'))

    orders_d_c = OrdersSessionsLogsDataContext(
        job,
        from_dttm,
        orders_to_dttm,
        ['atlas_drivers', 'light_order_proc']
    )

    # Window sizes, in seconds (30, 60 and 120 minutes), passed to both
    # duplicate mappers.
    TIME_WINDOW_IN_SEC = [30 * 60, 60 * 60, 120 * 60]

    # Atlas drivers stream: keep records whose car_classes contain one of
    # the four listed classes, then aggregate in two reduce stages.
    atlas_table = orders_d_c.get_atlas_drivers(dttm_needed).filter(
        nf.custom(
            lambda x: (
                ('courier' in x) or ('express' in x) or
                ('lavka' in x) or ('eda' in x)
            ), 'car_classes'
        )
    ).map(
        duplicate_mapper(datetime_2_timestamp(to_lag_date), TIME_WINDOW_IN_SEC)
    ).groupby(
        type_of_dyn_table, 'dttm_utc_1_min', 'window_size'
        # all couriers are collected together in the same dttm_utc_1_min
    ).reduce(
        calc_orders_features()
    ).groupby(
        type_of_dyn_table, 'window_size'
    ).reduce(
        _reducer
    )

    # Orders stream: keep records with a defined `created`, add a
    # `timestamp` field parsed from it as UTC, then aggregate.
    orders_table = orders_d_c.get_light_order_proc().project(
        ne.all('candidates')
    ).filter(
        qf.defined('created')
    ).project(
        ne.all(),
        timestamp=ne.custom(
            lambda x: datetime_2_timestamp(
                parse_timestring(x, 'UTC')
            ), 'created'
        )
    ).map(
        orders_duplicate_mapper(
            datetime_2_timestamp(to_lag_date),
            TIME_WINDOW_IN_SEC
        )
    ).groupby(
        type_of_dyn_table, 'window_size'
    ).reduce(
        calc_features
    )

    # Identity extractors: project exactly the schema columns, unchanged.
    custom_extractors = {
        column: ne.custom(lambda x: x, column) for column in schema
    }

    # Join both streams per (key, window), merge the windows into one record
    # per key, and write the result sorted by key.
    atlas_table.join(
        orders_table, type='inner', by=[type_of_dyn_table, 'window_size'],
        assume_small_right=True
    ).groupby(
        type_of_dyn_table
    ).reduce(
        concat_different_windows_reducer(KEY_2_D[type_of_dyn_table])
    ).project(**custom_extractors).sort(
        type_of_dyn_table
    ).put(
        args.usual_features_table_yt_path,
        schema=schema
    )

    # TODO: pull the schema from the freshly retrained model

    job.run()

    # START: load dyn table from usual table
    create_dyn_table_from_usual(
        args.usual_features_table_yt_path,
        args.dyn_features_table_yt_path,
        type_of_dyn_table
    )

    finish = time.time()

    print(finish - start)

