import yt.wrapper as yt_wrapper
from datacloud.dev_utils.yt import yt_ops, yt_utils


class SplitScores:
    """Mapper that projects one score column into ``hashed_cid``/``score`` rows."""

    def __init__(self, score_name):
        """Remember which column of the input row holds the score to extract."""
        self.score_name = score_name

    def __call__(self, rec):
        """Yield a single ``{'hashed_cid', 'score'}`` row, or nothing if the score is None."""
        score = rec[self.score_name]
        if score is None:
            # Rows without a value for this score produce no output.
            return
        yield {'hashed_cid': rec['hashed_cid'], 'score': score}


def run_split_scores(yt_client):
    """Split the combined scores table into one table per score column.

    For every score column listed below this runs three operations on
    ``yt_client``: a map with ``SplitScores`` into a table named after the
    column (whitespace replaced by ``-``), a sort of that table by
    ``hashed_cid``, and an in-place reduce with ``yt_ops.unique_reducer``
    keyed by ``hashed_cid``.
    """
    input_table = '//projects/scoring/tmp/re9ulusv/tables-to-transfer-ydb/new-scores-table'
    output_dir = '//projects/scoring/tmp/re9ulusv/tables-to-transfer-ydb/'
    score_columns = (
        'partner_a score_a', 'partner_a score_b', 'partner_a score_c',
        'partner_b score_c', 'partner_b score_b', 'partner_b score_a',
        'partner_c score_c', 'partner_c score_a', 'partner_c score_b',
    )

    for score_column in score_columns:
        # 'partner_a score_a' -> '<output_dir>partner_a-score_a'
        out_table = output_dir + '-'.join(score_column.split())

        mapper = SplitScores(score_column)
        out_schema = [
            {'type': 'uint64', 'name': 'hashed_cid'},
            {'type': 'double', 'name': 'score'},
        ]
        destination = yt_wrapper.TablePath(out_table, schema=out_schema)
        yt_client.run_map(mapper, input_table, destination)

        yt_client.run_sort(out_table, sort_by='hashed_cid')

        # The reduce reads and overwrites the same table.
        # NOTE(review): unique_reducer presumably keeps one row per hashed_cid -- confirm.
        yt_client.run_reduce(yt_ops.unique_reducer, out_table, out_table, reduce_by='hashed_cid')


def expand_cids_mapper(rec):
    """Yield one ``{'hashed_cid', 'hashed_id_value'}`` row per entry of ``rec['hashed_cids']``."""
    for cid in rec['hashed_cids']:
        row = {'hashed_cid': cid}
        row['hashed_id_value'] = rec['hashed_id_value']
        yield row


@yt_wrapper.with_context
def remap_cid_to_id(key, recs, context):
    """Reducer that stamps a row from a later table with the id seen in table 0.

    Rows whose ``context.table_index`` is 0 only update the remembered
    ``hashed_id_value``. The first row coming from any other table gets its
    ``hashed_id_value`` overwritten with the remembered value, is yielded,
    and ends processing of the group.

    NOTE(review): if no table-0 row was seen before that first row, it is
    emitted with ``hashed_id_value`` set to None; at most one row is emitted
    per key group -- confirm both are intended.
    """
    latest_id = None
    for row in recs:
        if context.table_index != 0:
            row['hashed_id_value'] = latest_id
            yield row
            # Only the first non-table-0 row of the group is emitted.
            return
        latest_id = row['hashed_id_value']


def extract_new_crypta(rec):
    """Yield one ``{'hashed_id', 'hashed_cid'}`` row per entry of ``rec['hashed_cids']``.

    ``hashed_id`` is taken from the input row's ``hashed_id_value`` field.
    """
    for cid in rec['hashed_cids']:
        row = {'hashed_id': rec['hashed_id_value']}
        row['hashed_cid'] = cid
        yield row


def extract_new_hashed_features(rec):
    """Drop ``hashed_id_value`` from the row (in place) and yield the same row.

    Raises KeyError if the row has no ``hashed_id_value`` field.
    """
    # pop without a default raises KeyError on a missing key, just like ``del``.
    rec.pop('hashed_id_value')
    yield rec


def remap_cids(yt_client):
    """Build the ``new-crypta`` table from the hashed id-to-cid index.

    The active part maps ``input_ids_table`` through ``extract_new_crypta``
    into ``new_crypta_table`` (schema: ``hashed_id``, ``hashed_cid``, both
    uint64) and then sorts that table by ``hashed_id``.

    The earlier pipeline steps are kept below as commented-out code; the
    table names they use are still defined so they can be re-enabled as is.
    """
    input_ids_table = '//home/x-products/production_sandbox/crypta_v2/crypta_db_last/id_value_to_cid_index_hashed'
    expanded_ids_table = '//tmp/r9-expanded_ids'
    all_scores_hashed_table = '//home/x-products/production_sandbox/datacloud/score_api/all_scores_hashed'

    all_scores_hashed_to_ext_id = '//tmp/r9-all-scores-hashed-to-ext-id'
    new_crypta_table = '//projects/scoring/tmp/re9ulusv/tables-to-transfer-ydb/new-crypta'
    new_scores_table = '//projects/scoring/tmp/re9ulusv/tables-to-transfer-ydb/new-scores-table'

    # --- Disabled steps (kept for reference) ---------------------------------
    # Expand the id index to one row per cid and sort it by cid:
    # yt_client.run_map(
    #     expand_cids_mapper,
    #     input_ids_table,
    #     yt_wrapper.TablePath(
    #         expanded_ids_table,
    #         schema=[{'name': 'hashed_id_value', 'type': 'uint64'}, {'name': 'hashed_cid', 'type': 'uint64'}]
    #     )
    # )
    # yt_client.run_sort(expanded_ids_table, sort_by='hashed_cid')
    #
    # Join the expanded ids with the scores table by cid:
    # yt_client.run_reduce(
    #     remap_cid_to_id,
    #     [expanded_ids_table, all_scores_hashed_table],
    #     all_scores_hashed_to_ext_id,
    #     reduce_by='hashed_cid'
    # )
    #
    # Apply unique_reducer to the joined table in place:
    # yt_client.run_map_reduce(
    #     None,
    #     yt_ops.unique_reducer,
    #     all_scores_hashed_to_ext_id,
    #     all_scores_hashed_to_ext_id,
    #     reduce_by=['hashed_id_value', 'hashed_cid']
    # )
    #
    # Strip hashed_id_value and produce the sorted new scores table:
    # yt_client.run_map(extract_new_hashed_features, all_scores_hashed_to_ext_id, new_scores_table)
    # yt_client.run_sort(new_scores_table, sort_by='hashed_cid')
    # -------------------------------------------------------------------------

    crypta_schema = [
        {'name': 'hashed_id', 'type': 'uint64'},
        {'name': 'hashed_cid', 'type': 'uint64'},
    ]
    crypta_destination = yt_wrapper.TablePath(new_crypta_table, schema=crypta_schema)
    yt_client.run_map(extract_new_crypta, input_ids_table, crypta_destination)
    yt_client.run_sort(new_crypta_table, sort_by='hashed_id')


if __name__ == '__main__':
    # The global yt.wrapper proxy and the explicit client are both set up
    # with the same cluster name.
    cluster = 'hahn'
    yt_wrapper.config.set_proxy(cluster)
    yt_client = yt_utils.get_yt_client(cluster)

    # Score splitting is currently disabled; only the cid remapping runs.
    # run_split_scores(yt_client)
    remap_cids(yt_client)
