#-*- coding: UTF-8 -*-
from common import *

class get_uid_puid_matching(object):
    """Reducer that selects, for each uid group, the puid seen on the most dates.

    For every record in a group the reducer counts how many entries of the
    record's ``"dates"`` field are contained in ``dates``. The puid with the
    highest count is emitted as a ``Record`` with the fields ``id``,
    ``id_type``, ``target_id`` and ``target_id_type`` (always ``'puid'``).
    Groups whose best count is zero, and groups without records, emit nothing.

    Args:
        id_type: value written to the ``id_type`` field of emitted records.
        uid_field: name of the group-key field holding the uid.
        puid_field: name of the record field holding the puid.
        dates: container supporting ``in``; ``main()`` passes a set of
            ``'%Y-%m-%d'`` strings.
    """

    def __init__(self, id_type, uid_field, puid_field, dates):
        self.id_type = id_type
        self.uid_field = uid_field
        self.puid_field = puid_field
        self.dates = dates

    def __call__(self, groups):
        """Yield at most one uid -> puid ``Record`` per ``(key, recs)`` group."""
        for key, recs in groups:
            puid_freqs = {}
            for rec in recs:
                # NOTE(review): if the same puid occurs in several records of
                # one group, the count from the last record replaces earlier
                # ones -- confirm the input holds one row per (uid, puid) pair.
                puid_freqs[rec[self.puid_field]] = sum(
                    1 for date in rec["dates"] if date in self.dates
                )

            # A group without records has no candidate puid; skip it instead
            # of failing on an empty sequence.
            if not puid_freqs:
                continue

            # max() returns the first maximal item in iteration order, which is
            # the same element a stable descending sort would put first.
            best_puid, best_freq = max(puid_freqs.items(),
                                       key=lambda item: item[1])
            if best_freq > 0:
                yield Record(id=key[self.uid_field],
                             id_type=self.id_type,
                             target_id=best_puid,
                             target_id_type='puid')

def main():
    """Build the combined uid -> puid table and write it to ``--output_table``.

    Steps:
      1. Parse the command line (every option is required).
      2. Pick the YT cluster ('hahn' or 'arnold') and configure its environment.
      3. Collect the ``--days_count`` dates ending at ``--date`` (inclusive).
      4. Derive extra yandexuid -> puid and uuid -> puid pairs from the Crypta
         passport auth tables with ``get_uid_puid_matching``.
      5. Restrict uuid pairs to ids joined with ``APP_METRICA_ACTIVE_UUID_TABLE``
         whose ``app_id`` is in ``PP_APP_IDS``.
      6. Merge everything, aggregate per ``(id, target_id)``, sort by ``id`` and
         store the result.

    Raises:
        ValueError: if ``--cluster`` is neither 'hahn' nor 'arnold'
            (case-insensitive).
    """
    arg_parser = argparse.ArgumentParser()
    # Only --days_count is parsed as an integer; the rest stay strings.
    for flag, value_type in (
            ('--cluster', str),
            ('--date', str),
            ('--days_count', int),
            ('--crypta_passport_yandexuid_auth_table', str),
            ('--crypta_passport_uuid_auth_table', str),
            ('--yuid_puid_table', str),
            ('--uuid_puid_table', str),
            ('--output_table', str),
    ):
        arg_parser.add_argument(flag, type=value_type, required=True)
    args = arg_parser.parse_args()

    end_date = dt.strptime(args.date, "%Y-%m-%d")

    cluster_name = args.cluster.lower()
    if cluster_name not in ('hahn', 'arnold'):
        raise ValueError("Unsupported cluster '{}'. Choose 'hahn' (recommended) or 'arnold'".format(args.cluster))
    base_cluster = clusters.yt.Hahn() if cluster_name == 'hahn' else clusters.yt.Arnold()

    spec_defaults = dict(pool_trees=["physical"],
                         tentative_pool_trees=["cloud"])
    job_templates = dict(tmp_root=TMP_HITMAN_TV_ONLINE_RECOMMENDATIONS,
                         title='MakeCryptaUIdPuidTableBetter')
    cluster = base_cluster.env(parallel_operations_limit=10,
                               yt_spec_defaults=spec_defaults,
                               templates=job_templates)

    # The window covers end_date itself and the (days_count - 1) days before it.
    dates = set()
    for days_back in range(args.days_count):
        dates.add(dt.strftime(end_date - timedelta(days_back), "%Y-%m-%d"))

    job = cluster.job()

    # Extra pairs derived from passport auth logs: in the yandexuid table the
    # uid is 'id1' and the puid is 'id2'; in the uuid table it is the reverse.
    yuid_auth_stream = job.table(args.crypta_passport_yandexuid_auth_table)
    addition_yuid_puid_table = yuid_auth_stream.groupby('id1').reduce(
        get_uid_puid_matching('yandexuid', 'id1', 'id2', dates))

    uuid_auth_stream = job.table(args.crypta_passport_uuid_auth_table)
    addition_uuid_puid_table = uuid_auth_stream.groupby('id2').reduce(
        get_uid_puid_matching('uuid', 'id2', 'id1', dates))

    # Keep only uuid pairs that join with the active-uuid table and belong to
    # one of the applications listed in PP_APP_IDS.
    all_uuid_pairs = job.concat(addition_uuid_puid_table,
                                job.table(args.uuid_puid_table))
    active_uuid_pairs = all_uuid_pairs.join(
        job.table(APP_METRICA_ACTIVE_UUID_TABLE), by='id')
    pp_uuids = (
        active_uuid_pairs
        .filter(sf.custom(lambda app_id: app_id in PP_APP_IDS, 'app_id'))
        .project('id', 'id_type', 'target_id', 'target_id_type')
    )

    output_table_schema = {
        "id": str,
        "id_type": str,
        "target_id": str,
        "target_id_type": str,
    }

    # One output row per (id, target_id) pair, sorted by id.
    merged_pairs = job.concat(pp_uuids,
                              job.table(args.yuid_puid_table),
                              addition_yuid_puid_table)
    (
        merged_pairs
        .groupby('id', 'target_id')
        .aggregate(id_type=na.any('id_type'),
                   target_id_type=na.any('target_id_type'))
        .sort('id')
        .put(args.output_table, schema=output_table_schema)
    )
    job.run()

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
