import argparse

import yt.wrapper as yt

from collections import defaultdict
from pprint import pprint

from yql.api.v1.client import YqlClient

from crypta.lib.python.identifiers.generic_id import GenericID


# YT path of the matching "vertices" table; this is the same path that the
# lookup functions below read from.
VERTICES = "//home/crypta/production/state/graph/v2/matching/vertices_no_multi_profile"
# Module-level YQL client for the 'hahn' database, created when the module
# is imported and used by get_cryptaids_via_yql.
CLIENT = YqlClient(db='hahn')
# Identifier types that get_normalized tries for the value given on the
# command line. Only the uncommented entries are active; the commented-out
# names are currently disabled candidates (presumably other type names that
# GenericID accepts -- verify before enabling).
ID_TYPES = [
    "login",
    "phone",
    "email",
    "yandexuid",

    # "avito_hash",
    # "avito_id",
    # "cryptaid",
    # "crypta_id",
    # "distr_r1",
    # "distr_ui",
    # "dit_id",
    # "duid",
    # "edadeal",
    # "edadeal_uid",
    # "email_md5",
    # "email_sha256",
    # "fb_id",
    # "open_id",
    # "oaid",
    # "gaid",
    # "icookie",
    # "idfa",
    # "idfa_gaid",
    # "idfagaid",
    # "ifv",
    # "idfv",
    # "kp_id",
    # "login",
    # "mac",
    # "mac_ext",
    # "mac_ext_md5",
    # "md5",
    # "device_id",
    # "deviceid",
    # "devid",
    # "mm_device_id",
    # "mmdeviceid",
    # "mmetric_device_id",
    # "mmetric_devids",
    # "devidhash",
    # "mm_device_id_hash",
    # "mmetric_device_id_hash",
    # "ok_id",
    # "okid",
    # "partner_record_id",
    # "phone_md5",
    # "phone_sha256",
    # "puid",
    # "sha256",
    # "ssp_user_id",
    # "uuid",
    # "vk",
    # "vk_id",
    # "vk_name",
    # "vkid",
    # "xuniq_guid",
    # "yamoney_id",
    # "auto_id",
    # "direct_client_id",
    # "android_id",
    # "imei",
]


def get_cryptaids_via_yt(normalized_ids_types):
    """Look up cryptaIds with keyed reads from the YT vertices table.

    Args:
        normalized_ids_types: iterable of ``(id, id_type)`` pairs, for
            example the output of ``get_normalized``.

    Returns:
        A list of dicts with the keys ``"cryptaId"``, ``"id_type"`` and
        ``"id"``, one per row read. The list is empty when no pairs were
        given or no rows were found.
    """
    # Collect the types seen for each id so that all reads for one id are
    # issued back to back.
    aggregated_types = defaultdict(list)
    for id_, id_type in normalized_ids_types:
        aggregated_types[id_].append(id_type)

    results = []

    # ``items()`` instead of ``iteritems()``: the latter exists only on
    # Python 2 dicts, ``items()`` works on both Python 2 and Python 3.
    for id_, id_types in aggregated_types.items():
        for id_type in id_types:
            # One keyed read per (id, id_type) pair.
            # NOTE(review): assumes the table is sorted by (id, id_type) in
            # that order -- confirm against the table schema.
            table_path = yt.TablePath(VERTICES, exact_key=(id_, id_type))
            for row in yt.read_table(table_path):
                if row:
                    results.append({
                        "cryptaId": row["cryptaId"],
                        "id_type": row["id_type"],
                        "id": row["id"],
                    })

    return results


def _quote_yql_string(value):
    """Render ``value`` as a double-quoted YQL string literal.

    Backslashes and double quotes are backslash-escaped so that the value
    cannot terminate the literal early and change the query.
    """
    text = '{}'.format(value)
    return '"{}"'.format(text.replace('\\', '\\\\').replace('"', '\\"'))


def get_cryptaids_via_yql(normalized_ids_types):
    """Look up cryptaIds with a single YQL query over the vertices table.

    Args:
        normalized_ids_types: iterable of ``(id, id_type)`` pairs, for
            example the output of ``get_normalized``.

    Returns:
        A list of dicts, one per result row, mapping each result column
        name to the row's value. The list is empty when no pairs were
        given; in that case no query is sent.
    """
    aggregated_types = defaultdict(list)
    for id_, id_type in normalized_ids_types:
        aggregated_types[id_].append(id_type)

    # Without any pair the WHERE clause would be empty and the query
    # invalid, so answer directly instead of submitting it.
    if not aggregated_types:
        return []

    # ``items()`` works on both Python 2 and Python 3 (``iteritems()`` is
    # Python 2 only). Every value is escaped because the id originates from
    # the command line.
    condition = ' OR '.join(
        '(id={id_literal} AND id_type IN [{type_literals}])'.format(
            id_literal=_quote_yql_string(id_),
            type_literals=', '.join(_quote_yql_string(t) for t in id_types),
        )
        for id_, id_types in aggregated_types.items()
    )

    query = (
        'SELECT cryptaId, id_type, id '
        'FROM `{table}` '
        'WHERE {condition} ;'
    ).format(table=VERTICES, condition=condition)

    request = CLIENT.query(query, syntax_version=1)
    request.run()

    results = []
    for table in request.get_results():
        for row in table.rows:
            results.append(dict(zip(table.column_names, row)))

    return results


def get_normalized(id_value):
    """Interpret ``id_value`` as each type in ``ID_TYPES`` and keep the hits.

    Returns:
        A list of ``(identifier.normalize, id_type)`` tuples, in ``ID_TYPES``
        order, for every type whose ``GenericID`` reports
        ``is_significant()``.
    """
    # Built lazily: each GenericID is constructed just before it is checked.
    candidates = ((GenericID(kind, id_value), kind) for kind in ID_TYPES)
    # NOTE(review): ``normalize`` is read as an attribute, not called --
    # presumably a property on GenericID; confirm.
    return [
        (candidate.normalize, kind)
        for candidate, kind in candidates
        if candidate.is_significant()
    ]


def make_arg_parser():
    """Build the command-line parser for this script.

    Returns:
        An ``argparse.ArgumentParser`` with a single required ``--id``
        option holding the identifier to look up.
    """
    # The description is shown in ``--help``; it states what this script
    # does (the previous text described an unrelated metrics job).
    parser = argparse.ArgumentParser(
        description="Look up cryptaIds for an identifier in the matching vertices table",
    )
    parser.add_argument(
        "--id",
        help="identifier",
        required=True,
    )
    return parser


def main():
    """Resolve the ``--id`` argument to cryptaIds, print and return them.

    Points the YT client at the "hahn" proxy, normalizes the identifier for
    every type in ``ID_TYPES`` and looks the pairs up with keyed YT reads.
    """
    yt.config.set_proxy("hahn")
    cli_args = make_arg_parser().parse_args()

    id_type_pairs = get_normalized(cli_args.id)
    # get_cryptaids_via_yql takes the same argument and can be swapped in
    # here to run the lookup as a YQL query instead.
    found = get_cryptaids_via_yt(id_type_pairs)

    pprint(found)
    return found


# Run the lookup only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
