import yt.wrapper as yt_wrapper
from datacloud.config import compression_codecs
from datacloud.dev_utils.logging.logger import get_basic_logger

# Module-level logger obtained from the project's get_basic_logger helper.
logger = get_basic_logger(__name__)


# Prefix placed in square brackets at the start of the title of every YT
# operation launched by make_join_tables.
TAG = 'CRYPTA-EXTRACT'

# Crypta matching tables used as the input of the map step in
# make_join_tables. get_crypta_edges reads the 'id', 'target_id' and
# 'target_id_type' fields from their rows.
input_crypta_tables = [
    '//home/crypta/production/state/graph/v2/matching/by_id/crypta_id/email_md5',
    '//home/crypta/production/state/graph/v2/matching/by_id/crypta_id/phone_md5',
    '//home/crypta/production/state/graph/v2/matching/by_id/crypta_id/yandexuid'
]


class CryptaExtractTablesV2(object):
    """Collection of YT table paths used by the crypta extraction pipeline.

    Every table is located directly under ``self.root`` and is exposed as a
    ``yt_wrapper.TablePath`` attribute. All columns declared in the schemas
    are of type ``string``.

    Args:
        base_root: Leading part of the output directory path.
        sub_folder: Trailing part of the output directory path. It is
            concatenated to ``base_root`` verbatim (no separator is inserted).
        crypta_source_folder: Accepted for the caller's convenience but not
            referenced anywhere in this constructor.
    """

    def __init__(
            self,
            base_root="//home/x-products/production",
            sub_folder="/crypta_v2/crypta_db_last",
            crypta_source_folder="//home/crypta/production/state/graph/v2/matching",
    ):
        self.root = base_root + sub_folder

        def string_schema(*column_names):
            # One string-typed schema entry per column, in the given order.
            return [
                {'name': column_name, 'type': 'string'}
                for column_name in column_names
            ]

        def table(name, attributes=None):
            # Forward ``attributes`` only when the table declares some, so a
            # plain table is created exactly as ``TablePath(path)``.
            path = yt_wrapper.ypath_join(self.root, name)
            if attributes is None:
                return yt_wrapper.TablePath(path)
            return yt_wrapper.TablePath(path, attributes=attributes)

        # Edge list: one row per (cid, id_type, id_value).
        self.cid_to_all = table(
            'cid_to_all',
            {'schema': string_schema('cid', 'id_type', 'id_value')},
        )
        # The only table declared without schema attributes.
        self.id_value_to_cid = table('id_value_to_cid')

        self.all_interesting_yuid = table(
            'all_interesting_yuid',
            {'schema': string_schema('key', 'cid', 'yuid')},
        )
        self.yuid_to_cid = table(
            'yuid_to_cid',
            {'schema': string_schema('yuid', 'cid')},
        )
        self.cid_to_yuid = table(
            'cid_to_yuid',
            {'schema': string_schema('cid', 'yuid')},
        )

        # The per-id-type lookup tables additionally request BROTLI_8
        # compression.
        self.phone_id_value_to_cid = table(
            'phone_id_value_to_cid',
            {
                'schema': string_schema('cid', 'id_value'),
                'compression_codec': compression_codecs.BROTLI_8,
            },
        )
        self.email_id_value_to_cid = table(
            'email_id_value_to_cid',
            {
                'schema': string_schema('cid', 'id_value'),
                'compression_codec': compression_codecs.BROTLI_8,
            },
        )


def get_crypta_edges(rec):
    """Mapper: convert one input row into a single edge row.

    Args:
        rec: Mapping with the fields ``target_id_type``, ``target_id`` and
            ``id``.

    Yields:
        Exactly one dict with the keys ``id_type``, ``id_value`` and ``cid``,
        filled from ``target_id_type``, ``target_id`` and ``id`` respectively.

    Raises:
        KeyError: If ``rec`` lacks one of the three fields.
    """
    # Fields are read in the same order in which they appear in the output.
    id_type = rec['target_id_type']
    id_value = rec['target_id']
    crypta_id = rec['id']
    yield {'id_type': id_type, 'id_value': id_value, 'cid': crypta_id}


def all_interesting_yuid_reduce(key, recs):
    """Reducer: emit the yandexuids of a cid that also has an email or phone.

    All ``yandexuid`` values of the group are collected first. Nothing is
    emitted unless at least one row of the group has the id type
    ``email_md5`` or ``phone_md5``.

    Args:
        key: Reduce key; its ``cid`` field is copied into every output row.
        recs: Iterable of rows with the fields ``id_type`` and ``id_value``.

    Yields:
        A switch to output table 0 followed by one
        ``{"key", "cid", "yuid"}`` row per collected yandexuid (``key`` is the
        yandexuid prefixed with ``"y"``), then a switch to output table 1
        followed by one ``{"yuid", "cid"}`` row per collected yandexuid.
    """
    contact_id_types = ("email_md5", "phone_md5")
    collected_yuids = []
    contact_seen = False

    for row in recs:
        id_type = row["id_type"]
        if id_type == "yandexuid":
            collected_yuids.append(row["id_value"])
        elif id_type in contact_id_types:
            contact_seen = True

    # Groups without any email/phone identifier produce no output at all.
    if not contact_seen:
        return

    # First output table: rows keyed by the "y"-prefixed yandexuid.
    yield yt_wrapper.create_table_switch(0)
    for yuid in collected_yuids:
        yield {"key": "y" + yuid, "cid": key["cid"], "yuid": yuid}

    # Second output table: plain yuid -> cid pairs.
    yield yt_wrapper.create_table_switch(1)
    for yuid in collected_yuids:
        yield {"yuid": yuid, "cid": key["cid"]}


def make_join_tables(yt_client, extract_tables):
    """Run the YT operations that build all extract tables in one transaction.

    Steps, in execution order:
      1. Map ``input_crypta_tables`` through ``get_crypta_edges`` into
         ``cid_to_all`` and sort that table by (cid, id_type).
      2. Sort ``cid_to_all`` into ``id_value_to_cid`` by (id_type, id_value).
      3. Reduce ``cid_to_all`` by cid with ``all_interesting_yuid_reduce``
         into ``all_interesting_yuid`` (first output) and ``yuid_to_cid``
         (second output), then sort them by ``key`` and ``yuid``.
      4. Sort ``yuid_to_cid`` into ``cid_to_yuid`` by cid.
      5. Sort the ``phone_md5`` and ``email_md5`` slices of
         ``id_value_to_cid`` by id_value into ``phone_id_value_to_cid`` and
         ``email_id_value_to_cid``.

    No table is created explicitly here; every operation is simply pointed at
    the corresponding ``TablePath``.

    Args:
        yt_client: Object providing ``Transaction``, ``run_map``, ``run_sort``
            and ``run_reduce``.
        extract_tables: Object exposing the table-path attributes used below
            (e.g. a ``CryptaExtractTablesV2`` instance).
    """
    def titled(template):
        # Every operation spec consists of a title carrying the module TAG.
        return {'title': template.format(TAG)}

    def id_value_slice(id_type):
        # View of id_value_to_cid limited to one id type and two columns.
        return yt_wrapper.TablePath(
            extract_tables.id_value_to_cid,
            exact_key=id_type,
            columns=['cid', 'id_value'])

    with yt_client.Transaction():
        # Step 1: flatten the crypta inputs into the edge table, then order
        # it the way the reduce in step 3 consumes it.
        yt_client.run_map(
            get_crypta_edges,
            input_crypta_tables,
            extract_tables.cid_to_all,
            spec=titled('[{}] Get crypta edges'),
        )
        yt_client.run_sort(
            extract_tables.cid_to_all,
            sort_by=("cid", "id_type"),
            spec=titled('[{}] Sort cid_to_all table'),
        )

        # Step 2: the same edges, ordered for lookups by identifier.
        yt_client.run_sort(
            extract_tables.cid_to_all,
            extract_tables.id_value_to_cid,
            sort_by=("id_type", "id_value"),
            spec=titled('[{}] Create id_value_to_cid table'),
        )

        # Step 3: the reducer writes to two outputs; their order here must
        # match the table indices it switches to.
        # NOTE(review): the "iterator" control-attributes mode is presumably
        # needed because the reducer yields table-switch records -- confirm.
        reduce_outputs = [
            extract_tables.all_interesting_yuid,
            extract_tables.yuid_to_cid,
        ]
        yt_client.run_reduce(
            all_interesting_yuid_reduce,
            extract_tables.cid_to_all,
            reduce_outputs,
            reduce_by="cid",
            output_format=yt_wrapper.YsonFormat(control_attributes_mode="iterator"),
            spec=titled('[{}] Create All interesting yuid'),
        )
        yt_client.run_sort(
            extract_tables.all_interesting_yuid,
            sort_by="key",
            spec=titled('[{}] Sort all_interesting_yuid'),
        )
        yt_client.run_sort(
            extract_tables.yuid_to_cid,
            sort_by="yuid",
            spec=titled('[{}] Sort yuid to cid'),
        )

        # Step 4: inverse mapping of yuid_to_cid.
        yt_client.run_sort(
            extract_tables.yuid_to_cid,
            extract_tables.cid_to_yuid,
            sort_by='cid',
            spec=titled('[{}] Create cid to yuid'),
        )

        # Step 5: per-id-type lookup tables, phone first, then email.
        yt_client.run_sort(
            id_value_slice('phone_md5'),
            extract_tables.phone_id_value_to_cid,
            sort_by='id_value',
            spec=titled('[{}] Get phone id_value to cid table'),
        )
        yt_client.run_sort(
            id_value_slice('email_md5'),
            extract_tables.email_id_value_to_cid,
            sort_by='id_value',
            spec=titled('[{}] Get email id_value to cid table'),
        )
