import datetime
import logging
import os

from crypta.lib.python import templater
from crypta.lib.python.yt import yt_helpers
from crypta.profile.lib import date_helpers
from crypta.siberia.bin.custom_audience.lib.python.clustering import utils as clustering_utils
from crypta.siberia.bin.custom_audience.sites_clustering.lib.utils import (
    config,
    fields,
)

logger = logging.getLogger(__name__)


# YQL query template (Jinja-style placeholders) rendered by get_userid_clusterid().
#
# Template variables: id_type, metrics_flattened_hits_table, bar_flattened_hits_table,
# matching_yandexuid_crypta_id_table, sites_clustering_table, userid_clusterid_table, fields.
#
# Steps performed by the rendered query:
#   1. $host2id_table: distinct (host, id) pairs taken from the union of the two
#      flattened hits tables; the id column is selected by the id_type name.
#   2. Only when id_type == 'yandexuid': $host2id_table is redefined with a
#      LEFT ONLY JOIN against the yandexuid <-> crypta_id matching table, i.e. only
#      ids that have no row in the matching table are kept.
#   3. The pairs are inner-joined with the sites clustering table on
#      host == fields.name, and the distinct (id, id_type, fields.cluster_id) rows are
#      written to userid_clusterid_table WITH TRUNCATE, ordered by (id, id_type).
get_userid_clusterid_template = """
$host2id_table = (
    SELECT
        DISTINCT host, id
    FROM (
            SELECT
                host,
                {{id_type}} AS id,
            FROM `{{metrics_flattened_hits_table}}`
        UNION ALL
            SELECT
                host,
                {{id_type}} AS id,
            FROM `{{bar_flattened_hits_table}}`
    )
);

{% if id_type == 'yandexuid' %}
$host2id_table = (
    SELECT DISTINCT
        host2id_table.host AS host,
        host2id_table.id AS id,
    FROM $host2id_table AS host2id_table
    LEFT ONLY JOIN `{{matching_yandexuid_crypta_id_table}}` AS matching_yandexuid_crypta_id_table
    ON CAST(host2id_table.id AS String) == matching_yandexuid_crypta_id_table.id
);
{% endif %}

INSERT INTO `{{userid_clusterid_table}}`
WITH TRUNCATE

SELECT DISTINCT
    host2id_table.id AS id,
    '{{id_type}}' AS id_type,
    sites_clustering_table.{{fields.cluster_id}} AS {{fields.cluster_id}},
FROM $host2id_table AS host2id_table
INNER JOIN `{{sites_clustering_table}}` AS sites_clustering_table
ON host2id_table.host == sites_clustering_table.{{fields.name}}
ORDER BY id, id_type
"""


def get_userid_clusterid(yt_client, transaction, yql_client, sites_clustering_table, id_type, userid_clusterid_table):
    """Build the id -> cluster id table for a single identifier type.

    Renders ``get_userid_clusterid_template`` for ``id_type``, executes the
    resulting query through ``yql_client`` within ``transaction`` and then sets
    a TTL on the output table.

    Args:
        yt_client: client handed over to ``yt_helpers.set_ttl``.
        transaction: object whose ``transaction_id`` is passed to the YQL run.
        yql_client: client providing ``execute(query=..., transaction=..., title=...)``.
        sites_clustering_table: path of the sites clustering table joined by host.
        id_type: either ``'yandexuid'`` or ``'crypta_id'``; selects the source
            hits tables and the id column.
        userid_clusterid_table: path of the table the query writes to.

    Raises:
        ValueError: if ``id_type`` is not one of the supported values.
    """
    if id_type == 'yandexuid':
        metrics_flattened_hits_table = config.METRICS_YANDEXUID_FLATTEN_HITS_TABLE
        bar_flattened_hits_table = config.BAR_YANDEXUID_FLATTEN_HITS_TABLE
    elif id_type == 'crypta_id':
        metrics_flattened_hits_table = config.METRICS_CRYPTA_ID_FLATTEN_HITS_TABLE
        bar_flattened_hits_table = config.BAR_CRYPTA_ID_FLATTEN_HITS_TABLE
    else:
        # Fail fast: without this branch the source tables stay unbound and the
        # call dies later with an opaque UnboundLocalError.
        raise ValueError('Unsupported id_type: {!r}'.format(id_type))

    query = templater.render_template(
        get_userid_clusterid_template,
        vars={
            'id_type': id_type,
            'metrics_flattened_hits_table': metrics_flattened_hits_table,
            'bar_flattened_hits_table': bar_flattened_hits_table,
            'matching_yandexuid_crypta_id_table': config.MATCHING_YANDEXUID_CRYPTA_ID_TABLE,
            'sites_clustering_table': sites_clustering_table,
            'userid_clusterid_table': userid_clusterid_table,
            'fields': fields,
        },
    )
    yql_client.execute(
        query=query,
        transaction=str(transaction.transaction_id),
        title='YQL get_{}_clusterid'.format(id_type),
    )

    # The output table is kept only for a configured number of days.
    yt_helpers.set_ttl(
        table=userid_clusterid_table,
        ttl_timedelta=datetime.timedelta(days=config.DAYS_TO_KEEP_USERID_CLUSTERID),
        yt_client=yt_client,
    )


def update():
    """Rebuild today's id -> cluster id tables and refresh centroid user counts.

    Does nothing (apart from logging) unless the ``generate_date`` attribute of
    the sites clustering table equals today's date string. Otherwise, inside one
    YT transaction, it builds the yandexuid table, then the crypta_id table, and
    after the crypta_id table updates the centroids users count.
    """
    yt_client = clustering_utils.get_yt_client(config=config)
    yql_client = clustering_utils.get_yql_client(config=config)

    today = date_helpers.get_today_date_string()

    clustering_generate_date = yt_client.get_attribute(
        path=config.CLUSTERING_TABLE,
        attribute='generate_date',
    )
    if clustering_generate_date != today:
        logger.info('sites clustering table is not updated today yet')
        return

    with yt_client.Transaction() as transaction:
        # (id type, output table for today) in processing order.
        targets = (
            ('yandexuid', os.path.join(config.YANDEXUID_CLUSTERID_DIR, today)),
            ('crypta_id', os.path.join(config.CRYPTAID_CLUSTERID_DIR, today)),
        )

        for id_type, output_table in targets:
            get_userid_clusterid(
                yt_client,
                transaction=transaction,
                yql_client=yql_client,
                sites_clustering_table=config.CLUSTERING_TABLE,
                id_type=id_type,
                userid_clusterid_table=output_table,
            )

            # Centroid counts are refreshed only from the crypta_id table.
            if id_type != 'crypta_id':
                continue

            # Remember the attribute before the centroids table is updated; it is
            # written back afterwards (presumably the update resets table
            # attributes -- TODO confirm).
            planned_date = yt_client.get_attribute(
                path=config.CENTROIDS_TABLE,
                attribute='last_update_planned_date',
            )

            clustering_utils.update_centroids_users_count(
                transaction=transaction,
                yql_client=yql_client,
                userid_clusterid_table=output_table,
                config=config,
                fields=fields,
            )

            yt_client.set_attribute(
                path=config.CENTROIDS_TABLE,
                attribute='last_update_planned_date',
                value=planned_date,
            )
            yt_client.set_attribute(
                path=config.CENTROIDS_TABLE,
                attribute='generate_date',
                value=today,
            )
