import logging
import os

from crypta.lib.python import templater
from crypta.lib.python.logging.logging_helpers import get_function_logger
from crypta.profile.lib import date_helpers
from crypta.siberia.bin.custom_audience.sites_clustering.lib.utils import (
    config,
    fields,
)


# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Decorator built from the module logger; it is applied to the YQL wrapper
# functions below (presumably to log their invocations - see get_function_logger).
function_logger = get_function_logger(logger)


# Jinja-templated YQL query rendered by prepare_for_describe().
#
# Stages, in order:
#   1. $host_crypta_id_table: UNION ALL of (host, crypta_id) pairs from the
#      metrics and bar flattened-hits tables.
#   2. $host_crypta_id_users_count_table: distinct (name, crypta_id, users_count)
#      rows obtained by inner-joining stage 1 with the visitor counter table on
#      host == site; users_count is the larger of bar_visitors_count and
#      metrica_visitors_count, with NULLs treated as 0.
#   3. When `only_new_sites` is truthy, stage 2 is redefined via LEFT ONLY JOIN
#      against the clustering table, keeping only names absent from it.
#   4. `sites_to_describe_table` is overwritten with
#      (GroupID = name, IdType = 'crypta_id', IdValue = crypta_id as String).
#   5. $host_users_count_table: distinct (name, users_count) pairs.
#   6. $new_site_dict_table: the existing site dictionary plus every name not yet
#      in it, numbered as max existing site_id + ROW_NUMBER() ordered by
#      users_count descending. `site_dict_table` is then overwritten with it.
#   7. `sites_with_info_table` is overwritten with (name, id, users_count) by
#      joining stage 5 with the new dictionary.
#
# NOTE(review): MAX(site_id) is presumably NULL when the site dictionary is
# empty, which would make every newly assigned site_id NULL - confirm the
# dictionary table is always seeded before this query runs.
prepare_for_clustering_template = """
$host_crypta_id_table = (
        SELECT
            host,
            crypta_id,
        FROM `{{metrics_crypta_id_flattened_hits_table}}`
    UNION ALL
        SELECT
            host,
            crypta_id,
        FROM `{{bar_crypta_id_flattened_hits_table}}`
);

$host_crypta_id_users_count_table = (
    SELECT DISTINCT
        host_crypta_id_table.host AS {{fields.name}},
        host_crypta_id_table.crypta_id AS crypta_id,
        MAX_OF(
            COALESCE(crypta_id_metrica_browser_visitor_counter_table.bar_visitors_count, 0),
            COALESCE(crypta_id_metrica_browser_visitor_counter_table.metrica_visitors_count, 0),
        ) AS {{fields.users_count}},
    FROM $host_crypta_id_table AS host_crypta_id_table
    INNER JOIN `{{crypta_id_metrica_browser_visitor_counter_table}}` AS crypta_id_metrica_browser_visitor_counter_table
    ON host_crypta_id_table.host == crypta_id_metrica_browser_visitor_counter_table.site
);

{% if only_new_sites %}
$host_crypta_id_users_count_table = (
    SELECT
        host_crypta_id_users_count_table.{{fields.name}} AS {{fields.name}},
        host_crypta_id_users_count_table.crypta_id AS crypta_id,
        host_crypta_id_users_count_table.{{fields.users_count}} AS {{fields.users_count}},
    FROM $host_crypta_id_users_count_table AS host_crypta_id_users_count_table
    LEFT ONLY JOIN `{{sites_clustering_table}}` AS sites_clustering_table
    USING ({{fields.name}})
);
{% endif %}

INSERT INTO `{{sites_to_describe_table}}`
WITH TRUNCATE

SELECT
    {{fields.name}} as GroupID,
    'crypta_id' AS IdType,
    Cast(crypta_id AS String) AS IdValue,
FROM $host_crypta_id_users_count_table;

$host_users_count_table = (
    SELECT DISTINCT
        {{fields.name}},
        {{fields.users_count}},
    FROM $host_crypta_id_users_count_table
);

$max_site_id = (
    SELECT MAX(site_id)
    FROM `{{site_dict_table}}`
);

$new_site_dict_table = (
    SELECT
        site,
        site_id,
    FROM `{{site_dict_table}}`
UNION ALL
    SELECT
        {{fields.name}} as site,
        $max_site_id + ROW_NUMBER() OVER w AS site_id,
    FROM $host_users_count_table AS host_users_count_table
    LEFT ONLY JOIN `{{site_dict_table}}` AS site_dict_table
    ON host_users_count_table.{{fields.name}} = site_dict_table.site
    WINDOW w AS (ORDER BY {{fields.users_count}} DESC)
);

INSERT INTO `{{site_dict_table}}`
WITH TRUNCATE

SELECT *
FROM $new_site_dict_table;

INSERT INTO `{{sites_with_info_table}}`
WITH TRUNCATE

SELECT
    host_users_count_table.{{fields.name}} AS {{fields.name}},
    new_site_dict_table.site_id AS {{fields.id}},
    host_users_count_table.{{fields.users_count}} AS {{fields.users_count}},
FROM $host_users_count_table AS host_users_count_table
INNER JOIN $new_site_dict_table AS new_site_dict_table
ON host_users_count_table.{{fields.name}} == new_site_dict_table.site;
"""


# Jinja-templated YQL query rendered by get_sites_vectors_with_info().
#
# Overwrites `sites_vectors_with_info_table` with one row per site that is
# present in both inputs: the site's name, id and users_count come from
# `sites_with_info_table`, and its `segment_vector` (emitted as `host_vector`)
# comes from `sites_vectors_table`, matched on name == GroupID.
# Rows are ordered by users_count ascending.
get_sites_vectors_with_info_template = """
INSERT INTO `{{sites_vectors_with_info_table}}`
WITH TRUNCATE

SELECT
    sites_with_info_table.{{fields.name}} AS {{fields.name}},
    sites_with_info_table.{{fields.id}} AS {{fields.id}},
    sites_vectors_table.segment_vector AS host_vector,
    sites_with_info_table.{{fields.users_count}} AS {{fields.users_count}},
FROM `{{sites_with_info_table}}` AS sites_with_info_table
INNER JOIN `{{sites_vectors_table}}` AS sites_vectors_table
ON sites_with_info_table.{{fields.name}} == sites_vectors_table.GroupID
ORDER BY {{fields.users_count}} ASC
"""


@function_logger
def prepare_for_describe(transaction, yql_client, segments_to_describe_table, segments_with_info_table, only_new_segments=False):
    """Render and run the YQL query that prepares sites for description.

    The query overwrites ``segments_to_describe_table``,
    ``segments_with_info_table`` and the site dictionary table taken from
    ``config.SITE_DICT_TABLE``.

    Args:
        transaction: Object exposing ``transaction_id``; its string form is
            passed to the YQL client.
        yql_client: Client whose ``execute`` method runs the rendered query.
        segments_to_describe_table: Output table for (GroupID, IdType, IdValue) rows.
        segments_with_info_table: Output table for per-site name, id and users count.
        only_new_segments: When truthy, sites already present in
            ``config.CLUSTERING_TABLE`` are excluded.
    """
    template_vars = {
        'metrics_crypta_id_flattened_hits_table': config.METRICS_CRYPTA_ID_FLATTEN_HITS_TABLE,
        'bar_crypta_id_flattened_hits_table': config.BAR_CRYPTA_ID_FLATTEN_HITS_TABLE,
        'crypta_id_metrica_browser_visitor_counter_table': config.CRYPTA_ID_METRICA_BROWSER_VISITOR_COUNTER_TABLE,
        'only_new_sites': only_new_segments,
        'sites_clustering_table': config.CLUSTERING_TABLE,
        'sites_to_describe_table': segments_to_describe_table,
        'site_dict_table': config.SITE_DICT_TABLE,
        'sites_with_info_table': segments_with_info_table,
        'fields': fields,
    }
    rendered_query = templater.render_template(prepare_for_clustering_template, vars=template_vars)

    yql_client.execute(
        query=rendered_query,
        transaction=str(transaction.transaction_id),
        title='YQL prepare_for_describe',
    )


@function_logger
def get_sites_vectors_with_info(transaction, yql_client, segments_with_info_table, segments_vectors_table, segments_vectors_with_info_table):
    """Render and run the YQL query that attaches vectors to site info rows.

    Args:
        transaction: Object exposing ``transaction_id``; its string form is
            passed to the YQL client.
        yql_client: Client whose ``execute`` method runs the rendered query.
        segments_with_info_table: Input table with per-site name, id and users count.
        segments_vectors_table: Input table with ``GroupID`` and ``segment_vector``.
        segments_vectors_with_info_table: Output table, overwritten by the query.
    """
    template_vars = {
        'sites_with_info_table': segments_with_info_table,
        'sites_vectors_table': segments_vectors_table,
        'sites_vectors_with_info_table': segments_vectors_with_info_table,
        'fields': fields,
    }
    rendered_query = templater.render_template(get_sites_vectors_with_info_template, vars=template_vars)

    yql_client.execute(
        query=rendered_query,
        transaction=str(transaction.transaction_id),
        title='YQL get_sites_vectors_with_info',
    )


def get_id_clusterid_path(id_type, date=None):
    """Return the id-to-cluster-id path for the given id type and date.

    Args:
        id_type: Either ``'yandexuid'`` or ``'crypta_id'``.
        date: Last path component. When omitted (``None``), today's date string
            from ``date_helpers.get_today_date_string()`` is used.

    Returns:
        The matching configured directory joined with ``date``, or ``None``
        when ``id_type`` is not one of the supported values.
    """
    # Resolve the default at call time: a default expression in the signature
    # is evaluated only once, at import, and would go stale in a process that
    # keeps running past midnight.
    if date is None:
        date = date_helpers.get_today_date_string()

    if id_type == 'yandexuid':
        return os.path.join(config.YANDEXUID_CLUSTERID_DIR, date)
    if id_type == 'crypta_id':
        return os.path.join(config.CRYPTAID_CLUSTERID_DIR, date)
    # Unsupported id types keep yielding None, as before.
    return None
