import json
import os

from crypta.lib.python import templater
from crypta.lib.python.nirvana.nirvana_helpers.nirvana_transaction import NirvanaTransaction
from crypta.profile.lib.socdem_helpers import socdem_config
import crypta.profile.lib.socdem_helpers.tools.features as features_utils
from crypta.profile.lib.socdem_helpers.train_utils.train_helper import get_flat_cat_features_dict
from crypta.profile.utils.config import config


# YQL query written as a template (``{{ ... }}`` / ``{% ... %}`` placeholders);
# it is rendered with ``templater.render_template`` in ``get()`` below.
#
# Template variables:
#   merged_segments_table - source table providing ``yandexuid``,
#                           ``heuristic_common`` and ``longterm_interests``.
#   features              - mapping ``feature_type -> parameters``; each
#                           parameters mapping has ``white_list`` (text placed
#                           inside ``IN (...)``; a falsy value disables the
#                           filter) and ``output_path`` (destination table).
#                           ``feature_type`` must match a ``<feature_type>_list``
#                           column of ``$profiles_segments``, because the
#                           per-feature subquery flattens by that column.
#   metrics_idf_table     - source table with ``host`` and ``count`` columns.
#   top_hosts_number      - LIMIT applied to the hosts output.
#   top_hosts_output      - destination table for the hosts output.
#
# For every feature type the query collects the distinct flattened values
# (optionally restricted to the white list) and writes them as
# ``(feature, feature_index)`` rows, where ``feature_index`` is a ROW_NUMBER
# over the values in ascending order.  The final statement writes hosts as
# ``(feature, feature_index)`` rows numbered by descending ``count``.
# All outputs are written with ``WITH TRUNCATE``.
#
# NOTE(review): ``$dict_keys_to_uint_list`` is defined but not referenced
# anywhere in this template.
categorical_features_matching_query_template = """
PRAGMA AnsiInForEmptyOrNullableItemsCollections;

$dict_keys_to_uint_list = ($feature) -> (
    ListMap(
        DictKeys(Yson::ConvertToUint64Dict($feature)),
        ($x) -> (CAST($x AS UINT64)))
    );

$profiles_segments = (
    SELECT
        yandexuid,
        Yson::ConvertToUint64List(heuristic_common) AS heuristic_common_list,
        Yson::ConvertToUint64List(longterm_interests) AS longterm_interests_list,
    FROM `{{merged_segments_table}}`
);

{% for feature_type, feature_type_parameters in features.items() %}
${{feature_type}}_ids = (
    SELECT DISTINCT {{feature_type}}
    FROM $profiles_segments
    FLATTEN BY {{feature_type}}_list AS {{feature_type}}
    {% if feature_type_parameters['white_list'] %}
    WHERE {{feature_type}} IN ({{feature_type_parameters['white_list']}})
    {% endif %}
);

INSERT INTO `{{feature_type_parameters['output_path']}}`
WITH TRUNCATE

SELECT
    {{feature_type}} AS feature,
    ROW_NUMBER() OVER w AS feature_index,
FROM ${{feature_type}}_ids
WINDOW w AS (ORDER BY {{feature_type}})
ORDER BY feature_index;

{% endfor %}

INSERT INTO `{{top_hosts_output}}`
WITH TRUNCATE

SELECT
    host AS feature,
    ROW_NUMBER() OVER w AS feature_index,
FROM `{{metrics_idf_table}}`
WINDOW w AS (ORDER BY count DESC)
ORDER BY feature_index
LIMIT {{top_hosts_number}};
"""


def get_table_path_for_feature(feature, folder=config.PRESTABLE_CATEGORICAL_FEATURES_MATCHING_DIR):
    """Return the path obtained by joining ``folder`` and ``feature``.

    Args:
        feature: Name of the feature; used as the last path component.
        folder: Directory the feature table lives in. Defaults to
            ``config.PRESTABLE_CATEGORICAL_FEATURES_MATCHING_DIR``.

    Returns:
        The joined path, built with ``os.path.join``.
    """
    table_path = os.path.join(folder, feature)
    return table_path


def get(yt_client, yql_client, date, output_dict):
    """Recalculate categorical feature matching tables and dump them to a JSON file.

    Inside a single ``NirvanaTransaction`` this function:

    1. renders ``categorical_features_matching_query_template`` and executes
       it through ``yql_client`` within the transaction;
    2. sets the ``generate_date`` attribute to ``date`` on the table of every
       feature type listed in ``features_utils.cat_feature_types``;
    3. downloads the feature tables from
       ``config.PRESTABLE_CATEGORICAL_FEATURES_MATCHING_DIR``, flattens them
       with ``get_flat_cat_features_dict`` and writes the result as JSON.

    Args:
        yt_client: Client used to open the transaction, set table attributes
            and download the feature tables.
        yql_client: Client whose ``execute`` method runs the rendered query.
        date: Value stored in the ``generate_date`` attribute of each table.
        output_dict: Path of the file the JSON dictionary is written to.
    """
    with NirvanaTransaction(yt_client) as transaction:
        # The white list is passed to the template as ready-to-insert text
        # for an ``IN (...)`` clause.
        heuristic_common_white_list = ', '.join(
            str(segment_id) for segment_id in socdem_config.heuristic_common_white_list
        )

        template_vars = {
            'features': {
                'heuristic_common': {
                    'white_list': heuristic_common_white_list,
                    'output_path': get_table_path_for_feature('heuristic_common'),
                },
                'longterm_interests': {
                    # No white list: the template skips the WHERE filter.
                    'white_list': None,
                    'output_path': get_table_path_for_feature('longterm_interests'),
                },
            },
            'top_hosts_number': socdem_config.TOP_HOSTS_NUMBER,
            'metrics_idf_table': config.YANDEXUID_METRICS_IDF_TABLE,
            'merged_segments_table': config.SEGMENTS_STORAGE_BY_YANDEXUID_TABLE,
            'top_hosts_output': get_table_path_for_feature('raw_site_weights'),
        }
        rendered_query = templater.render_template(
            categorical_features_matching_query_template,
            vars=template_vars,
        )

        yql_client.execute(
            query=rendered_query,
            transaction=str(transaction.transaction_id),
            title='YQL calculate categorical features matching',
        )

        # Stamp every feature table with the date it was generated for.
        for feature_type in features_utils.cat_feature_types:
            feature_table = get_table_path_for_feature(feature_type)
            yt_client.set_attribute(feature_table, 'generate_date', date)

        # Only the first element of the returned pair is needed here.
        cat_features_dicts, _ = features_utils.download_cat_features_to_dict(
            yt=yt_client,
            yt_folder_path=config.PRESTABLE_CATEGORICAL_FEATURES_MATCHING_DIR,
            is_mobile=False,
        )
        flat_features_dict = get_flat_cat_features_dict(
            cat_features_dicts,
            features_utils.cat_feature_types,
        )

        with open(output_dict, 'w') as output_file:
            json.dump(flat_features_dict, output_file)
