import bisect
import collections
import logging

from crypta.lib.python import (
    templater,
    time_utils,
)


# Query template (rendered by get_id_type_count and run through the YQL
# client) that counts the rows of the `{{db_path}}` table per id type.
# The id type is the part of `key` before the first ":" separator.
# Result columns: id_type, count.
ID_TYPE_COUNT_QUERY_TEMPLATE = """
$types =
SELECT
    String::SplitToList(key, ":")[0] AS id_type
FROM `{{db_path}}`;

SELECT
    id_type,
    COUNT(*) AS count
FROM $types
GROUP BY id_type;
"""

# Query template (rendered by get_touched_days_ago) that counts rows of
# `{{db_path}}` by how long ago they were touched.
#
# Template variables:
#   crypta_cm_udf_url - location of libcrypta_cm_udf.so attached via PRAGMA File
#   now_ts            - reference timestamp the `Touch` value is subtracted from
#                       (presumably Unix seconds, given the 86400 divisor -
#                       TODO confirm)
#   db_path           - table to scan
#
# Rows whose key starts with "yandexuid:" or "icookie:" are excluded.
# Result columns: touched_days_ago, count.
TOUCHED_DAYS_AGO_QUERY_TEMPLATE = """
PRAGMA File("libcrypta_cm_udf.so", '{{crypta_cm_udf_url}}');
PRAGMA Udf = "libcrypta_cm_udf.so";

$get_touched_days_ago = ($key, $value) -> {
    RETURN ({{now_ts}} - CAST(CryptaCm::ParseMatch($key, $value).Touch AS Int64)) / 86400;
};

SELECT
    touched_days_ago,
    COUNT(*) AS count,
FROM `{{db_path}}`
WHERE `key` NOT REGEXP("^(yandexuid|icookie):")
GROUP BY
    $get_touched_days_ago(key, value) AS touched_days_ago
;
"""

# Sorted bucket bounds (in days) used by get_touched_days_ago to coarsen the
# "touched days ago" histogram: every integer from 0 to 35, then 50, 100, 500.
# The final 999999 acts as a catch-all bound: upper_bound_for_rounded_float
# requires every measured value to be strictly below the last element.
# `list(range(...))` keeps this a real list on both Python 2 and Python 3
# (on Python 3 a bare `range` cannot be concatenated with a list).
DAYS_AGO_BUCKETS = list(range(36)) + [50, 100, 500, 999999]

# Query template (rendered by get_ttl_days_by_type) that counts rows of
# `{{db_path}}` per (id type, TTL in days) pair.
#
# Template variables:
#   crypta_cm_udf_url - location of libcrypta_cm_udf.so attached via PRAGMA File
#   db_path           - table to scan
#
# `type` is the part of `key` before the first ":"; `ttl_days` is the `Ttl`
# field returned by CryptaCm::ParseMatch divided by 86400 (presumably seconds
# to days - TODO confirm the unit of Ttl).
# Rows whose key starts with "yandexuid:" or "icookie:" are excluded.
# Result columns: type, ttl_days, count.
TTL_DAYS_BY_TYPE_TEMPLATE = """
PRAGMA File("libcrypta_cm_udf.so", '{{crypta_cm_udf_url}}');
PRAGMA Udf = "libcrypta_cm_udf.so";

$get_type = ($key) -> {
    RETURN String::SplitToList($key, ":", 1 AS Limit)[0];
};

$get_ttl_days = ($key, $value) -> {
    RETURN CAST(CryptaCm::ParseMatch($key, $value).Ttl AS Int64) / 86400;
};

SELECT
    type,
    ttl_days,
    COUNT(*) AS `count`,
FROM `{{db_path}}`
WHERE `key` NOT REGEXP("^(yandexuid|icookie):")
GROUP BY
    $get_type(key) AS type,
    $get_ttl_days(key, value) AS ttl_days
;
"""

# Module-level logger; the query helpers below log the rendered query text
# through it before execution.
logger = logging.getLogger(__name__)


def upper_bound_for_rounded_float(array, x):
    """Return the first element of sorted ``array`` that is strictly greater than ``x``.

    ``array`` must be sorted in ascending order (a ``bisect`` requirement) and
    ``x`` must be strictly less than its last element, which guarantees that
    such an element exists.

    Raises:
        AssertionError: if ``x`` is not below ``array[-1]``.
    """
    assert x < array[-1]
    # bisect_right yields the insertion point after any elements equal to x,
    # i.e. the position of the smallest element greater than x.
    return array[bisect.bisect_right(array, x)]


def group_to_buckets(buckets, measurements):
    """Aggregate per-value counts into coarser buckets.

    Args:
        buckets: ascending sequence of bucket bounds; every key of
            ``measurements`` (after ``int()`` conversion) must be strictly
            below the last bound.
        measurements: mapping of value -> count. Keys are converted with
            ``int()``, so they may be ints or numeric strings.

    Returns:
        A ``collections.defaultdict(int)`` mapping ``str(bound)`` to the sum
        of counts of all values whose smallest strictly-greater bound is
        ``bound``.

    Raises:
        AssertionError: propagated from ``upper_bound_for_rounded_float`` when
            a value is not below the last bucket bound.
    """
    ret = collections.defaultdict(int)

    # ``items()`` (not the Python 2-only ``iteritems()``) so the function
    # works with dicts on both Python 2 and Python 3.
    for key, value in measurements.items():
        bucket = str(upper_bound_for_rounded_float(buckets, int(key)))
        ret[bucket] += value

    return ret


def _get_count_stats(yt_client, yql_client, query_text):
    """Execute ``query_text`` within a YT transaction and return its first result table.

    Args:
        yt_client: client providing ``Transaction()`` as a context manager.
        yql_client: client whose ``execute(query, transaction=...)`` returns
            an indexable collection of result tables.
        query_text: the query to run.

    Returns:
        The first result table, after ``fetch_full_data()`` has been called
        on it.
    """
    with yt_client.Transaction() as transaction:
        tables = yql_client.execute(query_text, transaction=transaction.transaction_id)
        first_table = tables[0]
        # Fetch the data before leaving the transaction scope (presumably this
        # materializes the rows client-side - TODO confirm).
        first_table.fetch_full_data()
        return first_table


def _get_simple_count_stats(yt_client, yql_client, query_text):
    """Run a two-column (key, count) query and return ``{key: int(count)}``.

    The query text is logged at INFO level before execution. Each result row
    must unpack into exactly two values.
    """
    logger.info("\n%s", query_text)

    table = _get_count_stats(yt_client, yql_client, query_text)

    counts = {}
    for key, count in table.rows:
        counts[key] = int(count)
    return counts


def _get_multi_column_count_stats(yt_client, yql_client, query_text):
    """Run a query and return its rows as a tuple of ``{column_name: value}`` dicts.

    The query text is logged at INFO level before execution. Column names are
    taken from the first item of each entry in the result table's ``columns``.
    """
    logger.info("\n%s", query_text)

    table = _get_count_stats(yt_client, yql_client, query_text)

    names = tuple([column[0] for column in table.columns])
    records = [dict(zip(names, row)) for row in table.rows]
    return tuple(records)


def get_id_type_count(yt_client, yql_client, db_path):
    """Return ``{id_type: row_count}`` for the table at ``db_path``.

    Renders ID_TYPE_COUNT_QUERY_TEMPLATE with ``db_path`` and executes it.
    """
    template_args = {"db_path": db_path}
    rendered_query = templater.render_template(ID_TYPE_COUNT_QUERY_TEMPLATE, template_args)
    return _get_simple_count_stats(yt_client, yql_client, rendered_query)


def get_touched_days_ago(yt_client, yql_client, db_path, crypta_cm_udf_url):
    """Return row counts of ``db_path`` bucketed by "touched days ago".

    Renders TOUCHED_DAYS_AGO_QUERY_TEMPLATE with the current time, the table
    path and the UDF URL, executes it, and folds the per-day counts into the
    DAYS_AGO_BUCKETS bounds.

    Returns:
        Mapping of ``str(bucket_bound)`` -> summed count.
    """
    template_args = {
        "now_ts": time_utils.get_current_time(),
        "db_path": db_path,
        "crypta_cm_udf_url": crypta_cm_udf_url,
    }
    rendered_query = templater.render_template(TOUCHED_DAYS_AGO_QUERY_TEMPLATE, template_args)
    per_day_counts = _get_simple_count_stats(yt_client, yql_client, rendered_query)
    return group_to_buckets(DAYS_AGO_BUCKETS, per_day_counts)


def get_ttl_days_by_type(yt_client, yql_client, db_path, crypta_cm_udf_url):
    """Return row counts of ``db_path`` per (id type, TTL in days).

    Renders TTL_DAYS_BY_TYPE_TEMPLATE and executes it.

    Returns:
        A tuple of dicts, one per result row, keyed by result column name.
    """
    template_args = {
        # NOTE(review): TTL_DAYS_BY_TYPE_TEMPLATE does not reference now_ts;
        # it is still passed here to keep the rendering call unchanged.
        "now_ts": time_utils.get_current_time(),
        "db_path": db_path,
        "crypta_cm_udf_url": crypta_cm_udf_url,
    }
    rendered_query = templater.render_template(TTL_DAYS_BY_TYPE_TEMPLATE, template_args)
    return _get_multi_column_count_stats(yt_client, yql_client, rendered_query)
