import os
from datetime import timedelta

from crypta.lib.python import templater
from crypta.lib.python.nirvana.nirvana_helpers.nirvana_transaction import NirvanaTransaction
from crypta.lib.python.yt import yt_helpers
from crypta.prism.lib.config import config
from crypta.prism.lib.nirvana import utils
from crypta.prism.services.offline_weighting.lib import table_paths


# YQL query template that computes per-cluster revenue and search-activity
# statistics. It is rendered with templater.render_template() in calculate()
# (the `{{ ... }}` placeholders look like Jinja2 syntax -- TODO confirm).
#
# Template variables:
#   date                          -- end date of the aggregation window
#   stats_aggregation_period      -- window length in days; the start date is
#                                    `date` minus (period - 1) days
#   clusters_table                -- table with `icookie` and `cluster` columns
#   statbox_event_money_dir       -- directory scanned with RANGE(start, end)
#   user_sessions_pub.dir         -- directory scanned with RANGE(start, end, suffix)
#   user_sessions_pub.path_in_dir -- the suffix passed to that RANGE
#   main_serp_abc_id, rsya_abc_id -- abc_id values used for the serp_* / rsya_* splits
#   cluster_stats_output          -- destination table, overwritten (WITH TRUNCATE)
#
# Pipeline:
#   $icookie_segment   -- icookie -> cluster (cluster cast to String).
#   $revenue_table     -- revenue/shows/clicks rows with fraudbits == 0 and
#                         placeid in (542, 1542).
#   $icookie_searches  -- number of rows per `key`, keeping only keys whose first
#                         character is 'y'; that character is stripped to get icookie.
#   $icookie_revenue   -- per-icookie sums, total and per abc_id, NULL sums replaced by 0.
#   $all_uids          -- distinct icookies seen in any of the three sources, so
#                         users without a cluster are kept and labelled 'Unknown'.
#   $platform_metrics  -- per-cluster aggregates. `*_avg` metrics average per-user
#                         ratios with the denominator clamped to at least 1 via
#                         MAX_OF; `*_sum` metrics are ratios of cluster-wide sums.
cluster_stats_query = '''
PRAGMA DisableAnsiInForEmptyOrNullableItemsCollections;
PRAGMA yt.DataSizePerJob('100m');

$end_date_iso = '{{ date }}';
$start_date_iso = CAST(CAST($end_date_iso AS Date) - DateTime::IntervalFromDays({{ stats_aggregation_period }} - 1) AS String);

$icookie_segment = (
    SELECT
        CAST(cluster AS String) AS cluster,
        icookie,
    FROM `{{ clusters_table }}`
);

$revenue_table = (
    SELECT
        revenue,
        icookie,
        abc_id,
        shows,
        clicks,
    FROM RANGE(
        `{{ statbox_event_money_dir }}`,
        $start_date_iso,
        $end_date_iso,
    )
    WHERE
        fraudbits == 0
        AND placeid IN (542, 1542)
);

$icookie_searches = (
    SELECT
        SUBSTRING(key, 1) AS icookie,
        COUNT(*) AS searches,
    FROM RANGE(
        `{{ user_sessions_pub.dir }}`,
        $start_date_iso,
        $end_date_iso,
        `{{ user_sessions_pub.path_in_dir }}`,
    )
    WHERE SUBSTRING(key, 0, 1) == 'y' -- key is yandexuid
    GROUP BY key
);

$icookie_revenue = (
    SELECT
        SUM(revenue) ?? 0. AS revenue,
        SUM_IF(revenue, abc_id == {{ main_serp_abc_id }}) ?? 0. AS serp_revenue,
        SUM_IF(revenue, abc_id == {{ rsya_abc_id }}) ?? 0. AS rsya_revenue,
        SUM(shows) ?? 0 AS shows,
        SUM_IF(shows, abc_id == {{ main_serp_abc_id }}) ?? 0 AS serp_shows,
        SUM_IF(shows, abc_id == {{ rsya_abc_id }}) ?? 0 AS rsya_shows,
        SUM(clicks) ?? 0 AS clicks,
        SUM_IF(clicks, abc_id == {{ main_serp_abc_id }}) ?? 0 AS serp_clicks,
        SUM_IF(clicks, abc_id == {{ rsya_abc_id }}) ?? 0 AS rsya_clicks,
        CAST(icookie AS String) AS icookie,
    FROM $revenue_table
    GROUP BY icookie
);

$all_uids = (
    SELECT DISTINCT icookie
    FROM (
        SELECT icookie FROM $icookie_revenue
        UNION ALL
        SELECT icookie FROM $icookie_searches
        UNION ALL
        SELECT icookie FROM $icookie_segment
    )
);

$icookie_segment_with_unmatched = (
    SELECT
        all_uids.icookie AS icookie,
        segment.cluster AS cluster,
    FROM $all_uids AS all_uids
    LEFT JOIN $icookie_segment AS segment
    USING (icookie)
);

$revenue_segment_searches = (
    SELECT
        revenue_segment.*,
        searches.searches ?? 0 AS searches,
    FROM (
        SELECT
            revenue.*,
            segment.icookie AS robust_icookie,
            segment.cluster ?? 'Unknown' AS cluster,
        FROM $icookie_segment_with_unmatched AS segment
        LEFT JOIN $icookie_revenue AS revenue
        USING(icookie)
    ) AS revenue_segment
    LEFT JOIN $icookie_searches AS searches
    ON revenue_segment.robust_icookie == searches.icookie
);

$platform_metrics = (
    SELECT
        AVG(1000. * revenue / MAX_OF(searches, 1)) AS CPT_avg,
        1000 * SUM(revenue) / SUM(searches) AS CPT_sum,
        AVG(1000. * serp_revenue / MAX_OF(searches, 1)) AS serp_CPT_avg,
        1000 * SUM(serp_revenue) / SUM(searches) AS serp_CPT_sum,
        AVG(1000. * rsya_revenue / MAX_OF(searches, 1)) AS rsya_CPT_avg,
        1000 * SUM(rsya_revenue) / SUM(searches) AS rsya_CPT_sum,

        AVG(1000. * revenue / MAX_OF(shows, 1)) AS CPM_avg,
        1000 * SUM(revenue) / SUM(shows) AS CPM_sum,
        AVG(1000. * serp_revenue / MAX_OF(serp_shows, 1)) AS serp_CPM_avg,
        1000 * SUM(serp_revenue) / SUM(serp_shows) AS serp_CPM_sum,
        AVG(1000. * rsya_revenue / MAX_OF(rsya_shows, 1)) AS rsya_CPM_avg,
        1000 * SUM(rsya_revenue) / SUM(rsya_shows) AS rsya_CPM_sum,

        AVG(revenue / MAX_OF(clicks, 1)) AS CPC_avg,
        SUM(revenue) / SUM(clicks) AS CPC_sum,
        AVG(serp_revenue / MAX_OF(serp_clicks, 1)) AS serp_CPC_avg,
        SUM(serp_revenue) / SUM(serp_clicks) AS serp_CPC_sum,
        AVG(rsya_revenue / MAX_OF(rsya_clicks, 1)) AS rsya_CPC_avg,
        SUM(rsya_revenue) / SUM(rsya_clicks) AS rsya_CPC_sum,

        COUNT_IF(searches IS NOT NULL AND searches > 0) AS active_search_users,
        SUM(revenue) AS revenue_sum,
        SUM(revenue) * 1. / COUNT_IF(searches IS NOT NULL AND searches > 0) AS revenue_per_active_search_user,
        SUM(serp_revenue) AS serp_revenue_sum,
        SUM(rsya_revenue) AS rsya_revenue_sum,
        SUM(searches) AS total_searches,
        COUNT(*) AS total_uids,
        SUM(serp_revenue) * 1. / COUNT_IF(searches IS NOT NULL AND searches > 0) AS serp_revenue_per_active_search_user,
        SUM(rsya_revenue) * 1. / COUNT_IF(searches IS NOT NULL AND searches > 0) AS rsya_revenue_per_active_search_user,
        COUNT_IF(searches IS NOT NULL AND searches > 0) * 1. / COUNT(*) AS active_users_share,
        SUM(serp_clicks) AS serp_clicks,
        SUM(rsya_clicks) AS rsya_clicks,
        SUM(serp_shows) AS serp_shows,
        SUM(serp_shows) * 1. / SUM(searches) AS serp_ShowPSearch_sum,
        SUM(serp_clicks) * 1. / SUM(serp_shows) AS serp_ClickPShow_sum,

        cluster,
    FROM $revenue_segment_searches
    GROUP BY cluster
);

INSERT INTO `{{ cluster_stats_output }}` WITH TRUNCATE
SELECT * FROM $platform_metrics;
'''


def calculate(
    yql_client,
    transaction,
    date,
    cluster_stats_output,
    clusters_table=None,
    statbox_event_money_dir=config.STATBOX_PRODUCT_MONEY_DIR,
    user_sessions_pub=config.USER_SESSIONS_NANO_BY_DATE_TABLE,
    stats_aggregation_period=config.CLUSTER_STATS_AGGREGATION_PERIOD_DAYS,
):
    """Render ``cluster_stats_query`` and execute it inside ``transaction``.

    Args:
        yql_client: Client whose ``execute`` method runs the rendered query.
        transaction: Object exposing ``transaction_id``; its string form is
            passed to ``yql_client.execute``.
        date: End date of the aggregation window, substituted into the query
            as ``date``. Also used as the table name under
            ``config.CLUSTERS_DIR`` when ``clusters_table`` is not given.
        cluster_stats_output: Path of the table the query writes to.
        clusters_table: Path of the clusters table; a falsy value selects
            ``<config.CLUSTERS_DIR>/<date>``.
        statbox_event_money_dir: Directory with the revenue tables.
        user_sessions_pub: Path pattern that must split on ``'/{}/'`` into
            exactly two parts: the directory and the path inside it.
        stats_aggregation_period: Length of the aggregation window in days.

    Raises:
        ValueError: If ``user_sessions_pub`` does not split on ``'/{}/'``
            into exactly two parts.
    """
    if not clusters_table:
        clusters_table = os.path.join(config.CLUSTERS_DIR, date)

    # The pattern carries a '/{}/' placeholder between the directory and the
    # per-date table path; the query needs the two halves separately.
    sessions_dir, sessions_path_in_dir = user_sessions_pub.split('/{}/')

    template_vars = dict(
        clusters_table=clusters_table,
        cluster_stats_output=cluster_stats_output,
        statbox_event_money_dir=statbox_event_money_dir,
        date=date,
        user_sessions_pub=dict(
            dir=sessions_dir,
            path_in_dir=sessions_path_in_dir,
        ),
        stats_aggregation_period=stats_aggregation_period,
        main_serp_abc_id=config.MAIN_SERP_ABC_ID,
        rsya_abc_id=config.RSYA_ABC_ID,
    )
    rendered_query = templater.render_template(cluster_stats_query, vars=template_vars)

    yql_client.execute(
        rendered_query,
        title='YQL Prism calculate cluster stats',
        transaction=str(transaction.transaction_id),
    )


def calculate_by_date(yt_client, yql_client, date, nv_output_table_file=None, custom_output_dir=None):
    """Compute cluster stats for ``date`` and post-process the output table.

    All steps run inside a ``NirvanaTransaction`` opened on ``yt_client``.
    Table paths come from ``table_paths.resolve(custom_output_dir, date)``.

    Args:
        yt_client: YT client used for the transaction, the attribute and the TTL.
        yql_client: Client passed through to ``calculate``.
        date: Date for which the stats are computed.
        nv_output_table_file: When not ``None``, passed with the output table
            path to ``utils.make_text_output``.
        custom_output_dir: When ``None``, the output table additionally gets
            the ``stage`` attribute set to ``'full'`` and a TTL of
            ``config.OFFLINE_WEIGHTING_OUTPUT_TTL_DAYS`` days.
    """
    with NirvanaTransaction(yt_client) as transaction:
        tables = table_paths.resolve(custom_output_dir, date)
        stats_table = tables['cluster_stats']
        clusters_table = tables['clusters']

        calculate(
            yql_client=yql_client,
            transaction=transaction,
            cluster_stats_output=stats_table,
            clusters_table=clusters_table,
            date=date,
        )

        writes_to_default_location = custom_output_dir is None
        if writes_to_default_location:
            yt_client.set_attribute(stats_table, 'stage', 'full')
            ttl = timedelta(days=config.OFFLINE_WEIGHTING_OUTPUT_TTL_DAYS)
            yt_helpers.set_ttl(stats_table, ttl, yt_client=yt_client)

        if nv_output_table_file is not None:
            utils.make_text_output(nv_output_table_file, stats_table)
