#!/usr/bin/env python
# -*- coding: utf-8 -*-

import datetime
import logging

from crypta.lib.python import templater
from crypta.lib.python.nirvana.nirvana_helpers.nirvana_transaction import NirvanaTransaction
from crypta.lib.python.yt import yt_helpers
from crypta.lookalike.lib.python.utils import utils
from crypta.lookalike.lib.python.utils.config import (
    config,
    environment,
)


# Module-level logger named after this module; nothing in the code shown in this file calls it.
logger = logging.getLogger(__name__)

# Jinja-templated YQL fragment that defines two named expressions and writes nothing itself:
#   $apps_by_crypta_id - rows of `app_by_devid_monthly` inner-joined by (id, id_type) with the
#                        idfa and gaid `crypta_id` matching tables under `matching_dir`;
#                        `additional_options` is spliced in verbatim right after the join.
#   $apps_for_training - one row per (crypta_id, app) after flattening the `apps` list, with
#                        GroupID = app || '__' || id_type, segment_type 'app' and
#                        ad_types = [id_type ?? 'other'].
# `$apps_for_training` is consumed by select_segments_query_template, so this fragment has to be
# placed before that template in the final query text (select() concatenates them in that order).
# Template variables: app_by_devid_monthly, matching_dir, additional_options.
select_apps_by_crypta_id = """
$apps_by_crypta_id = (
    SELECT
        apps,
        target_id AS crypta_id,
        apps.id_type AS id_type,
    FROM `{{app_by_devid_monthly}}` as apps
    INNER JOIN CONCAT(
        `{{matching_dir}}/idfa/crypta_id`,
        `{{matching_dir}}/gaid/crypta_id`,
    ) AS matching
    USING (id, id_type)
    {{additional_options}}
);

$apps_for_training = (
    SELECT
        'crypta_id' AS IdType,
        crypta_id AS IdValue,
        app || '__' || id_type AS GroupID,
        'app' AS segment_type,
        [id_type ?? 'other'] AS ad_types,
    FROM $apps_by_crypta_id
    FLATTEN LIST BY apps AS app
);
"""

# Jinja-templated YQL fragment that defines `$rmp_goals` and writes nothing itself:
#   1. $latest_postback_log_table - the first `days_for_rmp_goals` paths of the `postback_log_dir`
#      folder when ordered by Path descending.
#   2. $rmp_goals_with_ids (first definition) - postback rows inner-joined with `rmp_goals_table`
#      on GoalID, with missing device ids replaced by ''.
#   3. $rmp_goals_with_ids (redefinition) - distinct (id, id_type, GoalID), where the id is the
#      first non-empty one in the priority order gaid, idfa, ifv, oaid.
#   4. $rmp_goals - those ids inner-joined by (id, id_type) with the `<id_type>/crypta_id`
#      matching table of every entry in `id_types`; rows get IdType 'crypta_id',
#      segment_type 'rmp_goal' and ad_types ['rmp_goal'].
# Despite the `_by_yuid` in the variable name, the resulting IdType is 'crypta_id'.
# `$rmp_goals` is consumed by select_segments_query_template, so this fragment has to precede it.
# NOTE: the matching directory placeholder is spelled `matcing_dir` (sic). select() passes the
# same misspelled key, so the two must be renamed together or the path renders without its prefix.
# Template variables: postback_log_dir, days_for_rmp_goals, rmp_goals_table, id_types, matcing_dir.
select_rmp_goals_by_yuid_template = """
$latest_postback_log_table = (
    SELECT
        Path,
    FROM FOLDER(`{{postback_log_dir}}`)
    ORDER BY Path DESC
    LIMIT {{days_for_rmp_goals}}
);

$rmp_goals_with_ids = (
    SELECT
        postback.GoalID AS GoalID,
        GoogleAdID ?? '' AS gaid,
        IDFA ?? '' AS idfa,
        IDFV ?? '' AS ifv,
        OAID ?? '' AS oaid,
    FROM CONCAT($latest_postback_log_table) AS postback
    INNER JOIN `{{rmp_goals_table}}` AS rmp_goals
    USING (GoalID)
);

$rmp_goals_with_ids = (
    SELECT DISTINCT
        CASE
            WHEN gaid != '' THEN gaid
            WHEN idfa != '' THEN idfa
            WHEN ifv != '' THEN ifv
            ELSE oaid
        END AS id,
        CASE
            WHEN gaid != '' THEN 'gaid'
            WHEN idfa != '' THEN 'idfa'
            WHEN ifv != '' THEN 'ifv'
            ELSE 'oaid'
        END AS id_type,
        GoalID,
    FROM $rmp_goals_with_ids
);

$rmp_goals = (
    SELECT
        'crypta_id' AS IdType,
        target_id AS IdValue,
        CAST(GoalID AS String) AS GroupID,
        'rmp_goal' AS segment_type,
        ['rmp_goal'] AS ad_types,
    FROM  $rmp_goals_with_ids AS goals
    INNER JOIN
    CONCAT(
    {% for id_type in id_types %}
        `{{matcing_dir}}/{{id_type}}/crypta_id`,
    {% endfor %}
    ) AS matching
    USING(id, id_type)
);
"""

# Jinja-templated YQL that selects the segments used for lookalike training and writes two tables.
# It references `$rmp_goals` and `$apps_for_training`, which are NOT defined here: the fragments
# select_rmp_goals_by_yuid_template and select_apps_by_crypta_id must come earlier in the same
# query (select() concatenates them in that order).
#
# Stages, in the order they appear:
#   * $crm_segments / $segments_full_counts - non-deleted audience segments of the listed content
#     types, and per-segment id counts kept within
#     [min_users_per_segment, upper_limit_for_ids_in_segment].
#   * $goals_with_counts / $metrika_with_counts - the same size filter for goals and Metrika
#     segments, restricted to rows not older than `last_date_for_goals` / `last_date_for_metrika`.
#     The goals filter uses `-ts` (presumably that table stores negated timestamps - TODO confirm).
#   * $selected_crm_segments - audience rows joined with `training_segments_table`;
#     GroupID is SegmentID + audience_ids_start.
#   * $autobudget_goals, $ads_goals, $meaningful_goals - three mutually exclusive goal sets
#     (each excludes goals already taken by the previous ones), each limited to the
#     `max_goals_per_type` most recently used goals; $all_selected_goals is their union.
#   * $selected_goals, $selected_metrika - per-yandexuid rows for the chosen goals / Metrika segments.
#   * $segments_for_lal_by_yandexuid, $segments_for_lal_by_cryptaid, $segments_for_lal_training -
#     unions of the above, kept only for ids present in `user_data_table` /
#     `user_data_by_cryptaid_table`.
#   * $segments_counts - counts per (GroupID, segment_type, ad_types), filtered by the per-type
#     minimum from $min_users_dict and by the upper limit, then ranked inside each segment_type
#     in RANDOM(GroupID) order.
#   * Output: `segments_counts_table` and `output_table` (both WITH TRUNCATE) receive only
#     segments with row_rank <= max_segments_per_type.
#
# NOTE(review): the final join between $segments_for_lal_training and $segments_counts uses
# GroupID only, while counts and ranks are computed per (GroupID, segment_type, ad_types). If the
# same GroupID string can occur under two segment types, rows would be duplicated or kept for a
# type that did not pass the rank filter - confirm the id spaces are disjoint.
# NOTE(review): $segments_counts (with its RANDOM ordering) feeds both INSERTs; verify that both
# see the same ranking.
select_segments_query_template = """
PRAGMA AnsiInForEmptyOrNullableItemsCollections;

$crm_segments = (
    SELECT id AS SegmentID
    FROM `{{segments_simple_table}}`
    WHERE deleted == 0 AND content_type in ('crm', 'client_id', 'phone', 'email', 'idfa_gaid')
);

$segments_full_counts = (
    SELECT
        SegmentID,
        COUNT(*) AS ids_cnt,
    FROM `{{segments_full_table}}`
    GROUP BY SegmentID
    HAVING {{min_users_per_segment}} <= COUNT(*) AND COUNT(*) <= {{upper_limit_for_ids_in_segment}}
);

$goals_with_counts = (
    SELECT
        goal_id AS GroupID,
        COUNT(*) AS ids_cnt,
    FROM `{{goals_yandexuid_table}}`
    WHERE Cast(DateTime::MakeDate(DateTime::FromSeconds(CAST(-ts AS Uint32))) AS String) >= '{{last_date_for_goals}}'
    GROUP BY goal_id
    HAVING {{min_users_per_segment}} <= COUNT(*) AND COUNT(*) <= {{upper_limit_for_ids_in_segment}}
);

$metrika_with_counts = (
    SELECT
        segment_id AS GroupID,
        COUNT(*) AS ids_cnt,
    FROM `{{metrika_yandexuid_table}}`
    WHERE Cast(DateTime::MakeDate(DateTime::FromSeconds(CAST(ts AS Uint32))) AS String) >= '{{last_date_for_metrika}}'
    GROUP BY segment_id
    HAVING {{min_users_per_segment}} <= COUNT(*) AND COUNT(*) <= {{upper_limit_for_ids_in_segment}}
);

$selected_crm_segments = (
    SELECT
        segments_full.yandexuid AS yandexuid,
        segments_full.SegmentID + {{audience_ids_start}} AS GroupID,
        'audience' AS segment_type,
        ads_segments.ad_types as ad_types,
    FROM `{{segments_full_table}}` AS segments_full
    INNER JOIN $crm_segments AS crm_segments
    ON segments_full.SegmentID == crm_segments.SegmentID
    INNER JOIN $segments_full_counts AS segments_full_counts
    ON segments_full.SegmentID == segments_full_counts.SegmentID
    INNER JOIN `{{training_segments_table}}` AS ads_segments
    ON segments_full.SegmentID == ads_segments.segment_id
);

$ads_goals_with_counts = (
    SELECT
        all_goals.goal_id AS goal_id,
        all_goals.ad_types AS ad_types,
        all_goals.date_last_used AS date_last_used,
    FROM $goals_with_counts AS goals_with_counts
    INNER JOIN `{{training_goals_table}}` AS all_goals
    ON all_goals.goal_id == goals_with_counts.GroupID
);

$autobudget_goals = (
    SELECT
        goal_id,
        date_last_used,
        ad_types,
    FROM $ads_goals_with_counts
    WHERE 'autobudget_goal' IN ToSet(ad_types)
    ORDER BY date_last_used	DESC
    LIMIT {{max_goals_per_type}}
);

$ads_goals = (
    SELECT
        ads.goal_id AS goal_id,
        ads.date_last_used AS date_last_used,
        ads.ad_types AS ad_types,
    FROM $ads_goals_with_counts AS ads
    LEFT join $autobudget_goals AS ab
    USING (goal_id)
    WHERE ab.goal_id IS null AND ('multiplier_goal' IN ToSet(ads.ad_types) OR 'retargeting_goal' IN ToSet(ads.ad_types))
    ORDER BY date_last_used	DESC
    LIMIT {{max_goals_per_type}}
);

$meaningful_goals = (
    SELECT
        meaningful.goal_id AS goal_id,
        meaningful.date_last_used AS date_last_used,
        meaningful.ad_types AS ad_types,
    FROM $ads_goals_with_counts AS meaningful
    LEFT JOIN $autobudget_goals AS ab
    ON ab.goal_id == meaningful.goal_id
    LEFT JOIN $ads_goals AS ads
    ON ads.goal_id == meaningful.goal_id
    WHERE ab.goal_id is null AND ads.goal_id is null AND 'meaningful_goal' IN ToSet(meaningful.ad_types)
    ORDER BY date_last_used	DESC
    LIMIT {{max_goals_per_type}}
);

$all_selected_goals = (
    SELECT
        goal_id,
        ad_types,
    FROM $meaningful_goals
UNION ALL
    SELECT
        goal_id,
        ad_types,
    FROM $ads_goals
UNION ALL
    SELECT
        goal_id,
        ad_types,
    FROM $autobudget_goals
);

$selected_goals = (
    SELECT
        yandexuid,
        goals_table.goal_id AS GroupID,
        'goal' AS segment_type,
        all_goals.ad_types AS ad_types,
    FROM `{{goals_yandexuid_table}}` AS goals_table
    JOIN $goals_with_counts AS goals_with_counts
    ON goals_table.goal_id == goals_with_counts.GroupID
    INNER JOIN $all_selected_goals AS all_goals
    ON all_goals.goal_id == goals_table.goal_id
);

$selected_metrika = (
    SELECT
        yandexuid,
        segment_id AS GroupID,
        'metrika' AS segment_type,
    FROM `{{metrika_yandexuid_table}}` AS metrika_table
    INNER JOIN $metrika_with_counts AS metrika_with_counts
    ON metrika_table.segment_id == metrika_with_counts.GroupID
);

$segments_for_lal_by_yandexuid = (
    SELECT DISTINCT
        GroupID,
        yandexuid,
        segment_type,
        ad_types,
    FROM $selected_crm_segments
UNION ALL
    SELECT DISTINCT
        GroupID,
        yandexuid,
        segment_type,
        ad_types,
    FROM $selected_goals
UNION ALL
    SELECT DISTINCT
        GroupID,
        yandexuid,
        segment_type,
        ['metrika'] as ad_types,
    FROM $selected_metrika
);

$segments_for_lal_by_cryptaid = (
    SELECT * FROM $rmp_goals
UNION ALL
    SELECT * FROM $apps_for_training
);

$segments_for_lal_training = (
    SELECT
        CAST(all_segments.yandexuid AS String) AS IdValue,
        'yandexuid' AS IdType,
        CAST(all_segments.GroupID AS String) AS GroupID,
        all_segments.segment_type AS segment_type,
        all_segments.ad_types as ad_types,
    FROM $segments_for_lal_by_yandexuid AS all_segments
    INNER JOIN `{{user_data_table}}` VIEW raw AS user_data
    ON all_segments.yandexuid == CAST(user_data.yuid AS Uint64)
UNION ALL
    SELECT all_segments.*
    FROM $segments_for_lal_by_cryptaid AS all_segments
    INNER JOIN `{{user_data_by_cryptaid_table}}` VIEW raw AS user_data
    ON all_segments.IdValue == user_data.CryptaID
);

$min_users_dict = AsDict(
    AsTuple('app', {{min_users_per_app}}),
    AsTuple('audience', {{min_users_per_segment}}),
    AsTuple('goal', {{min_users_per_segment}}),
    AsTuple('metrika', {{min_users_per_segment}}),
    AsTuple('rmp_goal', {{min_users_per_rmp_goal}}),
);


$segments_counts = (
    SELECT
        GroupID,
        segment_type,
        ad_types,
        COUNT(*) AS ids_cnt,
    FROM $segments_for_lal_training
    GROUP BY GroupID, segment_type, ad_types
    HAVING COUNT(*) >= $min_users_dict[segment_type]
        AND COUNT(*) <= {{upper_limit_for_ids_in_segment}}
);

$segments_counts = (
    SELECT
        GroupID,
        segment_type,
        ad_types,
        ids_cnt,
        ROW_NUMBER() OVER w AS row_rank,
    FROM $segments_counts
    WINDOW w AS (
        PARTITION BY segment_type
        ORDER BY RANDOM(GroupID)
    )
);

INSERT INTO `{{segments_counts_table}}`
WITH TRUNCATE

SELECT
    GroupID,
    segment_type,
    ad_types,
    ids_cnt,
FROM $segments_counts
WHERE row_rank <= {{max_segments_per_type}}
ORDER BY GroupID;

INSERT INTO `{{output_table}}`
WITH TRUNCATE

SELECT
    segments_for_lal_training.*,
FROM $segments_for_lal_training AS segments_for_lal_training
INNER JOIN $segments_counts AS segments_counts
USING(GroupID)
WHERE segments_counts.row_rank <= {{max_segments_per_type}}
"""


def select(nv_params):
    """Build the lookalike training segments table and record its size.

    Renders the three YQL templates of this module (apps by crypta_id, RMP
    goals, segment selection), runs them as a single query inside a
    NirvanaTransaction, and then writes the row count of the resulting
    training table as the 'train_val_rows' counter.

    Args:
        nv_params: passed as-is to utils.get_yt_client and utils.get_yql_client.
    """
    yt_client = utils.get_yt_client(nv_params=nv_params)
    yql_client = utils.get_yql_client(nv_params=nv_params)

    # Outside production a fixed reference date is used instead of today's date.
    if environment.environment == 'production':
        current_date = str(datetime.date.today())
    else:
        current_date = '2021-08-21'

    with NirvanaTransaction(yt_client) as transaction:
        # Fragment 1: defines $apps_for_training.
        apps_vars = {
            'app_by_devid_monthly': config.APP_BY_DEVID_MONTHLY,
            'matching_dir': config.PUBLIC_MATCHING_DIRECTORY,
            'additional_options': '' if config.is_production else 'LIMIT 10000000',
        }
        apps_query = templater.render_template(
            template_text=select_apps_by_crypta_id,
            vars=apps_vars,
        )

        # Fragment 2: defines $rmp_goals. The 'matcing_dir' key is spelled the
        # same way as the placeholder inside the template, so it must stay in sync.
        rmp_goals_vars = {
            'id_types': config.ID_TYPES,
            'matcing_dir': config.PUBLIC_MATCHING_DIRECTORY,
            'postback_log_dir': config.POSTBACK_MOBILE_LOG,
            'rmp_goals_table': config.MOBILE_GOALS_TRACKER_TABLE,
            'days_for_rmp_goals': config.DAYS_FOR_RMP_GOALS,
        }
        rmp_goals_query = templater.render_template(
            template_text=select_rmp_goals_by_yuid_template,
            vars=rmp_goals_vars,
        )

        # Fragment 3: the main selection; it consumes the expressions defined by
        # the two fragments above and performs the INSERTs.
        segments_vars = {
            'upper_limit_for_ids_in_segment': config.UPPER_LIMIT_FOR_IDS_IN_SEGMENT,
            'min_users_per_segment': config.MIN_USERS_PER_SEGMENT,
            'min_users_per_rmp_goal': config.MIN_USERS_PER_RMP_GOAL,
            'min_users_per_app': config.MIN_USERS_PER_APP,
            'max_segments_per_type': config.MAX_SEGMENTS_PER_TYPE,
            'max_goals_per_type': config.MAX_GOALS_PER_TYPE,
            'last_date_for_goals': utils.get_date_from_past(current_date=current_date, months=6),
            'last_date_for_metrika': utils.get_date_from_past(current_date=current_date, months=1),
            'audience_ids_start': config.AUDIENCE_SEGMENTS_IDS_START,
            'segments_simple_table': config.AUDIENCE_SEGMENTS_INFO_TABLE,
            'segments_full_table': config.AUDIENCE_SEGMENTS_USERS_TABLE,
            'goals_yandexuid_table': config.GOALS_TABLE,
            'metrika_yandexuid_table': config.METRIKA_TABLE,
            'user_data_table': config.USER_DATA_TABLE,
            'user_data_by_cryptaid_table': config.USER_DATA_BY_CRYPTAID_TABLE,
            'training_goals_table': config.TRAINING_GOALS_TABLE,
            'training_segments_table': config.TRAINING_SEGMENTS_TABLE,
            'segments_counts_table': config.SEGMENTS_WITH_COUNTS_TABLE,
            'output_table': config.SEGMENTS_FOR_LAL_TRAINING_TABLE,
        }
        segments_query = templater.render_template(
            template_text=select_segments_query_template,
            vars=segments_vars,
        )

        # Order matters: the defining fragments go first, the consumer last.
        full_query = '{}\n{}\n{}'.format(apps_query, rmp_goals_query, segments_query)
        yql_client.execute(
            query=full_query,
            transaction=str(transaction.transaction_id),
            title='YQL Get segments and apps for training',
        )

        # Report how many rows the freshly written training table contains.
        counters_table = config.DATALENS_LOOKALIKE_COUNTS_TABLE
        training_rows = yt_client.row_count(config.SEGMENTS_FOR_LAL_TRAINING_TABLE)
        yt_helpers.write_stats_to_yt(
            yt_client=yt_client,
            table_path=counters_table,
            data_to_write={
                'counter_name': 'train_val_rows',
                'count': training_rows,
            },
            schema={
                'counter_name': 'string',
                'count': 'uint64',
            },
        )
