USE hahn;

-- responsible: yelmurat@
-- ticket: https://st.yandex-team.ru/PODB-21237
-- todo:
-- 1. add $start_date and $end_date
-- 2. add $arc_revision
-- 3. is unquote_url_python necessary? Why do we need it?


-- DECLARE $yt_folder_path AS String;
-- DECLARE $start_date AS String;
-- DECLARE $end_date AS String;
-- DECLARE $arc_revision;
-- Run parameters. The %%...%% tokens look like template placeholders that are
-- presumably substituted before the query is submitted (TODO confirm with the launcher).
$yt_table_path = '%%YT_TABLE_PATH%%';
$use_fast_dump = %%USE_FAST_DUMP%%;

-- The output table is the configured path plus a version suffix.
$version = 'v1_7';
$output_path = $yt_table_path || '_' || $version;

-- Date arguments passed to the log and card readers:
-- a fixed start and the current UTC date as the end.
$start_date = "2021-01-01";
$end_date = SELECT CAST(CurrentUtcDate() AS String);

PRAGMA library("default_library.sql", 'arc://analytics/unifav/dashboard/default_library.sql?rev=%%REVISION%%');
IMPORT `default_library` SYMBOLS
	$string_date_to_timestamp,

    $get_cards,
    $get_redir_logs;


-- Tells whether a single `name=value` query fragment belongs to the list of
-- CGI parameters that must be ignored when URLs are compared.
-- The parameter name is the part of $cgi that precedes the first '='.
-- NOTE(review): when $cgi has no '=', Find yields NULL and the whole fragment is
-- presumably treated as the name - confirm Substring's NULL-length behaviour.
$is_bad_cgi = ($cgi) -> {
    $eq_pos = Find($cgi, '=');
    $param_name = Substring($cgi, NULL, $eq_pos);
    $ignored_params = AsList('ento');
    RETURN $param_name IN $ignored_params;
};

-- Builds a canonical form of a URL so that the same page can be matched
-- between click logs and cards. Steps:
--   1. apply Url::Decode four times in a row (handles nested percent-encoding);
--   2. replace every 'yandex.com' substring of the decoded URL with 'yandex.ru';
--   3. split the query by '&', drop parameters rejected by $is_bad_cgi, sort the rest;
--   4. return <host via Url::CutWWW2><path>?<sorted query>; the scheme is not included.
-- NOTE(review): the '?' is appended unconditionally, and a NULL host, path or
-- query presumably makes the whole result NULL - confirm this is acceptable.
$preprocess_url = ($url) -> {
    $fully_decoded = Url::Decode(Url::Decode(Url::Decode(Url::Decode($url))));
    $unified_domain = String::ReplaceAll($fully_decoded, 'yandex.com', 'yandex.ru');
    $parts = Url::Parse($unified_domain);

    $kept_params = ListFilter(
        String::SplitToList($parts.Query, '&'),
        ($param) -> { RETURN NOT $is_bad_cgi($param); }
    );
    $canonical_query = String::JoinFromList(ListSort($kept_params), '&');

    RETURN Url::CutWWW2($parts.Host) || $parts.Path || '?' || $canonical_query;
};


-- Log rows produced by the imported $get_redir_logs, called with $start_date and $end_date.
-- The queries below read each row through its `dict` column, looked up by key.
$redir_logs = PROCESS $get_redir_logs($start_date, $end_date);

-- Events that already carry a card id: card clicks, external-link clicks,
-- and `access` events whose page name is `card`.
$not_null_cardid_clicks =
SELECT
    `dict`["cardId"] AS card_id,
    $string_date_to_timestamp(`dict`["iso_eventtime"]) AS event_ts
FROM $redir_logs
WHERE
    `dict`["cardId"] IS NOT NULL
    AND (
        `dict`["path"] IN ("click.card", "click.link_external")
        OR (`dict`["path"] == "access" AND `dict`['pageName'] == "card")
    )
;

-- Events of the same kinds (card click, external-link click, `access` on the
-- `card` page) that have NO card id but do have a user id and an href.
-- The href is normalised so the card can later be looked up by URL and owner.
$null_cardid_clicks =
SELECT
    `dict`["userId"] AS event_user_id,
    $string_date_to_timestamp(`dict`["iso_eventtime"]) AS event_ts,
    $preprocess_url(`dict`["href"]) AS preprocessed_url
FROM $redir_logs
WHERE
    `dict`["cardId"] IS NULL
    AND `dict`["userId"] IS NOT NULL
    AND `dict`["href"] IS NOT NULL
    AND (
        `dict`["path"] IN ("click.card", "click.link_external")
        OR (`dict`["path"] == "access" AND `dict`['pageName'] == "card")
    )
;

-- All columns returned by the imported $get_cards plus a normalised URL.
-- The URL is taken from card_source_meta_page_url, falling back to
-- card_content_source_url when the former is NULL.
$cards =
SELECT
    src.*,
    $preprocess_url(
        COALESCE(src.card_source_meta_page_url, src.card_content_source_url)
    ) AS preprocessed_url
FROM $get_cards($start_date, $end_date, $use_fast_dump) AS src
;


-- Recovers the card id for events logged without one: an event is attributed
-- to a card when the event's user owns the card and the normalised URLs are equal.
$null_cardid_clicks_joined_cardid =
SELECT
    cards.card_id AS card_id,
    clicks.event_ts AS event_ts
FROM $null_cardid_clicks AS clicks
    INNER JOIN $cards AS cards
        ON cards.card_owner_id == clicks.event_user_id
            AND cards.preprocessed_url == clicks.preprocessed_url
;


-- Every (card_id, event_ts) pair: events logged with a card id together with
-- events whose card id was recovered through the URL + owner match.
$total_clicks =
SELECT
    card_id,
    event_ts
FROM $not_null_cardid_clicks
UNION ALL
SELECT
    card_id,
    event_ts
FROM $null_cardid_clicks_joined_cardid
;


-- Earliest event timestamp for each card.
$total_clicks_grouped =
SELECT
    clicks.card_id AS card_id,
    MIN(clicks.event_ts) AS min_event_ts
FROM $total_clicks AS clicks
GROUP BY clicks.card_id
;


-- One row per $cards row with the timestamp of its first click attached.
-- LEFT JOIN keeps cards that were never clicked; their min_event_ts is NULL.
$card_joined_clicks =
SELECT
    cards.card_id AS card_id,
    cards.card_service_created_at_ts AS card_service_created_at_ts,
    cards.is_card_domain_market AS is_card_domain_market,
    cards.card_dashboard_type AS card_dashboard_type,
    first_click.min_event_ts AS min_event_ts
FROM $cards AS cards
    LEFT JOIN $total_clicks_grouped AS first_click
        ON first_click.card_id == cards.card_id
;


-- Sentinel `delta` for cards that were never clicked. It is larger than every
-- d_N threshold used downstream, so such cards count as "not returned" for all N.
$no_click_delta = 999999;

-- Delay between card creation and the first click on the card.
--   card_created_date  date built from card_service_created_at_ts
--   delta              DateTime::ToDays of (first click - creation);
--                      $no_click_delta when the card has no click
-- NOTE(review): if ToDays truncates toward zero, a click that happened less than
-- a day BEFORE the card was created gets delta = 0 and passes the `delta >= 0`
-- filter applied downstream - confirm this is intended.
$deltas =
SELECT
    DateTime::MakeDate(card_service_created_at_ts) AS card_created_date,
    IF(
        min_event_ts IS NOT NULL,
        DateTime::ToDays(min_event_ts - card_service_created_at_ts),
        $no_click_delta
    ) AS delta,
    card_id,
    is_card_domain_market,
    card_dashboard_type
FROM $card_joined_clicks
;


-- Turns the day delta into one boolean flag per retention window:
-- d_N is true when the first click happened within N days of card creation.
-- Rows with a negative delta (first click earlier than creation) are dropped.
$returned_d =
SELECT
    card_created_date AS fielddate,
    is_card_domain_market,
    card_dashboard_type,
    (delta <= 1) AS d_1,
    (delta <= 7) AS d_7,
    (delta <= 14) AS d_14,
    (delta <= 30) AS d_30,
    (delta <= 60) AS d_60,
    (delta <= 90) AS d_90,
    (delta <= 120) AS d_120,
    (delta <= 180) AS d_180
FROM $deltas
WHERE delta >= 0
;


-- Aggregates the retention flags by creation date, market flag and dashboard type.
--   cnt          number of rows in the group
--   d1 .. d180   AVG of the corresponding boolean flag
--   sd1 .. sd180 number of rows whose flag is true
-- The date is cast to String for grouping; a NULL date falls into the "" group.
$grouped_deltas =
SELECT
    fielddate,
    is_card_domain_market,
    card_dashboard_type,
    COUNT(*) AS cnt,

    AVG(d_1) AS d1,
    AVG(d_7) AS d7,
    AVG(d_14) AS d14,
    AVG(d_30) AS d30,
    AVG(d_60) AS d60,
    AVG(d_90) AS d90,
    AVG(d_120) AS d120,
    AVG(d_180) AS d180,

    SUM(IF(d_1, 1, 0)) AS sd1,
    SUM(IF(d_7, 1, 0)) AS sd7,
    SUM(IF(d_14, 1, 0)) AS sd14,
    SUM(IF(d_30, 1, 0)) AS sd30,
    SUM(IF(d_60, 1, 0)) AS sd60,
    SUM(IF(d_90, 1, 0)) AS sd90,
    SUM(IF(d_120, 1, 0)) AS sd120,
    SUM(IF(d_180, 1, 0)) AS sd180
FROM $returned_d
GROUP BY
    COALESCE(CAST(fielddate AS String), "") AS fielddate,
    is_card_domain_market,
    card_dashboard_type
;

-- Replace the contents of the output table with the aggregated result,
-- written in fielddate order.
INSERT INTO $output_path WITH TRUNCATE
SELECT *
FROM $grouped_deltas
ORDER BY fielddate;


-- Expose the path of the table written above as the query result.
SELECT $output_path AS target_path;
