/**
    Перенесено из https://nirvana.yandex-team.ru/flow/6d8f7569-984b-4b21-8867-5bb9f4514f9f
    см.также WMC-5926, WMCSUPPORT-2754
    вопросы по mt_squeeze можно задавать в соотв.чате в TG
 */
-- Run every statement below against the `hahn` cluster.
USE hahn;
-- Ask YQL to infer the schema of YT tables that do not declare one.
PRAGMA yt.InferSchema;
-- YT pool used for the operations started by this query.
PRAGMA yt.Pool = 'robot-webmaster';
-- Folder with the input tables; their names are parsed with $date_parser (YYYYMMDD) further down.
$input_folder = "home/search-functionality/mt_squeeze/v31/click_props";
-- NOTE(review): $today is not referenced anywhere else in the visible script
-- (the freshness check calls CurrentUtcDate() directly) -- confirm before removing.
$today = CurrentUtcDate();
-- Number of consecutive daily input tables aggregated into one output table.
$days_count = 7;
-- Folder that receives the output tables, named <from>_<to> with YYYYMMDD dates.
$output_folder = "//home/webmaster/prod/checklist/turbo-clicks";
-- Parser and formatter for the YYYYMMDD dates used in table names.
$date_parser = DateTime::Parse("%Y%m%d");
$format = DateTime::Format("%Y%m%d");
-- Host normalisation helpers used to group URLs by domain.
-- Capture with two groups: an optional leading "www." / "m." prefix (_1)
-- and the remainder of the host (_2). The match is case-sensitive.
$cut_www_and_m_capture = Re2::Capture("^(www\\.|m\\.)?(.+)$");
-- Returns the host without one leading "www." or "m." label.
$cut_www_and_m = ($s) -> { return $cut_www_and_m_capture($s)._2; };
-- Maps a URL to its normalised domain: lower-cased host without the "www." / "m." prefix.
-- The host is lower-cased BEFORE the prefix is stripped: the regex above is
-- case-sensitive, so stripping first would leave e.g. "WWW.Example.com" as
-- "www.example.com" and put it into a different group than "example.com".
$prepare_domain = ($url) -> { return $cut_www_and_m(String::ToLower(Url::GetHost($url))); };
-- Returns the part of $path that follows its last "/" (the table name of a YT path).
$basename = ($path) -> {
    -- Offset of the first character after the final slash.
    $name_start = CAST(String::ReverseFind($path, "/") AS Uint64) + 1;
    RETURN String::Substring($path, $name_start);
};
-- Path that sorts last in the input folder. Table names are YYYYMMDD dates
-- (they are parsed with $date_parser below), so this is the most recent day.
$last_input_table = (
    SELECT Path
    FROM FOLDER($input_folder)
    ORDER BY Path DESC
    LIMIT 1
);
-- Freshness guard: abort the whole query when the newest input table is more
-- than five days behind the current UTC date.
$max_input_age = DateTime::IntervalFromDays(5);
$last_input_day = DateTime::MakeTimestamp($date_parser($basename($last_input_table)));
DISCARD SELECT ENSURE(
    0,
    CurrentUtcDate() <= ($last_input_day + $max_input_age),
    "The last input table -- " || Unwrap($last_input_table) || " -- is too old"
);
-- Builds the output table name for the window whose last input day is $last_day
-- (a YYYYMMDD string). The name is "<start>_<end>", where <end> is the day after
-- $last_day and <start> is $days_count days before <end>.
$last_day_to_output_table_name = ($last_day) -> {
    $one_day = DateTime::IntervalFromDays(1);
    $window_end = DateTime::MakeTimestamp($date_parser($last_day)) + $one_day;
    $window_start = $window_end - $days_count * $one_day;
    RETURN $format($window_start) || "_" || $format($window_end);
};
$day_to = (
    -- Among the five most recent input tables, keep those whose expected output
    -- table does not exist yet (LEFT ONLY JOIN) and take the OLDEST such day.
    -- The result is NULL when all five have already been processed.
    SELECT DateTime::MakeTimestamp($date_parser($basename(recent_inputs.Path))) AS `Date`
    FROM (
        SELECT Path
        FROM FOLDER($input_folder)
        ORDER BY Path DESC
        LIMIT 5
    ) AS recent_inputs
    LEFT ONLY JOIN FOLDER($output_folder) AS outputs
    ON $last_day_to_output_table_name($basename(recent_inputs.Path)) = $basename(outputs.Path)
    ORDER BY `Date` ASC
    LIMIT 1
);
-- First day of the window: the window covers $days_count days and ends on $day_to (inclusive).
$day_from = $day_to - ($days_count - 1) * DateTime::IntervalFromDays(1);
-- Input table paths, one per day of the window, in chronological order.
$paths = (
    SELECT ListMap(
        ListFromRange(0, $days_count),
        ($offset) -> {
            $current_day = $day_from + $offset * DateTime::IntervalFromDays(1);
            RETURN Unwrap($input_folder || "/" || $format($current_day));
        }
    )
);
-- Union of all daily input tables of the window.
$input = (SELECT * FROM EACH($paths));
-- Output table "<first day>_<day after the last day>" inside $output_folder.
$output_table = $output_folder || "/" || $format($day_from) || "_" || $format($day_to + DateTime::IntervalFromDays(1));
-- Functions
-- True for clicks whose is_dynamic flag is false (NULL stays NULL).
$is_not_dynamic_click = ($click) -> { RETURN NOT $click.is_dynamic; };
-- True when the click's converted_path is one of the two slider swipe paths.
$is_slider_swipe = ($x) -> {
    $path = $x.converted_path;
    RETURN $path = "/web/item/slider/swipe" OR $path = "/snippet/turbo_snippet/slider/swipe";
};
-- Decides whether a click counts as a turbo click. Two independent ways to qualify:
--   1. the last baobab block has attrs['turbo'] == 'True', is not named 'slider',
--      and the click is not dynamic;
--   2. the click has vars["preview"] = "true", its converted_path does not contain
--      "sideblock", and it is not a slider swipe.
$is_turbo_click = ($x) -> {
    -- baobab_blocks may be missing from the struct, hence TryMember with a NULL default.
    $last_block = ListReverse(TryMember($x, 'baobab_blocks', null))[0];
    $has_turbo_attr = COALESCE($last_block.attrs['turbo'] == 'True', false);
    $is_turbo_block = $has_turbo_attr AND $last_block.name != 'slider';
    $is_preview_click =
        $x.vars["preview"] = "true"
        AND $x.converted_path NOT LIKE "%sideblock%"
        AND NOT $is_slider_swipe($x);
    RETURN ($is_turbo_block AND NOT $x.is_dynamic) OR ($is_preview_click);
};
-- Returns the document URL of a click.
-- Uses the click's own url, falling back to the lower-cased 'documentUrl' attribute
-- of the first baobab block. When that URL is a yandex.ru page whose path starts
-- with "/turbo", the URL-decoded "text" CGI parameter is returned instead.
$extract_url = ($click) -> {
    $doc_url = COALESCE(
        $click.url,
        String::ToLower($click.baobab_blocks[0].attrs['documentUrl'])
    );
    $points_to_turbo_page =
        Url::GetOwner($doc_url) = "yandex.ru"
        AND String::StartsWith(Url::GetPath($doc_url), "/turbo");
    RETURN CASE
        WHEN $points_to_turbo_page
        THEN Url::Decode(Url::GetCGIParam($doc_url, "text"))
        ELSE $doc_url
    END;
};
-- Data
-- Click lists of every main search result that has a non-empty url,
-- restricted to rows with ui 'touch' or 'mobileapp'.
$raw_clicks = (
    SELECT
        result.clicks AS clicks
    FROM $input
    FLATTEN BY mainResults AS result
    WHERE
        (ui == 'touch' OR ui == 'mobileapp')
        AND result.url != ''
);
-- One row per individual click: its document URL and whether it is a turbo click.
-- Only turbo clicks and non-dynamic clicks are kept.
$processed_clicks = (
    SELECT
        $extract_url(c) AS url,
        $is_turbo_click(c) AS is_turbo
    FROM $raw_clicks
    FLATTEN BY clicks AS c
    WHERE
        $is_turbo_click(c)
        OR $is_not_dynamic_click(c)
);
-- Per-URL click counters. URLs that fail to parse and URLs whose path ends
-- with ".pdf" (case-insensitive) are excluded.
$clicks_by_url = (
    SELECT
        url,
        COUNT_IF(is_turbo) AS turbo_clicks,
        COUNT(*) AS total_clicks
    FROM $processed_clicks
    WHERE
        Url::Parse(url).ParseError IS NULL
        AND String::ToLower(Url::GetPath(url)) NOT LIKE '%.pdf'
    GROUP BY
        url
);
-- Distinct URLs that have at least one "modify" record produced by the autoparser
-- in the turbo pull-job history.
-- The history table may hold several such records for the same URL, so the URLs
-- are deduplicated here (GROUP BY <expr> AS url): this set is LEFT JOINed to the
-- per-URL click counters, and a repeated URL would duplicate the click row and
-- inflate the summed click counts.
$autoparsed_urls = (
    SELECT url
    FROM `//home/turborss/production/yt_pull_job/history_static`
    WHERE action = "modify" AND Yson::ConvertToString(meta['source']) = "autoparser"
    GROUP BY Yson::ConvertToString(Yson::ParseJson(value)['url']) AS url
);
-- Per-URL click counters extended with an is_autoparsed flag, which is true when
-- the URL is present in $autoparsed_urls.
-- NOTE(review): a URL occurring more than once in $autoparsed_urls would yield
-- one output row per occurrence.
$clicks_with_autoparser_info = (
    SELECT
        clicks.url AS url,
        clicks.turbo_clicks AS turbo_clicks,
        clicks.total_clicks AS total_clicks,
        (autoparsed.url IS NOT NULL) AS is_autoparsed
    FROM $clicks_by_url AS clicks
    LEFT JOIN $autoparsed_urls AS autoparsed
    ON clicks.url = autoparsed.url
);
-- Fails the query unless $table contains at least one row.
DEFINE ACTION $check($table) AS
    DISCARD SELECT
        Ensure(
            True,
            COUNT(*) > 0,
            "Expected " || CAST($days_count AS String) || " non-empty tables"
        )
    FROM $table;
END DEFINE;
-- Verifies the inputs and (re)writes the per-domain output table.
-- Output columns:
--   domain                 - normalised host of the URL
--   turbo_clicks           - sum of turbo clicks over the domain's URLs
--   total_clicks           - sum of all counted clicks over the domain's URLs
--   autoparsed_clicks      - turbo clicks on autoparsed URLs (0 when there are none)
--   top_urls_without_turbo - unique URL tails (path and beyond, '/' when empty) of
--                            URLs with zero turbo clicks, taken from the 100 rows
--                            with the highest ranking key
-- Domains with no turbo clicks at all are dropped.
DEFINE ACTION $process_table() AS
    -- Precondition: every input table of the window must be non-empty.
    EVALUATE FOR $input_path IN $paths DO $check($input_path);

    INSERT INTO $output_table WITH TRUNCATE
    SELECT
        domain,
        SUM(turbo_clicks) AS turbo_clicks,
        SUM(total_clicks) AS total_clicks,
        COALESCE(SUM_IF(turbo_clicks, is_autoparsed), 0) AS autoparsed_clicks,
        ListUniq(
            ListMap(
                ListFilter(
                    -- Rank rows by total_clicks when the URL has no turbo clicks and by 0
                    -- otherwise; each entry is (url, has_no_turbo_clicks).
                    TOP_BY(
                        AsTuple(url, turbo_clicks == 0),
                        IF(turbo_clicks == 0, total_clicks, 0),
                        100
                    ),
                    -- Keep only the URLs without turbo clicks.
                    ($entry) -> { RETURN $entry.1; }
                ),
                ($entry) -> { RETURN COALESCE(Url::GetTail($entry.0), '/'); }
            )
        ) AS top_urls_without_turbo
    FROM $clicks_with_autoparser_info
    GROUP BY $prepare_domain(url) AS domain
    HAVING SUM(turbo_clicks) > 0
    ORDER BY domain;
END DEFINE;
-- Entry point: do the work only when an unprocessed day was found.
EVALUATE IF $day_to IS NOT NULL DO $process_table();
