-- Run every statement below against the `hahn` cluster.
USE hahn;
-- Execute the YT operations of this script in the `robot-webmaster` pool.
PRAGMA yt.Pool = 'robot-webmaster';

-- IMPORTANT! Antispam uses these tables to compute the Turbo "TurboBeta" achievement.
-- At least the 56 most recent tables must be kept.

-- Folders: per-day input tables are read from the first one,
-- per-day result tables are written to the second one.
$input_folder = "home/search-functionality/mt_squeeze/v31/click_props";
$output_folder = "//home/webmaster/prod/checklist/turbo-clicks-stats";

-- Upper bound on the number of days handled by a single run.
$max_process_days_count = 30;

-- Table names are dates in YYYYMMDD form; these parse and format such names.
$date_parser = DateTime::Parse("%Y%m%d");
$format = DateTime::Format("%Y%m%d");

-- Current UTC date.
-- NOTE(review): not referenced anywhere in the visible part of this script.
$today = CurrentUtcDate();
-- Splits a host into an optional leading "www." / "m." prefix (group 1)
-- and the remainder (group 2). The pattern is case-sensitive.
$cut_www_and_m_capture = Re2::Capture("^(www\\.|m\\.)?(.+)$");
-- Returns the host without a single leading "www." or "m." prefix.
$cut_www_and_m = ($s) -> { return $cut_www_and_m_capture($s)._2; };
-- Turns a URL into the domain key used for grouping: the host, lower-cased,
-- with a leading "www." / "m." removed.
-- The host is lower-cased BEFORE the prefix is stripped: the prefix pattern only
-- matches lower-case text, so a host such as "WWW.Example.com" would otherwise
-- keep its prefix and be counted under a different domain than "example.com".
$prepare_domain = ($url) -> { return $cut_www_and_m(String::ToLower(Url::GetHost($url))); };
-- Returns the last component of a path, i.e. everything after the final "/".
$basename = ($path) -> {
    $slash_idx = CAST(String::ReverseFind($path, "/") AS Uint64);
    $name_start = $slash_idx + 1;
    RETURN String::Substring($path, $name_start);
};
-- Days to process in this run: dates of the input-folder entries that have no
-- same-named entry in the output folder, earliest first, capped at
-- $max_process_days_count.
$days = (
    SELECT AGGREGATE_LIST(day_ts)
    FROM (
        SELECT
            DateTime::MakeTimestamp($date_parser($basename(src.Path))) AS day_ts
        FROM FOLDER($input_folder) AS src
        LEFT ONLY JOIN FOLDER($output_folder) AS done
            ON $basename(src.Path) = $basename(done.Path)
        ORDER BY day_ts ASC
        LIMIT $max_process_days_count
    )
);

-- Functions
-- True when the click's is_dynamic flag is false.
$is_not_dynamic_click = ($x) -> { RETURN NOT $x.is_dynamic; };

-- True when the click's converted_path is one of the two slider swipe paths.
$is_slider_swipe = ($x) -> {
    $path = $x.converted_path;
    RETURN $path = "/web/item/slider/swipe"
        OR $path = "/snippet/turbo_snippet/slider/swipe";
};

-- Decides whether a click counts as a Turbo click. It does when either
--   * the last baobab block has attrs['turbo'] = 'True', is not named 'slider',
--     and the click is not dynamic; or
--   * the click has vars["preview"] = "true", its converted_path does not
--     contain "sideblock", and it is not a slider swipe.
$is_turbo_click = ($x) -> {
    -- baobab_blocks is read through TryMember with a NULL default.
    $tail_block = ListReverse(TryMember($x, 'baobab_blocks', null))[0];
    -- A missing block or attribute counts as "no turbo attribute".
    $has_turbo_attr = COALESCE($tail_block.attrs['turbo'] = 'True', false);
    $is_turbo_block = $has_turbo_attr AND $tail_block.name != 'slider';

    $is_preview_click =
        $x.vars["preview"] = "true"
        AND $x.converted_path NOT LIKE "%sideblock%"
        AND NOT $is_slider_swipe($x);

    RETURN ($is_turbo_block AND NOT $x.is_dynamic) OR $is_preview_click;
};
-- Returns the URL a click is attributed to.
-- Uses click.url, falling back to the lower-cased 'documentUrl' attribute of the
-- first baobab block. When that URL belongs to yandex.ru and its path starts
-- with "/turbo", the decoded "text" CGI parameter is returned instead.
$extract_url = ($click) -> {
    $doc_url = COALESCE(
        $click.url,
        String::ToLower($click.baobab_blocks[0].attrs['documentUrl'])
    );
    $points_to_turbo =
        Url::GetOwner($doc_url) = "yandex.ru"
        AND String::StartsWith(Url::GetPath($doc_url), "/turbo");
    RETURN CASE
        WHEN $points_to_turbo THEN Url::Decode(Url::GetCGIParam($doc_url, "text"))
        ELSE $doc_url
    END;
};

-- Data

-- Distinct URLs taken from the pull-job history rows whose action is "modify"
-- and whose meta['source'] is "autoparser". The URL is read from the 'url' key
-- of the JSON stored in `value`.
-- The list is de-duplicated because it is LEFT JOINed to per-URL click counts:
-- several history rows for the same URL would repeat that URL's counts and
-- inflate the per-domain sums. GROUP BY <expr> AS <alias> is used for this,
-- since DISTINCT cannot be applied to a computed value.
$autoparsed_urls = (
    SELECT url
    FROM `//home/turborss/production/yt_pull_job/history_static`
    WHERE action = "modify" AND Yson::ConvertToString(meta['source']) = "autoparser"
    GROUP BY Yson::ConvertToString(Yson::ParseJson(value)['url']) AS url
);

-- Builds the per-domain Turbo click statistics for one day.
-- $day: timestamp of the day; the input and output tables are both named
--       $format($day) inside their respective folders.
-- Writes (replacing any existing content) one row per domain that has at least
-- one Turbo click: domain, turbo_clicks, total_clicks, autoparsed_clicks,
-- top_urls_without_turbo.
DEFINE ACTION $calculate_turbo_clicks($day) AS
    $table_name = $format($day);
    $input = $input_folder || "/" || $table_name;
    $output = $output_folder || "/" || $table_name;

    -- Click lists of main results with a non-empty URL, taken from rows whose
    -- ui is 'touch' or 'mobileapp'.
    $result_clicks = (
        SELECT result.clicks AS clicks
        FROM $input
        FLATTEN BY mainResults AS result
        WHERE (ui = 'touch' OR ui = 'mobileapp') AND result.url != ''
    );

    -- One row per click that is either a Turbo click or a non-dynamic click.
    $click_rows = (
        SELECT
            $extract_url(clk) AS url,
            $is_turbo_click(clk) AS is_turbo
        FROM $result_clicks
        FLATTEN BY clicks AS clk
        WHERE $is_turbo_click(clk) OR $is_not_dynamic_click(clk)
    );

    -- Turbo and total click counts per URL; URLs that fail to parse are dropped.
    $url_clicks = (
        SELECT
            url,
            COUNT_IF(is_turbo) AS turbo_clicks,
            COUNT(*) AS total_clicks
        FROM $click_rows
        WHERE Url::Parse(url).ParseError IS NULL
        GROUP BY url
    );

    -- Same counts plus a flag telling whether the URL occurs in $autoparsed_urls.
    $url_clicks_flagged = (
        SELECT
            stat.url AS url,
            stat.turbo_clicks AS turbo_clicks,
            stat.total_clicks AS total_clicks,
            (ap.url IS NOT NULL) AS is_autoparsed
        FROM $url_clicks AS stat
        LEFT JOIN $autoparsed_urls AS ap
            ON stat.url = ap.url
    );

    -- URLs without a single Turbo click (paths ending in .pdf excluded),
    -- keyed by domain and URL tail.
    $no_turbo_urls = (
        SELECT
            domain,
            url,
            MAX(total_clicks) AS total_clicks
        FROM $url_clicks_flagged
        WHERE turbo_clicks = 0
            AND String::ToLower(Url::GetPath(url)) NOT LIKE '%.pdf'
        GROUP BY
            $prepare_domain(url) AS domain,
            COALESCE(Url::GetTail(url), "/") AS url
    );

    -- Per domain: up to 100 of those URLs with the highest click counts,
    -- each serialized as text YSON with the fields url and clicks.
    $no_turbo_top = (
        SELECT
            domain,
            ListMap(
                TOP_BY(AsStruct(url AS url, total_clicks AS total_clicks), total_clicks, 100),
                ($item) -> {
                    RETURN Yson::SerializeText(Yson::From(AsStruct($item.url AS url, $item.total_clicks AS clicks)));
                }
            ) AS top_urls_without_turbo
        FROM $no_turbo_urls
        GROUP BY domain
    );

    -- Per-domain totals; only domains with at least one Turbo click are kept.
    $domain_totals = (
        SELECT
            domain,
            SUM(turbo_clicks) AS turbo_clicks,
            SUM(total_clicks) AS total_clicks,
            COALESCE(SUM_IF(turbo_clicks, is_autoparsed), 0) AS autoparsed_clicks
        FROM $url_clicks_flagged
        GROUP BY $prepare_domain(url) AS domain
        HAVING SUM(turbo_clicks) > 0
    );

    INSERT INTO $output WITH TRUNCATE
    SELECT
        tot.domain AS domain,
        tot.turbo_clicks AS turbo_clicks,
        tot.total_clicks AS total_clicks,
        tot.autoparsed_clicks AS autoparsed_clicks,
        -- Domains with no matching URL list get an empty list.
        IF(
            lst.top_urls_without_turbo IS NOT NULL,
            lst.top_urls_without_turbo,
            ListCreate(YSON)
        ) AS top_urls_without_turbo
    FROM $domain_totals AS tot
    LEFT JOIN $no_turbo_top AS lst
        ON tot.domain = lst.domain
    ORDER BY domain;
END DEFINE;



-- Run the calculation once for every day in $days; do nothing when the list is empty.
EVALUATE FOR $day IN $days
    DO $calculate_turbo_clicks($day)
ELSE
    DO EMPTY_ACTION();


