-- Storage hint for YT: tables written by this query are created with optimize_for = "scan".
PRAGMA yt.OptimizeFor = "scan";
-- Switches IN to ANSI semantics for empty or NULL-containing item collections;
-- the domain tuples below are matched with IN / NOT IN.
PRAGMA AnsiInForEmptyOrNullableItemsCollections;

-- First day of the reporting window; compared as a string against report_date.
$date_start = '2019-05-01';
-- Last day of the reporting window: the current UTC date minus two days, as a string.
-- NOTE(review): the two-day lag presumably waits for upstream logs to be complete — confirm.
$date_end   = CAST(CurrentUtcDate() - DateTime::IntervalFromDays(2) AS String);

-- Domain roots that are not reported as-is: when the logged domain root is one of these,
-- $get_real_domain_root returns the domain root taken from the segments table instead.
-- NOTE(review): these look like CDN / ad-serving / technical hosts — confirm the selection criteria.
-- The trailing '' entry makes a NULL domain root (coalesced to '' by the caller) count as a match.
$bad_domains = (
    'yastatic.net',
    'svk-native.ru',
    'ampproject.org',
    'ampproject.net',
    'appo.click',
    'mobileappgroup.site',
    'googleapis.com',
    'doubleclick.net',
    'googlesyndication.com',
    'hhcdn.ru',
    ''
);

-- Yandex-owned domain roots. Used in two places:
--   * $get_real_domain_root replaces a logged domain root found here with the segments one;
--   * $bs_stats drops rows whose resulting domain root is still in this list.
-- The trailing '' entry makes a NULL domain root (coalesced to '' by the callers) count as a match.
$internal_domains = (
    'yandex.ru',
    'yandex.kz',
    'yandex.by',
    'yandex.ua',
    'yandex.uz',
    'yandex.com.tr',
    'yandex.fr',
    'yandex.tm',
    'yandex.ee',
    'yandex.md',
    'yandex.com.ge',
    'yandex.tj',
    'yandex.com.am',
    'yandex.lv',
    'yandex.co.il',
    'yandex.lt',
    'yandex.az',
    'yandex.kg',
    'yandex.net',
    'yandex.com',
    'yandex-team.ru',
    'yandex',
    ''
);

-- Picks the domain root a stats row is attributed to.
--   $site_version  - site version of the block (NULL is treated as "not turbo")
--   $d_domain_root - domain root recorded in the stats log (NULL is treated as '')
--   $s_domain_root - domain root of the page in the segments table
-- Returns $s_domain_root when the site version is 'turbo' / 'turbo_desktop', or when
-- $d_domain_root is listed in $bad_domains or $internal_domains; otherwise $d_domain_root.
$get_real_domain_root = ($site_version, $d_domain_root, $s_domain_root) -> {
    RETURN IF(
        COALESCE($site_version IN ('turbo', 'turbo_desktop'), FALSE)
            OR COALESCE($d_domain_root, '') IN $bad_domains
            OR COALESCE($d_domain_root, '') IN $internal_domains,
        $s_domain_root,
        $d_domain_root
    );
};

-- RTB DSP statistics aggregated per (report_date, domain_root, is_turbo, device_type)
-- for pages that the segments table flags as external and not as apps.
$bs_rtb_dsp_stats = (
    SELECT
        report_date,
        domain_root,
        is_turbo,
        device_type,
        -- Contribution to the combined (plain + RTB) totals; rows with dsp_id 1, 5 or 10 are left out.
        -- NOTE(review): the meaning of these DSP ids is not visible in this file — confirm.
        SUM_IF(rtb.cost_total_rub_wo_nds, rtb.dsp_id NOT IN (1, 5, 10)) AS tech_revenue,
        SUM_IF(rtb.block_shows, rtb.dsp_id NOT IN (1, 5, 10)) AS block_shows,
        -- RTB-only metrics
        SUM(rtb.cost_total_rub_wo_nds) AS rtb_tech_revenue,
        SUM(rtb.block_shows) AS rtb_block_shows,
        SUM(rtb.auctions) AS rtb_auctions,
        SUM_IF(rtb.auctions, rtb.dsp_id NOT IN (5, 10)) AS rtb_auctions_sold
        -- TODO: add fraud and commerce metrics
    FROM `//home/comdep-analytics/chaos-ad/logs/bs/rtb_dsp_stats/all` AS rtb
    LEFT JOIN `//home/comdep-analytics/YAN/segmentation/latest/segments` AS seg
        ON rtb.page_id = seg.page_id
    LEFT JOIN `//home/comdep-analytics/chaos-ad/pi/v2/dicts_joined/latest/blocks` AS blk
        ON rtb.page_id = blk.page_id
        AND rtb.block_id = blk.block_id
    -- The seg.* predicates reject rows without a segments match,
    -- so the first LEFT JOIN effectively behaves as an inner join.
    WHERE
        rtb.report_date >= $date_start
        AND rtb.report_date <= $date_end
        AND seg.is_page_external
        AND NOT seg.is_page_app
    GROUP BY
        rtb.report_date AS report_date,
        $get_real_domain_root(blk.site_version, rtb.domain_root, seg.domain_root) AS domain_root,
        COALESCE(blk.site_version IN ('turbo', 'turbo_desktop'), FALSE) AS is_turbo,
        rtb.device_type AS device_type
);

-- Direct block statistics aggregated per (report_date, domain_root, is_turbo, device_type)
-- for pages that the segments table flags as external and not as apps.
$bs_direct_block_stats = (
    SELECT
        report_date,
        domain_root,
        is_turbo,
        device_type,
        -- Contribution to the combined (plain + RTB) totals
        SUM(ds.cost_total) AS tech_revenue,
        SUM(ds.block_shows) AS block_shows,
        -- Direct-only metrics
        SUM(ds.cost_total) AS direct_tech_revenue,
        SUM(ds.fraud_cost_total) AS direct_fraud_tech_revenue,
        SUM(ds.block_shows) AS direct_block_shows,
        SUM(ds.ad_shows) AS direct_ad_shows,
        SUM(ds.ad_clicks) AS direct_ad_clicks
    FROM `//home/comdep-analytics/chaos-ad/logs/bs/direct_block_stats/all` AS ds
    LEFT JOIN `//home/comdep-analytics/YAN/segmentation/latest/segments` AS seg
        ON ds.page_id = seg.page_id
    LEFT JOIN `//home/comdep-analytics/chaos-ad/pi/v2/dicts_joined/latest/blocks` AS blk
        ON ds.page_id = blk.page_id
        AND ds.block_id = blk.block_id
    -- The seg.* predicates reject rows without a segments match,
    -- so the first LEFT JOIN effectively behaves as an inner join.
    WHERE
        ds.report_date >= $date_start
        AND ds.report_date <= $date_end
        AND seg.is_page_external
        AND NOT seg.is_page_app
    GROUP BY
        ds.report_date AS report_date,
        $get_real_domain_root(blk.site_version, ds.domain_root, seg.domain_root) AS domain_root,
        COALESCE(blk.site_version IN ('turbo', 'turbo_desktop'), FALSE) AS is_turbo,
        ds.device_type AS device_type
);

-- RTB and Direct statistics merged into one row per
-- (report_date, domain_root, is_turbo, device_type), with internal domains removed.
$bs_stats = (
    SELECT
        report_date,
        domain_root,
        is_turbo,
        device_type,
        SUM(tech_revenue) AS tech_revenue,
        SUM(block_shows) AS block_shows,
        SUM(rtb_tech_revenue) AS rtb_tech_revenue,
        SUM(rtb_block_shows) AS rtb_block_shows,
        SUM(rtb_auctions) AS rtb_auctions,
        SUM(rtb_auctions_sold) AS rtb_auctions_sold,
        SUM(direct_tech_revenue) AS direct_tech_revenue,
        SUM(direct_fraud_tech_revenue) AS direct_fraud_tech_revenue,
        SUM(direct_block_shows) AS direct_block_shows,
        SUM(direct_ad_shows) AS direct_ad_shows,
        SUM(direct_ad_clicks) AS direct_ad_clicks
    FROM (
        -- The two inputs expose different metric columns.
        -- NOTE(review): this relies on UNION ALL aligning columns by name and filling the
        -- ones missing on either side with NULL — confirm no positional-union pragma is in effect.
        SELECT * FROM $bs_rtb_dsp_stats
        UNION ALL
        SELECT * FROM $bs_direct_block_stats
    )
    -- $internal_domains contains '', so rows with a NULL domain_root are dropped here as well.
    WHERE COALESCE(domain_root, '') NOT IN $internal_domains
    GROUP BY
        report_date,
        domain_root,
        is_turbo,
        device_type
);

-- Pageview counters from the watch-log domain stats, summed per
-- (report_date, domain_root, is_turbo, device_type) inside the reporting window.
$watch_log = (
    SELECT
        report_date,
        domain_root,
        is_turbo,
        device_type,
        SUM(pageviews) AS pageviews,
        SUM(pageviews_ads) AS pageviews_ads,
        SUM(pageviews_metrika) AS pageviews_metrika
    FROM `//home/comdep-analytics/chaos-ad/logs/metrika/watch_log/domain_stats/all`
    WHERE
        report_date >= $date_start
        AND report_date <= $date_end
        -- skip rows whose domain root is NULL or empty
        AND COALESCE(domain_root, '') <> ''
    GROUP BY
        report_date,
        domain_root,
        is_turbo,
        device_type
);

-- Clickgen click counts per (report_date, domain_root, device_type) inside the reporting window.
-- is_turbo is a constant FALSE, so these rows only ever join to the non-turbo cells of the report.
$clickgen = (
    SELECT
        report_date,
        domain_root,
        FALSE AS is_turbo,
        device_type,
        SUM(clicks) AS clickgen_all_clicks,
        -- clicks of rows whose network is 'yandex'
        SUM_IF(clicks, network = 'yandex') AS clickgen_yan_clicks
    FROM `//home/comdep-analytics/chaos-ad/logs/bs/clickgen_stats/all`
    WHERE
        report_date >= $date_start
        AND report_date <= $date_end
    GROUP BY
        report_date,
        domain_root,
        device_type
);

-- Names of the first and last tables ("has_metrika.<date>") read from the export directory.
$has_metrika_start = 'has_metrika.' || $date_start;
$has_metrika_end   = 'has_metrika.' || $date_end;

-- Per-domain flag: true when SUM(has_metrika) over all rows read for the domain
-- is at least 99% of the number of those rows.
-- NOTE(review): assumes has_metrika is a numeric 0/1 column — confirm against the export schema.
$has_metrika = (
    SELECT
        domain_root,
        (SUM(has_metrika) / CAST(COUNT(*) AS Double)) >= 0.99 AS has_metrika
    FROM RANGE(
        `//home/metrika/export/code`,
        $has_metrika_start,
        $has_metrika_end
    )
    GROUP BY
        domain AS domain_root
);

-- Number of calendar days in the reporting window, both ends included
-- (a NULL day difference is treated as 0, giving a length of 1).
$len = COALESCE(
    DateTime::ToDays(CAST($date_end AS Date) - CAST($date_start AS Date)),
    0
) + 1;
-- Day offsets from $date_start: 0, 1, ..., $len - 1.
$range_list = ListFromRange(0, $len, 1);
-- Every date of the reporting window, rendered as a string.
$range_date = ListMap(
    $range_list,
    ($day_offset) -> {
        RETURN CAST(
            CAST($date_start AS Date) + DateTime::IntervalFromDays(CAST($day_offset AS Int16))
            AS String
        );
    }
);

-- Unique domain roots present in $bs_stats; only these domains appear in the report.
$domains = (
    SELECT
        domain_root
    FROM $bs_stats
    GROUP BY
        domain_root
);

-- Dense grid of report cells: every domain from $domains combined with every date of the
-- window, both is_turbo values and the device types 3, 4 and 5.
-- NOTE(review): the meaning of device types 3 / 4 / 5 is not visible in this file — confirm.
$tree = (
    SELECT
        report_date,
        domain_root,
        is_turbo,
        device_type
    FROM (
        -- One row per domain carrying three list columns; flattening them yields all combinations.
        SELECT
            domain_root AS domain_root,
            $range_date AS report_date,
            AsList(TRUE, FALSE) AS is_turbo,
            AsList(3, 4, 5) AS device_type
        FROM $domains
    )
    FLATTEN BY (report_date, is_turbo, device_type)
);

-- The report proper: every cell of $tree enriched with ad-stats, pageview, clickgen and
-- has_metrika data. All joins are LEFT JOINs from the grid, so each cell is kept and
-- missing metrics are filled with 0 (has_metrika with FALSE).
-- NOTE(review): revenue columns are cast to Float — confirm that precision is sufficient.
$report = (
    SELECT
        UNWRAP(grid.report_date) AS report_date,
        UNWRAP(grid.domain_root) AS domain_root,
        UNWRAP(grid.is_turbo) AS is_turbo,
        UNWRAP(grid.device_type) AS device_type,
        COALESCE(hm.has_metrika, FALSE) AS has_metrika,
        CAST(COALESCE(wl.pageviews, 0) AS Int64) AS pageviews,
        CAST(COALESCE(wl.pageviews_ads, 0) AS Int64) AS pageviews_ads,
        CAST(COALESCE(wl.pageviews_metrika, 0) AS Int64) AS pageviews_metrika,
        CAST(COALESCE(bs.tech_revenue, 0) AS Float) AS tech_revenue,
        CAST(COALESCE(cg.clickgen_all_clicks, 0) AS Int64) AS clickgen_all_clicks,
        CAST(COALESCE(cg.clickgen_yan_clicks, 0) AS Int64) AS clickgen_yan_clicks,
        CAST(COALESCE(bs.block_shows, 0) AS Int64) AS block_shows,
        CAST(COALESCE(bs.rtb_tech_revenue, 0) AS Float) AS rtb_tech_revenue,
        CAST(COALESCE(bs.rtb_block_shows, 0) AS Int64) AS rtb_block_shows,
        CAST(COALESCE(bs.rtb_auctions, 0) AS Int64) AS rtb_auctions,
        CAST(COALESCE(bs.rtb_auctions_sold, 0) AS Int64) AS rtb_auctions_sold,
        CAST(COALESCE(bs.direct_tech_revenue, 0) AS Float) AS direct_tech_revenue,
        CAST(COALESCE(bs.direct_fraud_tech_revenue, 0) AS Float) AS direct_fraud_tech_revenue,
        CAST(COALESCE(bs.direct_block_shows, 0) AS Int64) AS direct_block_shows,
        CAST(COALESCE(bs.direct_ad_shows, 0) AS Int64) AS direct_ad_shows,
        CAST(COALESCE(bs.direct_ad_clicks, 0) AS Int64) AS direct_ad_clicks
    FROM $tree AS grid
    LEFT JOIN $bs_stats AS bs
        ON grid.report_date = bs.report_date
        AND grid.domain_root = bs.domain_root
        AND grid.is_turbo = bs.is_turbo
        AND grid.device_type = bs.device_type
    LEFT JOIN $watch_log AS wl
        ON grid.report_date = wl.report_date
        AND grid.domain_root = wl.domain_root
        AND grid.is_turbo = wl.is_turbo
        AND grid.device_type = wl.device_type
    LEFT JOIN $clickgen AS cg
        ON grid.report_date = cg.report_date
        AND grid.domain_root = cg.domain_root
        AND grid.is_turbo = cg.is_turbo
        AND grid.device_type = cg.device_type
    -- has_metrika is a per-domain attribute, so it is matched on the domain alone
    LEFT JOIN $has_metrika AS hm
        ON grid.domain_root = hm.domain_root
);

-- $report plus pageviews / tech_revenue values from 7, 28 and 364 rows earlier within the
-- same (domain_root, is_turbo, device_type) series ordered by report_date.
-- LAG counts rows, not days: the offsets equal day offsets only while $report holds exactly
-- one row per date in every series.
-- NOTE(review): that holds as long as none of the joins in $report multiplies grid rows — confirm.
$report_w_lag = (
    SELECT
        report_date,
        domain_root,
        is_turbo,
        device_type,
        has_metrika,
        pageviews,
        pageviews_ads,
        pageviews_metrika,
        tech_revenue,
        clickgen_all_clicks,
        clickgen_yan_clicks,
        block_shows,
        rtb_tech_revenue,
        rtb_block_shows,
        rtb_auctions,
        rtb_auctions_sold,
        direct_tech_revenue,
        direct_fraud_tech_revenue,
        direct_block_shows,
        direct_ad_shows,
        direct_ad_clicks,
        -- one week back
        LAG(pageviews, 7) OVER series AS pageviews_lag7,
        LAG(tech_revenue, 7) OVER series AS tech_revenue_lag7,
        -- four weeks back
        LAG(pageviews, 28) OVER series AS pageviews_lag28,
        LAG(tech_revenue, 28) OVER series AS tech_revenue_lag28,
        -- fifty-two weeks back
        LAG(pageviews, 364) OVER series AS pageviews_lag364,
        LAG(tech_revenue, 364) OVER series AS tech_revenue_lag364
    FROM $report
    WINDOW series AS (
        PARTITION BY domain_root, is_turbo, device_type
        ORDER BY report_date
    )
);

-- Turns an exact zero into NULL and passes any other value (NULL included) through unchanged.
$zero_to_null = ($value) -> {
    RETURN IF($value = 0, NULL, $value);
};

-- Rewrites the monitoring dataset from scratch, sorted by the four key columns.
INSERT INTO `//home/vipplanners/yanpartners/all/monitoring/domains_dataset` WITH TRUNCATE
SELECT
    report_date,
    domain_root,
    is_turbo,
    device_type,
    has_metrika,
    -- Zero metrics are stored as NULL; this is needed for the DataLens formulas to work correctly
    $zero_to_null(pageviews) AS pageviews,
    $zero_to_null(pageviews_ads) AS pageviews_ads,
    $zero_to_null(pageviews_metrika) AS pageviews_metrika,
    $zero_to_null(tech_revenue) AS tech_revenue,
    $zero_to_null(clickgen_all_clicks) AS clickgen_all_clicks,
    $zero_to_null(clickgen_yan_clicks) AS clickgen_yan_clicks,
    $zero_to_null(block_shows) AS block_shows,
    $zero_to_null(rtb_tech_revenue) AS rtb_tech_revenue,
    $zero_to_null(rtb_block_shows) AS rtb_block_shows,
    $zero_to_null(rtb_auctions) AS rtb_auctions,
    $zero_to_null(rtb_auctions_sold) AS rtb_auctions_sold,
    $zero_to_null(direct_tech_revenue) AS direct_tech_revenue,
    $zero_to_null(direct_fraud_tech_revenue) AS direct_fraud_tech_revenue,
    $zero_to_null(direct_block_shows) AS direct_block_shows,
    $zero_to_null(direct_ad_shows) AS direct_ad_shows,
    $zero_to_null(direct_ad_clicks) AS direct_ad_clicks,
    $zero_to_null(pageviews_lag7) AS pageviews_lag7,
    $zero_to_null(pageviews_lag28) AS pageviews_lag28,
    $zero_to_null(pageviews_lag364) AS pageviews_lag364,
    $zero_to_null(tech_revenue_lag7) AS tech_revenue_lag7,
    $zero_to_null(tech_revenue_lag28) AS tech_revenue_lag28,
    $zero_to_null(tech_revenue_lag364) AS tech_revenue_lag364
FROM $report_w_lag
ORDER BY
    report_date,
    domain_root,
    is_turbo,
    device_type;
