USE hahn;

PRAGMA File('libcrypta_identifier_udf.so', 'yt://hahn/home/crypta/public/udfs/libcrypta_identifier_udf.so');
PRAGMA Udf('libcrypta_identifier_udf.so');

-- Report date; the `{{ date }}` placeholder is substituted before the query is run.
$date = '{{ date }}';
-- Report date converted to seconds plus 24 hours (24*60*60 seconds).
-- Not referenced anywhere in the visible part of this script.
$ts_date = SELECT DateTime::ToSeconds(Date('{{ date }}')) + 24*60*60;


-- Only one hourly RTB log table (the `T14:00:00` one) of the report date is read.
$rtb_path = '//logs/bs-rtb-log/1h/' || $date || 'T14:00:00';

-- Fingerprint pair tables; handed to the $get_pairs subquery template by
-- $get_app_ids and $get_ssp_ids respectively.
$app_fp_path = '//home/crypta/production/state/fingerprint/appmetrica/all_pairs';
$ssp_fp_path = '//home/crypta/production/state/fingerprint/ssp/all_pairs';


-- Projection of the page dictionary used to classify RTB traffic in $rtb.
-- PageID is cast to String because $rtb joins it against the log's `pageid`.
$page = (
    SELECT
        CAST(dict.PageID AS String) AS PageID,
        dict.OptionsSsp AS OptionsSsp,
        dict.OptionsMobile AS OptionsMobile,
        dict.OptionsApp AS OptionsApp,
        dict.Description AS Description
    FROM `home/yabs/dict/Page` AS dict
);

-- Both extractors are applied to the RTB `queryargs` column in $rtb; the `._1`
-- member of the result is the first capture group.
-- NOTE(review): the patterns are unanchored, so `184=` would also match inside a
-- longer key that merely ends in "184" — confirm this cannot occur in queryargs.

-- Value following `184=` (letters, digits and '-'); becomes mm_device_id.
$extract_device_id = Re2::Capture('184=([a-zA-Z0-9-]*)');
-- Value following `1028=` (letters, digits, '-' and '.'); becomes the user part of ssp_user_id.
$extract_ssp_user_id = Re2::Capture('1028=([a-zA-Z0-9-.]*)');


-- iOS rows of the sampled RTB log, enriched with attributes of the page.
-- Output columns:
--   pageid, devicetype, useragent, sspid - copied from the log as is;
--   mm_device_id     - first capture of $extract_device_id over queryargs;
--   ssp_user_id      - "<sspid>@<captured id>" when sspid > 10000 and the id was
--                      captured, NULL otherwise;
--   with_idfa        - Identifiers::IsSignificantIdfa(idfa);
--   trafficQualifier - 'ssp' | 'sdk' | 'mobile_web' | 'desktop_web'
--                      (the first matching WHEN wins);
--   description      - Description of the joined page (NULL when the page is
--                      absent from $page, since the join is a LEFT JOIN).
$rtb = (
    SELECT
        main.pageid AS pageid,
        main.devicetype AS devicetype,
        main.useragent AS useragent,
        $extract_device_id(main.queryargs)._1 AS mm_device_id,
        IF(
            CAST(main.sspid AS int64) > 10000
                AND $extract_ssp_user_id(main.queryargs)._1 IS NOT NULL,
            main.sspid || '@' || $extract_ssp_user_id(main.queryargs)._1,
            NULL
        ) AS ssp_user_id,
        main.sspid AS sspid,
        Identifiers::IsSignificantIdfa(main.idfa) AS with_idfa,
        CASE
            WHEN p.OptionsSsp THEN 'ssp'
            WHEN p.OptionsMobile AND p.OptionsApp THEN 'sdk'
            WHEN CAST(main.devicetype AS int32) <= 4 THEN 'mobile_web'
            ELSE 'desktop_web'
        END AS trafficQualifier,
        p.Description AS description
    FROM $rtb_path AS main
    -- 4. Type of the page where the impression was served.
    LEFT JOIN $page AS p
        ON main.pageid == p.PageID
    WHERE UserAgent::Parse(main.useragent).OSFamily == 'iOS'
);

-- Computes fingerprint quality metrics for one traffic slice of $rtb.
--
-- Parameters:
--   $get_id_from_rtb  - lambda that receives a whole $rtb row (TableRow()) and
--                       returns the identifier to be matched against fingerprints;
--   $get_fp_ids       - subquery template called as $get_fp_ids($get_pairs); its rows
--                       must provide id, sspid, withIdfa, trueIdfa, totalTrueIdfa;
--   $get_pairs        - subquery template forwarded untouched to $get_fp_ids;
--   $trafficQualifier - value of $rtb.trafficQualifier to keep; for "sdk" every row
--                       is counted under the synthetic sspid "0";
--   $key, $span       - emitted verbatim as the `key` / `span` output columns.
--
-- Result: one row per (source, with_idfa) where source is either $trafficQualifier
-- (sspid "0", i.e. the total) or "sspid_<sspid>", and with_idfa is
-- "with" | "without" | "all".
--
-- $frac and $date are defined outside of this subquery ($frac is not visible in
-- this part of the file; presumably a safe ratio - confirm at its definition).
DEFINE SUBQUERY $count_stats($get_id_from_rtb, $get_fp_ids, $get_pairs, $trafficQualifier, $key, $span) AS
    $fp_ids = SELECT * FROM $get_fp_ids($get_pairs);

    -- Precision of the fingerprint ids themselves: over all ids (SSPID "0")
    -- and separately per sspid.
    $precisions =
        SELECT
            "0" AS SSPID,
            $frac(COUNT_IF(trueIdfa), COUNT_IF(withIdfa)) AS precision,
            $frac(COUNT_IF(totalTrueIdfa), COUNT_IF(withIdfa)) AS total_precision,
        FROM $fp_ids
        UNION ALL
        (
            -- slices with all sspid (for ssp)
            SELECT
                sspid AS SSPID,
                $frac(COUNT_IF(trueIdfa), COUNT_IF(withIdfa)) AS precision,
                $frac(COUNT_IF(totalTrueIdfa), COUNT_IF(withIdfa)) AS total_precision,
            FROM $fp_ids
            WHERE sspid IS NOT NULL
            GROUP BY sspid
        )
    ;

    -- RTB rows of the requested traffic type with the identifier extracted.
    $rtb_slice =
        SELECT
            $get_id_from_rtb(TableRow()) AS id,
            sspid,
            with_idfa
        FROM $rtb
        WHERE trafficQualifier == $trafficQualifier
    ;

    -- One row per RTB row with match flags. ANY keeps the RTB row from being
    -- multiplied when $fp_ids holds several rows for the same id.
    $log =
        SELECT
            rtb.id IS NOT NULL AS has_id,
            rtb.id IS NOT NULL AND fp.id IS NOT NULL AS has_id_fingerprint,
            rtb.id AS id,
            rtb.sspid AS sspid,
            rtb.with_idfa AS with_idfa,

            fp.id IS NOT NULL AND fp.withIdfa AS fp_with_idfa,
            fp.id IS NOT NULL AND fp.trueIdfa AS fp_true_idfa,
            fp.id IS NOT NULL AND fp.totalTrueIdfa AS fp_total_true_idfa,
        FROM $rtb_slice AS rtb
        LEFT JOIN ANY $fp_ids AS fp
        USING (id)
    ;

    -- Counters per (sspid, with_idfa label). For "sdk" everything goes to
    -- sspid "0"; otherwise only sspid > 10000 is kept (HAVING drops the rest).
    $counts =
        SELECT
            sspid,
            with_idfa,
            COUNT(*) AS total_cnt,
            COUNT_IF(has_id) AS has_id_cnt,
            COUNT_IF(has_id_fingerprint) AS has_id_fingerprint_cnt,
            COUNT_IF(fp_with_idfa) AS fp_with_idfa_cnt,
            COUNT_IF(fp_true_idfa) AS fp_true_idfa_cnt,
            COUNT_IF(fp_total_true_idfa) AS fp_total_true_idfa_cnt,
        FROM $log
        GROUP BY
            IF (
                $trafficQualifier == "sdk",
                "0",
                IF(CAST(sspid AS uint64) > 10000, sspid, NULL)
            ) AS sspid,
            -- Group by the label, not by the raw flag: a NULL flag and a FALSE
            -- flag both map to "without" and must land in the same group,
            -- otherwise two rows with an identical (sspid, "without") key appear.
            IF(with_idfa, "with", "without") AS with_idfa
        HAVING sspid IS NOT NULL
    ;

    -- Add the total over all real sspids as sspid "0". For "sdk" the WHERE
    -- filter leaves nothing to add, because every row is already "0".
    $counts =
        SELECT
            *
        FROM $counts

        -- slices with all sspid (for ssp)
        UNION ALL
        SELECT
            "0" AS sspid,
            with_idfa,
            SUM(total_cnt) AS total_cnt,
            SUM(has_id_cnt) AS has_id_cnt,
            SUM(has_id_fingerprint_cnt) AS has_id_fingerprint_cnt,
            SUM(fp_with_idfa_cnt) AS fp_with_idfa_cnt,
            SUM(fp_true_idfa_cnt) AS fp_true_idfa_cnt,
            SUM(fp_total_true_idfa_cnt) AS fp_total_true_idfa_cnt,
        FROM $counts
        WHERE sspid != "0"
        GROUP BY (with_idfa)
    ;

    -- Add the "all" slice: "with" and "without" summed per sspid.
    $counts_all =
        SELECT * FROM $counts
        UNION ALL
        -- with + without idfa
        (
            SELECT
                sspid,
                "all" AS with_idfa,
                SUM(total_cnt) AS total_cnt,
                SUM(has_id_cnt) AS has_id_cnt,
                SUM(has_id_fingerprint_cnt) AS has_id_fingerprint_cnt,
                SUM(fp_with_idfa_cnt) AS fp_with_idfa_cnt,
                SUM(fp_true_idfa_cnt) AS fp_true_idfa_cnt,
                SUM(fp_total_true_idfa_cnt) AS fp_total_true_idfa_cnt,
            FROM $counts
            GROUP BY sspid
        )
    ;

    -- Attach precision per sspid and keep only rows that either have a
    -- precision or have at least one RTB row with an identifier.
    $counts_all =
          SELECT
                  *
              FROM $counts_all AS res
          LEFT JOIN
          $precisions AS pr
          ON (pr.SSPID == res.sspid)
          WHERE pr.precision IS NOT NULL OR (res.has_id_cnt IS NOT NULL AND res.has_id_cnt > 0)
    ;

    SELECT
          $date AS fielddate,
          IF(sspid == "0", $trafficQualifier, "sspid_" || sspid) AS source,

          $key AS key,
          'ids' AS cnt_type,
          $span AS span,
          with_idfa AS with_idfa,

          total_cnt AS total,
          has_id_cnt AS real,
          $frac(has_id_cnt, total_cnt) AS data_rate,

          has_id_fingerprint_cnt  AS fp,
          -- A missing (NULL) or non-finite precision is reported as 0.
          IF(Math::IsFinite(precision), precision, 0.) AS precision,
          IF(Math::IsFinite(total_precision), total_precision, 0.) AS total_precision,

          $frac(fp_true_idfa_cnt, has_id_cnt) AS recall,
          $frac(fp_true_idfa_cnt, total_cnt) AS total_recall,

          $frac(has_id_fingerprint_cnt, has_id_cnt) AS coverage,
          $frac(has_id_fingerprint_cnt, total_cnt) AS total_coverage,
          -- Constant placeholders; this subquery does not compute them.
          0 AS true_idfa,
          0 AS with_cid,
          0 AS true_cid,
          0. AS cid_rate,
    FROM $counts_all;


END DEFINE;

-- Fingerprint ids for SSP traffic, one row per (sspid, id1) of the SSP pairs table.
--   $get_pairs    - subquery template called with the pairs table path.
-- Output columns:
--   id            - "<sspid>@<id1>", the same shape as $rtb.ssp_user_id;
--   sspid         - grouping key, passed through;
--   withIdfa      - MAX(withIdfa) over the group;
--   trueIdfa      - MAX(trueIdfa) over the group;
--   totalTrueIdfa - MAX(withIdfa) holds and the number of trueIdfa pairs equals
--                   the number of withIdfa pairs.
DEFINE SUBQUERY $get_ssp_ids($get_pairs) AS
    SELECT
        sspid,
        sspid || '@' || id1 AS id,
        MAX(withIdfa) AS withIdfa,
        MAX(trueIdfa) AS trueIdfa,
        (
            MAX(withIdfa)
            AND COUNT_IF(trueIdfa) == COUNT_IF(withIdfa)
        ) AS totalTrueIdfa,
    FROM $get_pairs($ssp_fp_path)
    GROUP BY (sspid, id1);
END DEFINE;

-- Fingerprint ids for app (SDK) traffic, one row per (id, id_type).
--   $get_pairs    - subquery template called with the pairs table path.
-- Each pair contributes both of its ends (id1 and id2) as separate ids.
-- Output columns:
--   id, id_type   - grouping key;
--   withIdfa      - MAX(withIdfa) over the pairs the id takes part in;
--   trueIdfa      - MAX(trueIdfa) over those pairs;
--   totalTrueIdfa - MAX(withIdfa) holds and the number of trueIdfa pairs equals
--                   the number of withIdfa pairs;
--   sspid         - always NULL.
DEFINE SUBQUERY $get_app_ids($get_pairs) AS
    $pairs = SELECT * FROM $get_pairs($app_fp_path);

    -- Both ends of every pair, stacked into a single id column.
    $pair_ends =
        SELECT id1 AS id, id1Type AS id_type, withIdfa, trueIdfa
        FROM $pairs
        UNION ALL
        SELECT id2 AS id, id2Type AS id_type, withIdfa, trueIdfa
        FROM $pairs
    ;

    SELECT
        id,
        id_type,
        MAX(withIdfa) AS withIdfa,
        MAX(trueIdfa) AS trueIdfa,
        (
            MAX(withIdfa)
            AND COUNT_IF(trueIdfa) == COUNT_IF(withIdfa)
        ) AS totalTrueIdfa,
        NULL AS sspid,
    FROM $pair_ends
    GROUP BY (id, id_type);
END DEFINE;

-- Id accessor passed to $count_stats for SDK traffic: returns the
-- mm_device_id member of a $rtb row.
$get_mm_device_id = ($rtb_row) -> ($rtb_row.mm_device_id);

-- Id accessor passed to $count_stats for SSP traffic: returns the
-- ssp_user_id member of a $rtb row.
$get_ssp_user_id = ($rtb_row) -> ($rtb_row.ssp_user_id);

-- All metric rows: every combination of traffic type (sdk / ssp) and pair
-- source. Argument order of $count_stats:
--   (id accessor, fingerprint ids template, pairs template, traffic type, key, span).
-- The $get_*_pairs templates are defined outside the visible part of this file.
$rtb_metrics =
    -- SDK traffic: matched by mm_device_id against the app fingerprint ids.
    SELECT * FROM $count_stats($get_mm_device_id, $get_app_ids, $get_soup_pairs,     "sdk", "soup",  "total")
    UNION ALL
    SELECT * FROM $count_stats($get_mm_device_id, $get_app_ids, $get_soup_new_pairs, "sdk", "soup",  "new")
    UNION ALL
    SELECT * FROM $count_stats($get_mm_device_id, $get_app_ids, $get_total_pairs,    "sdk", "total", "total")
    UNION ALL
    SELECT * FROM $count_stats($get_mm_device_id, $get_app_ids, $get_new_pairs,      "sdk", "total", "new")

    -- SSP traffic: matched by "<sspid>@<user id>" against the SSP fingerprint ids.
    UNION ALL
    SELECT * FROM $count_stats($get_ssp_user_id,  $get_ssp_ids, $get_soup_pairs,     "ssp", "soup",  "total")
    UNION ALL
    SELECT * FROM $count_stats($get_ssp_user_id,  $get_ssp_ids, $get_soup_new_pairs, "ssp", "soup",  "new")
    UNION ALL
    SELECT * FROM $count_stats($get_ssp_user_id,  $get_ssp_ids, $get_total_pairs,    "ssp", "total", "total")
    UNION ALL
    SELECT * FROM $count_stats($get_ssp_user_id,  $get_ssp_ids, $get_new_pairs,      "ssp", "total", "new")
;
