-- Keeps the "alias." prefix on result columns produced by alias.* selections
-- (YQL strips it by default).
PRAGMA DisableSimpleColumns;

-- Template parameters (filled in by the rendering step before the query runs).
-- Input table read by $ssp_data.
$rtb_path = '{{ rtb_path }}';
-- Input table read by $app_data.
$appmetrica_path = '{{ appmetrica_path }}';
-- Destination tables; both are overwritten (WITH TRUNCATE) at the end of the script.
$output_pairs = '{{ output_pairs }}';
$output_stats = '{{ output_stats }}';
-- Written as both firstTimestamp and lastTimestamp of every produced pair.
$timestamp = {{ ts }};
-- Written to the "ts" column of the stats table.
$date = '{{ date }}';
-- A fingerprint is kept only when its (approximate) distinct-id count is
-- strictly below this value.
$device_threshold = {{ device_threshold }};


-- One tuple per run of the matching action: (sspid, set of feature names that
-- make up the fingerprint for that SSP).
$params = AsList({% for sspid, features in params %} AsTuple('{{ sspid }}', ToSet({{features}})), {% endfor %});


-- Returns the URL-decoded value stored under $key in an "&"-separated
-- query-args string, or NULL when the key is absent.
$query_arg = ($raw_args, $key) -> {
    RETURN Url::Decode(DictLookup(Dsv::Parse($raw_args, "&"), $key));
};

-- RTB log rows for iOS user agents with sspid above 10000 and a non-"::"
-- client address. Numeric query-arg keys are exposed under readable aliases.
$ssp_data = (
    SELECT
        -- identifiers
        sspid,
        uniqid,
        uniqidsource,
        $query_arg(queryargs, "1028") AS userid,
        String::AsciiToUpper(idfa) AS idfa,
        String::AsciiToUpper(ifv) AS ifv,

        -- network and user agent
        clientip6,
        useragent,
        UserAgent::Parse(useragent).OSVersion AS os_version,

        -- screen and locale
        $query_arg(queryargs, "478") AS width,
        $query_arg(queryargs, "477") AS height,
        $query_arg(queryargs, "1029") AS ppi,
        $query_arg(queryargs, "334") AS lang,

        -- geography
        $query_arg(queryargs, "1033") AS country,
        $query_arg(queryargs, "906") AS region,
        $query_arg(queryargs, "905") AS city,

        -- device state
        $query_arg(queryargs, "1032") AS last_bootup,
        $query_arg(queryargs, "1031") AS disk_space,
        $query_arg(queryargs, "1030") AS battery,

        -- mobile operator
        $query_arg(queryargs, "336") AS carrier
    FROM
        $rtb_path
    WHERE
        CAST(sspid AS uint64) > 10000
        AND UserAgent::Parse(useragent).OSFamily == "iOS"
        AND clientip6 != "::"
);

-- Drop requests without a usable SSP user id (missing, "0" or empty).
$ssp_data = (
    SELECT *
    FROM $ssp_data
    WHERE
        userid IS NOT NULL
        AND userid NOT IN ("0", "")
);

-- AppMetrica rows for iOS devices with a known DeviceID and a non-"::" client
-- address.
-- NOTE(review): APIKey == 13 presumably selects one specific application —
-- confirm against the AppMetrica key registry.
$app_data = (
    SELECT
        CryptaID,
        DeviceID,
        -- Upper-cased so that it is comparable with the SSP side, where idfa is
        -- normalized with String::AsciiToUpper; the two sources are later joined
        -- and compared on this column, which is case-sensitive.
        String::AsciiToUpper(OriginalDeviceID) AS idfa,
        ClientIP,
        IFV,
        AppID,
        OperatorID,
        OperatorName,
        ScreenDPI,
        Locale,
        ScreenHeight,
        ScreenWidth,
        OSVersion,
    FROM
        $appmetrica_path
    WHERE
        AppPlatform == "iOS"
        AND DeviceID IS NOT NULL
        AND APIKey == 13
        AND Ip::ToString(ClientIP) != "::"
);

-- True when $value is present and its string form is not the empty string.
$is_not_empty = ($value) -> {
    $as_text = CAST($value AS String);
    RETURN $value IS NOT NULL AND $as_text != '';
};

-- Returns the piece of $str that precedes the first occurrence of $sep
-- (the whole string when $sep does not occur).
$first_part = ($str, $sep) -> {
    $pieces = String::SplitToList($str, $sep);
    RETURN ListHead($pieces);
};

-- Returns the piece of $str that follows the last occurrence of $sep
-- (the whole string when $sep does not occur).
$last_part = ($str, $sep) -> {
    $pieces = String::SplitToList($str, $sep);
    RETURN ListLast($pieces);
};

-- Reduces a carrier string to a numeric operator id: takes what follows the
-- last "250", then what follows the last "-", then what follows the last "_",
-- and casts the remainder to int64 (NULL when it is not a number).
-- NOTE(review): "250" is presumably a mobile country code prefix — confirm.
$canonize_carrier_to_op_id = ($str) -> {
    $after_prefix = $last_part($str, "250");
    $after_dash = $last_part($after_prefix, "-");
    $after_underscore = $last_part($after_dash, "_");
    RETURN CAST($after_underscore AS int64);
};

-- Returns the carrier string unchanged when no numeric operator id can be
-- derived from it, and NULL otherwise (the id is used instead in that case).
$canonize_carrier_to_op_name = ($str) -> {
    $op_id = $canonize_carrier_to_op_id($str);
    RETURN IF($op_id IS NULL, $str);
};

-- Keeps only the leading language part of a locale-like string: everything
-- before the first "_", and of that, everything before the first "-".
$canonize_lang = ($str) -> {
    $before_underscore = $first_part($str, "_");
    RETURN $first_part($before_underscore, "-");
};

-- Builds an orientation-independent "<min>_<max>" key from two screen
-- dimensions. The ordering is whatever MIN_OF/MAX_OF give for the argument
-- type; the call sites in this file pass strings, so it is lexicographic,
-- which is still a stable canonical order for an unordered pair.
$canonize_screen_sizes = ($w, $h) -> {
    $smaller = MIN_OF($w, $h);
    $larger = MAX_OF($w, $h);
    RETURN CAST($smaller AS String) || "_" || CAST($larger AS String);
};

-- Replaces the last dot-separated component of $ip with "0"
-- (e.g. a.b.c.d -> a.b.c.0). Returns NULL when $ip contains no dot.
$round_ip = ($ip) -> {
    $chunks = String::SplitToList($ip, ".");
    $chunk_cnt = ListLength($chunks);
    $prefix = String::JoinFromList(ListTake($chunks, $chunk_cnt - 1), ".");
    RETURN IF($chunk_cnt > 1, $prefix || ".0", NULL);
};

-- Builds a fingerprint string from a list of (feature name, value) tuples:
-- keeps only the features whose name is in the $features set, orders them by
-- name and joins their values with "_".
$convert_features_to_fp = ($all_features, $features) -> {
    -- Intersect by key; the merge lambda keeps the value from $all_features.
    $selected = SetIntersection(
        ToDict($all_features),
        $features,
        ($name, $value, $unused) -> ($value)
    );
    -- Sorting the (name, value) tuples makes the component order deterministic.
    $ordered = ListSort(DictItems($selected));
    $values = ListMap($ordered, ($pair) -> ($pair.1));
    RETURN String::JoinFromList($values, "_");
};

-- Per-row feature list for the app side. Feature names and value formats must
-- mirror the ones built for the SSP side inside $compute_fp_pairs_with_stats,
-- because fingerprints from both sides are compared for exact equality.
$app_features =
    SELECT
        idfa,
        IFV,
        AppID,
        OSVersion,
        DeviceID,
        AsList(
            AsTuple("ip", Ip::ToString(ClientIP)),
            AsTuple("ip.0", $round_ip(Ip::ToString(ClientIP))),
            AsTuple("os_v", OSVersion),
            AsTuple("size", $canonize_screen_sizes(CAST(ScreenHeight AS String), CAST(ScreenWidth AS String))),
            -- Same canonization as the SSP side ("_" first, then "-"), so a
            -- locale like "zh-Hans_CN" reduces to "zh" on both sides instead of
            -- producing an app-only value that can never match.
            AsTuple("lang", $canonize_lang(Locale)),
            AsTuple("ppi", CAST(ScreenDPI AS String)),
            AsTuple("op_name", OperatorName),
            AsTuple("op_id", CAST(OperatorID AS String)),
        ) AS fp_all_features,
    FROM $app_data
;


-- Matches SSP user ids to app device ids for one SSP by comparing fingerprints
-- built from the given feature set, then appends:
--   * one row per matched (ssp id, app id) pair to the temporary table @pairs;
--   * one row of quality statistics to the temporary table @stats.
-- Parameters:
--   $sspid    - SSP id (string) whose rows are taken from $ssp_data.
--   $features - set of feature names that make up the fingerprint.
-- IDFA matches between the two sources serve as the reference for the
-- precision/recall figures written to @stats.
DEFINE ACTION $compute_fp_pairs_with_stats($sspid, $features) AS

    -- App side, step 1: fingerprint for every app row.
    $_app_fp =
        SELECT
            $convert_features_to_fp(fp_all_features, $features) AS fp,
            idfa,
            OSVersion,
            DeviceID,
            AppID,
            IFV AS ifv,
        FROM $app_features
    ;

    -- App side, step 2: per-fingerprint window statistics.
    -- HyperLogLog gives an approximate number of distinct values.
    -- NOTE(review): IdType::MM_DEVICE_ID and Identifiers::IsSignificantIdfa are
    -- external UDFs; the latter presumably rejects empty/placeholder IDFAs — confirm.
    $_app_fp =
        SELECT
            idfa,
            fp,
            DeviceID AS id,
            IdType::MM_DEVICE_ID() AS id_type,
            DeviceID,
            HyperLogLog(DeviceID) OVER w AS ids_cnt_per_fp,
            COUNT(*) OVER w AS fp_size,
            HyperLogLog(idfa) OVER w AS idfa_cnt,
            Identifiers::IsSignificantIdfa(idfa) AS with_idfa
        FROM $_app_fp
        WINDOW w AS (
            PARTITION BY fp
        )
    ;

    -- App side, step 3: one row per (fp, id, id_type, idfa).
    -- with_fp marks fingerprints shared by fewer than $device_threshold ids;
    -- ids_cnt_per_fp is constant within a fingerprint, so SOME() just picks it.
    $app_fp =
        SELECT
            fp, id, id_type, idfa,
            HyperLogLog(DeviceID) AS DeviceID_cnt,
            COUNT(*) AS cnt,
            SOME(idfa_cnt) AS idfa_cnt,
            SOME(ids_cnt_per_fp) AS ids_cnt_per_fp,
            SOME(TableRow()) AS row,
            MAX(Identifiers::IsSignificantIdfa(idfa)) AS with_idfa,

            SOME(ids_cnt_per_fp) < $device_threshold AS with_fp
        FROM $_app_fp
        GROUP BY (fp, id, id_type, idfa)
    ;

    -- App rows whose fingerprint passed the threshold.
    $app_fp_with_limits =
        SELECT
            *
        FROM $app_fp
        WHERE with_fp
    ;

    -- Local redefinition: from here on $ssp_data holds only the rows of the
    -- requested SSP.
    $ssp_data =
        SELECT
            *
        FROM $ssp_data
        WHERE sspid == $sspid
    ;

    -- SSP side: feature list using the same feature names as $app_features.
    -- The OSVersion column computed here is not selected by the next step.
    $ssp_features =
        SELECT
            idfa,
            userid,
            uniqid, uniqidsource, ifv,
            UserAgent::Parse(useragent).OSVersion AS OSVersion,
            AsList(
                AsTuple("ip", CAST(clientip6 AS String)),
                AsTuple("ip.0", $round_ip(clientip6)),
                AsTuple("os_v", CAST(os_version AS String)),
                AsTuple("size", $canonize_screen_sizes(height, width)),
                AsTuple("lang", $canonize_lang(lang)),
                AsTuple("ppi", ppi),
                AsTuple("op_name", $canonize_carrier_to_op_name(carrier)),
                AsTuple("op_id", CAST($canonize_carrier_to_op_id(carrier) AS String)),
            ) AS fp_all_features,
        FROM $ssp_data
    ;

    -- SSP side, step 1: fingerprint for every SSP row; userid becomes the id.
    $_ssp_fp =
        SELECT
            $convert_features_to_fp(fp_all_features, $features) AS fp,
            idfa,
            userid AS id,
            IdType::SSP_USER_ID() AS id_type,
            uniqid, uniqidsource, ifv,
        FROM $ssp_features
    ;

    -- SSP side, step 2: per-fingerprint window statistics (same shape as the app side).
    $_ssp_fp =
        SELECT
            idfa,
            fp,
            id, id_type,
            uniqid,
            HyperLogLog(id) OVER w AS ids_cnt_per_fp,
            COUNT(*) OVER w AS fp_size,
            HyperLogLog(idfa) OVER w AS idfa_cnt,
            Identifiers::IsSignificantIdfa(idfa) AS with_idfa
        FROM $_ssp_fp
        WINDOW w AS (
            PARTITION BY fp
        )
    ;

    -- SSP side, step 3: one row per (fp, id, id_type, idfa) with the threshold flag.
    $ssp_fp =
        SELECT
            fp,
            id, id_type,
            idfa,
            COUNT(*) AS cnt,
            SOME(idfa_cnt) AS idfa_cnt,
            MAX(Identifiers::IsSignificantIdfa(idfa)) AS with_idfa,
            SOME(ids_cnt_per_fp) AS ids_cnt_per_fp,

            SOME(ids_cnt_per_fp) < $device_threshold AS with_fp
        FROM $_ssp_fp
        GROUP BY (fp, id, id_type, idfa)
    ;

    -- SSP rows whose fingerprint passed the threshold.
    $ssp_fp_with_limits =
        SELECT
            *
        FROM $ssp_fp
        WHERE with_fp
    ;

    -- Candidate pairs: SSP and app rows that share a threshold-passing fingerprint.
    -- with_idfa: both sides carry a significant IDFA; true_idfa: and they are equal.
    $ssp_cross_app =
        SELECT
            ssp.fp AS fp,
            ssp.id AS ssp_id,
            ssp.id_type AS ssp_id_type,
            app.id AS app_id,
            app.id_type AS app_id_type,

            ssp.idfa AS ssp_idfa,
            app.idfa As app_idfa,

            ssp.with_idfa AND app.with_idfa AS with_idfa,
            ssp.with_idfa AND app.with_idfa AND ssp.idfa == app.idfa AS true_idfa
        FROM $ssp_fp_with_limits AS ssp
        INNER JOIN $app_fp_with_limits AS app
        USING (fp)
    ;

    -- Reference pairs: rows joined by IDFA instead of fingerprint (no threshold
    -- filter on input). true_fp requires equal fingerprints that also passed the
    -- threshold on both sides; _true_fp only requires equal fingerprints.
    $ssp_cross_app_by_idfa =
        SELECT
            ssp.idfa AS idfa,
            ssp.id AS ssp_id,
            ssp.id_type AS ssp_id_type,
            app.id AS app_id,
            app.id_type AS app_id_type,

            ssp.fp AS ssp_fp,
            app.fp As app_fp,

            ssp.with_fp ANd app.with_fp AS with_fp,
            ssp.with_fp AND app.with_fp AND ssp.fp == app.fp AS true_fp,
            ssp.fp == app.fp AS _true_fp,
        FROM (SELECT * FROM $ssp_fp WHERE with_idfa) AS ssp
        INNER JOIN (SELECT * FROM $app_fp WHERE with_idfa) AS app
        USING (idfa)
    ;

    -- One row per id pair found by fingerprint. MAX_BY orders candidates by the
    -- (true_idfa, value) tuple, so values from an IDFA-confirmed row win.
    $pairs_distinct =
        SELECT
            ssp_id,
            ssp_id_type,
            app_id,
            app_id_type,

            MAX(with_idfa) AS with_idfa,
            MAX(true_idfa) AS true_idfa,

            MAX_BY(fp, AsTuple(true_idfa, fp)) AS fp,
            MAX_BY(ssp_idfa, AsTuple(true_idfa, ssp_idfa)) AS ssp_idfa,
            MAX_BY(app_idfa, AsTuple(true_idfa, app_idfa)) AS app_idfa
        FROM $ssp_cross_app
        GROUP BY (ssp_id, ssp_id_type, app_id, app_id_type)
    ;

    -- One row per id pair found by IDFA.
    $pairs_by_idfa_distinct =
        SELECT
            ssp_id,
            ssp_id_type,
            app_id,
            app_id_type,

            MAX(with_fp) AS with_fp,
            MAX(true_fp) AS true_fp,
            MAX(_true_fp) AS _true_fp
        FROM $ssp_cross_app_by_idfa
        GROUP BY (ssp_id, ssp_id_type, app_id, app_id_type)
    ;

    -- Per side: 1 - (sum of 1/idfa_cnt over rows with idfa_cnt > 1)
    --               / (sum of 1/idfa_cnt over all rows),
    -- computed over the threshold-passing rows.
    $ssp_fp_rate = SELECT 1.-1.*SUM_IF(1./idfa_cnt, idfa_cnt > 1) / SUM(1./idfa_cnt) FROM $ssp_fp_with_limits;
    $app_fp_rate = SELECT 1.-1.*SUM_IF(1./idfa_cnt, idfa_cnt > 1) / SUM(1./idfa_cnt) FROM $app_fp_with_limits;

    -- Human-readable fingerprint type: sorted feature names joined with ",".
    $fp_type = String::JoinFromList(ListSort(DictKeys($features)), ",");
    INSERT INTO @pairs
        SELECT
            fp,
            ssp_id AS id1,
            ssp_id_type AS id1Type,
            app_id AS id2,
            app_id_type AS id2Type,

            $sspid AS sspid,
            ssp_idfa AS idfa1,
            app_idfa AS idfa2,
            with_idfa AS withIdfa,
            true_idfa AS trueIdfa,
            $timestamp AS firstTimestamp,
            $timestamp AS lastTimestamp,
            1 AS cnt,

            AsDict(AsTuple($fp_type, 1)) AS fpTypes,
            $fp_type AS lastFpType

        FROM $pairs_distinct
    ;

    -- Scalar counters over fingerprint-based pairs.
    $pairs_cnt = SELECT COUNT(*) FROM $pairs_distinct;
    $pairs_with_idfa_cnt = SELECT COUNT_IF(with_idfa) FROM $pairs_distinct;
    $pairs_true_idfa_cnt = SELECT COUNT_IF(true_idfa) FROM $pairs_distinct;

    -- Scalar counters over IDFA-based (reference) pairs.
    $idfa_pairs_cnt = SELECT COUNT(*) FROM $pairs_by_idfa_distinct;
    $idfa_pairs_with_fp_cnt = SELECT COUNT_IF(with_fp) FROM $pairs_by_idfa_distinct;
    $idfa_pairs_true_fp_cnt = SELECT COUNT_IF(true_fp) FROM $pairs_by_idfa_distinct;
    $idfa_pairs_true_fp_without_limits_cnt = SELECT COUNT_IF(_true_fp) FROM $pairs_by_idfa_distinct;


    -- One stats row per action invocation. The ratios are plain floating-point
    -- divisions; nothing here guards against a zero denominator.
    INSERT INTO @stats
    SELECT
        $sspid AS sspid,
        $date AS ts,

        $ssp_fp_rate AS fp_rate_ssp,
        $app_fp_rate As fp_rate_app,

        $pairs_cnt AS total_cnt,
        1.*$pairs_with_idfa_cnt / $pairs_cnt AS with_idfa_rate,
        $pairs_with_idfa_cnt AS pairs_with_idfa_cnt,
        $pairs_true_idfa_cnt AS pairs_true_idfa_cnt,  -- must be same as $idfa_pairs_true_fp_cnt
        $idfa_pairs_true_fp_cnt AS idfa_pairs_true_fp_cnt,
        $idfa_pairs_with_fp_cnt AS idfa_pairs_with_fp_cnt,
        $idfa_pairs_true_fp_without_limits_cnt AS idfa_pairs_true_fp_without_limits_cnt,
        -- share of fingerprint-equal reference pairs that survive the threshold
        1.*$idfa_pairs_true_fp_cnt / $idfa_pairs_true_fp_without_limits_cnt AS limit_rate,
        $idfa_pairs_cnt AS idfa_pairs_cnt,

        -- precision: IDFA-confirmed pairs among fingerprint pairs that have IDFAs on both sides
        1.*$pairs_true_idfa_cnt / $pairs_with_idfa_cnt AS precision,
        -- recall: reference pairs recovered by fingerprint (with threshold) among all reference pairs
        1.*$idfa_pairs_true_fp_cnt / $idfa_pairs_cnt AS recall,
        -- recall that would be reached without the threshold
        1.*$idfa_pairs_true_fp_without_limits_cnt / $idfa_pairs_cnt AS available_recall,

        DictKeys($features) AS features,

END DEFINE ;


-- Run the matching action once per configured (sspid, feature set) tuple,
-- committing after each run so that @pairs and @stats accumulate step by step.
EVALUATE FOR $entry IN $params DO BEGIN
    DO $compute_fp_pairs_with_stats($entry.0, $entry.1);
    COMMIT;
END DO;
COMMIT;

-- Publish the accumulated temporary tables, replacing any previous contents.
INSERT INTO $output_stats WITH TRUNCATE
SELECT *
FROM @stats;

INSERT INTO $output_pairs WITH TRUNCATE
SELECT *
FROM @pairs;
