
-- Raw (rlogin, yandexuid) rows read from the radius metrics table for {{ date }}.
-- `login` is exposed as `rlogin`; `yuid` is read through WeakField as a String
-- and then cast to Uint64.
$rlogin_raw = (
    SELECT
        login AS rlogin,
        CAST(WeakField(yuid, "String") AS Uint64) AS yandexuid
    FROM
        `//home/crypta/production/state/radius/metrics/{{ date }}/yuid_rlogin/yuid_rlogin_final_stable`
);

-- De-duplicated (rlogin, yandexuid) pairs from $rlogin_raw.
$rlogin = (
    SELECT
        rlogin,
        yandexuid
    FROM $rlogin_raw
    GROUP BY
        rlogin,
        yandexuid
);

-- Every distinct yandexuid that appears in $rlogin; used to restrict the
-- crypta data to the identifiers this report cares about.
$yandexuids = (
    SELECT
        yandexuid
    FROM $rlogin
    GROUP BY
        yandexuid
);

-- Distinct (cryptaid, yandexuid) pairs, limited to yandexuids present in
-- $yandexuids. $CRYPTA_RAW is defined outside this section of the file.
$crypta = (
    SELECT DISTINCT
        crypta_raw.crypta_id AS cryptaid,
        crypta_raw.yandexuid AS yandexuid
    FROM $CRYPTA_RAW() AS crypta_raw
    INNER JOIN $yandexuids AS known
        ON (crypta_raw.yandexuid == known.yandexuid)
);

-- One row per cryptaid with the list of its yandexuids.
$crypta_list = (
    SELECT
        cryptaid,
        AGGREGATE_LIST(yandexuid) AS yandexuids
    FROM $crypta
    GROUP BY
        cryptaid
);

-- One row per rlogin with the list of its yandexuids.
$rlogin_list = (
    SELECT
        rlogin,
        AGGREGATE_LIST(yandexuid) AS yandexuids
    FROM $rlogin
    GROUP BY
        rlogin
);

-- Distinct (rlogin, cryptaid) pairs that share at least one yandexuid.
$pairs = (
    SELECT DISTINCT
        rlogin_side.rlogin AS rlogin,
        crypta_side.cryptaid AS cryptaid
    FROM $rlogin AS rlogin_side
    INNER JOIN $crypta AS crypta_side
        ON (rlogin_side.yandexuid == crypta_side.yandexuid)
);

-- Intersection of two yandexuid lists.
--   1st argument: yandexuids of an rlogin (the list that is searched);
--   2nd argument: yandexuids of a cryptaid (the list that is walked).
-- Returns the items of the 2nd list that are also found in the 1st one.
-- Items that are not shared are first mapped to NULL and then filtered out.
$intersect = ($rlogin_uids, $crypta_uids) -> {
    -- The item itself when the rlogin list contains it, NULL otherwise.
    $shared_or_null = ($uid) -> {
        return case when ListHas($rlogin_uids, $uid) then $uid else null end;
    };
    $is_present = ($uid) -> {
        return $uid IS NOT NULL;
    };
    return ListFilter(ListMap($crypta_uids, $shared_or_null), $is_present);
};

-- One row per (rlogin, cryptaid) pair from $pairs, enriched with both
-- yandexuid lists, their intersection and their de-duplicated union.
-- $crypta_list is inner-joined on purpose: pessimistic metrics are not needed.
$inner_matching = (
    SELECT
        pair.cryptaid AS cryptaid,
        pair.rlogin AS rlogin,
        by_crypta.yandexuids AS crypta_list,
        by_rlogin.yandexuids AS rlogin_list,
        $intersect(by_rlogin.yandexuids, by_crypta.yandexuids) AS intersect_list,
        ListUniq(ListExtend(by_rlogin.yandexuids, by_crypta.yandexuids)) AS union_list
    FROM $pairs AS pair
    INNER JOIN $rlogin_list AS by_rlogin
        ON (by_rlogin.rlogin == pair.rlogin)
    INNER JOIN $crypta_list AS by_crypta
        ON (by_crypta.cryptaid == pair.cryptaid)
);

-- Rows for rlogins that have no pair in $pairs at all (LEFT ONLY JOIN).
-- They get a NULL cryptaid, empty crypta/intersect lists, and the rlogin's own
-- yandexuids as the union, so the row shape matches $inner_matching.
$not_found_matching = (
    SELECT
        CAST(NULL AS string) AS cryptaid,
        by_rlogin.rlogin AS rlogin,
        ListCreate(UInt64) AS crypta_list,
        by_rlogin.yandexuids AS rlogin_list,
        ListCreate(UInt64) AS intersect_list,
        by_rlogin.yandexuids AS union_list
    FROM $rlogin_list AS by_rlogin
    LEFT ONLY JOIN (
        SELECT DISTINCT rlogin FROM $pairs
    ) AS paired
        ON (by_rlogin.rlogin == paired.rlogin)
);

-- Full matching table: paired rows from $inner_matching followed by the
-- unpaired rlogins from $not_found_matching.
$matching = (
    SELECT
        cryptaid,
        rlogin,
        crypta_list,
        rlogin_list,
        intersect_list,
        union_list
    FROM $inner_matching
    UNION ALL
    SELECT
        cryptaid,
        rlogin,
        crypta_list,
        rlogin_list,
        intersect_list,
        union_list
    FROM $not_found_matching
);

-- Debug helper (intentionally disabled): uncomment to inspect $matching.
-- select *
-- from $matching;

-- Number of distinct rlogins in the raw input ($rlogin_raw).
$rlogins_count = (
    SELECT
        COUNT(DISTINCT rlogin) AS rlogins_count
    FROM $rlogin_raw
);

-- Number of rows in $crypta_list, which is grouped by cryptaid.
$crypta_ids_count = (
    SELECT
        COUNT(*) AS crypta_ids_count
    FROM $crypta_list
);

-- Number of distinct paired rlogins where either side of the pair holds
-- more than one yandexuid.
$matched_rlogins = (
    SELECT
        COUNT(DISTINCT rlogin)
    FROM $inner_matching
    WHERE
        ListLength(rlogin_list) > 1
        OR ListLength(crypta_list) > 1
);

-- Coverage ratio: $matched_rlogins divided by $rlogins_count, as a double.
$coverage = (
    SELECT
        CAST($matched_rlogins AS double) / $rlogins_count
);

-- Number of distinct yandexuids found in any intersect_list of $matching.
$similarity_intersect = (
    SELECT
        COUNT(DISTINCT intersect_yandexuid)
    FROM $matching
    FLATTEN LIST BY intersect_list AS intersect_yandexuid
);

-- Number of distinct yandexuids found in any union_list of $matching.
$similarity_union = (
    SELECT
        COUNT(DISTINCT union_yandexuid)
    FROM $matching
    FLATTEN LIST BY union_list AS union_yandexuid
);

-- Similarity ratio: $similarity_intersect divided by $similarity_union,
-- as a double.
$similarity = (
    SELECT
        CAST($similarity_intersect AS double) / $similarity_union
);

----- Precision/Recall common calculations -------------------------------------
-- For every rlogin: the de-duplicated concatenation of the crypta_list values
-- of all its rows in $matching.
$rlogin_intersections = (
    SELECT
        rlogin,
        ListUniq(ListFlatten(AGGREGATE_LIST(crypta_list))) AS all_crypta_list
    FROM $matching
    GROUP BY
        rlogin
);

-- Length of a list minus one, floored at zero and returned as a double
-- (the "spanning tree size" of the list).
$spanning_tree_edges = ($list) -> {
    RETURN MAX_OF(ListLength($list) - 1., 0.);
};

-- Per (rlogin, cryptaid) row of $matching: spanning-tree sizes of the crypta,
-- rlogin, intersect and union lists, plus the size of the intersection between
-- the rlogin's list and all crypta yandexuids collected for that rlogin.
$spanning_tree_size = (
    SELECT
        rlogin,
        cryptaid,
        $spanning_tree_edges(crypta_list) AS crypta_length,
        $spanning_tree_edges(rlogin_list) AS rlogin_length,
        $spanning_tree_edges(intersect_list) AS intersect_length,
        $spanning_tree_edges(union_list) AS union_length,
        $spanning_tree_edges(rlogin_intersect_list) AS rlogin_intersect_length
    FROM (
        SELECT
            matched.rlogin AS rlogin,
            matched.cryptaid AS cryptaid,
            matched.crypta_list AS crypta_list,
            matched.rlogin_list AS rlogin_list,
            matched.intersect_list AS intersect_list,
            matched.union_list AS union_list,
            $intersect(
                matched.rlogin_list,
                per_rlogin.all_crypta_list
            ) AS rlogin_intersect_list
        FROM $matching AS matched
        INNER JOIN $rlogin_intersections AS per_rlogin
            ON (per_rlogin.rlogin == matched.rlogin)
    )
);

-- Per-rlogin precision and recall built from the spanning-tree sizes.
--   precision = sum(intersect) / sum(crypta), 0 when sum(crypta) is not positive;
--   recall    = sum(intersect) / rlogin_length, 0 when rlogin_length is not positive.
-- Rows where both rlogin_intersect_length and rlogin_length are below 1 are
-- excluded before aggregation.
$precision_recall = (
    SELECT
        rlogin,
        rlogin_length,
        CASE
            WHEN SUM(crypta_length) > 0 THEN SUM(intersect_length) / SUM(crypta_length)
            ELSE 0
        END AS rlogin_precision,
        CASE
            WHEN rlogin_length > 0 THEN SUM(intersect_length) / rlogin_length
            ELSE 0
        END AS rlogin_recall,
        CASE
            WHEN SUM(crypta_length) > 0 THEN SUM(intersect_length)
            ELSE 0
        END AS intersect_length,
        SUM(crypta_length) AS crypta_length
    FROM $spanning_tree_size
    WHERE
        rlogin_intersect_length >= 1
        OR rlogin_length >= 1
    GROUP BY
        rlogin,
        rlogin_length
);

----- Precision metrics --------------------------------------------------------
-- Precision over all rows of $precision_recall (zero-precision rlogins
-- included): mean, median and the cumulative ratio of summed lengths.
$precision_pessimistic = (
    SELECT
        AVG(rlogin_precision) AS precision_mean_pes,
        MEDIAN(rlogin_precision) AS precision_median_pes,
        SUM(intersect_length) / SUM(crypta_length) AS precision_cummulative_pes
    FROM $precision_recall
);

-- Mean and median precision over rlogins whose precision is strictly positive.
$precision_optimistic = (
    SELECT
        AVG(rlogin_precision) AS precision_mean_opt,
        MEDIAN(rlogin_precision) AS precision_median_opt
    FROM $precision_recall
    WHERE
        rlogin_precision > 0
);

-- Scalar: mean optimistic precision taken from $precision_optimistic.
$precision_mean_opt = (
    SELECT
        precision_mean_opt
    FROM $precision_optimistic
);

-- Scalar: median optimistic precision taken from $precision_optimistic.
$precision_median_opt = (
    SELECT
        precision_median_opt
    FROM $precision_optimistic
);

----- Recall metrics --------------------------------------------------------
-- Mean and median of the per-rlogin recall over all rows of $precision_recall.
$recall_optimistic = (
    SELECT
        AVG(rlogin_recall) AS recall_mean_opt,
        MEDIAN(rlogin_recall) AS recall_median_opt
    FROM $precision_recall
);

-- Scalar: mean optimistic recall taken from $recall_optimistic.
$recall_mean_opt = (
    SELECT
        recall_mean_opt
    FROM $recall_optimistic
);

-- Scalar: median optimistic recall taken from $recall_optimistic.
$recall_median_opt = (
    SELECT
        recall_median_opt
    FROM $recall_optimistic
);

----- Final --------------------------------------------------------------------

-- Write one row per metric into @metrics.
-- Each branch of the inner UNION ALL supplies `absolute` and `field`; only the
-- ratio metrics ('similarity', 'coverage') also supply `denominator`.
-- The outer SELECT substitutes `absolute` wherever `denominator` is NULL and
-- prefixes `kind` / `field` with the templated {{ prefix }}.
INSERT INTO @metrics
SELECT
    absolute,
    IF(denominator IS NULL, absolute, denominator) AS denominator,
    '{{ prefix }}' AS kind,
    '{{ prefix }}_x_' || field AS field
FROM (
    SELECT $similarity_intersect AS absolute,
           'similarity_intersect' AS field
    UNION ALL
    SELECT $similarity_union AS absolute,
           'similarity_union' AS field
    UNION ALL
    SELECT $similarity_intersect AS absolute,
           $similarity_union AS denominator,
           'similarity' AS field
    UNION ALL
    SELECT $matched_rlogins AS absolute,
           'matched_rlogins' AS field
    UNION ALL
    SELECT $matched_rlogins AS absolute,
           $rlogins_count AS denominator,
           'coverage' AS field
    UNION ALL
    SELECT $rlogins_count AS absolute,
           'rlogins_count' AS field
    UNION ALL
    SELECT $crypta_ids_count AS absolute,
           'crypta_ids_count' AS field
    UNION ALL
    SELECT $precision_mean_opt AS absolute,
           'precision_mean_opt' AS field
    UNION ALL
    SELECT $precision_median_opt AS absolute,
           'precision_median_opt' AS field
    UNION ALL
    SELECT $recall_mean_opt AS absolute,
           'recall_mean_opt' AS field
    UNION ALL
    SELECT $recall_median_opt AS absolute,
           'recall_median_opt' AS field
);
