import common symbols $shift_date;

-- YT path of the crypta yandexuid profiles log. It is used as the RANGE source
-- in $raw_crypta_recalc and as the folder that $crypta_profiles lists to check
-- for tables newer than $end_date.
$crypta_profiles_path = "//statbox/crypta-yandexuid-profiles-log";

-- Converts a Yson value to Struct<C2:Double?> and returns its C2 member.
-- The result is an optional Double, as declared by the struct type.
$extract_c2 = ($yson) -> (Yson::ConvertTo($yson, Struct<C2:Double?>).C2);

-- Reads the `profiles_for_14days` export table on $cluster and stamps every row
-- with $start_date as `day`.
--
-- Output columns: yandexuid (String), day, exact_socdem (string dict),
-- C2_prob (C2 member extracted from income_5_segments), crypta_id.
--
-- $end_date is not referenced in the body.
-- NOTE(review): presumably kept so the signature matches $raw_crypta_recalc,
-- since $crypta_profiles selects between the two and calls either one with the
-- same argument list.
DEFINE SUBQUERY $raw_crypta_regular($start_date, $end_date, $cluster) AS
    SELECT
        CAST(p.yandexuid AS String) AS yandexuid,
        $start_date AS day,
        Yson::ConvertToStringDict(p.exact_socdem) AS exact_socdem,
        $extract_c2(p.income_5_segments) AS C2_prob,
        p.crypta_id AS crypta_id
    FROM yt:$cluster.`//home/crypta/production/profiles/export/profiles_for_14days` AS p;
END DEFINE;

-- Reads the tables under $crypta_profiles_path on $cluster, from
-- $shift_date($start_date, -14) through $end_date, and uses each source
-- table's name as `day`.
--
-- Output columns: yandexuid (String), day, exact_socdem (string dict),
-- C2_prob (C2 member extracted from income_5_segments), crypta_id.
--
-- NOTE(review): $shift_date is imported from `common`; presumably it moves a
-- date string by the given number of days -- confirm there.
DEFINE SUBQUERY $raw_crypta_recalc($start_date, $end_date, $cluster) AS
    SELECT
        CAST(p.yandexuid AS String) AS yandexuid,
        TableName(TablePath(), "yt") AS day,
        Yson::ConvertToStringDict(p.exact_socdem) AS exact_socdem,
        $extract_c2(p.income_5_segments) AS C2_prob,
        p.crypta_id AS crypta_id
    FROM yt:$cluster.RANGE(
        $crypta_profiles_path,
        $shift_date($start_date, -14),
        $end_date
    ) AS p;
END DEFINE;

-- Builds crypta profile rows for the users found in $stream.
--
-- Source selection: when $start_date equals $end_date and no table under
-- $crypta_profiles_path has a name greater than $end_date, profiles come from
-- $raw_crypta_regular; otherwise they come from $raw_crypta_recalc.
--
-- The stream is joined to the profiles by yandexuid, keeping profiles whose day
-- lies between $shift_date(stream day, -14) and the stream day. Rows are grouped
-- by (stream day, yandexuid); exact_socdem and C2_prob are taken from the
-- profile with the greatest day, crypta_id is taken via SOME. The distinct
-- user_ids of each group are then flattened into one row per user_id.
DEFINE SUBQUERY $crypta_profiles($stream, $start_date, $end_date, $cluster) AS
    -- Count of tables whose last path component compares greater than $end_date.
    $newer_tables = (
        SELECT COUNT(*)
        FROM yt:$cluster.FOLDER($crypta_profiles_path)
        WHERE Type = "table"
            AND ListLast(String::SplitToList(Path, "/")) > $end_date
    );

    -- Pick the subquery template to read profiles from.
    $profiles = (
        EvaluateCode(
            IF(
                $start_date = $end_date AND $newer_tables = 0,
                QuoteCode($raw_crypta_regular),
                QuoteCode($raw_crypta_recalc)
            )
        )
    );

    SELECT *
    FROM (
        SELECT
            day,
            AGGREGATE_LIST_DISTINCT(user_id) AS user_ids,
            MAX_BY(prof.exact_socdem, prof.day) AS socdem,
            MAX_BY(prof.C2_prob, prof.day) AS C2_prob,
            SOME(prof.crypta_id) AS crypta_id
        FROM $stream() AS ev
        JOIN $profiles($start_date, $end_date, $cluster) AS prof
        USING (yandexuid)
        WHERE prof.day BETWEEN $shift_date(ev.day, -14) AND ev.day
        GROUP BY ev.day AS day, ev.yandexuid AS yandexuid
    )
    FLATTEN LIST BY (user_ids AS user_id);
END DEFINE;

-- Adds crypta columns (socdem, C2_prob, crypta_id) to every row of $stream.
--
-- Parameters:
--   $stream      - subquery producing the rows to enrich; it must expose the
--                  `day`, `user_id` and `yandexuid` columns, which are read here
--                  and in $crypta_profiles.
--   $start_date, $end_date, $cluster - forwarded to $crypta_profiles.
--
-- Rows without a matching profile keep NULL in the added columns (LEFT JOIN).
--
-- The join key is (day, user_id). $crypta_profiles emits profiles per stream
-- day, built only from crypta data in the window ending on that day. Joining
-- on user_id alone let LEFT JOIN ANY attach an arbitrary day's profile
-- (possibly a later day's) to a row when the stream covers several days.
-- ANY is still needed, because several profile rows can share one
-- (day, user_id), e.g. a user_id seen with several yandexuids.
DEFINE SUBQUERY $enrich_crypta($stream, $start_date, $end_date, $cluster) AS
    SELECT
        s.*,
        cp.socdem AS socdem,
        cp.C2_prob AS C2_prob,
        cp.crypta_id AS crypta_id
    FROM $stream() AS s
    LEFT JOIN ANY $crypta_profiles($stream, $start_date, $end_date, $cluster) AS cp
    USING (day, user_id);
END DEFINE;

-- Adds a `yandexuid` column to every row of $stream by matching its user_id
-- against the merged sg log.
--
-- The sg log tables are read from $shift_date($start_date, -30) through
-- $shift_date($end_date, 7). A stream row's user_id is stripped of its first
-- 2 characters when ui = "desktop" and of its first 3 characters otherwise,
-- then compared with sg_uid. Among sg rows whose day falls in
-- [$shift_date(stream day, -30), $shift_date(stream day, 7)], the yandexuid
-- from the row with the greatest day is taken per (day, user_id).
-- Stream rows without a match get NULL yandexuid (LEFT JOIN).
DEFINE SUBQUERY $_add_yuid_to_similar($stream, $start_date, $end_date, $cluster) AS
    $lookback_days = 30;
    $lookahead_days = 7;

    -- Drops the leading characters of user_id before comparing it with sg_uid.
    $strip_prefix = ($user_id, $ui) -> (
        IF($ui = "desktop", Substring($user_id, 2), Substring($user_id, 3))
    );

    -- sg log rows, with the source table name exposed as `day`.
    $merged_sg = (
        SELECT
            TableName(TablePath(), "yt") AS day,
            sg_uid,
            uid AS yandexuid
        FROM yt:$cluster.RANGE(
            "//home/antifraud/daily/logs_merger/sg/merged",
            $shift_date($start_date, -$lookback_days),
            $shift_date($end_date, $lookahead_days)
        )
    );

    -- One yandexuid per (day, user_id): the one from the latest matching sg day.
    $latest_yuid = (
        SELECT
            day,
            user_id,
            MAX_BY(m.yandexuid, m.day) AS yandexuid
        FROM $stream() AS s
        JOIN $merged_sg AS m
        ON $strip_prefix(s.user_id, s.ui) = m.sg_uid
        WHERE m.day BETWEEN $shift_date(s.day, -$lookback_days)
            AND $shift_date(s.day, $lookahead_days)
        GROUP BY s.day AS day, s.user_id AS user_id
    );

    SELECT
        s.*,
        yandexuid
    FROM $stream() AS s
    LEFT JOIN ANY $latest_yuid AS matched
    USING (day, user_id);
END DEFINE;

-- Binds ($stream, $start_date, $end_date, $cluster) and returns a lambda that
-- takes only $world and calls $_add_yuid_to_similar with $world followed by
-- the bound arguments.
-- NOTE(review): presumably this lets the result be used wherever a
-- parameterless subquery (callable as `$x()`) is expected -- confirm at call sites.
$add_yuid_to_similar = ($stream, $start_date, $end_date, $cluster) -> (
    ($world) -> (
        $_add_yuid_to_similar($world, $stream, $start_date, $end_date, $cluster)
    )
);

-- Binds ($stream, $start_date, $end_date, $cluster) and returns a lambda that
-- takes only $world and calls $enrich_crypta with $world followed by the bound
-- arguments.
-- NOTE(review): presumably this lets the result be used wherever a
-- parameterless subquery (callable as `$x()`) is expected -- confirm at call sites.
$enrich_crypta_subquery = ($stream, $start_date, $end_date, $cluster) -> (
    ($world) -> (
        $enrich_crypta($world, $stream, $start_date, $end_date, $cluster)
    )
);

export $enrich_crypta, $add_yuid_to_similar, $enrich_crypta_subquery;
