-- Module-wide settings for the Yson UDF calls used below (Yson::LookupString, Yson::ConvertTo*).
-- NOTE(review): per the YQL Yson documentation, DisableStrict turns off strict type checks in
-- Yson:: calls (a mismatch yields NULL instead of an error) - confirm for the deployed YQL version.
PRAGMA yson.DisableStrict;
-- NOTE(review): per the same documentation, AutoConvert lets Yson::ConvertTo* coerce a node
-- to the requested type when a conversion exists - confirm for the deployed YQL version.
PRAGMA yson.AutoConvert;

-- Returns, as a string, the calendar date lying $days days before $now.
--   $now  - value castable to Date (the reference day).
--   $days - number of days to step back; it is cast to Int16 before use.
-- The result is the Date difference cast to String; callers in this file use it both as a
-- RANGE() table-name bound and as a bound for "%Y-%m-%d"-formatted dates.
$date_days_before = ($now, $days) -> {
    $shift = DateTime::IntervalFromDays(CAST($days AS Int16));
    RETURN CAST(CAST($now AS Date) - $shift AS String)
};

-- Permalinks of the rows in the permalink_to_hotel_info mapping whose permalink
-- equals their own cluster_permalink.
DEFINE SUBQUERY $build_unique_permalinks() AS
    SELECT hotel.permalink AS permalink
    FROM `home/travel/prod/general/altay_mappings/latest/permalink_to_hotel_info` AS hotel
    WHERE hotel.cluster_permalink == hotel.permalink
END DEFINE;

-- Per-permalink booking statistics over a window of calendar days.
--   $now   - reference day (anything $date_days_before accepts).
--   $delay - days to skip immediately before $now.
--   $delta - extra days to look back from the end of the window.
-- The window is [$now - ($delta + 1 + $delay), $now - (1 + $delay)], both ends inclusive,
-- matched against created_at (treated as seconds and formatted as %Y-%m-%d).
-- Output columns: permalink, avg_order (mean order_amount_rub), bookings (row count).
DEFINE SUBQUERY $build_bookings($now, $delay, $delta) AS
    $to_day = DateTime::Format("%Y-%m-%d");
    $first_day = $date_days_before($now, $delta + 1 + $delay);
    $last_day = $date_days_before($now, 1 + $delay);

    -- Orders created inside the window and attributed to a non-zero permalink.
    $orders_in_window = (
        SELECT
            label_permalink,
            order_amount_rub
        FROM `//home/travel/prod/cpa/hotels/orders`
        WHERE
            label_permalink != 0
            AND $to_day(DateTime::FromSeconds(CAST(created_at AS Uint32))) >= $first_day
            AND $to_day(DateTime::FromSeconds(CAST(created_at AS Uint32))) <= $last_day
    );

    SELECT
        label_permalink AS permalink,
        AVG(order_amount_rub) AS avg_order,
        COUNT(*) AS bookings
    FROM $orders_in_window
    GROUP BY label_permalink

END DEFINE;

-- Per-permalink redirect statistics built from the daily travel redirect log tables.
--   $now, $delay, $delta - define the table range
--   [$now - ($delta + 1 + $delay), $now - (1 + $delay)] passed to RANGE().
-- Rows whose "Permalink" field cannot be read as Uint64 are dropped. Redirects are counted
-- per (permalink, day), then summarised per permalink; only permalinks with at least
-- 10 such days are returned.
-- Output columns: permalink, sum_redirects, avg_redirects, std_redirects.
DEFINE SUBQUERY $build_redirects($now, $delay, $delta) AS
    $first_day = $date_days_before($now, $delta + 1 + $delay);
    $last_day = $date_days_before($now, 1 + $delay);

    -- One row per log record: parsed permalink and the date part of iso_eventtime.
    $parsed_log = (
        SELECT
            CAST(Yson::LookupString(FieldsMap, "Permalink") AS Uint64) AS permalink,
            String::Substring(iso_eventtime, 0, 10) AS sample_date
        FROM RANGE(`//logs/travel-redir-log/1d`, $first_day, $last_day)
    );

    -- Redirect count per permalink per day.
    $daily_redirects = (
        SELECT
            permalink,
            COUNT(*) AS redirs_weight
        FROM $parsed_log
        WHERE permalink IS NOT NULL
        GROUP BY permalink, sample_date
    );

    SELECT
        permalink,
        SUM(redirs_weight) AS sum_redirects,
        AVG(redirs_weight) AS avg_redirects,
        STDDEV(redirs_weight) AS std_redirects
    FROM $daily_redirects
    GROUP BY permalink
    HAVING COUNT(*) >= 10
END DEFINE;

-- Per-permalink hit and booking statistics from the partners_stat daily tables.
--   $now, $delay, $delta - define the table range
--   [$now - ($delta + 1 + $delay), $now - (1 + $delay)] passed to RANGE().
-- Source rows are de-duplicated on (Permalink, HotelBooked, CurrentDate, VisitID, UserID),
-- aggregated per (permalink, day) and then per permalink. Only permalinks with more than
-- 5 bookings and more than 500 hits in total are returned.
-- Output columns: permalink, sum/avg/std_bookings, sum/avg/std_hits.
DEFINE SUBQUERY $build_partners($now, $delay, $delta) AS
    $first_day = $date_days_before($now, $delta + 1 + $delay);
    $last_day = $date_days_before($now, 1 + $delay);

    $unique_visits = (
        SELECT DISTINCT
            Permalink AS permalink,
            HotelBooked AS booked,
            CurrentDate AS sample_date,
            VisitID,
            UserID
        FROM RANGE(`//home/travel/prod/general/partners_stat`, $first_day, $last_day)
        WHERE Permalink IS NOT NULL
    );

    -- Hits (all rows) and bookings (rows with booked set) per permalink per day.
    $daily_stats = (
        SELECT
            permalink,
            sample_date,
            COUNT_IF(booked) AS bookings,
            COUNT(*) AS hits
        FROM $unique_visits
        GROUP BY permalink, sample_date
    );

    SELECT
        permalink,
        SUM(bookings) AS sum_bookings,
        AVG(bookings) AS avg_bookings,
        STDDEV(bookings) AS std_bookings,
        SUM(hits) AS sum_hits,
        AVG(hits) AS avg_hits,
        STDDEV(hits) AS std_hits
    FROM $daily_stats
    GROUP BY permalink
    HAVING SUM(bookings) > 5 AND SUM(hits) > 500
END DEFINE;

-- Python3 UDF helpers that inspect the Yson click list of a session.
-- Both entry points receive the Yson value decoded with cyson.loads, so map keys and
-- string values are compared against bytes literals.
$PythonScript = @@
import cyson

def get_element_by_name(attrs, element_name):
    """Return the b'attrs' mapping of the first path element named element_name, or {}."""
    for attr in attrs:
        if attr.get(b'name', None) == element_name:
            return attr.get(b'attrs', {})
    return {}

def click_is_travel(click_data):
    """True when the click's b'$result' element has subtype travel_map or travel_company."""
    attrs = click_data.get(b'baobab_path_with_attrs', [])
    res_attrs = get_element_by_name(attrs, b'$result')
    stype = res_attrs.get(b'subtype', None)
    return stype == b'travel_map' or stype == b'travel_company'

def has_travel_click(clicks):
    """True when at least one click in the list is a travel click; False for None."""
    if clicks is None:
        return False
    for click in clicks:
        if click_is_travel(click):
            return True
    return False

def clicked_permalinks(clicks):
    """Return (permalink, position) pairs for the clicks that carry both values.

    The permalink is read from attrs.externalId.id of the LAST path element, the position
    from attrs.item of the first element named b'item'. Clicks with an empty path, a missing
    value, or a value that is not a non-negative integer are skipped.
    """
    if clicks is None:
        return []
    res = []
    for click in clicks:
        attrs = click.get(b'baobab_path_with_attrs', [])
        if len(attrs) == 0:
            continue
        permalink = attrs[-1].get(b'attrs', {}).get(b'externalId', {}).get(b'id', None)
        pos = get_element_by_name(attrs, b'item').get(b'item', None)
        if permalink is None or pos is None:
            continue
        try:
            pair = (int(permalink), int(pos))
        except (TypeError, ValueError):
            # int() raises TypeError for non-scalar Yson values (maps, lists) and
            # ValueError for non-numeric strings; both mean "malformed click", so skip it.
            continue
        if pair[0] < 0 or pair[1] < 0:
            # The declared result type is Tuple<Uint64, Uint64>; a negative number
            # cannot be converted, so treat it as malformed as well.
            continue
        res.append(pair)
    return res

has_travel_click._yql_convert_yson = (cyson.loads, cyson.dumps)
clicked_permalinks._yql_convert_yson = (cyson.loads, cyson.dumps)
@@;

-- (Yson?) -> Bool: whether the session's click list contains a travel click.
$has_travel_click = Python3::has_travel_click(Callable<(Yson?)->Bool>, $PythonScript);
-- (Yson?) -> List<(permalink, position)>: cards clicked in the session.
$clicked_permalinks = Python3::clicked_permalinks(Callable<(Yson?)->List<Tuple<Uint64, Uint64>>>, $PythonScript);

-- Classifies a session by the first recognised travel wizard in its wizard list.
--   $wizards - Yson list of wizard nodes; each node's `subtype` and `path` are inspected.
-- Returns 'single' for a travel_company wizard at $page.$parallel.$result or
-- $page.$main.$result, 'many' for a travel_map wizard at $page.$main.$result,
-- $page.$top.$result or $page.$parallel.$result, and NULL when no wizard matches.
$check_wizard_type = ($wizards) -> {
    -- ListFlatMap drops the NULLs returned for unrecognised wizards, so $kinds holds
    -- only the matches, in their original order.
    $kinds = ListFlatMap(Yson::ConvertToList($wizards), ($w) -> {
        $subtype = Yson::ConvertToString($w.subtype);
        $path = Yson::ConvertToString($w.path);
        RETURN CASE
            WHEN $subtype == 'travel_company'
                AND ($path == '$page.$parallel.$result' OR $path == '$page.$main.$result')
                THEN 'single'
            WHEN $subtype == 'travel_map'
                AND ($path == '$page.$main.$result'
                    OR $path == '$page.$top.$result'
                    OR $path == '$page.$parallel.$result')
                THEN 'many'
            ELSE NULL
        END;
    });
    -- Indexing a list yields an optional value: NULL when $kinds is empty.
    RETURN $kinds[0];
};

-- Raw session rows from the user_sessions daily tables (read with yt.InferSchema = "1").
--   $now, $delay, $delta - define the table range
--   [$now - ($delta + 1 + $delay), $now - (1 + $delay)] passed to RANGE().
-- Output columns: reqid, permalink, wizards, clicks (source column click_data),
-- sample_date (first 10 characters of time_isoformatted).
DEFINE SUBQUERY $build_user_sessions($now, $delay, $delta) AS
    PRAGMA yt.InferSchema = "1";

    $first_day = $date_days_before($now, $delta + 1 + $delay);
    $last_day = $date_days_before($now, 1 + $delay);

    SELECT
        sessions.reqid AS reqid,
        sessions.permalink AS permalink,
        sessions.wizards AS wizards,
        sessions.click_data AS clicks,
        String::Substring(sessions.time_isoformatted, 0, 10) AS sample_date
    FROM RANGE(`//home/travel/analytics/user_sessions`, $first_day, $last_day) AS sessions
END DEFINE;

-- Show and click statistics for sessions classified as 'single' by $check_wizard_type.
--   $now, $delay, $delta - forwarded unchanged to $build_user_sessions.
-- Every permalink from $build_unique_permalinks() is returned; statistics that did not
-- pass the thresholds below are reported as 0.
-- Output columns: permalink, sum/avg/std_shows, sum/avg/std_clicks.
DEFINE SUBQUERY $build_singleorg($now, $delay, $delta) AS
    -- 'single' sessions with a 0/1 flag telling whether the session had a travel click.
    $flagged_sessions = (
        SELECT
            reqid,
            permalink,
            sample_date,
            IF($has_travel_click(clicks), 1, 0) AS bin_has_travel_click
        FROM $build_user_sessions($now, $delay, $delta)
        WHERE $check_wizard_type(wizards) == 'single'
    );

    -- DISTINCT is applied to plain columns only, hence the separate stage.
    $single_with_travel_click = (
        SELECT DISTINCT
            reqid,
            permalink,
            sample_date,
            bin_has_travel_click
        FROM $flagged_sessions
    );

    -- Shows per permalink per day; days with fewer than 100 rows are ignored.
    $daily_shows = (
        SELECT
            permalink,
            COUNT(*) AS shows_weight
        FROM $single_with_travel_click
        GROUP BY permalink, sample_date
        HAVING COUNT(*) >= 100
    );

    -- Keep permalinks with at least 3 qualifying days.
    $shows_singleorg = (
        SELECT
            permalink,
            SUM(shows_weight) AS sum_shows,
            AVG(shows_weight) AS avg_shows,
            STDDEV(shows_weight) AS std_shows
        FROM $daily_shows
        GROUP BY permalink
        HAVING COUNT(*) >= 3
    );

    -- Travel clicks per permalink per day; a day needs at least 10 rows and a non-zero click sum.
    $daily_clicks = (
        SELECT
            permalink,
            SUM(bin_has_travel_click) AS single_clicks_weight
        FROM $single_with_travel_click
        GROUP BY permalink, sample_date
        HAVING COUNT(*) >= 10 AND CAST(SUM(bin_has_travel_click) AS Double) != 0
    );

    -- Keep permalinks with at least 3 qualifying days.
    $clicks_singleorg = (
        SELECT
            permalink,
            SUM(single_clicks_weight) AS sum_clicks,
            AVG(single_clicks_weight) AS avg_clicks,
            STDDEV(single_clicks_weight) AS std_clicks
        FROM $daily_clicks
        GROUP BY permalink
        HAVING COUNT(*) >= 3
    );

    SELECT
        base.permalink AS permalink,
        shows.sum_shows ?? 0 AS sum_shows,
        shows.avg_shows ?? 0 AS avg_shows,
        shows.std_shows ?? 0 AS std_shows,
        clicks.sum_clicks ?? 0 AS sum_clicks,
        clicks.avg_clicks ?? 0 AS avg_clicks,
        clicks.std_clicks ?? 0 AS std_clicks
    FROM $build_unique_permalinks() AS base
    LEFT JOIN $shows_singleorg AS shows
    ON (base.permalink == shows.permalink)
    LEFT JOIN $clicks_singleorg AS clicks
    ON (base.permalink == clicks.permalink)
END DEFINE;

-- Show, click and position-weighted click statistics for sessions classified as 'many'
-- by $check_wizard_type.
--   $now, $delay, $delta - forwarded unchanged to $build_user_sessions.
-- The permalink of each row comes from the clicked-card pairs returned by
-- $clicked_permalinks, not from the session's own permalink column.
-- Every permalink from $build_unique_permalinks() is returned; missing statistics are 0.
-- Output columns: permalink, sum/avg/std_shows, sum/avg/std_clicks, sum/avg/std_last_click.
DEFINE SUBQUERY $build_carousel($now, $delay, $delta) AS
    -- 'many' sessions with their clicked cards and the travel-click flag.
    $marked_clicks = (
        SELECT
            sample_date,
            $clicked_permalinks(clicks) AS card_clicks,
            $has_travel_click(clicks) AS has_travel_click
        FROM $build_user_sessions($now, $delay, $delta)
        WHERE $check_wizard_type(wizards) == 'many'
    );

    -- One row per clicked card: (permalink, position) unpacked from the pair.
    $card_events = (
        SELECT
            click.0 AS permalink,
            click.1 AS item_n,
            sample_date,
            has_travel_click
        FROM $marked_clicks
        FLATTEN LIST BY card_clicks AS click
    );

    -- "Clicks" are the card rows of sessions that had a travel click.
    $clicks_data = (
        SELECT
            permalink,
            item_n,
            sample_date
        FROM $card_events
        WHERE has_travel_click
    );

    $daily_clicks = (
        SELECT
            permalink,
            COUNT(*) AS click_weight
        FROM $clicks_data
        GROUP BY permalink, sample_date
    );

    $clicks_in_carousel = (
        SELECT
            permalink,
            SUM(click_weight) AS sum_clicks,
            AVG(click_weight) AS avg_clicks,
            STDDEV(click_weight) AS std_clicks
        FROM $daily_clicks
        GROUP BY permalink
    );

    -- "Shows" are counted over the same card rows, without the travel-click filter.
    $daily_shows = (
        SELECT
            permalink,
            COUNT(*) AS show_weight
        FROM $card_events
        GROUP BY permalink, sample_date
    );

    $shows_in_carousel = (
        SELECT
            permalink,
            SUM(show_weight) AS sum_shows,
            AVG(show_weight) AS avg_shows,
            STDDEV(show_weight) AS std_shows
        FROM $daily_shows
        GROUP BY permalink
    );

    -- Each click is weighted by sqrt(1 + position); days with fewer than 5 clicks are ignored.
    $daily_last_clicks = (
        SELECT
            permalink,
            SUM(Math::Sqrt(1 + CAST(item_n AS Double))) AS last_click_weight
        FROM $clicks_data
        GROUP BY permalink, sample_date
        HAVING COUNT(*) >= 5
    );

    -- Keep permalinks with at least 3 qualifying days.
    $last_clicks_in_carousel = (
        SELECT
            permalink,
            SUM(last_click_weight) AS sum_last_click,
            AVG(last_click_weight) AS avg_last_click,
            STDDEV(last_click_weight) AS std_last_click
        FROM $daily_last_clicks
        GROUP BY permalink
        HAVING COUNT(*) >= 3
    );

    SELECT
        base.permalink AS permalink,
        shows.sum_shows ?? 0 AS sum_shows,
        shows.avg_shows ?? 0 AS avg_shows,
        shows.std_shows ?? 0 AS std_shows,
        clicks.sum_clicks ?? 0 AS sum_clicks,
        clicks.avg_clicks ?? 0 AS avg_clicks,
        clicks.std_clicks ?? 0 AS std_clicks,
        last_clicks.sum_last_click ?? 0 AS sum_last_click,
        last_clicks.avg_last_click ?? 0 AS avg_last_click,
        last_clicks.std_last_click ?? 0 AS std_last_click
    FROM $build_unique_permalinks() AS base
    LEFT JOIN $shows_in_carousel AS shows
    ON (base.permalink == shows.permalink)
    LEFT JOIN $clicks_in_carousel AS clicks
    ON (base.permalink == clicks.permalink)
    LEFT JOIN $last_clicks_in_carousel AS last_clicks
    ON (base.permalink == last_clicks.permalink)
END DEFINE;

-- Public interface of this module: only the subqueries listed here are exported.
-- $date_days_before, $check_wizard_type, $build_user_sessions and the Python callables
-- are not in the list and are used only inside this file.
EXPORT $build_unique_permalinks, $build_bookings, $build_redirects, $build_partners, $build_singleorg, $build_carousel;