PRAGMA yt.UseNativeYtTypes;

{% include '/templates/yql/retargeting/parse_hit.sql.j2' %}
{% include '/templates/yql/retargeting/apply_offline.sql.j2' %}
{% include '/templates/yql/retargeting/count_metrics.sql.j2' %}
{% include '/templates/yql/retargeting/crypta_matching.sql.j2' %}
{% include '/templates/yql/retargeting/crypta_export.sql.j2' %}


-- ============================================================
-- Run date substituted by the template; it selects which hourly hit-log table is read.
$date = '{{ date }}';

-- Only the 11:00 hourly table of that date is used as input.
$hit_log_name = $date || 'T11:00:00';
$row_hit_log = '//logs/beh-profile-hit-log/1h/' || $hit_log_name;

-- Debug output is enabled whenever the template receives a truthy debug_dir.
$debug_dir = '{{ debug_dir }}';
$debug = {{ 'True' if debug_dir else 'False' }};

-- Random sample of the raw hourly hit log, restricted to the two columns
-- selected below (UniqID and ProfileDump).
DEFINE SUBQUERY $sample_hit_log() AS
    SELECT
        UniqID,
        ProfileDump
    FROM $row_hit_log
    TABLESAMPLE SYSTEM(0.1); -- sampling rate is given in percent: 0.1 % == 1/1000
END DEFINE;

-- ============================================================

-- Link-type identifiers passed to $get_statistics / $apply_offline for each slice.
$vulture_crypta_link_type = 3;
$crypta_export_link_type = 4;
$crypta_link_type = 5;


-- Maps a link type to the metric-name prefix of its slice:
--   $vulture_crypta_link_type -> 'online.'
--   $crypta_export_link_type  -> 'offline_export.'
--   anything else             -> 'offline.'
$type_preffix = ($link_type) -> {
    RETURN (
        CASE
            WHEN $link_type == $vulture_crypta_link_type THEN 'online'
            WHEN $link_type == $crypta_export_link_type THEN 'offline_export'
            ELSE 'offline'
        END
    ) || '.';
};


-- Wraps a subquery template and keeps only the rows whose `offers` column is not NULL.
-- $input: a parameterless subquery template; all of its columns are passed through.
DEFINE SUBQUERY $get_w_offers($input) AS
    SELECT
        *
    FROM $input()
    WHERE offers IS NOT NULL;
END DEFINE;


DEFINE ACTION $do_debug_table($statistics_on_hit, $statistics_on_crypta_export, $statistics_on_crypta, $keyword) AS
    -- Builds the debug table for one keyword: for every hit of the online slice that has
    -- offers it stores the stats and offers of the online slice next to the stats and
    -- offers of the offline_export and offline slices (matched by hit_id, NULL when absent).
    --
    -- $statistics_on_hit, $statistics_on_crypta_export, $statistics_on_crypta:
    --     subquery templates with per-hit statistics of the three slices.
    -- $keyword: keyword id; it becomes the suffix of the debug table name.
    --
    -- NOTE(review): the original comment stated that at most 100k examples end up in the
    -- debug table, but no LIMIT is applied in this action - presumably the cap is enforced
    -- upstream (e.g. by $get_statistics in debug mode); confirm.
    $target_table = $debug_dir || '/retargeting/debug_cut.' || CAST($keyword AS String);

    INSERT INTO $target_table WITH TRUNCATE
    SELECT
        online.hit_id AS hit_id,
        online.UniqID AS UniqID,
        online.stats,
        online.offers,
        offline_export.stats,
        offline_export.offers,
        offline.stats,
        offline.offers
    FROM $get_w_offers($statistics_on_hit) AS online
    LEFT JOIN $get_w_offers($statistics_on_crypta_export) AS offline_export
        ON offline_export.hit_id == online.hit_id
    LEFT JOIN $get_w_offers($statistics_on_crypta) AS offline
        ON offline.hit_id == online.hit_id;
END DEFINE;


DEFINE ACTION $calculate_metrics($row_hit_log, $keyword) AS
    -- Computes the metrics of one keyword for the three slices (online, offline_export,
    -- offline) and writes them as a single struct into the temporary table
    -- @metrics_<keyword>. When $debug is set, the per-hit debug table is written as well.
    --
    -- $row_hit_log: subquery template yielding hit-log rows (handed to $parse_hit_log).
    -- $keyword:     keyword id whose events are evaluated.

    -- Events of the given keyword parsed out of the hit log.
    DEFINE SUBQUERY $keyword_hits() AS
        SELECT * FROM $parse_hit_log($row_hit_log, $keyword);
    END DEFINE;

    -- Online slice: statistics computed directly on the hit log.
    DEFINE SUBQUERY $online_statistics() AS
        PROCESS $keyword_hits()
        USING $get_statistics(TableRow(), $vulture_crypta_link_type, $debug);
    END DEFINE;

    -- Offline slice that takes the crypta export into account.
    DEFINE SUBQUERY $export_statistics() AS
        PROCESS $apply_offline($keyword_hits, $exported_users, $date, $keyword, $crypta_export_link_type, $get_actions)
        USING $get_statistics(TableRow(), $crypta_export_link_type, $debug);
    END DEFINE;

    -- Offline slice that takes the crypta matching (glue) into account.
    DEFINE SUBQUERY $matching_statistics() AS
        PROCESS $apply_offline($keyword_hits, $crypta_users, $date, $keyword, $crypta_link_type, $get_actions)
        USING $get_statistics(TableRow(), $crypta_link_type, $debug);
    END DEFINE;

    -- Aggregate every slice into its metrics.
    $online_metrics = SELECT * FROM $count_metrics($online_statistics);
    $export_metrics = SELECT * FROM $count_metrics($export_statistics);
    $matching_metrics = SELECT * FROM $count_metrics($matching_statistics);

    -- Merge the three metric structs into one; every member name gets the prefix
    -- of its slice, e.g. online.man_uniqs.
    $target_table = 'metrics_' || CAST($keyword AS String);

    INSERT INTO @$target_table
    SELECT
        FlattenMembers(
            ($type_preffix($vulture_crypta_link_type), $online_metrics),
            ($type_preffix($crypta_export_link_type), $export_metrics),
            ($type_preffix($crypta_link_type), $matching_metrics)
        ) AS metrics;

    -- Optional debug dump of the per-hit statistics.
    EVALUATE IF $debug
        DO $do_debug_table($online_statistics, $export_statistics, $matching_statistics, $keyword);

END DEFINE;


DO $calculate_metrics($sample_hit_log, 377); -- metrics for e-commerce offers used in retargeting
DO $calculate_metrics($sample_hit_log, 235); -- metrics for reached Metrica goals


-- The @metrics_<keyword> temporary tables written by the actions above are read
-- further down, after this commit.
COMMIT;

-- Harmonic means across the two keywords; the per-keyword metric structs are read back here.

$metrics_377 = (SELECT metrics FROM @metrics_377);
$metrics_235 = (SELECT metrics FROM @metrics_235);


-- Harmonic mean of a list of numbers: n / sum(1 / x_i).
$harmonic_mean = ($values) -> {
    $count = CAST(ListLength($values) AS Double);
    $reciprocals = ListMap($values, ($value) -> { RETURN 1.0 / $value; });
    RETURN $count / ListSum($reciprocals);
};

-- For one metric ($field) present in both metric structs, returns a struct with the
-- harmonic mean of the two `absolute` members and of the two `denominator` members.
$get_harmonic_means = ($first_metrics, $second_metrics, $field) -> {
    $first = $first_metrics.$field;
    $second = $second_metrics.$field;
    RETURN AsStruct(
        $harmonic_mean(AsList($first.absolute, $second.absolute)) AS absolute,
        $harmonic_mean(AsList($first.denominator, $second.denominator)) AS denominator
    );
};


-- Harmonic means between keywords 377 and 235 for every recall metric of every slice.
$harmonic_means = (
    SELECT
        AsStruct(
            -- online slice
            $get_harmonic_means($metrics_377, $metrics_235, 'online.offers_recall') AS `online.offers_recall`,
            $get_harmonic_means($metrics_377, $metrics_235, 'online.union_offers_recall') AS `online.union_offers_recall`,
            $get_harmonic_means($metrics_377, $metrics_235, 'online.uniqs_recall') AS `online.uniqs_recall`,
            -- offline slice
            $get_harmonic_means($metrics_377, $metrics_235, 'offline.offers_recall') AS `offline.offers_recall`,
            $get_harmonic_means($metrics_377, $metrics_235, 'offline.union_offers_recall') AS `offline.union_offers_recall`,
            $get_harmonic_means($metrics_377, $metrics_235, 'offline.uniqs_recall') AS `offline.uniqs_recall`,
            -- offline_export slice
            $get_harmonic_means($metrics_377, $metrics_235, 'offline_export.offers_recall') AS `offline_export.offers_recall`,
            $get_harmonic_means($metrics_377, $metrics_235, 'offline_export.union_offers_recall') AS `offline_export.union_offers_recall`,
            $get_harmonic_means($metrics_377, $metrics_235, 'offline_export.uniqs_recall') AS `offline_export.uniqs_recall`
        ) AS metrics
);

-- Write out all metrics.

-- One struct with every metric: per-keyword metrics get the '377.' / '235.' prefix,
-- the cross-keyword harmonic means get the 'mean.' prefix.
$all_metrics = (
    SELECT
        FlattenMembers(
            AsTuple('377.', $metrics_377),
            AsTuple('235.', $metrics_235),
            AsTuple('mean.', $harmonic_means)
        ) AS metrics
);


-- Turns the metrics struct into a list with one element per member; every element is the
-- member's own struct extended with a `field` member holding the member name.
$expended_metrics = (
    SELECT
        ListMap(
            GatherMembers(metrics),
            ($member) -> {
                -- $member is a (name, value) pair produced by GatherMembers
                RETURN ExpandStruct($member.1, $member.0 AS field);
            }
        ) AS metrics
    FROM $all_metrics
);


-- One row per metric: unfold the list built above.
$flattend = (
    SELECT
        *
    FROM $expended_metrics
    FLATTEN LIST BY metrics
);

-- Final output: the members of each metric struct become columns; the template
-- prefix is stored as `kind` and is also prepended to the metric name.
INSERT INTO @metrics
SELECT
    absolute,
    denominator,
    '{{ prefix }}' AS kind,
    '{{ prefix }}_x_' || field AS field
FROM $flattend
FLATTEN COLUMNS;
