-- Script-wide configuration: default cluster, engine/job pragmas and the
-- externally supplied parameters.
-- NOTE(review): the per-pragma remarks below are inferred from the pragma names;
-- confirm against the YQL pragma reference before changing any value.
USE hahn;
-- Presumably enables query analysis for the DQ engine -- TODO confirm.
PRAGMA dq.AnalyzeQuery = '1';
-- Presumably stops the operation after the first failed job (no retries).
PRAGMA yt.DefaultMaxJobFails = '1';
-- Presumably turns off auto-merge for temporary tables.
PRAGMA yt.TemporaryAutoMerge = 'disabled';
-- Presumably makes Yson:: functions non-strict by default
-- (relevant to the Yson::Lookup* calls used for the profile data).
PRAGMA yson.DisableStrict = '1';
-- Presumably the default memory limit for jobs.
PRAGMA yt.DefaultMemoryLimit = '8G';

-- Parameters supplied by the caller:
--   $start_date - a date string; only its first 7 characters are used later in this script.
--   $yt_pool    - value assigned to the yt.Pool pragma below.
DECLARE $start_date AS String;
DECLARE $yt_pool AS String;

PRAGMA yt.Pool = $yt_pool;


-- Select the month to process: the first 7 characters of $start_date
-- (presumably "YYYY-MM" taken from a "YYYY-MM-DD" string -- confirm with the caller).
$month = SUBSTRING($start_date, 0, 7);

-- Lower/upper table-name bounds handed to RANGE() by the queries below.
-- "-31" is used for every month regardless of its real length.
-- NOTE(review): this assumes RANGE compares table names as strings and that
-- daily tables are named YYYY-MM-DD -- confirm.
$startdate = $month || "-01";
$enddate = $month || "-31";

-- Query text is cut to this length with SUBSTRING.
$MAX_QUERY_LENGTH = 500;
-- Passed as the limit argument of AGGREGATE_LIST when collecting per-user events.
$EVENTS_LIMIT = 1000000;

-- Parser for "YYYY-MM-DD hh:mm:ss" strings.
$datetime_parse = DateTime::Parse("%Y-%m-%d %H:%M:%S");

-- Converts a "YYYY-MM-DD hh:mm:ss" string into a number of seconds.
$timestamp_parse = ($s) -> {
    $parsed = $datetime_parse($s);
    RETURN DateTime::ToSeconds(DateTime::MakeDatetime($parsed));
};

-- Shifts a timestamp (seconds) forward by 3 hours and returns the calendar date
-- (presumably Moscow time, UTC+3, as the name suggests).
$moscow_date = ($eventtime) -> {
    $shifted = $eventtime + 3600 * 3;
    RETURN CAST(CAST($shifted AS DateTime) AS Date);
};

-- All rows of the daily request cube tables whose names fall between
-- $startdate and $enddate.
$source = (
    SELECT *
    FROM RANGE(`statbox/cube/daily/request/v4`, $startdate, $enddate)
);

-- Search requests kept for the squeeze: flagged as good searches, not robots,
-- and whose geo_id rounds up to the country "Russia".
--   ts    - parsed `datetime` in seconds minus 3 hours
--           (presumably converts Moscow local time to UTC -- confirm);
--           Unwrap fails the query if the value is NULL.
--   query - normal_query cut to $MAX_QUERY_LENGTH.
--   ui    - 'touch' when is_mobile, otherwise 'desktop'.
$queries = (
    SELECT
        yandexuid,
        Unwrap(CAST($timestamp_parse(`datetime`) AS Int32)) - 3 * 3600 AS ts,
        SUBSTRING(normal_query, 0, $MAX_QUERY_LENGTH) AS query,
        Unwrap(IF(is_mobile, 'touch', 'desktop')) AS ui
    FROM $source
    WHERE
        is_good_search
        AND NOT is_robot
        AND Geo::RoundRegionById(CAST(geo_id AS Int32), "country").en_name == "Russia"
);
    

-- Attach money.
--
-- Revenue events for the same table-name range: only rows with non-zero revenue
-- and a known yandexuid and showtime are kept.
$money = (
    SELECT
        yandexuid,
        -- +1 second of slack for the event to arrive
        -- (so a money event sorts after a request with the same timestamp).
        Unwrap(CAST(showtime AS Int32)) + 1 AS ts,
        service,
        revenue
    FROM RANGE(`//statbox/cube/daily/event_money/v2.1`, $startdate, $enddate)
    WHERE
        revenue != 0
        AND yandexuid IS NOT NULL
        AND showtime IS NOT NULL
);

-- Python3 UDF `attach_money`, loaded from the attached file attach_money.py.
-- Input:  list of events (ts plus optional query / service / revenue / ui).
-- Output: list of records with ts, query, ui and a revenue dictionary
--         (String -> Double; presumably keyed by service -- see attach_money.py).
$attach_money = Python3::attach_money(
    Callable<
        (
            List<
                Struct<
                    ts: Int32,
                    query: String?,
                    service: String?,
                    revenue: Double?,
                    ui: String?
                >
            >
        ) ->
        List<
            Struct<
                ts: Int32,
                query: String,
                ui: String,
                revenue: Dict<String, Double>
            >
        >
    >,
    FileContent("attach_money.py")
);

-- Sort key for a user's events: the event timestamp.
$event_ts = ($event) -> { RETURN $event.ts; };

-- Search requests and revenue events in one stream.
-- NOTE(review): the two inputs have different column sets; this relies on
-- UNION ALL matching columns by name and filling the missing ones with NULL
-- (consistent with the optional field types declared for $attach_money) -- confirm.
$query_and_money_events = (
    SELECT * FROM $queries
    UNION ALL
    SELECT * FROM $money
);

-- Per yandexuid: collect at most $EVENTS_LIMIT events, sort them by ts and
-- let $attach_money turn them into the list stored in queries_list.
$queries = (
    SELECT
        yandexuid,
        $attach_money(
            ListSort(
                AGGREGATE_LIST(
                    AsStruct(
                        ts AS ts,
                        query AS query,
                        service AS service,
                        revenue AS revenue,
                        ui AS ui
                    ),
                    $EVENTS_LIMIT
                ),
                $event_ts
            )
        ) AS queries_list
    FROM $query_and_money_events
    GROUP BY yandexuid
);


-- Unfold queries_list back into one row per list element.
--   revenue_dict - the revenue dictionary of the element as is;
--   revenue      - sum of all values of that dictionary;
--   day          - date derived from the element's ts via $moscow_date.
$queries = (
    SELECT
        yandexuid,
        enriched.ui AS ui,
        enriched.query AS query,
        enriched.revenue AS revenue_dict,
        ListSum(DictPayloads(enriched.revenue)) AS revenue,
        $moscow_date(enriched.ts) AS day
    FROM $queries
    FLATTEN LIST BY queries_list AS enriched
);

----- Attach clusters

-- Every query row plus the ClusterID found for its query text (NULL when the
-- query is absent from the mapping table). ANY keeps a single match per row.
$queries_with_cluster = (
    SELECT
        q.*,
        qc.ClusterID AS cluster
    FROM $queries AS q
    LEFT JOIN ANY `//home/searchshare/common/query_to_cluster` AS qc
        USING (query)
);

----- Prism data

-- Prism user weights for the table-name range, with columns renamed to a
-- prism_ prefix.
-- NOTE(review): if a yandexuid appears in several tables of the range, this
-- yields several rows per user; the final LEFT JOIN ANY then keeps just one of
-- them -- confirm that is intended.
$prisma = (
    SELECT
        yandexuid,
        cluster AS prism_cluster,
        CPT AS prism_CPT,
        norm_serp_revenue AS prism_norm_serp_revenue
    FROM RANGE(`//home/prism/user_weights`, $startdate, $enddate)
);

----- Socio-demographic data

-- One row per yandexuid from the Crypta profiles log: the values that come with
-- the greatest update_time are taken, then individual fields are looked up.
--   crypta_gender  - "gender" from exact_socdem;
--   crypta_age     - "age_segment" from exact_socdem;
--   crypta_C2_prob - "C2" from income_5_segments.
$crypta = (
    SELECT
        CAST(latest.yandexuid AS String) AS yandexuid,
        Yson::LookupString(latest.socdem, "gender") AS crypta_gender,
        Yson::LookupString(latest.socdem, "age_segment") AS crypta_age,
        Yson::LookupDouble(latest.income, "C2") AS crypta_C2_prob
    FROM (
        SELECT
            yandexuid,
            MAX_BY(exact_socdem, update_time) AS socdem,
            MAX_BY(income_5_segments, update_time) AS income
        FROM RANGE("//statbox/crypta-yandexuid-profiles-log", $startdate, $enddate)
        GROUP BY yandexuid
    ) AS latest
);


-- Result table: one table per processed month, fully overwritten on each run.
$output_table = "//home/searchshare/squeeze_cube/" || $month;

-- Query rows enriched with Crypta and Prism data by yandexuid; rows without a
-- match keep NULLs in the joined columns. The output is sorted by query, day.
INSERT INTO $output_table WITH TRUNCATE
SELECT
    *
FROM $queries_with_cluster AS queries
LEFT JOIN ANY $crypta AS crypta
    ON queries.yandexuid = crypta.yandexuid
LEFT JOIN ANY $prisma AS prisma
    ON queries.yandexuid = prisma.yandexuid
ORDER BY
    query,
    day;
