
/*
This script computes aggregated data for Balance, used to calculate agency bonuses.
During the calculation the data is filtered/aggregated by:
- technical domain variants (a domain with and without the www/m prefix is treated as the same domain and its money is summed)
- mirrors (mirrors + mirrors_correction)
- gray-zone domains - orders on such domains are flagged so that billing applies a special calculation to them
- black-zone domains - flagged as well, so that NO commission is paid for such domains
*/


/* ++++++++++++++++++++++++++++++++++++++++
Blacklist preparation.
Each month has its own blacklist.
It is always calculated from the previous month's data.
++++++++++++++++++++++++++++++++++++++++ */

--------------------------------------------------------------------------------
-- Folder and table names are parameterised ------------------------------------

-- Folder that receives one blacklist table per month.
-- NOTE(review): `?` is presumably a placeholder substituted by the launcher.
$blacklist_folder = ?;

-- Fixed seed table, rewritten on every run with a single dummy row.
-- NOTE(review): presumably this guarantees that RANGE($blacklist_folder) always
-- finds at least one table with the (domain, month) columns - confirm.
$blacklist_table_name = $blacklist_folder || "/" || "2016-05-01";

INSERT INTO $blacklist_table_name WITH TRUNCATE
SELECT
    "test.com" AS domain,
    "2016-05-01" AS month;

COMMIT;

-- Tables written at the end of this script. $billing_order_domains is also read
-- by the blacklist calculation ($direct_agency_stats) before it is overwritten.
-- NOTE(review): `?` is presumably a placeholder substituted by the launcher.
$billing_order_domains = ?;
$billing_order_domains_splitted_stat = ?;
-- Direct tables used to resolve an order to its client and the client's agency.
$direct_campaigns = "//home/direct/db/campaigns";
$direct_clients = "//home/direct/db/clients";
-- Hand-maintained input lists for the blacklist checks.
$domain_to_check_table_name = "//home/direct/export/balance/blacklist/domain_to_check";
$domain_to_check_increase_table_name = "//home/direct/export/balance/blacklist/domain_to_check_increase";
$pseudoagencies_table_name = "//home/direct/export/balance/blacklist/pseudoagencies";

-- Inclusive bounds (first-of-month strings) of the reference period used as the
-- baseline in $percent_PK_18.
$date1 = '2018-03-01';
$date2 = '2019-02-01';

--------------------------------------------------------------------------------
-- Date helper functions -------------------------------------------------------

-- Formatter that renders a date as a "YYYY-MM-DD" string.
$format_yyyymmdd = DateTime::Format("%Y-%m-%d");

-- Current moment, tagged with the Europe/Moscow time zone.
$now = AddTimezone(CurrentUtcDatetime(), "Europe/Moscow");

-- Truncates the given date/time to the start of its month and converts the
-- result to a TzDate.
$start_of_month = ($datetime) -> {
    $first_day = DateTime::StartOfMonth($datetime);
    RETURN DateTime::MakeTzDate($first_day);
};

-- Returns the given date with its year component decreased by one, as a TzDate.
-- Only the year is replaced; the other components are passed through unchanged.
$minus_year = ($date) -> {
    $parts = DateTime::Split($date);
    $previous_year = CAST(DateTime::GetYear($parts) - 1 AS Uint16);
    $shifted = DateTime::Update($parts, $previous_year);
    RETURN DateTime::MakeTzDate($shifted);
};

-- Converts a Unix timestamp in seconds into the first day of its month in the
-- Europe/Moscow time zone, returned as a TzDate.
-- A NULL timestamp is treated as 0.
$seconds_to_month = ($seconds) -> {
    $unix_seconds = CAST(COALESCE($seconds, 0) AS Uint32);
    $moscow_time = AddTimezone(DateTime::FromSeconds($unix_seconds), "Europe/Moscow");
    RETURN DateTime::MakeTzDate(DateTime::StartOfMonth($moscow_time));
};

--------------------------------------------------------------------------------
-- Reporting month and the same month of the previous year ---------------------

-- First day of the month preceding the current one: take the start of the
-- current month, step one day back, then truncate to the month start again.
$report_dt = $start_of_month($start_of_month($now) - Interval("P1D"));
-- Same month, one year earlier.
$prev_report_dt = $minus_year($report_dt);

-- "YYYY-MM-DD" strings for the two months above. Despite its name, $thismonth
-- is the reporting month, i.e. the previous calendar month.
$thismonth = $format_yyyymmdd($report_dt);
$prev_year_month = $format_yyyymmdd($prev_report_dt);

--------------------------------------------------------------------------------
-- Reference data --------------------------------------------------------------

-- Domains checked for a shift of spend between channels ($part_one_check).
$domain_to_check = (
    SELECT domain_to_check
    FROM $domain_to_check_table_name
);

-- Domains checked against the 1.3 year-over-year growth threshold
-- ($part_two_check).
$domain_to_check_increase = (
    SELECT domain_to_check_increase
    FROM $domain_to_check_increase_table_name
);

-- Pseudo-agencies: agency ids that are counted as 'PK' rather than 'AK'.
$pseudoagencies = (
    SELECT agency_id
    FROM $pseudoagencies_table_name
);

--------------------------------------------------------------------------------
-- Direct spend taken from the BK export ---------------------------------------

-- Spend per (month, agency, domain), read from $billing_order_domains.
-- Only rows with ServiceID = 7 and a positive Cost are used, limited to months
-- between the earliest and the latest of the periods the checks below need.
-- agency_id is NULL when the client has no positive agency_client_id or when
-- the order could not be matched to a campaign/client.
-- NOTE(review): `* 30.0 / 1000000.0 / 1.18` looks like a conversion to currency
-- followed by removal of 18% VAT - confirm the constants are still correct.
$direct_agency_stats = (
    SELECT
        month,
        agency_id,
        domain,
        SUM(d.Cost) * 30.0 / 1000000.0 / 1.18 AS cost
    FROM $billing_order_domains AS d
    LEFT JOIN $direct_campaigns AS cmp
        ON cmp.cid = d.ServiceOrderID
    LEFT JOIN $direct_clients AS c
        ON c.ClientID = cmp.ClientID
    WHERE
        d.ServiceID = 7
        AND d.Cost > 0
        AND $format_yyyymmdd($seconds_to_month(d.EventDate))
            BETWEEN MIN_OF($date1, $prev_year_month) AND MAX_OF($date2, $thismonth)
    GROUP BY
        $format_yyyymmdd($seconds_to_month(d.EventDate)) AS month,
        IF(c.agency_client_id > 0, c.agency_client_id) AS agency_id,
        d.Domain AS domain
);

--------------------------------------------------------------------------------
-- Blacklist calculation -------------------------------------------------------

-- Baseline share of 'PK' spend per domain over the reference period
-- [$date1, $date2], for domains listed in $domain_to_check.
-- Spend counts as 'PK' when the row has no agency or the agency is listed in
-- $pseudoagencies; everything else counts as 'AK'.
-- Output: one row per domain with percent_PK_18 in [0, 1]; 0 when the domain
-- has no spend at all.
$percent_PK_18 = (
    SELECT
        domain,
        IF((cost_PK + cost_AK) = 0, 0, cost_PK / (cost_PK + cost_AK)) AS percent_PK_18
    FROM (
        -- Pivot the per-type totals into a single row per domain.
        SELECT
            domain,
            SUM(IF(agency_type = 'PK', part_cost, 0)) AS cost_PK,
            SUM(IF(agency_type = 'AK', part_cost, 0)) AS cost_AK
        FROM (
            -- Total spend per (domain, agency type).
            SELECT
                domain,
                agency_type,
                SUM(cost) AS part_cost
            FROM $direct_agency_stats
            WHERE
                month BETWEEN $date1 AND $date2
                AND domain IN $domain_to_check
            GROUP BY
                domain,
                CASE
                    WHEN agency_id IS NULL THEN 'PK'
                    WHEN agency_id IN $pseudoagencies THEN 'PK'
                    ELSE 'AK'
                END AS agency_type
        )
        GROUP BY domain
    )
);

-- Share of 'PK' spend per domain in the reporting month ($thismonth), for
-- domains listed in $domain_to_check.
-- Uses the same 'PK'/'AK' classification as the baseline: no agency or a
-- pseudo-agency is 'PK', everything else is 'AK'.
-- Output: one row per domain with percent_PK_now in [0, 1]; 0 when the domain
-- has no spend at all.
$percent_PK_now = (
    SELECT
        domain,
        IF((cost_PK + cost_AK) = 0, 0, cost_PK / (cost_PK + cost_AK)) AS percent_PK_now
    FROM (
        -- Pivot the per-type totals into a single row per domain.
        SELECT
            domain,
            SUM(IF(agency_type = 'PK', part_cost, 0)) AS cost_PK,
            SUM(IF(agency_type = 'AK', part_cost, 0)) AS cost_AK
        FROM (
            -- Total spend per (domain, agency type) in the reporting month.
            SELECT
                domain,
                agency_type,
                SUM(cost) AS part_cost
            FROM $direct_agency_stats
            WHERE
                month = $thismonth
                AND domain IN $domain_to_check
            GROUP BY
                domain,
                CASE
                    WHEN agency_id IS NULL THEN 'PK'
                    WHEN agency_id IN $pseudoagencies THEN 'PK'
                    ELSE 'AK'
                END AS agency_type
        )
        GROUP BY domain
    )
);

-- Total spend per domain in the same month of the previous year
-- ($prev_year_month), for domains listed in $domain_to_check_increase.
-- All rows are counted, with or without an agency.
$prev_year_cost = (
    SELECT
        domain,
        SUM(cost) AS total_cost_prev
    FROM $direct_agency_stats
    WHERE
        month = $prev_year_month
        AND domain IN $domain_to_check_increase
    GROUP BY domain
);

-- Spend per domain in the reporting month ($thismonth), for domains listed in
-- $domain_to_check_increase. Only rows that have an agency contribute; rows
-- without an agency are counted as 0.
$this_year_cost = (
    SELECT
        domain,
        SUM(IF(agency_id IS NOT NULL, cost, 0)) AS total_cost_this
    FROM $direct_agency_stats
    WHERE
        month = $thismonth
        AND domain IN $domain_to_check_increase
    GROUP BY domain
);

-- Result of the main check (shift of spend away from 'PK').
-- A domain is marked "black_list" when its 'PK' share dropped by at least 0.1
-- compared with the baseline period, otherwise "white_list".
-- NOTE(review): domains with no rows in the reporting month get a NULL
-- percent_PK_now from the LEFT JOIN and therefore end up in "white_list" -
-- confirm this is intended.
$part_one_check = (
    SELECT
        p.domain AS domain,
        p.percent_PK_18,
        n.percent_PK_now,
        p.percent_PK_18 - n.percent_PK_now AS percent_diff,
        IF(
            (p.percent_PK_18 - n.percent_PK_now) >= 0.1,
            "black_list",
            "white_list"
        ) AS list_color
    FROM $percent_PK_18 AS p
    LEFT JOIN $percent_PK_now AS n
        ON p.domain = n.domain
);

-- Result of the additional check (year-over-year growth).
-- increase_YY is the reporting-month spend divided by the spend in the same
-- month of the previous year (0 when the previous-year spend is 0).
-- A domain is "white_list" when increase_YY >= 1.3, otherwise "black_list".
-- NOTE(review): domains with no rows in the reporting month get a NULL
-- total_cost_this from the LEFT JOIN and therefore end up in "black_list" -
-- confirm this is intended.
$part_two_check = (
    SELECT
        p.domain AS domain,
        p.total_cost_prev,
        t.total_cost_this,
        IF(p.total_cost_prev = 0, 0, t.total_cost_this / p.total_cost_prev) AS increase_YY,
        IF(
            IF(p.total_cost_prev = 0, 0, t.total_cost_this / p.total_cost_prev) >= 1.3,
            "white_list",
            "black_list"
        ) AS list_color
    FROM $prev_year_cost AS p
    LEFT JOIN $this_year_cost AS t
        ON p.domain = t.domain
);

--------------------------------------------------------------------------------
-- Resulting blacklist ---------------------------------------------------------

-- The blacklist table for the reporting month lives in the blacklist folder and
-- is named after the month.
$blacklist_table_name = $blacklist_folder || "/" || $thismonth;

-- Union of the domains blacklisted by either check, de-duplicated.
INSERT INTO $blacklist_table_name WITH TRUNCATE
SELECT DISTINCT
    month,
    domain
FROM (
    SELECT domain, $thismonth AS month
    FROM $part_one_check
    WHERE list_color = "black_list"
    UNION ALL
    SELECT domain, $thismonth AS month
    FROM $part_two_check
    WHERE list_color = "black_list"
)
ORDER BY month, domain;

-- Commit so that the statements below can read the freshly written table.
COMMIT;

/* ++++++++++++++++++++++++++
End of the blacklist calculation
++++++++++++++++++++++++++++ */

-- Converts a "YYYY-MM-DD" string into a Unix timestamp in seconds, reading the
-- date in the Europe/Moscow time zone.
$get_date_timestamp_sec = ($date_str) -> {
    $parse_with_tz = DateTime::Parse('%Y-%m-%d, %Z');
    $parsed = $parse_with_tz($date_str || ', ' || 'Europe/Moscow');
    RETURN DateTime::ToSeconds(DateTime::MakeTimestamp($parsed));
};

$bk_min_date = '2019-02-26'; -- date from which the BK collector started to gather correct data

-- Tables where BK stores statistics aggregated by order + domain.
$bk = '//home/yabs/stat/BillingOrderDomains';
$bkSplittedStat = '//home/yabs/stat/BillingOrderDomainsSplittedStat';

-- Table from the analysts, collected for past periods https://st.yandex-team.ru/ADVERTANALYTICS-5364
$analytics = '//home/direct/import/analytics/BillingOrderDomains_20170101_20190225';

-- Combines the BK collector data with the historical data from the analysts.
-- https://st.yandex-team.ru/DIRECT-95016
-- BK rows are taken starting from $bk_min_date; the analytics table is taken
-- in full and has no UpdateTime of its own, so EventDate is used in its place.
$logs = (
    SELECT
        DomainType,
        Cost,
        EventDate,
        CAST(DomainID AS uint64) AS DomainID,
        ServiceID,
        UndoCost,
        UpdateTime,
        BillingExportID,
        ServiceOrderID
    FROM $bk
    WHERE EventDate >= $get_date_timestamp_sec($bk_min_date)

    UNION ALL

    SELECT
        DomainType,
        Cost,
        EventDate,
        CAST(DomainID AS uint64) AS DomainID,
        ServiceID,
        UndoCost,
        EventDate AS UpdateTime,
        BillingExportID,
        ServiceOrderID
    FROM $analytics
);
-- TargetDomain domain dictionary (1/3)
$targetdomains = '//home/yabs/dict/TargetDomain';
-- ClusterDomain domain dictionary (2/4)
-- These are "flipped" domains (Direct flips them before sending to BK: vc.ru/alexey -> alexey.vc.ru)
$clusterdomains = '//home/direct/dict/ClusterDomain';
-- Domain dictionary for the "gray zone" (3/4).
-- A list of domains (gray zone) that must be flagged separately in the export;
-- commission for these domains is paid as for grade B.
$comdepGrayDomainTbl = '//home/direct/dict/ComdepGrayDomain';
-- All gray-zone domains collected into a single list value.
$grayList = (select AGGREGATE_LIST(Domain) from $comdepGrayDomainTbl);
-- Domain mirrors table (exported to YT by Direct, originally taken from search).
-- NOTE(review): $mirrors is not referenced anywhere in the visible script.
$mirrors = '//home/direct/db/mirrors';
-- Domain mirrors table filled in manually through the Direct admin interface.
$mirrorsCorrectionTbl = '//home/direct/db/mirrors_correction';
-- Only corrections with usage_type = 1 are applied.
$mirrorsCorrection = (select * from $mirrorsCorrectionTbl where usage_type = 1);
-- Path + table into which the data for billing is exported.
$destTable = $billing_order_domains;

-- Converts an EventDate (Unix seconds) into a "YYYY-MM-DD" string holding the
-- first day of its month in the Europe/Moscow time zone.
-- A NULL EventDate is treated as 0.
$EventDateToMonth = ($EventDate) -> {
    $unix_seconds = CAST(COALESCE($EventDate, 0) AS Uint32);
    $moscow_time = AddTimezone(DateTime::FromSeconds($unix_seconds), "Europe/Moscow");
    $to_yyyymmdd = DateTime::Format("%Y-%m-%d");
    RETURN $to_yyyymmdd(DateTime::StartOfMonth($moscow_time));
};

-- Tells whether a domain belongs to the gray zone.
-- Returns true when the domain is an empty string, or when the result of
-- Url::GetDomain for any level from 1 to 5 equals an entry of $grayList.
$isGrayDomain = ($domain) -> {
    $gray_domains = Unwrap($grayList);

    -- These values do not depend on the gray-list entry, so they are computed
    -- once per call instead of once per entry inside the lambda below.
    $level1 = Url::GetDomain($domain, 1);
    $level2 = Url::GetDomain($domain, 2);
    $level3 = Url::GetDomain($domain, 3);
    $level4 = Url::GetDomain($domain, 4);
    $level5 = Url::GetDomain($domain, 5);

    -- True when the gray-list entry equals the domain at any of the levels.
    $is_listed = ($gray) -> {
        RETURN
            $level1 == $gray
            OR $level2 == $gray
            OR $level3 == $gray
            OR $level4 == $gray
            OR $level5 == $gray;
    };

    RETURN ($domain == "") OR ListAny(ListMap($gray_domains, $is_listed));
};

-- Lower-cases the input and extracts its host part.
-- NOTE(review): not referenced anywhere in the visible script.
$get_domain = ($domain) -> {
    $lowercased = String::ToLower($domain);
    RETURN Url::GetHost($lowercased);
};

-- Normalises a domain so that its technical variants collapse into one value:
-- lower-case it, take the host, cut the www prefix (Url::CutWWW2) and then drop
-- a leading "m." if there is one.
$get_pure_domain = ($domain) -> {
    $host = Url::GetHost(String::ToLower($domain));
    $host_without_www = Url::CutWWW2($host);
    RETURN IF(
        String::StartsWith($host_without_www, "m."),
        SUBSTRING($host_without_www, 2),
        $host_without_www
    );
};

-- Distinct DomainID -> normalised domain pairs, merged from the TargetDomain
-- and ClusterDomain dictionaries.
-- NOTE(review): if one DomainID maps to several different normalised domains,
-- joins on DomainID will multiply rows - confirm ids are unique across both.
$domains = (
    SELECT DISTINCT
        DomainID,
        Domain
    FROM (
        SELECT
            CAST(DomainID AS uint64) AS DomainID,
            $get_pure_domain(Domain) AS Domain
        FROM $targetdomains

        UNION ALL

        SELECT
            CAST(ClusterDomainID AS uint64) AS DomainID,
            $get_pure_domain(DomainBK) AS Domain
        FROM $clusterdomains
    )
);

-- Builds the billing export table from $logs (ServiceID = 7 only).
-- The nested queries run in three steps, innermost first:
--   1. sum Cost/UndoCost per (export, order, service, event date, DomainID,
--      update time);
--   2. resolve each DomainID to a normalised domain, preferring the manual
--      mirror correction (redirect_domain) when one exists, otherwise "";
--   3. regroup by the normalised domain, collecting the merged DomainIDs.
-- The outer query then adds the flags:
--   isTechDomain  - 1 when more than one DomainID was merged into the row;
--   isBlackDomain - 1 when the domain is in the blacklist of the event's month;
--   isGrayDomain  - result of $isGrayDomain as int8.
INSERT INTO $destTable WITH TRUNCATE
SELECT
    BillingExportID,
    ServiceOrderID,
    ServiceID,
    EventDate,
    d.Domain AS Domain,
    DomainIDs,
    UndoCost,
    UpdateTime,
    IF(ListLength(DomainIDs) > 1, 1, 0) AS isTechDomain,
    COALESCE(b.isBlackDomain, 0) AS isBlackDomain,
    CAST($isGrayDomain(d.Domain) AS int8) AS isGrayDomain,
    Cost
FROM (
    -- Step 3: one row per normalised domain.
    SELECT
        BillingExportID,
        ServiceOrderID,
        ServiceID,
        EventDate,
        SUM(UndoCost) AS UndoCost,
        UpdateTime,
        COALESCE(Domain, "") AS Domain,
        AGGREGATE_LIST(DomainID) AS DomainIDs,
        SUM(Cost) AS Cost
    FROM (
        -- Step 2: attach the normalised domain to every DomainID.
        SELECT
            bk.BillingExportID AS BillingExportID,
            bk.ServiceOrderID AS ServiceOrderID,
            bk.ServiceID AS ServiceID,
            bk.EventDate AS EventDate,
            bk.DomainID AS DomainID,
            bk.UndoCost AS UndoCost,
            bk.UpdateTime AS UpdateTime,
            $get_pure_domain(COALESCE(m.redirect_domain, d.Domain, "")) AS Domain,
            bk.Cost AS Cost
        FROM (
            -- Step 1: collapse the raw log rows per DomainID.
            SELECT
                bk.BillingExportID AS BillingExportID,
                bk.ServiceOrderID AS ServiceOrderID,
                bk.ServiceID AS ServiceID,
                bk.EventDate AS EventDate,
                bk.DomainID AS DomainID,
                bk.UpdateTime AS UpdateTime,
                SUM(bk.UndoCost) AS UndoCost,
                SUM(bk.Cost) AS Cost
            FROM $logs AS bk
            WHERE ServiceID = 7
            GROUP BY
                bk.BillingExportID AS BillingExportID,
                bk.ServiceOrderID AS ServiceOrderID,
                bk.ServiceID AS ServiceID,
                bk.EventDate AS EventDate,
                bk.DomainID AS DomainID,
                bk.UpdateTime AS UpdateTime
        ) AS bk
        LEFT JOIN $domains AS d
            ON d.DomainID = bk.DomainID
        LEFT JOIN ANY $mirrorsCorrection AS m
            ON m.domain = d.Domain
    ) AS a
    GROUP BY
        BillingExportID,
        ServiceOrderID,
        ServiceID,
        EventDate,
        Domain,
        UpdateTime
) AS d
LEFT JOIN (
    -- Every monthly blacklist table in the folder, flattened to (Domain, Month).
    SELECT
        domain AS Domain,
        month AS Month,
        1 AS isBlackDomain
    FROM RANGE($blacklist_folder)
) AS b
    ON b.Domain = d.Domain
    AND $EventDateToMonth(d.EventDate) = b.Month;

-- Builds the export table that is additionally split by IsSearch.
-- Same pipeline as the $destTable export, except that the source is
-- $bkSplittedStat read directly (ServiceID = 7 only) and IsSearch is part of
-- every grouping key.
-- The nested queries run in three steps, innermost first:
--   1. sum Cost/UndoCost per (IsSearch, export, order, service, event date,
--      DomainID, update time);
--   2. resolve each DomainID to a normalised domain, preferring the manual
--      mirror correction (redirect_domain) when one exists, otherwise "";
--   3. regroup by the normalised domain, collecting the merged DomainIDs.
-- The outer query then adds the flags:
--   isTechDomain  - 1 when more than one DomainID was merged into the row;
--   isBlackDomain - 1 when the domain is in the blacklist of the event's month;
--   isGrayDomain  - result of $isGrayDomain as int8.
INSERT INTO $billing_order_domains_splitted_stat WITH TRUNCATE
SELECT
    IsSearch,
    BillingExportID,
    ServiceOrderID,
    ServiceID,
    EventDate,
    d.Domain AS Domain,
    DomainIDs,
    UndoCost,
    UpdateTime,
    IF(ListLength(DomainIDs) > 1, 1, 0) AS isTechDomain,
    COALESCE(b.isBlackDomain, 0) AS isBlackDomain,
    CAST($isGrayDomain(d.Domain) AS int8) AS isGrayDomain,
    Cost
FROM (
    -- Step 3: one row per normalised domain.
    SELECT
        IsSearch,
        BillingExportID,
        ServiceOrderID,
        ServiceID,
        EventDate,
        SUM(UndoCost) AS UndoCost,
        UpdateTime,
        COALESCE(Domain, "") AS Domain,
        AGGREGATE_LIST(DomainID) AS DomainIDs,
        SUM(Cost) AS Cost
    FROM (
        -- Step 2: attach the normalised domain to every DomainID.
        SELECT
            bk.IsSearch AS IsSearch,
            bk.BillingExportID AS BillingExportID,
            bk.ServiceOrderID AS ServiceOrderID,
            bk.ServiceID AS ServiceID,
            bk.EventDate AS EventDate,
            bk.DomainID AS DomainID,
            bk.UndoCost AS UndoCost,
            bk.UpdateTime AS UpdateTime,
            $get_pure_domain(COALESCE(m.redirect_domain, d.Domain, "")) AS Domain,
            bk.Cost AS Cost
        FROM (
            -- Step 1: collapse the raw rows per DomainID.
            SELECT
                bk.IsSearch AS IsSearch,
                bk.BillingExportID AS BillingExportID,
                bk.ServiceOrderID AS ServiceOrderID,
                bk.ServiceID AS ServiceID,
                bk.EventDate AS EventDate,
                bk.DomainID AS DomainID,
                bk.UpdateTime AS UpdateTime,
                SUM(bk.UndoCost) AS UndoCost,
                SUM(bk.Cost) AS Cost
            FROM $bkSplittedStat AS bk
            WHERE ServiceID = 7
            GROUP BY
                bk.IsSearch AS IsSearch,
                bk.BillingExportID AS BillingExportID,
                bk.ServiceOrderID AS ServiceOrderID,
                bk.ServiceID AS ServiceID,
                bk.EventDate AS EventDate,
                bk.DomainID AS DomainID,
                bk.UpdateTime AS UpdateTime
        ) AS bk
        LEFT JOIN $domains AS d
            ON d.DomainID = bk.DomainID
        LEFT JOIN ANY $mirrorsCorrection AS m
            ON m.domain = d.Domain
    ) AS a
    GROUP BY
        IsSearch,
        BillingExportID,
        ServiceOrderID,
        ServiceID,
        EventDate,
        Domain,
        UpdateTime
) AS d
LEFT JOIN (
    -- Every monthly blacklist table in the folder, flattened to (Domain, Month).
    SELECT
        domain AS Domain,
        month AS Month,
        1 AS isBlackDomain
    FROM RANGE($blacklist_folder)
) AS b
    ON b.Domain = d.Domain
    AND $EventDateToMonth(d.EventDate) = b.Month;
