USE hahn;
PRAGMA yson.DisableStrict;

DECLARE $param_dict AS Dict<String, String>;

-- Root directory for the job's output: the production tree when the "branch"
-- parameter equals "prod", otherwise a per-branch subdirectory of the dev tree.
$branch = $param_dict["branch"];
$base_dir = CASE
    WHEN $branch = "prod" THEN "//home/vipplanners/sow"
    ELSE "//home/vipplanners/sow_dev" || '/' || $branch
END;
-- Output table path.
$result = $base_dir || '/' || "dict/sources/client_main_domain";

-- Reporting window boundaries, all rendered as strings:
--   $yesterday - the day before the current UTC date;
--   $start     - 365 * 3 days before the current UTC date;
--   $start_01  - the first day of the month that contains $start.
$NOW = CurrentUTCDate();
$one_day = Interval("P1D");
$window_start_date = $NOW - $one_day * 365 * 3;

$yesterday = CAST($NOW - $one_day AS String);
$start = CAST($window_start_date AS String);
$start_01 = CAST(DateTime::MakeDate(DateTime::StartOfMonth($window_start_date)) AS String);

pragma yt.TemporaryAutoMerge = "disabled";

------------------------------------------------------------------------------------------------
-- Filter for commercial-department ("comdep") revenue
------------------------------------------------------------------------------------------------

-- Returns TRUE when a row matches at least one whitelist (activity type 1,
-- activity type 2, or the single product id 509935) and its activity type 2
-- is not 'Taxi'.
$is_comdep = ($activity_type_1, $activity_type_2, $product_id) -> {
    -- NULL inputs are replaced by sentinels that match none of the lists below.
    $type_1 = COALESCE($activity_type_1, "!");
    $type_2 = COALESCE($activity_type_2, "!");
    $is_whitelisted =
        $type_1 IN ("Медиа - Баннеры", "Технологии", "Справочники - Контексты")
        OR $type_2 IN ('Яндекс.Взгляд', 'Special projects Market', 'Billboard')
        OR COALESCE($product_id, 0) = 509935;
    RETURN $is_whitelisted AND $type_2 NOT IN ('Taxi');
};

------------------------------------------------------------------------------------------------
-- Domain cleanup logic
------------------------------------------------------------------------------------------------

-- Hosts that must never be reported as a client's own domain. $welcome_domain
-- returns NULL when either the full host or its significant domain is listed here.
$gray_domains = (
    -- social networks and messengers
    'vkontakte.ru', 'vk.cc', 'vk.com', 'vk.me', 'ok.ru', 'odnoklassniki.ru',
    't.me', 'youtube.ru',  'youtu.be', 'youtube.com', 'facebook.com', 'fb.com',
    'instagram.com', 'instagr.am', 'plus.google.com', 'twitter.com',
    'whatsapp.com', 'wa.me', 'sms-viber-whatsapp.com',
    -- classified-ad boards
    'farpost.ru', 'irr.ru', 'avito.ru', 'olx.ua', 'hh.ru', 'kvalto.ru',
    -- popular redirectors
    'bit.ly', 'goo.gl', 'clckto.ru', 't.co',
    -- malicious links (show pop-up ads in the browser, etc.)
    'apytrc.com', 'xg4ken.com',
    -- tracking links, counters, redirects of analytics systems
    'pixel.everesttech.net', 'everesttech.net', 'rtrk1.ru', 'trklp.ru',
    'trtkp.ru', 'tvks.ru', 'onelink.to', 'kochava.com', 'tlnk.io',
    'appsflyer.com', 'adjust.io', 'adjust.com', 'measurementapi.com', 'apps.fm',
    -- 'redirect.appmetrika...' is the historical misspelling; it is kept so that
    -- nothing previously filtered starts leaking, and the correctly spelled
    -- host (matching the .ru entry below) is added next to it.
    'redirect.appmetrika.yandex.com', 'redirect.appmetrica.yandex.com',
    'appmetrica.yandex.com',
    'redirect.appmetrica.yandex.ru',  'appmetrica.yandex.ru',
    'admetrica.ru', 'fas.st', 'app.android',
    -- redirects and counters of advertising systems
    'r.mail.ru', 'doubleclick.net', 'begun.ru', 'matchid.adfox.yandex.ru',
    'yabs.yandex.ru', 'an.yandex.ru', 'bs.yandex.ru', 'ba.yandex.ru',
    -- app stores (?)
    'play.google.com', 'itunes.apple.com', 'geo.itunes.apple.com',
    'search.itunes.apple.com', 'store.apple.com', 'apple.com',
    -- affiliate (CPA) networks
    'go2cloud.org', 'cscore.ru',
    -- hostings, site and landing-page builders that do not create subdomains
    'testlpgenerator.ru', 'sites.google.com', 'business.google.com',
    -- public file and document storage services
    'docs.google.com', 'drive.google.com', 'yadi.sk', 'disk.yandex.ru',
    -- marketplaces and listing aggregators (international clients)
    'booking.com', 'booking.ru', 'aliexpress.com', 'aliexpress.ru', 'taobao.com',
    'alibaba.com', 'alibaba.ru', 'tripadvisor.ru', 'tripadvisor.com',
    -- other
    'ya.ru', 'yandex.net', 'google.ci',
    -- apps (the app-store hosts are already listed above and are not repeated)
    'mrqz.me', 'taplink.cc', 'clck.ru', 'smart.link', 'sonline.su',
);


-- Significant domains under which the third-level name identifies the
-- advertiser: for these $welcome_domain keeps three levels of the host
-- instead of the significant domain itself.
$domains_lvl3 = (
    -- "regional" domains
    'msk.ru', 'spb.ru', 'nsk.ru', 'vl.ru', 'nov.ru', 'tom.ru', 'irk.ru', 'ur.ru',
    'perm.ru', 'tomsk.ru', 'nnov.ru', 'spb.su', 'of.by',
    'kiev.ua', 'od.ua', 'dp.ua', 'lviv.ua', 'dn.ua', 'biz.ua', 'in.ua',
    'com.tr', 'ru.net', 'ru.com', 'ком.рф', 'спб.рф',

    -- a dealer network is hosted on the subdomains
    'cherry.ru', 'lada.ru',

    -- single out individual Yandex services
    'yandex.ru', 'yandex.kz', 'yandex.uz', 'yandex.by',
    'yandex.ua', 'yandex.com', 'yandex.com.tr',

    -- trading platforms
    'deal.by', 'satu.kz', 'pulscen.ru', 'tiu.ru', 'business.site',
    'apishops.ru', 'myinsales.ru',
    'domovoys.ru', 'gdekupitkvartiru.ru', 'gdekupitkvartiru-spb.ru',
    'novostroef.ru', 'kvarty.ru',

    -- hostings, site and landing-page builders
    'blogspot.ru', 'blogspot.com', 'wordpress.com', 'narod.ru', 'narod2.ru',
    'jimdo.com', 'wixsite.com', 'wix.com', 'tilda.ws', 'bitrix24.site',
    'ucoz.ru', 'ucoz.net', 'ukit.me', 'umi.ru', 'nethouse.ru', 'mya5.ru', 'tb.ru',
    'ex-in.online', 'ex-in.ua', 'ex-in.ru', 'myflexbe.com', 'satom.ru', 'blxy.ru',
    'bayoner.ru', 'all.biz', 'bmbullet.ru', 'bullet.bz', 'lpmotortest.com',
    'plp7.ru', 'tobiz.net', 'justclick.ru', 'beget.tech', 'dnk.bz', 'usluga.me',
    'pp.ru', 'turbo.site', 'timepad.ru', 'storeland.ru', 'yclients.com', 'onelink.me',
);


-- Truncates $domain to at most $target_level levels: the level actually used is
-- the smaller of the host's own level and $target_level, falling back to 1 when
-- that value is NULL. A NULL $domain yields NULL.
$domain_to_level = ($domain, $target_level) -> {
    $capped_level = CAST(MIN_OF(Url::GetDomainLevel($domain), $target_level) AS Uint8);
    $truncated = Url::GetDomain($domain, nvl($capped_level, 1));
    RETURN CASE
        WHEN $domain IS NULL THEN NULL
        ELSE $truncated
    END;
};


-- Extracts the host of $url, strips the leading "www" prefix, converts punycode
-- to a readable host name and lowercases the result.
-- Returns NULL when no non-empty host can be obtained.
$billing_domain = ($url) -> {
    $host = Url::GetHost($url);
    $host_without_www = Url::CutWWW2($host);
    $readable_host = Url::ForcePunycodeToHostName($host_without_www);
    RETURN IF(
        COALESCE($readable_host, "") = "",
        NULL,
        String::ToLower($readable_host)
    );
};


-- Maps a URL to the domain that represents the advertiser:
--   * NULL when the URL has no significant domain, or when the host or its
--     significant domain is listed in $gray_domains;
--   * the host cut to three levels when the significant domain is listed in
--     $domains_lvl3;
--   * the significant domain otherwise.
$welcome_domain = ($url) -> {
    $host = $billing_domain($url);
    $root = Url::GetSignificantDomain($host);
    -- NULLs are replaced by sentinels so that every predicate below is a plain Bool.
    $host_key = COALESCE($host, "!");
    $root_key = COALESCE($root, "!");
    -- NOTE(review): for list entries that are themselves third-level
    -- (e.g. 'yandex.com.tr') level 3 appears to return the entry itself rather
    -- than a subdomain of it - confirm this is intended.
    RETURN CASE
        WHEN COALESCE($root, "") = ""
            OR $host_key IN $gray_domains
            OR $root_key IN $gray_domains
            THEN NULL
        WHEN $root_key IN $domains_lvl3 THEN $domain_to_level($host, 3)
        ELSE $root
    END;
};

------------------------------------------------------------------------------------------------
-- AWAPS domains
------------------------------------------------------------------------------------------------

-- For every (service_order_id, welcome_domain, billing_domain) combination,
-- counts the distinct creatives whose link resolves to that pair of domains.
-- Creatives are matched to orders through awaps_placementid; creatives whose
-- link has no billing domain are dropped.
$awaps_creatives =
    SELECT
        service_order_id,
        welcome_domain,
        billing_domain,
        COUNT(DISTINCT cr.creative_id) AS cnt_creatives
    FROM (
        -- One row per placement / domain pair / creative.
        SELECT
            awaps_placementid,
            welcome_domain,
            billing_domain,
            creative_id
        FROM `//home/awaps/cooked/creative`
        GROUP COMPACT BY
            awaps_placementid,
            $welcome_domain(link_url) AS welcome_domain,
            $billing_domain(link_url) AS billing_domain,
            bs_creative_nmb AS creative_id
    ) AS cr
    INNER JOIN (
        -- Placement -> order mapping.
        SELECT DISTINCT
            awaps_placementid,
            flight_nmb AS service_order_id
        FROM `//home/awaps/cooked/flight`
    ) AS fl
        ON fl.awaps_placementid = cr.awaps_placementid
    WHERE cr.billing_domain IS NOT NULL
    GROUP COMPACT BY
        cr.welcome_domain AS welcome_domain,
        cr.billing_domain AS billing_domain,
        fl.service_order_id AS service_order_id;


-- One row per AWAPS order: the billing and welcome domains taken from the
-- domain combination with the largest number of creatives.
-- NOTE(review): the two MAX_BY calls are evaluated independently, so on ties in
-- cnt_creatives they are not guaranteed to come from the same row - confirm
-- this is acceptable.
$awaps_domains =
    SELECT
        service_order_id,
        MAX_BY(billing_domain, cnt_creatives) AS billing_domain,
        MAX_BY(welcome_domain, cnt_creatives) AS welcome_domain
    FROM $awaps_creatives
    GROUP COMPACT BY
        service_order_id;

------------------------------------------------------------------------------------------------
-- Directory (Spravochnik) domains
------------------------------------------------------------------------------------------------

-- One row per campaign (exposed as service_order_id) with the domains derived
-- from its advert links. Link types are tried in priority order:
-- CLICK_TO_ACTION, then TEXT_ADVERT, then PROMOTION; the first one that yields
-- a non-NULL domain wins (independently for welcome and billing domains).
$sprav_orders =
    SELECT
        service_order_id,
        COALESCE(
            $welcome_domain(link_click_to_action),
            $welcome_domain(link_text_advert),
            $welcome_domain(link_promotion)
        ) AS welcome_domain,
        COALESCE(
            $billing_domain(link_click_to_action),
            $billing_domain(link_text_advert),
            $billing_domain(link_promotion)
        ) AS billing_domain
    FROM (
        -- Pivot: one link column per advert type (MAX picks one link per type).
        SELECT
            service_order_id,
            MAX(IF(type = "CLICK_TO_ACTION", link, NULL)) AS link_click_to_action,
            MAX(IF(type = "TEXT_ADVERT", link, NULL)) AS link_text_advert,
            MAX(IF(type = "PROMOTION", link, NULL)) AS link_promotion
        FROM (
            -- The link is stored inside a JSON string held in body["value"].
            SELECT
                campaign_id AS service_order_id,
                Yson::ConvertToString(type) AS type,
                Yson::ConvertToString(
                    Yson::ParseJson(Yson::ConvertToString(body["value"]))["link"]
                ) AS link
            FROM `//home/geoadv/geoprod_backend/production/export/smvp_latest_moderated_advert`
        )
        -- Only links with a usable host take part in the pivot.
        WHERE $billing_domain(link) IS NOT NULL
        GROUP COMPACT BY
            service_order_id
    );

-- Number of rows in the banners-to-permalink table for every
-- (client, permalink) pair.
$client2permalinks =
    SELECT
        ClientID AS client_id,
        permalink,
        COUNT(*) AS cnt
    FROM `//home/altay/y-direct/current-state/banners-to-permalink`
    GROUP COMPACT BY
        ClientID,
        permalink;

-- One row per client: domains derived from the 'main' company URL of the
-- permalink that has the highest row count for that client in
-- $client2permalinks. URLs without a usable host are ignored.
$sprav_clients =
    SELECT
        client_id,
        $welcome_domain(link_url) AS welcome_domain,
        $billing_domain(link_url) AS billing_domain
    FROM (
        SELECT
            cp.client_id AS client_id,
            MAX_BY(c.value, cp.cnt) AS link_url
        FROM `//home/altay/db/export/current-state/snapshot/company_to_url` AS c
        INNER JOIN $client2permalinks AS cp
            ON cp.permalink = c.permalink
        WHERE c.type = 'main'
            AND $billing_domain(c.value) IS NOT NULL
        GROUP COMPACT BY
            cp.client_id
    );

------------------------------------------------------------------------------------------------
-- Direct revenue
------------------------------------------------------------------------------------------------

-- Paths of the source tables to read: tables of the "monthly" folder whose
-- "key" attribute lies in [$start_01, $yesterday] plus tables of the "daily"
-- folder whose "key" attribute lies in [$start, $yesterday].
$direct_table_list =
    SELECT
        AGGREGATE_LIST(Path)
    FROM (
        SELECT
            Path
        FROM FOLDER("//home/comdep-analytics/zoldatoff/dashboards/domain_agency_report/monthly", "key")
        WHERE Type = "table"
            AND Yson::LookupString(Attributes, "key") BETWEEN $start_01 AND $yesterday

        UNION ALL

        SELECT
            Path
        FROM FOLDER("//home/comdep-analytics/zoldatoff/dashboards/domain_agency_report/daily", "key")
        WHERE Type = "table"
            AND Yson::LookupString(Attributes, "key") BETWEEN $start AND $yesterday
    );


-- Direct revenue summed per client / agency / domain pair / day / currency over
-- [$start, $yesterday], read from every table listed in $direct_table_list.
-- All rows are labelled with the Direct service name; rows with a
-- non-positive client_id are skipped.
$direct_revenue =
    SELECT
        client_id,
        agency_id,
        welcome_domain,
        billing_domain,
        dt,
        'Директ: Рекламные кампании' AS service_name,
        currency,
        SUM(cost) AS cost_net
    FROM EACH($direct_table_list)
    WHERE dt BETWEEN $start AND $yesterday
        AND client_id > 0
    GROUP COMPACT BY
        client_id,
        agency_id,
        welcome_domain,
        billing_domain,
        dt,
        currency;

-- Latest date present in the Direct revenue; the non-Direct queries use it as
-- the upper bound of their date filters.
$max_date =
    SELECT MAX(dt)
    FROM $direct_revenue;

------------------------------------------------------------------------------------------------
-- Non-Direct revenue
------------------------------------------------------------------------------------------------

-- Daily completions (amount without VAT) for comdep products of every service
-- except Direct and one-off sales, over [$start, $max_date].
-- Domains come from the Directory sources (order first, then client) for the
-- 'Справочник' service and from the AWAPS order mapping for everything else.
$nondirect_revenue_daily =
    SELECT
        client_id,
        agency_id,
        dt,
        currency,
        service_name,
        billing_domain,
        welcome_domain,
        SUM(m.completion_rub_sum_wo_nds) AS cost_net
    FROM `//home/balance/prod/bo/mv_compl_30_days` AS m
    LEFT JOIN `//home/balance/prod/bo/v_product` WITH INLINE AS p
        ON p.id = m.product_id
    LEFT JOIN `//home/balance/prod/bo/t_service` WITH INLINE AS s
        ON s.id = m.service_id
    LEFT JOIN $awaps_domains AS aw
        ON aw.service_order_id = m.service_order_id
    LEFT JOIN $sprav_orders AS so
        ON so.service_order_id = m.service_order_id
    LEFT JOIN $sprav_clients AS sc
        ON sc.client_id = m.client_id
    WHERE m.start_dt BETWEEN $start AND $max_date
        AND $is_comdep(p.activity_type1_name, p.activity_type2_name, m.product_id)
        AND COALESCE(s.name, "!") NOT IN ("Директ: Рекламные кампании", "Разовые продажи")
        AND m.completion_rub_sum_wo_nds > 0
        AND m.client_id > 0
    GROUP COMPACT BY
        m.client_id AS client_id,
        m.agency_id AS agency_id,
        m.start_dt AS dt,
        m.currency AS currency,
        s.name AS service_name,
        CASE
            WHEN s.name = 'Справочник' THEN COALESCE(so.billing_domain, sc.billing_domain)
            ELSE aw.billing_domain
        END AS billing_domain,
        CASE
            WHEN s.name = 'Справочник' THEN COALESCE(so.welcome_domain, sc.welcome_domain)
            ELSE aw.welcome_domain
        END AS welcome_domain;

-- Revenue from the daily sales tables over [$start_01, $max_date] for comdep
-- products. A row is kept when it is a one-off sale ("Разовые продажи") or when
-- its client appears neither in $nondirect_revenue_daily nor in $direct_revenue.
-- Domains are resolved the same way as in the daily non-Direct query.
$nondirect_revenue_act =
    SELECT
        client_id,
        agency_id,
        dt,
        currency,
        service_name,
        billing_domain,
        welcome_domain,
        SUM(m.amt_rur) AS cost_net
    FROM RANGE(`//home/balance/prod/bo/f_sales_daily`, $start_01, $yesterday) AS m
    -- Clients already covered by the daily non-Direct data.
    LEFT JOIN (
        SELECT DISTINCT client_id
        FROM $nondirect_revenue_daily
    ) AS nd
        ON nd.client_id = m.order_client_id
    -- Clients already covered by the Direct data.
    LEFT JOIN (
        SELECT DISTINCT client_id
        FROM $direct_revenue
    ) AS d
        ON d.client_id = m.order_client_id
    LEFT JOIN $awaps_domains AS awaps
        ON awaps.service_order_id = m.service_order_id
    LEFT JOIN $sprav_orders AS so
        ON so.service_order_id = m.service_order_id
    LEFT JOIN $sprav_clients AS sc
        ON sc.client_id = m.order_client_id
    WHERE $is_comdep(m.activity_type_1_name, m.activity_type_2_name, m.product_id)
        AND (
            m.service_name = "Разовые продажи"
            OR COALESCE(nd.client_id, d.client_id) IS NULL
        )
        AND m.amt_rur > 0
        AND m.order_client_id > 0
        AND m.dt BETWEEN $start_01 AND $max_date
    GROUP COMPACT BY
        m.order_client_id AS client_id,
        m.agency_id AS agency_id,
        m.dt AS dt,
        m.currency AS currency,
        m.service_name AS service_name,
        CASE
            WHEN m.service_name = 'Справочник' THEN COALESCE(so.billing_domain, sc.billing_domain)
            ELSE awaps.billing_domain
        END AS billing_domain,
        CASE
            WHEN m.service_name = 'Справочник' THEN COALESCE(so.welcome_domain, sc.welcome_domain)
            ELSE awaps.welcome_domain
        END AS welcome_domain;

------------------------------------------------------------------------------------------------
-- Total revenue
------------------------------------------------------------------------------------------------

-- All revenue rows in one shape: daily non-Direct, act-based non-Direct and
-- Direct, concatenated without de-duplication.
$revenue =
    SELECT
        client_id, agency_id,
        welcome_domain, billing_domain,
        dt, service_name, currency,
        cost_net
    FROM $nondirect_revenue_daily

    UNION ALL

    SELECT
        client_id, agency_id,
        welcome_domain, billing_domain,
        dt, service_name, currency,
        cost_net
    FROM $nondirect_revenue_act

    UNION ALL

    SELECT
        client_id, agency_id,
        welcome_domain, billing_domain,
        dt, service_name, currency,
        cost_net
    FROM $direct_revenue;

------------------------------------------------------------------------------------------------
-- Per-client statistics
------------------------------------------------------------------------------------------------

-- Minimum share of a client's revenue that its top domain must exceed.
$main_domain_min_share = 0.33;

-- For every client, the welcome domain with the largest revenue across the
-- three listed services. A client is kept only when that largest per-domain
-- revenue exceeds the threshold share of the client's revenue summed over all
-- of its domain groups (rows with a NULL welcome_domain form a group too).
$client_main_domain =
    SELECT
        client_id,
        MAX_BY(welcome_domain, cost_net) AS main_domain
    FROM (
        -- Revenue per (client, welcome domain).
        SELECT
            client_id,
            welcome_domain,
            SUM(cost_net) AS cost_net
        FROM $revenue
        WHERE COALESCE(service_name, "!") IN ('Медиаселлинг', 'Директ: Рекламные кампании', 'Справочник')
        GROUP COMPACT BY
            client_id,
            welcome_domain
    )
    GROUP COMPACT BY
        client_id
    HAVING MAX(cost_net) > $main_domain_min_share * SUM(cost_net);
        
-- Overwrites the output table with one row per client that has a main domain.
-- Columns are listed explicitly so the output schema cannot change silently if
-- $client_main_domain gains extra columns.
INSERT INTO $result WITH TRUNCATE
SELECT
    client_id,
    main_domain
FROM $client_main_domain
WHERE main_domain IS NOT NULL
ORDER BY client_id
;
