-- Default cluster for every table path used below.
USE hahn;
-- NOTE(review): "@[pool]" here and "@[date]" below look like template placeholders that are
-- substituted before the query is submitted - confirm with the scheduler configuration.
pragma yt.Pool = "@[pool]";
PRAGMA SimpleColumns;
PRAGMA yt.InferSchema;
PRAGMA AnsiInForEmptyOrNullableItemsCollections;
pragma yson.DisableStrict;


-- Snapshot date: selects the db-archive folder that is read and names both output tables.
$date = '@[date]';
-- Output with one row per ad group (written by the first INSERT in this script).
$output_table = "//home/videoquality/vh_analytics/targeting/" || $date;
-- Output with one row per campaign (written by the last INSERT in this script).
$output_table_campaigns = "//home/videoquality/vh_analytics/targeting_campaigns/" || $date;

-- TRUE only when $x casts to a non-negative Int32; a NULL comparison result yields FALSE.
$is_pos = ($x) -> (IF(CAST($x AS Int32) >= 0, True, False));
-- Complement of $is_pos: TRUE for negative values and whenever the comparison is NULL.
$is_neg = ($x) -> (IF(CAST($x AS Int32) >= 0, False, True));

-- Folder of the db-archive snapshot for $date; each source table below lives inside it.
$archive_dir = 'home/direct/db-archive/' || $date;

$hm_logs = $archive_dir || '/hierarchical_multipliers';
$cm_logs = $archive_dir || '/campaigns';
$ph_logs = $archive_dir || '/phrases';
$bn_logs = $archive_dir || '/banners';
$br_logs = $archive_dir || '/bids_retargeting';
$rg_logs = $archive_dir || '/retargeting_goals';
$cr_logs = $archive_dir || '/crypta_goals';

--------------------------all orderid--------------------------
-- One row per (campaign, ad group) where:
--   * the ad group type is one of the CPM types listed below,
--   * the group has at least one banner with BannerID > 0,
--   * the campaign has OrderID > 0.
-- banner_types is the list of distinct banner types found in the group.
$active_orderid = (
    SELECT
        groups.cid AS cid,                     -- campaign id
        groups.pid AS pid,                     -- ad group id
        camps.OrderID AS orderid,              -- order id on the BK side
        banners.banner_types AS banner_types,
        groups.adgroup_type AS adgroup_type
    FROM
        (SELECT
            cid,
            pid,
            adgroup_type
        FROM $ph_logs
        -- Filtered here, before the joins. A NULL adgroup_type becomes '0', which is not in
        -- the list, so such rows are dropped exactly as before.
        WHERE NVL(adgroup_type, '0') IN ("cpm_banner",          -- reach product, pay per impression, banner in the network
                                         "cpm_video",           -- reach product, group with video banners
                                         "cpm_deals",           -- reach product, served through private deals
                                         "cpm_yndx_frontpage",  -- reach product on the front page
                                         "cpm_price")           -- price-list sales
        ) AS groups
        INNER JOIN
        (SELECT
            cid,
            pid,
            AGGREGATE_LIST_DISTINCT(banner_type) AS banner_types
        FROM $bn_logs
        WHERE BannerID > 0 -- filters out banner drafts
        GROUP BY cid,
                 pid
        ) AS banners
        ON groups.cid = banners.cid
        AND groups.pid = banners.pid
        INNER JOIN
        -- Only the join key and OrderID are needed from the campaigns table, so they are
        -- listed explicitly instead of SELECT *.
        (SELECT
            cid,
            OrderID
        FROM $cm_logs
        WHERE OrderID > 0 -- may speed the query up a little
        ) AS camps
        ON groups.cid = camps.cid
    );

--------------------------select interests--------------------------
-- One row per (cid, pid, goal_id, crypta_goal_type): every Crypta goal reachable from an
-- ad group through bids_retargeting -> retargeting_goals -> crypta_goals.
-- `target` is filled only for "social_demo" goals (NULL otherwise): a single-entry dict whose
-- key is the second '_'-separated token of tanker_name_key and whose value is the goal name.
$crypta_interests =
(select
        cid,
        pid,
        goal_id,
        crypta_goal_type,
        -- SOME picks one value per group; the group key already contains goal_id.
        some(target) as target
    from
        (select
            cid,
            pid,
            ret_cond_id
        from $br_logs
        ) as bids
        INNER JOIN
        (select
            ret_cond_id,
            goal_id
        from $rg_logs
        ) as g
        on bids.ret_cond_id = g.ret_cond_id
        INNER JOIN
        (select
            goal_id,
            name,
            crypta_goal_type,
            -- IF without an else branch: NULL for every goal type other than "social_demo".
            IF(crypta_goal_type = "social_demo", AsDict(AsTuple(String::SplitToList(tanker_name_key, '_')[1], name))) as target
        from $cr_logs
       ) as c
        ON g.goal_id = c.goal_id
    -- Group keys are aliased so the outer SELECT can refer to them without table prefixes.
    group by bids.cid as cid, bids.pid as pid, c.goal_id as goal_id, c.crypta_goal_type as crypta_goal_type
    );

-- Per ad group: the distinct social-demo target dicts collected in $crypta_interests.
$crypta = (
    SELECT
        cid,
        pid,
        AGGREGATE_LIST_DISTINCT(target) AS targets
    FROM $crypta_interests
    WHERE crypta_goal_type = "social_demo"
    GROUP BY
        cid,
        pid
);

--------------------------select all types of hierarchical multipliers--------------------------
-- Enabled hierarchical multipliers, restricted to campaigns present in $active_orderid.
-- A semi join keeps each multiplier row at most once, however many ad groups the campaign has.
$hierarchical_multipliers = (
    SELECT
        h.cid AS cid,
        pid,
        `values`,
        type,
        multiplier_pct
    FROM $hm_logs AS h
    LEFT SEMI JOIN $active_orderid AS a
        ON h.cid = a.cid
    WHERE is_enabled = 1
);

--------------------------select antidemography--------------------------
-- Demography segments switched off with a zero multiplier.
-- For every (cid, pid) of $hierarchical_multipliers rows with type 'demography_multiplier',
-- collects the distinct {'age', 'gender'} dicts whose multiplier_pct equals 0.
-- pid may be NULL here; $final_table treats NULL-pid rows and non-NULL-pid rows separately.
$demography_multiplier = (
    SELECT
        cid,
        pid,
        AGGREGATE_LIST_DISTINCT({'age': age, 'gender':gender}) as demography_multiplier
    FROM (
        SELECT
            cid,
            pid,
            a.age as age,
            a.gender as gender,
            a.multiplier_pct as multiplier_pct
        FROM
            (
            SELECT
                cid,
                pid,
                -- `values` is read as a Yson list; each element is turned into an
                -- (age, gender, multiplier_pct) record. All Yson calls run non-strict with
                -- AutoConvert enabled.
                ListMap(Yson::ConvertToList(`values`, Yson::Options(false as Strict, True as AutoConvert)), ($x) -> {
                return (
                    Yson::LookupString($x, 'age', Yson::Options(false as Strict, True as AutoConvert)) AS age ,
                    Yson::LookupString($x, 'gender', Yson::Options(false as Strict, True as AutoConvert)) AS gender,
                    Yson::LookupInt64($x, 'multiplier_pct', Yson::Options(false as Strict, True as AutoConvert)) AS  multiplier_pct)
                }) as a
            FROM $hierarchical_multipliers
            WHERE type = 'demography_multiplier'
            )
        -- One output row per element of the list built above.
        FLATTEN BY a
        )
    -- Keep only segments whose multiplier is exactly zero.
    WHERE multiplier_pct=0
    GROUP BY cid, pid
);
--------------------------final table--------------------------

-- Per ad group: sorted goal ids and sorted goal names of the Crypta goals whose type is
-- interests / behaviors / family / internal. The two lists are sorted independently of each
-- other, so positions in goal_ids and interests do not correspond.
$interests = (
    SELECT
        cid,
        pid,
        ListSort(AGGREGATE_LIST(goal_id)) AS goal_ids,
        ListSort(AGGREGATE_LIST(name)) AS interests
    FROM (
        SELECT
            ci.cid AS cid,
            ci.pid AS pid,
            ci.goal_id AS goal_id,
            goals.name AS name
        FROM $crypta_interests AS ci
        INNER JOIN $cr_logs AS goals
            ON ci.goal_id = goals.goal_id
        WHERE ci.crypta_goal_type IN ("interests", "behaviors", "family", "internal")
    )
    GROUP BY
        cid,
        pid
);

-- Per ad group flags (1/0) derived from the text form of condition_json:
--   use_auditory - some condition mentions 'audience';
--   use_metrika  - some condition contains `goal_type" = "goal`.
$use_aud_metr = (
    SELECT
        pid,
        MAX(IF(condition_text LIKE '%audience%', 1, 0)) AS use_auditory,
        MAX(IF(condition_text LIKE '%goal_type" = "goal%', 1, 0)) AS use_metrika
    FROM (
        -- Serialise the condition once per row; both flags search the same text.
        SELECT
            pid,
            CAST(Yson::SerializePretty(Yson::Parse(condition_json)) AS String) AS condition_text
        FROM $br_logs
    )
    GROUP BY
        pid
);

-- Distinct rows of $active_orderid enriched with:
--   use_metrika / use_auditory - 1 when the ad group's pid has that flag set in $use_aud_metr;
--   demography_pos             - social-demo targets from $crypta;
--   demography_neg             - zero-multiplier segments from $demography_multiplier: the row
--                                matched on (cid, pid) if there is one, otherwise the row with
--                                NULL pid matched on cid.
$final_table = (
    SELECT
        DISTINCT
        cid, -- campaign id
        pid, -- ad group id
        orderid, -- order id on the BK side
        adgroup_type,
        CASE WHEN pid IN (select pid from $use_aud_metr WHERE use_metrika=1) THEN 1 ELSE 0 END as use_metrika,
        CASE WHEN pid IN (select pid from $use_aud_metr WHERE use_auditory=1) THEN 1 ELSE 0 END as use_auditory,
        demography_pos,
        -- The pid-specific list wins; the NULL-pid list is the fallback.
        NVL(demography_multiplier_pid, demography_multiplier_cid) AS demography_neg
    FROM
        (
            SELECT
                ao.cid as cid, -- campaign id
                ao.pid as pid, -- ad group id
                orderid, -- order id on the BK side
                adgroup_type,
                dem.demography_multiplier AS demography_multiplier_cid,
                dem2.demography_multiplier AS demography_multiplier_pid,
                c.targets AS demography_pos
            FROM
                $active_orderid AS ao
                -- Multipliers stored with NULL pid, matched by campaign only.
                LEFT JOIN
                    (select
                        *
                    from $demography_multiplier
                    where pid is NULL
                    ) AS dem
                    ON ao.cid=dem.cid
                -- Multipliers stored with a pid, matched by campaign and ad group.
                LEFT JOIN
                    (select
                        *
                    from $demography_multiplier
                    where pid is not NULL
                    ) AS dem2 ON ao.cid=dem2.cid
                    AND ao.pid=dem2.pid
                LEFT JOIN $crypta AS c ON ao.cid=c.cid AND ao.pid=c.pid
        ));

-- Python UDF: expands the excluded demography records ({'age', 'gender'} dicts) into
-- '<gender><age bucket>' labels such as 'male_18_24'.
$script_neg = @@
# Used when the gender of a record is the empty string.
_BOTH_GENDERS = ('male', 'female')

# str() of the raw age value -> age-bucket suffixes, in the order they are emitted.
_AGE_SUFFIXES = {
    "b'0-17'": ("_00_18",),
    "b'45-'": ("_55_99", "_45_54"),
    "b'55-'": ("_55_99",),
    "b'_18_24'": ("_18_24",),
    "b'_25_34'": ("_25_34",),
    "b'_35_44'": ("_35_44",),
    "b'_45_54'": ("_45_54",),
    "b''": ("_00_18", "_18_24", "_25_34", "_35_44", "_45_54", "_55_99"),
}


def get_antitargets(tars):
    """Return the list of '<gender><age suffix>' labels excluded by the records in tars.

    An empty gender expands to both genders and an empty age expands to every age bucket.
    An age code that is not in _AGE_SUFFIXES is concatenated to the gender as is.
    """
    # NOTE(review): a None gender stringifies to 'None' and is neither "b''" nor a real
    # gender, so it yields labels starting with 'n' - confirm whether None can occur here.
    combinations = []
    for tar in tars:
        age_repr = str(tar['age'])
        gender_repr = str(tar['gender'])
        suffixes = _AGE_SUFFIXES.get(age_repr)
        if suffixes is None:
            # Unknown age code: strip the b'...' wrapper from both parts and join them.
            combinations.append(gender_repr[2:-1] + age_repr[2:-1])
            continue
        if gender_repr == "b''":
            genders = _BOTH_GENDERS
        else:
            genders = (gender_repr[2:-1],)
        for gender in genders:
            for suffix in suffixes:
                combinations.append(gender + suffix)
    return combinations
@@;

$get_antitargets = Python3::get_antitargets(Callable<(List<Dict<String, String?>>) -> List<String>>, $script_neg);

-- Python UDF: turns the social-demo target dicts (keys 'gender' / 'age') into the list of
-- '<gender><age bucket>' labels that are targeted.
$script = @@
# str() of the raw gender name -> gender label.
_GENDER_LABELS = {
    "b'\\xd0\\x96\\xd0\\xb5\\xd0\\xbd\\xd1\\x89\\xd0\\xb8\\xd0\\xbd\\xd1\\x8b'": 'female',
    "b'\\xd0\\x9c\\xd1\\x83\\xd0\\xb6\\xd1\\x87\\xd0\\xb8\\xd0\\xbd\\xd1\\x8b'": 'male',
}

# Age name (b'...' wrapper stripped) -> age-bucket suffixes.
_AGE_BUCKETS = {
    "<18": ('_00_18',),
    "18-24": ('_18_24',),
    "25-34": ('_25_34',),
    "35-44": ('_35_44',),
    "45-54": ('_45_54',),
    "45+": ('_45_54', '_55_99'),
    "55+": ('_55_99',),
}


def get_targets(tars):
    """Return every gender/age combination selected by the target dicts in tars.

    With no recognised gender both genders are used; with no age entry at all every age
    bucket is used. An unrecognised age seen while no age has been collected yet adds the
    '_nonage' marker.
    """
    genders = []
    ages = []
    for tar in tars:
        for key in tar.keys():
            if key == b'gender':
                label = _GENDER_LABELS.get(str(tar['gender']))
                if label is not None:
                    genders.append(label)
            elif key == b'age':
                buckets = _AGE_BUCKETS.get(str(tar[b'age'])[2:-1])
                if buckets is not None:
                    ages.extend(buckets)
                elif len(ages) == 0:
                    ages.append('_nonage')

    if not genders:
        genders = ['male', 'female']
    if not ages:
        ages = ["_00_18", "_18_24", "_25_34", "_35_44", "_45_54", "_55_99"]
    return [gender + age for gender in set(genders) for age in set(ages)]
@@;

$get_targets = Python3::get_targets(Callable<(List<Dict<String?, String?>>) -> List<String>>, $script);

-- Python UDF: maps the 'income' entries of the social-demo target dicts to income labels.
$script_income = @@
# str() of the raw income name (a bytes value, hence the b'...' form with escaped UTF-8
# bytes) -> income label. Every byte must be written as the text \xNN, i.e. with a doubled
# backslash in this source; a single backslash makes Python insert the character itself and
# the key can then never match.
_INCOME_LABELS = {
    "b'\\xd0\\x9f\\xd1\\x80\\xd0\\xb5\\xd0\\xbc\\xd0\\xb8\\xd1\\x83\\xd0\\xbc'": 'income_high_plus',
    "b'\\xd0\\x92\\xd1\\x8b\\xd1\\x81\\xd0\\xbe\\xd0\\xba\\xd0\\xb8\\xd0\\xb9'": 'income_high',
    "b'\\xd0\\x92\\xd1\\x8b\\xd1\\x88\\xd0\\xb5 \\xd1\\x81\\xd1\\x80\\xd0\\xb5\\xd0\\xb4\\xd0\\xbd\\xd0\\xb5\\xd0\\xb3\\xd0\\xbe'": 'income_medium_plus',
    "b'\\xd0\\xa1\\xd1\\x80\\xd0\\xb5\\xd0\\xb4\\xd0\\xbd\\xd0\\xb8\\xd0\\xb9'": 'income_medium',
    "b'\\xd0\\x9d\\xd0\\xb8\\xd0\\xb7\\xd0\\xba\\xd0\\xb8\\xd0\\xb9'": 'income_low',
}

_ALL_INCOME_LABELS = [
    'income_high_plus',
    'income_high',
    'income_medium_plus',
    'income_medium',
    'income_low',
]


def get_targets_income(tars):
    """Return the income labels selected by the target dicts in tars.

    When no income entry is recognised, every income label is returned. Otherwise the
    recognised labels are returned followed by the marker 'total'.
    """
    income = []
    for tar in tars:
        for key in tar.keys():
            if key == b'income':
                label = _INCOME_LABELS.get(str(tar['income']))
                if label is not None:
                    income.append(label)
    if len(income) == 0:
        income.extend(_ALL_INCOME_LABELS)
    else:
        income.append('total')

    return income
@@;

$get_targets_income = Python3::get_targets_income(Callable<(List<Dict<String?, String?>>) -> List<String>>, $script_income);


-- $j aggregates list-valued inputs: AggregateFlatten adapts the AGGREGATE_LIST_DISTINCT
-- factory so that the elements of every input list are merged into one distinct list.
$i = AggregationFactory("AGGREGATE_LIST_DISTINCT");
$j = AggregateFlatten($i);


-- One row per (cid, orderid, pid, adgroup_type, use_auditory, use_metrika) with:
--   income_list - labels returned by $get_targets_income for the positive targets;
--   socdem_list - labels returned by $get_targets minus those returned by $get_antitargets.
-- demography_pos / demography_neg are carried through with SOME (one value per group).
$data = (
select
    cid,
    orderid,
    pid,
    use_auditory,
    use_metrika,
    adgroup_type,
    some(demography_pos) as demography_pos,
    some(demography_neg) as demography_neg,
    AggregateBy(socdem_list, $j) as socdem_list,
    AggregateBy(income_list, $j) as income_list
from
    (select
        cid,
        orderid,
        pid,
        adgroup_type,
        use_auditory,
        use_metrika,
        -- NULL target lists are replaced with an empty list before calling the UDFs;
        -- ToSet + DictKeys removes duplicate labels.
        DictKeys(ToSet($get_targets_income(nvl(demography_pos, [])))) as income_list,
        demography_pos,
        demography_neg,
        -- Targeted gender/age labels with the excluded (zero-multiplier) labels removed.
        DictKeys(SetDifference(ToSet($get_targets(nvl(demography_pos, []))),ToSet($get_antitargets(nvl(demography_neg, []))))) as socdem_list,
    from $final_table
    )
    GROUP BY
    cid,
    orderid,
    pid,
    adgroup_type,
    use_auditory,
    use_metrika
    );


-- NULL for marker labels (containing 'total' or 'non'), otherwise the label unchanged.
$without_marker = ($label) -> (
    CASE WHEN $label LIKE '%total%' OR $label LIKE '%non%' THEN NULL ELSE $label END
);
-- First '_'-separated token of a label, e.g. 'male_18_24' -> 'male'.
$gender_of = ($label) -> (String::SplitToList($label, '_')[0]);
-- Second and third tokens joined back with '_', e.g. 'male_18_24' -> '18_24'.
$age_of = ($label) -> {
    $parts = String::SplitToList($label, '_');
    RETURN $parts[1] || '_' || $parts[2];
};

-- Per ad group: sorted gender, age, income and interest lists, plus the pass-through columns.
$targeting_pids_ = (
    SELECT
        ListSort(ListNotNull(ListUniq(ListMap(socdem_list, $gender_of)))) AS gender,
        ListSort(ListNotNull(ListUniq(ListMap(socdem_list, $age_of)))) AS age,
        ListSort(income_list) AS income,
        ListSort(interests) AS interests,
        use_auditory,
        adgroup_type,
        use_metrika,
        demography_pos,
        demography_neg,
        pid,
        orderid,
        cid
    FROM (
        SELECT
            -- Marker labels are removed before gender and age are derived.
            ListNotNull(ListMap(socdem_list, $without_marker)) AS socdem_list,
            ListNotNull(ListMap(income_list, $without_marker)) AS income_list,
            demography_pos,
            demography_neg,
            interests,
            adgroup_type,
            use_auditory,
            use_metrika,
            d.pid AS pid,
            orderid,
            d.cid AS cid
        FROM $data AS d
        LEFT JOIN $interests AS i
            ON d.pid = i.pid
    )
);

-- durations of creatives

-- creative_id -> duration. The duration is read from the "/duration" path of the
-- ConstructorData JSON and is NULL when the text does not mention 'duration'.
$creatives_dict = (
    SELECT
        creative_id,
        SOME(
            IF(
                ConstructorData LIKE '%duration%',
                Yson::ConvertToDouble(Yson::YPath(Yson::ParseJson(ConstructorData), "/duration"))
            )
        ) AS duration
    FROM `home/yabs/dict/DSPCreative`
    GROUP BY
        CAST(CreativeID AS Int64) AS creative_id
);

-- Banners whose creative has a known duration.
$banners = (
    SELECT
        cid,
        pid,
        b.creative_id AS creative_id,
        duration
    FROM `home/direct/db/banners` AS b
    INNER JOIN ANY (
        SELECT
            creative_id,
            duration
        FROM $creatives_dict
        WHERE duration IS NOT NULL
    ) AS c
    USING (creative_id)
);

-- Distinct creative durations per ad group.
$banners_grouped = (
    SELECT
        pid,
        AGGREGATE_LIST_DISTINCT(duration) AS durations
    FROM $banners
    GROUP BY
        pid
);

-- content categories

-- One row per (ad group, content category id); the id list is parsed from the JSON `value`
-- of "content_categories" targetings and then flattened.
$categories_ids = (
    SELECT
        pid,
        content_category_id
    FROM (
        SELECT
            pid,
            Yson::ConvertToInt64List(Yson::ParseJson(value)) AS content_category_id
        FROM LIKE(`home/direct/mysql-sync/current`, `ppc:%`, `straight/adgroup_additional_targetings`)
        WHERE targeting_type = "content_categories"
    )
    FLATTEN LIST BY content_category_id
);

-- Adds the category name from the Crypta goals of type "content_category" (NULL if unknown).
$categories_add_names = (
    SELECT
        c.pid AS pid,
        c.content_category_id AS content_category_id,
        cr.name AS content_category_name
    FROM $categories_ids AS c
    LEFT JOIN ANY (
        SELECT *
        FROM $cr_logs
        WHERE crypta_goal_type = "content_category"
    ) AS cr
        ON c.content_category_id = cr.goal_id
);

-- Distinct content category ids and names per ad group.
$categories_grouped = (
    SELECT
        pid,
        AGGREGATE_LIST_DISTINCT(content_category_id) AS content_category_ids,
        AGGREGATE_LIST_DISTINCT(content_category_name) AS content_category_names
    FROM $categories_add_names
    GROUP BY
        pid
);

-- non-skippable video

-- Ad groups flagged as non-skippable in adgroups_cpm_video.
$non_skippable_pids = (
    SELECT
        pid
    FROM LIKE(`//home/direct/mysql-sync/current`, `ppc:%`, `straight/adgroups_cpm_video`)
    WHERE is_non_skippable = 1
);

-- Ad-group targeting rows extended with creative durations, content categories and the
-- non-skippable flag (TRUE when the pid is present in $non_skippable_pids).
$targeting_pids = (
    SELECT
        t.*,
        durations,
        content_category_ids,
        content_category_names,
        ns.pid IS NOT NULL AS is_non_skippable
    FROM $targeting_pids_ AS t
    LEFT JOIN ANY $banners_grouped AS bg
        ON t.pid = bg.pid
    LEFT JOIN ANY $categories_grouped AS cat
        ON t.pid = cat.pid
    LEFT JOIN ANY $non_skippable_pids AS ns
        ON t.pid = ns.pid
);


-- Overwrites the per-ad-group output table for $date.
INSERT INTO $output_table WITH TRUNCATE
SELECT
    *
FROM $targeting_pids;

-- campaigns with brandsafety

-- Extracts the ids of "brandsafety" goals from a condition JSON string.
-- Only the first element of the top-level list is inspected; its "/goals" list is filtered
-- by goal_type and mapped to goal_id.
$parse = ($s) -> {
    $first_condition = Yson::ConvertToList(Yson::ParseJson($s))[0];
    $goals = Yson::ConvertToList(Yson::YPath($first_condition, "/goals"));
    $brandsafety_only = ListFilter(
        $goals,
        ($goal) -> (Yson::LookupString($goal, "goal_type") == "brandsafety")
    );
    RETURN ListMap($brandsafety_only, ($goal) -> (Yson::LookupInt64($goal, "goal_id")));
};

-- One row per (brand-safety retargeting condition, goal id).
$bs_retargeting = (
    SELECT
        goal_id,
        ClientID,
        ret_cond_id
    FROM (
        SELECT
            $parse(condition_json) AS goal_id,
            ClientID,
            ret_cond_id
        FROM LIKE(`//home/direct/mysql-sync/current`, `ppc:%`,`straight/retargeting_conditions`)
        WHERE retargeting_conditions_type = "brandsafety"
    )
    FLATTEN LIST BY goal_id
);

-- Campaigns that reference a brand-safety retargeting condition.
$bs_ret_campaigns = (
    SELECT
        cid,
        brandsafety_ret_cond_id
    FROM LIKE(`//home/direct/mysql-sync/current`, `ppc:%`, `straight/campaigns`)
    WHERE brandsafety_ret_cond_id IS NOT NULL
);

-- Names of the Crypta goals of type 'brandsafety'.
$bs_goals = (
    SELECT
        goal_id,
        name AS bs_category_name
    FROM $cr_logs
    WHERE crypta_goal_type = 'brandsafety'
);

-- Brand-safety goals attached to each campaign through its brandsafety_ret_cond_id.
-- A campaign whose condition is not found keeps NULLs in the condition columns.
$add_campaigns = (
    SELECT
        b.goal_id AS goal_id,
        b.ClientID AS ClientID,
        b.ret_cond_id AS ret_cond_id,
        c.cid AS cid
    FROM $bs_ret_campaigns AS c
    LEFT JOIN $bs_retargeting AS b
        ON b.ret_cond_id = c.brandsafety_ret_cond_id
);

-- Adds the brand-safety category name for every goal id (NULL if unknown).
$add_goals = (
    SELECT
        c.goal_id AS goal_id,
        c.ClientID AS ClientID,
        c.ret_cond_id AS ret_cond_id,
        c.cid AS cid,
        g.bs_category_name AS bs_category_name
    FROM $add_campaigns AS c
    LEFT JOIN ANY $bs_goals AS g
        ON c.goal_id = g.goal_id
);

-- Per campaign: one client id plus the distinct brand-safety category ids and names.
$bs_grouped = (
    SELECT
        cid,
        SOME(ClientID) AS client_id,
        AGGREGATE_LIST_DISTINCT(goal_id) AS bs_categories_ids,
        AGGREGATE_LIST_DISTINCT(bs_category_name) AS bs_categories_names
    FROM $add_goals
    GROUP BY
        cid
);

-- strategy, rf meta

-- Strategy name and the rf / rfReset columns for every campaign in the archive snapshot.
$camps_strategies_ = (
    SELECT
        cid,
        ClientID,
        strategy_name AS strategy,
        rf,
        rfReset
    FROM $cm_logs
);

-- Same columns, limited to campaigns that have at least one row in $targeting_pids.
$camps_strategies = (
    SELECT
        c.cid AS cid,
        c.ClientID AS ClientID,
        c.strategy AS strategy,
        c.rf AS rf,
        c.rfReset AS rfReset
    FROM $camps_strategies_ AS c
    LEFT SEMI JOIN $targeting_pids AS t
    USING (cid)
);

-- Campaigns with an allowed_page_ids value, parsed from JSON into a list of Int64.
$pageids_filter = (
    SELECT
        cid,
        Yson::ConvertToInt64List(Yson::ParseJson(allowed_page_ids)) AS allowed_page_ids
    FROM LIKE(`//home/direct/mysql-sync/current`, `ppc:%`, `straight/camp_options`)
    WHERE allowed_page_ids IS NOT NULL
);

-- Campaign-level output: strategy columns plus brand-safety categories, a brand-lift flag
-- (TRUE when the campaign is present in brandlift_stats) and the allowed page ids.
$output_campaigns = (
    SELECT
        cm.*,
        bs_categories_ids,
        bs_categories_names,
        bl.cid IS NOT NULL AS has_brandlift,
        allowed_page_ids
    FROM $camps_strategies AS cm
    LEFT JOIN ANY $bs_grouped AS g
        ON g.cid = cm.cid
    LEFT JOIN ANY `//home/pythia/export/prod/direct/brandlift_stats` AS bl
        ON cm.cid = bl.cid
    LEFT JOIN ANY $pageids_filter AS pi
        ON cm.cid = pi.cid
);

-- Overwrites the per-campaign output table for $date.
INSERT INTO $output_table_campaigns WITH TRUNCATE
SELECT
    *
FROM $output_campaigns;
