#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import argparse
import datetime
import json
from yql.api.v1.client import YqlClient
from videolog_common import YqlRunner, apply_replacements, get_date

# Title handed to YqlRunner (see main()) for the query launched by this script.
TITLE = "[MMA-5340] Social SBR Basket Renewal | YQL"

# YQL query template. The "@[...]" placeholders (pool, output_table, month_ago,
# yesterday) are substituted in main() via apply_replacements before the query
# is run.
#
# What the query does, step by step:
#   * $dateFilter    - true when at least one element of the given list lies in
#                      ["@[month_ago]", "@[yesterday]"] (inclusive, compared as
#                      strings). It is applied to the keys of the `data` dict;
#                      presumably those keys are ISO date strings - TODO confirm.
#   * $socialFilter  - LIKE-based substring match of the URL against six social
#                      networks, rejecting any URL that contains "yandex".
#   * $hostFilter    - takes Url::GetHost(url), strips a leading "m." and then a
#                      leading "www.", and requires the result to be one of the
#                      six whitelisted hosts.
#   * $filter        - subquery keeping rows that pass all three filters above;
#                      it is applied to the additive_mobile_* selrank_stats table.
#   * $countTVT / $countUsers / $countShows - sum the "tvt" / "users" / "shows"
#                      field over all payloads of the `data` dict, falling back
#                      to 0 when the sum is NULL.
#   * $getCat2n      - floor(log2(x)); used on the summed user count.
#   * $filter_social - adds tvt, users, shows, cat2n, a `site` label derived from
#                      URL patterns (NULL when no pattern matches) and
#                      frame_url_canon3_scheme = "https://" + canonized URL.
#                      The TVT-per-user threshold in its WHERE clause is
#                      commented out inside the query text.
#   * The result is written to $output_table WITH TRUNCATE.
#
# NOTE(review): $mwww only rewrites strings that themselves start with "m.";
# if frame_url carries a scheme ("https://m. ...") it looks like the replacement
# never fires - confirm against real data.
query_stub = """
pragma yson.DisableStrict;
pragma AnsiInForEmptyOrNullableItemsCollections;
pragma yt.Pool = "@[pool]";

$output_table = "@[output_table]";

$dateFilter = ($lst)->(ListLength(ListFilter($lst, ($x)->($x >= "@[month_ago]" and $x <= "@[yesterday]"))) > 0);

$socialFilter = ($url)->(
    ($url like '%instagram.com/%'
    or $url like '%tiktok%'
    or $url like '%twitter%'
    or $url like '%facebook%'
    or $url like '%ok.ru/%'
    or $url like '%vk.com/%') and $url not like '%yandex%'
);

$stripPrefix = ($s, $prefix) -> (IF(
    $s like ($prefix || "%"),
    substring($s, unwrap(length($prefix))),
    $s
));

$hostFilter = ($url)->{
    $host = Url::GetHost($url);
    $host = $stripPrefix($host, "m.");
    $host = $stripPrefix($host, "www.");
    return $host in (
        "ok.ru",
        "vk.com",
        "instagram.com",
        "twitter.com",
        "facebook.com",
        "tiktok.com"
    )
};

define subquery $filter($table) as
select
    *
from $table
where $dateFilter(DictKeys(Yson::ConvertToDict(data))) and $socialFilter(frame_url) and $hostFilter(frame_url);
end define;

$raw_data_mobile = select * from $filter("//home/videolog/selrank_stats/additive_mobile_both_fields_wo_threshold_schematized");

$countTVT = ($data)->{
    $data = Yson::ConvertToDict($data);
    RETURN ListSum(
        ListMap(
            DictPayloads($data),
            ($x)->(Yson::LookupDouble($x, "tvt"))
        )
    ) ?? 0
};

$countUsers = ($data)->{
    $data = Yson::ConvertToDict($data);
    RETURN ListSum(
        ListMap(
            DictPayloads($data),
            ($x)->(Yson::LookupUint64($x, "users"))
        )
    ) ?? 0
};

$countShows = ($data)->{
    $data = Yson::ConvertToDict($data);
    RETURN ListSum(
        ListMap(
            DictPayloads($data),
            ($x)->(Yson::LookupUint64($x, "shows"))
        )
    ) ?? 0
};

$getCat2n = ($x)->(Math::Floor(Math::Log2($x)));

$replacePrefix = ($bad, $good) -> (($str) -> {
    $prefix = $bad || "%";
    RETURN IF(
        $str LIKE $prefix,
        $good || SUBSTRING($str, LENGTH($bad)),
        $str
    )
});

$mwww = $replacePrefix("m.", "www.");


define subquery $filter_social() as
select
    t.*, $countTVT(data) as tvt, $countUsers(data) as users, $countShows(data) as shows, $getCat2n($countUsers(data)) as cat2n,
    case
    when frame_url like '%instagram.com/p/%' then 'instagram'
    when frame_url like '%tiktok%/video/%' then 'tiktok'
    when frame_url like '%twitter%/status/%' then 'twitter'
    when frame_url like '%facebook%/videos/%' then 'facebook'
    when frame_url like '%ok.ru/video/%' then 'ok'
    when frame_url like '%vk.com/video%' then 'vk'
    else null
    end as site,
    "https://" || VideoCanonLib::CanonizeUrl($mwww(frame_url)) as frame_url_canon3_scheme
from $raw_data_mobile as t
where $socialFilter(frame_url) --and $countTVT(data) / cast($countUsers(data) as double) >= 30;
end define;

insert into $output_table WITH TRUNCATE
SELECT * from $filter_social()
"""

# JSON config template written verbatim (after placeholder substitution) to the
# --output_json file by main().
#
# Placeholders: "@[tmp_table]" is replaced with --tmp_table (the table the YQL
# query writes to) and "@[pool_table]" with --basket_table.
#
# Contents: one entry per social network (instagram, tiktok, twitter, facebook,
# vk, ok). Every entry reads the same source table, filters on the `site`
# column with a lambda given as a string, uses `shows` as the count column and
# has a target_number of 6500. "layer_paths" lists two fixed paths.
# NOTE(review): the consumer of this config is not visible in this file; the
# exact meaning of "target_number" and "layer_paths" should be confirmed there.
config_stub = """
{
    "entries": [
        {
            "source_table": "@[tmp_table]",
            "name": "instagram",
            "filter": ["lambda x: x == 'instagram'", "site"],
            "count_column_name": "shows",
            "target_number": 6500
        },
        {
            "source_table": "@[tmp_table]",
            "name": "tiktok",
            "filter": ["lambda x: x == 'tiktok'", "site"],
            "count_column_name": "shows",
            "target_number": 6500
        },
        {
            "source_table": "@[tmp_table]",
            "name": "twitter",
            "filter": ["lambda x: x == 'twitter'", "site"],
            "count_column_name": "shows",
            "target_number": 6500
        },
        {
            "source_table": "@[tmp_table]",
            "name": "facebook",
            "filter": ["lambda x: x == 'facebook'", "site"],
            "count_column_name": "shows",
            "target_number": 6500
        },
        {
            "source_table": "@[tmp_table]",
            "name": "vk",
            "filter": ["lambda x: x == 'vk'", "site"],
            "count_column_name": "shows",
            "target_number": 6500
        },
        {
            "source_table": "@[tmp_table]",
            "name": "ok",
            "filter": ["lambda x: x == 'ok'", "site"],
            "count_column_name": "shows",
            "target_number": 6500
        }
    ],
    "pool_table": "@[pool_table]",
    "layer_paths": [
        "//porto_layers/base/bionic/porto_layer_search_ubuntu_bionic_app_lastest.tar.gz",
        "//home/videolog/SBR/yaqlib-bionic-python3"
    ]
}
"""


def main():
    """Run the social-basket YQL query and write the basket config file.

    Command-line options:
        --date          last day of the window, parsed by ``get_date``;
                        when omitted, yesterday (relative to today) is used.
        --pool          value substituted for ``@[pool]`` in the query.
        --tmp_table     table the query writes to; also the source table
                        referenced by every config entry.
        --basket_table  value substituted for ``@[pool_table]`` in the config.
        --output_json   path of the file the rendered config is written to.

    The window covers the 30 days ending on the chosen last day. The YQL
    cluster is taken from the ``YT_PROXY`` environment variable (lower-cased);
    a missing variable raises ``KeyError``.
    """
    arg_parser = argparse.ArgumentParser()
    # (flag, default) pairs, registered in this order.
    cli_options = (
        ("--date", None),
        ("--pool", "robot-mma-nirvana"),
        ("--tmp_table", "//home/videolog/SBR/social/tmp"),
        ("--basket_table", "//home/videolog/SBR/social/basket"),
        ("--output_json", "output.json"),
    )
    for flag, default in cli_options:
        arg_parser.add_argument(flag, default=default)
    opts = arg_parser.parse_args()

    # Resolve the inclusive [first_day, last_day] window used by the query.
    if opts.date:
        last_day = get_date(opts.date)
    else:
        last_day = datetime.date.today() - datetime.timedelta(days=1)
    first_day = last_day - datetime.timedelta(days=30)

    cluster = os.environ["YT_PROXY"].lower()
    client = YqlClient(db=cluster)
    runner = YqlRunner(client, title=TITLE)

    query_params = {
        "@[pool]": opts.pool,
        "@[output_table]": opts.tmp_table,
        "@[yesterday]": str(last_day),
        "@[month_ago]": str(first_day),
    }
    runner.run(apply_replacements(query_stub, query_params))

    # The config is rendered only after the query has been run.
    config_params = {
        "@[tmp_table]": opts.tmp_table,
        "@[pool_table]": opts.basket_table,
    }
    rendered_config = apply_replacements(config_stub, config_params)

    with open(opts.output_json, "w") as out:
        out.write(rendered_config)


# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
