use arnold;
pragma yt.Pool="robot-webmaster";
pragma yt.AutoMerge="disabled";

-- Folder whose per-day sub-folders (map nodes) are the input of this script.
$src_root       = "//user_sessions/pub/similargroup/daily";
-- Folder that receives one table per day with the filtered GSC / YWM rows.
$dst_root       = "//home/webmaster/prod/analytics/engagement/gsc_vs_ywm/daily";
-- Folder that receives one table per day with per-source-table totals.
$dst_total_root = "//home/webmaster/prod/analytics/engagement/total/similargroup/daily";

-- Window applied to the day listing sorted newest first:
-- skip $OFFSET entries, then take $PERIOD entries.
$OFFSET         = 0;
$PERIOD         = 14;

-- Tells whether $url is a Google Search Console page.
-- The host is lower-cased and stripped of a leading "www." before comparison.
-- A URL matches when its host is search.google.com, or when its host is
-- google.com and its path starts with "/webmasters".
$IsGSCVisit = ($url) -> {
    $bare_host = Url::CutWWW(String::ToLower(Url::GetHost($url)));
    $on_console_host = $bare_host == "search.google.com";
    $on_webmasters_path = $bare_host == "google.com"
        and Url::GetPath($url) like "/webmasters%";
    return $on_console_host or $on_webmasters_path;
};

-- Tells whether $url is a Yandex Webmaster page, i.e. its host is one of the
-- known webmaster.yandex.* domains listed below.
-- The host is normalised the same way as in $IsGSCVisit: lower-cased and
-- stripped of a leading "www.", so "www."-prefixed variants are not missed.
$IsYWMVisit = ($url) -> {
    $host = Url::CutWWW(String::ToLower(Url::GetHost($url)));
    return $host in (
        "webmaster.yandex.ru",
        "webmaster.yandex.com",
        "webmaster.yandex.com.tr",
        "webmaster.yandex.ua",
        "webmaster.yandex.by",
        "webmaster.yandex.kz",
        "webmaster.yandex.uz"
    );
};

-- Action: (re)builds both per-day output tables for the source day $day.
--   $day - name of a day sub-folder under $src_root; it is also used as the
--          name of the output tables and stored in the Period column.
-- Only the "clean" and "frauds" tables of the day are read; the "errors" and
-- "outstaff" tables are left out.
define action $parse($day) as
    $clean_table  = $src_root || "/" || $day || "/clean";
    $frauds_table = $src_root || "/" || $day || "/frauds";
    $visits_table = $dst_root || "/" || $day;
    $totals_table = $dst_total_root || "/" || $day;

    -- Rows whose URL is a Google Search Console or Yandex Webmaster page.
    -- The table is overwritten on every run and stored sorted by UID.
    insert into $visits_table with truncate
    select
        $day as Period,
        TableName() as TableSource,
        subkey_unixtime as `Timestamp`,
        UserAgent::Parse(TSKV_DATA.u_agent).isMobile as IsMobile,
        TSKV_DATA.user_region as RegionId,
        Url::GetSchemeHost(TSKV_DATA.stuff_dict["url"]) as Host,
        Url::GetPath(TSKV_DATA.stuff_dict["url"]) as Path,
        key_uid as UID,
        $IsGSCVisit(TSKV_DATA.stuff_dict["url"]) as GoogleSC,
        $IsYWMVisit(TSKV_DATA.stuff_dict["url"]) as YandexWM
    from concat($clean_table, $frauds_table)
    where $IsGSCVisit(TSKV_DATA.stuff_dict["url"])
        or $IsYWMVisit(TSKV_DATA.stuff_dict["url"])
    order by UID;

    -- Unfiltered totals per source table: distinct key_uid values (Visitors)
    -- and the number of rows with a non-null key_uid (Visits).
    insert into $totals_table with truncate
    select
        TableSource,
        count(distinct key_uid) as Visitors,
        count(key_uid) as Visits,
        $day as Period
    from concat($clean_table, $frauds_table)
    group by TableName() as TableSource
    order by Period, TableSource;

end define;

-- Names of the source day folders to consider in this run: the sub-folders
-- (map nodes) of $src_root sorted by name descending, skipping the first
-- $OFFSET entries and keeping the next $PERIOD.
$src_days = (
    select aggregate_list(DayName)
    from (
        select TableName(Path, "yt") as DayName
        from folder($src_root)
        where Type = "map_node"
        order by DayName desc
        limit $PERIOD offset $OFFSET
    )
);

-- Names of every per-day table that already exists under $dst_root.
-- This list is used purely as a membership set ("was this day already
-- processed?"), so it is deliberately NOT cut with limit/offset: a window over
-- the destination listing does not have to line up with the window over the
-- source listing (for example with a non-zero $OFFSET), and a misaligned
-- window makes already processed days look missing and get rebuilt.
$dst_days = (
    select aggregate_list(TableName(Path, "yt"))
    from folder($dst_root)
    where Type == "table"
);

-- Driver: walk over the selected source days and run $parse only for the days
-- whose name is not found in $dst_days.
evaluate for $day in $src_days do
    begin
        -- Skip days that are already listed in $dst_days.
        evaluate if not ListHas($dst_days, $day) do
            $parse($day)
    end do
else 
    -- Alternative branch of the loop: explicitly does nothing.
    do empty_action();
