use arnold;
pragma yson.DisableStrict;
pragma yt.AutoMerge="disabled";
pragma yt.Pool="robot-webmaster";
pragma yt.MaxRowWeight="128M";

-- Compiled Re2 capture used to pull an http(s) URL out of a log message.
-- The character class only admits word characters, '-', '.' and '/', so the
-- match stops before any ':' (port), '?' (query string) or '#' (fragment).
-- Callers read member `_0` of the result (presumably the whole match — confirm
-- against the Re2::Capture documentation).
$capture_host = Re2::Capture('\\b(https?://[\\w-\\./]+)\\b');

-- Returns the "url" entry of a verification `fail_info` Yson node converted
-- to a string, or the literal "NONE" when the node has no "url" key.
$get_verification_url = ($fail_info) -> {
    $has_url = Yson::Contains($fail_info, "url");
    $url = Yson::ConvertToString($fail_info.url);
    return if($has_url, $url, "NONE");
};

-- Converts a host id of the form "<scheme>:<host>:<port>" into an address
-- "<scheme>://<host>[:<port>]" with the host name converted to punycode.
--
-- The port is omitted only when it is the default one for the given scheme
-- (80 for http, 443 for https). Any other combination, e.g. http on port 443,
-- keeps the explicit port because it denotes a different origin.
-- Returns null when the host part is not valid UTF-8; a missing component
-- also yields null through null propagation in the concatenation.
-- NOTE(review): the scheme comparison is case-sensitive; this assumes host ids
-- always carry a lower-case scheme — confirm against the producer.
$host_id_to_host = ($host_id) -> {
    $parts = String::SplitToList($host_id, ":");
    $scheme = $parts[0];
    $port = $parts[2];
    -- Scheme and punycoded host, shared by both output forms.
    $origin = $scheme || "://" || Url::HostNameToPunycode($parts[1]);
    $is_default_port =
        ($scheme == "http" and $port == "80")
        or ($scheme == "https" and $port == "443");
    return if (not Unicode::IsUtf($parts[1]),
        null,
        if (
            $is_default_port,
            $origin,
            $origin || ":" || $port
        )
    );
};

-- Failed verifications per host.
--
-- Inner query: reads the user-host-verifications archive, keeps only rows
-- with status "VERIFICATION_FAILED" and collapses them to one row per
-- (host_id, timestamp), taking an arbitrary value (`some`) of the other
-- columns. `timestamp` is interpreted as milliseconds to build the readable
-- `timestamp_str`.
-- Outer query: for every host_id collects these rows into a list of structs,
-- orders it from newest to oldest and keeps at most the first 100 entries.
$verifications = (
    select
        $host_id_to_host(host_id) as host_address,
        host_id,
        ListTake(
            -- ListSortDesc gives newest-first order directly; negating the
            -- key would misorder values if `timestamp` is an unsigned type.
            ListSortDesc(
                aggregate_list(
                    AsStruct(
                        type as type,
                        status as status,
                        fail_info as fail_info,
                        `timestamp` as `timestamp`,
                        timestamp_str as timestamp_str,
                        verification_url as verification_url
                    )
                ),
                ($x) -> { return $x.`timestamp`; }
            ),
            100
        ) as verifications
    from (
        select
            host_id,
            `timestamp`,
            some(type) as type,
            some(status) as status,
            some(fail_info) as fail_info,
            cast(DateTime::FromMilliseconds(cast(`timestamp` as Uint64)) as String) as timestamp_str,
            $get_verification_url(some(fail_info)) as verification_url
        from `home/webmaster/prod/export/archive/user-host-verifications`
        where status = "VERIFICATION_FAILED"
        group by
            host_id,
            `timestamp`
        --limit 10
    )
    group by
        host_id
);

-- Worker log lines whose message contains "Zora status ", grouped by the
-- scheme+host of the first URL found in the message.
--
-- Inner query: scans every table under the webmaster3-worker log directory
-- and derives `host_address` from the URL captured in `message`.
-- Outer query: collects (iso_eventtime, message) pairs per host_address and
-- orders them by iso_eventtime ascending.
-- Rows for which no address could be derived are dropped before grouping:
-- a NULL key can never satisfy an equality join on host_address, and
-- aggregating all such lines into one group would only build an oversized row.
$worker_logs = (
    select
        host_address,
        ListSort(
            aggregate_list(
                AsStruct(
                    iso_eventtime as iso_eventtime,
                    message as message
                )
            ),
            ($x) -> { return $x.iso_eventtime; }
        ) as logs
    from (
        select
            Url::GetSchemeHost($capture_host(message)._0) as host_address,
            iso_eventtime,
            message
        from range(`home/webmaster/prod/analytics/logs/deploy/backend/webmaster3-worker`)
        where message like '%Zora status %'
        --limit 10
    )
    where host_address is not null
    group by host_address
);

-- Rebuilds the support table from scratch: one row per host with failed
-- verifications, carrying the verification list and, when available, the
-- worker log lines for the same host address. Both lists are stored as
-- serialized Yson. Hosts without matching logs are kept (left join).
insert into `//home/webmaster/prod/analytics/support/verification_fails`
    with truncate
select
    fails.host_address as host_address,
    fails.host_id as host_id,
    Yson::Serialize(Yson::From(fails.verifications)) as verifications,
    Yson::Serialize(Yson::From(wl.logs)) as logs
from $verifications as fails
left join $worker_logs as wl
    on fails.host_address == wl.host_address
order by host_address;

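/*
Legacy ad-hoc query, kept for reference only.
NOTE: it does not match the current shape of $verifications / $worker_logs:
iso_eventtime, status, type, fail_info, message and `timestamp` are now nested
inside the `verifications` and `logs` lists, so the query has to be rewritten
(e.g. with FLATTEN BY) before it can be run again.
Column aliases (Russian -> English):
    `Время проверки`         -> check time
    `Адрес сайта`            -> site address
    `Статус проверки`        -> check status
    `Тип валидации`          -> validation type
    `Подробности о проверке` -> check details
    `Сообщение об ошибке`    -> error message

select 
    iso_eventtime as `Время проверки`,
    fail.host_address as `Адрес сайта`,
    status as `Статус проверки`,
    type as `Тип валидации`,
    fail_info as `Подробности о проверке`,
    message as `Сообщение об ошибке`,
    `timestamp`
from $verifications as fail 
join $worker_logs as data 
    on (fail.host_address == data.host_address)
*/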
