use arnold;

-- Folder holding the frontend log tables; only entries of type "table" are read.
$input_folder = "//home/webmaster/prod/analytics/logs/deploy/frontend/webmaster-front-production";
-- Scratch table: truncated and rebuilt on every run from the scanned log tables.
$output_tail = "//home/webmaster/prod/analytics/logs/webmaster-visits.tail";
-- Cumulative table: every run merges the scratch table into it and rewrites it.
$output = "//home/webmaster/prod/analytics/logs/webmaster-visits";
-- How many log tables (the last ones when sorted by path, descending) are scanned per run.
$PERIOD = 7;

-- Extractors applied to raw log messages. Each returns a struct whose `_1`
-- member is the first capture group (null when the pattern does not match).

-- hostId parameter of the form <scheme>:<host>:<port>.
$capture_host_id = Re2::Capture(".*hostId=(.+?:.+?:\\d+).*");
-- Numeric userId parameter.
$capture_user_id = Re2::Capture(".*userId=(\\d+).*");
-- Request path between the viewer backend's "host:port" and the ".json" suffix.
-- The backslash before the dot is doubled, like every other regex escape in
-- this file, so that RE2 receives `\.` and matches a literal dot only.
$capture_path = Re2::Capture(".*webmaster3-viewer.search.yandex.net:\\d+(.+?)\\.json.*");

-- Converts a host id of the form "<scheme>:<host>:<port>" into
-- "<scheme>://<punycode host>" with an optional ":<port>" suffix.
--
-- Returns null when the host part is not valid UTF-8.
-- The port is omitted only when it is the default one for the given scheme
-- (80 for http, 443 for https). A mismatched pair such as "http:host:443"
-- keeps its explicit port, because "http://host" would denote port 80.
$host_id_to_host = ($host_id) -> {
    $parts = String::SplitToList($host_id, ":");
    $scheme = $parts[0];
    $port = $parts[2];
    -- Scheme plus punycode-encoded host name, shared by both output forms.
    $origin = $scheme || "://" || Url::HostNameToPunycode($parts[1]);
    $is_default_port =
        ($scheme == "http" and $port == "80")
        or ($scheme == "https" and $port == "443");
    return if(
        not Unicode::IsUtf($parts[1]),
        null,
        if($is_default_port, $origin, $origin || ":" || $port)
    );
};

-- List of the paths of the last $PERIOD tables in $input_folder,
-- taken in descending path order.
$paths = (
    SELECT AGGREGATE_LIST(TablePath)
    FROM (
        SELECT Path AS TablePath
        FROM FOLDER($input_folder)
        WHERE Type == "table"
        ORDER BY TablePath DESC
        LIMIT $PERIOD
    )
);

-- Distinct (Period, Host, UserId, Route) combinations found in the scanned log
-- tables. Only messages mentioning the viewer backend are considered.
-- Period is the name of the source table. Host and Route are extracted from
-- the URL-decoded message, UserId from the raw message.
$recent_visits = (
    SELECT
        Host,
        UserId,
        Route,
        Period
    FROM EACH($paths)
    WHERE message LIKE "%webmaster3-viewer.search.yandex.net%"
    GROUP BY
        TableName() AS Period,
        $host_id_to_host($capture_host_id(Url::Decode(message))._1) AS Host,
        CAST($capture_user_id(message)._1 AS Uint64) AS UserId,
        $capture_path(Url::Decode(message))._1 AS Route
);

-- Rebuild the tail table from scratch. A row is kept only when it has a route
-- and at least one of host / user id.
INSERT INTO $output_tail WITH TRUNCATE
SELECT
    Host,
    UserId,
    Route,
    Period
FROM $recent_visits
WHERE Route IS NOT NULL
    AND (Host IS NOT NULL OR UserId IS NOT NULL);

-- Commit first so that the freshly written tail table is what the merge below reads.
COMMIT;

-- Merge the tail into the cumulative table: read both, de-duplicate on all
-- four columns, and rewrite the cumulative table sorted by the same columns.
INSERT INTO $output WITH TRUNCATE
SELECT
    UserId,
    Period,
    Route,
    Host
FROM CONCAT($output, $output_tail)
GROUP BY
    UserId,
    Period,
    Route,
    Host
ORDER BY
    UserId,
    Period,
    Route,
    Host;
