-- Default cluster for every table path used below.
use hahn;
--pragma yt.QueryCacheMode="disable";
-- YT pool in which the operations of this query are scheduled.
pragma yt.Pool="robot-webmaster";

-- Folder with the daily source log tables (one table per day).
$src_root           = "//logs/xml-reqans-log/1d";
-- Folder for the per-day raw copies of the source `dict` column.
$dst_raw_root       = "//home/webmaster/prod/export/catalogia/source/xmlsearch/raw";
-- Folder whose table names mark the days that are already parsed.
$dst_parsed_root    = "//home/webmaster/prod/export/catalogia/source/xmlsearch/parsed";
-- How many tables (taken from the top of a descending name sort) are looked at
-- in both the source folder and the parsed folder.
$PERIOD = 15;

-- List of source table names: the $PERIOD entries of $src_root that come first
-- when table names are sorted in descending order.
-- NOTE(review): presumably the names are dates, so this is "the latest days" - confirm.
$src_days = (
    select
        aggregate_list(day_name)
    from (
        select
            TableName(Path, "yt") as day_name
        from folder($src_root)
        where Type = "table"
        order by day_name desc
        limit $PERIOD
    )
);

-- List of table names already present in $dst_parsed_root, limited to the
-- $PERIOD names that come first in a descending sort. Used to decide which
-- source days can be skipped.
$dst_parsed_days = (
    select
        aggregate_list(day_name)
    from (
        select
            TableName(Path, "yt") as day_name
        from folder($dst_parsed_root)
        where Type = "table"
        order by day_name desc
        limit $PERIOD
    )
);

-- Processes one source table in two stages.
--
-- Parameters:
--   $day - table name inside $src_root; the same name is used for both outputs.
--
-- Stage 1 copies the `dict` column of the source table into
--   $dst_raw_root/<day> as column ParsedRaw (table is overwritten).
-- Stage 2 converts ParsedRaw with Yson::From and stores the result as column
--   Parsed in $dst_parsed_root/<day> (table is overwritten).
--
-- Stage 2 must write to the parsed folder, not back into the raw table:
-- the driver loop decides what to skip by listing $dst_parsed_root, and
-- overwriting the raw table would both lose the raw snapshot and leave the
-- parsed folder empty, so every day would be re-processed on each run.
define action $parse_logs($day) as
    $src_path        = $src_root || "/" || $day;
    $dst_raw_path    = $dst_raw_root || "/" || $day;
    $dst_parsed_path = $dst_parsed_root || "/" || $day;

    insert into $dst_raw_path
        with truncate
    select
        `dict` as ParsedRaw
    from $src_path;

    -- The raw table has to be committed before stage 2 can read it.
    commit;

    insert into $dst_parsed_path
        with truncate
    select
        Yson::From(ParsedRaw) as Parsed
    from $dst_raw_path;

end define;

-- Driver: walk over the selected source days and run $parse_logs for every
-- day whose name is not found in $dst_parsed_days. The trailing ELSE branch
-- belongs to EVALUATE FOR and is a no-op.
evaluate for $day in $src_days
    do begin
        evaluate if not ListHas($dst_parsed_days, $day)
            do $parse_logs($day);
    end do
    else
        do empty_action();
