use arnold;
PRAGMA yt.InferSchema = '1';

-- Folder that holds the input log tables (the `1h` sub-folder of the Mercury
-- pushed-url-record log).
$input_mercury_path = "home/logfeller/logs/webmaster-prod-mercury-pushed-url-record/1h";

-- Output tables are written under <prefix>/source_callisto.
$output_mercury_prefix_path = "home/webmaster/test/important_urls";
$output_mercury_path = $output_mercury_prefix_path || "/" || "source_callisto";

-- Name of the newest table in the input folder: table names are sorted as
-- strings in descending order and the first one is taken.
-- Only nodes of type "table" are considered, so a sub-directory, link or any
-- other node living in the folder can never be selected and then passed on to
-- DateTime::ParseIso8601 / Range further down in this script.
$last_table_in_list = (
    SELECT TableName(Path) AS tn
    FROM FOLDER($input_mercury_path, "schema;row_count")
    WHERE Type = "table"
    ORDER BY tn DESC
    LIMIT 1
);

-- `value` of the most recent (by `timestamp`) row in the shared state table
-- whose type is IMPORTANT_URLS_FROM_MERCURY_FRESH_LOG.
-- The action below writes the newest input table name into this column, so
-- this is the name of the last table that has already been processed.
$last_processed_table = (
    SELECT value
    FROM (
        SELECT
            value,
            `timestamp`
        FROM `//home/webmaster/prod/service/common_data_state`
        WHERE type = 'IMPORTANT_URLS_FROM_MERCURY_FRESH_LOG'
        ORDER BY `timestamp` DESC
        LIMIT 1
    )
);

-- Timestamp parsed from the name of the newest input table.
$newest_input_table_ts = DateTime::MakeTimestamp(DateTime::ParseIso8601($last_table_in_list));

-- Output table path: <output path>/<epoch seconds of the newest input table, divided by 100>.
-- NOTE(review): the reason for the division by 100 is not visible in this script.
$output_mercury_table = $output_mercury_path || "/" ||
    CAST(DateTime::ToSeconds($newest_input_table_ts) / 100 AS String);

-- Timestamp parsed from the name of the last already-processed table.
$last_processed_table_ts = DateTime::MakeTimestamp(DateTime::ParseIso8601($last_processed_table));

-- Lower bound for the next run: the last processed timestamp plus 1000 ms,
-- rendered as a string so it can be compared with table names.
$start_processed_date = CAST(
    DateTime::FromMilliseconds(DateTime::ToMilliseconds($last_processed_table_ts) + 1000)
    AS String
);


-- Action with two committed steps:
--   1. Read the input tables in the name range
--      [$start_processed_date, $last_table_in_list], keep only the records whose
--      URL host and path are present in the `source_urls` table, and write the
--      distinct result rows into $output_mercury_table.
--   2. Insert a new row into the shared state table carrying the name of the
--      newest input table, stamped with the current UTC time.
DEFINE ACTION $process_table() AS

    INSERT INTO $output_mercury_table
    SELECT DISTINCT
        Url::GetHost(t.Url) AS Host,
        Url::GetPath(t.Url) AS `Path`,
        t.SearchAddTime AS AddTime,
        t.Url AS BeautyUrl,
        t.HttpCode AS HttpCode,
        false AS IsFake,
        true AS IsIndexed,
        true AS IsSearchable,
        t.LastAccess AS LastAccess,
        Url::GetHost(t.Url) AS MainHost,
        Url::GetHost(t.Url) AS MainMirrorHost,
        su.`Path` AS MainPath,
        -- NOTE(review): the meaning of the constants 2 (MimeType) and
        -- 3 (UrlStatus) is not defined in this script; confirm against the
        -- consumer of the output table.
        2 AS MimeType,
        null AS RedirTarget,
        -- Every column in a JOIN select is referenced through its table alias.
        -- NOTE(review): RelCanonicalTarget and MetaDescription are assumed to be
        -- fields of the log record (t), like TitleRawUTF8 -- confirm.
        t.RelCanonicalTarget AS RelCanonicalTarget,
        3 AS UrlStatus,
        t.TitleRawUTF8 AS TitleRawUTF8,
        t.`MetaDescription` AS Description
    FROM RANGE($input_mercury_path, $start_processed_date, $last_table_in_list) AS t
    INNER JOIN `home/webmaster/prod/important_urls/source_urls` AS su
        ON su.Host = Url::GetHost(t.Url)
        AND su.Path = Url::GetPath(t.Url);
    COMMIT;

    -- Record the newest input table name (empty string if it is NULL) so that
    -- the next run can resume after it.
    INSERT INTO `//home/webmaster/prod/service/common_data_state`
    SELECT
        "IMPORTANT_URLS_FROM_MERCURY_FRESH_LOG" AS type,
        CurrentUtcDateTime() AS `timestamp`,
        COALESCE(CAST($last_table_in_list AS String), "") AS value;

    COMMIT;
END DEFINE;


-- Run the action only when the newest input table name sorts strictly after
-- the resume point, i.e. there is something new to process.
EVALUATE IF $last_table_in_list > $start_processed_date
    DO $process_table();