use arnold;
PRAGMA yt.InferSchema = '1';

-- Parses a state name of the form "YYYYMMDD-HHMMSS" and tags the result with
-- the Europe/Moscow timezone.
--   $x: string in "%Y%m%d-%H%M%S" format.
-- Returns the value produced by DateTime::Update (a DateTime resource).
$parse = ($x) -> {
    $parsed_state_time = DateTime::Parse("%Y%m%d-%H%M%S")($x);
    return DateTime::Update($parsed_state_time, "Europe/Moscow" AS Timezone);
};

-- Folder that is listed with FOLDER() to read the "jupiter_meta" attribute.
$input_calisto_path = "home/callisto";
-- Node inside that folder whose "jupiter_meta" attribute holds the current state name.
$input_mercury_path = $input_calisto_path || "/mercury";
-- Input table paths are built as: prefix || <state name> || postfix.
$input_path_prefix = "home/callisto/mercury/export/";
-- Postfix of the per-state table with search URL metrics.
$input_url_postfix = "/metrics/in_search";
-- Postfix of the per-state table with content attributes (title, canonical, HTTP code, description).
$input_attr_postfix = "/webmaster/content_attrs";
-- Folder for the per-state result tables (table name = state name).
$output_dir = "home/webmaster/prod/import/robot/callisto/mercury/fresh";
-- Folder for the per-state increment tables (table name = state name).
$output_increment_dir = "home/webmaster/prod/import/robot/callisto/mercury/fresh/increment";
-- Folder for the important-URLs export tables.
$output_important_dir = "//home/webmaster/prod/important_urls/source_callisto";

-- Name of the current production state, read from the "jupiter_meta" attribute
-- of the node at $input_mercury_path (found by listing $input_calisto_path).
$jupiter_cur_state = (
    SELECT Yson::ConvertToString(Attributes.jupiter_meta.production_current_state) AS State
    FROM FOLDER($input_calisto_path, "jupiter_meta")
    WHERE Path = $input_mercury_path
);

-- State name parsed by $parse and converted to epoch seconds.
$state_epoch_seconds = DateTime::ToSeconds(DateTime::MakeTimestamp($parse($jupiter_cur_state)));

-- Per-state result table and per-state increment table: both are named after the state.
$output_table = $output_dir || "/" || $jupiter_cur_state;
$increment_table = $output_increment_dir || "/" || $jupiter_cur_state;

-- Important-URLs export table: named by the state's epoch seconds divided by 100.
-- NOTE(review): the reason for the division by 100 is not visible here; confirm with the table's consumer.
$important_urls = $output_important_dir || "/" || CAST($state_epoch_seconds / 100 AS String);

-- Processes one export state.
--   $x: state name; selects the input tables
--       $input_path_prefix || $x || $input_url_postfix   (search metrics)
--       $input_path_prefix || $x || $input_attr_postfix  (content attributes)
--       and is parsed by $parse to produce InsertTime.
-- Writes, in order (targets come from the enclosing script's named expressions):
--   1. $output_table     - deduplicated URLs of verified hosts with content attributes;
--   2. $increment_table  - rows of $output_table whose (Host, Path) is absent from
--                          webmaster-fresh-latest;
--   3. webmaster-fresh-latest - overwritten with the contents of $output_table;
--   4. $important_urls   - increment rows that are also present in source_urls.
DEFINE ACTION $process_table($x) AS

$input_url_path = $input_path_prefix || $x || $input_url_postfix;
$input_attr_path = $input_path_prefix || $x || $input_attr_postfix;

-- One row per (Host, Path), keeping the maximum of every tracked field.
$group_select = (
    select 
        search.Host as Host,
        search.Path as Path,
        max(search.SearchAddTime) as SearchAddTime,
        max(search.LastAccess) as LastAccess,
        max(ValidFromMetrikaLastAccess) as ValidFromMetrikaLastAccess,
        max(ValidFromIndexNowLastAccess) as ValidFromIndexNowLastAccess
    from $input_url_path as search
    group by Host,Path
);

-- Step 1: build the per-state result table.
-- The outer GROUP BY lists every output column, so it only removes duplicate rows
-- (for example, ones multiplied by the joins).
insert into $output_table 
select Host,Path,SearchAddTime, Title, LastAccess, RelCanonicalTarget,HttpCode,Description, InsertTime, ValidFromMetrikaLastAccess, ValidFromIndexNowLastAccess
from (
    select 
        search.Host as Host,
        search.Path as Path,
        search.SearchAddTime as SearchAddTime,
        search.LastAccess as LastAccess,
        attr.TitleRawUTF8 as Title,
        attr.RelCanonicalTarget as RelCanonicalTarget,
        attr.HttpCode as HttpCode,
        attr.MetaDescription as Description,
        -- Epoch seconds of the state name.
        DateTime::ToSeconds(DateTime::MakeTimestamp($parse($x))) as InsertTime,
        search.ValidFromMetrikaLastAccess as ValidFromMetrikaLastAccess,
        search.ValidFromIndexNowLastAccess as ValidFromIndexNowLastAccess
    from $group_select as search
        -- Keep only hosts listed in the verified-hosts table.
        inner join  `//home/webmaster/prod/export/archive/webmaster-verified-hosts-latest` as w on w.host_url = search.Host
        -- Anti-join: together with "hm.key is null" below, drops hosts listed in hosts.monsters.
        left join `//home/webmaster/prod/user/hosts.monsters` as hm on hm.key = search.Host
        -- Attributes must belong to the same crawl (same LastAccess) as the selected row.
        inner join $input_attr_path as attr on attr.Host = search.Host and search.Path = attr.Path and attr.LastAccess = search.LastAccess
    where hm.key is null

)
GROUP BY Host,Path, SearchAddTime,Title,LastAccess,RelCanonicalTarget,HttpCode,Description,InsertTime,ValidFromMetrikaLastAccess,ValidFromIndexNowLastAccess
order by Host,Path;

-- Commit so that the next statement can read the table written above.
COMMIT ;

-- Step 2: increment = rows whose (Host, Path) is not yet in webmaster-fresh-latest.
insert into $increment_table
    select  
        ot.Host as Host,
        ot.Path as Path, 
        ot.SearchAddTime as SearchAddTime, 
        ot.Title as Title, 
        ot.LastAccess as LastAccess, 
        ot.RelCanonicalTarget as RelCanonicalTarget,
        ot.HttpCode as HttpCode,
        ot.Description as Description, 
        ot.InsertTime as InsertTime,
        ot.ValidFromMetrikaLastAccess as ValidFromMetrikaLastAccess,
        ot.ValidFromIndexNowLastAccess as ValidFromIndexNowLastAccess
    from $output_table as ot
        left join `home/webmaster/prod/import/robot/callisto/mercury/fresh/webmaster-fresh-latest` as latest on ot.Host = latest.Host and ot.Path = latest.Path
where latest.Host is null
order by Host,Path;

COMMIT ;

-- Step 3: replace webmaster-fresh-latest with the current state's result.
-- This runs after the increment has been computed against the previous contents.
insert into `home/webmaster/prod/import/robot/callisto/mercury/fresh/webmaster-fresh-latest` WITH TRUNCATE 
select * from $output_table
order by Host,Path;
COMMIT;

-- Step 4: export increment rows that are also listed in source_urls.
-- IsFake / IsIndexed / IsSearchable / MimeType / UrlStatus / RedirTarget are constants here.
insert into $important_urls
select Distinct
        t.Host as Host,
        t.`Path` as `Path`,
        t.SearchAddTime as AddTime,
        (t.Host || t.`Path`) as BeautyUrl,
        t.HttpCode as HttpCode, 
        false as IsFake, 
        true as IsIndexed,
        true as IsSearchable,
        t.LastAccess as LastAccess,
        t.Host as MainHost,
        t.Host as MainMirrorHost,
        t.`Path` as MainPath,
        2 as MimeType,
        null as RedirTarget,
        t.RelCanonicalTarget as RelCanonicalTarget,
        3 as UrlStatus,
        t.Title as TitleRawUTF8,
        t.`Description` as Description
    from $increment_table as t
    -- Match on the full URL key: host to host and path to path.
    inner join  `home/webmaster/prod/important_urls/source_urls` as su on su.Host = t.Host and su.Path = t.Path;
COMMIT;

END DEFINE;

 
-- Looks for an entry in $output_dir whose Path equals $output_table.
-- The subquery yields no row (NULL as a scalar) when no such entry is listed.
$output_path_is_exists = (
    SELECT TablePath()
        FROM FOLDER($output_dir)
    WHERE Path == $output_table
);

-- Run the processing only when the result table for the current state was not found,
-- so an already-processed state is skipped.
EVALUATE IF $output_path_is_exists is NULL
DO $process_table($jupiter_cur_state);

