-- Cluster selection and YT execution settings for the whole script.
USE arnold;
PRAGMA yt.InferSchema;
PRAGMA yt.Pool = "robot-webmaster";

-- Base folders, written without the leading "//" so that they can be compared
-- directly with the Path column produced by FOLDER().
--   input:       one sub-folder per date, each holding `urls_for_webmaster_simple`
--   output:      one table per processed date with the computed domains
--   diff output: one table per processed date with notification rows
$input_base_folder = "home/jupiter/acceptance";
$output_base_folder = "home/webmaster/prod/checklist/domains_on_search";
$diff_output_base_folder = "home/webmaster/prod/checklist/new_domains_notification";


-- Returns the last component of a "/"-separated path (everything after the
-- final slash). A path without any slash is returned unchanged.
--
-- $path: String, e.g. "home/jupiter/acceptance/20240101" -> "20240101".
$basename = ($path) -> {
    -- Built-in RFIND yields NULL when "/" is absent, so no sentinel value or
    -- CAST is needed (unlike the deprecated String::ReverseFind UDF).
    $last_slash_pos = RFIND($path, "/");
    -- A NULL start position means "from the beginning" for SUBSTRING, which
    -- keeps the slash-less case returning the whole input.
    RETURN SUBSTRING($path, $last_slash_pos + 1u);
};

-- Returns the parent part of a "/"-separated path (everything before the
-- final slash, slash excluded). A path without any slash is returned unchanged.
--
-- $path: String, e.g. "home/webmaster/prod/checklist" -> "home/webmaster/prod".
$dirname = ($path) -> {
    -- Built-in RFIND yields NULL when "/" is absent, so no sentinel value or
    -- CAST is needed (unlike the deprecated String::ReverseFind UDF).
    $last_slash_pos = RFIND($path, "/");
    -- A NULL length means "up to the end" for SUBSTRING, which keeps the
    -- slash-less case returning the whole input.
    RETURN SUBSTRING($path, 0u, $last_slash_pos);
};


-- Value of the `searchbase_prod` attribute set on the output base folder node.
-- The parent folder is listed (requesting that attribute) and narrowed down to
-- the single entry that is the output base folder itself.
-- Presumably this is the name/date of the search base currently in production;
-- it is compared against input folder names further down.
$searchbase_date = (
    SELECT
        Yson::ConvertToString(Attributes.searchbase_prod) AS State
    FROM FOLDER($dirname($output_base_folder), "searchbase_prod")
    WHERE Path = $output_base_folder
);


-- Name of the input sub-folder to process in this run: the entry of the input
-- base folder whose name equals $searchbase_date, provided that no entry with
-- the same name already exists in the output base folder.
-- Produces no row when that folder is absent or has already been processed;
-- the final EVALUATE IF relies on this by checking for NULL.
$input_folder = (
    SELECT
        $basename(src.Path) AS Name
    FROM FOLDER($input_base_folder) AS src
    LEFT ONLY JOIN FOLDER($output_base_folder) AS done
        ON $basename(src.Path) = $basename(done.Path)
    WHERE $basename(src.Path) = $searchbase_date
);


-- Input-side path of the previous run: among the names already present in the
-- output base folder, take the greatest one that sorts strictly before
-- $input_folder and re-root it under the input base folder.
-- Produces no row when nothing earlier has been processed.
$prev_folder = (
    SELECT
        $input_base_folder || "/" || $basename(Path) AS Path
    FROM FOLDER($output_base_folder)
    WHERE $basename(Path) < $input_folder
    ORDER BY
        Path DESC
    LIMIT 1
);


-- Name (with leading slash) of the source table inside every dated input folder.
$urls_table_suffix = "/urls_for_webmaster_simple";

-- Source tables: the current run and the previous one.
$input = $input_base_folder || "/" || $input_folder || $urls_table_suffix;
$prev_input = $prev_folder || $urls_table_suffix;

-- Destination tables, both named after the current input folder.
$output = $output_base_folder || "/" || $input_folder;
$diff_output = $diff_output_base_folder || "/" || $input_folder;


-- Distinct scheme+host values derived from the `key` column of the
-- webmaster-hosts export table.
$hosts_in_webmaster = (
    SELECT
        Host
    FROM `//home/webmaster/prod/export/webmaster-hosts`
    GROUP BY
        Url::GetSchemeHost(key) AS Host
);


-- Finds domains from a search-base table whose owner domain is present in
-- $hosts_in_webmaster while the domain itself is not.
--
-- $table: path of a table providing the Host, IsFake and IsSearchable columns.
-- Result: one row per domain with the columns Owner and Domain.
DEFINE SUBQUERY $eval_domains_on_search($table) AS
    -- Scheme+host values of non-fake rows that have at least one searchable
    -- row, skipping those that are the owner domain itself once the scheme and
    -- a leading "www." are removed.
    $searchable_subdomains = (
        SELECT
            Domain,
            Url::GetOwner(Domain) AS Owner
        FROM $table
        WHERE IsFake = False
        GROUP BY
            Url::GetSchemeHost(Host) AS Domain
        HAVING
            COUNT_IF(IsSearchable) > 0
            AND Url::CutWWW2(Url::CutScheme(Domain)) != Url::GetOwner(Domain)
    );

    -- Keep only the domains whose owner matches some Webmaster host
    -- (host compared without scheme and "www.").
    $subdomains_with_known_owner = (
        SELECT
            sub.Owner AS Owner,
            sub.Domain AS Domain
        FROM $searchable_subdomains AS sub
        LEFT SEMI JOIN $hosts_in_webmaster AS wm
            ON sub.Owner = Url::CutWWW2(Url::CutScheme(wm.Host))
    );

    -- Drop the domains that are themselves Webmaster hosts; both sides are
    -- normalised the same way (scheme and "www." stripped) before comparing.
    SELECT
        known.Owner AS Owner,
        known.Domain AS Domain
    FROM $subdomains_with_known_owner AS known
    LEFT ONLY JOIN $hosts_in_webmaster AS wm
        ON Url::CutWWW2(Url::CutScheme(known.Domain)) = Url::CutWWW2(Url::CutScheme(wm.Host));
END DEFINE;


-- Domains computed from the current input table.
$current_domains = (
    SELECT *
    FROM $eval_domains_on_search($input)
);

-- Same computation over the previous run's input table.
$prev_domains = (
    SELECT *
    FROM $eval_domains_on_search($prev_input)
);

-- Domains present in the current result but absent from the previous one.
$new_domains = (
    SELECT
        cur.Domain AS Domain,
        cur.Owner AS Owner
    FROM $current_domains AS cur
    LEFT ONLY JOIN $prev_domains AS prev
        ON cur.Domain = prev.Domain
);

-- Per-user notification channel settings, restricted to the
-- NEW_DOMAINS_NOTIFICATION notification type.
$notifications_settings = (
    SELECT
        *
    FROM `//home/webmaster/prod/user/user-notification-settings/user-notification-channel-settings`
    WHERE notification_type = "NEW_DOMAINS_NOTIFICATION"
);

-- Per owner: a list of at most 10 of its new domains plus the total number of
-- new domains.
$new_domains_samples = (
    SELECT
        Owner,
        AGGREGATE_LIST(Domain, 10) AS Domains,
        COUNT(Domain) AS DomainsCount
    FROM $new_domains
    GROUP BY
        Owner
);

-- One notification row per (settings row, owner with new domains): the sampled
-- domains and their count joined to the recipient's notification settings.
--
-- The join key is the host part of host_id, i.e. the second ":"-separated
-- field (D.Owner: "ya.ru"; N.host_id: "https:ya.ru:443").
-- NOTE(review): unlike the owner check inside $eval_domains_on_search, the host
-- part is not passed through Url::CutWWW2 here, so a host_id such as
-- "https:www.ya.ru:443" would presumably not match owner "ya.ru" -- confirm
-- that this is intended.
$new_domains_notification_data = (
    SELECT
        settings.user_id AS user_id,
        settings.host_id AS host_id,
        samples.Domains AS domains,
        samples.DomainsCount AS domains_count,
        settings.login AS login,
        settings.email AS email,
        settings.language AS language,
        settings.fio AS fio,
        settings.notification_type AS notification_type,
        settings.channel_service AS channel_service,
        settings.channel_email AS channel_email
    FROM $new_domains_samples AS samples
    INNER JOIN $notifications_settings AS settings
        ON samples.Owner = String::SplitToList(settings.host_id, ":")[1]
);


-- Runs the whole export for the current input folder:
--   1. fails if the input table is empty,
--   2. overwrites $output with the current domains,
--   3. overwrites $diff_output with the notification rows for new domains.
DEFINE ACTION $process_table() AS
    -- Precondition: ENSURE raises the given message when the input has no rows;
    -- the SELECT result itself is discarded.
    DISCARD SELECT
        ENSURE(True, COUNT(*) > 0, "Expected to find a non-empty table")
    FROM $input;

    -- Full list of current domains, sorted by owner and then by domain.
    INSERT INTO $output WITH TRUNCATE
    SELECT
        *
    FROM $current_domains
    ORDER BY
        Owner,
        Domain;

    -- Notification payload for newly appeared domains, sorted by host and user.
    INSERT INTO $diff_output WITH TRUNCATE
    SELECT
        *
    FROM $new_domains_notification_data
    ORDER BY
        host_id,
        user_id;
END DEFINE;

-- Entry point: run only when there is an unprocessed input folder and an
-- earlier processed run to diff against; otherwise the script does nothing.
EVALUATE IF ($input_folder IS NOT NULL) AND ($prev_folder IS NOT NULL)
    DO $process_table();
