#include <util/string/join.h>

#include <library/cpp/getopt/modchooser.h>

#include <wmconsole/version3/wmcutil/args.h>
#include <wmconsole/version3/wmcutil/log.h>
#include <wmconsole/version3/wmcutil/periodic.h>
#include <wmconsole/version3/wmcutil/regex.h>
#include <wmconsole/version3/wmcutil/thread.h>
#include <wmconsole/version3/wmcutil/yt/misc.h>
#include <wmconsole/version3/wmcutil/yt/transfer_manager.h>
#include <wmconsole/version3/wmcutil/yt/yt_runner.h>

#include "achievements.h"
#include "beautyurl.h"
#include "config.h"
#include "geo.h"
#include "iks.h"
#include "mirrors.h"
#include "monitor.h"
#include "recrawl.h"
#include "search.h"
#include "site_reviews.h"
#include "week_config.h"

#include "task_update.h"

namespace NWebmaster {

// Computes how many hours have passed since the most recent dated table found under `prefix`.
//
// Every table name under `prefix` is matched against `regexStr`; only names that yield exactly
// one match are considered, and that match is converted to a timestamp with str2date().
//
// Returns false (leaving `sourceLagHours` untouched) when no table name matches; otherwise
// stores the lag of the newest matching table, in hours, into `sourceLagHours` and returns true.
bool GetLatestDailyTableSourceLag(NYT::IClientBasePtr client, const TString &prefix, const TString &regexStr, double &sourceLagHours) {
    TDeque<NYTUtils::TTableInfo> tables;
    NYTUtils::GetTableList(client, prefix, tables);

    TRegularExpression regex(regexStr);

    // Only the newest timestamp matters, so track the maximum instead of collecting all of them.
    bool found = false;
    time_t latest = 0;
    for (const NYTUtils::TTableInfo &table : tables) {
        TVector<TString> period;
        if (regex.GetMatches(NYTUtils::GetTableName(table.Name), period) != 1) {
            continue;
        }

        const time_t timestamp = str2date(period[0]);
        if (!found || timestamp > latest) {
            latest = timestamp;
            found = true;
        }
    }

    if (!found) {
        return false;
    }

    // Subtract in signed time_t: mixing the unsigned Now().Seconds() with a time_t that lies in
    // the future would wrap around and report an enormous positive lag instead of a negative one.
    const time_t lagSeconds = Now().TimeT() - latest;
    sourceLagHours = static_cast<double>(lagSeconds) / 3600.0;
    return true;
}

// Reports the lag (in hours) of each digest source to monitoring and logs it.
//
// For every source the newest dated table is located via GetLatestDailyTableSourceLag();
// when no matching table exists, that source is silently skipped.
// `clientQueries` is currently unused; all lookups go through `clientSearch`.
void MonitorSources(NYT::IClientBasePtr clientQueries, NYT::IClientBasePtr clientSearch) {
    const auto &config = TConfig::CInstance();

    Y_UNUSED(clientQueries);

    // Table name consisting of digits only.
    const char *REGEX_DATE_ONLY = "^(\\d+)$";
    // NOTE(review): the '.' before the digits is unescaped, so it matches any single
    // character, not just a literal dot — confirm whether that is intended.
    const char *REGEX_VERIFIED = "^webmaster-verified-hosts.(\\d+)$";

    double sourceLagHours = 0;
    if (GetLatestDailyTableSourceLag(clientSearch, config.TABLE_DIGEST_SOURCE_BEAUTYURL, REGEX_DATE_ONLY, sourceLagHours)) {
        MonitorBeautyUrlsSourceLag(config.MONITOR_PERFORMANCE_SUFFIX, sourceLagHours);
        LOG_INFO("monitor beauty urls source lag hours = %.1f", sourceLagHours);
    }

    if (GetLatestDailyTableSourceLag(clientSearch, config.TABLE_DIGEST_SOURCE_GEO_REGIONS, REGEX_DATE_ONLY, sourceLagHours)) {
        MonitorGeoRegionsSourceLag(config.MONITOR_PERFORMANCE_SUFFIX, sourceLagHours);
        LOG_INFO("monitor geo regions source lag hours = %.1f", sourceLagHours);
    }

    // Important urls have their own source directory; previously the sitetree directory was
    // inspected here, so this metric merely duplicated the sitetree lag.
    if (GetLatestDailyTableSourceLag(clientSearch, config.TABLE_DIGEST_SOURCE_SEARCH_IMPORTANT_URLS, REGEX_DATE_ONLY, sourceLagHours)) {
        MonitorImportantUrlsSourceLag(config.MONITOR_PERFORMANCE_SUFFIX, sourceLagHours);
        LOG_INFO("monitor important urls source lag hours = %.1f", sourceLagHours);
    }

    if (GetLatestDailyTableSourceLag(clientSearch, config.TABLE_DIGEST_SOURCE_SEARCH_SITETREE, REGEX_DATE_ONLY, sourceLagHours)) {
        MonitorSitetreesSourceLag(config.MONITOR_PERFORMANCE_SUFFIX, sourceLagHours);
        LOG_INFO("monitor sitetree source lag hours = %.1f", sourceLagHours);
    }

    if (GetLatestDailyTableSourceLag(clientSearch, config.TABLE_SOURCE_USER_VERIFICATIONS_PREFIX, REGEX_VERIFIED, sourceLagHours)) {
        MonitorVerifiedUsersSourceLag(config.MONITOR_PERFORMANCE_SUFFIX, sourceLagHours);
        LOG_INFO("monitor verified users source lag hours = %.1f", sourceLagHours);
    }
}

// Removes tables under `root` whose name, parsed as a %Y%m%d date, is more than
// STORE_DAYS whole days older than the current time. Each removal is logged.
void CleanupDaily(NYT::IClientBasePtr client, const TString &root) {
    const time_t STORE_DAYS = 365;
    const time_t SECONDS_PER_DAY = 86400;
    const char *FORMAT = "%Y%m%d";
    const time_t now = Now().TimeT();

    TDeque<NYTUtils::TTableInfo> tables;
    // NOTE(review): the Max<int>() argument is presumably a depth/limit for the listing — confirm.
    NYTUtils::GetTableList(client, root, tables, Max<int>());

    for (const auto &table : tables) {
        const time_t tableStart = TDate(NYTUtils::GetTableName(table.Name), FORMAT).GetStart();

        // A table dated in the future is not old. The previous unsigned subtraction wrapped
        // around for such tables, producing a huge "age" and causing them to be deleted.
        if (tableStart > now) {
            continue;
        }

        const time_t ageDays = (now - tableStart) / SECONDS_PER_DAY;
        if (ageDays > STORE_DAYS) {
            client->Remove(table.Name);
            LOG_INFO("source, removed %s", table.Name.c_str());
        }
    }
}

// Entry point of the daily update task; command-line arguments are ignored.
//
// Steps, in order:
//   1. build the week configuration for the current week and log the processed weeks;
//   2. create YT clients for the "queries" and "search" hosts taken from the config;
//   3. clean up outdated daily source tables and report source lags to monitoring;
//   4. load the set of webmaster hosts (throws yexception if the loader reports failure);
//   5. upload / prepare each digest source.
//
// Returns 0 when all steps have completed.
int TaskDailyUpdate(int, const char **) {
    const auto &config = TConfig::CInstance();

    TDigestWeekConfig weekConfig(TWeekConfig(Now().TimeT()).WeekStart);
    LOG_INFO("processing weeks %s - %s", weekConfig.OldWeek.WeekName().data(), weekConfig.NewWeek.WeekName().data());

    NYT::IClientPtr clientQueries = NYT::CreateClient(config.MR_SERVER_HOST_QUERIES);
    NYT::IClientPtr clientSearch = NYT::CreateClient(config.MR_SERVER_HOST_SEARCH);

    // Drop outdated daily tables before measuring lags and building new sources.
    CleanupDaily(clientSearch, config.TABLE_DIGEST_SOURCE_BEAUTYURL);
    CleanupDaily(clientSearch, config.TABLE_DIGEST_SOURCE_GEO_REGIONS);
    CleanupDaily(clientSearch, config.TABLE_DIGEST_SOURCE_SEARCH_IMPORTANT_URLS);
    CleanupDaily(clientSearch, config.TABLE_DIGEST_SOURCE_SEARCH_SITETREE);

    MonitorSources(clientQueries, clientSearch);

    LOG_INFO("loading webmaster hosts");
    THashSet<TString> webmasterHosts;
    if (!NYTUtils::LoadWebmastersHosts(clientQueries, config.TABLE_SOURCE_WEBMASTER_HOSTS, webmasterHosts)) {
        ythrow yexception() << "webmaster hosts table is empty";
    }

    // size() is a size_t, so %zu is the matching conversion specifier.
    LOG_INFO("loaded %zu webmaster hosts", webmasterHosts.size());

    UploadBeautyUrls(clientSearch, webmasterHosts);
    UploadGeoRegions(clientSearch, webmasterHosts);
    PrepareIKSSource(clientSearch, weekConfig.ThisWeek);
    PrepareImportantUrlsSource(clientSearch);
    PrepareSitetreeSource(clientSearch);
    PrepareAchievementsSource(clientSearch, webmasterHosts);
    PrepareSiteReviewsSource(clientSearch, webmasterHosts);
    PrepareMirrorsSource(clientSearch, webmasterHosts);

    return 0;
}

} //namespace NWebmaster
