#include <util/datetime/base.h>
#include <util/generic/set.h>
#include <util/generic/size_literals.h>

#include <robot/kwyt/protos/kwyt.pb.h>
#include <robot/library/yt/static/command.h>

#include <wmconsole/version3/library/jupiter/jupiter.h>
#include <wmconsole/version3/processors/indexing/hostinfo/conf/config.h>
#include <wmconsole/version3/protos/exported.pb.h>
#include <wmconsole/version3/wmcutil/args.h>
#include <wmconsole/version3/wmcutil/mr/stage_utils.h>
#include <wmconsole/version3/wmcutil/mr/mr_tasks.h>
#include <wmconsole/version3/wmcutil/url.h>
#include <wmconsole/version3/wmcutil/yt/misc.h>
#include <wmconsole/version3/wmcutil/yt/yt_runner.h>
#include <wmconsole/version3/wmcutil/yt/yt_utils.h>
#include <wmconsole/version3/wmcutil/yt/triggers.h>

#include "task_host_status.h"

namespace NWebmaster {

using namespace NJupiter;

namespace {
// Name of the host column; used below as the reduce key and as the sort key
// of the exported table. constexpr makes the pointer itself immutable, so
// the file-local constant cannot be reseated by accident.
constexpr const char *F_HOST = "Host";
}

// Reducer over NKwYT::THost rows. For every key group whose host is present
// in WebmasterHosts it emits exactly one row: the one with the greatest
// LastAccess value (on equal values the row read later wins). Groups for
// hosts outside WebmasterHosts produce no output.
struct TExternalsHostStatusReducer : public NYT::IReducer<NYT::TTableReader<NKwYT::THost>, NYT::TTableWriter<NKwYT::THost>> {
    // Lists WebmasterHosts as the state saved/loaded with the job.
    Y_SAVELOAD_JOB(WebmasterHosts)

public:
    TExternalsHostStatusReducer() = default;

    // webmasterHosts: set of host names whose rows must be kept; copied into the reducer.
    TExternalsHostStatusReducer(const THashSet<TString> &webmasterHosts)
        : WebmasterHosts(webmasterHosts)
    {
    }

    // Processes one key group. The host of the first row decides whether
    // the whole group is kept (the callers in this file reduce by the host column).
    void Do(TReader *input, TWriter *output) override {
        if (!WebmasterHosts.contains(input->GetRow().GetHost())) {
            return;
        }

        // Track the most recently accessed row while streaming, instead of
        // buffering a copy of every row of the group in an ordered map.
        NKwYT::THost latest = input->GetRow();
        size_t latestAccess = latest.GetLastAccess();
        for (input->Next(); input->IsValid(); input->Next()) {
            const NKwYT::THost &row = input->GetRow();
            const size_t access = row.GetLastAccess();
            // ">=" so that, among rows with equal LastAccess, the last one read is kept.
            if (access >= latestAccess) {
                latest = row;
                latestAccess = access;
            }
        }

        output->AddRow(latest);
    }

public:
    // Hosts whose rows are allowed through the reducer.
    THashSet<TString> WebmasterHosts;
};

REGISTER_REDUCER(TExternalsHostStatusReducer)

// Wraps a table name into a rich YPath. In normal operation the path is
// returned untouched; the commented-out lines are a debugging aid that
// restricts the input to a single exact key when re-enabled.
static NYT::TRichYPath DebugPath(const TString &table) {
    auto richPath = NYT::TRichYPath(table);
//    richPath.AddRange(NYT::TReadRange().Exact(NYT::TReadLimit().Key(NYT::TKey("https://lenta.ru"))));
//    richPath.AddRange(NYT::TReadRange().Exact(NYT::TReadLimit().Key(NYT::TKey("www.alfeco.ru"))));
//    richPath.AddRange(NYT::TReadRange().Exact(NYT::TReadLimit().Key(NYT::TKey("http://www.alfeco.ru"))));
    return richPath;
}

void UpdateHostStatus(const THashSet<TString> &webmasterHosts) {
    const auto &cfg = NHostInfo::TConfig::CInstance();
    LOG_INFO("hostinfo, HostStatus, input %s", cfg.TABLE_SOURCE_HOST_STATUS.data());
    LOG_INFO("hostinfo, HostStatus, output %s", cfg.TABLE_EXPORT_HOST_STATUS.data());
    NYT::IClientPtr clientKwyt = NYT::CreateClient(cfg.MR_SERVER_HOST_KWYT);
    NYT::ITransactionPtr tx = clientKwyt->StartTransaction();
    TReduceCmd<TExternalsHostStatusReducer>(tx, new TExternalsHostStatusReducer(webmasterHosts))
        .Input(TTable<NKwYT::THost>(tx, DebugPath(cfg.TABLE_SOURCE_HOST_STATUS)))
        .Output(TTable<NKwYT::THost>(tx, cfg.TABLE_EXPORT_HOST_STATUS).AsSortedOutput({F_HOST}))
        .MemoryLimit(3_GBs)
        .ReduceBy({F_HOST})
        .Do()
    ;

    TSortCmd<NKwYT::THost>(tx, TTable<NKwYT::THost>(tx, cfg.TABLE_EXPORT_HOST_STATUS))
        .By({F_HOST})
        .Do()
    ;
    tx->Commit();

    LOG_INFO("hostinfo, HostStatus, done");
}

int TaskUpdateHostStatus(int argc, const char **argv) {
    NYT::Initialize(argc, argv);
    NLastGetopt::TOpts opts;
    TString envRoot;

    opts.AddCharOption('L', "Log path").StoreResult(&TArgs::Instance().LogPath).DefaultValue("");

    opts
        .AddCharOption('E', "Environment root")
        .StoreResult(&envRoot)
        .DefaultValue("prod")
    ;

    NLastGetopt::TOptsParseResult res(&opts, argc, argv);
    TCustomYTEnvironment::Instance().Init(envRoot);
    const auto &cfg = NHostInfo::TConfig::CInstance();

    const int HOSTS_COUNT_THRESHOLD = 500000;
    NYT::IClientPtr clientJupiter = NYT::CreateClient(cfg.MR_SERVER_HOST_JUPITER);

    const TString jupiterTable = GetJupiterAcceptanceInProdTable(clientJupiter);

    TYtSourceTrigger hostinfoTableTrigger(clientJupiter, cfg.TABLE_EXPORT_HOST_STATUS);
    if (!hostinfoTableTrigger.NeedUpdate(jupiterTable)) {
        LOG_INFO("hostinfo, acceptance table %s is already processed", jupiterTable.data());
        return 0;
    }

    THashSet<TString> webmasterHosts;
    if (!NYTUtils::LoadWebmastersHosts(clientJupiter, cfg.TABLE_SOURCE_WEBMASTER_HOSTS, webmasterHosts, HOSTS_COUNT_THRESHOLD)) {
        ythrow yexception() << "could not load webmaster hosts table";
    }
    LOG_INFO("externals loaded %lu webmasters hosts", webmasterHosts.size());

    LOG_INFO("hostinfo, HostStatus, input %s", cfg.TABLE_SOURCE_HOST_STATUS.data());
    LOG_INFO("hostinfo, HostStatus, output %s", cfg.TABLE_EXPORT_HOST_STATUS.data());
    NYT::ITransactionPtr tx = clientJupiter->StartTransaction();
    TReduceCmd<TExternalsHostStatusReducer>(tx, new TExternalsHostStatusReducer(webmasterHosts))
        .Input(TTable<NKwYT::THost>(tx, DebugPath(cfg.TABLE_SOURCE_HOST_STATUS)))
        .Output(TTable<NKwYT::THost>(tx, cfg.TABLE_EXPORT_HOST_STATUS).AsSortedOutput({F_HOST}))
        .MemoryLimit(3_GBs)
        .ReduceBy({F_HOST})
        .Do()
        ;

    TSortCmd<NKwYT::THost>(tx, TTable<NKwYT::THost>(tx, cfg.TABLE_EXPORT_HOST_STATUS))
        .By({F_HOST})
        .Do()
        ;
    hostinfoTableTrigger.Update(tx, jupiterTable);
    tx->Commit();

    LOG_INFO("hostinfo, HostStatus, done");
    return 0;
}
} //namespace NWebmaster
