#include <util/generic/hash_set.h>
#include <util/generic/size_literals.h>

#include <robot/library/yt/static/command.h>

#include <wmconsole/version3/library/jupiter/jupiter.h>
#include <wmconsole/version3/wmcutil/yt/triggers.h>
#include <wmconsole/version3/processors/tools/host2vec/applier/protos/tables.pb.h>
#include <wmconsole/version3/processors/tools/host2vec/utils/utils.h>
#include <wmconsole/version3/processors/user_sessions/niche/conf/config.h>
#include <wmconsole/version3/processors/user_sessions/niche/miner/tables.pb.h>

#include "task_source_host2vec.h"

namespace NWebmaster {
namespace NNiche {

using namespace NJupiter;

// Mapper that collects every distinct value seen in the Host and Analogy
// fields of its input rows and emits one output row per distinct value,
// with only the Host field set.
struct THost2vecHostExtractMapper : public NYT::IMapper<NYT::TTableReader<NProto::THost2Vec>, NYT::TTableWriter<NProto::THost2Vec>> {
    void Do(TReader *input, TWriter *output) override {
        // Deduplicate within this job; both sides of each pair are treated alike.
        THashSet<TString> uniqueNames;
        while (input->IsValid()) {
            const auto &row = input->GetRow();
            uniqueNames.insert(row.GetHost());
            uniqueNames.insert(row.GetAnalogy());
            input->Next();
        }

        // Only Host is ever assigned, so a single message can be reused for all rows.
        NProto::THost2Vec outRow;
        for (const TString &name : uniqueNames) {
            outRow.SetHost(name);
            output->AddRow(outRow);
        }
    }
};

REGISTER_MAPPER(THost2vecHostExtractMapper)

// Intended to be run with ReduceBy Host.
// Writes the row the reader is currently positioned at and reads nothing
// further, so each Do() call contributes exactly one output row.
struct THost2vecHostExtractReducer : public NYT::IReducer<NYT::TTableReader<NProto::THost2Vec>, NYT::TTableWriter<NProto::THost2Vec>> {
    void Do(TReader *input, TWriter *output) override {
        const auto &firstRow = input->GetRow();
        output->AddRow(firstRow);
    }
};

REGISTER_REDUCER(THost2vecHostExtractReducer)

// Intended to be run with ReduceBy MainHost.
// Scans mirror rows and emits the first one whose normalized host
// (via NHost2Vec::FixHost) is present in Domains. The emitted row carries the
// normalized host together with the original MainHost and Rank.
struct THost2vecMirrorsExtractReducer : public NYT::IReducer<NYT::TTableReader<NProto::TMirror>, NYT::TTableWriter<NProto::TMirror>> {
    // Set of domains of interest; serialized into the job via Y_SAVELOAD_JOB.
    THashSet<TString> Domains;

    Y_SAVELOAD_JOB(Domains)

    THost2vecMirrorsExtractReducer() = default;
    THost2vecMirrorsExtractReducer(const THashSet<TString> &domains)
        : Domains(domains)
    {
    }

    void Do(TReader *input, TWriter *output) override {
        while (input->IsValid()) {
            const auto &mirror = input->GetRow();
            TString fixedHost;
            if (NHost2Vec::FixHost(mirror.GetHost(), fixedHost)) {
                if (Domains.contains(fixedHost)) {
                    NProto::TMirror match;
                    match.SetHost(fixedHost);
                    match.SetMainHost(mirror.GetMainHost());
                    match.SetRank(mirror.GetRank());
                    output->AddRow(match);
                    // At most one row is written per Do() call.
                    // NOTE(review): later rows of this call whose host is also in
                    // Domains are not emitted -- confirm this is intended.
                    return;
                }
            } else {
                // Unparseable hosts are reported to stderr and skipped.
                Cerr << "unable to parse host: " << mirror.GetHost() << Endl;
            }
            input->Next();
        }
    }
};

REGISTER_REDUCER(THost2vecMirrorsExtractReducer)

// Mapper that rewrites the Host and Analogy fields of every input row to a
// "main host" value: the entry from Mirrors when one exists for the field's
// value, otherwise the value prefixed with "http://". All other fields of the
// row are passed through unchanged.
struct THost2vecMirrorsEnrichMapper : public NYT::IMapper<NYT::TTableReader<NProto::THost2Vec>, NYT::TTableWriter<NProto::THost2Vec>> {
    Y_SAVELOAD_JOB(Mirrors)

    THost2vecMirrorsEnrichMapper() = default;
    THost2vecMirrorsEnrichMapper(const THashMap<TString, TString> &mirrors)
        : Mirrors(mirrors)
    {
    }

    // Returns the value mapped to `domain` in Mirrors, or "http://" + domain
    // when there is no such entry.
    inline TString GetMainHost(const TString &domain) const {
        // Single lookup: this runs twice per input row, so avoid hashing the
        // key once for contains() and again for at().
        const auto it = Mirrors.find(domain);
        if (it != Mirrors.end()) {
            return it->second;
        }
        return "http://" + domain;
    }

    void Do(TReader *input, TWriter *output) override {
        for (; input->IsValid(); input->Next()) {
            // Copy the row on purpose: two of its fields are overwritten below.
            auto row = input->GetRow();
            row.SetHost(GetMainHost(row.GetHost()));
            row.SetAnalogy(GetMainHost(row.GetAnalogy()));
            output->AddRow(row);
        }
    }

public:
    // Lookup table used by GetMainHost; serialized into the job via Y_SAVELOAD_JOB.
    THashMap<TString, TString> Mirrors;
};

REGISTER_MAPPER(THost2vecMirrorsEnrichMapper)

// Returns the part of `path` that follows its last '/' as an owned string.
inline TString GetName(const TString &path) {
    const TStringBuf lastComponent = TStringBuf(path).RAfter('/');
    return TString{lastComponent};
}

// Rebuilds the table `target` from the host2vec table `source`.
//
// Does nothing (apart from logging) when the modification-time trigger says no
// update is needed. Otherwise, inside one transaction:
//   1. extracts the distinct Host/Analogy values of `source`;
//   2. selects, from the Jupiter production mirrors table, mirror rows whose
//      normalized host is among those values;
//   3. rewrites Host/Analogy of every `source` row through the resulting
//      host -> main host map and writes the rows to `target`;
//   4. sorts `target` by (Host, Analogy), records the trigger attribute and
//      drops the intermediate tables before committing.
void UpdateHost2vecSource(NYT::IClientBasePtr client, const TString &source, const TString &target) {
    const auto &cfg = TConfig::CInstance();

    TYtModificationTimeTrigger trigger(target);
    const bool needUpdate = trigger.NeedUpdate(client, TAttrName::SrcModificationTime, source);
    if (!needUpdate) {
        LOG_INFO("miner, source, host2vec is already updated: %s", source.c_str());
        return;
    }

    NYT::ITransactionPtr tx = client->StartTransaction();

    // Intermediate tables live under the tmp root and are named after the source table.
    const TString sourceName = GetName(source);
    const auto intmHostsPath = NYTUtils::JoinPath(cfg.TABLE_NICHE_TMP_ROOT, "host2vec", sourceName + "-intm-hosts");
    const auto intmMirrorsPath = NYTUtils::JoinPath(cfg.TABLE_NICHE_TMP_ROOT, "host2vec", sourceName + "-intm-mirrors");
    TTable<NProto::THost2Vec> intmHostsTable(tx, intmHostsPath);
    TTable<NProto::TMirror> intmMirrorsTable(tx, intmMirrorsPath);

    // Step 1: distinct Host/Analogy values of the source table.
    TMapReduceCmd<THost2vecHostExtractMapper, THost2vecHostExtractReducer>(tx)
        .Input(TTable<NProto::THost2Vec>(tx, source))
        .Output(intmHostsTable)
        .ReduceBy({"Host"})
        .MapperMemoryLimit(2_GBs)
        .Do();

    // Load them into memory; the set is shipped to the next reducer.
    THashSet<TString> domains;
    {
        auto hostsReader = intmHostsTable.SelectFields({"Host"}).GetReader();
        while (hostsReader->IsValid()) {
            domains.insert(hostsReader->GetRow().GetHost());
            hostsReader->Next();
        }
    }

    // Step 2: mirror rows that match the collected domains.
    TReduceCmd<THost2vecMirrorsExtractReducer>(tx, new THost2vecMirrorsExtractReducer(domains))
        .Input(TTable<NProto::TMirror>(tx, GetJupiterMirrorsInProdTable(tx)))
        .Output(intmMirrorsTable)
        .ReduceBy({"MainHost"})
        .SortBy({"MainHost", "Rank"})
        .MemoryLimit(2_GBs)
        .Do();

    // Build the host -> main host map that is shipped to the enrich mapper.
    THashMap<TString, TString> mirrors;
    {
        auto mirrorsReader = intmMirrorsTable.GetReader();
        while (mirrorsReader->IsValid()) {
            const auto &mirrorRow = mirrorsReader->GetRow();
            mirrors[mirrorRow.GetHost()] = mirrorRow.GetMainHost();
            mirrorsReader->Next();
        }
    }

    // Step 3: rewrite Host/Analogy of every source row and write to the target.
    TMapCmd<THost2vecMirrorsEnrichMapper>(tx, new THost2vecMirrorsEnrichMapper(mirrors))
        .Input(TTable<NProto::THost2Vec>(tx, source))
        .Output(TTable<NProto::THost2Vec>(tx, target))
        .MemoryLimit(4_GBs)
        .Do();

    // Step 4: sort the target, record the trigger attribute, clean up and commit.
    TSortCmd<NProto::THost2Vec>(tx, TTable<NProto::THost2Vec>(tx, target))
        .By({"Host", "Analogy"})
        .Do();

    trigger.Update(tx, TAttrName::SrcModificationTime, source);
    intmHostsTable.Drop();
    intmMirrorsTable.Drop();

    tx->Commit();
}

int TaskSourceHost2vec(int, const char **) {
    const auto &cfg = TConfig::CInstance();
    NYT::IClientBasePtr client = NYT::CreateClient(cfg.MR_SERVER_HOST);

    LOG_INFO("miner, source, host2vec");
    UpdateHost2vecSource(client, cfg.TABLE_SOURCE_H2V_SPYLOG, cfg.TABLE_NICHE_SOURCE_H2V_SPYLOG);
    UpdateHost2vecSource(client, cfg.TABLE_SOURCE_H2V_SIMILARGROUP, cfg.TABLE_NICHE_SOURCE_H2V_SIMILARGROUP);
    LOG_INFO("miner, source, host2vec - done");
    return 0;
}

} //namespace NNiche
} //namespace NWebmaster
