#include <util/draft/date.h>
#include <util/generic/queue.h>

#include <dict/word2vec/model/model.h>
#include <dict/word2vec/util/analogy/bruteforce/searcher.h>
#include <kernel/yt/attrs/attrs.h>
#include <library/cpp/getopt/last_getopt.h>
#include <mapreduce/yt/interface/constants.h>
#include <robot/library/yt/static/command.h>
#include <robot/library/yt/static/table.h>
#include <robot/library/yt/static/tags.h>

#include <wmconsole/version3/wmcutil/log.h>

#include <wmconsole/version3/library/conf/yt.h>
#include <wmconsole/version3/processors/acceptance/conf/config.h>
#include <wmconsole/version3/processors/acceptance/protos/acceptance.pb.h>
#include <wmconsole/version3/processors/achievements/conf/config.h>
#include <wmconsole/version3/processors/achievements/protos/achievements.pb.h>
#include <wmconsole/version3/processors/achievements/protos/quality.pb.h>
#include <wmconsole/version3/processors/achievements/protos/sources.pb.h>
#include <wmconsole/version3/searchqueries-mr/conf/yt.h>
#include <wmconsole/version3/searchqueries-mr/protos/user_sessions.pb.h>
#include <wmconsole/version3/searchqueries-mr/tools/host2vec-model/protos/embedding.pb.h>
#include <wmconsole/version3/searchqueries-mr/tools/host2vec-model/utils/utils.h>
#include <wmconsole/version3/processors/tools/IKS/conf/config.h>
#include <wmconsole/version3/processors/tools/IKS/protos/iks.pb.h>

#include "import_util.h"
#include "task_merge.h"

namespace NWebmaster {

using namespace NJupiter;

// YT mapper that, for each owner-level host found in a model table, looks up
// its closest matches in a word2vec model and emits one TQualitySimilarHost
// row per accepted (host, analogy) pair.
//
// The job expects two files named "vectors" and "words" in its working
// directory (see Start()). Only hosts and analogies that are their own owner
// (host == GetOwner(host)) and that are present in OwnersTraffic are emitted.
struct THost2VecMapper : public NYT::IMapper<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NProto::TQualitySimilarHost>> {
    // Job state transferred to the job: only the traffic map and the limit;
    // the model, searcher and blob are rebuilt in Start().
    Y_SAVELOAD_JOB(OwnersTraffic, MaxAnalogies)

public:
    THost2VecMapper() = default;

    // ownersTraffic: owner -> clicks, used both as a whitelist and as the
    //                source of the *Clicks fields of the output rows.
    // maxAnalogies:  number of matches requested from the searcher per host.
    THost2VecMapper(const THashMap<TString, size_t> &ownersTraffic, size_t maxAnalogies)
        : OwnersTraffic(ownersTraffic)
        , MaxAnalogies(maxAnalogies)
    {
    }

    // Loads the model from the local "vectors"/"words" files and prepares the
    // brute-force searcher over it.
    void Start(TWriter *) override {
        // The blob is kept as a member so that it stays alive as long as the model.
        VectorsBlob = TBlob::FromFileContent("vectors");
        TFileInput wordsStream("words");
        Model.Reset(new NWord2Vec::TModel());
        Model->LoadFromYandex(&wordsStream, VectorsBlob);
        // NOTE(review): meaning of the "1" constructor argument is not visible here - confirm.
        Searcher.Reset(new TBruteforceSearcher("1"));
        Searcher->SetModels(Model.Get(), Model.Get(), false /*normalized*/);
    }

    // Reads the "host" column of every input row and writes the accepted
    // analogies for it.
    void Do(TReader *input, TWriter *output) override {
        NProto::TQualitySimilarHost dstRow;
        for (; input->IsValid(); input->Next()) {
            const TString host = input->GetRow()["host"].AsString();
            if (host != GetOwner(host)) {
                continue;
            }

            // Single lookup instead of contains() followed by at().
            const auto hostTraffic = OwnersTraffic.find(host);
            if (hostTraffic == OwnersTraffic.end()) {
                continue;
            }

            const TUtf16String wHost = TUtf16String::FromAscii(host);
            if (!Model->Has(wHost)) {
                continue;
            }

            TVector<TUtf16String> words = { wHost };
            // NOTE(review): meaning of the trailing "1" argument is not visible here - confirm.
            TVector<TWordQuality> results = Searcher->FindBestMatches(words, MaxAnalogies, false/* debug*/, 1);
            // Ordered by TWordQuality's default comparison.
            Sort(results.begin(), results.end());

            // The source part of the row is the same for every analogy of this
            // host, so it is filled once instead of once per match.
            dstRow.SetSourceOwner(host);
            dstRow.SetSourceClicks(hostTraffic->second);

            for (const TWordQuality &match : results) {
                const TString analogy = WideToUTF8(match.Word);

                if (analogy != GetOwner(analogy)) {
                    continue;
                }

                const auto analogyTraffic = OwnersTraffic.find(analogy);
                if (analogyTraffic == OwnersTraffic.end()) {
                    continue;
                }

                dstRow.SetAnalogyOwner(analogy);
                dstRow.SetAnalogyClicks(analogyTraffic->second);
                dstRow.SetCosine(match.Quality);
                output->AddRow(dstRow);
            }
        }
    }

public:
    THashMap<TString, size_t> OwnersTraffic;    // owner -> clicks
    size_t MaxAnalogies = 100;                  // matches requested per host

    TSimpleSharedPtr<NWord2Vec::TModel> Model;  // built in Start()
    THolder<TSearcher> Searcher;                // built in Start(), searches over Model
    TBlob VectorsBlob;                          // content of the "vectors" file
};

REGISTER_MAPPER(THost2VecMapper)

// Accumulates clicks per owner from the acceptance "top hosts" table into
// ownersTraffic (existing entries are added to, not reset).
//
// An owner whose accumulated clicks exceed the limit is removed from the map
// and remembered, so that later rows of the same owner cannot re-add it.
void LoadOwnersTraffic(THashMap<TString, size_t> &ownersTraffic) {
    // NOTE(review): the original constant name suggests a 28-day window - confirm against the table.
    const size_t trafficLimit = 10000000;

    NYT::IClientPtr sessionsClient = NYT::CreateClient(TCommonYTConfigSQ::CInstance().MR_SERVER_HOST_USER_SESSIONS);
    auto rows = TTable<NAcceptance::NProto::TTopHost>(sessionsClient, NAcceptance::TConfig::CInstance().TABLE_ACCEPTANCE_TOP_HOSTS).GetReader();

    THashSet<TString> overLimitOwners;
    for (; rows->IsValid(); rows->Next()) {
        const TString owner = GetOwner(rows->GetRow().GetHost());
        if (!overLimitOwners.contains(owner)) {
            size_t &clicks = ownersTraffic[owner];
            clicks += rows->GetRow().GetClicks();
            if (clicks > trafficLimit) {
                // 'clicks' must not be touched after the erase below.
                overLimitOwners.insert(owner);
                ownersTraffic.erase(owner);
            }
        }
    }
}

// Runs THost2VecMapper over the model stored under modelRoot and writes the
// resulting similar-host rows to outputTable.
//
// tx:            client/transaction the operation is started in.
// ownersTraffic: owner -> clicks map handed to the mapper.
// modelRoot:     directory holding the "table", "words" and "vectors" nodes.
// outputTable:   destination table of TQualitySimilarHost rows.
void UpdateRivals(NYT::IClientBasePtr tx, const THashMap<TString, size_t> &ownersTraffic,
    const TString &modelRoot, const TString &outputTable
) {
    const size_t maxAnalogies = 100;
    const int jobCount = 100000;

    const auto &achievementsCfg = NAchievements::TConfig::CInstance();

    const TString hostsTable  = NYTUtils::JoinPath(modelRoot, "table");
    const TString wordsFile   = NYTUtils::JoinPath(modelRoot, "words");
    const TString vectorsFile = NYTUtils::JoinPath(modelRoot, "vectors");

    // "words" and "vectors" are attached as job files; the mapper opens them by these names.
    TMapCmd<THost2VecMapper>(tx, new THost2VecMapper(ownersTraffic, maxAnalogies))
        .OperationWeight(achievementsCfg.OPERATION_WEIGHT)
        .Input<NYT::TNode>(hostsTable)
        .Output(TTable<NProto::TQualitySimilarHost>(tx, outputTable))
        .JobCount(jobCount)
        .AddYtFile(wordsFile)
        .AddYtFile(vectorsFile)
        .MemoryLimit(4_GBs)
        .Do()
    ;
}

// Rebuilds the "SG filtered" model: copies from the SG model table only those
// embeddings whose host also occurs in the SpyLog model table, then asks
// NHost2Vec::UpdateFilteredModel to refresh the words/vectors nodes from the
// filtered table.
void UpdateSGFilteredModel(NYT::IClientBasePtr tx) {
    const auto &achievementsCfg = NAchievements::TConfig::CInstance();
    const auto &commonCfg = TCommonYTConfig::CInstance();

    const TString spylogTable      = NYTUtils::JoinPath(commonCfg.FILE_MODEL_HOST2VEC_SPYLOG, "table");
    const TString sgTable          = NYTUtils::JoinPath(commonCfg.FILE_MODEL_HOST2VEC_SG, "table");
    const TString filteredTable    = NYTUtils::JoinPath(achievementsCfg.TABLE_QUALITY_MODELS_SG_FILTERED, "table");
    const TString filteredVectors  = NYTUtils::JoinPath(achievementsCfg.TABLE_QUALITY_MODELS_SG_FILTERED, "vectors");
    const TString filteredWords    = NYTUtils::JoinPath(achievementsCfg.TABLE_QUALITY_MODELS_SG_FILTERED, "words");

    // Step 1: collect the set of hosts known to the SpyLog model (only the "host" column is read).
    THashSet<TString> allowedHosts;
    auto allowedReader = TTable<NHost2Vec::NProto::TEmbedding>(tx, spylogTable).SelectFields({"host"}).GetReader();
    while (allowedReader->IsValid()) {
        allowedHosts.insert(allowedReader->GetRow().Gethost());
        allowedReader->Next();
    }

    // Step 2: stream the SG model through the filter.
    auto sgReader = TTable<NHost2Vec::NProto::TEmbedding>(tx, sgTable).GetReader();
    auto filteredWriter = TTable<NHost2Vec::NProto::TEmbedding>(tx, filteredTable).GetWriter();
    for (; sgReader->IsValid(); sgReader->Next()) {
        if (!allowedHosts.contains(sgReader->GetRow().Gethost())) {
            continue;
        }
        filteredWriter->AddRow(sgReader->GetRow());
    }
    filteredWriter->Finish();

    // Step 3: regenerate the model files from the filtered table.
    NHost2Vec::UpdateFilteredModel(tx, filteredTable, filteredWords, filteredVectors);
}

// Fills IKS with host -> IKS value read from the accepted IKS data table.
// A host that occurs more than once keeps the value of its last row; entries
// already present in the map for other hosts are left untouched.
void LoadIKS(THashMap<TString, size_t> &IKS) {
    const auto &iksCfg = NIks::TConfig::CInstance();

    NYT::IClientBasePtr client = NYT::CreateClient(iksCfg.MR_SERVER_HOST);
    auto rows = TTable<NIks::NProto::TIKS>(client, iksCfg.TABLE_IKS_DATA_ACCEPTED).GetReader();
    while (rows->IsValid()) {
        const auto &entry = rows->GetRow();
        IKS[entry.GetHost()] = entry.GetIKS();
        rows->Next();
    }
}

// Merges the three per-source rival tables (similar group, SpyLog, SERP) into
// the generated rivals table.
//
// For every (source owner, analogy owner) pair seen in any of the inputs the
// output row records which sources contained the pair, the largest cosine
// seen in the SpyLog/SERP tables, the click counts and the IKS of both owners.
// Pairs are skipped when source == analogy or when either owner has no IKS.
void GenerateRivals(NYT::IClientBasePtr tx) {
    const auto &cfg = NAchievements::TConfig::CInstance();

    // Aggregated state of one (source, analogy) pair.
    struct TRival {
        bool SimilarGroup   = false;
        bool SpyLog         = false;
        bool Serp           = false;
        float Cosine        = 0.0f;
    };

    using TKey = std::pair<TString, TString>; // (source owner, analogy owner)

    THashMap<TString, size_t> traffic;  // owner -> clicks, last value read wins
    THashMap<TKey, TRival> rivals;
    THashMap<TString, size_t> IKS;
    LoadIKS(IKS);

    // Folds one input table into 'rivals' and 'traffic'.
    //   sourceFlag - TRival member marking that the pair was seen in this table;
    //   useCosine  - whether the row cosine takes part in TRival::Cosine.
    // The reader lives only for the duration of the call.
    const auto mergeTable = [&](const auto &tableName, bool TRival::*sourceFlag, bool useCosine) {
        auto reader = TTable<NProto::TQualitySimilarHost>(tx, tableName).GetReader();
        for (; reader->IsValid(); reader->Next()) {
            const auto &row = reader->GetRow();
            TRival &rival = rivals[TKey(row.GetSourceOwner(), row.GetAnalogyOwner())];
            rival.*sourceFlag = true;
            if (useCosine) {
                rival.Cosine = Max<float>(rival.Cosine, row.GetCosine());
            }
            traffic[row.GetSourceOwner()] = row.GetSourceClicks();
            traffic[row.GetAnalogyOwner()] = row.GetAnalogyClicks();
        }
    };

    // NOTE(review): similar-group rows do not contribute to Cosine; this keeps
    // the pre-existing behaviour - confirm that it is intended.
    mergeTable(cfg.TABLE_QUALITY_RIVALS_SG,     &TRival::SimilarGroup, false);
    mergeTable(cfg.TABLE_QUALITY_RIVALS_SPYLOG, &TRival::SpyLog,       true);
    mergeTable(cfg.TABLE_QUALITY_RIVALS_SERP,   &TRival::Serp,         true);

    NProto::TQualityGeneratedRival dstRow;
    auto writer = TTable<NProto::TQualityGeneratedRival>(tx, cfg.TABLE_QUALITY_RIVALS_GENERATED).GetWriter();
    for (const auto &obj : rivals) {
        const TString &source = obj.first.first;
        const TString &analogy = obj.first.second;
        const TRival &rival = obj.second;

        if (source == analogy) {
            continue;
        }

        // One lookup per owner instead of contains() followed by at().
        const auto sourceIKS = IKS.find(source);
        const auto analogyIKS = IKS.find(analogy);
        if (sourceIKS == IKS.end() || analogyIKS == IKS.end()) {
            continue;
        }

        // Disabled stricter filter: keep only pairs present in all three sources.
        //if (!rival.SpyLog || !rival.SimilarGroup || !rival.Serp) {
        //    continue;
        //}

        dstRow.SetSourceOwner(source);
        // Both owners of every key were stored in 'traffic' by mergeTable, so at() cannot miss.
        dstRow.SetSourceClicks(traffic.at(source));
        dstRow.SetAnalogyOwner(analogy);
        dstRow.SetAnalogyClicks(traffic.at(analogy));
        dstRow.SetSimilarGroup(rival.SimilarGroup);
        dstRow.SetSpyLog(rival.SpyLog);
        dstRow.SetSerp(rival.Serp);
        dstRow.SetSourceIKS(sourceIKS->second);
        dstRow.SetAnalogyIKS(analogyIKS->second);
        dstRow.SetCosine(rival.Cosine);
        writer->AddRow(dstRow);
    }

    writer->Finish();
}

int TaskGenerateRivals(int, const char **) {
    const auto &cfg = NAchievements::TConfig::CInstance();

    NYT::IClientPtr clientModels = NYT::CreateClient(TCommonYTConfig::CInstance().MR_SERVER_HOST_MODELS_HOST2VEC);
    NYTUtils::CreatePath(clientModels, cfg.TABLE_QUALITY_RIVALS_ROOT);
    NYTUtils::CreatePath(clientModels, cfg.TABLE_QUALITY_MODELS_SG_FILTERED);

    NYT::ITransactionPtr tx = clientModels->StartTransaction();

    THashMap<TString, size_t> ownersTraffic;
    LoadOwnersTraffic(ownersTraffic);

    UpdateRivals(tx,
        ownersTraffic,
        TCommonYTConfig::CInstance().FILE_MODEL_HOST2VEC_SPYLOG,
        cfg.TABLE_QUALITY_RIVALS_SPYLOG
    );

    UpdateRivals(tx,
        ownersTraffic,
        cfg.TABLE_QUALITY_MODELS_SG_FILTERED,
        cfg.TABLE_QUALITY_RIVALS_SG
    );

    UpdateRivals(tx,
        ownersTraffic,
        TCommonYTConfig::CInstance().FILE_MODEL_HOST2VEC_SERP,
        cfg.TABLE_QUALITY_RIVALS_SERP
    );

    GenerateRivals(tx);

    tx->Commit();

    return 0;
}

} //namespace NWebmaster
