#include <util/generic/size_literals.h>
#include <util/string/split.h>

#include <kernel/urlnorm/normalize.h>
#include <library/cpp/string_utils/base64/base64.h>
#include <robot/library/yt/static/table.h>
#include <yweb/antispam/common/owner/owner.h>

#include <wmconsole/version3/protos/digest.pb.h>
#include <wmconsole/version3/wmcutil/config_base.h>
#include <wmconsole/version3/wmcutil/yt/yt_runner.h>
#include <wmconsole/version3/wmcutil/yt/transfer_manager.h>

#include "site_reviews.h"

namespace NWebmaster {

namespace {
    // Column names used by the reviews reduce step in this file.
    // Declared constexpr so the pointers themselves cannot be reseated at runtime.

    // Input column holding the review object id; also the reduce key.
    constexpr const char* F_HOST_SRC            = "object_id";
    // Output column with the webmaster host; also the sort key of the result table.
    constexpr const char* F_HOST_DST            = "Host";
    // Output column with the owner string decoded from the object id.
    constexpr const char* F_MASCOT_OWNER        = "MascotOwner";
    // Output column with the number of input rows seen for the reduce key.
    constexpr const char* F_COUNT               = "Count";
}

// Reducer that turns a group of review rows into per-host review counters.
//
// For every group of input rows it:
//   1. derives an owner string from the "object_id" column of the rows,
//   2. counts the rows of the group,
//   3. emits one output row (Host, MascotOwner, Count) for each webmaster host
//      registered under that owner in WebmasterHostOwner.
//
// Groups whose object id does not contain "/site/" produce no output at all.
struct TReviewsReducer : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    // NOTE(review): presumably serializes WebmasterHostOwner so the map travels with the job - confirm.
    Y_SAVELOAD_JOB(WebmasterHostOwner)

    TReviewsReducer() = default;

    // webmasterHostOwner: owner -> set of webmaster hosts belonging to that owner (copied into the reducer).
    TReviewsReducer(const THashMap<TString, THashSet<TString>>& webmasterHostOwner)
            : WebmasterHostOwner(webmasterHostOwner)
    {
    }

    // Owner -> webmaster hosts lookup used when emitting output rows.
    THashMap<TString, THashSet<TString>> WebmasterHostOwner;

    // Consumes all rows available from `input` and writes the aggregated rows to `output`.
    // Exceptions thrown while reading the object id column are not caught here;
    // yexception raised while splitting/decoding/normalizing the id is swallowed.
    void Do(TReader* input, TWriter* output) override {
        TString mascotOwner;
        TString normalizedUrl;      // receives NormalizeUrl() output; never read afterwards
        TVector<TString> idParts;
        int rowCount = 0;

        while (input->IsValid()) {
            const NYT::TNode& row = input->GetRow();

            // Keep trying to resolve the owner until some row yields one.
            if (mascotOwner.empty()) {
                TString objectId = row[F_HOST_SRC].AsString();

                const bool isSiteObject = objectId.find("/site/") != TString::npos;
                if (!isSiteObject) {
                    // Not a site review: drop the whole group without emitting anything.
                    return;
                }

                try {
                    // The third '/'-separated token is treated as the Base64 payload
                    // (e.g. "/site/<payload>" splits into "", "site", "<payload>").
                    StringSplitter(objectId).Split('/').Collect(&idParts);
                    TString decodedUrl = Base64Decode(idParts[2]);

                    // NOTE(review): normalization acts only as a validity check here - the owner
                    // is the raw decoded value, not normalizedUrl. Confirm this is intended.
                    if (NUrlNorm::NormalizeUrl(decodedUrl, normalizedUrl)) {
                        mascotOwner = decodedUrl;
                    }
                } catch (const yexception&) {
                    // Best effort: an undecodable id leaves the owner empty; the row is still counted.
                }
            }

            ++rowCount;
            input->Next();
        }

        const THashSet<TString>* ownerHosts = WebmasterHostOwner.FindPtr(mascotOwner);
        if (ownerHosts == nullptr) {
            return;
        }

        for (const TString& host : *ownerHosts) {
            output->AddRow(NYT::TNode()
                    (F_HOST_DST, host)
                    (F_MASCOT_OWNER, mascotOwner)
                    (F_COUNT, rowCount)
            );
        }
    }
};

// Register the reducer through the REGISTER_REDUCER macro (defined outside this file).
// NOTE(review): presumably required so that YT jobs can instantiate TReviewsReducer - confirm.
REGISTER_REDUCER(TReviewsReducer)

// Builds today's site-reviews digest table, unless it already exists.
//
// Steps:
//   1. Compose the dated output table path under TABLE_DIGEST_SOURCE_REVIEWS; return early if it exists.
//   2. Run a transfer-manager task from the reviews server/table to a "-copy" table on the search server
//      and wait for it to finish.
//   3. Group the given webmaster hosts by their mascot owner.
//   4. Inside a transaction: reduce the copied table by "object_id" with TReviewsReducer,
//      sort the result by "Host", then commit.
//
// clientSearch:   client used for the existence check and to start the transaction.
// webmasterHosts: hosts for which review counters should be produced.
void PrepareSiteReviewsSource(NYT::IClientBasePtr clientSearch, const THashSet<TString>& webmasterHosts) {
    const auto& cfg = TConfig::CInstance();

    TString datedTable = NYTUtils::JoinPath(cfg.TABLE_DIGEST_SOURCE_REVIEWS, NUtils::Date2StrTZ(Now().TimeT()));
    if (clientSearch->Exists(datedTable)) {
        LOG_INFO("Current date table already processed %s", datedTable.c_str());
        return;
    }

    // Staging table that receives the transferred source data.
    TString stagingTable = cfg.TABLE_DIGEST_SOURCE_REVIEWS + "-copy";

    TTransferManager transferManager(TConfigBase::GetYTToken());
    transferManager.PostTaskAndWait(
        cfg.MR_SERVER_HOST_REVIEWS,
        cfg.TABLE_SOURCE_REVIEWS,
        cfg.MR_SERVER_HOST_SEARCH,
        stagingTable
    );

    TMascotOwnerCanonizer ownerCanonizer;
    ownerCanonizer.LoadTrueOwners();

    // owner -> all webmaster hosts that canonize to that owner
    THashMap<TString, THashSet<TString>> hostsByOwner;
    for (const TString& host : webmasterHosts) {
        TString hostOwner = ownerCanonizer.GetHostOwner(host);
        // operator[] creates an empty set for a new owner, so one insert covers both cases.
        hostsByOwner[hostOwner].insert(host);
    }

    NYT::ITransactionPtr transaction = clientSearch->StartTransaction();

    TOpRunner(transaction)
        .InputNode(stagingTable)
        .OutputNode(datedTable)
        .MemoryLimit(4_GBs)
        .ReduceBy(F_HOST_SRC)
        .Reduce(new TReviewsReducer(hostsByOwner))
        .SortBy(F_HOST_DST)
        .Sort(datedTable);

    transaction->Commit();
}

} //namespace NWebmaster
