#include <util/generic/hash_set.h>
#include <util/generic/size_literals.h>

#include <mapreduce/yt/interface/protos/yamr.pb.h>
#include <robot/library/yt/static/command.h>

#include <wmconsole/version3/library/jupiter/jupiter.h>
#include <wmconsole/version3/protos/querygroups.pb.h>
#include <wmconsole/version3/wmcutil/hostid.h>
#include <wmconsole/version3/wmcutil/url.h>
#include <wmconsole/version3/wmcutil/yt/triggers.h>
#include <wmconsole/version3/processors/tools/IKS/utils/canonizer.h>
#include <wmconsole/version3/processors/tools/host2vec/utils/utils.h>
#include <wmconsole/version3/processors/user_sessions/niche/conf/config.h>
#include <wmconsole/version3/processors/user_sessions/niche/miner/tables.pb.h>
#include <wmconsole/version3/processors/user_sessions/niche2/preparation/tables.pb.h>

#include "task_source_favorites.h"

namespace NWebmaster {
namespace NNiche {

using namespace NJupiter;

// Derives a bare domain from a host URL by cutting, in this order, the
// scheme prefix, the "m" prefix and the "www" prefix (see the corresponding
// Cut*Prefix helpers for the exact matching rules).
static TString HostUrlToDomain(TString hostUrl) {
    const auto withoutScheme = CutSchemePrefix(hostUrl);
    const auto withoutMobile = CutMPrefix(withoutScheme);
    const auto withoutWww = CutWWWPrefix(withoutMobile);
    return TString(withoutWww);
}

// Mapper that flattens query-group settings rows into (GroupId, Domain, Query)
// records: every query listed in the HostGroupInfo stored in the "value"
// column produces one NProto::TGroupIdToQuery output row.
struct TPrepareFavoriteQueriesSettingsMapper : public NYT::IMapper<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NProto::TGroupIdToQuery>> {
    // Limits `text` to at most `length` UTF-16 code units.
    //
    // Texts that already fit are returned unchanged. Longer texts are cut at
    // `length` code units; if that cut would fall between the two halves of a
    // surrogate pair, the dangling lead surrogate is dropped as well so the
    // result never ends with half of a character.
    static TString PruneText(const TString &text, size_t length = 1024) {
        const TUtf16String wideString = UTF8ToWide(text);
        if (wideString.size() <= length) {
            return text;
        }

        size_t cut = length;
        // 0xD800..0xDBFF are lead (high) surrogates: such a unit is only
        // meaningful together with the tail surrogate that follows it.
        if (cut > 0 && (wideString[cut - 1] & 0xFC00) == 0xD800) {
            --cut;
        }
        return WideToUTF8(wideString.substr(0, cut));
    }

    // Reads rows with "key" (host URL), "subkey" (group id) and "value"
    // (serialized proto::querygroups::HostGroupInfo) columns and emits one
    // output row per query of the group.
    void Do(TReader *input, TWriter *output) override {
        // Reused for all output rows; every field is overwritten before AddRow.
        NProto::TGroupIdToQuery dstMsg;
        for (; input->IsValid(); input->Next()) {
            const NYT::TNode &row = input->GetRow();
            proto::querygroups::HostGroupInfo groupInfo;
            // The parse result is deliberately ignored: whatever queries ended
            // up in groupInfo (possibly none) are processed.
            Y_PROTOBUF_SUPPRESS_NODISCARD groupInfo.ParseFromString(row["value"].AsString());
            dstMsg.SetGroupId(row["subkey"].AsString());
            dstMsg.SetDomain(HostUrlToDomain(row["key"].AsString()));
            for (const auto &query : groupInfo.queries()) {
                dstMsg.SetQuery(PruneText(query.text()));
                output->AddRow(dstMsg);
            }
        }
    }
}; //TPrepareFavoriteQueriesSettingsMapper

// Makes the mapper known to the YT job machinery via the registration macro.
REGISTER_MAPPER(TPrepareFavoriteQueriesSettingsMapper)

// Used by TaskSourceFavorites both as combiner and as reducer, with
// ReduceBy Domain, Query, GroupId.
// Forwards only the row the reader is currently positioned at and ignores
// any further rows; presumably this deduplicates rows sharing the same
// reduce key (assuming Do is invoked once per key - confirm against the
// YT C++ API).
struct TPrepareFavoriteQueriesSettingsReducer : public NYT::IReducer<NYT::TTableReader<NProto::TGroupIdToQuery>, NYT::TTableWriter<NProto::TGroupIdToQuery>> {
    void Do(TReader *input, TWriter *output) override {
        const auto &firstRow = input->GetRow();
        output->AddRow(firstRow);
    }
};

// Makes the reducer known to the YT job machinery via the registration macro.
REGISTER_REDUCER(TPrepareFavoriteQueriesSettingsReducer)

// Rebuilds the niche "favorite queries" source table from the query-group
// settings table.
//
// Steps:
//   1. Ask the modification-time trigger whether the settings table changed
//      since the last build; if not, return immediately.
//   2. Inside one transaction run a map-combine-reduce over the settings
//      table (reduce key: Domain, Query, GroupId), then sort the result by
//      Query, Domain, GroupId.
//   3. Store the source modification time on the trigger and commit.
//
// Command-line arguments are unused. Returns 0 both when the table was
// rebuilt and when no rebuild was needed.
int TaskSourceFavorites(int, const char **) {
    const auto &cfg = TConfig::CInstance();
    NYT::IClientBasePtr client = NYT::CreateClient(cfg.MR_SERVER_HOST);

    TYtModificationTimeTrigger trigger(cfg.TABLE_NICHE_SOURCE_FAVORITE_QUERIES);
    if (!trigger.NeedUpdate(client, TAttrName::SrcModificationTime, cfg.TABLE_SOURCE_GROUPS_SETTINGS)) {
        LOG_INFO("niche, favorites source is already updated");
        return 0;
    }

    NYT::ITransactionPtr tx = client->StartTransaction();

    LOG_INFO("miner, source, favorites");

    // The same class serves as combiner and reducer.
    TMapCombineReduceCmd<
        TPrepareFavoriteQueriesSettingsMapper,
        TPrepareFavoriteQueriesSettingsReducer,
        TPrepareFavoriteQueriesSettingsReducer>(tx)
        .Input(TTable<NYT::TNode>(tx, cfg.TABLE_SOURCE_GROUPS_SETTINGS))
        .Output(TTable<NProto::TGroupIdToQuery>(tx, cfg.TABLE_NICHE_SOURCE_FAVORITE_QUERIES))
        .ReduceBy({"Domain", "Query", "GroupId"})
        .OperationWeight(cfg.OPERATION_WEIGHT)
        .Do();

    // Note: the final sort order (Query first) differs from the reduce key
    // order (Domain first).
    TSortCmd<NProto::TGroupIdToQuery>(tx, TTable<NProto::TGroupIdToQuery>(tx, cfg.TABLE_NICHE_SOURCE_FAVORITE_QUERIES))
        .By({"Query", "Domain", "GroupId"})
        .OperationWeight(cfg.OPERATION_WEIGHT)
        .Do();

    // Update the trigger within the same transaction as the rebuilt table.
    trigger.Update(tx, TAttrName::SrcModificationTime, cfg.TABLE_SOURCE_GROUPS_SETTINGS);
    tx->Commit();
    LOG_INFO("miner, source, favorites - done");

    return 0;
}

} //namespace NNiche
} //namespace NWebmaster
