#include <util/charset/wide.h>
#include <util/digest/fnv.h>
#include <util/draft/datetime.h>
#include <util/generic/hash_set.h>
#include <util/generic/map.h>
#include <util/generic/set.h>
#include <util/generic/size_literals.h>

#include <kernel/hosts/owner/owner.h>
#include <kernel/url_text_analyzer/url_analyzer.h>

#include <library/cpp/dot_product/dot_product.h>
#include <library/cpp/l2_distance/l2_distance.h>
#include <library/cpp/string_utils/levenshtein_diff/levenshtein_diff.h>
#include <mapreduce/yt/interface/protos/yamr.pb.h>
#include <robot/jupiter/protos/export.pb.h>
#include <robot/library/dssm/utils/title_normalization.h>
#include <robot/library/yt/static/command.h>
#include <robot/library/yt/static/tags.h>
#include <ysite/yandex/reqanalysis/fast_normalize_request.h>

#include <wmconsole/version3/library/conf/yt.h>
#include <wmconsole/version3/library/dssm/dssm_utils.h>
#include <wmconsole/version3/library/jupiter/jupiter.h>
#include <wmconsole/version3/protos/queries2.pb.h>
#include <wmconsole/version3/searchqueries-mr/conf/yt.h>
#include <wmconsole/version3/searchqueries-mr/protos/catalogia2.pb.h>
#include <wmconsole/version3/wmcutil/math.h>
#include <wmconsole/version3/wmcutil/owners.h>
#include <wmconsole/version3/wmcutil/regex.h>
#include <wmconsole/version3/wmcutil/serialize.h>
#include <wmconsole/version3/wmcutil/url.h>
#include <wmconsole/version3/wmcutil/yt/yt_runner.h>

#include "config.h"
#include "source_tables.h"
#include "task_catalogia2.h"

namespace NWebmaster {
namespace NCatalogia2 {

// Number of seconds in seven days (86400 seconds per day times 7).
const time_t WEEK_SECONDS       = 86400 * 7;
// The literal attribute name "upload_time"; its users are not in this part of the file.
// NOTE(review): only the pointee is const, the pointer itself can be reseated —
// confirm whether `const char *const` was intended.
const char *ATTR_UPLOAD_TIME    = "upload_time";

// Makes NJupiter names usable unqualified inside this namespace.
using namespace NJupiter;

// Identifier types used by the jobs below.
// A TUrlId is 64 bits wide: GetUrlId() puts a 32-bit host hash into the upper half and a
// 32-bit path hash into the lower half; GetHostIdFromUrlId() extracts the upper half again.
// A TQueryId is the 64-bit hash produced by GetQueryId().
using THostId   = ui32;
using TOwnerId  = ui32;
using TQueryId  = ui64;
using TUrlId    = ui64;
using TRegionId = ui32;

// Collection of URL ids grouped by owner id, with helpers to merge two collections
// and to convert the collection to and from its serialized string form.
struct TOwnersUrlsPerQuery {
    // Records urlId under ownerId; the owner's set is created on first use.
    void Add(const TOwnerId &ownerId, const TUrlId &urlId) {
        Links[ownerId].insert(urlId);
    }

    // Merges every (owner, url) link of rhs into this object.
    // A range insert on the map itself would skip owners that are already present
    // and silently lose their additional URLs, so the union is done per owner.
    void Add(const TOwnersUrlsPerQuery &rhs) {
        for (const auto &ownerUrls : rhs.Links) {
            Links[ownerUrls.first].insert(ownerUrls.second.begin(), ownerUrls.second.end());
        }
    }

    // Replaces/fills Links from the string produced by To().
    void From(const TString &src) {
        NUtils::Deserialize(src, Links);
    }

    // Serializes Links into dst.
    void To(TString &dst) const {
        NUtils::Serialize(Links, dst);
    }

public:
    // owner id -> set of URL ids seen for that owner.
    THashMap<TOwnerId, THashSet<TUrlId>> Links;
};

// Typed input tags consumed by the TTaggedReducer-based jobs below via
// reader.GetRows(tag) / reader.GetSingleRowMaybe(tag). Each tag binds a row proto type
// to a number — presumably the index of the input table; verify against the job setup.
TInputTag<NCatalogia2::NProto::TQuery> ExtractedQueriesInputTag                  (0);
TInputTag<NCatalogia2::NProto::TOwnerFilter> OwnerFilterInputTag                 (1);
TInputTag<NCatalogia2::NProto::THits> HitsInputTag                               (2);
TInputTag<NJupiter::TContentAttrsForWebmaster> ContentAttrsInputTag (3);
TInputTag<NCatalogia2::NProto::TQueryInfo> QueryInfoInputTag                     (4);
TInputTag<NCatalogia2::NProto::TQueryRegionUrlInfo> QueryRegionUrlInfoInputTag   (5);
TInputTag<NCatalogia2::NProto::TUrlInfo> UrlInfoInputTag                         (6);
TInputTag<NCatalogia2::NProto::TForeignQuery> ForeignQueryInputTag               (7);
TInputTag<NCatalogia2::NProto::TForeignUrl> ForeignUrlInputTag                   (8);
TInputTag<NCatalogia2::NProto::TGeneratedQuery> GeneratedQueryInputTag           (9);
TInputTag<NCatalogia2::NProto::TForeignQueryCounter> ForeignQueryCounterInputTag (10);

// Typed output tags passed to writer.AddRow(row, tag) by the tagged reducers below.
// The number is presumably the index of the output table; verify against the job setup.
TOutputTag<NCatalogia2::NProto::TQuery> ExtractedQueriesOutputTag                    (0);
TOutputTag<NCatalogia2::NProto::TQueryRegionUrlInfo> QueryRegionUrlInfoOutputTag     (1);
TOutputTag<NCatalogia2::NProto::TForeignQuery> ForeignQueryOutputTag                 (2);
TOutputTag<NCatalogia2::NProto::TForeignQueryCounter> ForeignQueryCounterOutputTag   (3);
TOutputTag<NCatalogia2::NProto::TForeignUrl> ForeignUrlOutputTag                     (4);
TOutputTag<NCatalogia2::NProto::TGeneratedQuery> GeneratedQueryOutputTag             (5);
TOutputTag<NCatalogia2::NProto::TGeneratedUrl> GeneratedUrlOutputTag                 (6);

// Builds a 64-bit URL id: the 32-bit FNV hash of the host occupies the upper half,
// the 32-bit FNV hash of the path occupies the lower half.
TUrlId GetUrlId(const TStringBuf &host, const TStringBuf &path) {
    const ui64 upperHalf = FnvHash<ui32>(host.data(), host.size());
    const ui64 lowerHalf = FnvHash<ui32>(path.data(), path.size());

    TUrlId urlId = upperHalf;
    urlId <<= 32ul;
    urlId |= lowerHalf;
    return urlId;
}

// Returns the FNV hash of the query text, computed at TQueryId width.
TQueryId GetQueryId(const TString &query) {
    const TQueryId queryHash = FnvHash<TQueryId>(query.data(), query.size());
    return queryHash;
}

// Extracts the upper 32 bits of a URL id, i.e. the host part written by GetUrlId().
THostId GetHostIdFromUrlId(TUrlId urlId) {
    const TUrlId upperHalf = urlId >> 32ul;
    return upperHalf;
}

// Floating-point weight constant; nothing in the visible part of this file references it.
const float OPERATION_WEIGHT = 10.0;

// Thin wrapper around NUta::TSmartUrlAnalyzer that returns the analyzer output
// for a UTF-8 URL as one space-separated string.
struct TUrlUTA {
    // Runs the analyzer on url and joins the produced sequence with single spaces.
    TString Get(const TString &url) const {
        const auto &tokens = Analyzer.AnalyzeUrlUTF8(url);
        return JoinSeq(" ", tokens);
    }

public:
    NUta::TSmartUrlAnalyzer Analyzer;
};

// Mapper over YAMR rows whose value holds a serialized proto::queries2::QueryMessage.
// For every region report of a message it writes one NCatalogia2::NProto::TQuery row
// carrying the query, the split URL, the derived ids and the clicks/shows summed over
// all positions of that region. Messages whose URL cannot be split or whose owner is
// in the skip list produce no rows.
struct TMapper : public NYT::IMapper<NYT::TTableReader<NYT::TYamr>, NYT::TTableWriter<NCatalogia2::NProto::TQuery>> {
    void Do(TReader *input, TWriter *output) override {
        // Owners (as returned by NUtils::GetMetrikaOwner) whose rows are dropped.
        static const THashSet<TString> OWNERS_SKIP_LIST = {
            "free_entity_search_video",
            "lyrics_blender_wizard",
            "newswizardunderblender",

            "facebook.com",
            "google.com",
            "google.ru",
            "instagram.com",
            "ok.ru",
            "rambler.ru",
            "twitter.com",
            "vk.com",

            "yandex.by",
            "yandex.com",
            "yandex.com.tr",
            "yandex.kz",
            "yandex.ru",
            "yandex.ua",
            "youtube.com",
        };

        for (; input->IsValid(); input->Next()) {
            const NYT::TYamr &row = input->GetRow();

            proto::queries2::QueryMessage msg;
            // NOTE(review): the parse result is not checked — confirm that processing
            // a message that failed to parse is intended.
            msg.ParseFromArray(row.GetValue().data(), row.GetValue().size());
            const TString &url = msg.url();

            TString host, path;
            if (!NUtils::SplitUrl(url, host, path)) {
                continue;
            }

            const TStringBuf owner = NUtils::GetMetrikaOwner(host);
            if (OWNERS_SKIP_LIST.contains(owner)) {
                continue;
            }

            const TString &query = msg.corrected_query();
            const TUrlId urlId = GetUrlId(NUtils::GetHost2vecDomain(host), path);

            // Everything that is identical for all regions of this message is filled once;
            // the message object is reused for each emitted row.
            NCatalogia2::NProto::TQuery outMsg;
            outMsg.SetQuery(query);
            outMsg.SetHost(host);
            outMsg.SetPath(path);
            outMsg.SetQueryId(GetQueryId(query));
            outMsg.SetUrlId(urlId);
            outMsg.SetOwnerId(GetHostIdFromUrlId(urlId));
            outMsg.SetUpperPornoUpperPl(msg.GetUpperPornoUpperPl());

            for (const auto &region : msg.reports_by_region()) {
                size_t clicks = 0;
                size_t shows = 0;
                for (const auto &position : region.position_info()) {
                    clicks += position.clicks_count();
                    shows += position.shows_count();
                }

                outMsg.SetRegionId(region.region_id());
                outMsg.SetQRUClicks(clicks);
                outMsg.SetQRUShows(shows);
                output->AddRow(outMsg);
            }
        }
    }
};

REGISTER_MAPPER(TMapper)

// A clicks/shows pair that can be accumulated from TQuery rows and converted to and
// from the NCatalogia2::NProto::TCounters message.
// NOTE(review): Clicks is an int while Shows is a size_t, and the (size_t, size_t)
// constructor narrows its first argument — confirm the asymmetry is intended.
struct TSingleCounter {
    TSingleCounter() = default;

    // Initializes both fields from a proto counters message.
    TSingleCounter(const NCatalogia2::NProto::TCounters &counters) {
        From(counters);
    }

    // Initializes both fields from explicit values.
    TSingleCounter(size_t clicks, size_t shows)
        : Clicks(clicks)
        , Shows(shows)
    {
    }

    // Adds the query-region-url clicks and shows of msg to this counter.
    void Add(const NCatalogia2::NProto::TQuery &msg) {
        Shows += msg.GetQRUShows();
        Clicks += msg.GetQRUClicks();
    }

    // Overwrites both fields with the values stored in counters.
    void From(const NCatalogia2::NProto::TCounters &counters) {
        Shows = counters.GetShows();
        Clicks = counters.GetClicks();
    }

    // Writes both fields into counters.
    void To(NCatalogia2::NProto::TCounters &counters) const {
        counters.SetShows(Shows);
        counters.SetClicks(Clicks);
    }

public:
    int Clicks = 0;
    size_t Shows = 0;
};

// Bundle of TSingleCounter maps keyed by owner, (owner, region), region and URL,
// with loaders from and writers to the corresponding proto counter lists.
struct TCounters {
    //void QueriesFrom(const NCatalogia2::NProto::TCounters &counters) {
    //    QueryCounters.From(counters);
    //}

    //void UrlsFrom(const NCatalogia2::NProto::TCounters &counters) {
    //    UrlsCounters.From(counters);
    //}

    // Loads per-owner counters; an owner id that is already present keeps its old value.
    void OwnersFrom(const NCatalogia2::NProto::TOwnerCounters &counters) {
        for (const auto &item : counters.GetCounters()) {
            OwnerCounters.emplace(item.GetOwnerId(), TSingleCounter(item.GetCounters()));
        }
    }

    // Loads per-(owner, region) counters; existing keys keep their old value.
    void OwnersRegionsFrom(const NCatalogia2::NProto::TOwnerRegionCounters &counters) {
        for (const auto &item : counters.GetCounters()) {
            const auto key = std::make_pair(item.GetOwnerId(), item.GetRegionId());
            OwnerRegionCounters.emplace(key, TSingleCounter(item.GetCounters()));
        }
    }

    // Loads per-region counters; existing keys keep their old value.
    void RegionsFrom(const NCatalogia2::NProto::TRegionCounters &counters) {
        for (const auto &item : counters.GetCounters()) {
            RegionCounters.emplace(item.GetRegionId(), TSingleCounter(item.GetCounters()));
        }
    }

    // Loads URL-keyed counters from src into an arbitrary destination map.
    static void QueriesUrlsFrom(const NCatalogia2::NProto::TUrlCounters &src, THashMap<TUrlId, TSingleCounter> &dst) {
        for (const auto &item : src.GetCounters()) {
            dst.emplace(item.GetUrlId(), TSingleCounter(item.GetCounters()));
        }
    }

    // Loads URL-keyed counters into QueryUrlCounters.
    void QueriesUrlsFrom(const NCatalogia2::NProto::TUrlCounters &counters) {
        QueriesUrlsFrom(counters, QueryUrlCounters);
    }

    // Looks key up in container; returns a shared zero-valued counter when it is absent.
    template<class TContainer, class TKey>
    const static TSingleCounter &GetCounter(const TContainer &container, const TKey &key) {
        const static TSingleCounter stub;
        const auto found = container.find(key);
        return found == container.end() ? stub : found->second;
    }

    const TSingleCounter &GetOwner(const TOwnerId &ownerId) const {
        return GetCounter(OwnerCounters, ownerId);
    }

    const TSingleCounter &GetOwnerRegion(const TOwnerId &ownerId, const TRegionId &regionId) const {
        return GetCounter(OwnerRegionCounters, std::make_pair(ownerId, regionId));
    }

    const TSingleCounter &GetRegion(const TRegionId &regionId) const {
        return GetCounter(RegionCounters, regionId);
    }

    //const TSingleCounter &GetQuery() const {
    //    return QueryCounters;
    //}

    const TSingleCounter &GetQueryUrl(const TUrlId &urlId) const {
        return GetCounter(QueryUrlCounters, urlId);
    }

    //const TSingleCounter &GetUrl() const {
    //    return UrlCounters;
    //}

    // Appends one TOwnerCounter entry per element of OwnerCounters.
    void OwnersTo(NCatalogia2::NProto::TOwnerCounters &counters) const {
        for (const auto &entry : OwnerCounters) {
            auto *target = counters.AddCounters();
            target->SetOwnerId(entry.first);
            entry.second.To(*target->MutableCounters());
        }
    }

    // Appends one TOwnerRegionCounter entry per element of OwnerRegionCounters.
    void OwnersRegionsTo(NCatalogia2::NProto::TOwnerRegionCounters &counters) const {
        for (const auto &entry : OwnerRegionCounters) {
            auto *target = counters.AddCounters();
            target->SetOwnerId(entry.first.first);
            target->SetRegionId(entry.first.second);
            entry.second.To(*target->MutableCounters());
        }
    }

    //void QueriesTo(NCatalogia2::NProto::TCounters &counters) {
    //    QueryCounters.To(counters);
    //}

    // Appends one TUrlCounter entry per element of an arbitrary URL-keyed map.
    static void QueriesUrlsTo(const THashMap<TUrlId, TSingleCounter> &src, NCatalogia2::NProto::TUrlCounters &dst) {
        for (const auto &entry : src) {
            auto *target = dst.AddCounters();
            target->SetUrlId(entry.first);
            entry.second.To(*target->MutableCounters());
        }
    }

    // Appends the content of QueryUrlCounters to counters.
    void QueriesUrlsTo(NCatalogia2::NProto::TUrlCounters &counters) const {
        QueriesUrlsTo(QueryUrlCounters, counters);
    }

    /*
    void QueriesRegionsUrlsTo(const NCatalogia2::NProto::TQueryRegionUrlCounters &counters) {
        for (const auto &obj : QueryRegionUrlCounters) {
            NCatalogia2::NProto::TQueryRegionUrlCounter *counter = counters.AddCounters();
            counter->SetQueryId(std::get<0>(obj.first));
            counter->SetRegionId(std::get<1>(obj.first));
            counter->SetUrlId(std::get<2>(obj.first));
            obj.second.To(*counter->MutableCounters());
        }
    }
    */
    // Appends one TRegionCounter entry per element of RegionCounters.
    void RegionsTo(NCatalogia2::NProto::TRegionCounters &counters) const {
        for (const auto &entry : RegionCounters) {
            auto *target = counters.AddCounters();
            target->SetRegionId(entry.first);
            entry.second.To(*target->MutableCounters());
        }
    }

    //void UrlsTo(NCatalogia2::NProto::TCounters &counters) {
    //     UrlCounters.To(counters);
    //}

public:
    THashMap<TOwnerId, TSingleCounter> OwnerCounters;
    THashMap<std::pair<TOwnerId, TRegionId>, TSingleCounter> OwnerRegionCounters;
    THashMap<TRegionId, TSingleCounter> RegionCounters;
    THashMap<TUrlId, TSingleCounter> QueryUrlCounters;
    //THashMap<std::tuple<TQueryId, TRegionId, TUrlId>, TSingleCounter> QueryRegionUrlCounters;
    //THashMap<TQueryId, TSingleCounter> QueryCounters;
    //THashMap<TUrlId, TSingleCounter> UrlCounters;
};

//ReduceBy F_QUERY_ID
// Aggregates the rows of one query into per-owner, per-(owner, region), per-region and
// per-URL counters. One TOwnerFilter row is written only when the query has more than
// five distinct owners and its total shows stay strictly below 334 * Days; a query
// whose running shows exceed that limit is abandoned immediately.
struct TOwnerFilterReducer : public NYT::IReducer<NYT::TTableReader<NCatalogia2::NProto::TQuery>, NYT::TTableWriter<NCatalogia2::NProto::TOwnerFilter>> {
    Y_SAVELOAD_JOB(Days)

    TOwnerFilterReducer() = default;
    TOwnerFilterReducer(size_t days)
        : Days(days)
    {
    }

public:
    void Do(TReader *input, TWriter *output) override {
        const size_t showsLimit = 334 * Days;
        const TQueryId queryId = input->GetRow().GetQueryId();

        THashSet<TOwnerId> distinctOwners;
        TCounters aggregated;
        TSingleCounter queryTotals;

        for (; input->IsValid(); input->Next()) {
            const NCatalogia2::NProto::TQuery &row = input->GetRow();

            queryTotals.Add(row);
            if (queryTotals.Shows > showsLimit) {
                // Too many shows for this query: emit nothing.
                return;
            }

            distinctOwners.insert(row.GetOwnerId());
            aggregated.OwnerCounters[row.GetOwnerId()].Add(row);
            aggregated.OwnerRegionCounters[std::make_pair(row.GetOwnerId(), row.GetRegionId())].Add(row);
            aggregated.RegionCounters[row.GetRegionId()].Add(row);
            aggregated.QueryUrlCounters[row.GetUrlId()].Add(row);
        }

        if (distinctOwners.size() <= 5 || queryTotals.Shows >= showsLimit) {
            return;
        }

        NCatalogia2::NProto::TOwnerFilter outMsg;
        outMsg.SetQueryId(queryId);
        outMsg.SetOwners(distinctOwners.size());
        queryTotals.To(*outMsg.MutableQCounters());
        aggregated.OwnersTo(*outMsg.MutableOQCounters());
        aggregated.OwnersRegionsTo(*outMsg.MutableOQRCounters());
        aggregated.RegionsTo(*outMsg.MutableQRCounters());
        aggregated.QueriesUrlsTo(*outMsg.MutableQUCounters());
        output->AddRow(outMsg);
    }
public:
    // Multiplier for the shows limit; serialized with the job via Y_SAVELOAD_JOB.
    size_t Days = 1;
};

REGISTER_REDUCER(TOwnerFilterReducer)

//ReduceBy F_QUERY_ID
struct TFilterExtractedJoinReducer : public TTaggedReducer {
    void StartTagged(TTagedWriter) override final {
        Dssm.Reset(new TBoostingXfOneSEDssm);
    }

public:
    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        TMaybe<NCatalogia2::NProto::TOwnerFilter> ownerFilter = reader.GetSingleRowMaybe(OwnerFilterInputTag);
        if (!ownerFilter.Defined() || !reader.IsValid()) {
            return;
        }

        TMaybe<NCatalogia2::NProto::THits> hits = reader.GetSingleRowMaybe(HitsInputTag);
        if (!hits.Defined() || !reader.IsValid()) {
            return;
        }

        const TString query = hits.GetRef().GetQuery();
        TString normalizedQuery;
        TVector<TStringBuf> words;
        size_t editDistance = 0;

        try {
            normalizedQuery = FastNormalizeRequest(query, false);
            if (normalizedQuery.empty()) {
                return;
            }

            NUtils::FastSplit(TStringBuf(normalizedQuery), ' ', words);
            editDistance = NLevenshtein::Distance(::UTF8ToWide(query), ::UTF8ToWide(normalizedQuery));
        } catch (yexception &) {
            return;
        }

        const NCatalogia2::NProto::TOwnerFilter &ownerFilterMsg = ownerFilter.GetRef();

        TVector<float> embedding;
        Dssm->Apply(normalizedQuery, embedding);

        TSingleCounter queryCounters(ownerFilterMsg.GetQCounters());
        TCounters counters;
        //counters.QueriesFrom(ownerFilterMsg.GetQCounters());
        counters.OwnersFrom(ownerFilterMsg.GetOQCounters());
        counters.OwnersRegionsFrom(ownerFilterMsg.GetOQRCounters());
        counters.RegionsFrom(ownerFilterMsg.GetQRCounters());
        counters.QueriesUrlsFrom(ownerFilterMsg.GetQUCounters());

        for (auto row : reader.GetRows(ExtractedQueriesInputTag)) {
            const TSingleCounter &OQCounter = counters.GetOwner(row.GetOwnerId());
            const TSingleCounter &OQRCounter = counters.GetOwnerRegion(row.GetOwnerId(), row.GetRegionId());
            const TSingleCounter &QRCounter = counters.GetRegion(row.GetRegionId());
            const TSingleCounter &QUCounter = counters.GetQueryUrl(row.GetUrlId());

            row.SetOwnersPerQuery(ownerFilterMsg.GetOwners());
            row.SetOQClicks(OQCounter.Clicks);
            row.SetOQShows(OQCounter.Shows);
            row.SetOQRClicks(OQRCounter.Clicks);
            row.SetOQRShows(OQRCounter.Shows);
            row.SetQClicks(queryCounters.Clicks);
            row.SetQShows(queryCounters.Shows);
            row.SetQRClicks(QRCounter.Clicks);
            row.SetQRShows(QRCounter.Shows);
            row.SetQUClicks(QUCounter.Clicks);
            row.SetQUShows(QUCounter.Shows);

            row.SetNormalizedQuery(normalizedQuery);
            row.SetHits(hits.GetRef().GetHits());
            row.SetWordCount(words.size());
            row.SetEditDistance(editDistance);

            *row.MutableDssmQueryEmbeddings()->MutableXfOneSE() = { embedding.begin(), embedding.end() };

            writer.AddRow(row, ExtractedQueriesOutputTag);
        }
    }

public:
    THolder<TBoostingXfOneSEDssm> Dssm;
};

REGISTER_REDUCER(TFilterExtractedJoinReducer)

//ReduceBy F_QUERY_ID, F_REGION_ID
// Collapses all rows of one (query, region) key into a single TQueryInfo row: scalar
// fields come from the first row, while the counters and the owner -> URLs links are
// collected over every row of the key (the first value seen for a key wins).
struct TGetQueryInfoReducer : public NYT::IReducer<NYT::TTableReader<NCatalogia2::NProto::TQuery>, NYT::TTableWriter<NCatalogia2::NProto::TQueryInfo>> {
public:
    // Creates the three DSSM wrappers on top of one shared L3 model.
    // Their only uses in Do() are currently commented out.
    void Start(TWriter *) override {
        TDssmL3Model::Ptr dssmL3Model(new TDssmL3Model);
        DssmCtr.Reset(new TDssmCtr(dssmL3Model));
        DssmCtrNoMiner.Reset(new TDssmCtrNoMiner(dssmL3Model));
        DssmUta.Reset(new TDssmUta(dssmL3Model));
    }

    void Do(TReader *input, TWriter *output) override {
        // Copied, because the reader advances past it below.
        const NCatalogia2::NProto::TQuery firstRow = input->GetRow();

        TOwnersUrlsPerQuery ownersUrls;
        TCounters counters;
        TSingleCounter queryTotals(firstRow.GetQClicks(), firstRow.GetQShows());
        THashMap<TUrlId, TSingleCounter> urlTotals;
        THashMap<TUrlId, TSingleCounter> queryRegionUrlTotals;

        for (; input->IsValid(); input->Next()) {
            const NCatalogia2::NProto::TQuery &row = input->GetRow();
            ownersUrls.Add(row.GetOwnerId(), row.GetUrlId());

            counters.OwnerCounters.emplace(
                row.GetOwnerId(),
                TSingleCounter(row.GetOQClicks(), row.GetOQShows()));
            counters.OwnerRegionCounters.emplace(
                std::make_pair(row.GetOwnerId(), row.GetRegionId()),
                TSingleCounter(row.GetOQRClicks(), row.GetOQRShows()));
            counters.QueryUrlCounters.emplace(
                row.GetUrlId(),
                TSingleCounter(row.GetQUClicks(), row.GetQUShows()));
            counters.RegionCounters.emplace(
                row.GetRegionId(),
                TSingleCounter(row.GetQRClicks(), row.GetQRShows()));
            queryRegionUrlTotals.emplace(
                row.GetUrlId(),
                TSingleCounter(row.GetQRUClicks(), row.GetQRUShows()));
            urlTotals.emplace(
                row.GetUrlId(),
                TSingleCounter(row.GetUClicks(), row.GetUShows()));
        }

        NCatalogia2::NProto::TQueryInfo outMsg;

        // Scalar fields taken from the first row of the key.
        outMsg.SetQueryId(firstRow.GetQueryId());
        outMsg.SetRegionId(firstRow.GetRegionId());
        outMsg.SetQuery(firstRow.GetQuery());
        outMsg.SetNormalizedQuery(firstRow.GetNormalizedQuery());
        outMsg.SetHits(firstRow.GetHits());
        outMsg.SetWordCount(firstRow.GetWordCount());
        outMsg.SetEditDistance(firstRow.GetEditDistance());
        outMsg.SetUpperPornoUpperPl(firstRow.GetUpperPornoUpperPl());
        *outMsg.MutableDssmQueryEmbeddings()->MutableXfOneSE() = firstRow.GetDssmQueryEmbeddings().GetXfOneSE();

        // Aggregates collected over the whole key.
        outMsg.SetOwners(ownersUrls.Links.size());
        ownersUrls.To(*outMsg.MutableOwnersUrls());
        queryTotals.To(*outMsg.MutableQCounters());
        counters.OwnersTo(*outMsg.MutableOQCounters());
        counters.OwnersRegionsTo(*outMsg.MutableOQRCounters());
        counters.QueriesUrlsTo(*outMsg.MutableQUCounters());
        counters.RegionsTo(*outMsg.MutableQRCounters());
        TCounters::QueriesUrlsTo(queryRegionUrlTotals, *outMsg.MutableQRUCounters());
        TCounters::QueriesUrlsTo(urlTotals, *outMsg.MutableUCounters());
        /*
        TVector<float> queryEmbeddingCtr;
        DssmCtr->GetQueryEmbedding(firstRow.GetNormalizedQuery(), queryEmbeddingCtr);
        *outMsg.MutableDssmQueryEmbeddings()->MutableCtr() = { queryEmbeddingCtr.begin(), queryEmbeddingCtr.end() };

        TVector<float> queryEmbeddingCtrNoMiner;
        DssmCtrNoMiner->GetQueryEmbedding(firstRow.GetNormalizedQuery(), queryEmbeddingCtrNoMiner);
        *outMsg.MutableDssmQueryEmbeddings()->MutableCtrNoMiner() = { queryEmbeddingCtrNoMiner.begin(), queryEmbeddingCtrNoMiner.end() };

        TVector<float> queryEmbeddingUta;
        DssmUta->GetQueryEmbedding(firstRow.GetNormalizedQuery(), queryEmbeddingUta);
        *outMsg.MutableDssmQueryEmbeddings()->MutableUta() = { queryEmbeddingUta.begin(), queryEmbeddingUta.end() };
        */
        output->AddRow(outMsg);
    }

public:
    THolder<TDssmCtr> DssmCtr;
    THolder<TDssmCtrNoMiner> DssmCtrNoMiner;
    THolder<TDssmUta> DssmUta;
};

REGISTER_REDUCER(TGetQueryInfoReducer)

//ReduceBy F_URL_ID, F_REGION_ID
// Collapses all rows of one (url, region) key into a single TUrlInfo row: descriptive
// fields come from the first row, the set of query ids is collected over all rows, and
// three DSSM document embeddings are computed for the URL.
struct TGetUrlInfoReducer : public NYT::IReducer<NYT::TTableReader<NCatalogia2::NProto::TQuery>, NYT::TTableWriter<NCatalogia2::NProto::TUrlInfo>> {
public:
    // Creates the three DSSM wrappers on top of one shared L3 model.
    void Start(TWriter *) override {
        TDssmL3Model::Ptr dssmL3Model(new TDssmL3Model);
        DssmCtr.Reset(new TDssmCtr(dssmL3Model));
        DssmCtrNoMiner.Reset(new TDssmCtrNoMiner(dssmL3Model));
        DssmUta.Reset(new TDssmUta(dssmL3Model));
    }

    void Do(TReader *input, TWriter *output) override {
        // Copied, because the reader advances past it below.
        const NCatalogia2::NProto::TQuery firstRow = input->GetRow();
        const TString url = firstRow.GetHost() + firstRow.GetPath();
        TSingleCounter urlTotals(firstRow.GetUClicks(), firstRow.GetUShows());

        THashSet<TQueryId> queryIds;
        for (; input->IsValid(); input->Next()) {
            queryIds.insert(input->GetRow().GetQueryId());
        }

        NCatalogia2::NProto::TUrlInfo outMsg;
        outMsg.SetUrlId(firstRow.GetUrlId());
        outMsg.SetRegionId(firstRow.GetRegionId());
        outMsg.SetHost(firstRow.GetHost());
        outMsg.SetPath(firstRow.GetPath());
        outMsg.SetNormalizedTitle(firstRow.GetNormalizedTitle());
        outMsg.SetUrlUTA(firstRow.GetUrlUTA());
        *outMsg.MutableDocStatistics() = firstRow.GetDocStatistics();
        urlTotals.To(*outMsg.MutableUCounters());
        NUtils::Serialize(queryIds, *outMsg.MutableQueriesPerUrl());

        auto *docEmbeddings = outMsg.MutableDssmDocEmbeddings();
        *docEmbeddings->MutableCentroidXfOneSE() = firstRow.GetDssmDocEmbeddings().GetCentroidXfOneSE();

        TVector<float> ctrEmbedding;
        DssmCtr->GetDocEmbedding(url, firstRow.GetUrlUTA(), firstRow.GetNormalizedTitle(), ctrEmbedding);
        *docEmbeddings->MutableCtr() = { ctrEmbedding.begin(), ctrEmbedding.end() };

        // This model is called without the UrlUTA argument, unlike the other two.
        TVector<float> ctrNoMinerEmbedding;
        DssmCtrNoMiner->GetDocEmbedding(url, firstRow.GetNormalizedTitle(), ctrNoMinerEmbedding);
        *docEmbeddings->MutableCtrNoMiner() = { ctrNoMinerEmbedding.begin(), ctrNoMinerEmbedding.end() };

        TVector<float> utaEmbedding;
        DssmUta->GetDocEmbedding(url, firstRow.GetUrlUTA(), firstRow.GetNormalizedTitle(), utaEmbedding);
        *docEmbeddings->MutableUta() = { utaEmbedding.begin(), utaEmbedding.end() };

        output->AddRow(outMsg);
    }

public:
    THolder<TDssmCtr> DssmCtr;
    THolder<TDssmCtrNoMiner> DssmCtrNoMiner;
    THolder<TDssmUta> DssmUta;
};

REGISTER_REDUCER(TGetUrlInfoReducer)

// Mapper that writes one TOwnerInfo row (host, owner, owner id) each time the host of
// the current row differs from the host of the previous row, so consecutive rows with
// the same host yield a single output row.
struct TGetOwnersInfoMapper : public NYT::IMapper<NYT::TTableReader<NCatalogia2::NProto::TQuery>, NYT::TTableWriter<NCatalogia2::NProto::TOwnerInfo>> {
    void Do(TReader *input, TWriter *output) override {
        TString lastHost;
        NCatalogia2::NProto::TOwnerInfo outMsg;
        for (; input->IsValid(); input->Next()) {
            const NCatalogia2::NProto::TQuery &row = input->GetRow();
            const auto &host = row.GetHost();
            if (lastHost == host) {
                continue;
            }

            lastHost = host;
            outMsg.SetHost(host);
            outMsg.SetOwner(TString{NUtils::GetHost2vecDomain(host)});
            outMsg.SetOwnerId(row.GetOwnerId());
            output->AddRow(outMsg);
        }
    }
};

REGISTER_MAPPER(TGetOwnersInfoMapper)

//ReduceBy F_HOST
// Deduplicating reducer: forwards only the first TOwnerInfo row of each key.
struct TGetOwnersInfoReducer : public NYT::IReducer<NYT::TTableReader<NCatalogia2::NProto::TOwnerInfo>, NYT::TTableWriter<NCatalogia2::NProto::TOwnerInfo>> {
public:
    void Do(TReader *input, TWriter *output) override {
        const auto &firstRow = input->GetRow();
        output->AddRow(firstRow);
    }
};

REGISTER_REDUCER(TGetOwnersInfoReducer)

//ReduceBy F_QUERY_ID, F_REGION_ID, F_URL_ID
// Writes one TQueryRegionUrlInfo row per key, carrying the query, region and URL ids
// of the key's first row.
struct TQueryRegionUrlUniqReducer : public NYT::IReducer<NYT::TTableReader<NCatalogia2::NProto::TQuery>, NYT::TTableWriter<NCatalogia2::NProto::TQueryRegionUrlInfo>> {
public:
    void Do(TReader *input, TWriter *output) override {
        const NCatalogia2::NProto::TQuery &firstRow = input->GetRow();

        NCatalogia2::NProto::TQueryRegionUrlInfo uniqueRow;
        uniqueRow.SetQueryId(firstRow.GetQueryId());
        uniqueRow.SetRegionId(firstRow.GetRegionId());
        uniqueRow.SetUrlId(firstRow.GetUrlId());

        output->AddRow(uniqueRow);
    }
};

REGISTER_REDUCER(TQueryRegionUrlUniqReducer)

//ReduceBy F_QUERY_ID, F_REGION_ID
// Copies the serialized owners/URLs blob of the key's TQueryInfo row into every
// TQueryRegionUrlInfo row of the same key. Keys without a TQueryInfo row, or with
// nothing after it, produce no output.
struct TQueryUrlJoinInfoReducer0 : public TTaggedReducer {
public:
    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        TMaybe<NCatalogia2::NProto::TQueryInfo> queryInfo = reader.GetSingleRowMaybe(QueryInfoInputTag);
        if (!queryInfo.Defined() || !reader.IsValid()) {
            return;
        }

        const auto &ownersUrls = queryInfo.GetRef().GetOwnersUrls();
        // Rows are taken by value because each one is modified before being written.
        for (auto joinedRow : reader.GetRows(QueryRegionUrlInfoInputTag)) {
            joinedRow.SetOwnersUrls(ownersUrls);
            writer.AddRow(joinedRow, QueryRegionUrlInfoOutputTag);
        }
    }
};

REGISTER_REDUCER(TQueryUrlJoinInfoReducer0)

//ReduceBy F_URL_ID, F_REGION_ID
struct TQueriesInfoQRUUrlsInfoJoinReducer : public TTaggedReducer {
public:
    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        TMaybe<NCatalogia2::NProto::TUrlInfo> urlInfo = reader.GetSingleRowMaybe(UrlInfoInputTag);
        if (!urlInfo.Defined() || !reader.IsValid()) {
            return;
        }

        TOwnersUrlsPerQuery ownersUrlsPerQuery;
        THashSet<TQueryId> queriesPerUrl;
        const TUrlId branchUrlId = urlInfo.GetRef().GetUrlId();
        const THostId branchHostId = GetHostIdFromUrlId(branchUrlId);
        const TRegionId regionId = urlInfo.GetRef().GetRegionId();
        NUtils::Deserialize(urlInfo.GetRef().GetQueriesPerUrl(), queriesPerUrl);

        NCatalogia2::NProto::TForeignUrl urlOutMsg;
        for (auto row : reader.GetRows(QueryRegionUrlInfoInputTag)) {
            TOwnersUrlsPerQuery tmpOwnersUrls;
            tmpOwnersUrls.From(row.GetOwnersUrls());
            ownersUrlsPerQuery.Add(tmpOwnersUrls);

            //const TQueryId branchQueryId = row.GetQueryId();
            //if (tmpOwnersUrls.Links.size() < 10000) {
            //    for (const auto &obj : tmpOwnersUrls.Links) {
            //        const TOwnerId ownerId = obj.first;
            //        for (const TUrlId originUrlId : obj.second) {
            //            urlOutMsg.SetOwnerId(ownerId);
            //            urlOutMsg.SetQueryId(branchQueryId);
            //            urlOutMsg.SetUrlId(originUrlId);
            //            urlOutMsg.SetRegionId(regionId);
            //            urlOutMsg.SetQueriesCount(queriesPerUrl.size());
            //            urlOutMsg.SetBranchUrlId(branchUrlId);
            //            writer.AddRow(urlOutMsg, ForeignUrlOutputTag);
            //        }
            //    }
            //}
        }

        size_t urlsMultiplier = 0;
        for (const auto &obj : ownersUrlsPerQuery.Links) {
            urlsMultiplier += obj.second.size();
        }

        const size_t generatedRows = queriesPerUrl.size() * urlsMultiplier;
        if (generatedRows > 10000000) {
            return;
        }

        NCatalogia2::NProto::TForeignQueryCounter queryOutCounterMsg;
        queryOutCounterMsg.SetUrlId(branchUrlId);
        queryOutCounterMsg.SetRegionId(regionId);
        queryOutCounterMsg.SetQueriesMultiplier(queriesPerUrl.size());
        queryOutCounterMsg.SetUrlsMultiplier(urlsMultiplier);
        writer.AddRow(queryOutCounterMsg, ForeignQueryCounterOutputTag);

        NCatalogia2::NProto::TForeignQuery queryOutMsg;
        for (const TQueryId queryId : queriesPerUrl) {
            for (const auto &obj : ownersUrlsPerQuery.Links) {
                const TOwnerId ownerId = obj.first;
                for (const TUrlId originUrlId : obj.second) {
                    queryOutMsg.SetOwnerId(ownerId);
                    queryOutMsg.SetQueryId(queryId);
                    queryOutMsg.SetRegionId(regionId);
                    queryOutMsg.SetUrlId(originUrlId);
                    queryOutMsg.SetBranchHostId(branchHostId);
                    writer.AddRow(queryOutMsg, ForeignQueryOutputTag);
                }
            }
        }
    }
};

REGISTER_REDUCER(TQueriesInfoQRUUrlsInfoJoinReducer)

//ReduceBy F_OWNER, F_REGION_ID, F_URL_ID, F_QUERY_ID
// Counts the distinct BranchHostId values within one key. When there are more than two,
// the first row of the key is written with BranchUrlsCount set to that count and its
// BranchHostId cleared; otherwise nothing is written.
struct TOwnerQueryRegionUniqReducer : public NYT::IReducer<NYT::TTableReader<NCatalogia2::NProto::TForeignQuery>, NYT::TTableWriter<NCatalogia2::NProto::TForeignQuery>> {
public:
    void Do(TReader *input, TWriter *output) override {
        NCatalogia2::NProto::TForeignQuery outRow = input->GetRow();

        // Holds branch host ids; the element type is kept as TUrlId.
        THashSet<TUrlId> branchHostIds;
        for (; input->IsValid(); input->Next()) {
            branchHostIds.insert(input->GetRow().GetBranchHostId());
        }

        if (branchHostIds.size() <= 2) {
            return;
        }

        //for (const TUrlId urlId : branchHostIds) { }
        outRow.SetBranchUrlsCount(branchHostIds.size());
        outRow.ClearBranchHostId();
        output->AddRow(outRow);
    }
};

REGISTER_REDUCER(TOwnerQueryRegionUniqReducer)

//ReduceBy F_OWNER, F_REGION_ID, F_URL_ID, F_BRANCH_URL_ID
// Writes the first row of each key with BranchQueriesCount set to the number of
// distinct query ids in the key and QueriesCount set to the sum of QueriesCount over
// all rows of the key.
struct TOwnerUrlRegionUniqReducer : public NYT::IReducer<NYT::TTableReader<NCatalogia2::NProto::TForeignUrl>, NYT::TTableWriter<NCatalogia2::NProto::TForeignUrl>> {
public:
    void Do(TReader *input, TWriter *output) override {
        NCatalogia2::NProto::TForeignUrl outRow = input->GetRow();

        size_t totalQueries = 0;
        THashSet<TQueryId> distinctQueryIds;
        for (; input->IsValid(); input->Next()) {
            const NCatalogia2::NProto::TForeignUrl &row = input->GetRow();
            distinctQueryIds.insert(row.GetQueryId());
            totalQueries += row.GetQueriesCount();
        }

        outRow.SetBranchQueriesCount(distinctQueryIds.size());
        outRow.SetQueriesCount(totalQueries);
        output->AddRow(outRow);
    }
};

REGISTER_REDUCER(TOwnerUrlRegionUniqReducer)

//ReduceBy F_QUERY_ID, F_REGION_ID
// Joins the query-info row of a reduce group with the foreign-query rows of the
// same group. For every foreign-query row whose OwnerId is present in
// OwnerIdToHosts one TGeneratedQuery row is written, carrying the query
// attributes, the owner host name and the click/show counters unpacked from the
// query-info row.
struct TQueriesInfoUniqOQRJoinReducer : public TTaggedReducer {
public:
    TQueriesInfoUniqOQRJoinReducer() = default;
    TQueriesInfoUniqOQRJoinReducer(const THashMap<TOwnerId, TString> &ownerIdToHosts)
        : OwnerIdToHosts(ownerIdToHosts)
    {
    }

    // Serializes the job state: the owner map first, then the base reducer state.
    void Save(IOutputStream& stream) const {
        ::Save(&stream, OwnerIdToHosts);
        TTaggedReducer::Save(stream);
    }

    // Restores the job state in the same order Save() wrote it.
    void Load(IInputStream& stream) {
        ::Load(&stream, OwnerIdToHosts);
        TTaggedReducer::Load(stream);
    }

    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        // Without a query-info row, or without any rows after it, there is nothing to join.
        TMaybe<NCatalogia2::NProto::TQueryInfo> queryInfo = reader.GetSingleRowMaybe(QueryInfoInputTag);
        if (!queryInfo.Defined() || !reader.IsValid()) {
            return;
        }
        const NCatalogia2::NProto::TQueryInfo &queryInfoRow = queryInfo.GetRef();

        TCounters counters;
        TSingleCounter queryCounters;
        // urlsCounters is only consumed by the disabled UClicks/UShows assignment below;
        // it is still filled to keep the calls made on the query-info row unchanged.
        THashMap<TUrlId, TSingleCounter> urlsCounters, queriesRegionsUrlsCounters;

        counters.OwnersFrom(queryInfoRow.GetOQCounters());
        counters.OwnersRegionsFrom(queryInfoRow.GetOQRCounters());
        counters.QueriesUrlsFrom(queryInfoRow.GetQUCounters());
        counters.RegionsFrom(queryInfoRow.GetQRCounters());
        queryCounters.From(queryInfoRow.GetQCounters());
        TCounters::QueriesUrlsFrom(queryInfoRow.GetQRUCounters(), queriesRegionsUrlsCounters);
        TCounters::QueriesUrlsFrom(queryInfoRow.GetUCounters(), urlsCounters);

        // Everything that depends only on the query-info row is written once;
        // the message is reused for every output row of the group.
        NCatalogia2::NProto::TGeneratedQuery outMsg;
        *outMsg.MutableDssmQueryEmbeddings()->MutableXfOneSE() = queryInfoRow.GetDssmQueryEmbeddings().GetXfOneSE();
        outMsg.SetUpperPornoUpperPl(queryInfoRow.GetUpperPornoUpperPl());
        outMsg.SetQuery(queryInfoRow.GetQuery());
        outMsg.SetQueryId(queryInfoRow.GetQueryId());
        outMsg.SetNormalizedQuery(queryInfoRow.GetNormalizedQuery());
        outMsg.SetRegionId(queryInfoRow.GetRegionId());
        outMsg.SetOwners(queryInfoRow.GetOwners());
        outMsg.SetQClicks(queryCounters.Clicks);
        outMsg.SetQShows(queryCounters.Shows);
        outMsg.SetHits(queryInfoRow.GetHits());
        outMsg.SetEditDistance(queryInfoRow.GetEditDistance());
        outMsg.SetWordCount(queryInfoRow.GetWordCount());

        for (const auto &row : reader.GetRows(ForeignQueryInputTag)) {
            // Rows of owners missing from the map are dropped.
            const auto ownerIt = OwnerIdToHosts.find(row.GetOwnerId());
            if (ownerIt == OwnerIdToHosts.end()) {
                continue;
            }

            outMsg.SetOwner(ownerIt->second);
            outMsg.SetUrlId(row.GetUrlId());
            outMsg.SetBranchUrlsCount(row.GetBranchUrlsCount());

            //outMsg.SetUClicks(TCounters::GetCounter(urlsCounters, row.GetUrlId()).Clicks);
            //outMsg.SetUShows(TCounters::GetCounter(urlsCounters, row.GetUrlId()).Shows);

            // Each counter is looked up once and both of its fields are copied.
            const auto &qruCounter = TCounters::GetCounter(queriesRegionsUrlsCounters, row.GetUrlId());
            outMsg.SetQRUClicks(qruCounter.Clicks);
            outMsg.SetQRUShows(qruCounter.Shows);

            const auto &oqCounter = TCounters::GetCounter(counters.OwnerCounters, row.GetOwnerId());
            outMsg.SetOQClicks(oqCounter.Clicks);
            outMsg.SetOQShows(oqCounter.Shows);

            const auto oqrKey = std::make_pair(row.GetOwnerId(), row.GetRegionId());
            const auto &oqrCounter = TCounters::GetCounter(counters.OwnerRegionCounters, oqrKey);
            outMsg.SetOQRClicks(oqrCounter.Clicks);
            outMsg.SetOQRShows(oqrCounter.Shows);

            const auto &qrCounter = TCounters::GetCounter(counters.RegionCounters, row.GetRegionId());
            outMsg.SetQRClicks(qrCounter.Clicks);
            outMsg.SetQRShows(qrCounter.Shows);

            const auto &quCounter = TCounters::GetCounter(counters.QueryUrlCounters, row.GetUrlId());
            outMsg.SetQUClicks(quCounter.Clicks);
            outMsg.SetQUShows(quCounter.Shows);

            //*outMsg.MutableDssmQueryEmbeddings() = queryInfoRow.GetDssmQueryEmbeddings();

            writer.AddRow(outMsg, GeneratedQueryOutputTag);
        }
    }

public:
    // Owner id -> host name; only owners present here produce output rows.
    THashMap<TOwnerId, TString> OwnerIdToHosts;
};

REGISTER_REDUCER(TQueriesInfoUniqOQRJoinReducer)

// Normalizes a host before it is looked up among the porno hosts: the scheme is
// removed first, then NUtils::FixDomainPrefix is applied to the remainder.
TStringBuf FixPornoDomain(const TStringBuf &host) {
    const auto schemeless = NUtils::RemoveScheme(host);
    return NUtils::FixDomainPrefix(schemeless);
}

//ReduceBy F_URL_ID, F_REGION_ID
// Enriches the generated-query rows of a reduce group with attributes of the url
// they belong to: host/path/title, url counters, DSSM joint outputs, distance of
// the query embedding to the document centroid, multipliers from the
// foreign-query counter row and the porno-host flag.
struct TUrlsInfoUniqOQRJoinReducer : public TTaggedReducer {
    TUrlsInfoUniqOQRJoinReducer() = default;
    TUrlsInfoUniqOQRJoinReducer(const THashSet<TString> &pornoHosts)
        : PornoHosts(pornoHosts)
    {
    }

    // Serializes the job state: the porno hosts first, then the base reducer state.
    void Save(IOutputStream& stream) const {
        ::Save(&stream, PornoHosts);
        TTaggedReducer::Save(stream);
    }

    // Restores the job state in the same order Save() wrote it.
    void Load(IInputStream& stream) {
        ::Load(&stream, PornoHosts);
        TTaggedReducer::Load(stream);
    }

    // Creates the three DSSM appliers on top of one shared L3 model object.
    void StartTagged(TTagedWriter) override final {
        TDssmL3Model::Ptr dssmL3Model(new TDssmL3Model);
        DssmCtr.Reset(new TDssmCtr(dssmL3Model));
        DssmCtrNoMiner.Reset(new TDssmCtrNoMiner(dssmL3Model));
        DssmUta.Reset(new TDssmUta(dssmL3Model));
    }

    // True when the host, normalized with FixPornoDomain, is in PornoHosts.
    bool IsPornoHost(const TString &host) const {
        return PornoHosts.contains(FixPornoDomain(host));
    }

    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        // Both the url-info row and the counter row are required; the group is
        // skipped when either is missing or no rows follow.
        TMaybe<NCatalogia2::NProto::TUrlInfo> urlInfo = reader.GetSingleRowMaybe(UrlInfoInputTag);
        if (!urlInfo.Defined() || !reader.IsValid()) {
            return;
        }
        const NCatalogia2::NProto::TUrlInfo &urlInfoRow = urlInfo.GetRef();

        TMaybe<NCatalogia2::NProto::TForeignQueryCounter> counter = reader.GetSingleRowMaybe(ForeignQueryCounterInputTag);
        if (!counter.Defined() || !reader.IsValid()) {
            return;
        }
        const NCatalogia2::NProto::TForeignQueryCounter &counterRow = counter.GetRef();

        TSingleCounter urlCounters;
        urlCounters.From(urlInfoRow.GetUCounters());

        const auto &docEmbeddings = urlInfoRow.GetDssmDocEmbeddings();
        const auto &docStatistics = urlInfoRow.GetDocStatistics();
        const auto &docCentroid = docEmbeddings.GetCentroidXfOneSE();

        // Reference point for DssmTopL2: all three joint outputs equal to 1.
        const float dssmTopValues[3] = {1.0, 1.0, 1.0};
        TVector<float> docEmbeddingCtr          (docEmbeddings.GetCtr().begin(),        docEmbeddings.GetCtr().end());
        TVector<float> docEmbeddingCtrNoMiner   (docEmbeddings.GetCtrNoMiner().begin(), docEmbeddings.GetCtrNoMiner().end());
        TVector<float> docEmbeddingUta          (docEmbeddings.GetUta().begin(),        docEmbeddings.GetUta().end());
        TVector<float> queryEmbeddingCtr;
        TVector<float> queryEmbeddingCtrNoMiner;
        TVector<float> queryEmbeddingUta;

        // Id of the query the cached values below were computed for. It is empty
        // until the first row, so a real QueryId of 0 is not mistaken for "no query yet".
        TMaybe<TQueryId> prevQueryId;

        float ctrJointOutput = 0;
        float ctrNoMinerJointOutput = 0;
        float utaJointOutput = 0;
        float topL2 = 0;
        float dssmValues[3] = {0, 0, 0};
        float l2ToCentroid = 0;
        float l2ToCentroidSD = 0;

        // Values that are identical for every row of the group.
        const auto RoundD3 = NUtils::Round<-3, double>;
        const bool isPornoHost = IsPornoHost(urlInfoRow.GetHost());

        for (auto row : reader.GetRows(GeneratedQueryInputTag)) {
            // The per-query values are recomputed only when the query id changes
            // between consecutive rows.
            if (!prevQueryId.Defined() || prevQueryId.GetRef() != row.GetQueryId()) {
                DssmCtr->GetQueryEmbedding(row.GetNormalizedQuery(), queryEmbeddingCtr);
                DssmCtrNoMiner->GetQueryEmbedding(row.GetNormalizedQuery(), queryEmbeddingCtrNoMiner);
                DssmUta->GetQueryEmbedding(row.GetNormalizedQuery(), queryEmbeddingUta);

                ctrJointOutput = SoftSign(DssmCtr->GetJointOutput(docEmbeddingCtr, queryEmbeddingCtr));
                ctrNoMinerJointOutput = SoftSign(DssmCtrNoMiner->GetJointOutput(docEmbeddingCtrNoMiner, queryEmbeddingCtrNoMiner));
                utaJointOutput = SoftSign(DssmUta->GetJointOutput(docEmbeddingUta, queryEmbeddingUta));
                dssmValues[0] = ctrJointOutput;
                dssmValues[1] = ctrNoMinerJointOutput;
                dssmValues[2] = utaJointOutput;
                topL2 = L2SqrDistance(dssmValues, dssmTopValues, 3);

                // The distance is taken over the coordinates both vectors have, so an
                // empty or shorter centroid cannot cause a read past its end. For
                // vectors of equal length this is the full distance.
                const auto &queryEmbedding = row.GetDssmQueryEmbeddings().GetXfOneSE();
                const auto commonSize = queryEmbedding.size() < docCentroid.size()
                    ? queryEmbedding.size()
                    : docCentroid.size();
                l2ToCentroid = L2SqrDistance(docCentroid.data(), queryEmbedding.data(), commonSize);

                // Distance expressed in sigmas from the mean; 0 when sigma is not positive.
                l2ToCentroidSD = 0.0f;
                if (docStatistics.GetL2ToCentroidSigma() > 0) {
                    l2ToCentroidSD = (
                        l2ToCentroid - docStatistics.GetL2ToCentroidMean()
                    ) / docStatistics.GetL2ToCentroidSigma();
                }
                prevQueryId = row.GetQueryId();
            }

            row.SetHost(urlInfoRow.GetHost());
            row.SetPath(urlInfoRow.GetPath());
            row.SetUrlUTA(urlInfoRow.GetUrlUTA());
            row.SetUClicks(urlCounters.Clicks);
            row.SetUShows(urlCounters.Shows);
            row.SetNormalizedTitle(urlInfoRow.GetNormalizedTitle());
            row.SetDssmCtr(RoundD3(ctrJointOutput));
            row.SetDssmCtrNoMiner(RoundD3(ctrNoMinerJointOutput));
            row.SetDssmUta(RoundD3(utaJointOutput));
            row.SetDssmTopL2(RoundD3(topL2));
            row.SetQueriesMultiplier(counterRow.GetQueriesMultiplier());
            row.SetUrlsMultiplier(counterRow.GetUrlsMultiplier());

            row.SetL2ToCentroid(RoundD3(l2ToCentroid));
            row.SetL2ToCentroidMean(RoundD3(docStatistics.GetL2ToCentroidMean()));
            row.SetL2ToCentroidSigma(RoundD3(docStatistics.GetL2ToCentroidSigma()));
            row.SetL2ToCentroidSigmaDistance(RoundD3(l2ToCentroidSD));
            row.SetUrlOwnUniqQueries(docStatistics.GetUniqQueries());
            row.SetUrlOwnQueries(docStatistics.GetQueries());
            // The query embedding is not carried over to the output row.
            row.ClearDssmQueryEmbeddings();
            row.SetIsPornoHost(isPornoHost);

            writer.AddRow(row, GeneratedQueryOutputTag);
        }
    }

public:
    THolder<TDssmCtr> DssmCtr;
    THolder<TDssmCtrNoMiner> DssmCtrNoMiner;
    THolder<TDssmUta> DssmUta;
    THashSet<TString> PornoHosts;
};

REGISTER_REDUCER(TUrlsInfoUniqOQRJoinReducer)
/*
//ReduceBy F_URL_ID, F_REGION_ID
struct TUrlsInfoUniqOURJoinReducer : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    Y_SAVELOAD_JOB(OwnerIdToHosts)
public:
    TUrlsInfoUniqOURJoinReducer() = default;
    TUrlsInfoUniqOURJoinReducer(const THashMap<TOwnerId, TString> &ownerIdToHosts)
        : OwnerIdToHosts(ownerIdToHosts)
    {
    }

    void Do(TReader *input, TWriter *output) override {
        const ui32 TABLENO_URLS_INFO = 0;
        const ui32 TABLENO_UNIQ_OUR = 1;
        TString url;
        for (; input->IsValid(); input->Next()) {
            const NYT::TNode row = input->GetRow();
            const ui32 tableNo = input->GetTableIndex();
            if (tableNo == TABLENO_URLS_INFO) {
                url = row[F_URL].AsString();
            } else if (tableNo == TABLENO_UNIQ_OUR && !url.empty()) {
                const TOwnerId ownerId = input->GetRow()[F_OWNER_ID].AsUint64();
                const auto ownerIt = OwnerIdToHosts.find(ownerId);
                if (ownerIt != OwnerIdToHosts.end()) {
                    output->AddRow(NYT::TNode()
                        (F_URL_ID, row[F_BRANCH_URL_ID]) //F_BRANCH_URL_ID becomes F_URL
                        (F_REGION_ID, row[F_REGION_ID])
                        (F_OWNER, ownerIt->second)
                        (F_SOURCE_URL, url)
                        (F_QUERIES, row[F_QUERIES])
                        (F_BRANCH_QUERIES, row[F_BRANCH_QUERIES])
                    );
                }
            }
        }
    }
public:
    THashMap<TOwnerId, TString> OwnerIdToHosts;
};

REGISTER_REDUCER(TUrlsInfoUniqOURJoinReducer)

//ReduceBy F_URL_ID, F_REGION_ID
struct TUrlsInfoUniqOURJoinStage2Reducer : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    void Do(TReader *input, TWriter *output) override {
        const ui32 TABLENO_URLS_INFO = 0;
        const ui32 TABLENO_UNIQ_OUR = 1;
        TString url;
        for (; input->IsValid(); input->Next()) {
            const NYT::TNode row = input->GetRow();
            const ui32 tableNo = input->GetTableIndex();
            if (tableNo == TABLENO_URLS_INFO) {
                url = row[F_URL].AsString();
            } else if (tableNo == TABLENO_UNIQ_OUR && !url.empty()) {
                output->AddRow(NYT::TNode()
                    (F_URL, url) //remapped from F_BRANCH_URL_ID
                    (F_REGION_ID, row[F_REGION_ID])
                    (F_OWNER, row[F_OWNER])
                    (F_SOURCE_URL, row[F_SOURCE_URL])
                    (F_QUERIES, row[F_QUERIES])
                    (F_BRANCH_QUERIES, row[F_BRANCH_QUERIES])
                );
            }
        }
    }
public:
    THashMap<TOwnerId, TString> OwnerIdToHosts;
};

REGISTER_REDUCER(TUrlsInfoUniqOURJoinStage2Reducer)
*/

// Map stage of the hits calculation. Every input row holds a serialized
// proto::queries2::QueryMessage; its shows are accumulated per
// (corrected query, table timestamp, region, position) and written as TWordstat rows.
struct THitsMapper : public NYT::IMapper<NYT::TTableReader<NYT::TYamr>, NYT::TTableWriter<NCatalogia2::NProto::TWordstat>> {
    Y_SAVELOAD_JOB(TableConfig)

    using TWordstatCounter = TMap<ui32, size_t>;                // position -> shows
    using TRegionCounter = THashMap<ui32, TWordstatCounter>;    // region id -> positions
    using TPeriodCounter = THashMap<time_t, TRegionCounter>;    // timestamp -> regions

public:
    THitsMapper() = default;
    THitsMapper(const TVector<time_t> &tableConfig)
        : TableConfig(tableConfig)
    {
    }

    // Writes one TWordstat row per accumulated (query, timestamp, region, position)
    // entry and then empties the accumulator.
    void FlushCounters(TWriter *output, THashMap<TString, TPeriodCounter> &queryPositions) {
        NCatalogia2::NProto::TWordstat outRow;
        for (const auto &byQuery : queryPositions) {
            outRow.SetQuery(byQuery.first);
            for (const auto &byPeriod : byQuery.second) {
                outRow.SetTimestamp(byPeriod.first);
                for (const auto &byRegion : byPeriod.second) {
                    outRow.SetRegionId(byRegion.first);
                    for (const auto &byPosition : byRegion.second) {
                        outRow.SetPosition(byPosition.first);
                        outRow.SetWordstat(byPosition.second);
                        output->AddRow(outRow);
                    }
                }
            }
        }
        queryPositions.clear();
    }

    void Do(TReader *input, TWriter *output) override {
        THashMap<TString, TPeriodCounter> pending;

        while (input->IsValid()) {
            // The timestamp of a row is defined by the input table it came from.
            const time_t tableTimestamp = TableConfig[input->GetTableIndex()];
            proto::queries2::QueryMessage query;
            query.ParseFromString(input->GetRow().value());

            TRegionCounter &regions = pending[query.corrected_query()][tableTimestamp];
            for (const auto &report : query.reports_by_region()) {
                for (const auto &positionInfo : report.position_info()) {
                    regions[report.region_id()][positionInfo.position()] += positionInfo.shows_count();
                }
            }

            // Keeps the accumulator bounded: flush once more than 1000 queries are pending.
            if (pending.size() > 1000) {
                FlushCounters(output, pending);
            }

            input->Next();
        }

        FlushCounters(output, pending);
    }

public:
    // Timestamp of every input table, indexed by the table index.
    TVector<time_t> TableConfig;
};

REGISTER_MAPPER(THitsMapper)

// ReduceBy F_QUERY
// Sums the wordstat of one query over all of its (timestamp, region) pairs and
// writes a single THits row into output table #1. For every pair only the entry
// with the smallest position key contributes to the sum.
struct THitsReducer : public NYT::IReducer<NYT::TTableReader<NCatalogia2::NProto::TWordstat>, NYT::TTableWriter<NCatalogia2::NProto::THits>> {
    void Do(TReader *input, TWriter *output) override {
        // Output table #0 (daily) is not written by this reducer.
        //const ui32 TABLENO_WORDSTAT_DAILY = 0;
        const ui32 TABLENO_WORDSTAT_MONTHLY = 1;

        const TString query = input->GetRow().query();
        const TQueryId queryId = GetQueryId(query);

        // Merge all rows of the group: timestamp -> region -> position -> wordstat.
        THitsMapper::TPeriodCounter merged;
        while (input->IsValid()) {
            const auto &row = input->GetRow();
            merged[row.GetTimestamp()][row.GetRegionId()][row.GetPosition()] += row.GetWordstat();
            input->Next();
        }

        size_t totalHits = 0;
        for (const auto &byPeriod : merged) {
            for (const auto &byRegion : byPeriod.second) {
                // The position map is ordered by key, so begin() is its smallest position.
                totalHits += byRegion.second.begin()->second;
            }
        }

        NCatalogia2::NProto::THits result;
        result.SetQuery(query);
        result.SetQueryId(queryId);
        result.SetHits(totalHits);
        output->AddRow(result, TABLENO_WORDSTAT_MONTHLY);
    }
};

REGISTER_REDUCER(THitsReducer)

//JoinBy F_HOST, F_PATH
struct TJoinContentAttrsReducer : public TTaggedReducer {
    using TUrlUTA = ::NWebmaster::TUrlUTA;

    void StartTagged(TTagedWriter) override final {
        TUrlUTA::CInstance();
    }

    float GetL2Mean(const TVector<float> &docCentroid, const TDeque<NCatalogia2::NProto::TQuery> &rows, TDeque<float> &calculatedL2) const {
        float l2Mean = 0.0f;
        for (auto &row : rows) {
            const float l2 = L2SqrDistance(&docCentroid[0], &row.GetDssmQueryEmbeddings().GetXfOneSE()[0], docCentroid.size());
            l2Mean += l2;
            calculatedL2.push_back(l2);
        }
        l2Mean /= static_cast<float>(rows.size());
        return l2Mean;
    }

    float GetL2Sigma(const TDeque<float> &calculatedL2, float l2Mean) const {
        float sigma2 = 0.0f;
        for (float l2 : calculatedL2) {
            const float diff = l2 - l2Mean;
            sigma2 += (diff * diff);
        }
        const float l2Sigma = sqrt(sigma2 / static_cast<float>(calculatedL2.size()));
        return l2Sigma;
    }

    void DoTagged(TTagedReader reader, TTagedWriter writer) override final {
        TMaybe<NJupiter::TContentAttrsForWebmaster> contentAttrs = reader.GetRowMaybe(ContentAttrsInputTag);
        if (!contentAttrs.Defined()) {
            return;
        }

        const NJupiter::TContentAttrsForWebmaster &contentAttrsMsg = contentAttrs.GetRef();
        reader.SkipRows(ContentAttrsInputTag);
        if (!reader.IsValid()) {
            return;
        }

        const TString url = contentAttrsMsg.GetHost() + contentAttrsMsg.GetPath();
        const TString urlUta = TUrlUTA::CInstance().Get(url);
        const TString normalizedTitle = NDssm::NormalizeTitleUtf8(contentAttrsMsg.GetTitleRawUTF8());

        TVector<float> docCentroid;
        TSingleCounter urlCounters;
        TDeque<NCatalogia2::NProto::TQuery> rows;
        THashSet<TQueryId> uniqQueries;
        for (auto row : reader.GetRows(ExtractedQueriesInputTag)) {
            TVector<float> embedding(
                row.GetDssmQueryEmbeddings().GetXfOneSE().begin(),
                row.GetDssmQueryEmbeddings().GetXfOneSE().end()
            );
            if (docCentroid.empty()) {
                docCentroid.resize(embedding.size());
            }
            for (size_t i = 0; i < embedding.size(); i++) {
                docCentroid[i] += embedding[i];
            }
            rows.push_back(row);
            urlCounters.Add(row);
            uniqQueries.insert(row.GetQueryId());
        }

        if (rows.empty()) {
            return;
        }

        for (size_t i = 0; i < docCentroid.size(); i++) {
            docCentroid[i] /= static_cast<float>(rows.size());
        }

        TDeque<float> calculatedL2;
        const float l2Mean = GetL2Mean(docCentroid, rows, calculatedL2);
        const float l2Sigma = GetL2Sigma(calculatedL2, l2Mean);

        for (auto &row : rows) {
            *row.MutableDssmDocEmbeddings()->MutableCentroidXfOneSE() = { docCentroid.begin(), docCentroid.end() };
            row.MutableDocStatistics()->SetL2ToCentroidMean(l2Mean);
            row.MutableDocStatistics()->SetL2ToCentroidSigma(l2Sigma);
            row.MutableDocStatistics()->SetQueries(rows.size());
            row.MutableDocStatistics()->SetUniqQueries(uniqQueries.size());
            row.SetUrlUTA(urlUta);
            row.SetNormalizedTitle(normalizedTitle);
            row.SetUClicks(urlCounters.Clicks);
            row.SetUShows(urlCounters.Shows);
            writer.AddRow(row, ExtractedQueriesOutputTag);
        }
    }
};

REGISTER_REDUCER(TJoinContentAttrsReducer)

struct TGeneratedStatistics {
    void Add(const NCatalogia2::NProto::TGeneratedQuery &row) {
        IsPornoHost = row.GetIsPornoHost();
        if (IsPornoQuery(row.GetUpperPornoUpperPl())) {
            PornoQueries++;
        }
        TotalQueries++;
    }

    void Add(const NCatalogia2::NProto::TGeneratedStatistics &row) {
        IsPornoHost = row.GetIsPornoHost();
        PornoQueries += row.GetPornoQueries();
        TotalQueries += row.GetTotalQueries();
    }

    void Write(const TString &host, NYT::TTableWriter<NCatalogia2::NProto::TGeneratedStatistics> *output) const {
        NCatalogia2::NProto::TGeneratedStatistics dstMsg;
        dstMsg.SetHost(host);
        dstMsg.SetIsPornoHost(IsPornoHost);
        dstMsg.SetPornoQueries(PornoQueries);
        dstMsg.SetTotalQueries(TotalQueries);
        output->AddRow(dstMsg);
    }

public:
    bool IsPornoHost = false;
    size_t PornoQueries = 0;
    size_t TotalQueries = 0;
};

// Pre-aggregates generated queries inside one map job: statistics are collected
// per host in memory and written as one TGeneratedStatistics row per host after
// the whole input has been read.
struct TGerenratedQueriesStatisticsMapper : public NYT::IMapper<NYT::TTableReader<NCatalogia2::NProto::TGeneratedQuery>, NYT::TTableWriter<NCatalogia2::NProto::TGeneratedStatistics>> {
    void Do(TReader *input, TWriter *output) override {
        THashMap<TString, TGeneratedStatistics> perHost;

        while (input->IsValid()) {
            const auto &query = input->GetRow();
            perHost[query.GetHost()].Add(query);
            input->Next();
        }

        for (auto it = perHost.begin(); it != perHost.end(); ++it) {
            it->second.Write(it->first, output);
        }
    }
};

REGISTER_MAPPER(TGerenratedQueriesStatisticsMapper)

//ReduceBy F_QUERY
// NOTE(review): the annotation above is the original one; the code attributes the
// whole group to the Host of its first row, so the operation is presumably
// reduced by host - confirm against the caller.
// Merges all partial statistics of a reduce group into a single output row.
struct TGerenratedQueriesStatisticsReducer : public NYT::IReducer<NYT::TTableReader<NCatalogia2::NProto::TGeneratedStatistics>, NYT::TTableWriter<NCatalogia2::NProto::TGeneratedStatistics>> {
    void Do(TReader *input, TWriter *output) override {
        const TString host = input->GetRow().GetHost();

        TGeneratedStatistics merged;
        while (input->IsValid()) {
            merged.Add(input->GetRow());
            input->Next();
        }

        merged.Write(host, output);
    }
};

REGISTER_REDUCER(TGerenratedQueriesStatisticsReducer)

// Rebuilds the hits tables from the most recent converted-queries tables.
// Nothing is done when the monthly hits table was modified less than a week ago.
// Throws yexception when the set of source tables is reported as incomplete.
// All YT operations run inside one transaction that is committed at the end.
void ProcessHits(NYT::IClientBasePtr client) {
    using namespace NJupiter;
    const ui32 USE_DAYS = 30;

    const NYT::TSortColumns KEYS_HITS            = {"Query"};
    const NYT::TSortColumns KEYS_OWNER_FILTER    = {"QueryId"};

    try {
        if ((Now().Seconds() - NYTUtils::GetModificationTime(client, TConfig::CInstance().TABLE_CATALOGIA2_HITS_MONTHLY)) < WEEK_SECONDS) {
            LOG_INFO("catalogia2, hits table is already processed");
            return;
        }
    } catch (...) {
        // Best effort: when the age of the table cannot be determined the table is
        // simply rebuilt. The reason is logged instead of being dropped silently.
        LOG_INFO("catalogia2, unable to get hits table modification time, processing: %s", CurrentExceptionMessage().c_str());
    }

    // A longer window is loaded than is used; only the last USE_DAYS tables are kept.
    TDeque<TSourceTable> sourceTables;
    LoadConvertedTables(client, TCommonYTConfigSQ::CInstance().TABLE_CONVERTED_QUERIES_V4_ROOT, sourceTables, USE_DAYS + 14);
    if (sourceTables.size() > USE_DAYS) {
        sourceTables.erase(sourceTables.begin(), sourceTables.begin() + (sourceTables.size() - USE_DAYS));
    }

    // lag and holes are out-parameters of the completeness check; they are not used here.
    int lag = 0, holes = 0;
    if (!IsSourceTablesSetComplete(sourceTables, lag, holes)) {
        ythrow yexception() << "source tables set is incomplete: " << TCommonYTConfigSQ::CInstance().TABLE_CONVERTED_QUERIES_V4_ROOT;
        //if (!config.IgnoreChecks) {
        //    return;
        //}
    }

    NYT::ITransactionPtr tx = client->StartTransaction();

    // tableConfig[i] is the period begin of the i-th input table; the mapper uses
    // it to resolve a timestamp from the table index of a row.
    TVector<time_t> tableConfig;
    tableConfig.reserve(sourceTables.size());
    TDeque<TTable<NYT::TYamr>> inputTables;
    for (const TSourceTable &table : sourceTables) {
        inputTables.push_back(TTable<NYT::TYamr>(tx, table.Name));
        tableConfig.push_back(table.PeriodBegin);
    }

    TMapCombineReduceCmd<THitsMapper, THitsReducer, THitsReducer>(
        tx, new THitsMapper(tableConfig), nullptr, new THitsReducer
    )
        .OperationWeight(OPERATION_WEIGHT)
        .Inputs(inputTables)
        .Output(TTable<NCatalogia2::NProto::THits>(tx, TConfig::CInstance().TABLE_CATALOGIA2_HITS_DAILY))
        .Output(TTable<NCatalogia2::NProto::THits>(tx, TConfig::CInstance().TABLE_CATALOGIA2_HITS_MONTHLY))
        .ReduceBy(KEYS_HITS)
        .MapperMemoryLimit(1_GBs)
        .ReducerMemoryLimit(1_GBs)
        .Do()
    ;

    TSortCmd<NCatalogia2::NProto::THits>(tx, TTable<NCatalogia2::NProto::THits>(tx, TConfig::CInstance().TABLE_CATALOGIA2_HITS_MONTHLY))
        .OperationWeight(OPERATION_WEIGHT)
        .By(KEYS_OWNER_FILTER)
        .Do()
    ;

    tx->Commit();
}

// Builds the extracted-queries table from the `days` most recent tables (by name)
// found under inputTableRoot, then filters it by owners, joins it with the
// monthly hits and finally with the Jupiter content attributes. The table is
// re-sorted in place between the stages.
// Throws yexception when fewer than `days` input tables are available.
void ExtractQueries(NYT::IClientBasePtr tx, const TString &inputTableRoot, const TTablePathConfig &tablePathConfig, size_t days) {
    const TString extractedQueries = tablePathConfig.URLCLUSTER_QUERIES_EXTRACTED();
    const TString ownersFilter = tablePathConfig.URLCLUSTER_QUERIES_OWNERS_FILTER();

    const NYT::TSortColumns KEYS_CONTENT_ATTRS   = {"Host", "Path"};
    const NYT::TSortColumns KEYS_OWNER_FILTER    = {"QueryId"};

    TDeque<NYTUtils::TTableInfo> tables;
    NYTUtils::GetTableList(tx, inputTableRoot, tables);
    // Sorting through reverse iterators puts the greatest names first.
    std::sort(tables.rbegin(), tables.rend(), NYTUtils::TTableInfo::TNameLess());

    // resize() would pad a short list with default-constructed entries, which
    // would then be passed to the map operation as inputs; fail explicitly instead.
    if (tables.size() < days) {
        ythrow yexception() << "catalogia2, not enough input tables in " << inputTableRoot
            << ": found " << tables.size() << ", required " << days;
    }
    tables.resize(days);

    TDeque<TTable<NYT::TYamr>> inputTables;
    for (auto &table : tables) {
        inputTables.emplace_back(tx, table.Name);
        LOG_INFO("catalogia2, input %s", table.Name.c_str());
    }

    TMapCmd<TMapper>(tx)
        .OperationWeight(OPERATION_WEIGHT)
        .Inputs(inputTables)
        .Output(TTable<NCatalogia2::NProto::TQuery>(tx, extractedQueries))
        .Do()
    ;

    TSortCmd<NCatalogia2::NProto::TQuery>(tx, TTable<NCatalogia2::NProto::TQuery>(tx, extractedQueries))
        .OperationWeight(OPERATION_WEIGHT)
        .By(KEYS_OWNER_FILTER)
        .Do()
    ;

    TReduceCmd<TOwnerFilterReducer>(tx, new TOwnerFilterReducer(days))
        .OperationWeight(OPERATION_WEIGHT)
        .Input(TTable<NCatalogia2::NProto::TQuery>(tx, extractedQueries))
        .Output(TTable<NCatalogia2::NProto::TOwnerFilter>(tx, ownersFilter).AsSortedOutput(KEYS_OWNER_FILTER))
        .ReduceBy(KEYS_OWNER_FILTER)
        .Do()
    ;

    // The extracted-queries table is both an input and the output of this join.
    TReduceCmd<TFilterExtractedJoinReducer>(tx)
        .OperationWeight(OPERATION_WEIGHT)
        .Input(TTable<NCatalogia2::NProto::TOwnerFilter>(tx, ownersFilter), OwnerFilterInputTag)
        .Input(TTable<NCatalogia2::NProto::THits>(tx, TConfig::CInstance().TABLE_CATALOGIA2_HITS_MONTHLY), HitsInputTag)
        .Input(TTable<NCatalogia2::NProto::TQuery>(tx, extractedQueries), ExtractedQueriesInputTag)
        .Output(TTable<NCatalogia2::NProto::TQuery>(tx, extractedQueries).AsSortedOutput(KEYS_OWNER_FILTER), ExtractedQueriesOutputTag)
        .AddYtFile(TCommonYTConfig::CInstance().FILE_XF_ONE_SE_MODEL_DSSM)
        .MemoryLimit(6_GBs)
        .ReduceBy(KEYS_OWNER_FILTER)
        .Do()
    ;

    TSortCmd<NCatalogia2::NProto::TQuery>(tx, TTable<NCatalogia2::NProto::TQuery>(tx, extractedQueries))
        .OperationWeight(OPERATION_WEIGHT)
        .By(KEYS_CONTENT_ATTRS)
        .Do()
    ;

    // The extracted-queries table is again rewritten in place.
    TReduceCmd<TJoinContentAttrsReducer>(tx)
        .OperationWeight(OPERATION_WEIGHT)
        .Input(TTable<NJupiter::TContentAttrsForWebmaster>(tx, GetJupiterContentAttrsInProdTable(tx)), ContentAttrsInputTag)
        .Input(TTable<NCatalogia2::NProto::TQuery>(tx, extractedQueries), ExtractedQueriesInputTag)
        .Output(TTable<NCatalogia2::NProto::TQuery>(tx, extractedQueries).AsSortedOutput(KEYS_CONTENT_ATTRS), ExtractedQueriesOutputTag)
        //.JobCount(20000)
        .MemoryLimit(2_GBs)
        .ReduceBy(KEYS_CONTENT_ATTRS)
        .Do()
    ;
}

// Derives three info tables from the extracted-queries table: owners info,
// urls info and queries info. The extracted-queries table is re-sorted in place
// before each of the two reduce stages so that it matches their reduce keys.
void GetInfos(NYT::IClientBasePtr tx, const TTablePathConfig &tablePathConfig) {
    const TString extractedPath = tablePathConfig.URLCLUSTER_QUERIES_EXTRACTED();
    const TString ownersInfoPath = tablePathConfig.URLCLUSTER_OWNERS_INFO();
    const TString queriesInfoPath = tablePathConfig.URLCLUSTER_QUERIES_INFO();
    const TString urlsInfoPath = tablePathConfig.URLCLUSTER_URLS_INFO();

    const NYT::TSortColumns KEYS_QUERIES_REGIONS_URLS_INFO   = {"QueryId", "RegionId", "UrlId"};
    const NYT::TSortColumns KEYS_QUERIES_INFO                = {"QueryId", "RegionId"};
    const NYT::TSortColumns KEYS_URLS_INFO                   = {"UrlId", "RegionId"};
    const NYT::TSortColumns KEYS_URLS_INFO_OPT               = {"UrlId", "RegionId", "QueryId"};

    // Each call produces a fresh table handle bound to the given transaction.
    const auto extractedTable = [&]() {
        return TTable<NCatalogia2::NProto::TQuery>(tx, extractedPath);
    };
    const auto ownersInfoTable = [&]() {
        return TTable<NCatalogia2::NProto::TOwnerInfo>(tx, ownersInfoPath);
    };

    // Stage 1: owners info, then sorted by host.
    TMapCombineReduceCmd<TGetOwnersInfoMapper, TGetOwnersInfoReducer, TGetOwnersInfoReducer>(tx)
        .OperationWeight(OPERATION_WEIGHT)
        .Input(extractedTable())
        .Output(ownersInfoTable())
        .ReduceBy({"Host"})
        .Do()
    ;

    TSortCmd<NCatalogia2::NProto::TOwnerInfo>(tx, ownersInfoTable())
        .OperationWeight(OPERATION_WEIGHT)
        .By({"Host"})
        .Do()
    ;

    // Stage 2: urls info. The sort key extends the reduce key with QueryId.
    TSortCmd<NCatalogia2::NProto::TQuery>(tx, extractedTable())
        .OperationWeight(OPERATION_WEIGHT)
        .By(KEYS_URLS_INFO_OPT)
        .Do()
    ;

    TReduceCmd<TGetUrlInfoReducer>(tx)
        .OperationWeight(OPERATION_WEIGHT)
        .Input(extractedTable())
        .Output(TTable<NCatalogia2::NProto::TUrlInfo>(tx, urlsInfoPath).AsSortedOutput(KEYS_URLS_INFO))
        .AddYtFile(TCommonYTConfig::CInstance().FILE_L3_MODEL_DSSM)
        .ReduceBy(KEYS_URLS_INFO)
        .Do()
    ;

    // Stage 3: queries info. The sort key extends the reduce key with UrlId.
    TSortCmd<NCatalogia2::NProto::TQuery>(tx, extractedTable())
        .OperationWeight(OPERATION_WEIGHT)
        .By(KEYS_QUERIES_REGIONS_URLS_INFO)
        .Do()
    ;

    TReduceCmd<TGetQueryInfoReducer>(tx)
        .OperationWeight(OPERATION_WEIGHT)
        .Input(extractedTable())
        .Output(TTable<NCatalogia2::NProto::TQueryInfo>(tx, queriesInfoPath).AsSortedOutput(KEYS_QUERIES_INFO))
        .AddYtFile(TCommonYTConfig::CInstance().FILE_L3_MODEL_DSSM)
        .ReduceBy(KEYS_QUERIES_INFO)
        .Do()
    ;
}

/// Runs the chain of YT operations that derives the "foreign" query/url tables
/// from the extracted queries and the previously built queries/urls info tables.
///
/// Tables read:    URLCLUSTER_QUERIES_EXTRACTED, URLCLUSTER_QUERIES_INFO, URLCLUSTER_URLS_INFO.
/// Tables written: URLCLUSTER_QUERIES_INFO_QRU, URLCLUSTER_QUERIES_FOREIGN_COUNTER,
///                 URLCLUSTER_QUERIES_UNIQUE_OQR, URLCLUSTER_QUERIES_UNIQUE_OUR.
/// Intermediate tables (URLCLUSTER_QUERIES_UNIQUE_QRU, URLCLUSTER_QUERIES_FOREIGN,
/// URLCLUSTER_URLS_FOREIGN) are dropped once they have been consumed.
///
/// @param tx               YT client/transaction every operation is started through.
/// @param tablePathConfig  Source of all table paths used by this stage.
void GenerateQueries(NYT::IClientBasePtr tx, const TTablePathConfig &tablePathConfig) {
    const TString extractedQueries = tablePathConfig.URLCLUSTER_QUERIES_EXTRACTED();
    const TString foreignQueries = tablePathConfig.URLCLUSTER_QUERIES_FOREIGN();
    const TString foreignQueriesCounters = tablePathConfig.URLCLUSTER_QUERIES_FOREIGN_COUNTER();
    const TString foreignUrls = tablePathConfig.URLCLUSTER_URLS_FOREIGN();
    const TString queriesInfo = tablePathConfig.URLCLUSTER_QUERIES_INFO();
    const TString queriesInfoQru = tablePathConfig.URLCLUSTER_QUERIES_INFO_QRU();
    const TString uniqOqr = tablePathConfig.URLCLUSTER_QUERIES_UNIQUE_OQR();
    const TString uniqOur = tablePathConfig.URLCLUSTER_QUERIES_UNIQUE_OUR();
    const TString uniqQru = tablePathConfig.URLCLUSTER_QUERIES_UNIQUE_QRU();
    const TString urlsInfo = tablePathConfig.URLCLUSTER_URLS_INFO();

    const NYT::TSortColumns KEYS_QUERIES_REGIONS_URLS_INFO   = {"QueryId", "RegionId", "UrlId"};
    const NYT::TSortColumns KEYS_QUERIES_INFO                = {"QueryId", "RegionId"};
    const NYT::TSortColumns KEYS_FOREIGN_QUERIES             = {"QueryId", "RegionId", "OwnerId", "UrlId"};
    const NYT::TSortColumns KEYS_FOREIGN_URLS                = {"UrlId", "RegionId", "OwnerId", "BranchUrlId"};
    const NYT::TSortColumns KEYS_URLS_INFO                   = {"UrlId", "RegionId"};

    // extractedQueries -> uniqQru, reduced by (QueryId, RegionId, UrlId).
    // NOTE(review): no sort is issued here, so the input is presumably left sorted
    // by these keys by the preceding pipeline stage — confirm against the caller.
    TReduceCmd<TQueryRegionUrlUniqReducer>(tx)
        .OperationWeight(OPERATION_WEIGHT)
        .Input(TTable<NCatalogia2::NProto::TQuery>(tx, extractedQueries))
        .Output(TTable<NCatalogia2::NProto::TQueryRegionUrlInfo>(tx, uniqQru).AsSortedOutput(KEYS_QUERIES_REGIONS_URLS_INFO))
        .ReduceBy(KEYS_QUERIES_REGIONS_URLS_INFO)
        .Do()
    ;

    // Join queriesInfo with uniqQru on (QueryId, RegionId) -> queriesInfoQru.
    TReduceCmd<TQueryUrlJoinInfoReducer0>(tx)
        .OperationWeight(OPERATION_WEIGHT)
        .Input(TTable<NCatalogia2::NProto::TQueryInfo>(tx, queriesInfo), QueryInfoInputTag)
        .Input(TTable<NCatalogia2::NProto::TQueryRegionUrlInfo>(tx, uniqQru), QueryRegionUrlInfoInputTag)
        .Output(TTable<NCatalogia2::NProto::TQueryRegionUrlInfo>(tx, queriesInfoQru).AsSortedOutput(KEYS_QUERIES_INFO), QueryRegionUrlInfoOutputTag)
        .ReduceBy(KEYS_QUERIES_INFO)
        .Do()
    ;

    // uniqQru was only needed as an input of the join above.
    TTable<NCatalogia2::NProto::TQueryRegionUrlInfo>(tx, uniqQru).Drop();

    // Re-sort the joined table by (UrlId, RegionId) so it can be reduced together with urlsInfo.
    TSortCmd<NCatalogia2::NProto::TQueryRegionUrlInfo>(tx, TTable<NCatalogia2::NProto::TQueryRegionUrlInfo>(tx, queriesInfoQru))
        .OperationWeight(OPERATION_WEIGHT)
        .By(KEYS_URLS_INFO)
        .Do()
    ;

    // Join urlsInfo with queriesInfoQru on (UrlId, RegionId); the reducer writes three
    // outputs: foreign queries, per-url foreign query counters and foreign urls.
    TReduceCmd<TQueriesInfoQRUUrlsInfoJoinReducer>(tx)
        .OperationWeight(OPERATION_WEIGHT)
        .Input(TTable<NCatalogia2::NProto::TUrlInfo>(tx, urlsInfo), UrlInfoInputTag)
        .Input(TTable<NCatalogia2::NProto::TQueryRegionUrlInfo>(tx, queriesInfoQru), QueryRegionUrlInfoInputTag)
        .Output(TTable<NCatalogia2::NProto::TForeignQuery>(tx, foreignQueries), ForeignQueryOutputTag)
        .Output(TTable<NCatalogia2::NProto::TForeignQueryCounter>(tx, foreignQueriesCounters).AsSortedOutput(KEYS_URLS_INFO), ForeignQueryCounterOutputTag)
        .Output(TTable<NCatalogia2::NProto::TForeignUrl>(tx, foreignUrls), ForeignUrlOutputTag)
        .ReduceBy(KEYS_URLS_INFO)
        .Do()
    ;

    // foreignQueries -> uniqOqr, sorted and then reduced by (QueryId, RegionId, OwnerId, UrlId).
    TSortCmd<NCatalogia2::NProto::TForeignQuery>(tx, TTable<NCatalogia2::NProto::TForeignQuery>(tx, foreignQueries))
        .OperationWeight(OPERATION_WEIGHT)
        .By(KEYS_FOREIGN_QUERIES)
        .Do()
    ;

    TReduceCmd<TOwnerQueryRegionUniqReducer>(tx)
        .OperationWeight(OPERATION_WEIGHT)
        .Input(TTable<NCatalogia2::NProto::TForeignQuery>(tx, foreignQueries))
        .Output(TTable<NCatalogia2::NProto::TForeignQuery>(tx, uniqOqr).AsSortedOutput(KEYS_FOREIGN_QUERIES))
        .ReduceBy(KEYS_FOREIGN_QUERIES)
        .Do()
    ;

    // foreignUrls -> uniqOur, sorted and then reduced by (UrlId, RegionId, OwnerId, BranchUrlId).
    // The table holds TForeignUrl rows (it is written and read with that type in this
    // function), so the sort must use the same row type rather than TForeignQuery.
    TSortCmd<NCatalogia2::NProto::TForeignUrl>(tx, TTable<NCatalogia2::NProto::TForeignUrl>(tx, foreignUrls))
        .OperationWeight(OPERATION_WEIGHT)
        .By(KEYS_FOREIGN_URLS)
        .Do()
    ;

    TReduceCmd<TOwnerUrlRegionUniqReducer>(tx)
        .OperationWeight(OPERATION_WEIGHT)
        .Input(TTable<NCatalogia2::NProto::TForeignUrl>(tx, foreignUrls))
        .Output(TTable<NCatalogia2::NProto::TForeignUrl>(tx, uniqOur).AsSortedOutput(KEYS_FOREIGN_URLS))
        .ReduceBy(KEYS_FOREIGN_URLS)
        .Do()
    ;

    // Both intermediate tables have been reduced into uniqOqr / uniqOur.
    TTable<NCatalogia2::NProto::TForeignQuery>(tx, foreignQueries).Drop();
    TTable<NCatalogia2::NProto::TForeignUrl>(tx, foreignUrls).Drop();
}

/// Reads every row of the `ownersInfo` table and stores OwnerId -> Owner in `ownerIdToHosts`.
/// An entry already present for the same OwnerId is overwritten by the later row.
///
/// @param tx              YT client/transaction used to open the table reader.
/// @param ownersInfo      Path of the table holding TOwnerInfo rows.
/// @param ownerIdToHosts  Output map; filled in place, not cleared beforehand.
void LoadOwnersInfo(NYT::IClientBasePtr tx, const TString &ownersInfo, THashMap<TOwnerId, TString> &ownerIdToHosts) {
    auto rows = tx->CreateTableReader<NCatalogia2::NProto::TOwnerInfo>(ownersInfo);
    while (rows->IsValid()) {
        const auto &ownerInfo = rows->GetRow();
        const TOwnerId key = ownerInfo.GetOwnerId();
        ownerIdToHosts[key] = ownerInfo.GetOwner();
        rows->Next();
    }
}

/// Fills `pornoHosts` from the table configured as TABLE_SOURCE_PORNO_HOSTS.
/// For each row the "Host" column is lower-cased, passed through FixPornoDomain()
/// and the result is inserted into the set.
///
/// @param client      YT client used to open the table reader.
/// @param pornoHosts  Output set; filled in place, not cleared beforehand.
void LoadPornoHosts(NYT::IClientBasePtr client, THashSet<TString> &pornoHosts) {
    auto rows = client->CreateTableReader<NYT::TNode>(TCommonYTConfig::CInstance().TABLE_SOURCE_PORNO_HOSTS);
    while (rows->IsValid()) {
        const NYT::TNode &record = rows->GetRow();
        TString loweredHost = record["Host"].AsString();
        loweredHost.to_lower();
        pornoHosts.insert(TString{FixPornoDomain(loweredHost)});
        rows->Next();
    }
}

/// Enriches the unique owner/query/region table with query and url info, publishes the
/// result as the final generated-queries table and builds a per-host statistics table.
///
/// Sequence of YT operations:
///  1. reduce queriesInfo + uniqOqr by (QueryId, RegionId), rewriting uniqOqr in place;
///  2. sort uniqOqr by (UrlId, RegionId, QueryId);
///  3. reduce urlsInfo + foreign query counters + uniqOqr by (UrlId, RegionId),
///     rewriting uniqOqr in place again;
///  4. drop the old final table and sort uniqOqr into it by (Host, Path, RegionId, DssmTopL2);
///  5. map-reduce the final table by Host into the statistics table and sort that by Host.
///
/// @param tx               YT client/transaction every operation is started through.
/// @param tablePathConfig  Source of all table paths used by this stage.
/// @param ownerIdToHosts   Passed to the constructor of TQueriesInfoUniqOQRJoinReducer.
/// @param pornoHosts       Passed to the constructor of TUrlsInfoUniqOQRJoinReducer.
void JoinContentToForeignQueries(NYT::IClientBasePtr tx, const TTablePathConfig &tablePathConfig, const THashMap<TOwnerId, TString> &ownerIdToHosts, const THashSet<TString> &pornoHosts) {
    // Short local names for the row types used below.
    using TQueryInfoRow       = NCatalogia2::NProto::TQueryInfo;
    using TUrlInfoRow         = NCatalogia2::NProto::TUrlInfo;
    using TForeignQueryRow    = NCatalogia2::NProto::TForeignQuery;
    using TForeignCounterRow  = NCatalogia2::NProto::TForeignQueryCounter;
    using TGeneratedQueryRow  = NCatalogia2::NProto::TGeneratedQuery;
    using TGeneratedStatsRow  = NCatalogia2::NProto::TGeneratedStatistics;

    const TString pathQueriesInfo     = tablePathConfig.URLCLUSTER_QUERIES_INFO();
    const TString pathUrlsInfo        = tablePathConfig.URLCLUSTER_URLS_INFO();
    const TString pathForeignCounters = tablePathConfig.URLCLUSTER_QUERIES_FOREIGN_COUNTER();
    const TString pathWorking         = tablePathConfig.URLCLUSTER_QUERIES_UNIQUE_OQR();
    const TString pathFinal           = tablePathConfig.URLCLUSTER_QUERIES_UNIQUE_OQR_FINAL();
    const TString pathFinalStats      = tablePathConfig.URLCLUSTER_QUERIES_UNIQUE_OQR_FINAL_STATISTICS();

    const NYT::TSortColumns byQueryRegion    = {"QueryId", "RegionId"};
    const NYT::TSortColumns byUrlRegion      = {"UrlId", "RegionId"};
    const NYT::TSortColumns byUrlRegionQuery = {"UrlId", "RegionId", "QueryId"};
    const NYT::TSortColumns byFinalKeys      = {"Host", "Path", "RegionId", "DssmTopL2"};

    // Step 1: join with queries info; the working table is both an input and the output.
    TReduceCmd<TQueriesInfoUniqOQRJoinReducer>(tx, new TQueriesInfoUniqOQRJoinReducer(ownerIdToHosts))
        .OperationWeight(OPERATION_WEIGHT)
        .Input(TTable<TQueryInfoRow>(tx, pathQueriesInfo), QueryInfoInputTag)
        .Input(TTable<TForeignQueryRow>(tx, pathWorking), ForeignQueryInputTag)
        .Output(TTable<TGeneratedQueryRow>(tx, pathWorking).AsSortedOutput(byQueryRegion), GeneratedQueryOutputTag)
        .ReduceBy(byQueryRegion)
        .MemoryLimit(6_GBs)
        .Do()
    ;

    // Step 2: re-key the working table for the url-side join.
    TSortCmd<TGeneratedQueryRow>(tx, TTable<TGeneratedQueryRow>(tx, pathWorking))
        .OperationWeight(OPERATION_WEIGHT)
        .By(byUrlRegionQuery)
        .Do()
    ;

    // Step 3: join with urls info and foreign query counters; ships the DSSM model file to the jobs.
    TReduceCmd<TUrlsInfoUniqOQRJoinReducer>(tx, new TUrlsInfoUniqOQRJoinReducer(pornoHosts))
        .OperationWeight(OPERATION_WEIGHT)
        .Input(TTable<TUrlInfoRow>(tx, pathUrlsInfo), UrlInfoInputTag)
        .Input(TTable<TForeignCounterRow>(tx, pathForeignCounters), ForeignQueryCounterInputTag)
        .Input(TTable<TGeneratedQueryRow>(tx, pathWorking), GeneratedQueryInputTag)
        .Output(TTable<TGeneratedQueryRow>(tx, pathWorking).AsSortedOutput(byUrlRegion), GeneratedQueryOutputTag)
        .ReduceBy(byUrlRegion)
        .AddYtFile(TCommonYTConfig::CInstance().FILE_L3_MODEL_DSSM)
        .MemoryLimit(2_GBs)
        .Do()
    ;

    // Step 4: replace the final table with a freshly sorted, compressed copy of the working table.
    TTable<TGeneratedQueryRow>(tx, pathFinal).Drop();

    TSortCmd<TGeneratedQueryRow>(tx)
        .OperationWeight(OPERATION_WEIGHT)
        .Input(TTable<TGeneratedQueryRow>(tx, pathWorking))
        .Output(TTable<TGeneratedQueryRow>(tx, pathFinal)
            .SetCompressionCodec(ECompressionCodec::BROTLI_6)
            .SetErasureCodec(EErasureCodec::LRC_12_2_2)
        )
        .By(byFinalKeys)
        .Do()
    ;

    // Step 5: per-host statistics over the final table; only three columns are read,
    // and no mapper-side combiner instance is supplied (nullptr).
    TMapCombineReduceCmd<
        TGerenratedQueriesStatisticsMapper,
        TGerenratedQueriesStatisticsReducer,
        TGerenratedQueriesStatisticsReducer
    >(tx, new TGerenratedQueriesStatisticsMapper, nullptr, new TGerenratedQueriesStatisticsReducer)
        .OperationWeight(OPERATION_WEIGHT)
        .Input(TTable<TGeneratedQueryRow>(tx, pathFinal).SelectFields({"Host", "IsPornoHost", "UpperPornoUpperPl"}))
        .Output(TTable<TGeneratedStatsRow>(tx, pathFinalStats))
        .ReduceBy({"Host"})
        .Do()
    ;

    TSortCmd<TGeneratedStatsRow>(tx, TTable<TGeneratedStatsRow>(tx, pathFinalStats))
        .OperationWeight(OPERATION_WEIGHT)
        .By({"Host"})
        .Do()
    ;
}

/// Placeholder: performs no work. All parameters are intentionally unused; the
/// url-side join that used to live here is kept below as commented-out reference code.
void JoinContentToForeignUrls(NYT::IClientBasePtr tx, const TTablePathConfig &tablePathConfig, const THashMap<TOwnerId, TString> &ownerIdToHosts) {
    Y_UNUSED(tx, tablePathConfig, ownerIdToHosts);

    // Legacy pipeline (disabled), kept for reference:
    //const NYT::TSortColumns KEYS_URLS_INFO = {"UrlId", "RegionId"};
/*
        .SortBy(F_URL_ID, F_REGION_ID)
        .Sort(uniqOur)
        .InputNode(urlsInfo)
        .InputNode(uniqOur)
        .OutputNode(uniqOur)
        .ReduceBy(F_URL_ID, F_REGION_ID)
        .MemoryLimit(MEMORY_LIMIT_6GB)
        .Reduce(new TUrlsInfoUniqOURJoinReducer(ownerIdToHosts))

        .SortBy(F_URL_ID, F_REGION_ID)
        .Sort(uniqOur)
        .InputNode(urlsInfo)
        .InputNode(uniqOur)
        .OutputNode(uniqOur)
        .ReduceBy(F_URL_ID, F_REGION_ID)
        .MemoryLimit(MEMORY_LIMIT_4GB)
        .Reduce(new TUrlsInfoUniqOURJoinStage2Reducer)
        .Archive(uniqOur, SYNC_CTX, 15000)
        .Move(uniqOur, uniqOurFinal)
*/
}

/// Entry point of the catalogia2 query-building task.
///
/// Skips all work when the final table (URLCLUSTER_QUERIES_UNIQUE_OQR_FINAL) was
/// modified less than WEEK_SECONDS ago. Otherwise runs the pipeline stages in order
/// (ProcessHits, ExtractQueries, GetInfos, GenerateQueries, JoinContentToForeignQueries)
/// and finally stamps the final table with the ATTR_UPLOAD_TIME attribute.
///
/// The config argument is unused; settings are read from the TConfig singleton.
void TaskBuildCatalogiaQueries(const TConfig &/*config*/) {
    NYT::IClientPtr client = NYT::CreateClient(TConfig::CInstance().MR_SERVER_HOST_MAIN);
    const TTablePathConfig tablePathConfig = GetPathConfig();
    NYTUtils::CreatePath(client, tablePathConfig.URLCLUSTER_ROOT_QUERIES);
    NYTUtils::CreatePath(client, tablePathConfig.URLCLUSTER_ROOT_TMP);

    try {
        if ((Now().Seconds() - NYTUtils::GetModificationTime(client, tablePathConfig.URLCLUSTER_QUERIES_UNIQUE_OQR_FINAL())) < WEEK_SECONDS) {
            LOG_INFO("catalogia2, final table is already processed");
            return;
        }
    } catch (...) {
        // Best effort: any failure of the freshness check is treated as "needs rebuild".
        // Log it instead of swallowing silently so an unexpected rebuild can be explained.
        LOG_INFO("catalogia2, unable to check final table modification time, rebuilding");
    }

    const TString inputTableRoot = TCommonYTConfigSQ::CInstance().TABLE_CONVERTED_QUERIES_V4_ROOT;
    const TString ownersInfo = tablePathConfig.URLCLUSTER_OWNERS_INFO();

    ProcessHits(client);
    size_t days = 7;
    ExtractQueries(client, inputTableRoot, tablePathConfig, days);
    GetInfos(client, tablePathConfig);
    GenerateQueries(client, tablePathConfig);

    // In-memory lookups handed to the join reducers below.
    THashMap<TOwnerId, TString> ownerIdToHosts;
    LoadOwnersInfo(client, ownersInfo, ownerIdToHosts);

    THashSet<TString> pornoHosts;
    LoadPornoHosts(client, pornoHosts);

    JoinContentToForeignQueries(client, tablePathConfig, ownerIdToHosts, pornoHosts);
    // Url-side join is disabled.
    //JoinContentToForeignUrls(client, ownerIdToHosts);
    NYTUtils::SetAttr(client, tablePathConfig.URLCLUSTER_QUERIES_UNIQUE_OQR_FINAL(), ATTR_UPLOAD_TIME, Now().ToStringLocalUpToSeconds());
}

} //namespace NCatalogia2
} //namespace NWebmaster

/*
v by Owner-Query            OQ
v by Owner-Query-RegionId   OQR
v by Query-RegionId-Url;    QRU
v by Query-RegionId;        QR
v by Query-Url;             QU
v by Query;                 Q
v by Url;                   U
*/
